# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

# This code was automatically generated with version 13.1.0. Do not modify it directly.
from typing import Any, Optional
from enum import IntEnum
import cython
import ctypes
from libc.stdlib cimport calloc, malloc, free
from libc cimport string
from libc.stdint cimport int32_t, uint32_t, int64_t, uint64_t, uintptr_t
from libc.stddef cimport wchar_t
from libc.limits cimport CHAR_MIN
from libcpp.vector cimport vector
from cpython.buffer cimport PyObject_CheckBuffer, PyObject_GetBuffer, PyBuffer_Release, PyBUF_SIMPLE, PyBUF_ANY_CONTIGUOUS
from cpython.bytes cimport PyBytes_FromStringAndSize
import cuda.bindings.driver
from libcpp.map cimport map

import cuda.bindings.driver as _driver
_driver = _driver.__dict__
include "_lib/utils.pxi"

ctypedef unsigned long long signed_char_ptr
ctypedef unsigned long long unsigned_char_ptr
ctypedef unsigned long long char_ptr
ctypedef unsigned long long short_ptr
ctypedef unsigned long long unsigned_short_ptr
ctypedef unsigned long long int_ptr
ctypedef unsigned long long long_int_ptr
ctypedef unsigned long long long_long_int_ptr
ctypedef unsigned long long unsigned_int_ptr
ctypedef unsigned long long unsigned_long_int_ptr
ctypedef unsigned long long unsigned_long_long_int_ptr
ctypedef unsigned long long uint32_t_ptr
ctypedef unsigned long long uint64_t_ptr
ctypedef unsigned long long int32_t_ptr
ctypedef unsigned long long int64_t_ptr
ctypedef unsigned long long unsigned_ptr
ctypedef unsigned long long unsigned_long_long_ptr
ctypedef unsigned long long long_long_ptr
ctypedef unsigned long long size_t_ptr
ctypedef unsigned long long long_ptr
ctypedef unsigned long long float_ptr
ctypedef unsigned long long double_ptr
ctypedef unsigned long long void_ptr

#: Default page-locked allocation flag
cudaHostAllocDefault = cyruntime.cudaHostAllocDefault

#: Pinned memory accessible by all CUDA contexts
cudaHostAllocPortable = cyruntime.cudaHostAllocPortable

#: Map allocation into device space
cudaHostAllocMapped = cyruntime.cudaHostAllocMapped

#: Write-combined memory
cudaHostAllocWriteCombined = cyruntime.cudaHostAllocWriteCombined

#: Default host memory registration flag
cudaHostRegisterDefault = cyruntime.cudaHostRegisterDefault

#: Pinned memory accessible by all CUDA contexts
cudaHostRegisterPortable = cyruntime.cudaHostRegisterPortable

#: Map registered memory into device space
cudaHostRegisterMapped = cyruntime.cudaHostRegisterMapped

#: Memory-mapped I/O space
cudaHostRegisterIoMemory = cyruntime.cudaHostRegisterIoMemory

#: Memory-mapped read-only
cudaHostRegisterReadOnly = cyruntime.cudaHostRegisterReadOnly
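
# Editor's note: a minimal usage sketch, not part of the generated file. It
# assumes the cuda.bindings.runtime convention that every call returns a
# tuple beginning with a cudaError_t. Allocating portable, mapped pinned
# host memory with the flags above:
#
#   err, ptr = cudaHostAlloc(1 << 20, cudaHostAllocPortable | cudaHostAllocMapped)
#   assert err == cudaError_t.cudaSuccess
#   err, = cudaFreeHost(ptr)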

#: Default peer addressing enable flag
cudaPeerAccessDefault = cyruntime.cudaPeerAccessDefault

#: Default stream flag
cudaStreamDefault = cyruntime.cudaStreamDefault

#: Stream does not synchronize with stream 0 (the NULL stream)
cudaStreamNonBlocking = cyruntime.cudaStreamNonBlocking

#: Legacy stream handle
#:
#: Stream handle that can be passed as a cudaStream_t to use an implicit
#: stream with legacy synchronization behavior.
#:
#: See the details of the default stream synchronization behavior.
cudaStreamLegacy = cyruntime.cudaStreamLegacy

#: Per-thread stream handle
#:
#: Stream handle that can be passed as a cudaStream_t to use an implicit
#: stream with per-thread synchronization behavior.
#:
#: See the details of the default stream synchronization behavior.
cudaStreamPerThread = cyruntime.cudaStreamPerThread
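
# Editor's note: illustrative sketch only. Creating a stream that does not
# synchronize with the NULL stream (assumes the (err, result) tuple-return
# convention of cuda.bindings.runtime):
#
#   err, stream = cudaStreamCreateWithFlags(cudaStreamNonBlocking)
#   assert err == cudaError_t.cudaSuccess
#   err, = cudaStreamDestroy(stream)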

#: Default event flag
cudaEventDefault = cyruntime.cudaEventDefault

#: Event uses blocking synchronization
cudaEventBlockingSync = cyruntime.cudaEventBlockingSync

#: Event will not record timing data
cudaEventDisableTiming = cyruntime.cudaEventDisableTiming

#: Event is suitable for interprocess use. cudaEventDisableTiming must be
#: set
cudaEventInterprocess = cyruntime.cudaEventInterprocess

#: Default event record flag
cudaEventRecordDefault = cyruntime.cudaEventRecordDefault

#: Event is captured in the graph as an external event node when performing
#: stream capture
cudaEventRecordExternal = cyruntime.cudaEventRecordExternal

#: Default event wait flag
cudaEventWaitDefault = cyruntime.cudaEventWaitDefault

#: Event is captured in the graph as an external event node when performing
#: stream capture
cudaEventWaitExternal = cyruntime.cudaEventWaitExternal
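
# Editor's note: illustrative sketch only. As documented above, an
# interprocess-capable event must also disable timing (assumes the
# (err, result) tuple-return convention):
#
#   err, event = cudaEventCreateWithFlags(cudaEventDisableTiming | cudaEventInterprocess)
#   assert err == cudaError_t.cudaSuccess
#   err, = cudaEventDestroy(event)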

#: Device flag - Automatic scheduling
cudaDeviceScheduleAuto = cyruntime.cudaDeviceScheduleAuto

#: Device flag - Spin default scheduling
cudaDeviceScheduleSpin = cyruntime.cudaDeviceScheduleSpin

#: Device flag - Yield default scheduling
cudaDeviceScheduleYield = cyruntime.cudaDeviceScheduleYield

#: Device flag - Use blocking synchronization
cudaDeviceScheduleBlockingSync = cyruntime.cudaDeviceScheduleBlockingSync

#: Device flag - Use blocking synchronization [Deprecated]
cudaDeviceBlockingSync = cyruntime.cudaDeviceBlockingSync

#: Device schedule flags mask
cudaDeviceScheduleMask = cyruntime.cudaDeviceScheduleMask

#: Device flag - Support mapped pinned allocations
cudaDeviceMapHost = cyruntime.cudaDeviceMapHost

#: Device flag - Keep local memory allocation after launch
cudaDeviceLmemResizeToMax = cyruntime.cudaDeviceLmemResizeToMax

#: Device flag - Ensure synchronous memory operations on this context will
#: synchronize
cudaDeviceSyncMemops = cyruntime.cudaDeviceSyncMemops

#: Device flags mask
cudaDeviceMask = cyruntime.cudaDeviceMask
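
# Editor's note: illustrative sketch only. Device flags are combined with
# bitwise OR and applied per device (assumes the (err,) tuple-return
# convention):
#
#   err, = cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync | cudaDeviceMapHost)
#   assert err == cudaError_t.cudaSuccess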

#: Default CUDA array allocation flag
cudaArrayDefault = cyruntime.cudaArrayDefault

#: Must be set in cudaMalloc3DArray to create a layered CUDA array
cudaArrayLayered = cyruntime.cudaArrayLayered

#: Must be set in cudaMallocArray or cudaMalloc3DArray in order to bind
#: surfaces to the CUDA array
cudaArraySurfaceLoadStore = cyruntime.cudaArraySurfaceLoadStore

#: Must be set in cudaMalloc3DArray to create a cubemap CUDA array
cudaArrayCubemap = cyruntime.cudaArrayCubemap

#: Must be set in cudaMallocArray or cudaMalloc3DArray in order to perform
#: texture gather operations on the CUDA array
cudaArrayTextureGather = cyruntime.cudaArrayTextureGather

#: Must be set in cudaExternalMemoryGetMappedMipmappedArray if the
#: mipmapped array is used as a color target in a graphics API
cudaArrayColorAttachment = cyruntime.cudaArrayColorAttachment

#: Must be set in cudaMallocArray, cudaMalloc3DArray or
#: cudaMallocMipmappedArray in order to create a sparse CUDA array or CUDA
#: mipmapped array
cudaArraySparse = cyruntime.cudaArraySparse

#: Must be set in cudaMallocArray, cudaMalloc3DArray or
#: cudaMallocMipmappedArray in order to create a deferred mapping CUDA
#: array or CUDA mipmapped array
cudaArrayDeferredMapping = cyruntime.cudaArrayDeferredMapping

#: Automatically enable peer access between remote devices as needed
cudaIpcMemLazyEnablePeerAccess = cyruntime.cudaIpcMemLazyEnablePeerAccess

#: Memory can be accessed by any stream on any device
cudaMemAttachGlobal = cyruntime.cudaMemAttachGlobal

#: Memory cannot be accessed by any stream on any device
cudaMemAttachHost = cyruntime.cudaMemAttachHost

#: Memory can only be accessed by a single stream on the associated device
cudaMemAttachSingle = cyruntime.cudaMemAttachSingle
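
# Editor's note: illustrative sketch only. The cudaMemAttach* flags select
# the initial visibility of a managed allocation (assumes the (err, result)
# tuple-return convention):
#
#   err, ptr = cudaMallocManaged(1 << 20, cudaMemAttachGlobal)
#   assert err == cudaError_t.cudaSuccess
#   err, = cudaFree(ptr)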

#: Default behavior
cudaOccupancyDefault = cyruntime.cudaOccupancyDefault

#: Assume global caching is enabled and cannot be automatically turned off
cudaOccupancyDisableCachingOverride = cyruntime.cudaOccupancyDisableCachingOverride

#: Device id that represents the CPU
cudaCpuDeviceId = cyruntime.cudaCpuDeviceId

#: Device id that represents an invalid device
cudaInvalidDeviceId = cyruntime.cudaInvalidDeviceId

#: Tell the CUDA runtime that DeviceFlags is being set in cudaInitDevice
#: call
cudaInitDeviceFlagsAreValid = cyruntime.cudaInitDeviceFlagsAreValid

#: Indicates that the layered sparse CUDA array or CUDA mipmapped array has
#: a single mip tail region for all layers
cudaArraySparsePropertiesSingleMipTail = cyruntime.cudaArraySparsePropertiesSingleMipTail

#: This flag, if set, indicates that the memory will be used as a buffer
#: for hardware accelerated decompression.
cudaMemPoolCreateUsageHwDecompress = cyruntime.cudaMemPoolCreateUsageHwDecompress

#: CUDA IPC Handle Size
CUDA_IPC_HANDLE_SIZE = cyruntime.CUDA_IPC_HANDLE_SIZE

#: Indicates that the external memory object is a dedicated resource
cudaExternalMemoryDedicated = cyruntime.cudaExternalMemoryDedicated

#: When the flags parameter of
#: :py:obj:`~.cudaExternalSemaphoreSignalParams` contains this flag, it
#: indicates that signaling an external semaphore object should skip
#: performing appropriate memory synchronization operations over all the
#: external memory objects that are imported as
#: :py:obj:`~.cudaExternalMemoryHandleTypeNvSciBuf`, which otherwise are
#: performed by default to ensure data coherency with other importers of
#: the same NvSciBuf memory objects.
cudaExternalSemaphoreSignalSkipNvSciBufMemSync = cyruntime.cudaExternalSemaphoreSignalSkipNvSciBufMemSync

#: When the flags parameter of
#: :py:obj:`~.cudaExternalSemaphoreWaitParams` contains this flag, it
#: indicates that waiting on an external semaphore object should skip
#: performing appropriate memory synchronization operations over all the
#: external memory objects that are imported as
#: :py:obj:`~.cudaExternalMemoryHandleTypeNvSciBuf`, which otherwise are
#: performed by default to ensure data coherency with other importers of
#: the same NvSciBuf memory objects.
cudaExternalSemaphoreWaitSkipNvSciBufMemSync = cyruntime.cudaExternalSemaphoreWaitSkipNvSciBufMemSync

#: When the flags parameter of :py:obj:`~.cudaDeviceGetNvSciSyncAttributes`
#: is set to this, it indicates that the application needs signaler-specific
#: NvSciSyncAttr to be filled by :py:obj:`~.cudaDeviceGetNvSciSyncAttributes`.
cudaNvSciSyncAttrSignal = cyruntime.cudaNvSciSyncAttrSignal

#: When the flags parameter of :py:obj:`~.cudaDeviceGetNvSciSyncAttributes`
#: is set to this, it indicates that the application needs waiter-specific
#: NvSciSyncAttr to be filled by :py:obj:`~.cudaDeviceGetNvSciSyncAttributes`.
cudaNvSciSyncAttrWait = cyruntime.cudaNvSciSyncAttrWait

#: This port activates when the kernel has finished executing.
cudaGraphKernelNodePortDefault = cyruntime.cudaGraphKernelNodePortDefault

#: This port activates when all blocks of the kernel have performed
#: cudaTriggerProgrammaticLaunchCompletion() or have terminated. It must be
#: used with edge type :py:obj:`~.cudaGraphDependencyTypeProgrammatic`. See
#: also :py:obj:`~.cudaLaunchAttributeProgrammaticEvent`.
cudaGraphKernelNodePortProgrammatic = cyruntime.cudaGraphKernelNodePortProgrammatic

#: This port activates when all blocks of the kernel have begun execution.
#: See also :py:obj:`~.cudaLaunchAttributeLaunchCompletionEvent`.
cudaGraphKernelNodePortLaunchCompletion = cyruntime.cudaGraphKernelNodePortLaunchCompletion

cudaStreamAttributeAccessPolicyWindow = cyruntime.cudaStreamAttributeAccessPolicyWindow

cudaStreamAttributeSynchronizationPolicy = cyruntime.cudaStreamAttributeSynchronizationPolicy

cudaStreamAttributeMemSyncDomainMap = cyruntime.cudaStreamAttributeMemSyncDomainMap

cudaStreamAttributeMemSyncDomain = cyruntime.cudaStreamAttributeMemSyncDomain

cudaStreamAttributePriority = cyruntime.cudaStreamAttributePriority

cudaKernelNodeAttributeAccessPolicyWindow = cyruntime.cudaKernelNodeAttributeAccessPolicyWindow

cudaKernelNodeAttributeCooperative = cyruntime.cudaKernelNodeAttributeCooperative

cudaKernelNodeAttributePriority = cyruntime.cudaKernelNodeAttributePriority

cudaKernelNodeAttributeClusterDimension = cyruntime.cudaKernelNodeAttributeClusterDimension

cudaKernelNodeAttributeClusterSchedulingPolicyPreference = cyruntime.cudaKernelNodeAttributeClusterSchedulingPolicyPreference

cudaKernelNodeAttributeMemSyncDomainMap = cyruntime.cudaKernelNodeAttributeMemSyncDomainMap

cudaKernelNodeAttributeMemSyncDomain = cyruntime.cudaKernelNodeAttributeMemSyncDomain

cudaKernelNodeAttributePreferredSharedMemoryCarveout = cyruntime.cudaKernelNodeAttributePreferredSharedMemoryCarveout

cudaKernelNodeAttributeDeviceUpdatableKernelNode = cyruntime.cudaKernelNodeAttributeDeviceUpdatableKernelNode

cudaKernelNodeAttributeNvlinkUtilCentricScheduling = cyruntime.cudaKernelNodeAttributeNvlinkUtilCentricScheduling

cudaSurfaceType1D = cyruntime.cudaSurfaceType1D

cudaSurfaceType2D = cyruntime.cudaSurfaceType2D

cudaSurfaceType3D = cyruntime.cudaSurfaceType3D

cudaSurfaceTypeCubemap = cyruntime.cudaSurfaceTypeCubemap

cudaSurfaceType1DLayered = cyruntime.cudaSurfaceType1DLayered

cudaSurfaceType2DLayered = cyruntime.cudaSurfaceType2DLayered

cudaSurfaceTypeCubemapLayered = cyruntime.cudaSurfaceTypeCubemapLayered

cudaTextureType1D = cyruntime.cudaTextureType1D

cudaTextureType2D = cyruntime.cudaTextureType2D

cudaTextureType3D = cyruntime.cudaTextureType3D

cudaTextureTypeCubemap = cyruntime.cudaTextureTypeCubemap

cudaTextureType1DLayered = cyruntime.cudaTextureType1DLayered

cudaTextureType2DLayered = cyruntime.cudaTextureType2DLayered

cudaTextureTypeCubemapLayered = cyruntime.cudaTextureTypeCubemapLayered

#: CUDA Runtime API Version
CUDART_VERSION = cyruntime.CUDART_VERSION

__CUDART_API_VERSION = cyruntime.__CUDART_API_VERSION

#: Maximum number of planes per frame
CUDA_EGL_MAX_PLANES = cyruntime.CUDA_EGL_MAX_PLANES
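
# Editor's note: illustrative sketch only. CUDART_VERSION is the
# compile-time version of the runtime headers; the runtime and driver
# versions can also be queried at run time (assumes the (err, result)
# tuple-return convention):
#
#   err, runtime_version = cudaRuntimeGetVersion()
#   err, driver_version = cudaDriverGetVersion()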

class cudaError_t(IntEnum):
    """
    CUDA error types
    """

    #: The API call returned with no errors. In the case of query calls,
    #: this also means that the operation being queried is complete (see
    #: :py:obj:`~.cudaEventQuery()` and :py:obj:`~.cudaStreamQuery()`).
    cudaSuccess = cyruntime.cudaError.cudaSuccess

    #: This indicates that one or more of the parameters passed to the API
    #: call is not within an acceptable range of values.
    cudaErrorInvalidValue = cyruntime.cudaError.cudaErrorInvalidValue

    #: The API call failed because it was unable to allocate enough memory
    #: or other resources to perform the requested operation.
    cudaErrorMemoryAllocation = cyruntime.cudaError.cudaErrorMemoryAllocation

    #: The API call failed because the CUDA driver and runtime could not be
    #: initialized.
    cudaErrorInitializationError = cyruntime.cudaError.cudaErrorInitializationError

    #: This indicates that a CUDA Runtime API call cannot be executed
    #: because it is being called during process shut down, at a point in
    #: time after CUDA driver has been unloaded.
    cudaErrorCudartUnloading = cyruntime.cudaError.cudaErrorCudartUnloading

    #: This indicates the profiler is not initialized for this run. This
    #: can happen when the application is running with external profiling
    #: tools like the Visual Profiler.
    cudaErrorProfilerDisabled = cyruntime.cudaError.cudaErrorProfilerDisabled

    #: [Deprecated]
    cudaErrorProfilerNotInitialized = cyruntime.cudaError.cudaErrorProfilerNotInitialized

    #: [Deprecated]
    cudaErrorProfilerAlreadyStarted = cyruntime.cudaError.cudaErrorProfilerAlreadyStarted

    #: [Deprecated]
    cudaErrorProfilerAlreadyStopped = cyruntime.cudaError.cudaErrorProfilerAlreadyStopped

    #: This indicates that a kernel launch is requesting resources that can
    #: never be satisfied by the current device. Requesting more shared
    #: memory per block than the device supports will trigger this error,
    #: as will requesting too many threads or blocks. See
    #: :py:obj:`~.cudaDeviceProp` for more device limitations.
    cudaErrorInvalidConfiguration = cyruntime.cudaError.cudaErrorInvalidConfiguration

    #: This indicates that one or more of the pitch-related parameters
    #: passed to the API call is not within the acceptable range for pitch.
    cudaErrorInvalidPitchValue = cyruntime.cudaError.cudaErrorInvalidPitchValue

    #: This indicates that the symbol name/identifier passed to the API
    #: call is not a valid name or identifier.
    cudaErrorInvalidSymbol = cyruntime.cudaError.cudaErrorInvalidSymbol

    #: This indicates that at least one host pointer passed to the API call
    #: is not a valid host pointer. [Deprecated]
    cudaErrorInvalidHostPointer = cyruntime.cudaError.cudaErrorInvalidHostPointer

    #: This indicates that at least one device pointer passed to the API
    #: call is not a valid device pointer. [Deprecated]
    cudaErrorInvalidDevicePointer = cyruntime.cudaError.cudaErrorInvalidDevicePointer

    #: This indicates that the texture passed to the API call is not a
    #: valid texture.
    cudaErrorInvalidTexture = cyruntime.cudaError.cudaErrorInvalidTexture

    #: This indicates that the texture binding is not valid. This occurs if
    #: you call :py:obj:`~.cudaGetTextureAlignmentOffset()` with an unbound
    #: texture.
    cudaErrorInvalidTextureBinding = cyruntime.cudaError.cudaErrorInvalidTextureBinding

    #: This indicates that the channel descriptor passed to the API call is
    #: not valid. This occurs if the format is not one of the formats
    #: specified by :py:obj:`~.cudaChannelFormatKind`, or if one of the
    #: dimensions is invalid.
    cudaErrorInvalidChannelDescriptor = cyruntime.cudaError.cudaErrorInvalidChannelDescriptor

    #: This indicates that the direction of the memcpy passed to the API
    #: call is not one of the types specified by
    #: :py:obj:`~.cudaMemcpyKind`.
    cudaErrorInvalidMemcpyDirection = cyruntime.cudaError.cudaErrorInvalidMemcpyDirection

    #: This indicated that the user has taken the address of a constant
    #: variable, which was forbidden up until the CUDA 3.1 release.
    #: [Deprecated]
    cudaErrorAddressOfConstant = cyruntime.cudaError.cudaErrorAddressOfConstant

    #: This indicated that a texture fetch was not able to be performed.
    #: This was previously used for device emulation of texture operations.
    #: [Deprecated]
    cudaErrorTextureFetchFailed = cyruntime.cudaError.cudaErrorTextureFetchFailed

    #: This indicated that a texture was not bound for access. This was
    #: previously used for device emulation of texture operations.
    #: [Deprecated]
    cudaErrorTextureNotBound = cyruntime.cudaError.cudaErrorTextureNotBound

    #: This indicated that a synchronization operation had failed. This was
    #: previously used for some device emulation functions. [Deprecated]
    cudaErrorSynchronizationError = cyruntime.cudaError.cudaErrorSynchronizationError

    #: This indicates that a non-float texture was being accessed with
    #: linear filtering. This is not supported by CUDA.
    cudaErrorInvalidFilterSetting = cyruntime.cudaError.cudaErrorInvalidFilterSetting

    #: This indicates that an attempt was made to read an unsupported data
    #: type as a normalized float. This is not supported by CUDA.
    cudaErrorInvalidNormSetting = cyruntime.cudaError.cudaErrorInvalidNormSetting

    #: Mixing of device and device emulation code was not allowed.
    #: [Deprecated]
    cudaErrorMixedDeviceExecution = cyruntime.cudaError.cudaErrorMixedDeviceExecution

    #: This indicates that the API call is not yet implemented. Production
    #: releases of CUDA will never return this error. [Deprecated]
    cudaErrorNotYetImplemented = cyruntime.cudaError.cudaErrorNotYetImplemented

    #: This indicated that an emulated device pointer exceeded the 32-bit
    #: address range. [Deprecated]
    cudaErrorMemoryValueTooLarge = cyruntime.cudaError.cudaErrorMemoryValueTooLarge

    #: This indicates that the CUDA driver that the application has loaded
    #: is a stub library. Applications that run with the stub rather than a
    #: real driver loaded will result in CUDA API returning this error.
    cudaErrorStubLibrary = cyruntime.cudaError.cudaErrorStubLibrary

    #: This indicates that the installed NVIDIA CUDA driver is older than
    #: the CUDA runtime library. This is not a supported configuration.
    #: Users should install an updated NVIDIA display driver to allow the
    #: application to run.
    cudaErrorInsufficientDriver = cyruntime.cudaError.cudaErrorInsufficientDriver

    #: This indicates that the API call requires a newer CUDA driver than
    #: the one currently installed. Users should install an updated NVIDIA
    #: CUDA driver to allow the API call to succeed.
    cudaErrorCallRequiresNewerDriver = cyruntime.cudaError.cudaErrorCallRequiresNewerDriver

    #: This indicates that the surface passed to the API call is not a
    #: valid surface.
    cudaErrorInvalidSurface = cyruntime.cudaError.cudaErrorInvalidSurface

    #: This indicates that multiple global or constant variables (across
    #: separate CUDA source files in the application) share the same string
    #: name.
    cudaErrorDuplicateVariableName = cyruntime.cudaError.cudaErrorDuplicateVariableName

    #: This indicates that multiple textures (across separate CUDA source
    #: files in the application) share the same string name.
    cudaErrorDuplicateTextureName = cyruntime.cudaError.cudaErrorDuplicateTextureName

    #: This indicates that multiple surfaces (across separate CUDA source
    #: files in the application) share the same string name.
    cudaErrorDuplicateSurfaceName = cyruntime.cudaError.cudaErrorDuplicateSurfaceName

    #: This indicates that all CUDA devices are busy or unavailable at the
    #: current time. Devices are often busy/unavailable due to use of
    #: :py:obj:`~.cudaComputeModeProhibited`,
    #: :py:obj:`~.cudaComputeModeExclusiveProcess`, or when long running
    #: CUDA kernels have filled up the GPU and are blocking new work from
    #: starting. They can also be unavailable due to memory constraints on
    #: a device that already has active CUDA work being performed.
    cudaErrorDevicesUnavailable = cyruntime.cudaError.cudaErrorDevicesUnavailable

    #: This indicates that the current context is not compatible with the
    #: CUDA Runtime. This can only occur if you are using CUDA
    #: Runtime/Driver interoperability and have created an existing Driver
    #: context using the driver API. The Driver context may be incompatible
    #: either because the Driver context was created using an older version
    #: of the API, because the Runtime API call expects a primary driver
    #: context and the Driver context is not primary, or because the Driver
    #: context has been destroyed. Please see "Interactions with the CUDA
    #: Driver API" for more information.
    cudaErrorIncompatibleDriverContext = cyruntime.cudaError.cudaErrorIncompatibleDriverContext

    #: The device function being invoked (usually via
    #: :py:obj:`~.cudaLaunchKernel()`) was not previously configured via
    #: the :py:obj:`~.cudaConfigureCall()` function.
    cudaErrorMissingConfiguration = cyruntime.cudaError.cudaErrorMissingConfiguration

    #: This indicated that a previous kernel launch failed. This was
    #: previously used for device emulation of kernel launches.
    #: [Deprecated]
    cudaErrorPriorLaunchFailure = cyruntime.cudaError.cudaErrorPriorLaunchFailure

    #: This error indicates that a device runtime grid launch did not occur
    #: because the depth of the child grid would exceed the maximum
    #: supported number of nested grid launches.
    cudaErrorLaunchMaxDepthExceeded = cyruntime.cudaError.cudaErrorLaunchMaxDepthExceeded

    #: This error indicates that a grid launch did not occur because the
    #: kernel uses file-scoped textures which are unsupported by the device
    #: runtime. Kernels launched via the device runtime only support
    #: textures created with the Texture Object APIs.
    cudaErrorLaunchFileScopedTex = cyruntime.cudaError.cudaErrorLaunchFileScopedTex

    #: This error indicates that a grid launch did not occur because the
    #: kernel uses file-scoped surfaces which are unsupported by the device
    #: runtime. Kernels launched via the device runtime only support
    #: surfaces created with the Surface Object APIs.
    cudaErrorLaunchFileScopedSurf = cyruntime.cudaError.cudaErrorLaunchFileScopedSurf

    #: This error indicates that a call to
    #: :py:obj:`~.cudaDeviceSynchronize` made from the device runtime
    #: failed because the call was made at grid depth greater than
    #: either the default (2 levels of grids) or user specified device
    #: limit :py:obj:`~.cudaLimitDevRuntimeSyncDepth`. To be able to
    #: synchronize on launched grids at a greater depth successfully, the
    #: maximum nested depth at which :py:obj:`~.cudaDeviceSynchronize` will
    #: be called must be specified with the
    #: :py:obj:`~.cudaLimitDevRuntimeSyncDepth` limit to the
    #: :py:obj:`~.cudaDeviceSetLimit` API before the host-side launch of a
    #: kernel using the device runtime. Keep in mind that additional levels
    #: of sync depth require the runtime to reserve large amounts of device
    #: memory that cannot be used for user allocations. Note that
    #: :py:obj:`~.cudaDeviceSynchronize` made from device runtime is only
    #: supported on devices of compute capability < 9.0.
    cudaErrorSyncDepthExceeded = cyruntime.cudaError.cudaErrorSyncDepthExceeded

    #: This error indicates that a device runtime grid launch failed
    #: because the launch would exceed the limit
    #: :py:obj:`~.cudaLimitDevRuntimePendingLaunchCount`. For this launch
    #: to proceed successfully, :py:obj:`~.cudaDeviceSetLimit` must be
    #: called to set the :py:obj:`~.cudaLimitDevRuntimePendingLaunchCount`
    #: to be higher than the upper bound of outstanding launches that can
    #: be issued to the device runtime. Keep in mind that raising the limit
    #: of pending device runtime launches will require the runtime to
    #: reserve device memory that cannot be used for user allocations.
    cudaErrorLaunchPendingCountExceeded = cyruntime.cudaError.cudaErrorLaunchPendingCountExceeded

    #: The requested device function does not exist or is not compiled for
    #: the proper device architecture.
    cudaErrorInvalidDeviceFunction = cyruntime.cudaError.cudaErrorInvalidDeviceFunction

    #: This indicates that no CUDA-capable devices were detected by the
    #: installed CUDA driver.
    cudaErrorNoDevice = cyruntime.cudaError.cudaErrorNoDevice

    #: This indicates that the device ordinal supplied by the user does not
    #: correspond to a valid CUDA device or that the action requested is
    #: invalid for the specified device.
    cudaErrorInvalidDevice = cyruntime.cudaError.cudaErrorInvalidDevice

    #: This indicates that the device doesn't have a valid Grid License.
    cudaErrorDeviceNotLicensed = cyruntime.cudaError.cudaErrorDeviceNotLicensed

    #: By default, the CUDA runtime may perform a minimal set of self-
    #: tests, as well as CUDA driver tests, to establish the validity of
    #: both. Introduced in CUDA 11.2, this error return indicates that at
    #: least one of these tests has failed and the validity of either the
    #: runtime or the driver could not be established.
    cudaErrorSoftwareValidityNotEstablished = cyruntime.cudaError.cudaErrorSoftwareValidityNotEstablished

    #: This indicates an internal startup failure in the CUDA runtime.
    cudaErrorStartupFailure = cyruntime.cudaError.cudaErrorStartupFailure

    #: This indicates that the device kernel image is invalid.
    cudaErrorInvalidKernelImage = cyruntime.cudaError.cudaErrorInvalidKernelImage

    #: This most frequently indicates that there is no context bound to the
    #: current thread. This can also be returned if the context passed to
    #: an API call is not a valid handle (such as a context that has had
    #: :py:obj:`~.cuCtxDestroy()` invoked on it). This can also be returned
    #: if a user mixes different API versions (i.e. 3010 context with 3020
    #: API calls). See :py:obj:`~.cuCtxGetApiVersion()` for more details.
    cudaErrorDeviceUninitialized = cyruntime.cudaError.cudaErrorDeviceUninitialized

    #: This indicates that the buffer object could not be mapped.
    cudaErrorMapBufferObjectFailed = cyruntime.cudaError.cudaErrorMapBufferObjectFailed

    #: This indicates that the buffer object could not be unmapped.
    cudaErrorUnmapBufferObjectFailed = cyruntime.cudaError.cudaErrorUnmapBufferObjectFailed

    #: This indicates that the specified array is currently mapped and thus
    #: cannot be destroyed.
    cudaErrorArrayIsMapped = cyruntime.cudaError.cudaErrorArrayIsMapped

    #: This indicates that the resource is already mapped.
    cudaErrorAlreadyMapped = cyruntime.cudaError.cudaErrorAlreadyMapped

    #: This indicates that there is no kernel image available that is
    #: suitable for the device. This can occur when a user specifies code
    #: generation options for a particular CUDA source file that do not
    #: include the corresponding device configuration.
    cudaErrorNoKernelImageForDevice = cyruntime.cudaError.cudaErrorNoKernelImageForDevice

    #: This indicates that a resource has already been acquired.
    cudaErrorAlreadyAcquired = cyruntime.cudaError.cudaErrorAlreadyAcquired

    #: This indicates that a resource is not mapped.
    cudaErrorNotMapped = cyruntime.cudaError.cudaErrorNotMapped

    #: This indicates that a mapped resource is not available for access as
    #: an array.
    cudaErrorNotMappedAsArray = cyruntime.cudaError.cudaErrorNotMappedAsArray

    #: This indicates that a mapped resource is not available for access as
    #: a pointer.
    cudaErrorNotMappedAsPointer = cyruntime.cudaError.cudaErrorNotMappedAsPointer

    #: This indicates that an uncorrectable ECC error was detected during
    #: execution.
    cudaErrorECCUncorrectable = cyruntime.cudaError.cudaErrorECCUncorrectable

    #: This indicates that the :py:obj:`~.cudaLimit` passed to the API call
    #: is not supported by the active device.
    cudaErrorUnsupportedLimit = cyruntime.cudaError.cudaErrorUnsupportedLimit

    #: This indicates that a call tried to access an exclusive-thread
    #: device that is already in use by a different thread.
    cudaErrorDeviceAlreadyInUse = cyruntime.cudaError.cudaErrorDeviceAlreadyInUse

    #: This error indicates that P2P access is not supported across the
    #: given devices.
    cudaErrorPeerAccessUnsupported = cyruntime.cudaError.cudaErrorPeerAccessUnsupported

    #: A PTX compilation failed. The runtime may fall back to compiling PTX
    #: if an application does not contain a suitable binary for the current
    #: device.
    cudaErrorInvalidPtx = cyruntime.cudaError.cudaErrorInvalidPtx

    #: This indicates an error with the OpenGL or DirectX context.
    cudaErrorInvalidGraphicsContext = cyruntime.cudaError.cudaErrorInvalidGraphicsContext

    #: This indicates that an uncorrectable NVLink error was detected
    #: during the execution.
    cudaErrorNvlinkUncorrectable = cyruntime.cudaError.cudaErrorNvlinkUncorrectable

    #: This indicates that the PTX JIT compiler library was not found. The
    #: JIT Compiler library is used for PTX compilation. The runtime may
    #: fall back to compiling PTX if an application does not contain a
    #: suitable binary for the current device.
    cudaErrorJitCompilerNotFound = cyruntime.cudaError.cudaErrorJitCompilerNotFound

    #: This indicates that the provided PTX was compiled with an
    #: unsupported toolchain. The most common reason for this is that the
    #: PTX was generated by a compiler newer than what is supported by the
    #: CUDA driver and PTX JIT compiler.
    cudaErrorUnsupportedPtxVersion = cyruntime.cudaError.cudaErrorUnsupportedPtxVersion

    #: This indicates that the JIT compilation was disabled. The JIT
    #: compilation compiles PTX. The runtime may fall back to compiling PTX
    #: if an application does not contain a suitable binary for the current
    #: device.
    cudaErrorJitCompilationDisabled = cyruntime.cudaError.cudaErrorJitCompilationDisabled

    #: This indicates that the provided execution affinity is not supported
    #: by the device.
    cudaErrorUnsupportedExecAffinity = cyruntime.cudaError.cudaErrorUnsupportedExecAffinity

    #: This indicates that the code to be compiled by the PTX JIT contains
    #: an unsupported call to cudaDeviceSynchronize.
    cudaErrorUnsupportedDevSideSync = cyruntime.cudaError.cudaErrorUnsupportedDevSideSync

    #: This indicates that an exception occurred on the device that is now
    #: contained by the GPU's error containment capability. Common causes
    #: are (a) certain types of invalid accesses of peer GPU memory over
    #: NVLink, and (b) certain classes of hardware errors. This leaves the
    #: process in an inconsistent state and any further CUDA work will
    #: return the same error. To continue using CUDA, the process must be
    #: terminated and relaunched.
    cudaErrorContained = cyruntime.cudaError.cudaErrorContained

    #: This indicates that the device kernel source is invalid.
    cudaErrorInvalidSource = cyruntime.cudaError.cudaErrorInvalidSource

    #: This indicates that the file specified was not found.
    cudaErrorFileNotFound = cyruntime.cudaError.cudaErrorFileNotFound

    #: This indicates that a link to a shared object failed to resolve.
    cudaErrorSharedObjectSymbolNotFound = cyruntime.cudaError.cudaErrorSharedObjectSymbolNotFound

    #: This indicates that initialization of a shared object failed.
    cudaErrorSharedObjectInitFailed = cyruntime.cudaError.cudaErrorSharedObjectInitFailed

    #: This error indicates that an OS call failed.
    cudaErrorOperatingSystem = cyruntime.cudaError.cudaErrorOperatingSystem

    #: This indicates that a resource handle passed to the API call was not
    #: valid. Resource handles are opaque types like
    #: :py:obj:`~.cudaStream_t` and :py:obj:`~.cudaEvent_t`.
    cudaErrorInvalidResourceHandle = cyruntime.cudaError.cudaErrorInvalidResourceHandle

    #: This indicates that a resource required by the API call is not in a
    #: valid state to perform the requested operation.
    cudaErrorIllegalState = cyruntime.cudaError.cudaErrorIllegalState

    #: This indicates an attempt was made to introspect an object in a way
    #: that would discard semantically important information. This is
    #: either due to the object using functionality newer than the API
    #: version used to introspect it or omission of optional return
    #: arguments.
    cudaErrorLossyQuery = cyruntime.cudaError.cudaErrorLossyQuery

    #: This indicates that a named symbol was not found. Examples of
    #: symbols are global/constant variable names, driver function names,
    #: texture names, and surface names.
    cudaErrorSymbolNotFound = cyruntime.cudaError.cudaErrorSymbolNotFound

    #: This indicates that asynchronous operations issued previously have
    #: not completed yet. This result is not actually an error, but must be
    #: indicated differently than :py:obj:`~.cudaSuccess` (which indicates
    #: completion). Calls that may return this value include
    #: :py:obj:`~.cudaEventQuery()` and :py:obj:`~.cudaStreamQuery()`.
    cudaErrorNotReady = cyruntime.cudaError.cudaErrorNotReady

    #: The device encountered a load or store instruction on an invalid
    #: memory address. This leaves the process in an inconsistent state and
    #: any further CUDA work will return the same error. To continue using
    #: CUDA, the process must be terminated and relaunched.
    cudaErrorIllegalAddress = cyruntime.cudaError.cudaErrorIllegalAddress

    #: This indicates that a launch did not occur because it did not have
    #: appropriate resources. Although this error is similar to
    #: :py:obj:`~.cudaErrorInvalidConfiguration`, this error usually
    #: indicates that the user has attempted to pass too many arguments to
    #: the device kernel, or the kernel launch specifies too many threads
    #: for the kernel's register count.
    cudaErrorLaunchOutOfResources = cyruntime.cudaError.cudaErrorLaunchOutOfResources

    #: This indicates that the device kernel took too long to execute. This
    #: can only occur if timeouts are enabled - see the device attribute
    #: :py:obj:`~.cudaDevAttrKernelExecTimeout` for more information. This
    #: leaves the process in an inconsistent state and any further CUDA
    #: work will return the same error. To continue using CUDA, the process
    #: must be terminated and relaunched.
    cudaErrorLaunchTimeout = cyruntime.cudaError.cudaErrorLaunchTimeout

    #: This error indicates a kernel launch that uses an incompatible
    #: texturing mode.
    cudaErrorLaunchIncompatibleTexturing = cyruntime.cudaError.cudaErrorLaunchIncompatibleTexturing

    #: This error indicates that a call to
    #: :py:obj:`~.cudaDeviceEnablePeerAccess()` is trying to re-enable peer
    #: addressing from a context which has already had peer addressing
    #: enabled.
    cudaErrorPeerAccessAlreadyEnabled = cyruntime.cudaError.cudaErrorPeerAccessAlreadyEnabled

    #: This error indicates that :py:obj:`~.cudaDeviceDisablePeerAccess()`
    #: is trying to disable peer addressing which has not been enabled yet
    #: via :py:obj:`~.cudaDeviceEnablePeerAccess()`.
    cudaErrorPeerAccessNotEnabled = cyruntime.cudaError.cudaErrorPeerAccessNotEnabled

    #: This indicates that the user has called
    #: :py:obj:`~.cudaSetValidDevices()`, :py:obj:`~.cudaSetDeviceFlags()`,
    #: :py:obj:`~.cudaD3D9SetDirect3DDevice()`,
    #: :py:obj:`~.cudaD3D10SetDirect3DDevice`,
    #: :py:obj:`~.cudaD3D11SetDirect3DDevice()`, or
    #: :py:obj:`~.cudaVDPAUSetVDPAUDevice()` after initializing the CUDA
    #: runtime by calling non-device management operations (allocating
    #: memory and launching kernels are examples of non-device management
    #: operations). This error can also be returned if using runtime/driver
    #: interoperability and there is an existing :py:obj:`~.CUcontext`
    #: active on the host thread.
    cudaErrorSetOnActiveProcess = cyruntime.cudaError.cudaErrorSetOnActiveProcess

    #: This error indicates that the context current to the calling thread
    #: has been destroyed using :py:obj:`~.cuCtxDestroy`, or is a primary
    #: context which has not yet been initialized.
    cudaErrorContextIsDestroyed = cyruntime.cudaError.cudaErrorContextIsDestroyed

    #: An assert triggered in device code during kernel execution. The
    #: device cannot be used again. All existing allocations are invalid.
    #: To continue using CUDA, the process must be terminated and
    #: relaunched.
    cudaErrorAssert = cyruntime.cudaError.cudaErrorAssert

    #: This error indicates that the hardware resources required to enable
    #: peer access have been exhausted for one or more of the devices
    #: passed to :py:obj:`~.cudaEnablePeerAccess()`.
    cudaErrorTooManyPeers = cyruntime.cudaError.cudaErrorTooManyPeers

    #: This error indicates that the memory range passed to
    #: :py:obj:`~.cudaHostRegister()` has already been registered.
    cudaErrorHostMemoryAlreadyRegistered = cyruntime.cudaError.cudaErrorHostMemoryAlreadyRegistered

    #: This error indicates that the pointer passed to
    #: :py:obj:`~.cudaHostUnregister()` does not correspond to any
    #: currently registered memory region.
    cudaErrorHostMemoryNotRegistered = cyruntime.cudaError.cudaErrorHostMemoryNotRegistered

    #: Device encountered an error in the call stack during kernel
    #: execution, possibly due to stack corruption or exceeding the stack
    #: size limit. This leaves the process in an inconsistent state and any
    #: further CUDA work will return the same error. To continue using
    #: CUDA, the process must be terminated and relaunched.
    cudaErrorHardwareStackError = cyruntime.cudaError.cudaErrorHardwareStackError

    #: The device encountered an illegal instruction during kernel
    #: execution. This leaves the process in an inconsistent state and any
    #: further CUDA work will return the same error. To continue using
    #: CUDA, the process must be terminated and relaunched.
    cudaErrorIllegalInstruction = cyruntime.cudaError.cudaErrorIllegalInstruction

    #: The device encountered a load or store instruction on a memory
    #: address which is not aligned. This leaves the process in an
    #: inconsistent state and any further CUDA work will return the same
    #: error. To continue using CUDA, the process must be terminated and
    #: relaunched.
    cudaErrorMisalignedAddress = cyruntime.cudaError.cudaErrorMisalignedAddress

    #: While executing a kernel, the device encountered an instruction
    #: which can only operate on memory locations in certain address spaces
    #: (global, shared, or local), but was supplied a memory address not
    #: belonging to an allowed address space. This leaves the process in an
    #: inconsistent state and any further CUDA work will return the same
    #: error. To continue using CUDA, the process must be terminated and
    #: relaunched.
    cudaErrorInvalidAddressSpace = cyruntime.cudaError.cudaErrorInvalidAddressSpace

    #: The device encountered an invalid program counter. This leaves the
    #: process in an inconsistent state and any further CUDA work will
    #: return the same error. To continue using CUDA, the process must be
    #: terminated and relaunched.
    cudaErrorInvalidPc = cyruntime.cudaError.cudaErrorInvalidPc

    #: An exception occurred on the device while executing a kernel. Common
    #: causes include dereferencing an invalid device pointer and accessing
    #: out of bounds shared memory. Less common cases can be system
    #: specific - more information about these cases can be found in the
    #: system specific user guide. This leaves the process in an
    #: inconsistent state and any further CUDA work will return the same
    #: error. To continue using CUDA, the process must be terminated and
    #: relaunched.
    cudaErrorLaunchFailure = cyruntime.cudaError.cudaErrorLaunchFailure

    #: This error indicates that the number of blocks launched per grid for
    #: a kernel that was launched via
    #: :py:obj:`~.cudaLaunchCooperativeKernel` exceeds the maximum number
    #: of blocks as allowed by
    #: :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessor` or
    #: :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`
    #: times the number of multiprocessors as specified by the device
    #: attribute :py:obj:`~.cudaDevAttrMultiProcessorCount`.
    cudaErrorCooperativeLaunchTooLarge = cyruntime.cudaError.cudaErrorCooperativeLaunchTooLarge

    #: An exception occurred on the device while exiting a kernel using
    #: tensor memory: the tensor memory was not completely deallocated.
    #: This leaves the process in an inconsistent state and any further
    #: CUDA work will return the same error. To continue using CUDA, the
    #: process must be terminated and relaunched.
    cudaErrorTensorMemoryLeak = cyruntime.cudaError.cudaErrorTensorMemoryLeak

    #: This error indicates the attempted operation is not permitted.
    cudaErrorNotPermitted = cyruntime.cudaError.cudaErrorNotPermitted

    #: This error indicates the attempted operation is not supported on the
    #: current system or device.
    cudaErrorNotSupported = cyruntime.cudaError.cudaErrorNotSupported

    #: This error indicates that the system is not yet ready to start any
    #: CUDA work. To continue using CUDA, verify the system configuration
    #: is in a valid state and all required driver daemons are actively
    #: running. More information about this error can be found in the
    #: system specific user guide.
    cudaErrorSystemNotReady = cyruntime.cudaError.cudaErrorSystemNotReady

    #: This error indicates that there is a mismatch between the versions
    #: of the display driver and the CUDA driver. Refer to the
    #: compatibility documentation for supported versions.
    cudaErrorSystemDriverMismatch = cyruntime.cudaError.cudaErrorSystemDriverMismatch

    #: This error indicates that the system was upgraded to run with
    #: forward compatibility but the visible hardware detected by CUDA does
    #: not support this configuration. Refer to the compatibility
    #: documentation for the supported hardware matrix or ensure that only
    #: supported hardware is visible during initialization via the
    #: CUDA_VISIBLE_DEVICES environment variable.
    cudaErrorCompatNotSupportedOnDevice = cyruntime.cudaError.cudaErrorCompatNotSupportedOnDevice

    #: This error indicates that the MPS client failed to connect to the
    #: MPS control daemon or the MPS server.
    cudaErrorMpsConnectionFailed = cyruntime.cudaError.cudaErrorMpsConnectionFailed

    #: This error indicates that the remote procedural call between the MPS
    #: server and the MPS client failed.
    cudaErrorMpsRpcFailure = cyruntime.cudaError.cudaErrorMpsRpcFailure

    #: This error indicates that the MPS server is not ready to accept new
    #: MPS client requests. This error can be returned when the MPS server
    #: is in the process of recovering from a fatal failure.
    cudaErrorMpsServerNotReady = cyruntime.cudaError.cudaErrorMpsServerNotReady

    #: This error indicates that the hardware resources required to create
    #: MPS client have been exhausted.
    cudaErrorMpsMaxClientsReached = cyruntime.cudaError.cudaErrorMpsMaxClientsReached

    #: This error indicates that the hardware resources required to support
    #: device connections have been exhausted.
    cudaErrorMpsMaxConnectionsReached = cyruntime.cudaError.cudaErrorMpsMaxConnectionsReached

    #: This error indicates that the MPS client has been terminated by the
    #: server. To continue using CUDA, the process must be terminated and
    #: relaunched.
    cudaErrorMpsClientTerminated = cyruntime.cudaError.cudaErrorMpsClientTerminated

    #: This error indicates that the program is using CUDA Dynamic
    #: Parallelism, but the current configuration, like MPS, does not
    #: support it.
    cudaErrorCdpNotSupported = cyruntime.cudaError.cudaErrorCdpNotSupported

    #: This error indicates that the program contains an unsupported
    #: interaction between different versions of CUDA Dynamic Parallelism.
    cudaErrorCdpVersionMismatch = cyruntime.cudaError.cudaErrorCdpVersionMismatch

    #: The operation is not permitted when the stream is capturing.
    cudaErrorStreamCaptureUnsupported = cyruntime.cudaError.cudaErrorStreamCaptureUnsupported

    #: The current capture sequence on the stream has been invalidated due
    #: to a previous error.
    cudaErrorStreamCaptureInvalidated = cyruntime.cudaError.cudaErrorStreamCaptureInvalidated

    #: The operation would have resulted in a merge of two independent
    #: capture sequences.
    cudaErrorStreamCaptureMerge = cyruntime.cudaError.cudaErrorStreamCaptureMerge

    #: The capture was not initiated in this stream.
    cudaErrorStreamCaptureUnmatched = cyruntime.cudaError.cudaErrorStreamCaptureUnmatched

    #: The capture sequence contains a fork that was not joined to the
    #: primary stream.
    cudaErrorStreamCaptureUnjoined = cyruntime.cudaError.cudaErrorStreamCaptureUnjoined

    #: A dependency would have been created which crosses the capture
    #: sequence boundary. Only implicit in-stream ordering dependencies are
    #: allowed to cross the boundary.
    cudaErrorStreamCaptureIsolation = cyruntime.cudaError.cudaErrorStreamCaptureIsolation

    #: The operation would have resulted in a disallowed implicit
    #: dependency on a current capture sequence from cudaStreamLegacy.
    cudaErrorStreamCaptureImplicit = cyruntime.cudaError.cudaErrorStreamCaptureImplicit

    #: The operation is not permitted on an event which was last recorded
    #: in a capturing stream.
    cudaErrorCapturedEvent = cyruntime.cudaError.cudaErrorCapturedEvent

    #: A stream capture sequence not initiated with the
    #: :py:obj:`~.cudaStreamCaptureModeRelaxed` argument to
    #: :py:obj:`~.cudaStreamBeginCapture` was passed to
    #: :py:obj:`~.cudaStreamEndCapture` in a different thread.
    cudaErrorStreamCaptureWrongThread = cyruntime.cudaError.cudaErrorStreamCaptureWrongThread

    #: This indicates that the wait operation has timed out.
    cudaErrorTimeout = cyruntime.cudaError.cudaErrorTimeout

    #: This error indicates that the graph update was not performed because
    #: it included changes which violated constraints specific to
    #: instantiated graph update.
    cudaErrorGraphExecUpdateFailure = cyruntime.cudaError.cudaErrorGraphExecUpdateFailure

    #: This indicates that an async error has occurred in a device outside
    #: of CUDA. If CUDA was waiting for an external device's signal before
    #: consuming shared data, the external device signaled an error
    #: indicating that the data is not valid for consumption. This leaves
    #: the process in an inconsistent state and any further CUDA work will
    #: return the same error. To continue using CUDA, the process must be
    #: terminated and relaunched.
    cudaErrorExternalDevice = cyruntime.cudaError.cudaErrorExternalDevice

    #: This indicates that a kernel launch error has occurred due to
    #: cluster misconfiguration.
    cudaErrorInvalidClusterSize = cyruntime.cudaError.cudaErrorInvalidClusterSize

    #: Indicates a function handle is not loaded when calling an API that
    #: requires a loaded function.
    cudaErrorFunctionNotLoaded = cyruntime.cudaError.cudaErrorFunctionNotLoaded

    #: This error indicates one or more resources passed in are not valid
    #: resource types for the operation.
    cudaErrorInvalidResourceType = cyruntime.cudaError.cudaErrorInvalidResourceType

    #: This error indicates one or more resources are insufficient or non-
    #: applicable for the operation.
    cudaErrorInvalidResourceConfiguration = cyruntime.cudaError.cudaErrorInvalidResourceConfiguration

    #: This error indicates that the requested operation is not permitted
    #: because the stream is in a detached state. This can occur if the
    #: green context associated with the stream has been destroyed,
    #: limiting the stream's operational capabilities.
    cudaErrorStreamDetached = cyruntime.cudaError.cudaErrorStreamDetached

    #: This indicates that an unknown internal error has occurred.
    cudaErrorUnknown = cyruntime.cudaError.cudaErrorUnknown
    cudaErrorApiFailureBase = cyruntime.cudaError.cudaErrorApiFailureBase
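
# Editor's note: illustrative sketch only. A common pattern for consuming
# the (err, ...) tuples returned by the bindings in this module; the
# _check helper is hypothetical, not part of the generated API:
#
#   def _check(err):
#       if err != cudaError_t.cudaSuccess:
#           raise RuntimeError(f"CUDA runtime error: {err.name}")
#
#   err, count = cudaGetDeviceCount()
#   _check(err)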

_dict_cudaError_t = dict(((int(v), v) for k, v in cudaError_t.__members__.items()))

class cudaGraphDependencyType(IntEnum):
    """
    Type annotations that can be applied to graph edges as part of
    :py:obj:`~.cudaGraphEdgeData`.
    """

    #: This is an ordinary dependency.
    cudaGraphDependencyTypeDefault = cyruntime.cudaGraphDependencyType_enum.cudaGraphDependencyTypeDefault

    #: This dependency type allows the downstream node to use
    #: `cudaGridDependencySynchronize()`. It may only be used between
    #: kernel nodes, and must be used with either the
    #: :py:obj:`~.cudaGraphKernelNodePortProgrammatic` or
    #: :py:obj:`~.cudaGraphKernelNodePortLaunchCompletion` outgoing port.
    cudaGraphDependencyTypeProgrammatic = cyruntime.cudaGraphDependencyType_enum.cudaGraphDependencyTypeProgrammatic

_dict_cudaGraphDependencyType = dict(((int(v), v) for k, v in cudaGraphDependencyType.__members__.items()))

class cudaGraphInstantiateResult(IntEnum):
    """
    Graph instantiation results
    """

    #: Instantiation succeeded
    cudaGraphInstantiateSuccess = cyruntime.cudaGraphInstantiateResult.cudaGraphInstantiateSuccess

    #: Instantiation failed for an unexpected reason which is described in
    #: the return value of the function
    cudaGraphInstantiateError = cyruntime.cudaGraphInstantiateResult.cudaGraphInstantiateError

    #: Instantiation failed due to invalid structure, such as cycles
    cudaGraphInstantiateInvalidStructure = cyruntime.cudaGraphInstantiateResult.cudaGraphInstantiateInvalidStructure

    #: Instantiation for device launch failed because the graph contained
    #: an unsupported operation
    cudaGraphInstantiateNodeOperationNotSupported = cyruntime.cudaGraphInstantiateResult.cudaGraphInstantiateNodeOperationNotSupported

    #: Instantiation for device launch failed due to the nodes belonging to
    #: different contexts
    cudaGraphInstantiateMultipleDevicesNotSupported = cyruntime.cudaGraphInstantiateResult.cudaGraphInstantiateMultipleDevicesNotSupported

    #: One or more conditional handles are not associated with conditional
    #: nodes
    cudaGraphInstantiateConditionalHandleUnused = cyruntime.cudaGraphInstantiateResult.cudaGraphInstantiateConditionalHandleUnused

_dict_cudaGraphInstantiateResult = dict(((int(v), v) for k, v in cudaGraphInstantiateResult.__members__.items()))

class cudaLaunchMemSyncDomain(IntEnum):
    """
    Memory Synchronization Domain. A kernel can be launched in a
    specified memory synchronization domain that affects all memory
    operations issued by that kernel. A memory barrier issued in one
    domain will only order memory operations in that domain, thus
    eliminating latency increase from memory barriers ordering
    unrelated traffic. By default, kernels are launched in domain 0.
    Kernels launched with :py:obj:`~.cudaLaunchMemSyncDomainRemote` will
    have a different domain ID. Users may also alter the domain ID with
    :py:obj:`~.cudaLaunchMemSyncDomainMap` for a specific stream /
    graph node / kernel launch. See
    :py:obj:`~.cudaLaunchAttributeMemSyncDomain`,
    :py:obj:`~.cudaStreamSetAttribute`, :py:obj:`~.cudaLaunchKernelEx`,
    :py:obj:`~.cudaGraphKernelNodeSetAttribute`. Memory operations
    done in kernels launched in different domains are considered
    system-scope distanced. In other words, a GPU scoped memory
    synchronization is not sufficient for memory order to be observed
    by kernels in another memory synchronization domain even if they
    are on the same GPU.
    """

    #: Launch kernels in the default domain
    cudaLaunchMemSyncDomainDefault = cyruntime.cudaLaunchMemSyncDomain.cudaLaunchMemSyncDomainDefault

    #: Launch kernels in the remote domain
    cudaLaunchMemSyncDomainRemote = cyruntime.cudaLaunchMemSyncDomain.cudaLaunchMemSyncDomainRemote

_dict_cudaLaunchMemSyncDomain = dict(((int(v), v) for k, v in cudaLaunchMemSyncDomain.__members__.items()))

class cudaLaunchAttributeID(IntEnum):
    """
    Launch attributes enum; used as id field of
    :py:obj:`~.cudaLaunchAttribute`
    """

    #: Ignored entry, for convenient composition
    cudaLaunchAttributeIgnore = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeIgnore

    #: Valid for streams, graph nodes, launches. See
    #: :py:obj:`~.cudaLaunchAttributeValue.accessPolicyWindow`.
    cudaLaunchAttributeAccessPolicyWindow = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeAccessPolicyWindow

    #: Valid for graph nodes, launches. See
    #: :py:obj:`~.cudaLaunchAttributeValue.cooperative`.
    cudaLaunchAttributeCooperative = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeCooperative

    #: Valid for streams. See
    #: :py:obj:`~.cudaLaunchAttributeValue.syncPolicy`.
    cudaLaunchAttributeSynchronizationPolicy = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeSynchronizationPolicy

    #: Valid for graph nodes, launches. See
    #: :py:obj:`~.cudaLaunchAttributeValue.clusterDim`.
    cudaLaunchAttributeClusterDimension = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeClusterDimension

    #: Valid for graph nodes, launches. See
    #: :py:obj:`~.cudaLaunchAttributeValue.clusterSchedulingPolicyPreference`.
    cudaLaunchAttributeClusterSchedulingPolicyPreference = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeClusterSchedulingPolicyPreference

    #: Valid for launches. Setting
    #: :py:obj:`~.cudaLaunchAttributeValue.programmaticStreamSerializationAllowed`
    #: to non-0 signals that the kernel will use programmatic means to
    #: resolve its stream dependency, so that the CUDA runtime should
    #: opportunistically allow the grid's execution to overlap with the
    #: previous kernel in the stream, if that kernel requests the overlap.
    #: The dependent launches can choose to wait on the dependency using
    #: the programmatic sync (cudaGridDependencySynchronize() or equivalent
    #: PTX instructions).
    cudaLaunchAttributeProgrammaticStreamSerialization = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeProgrammaticStreamSerialization

    #: Valid for launches. Set
    #: :py:obj:`~.cudaLaunchAttributeValue.programmaticEvent` to record the
    #: event. An event recorded through this launch attribute is guaranteed
    #: to only trigger after all blocks in the associated kernel trigger
    #: the event. A block can trigger the event programmatically in a
    #: future CUDA release. A trigger can also be inserted at the beginning
    #: of each block's execution if triggerAtBlockStart is set to non-0.
    #: The dependent launches can choose to wait on the dependency using
    #: the programmatic sync (cudaGridDependencySynchronize() or equivalent
    #: PTX instructions). Note that dependents (including the CPU thread
    #: calling :py:obj:`~.cudaEventSynchronize()`) are not guaranteed to
    #: observe the release precisely when it is released. For example,
    #: :py:obj:`~.cudaEventSynchronize()` may only observe the event
    #: trigger long after the associated kernel has completed. This
    #: recording type is primarily meant for establishing programmatic
    #: dependency between device tasks. Note also that this type of
    #: dependency allows, but does not guarantee, concurrent execution of
    #: tasks.
    #: The event supplied must not be an interprocess or interop event.
    #: The event must disable timing (i.e. must be created with the
    #: :py:obj:`~.cudaEventDisableTiming` flag set).
    cudaLaunchAttributeProgrammaticEvent = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeProgrammaticEvent

    #: Valid for streams, graph nodes, launches. See
    #: :py:obj:`~.cudaLaunchAttributeValue.priority`.
    cudaLaunchAttributePriority = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePriority

    #: Valid for streams, graph nodes, launches. See
    #: :py:obj:`~.cudaLaunchAttributeValue.memSyncDomainMap`.
    cudaLaunchAttributeMemSyncDomainMap = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomainMap

    #: Valid for streams, graph nodes, launches. See
    #: :py:obj:`~.cudaLaunchAttributeValue.memSyncDomain`.
    cudaLaunchAttributeMemSyncDomain = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomain

    #: Valid for graph nodes and launches. Set
    #: :py:obj:`~.cudaLaunchAttributeValue.preferredClusterDim` to allow
    #: the kernel launch to specify a preferred substitute cluster
    #: dimension. Blocks may be grouped according to either the dimensions
    #: specified with this attribute (grouped into a "preferred substitute
    #: cluster"), or the one specified with the
    #: :py:obj:`~.cudaLaunchAttributeClusterDimension` attribute (grouped
    #: into a "regular cluster"). The cluster dimensions of a "preferred
    #: substitute cluster" shall be an integer multiple greater than zero
    #: of the regular cluster dimensions. The device will attempt - on a
    #: best-effort basis - to group thread blocks into preferred clusters
    #: over grouping them into regular clusters. When it deems necessary
    #: (primarily when the device temporarily runs out of physical
    #: resources to launch the larger preferred clusters), the device may
    #: switch to launching the regular clusters instead, to utilize as
    #: much of the physical device resources as possible.
    #: Each type of cluster will have its enumeration / coordinate setup
    #: as if the grid consisted solely of its type of cluster. For example,
    #: if the preferred substitute cluster dimensions double the regular
    #: cluster dimensions, there might be simultaneously a regular cluster
    #: indexed at (1,0,0), and a preferred cluster indexed at (1,0,0). In
    #: this example, the preferred substitute cluster (1,0,0) replaces
    #: regular clusters (2,0,0) and (3,0,0) and groups their blocks.
    #: This attribute will only take effect when a regular cluster
    #: dimension has been specified. The preferred substitute cluster
    #: dimension must be an integer multiple greater than zero of the
    #: regular cluster dimension and must divide the grid. It must also be
    #: no more than `maxBlocksPerCluster`, if that is set in the kernel's
    #: `__launch_bounds__`; otherwise it must be less than the maximum
    #: value the driver can support. Beyond that, setting this attribute
    #: to a value physically unable to fit on any particular device is
    #: permitted.
    cudaLaunchAttributePreferredClusterDimension = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePreferredClusterDimension

    #: Valid for launches. Set
    #: :py:obj:`~.cudaLaunchAttributeValue.launchCompletionEvent` to record
    #: the event.
    #: Nominally, the event is triggered once all blocks of the kernel
    #: have begun execution. Currently this is a best effort. If a kernel B
    #: has a launch completion dependency on a kernel A, B may wait until A
    #: is complete. Alternatively, blocks of B may begin before all blocks
    #: of A have begun, for example if B can claim execution resources
    #: unavailable to A (e.g. they run on different GPUs) or if B is
    #: higher priority than A. Exercise caution if such an ordering
    #: inversion could lead to deadlock.
    #: A launch completion event is nominally similar to a programmatic
    #: event with `triggerAtBlockStart` set, except that it is not visible
    #: to `cudaGridDependencySynchronize()` and can be used with compute
    #: capability less than 9.0.
    #: The event supplied must not be an interprocess or interop event.
    #: The event must disable timing (i.e. must be created with the
    #: :py:obj:`~.cudaEventDisableTiming` flag set).
    cudaLaunchAttributeLaunchCompletionEvent = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeLaunchCompletionEvent

    #: Valid for graph nodes, launches. This attribute is graphs-only, and
    #: passing it to a launch in a non-capturing stream will result in an
    #: error.
    #: :py:obj:`~.cudaLaunchAttributeValue.deviceUpdatableKernelNode.deviceUpdatable`
    #: can only be set to 0 or 1. Setting the field to 1 indicates that the
    #: corresponding kernel node should be device-updatable. On success, a
    #: handle will be returned via
    #: :py:obj:`~.cudaLaunchAttributeValue.deviceUpdatableKernelNode.devNode`
    #: which can be passed to the various device-side update functions to
    #: update the node's kernel parameters from within another kernel. For
    #: more information on the types of device updates that can be made, as
    #: well as the relevant limitations thereof, see
    #: :py:obj:`~.cudaGraphKernelNodeUpdatesApply`.
    #: Nodes which are device-updatable have additional restrictions
    #: compared to regular kernel nodes. Firstly, device-updatable nodes
    #: cannot be removed from their graph via
    #: :py:obj:`~.cudaGraphDestroyNode`. Additionally, once opted in to
    #: this functionality, a node cannot opt out, and any attempt to set
    #: the deviceUpdatable attribute to 0 will result in an error. Device-
    #: updatable kernel nodes also cannot have their attributes copied
    #: to/from another kernel node via
    #: :py:obj:`~.cudaGraphKernelNodeCopyAttributes`. Graphs containing one
    #: or more device-updatable nodes also do not allow multiple
    #: instantiation, and neither the graph nor its instantiated version
    #: can be passed to :py:obj:`~.cudaGraphExecUpdate`.
    #: If a graph contains device-updatable nodes and updates those nodes
    #: from the device from within the graph, the graph must be uploaded
    #: with :py:obj:`~.cudaGraphUpload` before it is launched. For such a
    #: graph, if host-side executable graph updates are made to the device-
    #: updatable nodes, the graph must be uploaded before it is launched
    #: again.
    cudaLaunchAttributeDeviceUpdatableKernelNode = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeDeviceUpdatableKernelNode

    #: Valid for launches. On devices where the L1 cache and shared memory
    #: use the same hardware resources, setting
    #: :py:obj:`~.cudaLaunchAttributeValue.sharedMemCarveout` to a
    #: percentage between 0 and 100 sets the shared memory carveout
    #: preference, in percent of the total shared memory, for that kernel
    #: launch. This attribute takes precedence over
    #: :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout`. This is
    #: only a hint, and the driver can choose a different configuration if
    #: required for the launch.
    cudaLaunchAttributePreferredSharedMemoryCarveout = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePreferredSharedMemoryCarveout

    #: Valid for streams, graph nodes, launches. This attribute is a hint
    #: to the CUDA runtime that the launch should attempt to make the
    #: kernel maximize its NVLINK utilization.
    #:
    #: When it is possible to honor this hint, CUDA will assume each block
    #: in the grid launch will carry out an even amount of NVLINK traffic,
    #: and make a best-effort attempt to adjust the kernel launch based on
    #: that assumption.
    #: This attribute is a hint only. CUDA makes no functional or
    #: performance guarantee. Its applicability can be affected by many
    #: different factors, including the driver version (CUDA doesn't
    #: guarantee that performance characteristics will be maintained
    #: between driver versions; a driver update could alter or regress
    #: previously observed performance characteristics). It also doesn't
    #: guarantee a successful result, i.e. applying the attribute may not
    #: improve the performance of either the targeted kernel or the
    #: encapsulating application.
    #: Valid values for
    #: :py:obj:`~.cudaLaunchAttributeValue.nvlinkUtilCentricScheduling` are
    #: 0 (disabled) and 1 (enabled).
    cudaLaunchAttributeNvlinkUtilCentricScheduling = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeNvlinkUtilCentricScheduling

_dict_cudaLaunchAttributeID = dict(((int(v), v) for k, v in cudaLaunchAttributeID.__members__.items()))
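
# --- Illustrative sketch (not part of the generated bindings) -----------------
# The module-level `_dict_<Enum>` tables built above map raw integers coming back
# from the C layer onto their Python enum members. A minimal example of that
# round-trip for launch attribute ids:
def _example_decode_launch_attribute_id(raw_id):
    """Illustrative only: recover the enum member for a raw attribute id (KeyError if unknown)."""
    return _dict_cudaLaunchAttributeID[int(raw_id)]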

class cudaAsyncNotificationType(IntEnum):
    """
    Types of async notification that can occur
    """

    #: Sent when the process has exceeded its device memory budget
    cudaAsyncNotificationTypeOverBudget = cyruntime.cudaAsyncNotificationType_enum.cudaAsyncNotificationTypeOverBudget

_dict_cudaAsyncNotificationType = dict(((int(v), v) for k, v in cudaAsyncNotificationType.__members__.items()))

class cudaLogLevel(IntEnum):
    """
    """
    cudaLogLevelError = cyruntime.CUDAlogLevel_enum.cudaLogLevelError
    cudaLogLevelWarning = cyruntime.CUDAlogLevel_enum.cudaLogLevelWarning

_dict_cudaLogLevel = dict(((int(v), v) for k, v in cudaLogLevel.__members__.items()))

class cudaDataType(IntEnum):
    """"""
    CUDA_R_32F = cyruntime.cudaDataType_t.CUDA_R_32F
    CUDA_R_64F = cyruntime.cudaDataType_t.CUDA_R_64F
    CUDA_R_16F = cyruntime.cudaDataType_t.CUDA_R_16F
    CUDA_R_8I = cyruntime.cudaDataType_t.CUDA_R_8I
    CUDA_C_32F = cyruntime.cudaDataType_t.CUDA_C_32F
    CUDA_C_64F = cyruntime.cudaDataType_t.CUDA_C_64F
    CUDA_C_16F = cyruntime.cudaDataType_t.CUDA_C_16F
    CUDA_C_8I = cyruntime.cudaDataType_t.CUDA_C_8I
    CUDA_R_8U = cyruntime.cudaDataType_t.CUDA_R_8U
    CUDA_C_8U = cyruntime.cudaDataType_t.CUDA_C_8U
    CUDA_R_32I = cyruntime.cudaDataType_t.CUDA_R_32I
    CUDA_C_32I = cyruntime.cudaDataType_t.CUDA_C_32I
    CUDA_R_32U = cyruntime.cudaDataType_t.CUDA_R_32U
    CUDA_C_32U = cyruntime.cudaDataType_t.CUDA_C_32U
    CUDA_R_16BF = cyruntime.cudaDataType_t.CUDA_R_16BF
    CUDA_C_16BF = cyruntime.cudaDataType_t.CUDA_C_16BF
    CUDA_R_4I = cyruntime.cudaDataType_t.CUDA_R_4I
    CUDA_C_4I = cyruntime.cudaDataType_t.CUDA_C_4I
    CUDA_R_4U = cyruntime.cudaDataType_t.CUDA_R_4U
    CUDA_C_4U = cyruntime.cudaDataType_t.CUDA_C_4U
    CUDA_R_16I = cyruntime.cudaDataType_t.CUDA_R_16I
    CUDA_C_16I = cyruntime.cudaDataType_t.CUDA_C_16I
    CUDA_R_16U = cyruntime.cudaDataType_t.CUDA_R_16U
    CUDA_C_16U = cyruntime.cudaDataType_t.CUDA_C_16U
    CUDA_R_64I = cyruntime.cudaDataType_t.CUDA_R_64I
    CUDA_C_64I = cyruntime.cudaDataType_t.CUDA_C_64I
    CUDA_R_64U = cyruntime.cudaDataType_t.CUDA_R_64U
    CUDA_C_64U = cyruntime.cudaDataType_t.CUDA_C_64U
    CUDA_R_8F_E4M3 = cyruntime.cudaDataType_t.CUDA_R_8F_E4M3
    CUDA_R_8F_UE4M3 = cyruntime.cudaDataType_t.CUDA_R_8F_UE4M3
    CUDA_R_8F_E5M2 = cyruntime.cudaDataType_t.CUDA_R_8F_E5M2
    CUDA_R_8F_UE8M0 = cyruntime.cudaDataType_t.CUDA_R_8F_UE8M0
    CUDA_R_6F_E2M3 = cyruntime.cudaDataType_t.CUDA_R_6F_E2M3
    CUDA_R_6F_E3M2 = cyruntime.cudaDataType_t.CUDA_R_6F_E3M2
    CUDA_R_4F_E2M1 = cyruntime.cudaDataType_t.CUDA_R_4F_E2M1

_dict_cudaDataType = dict(((int(v), v) for k, v in cudaDataType.__members__.items()))
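
# --- Illustrative sketch (not part of the generated bindings) -----------------
# cudaDataType is consumed by math libraries (cuBLAS, cuSPARSE, ...) to describe
# buffer element types. Mapping host-side NumPy dtypes onto these members is a
# common pattern; the helper below is a hypothetical, non-exhaustive sketch.
def _example_cuda_dtype_for_numpy(np_dtype):
    """Illustrative only: map a few NumPy dtypes onto cudaDataType members."""
    import numpy as np
    table = {
        np.dtype(np.float16): cudaDataType.CUDA_R_16F,
        np.dtype(np.float32): cudaDataType.CUDA_R_32F,
        np.dtype(np.float64): cudaDataType.CUDA_R_64F,
        np.dtype(np.int8): cudaDataType.CUDA_R_8I,
        np.dtype(np.complex64): cudaDataType.CUDA_C_32F,
        np.dtype(np.complex128): cudaDataType.CUDA_C_64F,
    }
    return table[np.dtype(np_dtype)]  # KeyError for unmapped dtypes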

class cudaEmulationStrategy(IntEnum):
    """"""
    CUDA_EMULATION_STRATEGY_DEFAULT = cyruntime.cudaEmulationStrategy_t.CUDA_EMULATION_STRATEGY_DEFAULT
    CUDA_EMULATION_STRATEGY_PERFORMANT = cyruntime.cudaEmulationStrategy_t.CUDA_EMULATION_STRATEGY_PERFORMANT
    CUDA_EMULATION_STRATEGY_EAGER = cyruntime.cudaEmulationStrategy_t.CUDA_EMULATION_STRATEGY_EAGER

_dict_cudaEmulationStrategy = dict(((int(v), v) for k, v in cudaEmulationStrategy.__members__.items()))

class cudaEmulationMantissaControl(IntEnum):
    """"""
    CUDA_EMULATION_MANTISSA_CONTROL_DYNAMIC = cyruntime.cudaEmulationMantissaControl_t.CUDA_EMULATION_MANTISSA_CONTROL_DYNAMIC
    CUDA_EMULATION_MANTISSA_CONTROL_FIXED = cyruntime.cudaEmulationMantissaControl_t.CUDA_EMULATION_MANTISSA_CONTROL_FIXED

_dict_cudaEmulationMantissaControl = dict(((int(v), v) for k, v in cudaEmulationMantissaControl.__members__.items()))

class cudaEmulationSpecialValuesSupport(IntEnum):
    """"""
    CUDA_EMULATION_SPECIAL_VALUES_SUPPORT_NONE = cyruntime.cudaEmulationSpecialValuesSupport_t.CUDA_EMULATION_SPECIAL_VALUES_SUPPORT_NONE
    CUDA_EMULATION_SPECIAL_VALUES_SUPPORT_INFINITY = cyruntime.cudaEmulationSpecialValuesSupport_t.CUDA_EMULATION_SPECIAL_VALUES_SUPPORT_INFINITY
    CUDA_EMULATION_SPECIAL_VALUES_SUPPORT_NAN = cyruntime.cudaEmulationSpecialValuesSupport_t.CUDA_EMULATION_SPECIAL_VALUES_SUPPORT_NAN
    CUDA_EMULATION_SPECIAL_VALUES_SUPPORT_DEFAULT = cyruntime.cudaEmulationSpecialValuesSupport_t.CUDA_EMULATION_SPECIAL_VALUES_SUPPORT_DEFAULT

_dict_cudaEmulationSpecialValuesSupport = dict(((int(v), v) for k, v in cudaEmulationSpecialValuesSupport.__members__.items()))

class libraryPropertyType(IntEnum):
    """"""
    MAJOR_VERSION = cyruntime.libraryPropertyType_t.MAJOR_VERSION
    MINOR_VERSION = cyruntime.libraryPropertyType_t.MINOR_VERSION
    PATCH_LEVEL = cyruntime.libraryPropertyType_t.PATCH_LEVEL

_dict_libraryPropertyType = dict(((int(v), v) for k, v in libraryPropertyType.__members__.items()))

class cudaEglFrameType(IntEnum):
    """
    CUDA EglFrame type - array or pointer
    """

    #: Frame type CUDA array
    cudaEglFrameTypeArray = cyruntime.cudaEglFrameType_enum.cudaEglFrameTypeArray

    #: Frame type CUDA pointer
    cudaEglFrameTypePitch = cyruntime.cudaEglFrameType_enum.cudaEglFrameTypePitch

_dict_cudaEglFrameType = dict(((int(v), v) for k, v in cudaEglFrameType.__members__.items()))

class cudaEglResourceLocationFlags(IntEnum):
    """
    Resource location flags - sysmem or vidmem. For a CUDA context on
    an iGPU, video and system memory are equivalent, so these flags
    have no effect on execution. For a CUDA context on a dGPU,
    applications can use the flag
    :py:obj:`~.cudaEglResourceLocationFlags` to give a hint about the
    desired location: :py:obj:`~.cudaEglResourceLocationSysmem` makes
    the frame data resident in system memory for access by CUDA, while
    :py:obj:`~.cudaEglResourceLocationVidmem` makes it resident in
    dedicated video memory. There may be additional latency due to a
    new allocation and data migration if the frame is produced in a
    different memory.
    """

    #: Resource location sysmem
    cudaEglResourceLocationSysmem = cyruntime.cudaEglResourceLocationFlags_enum.cudaEglResourceLocationSysmem

    #: Resource location vidmem
    cudaEglResourceLocationVidmem = cyruntime.cudaEglResourceLocationFlags_enum.cudaEglResourceLocationVidmem

_dict_cudaEglResourceLocationFlags = dict(((int(v), v) for k, v in cudaEglResourceLocationFlags.__members__.items()))

class cudaEglColorFormat(IntEnum):
    """
    CUDA EGL Color Format - The different planar and multiplanar
    formats currently supported for CUDA_EGL interops.
    """

    #: Y, U, V in three surfaces, each in a separate surface, U/V width =
    #: 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatYUV420Planar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420Planar

    #: Y, UV in two surfaces (UV as one surface) with VU byte ordering,
    #: width, height ratio same as YUV420Planar.
    cudaEglColorFormatYUV420SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420SemiPlanar

    #: Y, U, V each in a separate surface, U/V width = 1/2 Y width, U/V
    #: height = Y height.
    cudaEglColorFormatYUV422Planar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV422Planar

    #: Y, UV in two surfaces with VU byte ordering, width, height ratio
    #: same as YUV422Planar.
    cudaEglColorFormatYUV422SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV422SemiPlanar

    #: R/G/B/A four channels in one surface with BGRA byte ordering.
    cudaEglColorFormatARGB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatARGB

    #: R/G/B/A four channels in one surface with ABGR byte ordering.
    cudaEglColorFormatRGBA = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatRGBA

    #: Single luminance channel in one surface.
    cudaEglColorFormatL = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatL

    #: Single color channel in one surface.
    cudaEglColorFormatR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatR

    #: Y, U, V in three surfaces, each in a separate surface, U/V width = Y
    #: width, U/V height = Y height.
    cudaEglColorFormatYUV444Planar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV444Planar

    #: Y, UV in two surfaces (UV as one surface) with VU byte ordering,
    #: width, height ratio same as YUV444Planar.
    cudaEglColorFormatYUV444SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV444SemiPlanar

    #: Y, U, V in one surface, interleaved as UYVY in one channel.
    cudaEglColorFormatYUYV422 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUYV422

    #: Y, U, V in one surface, interleaved as YUYV in one channel.
    cudaEglColorFormatUYVY422 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatUYVY422

    #: R/G/B/A four channels in one surface with RGBA byte ordering.
    cudaEglColorFormatABGR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatABGR

    #: R/G/B/A four channels in one surface with ARGB byte ordering.
    cudaEglColorFormatBGRA = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBGRA

    #: Alpha color format - one channel in one surface.
    cudaEglColorFormatA = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatA

    #: R/G color format - two channels in one surface with GR byte ordering
    cudaEglColorFormatRG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatRG

    #: Y, U, V, A four channels in one surface, interleaved as VUYA.
    cudaEglColorFormatAYUV = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatAYUV

    #: Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V
    #: width = Y width, U/V height = Y height.
    cudaEglColorFormatYVU444SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU444SemiPlanar

    #: Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V
    #: width = 1/2 Y width, U/V height = Y height.
    cudaEglColorFormatYVU422SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU422SemiPlanar

    #: Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V
    #: width = 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatYVU420SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420SemiPlanar

    #: Y10, V10U10 in two surfaces (VU as one surface) with UV byte
    #: ordering, U/V width = Y width, U/V height = Y height.
    cudaEglColorFormatY10V10U10_444SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_444SemiPlanar

    #: Y10, V10U10 in two surfaces (VU as one surface) with UV byte
    #: ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatY10V10U10_420SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_420SemiPlanar

    #: Y12, V12U12 in two surfaces (VU as one surface) with UV byte
    #: ordering, U/V width = Y width, U/V height = Y height.
    cudaEglColorFormatY12V12U12_444SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12V12U12_444SemiPlanar

    #: Y12, V12U12 in two surfaces (VU as one surface) with UV byte
    #: ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatY12V12U12_420SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12V12U12_420SemiPlanar

    #: Extended Range Y, U, V in one surface, interleaved as YVYU in one
    #: channel.
    cudaEglColorFormatVYUY_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatVYUY_ER

    #: Extended Range Y, U, V in one surface, interleaved as YUYV in one
    #: channel.
    cudaEglColorFormatUYVY_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatUYVY_ER

    #: Extended Range Y, U, V in one surface, interleaved as UYVY in one
    #: channel.
    cudaEglColorFormatYUYV_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUYV_ER

    #: Extended Range Y, U, V in one surface, interleaved as VYUY in one
    #: channel.
    cudaEglColorFormatYVYU_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVYU_ER

    #: Extended Range Y, U, V, A four channels in one surface, interleaved
    #: as AVUY.
    cudaEglColorFormatYUVA_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUVA_ER

    #: Extended Range Y, U, V, A four channels in one surface, interleaved
    #: as VUYA.
    cudaEglColorFormatAYUV_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatAYUV_ER

    #: Extended Range Y, U, V in three surfaces, U/V width = Y width, U/V
    #: height = Y height.
    cudaEglColorFormatYUV444Planar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV444Planar_ER

    #: Extended Range Y, U, V in three surfaces, U/V width = 1/2 Y width,
    #: U/V height = Y height.
    cudaEglColorFormatYUV422Planar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV422Planar_ER

    #: Extended Range Y, U, V in three surfaces, U/V width = 1/2 Y width,
    #: U/V height = 1/2 Y height.
    cudaEglColorFormatYUV420Planar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420Planar_ER

    #: Extended Range Y, UV in two surfaces (UV as one surface) with VU
    #: byte ordering, U/V width = Y width, U/V height = Y height.
    cudaEglColorFormatYUV444SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV444SemiPlanar_ER

    #: Extended Range Y, UV in two surfaces (UV as one surface) with VU
    #: byte ordering, U/V width = 1/2 Y width, U/V height = Y height.
    cudaEglColorFormatYUV422SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV422SemiPlanar_ER

    #: Extended Range Y, UV in two surfaces (UV as one surface) with VU
    #: byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatYUV420SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420SemiPlanar_ER

    #: Extended Range Y, V, U in three surfaces, U/V width = Y width, U/V
    #: height = Y height.
    cudaEglColorFormatYVU444Planar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU444Planar_ER

    #: Extended Range Y, V, U in three surfaces, U/V width = 1/2 Y width,
    #: U/V height = Y height.
    cudaEglColorFormatYVU422Planar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU422Planar_ER

    #: Extended Range Y, V, U in three surfaces, U/V width = 1/2 Y width,
    #: U/V height = 1/2 Y height.
    cudaEglColorFormatYVU420Planar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420Planar_ER

    #: Extended Range Y, VU in two surfaces (VU as one surface) with UV
    #: byte ordering, U/V width = Y width, U/V height = Y height.
    cudaEglColorFormatYVU444SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU444SemiPlanar_ER

    #: Extended Range Y, VU in two surfaces (VU as one surface) with UV
    #: byte ordering, U/V width = 1/2 Y width, U/V height = Y height.
    cudaEglColorFormatYVU422SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU422SemiPlanar_ER

    #: Extended Range Y, VU in two surfaces (VU as one surface) with UV
    #: byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatYVU420SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420SemiPlanar_ER

    #: Bayer format - one channel in one surface with interleaved RGGB
    #: ordering.
    cudaEglColorFormatBayerRGGB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerRGGB

    #: Bayer format - one channel in one surface with interleaved BGGR
    #: ordering.
    cudaEglColorFormatBayerBGGR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerBGGR

    #: Bayer format - one channel in one surface with interleaved GRBG
    #: ordering.
    cudaEglColorFormatBayerGRBG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerGRBG

    #: Bayer format - one channel in one surface with interleaved GBRG
    #: ordering.
    cudaEglColorFormatBayerGBRG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerGBRG

    #: Bayer10 format - one channel in one surface with interleaved RGGB
    #: ordering. Out of 16 bits, 10 bits are used and 6 bits are no-op.
    cudaEglColorFormatBayer10RGGB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer10RGGB

    #: Bayer10 format - one channel in one surface with interleaved BGGR
    #: ordering. Out of 16 bits, 10 bits are used and 6 bits are no-op.
    cudaEglColorFormatBayer10BGGR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer10BGGR

    #: Bayer10 format - one channel in one surface with interleaved GRBG
    #: ordering. Out of 16 bits, 10 bits are used and 6 bits are no-op.
    cudaEglColorFormatBayer10GRBG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer10GRBG

    #: Bayer10 format - one channel in one surface with interleaved GBRG
    #: ordering. Out of 16 bits, 10 bits are used and 6 bits are no-op.
    cudaEglColorFormatBayer10GBRG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer10GBRG

    #: Bayer12 format - one channel in one surface with interleaved RGGB
    #: ordering. Out of 16 bits, 12 bits are used and 4 bits are no-op.
    cudaEglColorFormatBayer12RGGB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12RGGB

    #: Bayer12 format - one channel in one surface with interleaved BGGR
    #: ordering. Out of 16 bits, 12 bits are used and 4 bits are no-op.
    cudaEglColorFormatBayer12BGGR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12BGGR

    #: Bayer12 format - one channel in one surface with interleaved GRBG
    #: ordering. Out of 16 bits, 12 bits are used and 4 bits are no-op.
    cudaEglColorFormatBayer12GRBG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12GRBG

    #: Bayer12 format - one channel in one surface with interleaved GBRG
    #: ordering. Out of 16 bits, 12 bits are used and 4 bits are no-op.
    cudaEglColorFormatBayer12GBRG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12GBRG

    #: Bayer14 format - one channel in one surface with interleaved RGGB
    #: ordering. Out of 16 bits, 14 bits are used and 2 bits are no-op.
    cudaEglColorFormatBayer14RGGB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer14RGGB

    #: Bayer14 format - one channel in one surface with interleaved BGGR
    #: ordering. Out of 16 bits, 14 bits are used and 2 bits are no-op.
    cudaEglColorFormatBayer14BGGR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer14BGGR

    #: Bayer14 format - one channel in one surface with interleaved GRBG
    #: ordering. Out of 16 bits, 14 bits are used and 2 bits are no-op.
    cudaEglColorFormatBayer14GRBG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer14GRBG

    #: Bayer14 format - one channel in one surface with interleaved GBRG
    #: ordering. Out of 16 bits, 14 bits are used and 2 bits are no-op.
    cudaEglColorFormatBayer14GBRG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer14GBRG

    #: Bayer20 format - one channel in one surface with interleaved RGGB
    #: ordering. Out of 32 bits, 20 bits are used and 12 bits are no-op.
    cudaEglColorFormatBayer20RGGB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer20RGGB

    #: Bayer20 format - one channel in one surface with interleaved BGGR
    #: ordering. Out of 32 bits, 20 bits are used and 12 bits are no-op.
    cudaEglColorFormatBayer20BGGR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer20BGGR

    #: Bayer20 format - one channel in one surface with interleaved GRBG
    #: ordering. Out of 32 bits, 20 bits are used and 12 bits are no-op.
    cudaEglColorFormatBayer20GRBG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer20GRBG

    #: Bayer20 format - one channel in one surface with interleaved GBRG
    #: ordering. Out of 32 bits, 20 bits are used and 12 bits are no-op.
    cudaEglColorFormatBayer20GBRG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer20GBRG

    #: Y, V, U in three surfaces, each in a separate surface, U/V width = Y
    #: width, U/V height = Y height.
    cudaEglColorFormatYVU444Planar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU444Planar

    #: Y, V, U in three surfaces, each in a separate surface, U/V width =
    #: 1/2 Y width, U/V height = Y height.
    cudaEglColorFormatYVU422Planar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU422Planar

    #: Y, V, U in three surfaces, each in a separate surface, U/V width =
    #: 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatYVU420Planar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420Planar

    #: Nvidia proprietary Bayer ISP format - one channel in one surface
    #: with interleaved RGGB ordering and mapped to opaque integer
    #: datatype.
    cudaEglColorFormatBayerIspRGGB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerIspRGGB

    #: Nvidia proprietary Bayer ISP format - one channel in one surface
    #: with interleaved BGGR ordering and mapped to opaque integer
    #: datatype.
    cudaEglColorFormatBayerIspBGGR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerIspBGGR

    #: Nvidia proprietary Bayer ISP format - one channel in one surface
    #: with interleaved GRBG ordering and mapped to opaque integer
    #: datatype.
    cudaEglColorFormatBayerIspGRBG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerIspGRBG

    #: Nvidia proprietary Bayer ISP format - one channel in one surface
    #: with interleaved GBRG ordering and mapped to opaque integer
    #: datatype.
    cudaEglColorFormatBayerIspGBRG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerIspGBRG

    #: Bayer format - one channel in one surface with interleaved BCCR
    #: ordering.
    cudaEglColorFormatBayerBCCR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerBCCR

    #: Bayer format - one channel in one surface with interleaved RCCB
    #: ordering.
    cudaEglColorFormatBayerRCCB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerRCCB

    #: Bayer format - one channel in one surface with interleaved CRBC
    #: ordering.
    cudaEglColorFormatBayerCRBC = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerCRBC

    #: Bayer format - one channel in one surface with interleaved CBRC
    #: ordering.
    cudaEglColorFormatBayerCBRC = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerCBRC

    #: Bayer10 format - one channel in one surface with interleaved CCCC
    #: ordering. Out of 16 bits, 10 bits are used and 6 bits are no-op.
    cudaEglColorFormatBayer10CCCC = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer10CCCC

    #: Bayer12 format - one channel in one surface with interleaved BCCR
    #: ordering. Out of 16 bits, 12 bits are used and 4 bits are no-op.
    cudaEglColorFormatBayer12BCCR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12BCCR

    #: Bayer12 format - one channel in one surface with interleaved RCCB
    #: ordering. Out of 16 bits, 12 bits are used and 4 bits are no-op.
    cudaEglColorFormatBayer12RCCB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12RCCB

    #: Bayer12 format - one channel in one surface with interleaved CRBC
    #: ordering. Out of 16 bits, 12 bits are used and 4 bits are no-op.
    cudaEglColorFormatBayer12CRBC = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12CRBC

    #: Bayer12 format - one channel in one surface with interleaved CBRC
    #: ordering. Out of 16 bits, 12 bits are used and 4 bits are no-op.
    cudaEglColorFormatBayer12CBRC = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12CBRC

    #: Bayer12 format - one channel in one surface with interleaved CCCC
    #: ordering. Out of 16 bits, 12 bits are used and 4 bits are no-op.
    cudaEglColorFormatBayer12CCCC = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12CCCC

    #: Color format for single Y plane.
    cudaEglColorFormatY = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY

    #: Y, UV in two surfaces (UV as one surface) U/V width = 1/2 Y width,
    #: U/V height = 1/2 Y height.
    cudaEglColorFormatYUV420SemiPlanar_2020 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420SemiPlanar_2020

    #: Y, VU in two surfaces (VU as one surface) U/V width = 1/2 Y width,
    #: U/V height = 1/2 Y height.
    cudaEglColorFormatYVU420SemiPlanar_2020 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420SemiPlanar_2020

    #: Y, U, V in three surfaces, each in a separate surface, U/V width =
    #: 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatYUV420Planar_2020 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420Planar_2020

    #: Y, V, U in three surfaces, each in a separate surface, U/V width =
    #: 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatYVU420Planar_2020 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420Planar_2020

    #: Y, UV in two surfaces (UV as one surface) U/V width = 1/2 Y width,
    #: U/V height = 1/2 Y height.
    cudaEglColorFormatYUV420SemiPlanar_709 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420SemiPlanar_709

    #: Y, VU in two surfaces (VU as one surface) U/V width = 1/2 Y width,
    #: U/V height = 1/2 Y height.
    cudaEglColorFormatYVU420SemiPlanar_709 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420SemiPlanar_709

    #: Y, U, V in three surfaces, each in a separate surface, U/V width =
    #: 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatYUV420Planar_709 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420Planar_709

    #: Y, V, U in three surfaces, each in a separate surface, U/V width =
    #: 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatYVU420Planar_709 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420Planar_709

    #: Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y
    #: width, U/V height = 1/2 Y height.
    cudaEglColorFormatY10V10U10_420SemiPlanar_709 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_420SemiPlanar_709

    #: Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y
    #: width, U/V height = 1/2 Y height.
    cudaEglColorFormatY10V10U10_420SemiPlanar_2020 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_420SemiPlanar_2020

    #: Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y
    #: width, U/V height = Y height.
    cudaEglColorFormatY10V10U10_422SemiPlanar_2020 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_422SemiPlanar_2020

    #: Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y
    #: width, U/V height = Y height.
    cudaEglColorFormatY10V10U10_422SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_422SemiPlanar

    #: Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y
    #: width, U/V height = Y height.
    cudaEglColorFormatY10V10U10_422SemiPlanar_709 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_422SemiPlanar_709

    #: Extended Range Color format for single Y plane.
    cudaEglColorFormatY_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY_ER

    #: Extended Range Color format for single Y plane.
    cudaEglColorFormatY_709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY_709_ER

    #: Extended Range Color format for single Y10 plane.
    cudaEglColorFormatY10_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10_ER

    #: Extended Range Color format for single Y10 plane.
    cudaEglColorFormatY10_709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10_709_ER

    #: Extended Range Color format for single Y12 plane.
    cudaEglColorFormatY12_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12_ER

    #: Extended Range Color format for single Y12 plane.
    cudaEglColorFormatY12_709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12_709_ER

    #: Y, U, V, A four channels in one surface, interleaved as AVUY.
    cudaEglColorFormatYUVA = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUVA

    #: Y, U, V in one surface, interleaved as YVYU in one channel.
    cudaEglColorFormatYVYU = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVYU

    #: Y, U, V in one surface, interleaved as VYUY in one channel.
    cudaEglColorFormatVYUY = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatVYUY

    #: Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V
    #: width = 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatY10V10U10_420SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_420SemiPlanar_ER

    #: Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V
    #: width = 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatY10V10U10_420SemiPlanar_709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_420SemiPlanar_709_ER

    #: Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V
    #: width = Y width, U/V height = Y height.
    cudaEglColorFormatY10V10U10_444SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_444SemiPlanar_ER

    #: Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V
    #: width = Y width, U/V height = Y height.
    cudaEglColorFormatY10V10U10_444SemiPlanar_709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_444SemiPlanar_709_ER

    #: Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V
    #: width = 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatY12V12U12_420SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12V12U12_420SemiPlanar_ER

    #: Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V
    #: width = 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatY12V12U12_420SemiPlanar_709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12V12U12_420SemiPlanar_709_ER

    #: Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V
    #: width = Y width, U/V height = Y height.
    cudaEglColorFormatY12V12U12_444SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12V12U12_444SemiPlanar_ER

    #: Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V
    #: width = Y width, U/V height = Y height.
    cudaEglColorFormatY12V12U12_444SemiPlanar_709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12V12U12_444SemiPlanar_709_ER

    #: Y, U, V in one surface, interleaved as UYVY in one channel.
    cudaEglColorFormatUYVY709 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatUYVY709

    #: Extended Range Y, U, V in one surface, interleaved as UYVY in one
    #: channel.
    cudaEglColorFormatUYVY709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatUYVY709_ER

    #: Y, U, V in one surface, interleaved as UYVY in one channel.
    cudaEglColorFormatUYVY2020 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatUYVY2020

_dict_cudaEglColorFormat = dict(((int(v), v) for k, v in cudaEglColorFormat.__members__.items()))

class cudaChannelFormatKind(IntEnum):
    """
    Channel format kind
    """

    #: Signed channel format
    cudaChannelFormatKindSigned = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSigned

    #: Unsigned channel format
    cudaChannelFormatKindUnsigned = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsigned

    #: Float channel format
    cudaChannelFormatKindFloat = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindFloat

    #: No channel format
    cudaChannelFormatKindNone = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindNone

    #: Unsigned 8-bit integers, planar 4:2:0 YUV format
    cudaChannelFormatKindNV12 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindNV12

    #: 1 channel unsigned 8-bit normalized integer
    cudaChannelFormatKindUnsignedNormalized8X1 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized8X1

    #: 2 channel unsigned 8-bit normalized integer
    cudaChannelFormatKindUnsignedNormalized8X2 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized8X2

    #: 4 channel unsigned 8-bit normalized integer
    cudaChannelFormatKindUnsignedNormalized8X4 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized8X4

    #: 1 channel unsigned 16-bit normalized integer
    cudaChannelFormatKindUnsignedNormalized16X1 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized16X1

    #: 2 channel unsigned 16-bit normalized integer
    cudaChannelFormatKindUnsignedNormalized16X2 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized16X2

    #: 4 channel unsigned 16-bit normalized integer
    cudaChannelFormatKindUnsignedNormalized16X4 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized16X4

    #: 1 channel signed 8-bit normalized integer
    cudaChannelFormatKindSignedNormalized8X1 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized8X1

    #: 2 channel signed 8-bit normalized integer
    cudaChannelFormatKindSignedNormalized8X2 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized8X2

    #: 4 channel signed 8-bit normalized integer
    cudaChannelFormatKindSignedNormalized8X4 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized8X4

    #: 1 channel signed 16-bit normalized integer
    cudaChannelFormatKindSignedNormalized16X1 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized16X1

    #: 2 channel signed 16-bit normalized integer
    cudaChannelFormatKindSignedNormalized16X2 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized16X2

    #: 4 channel signed 16-bit normalized integer
    cudaChannelFormatKindSignedNormalized16X4 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized16X4

    #: 4 channel unsigned normalized block-compressed (BC1 compression)
    #: format
    cudaChannelFormatKindUnsignedBlockCompressed1 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed1

    #: 4 channel unsigned normalized block-compressed (BC1 compression)
    #: format with sRGB encoding
    cudaChannelFormatKindUnsignedBlockCompressed1SRGB = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed1SRGB

    #: 4 channel unsigned normalized block-compressed (BC2 compression)
    #: format
    cudaChannelFormatKindUnsignedBlockCompressed2 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed2

    #: 4 channel unsigned normalized block-compressed (BC2 compression)
    #: format with sRGB encoding
    cudaChannelFormatKindUnsignedBlockCompressed2SRGB = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed2SRGB

    #: 4 channel unsigned normalized block-compressed (BC3 compression)
    #: format
    cudaChannelFormatKindUnsignedBlockCompressed3 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed3

    #: 4 channel unsigned normalized block-compressed (BC3 compression)
    #: format with sRGB encoding
    cudaChannelFormatKindUnsignedBlockCompressed3SRGB = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed3SRGB

    #: 1 channel unsigned normalized block-compressed (BC4 compression)
    #: format
    cudaChannelFormatKindUnsignedBlockCompressed4 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed4

    #: 1 channel signed normalized block-compressed (BC4 compression)
    #: format
    cudaChannelFormatKindSignedBlockCompressed4 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed4

    #: 2 channel unsigned normalized block-compressed (BC5 compression)
    #: format
    cudaChannelFormatKindUnsignedBlockCompressed5 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed5

    #: 2 channel signed normalized block-compressed (BC5 compression)
    #: format
    cudaChannelFormatKindSignedBlockCompressed5 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed5

    #: 3 channel unsigned half-float block-compressed (BC6H compression)
    #: format
    cudaChannelFormatKindUnsignedBlockCompressed6H = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed6H

    #: 3 channel signed half-float block-compressed (BC6H compression)
    #: format
    cudaChannelFormatKindSignedBlockCompressed6H = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed6H

    #: 4 channel unsigned normalized block-compressed (BC7 compression)
    #: format
    cudaChannelFormatKindUnsignedBlockCompressed7 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed7

    #: 4 channel unsigned normalized block-compressed (BC7 compression)
    #: format with sRGB encoding
    cudaChannelFormatKindUnsignedBlockCompressed7SRGB = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed7SRGB

    #: 4 channel unsigned normalized (10-bit, 10-bit, 10-bit, 2-bit) format
    cudaChannelFormatKindUnsignedNormalized1010102 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized1010102

_dict_cudaChannelFormatKind = dict(((int(v), v) for k, v in cudaChannelFormatKind.__members__.items()))
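
# --- Illustrative sketch (not part of the generated bindings) -----------------
# cudaChannelFormatKind is consumed by cudaCreateChannelDesc() when describing CUDA
# array elements. A minimal sketch using the (err, value) tuple-return convention
# of the wrappers in this module:
def _example_float_channel_desc():
    """Illustrative only: a single 32-bit float channel, e.g. for a float texture."""
    err, desc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindFloat)
    return err, desc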

class cudaMemoryType(IntEnum):
    """
    CUDA memory types
    """

    #: Unregistered memory
    cudaMemoryTypeUnregistered = cyruntime.cudaMemoryType.cudaMemoryTypeUnregistered

    #: Host memory
    cudaMemoryTypeHost = cyruntime.cudaMemoryType.cudaMemoryTypeHost

    #: Device memory
    cudaMemoryTypeDevice = cyruntime.cudaMemoryType.cudaMemoryTypeDevice

    #: Managed memory
    cudaMemoryTypeManaged = cyruntime.cudaMemoryType.cudaMemoryTypeManaged

_dict_cudaMemoryType = dict(((int(v), v) for k, v in cudaMemoryType.__members__.items()))
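
# --- Illustrative sketch (not part of the generated bindings) -----------------
# cudaMemoryType is reported back by cudaPointerGetAttributes(). The sketch below
# assumes the returned attributes object exposes a `type` field mirroring the C
# struct cudaPointerAttributes:
def _example_classify_pointer(ptr):
    """Illustrative only: classify `ptr` as unregistered/host/device/managed."""
    err, attrs = cudaPointerGetAttributes(ptr)
    if err != cudaError_t.cudaSuccess:
        return err, None
    return err, cudaMemoryType(int(attrs.type))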

class cudaMemcpyKind(IntEnum):
    """
    CUDA memory copy types
    """

    #: Host -> Host
    cudaMemcpyHostToHost = cyruntime.cudaMemcpyKind.cudaMemcpyHostToHost

    #: Host -> Device
    cudaMemcpyHostToDevice = cyruntime.cudaMemcpyKind.cudaMemcpyHostToDevice

    #: Device -> Host
    cudaMemcpyDeviceToHost = cyruntime.cudaMemcpyKind.cudaMemcpyDeviceToHost

    #: Device -> Device
    cudaMemcpyDeviceToDevice = cyruntime.cudaMemcpyKind.cudaMemcpyDeviceToDevice

    #: Direction of the transfer is inferred from the pointer values.
    #: Requires unified virtual addressing
    cudaMemcpyDefault = cyruntime.cudaMemcpyKind.cudaMemcpyDefault

_dict_cudaMemcpyKind = dict(((int(v), v) for k, v in cudaMemcpyKind.__members__.items()))
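
# --- Illustrative sketch (not part of the generated bindings) -----------------
# cudaMemcpyKind selects the direction of cudaMemcpy(); cudaMemcpyDefault lets the
# runtime infer it from the pointer values under unified virtual addressing. A
# minimal sketch of an explicit host-to-device copy:
def _example_copy_h2d(dst_device_ptr, src_host_ptr, nbytes):
    """Illustrative only: cudaMemcpyDefault would also work here under UVA."""
    err, = cudaMemcpy(dst_device_ptr, src_host_ptr, nbytes, cudaMemcpyKind.cudaMemcpyHostToDevice)
    return err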

class cudaAccessProperty(IntEnum):
    """
    Specifies a performance hint with :py:obj:`~.cudaAccessPolicyWindow`
    for the hitProp and missProp members.
    """

    #: Normal cache persistence.
    cudaAccessPropertyNormal = cyruntime.cudaAccessProperty.cudaAccessPropertyNormal

    #: Streaming access is less likely to persist in cache.
    cudaAccessPropertyStreaming = cyruntime.cudaAccessProperty.cudaAccessPropertyStreaming

    #: Persisting access is more likely to persist in cache.
    cudaAccessPropertyPersisting = cyruntime.cudaAccessProperty.cudaAccessPropertyPersisting

_dict_cudaAccessProperty = dict(((int(v), v) for k, v in cudaAccessProperty.__members__.items()))
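
# --- Illustrative sketch (not part of the generated bindings) -----------------
# cudaAccessProperty populates the hitProp/missProp members of a
# cudaAccessPolicyWindow. The sketch assumes cudaAccessPolicyWindow exposes the
# same field names as the C struct (base_ptr, num_bytes, hitRatio, hitProp,
# missProp):
def _example_access_policy_window(base_ptr, num_bytes):
    """Illustrative only: hint that ~60% of accesses in the window should persist in L2."""
    window = cudaAccessPolicyWindow()
    window.base_ptr = base_ptr
    window.num_bytes = num_bytes
    window.hitRatio = 0.6
    window.hitProp = cudaAccessProperty.cudaAccessPropertyPersisting
    window.missProp = cudaAccessProperty.cudaAccessPropertyStreaming
    return window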

class cudaStreamCaptureStatus(IntEnum):
    """
    Possible stream capture statuses returned by
    :py:obj:`~.cudaStreamIsCapturing`
    """

    #: Stream is not capturing
    cudaStreamCaptureStatusNone = cyruntime.cudaStreamCaptureStatus.cudaStreamCaptureStatusNone

    #: Stream is actively capturing
    cudaStreamCaptureStatusActive = cyruntime.cudaStreamCaptureStatus.cudaStreamCaptureStatusActive

    #: Stream is part of a capture sequence that has been invalidated, but
    #: not terminated
    cudaStreamCaptureStatusInvalidated = cyruntime.cudaStreamCaptureStatus.cudaStreamCaptureStatusInvalidated

_dict_cudaStreamCaptureStatus = dict(((int(v), v) for k, v in cudaStreamCaptureStatus.__members__.items()))
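
# --- Illustrative sketch (not part of the generated bindings) -----------------
# cudaStreamIsCapturing() reports one of the statuses above; sketch using the
# (err, value) tuple-return convention of this module:
def _example_is_actively_capturing(stream):
    """Illustrative only: True while `stream` is actively capturing a graph."""
    err, status = cudaStreamIsCapturing(stream)
    return err == cudaError_t.cudaSuccess and status == cudaStreamCaptureStatus.cudaStreamCaptureStatusActive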

class cudaStreamCaptureMode(IntEnum):
    """
    Possible modes for stream capture thread interactions. For more
    details see :py:obj:`~.cudaStreamBeginCapture` and
    :py:obj:`~.cudaThreadExchangeStreamCaptureMode`
    """
    cudaStreamCaptureModeGlobal = cyruntime.cudaStreamCaptureMode.cudaStreamCaptureModeGlobal
    cudaStreamCaptureModeThreadLocal = cyruntime.cudaStreamCaptureMode.cudaStreamCaptureModeThreadLocal
    cudaStreamCaptureModeRelaxed = cyruntime.cudaStreamCaptureMode.cudaStreamCaptureModeRelaxed

_dict_cudaStreamCaptureMode = dict(((int(v), v) for k, v in cudaStreamCaptureMode.__members__.items()))
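
# --- Illustrative sketch (not part of the generated bindings) -----------------
# A typical capture sequence brackets asynchronous work between
# cudaStreamBeginCapture() and cudaStreamEndCapture(); the work itself is elided:
def _example_capture_graph(stream):
    """Illustrative only: capture the (empty) work on `stream` into a graph."""
    err, = cudaStreamBeginCapture(stream, cudaStreamCaptureMode.cudaStreamCaptureModeGlobal)
    if err != cudaError_t.cudaSuccess:
        return err, None
    # ... enqueue async work on `stream` here ...
    err, graph = cudaStreamEndCapture(stream)
    return err, graph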

class cudaSynchronizationPolicy(IntEnum):
    """
    """
    cudaSyncPolicyAuto = cyruntime.cudaSynchronizationPolicy.cudaSyncPolicyAuto
    cudaSyncPolicySpin = cyruntime.cudaSynchronizationPolicy.cudaSyncPolicySpin
    cudaSyncPolicyYield = cyruntime.cudaSynchronizationPolicy.cudaSyncPolicyYield
    cudaSyncPolicyBlockingSync = cyruntime.cudaSynchronizationPolicy.cudaSyncPolicyBlockingSync

_dict_cudaSynchronizationPolicy = dict(((int(v), v) for k, v in cudaSynchronizationPolicy.__members__.items()))

class cudaClusterSchedulingPolicy(IntEnum):
    """
    Cluster scheduling policies. These may be passed to
    :py:obj:`~.cudaFuncSetAttribute`
    """

    #: The default policy
    cudaClusterSchedulingPolicyDefault = cyruntime.cudaClusterSchedulingPolicy.cudaClusterSchedulingPolicyDefault

    #: Spread the blocks within a cluster to the SMs
    cudaClusterSchedulingPolicySpread = cyruntime.cudaClusterSchedulingPolicy.cudaClusterSchedulingPolicySpread

    #: Allow the hardware to load-balance the blocks in a cluster to the
    #: SMs
    cudaClusterSchedulingPolicyLoadBalancing = cyruntime.cudaClusterSchedulingPolicy.cudaClusterSchedulingPolicyLoadBalancing

_dict_cudaClusterSchedulingPolicy = dict(((int(v), v) for k, v in cudaClusterSchedulingPolicy.__members__.items()))

class cudaStreamUpdateCaptureDependenciesFlags(IntEnum):
    """
    Flags for :py:obj:`~.cudaStreamUpdateCaptureDependencies`
    """

    #: Add new nodes to the dependency set
    cudaStreamAddCaptureDependencies = cyruntime.cudaStreamUpdateCaptureDependenciesFlags.cudaStreamAddCaptureDependencies

    #: Replace the dependency set with the new nodes
    cudaStreamSetCaptureDependencies = cyruntime.cudaStreamUpdateCaptureDependenciesFlags.cudaStreamSetCaptureDependencies

_dict_cudaStreamUpdateCaptureDependenciesFlags = dict(((int(v), v) for k, v in cudaStreamUpdateCaptureDependenciesFlags.__members__.items()))

class cudaUserObjectFlags(IntEnum):
    """
    Flags for user objects for graphs
    """

    #: Indicates the destructor execution is not synchronized by any CUDA
    #: handle.
    cudaUserObjectNoDestructorSync = cyruntime.cudaUserObjectFlags.cudaUserObjectNoDestructorSync

_dict_cudaUserObjectFlags = dict(((int(v), v) for k, v in cudaUserObjectFlags.__members__.items()))

class cudaUserObjectRetainFlags(IntEnum):
    """
    Flags for retaining user object references for graphs
    """

    #: Transfer references from the caller rather than creating new
    #: references.
    cudaGraphUserObjectMove = cyruntime.cudaUserObjectRetainFlags.cudaGraphUserObjectMove

_dict_cudaUserObjectRetainFlags = dict(((int(v), v) for k, v in cudaUserObjectRetainFlags.__members__.items()))

class cudaGraphicsRegisterFlags(IntEnum):
    """
    CUDA graphics interop register flags
    """

    #: Default
    cudaGraphicsRegisterFlagsNone = cyruntime.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsNone

    #: CUDA will not write to this resource
    cudaGraphicsRegisterFlagsReadOnly = cyruntime.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsReadOnly

    #: CUDA will only write to and will not read from this resource
    cudaGraphicsRegisterFlagsWriteDiscard = cyruntime.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsWriteDiscard

    #: CUDA will bind this resource to a surface reference
    cudaGraphicsRegisterFlagsSurfaceLoadStore = cyruntime.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsSurfaceLoadStore

    #: CUDA will perform texture gather operations on this resource
    cudaGraphicsRegisterFlagsTextureGather = cyruntime.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsTextureGather

_dict_cudaGraphicsRegisterFlags = dict(((int(v), v) for k, v in cudaGraphicsRegisterFlags.__members__.items()))

class cudaGraphicsMapFlags(IntEnum):
    """
    CUDA graphics interop map flags
    """

    #: Default; Assume resource can be read/written
    cudaGraphicsMapFlagsNone = cyruntime.cudaGraphicsMapFlags.cudaGraphicsMapFlagsNone

    #: CUDA will not write to this resource
    cudaGraphicsMapFlagsReadOnly = cyruntime.cudaGraphicsMapFlags.cudaGraphicsMapFlagsReadOnly

    #: CUDA will only write to and will not read from this resource
    cudaGraphicsMapFlagsWriteDiscard = cyruntime.cudaGraphicsMapFlags.cudaGraphicsMapFlagsWriteDiscard

_dict_cudaGraphicsMapFlags = dict(((int(v), v) for k, v in cudaGraphicsMapFlags.__members__.items()))

class cudaGraphicsCubeFace(IntEnum):
    """
    CUDA graphics interop array indices for cube maps
    """

    #: Positive X face of cubemap
    cudaGraphicsCubeFacePositiveX = cyruntime.cudaGraphicsCubeFace.cudaGraphicsCubeFacePositiveX

    #: Negative X face of cubemap
    cudaGraphicsCubeFaceNegativeX = cyruntime.cudaGraphicsCubeFace.cudaGraphicsCubeFaceNegativeX

    #: Positive Y face of cubemap
    cudaGraphicsCubeFacePositiveY = cyruntime.cudaGraphicsCubeFace.cudaGraphicsCubeFacePositiveY

    #: Negative Y face of cubemap
    cudaGraphicsCubeFaceNegativeY = cyruntime.cudaGraphicsCubeFace.cudaGraphicsCubeFaceNegativeY

    #: Positive Z face of cubemap
    cudaGraphicsCubeFacePositiveZ = cyruntime.cudaGraphicsCubeFace.cudaGraphicsCubeFacePositiveZ

    #: Negative Z face of cubemap
    cudaGraphicsCubeFaceNegativeZ = cyruntime.cudaGraphicsCubeFace.cudaGraphicsCubeFaceNegativeZ

_dict_cudaGraphicsCubeFace = dict(((int(v), v) for k, v in cudaGraphicsCubeFace.__members__.items()))

class cudaResourceType(IntEnum):
    """
    CUDA resource types
    """

    #: Array resource
    cudaResourceTypeArray = cyruntime.cudaResourceType.cudaResourceTypeArray

    #: Mipmapped array resource
    cudaResourceTypeMipmappedArray = cyruntime.cudaResourceType.cudaResourceTypeMipmappedArray

    #: Linear resource
    cudaResourceTypeLinear = cyruntime.cudaResourceType.cudaResourceTypeLinear

    #: Pitch 2D resource
    cudaResourceTypePitch2D = cyruntime.cudaResourceType.cudaResourceTypePitch2D

_dict_cudaResourceType = dict(((int(v), v) for k, v in cudaResourceType.__members__.items()))
2221class cudaResourceViewFormat(IntEnum):
2222 """
2223 CUDA texture resource view formats
2224 """
2226 #: No resource view format (use underlying resource format)
2227 cudaResViewFormatNone = cyruntime.cudaResourceViewFormat.cudaResViewFormatNone
2229 #: 1 channel unsigned 8-bit integers
2230 cudaResViewFormatUnsignedChar1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedChar1
2232 #: 2 channel unsigned 8-bit integers
2233 cudaResViewFormatUnsignedChar2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedChar2
2235 #: 4 channel unsigned 8-bit integers
2236 cudaResViewFormatUnsignedChar4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedChar4
2238 #: 1 channel signed 8-bit integers
2239 cudaResViewFormatSignedChar1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedChar1
2241 #: 2 channel signed 8-bit integers
2242 cudaResViewFormatSignedChar2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedChar2
2244 #: 4 channel signed 8-bit integers
2245 cudaResViewFormatSignedChar4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedChar4
2247 #: 1 channel unsigned 16-bit integers
2248 cudaResViewFormatUnsignedShort1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedShort1
2250 #: 2 channel unsigned 16-bit integers
2251 cudaResViewFormatUnsignedShort2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedShort2
2253 #: 4 channel unsigned 16-bit integers
2254 cudaResViewFormatUnsignedShort4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedShort4
2256 #: 1 channel signed 16-bit integers
2257 cudaResViewFormatSignedShort1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedShort1
2259 #: 2 channel signed 16-bit integers
2260 cudaResViewFormatSignedShort2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedShort2
2262 #: 4 channel signed 16-bit integers
2263 cudaResViewFormatSignedShort4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedShort4
2265 #: 1 channel unsigned 32-bit integers
2266 cudaResViewFormatUnsignedInt1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedInt1
2268 #: 2 channel unsigned 32-bit integers
2269 cudaResViewFormatUnsignedInt2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedInt2
2271 #: 4 channel unsigned 32-bit integers
2272 cudaResViewFormatUnsignedInt4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedInt4
2274 #: 1 channel signed 32-bit integers
2275 cudaResViewFormatSignedInt1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedInt1
2277 #: 2 channel signed 32-bit integers
2278 cudaResViewFormatSignedInt2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedInt2
2280 #: 4 channel signed 32-bit integers
2281 cudaResViewFormatSignedInt4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedInt4
2283 #: 1 channel 16-bit floating point
2284 cudaResViewFormatHalf1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatHalf1
2286 #: 2 channel 16-bit floating point
2287 cudaResViewFormatHalf2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatHalf2
2289 #: 4 channel 16-bit floating point
2290 cudaResViewFormatHalf4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatHalf4
2292 #: 1 channel 32-bit floating point
2293 cudaResViewFormatFloat1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatFloat1
2295 #: 2 channel 32-bit floating point
2296 cudaResViewFormatFloat2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatFloat2
2298 #: 4 channel 32-bit floating point
2299 cudaResViewFormatFloat4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatFloat4
2301 #: Block compressed 1
2302 cudaResViewFormatUnsignedBlockCompressed1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed1
2304 #: Block compressed 2
2305 cudaResViewFormatUnsignedBlockCompressed2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed2
2307 #: Block compressed 3
2308 cudaResViewFormatUnsignedBlockCompressed3 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed3
2310 #: Block compressed 4 unsigned
2311 cudaResViewFormatUnsignedBlockCompressed4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed4
2313 #: Block compressed 4 signed
2314 cudaResViewFormatSignedBlockCompressed4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedBlockCompressed4
2316 #: Block compressed 5 unsigned
2317 cudaResViewFormatUnsignedBlockCompressed5 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed5
2319 #: Block compressed 5 signed
2320 cudaResViewFormatSignedBlockCompressed5 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedBlockCompressed5
2322 #: Block compressed 6 unsigned half-float
2323 cudaResViewFormatUnsignedBlockCompressed6H = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed6H
2325 #: Block compressed 6 signed half-float
2326 cudaResViewFormatSignedBlockCompressed6H = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedBlockCompressed6H
2328 #: Block compressed 7
2329 cudaResViewFormatUnsignedBlockCompressed7 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed7
2331_dict_cudaResourceViewFormat = dict(((int(v), v) for k, v in cudaResourceViewFormat.__members__.items()))
2333class cudaFuncAttribute(IntEnum):
2334 """
2335 CUDA function attributes that can be set using
2336 :py:obj:`~.cudaFuncSetAttribute`
2337 """
2339 #: Maximum dynamic shared memory size
2340 cudaFuncAttributeMaxDynamicSharedMemorySize = cyruntime.cudaFuncAttribute.cudaFuncAttributeMaxDynamicSharedMemorySize
2342 #: Preferred shared memory-L1 cache split
2343 cudaFuncAttributePreferredSharedMemoryCarveout = cyruntime.cudaFuncAttribute.cudaFuncAttributePreferredSharedMemoryCarveout
2345 #: Indicator to enforce valid cluster dimension specification on kernel
2346 #: launch
2347 cudaFuncAttributeClusterDimMustBeSet = cyruntime.cudaFuncAttribute.cudaFuncAttributeClusterDimMustBeSet
2349 #: Required cluster width
2350 cudaFuncAttributeRequiredClusterWidth = cyruntime.cudaFuncAttribute.cudaFuncAttributeRequiredClusterWidth
2352 #: Required cluster height
2353 cudaFuncAttributeRequiredClusterHeight = cyruntime.cudaFuncAttribute.cudaFuncAttributeRequiredClusterHeight
2355 #: Required cluster depth
2356 cudaFuncAttributeRequiredClusterDepth = cyruntime.cudaFuncAttribute.cudaFuncAttributeRequiredClusterDepth
2358 #: Whether non-portable cluster scheduling policy is supported
2359 cudaFuncAttributeNonPortableClusterSizeAllowed = cyruntime.cudaFuncAttribute.cudaFuncAttributeNonPortableClusterSizeAllowed
2361 #: Required cluster scheduling policy preference
2362 cudaFuncAttributeClusterSchedulingPolicyPreference = cyruntime.cudaFuncAttribute.cudaFuncAttributeClusterSchedulingPolicyPreference
2363 cudaFuncAttributeMax = cyruntime.cudaFuncAttribute.cudaFuncAttributeMax
2365_dict_cudaFuncAttribute = dict(((int(v), v) for k, v in cudaFuncAttribute.__members__.items()))
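# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# These attributes are applied to a compiled kernel with cudaFuncSetAttribute().
# `kernel_func` below is a hypothetical device-function handle obtained
# elsewhere (e.g. from a compiled Cython/CUDA module).
def _example_opt_in_dynamic_smem(kernel_func):
    from cuda.bindings import runtime as cudart
    # Opt the kernel in to 64 KiB of dynamic shared memory per block.
    err, = cudart.cudaFuncSetAttribute(
        kernel_func,
        cudart.cudaFuncAttribute.cudaFuncAttributeMaxDynamicSharedMemorySize,
        64 * 1024)
    assert err == cudart.cudaError_t.cudaSuccess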
2367class cudaFuncCache(IntEnum):
2368 """
2369 CUDA function cache configurations
2370 """
2372 #: Default function cache configuration, no preference
2373 cudaFuncCachePreferNone = cyruntime.cudaFuncCache.cudaFuncCachePreferNone
2375 #: Prefer larger shared memory and smaller L1 cache
2376 cudaFuncCachePreferShared = cyruntime.cudaFuncCache.cudaFuncCachePreferShared
2378 #: Prefer larger L1 cache and smaller shared memory
2379 cudaFuncCachePreferL1 = cyruntime.cudaFuncCache.cudaFuncCachePreferL1
2381 #: Prefer equal size L1 cache and shared memory
2382 cudaFuncCachePreferEqual = cyruntime.cudaFuncCache.cudaFuncCachePreferEqual
2384_dict_cudaFuncCache = dict(((int(v), v) for k, v in cudaFuncCache.__members__.items()))
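# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# A device-wide cache preference is set with cudaDeviceSetCacheConfig(). This
# is only a hint; the driver may ignore it where the L1/shared split is fixed.
def _example_prefer_shared_memory():
    from cuda.bindings import runtime as cudart
    err, = cudart.cudaDeviceSetCacheConfig(
        cudart.cudaFuncCache.cudaFuncCachePreferShared)
    assert err == cudart.cudaError_t.cudaSuccess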
2386class cudaSharedMemConfig(IntEnum):
2387 """
2388 CUDA shared memory configuration [Deprecated]
2389 """
2390 cudaSharedMemBankSizeDefault = cyruntime.cudaSharedMemConfig.cudaSharedMemBankSizeDefault
2391 cudaSharedMemBankSizeFourByte = cyruntime.cudaSharedMemConfig.cudaSharedMemBankSizeFourByte
2392 cudaSharedMemBankSizeEightByte = cyruntime.cudaSharedMemConfig.cudaSharedMemBankSizeEightByte
2394_dict_cudaSharedMemConfig = dict(((int(v), v) for k, v in cudaSharedMemConfig.__members__.items()))
2396class cudaSharedCarveout(IntEnum):
2397 """
2398 Shared memory carveout configurations. These may be passed to
2399 cudaFuncSetAttribute
2400 """
2402 #: No preference for shared memory or L1 (default)
2403 cudaSharedmemCarveoutDefault = cyruntime.cudaSharedCarveout.cudaSharedmemCarveoutDefault
2405 #: Prefer maximum available L1 cache, minimum shared memory
2406 cudaSharedmemCarveoutMaxL1 = cyruntime.cudaSharedCarveout.cudaSharedmemCarveoutMaxL1
2408 #: Prefer maximum available shared memory, minimum L1 cache
2409 cudaSharedmemCarveoutMaxShared = cyruntime.cudaSharedCarveout.cudaSharedmemCarveoutMaxShared
2411_dict_cudaSharedCarveout = dict(((int(v), v) for k, v in cudaSharedCarveout.__members__.items()))
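# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# A carveout constant (or a plain percentage in [0, 100]) is passed as the
# value of cudaFuncAttributePreferredSharedMemoryCarveout. `kernel_func` is
# again a hypothetical device-function handle.
def _example_prefer_max_shared_carveout(kernel_func):
    from cuda.bindings import runtime as cudart
    err, = cudart.cudaFuncSetAttribute(
        kernel_func,
        cudart.cudaFuncAttribute.cudaFuncAttributePreferredSharedMemoryCarveout,
        int(cudart.cudaSharedCarveout.cudaSharedmemCarveoutMaxShared))
    assert err == cudart.cudaError_t.cudaSuccess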
2413class cudaComputeMode(IntEnum):
2414 """
2415 CUDA device compute modes
2416 """
2418 #: Default compute mode (Multiple threads can use
2419 #: :py:obj:`~.cudaSetDevice()` with this device)
2420 cudaComputeModeDefault = cyruntime.cudaComputeMode.cudaComputeModeDefault
2422 #: Compute-exclusive-thread mode (Only one thread in one process will
2423 #: be able to use :py:obj:`~.cudaSetDevice()` with this device)
2424 cudaComputeModeExclusive = cyruntime.cudaComputeMode.cudaComputeModeExclusive
2426 #: Compute-prohibited mode (No threads can use
2427 #: :py:obj:`~.cudaSetDevice()` with this device)
2428 cudaComputeModeProhibited = cyruntime.cudaComputeMode.cudaComputeModeProhibited
2430 #: Compute-exclusive-process mode (Many threads in one process will be
2431 #: able to use :py:obj:`~.cudaSetDevice()` with this device)
2432 cudaComputeModeExclusiveProcess = cyruntime.cudaComputeMode.cudaComputeModeExclusiveProcess
2434_dict_cudaComputeMode = dict(((int(v), v) for k, v in cudaComputeMode.__members__.items()))
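# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# The compute mode is read as an int via cudaDeviceGetAttribute() and can be
# mapped back onto the enum for readable comparisons.
def _example_device_is_shareable(device=0):
    from cuda.bindings import runtime as cudart
    err, mode = cudart.cudaDeviceGetAttribute(
        cudart.cudaDeviceAttr.cudaDevAttrComputeMode, device)
    assert err == cudart.cudaError_t.cudaSuccess
    return cudart.cudaComputeMode(mode) == cudart.cudaComputeMode.cudaComputeModeDefault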
2436class cudaLimit(IntEnum):
2437 """
2438 CUDA Limits
2439 """
2441 #: GPU thread stack size
2442 cudaLimitStackSize = cyruntime.cudaLimit.cudaLimitStackSize
2444 #: GPU printf FIFO size
2445 cudaLimitPrintfFifoSize = cyruntime.cudaLimit.cudaLimitPrintfFifoSize
2447 #: GPU malloc heap size
2448 cudaLimitMallocHeapSize = cyruntime.cudaLimit.cudaLimitMallocHeapSize
2450 #: GPU device runtime synchronize depth
2451 cudaLimitDevRuntimeSyncDepth = cyruntime.cudaLimit.cudaLimitDevRuntimeSyncDepth
2453 #: GPU device runtime pending launch count
2454 cudaLimitDevRuntimePendingLaunchCount = cyruntime.cudaLimit.cudaLimitDevRuntimePendingLaunchCount
2456 #: A value between 0 and 128 that indicates the maximum fetch
2457 #: granularity of L2 (in bytes). This is a hint.
2458 cudaLimitMaxL2FetchGranularity = cyruntime.cudaLimit.cudaLimitMaxL2FetchGranularity
2460 #: A size in bytes for L2 persisting lines cache size
2461 cudaLimitPersistingL2CacheSize = cyruntime.cudaLimit.cudaLimitPersistingL2CacheSize
2463_dict_cudaLimit = dict(((int(v), v) for k, v in cudaLimit.__members__.items()))
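# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# Limits are written with cudaDeviceSetLimit() and read back with
# cudaDeviceGetLimit(); the driver may round the requested value, so reading
# it back reveals what was actually applied.
def _example_grow_malloc_heap(nbytes=64 * 1024 * 1024):
    from cuda.bindings import runtime as cudart
    err, = cudart.cudaDeviceSetLimit(
        cudart.cudaLimit.cudaLimitMallocHeapSize, nbytes)
    assert err == cudart.cudaError_t.cudaSuccess
    err, actual = cudart.cudaDeviceGetLimit(
        cudart.cudaLimit.cudaLimitMallocHeapSize)
    assert err == cudart.cudaError_t.cudaSuccess
    return actual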
2465class cudaMemoryAdvise(IntEnum):
2466 """
2467 CUDA Memory Advise values
2468 """
2470 #: Data will mostly be read and only occasionally be written to
2471 cudaMemAdviseSetReadMostly = cyruntime.cudaMemoryAdvise.cudaMemAdviseSetReadMostly
2473 #: Undo the effect of :py:obj:`~.cudaMemAdviseSetReadMostly`
2474 cudaMemAdviseUnsetReadMostly = cyruntime.cudaMemoryAdvise.cudaMemAdviseUnsetReadMostly
2476 #: Set the preferred location for the data as the specified device
2477 cudaMemAdviseSetPreferredLocation = cyruntime.cudaMemoryAdvise.cudaMemAdviseSetPreferredLocation
2479 #: Clear the preferred location for the data
2480 cudaMemAdviseUnsetPreferredLocation = cyruntime.cudaMemoryAdvise.cudaMemAdviseUnsetPreferredLocation
2482 #: Data will be accessed by the specified device, so prevent page
2483 #: faults as much as possible
2484 cudaMemAdviseSetAccessedBy = cyruntime.cudaMemoryAdvise.cudaMemAdviseSetAccessedBy
2486 #: Let the Unified Memory subsystem decide on the page faulting policy
2487 #: for the specified device
2488 cudaMemAdviseUnsetAccessedBy = cyruntime.cudaMemoryAdvise.cudaMemAdviseUnsetAccessedBy
2490_dict_cudaMemoryAdvise = dict(((int(v), v) for k, v in cudaMemoryAdvise.__members__.items()))
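# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# Advice is applied to a managed allocation with cudaMemAdvise(). This sketch
# assumes the CUDA 13 signature taking a cudaMemLocation; older bindings take
# a bare device ordinal as the last argument instead.
def _example_mark_read_mostly(nbytes=1 << 20, device=0):
    from cuda.bindings import runtime as cudart
    err, ptr = cudart.cudaMallocManaged(nbytes, cudart.cudaMemAttachGlobal)
    assert err == cudart.cudaError_t.cudaSuccess
    loc = cudart.cudaMemLocation()
    loc.type = cudart.cudaMemLocationType.cudaMemLocationTypeDevice
    loc.id = device
    err, = cudart.cudaMemAdvise(
        ptr, nbytes, cudart.cudaMemoryAdvise.cudaMemAdviseSetReadMostly, loc)
    assert err == cudart.cudaError_t.cudaSuccess
    return ptr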
2492class cudaMemRangeAttribute(IntEnum):
2493 """
2494 CUDA range attributes
2495 """
2497 #: Whether the range will mostly be read and only occasionally be
2498 #: written to
2499 cudaMemRangeAttributeReadMostly = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributeReadMostly
2501 #: The preferred location of the range
2502 cudaMemRangeAttributePreferredLocation = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributePreferredLocation
2504 #: Memory range has :py:obj:`~.cudaMemAdviseSetAccessedBy` set for
2505 #: specified device
2506 cudaMemRangeAttributeAccessedBy = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributeAccessedBy
2508 #: The last location to which the range was prefetched
2509 cudaMemRangeAttributeLastPrefetchLocation = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributeLastPrefetchLocation
2511 #: The preferred location type of the range
2512 cudaMemRangeAttributePreferredLocationType = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributePreferredLocationType
2514 #: The preferred location id of the range
2515 cudaMemRangeAttributePreferredLocationId = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributePreferredLocationId
2517 #: The last location type to which the range was prefetched
2518 cudaMemRangeAttributeLastPrefetchLocationType = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributeLastPrefetchLocationType
2520 #: The last location id to which the range was prefetched
2521 cudaMemRangeAttributeLastPrefetchLocationId = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributeLastPrefetchLocationId
2523_dict_cudaMemRangeAttribute = dict(((int(v), v) for k, v in cudaMemRangeAttribute.__members__.items()))
2525class cudaFlushGPUDirectRDMAWritesOptions(IntEnum):
2526 """
2527 CUDA GPUDirect RDMA flush writes APIs supported on the device
2528 """
2530 #: :py:obj:`~.cudaDeviceFlushGPUDirectRDMAWrites()` and its CUDA Driver
2531 #: API counterpart are supported on the device.
2532 cudaFlushGPUDirectRDMAWritesOptionHost = cyruntime.cudaFlushGPUDirectRDMAWritesOptions.cudaFlushGPUDirectRDMAWritesOptionHost
2534 #: The :py:obj:`~.CU_STREAM_WAIT_VALUE_FLUSH` flag and the
2535 #: :py:obj:`~.CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES` MemOp are supported
2536 #: on the CUDA device.
2537 cudaFlushGPUDirectRDMAWritesOptionMemOps = cyruntime.cudaFlushGPUDirectRDMAWritesOptions.cudaFlushGPUDirectRDMAWritesOptionMemOps
2539_dict_cudaFlushGPUDirectRDMAWritesOptions = dict(((int(v), v) for k, v in cudaFlushGPUDirectRDMAWritesOptions.__members__.items()))
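# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# cudaDevAttrGPUDirectRDMAFlushWritesOptions returns a bitmask built from this
# enum, so individual capabilities are tested with bitwise AND.
def _example_host_flush_supported(device=0):
    from cuda.bindings import runtime as cudart
    err, opts = cudart.cudaDeviceGetAttribute(
        cudart.cudaDeviceAttr.cudaDevAttrGPUDirectRDMAFlushWritesOptions,
        device)
    assert err == cudart.cudaError_t.cudaSuccess
    host_bit = cudart.cudaFlushGPUDirectRDMAWritesOptions.cudaFlushGPUDirectRDMAWritesOptionHost
    return bool(opts & int(host_bit))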
2541class cudaGPUDirectRDMAWritesOrdering(IntEnum):
2542 """
2543 CUDA GPUDirect RDMA flush writes ordering features of the device
2544 """
2546 #: The device does not natively support ordering of GPUDirect RDMA
2547 #: writes. :py:obj:`~.cudaFlushGPUDirectRDMAWrites()` can be leveraged
2548 #: if supported.
2549 cudaGPUDirectRDMAWritesOrderingNone = cyruntime.cudaGPUDirectRDMAWritesOrdering.cudaGPUDirectRDMAWritesOrderingNone
2551 #: Natively, the device can consistently consume GPUDirect RDMA writes,
2552 #: although other CUDA devices may not.
2553 cudaGPUDirectRDMAWritesOrderingOwner = cyruntime.cudaGPUDirectRDMAWritesOrdering.cudaGPUDirectRDMAWritesOrderingOwner
2555 #: Any CUDA device in the system can consistently consume GPUDirect
2556 #: RDMA writes to this device.
2557 cudaGPUDirectRDMAWritesOrderingAllDevices = cyruntime.cudaGPUDirectRDMAWritesOrdering.cudaGPUDirectRDMAWritesOrderingAllDevices
2559_dict_cudaGPUDirectRDMAWritesOrdering = dict(((int(v), v) for k, v in cudaGPUDirectRDMAWritesOrdering.__members__.items()))
2561class cudaFlushGPUDirectRDMAWritesScope(IntEnum):
2562 """
2563 CUDA GPUDirect RDMA flush writes scopes
2564 """
2566 #: Blocks until remote writes are visible to the CUDA device context
2567 #: owning the data.
2568 cudaFlushGPUDirectRDMAWritesToOwner = cyruntime.cudaFlushGPUDirectRDMAWritesScope.cudaFlushGPUDirectRDMAWritesToOwner
2570 #: Blocks until remote writes are visible to all CUDA device contexts.
2571 cudaFlushGPUDirectRDMAWritesToAllDevices = cyruntime.cudaFlushGPUDirectRDMAWritesScope.cudaFlushGPUDirectRDMAWritesToAllDevices
2573_dict_cudaFlushGPUDirectRDMAWritesScope = dict(((int(v), v) for k, v in cudaFlushGPUDirectRDMAWritesScope.__members__.items()))
2575class cudaFlushGPUDirectRDMAWritesTarget(IntEnum):
2576 """
2577 CUDA GPUDirect RDMA flush writes targets
2578 """
2580 #: Sets the target for :py:obj:`~.cudaDeviceFlushGPUDirectRDMAWrites()`
2581 #: to the currently active CUDA device context.
2582 cudaFlushGPUDirectRDMAWritesTargetCurrentDevice = cyruntime.cudaFlushGPUDirectRDMAWritesTarget.cudaFlushGPUDirectRDMAWritesTargetCurrentDevice
2584_dict_cudaFlushGPUDirectRDMAWritesTarget = dict(((int(v), v) for k, v in cudaFlushGPUDirectRDMAWritesTarget.__members__.items()))
2586class cudaDeviceAttr(IntEnum):
2587 """
2588 CUDA device attributes
2589 """
2591 #: Maximum number of threads per block
2592 cudaDevAttrMaxThreadsPerBlock = cyruntime.cudaDeviceAttr.cudaDevAttrMaxThreadsPerBlock
2594 #: Maximum block dimension X
2595 cudaDevAttrMaxBlockDimX = cyruntime.cudaDeviceAttr.cudaDevAttrMaxBlockDimX
2597 #: Maximum block dimension Y
2598 cudaDevAttrMaxBlockDimY = cyruntime.cudaDeviceAttr.cudaDevAttrMaxBlockDimY
2600 #: Maximum block dimension Z
2601 cudaDevAttrMaxBlockDimZ = cyruntime.cudaDeviceAttr.cudaDevAttrMaxBlockDimZ
2603 #: Maximum grid dimension X
2604 cudaDevAttrMaxGridDimX = cyruntime.cudaDeviceAttr.cudaDevAttrMaxGridDimX
2606 #: Maximum grid dimension Y
2607 cudaDevAttrMaxGridDimY = cyruntime.cudaDeviceAttr.cudaDevAttrMaxGridDimY
2609 #: Maximum grid dimension Z
2610 cudaDevAttrMaxGridDimZ = cyruntime.cudaDeviceAttr.cudaDevAttrMaxGridDimZ
2612 #: Maximum shared memory available per block in bytes
2613 cudaDevAttrMaxSharedMemoryPerBlock = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSharedMemoryPerBlock
2615 #: Memory available on device for constant variables in a CUDA C kernel
2616 #: in bytes
2617 cudaDevAttrTotalConstantMemory = cyruntime.cudaDeviceAttr.cudaDevAttrTotalConstantMemory
2619 #: Warp size in threads
2620 cudaDevAttrWarpSize = cyruntime.cudaDeviceAttr.cudaDevAttrWarpSize
2622 #: Maximum pitch in bytes allowed by memory copies
2623 cudaDevAttrMaxPitch = cyruntime.cudaDeviceAttr.cudaDevAttrMaxPitch
2625 #: Maximum number of 32-bit registers available per block
2626 cudaDevAttrMaxRegistersPerBlock = cyruntime.cudaDeviceAttr.cudaDevAttrMaxRegistersPerBlock
2628 #: Peak clock frequency in kilohertz
2629 cudaDevAttrClockRate = cyruntime.cudaDeviceAttr.cudaDevAttrClockRate
2631 #: Alignment requirement for textures
2632 cudaDevAttrTextureAlignment = cyruntime.cudaDeviceAttr.cudaDevAttrTextureAlignment
2634 #: Device can possibly copy memory and execute a kernel concurrently
2635 cudaDevAttrGpuOverlap = cyruntime.cudaDeviceAttr.cudaDevAttrGpuOverlap
2637 #: Number of multiprocessors on device
2638 cudaDevAttrMultiProcessorCount = cyruntime.cudaDeviceAttr.cudaDevAttrMultiProcessorCount
2640 #: Specifies whether there is a run time limit on kernels
2641 cudaDevAttrKernelExecTimeout = cyruntime.cudaDeviceAttr.cudaDevAttrKernelExecTimeout
2643 #: Device is integrated with host memory
2644 cudaDevAttrIntegrated = cyruntime.cudaDeviceAttr.cudaDevAttrIntegrated
2646 #: Device can map host memory into CUDA address space
2647 cudaDevAttrCanMapHostMemory = cyruntime.cudaDeviceAttr.cudaDevAttrCanMapHostMemory
2649 #: Compute mode (See :py:obj:`~.cudaComputeMode` for details)
2650 cudaDevAttrComputeMode = cyruntime.cudaDeviceAttr.cudaDevAttrComputeMode
2652 #: Maximum 1D texture width
2653 cudaDevAttrMaxTexture1DWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture1DWidth
2655 #: Maximum 2D texture width
2656 cudaDevAttrMaxTexture2DWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DWidth
2658 #: Maximum 2D texture height
2659 cudaDevAttrMaxTexture2DHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DHeight
2661 #: Maximum 3D texture width
2662 cudaDevAttrMaxTexture3DWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture3DWidth
2664 #: Maximum 3D texture height
2665 cudaDevAttrMaxTexture3DHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture3DHeight
2667 #: Maximum 3D texture depth
2668 cudaDevAttrMaxTexture3DDepth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture3DDepth
2670 #: Maximum 2D layered texture width
2671 cudaDevAttrMaxTexture2DLayeredWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLayeredWidth
2673 #: Maximum 2D layered texture height
2674 cudaDevAttrMaxTexture2DLayeredHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLayeredHeight
2676 #: Maximum layers in a 2D layered texture
2677 cudaDevAttrMaxTexture2DLayeredLayers = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLayeredLayers
2679 #: Alignment requirement for surfaces
2680 cudaDevAttrSurfaceAlignment = cyruntime.cudaDeviceAttr.cudaDevAttrSurfaceAlignment
2682 #: Device can possibly execute multiple kernels concurrently
2683 cudaDevAttrConcurrentKernels = cyruntime.cudaDeviceAttr.cudaDevAttrConcurrentKernels
2685 #: Device has ECC support enabled
2686 cudaDevAttrEccEnabled = cyruntime.cudaDeviceAttr.cudaDevAttrEccEnabled
2688 #: PCI bus ID of the device
2689 cudaDevAttrPciBusId = cyruntime.cudaDeviceAttr.cudaDevAttrPciBusId
2691 #: PCI device ID of the device
2692 cudaDevAttrPciDeviceId = cyruntime.cudaDeviceAttr.cudaDevAttrPciDeviceId
2694 #: Device is using TCC driver model
2695 cudaDevAttrTccDriver = cyruntime.cudaDeviceAttr.cudaDevAttrTccDriver
2697 #: Peak memory clock frequency in kilohertz
2698 cudaDevAttrMemoryClockRate = cyruntime.cudaDeviceAttr.cudaDevAttrMemoryClockRate
2700 #: Global memory bus width in bits
2701 cudaDevAttrGlobalMemoryBusWidth = cyruntime.cudaDeviceAttr.cudaDevAttrGlobalMemoryBusWidth
2703 #: Size of L2 cache in bytes
2704 cudaDevAttrL2CacheSize = cyruntime.cudaDeviceAttr.cudaDevAttrL2CacheSize
2706 #: Maximum resident threads per multiprocessor
2707 cudaDevAttrMaxThreadsPerMultiProcessor = cyruntime.cudaDeviceAttr.cudaDevAttrMaxThreadsPerMultiProcessor
2709 #: Number of asynchronous engines
2710 cudaDevAttrAsyncEngineCount = cyruntime.cudaDeviceAttr.cudaDevAttrAsyncEngineCount
2712 #: Device shares a unified address space with the host
2713 cudaDevAttrUnifiedAddressing = cyruntime.cudaDeviceAttr.cudaDevAttrUnifiedAddressing
2715 #: Maximum 1D layered texture width
2716 cudaDevAttrMaxTexture1DLayeredWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture1DLayeredWidth
2718 #: Maximum layers in a 1D layered texture
2719 cudaDevAttrMaxTexture1DLayeredLayers = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture1DLayeredLayers
2721 #: Maximum 2D texture width if cudaArrayTextureGather is set
2722 cudaDevAttrMaxTexture2DGatherWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DGatherWidth
2724 #: Maximum 2D texture height if cudaArrayTextureGather is set
2725 cudaDevAttrMaxTexture2DGatherHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DGatherHeight
2727 #: Alternate maximum 3D texture width
2728 cudaDevAttrMaxTexture3DWidthAlt = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture3DWidthAlt
2730 #: Alternate maximum 3D texture height
2731 cudaDevAttrMaxTexture3DHeightAlt = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture3DHeightAlt
2733 #: Alternate maximum 3D texture depth
2734 cudaDevAttrMaxTexture3DDepthAlt = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture3DDepthAlt
2736 #: PCI domain ID of the device
2737 cudaDevAttrPciDomainId = cyruntime.cudaDeviceAttr.cudaDevAttrPciDomainId
2739 #: Pitch alignment requirement for textures
2740 cudaDevAttrTexturePitchAlignment = cyruntime.cudaDeviceAttr.cudaDevAttrTexturePitchAlignment
2742 #: Maximum cubemap texture width/height
2743 cudaDevAttrMaxTextureCubemapWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTextureCubemapWidth
2745 #: Maximum cubemap layered texture width/height
2746 cudaDevAttrMaxTextureCubemapLayeredWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTextureCubemapLayeredWidth
2748 #: Maximum layers in a cubemap layered texture
2749 cudaDevAttrMaxTextureCubemapLayeredLayers = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTextureCubemapLayeredLayers
2751 #: Maximum 1D surface width
2752 cudaDevAttrMaxSurface1DWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface1DWidth
2754 #: Maximum 2D surface width
2755 cudaDevAttrMaxSurface2DWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface2DWidth
2757 #: Maximum 2D surface height
2758 cudaDevAttrMaxSurface2DHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface2DHeight
2760 #: Maximum 3D surface width
2761 cudaDevAttrMaxSurface3DWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface3DWidth
2763 #: Maximum 3D surface height
2764 cudaDevAttrMaxSurface3DHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface3DHeight
2766 #: Maximum 3D surface depth
2767 cudaDevAttrMaxSurface3DDepth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface3DDepth
2769 #: Maximum 1D layered surface width
2770 cudaDevAttrMaxSurface1DLayeredWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface1DLayeredWidth
2772 #: Maximum layers in a 1D layered surface
2773 cudaDevAttrMaxSurface1DLayeredLayers = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface1DLayeredLayers
2775 #: Maximum 2D layered surface width
2776 cudaDevAttrMaxSurface2DLayeredWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface2DLayeredWidth
2778 #: Maximum 2D layered surface height
2779 cudaDevAttrMaxSurface2DLayeredHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface2DLayeredHeight
2781 #: Maximum layers in a 2D layered surface
2782 cudaDevAttrMaxSurface2DLayeredLayers = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface2DLayeredLayers
2784 #: Maximum cubemap surface width
2785 cudaDevAttrMaxSurfaceCubemapWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurfaceCubemapWidth
2787 #: Maximum cubemap layered surface width
2788 cudaDevAttrMaxSurfaceCubemapLayeredWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurfaceCubemapLayeredWidth
2790 #: Maximum layers in a cubemap layered surface
2791 cudaDevAttrMaxSurfaceCubemapLayeredLayers = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurfaceCubemapLayeredLayers
2793 #: Maximum 1D linear texture width
2794 cudaDevAttrMaxTexture1DLinearWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture1DLinearWidth
2796 #: Maximum 2D linear texture width
2797 cudaDevAttrMaxTexture2DLinearWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLinearWidth
2799 #: Maximum 2D linear texture height
2800 cudaDevAttrMaxTexture2DLinearHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLinearHeight
2802 #: Maximum 2D linear texture pitch in bytes
2803 cudaDevAttrMaxTexture2DLinearPitch = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLinearPitch
2805 #: Maximum mipmapped 2D texture width
2806 cudaDevAttrMaxTexture2DMipmappedWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DMipmappedWidth
2808 #: Maximum mipmapped 2D texture height
2809 cudaDevAttrMaxTexture2DMipmappedHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DMipmappedHeight
2811 #: Major compute capability version number
2812 cudaDevAttrComputeCapabilityMajor = cyruntime.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor
2814 #: Minor compute capability version number
2815 cudaDevAttrComputeCapabilityMinor = cyruntime.cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor
2817 #: Maximum mipmapped 1D texture width
2818 cudaDevAttrMaxTexture1DMipmappedWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture1DMipmappedWidth
2820 #: Device supports stream priorities
2821 cudaDevAttrStreamPrioritiesSupported = cyruntime.cudaDeviceAttr.cudaDevAttrStreamPrioritiesSupported
2823 #: Device supports caching globals in L1
2824 cudaDevAttrGlobalL1CacheSupported = cyruntime.cudaDeviceAttr.cudaDevAttrGlobalL1CacheSupported
2826 #: Device supports caching locals in L1
2827 cudaDevAttrLocalL1CacheSupported = cyruntime.cudaDeviceAttr.cudaDevAttrLocalL1CacheSupported
2829 #: Maximum shared memory available per multiprocessor in bytes
2830 cudaDevAttrMaxSharedMemoryPerMultiprocessor = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSharedMemoryPerMultiprocessor
2832 #: Maximum number of 32-bit registers available per multiprocessor
2833 cudaDevAttrMaxRegistersPerMultiprocessor = cyruntime.cudaDeviceAttr.cudaDevAttrMaxRegistersPerMultiprocessor
2835 #: Device can allocate managed memory on this system
2836 cudaDevAttrManagedMemory = cyruntime.cudaDeviceAttr.cudaDevAttrManagedMemory
2838 #: Device is on a multi-GPU board
2839 cudaDevAttrIsMultiGpuBoard = cyruntime.cudaDeviceAttr.cudaDevAttrIsMultiGpuBoard
2841 #: Unique identifier for a group of devices on the same multi-GPU board
2842 cudaDevAttrMultiGpuBoardGroupID = cyruntime.cudaDeviceAttr.cudaDevAttrMultiGpuBoardGroupID
2844 #: Link between the device and the host supports native atomic
2845 #: operations
2846 cudaDevAttrHostNativeAtomicSupported = cyruntime.cudaDeviceAttr.cudaDevAttrHostNativeAtomicSupported
2848 #: Ratio of single precision performance (in floating-point operations
2849 #: per second) to double precision performance
2850 cudaDevAttrSingleToDoublePrecisionPerfRatio = cyruntime.cudaDeviceAttr.cudaDevAttrSingleToDoublePrecisionPerfRatio
2852 #: Device supports coherently accessing pageable memory without calling
2853 #: cudaHostRegister on it
2854 cudaDevAttrPageableMemoryAccess = cyruntime.cudaDeviceAttr.cudaDevAttrPageableMemoryAccess
2856 #: Device can coherently access managed memory concurrently with the
2857 #: CPU
2858 cudaDevAttrConcurrentManagedAccess = cyruntime.cudaDeviceAttr.cudaDevAttrConcurrentManagedAccess
2860 #: Device supports Compute Preemption
2861 cudaDevAttrComputePreemptionSupported = cyruntime.cudaDeviceAttr.cudaDevAttrComputePreemptionSupported
2863 #: Device can access host registered memory at the same virtual address
2864 #: as the CPU
2865 cudaDevAttrCanUseHostPointerForRegisteredMem = cyruntime.cudaDeviceAttr.cudaDevAttrCanUseHostPointerForRegisteredMem
2866 cudaDevAttrReserved92 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved92
2867 cudaDevAttrReserved93 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved93
2868 cudaDevAttrReserved94 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved94
2870 #: Device supports launching cooperative kernels via
2871 #: :py:obj:`~.cudaLaunchCooperativeKernel`
2872 cudaDevAttrCooperativeLaunch = cyruntime.cudaDeviceAttr.cudaDevAttrCooperativeLaunch
2873 cudaDevAttrReserved96 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved96
2875 #: The maximum opt-in shared memory per block. This value may vary by
2876 #: chip. See :py:obj:`~.cudaFuncSetAttribute`
2877 cudaDevAttrMaxSharedMemoryPerBlockOptin = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSharedMemoryPerBlockOptin
2879 #: Device supports flushing of outstanding remote writes.
2880 cudaDevAttrCanFlushRemoteWrites = cyruntime.cudaDeviceAttr.cudaDevAttrCanFlushRemoteWrites
2882 #: Device supports host memory registration via
2883 #: :py:obj:`~.cudaHostRegister`.
2884 cudaDevAttrHostRegisterSupported = cyruntime.cudaDeviceAttr.cudaDevAttrHostRegisterSupported
2886 #: Device accesses pageable memory via the host's page tables.
2887 cudaDevAttrPageableMemoryAccessUsesHostPageTables = cyruntime.cudaDeviceAttr.cudaDevAttrPageableMemoryAccessUsesHostPageTables
2889 #: Host can directly access managed memory on the device without
2890 #: migration.
2891 cudaDevAttrDirectManagedMemAccessFromHost = cyruntime.cudaDeviceAttr.cudaDevAttrDirectManagedMemAccessFromHost
2893 #: Maximum number of blocks per multiprocessor
2894 cudaDevAttrMaxBlocksPerMultiprocessor = cyruntime.cudaDeviceAttr.cudaDevAttrMaxBlocksPerMultiprocessor
2896 #: Maximum L2 persisting lines capacity setting in bytes.
2897 cudaDevAttrMaxPersistingL2CacheSize = cyruntime.cudaDeviceAttr.cudaDevAttrMaxPersistingL2CacheSize
2899 #: Maximum value of :py:obj:`~.cudaAccessPolicyWindow.num_bytes`.
2900 cudaDevAttrMaxAccessPolicyWindowSize = cyruntime.cudaDeviceAttr.cudaDevAttrMaxAccessPolicyWindowSize
2902 #: Shared memory reserved by CUDA driver per block in bytes
2903 cudaDevAttrReservedSharedMemoryPerBlock = cyruntime.cudaDeviceAttr.cudaDevAttrReservedSharedMemoryPerBlock
2905 #: Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays
2906 cudaDevAttrSparseCudaArraySupported = cyruntime.cudaDeviceAttr.cudaDevAttrSparseCudaArraySupported
2908 #: Device supports using the :py:obj:`~.cudaHostRegister` flag
2909 #: cudaHostRegisterReadOnly to register memory that must be mapped as
2910 #: read-only to the GPU
2911 cudaDevAttrHostRegisterReadOnlySupported = cyruntime.cudaDeviceAttr.cudaDevAttrHostRegisterReadOnlySupported
2913 #: External timeline semaphore interop is supported on the device
2914 cudaDevAttrTimelineSemaphoreInteropSupported = cyruntime.cudaDeviceAttr.cudaDevAttrTimelineSemaphoreInteropSupported
2916 #: Device supports using the :py:obj:`~.cudaMallocAsync` and
2917 #: :py:obj:`~.cudaMemPool` family of APIs
2918 cudaDevAttrMemoryPoolsSupported = cyruntime.cudaDeviceAttr.cudaDevAttrMemoryPoolsSupported
2920 #: Device supports GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see
2921 #: https://docs.nvidia.com/cuda/gpudirect-rdma for more information)
2922 cudaDevAttrGPUDirectRDMASupported = cyruntime.cudaDeviceAttr.cudaDevAttrGPUDirectRDMASupported
2924 #: The returned attribute shall be interpreted as a bitmask, where the
2925 #: individual bits are listed in the
2926 #: :py:obj:`~.cudaFlushGPUDirectRDMAWritesOptions` enum
2927 cudaDevAttrGPUDirectRDMAFlushWritesOptions = cyruntime.cudaDeviceAttr.cudaDevAttrGPUDirectRDMAFlushWritesOptions
2929 #: GPUDirect RDMA writes to the device do not need to be flushed for
2930 #: consumers within the scope indicated by the returned attribute. See
2931 #: :py:obj:`~.cudaGPUDirectRDMAWritesOrdering` for the numerical values
2932 #: returned here.
2933 cudaDevAttrGPUDirectRDMAWritesOrdering = cyruntime.cudaDeviceAttr.cudaDevAttrGPUDirectRDMAWritesOrdering
2935 #: Handle types supported with mempool-based IPC
2936 cudaDevAttrMemoryPoolSupportedHandleTypes = cyruntime.cudaDeviceAttr.cudaDevAttrMemoryPoolSupportedHandleTypes
2938 #: Indicates device supports cluster launch
2939 cudaDevAttrClusterLaunch = cyruntime.cudaDeviceAttr.cudaDevAttrClusterLaunch
2941 #: Device supports deferred mapping CUDA arrays and CUDA mipmapped
2942 #: arrays
2943 cudaDevAttrDeferredMappingCudaArraySupported = cyruntime.cudaDeviceAttr.cudaDevAttrDeferredMappingCudaArraySupported
2944 cudaDevAttrReserved122 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved122
2945 cudaDevAttrReserved123 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved123
2946 cudaDevAttrReserved124 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved124
2948 #: Device supports IPC Events.
2949 cudaDevAttrIpcEventSupport = cyruntime.cudaDeviceAttr.cudaDevAttrIpcEventSupport
2951 #: Number of memory synchronization domains the device supports.
2952 cudaDevAttrMemSyncDomainCount = cyruntime.cudaDeviceAttr.cudaDevAttrMemSyncDomainCount
2953 cudaDevAttrReserved127 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved127
2954 cudaDevAttrReserved128 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved128
2955 cudaDevAttrReserved129 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved129
2957 #: NUMA configuration of a device: value is of type
2958 #: :py:obj:`~.cudaDeviceNumaConfig` enum
2959 cudaDevAttrNumaConfig = cyruntime.cudaDeviceAttr.cudaDevAttrNumaConfig
2961 #: NUMA node ID of the GPU memory
2962 cudaDevAttrNumaId = cyruntime.cudaDeviceAttr.cudaDevAttrNumaId
2963 cudaDevAttrReserved132 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved132
2965 #: Contexts created on this device will be shared via MPS
2966 cudaDevAttrMpsEnabled = cyruntime.cudaDeviceAttr.cudaDevAttrMpsEnabled
2968 #: NUMA ID of the host node closest to the device or -1 when system
2969 #: does not support NUMA
2970 cudaDevAttrHostNumaId = cyruntime.cudaDeviceAttr.cudaDevAttrHostNumaId
2972 #: Device supports CIG with D3D12.
2973 cudaDevAttrD3D12CigSupported = cyruntime.cudaDeviceAttr.cudaDevAttrD3D12CigSupported
2975 #: Device supports CIG with Vulkan.
2976 cudaDevAttrVulkanCigSupported = cyruntime.cudaDeviceAttr.cudaDevAttrVulkanCigSupported
2978 #: The combined 16-bit PCI device ID and 16-bit PCI vendor ID.
2979 cudaDevAttrGpuPciDeviceId = cyruntime.cudaDeviceAttr.cudaDevAttrGpuPciDeviceId
2981 #: The combined 16-bit PCI subsystem ID and 16-bit PCI subsystem vendor
2982 #: ID.
2983 cudaDevAttrGpuPciSubsystemId = cyruntime.cudaDeviceAttr.cudaDevAttrGpuPciSubsystemId
2984 cudaDevAttrReserved141 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved141
2986 #: Device supports HOST_NUMA location with the
2987 #: :py:obj:`~.cudaMallocAsync` and :py:obj:`~.cudaMemPool` family of
2988 #: APIs
2989 cudaDevAttrHostNumaMemoryPoolsSupported = cyruntime.cudaDeviceAttr.cudaDevAttrHostNumaMemoryPoolsSupported
2991 #: Device supports HostNuma location IPC between nodes in a multi-node
2992 #: system.
2993 cudaDevAttrHostNumaMultinodeIpcSupported = cyruntime.cudaDeviceAttr.cudaDevAttrHostNumaMultinodeIpcSupported
2995 #: Device supports HOST location with the :py:obj:`~.cuMemAllocAsync`
2996 #: and :py:obj:`~.cuMemPool` family of APIs
2997 cudaDevAttrHostMemoryPoolsSupported = cyruntime.cudaDeviceAttr.cudaDevAttrHostMemoryPoolsSupported
2998 cudaDevAttrReserved145 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved145
3000 #: Link between the device and the host supports only some native
3001 #: atomic operations
3002 cudaDevAttrOnlyPartialHostNativeAtomicSupported = cyruntime.cudaDeviceAttr.cudaDevAttrOnlyPartialHostNativeAtomicSupported
3003 cudaDevAttrMax = cyruntime.cudaDeviceAttr.cudaDevAttrMax
3005_dict_cudaDeviceAttr = dict(((int(v), v) for k, v in cudaDeviceAttr.__members__.items()))
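# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# Any attribute above is read with cudaDeviceGetAttribute(), which returns the
# value as a plain int.
def _example_device_summary(device=0):
    from cuda.bindings import runtime as cudart
    attrs = cudart.cudaDeviceAttr
    summary = {}
    for attr in (attrs.cudaDevAttrMultiProcessorCount,
                 attrs.cudaDevAttrMaxThreadsPerBlock,
                 attrs.cudaDevAttrMaxSharedMemoryPerBlockOptin):
        err, value = cudart.cudaDeviceGetAttribute(attr, device)
        assert err == cudart.cudaError_t.cudaSuccess
        summary[attr.name] = value
    return summary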
3007class cudaMemPoolAttr(IntEnum):
3008 """
3009 CUDA memory pool attributes
3010 """
3012 #: (value type = int) Allow cuMemAllocAsync to use memory that was
3013 #: asynchronously freed in other streams, as long as a stream-ordering
3014 #: dependency of the allocating stream on the free action exists. CUDA
3015 #: events and null-stream interactions can create the required
3016 #: stream-ordered dependencies. (default enabled)
3017 cudaMemPoolReuseFollowEventDependencies = cyruntime.cudaMemPoolAttr.cudaMemPoolReuseFollowEventDependencies
3019 #: (value type = int) Allow reuse of already completed frees when there
3020 #: is no dependency between the free and allocation. (default enabled)
3021 cudaMemPoolReuseAllowOpportunistic = cyruntime.cudaMemPoolAttr.cudaMemPoolReuseAllowOpportunistic
3023 #: (value type = int) Allow cuMemAllocAsync to insert new stream
3024 #: dependencies in order to establish the stream ordering required to
3025 #: reuse a piece of memory released by cuFreeAsync (default enabled).
3026 cudaMemPoolReuseAllowInternalDependencies = cyruntime.cudaMemPoolAttr.cudaMemPoolReuseAllowInternalDependencies
3028 #: (value type = cuuint64_t) Amount of reserved memory in bytes to hold
3029 #: onto before trying to release memory back to the OS. When more than
3030 #: the release threshold bytes of memory are held by the memory pool,
3031 #: the allocator will try to release memory back to the OS on the next
3032 #: call to stream, event or context synchronize. (default 0)
3033 cudaMemPoolAttrReleaseThreshold = cyruntime.cudaMemPoolAttr.cudaMemPoolAttrReleaseThreshold
3035 #: (value type = cuuint64_t) Amount of backing memory currently
3036 #: allocated for the mempool.
3037 cudaMemPoolAttrReservedMemCurrent = cyruntime.cudaMemPoolAttr.cudaMemPoolAttrReservedMemCurrent
3039 #: (value type = cuuint64_t) High watermark of backing memory allocated
3040 #: for the mempool since the last time it was reset. High watermark can
3041 #: only be reset to zero.
3042 cudaMemPoolAttrReservedMemHigh = cyruntime.cudaMemPoolAttr.cudaMemPoolAttrReservedMemHigh
3044 #: (value type = cuuint64_t) Amount of memory from the pool that is
3045 #: currently in use by the application.
3046 cudaMemPoolAttrUsedMemCurrent = cyruntime.cudaMemPoolAttr.cudaMemPoolAttrUsedMemCurrent
3048 #: (value type = cuuint64_t) High watermark of the amount of memory
3049 #: from the pool that was in use by the application since the last time
3050 #: it was reset. High watermark can only be reset to zero.
3051 cudaMemPoolAttrUsedMemHigh = cyruntime.cudaMemPoolAttr.cudaMemPoolAttrUsedMemHigh
3053_dict_cudaMemPoolAttr = dict(((int(v), v) for k, v in cudaMemPoolAttr.__members__.items()))
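# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# Pool attributes are tuned with cudaMemPoolSetAttribute(); for the
# cuuint64_t-typed attributes the value is wrapped in driver.cuuint64_t.
def _example_keep_pool_memory(device=0):
    from cuda.bindings import driver, runtime as cudart
    err, pool = cudart.cudaDeviceGetDefaultMemPool(device)
    assert err == cudart.cudaError_t.cudaSuccess
    # With the threshold at UINT64_MAX the pool never proactively releases
    # memory back to the OS.
    err, = cudart.cudaMemPoolSetAttribute(
        pool, cudart.cudaMemPoolAttr.cudaMemPoolAttrReleaseThreshold,
        driver.cuuint64_t(0xFFFFFFFFFFFFFFFF))
    assert err == cudart.cudaError_t.cudaSuccess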
3055class cudaMemLocationType(IntEnum):
3056 """
3057 Specifies the type of location
3058 """
3059 cudaMemLocationTypeInvalid = cyruntime.cudaMemLocationType.cudaMemLocationTypeInvalid
3061 #: Location is unspecified. This is used when creating a managed memory
3062 #: pool to indicate no preferred location for the pool
3063 cudaMemLocationTypeNone = cyruntime.cudaMemLocationType.cudaMemLocationTypeNone
3065 #: Location is a device location, thus id is a device ordinal
3066 cudaMemLocationTypeDevice = cyruntime.cudaMemLocationType.cudaMemLocationTypeDevice
3068 #: Location is host, id is ignored
3069 cudaMemLocationTypeHost = cyruntime.cudaMemLocationType.cudaMemLocationTypeHost
3071 #: Location is a host NUMA node, thus id is a host NUMA node id
3072 cudaMemLocationTypeHostNuma = cyruntime.cudaMemLocationType.cudaMemLocationTypeHostNuma
3074 #: Location is the host NUMA node closest to the current thread's CPU,
3075 #: id is ignored
3076 cudaMemLocationTypeHostNumaCurrent = cyruntime.cudaMemLocationType.cudaMemLocationTypeHostNumaCurrent
3078_dict_cudaMemLocationType = dict(((int(v), v) for k, v in cudaMemLocationType.__members__.items()))
3080class cudaMemAccessFlags(IntEnum):
3081 """
3082 Specifies the memory protection flags for mapping.
3083 """
3085 #: Default, make the address range not accessible
3086 cudaMemAccessFlagsProtNone = cyruntime.cudaMemAccessFlags.cudaMemAccessFlagsProtNone
3088 #: Make the address range read accessible
3089 cudaMemAccessFlagsProtRead = cyruntime.cudaMemAccessFlags.cudaMemAccessFlagsProtRead
3091 #: Make the address range read-write accessible
3092 cudaMemAccessFlagsProtReadWrite = cyruntime.cudaMemAccessFlags.cudaMemAccessFlagsProtReadWrite
3094_dict_cudaMemAccessFlags = dict(((int(v), v) for k, v in cudaMemAccessFlags.__members__.items()))
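# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# A cudaMemAccessDesc combines a cudaMemLocation with one of the protection
# flags above; cudaMemPoolSetAccess() then grants a peer device access to a
# pool's allocations.
def _example_grant_peer_access(pool, peer_device):
    from cuda.bindings import runtime as cudart
    desc = cudart.cudaMemAccessDesc()
    desc.location.type = cudart.cudaMemLocationType.cudaMemLocationTypeDevice
    desc.location.id = peer_device
    desc.flags = cudart.cudaMemAccessFlags.cudaMemAccessFlagsProtReadWrite
    err, = cudart.cudaMemPoolSetAccess(pool, [desc], 1)
    assert err == cudart.cudaError_t.cudaSuccess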
3096class cudaMemAllocationType(IntEnum):
3097 """
3098 Defines the allocation types available
3099 """
3100 cudaMemAllocationTypeInvalid = cyruntime.cudaMemAllocationType.cudaMemAllocationTypeInvalid
3102 #: This allocation type is 'pinned', i.e. cannot migrate from its
3103 #: current location while the application is actively using it
3104 cudaMemAllocationTypePinned = cyruntime.cudaMemAllocationType.cudaMemAllocationTypePinned
3106 #: This allocation type is managed memory
3107 cudaMemAllocationTypeManaged = cyruntime.cudaMemAllocationType.cudaMemAllocationTypeManaged
3108 cudaMemAllocationTypeMax = cyruntime.cudaMemAllocationType.cudaMemAllocationTypeMax
3110_dict_cudaMemAllocationType = dict(((int(v), v) for k, v in cudaMemAllocationType.__members__.items()))
3112class cudaMemAllocationHandleType(IntEnum):
3113 """
3114 Flags for specifying particular handle types
3115 """
3117 #: Does not allow any export mechanism.
3118 cudaMemHandleTypeNone = cyruntime.cudaMemAllocationHandleType.cudaMemHandleTypeNone
3120 #: Allows a file descriptor to be used for exporting. Permitted only on
3121 #: POSIX systems. (int)
3122 cudaMemHandleTypePosixFileDescriptor = cyruntime.cudaMemAllocationHandleType.cudaMemHandleTypePosixFileDescriptor
3124 #: Allows a Win32 NT handle to be used for exporting. (HANDLE)
3125 cudaMemHandleTypeWin32 = cyruntime.cudaMemAllocationHandleType.cudaMemHandleTypeWin32
3127 #: Allows a Win32 KMT handle to be used for exporting. (D3DKMT_HANDLE)
3128 cudaMemHandleTypeWin32Kmt = cyruntime.cudaMemAllocationHandleType.cudaMemHandleTypeWin32Kmt
3130 #: Allows a fabric handle to be used for exporting.
3131 #: (cudaMemFabricHandle_t)
3132 cudaMemHandleTypeFabric = cyruntime.cudaMemAllocationHandleType.cudaMemHandleTypeFabric
3134_dict_cudaMemAllocationHandleType = dict(((int(v), v) for k, v in cudaMemAllocationHandleType.__members__.items()))
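# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# A pool whose allocations can be exported as POSIX file descriptors is
# created by setting handleTypes in cudaMemPoolProps before cudaMemPoolCreate().
def _example_create_exportable_pool(device=0):
    from cuda.bindings import runtime as cudart
    props = cudart.cudaMemPoolProps()
    props.allocType = cudart.cudaMemAllocationType.cudaMemAllocationTypePinned
    props.handleTypes = cudart.cudaMemAllocationHandleType.cudaMemHandleTypePosixFileDescriptor
    props.location.type = cudart.cudaMemLocationType.cudaMemLocationTypeDevice
    props.location.id = device
    err, pool = cudart.cudaMemPoolCreate(props)
    assert err == cudart.cudaError_t.cudaSuccess
    return pool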
3136class cudaGraphMemAttributeType(IntEnum):
3137 """
3138 Graph memory attributes
3139 """
3141 #: (value type = cuuint64_t) Amount of memory, in bytes, currently
3142 #: associated with graphs.
3143 cudaGraphMemAttrUsedMemCurrent = cyruntime.cudaGraphMemAttributeType.cudaGraphMemAttrUsedMemCurrent
3145 #: (value type = cuuint64_t) High watermark of memory, in bytes,
3146 #: associated with graphs since the last time it was reset. High
3147 #: watermark can only be reset to zero.
3148 cudaGraphMemAttrUsedMemHigh = cyruntime.cudaGraphMemAttributeType.cudaGraphMemAttrUsedMemHigh
3150 #: (value type = cuuint64_t) Amount of memory, in bytes, currently
3151 #: allocated for use by the CUDA graphs asynchronous allocator.
3152 cudaGraphMemAttrReservedMemCurrent = cyruntime.cudaGraphMemAttributeType.cudaGraphMemAttrReservedMemCurrent
3154 #: (value type = cuuint64_t) High watermark of memory, in bytes,
3155 #: currently allocated for use by the CUDA graphs asynchronous
3156 #: allocator.
3157 cudaGraphMemAttrReservedMemHigh = cyruntime.cudaGraphMemAttributeType.cudaGraphMemAttrReservedMemHigh
3159_dict_cudaGraphMemAttributeType = dict(((int(v), v) for k, v in cudaGraphMemAttributeType.__members__.items()))
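# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# Graph memory counters are read per device with
# cudaDeviceGetGraphMemAttribute(); the high-watermark attributes can be reset
# to zero through the matching cudaDeviceSetGraphMemAttribute() call.
def _example_graph_mem_in_use(device=0):
    from cuda.bindings import runtime as cudart
    err, used = cudart.cudaDeviceGetGraphMemAttribute(
        device, cudart.cudaGraphMemAttributeType.cudaGraphMemAttrUsedMemCurrent)
    assert err == cudart.cudaError_t.cudaSuccess
    return used  # value type is cuuint64_t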
3161class cudaMemcpyFlags(IntEnum):
3162 """
3163 Flags to specify for copies within a batch. For more details see
3164 :py:obj:`~.cudaMemcpyBatchAsync`.
3165 """
3166 cudaMemcpyFlagDefault = cyruntime.cudaMemcpyFlags.cudaMemcpyFlagDefault
3168 #: Hint to the driver to try and overlap the copy with compute work on
3169 #: the SMs.
3170 cudaMemcpyFlagPreferOverlapWithCompute = cyruntime.cudaMemcpyFlags.cudaMemcpyFlagPreferOverlapWithCompute
3172_dict_cudaMemcpyFlags = dict(((int(v), v) for k, v in cudaMemcpyFlags.__members__.items()))
3174class cudaMemcpySrcAccessOrder(IntEnum):
3175 """
3177 """
3179 #: Default invalid.
3180 cudaMemcpySrcAccessOrderInvalid = cyruntime.cudaMemcpySrcAccessOrder.cudaMemcpySrcAccessOrderInvalid
3182 #: Indicates that access to the source pointer must be in stream order.
3183 cudaMemcpySrcAccessOrderStream = cyruntime.cudaMemcpySrcAccessOrder.cudaMemcpySrcAccessOrderStream
3185 #: Indicates that access to the source pointer can be out of stream
3186 #: order and all accesses must be complete before the API call returns.
3187 #: This flag is suited for ephemeral sources (e.g., stack variables)
3188 #: when it's known that no prior operations in the stream can be
3189 #: accessing the memory and that the lifetime of the memory is
3190 #: limited to the scope in which the source variable was declared.
3191 #: Specifying this flag allows the driver to optimize the copy and
3192 #: removes the need for the user to synchronize the stream after the
3193 #: API call.
3194 cudaMemcpySrcAccessOrderDuringApiCall = cyruntime.cudaMemcpySrcAccessOrder.cudaMemcpySrcAccessOrderDuringApiCall
3196 #: Indicates that access to the source pointer can be out of stream
3197 #: order and the accesses can happen even after the API call returns.
3198 #: This flag is suited for host pointers allocated outside CUDA (e.g.,
3199 #: via malloc) when it's known that no prior operations in the stream
3200 #: can be accessing the memory. Specifying this flag allows the driver
3201 #: to optimize the copy on certain platforms.
3202 cudaMemcpySrcAccessOrderAny = cyruntime.cudaMemcpySrcAccessOrder.cudaMemcpySrcAccessOrderAny
3203 cudaMemcpySrcAccessOrderMax = cyruntime.cudaMemcpySrcAccessOrder.cudaMemcpySrcAccessOrderMax
3205_dict_cudaMemcpySrcAccessOrder = dict(((int(v), v) for k, v in cudaMemcpySrcAccessOrder.__members__.items()))
3207class cudaMemcpy3DOperandType(IntEnum):
3208 """
3209 These flags allow applications to convey the operand type for
3210 individual copies specified in :py:obj:`~.cudaMemcpy3DBatchAsync`.
3211 """
3213 #: Memcpy operand is a valid pointer.
3214 cudaMemcpyOperandTypePointer = cyruntime.cudaMemcpy3DOperandType.cudaMemcpyOperandTypePointer
3216 #: Memcpy operand is a CUarray.
3217 cudaMemcpyOperandTypeArray = cyruntime.cudaMemcpy3DOperandType.cudaMemcpyOperandTypeArray
3218 cudaMemcpyOperandTypeMax = cyruntime.cudaMemcpy3DOperandType.cudaMemcpyOperandTypeMax
3220_dict_cudaMemcpy3DOperandType = dict(((int(v), v) for k, v in cudaMemcpy3DOperandType.__members__.items()))
3222class cudaDeviceP2PAttr(IntEnum):
3223 """
3224 CUDA device P2P attributes
3225 """
3227 #: A relative value indicating the performance of the link between two
3228 #: devices
3229 cudaDevP2PAttrPerformanceRank = cyruntime.cudaDeviceP2PAttr.cudaDevP2PAttrPerformanceRank
3231 #: Peer access is enabled
3232 cudaDevP2PAttrAccessSupported = cyruntime.cudaDeviceP2PAttr.cudaDevP2PAttrAccessSupported
3234 #: Native atomic operation over the link supported
3235 cudaDevP2PAttrNativeAtomicSupported = cyruntime.cudaDeviceP2PAttr.cudaDevP2PAttrNativeAtomicSupported
3237 #: Accessing CUDA arrays over the link supported
3238 cudaDevP2PAttrCudaArrayAccessSupported = cyruntime.cudaDeviceP2PAttr.cudaDevP2PAttrCudaArrayAccessSupported
3240 #: Only some CUDA-valid atomic operations over the link are supported.
3241 cudaDevP2PAttrOnlyPartialNativeAtomicSupported = cyruntime.cudaDeviceP2PAttr.cudaDevP2PAttrOnlyPartialNativeAtomicSupported
3243_dict_cudaDeviceP2PAttr = dict(((int(v), v) for k, v in cudaDeviceP2PAttr.__members__.items()))
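# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# P2P capabilities are queried per (src, dst) device pair; a non-zero
# cudaDevP2PAttrAccessSupported means peer access can be enabled.
def _example_p2p_supported(src=0, dst=1):
    from cuda.bindings import runtime as cudart
    err, ok = cudart.cudaDeviceGetP2PAttribute(
        cudart.cudaDeviceP2PAttr.cudaDevP2PAttrAccessSupported, src, dst)
    assert err == cudart.cudaError_t.cudaSuccess
    return bool(ok)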
3245class cudaAtomicOperation(IntEnum):
3246 """
3247 CUDA-valid Atomic Operations
3248 """
3249 cudaAtomicOperationIntegerAdd = cyruntime.cudaAtomicOperation.cudaAtomicOperationIntegerAdd
3250 cudaAtomicOperationIntegerMin = cyruntime.cudaAtomicOperation.cudaAtomicOperationIntegerMin
3251 cudaAtomicOperationIntegerMax = cyruntime.cudaAtomicOperation.cudaAtomicOperationIntegerMax
3252 cudaAtomicOperationIntegerIncrement = cyruntime.cudaAtomicOperation.cudaAtomicOperationIntegerIncrement
3253 cudaAtomicOperationIntegerDecrement = cyruntime.cudaAtomicOperation.cudaAtomicOperationIntegerDecrement
3254 cudaAtomicOperationAnd = cyruntime.cudaAtomicOperation.cudaAtomicOperationAnd
3255 cudaAtomicOperationOr = cyruntime.cudaAtomicOperation.cudaAtomicOperationOr
3256 cudaAtomicOperationXOR = cyruntime.cudaAtomicOperation.cudaAtomicOperationXOR
3257 cudaAtomicOperationExchange = cyruntime.cudaAtomicOperation.cudaAtomicOperationExchange
3258 cudaAtomicOperationCAS = cyruntime.cudaAtomicOperation.cudaAtomicOperationCAS
3259 cudaAtomicOperationFloatAdd = cyruntime.cudaAtomicOperation.cudaAtomicOperationFloatAdd
3260 cudaAtomicOperationFloatMin = cyruntime.cudaAtomicOperation.cudaAtomicOperationFloatMin
3261 cudaAtomicOperationFloatMax = cyruntime.cudaAtomicOperation.cudaAtomicOperationFloatMax
3263_dict_cudaAtomicOperation = dict(((int(v), v) for k, v in cudaAtomicOperation.__members__.items()))
3265class cudaAtomicOperationCapability(IntEnum):
3266 """
3267 CUDA-valid Atomic Operation capabilities
3268 """
3277_dict_cudaAtomicOperationCapability = dict(((int(v), v) for k, v in cudaAtomicOperationCapability.__members__.items()))
3279class cudaExternalMemoryHandleType(IntEnum):
3280 """
3281 External memory handle types
3282 """
3284 #: Handle is an opaque file descriptor
3285 cudaExternalMemoryHandleTypeOpaqueFd = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueFd
3287 #: Handle is an opaque shared NT handle
3288 cudaExternalMemoryHandleTypeOpaqueWin32 = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueWin32
3290 #: Handle is an opaque, globally shared handle
3291 cudaExternalMemoryHandleTypeOpaqueWin32Kmt = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueWin32Kmt
3293 #: Handle is a D3D12 heap object
3294 cudaExternalMemoryHandleTypeD3D12Heap = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D12Heap
3296 #: Handle is a D3D12 committed resource
3297 cudaExternalMemoryHandleTypeD3D12Resource = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D12Resource
3299 #: Handle is a shared NT handle to a D3D11 resource
3300 cudaExternalMemoryHandleTypeD3D11Resource = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D11Resource
3302 #: Handle is a globally shared handle to a D3D11 resource
3303 cudaExternalMemoryHandleTypeD3D11ResourceKmt = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D11ResourceKmt
3305 #: Handle is an NvSciBuf object
3306 cudaExternalMemoryHandleTypeNvSciBuf = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeNvSciBuf
3308_dict_cudaExternalMemoryHandleType = dict(((int(v), v) for k, v in cudaExternalMemoryHandleType.__members__.items()))
3310class cudaExternalSemaphoreHandleType(IntEnum):
3311 """
3312 External semaphore handle types
3313 """
3315 #: Handle is an opaque file descriptor
3316 cudaExternalSemaphoreHandleTypeOpaqueFd = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueFd
3318 #: Handle is an opaque shared NT handle
3319 cudaExternalSemaphoreHandleTypeOpaqueWin32 = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueWin32
3321 #: Handle is an opaque, globally shared handle
3322 cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt
3324 #: Handle is a shared NT handle referencing a D3D12 fence object
3325 cudaExternalSemaphoreHandleTypeD3D12Fence = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeD3D12Fence
3327 #: Handle is a shared NT handle referencing a D3D11 fence object
3328 cudaExternalSemaphoreHandleTypeD3D11Fence = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeD3D11Fence
3330 #: Opaque handle to NvSciSync Object
3331 cudaExternalSemaphoreHandleTypeNvSciSync = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeNvSciSync
3333 #: Handle is a shared NT handle referencing a D3D11 keyed mutex object
3334 cudaExternalSemaphoreHandleTypeKeyedMutex = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeKeyedMutex
3336 #: Handle is a shared KMT handle referencing a D3D11 keyed mutex object
3337 cudaExternalSemaphoreHandleTypeKeyedMutexKmt = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeKeyedMutexKmt
3339 #: Handle is an opaque handle file descriptor referencing a timeline
3340 #: semaphore
3341 cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd
3343 #: Handle is an opaque handle file descriptor referencing a timeline
3344 #: semaphore
3345 cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32 = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32
3347_dict_cudaExternalSemaphoreHandleType = dict(((int(v), v) for k, v in cudaExternalSemaphoreHandleType.__members__.items()))
3349class cudaDevSmResourceGroup_flags(IntEnum):
3350 """
3352 """
3353 cudaDevSmResourceGroupDefault = cyruntime.cudaDevSmResourceGroup_flags.cudaDevSmResourceGroupDefault
3354 cudaDevSmResourceGroupBackfill = cyruntime.cudaDevSmResourceGroup_flags.cudaDevSmResourceGroupBackfill
3356_dict_cudaDevSmResourceGroup_flags = dict(((int(v), v) for k, v in cudaDevSmResourceGroup_flags.__members__.items()))
3358class cudaDevSmResourceSplitByCount_flags(IntEnum):
3359 """
3361 """
3362 cudaDevSmResourceSplitIgnoreSmCoscheduling = cyruntime.cudaDevSmResourceSplitByCount_flags.cudaDevSmResourceSplitIgnoreSmCoscheduling
3363 cudaDevSmResourceSplitMaxPotentialClusterSize = cyruntime.cudaDevSmResourceSplitByCount_flags.cudaDevSmResourceSplitMaxPotentialClusterSize
3365_dict_cudaDevSmResourceSplitByCount_flags = dict(((int(v), v) for k, v in cudaDevSmResourceSplitByCount_flags.__members__.items()))
3367class cudaDevResourceType(IntEnum):
3368 """
3369 Type of resource
3370 """
3371 cudaDevResourceTypeInvalid = cyruntime.cudaDevResourceType.cudaDevResourceTypeInvalid
3373 #: Streaming multiprocessors related information
3374 cudaDevResourceTypeSm = cyruntime.cudaDevResourceType.cudaDevResourceTypeSm
3376 #: Workqueue configuration related information
3377 cudaDevResourceTypeWorkqueueConfig = cyruntime.cudaDevResourceType.cudaDevResourceTypeWorkqueueConfig
3379 #: Pre-existing workqueue related information
3380 cudaDevResourceTypeWorkqueue = cyruntime.cudaDevResourceType.cudaDevResourceTypeWorkqueue
3382_dict_cudaDevResourceType = dict(((int(v), v) for k, v in cudaDevResourceType.__members__.items()))
3384class cudaDevWorkqueueConfigScope(IntEnum):
3385 """
3386 Sharing scope for workqueues
3387 """
3389 #: Use all shared workqueue resources on the device. Default driver
3390 #: behaviour.
3391 cudaDevWorkqueueConfigScopeDeviceCtx = cyruntime.cudaDevWorkqueueConfigScope.cudaDevWorkqueueConfigScopeDeviceCtx
3393 #: When possible, use non-overlapping workqueue resources with other
3394 #: balanced green contexts.
3395 cudaDevWorkqueueConfigScopeGreenCtxBalanced = cyruntime.cudaDevWorkqueueConfigScope.cudaDevWorkqueueConfigScopeGreenCtxBalanced
3397_dict_cudaDevWorkqueueConfigScope = dict(((int(v), v) for k, v in cudaDevWorkqueueConfigScope.__members__.items()))
3399class cudaJitOption(IntEnum):
3400 """
3401 Online compiler and linker options
3402 """
3404 #: Max number of registers that a thread may use.
3405 #: Option type: unsigned int
3406 #: Applies to: compiler only
3407 cudaJitMaxRegisters = cyruntime.cudaJitOption.cudaJitMaxRegisters
3409 #: IN: Specifies minimum number of threads per block to target
3410 #: compilation for
3411 #: OUT: Returns the number of threads the compiler actually targeted.
3412 #: This restricts the resource utilization of the compiler (e.g. max
3413 #: registers) such that a block with the given number of threads should
3414 #: be able to launch based on register limitations. Note, this option
3415 #: does not currently take into account any other resource limitations,
3416 #: such as shared memory utilization.
3417 #: Option type: unsigned int
3418 #: Applies to: compiler only
3419 cudaJitThreadsPerBlock = cyruntime.cudaJitOption.cudaJitThreadsPerBlock
3421 #: Overwrites the option value with the total wall clock time, in
3422 #: milliseconds, spent in the compiler and linker
3423 #: Option type: float
3424 #: Applies to: compiler and linker
3425 cudaJitWallTime = cyruntime.cudaJitOption.cudaJitWallTime
3427 #: Pointer to a buffer in which to print any log messages that are
3428 #: informational in nature (the buffer size is specified via option
3429 #: :py:obj:`~.cudaJitInfoLogBufferSizeBytes`)
3430 #: Option type: char *
3431 #: Applies to: compiler and linker
3432 cudaJitInfoLogBuffer = cyruntime.cudaJitOption.cudaJitInfoLogBuffer
3434 #: IN: Log buffer size in bytes. Log messages will be capped at this
3435 #: size (including null terminator)
3436 #: OUT: Amount of log buffer filled with messages
3437 #: Option type: unsigned int
3438 #: Applies to: compiler and linker
3439 cudaJitInfoLogBufferSizeBytes = cyruntime.cudaJitOption.cudaJitInfoLogBufferSizeBytes
3441 #: Pointer to a buffer in which to print any log messages that reflect
3442 #: errors (the buffer size is specified via option
3443 #: :py:obj:`~.cudaJitErrorLogBufferSizeBytes`)
3444 #: Option type: char *
3445 #: Applies to: compiler and linker
3446 cudaJitErrorLogBuffer = cyruntime.cudaJitOption.cudaJitErrorLogBuffer
3448 #: IN: Log buffer size in bytes. Log messages will be capped at this
3449 #: size (including null terminator)
3450 #: OUT: Amount of log buffer filled with messages
3451 #: Option type: unsigned int
3452 #: Applies to: compiler and linker
3453 cudaJitErrorLogBufferSizeBytes = cyruntime.cudaJitOption.cudaJitErrorLogBufferSizeBytes
3455 #: Level of optimizations to apply to generated code (0 - 4), with 4
3456 #: being the default and highest level of optimizations.
3457 #: Option type: unsigned int
3458 #: Applies to: compiler only
3459 cudaJitOptimizationLevel = cyruntime.cudaJitOption.cudaJitOptimizationLevel
3461 #: Specifies choice of fallback strategy if matching cubin is not
3462 #: found. Choice is based on supplied :py:obj:`~.cudaJit_Fallback`.
3463 #: Option type: unsigned int for enumerated type
3464 #: :py:obj:`~.cudaJit_Fallback`
3465 #: Applies to: compiler only
3466 cudaJitFallbackStrategy = cyruntime.cudaJitOption.cudaJitFallbackStrategy
3468 #: Specifies whether to create debug information in output (-g) (0:
3469 #: false, default)
3470 #: Option type: int
3471 #: Applies to: compiler and linker
3472 cudaJitGenerateDebugInfo = cyruntime.cudaJitOption.cudaJitGenerateDebugInfo
3474 #: Generate verbose log messages (0: false, default)
3475 #: Option type: int
3476 #: Applies to: compiler and linker
3477 cudaJitLogVerbose = cyruntime.cudaJitOption.cudaJitLogVerbose
3479 #: Generate line number information (-lineinfo) (0: false, default)
3480 #: Option type: int
3481 #: Applies to: compiler only
3482 cudaJitGenerateLineInfo = cyruntime.cudaJitOption.cudaJitGenerateLineInfo
3484 #: Specifies whether to enable caching explicitly (-dlcm)
3485 #: Choice is based on supplied :py:obj:`~.cudaJit_CacheMode`.
3486 #: Option type: unsigned int for enumerated type
3487 #: :py:obj:`~.cudaJit_CacheMode`
3488 #: Applies to: compiler only
3489 cudaJitCacheMode = cyruntime.cudaJitOption.cudaJitCacheMode
3491 #: Generate position independent code (0: false)
3492 #: Option type: int
3493 #: Applies to: compiler only
3494 cudaJitPositionIndependentCode = cyruntime.cudaJitOption.cudaJitPositionIndependentCode
3496 #: This option hints to the JIT compiler the minimum number of CTAs
3497 #: from the kernel’s grid to be mapped to an SM. This option is ignored
3498 #: when used together with :py:obj:`~.cudaJitMaxRegisters` or
3499 #: :py:obj:`~.cudaJitThreadsPerBlock`. Optimizations based on this
3500 #: option need :py:obj:`~.cudaJitMaxThreadsPerBlock` to be specified as
3501 #: well. For kernels already using PTX directive .minnctapersm, this
3502 #: option will be ignored by default. Use
3503 #: :py:obj:`~.cudaJitOverrideDirectiveValues` to let this option take
3504 #: precedence over the PTX directive. Option type: unsigned int
3505 #: Applies to: compiler only
3506 cudaJitMinCtaPerSm = cyruntime.cudaJitOption.cudaJitMinCtaPerSm
3508 #: Maximum number of threads in a thread block, computed as the product
3509 #: of the maximum extent specified for each dimension of the block. This
3510 #: limit is guaranteed not to be exceeded in any invocation of the
3511 #: kernel. Exceeding the maximum number of threads results in a
3512 #: runtime error or kernel launch failure. For kernels already using
3513 #: PTX directive .maxntid, this option will be ignored by default. Use
3514 #: :py:obj:`~.cudaJitOverrideDirectiveValues` to let this option take
3515 #: precedence over the PTX directive. Option type: int
3516 #: Applies to: compiler only
3517 cudaJitMaxThreadsPerBlock = cyruntime.cudaJitOption.cudaJitMaxThreadsPerBlock
3519 #: This option lets the values specified using
3520 #: :py:obj:`~.cudaJitMaxRegisters`, :py:obj:`~.cudaJitThreadsPerBlock`,
3521 #: :py:obj:`~.cudaJitMaxThreadsPerBlock` and
3522 #: :py:obj:`~.cudaJitMinCtaPerSm` take precedence over any PTX
3523 #: directives. (0: Disable, default; 1: Enable) Option type: int
3524 #: Applies to: compiler only
3525 cudaJitOverrideDirectiveValues = cyruntime.cudaJitOption.cudaJitOverrideDirectiveValues
3527_dict_cudaJitOption = dict(((int(v), v) for k, v in cudaJitOption.__members__.items()))
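# --- Editorial note: the `_dict_*` tables built after each enum (such as
# _dict_cudaJitOption above) map the raw integers returned by the C layer
# back to their IntEnum members. A minimal sketch of that round-trip, with
# a hypothetical defensive fallback for values this binding does not know:
def _example_jit_option_from_int(raw_value):
    return _dict_cudaJitOption.get(int(raw_value), raw_value)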
3529class cudaLibraryOption(IntEnum):
3530 """
3531 Library options to be specified with
3532 :py:obj:`~.cudaLibraryLoadData()` or
3533 :py:obj:`~.cudaLibraryLoadFromFile()`
3534 """
3535 cudaLibraryHostUniversalFunctionAndDataTable = cyruntime.cudaLibraryOption.cudaLibraryHostUniversalFunctionAndDataTable
3537 #: Specifies that the argument `code` passed to
3538 #: :py:obj:`~.cudaLibraryLoadData()` will be preserved. Specifying this
3539 #: option will let the driver know that `code` can be accessed at any
3540 #: point until :py:obj:`~.cudaLibraryUnload()`. The default behavior is
3541 #: for the driver to allocate and maintain its own copy of `code`. Note
3542 #: that this is only a memory usage optimization hint and the driver
3543 #: can choose to ignore it if required. Specifying this option with
3544 #: :py:obj:`~.cudaLibraryLoadFromFile()` is invalid and will return
3545 #: :py:obj:`~.cudaErrorInvalidValue`.
3546 cudaLibraryBinaryIsPreserved = cyruntime.cudaLibraryOption.cudaLibraryBinaryIsPreserved
3548_dict_cudaLibraryOption = dict(((int(v), v) for k, v in cudaLibraryOption.__members__.items()))
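# --- Editorial usage sketch (not generated code): loading a cubin/fatbin
# image with JIT options. The parallel option/value lists plus explicit
# counts mirror the cudaLibraryLoadData wrapper in this module (an
# assumption about its exact argument layout); `image` is a hypothetical
# bytes object holding the code to load.
def _example_library_load(image):
    jit_opts = [cudaJitOption.cudaJitGenerateLineInfo]
    jit_vals = [1]
    err, library = cudaLibraryLoadData(
        image, jit_opts, jit_vals, len(jit_opts), None, None, 0)
    assert err == cudaError_t.cudaSuccess
    return library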
3550class cudaJit_CacheMode(IntEnum):
3551 """
3552 Caching modes for dlcm
3553 """
3555 #: Compile with no -dlcm flag specified
3556 cudaJitCacheOptionNone = cyruntime.cudaJit_CacheMode.cudaJitCacheOptionNone
3558 #: Compile with L1 cache disabled
3559 cudaJitCacheOptionCG = cyruntime.cudaJit_CacheMode.cudaJitCacheOptionCG
3561 #: Compile with L1 cache enabled
3562 cudaJitCacheOptionCA = cyruntime.cudaJit_CacheMode.cudaJitCacheOptionCA
3564_dict_cudaJit_CacheMode = dict(((int(v), v) for k, v in cudaJit_CacheMode.__members__.items()))
3566class cudaJit_Fallback(IntEnum):
3567 """
3568 Cubin matching fallback strategies
3569 """
3571 #: Prefer to compile ptx if exact binary match not found
3572 cudaPreferPtx = cyruntime.cudaJit_Fallback.cudaPreferPtx
3574 #: Prefer to fall back to compatible binary code if exact match not
3575 #: found
3576 cudaPreferBinary = cyruntime.cudaJit_Fallback.cudaPreferBinary
3578_dict_cudaJit_Fallback = dict(((int(v), v) for k, v in cudaJit_Fallback.__members__.items()))
3580class cudaCGScope(IntEnum):
3581 """
3582 CUDA cooperative group scope
3583 """
3585 #: Invalid cooperative group scope
3586 cudaCGScopeInvalid = cyruntime.cudaCGScope.cudaCGScopeInvalid
3588 #: Scope represented by a grid_group
3589 cudaCGScopeGrid = cyruntime.cudaCGScope.cudaCGScopeGrid
3591 #: Reserved
3592 cudaCGScopeReserved = cyruntime.cudaCGScope.cudaCGScopeReserved
3594_dict_cudaCGScope = dict(((int(v), v) for k, v in cudaCGScope.__members__.items()))
3596class cudaGraphConditionalHandleFlags(IntEnum):
3597 """
3599 """
3601 #: Apply default handle value when graph is launched.
3602 cudaGraphCondAssignDefault = cyruntime.cudaGraphConditionalHandleFlags.cudaGraphCondAssignDefault
3604_dict_cudaGraphConditionalHandleFlags = dict(((int(v), v) for k, v in cudaGraphConditionalHandleFlags.__members__.items()))
3606class cudaGraphConditionalNodeType(IntEnum):
3607 """
3608 CUDA conditional node types
3609 """
3611 #: Conditional 'if/else' Node. Body[0] executed if condition is non-
3612 #: zero. If `size` == 2, an optional ELSE graph is created and this is
3613 #: executed if the condition is zero.
3614 cudaGraphCondTypeIf = cyruntime.cudaGraphConditionalNodeType.cudaGraphCondTypeIf
3616 #: Conditional 'while' Node. Body executed repeatedly while condition
3617 #: value is non-zero.
3618 cudaGraphCondTypeWhile = cyruntime.cudaGraphConditionalNodeType.cudaGraphCondTypeWhile
3620 #: Conditional 'switch' Node. Body[n] is executed once, where 'n' is
3621 #: the value of the condition. If the condition does not match a body
3622 #: index, no body is launched.
3623 cudaGraphCondTypeSwitch = cyruntime.cudaGraphConditionalNodeType.cudaGraphCondTypeSwitch
3625_dict_cudaGraphConditionalNodeType = dict(((int(v), v) for k, v in cudaGraphConditionalNodeType.__members__.items()))
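# --- Editorial usage sketch (not generated code): adding a conditional
# WHILE node. The handle must exist before the node is created, and the
# body graph comes back through the node parameters. Field names follow the
# cudaGraphNodeParams/cudaConditionalNodeParams wrappers in this module and
# are an assumption about their exact spelling.
def _example_add_while_node(graph):
    err, handle = cudaGraphConditionalHandleCreate(
        graph, 1, cudaGraphConditionalHandleFlags.cudaGraphCondAssignDefault)
    assert err == cudaError_t.cudaSuccess
    params = cudaGraphNodeParams()
    params.type = cudaGraphNodeType.cudaGraphNodeTypeConditional
    params.conditional.handle = handle
    params.conditional.type = cudaGraphConditionalNodeType.cudaGraphCondTypeWhile
    params.conditional.size = 1
    err, node = cudaGraphAddNode(graph, None, 0, params)
    assert err == cudaError_t.cudaSuccess
    body = params.conditional.phGraph_out[0]  # populate with the loop body
    return node, body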
3627class cudaGraphNodeType(IntEnum):
3628 """
3629 CUDA Graph node types
3630 """
3632 #: GPU kernel node
3633 cudaGraphNodeTypeKernel = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeKernel
3635 #: Memcpy node
3636 cudaGraphNodeTypeMemcpy = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeMemcpy
3638 #: Memset node
3639 cudaGraphNodeTypeMemset = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeMemset
3641 #: Host (executable) node
3642 cudaGraphNodeTypeHost = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeHost
3644 #: Node which executes an embedded graph
3645 cudaGraphNodeTypeGraph = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeGraph
3647 #: Empty (no-op) node
3648 cudaGraphNodeTypeEmpty = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeEmpty
3650 #: External event wait node
3651 cudaGraphNodeTypeWaitEvent = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeWaitEvent
3653 #: External event record node
3654 cudaGraphNodeTypeEventRecord = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeEventRecord
3656 #: External semaphore signal node
3657 cudaGraphNodeTypeExtSemaphoreSignal = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeExtSemaphoreSignal
3659 #: External semaphore wait node
3660 cudaGraphNodeTypeExtSemaphoreWait = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeExtSemaphoreWait
3662 #: Memory allocation node
3663 cudaGraphNodeTypeMemAlloc = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeMemAlloc
3665 #: Memory free node
3666 cudaGraphNodeTypeMemFree = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeMemFree
3668 #: Conditional node. May be used to implement a conditional execution
3669 #: path or loop inside of a graph. The graph(s) contained within the
3670 #: body of the conditional node can be selectively executed or iterated
3671 #: upon based on the value of a conditional variable.
3672 #:
3673 #: Handles must be created in advance of creating the node using
3674 #: :py:obj:`~.cudaGraphConditionalHandleCreate`.
3675 #:
3676 #: The following restrictions apply to graphs which contain conditional
3677 #: nodes: The graph cannot be used in a child node. Only one
3678 #: instantiation of the graph may exist at any point in time. The graph
3679 #: cannot be cloned.
3680 #:
3681 #: To set the control value, supply a default value when creating the
3682 #: handle and/or call :py:obj:`~.cudaGraphSetConditional` from device
3683 #: code.
3693 cudaGraphNodeTypeConditional = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeConditional
3694 cudaGraphNodeTypeCount = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeCount
3696_dict_cudaGraphNodeType = dict(((int(v), v) for k, v in cudaGraphNodeType.__members__.items()))
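# --- Editorial sketch: decoding the type of an existing graph node via the
# table above; cudaGraphNodeGetType returns an (error, type) tuple like the
# other wrappers in this module.
def _example_node_type(node):
    err, raw = cudaGraphNodeGetType(node)
    assert err == cudaError_t.cudaSuccess
    return _dict_cudaGraphNodeType[int(raw)]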
3698class cudaGraphChildGraphNodeOwnership(IntEnum):
3699 """
3700 Child graph node ownership
3701 """
3703 #: Default behavior for a child graph node. Child graph is cloned into
3704 #: the parent and memory allocation/free nodes can't be present in the
3705 #: child graph.
3706 cudaGraphChildGraphOwnershipClone = cyruntime.cudaGraphChildGraphNodeOwnership.cudaGraphChildGraphOwnershipClone
3708 #: The child graph is moved to the parent. The handle to the child
3709 #: graph is owned by the parent and will be destroyed when the parent
3710 #: is destroyed.
3711 #:
3712 #: The following restrictions apply to child graphs after they have
3713 #: been moved: Cannot be independently instantiated or destroyed;
3714 #: Cannot be added as a child graph of a separate parent graph; Cannot
3715 #: be used as an argument to cudaGraphExecUpdate; Cannot have
3716 #: additional memory allocation or free nodes added.
3717 cudaGraphChildGraphOwnershipMove = cyruntime.cudaGraphChildGraphNodeOwnership.cudaGraphChildGraphOwnershipMove
3719_dict_cudaGraphChildGraphNodeOwnership = dict(((int(v), v) for k, v in cudaGraphChildGraphNodeOwnership.__members__.items()))
3721class cudaGraphExecUpdateResult(IntEnum):
3722 """
3723 CUDA Graph Update error types
3724 """
3726 #: The update succeeded
3727 cudaGraphExecUpdateSuccess = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateSuccess
3729 #: The update failed for an unexpected reason which is described in the
3730 #: return value of the function
3731 cudaGraphExecUpdateError = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateError
3733 #: The update failed because the topology changed
3734 cudaGraphExecUpdateErrorTopologyChanged = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorTopologyChanged
3736 #: The update failed because a node type changed
3737 cudaGraphExecUpdateErrorNodeTypeChanged = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorNodeTypeChanged
3739 #: The update failed because the function of a kernel node changed
3740 #: (CUDA driver < 11.2)
3741 cudaGraphExecUpdateErrorFunctionChanged = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorFunctionChanged
3743 #: The update failed because the parameters changed in a way that is
3744 #: not supported
3745 cudaGraphExecUpdateErrorParametersChanged = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorParametersChanged
3747 #: The update failed because something about the node is not supported
3748 cudaGraphExecUpdateErrorNotSupported = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorNotSupported
3750 #: The update failed because the function of a kernel node changed in
3751 #: an unsupported way
3752 cudaGraphExecUpdateErrorUnsupportedFunctionChange = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorUnsupportedFunctionChange
3754 #: The update failed because the node attributes changed in a way that
3755 #: is not supported
3756 cudaGraphExecUpdateErrorAttributesChanged = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorAttributesChanged
3758_dict_cudaGraphExecUpdateResult = dict(((int(v), v) for k, v in cudaGraphExecUpdateResult.__members__.items()))
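# --- Editorial usage sketch (not generated code): attempting an in-place
# executable-graph update and reporting why it was rejected. Assumes the
# cudaGraphExecUpdate wrapper returns a cudaGraphExecUpdateResultInfo whose
# `result` field carries the enum above.
def _example_try_exec_update(graph_exec, graph):
    err, info = cudaGraphExecUpdate(graph_exec, graph)
    if err == cudaError_t.cudaSuccess:
        return True
    reason = _dict_cudaGraphExecUpdateResult[int(info.result)]
    print(f"update rejected ({reason!r}); reinstantiation required")
    return False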
3760class cudaGraphKernelNodeField(IntEnum):
3761 """
3762 Specifies the field to update when performing multiple node updates
3763 from the device
3764 """
3766 #: Invalid field
3767 cudaGraphKernelNodeFieldInvalid = cyruntime.cudaGraphKernelNodeField.cudaGraphKernelNodeFieldInvalid
3769 #: Grid dimension update
3770 cudaGraphKernelNodeFieldGridDim = cyruntime.cudaGraphKernelNodeField.cudaGraphKernelNodeFieldGridDim
3772 #: Kernel parameter update
3773 cudaGraphKernelNodeFieldParam = cyruntime.cudaGraphKernelNodeField.cudaGraphKernelNodeFieldParam
3775 #: Node enable/disable
3776 cudaGraphKernelNodeFieldEnabled = cyruntime.cudaGraphKernelNodeField.cudaGraphKernelNodeFieldEnabled
3778_dict_cudaGraphKernelNodeField = dict(((int(v), v) for k, v in cudaGraphKernelNodeField.__members__.items()))
3780class cudaGetDriverEntryPointFlags(IntEnum):
3781 """
3782 Flags to specify search options to be used with
3783 :py:obj:`~.cudaGetDriverEntryPoint` For more details see
3784 :py:obj:`~.cuGetProcAddress`
3785 """
3787 #: Default search mode for driver symbols.
3788 cudaEnableDefault = cyruntime.cudaGetDriverEntryPointFlags.cudaEnableDefault
3790 #: Search for legacy versions of driver symbols.
3791 cudaEnableLegacyStream = cyruntime.cudaGetDriverEntryPointFlags.cudaEnableLegacyStream
3793 #: Search for per-thread versions of driver symbols.
3794 cudaEnablePerThreadDefaultStream = cyruntime.cudaGetDriverEntryPointFlags.cudaEnablePerThreadDefaultStream
3796_dict_cudaGetDriverEntryPointFlags = dict(((int(v), v) for k, v in cudaGetDriverEntryPointFlags.__members__.items()))
3798class cudaDriverEntryPointQueryResult(IntEnum):
3799 """
3800 Enum for status from obtaining driver entry points, used with
3801 :py:obj:`~.cudaApiGetDriverEntryPoint`
3802 """
3804 #: Search for symbol found a match
3805 cudaDriverEntryPointSuccess = cyruntime.cudaDriverEntryPointQueryResult.cudaDriverEntryPointSuccess
3807 #: Search for symbol was not found
3808 cudaDriverEntryPointSymbolNotFound = cyruntime.cudaDriverEntryPointQueryResult.cudaDriverEntryPointSymbolNotFound
3810 #: Search for symbol was found but the version wasn't high enough
3811 cudaDriverEntryPointVersionNotSufficent = cyruntime.cudaDriverEntryPointQueryResult.cudaDriverEntryPointVersionNotSufficent
3813_dict_cudaDriverEntryPointQueryResult = dict(((int(v), v) for k, v in cudaDriverEntryPointQueryResult.__members__.items()))
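# --- Editorial usage sketch (not generated code): querying a driver entry
# point and decoding the status with the table above. The
# (error, funcPtr, driverStatus) return layout is an assumption about the
# cudaGetDriverEntryPoint wrapper in this module.
def _example_get_entry_point(symbol=b"cuDeviceGetUuid"):
    err, func_ptr, status = cudaGetDriverEntryPoint(
        symbol, cudaGetDriverEntryPointFlags.cudaEnableDefault)
    ok = cudaDriverEntryPointQueryResult.cudaDriverEntryPointSuccess
    if err != cudaError_t.cudaSuccess or _dict_cudaDriverEntryPointQueryResult[int(status)] != ok:
        return None
    return func_ptr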
3815class cudaGraphDebugDotFlags(IntEnum):
3816 """
3817 CUDA Graph debug write options
3818 """
3820 #: Output all debug data as if every debug flag is enabled
3821 cudaGraphDebugDotFlagsVerbose = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsVerbose
3823 #: Adds :py:obj:`~.cudaKernelNodeParams` to output
3824 cudaGraphDebugDotFlagsKernelNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsKernelNodeParams
3826 #: Adds :py:obj:`~.cudaMemcpy3DParms` to output
3827 cudaGraphDebugDotFlagsMemcpyNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsMemcpyNodeParams
3829 #: Adds :py:obj:`~.cudaMemsetParams` to output
3830 cudaGraphDebugDotFlagsMemsetNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsMemsetNodeParams
3832 #: Adds :py:obj:`~.cudaHostNodeParams` to output
3833 cudaGraphDebugDotFlagsHostNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsHostNodeParams
3835 #: Adds cudaEvent_t handle from record and wait nodes to output
3836 cudaGraphDebugDotFlagsEventNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsEventNodeParams
3838 #: Adds :py:obj:`~.cudaExternalSemaphoreSignalNodeParams` values to
3839 #: output
3840 cudaGraphDebugDotFlagsExtSemasSignalNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsExtSemasSignalNodeParams
3842 #: Adds :py:obj:`~.cudaExternalSemaphoreWaitNodeParams` to output
3843 cudaGraphDebugDotFlagsExtSemasWaitNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsExtSemasWaitNodeParams
3845 #: Adds cudaKernelNodeAttrID values to output
3846 cudaGraphDebugDotFlagsKernelNodeAttributes = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsKernelNodeAttributes
3848 #: Adds node handles and every kernel function handle to output
3849 cudaGraphDebugDotFlagsHandles = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsHandles
3851 #: Adds :py:obj:`~.cudaConditionalNodeParams` to output
3852 cudaGraphDebugDotFlagsConditionalNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsConditionalNodeParams
3854_dict_cudaGraphDebugDotFlags = dict(((int(v), v) for k, v in cudaGraphDebugDotFlags.__members__.items()))
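# --- Editorial usage sketch (not generated code): writing a DOT rendering
# of a graph for debugging. Because the enum above is an IntEnum, its flags
# can be OR-ed together; `path` is a hypothetical output file.
def _example_dump_graph_dot(graph, path=b"graph.dot"):
    flags = (cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsKernelNodeParams
             | cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsHandles)
    err, = cudaGraphDebugDotPrint(graph, path, flags)
    assert err == cudaError_t.cudaSuccess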
3856class cudaGraphInstantiateFlags(IntEnum):
3857 """
3858 Flags for instantiating a graph
3859 """
3861 #: Automatically free memory allocated in a graph before relaunching.
3862 cudaGraphInstantiateFlagAutoFreeOnLaunch = cyruntime.cudaGraphInstantiateFlags.cudaGraphInstantiateFlagAutoFreeOnLaunch
3864 #: Automatically upload the graph after instantiation. Only supported
3865 #: by :py:obj:`~.cudaGraphInstantiateWithParams`. The upload will be
3866 #: performed using the stream provided in `instantiateParams`.
3869 cudaGraphInstantiateFlagUpload = cyruntime.cudaGraphInstantiateFlags.cudaGraphInstantiateFlagUpload
3871 #: Instantiate the graph to be launchable from the device. This flag
3872 #: can only be used on platforms which support unified addressing. This
3873 #: flag cannot be used in conjunction with
3874 #: cudaGraphInstantiateFlagAutoFreeOnLaunch.
3876 cudaGraphInstantiateFlagDeviceLaunch = cyruntime.cudaGraphInstantiateFlags.cudaGraphInstantiateFlagDeviceLaunch
3878 #: Run the graph using the per-node priority attributes rather than the
3879 #: priority of the stream it is launched into.
3880 cudaGraphInstantiateFlagUseNodePriority = cyruntime.cudaGraphInstantiateFlags.cudaGraphInstantiateFlagUseNodePriority
3882_dict_cudaGraphInstantiateFlags = dict(((int(v), v) for k, v in cudaGraphInstantiateFlags.__members__.items()))
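# --- Editorial usage sketch (not generated code): instantiating a graph
# with a bitmask of the flags above via cudaGraphInstantiateWithFlags.
def _example_instantiate(graph):
    flags = (cudaGraphInstantiateFlags.cudaGraphInstantiateFlagAutoFreeOnLaunch
             | cudaGraphInstantiateFlags.cudaGraphInstantiateFlagUseNodePriority)
    err, graph_exec = cudaGraphInstantiateWithFlags(graph, flags)
    assert err == cudaError_t.cudaSuccess
    return graph_exec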
3884class cudaDeviceNumaConfig(IntEnum):
3885 """
3886 CUDA device NUMA config
3887 """
3889 #: The GPU is not a NUMA node
3890 cudaDeviceNumaConfigNone = cyruntime.cudaDeviceNumaConfig.cudaDeviceNumaConfigNone
3892 #: The GPU is a NUMA node, cudaDevAttrNumaId contains its NUMA ID
3893 cudaDeviceNumaConfigNumaNode = cyruntime.cudaDeviceNumaConfig.cudaDeviceNumaConfigNumaNode
3895_dict_cudaDeviceNumaConfig = dict(((int(v), v) for k, v in cudaDeviceNumaConfig.__members__.items()))
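# --- Editorial usage sketch (not generated code): reading a device's NUMA
# configuration through cudaDeviceGetAttribute and decoding it with the
# table above; cudaDevAttrNumaConfig/cudaDevAttrNumaId are the attribute
# IDs documented for this query.
def _example_numa_config(device=0):
    err, raw = cudaDeviceGetAttribute(cudaDeviceAttr.cudaDevAttrNumaConfig, device)
    assert err == cudaError_t.cudaSuccess
    config = _dict_cudaDeviceNumaConfig[int(raw)]
    if config == cudaDeviceNumaConfig.cudaDeviceNumaConfigNumaNode:
        err, numa_id = cudaDeviceGetAttribute(cudaDeviceAttr.cudaDevAttrNumaId, device)
        return config, numa_id
    return config, None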
3897class cudaSurfaceBoundaryMode(IntEnum):
3898 """
3899 CUDA Surface boundary modes
3900 """
3902 #: Zero boundary mode
3903 cudaBoundaryModeZero = cyruntime.cudaSurfaceBoundaryMode.cudaBoundaryModeZero
3905 #: Clamp boundary mode
3906 cudaBoundaryModeClamp = cyruntime.cudaSurfaceBoundaryMode.cudaBoundaryModeClamp
3908 #: Trap boundary mode
3909 cudaBoundaryModeTrap = cyruntime.cudaSurfaceBoundaryMode.cudaBoundaryModeTrap
3911_dict_cudaSurfaceBoundaryMode = dict(((int(v), v) for k, v in cudaSurfaceBoundaryMode.__members__.items()))
3913class cudaSurfaceFormatMode(IntEnum):
3914 """
3915 CUDA Surface format modes
3916 """
3918 #: Forced format mode
3919 cudaFormatModeForced = cyruntime.cudaSurfaceFormatMode.cudaFormatModeForced
3921 #: Auto format mode
3922 cudaFormatModeAuto = cyruntime.cudaSurfaceFormatMode.cudaFormatModeAuto
3924_dict_cudaSurfaceFormatMode = dict(((int(v), v) for k, v in cudaSurfaceFormatMode.__members__.items()))
3926class cudaTextureAddressMode(IntEnum):
3927 """
3928 CUDA texture address modes
3929 """
3931 #: Wrapping address mode
3932 cudaAddressModeWrap = cyruntime.cudaTextureAddressMode.cudaAddressModeWrap
3934 #: Clamp to edge address mode
3935 cudaAddressModeClamp = cyruntime.cudaTextureAddressMode.cudaAddressModeClamp
3937 #: Mirror address mode
3938 cudaAddressModeMirror = cyruntime.cudaTextureAddressMode.cudaAddressModeMirror
3940 #: Border address mode
3941 cudaAddressModeBorder = cyruntime.cudaTextureAddressMode.cudaAddressModeBorder
3943_dict_cudaTextureAddressMode = dict(((int(v), v) for k, v in cudaTextureAddressMode.__members__.items()))
3945class cudaTextureFilterMode(IntEnum):
3946 """
3947 CUDA texture filter modes
3948 """
3950 #: Point filter mode
3951 cudaFilterModePoint = cyruntime.cudaTextureFilterMode.cudaFilterModePoint
3953 #: Linear filter mode
3954 cudaFilterModeLinear = cyruntime.cudaTextureFilterMode.cudaFilterModeLinear
3956_dict_cudaTextureFilterMode = dict(((int(v), v) for k, v in cudaTextureFilterMode.__members__.items()))
3958class cudaTextureReadMode(IntEnum):
3959 """
3960 CUDA texture read modes
3961 """
3963 #: Read texture as specified element type
3964 cudaReadModeElementType = cyruntime.cudaTextureReadMode.cudaReadModeElementType
3966 #: Read texture as normalized float
3967 cudaReadModeNormalizedFloat = cyruntime.cudaTextureReadMode.cudaReadModeNormalizedFloat
3969_dict_cudaTextureReadMode = dict(((int(v), v) for k, v in cudaTextureReadMode.__members__.items()))
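# --- Editorial usage sketch (not generated code): the address, filter and
# read mode enums above typically meet in a cudaTextureDesc when creating a
# texture object; whole-list assignment to `addressMode` is an assumption
# about the array-field setter of the struct wrapper.
def _example_texture_desc():
    desc = cudaTextureDesc()
    desc.addressMode = [cudaTextureAddressMode.cudaAddressModeClamp] * 3
    desc.filterMode = cudaTextureFilterMode.cudaFilterModeLinear
    desc.readMode = cudaTextureReadMode.cudaReadModeNormalizedFloat
    desc.normalizedCoords = 1
    return desc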
3971class cudaRoundMode(IntEnum):
3972 """"""
3973 cudaRoundNearest = cyruntime.cudaRoundMode.cudaRoundNearest
3974 cudaRoundZero = cyruntime.cudaRoundMode.cudaRoundZero
3975 cudaRoundPosInf = cyruntime.cudaRoundMode.cudaRoundPosInf
3976 cudaRoundMinInf = cyruntime.cudaRoundMode.cudaRoundMinInf
3978_dict_cudaRoundMode = dict(((int(v), v) for k, v in cudaRoundMode.__members__.items()))
3980class cudaGLDeviceList(IntEnum):
3981 """
3982 CUDA devices corresponding to the current OpenGL context
3983 """
3985 #: The CUDA devices for all GPUs used by the current OpenGL context
3986 cudaGLDeviceListAll = cyruntime.cudaGLDeviceList.cudaGLDeviceListAll
3988 #: The CUDA devices for the GPUs used by the current OpenGL context in
3989 #: its currently rendering frame
3990 cudaGLDeviceListCurrentFrame = cyruntime.cudaGLDeviceList.cudaGLDeviceListCurrentFrame
3992 #: The CUDA devices for the GPUs to be used by the current OpenGL
3993 #: context in the next frame
3994 cudaGLDeviceListNextFrame = cyruntime.cudaGLDeviceList.cudaGLDeviceListNextFrame
3996_dict_cudaGLDeviceList = dict(((int(v), v) for k, v in cudaGLDeviceList.__members__.items()))
3998class cudaGLMapFlags(IntEnum):
3999 """
4000 CUDA GL Map Flags
4001 """
4003 #: Default; Assume resource can be read/written
4004 cudaGLMapFlagsNone = cyruntime.cudaGLMapFlags.cudaGLMapFlagsNone
4006 #: CUDA kernels will not write to this resource
4007 cudaGLMapFlagsReadOnly = cyruntime.cudaGLMapFlags.cudaGLMapFlagsReadOnly
4009 #: CUDA kernels will only write to and will not read from this resource
4010 cudaGLMapFlagsWriteDiscard = cyruntime.cudaGLMapFlags.cudaGLMapFlagsWriteDiscard
4012_dict_cudaGLMapFlags = dict(((int(v), v) for k, v in cudaGLMapFlags.__members__.items()))
4014class cudaStreamAttrID(IntEnum):
4015 """
4016 Launch attributes enum; used as id field of
4017 :py:obj:`~.cudaLaunchAttribute`
4018 """
4020 #: Ignored entry, for convenient composition
4021 cudaLaunchAttributeIgnore = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeIgnore
4023 #: Valid for streams, graph nodes, launches. See
4024 #: :py:obj:`~.cudaLaunchAttributeValue.accessPolicyWindow`.
4025 cudaLaunchAttributeAccessPolicyWindow = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeAccessPolicyWindow
4027 #: Valid for graph nodes, launches. See
4028 #: :py:obj:`~.cudaLaunchAttributeValue.cooperative`.
4029 cudaLaunchAttributeCooperative = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeCooperative
4031 #: Valid for streams. See
4032 #: :py:obj:`~.cudaLaunchAttributeValue.syncPolicy`.
4033 cudaLaunchAttributeSynchronizationPolicy = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeSynchronizationPolicy
4035 #: Valid for graph nodes, launches. See
4036 #: :py:obj:`~.cudaLaunchAttributeValue.clusterDim`.
4037 cudaLaunchAttributeClusterDimension = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeClusterDimension
4039 #: Valid for graph nodes, launches. See
4040 #: :py:obj:`~.cudaLaunchAttributeValue.clusterSchedulingPolicyPreference`.
4041 cudaLaunchAttributeClusterSchedulingPolicyPreference = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeClusterSchedulingPolicyPreference
4043 #: Valid for launches. Setting
4044 #: :py:obj:`~.cudaLaunchAttributeValue.programmaticStreamSerializationAllowed`
4045 #: to non-0 signals that the kernel will use programmatic means to
4046 #: resolve its stream dependency, so that the CUDA runtime should
4047 #: opportunistically allow the grid's execution to overlap with the
4048 #: previous kernel in the stream, if that kernel requests the overlap.
4049 #: The dependent launches can choose to wait on the dependency using
4050 #: the programmatic sync (cudaGridDependencySynchronize() or equivalent
4051 #: PTX instructions).
4052 cudaLaunchAttributeProgrammaticStreamSerialization = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeProgrammaticStreamSerialization
4054 #: Valid for launches. Set
4055 #: :py:obj:`~.cudaLaunchAttributeValue.programmaticEvent` to record the
4056 #: event. An event recorded through this launch attribute is guaranteed
4057 #: to only trigger after all blocks in the associated kernel trigger the
4058 #: event. A block can trigger the event programmatically in a future
4059 #: CUDA release. A trigger can also be inserted at the beginning of
4060 #: each block's execution if triggerAtBlockStart is set to non-0. The
4061 #: dependent launches can choose to wait on the dependency using the
4062 #: programmatic sync (cudaGridDependencySynchronize() or equivalent PTX
4063 #: instructions). Note that dependents (including the CPU thread
4064 #: calling :py:obj:`~.cudaEventSynchronize()`) are not guaranteed to
4065 #: observe the release precisely when it is released. For example,
4066 #: :py:obj:`~.cudaEventSynchronize()` may only observe the event
4067 #: trigger long after the associated kernel has completed. This
4068 #: recording type is primarily meant for establishing programmatic
4069 #: dependency between device tasks. Note also this type of dependency
4070 #: allows, but does not guarantee, concurrent execution of tasks.
4071 #: The event supplied must not be an interprocess or interop event.
4072 #: The event must disable timing (i.e. must be created with the
4073 #: :py:obj:`~.cudaEventDisableTiming` flag set).
4074 cudaLaunchAttributeProgrammaticEvent = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeProgrammaticEvent
4076 #: Valid for streams, graph nodes, launches. See
4077 #: :py:obj:`~.cudaLaunchAttributeValue.priority`.
4078 cudaLaunchAttributePriority = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePriority
4080 #: Valid for streams, graph nodes, launches. See
4081 #: :py:obj:`~.cudaLaunchAttributeValue.memSyncDomainMap`.
4082 cudaLaunchAttributeMemSyncDomainMap = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomainMap
4084 #: Valid for streams, graph nodes, launches. See
4085 #: :py:obj:`~.cudaLaunchAttributeValue.memSyncDomain`.
4086 cudaLaunchAttributeMemSyncDomain = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomain
4088 #: Valid for graph nodes and launches. Set
4089 #: :py:obj:`~.cudaLaunchAttributeValue.preferredClusterDim` to allow
4090 #: the kernel launch to specify a preferred substitute cluster
4091 #: dimension. Blocks may be grouped according to either the dimensions
4092 #: specified with this attribute (grouped into a "preferred substitute
4093 #: cluster"), or the one specified with
4094 #: :py:obj:`~.cudaLaunchAttributeClusterDimension` attribute (grouped
4095 #: into a "regular cluster"). The cluster dimensions of a "preferred
4096 #: substitute cluster" shall be an integer multiple greater than zero
4097 #: of the regular cluster dimensions. The device will attempt - on a
4098 #: best-effort basis - to group thread blocks into preferred clusters
4099 #: over grouping them into regular clusters. When it deems necessary
4100 #: (primarily when the device temporarily runs out of physical
4101 #: resources to launch the larger preferred clusters), the device may
4102 #: switch to launch the regular clusters instead to attempt to utilize
4103 #: as much of the physical device resources as possible.
4104 #: Each type of cluster will have its enumeration / coordinate setup
4105 #: as if the grid consists solely of its type of cluster. For example,
4106 #: if the preferred substitute cluster dimensions double the regular
4107 #: cluster dimensions, there might be simultaneously a regular cluster
4108 #: indexed at (1,0,0), and a preferred cluster indexed at (1,0,0). In
4109 #: this example, the preferred substitute cluster (1,0,0) replaces
4110 #: regular clusters (2,0,0) and (3,0,0) and groups their blocks.
4111 #: This attribute will only take effect when a regular cluster
4112 #: dimension has been specified. The preferred substitute cluster
4113 #: dimension must be an integer multiple greater than zero of the
4114 #: regular cluster dimension and must divide the grid. It must also be
4115 #: no more than `maxBlocksPerCluster`, if that is set in the kernel's
4116 #: `__launch_bounds__`, and otherwise less than the maximum value the
4117 #: driver can support. Beyond that, setting this attribute to a value
4118 #: physically unable to fit on any particular device is permitted.
4120 cudaLaunchAttributePreferredClusterDimension = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePreferredClusterDimension
4122 #: Valid for launches. Set
4123 #: :py:obj:`~.cudaLaunchAttributeValue.launchCompletionEvent` to record
4124 #: the event.
4125 #: Nominally, the event is triggered once all blocks of the kernel
4126 #: have begun execution. Currently this is a best effort. If a kernel B
4127 #: has a launch completion dependency on a kernel A, B may wait until A
4128 #: is complete. Alternatively, blocks of B may begin before all blocks
4129 #: of A have begun, for example if B can claim execution resources
4130 #: unavailable to A (e.g. they run on different GPUs) or if B is a
4131 #: higher priority than A. Exercise caution if such an ordering
4132 #: inversion could lead to deadlock.
4133 #: A launch completion event is nominally similar to a programmatic
4134 #: event with `triggerAtBlockStart` set except that it is not visible
4135 #: to `cudaGridDependencySynchronize()` and can be used with compute
4136 #: capability less than 9.0.
4137 #: The event supplied must not be an interprocess or interop event.
4138 #: The event must disable timing (i.e. must be created with the
4139 #: :py:obj:`~.cudaEventDisableTiming` flag set).
4140 cudaLaunchAttributeLaunchCompletionEvent = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeLaunchCompletionEvent
4142 #: Valid for graph nodes, launches. This attribute is graphs-only, and
4143 #: passing it to a launch in a non-capturing stream will result in an
4144 #: error.
4145 #: cudaLaunchAttributeValue::deviceUpdatableKernelNode::deviceUpdatable
4146 #: can only be set to 0 or 1. Setting the field to 1 indicates that the
4147 #: corresponding kernel node should be device-updatable. On success, a
4148 #: handle will be returned via
4149 #: :py:obj:`~.cudaLaunchAttributeValue`::deviceUpdatableKernelNode::devNode
4150 #: which can be passed to the various device-side update functions to
4151 #: update the node's kernel parameters from within another kernel. For
4152 #: more information on the types of device updates that can be made, as
4153 #: well as the relevant limitations thereof, see
4154 #: :py:obj:`~.cudaGraphKernelNodeUpdatesApply`.
4155 #: Nodes which are device-updatable have additional restrictions
4156 #: compared to regular kernel nodes. Firstly, device-updatable nodes
4157 #: cannot be removed from their graph via
4158 #: :py:obj:`~.cudaGraphDestroyNode`. Additionally, once opted-in to
4159 #: this functionality, a node cannot opt out, and any attempt to set
4160 #: the deviceUpdatable attribute to 0 will result in an error. Device-
4161 #: updatable kernel nodes also cannot have their attributes copied
4162 #: to/from another kernel node via
4163 #: :py:obj:`~.cudaGraphKernelNodeCopyAttributes`. Graphs containing one
4164 #: or more device-updatable nodes also do not allow multiple
4165 #: instantiation, and neither the graph nor its instantiated version
4166 #: can be passed to :py:obj:`~.cudaGraphExecUpdate`.
4167 #: If a graph contains device-updatable nodes and updates those nodes
4168 #: from the device from within the graph, the graph must be uploaded
4169 #: with :py:obj:`~.cuGraphUpload` before it is launched. For such a
4170 #: graph, if host-side executable graph updates are made to the device-
4171 #: updatable nodes, the graph must be uploaded before it is launched
4172 #: again.
4173 cudaLaunchAttributeDeviceUpdatableKernelNode = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeDeviceUpdatableKernelNode
4175 #: Valid for launches. On devices where the L1 cache and shared memory
4176 #: use the same hardware resources, setting
4177 #: :py:obj:`~.cudaLaunchAttributeValue.sharedMemCarveout` to a
4178 #: percentage between 0 and 100 sets the shared memory carveout
4179 #: preference in percent of the total shared memory for that kernel
4180 #: launch. This attribute takes precedence over
4181 #: :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout`. This is
4182 #: only a hint, and the driver can choose a different configuration if
4183 #: required for the launch.
4184 cudaLaunchAttributePreferredSharedMemoryCarveout = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePreferredSharedMemoryCarveout
4186 #: Valid for streams, graph nodes, launches. This attribute is a hint
4187 #: to the CUDA runtime that the launch should attempt to make the
4188 #: kernel maximize its NVLINK utilization.
4189 #:
4190 #: When possible to honor this hint, CUDA will assume each block in
4191 #: the grid launch will carry out an even amount of NVLINK traffic, and
4192 #: make a best-effort attempt to adjust the kernel launch based on that
4193 #: assumption.
4194 #: This attribute is a hint only. CUDA makes no functional or
4195 #: performance guarantee. Its applicability can be affected by many
4196 #: different factors, including driver version (CUDA doesn't guarantee
4197 #: that performance characteristics will be maintained between driver
4198 #: versions; a driver update could alter or regress previously observed
4199 #: performance characteristics). It also doesn't guarantee a
4200 #: successful result, i.e. applying the attribute may not improve the
4201 #: performance of either the targeted kernel or the encapsulating
4202 #: application.
4203 #: Valid values for
4204 #: :py:obj:`~.cudaLaunchAttributeValue.nvlinkUtilCentricScheduling` are
4205 #: 0 (disabled) and 1 (enabled).
4206 cudaLaunchAttributeNvlinkUtilCentricScheduling = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeNvlinkUtilCentricScheduling
4208_dict_cudaLaunchAttributeID = dict(((int(v), v) for k, v in cudaLaunchAttributeID.__members__.items()))
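# --- Editorial usage sketch (not generated code): applying one of the
# stream-valid attributes above to a stream. cudaStreamAttrValue is assumed
# here to alias cudaLaunchAttributeValue in these bindings; the chosen sync
# policy is illustrative only.
def _example_set_stream_sync_policy(stream):
    value = cudaStreamAttrValue()
    value.syncPolicy = cudaSynchronizationPolicy.cudaSyncPolicyYield
    err, = cudaStreamSetAttribute(
        stream, cudaStreamAttrID.cudaLaunchAttributeSynchronizationPolicy, value)
    assert err == cudaError_t.cudaSuccess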
4210class cudaKernelNodeAttrID(IntEnum):
4211 """
4212 Launch attributes enum; used as id field of
4213 :py:obj:`~.cudaLaunchAttribute`
4214 """
4216 #: Ignored entry, for convenient composition
4217 cudaLaunchAttributeIgnore = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeIgnore
4219 #: Valid for streams, graph nodes, launches. See
4220 #: :py:obj:`~.cudaLaunchAttributeValue.accessPolicyWindow`.
4221 cudaLaunchAttributeAccessPolicyWindow = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeAccessPolicyWindow
4223 #: Valid for graph nodes, launches. See
4224 #: :py:obj:`~.cudaLaunchAttributeValue.cooperative`.
4225 cudaLaunchAttributeCooperative = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeCooperative
4227 #: Valid for streams. See
4228 #: :py:obj:`~.cudaLaunchAttributeValue.syncPolicy`.
4229 cudaLaunchAttributeSynchronizationPolicy = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeSynchronizationPolicy
4231 #: Valid for graph nodes, launches. See
4232 #: :py:obj:`~.cudaLaunchAttributeValue.clusterDim`.
4233 cudaLaunchAttributeClusterDimension = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeClusterDimension
4235 #: Valid for graph nodes, launches. See
4236 #: :py:obj:`~.cudaLaunchAttributeValue.clusterSchedulingPolicyPreference`.
4237 cudaLaunchAttributeClusterSchedulingPolicyPreference = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeClusterSchedulingPolicyPreference
4239 #: Valid for launches. Setting
4240 #: :py:obj:`~.cudaLaunchAttributeValue.programmaticStreamSerializationAllowed`
4241 #: to non-0 signals that the kernel will use programmatic means to
4242 #: resolve its stream dependency, so that the CUDA runtime should
4243 #: opportunistically allow the grid's execution to overlap with the
4244 #: previous kernel in the stream, if that kernel requests the overlap.
4245 #: The dependent launches can choose to wait on the dependency using
4246 #: the programmatic sync (cudaGridDependencySynchronize() or equivalent
4247 #: PTX instructions).
4248 cudaLaunchAttributeProgrammaticStreamSerialization = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeProgrammaticStreamSerialization
4250 #: Valid for launches. Set
4251 #: :py:obj:`~.cudaLaunchAttributeValue.programmaticEvent` to record the
4252 #: event. An event recorded through this launch attribute is guaranteed
4253 #: to only trigger after all blocks in the associated kernel trigger the
4254 #: event. A block can trigger the event programmatically in a future
4255 #: CUDA release. A trigger can also be inserted at the beginning of
4256 #: each block's execution if triggerAtBlockStart is set to non-0. The
4257 #: dependent launches can choose to wait on the dependency using the
4258 #: programmatic sync (cudaGridDependencySynchronize() or equivalent PTX
4259 #: instructions). Note that dependents (including the CPU thread
4260 #: calling :py:obj:`~.cudaEventSynchronize()`) are not guaranteed to
4261 #: observe the release precisely when it is released. For example,
4262 #: :py:obj:`~.cudaEventSynchronize()` may only observe the event
4263 #: trigger long after the associated kernel has completed. This
4264 #: recording type is primarily meant for establishing programmatic
4265 #: dependency between device tasks. Note also this type of dependency
4266 #: allows, but does not guarantee, concurrent execution of tasks.
4267 #: The event supplied must not be an interprocess or interop event.
4268 #: The event must disable timing (i.e. must be created with the
4269 #: :py:obj:`~.cudaEventDisableTiming` flag set).
4270 cudaLaunchAttributeProgrammaticEvent = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeProgrammaticEvent
4272 #: Valid for streams, graph nodes, launches. See
4273 #: :py:obj:`~.cudaLaunchAttributeValue.priority`.
4274 cudaLaunchAttributePriority = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePriority
4276 #: Valid for streams, graph nodes, launches. See
4277 #: :py:obj:`~.cudaLaunchAttributeValue.memSyncDomainMap`.
4278 cudaLaunchAttributeMemSyncDomainMap = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomainMap
4280 #: Valid for streams, graph nodes, launches. See
4281 #: :py:obj:`~.cudaLaunchAttributeValue.memSyncDomain`.
4282 cudaLaunchAttributeMemSyncDomain = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomain
4284 #: Valid for graph nodes and launches. Set
4285 #: :py:obj:`~.cudaLaunchAttributeValue.preferredClusterDim` to allow
4286 #: the kernel launch to specify a preferred substitute cluster
4287 #: dimension. Blocks may be grouped according to either the dimensions
4288 #: specified with this attribute (grouped into a "preferred substitute
4289 #: cluster"), or the one specified with
4290 #: :py:obj:`~.cudaLaunchAttributeClusterDimension` attribute (grouped
4291 #: into a "regular cluster"). The cluster dimensions of a "preferred
4292 #: substitute cluster" shall be an integer multiple greater than zero
4293 #: of the regular cluster dimensions. The device will attempt - on a
4294 #: best-effort basis - to group thread blocks into preferred clusters
4295 #: over grouping them into regular clusters. When it deems necessary
4296 #: (primarily when the device temporarily runs out of physical
4297 #: resources to launch the larger preferred clusters), the device may
4298 #: switch to launch the regular clusters instead to attempt to utilize
4299 #: as much of the physical device resources as possible.
4300 #: Each type of cluster will have its enumeration / coordinate setup
4301 #: as if the grid consists solely of its type of cluster. For example,
4302 #: if the preferred substitute cluster dimensions double the regular
4303 #: cluster dimensions, there might be simultaneously a regular cluster
4304 #: indexed at (1,0,0), and a preferred cluster indexed at (1,0,0). In
4305 #: this example, the preferred substitute cluster (1,0,0) replaces
4306 #: regular clusters (2,0,0) and (3,0,0) and groups their blocks.
4307 #: This attribute will only take effect when a regular cluster
4308 #: dimension has been specified. The preferred substitute cluster
4309 #: dimension must be an integer multiple greater than zero of the
4310 #: regular cluster dimension and must divide the grid. It must also be
4311 #: no more than `maxBlocksPerCluster`, if that is set in the kernel's
4312 #: `__launch_bounds__`, and otherwise less than the maximum value the
4313 #: driver can support. Beyond that, setting this attribute to a value
4314 #: physically unable to fit on any particular device is permitted.
4316 cudaLaunchAttributePreferredClusterDimension = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePreferredClusterDimension
4318 #: Valid for launches. Set
4319 #: :py:obj:`~.cudaLaunchAttributeValue.launchCompletionEvent` to record
4320 #: the event.
4321 #: Nominally, the event is triggered once all blocks of the kernel
4322 #: have begun execution. Currently this is a best effort. If a kernel B
4323 #: has a launch completion dependency on a kernel A, B may wait until A
4324 #: is complete. Alternatively, blocks of B may begin before all blocks
4325 #: of A have begun, for example if B can claim execution resources
4326 #: unavailable to A (e.g. they run on different GPUs) or if B is a
4327 #: higher priority than A. Exercise caution if such an ordering
4328 #: inversion could lead to deadlock.
4329 #: A launch completion event is nominally similar to a programmatic
4330 #: event with `triggerAtBlockStart` set except that it is not visible
4331 #: to `cudaGridDependencySynchronize()` and can be used with compute
4332 #: capability less than 9.0.
4333 #: The event supplied must not be an interprocess or interop event.
4334 #: The event must disable timing (i.e. must be created with the
4335 #: :py:obj:`~.cudaEventDisableTiming` flag set).
4336 cudaLaunchAttributeLaunchCompletionEvent = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeLaunchCompletionEvent
4338 #: Valid for graph nodes, launches. This attribute is graphs-only, and
4339 #: passing it to a launch in a non-capturing stream will result in an
4340 #: error.
4341 #: cudaLaunchAttributeValue::deviceUpdatableKernelNode::deviceUpdatable
4342 #: can only be set to 0 or 1. Setting the field to 1 indicates that the
4343 #: corresponding kernel node should be device-updatable. On success, a
4344 #: handle will be returned via
4345 #: :py:obj:`~.cudaLaunchAttributeValue`::deviceUpdatableKernelNode::devNode
4346 #: which can be passed to the various device-side update functions to
4347 #: update the node's kernel parameters from within another kernel. For
4348 #: more information on the types of device updates that can be made, as
4349 #: well as the relevant limitations thereof, see
4350 #: :py:obj:`~.cudaGraphKernelNodeUpdatesApply`.
4351 #: Nodes which are device-updatable have additional restrictions
4352 #: compared to regular kernel nodes. Firstly, device-updatable nodes
4353 #: cannot be removed from their graph via
4354 #: :py:obj:`~.cudaGraphDestroyNode`. Additionally, once opted-in to
4355 #: this functionality, a node cannot opt out, and any attempt to set
4356 #: the deviceUpdatable attribute to 0 will result in an error. Device-
4357 #: updatable kernel nodes also cannot have their attributes copied
4358 #: to/from another kernel node via
4359 #: :py:obj:`~.cudaGraphKernelNodeCopyAttributes`. Graphs containing one
4360 #: or more device-updatable nodes also do not allow multiple
4361 #: instantiation, and neither the graph nor its instantiated version
4362 #: can be passed to :py:obj:`~.cudaGraphExecUpdate`.
4363 #: If a graph contains device-updatable nodes and updates those nodes
4364 #: from the device from within the graph, the graph must be uploaded
4365 #: with :py:obj:`~.cuGraphUpload` before it is launched. For such a
4366 #: graph, if host-side executable graph updates are made to the device-
4367 #: updatable nodes, the graph must be uploaded before it is launched
4368 #: again.
4369 cudaLaunchAttributeDeviceUpdatableKernelNode = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeDeviceUpdatableKernelNode
4371 #: Valid for launches. On devices where the L1 cache and shared memory
4372 #: use the same hardware resources, setting
4373 #: :py:obj:`~.cudaLaunchAttributeValue.sharedMemCarveout` to a
4374 #: percentage between 0 and 100 sets the shared memory carveout
4375 #: preference in percent of the total shared memory for that kernel
4376 #: launch. This attribute takes precedence over
4377 #: :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout`. This is
4378 #: only a hint, and the driver can choose a different configuration if
4379 #: required for the launch.
4380 cudaLaunchAttributePreferredSharedMemoryCarveout = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePreferredSharedMemoryCarveout
4382 #: Valid for streams, graph nodes, launches. This attribute is a hint
4383 #: to the CUDA runtime that the launch should attempt to make the
4384 #: kernel maximize its NVLINK utilization.
4385 #:
4386 #: When possible to honor this hint, CUDA will assume each block in
4387 #: the grid launch will carry out an even amount of NVLINK traffic, and
4388 #: make a best-effort attempt to adjust the kernel launch based on that
4389 #: assumption.
4390 #: This attribute is a hint only. CUDA makes no functional or
4391 #: performance guarantee. Its applicability can be affected by many
4392 #: different factors, including driver version (CUDA doesn't guarantee
4393 #: that performance characteristics will be maintained between driver
4394 #: versions; a driver update could alter or regress previously observed
4395 #: performance characteristics). It also doesn't guarantee a
4396 #: successful result, i.e. applying the attribute may not improve the
4397 #: performance of either the targeted kernel or the encapsulating
4398 #: application.
4399 #: Valid values for
4400 #: :py:obj:`~.cudaLaunchAttributeValue.nvlinkUtilCentricScheduling` are
4401 #: 0 (disabled) and 1 (enabled).
4402 cudaLaunchAttributeNvlinkUtilCentricScheduling = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeNvlinkUtilCentricScheduling
4404_dict_cudaLaunchAttributeID = dict(((int(v), v) for k, v in cudaLaunchAttributeID.__members__.items()))
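# --- Editorial usage sketch (not generated code): the same attribute IDs
# drive per-node updates on kernel graph nodes. Here the node priority is
# set, which takes effect when the graph is instantiated with
# cudaGraphInstantiateFlagUseNodePriority; cudaKernelNodeAttrValue is
# assumed to alias cudaLaunchAttributeValue in these bindings.
def _example_set_kernel_node_priority(node, priority):
    value = cudaKernelNodeAttrValue()
    value.priority = priority
    err, = cudaGraphKernelNodeSetAttribute(
        node, cudaKernelNodeAttrID.cudaLaunchAttributePriority, value)
    assert err == cudaError_t.cudaSuccess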
4406cdef class cudaDevResourceDesc_t:
4407 """
4409 An opaque descriptor handle. The descriptor encapsulates multiple created and configured resources. Created via cudaDeviceResourceGenerateDesc.
4411 Methods
4412 -------
4413 getPtr()
4414 Get memory address of class instance
4416 """
4417 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4418 if _ptr == 0:
4419 self._pvt_ptr = &self._pvt_val
4420 self._pvt_ptr[0] = <cyruntime.cudaDevResourceDesc_t>init_value
4421 else:
4422 self._pvt_ptr = <cyruntime.cudaDevResourceDesc_t *>_ptr
4423 def __init__(self, *args, **kwargs):
4424 pass
4425 def __repr__(self):
4426 return '<cudaDevResourceDesc_t ' + str(hex(self.__int__())) + '>'
4427 def __index__(self):
4428 return self.__int__()
4429 def __eq__(self, other):
4430 if not isinstance(other, cudaDevResourceDesc_t):
4431 return False
4432 return self._pvt_ptr[0] == (<cudaDevResourceDesc_t>other)._pvt_ptr[0]
4433 def __hash__(self):
4434 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4435 def __int__(self):
4436 return <void_ptr>self._pvt_ptr[0]
4437 def getPtr(self):
4438 return <void_ptr>self._pvt_ptr
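# --- Editorial note: cudaDevResourceDesc_t above (like the other cdef
# handle classes that follow) wraps an opaque pointer value; equality,
# hashing and int() all go through the wrapped value, e.g.:
def _example_handle_semantics():
    a = cudaDevResourceDesc_t(init_value=0)
    b = cudaDevResourceDesc_t(init_value=0)
    assert a == b and hash(a) == hash(b)  # compared by wrapped value
    assert int(a) == 0                    # int() exposes that value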
4440cdef class cudaExecutionContext_t:
4441 """
4443 An opaque handle to a CUDA execution context. It represents an execution context created via CUDA Runtime APIs such as cudaGreenCtxCreate.
4445 Methods
4446 -------
4447 getPtr()
4448 Get memory address of class instance
4450 """
4451 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4452 if _ptr == 0:
4453 self._pvt_ptr = &self._pvt_val
4454 self._pvt_ptr[0] = <cyruntime.cudaExecutionContext_t>init_value
4455 else:
4456 self._pvt_ptr = <cyruntime.cudaExecutionContext_t *>_ptr
4457 def __init__(self, *args, **kwargs):
4458 pass
4459 def __repr__(self):
4460 return '<cudaExecutionContext_t ' + str(hex(self.__int__())) + '>'
4461 def __index__(self):
4462 return self.__int__()
4463 def __eq__(self, other):
4464 if not isinstance(other, cudaExecutionContext_t):
4465 return False
4466 return self._pvt_ptr[0] == (<cudaExecutionContext_t>other)._pvt_ptr[0]
4467 def __hash__(self):
4468 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4469 def __int__(self):
4470 return <void_ptr>self._pvt_ptr[0]
4471 def getPtr(self):
4472 return <void_ptr>self._pvt_ptr
4474cdef class cudaArray_t:
4475 """
4477 CUDA array
4479 Methods
4480 -------
4481 getPtr()
4482 Get memory address of class instance
4484 """
4485 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4486 if _ptr == 0:
4487 self._pvt_ptr = &self._pvt_val
4488 self._pvt_ptr[0] = <cyruntime.cudaArray_t>init_value
4489 else:
4490 self._pvt_ptr = <cyruntime.cudaArray_t *>_ptr
4491 def __init__(self, *args, **kwargs):
4492 pass
4493 def __repr__(self):
4494 return '<cudaArray_t ' + str(hex(self.__int__())) + '>'
4495 def __index__(self):
4496 return self.__int__()
4497 def __eq__(self, other):
4498 if not isinstance(other, cudaArray_t):
4499 return False
4500 return self._pvt_ptr[0] == (<cudaArray_t>other)._pvt_ptr[0]
4501 def __hash__(self):
4502 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4503 def __int__(self):
4504 return <void_ptr>self._pvt_ptr[0]
4505 def getPtr(self):
4506 return <void_ptr>self._pvt_ptr
4508cdef class cudaArray_const_t:
4509 """
4511 CUDA array (as source copy argument)
4513 Methods
4514 -------
4515 getPtr()
4516 Get memory address of class instance
4518 """
4519 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4520 if _ptr == 0:
4521 self._pvt_ptr = &self._pvt_val
4522 self._pvt_ptr[0] = <cyruntime.cudaArray_const_t>init_value
4523 else:
4524 self._pvt_ptr = <cyruntime.cudaArray_const_t *>_ptr
4525 def __init__(self, *args, **kwargs):
4526 pass
4527 def __repr__(self):
4528 return '<cudaArray_const_t ' + str(hex(self.__int__())) + '>'
4529 def __index__(self):
4530 return self.__int__()
4531 def __eq__(self, other):
4532 if not isinstance(other, cudaArray_const_t):
4533 return False
4534 return self._pvt_ptr[0] == (<cudaArray_const_t>other)._pvt_ptr[0]
4535 def __hash__(self):
4536 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4537 def __int__(self):
4538 return <void_ptr>self._pvt_ptr[0]
4539 def getPtr(self):
4540 return <void_ptr>self._pvt_ptr
4542cdef class cudaMipmappedArray_t:
4543 """
4545 CUDA mipmapped array
4547 Methods
4548 -------
4549 getPtr()
4550 Get memory address of class instance
4552 """
4553 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4554 if _ptr == 0:
4555 self._pvt_ptr = &self._pvt_val
4556 self._pvt_ptr[0] = <cyruntime.cudaMipmappedArray_t>init_value
4557 else:
4558 self._pvt_ptr = <cyruntime.cudaMipmappedArray_t *>_ptr
4559 def __init__(self, *args, **kwargs):
4560 pass
4561 def __repr__(self):
4562 return '<cudaMipmappedArray_t ' + str(hex(self.__int__())) + '>'
4563 def __index__(self):
4564 return self.__int__()
4565 def __eq__(self, other):
4566 if not isinstance(other, cudaMipmappedArray_t):
4567 return False
4568 return self._pvt_ptr[0] == (<cudaMipmappedArray_t>other)._pvt_ptr[0]
4569 def __hash__(self):
4570 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4571 def __int__(self):
4572 return <void_ptr>self._pvt_ptr[0]
4573 def getPtr(self):
4574 return <void_ptr>self._pvt_ptr
4576cdef class cudaMipmappedArray_const_t:
4577 """
4579 CUDA mipmapped array (as source argument)
4581 Methods
4582 -------
4583 getPtr()
4584 Get memory address of class instance
4586 """
4587 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4588 if _ptr == 0:
4589 self._pvt_ptr = &self._pvt_val
4590 self._pvt_ptr[0] = <cyruntime.cudaMipmappedArray_const_t>init_value
4591 else:
4592 self._pvt_ptr = <cyruntime.cudaMipmappedArray_const_t *>_ptr
4593 def __init__(self, *args, **kwargs):
4594 pass
4595 def __repr__(self):
4596 return '<cudaMipmappedArray_const_t ' + str(hex(self.__int__())) + '>'
4597 def __index__(self):
4598 return self.__int__()
4599 def __eq__(self, other):
4600 if not isinstance(other, cudaMipmappedArray_const_t):
4601 return False
4602 return self._pvt_ptr[0] == (<cudaMipmappedArray_const_t>other)._pvt_ptr[0]
4603 def __hash__(self):
4604 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4605 def __int__(self):
4606 return <void_ptr>self._pvt_ptr[0]
4607 def getPtr(self):
4608 return <void_ptr>self._pvt_ptr
4610cdef class cudaGraphicsResource_t:
4611 """
4613 CUDA graphics resource types
4615 Methods
4616 -------
4617 getPtr()
4618 Get memory address of class instance
4620 """
4621 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4622 if _ptr == 0:
4623 self._pvt_ptr = &self._pvt_val
4624 self._pvt_ptr[0] = <cyruntime.cudaGraphicsResource_t>init_value
4625 else:
4626 self._pvt_ptr = <cyruntime.cudaGraphicsResource_t *>_ptr
4627 def __init__(self, *args, **kwargs):
4628 pass
4629 def __repr__(self):
4630 return '<cudaGraphicsResource_t ' + str(hex(self.__int__())) + '>'
4631 def __index__(self):
4632 return self.__int__()
4633 def __eq__(self, other):
4634 if not isinstance(other, cudaGraphicsResource_t):
4635 return False
4636 return self._pvt_ptr[0] == (<cudaGraphicsResource_t>other)._pvt_ptr[0]
4637 def __hash__(self):
4638 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4639 def __int__(self):
4640 return <void_ptr>self._pvt_ptr[0]
4641 def getPtr(self):
4642 return <void_ptr>self._pvt_ptr
4644cdef class cudaExternalMemory_t:
4645 """
4647 CUDA external memory
4649 Methods
4650 -------
4651 getPtr()
4652 Get memory address of class instance
4654 """
4655 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4656 if _ptr == 0:
4657 self._pvt_ptr = &self._pvt_val
4658 self._pvt_ptr[0] = <cyruntime.cudaExternalMemory_t>init_value
4659 else:
4660 self._pvt_ptr = <cyruntime.cudaExternalMemory_t *>_ptr
4661 def __init__(self, *args, **kwargs):
4662 pass
4663 def __repr__(self):
4664 return '<cudaExternalMemory_t ' + str(hex(self.__int__())) + '>'
4665 def __index__(self):
4666 return self.__int__()
4667 def __eq__(self, other):
4668 if not isinstance(other, cudaExternalMemory_t):
4669 return False
4670 return self._pvt_ptr[0] == (<cudaExternalMemory_t>other)._pvt_ptr[0]
4671 def __hash__(self):
4672 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4673 def __int__(self):
4674 return <void_ptr>self._pvt_ptr[0]
4675 def getPtr(self):
4676 return <void_ptr>self._pvt_ptr
4678cdef class cudaExternalSemaphore_t:
4679 """
4681 CUDA external semaphore
4683 Methods
4684 -------
4685 getPtr()
4686 Get memory address of class instance
4688 """
4689 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4690 if _ptr == 0:
4691 self._pvt_ptr = &self._pvt_val
4692 self._pvt_ptr[0] = <cyruntime.cudaExternalSemaphore_t>init_value
4693 else:
4694 self._pvt_ptr = <cyruntime.cudaExternalSemaphore_t *>_ptr
4695 def __init__(self, *args, **kwargs):
4696 pass
4697 def __repr__(self):
4698 return '<cudaExternalSemaphore_t ' + str(hex(self.__int__())) + '>'
4699 def __index__(self):
4700 return self.__int__()
4701 def __eq__(self, other):
4702 if not isinstance(other, cudaExternalSemaphore_t):
4703 return False
4704 return self._pvt_ptr[0] == (<cudaExternalSemaphore_t>other)._pvt_ptr[0]
4705 def __hash__(self):
4706 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4707 def __int__(self):
4708 return <void_ptr>self._pvt_ptr[0]
4709 def getPtr(self):
4710 return <void_ptr>self._pvt_ptr
4712cdef class cudaKernel_t:
4713 """
4715 CUDA kernel
4717 Methods
4718 -------
4719 getPtr()
4720 Get memory address of class instance
4722 """
4723 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4724 if _ptr == 0:
4725 self._pvt_ptr = &self._pvt_val
4726 self._pvt_ptr[0] = <cyruntime.cudaKernel_t>init_value
4727 else:
4728 self._pvt_ptr = <cyruntime.cudaKernel_t *>_ptr
4729 def __init__(self, *args, **kwargs):
4730 pass
4731 def __repr__(self):
4732 return '<cudaKernel_t ' + str(hex(self.__int__())) + '>'
4733 def __index__(self):
4734 return self.__int__()
4735 def __eq__(self, other):
4736 if not isinstance(other, cudaKernel_t):
4737 return False
4738 return self._pvt_ptr[0] == (<cudaKernel_t>other)._pvt_ptr[0]
4739 def __hash__(self):
4740 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4741 def __int__(self):
4742 return <void_ptr>self._pvt_ptr[0]
4743 def getPtr(self):
4744 return <void_ptr>self._pvt_ptr
4746cdef class cudaLibrary_t:
4747 """
4749 CUDA library
4751 Methods
4752 -------
4753 getPtr()
4754 Get memory address of class instance
4756 """
4757 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4758 if _ptr == 0:
4759 self._pvt_ptr = &self._pvt_val
4760 self._pvt_ptr[0] = <cyruntime.cudaLibrary_t>init_value
4761 else:
4762 self._pvt_ptr = <cyruntime.cudaLibrary_t *>_ptr
4763 def __init__(self, *args, **kwargs):
4764 pass
4765 def __repr__(self):
4766 return '<cudaLibrary_t ' + str(hex(self.__int__())) + '>'
4767 def __index__(self):
4768 return self.__int__()
4769 def __eq__(self, other):
4770 if not isinstance(other, cudaLibrary_t):
4771 return False
4772 return self._pvt_ptr[0] == (<cudaLibrary_t>other)._pvt_ptr[0]
4773 def __hash__(self):
4774 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4775 def __int__(self):
4776 return <void_ptr>self._pvt_ptr[0]
4777 def getPtr(self):
4778 return <void_ptr>self._pvt_ptr
4780cdef class cudaGraphDeviceNode_t:
4781 """
4783 CUDA device node handle for device-side node update
4785 Methods
4786 -------
4787 getPtr()
4788 Get memory address of class instance
4790 """
4791 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4792 if _ptr == 0:
4793 self._pvt_ptr = &self._pvt_val
4794 self._pvt_ptr[0] = <cyruntime.cudaGraphDeviceNode_t>init_value
4795 else:
4796 self._pvt_ptr = <cyruntime.cudaGraphDeviceNode_t *>_ptr
4797 def __init__(self, *args, **kwargs):
4798 pass
4799 def __repr__(self):
4800 return '<cudaGraphDeviceNode_t ' + str(hex(self.__int__())) + '>'
4801 def __index__(self):
4802 return self.__int__()
4803 def __eq__(self, other):
4804 if not isinstance(other, cudaGraphDeviceNode_t):
4805 return False
4806 return self._pvt_ptr[0] == (<cudaGraphDeviceNode_t>other)._pvt_ptr[0]
4807 def __hash__(self):
4808 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4809 def __int__(self):
4810 return <void_ptr>self._pvt_ptr[0]
4811 def getPtr(self):
4812 return <void_ptr>self._pvt_ptr
4814cdef class cudaAsyncCallbackHandle_t:
4815 """
4817 CUDA async callback handle
4819 Methods
4820 -------
4821 getPtr()
4822 Get memory address of class instance
4824 """
4825 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4826 if _ptr == 0:
4827 self._pvt_ptr = &self._pvt_val
4828 self._pvt_ptr[0] = <cyruntime.cudaAsyncCallbackHandle_t>init_value
4829 else:
4830 self._pvt_ptr = <cyruntime.cudaAsyncCallbackHandle_t *>_ptr
4831 def __init__(self, *args, **kwargs):
4832 pass
4833 def __repr__(self):
4834 return '<cudaAsyncCallbackHandle_t ' + str(hex(self.__int__())) + '>'
4835 def __index__(self):
4836 return self.__int__()
4837 def __eq__(self, other):
4838 if not isinstance(other, cudaAsyncCallbackHandle_t):
4839 return False
4840 return self._pvt_ptr[0] == (<cudaAsyncCallbackHandle_t>other)._pvt_ptr[0]
4841 def __hash__(self):
4842 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4843 def __int__(self):
4844 return <void_ptr>self._pvt_ptr[0]
4845 def getPtr(self):
4846 return <void_ptr>self._pvt_ptr
4848cdef class cudaLogsCallbackHandle:
4849 """
4851 Methods
4852 -------
4853 getPtr()
4854 Get memory address of class instance
4856 """
4857 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4858 if _ptr == 0:
4859 self._pvt_ptr = &self._pvt_val
4860 self._pvt_ptr[0] = <cyruntime.cudaLogsCallbackHandle>init_value
4861 else:
4862 self._pvt_ptr = <cyruntime.cudaLogsCallbackHandle *>_ptr
4863 def __init__(self, *args, **kwargs):
4864 pass
4865 def __repr__(self):
4866 return '<cudaLogsCallbackHandle ' + str(hex(self.__int__())) + '>'
4867 def __index__(self):
4868 return self.__int__()
4869 def __eq__(self, other):
4870 if not isinstance(other, cudaLogsCallbackHandle):
4871 return False
4872 return self._pvt_ptr[0] == (<cudaLogsCallbackHandle>other)._pvt_ptr[0]
4873 def __hash__(self):
4874 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4875 def __int__(self):
4876 return <void_ptr>self._pvt_ptr[0]
4877 def getPtr(self):
4878 return <void_ptr>self._pvt_ptr
4880cdef class EGLImageKHR:
4881 """
4883 Methods
4884 -------
4885 getPtr()
4886 Get memory address of class instance
4888 """
4889 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4890 if _ptr == 0:
4891 self._pvt_ptr = &self._pvt_val
4892 self._pvt_ptr[0] = <cyruntime.EGLImageKHR>init_value
4893 else:
4894 self._pvt_ptr = <cyruntime.EGLImageKHR *>_ptr
4895 def __init__(self, *args, **kwargs):
4896 pass
4897 def __repr__(self):
4898 return '<EGLImageKHR ' + str(hex(self.__int__())) + '>'
4899 def __index__(self):
4900 return self.__int__()
4901 def __eq__(self, other):
4902 if not isinstance(other, EGLImageKHR):
4903 return False
4904 return self._pvt_ptr[0] == (<EGLImageKHR>other)._pvt_ptr[0]
4905 def __hash__(self):
4906 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4907 def __int__(self):
4908 return <void_ptr>self._pvt_ptr[0]
4909 def getPtr(self):
4910 return <void_ptr>self._pvt_ptr
4912cdef class EGLStreamKHR:
4913 """
4915 Methods
4916 -------
4917 getPtr()
4918 Get memory address of class instance
4920 """
4921 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4922 if _ptr == 0:
4923 self._pvt_ptr = &self._pvt_val
4924 self._pvt_ptr[0] = <cyruntime.EGLStreamKHR>init_value
4925 else:
4926 self._pvt_ptr = <cyruntime.EGLStreamKHR *>_ptr
4927 def __init__(self, *args, **kwargs):
4928 pass
4929 def __repr__(self):
4930 return '<EGLStreamKHR ' + str(hex(self.__int__())) + '>'
4931 def __index__(self):
4932 return self.__int__()
4933 def __eq__(self, other):
4934 if not isinstance(other, EGLStreamKHR):
4935 return False
4936 return self._pvt_ptr[0] == (<EGLStreamKHR>other)._pvt_ptr[0]
4937 def __hash__(self):
4938 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4939 def __int__(self):
4940 return <void_ptr>self._pvt_ptr[0]
4941 def getPtr(self):
4942 return <void_ptr>self._pvt_ptr
4944cdef class EGLSyncKHR:
4945 """
4947 Methods
4948 -------
4949 getPtr()
4950 Get memory address of class instance
4952 """
4953 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4954 if _ptr == 0:
4955 self._pvt_ptr = &self._pvt_val
4956 self._pvt_ptr[0] = <cyruntime.EGLSyncKHR>init_value
4957 else:
4958 self._pvt_ptr = <cyruntime.EGLSyncKHR *>_ptr
4959 def __init__(self, *args, **kwargs):
4960 pass
4961 def __repr__(self):
4962 return '<EGLSyncKHR ' + str(hex(self.__int__())) + '>'
4963 def __index__(self):
4964 return self.__int__()
4965 def __eq__(self, other):
4966 if not isinstance(other, EGLSyncKHR):
4967 return False
4968 return self._pvt_ptr[0] == (<EGLSyncKHR>other)._pvt_ptr[0]
4969 def __hash__(self):
4970 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4971 def __int__(self):
4972 return <void_ptr>self._pvt_ptr[0]
4973 def getPtr(self):
4974 return <void_ptr>self._pvt_ptr
4976cdef class cudaHostFn_t:
4977 """
4979 Methods
4980 -------
4981 getPtr()
4982 Get memory address of class instance
4984 """
4985 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4986 if _ptr == 0:
4987 self._pvt_ptr = &self._pvt_val
4988 self._pvt_ptr[0] = <cyruntime.cudaHostFn_t>init_value
4989 else:
4990 self._pvt_ptr = <cyruntime.cudaHostFn_t *>_ptr
4991 def __init__(self, *args, **kwargs):
4992 pass
4993 def __repr__(self):
4994 return '<cudaHostFn_t ' + str(hex(self.__int__())) + '>'
4995 def __index__(self):
4996 return self.__int__()
4997 def __int__(self):
4998 return <void_ptr>self._pvt_ptr[0]
4999 def getPtr(self):
5000 return <void_ptr>self._pvt_ptr
5002cdef class cudaAsyncCallback:
5003 """
5005 Methods
5006 -------
5007 getPtr()
5008 Get memory address of class instance
5010 """
5011 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
5012 if _ptr == 0:
5013 self._pvt_ptr = &self._pvt_val
5014 self._pvt_ptr[0] = <cyruntime.cudaAsyncCallback>init_value
5015 else:
5016 self._pvt_ptr = <cyruntime.cudaAsyncCallback *>_ptr
5017 def __init__(self, *args, **kwargs):
5018 pass
5019 def __repr__(self):
5020 return '<cudaAsyncCallback ' + str(hex(self.__int__())) + '>'
5021 def __index__(self):
5022 return self.__int__()
5023 def __int__(self):
5024 return <void_ptr>self._pvt_ptr[0]
5025 def getPtr(self):
5026 return <void_ptr>self._pvt_ptr
5028cdef class cudaStreamCallback_t:
5029 """
5031 Methods
5032 -------
5033 getPtr()
5034 Get memory address of class instance
5036 """
5037 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
5038 if _ptr == 0:
5039 self._pvt_ptr = &self._pvt_val
5040 self._pvt_ptr[0] = <cyruntime.cudaStreamCallback_t>init_value
5041 else:
5042 self._pvt_ptr = <cyruntime.cudaStreamCallback_t *>_ptr
5043 def __init__(self, *args, **kwargs):
5044 pass
5045 def __repr__(self):
5046 return '<cudaStreamCallback_t ' + str(hex(self.__int__())) + '>'
5047 def __index__(self):
5048 return self.__int__()
5049 def __int__(self):
5050 return <void_ptr>self._pvt_ptr[0]
5051 def getPtr(self):
5052 return <void_ptr>self._pvt_ptr
5054cdef class cudaLogsCallback_t:
5055 """
5057 Methods
5058 -------
5059 getPtr()
5060 Get memory address of class instance
5062 """
5063 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
5064 if _ptr == 0:
5065 self._pvt_ptr = &self._pvt_val
5066 self._pvt_ptr[0] = <cyruntime.cudaLogsCallback_t>init_value
5067 else:
5068 self._pvt_ptr = <cyruntime.cudaLogsCallback_t *>_ptr
5069 def __init__(self, *args, **kwargs):
5070 pass
5071 def __repr__(self):
5072 return '<cudaLogsCallback_t ' + str(hex(self.__int__())) + '>'
5073 def __index__(self):
5074 return self.__int__()
5075 def __int__(self):
5076 return <void_ptr>self._pvt_ptr[0]
5077 def getPtr(self):
5078 return <void_ptr>self._pvt_ptr
5080cdef class dim3:
5081 """
5082 Attributes
5083 ----------
5084 x : unsigned int
5086 y : unsigned int
5088 z : unsigned int
5091 Methods
5092 -------
5093 getPtr()
5094 Get memory address of class instance
5095 """
5096 def __cinit__(self, void_ptr _ptr = 0):
5097 if _ptr == 0:
5098 self._pvt_ptr = &self._pvt_val
5099 else:
5100 self._pvt_ptr = <cyruntime.dim3 *>_ptr
5101 def __init__(self, void_ptr _ptr = 0):
5102 pass
5103 def __dealloc__(self):
5104 pass
5105 def getPtr(self):
5106 return <void_ptr>self._pvt_ptr
5107 def __repr__(self):
5108 if self._pvt_ptr is not NULL:
5109 str_list = []
5110 try:
5111 str_list += ['x : ' + str(self.x)]
5112 except ValueError:
5113 str_list += ['x : <ValueError>']
5114 try:
5115 str_list += ['y : ' + str(self.y)]
5116 except ValueError:
5117 str_list += ['y : <ValueError>']
5118 try:
5119 str_list += ['z : ' + str(self.z)]
5120 except ValueError:
5121 str_list += ['z : <ValueError>']
5122 return '\n'.join(str_list)
5123 else:
5124 return ''
5125 @property
5126 def x(self):
5127 return self._pvt_ptr[0].x
5128 @x.setter
5129 def x(self, unsigned int x):
5130 self._pvt_ptr[0].x = x
5131 @property
5132 def y(self):
5133 return self._pvt_ptr[0].y
5134 @y.setter
5135 def y(self, unsigned int y):
5136 self._pvt_ptr[0].y = y
5137 @property
5138 def z(self):
5139 return self._pvt_ptr[0].z
5140 @z.setter
5141 def z(self, unsigned int z):
5142 self._pvt_ptr[0].z = z
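# Editor's sketch: the dim3 wrapper zero-fills its storage, so set all three
# fields explicitly before use:
#
#     block = dim3()
#     block.x, block.y, block.z = 16, 16, 1
#     print(block)        # __repr__ prints one "field : value" line per axis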
5144cdef class cudaChannelFormatDesc:
5145 """
5146 CUDA Channel format descriptor
5148 Attributes
5149 ----------
5150 x : int
5151 x
5152 y : int
5153 y
5154 z : int
5155 z
5156 w : int
5157 w
5158 f : cudaChannelFormatKind
5159 Channel format kind
5161 Methods
5162 -------
5163 getPtr()
5164 Get memory address of class instance
5165 """
5166 def __cinit__(self, void_ptr _ptr = 0):
5167 if _ptr == 0:
5168 self._pvt_ptr = &self._pvt_val
5169 else:
5170 self._pvt_ptr = <cyruntime.cudaChannelFormatDesc *>_ptr
5171 def __init__(self, void_ptr _ptr = 0):
5172 pass
5173 def __dealloc__(self):
5174 pass
5175 def getPtr(self):
5176 return <void_ptr>self._pvt_ptr
5177 def __repr__(self):
5178 if self._pvt_ptr is not NULL:
5179 str_list = []
5180 try:
5181 str_list += ['x : ' + str(self.x)]
5182 except ValueError:
5183 str_list += ['x : <ValueError>']
5184 try:
5185 str_list += ['y : ' + str(self.y)]
5186 except ValueError:
5187 str_list += ['y : <ValueError>']
5188 try:
5189 str_list += ['z : ' + str(self.z)]
5190 except ValueError:
5191 str_list += ['z : <ValueError>']
5192 try:
5193 str_list += ['w : ' + str(self.w)]
5194 except ValueError:
5195 str_list += ['w : <ValueError>']
5196 try:
5197 str_list += ['f : ' + str(self.f)]
5198 except ValueError:
5199 str_list += ['f : <ValueError>']
5200 return '\n'.join(str_list)
5201 else:
5202 return ''
5203 @property
5204 def x(self):
5205 return self._pvt_ptr[0].x
5206 @x.setter
5207 def x(self, int x):
5208 self._pvt_ptr[0].x = x
5209 @property
5210 def y(self):
5211 return self._pvt_ptr[0].y
5212 @y.setter
5213 def y(self, int y):
5214 self._pvt_ptr[0].y = y
5215 @property
5216 def z(self):
5217 return self._pvt_ptr[0].z
5218 @z.setter
5219 def z(self, int z):
5220 self._pvt_ptr[0].z = z
5221 @property
5222 def w(self):
5223 return self._pvt_ptr[0].w
5224 @w.setter
5225 def w(self, int w):
5226 self._pvt_ptr[0].w = w
5227 @property
5228 def f(self):
5229 if self._pvt_ptr[0].f not in _dict_cudaChannelFormatKind:
5230 return None
5231 return _dict_cudaChannelFormatKind[self._pvt_ptr[0].f]
5232 @f.setter
5233 def f(self, f not None : cudaChannelFormatKind):
5234 self._pvt_ptr[0].f = f.value
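# Editor's sketch: describing a float4 texel with the setters above. The
# cudaChannelFormatKindFloat member is assumed from the runtime enum:
#
#     desc = cudaChannelFormatDesc()
#     desc.x = desc.y = desc.z = desc.w = 32               # bits per channel
#     desc.f = cudaChannelFormatKind.cudaChannelFormatKindFloat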
5236cdef class anon_struct0:
5237 """
5238 Attributes
5239 ----------
5240 width : unsigned int
5242 height : unsigned int
5244 depth : unsigned int
5247 Methods
5248 -------
5249 getPtr()
5250 Get memory address of class instance
5251 """
5252 def __cinit__(self, void_ptr _ptr):
5253 self._pvt_ptr = <cyruntime.cudaArraySparseProperties *>_ptr
5255 def __init__(self, void_ptr _ptr):
5256 pass
5257 def __dealloc__(self):
5258 pass
5259 def getPtr(self):
5260 return <void_ptr>&self._pvt_ptr[0].tileExtent
5261 def __repr__(self):
5262 if self._pvt_ptr is not NULL:
5263 str_list = []
5264 try:
5265 str_list += ['width : ' + str(self.width)]
5266 except ValueError:
5267 str_list += ['width : <ValueError>']
5268 try:
5269 str_list += ['height : ' + str(self.height)]
5270 except ValueError:
5271 str_list += ['height : <ValueError>']
5272 try:
5273 str_list += ['depth : ' + str(self.depth)]
5274 except ValueError:
5275 str_list += ['depth : <ValueError>']
5276 return '\n'.join(str_list)
5277 else:
5278 return ''
5279 @property
5280 def width(self):
5281 return self._pvt_ptr[0].tileExtent.width
5282 @width.setter
5283 def width(self, unsigned int width):
5284 self._pvt_ptr[0].tileExtent.width = width
5285 @property
5286 def height(self):
5287 return self._pvt_ptr[0].tileExtent.height
5288 @height.setter
5289 def height(self, unsigned int height):
5290 self._pvt_ptr[0].tileExtent.height = height
5291 @property
5292 def depth(self):
5293 return self._pvt_ptr[0].tileExtent.depth
5294 @depth.setter
5295 def depth(self, unsigned int depth):
5296 self._pvt_ptr[0].tileExtent.depth = depth
5298cdef class cudaArraySparseProperties:
5299 """
5300 Sparse CUDA array and CUDA mipmapped array properties
5302 Attributes
5303 ----------
5304 tileExtent : anon_struct0
5306 miptailFirstLevel : unsigned int
5307 First mip level at which the mip tail begins
5308 miptailSize : unsigned long long
5309 Total size of the mip tail.
5310 flags : unsigned int
5311 Flags will be either zero or cudaArraySparsePropertiesSingleMipTail.
5312 reserved : list[unsigned int]
5315 Methods
5316 -------
5317 getPtr()
5318 Get memory address of class instance
5319 """
5320 def __cinit__(self, void_ptr _ptr = 0):
5321 if _ptr == 0:
5322 self._pvt_ptr = &self._pvt_val
5323 else:
5324 self._pvt_ptr = <cyruntime.cudaArraySparseProperties *>_ptr
5325 def __init__(self, void_ptr _ptr = 0):
5327 self._tileExtent = anon_struct0(_ptr=<void_ptr>self._pvt_ptr)
5328 def __dealloc__(self):
5329 pass
5330 def getPtr(self):
5331 return <void_ptr>self._pvt_ptr
5332 def __repr__(self):
5333 if self._pvt_ptr is not NULL:
5334 str_list = []
5335 try:
5336 str_list += ['tileExtent :\n' + '\n'.join([' ' + line for line in str(self.tileExtent).splitlines()])]
5337 except ValueError:
5338 str_list += ['tileExtent : <ValueError>']
5339 try:
5340 str_list += ['miptailFirstLevel : ' + str(self.miptailFirstLevel)]
5341 except ValueError:
5342 str_list += ['miptailFirstLevel : <ValueError>']
5343 try:
5344 str_list += ['miptailSize : ' + str(self.miptailSize)]
5345 except ValueError:
5346 str_list += ['miptailSize : <ValueError>']
5347 try:
5348 str_list += ['flags : ' + str(self.flags)]
5349 except ValueError:
5350 str_list += ['flags : <ValueError>']
5351 try:
5352 str_list += ['reserved : ' + str(self.reserved)]
5353 except ValueError:
5354 str_list += ['reserved : <ValueError>']
5355 return '\n'.join(str_list)
5356 else:
5357 return ''
5358 @property
5359 def tileExtent(self):
5360 return self._tileExtent
5361 @tileExtent.setter
5362 def tileExtent(self, tileExtent not None : anon_struct0):
5363 string.memcpy(&self._pvt_ptr[0].tileExtent, <cyruntime.anon_struct0*><void_ptr>tileExtent.getPtr(), sizeof(self._pvt_ptr[0].tileExtent))
5364 @property
5365 def miptailFirstLevel(self):
5366 return self._pvt_ptr[0].miptailFirstLevel
5367 @miptailFirstLevel.setter
5368 def miptailFirstLevel(self, unsigned int miptailFirstLevel):
5369 self._pvt_ptr[0].miptailFirstLevel = miptailFirstLevel
5370 @property
5371 def miptailSize(self):
5372 return self._pvt_ptr[0].miptailSize
5373 @miptailSize.setter
5374 def miptailSize(self, unsigned long long miptailSize):
5375 self._pvt_ptr[0].miptailSize = miptailSize
5376 @property
5377 def flags(self):
5378 return self._pvt_ptr[0].flags
5379 @flags.setter
5380 def flags(self, unsigned int flags):
5381 self._pvt_ptr[0].flags = flags
5382 @property
5383 def reserved(self):
5384 return self._pvt_ptr[0].reserved
5385 @reserved.setter
5386 def reserved(self, reserved):
5387 self._pvt_ptr[0].reserved = reserved
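# Editor's sketch: this struct is normally populated by the runtime rather
# than by hand. Assumes the cudaArrayGetSparseProperties binding defined
# elsewhere in this module and the usual (err, result) return convention:
#
#     err, props = cudaArrayGetSparseProperties(array)   # array: cudaArray_t
#     if err == cudaError_t.cudaSuccess:
#         tile = props.tileExtent
#         print(tile.width, tile.height, tile.depth, props.miptailSize)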
5389cdef class cudaArrayMemoryRequirements:
5390 """
5391 CUDA array and CUDA mipmapped array memory requirements
5393 Attributes
5394 ----------
5395 size : size_t
5396 Total size of the array.
5397 alignment : size_t
5398 Alignment necessary for mapping the array.
5399 reserved : list[unsigned int]
5402 Methods
5403 -------
5404 getPtr()
5405 Get memory address of class instance
5406 """
5407 def __cinit__(self, void_ptr _ptr = 0):
5408 if _ptr == 0:
5409 self._pvt_ptr = &self._pvt_val
5410 else:
5411 self._pvt_ptr = <cyruntime.cudaArrayMemoryRequirements *>_ptr
5412 def __init__(self, void_ptr _ptr = 0):
5413 pass
5414 def __dealloc__(self):
5415 pass
5416 def getPtr(self):
5417 return <void_ptr>self._pvt_ptr
5418 def __repr__(self):
5419 if self._pvt_ptr is not NULL:
5420 str_list = []
5421 try:
5422 str_list += ['size : ' + str(self.size)]
5423 except ValueError:
5424 str_list += ['size : <ValueError>']
5425 try:
5426 str_list += ['alignment : ' + str(self.alignment)]
5427 except ValueError:
5428 str_list += ['alignment : <ValueError>']
5429 try:
5430 str_list += ['reserved : ' + str(self.reserved)]
5431 except ValueError:
5432 str_list += ['reserved : <ValueError>']
5433 return '\n'.join(str_list)
5434 else:
5435 return ''
5436 @property
5437 def size(self):
5438 return self._pvt_ptr[0].size
5439 @size.setter
5440 def size(self, size_t size):
5441 self._pvt_ptr[0].size = size
5442 @property
5443 def alignment(self):
5444 return self._pvt_ptr[0].alignment
5445 @alignment.setter
5446 def alignment(self, size_t alignment):
5447 self._pvt_ptr[0].alignment = alignment
5448 @property
5449 def reserved(self):
5450 return self._pvt_ptr[0].reserved
5451 @reserved.setter
5452 def reserved(self, reserved):
5453 self._pvt_ptr[0].reserved = reserved
5455cdef class cudaPitchedPtr:
5456 """
5457 CUDA pitched memory pointer. See make_cudaPitchedPtr.
5459 Attributes
5460 ----------
5461 ptr : Any
5462 Pointer to allocated memory
5463 pitch : size_t
5464 Pitch of allocated memory in bytes
5465 xsize : size_t
5466 Logical width of allocation in elements
5467 ysize : size_t
5468 Logical height of allocation in elements
5470 Methods
5471 -------
5472 getPtr()
5473 Get memory address of class instance
5474 """
5475 def __cinit__(self, void_ptr _ptr = 0):
5476 if _ptr == 0:
5477 self._pvt_ptr = &self._pvt_val
5478 else:
5479 self._pvt_ptr = <cyruntime.cudaPitchedPtr *>_ptr
5480 def __init__(self, void_ptr _ptr = 0):
5481 pass
5482 def __dealloc__(self):
5483 pass
5484 def getPtr(self):
5485 return <void_ptr>self._pvt_ptr
5486 def __repr__(self):
5487 if self._pvt_ptr is not NULL:
5488 str_list = []
5489 try:
5490 str_list += ['ptr : ' + hex(self.ptr)]
5491 except ValueError:
5492 str_list += ['ptr : <ValueError>']
5493 try:
5494 str_list += ['pitch : ' + str(self.pitch)]
5495 except ValueError:
5496 str_list += ['pitch : <ValueError>']
5497 try:
5498 str_list += ['xsize : ' + str(self.xsize)]
5499 except ValueError:
5500 str_list += ['xsize : <ValueError>']
5501 try:
5502 str_list += ['ysize : ' + str(self.ysize)]
5503 except ValueError:
5504 str_list += ['ysize : <ValueError>']
5505 return '\n'.join(str_list)
5506 else:
5507 return ''
5508 @property
5509 def ptr(self):
5510 return <void_ptr>self._pvt_ptr[0].ptr
5511 @ptr.setter
5512 def ptr(self, ptr):
5513 _cptr = _HelperInputVoidPtr(ptr)
5514 self._pvt_ptr[0].ptr = <void*><void_ptr>_cptr.cptr
5515 @property
5516 def pitch(self):
5517 return self._pvt_ptr[0].pitch
5518 @pitch.setter
5519 def pitch(self, size_t pitch):
5520 self._pvt_ptr[0].pitch = pitch
5521 @property
5522 def xsize(self):
5523 return self._pvt_ptr[0].xsize
5524 @xsize.setter
5525 def xsize(self, size_t xsize):
5526 self._pvt_ptr[0].xsize = xsize
5527 @property
5528 def ysize(self):
5529 return self._pvt_ptr[0].ysize
5530 @ysize.setter
5531 def ysize(self, size_t ysize):
5532 self._pvt_ptr[0].ysize = ysize
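# Editor's sketch: building a pitched pointer by hand with the setters above
# (make_cudaPitchedPtr, referenced in the docstring, fills the same fields);
# dptr, pitch_bytes, width, and height are placeholders:
#
#     p = cudaPitchedPtr()
#     p.ptr = int(dptr)                  # dptr: a device allocation (assumed)
#     p.pitch = pitch_bytes
#     p.xsize, p.ysize = width, height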
5534cdef class cudaExtent:
5535 """
5536 CUDA extent. See make_cudaExtent.
5538 Attributes
5539 ----------
5540 width : size_t
5541 Width in elements when referring to array memory, in bytes when
5542 referring to linear memory
5543 height : size_t
5544 Height in elements
5545 depth : size_t
5546 Depth in elements
5548 Methods
5549 -------
5550 getPtr()
5551 Get memory address of class instance
5552 """
5553 def __cinit__(self, void_ptr _ptr = 0):
5554 if _ptr == 0:
5555 self._pvt_ptr = &self._pvt_val
5556 else:
5557 self._pvt_ptr = <cyruntime.cudaExtent *>_ptr
5558 def __init__(self, void_ptr _ptr = 0):
5559 pass
5560 def __dealloc__(self):
5561 pass
5562 def getPtr(self):
5563 return <void_ptr>self._pvt_ptr
5564 def __repr__(self):
5565 if self._pvt_ptr is not NULL:
5566 str_list = []
5567 try:
5568 str_list += ['width : ' + str(self.width)]
5569 except ValueError:
5570 str_list += ['width : <ValueError>']
5571 try:
5572 str_list += ['height : ' + str(self.height)]
5573 except ValueError:
5574 str_list += ['height : <ValueError>']
5575 try:
5576 str_list += ['depth : ' + str(self.depth)]
5577 except ValueError:
5578 str_list += ['depth : <ValueError>']
5579 return '\n'.join(str_list)
5580 else:
5581 return ''
5582 @property
5583 def width(self):
5584 return self._pvt_ptr[0].width
5585 @width.setter
5586 def width(self, size_t width):
5587 self._pvt_ptr[0].width = width
5588 @property
5589 def height(self):
5590 return self._pvt_ptr[0].height
5591 @height.setter
5592 def height(self, size_t height):
5593 self._pvt_ptr[0].height = height
5594 @property
5595 def depth(self):
5596 return self._pvt_ptr[0].depth
5597 @depth.setter
5598 def depth(self, size_t depth):
5599 self._pvt_ptr[0].depth = depth
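# Editor's sketch: allocating 3D memory from an extent. Assumes the
# make_cudaExtent helper referenced in the docstring and a cudaMalloc3D
# binding with the usual (err, result) return convention:
#
#     extent = make_cudaExtent(256, 64, 8)   # width (bytes here), height, depth
#     err, pitched = cudaMalloc3D(extent)    # pitched: cudaPitchedPtr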
5601cdef class cudaPos:
5602 """
5603 CUDA 3D position. See make_cudaPos.
5605 Attributes
5606 ----------
5607 x : size_t
5608 x
5609 y : size_t
5610 y
5611 z : size_t
5612 z
5614 Methods
5615 -------
5616 getPtr()
5617 Get memory address of class instance
5618 """
5619 def __cinit__(self, void_ptr _ptr = 0):
5620 if _ptr == 0:
5621 self._pvt_ptr = &self._pvt_val
5622 else:
5623 self._pvt_ptr = <cyruntime.cudaPos *>_ptr
5624 def __init__(self, void_ptr _ptr = 0):
5625 pass
5626 def __dealloc__(self):
5627 pass
5628 def getPtr(self):
5629 return <void_ptr>self._pvt_ptr
5630 def __repr__(self):
5631 if self._pvt_ptr is not NULL:
5632 str_list = []
5633 try:
5634 str_list += ['x : ' + str(self.x)]
5635 except ValueError:
5636 str_list += ['x : <ValueError>']
5637 try:
5638 str_list += ['y : ' + str(self.y)]
5639 except ValueError:
5640 str_list += ['y : <ValueError>']
5641 try:
5642 str_list += ['z : ' + str(self.z)]
5643 except ValueError:
5644 str_list += ['z : <ValueError>']
5645 return '\n'.join(str_list)
5646 else:
5647 return ''
5648 @property
5649 def x(self):
5650 return self._pvt_ptr[0].x
5651 @x.setter
5652 def x(self, size_t x):
5653 self._pvt_ptr[0].x = x
5654 @property
5655 def y(self):
5656 return self._pvt_ptr[0].y
5657 @y.setter
5658 def y(self, size_t y):
5659 self._pvt_ptr[0].y = y
5660 @property
5661 def z(self):
5662 return self._pvt_ptr[0].z
5663 @z.setter
5664 def z(self, size_t z):
5665 self._pvt_ptr[0].z = z
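# Editor's sketch: offsets default to zero; make_cudaPos (referenced in the
# docstring) or the setters above build a nonzero origin:
#
#     pos = cudaPos()
#     pos.x, pos.y, pos.z = 16, 0, 0     # start 16 elements into each row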
5667cdef class cudaMemcpy3DParms:
5668 """
5669 CUDA 3D memory copying parameters
5671 Attributes
5672 ----------
5673 srcArray : cudaArray_t
5674 Source memory address
5675 srcPos : cudaPos
5676 Source position offset
5677 srcPtr : cudaPitchedPtr
5678 Pitched source memory address
5679 dstArray : cudaArray_t
5680 Destination memory address
5681 dstPos : cudaPos
5682 Destination position offset
5683 dstPtr : cudaPitchedPtr
5684 Pitched destination memory address
5685 extent : cudaExtent
5686 Requested memory copy size
5687 kind : cudaMemcpyKind
5688 Type of transfer
5690 Methods
5691 -------
5692 getPtr()
5693 Get memory address of class instance
5694 """
5695 def __cinit__(self, void_ptr _ptr = 0):
5696 if _ptr == 0:
5697 self._pvt_ptr = &self._pvt_val
5698 else:
5699 self._pvt_ptr = <cyruntime.cudaMemcpy3DParms *>_ptr
5700 def __init__(self, void_ptr _ptr = 0):
5702 self._srcArray = cudaArray_t(_ptr=<void_ptr>&self._pvt_ptr[0].srcArray)
5703 self._srcPos = cudaPos(_ptr=<void_ptr>&self._pvt_ptr[0].srcPos)
5704 self._srcPtr = cudaPitchedPtr(_ptr=<void_ptr>&self._pvt_ptr[0].srcPtr)
5705 self._dstArray = cudaArray_t(_ptr=<void_ptr>&self._pvt_ptr[0].dstArray)
5706 self._dstPos = cudaPos(_ptr=<void_ptr>&self._pvt_ptr[0].dstPos)
5707 self._dstPtr = cudaPitchedPtr(_ptr=<void_ptr>&self._pvt_ptr[0].dstPtr)
5708 self._extent = cudaExtent(_ptr=<void_ptr>&self._pvt_ptr[0].extent)
5709 def __dealloc__(self):
5710 pass
5711 def getPtr(self):
5712 return <void_ptr>self._pvt_ptr
5713 def __repr__(self):
5714 if self._pvt_ptr is not NULL:
5715 str_list = []
5716 try:
5717 str_list += ['srcArray : ' + str(self.srcArray)]
5718 except ValueError:
5719 str_list += ['srcArray : <ValueError>']
5720 try:
5721 str_list += ['srcPos :\n' + '\n'.join([' ' + line for line in str(self.srcPos).splitlines()])]
5722 except ValueError:
5723 str_list += ['srcPos : <ValueError>']
5724 try:
5725 str_list += ['srcPtr :\n' + '\n'.join([' ' + line for line in str(self.srcPtr).splitlines()])]
5726 except ValueError:
5727 str_list += ['srcPtr : <ValueError>']
5728 try:
5729 str_list += ['dstArray : ' + str(self.dstArray)]
5730 except ValueError:
5731 str_list += ['dstArray : <ValueError>']
5732 try:
5733 str_list += ['dstPos :\n' + '\n'.join([' ' + line for line in str(self.dstPos).splitlines()])]
5734 except ValueError:
5735 str_list += ['dstPos : <ValueError>']
5736 try:
5737 str_list += ['dstPtr :\n' + '\n'.join([' ' + line for line in str(self.dstPtr).splitlines()])]
5738 except ValueError:
5739 str_list += ['dstPtr : <ValueError>']
5740 try:
5741 str_list += ['extent :\n' + '\n'.join([' ' + line for line in str(self.extent).splitlines()])]
5742 except ValueError:
5743 str_list += ['extent : <ValueError>']
5744 try:
5745 str_list += ['kind : ' + str(self.kind)]
5746 except ValueError:
5747 str_list += ['kind : <ValueError>']
5748 return '\n'.join(str_list)
5749 else:
5750 return ''
5751 @property
5752 def srcArray(self):
5753 return self._srcArray
5754 @srcArray.setter
5755 def srcArray(self, srcArray):
5756 cdef cyruntime.cudaArray_t cysrcArray
5757 if srcArray is None:
5758 cysrcArray = <cyruntime.cudaArray_t><void_ptr>0
5759 elif isinstance(srcArray, (cudaArray_t,)):
5760 psrcArray = int(srcArray)
5761 cysrcArray = <cyruntime.cudaArray_t><void_ptr>psrcArray
5762 else:
5763 psrcArray = int(cudaArray_t(srcArray))
5764 cysrcArray = <cyruntime.cudaArray_t><void_ptr>psrcArray
5765 self._srcArray._pvt_ptr[0] = cysrcArray
5766 @property
5767 def srcPos(self):
5768 return self._srcPos
5769 @srcPos.setter
5770 def srcPos(self, srcPos not None : cudaPos):
5771 string.memcpy(&self._pvt_ptr[0].srcPos, <cyruntime.cudaPos*><void_ptr>srcPos.getPtr(), sizeof(self._pvt_ptr[0].srcPos))
5772 @property
5773 def srcPtr(self):
5774 return self._srcPtr
5775 @srcPtr.setter
5776 def srcPtr(self, srcPtr not None : cudaPitchedPtr):
5777 string.memcpy(&self._pvt_ptr[0].srcPtr, <cyruntime.cudaPitchedPtr*><void_ptr>srcPtr.getPtr(), sizeof(self._pvt_ptr[0].srcPtr))
5778 @property
5779 def dstArray(self):
5780 return self._dstArray
5781 @dstArray.setter
5782 def dstArray(self, dstArray):
5783 cdef cyruntime.cudaArray_t cydstArray
5784 if dstArray is None:
5785 cydstArray = <cyruntime.cudaArray_t><void_ptr>0
5786 elif isinstance(dstArray, (cudaArray_t,)):
5787 pdstArray = int(dstArray)
5788 cydstArray = <cyruntime.cudaArray_t><void_ptr>pdstArray
5789 else:
5790 pdstArray = int(cudaArray_t(dstArray))
5791 cydstArray = <cyruntime.cudaArray_t><void_ptr>pdstArray
5792 self._dstArray._pvt_ptr[0] = cydstArray
5793 @property
5794 def dstPos(self):
5795 return self._dstPos
5796 @dstPos.setter
5797 def dstPos(self, dstPos not None : cudaPos):
5798 string.memcpy(&self._pvt_ptr[0].dstPos, <cyruntime.cudaPos*><void_ptr>dstPos.getPtr(), sizeof(self._pvt_ptr[0].dstPos))
5799 @property
5800 def dstPtr(self):
5801 return self._dstPtr
5802 @dstPtr.setter
5803 def dstPtr(self, dstPtr not None : cudaPitchedPtr):
5804 string.memcpy(&self._pvt_ptr[0].dstPtr, <cyruntime.cudaPitchedPtr*><void_ptr>dstPtr.getPtr(), sizeof(self._pvt_ptr[0].dstPtr))
5805 @property
5806 def extent(self):
5807 return self._extent
5808 @extent.setter
5809 def extent(self, extent not None : cudaExtent):
5810 string.memcpy(&self._pvt_ptr[0].extent, <cyruntime.cudaExtent*><void_ptr>extent.getPtr(), sizeof(self._pvt_ptr[0].extent))
5811 @property
5812 def kind(self):
5813 if self._pvt_ptr[0].kind not in _dict_cudaMemcpyKind:
5814 return None
5815 return _dict_cudaMemcpyKind[self._pvt_ptr[0].kind]
5816 @kind.setter
5817 def kind(self, kind not None : cudaMemcpyKind):
5818 self._pvt_ptr[0].kind = kind.value
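# Editor's sketch: a host-to-array 3D copy. Assumes the make_* helpers
# referenced above and a cudaMemcpy3D binding returning a 1-tuple (err,);
# host_ptr, dst_array, and the sizes are placeholders:
#
#     parms = cudaMemcpy3DParms()
#     parms.srcPtr = make_cudaPitchedPtr(host_ptr, width_bytes, width, height)
#     parms.dstArray = dst_array              # a cudaArray_t
#     parms.extent = make_cudaExtent(width, height, depth)
#     parms.kind = cudaMemcpyKind.cudaMemcpyHostToDevice
#     err, = cudaMemcpy3D(parms)              # srcPos/dstPos stay zeroed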
5820cdef class cudaMemcpyNodeParams:
5821 """
5822 Memcpy node parameters
5824 Attributes
5825 ----------
5826 flags : int
5827 Must be zero
5828 reserved : int
5829 Must be zero
5830 ctx : cudaExecutionContext_t
5831 Context in which to run the memcpy. If NULL, the memcpy will try
5832 to use the current context.
5833 copyParams : cudaMemcpy3DParms
5834 Parameters for the memory copy
5836 Methods
5837 -------
5838 getPtr()
5839 Get memory address of class instance
5840 """
5841 def __cinit__(self, void_ptr _ptr = 0):
5842 if _ptr == 0:
5843 self._pvt_ptr = &self._pvt_val
5844 else:
5845 self._pvt_ptr = <cyruntime.cudaMemcpyNodeParams *>_ptr
5846 def __init__(self, void_ptr _ptr = 0):
5848 self._ctx = cudaExecutionContext_t(_ptr=<void_ptr>&self._pvt_ptr[0].ctx)
5849 self._copyParams = cudaMemcpy3DParms(_ptr=<void_ptr>&self._pvt_ptr[0].copyParams)
5850 def __dealloc__(self):
5851 pass
5852 def getPtr(self):
5853 return <void_ptr>self._pvt_ptr
5854 def __repr__(self):
5855 if self._pvt_ptr is not NULL:
5856 str_list = []
5857 try:
5858 str_list += ['flags : ' + str(self.flags)]
5859 except ValueError:
5860 str_list += ['flags : <ValueError>']
5861 try:
5862 str_list += ['reserved : ' + str(self.reserved)]
5863 except ValueError:
5864 str_list += ['reserved : <ValueError>']
5865 try:
5866 str_list += ['ctx : ' + str(self.ctx)]
5867 except ValueError:
5868 str_list += ['ctx : <ValueError>']
5869 try:
5870 str_list += ['copyParams :\n' + '\n'.join([' ' + line for line in str(self.copyParams).splitlines()])]
5871 except ValueError:
5872 str_list += ['copyParams : <ValueError>']
5873 return '\n'.join(str_list)
5874 else:
5875 return ''
5876 @property
5877 def flags(self):
5878 return self._pvt_ptr[0].flags
5879 @flags.setter
5880 def flags(self, int flags):
5881 self._pvt_ptr[0].flags = flags
5882 @property
5883 def reserved(self):
5884 return self._pvt_ptr[0].reserved
5885 @reserved.setter
5886 def reserved(self, int reserved):
5887 self._pvt_ptr[0].reserved = reserved
5888 @property
5889 def ctx(self):
5890 return self._ctx
5891 @ctx.setter
5892 def ctx(self, ctx):
5893 cdef cyruntime.cudaExecutionContext_t cyctx
5894 if ctx is None:
5895 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>0
5896 elif isinstance(ctx, (cudaExecutionContext_t,)):
5897 pctx = int(ctx)
5898 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
5899 else:
5900 pctx = int(cudaExecutionContext_t(ctx))
5901 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
5902 self._ctx._pvt_ptr[0] = cyctx
5903 @property
5904 def copyParams(self):
5905 return self._copyParams
5906 @copyParams.setter
5907 def copyParams(self, copyParams not None : cudaMemcpy3DParms):
5908 string.memcpy(&self._pvt_ptr[0].copyParams, <cyruntime.cudaMemcpy3DParms*><void_ptr>copyParams.getPtr(), sizeof(self._pvt_ptr[0].copyParams))
5910cdef class cudaMemcpy3DPeerParms:
5911 """
5912 CUDA 3D cross-device memory copying parameters
5914 Attributes
5915 ----------
5916 srcArray : cudaArray_t
5917 Source memory address
5918 srcPos : cudaPos
5919 Source position offset
5920 srcPtr : cudaPitchedPtr
5921 Pitched source memory address
5922 srcDevice : int
5923 Source device
5924 dstArray : cudaArray_t
5925 Destination memory address
5926 dstPos : cudaPos
5927 Destination position offset
5928 dstPtr : cudaPitchedPtr
5929 Pitched destination memory address
5930 dstDevice : int
5931 Destination device
5932 extent : cudaExtent
5933 Requested memory copy size
5935 Methods
5936 -------
5937 getPtr()
5938 Get memory address of class instance
5939 """
5940 def __cinit__(self, void_ptr _ptr = 0):
5941 if _ptr == 0:
5942 self._pvt_ptr = &self._pvt_val
5943 else:
5944 self._pvt_ptr = <cyruntime.cudaMemcpy3DPeerParms *>_ptr
5945 def __init__(self, void_ptr _ptr = 0):
5947 self._srcArray = cudaArray_t(_ptr=<void_ptr>&self._pvt_ptr[0].srcArray)
5948 self._srcPos = cudaPos(_ptr=<void_ptr>&self._pvt_ptr[0].srcPos)
5949 self._srcPtr = cudaPitchedPtr(_ptr=<void_ptr>&self._pvt_ptr[0].srcPtr)
5950 self._dstArray = cudaArray_t(_ptr=<void_ptr>&self._pvt_ptr[0].dstArray)
5951 self._dstPos = cudaPos(_ptr=<void_ptr>&self._pvt_ptr[0].dstPos)
5952 self._dstPtr = cudaPitchedPtr(_ptr=<void_ptr>&self._pvt_ptr[0].dstPtr)
5953 self._extent = cudaExtent(_ptr=<void_ptr>&self._pvt_ptr[0].extent)
5954 def __dealloc__(self):
5955 pass
5956 def getPtr(self):
5957 return <void_ptr>self._pvt_ptr
5958 def __repr__(self):
5959 if self._pvt_ptr is not NULL:
5960 str_list = []
5961 try:
5962 str_list += ['srcArray : ' + str(self.srcArray)]
5963 except ValueError:
5964 str_list += ['srcArray : <ValueError>']
5965 try:
5966 str_list += ['srcPos :\n' + '\n'.join([' ' + line for line in str(self.srcPos).splitlines()])]
5967 except ValueError:
5968 str_list += ['srcPos : <ValueError>']
5969 try:
5970 str_list += ['srcPtr :\n' + '\n'.join([' ' + line for line in str(self.srcPtr).splitlines()])]
5971 except ValueError:
5972 str_list += ['srcPtr : <ValueError>']
5973 try:
5974 str_list += ['srcDevice : ' + str(self.srcDevice)]
5975 except ValueError:
5976 str_list += ['srcDevice : <ValueError>']
5977 try:
5978 str_list += ['dstArray : ' + str(self.dstArray)]
5979 except ValueError:
5980 str_list += ['dstArray : <ValueError>']
5981 try:
5982 str_list += ['dstPos :\n' + '\n'.join([' ' + line for line in str(self.dstPos).splitlines()])]
5983 except ValueError:
5984 str_list += ['dstPos : <ValueError>']
5985 try:
5986 str_list += ['dstPtr :\n' + '\n'.join([' ' + line for line in str(self.dstPtr).splitlines()])]
5987 except ValueError:
5988 str_list += ['dstPtr : <ValueError>']
5989 try:
5990 str_list += ['dstDevice : ' + str(self.dstDevice)]
5991 except ValueError:
5992 str_list += ['dstDevice : <ValueError>']
5993 try:
5994 str_list += ['extent :\n' + '\n'.join([' ' + line for line in str(self.extent).splitlines()])]
5995 except ValueError:
5996 str_list += ['extent : <ValueError>']
5997 return '\n'.join(str_list)
5998 else:
5999 return ''
6000 @property
6001 def srcArray(self):
6002 return self._srcArray
6003 @srcArray.setter
6004 def srcArray(self, srcArray):
6005 cdef cyruntime.cudaArray_t cysrcArray
6006 if srcArray is None:
6007 cysrcArray = <cyruntime.cudaArray_t><void_ptr>0
6008 elif isinstance(srcArray, (cudaArray_t,)):
6009 psrcArray = int(srcArray)
6010 cysrcArray = <cyruntime.cudaArray_t><void_ptr>psrcArray
6011 else:
6012 psrcArray = int(cudaArray_t(srcArray))
6013 cysrcArray = <cyruntime.cudaArray_t><void_ptr>psrcArray
6014 self._srcArray._pvt_ptr[0] = cysrcArray
6015 @property
6016 def srcPos(self):
6017 return self._srcPos
6018 @srcPos.setter
6019 def srcPos(self, srcPos not None : cudaPos):
6020 string.memcpy(&self._pvt_ptr[0].srcPos, <cyruntime.cudaPos*><void_ptr>srcPos.getPtr(), sizeof(self._pvt_ptr[0].srcPos))
6021 @property
6022 def srcPtr(self):
6023 return self._srcPtr
6024 @srcPtr.setter
6025 def srcPtr(self, srcPtr not None : cudaPitchedPtr):
6026 string.memcpy(&self._pvt_ptr[0].srcPtr, <cyruntime.cudaPitchedPtr*><void_ptr>srcPtr.getPtr(), sizeof(self._pvt_ptr[0].srcPtr))
6027 @property
6028 def srcDevice(self):
6029 return self._pvt_ptr[0].srcDevice
6030 @srcDevice.setter
6031 def srcDevice(self, int srcDevice):
6032 self._pvt_ptr[0].srcDevice = srcDevice
6033 @property
6034 def dstArray(self):
6035 return self._dstArray
6036 @dstArray.setter
6037 def dstArray(self, dstArray):
6038 cdef cyruntime.cudaArray_t cydstArray
6039 if dstArray is None:
6040 cydstArray = <cyruntime.cudaArray_t><void_ptr>0
6041 elif isinstance(dstArray, (cudaArray_t,)):
6042 pdstArray = int(dstArray)
6043 cydstArray = <cyruntime.cudaArray_t><void_ptr>pdstArray
6044 else:
6045 pdstArray = int(cudaArray_t(dstArray))
6046 cydstArray = <cyruntime.cudaArray_t><void_ptr>pdstArray
6047 self._dstArray._pvt_ptr[0] = cydstArray
6048 @property
6049 def dstPos(self):
6050 return self._dstPos
6051 @dstPos.setter
6052 def dstPos(self, dstPos not None : cudaPos):
6053 string.memcpy(&self._pvt_ptr[0].dstPos, <cyruntime.cudaPos*><void_ptr>dstPos.getPtr(), sizeof(self._pvt_ptr[0].dstPos))
6054 @property
6055 def dstPtr(self):
6056 return self._dstPtr
6057 @dstPtr.setter
6058 def dstPtr(self, dstPtr not None : cudaPitchedPtr):
6059 string.memcpy(&self._pvt_ptr[0].dstPtr, <cyruntime.cudaPitchedPtr*><void_ptr>dstPtr.getPtr(), sizeof(self._pvt_ptr[0].dstPtr))
6060 @property
6061 def dstDevice(self):
6062 return self._pvt_ptr[0].dstDevice
6063 @dstDevice.setter
6064 def dstDevice(self, int dstDevice):
6065 self._pvt_ptr[0].dstDevice = dstDevice
6066 @property
6067 def extent(self):
6068 return self._extent
6069 @extent.setter
6070 def extent(self, extent not None : cudaExtent):
6071 string.memcpy(&self._pvt_ptr[0].extent, <cyruntime.cudaExtent*><void_ptr>extent.getPtr(), sizeof(self._pvt_ptr[0].extent))
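# Editor's sketch: the peer variant adds explicit device ordinals. Assumes a
# cudaMemcpy3DPeer binding returning (err,); src_pitched, dst_pitched, and
# extent are built as in the cudaMemcpy3DParms example above:
#
#     p = cudaMemcpy3DPeerParms()
#     p.srcDevice, p.dstDevice = 0, 1
#     p.srcPtr, p.dstPtr = src_pitched, dst_pitched
#     p.extent = extent
#     err, = cudaMemcpy3DPeer(p)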
6073cdef class cudaMemsetParams:
6074 """
6075 CUDA Memset node parameters
6077 Attributes
6078 ----------
6079 dst : Any
6080 Destination device pointer
6081 pitch : size_t
6082 Pitch of destination device pointer. Unused if height is 1
6083 value : unsigned int
6084 Value to be set
6085 elementSize : unsigned int
6086 Size of each element in bytes. Must be 1, 2, or 4.
6087 width : size_t
6088 Width of the row in elements
6089 height : size_t
6090 Number of rows
6092 Methods
6093 -------
6094 getPtr()
6095 Get memory address of class instance
6096 """
6097 def __cinit__(self, void_ptr _ptr = 0):
6098 if _ptr == 0:
6099 self._pvt_ptr = &self._pvt_val
6100 else:
6101 self._pvt_ptr = <cyruntime.cudaMemsetParams *>_ptr
6102 def __init__(self, void_ptr _ptr = 0):
6103 pass
6104 def __dealloc__(self):
6105 pass
6106 def getPtr(self):
6107 return <void_ptr>self._pvt_ptr
6108 def __repr__(self):
6109 if self._pvt_ptr is not NULL:
6110 str_list = []
6111 try:
6112 str_list += ['dst : ' + hex(self.dst)]
6113 except ValueError:
6114 str_list += ['dst : <ValueError>']
6115 try:
6116 str_list += ['pitch : ' + str(self.pitch)]
6117 except ValueError:
6118 str_list += ['pitch : <ValueError>']
6119 try:
6120 str_list += ['value : ' + str(self.value)]
6121 except ValueError:
6122 str_list += ['value : <ValueError>']
6123 try:
6124 str_list += ['elementSize : ' + str(self.elementSize)]
6125 except ValueError:
6126 str_list += ['elementSize : <ValueError>']
6127 try:
6128 str_list += ['width : ' + str(self.width)]
6129 except ValueError:
6130 str_list += ['width : <ValueError>']
6131 try:
6132 str_list += ['height : ' + str(self.height)]
6133 except ValueError:
6134 str_list += ['height : <ValueError>']
6135 return '\n'.join(str_list)
6136 else:
6137 return ''
6138 @property
6139 def dst(self):
6140 return <void_ptr>self._pvt_ptr[0].dst
6141 @dst.setter
6142 def dst(self, dst):
6143 _cdst = _HelperInputVoidPtr(dst)
6144 self._pvt_ptr[0].dst = <void*><void_ptr>_cdst.cptr
6145 @property
6146 def pitch(self):
6147 return self._pvt_ptr[0].pitch
6148 @pitch.setter
6149 def pitch(self, size_t pitch):
6150 self._pvt_ptr[0].pitch = pitch
6151 @property
6152 def value(self):
6153 return self._pvt_ptr[0].value
6154 @value.setter
6155 def value(self, unsigned int value):
6156 self._pvt_ptr[0].value = value
6157 @property
6158 def elementSize(self):
6159 return self._pvt_ptr[0].elementSize
6160 @elementSize.setter
6161 def elementSize(self, unsigned int elementSize):
6162 self._pvt_ptr[0].elementSize = elementSize
6163 @property
6164 def width(self):
6165 return self._pvt_ptr[0].width
6166 @width.setter
6167 def width(self, size_t width):
6168 self._pvt_ptr[0].width = width
6169 @property
6170 def height(self):
6171 return self._pvt_ptr[0].height
6172 @height.setter
6173 def height(self, size_t height):
6174 self._pvt_ptr[0].height = height
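# Editor's sketch: filling a memset node for a 1-D region (pitch is unused
# when height == 1). The graph handle and the cudaGraphAddMemsetNode call
# are assumptions based on the usual runtime graph API:
#
#     mp = cudaMemsetParams()
#     mp.dst = int(dptr)                  # dptr: device allocation (assumed)
#     mp.value, mp.elementSize = 0, 4     # zero n uint32 words
#     mp.width, mp.height, mp.pitch = n, 1, 0
#     err, node = cudaGraphAddMemsetNode(graph, [], 0, mp)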
6176cdef class cudaMemsetParamsV2:
6177 """
6178 CUDA Memset node parameters
6180 Attributes
6181 ----------
6182 dst : Any
6183 Destination device pointer
6184 pitch : size_t
6185 Pitch of destination device pointer. Unused if height is 1
6186 value : unsigned int
6187 Value to be set
6188 elementSize : unsigned int
6189 Size of each element in bytes. Must be 1, 2, or 4.
6190 width : size_t
6191 Width of the row in elements
6192 height : size_t
6193 Number of rows
6194 ctx : cudaExecutionContext_t
6195 Context in which to run the memset. If NULL, the memset will try
6196 to use the current context.
6198 Methods
6199 -------
6200 getPtr()
6201 Get memory address of class instance
6202 """
6203 def __cinit__(self, void_ptr _ptr = 0):
6204 if _ptr == 0:
6205 self._pvt_ptr = &self._pvt_val
6206 else:
6207 self._pvt_ptr = <cyruntime.cudaMemsetParamsV2 *>_ptr
6208 def __init__(self, void_ptr _ptr = 0):
6210 self._ctx = cudaExecutionContext_t(_ptr=<void_ptr>&self._pvt_ptr[0].ctx)
6211 def __dealloc__(self):
6212 pass
6213 def getPtr(self):
6214 return <void_ptr>self._pvt_ptr
6215 def __repr__(self):
6216 if self._pvt_ptr is not NULL:
6217 str_list = []
6218 try:
6219 str_list += ['dst : ' + hex(self.dst)]
6220 except ValueError:
6221 str_list += ['dst : <ValueError>']
6222 try:
6223 str_list += ['pitch : ' + str(self.pitch)]
6224 except ValueError:
6225 str_list += ['pitch : <ValueError>']
6226 try:
6227 str_list += ['value : ' + str(self.value)]
6228 except ValueError:
6229 str_list += ['value : <ValueError>']
6230 try:
6231 str_list += ['elementSize : ' + str(self.elementSize)]
6232 except ValueError:
6233 str_list += ['elementSize : <ValueError>']
6234 try:
6235 str_list += ['width : ' + str(self.width)]
6236 except ValueError:
6237 str_list += ['width : <ValueError>']
6238 try:
6239 str_list += ['height : ' + str(self.height)]
6240 except ValueError:
6241 str_list += ['height : <ValueError>']
6242 try:
6243 str_list += ['ctx : ' + str(self.ctx)]
6244 except ValueError:
6245 str_list += ['ctx : <ValueError>']
6246 return '\n'.join(str_list)
6247 else:
6248 return ''
6249 @property
6250 def dst(self):
6251 return <void_ptr>self._pvt_ptr[0].dst
6252 @dst.setter
6253 def dst(self, dst):
6254 _cdst = _HelperInputVoidPtr(dst)
6255 self._pvt_ptr[0].dst = <void*><void_ptr>_cdst.cptr
6256 @property
6257 def pitch(self):
6258 return self._pvt_ptr[0].pitch
6259 @pitch.setter
6260 def pitch(self, size_t pitch):
6261 self._pvt_ptr[0].pitch = pitch
6262 @property
6263 def value(self):
6264 return self._pvt_ptr[0].value
6265 @value.setter
6266 def value(self, unsigned int value):
6267 self._pvt_ptr[0].value = value
6268 @property
6269 def elementSize(self):
6270 return self._pvt_ptr[0].elementSize
6271 @elementSize.setter
6272 def elementSize(self, unsigned int elementSize):
6273 self._pvt_ptr[0].elementSize = elementSize
6274 @property
6275 def width(self):
6276 return self._pvt_ptr[0].width
6277 @width.setter
6278 def width(self, size_t width):
6279 self._pvt_ptr[0].width = width
6280 @property
6281 def height(self):
6282 return self._pvt_ptr[0].height
6283 @height.setter
6284 def height(self, size_t height):
6285 self._pvt_ptr[0].height = height
6286 @property
6287 def ctx(self):
6288 return self._ctx
6289 @ctx.setter
6290 def ctx(self, ctx):
6291 cdef cyruntime.cudaExecutionContext_t cyctx
6292 if ctx is None:
6293 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>0
6294 elif isinstance(ctx, (cudaExecutionContext_t,)):
6295 pctx = int(ctx)
6296 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
6297 else:
6298 pctx = int(cudaExecutionContext_t(ctx))
6299 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
6300 self._ctx._pvt_ptr[0] = cyctx
6302cdef class cudaAccessPolicyWindow:
6303 """
6304 Specifies an access policy for a window, a contiguous extent of
6305 memory beginning at base_ptr and ending at base_ptr + num_bytes.
6306 The window is partitioned into many segments, and segments are
6307 assigned such that: sum of "hit segments" / window == approx. ratio;
6308 sum of "miss segments" / window == approx. 1 - ratio. Segments and
6309 ratio specifications are fitted to the capabilities of the
6310 architecture. Accesses in a hit segment apply the hitProp access
6311 policy; accesses in a miss segment apply the missProp access policy.
6313 Attributes
6314 ----------
6315 base_ptr : Any
6316 Starting address of the access policy window. The CUDA driver may
6317 align it.
6318 num_bytes : size_t
6319 Size in bytes of the window policy. The CUDA driver may restrict
6320 the maximum size and alignment.
6321 hitRatio : float
6322 hitRatio specifies the percentage of lines assigned hitProp; the
6323 rest are assigned missProp.
6324 hitProp : cudaAccessProperty
6325 cudaAccessProperty set for hit.
6326 missProp : cudaAccessProperty
6327 cudaAccessProperty set for miss. Must be either
6328 cudaAccessPropertyNormal or cudaAccessPropertyStreaming.
6330 Methods
6331 -------
6332 getPtr()
6333 Get memory address of class instance
6334 """
6335 def __cinit__(self, void_ptr _ptr = 0):
6336 if _ptr == 0:
6337 self._pvt_ptr = &self._pvt_val
6338 else:
6339 self._pvt_ptr = <cyruntime.cudaAccessPolicyWindow *>_ptr
6340 def __init__(self, void_ptr _ptr = 0):
6341 pass
6342 def __dealloc__(self):
6343 pass
6344 def getPtr(self):
6345 return <void_ptr>self._pvt_ptr
6346 def __repr__(self):
6347 if self._pvt_ptr is not NULL:
6348 str_list = []
6349 try:
6350 str_list += ['base_ptr : ' + hex(self.base_ptr)]
6351 except ValueError:
6352 str_list += ['base_ptr : <ValueError>']
6353 try:
6354 str_list += ['num_bytes : ' + str(self.num_bytes)]
6355 except ValueError:
6356 str_list += ['num_bytes : <ValueError>']
6357 try:
6358 str_list += ['hitRatio : ' + str(self.hitRatio)]
6359 except ValueError:
6360 str_list += ['hitRatio : <ValueError>']
6361 try:
6362 str_list += ['hitProp : ' + str(self.hitProp)]
6363 except ValueError:
6364 str_list += ['hitProp : <ValueError>']
6365 try:
6366 str_list += ['missProp : ' + str(self.missProp)]
6367 except ValueError:
6368 str_list += ['missProp : <ValueError>']
6369 return '\n'.join(str_list)
6370 else:
6371 return ''
6372 @property
6373 def base_ptr(self):
6374 return <void_ptr>self._pvt_ptr[0].base_ptr
6375 @base_ptr.setter
6376 def base_ptr(self, base_ptr):
6377 _cbase_ptr = _HelperInputVoidPtr(base_ptr)
6378 self._pvt_ptr[0].base_ptr = <void*><void_ptr>_cbase_ptr.cptr
6379 @property
6380 def num_bytes(self):
6381 return self._pvt_ptr[0].num_bytes
6382 @num_bytes.setter
6383 def num_bytes(self, size_t num_bytes):
6384 self._pvt_ptr[0].num_bytes = num_bytes
6385 @property
6386 def hitRatio(self):
6387 return self._pvt_ptr[0].hitRatio
6388 @hitRatio.setter
6389 def hitRatio(self, float hitRatio):
6390 self._pvt_ptr[0].hitRatio = hitRatio
6391 @property
6392 def hitProp(self):
6393 if self._pvt_ptr[0].hitProp not in _dict_cudaAccessProperty:
6394 return None
6395 return _dict_cudaAccessProperty[self._pvt_ptr[0].hitProp]
6396 @hitProp.setter
6397 def hitProp(self, hitProp not None : cudaAccessProperty):
6398 self._pvt_ptr[0].hitProp = hitProp.value
6399 @property
6400 def missProp(self):
6401 if self._pvt_ptr[0].missProp not in _dict_cudaAccessProperty:
6402 return None
6403 return _dict_cudaAccessProperty[self._pvt_ptr[0].missProp]
6404 @missProp.setter
6405 def missProp(self, missProp not None : cudaAccessProperty):
6406 self._pvt_ptr[0].missProp = missProp.value
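# Editor's sketch: describing a persisting window over a device buffer; the
# cudaAccessProperty members are assumed from the runtime enum. The window
# is then attached through stream or kernel-node access-policy attributes:
#
#     win = cudaAccessPolicyWindow()
#     win.base_ptr = int(dptr)            # dptr: device allocation (assumed)
#     win.num_bytes = nbytes
#     win.hitRatio = 0.6                  # ~60% of lines get hitProp
#     win.hitProp = cudaAccessProperty.cudaAccessPropertyPersisting
#     win.missProp = cudaAccessProperty.cudaAccessPropertyStreaming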
6408cdef class cudaHostNodeParams:
6409 """
6410 CUDA host node parameters
6412 Attributes
6413 ----------
6414 fn : cudaHostFn_t
6415 The function to call when the node executes
6416 userData : Any
6417 Argument to pass to the function
6419 Methods
6420 -------
6421 getPtr()
6422 Get memory address of class instance
6423 """
6424 def __cinit__(self, void_ptr _ptr = 0):
6425 if _ptr == 0:
6426 self._pvt_ptr = &self._pvt_val
6427 else:
6428 self._pvt_ptr = <cyruntime.cudaHostNodeParams *>_ptr
6429 def __init__(self, void_ptr _ptr = 0):
6431 self._fn = cudaHostFn_t(_ptr=<void_ptr>&self._pvt_ptr[0].fn)
6432 def __dealloc__(self):
6433 pass
6434 def getPtr(self):
6435 return <void_ptr>self._pvt_ptr
6436 def __repr__(self):
6437 if self._pvt_ptr is not NULL:
6438 str_list = []
6439 try:
6440 str_list += ['fn : ' + str(self.fn)]
6441 except ValueError:
6442 str_list += ['fn : <ValueError>']
6443 try:
6444 str_list += ['userData : ' + hex(self.userData)]
6445 except ValueError:
6446 str_list += ['userData : <ValueError>']
6447 return '\n'.join(str_list)
6448 else:
6449 return ''
6450 @property
6451 def fn(self):
6452 return self._fn
6453 @fn.setter
6454 def fn(self, fn):
6455 cdef cyruntime.cudaHostFn_t cyfn
6456 if fn is None:
6457 cyfn = <cyruntime.cudaHostFn_t><void_ptr>0
6458 elif isinstance(fn, (cudaHostFn_t)):
6459 pfn = int(fn)
6460 cyfn = <cyruntime.cudaHostFn_t><void_ptr>pfn
6461 else:
6462 pfn = int(cudaHostFn_t(fn))
6463 cyfn = <cyruntime.cudaHostFn_t><void_ptr>pfn
6464 self._fn._pvt_ptr[0] = cyfn
6465 @property
6466 def userData(self):
6467 return <void_ptr>self._pvt_ptr[0].userData
6468 @userData.setter
6469 def userData(self, userData):
6470 _cuserData = _HelperInputVoidPtr(userData)
6471 self._pvt_ptr[0].userData = <void*><void_ptr>_cuserData.cptr
6473cdef class cudaHostNodeParamsV2:
6474 """
6475 CUDA host node parameters
6477 Attributes
6478 ----------
6479 fn : cudaHostFn_t
6480 The function to call when the node executes
6481 userData : Any
6482 Argument to pass to the function
6484 Methods
6485 -------
6486 getPtr()
6487 Get memory address of class instance
6488 """
6489 def __cinit__(self, void_ptr _ptr = 0):
6490 if _ptr == 0:
6491 self._pvt_ptr = &self._pvt_val
6492 else:
6493 self._pvt_ptr = <cyruntime.cudaHostNodeParamsV2 *>_ptr
6494 def __init__(self, void_ptr _ptr = 0):
6495 pass
6496 self._fn = cudaHostFn_t(_ptr=<void_ptr>&self._pvt_ptr[0].fn)
6497 def __dealloc__(self):
6498 pass
6499 def getPtr(self):
6500 return <void_ptr>self._pvt_ptr
6501 def __repr__(self):
6502 if self._pvt_ptr is not NULL:
6503 str_list = []
6504 try:
6505 str_list += ['fn : ' + str(self.fn)]
6506 except ValueError:
6507 str_list += ['fn : <ValueError>']
6508 try:
6509 str_list += ['userData : ' + hex(self.userData)]
6510 except ValueError:
6511 str_list += ['userData : <ValueError>']
6512 return '\n'.join(str_list)
6513 else:
6514 return ''
6515 @property
6516 def fn(self):
6517 return self._fn
6518 @fn.setter
6519 def fn(self, fn):
6520 cdef cyruntime.cudaHostFn_t cyfn
6521 if fn is None:
6522 cyfn = <cyruntime.cudaHostFn_t><void_ptr>0
6523 elif isinstance(fn, (cudaHostFn_t,)):
6524 pfn = int(fn)
6525 cyfn = <cyruntime.cudaHostFn_t><void_ptr>pfn
6526 else:
6527 pfn = int(cudaHostFn_t(fn))
6528 cyfn = <cyruntime.cudaHostFn_t><void_ptr>pfn
6529 self._fn._pvt_ptr[0] = cyfn
6530 @property
6531 def userData(self):
6532 return <void_ptr>self._pvt_ptr[0].userData
6533 @userData.setter
6534 def userData(self, userData):
6535 _cuserData = _HelperInputVoidPtr(userData)
6536 self._pvt_ptr[0].userData = <void*><void_ptr>_cuserData.cptr
6538cdef class anon_struct1:
6539 """
6540 Attributes
6541 ----------
6542 array : cudaArray_t
6545 Methods
6546 -------
6547 getPtr()
6548 Get memory address of class instance
6549 """
6550 def __cinit__(self, void_ptr _ptr):
6551 self._pvt_ptr = <cyruntime.cudaResourceDesc *>_ptr
6553 def __init__(self, void_ptr _ptr):
6554 pass
6555 self._array = cudaArray_t(_ptr=<void_ptr>&self._pvt_ptr[0].res.array.array)
6556 def __dealloc__(self):
6557 pass
6558 def getPtr(self):
6559 return <void_ptr>&self._pvt_ptr[0].res.array
6560 def __repr__(self):
6561 if self._pvt_ptr is not NULL:
6562 str_list = []
6563 try:
6564 str_list += ['array : ' + str(self.array)]
6565 except ValueError:
6566 str_list += ['array : <ValueError>']
6567 return '\n'.join(str_list)
6568 else:
6569 return ''
6570 @property
6571 def array(self):
6572 return self._array
6573 @array.setter
6574 def array(self, array):
6575 cdef cyruntime.cudaArray_t cyarray
6576 if array is None:
6577 cyarray = <cyruntime.cudaArray_t><void_ptr>0
6578 elif isinstance(array, (cudaArray_t,)):
6579 parray = int(array)
6580 cyarray = <cyruntime.cudaArray_t><void_ptr>parray
6581 else:
6582 parray = int(cudaArray_t(array))
6583 cyarray = <cyruntime.cudaArray_t><void_ptr>parray
6584 self._array._pvt_ptr[0] = cyarray
6586cdef class anon_struct2:
6587 """
6588 Attributes
6589 ----------
6590 mipmap : cudaMipmappedArray_t
6593 Methods
6594 -------
6595 getPtr()
6596 Get memory address of class instance
6597 """
6598 def __cinit__(self, void_ptr _ptr):
6599 self._pvt_ptr = <cyruntime.cudaResourceDesc *>_ptr
6601 def __init__(self, void_ptr _ptr):
6602 pass
6603 self._mipmap = cudaMipmappedArray_t(_ptr=<void_ptr>&self._pvt_ptr[0].res.mipmap.mipmap)
6604 def __dealloc__(self):
6605 pass
6606 def getPtr(self):
6607 return <void_ptr>&self._pvt_ptr[0].res.mipmap
6608 def __repr__(self):
6609 if self._pvt_ptr is not NULL:
6610 str_list = []
6611 try:
6612 str_list += ['mipmap : ' + str(self.mipmap)]
6613 except ValueError:
6614 str_list += ['mipmap : <ValueError>']
6615 return '\n'.join(str_list)
6616 else:
6617 return ''
6618 @property
6619 def mipmap(self):
6620 return self._mipmap
6621 @mipmap.setter
6622 def mipmap(self, mipmap):
6623 cdef cyruntime.cudaMipmappedArray_t cymipmap
6624 if mipmap is None:
6625 cymipmap = <cyruntime.cudaMipmappedArray_t><void_ptr>0
6626 elif isinstance(mipmap, (cudaMipmappedArray_t,)):
6627 pmipmap = int(mipmap)
6628 cymipmap = <cyruntime.cudaMipmappedArray_t><void_ptr>pmipmap
6629 else:
6630 pmipmap = int(cudaMipmappedArray_t(mipmap))
6631 cymipmap = <cyruntime.cudaMipmappedArray_t><void_ptr>pmipmap
6632 self._mipmap._pvt_ptr[0] = cymipmap
6634cdef class anon_struct3:
6635 """
6636 Attributes
6637 ----------
6638 devPtr : Any
6640 desc : cudaChannelFormatDesc
6642 sizeInBytes : size_t
6645 Methods
6646 -------
6647 getPtr()
6648 Get memory address of class instance
6649 """
6650 def __cinit__(self, void_ptr _ptr):
6651 self._pvt_ptr = <cyruntime.cudaResourceDesc *>_ptr
6653 def __init__(self, void_ptr _ptr):
6654 pass
6655 self._desc = cudaChannelFormatDesc(_ptr=<void_ptr>&self._pvt_ptr[0].res.linear.desc)
6656 def __dealloc__(self):
6657 pass
6658 def getPtr(self):
6659 return <void_ptr>&self._pvt_ptr[0].res.linear
6660 def __repr__(self):
6661 if self._pvt_ptr is not NULL:
6662 str_list = []
6663 try:
6664 str_list += ['devPtr : ' + hex(self.devPtr)]
6665 except ValueError:
6666 str_list += ['devPtr : <ValueError>']
6667 try:
6668 str_list += ['desc :\n' + '\n'.join([' ' + line for line in str(self.desc).splitlines()])]
6669 except ValueError:
6670 str_list += ['desc : <ValueError>']
6671 try:
6672 str_list += ['sizeInBytes : ' + str(self.sizeInBytes)]
6673 except ValueError:
6674 str_list += ['sizeInBytes : <ValueError>']
6675 return '\n'.join(str_list)
6676 else:
6677 return ''
6678 @property
6679 def devPtr(self):
6680 return <void_ptr>self._pvt_ptr[0].res.linear.devPtr
6681 @devPtr.setter
6682 def devPtr(self, devPtr):
6683 _cdevPtr = _HelperInputVoidPtr(devPtr)
6684 self._pvt_ptr[0].res.linear.devPtr = <void*><void_ptr>_cdevPtr.cptr
6685 @property
6686 def desc(self):
6687 return self._desc
6688 @desc.setter
6689 def desc(self, desc not None : cudaChannelFormatDesc):
6690 string.memcpy(&self._pvt_ptr[0].res.linear.desc, <cyruntime.cudaChannelFormatDesc*><void_ptr>desc.getPtr(), sizeof(self._pvt_ptr[0].res.linear.desc))
6691 @property
6692 def sizeInBytes(self):
6693 return self._pvt_ptr[0].res.linear.sizeInBytes
6694 @sizeInBytes.setter
6695 def sizeInBytes(self, size_t sizeInBytes):
6696 self._pvt_ptr[0].res.linear.sizeInBytes = sizeInBytes
6698cdef class anon_struct4:
6699 """
6700 Attributes
6701 ----------
6702 devPtr : Any
6704 desc : cudaChannelFormatDesc
6706 width : size_t
6708 height : size_t
6710 pitchInBytes : size_t
6713 Methods
6714 -------
6715 getPtr()
6716 Get memory address of class instance
6717 """
6718 def __cinit__(self, void_ptr _ptr):
6719 self._pvt_ptr = <cyruntime.cudaResourceDesc *>_ptr
6721 def __init__(self, void_ptr _ptr):
6722 pass
6723 self._desc = cudaChannelFormatDesc(_ptr=<void_ptr>&self._pvt_ptr[0].res.pitch2D.desc)
6724 def __dealloc__(self):
6725 pass
6726 def getPtr(self):
6727 return <void_ptr>&self._pvt_ptr[0].res.pitch2D
6728 def __repr__(self):
6729 if self._pvt_ptr is not NULL:
6730 str_list = []
6731 try:
6732 str_list += ['devPtr : ' + hex(self.devPtr)]
6733 except ValueError:
6734 str_list += ['devPtr : <ValueError>']
6735 try:
6736 str_list += ['desc :\n' + '\n'.join([' ' + line for line in str(self.desc).splitlines()])]
6737 except ValueError:
6738 str_list += ['desc : <ValueError>']
6739 try:
6740 str_list += ['width : ' + str(self.width)]
6741 except ValueError:
6742 str_list += ['width : <ValueError>']
6743 try:
6744 str_list += ['height : ' + str(self.height)]
6745 except ValueError:
6746 str_list += ['height : <ValueError>']
6747 try:
6748 str_list += ['pitchInBytes : ' + str(self.pitchInBytes)]
6749 except ValueError:
6750 str_list += ['pitchInBytes : <ValueError>']
6751 return '\n'.join(str_list)
6752 else:
6753 return ''
6754 @property
6755 def devPtr(self):
6756 return <void_ptr>self._pvt_ptr[0].res.pitch2D.devPtr
6757 @devPtr.setter
6758 def devPtr(self, devPtr):
6759 _cdevPtr = _HelperInputVoidPtr(devPtr)
6760 self._pvt_ptr[0].res.pitch2D.devPtr = <void*><void_ptr>_cdevPtr.cptr
6761 @property
6762 def desc(self):
6763 return self._desc
6764 @desc.setter
6765 def desc(self, desc not None : cudaChannelFormatDesc):
6766 string.memcpy(&self._pvt_ptr[0].res.pitch2D.desc, <cyruntime.cudaChannelFormatDesc*><void_ptr>desc.getPtr(), sizeof(self._pvt_ptr[0].res.pitch2D.desc))
6767 @property
6768 def width(self):
6769 return self._pvt_ptr[0].res.pitch2D.width
6770 @width.setter
6771 def width(self, size_t width):
6772 self._pvt_ptr[0].res.pitch2D.width = width
6773 @property
6774 def height(self):
6775 return self._pvt_ptr[0].res.pitch2D.height
6776 @height.setter
6777 def height(self, size_t height):
6778 self._pvt_ptr[0].res.pitch2D.height = height
6779 @property
6780 def pitchInBytes(self):
6781 return self._pvt_ptr[0].res.pitch2D.pitchInBytes
6782 @pitchInBytes.setter
6783 def pitchInBytes(self, size_t pitchInBytes):
6784 self._pvt_ptr[0].res.pitch2D.pitchInBytes = pitchInBytes
6786cdef class anon_struct5:
6787 """
6788 Attributes
6789 ----------
6790 reserved : list[int]
6793 Methods
6794 -------
6795 getPtr()
6796 Get memory address of class instance
6797 """
6798 def __cinit__(self, void_ptr _ptr):
6799 self._pvt_ptr = <cyruntime.cudaResourceDesc *>_ptr
6801 def __init__(self, void_ptr _ptr):
6802 pass
6803 def __dealloc__(self):
6804 pass
6805 def getPtr(self):
6806 return <void_ptr>&self._pvt_ptr[0].res.reserved
6807 def __repr__(self):
6808 if self._pvt_ptr is not NULL:
6809 str_list = []
6810 try:
6811 str_list += ['reserved : ' + str(self.reserved)]
6812 except ValueError:
6813 str_list += ['reserved : <ValueError>']
6814 return '\n'.join(str_list)
6815 else:
6816 return ''
6817 @property
6818 def reserved(self):
6819 return self._pvt_ptr[0].res.reserved.reserved
6820 @reserved.setter
6821 def reserved(self, reserved):
6822 self._pvt_ptr[0].res.reserved.reserved = reserved
6824cdef class anon_union0:
6825 """
6826 Attributes
6827 ----------
6828 array : anon_struct1
6830 mipmap : anon_struct2
6832 linear : anon_struct3
6834 pitch2D : anon_struct4
6836 reserved : anon_struct5
6839 Methods
6840 -------
6841 getPtr()
6842 Get memory address of class instance
6843 """
6844 def __cinit__(self, void_ptr _ptr):
6845 self._pvt_ptr = <cyruntime.cudaResourceDesc *>_ptr
6847 def __init__(self, void_ptr _ptr):
6848 pass
6849 self._array = anon_struct1(_ptr=<void_ptr>self._pvt_ptr)
6850 self._mipmap = anon_struct2(_ptr=<void_ptr>self._pvt_ptr)
6851 self._linear = anon_struct3(_ptr=<void_ptr>self._pvt_ptr)
6852 self._pitch2D = anon_struct4(_ptr=<void_ptr>self._pvt_ptr)
6853 self._reserved = anon_struct5(_ptr=<void_ptr>self._pvt_ptr)
6854 def __dealloc__(self):
6855 pass
6856 def getPtr(self):
6857 return <void_ptr>&self._pvt_ptr[0].res
6858 def __repr__(self):
6859 if self._pvt_ptr is not NULL:
6860 str_list = []
6861 try:
6862 str_list += ['array :\n' + '\n'.join([' ' + line for line in str(self.array).splitlines()])]
6863 except ValueError:
6864 str_list += ['array : <ValueError>']
6865 try:
6866 str_list += ['mipmap :\n' + '\n'.join([' ' + line for line in str(self.mipmap).splitlines()])]
6867 except ValueError:
6868 str_list += ['mipmap : <ValueError>']
6869 try:
6870 str_list += ['linear :\n' + '\n'.join([' ' + line for line in str(self.linear).splitlines()])]
6871 except ValueError:
6872 str_list += ['linear : <ValueError>']
6873 try:
6874 str_list += ['pitch2D :\n' + '\n'.join([' ' + line for line in str(self.pitch2D).splitlines()])]
6875 except ValueError:
6876 str_list += ['pitch2D : <ValueError>']
6877 try:
6878 str_list += ['reserved :\n' + '\n'.join([' ' + line for line in str(self.reserved).splitlines()])]
6879 except ValueError:
6880 str_list += ['reserved : <ValueError>']
6881 return '\n'.join(str_list)
6882 else:
6883 return ''
6884 @property
6885 def array(self):
6886 return self._array
6887 @array.setter
6888 def array(self, array not None : anon_struct1):
6889 string.memcpy(&self._pvt_ptr[0].res.array, <cyruntime.anon_struct1*><void_ptr>array.getPtr(), sizeof(self._pvt_ptr[0].res.array))
6890 @property
6891 def mipmap(self):
6892 return self._mipmap
6893 @mipmap.setter
6894 def mipmap(self, mipmap not None : anon_struct2):
6895 string.memcpy(&self._pvt_ptr[0].res.mipmap, <cyruntime.anon_struct2*><void_ptr>mipmap.getPtr(), sizeof(self._pvt_ptr[0].res.mipmap))
6896 @property
6897 def linear(self):
6898 return self._linear
6899 @linear.setter
6900 def linear(self, linear not None : anon_struct3):
6901 string.memcpy(&self._pvt_ptr[0].res.linear, <cyruntime.anon_struct3*><void_ptr>linear.getPtr(), sizeof(self._pvt_ptr[0].res.linear))
6902 @property
6903 def pitch2D(self):
6904 return self._pitch2D
6905 @pitch2D.setter
6906 def pitch2D(self, pitch2D not None : anon_struct4):
6907 string.memcpy(&self._pvt_ptr[0].res.pitch2D, <cyruntime.anon_struct4*><void_ptr>pitch2D.getPtr(), sizeof(self._pvt_ptr[0].res.pitch2D))
6908 @property
6909 def reserved(self):
6910 return self._reserved
6911 @reserved.setter
6912 def reserved(self, reserved not None : anon_struct5):
6913 string.memcpy(&self._pvt_ptr[0].res.reserved, <cyruntime.anon_struct5*><void_ptr>reserved.getPtr(), sizeof(self._pvt_ptr[0].res.reserved))
6915cdef class cudaResourceDesc:
6916 """
6917 CUDA resource descriptor
6919 Attributes
6920 ----------
6921 resType : cudaResourceType
6922 Resource type
6923 res : anon_union0
6925 flags : unsigned int
6926 Flags (must be zero)
6928 Methods
6929 -------
6930 getPtr()
6931 Get memory address of class instance
6932 """
6933 def __cinit__(self, void_ptr _ptr = 0):
6934 if _ptr == 0:
6935 self._val_ptr = <cyruntime.cudaResourceDesc *>calloc(1, sizeof(cyruntime.cudaResourceDesc))
6936 self._pvt_ptr = self._val_ptr
6937 else:
6938 self._pvt_ptr = <cyruntime.cudaResourceDesc *>_ptr
6939 def __init__(self, void_ptr _ptr = 0):
6940 pass
6941 self._res = anon_union0(_ptr=<void_ptr>self._pvt_ptr)
6942 def __dealloc__(self):
6943 if self._val_ptr is not NULL:
6944 free(self._val_ptr)
6945 def getPtr(self):
6946 return <void_ptr>self._pvt_ptr
6947 def __repr__(self):
6948 if self._pvt_ptr is not NULL:
6949 str_list = []
6950 try:
6951 str_list += ['resType : ' + str(self.resType)]
6952 except ValueError:
6953 str_list += ['resType : <ValueError>']
6954 try:
6955 str_list += ['res :\n' + '\n'.join([' ' + line for line in str(self.res).splitlines()])]
6956 except ValueError:
6957 str_list += ['res : <ValueError>']
6958 try:
6959 str_list += ['flags : ' + str(self.flags)]
6960 except ValueError:
6961 str_list += ['flags : <ValueError>']
6962 return '\n'.join(str_list)
6963 else:
6964 return ''
6965 @property
6966 def resType(self):
6967 if self._pvt_ptr[0].resType not in _dict_cudaResourceType:
6968 return None
6969 return _dict_cudaResourceType[self._pvt_ptr[0].resType]
6970 @resType.setter
6971 def resType(self, resType not None : cudaResourceType):
6972 self._pvt_ptr[0].resType = resType.value
6973 @property
6974 def res(self):
6975 return self._res
6976 @res.setter
6977 def res(self, res not None : anon_union0):
6978 string.memcpy(&self._pvt_ptr[0].res, <cyruntime.anon_union0*><void_ptr>res.getPtr(), sizeof(self._pvt_ptr[0].res))
6979 @property
6980 def flags(self):
6981 return self._pvt_ptr[0].flags
6982 @flags.setter
6983 def flags(self, unsigned int flags):
6984 self._pvt_ptr[0].flags = flags
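# Usage sketch (illustrative, not part of the generated bindings): describing
# a 1D linear device buffer with cudaResourceDesc. `dev_ptr` and `nbytes` are
# assumed to come from a prior cudaMalloc call; the nested res.linear view
# writes through to the same underlying C struct.
#
#   fmt = cudaChannelFormatDesc()
#   fmt.x = 32  # one 32-bit channel
#   fmt.f = cudaChannelFormatKind.cudaChannelFormatKindFloat
#   desc = cudaResourceDesc()
#   desc.resType = cudaResourceType.cudaResourceTypeLinear
#   desc.res.linear.devPtr = dev_ptr
#   desc.res.linear.desc = fmt
#   desc.res.linear.sizeInBytes = nbytes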
6986cdef class cudaResourceViewDesc:
6987 """
6988 CUDA resource view descriptor
6990 Attributes
6991 ----------
6992 format : cudaResourceViewFormat
6993 Resource view format
6994 width : size_t
6995 Width of the resource view
6996 height : size_t
6997 Height of the resource view
6998 depth : size_t
6999 Depth of the resource view
7000 firstMipmapLevel : unsigned int
7001 First defined mipmap level
7002 lastMipmapLevel : unsigned int
7003 Last defined mipmap level
7004 firstLayer : unsigned int
7005 First layer index
7006 lastLayer : unsigned int
7007 Last layer index
7008 reserved : list[unsigned int]
7009 Must be zero
7011 Methods
7012 -------
7013 getPtr()
7014 Get memory address of class instance
7015 """
7016 def __cinit__(self, void_ptr _ptr = 0):
7017 if _ptr == 0:
7018 self._pvt_ptr = &self._pvt_val
7019 else:
7020 self._pvt_ptr = <cyruntime.cudaResourceViewDesc *>_ptr
7021 def __init__(self, void_ptr _ptr = 0):
7022 pass
7023 def __dealloc__(self):
7024 pass
7025 def getPtr(self):
7026 return <void_ptr>self._pvt_ptr
7027 def __repr__(self):
7028 if self._pvt_ptr is not NULL:
7029 str_list = []
7030 try:
7031 str_list += ['format : ' + str(self.format)]
7032 except ValueError:
7033 str_list += ['format : <ValueError>']
7034 try:
7035 str_list += ['width : ' + str(self.width)]
7036 except ValueError:
7037 str_list += ['width : <ValueError>']
7038 try:
7039 str_list += ['height : ' + str(self.height)]
7040 except ValueError:
7041 str_list += ['height : <ValueError>']
7042 try:
7043 str_list += ['depth : ' + str(self.depth)]
7044 except ValueError:
7045 str_list += ['depth : <ValueError>']
7046 try:
7047 str_list += ['firstMipmapLevel : ' + str(self.firstMipmapLevel)]
7048 except ValueError:
7049 str_list += ['firstMipmapLevel : <ValueError>']
7050 try:
7051 str_list += ['lastMipmapLevel : ' + str(self.lastMipmapLevel)]
7052 except ValueError:
7053 str_list += ['lastMipmapLevel : <ValueError>']
7054 try:
7055 str_list += ['firstLayer : ' + str(self.firstLayer)]
7056 except ValueError:
7057 str_list += ['firstLayer : <ValueError>']
7058 try:
7059 str_list += ['lastLayer : ' + str(self.lastLayer)]
7060 except ValueError:
7061 str_list += ['lastLayer : <ValueError>']
7062 try:
7063 str_list += ['reserved : ' + str(self.reserved)]
7064 except ValueError:
7065 str_list += ['reserved : <ValueError>']
7066 return '\n'.join(str_list)
7067 else:
7068 return ''
7069 @property
7070 def format(self):
7071 if self._pvt_ptr[0].format not in _dict_cudaResourceViewFormat:
7072 return None
7073 return _dict_cudaResourceViewFormat[self._pvt_ptr[0].format]
7074 @format.setter
7075 def format(self, format not None : cudaResourceViewFormat):
7076 self._pvt_ptr[0].format = format.value
7077 @property
7078 def width(self):
7079 return self._pvt_ptr[0].width
7080 @width.setter
7081 def width(self, size_t width):
7082 self._pvt_ptr[0].width = width
7083 @property
7084 def height(self):
7085 return self._pvt_ptr[0].height
7086 @height.setter
7087 def height(self, size_t height):
7088 self._pvt_ptr[0].height = height
7089 @property
7090 def depth(self):
7091 return self._pvt_ptr[0].depth
7092 @depth.setter
7093 def depth(self, size_t depth):
7094 self._pvt_ptr[0].depth = depth
7095 @property
7096 def firstMipmapLevel(self):
7097 return self._pvt_ptr[0].firstMipmapLevel
7098 @firstMipmapLevel.setter
7099 def firstMipmapLevel(self, unsigned int firstMipmapLevel):
7100 self._pvt_ptr[0].firstMipmapLevel = firstMipmapLevel
7101 @property
7102 def lastMipmapLevel(self):
7103 return self._pvt_ptr[0].lastMipmapLevel
7104 @lastMipmapLevel.setter
7105 def lastMipmapLevel(self, unsigned int lastMipmapLevel):
7106 self._pvt_ptr[0].lastMipmapLevel = lastMipmapLevel
7107 @property
7108 def firstLayer(self):
7109 return self._pvt_ptr[0].firstLayer
7110 @firstLayer.setter
7111 def firstLayer(self, unsigned int firstLayer):
7112 self._pvt_ptr[0].firstLayer = firstLayer
7113 @property
7114 def lastLayer(self):
7115 return self._pvt_ptr[0].lastLayer
7116 @lastLayer.setter
7117 def lastLayer(self, unsigned int lastLayer):
7118 self._pvt_ptr[0].lastLayer = lastLayer
7119 @property
7120 def reserved(self):
7121 return self._pvt_ptr[0].reserved
7122 @reserved.setter
7123 def reserved(self, reserved):
7124 self._pvt_ptr[0].reserved = reserved
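# Usage sketch (illustrative, not part of the generated bindings): a resource
# view reinterpreting the first two mipmap levels as float4 texels. `w` and
# `h` are assumed to be the level-0 dimensions of the underlying resource.
#
#   view = cudaResourceViewDesc()
#   view.format = cudaResourceViewFormat.cudaResViewFormatFloat4
#   view.width = w
#   view.height = h
#   view.depth = 0  # 0 for non-3D resources
#   view.firstMipmapLevel = 0
#   view.lastMipmapLevel = 1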
7126cdef class cudaPointerAttributes:
7127 """
7128 CUDA pointer attributes
7130 Attributes
7131 ----------
7132 type : cudaMemoryType
7133 The type of memory - cudaMemoryTypeUnregistered,
7134 cudaMemoryTypeHost, cudaMemoryTypeDevice or cudaMemoryTypeManaged.
7135 device : int
7136 The device against which the memory was allocated or registered. If
7137 the memory type is cudaMemoryTypeDevice then this identifies the
7138 device on which the referenced memory physically resides. If the
7139 memory type is cudaMemoryTypeHost or cudaMemoryTypeManaged then
7140 this identifies the device which was current when the memory was
7141 allocated or registered (and if that device is deinitialized then
7142 this allocation will vanish with that device's state).
7143 devicePointer : Any
7144 The address which may be dereferenced on the current device to
7145 access the memory or NULL if no such address exists.
7146 hostPointer : Any
7147 The address which may be dereferenced on the host to access the
7148 memory or NULL if no such address exists. CUDA doesn't check
7149 whether unregistered memory is allocated, so this field may contain
7150 an invalid pointer if an invalid pointer has been passed to CUDA.
7151 reserved : list[long]
7152 Must be zero
7154 Methods
7155 -------
7156 getPtr()
7157 Get memory address of class instance
7158 """
7159 def __cinit__(self, void_ptr _ptr = 0):
7160 if _ptr == 0:
7161 self._pvt_ptr = &self._pvt_val
7162 else:
7163 self._pvt_ptr = <cyruntime.cudaPointerAttributes *>_ptr
7164 def __init__(self, void_ptr _ptr = 0):
7165 pass
7166 def __dealloc__(self):
7167 pass
7168 def getPtr(self):
7169 return <void_ptr>self._pvt_ptr
7170 def __repr__(self):
7171 if self._pvt_ptr is not NULL:
7172 str_list = []
7173 try:
7174 str_list += ['type : ' + str(self.type)]
7175 except ValueError:
7176 str_list += ['type : <ValueError>']
7177 try:
7178 str_list += ['device : ' + str(self.device)]
7179 except ValueError:
7180 str_list += ['device : <ValueError>']
7181 try:
7182 str_list += ['devicePointer : ' + hex(self.devicePointer)]
7183 except ValueError:
7184 str_list += ['devicePointer : <ValueError>']
7185 try:
7186 str_list += ['hostPointer : ' + hex(self.hostPointer)]
7187 except ValueError:
7188 str_list += ['hostPointer : <ValueError>']
7189 try:
7190 str_list += ['reserved : ' + str(self.reserved)]
7191 except ValueError:
7192 str_list += ['reserved : <ValueError>']
7193 return '\n'.join(str_list)
7194 else:
7195 return ''
7196 @property
7197 def type(self):
7198 if self._pvt_ptr[0].type not in _dict_cudaMemoryType:
7199 return None
7200 return _dict_cudaMemoryType[self._pvt_ptr[0].type]
7201 @type.setter
7202 def type(self, type not None : cudaMemoryType):
7203 self._pvt_ptr[0].type = type.value
7204 @property
7205 def device(self):
7206 return self._pvt_ptr[0].device
7207 @device.setter
7208 def device(self, int device):
7209 self._pvt_ptr[0].device = device
7210 @property
7211 def devicePointer(self):
7212 return <void_ptr>self._pvt_ptr[0].devicePointer
7213 @devicePointer.setter
7214 def devicePointer(self, devicePointer):
7215 _cdevicePointer = _HelperInputVoidPtr(devicePointer)
7216 self._pvt_ptr[0].devicePointer = <void*><void_ptr>_cdevicePointer.cptr
7217 @property
7218 def hostPointer(self):
7219 return <void_ptr>self._pvt_ptr[0].hostPointer
7220 @hostPointer.setter
7221 def hostPointer(self, hostPointer):
7222 _chostPointer = _HelperInputVoidPtr(hostPointer)
7223 self._pvt_ptr[0].hostPointer = <void*><void_ptr>_chostPointer.cptr
7224 @property
7225 def reserved(self):
7226 return self._pvt_ptr[0].reserved
7227 @reserved.setter
7228 def reserved(self, reserved):
7229 self._pvt_ptr[0].reserved = reserved
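# Usage sketch (illustrative, not part of the generated bindings): querying
# a pointer with cudaPointerGetAttributes, which follows this module's
# (error, result) return convention. `dev_ptr` is assumed to come from a
# prior allocation.
#
#   err, attrs = cudaPointerGetAttributes(dev_ptr)
#   if err == cudaError_t.cudaSuccess:
#       print(attrs.type, attrs.device, hex(attrs.devicePointer))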
7231cdef class cudaFuncAttributes:
7232 """
7233 CUDA function attributes
7235 Attributes
7236 ----------
7237 sharedSizeBytes : size_t
7238 The size in bytes of statically-allocated shared memory per block
7239 required by this function. This does not include dynamically-
7240 allocated shared memory requested by the user at runtime.
7241 constSizeBytes : size_t
7242 The size in bytes of user-allocated constant memory required by
7243 this function.
7244 localSizeBytes : size_t
7245 The size in bytes of local memory used by each thread of this
7246 function.
7247 maxThreadsPerBlock : int
7248 The maximum number of threads per block, beyond which a launch of
7249 the function would fail. This number depends on both the function
7250 and the device on which the function is currently loaded.
7251 numRegs : int
7252 The number of registers used by each thread of this function.
7253 ptxVersion : int
7254 The PTX virtual architecture version for which the function was
7255 compiled. This value is the major PTX version * 10 + the minor PTX
7256 version, so a PTX version 1.3 function would return the value 13.
7257 binaryVersion : int
7258 The binary architecture version for which the function was
7259 compiled. This value is the major binary version * 10 + the minor
7260 binary version, so a binary version 1.3 function would return the
7261 value 13.
7262 cacheModeCA : int
7263 The attribute to indicate whether the function has been compiled
7264 with user specified option "-Xptxas --dlcm=ca" set.
7265 maxDynamicSharedSizeBytes : int
7266 The maximum size in bytes of dynamic shared memory per block for
7267 this function. Any launch must have a dynamic shared memory size
7268 smaller than this value.
7269 preferredShmemCarveout : int
7270 On devices where the L1 cache and shared memory use the same
7271 hardware resources, this sets the shared memory carveout
7272 preference, in percent of the maximum shared memory. Refer to
7273 cudaDevAttrMaxSharedMemoryPerMultiprocessor. This is only a hint,
7274 and the driver can choose a different ratio if required to execute
7275 the function. See cudaFuncSetAttribute
7276 clusterDimMustBeSet : int
7277 If this attribute is set, the kernel must launch with a valid
7278 cluster dimension specified.
7279 requiredClusterWidth : int
7280 The required cluster width/height/depth in blocks. The values must
7281 either all be 0 or all be positive. The validity of the cluster
7282 dimensions is otherwise checked at launch time. If the value is
7283 set at compile time, it cannot be set at runtime. Setting it at
7284 runtime should return cudaErrorNotPermitted. See
7285 cudaFuncSetAttribute
7286 requiredClusterHeight : int
7288 requiredClusterDepth : int
7290 clusterSchedulingPolicyPreference : int
7291 The block scheduling policy of a function. See cudaFuncSetAttribute
7292 nonPortableClusterSizeAllowed : int
7293 Whether the function can be launched with non-portable cluster
7294 size. 1 is allowed, 0 is disallowed. A non-portable cluster size
7295 may only function on the specific SKUs the program is tested on.
7296 The launch might fail if the program is run on a different hardware
7297 platform. CUDA API provides cudaOccupancyMaxActiveClusters to
7298 assist with checking whether the desired size can be launched on
7299 the current device. A portable cluster size is guaranteed to be
7300 functional on all compute capabilities higher than the target
7301 compute capability. The portable cluster size for sm_90 is 8
7302 blocks per cluster. This value may increase for future compute
7303 capabilities. The specific hardware unit may support higher
7304 cluster sizes that are not guaranteed to be portable. See
7305 cudaFuncSetAttribute
7306 reserved : list[int]
7307 Reserved for future use.
7309 Methods
7310 -------
7311 getPtr()
7312 Get memory address of class instance
7313 """
7314 def __cinit__(self, void_ptr _ptr = 0):
7315 if _ptr == 0:
7316 self._pvt_ptr = &self._pvt_val
7317 else:
7318 self._pvt_ptr = <cyruntime.cudaFuncAttributes *>_ptr
7319 def __init__(self, void_ptr _ptr = 0):
7320 pass
7321 def __dealloc__(self):
7322 pass
7323 def getPtr(self):
7324 return <void_ptr>self._pvt_ptr
7325 def __repr__(self):
7326 if self._pvt_ptr is not NULL:
7327 str_list = []
7328 try:
7329 str_list += ['sharedSizeBytes : ' + str(self.sharedSizeBytes)]
7330 except ValueError:
7331 str_list += ['sharedSizeBytes : <ValueError>']
7332 try:
7333 str_list += ['constSizeBytes : ' + str(self.constSizeBytes)]
7334 except ValueError:
7335 str_list += ['constSizeBytes : <ValueError>']
7336 try:
7337 str_list += ['localSizeBytes : ' + str(self.localSizeBytes)]
7338 except ValueError:
7339 str_list += ['localSizeBytes : <ValueError>']
7340 try:
7341 str_list += ['maxThreadsPerBlock : ' + str(self.maxThreadsPerBlock)]
7342 except ValueError:
7343 str_list += ['maxThreadsPerBlock : <ValueError>']
7344 try:
7345 str_list += ['numRegs : ' + str(self.numRegs)]
7346 except ValueError:
7347 str_list += ['numRegs : <ValueError>']
7348 try:
7349 str_list += ['ptxVersion : ' + str(self.ptxVersion)]
7350 except ValueError:
7351 str_list += ['ptxVersion : <ValueError>']
7352 try:
7353 str_list += ['binaryVersion : ' + str(self.binaryVersion)]
7354 except ValueError:
7355 str_list += ['binaryVersion : <ValueError>']
7356 try:
7357 str_list += ['cacheModeCA : ' + str(self.cacheModeCA)]
7358 except ValueError:
7359 str_list += ['cacheModeCA : <ValueError>']
7360 try:
7361 str_list += ['maxDynamicSharedSizeBytes : ' + str(self.maxDynamicSharedSizeBytes)]
7362 except ValueError:
7363 str_list += ['maxDynamicSharedSizeBytes : <ValueError>']
7364 try:
7365 str_list += ['preferredShmemCarveout : ' + str(self.preferredShmemCarveout)]
7366 except ValueError:
7367 str_list += ['preferredShmemCarveout : <ValueError>']
7368 try:
7369 str_list += ['clusterDimMustBeSet : ' + str(self.clusterDimMustBeSet)]
7370 except ValueError:
7371 str_list += ['clusterDimMustBeSet : <ValueError>']
7372 try:
7373 str_list += ['requiredClusterWidth : ' + str(self.requiredClusterWidth)]
7374 except ValueError:
7375 str_list += ['requiredClusterWidth : <ValueError>']
7376 try:
7377 str_list += ['requiredClusterHeight : ' + str(self.requiredClusterHeight)]
7378 except ValueError:
7379 str_list += ['requiredClusterHeight : <ValueError>']
7380 try:
7381 str_list += ['requiredClusterDepth : ' + str(self.requiredClusterDepth)]
7382 except ValueError:
7383 str_list += ['requiredClusterDepth : <ValueError>']
7384 try:
7385 str_list += ['clusterSchedulingPolicyPreference : ' + str(self.clusterSchedulingPolicyPreference)]
7386 except ValueError:
7387 str_list += ['clusterSchedulingPolicyPreference : <ValueError>']
7388 try:
7389 str_list += ['nonPortableClusterSizeAllowed : ' + str(self.nonPortableClusterSizeAllowed)]
7390 except ValueError:
7391 str_list += ['nonPortableClusterSizeAllowed : <ValueError>']
7392 try:
7393 str_list += ['reserved : ' + str(self.reserved)]
7394 except ValueError:
7395 str_list += ['reserved : <ValueError>']
7396 return '\n'.join(str_list)
7397 else:
7398 return ''
7399 @property
7400 def sharedSizeBytes(self):
7401 return self._pvt_ptr[0].sharedSizeBytes
7402 @sharedSizeBytes.setter
7403 def sharedSizeBytes(self, size_t sharedSizeBytes):
7404 self._pvt_ptr[0].sharedSizeBytes = sharedSizeBytes
7405 @property
7406 def constSizeBytes(self):
7407 return self._pvt_ptr[0].constSizeBytes
7408 @constSizeBytes.setter
7409 def constSizeBytes(self, size_t constSizeBytes):
7410 self._pvt_ptr[0].constSizeBytes = constSizeBytes
7411 @property
7412 def localSizeBytes(self):
7413 return self._pvt_ptr[0].localSizeBytes
7414 @localSizeBytes.setter
7415 def localSizeBytes(self, size_t localSizeBytes):
7416 self._pvt_ptr[0].localSizeBytes = localSizeBytes
7417 @property
7418 def maxThreadsPerBlock(self):
7419 return self._pvt_ptr[0].maxThreadsPerBlock
7420 @maxThreadsPerBlock.setter
7421 def maxThreadsPerBlock(self, int maxThreadsPerBlock):
7422 self._pvt_ptr[0].maxThreadsPerBlock = maxThreadsPerBlock
7423 @property
7424 def numRegs(self):
7425 return self._pvt_ptr[0].numRegs
7426 @numRegs.setter
7427 def numRegs(self, int numRegs):
7428 self._pvt_ptr[0].numRegs = numRegs
7429 @property
7430 def ptxVersion(self):
7431 return self._pvt_ptr[0].ptxVersion
7432 @ptxVersion.setter
7433 def ptxVersion(self, int ptxVersion):
7434 self._pvt_ptr[0].ptxVersion = ptxVersion
7435 @property
7436 def binaryVersion(self):
7437 return self._pvt_ptr[0].binaryVersion
7438 @binaryVersion.setter
7439 def binaryVersion(self, int binaryVersion):
7440 self._pvt_ptr[0].binaryVersion = binaryVersion
7441 @property
7442 def cacheModeCA(self):
7443 return self._pvt_ptr[0].cacheModeCA
7444 @cacheModeCA.setter
7445 def cacheModeCA(self, int cacheModeCA):
7446 self._pvt_ptr[0].cacheModeCA = cacheModeCA
7447 @property
7448 def maxDynamicSharedSizeBytes(self):
7449 return self._pvt_ptr[0].maxDynamicSharedSizeBytes
7450 @maxDynamicSharedSizeBytes.setter
7451 def maxDynamicSharedSizeBytes(self, int maxDynamicSharedSizeBytes):
7452 self._pvt_ptr[0].maxDynamicSharedSizeBytes = maxDynamicSharedSizeBytes
7453 @property
7454 def preferredShmemCarveout(self):
7455 return self._pvt_ptr[0].preferredShmemCarveout
7456 @preferredShmemCarveout.setter
7457 def preferredShmemCarveout(self, int preferredShmemCarveout):
7458 self._pvt_ptr[0].preferredShmemCarveout = preferredShmemCarveout
7459 @property
7460 def clusterDimMustBeSet(self):
7461 return self._pvt_ptr[0].clusterDimMustBeSet
7462 @clusterDimMustBeSet.setter
7463 def clusterDimMustBeSet(self, int clusterDimMustBeSet):
7464 self._pvt_ptr[0].clusterDimMustBeSet = clusterDimMustBeSet
7465 @property
7466 def requiredClusterWidth(self):
7467 return self._pvt_ptr[0].requiredClusterWidth
7468 @requiredClusterWidth.setter
7469 def requiredClusterWidth(self, int requiredClusterWidth):
7470 self._pvt_ptr[0].requiredClusterWidth = requiredClusterWidth
7471 @property
7472 def requiredClusterHeight(self):
7473 return self._pvt_ptr[0].requiredClusterHeight
7474 @requiredClusterHeight.setter
7475 def requiredClusterHeight(self, int requiredClusterHeight):
7476 self._pvt_ptr[0].requiredClusterHeight = requiredClusterHeight
7477 @property
7478 def requiredClusterDepth(self):
7479 return self._pvt_ptr[0].requiredClusterDepth
7480 @requiredClusterDepth.setter
7481 def requiredClusterDepth(self, int requiredClusterDepth):
7482 self._pvt_ptr[0].requiredClusterDepth = requiredClusterDepth
7483 @property
7484 def clusterSchedulingPolicyPreference(self):
7485 return self._pvt_ptr[0].clusterSchedulingPolicyPreference
7486 @clusterSchedulingPolicyPreference.setter
7487 def clusterSchedulingPolicyPreference(self, int clusterSchedulingPolicyPreference):
7488 self._pvt_ptr[0].clusterSchedulingPolicyPreference = clusterSchedulingPolicyPreference
7489 @property
7490 def nonPortableClusterSizeAllowed(self):
7491 return self._pvt_ptr[0].nonPortableClusterSizeAllowed
7492 @nonPortableClusterSizeAllowed.setter
7493 def nonPortableClusterSizeAllowed(self, int nonPortableClusterSizeAllowed):
7494 self._pvt_ptr[0].nonPortableClusterSizeAllowed = nonPortableClusterSizeAllowed
7495 @property
7496 def reserved(self):
7497 return self._pvt_ptr[0].reserved
7498 @reserved.setter
7499 def reserved(self, reserved):
7500 self._pvt_ptr[0].reserved = reserved
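# Usage sketch (illustrative, not part of the generated bindings): reading a
# kernel's attributes. `kernel` is assumed to be a valid device-function
# handle, and cudaFuncGetAttributes is assumed to follow the (error, result)
# return convention used throughout these bindings.
#
#   err, attrs = cudaFuncGetAttributes(kernel)
#   if err == cudaError_t.cudaSuccess:
#       print("max threads/block:", attrs.maxThreadsPerBlock)
#       print("regs/thread      :", attrs.numRegs)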
7502cdef class cudaMemLocation:
7503 """
7504 Specifies a memory location. To specify a GPU, set type =
7505 cudaMemLocationTypeDevice and set id = the GPU's device ordinal. To
7506 specify a CPU NUMA node, set type = cudaMemLocationTypeHostNuma and
7507 set id = the host NUMA node id.
7509 Attributes
7510 ----------
7511 type : cudaMemLocationType
7512 Specifies the location type, which modifies the meaning of id.
7513 id : int
7514 identifier for the location, interpreted according to this location's type.
7516 Methods
7517 -------
7518 getPtr()
7519 Get memory address of class instance
7520 """
7521 def __cinit__(self, void_ptr _ptr = 0):
7522 if _ptr == 0:
7523 self._pvt_ptr = &self._pvt_val
7524 else:
7525 self._pvt_ptr = <cyruntime.cudaMemLocation *>_ptr
7526 def __init__(self, void_ptr _ptr = 0):
7527 pass
7528 def __dealloc__(self):
7529 pass
7530 def getPtr(self):
7531 return <void_ptr>self._pvt_ptr
7532 def __repr__(self):
7533 if self._pvt_ptr is not NULL:
7534 str_list = []
7535 try:
7536 str_list += ['type : ' + str(self.type)]
7537 except ValueError:
7538 str_list += ['type : <ValueError>']
7539 try:
7540 str_list += ['id : ' + str(self.id)]
7541 except ValueError:
7542 str_list += ['id : <ValueError>']
7543 return '\n'.join(str_list)
7544 else:
7545 return ''
7546 @property
7547 def type(self):
7548 if self._pvt_ptr[0].type not in _dict_cudaMemLocationType:
7549 return None
7550 return _dict_cudaMemLocationType[self._pvt_ptr[0].type]
7551 @type.setter
7552 def type(self, type not None : cudaMemLocationType):
7553 self._pvt_ptr[0].type = type.value
7554 @property
7555 def id(self):
7556 return self._pvt_ptr[0].id
7557 @id.setter
7558 def id(self, int id):
7559 self._pvt_ptr[0].id = id
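# Usage sketch (illustrative, not part of the generated bindings): a
# cudaMemLocation naming device ordinal 0, per the docstring above.
#
#   loc = cudaMemLocation()
#   loc.type = cudaMemLocationType.cudaMemLocationTypeDevice
#   loc.id = 0  # device ordinal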
7561cdef class cudaMemAccessDesc:
7562 """
7563 Memory access descriptor
7565 Attributes
7566 ----------
7567 location : cudaMemLocation
7568 Location whose accessibility the request is to change
7569 flags : cudaMemAccessFlags
7570 Accessibility flags to set on the request
7572 Methods
7573 -------
7574 getPtr()
7575 Get memory address of class instance
7576 """
7577 def __cinit__(self, void_ptr _ptr = 0):
7578 if _ptr == 0:
7579 self._pvt_ptr = &self._pvt_val
7580 else:
7581 self._pvt_ptr = <cyruntime.cudaMemAccessDesc *>_ptr
7582 def __init__(self, void_ptr _ptr = 0):
7583 pass
7584 self._location = cudaMemLocation(_ptr=<void_ptr>&self._pvt_ptr[0].location)
7585 def __dealloc__(self):
7586 pass
7587 def getPtr(self):
7588 return <void_ptr>self._pvt_ptr
7589 def __repr__(self):
7590 if self._pvt_ptr is not NULL:
7591 str_list = []
7592 try:
7593 str_list += ['location :\n' + '\n'.join([' ' + line for line in str(self.location).splitlines()])]
7594 except ValueError:
7595 str_list += ['location : <ValueError>']
7596 try:
7597 str_list += ['flags : ' + str(self.flags)]
7598 except ValueError:
7599 str_list += ['flags : <ValueError>']
7600 return '\n'.join(str_list)
7601 else:
7602 return ''
7603 @property
7604 def location(self):
7605 return self._location
7606 @location.setter
7607 def location(self, location not None : cudaMemLocation):
7608 string.memcpy(&self._pvt_ptr[0].location, <cyruntime.cudaMemLocation*><void_ptr>location.getPtr(), sizeof(self._pvt_ptr[0].location))
7609 @property
7610 def flags(self):
7611 if self._pvt_ptr[0].flags not in _dict_cudaMemAccessFlags:
7612 return None
7613 return _dict_cudaMemAccessFlags[self._pvt_ptr[0].flags]
7614 @flags.setter
7615 def flags(self, flags not None : cudaMemAccessFlags):
7616 self._pvt_ptr[0].flags = flags.value
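# Usage sketch (illustrative, not part of the generated bindings): granting
# read/write access to the location built in the previous sketch; `loc` is
# the cudaMemLocation from above.
#
#   access = cudaMemAccessDesc()
#   access.location = loc
#   access.flags = cudaMemAccessFlags.cudaMemAccessFlagsProtReadWrite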
7618cdef class cudaMemPoolProps:
7619 """
7620 Specifies the properties of allocations made from the pool.
7622 Attributes
7623 ----------
7624 allocType : cudaMemAllocationType
7625 Allocation type. Currently must be specified as
7626 cudaMemAllocationTypePinned
7627 handleTypes : cudaMemAllocationHandleType
7628 Handle types that will be supported by allocations from the pool.
7629 location : cudaMemLocation
7630 Location where allocations should reside.
7631 win32SecurityAttributes : Any
7632 Windows-specific LPSECURITYATTRIBUTES required when
7633 cudaMemHandleTypeWin32 is specified. This security attribute
7634 defines the scope within which exported allocations may be transferred
7635 to other processes. In all other cases, this field is required to
7636 be zero.
7637 maxSize : size_t
7638 Maximum pool size. When set to 0, defaults to a system dependent
7639 value.
7640 usage : unsigned short
7641 Bitmask indicating intended usage for the pool.
7642 reserved : bytes
7643 reserved for future use, must be 0
7645 Methods
7646 -------
7647 getPtr()
7648 Get memory address of class instance
7649 """
7650 def __cinit__(self, void_ptr _ptr = 0):
7651 if _ptr == 0:
7652 self._pvt_ptr = &self._pvt_val
7653 else:
7654 self._pvt_ptr = <cyruntime.cudaMemPoolProps *>_ptr
7655 def __init__(self, void_ptr _ptr = 0):
7656 pass
7657 self._location = cudaMemLocation(_ptr=<void_ptr>&self._pvt_ptr[0].location)
7658 def __dealloc__(self):
7659 pass
7660 def getPtr(self):
7661 return <void_ptr>self._pvt_ptr
7662 def __repr__(self):
7663 if self._pvt_ptr is not NULL:
7664 str_list = []
7665 try:
7666 str_list += ['allocType : ' + str(self.allocType)]
7667 except ValueError:
7668 str_list += ['allocType : <ValueError>']
7669 try:
7670 str_list += ['handleTypes : ' + str(self.handleTypes)]
7671 except ValueError:
7672 str_list += ['handleTypes : <ValueError>']
7673 try:
7674 str_list += ['location :\n' + '\n'.join([' ' + line for line in str(self.location).splitlines()])]
7675 except ValueError:
7676 str_list += ['location : <ValueError>']
7677 try:
7678 str_list += ['win32SecurityAttributes : ' + hex(self.win32SecurityAttributes)]
7679 except ValueError:
7680 str_list += ['win32SecurityAttributes : <ValueError>']
7681 try:
7682 str_list += ['maxSize : ' + str(self.maxSize)]
7683 except ValueError:
7684 str_list += ['maxSize : <ValueError>']
7685 try:
7686 str_list += ['usage : ' + str(self.usage)]
7687 except ValueError:
7688 str_list += ['usage : <ValueError>']
7689 try:
7690 str_list += ['reserved : ' + str(self.reserved)]
7691 except ValueError:
7692 str_list += ['reserved : <ValueError>']
7693 return '\n'.join(str_list)
7694 else:
7695 return ''
7696 @property
7697 def allocType(self):
7698 if self._pvt_ptr[0].allocType not in _dict_cudaMemAllocationType:
7699 return None
7700 return _dict_cudaMemAllocationType[self._pvt_ptr[0].allocType]
7701 @allocType.setter
7702 def allocType(self, allocType not None : cudaMemAllocationType):
7703 self._pvt_ptr[0].allocType = allocType.value
7704 @property
7705 def handleTypes(self):
7706 if self._pvt_ptr[0].handleTypes not in _dict_cudaMemAllocationHandleType:
7707 return None
7708 return _dict_cudaMemAllocationHandleType[self._pvt_ptr[0].handleTypes]
7709 @handleTypes.setter
7710 def handleTypes(self, handleTypes not None : cudaMemAllocationHandleType):
7711 self._pvt_ptr[0].handleTypes = handleTypes.value
7712 @property
7713 def location(self):
7714 return self._location
7715 @location.setter
7716 def location(self, location not None : cudaMemLocation):
7717 string.memcpy(&self._pvt_ptr[0].location, <cyruntime.cudaMemLocation*><void_ptr>location.getPtr(), sizeof(self._pvt_ptr[0].location))
7718 @property
7719 def win32SecurityAttributes(self):
7720 return <void_ptr>self._pvt_ptr[0].win32SecurityAttributes
7721 @win32SecurityAttributes.setter
7722 def win32SecurityAttributes(self, win32SecurityAttributes):
7723 _cwin32SecurityAttributes = _HelperInputVoidPtr(win32SecurityAttributes)
7724 self._pvt_ptr[0].win32SecurityAttributes = <void*><void_ptr>_cwin32SecurityAttributes.cptr
7725 @property
7726 def maxSize(self):
7727 return self._pvt_ptr[0].maxSize
7728 @maxSize.setter
7729 def maxSize(self, size_t maxSize):
7730 self._pvt_ptr[0].maxSize = maxSize
7731 @property
7732 def usage(self):
7733 return self._pvt_ptr[0].usage
7734 @usage.setter
7735 def usage(self, unsigned short usage):
7736 self._pvt_ptr[0].usage = usage
7737 @property
7738 def reserved(self):
7739 return PyBytes_FromStringAndSize(<char*>self._pvt_ptr[0].reserved, 54)
7740 @reserved.setter
7741 def reserved(self, reserved):
7742 if len(reserved) != 54:
7743 raise ValueError("reserved length must be 54, is " + str(len(reserved)))
7744 for i, b in enumerate(reserved):
7745 self._pvt_ptr[0].reserved[i] = b
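# Usage sketch (illustrative, not part of the generated bindings): minimal
# pool properties for a device-resident pool, then pool creation.
# cudaMemPoolCreate is assumed to follow the (error, result) return
# convention of these bindings.
#
#   props = cudaMemPoolProps()
#   props.allocType = cudaMemAllocationType.cudaMemAllocationTypePinned
#   props.location.type = cudaMemLocationType.cudaMemLocationTypeDevice
#   props.location.id = 0
#   err, pool = cudaMemPoolCreate(props)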
7747cdef class cudaMemPoolPtrExportData:
7748 """
7749 Opaque data for exporting a pool allocation
7751 Attributes
7752 ----------
7753 reserved : bytes
7756 Methods
7757 -------
7758 getPtr()
7759 Get memory address of class instance
7760 """
7761 def __cinit__(self, void_ptr _ptr = 0):
7762 if _ptr == 0:
7763 self._pvt_ptr = &self._pvt_val
7764 else:
7765 self._pvt_ptr = <cyruntime.cudaMemPoolPtrExportData *>_ptr
7766 def __init__(self, void_ptr _ptr = 0):
7767 pass
7768 def __dealloc__(self):
7769 pass
7770 def getPtr(self):
7771 return <void_ptr>self._pvt_ptr
7772 def __repr__(self):
7773 if self._pvt_ptr is not NULL:
7774 str_list = []
7775 try:
7776 str_list += ['reserved : ' + str(self.reserved)]
7777 except ValueError:
7778 str_list += ['reserved : <ValueError>']
7779 return '\n'.join(str_list)
7780 else:
7781 return ''
7782 @property
7783 def reserved(self):
7784 return PyBytes_FromStringAndSize(<char*>self._pvt_ptr[0].reserved, 64)
7785 @reserved.setter
7786 def reserved(self, reserved):
7787 if len(reserved) != 64:
7788 raise ValueError("reserved length must be 64, is " + str(len(reserved)))
7789 for i, b in enumerate(reserved):
7790 self._pvt_ptr[0].reserved[i] = b
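# Usage sketch (illustrative, not part of the generated bindings): exporting
# a pool allocation for IPC. The (error, result) signature assumed for
# cudaMemPoolExportPointer follows this module's conventions; `dev_ptr` must
# come from a shareable memory pool.
#
#   err, export_data = cudaMemPoolExportPointer(dev_ptr)
#   opaque = export_data.reserved  # 64 opaque bytes to hand to the importer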
7792cdef class cudaMemAllocNodeParams:
7793 """
7794 Memory allocation node parameters
7796 Attributes
7797 ----------
7798 poolProps : cudaMemPoolProps
7799 in: pool properties; the location where the allocation should
7800 reside is specified in ::location. ::handleTypes must be
7801 cudaMemHandleTypeNone. IPC is not supported.
7802 accessDescs : cudaMemAccessDesc
7803 in: array of memory access descriptors. Used to describe peer GPU
7804 access.
7805 accessDescCount : size_t
7806 in: number of memory access descriptors. Must not exceed the number
7807 of GPUs.
7808 bytesize : size_t
7809 in: size in bytes of the requested allocation
7810 dptr : Any
7811 out: address of the allocation returned by CUDA
7813 Methods
7814 -------
7815 getPtr()
7816 Get memory address of class instance
7817 """
7818 def __cinit__(self, void_ptr _ptr = 0):
7819 if _ptr == 0:
7820 self._pvt_ptr = &self._pvt_val
7821 else:
7822 self._pvt_ptr = <cyruntime.cudaMemAllocNodeParams *>_ptr
7823 def __init__(self, void_ptr _ptr = 0):
7824 pass
7825 self._poolProps = cudaMemPoolProps(_ptr=<void_ptr>&self._pvt_ptr[0].poolProps)
7826 def __dealloc__(self):
7827 pass
7828 if self._accessDescs is not NULL:
7829 free(self._accessDescs)
7830 def getPtr(self):
7831 return <void_ptr>self._pvt_ptr
7832 def __repr__(self):
7833 if self._pvt_ptr is not NULL:
7834 str_list = []
7835 try:
7836 str_list += ['poolProps :\n' + '\n'.join([' ' + line for line in str(self.poolProps).splitlines()])]
7837 except ValueError:
7838 str_list += ['poolProps : <ValueError>']
7839 try:
7840 str_list += ['accessDescs : ' + str(self.accessDescs)]
7841 except ValueError:
7842 str_list += ['accessDescs : <ValueError>']
7843 try:
7844 str_list += ['accessDescCount : ' + str(self.accessDescCount)]
7845 except ValueError:
7846 str_list += ['accessDescCount : <ValueError>']
7847 try:
7848 str_list += ['bytesize : ' + str(self.bytesize)]
7849 except ValueError:
7850 str_list += ['bytesize : <ValueError>']
7851 try:
7852 str_list += ['dptr : ' + hex(self.dptr)]
7853 except ValueError:
7854 str_list += ['dptr : <ValueError>']
7855 return '\n'.join(str_list)
7856 else:
7857 return ''
7858 @property
7859 def poolProps(self):
7860 return self._poolProps
7861 @poolProps.setter
7862 def poolProps(self, poolProps not None : cudaMemPoolProps):
7863 string.memcpy(&self._pvt_ptr[0].poolProps, <cyruntime.cudaMemPoolProps*><void_ptr>poolProps.getPtr(), sizeof(self._pvt_ptr[0].poolProps))
7864 @property
7865 def accessDescs(self):
7866 arrs = [<void_ptr>self._pvt_ptr[0].accessDescs + x*sizeof(cyruntime.cudaMemAccessDesc) for x in range(self._accessDescs_length)]
7867 return [cudaMemAccessDesc(_ptr=arr) for arr in arrs]
7868 @accessDescs.setter
7869 def accessDescs(self, val):
7870 if len(val) == 0:
7871 free(self._accessDescs); self._accessDescs = NULL  # avoid double free in __dealloc__
7872 self._accessDescs_length = 0
7873 self._pvt_ptr[0].accessDescs = NULL
7874 else:
7875 if self._accessDescs_length != <size_t>len(val):
7876 free(self._accessDescs)
7877 self._accessDescs = <cyruntime.cudaMemAccessDesc*> calloc(len(val), sizeof(cyruntime.cudaMemAccessDesc))
7878 if self._accessDescs is NULL:
7879 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaMemAccessDesc)))
7880 self._accessDescs_length = <size_t>len(val)
7881 self._pvt_ptr[0].accessDescs = self._accessDescs
7882 for idx in range(len(val)):
7883 string.memcpy(&self._accessDescs[idx], (<cudaMemAccessDesc>val[idx])._pvt_ptr, sizeof(cyruntime.cudaMemAccessDesc))
7885 @property
7886 def accessDescCount(self):
7887 return self._pvt_ptr[0].accessDescCount
7888 @accessDescCount.setter
7889 def accessDescCount(self, size_t accessDescCount):
7890 self._pvt_ptr[0].accessDescCount = accessDescCount
7891 @property
7892 def bytesize(self):
7893 return self._pvt_ptr[0].bytesize
7894 @bytesize.setter
7895 def bytesize(self, size_t bytesize):
7896 self._pvt_ptr[0].bytesize = bytesize
7897 @property
7898 def dptr(self):
7899 return <void_ptr>self._pvt_ptr[0].dptr
7900 @dptr.setter
7901 def dptr(self, dptr):
7902 _cdptr = _HelperInputVoidPtr(dptr)
7903 self._pvt_ptr[0].dptr = <void*><void_ptr>_cdptr.cptr
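# Usage sketch (illustrative, not part of the generated bindings): allocation
# parameters for a graph mem-alloc node. `graph` is assumed to come from
# cudaGraphCreate, and the cudaGraphAddMemAllocNode signature mirrors the C
# API; on success the allocation's address is reported back in params.dptr.
#
#   params = cudaMemAllocNodeParams()
#   params.poolProps.allocType = cudaMemAllocationType.cudaMemAllocationTypePinned
#   params.poolProps.location.type = cudaMemLocationType.cudaMemLocationTypeDevice
#   params.poolProps.location.id = 0
#   params.bytesize = 1 << 20  # 1 MiB
#   err, node = cudaGraphAddMemAllocNode(graph, None, 0, params)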
7905cdef class cudaMemAllocNodeParamsV2:
7906 """
7907 Memory allocation node parameters
7909 Attributes
7910 ----------
7911 poolProps : cudaMemPoolProps
7912 in: pool properties; the location where the allocation should
7913 reside is specified in ::location. ::handleTypes must be
7914 cudaMemHandleTypeNone. IPC is not supported.
7915 accessDescs : cudaMemAccessDesc
7916 in: array of memory access descriptors. Used to describe peer GPU
7917 access.
7918 accessDescCount : size_t
7919 in: number of memory access descriptors. Must not exceed the number
7920 of GPUs.
7921 bytesize : size_t
7922 in: size in bytes of the requested allocation
7923 dptr : Any
7924 out: address of the allocation returned by CUDA
7926 Methods
7927 -------
7928 getPtr()
7929 Get memory address of class instance
7930 """
7931 def __cinit__(self, void_ptr _ptr = 0):
7932 if _ptr == 0:
7933 self._pvt_ptr = &self._pvt_val
7934 else:
7935 self._pvt_ptr = <cyruntime.cudaMemAllocNodeParamsV2 *>_ptr
7936 def __init__(self, void_ptr _ptr = 0):
7937 pass
7938 self._poolProps = cudaMemPoolProps(_ptr=<void_ptr>&self._pvt_ptr[0].poolProps)
7939 def __dealloc__(self):
7940 pass
7941 if self._accessDescs is not NULL:
7942 free(self._accessDescs)
7943 def getPtr(self):
7944 return <void_ptr>self._pvt_ptr
7945 def __repr__(self):
7946 if self._pvt_ptr is not NULL:
7947 str_list = []
7948 try:
7949 str_list += ['poolProps :\n' + '\n'.join([' ' + line for line in str(self.poolProps).splitlines()])]
7950 except ValueError:
7951 str_list += ['poolProps : <ValueError>']
7952 try:
7953 str_list += ['accessDescs : ' + str(self.accessDescs)]
7954 except ValueError:
7955 str_list += ['accessDescs : <ValueError>']
7956 try:
7957 str_list += ['accessDescCount : ' + str(self.accessDescCount)]
7958 except ValueError:
7959 str_list += ['accessDescCount : <ValueError>']
7960 try:
7961 str_list += ['bytesize : ' + str(self.bytesize)]
7962 except ValueError:
7963 str_list += ['bytesize : <ValueError>']
7964 try:
7965 str_list += ['dptr : ' + hex(self.dptr)]
7966 except ValueError:
7967 str_list += ['dptr : <ValueError>']
7968 return '\n'.join(str_list)
7969 else:
7970 return ''
7971 @property
7972 def poolProps(self):
7973 return self._poolProps
7974 @poolProps.setter
7975 def poolProps(self, poolProps not None : cudaMemPoolProps):
7976 string.memcpy(&self._pvt_ptr[0].poolProps, <cyruntime.cudaMemPoolProps*><void_ptr>poolProps.getPtr(), sizeof(self._pvt_ptr[0].poolProps))
7977 @property
7978 def accessDescs(self):
7979 arrs = [<void_ptr>self._pvt_ptr[0].accessDescs + x*sizeof(cyruntime.cudaMemAccessDesc) for x in range(self._accessDescs_length)]
7980 return [cudaMemAccessDesc(_ptr=arr) for arr in arrs]
7981 @accessDescs.setter
7982 def accessDescs(self, val):
7983 if len(val) == 0:
7984 free(self._accessDescs); self._accessDescs = NULL  # avoid double free in __dealloc__
7985 self._accessDescs_length = 0
7986 self._pvt_ptr[0].accessDescs = NULL
7987 else:
7988 if self._accessDescs_length != <size_t>len(val):
7989 free(self._accessDescs)
7990 self._accessDescs = <cyruntime.cudaMemAccessDesc*> calloc(len(val), sizeof(cyruntime.cudaMemAccessDesc))
7991 if self._accessDescs is NULL:
7992 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaMemAccessDesc)))
7993 self._accessDescs_length = <size_t>len(val)
7994 self._pvt_ptr[0].accessDescs = self._accessDescs
7995 for idx in range(len(val)):
7996 string.memcpy(&self._accessDescs[idx], (<cudaMemAccessDesc>val[idx])._pvt_ptr, sizeof(cyruntime.cudaMemAccessDesc))
7998 @property
7999 def accessDescCount(self):
8000 return self._pvt_ptr[0].accessDescCount
8001 @accessDescCount.setter
8002 def accessDescCount(self, size_t accessDescCount):
8003 self._pvt_ptr[0].accessDescCount = accessDescCount
8004 @property
8005 def bytesize(self):
8006 return self._pvt_ptr[0].bytesize
8007 @bytesize.setter
8008 def bytesize(self, size_t bytesize):
8009 self._pvt_ptr[0].bytesize = bytesize
8010 @property
8011 def dptr(self):
8012 return <void_ptr>self._pvt_ptr[0].dptr
8013 @dptr.setter
8014 def dptr(self, dptr):
8015 _cdptr = _HelperInputVoidPtr(dptr)
8016 self._pvt_ptr[0].dptr = <void*><void_ptr>_cdptr.cptr
8018cdef class cudaMemFreeNodeParams:
8019 """
8020 Memory free node parameters
8022 Attributes
8023 ----------
8024 dptr : Any
8025 in: the pointer to free
8027 Methods
8028 -------
8029 getPtr()
8030 Get memory address of class instance
8031 """
8032 def __cinit__(self, void_ptr _ptr = 0):
8033 if _ptr == 0:
8034 self._pvt_ptr = &self._pvt_val
8035 else:
8036 self._pvt_ptr = <cyruntime.cudaMemFreeNodeParams *>_ptr
8037 def __init__(self, void_ptr _ptr = 0):
8038 pass
8039 def __dealloc__(self):
8040 pass
8041 def getPtr(self):
8042 return <void_ptr>self._pvt_ptr
8043 def __repr__(self):
8044 if self._pvt_ptr is not NULL:
8045 str_list = []
8046 try:
8047 str_list += ['dptr : ' + hex(self.dptr)]
8048 except ValueError:
8049 str_list += ['dptr : <ValueError>']
8050 return '\n'.join(str_list)
8051 else:
8052 return ''
8053 @property
8054 def dptr(self):
8055 return <void_ptr>self._pvt_ptr[0].dptr
8056 @dptr.setter
8057 def dptr(self, dptr):
8058 _cdptr = _HelperInputVoidPtr(dptr)
8059 self._pvt_ptr[0].dptr = <void*><void_ptr>_cdptr.cptr
8061cdef class cudaMemcpyAttributes:
8062 """
8063 Attributes specific to copies within a batch. For more details on
8064 usage see cudaMemcpyBatchAsync.
8066 Attributes
8067 ----------
8068 srcAccessOrder : cudaMemcpySrcAccessOrder
8069 Source access ordering to be observed for copies with this
8070 attribute.
8071 srcLocHint : cudaMemLocation
8072 Hint location for the source operand. Ignored unless the pointer is
8073 managed memory or memory allocated outside CUDA.
8074 dstLocHint : cudaMemLocation
8075 Hint location for the destination operand. Ignored unless the
8076 pointer is managed memory or memory allocated outside CUDA.
8077 flags : unsigned int
8078 Additional flags for copies with this attribute. See
8079 cudaMemcpyFlags.
8081 Methods
8082 -------
8083 getPtr()
8084 Get memory address of class instance
8085 """
8086 def __cinit__(self, void_ptr _ptr = 0):
8087 if _ptr == 0:
8088 self._pvt_ptr = &self._pvt_val
8089 else:
8090 self._pvt_ptr = <cyruntime.cudaMemcpyAttributes *>_ptr
8091 def __init__(self, void_ptr _ptr = 0):
8092 pass
8093 self._srcLocHint = cudaMemLocation(_ptr=<void_ptr>&self._pvt_ptr[0].srcLocHint)
8094 self._dstLocHint = cudaMemLocation(_ptr=<void_ptr>&self._pvt_ptr[0].dstLocHint)
8095 def __dealloc__(self):
8096 pass
8097 def getPtr(self):
8098 return <void_ptr>self._pvt_ptr
8099 def __repr__(self):
8100 if self._pvt_ptr is not NULL:
8101 str_list = []
8102 try:
8103 str_list += ['srcAccessOrder : ' + str(self.srcAccessOrder)]
8104 except ValueError:
8105 str_list += ['srcAccessOrder : <ValueError>']
8106 try:
8107 str_list += ['srcLocHint :\n' + '\n'.join([' ' + line for line in str(self.srcLocHint).splitlines()])]
8108 except ValueError:
8109 str_list += ['srcLocHint : <ValueError>']
8110 try:
8111 str_list += ['dstLocHint :\n' + '\n'.join([' ' + line for line in str(self.dstLocHint).splitlines()])]
8112 except ValueError:
8113 str_list += ['dstLocHint : <ValueError>']
8114 try:
8115 str_list += ['flags : ' + str(self.flags)]
8116 except ValueError:
8117 str_list += ['flags : <ValueError>']
8118 return '\n'.join(str_list)
8119 else:
8120 return ''
8121 @property
8122 def srcAccessOrder(self):
8123 if self._pvt_ptr[0].srcAccessOrder not in _dict_cudaMemcpySrcAccessOrder:
8124 return None
8125 return _dict_cudaMemcpySrcAccessOrder[self._pvt_ptr[0].srcAccessOrder]
8126 @srcAccessOrder.setter
8127 def srcAccessOrder(self, srcAccessOrder not None : cudaMemcpySrcAccessOrder):
8128 self._pvt_ptr[0].srcAccessOrder = srcAccessOrder.value
8129 @property
8130 def srcLocHint(self):
8131 return self._srcLocHint
8132 @srcLocHint.setter
8133 def srcLocHint(self, srcLocHint not None : cudaMemLocation):
8134 string.memcpy(&self._pvt_ptr[0].srcLocHint, <cyruntime.cudaMemLocation*><void_ptr>srcLocHint.getPtr(), sizeof(self._pvt_ptr[0].srcLocHint))
8135 @property
8136 def dstLocHint(self):
8137 return self._dstLocHint
8138 @dstLocHint.setter
8139 def dstLocHint(self, dstLocHint not None : cudaMemLocation):
8140 string.memcpy(&self._pvt_ptr[0].dstLocHint, <cyruntime.cudaMemLocation*><void_ptr>dstLocHint.getPtr(), sizeof(self._pvt_ptr[0].dstLocHint))
8141 @property
8142 def flags(self):
8143 return self._pvt_ptr[0].flags
8144 @flags.setter
8145 def flags(self, unsigned int flags):
8146 self._pvt_ptr[0].flags = flags
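# Usage sketch (illustrative, not part of the generated bindings): per-copy
# attributes for a batched copy, hinting a host source and a device
# destination. Enum spellings follow the CUDA runtime headers.
#
#   attrs = cudaMemcpyAttributes()
#   attrs.srcAccessOrder = cudaMemcpySrcAccessOrder.cudaMemcpySrcAccessOrderStream
#   attrs.srcLocHint.type = cudaMemLocationType.cudaMemLocationTypeHost
#   attrs.dstLocHint.type = cudaMemLocationType.cudaMemLocationTypeDevice
#   attrs.dstLocHint.id = 0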
8148cdef class cudaOffset3D:
8149 """
8150 Struct representing offset into a cudaArray_t in elements
8152 Attributes
8153 ----------
8154 x : size_t
8156 y : size_t
8158 z : size_t
8161 Methods
8162 -------
8163 getPtr()
8164 Get memory address of class instance
8165 """
8166 def __cinit__(self, void_ptr _ptr = 0):
8167 if _ptr == 0:
8168 self._pvt_ptr = &self._pvt_val
8169 else:
8170 self._pvt_ptr = <cyruntime.cudaOffset3D *>_ptr
8171 def __init__(self, void_ptr _ptr = 0):
8172 pass
8173 def __dealloc__(self):
8174 pass
8175 def getPtr(self):
8176 return <void_ptr>self._pvt_ptr
8177 def __repr__(self):
8178 if self._pvt_ptr is not NULL:
8179 str_list = []
8180 try:
8181 str_list += ['x : ' + str(self.x)]
8182 except ValueError:
8183 str_list += ['x : <ValueError>']
8184 try:
8185 str_list += ['y : ' + str(self.y)]
8186 except ValueError:
8187 str_list += ['y : <ValueError>']
8188 try:
8189 str_list += ['z : ' + str(self.z)]
8190 except ValueError:
8191 str_list += ['z : <ValueError>']
8192 return '\n'.join(str_list)
8193 else:
8194 return ''
8195 @property
8196 def x(self):
8197 return self._pvt_ptr[0].x
8198 @x.setter
8199 def x(self, size_t x):
8200 self._pvt_ptr[0].x = x
8201 @property
8202 def y(self):
8203 return self._pvt_ptr[0].y
8204 @y.setter
8205 def y(self, size_t y):
8206 self._pvt_ptr[0].y = y
8207 @property
8208 def z(self):
8209 return self._pvt_ptr[0].z
8210 @z.setter
8211 def z(self, size_t z):
8212 self._pvt_ptr[0].z = z
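# Usage sketch (illustrative): offsets are expressed in array elements, not
# bytes, so no element-size scaling is needed here.
#
#   off = runtime.cudaOffset3D()
#   off.x, off.y, off.z = 16, 8, 0   # start 16 elements in, 8 rows down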
8214cdef class anon_struct6:
8215 """
8216 Attributes
8217 ----------
8218 ptr : Any
8220 rowLength : size_t
8222 layerHeight : size_t
8224 locHint : cudaMemLocation
8227 Methods
8228 -------
8229 getPtr()
8230 Get memory address of class instance
8231 """
8232 def __cinit__(self, void_ptr _ptr):
8233 self._pvt_ptr = <cyruntime.cudaMemcpy3DOperand *>_ptr
8235 def __init__(self, void_ptr _ptr):
8237 self._locHint = cudaMemLocation(_ptr=<void_ptr>&self._pvt_ptr[0].op.ptr.locHint)
8238 def __dealloc__(self):
8239 pass
8240 def getPtr(self):
8241 return <void_ptr>&self._pvt_ptr[0].op.ptr
8242 def __repr__(self):
8243 if self._pvt_ptr is not NULL:
8244 str_list = []
8245 try:
8246 str_list += ['ptr : ' + hex(self.ptr)]
8247 except ValueError:
8248 str_list += ['ptr : <ValueError>']
8249 try:
8250 str_list += ['rowLength : ' + str(self.rowLength)]
8251 except ValueError:
8252 str_list += ['rowLength : <ValueError>']
8253 try:
8254 str_list += ['layerHeight : ' + str(self.layerHeight)]
8255 except ValueError:
8256 str_list += ['layerHeight : <ValueError>']
8257 try:
8258 str_list += ['locHint :\n' + '\n'.join([' ' + line for line in str(self.locHint).splitlines()])]
8259 except ValueError:
8260 str_list += ['locHint : <ValueError>']
8261 return '\n'.join(str_list)
8262 else:
8263 return ''
8264 @property
8265 def ptr(self):
8266 return <void_ptr>self._pvt_ptr[0].op.ptr.ptr
8267 @ptr.setter
8268 def ptr(self, ptr):
8269 _cptr = _HelperInputVoidPtr(ptr)
8270 self._pvt_ptr[0].op.ptr.ptr = <void*><void_ptr>_cptr.cptr
8271 @property
8272 def rowLength(self):
8273 return self._pvt_ptr[0].op.ptr.rowLength
8274 @rowLength.setter
8275 def rowLength(self, size_t rowLength):
8276 self._pvt_ptr[0].op.ptr.rowLength = rowLength
8277 @property
8278 def layerHeight(self):
8279 return self._pvt_ptr[0].op.ptr.layerHeight
8280 @layerHeight.setter
8281 def layerHeight(self, size_t layerHeight):
8282 self._pvt_ptr[0].op.ptr.layerHeight = layerHeight
8283 @property
8284 def locHint(self):
8285 return self._locHint
8286 @locHint.setter
8287 def locHint(self, locHint not None : cudaMemLocation):
8288 string.memcpy(&self._pvt_ptr[0].op.ptr.locHint, <cyruntime.cudaMemLocation*><void_ptr>locHint.getPtr(), sizeof(self._pvt_ptr[0].op.ptr.locHint))
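# Usage sketch (illustrative): these fields are reached through the `op.ptr`
# view of a cudaMemcpy3DOperand rather than instantiated directly. `devPtr`
# is a hypothetical device allocation; the sizes are placeholder values.
#
#   operand = runtime.cudaMemcpy3DOperand()
#   operand.op.ptr.ptr = devPtr        # base address of the operand
#   operand.op.ptr.rowLength = 256     # length of each row, in elements
#   operand.op.ptr.layerHeight = 64    # height of each layer, in elements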
8290cdef class anon_struct7:
8291 """
8292 Attributes
8293 ----------
8294 array : cudaArray_t
8296 offset : cudaOffset3D
8299 Methods
8300 -------
8301 getPtr()
8302 Get memory address of class instance
8303 """
8304 def __cinit__(self, void_ptr _ptr):
8305 self._pvt_ptr = <cyruntime.cudaMemcpy3DOperand *>_ptr
8307 def __init__(self, void_ptr _ptr):
8309 self._array = cudaArray_t(_ptr=<void_ptr>&self._pvt_ptr[0].op.array.array)
8310 self._offset = cudaOffset3D(_ptr=<void_ptr>&self._pvt_ptr[0].op.array.offset)
8311 def __dealloc__(self):
8312 pass
8313 def getPtr(self):
8314 return <void_ptr>&self._pvt_ptr[0].op.array
8315 def __repr__(self):
8316 if self._pvt_ptr is not NULL:
8317 str_list = []
8318 try:
8319 str_list += ['array : ' + str(self.array)]
8320 except ValueError:
8321 str_list += ['array : <ValueError>']
8322 try:
8323 str_list += ['offset :\n' + '\n'.join([' ' + line for line in str(self.offset).splitlines()])]
8324 except ValueError:
8325 str_list += ['offset : <ValueError>']
8326 return '\n'.join(str_list)
8327 else:
8328 return ''
8329 @property
8330 def array(self):
8331 return self._array
8332 @array.setter
8333 def array(self, array):
8334 cdef cyruntime.cudaArray_t cyarray
8335 if array is None:
8336 cyarray = <cyruntime.cudaArray_t><void_ptr>0
8337 elif isinstance(array, (cudaArray_t,)):
8338 parray = int(array)
8339 cyarray = <cyruntime.cudaArray_t><void_ptr>parray
8340 else:
8341 parray = int(cudaArray_t(array))
8342 cyarray = <cyruntime.cudaArray_t><void_ptr>parray
8343 self._array._pvt_ptr[0] = cyarray
8344 @property
8345 def offset(self):
8346 return self._offset
8347 @offset.setter
8348 def offset(self, offset not None : cudaOffset3D):
8349 string.memcpy(&self._pvt_ptr[0].op.array.offset, <cyruntime.cudaOffset3D*><void_ptr>offset.getPtr(), sizeof(self._pvt_ptr[0].op.array.offset))
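# Usage sketch (illustrative): the array form of the operand pairs a
# cudaArray_t handle with an element offset into it. `arr` is assumed to
# come from cudaMalloc3DArray or a similar allocation.
#
#   operand.op.array.array = arr
#   operand.op.array.offset = runtime.cudaOffset3D()   # zero offset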
8351cdef class anon_union1:
8352 """
8353 Attributes
8354 ----------
8355 ptr : anon_struct6
8357 array : anon_struct7
8360 Methods
8361 -------
8362 getPtr()
8363 Get memory address of class instance
8364 """
8365 def __cinit__(self, void_ptr _ptr):
8366 self._pvt_ptr = <cyruntime.cudaMemcpy3DOperand *>_ptr
8368 def __init__(self, void_ptr _ptr):
8370 self._ptr = anon_struct6(_ptr=<void_ptr>self._pvt_ptr)
8371 self._array = anon_struct7(_ptr=<void_ptr>self._pvt_ptr)
8372 def __dealloc__(self):
8373 pass
8374 def getPtr(self):
8375 return <void_ptr>&self._pvt_ptr[0].op
8376 def __repr__(self):
8377 if self._pvt_ptr is not NULL:
8378 str_list = []
8379 try:
8380 str_list += ['ptr :\n' + '\n'.join([' ' + line for line in str(self.ptr).splitlines()])]
8381 except ValueError:
8382 str_list += ['ptr : <ValueError>']
8383 try:
8384 str_list += ['array :\n' + '\n'.join([' ' + line for line in str(self.array).splitlines()])]
8385 except ValueError:
8386 str_list += ['array : <ValueError>']
8387 return '\n'.join(str_list)
8388 else:
8389 return ''
8390 @property
8391 def ptr(self):
8392 return self._ptr
8393 @ptr.setter
8394 def ptr(self, ptr not None : anon_struct6):
8395 string.memcpy(&self._pvt_ptr[0].op.ptr, <cyruntime.anon_struct6*><void_ptr>ptr.getPtr(), sizeof(self._pvt_ptr[0].op.ptr))
8396 @property
8397 def array(self):
8398 return self._array
8399 @array.setter
8400 def array(self, array not None : anon_struct7):
8401 string.memcpy(&self._pvt_ptr[0].op.array, <cyruntime.anon_struct7*><void_ptr>array.getPtr(), sizeof(self._pvt_ptr[0].op.array))
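# Note (illustrative): `ptr` and `array` above are two views of the same
# union storage, so only the member matching the enclosing operand's `type`
# field should be written; the other view aliases the same bytes.
#
#   operand.op.ptr.ptr = devPtr   # valid only for a pointer-typed operand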
8403cdef class cudaMemcpy3DOperand:
8404 """
8405 Struct representing an operand for a copy performed with cudaMemcpy3DBatchAsync
8407 Attributes
8408 ----------
8409 type : cudaMemcpy3DOperandType
8411 op : anon_union1
8414 Methods
8415 -------
8416 getPtr()
8417 Get memory address of class instance
8418 """
8419 def __cinit__(self, void_ptr _ptr = 0):
8420 if _ptr == 0:
8421 self._val_ptr = <cyruntime.cudaMemcpy3DOperand *>calloc(1, sizeof(cyruntime.cudaMemcpy3DOperand))
8422 self._pvt_ptr = self._val_ptr
8423 else:
8424 self._pvt_ptr = <cyruntime.cudaMemcpy3DOperand *>_ptr
8425 def __init__(self, void_ptr _ptr = 0):
8427 self._op = anon_union1(_ptr=<void_ptr>self._pvt_ptr)
8428 def __dealloc__(self):
8429 if self._val_ptr is not NULL:
8430 free(self._val_ptr)
8431 def getPtr(self):
8432 return <void_ptr>self._pvt_ptr
8433 def __repr__(self):
8434 if self._pvt_ptr is not NULL:
8435 str_list = []
8436 try:
8437 str_list += ['type : ' + str(self.type)]
8438 except ValueError:
8439 str_list += ['type : <ValueError>']
8440 try:
8441 str_list += ['op :\n' + '\n'.join([' ' + line for line in str(self.op).splitlines()])]
8442 except ValueError:
8443 str_list += ['op : <ValueError>']
8444 return '\n'.join(str_list)
8445 else:
8446 return ''
8447 @property
8448 def type(self):
8449 if self._pvt_ptr[0].type not in _dict_cudaMemcpy3DOperandType:
8450 return None
8451 return _dict_cudaMemcpy3DOperandType[self._pvt_ptr[0].type]
8452 @type.setter
8453 def type(self, type not None : cudaMemcpy3DOperandType):
8454 self._pvt_ptr[0].type = type.value
8455 @property
8456 def op(self):
8457 return self._op
8458 @op.setter
8459 def op(self, op not None : anon_union1):
8460 string.memcpy(&self._pvt_ptr[0].op, <cyruntime.anon_union1*><void_ptr>op.getPtr(), sizeof(self._pvt_ptr[0].op))
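# Usage sketch (illustrative): assembling a complete source operand. The enum
# member name should be verified against this build's cudaMemcpy3DOperandType
# wrapper; `devPtr`, `width` and `height` are hypothetical placeholders.
#
#   src = runtime.cudaMemcpy3DOperand()
#   src.type = runtime.cudaMemcpy3DOperandType.cudaMemcpyOperandTypePointer
#   src.op.ptr.ptr = devPtr
#   src.op.ptr.rowLength = width      # elements per row
#   src.op.ptr.layerHeight = height   # rows per layer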
8462cdef class cudaMemcpy3DBatchOp:
8463 """
8464 Attributes
8465 ----------
8466 src : cudaMemcpy3DOperand
8467 Source memcpy operand.
8468 dst : cudaMemcpy3DOperand
8469 Destination memcpy operand.
8470 extent : cudaExtent
8471 Extents of the memcpy between src and dst. The width, height and
8472 depth components must not be 0.
8473 srcAccessOrder : cudaMemcpySrcAccessOrder
8474 Source access ordering to be observed for copy from src to dst.
8475 flags : unsigned int
8476 Additional flags for copy from src to dst. See cudaMemcpyFlags.
8478 Methods
8479 -------
8480 getPtr()
8481 Get memory address of class instance
8482 """
8483 def __cinit__(self, void_ptr _ptr = 0):
8484 if _ptr == 0:
8485 self._pvt_ptr = &self._pvt_val
8486 else:
8487 self._pvt_ptr = <cyruntime.cudaMemcpy3DBatchOp *>_ptr
8488 def __init__(self, void_ptr _ptr = 0):
8490 self._src = cudaMemcpy3DOperand(_ptr=<void_ptr>&self._pvt_ptr[0].src)
8491 self._dst = cudaMemcpy3DOperand(_ptr=<void_ptr>&self._pvt_ptr[0].dst)
8492 self._extent = cudaExtent(_ptr=<void_ptr>&self._pvt_ptr[0].extent)
8493 def __dealloc__(self):
8494 pass
8495 def getPtr(self):
8496 return <void_ptr>self._pvt_ptr
8497 def __repr__(self):
8498 if self._pvt_ptr is not NULL:
8499 str_list = []
8500 try:
8501 str_list += ['src :\n' + '\n'.join([' ' + line for line in str(self.src).splitlines()])]
8502 except ValueError:
8503 str_list += ['src : <ValueError>']
8504 try:
8505 str_list += ['dst :\n' + '\n'.join([' ' + line for line in str(self.dst).splitlines()])]
8506 except ValueError:
8507 str_list += ['dst : <ValueError>']
8508 try:
8509 str_list += ['extent :\n' + '\n'.join([' ' + line for line in str(self.extent).splitlines()])]
8510 except ValueError:
8511 str_list += ['extent : <ValueError>']
8512 try:
8513 str_list += ['srcAccessOrder : ' + str(self.srcAccessOrder)]
8514 except ValueError:
8515 str_list += ['srcAccessOrder : <ValueError>']
8516 try:
8517 str_list += ['flags : ' + str(self.flags)]
8518 except ValueError:
8519 str_list += ['flags : <ValueError>']
8520 return '\n'.join(str_list)
8521 else:
8522 return ''
8523 @property
8524 def src(self):
8525 return self._src
8526 @src.setter
8527 def src(self, src not None : cudaMemcpy3DOperand):
8528 string.memcpy(&self._pvt_ptr[0].src, <cyruntime.cudaMemcpy3DOperand*><void_ptr>src.getPtr(), sizeof(self._pvt_ptr[0].src))
8529 @property
8530 def dst(self):
8531 return self._dst
8532 @dst.setter
8533 def dst(self, dst not None : cudaMemcpy3DOperand):
8534 string.memcpy(&self._pvt_ptr[0].dst, <cyruntime.cudaMemcpy3DOperand*><void_ptr>dst.getPtr(), sizeof(self._pvt_ptr[0].dst))
8535 @property
8536 def extent(self):
8537 return self._extent
8538 @extent.setter
8539 def extent(self, extent not None : cudaExtent):
8540 string.memcpy(&self._pvt_ptr[0].extent, <cyruntime.cudaExtent*><void_ptr>extent.getPtr(), sizeof(self._pvt_ptr[0].extent))
8541 @property
8542 def srcAccessOrder(self):
8543 if self._pvt_ptr[0].srcAccessOrder not in _dict_cudaMemcpySrcAccessOrder:
8544 return None
8545 return _dict_cudaMemcpySrcAccessOrder[self._pvt_ptr[0].srcAccessOrder]
8546 @srcAccessOrder.setter
8547 def srcAccessOrder(self, srcAccessOrder not None : cudaMemcpySrcAccessOrder):
8548 self._pvt_ptr[0].srcAccessOrder = srcAccessOrder.value
8549 @property
8550 def flags(self):
8551 return self._pvt_ptr[0].flags
8552 @flags.setter
8553 def flags(self, unsigned int flags):
8554 self._pvt_ptr[0].flags = flags
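# Usage sketch (illustrative): one batch entry plus the launch call. The
# Python-level signature of cudaMemcpy3DBatchAsync is assumed here to take
# the op count, the op list, flags and a stream and to return (err, failIdx);
# check the generated docstring for the exact form in this build. `src`,
# `dst` and `stream` are assumed to exist (operands built as sketched above,
# plus an existing cudaStream_t).
#
#   op = runtime.cudaMemcpy3DBatchOp()
#   op.src = src
#   op.dst = dst
#   op.extent = runtime.make_cudaExtent(width, height, depth)   # all nonzero
#   op.srcAccessOrder = runtime.cudaMemcpySrcAccessOrder.cudaMemcpySrcAccessOrderStream
#   err, failIdx = runtime.cudaMemcpy3DBatchAsync(1, [op], 0, stream)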
8556cdef class CUuuid_st:
8557 """
8558 Attributes
8559 ----------
8560 bytes : bytes
8561 CUDA definition of UUID
8563 Methods
8564 -------
8565 getPtr()
8566 Get memory address of class instance
8567 """
8568 def __cinit__(self, void_ptr _ptr = 0):
8569 if _ptr == 0:
8570 self._pvt_ptr = &self._pvt_val
8571 else:
8572 self._pvt_ptr = <cyruntime.CUuuid_st *>_ptr
8573 def __init__(self, void_ptr _ptr = 0):
8574 pass
8575 def __dealloc__(self):
8576 pass
8577 def getPtr(self):
8578 return <void_ptr>self._pvt_ptr
8579 def __repr__(self):
8580 if self._pvt_ptr is not NULL:
8581 str_list = []
8582 try:
8583 str_list += ['bytes : ' + self.bytes.hex()]
8584 except ValueError:
8585 str_list += ['bytes : <ValueError>']
8586 return '\n'.join(str_list)
8587 else:
8588 return ''
8589 @property
8590 def bytes(self):
8591 return PyBytes_FromStringAndSize(self._pvt_ptr[0].bytes, 16)
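# Usage sketch (illustrative): the 16 raw bytes are exposed read-only and are
# convenient to render as hex. Assumes device 0 exists.
#
#   err, prop = runtime.cudaGetDeviceProperties(0)
#   print(prop.uuid.bytes.hex())   # 32 hex digits identifying the device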
8593cdef class cudaDeviceProp:
8594 """
8595 CUDA device properties
8597 Attributes
8598 ----------
8599 name : bytes
8600 ASCII string identifying device
8601 uuid : cudaUUID_t
8602 16-byte unique identifier
8603 luid : bytes
8604 8-byte locally unique identifier. Value is undefined on TCC and
8605 non-Windows platforms
8606 luidDeviceNodeMask : unsigned int
8607 LUID device node mask. Value is undefined on TCC and non-Windows
8608 platforms
8609 totalGlobalMem : size_t
8610 Global memory available on device in bytes
8611 sharedMemPerBlock : size_t
8612 Shared memory available per block in bytes
8613 regsPerBlock : int
8614 32-bit registers available per block
8615 warpSize : int
8616 Warp size in threads
8617 memPitch : size_t
8618 Maximum pitch in bytes allowed by memory copies
8619 maxThreadsPerBlock : int
8620 Maximum number of threads per block
8621 maxThreadsDim : list[int]
8622 Maximum size of each dimension of a block
8623 maxGridSize : list[int]
8624 Maximum size of each dimension of a grid
8625 totalConstMem : size_t
8626 Constant memory available on device in bytes
8627 major : int
8628 Major compute capability
8629 minor : int
8630 Minor compute capability
8631 textureAlignment : size_t
8632 Alignment requirement for textures
8633 texturePitchAlignment : size_t
8634 Pitch alignment requirement for texture references bound to pitched
8635 memory
8636 multiProcessorCount : int
8637 Number of multiprocessors on device
8638 integrated : int
8639 Device is integrated as opposed to discrete
8640 canMapHostMemory : int
8641 Device can map host memory with
8642 cudaHostAlloc/cudaHostGetDevicePointer
8643 maxTexture1D : int
8644 Maximum 1D texture size
8645 maxTexture1DMipmap : int
8646 Maximum 1D mipmapped texture size
8647 maxTexture2D : list[int]
8648 Maximum 2D texture dimensions
8649 maxTexture2DMipmap : list[int]
8650 Maximum 2D mipmapped texture dimensions
8651 maxTexture2DLinear : list[int]
8652 Maximum dimensions (width, height, pitch) for 2D textures bound to
8653 pitched memory
8654 maxTexture2DGather : list[int]
8655 Maximum 2D texture dimensions if texture gather operations have to
8656 be performed
8657 maxTexture3D : list[int]
8658 Maximum 3D texture dimensions
8659 maxTexture3DAlt : list[int]
8660 Maximum alternate 3D texture dimensions
8661 maxTextureCubemap : int
8662 Maximum Cubemap texture dimensions
8663 maxTexture1DLayered : list[int]
8664 Maximum 1D layered texture dimensions
8665 maxTexture2DLayered : list[int]
8666 Maximum 2D layered texture dimensions
8667 maxTextureCubemapLayered : list[int]
8668 Maximum Cubemap layered texture dimensions
8669 maxSurface1D : int
8670 Maximum 1D surface size
8671 maxSurface2D : list[int]
8672 Maximum 2D surface dimensions
8673 maxSurface3D : list[int]
8674 Maximum 3D surface dimensions
8675 maxSurface1DLayered : list[int]
8676 Maximum 1D layered surface dimensions
8677 maxSurface2DLayered : list[int]
8678 Maximum 2D layered surface dimensions
8679 maxSurfaceCubemap : int
8680 Maximum Cubemap surface dimensions
8681 maxSurfaceCubemapLayered : list[int]
8682 Maximum Cubemap layered surface dimensions
8683 surfaceAlignment : size_t
8684 Alignment requirements for surfaces
8685 concurrentKernels : int
8686 Device can possibly execute multiple kernels concurrently
8687 ECCEnabled : int
8688 Device has ECC support enabled
8689 pciBusID : int
8690 PCI bus ID of the device
8691 pciDeviceID : int
8692 PCI device ID of the device
8693 pciDomainID : int
8694 PCI domain ID of the device
8695 tccDriver : int
8696 1 if device is a Tesla device using TCC driver, 0 otherwise
8697 asyncEngineCount : int
8698 Number of asynchronous engines
8699 unifiedAddressing : int
8700 Device shares a unified address space with the host
8701 memoryBusWidth : int
8702 Global memory bus width in bits
8703 l2CacheSize : int
8704 Size of L2 cache in bytes
8705 persistingL2CacheMaxSize : int
8706 Device's maximum l2 persisting lines capacity setting in bytes
8707 maxThreadsPerMultiProcessor : int
8708 Maximum resident threads per multiprocessor
8709 streamPrioritiesSupported : int
8710 Device supports stream priorities
8711 globalL1CacheSupported : int
8712 Device supports caching globals in L1
8713 localL1CacheSupported : int
8714 Device supports caching locals in L1
8715 sharedMemPerMultiprocessor : size_t
8716 Shared memory available per multiprocessor in bytes
8717 regsPerMultiprocessor : int
8718 32-bit registers available per multiprocessor
8719 managedMemory : int
8720 Device supports allocating managed memory on this system
8721 isMultiGpuBoard : int
8722 Device is on a multi-GPU board
8723 multiGpuBoardGroupID : int
8724 Unique identifier for a group of devices on the same multi-GPU
8725 board
8726 hostNativeAtomicSupported : int
8727 Link between the device and the host supports native atomic
8728 operations
8729 pageableMemoryAccess : int
8730 Device supports coherently accessing pageable memory without
8731 calling cudaHostRegister on it
8732 concurrentManagedAccess : int
8733 Device can coherently access managed memory concurrently with the
8734 CPU
8735 computePreemptionSupported : int
8736 Device supports Compute Preemption
8737 canUseHostPointerForRegisteredMem : int
8738 Device can access host registered memory at the same virtual
8739 address as the CPU
8740 cooperativeLaunch : int
8741 Device supports launching cooperative kernels via
8742 cudaLaunchCooperativeKernel
8743 sharedMemPerBlockOptin : size_t
8744 Per device maximum shared memory per block usable by special opt in
8745 pageableMemoryAccessUsesHostPageTables : int
8746 Device accesses pageable memory via the host's page tables
8747 directManagedMemAccessFromHost : int
8748 Host can directly access managed memory on the device without
8749 migration.
8750 maxBlocksPerMultiProcessor : int
8751 Maximum number of resident blocks per multiprocessor
8752 accessPolicyMaxWindowSize : int
8753 The maximum value of cudaAccessPolicyWindow::num_bytes.
8754 reservedSharedMemPerBlock : size_t
8755 Shared memory reserved by CUDA driver per block in bytes
8756 hostRegisterSupported : int
8757 Device supports host memory registration via cudaHostRegister.
8758 sparseCudaArraySupported : int
8759 1 if the device supports sparse CUDA arrays and sparse CUDA
8760 mipmapped arrays, 0 otherwise
8761 hostRegisterReadOnlySupported : int
8762 Device supports using the cudaHostRegister flag
8763 cudaHostRegisterReadOnly to register memory that must be mapped as
8764 read-only to the GPU
8765 timelineSemaphoreInteropSupported : int
8766 External timeline semaphore interop is supported on the device
8767 memoryPoolsSupported : int
8768 1 if the device supports using the cudaMallocAsync and cudaMemPool
8769 family of APIs, 0 otherwise
8770 gpuDirectRDMASupported : int
8771 1 if the device supports GPUDirect RDMA APIs, 0 otherwise
8772 gpuDirectRDMAFlushWritesOptions : unsigned int
8773 Bitmask to be interpreted according to the
8774 cudaFlushGPUDirectRDMAWritesOptions enum
8775 gpuDirectRDMAWritesOrdering : int
8776 See the cudaGPUDirectRDMAWritesOrdering enum for numerical values
8777 memoryPoolSupportedHandleTypes : unsigned int
8778 Bitmask of handle types supported with mempool-based IPC
8779 deferredMappingCudaArraySupported : int
8780 1 if the device supports deferred mapping CUDA arrays and CUDA
8781 mipmapped arrays
8782 ipcEventSupported : int
8783 Device supports IPC Events.
8784 clusterLaunch : int
8785 Indicates device supports cluster launch
8786 unifiedFunctionPointers : int
8787 Indicates device supports unified pointers
8788 deviceNumaConfig : int
8789 NUMA configuration of a device: value is of type
8790 cudaDeviceNumaConfig enum
8791 deviceNumaId : int
8792 NUMA node ID of the GPU memory
8793 mpsEnabled : int
8794 Indicates if contexts created on this device will be shared via MPS
8795 hostNumaId : int
8796 NUMA ID of the host node closest to the device or -1 when system
8797 does not support NUMA
8798 gpuPciDeviceID : unsigned int
8799 The combined 16-bit PCI device ID and 16-bit PCI vendor ID
8800 gpuPciSubsystemID : unsigned int
8801 The combined 16-bit PCI subsystem ID and 16-bit PCI subsystem
8802 vendor ID
8803 hostNumaMultinodeIpcSupported : int
8804 1 if the device supports HostNuma location IPC between nodes in a
8805 multi-node system.
8806 reserved : list[int]
8807 Reserved for future use
8809 Methods
8810 -------
8811 getPtr()
8812 Get memory address of class instance
8813 """
8814 def __cinit__(self, void_ptr _ptr = 0):
8815 if _ptr == 0:
8816 self._pvt_ptr = &self._pvt_val
8817 else:
8818 self._pvt_ptr = <cyruntime.cudaDeviceProp *>_ptr
8819 def __init__(self, void_ptr _ptr = 0):
8821 self._uuid = cudaUUID_t(_ptr=<void_ptr>&self._pvt_ptr[0].uuid)
8822 def __dealloc__(self):
8823 pass
8824 def getPtr(self):
8825 return <void_ptr>self._pvt_ptr
8826 def __repr__(self):
8827 if self._pvt_ptr is not NULL:
8828 str_list = []
8829 try:
8830 str_list += ['name : ' + self.name.decode('utf-8')]
8831 except ValueError:
8832 str_list += ['name : <ValueError>']
8833 try:
8834 str_list += ['uuid :\n' + '\n'.join([' ' + line for line in str(self.uuid).splitlines()])]
8835 except ValueError:
8836 str_list += ['uuid : <ValueError>']
8837 try:
8838 str_list += ['luid : ' + self.luid.hex()]
8839 except ValueError:
8840 str_list += ['luid : <ValueError>']
8841 try:
8842 str_list += ['luidDeviceNodeMask : ' + str(self.luidDeviceNodeMask)]
8843 except ValueError:
8844 str_list += ['luidDeviceNodeMask : <ValueError>']
8845 try:
8846 str_list += ['totalGlobalMem : ' + str(self.totalGlobalMem)]
8847 except ValueError:
8848 str_list += ['totalGlobalMem : <ValueError>']
8849 try:
8850 str_list += ['sharedMemPerBlock : ' + str(self.sharedMemPerBlock)]
8851 except ValueError:
8852 str_list += ['sharedMemPerBlock : <ValueError>']
8853 try:
8854 str_list += ['regsPerBlock : ' + str(self.regsPerBlock)]
8855 except ValueError:
8856 str_list += ['regsPerBlock : <ValueError>']
8857 try:
8858 str_list += ['warpSize : ' + str(self.warpSize)]
8859 except ValueError:
8860 str_list += ['warpSize : <ValueError>']
8861 try:
8862 str_list += ['memPitch : ' + str(self.memPitch)]
8863 except ValueError:
8864 str_list += ['memPitch : <ValueError>']
8865 try:
8866 str_list += ['maxThreadsPerBlock : ' + str(self.maxThreadsPerBlock)]
8867 except ValueError:
8868 str_list += ['maxThreadsPerBlock : <ValueError>']
8869 try:
8870 str_list += ['maxThreadsDim : ' + str(self.maxThreadsDim)]
8871 except ValueError:
8872 str_list += ['maxThreadsDim : <ValueError>']
8873 try:
8874 str_list += ['maxGridSize : ' + str(self.maxGridSize)]
8875 except ValueError:
8876 str_list += ['maxGridSize : <ValueError>']
8877 try:
8878 str_list += ['totalConstMem : ' + str(self.totalConstMem)]
8879 except ValueError:
8880 str_list += ['totalConstMem : <ValueError>']
8881 try:
8882 str_list += ['major : ' + str(self.major)]
8883 except ValueError:
8884 str_list += ['major : <ValueError>']
8885 try:
8886 str_list += ['minor : ' + str(self.minor)]
8887 except ValueError:
8888 str_list += ['minor : <ValueError>']
8889 try:
8890 str_list += ['textureAlignment : ' + str(self.textureAlignment)]
8891 except ValueError:
8892 str_list += ['textureAlignment : <ValueError>']
8893 try:
8894 str_list += ['texturePitchAlignment : ' + str(self.texturePitchAlignment)]
8895 except ValueError:
8896 str_list += ['texturePitchAlignment : <ValueError>']
8897 try:
8898 str_list += ['multiProcessorCount : ' + str(self.multiProcessorCount)]
8899 except ValueError:
8900 str_list += ['multiProcessorCount : <ValueError>']
8901 try:
8902 str_list += ['integrated : ' + str(self.integrated)]
8903 except ValueError:
8904 str_list += ['integrated : <ValueError>']
8905 try:
8906 str_list += ['canMapHostMemory : ' + str(self.canMapHostMemory)]
8907 except ValueError:
8908 str_list += ['canMapHostMemory : <ValueError>']
8909 try:
8910 str_list += ['maxTexture1D : ' + str(self.maxTexture1D)]
8911 except ValueError:
8912 str_list += ['maxTexture1D : <ValueError>']
8913 try:
8914 str_list += ['maxTexture1DMipmap : ' + str(self.maxTexture1DMipmap)]
8915 except ValueError:
8916 str_list += ['maxTexture1DMipmap : <ValueError>']
8917 try:
8918 str_list += ['maxTexture2D : ' + str(self.maxTexture2D)]
8919 except ValueError:
8920 str_list += ['maxTexture2D : <ValueError>']
8921 try:
8922 str_list += ['maxTexture2DMipmap : ' + str(self.maxTexture2DMipmap)]
8923 except ValueError:
8924 str_list += ['maxTexture2DMipmap : <ValueError>']
8925 try:
8926 str_list += ['maxTexture2DLinear : ' + str(self.maxTexture2DLinear)]
8927 except ValueError:
8928 str_list += ['maxTexture2DLinear : <ValueError>']
8929 try:
8930 str_list += ['maxTexture2DGather : ' + str(self.maxTexture2DGather)]
8931 except ValueError:
8932 str_list += ['maxTexture2DGather : <ValueError>']
8933 try:
8934 str_list += ['maxTexture3D : ' + str(self.maxTexture3D)]
8935 except ValueError:
8936 str_list += ['maxTexture3D : <ValueError>']
8937 try:
8938 str_list += ['maxTexture3DAlt : ' + str(self.maxTexture3DAlt)]
8939 except ValueError:
8940 str_list += ['maxTexture3DAlt : <ValueError>']
8941 try:
8942 str_list += ['maxTextureCubemap : ' + str(self.maxTextureCubemap)]
8943 except ValueError:
8944 str_list += ['maxTextureCubemap : <ValueError>']
8945 try:
8946 str_list += ['maxTexture1DLayered : ' + str(self.maxTexture1DLayered)]
8947 except ValueError:
8948 str_list += ['maxTexture1DLayered : <ValueError>']
8949 try:
8950 str_list += ['maxTexture2DLayered : ' + str(self.maxTexture2DLayered)]
8951 except ValueError:
8952 str_list += ['maxTexture2DLayered : <ValueError>']
8953 try:
8954 str_list += ['maxTextureCubemapLayered : ' + str(self.maxTextureCubemapLayered)]
8955 except ValueError:
8956 str_list += ['maxTextureCubemapLayered : <ValueError>']
8957 try:
8958 str_list += ['maxSurface1D : ' + str(self.maxSurface1D)]
8959 except ValueError:
8960 str_list += ['maxSurface1D : <ValueError>']
8961 try:
8962 str_list += ['maxSurface2D : ' + str(self.maxSurface2D)]
8963 except ValueError:
8964 str_list += ['maxSurface2D : <ValueError>']
8965 try:
8966 str_list += ['maxSurface3D : ' + str(self.maxSurface3D)]
8967 except ValueError:
8968 str_list += ['maxSurface3D : <ValueError>']
8969 try:
8970 str_list += ['maxSurface1DLayered : ' + str(self.maxSurface1DLayered)]
8971 except ValueError:
8972 str_list += ['maxSurface1DLayered : <ValueError>']
8973 try:
8974 str_list += ['maxSurface2DLayered : ' + str(self.maxSurface2DLayered)]
8975 except ValueError:
8976 str_list += ['maxSurface2DLayered : <ValueError>']
8977 try:
8978 str_list += ['maxSurfaceCubemap : ' + str(self.maxSurfaceCubemap)]
8979 except ValueError:
8980 str_list += ['maxSurfaceCubemap : <ValueError>']
8981 try:
8982 str_list += ['maxSurfaceCubemapLayered : ' + str(self.maxSurfaceCubemapLayered)]
8983 except ValueError:
8984 str_list += ['maxSurfaceCubemapLayered : <ValueError>']
8985 try:
8986 str_list += ['surfaceAlignment : ' + str(self.surfaceAlignment)]
8987 except ValueError:
8988 str_list += ['surfaceAlignment : <ValueError>']
8989 try:
8990 str_list += ['concurrentKernels : ' + str(self.concurrentKernels)]
8991 except ValueError:
8992 str_list += ['concurrentKernels : <ValueError>']
8993 try:
8994 str_list += ['ECCEnabled : ' + str(self.ECCEnabled)]
8995 except ValueError:
8996 str_list += ['ECCEnabled : <ValueError>']
8997 try:
8998 str_list += ['pciBusID : ' + str(self.pciBusID)]
8999 except ValueError:
9000 str_list += ['pciBusID : <ValueError>']
9001 try:
9002 str_list += ['pciDeviceID : ' + str(self.pciDeviceID)]
9003 except ValueError:
9004 str_list += ['pciDeviceID : <ValueError>']
9005 try:
9006 str_list += ['pciDomainID : ' + str(self.pciDomainID)]
9007 except ValueError:
9008 str_list += ['pciDomainID : <ValueError>']
9009 try:
9010 str_list += ['tccDriver : ' + str(self.tccDriver)]
9011 except ValueError:
9012 str_list += ['tccDriver : <ValueError>']
9013 try:
9014 str_list += ['asyncEngineCount : ' + str(self.asyncEngineCount)]
9015 except ValueError:
9016 str_list += ['asyncEngineCount : <ValueError>']
9017 try:
9018 str_list += ['unifiedAddressing : ' + str(self.unifiedAddressing)]
9019 except ValueError:
9020 str_list += ['unifiedAddressing : <ValueError>']
9021 try:
9022 str_list += ['memoryBusWidth : ' + str(self.memoryBusWidth)]
9023 except ValueError:
9024 str_list += ['memoryBusWidth : <ValueError>']
9025 try:
9026 str_list += ['l2CacheSize : ' + str(self.l2CacheSize)]
9027 except ValueError:
9028 str_list += ['l2CacheSize : <ValueError>']
9029 try:
9030 str_list += ['persistingL2CacheMaxSize : ' + str(self.persistingL2CacheMaxSize)]
9031 except ValueError:
9032 str_list += ['persistingL2CacheMaxSize : <ValueError>']
9033 try:
9034 str_list += ['maxThreadsPerMultiProcessor : ' + str(self.maxThreadsPerMultiProcessor)]
9035 except ValueError:
9036 str_list += ['maxThreadsPerMultiProcessor : <ValueError>']
9037 try:
9038 str_list += ['streamPrioritiesSupported : ' + str(self.streamPrioritiesSupported)]
9039 except ValueError:
9040 str_list += ['streamPrioritiesSupported : <ValueError>']
9041 try:
9042 str_list += ['globalL1CacheSupported : ' + str(self.globalL1CacheSupported)]
9043 except ValueError:
9044 str_list += ['globalL1CacheSupported : <ValueError>']
9045 try:
9046 str_list += ['localL1CacheSupported : ' + str(self.localL1CacheSupported)]
9047 except ValueError:
9048 str_list += ['localL1CacheSupported : <ValueError>']
9049 try:
9050 str_list += ['sharedMemPerMultiprocessor : ' + str(self.sharedMemPerMultiprocessor)]
9051 except ValueError:
9052 str_list += ['sharedMemPerMultiprocessor : <ValueError>']
9053 try:
9054 str_list += ['regsPerMultiprocessor : ' + str(self.regsPerMultiprocessor)]
9055 except ValueError:
9056 str_list += ['regsPerMultiprocessor : <ValueError>']
9057 try:
9058 str_list += ['managedMemory : ' + str(self.managedMemory)]
9059 except ValueError:
9060 str_list += ['managedMemory : <ValueError>']
9061 try:
9062 str_list += ['isMultiGpuBoard : ' + str(self.isMultiGpuBoard)]
9063 except ValueError:
9064 str_list += ['isMultiGpuBoard : <ValueError>']
9065 try:
9066 str_list += ['multiGpuBoardGroupID : ' + str(self.multiGpuBoardGroupID)]
9067 except ValueError:
9068 str_list += ['multiGpuBoardGroupID : <ValueError>']
9069 try:
9070 str_list += ['hostNativeAtomicSupported : ' + str(self.hostNativeAtomicSupported)]
9071 except ValueError:
9072 str_list += ['hostNativeAtomicSupported : <ValueError>']
9073 try:
9074 str_list += ['pageableMemoryAccess : ' + str(self.pageableMemoryAccess)]
9075 except ValueError:
9076 str_list += ['pageableMemoryAccess : <ValueError>']
9077 try:
9078 str_list += ['concurrentManagedAccess : ' + str(self.concurrentManagedAccess)]
9079 except ValueError:
9080 str_list += ['concurrentManagedAccess : <ValueError>']
9081 try:
9082 str_list += ['computePreemptionSupported : ' + str(self.computePreemptionSupported)]
9083 except ValueError:
9084 str_list += ['computePreemptionSupported : <ValueError>']
9085 try:
9086 str_list += ['canUseHostPointerForRegisteredMem : ' + str(self.canUseHostPointerForRegisteredMem)]
9087 except ValueError:
9088 str_list += ['canUseHostPointerForRegisteredMem : <ValueError>']
9089 try:
9090 str_list += ['cooperativeLaunch : ' + str(self.cooperativeLaunch)]
9091 except ValueError:
9092 str_list += ['cooperativeLaunch : <ValueError>']
9093 try:
9094 str_list += ['sharedMemPerBlockOptin : ' + str(self.sharedMemPerBlockOptin)]
9095 except ValueError:
9096 str_list += ['sharedMemPerBlockOptin : <ValueError>']
9097 try:
9098 str_list += ['pageableMemoryAccessUsesHostPageTables : ' + str(self.pageableMemoryAccessUsesHostPageTables)]
9099 except ValueError:
9100 str_list += ['pageableMemoryAccessUsesHostPageTables : <ValueError>']
9101 try:
9102 str_list += ['directManagedMemAccessFromHost : ' + str(self.directManagedMemAccessFromHost)]
9103 except ValueError:
9104 str_list += ['directManagedMemAccessFromHost : <ValueError>']
9105 try:
9106 str_list += ['maxBlocksPerMultiProcessor : ' + str(self.maxBlocksPerMultiProcessor)]
9107 except ValueError:
9108 str_list += ['maxBlocksPerMultiProcessor : <ValueError>']
9109 try:
9110 str_list += ['accessPolicyMaxWindowSize : ' + str(self.accessPolicyMaxWindowSize)]
9111 except ValueError:
9112 str_list += ['accessPolicyMaxWindowSize : <ValueError>']
9113 try:
9114 str_list += ['reservedSharedMemPerBlock : ' + str(self.reservedSharedMemPerBlock)]
9115 except ValueError:
9116 str_list += ['reservedSharedMemPerBlock : <ValueError>']
9117 try:
9118 str_list += ['hostRegisterSupported : ' + str(self.hostRegisterSupported)]
9119 except ValueError:
9120 str_list += ['hostRegisterSupported : <ValueError>']
9121 try:
9122 str_list += ['sparseCudaArraySupported : ' + str(self.sparseCudaArraySupported)]
9123 except ValueError:
9124 str_list += ['sparseCudaArraySupported : <ValueError>']
9125 try:
9126 str_list += ['hostRegisterReadOnlySupported : ' + str(self.hostRegisterReadOnlySupported)]
9127 except ValueError:
9128 str_list += ['hostRegisterReadOnlySupported : <ValueError>']
9129 try:
9130 str_list += ['timelineSemaphoreInteropSupported : ' + str(self.timelineSemaphoreInteropSupported)]
9131 except ValueError:
9132 str_list += ['timelineSemaphoreInteropSupported : <ValueError>']
9133 try:
9134 str_list += ['memoryPoolsSupported : ' + str(self.memoryPoolsSupported)]
9135 except ValueError:
9136 str_list += ['memoryPoolsSupported : <ValueError>']
9137 try:
9138 str_list += ['gpuDirectRDMASupported : ' + str(self.gpuDirectRDMASupported)]
9139 except ValueError:
9140 str_list += ['gpuDirectRDMASupported : <ValueError>']
9141 try:
9142 str_list += ['gpuDirectRDMAFlushWritesOptions : ' + str(self.gpuDirectRDMAFlushWritesOptions)]
9143 except ValueError:
9144 str_list += ['gpuDirectRDMAFlushWritesOptions : <ValueError>']
9145 try:
9146 str_list += ['gpuDirectRDMAWritesOrdering : ' + str(self.gpuDirectRDMAWritesOrdering)]
9147 except ValueError:
9148 str_list += ['gpuDirectRDMAWritesOrdering : <ValueError>']
9149 try:
9150 str_list += ['memoryPoolSupportedHandleTypes : ' + str(self.memoryPoolSupportedHandleTypes)]
9151 except ValueError:
9152 str_list += ['memoryPoolSupportedHandleTypes : <ValueError>']
9153 try:
9154 str_list += ['deferredMappingCudaArraySupported : ' + str(self.deferredMappingCudaArraySupported)]
9155 except ValueError:
9156 str_list += ['deferredMappingCudaArraySupported : <ValueError>']
9157 try:
9158 str_list += ['ipcEventSupported : ' + str(self.ipcEventSupported)]
9159 except ValueError:
9160 str_list += ['ipcEventSupported : <ValueError>']
9161 try:
9162 str_list += ['clusterLaunch : ' + str(self.clusterLaunch)]
9163 except ValueError:
9164 str_list += ['clusterLaunch : <ValueError>']
9165 try:
9166 str_list += ['unifiedFunctionPointers : ' + str(self.unifiedFunctionPointers)]
9167 except ValueError:
9168 str_list += ['unifiedFunctionPointers : <ValueError>']
9169 try:
9170 str_list += ['deviceNumaConfig : ' + str(self.deviceNumaConfig)]
9171 except ValueError:
9172 str_list += ['deviceNumaConfig : <ValueError>']
9173 try:
9174 str_list += ['deviceNumaId : ' + str(self.deviceNumaId)]
9175 except ValueError:
9176 str_list += ['deviceNumaId : <ValueError>']
9177 try:
9178 str_list += ['mpsEnabled : ' + str(self.mpsEnabled)]
9179 except ValueError:
9180 str_list += ['mpsEnabled : <ValueError>']
9181 try:
9182 str_list += ['hostNumaId : ' + str(self.hostNumaId)]
9183 except ValueError:
9184 str_list += ['hostNumaId : <ValueError>']
9185 try:
9186 str_list += ['gpuPciDeviceID : ' + str(self.gpuPciDeviceID)]
9187 except ValueError:
9188 str_list += ['gpuPciDeviceID : <ValueError>']
9189 try:
9190 str_list += ['gpuPciSubsystemID : ' + str(self.gpuPciSubsystemID)]
9191 except ValueError:
9192 str_list += ['gpuPciSubsystemID : <ValueError>']
9193 try:
9194 str_list += ['hostNumaMultinodeIpcSupported : ' + str(self.hostNumaMultinodeIpcSupported)]
9195 except ValueError:
9196 str_list += ['hostNumaMultinodeIpcSupported : <ValueError>']
9197 try:
9198 str_list += ['reserved : ' + str(self.reserved)]
9199 except ValueError:
9200 str_list += ['reserved : <ValueError>']
9201 return '\n'.join(str_list)
9202 else:
9203 return ''
9204 @property
9205 def name(self):
9206 return self._pvt_ptr[0].name
9207 @name.setter
9208 def name(self, name):
9210 self._pvt_ptr[0].name = name
9211 @property
9212 def uuid(self):
9213 return self._uuid
9214 @uuid.setter
9215 def uuid(self, uuid not None : cudaUUID_t):
9216 string.memcpy(&self._pvt_ptr[0].uuid, <cyruntime.cudaUUID_t*><void_ptr>uuid.getPtr(), sizeof(self._pvt_ptr[0].uuid))
9217 @property
9218 def luid(self):
9219 return PyBytes_FromStringAndSize(self._pvt_ptr[0].luid, 8)
9220 @luid.setter
9221 def luid(self, luid):
9222 if len(luid) != 8:
9223 raise ValueError("luid length must be 8, is " + str(len(luid)))
9224 if CHAR_MIN == 0:
# char is unsigned on this platform: fold negative ints (-128..-1)
# into their two's-complement byte values (128..255)
9225 for i, b in enumerate(luid):
9226 if -129 < b < 0:
9227 b = b + 256
9228 self._pvt_ptr[0].luid[i] = b
9229 else:
# char is signed on this platform: fold 128..255 into -128..-1
9230 for i, b in enumerate(luid):
9231 if 127 < b < 256:
9232 b = b - 256
9233 self._pvt_ptr[0].luid[i] = b
9234 @property
9235 def luidDeviceNodeMask(self):
9236 return self._pvt_ptr[0].luidDeviceNodeMask
9237 @luidDeviceNodeMask.setter
9238 def luidDeviceNodeMask(self, unsigned int luidDeviceNodeMask):
9239 self._pvt_ptr[0].luidDeviceNodeMask = luidDeviceNodeMask
9240 @property
9241 def totalGlobalMem(self):
9242 return self._pvt_ptr[0].totalGlobalMem
9243 @totalGlobalMem.setter
9244 def totalGlobalMem(self, size_t totalGlobalMem):
9245 self._pvt_ptr[0].totalGlobalMem = totalGlobalMem
9246 @property
9247 def sharedMemPerBlock(self):
9248 return self._pvt_ptr[0].sharedMemPerBlock
9249 @sharedMemPerBlock.setter
9250 def sharedMemPerBlock(self, size_t sharedMemPerBlock):
9251 self._pvt_ptr[0].sharedMemPerBlock = sharedMemPerBlock
9252 @property
9253 def regsPerBlock(self):
9254 return self._pvt_ptr[0].regsPerBlock
9255 @regsPerBlock.setter
9256 def regsPerBlock(self, int regsPerBlock):
9257 self._pvt_ptr[0].regsPerBlock = regsPerBlock
9258 @property
9259 def warpSize(self):
9260 return self._pvt_ptr[0].warpSize
9261 @warpSize.setter
9262 def warpSize(self, int warpSize):
9263 self._pvt_ptr[0].warpSize = warpSize
9264 @property
9265 def memPitch(self):
9266 return self._pvt_ptr[0].memPitch
9267 @memPitch.setter
9268 def memPitch(self, size_t memPitch):
9269 self._pvt_ptr[0].memPitch = memPitch
9270 @property
9271 def maxThreadsPerBlock(self):
9272 return self._pvt_ptr[0].maxThreadsPerBlock
9273 @maxThreadsPerBlock.setter
9274 def maxThreadsPerBlock(self, int maxThreadsPerBlock):
9275 self._pvt_ptr[0].maxThreadsPerBlock = maxThreadsPerBlock
9276 @property
9277 def maxThreadsDim(self):
9278 return self._pvt_ptr[0].maxThreadsDim
9279 @maxThreadsDim.setter
9280 def maxThreadsDim(self, maxThreadsDim):
9281 self._pvt_ptr[0].maxThreadsDim = maxThreadsDim
9282 @property
9283 def maxGridSize(self):
9284 return self._pvt_ptr[0].maxGridSize
9285 @maxGridSize.setter
9286 def maxGridSize(self, maxGridSize):
9287 self._pvt_ptr[0].maxGridSize = maxGridSize
9288 @property
9289 def totalConstMem(self):
9290 return self._pvt_ptr[0].totalConstMem
9291 @totalConstMem.setter
9292 def totalConstMem(self, size_t totalConstMem):
9293 self._pvt_ptr[0].totalConstMem = totalConstMem
9294 @property
9295 def major(self):
9296 return self._pvt_ptr[0].major
9297 @major.setter
9298 def major(self, int major):
9299 self._pvt_ptr[0].major = major
9300 @property
9301 def minor(self):
9302 return self._pvt_ptr[0].minor
9303 @minor.setter
9304 def minor(self, int minor):
9305 self._pvt_ptr[0].minor = minor
9306 @property
9307 def textureAlignment(self):
9308 return self._pvt_ptr[0].textureAlignment
9309 @textureAlignment.setter
9310 def textureAlignment(self, size_t textureAlignment):
9311 self._pvt_ptr[0].textureAlignment = textureAlignment
9312 @property
9313 def texturePitchAlignment(self):
9314 return self._pvt_ptr[0].texturePitchAlignment
9315 @texturePitchAlignment.setter
9316 def texturePitchAlignment(self, size_t texturePitchAlignment):
9317 self._pvt_ptr[0].texturePitchAlignment = texturePitchAlignment
9318 @property
9319 def multiProcessorCount(self):
9320 return self._pvt_ptr[0].multiProcessorCount
9321 @multiProcessorCount.setter
9322 def multiProcessorCount(self, int multiProcessorCount):
9323 self._pvt_ptr[0].multiProcessorCount = multiProcessorCount
9324 @property
9325 def integrated(self):
9326 return self._pvt_ptr[0].integrated
9327 @integrated.setter
9328 def integrated(self, int integrated):
9329 self._pvt_ptr[0].integrated = integrated
9330 @property
9331 def canMapHostMemory(self):
9332 return self._pvt_ptr[0].canMapHostMemory
9333 @canMapHostMemory.setter
9334 def canMapHostMemory(self, int canMapHostMemory):
9335 self._pvt_ptr[0].canMapHostMemory = canMapHostMemory
9336 @property
9337 def maxTexture1D(self):
9338 return self._pvt_ptr[0].maxTexture1D
9339 @maxTexture1D.setter
9340 def maxTexture1D(self, int maxTexture1D):
9341 self._pvt_ptr[0].maxTexture1D = maxTexture1D
9342 @property
9343 def maxTexture1DMipmap(self):
9344 return self._pvt_ptr[0].maxTexture1DMipmap
9345 @maxTexture1DMipmap.setter
9346 def maxTexture1DMipmap(self, int maxTexture1DMipmap):
9347 self._pvt_ptr[0].maxTexture1DMipmap = maxTexture1DMipmap
9348 @property
9349 def maxTexture2D(self):
9350 return self._pvt_ptr[0].maxTexture2D
9351 @maxTexture2D.setter
9352 def maxTexture2D(self, maxTexture2D):
9353 self._pvt_ptr[0].maxTexture2D = maxTexture2D
9354 @property
9355 def maxTexture2DMipmap(self):
9356 return self._pvt_ptr[0].maxTexture2DMipmap
9357 @maxTexture2DMipmap.setter
9358 def maxTexture2DMipmap(self, maxTexture2DMipmap):
9359 self._pvt_ptr[0].maxTexture2DMipmap = maxTexture2DMipmap
9360 @property
9361 def maxTexture2DLinear(self):
9362 return self._pvt_ptr[0].maxTexture2DLinear
9363 @maxTexture2DLinear.setter
9364 def maxTexture2DLinear(self, maxTexture2DLinear):
9365 self._pvt_ptr[0].maxTexture2DLinear = maxTexture2DLinear
9366 @property
9367 def maxTexture2DGather(self):
9368 return self._pvt_ptr[0].maxTexture2DGather
9369 @maxTexture2DGather.setter
9370 def maxTexture2DGather(self, maxTexture2DGather):
9371 self._pvt_ptr[0].maxTexture2DGather = maxTexture2DGather
9372 @property
9373 def maxTexture3D(self):
9374 return self._pvt_ptr[0].maxTexture3D
9375 @maxTexture3D.setter
9376 def maxTexture3D(self, maxTexture3D):
9377 self._pvt_ptr[0].maxTexture3D = maxTexture3D
9378 @property
9379 def maxTexture3DAlt(self):
9380 return self._pvt_ptr[0].maxTexture3DAlt
9381 @maxTexture3DAlt.setter
9382 def maxTexture3DAlt(self, maxTexture3DAlt):
9383 self._pvt_ptr[0].maxTexture3DAlt = maxTexture3DAlt
9384 @property
9385 def maxTextureCubemap(self):
9386 return self._pvt_ptr[0].maxTextureCubemap
9387 @maxTextureCubemap.setter
9388 def maxTextureCubemap(self, int maxTextureCubemap):
9389 self._pvt_ptr[0].maxTextureCubemap = maxTextureCubemap
9390 @property
9391 def maxTexture1DLayered(self):
9392 return self._pvt_ptr[0].maxTexture1DLayered
9393 @maxTexture1DLayered.setter
9394 def maxTexture1DLayered(self, maxTexture1DLayered):
9395 self._pvt_ptr[0].maxTexture1DLayered = maxTexture1DLayered
9396 @property
9397 def maxTexture2DLayered(self):
9398 return self._pvt_ptr[0].maxTexture2DLayered
9399 @maxTexture2DLayered.setter
9400 def maxTexture2DLayered(self, maxTexture2DLayered):
9401 self._pvt_ptr[0].maxTexture2DLayered = maxTexture2DLayered
9402 @property
9403 def maxTextureCubemapLayered(self):
9404 return self._pvt_ptr[0].maxTextureCubemapLayered
9405 @maxTextureCubemapLayered.setter
9406 def maxTextureCubemapLayered(self, maxTextureCubemapLayered):
9407 self._pvt_ptr[0].maxTextureCubemapLayered = maxTextureCubemapLayered
9408 @property
9409 def maxSurface1D(self):
9410 return self._pvt_ptr[0].maxSurface1D
9411 @maxSurface1D.setter
9412 def maxSurface1D(self, int maxSurface1D):
9413 self._pvt_ptr[0].maxSurface1D = maxSurface1D
9414 @property
9415 def maxSurface2D(self):
9416 return self._pvt_ptr[0].maxSurface2D
9417 @maxSurface2D.setter
9418 def maxSurface2D(self, maxSurface2D):
9419 self._pvt_ptr[0].maxSurface2D = maxSurface2D
9420 @property
9421 def maxSurface3D(self):
9422 return self._pvt_ptr[0].maxSurface3D
9423 @maxSurface3D.setter
9424 def maxSurface3D(self, maxSurface3D):
9425 self._pvt_ptr[0].maxSurface3D = maxSurface3D
9426 @property
9427 def maxSurface1DLayered(self):
9428 return self._pvt_ptr[0].maxSurface1DLayered
9429 @maxSurface1DLayered.setter
9430 def maxSurface1DLayered(self, maxSurface1DLayered):
9431 self._pvt_ptr[0].maxSurface1DLayered = maxSurface1DLayered
9432 @property
9433 def maxSurface2DLayered(self):
9434 return self._pvt_ptr[0].maxSurface2DLayered
9435 @maxSurface2DLayered.setter
9436 def maxSurface2DLayered(self, maxSurface2DLayered):
9437 self._pvt_ptr[0].maxSurface2DLayered = maxSurface2DLayered
9438 @property
9439 def maxSurfaceCubemap(self):
9440 return self._pvt_ptr[0].maxSurfaceCubemap
9441 @maxSurfaceCubemap.setter
9442 def maxSurfaceCubemap(self, int maxSurfaceCubemap):
9443 self._pvt_ptr[0].maxSurfaceCubemap = maxSurfaceCubemap
9444 @property
9445 def maxSurfaceCubemapLayered(self):
9446 return self._pvt_ptr[0].maxSurfaceCubemapLayered
9447 @maxSurfaceCubemapLayered.setter
9448 def maxSurfaceCubemapLayered(self, maxSurfaceCubemapLayered):
9449 self._pvt_ptr[0].maxSurfaceCubemapLayered = maxSurfaceCubemapLayered
9450 @property
9451 def surfaceAlignment(self):
9452 return self._pvt_ptr[0].surfaceAlignment
9453 @surfaceAlignment.setter
9454 def surfaceAlignment(self, size_t surfaceAlignment):
9455 self._pvt_ptr[0].surfaceAlignment = surfaceAlignment
9456 @property
9457 def concurrentKernels(self):
9458 return self._pvt_ptr[0].concurrentKernels
9459 @concurrentKernels.setter
9460 def concurrentKernels(self, int concurrentKernels):
9461 self._pvt_ptr[0].concurrentKernels = concurrentKernels
9462 @property
9463 def ECCEnabled(self):
9464 return self._pvt_ptr[0].ECCEnabled
9465 @ECCEnabled.setter
9466 def ECCEnabled(self, int ECCEnabled):
9467 self._pvt_ptr[0].ECCEnabled = ECCEnabled
9468 @property
9469 def pciBusID(self):
9470 return self._pvt_ptr[0].pciBusID
9471 @pciBusID.setter
9472 def pciBusID(self, int pciBusID):
9473 self._pvt_ptr[0].pciBusID = pciBusID
9474 @property
9475 def pciDeviceID(self):
9476 return self._pvt_ptr[0].pciDeviceID
9477 @pciDeviceID.setter
9478 def pciDeviceID(self, int pciDeviceID):
9479 self._pvt_ptr[0].pciDeviceID = pciDeviceID
9480 @property
9481 def pciDomainID(self):
9482 return self._pvt_ptr[0].pciDomainID
9483 @pciDomainID.setter
9484 def pciDomainID(self, int pciDomainID):
9485 self._pvt_ptr[0].pciDomainID = pciDomainID
9486 @property
9487 def tccDriver(self):
9488 return self._pvt_ptr[0].tccDriver
9489 @tccDriver.setter
9490 def tccDriver(self, int tccDriver):
9491 self._pvt_ptr[0].tccDriver = tccDriver
9492 @property
9493 def asyncEngineCount(self):
9494 return self._pvt_ptr[0].asyncEngineCount
9495 @asyncEngineCount.setter
9496 def asyncEngineCount(self, int asyncEngineCount):
9497 self._pvt_ptr[0].asyncEngineCount = asyncEngineCount
9498 @property
9499 def unifiedAddressing(self):
9500 return self._pvt_ptr[0].unifiedAddressing
9501 @unifiedAddressing.setter
9502 def unifiedAddressing(self, int unifiedAddressing):
9503 self._pvt_ptr[0].unifiedAddressing = unifiedAddressing
9504 @property
9505 def memoryBusWidth(self):
9506 return self._pvt_ptr[0].memoryBusWidth
9507 @memoryBusWidth.setter
9508 def memoryBusWidth(self, int memoryBusWidth):
9509 self._pvt_ptr[0].memoryBusWidth = memoryBusWidth
9510 @property
9511 def l2CacheSize(self):
9512 return self._pvt_ptr[0].l2CacheSize
9513 @l2CacheSize.setter
9514 def l2CacheSize(self, int l2CacheSize):
9515 self._pvt_ptr[0].l2CacheSize = l2CacheSize
9516 @property
9517 def persistingL2CacheMaxSize(self):
9518 return self._pvt_ptr[0].persistingL2CacheMaxSize
9519 @persistingL2CacheMaxSize.setter
9520 def persistingL2CacheMaxSize(self, int persistingL2CacheMaxSize):
9521 self._pvt_ptr[0].persistingL2CacheMaxSize = persistingL2CacheMaxSize
9522 @property
9523 def maxThreadsPerMultiProcessor(self):
9524 return self._pvt_ptr[0].maxThreadsPerMultiProcessor
9525 @maxThreadsPerMultiProcessor.setter
9526 def maxThreadsPerMultiProcessor(self, int maxThreadsPerMultiProcessor):
9527 self._pvt_ptr[0].maxThreadsPerMultiProcessor = maxThreadsPerMultiProcessor
9528 @property
9529 def streamPrioritiesSupported(self):
9530 return self._pvt_ptr[0].streamPrioritiesSupported
9531 @streamPrioritiesSupported.setter
9532 def streamPrioritiesSupported(self, int streamPrioritiesSupported):
9533 self._pvt_ptr[0].streamPrioritiesSupported = streamPrioritiesSupported
9534 @property
9535 def globalL1CacheSupported(self):
9536 return self._pvt_ptr[0].globalL1CacheSupported
9537 @globalL1CacheSupported.setter
9538 def globalL1CacheSupported(self, int globalL1CacheSupported):
9539 self._pvt_ptr[0].globalL1CacheSupported = globalL1CacheSupported
9540 @property
9541 def localL1CacheSupported(self):
9542 return self._pvt_ptr[0].localL1CacheSupported
9543 @localL1CacheSupported.setter
9544 def localL1CacheSupported(self, int localL1CacheSupported):
9545 self._pvt_ptr[0].localL1CacheSupported = localL1CacheSupported
9546 @property
9547 def sharedMemPerMultiprocessor(self):
9548 return self._pvt_ptr[0].sharedMemPerMultiprocessor
9549 @sharedMemPerMultiprocessor.setter
9550 def sharedMemPerMultiprocessor(self, size_t sharedMemPerMultiprocessor):
9551 self._pvt_ptr[0].sharedMemPerMultiprocessor = sharedMemPerMultiprocessor
9552 @property
9553 def regsPerMultiprocessor(self):
9554 return self._pvt_ptr[0].regsPerMultiprocessor
9555 @regsPerMultiprocessor.setter
9556 def regsPerMultiprocessor(self, int regsPerMultiprocessor):
9557 self._pvt_ptr[0].regsPerMultiprocessor = regsPerMultiprocessor
9558 @property
9559 def managedMemory(self):
9560 return self._pvt_ptr[0].managedMemory
9561 @managedMemory.setter
9562 def managedMemory(self, int managedMemory):
9563 self._pvt_ptr[0].managedMemory = managedMemory
9564 @property
9565 def isMultiGpuBoard(self):
9566 return self._pvt_ptr[0].isMultiGpuBoard
9567 @isMultiGpuBoard.setter
9568 def isMultiGpuBoard(self, int isMultiGpuBoard):
9569 self._pvt_ptr[0].isMultiGpuBoard = isMultiGpuBoard
9570 @property
9571 def multiGpuBoardGroupID(self):
9572 return self._pvt_ptr[0].multiGpuBoardGroupID
9573 @multiGpuBoardGroupID.setter
9574 def multiGpuBoardGroupID(self, int multiGpuBoardGroupID):
9575 self._pvt_ptr[0].multiGpuBoardGroupID = multiGpuBoardGroupID
9576 @property
9577 def hostNativeAtomicSupported(self):
9578 return self._pvt_ptr[0].hostNativeAtomicSupported
9579 @hostNativeAtomicSupported.setter
9580 def hostNativeAtomicSupported(self, int hostNativeAtomicSupported):
9581 self._pvt_ptr[0].hostNativeAtomicSupported = hostNativeAtomicSupported
9582 @property
9583 def pageableMemoryAccess(self):
9584 return self._pvt_ptr[0].pageableMemoryAccess
9585 @pageableMemoryAccess.setter
9586 def pageableMemoryAccess(self, int pageableMemoryAccess):
9587 self._pvt_ptr[0].pageableMemoryAccess = pageableMemoryAccess
9588 @property
9589 def concurrentManagedAccess(self):
9590 return self._pvt_ptr[0].concurrentManagedAccess
9591 @concurrentManagedAccess.setter
9592 def concurrentManagedAccess(self, int concurrentManagedAccess):
9593 self._pvt_ptr[0].concurrentManagedAccess = concurrentManagedAccess
9594 @property
9595 def computePreemptionSupported(self):
9596 return self._pvt_ptr[0].computePreemptionSupported
9597 @computePreemptionSupported.setter
9598 def computePreemptionSupported(self, int computePreemptionSupported):
9599 self._pvt_ptr[0].computePreemptionSupported = computePreemptionSupported
9600 @property
9601 def canUseHostPointerForRegisteredMem(self):
9602 return self._pvt_ptr[0].canUseHostPointerForRegisteredMem
9603 @canUseHostPointerForRegisteredMem.setter
9604 def canUseHostPointerForRegisteredMem(self, int canUseHostPointerForRegisteredMem):
9605 self._pvt_ptr[0].canUseHostPointerForRegisteredMem = canUseHostPointerForRegisteredMem
9606 @property
9607 def cooperativeLaunch(self):
9608 return self._pvt_ptr[0].cooperativeLaunch
9609 @cooperativeLaunch.setter
9610 def cooperativeLaunch(self, int cooperativeLaunch):
9611 self._pvt_ptr[0].cooperativeLaunch = cooperativeLaunch
9612 @property
9613 def sharedMemPerBlockOptin(self):
9614 return self._pvt_ptr[0].sharedMemPerBlockOptin
9615 @sharedMemPerBlockOptin.setter
9616 def sharedMemPerBlockOptin(self, size_t sharedMemPerBlockOptin):
9617 self._pvt_ptr[0].sharedMemPerBlockOptin = sharedMemPerBlockOptin
9618 @property
9619 def pageableMemoryAccessUsesHostPageTables(self):
9620 return self._pvt_ptr[0].pageableMemoryAccessUsesHostPageTables
9621 @pageableMemoryAccessUsesHostPageTables.setter
9622 def pageableMemoryAccessUsesHostPageTables(self, int pageableMemoryAccessUsesHostPageTables):
9623 self._pvt_ptr[0].pageableMemoryAccessUsesHostPageTables = pageableMemoryAccessUsesHostPageTables
9624 @property
9625 def directManagedMemAccessFromHost(self):
9626 return self._pvt_ptr[0].directManagedMemAccessFromHost
9627 @directManagedMemAccessFromHost.setter
9628 def directManagedMemAccessFromHost(self, int directManagedMemAccessFromHost):
9629 self._pvt_ptr[0].directManagedMemAccessFromHost = directManagedMemAccessFromHost
9630 @property
9631 def maxBlocksPerMultiProcessor(self):
9632 return self._pvt_ptr[0].maxBlocksPerMultiProcessor
9633 @maxBlocksPerMultiProcessor.setter
9634 def maxBlocksPerMultiProcessor(self, int maxBlocksPerMultiProcessor):
9635 self._pvt_ptr[0].maxBlocksPerMultiProcessor = maxBlocksPerMultiProcessor
9636 @property
9637 def accessPolicyMaxWindowSize(self):
9638 return self._pvt_ptr[0].accessPolicyMaxWindowSize
9639 @accessPolicyMaxWindowSize.setter
9640 def accessPolicyMaxWindowSize(self, int accessPolicyMaxWindowSize):
9641 self._pvt_ptr[0].accessPolicyMaxWindowSize = accessPolicyMaxWindowSize
9642 @property
9643 def reservedSharedMemPerBlock(self):
9644 return self._pvt_ptr[0].reservedSharedMemPerBlock
9645 @reservedSharedMemPerBlock.setter
9646 def reservedSharedMemPerBlock(self, size_t reservedSharedMemPerBlock):
9647 self._pvt_ptr[0].reservedSharedMemPerBlock = reservedSharedMemPerBlock
9648 @property
9649 def hostRegisterSupported(self):
9650 return self._pvt_ptr[0].hostRegisterSupported
9651 @hostRegisterSupported.setter
9652 def hostRegisterSupported(self, int hostRegisterSupported):
9653 self._pvt_ptr[0].hostRegisterSupported = hostRegisterSupported
9654 @property
9655 def sparseCudaArraySupported(self):
9656 return self._pvt_ptr[0].sparseCudaArraySupported
9657 @sparseCudaArraySupported.setter
9658 def sparseCudaArraySupported(self, int sparseCudaArraySupported):
9659 self._pvt_ptr[0].sparseCudaArraySupported = sparseCudaArraySupported
9660 @property
9661 def hostRegisterReadOnlySupported(self):
9662 return self._pvt_ptr[0].hostRegisterReadOnlySupported
9663 @hostRegisterReadOnlySupported.setter
9664 def hostRegisterReadOnlySupported(self, int hostRegisterReadOnlySupported):
9665 self._pvt_ptr[0].hostRegisterReadOnlySupported = hostRegisterReadOnlySupported
9666 @property
9667 def timelineSemaphoreInteropSupported(self):
9668 return self._pvt_ptr[0].timelineSemaphoreInteropSupported
9669 @timelineSemaphoreInteropSupported.setter
9670 def timelineSemaphoreInteropSupported(self, int timelineSemaphoreInteropSupported):
9671 self._pvt_ptr[0].timelineSemaphoreInteropSupported = timelineSemaphoreInteropSupported
9672 @property
9673 def memoryPoolsSupported(self):
9674 return self._pvt_ptr[0].memoryPoolsSupported
9675 @memoryPoolsSupported.setter
9676 def memoryPoolsSupported(self, int memoryPoolsSupported):
9677 self._pvt_ptr[0].memoryPoolsSupported = memoryPoolsSupported
9678 @property
9679 def gpuDirectRDMASupported(self):
9680 return self._pvt_ptr[0].gpuDirectRDMASupported
9681 @gpuDirectRDMASupported.setter
9682 def gpuDirectRDMASupported(self, int gpuDirectRDMASupported):
9683 self._pvt_ptr[0].gpuDirectRDMASupported = gpuDirectRDMASupported
9684 @property
9685 def gpuDirectRDMAFlushWritesOptions(self):
9686 return self._pvt_ptr[0].gpuDirectRDMAFlushWritesOptions
9687 @gpuDirectRDMAFlushWritesOptions.setter
9688 def gpuDirectRDMAFlushWritesOptions(self, unsigned int gpuDirectRDMAFlushWritesOptions):
9689 self._pvt_ptr[0].gpuDirectRDMAFlushWritesOptions = gpuDirectRDMAFlushWritesOptions
9690 @property
9691 def gpuDirectRDMAWritesOrdering(self):
9692 return self._pvt_ptr[0].gpuDirectRDMAWritesOrdering
9693 @gpuDirectRDMAWritesOrdering.setter
9694 def gpuDirectRDMAWritesOrdering(self, int gpuDirectRDMAWritesOrdering):
9695 self._pvt_ptr[0].gpuDirectRDMAWritesOrdering = gpuDirectRDMAWritesOrdering
9696 @property
9697 def memoryPoolSupportedHandleTypes(self):
9698 return self._pvt_ptr[0].memoryPoolSupportedHandleTypes
9699 @memoryPoolSupportedHandleTypes.setter
9700 def memoryPoolSupportedHandleTypes(self, unsigned int memoryPoolSupportedHandleTypes):
9701 self._pvt_ptr[0].memoryPoolSupportedHandleTypes = memoryPoolSupportedHandleTypes
9702 @property
9703 def deferredMappingCudaArraySupported(self):
9704 return self._pvt_ptr[0].deferredMappingCudaArraySupported
9705 @deferredMappingCudaArraySupported.setter
9706 def deferredMappingCudaArraySupported(self, int deferredMappingCudaArraySupported):
9707 self._pvt_ptr[0].deferredMappingCudaArraySupported = deferredMappingCudaArraySupported
9708 @property
9709 def ipcEventSupported(self):
9710 return self._pvt_ptr[0].ipcEventSupported
9711 @ipcEventSupported.setter
9712 def ipcEventSupported(self, int ipcEventSupported):
9713 self._pvt_ptr[0].ipcEventSupported = ipcEventSupported
9714 @property
9715 def clusterLaunch(self):
9716 return self._pvt_ptr[0].clusterLaunch
9717 @clusterLaunch.setter
9718 def clusterLaunch(self, int clusterLaunch):
9719 self._pvt_ptr[0].clusterLaunch = clusterLaunch
9720 @property
9721 def unifiedFunctionPointers(self):
9722 return self._pvt_ptr[0].unifiedFunctionPointers
9723 @unifiedFunctionPointers.setter
9724 def unifiedFunctionPointers(self, int unifiedFunctionPointers):
9725 self._pvt_ptr[0].unifiedFunctionPointers = unifiedFunctionPointers
9726 @property
9727 def deviceNumaConfig(self):
9728 return self._pvt_ptr[0].deviceNumaConfig
9729 @deviceNumaConfig.setter
9730 def deviceNumaConfig(self, int deviceNumaConfig):
9731 self._pvt_ptr[0].deviceNumaConfig = deviceNumaConfig
9732 @property
9733 def deviceNumaId(self):
9734 return self._pvt_ptr[0].deviceNumaId
9735 @deviceNumaId.setter
9736 def deviceNumaId(self, int deviceNumaId):
9737 self._pvt_ptr[0].deviceNumaId = deviceNumaId
9738 @property
9739 def mpsEnabled(self):
9740 return self._pvt_ptr[0].mpsEnabled
9741 @mpsEnabled.setter
9742 def mpsEnabled(self, int mpsEnabled):
9743 self._pvt_ptr[0].mpsEnabled = mpsEnabled
9744 @property
9745 def hostNumaId(self):
9746 return self._pvt_ptr[0].hostNumaId
9747 @hostNumaId.setter
9748 def hostNumaId(self, int hostNumaId):
9749 self._pvt_ptr[0].hostNumaId = hostNumaId
9750 @property
9751 def gpuPciDeviceID(self):
9752 return self._pvt_ptr[0].gpuPciDeviceID
9753 @gpuPciDeviceID.setter
9754 def gpuPciDeviceID(self, unsigned int gpuPciDeviceID):
9755 self._pvt_ptr[0].gpuPciDeviceID = gpuPciDeviceID
9756 @property
9757 def gpuPciSubsystemID(self):
9758 return self._pvt_ptr[0].gpuPciSubsystemID
9759 @gpuPciSubsystemID.setter
9760 def gpuPciSubsystemID(self, unsigned int gpuPciSubsystemID):
9761 self._pvt_ptr[0].gpuPciSubsystemID = gpuPciSubsystemID
9762 @property
9763 def hostNumaMultinodeIpcSupported(self):
9764 return self._pvt_ptr[0].hostNumaMultinodeIpcSupported
9765 @hostNumaMultinodeIpcSupported.setter
9766 def hostNumaMultinodeIpcSupported(self, int hostNumaMultinodeIpcSupported):
9767 self._pvt_ptr[0].hostNumaMultinodeIpcSupported = hostNumaMultinodeIpcSupported
9768 @property
9769 def reserved(self):
9770 return self._pvt_ptr[0].reserved
9771 @reserved.setter
9772 def reserved(self, reserved):
9773 self._pvt_ptr[0].reserved = reserved
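# A minimal usage sketch (not part of the generated bindings): reading a few
# of the capability fields above. Assumes a CUDA device is visible; error
# handling is elided.
def _sketch_query_device_properties():
    err, prop = cudaGetDeviceProperties(0)
    if err == cudaError_t.cudaSuccess:
        print('memoryPoolsSupported       :', prop.memoryPoolsSupported)
        print('clusterLaunch              :', prop.clusterLaunch)
        print('maxBlocksPerMultiProcessor :', prop.maxBlocksPerMultiProcessor)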
9775cdef class cudaIpcEventHandle_st:
9776 """
9777 CUDA IPC event handle
9779 Attributes
9780 ----------
9781 reserved : bytes
9784 Methods
9785 -------
9786 getPtr()
9787 Get memory address of class instance
9788 """
9789 def __cinit__(self, void_ptr _ptr = 0):
9790 if _ptr == 0:
9791 self._pvt_ptr = &self._pvt_val
9792 else:
9793 self._pvt_ptr = <cyruntime.cudaIpcEventHandle_st *>_ptr
9794 def __init__(self, void_ptr _ptr = 0):
9795 pass
9796 def __dealloc__(self):
9797 pass
9798 def getPtr(self):
9799 return <void_ptr>self._pvt_ptr
9800 def __repr__(self):
9801 if self._pvt_ptr is not NULL:
9802 str_list = []
9803 try:
9804 str_list += ['reserved : ' + str(self.reserved)]
9805 except ValueError:
9806 str_list += ['reserved : <ValueError>']
9807 return '\n'.join(str_list)
9808 else:
9809 return ''
9810 @property
9811 def reserved(self):
9812 return PyBytes_FromStringAndSize(self._pvt_ptr[0].reserved, 64)
9813 @reserved.setter
9814 def reserved(self, reserved):
9815 if len(reserved) != 64:
9816 raise ValueError("reserved length must be 64, is " + str(len(reserved)))
9817 if CHAR_MIN == 0:
9818 for i, b in enumerate(reserved):
9819 if b < 0 and b > -129:
9820 b = b + 256
9821 self._pvt_ptr[0].reserved[i] = b
9822 else:
9823 for i, b in enumerate(reserved):
9824 if b > 127 and b < 256:
9825 b = b - 256
9826 self._pvt_ptr[0].reserved[i] = b
9828cdef class cudaIpcMemHandle_st:
9829 """
9830 CUDA IPC memory handle
9832 Attributes
9833 ----------
9834 reserved : bytes
9837 Methods
9838 -------
9839 getPtr()
9840 Get memory address of class instance
9841 """
9842 def __cinit__(self, void_ptr _ptr = 0):
9843 if _ptr == 0:
9844 self._pvt_ptr = &self._pvt_val
9845 else:
9846 self._pvt_ptr = <cyruntime.cudaIpcMemHandle_st *>_ptr
9847 def __init__(self, void_ptr _ptr = 0):
9848 pass
9849 def __dealloc__(self):
9850 pass
9851 def getPtr(self):
9852 return <void_ptr>self._pvt_ptr
9853 def __repr__(self):
9854 if self._pvt_ptr is not NULL:
9855 str_list = []
9856 try:
9857 str_list += ['reserved : ' + str(self.reserved)]
9858 except ValueError:
9859 str_list += ['reserved : <ValueError>']
9860 return '\n'.join(str_list)
9861 else:
9862 return ''
9863 @property
9864 def reserved(self):
9865 return PyBytes_FromStringAndSize(self._pvt_ptr[0].reserved, 64)
9866 @reserved.setter
9867 def reserved(self, reserved):
9868 if len(reserved) != 64:
9869 raise ValueError("reserved length must be 64, is " + str(len(reserved)))
9870 if CHAR_MIN == 0:
9871 for i, b in enumerate(reserved):
9872 if b < 0 and b > -129:
9873 b = b + 256
9874 self._pvt_ptr[0].reserved[i] = b
9875 else:
9876 for i, b in enumerate(reserved):
9877 if b > 127 and b < 256:
9878 b = b - 256
9879 self._pvt_ptr[0].reserved[i] = b
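# An IPC sketch (not part of the generated bindings), assuming two
# cooperating processes: the 64-byte `reserved` payload is the opaque blob
# that crosses the process boundary. Error handling is elided.
def _sketch_ipc_event_export():
    err, event = cudaEventCreateWithFlags(cudaEventInterprocess | cudaEventDisableTiming)
    err, handle = cudaIpcGetEventHandle(event)
    return bytes(handle.reserved)      # 64 bytes; send over a pipe or socket
def _sketch_ipc_event_import(payload):
    handle = cudaIpcEventHandle_t()
    handle.reserved = payload          # must be exactly 64 bytes
    err, event = cudaIpcOpenEventHandle(handle)
    return event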
9881cdef class cudaMemFabricHandle_st:
9882 """
9883 Attributes
9884 ----------
9885 reserved : bytes
9888 Methods
9889 -------
9890 getPtr()
9891 Get memory address of class instance
9892 """
9893 def __cinit__(self, void_ptr _ptr = 0):
9894 if _ptr == 0:
9895 self._pvt_ptr = &self._pvt_val
9896 else:
9897 self._pvt_ptr = <cyruntime.cudaMemFabricHandle_st *>_ptr
9898 def __init__(self, void_ptr _ptr = 0):
9899 pass
9900 def __dealloc__(self):
9901 pass
9902 def getPtr(self):
9903 return <void_ptr>self._pvt_ptr
9904 def __repr__(self):
9905 if self._pvt_ptr is not NULL:
9906 str_list = []
9907 try:
9908 str_list += ['reserved : ' + str(self.reserved)]
9909 except ValueError:
9910 str_list += ['reserved : <ValueError>']
9911 return '\n'.join(str_list)
9912 else:
9913 return ''
9914 @property
9915 def reserved(self):
9916 return PyBytes_FromStringAndSize(self._pvt_ptr[0].reserved, 64)
9917 @reserved.setter
9918 def reserved(self, reserved):
9919 if len(reserved) != 64:
9920 raise ValueError("reserved length must be 64, is " + str(len(reserved)))
9921 if CHAR_MIN == 0:
9922 for i, b in enumerate(reserved):
9923 if b < 0 and b > -129:
9924 b = b + 256
9925 self._pvt_ptr[0].reserved[i] = b
9926 else:
9927 for i, b in enumerate(reserved):
9928 if b > 127 and b < 256:
9929 b = b - 256
9930 self._pvt_ptr[0].reserved[i] = b
9932cdef class anon_struct8:
9933 """
9934 Attributes
9935 ----------
9936 handle : Any
9938 name : Any
9941 Methods
9942 -------
9943 getPtr()
9944 Get memory address of class instance
9945 """
9946 def __cinit__(self, void_ptr _ptr):
9947 self._pvt_ptr = <cyruntime.cudaExternalMemoryHandleDesc *>_ptr
9949 def __init__(self, void_ptr _ptr):
9950 pass
9951 def __dealloc__(self):
9952 pass
9953 def getPtr(self):
9954 return <void_ptr>&self._pvt_ptr[0].handle.win32
9955 def __repr__(self):
9956 if self._pvt_ptr is not NULL:
9957 str_list = []
9958 try:
9959 str_list += ['handle : ' + hex(self.handle)]
9960 except ValueError:
9961 str_list += ['handle : <ValueError>']
9962 try:
9963 str_list += ['name : ' + hex(self.name)]
9964 except ValueError:
9965 str_list += ['name : <ValueError>']
9966 return '\n'.join(str_list)
9967 else:
9968 return ''
9969 @property
9970 def handle(self):
9971 return <void_ptr>self._pvt_ptr[0].handle.win32.handle
9972 @handle.setter
9973 def handle(self, handle):
9974 _chandle = _HelperInputVoidPtr(handle)
9975 self._pvt_ptr[0].handle.win32.handle = <void*><void_ptr>_chandle.cptr
9976 @property
9977 def name(self):
9978 return <void_ptr>self._pvt_ptr[0].handle.win32.name
9979 @name.setter
9980 def name(self, name):
9981 _cname = _HelperInputVoidPtr(name)
9982 self._pvt_ptr[0].handle.win32.name = <void*><void_ptr>_cname.cptr
9984cdef class anon_union2:
9985 """
9986 Attributes
9987 ----------
9988 fd : int
9990 win32 : anon_struct8
9992 nvSciBufObject : Any
9995 Methods
9996 -------
9997 getPtr()
9998 Get memory address of class instance
9999 """
10000 def __cinit__(self, void_ptr _ptr):
10001 self._pvt_ptr = <cyruntime.cudaExternalMemoryHandleDesc *>_ptr
10003 def __init__(self, void_ptr _ptr):
10005 self._win32 = anon_struct8(_ptr=<void_ptr>self._pvt_ptr)
10006 def __dealloc__(self):
10007 pass
10008 def getPtr(self):
10009 return <void_ptr>&self._pvt_ptr[0].handle
10010 def __repr__(self):
10011 if self._pvt_ptr is not NULL:
10012 str_list = []
10013 try:
10014 str_list += ['fd : ' + str(self.fd)]
10015 except ValueError:
10016 str_list += ['fd : <ValueError>']
10017 try:
10018 str_list += ['win32 :\n' + '\n'.join([' ' + line for line in str(self.win32).splitlines()])]
10019 except ValueError:
10020 str_list += ['win32 : <ValueError>']
10021 try:
10022 str_list += ['nvSciBufObject : ' + hex(self.nvSciBufObject)]
10023 except ValueError:
10024 str_list += ['nvSciBufObject : <ValueError>']
10025 return '\n'.join(str_list)
10026 else:
10027 return ''
10028 @property
10029 def fd(self):
10030 return self._pvt_ptr[0].handle.fd
10031 @fd.setter
10032 def fd(self, int fd):
10033 self._pvt_ptr[0].handle.fd = fd
10034 @property
10035 def win32(self):
10036 return self._win32
10037 @win32.setter
10038 def win32(self, win32 not None : anon_struct8):
10039 string.memcpy(&self._pvt_ptr[0].handle.win32, <cyruntime.anon_struct8*><void_ptr>win32.getPtr(), sizeof(self._pvt_ptr[0].handle.win32))
10040 @property
10041 def nvSciBufObject(self):
10042 return <void_ptr>self._pvt_ptr[0].handle.nvSciBufObject
10043 @nvSciBufObject.setter
10044 def nvSciBufObject(self, nvSciBufObject):
10045 _cnvSciBufObject = _HelperInputVoidPtr(nvSciBufObject)
10046 self._pvt_ptr[0].handle.nvSciBufObject = <void*><void_ptr>_cnvSciBufObject.cptr
10048cdef class cudaExternalMemoryHandleDesc:
10049 """
10050 External memory handle descriptor
10052 Attributes
10053 ----------
10054 type : cudaExternalMemoryHandleType
10055 Type of the handle
10056 handle : anon_union2
10058 size : unsigned long long
10059 Size of the memory allocation
10060 flags : unsigned int
10061 Flags must either be zero or cudaExternalMemoryDedicated
10062 reserved : list[unsigned int]
10063 Must be zero
10065 Methods
10066 -------
10067 getPtr()
10068 Get memory address of class instance
10069 """
10070 def __cinit__(self, void_ptr _ptr = 0):
10071 if _ptr == 0:
10072 self._val_ptr = <cyruntime.cudaExternalMemoryHandleDesc *>calloc(1, sizeof(cyruntime.cudaExternalMemoryHandleDesc))
10073 self._pvt_ptr = self._val_ptr
10074 else:
10075 self._pvt_ptr = <cyruntime.cudaExternalMemoryHandleDesc *>_ptr
10076 def __init__(self, void_ptr _ptr = 0):
10078 self._handle = anon_union2(_ptr=<void_ptr>self._pvt_ptr)
10079 def __dealloc__(self):
10080 if self._val_ptr is not NULL:
10081 free(self._val_ptr)
10082 def getPtr(self):
10083 return <void_ptr>self._pvt_ptr
10084 def __repr__(self):
10085 if self._pvt_ptr is not NULL:
10086 str_list = []
10087 try:
10088 str_list += ['type : ' + str(self.type)]
10089 except ValueError:
10090 str_list += ['type : <ValueError>']
10091 try:
10092 str_list += ['handle :\n' + '\n'.join([' ' + line for line in str(self.handle).splitlines()])]
10093 except ValueError:
10094 str_list += ['handle : <ValueError>']
10095 try:
10096 str_list += ['size : ' + str(self.size)]
10097 except ValueError:
10098 str_list += ['size : <ValueError>']
10099 try:
10100 str_list += ['flags : ' + str(self.flags)]
10101 except ValueError:
10102 str_list += ['flags : <ValueError>']
10103 try:
10104 str_list += ['reserved : ' + str(self.reserved)]
10105 except ValueError:
10106 str_list += ['reserved : <ValueError>']
10107 return '\n'.join(str_list)
10108 else:
10109 return ''
10110 @property
10111 def type(self):
10112 if self._pvt_ptr[0].type not in _dict_cudaExternalMemoryHandleType:
10113 return None
10114 return _dict_cudaExternalMemoryHandleType[self._pvt_ptr[0].type]
10115 @type.setter
10116 def type(self, type not None : cudaExternalMemoryHandleType):
10117 self._pvt_ptr[0].type = type.value
10118 @property
10119 def handle(self):
10120 return self._handle
10121 @handle.setter
10122 def handle(self, handle not None : anon_union2):
10123 string.memcpy(&self._pvt_ptr[0].handle, <cyruntime.anon_union2*><void_ptr>handle.getPtr(), sizeof(self._pvt_ptr[0].handle))
10124 @property
10125 def size(self):
10126 return self._pvt_ptr[0].size
10127 @size.setter
10128 def size(self, unsigned long long size):
10129 self._pvt_ptr[0].size = size
10130 @property
10131 def flags(self):
10132 return self._pvt_ptr[0].flags
10133 @flags.setter
10134 def flags(self, unsigned int flags):
10135 self._pvt_ptr[0].flags = flags
10136 @property
10137 def reserved(self):
10138 return self._pvt_ptr[0].reserved
10139 @reserved.setter
10140 def reserved(self, reserved):
10141 self._pvt_ptr[0].reserved = reserved
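# An import sketch (not part of the generated bindings), assuming `fd` and
# `nbytes` describe an opaque POSIX file descriptor exported by another API
# (e.g. Vulkan). On successful import, ownership of the fd passes to CUDA.
def _sketch_import_external_memory(fd, nbytes):
    desc = cudaExternalMemoryHandleDesc()
    desc.type = cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueFd
    desc.handle.fd = fd
    desc.size = nbytes
    desc.flags = 0
    err, ext_mem = cudaImportExternalMemory(desc)
    return ext_mem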
10143cdef class cudaExternalMemoryBufferDesc:
10144 """
10145 External memory buffer descriptor
10147 Attributes
10148 ----------
10149 offset : unsigned long long
10150 Offset into the memory object where the buffer's base is
10151 size : unsigned long long
10152 Size of the buffer
10153 flags : unsigned int
10154 Flags reserved for future use. Must be zero.
10155 reserved : list[unsigned int]
10156 Must be zero
10158 Methods
10159 -------
10160 getPtr()
10161 Get memory address of class instance
10162 """
10163 def __cinit__(self, void_ptr _ptr = 0):
10164 if _ptr == 0:
10165 self._pvt_ptr = &self._pvt_val
10166 else:
10167 self._pvt_ptr = <cyruntime.cudaExternalMemoryBufferDesc *>_ptr
10168 def __init__(self, void_ptr _ptr = 0):
10169 pass
10170 def __dealloc__(self):
10171 pass
10172 def getPtr(self):
10173 return <void_ptr>self._pvt_ptr
10174 def __repr__(self):
10175 if self._pvt_ptr is not NULL:
10176 str_list = []
10177 try:
10178 str_list += ['offset : ' + str(self.offset)]
10179 except ValueError:
10180 str_list += ['offset : <ValueError>']
10181 try:
10182 str_list += ['size : ' + str(self.size)]
10183 except ValueError:
10184 str_list += ['size : <ValueError>']
10185 try:
10186 str_list += ['flags : ' + str(self.flags)]
10187 except ValueError:
10188 str_list += ['flags : <ValueError>']
10189 try:
10190 str_list += ['reserved : ' + str(self.reserved)]
10191 except ValueError:
10192 str_list += ['reserved : <ValueError>']
10193 return '\n'.join(str_list)
10194 else:
10195 return ''
10196 @property
10197 def offset(self):
10198 return self._pvt_ptr[0].offset
10199 @offset.setter
10200 def offset(self, unsigned long long offset):
10201 self._pvt_ptr[0].offset = offset
10202 @property
10203 def size(self):
10204 return self._pvt_ptr[0].size
10205 @size.setter
10206 def size(self, unsigned long long size):
10207 self._pvt_ptr[0].size = size
10208 @property
10209 def flags(self):
10210 return self._pvt_ptr[0].flags
10211 @flags.setter
10212 def flags(self, unsigned int flags):
10213 self._pvt_ptr[0].flags = flags
10214 @property
10215 def reserved(self):
10216 return self._pvt_ptr[0].reserved
10217 @reserved.setter
10218 def reserved(self, reserved):
10219 self._pvt_ptr[0].reserved = reserved
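# A follow-on sketch: carving a device pointer out of memory imported via
# cudaImportExternalMemory. `ext_mem` and `nbytes` are assumed from the
# sketch above.
def _sketch_map_external_buffer(ext_mem, nbytes):
    buf_desc = cudaExternalMemoryBufferDesc()
    buf_desc.offset = 0
    buf_desc.size = nbytes
    buf_desc.flags = 0                 # reserved; must be zero
    err, dev_ptr = cudaExternalMemoryGetMappedBuffer(ext_mem, buf_desc)
    return dev_ptr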
10221cdef class cudaExternalMemoryMipmappedArrayDesc:
10222 """
10223 External memory mipmap descriptor
10225 Attributes
10226 ----------
10227 offset : unsigned long long
10228 Offset into the memory object where the base level of the mipmap
10229 chain is.
10230 formatDesc : cudaChannelFormatDesc
10231 Format of base level of the mipmap chain
10232 extent : cudaExtent
10233 Dimensions of base level of the mipmap chain
10234 flags : unsigned int
10235 Flags associated with CUDA mipmapped arrays. See
10236 cudaMallocMipmappedArray
10237 numLevels : unsigned int
10238 Total number of levels in the mipmap chain
10239 reserved : list[unsigned int]
10240 Must be zero
10242 Methods
10243 -------
10244 getPtr()
10245 Get memory address of class instance
10246 """
10247 def __cinit__(self, void_ptr _ptr = 0):
10248 if _ptr == 0:
10249 self._pvt_ptr = &self._pvt_val
10250 else:
10251 self._pvt_ptr = <cyruntime.cudaExternalMemoryMipmappedArrayDesc *>_ptr
10252 def __init__(self, void_ptr _ptr = 0):
10254 self._formatDesc = cudaChannelFormatDesc(_ptr=<void_ptr>&self._pvt_ptr[0].formatDesc)
10255 self._extent = cudaExtent(_ptr=<void_ptr>&self._pvt_ptr[0].extent)
10256 def __dealloc__(self):
10257 pass
10258 def getPtr(self):
10259 return <void_ptr>self._pvt_ptr
10260 def __repr__(self):
10261 if self._pvt_ptr is not NULL:
10262 str_list = []
10263 try:
10264 str_list += ['offset : ' + str(self.offset)]
10265 except ValueError:
10266 str_list += ['offset : <ValueError>']
10267 try:
10268 str_list += ['formatDesc :\n' + '\n'.join([' ' + line for line in str(self.formatDesc).splitlines()])]
10269 except ValueError:
10270 str_list += ['formatDesc : <ValueError>']
10271 try:
10272 str_list += ['extent :\n' + '\n'.join([' ' + line for line in str(self.extent).splitlines()])]
10273 except ValueError:
10274 str_list += ['extent : <ValueError>']
10275 try:
10276 str_list += ['flags : ' + str(self.flags)]
10277 except ValueError:
10278 str_list += ['flags : <ValueError>']
10279 try:
10280 str_list += ['numLevels : ' + str(self.numLevels)]
10281 except ValueError:
10282 str_list += ['numLevels : <ValueError>']
10283 try:
10284 str_list += ['reserved : ' + str(self.reserved)]
10285 except ValueError:
10286 str_list += ['reserved : <ValueError>']
10287 return '\n'.join(str_list)
10288 else:
10289 return ''
10290 @property
10291 def offset(self):
10292 return self._pvt_ptr[0].offset
10293 @offset.setter
10294 def offset(self, unsigned long long offset):
10295 self._pvt_ptr[0].offset = offset
10296 @property
10297 def formatDesc(self):
10298 return self._formatDesc
10299 @formatDesc.setter
10300 def formatDesc(self, formatDesc not None : cudaChannelFormatDesc):
10301 string.memcpy(&self._pvt_ptr[0].formatDesc, <cyruntime.cudaChannelFormatDesc*><void_ptr>formatDesc.getPtr(), sizeof(self._pvt_ptr[0].formatDesc))
10302 @property
10303 def extent(self):
10304 return self._extent
10305 @extent.setter
10306 def extent(self, extent not None : cudaExtent):
10307 string.memcpy(&self._pvt_ptr[0].extent, <cyruntime.cudaExtent*><void_ptr>extent.getPtr(), sizeof(self._pvt_ptr[0].extent))
10308 @property
10309 def flags(self):
10310 return self._pvt_ptr[0].flags
10311 @flags.setter
10312 def flags(self, unsigned int flags):
10313 self._pvt_ptr[0].flags = flags
10314 @property
10315 def numLevels(self):
10316 return self._pvt_ptr[0].numLevels
10317 @numLevels.setter
10318 def numLevels(self, unsigned int numLevels):
10319 self._pvt_ptr[0].numLevels = numLevels
10320 @property
10321 def reserved(self):
10322 return self._pvt_ptr[0].reserved
10323 @reserved.setter
10324 def reserved(self, reserved):
10325 self._pvt_ptr[0].reserved = reserved
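# A sketch of describing a mipmapped array over imported external memory.
# The 8-bit RGBA format and 256x256 extent are placeholder assumptions.
def _sketch_map_external_mipmap(ext_mem):
    desc = cudaExternalMemoryMipmappedArrayDesc()
    desc.offset = 0
    fmt = cudaChannelFormatDesc()
    fmt.x = fmt.y = fmt.z = fmt.w = 8
    fmt.f = cudaChannelFormatKind.cudaChannelFormatKindUnsigned
    desc.formatDesc = fmt
    ext = cudaExtent()
    ext.width, ext.height, ext.depth = 256, 256, 0
    desc.extent = ext
    desc.numLevels = 1
    err, mipmap = cudaExternalMemoryGetMappedMipmappedArray(ext_mem, desc)
    return mipmap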
10327cdef class anon_struct9:
10328 """
10329 Attributes
10330 ----------
10331 handle : Any
10333 name : Any
10336 Methods
10337 -------
10338 getPtr()
10339 Get memory address of class instance
10340 """
10341 def __cinit__(self, void_ptr _ptr):
10342 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreHandleDesc *>_ptr
10344 def __init__(self, void_ptr _ptr):
10345 pass
10346 def __dealloc__(self):
10347 pass
10348 def getPtr(self):
10349 return <void_ptr>&self._pvt_ptr[0].handle.win32
10350 def __repr__(self):
10351 if self._pvt_ptr is not NULL:
10352 str_list = []
10353 try:
10354 str_list += ['handle : ' + hex(self.handle)]
10355 except ValueError:
10356 str_list += ['handle : <ValueError>']
10357 try:
10358 str_list += ['name : ' + hex(self.name)]
10359 except ValueError:
10360 str_list += ['name : <ValueError>']
10361 return '\n'.join(str_list)
10362 else:
10363 return ''
10364 @property
10365 def handle(self):
10366 return <void_ptr>self._pvt_ptr[0].handle.win32.handle
10367 @handle.setter
10368 def handle(self, handle):
10369 _chandle = _HelperInputVoidPtr(handle)
10370 self._pvt_ptr[0].handle.win32.handle = <void*><void_ptr>_chandle.cptr
10371 @property
10372 def name(self):
10373 return <void_ptr>self._pvt_ptr[0].handle.win32.name
10374 @name.setter
10375 def name(self, name):
10376 _cname = _HelperInputVoidPtr(name)
10377 self._pvt_ptr[0].handle.win32.name = <void*><void_ptr>_cname.cptr
10379cdef class anon_union3:
10380 """
10381 Attributes
10382 ----------
10383 fd : int
10385 win32 : anon_struct9
10387 nvSciSyncObj : Any
10390 Methods
10391 -------
10392 getPtr()
10393 Get memory address of class instance
10394 """
10395 def __cinit__(self, void_ptr _ptr):
10396 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreHandleDesc *>_ptr
10398 def __init__(self, void_ptr _ptr):
10400 self._win32 = anon_struct9(_ptr=<void_ptr>self._pvt_ptr)
10401 def __dealloc__(self):
10402 pass
10403 def getPtr(self):
10404 return <void_ptr>&self._pvt_ptr[0].handle
10405 def __repr__(self):
10406 if self._pvt_ptr is not NULL:
10407 str_list = []
10408 try:
10409 str_list += ['fd : ' + str(self.fd)]
10410 except ValueError:
10411 str_list += ['fd : <ValueError>']
10412 try:
10413 str_list += ['win32 :\n' + '\n'.join([' ' + line for line in str(self.win32).splitlines()])]
10414 except ValueError:
10415 str_list += ['win32 : <ValueError>']
10416 try:
10417 str_list += ['nvSciSyncObj : ' + hex(self.nvSciSyncObj)]
10418 except ValueError:
10419 str_list += ['nvSciSyncObj : <ValueError>']
10420 return '\n'.join(str_list)
10421 else:
10422 return ''
10423 @property
10424 def fd(self):
10425 return self._pvt_ptr[0].handle.fd
10426 @fd.setter
10427 def fd(self, int fd):
10428 self._pvt_ptr[0].handle.fd = fd
10429 @property
10430 def win32(self):
10431 return self._win32
10432 @win32.setter
10433 def win32(self, win32 not None : anon_struct9):
10434 string.memcpy(&self._pvt_ptr[0].handle.win32, <cyruntime.anon_struct9*><void_ptr>win32.getPtr(), sizeof(self._pvt_ptr[0].handle.win32))
10435 @property
10436 def nvSciSyncObj(self):
10437 return <void_ptr>self._pvt_ptr[0].handle.nvSciSyncObj
10438 @nvSciSyncObj.setter
10439 def nvSciSyncObj(self, nvSciSyncObj):
10440 _cnvSciSyncObj = _HelperInputVoidPtr(nvSciSyncObj)
10441 self._pvt_ptr[0].handle.nvSciSyncObj = <void*><void_ptr>_cnvSciSyncObj.cptr
10443cdef class cudaExternalSemaphoreHandleDesc:
10444 """
10445 External semaphore handle descriptor
10447 Attributes
10448 ----------
10449 type : cudaExternalSemaphoreHandleType
10450 Type of the handle
10451 handle : anon_union3
10453 flags : unsigned int
10454 Flags reserved for the future. Must be zero.
10455 reserved : list[unsigned int]
10456 Must be zero
10458 Methods
10459 -------
10460 getPtr()
10461 Get memory address of class instance
10462 """
10463 def __cinit__(self, void_ptr _ptr = 0):
10464 if _ptr == 0:
10465 self._val_ptr = <cyruntime.cudaExternalSemaphoreHandleDesc *>calloc(1, sizeof(cyruntime.cudaExternalSemaphoreHandleDesc))
10466 self._pvt_ptr = self._val_ptr
10467 else:
10468 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreHandleDesc *>_ptr
10469 def __init__(self, void_ptr _ptr = 0):
10471 self._handle = anon_union3(_ptr=<void_ptr>self._pvt_ptr)
10472 def __dealloc__(self):
10473 if self._val_ptr is not NULL:
10474 free(self._val_ptr)
10475 def getPtr(self):
10476 return <void_ptr>self._pvt_ptr
10477 def __repr__(self):
10478 if self._pvt_ptr is not NULL:
10479 str_list = []
10480 try:
10481 str_list += ['type : ' + str(self.type)]
10482 except ValueError:
10483 str_list += ['type : <ValueError>']
10484 try:
10485 str_list += ['handle :\n' + '\n'.join([' ' + line for line in str(self.handle).splitlines()])]
10486 except ValueError:
10487 str_list += ['handle : <ValueError>']
10488 try:
10489 str_list += ['flags : ' + str(self.flags)]
10490 except ValueError:
10491 str_list += ['flags : <ValueError>']
10492 try:
10493 str_list += ['reserved : ' + str(self.reserved)]
10494 except ValueError:
10495 str_list += ['reserved : <ValueError>']
10496 return '\n'.join(str_list)
10497 else:
10498 return ''
10499 @property
10500 def type(self):
10501 if self._pvt_ptr[0].type not in _dict_cudaExternalSemaphoreHandleType:
10502 return None
10503 return _dict_cudaExternalSemaphoreHandleType[self._pvt_ptr[0].type]
10504 @type.setter
10505 def type(self, type not None : cudaExternalSemaphoreHandleType):
10506 self._pvt_ptr[0].type = type.value
10507 @property
10508 def handle(self):
10509 return self._handle
10510 @handle.setter
10511 def handle(self, handle not None : anon_union3):
10512 string.memcpy(&self._pvt_ptr[0].handle, <cyruntime.anon_union3*><void_ptr>handle.getPtr(), sizeof(self._pvt_ptr[0].handle))
10513 @property
10514 def flags(self):
10515 return self._pvt_ptr[0].flags
10516 @flags.setter
10517 def flags(self, unsigned int flags):
10518 self._pvt_ptr[0].flags = flags
10519 @property
10520 def reserved(self):
10521 return self._pvt_ptr[0].reserved
10522 @reserved.setter
10523 def reserved(self, reserved):
10524 self._pvt_ptr[0].reserved = reserved
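# An import sketch mirroring the external-memory pattern above, assuming
# `fd` is an opaque file descriptor exported by another API.
def _sketch_import_external_semaphore(fd):
    desc = cudaExternalSemaphoreHandleDesc()
    desc.type = cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueFd
    desc.handle.fd = fd
    desc.flags = 0
    err, ext_sem = cudaImportExternalSemaphore(desc)
    return ext_sem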
10526cdef class anon_struct10:
10527 """
10528 Attributes
10529 ----------
10530 value : unsigned long long
10533 Methods
10534 -------
10535 getPtr()
10536 Get memory address of class instance
10537 """
10538 def __cinit__(self, void_ptr _ptr):
10539 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreSignalParams *>_ptr
10541 def __init__(self, void_ptr _ptr):
10542 pass
10543 def __dealloc__(self):
10544 pass
10545 def getPtr(self):
10546 return <void_ptr>&self._pvt_ptr[0].params.fence
10547 def __repr__(self):
10548 if self._pvt_ptr is not NULL:
10549 str_list = []
10550 try:
10551 str_list += ['value : ' + str(self.value)]
10552 except ValueError:
10553 str_list += ['value : <ValueError>']
10554 return '\n'.join(str_list)
10555 else:
10556 return ''
10557 @property
10558 def value(self):
10559 return self._pvt_ptr[0].params.fence.value
10560 @value.setter
10561 def value(self, unsigned long long value):
10562 self._pvt_ptr[0].params.fence.value = value
10564cdef class anon_union4:
10565 """
10566 Attributes
10567 ----------
10568 fence : Any
10570 reserved : unsigned long long
10573 Methods
10574 -------
10575 getPtr()
10576 Get memory address of class instance
10577 """
10578 def __cinit__(self, void_ptr _ptr):
10579 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreSignalParams *>_ptr
10581 def __init__(self, void_ptr _ptr):
10582 pass
10583 def __dealloc__(self):
10584 pass
10585 def getPtr(self):
10586 return <void_ptr>&self._pvt_ptr[0].params.nvSciSync
10587 def __repr__(self):
10588 if self._pvt_ptr is not NULL:
10589 str_list = []
10590 try:
10591 str_list += ['fence : ' + hex(self.fence)]
10592 except ValueError:
10593 str_list += ['fence : <ValueError>']
10594 try:
10595 str_list += ['reserved : ' + str(self.reserved)]
10596 except ValueError:
10597 str_list += ['reserved : <ValueError>']
10598 return '\n'.join(str_list)
10599 else:
10600 return ''
10601 @property
10602 def fence(self):
10603 return <void_ptr>self._pvt_ptr[0].params.nvSciSync.fence
10604 @fence.setter
10605 def fence(self, fence):
10606 _cfence = _HelperInputVoidPtr(fence)
10607 self._pvt_ptr[0].params.nvSciSync.fence = <void*><void_ptr>_cfence.cptr
10608 @property
10609 def reserved(self):
10610 return self._pvt_ptr[0].params.nvSciSync.reserved
10611 @reserved.setter
10612 def reserved(self, unsigned long long reserved):
10613 self._pvt_ptr[0].params.nvSciSync.reserved = reserved
10615cdef class anon_struct11:
10616 """
10617 Attributes
10618 ----------
10619 key : unsigned long long
10622 Methods
10623 -------
10624 getPtr()
10625 Get memory address of class instance
10626 """
10627 def __cinit__(self, void_ptr _ptr):
10628 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreSignalParams *>_ptr
10630 def __init__(self, void_ptr _ptr):
10631 pass
10632 def __dealloc__(self):
10633 pass
10634 def getPtr(self):
10635 return <void_ptr>&self._pvt_ptr[0].params.keyedMutex
10636 def __repr__(self):
10637 if self._pvt_ptr is not NULL:
10638 str_list = []
10639 try:
10640 str_list += ['key : ' + str(self.key)]
10641 except ValueError:
10642 str_list += ['key : <ValueError>']
10643 return '\n'.join(str_list)
10644 else:
10645 return ''
10646 @property
10647 def key(self):
10648 return self._pvt_ptr[0].params.keyedMutex.key
10649 @key.setter
10650 def key(self, unsigned long long key):
10651 self._pvt_ptr[0].params.keyedMutex.key = key
10653cdef class anon_struct12:
10654 """
10655 Attributes
10656 ----------
10657 fence : anon_struct10
10659 nvSciSync : anon_union4
10661 keyedMutex : anon_struct11
10663 reserved : list[unsigned int]
10666 Methods
10667 -------
10668 getPtr()
10669 Get memory address of class instance
10670 """
10671 def __cinit__(self, void_ptr _ptr):
10672 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreSignalParams *>_ptr
10674 def __init__(self, void_ptr _ptr):
10676 self._fence = anon_struct10(_ptr=<void_ptr>self._pvt_ptr)
10677 self._nvSciSync = anon_union4(_ptr=<void_ptr>self._pvt_ptr)
10678 self._keyedMutex = anon_struct11(_ptr=<void_ptr>self._pvt_ptr)
10679 def __dealloc__(self):
10680 pass
10681 def getPtr(self):
10682 return <void_ptr>&self._pvt_ptr[0].params
10683 def __repr__(self):
10684 if self._pvt_ptr is not NULL:
10685 str_list = []
10686 try:
10687 str_list += ['fence :\n' + '\n'.join([' ' + line for line in str(self.fence).splitlines()])]
10688 except ValueError:
10689 str_list += ['fence : <ValueError>']
10690 try:
10691 str_list += ['nvSciSync :\n' + '\n'.join([' ' + line for line in str(self.nvSciSync).splitlines()])]
10692 except ValueError:
10693 str_list += ['nvSciSync : <ValueError>']
10694 try:
10695 str_list += ['keyedMutex :\n' + '\n'.join([' ' + line for line in str(self.keyedMutex).splitlines()])]
10696 except ValueError:
10697 str_list += ['keyedMutex : <ValueError>']
10698 try:
10699 str_list += ['reserved : ' + str(self.reserved)]
10700 except ValueError:
10701 str_list += ['reserved : <ValueError>']
10702 return '\n'.join(str_list)
10703 else:
10704 return ''
10705 @property
10706 def fence(self):
10707 return self._fence
10708 @fence.setter
10709 def fence(self, fence not None : anon_struct10):
10710 string.memcpy(&self._pvt_ptr[0].params.fence, <cyruntime.anon_struct10*><void_ptr>fence.getPtr(), sizeof(self._pvt_ptr[0].params.fence))
10711 @property
10712 def nvSciSync(self):
10713 return self._nvSciSync
10714 @nvSciSync.setter
10715 def nvSciSync(self, nvSciSync not None : anon_union4):
10716 string.memcpy(&self._pvt_ptr[0].params.nvSciSync, <cyruntime.anon_union4*><void_ptr>nvSciSync.getPtr(), sizeof(self._pvt_ptr[0].params.nvSciSync))
10717 @property
10718 def keyedMutex(self):
10719 return self._keyedMutex
10720 @keyedMutex.setter
10721 def keyedMutex(self, keyedMutex not None : anon_struct11):
10722 string.memcpy(&self._pvt_ptr[0].params.keyedMutex, <cyruntime.anon_struct11*><void_ptr>keyedMutex.getPtr(), sizeof(self._pvt_ptr[0].params.keyedMutex))
10723 @property
10724 def reserved(self):
10725 return self._pvt_ptr[0].params.reserved
10726 @reserved.setter
10727 def reserved(self, reserved):
10728 self._pvt_ptr[0].params.reserved = reserved
10730cdef class cudaExternalSemaphoreSignalParams:
10731 """
10732 External semaphore signal parameters, compatible with driver type
10734 Attributes
10735 ----------
10736 params : anon_struct12
10738 flags : unsigned int
10739 Only when cudaExternalSemaphoreSignalParams is used to signal a
10740 cudaExternalSemaphore_t of type
10741 cudaExternalSemaphoreHandleTypeNvSciSync, the valid flag is
10742 cudaExternalSemaphoreSignalSkipNvSciBufMemSync, which indicates
10743 that while signaling the cudaExternalSemaphore_t, no memory
10744 synchronization operations should be performed for any external
10745 memory object imported as cudaExternalMemoryHandleTypeNvSciBuf. For
10746 all other types of cudaExternalSemaphore_t, flags must be zero.
10747 reserved : list[unsigned int]
10750 Methods
10751 -------
10752 getPtr()
10753 Get memory address of class instance
10754 """
10755 def __cinit__(self, void_ptr _ptr = 0):
10756 if _ptr == 0:
10757 self._pvt_ptr = &self._pvt_val
10758 else:
10759 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreSignalParams *>_ptr
10760 def __init__(self, void_ptr _ptr = 0):
10762 self._params = anon_struct12(_ptr=<void_ptr>self._pvt_ptr)
10763 def __dealloc__(self):
10764 pass
10765 def getPtr(self):
10766 return <void_ptr>self._pvt_ptr
10767 def __repr__(self):
10768 if self._pvt_ptr is not NULL:
10769 str_list = []
10770 try:
10771 str_list += ['params :\n' + '\n'.join([' ' + line for line in str(self.params).splitlines()])]
10772 except ValueError:
10773 str_list += ['params : <ValueError>']
10774 try:
10775 str_list += ['flags : ' + str(self.flags)]
10776 except ValueError:
10777 str_list += ['flags : <ValueError>']
10778 try:
10779 str_list += ['reserved : ' + str(self.reserved)]
10780 except ValueError:
10781 str_list += ['reserved : <ValueError>']
10782 return '\n'.join(str_list)
10783 else:
10784 return ''
10785 @property
10786 def params(self):
10787 return self._params
10788 @params.setter
10789 def params(self, params not None : anon_struct12):
10790 string.memcpy(&self._pvt_ptr[0].params, <cyruntime.anon_struct12*><void_ptr>params.getPtr(), sizeof(self._pvt_ptr[0].params))
10791 @property
10792 def flags(self):
10793 return self._pvt_ptr[0].flags
10794 @flags.setter
10795 def flags(self, unsigned int flags):
10796 self._pvt_ptr[0].flags = flags
10797 @property
10798 def reserved(self):
10799 return self._pvt_ptr[0].reserved
10800 @reserved.setter
10801 def reserved(self, reserved):
10802 self._pvt_ptr[0].reserved = reserved
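# A signal sketch, assuming `ext_sem` wraps a timeline-style semaphore whose
# fence payload is meaningful (e.g. a Vulkan timeline semaphore).
def _sketch_signal_external_semaphore(ext_sem, stream, value):
    params = cudaExternalSemaphoreSignalParams()
    params.params.fence.value = value  # target fence value
    params.flags = 0
    err, = cudaSignalExternalSemaphoresAsync([ext_sem], [params], 1, stream)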
10804cdef class anon_struct13:
10805 """
10806 Attributes
10807 ----------
10808 value : unsigned long long
10811 Methods
10812 -------
10813 getPtr()
10814 Get memory address of class instance
10815 """
10816 def __cinit__(self, void_ptr _ptr):
10817 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreWaitParams *>_ptr
10819 def __init__(self, void_ptr _ptr):
10820 pass
10821 def __dealloc__(self):
10822 pass
10823 def getPtr(self):
10824 return <void_ptr>&self._pvt_ptr[0].params.fence
10825 def __repr__(self):
10826 if self._pvt_ptr is not NULL:
10827 str_list = []
10828 try:
10829 str_list += ['value : ' + str(self.value)]
10830 except ValueError:
10831 str_list += ['value : <ValueError>']
10832 return '\n'.join(str_list)
10833 else:
10834 return ''
10835 @property
10836 def value(self):
10837 return self._pvt_ptr[0].params.fence.value
10838 @value.setter
10839 def value(self, unsigned long long value):
10840 self._pvt_ptr[0].params.fence.value = value
10842cdef class anon_union5:
10843 """
10844 Attributes
10845 ----------
10846 fence : Any
10848 reserved : unsigned long long
10851 Methods
10852 -------
10853 getPtr()
10854 Get memory address of class instance
10855 """
10856 def __cinit__(self, void_ptr _ptr):
10857 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreWaitParams *>_ptr
10859 def __init__(self, void_ptr _ptr):
10860 pass
10861 def __dealloc__(self):
10862 pass
10863 def getPtr(self):
10864 return <void_ptr>&self._pvt_ptr[0].params.nvSciSync
10865 def __repr__(self):
10866 if self._pvt_ptr is not NULL:
10867 str_list = []
10868 try:
10869 str_list += ['fence : ' + hex(self.fence)]
10870 except ValueError:
10871 str_list += ['fence : <ValueError>']
10872 try:
10873 str_list += ['reserved : ' + str(self.reserved)]
10874 except ValueError:
10875 str_list += ['reserved : <ValueError>']
10876 return '\n'.join(str_list)
10877 else:
10878 return ''
10879 @property
10880 def fence(self):
10881 return <void_ptr>self._pvt_ptr[0].params.nvSciSync.fence
10882 @fence.setter
10883 def fence(self, fence):
10884 _cfence = _HelperInputVoidPtr(fence)
10885 self._pvt_ptr[0].params.nvSciSync.fence = <void*><void_ptr>_cfence.cptr
10886 @property
10887 def reserved(self):
10888 return self._pvt_ptr[0].params.nvSciSync.reserved
10889 @reserved.setter
10890 def reserved(self, unsigned long long reserved):
10891 self._pvt_ptr[0].params.nvSciSync.reserved = reserved
10893cdef class anon_struct14:
10894 """
10895 Attributes
10896 ----------
10897 key : unsigned long long
10899 timeoutMs : unsigned int
10902 Methods
10903 -------
10904 getPtr()
10905 Get memory address of class instance
10906 """
10907 def __cinit__(self, void_ptr _ptr):
10908 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreWaitParams *>_ptr
10910 def __init__(self, void_ptr _ptr):
10911 pass
10912 def __dealloc__(self):
10913 pass
10914 def getPtr(self):
10915 return <void_ptr>&self._pvt_ptr[0].params.keyedMutex
10916 def __repr__(self):
10917 if self._pvt_ptr is not NULL:
10918 str_list = []
10919 try:
10920 str_list += ['key : ' + str(self.key)]
10921 except ValueError:
10922 str_list += ['key : <ValueError>']
10923 try:
10924 str_list += ['timeoutMs : ' + str(self.timeoutMs)]
10925 except ValueError:
10926 str_list += ['timeoutMs : <ValueError>']
10927 return '\n'.join(str_list)
10928 else:
10929 return ''
10930 @property
10931 def key(self):
10932 return self._pvt_ptr[0].params.keyedMutex.key
10933 @key.setter
10934 def key(self, unsigned long long key):
10935 self._pvt_ptr[0].params.keyedMutex.key = key
10936 @property
10937 def timeoutMs(self):
10938 return self._pvt_ptr[0].params.keyedMutex.timeoutMs
10939 @timeoutMs.setter
10940 def timeoutMs(self, unsigned int timeoutMs):
10941 self._pvt_ptr[0].params.keyedMutex.timeoutMs = timeoutMs
10943cdef class anon_struct15:
10944 """
10945 Attributes
10946 ----------
10947 fence : anon_struct13
10949 nvSciSync : anon_union5
10951 keyedMutex : anon_struct14
10953 reserved : list[unsigned int]
10956 Methods
10957 -------
10958 getPtr()
10959 Get memory address of class instance
10960 """
10961 def __cinit__(self, void_ptr _ptr):
10962 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreWaitParams *>_ptr
10964 def __init__(self, void_ptr _ptr):
10966 self._fence = anon_struct13(_ptr=<void_ptr>self._pvt_ptr)
10967 self._nvSciSync = anon_union5(_ptr=<void_ptr>self._pvt_ptr)
10968 self._keyedMutex = anon_struct14(_ptr=<void_ptr>self._pvt_ptr)
10969 def __dealloc__(self):
10970 pass
10971 def getPtr(self):
10972 return <void_ptr>&self._pvt_ptr[0].params
10973 def __repr__(self):
10974 if self._pvt_ptr is not NULL:
10975 str_list = []
10976 try:
10977 str_list += ['fence :\n' + '\n'.join([' ' + line for line in str(self.fence).splitlines()])]
10978 except ValueError:
10979 str_list += ['fence : <ValueError>']
10980 try:
10981 str_list += ['nvSciSync :\n' + '\n'.join([' ' + line for line in str(self.nvSciSync).splitlines()])]
10982 except ValueError:
10983 str_list += ['nvSciSync : <ValueError>']
10984 try:
10985 str_list += ['keyedMutex :\n' + '\n'.join([' ' + line for line in str(self.keyedMutex).splitlines()])]
10986 except ValueError:
10987 str_list += ['keyedMutex : <ValueError>']
10988 try:
10989 str_list += ['reserved : ' + str(self.reserved)]
10990 except ValueError:
10991 str_list += ['reserved : <ValueError>']
10992 return '\n'.join(str_list)
10993 else:
10994 return ''
10995 @property
10996 def fence(self):
10997 return self._fence
10998 @fence.setter
10999 def fence(self, fence not None : anon_struct13):
11000 string.memcpy(&self._pvt_ptr[0].params.fence, <cyruntime.anon_struct13*><void_ptr>fence.getPtr(), sizeof(self._pvt_ptr[0].params.fence))
11001 @property
11002 def nvSciSync(self):
11003 return self._nvSciSync
11004 @nvSciSync.setter
11005 def nvSciSync(self, nvSciSync not None : anon_union5):
11006 string.memcpy(&self._pvt_ptr[0].params.nvSciSync, <cyruntime.anon_union5*><void_ptr>nvSciSync.getPtr(), sizeof(self._pvt_ptr[0].params.nvSciSync))
11007 @property
11008 def keyedMutex(self):
11009 return self._keyedMutex
11010 @keyedMutex.setter
11011 def keyedMutex(self, keyedMutex not None : anon_struct14):
11012 string.memcpy(&self._pvt_ptr[0].params.keyedMutex, <cyruntime.anon_struct14*><void_ptr>keyedMutex.getPtr(), sizeof(self._pvt_ptr[0].params.keyedMutex))
11013 @property
11014 def reserved(self):
11015 return self._pvt_ptr[0].params.reserved
11016 @reserved.setter
11017 def reserved(self, reserved):
11018 self._pvt_ptr[0].params.reserved = reserved
11020cdef class cudaExternalSemaphoreWaitParams:
11021 """
11022 External semaphore wait parameters, compatible with driver type
11024 Attributes
11025 ----------
11026 params : anon_struct15
11028 flags : unsigned int
11029 Only when cudaExternalSemaphoreWaitParams is used to wait on a
11030 cudaExternalSemaphore_t of type
11031 cudaExternalSemaphoreHandleTypeNvSciSync, the valid flag is
11032 cudaExternalSemaphoreWaitSkipNvSciBufMemSync, which indicates
11033 that while waiting for the cudaExternalSemaphore_t, no memory
11034 synchronization operations should be performed for any external
11035 memory object imported as cudaExternalMemoryHandleTypeNvSciBuf. For
11036 all other types of cudaExternalSemaphore_t, flags must be zero.
11037 reserved : list[unsigned int]
11040 Methods
11041 -------
11042 getPtr()
11043 Get memory address of class instance
11044 """
11045 def __cinit__(self, void_ptr _ptr = 0):
11046 if _ptr == 0:
11047 self._pvt_ptr = &self._pvt_val
11048 else:
11049 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreWaitParams *>_ptr
11050 def __init__(self, void_ptr _ptr = 0):
11052 self._params = anon_struct15(_ptr=<void_ptr>self._pvt_ptr)
11053 def __dealloc__(self):
11054 pass
11055 def getPtr(self):
11056 return <void_ptr>self._pvt_ptr
11057 def __repr__(self):
11058 if self._pvt_ptr is not NULL:
11059 str_list = []
11060 try:
11061 str_list += ['params :\n' + '\n'.join([' ' + line for line in str(self.params).splitlines()])]
11062 except ValueError:
11063 str_list += ['params : <ValueError>']
11064 try:
11065 str_list += ['flags : ' + str(self.flags)]
11066 except ValueError:
11067 str_list += ['flags : <ValueError>']
11068 try:
11069 str_list += ['reserved : ' + str(self.reserved)]
11070 except ValueError:
11071 str_list += ['reserved : <ValueError>']
11072 return '\n'.join(str_list)
11073 else:
11074 return ''
11075 @property
11076 def params(self):
11077 return self._params
11078 @params.setter
11079 def params(self, params not None : anon_struct15):
11080 string.memcpy(&self._pvt_ptr[0].params, <cyruntime.anon_struct15*><void_ptr>params.getPtr(), sizeof(self._pvt_ptr[0].params))
11081 @property
11082 def flags(self):
11083 return self._pvt_ptr[0].flags
11084 @flags.setter
11085 def flags(self, unsigned int flags):
11086 self._pvt_ptr[0].flags = flags
11087 @property
11088 def reserved(self):
11089 return self._pvt_ptr[0].reserved
11090 @reserved.setter
11091 def reserved(self, reserved):
11092 self._pvt_ptr[0].reserved = reserved
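# The matching wait sketch: stream work queued after this call waits until
# the imported semaphore reaches `value`.
def _sketch_wait_external_semaphore(ext_sem, stream, value):
    params = cudaExternalSemaphoreWaitParams()
    params.params.fence.value = value
    params.flags = 0
    err, = cudaWaitExternalSemaphoresAsync([ext_sem], [params], 1, stream)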
11094cdef class cudaDevSmResource:
11095 """
11096 Data for SM-related resources. All parameters in this structure are
11097 OUTPUT only. Do not write to any of the fields in this structure.
11099 Attributes
11100 ----------
11101 smCount : unsigned int
11102 The number of streaming multiprocessors available in this resource.
11103 minSmPartitionSize : unsigned int
11104 The minimum number of streaming multiprocessors required to
11105 partition this resource.
11106 smCoscheduledAlignment : unsigned int
11107 The number of streaming multiprocessors in this resource that are
11108 guaranteed to be co-scheduled on the same GPU processing cluster.
11109 smCount will be a multiple of this value, unless the backfill flag
11110 is set.
11111 flags : unsigned int
11112 The flags set on this SM resource. For available flags see
11113 cudaDevSmResourceGroup_flags.
11115 Methods
11116 -------
11117 getPtr()
11118 Get memory address of class instance
11119 """
11120 def __cinit__(self, void_ptr _ptr = 0):
11121 if _ptr == 0:
11122 self._pvt_ptr = &self._pvt_val
11123 else:
11124 self._pvt_ptr = <cyruntime.cudaDevSmResource *>_ptr
11125 def __init__(self, void_ptr _ptr = 0):
11126 pass
11127 def __dealloc__(self):
11128 pass
11129 def getPtr(self):
11130 return <void_ptr>self._pvt_ptr
11131 def __repr__(self):
11132 if self._pvt_ptr is not NULL:
11133 str_list = []
11134 try:
11135 str_list += ['smCount : ' + str(self.smCount)]
11136 except ValueError:
11137 str_list += ['smCount : <ValueError>']
11138 try:
11139 str_list += ['minSmPartitionSize : ' + str(self.minSmPartitionSize)]
11140 except ValueError:
11141 str_list += ['minSmPartitionSize : <ValueError>']
11142 try:
11143 str_list += ['smCoscheduledAlignment : ' + str(self.smCoscheduledAlignment)]
11144 except ValueError:
11145 str_list += ['smCoscheduledAlignment : <ValueError>']
11146 try:
11147 str_list += ['flags : ' + str(self.flags)]
11148 except ValueError:
11149 str_list += ['flags : <ValueError>']
11150 return '\n'.join(str_list)
11151 else:
11152 return ''
11153 @property
11154 def smCount(self):
11155 return self._pvt_ptr[0].smCount
11156 @smCount.setter
11157 def smCount(self, unsigned int smCount):
11158 self._pvt_ptr[0].smCount = smCount
11159 @property
11160 def minSmPartitionSize(self):
11161 return self._pvt_ptr[0].minSmPartitionSize
11162 @minSmPartitionSize.setter
11163 def minSmPartitionSize(self, unsigned int minSmPartitionSize):
11164 self._pvt_ptr[0].minSmPartitionSize = minSmPartitionSize
11165 @property
11166 def smCoscheduledAlignment(self):
11167 return self._pvt_ptr[0].smCoscheduledAlignment
11168 @smCoscheduledAlignment.setter
11169 def smCoscheduledAlignment(self, unsigned int smCoscheduledAlignment):
11170 self._pvt_ptr[0].smCoscheduledAlignment = smCoscheduledAlignment
11171 @property
11172 def flags(self):
11173 return self._pvt_ptr[0].flags
11174 @flags.setter
11175 def flags(self, unsigned int flags):
11176 self._pvt_ptr[0].flags = flags
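# An inspection sketch, assuming `res` is a cudaDevSmResource that a
# resource query has already populated (every field is output only).
def _sketch_describe_sm_resource(res):
    print('SMs available         :', res.smCount)
    print('min partition size    :', res.minSmPartitionSize)
    print('coscheduled alignment :', res.smCoscheduledAlignment)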
11178cdef class cudaDevWorkqueueConfigResource:
11179 """
11180 Data for resources related to workqueue configuration
11182 Attributes
11183 ----------
11184 device : int
11185 The device on which the workqueue resources are available
11186 wqConcurrencyLimit : unsigned int
11187 The expected maximum number of concurrent stream-ordered workloads
11188 sharingScope : cudaDevWorkqueueConfigScope
11189 The sharing scope for the workqueue resources
11191 Methods
11192 -------
11193 getPtr()
11194 Get memory address of class instance
11195 """
11196 def __cinit__(self, void_ptr _ptr = 0):
11197 if _ptr == 0:
11198 self._pvt_ptr = &self._pvt_val
11199 else:
11200 self._pvt_ptr = <cyruntime.cudaDevWorkqueueConfigResource *>_ptr
11201 def __init__(self, void_ptr _ptr = 0):
11202 pass
11203 def __dealloc__(self):
11204 pass
11205 def getPtr(self):
11206 return <void_ptr>self._pvt_ptr
11207 def __repr__(self):
11208 if self._pvt_ptr is not NULL:
11209 str_list = []
11210 try:
11211 str_list += ['device : ' + str(self.device)]
11212 except ValueError:
11213 str_list += ['device : <ValueError>']
11214 try:
11215 str_list += ['wqConcurrencyLimit : ' + str(self.wqConcurrencyLimit)]
11216 except ValueError:
11217 str_list += ['wqConcurrencyLimit : <ValueError>']
11218 try:
11219 str_list += ['sharingScope : ' + str(self.sharingScope)]
11220 except ValueError:
11221 str_list += ['sharingScope : <ValueError>']
11222 return '\n'.join(str_list)
11223 else:
11224 return ''
11225 @property
11226 def device(self):
11227 return self._pvt_ptr[0].device
11228 @device.setter
11229 def device(self, int device):
11230 self._pvt_ptr[0].device = device
11231 @property
11232 def wqConcurrencyLimit(self):
11233 return self._pvt_ptr[0].wqConcurrencyLimit
11234 @wqConcurrencyLimit.setter
11235 def wqConcurrencyLimit(self, unsigned int wqConcurrencyLimit):
11236 self._pvt_ptr[0].wqConcurrencyLimit = wqConcurrencyLimit
11237 @property
11238 def sharingScope(self):
11239 if self._pvt_ptr[0].sharingScope not in _dict_cudaDevWorkqueueConfigScope:
11240 return None
11241 return _dict_cudaDevWorkqueueConfigScope[self._pvt_ptr[0].sharingScope]
11242 @sharingScope.setter
11243 def sharingScope(self, sharingScope not None : cudaDevWorkqueueConfigScope):
11244 self._pvt_ptr[0].sharingScope = sharingScope.value
11246cdef class cudaDevWorkqueueResource:
11247 """
11248 Handle to a pre-existing workqueue-related resource
11250 Attributes
11251 ----------
11252 reserved : bytes
11253 Reserved for future use
11255 Methods
11256 -------
11257 getPtr()
11258 Get memory address of class instance
11259 """
11260 def __cinit__(self, void_ptr _ptr = 0):
11261 if _ptr == 0:
11262 self._pvt_ptr = &self._pvt_val
11263 else:
11264 self._pvt_ptr = <cyruntime.cudaDevWorkqueueResource *>_ptr
11265 def __init__(self, void_ptr _ptr = 0):
11266 pass
11267 def __dealloc__(self):
11268 pass
11269 def getPtr(self):
11270 return <void_ptr>self._pvt_ptr
11271 def __repr__(self):
11272 if self._pvt_ptr is not NULL:
11273 str_list = []
11274 try:
11275 str_list += ['reserved : ' + str(self.reserved)]
11276 except ValueError:
11277 str_list += ['reserved : <ValueError>']
11278 return '\n'.join(str_list)
11279 else:
11280 return ''
11281 @property
11282 def reserved(self):
11283 return PyBytes_FromStringAndSize(<char*>self._pvt_ptr[0].reserved, 40)
11284 @reserved.setter
11285 def reserved(self, reserved):
11286 if len(reserved) != 40:
11287 raise ValueError("reserved length must be 40, is " + str(len(reserved)))
11288 for i, b in enumerate(reserved):
11289 self._pvt_ptr[0].reserved[i] = b
11291cdef class cudaDevSmResourceGroupParams_st:
11292 """
11293 Input data for splitting SMs
11295 Attributes
11296 ----------
11297 smCount : unsigned int
11298 The number of SMs available in this resource.
11299 coscheduledSmCount : unsigned int
11300 The number of co-scheduled SMs grouped together for locality
11301 purposes.
11302 preferredCoscheduledSmCount : unsigned int
11303 When possible, combine co-scheduled groups together into larger
11304 groups of this size.
11305 flags : unsigned int
11306 Combination of `cudaDevSmResourceGroup_flags` values indicating how
11307 this group is created.
11308 reserved : list[unsigned int]
11309 Reserved for future use - ensure this is zero-initialized.
11311 Methods
11312 -------
11313 getPtr()
11314 Get memory address of class instance
11315 """
11316 def __cinit__(self, void_ptr _ptr = 0):
11317 if _ptr == 0:
11318 self._pvt_ptr = &self._pvt_val
11319 else:
11320 self._pvt_ptr = <cyruntime.cudaDevSmResourceGroupParams_st *>_ptr
11321 def __init__(self, void_ptr _ptr = 0):
11322 pass
11323 def __dealloc__(self):
11324 pass
11325 def getPtr(self):
11326 return <void_ptr>self._pvt_ptr
11327 def __repr__(self):
11328 if self._pvt_ptr is not NULL:
11329 str_list = []
11330 try:
11331 str_list += ['smCount : ' + str(self.smCount)]
11332 except ValueError:
11333 str_list += ['smCount : <ValueError>']
11334 try:
11335 str_list += ['coscheduledSmCount : ' + str(self.coscheduledSmCount)]
11336 except ValueError:
11337 str_list += ['coscheduledSmCount : <ValueError>']
11338 try:
11339 str_list += ['preferredCoscheduledSmCount : ' + str(self.preferredCoscheduledSmCount)]
11340 except ValueError:
11341 str_list += ['preferredCoscheduledSmCount : <ValueError>']
11342 try:
11343 str_list += ['flags : ' + str(self.flags)]
11344 except ValueError:
11345 str_list += ['flags : <ValueError>']
11346 try:
11347 str_list += ['reserved : ' + str(self.reserved)]
11348 except ValueError:
11349 str_list += ['reserved : <ValueError>']
11350 return '\n'.join(str_list)
11351 else:
11352 return ''
11353 @property
11354 def smCount(self):
11355 return self._pvt_ptr[0].smCount
11356 @smCount.setter
11357 def smCount(self, unsigned int smCount):
11358 self._pvt_ptr[0].smCount = smCount
11359 @property
11360 def coscheduledSmCount(self):
11361 return self._pvt_ptr[0].coscheduledSmCount
11362 @coscheduledSmCount.setter
11363 def coscheduledSmCount(self, unsigned int coscheduledSmCount):
11364 self._pvt_ptr[0].coscheduledSmCount = coscheduledSmCount
11365 @property
11366 def preferredCoscheduledSmCount(self):
11367 return self._pvt_ptr[0].preferredCoscheduledSmCount
11368 @preferredCoscheduledSmCount.setter
11369 def preferredCoscheduledSmCount(self, unsigned int preferredCoscheduledSmCount):
11370 self._pvt_ptr[0].preferredCoscheduledSmCount = preferredCoscheduledSmCount
11371 @property
11372 def flags(self):
11373 return self._pvt_ptr[0].flags
11374 @flags.setter
11375 def flags(self, unsigned int flags):
11376 self._pvt_ptr[0].flags = flags
11377 @property
11378 def reserved(self):
11379 return self._pvt_ptr[0].reserved
11380 @reserved.setter
11381 def reserved(self, reserved):
11382 self._pvt_ptr[0].reserved = reserved
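# A parameter-filling sketch: request a 16-SM group in co-scheduled blocks
# of 8. The counts and zero flags are placeholder assumptions; `reserved`
# stays zero-initialized as required.
def _sketch_sm_group_params():
    params = cudaDevSmResourceGroupParams_st()
    params.smCount = 16
    params.coscheduledSmCount = 8
    params.preferredCoscheduledSmCount = 8
    params.flags = 0
    return params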
11384cdef class cudaDevResource_st:
11385 """
11386 A tagged union describing different resources identified by the
11387 type field. This structure should not be directly modified outside
11388 of the API that created it.
11389     struct { enum cudaDevResourceType type;
11390              union { struct cudaDevSmResource sm;
11391                      struct cudaDevWorkqueueConfigResource wqConfig;
11392                      struct cudaDevWorkqueueResource wq; }; };
11393 - If `type` is `cudaDevResourceTypeInvalid`, this resource is not
11394 valid and cannot be further accessed.
11395 - If `type` is `cudaDevResourceTypeSm`, the cudaDevSmResource
11396 structure `sm` is filled in; e.g. `sm.smCount` reflects the number
11397 of streaming multiprocessors available in this resource.
11398 - If `type` is `cudaDevResourceTypeWorkqueueConfig`, the
11399 cudaDevWorkqueueConfigResource structure `wqConfig` is filled in.
11400 - If `type` is `cudaDevResourceTypeWorkqueue`, the cudaDevWorkqueueResource structure `wq` is filled in.
11402 Attributes
11403 ----------
11404 type : cudaDevResourceType
11405 Type of resource, dictates which union field was last set
11406 _internal_padding : bytes
11408 sm : cudaDevSmResource
11409 Resource corresponding to cudaDevResourceTypeSm `typename`.
11410 wqConfig : cudaDevWorkqueueConfigResource
11411 Resource corresponding to cudaDevResourceTypeWorkqueueConfig
11412 `typename`.
11413 wq : cudaDevWorkqueueResource
11414 Resource corresponding to cudaDevResourceTypeWorkqueue `typename`.
11415 _oversize : bytes
11417 nextResource : cudaDevResource_st
11420 Methods
11421 -------
11422 getPtr()
11423 Get memory address of class instance
11424 """
11425 def __cinit__(self, void_ptr _ptr = 0):
11426 if _ptr == 0:
11427 self._val_ptr = <cyruntime.cudaDevResource_st *>calloc(1, sizeof(cyruntime.cudaDevResource_st))
11428 self._pvt_ptr = self._val_ptr
11429 else:
11430 self._pvt_ptr = <cyruntime.cudaDevResource_st *>_ptr
11431 def __init__(self, void_ptr _ptr = 0):
11433 self._sm = cudaDevSmResource(_ptr=<void_ptr>&self._pvt_ptr[0].sm)
11434 self._wqConfig = cudaDevWorkqueueConfigResource(_ptr=<void_ptr>&self._pvt_ptr[0].wqConfig)
11435 self._wq = cudaDevWorkqueueResource(_ptr=<void_ptr>&self._pvt_ptr[0].wq)
11436 def __dealloc__(self):
11437 if self._val_ptr is not NULL:
11438 free(self._val_ptr)
11439 if self._nextResource is not NULL:
11440 free(self._nextResource)
11441 def getPtr(self):
11442 return <void_ptr>self._pvt_ptr
11443 def __repr__(self):
11444 if self._pvt_ptr is not NULL:
11445 str_list = []
11446 try:
11447 str_list += ['type : ' + str(self.type)]
11448 except ValueError:
11449 str_list += ['type : <ValueError>']
11450 try:
11451 str_list += ['_internal_padding : ' + str(self._internal_padding)]
11452 except ValueError:
11453 str_list += ['_internal_padding : <ValueError>']
11454 try:
11455 str_list += ['sm :\n' + '\n'.join([' ' + line for line in str(self.sm).splitlines()])]
11456 except ValueError:
11457 str_list += ['sm : <ValueError>']
11458 try:
11459 str_list += ['wqConfig :\n' + '\n'.join([' ' + line for line in str(self.wqConfig).splitlines()])]
11460 except ValueError:
11461 str_list += ['wqConfig : <ValueError>']
11462 try:
11463 str_list += ['wq :\n' + '\n'.join([' ' + line for line in str(self.wq).splitlines()])]
11464 except ValueError:
11465 str_list += ['wq : <ValueError>']
11466 try:
11467 str_list += ['_oversize : ' + str(self._oversize)]
11468 except ValueError:
11469 str_list += ['_oversize : <ValueError>']
11470 try:
11471 str_list += ['nextResource : ' + str(self.nextResource)]
11472 except ValueError:
11473 str_list += ['nextResource : <ValueError>']
11474 return '\n'.join(str_list)
11475 else:
11476 return ''
11477 @property
11478 def type(self):
11479 if self._pvt_ptr[0].type not in _dict_cudaDevResourceType:
11480 return None
11481 return _dict_cudaDevResourceType[self._pvt_ptr[0].type]
11482 @type.setter
11483 def type(self, type not None : cudaDevResourceType):
11484 self._pvt_ptr[0].type = type.value
11485 @property
11486 def _internal_padding(self):
11487 return PyBytes_FromStringAndSize(<char*>self._pvt_ptr[0]._internal_padding, 92)
11488 @_internal_padding.setter
11489 def _internal_padding(self, _internal_padding):
11490 if len(_internal_padding) != 92:
11491 raise ValueError("_internal_padding length must be 92, is " + str(len(_internal_padding)))
11492 for i, b in enumerate(_internal_padding):
11493 self._pvt_ptr[0]._internal_padding[i] = b
11494 @property
11495 def sm(self):
11496 return self._sm
11497 @sm.setter
11498 def sm(self, sm not None : cudaDevSmResource):
11499 string.memcpy(&self._pvt_ptr[0].sm, <cyruntime.cudaDevSmResource*><void_ptr>sm.getPtr(), sizeof(self._pvt_ptr[0].sm))
11500 @property
11501 def wqConfig(self):
11502 return self._wqConfig
11503 @wqConfig.setter
11504 def wqConfig(self, wqConfig not None : cudaDevWorkqueueConfigResource):
11505 string.memcpy(&self._pvt_ptr[0].wqConfig, <cyruntime.cudaDevWorkqueueConfigResource*><void_ptr>wqConfig.getPtr(), sizeof(self._pvt_ptr[0].wqConfig))
11506 @property
11507 def wq(self):
11508 return self._wq
11509 @wq.setter
11510 def wq(self, wq not None : cudaDevWorkqueueResource):
11511 string.memcpy(&self._pvt_ptr[0].wq, <cyruntime.cudaDevWorkqueueResource*><void_ptr>wq.getPtr(), sizeof(self._pvt_ptr[0].wq))
11512 @property
11513 def _oversize(self):
11514 return PyBytes_FromStringAndSize(<char*>self._pvt_ptr[0]._oversize, 40)
11515 @_oversize.setter
11516 def _oversize(self, _oversize):
11517 if len(_oversize) != 40:
11518 raise ValueError("_oversize length must be 40, is " + str(len(_oversize)))
11519 for i, b in enumerate(_oversize):
11520 self._pvt_ptr[0]._oversize[i] = b
11521 @property
11522 def nextResource(self):
11523 arrs = [<void_ptr>self._pvt_ptr[0].nextResource + x*sizeof(cyruntime.cudaDevResource_st) for x in range(self._nextResource_length)]
11524 return [cudaDevResource_st(_ptr=arr) for arr in arrs]
11525 @nextResource.setter
11526 def nextResource(self, val):
11527 if len(val) == 0:
11528 free(self._nextResource)
11529 self._nextResource_length = 0
11530 self._pvt_ptr[0].nextResource = NULL
11531 else:
11532 if self._nextResource_length != <size_t>len(val):
11533 free(self._nextResource)
11534 self._nextResource = <cyruntime.cudaDevResource_st*> calloc(len(val), sizeof(cyruntime.cudaDevResource_st))
11535 if self._nextResource is NULL:
11536 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaDevResource_st)))
11537 self._nextResource_length = <size_t>len(val)
11538 self._pvt_ptr[0].nextResource = self._nextResource
11539 for idx in range(len(val)):
11540 string.memcpy(&self._nextResource[idx], (<cudaDevResource_st>val[idx])._pvt_ptr, sizeof(cyruntime.cudaDevResource_st))
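# Illustrative sketch: reading the tagged union after it has been populated by
# the owning API; `res` is a placeholder for an existing cudaDevResource_st:
#
#     if res.type == cudaDevResourceType.cudaDevResourceTypeSm:
#         print('SMs available:', res.sm.smCount)
#     elif res.type == cudaDevResourceType.cudaDevResourceTypeInvalid:
#         pass  # not valid; must not be accessed further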
11543cdef class cudalibraryHostUniversalFunctionAndDataTable:
11544 """
11545 Attributes
11546 ----------
11547 functionTable : Any
11549 functionWindowSize : size_t
11551 dataTable : Any
11553 dataWindowSize : size_t
11556 Methods
11557 -------
11558 getPtr()
11559 Get memory address of class instance
11560 """
11561 def __cinit__(self, void_ptr _ptr = 0):
11562 if _ptr == 0:
11563 self._pvt_ptr = &self._pvt_val
11564 else:
11565 self._pvt_ptr = <cyruntime.cudalibraryHostUniversalFunctionAndDataTable *>_ptr
11566 def __init__(self, void_ptr _ptr = 0):
11567 pass
11568 def __dealloc__(self):
11569 pass
11570 def getPtr(self):
11571 return <void_ptr>self._pvt_ptr
11572 def __repr__(self):
11573 if self._pvt_ptr is not NULL:
11574 str_list = []
11575 try:
11576 str_list += ['functionTable : ' + hex(self.functionTable)]
11577 except ValueError:
11578 str_list += ['functionTable : <ValueError>']
11579 try:
11580 str_list += ['functionWindowSize : ' + str(self.functionWindowSize)]
11581 except ValueError:
11582 str_list += ['functionWindowSize : <ValueError>']
11583 try:
11584 str_list += ['dataTable : ' + hex(self.dataTable)]
11585 except ValueError:
11586 str_list += ['dataTable : <ValueError>']
11587 try:
11588 str_list += ['dataWindowSize : ' + str(self.dataWindowSize)]
11589 except ValueError:
11590 str_list += ['dataWindowSize : <ValueError>']
11591 return '\n'.join(str_list)
11592 else:
11593 return ''
11594 @property
11595 def functionTable(self):
11596 return <void_ptr>self._pvt_ptr[0].functionTable
11597 @functionTable.setter
11598 def functionTable(self, functionTable):
11599 _cfunctionTable = _HelperInputVoidPtr(functionTable)
11600 self._pvt_ptr[0].functionTable = <void*><void_ptr>_cfunctionTable.cptr
11601 @property
11602 def functionWindowSize(self):
11603 return self._pvt_ptr[0].functionWindowSize
11604 @functionWindowSize.setter
11605 def functionWindowSize(self, size_t functionWindowSize):
11606 self._pvt_ptr[0].functionWindowSize = functionWindowSize
11607 @property
11608 def dataTable(self):
11609 return <void_ptr>self._pvt_ptr[0].dataTable
11610 @dataTable.setter
11611 def dataTable(self, dataTable):
11612 _cdataTable = _HelperInputVoidPtr(dataTable)
11613 self._pvt_ptr[0].dataTable = <void*><void_ptr>_cdataTable.cptr
11614 @property
11615 def dataWindowSize(self):
11616 return self._pvt_ptr[0].dataWindowSize
11617 @dataWindowSize.setter
11618 def dataWindowSize(self, size_t dataWindowSize):
11619 self._pvt_ptr[0].dataWindowSize = dataWindowSize
11621cdef class cudaKernelNodeParams:
11622 """
11623 CUDA GPU kernel node parameters
11625 Attributes
11626 ----------
11627 func : Any
11628 Kernel to launch
11629 gridDim : dim3
11630 Grid dimensions
11631 blockDim : dim3
11632 Block dimensions
11633 sharedMemBytes : unsigned int
11634 Dynamic shared-memory size per thread block in bytes
11635 kernelParams : Any
11636 Array of pointers to individual kernel arguments
11637 extra : Any
11638 Pointer to kernel arguments in the "extra" format
11640 Methods
11641 -------
11642 getPtr()
11643 Get memory address of class instance
11644 """
11645 def __cinit__(self, void_ptr _ptr = 0):
11646 if _ptr == 0:
11647 self._pvt_ptr = &self._pvt_val
11648 else:
11649 self._pvt_ptr = <cyruntime.cudaKernelNodeParams *>_ptr
11650 def __init__(self, void_ptr _ptr = 0):
11651 pass
11652 self._gridDim = dim3(_ptr=<void_ptr>&self._pvt_ptr[0].gridDim)
11653 self._blockDim = dim3(_ptr=<void_ptr>&self._pvt_ptr[0].blockDim)
11654 def __dealloc__(self):
11655 pass
11656 def getPtr(self):
11657 return <void_ptr>self._pvt_ptr
11658 def __repr__(self):
11659 if self._pvt_ptr is not NULL:
11660 str_list = []
11661 try:
11662 str_list += ['func : ' + hex(self.func)]
11663 except ValueError:
11664 str_list += ['func : <ValueError>']
11665 try:
11666 str_list += ['gridDim :\n' + '\n'.join([' ' + line for line in str(self.gridDim).splitlines()])]
11667 except ValueError:
11668 str_list += ['gridDim : <ValueError>']
11669 try:
11670 str_list += ['blockDim :\n' + '\n'.join([' ' + line for line in str(self.blockDim).splitlines()])]
11671 except ValueError:
11672 str_list += ['blockDim : <ValueError>']
11673 try:
11674 str_list += ['sharedMemBytes : ' + str(self.sharedMemBytes)]
11675 except ValueError:
11676 str_list += ['sharedMemBytes : <ValueError>']
11677 try:
11678 str_list += ['kernelParams : ' + str(self.kernelParams)]
11679 except ValueError:
11680 str_list += ['kernelParams : <ValueError>']
11681 try:
11682 str_list += ['extra : ' + str(self.extra)]
11683 except ValueError:
11684 str_list += ['extra : <ValueError>']
11685 return '\n'.join(str_list)
11686 else:
11687 return ''
11688 @property
11689 def func(self):
11690 return <void_ptr>self._pvt_ptr[0].func
11691 @func.setter
11692 def func(self, func):
11693 _cfunc = _HelperInputVoidPtr(func)
11694 self._pvt_ptr[0].func = <void*><void_ptr>_cfunc.cptr
11695 @property
11696 def gridDim(self):
11697 return self._gridDim
11698 @gridDim.setter
11699 def gridDim(self, gridDim not None : dim3):
11700 string.memcpy(&self._pvt_ptr[0].gridDim, <cyruntime.dim3*><void_ptr>gridDim.getPtr(), sizeof(self._pvt_ptr[0].gridDim))
11701 @property
11702 def blockDim(self):
11703 return self._blockDim
11704 @blockDim.setter
11705 def blockDim(self, blockDim not None : dim3):
11706 string.memcpy(&self._pvt_ptr[0].blockDim, <cyruntime.dim3*><void_ptr>blockDim.getPtr(), sizeof(self._pvt_ptr[0].blockDim))
11707 @property
11708 def sharedMemBytes(self):
11709 return self._pvt_ptr[0].sharedMemBytes
11710 @sharedMemBytes.setter
11711 def sharedMemBytes(self, unsigned int sharedMemBytes):
11712 self._pvt_ptr[0].sharedMemBytes = sharedMemBytes
11713 @property
11714 def kernelParams(self):
11715 return <void_ptr>self._pvt_ptr[0].kernelParams
11716 @kernelParams.setter
11717 def kernelParams(self, kernelParams):
11718 self._cykernelParams = _HelperKernelParams(kernelParams)
11719 self._pvt_ptr[0].kernelParams = <void**><void_ptr>self._cykernelParams.ckernelParams
11720 @property
11721 def extra(self):
11722 return <void_ptr>self._pvt_ptr[0].extra
11723 @extra.setter
11724 def extra(self, void_ptr extra):
11725 self._pvt_ptr[0].extra = <void**>extra
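# Illustrative sketch: populating a legacy kernel node parameter struct.
# `func_addr` is a placeholder for the address of a compiled kernel, and the
# (args, types) tuple form for kernelParams is one of the layouts accepted by
# _HelperKernelParams; verify against the installed binding.
#
#     import ctypes
#     p = cudaKernelNodeParams()
#     p.func = func_addr
#     p.gridDim.x, p.gridDim.y, p.gridDim.z = 4, 1, 1
#     p.blockDim.x, p.blockDim.y, p.blockDim.z = 128, 1, 1
#     p.sharedMemBytes = 0
#     p.kernelParams = ((ctypes.c_int(42),), (ctypes.c_int,))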
11727cdef class cudaKernelNodeParamsV2:
11728 """
11729 CUDA GPU kernel node parameters
11731 Attributes
11732 ----------
11733 func : Any
11734 Kernel to launch
11735 gridDim : dim3
11736 Grid dimensions
11737 blockDim : dim3
11738 Block dimensions
11739 sharedMemBytes : unsigned int
11740 Dynamic shared-memory size per thread block in bytes
11741 kernelParams : Any
11742 Array of pointers to individual kernel arguments
11743 extra : Any
11744 Pointer to kernel arguments in the "extra" format
11745 ctx : cudaExecutionContext_t
11746 Context in which to run the kernel. If NULL, the current context
11747 will be used.
11749 Methods
11750 -------
11751 getPtr()
11752 Get memory address of class instance
11753 """
11754 def __cinit__(self, void_ptr _ptr = 0):
11755 if _ptr == 0:
11756 self._pvt_ptr = &self._pvt_val
11757 else:
11758 self._pvt_ptr = <cyruntime.cudaKernelNodeParamsV2 *>_ptr
11759 def __init__(self, void_ptr _ptr = 0):
11760 pass
11761 self._gridDim = dim3(_ptr=<void_ptr>&self._pvt_ptr[0].gridDim)
11762 self._blockDim = dim3(_ptr=<void_ptr>&self._pvt_ptr[0].blockDim)
11763 self._ctx = cudaExecutionContext_t(_ptr=<void_ptr>&self._pvt_ptr[0].ctx)
11764 def __dealloc__(self):
11765 pass
11766 def getPtr(self):
11767 return <void_ptr>self._pvt_ptr
11768 def __repr__(self):
11769 if self._pvt_ptr is not NULL:
11770 str_list = []
11771 try:
11772 str_list += ['func : ' + hex(self.func)]
11773 except ValueError:
11774 str_list += ['func : <ValueError>']
11775 try:
11776 str_list += ['gridDim :\n' + '\n'.join([' ' + line for line in str(self.gridDim).splitlines()])]
11777 except ValueError:
11778 str_list += ['gridDim : <ValueError>']
11779 try:
11780 str_list += ['blockDim :\n' + '\n'.join([' ' + line for line in str(self.blockDim).splitlines()])]
11781 except ValueError:
11782 str_list += ['blockDim : <ValueError>']
11783 try:
11784 str_list += ['sharedMemBytes : ' + str(self.sharedMemBytes)]
11785 except ValueError:
11786 str_list += ['sharedMemBytes : <ValueError>']
11787 try:
11788 str_list += ['kernelParams : ' + str(self.kernelParams)]
11789 except ValueError:
11790 str_list += ['kernelParams : <ValueError>']
11791 try:
11792 str_list += ['extra : ' + str(self.extra)]
11793 except ValueError:
11794 str_list += ['extra : <ValueError>']
11795 try:
11796 str_list += ['ctx : ' + str(self.ctx)]
11797 except ValueError:
11798 str_list += ['ctx : <ValueError>']
11799 return '\n'.join(str_list)
11800 else:
11801 return ''
11802 @property
11803 def func(self):
11804 return <void_ptr>self._pvt_ptr[0].func
11805 @func.setter
11806 def func(self, func):
11807 _cfunc = _HelperInputVoidPtr(func)
11808 self._pvt_ptr[0].func = <void*><void_ptr>_cfunc.cptr
11809 @property
11810 def gridDim(self):
11811 return self._gridDim
11812 @gridDim.setter
11813 def gridDim(self, gridDim not None : dim3):
11814 string.memcpy(&self._pvt_ptr[0].gridDim, <cyruntime.dim3*><void_ptr>gridDim.getPtr(), sizeof(self._pvt_ptr[0].gridDim))
11815 @property
11816 def blockDim(self):
11817 return self._blockDim
11818 @blockDim.setter
11819 def blockDim(self, blockDim not None : dim3):
11820 string.memcpy(&self._pvt_ptr[0].blockDim, <cyruntime.dim3*><void_ptr>blockDim.getPtr(), sizeof(self._pvt_ptr[0].blockDim))
11821 @property
11822 def sharedMemBytes(self):
11823 return self._pvt_ptr[0].sharedMemBytes
11824 @sharedMemBytes.setter
11825 def sharedMemBytes(self, unsigned int sharedMemBytes):
11826 self._pvt_ptr[0].sharedMemBytes = sharedMemBytes
11827 @property
11828 def kernelParams(self):
11829 return <void_ptr>self._pvt_ptr[0].kernelParams
11830 @kernelParams.setter
11831 def kernelParams(self, kernelParams):
11832 self._cykernelParams = _HelperKernelParams(kernelParams)
11833 self._pvt_ptr[0].kernelParams = <void**><void_ptr>self._cykernelParams.ckernelParams
11834 @property
11835 def extra(self):
11836 return <void_ptr>self._pvt_ptr[0].extra
11837 @extra.setter
11838 def extra(self, void_ptr extra):
11839 self._pvt_ptr[0].extra = <void**>extra
11840 @property
11841 def ctx(self):
11842 return self._ctx
11843 @ctx.setter
11844 def ctx(self, ctx):
11845 cdef cyruntime.cudaExecutionContext_t cyctx
11846 if ctx is None:
11847 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>0
11848 elif isinstance(ctx, (cudaExecutionContext_t,)):
11849 pctx = int(ctx)
11850 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
11851 else:
11852 pctx = int(cudaExecutionContext_t(ctx))
11853 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
11854 self._ctx._pvt_ptr[0] = cyctx
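# Illustrative sketch: the V2 struct differs from cudaKernelNodeParams only by
# the trailing `ctx` member. As the setter above shows, assigning None stores
# a NULL cudaExecutionContext_t, which selects the current context at launch:
#
#     p2 = cudaKernelNodeParamsV2()
#     p2.ctx = None  # NULL -> run in the current context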
11856cdef class cudaExternalSemaphoreSignalNodeParams:
11857 """
11858 External semaphore signal node parameters
11860 Attributes
11861 ----------
11862 extSemArray : cudaExternalSemaphore_t
11863 Array of external semaphore handles.
11864 paramsArray : cudaExternalSemaphoreSignalParams
11865 Array of external semaphore signal parameters.
11866 numExtSems : unsigned int
11867 Number of handles and parameters supplied in extSemArray and
11868 paramsArray.
11870 Methods
11871 -------
11872 getPtr()
11873 Get memory address of class instance
11874 """
11875 def __cinit__(self, void_ptr _ptr = 0):
11876 if _ptr == 0:
11877 self._pvt_ptr = &self._pvt_val
11878 else:
11879 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreSignalNodeParams *>_ptr
11880 def __init__(self, void_ptr _ptr = 0):
11881 pass
11882 def __dealloc__(self):
11883 pass
11884 if self._extSemArray is not NULL:
11885 free(self._extSemArray)
11886 if self._paramsArray is not NULL:
11887 free(self._paramsArray)
11888 def getPtr(self):
11889 return <void_ptr>self._pvt_ptr
11890 def __repr__(self):
11891 if self._pvt_ptr is not NULL:
11892 str_list = []
11893 try:
11894 str_list += ['extSemArray : ' + str(self.extSemArray)]
11895 except ValueError:
11896 str_list += ['extSemArray : <ValueError>']
11897 try:
11898 str_list += ['paramsArray : ' + str(self.paramsArray)]
11899 except ValueError:
11900 str_list += ['paramsArray : <ValueError>']
11901 try:
11902 str_list += ['numExtSems : ' + str(self.numExtSems)]
11903 except ValueError:
11904 str_list += ['numExtSems : <ValueError>']
11905 return '\n'.join(str_list)
11906 else:
11907 return ''
11908 @property
11909 def extSemArray(self):
11910 arrs = [<void_ptr>self._pvt_ptr[0].extSemArray + x*sizeof(cyruntime.cudaExternalSemaphore_t) for x in range(self._extSemArray_length)]
11911 return [cudaExternalSemaphore_t(_ptr=arr) for arr in arrs]
11912 @extSemArray.setter
11913 def extSemArray(self, val):
11914 if len(val) == 0:
11915 free(self._extSemArray)
11916 self._extSemArray_length = 0
11917 self._pvt_ptr[0].extSemArray = NULL
11918 else:
11919 if self._extSemArray_length != <size_t>len(val):
11920 free(self._extSemArray)
11921 self._extSemArray = <cyruntime.cudaExternalSemaphore_t*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphore_t))
11922 if self._extSemArray is NULL:
11923 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t)))
11924 self._extSemArray_length = <size_t>len(val)
11925 self._pvt_ptr[0].extSemArray = self._extSemArray
11926 for idx in range(len(val)):
11927 self._extSemArray[idx] = (<cudaExternalSemaphore_t>val[idx])._pvt_ptr[0]
11929 @property
11930 def paramsArray(self):
11931 arrs = [<void_ptr>self._pvt_ptr[0].paramsArray + x*sizeof(cyruntime.cudaExternalSemaphoreSignalParams) for x in range(self._paramsArray_length)]
11932 return [cudaExternalSemaphoreSignalParams(_ptr=arr) for arr in arrs]
11933 @paramsArray.setter
11934 def paramsArray(self, val):
11935 if len(val) == 0:
11936 free(self._paramsArray)
11937 self._paramsArray_length = 0
11938 self._pvt_ptr[0].paramsArray = NULL
11939 else:
11940 if self._paramsArray_length != <size_t>len(val):
11941 free(self._paramsArray)
11942 self._paramsArray = <cyruntime.cudaExternalSemaphoreSignalParams*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphoreSignalParams))
11943 if self._paramsArray is NULL:
11944 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreSignalParams)))
11945 self._paramsArray_length = <size_t>len(val)
11946 self._pvt_ptr[0].paramsArray = self._paramsArray
11947 for idx in range(len(val)):
11948 string.memcpy(&self._paramsArray[idx], (<cudaExternalSemaphoreSignalParams>val[idx])._pvt_ptr, sizeof(cyruntime.cudaExternalSemaphoreSignalParams))
11950 @property
11951 def numExtSems(self):
11952 return self._pvt_ptr[0].numExtSems
11953 @numExtSems.setter
11954 def numExtSems(self, unsigned int numExtSems):
11955 self._pvt_ptr[0].numExtSems = numExtSems
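# Illustrative sketch: the extSemArray/paramsArray setters above copy each
# element into C-owned storage sized by calloc, so the Python lists need not
# outlive the assignment. `sem` is a placeholder cudaExternalSemaphore_t
# obtained from cudaImportExternalSemaphore:
#
#     sig = cudaExternalSemaphoreSignalNodeParams()
#     sig.extSemArray = [sem]
#     sig.paramsArray = [cudaExternalSemaphoreSignalParams()]
#     sig.numExtSems = 1  # must match the array lengths supplied above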
11957cdef class cudaExternalSemaphoreSignalNodeParamsV2:
11958 """
11959 External semaphore signal node parameters
11961 Attributes
11962 ----------
11963 extSemArray : cudaExternalSemaphore_t
11964 Array of external semaphore handles.
11965 paramsArray : cudaExternalSemaphoreSignalParams
11966 Array of external semaphore signal parameters.
11967 numExtSems : unsigned int
11968 Number of handles and parameters supplied in extSemArray and
11969 paramsArray.
11971 Methods
11972 -------
11973 getPtr()
11974 Get memory address of class instance
11975 """
11976 def __cinit__(self, void_ptr _ptr = 0):
11977 if _ptr == 0:
11978 self._pvt_ptr = &self._pvt_val
11979 else:
11980 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreSignalNodeParamsV2 *>_ptr
11981 def __init__(self, void_ptr _ptr = 0):
11982 pass
11983 def __dealloc__(self):
11984 pass
11985 if self._extSemArray is not NULL:
11986 free(self._extSemArray)
11987 if self._paramsArray is not NULL:
11988 free(self._paramsArray)
11989 def getPtr(self):
11990 return <void_ptr>self._pvt_ptr
11991 def __repr__(self):
11992 if self._pvt_ptr is not NULL:
11993 str_list = []
11994 try:
11995 str_list += ['extSemArray : ' + str(self.extSemArray)]
11996 except ValueError:
11997 str_list += ['extSemArray : <ValueError>']
11998 try:
11999 str_list += ['paramsArray : ' + str(self.paramsArray)]
12000 except ValueError:
12001 str_list += ['paramsArray : <ValueError>']
12002 try:
12003 str_list += ['numExtSems : ' + str(self.numExtSems)]
12004 except ValueError:
12005 str_list += ['numExtSems : <ValueError>']
12006 return '\n'.join(str_list)
12007 else:
12008 return ''
12009 @property
12010 def extSemArray(self):
12011 arrs = [<void_ptr>self._pvt_ptr[0].extSemArray + x*sizeof(cyruntime.cudaExternalSemaphore_t) for x in range(self._extSemArray_length)]
12012 return [cudaExternalSemaphore_t(_ptr=arr) for arr in arrs]
12013 @extSemArray.setter
12014 def extSemArray(self, val):
12015 if len(val) == 0:
12016 free(self._extSemArray)
12017 self._extSemArray_length = 0
12018 self._pvt_ptr[0].extSemArray = NULL
12019 else:
12020 if self._extSemArray_length != <size_t>len(val):
12021 free(self._extSemArray)
12022 self._extSemArray = <cyruntime.cudaExternalSemaphore_t*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphore_t))
12023 if self._extSemArray is NULL:
12024 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t)))
12025 self._extSemArray_length = <size_t>len(val)
12026 self._pvt_ptr[0].extSemArray = self._extSemArray
12027 for idx in range(len(val)):
12028 self._extSemArray[idx] = (<cudaExternalSemaphore_t>val[idx])._pvt_ptr[0]
12030 @property
12031 def paramsArray(self):
12032 arrs = [<void_ptr>self._pvt_ptr[0].paramsArray + x*sizeof(cyruntime.cudaExternalSemaphoreSignalParams) for x in range(self._paramsArray_length)]
12033 return [cudaExternalSemaphoreSignalParams(_ptr=arr) for arr in arrs]
12034 @paramsArray.setter
12035 def paramsArray(self, val):
12036 if len(val) == 0:
12037 free(self._paramsArray)
12038 self._paramsArray_length = 0
12039 self._pvt_ptr[0].paramsArray = NULL
12040 else:
12041 if self._paramsArray_length != <size_t>len(val):
12042 free(self._paramsArray)
12043 self._paramsArray = <cyruntime.cudaExternalSemaphoreSignalParams*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphoreSignalParams))
12044 if self._paramsArray is NULL:
12045 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreSignalParams)))
12046 self._paramsArray_length = <size_t>len(val)
12047 self._pvt_ptr[0].paramsArray = self._paramsArray
12048 for idx in range(len(val)):
12049 string.memcpy(&self._paramsArray[idx], (<cudaExternalSemaphoreSignalParams>val[idx])._pvt_ptr, sizeof(cyruntime.cudaExternalSemaphoreSignalParams))
12051 @property
12052 def numExtSems(self):
12053 return self._pvt_ptr[0].numExtSems
12054 @numExtSems.setter
12055 def numExtSems(self, unsigned int numExtSems):
12056 self._pvt_ptr[0].numExtSems = numExtSems
12058cdef class cudaExternalSemaphoreWaitNodeParams:
12059 """
12060 External semaphore wait node parameters
12062 Attributes
12063 ----------
12064 extSemArray : cudaExternalSemaphore_t
12065 Array of external semaphore handles.
12066 paramsArray : cudaExternalSemaphoreWaitParams
12067 Array of external semaphore wait parameters.
12068 numExtSems : unsigned int
12069 Number of handles and parameters supplied in extSemArray and
12070 paramsArray.
12072 Methods
12073 -------
12074 getPtr()
12075 Get memory address of class instance
12076 """
12077 def __cinit__(self, void_ptr _ptr = 0):
12078 if _ptr == 0:
12079 self._pvt_ptr = &self._pvt_val
12080 else:
12081 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreWaitNodeParams *>_ptr
12082 def __init__(self, void_ptr _ptr = 0):
12083 pass
12084 def __dealloc__(self):
12085 pass
12086 if self._extSemArray is not NULL:
12087 free(self._extSemArray)
12088 if self._paramsArray is not NULL:
12089 free(self._paramsArray)
12090 def getPtr(self):
12091 return <void_ptr>self._pvt_ptr
12092 def __repr__(self):
12093 if self._pvt_ptr is not NULL:
12094 str_list = []
12095 try:
12096 str_list += ['extSemArray : ' + str(self.extSemArray)]
12097 except ValueError:
12098 str_list += ['extSemArray : <ValueError>']
12099 try:
12100 str_list += ['paramsArray : ' + str(self.paramsArray)]
12101 except ValueError:
12102 str_list += ['paramsArray : <ValueError>']
12103 try:
12104 str_list += ['numExtSems : ' + str(self.numExtSems)]
12105 except ValueError:
12106 str_list += ['numExtSems : <ValueError>']
12107 return '\n'.join(str_list)
12108 else:
12109 return ''
12110 @property
12111 def extSemArray(self):
12112 arrs = [<void_ptr>self._pvt_ptr[0].extSemArray + x*sizeof(cyruntime.cudaExternalSemaphore_t) for x in range(self._extSemArray_length)]
12113 return [cudaExternalSemaphore_t(_ptr=arr) for arr in arrs]
12114 @extSemArray.setter
12115 def extSemArray(self, val):
12116 if len(val) == 0:
12117 free(self._extSemArray)
12118 self._extSemArray_length = 0
12119 self._pvt_ptr[0].extSemArray = NULL
12120 else:
12121 if self._extSemArray_length != <size_t>len(val):
12122 free(self._extSemArray)
12123 self._extSemArray = <cyruntime.cudaExternalSemaphore_t*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphore_t))
12124 if self._extSemArray is NULL:
12125 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t)))
12126 self._extSemArray_length = <size_t>len(val)
12127 self._pvt_ptr[0].extSemArray = self._extSemArray
12128 for idx in range(len(val)):
12129 self._extSemArray[idx] = (<cudaExternalSemaphore_t>val[idx])._pvt_ptr[0]
12131 @property
12132 def paramsArray(self):
12133 arrs = [<void_ptr>self._pvt_ptr[0].paramsArray + x*sizeof(cyruntime.cudaExternalSemaphoreWaitParams) for x in range(self._paramsArray_length)]
12134 return [cudaExternalSemaphoreWaitParams(_ptr=arr) for arr in arrs]
12135 @paramsArray.setter
12136 def paramsArray(self, val):
12137 if len(val) == 0:
12138 free(self._paramsArray)
12139 self._paramsArray_length = 0
12140 self._pvt_ptr[0].paramsArray = NULL
12141 else:
12142 if self._paramsArray_length != <size_t>len(val):
12143 free(self._paramsArray)
12144 self._paramsArray = <cyruntime.cudaExternalSemaphoreWaitParams*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphoreWaitParams))
12145 if self._paramsArray is NULL:
12146 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreWaitParams)))
12147 self._paramsArray_length = <size_t>len(val)
12148 self._pvt_ptr[0].paramsArray = self._paramsArray
12149 for idx in range(len(val)):
12150 string.memcpy(&self._paramsArray[idx], (<cudaExternalSemaphoreWaitParams>val[idx])._pvt_ptr, sizeof(cyruntime.cudaExternalSemaphoreWaitParams))
12152 @property
12153 def numExtSems(self):
12154 return self._pvt_ptr[0].numExtSems
12155 @numExtSems.setter
12156 def numExtSems(self, unsigned int numExtSems):
12157 self._pvt_ptr[0].numExtSems = numExtSems
12159cdef class cudaExternalSemaphoreWaitNodeParamsV2:
12160 """
12161 External semaphore wait node parameters
12163 Attributes
12164 ----------
12165 extSemArray : cudaExternalSemaphore_t
12166 Array of external semaphore handles.
12167 paramsArray : cudaExternalSemaphoreWaitParams
12168 Array of external semaphore wait parameters.
12169 numExtSems : unsigned int
12170 Number of handles and parameters supplied in extSemArray and
12171 paramsArray.
12173 Methods
12174 -------
12175 getPtr()
12176 Get memory address of class instance
12177 """
12178 def __cinit__(self, void_ptr _ptr = 0):
12179 if _ptr == 0:
12180 self._pvt_ptr = &self._pvt_val
12181 else:
12182 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreWaitNodeParamsV2 *>_ptr
12183 def __init__(self, void_ptr _ptr = 0):
12184 pass
12185 def __dealloc__(self):
12186 pass
12187 if self._extSemArray is not NULL:
12188 free(self._extSemArray)
12189 if self._paramsArray is not NULL:
12190 free(self._paramsArray)
12191 def getPtr(self):
12192 return <void_ptr>self._pvt_ptr
12193 def __repr__(self):
12194 if self._pvt_ptr is not NULL:
12195 str_list = []
12196 try:
12197 str_list += ['extSemArray : ' + str(self.extSemArray)]
12198 except ValueError:
12199 str_list += ['extSemArray : <ValueError>']
12200 try:
12201 str_list += ['paramsArray : ' + str(self.paramsArray)]
12202 except ValueError:
12203 str_list += ['paramsArray : <ValueError>']
12204 try:
12205 str_list += ['numExtSems : ' + str(self.numExtSems)]
12206 except ValueError:
12207 str_list += ['numExtSems : <ValueError>']
12208 return '\n'.join(str_list)
12209 else:
12210 return ''
12211 @property
12212 def extSemArray(self):
12213 arrs = [<void_ptr>self._pvt_ptr[0].extSemArray + x*sizeof(cyruntime.cudaExternalSemaphore_t) for x in range(self._extSemArray_length)]
12214 return [cudaExternalSemaphore_t(_ptr=arr) for arr in arrs]
12215 @extSemArray.setter
12216 def extSemArray(self, val):
12217 if len(val) == 0:
12218 free(self._extSemArray)
12219 self._extSemArray_length = 0
12220 self._pvt_ptr[0].extSemArray = NULL
12221 else:
12222 if self._extSemArray_length != <size_t>len(val):
12223 free(self._extSemArray)
12224 self._extSemArray = <cyruntime.cudaExternalSemaphore_t*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphore_t))
12225 if self._extSemArray is NULL:
12226 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t)))
12227 self._extSemArray_length = <size_t>len(val)
12228 self._pvt_ptr[0].extSemArray = self._extSemArray
12229 for idx in range(len(val)):
12230 self._extSemArray[idx] = (<cudaExternalSemaphore_t>val[idx])._pvt_ptr[0]
12232 @property
12233 def paramsArray(self):
12234 arrs = [<void_ptr>self._pvt_ptr[0].paramsArray + x*sizeof(cyruntime.cudaExternalSemaphoreWaitParams) for x in range(self._paramsArray_length)]
12235 return [cudaExternalSemaphoreWaitParams(_ptr=arr) for arr in arrs]
12236 @paramsArray.setter
12237 def paramsArray(self, val):
12238 if len(val) == 0:
12239 free(self._paramsArray)
12240 self._paramsArray_length = 0
12241 self._pvt_ptr[0].paramsArray = NULL
12242 else:
12243 if self._paramsArray_length != <size_t>len(val):
12244 free(self._paramsArray)
12245 self._paramsArray = <cyruntime.cudaExternalSemaphoreWaitParams*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphoreWaitParams))
12246 if self._paramsArray is NULL:
12247 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreWaitParams)))
12248 self._paramsArray_length = <size_t>len(val)
12249 self._pvt_ptr[0].paramsArray = self._paramsArray
12250 for idx in range(len(val)):
12251 string.memcpy(&self._paramsArray[idx], (<cudaExternalSemaphoreWaitParams>val[idx])._pvt_ptr, sizeof(cyruntime.cudaExternalSemaphoreWaitParams))
12253 @property
12254 def numExtSems(self):
12255 return self._pvt_ptr[0].numExtSems
12256 @numExtSems.setter
12257 def numExtSems(self, unsigned int numExtSems):
12258 self._pvt_ptr[0].numExtSems = numExtSems
12260cdef class cudaConditionalNodeParams:
12261 """
12262 CUDA conditional node parameters
12264 Attributes
12265 ----------
12266 handle : cudaGraphConditionalHandle
12267 Conditional node handle. Handles must be created in advance of
12268 creating the node using cudaGraphConditionalHandleCreate.
12269 type : cudaGraphConditionalNodeType
12270 Type of conditional node.
12271 size : unsigned int
12272 Size of graph output array. Allowed values are 1 for
12273 cudaGraphCondTypeWhile, 1 or 2 for cudaGraphCondTypeIf, or any
12274 value greater than zero for cudaGraphCondTypeSwitch.
12275 phGraph_out : cudaGraph_t
12276 CUDA-owned array populated with conditional node child graphs
12277 during creation of the node. Valid for the lifetime of the
12278 conditional node. The contents of the graph(s) are subject to the
12279 following constraints: - Allowed node types are kernel nodes,
12280 empty nodes, child graphs, memsets, memcopies, and conditionals.
12281 This applies recursively to child graphs and conditional bodies.
12282 - All kernels, including kernels in nested conditionals or child
12283 graphs at any level, must belong to the same CUDA context.
12284 These graphs may be populated using graph node creation APIs or
12285 cudaStreamBeginCaptureToGraph. cudaGraphCondTypeIf: phGraph_out[0]
12286 is executed when the condition is non-zero. If `size` == 2,
12287 phGraph_out[1] will be executed when the condition is zero.
12288 cudaGraphCondTypeWhile: phGraph_out[0] is executed as long as the
12289 condition is non-zero. cudaGraphCondTypeSwitch: phGraph_out[n] is
12290 executed when the condition is equal to n. If the condition >=
12291 `size`, no body graph is executed.
12292 ctx : cudaExecutionContext_t
12293 CUDA Execution Context
12295 Methods
12296 -------
12297 getPtr()
12298 Get memory address of class instance
12299 """
12300 def __cinit__(self, void_ptr _ptr = 0):
12301 if _ptr == 0:
12302 self._pvt_ptr = &self._pvt_val
12303 else:
12304 self._pvt_ptr = <cyruntime.cudaConditionalNodeParams *>_ptr
12305 def __init__(self, void_ptr _ptr = 0):
12306 pass
12307 self._handle = cudaGraphConditionalHandle(_ptr=<void_ptr>&self._pvt_ptr[0].handle)
12308 self._ctx = cudaExecutionContext_t(_ptr=<void_ptr>&self._pvt_ptr[0].ctx)
12309 def __dealloc__(self):
12310 pass
12311 def getPtr(self):
12312 return <void_ptr>self._pvt_ptr
12313 def __repr__(self):
12314 if self._pvt_ptr is not NULL:
12315 str_list = []
12316 try:
12317 str_list += ['handle : ' + str(self.handle)]
12318 except ValueError:
12319 str_list += ['handle : <ValueError>']
12320 try:
12321 str_list += ['type : ' + str(self.type)]
12322 except ValueError:
12323 str_list += ['type : <ValueError>']
12324 try:
12325 str_list += ['size : ' + str(self.size)]
12326 except ValueError:
12327 str_list += ['size : <ValueError>']
12328 try:
12329 str_list += ['phGraph_out : ' + str(self.phGraph_out)]
12330 except ValueError:
12331 str_list += ['phGraph_out : <ValueError>']
12332 try:
12333 str_list += ['ctx : ' + str(self.ctx)]
12334 except ValueError:
12335 str_list += ['ctx : <ValueError>']
12336 return '\n'.join(str_list)
12337 else:
12338 return ''
12339 @property
12340 def handle(self):
12341 return self._handle
12342 @handle.setter
12343 def handle(self, handle):
12344 cdef cyruntime.cudaGraphConditionalHandle cyhandle
12345 if handle is None:
12346 cyhandle = <cyruntime.cudaGraphConditionalHandle><void_ptr>0
12347 elif isinstance(handle, (cudaGraphConditionalHandle,)):
12348 phandle = int(handle)
12349 cyhandle = <cyruntime.cudaGraphConditionalHandle><void_ptr>phandle
12350 else:
12351 phandle = int(cudaGraphConditionalHandle(handle))
12352 cyhandle = <cyruntime.cudaGraphConditionalHandle><void_ptr>phandle
12353 self._handle._pvt_ptr[0] = cyhandle
12355 @property
12356 def type(self):
12357 if self._pvt_ptr[0].type not in _dict_cudaGraphConditionalNodeType:
12358 return None
12359 return _dict_cudaGraphConditionalNodeType[self._pvt_ptr[0].type]
12360 @type.setter
12361 def type(self, type not None : cudaGraphConditionalNodeType):
12362 self._pvt_ptr[0].type = type.value
12363 @property
12364 def size(self):
12365 return self._pvt_ptr[0].size
12366 @size.setter
12367 def size(self, unsigned int size):
12368 self._pvt_ptr[0].size = size
12369 @property
12370 def phGraph_out(self):
12371 arrs = [<void_ptr>self._pvt_ptr[0].phGraph_out + x*sizeof(cyruntime.cudaGraph_t) for x in range(self.size)]
12372 return [cudaGraph_t(_ptr=arr) for arr in arrs]
12373 @property
12374 def ctx(self):
12375 return self._ctx
12376 @ctx.setter
12377 def ctx(self, ctx):
12378 cdef cyruntime.cudaExecutionContext_t cyctx
12379 if ctx is None:
12380 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>0
12381 elif isinstance(ctx, (cudaExecutionContext_t,)):
12382 pctx = int(ctx)
12383 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
12384 else:
12385 pctx = int(cudaExecutionContext_t(ctx))
12386 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
12387 self._ctx._pvt_ptr[0] = cyctx
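# Illustrative sketch: handles must exist before the node is created, per the
# docstring above. Assumes the cudaGraphConditionalHandleCreate wrapper takes
# (graph, defaultLaunchValue, flags) and returns (err, handle):
#
#     err, handle = cudaGraphConditionalHandleCreate(graph, 0, 0)
#     cond = cudaConditionalNodeParams()
#     cond.handle = handle
#     cond.type = cudaGraphConditionalNodeType.cudaGraphCondTypeIf
#     cond.size = 1  # 2 would request an additional else-body graph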
12389cdef class cudaChildGraphNodeParams:
12390 """
12391 Child graph node parameters
12393 Attributes
12394 ----------
12395 graph : cudaGraph_t
12396 The child graph to clone into the node for node creation, or a
12397 handle to the graph owned by the node for node query. The graph
12398 must not contain conditional nodes. Graphs containing memory
12399 allocation or memory free nodes must set the ownership to be moved
12400 to the parent.
12401 ownership : cudaGraphChildGraphNodeOwnership
12402 The ownership relationship of the child graph node.
12404 Methods
12405 -------
12406 getPtr()
12407 Get memory address of class instance
12408 """
12409 def __cinit__(self, void_ptr _ptr = 0):
12410 if _ptr == 0:
12411 self._pvt_ptr = &self._pvt_val
12412 else:
12413 self._pvt_ptr = <cyruntime.cudaChildGraphNodeParams *>_ptr
12414 def __init__(self, void_ptr _ptr = 0):
12415 pass
12416 self._graph = cudaGraph_t(_ptr=<void_ptr>&self._pvt_ptr[0].graph)
12417 def __dealloc__(self):
12418 pass
12419 def getPtr(self):
12420 return <void_ptr>self._pvt_ptr
12421 def __repr__(self):
12422 if self._pvt_ptr is not NULL:
12423 str_list = []
12424 try:
12425 str_list += ['graph : ' + str(self.graph)]
12426 except ValueError:
12427 str_list += ['graph : <ValueError>']
12428 try:
12429 str_list += ['ownership : ' + str(self.ownership)]
12430 except ValueError:
12431 str_list += ['ownership : <ValueError>']
12432 return '\n'.join(str_list)
12433 else:
12434 return ''
12435 @property
12436 def graph(self):
12437 return self._graph
12438 @graph.setter
12439 def graph(self, graph):
12440 cdef cyruntime.cudaGraph_t cygraph
12441 if graph is None:
12442 cygraph = <cyruntime.cudaGraph_t><void_ptr>0
12443 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
12444 pgraph = int(graph)
12445 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
12446 else:
12447 pgraph = int(cudaGraph_t(graph))
12448 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
12449 self._graph._pvt_ptr[0] = cygraph
12450 @property
12451 def ownership(self):
12452 if self._pvt_ptr[0].ownership not in _dict_cudaGraphChildGraphNodeOwnership:
12453 return None
12454 return _dict_cudaGraphChildGraphNodeOwnership[self._pvt_ptr[0].ownership]
12455 @ownership.setter
12456 def ownership(self, ownership not None : cudaGraphChildGraphNodeOwnership):
12457 self._pvt_ptr[0].ownership = ownership.value
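# Illustrative sketch: `child_graph` is a placeholder for a previously built
# cudaGraph_t. The ownership member name is assumed from the C enum
# (cudaGraphChildGraphOwnershipClone / ...Move); graphs containing mem-alloc
# or mem-free nodes require the move ownership, per the docstring above:
#
#     cg = cudaChildGraphNodeParams()
#     cg.graph = child_graph
#     cg.ownership = cudaGraphChildGraphNodeOwnership.cudaGraphChildGraphOwnershipClone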
12459cdef class cudaEventRecordNodeParams:
12460 """
12461 Event record node parameters
12463 Attributes
12464 ----------
12465 event : cudaEvent_t
12466 The event to record when the node executes
12468 Methods
12469 -------
12470 getPtr()
12471 Get memory address of class instance
12472 """
12473 def __cinit__(self, void_ptr _ptr = 0):
12474 if _ptr == 0:
12475 self._pvt_ptr = &self._pvt_val
12476 else:
12477 self._pvt_ptr = <cyruntime.cudaEventRecordNodeParams *>_ptr
12478 def __init__(self, void_ptr _ptr = 0):
12479 pass
12480 self._event = cudaEvent_t(_ptr=<void_ptr>&self._pvt_ptr[0].event)
12481 def __dealloc__(self):
12482 pass
12483 def getPtr(self):
12484 return <void_ptr>self._pvt_ptr
12485 def __repr__(self):
12486 if self._pvt_ptr is not NULL:
12487 str_list = []
12488 try:
12489 str_list += ['event : ' + str(self.event)]
12490 except ValueError:
12491 str_list += ['event : <ValueError>']
12492 return '\n'.join(str_list)
12493 else:
12494 return ''
12495 @property
12496 def event(self):
12497 return self._event
12498 @event.setter
12499 def event(self, event):
12500 cdef cyruntime.cudaEvent_t cyevent
12501 if event is None:
12502 cyevent = <cyruntime.cudaEvent_t><void_ptr>0
12503 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
12504 pevent = int(event)
12505 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
12506 else:
12507 pevent = int(cudaEvent_t(event))
12508 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
12509 self._event._pvt_ptr[0] = cyevent
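# Illustrative sketch: the event setter above accepts a cudaEvent_t or a
# driver.CUevent, so either API's event objects interoperate here:
#
#     err, ev = cudaEventCreate()
#     rec = cudaEventRecordNodeParams()
#     rec.event = ev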
12511cdef class cudaEventWaitNodeParams:
12512 """
12513 Event wait node parameters
12515 Attributes
12516 ----------
12517 event : cudaEvent_t
12518 The event to wait on from the node
12520 Methods
12521 -------
12522 getPtr()
12523 Get memory address of class instance
12524 """
12525 def __cinit__(self, void_ptr _ptr = 0):
12526 if _ptr == 0:
12527 self._pvt_ptr = &self._pvt_val
12528 else:
12529 self._pvt_ptr = <cyruntime.cudaEventWaitNodeParams *>_ptr
12530 def __init__(self, void_ptr _ptr = 0):
12531 pass
12532 self._event = cudaEvent_t(_ptr=<void_ptr>&self._pvt_ptr[0].event)
12533 def __dealloc__(self):
12534 pass
12535 def getPtr(self):
12536 return <void_ptr>self._pvt_ptr
12537 def __repr__(self):
12538 if self._pvt_ptr is not NULL:
12539 str_list = []
12540 try:
12541 str_list += ['event : ' + str(self.event)]
12542 except ValueError:
12543 str_list += ['event : <ValueError>']
12544 return '\n'.join(str_list)
12545 else:
12546 return ''
12547 @property
12548 def event(self):
12549 return self._event
12550 @event.setter
12551 def event(self, event):
12552 cdef cyruntime.cudaEvent_t cyevent
12553 if event is None:
12554 cyevent = <cyruntime.cudaEvent_t><void_ptr>0
12555 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
12556 pevent = int(event)
12557 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
12558 else:
12559 pevent = int(cudaEvent_t(event))
12560 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
12561 self._event._pvt_ptr[0] = cyevent
12563cdef class cudaGraphNodeParams:
12564 """
12565 Graph node parameters. See cudaGraphAddNode.
12567 Attributes
12568 ----------
12569 type : cudaGraphNodeType
12570 Type of the node
12571 reserved0 : list[int]
12572 Reserved. Must be zero.
12573 reserved1 : list[long long]
12574 Padding. Unused bytes must be zero.
12575 kernel : cudaKernelNodeParamsV2
12576 Kernel node parameters.
12577 memcpy : cudaMemcpyNodeParams
12578 Memcpy node parameters.
12579 memset : cudaMemsetParamsV2
12580 Memset node parameters.
12581 host : cudaHostNodeParamsV2
12582 Host node parameters.
12583 graph : cudaChildGraphNodeParams
12584 Child graph node parameters.
12585 eventWait : cudaEventWaitNodeParams
12586 Event wait node parameters.
12587 eventRecord : cudaEventRecordNodeParams
12588 Event record node parameters.
12589 extSemSignal : cudaExternalSemaphoreSignalNodeParamsV2
12590 External semaphore signal node parameters.
12591 extSemWait : cudaExternalSemaphoreWaitNodeParamsV2
12592 External semaphore wait node parameters.
12593 alloc : cudaMemAllocNodeParamsV2
12594 Memory allocation node parameters.
12595 free : cudaMemFreeNodeParams
12596 Memory free node parameters.
12597 conditional : cudaConditionalNodeParams
12598 Conditional node parameters.
12599 reserved2 : long long
12600 Reserved bytes. Must be zero.
12602 Methods
12603 -------
12604 getPtr()
12605 Get memory address of class instance
12606 """
12607 def __cinit__(self, void_ptr _ptr = 0):
12608 if _ptr == 0:
12609 self._val_ptr = <cyruntime.cudaGraphNodeParams *>calloc(1, sizeof(cyruntime.cudaGraphNodeParams))
12610 self._pvt_ptr = self._val_ptr
12611 else:
12612 self._pvt_ptr = <cyruntime.cudaGraphNodeParams *>_ptr
12613 def __init__(self, void_ptr _ptr = 0):
12614 pass
12615 self._kernel = cudaKernelNodeParamsV2(_ptr=<void_ptr>&self._pvt_ptr[0].kernel)
12616 self._memcpy = cudaMemcpyNodeParams(_ptr=<void_ptr>&self._pvt_ptr[0].memcpy)
12617 self._memset = cudaMemsetParamsV2(_ptr=<void_ptr>&self._pvt_ptr[0].memset)
12618 self._host = cudaHostNodeParamsV2(_ptr=<void_ptr>&self._pvt_ptr[0].host)
12619 self._graph = cudaChildGraphNodeParams(_ptr=<void_ptr>&self._pvt_ptr[0].graph)
12620 self._eventWait = cudaEventWaitNodeParams(_ptr=<void_ptr>&self._pvt_ptr[0].eventWait)
12621 self._eventRecord = cudaEventRecordNodeParams(_ptr=<void_ptr>&self._pvt_ptr[0].eventRecord)
12622 self._extSemSignal = cudaExternalSemaphoreSignalNodeParamsV2(_ptr=<void_ptr>&self._pvt_ptr[0].extSemSignal)
12623 self._extSemWait = cudaExternalSemaphoreWaitNodeParamsV2(_ptr=<void_ptr>&self._pvt_ptr[0].extSemWait)
12624 self._alloc = cudaMemAllocNodeParamsV2(_ptr=<void_ptr>&self._pvt_ptr[0].alloc)
12625 self._free = cudaMemFreeNodeParams(_ptr=<void_ptr>&self._pvt_ptr[0].free)
12626 self._conditional = cudaConditionalNodeParams(_ptr=<void_ptr>&self._pvt_ptr[0].conditional)
12627 def __dealloc__(self):
12628 if self._val_ptr is not NULL:
12629 free(self._val_ptr)
12630 def getPtr(self):
12631 return <void_ptr>self._pvt_ptr
12632 def __repr__(self):
12633 if self._pvt_ptr is not NULL:
12634 str_list = []
12635 try:
12636 str_list += ['type : ' + str(self.type)]
12637 except ValueError:
12638 str_list += ['type : <ValueError>']
12639 try:
12640 str_list += ['reserved0 : ' + str(self.reserved0)]
12641 except ValueError:
12642 str_list += ['reserved0 : <ValueError>']
12643 try:
12644 str_list += ['reserved1 : ' + str(self.reserved1)]
12645 except ValueError:
12646 str_list += ['reserved1 : <ValueError>']
12647 try:
12648 str_list += ['kernel :\n' + '\n'.join([' ' + line for line in str(self.kernel).splitlines()])]
12649 except ValueError:
12650 str_list += ['kernel : <ValueError>']
12651 try:
12652 str_list += ['memcpy :\n' + '\n'.join([' ' + line for line in str(self.memcpy).splitlines()])]
12653 except ValueError:
12654 str_list += ['memcpy : <ValueError>']
12655 try:
12656 str_list += ['memset :\n' + '\n'.join([' ' + line for line in str(self.memset).splitlines()])]
12657 except ValueError:
12658 str_list += ['memset : <ValueError>']
12659 try:
12660 str_list += ['host :\n' + '\n'.join([' ' + line for line in str(self.host).splitlines()])]
12661 except ValueError:
12662 str_list += ['host : <ValueError>']
12663 try:
12664 str_list += ['graph :\n' + '\n'.join([' ' + line for line in str(self.graph).splitlines()])]
12665 except ValueError:
12666 str_list += ['graph : <ValueError>']
12667 try:
12668 str_list += ['eventWait :\n' + '\n'.join([' ' + line for line in str(self.eventWait).splitlines()])]
12669 except ValueError:
12670 str_list += ['eventWait : <ValueError>']
12671 try:
12672 str_list += ['eventRecord :\n' + '\n'.join([' ' + line for line in str(self.eventRecord).splitlines()])]
12673 except ValueError:
12674 str_list += ['eventRecord : <ValueError>']
12675 try:
12676 str_list += ['extSemSignal :\n' + '\n'.join([' ' + line for line in str(self.extSemSignal).splitlines()])]
12677 except ValueError:
12678 str_list += ['extSemSignal : <ValueError>']
12679 try:
12680 str_list += ['extSemWait :\n' + '\n'.join([' ' + line for line in str(self.extSemWait).splitlines()])]
12681 except ValueError:
12682 str_list += ['extSemWait : <ValueError>']
12683 try:
12684 str_list += ['alloc :\n' + '\n'.join([' ' + line for line in str(self.alloc).splitlines()])]
12685 except ValueError:
12686 str_list += ['alloc : <ValueError>']
12687 try:
12688 str_list += ['free :\n' + '\n'.join([' ' + line for line in str(self.free).splitlines()])]
12689 except ValueError:
12690 str_list += ['free : <ValueError>']
12691 try:
12692 str_list += ['conditional :\n' + '\n'.join([' ' + line for line in str(self.conditional).splitlines()])]
12693 except ValueError:
12694 str_list += ['conditional : <ValueError>']
12695 try:
12696 str_list += ['reserved2 : ' + str(self.reserved2)]
12697 except ValueError:
12698 str_list += ['reserved2 : <ValueError>']
12699 return '\n'.join(str_list)
12700 else:
12701 return ''
12702 @property
12703 def type(self):
12704 if self._pvt_ptr[0].type not in _dict_cudaGraphNodeType:
12705 return None
12706 return _dict_cudaGraphNodeType[self._pvt_ptr[0].type]
12707 @type.setter
12708 def type(self, type not None : cudaGraphNodeType):
12709 self._pvt_ptr[0].type = type.value
12710 @property
12711 def reserved0(self):
12712 return self._pvt_ptr[0].reserved0
12713 @reserved0.setter
12714 def reserved0(self, reserved0):
12715 self._pvt_ptr[0].reserved0 = reserved0
12716 @property
12717 def reserved1(self):
12718 return self._pvt_ptr[0].reserved1
12719 @reserved1.setter
12720 def reserved1(self, reserved1):
12721 self._pvt_ptr[0].reserved1 = reserved1
12722 @property
12723 def kernel(self):
12724 return self._kernel
12725 @kernel.setter
12726 def kernel(self, kernel not None : cudaKernelNodeParamsV2):
12727 string.memcpy(&self._pvt_ptr[0].kernel, <cyruntime.cudaKernelNodeParamsV2*><void_ptr>kernel.getPtr(), sizeof(self._pvt_ptr[0].kernel))
12728 @property
12729 def memcpy(self):
12730 return self._memcpy
12731 @memcpy.setter
12732 def memcpy(self, memcpy not None : cudaMemcpyNodeParams):
12733 string.memcpy(&self._pvt_ptr[0].memcpy, <cyruntime.cudaMemcpyNodeParams*><void_ptr>memcpy.getPtr(), sizeof(self._pvt_ptr[0].memcpy))
12734 @property
12735 def memset(self):
12736 return self._memset
12737 @memset.setter
12738 def memset(self, memset not None : cudaMemsetParamsV2):
12739 string.memcpy(&self._pvt_ptr[0].memset, <cyruntime.cudaMemsetParamsV2*><void_ptr>memset.getPtr(), sizeof(self._pvt_ptr[0].memset))
12740 @property
12741 def host(self):
12742 return self._host
12743 @host.setter
12744 def host(self, host not None : cudaHostNodeParamsV2):
12745 string.memcpy(&self._pvt_ptr[0].host, <cyruntime.cudaHostNodeParamsV2*><void_ptr>host.getPtr(), sizeof(self._pvt_ptr[0].host))
12746 @property
12747 def graph(self):
12748 return self._graph
12749 @graph.setter
12750 def graph(self, graph not None : cudaChildGraphNodeParams):
12751 string.memcpy(&self._pvt_ptr[0].graph, <cyruntime.cudaChildGraphNodeParams*><void_ptr>graph.getPtr(), sizeof(self._pvt_ptr[0].graph))
12752 @property
12753 def eventWait(self):
12754 return self._eventWait
12755 @eventWait.setter
12756 def eventWait(self, eventWait not None : cudaEventWaitNodeParams):
12757 string.memcpy(&self._pvt_ptr[0].eventWait, <cyruntime.cudaEventWaitNodeParams*><void_ptr>eventWait.getPtr(), sizeof(self._pvt_ptr[0].eventWait))
12758 @property
12759 def eventRecord(self):
12760 return self._eventRecord
12761 @eventRecord.setter
12762 def eventRecord(self, eventRecord not None : cudaEventRecordNodeParams):
12763 string.memcpy(&self._pvt_ptr[0].eventRecord, <cyruntime.cudaEventRecordNodeParams*><void_ptr>eventRecord.getPtr(), sizeof(self._pvt_ptr[0].eventRecord))
12764 @property
12765 def extSemSignal(self):
12766 return self._extSemSignal
12767 @extSemSignal.setter
12768 def extSemSignal(self, extSemSignal not None : cudaExternalSemaphoreSignalNodeParamsV2):
12769 string.memcpy(&self._pvt_ptr[0].extSemSignal, <cyruntime.cudaExternalSemaphoreSignalNodeParamsV2*><void_ptr>extSemSignal.getPtr(), sizeof(self._pvt_ptr[0].extSemSignal))
12770 @property
12771 def extSemWait(self):
12772 return self._extSemWait
12773 @extSemWait.setter
12774 def extSemWait(self, extSemWait not None : cudaExternalSemaphoreWaitNodeParamsV2):
12775 string.memcpy(&self._pvt_ptr[0].extSemWait, <cyruntime.cudaExternalSemaphoreWaitNodeParamsV2*><void_ptr>extSemWait.getPtr(), sizeof(self._pvt_ptr[0].extSemWait))
12776 @property
12777 def alloc(self):
12778 return self._alloc
12779 @alloc.setter
12780 def alloc(self, alloc not None : cudaMemAllocNodeParamsV2):
12781 string.memcpy(&self._pvt_ptr[0].alloc, <cyruntime.cudaMemAllocNodeParamsV2*><void_ptr>alloc.getPtr(), sizeof(self._pvt_ptr[0].alloc))
12782 @property
12783 def free(self):
12784 return self._free
12785 @free.setter
12786 def free(self, free not None : cudaMemFreeNodeParams):
12787 string.memcpy(&self._pvt_ptr[0].free, <cyruntime.cudaMemFreeNodeParams*><void_ptr>free.getPtr(), sizeof(self._pvt_ptr[0].free))
12788 @property
12789 def conditional(self):
12790 return self._conditional
12791 @conditional.setter
12792 def conditional(self, conditional not None : cudaConditionalNodeParams):
12793 string.memcpy(&self._pvt_ptr[0].conditional, <cyruntime.cudaConditionalNodeParams*><void_ptr>conditional.getPtr(), sizeof(self._pvt_ptr[0].conditional))
12794 @property
12795 def reserved2(self):
12796 return self._pvt_ptr[0].reserved2
12797 @reserved2.setter
12798 def reserved2(self, long long reserved2):
12799 self._pvt_ptr[0].reserved2 = reserved2
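# Illustrative sketch: `type` is the discriminant that tells cudaGraphAddNode
# which union member to read. Assumes the cudaGraphAddNode wrapper takes
# (graph, pDependencies, numDependencies, nodeParams) and returns (err, node);
# `graph` and `ev` are placeholders created elsewhere:
#
#     node_params = cudaGraphNodeParams()
#     node_params.type = cudaGraphNodeType.cudaGraphNodeTypeEventRecord
#     node_params.eventRecord.event = ev
#     err, node = cudaGraphAddNode(graph, None, 0, node_params)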
12801cdef class cudaGraphEdgeData_st:
12802 """
12803 Optional annotation for edges in a CUDA graph. Note that all edges
12804 implicitly have annotations and default to a zero-initialized value
12805 if not specified. A zero-initialized struct indicates a standard
12806 full serialization of two nodes with memory visibility.
12808 Attributes
12809 ----------
12810 from_port : bytes
12811 This indicates when the dependency is triggered from the upstream
12812 node on the edge. The meaning is specific to the node type. A value
12813 of 0 in all cases means full completion of the upstream node, with
12814 memory visibility to the downstream node or portion thereof
12815 (indicated by `to_port`). Only kernel nodes define non-zero
12816 ports. A kernel node can use the following output port types:
12817 cudaGraphKernelNodePortDefault,
12818 cudaGraphKernelNodePortProgrammatic, or
12819 cudaGraphKernelNodePortLaunchCompletion.
12820 to_port : bytes
12821 This indicates what portion of the downstream node is dependent on
12822 the upstream node or portion thereof (indicated by `from_port`).
12823 The meaning is specific to the node type. A value of 0 in all cases
12824 means the entirety of the downstream node is dependent on the
12825 upstream work. Currently no node types define non-zero ports.
12826 Accordingly, this field must be set to zero.
12827 type : bytes
12828 This should be populated with a value from
12829 ::cudaGraphDependencyType. (It is typed as char due to compiler-
12830 specific layout of bitfields.) See ::cudaGraphDependencyType.
12831 reserved : bytes
12832 These bytes are unused and must be zeroed. This ensures
12833 compatibility if additional fields are added in the future.
12835 Methods
12836 -------
12837 getPtr()
12838 Get memory address of class instance
12839 """
12840 def __cinit__(self, void_ptr _ptr = 0):
12841 if _ptr == 0:
12842 self._pvt_ptr = &self._pvt_val
12843 else:
12844 self._pvt_ptr = <cyruntime.cudaGraphEdgeData_st *>_ptr
12845 def __init__(self, void_ptr _ptr = 0):
12846 pass
12847 def __dealloc__(self):
12848 pass
12849 def getPtr(self):
12850 return <void_ptr>self._pvt_ptr
12851 def __repr__(self):
12852 if self._pvt_ptr is not NULL:
12853 str_list = []
12854 try:
12855 str_list += ['from_port : ' + str(self.from_port)]
12856 except ValueError:
12857 str_list += ['from_port : <ValueError>']
12858 try:
12859 str_list += ['to_port : ' + str(self.to_port)]
12860 except ValueError:
12861 str_list += ['to_port : <ValueError>']
12862 try:
12863 str_list += ['type : ' + str(self.type)]
12864 except ValueError:
12865 str_list += ['type : <ValueError>']
12866 try:
12867 str_list += ['reserved : ' + str(self.reserved)]
12868 except ValueError:
12869 str_list += ['reserved : <ValueError>']
12870 return '\n'.join(str_list)
12871 else:
12872 return ''
12873 @property
12874 def from_port(self):
12875 return self._pvt_ptr[0].from_port
12876 @from_port.setter
12877 def from_port(self, unsigned char from_port):
12878 self._pvt_ptr[0].from_port = from_port
12879 @property
12880 def to_port(self):
12881 return self._pvt_ptr[0].to_port
12882 @to_port.setter
12883 def to_port(self, unsigned char to_port):
12884 self._pvt_ptr[0].to_port = to_port
12885 @property
12886 def type(self):
12887 return self._pvt_ptr[0].type
12888 @type.setter
12889 def type(self, unsigned char type):
12890 self._pvt_ptr[0].type = type
12891 @property
12892 def reserved(self):
12893 return PyBytes_FromStringAndSize(<char*>self._pvt_ptr[0].reserved, 5)
12894 @reserved.setter
12895 def reserved(self, reserved):
12896 if len(reserved) != 5:
12897 raise ValueError("reserved length must be 5, is " + str(len(reserved)))
12898 for i, b in enumerate(reserved):
12899 self._pvt_ptr[0].reserved[i] = b
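# Illustrative sketch: a programmatic-dependency edge between two kernel
# nodes, using the port and dependency-type values named in the docstring:
#
#     edge = cudaGraphEdgeData_st()
#     edge.from_port = cudaGraphKernelNodePortProgrammatic
#     edge.type = cudaGraphDependencyType.cudaGraphDependencyTypeProgrammatic.value
#     edge.reserved = bytes(5)  # unused bytes must remain zeroed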
12901cdef class cudaGraphInstantiateParams_st:
12902 """
12903 Graph instantiation parameters
12905 Attributes
12906 ----------
12907 flags : unsigned long long
12908 Instantiation flags
12909 uploadStream : cudaStream_t
12910 Upload stream
12911 errNode_out : cudaGraphNode_t
12912 The node which caused instantiation to fail, if any
12913 result_out : cudaGraphInstantiateResult
12914 Whether instantiation was successful. If it failed, the reason why
12916 Methods
12917 -------
12918 getPtr()
12919 Get memory address of class instance
12920 """
12921 def __cinit__(self, void_ptr _ptr = 0):
12922 if _ptr == 0:
12923 self._pvt_ptr = &self._pvt_val
12924 else:
12925 self._pvt_ptr = <cyruntime.cudaGraphInstantiateParams_st *>_ptr
12926 def __init__(self, void_ptr _ptr = 0):
12927 pass
12928 self._uploadStream = cudaStream_t(_ptr=<void_ptr>&self._pvt_ptr[0].uploadStream)
12929 self._errNode_out = cudaGraphNode_t(_ptr=<void_ptr>&self._pvt_ptr[0].errNode_out)
12930 def __dealloc__(self):
12931 pass
12932 def getPtr(self):
12933 return <void_ptr>self._pvt_ptr
12934 def __repr__(self):
12935 if self._pvt_ptr is not NULL:
12936 str_list = []
12937 try:
12938 str_list += ['flags : ' + str(self.flags)]
12939 except ValueError:
12940 str_list += ['flags : <ValueError>']
12941 try:
12942 str_list += ['uploadStream : ' + str(self.uploadStream)]
12943 except ValueError:
12944 str_list += ['uploadStream : <ValueError>']
12945 try:
12946 str_list += ['errNode_out : ' + str(self.errNode_out)]
12947 except ValueError:
12948 str_list += ['errNode_out : <ValueError>']
12949 try:
12950 str_list += ['result_out : ' + str(self.result_out)]
12951 except ValueError:
12952 str_list += ['result_out : <ValueError>']
12953 return '\n'.join(str_list)
12954 else:
12955 return ''
12956 @property
12957 def flags(self):
12958 return self._pvt_ptr[0].flags
12959 @flags.setter
12960 def flags(self, unsigned long long flags):
12961 self._pvt_ptr[0].flags = flags
12962 @property
12963 def uploadStream(self):
12964 return self._uploadStream
12965 @uploadStream.setter
12966 def uploadStream(self, uploadStream):
12967 cdef cyruntime.cudaStream_t cyuploadStream
12968 if uploadStream is None:
12969 cyuploadStream = <cyruntime.cudaStream_t><void_ptr>0
12970 elif isinstance(uploadStream, (cudaStream_t,driver.CUstream)):
12971 puploadStream = int(uploadStream)
12972 cyuploadStream = <cyruntime.cudaStream_t><void_ptr>puploadStream
12973 else:
12974 puploadStream = int(cudaStream_t(uploadStream))
12975 cyuploadStream = <cyruntime.cudaStream_t><void_ptr>puploadStream
12976 self._uploadStream._pvt_ptr[0] = cyuploadStream
12977 @property
12978 def errNode_out(self):
12979 return self._errNode_out
12980 @errNode_out.setter
12981 def errNode_out(self, errNode_out):
12982 cdef cyruntime.cudaGraphNode_t cyerrNode_out
12983 if errNode_out is None:
12984 cyerrNode_out = <cyruntime.cudaGraphNode_t><void_ptr>0
12985 elif isinstance(errNode_out, (cudaGraphNode_t,driver.CUgraphNode)):
12986 perrNode_out = int(errNode_out)
12987 cyerrNode_out = <cyruntime.cudaGraphNode_t><void_ptr>perrNode_out
12988 else:
12989 perrNode_out = int(cudaGraphNode_t(errNode_out))
12990 cyerrNode_out = <cyruntime.cudaGraphNode_t><void_ptr>perrNode_out
12991 self._errNode_out._pvt_ptr[0] = cyerrNode_out
12992 @property
12993 def result_out(self):
12994 if self._pvt_ptr[0].result_out not in _dict_cudaGraphInstantiateResult:
12995 return None
12996 return _dict_cudaGraphInstantiateResult[self._pvt_ptr[0].result_out]
12997 @result_out.setter
12998 def result_out(self, result_out not None : cudaGraphInstantiateResult):
12999 self._pvt_ptr[0].result_out = result_out.value
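# Example (illustrative sketch, not generated code): populating the
# instantiation parameters and checking result_out afterwards. The
# cudaGraphInstantiateWithParams call and the pre-existing `graph` and
# `stream` objects are assumptions for illustration.
#
#     params = cudaGraphInstantiateParams_st()
#     params.flags = 0              # no special instantiation flags
#     params.uploadStream = stream  # optional upload stream
#     err, graphExec = cudaGraphInstantiateWithParams(graph, params)
#     if params.result_out != cudaGraphInstantiateResult.cudaGraphInstantiateSuccess:
#         raise RuntimeError('instantiation failed at node %s' % params.errNode_out)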
13001cdef class cudaGraphExecUpdateResultInfo_st:
13002 """
13003 Result information returned by cudaGraphExecUpdate
13005 Attributes
13006 ----------
13007 result : cudaGraphExecUpdateResult
13008 Gives more specific detail when a CUDA graph update fails.
13009 errorNode : cudaGraphNode_t
13010 The "to node" of the error edge when the topologies do not match.
13011 The error node when the error is associated with a specific node.
13012 NULL when the error is generic.
13013 errorFromNode : cudaGraphNode_t
13014 The "from node" of the error edge when the topologies do not match.
13015 Otherwise NULL.
13017 Methods
13018 -------
13019 getPtr()
13020 Get memory address of class instance
13021 """
13022 def __cinit__(self, void_ptr _ptr = 0):
13023 if _ptr == 0:
13024 self._pvt_ptr = &self._pvt_val
13025 else:
13026 self._pvt_ptr = <cyruntime.cudaGraphExecUpdateResultInfo_st *>_ptr
13027 def __init__(self, void_ptr _ptr = 0):
13029 self._errorNode = cudaGraphNode_t(_ptr=<void_ptr>&self._pvt_ptr[0].errorNode)
13030 self._errorFromNode = cudaGraphNode_t(_ptr=<void_ptr>&self._pvt_ptr[0].errorFromNode)
13031 def __dealloc__(self):
13032 pass
13033 def getPtr(self):
13034 return <void_ptr>self._pvt_ptr
13035 def __repr__(self):
13036 if self._pvt_ptr is not NULL:
13037 str_list = []
13038 try:
13039 str_list += ['result : ' + str(self.result)]
13040 except ValueError:
13041 str_list += ['result : <ValueError>']
13042 try:
13043 str_list += ['errorNode : ' + str(self.errorNode)]
13044 except ValueError:
13045 str_list += ['errorNode : <ValueError>']
13046 try:
13047 str_list += ['errorFromNode : ' + str(self.errorFromNode)]
13048 except ValueError:
13049 str_list += ['errorFromNode : <ValueError>']
13050 return '\n'.join(str_list)
13051 else:
13052 return ''
13053 @property
13054 def result(self):
13055 if self._pvt_ptr[0].result not in _dict_cudaGraphExecUpdateResult:
13056 return None
13057 return _dict_cudaGraphExecUpdateResult[self._pvt_ptr[0].result]
13058 @result.setter
13059 def result(self, result not None : cudaGraphExecUpdateResult):
13060 self._pvt_ptr[0].result = result.value
13061 @property
13062 def errorNode(self):
13063 return self._errorNode
13064 @errorNode.setter
13065 def errorNode(self, errorNode):
13066 cdef cyruntime.cudaGraphNode_t cyerrorNode
13067 if errorNode is None:
13068 cyerrorNode = <cyruntime.cudaGraphNode_t><void_ptr>0
13069 elif isinstance(errorNode, (cudaGraphNode_t,driver.CUgraphNode)):
13070 perrorNode = int(errorNode)
13071 cyerrorNode = <cyruntime.cudaGraphNode_t><void_ptr>perrorNode
13072 else:
13073 perrorNode = int(cudaGraphNode_t(errorNode))
13074 cyerrorNode = <cyruntime.cudaGraphNode_t><void_ptr>perrorNode
13075 self._errorNode._pvt_ptr[0] = cyerrorNode
13076 @property
13077 def errorFromNode(self):
13078 return self._errorFromNode
13079 @errorFromNode.setter
13080 def errorFromNode(self, errorFromNode):
13081 cdef cyruntime.cudaGraphNode_t cyerrorFromNode
13082 if errorFromNode is None:
13083 cyerrorFromNode = <cyruntime.cudaGraphNode_t><void_ptr>0
13084 elif isinstance(errorFromNode, (cudaGraphNode_t,driver.CUgraphNode)):
13085 perrorFromNode = int(errorFromNode)
13086 cyerrorFromNode = <cyruntime.cudaGraphNode_t><void_ptr>perrorFromNode
13087 else:
13088 perrorFromNode = int(cudaGraphNode_t(errorFromNode))
13089 cyerrorFromNode = <cyruntime.cudaGraphNode_t><void_ptr>perrorFromNode
13090 self._errorFromNode._pvt_ptr[0] = cyerrorFromNode
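# Example (illustrative sketch, not generated code): cudaGraphExecUpdate
# fills this struct when re-targeting an instantiated graph; on a topology
# mismatch, errorFromNode/errorNode identify the offending edge. The
# `graphExec` and `graph` handles are assumed to exist already.
#
#     err, resultInfo = cudaGraphExecUpdate(graphExec, graph)
#     if resultInfo.result != cudaGraphExecUpdateResult.cudaGraphExecUpdateSuccess:
#         print('update failed:', resultInfo.result, 'node:', resultInfo.errorNode)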
13092cdef class anon_struct16:
13093 """
13094 Attributes
13095 ----------
13096 pValue : Any
13098 offset : size_t
13100 size : size_t
13103 Methods
13104 -------
13105 getPtr()
13106 Get memory address of class instance
13107 """
13108 def __cinit__(self, void_ptr _ptr):
13109 self._pvt_ptr = <cyruntime.cudaGraphKernelNodeUpdate *>_ptr
13111 def __init__(self, void_ptr _ptr):
13112 pass
13113 def __dealloc__(self):
13114 pass
13115 def getPtr(self):
13116 return <void_ptr>&self._pvt_ptr[0].updateData.param
13117 def __repr__(self):
13118 if self._pvt_ptr is not NULL:
13119 str_list = []
13120 try:
13121 str_list += ['pValue : ' + hex(self.pValue)]
13122 except ValueError:
13123 str_list += ['pValue : <ValueError>']
13124 try:
13125 str_list += ['offset : ' + str(self.offset)]
13126 except ValueError:
13127 str_list += ['offset : <ValueError>']
13128 try:
13129 str_list += ['size : ' + str(self.size)]
13130 except ValueError:
13131 str_list += ['size : <ValueError>']
13132 return '\n'.join(str_list)
13133 else:
13134 return ''
13135 @property
13136 def pValue(self):
13137 return <void_ptr>self._pvt_ptr[0].updateData.param.pValue
13138 @pValue.setter
13139 def pValue(self, pValue):
13140 _cpValue = _HelperInputVoidPtr(pValue)
13141 self._pvt_ptr[0].updateData.param.pValue = <void*><void_ptr>_cpValue.cptr
13142 @property
13143 def offset(self):
13144 return self._pvt_ptr[0].updateData.param.offset
13145 @offset.setter
13146 def offset(self, size_t offset):
13147 self._pvt_ptr[0].updateData.param.offset = offset
13148 @property
13149 def size(self):
13150 return self._pvt_ptr[0].updateData.param.size
13151 @size.setter
13152 def size(self, size_t size):
13153 self._pvt_ptr[0].updateData.param.size = size
13155cdef class anon_union8:
13156 """
13157 Attributes
13158 ----------
13159 gridDim : dim3
13161 param : anon_struct16
13163 isEnabled : unsigned int
13166 Methods
13167 -------
13168 getPtr()
13169 Get memory address of class instance
13170 """
13171 def __cinit__(self, void_ptr _ptr):
13172 self._pvt_ptr = <cyruntime.cudaGraphKernelNodeUpdate *>_ptr
13174 def __init__(self, void_ptr _ptr):
13176 self._gridDim = dim3(_ptr=<void_ptr>&self._pvt_ptr[0].updateData.gridDim)
13177 self._param = anon_struct16(_ptr=<void_ptr>self._pvt_ptr)
13178 def __dealloc__(self):
13179 pass
13180 def getPtr(self):
13181 return <void_ptr>&self._pvt_ptr[0].updateData
13182 def __repr__(self):
13183 if self._pvt_ptr is not NULL:
13184 str_list = []
13185 try:
13186 str_list += ['gridDim :\n' + '\n'.join([' ' + line for line in str(self.gridDim).splitlines()])]
13187 except ValueError:
13188 str_list += ['gridDim : <ValueError>']
13189 try:
13190 str_list += ['param :\n' + '\n'.join([' ' + line for line in str(self.param).splitlines()])]
13191 except ValueError:
13192 str_list += ['param : <ValueError>']
13193 try:
13194 str_list += ['isEnabled : ' + str(self.isEnabled)]
13195 except ValueError:
13196 str_list += ['isEnabled : <ValueError>']
13197 return '\n'.join(str_list)
13198 else:
13199 return ''
13200 @property
13201 def gridDim(self):
13202 return self._gridDim
13203 @gridDim.setter
13204 def gridDim(self, gridDim not None : dim3):
13205 string.memcpy(&self._pvt_ptr[0].updateData.gridDim, <cyruntime.dim3*><void_ptr>gridDim.getPtr(), sizeof(self._pvt_ptr[0].updateData.gridDim))
13206 @property
13207 def param(self):
13208 return self._param
13209 @param.setter
13210 def param(self, param not None : anon_struct16):
13211 string.memcpy(&self._pvt_ptr[0].updateData.param, <cyruntime.anon_struct16*><void_ptr>param.getPtr(), sizeof(self._pvt_ptr[0].updateData.param))
13212 @property
13213 def isEnabled(self):
13214 return self._pvt_ptr[0].updateData.isEnabled
13215 @isEnabled.setter
13216 def isEnabled(self, unsigned int isEnabled):
13217 self._pvt_ptr[0].updateData.isEnabled = isEnabled
13219cdef class cudaGraphKernelNodeUpdate:
13220 """
13221 Struct to specify a single node update to pass as part of a larger
13222 array to ::cudaGraphKernelNodeUpdatesApply
13224 Attributes
13225 ----------
13226 node : cudaGraphDeviceNode_t
13227 Node to update
13228 field : cudaGraphKernelNodeField
13229 Which type of update to apply. Determines how updateData is
13230 interpreted
13231 updateData : anon_union8
13232 Update data to apply. Which union member is used depends on the value of field.
13234 Methods
13235 -------
13236 getPtr()
13237 Get memory address of class instance
13238 """
13239 def __cinit__(self, void_ptr _ptr = 0):
13240 if _ptr == 0:
13241 self._val_ptr = <cyruntime.cudaGraphKernelNodeUpdate *>calloc(1, sizeof(cyruntime.cudaGraphKernelNodeUpdate))
13242 self._pvt_ptr = self._val_ptr
13243 else:
13244 self._pvt_ptr = <cyruntime.cudaGraphKernelNodeUpdate *>_ptr
13245 def __init__(self, void_ptr _ptr = 0):
13247 self._node = cudaGraphDeviceNode_t(_ptr=<void_ptr>&self._pvt_ptr[0].node)
13248 self._updateData = anon_union8(_ptr=<void_ptr>self._pvt_ptr)
13249 def __dealloc__(self):
13250 if self._val_ptr is not NULL:
13251 free(self._val_ptr)
13252 def getPtr(self):
13253 return <void_ptr>self._pvt_ptr
13254 def __repr__(self):
13255 if self._pvt_ptr is not NULL:
13256 str_list = []
13257 try:
13258 str_list += ['node : ' + str(self.node)]
13259 except ValueError:
13260 str_list += ['node : <ValueError>']
13261 try:
13262 str_list += ['field : ' + str(self.field)]
13263 except ValueError:
13264 str_list += ['field : <ValueError>']
13265 try:
13266 str_list += ['updateData :\n' + '\n'.join([' ' + line for line in str(self.updateData).splitlines()])]
13267 except ValueError:
13268 str_list += ['updateData : <ValueError>']
13269 return '\n'.join(str_list)
13270 else:
13271 return ''
13272 @property
13273 def node(self):
13274 return self._node
13275 @node.setter
13276 def node(self, node):
13277 cdef cyruntime.cudaGraphDeviceNode_t cynode
13278 if node is None:
13279 cynode = <cyruntime.cudaGraphDeviceNode_t><void_ptr>0
13280 elif isinstance(node, (cudaGraphDeviceNode_t,)):
13281 pnode = int(node)
13282 cynode = <cyruntime.cudaGraphDeviceNode_t><void_ptr>pnode
13283 else:
13284 pnode = int(cudaGraphDeviceNode_t(node))
13285 cynode = <cyruntime.cudaGraphDeviceNode_t><void_ptr>pnode
13286 self._node._pvt_ptr[0] = cynode
13287 @property
13288 def field(self):
13289 if self._pvt_ptr[0].field not in _dict_cudaGraphKernelNodeField:
13290 return None
13291 return _dict_cudaGraphKernelNodeField[self._pvt_ptr[0].field]
13292 @field.setter
13293 def field(self, field not None : cudaGraphKernelNodeField):
13294 self._pvt_ptr[0].field = field.value
13295 @property
13296 def updateData(self):
13297 return self._updateData
13298 @updateData.setter
13299 def updateData(self, updateData not None : anon_union8):
13300 string.memcpy(&self._pvt_ptr[0].updateData, <cyruntime.anon_union8*><void_ptr>updateData.getPtr(), sizeof(self._pvt_ptr[0].updateData))
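# Example (illustrative sketch, not generated code): building one entry of an
# update array for a device-updatable kernel node; `field` selects which
# member of updateData is read. The devNode handle is assumed to come from a
# prior cudaLaunchAttributeDeviceUpdatableKernelNode launch attribute.
#
#     upd = cudaGraphKernelNodeUpdate()
#     upd.node = devNode
#     upd.field = cudaGraphKernelNodeField.cudaGraphKernelNodeFieldGridDim
#     upd.updateData.gridDim.x = 256
#     upd.updateData.gridDim.y = 1
#     upd.updateData.gridDim.z = 1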
13302cdef class cudaLaunchMemSyncDomainMap_st:
13303 """
13304 Memory Synchronization Domain map. See cudaLaunchMemSyncDomain. By
13305 default, kernels are launched in domain 0. Kernels launched with
13306 cudaLaunchMemSyncDomainRemote will have a different domain ID. Users
13307 may also alter the domain ID with ::cudaLaunchMemSyncDomainMap for a
13308 specific stream / graph node / kernel launch. See
13309 cudaLaunchAttributeMemSyncDomainMap. The domain ID range is
13310 available through cudaDevAttrMemSyncDomainCount.
13312 Attributes
13313 ----------
13314 default_ : unsigned char
13315 The default domain ID to use for designated kernels
13316 remote : unsigned char
13317 The remote domain ID to use for designated kernels
13319 Methods
13320 -------
13321 getPtr()
13322 Get memory address of class instance
13323 """
13324 def __cinit__(self, void_ptr _ptr = 0):
13325 if _ptr == 0:
13326 self._pvt_ptr = &self._pvt_val
13327 else:
13328 self._pvt_ptr = <cyruntime.cudaLaunchMemSyncDomainMap_st *>_ptr
13329 def __init__(self, void_ptr _ptr = 0):
13330 pass
13331 def __dealloc__(self):
13332 pass
13333 def getPtr(self):
13334 return <void_ptr>self._pvt_ptr
13335 def __repr__(self):
13336 if self._pvt_ptr is not NULL:
13337 str_list = []
13338 try:
13339 str_list += ['default_ : ' + str(self.default_)]
13340 except ValueError:
13341 str_list += ['default_ : <ValueError>']
13342 try:
13343 str_list += ['remote : ' + str(self.remote)]
13344 except ValueError:
13345 str_list += ['remote : <ValueError>']
13346 return '\n'.join(str_list)
13347 else:
13348 return ''
13349 @property
13350 def default_(self):
13351 return self._pvt_ptr[0].default_
13352 @default_.setter
13353 def default_(self, unsigned char default_):
13354 self._pvt_ptr[0].default_ = default_
13355 @property
13356 def remote(self):
13357 return self._pvt_ptr[0].remote
13358 @remote.setter
13359 def remote(self, unsigned char remote):
13360 self._pvt_ptr[0].remote = remote
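# Example (illustrative sketch, not generated code): remapping the default
# and remote sync domains for a launch through the matching launch attribute
# (cudaLaunchAttribute_st is defined further below). Domain IDs must stay
# below cudaDevAttrMemSyncDomainCount; the values here are illustrative.
#
#     attr = cudaLaunchAttribute_st()
#     attr.id = cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomainMap
#     attr.val.memSyncDomainMap.default_ = 0
#     attr.val.memSyncDomainMap.remote = 1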
13362cdef class anon_struct17:
13363 """
13364 Attributes
13365 ----------
13366 x : unsigned int
13368 y : unsigned int
13370 z : unsigned int
13373 Methods
13374 -------
13375 getPtr()
13376 Get memory address of class instance
13377 """
13378 def __cinit__(self, void_ptr _ptr):
13379 self._pvt_ptr = <cyruntime.cudaLaunchAttributeValue *>_ptr
13381 def __init__(self, void_ptr _ptr):
13382 pass
13383 def __dealloc__(self):
13384 pass
13385 def getPtr(self):
13386 return <void_ptr>&self._pvt_ptr[0].clusterDim
13387 def __repr__(self):
13388 if self._pvt_ptr is not NULL:
13389 str_list = []
13390 try:
13391 str_list += ['x : ' + str(self.x)]
13392 except ValueError:
13393 str_list += ['x : <ValueError>']
13394 try:
13395 str_list += ['y : ' + str(self.y)]
13396 except ValueError:
13397 str_list += ['y : <ValueError>']
13398 try:
13399 str_list += ['z : ' + str(self.z)]
13400 except ValueError:
13401 str_list += ['z : <ValueError>']
13402 return '\n'.join(str_list)
13403 else:
13404 return ''
13405 @property
13406 def x(self):
13407 return self._pvt_ptr[0].clusterDim.x
13408 @x.setter
13409 def x(self, unsigned int x):
13410 self._pvt_ptr[0].clusterDim.x = x
13411 @property
13412 def y(self):
13413 return self._pvt_ptr[0].clusterDim.y
13414 @y.setter
13415 def y(self, unsigned int y):
13416 self._pvt_ptr[0].clusterDim.y = y
13417 @property
13418 def z(self):
13419 return self._pvt_ptr[0].clusterDim.z
13420 @z.setter
13421 def z(self, unsigned int z):
13422 self._pvt_ptr[0].clusterDim.z = z
13424cdef class anon_struct18:
13425 """
13426 Attributes
13427 ----------
13428 event : cudaEvent_t
13430 flags : int
13432 triggerAtBlockStart : int
13435 Methods
13436 -------
13437 getPtr()
13438 Get memory address of class instance
13439 """
13440 def __cinit__(self, void_ptr _ptr):
13441 self._pvt_ptr = <cyruntime.cudaLaunchAttributeValue *>_ptr
13443 def __init__(self, void_ptr _ptr):
13445 self._event = cudaEvent_t(_ptr=<void_ptr>&self._pvt_ptr[0].programmaticEvent.event)
13446 def __dealloc__(self):
13447 pass
13448 def getPtr(self):
13449 return <void_ptr>&self._pvt_ptr[0].programmaticEvent
13450 def __repr__(self):
13451 if self._pvt_ptr is not NULL:
13452 str_list = []
13453 try:
13454 str_list += ['event : ' + str(self.event)]
13455 except ValueError:
13456 str_list += ['event : <ValueError>']
13457 try:
13458 str_list += ['flags : ' + str(self.flags)]
13459 except ValueError:
13460 str_list += ['flags : <ValueError>']
13461 try:
13462 str_list += ['triggerAtBlockStart : ' + str(self.triggerAtBlockStart)]
13463 except ValueError:
13464 str_list += ['triggerAtBlockStart : <ValueError>']
13465 return '\n'.join(str_list)
13466 else:
13467 return ''
13468 @property
13469 def event(self):
13470 return self._event
13471 @event.setter
13472 def event(self, event):
13473 cdef cyruntime.cudaEvent_t cyevent
13474 if event is None:
13475 cyevent = <cyruntime.cudaEvent_t><void_ptr>0
13476 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
13477 pevent = int(event)
13478 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
13479 else:
13480 pevent = int(cudaEvent_t(event))
13481 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
13482 self._event._pvt_ptr[0] = cyevent
13483 @property
13484 def flags(self):
13485 return self._pvt_ptr[0].programmaticEvent.flags
13486 @flags.setter
13487 def flags(self, int flags):
13488 self._pvt_ptr[0].programmaticEvent.flags = flags
13489 @property
13490 def triggerAtBlockStart(self):
13491 return self._pvt_ptr[0].programmaticEvent.triggerAtBlockStart
13492 @triggerAtBlockStart.setter
13493 def triggerAtBlockStart(self, int triggerAtBlockStart):
13494 self._pvt_ptr[0].programmaticEvent.triggerAtBlockStart = triggerAtBlockStart
13496cdef class anon_struct19:
13497 """
13498 Attributes
13499 ----------
13500 x : unsigned int
13502 y : unsigned int
13504 z : unsigned int
13507 Methods
13508 -------
13509 getPtr()
13510 Get memory address of class instance
13511 """
13512 def __cinit__(self, void_ptr _ptr):
13513 self._pvt_ptr = <cyruntime.cudaLaunchAttributeValue *>_ptr
13515 def __init__(self, void_ptr _ptr):
13516 pass
13517 def __dealloc__(self):
13518 pass
13519 def getPtr(self):
13520 return <void_ptr>&self._pvt_ptr[0].preferredClusterDim
13521 def __repr__(self):
13522 if self._pvt_ptr is not NULL:
13523 str_list = []
13524 try:
13525 str_list += ['x : ' + str(self.x)]
13526 except ValueError:
13527 str_list += ['x : <ValueError>']
13528 try:
13529 str_list += ['y : ' + str(self.y)]
13530 except ValueError:
13531 str_list += ['y : <ValueError>']
13532 try:
13533 str_list += ['z : ' + str(self.z)]
13534 except ValueError:
13535 str_list += ['z : <ValueError>']
13536 return '\n'.join(str_list)
13537 else:
13538 return ''
13539 @property
13540 def x(self):
13541 return self._pvt_ptr[0].preferredClusterDim.x
13542 @x.setter
13543 def x(self, unsigned int x):
13544 self._pvt_ptr[0].preferredClusterDim.x = x
13545 @property
13546 def y(self):
13547 return self._pvt_ptr[0].preferredClusterDim.y
13548 @y.setter
13549 def y(self, unsigned int y):
13550 self._pvt_ptr[0].preferredClusterDim.y = y
13551 @property
13552 def z(self):
13553 return self._pvt_ptr[0].preferredClusterDim.z
13554 @z.setter
13555 def z(self, unsigned int z):
13556 self._pvt_ptr[0].preferredClusterDim.z = z
13558cdef class anon_struct20:
13559 """
13560 Attributes
13561 ----------
13562 event : cudaEvent_t
13564 flags : int
13567 Methods
13568 -------
13569 getPtr()
13570 Get memory address of class instance
13571 """
13572 def __cinit__(self, void_ptr _ptr):
13573 self._pvt_ptr = <cyruntime.cudaLaunchAttributeValue *>_ptr
13575 def __init__(self, void_ptr _ptr):
13577 self._event = cudaEvent_t(_ptr=<void_ptr>&self._pvt_ptr[0].launchCompletionEvent.event)
13578 def __dealloc__(self):
13579 pass
13580 def getPtr(self):
13581 return <void_ptr>&self._pvt_ptr[0].launchCompletionEvent
13582 def __repr__(self):
13583 if self._pvt_ptr is not NULL:
13584 str_list = []
13585 try:
13586 str_list += ['event : ' + str(self.event)]
13587 except ValueError:
13588 str_list += ['event : <ValueError>']
13589 try:
13590 str_list += ['flags : ' + str(self.flags)]
13591 except ValueError:
13592 str_list += ['flags : <ValueError>']
13593 return '\n'.join(str_list)
13594 else:
13595 return ''
13596 @property
13597 def event(self):
13598 return self._event
13599 @event.setter
13600 def event(self, event):
13601 cdef cyruntime.cudaEvent_t cyevent
13602 if event is None:
13603 cyevent = <cyruntime.cudaEvent_t><void_ptr>0
13604 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
13605 pevent = int(event)
13606 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
13607 else:
13608 pevent = int(cudaEvent_t(event))
13609 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
13610 self._event._pvt_ptr[0] = cyevent
13611 @property
13612 def flags(self):
13613 return self._pvt_ptr[0].launchCompletionEvent.flags
13614 @flags.setter
13615 def flags(self, int flags):
13616 self._pvt_ptr[0].launchCompletionEvent.flags = flags
13618cdef class anon_struct21:
13619 """
13620 Attributes
13621 ----------
13622 deviceUpdatable : int
13624 devNode : cudaGraphDeviceNode_t
13627 Methods
13628 -------
13629 getPtr()
13630 Get memory address of class instance
13631 """
13632 def __cinit__(self, void_ptr _ptr):
13633 self._pvt_ptr = <cyruntime.cudaLaunchAttributeValue *>_ptr
13635 def __init__(self, void_ptr _ptr):
13637 self._devNode = cudaGraphDeviceNode_t(_ptr=<void_ptr>&self._pvt_ptr[0].deviceUpdatableKernelNode.devNode)
13638 def __dealloc__(self):
13639 pass
13640 def getPtr(self):
13641 return <void_ptr>&self._pvt_ptr[0].deviceUpdatableKernelNode
13642 def __repr__(self):
13643 if self._pvt_ptr is not NULL:
13644 str_list = []
13645 try:
13646 str_list += ['deviceUpdatable : ' + str(self.deviceUpdatable)]
13647 except ValueError:
13648 str_list += ['deviceUpdatable : <ValueError>']
13649 try:
13650 str_list += ['devNode : ' + str(self.devNode)]
13651 except ValueError:
13652 str_list += ['devNode : <ValueError>']
13653 return '\n'.join(str_list)
13654 else:
13655 return ''
13656 @property
13657 def deviceUpdatable(self):
13658 return self._pvt_ptr[0].deviceUpdatableKernelNode.deviceUpdatable
13659 @deviceUpdatable.setter
13660 def deviceUpdatable(self, int deviceUpdatable):
13661 self._pvt_ptr[0].deviceUpdatableKernelNode.deviceUpdatable = deviceUpdatable
13662 @property
13663 def devNode(self):
13664 return self._devNode
13665 @devNode.setter
13666 def devNode(self, devNode):
13667 cdef cyruntime.cudaGraphDeviceNode_t cydevNode
13668 if devNode is None:
13669 cydevNode = <cyruntime.cudaGraphDeviceNode_t><void_ptr>0
13670 elif isinstance(devNode, (cudaGraphDeviceNode_t,)):
13671 pdevNode = int(devNode)
13672 cydevNode = <cyruntime.cudaGraphDeviceNode_t><void_ptr>pdevNode
13673 else:
13674 pdevNode = int(cudaGraphDeviceNode_t(devNode))
13675 cydevNode = <cyruntime.cudaGraphDeviceNode_t><void_ptr>pdevNode
13676 self._devNode._pvt_ptr[0] = cydevNode
13678cdef class cudaLaunchAttributeValue:
13679 """
13680 Launch attributes union; used as the value field of
13681 ::cudaLaunchAttribute
13683 Attributes
13684 ----------
13685 pad : bytes
13687 accessPolicyWindow : cudaAccessPolicyWindow
13688 Value of launch attribute cudaLaunchAttributeAccessPolicyWindow.
13689 cooperative : int
13690 Value of launch attribute cudaLaunchAttributeCooperative. Nonzero
13691 indicates a cooperative kernel (see cudaLaunchCooperativeKernel).
13692 syncPolicy : cudaSynchronizationPolicy
13693 Value of launch attribute cudaLaunchAttributeSynchronizationPolicy.
13694 ::cudaSynchronizationPolicy for work queued up in this stream.
13695 clusterDim : anon_struct17
13696 Value of launch attribute cudaLaunchAttributeClusterDimension that
13697 represents the desired cluster dimensions for the kernel. Opaque
13698 type with the following fields: - `x` - The X dimension of the
13699 cluster, in blocks. Must be a divisor of the grid X dimension. -
13700 `y` - The Y dimension of the cluster, in blocks. Must be a divisor
13701 of the grid Y dimension. - `z` - The Z dimension of the cluster,
13702 in blocks. Must be a divisor of the grid Z dimension.
13703 clusterSchedulingPolicyPreference : cudaClusterSchedulingPolicy
13704 Value of launch attribute
13705 cudaLaunchAttributeClusterSchedulingPolicyPreference. Cluster
13706 scheduling policy preference for the kernel.
13707 programmaticStreamSerializationAllowed : int
13708 Value of launch attribute
13709 cudaLaunchAttributeProgrammaticStreamSerialization.
13710 programmaticEvent : anon_struct18
13711 Value of launch attribute cudaLaunchAttributeProgrammaticEvent with
13712 the following fields: - `cudaEvent_t` event - Event to fire when
13713 all blocks trigger it. - `int` flags - Event record flags, see
13714 cudaEventRecordWithFlags. Does not accept cudaEventRecordExternal.
13715 - `int` triggerAtBlockStart - If this is set to nonzero, each block
13716 launch will automatically trigger the event.
13717 priority : int
13718 Value of launch attribute cudaLaunchAttributePriority. Execution
13719 priority of the kernel.
13720 memSyncDomainMap : cudaLaunchMemSyncDomainMap
13721 Value of launch attribute cudaLaunchAttributeMemSyncDomainMap. See
13722 ::cudaLaunchMemSyncDomainMap.
13723 memSyncDomain : cudaLaunchMemSyncDomain
13724 Value of launch attribute cudaLaunchAttributeMemSyncDomain. See
13725 cudaLaunchMemSyncDomain.
13726 preferredClusterDim : anon_struct19
13727 Value of launch attribute
13728 cudaLaunchAttributePreferredClusterDimension that represents the
13729 desired preferred cluster dimensions for the kernel. Opaque type
13730 with the following fields: - `x` - The X dimension of the preferred
13731 cluster, in blocks. Must be a divisor of the grid X dimension, and
13732 must be a multiple of the `x` field of
13733 cudaLaunchAttributeValue::clusterDim. - `y` - The Y dimension of
13734 the preferred cluster, in blocks. Must be a divisor of the grid Y
13735 dimension, and must be a multiple of the `y` field of
13736 cudaLaunchAttributeValue::clusterDim. - `z` - The Z dimension of
13737 the preferred cluster, in blocks. Must be equal to the `z` field of
13738 cudaLaunchAttributeValue::clusterDim.
13739 launchCompletionEvent : anon_struct20
13740 Value of launch attribute cudaLaunchAttributeLaunchCompletionEvent
13741 with the following fields: - `cudaEvent_t` event - Event to fire
13742 when the last block launches. - `int` flags - Event record
13743 flags, see cudaEventRecordWithFlags. Does not accept
13744 cudaEventRecordExternal.
13745 deviceUpdatableKernelNode : anon_struct21
13746 Value of launch attribute
13747 cudaLaunchAttributeDeviceUpdatableKernelNode with the following
13748 fields: - `int` deviceUpdatable - Whether or not the resulting
13749 kernel node should be device-updatable. -
13750 `cudaGraphDeviceNode_t` devNode - Returns a handle to pass to the
13751 various device-side update functions.
13752 sharedMemCarveout : unsigned int
13753 Value of launch attribute
13754 cudaLaunchAttributePreferredSharedMemoryCarveout.
13755 nvlinkUtilCentricScheduling : unsigned int
13756 Value of launch attribute
13757 cudaLaunchAttributeNvlinkUtilCentricScheduling.
13759 Methods
13760 -------
13761 getPtr()
13762 Get memory address of class instance
13763 """
13764 def __cinit__(self, void_ptr _ptr = 0):
13765 if _ptr == 0:
13766 self._pvt_ptr = &self._pvt_val
13767 else:
13768 self._pvt_ptr = <cyruntime.cudaLaunchAttributeValue *>_ptr
13769 def __init__(self, void_ptr _ptr = 0):
13771 self._accessPolicyWindow = cudaAccessPolicyWindow(_ptr=<void_ptr>&self._pvt_ptr[0].accessPolicyWindow)
13772 self._clusterDim = anon_struct17(_ptr=<void_ptr>self._pvt_ptr)
13773 self._programmaticEvent = anon_struct18(_ptr=<void_ptr>self._pvt_ptr)
13774 self._memSyncDomainMap = cudaLaunchMemSyncDomainMap(_ptr=<void_ptr>&self._pvt_ptr[0].memSyncDomainMap)
13775 self._preferredClusterDim = anon_struct19(_ptr=<void_ptr>self._pvt_ptr)
13776 self._launchCompletionEvent = anon_struct20(_ptr=<void_ptr>self._pvt_ptr)
13777 self._deviceUpdatableKernelNode = anon_struct21(_ptr=<void_ptr>self._pvt_ptr)
13778 def __dealloc__(self):
13779 pass
13780 def getPtr(self):
13781 return <void_ptr>self._pvt_ptr
13782 def __repr__(self):
13783 if self._pvt_ptr is not NULL:
13784 str_list = []
13785 try:
13786 str_list += ['pad : ' + str(self.pad)]
13787 except ValueError:
13788 str_list += ['pad : <ValueError>']
13789 try:
13790 str_list += ['accessPolicyWindow :\n' + '\n'.join([' ' + line for line in str(self.accessPolicyWindow).splitlines()])]
13791 except ValueError:
13792 str_list += ['accessPolicyWindow : <ValueError>']
13793 try:
13794 str_list += ['cooperative : ' + str(self.cooperative)]
13795 except ValueError:
13796 str_list += ['cooperative : <ValueError>']
13797 try:
13798 str_list += ['syncPolicy : ' + str(self.syncPolicy)]
13799 except ValueError:
13800 str_list += ['syncPolicy : <ValueError>']
13801 try:
13802 str_list += ['clusterDim :\n' + '\n'.join([' ' + line for line in str(self.clusterDim).splitlines()])]
13803 except ValueError:
13804 str_list += ['clusterDim : <ValueError>']
13805 try:
13806 str_list += ['clusterSchedulingPolicyPreference : ' + str(self.clusterSchedulingPolicyPreference)]
13807 except ValueError:
13808 str_list += ['clusterSchedulingPolicyPreference : <ValueError>']
13809 try:
13810 str_list += ['programmaticStreamSerializationAllowed : ' + str(self.programmaticStreamSerializationAllowed)]
13811 except ValueError:
13812 str_list += ['programmaticStreamSerializationAllowed : <ValueError>']
13813 try:
13814 str_list += ['programmaticEvent :\n' + '\n'.join([' ' + line for line in str(self.programmaticEvent).splitlines()])]
13815 except ValueError:
13816 str_list += ['programmaticEvent : <ValueError>']
13817 try:
13818 str_list += ['priority : ' + str(self.priority)]
13819 except ValueError:
13820 str_list += ['priority : <ValueError>']
13821 try:
13822 str_list += ['memSyncDomainMap :\n' + '\n'.join([' ' + line for line in str(self.memSyncDomainMap).splitlines()])]
13823 except ValueError:
13824 str_list += ['memSyncDomainMap : <ValueError>']
13825 try:
13826 str_list += ['memSyncDomain : ' + str(self.memSyncDomain)]
13827 except ValueError:
13828 str_list += ['memSyncDomain : <ValueError>']
13829 try:
13830 str_list += ['preferredClusterDim :\n' + '\n'.join([' ' + line for line in str(self.preferredClusterDim).splitlines()])]
13831 except ValueError:
13832 str_list += ['preferredClusterDim : <ValueError>']
13833 try:
13834 str_list += ['launchCompletionEvent :\n' + '\n'.join([' ' + line for line in str(self.launchCompletionEvent).splitlines()])]
13835 except ValueError:
13836 str_list += ['launchCompletionEvent : <ValueError>']
13837 try:
13838 str_list += ['deviceUpdatableKernelNode :\n' + '\n'.join([' ' + line for line in str(self.deviceUpdatableKernelNode).splitlines()])]
13839 except ValueError:
13840 str_list += ['deviceUpdatableKernelNode : <ValueError>']
13841 try:
13842 str_list += ['sharedMemCarveout : ' + str(self.sharedMemCarveout)]
13843 except ValueError:
13844 str_list += ['sharedMemCarveout : <ValueError>']
13845 try:
13846 str_list += ['nvlinkUtilCentricScheduling : ' + str(self.nvlinkUtilCentricScheduling)]
13847 except ValueError:
13848 str_list += ['nvlinkUtilCentricScheduling : <ValueError>']
13849 return '\n'.join(str_list)
13850 else:
13851 return ''
13852 @property
13853 def pad(self):
13854 return PyBytes_FromStringAndSize(self._pvt_ptr[0].pad, 64)
13855 @pad.setter
13856 def pad(self, pad):
13857 if len(pad) != 64:
13858 raise ValueError("pad length must be 64, is " + str(len(pad)))
13859 if CHAR_MIN == 0:
13860 for i, b in enumerate(pad):
13861 if b < 0 and b > -129:
13862 b = b + 256
13863 self._pvt_ptr[0].pad[i] = b
13864 else:
13865 for i, b in enumerate(pad):
13866 if b > 127 and b < 256:
13867 b = b - 256
13868 self._pvt_ptr[0].pad[i] = b
13869 @property
13870 def accessPolicyWindow(self):
13871 return self._accessPolicyWindow
13872 @accessPolicyWindow.setter
13873 def accessPolicyWindow(self, accessPolicyWindow not None : cudaAccessPolicyWindow):
13874 string.memcpy(&self._pvt_ptr[0].accessPolicyWindow, <cyruntime.cudaAccessPolicyWindow*><void_ptr>accessPolicyWindow.getPtr(), sizeof(self._pvt_ptr[0].accessPolicyWindow))
13875 @property
13876 def cooperative(self):
13877 return self._pvt_ptr[0].cooperative
13878 @cooperative.setter
13879 def cooperative(self, int cooperative):
13880 self._pvt_ptr[0].cooperative = cooperative
13881 @property
13882 def syncPolicy(self):
13883 if self._pvt_ptr[0].syncPolicy not in _dict_cudaSynchronizationPolicy:
13884 return None
13885 return _dict_cudaSynchronizationPolicy[self._pvt_ptr[0].syncPolicy]
13886 @syncPolicy.setter
13887 def syncPolicy(self, syncPolicy not None : cudaSynchronizationPolicy):
13888 self._pvt_ptr[0].syncPolicy = syncPolicy.value
13889 @property
13890 def clusterDim(self):
13891 return self._clusterDim
13892 @clusterDim.setter
13893 def clusterDim(self, clusterDim not None : anon_struct17):
13894 string.memcpy(&self._pvt_ptr[0].clusterDim, <cyruntime.anon_struct17*><void_ptr>clusterDim.getPtr(), sizeof(self._pvt_ptr[0].clusterDim))
13895 @property
13896 def clusterSchedulingPolicyPreference(self):
13897 if self._pvt_ptr[0].clusterSchedulingPolicyPreference not in _dict_cudaClusterSchedulingPolicy:
13898 return None
13899 return _dict_cudaClusterSchedulingPolicy[self._pvt_ptr[0].clusterSchedulingPolicyPreference]
13900 @clusterSchedulingPolicyPreference.setter
13901 def clusterSchedulingPolicyPreference(self, clusterSchedulingPolicyPreference not None : cudaClusterSchedulingPolicy):
13902 self._pvt_ptr[0].clusterSchedulingPolicyPreference = clusterSchedulingPolicyPreference.value
13903 @property
13904 def programmaticStreamSerializationAllowed(self):
13905 return self._pvt_ptr[0].programmaticStreamSerializationAllowed
13906 @programmaticStreamSerializationAllowed.setter
13907 def programmaticStreamSerializationAllowed(self, int programmaticStreamSerializationAllowed):
13908 self._pvt_ptr[0].programmaticStreamSerializationAllowed = programmaticStreamSerializationAllowed
13909 @property
13910 def programmaticEvent(self):
13911 return self._programmaticEvent
13912 @programmaticEvent.setter
13913 def programmaticEvent(self, programmaticEvent not None : anon_struct18):
13914 string.memcpy(&self._pvt_ptr[0].programmaticEvent, <cyruntime.anon_struct18*><void_ptr>programmaticEvent.getPtr(), sizeof(self._pvt_ptr[0].programmaticEvent))
13915 @property
13916 def priority(self):
13917 return self._pvt_ptr[0].priority
13918 @priority.setter
13919 def priority(self, int priority):
13920 self._pvt_ptr[0].priority = priority
13921 @property
13922 def memSyncDomainMap(self):
13923 return self._memSyncDomainMap
13924 @memSyncDomainMap.setter
13925 def memSyncDomainMap(self, memSyncDomainMap not None : cudaLaunchMemSyncDomainMap):
13926 string.memcpy(&self._pvt_ptr[0].memSyncDomainMap, <cyruntime.cudaLaunchMemSyncDomainMap*><void_ptr>memSyncDomainMap.getPtr(), sizeof(self._pvt_ptr[0].memSyncDomainMap))
13927 @property
13928 def memSyncDomain(self):
13929 if self._pvt_ptr[0].memSyncDomain not in _dict_cudaLaunchMemSyncDomain:
13930 return None
13931 return _dict_cudaLaunchMemSyncDomain[self._pvt_ptr[0].memSyncDomain]
13932 @memSyncDomain.setter
13933 def memSyncDomain(self, memSyncDomain not None : cudaLaunchMemSyncDomain):
13934 self._pvt_ptr[0].memSyncDomain = memSyncDomain.value
13935 @property
13936 def preferredClusterDim(self):
13937 return self._preferredClusterDim
13938 @preferredClusterDim.setter
13939 def preferredClusterDim(self, preferredClusterDim not None : anon_struct19):
13940 string.memcpy(&self._pvt_ptr[0].preferredClusterDim, <cyruntime.anon_struct19*><void_ptr>preferredClusterDim.getPtr(), sizeof(self._pvt_ptr[0].preferredClusterDim))
13941 @property
13942 def launchCompletionEvent(self):
13943 return self._launchCompletionEvent
13944 @launchCompletionEvent.setter
13945 def launchCompletionEvent(self, launchCompletionEvent not None : anon_struct20):
13946 string.memcpy(&self._pvt_ptr[0].launchCompletionEvent, <cyruntime.anon_struct20*><void_ptr>launchCompletionEvent.getPtr(), sizeof(self._pvt_ptr[0].launchCompletionEvent))
13947 @property
13948 def deviceUpdatableKernelNode(self):
13949 return self._deviceUpdatableKernelNode
13950 @deviceUpdatableKernelNode.setter
13951 def deviceUpdatableKernelNode(self, deviceUpdatableKernelNode not None : anon_struct21):
13952 string.memcpy(&self._pvt_ptr[0].deviceUpdatableKernelNode, <cyruntime.anon_struct21*><void_ptr>deviceUpdatableKernelNode.getPtr(), sizeof(self._pvt_ptr[0].deviceUpdatableKernelNode))
13953 @property
13954 def sharedMemCarveout(self):
13955 return self._pvt_ptr[0].sharedMemCarveout
13956 @sharedMemCarveout.setter
13957 def sharedMemCarveout(self, unsigned int sharedMemCarveout):
13958 self._pvt_ptr[0].sharedMemCarveout = sharedMemCarveout
13959 @property
13960 def nvlinkUtilCentricScheduling(self):
13961 return self._pvt_ptr[0].nvlinkUtilCentricScheduling
13962 @nvlinkUtilCentricScheduling.setter
13963 def nvlinkUtilCentricScheduling(self, unsigned int nvlinkUtilCentricScheduling):
13964 self._pvt_ptr[0].nvlinkUtilCentricScheduling = nvlinkUtilCentricScheduling
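# Example (illustrative sketch, not generated code): the value is a union, so
# set only the member that matches the attribute id it will be paired with,
# e.g. a 2x1x1 thread-block cluster:
#
#     val = cudaLaunchAttributeValue()
#     val.clusterDim.x, val.clusterDim.y, val.clusterDim.z = 2, 1, 1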
13966cdef class cudaLaunchAttribute_st:
13967 """
13968 Launch attribute
13970 Attributes
13971 ----------
13972 id : cudaLaunchAttributeID
13973 Attribute to set
13974 val : cudaLaunchAttributeValue
13975 Value of the attribute
13977 Methods
13978 -------
13979 getPtr()
13980 Get memory address of class instance
13981 """
13982 def __cinit__(self, void_ptr _ptr = 0):
13983 if _ptr == 0:
13984 self._pvt_ptr = &self._pvt_val
13985 else:
13986 self._pvt_ptr = <cyruntime.cudaLaunchAttribute_st *>_ptr
13987 def __init__(self, void_ptr _ptr = 0):
13989 self._val = cudaLaunchAttributeValue(_ptr=<void_ptr>&self._pvt_ptr[0].val)
13990 def __dealloc__(self):
13991 pass
13992 def getPtr(self):
13993 return <void_ptr>self._pvt_ptr
13994 def __repr__(self):
13995 if self._pvt_ptr is not NULL:
13996 str_list = []
13997 try:
13998 str_list += ['id : ' + str(self.id)]
13999 except ValueError:
14000 str_list += ['id : <ValueError>']
14001 try:
14002 str_list += ['val :\n' + '\n'.join([' ' + line for line in str(self.val).splitlines()])]
14003 except ValueError:
14004 str_list += ['val : <ValueError>']
14005 return '\n'.join(str_list)
14006 else:
14007 return ''
14008 @property
14009 def id(self):
14010 if self._pvt_ptr[0].id not in _dict_cudaLaunchAttributeID:
14011 return None
14012 return _dict_cudaLaunchAttributeID[self._pvt_ptr[0].id]
14013 @id.setter
14014 def id(self, id not None : cudaLaunchAttributeID):
14015 self._pvt_ptr[0].id = id.value
14016 @property
14017 def val(self):
14018 return self._val
14019 @val.setter
14020 def val(self, val not None : cudaLaunchAttributeValue):
14021 string.memcpy(&self._pvt_ptr[0].val, <cyruntime.cudaLaunchAttributeValue*><void_ptr>val.getPtr(), sizeof(self._pvt_ptr[0].val))
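# Example (illustrative sketch, not generated code): pairing an attribute id
# with its value, then attaching it to a launch configuration. The
# cudaLaunchConfig_t usage with cudaLaunchKernelEx is an assumption based on
# the runtime API, not something defined in this section.
#
#     attr = cudaLaunchAttribute_st()
#     attr.id = cudaLaunchAttributeID.cudaLaunchAttributePriority
#     attr.val.priority = 1
#     config = cudaLaunchConfig_t()
#     config.attrs = [attr]
#     config.numAttrs = 1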
14023cdef class anon_struct22:
14024 """
14025 Attributes
14026 ----------
14027 bytesOverBudget : unsigned long long
14030 Methods
14031 -------
14032 getPtr()
14033 Get memory address of class instance
14034 """
14035 def __cinit__(self, void_ptr _ptr):
14036 self._pvt_ptr = <cyruntime.cudaAsyncNotificationInfo *>_ptr
14038 def __init__(self, void_ptr _ptr):
14039 pass
14040 def __dealloc__(self):
14041 pass
14042 def getPtr(self):
14043 return <void_ptr>&self._pvt_ptr[0].info.overBudget
14044 def __repr__(self):
14045 if self._pvt_ptr is not NULL:
14046 str_list = []
14047 try:
14048 str_list += ['bytesOverBudget : ' + str(self.bytesOverBudget)]
14049 except ValueError:
14050 str_list += ['bytesOverBudget : <ValueError>']
14051 return '\n'.join(str_list)
14052 else:
14053 return ''
14054 @property
14055 def bytesOverBudget(self):
14056 return self._pvt_ptr[0].info.overBudget.bytesOverBudget
14057 @bytesOverBudget.setter
14058 def bytesOverBudget(self, unsigned long long bytesOverBudget):
14059 self._pvt_ptr[0].info.overBudget.bytesOverBudget = bytesOverBudget
14061cdef class anon_union9:
14062 """
14063 Attributes
14064 ----------
14065 overBudget : anon_struct22
14068 Methods
14069 -------
14070 getPtr()
14071 Get memory address of class instance
14072 """
14073 def __cinit__(self, void_ptr _ptr):
14074 self._pvt_ptr = <cyruntime.cudaAsyncNotificationInfo *>_ptr
14076 def __init__(self, void_ptr _ptr):
14078 self._overBudget = anon_struct22(_ptr=<void_ptr>self._pvt_ptr)
14079 def __dealloc__(self):
14080 pass
14081 def getPtr(self):
14082 return <void_ptr>&self._pvt_ptr[0].info
14083 def __repr__(self):
14084 if self._pvt_ptr is not NULL:
14085 str_list = []
14086 try:
14087 str_list += ['overBudget :\n' + '\n'.join([' ' + line for line in str(self.overBudget).splitlines()])]
14088 except ValueError:
14089 str_list += ['overBudget : <ValueError>']
14090 return '\n'.join(str_list)
14091 else:
14092 return ''
14093 @property
14094 def overBudget(self):
14095 return self._overBudget
14096 @overBudget.setter
14097 def overBudget(self, overBudget not None : anon_struct22):
14098 string.memcpy(&self._pvt_ptr[0].info.overBudget, <cyruntime.anon_struct22*><void_ptr>overBudget.getPtr(), sizeof(self._pvt_ptr[0].info.overBudget))
14100cdef class cudaAsyncNotificationInfo:
14101 """
14102 Information describing an async notification event
14104 Attributes
14105 ----------
14106 type : cudaAsyncNotificationType
14107 The type of notification being sent
14108 info : anon_union9
14109 Information about the notification. `type` must be checked in
14110 order to interpret this field.
14112 Methods
14113 -------
14114 getPtr()
14115 Get memory address of class instance
14116 """
14117 def __cinit__(self, void_ptr _ptr = 0):
14118 if _ptr == 0:
14119 self._val_ptr = <cyruntime.cudaAsyncNotificationInfo *>calloc(1, sizeof(cyruntime.cudaAsyncNotificationInfo))
14120 self._pvt_ptr = self._val_ptr
14121 else:
14122 self._pvt_ptr = <cyruntime.cudaAsyncNotificationInfo *>_ptr
14123 def __init__(self, void_ptr _ptr = 0):
14125 self._info = anon_union9(_ptr=<void_ptr>self._pvt_ptr)
14126 def __dealloc__(self):
14127 if self._val_ptr is not NULL:
14128 free(self._val_ptr)
14129 def getPtr(self):
14130 return <void_ptr>self._pvt_ptr
14131 def __repr__(self):
14132 if self._pvt_ptr is not NULL:
14133 str_list = []
14134 try:
14135 str_list += ['type : ' + str(self.type)]
14136 except ValueError:
14137 str_list += ['type : <ValueError>']
14138 try:
14139 str_list += ['info :\n' + '\n'.join([' ' + line for line in str(self.info).splitlines()])]
14140 except ValueError:
14141 str_list += ['info : <ValueError>']
14142 return '\n'.join(str_list)
14143 else:
14144 return ''
14145 @property
14146 def type(self):
14147 if self._pvt_ptr[0].type not in _dict_cudaAsyncNotificationType:
14148 return None
14149 return _dict_cudaAsyncNotificationType[self._pvt_ptr[0].type]
14150 @type.setter
14151 def type(self, type not None : cudaAsyncNotificationType):
14152 self._pvt_ptr[0].type = type.value
14153 @property
14154 def info(self):
14155 return self._info
14156 @info.setter
14157 def info(self, info not None : anon_union9):
14158 string.memcpy(&self._pvt_ptr[0].info, <cyruntime.anon_union9*><void_ptr>info.getPtr(), sizeof(self._pvt_ptr[0].info))
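# Example (illustrative sketch, not generated code): in an async-notification
# callback, check `type` before reading `info`; only the matching union
# member is valid.
#
#     def on_notification(notif):
#         if notif.type == cudaAsyncNotificationType.cudaAsyncNotificationTypeOverBudget:
#             print('over budget by', notif.info.overBudget.bytesOverBudget, 'bytes')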
14160cdef class cudaTextureDesc:
14161 """
14162 CUDA texture descriptor
14164 Attributes
14165 ----------
14166 addressMode : list[cudaTextureAddressMode]
14167 Texture address mode for up to 3 dimensions
14168 filterMode : cudaTextureFilterMode
14169 Texture filter mode
14170 readMode : cudaTextureReadMode
14171 Texture read mode
14172 sRGB : int
14173 Perform sRGB->linear conversion during texture read
14174 borderColor : list[float]
14175 Texture Border Color
14176 normalizedCoords : int
14177 Indicates whether texture reads are normalized or not
14178 maxAnisotropy : unsigned int
14179 Limit to the anisotropy ratio
14180 mipmapFilterMode : cudaTextureFilterMode
14181 Mipmap filter mode
14182 mipmapLevelBias : float
14183 Offset applied to the supplied mipmap level
14184 minMipmapLevelClamp : float
14185 Lower end of the mipmap level range to clamp access to
14186 maxMipmapLevelClamp : float
14187 Upper end of the mipmap level range to clamp access to
14188 disableTrilinearOptimization : int
14189 Disable any trilinear filtering optimizations.
14190 seamlessCubemap : int
14191 Enable seamless cube map filtering.
14193 Methods
14194 -------
14195 getPtr()
14196 Get memory address of class instance
14197 """
14198 def __cinit__(self, void_ptr _ptr = 0):
14199 if _ptr == 0:
14200 self._pvt_ptr = &self._pvt_val
14201 else:
14202 self._pvt_ptr = <cyruntime.cudaTextureDesc *>_ptr
14203 def __init__(self, void_ptr _ptr = 0):
14204 pass
14205 def __dealloc__(self):
14206 pass
14207 def getPtr(self):
14208 return <void_ptr>self._pvt_ptr
14209 def __repr__(self):
14210 if self._pvt_ptr is not NULL:
14211 str_list = []
14212 try:
14213 str_list += ['addressMode : ' + str(self.addressMode)]
14214 except ValueError:
14215 str_list += ['addressMode : <ValueError>']
14216 try:
14217 str_list += ['filterMode : ' + str(self.filterMode)]
14218 except ValueError:
14219 str_list += ['filterMode : <ValueError>']
14220 try:
14221 str_list += ['readMode : ' + str(self.readMode)]
14222 except ValueError:
14223 str_list += ['readMode : <ValueError>']
14224 try:
14225 str_list += ['sRGB : ' + str(self.sRGB)]
14226 except ValueError:
14227 str_list += ['sRGB : <ValueError>']
14228 try:
14229 str_list += ['borderColor : ' + str(self.borderColor)]
14230 except ValueError:
14231 str_list += ['borderColor : <ValueError>']
14232 try:
14233 str_list += ['normalizedCoords : ' + str(self.normalizedCoords)]
14234 except ValueError:
14235 str_list += ['normalizedCoords : <ValueError>']
14236 try:
14237 str_list += ['maxAnisotropy : ' + str(self.maxAnisotropy)]
14238 except ValueError:
14239 str_list += ['maxAnisotropy : <ValueError>']
14240 try:
14241 str_list += ['mipmapFilterMode : ' + str(self.mipmapFilterMode)]
14242 except ValueError:
14243 str_list += ['mipmapFilterMode : <ValueError>']
14244 try:
14245 str_list += ['mipmapLevelBias : ' + str(self.mipmapLevelBias)]
14246 except ValueError:
14247 str_list += ['mipmapLevelBias : <ValueError>']
14248 try:
14249 str_list += ['minMipmapLevelClamp : ' + str(self.minMipmapLevelClamp)]
14250 except ValueError:
14251 str_list += ['minMipmapLevelClamp : <ValueError>']
14252 try:
14253 str_list += ['maxMipmapLevelClamp : ' + str(self.maxMipmapLevelClamp)]
14254 except ValueError:
14255 str_list += ['maxMipmapLevelClamp : <ValueError>']
14256 try:
14257 str_list += ['disableTrilinearOptimization : ' + str(self.disableTrilinearOptimization)]
14258 except ValueError:
14259 str_list += ['disableTrilinearOptimization : <ValueError>']
14260 try:
14261 str_list += ['seamlessCubemap : ' + str(self.seamlessCubemap)]
14262 except ValueError:
14263 str_list += ['seamlessCubemap : <ValueError>']
14264 return '\n'.join(str_list)
14265 else:
14266 return ''
14267 @property
14268 def addressMode(self):
14269 return [_dict_cudaTextureAddressMode[_x] if _x in _dict_cudaTextureAddressMode else None for _x in list(self._pvt_ptr[0].addressMode)]
14270 @addressMode.setter
14271 def addressMode(self, addressMode):
14272 self._pvt_ptr[0].addressMode = [_x.value for _x in addressMode]
14273 @property
14274 def filterMode(self):
14275 if self._pvt_ptr[0].filterMode not in _dict_cudaTextureFilterMode:
14276 return None
14277 return _dict_cudaTextureFilterMode[self._pvt_ptr[0].filterMode]
14278 @filterMode.setter
14279 def filterMode(self, filterMode not None : cudaTextureFilterMode):
14280 self._pvt_ptr[0].filterMode = filterMode.value
14281 @property
14282 def readMode(self):
14283 if self._pvt_ptr[0].readMode not in _dict_cudaTextureReadMode:
14284 return None
14285 return _dict_cudaTextureReadMode[self._pvt_ptr[0].readMode]
14286 @readMode.setter
14287 def readMode(self, readMode not None : cudaTextureReadMode):
14288 self._pvt_ptr[0].readMode = readMode.value
14289 @property
14290 def sRGB(self):
14291 return self._pvt_ptr[0].sRGB
14292 @sRGB.setter
14293 def sRGB(self, int sRGB):
14294 self._pvt_ptr[0].sRGB = sRGB
14295 @property
14296 def borderColor(self):
14297 return self._pvt_ptr[0].borderColor
14298 @borderColor.setter
14299 def borderColor(self, borderColor):
14300 self._pvt_ptr[0].borderColor = borderColor
14301 @property
14302 def normalizedCoords(self):
14303 return self._pvt_ptr[0].normalizedCoords
14304 @normalizedCoords.setter
14305 def normalizedCoords(self, int normalizedCoords):
14306 self._pvt_ptr[0].normalizedCoords = normalizedCoords
14307 @property
14308 def maxAnisotropy(self):
14309 return self._pvt_ptr[0].maxAnisotropy
14310 @maxAnisotropy.setter
14311 def maxAnisotropy(self, unsigned int maxAnisotropy):
14312 self._pvt_ptr[0].maxAnisotropy = maxAnisotropy
14313 @property
14314 def mipmapFilterMode(self):
14315 if self._pvt_ptr[0].mipmapFilterMode not in _dict_cudaTextureFilterMode:
14316 return None
14317 return _dict_cudaTextureFilterMode[self._pvt_ptr[0].mipmapFilterMode]
14318 @mipmapFilterMode.setter
14319 def mipmapFilterMode(self, mipmapFilterMode not None : cudaTextureFilterMode):
14320 self._pvt_ptr[0].mipmapFilterMode = mipmapFilterMode.value
14321 @property
14322 def mipmapLevelBias(self):
14323 return self._pvt_ptr[0].mipmapLevelBias
14324 @mipmapLevelBias.setter
14325 def mipmapLevelBias(self, float mipmapLevelBias):
14326 self._pvt_ptr[0].mipmapLevelBias = mipmapLevelBias
14327 @property
14328 def minMipmapLevelClamp(self):
14329 return self._pvt_ptr[0].minMipmapLevelClamp
14330 @minMipmapLevelClamp.setter
14331 def minMipmapLevelClamp(self, float minMipmapLevelClamp):
14332 self._pvt_ptr[0].minMipmapLevelClamp = minMipmapLevelClamp
14333 @property
14334 def maxMipmapLevelClamp(self):
14335 return self._pvt_ptr[0].maxMipmapLevelClamp
14336 @maxMipmapLevelClamp.setter
14337 def maxMipmapLevelClamp(self, float maxMipmapLevelClamp):
14338 self._pvt_ptr[0].maxMipmapLevelClamp = maxMipmapLevelClamp
14339 @property
14340 def disableTrilinearOptimization(self):
14341 return self._pvt_ptr[0].disableTrilinearOptimization
14342 @disableTrilinearOptimization.setter
14343 def disableTrilinearOptimization(self, int disableTrilinearOptimization):
14344 self._pvt_ptr[0].disableTrilinearOptimization = disableTrilinearOptimization
14345 @property
14346 def seamlessCubemap(self):
14347 return self._pvt_ptr[0].seamlessCubemap
14348 @seamlessCubemap.setter
14349 def seamlessCubemap(self, int seamlessCubemap):
14350 self._pvt_ptr[0].seamlessCubemap = seamlessCubemap
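# Example (illustrative sketch, not generated code): a descriptor for a
# normalized, linearly filtered texture; fields left unset keep their
# zero-initialized defaults.
#
#     texDesc = cudaTextureDesc()
#     texDesc.addressMode = [cudaTextureAddressMode.cudaAddressModeClamp] * 3
#     texDesc.filterMode = cudaTextureFilterMode.cudaFilterModeLinear
#     texDesc.readMode = cudaTextureReadMode.cudaReadModeNormalizedFloat
#     texDesc.normalizedCoords = 1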
14352cdef class cudaEglPlaneDesc_st:
14353 """
14354 CUDA EGL Plane Descriptor - structure defining each plane of a CUDA
14355 EGLFrame
14357 Attributes
14358 ----------
14359 width : unsigned int
14360 Width of plane
14361 height : unsigned int
14362 Height of plane
14363 depth : unsigned int
14364 Depth of plane
14365 pitch : unsigned int
14366 Pitch of plane
14367 numChannels : unsigned int
14368 Number of channels for the plane
14369 channelDesc : cudaChannelFormatDesc
14370 Channel Format Descriptor
14371 reserved : list[unsigned int]
14372 Reserved for future use
14374 Methods
14375 -------
14376 getPtr()
14377 Get memory address of class instance
14378 """
14379 def __cinit__(self, void_ptr _ptr = 0):
14380 if _ptr == 0:
14381 self._pvt_ptr = &self._pvt_val
14382 else:
14383 self._pvt_ptr = <cyruntime.cudaEglPlaneDesc_st *>_ptr
14384 def __init__(self, void_ptr _ptr = 0):
14386 self._channelDesc = cudaChannelFormatDesc(_ptr=<void_ptr>&self._pvt_ptr[0].channelDesc)
14387 def __dealloc__(self):
14388 pass
14389 def getPtr(self):
14390 return <void_ptr>self._pvt_ptr
14391 def __repr__(self):
14392 if self._pvt_ptr is not NULL:
14393 str_list = []
14394 try:
14395 str_list += ['width : ' + str(self.width)]
14396 except ValueError:
14397 str_list += ['width : <ValueError>']
14398 try:
14399 str_list += ['height : ' + str(self.height)]
14400 except ValueError:
14401 str_list += ['height : <ValueError>']
14402 try:
14403 str_list += ['depth : ' + str(self.depth)]
14404 except ValueError:
14405 str_list += ['depth : <ValueError>']
14406 try:
14407 str_list += ['pitch : ' + str(self.pitch)]
14408 except ValueError:
14409 str_list += ['pitch : <ValueError>']
14410 try:
14411 str_list += ['numChannels : ' + str(self.numChannels)]
14412 except ValueError:
14413 str_list += ['numChannels : <ValueError>']
14414 try:
14415 str_list += ['channelDesc :\n' + '\n'.join([' ' + line for line in str(self.channelDesc).splitlines()])]
14416 except ValueError:
14417 str_list += ['channelDesc : <ValueError>']
14418 try:
14419 str_list += ['reserved : ' + str(self.reserved)]
14420 except ValueError:
14421 str_list += ['reserved : <ValueError>']
14422 return '\n'.join(str_list)
14423 else:
14424 return ''
14425 @property
14426 def width(self):
14427 return self._pvt_ptr[0].width
14428 @width.setter
14429 def width(self, unsigned int width):
14430 self._pvt_ptr[0].width = width
14431 @property
14432 def height(self):
14433 return self._pvt_ptr[0].height
14434 @height.setter
14435 def height(self, unsigned int height):
14436 self._pvt_ptr[0].height = height
14437 @property
14438 def depth(self):
14439 return self._pvt_ptr[0].depth
14440 @depth.setter
14441 def depth(self, unsigned int depth):
14442 self._pvt_ptr[0].depth = depth
14443 @property
14444 def pitch(self):
14445 return self._pvt_ptr[0].pitch
14446 @pitch.setter
14447 def pitch(self, unsigned int pitch):
14448 self._pvt_ptr[0].pitch = pitch
14449 @property
14450 def numChannels(self):
14451 return self._pvt_ptr[0].numChannels
14452 @numChannels.setter
14453 def numChannels(self, unsigned int numChannels):
14454 self._pvt_ptr[0].numChannels = numChannels
14455 @property
14456 def channelDesc(self):
14457 return self._channelDesc
14458 @channelDesc.setter
14459 def channelDesc(self, channelDesc not None : cudaChannelFormatDesc):
14460 string.memcpy(&self._pvt_ptr[0].channelDesc, <cyruntime.cudaChannelFormatDesc*><void_ptr>channelDesc.getPtr(), sizeof(self._pvt_ptr[0].channelDesc))
14461 @property
14462 def reserved(self):
14463 return self._pvt_ptr[0].reserved
14464 @reserved.setter
14465 def reserved(self, reserved):
14466 self._pvt_ptr[0].reserved = reserved
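# Example (illustrative sketch, not generated code): describing a single
# tightly packed 8-bit RGBA plane; the channel descriptor would normally be
# copied from a cudaChannelFormatDesc built elsewhere.
#
#     plane = cudaEglPlaneDesc_st()
#     plane.width, plane.height, plane.depth = 1920, 1080, 1
#     plane.numChannels = 4
#     plane.pitch = 1920 * 4   # bytes per row for packed RGBA8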
14468cdef class anon_union10:
14469 """
14470 Attributes
14471 ----------
14472 pArray : list[cudaArray_t]
14474 pPitch : list[cudaPitchedPtr]
14477 Methods
14478 -------
14479 getPtr()
14480 Get memory address of class instance
14481 """
14482 def __cinit__(self, void_ptr _ptr):
14483 self._pvt_ptr = <cyruntime.cudaEglFrame_st *>_ptr
14485 def __init__(self, void_ptr _ptr):
14486 pass
14487 def __dealloc__(self):
14488 pass
14489 def getPtr(self):
14490 return <void_ptr>&self._pvt_ptr[0].frame
14491 def __repr__(self):
14492 if self._pvt_ptr is not NULL:
14493 str_list = []
14494 try:
14495 str_list += ['pArray : ' + str(self.pArray)]
14496 except ValueError:
14497 str_list += ['pArray : <ValueError>']
14498 try:
14499 str_list += ['pPitch :\n' + '\n'.join([' ' + line for line in str(self.pPitch).splitlines()])]
14500 except ValueError:
14501 str_list += ['pPitch : <ValueError>']
14502 return '\n'.join(str_list)
14503 else:
14504 return ''
14505 @property
14506 def pArray(self):
14507 return [cudaArray_t(init_value=<void_ptr>_pArray) for _pArray in self._pvt_ptr[0].frame.pArray]
14508 @pArray.setter
14509 def pArray(self, pArray : list[cudaArray_t]):
14510 if len(pArray) != 3:
14511 raise IndexError('wrong number of values during array assignment, expected 3, got ' + str(len(pArray)))
14512 pArray = [int(_pArray) for _pArray in pArray]
14513 for _idx, _pArray in enumerate(pArray):
14514 self._pvt_ptr[0].frame.pArray[_idx] = <cyruntime.cudaArray_t><void_ptr>_pArray
14516 @property
14517 def pPitch(self):
14518 out_pPitch = [cudaPitchedPtr() for _pPitch in self._pvt_ptr[0].frame.pPitch]
14519 for _idx in range(len(out_pPitch)):
14520 string.memcpy(<cyruntime.cudaPitchedPtr*><void_ptr>out_pPitch[_idx].getPtr(), &self._pvt_ptr[0].frame.pPitch[_idx], sizeof(cyruntime.cudaPitchedPtr))
14521 return out_pPitch
14522 @pPitch.setter
14523 def pPitch(self, pPitch : list[cudaPitchedPtr]):
14524 if len(pPitch) != 3:
14525 raise IndexError('wrong number of values during array assignment, expected 3, got ' + str(len(pPitch)))
14526 for _idx in range(len(pPitch)):
14527 string.memcpy(&self._pvt_ptr[0].frame.pPitch[_idx], <cyruntime.cudaPitchedPtr*><void_ptr>pPitch[_idx].getPtr(), sizeof(cyruntime.cudaPitchedPtr))
14530cdef class cudaEglFrame_st:
14531 """
14532 CUDA EGLFrame Descriptor - structure defining one frame of EGL.
14533 Each frame may contain one or more planes depending on whether the
14534 surface is multiplanar or not. Each plane of EGLFrame is
14535 represented by cudaEglPlaneDesc, which is defined as:
14536 typedef struct cudaEglPlaneDesc_st { unsigned int width;
14537 unsigned int height; unsigned int depth; unsigned int pitch;
14538 unsigned int numChannels; struct cudaChannelFormatDesc channelDesc;
14539 unsigned int reserved[4]; } cudaEglPlaneDesc;
14541 Attributes
14542 ----------
14543 frame : anon_union10
14545 planeDesc : list[cudaEglPlaneDesc]
14546 CUDA EGL Plane Descriptor cudaEglPlaneDesc
14547 planeCount : unsigned int
14548 Number of planes
14549 frameType : cudaEglFrameType
14550 Array or Pitch
14551 eglColorFormat : cudaEglColorFormat
14552 CUDA EGL Color Format
14554 Methods
14555 -------
14556 getPtr()
14557 Get memory address of class instance
14558 """
14559 def __cinit__(self, void_ptr _ptr = 0):
14560 if _ptr == 0:
14561 self._val_ptr = <cyruntime.cudaEglFrame_st *>calloc(1, sizeof(cyruntime.cudaEglFrame_st))
14562 self._pvt_ptr = self._val_ptr
14563 else:
14564 self._pvt_ptr = <cyruntime.cudaEglFrame_st *>_ptr
14565 def __init__(self, void_ptr _ptr = 0):
14567 self._frame = anon_union10(_ptr=<void_ptr>self._pvt_ptr)
14568 def __dealloc__(self):
14569 if self._val_ptr is not NULL:
14570 free(self._val_ptr)
14571 def getPtr(self):
14572 return <void_ptr>self._pvt_ptr
14573 def __repr__(self):
14574 if self._pvt_ptr is not NULL:
14575 str_list = []
14576 try:
14577 str_list += ['frame :\n' + '\n'.join([' ' + line for line in str(self.frame).splitlines()])]
14578 except ValueError:
14579 str_list += ['frame : <ValueError>']
14580 try:
14581 str_list += ['planeDesc :\n' + '\n'.join([' ' + line for line in str(self.planeDesc).splitlines()])]
14582 except ValueError:
14583 str_list += ['planeDesc : <ValueError>']
14584 try:
14585 str_list += ['planeCount : ' + str(self.planeCount)]
14586 except ValueError:
14587 str_list += ['planeCount : <ValueError>']
14588 try:
14589 str_list += ['frameType : ' + str(self.frameType)]
14590 except ValueError:
14591 str_list += ['frameType : <ValueError>']
14592 try:
14593 str_list += ['eglColorFormat : ' + str(self.eglColorFormat)]
14594 except ValueError:
14595 str_list += ['eglColorFormat : <ValueError>']
14596 return '\n'.join(str_list)
14597 else:
14598 return ''
14599 @property
14600 def frame(self):
14601 return self._frame
14602 @frame.setter
14603 def frame(self, frame not None : anon_union10):
14604 string.memcpy(&self._pvt_ptr[0].frame, <cyruntime.anon_union10*><void_ptr>frame.getPtr(), sizeof(self._pvt_ptr[0].frame))
14605 @property
14606 def planeDesc(self):
14607 out_planeDesc = [cudaEglPlaneDesc() for _planeDesc in self._pvt_ptr[0].planeDesc]
14608 for _idx in range(len(out_planeDesc)):
14609 string.memcpy(<cyruntime.cudaEglPlaneDesc*><void_ptr>out_planeDesc[_idx].getPtr(), &self._pvt_ptr[0].planeDesc[_idx], sizeof(cyruntime.cudaEglPlaneDesc))
14610 return out_planeDesc
14611 @planeDesc.setter
14612 def planeDesc(self, planeDesc : list[cudaEglPlaneDesc]):
14613 if len(planeDesc) != 3:
14614 raise IndexError(f'wrong number of values during array assignment, expected 3, got {len(planeDesc)}')
14615 for _idx in range(len(planeDesc)):
14616 string.memcpy(&self._pvt_ptr[0].planeDesc[_idx], <cyruntime.cudaEglPlaneDesc*><void_ptr>planeDesc[_idx].getPtr(), sizeof(cyruntime.cudaEglPlaneDesc))
14618 @property
14619 def planeCount(self):
14620 return self._pvt_ptr[0].planeCount
14621 @planeCount.setter
14622 def planeCount(self, unsigned int planeCount):
14623 self._pvt_ptr[0].planeCount = planeCount
14624 @property
14625 def frameType(self):
14626 if self._pvt_ptr[0].frameType not in _dict_cudaEglFrameType:
14627 return None
14628 return _dict_cudaEglFrameType[self._pvt_ptr[0].frameType]
14629 @frameType.setter
14630 def frameType(self, frameType not None : cudaEglFrameType):
14631 self._pvt_ptr[0].frameType = frameType.value
14632 @property
14633 def eglColorFormat(self):
14634 if self._pvt_ptr[0].eglColorFormat not in _dict_cudaEglColorFormat:
14635 return None
14636 return _dict_cudaEglColorFormat[self._pvt_ptr[0].eglColorFormat]
14637 @eglColorFormat.setter
14638 def eglColorFormat(self, eglColorFormat not None : cudaEglColorFormat):
14639 self._pvt_ptr[0].eglColorFormat = eglColorFormat.value
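
# Illustrative sketch (hypothetical helper, not part of the generated
# bindings): populating a cudaEglFrame_st through the property wrappers
# above. Assumes the cudaEglFrameType enum mirrors the C enumerator names
# and that `planes` is a list of exactly 3 cudaEglPlaneDesc, as the
# planeDesc setter requires.
def _example_build_pitch_frame(planes, unsigned int count):
    frame = cudaEglFrame_st()
    frame.planeCount = count
    frame.frameType = cudaEglFrameType.cudaEglFrameTypePitch
    frame.planeDesc = planes
    return frame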
14641cdef class cudaGraphConditionalHandle:
14642 """
14644 CUDA handle for conditional graph nodes
14646 Methods
14647 -------
14648 getPtr()
14649 Get memory address of class instance
14651 """
14652 def __cinit__(self, unsigned long long init_value = 0, void_ptr _ptr = 0):
14653 if _ptr == 0:
14654 self._pvt_ptr = &self._pvt_val
14655 else:
14656 self._pvt_ptr = <cyruntime.cudaGraphConditionalHandle *>_ptr
14657 if init_value:
14658 self._pvt_ptr[0] = init_value
14659 def __dealloc__(self):
14660 pass
14661 def __repr__(self):
14662 return '<cudaGraphConditionalHandle ' + str(self.__int__()) + '>'
14663 def __int__(self):
14664 return <unsigned long long>self._pvt_ptr[0]
14665 def getPtr(self):
14666 return <void_ptr>self._pvt_ptr
14668cdef class cudaLogIterator:
14669 """
14671 Methods
14672 -------
14673 getPtr()
14674 Get memory address of class instance
14676 """
14677 def __cinit__(self, unsigned int init_value = 0, void_ptr _ptr = 0):
14678 if _ptr == 0:
14679 self._pvt_ptr = &self._pvt_val
14680 else:
14681 self._pvt_ptr = <cyruntime.cudaLogIterator *>_ptr
14682 if init_value:
14683 self._pvt_ptr[0] = init_value
14684 def __dealloc__(self):
14685 pass
14686 def __repr__(self):
14687 return '<cudaLogIterator ' + str(self.__int__()) + '>'
14688 def __int__(self):
14689 return <unsigned int>self._pvt_ptr[0]
14690 def getPtr(self):
14691 return <void_ptr>self._pvt_ptr
14693cdef class cudaSurfaceObject_t:
14694 """
14696 An opaque value that represents a CUDA Surface object
14698 Methods
14699 -------
14700 getPtr()
14701 Get memory address of class instance
14703 """
14704 def __cinit__(self, unsigned long long init_value = 0, void_ptr _ptr = 0):
14705 if _ptr == 0:
14706 self._pvt_ptr = &self._pvt_val
14707 else:
14708 self._pvt_ptr = <cyruntime.cudaSurfaceObject_t *>_ptr
14709 if init_value:
14710 self._pvt_ptr[0] = init_value
14711 def __dealloc__(self):
14712 pass
14713 def __repr__(self):
14714 return '<cudaSurfaceObject_t ' + str(self.__int__()) + '>'
14715 def __int__(self):
14716 return <unsigned long long>self._pvt_ptr[0]
14717 def getPtr(self):
14718 return <void_ptr>self._pvt_ptr
14720cdef class cudaTextureObject_t:
14721 """
14723 An opaque value that represents a CUDA texture object
14725 Methods
14726 -------
14727 getPtr()
14728 Get memory address of class instance
14730 """
14731 def __cinit__(self, unsigned long long init_value = 0, void_ptr _ptr = 0):
14732 if _ptr == 0:
14733 self._pvt_ptr = &self._pvt_val
14734 else:
14735 self._pvt_ptr = <cyruntime.cudaTextureObject_t *>_ptr
14736 if init_value:
14737 self._pvt_ptr[0] = init_value
14738 def __dealloc__(self):
14739 pass
14740 def __repr__(self):
14741 return '<cudaTextureObject_t ' + str(self.__int__()) + '>'
14742 def __int__(self):
14743 return <unsigned long long>self._pvt_ptr[0]
14744 def getPtr(self):
14745 return <void_ptr>self._pvt_ptr
14747cdef class GLenum:
14748 """
14750 Methods
14751 -------
14752 getPtr()
14753 Get memory address of class instance
14755 """
14756 def __cinit__(self, unsigned int init_value = 0, void_ptr _ptr = 0):
14757 if _ptr == 0:
14758 self._pvt_ptr = &self._pvt_val
14759 else:
14760 self._pvt_ptr = <cyruntime.GLenum *>_ptr
14761 if init_value:
14762 self._pvt_ptr[0] = init_value
14763 def __dealloc__(self):
14764 pass
14765 def __repr__(self):
14766 return '<GLenum ' + str(self.__int__()) + '>'
14767 def __int__(self):
14768 return <unsigned int>self._pvt_ptr[0]
14769 def getPtr(self):
14770 return <void_ptr>self._pvt_ptr
14772cdef class GLuint:
14773 """
14775 Methods
14776 -------
14777 getPtr()
14778 Get memory address of class instance
14780 """
14781 def __cinit__(self, unsigned int init_value = 0, void_ptr _ptr = 0):
14782 if _ptr == 0:
14783 self._pvt_ptr = &self._pvt_val
14784 else:
14785 self._pvt_ptr = <cyruntime.GLuint *>_ptr
14786 if init_value:
14787 self._pvt_ptr[0] = init_value
14788 def __dealloc__(self):
14789 pass
14790 def __repr__(self):
14791 return '<GLuint ' + str(self.__int__()) + '>'
14792 def __int__(self):
14793 return <unsigned int>self._pvt_ptr[0]
14794 def getPtr(self):
14795 return <void_ptr>self._pvt_ptr
14797cdef class EGLint:
14798 """
14800 Methods
14801 -------
14802 getPtr()
14803 Get memory address of class instance
14805 """
14806 def __cinit__(self, unsigned int init_value = 0, void_ptr _ptr = 0):
14807 if _ptr == 0:
14808 self._pvt_ptr = &self._pvt_val
14809 else:
14810 self._pvt_ptr = <cyruntime.EGLint *>_ptr
14811 if init_value:
14812 self._pvt_ptr[0] = init_value
14813 def __dealloc__(self):
14814 pass
14815 def __repr__(self):
14816 return '<EGLint ' + str(self.__int__()) + '>'
14817 def __int__(self):
14818 return <unsigned int>self._pvt_ptr[0]
14819 def getPtr(self):
14820 return <void_ptr>self._pvt_ptr
14822cdef class VdpDevice:
14823 """
14825 Methods
14826 -------
14827 getPtr()
14828 Get memory address of class instance
14830 """
14831 def __cinit__(self, uint32_t init_value = 0, void_ptr _ptr = 0):
14832 if _ptr == 0:
14833 self._pvt_ptr = &self._pvt_val
14834 else:
14835 self._pvt_ptr = <cyruntime.VdpDevice *>_ptr
14836 if init_value:
14837 self._pvt_ptr[0] = init_value
14838 def __dealloc__(self):
14839 pass
14840 def __repr__(self):
14841 return '<VdpDevice ' + str(self.__int__()) + '>'
14842 def __int__(self):
14843 return <uint32_t>self._pvt_ptr[0]
14844 def getPtr(self):
14845 return <void_ptr>self._pvt_ptr
14847cdef class VdpGetProcAddress:
14848 """
14850 Methods
14851 -------
14852 getPtr()
14853 Get memory address of class instance
14855 """
14856 def __cinit__(self, unsigned long long init_value = 0, void_ptr _ptr = 0):
14857 if _ptr == 0:
14858 self._pvt_ptr = &self._pvt_val
14859 else:
14860 self._pvt_ptr = <cyruntime.VdpGetProcAddress *>_ptr
14861 if init_value:
14862 self._pvt_ptr[0] = init_value
14863 def __dealloc__(self):
14864 pass
14865 def __repr__(self):
14866 return '<VdpGetProcAddress ' + str(self.__int__()) + '>'
14867 def __int__(self):
14868 return <unsigned long long>self._pvt_ptr[0]
14869 def getPtr(self):
14870 return <void_ptr>self._pvt_ptr
14872cdef class VdpVideoSurface:
14873 """
14875 Methods
14876 -------
14877 getPtr()
14878 Get memory address of class instance
14880 """
14881 def __cinit__(self, uint32_t init_value = 0, void_ptr _ptr = 0):
14882 if _ptr == 0:
14883 self._pvt_ptr = &self._pvt_val
14884 else:
14885 self._pvt_ptr = <cyruntime.VdpVideoSurface *>_ptr
14886 if init_value:
14887 self._pvt_ptr[0] = init_value
14888 def __dealloc__(self):
14889 pass
14890 def __repr__(self):
14891 return '<VdpVideoSurface ' + str(self.__int__()) + '>'
14892 def __int__(self):
14893 return <uint32_t>self._pvt_ptr[0]
14894 def getPtr(self):
14895 return <void_ptr>self._pvt_ptr
14897cdef class VdpOutputSurface:
14898 """
14900 Methods
14901 -------
14902 getPtr()
14903 Get memory address of class instance
14905 """
14906 def __cinit__(self, uint32_t init_value = 0, void_ptr _ptr = 0):
14907 if _ptr == 0:
14908 self._pvt_ptr = &self._pvt_val
14909 else:
14910 self._pvt_ptr = <cyruntime.VdpOutputSurface *>_ptr
14911 if init_value:
14912 self._pvt_ptr[0] = init_value
14913 def __dealloc__(self):
14914 pass
14915 def __repr__(self):
14916 return '<VdpOutputSurface ' + str(self.__int__()) + '>'
14917 def __int__(self):
14918 return <uint32_t>self._pvt_ptr[0]
14919 def getPtr(self):
14920 return <void_ptr>self._pvt_ptr
14922@cython.embedsignature(True)
14923def cudaDeviceReset():
14924 """ Destroy all allocations and reset all state on the current device in the current process.
14926 Explicitly destroys and cleans up all resources associated with the
14927 current device in the current process. It is the caller's
14928 responsibility to ensure that the resources are not accessed or passed
14929 in subsequent API calls and doing so will result in undefined behavior.
14930 These resources include CUDA types :py:obj:`~.cudaStream_t`,
14931 :py:obj:`~.cudaEvent_t`, :py:obj:`~.cudaArray_t`,
14932 :py:obj:`~.cudaMipmappedArray_t`, :py:obj:`~.cudaPitchedPtr`,
14933 :py:obj:`~.cudaTextureObject_t`, :py:obj:`~.cudaSurfaceObject_t`,
14934 :py:obj:`~.textureReference`, :py:obj:`~.surfaceReference`,
14935 :py:obj:`~.cudaExternalMemory_t`, :py:obj:`~.cudaExternalSemaphore_t`
14936 and :py:obj:`~.cudaGraphicsResource_t`. These resources also include
14937 memory allocations by :py:obj:`~.cudaMalloc`,
14938 :py:obj:`~.cudaMallocHost`, :py:obj:`~.cudaMallocManaged` and
14939 :py:obj:`~.cudaMallocPitch`. Any subsequent API call to this device
14940 will reinitialize the device.
14942 Note that this function will reset the device immediately. It is the
14943 caller's responsibility to ensure that the device is not being accessed
14944 by any other host threads from the process when this function is
14945 called.
14947 Returns
14948 -------
14949 cudaError_t
14950 :py:obj:`~.cudaSuccess`
14952 See Also
14953 --------
14954 :py:obj:`~.cudaDeviceSynchronize`
14956 Notes
14957 -----
14958 :py:obj:`~.cudaDeviceReset()` will not destroy memory allocations by :py:obj:`~.cudaMallocAsync()` and :py:obj:`~.cudaMallocFromPoolAsync()`. These memory allocations need to be destroyed explicitly.
14960 If a non-primary :py:obj:`~.CUcontext` is current to the thread, :py:obj:`~.cudaDeviceReset()` will destroy only the internal CUDA RT state for that :py:obj:`~.CUcontext`.
14961 """
14962 with nogil:
14963 err = cyruntime.cudaDeviceReset()
14964 return (_dict_cudaError_t[err],)
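
# Illustrative sketch (hypothetical helper, not generated code): draining
# outstanding work before tearing the device down, since the docstring
# above makes the caller responsible for stopping all other access first.
def _example_reset_current_device():
    err, = cudaDeviceSynchronize()
    if err != cudaError_t.cudaSuccess:
        return err
    err, = cudaDeviceReset()
    return err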
14966@cython.embedsignature(True)
14967def cudaDeviceSynchronize():
14968 """ Wait for compute device to finish.
14970 Blocks until the device has completed all preceding requested tasks.
14971 :py:obj:`~.cudaDeviceSynchronize()` returns an error if one of the
14972 preceding tasks has failed. If the
14973 :py:obj:`~.cudaDeviceScheduleBlockingSync` flag was set for this
14974 device, the host thread will block until the device has finished its
14975 work.
14977 Returns
14978 -------
14979 cudaError_t
14980 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorStreamCaptureUnsupported`
14982 See Also
14983 --------
14984 :py:obj:`~.cudaDeviceReset`, :py:obj:`~.cuCtxSynchronize`
14985 """
14986 with nogil:
14987 err = cyruntime.cudaDeviceSynchronize()
14988 return (_dict_cudaError_t[err],)
14990@cython.embedsignature(True)
14991def cudaDeviceSetLimit(limit not None : cudaLimit, size_t value):
14992 """ Set resource limits.
14994 Setting `limit` to `value` is a request by the application to update
14995 the current limit maintained by the device. The driver is free to
14996 modify the requested value to meet h/w requirements (this could be
14997 clamping to minimum or maximum values, rounding up to nearest element
14998 size, etc). The application can use :py:obj:`~.cudaDeviceGetLimit()` to
14999 find out exactly what the limit has been set to.
15001 Setting each :py:obj:`~.cudaLimit` has its own specific restrictions,
15002 so each is discussed here.
15004 - :py:obj:`~.cudaLimitStackSize` controls the stack size in bytes of
15005 each GPU thread.
15007 - :py:obj:`~.cudaLimitPrintfFifoSize` controls the size in bytes of the
15008 shared FIFO used by the :py:obj:`~.printf()` device system call.
15009 Setting :py:obj:`~.cudaLimitPrintfFifoSize` must not be performed
15010 after launching any kernel that uses the :py:obj:`~.printf()` device
15011 system call - in such case :py:obj:`~.cudaErrorInvalidValue` will be
15012 returned.
15014 - :py:obj:`~.cudaLimitMallocHeapSize` controls the size in bytes of the
15015 heap used by the :py:obj:`~.malloc()` and :py:obj:`~.free()` device
15016 system calls. Setting :py:obj:`~.cudaLimitMallocHeapSize` must not be
15017 performed after launching any kernel that uses the
15018 :py:obj:`~.malloc()` or :py:obj:`~.free()` device system calls - in
15019 such case :py:obj:`~.cudaErrorInvalidValue` will be returned.
15021 - :py:obj:`~.cudaLimitDevRuntimeSyncDepth` controls the maximum nesting
15022 depth of a grid at which a thread can safely call
15023 :py:obj:`~.cudaDeviceSynchronize()`. Setting this limit must be
15024 performed before any launch of a kernel that uses the device runtime
15025 and calls :py:obj:`~.cudaDeviceSynchronize()` above the default sync
15026 depth, two levels of grids. Calls to
15027 :py:obj:`~.cudaDeviceSynchronize()` will fail with error code
15028 :py:obj:`~.cudaErrorSyncDepthExceeded` if the limitation is violated.
15029 This limit can be set smaller than the default or up to the maximum
15030 launch depth of 24. When setting this limit, keep in mind that
15031 additional levels of sync depth require the runtime to reserve large
15032 amounts of device memory which can no longer be used for user
15033 allocations. If these reservations of device memory fail,
15034 :py:obj:`~.cudaDeviceSetLimit` will return
15035 :py:obj:`~.cudaErrorMemoryAllocation`, and the limit can be reset to
15036 a lower value. This limit is only applicable to devices of compute
15037 capability < 9.0. Attempting to set this limit on devices of other
15038 compute capability will result in the error
15039 :py:obj:`~.cudaErrorUnsupportedLimit` being returned.
15041 - :py:obj:`~.cudaLimitDevRuntimePendingLaunchCount` controls the
15042 maximum number of outstanding device runtime launches that can be
15043 made from the current device. A grid is outstanding from the point of
15044 launch up until the grid is known to have been completed. Device
15045 runtime launches which violate this limitation fail and return
15046 :py:obj:`~.cudaErrorLaunchPendingCountExceeded` when
15047 :py:obj:`~.cudaGetLastError()` is called after launch. If more
15048 pending launches than the default (2048 launches) are needed for a
15049 module using the device runtime, this limit can be increased. Keep in
15050 mind that being able to sustain additional pending launches will
15051 require the runtime to reserve larger amounts of device memory
15052 upfront which can no longer be used for allocations. If these
15053 reservations fail, :py:obj:`~.cudaDeviceSetLimit` will return
15054 :py:obj:`~.cudaErrorMemoryAllocation`, and the limit can be reset to
15055 a lower value. This limit is only applicable to devices of compute
15056 capability 3.5 and higher. Attempting to set this limit on devices of
15057 compute capability less than 3.5 will result in the error
15058 :py:obj:`~.cudaErrorUnsupportedLimit` being returned.
15060 - :py:obj:`~.cudaLimitMaxL2FetchGranularity` controls the L2 cache
15061 fetch granularity. Values can range from 0B to 128B. This is purely a
15062 performance hint and it can be ignored or clamped depending on the
15063 platform.
15065 - :py:obj:`~.cudaLimitPersistingL2CacheSize` controls size in bytes
15066 available for persisting L2 cache. This is purely a performance hint
15067 and it can be ignored or clamped depending on the platform.
15069 Parameters
15070 ----------
15071 limit : :py:obj:`~.cudaLimit`
15072 Limit to set
15073 value : size_t
15074 Size of limit
15076 Returns
15077 -------
15078 cudaError_t
15079 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorUnsupportedLimit`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
15081 See Also
15082 --------
15083 :py:obj:`~.cudaDeviceGetLimit`, :py:obj:`~.cuCtxSetLimit`
15084 """
15085 cdef cyruntime.cudaLimit cylimit = limit.value
15086 with nogil:
15087 err = cyruntime.cudaDeviceSetLimit(cylimit, value)
15088 return (_dict_cudaError_t[err],)
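
# Illustrative sketch (hypothetical helper): requesting a larger printf
# FIFO, then reading back what the driver actually granted, since the
# docstring notes the request may be clamped or rounded. Assumes the
# cudaLimit IntEnum members mirror the C enumerator names.
def _example_grow_printf_fifo(size_t requested):
    err, = cudaDeviceSetLimit(cudaLimit.cudaLimitPrintfFifoSize, requested)
    if err != cudaError_t.cudaSuccess:
        return err, None
    return cudaDeviceGetLimit(cudaLimit.cudaLimitPrintfFifoSize)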
15090@cython.embedsignature(True)
15091def cudaDeviceGetLimit(limit not None : cudaLimit):
15092 """ Return resource limits.
15094 Returns in `*pValue` the current size of `limit`. The following
15095 :py:obj:`~.cudaLimit` values are supported.
15097 - :py:obj:`~.cudaLimitStackSize` is the stack size in bytes of each GPU
15098 thread.
15100 - :py:obj:`~.cudaLimitPrintfFifoSize` is the size in bytes of the
15101 shared FIFO used by the :py:obj:`~.printf()` device system call.
15103 - :py:obj:`~.cudaLimitMallocHeapSize` is the size in bytes of the heap
15104 used by the :py:obj:`~.malloc()` and :py:obj:`~.free()` device system
15105 calls.
15107 - :py:obj:`~.cudaLimitDevRuntimeSyncDepth` is the maximum grid depth at
15108 which a thread can issue the device runtime call
15109 :py:obj:`~.cudaDeviceSynchronize()` to wait on child grid launches to
15110 complete. This functionality is removed for devices of compute
15111 capability >= 9.0, and hence will return error
15112 :py:obj:`~.cudaErrorUnsupportedLimit` on such devices.
15114 - :py:obj:`~.cudaLimitDevRuntimePendingLaunchCount` is the maximum
15115 number of outstanding device runtime launches.
15117 - :py:obj:`~.cudaLimitMaxL2FetchGranularity` is the L2 cache fetch
15118 granularity.
15120 - :py:obj:`~.cudaLimitPersistingL2CacheSize` is the persisting L2 cache
15121 size in bytes.
15123 Parameters
15124 ----------
15125 limit : :py:obj:`~.cudaLimit`
15126 Limit to query
15128 Returns
15129 -------
15130 cudaError_t
15131 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorUnsupportedLimit`, :py:obj:`~.cudaErrorInvalidValue`
15132 pValue : int
15133 Returned size of the limit
15135 See Also
15136 --------
15137 :py:obj:`~.cudaDeviceSetLimit`, :py:obj:`~.cuCtxGetLimit`
15138 """
15139 cdef size_t pValue = 0
15140 cdef cyruntime.cudaLimit cylimit = limit.value
15141 with nogil:
15142 err = cyruntime.cudaDeviceGetLimit(&pValue, cylimit)
15143 if err != cyruntime.cudaSuccess:
15144 return (_dict_cudaError_t[err], None)
15145 return (_dict_cudaError_t[err], pValue)
15147@cython.embedsignature(True)
15148def cudaDeviceGetTexture1DLinearMaxWidth(fmtDesc : Optional[cudaChannelFormatDesc], int device):
15149 """ Returns the maximum number of elements allocatable in a 1D linear texture for a given element size.
15151 Returns in `maxWidthInElements` the maximum number of elements
15152 allocatable in a 1D linear texture for given format descriptor
15153 `fmtDesc`.
15155 Parameters
15156 ----------
15157 fmtDesc : :py:obj:`~.cudaChannelFormatDesc`
15158 Texture format description.
15159 device : int
15160 Device on which to query the texture limit
15162 Returns
15163 -------
15164 cudaError_t
15165 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorUnsupportedLimit`, :py:obj:`~.cudaErrorInvalidValue`
15166 maxWidthInElements : int
15167 Returns maximum number of texture elements allocatable for given
15168 `fmtDesc`.
15170 See Also
15171 --------
15172 :py:obj:`~.cuDeviceGetTexture1DLinearMaxWidth`
15173 """
15174 cdef size_t maxWidthInElements = 0
15175 cdef cyruntime.cudaChannelFormatDesc* cyfmtDesc_ptr = fmtDesc._pvt_ptr if fmtDesc is not None else NULL
15176 with nogil:
15177 err = cyruntime.cudaDeviceGetTexture1DLinearMaxWidth(&maxWidthInElements, cyfmtDesc_ptr, device)
15178 if err != cyruntime.cudaSuccess:
15179 return (_dict_cudaError_t[err], None)
15180 return (_dict_cudaError_t[err], maxWidthInElements)
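
# Illustrative sketch (hypothetical helper): querying the widest 1D linear
# texture of single-channel 32-bit floats a device supports. Assumes
# cudaChannelFormatDesc exposes x/y/z/w/f setters like the other generated
# structs and that cudaChannelFormatKind mirrors the C enumerator names.
def _example_max_float_texture_width(int device):
    desc = cudaChannelFormatDesc()
    desc.x = 32
    desc.y = desc.z = desc.w = 0
    desc.f = cudaChannelFormatKind.cudaChannelFormatKindFloat
    return cudaDeviceGetTexture1DLinearMaxWidth(desc, device)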
15182@cython.embedsignature(True)
15183def cudaDeviceGetCacheConfig():
15184 """ Returns the preferred cache configuration for the current device.
15186 On devices where the L1 cache and shared memory use the same hardware
15187 resources, this returns through `pCacheConfig` the preferred cache
15188 configuration for the current device. This is only a preference. The
15189 runtime will use the requested configuration if possible, but it is
15190 free to choose a different configuration if required to execute
15191 functions.
15193 This will return a `pCacheConfig` of
15194 :py:obj:`~.cudaFuncCachePreferNone` on devices where the size of the L1
15195 cache and shared memory are fixed.
15197 The supported cache configurations are:
15199 - :py:obj:`~.cudaFuncCachePreferNone`: no preference for shared memory
15200 or L1 (default)
15202 - :py:obj:`~.cudaFuncCachePreferShared`: prefer larger shared memory
15203 and smaller L1 cache
15205 - :py:obj:`~.cudaFuncCachePreferL1`: prefer larger L1 cache and smaller
15206 shared memory
15208 - :py:obj:`~.cudaFuncCachePreferEqual`: prefer equal size L1 cache and
15209 shared memory
15211 Returns
15212 -------
15213 cudaError_t
15214 :py:obj:`~.cudaSuccess`
15215 pCacheConfig : :py:obj:`~.cudaFuncCache`
15216 Returned cache configuration
15218 See Also
15219 --------
15220 :py:obj:`~.cudaDeviceSetCacheConfig`, :py:obj:`~.cudaFuncSetCacheConfig (C API)`, cudaFuncSetCacheConfig (C++ API), :py:obj:`~.cuCtxGetCacheConfig`
15221 """
15222 cdef cyruntime.cudaFuncCache pCacheConfig
15223 with nogil:
15224 err = cyruntime.cudaDeviceGetCacheConfig(&pCacheConfig)
15225 if err != cyruntime.cudaSuccess:
15226 return (_dict_cudaError_t[err], None)
15227 return (_dict_cudaError_t[err], cudaFuncCache(pCacheConfig))
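
# Illustrative sketch (hypothetical helper): switching the device toward a
# larger shared-memory carveout only if that is not already the preference.
def _example_prefer_shared_memory():
    err, current = cudaDeviceGetCacheConfig()
    if err != cudaError_t.cudaSuccess:
        return err
    if current != cudaFuncCache.cudaFuncCachePreferShared:
        err, = cudaDeviceSetCacheConfig(cudaFuncCache.cudaFuncCachePreferShared)
    return err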
15229@cython.embedsignature(True)
15230def cudaDeviceGetStreamPriorityRange():
15231 """ Returns numerical values that correspond to the least and greatest stream priorities.
15233 Returns in `*leastPriority` and `*greatestPriority` the numerical
15234 values that correspond to the least and greatest stream priorities
15235 respectively. Stream priorities follow a convention where lower numbers
15236 imply greater priorities. The range of meaningful stream priorities is
15237 given by [`*greatestPriority`, `*leastPriority`]. If the user attempts
15238 to create a stream with a priority value that is outside the
15239 meaningful range as specified by this API, the priority is
15240 automatically clamped down or up to either `*leastPriority` or
15241 `*greatestPriority` respectively. See
15242 :py:obj:`~.cudaStreamCreateWithPriority` for details on creating a
15243 priority stream. A NULL may be passed in for `*leastPriority` or
15244 `*greatestPriority` if the value is not desired.
15246 This function will return '0' in both `*leastPriority` and
15247 `*greatestPriority` if the current context's device does not support
15248 stream priorities (see :py:obj:`~.cudaDeviceGetAttribute`).
15250 Returns
15251 -------
15252 cudaError_t
15253 :py:obj:`~.cudaSuccess`
15254 leastPriority : int
15255 Pointer to an int in which the numerical value for least stream
15256 priority is returned
15257 greatestPriority : int
15258 Pointer to an int in which the numerical value for greatest stream
15259 priority is returned
15261 See Also
15262 --------
15263 :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cuCtxGetStreamPriorityRange`
15264 """
15265 cdef int leastPriority = 0
15266 cdef int greatestPriority = 0
15267 with nogil:
15268 err = cyruntime.cudaDeviceGetStreamPriorityRange(&leastPriority, &greatestPriority)
15269 if err != cyruntime.cudaSuccess:
15270 return (_dict_cudaError_t[err], None, None)
15271 return (_dict_cudaError_t[err], leastPriority, greatestPriority)
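
# Illustrative sketch (hypothetical helper): clamping a requested stream
# priority into the meaningful range, mirroring what the runtime does
# automatically. Lower numbers mean higher priority, so numerically
# greatestPriority <= leastPriority.
def _example_clamp_priority(int requested):
    err, least, greatest = cudaDeviceGetStreamPriorityRange()
    if err != cudaError_t.cudaSuccess:
        return err, requested
    return err, min(max(requested, greatest), least)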
15273@cython.embedsignature(True)
15274def cudaDeviceSetCacheConfig(cacheConfig not None : cudaFuncCache):
15275 """ Sets the preferred cache configuration for the current device.
15277 On devices where the L1 cache and shared memory use the same hardware
15278 resources, this sets through `cacheConfig` the preferred cache
15279 configuration for the current device. This is only a preference. The
15280 runtime will use the requested configuration if possible, but it is
15281 free to choose a different configuration if required to execute the
15282 function. Any function preference set via
15283 :py:obj:`~.cudaFuncSetCacheConfig (C API)` or cudaFuncSetCacheConfig
15284 (C++ API) will be preferred over this device-wide setting. Setting the
15285 device-wide cache configuration to :py:obj:`~.cudaFuncCachePreferNone`
15286 will cause subsequent kernel launches to prefer to not change the cache
15287 configuration unless required to launch the kernel.
15289 This setting does nothing on devices where the size of the L1 cache and
15290 shared memory are fixed.
15292 Launching a kernel with a different preference than the most recent
15293 preference setting may insert a device-side synchronization point.
15295 The supported cache configurations are:
15297 - :py:obj:`~.cudaFuncCachePreferNone`: no preference for shared memory
15298 or L1 (default)
15300 - :py:obj:`~.cudaFuncCachePreferShared`: prefer larger shared memory
15301 and smaller L1 cache
15303 - :py:obj:`~.cudaFuncCachePreferL1`: prefer larger L1 cache and smaller
15304 shared memory
15306 - :py:obj:`~.cudaFuncCachePreferEqual`: prefer equal size L1 cache and
15307 shared memory
15309 Parameters
15310 ----------
15311 cacheConfig : :py:obj:`~.cudaFuncCache`
15312 Requested cache configuration
15314 Returns
15315 -------
15316 cudaError_t
15317 :py:obj:`~.cudaSuccess`
15319 See Also
15320 --------
15321 :py:obj:`~.cudaDeviceGetCacheConfig`, :py:obj:`~.cudaFuncSetCacheConfig (C API)`, cudaFuncSetCacheConfig (C++ API), :py:obj:`~.cuCtxSetCacheConfig`
15322 """
15323 cdef cyruntime.cudaFuncCache cycacheConfig = cacheConfig.value
15324 with nogil:
15325 err = cyruntime.cudaDeviceSetCacheConfig(cycacheConfig)
15326 return (_dict_cudaError_t[err],)
15328@cython.embedsignature(True)
15329def cudaDeviceGetByPCIBusId(char* pciBusId):
15330 """ Returns a handle to a compute device.
15332 Returns in `*device` a device ordinal given a PCI bus ID string of the
15334 form [domain]:[bus]:[device].[function], [domain]:[bus]:[device], or
15335 [bus]:[device].[function], where `domain`, `bus`, `device`, and `function` are all hexadecimal values
15337 Parameters
15338 ----------
15339 pciBusId : bytes
15340 String in one of the forms [domain]:[bus]:[device].[function], [domain]:[bus]:[device], or [bus]:[device].[function]
15342 Returns
15343 -------
15344 cudaError_t
15345 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
15346 device : int
15347 Returned device ordinal
15349 See Also
15350 --------
15351 :py:obj:`~.cudaDeviceGetPCIBusId`, :py:obj:`~.cuDeviceGetByPCIBusId`
15352 """
15353 cdef int device = 0
15354 with nogil:
15355 err = cyruntime.cudaDeviceGetByPCIBusId(&device, pciBusId)
15356 if err != cyruntime.cudaSuccess:
15357 return (_dict_cudaError_t[err], None)
15358 return (_dict_cudaError_t[err], device)
15360@cython.embedsignature(True)
15361def cudaDeviceGetPCIBusId(int length, int device):
15362 """ Returns a PCI Bus Id string for the device.
15364 Returns an ASCII string identifying `device` in the NULL-
15365 terminated string pointed to by `pciBusId`. `length` specifies the
15366 maximum length of the string that may be returned.
15368 The identifier has the form [domain]:[bus]:[device].[function], where
15369 `domain`, `bus`, `device`, and `function` are all hexadecimal values.
15370 pciBusId should be large enough to store 13 characters including the NULL-terminator.
15372 Parameters
15373 ----------
15374 length : int
15375 Maximum length of string to store in `pciBusId`
15376 device : int
15377 Device to get identifier string for
15379 Returns
15380 -------
15381 cudaError_t
15382 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
15383 pciBusId : bytes
15384 Returned identifier string for the device, in the format [domain]:[bus]:[device].[function]
15386 See Also
15387 --------
15388 :py:obj:`~.cudaDeviceGetByPCIBusId`, :py:obj:`~.cuDeviceGetPCIBusId`
15389 """
15390 pypciBusId = b" " * length
15391 cdef char* pciBusId = pypciBusId
15392 with nogil:
15393 err = cyruntime.cudaDeviceGetPCIBusId(pciBusId, length, device)
15394 if err != cyruntime.cudaSuccess:
15395 return (_dict_cudaError_t[err], None)
15396 return (_dict_cudaError_t[err], pypciBusId)
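
# Illustrative sketch (hypothetical helper): round-tripping a device ordinal
# through its PCI bus ID string. 16 bytes comfortably holds the 13-character
# identifier plus NULL terminator mentioned above.
def _example_pci_bus_id_roundtrip(int device):
    err, bus_id = cudaDeviceGetPCIBusId(16, device)
    if err != cudaError_t.cudaSuccess:
        return err, None
    return cudaDeviceGetByPCIBusId(bus_id)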
15398@cython.embedsignature(True)
15399def cudaIpcGetEventHandle(event):
15400 """ Gets an interprocess handle for a previously allocated event.
15402 Takes as input a previously allocated event. This event must have been
15403 created with the :py:obj:`~.cudaEventInterprocess` and
15404 :py:obj:`~.cudaEventDisableTiming` flags set. This opaque handle may be
15405 copied into other processes and opened with
15406 :py:obj:`~.cudaIpcOpenEventHandle` to allow efficient hardware
15407 synchronization between GPU work in different processes.
15409 After the event has been opened in the importing process,
15410 :py:obj:`~.cudaEventRecord`, :py:obj:`~.cudaEventSynchronize`,
15411 :py:obj:`~.cudaStreamWaitEvent` and :py:obj:`~.cudaEventQuery` may be
15412 used in either process. Performing operations on the imported event
15413 after the exported event has been freed with
15414 :py:obj:`~.cudaEventDestroy` will result in undefined behavior.
15416 IPC functionality is restricted to devices with support for unified
15417 addressing on Linux and Windows operating systems. IPC functionality on
15418 Windows is supported for compatibility purposes but not recommended as
15419 it comes with performance cost. Users can test their device for IPC
15420 functionality by calling :py:obj:`~.cudaDeviceGetAttribute` with
15421 :py:obj:`~.cudaDevAttrIpcEventSupport`
15423 Parameters
15424 ----------
15425 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
15426 Event allocated with :py:obj:`~.cudaEventInterprocess` and
15427 :py:obj:`~.cudaEventDisableTiming` flags.
15429 Returns
15430 -------
15431 cudaError_t
15432 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorMapBufferObjectFailed`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`
15433 handle : :py:obj:`~.cudaIpcEventHandle_t`
15434 Pointer to a user allocated cudaIpcEventHandle in which to return
15435 the opaque event handle
15437 See Also
15438 --------
15439 :py:obj:`~.cudaEventCreate`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaIpcOpenEventHandle`, :py:obj:`~.cudaIpcGetMemHandle`, :py:obj:`~.cudaIpcOpenMemHandle`, :py:obj:`~.cudaIpcCloseMemHandle`, :py:obj:`~.cuIpcGetEventHandle`
15440 """
15441 cdef cyruntime.cudaEvent_t cyevent
15442 if event is None:
15443 pevent = 0
15444 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
15445 pevent = int(event)
15446 else:
15447 pevent = int(cudaEvent_t(event))
15448 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
15449 cdef cudaIpcEventHandle_t handle = cudaIpcEventHandle_t()
15450 with nogil:
15451 err = cyruntime.cudaIpcGetEventHandle(<cyruntime.cudaIpcEventHandle_t*>handle._pvt_ptr, cyevent)
15452 if err != cyruntime.cudaSuccess:
15453 return (_dict_cudaError_t[err], None)
15454 return (_dict_cudaError_t[err], handle)
15456@cython.embedsignature(True)
15457def cudaIpcOpenEventHandle(handle not None : cudaIpcEventHandle_t):
15458 """ Opens an interprocess event handle for use in the current process.
15460 Opens an interprocess event handle exported from another process with
15461 :py:obj:`~.cudaIpcGetEventHandle`. This function returns a
15462 :py:obj:`~.cudaEvent_t` that behaves like a locally created event with
15463 the :py:obj:`~.cudaEventDisableTiming` flag specified. This event must
15464 be freed with :py:obj:`~.cudaEventDestroy`.
15466 Performing operations on the imported event after the exported event
15467 has been freed with :py:obj:`~.cudaEventDestroy` will result in
15468 undefined behavior.
15470 IPC functionality is restricted to devices with support for unified
15471 addressing on Linux and Windows operating systems. IPC functionality on
15472 Windows is supported for compatibility purposes but not recommended as
15473 it comes with performance cost. Users can test their device for IPC
15474 functionality by calling :py:obj:`~.cudaDeviceGetAttribute` with
15475 :py:obj:`~.cudaDevAttrIpcEventSupport`
15477 Parameters
15478 ----------
15479 handle : :py:obj:`~.cudaIpcEventHandle_t`
15480 Interprocess handle to open
15482 Returns
15483 -------
15484 cudaError_t
15485 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorMapBufferObjectFailed`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorDeviceUninitialized`
15486 event : :py:obj:`~.cudaEvent_t`
15487 Returns the imported event
15489 See Also
15490 --------
15491 :py:obj:`~.cudaEventCreate`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaIpcGetEventHandle`, :py:obj:`~.cudaIpcGetMemHandle`, :py:obj:`~.cudaIpcOpenMemHandle`, :py:obj:`~.cudaIpcCloseMemHandle`, :py:obj:`~.cuIpcOpenEventHandle`
15492 """
15493 cdef cudaEvent_t event = cudaEvent_t()
15494 with nogil:
15495 err = cyruntime.cudaIpcOpenEventHandle(<cyruntime.cudaEvent_t*>event._pvt_ptr, handle._pvt_ptr[0])
15496 if err != cyruntime.cudaSuccess:
15497 return (_dict_cudaError_t[err], None)
15498 return (_dict_cudaError_t[err], event)
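
# Illustrative sketch (hypothetical helper): creating an event that can be
# exported to another process. Assumes cudaEventCreateWithFlags follows this
# module's tuple-return convention and that the cudaEventDisableTiming and
# cudaEventInterprocess flag constants are defined at module level like the
# flags near the top of this file.
def _example_export_event():
    err, event = cudaEventCreateWithFlags(
        cudaEventDisableTiming | cudaEventInterprocess)
    if err != cudaError_t.cudaSuccess:
        return err, None
    return cudaIpcGetEventHandle(event)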
15500@cython.embedsignature(True)
15501def cudaIpcGetMemHandle(devPtr):
15502 """ Gets an interprocess memory handle for an existing device memory allocation.
15504 Takes a pointer to the base of an existing device memory allocation
15505 created with :py:obj:`~.cudaMalloc` and exports it for use in another
15506 process. This is a lightweight operation and may be called multiple
15507 times on an allocation without adverse effects.
15509 If a region of memory is freed with :py:obj:`~.cudaFree` and a
15510 subsequent call to :py:obj:`~.cudaMalloc` returns memory with the same
15511 device address, :py:obj:`~.cudaIpcGetMemHandle` will return a unique
15512 handle for the new memory.
15514 IPC functionality is restricted to devices with support for unified
15515 addressing on Linux and Windows operating systems. IPC functionality on
15516 Windows is supported for compatibility purposes but not recommended as
15517 it comes with performance cost. Users can test their device for IPC
15518 functionality by calling :py:obj:`~.cudaDeviceGetAttribute` with
15519 :py:obj:`~.cudaDevAttrIpcEventSupport`
15521 Parameters
15522 ----------
15523 devPtr : Any
15524 Base pointer to previously allocated device memory
15526 Returns
15527 -------
15528 cudaError_t
15529 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorMapBufferObjectFailed`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`
15530 handle : :py:obj:`~.cudaIpcMemHandle_t`
15531 Pointer to user allocated :py:obj:`~.cudaIpcMemHandle` to return
15532 the handle in.
15534 See Also
15535 --------
15536 :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaIpcGetEventHandle`, :py:obj:`~.cudaIpcOpenEventHandle`, :py:obj:`~.cudaIpcOpenMemHandle`, :py:obj:`~.cudaIpcCloseMemHandle`, :py:obj:`~.cuIpcGetMemHandle`
15537 """
15538 cdef cudaIpcMemHandle_t handle = cudaIpcMemHandle_t()
15539 cydevPtr = _HelperInputVoidPtr(devPtr)
15540 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
15541 with nogil:
15542 err = cyruntime.cudaIpcGetMemHandle(<cyruntime.cudaIpcMemHandle_t*>handle._pvt_ptr, cydevPtr_ptr)
15543 if err != cyruntime.cudaSuccess:
15544 return (_dict_cudaError_t[err], None)
15545 return (_dict_cudaError_t[err], handle)
15547@cython.embedsignature(True)
15548def cudaIpcOpenMemHandle(handle not None : cudaIpcMemHandle_t, unsigned int flags):
15549 """ Opens an interprocess memory handle exported from another process and returns a device pointer usable in the local process.
15551 Maps memory exported from another process with
15552 :py:obj:`~.cudaIpcGetMemHandle` into the current device address space.
15553 For contexts on different devices :py:obj:`~.cudaIpcOpenMemHandle` can
15554 attempt to enable peer access between the devices as if the user called
15555 :py:obj:`~.cudaDeviceEnablePeerAccess`. This behavior is controlled by
15556 the :py:obj:`~.cudaIpcMemLazyEnablePeerAccess` flag.
15557 :py:obj:`~.cudaDeviceCanAccessPeer` can determine if a mapping is
15558 possible.
15560 :py:obj:`~.cudaIpcOpenMemHandle` can open handles to devices that may
15561 not be visible in the process calling the API.
15563 Contexts that may open :py:obj:`~.cudaIpcMemHandles` are restricted in
15564 the following way. :py:obj:`~.cudaIpcMemHandles` from each device in a
15565 given process may only be opened by one context per device per other
15566 process.
15568 If the memory handle has already been opened by the current context,
15569 the reference count on the handle is incremented by 1 and the existing
15570 device pointer is returned.
15572 Memory returned from :py:obj:`~.cudaIpcOpenMemHandle` must be freed
15573 with :py:obj:`~.cudaIpcCloseMemHandle`.
15575 Calling :py:obj:`~.cudaFree` on an exported memory region before
15576 calling :py:obj:`~.cudaIpcCloseMemHandle` in the importing context will
15577 result in undefined behavior.
15579 IPC functionality is restricted to devices with support for unified
15580 addressing on Linux and Windows operating systems. IPC functionality on
15581 Windows is supported for compatibility purposes but not recommended as
15582 it comes with performance cost. Users can test their device for IPC
15583 functionality by calling :py:obj:`~.cudaDeviceGetAttribute` with
15584 :py:obj:`~.cudaDevAttrIpcEventSupport`
15586 Parameters
15587 ----------
15588 handle : :py:obj:`~.cudaIpcMemHandle_t`
15589 :py:obj:`~.cudaIpcMemHandle` to open
15590 flags : unsigned int
15591 Flags for this operation. Must be specified as
15592 :py:obj:`~.cudaIpcMemLazyEnablePeerAccess`
15594 Returns
15595 -------
15596 cudaError_t
15597 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorMapBufferObjectFailed`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorDeviceUninitialized`, :py:obj:`~.cudaErrorTooManyPeers`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`
15598 devPtr : Any
15599 Returned device pointer
15601 See Also
15602 --------
15603 :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaIpcGetEventHandle`, :py:obj:`~.cudaIpcOpenEventHandle`, :py:obj:`~.cudaIpcGetMemHandle`, :py:obj:`~.cudaIpcCloseMemHandle`, :py:obj:`~.cudaDeviceEnablePeerAccess`, :py:obj:`~.cudaDeviceCanAccessPeer`, :py:obj:`~.cuIpcOpenMemHandle`
15605 Notes
15606 -----
15607 No guarantees are made about the address returned in `*devPtr`.
15608 In particular, multiple processes may not receive the same address for the same `handle`.
15609 """
15610 cdef void_ptr devPtr = 0
15611 with nogil:
15612 err = cyruntime.cudaIpcOpenMemHandle(<void**>&devPtr, handle._pvt_ptr[0], flags)
15613 if err != cyruntime.cudaSuccess:
15614 return (_dict_cudaError_t[err], None)
15615 return (_dict_cudaError_t[err], devPtr)
15617@cython.embedsignature(True)
15618def cudaIpcCloseMemHandle(devPtr):
15619 """ Attempts to close memory mapped with cudaIpcOpenMemHandle.
15621 Decrements the reference count of the memory returned by
15622 :py:obj:`~.cudaIpcOpenMemHandle` by 1. When the reference count reaches
15623 0, this API unmaps the memory. The original allocation in the exporting
15624 process as well as imported mappings in other processes will be
15625 unaffected.
15627 Any resources used to enable peer access will be freed if this is the
15628 last mapping using them.
15630 IPC functionality is restricted to devices with support for unified
15631 addressing on Linux and Windows operating systems. IPC functionality on
15632 Windows is supported for compatibility purposes but not recommended as
15633 it comes with performance cost. Users can test their device for IPC
15634 functionality by calling :py:obj:`~.cudaDeviceGetAttribute` with
15635 :py:obj:`~.cudaDevAttrIpcEventSupport`
15637 Parameters
15638 ----------
15639 devPtr : Any
15640 Device pointer returned by :py:obj:`~.cudaIpcOpenMemHandle`
15642 Returns
15643 -------
15644 cudaError_t
15645 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorMapBufferObjectFailed`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`
15647 See Also
15648 --------
15649 :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaIpcGetEventHandle`, :py:obj:`~.cudaIpcOpenEventHandle`, :py:obj:`~.cudaIpcGetMemHandle`, :py:obj:`~.cudaIpcOpenMemHandle`, :py:obj:`~.cuIpcCloseMemHandle`
15650 """
15651 cydevPtr = _HelperInputVoidPtr(devPtr)
15652 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
15653 with nogil:
15654 err = cyruntime.cudaIpcCloseMemHandle(cydevPtr_ptr)
15655 return (_dict_cudaError_t[err],)
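
# Illustrative sketch (hypothetical helper): importing a memory handle that
# another process produced with cudaIpcGetMemHandle, then unmapping it.
# Assumes the cudaIpcMemLazyEnablePeerAccess constant is defined at module
# level like the other flag constants in this file.
def _example_use_imported_allocation(handle not None : cudaIpcMemHandle_t):
    err, dev_ptr = cudaIpcOpenMemHandle(handle, cudaIpcMemLazyEnablePeerAccess)
    if err != cudaError_t.cudaSuccess:
        return err
    # ... use dev_ptr with memcpys or kernel launches in this process ...
    err, = cudaIpcCloseMemHandle(dev_ptr)
    return err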
15657@cython.embedsignature(True)
15658def cudaDeviceFlushGPUDirectRDMAWrites(target not None : cudaFlushGPUDirectRDMAWritesTarget, scope not None : cudaFlushGPUDirectRDMAWritesScope):
15659 """ Blocks until remote writes are visible to the specified scope.
15661 Blocks until remote writes to the target context via mappings created
15662 through GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see
15663 https://docs.nvidia.com/cuda/gpudirect-rdma for more information), are
15664 visible to the specified scope.
15666 If the scope equals or lies within the scope indicated by
15667 :py:obj:`~.cudaDevAttrGPUDirectRDMAWritesOrdering`, the call will be a
15668 no-op and can be safely omitted for performance. This can be determined
15669 by comparing the numerical values between the two enums, with smaller
15670 scopes having smaller values.
15672 Users may query support for this API via
15673 :py:obj:`~.cudaDevAttrGPUDirectRDMAFlushWritesOptions`.
15675 Parameters
15676 ----------
15677 target : :py:obj:`~.cudaFlushGPUDirectRDMAWritesTarget`
15678 The target of the operation, see cudaFlushGPUDirectRDMAWritesTarget
15679 scope : :py:obj:`~.cudaFlushGPUDirectRDMAWritesScope`
15680 The scope of the operation, see cudaFlushGPUDirectRDMAWritesScope
15682 Returns
15683 -------
15684 cudaError_t
15685 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNotSupported`,
15687 See Also
15688 --------
15689 :py:obj:`~.cuFlushGPUDirectRDMAWrites`
15690 """
15691 cdef cyruntime.cudaFlushGPUDirectRDMAWritesTarget cytarget = target.value
15692 cdef cyruntime.cudaFlushGPUDirectRDMAWritesScope cyscope = scope.value
15693 with nogil:
15694 err = cyruntime.cudaDeviceFlushGPUDirectRDMAWrites(cytarget, cyscope)
15695 return (_dict_cudaError_t[err],)
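
# Illustrative sketch (hypothetical helper): skipping the flush when the
# device's default write ordering already covers the needed scope, using
# the numerical-comparison rule from the docstring above. `ordering` is the
# value of cudaDevAttrGPUDirectRDMAWritesOrdering queried elsewhere; the
# enum member names are assumed to mirror the C API.
def _example_flush_to_all_devices(int ordering):
    scope = cudaFlushGPUDirectRDMAWritesScope.cudaFlushGPUDirectRDMAWritesToAllDevices
    if ordering >= scope.value:
        return cudaError_t.cudaSuccess  # already ordered; flush would be a no-op
    err, = cudaDeviceFlushGPUDirectRDMAWrites(
        cudaFlushGPUDirectRDMAWritesTarget.cudaFlushGPUDirectRDMAWritesTargetCurrentDevice,
        scope)
    return err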
15697ctypedef struct cudaAsyncCallbackData_st:
15698 cyruntime.cudaAsyncCallback callback
15699 void *userData
15701ctypedef cudaAsyncCallbackData_st cudaAsyncCallbackData
15703@cython.show_performance_hints(False)
15704cdef void cudaAsyncNotificationCallbackWrapper(cyruntime.cudaAsyncNotificationInfo_t *info, void *data, cyruntime.cudaAsyncCallbackHandle_t handle) nogil:
15705 cdef cudaAsyncCallbackData *cbData = <cudaAsyncCallbackData *>data
15706 with gil:
15707 cbData.callback(info, cbData.userData, handle)
15709@cython.embedsignature(True)
15710def cudaDeviceRegisterAsyncNotification(int device, callbackFunc, userData):
15711 """ Registers a callback function to receive async notifications.
15713 Registers `callbackFunc` to receive async notifications.
15715 The `userData` parameter is passed to the callback function at async
15716 notification time. Likewise, `callback` is also passed to the callback
15717 function to distinguish between multiple registered callbacks.
15719 The callback function being registered should be designed to return
15720 quickly (~10ms). Any long running tasks should be queued for execution
15721 on an application thread.
15723 Callbacks may not call cudaDeviceRegisterAsyncNotification or
15724 cudaDeviceUnregisterAsyncNotification. Doing so will result in
15725 :py:obj:`~.cudaErrorNotPermitted`. Async notification callbacks execute
15726 in an undefined order and may be serialized.
15728 Returns in `*callback` a handle representing the registered callback
15729 instance.
15731 Parameters
15732 ----------
15733 device : int
15734 The device on which to register the callback
15735 callbackFunc : :py:obj:`~.cudaAsyncCallback`
15736 The function to register as a callback
15737 userData : Any
15738 A generic pointer to user data. This is passed into the callback
15739 function.
15741 Returns
15742 -------
15743 cudaError_t
15744 :py:obj:`~.cudaSuccess` :py:obj:`~.cudaErrorNotSupported` :py:obj:`~.cudaErrorInvalidDevice` :py:obj:`~.cudaErrorInvalidValue` :py:obj:`~.cudaErrorNotPermitted` :py:obj:`~.cudaErrorUnknown`
15745 callback : :py:obj:`~.cudaAsyncCallbackHandle_t`
15746 A handle representing the registered callback instance
15748 See Also
15749 --------
15750 :py:obj:`~.cudaDeviceUnregisterAsyncNotification`
15751 """
15752 cdef cyruntime.cudaAsyncCallback cycallbackFunc
15753 if callbackFunc is None:
15754 pcallbackFunc = 0
15755 elif isinstance(callbackFunc, (cudaAsyncCallback,)):
15756 pcallbackFunc = int(callbackFunc)
15757 else:
15758 pcallbackFunc = int(cudaAsyncCallback(callbackFunc))
15759 cycallbackFunc = <cyruntime.cudaAsyncCallback><void_ptr>pcallbackFunc
15760 cyuserData = _HelperInputVoidPtr(userData)
15761 cdef void* cyuserData_ptr = <void*><void_ptr>cyuserData.cptr
15763 cdef cudaAsyncCallbackData *cbData = NULL
15764 cbData = <cudaAsyncCallbackData *>malloc(sizeof(cbData[0]))
15765 if cbData == NULL:
15766 return (cudaError_t.cudaErrorMemoryAllocation, None)
15767 cbData.callback = cycallbackFunc
15768 cbData.userData = cyuserData_ptr
15770 cdef cudaAsyncCallbackHandle_t callback = cudaAsyncCallbackHandle_t()
15771 with nogil:
15772 err = cyruntime.cudaDeviceRegisterAsyncNotification(device, <cyruntime.cudaAsyncCallback>cudaAsyncNotificationCallbackWrapper, <void *>cbData, <cyruntime.cudaAsyncCallbackHandle_t*>callback._pvt_ptr)
15773 if err != cyruntime.cudaSuccess:
15774 free(cbData)
15775 else:
15776 m_global._allocated[int(callback)] = cbData
15777 if err != cyruntime.cudaSuccess:
15778 return (_dict_cudaError_t[err], None)
15779 return (_dict_cudaError_t[err], callback)
15781@cython.embedsignature(True)
15782def cudaDeviceUnregisterAsyncNotification(int device, callback):
15783 """ Unregisters an async notification callback.
15785 Unregisters `callback` so that the corresponding callback function will
15786 stop receiving async notifications.
15788 Parameters
15789 ----------
15790 device : int
15791 The device from which to remove `callback`.
15792 callback : :py:obj:`~.cudaAsyncCallbackHandle_t`
15793 The callback instance to unregister from receiving async
15794 notifications.
15796 Returns
15797 -------
15798 cudaError_t
15799 :py:obj:`~.cudaSuccess` :py:obj:`~.cudaErrorNotSupported` :py:obj:`~.cudaErrorInvalidDevice` :py:obj:`~.cudaErrorInvalidValue` :py:obj:`~.cudaErrorNotPermitted` :py:obj:`~.cudaErrorUnknown`
15801 See Also
15802 --------
15803 :py:obj:`~.cudaDeviceRegisterAsyncNotification`
15804 """
15805 cdef cyruntime.cudaAsyncCallbackHandle_t cycallback
15806 if callback is None:
15807 pcallback = 0
15808 elif isinstance(callback, (cudaAsyncCallbackHandle_t,)):
15809 pcallback = int(callback)
15810 else:
15811 pcallback = int(cudaAsyncCallbackHandle_t(callback))
15812 cycallback = <cyruntime.cudaAsyncCallbackHandle_t><void_ptr>pcallback
15813 with nogil:
15814 err = cyruntime.cudaDeviceUnregisterAsyncNotification(device, cycallback)
15815 if err == cyruntime.cudaSuccess:
15816 free(m_global._allocated[pcallback])
15817 m_global._allocated.erase(<void_ptr>pcallback)
15818 return (_dict_cudaError_t[err],)
15820@cython.embedsignature(True)
15821def cudaDeviceGetSharedMemConfig():
15822 """ Returns the shared memory configuration for the current device.
15824 [Deprecated]
15826 This function will return in `pConfig` the current size of shared
15827 memory banks on the current device. On devices with configurable shared
15828 memory banks, :py:obj:`~.cudaDeviceSetSharedMemConfig` can be used to
15829 change this setting, so that all subsequent kernel launches will by
15830 default use the new bank size. When
15831 :py:obj:`~.cudaDeviceGetSharedMemConfig` is called on devices without
15832 configurable shared memory, it will return the fixed bank size of the
15833 hardware.
15835 The returned bank configurations can be either:
15837 - :py:obj:`~.cudaSharedMemBankSizeFourByte` - shared memory bank width
15838 is four bytes.
15840 - :py:obj:`~.cudaSharedMemBankSizeEightByte` - shared memory bank width
15841 is eight bytes.
15843 Returns
15844 -------
15845 cudaError_t
15846 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
15847 pConfig : :py:obj:`~.cudaSharedMemConfig`
15848 Returned cache configuration
15850 See Also
15851 --------
15852 :py:obj:`~.cudaDeviceSetCacheConfig`, :py:obj:`~.cudaDeviceGetCacheConfig`, :py:obj:`~.cudaDeviceSetSharedMemConfig`, :py:obj:`~.cudaFuncSetCacheConfig`, :py:obj:`~.cuCtxGetSharedMemConfig`
15853 """
15854 cdef cyruntime.cudaSharedMemConfig pConfig
15855 with nogil:
15856 err = cyruntime.cudaDeviceGetSharedMemConfig(&pConfig)
15857 if err != cyruntime.cudaSuccess:
15858 return (_dict_cudaError_t[err], None)
15859 return (_dict_cudaError_t[err], cudaSharedMemConfig(pConfig))
15861@cython.embedsignature(True)
15862def cudaDeviceSetSharedMemConfig(config not None : cudaSharedMemConfig):
15863 """ Sets the shared memory configuration for the current device.
15865 [Deprecated]
15867 On devices with configurable shared memory banks, this function will
15868 set the shared memory bank size which is used for all subsequent kernel
15869 launches. Any per-function setting of shared memory set via
15870 :py:obj:`~.cudaFuncSetSharedMemConfig` will override the device wide
15871 setting.
15873 Changing the shared memory configuration between launches may introduce
15874 a device side synchronization point.
15876 Changing the shared memory bank size will not increase shared memory
15877 usage or affect occupancy of kernels, but may have major effects on
15878 performance. Larger bank sizes will allow for greater potential
15879 bandwidth to shared memory, but will change what kinds of accesses to
15880 shared memory will result in bank conflicts.
15882 This function will do nothing on devices with fixed shared memory bank
15883 size.
15885 The supported bank configurations are:
15887 - :py:obj:`~.cudaSharedMemBankSizeDefault`: set bank width the device
15888 default (currently, four bytes)
15890 - :py:obj:`~.cudaSharedMemBankSizeFourByte`: set shared memory bank
15891 width to be four bytes natively.
15893 - :py:obj:`~.cudaSharedMemBankSizeEightByte`: set shared memory bank
15894 width to be eight bytes natively.
15896 Parameters
15897 ----------
15898 config : :py:obj:`~.cudaSharedMemConfig`
15899 Requested cache configuration
15901 Returns
15902 -------
15903 cudaError_t
15904 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
15906 See Also
15907 --------
15908 :py:obj:`~.cudaDeviceSetCacheConfig`, :py:obj:`~.cudaDeviceGetCacheConfig`, :py:obj:`~.cudaDeviceGetSharedMemConfig`, :py:obj:`~.cudaFuncSetCacheConfig`, :py:obj:`~.cuCtxSetSharedMemConfig`
15909 """
15910 cdef cyruntime.cudaSharedMemConfig cyconfig = config.value
15911 with nogil:
15912 err = cyruntime.cudaDeviceSetSharedMemConfig(cyconfig)
15913 return (_dict_cudaError_t[err],)
15915@cython.embedsignature(True)
15916def cudaGetLastError():
15917 """ Returns the last error from a runtime call.
15919 Returns the last error that has been produced by any of the runtime
15920 calls in the same instance of the CUDA Runtime library in the host
15921 thread and resets it to :py:obj:`~.cudaSuccess`.
15923 Note: Multiple instances of the CUDA Runtime library can be present in
15924 an application when using a library that statically links the CUDA
15925 Runtime.
15927 Returns
15928 -------
15929 cudaError_t
15930 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorMissingConfiguration`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorLaunchFailure`, :py:obj:`~.cudaErrorLaunchTimeout`, :py:obj:`~.cudaErrorLaunchOutOfResources`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidConfiguration`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidSymbol`, :py:obj:`~.cudaErrorUnmapBufferObjectFailed`, :py:obj:`~.cudaErrorInvalidDevicePointer`, :py:obj:`~.cudaErrorInvalidTexture`, :py:obj:`~.cudaErrorInvalidTextureBinding`, :py:obj:`~.cudaErrorInvalidChannelDescriptor`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`, :py:obj:`~.cudaErrorInvalidFilterSetting`, :py:obj:`~.cudaErrorInvalidNormSetting`, :py:obj:`~.cudaErrorUnknown`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorInsufficientDriver`, :py:obj:`~.cudaErrorNoDevice`, :py:obj:`~.cudaErrorSetOnActiveProcess`, :py:obj:`~.cudaErrorStartupFailure`, :py:obj:`~.cudaErrorInvalidPtx`, :py:obj:`~.cudaErrorUnsupportedPtxVersion`, :py:obj:`~.cudaErrorNoKernelImageForDevice`, :py:obj:`~.cudaErrorJitCompilerNotFound`, :py:obj:`~.cudaErrorJitCompilationDisabled`
15932 See Also
15933 --------
15934 :py:obj:`~.cudaPeekAtLastError`, :py:obj:`~.cudaGetErrorName`, :py:obj:`~.cudaGetErrorString`, :py:obj:`~.cudaError`
15935 """
15936 with nogil:
15937 err = cyruntime.cudaGetLastError()
15938 return (_dict_cudaError_t[err],)
15940@cython.embedsignature(True)
15941def cudaPeekAtLastError():
15942 """ Returns the last error from a runtime call.
15944 Returns the last error that has been produced by any of the runtime
15945 calls in the same instance of the CUDA Runtime library in the host
15946 thread. This call does not reset the error to :py:obj:`~.cudaSuccess`
15947 like :py:obj:`~.cudaGetLastError()`.
15949 Note: Multiple instances of the CUDA Runtime library can be present in
15950 an application when using a library that statically links the CUDA
15951 Runtime.
15953 Returns
15954 -------
15955 cudaError_t
15956 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorMissingConfiguration`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorLaunchFailure`, :py:obj:`~.cudaErrorLaunchTimeout`, :py:obj:`~.cudaErrorLaunchOutOfResources`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidConfiguration`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidSymbol`, :py:obj:`~.cudaErrorUnmapBufferObjectFailed`, :py:obj:`~.cudaErrorInvalidDevicePointer`, :py:obj:`~.cudaErrorInvalidTexture`, :py:obj:`~.cudaErrorInvalidTextureBinding`, :py:obj:`~.cudaErrorInvalidChannelDescriptor`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`, :py:obj:`~.cudaErrorInvalidFilterSetting`, :py:obj:`~.cudaErrorInvalidNormSetting`, :py:obj:`~.cudaErrorUnknown`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorInsufficientDriver`, :py:obj:`~.cudaErrorNoDevice`, :py:obj:`~.cudaErrorSetOnActiveProcess`, :py:obj:`~.cudaErrorStartupFailure`, :py:obj:`~.cudaErrorInvalidPtx`, :py:obj:`~.cudaErrorUnsupportedPtxVersion`, :py:obj:`~.cudaErrorNoKernelImageForDevice`, :py:obj:`~.cudaErrorJitCompilerNotFound`, :py:obj:`~.cudaErrorJitCompilationDisabled`
15958 See Also
15959 --------
15960 :py:obj:`~.cudaGetLastError`, :py:obj:`~.cudaGetErrorName`, :py:obj:`~.cudaGetErrorString`, :py:obj:`~.cudaError`
15961 """
15962 with nogil:
15963 err = cyruntime.cudaPeekAtLastError()
15964 return (_dict_cudaError_t[err],)
15966@cython.embedsignature(True)
15967def cudaGetErrorName(error not None : cudaError_t):
15968 """ Returns the string representation of an error code enum name.
15970 Returns a string containing the name of an error code in the enum. If
15971 the error code is not recognized, "unrecognized error code" is
15972 returned.
15974 Parameters
15975 ----------
15976 error : :py:obj:`~.cudaError_t`
15977 Error code to convert to string
15979 Returns
15980 -------
15981 cudaError_t
15982 cudaError_t.cudaSuccess (this function always returns :py:obj:`~.cudaSuccess`)
15983 bytes
15984 `char*` pointer to a NULL-terminated string
15986 See Also
15987 --------
15988 :py:obj:`~.cudaGetErrorString`, :py:obj:`~.cudaGetLastError`, :py:obj:`~.cudaPeekAtLastError`, :py:obj:`~.cudaError`, :py:obj:`~.cuGetErrorName`
15989 """
15990 cdef cyruntime.cudaError_t cyerror = error.value
15991 with nogil:
15992 err = cyruntime.cudaGetErrorName(cyerror)
15993 return (cudaError_t.cudaSuccess, err)
15995@cython.embedsignature(True)
15996def cudaGetErrorString(error not None : cudaError_t):
15997 """ Returns the description string for an error code.
15999 Returns the description string for an error code. If the error code is
16000 not recognized, "unrecognized error code" is returned.
16002 Parameters
16003 ----------
16004 error : :py:obj:`~.cudaError_t`
16005 Error code to convert to string
16007 Returns
16008 -------
16009 cudaError_t
16010 cudaError_t.cudaSuccess (this function always returns :py:obj:`~.cudaSuccess`)
16011 bytes
16012 `char*` pointer to a NULL-terminated string
16014 See Also
16015 --------
16016 :py:obj:`~.cudaGetErrorName`, :py:obj:`~.cudaGetLastError`, :py:obj:`~.cudaPeekAtLastError`, :py:obj:`~.cudaError`, :py:obj:`~.cuGetErrorString`
16017 """
16018 cdef cyruntime.cudaError_t cyerror = error.value
16019 with nogil:
16020 err = cyruntime.cudaGetErrorString(cyerror)
16021 return (cudaError_t.cudaSuccess, err)
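# Usage sketch (illustrative, not part of the generated bindings): turn an
# error code into its enum name and human-readable description. Both calls
# always report cudaSuccess and return the string as bytes:
#
#     from cuda.bindings import runtime
#
#     code = runtime.cudaError_t.cudaErrorInvalidValue
#     _, name = runtime.cudaGetErrorName(code)     # b'cudaErrorInvalidValue'
#     _, desc = runtime.cudaGetErrorString(code)
#     print(name.decode(), "-", desc.decode())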
16023@cython.embedsignature(True)
16024def cudaGetDeviceCount():
16025 """ Returns the number of compute-capable devices.
16027 Returns in `*count` the number of devices with compute capability
16028 greater than or equal to 2.0 that are available for execution.
16030 Returns
16031 -------
16032 cudaError_t
16033 :py:obj:`~.cudaSuccess`
16034 count : int
16035 Returns the number of devices with compute capability greater than
16036 or equal to 2.0
16038 See Also
16039 --------
16040 :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuDeviceGetCount`
16041 """
16042 cdef int count = 0
16043 with nogil:
16044 err = cyruntime.cudaGetDeviceCount(&count)
16045 if err != cyruntime.cudaSuccess:
16046 return (_dict_cudaError_t[err], None)
16047 return (_dict_cudaError_t[err], count)
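# Usage sketch (illustrative, not part of the generated bindings): enumerate
# the visible devices, checking the error code first as with every call in
# this module:
#
#     from cuda.bindings import runtime
#
#     err, count = runtime.cudaGetDeviceCount()
#     assert err == runtime.cudaError_t.cudaSuccess
#     print(f"{count} CUDA device(s) visible")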
16049@cython.embedsignature(True)
16050def cudaGetDeviceProperties(int device):
16051 """ Returns information about the compute-device.
16053 Returns in `*prop` the properties of device `device`.
16055 Parameters
16056 ----------
16057 device : int
16058 Device number to get properties for
16060 Returns
16061 -------
16062 cudaError_t
16063 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`
16064 prop : :py:obj:`~.cudaDeviceProp`
16065 Properties for the specified device
16067 See Also
16068 --------
16069 :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaDeviceGetAttribute`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetName`
16070 """
16071 cdef cudaDeviceProp prop = cudaDeviceProp()
16072 with nogil:
16073 err = cyruntime.cudaGetDeviceProperties(<cyruntime.cudaDeviceProp*>prop._pvt_ptr, device)
16074 if err != cyruntime.cudaSuccess:
16075 return (_dict_cudaError_t[err], None)
16076 return (_dict_cudaError_t[err], prop)
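# Usage sketch (illustrative, not part of the generated bindings; `name` and
# `totalGlobalMem` are standard cudaDeviceProp members, assumed to be exposed
# as attributes on the returned object):
#
#     from cuda.bindings import runtime
#
#     err, prop = runtime.cudaGetDeviceProperties(0)
#     if err == runtime.cudaError_t.cudaSuccess:
#         print(prop.name, prop.totalGlobalMem)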
16078@cython.embedsignature(True)
16079def cudaDeviceGetAttribute(attr not None : cudaDeviceAttr, int device):
16080 """ Returns information about the device.
16082 Returns in `*value` the integer value of the attribute `attr` on device
16083 `device`.
16085 Parameters
16086 ----------
16087 attr : :py:obj:`~.cudaDeviceAttr`
16088 Device attribute to query
16089 device : int
16090 Device number to query
16092 Returns
16093 -------
16094 cudaError_t
16095 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`
16096 value : int
16097 Returned device attribute value
16099 See Also
16100 --------
16101 :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuDeviceGetAttribute`
16102 """
16103 cdef int value = 0
16104 cdef cyruntime.cudaDeviceAttr cyattr = attr.value
16105 with nogil:
16106 err = cyruntime.cudaDeviceGetAttribute(&value, cyattr, device)
16107 if err != cyruntime.cudaSuccess:
16108 return (_dict_cudaError_t[err], None)
16109 return (_dict_cudaError_t[err], value)
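# Usage sketch (illustrative, not part of the generated bindings; the enum
# member name is an assumption based on the standard cudaDeviceAttr values):
# query a single integer attribute instead of the full property struct.
#
#     from cuda.bindings import runtime
#
#     err, sm_count = runtime.cudaDeviceGetAttribute(
#         runtime.cudaDeviceAttr.cudaDevAttrMultiProcessorCount, 0)
#     assert err == runtime.cudaError_t.cudaSuccess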
16111@cython.embedsignature(True)
16112def cudaDeviceGetHostAtomicCapabilities(operations : Optional[tuple[cudaAtomicOperation] | list[cudaAtomicOperation]], unsigned int count, int device):
16113 """ Queries details about atomic operations supported between the device and host.
16115 Returns in `*capabilities` the details about requested atomic
16116 `*operations` over the link between `device` and the host. The
16117 allocated size of `*operations` and `*capabilities` must be `count`.
16119 For each :py:obj:`~.cudaAtomicOperation` in `*operations`, the
16120 corresponding result in `*capabilities` will be a bitmask indicating
16121 which of :py:obj:`~.cudaAtomicOperationCapability` the link supports
16122 natively.
16124 Returns :py:obj:`~.cudaErrorInvalidDevice` if `device` is not valid.
16126 Returns :py:obj:`~.cudaErrorInvalidValue` if `*capabilities` or
16127 `*operations` is NULL, if `count` is 0, or if any of `*operations` is
16128 not valid.
16130 Parameters
16131 ----------
16132 operations : list[:py:obj:`~.cudaAtomicOperation`]
16133 Requested operations
16134 count : unsigned int
16135 Count of requested operations and size of capabilities
16136 device : int
16137 Device handle
16139 Returns
16140 -------
16141 cudaError_t
16142 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`
16143 capabilities : list[unsigned int]
16144 Returned capability details of each requested operation
16146 See Also
16147 --------
16148 :py:obj:`~.cudaDeviceGetAttribute`, :py:obj:`~.cudaDeviceGetP2PAtomicCapabilities`, :py:obj:`~.cuDeviceGetHostAtomicCapabilities`
16149 """
16150 operations = [] if operations is None else operations
16151 if not all(isinstance(_x, (cudaAtomicOperation)) for _x in operations):
16152 raise TypeError("Argument 'operations' is not an instance of the expected type (tuple[cudaAtomicOperation] or list[cudaAtomicOperation])")
16153 cdef unsigned int* cycapabilities = NULL
16154 pycapabilities = []
16155 if count != 0:
16156 cycapabilities = <unsigned int*>calloc(count, sizeof(unsigned int))
16157 if cycapabilities is NULL:
16158 raise MemoryError('Failed to allocate ' + str(count) + ' x ' + str(sizeof(unsigned int)) + ' bytes')
16159 cdef vector[cyruntime.cudaAtomicOperation] cyoperations = [pyoperations.value for pyoperations in operations]
16160 if count > len(operations): free(cycapabilities); raise RuntimeError("List is too small: " + str(len(operations)) + " < " + str(count))  # free before raising to avoid leaking the buffer
16161 with nogil:
16162 err = cyruntime.cudaDeviceGetHostAtomicCapabilities(cycapabilities, cyoperations.data(), count, device)
16163 if cudaError_t(err) == cudaError_t(0):
16164 pycapabilities = [<unsigned int>cycapabilities[idx] for idx in range(count)]
16165 if cycapabilities is not NULL:
16166 free(cycapabilities)
16167 if err != cyruntime.cudaSuccess:
16168 return (_dict_cudaError_t[err], None)
16169 return (_dict_cudaError_t[err], pycapabilities)
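# Usage sketch (illustrative, not part of the generated bindings; the concrete
# cudaAtomicOperation members depend on the installed CUDA version, so the
# first enum member is taken generically):
#
#     from cuda.bindings import runtime
#
#     ops = list(runtime.cudaAtomicOperation)[:1]
#     err, caps = runtime.cudaDeviceGetHostAtomicCapabilities(ops, len(ops), 0)
#     if err == runtime.cudaError_t.cudaSuccess:
#         print(f"{ops[0].name}: capability mask {caps[0]:#x}")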
16171@cython.embedsignature(True)
16172def cudaDeviceGetDefaultMemPool(int device):
16173 """ Returns the default mempool of a device.
16175 The default mempool of a device contains device memory from that
16176 device.
16178 Parameters
16179 ----------
16180 device : int
16181 The device to get the default memory pool for.
16183 Returns
16184 -------
16185 cudaError_t
16186 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
16187 memPool : :py:obj:`~.cudaMemPool_t`
16188 The default memory pool of the specified device.
16190 See Also
16191 --------
16192 :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaMemPoolTrimTo`, :py:obj:`~.cudaMemPoolGetAttribute`, :py:obj:`~.cudaDeviceSetMemPool`, :py:obj:`~.cudaMemPoolSetAttribute`, :py:obj:`~.cudaMemPoolSetAccess`
16193 """
16194 cdef cudaMemPool_t memPool = cudaMemPool_t()
16195 with nogil:
16196 err = cyruntime.cudaDeviceGetDefaultMemPool(<cyruntime.cudaMemPool_t*>memPool._pvt_ptr, device)
16197 if err != cyruntime.cudaSuccess:
16198 return (_dict_cudaError_t[err], None)
16199 return (_dict_cudaError_t[err], memPool)
16201@cython.embedsignature(True)
16202def cudaDeviceSetMemPool(int device, memPool):
16203 """ Sets the current memory pool of a device.
16205 The memory pool must be local to the specified device. Unless a mempool
16206 is specified in the :py:obj:`~.cudaMallocAsync` call,
16207 :py:obj:`~.cudaMallocAsync` allocates from the current mempool of the
16208 provided stream's device. By default, a device's current memory pool is
16209 its default memory pool.
16211 Parameters
16212 ----------
16213 device : int
16214 The device to set the memory pool on.
16215 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
16216 The memory pool to make current for the device.
16218 Returns
16219 -------
16220 cudaError_t
16221 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorNotSupported`
16223 See Also
16224 --------
16225 :py:obj:`~.cuDeviceSetMemPool`, :py:obj:`~.cudaDeviceGetMemPool`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaMemPoolCreate`, :py:obj:`~.cudaMemPoolDestroy`, :py:obj:`~.cudaMallocFromPoolAsync`
16227 Notes
16228 -----
16229 Use :py:obj:`~.cudaMallocFromPoolAsync` to specify asynchronous allocations from a device different from the one the stream runs on.
16230 """
16231 cdef cyruntime.cudaMemPool_t cymemPool
16232 if memPool is None:
16233 pmemPool = 0
16234 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
16235 pmemPool = int(memPool)
16236 else:
16237 pmemPool = int(cudaMemPool_t(memPool))
16238 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
16239 with nogil:
16240 err = cyruntime.cudaDeviceSetMemPool(device, cymemPool)
16241 return (_dict_cudaError_t[err],)
16243@cython.embedsignature(True)
16244def cudaDeviceGetMemPool(int device):
16245 """ Gets the current mempool for a device.
16247 Returns the last pool provided to :py:obj:`~.cudaDeviceSetMemPool` for
16248 this device or the device's default memory pool if
16249 :py:obj:`~.cudaDeviceSetMemPool` has never been called. By default the
16250 current mempool is the default mempool for a device, otherwise the
16251 returned pool must have been set with :py:obj:`~.cuDeviceSetMemPool` or
16252 :py:obj:`~.cudaDeviceSetMemPool`.
16254 Parameters
16255 ----------
16256 device : int
16257 The device to query.
16259 Returns
16260 -------
16261 cudaError_t
16262 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
16263 memPool : :py:obj:`~.cudaMemPool_t`
16264 The current memory pool of the device.
16266 See Also
16267 --------
16268 :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceSetMemPool`
16269 """
16270 cdef cudaMemPool_t memPool = cudaMemPool_t()
16271 with nogil:
16272 err = cyruntime.cudaDeviceGetMemPool(<cyruntime.cudaMemPool_t*>memPool._pvt_ptr, device)
16273 if err != cyruntime.cudaSuccess:
16274 return (_dict_cudaError_t[err], None)
16275 return (_dict_cudaError_t[err], memPool)
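# Usage sketch (illustrative, not part of the generated bindings): fetch the
# default pool of device 0 and install it as the current pool, a no-op when
# the default pool is already current:
#
#     from cuda.bindings import runtime
#
#     err, pool = runtime.cudaDeviceGetDefaultMemPool(0)
#     assert err == runtime.cudaError_t.cudaSuccess
#     err, = runtime.cudaDeviceSetMemPool(0, pool)
#     err, current = runtime.cudaDeviceGetMemPool(0)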
16277@cython.embedsignature(True)
16278def cudaDeviceGetNvSciSyncAttributes(nvSciSyncAttrList, int device, int flags):
16279 """ Return NvSciSync attributes that this device can support.
16281 Returns in `nvSciSyncAttrList`, the properties of NvSciSync that this
16282 CUDA device, `dev` can support. The returned `nvSciSyncAttrList` can be
16283 used to create an NvSciSync that matches this device's capabilities.
16285 If the NvSciSyncAttrKey_RequiredPerm field in `nvSciSyncAttrList` is
16286 already set, this API will return :py:obj:`~.cudaErrorInvalidValue`.
16288 Applications should set `nvSciSyncAttrList` to a valid
16289 NvSciSyncAttrList; otherwise this API will return
16290 :py:obj:`~.cudaErrorInvalidHandle`.
16292 The `flags` parameter controls how the application intends to use the
16293 NvSciSync created from the `nvSciSyncAttrList`. The valid flags are:
16295 - :py:obj:`~.cudaNvSciSyncAttrSignal`, specifies that the application
16296 intends to signal an NvSciSync on this CUDA device.
16298 - :py:obj:`~.cudaNvSciSyncAttrWait`, specifies that the application
16299 intends to wait on an NvSciSync on this CUDA device.
16301 At least one of these flags must be set; otherwise the API returns
16302 :py:obj:`~.cudaErrorInvalidValue`. The two flags are orthogonal to one
16303 another: a developer may set both, which allows setting both wait- and
16304 signal-specific attributes in the same `nvSciSyncAttrList`.
16306 Note that this API updates the input `nvSciSyncAttrList` with values
16307 equivalent to the following public attribute key-values:
16308 NvSciSyncAttrKey_RequiredPerm is set to
16310 - NvSciSyncAccessPerm_SignalOnly if :py:obj:`~.cudaNvSciSyncAttrSignal`
16311 is set in `flags`.
16313 - NvSciSyncAccessPerm_WaitOnly if :py:obj:`~.cudaNvSciSyncAttrWait` is
16314 set in `flags`.
16316 - NvSciSyncAccessPerm_WaitSignal if both
16317 :py:obj:`~.cudaNvSciSyncAttrWait` and
16318 :py:obj:`~.cudaNvSciSyncAttrSignal` are set in `flags`.
16319 NvSciSyncAttrKey_PrimitiveInfo is set to
16321 - NvSciSyncAttrValPrimitiveType_SysmemSemaphore on any valid `device`.
16323 - NvSciSyncAttrValPrimitiveType_Syncpoint if `device` is a Tegra
16324 device.
16326 - NvSciSyncAttrValPrimitiveType_SysmemSemaphorePayload64b if `device`
16327 is GA10X+. NvSciSyncAttrKey_GpuId is set to the same UUID that is
16328 returned in :py:obj:`~.cudaDeviceProp.uuid` from
16329 :py:obj:`~.cudaGetDeviceProperties` for this `device`.
16336 Parameters
16337 ----------
16338 nvSciSyncAttrList : Any
16339 Return NvSciSync attributes supported.
16340 device : int
16341 Valid Cuda Device to get NvSciSync attributes for.
16342 flags : int
16343 flags describing NvSciSync usage.
16345 Returns
16346 -------
16347 cudaError_t
16348 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorDeviceUninitialized`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidHandle`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorMemoryAllocation`
16350 See Also
16351 --------
16352 :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaDestroyExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
16353 """
16354 cynvSciSyncAttrList = _HelperInputVoidPtr(nvSciSyncAttrList)
16355 cdef void* cynvSciSyncAttrList_ptr = <void*><void_ptr>cynvSciSyncAttrList.cptr
16356 with nogil:
16357 err = cyruntime.cudaDeviceGetNvSciSyncAttributes(cynvSciSyncAttrList_ptr, device, flags)
16358 return (_dict_cudaError_t[err],)
16360@cython.embedsignature(True)
16361def cudaDeviceGetP2PAttribute(attr not None : cudaDeviceP2PAttr, int srcDevice, int dstDevice):
16362 """ Queries attributes of the link between two devices.
16364 Returns in `*value` the value of the requested attribute `attr` of
16365 the link between `srcDevice` and `dstDevice`. The supported attributes
16366 are:
16368 - :py:obj:`~.cudaDevP2PAttrPerformanceRank`: A relative value
16369 indicating the performance of the link between two devices. Lower
16370 value means better performance (0 being the value used for the most
16371 performant link).
16373 - :py:obj:`~.cudaDevP2PAttrAccessSupported`: 1 if peer access is
16374 enabled.
16376 - :py:obj:`~.cudaDevP2PAttrNativeAtomicSupported`: 1 if all native
16377 atomic operations over the link are supported.
16379 - :py:obj:`~.cudaDevP2PAttrCudaArrayAccessSupported`: 1 if accessing
16380 CUDA arrays over the link is supported.
16382 - :py:obj:`~.cudaDevP2PAttrOnlyPartialNativeAtomicSupported`: 1 if some
16383 CUDA-valid atomic operations over the link are supported. Information
16384 about specific operations can be retrieved with
16385 :py:obj:`~.cudaDeviceGetP2PAtomicCapabilities`.
16387 Returns :py:obj:`~.cudaErrorInvalidDevice` if `srcDevice` or
16388 `dstDevice` are not valid or if they represent the same device.
16390 Returns :py:obj:`~.cudaErrorInvalidValue` if `attr` is not valid or
16391 if `value` is a null pointer.
16393 Parameters
16394 ----------
16395 attr : :py:obj:`~.cudaDeviceP2PAttr`
16396 The requested attribute of the link between `srcDevice` and
16397 `dstDevice`.
16398 srcDevice : int
16399 The source device of the target link.
16400 dstDevice : int
16401 The destination device of the target link.
16403 Returns
16404 -------
16405 cudaError_t
16406 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`
16407 value : int
16408 Returned value of the requested attribute
16410 See Also
16411 --------
16412 :py:obj:`~.cudaDeviceEnablePeerAccess`, :py:obj:`~.cudaDeviceDisablePeerAccess`, :py:obj:`~.cudaDeviceCanAccessPeer`, :py:obj:`~.cuDeviceGetP2PAttribute` :py:obj:`~.cudaDeviceGetP2PAtomicCapabilities`
16413 """
16414 cdef int value = 0
16415 cdef cyruntime.cudaDeviceP2PAttr cyattr = attr.value
16416 with nogil:
16417 err = cyruntime.cudaDeviceGetP2PAttribute(&value, cyattr, srcDevice, dstDevice)
16418 if err != cyruntime.cudaSuccess:
16419 return (_dict_cudaError_t[err], None)
16420 return (_dict_cudaError_t[err], value)
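# Usage sketch (illustrative, not part of the generated bindings; assumes at
# least two visible devices):
#
#     from cuda.bindings import runtime
#
#     err, can_access = runtime.cudaDeviceGetP2PAttribute(
#         runtime.cudaDeviceP2PAttr.cudaDevP2PAttrAccessSupported, 0, 1)
#     if err == runtime.cudaError_t.cudaSuccess and can_access:
#         print("peer access between device 0 and 1 is enabled")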
16422@cython.embedsignature(True)
16423def cudaDeviceGetP2PAtomicCapabilities(operations : Optional[tuple[cudaAtomicOperation] | list[cudaAtomicOperation]], unsigned int count, int srcDevice, int dstDevice):
16424 """ Queries details about atomic operations supported between two devices.
16426 Returns in `*capabilities` the details about requested atomic
16427 `*operations` over the link between `srcDevice` and `dstDevice`.
16428 The allocated size of `*operations` and `*capabilities` must be
16429 `count`.
16431 For each :py:obj:`~.cudaAtomicOperation` in `*operations`, the
16432 corresponding result in `*capabilities` will be a bitmask indicating
16433 which of :py:obj:`~.cudaAtomicOperationCapability` the link supports
16434 natively.
16436 Returns :py:obj:`~.cudaErrorInvalidDevice` if `srcDevice` or
16437 `dstDevice` are not valid or if they represent the same device.
16439 Returns :py:obj:`~.cudaErrorInvalidValue` if `*capabilities` or
16440 `*operations` is NULL, if `count` is 0, or if any of `*operations` is
16441 not valid.
16443 Parameters
16444 ----------
16445 operations : list[:py:obj:`~.cudaAtomicOperation`]
16446 Requested operations
16447 count : unsigned int
16448 Count of requested operations and size of capabilities
16449 srcDevice : int
16450 The source device of the target link
16451 dstDevice : int
16452 The destination device of the target link
16454 Returns
16455 -------
16456 cudaError_t
16457 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`
16458 capabilities : list[unsigned int]
16459 Returned capability details of each requested operation
16461 See Also
16462 --------
16463 :py:obj:`~.cudaDeviceGetP2PAttribute`, :py:obj:`~.cuDeviceGetP2PAttribute`, :py:obj:`~.cuDeviceGetP2PAtomicCapabilities`
16464 """
16465 operations = [] if operations is None else operations
16466 if not all(isinstance(_x, (cudaAtomicOperation)) for _x in operations):
16467 raise TypeError("Argument 'operations' is not an instance of the expected type (tuple[cudaAtomicOperation] or list[cudaAtomicOperation])")
16468 cdef unsigned int* cycapabilities = NULL
16469 pycapabilities = []
16470 if count != 0:
16471 cycapabilities = <unsigned int*>calloc(count, sizeof(unsigned int))
16472 if cycapabilities is NULL:
16473 raise MemoryError('Failed to allocate ' + str(count) + ' x ' + str(sizeof(unsigned int)) + ' bytes')
16474 cdef vector[cyruntime.cudaAtomicOperation] cyoperations = [pyoperations.value for pyoperations in operations]
16475 if count > len(operations): free(cycapabilities); raise RuntimeError("List is too small: " + str(len(operations)) + " < " + str(count))  # free before raising to avoid leaking the buffer
16476 with nogil:
16477 err = cyruntime.cudaDeviceGetP2PAtomicCapabilities(cycapabilities, cyoperations.data(), count, srcDevice, dstDevice)
16478 if cudaError_t(err) == cudaError_t(0):
16479 pycapabilities = [<unsigned int>cycapabilities[idx] for idx in range(count)]
16480 if cycapabilities is not NULL:
16481 free(cycapabilities)
16482 if err != cyruntime.cudaSuccess:
16483 return (_dict_cudaError_t[err], None)
16484 return (_dict_cudaError_t[err], pycapabilities)
16486@cython.embedsignature(True)
16487def cudaChooseDevice(prop : Optional[cudaDeviceProp]):
16488 """ Select compute-device which best matches criteria.
16490 Returns in `*device` the device which has properties that best match
16491 `*prop`.
16493 Parameters
16494 ----------
16495 prop : :py:obj:`~.cudaDeviceProp`
16496 Desired device properties
16498 Returns
16499 -------
16500 cudaError_t
16501 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
16502 device : int
16503 Device with best match
16505 See Also
16506 --------
16507 :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaInitDevice`
16508 """
16509 cdef int device = 0
16510 cdef cyruntime.cudaDeviceProp* cyprop_ptr = prop._pvt_ptr if prop is not None else NULL
16511 with nogil:
16512 err = cyruntime.cudaChooseDevice(&device, cyprop_ptr)
16513 if err != cyruntime.cudaSuccess:
16514 return (_dict_cudaError_t[err], None)
16515 return (_dict_cudaError_t[err], device)
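# Usage sketch (illustrative, not part of the generated bindings; the field
# assignment on cudaDeviceProp is an assumption based on the struct members):
#
#     from cuda.bindings import runtime
#
#     prop = runtime.cudaDeviceProp()
#     prop.major = 8                      # prefer compute capability >= 8.x
#     err, device = runtime.cudaChooseDevice(prop)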
16517@cython.embedsignature(True)
16518def cudaInitDevice(int device, unsigned int deviceFlags, unsigned int flags):
16519 """ Initialize device to be used for GPU executions.
16521 This function will initialize the CUDA Runtime structures and primary
16522 context on `device` when called, but the context will not be made
16523 current to `device`.
16525 When :py:obj:`~.cudaInitDeviceFlagsAreValid` is set in `flags`,
16526 deviceFlags are applied to the requested device. The values of
16527 deviceFlags match those of the flags parameters in
16528 :py:obj:`~.cudaSetDeviceFlags`. The effect may be verified by
16529 :py:obj:`~.cudaGetDeviceFlags`.
16531 This function will return an error if the device is in
16532 :py:obj:`~.cudaComputeModeExclusiveProcess` and is occupied by another
16533 process or if the device is in :py:obj:`~.cudaComputeModeProhibited`.
16535 Parameters
16536 ----------
16537 device : int
16538 Device on which the runtime will initialize itself.
16539 deviceFlags : unsigned int
16540 Parameters for device operation.
16541 flags : unsigned int
16542 Flags for controlling the device initialization.
16544 Returns
16545 -------
16546 cudaError_t
16547 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`
16549 See Also
16550 --------
16551 :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaSetDevice` :py:obj:`~.cuCtxSetCurrent`
16552 """
16553 with nogil:
16554 err = cyruntime.cudaInitDevice(device, deviceFlags, flags)
16555 return (_dict_cudaError_t[err],)
16557@cython.embedsignature(True)
16558def cudaSetDevice(int device):
16559 """ Set device to be used for GPU executions.
16561 Sets `device` as the current device for the calling host thread. Valid
16562 device IDs are 0 to (:py:obj:`~.cudaGetDeviceCount()` - 1).
16564 Any device memory subsequently allocated from this host thread using
16565 :py:obj:`~.cudaMalloc()`, :py:obj:`~.cudaMallocPitch()` or
16566 :py:obj:`~.cudaMallocArray()` will be physically resident on `device`.
16567 Any host memory allocated from this host thread using
16568 :py:obj:`~.cudaMallocHost()` or :py:obj:`~.cudaHostAlloc()` or
16569 :py:obj:`~.cudaHostRegister()` will have its lifetime associated with
16570 `device`. Any streams or events created from this host thread will be
16571 associated with `device`. Any kernels launched from this host thread
16572 using the <<<>>> operator or :py:obj:`~.cudaLaunchKernel()` will be
16573 executed on `device`.
16575 This call may be made from any host thread, to any device, and at any
16576 time. This function will do no synchronization with the previous or new
16577 device, and should only take significant time when it initializes the
16578 runtime's context state. This call will bind the primary context of the
16579 specified device to the calling thread and all the subsequent memory
16580 allocations, stream and event creations, and kernel launches will be
16581 associated with the primary context. This function will also
16582 immediately initialize the runtime state on the primary context, and
16583 the context will be current on `device` immediately. This function will
16584 return an error if the device is in
16585 :py:obj:`~.cudaComputeModeExclusiveProcess` and is occupied by another
16586 process or if the device is in :py:obj:`~.cudaComputeModeProhibited`.
16588 It is not required to call :py:obj:`~.cudaInitDevice` before using this
16589 function.
16591 Parameters
16592 ----------
16593 device : int
16594 Device on which the active host thread should execute the device
16595 code.
16597 Returns
16598 -------
16599 cudaError_t
16600 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorDeviceUnavailable`
16602 See Also
16603 --------
16604 :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuCtxSetCurrent`
16605 """
16606 with nogil:
16607 err = cyruntime.cudaSetDevice(device)
16608 return (_dict_cudaError_t[err],)
16610@cython.embedsignature(True)
16611def cudaGetDevice():
16612 """ Returns which device is currently being used.
16614 Returns in `*device` the current device for the calling host thread.
16616 Returns
16617 -------
16618 cudaError_t
16619 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorDeviceUnavailable`
16620 device : int
16621 Returns the device on which the active host thread executes the
16622 device code.
16624 See Also
16625 --------
16626 :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cuCtxGetCurrent`
16627 """
16628 cdef int device = 0
16629 with nogil:
16630 err = cyruntime.cudaGetDevice(&device)
16631 if err != cyruntime.cudaSuccess:
16632 return (_dict_cudaError_t[err], None)
16633 return (_dict_cudaError_t[err], device)
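# Usage sketch (illustrative, not part of the generated bindings): make a
# device current for the calling thread and read it back:
#
#     from cuda.bindings import runtime
#
#     err, = runtime.cudaSetDevice(0)
#     assert err == runtime.cudaError_t.cudaSuccess
#     err, device = runtime.cudaGetDevice()
#     assert device == 0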
16635@cython.embedsignature(True)
16636def cudaSetDeviceFlags(unsigned int flags):
16637 """ Sets flags to be used for device executions.
16639 Records `flags` as the flags for the current device. If the current
16640 device has been set and that device has already been initialized, the
16641 previous flags are overwritten. If the current device has not been
16642 initialized, it is initialized with the provided flags. If no device
16643 has been made current to the calling thread, a default device is
16644 selected and initialized with the provided flags.
16646 The three LSBs of the `flags` parameter can be used to control how the
16647 CPU thread interacts with the OS scheduler when waiting for results
16648 from the device.
16650 - :py:obj:`~.cudaDeviceScheduleAuto`: The default value if the `flags`
16651 parameter is zero, uses a heuristic based on the number of active
16652 CUDA contexts in the process `C` and the number of logical processors
16653 in the system `P`. If `C` > `P`, then CUDA will yield to other OS
16654 threads when waiting for the device, otherwise CUDA will not yield
16655 while waiting for results and actively spin on the processor.
16656 Additionally, on Tegra devices, :py:obj:`~.cudaDeviceScheduleAuto`
16657 uses a heuristic based on the power profile of the platform and may
16658 choose :py:obj:`~.cudaDeviceScheduleBlockingSync` for low-powered
16659 devices.
16661 - :py:obj:`~.cudaDeviceScheduleSpin`: Instruct CUDA to actively spin
16662 when waiting for results from the device. This can decrease latency
16663 when waiting for the device, but may lower the performance of CPU
16664 threads if they are performing work in parallel with the CUDA thread.
16666 - :py:obj:`~.cudaDeviceScheduleYield`: Instruct CUDA to yield its
16667 thread when waiting for results from the device. This can increase
16668 latency when waiting for the device, but can increase the performance
16669 of CPU threads performing work in parallel with the device.
16671 - :py:obj:`~.cudaDeviceScheduleBlockingSync`: Instruct CUDA to block
16672 the CPU thread on a synchronization primitive when waiting for the
16673 device to finish work.
16675 - :py:obj:`~.cudaDeviceBlockingSync`: Instruct CUDA to block the CPU
16676 thread on a synchronization primitive when waiting for the device to
16677 finish work. :py:obj:`~.Deprecated:` This flag was deprecated as of
16678 CUDA 4.0 and replaced with
16679 :py:obj:`~.cudaDeviceScheduleBlockingSync`.
16681 - :py:obj:`~.cudaDeviceMapHost`: This flag enables allocating pinned
16682 host memory that is accessible to the device. It is implicit for the
16683 runtime but may be absent if a context is created using the driver
16684 API. If this flag is not set, :py:obj:`~.cudaHostGetDevicePointer()`
16685 will always return a failure code.
16687 - :py:obj:`~.cudaDeviceLmemResizeToMax`: Instruct CUDA to not reduce
16688 local memory after resizing local memory for a kernel. This can
16689 prevent thrashing by local memory allocations when launching many
16690 kernels with high local memory usage at the cost of potentially
16691 increased memory usage. :py:obj:`~.Deprecated:` This flag is
16692 deprecated and the behavior enabled by this flag is now the default
16693 and cannot be disabled.
16695 - :py:obj:`~.cudaDeviceSyncMemops`: Ensures that synchronous memory
16696 operations initiated on this context will always synchronize. See
16697 further documentation in the section titled "API Synchronization
16698 behavior" to learn more about cases when synchronous memory
16699 operations can exhibit asynchronous behavior.
16701 Parameters
16702 ----------
16703 flags : unsigned int
16704 Parameters for device operation
16706 Returns
16707 -------
16708 cudaError_t
16709 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
16711 See Also
16712 --------
16713 :py:obj:`~.cudaGetDeviceFlags`, :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaSetValidDevices`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cuDevicePrimaryCtxSetFlags`
16714 """
16715 with nogil:
16716 err = cyruntime.cudaSetDeviceFlags(flags)
16717 return (_dict_cudaError_t[err],)
16719@cython.embedsignature(True)
16720def cudaGetDeviceFlags():
16721 """ Gets the flags for the current device.
16723 Returns in `flags` the flags for the current device. If there is a
16724 current device for the calling thread, the flags for the device are
16725 returned. If there is no current device, the flags for the first device
16726 are returned, which may be the default flags. Compare to the behavior
16727 of :py:obj:`~.cudaSetDeviceFlags`.
16729 Typically, the flags returned should match the behavior that will be
16730 seen if the calling thread uses a device after this call, without any
16731 change to the flags or current device in between by this or another
16732 thread. Note that if the device is not initialized, it is possible for
16733 another thread to change the flags for the current device before it is
16734 initialized. Additionally, when using exclusive mode, if this thread
16735 has not requested a specific device, it may use a device other than the
16736 first device, contrary to the assumption made by this function.
16738 If a context has been created via the driver API and is current to the
16739 calling thread, the flags for that context are always returned.
16741 Flags returned by this function may specifically include
16742 :py:obj:`~.cudaDeviceMapHost` even though it is not accepted by
16743 :py:obj:`~.cudaSetDeviceFlags` because it is implicit in runtime API
16744 flags. The reason for this is that the current context may have been
16745 created via the driver API in which case the flag is not implicit and
16746 may be unset.
16748 Returns
16749 -------
16750 cudaError_t
16751 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`
16752 flags : unsigned int
16753 Pointer to store the device flags
16755 See Also
16756 --------
16757 :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaSetDeviceFlags`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuDevicePrimaryCtxGetState`
16758 """
16759 cdef unsigned int flags = 0
16760 with nogil:
16761 err = cyruntime.cudaGetDeviceFlags(&flags)
16762 if err != cyruntime.cudaSuccess:
16763 return (_dict_cudaError_t[err], None)
16764 return (_dict_cudaError_t[err], flags)
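# Usage sketch (illustrative, not part of the generated bindings; the
# cudaDeviceScheduleBlockingSync constant is a module-level flag like the
# host-alloc flags above): request blocking synchronization, then verify.
#
#     from cuda.bindings import runtime
#
#     err, = runtime.cudaSetDeviceFlags(runtime.cudaDeviceScheduleBlockingSync)
#     err, flags = runtime.cudaGetDeviceFlags()
#     assert flags & runtime.cudaDeviceScheduleBlockingSync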
16766@cython.embedsignature(True)
16767def cudaStreamCreate():
16768 """ Create an asynchronous stream.
16770 Creates a new asynchronous stream on the context that is current to the
16771 calling host thread. If no context is current to the calling host
16772 thread, then the primary context for a device is selected, made current
16773 to the calling thread, and initialized before creating a stream on it.
16775 Returns
16776 -------
16777 cudaError_t
16778 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
16779 pStream : :py:obj:`~.cudaStream_t`
16780 Pointer to new stream identifier
16782 See Also
16783 --------
16784 :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cudaStreamGetDevice`, :py:obj:`~.cudaStreamGetDevResource`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamCreate`
16785 """
16786 cdef cudaStream_t pStream = cudaStream_t()
16787 with nogil:
16788 err = cyruntime.cudaStreamCreate(<cyruntime.cudaStream_t*>pStream._pvt_ptr)
16789 if err != cyruntime.cudaSuccess:
16790 return (_dict_cudaError_t[err], None)
16791 return (_dict_cudaError_t[err], pStream)
16793@cython.embedsignature(True)
16794def cudaStreamCreateWithFlags(unsigned int flags):
16795 """ Create an asynchronous stream.
16797 Creates a new asynchronous stream on the context that is current to the
16798 calling host thread. If no context is current to the calling host
16799 thread, then the primary context for a device is selected, made current
16800 to the calling thread, and initialized before creating a stream on it.
16801 The `flags` argument determines the behaviors of the stream. Valid
16802 values for `flags` are
16804 - :py:obj:`~.cudaStreamDefault`: Default stream creation flag.
16806 - :py:obj:`~.cudaStreamNonBlocking`: Specifies that work running in the
16807 created stream may run concurrently with work in stream 0 (the NULL
16808 stream), and that the created stream should perform no implicit
16809 synchronization with stream 0.
16811 Parameters
16812 ----------
16813 flags : unsigned int
16814 Parameters for stream creation
16816 Returns
16817 -------
16818 cudaError_t
16819 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
16820 pStream : :py:obj:`~.cudaStream_t`
16821 Pointer to new stream identifier
16823 See Also
16824 --------
16825 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cudaStreamGetDevice`, :py:obj:`~.cudaStreamGetDevResource`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamCreate`
16826 """
16827 cdef cudaStream_t pStream = cudaStream_t()
16828 with nogil:
16829 err = cyruntime.cudaStreamCreateWithFlags(<cyruntime.cudaStream_t*>pStream._pvt_ptr, flags)
16830 if err != cyruntime.cudaSuccess:
16831 return (_dict_cudaError_t[err], None)
16832 return (_dict_cudaError_t[err], pStream)
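# Usage sketch (illustrative, not part of the generated bindings): create a
# stream that does not implicitly synchronize with the NULL stream:
#
#     from cuda.bindings import runtime
#
#     err, stream = runtime.cudaStreamCreateWithFlags(runtime.cudaStreamNonBlocking)
#     assert err == runtime.cudaError_t.cudaSuccess
#     err, = runtime.cudaStreamDestroy(stream)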
16834@cython.embedsignature(True)
16835def cudaStreamCreateWithPriority(unsigned int flags, int priority):
16836 """ Create an asynchronous stream with the specified priority.
16838 Creates a stream with the specified priority and returns a handle in
16839 `pStream`. The stream is created on the context that is current to the
16840 calling host thread. If no context is current to the calling host
16841 thread, then the primary context for a device is selected, made current
16842 to the calling thread, and initialized before creating a stream on it.
16843 This affects the scheduling priority of work in the stream. Priorities
16844 provide a hint to preferentially run work with higher priority when
16845 possible, but do not preempt already-running work or provide any other
16846 functional guarantee on execution order.
16848 `priority` follows a convention where lower numbers represent higher
16849 priorities. '0' represents default priority. The range of meaningful
16850 numerical priorities can be queried using
16851 :py:obj:`~.cudaDeviceGetStreamPriorityRange`. If the specified priority
16852 is outside the numerical range returned by
16853 :py:obj:`~.cudaDeviceGetStreamPriorityRange`, it will automatically be
16854 clamped to the lowest or the highest number in the range.
16856 Parameters
16857 ----------
16858 flags : unsigned int
16859 Flags for stream creation. See
16860 :py:obj:`~.cudaStreamCreateWithFlags` for a list of valid flags
16861 that can be passed
16862 priority : int
16863 Priority of the stream. Lower numbers represent higher priorities.
16864 See :py:obj:`~.cudaDeviceGetStreamPriorityRange` for more
16865 information about the meaningful stream priorities that can be
16866 passed.
16868 Returns
16869 -------
16870 cudaError_t
16871 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
16872 pStream : :py:obj:`~.cudaStream_t`
16873 Pointer to new stream identifier
16875 See Also
16876 --------
16877 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaDeviceGetStreamPriorityRange`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamCreateWithPriority`
16879 Notes
16880 -----
16881 Stream priorities are supported only on GPUs with compute capability 3.5 or higher.
16883 In the current implementation, only compute kernels launched in priority streams are affected by the stream's priority. Stream priorities have no effect on host-to-device and device-to-host memory operations.
16884 """
16885 cdef cudaStream_t pStream = cudaStream_t()
16886 with nogil:
16887 err = cyruntime.cudaStreamCreateWithPriority(<cyruntime.cudaStream_t*>pStream._pvt_ptr, flags, priority)
16888 if err != cyruntime.cudaSuccess:
16889 return (_dict_cudaError_t[err], None)
16890 return (_dict_cudaError_t[err], pStream)
16892@cython.embedsignature(True)
16893def cudaStreamGetPriority(hStream):
16894 """ Query the priority of a stream.
16896 Query the priority of a stream. The priority is returned in
16897 `priority`. Note that if the stream was created with a priority outside
16898 the meaningful numerical range returned by
16899 :py:obj:`~.cudaDeviceGetStreamPriorityRange`, this function returns the
16900 clamped priority. See :py:obj:`~.cudaStreamCreateWithPriority` for
16901 details about priority clamping.
16903 Parameters
16904 ----------
16905 hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
16906 Handle to the stream to be queried
16908 Returns
16909 -------
16910 cudaError_t
16911 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
16912 priority : int
16913 Pointer to a signed integer in which the stream's priority is
16914 returned
16916 See Also
16917 --------
16918 :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaDeviceGetStreamPriorityRange`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cudaStreamGetDevice`, :py:obj:`~.cudaStreamGetDevResource`, :py:obj:`~.cuStreamGetPriority`
16919 """
16920 cdef cyruntime.cudaStream_t cyhStream
16921 if hStream is None:
16922 phStream = 0
16923 elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
16924 phStream = int(hStream)
16925 else:
16926 phStream = int(cudaStream_t(hStream))
16927 cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
16928 cdef int priority = 0
16929 with nogil:
16930 err = cyruntime.cudaStreamGetPriority(cyhStream, &priority)
16931 if err != cyruntime.cudaSuccess:
16932 return (_dict_cudaError_t[err], None)
16933 return (_dict_cudaError_t[err], priority)
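# Usage sketch (illustrative, not part of the generated bindings;
# cudaDeviceGetStreamPriorityRange is assumed to return the least and
# greatest meaningful priorities for the current device):
#
#     from cuda.bindings import runtime
#
#     err, least, greatest = runtime.cudaDeviceGetStreamPriorityRange()
#     err, stream = runtime.cudaStreamCreateWithPriority(
#         runtime.cudaStreamDefault, greatest)   # highest priority
#     err, priority = runtime.cudaStreamGetPriority(stream)
#     err, = runtime.cudaStreamDestroy(stream)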
16935@cython.embedsignature(True)
16936def cudaStreamGetFlags(hStream):
16937 """ Query the flags of a stream.
16939 Query the flags of a stream. The flags are returned in `flags`. See
16940 :py:obj:`~.cudaStreamCreateWithFlags` for a list of valid flags.
16942 Parameters
16943 ----------
16944 hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
16945 Handle to the stream to be queried
16947 Returns
16948 -------
16949 cudaError_t
16950 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
16951 flags : unsigned int
16952 Pointer to an unsigned integer in which the stream's flags are
16953 returned
16955 See Also
16956 --------
16957 :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cudaStreamGetDevice`, :py:obj:`~.cuStreamGetFlags`
16958 """
16959 cdef cyruntime.cudaStream_t cyhStream
16960 if hStream is None:
16961 phStream = 0
16962 elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
16963 phStream = int(hStream)
16964 else:
16965 phStream = int(cudaStream_t(hStream))
16966 cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
16967 cdef unsigned int flags = 0
16968 with nogil:
16969 err = cyruntime.cudaStreamGetFlags(cyhStream, &flags)
16970 if err != cyruntime.cudaSuccess:
16971 return (_dict_cudaError_t[err], None)
16972 return (_dict_cudaError_t[err], flags)
16974@cython.embedsignature(True)
16975def cudaStreamGetId(hStream):
16976 """ Query the Id of a stream.
16978 Query the Id of a stream. The Id is returned in `streamId`. The Id is
16979 unique for the life of the program.
16981 The stream handle `hStream` can refer to any of the following:
16983 - a stream created via any of the CUDA runtime APIs such as
16984 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`
16985 and :py:obj:`~.cudaStreamCreateWithPriority`, or their driver API
16986 equivalents such as :py:obj:`~.cuStreamCreate` or
16987 :py:obj:`~.cuStreamCreateWithPriority`. Passing an invalid handle
16988 will result in undefined behavior.
16990 - any of the special streams, such as the NULL stream,
16991 :py:obj:`~.cudaStreamLegacy`, and :py:obj:`~.cudaStreamPerThread`.
16992 Their driver API equivalents are also accepted,
16993 which are NULL, :py:obj:`~.CU_STREAM_LEGACY` and
16994 :py:obj:`~.CU_STREAM_PER_THREAD`.
16996 Parameters
16997 ----------
16998 hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
16999 Handle to the stream to be queried
17001 Returns
17002 -------
17003 cudaError_t
17004 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
17005 streamId : unsigned long long
17006 Pointer to an unsigned long long in which the stream Id is returned
17008 See Also
17009 --------
17010 :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cuStreamGetId`
17011 """
17012 cdef cyruntime.cudaStream_t cyhStream
17013 if hStream is None:
17014 phStream = 0
17015 elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
17016 phStream = int(hStream)
17017 else:
17018 phStream = int(cudaStream_t(hStream))
17019 cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
17020 cdef unsigned long long streamId = 0
17021 with nogil:
17022 err = cyruntime.cudaStreamGetId(cyhStream, &streamId)
17023 if err != cyruntime.cudaSuccess:
17024 return (_dict_cudaError_t[err], None)
17025 return (_dict_cudaError_t[err], streamId)
17027@cython.embedsignature(True)
17028def cudaStreamGetDevice(hStream):
17029 """ Query the device of a stream.
17031 Returns in `*device` the device of the stream.
17033 Parameters
17034 ----------
17035 hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17036 Handle to the stream to be queried
17038 Returns
17039 -------
17040 cudaError_t
17041 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorDeviceUnavailable`
17042 device : int
17043 Returns the device to which the stream belongs
17045 See Also
17046 --------
17047 :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cuStreamGetId`
17048 """
17049 cdef cyruntime.cudaStream_t cyhStream
17050 if hStream is None:
17051 phStream = 0
17052 elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
17053 phStream = int(hStream)
17054 else:
17055 phStream = int(cudaStream_t(hStream))
17056 cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
17057 cdef int device = 0
17058 with nogil:
17059 err = cyruntime.cudaStreamGetDevice(cyhStream, &device)
17060 if err != cyruntime.cudaSuccess:
17061 return (_dict_cudaError_t[err], None)
17062 return (_dict_cudaError_t[err], device)
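# Usage sketch (illustrative, not part of the generated bindings): identify a
# stream and the device it belongs to:
#
#     from cuda.bindings import runtime
#
#     err, stream = runtime.cudaStreamCreate()
#     err, stream_id = runtime.cudaStreamGetId(stream)
#     err, device = runtime.cudaStreamGetDevice(stream)
#     err, = runtime.cudaStreamDestroy(stream)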
17064@cython.embedsignature(True)
17065def cudaCtxResetPersistingL2Cache():
17066 """ Resets all persisting lines in cache to normal status.
17068 Resets all persisting lines in cache to normal status. Takes effect on
17069 function return.
17071 Returns
17072 -------
17073 cudaError_t
17074 :py:obj:`~.cudaSuccess`
17076 See Also
17077 --------
17078 :py:obj:`~.cudaAccessPolicyWindow`
17079 """
17080 with nogil:
17081 err = cyruntime.cudaCtxResetPersistingL2Cache()
17082 return (_dict_cudaError_t[err],)
17084@cython.embedsignature(True)
17085def cudaStreamCopyAttributes(dst, src):
17086 """ Copies attributes from source stream to destination stream.
17088 Copies attributes from source stream `src` to destination stream `dst`.
17089 Both streams must have the same context.
17091 Parameters
17092 ----------
17093 dst : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17094 Destination stream
17095 src : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17096 Source stream. For attributes see :py:obj:`~.cudaStreamAttrID`
17098 Returns
17099 -------
17100 cudaError_t
17101 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNotSupported`
17103 See Also
17104 --------
17105 :py:obj:`~.cudaAccessPolicyWindow`
17106 """
17107 cdef cyruntime.cudaStream_t cysrc
17108 if src is None:
17109 psrc = 0
17110 elif isinstance(src, (cudaStream_t,driver.CUstream)):
17111 psrc = int(src)
17112 else:
17113 psrc = int(cudaStream_t(src))
17114 cysrc = <cyruntime.cudaStream_t><void_ptr>psrc
17115 cdef cyruntime.cudaStream_t cydst
17116 if dst is None:
17117 pdst = 0
17118 elif isinstance(dst, (cudaStream_t,driver.CUstream)):
17119 pdst = int(dst)
17120 else:
17121 pdst = int(cudaStream_t(dst))
17122 cydst = <cyruntime.cudaStream_t><void_ptr>pdst
17123 with nogil:
17124 err = cyruntime.cudaStreamCopyAttributes(cydst, cysrc)
17125 return (_dict_cudaError_t[err],)
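# Usage sketch (illustrative, not part of the generated bindings): clone the
# attributes of one stream onto another created in the same context:
#
#     from cuda.bindings import runtime
#
#     err, src = runtime.cudaStreamCreate()
#     err, dst = runtime.cudaStreamCreate()
#     err, = runtime.cudaStreamCopyAttributes(dst, src)
#     for s in (src, dst):
#         runtime.cudaStreamDestroy(s)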
17127@cython.embedsignature(True)
17128def cudaStreamGetAttribute(hStream, attr not None : cudaStreamAttrID):
17129 """ Queries stream attribute.
17131 Queries attribute `attr` from `hStream` and stores it in corresponding
17132 member of `value_out`.
17134 Parameters
17135 ----------
17136 hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17138 attr : :py:obj:`~.cudaStreamAttrID`
17141 Returns
17142 -------
17143 cudaError_t
17144 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
17145 value_out : :py:obj:`~.cudaStreamAttrValue`
17148 See Also
17149 --------
17150 :py:obj:`~.cudaAccessPolicyWindow`
17151 """
17152 cdef cyruntime.cudaStream_t cyhStream
17153 if hStream is None:
17154 phStream = 0
17155 elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
17156 phStream = int(hStream)
17157 else:
17158 phStream = int(cudaStream_t(hStream))
17159 cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
17160 cdef cyruntime.cudaStreamAttrID cyattr = attr.value
17161 cdef cudaStreamAttrValue value_out = cudaStreamAttrValue()
17162 with nogil:
17163 err = cyruntime.cudaStreamGetAttribute(cyhStream, cyattr, <cyruntime.cudaStreamAttrValue*>value_out._pvt_ptr)
17164 if err != cyruntime.cudaSuccess:
17165 return (_dict_cudaError_t[err], None)
17166 return (_dict_cudaError_t[err], value_out)
17168@cython.embedsignature(True)
17169def cudaStreamSetAttribute(hStream, attr not None : cudaStreamAttrID, value : Optional[cudaStreamAttrValue]):
17170 """ Sets stream attribute.
17172 Sets attribute `attr` on `hStream` from corresponding attribute of
17173 `value`. The updated attribute will be applied to subsequent work
17174 submitted to the stream. It will not affect previously submitted work.
17176 Parameters
17177 ----------
17178 hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17180 attr : :py:obj:`~.cudaStreamAttrID`
17182 value : :py:obj:`~.cudaStreamAttrValue`
17185 Returns
17186 -------
17187 cudaError_t
17188 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
17190 See Also
17191 --------
17192 :py:obj:`~.cudaAccessPolicyWindow`
17193 """
17194 cdef cyruntime.cudaStream_t cyhStream
17195 if hStream is None:
17196 phStream = 0
17197 elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
17198 phStream = int(hStream)
17199 else:
17200 phStream = int(cudaStream_t(hStream))
17201 cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
17202 cdef cyruntime.cudaStreamAttrID cyattr = attr.value
17203 cdef cyruntime.cudaStreamAttrValue* cyvalue_ptr = value._pvt_ptr if value is not None else NULL
17204 with nogil:
17205 err = cyruntime.cudaStreamSetAttribute(cyhStream, cyattr, cyvalue_ptr)
17206 return (_dict_cudaError_t[err],)
17208@cython.embedsignature(True)
17209def cudaStreamDestroy(stream):
17210 """ Destroys and cleans up an asynchronous stream.
17212 Destroys and cleans up the asynchronous stream specified by `stream`.
17214 In case the device is still doing work in the stream `stream` when
17215 :py:obj:`~.cudaStreamDestroy()` is called, the function will return
17216 immediately and the resources associated with `stream` will be released
17217 automatically once the device has completed all work in `stream`.
17219 Parameters
17220 ----------
17221 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17222 Stream identifier
17224 Returns
17225 -------
17226 cudaError_t
17227 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
17229 See Also
17230 --------
17231 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cuStreamDestroy`
17232 """
17233 cdef cyruntime.cudaStream_t cystream
17234 if stream is None:
17235 pstream = 0
17236 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
17237 pstream = int(stream)
17238 else:
17239 pstream = int(cudaStream_t(stream))
17240 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
17241 with nogil:
17242 err = cyruntime.cudaStreamDestroy(cystream)
17243 return (_dict_cudaError_t[err],)
17245@cython.embedsignature(True)
17246def cudaStreamWaitEvent(stream, event, unsigned int flags):
17247 """ Make a compute stream wait on an event.
17249 Makes all future work submitted to `stream` wait for all work captured
17250 in `event`. See :py:obj:`~.cudaEventRecord()` for details on what is
17251 captured by an event. The synchronization will be performed efficiently
17252 on the device when applicable. `event` may be from a different device
17253 than `stream`.
17255 flags include:
17257 - :py:obj:`~.cudaEventWaitDefault`: Default event creation flag.
17259 - :py:obj:`~.cudaEventWaitExternal`: Event is captured in the graph as
17260 an external event node when performing stream capture.
17262 Parameters
17263 ----------
17264 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17265 Stream to wait
17266 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
17267 Event to wait on
17268 flags : unsigned int
17269 Parameters for the operation (see above)
17271 Returns
17272 -------
17273 cudaError_t
17274 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
17276 See Also
17277 --------
17278 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamWaitEvent`
17279 """
17280 cdef cyruntime.cudaEvent_t cyevent
17281 if event is None:
17282 pevent = 0
17283 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
17284 pevent = int(event)
17285 else:
17286 pevent = int(cudaEvent_t(event))
17287 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
17288 cdef cyruntime.cudaStream_t cystream
17289 if stream is None:
17290 pstream = 0
17291 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
17292 pstream = int(stream)
17293 else:
17294 pstream = int(cudaStream_t(stream))
17295 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
17296 with nogil:
17297 err = cyruntime.cudaStreamWaitEvent(cystream, cyevent, flags)
17298 return (_dict_cudaError_t[err],)
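# Usage sketch (illustrative, not part of the generated bindings;
# cudaEventCreate and cudaEventRecord follow the same (err, ...) return
# convention): order work between two streams through an event; flags=0
# requests the default wait behavior.
#
#     from cuda.bindings import runtime
#
#     err, producer = runtime.cudaStreamCreate()
#     err, consumer = runtime.cudaStreamCreate()
#     err, event = runtime.cudaEventCreate()
#     # ... enqueue work on `producer` ...
#     err, = runtime.cudaEventRecord(event, producer)
#     err, = runtime.cudaStreamWaitEvent(consumer, event, 0)
#     # work submitted to `consumer` from here on waits for the event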
17300ctypedef struct cudaStreamCallbackData_st:
17301 cyruntime.cudaStreamCallback_t callback
17302 void *userData
17304ctypedef cudaStreamCallbackData_st cudaStreamCallbackData
17306@cython.show_performance_hints(False)
17307cdef void cudaStreamRtCallbackWrapper(cyruntime.cudaStream_t stream, cyruntime.cudaError_t status, void *data) nogil:
17308 cdef cudaStreamCallbackData *cbData = <cudaStreamCallbackData *>data
17309 with gil:
17310 cbData.callback(stream, status, cbData.userData)
17311 free(cbData)
17313@cython.embedsignature(True)
17314def cudaStreamAddCallback(stream, callback, userData, unsigned int flags):
17315 """ Add a callback to a compute stream.
17317 Adds a callback to be called on the host after all currently enqueued
17318 items in the stream have completed. For each cudaStreamAddCallback
17319 call, a callback will be executed exactly once. The callback will block
17320 later work in the stream until it is finished.
17322 The callback may be passed :py:obj:`~.cudaSuccess` or an error code. In
17323 the event of a device error, all subsequently executed callbacks will
17324 receive an appropriate :py:obj:`~.cudaError_t`.
17326 Callbacks must not make any CUDA API calls. Attempting to use CUDA APIs
17327 may result in :py:obj:`~.cudaErrorNotPermitted`. Callbacks must not
17328 perform any synchronization that may depend on outstanding device work
17329 or other callbacks that are not mandated to run earlier. Callbacks
17330 without a mandated order (in independent streams) execute in undefined
17331 order and may be serialized.
17333 For the purposes of Unified Memory, callback execution makes a number
17334 of guarantees:
17336 - The callback stream is considered idle for the duration of the
17337 callback. Thus, for example, a callback may always use memory
17338 attached to the callback stream.
17340 - The start of execution of a callback has the same effect as
17341 synchronizing an event recorded in the same stream immediately prior
17342 to the callback. It thus synchronizes streams which have been
17343 "joined" prior to the callback.
17345 - Adding device work to any stream does not have the effect of making
17346 the stream active until all preceding callbacks have executed. Thus,
17347 for example, a callback might use global attached memory even if work
17348 has been added to another stream, if it has been properly ordered
17349 with an event.
17351 - Completion of a callback does not cause a stream to become active
17352 except as described above. The callback stream will remain idle if no
17353 device work follows the callback, and will remain idle across
17354 consecutive callbacks without device work in between. Thus, for
17355 example, stream synchronization can be done by signaling from a
17356 callback at the end of the stream.
17358 Parameters
17359 ----------
17360 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17361 Stream to add callback to
17362 callback : :py:obj:`~.cudaStreamCallback_t`
17363 The function to call once preceding stream operations are complete
17364 userData : Any
17365 User specified data to be passed to the callback function
17366 flags : unsigned int
17367 Reserved for future use, must be 0
17369 Returns
17370 -------
17371 cudaError_t
17372 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
17374 See Also
17375 --------
17376 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cudaMallocManaged`, :py:obj:`~.cudaStreamAttachMemAsync`, :py:obj:`~.cudaLaunchHostFunc`, :py:obj:`~.cuStreamAddCallback`
17378 Notes
17379 -----
17380 This function is slated for eventual deprecation and removal. If you do not require the callback to execute in case of a device error, consider using :py:obj:`~.cudaLaunchHostFunc`. Additionally, this function is not supported with :py:obj:`~.cudaStreamBeginCapture` and :py:obj:`~.cudaStreamEndCapture`, unlike :py:obj:`~.cudaLaunchHostFunc`.
17381 """
17382 cdef cyruntime.cudaStreamCallback_t cycallback
17383 if callback is None:
17384 pcallback = 0
17385 elif isinstance(callback, (cudaStreamCallback_t,)):
17386 pcallback = int(callback)
17387 else:
17388 pcallback = int(cudaStreamCallback_t(callback))
17389 cycallback = <cyruntime.cudaStreamCallback_t><void_ptr>pcallback
17390 cdef cyruntime.cudaStream_t cystream
17391 if stream is None:
17392 pstream = 0
17393 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
17394 pstream = int(stream)
17395 else:
17396 pstream = int(cudaStream_t(stream))
17397 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
17398 cyuserData = _HelperInputVoidPtr(userData)
17399 cdef void* cyuserData_ptr = <void*><void_ptr>cyuserData.cptr
17401 cdef cudaStreamCallbackData *cbData = NULL
17402 cbData = <cudaStreamCallbackData *>malloc(sizeof(cbData[0]))
17403 if cbData == NULL:
17404 return (cudaError_t.cudaErrorMemoryAllocation,)
17405 cbData.callback = cycallback
17406 cbData.userData = cyuserData_ptr
17408 with nogil:
17409 err = cyruntime.cudaStreamAddCallback(cystream, <cyruntime.cudaStreamCallback_t>cudaStreamRtCallbackWrapper, <void *>cbData, flags)
17410 if err != cyruntime.cudaSuccess:
17411 free(cbData)
17412 return (_dict_cudaError_t[err],)
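# Usage sketch (an assumption, not the bindings' documented pattern): a host
# callback built with ctypes. The wrapper accepts anything cudaStreamCallback_t()
# can convert, so passing the integer address of a C-callable is one possibility.
# Keep a reference to `on_done` alive until the callback has fired; ctypes does not.
#
#     import ctypes
#     _CB = ctypes.CFUNCTYPE(None, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p)
#     @_CB
#     def on_done(stream_handle, status, user_data):
#         print("stream work finished, status =", status)
#     addr = ctypes.cast(on_done, ctypes.c_void_p).value
#     err, = cudaStreamAddCallback(stream, addr, None, 0)  # flags must be 0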
17414@cython.embedsignature(True)
17415def cudaStreamSynchronize(stream):
17416 """ Waits for stream tasks to complete.
17418 Blocks until `stream` has completed all operations. If the
17419 :py:obj:`~.cudaDeviceScheduleBlockingSync` flag was set for this
17420 device, the host thread will block until the stream is finished with
17421 all of its tasks.
17423 Parameters
17424 ----------
17425 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17426 Stream identifier
17428 Returns
17429 -------
17430 cudaError_t
17431 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`
17433 See Also
17434 --------
17435 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamSynchronize`
17436 """
17437 cdef cyruntime.cudaStream_t cystream
17438 if stream is None:
17439 pstream = 0
17440 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
17441 pstream = int(stream)
17442 else:
17443 pstream = int(cudaStream_t(stream))
17444 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
17445 with nogil:
17446 err = cyruntime.cudaStreamSynchronize(cystream)
17447 return (_dict_cudaError_t[err],)
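# Usage sketch (illustrative only): create a stream, queue work on it, then block
# the host until everything queued so far has finished.
#
#     err, stream = cudaStreamCreate()
#     # ... enqueue async work on `stream` ...
#     err, = cudaStreamSynchronize(stream)
#     assert err == cudaError_t.cudaSuccess
#     err, = cudaStreamDestroy(stream)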
17449@cython.embedsignature(True)
17450def cudaStreamQuery(stream):
17451 """ Queries an asynchronous stream for completion status.
17453 Returns :py:obj:`~.cudaSuccess` if all operations in `stream` have
17454 completed, or :py:obj:`~.cudaErrorNotReady` if not.
17456 For the purposes of Unified Memory, a return value of
17457 :py:obj:`~.cudaSuccess` is equivalent to having called
17458 :py:obj:`~.cudaStreamSynchronize()`.
17460 Parameters
17461 ----------
17462 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17463 Stream identifier
17465 Returns
17466 -------
17467 cudaError_t
17468 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNotReady`, :py:obj:`~.cudaErrorInvalidResourceHandle`
17470 See Also
17471 --------
17472 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamQuery`
17473 """
17474 cdef cyruntime.cudaStream_t cystream
17475 if stream is None:
17476 pstream = 0
17477 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
17478 pstream = int(stream)
17479 else:
17480 pstream = int(cudaStream_t(stream))
17481 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
17482 with nogil:
17483 err = cyruntime.cudaStreamQuery(cystream)
17484 return (_dict_cudaError_t[err],)
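# Usage sketch (illustrative only): poll instead of blocking. cudaErrorNotReady
# is an expected in-band status here, not a failure.
#
#     while True:
#         err, = cudaStreamQuery(stream)
#         if err != cudaError_t.cudaErrorNotReady:
#             break
#         # ... do other host-side work between polls ...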
17486@cython.embedsignature(True)
17487def cudaStreamAttachMemAsync(stream, devPtr, size_t length, unsigned int flags):
17488 """ Attach memory to a stream asynchronously.
17490 Enqueues an operation in `stream` to specify stream association of
17491 `length` bytes of memory starting from `devPtr`. This function is a
17492 stream-ordered operation, meaning that it is dependent on, and will
17493 only take effect when, previous work in stream has completed. Any
17494 previous association is automatically replaced.
17496 `devPtr` must point to one of the following types of memory:
17498 - managed memory declared using the `__managed__` keyword or allocated with
17499 :py:obj:`~.cudaMallocManaged`.
17501 - a valid host-accessible region of system-allocated pageable memory.
17502 This type of memory may only be specified if the device associated
17503 with the stream reports a non-zero value for the device attribute
17504 :py:obj:`~.cudaDevAttrPageableMemoryAccess`.
17506 For managed allocations, `length` must be either zero or the entire
17507 allocation's size. Both indicate that the entire allocation's stream
17508 association is being changed. Currently, it is not possible to change
17509 stream association for a portion of a managed allocation.
17511 For pageable allocations, `length` must be non-zero.
17513 The stream association is specified using `flags`, which must be one of
17514 :py:obj:`~.cudaMemAttachGlobal`, :py:obj:`~.cudaMemAttachHost` or
17515 :py:obj:`~.cudaMemAttachSingle`. The default value for `flags` is
17516 :py:obj:`~.cudaMemAttachSingle`. If the :py:obj:`~.cudaMemAttachGlobal`
17517 flag is specified, the memory can be accessed by any stream on any
17518 device. If the :py:obj:`~.cudaMemAttachHost` flag is specified, the
17519 program makes a guarantee that it won't access the memory on the device
17520 from any stream on a device that has a zero value for the device
17521 attribute :py:obj:`~.cudaDevAttrConcurrentManagedAccess`. If the
17522 :py:obj:`~.cudaMemAttachSingle` flag is specified and `stream` is
17523 associated with a device that has a zero value for the device attribute
17524 :py:obj:`~.cudaDevAttrConcurrentManagedAccess`, the program makes a
17525 guarantee that it will only access the memory on the device from
17526 `stream`. It is illegal to attach singly to the NULL stream, because
17527 the NULL stream is a virtual global stream and not a specific stream.
17528 An error will be returned in this case.
17530 When memory is associated with a single stream, the Unified Memory
17531 system will allow CPU access to this memory region so long as all
17532 operations in `stream` have completed, regardless of whether other
17533 streams are active. In effect, this constrains exclusive ownership of
17534 the managed memory region by an active GPU to per-stream activity
17535 instead of whole-GPU activity.
17537 Accessing memory on the device from streams that are not associated
17538 with it will produce undefined results. No error checking is performed
17539 by the Unified Memory system to ensure that kernels launched into other
17540 streams do not access this region.
17542 It is a program's responsibility to order calls to
17543 :py:obj:`~.cudaStreamAttachMemAsync` via events, synchronization or
17544 other means to ensure legal access to memory at all times. Data
17545 visibility and coherency will be changed appropriately for all kernels
17546 which follow a stream-association change.
17548 If `stream` is destroyed while data is associated with it, the
17549 association is removed and the association reverts to the default
17550 visibility of the allocation as specified at
17551 :py:obj:`~.cudaMallocManaged`. For managed variables, the default
17552 association is always :py:obj:`~.cudaMemAttachGlobal`. Note that
17553 destroying a stream is an asynchronous operation, and as a result, the
17554 change to default association won't happen until all work in the stream
17555 has completed.
17557 Parameters
17558 ----------
17559 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17560 Stream in which to enqueue the attach operation
17561 devPtr : Any
17562 Pointer to memory (must be a pointer to managed memory or to a
17563 valid host-accessible region of system-allocated memory)
17564 length : size_t
17565 Length of memory (defaults to zero)
17566 flags : unsigned int
17567 Must be one of :py:obj:`~.cudaMemAttachGlobal`,
17568 :py:obj:`~.cudaMemAttachHost` or :py:obj:`~.cudaMemAttachSingle`
17569 (defaults to :py:obj:`~.cudaMemAttachSingle`)
17571 Returns
17572 -------
17573 cudaError_t
17574 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNotReady`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
17576 See Also
17577 --------
17578 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cudaMallocManaged`, :py:obj:`~.cuStreamAttachMemAsync`
17579 """
17580 cdef cyruntime.cudaStream_t cystream
17581 if stream is None:
17582 pstream = 0
17583 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
17584 pstream = int(stream)
17585 else:
17586 pstream = int(cudaStream_t(stream))
17587 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
17588 cydevPtr = _HelperInputVoidPtr(devPtr)
17589 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
17590 with nogil:
17591 err = cyruntime.cudaStreamAttachMemAsync(cystream, cydevPtr_ptr, length, flags)
17592 return (_dict_cudaError_t[err],)
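# Usage sketch (illustrative only): restrict a managed allocation to a single
# stream. length=0 selects the whole allocation, as required for managed memory;
# the flag constants follow the docstring above, and `nbytes` is assumed.
#
#     err, ptr = cudaMallocManaged(nbytes, cudaMemAttachGlobal)
#     err, = cudaStreamAttachMemAsync(stream, ptr, 0, cudaMemAttachSingle)
#     err, = cudaStreamSynchronize(stream)  # attachment takes effect in stream order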
17594@cython.embedsignature(True)
17595def cudaStreamBeginCapture(stream, mode not None : cudaStreamCaptureMode):
17596 """ Begins graph capture on a stream.
17598 Begin graph capture on `stream`. When a stream is in capture mode, all
17599 operations pushed into the stream will not be executed, but will
17600 instead be captured into a graph, which will be returned via
17601 :py:obj:`~.cudaStreamEndCapture`. Capture may not be initiated if
17602 `stream` is :py:obj:`~.cudaStreamLegacy`. Capture must be ended on the
17603 same stream in which it was initiated, and it may only be initiated if
17604 the stream is not already in capture mode. The capture mode may be
17605 queried via :py:obj:`~.cudaStreamIsCapturing`. A unique id representing
17606 the capture sequence may be queried via
17607 :py:obj:`~.cudaStreamGetCaptureInfo`.
17609 If `mode` is not :py:obj:`~.cudaStreamCaptureModeRelaxed`,
17610 :py:obj:`~.cudaStreamEndCapture` must be called on this stream from the
17611 same thread.
17613 Parameters
17614 ----------
17615 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17616 Stream in which to initiate capture
17617 mode : :py:obj:`~.cudaStreamCaptureMode`
17618 Controls the interaction of this capture sequence with other API
17619 calls that are potentially unsafe. For more details see
17620 :py:obj:`~.cudaThreadExchangeStreamCaptureMode`.
17622 Returns
17623 -------
17624 cudaError_t
17625 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
17627 See Also
17628 --------
17629 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamIsCapturing`, :py:obj:`~.cudaStreamEndCapture`, :py:obj:`~.cudaThreadExchangeStreamCaptureMode`
17631 Notes
17632 -----
17633 Kernels captured using this API must not use texture and surface references. Reading or writing through any texture or surface reference is undefined behavior. This restriction does not apply to texture and surface objects.
17634 """
17635 cdef cyruntime.cudaStream_t cystream
17636 if stream is None:
17637 pstream = 0
17638 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
17639 pstream = int(stream)
17640 else:
17641 pstream = int(cudaStream_t(stream))
17642 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
17643 cdef cyruntime.cudaStreamCaptureMode cymode = mode.value
17644 with nogil:
17645 err = cyruntime.cudaStreamBeginCapture(cystream, cymode)
17646 return (_dict_cudaError_t[err],)
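# Usage sketch (illustrative only): a minimal capture round trip. Work pushed
# between begin and end is recorded into a graph rather than executed.
#
#     err, = cudaStreamBeginCapture(stream, cudaStreamCaptureMode.cudaStreamCaptureModeGlobal)
#     # ... enqueue the work to be captured into `stream` ...
#     err, graph = cudaStreamEndCapture(stream)
#     # ... instantiate/launch `graph`, then cudaGraphDestroy(graph) ...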
17648@cython.embedsignature(True)
17649def cudaStreamBeginCaptureToGraph(stream, graph, dependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], dependencyData : Optional[tuple[cudaGraphEdgeData] | list[cudaGraphEdgeData]], size_t numDependencies, mode not None : cudaStreamCaptureMode):
17650 """ Begins graph capture on a stream to an existing graph.
17652 Begin graph capture on `stream`. When a stream is in capture mode, all
17653 operations pushed into the stream will not be executed, but will
17654 instead be captured into `graph`, which will be returned via
17655 :py:obj:`~.cudaStreamEndCapture`.
17657 Capture may not be initiated if `stream` is
17658 :py:obj:`~.cudaStreamLegacy`. Capture must be ended on the same stream
17659 in which it was initiated, and it may only be initiated if the stream
17660 is not already in capture mode. The capture mode may be queried via
17661 :py:obj:`~.cudaStreamIsCapturing`. A unique id representing the capture
17662 sequence may be queried via :py:obj:`~.cudaStreamGetCaptureInfo`.
17664 If `mode` is not :py:obj:`~.cudaStreamCaptureModeRelaxed`,
17665 :py:obj:`~.cudaStreamEndCapture` must be called on this stream from the
17666 same thread.
17668 Parameters
17669 ----------
17670 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17671 Stream in which to initiate capture.
17672 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
17673 Graph to capture into.
17674 dependencies : list[:py:obj:`~.cudaGraphNode_t`]
17675 Dependencies of the first node captured in the stream. Can be NULL
17676 if numDependencies is 0.
17677 dependencyData : list[:py:obj:`~.cudaGraphEdgeData`]
17678 Optional array of data associated with each dependency.
17679 numDependencies : size_t
17680 Number of dependencies.
17681 mode : :py:obj:`~.cudaStreamCaptureMode`
17682 Controls the interaction of this capture sequence with other API
17683 calls that are potentially unsafe. For more details see
17684 :py:obj:`~.cudaThreadExchangeStreamCaptureMode`.
17686 Returns
17687 -------
17688 cudaError_t
17689 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
17691 See Also
17692 --------
17693 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamIsCapturing`, :py:obj:`~.cudaStreamEndCapture`, :py:obj:`~.cudaThreadExchangeStreamCaptureMode`
17695 Notes
17696 -----
17697 Kernels captured using this API must not use texture and surface references. Reading or writing through any texture or surface reference is undefined behavior. This restriction does not apply to texture and surface objects.
17698 """
17699 dependencyData = [] if dependencyData is None else dependencyData
17700 if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in dependencyData):
17701 raise TypeError("Argument 'dependencyData' is not an instance of the expected type (tuple[cudaGraphEdgeData] or list[cudaGraphEdgeData])")
17702 dependencies = [] if dependencies is None else dependencies
17703 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in dependencies):
17704 raise TypeError("Argument 'dependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
# Check numDependencies before the allocations below so an oversized count cannot leak them.
if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
17705 cdef cyruntime.cudaGraph_t cygraph
17706 if graph is None:
17707 pgraph = 0
17708 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
17709 pgraph = int(graph)
17710 else:
17711 pgraph = int(cudaGraph_t(graph))
17712 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
17713 cdef cyruntime.cudaStream_t cystream
17714 if stream is None:
17715 pstream = 0
17716 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
17717 pstream = int(stream)
17718 else:
17719 pstream = int(cudaStream_t(stream))
17720 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
17721 cdef cyruntime.cudaGraphNode_t* cydependencies = NULL
17722 if len(dependencies) > 1:
17723 cydependencies = <cyruntime.cudaGraphNode_t*> calloc(len(dependencies), sizeof(cyruntime.cudaGraphNode_t))
17724 if cydependencies is NULL:
17725 raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
17726 else:
17727 for idx in range(len(dependencies)):
17728 cydependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>dependencies[idx])._pvt_ptr[0]
17729 elif len(dependencies) == 1:
17730 cydependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>dependencies[0])._pvt_ptr
17731 cdef cyruntime.cudaGraphEdgeData* cydependencyData = NULL
17732 if len(dependencyData) > 1:
17733 cydependencyData = <cyruntime.cudaGraphEdgeData*> calloc(len(dependencyData), sizeof(cyruntime.cudaGraphEdgeData))
17734 if cydependencyData is NULL:
17735 raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
17736 for idx in range(len(dependencyData)):
17737 string.memcpy(&cydependencyData[idx], (<cudaGraphEdgeData>dependencyData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData))
17738 elif len(dependencyData) == 1:
17739 cydependencyData = (<cudaGraphEdgeData>dependencyData[0])._pvt_ptr
17741 cdef cyruntime.cudaStreamCaptureMode cymode = mode.value
17742 with nogil:
17743 err = cyruntime.cudaStreamBeginCaptureToGraph(cystream, cygraph, cydependencies, cydependencyData, numDependencies, cymode)
17744 if len(dependencies) > 1 and cydependencies is not NULL:
17745 free(cydependencies)
17746 if len(dependencyData) > 1 and cydependencyData is not NULL:
17747 free(cydependencyData)
17748 return (_dict_cudaError_t[err],)
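# Usage sketch (illustrative only): capture into a caller-owned graph created up
# front. Passing None/0 for the dependency arguments starts capture with no
# initial dependencies.
#
#     err, graph = cudaGraphCreate(0)
#     err, = cudaStreamBeginCaptureToGraph(stream, graph, None, None, 0,
#                                          cudaStreamCaptureMode.cudaStreamCaptureModeGlobal)
#     # ... enqueue work ...
#     err, graph = cudaStreamEndCapture(stream)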
17750@cython.embedsignature(True)
17751def cudaThreadExchangeStreamCaptureMode(mode not None : cudaStreamCaptureMode):
17752 """ Swaps the stream capture interaction mode for a thread.
17754 Sets the calling thread's stream capture interaction mode to `mode`, and
17755 returns the previous mode for the thread. To facilitate deterministic
17756 behavior across function or
17757 module boundaries, callers are encouraged to use this API in a push-pop
17758 fashion:
17760 **View CUDA Toolkit Documentation for a C++ code example**
17762 During stream capture (see :py:obj:`~.cudaStreamBeginCapture`), some
17763 actions, such as a call to :py:obj:`~.cudaMalloc`, may be unsafe. In
17764 the case of :py:obj:`~.cudaMalloc`, the operation is not enqueued
17765 asynchronously to a stream, and is not observed by stream capture.
17766 Therefore, if the sequence of operations captured via
17767 :py:obj:`~.cudaStreamBeginCapture` depended on the allocation being
17768 replayed whenever the graph is launched, the captured graph would be
17769 invalid.
17771 Therefore, stream capture places restrictions on API calls that can be
17772 made within or concurrently to a
17773 :py:obj:`~.cudaStreamBeginCapture`-:py:obj:`~.cudaStreamEndCapture`
17774 sequence. This behavior can be controlled via this API and flags to
17775 :py:obj:`~.cudaStreamBeginCapture`.
17777 A thread's mode is one of the following:
17779 - `cudaStreamCaptureModeGlobal:` This is the default mode. If the local
17780 thread has an ongoing capture sequence that was not initiated with
17781 `cudaStreamCaptureModeRelaxed` at `cudaStreamBeginCapture`, or if any
17782 other thread has a concurrent capture sequence initiated with
17783 `cudaStreamCaptureModeGlobal`, this thread is prohibited from
17784 potentially unsafe API calls.
17786 - `cudaStreamCaptureModeThreadLocal:` If the local thread has an
17787 ongoing capture sequence not initiated with
17788 `cudaStreamCaptureModeRelaxed`, it is prohibited from potentially
17789 unsafe API calls. Concurrent capture sequences in other threads are
17790 ignored.
17792 - `cudaStreamCaptureModeRelaxed:` The local thread is not prohibited
17793 from potentially unsafe API calls. Note that the thread is still
17794 prohibited from API calls which necessarily conflict with stream
17795 capture, for example, attempting :py:obj:`~.cudaEventQuery` on an
17796 event that was last recorded inside a capture sequence.
17798 Parameters
17799 ----------
17800 mode : :py:obj:`~.cudaStreamCaptureMode`
17801 Mode value to set for the calling thread
17803 Returns
17804 -------
17805 cudaError_t
17806 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
17807 mode : :py:obj:`~.cudaStreamCaptureMode`
17808 The previous mode, swapped out for the calling thread
17810 See Also
17811 --------
17812 :py:obj:`~.cudaStreamBeginCapture`
17813 """
17814 cdef cyruntime.cudaStreamCaptureMode cymode = mode.value
17815 with nogil:
17816 err = cyruntime.cudaThreadExchangeStreamCaptureMode(&cymode)
17817 if err != cyruntime.cudaSuccess:
17818 return (_dict_cudaError_t[err], None)
17819 return (_dict_cudaError_t[err], cudaStreamCaptureMode(cymode))
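# Usage sketch (illustrative only): the push-pop pattern described above. Swap
# in the relaxed mode around a call that stream capture would otherwise forbid,
# then restore whatever mode the thread had before.
#
#     err, prev = cudaThreadExchangeStreamCaptureMode(
#         cudaStreamCaptureMode.cudaStreamCaptureModeRelaxed)
#     # ... potentially unsafe call, e.g. cudaMalloc ...
#     err, _ = cudaThreadExchangeStreamCaptureMode(prev)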
17821@cython.embedsignature(True)
17822def cudaStreamEndCapture(stream):
17823 """ Ends capture on a stream, returning the captured graph.
17825 End capture on `stream`, returning the captured graph via `pGraph`.
17826 Capture must have been initiated on `stream` via a call to
17827 :py:obj:`~.cudaStreamBeginCapture`. If capture was invalidated, due to
17828 a violation of the rules of stream capture, then a NULL graph will be
17829 returned.
17831 If the `mode` argument to :py:obj:`~.cudaStreamBeginCapture` was not
17832 :py:obj:`~.cudaStreamCaptureModeRelaxed`, this call must be from the
17833 same thread as :py:obj:`~.cudaStreamBeginCapture`.
17835 Parameters
17836 ----------
17837 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17838 Stream to query
17840 Returns
17841 -------
17842 cudaError_t
17843 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorStreamCaptureWrongThread`
17844 pGraph : :py:obj:`~.cudaGraph_t`
17845 The captured graph
17847 See Also
17848 --------
17849 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamBeginCapture`, :py:obj:`~.cudaStreamIsCapturing`, :py:obj:`~.cudaGraphDestroy`
17850 """
17851 cdef cyruntime.cudaStream_t cystream
17852 if stream is None:
17853 pstream = 0
17854 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
17855 pstream = int(stream)
17856 else:
17857 pstream = int(cudaStream_t(stream))
17858 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
17859 cdef cudaGraph_t pGraph = cudaGraph_t()
17860 with nogil:
17861 err = cyruntime.cudaStreamEndCapture(cystream, <cyruntime.cudaGraph_t*>pGraph._pvt_ptr)
17862 if err != cyruntime.cudaSuccess:
17863 return (_dict_cudaError_t[err], None)
17864 return (_dict_cudaError_t[err], pGraph)
17866@cython.embedsignature(True)
17867def cudaStreamIsCapturing(stream):
17868 """ Returns a stream's capture status.
17870 Return the capture status of `stream` via `pCaptureStatus`. After a
17871 successful call, `*pCaptureStatus` will contain one of the following:
17873 - :py:obj:`~.cudaStreamCaptureStatusNone`: The stream is not capturing.
17875 - :py:obj:`~.cudaStreamCaptureStatusActive`: The stream is capturing.
17877 - :py:obj:`~.cudaStreamCaptureStatusInvalidated`: The stream was
17878 capturing but an error has invalidated the capture sequence. The
17879 capture sequence must be terminated with
17880 :py:obj:`~.cudaStreamEndCapture` on the stream where it was initiated
17881 in order to continue using `stream`.
17883 Note that, if this is called on :py:obj:`~.cudaStreamLegacy` (the "null
17884 stream") while a blocking stream on the same device is capturing, it
17885 will return :py:obj:`~.cudaErrorStreamCaptureImplicit` and
17886 `*pCaptureStatus` is unspecified after the call. The blocking stream
17887 capture is not invalidated.
17889 When a blocking stream is capturing, the legacy stream is in an
17890 unusable state until the blocking stream capture is terminated. The
17891 legacy stream is not supported for stream capture, but attempted use
17892 would have an implicit dependency on the capturing stream(s).
17894 Parameters
17895 ----------
17896 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17897 Stream to query
17899 Returns
17900 -------
17901 cudaError_t
17902 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorStreamCaptureImplicit`
17903 pCaptureStatus : :py:obj:`~.cudaStreamCaptureStatus`
17904 Returns the stream's capture status
17906 See Also
17907 --------
17908 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamBeginCapture`, :py:obj:`~.cudaStreamEndCapture`
17909 """
17910 cdef cyruntime.cudaStream_t cystream
17911 if stream is None:
17912 pstream = 0
17913 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
17914 pstream = int(stream)
17915 else:
17916 pstream = int(cudaStream_t(stream))
17917 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
17918 cdef cyruntime.cudaStreamCaptureStatus pCaptureStatus
17919 with nogil:
17920 err = cyruntime.cudaStreamIsCapturing(cystream, &pCaptureStatus)
17921 if err != cyruntime.cudaSuccess:
17922 return (_dict_cudaError_t[err], None)
17923 return (_dict_cudaError_t[err], cudaStreamCaptureStatus(pCaptureStatus))
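# Usage sketch (illustrative only): branch on a stream's capture status before
# issuing work that is illegal during capture.
#
#     err, status = cudaStreamIsCapturing(stream)
#     if status == cudaStreamCaptureStatus.cudaStreamCaptureStatusActive:
#         pass  # stream is recording into a graph; avoid non-capturable calls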
17925@cython.embedsignature(True)
17926def cudaStreamGetCaptureInfo(stream):
17927 """ Query a stream's capture state.
17929 Query stream state related to stream capture.
17931 If called on :py:obj:`~.cudaStreamLegacy` (the "null stream") while a
17932 stream not created with :py:obj:`~.cudaStreamNonBlocking` is capturing,
17933 returns :py:obj:`~.cudaErrorStreamCaptureImplicit`.
17935 Valid data (other than capture status) is returned only if both of the
17936 following are true:
17938 - the call returns cudaSuccess
17940 - the returned capture status is
17941 :py:obj:`~.cudaStreamCaptureStatusActive`
17943 If `edgeData_out` is non-NULL then `dependencies_out` must be as well.
17944 If `dependencies_out` is non-NULL and `edgeData_out` is NULL, but there
17945 is non-zero edge data for one or more of the current stream
17946 dependencies, the call will return :py:obj:`~.cudaErrorLossyQuery`.
17948 Parameters
17949 ----------
17950 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17951 The stream to query
17953 Returns
17954 -------
17955 cudaError_t
17956 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorStreamCaptureImplicit`, :py:obj:`~.cudaErrorLossyQuery`
17957 captureStatus_out : :py:obj:`~.cudaStreamCaptureStatus`
17958 Location to return the capture status of the stream; required
17959 id_out : unsigned long long
17960 Optional location to return an id for the capture sequence, which
17961 is unique over the lifetime of the process
17962 graph_out : :py:obj:`~.cudaGraph_t`
17963 Optional location to return the graph being captured into. All
17964 operations other than destroy and node removal are permitted on the
17965 graph while the capture sequence is in progress. This API does not
17966 transfer ownership of the graph, which is transferred or destroyed
17967 at :py:obj:`~.cudaStreamEndCapture`. Note that the graph handle may
17968 be invalidated before end of capture for certain errors. Nodes that
17969 are or become unreachable from the original stream at
17970 :py:obj:`~.cudaStreamEndCapture` due to direct actions on the graph
17971 do not trigger :py:obj:`~.cudaErrorStreamCaptureUnjoined`.
17972 dependencies_out : list[:py:obj:`~.cudaGraphNode_t`]
17973 Optional location to store a pointer to an array of nodes. The next
17974 node to be captured in the stream will depend on this set of nodes,
17975 absent operations such as event wait which modify this set. The
17976 array pointer is valid until the next API call which operates on
17977 the stream or until the capture is terminated. The node handles may
17978 be copied out and are valid until they or the graph is destroyed.
17979 The driver-owned array may also be passed directly to APIs that
17980 operate on the graph (not the stream) without copying.
17981 edgeData_out : list[:py:obj:`~.cudaGraphEdgeData`]
17982 Optional location to store a pointer to an array of graph edge
17983 data. This array parallels `dependencies_out`; the next node to be
17984 added has an edge to `dependencies_out`[i] with annotation
17985 `edgeData_out`[i] for each `i`. The array pointer is valid until
17986 the next API call which operates on the stream or until the capture
17987 is terminated.
17988 numDependencies_out : int
17989 Optional location to store the size of the array returned in
17990 dependencies_out.
17992 See Also
17993 --------
17994 :py:obj:`~.cudaStreamBeginCapture`, :py:obj:`~.cudaStreamIsCapturing`, :py:obj:`~.cudaStreamUpdateCaptureDependencies`
17995 """
17996 cdef cyruntime.cudaStream_t cystream
17997 if stream is None:
17998 pstream = 0
17999 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
18000 pstream = int(stream)
18001 else:
18002 pstream = int(cudaStream_t(stream))
18003 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
18004 cdef cyruntime.cudaStreamCaptureStatus captureStatus_out
18005 cdef unsigned long long id_out = 0
18006 cdef cudaGraph_t graph_out = cudaGraph_t()
18007 cdef const cyruntime.cudaGraphNode_t* cydependencies_out = NULL
18008 pydependencies_out = []
18009 cdef const cyruntime.cudaGraphEdgeData* cyedgeData_out = NULL
18010 pyedgeData_out = []
18011 cdef size_t numDependencies_out = 0
18012 with nogil:
18013 err = cyruntime.cudaStreamGetCaptureInfo(cystream, &captureStatus_out, &id_out, <cyruntime.cudaGraph_t*>graph_out._pvt_ptr, &cydependencies_out, &cyedgeData_out, &numDependencies_out)
18014 if cudaError_t(err) == cudaError_t(0):
18015 pydependencies_out = [cudaGraphNode_t(init_value=<void_ptr>cydependencies_out[idx]) for idx in range(numDependencies_out)]
18016 if cudaError_t(err) == cudaError_t(0):
18017 pyedgeData_out = [cudaGraphEdgeData(_ptr=<void_ptr>&cyedgeData_out[idx]) for idx in range(numDependencies_out)]
18018 if err != cyruntime.cudaSuccess:
18019 return (_dict_cudaError_t[err], None, None, None, None, None, None)
18020 return (_dict_cudaError_t[err], cudaStreamCaptureStatus(captureStatus_out), id_out, graph_out, pydependencies_out, pyedgeData_out, numDependencies_out)
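# Usage sketch (illustrative only): inspect an active capture. All outputs other
# than the status are only meaningful while the status is
# cudaStreamCaptureStatusActive.
#
#     (err, status, cap_id, graph, deps,
#      edge_data, num_deps) = cudaStreamGetCaptureInfo(stream)
#     if status == cudaStreamCaptureStatus.cudaStreamCaptureStatusActive:
#         print("capture", cap_id, "has", num_deps, "leaf node(s)")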
18022@cython.embedsignature(True)
18023def cudaStreamUpdateCaptureDependencies(stream, dependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], dependencyData : Optional[tuple[cudaGraphEdgeData] | list[cudaGraphEdgeData]], size_t numDependencies, unsigned int flags):
18024 """ Update the set of dependencies in a capturing stream.
18026 Modifies the dependency set of a capturing stream. The dependency set
18027 is the set of nodes that the next captured node in the stream will
18028 depend on.
18030 Valid flags are :py:obj:`~.cudaStreamAddCaptureDependencies` and
18031 :py:obj:`~.cudaStreamSetCaptureDependencies`. These control whether the
18032 set passed to the API is added to the existing set or replaces it. A
18033 flags value of 0 defaults to
18034 :py:obj:`~.cudaStreamAddCaptureDependencies`.
18036 Nodes that are removed from the dependency set via this API do not
18037 result in :py:obj:`~.cudaErrorStreamCaptureUnjoined` if they are
18038 unreachable from the stream at :py:obj:`~.cudaStreamEndCapture`.
18040 Returns :py:obj:`~.cudaErrorIllegalState` if the stream is not
18041 capturing.
18043 Parameters
18044 ----------
18045 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
18046 The stream to update
18047 dependencies : list[:py:obj:`~.cudaGraphNode_t`]
18048 The set of dependencies to add
18049 dependencyData : list[:py:obj:`~.cudaGraphEdgeData`]
18050 Optional array of data associated with each dependency.
18051 numDependencies : size_t
18052 The size of the dependencies array
18053 flags : unsigned int
18054 See above
18056 Returns
18057 -------
18058 cudaError_t
18059 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorIllegalState`
18061 See Also
18062 --------
18063 :py:obj:`~.cudaStreamBeginCapture`, :py:obj:`~.cudaStreamGetCaptureInfo`
18064 """
18065 dependencyData = [] if dependencyData is None else dependencyData
18066 if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in dependencyData):
18067 raise TypeError("Argument 'dependencyData' is not an instance of the expected type (tuple[cudaGraphEdgeData] or list[cudaGraphEdgeData])")
18068 dependencies = [] if dependencies is None else dependencies
18069 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in dependencies):
18070 raise TypeError("Argument 'dependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
# Mirror the bounds check performed by the other capture APIs before allocating.
if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
18071 cdef cyruntime.cudaStream_t cystream
18072 if stream is None:
18073 pstream = 0
18074 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
18075 pstream = int(stream)
18076 else:
18077 pstream = int(cudaStream_t(stream))
18078 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
18079 cdef cyruntime.cudaGraphNode_t* cydependencies = NULL
18080 if len(dependencies) > 1:
18081 cydependencies = <cyruntime.cudaGraphNode_t*> calloc(len(dependencies), sizeof(cyruntime.cudaGraphNode_t))
18082 if cydependencies is NULL:
18083 raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
18084 else:
18085 for idx in range(len(dependencies)):
18086 cydependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>dependencies[idx])._pvt_ptr[0]
18087 elif len(dependencies) == 1:
18088 cydependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>dependencies[0])._pvt_ptr
18089 cdef cyruntime.cudaGraphEdgeData* cydependencyData = NULL
18090 if len(dependencyData) > 1:
18091 cydependencyData = <cyruntime.cudaGraphEdgeData*> calloc(len(dependencyData), sizeof(cyruntime.cudaGraphEdgeData))
18092 if cydependencyData is NULL:
18093 raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
18094 for idx in range(len(dependencyData)):
18095 string.memcpy(&cydependencyData[idx], (<cudaGraphEdgeData>dependencyData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData))
18096 elif len(dependencyData) == 1:
18097 cydependencyData = (<cudaGraphEdgeData>dependencyData[0])._pvt_ptr
18098 with nogil:
18099 err = cyruntime.cudaStreamUpdateCaptureDependencies(cystream, cydependencies, cydependencyData, numDependencies, flags)
18100 if len(dependencies) > 1 and cydependencies is not NULL:
18101 free(cydependencies)
18102 if len(dependencyData) > 1 and cydependencyData is not NULL:
18103 free(cydependencyData)
18104 return (_dict_cudaError_t[err],)
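# Usage sketch (illustrative only): add the current capture's leaf nodes back as
# explicit dependencies. flags=0 selects the default
# cudaStreamAddCaptureDependencies behavior described above.
#
#     err, _, _, _, deps, _, n = cudaStreamGetCaptureInfo(stream)
#     err, = cudaStreamUpdateCaptureDependencies(stream, deps, None, n, 0)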
18106@cython.embedsignature(True)
18107def cudaEventCreate():
18108 """ Creates an event object.
18110 Creates an event object for the current device using
18111 :py:obj:`~.cudaEventDefault`.
18113 Returns
18114 -------
18115 cudaError_t
18116 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorLaunchFailure`, :py:obj:`~.cudaErrorMemoryAllocation`
18117 event : :py:obj:`~.cudaEvent_t`
18118 Newly created event
18120 See Also
18121 --------
18122 :py:obj:`~.cudaEventCreate (C++ API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cuEventCreate`
18123 """
18124 cdef cudaEvent_t event = cudaEvent_t()
18125 with nogil:
18126 err = cyruntime.cudaEventCreate(<cyruntime.cudaEvent_t*>event._pvt_ptr)
18127 if err != cyruntime.cudaSuccess:
18128 return (_dict_cudaError_t[err], None)
18129 return (_dict_cudaError_t[err], event)
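# Usage sketch (illustrative only): the create/record/synchronize/destroy
# lifecycle shared by the event APIs below; `stream` is assumed to exist.
#
#     err, event = cudaEventCreate()
#     err, = cudaEventRecord(event, stream)
#     err, = cudaEventSynchronize(event)
#     err, = cudaEventDestroy(event)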
18131@cython.embedsignature(True)
18132def cudaEventCreateWithFlags(unsigned int flags):
18133 """ Creates an event object with the specified flags.
18135 Creates an event object for the current device with the specified
18136 flags. Valid flags include:
18138 - :py:obj:`~.cudaEventDefault`: Default event creation flag.
18140 - :py:obj:`~.cudaEventBlockingSync`: Specifies that event should use
18141 blocking synchronization. A host thread that uses
18142 :py:obj:`~.cudaEventSynchronize()` to wait on an event created with
18143 this flag will block until the event actually completes.
18145 - :py:obj:`~.cudaEventDisableTiming`: Specifies that the created event
18146 does not need to record timing data. Events created with this flag
18147 specified and the :py:obj:`~.cudaEventBlockingSync` flag not
18148 specified will provide the best performance when used with
18149 :py:obj:`~.cudaStreamWaitEvent()` and :py:obj:`~.cudaEventQuery()`.
18151 - :py:obj:`~.cudaEventInterprocess`: Specifies that the created event
18152 may be used as an interprocess event by
18153 :py:obj:`~.cudaIpcGetEventHandle()`.
18154 :py:obj:`~.cudaEventInterprocess` must be specified along with
18155 :py:obj:`~.cudaEventDisableTiming`.
18157 Parameters
18158 ----------
18159 flags : unsigned int
18160 Flags for new event
18162 Returns
18163 -------
18164 cudaError_t
18165 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorLaunchFailure`, :py:obj:`~.cudaErrorMemoryAllocation`
18166 event : :py:obj:`~.cudaEvent_t`
18167 Newly created event
18169 See Also
18170 --------
18171 :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cuEventCreate`
18172 """
18173 cdef cudaEvent_t event = cudaEvent_t()
18174 with nogil:
18175 err = cyruntime.cudaEventCreateWithFlags(<cyruntime.cudaEvent_t*>event._pvt_ptr, flags)
18176 if err != cyruntime.cudaSuccess:
18177 return (_dict_cudaError_t[err], None)
18178 return (_dict_cudaError_t[err], event)
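# Usage sketch (illustrative only): a synchronization-only event. Disabling
# timing, per the docstring above, gives the best cudaStreamWaitEvent /
# cudaEventQuery performance.
#
#     err, event = cudaEventCreateWithFlags(cudaEventDisableTiming)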
18180@cython.embedsignature(True)
18181def cudaEventRecord(event, stream):
18182 """ Records an event.
18184 Captures in `event` the contents of `stream` at the time of this call.
18185 `event` and `stream` must be on the same CUDA context. Calls such as
18186 :py:obj:`~.cudaEventQuery()` or :py:obj:`~.cudaStreamWaitEvent()` will
18187 then examine or wait for completion of the work that was captured. Uses
18188 of `stream` after this call do not modify `event`. See note on default
18189 stream behavior for what is captured in the default case.
18191 :py:obj:`~.cudaEventRecord()` can be called multiple times on the same
18192 event and will overwrite the previously captured state. Other APIs such
18193 as :py:obj:`~.cudaStreamWaitEvent()` use the most recently captured
18194 state at the time of the API call, and are not affected by later calls
18195 to :py:obj:`~.cudaEventRecord()`. Before the first call to
18196 :py:obj:`~.cudaEventRecord()`, an event represents an empty set of
18197 work, so for example :py:obj:`~.cudaEventQuery()` would return
18198 :py:obj:`~.cudaSuccess`.
18200 Parameters
18201 ----------
18202 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
18203 Event to record
18204 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
18205 Stream in which to record event
18207 Returns
18208 -------
18209 cudaError_t
18210 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorLaunchFailure`
18212 See Also
18213 --------
18214 :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cuEventRecord`
18215 """
18216 cdef cyruntime.cudaStream_t cystream
18217 if stream is None:
18218 pstream = 0
18219 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
18220 pstream = int(stream)
18221 else:
18222 pstream = int(cudaStream_t(stream))
18223 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
18224 cdef cyruntime.cudaEvent_t cyevent
18225 if event is None:
18226 pevent = 0
18227 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
18228 pevent = int(event)
18229 else:
18230 pevent = int(cudaEvent_t(event))
18231 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
18232 with nogil:
18233 err = cyruntime.cudaEventRecord(cyevent, cystream)
18234 return (_dict_cudaError_t[err],)
18236@cython.embedsignature(True)
18237def cudaEventRecordWithFlags(event, stream, unsigned int flags):
18238 """ Records an event.
18240 Captures in `event` the contents of `stream` at the time of this call.
18241 `event` and `stream` must be on the same CUDA context. Calls such as
18242 :py:obj:`~.cudaEventQuery()` or :py:obj:`~.cudaStreamWaitEvent()` will
18243 then examine or wait for completion of the work that was captured. Uses
18244 of `stream` after this call do not modify `event`. See note on default
18245 stream behavior for what is captured in the default case.
18247 :py:obj:`~.cudaEventRecordWithFlags()` can be called multiple times on
18248 the same event and will overwrite the previously captured state. Other
18249 APIs such as :py:obj:`~.cudaStreamWaitEvent()` use the most recently
18250 captured state at the time of the API call, and are not affected by
18251 later calls to :py:obj:`~.cudaEventRecordWithFlags()`. Before the first
18252 call to :py:obj:`~.cudaEventRecordWithFlags()`, an event represents an
18253 empty set of work, so for example :py:obj:`~.cudaEventQuery()` would
18254 return :py:obj:`~.cudaSuccess`.
18256 flags include:
18258 - :py:obj:`~.cudaEventRecordDefault`: Default event creation flag.
18260 - :py:obj:`~.cudaEventRecordExternal`: Event is captured in the graph
18261 as an external event node when performing stream capture.
18263 Parameters
18264 ----------
18265 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
18266 Event to record
18267 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
18268 Stream in which to record event
18269 flags : unsigned int
18270 Parameters for the operation (see above)
18272 Returns
18273 -------
18274 cudaError_t
18275 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorLaunchFailure`
18277 See Also
18278 --------
18279 :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cuEventRecord`
18280 """
18281 cdef cyruntime.cudaStream_t cystream
18282 if stream is None:
18283 pstream = 0
18284 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
18285 pstream = int(stream)
18286 else:
18287 pstream = int(cudaStream_t(stream))
18288 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
18289 cdef cyruntime.cudaEvent_t cyevent
18290 if event is None:
18291 pevent = 0
18292 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
18293 pevent = int(event)
18294 else:
18295 pevent = int(cudaEvent_t(event))
18296 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
18297 with nogil:
18298 err = cyruntime.cudaEventRecordWithFlags(cyevent, cystream, flags)
18299 return (_dict_cudaError_t[err],)
18301@cython.embedsignature(True)
18302def cudaEventQuery(event):
18303 """ Queries an event's status.
18305 Queries the status of all work currently captured by `event`. See
18306 :py:obj:`~.cudaEventRecord()` for details on what is captured by an
18307 event.
18309 Returns :py:obj:`~.cudaSuccess` if all captured work has been
18310 completed, or :py:obj:`~.cudaErrorNotReady` if any captured work is
18311 incomplete.
18313 For the purposes of Unified Memory, a return value of
18314 :py:obj:`~.cudaSuccess` is equivalent to having called
18315 :py:obj:`~.cudaEventSynchronize()`.
18317 Parameters
18318 ----------
18319 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
18320 Event to query
18322 Returns
18323 -------
18324 cudaError_t
18325 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNotReady`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorLaunchFailure`
18327 See Also
18328 --------
18329 :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cuEventQuery`
18330 """
18331 cdef cyruntime.cudaEvent_t cyevent
18332 if event is None:
18333 pevent = 0
18334 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
18335 pevent = int(event)
18336 else:
18337 pevent = int(cudaEvent_t(event))
18338 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
18339 with nogil:
18340 err = cyruntime.cudaEventQuery(cyevent)
18341 return (_dict_cudaError_t[err],)
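# Usage sketch (illustrative only): non-blocking completion check; as with
# cudaStreamQuery, cudaErrorNotReady is an expected status, not a failure.
#
#     err, = cudaEventQuery(event)
#     done = (err == cudaError_t.cudaSuccess)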
18343@cython.embedsignature(True)
18344def cudaEventSynchronize(event):
18345 """ Waits for an event to complete.
18347 Waits until the completion of all work currently captured in `event`.
18348 See :py:obj:`~.cudaEventRecord()` for details on what is captured by an
18349 event.
18351 Waiting for an event that was created with the
18352 :py:obj:`~.cudaEventBlockingSync` flag will cause the calling CPU
18353 thread to block until the event has been completed by the device. If
18354 the :py:obj:`~.cudaEventBlockingSync` flag has not been set, then the
18355 CPU thread will busy-wait until the event has been completed by the
18356 device.
18358 Parameters
18359 ----------
18360 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
18361 Event to wait for
18363 Returns
18364 -------
18365 cudaError_t
18366 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorLaunchFailure`
18368 See Also
18369 --------
18370 :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cuEventSynchronize`
18371 """
18372 cdef cyruntime.cudaEvent_t cyevent
18373 if event is None:
18374 pevent = 0
18375 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
18376 pevent = int(event)
18377 else:
18378 pevent = int(cudaEvent_t(event))
18379 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
18380 with nogil:
18381 err = cyruntime.cudaEventSynchronize(cyevent)
18382 return (_dict_cudaError_t[err],)
18384@cython.embedsignature(True)
18385def cudaEventDestroy(event):
18386 """ Destroys an event object.
18388 Destroys the event specified by `event`.
18390 An event may be destroyed before it is complete (i.e., while
18391 :py:obj:`~.cudaEventQuery()` would return
18392 :py:obj:`~.cudaErrorNotReady`). In this case, the call does not block
18393 on completion of the event, and any associated resources will
18394 automatically be released asynchronously at completion.
18396 Parameters
18397 ----------
18398 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
18399 Event to destroy
18401 Returns
18402 -------
18403 cudaError_t
18404 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorLaunchFailure`
18406 See Also
18407 --------
18408 :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cuEventDestroy`
18409 """
18410 cdef cyruntime.cudaEvent_t cyevent
18411 if event is None:
18412 pevent = 0
18413 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
18414 pevent = int(event)
18415 else:
18416 pevent = int(cudaEvent_t(event))
18417 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
18418 with nogil:
18419 err = cyruntime.cudaEventDestroy(cyevent)
18420 return (_dict_cudaError_t[err],)
18422@cython.embedsignature(True)
18423def cudaEventElapsedTime(start, end):
18424 """ Computes the elapsed time between events.
18426 Computes the elapsed time between two events (in milliseconds with a
18427 resolution of around 0.5 microseconds). Note this API is not guaranteed
18428 to return the latest errors for pending work. As such this API is
18429 intended to serve as an elapsed time calculation only, and polling for
18430 completion on the events to be compared should be done with
18431 :py:obj:`~.cudaEventQuery` instead.
18433 If either event was last recorded in a non-NULL stream, the resulting
18434 time may be greater than expected (even if both used the same stream
18435 handle). This happens because the :py:obj:`~.cudaEventRecord()`
18436 operation takes place asynchronously and there is no guarantee that the
18437 measured latency is actually just between the two events. Any number of
18438 other different stream operations could execute in between the two
18439 measured events, thus altering the timing in a significant way.
18441 If :py:obj:`~.cudaEventRecord()` has not been called on either event,
18442 then :py:obj:`~.cudaErrorInvalidResourceHandle` is returned. If
18443 :py:obj:`~.cudaEventRecord()` has been called on both events but one or
18444 both of them has not yet been completed (that is,
18445 :py:obj:`~.cudaEventQuery()` would return :py:obj:`~.cudaErrorNotReady`
18446 on at least one of the events), :py:obj:`~.cudaErrorNotReady` is
18447 returned. If either event was created with the
18448 :py:obj:`~.cudaEventDisableTiming` flag, then this function will return
18449 :py:obj:`~.cudaErrorInvalidResourceHandle`.
18451 Parameters
18452 ----------
18453 start : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
18454 Starting event
18455 end : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
18456 Ending event
18458 Returns
18459 -------
18460 cudaError_t
18461 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNotReady`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorLaunchFailure`, :py:obj:`~.cudaErrorUnknown`
18462 ms : float
18463 Time between `start` and `end` in ms
18465 See Also
18466 --------
18467 :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cuEventElapsedTime`
18468 """
18469 cdef cyruntime.cudaEvent_t cyend
18470 if end is None:
18471 pend = 0
18472 elif isinstance(end, (cudaEvent_t,driver.CUevent)):
18473 pend = int(end)
18474 else:
18475 pend = int(cudaEvent_t(end))
18476 cyend = <cyruntime.cudaEvent_t><void_ptr>pend
18477 cdef cyruntime.cudaEvent_t cystart
18478 if start is None:
18479 pstart = 0
18480 elif isinstance(start, (cudaEvent_t,driver.CUevent)):
18481 pstart = int(start)
18482 else:
18483 pstart = int(cudaEvent_t(start))
18484 cystart = <cyruntime.cudaEvent_t><void_ptr>pstart
18485 cdef float ms = 0
18486 with nogil:
18487 err = cyruntime.cudaEventElapsedTime(&ms, cystart, cyend)
18488 if err != cyruntime.cudaSuccess:
18489 return (_dict_cudaError_t[err], None)
18490 return (_dict_cudaError_t[err], ms)
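# Usage sketch (illustrative only): time a span of stream work with a pair of
# default (timing-enabled) events. Synchronize on the end event before asking
# for the elapsed time, otherwise cudaErrorNotReady is returned.
#
#     err, start = cudaEventCreate()
#     err, end = cudaEventCreate()
#     err, = cudaEventRecord(start, stream)
#     # ... enqueue the work to be timed on `stream` ...
#     err, = cudaEventRecord(end, stream)
#     err, = cudaEventSynchronize(end)
#     err, ms = cudaEventElapsedTime(start, end)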
18492@cython.embedsignature(True)
18493def cudaImportExternalMemory(memHandleDesc : Optional[cudaExternalMemoryHandleDesc]):
18494 """ Imports an external memory object.
18496 Imports an externally allocated memory object and returns a handle to
18497 that in `extMem_out`.
18499 The properties of the handle being imported must be described in
18500 `memHandleDesc`. The :py:obj:`~.cudaExternalMemoryHandleDesc` structure
18501 is defined as follows:
18503 **View CUDA Toolkit Documentation for a C++ code example**
18505 where :py:obj:`~.cudaExternalMemoryHandleDesc.type` specifies the type
18506 of handle being imported. :py:obj:`~.cudaExternalMemoryHandleType` is
18507 defined as:
18509 **View CUDA Toolkit Documentation for a C++ code example**
18511 If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
18512 :py:obj:`~.cudaExternalMemoryHandleTypeOpaqueFd`, then
18513 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::fd must be a valid
18514 file descriptor referencing a memory object. Ownership of the file
18515 descriptor is transferred to the CUDA driver when the handle is
18516 imported successfully. Performing any operations on the file descriptor
18517 after it is imported results in undefined behavior.
18519 If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
18520 :py:obj:`~.cudaExternalMemoryHandleTypeOpaqueWin32`, then exactly one
18521 of :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle and
18522 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name must not
18523 be NULL. If
18524 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle is not
18525 NULL, then it must represent a valid shared NT handle that references a
18526 memory object. Ownership of this handle is not transferred to CUDA
18527 after the import operation, so the application must release the handle
18528 using the appropriate system call. If
18529 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name is not
18530 NULL, then it must point to a NULL-terminated array of UTF-16
18531 characters that refers to a memory object.
18533 If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
18534 :py:obj:`~.cudaExternalMemoryHandleTypeOpaqueWin32Kmt`, then
18535 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle must be
18536 non-NULL and
18537 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name must be
18538 NULL. The handle specified must be a globally shared KMT handle. This
18539 handle does not hold a reference to the underlying object, and thus
18540 will be invalid when all references to the memory object are destroyed.
18542 If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
18543 :py:obj:`~.cudaExternalMemoryHandleTypeD3D12Heap`, then exactly one of
18544 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle and
18545 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name must not
18546 be NULL. If
18547 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle is not
18548 NULL, then it must represent a valid shared NT handle that is returned
18549 by ID3D12Device::CreateSharedHandle when referring to a ID3D12Heap
18550 object. This handle holds a reference to the underlying object. If
18551 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name is not
18552 NULL, then it must point to a NULL-terminated array of UTF-16
18553 characters that refers to a ID3D12Heap object.
18555 If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
18556 :py:obj:`~.cudaExternalMemoryHandleTypeD3D12Resource`, then exactly one
18557 of :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle and
18558 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name must not
18559 be NULL. If
18560 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle is not
18561 NULL, then it must represent a valid shared NT handle that is returned
18562 by ID3D12Device::CreateSharedHandle when referring to an ID3D12Resource
18563 object. This handle holds a reference to the underlying object. If
18564 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name is not
18565 NULL, then it must point to a NULL-terminated array of UTF-16
18566 characters that refers to an ID3D12Resource object.
18568 If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
18569 :py:obj:`~.cudaExternalMemoryHandleTypeD3D11Resource`, then exactly one
18570 of :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle and
18571 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name must not
18572 be NULL. If
18573 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle is
18574 not NULL, then it must represent a valid shared NT handle that is
18575 returned by IDXGIResource1::CreateSharedHandle when referring to an
18576 ID3D11Resource object. If
18577 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name is not
18578 NULL, then it must point to a NULL-terminated array of UTF-16
18579 characters that refers to an ID3D11Resource object.
18581 If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
18582 :py:obj:`~.cudaExternalMemoryHandleTypeD3D11ResourceKmt`, then
18583 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle must be
18584 non-NULL and
18585 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name must be
18586 NULL. The handle specified must be a valid shared KMT handle that is
18587 returned by IDXGIResource::GetSharedHandle when referring to an
18588 ID3D11Resource object.
18590 If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
18591 :py:obj:`~.cudaExternalMemoryHandleTypeNvSciBuf`, then
18592 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::nvSciBufObject must
18593 be NON-NULL and reference a valid NvSciBuf object. If the NvSciBuf
18594 object imported into CUDA is also mapped by other drivers, then the
18595 application must use :py:obj:`~.cudaWaitExternalSemaphoresAsync` or
18596 :py:obj:`~.cudaSignalExternalSemaphoresAsync` as appropriate barriers
18597 to maintain coherence between CUDA and the other drivers. See
18598 :py:obj:`~.cudaExternalSemaphoreWaitSkipNvSciBufMemSync` and
18599 :py:obj:`~.cudaExternalSemaphoreSignalSkipNvSciBufMemSync` for memory
18600 synchronization.
18602 The size of the memory object must be specified in
18603 :py:obj:`~.cudaExternalMemoryHandleDesc.size`.
18605 Specifying the flag :py:obj:`~.cudaExternalMemoryDedicated` in
18606 :py:obj:`~.cudaExternalMemoryHandleDesc.flags` indicates that the
18607 resource is a dedicated resource. What constitutes a dedicated
18608 resource is outside the scope of this extension. This flag must be set
18609 if :py:obj:`~.cudaExternalMemoryHandleDesc.type` is one of the
18610 following: :py:obj:`~.cudaExternalMemoryHandleTypeD3D12Resource`,
18611 :py:obj:`~.cudaExternalMemoryHandleTypeD3D11Resource`, or
18612 :py:obj:`~.cudaExternalMemoryHandleTypeD3D11ResourceKmt`.
18614 Parameters
18615 ----------
18616 memHandleDesc : :py:obj:`~.cudaExternalMemoryHandleDesc`
18617 Memory import handle descriptor
18619 Returns
18620 -------
18621 cudaError_t
18622 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorOperatingSystem`
18623 extMem_out : :py:obj:`~.cudaExternalMemory_t`
18624 Returned handle to an external memory object
18626 See Also
18627 --------
18628 :py:obj:`~.cudaDestroyExternalMemory`, :py:obj:`~.cudaExternalMemoryGetMappedBuffer`, :py:obj:`~.cudaExternalMemoryGetMappedMipmappedArray`
18630 Notes
18631 -----
18632 If the Vulkan memory imported into CUDA is mapped on the CPU, then the application must use vkInvalidateMappedMemoryRanges/vkFlushMappedMemoryRanges, as well as appropriate Vulkan pipeline barriers, to maintain coherence between CPU and GPU. For more information on these APIs, please refer to the "Synchronization
18633 and Cache Control" chapter of the Vulkan specification.
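Examples
--------
A minimal sketch of importing a memory object through a POSIX file
descriptor. Illustrative only: `shared_fd` and `nbytes` are hypothetical
values assumed to come from another API such as Vulkan, and the fd is
owned by the CUDA driver after a successful import.

>>> from cuda.bindings import runtime
>>> desc = runtime.cudaExternalMemoryHandleDesc()
>>> desc.type = runtime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueFd
>>> desc.handle.fd = shared_fd  # exported by the other API (hypothetical)
>>> desc.size = nbytes          # size of the memory object in bytes
>>> err, extMem = runtime.cudaImportExternalMemory(desc)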
18634 """
18635 cdef cudaExternalMemory_t extMem_out = cudaExternalMemory_t()
18636 cdef cyruntime.cudaExternalMemoryHandleDesc* cymemHandleDesc_ptr = memHandleDesc._pvt_ptr if memHandleDesc is not None else NULL
18637 with nogil:
18638 err = cyruntime.cudaImportExternalMemory(<cyruntime.cudaExternalMemory_t*>extMem_out._pvt_ptr, cymemHandleDesc_ptr)
18639 if err != cyruntime.cudaSuccess:
18640 return (_dict_cudaError_t[err], None)
18641 return (_dict_cudaError_t[err], extMem_out)
18643@cython.embedsignature(True)
18644def cudaExternalMemoryGetMappedBuffer(extMem, bufferDesc : Optional[cudaExternalMemoryBufferDesc]):
18645 """ Maps a buffer onto an imported memory object.
18647 Maps a buffer onto an imported memory object and returns a device
18648 pointer in `devPtr`.
18650 The properties of the buffer being mapped must be described in
18651 `bufferDesc`. The :py:obj:`~.cudaExternalMemoryBufferDesc` structure is
18652 defined as follows:
18654 **View CUDA Toolkit Documentation for a C++ code example**
18656 where :py:obj:`~.cudaExternalMemoryBufferDesc.offset` is the offset in
18657 the memory object at which the buffer's base address is located.
18658 :py:obj:`~.cudaExternalMemoryBufferDesc.size` is the size of the
18659 buffer. :py:obj:`~.cudaExternalMemoryBufferDesc.flags` must be zero.
18661 The offset and size have to be suitably aligned to match the
18662 requirements of the external API. Mapping two buffers whose ranges
18663 overlap may or may not result in the same virtual address being
18664 returned for the overlapped portion. In such cases, the application
18665 must ensure that all accesses to that region from the GPU are volatile.
18666 Otherwise writes made via one address are not guaranteed to be visible
18667 via the other address, even if they're issued by the same thread. It is
18668 recommended that applications map the combined range instead of mapping
18669 separate buffers and then apply the appropriate offsets to the returned
18670 pointer to derive the individual buffers.
18672 The returned pointer `devPtr` must be freed using :py:obj:`~.cudaFree`.
18674 Parameters
18675 ----------
18676 extMem : :py:obj:`~.cudaExternalMemory_t`
18677 Handle to external memory object
18678 bufferDesc : :py:obj:`~.cudaExternalMemoryBufferDesc`
18679 Buffer descriptor
18681 Returns
18682 -------
18683 cudaError_t
18684 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
18685 devPtr : Any
18686 Returned device pointer to buffer
18688 See Also
18689 --------
18690 :py:obj:`~.cudaImportExternalMemory`, :py:obj:`~.cudaDestroyExternalMemory`, :py:obj:`~.cudaExternalMemoryGetMappedMipmappedArray`
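Examples
--------
A minimal sketch, assuming `extMem` was returned by a successful
:py:obj:`~.cudaImportExternalMemory` call and the hypothetical `nbytes`
does not exceed the imported size:

>>> from cuda.bindings import runtime
>>> buf_desc = runtime.cudaExternalMemoryBufferDesc()
>>> buf_desc.offset = 0
>>> buf_desc.size = nbytes
>>> buf_desc.flags = 0  # must be zero
>>> err, devPtr = runtime.cudaExternalMemoryGetMappedBuffer(extMem, buf_desc)
>>> # ... use devPtr with kernels or cudaMemcpy ...
>>> err, = runtime.cudaFree(devPtr)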
18691 """
18692 cdef cyruntime.cudaExternalMemory_t cyextMem
18693 if extMem is None:
18694 pextMem = 0
18695 elif isinstance(extMem, (cudaExternalMemory_t,)):
18696 pextMem = int(extMem)
18697 else:
18698 pextMem = int(cudaExternalMemory_t(extMem))
18699 cyextMem = <cyruntime.cudaExternalMemory_t><void_ptr>pextMem
18700 cdef void_ptr devPtr = 0
18701 cdef cyruntime.cudaExternalMemoryBufferDesc* cybufferDesc_ptr = bufferDesc._pvt_ptr if bufferDesc is not None else NULL
18702 with nogil:
18703 err = cyruntime.cudaExternalMemoryGetMappedBuffer(<void**>&devPtr, cyextMem, cybufferDesc_ptr)
18704 if err != cyruntime.cudaSuccess:
18705 return (_dict_cudaError_t[err], None)
18706 return (_dict_cudaError_t[err], devPtr)
18708@cython.embedsignature(True)
18709def cudaExternalMemoryGetMappedMipmappedArray(extMem, mipmapDesc : Optional[cudaExternalMemoryMipmappedArrayDesc]):
18710 """ Maps a CUDA mipmapped array onto an external memory object.
18712 Maps a CUDA mipmapped array onto an external object and returns a
18713 handle to it in `mipmap`.
18715 The properties of the CUDA mipmapped array being mapped must be
18716 described in `mipmapDesc`. The structure
18717 :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc` is defined as follows:
18719 **View CUDA Toolkit Documentation for a C++ code example**
18721 where :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.offset` is the
18722 offset in the memory object where the base level of the mipmap chain
18723 is. :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.formatDesc`
18724 describes the format of the data.
18725 :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.extent` specifies the
18726 dimensions of the base level of the mipmap chain.
18727 :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.flags` are flags
18728 associated with CUDA mipmapped arrays. For further details, please
18729 refer to the documentation for :py:obj:`~.cudaMalloc3DArray`. Note that
18730 if the mipmapped array is bound as a color target in the graphics API,
18731 then the flag :py:obj:`~.cudaArrayColorAttachment` must be specified in
18732 :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.flags`.
18733 :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.numLevels` specifies
18734 the total number of levels in the mipmap chain.
18736 The returned CUDA mipmapped array must be freed using
18737 :py:obj:`~.cudaFreeMipmappedArray`.
18739 Parameters
18740 ----------
18741 extMem : :py:obj:`~.cudaExternalMemory_t`
18742 Handle to external memory object
18743 mipmapDesc : :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc`
18744 CUDA array descriptor
18746 Returns
18747 -------
18748 cudaError_t
18749 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
18750 mipmap : :py:obj:`~.cudaMipmappedArray_t`
18751 Returned CUDA mipmapped array
18753 See Also
18754 --------
18755 :py:obj:`~.cudaImportExternalMemory`, :py:obj:`~.cudaDestroyExternalMemory`, :py:obj:`~.cudaExternalMemoryGetMappedBuffer`
18757 Notes
18758 -----
18759 If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is :py:obj:`~.cudaExternalMemoryHandleTypeNvSciBuf`, then :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.numLevels` must not be greater than 1.
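Examples
--------
A minimal sketch mapping a single-level 2D float array (hypothetical
sizes; `extMem` from :py:obj:`~.cudaImportExternalMemory`; nested
struct fields are assumed to be assignable in place, as elsewhere in
these bindings):

>>> from cuda.bindings import runtime
>>> mip_desc = runtime.cudaExternalMemoryMipmappedArrayDesc()
>>> mip_desc.offset = 0
>>> mip_desc.formatDesc.x = 32  # one 32-bit channel
>>> mip_desc.formatDesc.f = runtime.cudaChannelFormatKind.cudaChannelFormatKindFloat
>>> mip_desc.extent.width = 256
>>> mip_desc.extent.height = 256
>>> mip_desc.extent.depth = 0
>>> mip_desc.numLevels = 1
>>> err, mipmap = runtime.cudaExternalMemoryGetMappedMipmappedArray(extMem, mip_desc)
>>> err, = runtime.cudaFreeMipmappedArray(mipmap)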
18760 """
18761 cdef cyruntime.cudaExternalMemory_t cyextMem
18762 if extMem is None:
18763 pextMem = 0
18764 elif isinstance(extMem, (cudaExternalMemory_t,)):
18765 pextMem = int(extMem)
18766 else:
18767 pextMem = int(cudaExternalMemory_t(extMem))
18768 cyextMem = <cyruntime.cudaExternalMemory_t><void_ptr>pextMem
18769 cdef cudaMipmappedArray_t mipmap = cudaMipmappedArray_t()
18770 cdef cyruntime.cudaExternalMemoryMipmappedArrayDesc* cymipmapDesc_ptr = mipmapDesc._pvt_ptr if mipmapDesc is not None else NULL
18771 with nogil:
18772 err = cyruntime.cudaExternalMemoryGetMappedMipmappedArray(<cyruntime.cudaMipmappedArray_t*>mipmap._pvt_ptr, cyextMem, cymipmapDesc_ptr)
18773 if err != cyruntime.cudaSuccess:
18774 return (_dict_cudaError_t[err], None)
18775 return (_dict_cudaError_t[err], mipmap)
18777@cython.embedsignature(True)
18778def cudaDestroyExternalMemory(extMem):
18779 """ Destroys an external memory object.
18781 Destroys the specified external memory object. Any existing buffers and
18782 CUDA mipmapped arrays mapped onto this object must no longer be used
18783 and must be explicitly freed using :py:obj:`~.cudaFree` and
18784 :py:obj:`~.cudaFreeMipmappedArray` respectively.
18786 Parameters
18787 ----------
18788 extMem : :py:obj:`~.cudaExternalMemory_t`
18789 External memory object to be destroyed
18791 Returns
18792 -------
18793 cudaError_t
18794 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`
18796 See Also
18797 --------
18798 :py:obj:`~.cudaImportExternalMemory`, :py:obj:`~.cudaExternalMemoryGetMappedBuffer`, :py:obj:`~.cudaExternalMemoryGetMappedMipmappedArray`
18799 """
18800 cdef cyruntime.cudaExternalMemory_t cyextMem
18801 if extMem is None:
18802 pextMem = 0
18803 elif isinstance(extMem, (cudaExternalMemory_t,)):
18804 pextMem = int(extMem)
18805 else:
18806 pextMem = int(cudaExternalMemory_t(extMem))
18807 cyextMem = <cyruntime.cudaExternalMemory_t><void_ptr>pextMem
18808 with nogil:
18809 err = cyruntime.cudaDestroyExternalMemory(cyextMem)
18810 return (_dict_cudaError_t[err],)
18812@cython.embedsignature(True)
18813def cudaImportExternalSemaphore(semHandleDesc : Optional[cudaExternalSemaphoreHandleDesc]):
18814 """ Imports an external semaphore.
18816 Imports an externally allocated synchronization object and returns a
18817 handle to that in `extSem_out`.
18819 The properties of the handle being imported must be described in
18820 `semHandleDesc`. The :py:obj:`~.cudaExternalSemaphoreHandleDesc` is
18821 defined as follows:
18823 **View CUDA Toolkit Documentation for a C++ code example**
18825 where :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` specifies the
18826 type of handle being imported.
18827 :py:obj:`~.cudaExternalSemaphoreHandleType` is defined as:
18829 **View CUDA Toolkit Documentation for a C++ code example**
18831 If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
18832 :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueFd`, then
18833 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::fd must be a valid
18834 file descriptor referencing a synchronization object. Ownership of the
18835 file descriptor is transferred to the CUDA driver when the handle is
18836 imported successfully. Performing any operations on the file descriptor
18837 after it is imported results in undefined behavior.
18839 If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
18840 :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueWin32`, then exactly
18841 one of
18842 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle and
18843 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name must
18844 not be NULL. If
18845 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle is
18846 not NULL, then it must represent a valid shared NT handle that
18847 references a synchronization object. Ownership of this handle is not
18848 transferred to CUDA after the import operation, so the application must
18849 release the handle using the appropriate system call. If
18850 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name is not
18851 NULL, then it must name a valid synchronization object.
18853 If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
18854 :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt`, then
18855 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle must
18856 be non-NULL and
18857 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name must
18858 be NULL. The handle specified must be a globally shared KMT handle.
18859 This handle does not hold a reference to the underlying object, and
18860 thus will be invalid when all references to the synchronization object
18861 are destroyed.
18863 If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
18864 :py:obj:`~.cudaExternalSemaphoreHandleTypeD3D12Fence`, then exactly one
18865 of :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle
18866 and :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name
18867 must not be NULL. If
18868 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle is
18869 not NULL, then it must represent a valid shared NT handle that is
18870 returned by ID3D12Device::CreateSharedHandle when referring to an
18871 ID3D12Fence object. This handle holds a reference to the underlying
18872 object. If
18873 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name is not
18874 NULL, then it must name a valid synchronization object that refers to a
18875 valid ID3D12Fence object.
18877 If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
18878 :py:obj:`~.cudaExternalSemaphoreHandleTypeD3D11Fence`, then exactly one
18879 of :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle
18880 and :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name
18881 must not be NULL. If
18882 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle is
18883 not NULL, then it must represent a valid shared NT handle that is
18884 returned by ID3D11Fence::CreateSharedHandle. If
18885 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name is not
18886 NULL, then it must name a valid synchronization object that refers to a
18887 valid ID3D11Fence object.
18889 If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
18890 :py:obj:`~.cudaExternalSemaphoreHandleTypeNvSciSync`, then
18891 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::nvSciSyncObj
18892 represents a valid NvSciSyncObj.
18894 If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is :py:obj:`~.cudaExternalSemaphoreHandleTypeKeyedMutex`, then exactly one
18895 of :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle
18896 and :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name
18897 must not be NULL. If
18898 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle is
18899 not NULL, then it must represent a valid shared NT handle that is returned
18900 by IDXGIResource1::CreateSharedHandle when referring to an
18901 IDXGIKeyedMutex object.
18903 If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
18904 :py:obj:`~.cudaExternalSemaphoreHandleTypeKeyedMutexKmt`, then
18905 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle must
18906 be non-NULL and
18907 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name must
18908 be NULL. The handle specified must represent a valid KMT handle that is
18909 returned by IDXGIResource::GetSharedHandle when referring to an
18910 IDXGIKeyedMutex object.
18912 If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
18913 :py:obj:`~.cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd`, then
18914 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::fd must be a valid
18915 file descriptor referencing a synchronization object. Ownership of the
18916 file descriptor is transferred to the CUDA driver when the handle is
18917 imported successfully. Performing any operations on the file descriptor
18918 after it is imported results in undefined behavior.
18920 If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
18921 :py:obj:`~.cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32`, then
18922 exactly one of
18923 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle and
18924 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name must
18925 not be NULL. If
18926 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle is
18927 not NULL, then it must represent a valid shared NT handle that
18928 references a synchronization object. Ownership of this handle is not
18929 transferred to CUDA after the import operation, so the application must
18930 release the handle using the appropriate system call. If
18931 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name is not
18932 NULL, then it must name a valid synchronization object.
18934 Parameters
18935 ----------
18936 semHandleDesc : :py:obj:`~.cudaExternalSemaphoreHandleDesc`
18937 Semaphore import handle descriptor
18939 Returns
18940 -------
18941 cudaError_t
18942 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorOperatingSystem`
18943 extSem_out : :py:obj:`~.cudaExternalSemaphore_t`
18944 Returned handle to an external semaphore
18946 See Also
18947 --------
18948 :py:obj:`~.cudaDestroyExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
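Examples
--------
A minimal sketch importing a semaphore through a POSIX file descriptor
(hypothetical: `sem_fd` is exported by another API such as Vulkan and
is owned by the CUDA driver after a successful import):

>>> from cuda.bindings import runtime
>>> sem_desc = runtime.cudaExternalSemaphoreHandleDesc()
>>> sem_desc.type = runtime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueFd
>>> sem_desc.handle.fd = sem_fd
>>> err, extSem = runtime.cudaImportExternalSemaphore(sem_desc)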
18949 """
18950 cdef cudaExternalSemaphore_t extSem_out = cudaExternalSemaphore_t()
18951 cdef cyruntime.cudaExternalSemaphoreHandleDesc* cysemHandleDesc_ptr = semHandleDesc._pvt_ptr if semHandleDesc is not None else NULL
18952 with nogil:
18953 err = cyruntime.cudaImportExternalSemaphore(<cyruntime.cudaExternalSemaphore_t*>extSem_out._pvt_ptr, cysemHandleDesc_ptr)
18954 if err != cyruntime.cudaSuccess:
18955 return (_dict_cudaError_t[err], None)
18956 return (_dict_cudaError_t[err], extSem_out)
18958@cython.embedsignature(True)
18959def cudaSignalExternalSemaphoresAsync(extSemArray : Optional[tuple[cudaExternalSemaphore_t] | list[cudaExternalSemaphore_t]], paramsArray : Optional[tuple[cudaExternalSemaphoreSignalParams] | list[cudaExternalSemaphoreSignalParams]], unsigned int numExtSems, stream):
18960 """ Signals a set of external semaphore objects.
18962 Enqueues a signal operation on a set of externally allocated semaphore
18963 objects in the specified stream. The operations will be executed when
18964 all prior operations in the stream complete.
18966 The exact semantics of signaling a semaphore depends on the type of the
18967 object.
18969 If the semaphore object is any one of the following types:
18970 :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueFd`,
18971 :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueWin32`,
18972 :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt` then
18973 signaling the semaphore will set it to the signaled state.
18975 If the semaphore object is any one of the following types:
18976 :py:obj:`~.cudaExternalSemaphoreHandleTypeD3D12Fence`,
18977 :py:obj:`~.cudaExternalSemaphoreHandleTypeD3D11Fence`,
18978 :py:obj:`~.cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd`,
18979 :py:obj:`~.cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32` then
18980 the semaphore will be set to the value specified in
18981 :py:obj:`~.cudaExternalSemaphoreSignalParams`::params::fence::value.
18983 If the semaphore object is of the type
18984 :py:obj:`~.cudaExternalSemaphoreHandleTypeNvSciSync` this API sets
18985 :py:obj:`~.cudaExternalSemaphoreSignalParams`::params::nvSciSync::fence
18986 to a value that can be used by subsequent waiters of the same NvSciSync
18987 object to order operations with those currently submitted in `stream`.
18988 Such an update will overwrite previous contents of
18989 :py:obj:`~.cudaExternalSemaphoreSignalParams`::params::nvSciSync::fence.
18990 By default, signaling such an external semaphore object causes
18991 appropriate memory synchronization operations to be performed over all
18992 the external memory objects that are imported as
18993 :py:obj:`~.cudaExternalMemoryHandleTypeNvSciBuf`. This ensures that any
18994 subsequent accesses made by other importers of the same set of NvSciBuf
18995 memory object(s) are coherent. These operations can be skipped by
18996 specifying the flag
18997 :py:obj:`~.cudaExternalSemaphoreSignalSkipNvSciBufMemSync`, which can
18998 be used as a performance optimization when data coherency is not
18999 required. But specifying this flag in scenarios where data coherency is
19000 required results in undefined behavior. Also, for a semaphore object of
19001 the type :py:obj:`~.cudaExternalSemaphoreHandleTypeNvSciSync`, if the
19002 NvSciSyncAttrList used to create the NvSciSyncObj had not set the flags
19003 in :py:obj:`~.cudaDeviceGetNvSciSyncAttributes` to
19004 cudaNvSciSyncAttrSignal, this API will return cudaErrorNotSupported.
19006 :py:obj:`~.cudaExternalSemaphoreSignalParams`::params::nvSciSync::fence
19007 associated with semaphore object of the type
19008 :py:obj:`~.cudaExternalSemaphoreHandleTypeNvSciSync` can be
19009 deterministic. For this, the NvSciSyncAttrList used to create the
19010 semaphore object must have the value of the
19011 NvSciSyncAttrKey_RequireDeterministicFences key set to true.
19012 Deterministic fences allow users to enqueue a wait over the semaphore
19013 object even before the corresponding signal is enqueued. For such a
19014 semaphore object, CUDA guarantees that each signal operation will
19015 increment the fence value by '1'. Users are expected to track the count
19016 of signals enqueued on the semaphore object and insert waits accordingly.
19017 When such a semaphore object is signaled from multiple streams, due to
19018 concurrent stream execution, it is possible that the order in which the
19019 semaphore gets signaled is non-deterministic. This could lead to waiters
19020 of the semaphore getting unblocked incorrectly. Users are expected to
19021 handle such situations, either by not using the same semaphore object
19022 with deterministic fence support enabled in different streams, or by
19023 adding explicit dependencies among such streams so that the semaphore
19024 is signaled in order.
19025 :py:obj:`~.cudaExternalSemaphoreSignalParams`::params::nvSciSync::fence
19026 associated with semaphore object of the type
19027 :py:obj:`~.cudaExternalSemaphoreHandleTypeNvSciSync` can be timestamp
19028 enabled. For this the NvSciSyncAttrList used to create the object must
19029 have the value of NvSciSyncAttrKey_WaiterRequireTimestamps key set to
19030 true. Timestamps are emitted asynchronously by the GPU and CUDA saves
19031 the GPU timestamp in the corresponding NvSciSyncFence at the time of
19032 signal on GPU. Users are expected to convert GPU clocks to CPU clocks
19033 using appropriate scaling functions. Users are expected to wait for the
19034 completion of the fence before extracting timestamp using appropriate
19035 NvSciSync APIs. Users are expected to ensure that there is only one
19036 outstanding timestamp-enabled fence per CUDA-NvSciSync object at any
19037 point in time; failing to do so leads to undefined behavior. Extracting
19038 the timestamp before the corresponding fence is signaled also leads to
19039 undefined behavior. Timestamps extracted via the appropriate NvSciSync
19040 API are in microseconds.
19042 If the semaphore object is any one of the following types:
19043 :py:obj:`~.cudaExternalSemaphoreHandleTypeKeyedMutex`,
19044 :py:obj:`~.cudaExternalSemaphoreHandleTypeKeyedMutexKmt`, then the
19045 keyed mutex will be released with the key specified in
19046 :py:obj:`~.cudaExternalSemaphoreSignalParams`::params::keyedmutex::key.
19048 Parameters
19049 ----------
19050 extSemArray : list[:py:obj:`~.cudaExternalSemaphore_t`]
19051 Set of external semaphores to be signaled
19052 paramsArray : list[:py:obj:`~.cudaExternalSemaphoreSignalParams`]
19053 Array of semaphore parameters
19054 numExtSems : unsigned int
19055 Number of semaphores to signal
19056 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
19057 Stream to enqueue the signal operations in
19059 Returns
19060 -------
19061 cudaError_t
19062 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`
19064 See Also
19065 --------
19066 :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaDestroyExternalSemaphore`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
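Examples
--------
A minimal sketch signaling one imported semaphore on the default (NULL)
stream (`extSem` from :py:obj:`~.cudaImportExternalSemaphore`; for
fence-based types, `params.params.fence.value` would also be set):

>>> from cuda.bindings import runtime
>>> params = runtime.cudaExternalSemaphoreSignalParams()
>>> err, = runtime.cudaSignalExternalSemaphoresAsync([extSem], [params], 1, 0)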
19067 """
19068 cdef cyruntime.cudaStream_t cystream
19069 if stream is None:
19070 pstream = 0
19071 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
19072 pstream = int(stream)
19073 else:
19074 pstream = int(cudaStream_t(stream))
19075 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
19076 paramsArray = [] if paramsArray is None else paramsArray
19077 if not all(isinstance(_x, (cudaExternalSemaphoreSignalParams,)) for _x in paramsArray):
19078 raise TypeError("Argument 'paramsArray' is not instance of type (expected tuple[cyruntime.cudaExternalSemaphoreSignalParams,] or list[cyruntime.cudaExternalSemaphoreSignalParams,])")
19079 extSemArray = [] if extSemArray is None else extSemArray
19080 if not all(isinstance(_x, (cudaExternalSemaphore_t,)) for _x in extSemArray):
19081 raise TypeError("Argument 'extSemArray' is not instance of type (expected tuple[cyruntime.cudaExternalSemaphore_t,] or list[cyruntime.cudaExternalSemaphore_t,])")
19082 cdef cyruntime.cudaExternalSemaphore_t* cyextSemArray = NULL
19083 if len(extSemArray) > 1:
19084 cyextSemArray = <cyruntime.cudaExternalSemaphore_t*> calloc(len(extSemArray), sizeof(cyruntime.cudaExternalSemaphore_t))
19085 if cyextSemArray is NULL:
19086 raise MemoryError('Failed to allocate length x size memory: ' + str(len(extSemArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t)))
19087 else:
19088 for idx in range(len(extSemArray)):
19089 cyextSemArray[idx] = <cyruntime.cudaExternalSemaphore_t>(<cudaExternalSemaphore_t>extSemArray[idx])._pvt_ptr[0]
19090 elif len(extSemArray) == 1:
19091 cyextSemArray = <cyruntime.cudaExternalSemaphore_t*>(<cudaExternalSemaphore_t>extSemArray[0])._pvt_ptr
19092 cdef cyruntime.cudaExternalSemaphoreSignalParams* cyparamsArray = NULL
19093 if len(paramsArray) > 1:
19094 cyparamsArray = <cyruntime.cudaExternalSemaphoreSignalParams*> calloc(len(paramsArray), sizeof(cyruntime.cudaExternalSemaphoreSignalParams))
19095 if cyparamsArray is NULL:
19096 raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreSignalParams)))
19097 for idx in range(len(paramsArray)):
19098 string.memcpy(&cyparamsArray[idx], (<cudaExternalSemaphoreSignalParams>paramsArray[idx])._pvt_ptr, sizeof(cyruntime.cudaExternalSemaphoreSignalParams))
19099 elif len(paramsArray) == 1:
19100 cyparamsArray = (<cudaExternalSemaphoreSignalParams>paramsArray[0])._pvt_ptr
19101 if numExtSems > len(extSemArray): raise RuntimeError("List is too small: " + str(len(extSemArray)) + " < " + str(numExtSems))
19102 if numExtSems > len(paramsArray): raise RuntimeError("List is too small: " + str(len(paramsArray)) + " < " + str(numExtSems))
19103 with nogil:
19104 err = cyruntime.cudaSignalExternalSemaphoresAsync(cyextSemArray, cyparamsArray, numExtSems, cystream)
19105 if len(extSemArray) > 1 and cyextSemArray is not NULL:
19106 free(cyextSemArray)
19107 if len(paramsArray) > 1 and cyparamsArray is not NULL:
19108 free(cyparamsArray)
19109 return (_dict_cudaError_t[err],)
19111@cython.embedsignature(True)
19112def cudaWaitExternalSemaphoresAsync(extSemArray : Optional[tuple[cudaExternalSemaphore_t] | list[cudaExternalSemaphore_t]], paramsArray : Optional[tuple[cudaExternalSemaphoreWaitParams] | list[cudaExternalSemaphoreWaitParams]], unsigned int numExtSems, stream):
19113 """ Waits on a set of external semaphore objects.
19115 Enqueues a wait operation on a set of externally allocated semaphore
19116 objects in the specified stream. The operations will be executed when
19117 all prior operations in the stream complete.
19119 The exact semantics of waiting on a semaphore depends on the type of
19120 the object.
19122 If the semaphore object is any one of the following types:
19123 :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueFd`,
19124 :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueWin32`,
19125 :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt` then waiting
19126 on the semaphore will wait until the semaphore reaches the signaled
19127 state. The semaphore will then be reset to the unsignaled state.
19128 Therefore, for every signal operation, there can only be one wait
19129 operation.
19131 If the semaphore object is any one of the following types:
19132 :py:obj:`~.cudaExternalSemaphoreHandleTypeD3D12Fence`,
19133 :py:obj:`~.cudaExternalSemaphoreHandleTypeD3D11Fence`,
19134 :py:obj:`~.cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd`,
19135 :py:obj:`~.cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32` then
19136 waiting on the semaphore will wait until the value of the semaphore is
19137 greater than or equal to
19138 :py:obj:`~.cudaExternalSemaphoreWaitParams`::params::fence::value.
19140 If the semaphore object is of the type
19141 :py:obj:`~.cudaExternalSemaphoreHandleTypeNvSciSync` then, waiting on
19142 the semaphore will wait until the
19143 :py:obj:`~.cudaExternalSemaphoreWaitParams`::params::nvSciSync::fence
19144 is signaled by the signaler of the NvSciSyncObj that was associated
19145 with this semaphore object. By default, waiting on such an external
19146 semaphore object causes appropriate memory synchronization operations
19147 to be performed over all external memory objects that are imported as
19148 :py:obj:`~.cudaExternalMemoryHandleTypeNvSciBuf`. This ensures that any
19149 subsequent accesses made by other importers of the same set of NvSciBuf
19150 memory object(s) are coherent. These operations can be skipped by
19151 specifying the flag
19152 :py:obj:`~.cudaExternalSemaphoreWaitSkipNvSciBufMemSync`, which can be
19153 used as a performance optimization when data coherency is not required.
19154 But specifying this flag in scenarios where data coherency is required
19155 results in undefined behavior. Also, for a semaphore object of the type
19156 :py:obj:`~.cudaExternalSemaphoreHandleTypeNvSciSync`, if the
19157 NvSciSyncAttrList used to create the NvSciSyncObj had not set the flags
19158 in :py:obj:`~.cudaDeviceGetNvSciSyncAttributes` to
19159 cudaNvSciSyncAttrWait, this API will return cudaErrorNotSupported.
19161 If the semaphore object is any one of the following types:
19162 :py:obj:`~.cudaExternalSemaphoreHandleTypeKeyedMutex`,
19163 :py:obj:`~.cudaExternalSemaphoreHandleTypeKeyedMutexKmt`, then the
19164 keyed mutex will be acquired when it is released with the key specified
19165 in
19166 :py:obj:`~.cudaExternalSemaphoreWaitParams`::params::keyedmutex::key
19167 or until the timeout specified by
19168 :py:obj:`~.cudaExternalSemaphoreWaitParams`::params::keyedmutex::timeoutMs
19169 has lapsed. The timeout interval can either be a finite value specified
19170 in milliseconds or an infinite value. In case an infinite value is
19171 specified, the timeout never elapses. The Windows INFINITE macro must be
19172 used to specify an infinite timeout.
19174 Parameters
19175 ----------
19176 extSemArray : list[:py:obj:`~.cudaExternalSemaphore_t`]
19177 External semaphores to be waited on
19178 paramsArray : list[:py:obj:`~.cudaExternalSemaphoreWaitParams`]
19179 Array of semaphore parameters
19180 numExtSems : unsigned int
19181 Number of semaphores to wait on
19182 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
19183 Stream to enqueue the wait operations in
19185 Returns
19186 -------
19187 cudaError_t
19188 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorTimeout`
19190 See Also
19191 --------
19192 :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaDestroyExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`
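Examples
--------
A minimal sketch waiting on the semaphore signaled in the
:py:obj:`~.cudaSignalExternalSemaphoresAsync` example, again on the
default (NULL) stream:

>>> from cuda.bindings import runtime
>>> wait_params = runtime.cudaExternalSemaphoreWaitParams()
>>> err, = runtime.cudaWaitExternalSemaphoresAsync([extSem], [wait_params], 1, 0)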
19193 """
19194 cdef cyruntime.cudaStream_t cystream
19195 if stream is None:
19196 pstream = 0
19197 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
19198 pstream = int(stream)
19199 else:
19200 pstream = int(cudaStream_t(stream))
19201 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
19202 paramsArray = [] if paramsArray is None else paramsArray
19203 if not all(isinstance(_x, (cudaExternalSemaphoreWaitParams,)) for _x in paramsArray):
19204 raise TypeError("Argument 'paramsArray' is not instance of type (expected tuple[cyruntime.cudaExternalSemaphoreWaitParams,] or list[cyruntime.cudaExternalSemaphoreWaitParams,])")
19205 extSemArray = [] if extSemArray is None else extSemArray
19206 if not all(isinstance(_x, (cudaExternalSemaphore_t,)) for _x in extSemArray):
19207 raise TypeError("Argument 'extSemArray' is not instance of type (expected tuple[cyruntime.cudaExternalSemaphore_t,] or list[cyruntime.cudaExternalSemaphore_t,])")
19208 cdef cyruntime.cudaExternalSemaphore_t* cyextSemArray = NULL
19209 if len(extSemArray) > 1:
19210 cyextSemArray = <cyruntime.cudaExternalSemaphore_t*> calloc(len(extSemArray), sizeof(cyruntime.cudaExternalSemaphore_t))
19211 if cyextSemArray is NULL:
19212 raise MemoryError('Failed to allocate length x size memory: ' + str(len(extSemArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t)))
19213 else:
19214 for idx in range(len(extSemArray)):
19215 cyextSemArray[idx] = <cyruntime.cudaExternalSemaphore_t>(<cudaExternalSemaphore_t>extSemArray[idx])._pvt_ptr[0]
19216 elif len(extSemArray) == 1:
19217 cyextSemArray = <cyruntime.cudaExternalSemaphore_t*>(<cudaExternalSemaphore_t>extSemArray[0])._pvt_ptr
19218 cdef cyruntime.cudaExternalSemaphoreWaitParams* cyparamsArray = NULL
19219 if len(paramsArray) > 1:
19220 cyparamsArray = <cyruntime.cudaExternalSemaphoreWaitParams*> calloc(len(paramsArray), sizeof(cyruntime.cudaExternalSemaphoreWaitParams))
19221 if cyparamsArray is NULL:
19222 raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreWaitParams)))
19223 for idx in range(len(paramsArray)):
19224 string.memcpy(&cyparamsArray[idx], (<cudaExternalSemaphoreWaitParams>paramsArray[idx])._pvt_ptr, sizeof(cyruntime.cudaExternalSemaphoreWaitParams))
19225 elif len(paramsArray) == 1:
19226 cyparamsArray = (<cudaExternalSemaphoreWaitParams>paramsArray[0])._pvt_ptr
19227 if numExtSems > len(extSemArray): raise RuntimeError("List is too small: " + str(len(extSemArray)) + " < " + str(numExtSems))
19228 if numExtSems > len(paramsArray): raise RuntimeError("List is too small: " + str(len(paramsArray)) + " < " + str(numExtSems))
19229 with nogil:
19230 err = cyruntime.cudaWaitExternalSemaphoresAsync(cyextSemArray, cyparamsArray, numExtSems, cystream)
19231 if len(extSemArray) > 1 and cyextSemArray is not NULL:
19232 free(cyextSemArray)
19233 if len(paramsArray) > 1 and cyparamsArray is not NULL:
19234 free(cyparamsArray)
19235 return (_dict_cudaError_t[err],)
19237@cython.embedsignature(True)
19238def cudaDestroyExternalSemaphore(extSem):
19239 """ Destroys an external semaphore.
19241 Destroys an external semaphore object and releases any references to
19242 the underlying resource. Any outstanding signals or waits must have
19243 completed before the semaphore is destroyed.
19245 Parameters
19246 ----------
19247 extSem : :py:obj:`~.cudaExternalSemaphore_t`
19248 External semaphore to be destroyed
19250 Returns
19251 -------
19252 cudaError_t
19253 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`
19255 See Also
19256 --------
19257 :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
19258 """
19259 cdef cyruntime.cudaExternalSemaphore_t cyextSem
19260 if extSem is None:
19261 pextSem = 0
19262 elif isinstance(extSem, (cudaExternalSemaphore_t,)):
19263 pextSem = int(extSem)
19264 else:
19265 pextSem = int(cudaExternalSemaphore_t(extSem))
19266 cyextSem = <cyruntime.cudaExternalSemaphore_t><void_ptr>pextSem
19267 with nogil:
19268 err = cyruntime.cudaDestroyExternalSemaphore(cyextSem)
19269 return (_dict_cudaError_t[err],)
19271@cython.embedsignature(True)
19272def cudaFuncSetCacheConfig(func, cacheConfig not None : cudaFuncCache):
19273 """ Sets the preferred cache configuration for a device function.
19275 On devices where the L1 cache and shared memory use the same hardware
19276 resources, this sets through `cacheConfig` the preferred cache
19277 configuration for the function specified via `func`. This is only a
19278 preference. The runtime will use the requested configuration if
19279 possible, but it is free to choose a different configuration if
19280 required to execute `func`.
19282 `func` is a device function symbol and must be declared as a `__global__`
19283 function. If the specified function does not exist, then
19284 :py:obj:`~.cudaErrorInvalidDeviceFunction` is returned. For templated
19285 functions, pass the function symbol as follows:
19286 func_name<template_arg_0,...,template_arg_N>
19288 This setting does nothing on devices where the size of the L1 cache and
19289 shared memory are fixed.
19291 Launching a kernel with a different preference than the most recent
19292 preference setting may insert a device-side synchronization point.
19294 The supported cache configurations are:
19296 - :py:obj:`~.cudaFuncCachePreferNone`: no preference for shared memory
19297 or L1 (default)
19299 - :py:obj:`~.cudaFuncCachePreferShared`: prefer larger shared memory
19300 and smaller L1 cache
19302 - :py:obj:`~.cudaFuncCachePreferL1`: prefer larger L1 cache and smaller
19303 shared memory
19305 - :py:obj:`~.cudaFuncCachePreferEqual`: prefer equal size L1 cache and
19306 shared memory
19308 Parameters
19309 ----------
19310 func : Any
19311 Device function symbol
19312 cacheConfig : :py:obj:`~.cudaFuncCache`
19313 Requested cache configuration
19315 Returns
19316 -------
19317 cudaError_t
19318 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDeviceFunction`
19320 See Also
19321 --------
19322 cudaFuncSetCacheConfig (C++ API), :py:obj:`~.cudaFuncGetAttributes (C API)`, :py:obj:`~.cudaLaunchKernel (C API)`, :py:obj:`~.cuFuncSetCacheConfig`
19324 Notes
19325 -----
19326 This API does not accept a :py:obj:`~.cudaKernel_t` casted as void*. If cache config modification is required for a :py:obj:`~.cudaKernel_t` (or a global function), it can be replaced with a call to :py:obj:`~.cudaFuncSetAttributes` with the attribute :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout` to specify a more granular L1 cache and shared memory split configuration.
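Examples
--------
A minimal sketch, assuming `func_ptr` is a device-function pointer
obtained elsewhere (this API does not accept a :py:obj:`~.cudaKernel_t`;
see the note above):

>>> from cuda.bindings import runtime
>>> err, = runtime.cudaFuncSetCacheConfig(
...     func_ptr, runtime.cudaFuncCache.cudaFuncCachePreferShared)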
19327 """
19328 cyfunc = _HelperInputVoidPtr(func)
19329 cdef void* cyfunc_ptr = <void*><void_ptr>cyfunc.cptr
19330 cdef cyruntime.cudaFuncCache cycacheConfig = cacheConfig.value
19331 with nogil:
19332 err = cyruntime.cudaFuncSetCacheConfig(cyfunc_ptr, cycacheConfig)
19333 return (_dict_cudaError_t[err],)
19335@cython.embedsignature(True)
19336def cudaFuncGetAttributes(func):
19337 """ Find out attributes for a given function.
19339 This function obtains the attributes of a function specified via
19340 `func`. `func` is a device function symbol and must be declared as a
19341 `__global__` function. The fetched attributes are placed in `attr`. If the
19342 specified function does not exist, then it is assumed to be a
19343 :py:obj:`~.cudaKernel_t` and used as is. For templated functions, pass
19344 the function symbol as follows:
19345 func_name<template_arg_0,...,template_arg_N>
19347 Note that some function attributes such as
19348 :py:obj:`~.maxThreadsPerBlock` may vary based on the device that is
19349 currently being used.
19351 Parameters
19352 ----------
19353 func : Any
19354 Device function symbol
19356 Returns
19357 -------
19358 cudaError_t
19359 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDeviceFunction`
19360 attr : :py:obj:`~.cudaFuncAttributes`
19361 Return pointer to function's attributes
19363 See Also
19364 --------
19365 :py:obj:`~.cudaFuncSetCacheConfig (C API)`, cudaFuncGetAttributes (C++ API), :py:obj:`~.cudaLaunchKernel (C API)`, :py:obj:`~.cuFuncGetAttribute`
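Examples
--------
A minimal sketch, assuming `kernel` is a :py:obj:`~.cudaKernel_t`
obtained elsewhere (for example from :py:obj:`~.cudaLibraryGetKernel`):

>>> from cuda.bindings import runtime
>>> err, attr = runtime.cudaFuncGetAttributes(kernel)
>>> print(attr.maxThreadsPerBlock, attr.sharedSizeBytes)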
19366 """
19367 cdef cudaFuncAttributes attr = cudaFuncAttributes()
19368 cyfunc = _HelperInputVoidPtr(func)
19369 cdef void* cyfunc_ptr = <void*><void_ptr>cyfunc.cptr
19370 with nogil:
19371 err = cyruntime.cudaFuncGetAttributes(<cyruntime.cudaFuncAttributes*>attr._pvt_ptr, cyfunc_ptr)
19372 if err != cyruntime.cudaSuccess:
19373 return (_dict_cudaError_t[err], None)
19374 return (_dict_cudaError_t[err], attr)
19376@cython.embedsignature(True)
19377def cudaFuncSetAttribute(func, attr not None : cudaFuncAttribute, int value):
19378 """ Set attributes for a given function.
19380 This function sets the attributes of a function specified via `func`.
19381 The parameter `func` must be a pointer to a function that executes on
19382 the device. The parameter specified by `func` must be declared as a
19383 `__global__` function. The enumeration defined by `attr` is set to the value
19384 defined by `value`. If the specified function does not exist, then it
19385 is assumed to be a :py:obj:`~.cudaKernel_t` and used as is. If the
19386 specified attribute cannot be written, or if the value is incorrect,
19387 then :py:obj:`~.cudaErrorInvalidValue` is returned.
19389 Valid values for `attr` are:
19391 - :py:obj:`~.cudaFuncAttributeMaxDynamicSharedMemorySize` - The
19392 requested maximum size in bytes of dynamically-allocated shared
19393 memory. The sum of this value and the function attribute
19394 :py:obj:`~.sharedSizeBytes` cannot exceed the device attribute
19395 :py:obj:`~.cudaDevAttrMaxSharedMemoryPerBlockOptin`. The maximal size
19396 of requestable dynamic shared memory may differ by GPU architecture.
19398 - :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout` - On
19399 devices where the L1 cache and shared memory use the same hardware
19400 resources, this sets the shared memory carveout preference, in
19401 percent of the total shared memory. See
19402 :py:obj:`~.cudaDevAttrMaxSharedMemoryPerMultiprocessor`. This is only
19403 a hint, and the driver can choose a different ratio if required to
19404 execute the function.
19406 - :py:obj:`~.cudaFuncAttributeRequiredClusterWidth`: The required
19407 cluster width in blocks. The width, height, and depth values must
19408 either all be 0 or all be positive. The validity of the cluster
19409 dimensions is checked at launch time. If the value is set during
19410 compile time, it cannot be set at runtime. Setting it at runtime will
19411 return cudaErrorNotPermitted.
19413 - :py:obj:`~.cudaFuncAttributeRequiredClusterHeight`: The required
19414 cluster height in blocks. The width, height, and depth values must
19415 either all be 0 or all be positive. The validity of the cluster
19416 dimensions is checked at launch time. If the value is set during
19417 compile time, it cannot be set at runtime. Setting it at runtime will
19418 return cudaErrorNotPermitted.
19420 - :py:obj:`~.cudaFuncAttributeRequiredClusterDepth`: The required
19421 cluster depth in blocks. The width, height, and depth values must
19422 either all be 0 or all be positive. The validity of the cluster
19423 dimensions is checked at launch time. If the value is set during
19424 compile time, it cannot be set at runtime. Setting it at runtime will
19425 return cudaErrorNotPermitted.
19427 - :py:obj:`~.cudaFuncAttributeNonPortableClusterSizeAllowed`: Indicates
19428 whether the function can be launched with non-portable cluster size.
19429 1 is allowed, 0 is disallowed.
19431 - :py:obj:`~.cudaFuncAttributeClusterSchedulingPolicyPreference`: The
19432 block scheduling policy of a function. The value type is
19433 cudaClusterSchedulingPolicy.
19435 See also cudaLaunchKernel (C++ API), cudaFuncSetCacheConfig (C++ API),
19436 and :py:obj:`~.cudaFuncGetAttributes (C API)`.
19438 Parameters
19439 ----------
19440 func : Any
19441 Function to set attributes for
19442 attr : :py:obj:`~.cudaFuncAttribute`
19443 Attribute to set
19444 value : int
19445 Value to set
19447 Returns
19448 -------
19449 cudaError_t
19450 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidValue`
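Examples
--------
A minimal sketch opting a kernel into a larger dynamic shared memory
allocation (hypothetical: `kernel` is a :py:obj:`~.cudaKernel_t` and
64 KiB is within :py:obj:`~.cudaDevAttrMaxSharedMemoryPerBlockOptin`):

>>> from cuda.bindings import runtime
>>> err, = runtime.cudaFuncSetAttribute(
...     kernel,
...     runtime.cudaFuncAttribute.cudaFuncAttributeMaxDynamicSharedMemorySize,
...     64 * 1024)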
19451 """
19452 cyfunc = _HelperInputVoidPtr(func)
19453 cdef void* cyfunc_ptr = <void*><void_ptr>cyfunc.cptr
19454 cdef cyruntime.cudaFuncAttribute cyattr = attr.value
19455 with nogil:
19456 err = cyruntime.cudaFuncSetAttribute(cyfunc_ptr, cyattr, value)
19457 return (_dict_cudaError_t[err],)
19459ctypedef struct cudaStreamHostCallbackData_st:
19460 cyruntime.cudaHostFn_t callback
19461 void *userData
19463ctypedef cudaStreamHostCallbackData_st cudaStreamHostCallbackData
19465@cython.show_performance_hints(False)
19466cdef void cudaStreamRtHostCallbackWrapper(void *data) nogil:
19467 cdef cudaStreamHostCallbackData *cbData = <cudaStreamHostCallbackData *>data
19468 with gil:
19469 cbData.callback(cbData.userData)
19470 free(cbData)
19472@cython.embedsignature(True)
19473def cudaLaunchHostFunc(stream, fn, userData):
19474 """ Enqueues a host function call in a stream.
19476 Enqueues a host function to run in a stream. The function will be
19477 called after currently enqueued work and will block work added after
19478 it.
19480 The host function must not make any CUDA API calls. Attempting to use a
19481 CUDA API may result in :py:obj:`~.cudaErrorNotPermitted`, but this is
19482 not required. The host function must not perform any synchronization
19483 that may depend on outstanding CUDA work not mandated to run earlier.
19484 Host functions without a mandated order (such as in independent
19485 streams) execute in undefined order and may be serialized.
19487 For the purposes of Unified Memory, execution makes a number of
19488 guarantees:
19490 - The stream is considered idle for the duration of the function's
19491 execution. Thus, for example, the function may always use memory
19492 attached to the stream it was enqueued in.
19494 - The start of execution of the function has the same effect as
19495 synchronizing an event recorded in the same stream immediately prior
19496 to the function. It thus synchronizes streams which have been
19497 "joined" prior to the function.
19499 - Adding device work to any stream does not have the effect of making
19500 the stream active until all preceding host functions and stream
19501 callbacks have executed. Thus, for example, a function might use
19502 global attached memory even if work has been added to another stream,
19503 if the work has been ordered behind the function call with an event.
19505 - Completion of the function does not cause a stream to become active
19506 except as described above. The stream will remain idle if no device
19507 work follows the function, and will remain idle across consecutive
19508 host functions or stream callbacks without device work in between.
19509 Thus, for example, stream synchronization can be done by signaling
19510 from a host function at the end of the stream.
19512 Note that, in contrast to :py:obj:`~.cuStreamAddCallback`, the
19513 function will not be called in the event of an error in the CUDA
19514 context.
19516 Parameters
19517 ----------
19518 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
19519 Stream to enqueue function call in
19520 fn : :py:obj:`~.cudaHostFn_t`
19521 The function to call once preceding stream operations are complete
19522 userData : Any
19523 User-specified data to be passed to the function
19525 Returns
19526 -------
19527 cudaError_t
19528 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
19530 See Also
19531 --------
19532 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cudaMallocManaged`, :py:obj:`~.cudaStreamAttachMemAsync`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cuLaunchHostFunc`
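Examples
--------
A minimal sketch using a ctypes trampoline for the host callback; the
ctypes objects must be kept alive until the callback has run
(illustrative only; the callback must not call into the CUDA API):

>>> import ctypes
>>> from cuda.bindings import runtime
>>> HOSTFN = ctypes.CFUNCTYPE(None, ctypes.c_void_p)
>>> c_cb = HOSTFN(lambda _: print("stream reached the callback"))
>>> fn = runtime.cudaHostFn_t(ctypes.cast(c_cb, ctypes.c_void_p).value)
>>> err, stream = runtime.cudaStreamCreate()
>>> err, = runtime.cudaLaunchHostFunc(stream, fn, None)
>>> err, = runtime.cudaStreamSynchronize(stream)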
19533 """
19534 cdef cyruntime.cudaHostFn_t cyfn
19535 if fn is None:
19536 pfn = 0
19537 elif isinstance(fn, (cudaHostFn_t,)):
19538 pfn = int(fn)
19539 else:
19540 pfn = int(cudaHostFn_t(fn))
19541 cyfn = <cyruntime.cudaHostFn_t><void_ptr>pfn
19542 cdef cyruntime.cudaStream_t cystream
19543 if stream is None:
19544 pstream = 0
19545 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
19546 pstream = int(stream)
19547 else:
19548 pstream = int(cudaStream_t(stream))
19549 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
19550 cyuserData = _HelperInputVoidPtr(userData)
19551 cdef void* cyuserData_ptr = <void*><void_ptr>cyuserData.cptr
19553 cdef cudaStreamHostCallbackData *cbData = NULL
19554 cbData = <cudaStreamHostCallbackData *>malloc(sizeof(cbData[0]))
19555 if cbData == NULL:
19556 return (cudaError_t.cudaErrorMemoryAllocation,)
19557 cbData.callback = cyfn
19558 cbData.userData = cyuserData_ptr
19560 with nogil:
19561 err = cyruntime.cudaLaunchHostFunc(cystream, <cyruntime.cudaHostFn_t>cudaStreamRtHostCallbackWrapper, <void *>cbData)
19562 if err != cyruntime.cudaSuccess:
19563 free(cbData)
19564 return (_dict_cudaError_t[err],)
19566@cython.embedsignature(True)
19567def cudaFuncSetSharedMemConfig(func, config not None : cudaSharedMemConfig):
19568 """ Sets the shared memory configuration for a device function.
19570 [Deprecated]
19572 On devices with configurable shared memory banks, this function will
19573 force all subsequent launches of the specified device function to have
19574 the given shared memory bank size configuration. On any given launch of
19575 the function, the shared memory configuration of the device will be
19576 temporarily changed if needed to suit the function's preferred
19577 configuration. Changes in shared memory configuration between
19578 subsequent launches of functions may introduce a device-side
19579 synchronization point.
19581 Any per-function setting of shared memory bank size set via
19582 :py:obj:`~.cudaFuncSetSharedMemConfig` will override the device wide
19583 setting set by :py:obj:`~.cudaDeviceSetSharedMemConfig`.
19585 Changing the shared memory bank size will not increase shared memory
19586 usage or affect occupancy of kernels, but may have major effects on
19587 performance. Larger bank sizes will allow for greater potential
19588 bandwidth to shared memory, but will change what kinds of accesses to
19589 shared memory will result in bank conflicts.
19591 This function will do nothing on devices with fixed shared memory bank
19592 size.
19594 For templated functions, pass the function symbol as follows:
19595 func_name<template_arg_0,...,template_arg_N>
19597 The supported bank configurations are:
19599 - :py:obj:`~.cudaSharedMemBankSizeDefault`: use the device's shared
19600 memory configuration when launching this function.
19602 - :py:obj:`~.cudaSharedMemBankSizeFourByte`: set shared memory bank
19603 width to be four bytes natively when launching this function.
19605 - :py:obj:`~.cudaSharedMemBankSizeEightByte`: set shared memory bank
19606 width to be eight bytes natively when launching this function.
19608 Parameters
19609 ----------
19610 func : Any
19611 Device function symbol
19612 config : :py:obj:`~.cudaSharedMemConfig`
19613 Requested shared memory configuration
19615 Returns
19616 -------
19617 cudaError_t
19618 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidValue`
19620 See Also
19621 --------
19622 :py:obj:`~.cudaDeviceSetSharedMemConfig`, :py:obj:`~.cudaDeviceGetSharedMemConfig`, :py:obj:`~.cudaDeviceSetCacheConfig`, :py:obj:`~.cudaDeviceGetCacheConfig`, :py:obj:`~.cudaFuncSetCacheConfig`, :py:obj:`~.cuFuncSetSharedMemConfig`
19623 """
19624 cyfunc = _HelperInputVoidPtr(func)
19625 cdef void* cyfunc_ptr = <void*><void_ptr>cyfunc.cptr
19626 cdef cyruntime.cudaSharedMemConfig cyconfig = config.value
19627 with nogil:
19628 err = cyruntime.cudaFuncSetSharedMemConfig(cyfunc_ptr, cyconfig)
19629 return (_dict_cudaError_t[err],)
19631@cython.embedsignature(True)
19632def cudaOccupancyMaxActiveBlocksPerMultiprocessor(func, int blockSize, size_t dynamicSMemSize):
19633 """ Returns occupancy for a device function.
19635 Returns in `*numBlocks` the maximum number of active blocks per
19636 streaming multiprocessor for the device function.
19638 Parameters
19639 ----------
19640 func : Any
19641 Kernel function for which occupancy is calculated
19642 blockSize : int
19643 Block size the kernel is intended to be launched with
19644 dynamicSMemSize : size_t
19645 Per-block dynamic shared memory usage intended, in bytes
19647 Returns
19648 -------
19649 cudaError_t
19650 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
19651 numBlocks : int
19652 Returned occupancy
19654 See Also
19655 --------
19656 :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`, cudaOccupancyMaxPotentialBlockSize (C++ API), cudaOccupancyMaxPotentialBlockSizeWithFlags (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMem (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags (C++ API), cudaOccupancyAvailableDynamicSMemPerBlock (C++ API), :py:obj:`~.cuOccupancyMaxActiveBlocksPerMultiprocessor`
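Examples
--------
A minimal sketch, assuming `kernel` is a :py:obj:`~.cudaKernel_t`
obtained elsewhere, a block size of 256 threads, and no dynamic shared
memory:

>>> from cuda.bindings import runtime
>>> err, num_blocks = runtime.cudaOccupancyMaxActiveBlocksPerMultiprocessor(kernel, 256, 0)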
19657 """
19658 cdef int numBlocks = 0
19659 cyfunc = _HelperInputVoidPtr(func)
19660 cdef void* cyfunc_ptr = <void*><void_ptr>cyfunc.cptr
19661 with nogil:
19662 err = cyruntime.cudaOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, cyfunc_ptr, blockSize, dynamicSMemSize)
19663 if err != cyruntime.cudaSuccess:
19664 return (_dict_cudaError_t[err], None)
19665 return (_dict_cudaError_t[err], numBlocks)
19667@cython.embedsignature(True)
19668def cudaOccupancyAvailableDynamicSMemPerBlock(func, int numBlocks, int blockSize):
19669 """ Returns dynamic shared memory available per block when launching `numBlocks` blocks on SM.
19671 Returns in `*dynamicSmemSize` the maximum size of dynamic shared memory
19672 to allow `numBlocks` blocks per SM.
19674 Parameters
19675 ----------
19676 func : Any
19677 Kernel function for which occupancy is calculated
19678 numBlocks : int
19679 Number of blocks to fit on SM
19680 blockSize : int
19681 Size of the block
19683 Returns
19684 -------
19685 cudaError_t
19686 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
19687 dynamicSmemSize : int
19688 Returned maximum dynamic shared memory
19690 See Also
19691 --------
19692 :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`, cudaOccupancyMaxPotentialBlockSize (C++ API), cudaOccupancyMaxPotentialBlockSizeWithFlags (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMem (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags (C++ API), :py:obj:`~.cudaOccupancyAvailableDynamicSMemPerBlock`
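Examples
--------
A minimal sketch asking how much dynamic shared memory is still
available if two 128-thread blocks are to be resident per SM (`kernel`
as in the previous example):

>>> from cuda.bindings import runtime
>>> err, smem = runtime.cudaOccupancyAvailableDynamicSMemPerBlock(kernel, 2, 128)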
19693 """
19694 cdef size_t dynamicSmemSize = 0
19695 cyfunc = _HelperInputVoidPtr(func)
19696 cdef void* cyfunc_ptr = <void*><void_ptr>cyfunc.cptr
19697 with nogil:
19698 err = cyruntime.cudaOccupancyAvailableDynamicSMemPerBlock(&dynamicSmemSize, cyfunc_ptr, numBlocks, blockSize)
19699 if err != cyruntime.cudaSuccess:
19700 return (_dict_cudaError_t[err], None)
19701 return (_dict_cudaError_t[err], dynamicSmemSize)
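# Editorial usage sketch (not part of the generated bindings): the inverse
# query, asking how much dynamic shared memory each block may use if two
# 256-thread blocks are to fit on one SM. `kernel` is again an assumed
# device-function handle.
#
#     err, smemBytes = cudaOccupancyAvailableDynamicSMemPerBlock(kernel, 2, 256)
#     if err == cudaError_t.cudaSuccess:
#         print(f"dynamic shared memory available per block: {smemBytes} bytes")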
19703@cython.embedsignature(True)
19704def cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(func, int blockSize, size_t dynamicSMemSize, unsigned int flags):
19705 """ Returns occupancy for a device function with the specified flags.
19707 Returns in `*numBlocks` the maximum number of active blocks per
19708 streaming multiprocessor for the device function.
19710 The `flags` parameter controls how special cases are handled. Valid
19711 flags include:
19713 - :py:obj:`~.cudaOccupancyDefault`: keeps the default behavior as
19714 :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessor`
19716     - :py:obj:`~.cudaOccupancyDisableCachingOverride`: This flag suppresses
19717       the default behavior on platforms where global caching affects
19718       occupancy. On such platforms, if caching is enabled, but per-block SM
19719       resource usage would result in zero occupancy, the occupancy
19720       calculator will calculate the occupancy as if caching is disabled.
19721       Setting this flag makes the occupancy calculator return 0 in such
19722       cases. More information about this feature can be found in the
19723       "Unified L1/Texture Cache" section of the Maxwell tuning guide.
19725 Parameters
19726 ----------
19727 func : Any
19728 Kernel function for which occupancy is calculated
19729 blockSize : int
19730 Block size the kernel is intended to be launched with
19731 dynamicSMemSize : size_t
19732 Per-block dynamic shared memory usage intended, in bytes
19733 flags : unsigned int
19734 Requested behavior for the occupancy calculator
19736 Returns
19737 -------
19738 cudaError_t
19739 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`,
19740 numBlocks : int
19741 Returned occupancy
19743 See Also
19744 --------
19745 :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessor`, cudaOccupancyMaxPotentialBlockSize (C++ API), cudaOccupancyMaxPotentialBlockSizeWithFlags (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMem (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags (C++ API), cudaOccupancyAvailableDynamicSMemPerBlock (C++ API), :py:obj:`~.cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`
19746 """
19747 cdef int numBlocks = 0
19748 cyfunc = _HelperInputVoidPtr(func)
19749 cdef void* cyfunc_ptr = <void*><void_ptr>cyfunc.cptr
19750 with nogil:
19751 err = cyruntime.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(&numBlocks, cyfunc_ptr, blockSize, dynamicSMemSize, flags)
19752 if err != cyruntime.cudaSuccess:
19753 return (_dict_cudaError_t[err], None)
19754 return (_dict_cudaError_t[err], numBlocks)
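# Editorial usage sketch (not part of the generated bindings): the same
# occupancy query with the caching override suppressed, so platforms where
# global caching affects occupancy report 0 rather than the as-if-disabled
# estimate described above. `kernel` is an assumed handle; the flag name is
# taken from the cross-reference in the docstring.
#
#     err, numBlocks = cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
#         kernel, 256, 0, cudaOccupancyDisableCachingOverride)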
19756@cython.embedsignature(True)
19757def cudaMallocManaged(size_t size, unsigned int flags):
19758 """ Allocates memory that will be automatically managed by the Unified Memory system.
19760 Allocates `size` bytes of managed memory on the device and returns in
19761 `*devPtr` a pointer to the allocated memory. If the device doesn't
19762 support allocating managed memory, :py:obj:`~.cudaErrorNotSupported` is
19763 returned. Support for managed memory can be queried using the device
19764 attribute :py:obj:`~.cudaDevAttrManagedMemory`. The allocated memory is
19765 suitably aligned for any kind of variable. The memory is not cleared.
19766 If `size` is 0, :py:obj:`~.cudaMallocManaged` returns
19767 :py:obj:`~.cudaErrorInvalidValue`. The pointer is valid on the CPU and
19768 on all GPUs in the system that support managed memory. All accesses to
19769 this pointer must obey the Unified Memory programming model.
19771 `flags` specifies the default stream association for this allocation.
19772 `flags` must be one of :py:obj:`~.cudaMemAttachGlobal` or
19773 :py:obj:`~.cudaMemAttachHost`. The default value for `flags` is
19774 :py:obj:`~.cudaMemAttachGlobal`. If :py:obj:`~.cudaMemAttachGlobal` is
19775 specified, then this memory is accessible from any stream on any
19776 device. If :py:obj:`~.cudaMemAttachHost` is specified, then the
19777 allocation should not be accessed from devices that have a zero value
19778 for the device attribute
19779 :py:obj:`~.cudaDevAttrConcurrentManagedAccess`; an explicit call to
19780 :py:obj:`~.cudaStreamAttachMemAsync` will be required to enable access
19781 on such devices.
19783 If the association is later changed via
19784 :py:obj:`~.cudaStreamAttachMemAsync` to a single stream, the default
19785     association, as specified during :py:obj:`~.cudaMallocManaged`, is
19786 restored when that stream is destroyed. For managed variables, the
19787 default association is always :py:obj:`~.cudaMemAttachGlobal`. Note
19788 that destroying a stream is an asynchronous operation, and as a result,
19789 the change to default association won't happen until all work in the
19790 stream has completed.
19792 Memory allocated with :py:obj:`~.cudaMallocManaged` should be released
19793 with :py:obj:`~.cudaFree`.
19795 Device memory oversubscription is possible for GPUs that have a non-
19796 zero value for the device attribute
19797 :py:obj:`~.cudaDevAttrConcurrentManagedAccess`. Managed memory on such
19798 GPUs may be evicted from device memory to host memory at any time by
19799 the Unified Memory driver in order to make room for other allocations.
19801 In a system where all GPUs have a non-zero value for the device
19802 attribute :py:obj:`~.cudaDevAttrConcurrentManagedAccess`, managed
19803 memory may not be populated when this API returns and instead may be
19804 populated on access. In such systems, managed memory can migrate to any
19805 processor's memory at any time. The Unified Memory driver will employ
19806 heuristics to maintain data locality and prevent excessive page faults
19807 to the extent possible. The application can also guide the driver about
19808 memory usage patterns via :py:obj:`~.cudaMemAdvise`. The application
19809 can also explicitly migrate memory to a desired processor's memory via
19810 :py:obj:`~.cudaMemPrefetchAsync`.
19812 In a multi-GPU system where all of the GPUs have a zero value for the
19813 device attribute :py:obj:`~.cudaDevAttrConcurrentManagedAccess` and all
19814 the GPUs have peer-to-peer support with each other, the physical
19815 storage for managed memory is created on the GPU which is active at the
19816 time :py:obj:`~.cudaMallocManaged` is called. All other GPUs will
19817 reference the data at reduced bandwidth via peer mappings over the PCIe
19818 bus. The Unified Memory driver does not migrate memory among such GPUs.
19820 In a multi-GPU system where not all GPUs have peer-to-peer support with
19821 each other and where the value of the device attribute
19822 :py:obj:`~.cudaDevAttrConcurrentManagedAccess` is zero for at least one
19823 of those GPUs, the location chosen for physical storage of managed
19824 memory is system-dependent.
19826 - On Linux, the location chosen will be device memory as long as the
19827 current set of active contexts are on devices that either have peer-
19828 to-peer support with each other or have a non-zero value for the
19829 device attribute :py:obj:`~.cudaDevAttrConcurrentManagedAccess`. If
19830 there is an active context on a GPU that does not have a non-zero
19831 value for that device attribute and it does not have peer-to-peer
19832 support with the other devices that have active contexts on them,
19833 then the location for physical storage will be 'zero-copy' or host
19834 memory. Note that this means that managed memory that is located in
19835 device memory is migrated to host memory if a new context is created
19836 on a GPU that doesn't have a non-zero value for the device attribute
19837 and does not support peer-to-peer with at least one of the other
19838 devices that has an active context. This in turn implies that context
19839 creation may fail if there is insufficient host memory to migrate all
19840 managed allocations.
19842 - On Windows, the physical storage is always created in 'zero-copy' or
19843 host memory. All GPUs will reference the data at reduced bandwidth
19844 over the PCIe bus. In these circumstances, use of the environment
19845 variable CUDA_VISIBLE_DEVICES is recommended to restrict CUDA to only
19846 use those GPUs that have peer-to-peer support. Alternatively, users
19847 can also set CUDA_MANAGED_FORCE_DEVICE_ALLOC to a non-zero value to
19848 force the driver to always use device memory for physical storage.
19849 When this environment variable is set to a non-zero value, all
19850 devices used in that process that support managed memory have to be
19851 peer-to-peer compatible with each other. The error
19852 :py:obj:`~.cudaErrorInvalidDevice` will be returned if a device that
19853 supports managed memory is used and it is not peer-to-peer compatible
19854 with any of the other managed memory supporting devices that were
19855 previously used in that process, even if :py:obj:`~.cudaDeviceReset`
19856 has been called on those devices. These environment variables are
19857 described in the CUDA programming guide under the "CUDA environment
19858 variables" section.
19860 Parameters
19861 ----------
19862 size : size_t
19863 Requested allocation size in bytes
19864 flags : unsigned int
19865 Must be either :py:obj:`~.cudaMemAttachGlobal` or
19866 :py:obj:`~.cudaMemAttachHost` (defaults to
19867 :py:obj:`~.cudaMemAttachGlobal`)
19869 Returns
19870 -------
19871 cudaError_t
19872 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`
19873 devPtr : Any
19874 Pointer to allocated device memory
19876 See Also
19877 --------
19878 :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cudaDeviceGetAttribute`, :py:obj:`~.cudaStreamAttachMemAsync`, :py:obj:`~.cuMemAllocManaged`
19879 """
19880 cdef void_ptr devPtr = 0
19881 with nogil:
19882 err = cyruntime.cudaMallocManaged(<void**>&devPtr, size, flags)
19883 if err != cyruntime.cudaSuccess:
19884 return (_dict_cudaError_t[err], None)
19885 return (_dict_cudaError_t[err], devPtr)
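# Editorial usage sketch (not part of the generated bindings): allocating
# 1 MiB of managed memory with the default global stream association and
# releasing it with cudaFree, as the docstring above requires.
#
#     err, devPtr = cudaMallocManaged(1 << 20, cudaMemAttachGlobal)
#     assert err == cudaError_t.cudaSuccess
#     err, = cudaFree(devPtr)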
19887@cython.embedsignature(True)
19888def cudaMalloc(size_t size):
19889 """ Allocate memory on the device.
19891 Allocates `size` bytes of linear memory on the device and returns in
19892 `*devPtr` a pointer to the allocated memory. The allocated memory is
19893 suitably aligned for any kind of variable. The memory is not cleared.
19894 :py:obj:`~.cudaMalloc()` returns :py:obj:`~.cudaErrorMemoryAllocation`
19895 in case of failure.
19897 The device version of :py:obj:`~.cudaFree` cannot be used with a
19898 `*devPtr` allocated using the host API, and vice versa.
19900 Parameters
19901 ----------
19902 size : size_t
19903 Requested allocation size in bytes
19905 Returns
19906 -------
19907 cudaError_t
19908 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
19909 devPtr : Any
19910 Pointer to allocated device memory
19912 See Also
19913 --------
19914 :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemAlloc`
19915 """
19916 cdef void_ptr devPtr = 0
19917 with nogil:
19918 err = cyruntime.cudaMalloc(<void**>&devPtr, size)
19919 if err != cyruntime.cudaSuccess:
19920 return (_dict_cudaError_t[err], None)
19921 return (_dict_cudaError_t[err], devPtr)
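# Editorial usage sketch (not part of the generated bindings): the basic
# allocate/free round trip; the returned device pointer is a plain integer
# address in this module's convention.
#
#     err, devPtr = cudaMalloc(1 << 20)   # 1 MiB of linear device memory
#     assert err == cudaError_t.cudaSuccess
#     err, = cudaFree(devPtr)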
19923@cython.embedsignature(True)
19924def cudaMallocHost(size_t size):
19925 """ Allocates page-locked memory on the host.
19927 Allocates `size` bytes of host memory that is page-locked and
19928 accessible to the device. The driver tracks the virtual memory ranges
19929 allocated with this function and automatically accelerates calls to
19930 functions such as :py:obj:`~.cudaMemcpy`*(). Since the memory can be
19931 accessed directly by the device, it can be read or written with much
19932 higher bandwidth than pageable memory obtained with functions such as
19933 :py:obj:`~.malloc()`.
19935 On systems where :py:obj:`~.pageableMemoryAccessUsesHostPageTables` is
19936 true, :py:obj:`~.cudaMallocHost` may not page-lock the allocated
19937 memory.
19939 Page-locking excessive amounts of memory with
19940 :py:obj:`~.cudaMallocHost()` may degrade system performance, since it
19941 reduces the amount of memory available to the system for paging. As a
19942 result, this function is best used sparingly to allocate staging areas
19943 for data exchange between host and device.
19945 Parameters
19946 ----------
19947 size : size_t
19948 Requested allocation size in bytes
19950 Returns
19951 -------
19952 cudaError_t
19953 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
19954 ptr : Any
19955 Pointer to allocated host memory
19957 See Also
19958 --------
19959 :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaFreeArray`, cudaMallocHost (C++ API), :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemAllocHost`
19960 """
19961 cdef void_ptr ptr = 0
19962 with nogil:
19963 err = cyruntime.cudaMallocHost(<void**>&ptr, size)
19964 if err != cyruntime.cudaSuccess:
19965 return (_dict_cudaError_t[err], None)
19966 return (_dict_cudaError_t[err], ptr)
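# Editorial usage sketch (not part of the generated bindings): a small
# pinned staging buffer, released with cudaFreeHost rather than cudaFree.
#
#     err, hostPtr = cudaMallocHost(4096)   # one page of page-locked memory
#     assert err == cudaError_t.cudaSuccess
#     err, = cudaFreeHost(hostPtr)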
19968@cython.embedsignature(True)
19969def cudaMallocPitch(size_t width, size_t height):
19970 """ Allocates pitched memory on the device.
19972 Allocates at least `width` (in bytes) * `height` bytes of linear memory
19973 on the device and returns in `*devPtr` a pointer to the allocated
19974 memory. The function may pad the allocation to ensure that
19975 corresponding pointers in any given row will continue to meet the
19976 alignment requirements for coalescing as the address is updated from
19977 row to row. The pitch returned in `*pitch` by
19978 :py:obj:`~.cudaMallocPitch()` is the width in bytes of the allocation.
19979 The intended usage of `pitch` is as a separate parameter of the
19980 allocation, used to compute addresses within the 2D array. Given the
19981 row and column of an array element of type `T`, the address is computed
19982 as:
19984 **View CUDA Toolkit Documentation for a C++ code example**
19986 For allocations of 2D arrays, it is recommended that programmers
19987 consider performing pitch allocations using
19988 :py:obj:`~.cudaMallocPitch()`. Due to pitch alignment restrictions in
19989 the hardware, this is especially true if the application will be
19990 performing 2D memory copies between different regions of device memory
19991 (whether linear memory or CUDA arrays).
19993 Parameters
19994 ----------
19995 width : size_t
19996 Requested pitched allocation width (in bytes)
19997 height : size_t
19998 Requested pitched allocation height
20000 Returns
20001 -------
20002 cudaError_t
20003 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
20004 devPtr : Any
20005 Pointer to allocated pitched device memory
20006 pitch : int
20007 Pitch for allocation
20009 See Also
20010 --------
20011 :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemAllocPitch`
20012 """
20013 cdef void_ptr devPtr = 0
20014 cdef size_t pitch = 0
20015 with nogil:
20016 err = cyruntime.cudaMallocPitch(<void**>&devPtr, &pitch, width, height)
20017 if err != cyruntime.cudaSuccess:
20018 return (_dict_cudaError_t[err], None, None)
20019 return (_dict_cudaError_t[err], devPtr, pitch)
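# Editorial usage sketch (not part of the generated bindings): allocating a
# pitched 512x256 region of float32 values and computing one element's
# address from the returned pitch, mirroring the C idiom
# T* p = (T*)((char*)base + row * pitch) + col shown in the Toolkit docs.
#
#     err, devPtr, pitch = cudaMallocPitch(512 * 4, 256)  # width in bytes
#     assert err == cudaError_t.cudaSuccess
#     row, col = 10, 3
#     elem_addr = devPtr + row * pitch + col * 4  # pointers are ints here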
20021@cython.embedsignature(True)
20022def cudaMallocArray(desc : Optional[cudaChannelFormatDesc], size_t width, size_t height, unsigned int flags):
20023 """ Allocate an array on the device.
20025 Allocates a CUDA array according to the
20026 :py:obj:`~.cudaChannelFormatDesc` structure `desc` and returns a handle
20027 to the new CUDA array in `*array`.
20029 The :py:obj:`~.cudaChannelFormatDesc` is defined as:
20031 **View CUDA Toolkit Documentation for a C++ code example**
20033 where :py:obj:`~.cudaChannelFormatKind` is one of
20034 :py:obj:`~.cudaChannelFormatKindSigned`,
20035 :py:obj:`~.cudaChannelFormatKindUnsigned`, or
20036 :py:obj:`~.cudaChannelFormatKindFloat`.
20038 The `flags` parameter enables different options to be specified that
20039 affect the allocation, as follows.
20041 - :py:obj:`~.cudaArrayDefault`: This flag's value is defined to be 0
20042 and provides default array allocation
20044 - :py:obj:`~.cudaArraySurfaceLoadStore`: Allocates an array that can be
20045 read from or written to using a surface reference
20047 - :py:obj:`~.cudaArrayTextureGather`: This flag indicates that texture
20048 gather operations will be performed on the array.
20050 - :py:obj:`~.cudaArraySparse`: Allocates a CUDA array without physical
20051 backing memory. The subregions within this sparse array can later be
20052 mapped onto a physical memory allocation by calling
20053 :py:obj:`~.cuMemMapArrayAsync`. The physical backing memory must be
20054 allocated via :py:obj:`~.cuMemCreate`.
20056 - :py:obj:`~.cudaArrayDeferredMapping`: Allocates a CUDA array without
20057 physical backing memory. The entire array can later be mapped onto a
20058 physical memory allocation by calling :py:obj:`~.cuMemMapArrayAsync`.
20059 The physical backing memory must be allocated via
20060 :py:obj:`~.cuMemCreate`.
20062 `width` and `height` must meet certain size requirements. See
20063 :py:obj:`~.cudaMalloc3DArray()` for more details.
20065 Parameters
20066 ----------
20067 desc : :py:obj:`~.cudaChannelFormatDesc`
20068 Requested channel format
20069 width : size_t
20070 Requested array allocation width
20071 height : size_t
20072 Requested array allocation height
20073 flags : unsigned int
20074 Requested properties of allocated array
20076 Returns
20077 -------
20078 cudaError_t
20079 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
20080 array : :py:obj:`~.cudaArray_t`
20081 Pointer to allocated array in device memory
20083 See Also
20084 --------
20085 :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuArrayCreate`
20086 """
20087 cdef cudaArray_t array = cudaArray_t()
20088 cdef cyruntime.cudaChannelFormatDesc* cydesc_ptr = desc._pvt_ptr if desc is not None else NULL
20089 with nogil:
20090 err = cyruntime.cudaMallocArray(<cyruntime.cudaArray_t*>array._pvt_ptr, cydesc_ptr, width, height, flags)
20091 if err != cyruntime.cudaSuccess:
20092 return (_dict_cudaError_t[err], None)
20093 return (_dict_cudaError_t[err], array)
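# Editorial usage sketch (not part of the generated bindings): a 1024x768
# CUDA array of single-channel float elements. The x/y/z/w bit widths and
# `f` kind field follow the cudaChannelFormatDesc structure described
# above; the enum spelling is an assumption based on this module's naming.
#
#     desc = cudaChannelFormatDesc()
#     desc.x, desc.y, desc.z, desc.w = 32, 0, 0, 0
#     desc.f = cudaChannelFormatKind.cudaChannelFormatKindFloat
#     err, array = cudaMallocArray(desc, 1024, 768, cudaArrayDefault)
#     assert err == cudaError_t.cudaSuccess
#     err, = cudaFreeArray(array)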
20095@cython.embedsignature(True)
20096def cudaFree(devPtr):
20097 """ Frees memory on the device.
20099 Frees the memory space pointed to by `devPtr`, which must have been
20100 returned by a previous call to one of the following memory allocation
20101 APIs - :py:obj:`~.cudaMalloc()`, :py:obj:`~.cudaMallocPitch()`,
20102 :py:obj:`~.cudaMallocManaged()`, :py:obj:`~.cudaMallocAsync()`,
20103 :py:obj:`~.cudaMallocFromPoolAsync()`.
20105 Note - This API will not perform any implicit synchronization when the
20106 pointer was allocated with :py:obj:`~.cudaMallocAsync` or
20107 :py:obj:`~.cudaMallocFromPoolAsync`. Callers must ensure that all
20108     accesses to these pointers have completed before invoking
20109 :py:obj:`~.cudaFree`. For best performance and memory reuse, users
20110 should use :py:obj:`~.cudaFreeAsync` to free memory allocated via the
20111 stream ordered memory allocator. For all other pointers, this API may
20112 perform implicit synchronization.
20114 If :py:obj:`~.cudaFree`(`devPtr`) has already been called before, an
20115 error is returned. If `devPtr` is 0, no operation is performed.
20116     :py:obj:`~.cudaFree()` returns :py:obj:`~.cudaErrorInvalidValue` in case of
20117 failure.
20119 The device version of :py:obj:`~.cudaFree` cannot be used with a
20120 `*devPtr` allocated using the host API, and vice versa.
20122 Parameters
20123 ----------
20124 devPtr : Any
20125 Device pointer to memory to free
20127 Returns
20128 -------
20129 cudaError_t
20130 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
20132 See Also
20133 --------
20134     :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaMallocManaged`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaMallocFromPoolAsync`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemFree`
20135 """
20136 cydevPtr = _HelperInputVoidPtr(devPtr)
20137 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
20138 with nogil:
20139 err = cyruntime.cudaFree(cydevPtr_ptr)
20140 return (_dict_cudaError_t[err],)
20142@cython.embedsignature(True)
20143def cudaFreeHost(ptr):
20144 """ Frees page-locked memory.
20146 Frees the memory space pointed to by `hostPtr`, which must have been
20147 returned by a previous call to :py:obj:`~.cudaMallocHost()` or
20148 :py:obj:`~.cudaHostAlloc()`.
20150 Parameters
20151 ----------
20152 ptr : Any
20153 Pointer to memory to free
20155 Returns
20156 -------
20157 cudaError_t
20158 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
20160 See Also
20161 --------
20162 :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemFreeHost`
20163 """
20164 cyptr = _HelperInputVoidPtr(ptr)
20165 cdef void* cyptr_ptr = <void*><void_ptr>cyptr.cptr
20166 with nogil:
20167 err = cyruntime.cudaFreeHost(cyptr_ptr)
20168 return (_dict_cudaError_t[err],)
20170@cython.embedsignature(True)
20171def cudaFreeArray(array):
20172 """ Frees an array on the device.
20174 Frees the CUDA array `array`, which must have been returned by a
20175     previous call to :py:obj:`~.cudaMallocArray()`. If `array` is 0, no
20176 operation is performed.
20178 Parameters
20179 ----------
20180 array : :py:obj:`~.cudaArray_t`
20181 Pointer to array to free
20183 Returns
20184 -------
20185 cudaError_t
20186 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
20188 See Also
20189 --------
20190 :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuArrayDestroy`
20191 """
20192 cdef cyruntime.cudaArray_t cyarray
20193 if array is None:
20194 parray = 0
20195 elif isinstance(array, (cudaArray_t,)):
20196 parray = int(array)
20197 else:
20198 parray = int(cudaArray_t(array))
20199 cyarray = <cyruntime.cudaArray_t><void_ptr>parray
20200 with nogil:
20201 err = cyruntime.cudaFreeArray(cyarray)
20202 return (_dict_cudaError_t[err],)
20204@cython.embedsignature(True)
20205def cudaFreeMipmappedArray(mipmappedArray):
20206 """ Frees a mipmapped array on the device.
20208 Frees the CUDA mipmapped array `mipmappedArray`, which must have been
20209 returned by a previous call to :py:obj:`~.cudaMallocMipmappedArray()`.
20210     If `mipmappedArray` is 0, no operation is performed.
20212 Parameters
20213 ----------
20214 mipmappedArray : :py:obj:`~.cudaMipmappedArray_t`
20215 Pointer to mipmapped array to free
20217 Returns
20218 -------
20219 cudaError_t
20220 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
20222 See Also
20223 --------
20224 :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMipmappedArrayDestroy`
20225 """
20226 cdef cyruntime.cudaMipmappedArray_t cymipmappedArray
20227 if mipmappedArray is None:
20228 pmipmappedArray = 0
20229 elif isinstance(mipmappedArray, (cudaMipmappedArray_t,)):
20230 pmipmappedArray = int(mipmappedArray)
20231 else:
20232 pmipmappedArray = int(cudaMipmappedArray_t(mipmappedArray))
20233 cymipmappedArray = <cyruntime.cudaMipmappedArray_t><void_ptr>pmipmappedArray
20234 with nogil:
20235 err = cyruntime.cudaFreeMipmappedArray(cymipmappedArray)
20236 return (_dict_cudaError_t[err],)
20238@cython.embedsignature(True)
20239def cudaHostAlloc(size_t size, unsigned int flags):
20240 """ Allocates page-locked memory on the host.
20242 Allocates `size` bytes of host memory that is page-locked and
20243 accessible to the device. The driver tracks the virtual memory ranges
20244 allocated with this function and automatically accelerates calls to
20245 functions such as :py:obj:`~.cudaMemcpy()`. Since the memory can be
20246 accessed directly by the device, it can be read or written with much
20247 higher bandwidth than pageable memory obtained with functions such as
20248 :py:obj:`~.malloc()`. Allocating excessive amounts of pinned memory may
20249 degrade system performance, since it reduces the amount of memory
20250 available to the system for paging. As a result, this function is best
20251 used sparingly to allocate staging areas for data exchange between host
20252 and device.
20254 The `flags` parameter enables different options to be specified that
20255 affect the allocation, as follows.
20257 - :py:obj:`~.cudaHostAllocDefault`: This flag's value is defined to be
20258 0 and causes :py:obj:`~.cudaHostAlloc()` to emulate
20259 :py:obj:`~.cudaMallocHost()`.
20261 - :py:obj:`~.cudaHostAllocPortable`: The memory returned by this call
20262 will be considered as pinned memory by all CUDA contexts, not just
20263 the one that performed the allocation.
20265 - :py:obj:`~.cudaHostAllocMapped`: Maps the allocation into the CUDA
20266 address space. The device pointer to the memory may be obtained by
20267 calling :py:obj:`~.cudaHostGetDevicePointer()`.
20269 - :py:obj:`~.cudaHostAllocWriteCombined`: Allocates the memory as
20270 write-combined (WC). WC memory can be transferred across the PCI
20271 Express bus more quickly on some system configurations, but cannot be
20272 read efficiently by most CPUs. WC memory is a good option for buffers
20273 that will be written by the CPU and read by the device via mapped
20274 pinned memory or host->device transfers.
20276 All of these flags are orthogonal to one another: a developer may
20277 allocate memory that is portable, mapped and/or write-combined with no
20278 restrictions.
20280 In order for the :py:obj:`~.cudaHostAllocMapped` flag to have any
20281 effect, the CUDA context must support the :py:obj:`~.cudaDeviceMapHost`
20282 flag, which can be checked via :py:obj:`~.cudaGetDeviceFlags()`. The
20283 :py:obj:`~.cudaDeviceMapHost` flag is implicitly set for contexts
20284 created via the runtime API.
20286 The :py:obj:`~.cudaHostAllocMapped` flag may be specified on CUDA
20287 contexts for devices that do not support mapped pinned memory. The
20288 failure is deferred to :py:obj:`~.cudaHostGetDevicePointer()` because
20289 the memory may be mapped into other CUDA contexts via the
20290 :py:obj:`~.cudaHostAllocPortable` flag.
20292 Memory allocated by this function must be freed with
20293 :py:obj:`~.cudaFreeHost()`.
20295 Parameters
20296 ----------
20297 size : size_t
20298 Requested allocation size in bytes
20299 flags : unsigned int
20300 Requested properties of allocated memory
20302 Returns
20303 -------
20304 cudaError_t
20305 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
20306 pHost : Any
20307         Host pointer to allocated memory
20309 See Also
20310 --------
20311 :py:obj:`~.cudaSetDeviceFlags`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaGetDeviceFlags`, :py:obj:`~.cuMemHostAlloc`
20312 """
20313 cdef void_ptr pHost = 0
20314 with nogil:
20315 err = cyruntime.cudaHostAlloc(<void**>&pHost, size, flags)
20316 if err != cyruntime.cudaSuccess:
20317 return (_dict_cudaError_t[err], None)
20318 return (_dict_cudaError_t[err], pHost)
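# Editorial usage sketch (not part of the generated bindings): mapped,
# portable pinned memory, with the device-side alias fetched afterwards.
# The flag constants are the module-level values defined near the top of
# this file.
#
#     err, hostPtr = cudaHostAlloc(4096, cudaHostAllocMapped | cudaHostAllocPortable)
#     assert err == cudaError_t.cudaSuccess
#     err, devPtr = cudaHostGetDevicePointer(hostPtr, 0)
#     ...
#     err, = cudaFreeHost(hostPtr)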
20320@cython.embedsignature(True)
20321def cudaHostRegister(ptr, size_t size, unsigned int flags):
20322 """ Registers an existing host memory range for use by CUDA.
20324 Page-locks the memory range specified by `ptr` and `size` and maps it
20325 for the device(s) as specified by `flags`. This memory range also is
20326 added to the same tracking mechanism as :py:obj:`~.cudaHostAlloc()` to
20327 automatically accelerate calls to functions such as
20328 :py:obj:`~.cudaMemcpy()`. Since the memory can be accessed directly by
20329 the device, it can be read or written with much higher bandwidth than
20330 pageable memory that has not been registered. Page-locking excessive
20331 amounts of memory may degrade system performance, since it reduces the
20332 amount of memory available to the system for paging. As a result, this
20333 function is best used sparingly to register staging areas for data
20334 exchange between host and device.
20336 On systems where :py:obj:`~.pageableMemoryAccessUsesHostPageTables` is
20337 true, :py:obj:`~.cudaHostRegister` will not page-lock the memory range
20338 specified by `ptr` but only populate unpopulated pages.
20340 :py:obj:`~.cudaHostRegister` is supported only on I/O coherent devices
20341 that have a non-zero value for the device attribute
20342 :py:obj:`~.cudaDevAttrHostRegisterSupported`.
20344 The `flags` parameter enables different options to be specified that
20345 affect the allocation, as follows.
20347 - :py:obj:`~.cudaHostRegisterDefault`: On a system with unified virtual
20348 addressing, the memory will be both mapped and portable. On a system
20349 with no unified virtual addressing, the memory will be neither mapped
20350 nor portable.
20352 - :py:obj:`~.cudaHostRegisterPortable`: The memory returned by this
20353 call will be considered as pinned memory by all CUDA contexts, not
20354 just the one that performed the allocation.
20356 - :py:obj:`~.cudaHostRegisterMapped`: Maps the allocation into the CUDA
20357 address space. The device pointer to the memory may be obtained by
20358 calling :py:obj:`~.cudaHostGetDevicePointer()`.
20360 - :py:obj:`~.cudaHostRegisterIoMemory`: The passed memory pointer is
20361 treated as pointing to some memory-mapped I/O space, e.g. belonging
20362       to a third-party PCIe device, and it will be marked as non cache-
20363 coherent and contiguous.
20365 - :py:obj:`~.cudaHostRegisterReadOnly`: The passed memory pointer is
20366 treated as pointing to memory that is considered read-only by the
20367 device. On platforms without
20368 :py:obj:`~.cudaDevAttrPageableMemoryAccessUsesHostPageTables`, this
20369 flag is required in order to register memory mapped to the CPU as
20370 read-only. Support for the use of this flag can be queried from the
20371 device attribute
20372 :py:obj:`~.cudaDevAttrHostRegisterReadOnlySupported`. Using this flag
20373 with a current context associated with a device that does not have
20374 this attribute set will cause :py:obj:`~.cudaHostRegister` to error
20375 with cudaErrorNotSupported.
20377 All of these flags are orthogonal to one another: a developer may page-
20378 lock memory that is portable or mapped with no restrictions.
20380 The CUDA context must have been created with the
20381     :py:obj:`~.cudaDeviceMapHost` flag in order for the
20382 :py:obj:`~.cudaHostRegisterMapped` flag to have any effect.
20384 The :py:obj:`~.cudaHostRegisterMapped` flag may be specified on CUDA
20385 contexts for devices that do not support mapped pinned memory. The
20386 failure is deferred to :py:obj:`~.cudaHostGetDevicePointer()` because
20387 the memory may be mapped into other CUDA contexts via the
20388 :py:obj:`~.cudaHostRegisterPortable` flag.
20390 For devices that have a non-zero value for the device attribute
20391 :py:obj:`~.cudaDevAttrCanUseHostPointerForRegisteredMem`, the memory
20392 can also be accessed from the device using the host pointer `ptr`. The
20393 device pointer returned by :py:obj:`~.cudaHostGetDevicePointer()` may
20394 or may not match the original host pointer `ptr` and depends on the
20395 devices visible to the application. If all devices visible to the
20396 application have a non-zero value for the device attribute, the device
20397 pointer returned by :py:obj:`~.cudaHostGetDevicePointer()` will match
20398 the original pointer `ptr`. If any device visible to the application
20399 has a zero value for the device attribute, the device pointer returned
20400 by :py:obj:`~.cudaHostGetDevicePointer()` will not match the original
20401 host pointer `ptr`, but it will be suitable for use on all devices
20402 provided Unified Virtual Addressing is enabled. In such systems, it is
20403 valid to access the memory using either pointer on devices that have a
20404 non-zero value for the device attribute. Note however that such devices
20405     should access the memory using only one of the two pointers and not both.
20407 The memory page-locked by this function must be unregistered with
20408 :py:obj:`~.cudaHostUnregister()`.
20410 Parameters
20411 ----------
20412 ptr : Any
20413 Host pointer to memory to page-lock
20414 size : size_t
20415         Size in bytes of the address range to page-lock
20416 flags : unsigned int
20417 Flags for allocation request
20419 Returns
20420 -------
20421 cudaError_t
20422 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorHostMemoryAlreadyRegistered`, :py:obj:`~.cudaErrorNotSupported`
20424 See Also
20425 --------
20426 :py:obj:`~.cudaHostUnregister`, :py:obj:`~.cudaHostGetFlags`, :py:obj:`~.cudaHostGetDevicePointer`, :py:obj:`~.cuMemHostRegister`
20427 """
20428 cyptr = _HelperInputVoidPtr(ptr)
20429 cdef void* cyptr_ptr = <void*><void_ptr>cyptr.cptr
20430 with nogil:
20431 err = cyruntime.cudaHostRegister(cyptr_ptr, size, flags)
20432 return (_dict_cudaError_t[err],)
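# Editorial usage sketch (not part of the generated bindings): page-locking
# an existing host buffer. Passing a buffer-protocol object such as a
# bytearray is an assumption for illustration; the range must be
# unregistered before the buffer is released.
#
#     buf = bytearray(1 << 20)
#     err, = cudaHostRegister(buf, len(buf), cudaHostRegisterMapped)
#     assert err == cudaError_t.cudaSuccess
#     err, = cudaHostUnregister(buf)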
20434@cython.embedsignature(True)
20435def cudaHostUnregister(ptr):
20436 """ Unregisters a memory range that was registered with cudaHostRegister.
20438 Unmaps the memory range whose base address is specified by `ptr`, and
20439 makes it pageable again.
20441 The base address must be the same one specified to
20442 :py:obj:`~.cudaHostRegister()`.
20444 Parameters
20445 ----------
20446 ptr : Any
20447 Host pointer to memory to unregister
20449 Returns
20450 -------
20451 cudaError_t
20452 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorHostMemoryNotRegistered`
20454 See Also
20455 --------
20456     :py:obj:`~.cudaHostRegister`, :py:obj:`~.cuMemHostUnregister`
20457 """
20458 cyptr = _HelperInputVoidPtr(ptr)
20459 cdef void* cyptr_ptr = <void*><void_ptr>cyptr.cptr
20460 with nogil:
20461 err = cyruntime.cudaHostUnregister(cyptr_ptr)
20462 return (_dict_cudaError_t[err],)
20464@cython.embedsignature(True)
20465def cudaHostGetDevicePointer(pHost, unsigned int flags):
20466 """ Passes back device pointer of mapped host memory allocated by cudaHostAlloc or registered by cudaHostRegister.
20468 Passes back the device pointer corresponding to the mapped, pinned host
20469 buffer allocated by :py:obj:`~.cudaHostAlloc()` or registered by
20470 :py:obj:`~.cudaHostRegister()`.
20472 :py:obj:`~.cudaHostGetDevicePointer()` will fail if the
20473 :py:obj:`~.cudaDeviceMapHost` flag was not specified before deferred
20474 context creation occurred, or if called on a device that does not
20475 support mapped, pinned memory.
20477 For devices that have a non-zero value for the device attribute
20478 :py:obj:`~.cudaDevAttrCanUseHostPointerForRegisteredMem`, the memory
20479 can also be accessed from the device using the host pointer `pHost`.
20480 The device pointer returned by :py:obj:`~.cudaHostGetDevicePointer()`
20481 may or may not match the original host pointer `pHost` and depends on
20482 the devices visible to the application. If all devices visible to the
20483 application have a non-zero value for the device attribute, the device
20484 pointer returned by :py:obj:`~.cudaHostGetDevicePointer()` will match
20485 the original pointer `pHost`. If any device visible to the application
20486 has a zero value for the device attribute, the device pointer returned
20487 by :py:obj:`~.cudaHostGetDevicePointer()` will not match the original
20488 host pointer `pHost`, but it will be suitable for use on all devices
20489 provided Unified Virtual Addressing is enabled. In such systems, it is
20490 valid to access the memory using either pointer on devices that have a
20491 non-zero value for the device attribute. Note however that such devices
20492     should access the memory using only one of the two pointers and not both.
20494     `flags` is provided for future releases. For now, it must be set to 0.
20496 Parameters
20497 ----------
20498 pHost : Any
20499 Requested host pointer mapping
20500 flags : unsigned int
20501 Flags for extensions (must be 0 for now)
20503 Returns
20504 -------
20505 cudaError_t
20506 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
20507 pDevice : Any
20508 Returned device pointer for mapped memory
20510 See Also
20511 --------
20512 :py:obj:`~.cudaSetDeviceFlags`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`
20513 """
20514 cdef void_ptr pDevice = 0
20515 cypHost = _HelperInputVoidPtr(pHost)
20516 cdef void* cypHost_ptr = <void*><void_ptr>cypHost.cptr
20517 with nogil:
20518 err = cyruntime.cudaHostGetDevicePointer(<void**>&pDevice, cypHost_ptr, flags)
20519 if err != cyruntime.cudaSuccess:
20520 return (_dict_cudaError_t[err], None)
20521 return (_dict_cudaError_t[err], pDevice)
20523@cython.embedsignature(True)
20524def cudaHostGetFlags(pHost):
20525 """ Passes back flags used to allocate pinned host memory allocated by cudaHostAlloc.
20527 :py:obj:`~.cudaHostGetFlags()` will fail if the input pointer does not
20528 reside in an address range allocated by :py:obj:`~.cudaHostAlloc()`.
20530 Parameters
20531 ----------
20532 pHost : Any
20533 Host pointer
20535 Returns
20536 -------
20537 cudaError_t
20538 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
20539 pFlags : unsigned int
20540 Returned flags word
20542 See Also
20543 --------
20544 :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemHostGetFlags`
20545 """
20546 cdef unsigned int pFlags = 0
20547 cypHost = _HelperInputVoidPtr(pHost)
20548 cdef void* cypHost_ptr = <void*><void_ptr>cypHost.cptr
20549 with nogil:
20550 err = cyruntime.cudaHostGetFlags(&pFlags, cypHost_ptr)
20551 if err != cyruntime.cudaSuccess:
20552 return (_dict_cudaError_t[err], None)
20553 return (_dict_cudaError_t[err], pFlags)
20555@cython.embedsignature(True)
20556def cudaMalloc3D(extent not None : cudaExtent):
20557 """ Allocates logical 1D, 2D, or 3D memory objects on the device.
20559 Allocates at least `width` * `height` * `depth` bytes of linear memory
20560 on the device and returns a :py:obj:`~.cudaPitchedPtr` in which `ptr`
20561 is a pointer to the allocated memory. The function may pad the
20562 allocation to ensure hardware alignment requirements are met. The pitch
20563 returned in the `pitch` field of `pitchedDevPtr` is the width in bytes
20564 of the allocation.
20566 The returned :py:obj:`~.cudaPitchedPtr` contains additional fields
20567 `xsize` and `ysize`, the logical width and height of the allocation,
20568 which are equivalent to the `width` and `height` `extent` parameters
20569 provided by the programmer during allocation.
20571 For allocations of 2D and 3D objects, it is highly recommended that
20572 programmers perform allocations using :py:obj:`~.cudaMalloc3D()` or
20573 :py:obj:`~.cudaMallocPitch()`. Due to alignment restrictions in the
20574 hardware, this is especially true if the application will be performing
20575 memory copies involving 2D or 3D objects (whether linear memory or CUDA
20576 arrays).
20578 Parameters
20579 ----------
20580 extent : :py:obj:`~.cudaExtent`
20581 Requested allocation size (`width` field in bytes)
20583 Returns
20584 -------
20585 cudaError_t
20586 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
20587 pitchedDevPtr : :py:obj:`~.cudaPitchedPtr`
20588 Pointer to allocated pitched device memory
20590 See Also
20591 --------
20592 :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMemcpy3D`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.make_cudaPitchedPtr`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.cuMemAllocPitch`
20593 """
20594 cdef cudaPitchedPtr pitchedDevPtr = cudaPitchedPtr()
20595 with nogil:
20596 err = cyruntime.cudaMalloc3D(<cyruntime.cudaPitchedPtr*>pitchedDevPtr._pvt_ptr, extent._pvt_ptr[0])
20597 if err != cyruntime.cudaSuccess:
20598 return (_dict_cudaError_t[err], None)
20599 return (_dict_cudaError_t[err], pitchedDevPtr)
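# Editorial usage sketch (not part of the generated bindings): a 3D
# allocation via make_cudaExtent (cross-referenced in the See Also above),
# inspecting the pitch and logical sizes of the returned cudaPitchedPtr.
#
#     extent = make_cudaExtent(256, 64, 16)   # width in bytes, height, depth
#     err, pitched = cudaMalloc3D(extent)
#     assert err == cudaError_t.cudaSuccess
#     print(pitched.pitch, pitched.xsize, pitched.ysize)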
20601@cython.embedsignature(True)
20602def cudaMalloc3DArray(desc : Optional[cudaChannelFormatDesc], extent not None : cudaExtent, unsigned int flags):
20603 """ Allocate an array on the device.
20605 Allocates a CUDA array according to the
20606 :py:obj:`~.cudaChannelFormatDesc` structure `desc` and returns a handle
20607 to the new CUDA array in `*array`.
20609 The :py:obj:`~.cudaChannelFormatDesc` is defined as:
20611 **View CUDA Toolkit Documentation for a C++ code example**
20613 where :py:obj:`~.cudaChannelFormatKind` is one of
20614 :py:obj:`~.cudaChannelFormatKindSigned`,
20615 :py:obj:`~.cudaChannelFormatKindUnsigned`, or
20616 :py:obj:`~.cudaChannelFormatKindFloat`.
20618 :py:obj:`~.cudaMalloc3DArray()` can allocate the following:
20620 - A 1D array is allocated if the height and depth extents are both
20621 zero.
20623 - A 2D array is allocated if only the depth extent is zero.
20625 - A 3D array is allocated if all three extents are non-zero.
20627 - A 1D layered CUDA array is allocated if only the height extent is
20628 zero and the cudaArrayLayered flag is set. Each layer is a 1D array.
20629 The number of layers is determined by the depth extent.
20631 - A 2D layered CUDA array is allocated if all three extents are non-
20632 zero and the cudaArrayLayered flag is set. Each layer is a 2D array.
20633 The number of layers is determined by the depth extent.
20635 - A cubemap CUDA array is allocated if all three extents are non-zero
20636 and the cudaArrayCubemap flag is set. Width must be equal to height,
20637 and depth must be six. A cubemap is a special type of 2D layered CUDA
20638 array, where the six layers represent the six faces of a cube. The
20639 order of the six layers in memory is the same as that listed in
20640 :py:obj:`~.cudaGraphicsCubeFace`.
20642 - A cubemap layered CUDA array is allocated if all three extents are
20643 non-zero, and both, cudaArrayCubemap and cudaArrayLayered flags are
20644 set. Width must be equal to height, and depth must be a multiple of
20645 six. A cubemap layered CUDA array is a special type of 2D layered
20646 CUDA array that consists of a collection of cubemaps. The first six
20647 layers represent the first cubemap, the next six layers form the
20648 second cubemap, and so on.
20650 The `flags` parameter enables different options to be specified that
20651 affect the allocation, as follows.
20653 - :py:obj:`~.cudaArrayDefault`: This flag's value is defined to be 0
20654 and provides default array allocation
20656 - :py:obj:`~.cudaArrayLayered`: Allocates a layered CUDA array, with
20657 the depth extent indicating the number of layers
20659 - :py:obj:`~.cudaArrayCubemap`: Allocates a cubemap CUDA array. Width
20660 must be equal to height, and depth must be six. If the
20661 cudaArrayLayered flag is also set, depth must be a multiple of six.
20663 - :py:obj:`~.cudaArraySurfaceLoadStore`: Allocates a CUDA array that
20664 could be read from or written to using a surface reference.
20666 - :py:obj:`~.cudaArrayTextureGather`: This flag indicates that texture
20667 gather operations will be performed on the CUDA array. Texture gather
20668 can only be performed on 2D CUDA arrays.
20670 - :py:obj:`~.cudaArraySparse`: Allocates a CUDA array without physical
20671 backing memory. The subregions within this sparse array can later be
20672 mapped onto a physical memory allocation by calling
20673 :py:obj:`~.cuMemMapArrayAsync`. This flag can only be used for
20674 creating 2D, 3D or 2D layered sparse CUDA arrays. The physical
20675 backing memory must be allocated via :py:obj:`~.cuMemCreate`.
20677 - :py:obj:`~.cudaArrayDeferredMapping`: Allocates a CUDA array without
20678 physical backing memory. The entire array can later be mapped onto a
20679 physical memory allocation by calling :py:obj:`~.cuMemMapArrayAsync`.
20680 The physical backing memory must be allocated via
20681 :py:obj:`~.cuMemCreate`.
20683 The width, height and depth extents must meet certain size requirements
20684 as listed in the following table. All values are specified in elements.
20686 Note that 2D CUDA arrays have different size requirements if the
20687 :py:obj:`~.cudaArrayTextureGather` flag is set. In that case, the valid
20688 range for (width, height, depth) is ((1,maxTexture2DGather[0]),
20689 (1,maxTexture2DGather[1]), 0).
20691 **View CUDA Toolkit Documentation for a table example**
20693 Parameters
20694 ----------
20695 desc : :py:obj:`~.cudaChannelFormatDesc`
20696 Requested channel format
20697 extent : :py:obj:`~.cudaExtent`
20698 Requested allocation size (`width` field in elements)
20699 flags : unsigned int
20700 Flags for extensions
20702 Returns
20703 -------
20704 cudaError_t
20705 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
20706 array : :py:obj:`~.cudaArray_t`
20707 Pointer to allocated array in device memory
20709 See Also
20710 --------
20711 :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.cuArray3DCreate`
20712 """
20713 cdef cudaArray_t array = cudaArray_t()
20714 cdef cyruntime.cudaChannelFormatDesc* cydesc_ptr = desc._pvt_ptr if desc is not None else NULL
20715 with nogil:
20716 err = cyruntime.cudaMalloc3DArray(<cyruntime.cudaArray_t*>array._pvt_ptr, cydesc_ptr, extent._pvt_ptr[0], flags)
20717 if err != cyruntime.cudaSuccess:
20718 return (_dict_cudaError_t[err], None)
20719 return (_dict_cudaError_t[err], array)
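# Editorial usage sketch (not part of the generated bindings): a 2D layered
# array with 8 layers; per the rules above, all three extents are non-zero
# and cudaArrayLayered is set, with the depth extent giving the layer
# count. Channel-descriptor fields follow the earlier cudaMallocArray
# sketch.
#
#     desc = cudaChannelFormatDesc()
#     desc.x, desc.y, desc.z, desc.w = 32, 0, 0, 0
#     desc.f = cudaChannelFormatKind.cudaChannelFormatKindFloat
#     extent = make_cudaExtent(1024, 768, 8)  # width/height in elements
#     err, array = cudaMalloc3DArray(desc, extent, cudaArrayLayered)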
20721@cython.embedsignature(True)
20722def cudaMallocMipmappedArray(desc : Optional[cudaChannelFormatDesc], extent not None : cudaExtent, unsigned int numLevels, unsigned int flags):
20723 """ Allocate a mipmapped array on the device.
20725 Allocates a CUDA mipmapped array according to the
20726 :py:obj:`~.cudaChannelFormatDesc` structure `desc` and returns a handle
20727 to the new CUDA mipmapped array in `*mipmappedArray`. `numLevels`
20728 specifies the number of mipmap levels to be allocated. This value is
20729 clamped to the range [1, 1 + floor(log2(max(width, height, depth)))].
20731 The :py:obj:`~.cudaChannelFormatDesc` is defined as:
20733 **View CUDA Toolkit Documentation for a C++ code example**
20735 where :py:obj:`~.cudaChannelFormatKind` is one of
20736 :py:obj:`~.cudaChannelFormatKindSigned`,
20737 :py:obj:`~.cudaChannelFormatKindUnsigned`, or
20738 :py:obj:`~.cudaChannelFormatKindFloat`.
20740 :py:obj:`~.cudaMallocMipmappedArray()` can allocate the following:
20742 - A 1D mipmapped array is allocated if the height and depth extents are
20743 both zero.
20745 - A 2D mipmapped array is allocated if only the depth extent is zero.
20747 - A 3D mipmapped array is allocated if all three extents are non-zero.
20749 - A 1D layered CUDA mipmapped array is allocated if only the height
20750 extent is zero and the cudaArrayLayered flag is set. Each layer is a
20751 1D mipmapped array. The number of layers is determined by the depth
20752 extent.
20754 - A 2D layered CUDA mipmapped array is allocated if all three extents
20755 are non-zero and the cudaArrayLayered flag is set. Each layer is a 2D
20756 mipmapped array. The number of layers is determined by the depth
20757 extent.
20759 - A cubemap CUDA mipmapped array is allocated if all three extents are
20760 non-zero and the cudaArrayCubemap flag is set. Width must be equal to
20761 height, and depth must be six. The order of the six layers in memory
20762 is the same as that listed in :py:obj:`~.cudaGraphicsCubeFace`.
20764 - A cubemap layered CUDA mipmapped array is allocated if all three
20765 extents are non-zero, and both, cudaArrayCubemap and cudaArrayLayered
20766 flags are set. Width must be equal to height, and depth must be a
20767 multiple of six. A cubemap layered CUDA mipmapped array is a special
20768 type of 2D layered CUDA mipmapped array that consists of a collection
20769 of cubemap mipmapped arrays. The first six layers represent the first
20770 cubemap mipmapped array, the next six layers form the second cubemap
20771 mipmapped array, and so on.
20773 The `flags` parameter enables different options to be specified that
20774 affect the allocation, as follows.
20776 - :py:obj:`~.cudaArrayDefault`: This flag's value is defined to be 0
20777 and provides default mipmapped array allocation
20779 - :py:obj:`~.cudaArrayLayered`: Allocates a layered CUDA mipmapped
20780 array, with the depth extent indicating the number of layers
20782 - :py:obj:`~.cudaArrayCubemap`: Allocates a cubemap CUDA mipmapped
20783 array. Width must be equal to height, and depth must be six. If the
20784 cudaArrayLayered flag is also set, depth must be a multiple of six.
20786 - :py:obj:`~.cudaArraySurfaceLoadStore`: This flag indicates that
20787 individual mipmap levels of the CUDA mipmapped array will be read
20788 from or written to using a surface reference.
20790 - :py:obj:`~.cudaArrayTextureGather`: This flag indicates that texture
20791 gather operations will be performed on the CUDA array. Texture gather
20792 can only be performed on 2D CUDA mipmapped arrays, and the gather
20793 operations are performed only on the most detailed mipmap level.
20795 - :py:obj:`~.cudaArraySparse`: Allocates a CUDA mipmapped array without
20796 physical backing memory. The subregions within this sparse array can
20797 later be mapped onto a physical memory allocation by calling
20798 :py:obj:`~.cuMemMapArrayAsync`. This flag can only be used for
20799 creating 2D, 3D or 2D layered sparse CUDA mipmapped arrays. The
20800 physical backing memory must be allocated via
20801 :py:obj:`~.cuMemCreate`.
20803 - :py:obj:`~.cudaArrayDeferredMapping`: Allocates a CUDA mipmapped
20804 array without physical backing memory. The entire array can later be
20805 mapped onto a physical memory allocation by calling
20806 :py:obj:`~.cuMemMapArrayAsync`. The physical backing memory must be
20807 allocated via :py:obj:`~.cuMemCreate`.
20809 The width, height and depth extents must meet certain size requirements
20810 as listed in the following table. All values are specified in elements.
20812 **View CUDA Toolkit Documentation for a table example**
20814 Parameters
20815 ----------
20816 desc : :py:obj:`~.cudaChannelFormatDesc`
20817 Requested channel format
20818 extent : :py:obj:`~.cudaExtent`
20819 Requested allocation size (`width` field in elements)
20820 numLevels : unsigned int
20821 Number of mipmap levels to allocate
20822 flags : unsigned int
20823 Flags for extensions
20825 Returns
20826 -------
20827 cudaError_t
20828 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
20829 mipmappedArray : :py:obj:`~.cudaMipmappedArray_t`
20830 Pointer to allocated mipmapped array in device memory
20832 See Also
20833 --------
20834 :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.cuMipmappedArrayCreate`
20835 """
20836 cdef cudaMipmappedArray_t mipmappedArray = cudaMipmappedArray_t()
20837 cdef cyruntime.cudaChannelFormatDesc* cydesc_ptr = desc._pvt_ptr if desc is not None else NULL
20838 with nogil:
20839 err = cyruntime.cudaMallocMipmappedArray(<cyruntime.cudaMipmappedArray_t*>mipmappedArray._pvt_ptr, cydesc_ptr, extent._pvt_ptr[0], numLevels, flags)
20840 if err != cyruntime.cudaSuccess:
20841 return (_dict_cudaError_t[err], None)
20842 return (_dict_cudaError_t[err], mipmappedArray)
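# Editorial usage sketch (not part of the generated bindings): a full mip
# chain for a 1024x1024 2D array. numLevels is clamped to
# [1, 1 + floor(log2(max(width, height, depth)))], which for 1024x1024
# allows 1 + floor(log2(1024)) = 11 levels.
#
#     import math
#     w, h = 1024, 1024
#     num_levels = 1 + int(math.log2(max(w, h, 1)))
#     desc = cudaChannelFormatDesc()
#     desc.x, desc.f = 32, cudaChannelFormatKind.cudaChannelFormatKindFloat
#     err, mipArr = cudaMallocMipmappedArray(
#         desc, make_cudaExtent(w, h, 0), num_levels, cudaArrayDefault)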
20844@cython.embedsignature(True)
20845def cudaGetMipmappedArrayLevel(mipmappedArray, unsigned int level):
20846 """ Gets a mipmap level of a CUDA mipmapped array.
20848 Returns in `*levelArray` a CUDA array that represents a single mipmap
20849 level of the CUDA mipmapped array `mipmappedArray`.
20851 If `level` is greater than the maximum number of levels in this
20852 mipmapped array, :py:obj:`~.cudaErrorInvalidValue` is returned.
20854 If `mipmappedArray` is NULL, :py:obj:`~.cudaErrorInvalidResourceHandle`
20855 is returned.
20857 Parameters
20858 ----------
20859 mipmappedArray : :py:obj:`~.cudaMipmappedArray_const_t`
20860 CUDA mipmapped array
20861 level : unsigned int
20862 Mipmap level
20864 Returns
20865 -------
20866 cudaError_t
20867         :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
20868 levelArray : :py:obj:`~.cudaArray_t`
20869 Returned mipmap level CUDA array
20871 See Also
20872 --------
20873 :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.cuMipmappedArrayGetLevel`
20874 """
20875 cdef cyruntime.cudaMipmappedArray_const_t cymipmappedArray
20876 if mipmappedArray is None:
20877 pmipmappedArray = 0
20878 elif isinstance(mipmappedArray, (cudaMipmappedArray_const_t,)):
20879 pmipmappedArray = int(mipmappedArray)
20880 else:
20881 pmipmappedArray = int(cudaMipmappedArray_const_t(mipmappedArray))
20882 cymipmappedArray = <cyruntime.cudaMipmappedArray_const_t><void_ptr>pmipmappedArray
20883 cdef cudaArray_t levelArray = cudaArray_t()
20884 with nogil:
20885 err = cyruntime.cudaGetMipmappedArrayLevel(<cyruntime.cudaArray_t*>levelArray._pvt_ptr, cymipmappedArray, level)
20886 if err != cyruntime.cudaSuccess:
20887 return (_dict_cudaError_t[err], None)
20888 return (_dict_cudaError_t[err], levelArray)
20890@cython.embedsignature(True)
20891def cudaMemcpy3D(p : Optional[cudaMemcpy3DParms]):
20892 """ Copies data between 3D objects.
20894 **View CUDA Toolkit Documentation for a C++ code example**
20896     :py:obj:`~.cudaMemcpy3D()` copies data between two 3D objects. The
20897 source and destination objects may be in either host memory, device
20898 memory, or a CUDA array. The source, destination, extent, and kind of
20899 copy performed is specified by the :py:obj:`~.cudaMemcpy3DParms` struct
20900 which should be initialized to zero before use:
20902 **View CUDA Toolkit Documentation for a C++ code example**
20904 The struct passed to :py:obj:`~.cudaMemcpy3D()` must specify one of
20905 `srcArray` or `srcPtr` and one of `dstArray` or `dstPtr`. Passing more
20906 than one non-zero source or destination will cause
20907 :py:obj:`~.cudaMemcpy3D()` to return an error.
20909 The `srcPos` and `dstPos` fields are optional offsets into the source
20910 and destination objects and are defined in units of each object's
20911 elements. The element for a host or device pointer is assumed to be
20912 unsigned char.
20914 The `extent` field defines the dimensions of the transferred area in
20915 elements. If a CUDA array is participating in the copy, the extent is
20916 defined in terms of that array's elements. If no CUDA array is
20917 participating in the copy then the extents are defined in elements of
20918 unsigned char.
20920 The `kind` field defines the direction of the copy. It must be one of
20921 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
20922 :py:obj:`~.cudaMemcpyDeviceToHost`,
20923 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
20924 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
20925 type of transfer is inferred from the pointer values. However,
20926 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
20927 unified virtual addressing. If :py:obj:`~.cudaMemcpyHostToHost`,
20928 :py:obj:`~.cudaMemcpyHostToDevice`, or
20929 :py:obj:`~.cudaMemcpyDeviceToHost` is passed as kind with a cudaArray
20930 as source or destination, and that kind implies the cudaArray is on
20931 the host, :py:obj:`~.cudaMemcpy3D()` will disregard the implication
20932 and silently correct the kind, since a cudaArray can only reside on
20933 the device.
20935 If the source and destination are both arrays,
20936 :py:obj:`~.cudaMemcpy3D()` will return an error if they do not have the
20937 same element size.
20939 The source and destination object may not overlap. If overlapping
20940 source and destination objects are specified, undefined behavior will
20941 result.
20943 The source object must entirely contain the region defined by `srcPos`
20944 and `extent`. The destination object must entirely contain the region
20945 defined by `dstPos` and `extent`.
20947 :py:obj:`~.cudaMemcpy3D()` returns an error if the pitch of `srcPtr` or
20948 `dstPtr` exceeds the maximum allowed. The pitch of a
20949 :py:obj:`~.cudaPitchedPtr` allocated with :py:obj:`~.cudaMalloc3D()`
20950 will always be valid.
20952 Parameters
20953 ----------
20954 p : :py:obj:`~.cudaMemcpy3DParms`
20955 3D memory copy parameters
20957 Returns
20958 -------
20959 cudaError_t
20960 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
20962 See Also
20963 --------
20964 :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemcpy3DAsync`, :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.make_cudaPos`, :py:obj:`~.cuMemcpy3D`
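Examples
--------
A minimal sketch, not taken from the CUDA documentation; it copies a
tightly packed host volume into a pitched 3D device allocation. It
assumes NumPy, a CUDA device, and that struct fields are settable as
shown:

>>> import numpy as np
>>> from cuda.bindings import runtime
>>> w, h, d = 16, 8, 4
>>> host = np.ones((d, h, w), dtype=np.uint8)
>>> extent = runtime.cudaExtent()
>>> extent.width, extent.height, extent.depth = w, h, d
>>> err, devPitched = runtime.cudaMalloc3D(extent)
>>> srcPtr = runtime.cudaPitchedPtr()
>>> srcPtr.ptr = host.ctypes.data
>>> srcPtr.pitch, srcPtr.xsize, srcPtr.ysize = w, w, h
>>> parms = runtime.cudaMemcpy3DParms()
>>> parms.srcPtr, parms.dstPtr = srcPtr, devPitched
>>> parms.extent = extent
>>> parms.kind = runtime.cudaMemcpyKind.cudaMemcpyHostToDevice
>>> err, = runtime.cudaMemcpy3D(parms)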
20965 """
20966 cdef cyruntime.cudaMemcpy3DParms* cyp_ptr = p._pvt_ptr if p is not None else NULL
20967 with nogil:
20968 err = cyruntime.cudaMemcpy3D(cyp_ptr)
20969 return (_dict_cudaError_t[err],)
20971@cython.embedsignature(True)
20972def cudaMemcpy3DPeer(p : Optional[cudaMemcpy3DPeerParms]):
20973 """ Copies memory between devices.
20975 Perform a 3D memory copy according to the parameters specified in `p`.
20976 See the definition of the :py:obj:`~.cudaMemcpy3DPeerParms` structure
20977 for documentation of its parameters.
20979 Note that this function is synchronous with respect to the host only if
20980 the source or destination of the transfer is host memory. Note also
20981 that this copy is serialized with respect to all pending and future
20982 asynchronous work in the current device, the copy's source device,
20983 and the copy's destination device (use
20984 :py:obj:`~.cudaMemcpy3DPeerAsync` to avoid this synchronization).
20986 Parameters
20987 ----------
20988 p : :py:obj:`~.cudaMemcpy3DPeerParms`
20989 Parameters for the memory copy
20991 Returns
20992 -------
20993 cudaError_t
20994 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidPitchValue`
20996 See Also
20997 --------
20998 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpyPeerAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cuMemcpy3DPeer`
20999 """
21000 cdef cyruntime.cudaMemcpy3DPeerParms* cyp_ptr = p._pvt_ptr if p is not None else NULL
21001 with nogil:
21002 err = cyruntime.cudaMemcpy3DPeer(cyp_ptr)
21003 return (_dict_cudaError_t[err],)
21005@cython.embedsignature(True)
21006def cudaMemcpy3DAsync(p : Optional[cudaMemcpy3DParms], stream):
21007 """ Copies data between 3D objects.
21009 **View CUDA Toolkit Documentation for a C++ code example**
21011 :py:obj:`~.cudaMemcpy3DAsync()` copies data between two 3D objects. The
21012 source and destination objects may be in either host memory, device
21013 memory, or a CUDA array. The source, destination, extent, and kind of
21014 copy performed are specified by the :py:obj:`~.cudaMemcpy3DParms` struct
21015 which should be initialized to zero before use:
21017 **View CUDA Toolkit Documentation for a C++ code example**
21019 The struct passed to :py:obj:`~.cudaMemcpy3DAsync()` must specify one
21020 of `srcArray` or `srcPtr` and one of `dstArray` or `dstPtr`. Passing
21021 more than one non-zero source or destination will cause
21022 :py:obj:`~.cudaMemcpy3DAsync()` to return an error.
21024 The `srcPos` and `dstPos` fields are optional offsets into the source
21025 and destination objects and are defined in units of each object's
21026 elements. The element for a host or device pointer is assumed to be
21027 unsigned char. For CUDA arrays, positions must be in the range [0,
21028 2048) for any dimension.
21030 The `extent` field defines the dimensions of the transferred area in
21031 elements. If a CUDA array is participating in the copy, the extent is
21032 defined in terms of that array's elements. If no CUDA array is
21033 participating in the copy then the extents are defined in elements of
21034 unsigned char.
21036 The `kind` field defines the direction of the copy. It must be one of
21037 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
21038 :py:obj:`~.cudaMemcpyDeviceToHost`,
21039 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
21040 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
21041 type of transfer is inferred from the pointer values. However,
21042 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
21043 unified virtual addressing. If :py:obj:`~.cudaMemcpyHostToHost`,
21044 :py:obj:`~.cudaMemcpyHostToDevice`, or
21045 :py:obj:`~.cudaMemcpyDeviceToHost` is passed as kind with a cudaArray
21046 as source or destination, and that kind implies the cudaArray is on
21047 the host, :py:obj:`~.cudaMemcpy3DAsync()` will disregard the
21048 implication and silently correct the kind, since a cudaArray can only
21049 reside on the device.
21051 If the source and destination are both arrays,
21052 :py:obj:`~.cudaMemcpy3DAsync()` will return an error if they do not
21053 have the same element size.
21055 The source and destination object may not overlap. If overlapping
21056 source and destination objects are specified, undefined behavior will
21057 result.
21059 The source object must entirely contain the region defined by
21060 `srcPos` and `extent`. The destination object must entirely contain
21061 the region defined by `dstPos` and `extent`.
21063 :py:obj:`~.cudaMemcpy3DAsync()` returns an error if the pitch of
21064 `srcPtr` or `dstPtr` exceeds the maximum allowed. The pitch of a
21065 :py:obj:`~.cudaPitchedPtr` allocated with :py:obj:`~.cudaMalloc3D()`
21066 will always be valid.
21068 :py:obj:`~.cudaMemcpy3DAsync()` is asynchronous with respect to the
21069 host, so the call may return before the copy is complete. The copy can
21070 optionally be associated with a stream by passing a non-zero `stream`
21071 argument. If `kind` is :py:obj:`~.cudaMemcpyHostToDevice` or
21072 :py:obj:`~.cudaMemcpyDeviceToHost` and `stream` is non-zero, the copy
21073 may overlap with operations in other streams.
21075 The device version of this function only handles device to device
21076 copies and cannot be given local or shared pointers.
21078 Parameters
21079 ----------
21080 p : :py:obj:`~.cudaMemcpy3DParms`
21081 3D memory copy parameters
21082 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
21083 Stream identifier
21085 Returns
21086 -------
21087 cudaError_t
21088 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
21090 See Also
21091 --------
21092 :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemcpy3D`, :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.make_cudaPos`, :py:obj:`~.cuMemcpy3DAsync`
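Examples
--------
A minimal sketch; assuming `parms` has been prepared as in the
:py:obj:`~.cudaMemcpy3D` example (same imports and allocations), the
same copy can be issued on a non-default stream:

>>> err, stream = runtime.cudaStreamCreate()
>>> err, = runtime.cudaMemcpy3DAsync(parms, stream)
>>> err, = runtime.cudaStreamSynchronize(stream)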
21093 """
21094 cdef cyruntime.cudaStream_t cystream
21095 if stream is None:
21096 pstream = 0
21097 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
21098 pstream = int(stream)
21099 else:
21100 pstream = int(cudaStream_t(stream))
21101 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
21102 cdef cyruntime.cudaMemcpy3DParms* cyp_ptr = p._pvt_ptr if p is not None else NULL
21103 with nogil:
21104 err = cyruntime.cudaMemcpy3DAsync(cyp_ptr, cystream)
21105 return (_dict_cudaError_t[err],)
21107@cython.embedsignature(True)
21108def cudaMemcpy3DPeerAsync(p : Optional[cudaMemcpy3DPeerParms], stream):
21109 """ Copies memory between devices asynchronously.
21111 Perform a 3D memory copy according to the parameters specified in `p`.
21112 See the definition of the :py:obj:`~.cudaMemcpy3DPeerParms` structure
21113 for documentation of its parameters.
21115 Parameters
21116 ----------
21117 p : :py:obj:`~.cudaMemcpy3DPeerParms`
21118 Parameters for the memory copy
21119 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
21120 Stream identifier
21122 Returns
21123 -------
21124 cudaError_t
21125 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidPitchValue`
21127 See Also
21128 --------
21129 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpyPeerAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cuMemcpy3DPeerAsync`
21130 """
21131 cdef cyruntime.cudaStream_t cystream
21132 if stream is None:
21133 pstream = 0
21134 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
21135 pstream = int(stream)
21136 else:
21137 pstream = int(cudaStream_t(stream))
21138 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
21139 cdef cyruntime.cudaMemcpy3DPeerParms* cyp_ptr = p._pvt_ptr if p is not None else NULL
21140 with nogil:
21141 err = cyruntime.cudaMemcpy3DPeerAsync(cyp_ptr, cystream)
21142 return (_dict_cudaError_t[err],)
21144@cython.embedsignature(True)
21145def cudaMemGetInfo():
21146 """ Gets free and total device memory.
21148 Returns in `*total` the total amount of memory available to the
21149 current context. Returns in `*free` the amount of memory on the device
21150 that is free according to the OS. CUDA is not guaranteed to be able to
21151 allocate all of the memory that the OS reports as free. In a multi-
21152 tenant situation, the free estimate is subject to a race condition: an
21153 allocation or free performed by a different process, or by a different
21154 thread in the same process, between the time free memory is estimated
21155 and the time it is reported will cause the reported free value to
21156 deviate from the actual amount of free memory.
21158 The integrated GPU on Tegra shares memory with the CPU and other
21159 components of the SoC. The free and total values returned by the API
21160 exclude the SWAP memory space maintained by the OS on some platforms.
21161 The OS may move some of the memory pages into swap area as the GPU or
21162 CPU allocates or accesses memory. See the Tegra app note on how to
21163 calculate total and free memory on Tegra.
21165 Returns
21166 -------
21167 cudaError_t
21168 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorLaunchFailure`
21169 free : int
21170 Returned free memory in bytes
21171 total : int
21172 Returned total memory in bytes
21174 See Also
21175 --------
21176 :py:obj:`~.cuMemGetInfo`
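Examples
--------
A minimal sketch; the returned values are a point-in-time snapshot
and may already be stale when used (see the race-condition note
above):

>>> from cuda.bindings import runtime
>>> err, free, total = runtime.cudaMemGetInfo()
>>> if err == runtime.cudaError_t.cudaSuccess:
...     print(f"{free} of {total} bytes free")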
21177 """
21178 cdef size_t free = 0
21179 cdef size_t total = 0
21180 with nogil:
21181 err = cyruntime.cudaMemGetInfo(&free, &total)
21182 if err != cyruntime.cudaSuccess:
21183 return (_dict_cudaError_t[err], None, None)
21184 return (_dict_cudaError_t[err], free, total)
21186@cython.embedsignature(True)
21187def cudaArrayGetInfo(array):
21188 """ Gets info about the specified cudaArray.
21190 Returns in `*desc`, `*extent` and `*flags` respectively, the type,
21191 shape and flags of `array`.
21193 Any of `*desc`, `*extent` and `*flags` may be specified as NULL.
21195 Parameters
21196 ----------
21197 array : :py:obj:`~.cudaArray_t`
21198 The :py:obj:`~.cudaArray` to get info for
21200 Returns
21201 -------
21202 cudaError_t
21203 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
21204 desc : :py:obj:`~.cudaChannelFormatDesc`
21205 Returned array type
21206 extent : :py:obj:`~.cudaExtent`
21207 Returned array shape. 2D arrays will have depth of zero
21208 flags : unsigned int
21209 Returned array flags
21211 See Also
21212 --------
21213 :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuArray3DGetDescriptor`
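Examples
--------
A minimal sketch, assuming a CUDA device; it allocates a small 2D
float array and reads back its descriptor, extent and flags:

>>> from cuda.bindings import runtime
>>> err, desc = runtime.cudaCreateChannelDesc(
...     32, 0, 0, 0, runtime.cudaChannelFormatKind.cudaChannelFormatKindFloat)
>>> err, arr = runtime.cudaMallocArray(desc, 64, 32, 0)
>>> err, desc2, extent, flags = runtime.cudaArrayGetInfo(arr)
>>> err, = runtime.cudaFreeArray(arr)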
21214 """
21215 cdef cyruntime.cudaArray_t cyarray
21216 if array is None:
21217 parray = 0
21218 elif isinstance(array, (cudaArray_t,)):
21219 parray = int(array)
21220 else:
21221 parray = int(cudaArray_t(array))
21222 cyarray = <cyruntime.cudaArray_t><void_ptr>parray
21223 cdef cudaChannelFormatDesc desc = cudaChannelFormatDesc()
21224 cdef cudaExtent extent = cudaExtent()
21225 cdef unsigned int flags = 0
21226 with nogil:
21227 err = cyruntime.cudaArrayGetInfo(<cyruntime.cudaChannelFormatDesc*>desc._pvt_ptr, <cyruntime.cudaExtent*>extent._pvt_ptr, &flags, cyarray)
21228 if err != cyruntime.cudaSuccess:
21229 return (_dict_cudaError_t[err], None, None, None)
21230 return (_dict_cudaError_t[err], desc, extent, flags)
21232@cython.embedsignature(True)
21233def cudaArrayGetPlane(hArray, unsigned int planeIdx):
21234 """ Gets a CUDA array plane from a CUDA array.
21236 Returns in `pPlaneArray` a CUDA array that represents a single format
21237 plane of the CUDA array `hArray`.
21239 If `planeIdx` is greater than the maximum number of planes in this
21240 array, or if the array does not have a multi-planar format (e.g.,
21241 :py:obj:`~.cudaChannelFormatKindNV12`), then
21242 :py:obj:`~.cudaErrorInvalidValue` is returned.
21244 Note that if the `hArray` has format
21245 :py:obj:`~.cudaChannelFormatKindNV12`, then passing in 0 for `planeIdx`
21246 returns a CUDA array of the same size as `hArray` but with one 8-bit
21247 channel and :py:obj:`~.cudaChannelFormatKindUnsigned` as its format
21248 kind. If 1 is passed for `planeIdx`, then the returned CUDA array has
21249 half the height and width of `hArray` with two 8-bit channels and
21250 :py:obj:`~.cudaChannelFormatKindUnsigned` as its format kind.
21252 Parameters
21253 ----------
21254 hArray : :py:obj:`~.cudaArray_t`
21255 CUDA array
21256 planeIdx : unsigned int
21257 Plane index
21259 Returns
21260 -------
21261 cudaError_t
21262 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
21263 pPlaneArray : :py:obj:`~.cudaArray_t`
21264 Returned CUDA array referenced by the `planeIdx`
21266 See Also
21267 --------
21268 :py:obj:`~.cuArrayGetPlane`
21269 """
21270 cdef cyruntime.cudaArray_t cyhArray
21271 if hArray is None:
21272 phArray = 0
21273 elif isinstance(hArray, (cudaArray_t,)):
21274 phArray = int(hArray)
21275 else:
21276 phArray = int(cudaArray_t(hArray))
21277 cyhArray = <cyruntime.cudaArray_t><void_ptr>phArray
21278 cdef cudaArray_t pPlaneArray = cudaArray_t()
21279 with nogil:
21280 err = cyruntime.cudaArrayGetPlane(<cyruntime.cudaArray_t*>pPlaneArray._pvt_ptr, cyhArray, planeIdx)
21281 if err != cyruntime.cudaSuccess:
21282 return (_dict_cudaError_t[err], None)
21283 return (_dict_cudaError_t[err], pPlaneArray)
21285@cython.embedsignature(True)
21286def cudaArrayGetMemoryRequirements(array, int device):
21287 """ Returns the memory requirements of a CUDA array.
21289 Returns the memory requirements of a CUDA array in
21290 `memoryRequirements`. If the CUDA array is not allocated with the flag
21291 :py:obj:`~.cudaArrayDeferredMapping`, :py:obj:`~.cudaErrorInvalidValue`
21292 will be returned.
21294 The returned value in :py:obj:`~.cudaArrayMemoryRequirements.size`
21295 represents the total size of the CUDA array. The returned value in
21296 :py:obj:`~.cudaArrayMemoryRequirements.alignment` represents the
21297 alignment necessary for mapping the CUDA array.
21299 Parameters
21300 ----------
21301 array : :py:obj:`~.cudaArray_t`
21302 CUDA array to get the memory requirements of
21303 device : int
21304 Device to get the memory requirements for
21306 Returns
21307 -------
21308 cudaError_t
21309 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
21310 memoryRequirements : :py:obj:`~.cudaArrayMemoryRequirements`
21311 Pointer to :py:obj:`~.cudaArrayMemoryRequirements`
21313 See Also
21314 --------
21315 :py:obj:`~.cudaMipmappedArrayGetMemoryRequirements`
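Examples
--------
A minimal sketch, assuming a device that supports deferred mapping;
the array must have been created with
:py:obj:`~.cudaArrayDeferredMapping`:

>>> from cuda.bindings import runtime
>>> err, desc = runtime.cudaCreateChannelDesc(
...     32, 0, 0, 0, runtime.cudaChannelFormatKind.cudaChannelFormatKindFloat)
>>> err, arr = runtime.cudaMallocArray(
...     desc, 64, 64, runtime.cudaArrayDeferredMapping)
>>> err, req = runtime.cudaArrayGetMemoryRequirements(arr, 0)
>>> if err == runtime.cudaError_t.cudaSuccess:
...     print(req.size, req.alignment)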
21316 """
21317 cdef cyruntime.cudaArray_t cyarray
21318 if array is None:
21319 parray = 0
21320 elif isinstance(array, (cudaArray_t,)):
21321 parray = int(array)
21322 else:
21323 parray = int(cudaArray_t(array))
21324 cyarray = <cyruntime.cudaArray_t><void_ptr>parray
21325 cdef cudaArrayMemoryRequirements memoryRequirements = cudaArrayMemoryRequirements()
21326 with nogil:
21327 err = cyruntime.cudaArrayGetMemoryRequirements(<cyruntime.cudaArrayMemoryRequirements*>memoryRequirements._pvt_ptr, cyarray, device)
21328 if err != cyruntime.cudaSuccess:
21329 return (_dict_cudaError_t[err], None)
21330 return (_dict_cudaError_t[err], memoryRequirements)
21332@cython.embedsignature(True)
21333def cudaMipmappedArrayGetMemoryRequirements(mipmap, int device):
21334 """ Returns the memory requirements of a CUDA mipmapped array.
21336 Returns the memory requirements of a CUDA mipmapped array in
21337 `memoryRequirements`. If the CUDA mipmapped array is not allocated
21338 with the flag :py:obj:`~.cudaArrayDeferredMapping`,
21339 :py:obj:`~.cudaErrorInvalidValue` will be returned.
21341 The returned value in :py:obj:`~.cudaArrayMemoryRequirements.size`
21342 represents the total size of the CUDA mipmapped array. The returned
21343 value in :py:obj:`~.cudaArrayMemoryRequirements.alignment` represents
21344 the alignment necessary for mapping the CUDA mipmapped array.
21346 Parameters
21347 ----------
21348 mipmap : :py:obj:`~.cudaMipmappedArray_t`
21349 CUDA mipmapped array to get the memory requirements of
21350 device : int
21351 Device to get the memory requirements for
21353 Returns
21354 -------
21355 cudaError_t
21356 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
21357 memoryRequirements : :py:obj:`~.cudaArrayMemoryRequirements`
21358 Pointer to :py:obj:`~.cudaArrayMemoryRequirements`
21360 See Also
21361 --------
21362 :py:obj:`~.cudaArrayGetMemoryRequirements`
21363 """
21364 cdef cyruntime.cudaMipmappedArray_t cymipmap
21365 if mipmap is None:
21366 pmipmap = 0
21367 elif isinstance(mipmap, (cudaMipmappedArray_t,)):
21368 pmipmap = int(mipmap)
21369 else:
21370 pmipmap = int(cudaMipmappedArray_t(mipmap))
21371 cymipmap = <cyruntime.cudaMipmappedArray_t><void_ptr>pmipmap
21372 cdef cudaArrayMemoryRequirements memoryRequirements = cudaArrayMemoryRequirements()
21373 with nogil:
21374 err = cyruntime.cudaMipmappedArrayGetMemoryRequirements(<cyruntime.cudaArrayMemoryRequirements*>memoryRequirements._pvt_ptr, cymipmap, device)
21375 if err != cyruntime.cudaSuccess:
21376 return (_dict_cudaError_t[err], None)
21377 return (_dict_cudaError_t[err], memoryRequirements)
21379@cython.embedsignature(True)
21380def cudaArrayGetSparseProperties(array):
21381 """ Returns the layout properties of a sparse CUDA array.
21383 Returns the layout properties of a sparse CUDA array in
21384 `sparseProperties`. If the CUDA array is not allocated with the flag
21385 :py:obj:`~.cudaArraySparse`, :py:obj:`~.cudaErrorInvalidValue` will be
21386 returned.
21388 If the returned value in :py:obj:`~.cudaArraySparseProperties.flags`
21389 contains :py:obj:`~.cudaArraySparsePropertiesSingleMipTail`, then
21390 :py:obj:`~.cudaArraySparseProperties.miptailSize` represents the total
21391 size of the array. Otherwise, it will be zero. Also, the returned value
21392 in :py:obj:`~.cudaArraySparseProperties.miptailFirstLevel` is always
21393 zero. Note that the `array` must have been allocated using
21394 :py:obj:`~.cudaMallocArray` or :py:obj:`~.cudaMalloc3DArray`. For CUDA
21395 arrays obtained using :py:obj:`~.cudaMipmappedArrayGetLevel`,
21396 :py:obj:`~.cudaErrorInvalidValue` will be returned. Instead,
21397 :py:obj:`~.cudaMipmappedArrayGetSparseProperties` must be used to
21398 obtain the sparse properties of the entire CUDA mipmapped array to
21399 which `array` belongs.
21401 Parameters
21402 ----------
21403 array : :py:obj:`~.cudaArray_t`
21404 The CUDA array to get the sparse properties of
21406 Returns
21407 -------
21408 cudaError_t
21409 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
21410 sparseProperties : :py:obj:`~.cudaArraySparseProperties`
21411 Pointer to return the :py:obj:`~.cudaArraySparseProperties`
21413 See Also
21414 --------
21415 :py:obj:`~.cudaMipmappedArrayGetSparseProperties`, :py:obj:`~.cuMemMapArrayAsync`
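Examples
--------
A minimal sketch, assuming a device with sparse CUDA array support;
the array must have been allocated with :py:obj:`~.cudaArraySparse`:

>>> from cuda.bindings import runtime
>>> err, desc = runtime.cudaCreateChannelDesc(
...     32, 0, 0, 0, runtime.cudaChannelFormatKind.cudaChannelFormatKindFloat)
>>> err, arr = runtime.cudaMallocArray(desc, 1024, 1024, runtime.cudaArraySparse)
>>> err, props = runtime.cudaArrayGetSparseProperties(arr)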
21416 """
21417 cdef cyruntime.cudaArray_t cyarray
21418 if array is None:
21419 parray = 0
21420 elif isinstance(array, (cudaArray_t,)):
21421 parray = int(array)
21422 else:
21423 parray = int(cudaArray_t(array))
21424 cyarray = <cyruntime.cudaArray_t><void_ptr>parray
21425 cdef cudaArraySparseProperties sparseProperties = cudaArraySparseProperties()
21426 with nogil:
21427 err = cyruntime.cudaArrayGetSparseProperties(<cyruntime.cudaArraySparseProperties*>sparseProperties._pvt_ptr, cyarray)
21428 if err != cyruntime.cudaSuccess:
21429 return (_dict_cudaError_t[err], None)
21430 return (_dict_cudaError_t[err], sparseProperties)
21432@cython.embedsignature(True)
21433def cudaMipmappedArrayGetSparseProperties(mipmap):
21434 """ Returns the layout properties of a sparse CUDA mipmapped array.
21436 Returns the sparse array layout properties in `sparseProperties`. If
21437 the CUDA mipmapped array is not allocated with the flag
21438 :py:obj:`~.cudaArraySparse`, :py:obj:`~.cudaErrorInvalidValue` will be
21439 returned.
21441 For non-layered CUDA mipmapped arrays,
21442 :py:obj:`~.cudaArraySparseProperties.miptailSize` returns the size of
21443 the mip tail region. The mip tail region includes all mip levels whose
21444 width, height or depth is less than that of the tile. For layered CUDA
21445 mipmapped arrays, if :py:obj:`~.cudaArraySparseProperties.flags`
21446 contains :py:obj:`~.cudaArraySparsePropertiesSingleMipTail`, then
21447 :py:obj:`~.cudaArraySparseProperties.miptailSize` specifies the size of
21448 the mip tail of all layers combined. Otherwise,
21449 :py:obj:`~.cudaArraySparseProperties.miptailSize` specifies mip tail
21450 size per layer. The returned value of
21451 :py:obj:`~.cudaArraySparseProperties.miptailFirstLevel` is valid only
21452 if :py:obj:`~.cudaArraySparseProperties.miptailSize` is non-zero.
21454 Parameters
21455 ----------
21456 mipmap : :py:obj:`~.cudaMipmappedArray_t`
21457 The CUDA mipmapped array to get the sparse properties of
21459 Returns
21460 -------
21461 cudaError_t
21462 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
21463 sparseProperties : :py:obj:`~.cudaArraySparseProperties`
21464 Pointer to return :py:obj:`~.cudaArraySparseProperties`
21466 See Also
21467 --------
21468 :py:obj:`~.cudaArrayGetSparseProperties`, :py:obj:`~.cuMemMapArrayAsync`
21469 """
21470 cdef cyruntime.cudaMipmappedArray_t cymipmap
21471 if mipmap is None:
21472 pmipmap = 0
21473 elif isinstance(mipmap, (cudaMipmappedArray_t,)):
21474 pmipmap = int(mipmap)
21475 else:
21476 pmipmap = int(cudaMipmappedArray_t(mipmap))
21477 cymipmap = <cyruntime.cudaMipmappedArray_t><void_ptr>pmipmap
21478 cdef cudaArraySparseProperties sparseProperties = cudaArraySparseProperties()
21479 with nogil:
21480 err = cyruntime.cudaMipmappedArrayGetSparseProperties(<cyruntime.cudaArraySparseProperties*>sparseProperties._pvt_ptr, cymipmap)
21481 if err != cyruntime.cudaSuccess:
21482 return (_dict_cudaError_t[err], None)
21483 return (_dict_cudaError_t[err], sparseProperties)
21485@cython.embedsignature(True)
21486def cudaMemcpy(dst, src, size_t count, kind not None : cudaMemcpyKind):
21487 """ Copies data between host and device.
21489 Copies `count` bytes from the memory area pointed to by `src` to the
21490 memory area pointed to by `dst`, where `kind` specifies the direction
21491 of the copy, and must be one of :py:obj:`~.cudaMemcpyHostToHost`,
21492 :py:obj:`~.cudaMemcpyHostToDevice`, :py:obj:`~.cudaMemcpyDeviceToHost`,
21493 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
21494 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
21495 type of transfer is inferred from the pointer values. However,
21496 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
21497 unified virtual addressing. Calling :py:obj:`~.cudaMemcpy()` with dst
21498 and src pointers that do not match the direction of the copy results in
21499 undefined behavior.
21501 \note_sync
21503 Parameters
21504 ----------
21505 dst : Any
21506 Destination memory address
21507 src : Any
21508 Source memory address
21509 count : size_t
21510 Size in bytes to copy
21511 kind : :py:obj:`~.cudaMemcpyKind`
21512 Type of transfer
21514 Returns
21515 -------
21516 cudaError_t
21517 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
21519 See Also
21520 --------
21521 :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpy`
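Examples
--------
A minimal sketch of a host-to-device round trip (assumes NumPy and a
CUDA device); objects exposing the Python buffer protocol, such as
NumPy arrays, can typically be passed directly as `dst`/`src`:

>>> import numpy as np
>>> from cuda.bindings import runtime
>>> host = np.arange(256, dtype=np.float32)
>>> err, dptr = runtime.cudaMalloc(host.nbytes)
>>> err, = runtime.cudaMemcpy(dptr, host, host.nbytes,
...     runtime.cudaMemcpyKind.cudaMemcpyHostToDevice)
>>> out = np.empty_like(host)
>>> err, = runtime.cudaMemcpy(out, dptr, out.nbytes,
...     runtime.cudaMemcpyKind.cudaMemcpyDeviceToHost)
>>> err, = runtime.cudaFree(dptr)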
21522 """
21523 cydst = _HelperInputVoidPtr(dst)
21524 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
21525 cysrc = _HelperInputVoidPtr(src)
21526 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
21527 cdef cyruntime.cudaMemcpyKind cykind = kind.value
21528 with nogil:
21529 err = cyruntime.cudaMemcpy(cydst_ptr, cysrc_ptr, count, cykind)
21530 return (_dict_cudaError_t[err],)
21532@cython.embedsignature(True)
21533def cudaMemcpyPeer(dst, int dstDevice, src, int srcDevice, size_t count):
21534 """ Copies memory between two devices.
21536 Copies memory from one device to memory on another device. `dst` is the
21537 base device pointer of the destination memory and `dstDevice` is the
21538 destination device. `src` is the base device pointer of the source
21539 memory and `srcDevice` is the source device. `count` specifies the
21540 number of bytes to copy.
21542 Note that this function is asynchronous with respect to the host, but
21543 serialized with respect to all pending and future asynchronous work in
21544 the current device, `srcDevice`, and `dstDevice` (use
21545 :py:obj:`~.cudaMemcpyPeerAsync` to avoid this synchronization).
21547 Parameters
21548 ----------
21549 dst : Any
21550 Destination device pointer
21551 dstDevice : int
21552 Destination device
21553 src : Any
21554 Source device pointer
21555 srcDevice : int
21556 Source device
21557 count : size_t
21558 Size of memory copy in bytes
21560 Returns
21561 -------
21562 cudaError_t
21563 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
21565 See Also
21566 --------
21567 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpyPeerAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cuMemcpyPeer`
21568 """
21569 cydst = _HelperInputVoidPtr(dst)
21570 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
21571 cysrc = _HelperInputVoidPtr(src)
21572 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
21573 with nogil:
21574 err = cyruntime.cudaMemcpyPeer(cydst_ptr, dstDevice, cysrc_ptr, srcDevice, count)
21575 return (_dict_cudaError_t[err],)
21577@cython.embedsignature(True)
21578def cudaMemcpy2D(dst, size_t dpitch, src, size_t spitch, size_t width, size_t height, kind not None : cudaMemcpyKind):
21579 """ Copies data between host and device.
21581 Copies a matrix (`height` rows of `width` bytes each) from the memory
21582 area pointed to by `src` to the memory area pointed to by `dst`, where
21583 `kind` specifies the direction of the copy, and must be one of
21584 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
21585 :py:obj:`~.cudaMemcpyDeviceToHost`,
21586 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
21587 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
21588 type of transfer is inferred from the pointer values. However,
21589 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
21590 unified virtual addressing. `dpitch` and `spitch` are the widths in
21591 memory in bytes of the 2D arrays pointed to by `dst` and `src`,
21592 including any padding added to the end of each row. The memory areas
21593 may not overlap. `width` must not exceed either `dpitch` or `spitch`.
21594 Calling :py:obj:`~.cudaMemcpy2D()` with `dst` and `src` pointers that
21595 do not match the direction of the copy results in undefined
21596 behavior. :py:obj:`~.cudaMemcpy2D()` returns an error if `dpitch` or
21597 `spitch` exceeds the maximum allowed.
21599 Parameters
21600 ----------
21601 dst : Any
21602 Destination memory address
21603 dpitch : size_t
21604 Pitch of destination memory
21605 src : Any
21606 Source memory address
21607 spitch : size_t
21608 Pitch of source memory
21609 width : size_t
21610 Width of matrix transfer (columns in bytes)
21611 height : size_t
21612 Height of matrix transfer (rows)
21613 kind : :py:obj:`~.cudaMemcpyKind`
21614 Type of transfer
21616 Returns
21617 -------
21618 cudaError_t
21619 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
21621 See Also
21622 --------
21623 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DUnaligned`
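Examples
--------
A minimal sketch copying a tightly packed host matrix into a pitched
device allocation (assumes NumPy and a CUDA device):

>>> import numpy as np
>>> from cuda.bindings import runtime
>>> h, wbytes = 64, 256
>>> host = np.zeros((h, wbytes), dtype=np.uint8)
>>> err, dptr, dpitch = runtime.cudaMallocPitch(wbytes, h)
>>> err, = runtime.cudaMemcpy2D(dptr, dpitch, host, wbytes, wbytes, h,
...     runtime.cudaMemcpyKind.cudaMemcpyHostToDevice)
>>> err, = runtime.cudaFree(dptr)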
21624 """
21625 cydst = _HelperInputVoidPtr(dst)
21626 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
21627 cysrc = _HelperInputVoidPtr(src)
21628 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
21629 cdef cyruntime.cudaMemcpyKind cykind = kind.value
21630 with nogil:
21631 err = cyruntime.cudaMemcpy2D(cydst_ptr, dpitch, cysrc_ptr, spitch, width, height, cykind)
21632 return (_dict_cudaError_t[err],)
21634@cython.embedsignature(True)
21635def cudaMemcpy2DToArray(dst, size_t wOffset, size_t hOffset, src, size_t spitch, size_t width, size_t height, kind not None : cudaMemcpyKind):
21636 """ Copies data between host and device.
21638 Copies a matrix (`height` rows of `width` bytes each) from the memory
21639 area pointed to by `src` to the CUDA array `dst` starting at `hOffset`
21640 rows and `wOffset` bytes from the upper left corner, where `kind`
21641 specifies the direction of the copy, and must be one of
21642 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
21643 :py:obj:`~.cudaMemcpyDeviceToHost`,
21644 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
21645 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
21646 type of transfer is inferred from the pointer values. However,
21647 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
21648 unified virtual addressing. `spitch` is the width in memory in bytes of
21649 the 2D array pointed to by `src`, including any padding added to the
21650 end of each row. `wOffset` + `width` must not exceed the width of the
21651 CUDA array `dst`. `width` must not exceed `spitch`.
21652 :py:obj:`~.cudaMemcpy2DToArray()` returns an error if `spitch` exceeds
21653 the maximum allowed.
21655 Parameters
21656 ----------
21657 dst : :py:obj:`~.cudaArray_t`
21658 Destination memory address
21659 wOffset : size_t
21660 Destination starting X offset (columns in bytes)
21661 hOffset : size_t
21662 Destination starting Y offset (rows)
21663 src : Any
21664 Source memory address
21665 spitch : size_t
21666 Pitch of source memory
21667 width : size_t
21668 Width of matrix transfer (columns in bytes)
21669 height : size_t
21670 Height of matrix transfer (rows)
21671 kind : :py:obj:`~.cudaMemcpyKind`
21672 Type of transfer
21674 Returns
21675 -------
21676 cudaError_t
21677 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
21679 See Also
21680 --------
21681 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DUnaligned`
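Examples
--------
A minimal sketch copying a host matrix into a CUDA array of 8-bit
elements (assumes NumPy and a CUDA device):

>>> import numpy as np
>>> from cuda.bindings import runtime
>>> h, wbytes = 64, 256
>>> host = np.zeros((h, wbytes), dtype=np.uint8)
>>> err, desc = runtime.cudaCreateChannelDesc(
...     8, 0, 0, 0, runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsigned)
>>> err, arr = runtime.cudaMallocArray(desc, wbytes, h, 0)
>>> err, = runtime.cudaMemcpy2DToArray(arr, 0, 0, host, wbytes, wbytes, h,
...     runtime.cudaMemcpyKind.cudaMemcpyHostToDevice)
>>> err, = runtime.cudaFreeArray(arr)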
21682 """
21683 cdef cyruntime.cudaArray_t cydst
21684 if dst is None:
21685 pdst = 0
21686 elif isinstance(dst, (cudaArray_t,)):
21687 pdst = int(dst)
21688 else:
21689 pdst = int(cudaArray_t(dst))
21690 cydst = <cyruntime.cudaArray_t><void_ptr>pdst
21691 cysrc = _HelperInputVoidPtr(src)
21692 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
21693 cdef cyruntime.cudaMemcpyKind cykind = kind.value
21694 with nogil:
21695 err = cyruntime.cudaMemcpy2DToArray(cydst, wOffset, hOffset, cysrc_ptr, spitch, width, height, cykind)
21696 return (_dict_cudaError_t[err],)
21698@cython.embedsignature(True)
21699def cudaMemcpy2DFromArray(dst, size_t dpitch, src, size_t wOffset, size_t hOffset, size_t width, size_t height, kind not None : cudaMemcpyKind):
21700 """ Copies data between host and device.
21702 Copies a matrix (`height` rows of `width` bytes each) from the CUDA
21703 array `src` starting at `hOffset` rows and `wOffset` bytes from the
21704 upper left corner to the memory area pointed to by `dst`, where `kind`
21705 specifies the direction of the copy, and must be one of
21706 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
21707 :py:obj:`~.cudaMemcpyDeviceToHost`,
21708 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
21709 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
21710 type of transfer is inferred from the pointer values. However,
21711 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
21712 unified virtual addressing. `dpitch` is the width in memory in bytes of
21713 the 2D array pointed to by `dst`, including any padding added to the
21714 end of each row. `wOffset` + `width` must not exceed the width of the
21715 CUDA array `src`. `width` must not exceed `dpitch`.
21716 :py:obj:`~.cudaMemcpy2DFromArray()` returns an error if `dpitch`
21717 exceeds the maximum allowed.
21719 Parameters
21720 ----------
21721 dst : Any
21722 Destination memory address
21723 dpitch : size_t
21724 Pitch of destination memory
21725 src : :py:obj:`~.cudaArray_const_t`
21726 Source memory address
21727 wOffset : size_t
21728 Source starting X offset (columns in bytes)
21729 hOffset : size_t
21730 Source starting Y offset (rows)
21731 width : size_t
21732 Width of matrix transfer (columns in bytes)
21733 height : size_t
21734 Height of matrix transfer (rows)
21735 kind : :py:obj:`~.cudaMemcpyKind`
21736 Type of transfer
21738 Returns
21739 -------
21740 cudaError_t
21741 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
21743 See Also
21744 --------
21745 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DUnaligned`
21746 """
21747 cdef cyruntime.cudaArray_const_t cysrc
21748 if src is None:
21749 psrc = 0
21750 elif isinstance(src, (cudaArray_const_t,)):
21751 psrc = int(src)
21752 else:
21753 psrc = int(cudaArray_const_t(src))
21754 cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
21755 cydst = _HelperInputVoidPtr(dst)
21756 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
21757 cdef cyruntime.cudaMemcpyKind cykind = kind.value
21758 with nogil:
21759 err = cyruntime.cudaMemcpy2DFromArray(cydst_ptr, dpitch, cysrc, wOffset, hOffset, width, height, cykind)
21760 return (_dict_cudaError_t[err],)
21762@cython.embedsignature(True)
21763def cudaMemcpy2DArrayToArray(dst, size_t wOffsetDst, size_t hOffsetDst, src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, kind not None : cudaMemcpyKind):
21764 """ Copies data between host and device.
21766 Copies a matrix (`height` rows of `width` bytes each) from the CUDA
21767 array `src` starting at `hOffsetSrc` rows and `wOffsetSrc` bytes from
21768 the upper left corner to the CUDA array `dst` starting at `hOffsetDst`
21769 rows and `wOffsetDst` bytes from the upper left corner, where `kind`
21770 specifies the direction of the copy, and must be one of
21771 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
21772 :py:obj:`~.cudaMemcpyDeviceToHost`,
21773 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
21774 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
21775 type of transfer is inferred from the pointer values. However,
21776 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
21777 unified virtual addressing. `wOffsetDst` + `width` must not exceed the
21778 width of the CUDA array `dst`. `wOffsetSrc` + `width` must not exceed
21779 the width of the CUDA array `src`.
21781 Parameters
21782 ----------
21783 dst : :py:obj:`~.cudaArray_t`
21784 Destination memory address
21785 wOffsetDst : size_t
21786 Destination starting X offset (columns in bytes)
21787 hOffsetDst : size_t
21788 Destination starting Y offset (rows)
21789 src : :py:obj:`~.cudaArray_const_t`
21790 Source memory address
21791 wOffsetSrc : size_t
21792 Source starting X offset (columns in bytes)
21793 hOffsetSrc : size_t
21794 Source starting Y offset (rows)
21795 width : size_t
21796 Width of matrix transfer (columns in bytes)
21797 height : size_t
21798 Height of matrix transfer (rows)
21799 kind : :py:obj:`~.cudaMemcpyKind`
21800 Type of transfer
21802 Returns
21803 -------
21804 cudaError_t
21805 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
21807 See Also
21808 --------
21809 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DUnaligned`
21810 """
21811 cdef cyruntime.cudaArray_const_t cysrc
21812 if src is None:
21813 psrc = 0
21814 elif isinstance(src, (cudaArray_const_t,)):
21815 psrc = int(src)
21816 else:
21817 psrc = int(cudaArray_const_t(src))
21818 cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
21819 cdef cyruntime.cudaArray_t cydst
21820 if dst is None:
21821 pdst = 0
21822 elif isinstance(dst, (cudaArray_t,)):
21823 pdst = int(dst)
21824 else:
21825 pdst = int(cudaArray_t(dst))
21826 cydst = <cyruntime.cudaArray_t><void_ptr>pdst
21827 cdef cyruntime.cudaMemcpyKind cykind = kind.value
21828 with nogil:
21829 err = cyruntime.cudaMemcpy2DArrayToArray(cydst, wOffsetDst, hOffsetDst, cysrc, wOffsetSrc, hOffsetSrc, width, height, cykind)
21830 return (_dict_cudaError_t[err],)
21832@cython.embedsignature(True)
21833def cudaMemcpyAsync(dst, src, size_t count, kind not None : cudaMemcpyKind, stream):
21834 """ Copies data between host and device.
21836 Copies `count` bytes from the memory area pointed to by `src` to the
21837 memory area pointed to by `dst`, where `kind` specifies the direction
21838 of the copy, and must be one of :py:obj:`~.cudaMemcpyHostToHost`,
21839 :py:obj:`~.cudaMemcpyHostToDevice`, :py:obj:`~.cudaMemcpyDeviceToHost`,
21840 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
21841 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
21842 type of transfer is inferred from the pointer values. However,
21843 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
21844 unified virtual addressing.
21846 The memory areas may not overlap. Calling :py:obj:`~.cudaMemcpyAsync()`
21847 with `dst` and `src` pointers that do not match the direction of the
21848 copy results in undefined behavior.
21850 :py:obj:`~.cudaMemcpyAsync()` is asynchronous with respect to the host,
21851 so the call may return before the copy is complete. The copy can
21852 optionally be associated with a stream by passing a non-zero `stream`
21853 argument. If `kind` is :py:obj:`~.cudaMemcpyHostToDevice` or
21854 :py:obj:`~.cudaMemcpyDeviceToHost` and the `stream` is non-zero, the
21855 copy may overlap with operations in other streams.
21857 The device version of this function only handles device to device
21858 copies and cannot be given local or shared pointers.
21860 Parameters
21861 ----------
21862 dst : Any
21863 Destination memory address
21864 src : Any
21865 Source memory address
21866 count : size_t
21867 Size in bytes to copy
21868 kind : :py:obj:`~.cudaMemcpyKind`
21869 Type of transfer
21870 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
21871 Stream identifier
21873 Returns
21874 -------
21875 cudaError_t
21876 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
21878 See Also
21879 --------
21880 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyAsync`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemcpyDtoDAsync`
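Examples
--------
A minimal sketch; for the copy to overlap with other work, the host
buffer should be page-locked, e.g. allocated with
:py:obj:`~.cudaMallocHost`:

>>> from cuda.bindings import runtime
>>> nbytes = 1 << 20
>>> err, hptr = runtime.cudaMallocHost(nbytes)
>>> err, dptr = runtime.cudaMalloc(nbytes)
>>> err, stream = runtime.cudaStreamCreate()
>>> err, = runtime.cudaMemcpyAsync(dptr, hptr, nbytes,
...     runtime.cudaMemcpyKind.cudaMemcpyHostToDevice, stream)
>>> err, = runtime.cudaStreamSynchronize(stream)
>>> err, = runtime.cudaFreeHost(hptr)
>>> err, = runtime.cudaFree(dptr)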
21881 """
21882 cdef cyruntime.cudaStream_t cystream
21883 if stream is None:
21884 pstream = 0
21885 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
21886 pstream = int(stream)
21887 else:
21888 pstream = int(cudaStream_t(stream))
21889 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
21890 cydst = _HelperInputVoidPtr(dst)
21891 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
21892 cysrc = _HelperInputVoidPtr(src)
21893 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
21894 cdef cyruntime.cudaMemcpyKind cykind = kind.value
21895 with nogil:
21896 err = cyruntime.cudaMemcpyAsync(cydst_ptr, cysrc_ptr, count, cykind, cystream)
21897 return (_dict_cudaError_t[err],)
21899@cython.embedsignature(True)
21900def cudaMemcpyPeerAsync(dst, int dstDevice, src, int srcDevice, size_t count, stream):
21901 """ Copies memory between two devices asynchronously.
21903 Copies memory from one device to memory on another device. `dst` is the
21904 base device pointer of the destination memory and `dstDevice` is the
21905 destination device. `src` is the base device pointer of the source
21906 memory and `srcDevice` is the source device. `count` specifies the
21907 number of bytes to copy.
21909 Note that this function is asynchronous with respect to the host and
21910 all work on other devices.
21912 Parameters
21913 ----------
21914 dst : Any
21915 Destination device pointer
21916 dstDevice : int
21917 Destination device
21918 src : Any
21919 Source device pointer
21920 srcDevice : int
21921 Source device
21922 count : size_t
21923 Size of memory copy in bytes
21924 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
21925 Stream identifier
21927 Returns
21928 -------
21929 cudaError_t
21930 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
21932 See Also
21933 --------
21934 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cuMemcpyPeerAsync`
21935 """
21936 cdef cyruntime.cudaStream_t cystream
21937 if stream is None:
21938 pstream = 0
21939 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
21940 pstream = int(stream)
21941 else:
21942 pstream = int(cudaStream_t(stream))
21943 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
21944 cydst = _HelperInputVoidPtr(dst)
21945 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
21946 cysrc = _HelperInputVoidPtr(src)
21947 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
21948 with nogil:
21949 err = cyruntime.cudaMemcpyPeerAsync(cydst_ptr, dstDevice, cysrc_ptr, srcDevice, count, cystream)
21950 return (_dict_cudaError_t[err],)
21952@cython.embedsignature(True)
21953def cudaMemcpyBatchAsync(dsts : Optional[tuple[Any] | list[Any]], srcs : Optional[tuple[Any] | list[Any]], sizes : tuple[int] | list[int], size_t count, attrs : Optional[tuple[cudaMemcpyAttributes] | list[cudaMemcpyAttributes]], attrsIdxs : tuple[int] | list[int], size_t numAttrs, stream):
21954 """ Performs a batch of memory copies asynchronously.
21956 Performs a batch of memory copies. The batch as a whole executes in
21957 stream order but copies within a batch are not guaranteed to execute in
21958 any specific order. This API only supports pointer-to-pointer copies.
21959 For copies involving CUDA arrays, please see
21960 :py:obj:`~.cudaMemcpy3DBatchAsync`.
21962 Performs memory copies from source buffers specified in `srcs` to
21963 destination buffers specified in `dsts`. The size of each copy is
21964 specified in `sizes`. All three arrays must be of the same length as
21965 specified by `count`. Since there are no ordering guarantees for copies
21966 within a batch, specifying any dependent copies within a batch will
21967 result in undefined behavior.
21969 Every copy in the batch has to be associated with a set of attributes
21970 specified in the `attrs` array. Each entry in this array can apply to
21971 more than one copy. This can be done by specifying in the `attrsIdxs`
21972 array, the index of the first copy that the corresponding entry in the
21973 `attrs` array applies to. Both `attrs` and `attrsIdxs` must be of the
21974 same length as specified by `numAttrs`. For example, if a batch has 10
21975 copies listed in dst/src/sizes, the first 6 of which have one set of
21976 attributes and the remaining 4 another, then `numAttrs` will be 2,
21977 `attrsIdxs` will be {0, 6} and `attrs` will contain the two sets of
21978 attributes. Note that the first entry in `attrsIdxs` must always be 0.
21979 Also, each entry must be greater than the previous entry and the last
21980 entry should be less than `count`. Furthermore, `numAttrs` must be
21981 less than or equal to `count`.
21983 The :py:obj:`~.cudaMemcpyAttributes.srcAccessOrder` indicates the
21984 source access ordering to be observed for copies associated with the
21985 attribute. If the source access order is set to
21986 :py:obj:`~.cudaMemcpySrcAccessOrderStream`, then the source will be
21987 accessed in stream order. If the source access order is set to
21988 :py:obj:`~.cudaMemcpySrcAccessOrderDuringApiCall` then it indicates
21989 that access to the source pointer can be out of stream order and all
21990 accesses must be complete before the API call returns. This flag is
21991 suited for ephemeral sources (e.g., stack variables) when it's known
21992 that no prior operations in the stream can be accessing the memory and
21993 also that the lifetime of the memory is limited to the scope that the
21994 source variable was declared in. Specifying this flag allows the driver
21995 to optimize the copy and removes the need for the user to synchronize
21996 the stream after the API call. If the source access order is set to
21997 :py:obj:`~.cudaMemcpySrcAccessOrderAny` then it indicates that access
21998 to the source pointer can be out of stream order and the accesses can
21999 happen even after the API call returns. This flag is suited for host
22000 pointers allocated outside CUDA (e.g., via malloc) when it's known that
22001 no prior operations in the stream can be accessing the memory.
22002 Specifying this flag allows the driver to optimize the copy on certain
22003 platforms. Each memcpy operation in the batch must have a valid
22004 :py:obj:`~.cudaMemcpyAttributes` corresponding to it including the
22005 appropriate srcAccessOrder setting, otherwise the API will return
22006 :py:obj:`~.cudaErrorInvalidValue`.
22008 The :py:obj:`~.cudaMemcpyAttributes.srcLocHint` and
22009 :py:obj:`~.cudaMemcpyAttributes.dstLocHint` allows applications to
22010 specify hint locations for operands of a copy when the operand doesn't
22011 have a fixed location. That is, these hints are only applicable for
22012 managed memory pointers on devices where
22013 :py:obj:`~.cudaDevAttrConcurrentManagedAccess` is true or system-
22014 allocated pageable memory on devices where
22015 :py:obj:`~.cudaDevAttrPageableMemoryAccess` is true. For other cases,
22016 these hints are ignored.
22018 The :py:obj:`~.cudaMemcpyAttributes.flags` field can be used to specify
22019 certain flags for copies. Setting the
22020 :py:obj:`~.cudaMemcpyFlagPreferOverlapWithCompute` flag indicates that
22021 the associated copies should preferably overlap with any compute work.
22022 Note that this flag is a hint and can be ignored depending on the
22023 platform and other parameters of the copy.
22025 Parameters
22026 ----------
22027 dsts : list[Any]
22028 Array of destination pointers.
22029 srcs : list[Any]
22030 Array of memcpy source pointers.
22031 sizes : list[int]
22032 Array of sizes for memcpy operations.
22033 count : size_t
22034 Size of `dsts`, `srcs` and `sizes` arrays
22035 attrs : list[:py:obj:`~.cudaMemcpyAttributes`]
22036 Array of memcpy attributes.
22037 attrsIdxs : list[int]
22038 Array of indices to specify which copies each entry in the `attrs`
22039 array applies to. The attributes specified in attrs[k] will be
22040 applied to copies starting from attrsIdxs[k] through attrsIdxs[k+1]
22041 - 1. Also attrs[numAttrs-1] will apply to copies starting from
22042 attrsIdxs[numAttrs-1] through count - 1.
22043 numAttrs : size_t
22044 Size of `attrs` and `attrsIdxs` arrays.
22045 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
22046 The stream to enqueue the operations in. Must not be legacy NULL
22047 stream.
22049 Returns
22050 -------
22051 cudaError_t
22052 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
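Examples
--------
A minimal sketch of the attribute layout described above: a two-copy
batch in which a single attribute entry (starting at index 0) covers
both copies. It assumes device pointers `d_a` .. `d_d` of `nbytes`
each and a non-NULL `stream` already exist, and that the access-order
enum type is named `cudaMemcpySrcAccessOrder`:

>>> attr = runtime.cudaMemcpyAttributes()
>>> attr.srcAccessOrder = (
...     runtime.cudaMemcpySrcAccessOrder.cudaMemcpySrcAccessOrderStream)
>>> err, = runtime.cudaMemcpyBatchAsync(
...     [d_b, d_d], [d_a, d_c], [nbytes, nbytes], 2,
...     [attr], [0], 1, stream)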
22053 """
22054 cdef cyruntime.cudaStream_t cystream
22055 if stream is None:
22056 pstream = 0
22057 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
22058 pstream = int(stream)
22059 else:
22060 pstream = int(cudaStream_t(stream))
22061 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
22062 if not all(isinstance(_x, (int)) for _x in attrsIdxs):
22063 raise TypeError("Argument 'attrsIdxs' is not instance of type (expected tuple[int] or list[int])")
22064 attrs = [] if attrs is None else attrs
22065 if not all(isinstance(_x, (cudaMemcpyAttributes,)) for _x in attrs):
22066 raise TypeError("Argument 'attrs' is not instance of type (expected tuple[cudaMemcpyAttributes] or list[cudaMemcpyAttributes])")
22067 if not all(isinstance(_x, (int)) for _x in sizes):
22068 raise TypeError("Argument 'sizes' is not instance of type (expected tuple[int] or list[int])")
22069 srcs = [] if srcs is None else srcs
22070 dsts = [] if dsts is None else dsts
22071 pylist = [_HelperInputVoidPtr(pydsts) for pydsts in dsts]
22072 cdef _InputVoidPtrPtrHelper voidStarHelperdsts = _InputVoidPtrPtrHelper(pylist)
22073 cdef const void** cydsts_ptr = <const void**><void_ptr>voidStarHelperdsts.cptr
22074 pylist = [_HelperInputVoidPtr(pysrcs) for pysrcs in srcs]
22075 cdef _InputVoidPtrPtrHelper voidStarHelpersrcs = _InputVoidPtrPtrHelper(pylist)
22076 cdef const void** cysrcs_ptr = <const void**><void_ptr>voidStarHelpersrcs.cptr
22077 cdef vector[size_t] cysizes = sizes
22078 if count > <size_t>len(dsts): raise RuntimeError("List is too small: " + str(len(dsts)) + " < " + str(count))
22079 if count > <size_t>len(srcs): raise RuntimeError("List is too small: " + str(len(srcs)) + " < " + str(count))
22080 if count > <size_t>len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count))
22081 cdef cyruntime.cudaMemcpyAttributes* cyattrs = NULL
22082 if len(attrs) > 1:
22083 cyattrs = <cyruntime.cudaMemcpyAttributes*> calloc(len(attrs), sizeof(cyruntime.cudaMemcpyAttributes))
22084 if cyattrs is NULL:
22085 raise MemoryError('Failed to allocate length x size memory: ' + str(len(attrs)) + 'x' + str(sizeof(cyruntime.cudaMemcpyAttributes)))
22086 for idx in range(len(attrs)):
22087 string.memcpy(&cyattrs[idx], (<cudaMemcpyAttributes>attrs[idx])._pvt_ptr, sizeof(cyruntime.cudaMemcpyAttributes))
22088 elif len(attrs) == 1:
22089 cyattrs = (<cudaMemcpyAttributes>attrs[0])._pvt_ptr
22090 cdef vector[size_t] cyattrsIdxs = attrsIdxs
22091 if numAttrs > <size_t>len(attrs): raise RuntimeError("List is too small: " + str(len(attrs)) + " < " + str(numAttrs))
22092 if numAttrs > <size_t>len(attrsIdxs): raise RuntimeError("List is too small: " + str(len(attrsIdxs)) + " < " + str(numAttrs))
22093 with nogil:
22094 err = cyruntime.cudaMemcpyBatchAsync(cydsts_ptr, cysrcs_ptr, cysizes.data(), count, cyattrs, cyattrsIdxs.data(), numAttrs, cystream)
22095 if len(attrs) > 1 and cyattrs is not NULL:
22096 free(cyattrs)
22097 return (_dict_cudaError_t[err],)
22099@cython.embedsignature(True)
22100def cudaMemcpy3DBatchAsync(size_t numOps, opList : Optional[tuple[cudaMemcpy3DBatchOp] | list[cudaMemcpy3DBatchOp]], unsigned long long flags, stream):
22101 """ Performs a batch of 3D memory copies asynchronously.
22103 Performs a batch of memory copies. The batch as a whole executes in
22104 stream order but copies within a batch are not guaranteed to execute in
22105 any specific order. Note that this means specifying any dependent
22106 copies within a batch will result in undefined behavior.
22108 Performs memory copies as specified in the `opList` array. The length
22109 of this array is specified in `numOps`. Each entry in this array
22110 describes a copy operation. This includes among other things, the
22111 source and destination operands for the copy as specified in
22112 :py:obj:`~.cudaMemcpy3DBatchOp.src` and
22113 :py:obj:`~.cudaMemcpy3DBatchOp.dst` respectively. The source and
22114 destination operands of a copy can either be a pointer or a CUDA array.
22115 The width, height and depth of a copy are specified in
22116 :py:obj:`~.cudaMemcpy3DBatchOp.extent`, in elements, and must not be
22117 zero. For pointer-to-
22118 pointer copies, the element size is considered to be 1. For pointer to
22119 CUDA array or vice versa copies, the element size is determined by the
22120 CUDA array. For CUDA array to CUDA array copies, the element size of
22121 the two CUDA arrays must match.
22123 For a given operand, if :py:obj:`~.cudaMemcpy3DOperand`::type is
22124 specified as :py:obj:`~.cudaMemcpyOperandTypePointer`, then
22125 :py:obj:`~.cudaMemcpy3DOperand`::op::ptr will be used. The
22126 :py:obj:`~.cudaMemcpy3DOperand`::op::ptr::ptr field must contain the
22127 pointer where the copy should begin. The
22128 :py:obj:`~.cudaMemcpy3DOperand`::op::ptr::rowLength field specifies the
22129 length of each row in elements and must either be zero or be greater
22130 than or equal to the width of the copy specified in
22131 :py:obj:`~.cudaMemcpy3DBatchOp`::extent::width. The
22132 :py:obj:`~.cudaMemcpy3DOperand`::op::ptr::layerHeight field specifies
22133 the height of each layer and must either be zero or be greater than or
22134 equal to the height of the copy specified in
22135 :py:obj:`~.cudaMemcpy3DBatchOp`::extent::height. When either of these
22136 values is zero, that aspect of the operand is considered to be tightly
22137 packed according to the copy extent. For managed memory pointers on
22138 devices where :py:obj:`~.cudaDevAttrConcurrentManagedAccess` is true or
22139 system-allocated pageable memory on devices where
22140 :py:obj:`~.cudaDevAttrPageableMemoryAccess` is true, the
22141 :py:obj:`~.cudaMemcpy3DOperand`::op::ptr::locHint field can be used to
22142 hint the location of the operand.
22144 If an operand's type is specified as
22145 :py:obj:`~.cudaMemcpyOperandTypeArray`, then
22146 :py:obj:`~.cudaMemcpy3DOperand`::op::array will be used. The
22147 :py:obj:`~.cudaMemcpy3DOperand`::op::array::array field specifies the
22148 CUDA array and :py:obj:`~.cudaMemcpy3DOperand`::op::array::offset
22149 specifies the 3D offset into that array where the copy begins.
22151 The :py:obj:`~.cudaMemcpyAttributes.srcAccessOrder` indicates the
22152 source access ordering to be observed for copies associated with the
22153 attribute. If the source access order is set to
22154 :py:obj:`~.cudaMemcpySrcAccessOrderStream`, then the source will be
22155 accessed in stream order. If the source access order is set to
22156 :py:obj:`~.cudaMemcpySrcAccessOrderDuringApiCall` then it indicates
22157 that access to the source pointer can be out of stream order and all
22158 accesses must be complete before the API call returns. This flag is
22159 suited for ephemeral sources (e.g., stack variables) when it's known
22160 that no prior operations in the stream can be accessing the memory and
22161 also that the lifetime of the memory is limited to the scope that the
22162 source variable was declared in. Specifying this flag allows the driver
22163 to optimize the copy and removes the need for the user to synchronize
22164 the stream after the API call. If the source access order is set to
22165 :py:obj:`~.cudaMemcpySrcAccessOrderAny` then it indicates that access
22166 to the source pointer can be out of stream order and the accesses can
22167 happen even after the API call returns. This flag is suited for host
22168 pointers allocated outside CUDA (e.g., via malloc) when it's known that
22169 no prior operations in the stream can be accessing the memory.
22170 Specifying this flag allows the driver to optimize the copy on certain
22171 platforms. Each memcopy operation in `opList` must have a valid
22172 srcAccessOrder setting; otherwise this API will return
22173 :py:obj:`~.cudaErrorInvalidValue`.
22175 The :py:obj:`~.cudaMemcpyAttributes.flags` field can be used to specify
22176 certain flags for copies. Setting the
22177 :py:obj:`~.cudaMemcpyFlagPreferOverlapWithCompute` flag indicates that
22178 the associated copies should preferably overlap with any compute work.
22179 Note that this flag is a hint and can be ignored depending on the
22180 platform and other parameters of the copy.
22182 Parameters
22183 ----------
22184 numOps : size_t
22185 Total number of memcpy operations.
22186 opList : list[:py:obj:`~.cudaMemcpy3DBatchOp`]
22187 Array of size `numOps` containing the actual memcpy operations.
22188 flags : unsigned long long
22189 Flags for future use, must be zero now.
22190 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
22191 The stream to enqueue the operations in. Must not be the default
22192 NULL stream.
22194 Returns
22195 -------
22196 cudaError_t
22197 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
22198 """
22199 cdef cyruntime.cudaStream_t cystream
22200 if stream is None:
22201 pstream = 0
22202 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
22203 pstream = int(stream)
22204 else:
22205 pstream = int(cudaStream_t(stream))
22206 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
22207 opList = [] if opList is None else opList
22208 if not all(isinstance(_x, (cudaMemcpy3DBatchOp,)) for _x in opList):
22209 raise TypeError("Argument 'opList' is not an instance of the expected type (tuple[cudaMemcpy3DBatchOp, ...] or list[cudaMemcpy3DBatchOp])")
22210 if numOps > <size_t>len(opList): raise RuntimeError("List is too small: " + str(len(opList)) + " < " + str(numOps))
22211 cdef cyruntime.cudaMemcpy3DBatchOp* cyopList = NULL
22212 if len(opList) > 1:
22213 cyopList = <cyruntime.cudaMemcpy3DBatchOp*> calloc(len(opList), sizeof(cyruntime.cudaMemcpy3DBatchOp))
22214 if cyopList is NULL:
22215 raise MemoryError('Failed to allocate ' + str(len(opList)) + ' x ' + str(sizeof(cyruntime.cudaMemcpy3DBatchOp)) + ' bytes')
22216 for idx in range(len(opList)):
22217 string.memcpy(&cyopList[idx], (<cudaMemcpy3DBatchOp>opList[idx])._pvt_ptr, sizeof(cyruntime.cudaMemcpy3DBatchOp))
22218 elif len(opList) == 1:
22219 cyopList = (<cudaMemcpy3DBatchOp>opList[0])._pvt_ptr
22220 with nogil:
22221 err = cyruntime.cudaMemcpy3DBatchAsync(numOps, cyopList, flags, cystream)
22222 if len(opList) > 1 and cyopList is not NULL:
22223 free(cyopList)
22224 return (_dict_cudaError_t[err],)
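
# --- Editor's note: an illustrative sketch of a single pointer-to-pointer
# 3D batch copy; not generated code. The nested field and enum names
# (cudaMemcpy3DOperandType, op.ptr.*, srcAccessOrder) follow the docstring
# above and should be treated as assumptions to verify against the struct
# definitions.
def _example_cudaMemcpy3DBatchAsync():
    _, stream = cudaStreamCreate()
    nbytes = 256 * 64  # 256-byte rows x 64 rows x 1 layer, element size 1
    _, src = cudaMalloc(nbytes)
    _, dst = cudaMalloc(nbytes)
    op = cudaMemcpy3DBatchOp()
    op.src.type = cudaMemcpy3DOperandType.cudaMemcpyOperandTypePointer
    op.src.op.ptr.ptr = src
    op.src.op.ptr.rowLength = 0    # zero means tightly packed
    op.src.op.ptr.layerHeight = 0
    op.dst.type = cudaMemcpy3DOperandType.cudaMemcpyOperandTypePointer
    op.dst.op.ptr.ptr = dst
    op.extent.width, op.extent.height, op.extent.depth = 256, 64, 1
    op.srcAccessOrder = cudaMemcpySrcAccessOrder.cudaMemcpySrcAccessOrderStream
    err, = cudaMemcpy3DBatchAsync(1, [op], 0, stream)
    assert err == cudaError_t.cudaSuccess
    cudaStreamSynchronize(stream)
    cudaFree(src)
    cudaFree(dst)
    cudaStreamDestroy(stream)
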
22226@cython.embedsignature(True)
22227def cudaMemcpy2DAsync(dst, size_t dpitch, src, size_t spitch, size_t width, size_t height, kind not None : cudaMemcpyKind, stream):
22228 """ Copies data between host and device.
22230 Copies a matrix (`height` rows of `width` bytes each) from the memory
22231 area pointed to by `src` to the memory area pointed to by `dst`, where
22232 `kind` specifies the direction of the copy, and must be one of
22233 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
22234 :py:obj:`~.cudaMemcpyDeviceToHost`,
22235 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
22236 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
22237 type of transfer is inferred from the pointer values. However,
22238 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
22239 unified virtual addressing. `dpitch` and `spitch` are the widths in
22240 memory in bytes of the 2D arrays pointed to by `dst` and `src`,
22241 including any padding added to the end of each row. The memory areas
22242 may not overlap. `width` must not exceed either `dpitch` or `spitch`.
22244 Calling :py:obj:`~.cudaMemcpy2DAsync()` with `dst` and `src` pointers
22245 that do not match the direction of the copy results in an undefined
22246 behavior. :py:obj:`~.cudaMemcpy2DAsync()` returns an error if `dpitch`
22247 or `spitch` is greater than the maximum allowed.
22249 :py:obj:`~.cudaMemcpy2DAsync()` is asynchronous with respect to the
22250 host, so the call may return before the copy is complete. The copy can
22251 optionally be associated with a stream by passing a non-zero `stream`
22252 argument. If `kind` is :py:obj:`~.cudaMemcpyHostToDevice` or
22253 :py:obj:`~.cudaMemcpyDeviceToHost` and `stream` is non-zero, the copy
22254 may overlap with operations in other streams.
22256 The device version of this function only handles device to device
22257 copies and cannot be given local or shared pointers.
22259 Parameters
22260 ----------
22261 dst : Any
22262 Destination memory address
22263 dpitch : size_t
22264 Pitch of destination memory
22265 src : Any
22266 Source memory address
22267 spitch : size_t
22268 Pitch of source memory
22269 width : size_t
22270 Width of matrix transfer (columns in bytes)
22271 height : size_t
22272 Height of matrix transfer (rows)
22273 kind : :py:obj:`~.cudaMemcpyKind`
22274 Type of transfer
22275 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
22276 Stream identifier
22278 Returns
22279 -------
22280 cudaError_t
22281 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
22283 See Also
22284 --------
22285 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2DAsync`
22286 """
22287 cdef cyruntime.cudaStream_t cystream
22288 if stream is None:
22289 pstream = 0
22290 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
22291 pstream = int(stream)
22292 else:
22293 pstream = int(cudaStream_t(stream))
22294 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
22295 cydst = _HelperInputVoidPtr(dst)
22296 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
22297 cysrc = _HelperInputVoidPtr(src)
22298 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
22299 cdef cyruntime.cudaMemcpyKind cykind = kind.value
22300 with nogil:
22301 err = cyruntime.cudaMemcpy2DAsync(cydst_ptr, dpitch, cysrc_ptr, spitch, width, height, cykind, cystream)
22302 return (_dict_cudaError_t[err],)
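
# --- Editor's note: a hedged sketch of a host-to-device 2D copy; not
# generated code. The host buffer is a ctypes array, which satisfies the
# buffer protocol expected by `src`; host rows are tightly packed, so the
# source pitch equals the row width.
def _example_cudaMemcpy2DAsync():
    width, height = 256, 64                      # width in bytes, rows
    _, dptr, dpitch = cudaMallocPitch(width, height)
    host = (ctypes.c_char * (width * height))()
    _, stream = cudaStreamCreate()
    err, = cudaMemcpy2DAsync(dptr, dpitch, host, width, width, height,
                             cudaMemcpyKind.cudaMemcpyHostToDevice, stream)
    assert err == cudaError_t.cudaSuccess
    cudaStreamSynchronize(stream)                # host buffer is pageable
    cudaFree(dptr)
    cudaStreamDestroy(stream)
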
22304@cython.embedsignature(True)
22305def cudaMemcpy2DToArrayAsync(dst, size_t wOffset, size_t hOffset, src, size_t spitch, size_t width, size_t height, kind not None : cudaMemcpyKind, stream):
22306 """ Copies data between host and device.
22308 Copies a matrix (`height` rows of `width` bytes each) from the memory
22309 area pointed to by `src` to the CUDA array `dst` starting at `hOffset`
22310 rows and `wOffset` bytes from the upper left corner, where `kind`
22311 specifies the direction of the copy, and must be one of
22312 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
22313 :py:obj:`~.cudaMemcpyDeviceToHost`,
22314 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
22315 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
22316 type of transfer is inferred from the pointer values. However,
22317 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
22318 unified virtual addressing. `spitch` is the width in memory in bytes of
22319 the 2D array pointed to by `src`, including any padding added to the
22320 end of each row. `wOffset` + `width` must not exceed the width of the
22321 CUDA array `dst`. `width` must not exceed `spitch`.
22322 :py:obj:`~.cudaMemcpy2DToArrayAsync()` returns an error if `spitch`
22323 exceeds the maximum allowed.
22325 :py:obj:`~.cudaMemcpy2DToArrayAsync()` is asynchronous with respect to
22326 the host, so the call may return before the copy is complete. The copy
22327 can optionally be associated with a stream by passing a non-zero `stream`
22328 argument. If `kind` is :py:obj:`~.cudaMemcpyHostToDevice` or
22329 :py:obj:`~.cudaMemcpyDeviceToHost` and `stream` is non-zero, the copy
22330 may overlap with operations in other streams.
22336 Parameters
22337 ----------
22338 dst : :py:obj:`~.cudaArray_t`
22339 Destination memory address
22340 wOffset : size_t
22341 Destination starting X offset (columns in bytes)
22342 hOffset : size_t
22343 Destination starting Y offset (rows)
22344 src : Any
22345 Source memory address
22346 spitch : size_t
22347 Pitch of source memory
22348 width : size_t
22349 Width of matrix transfer (columns in bytes)
22350 height : size_t
22351 Height of matrix transfer (rows)
22352 kind : :py:obj:`~.cudaMemcpyKind`
22353 Type of transfer
22354 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
22355 Stream identifier
22357 Returns
22358 -------
22359 cudaError_t
22360 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
22362 See Also
22363 --------
22364 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2DAsync`
22365 """
22366 cdef cyruntime.cudaStream_t cystream
22367 if stream is None:
22368 pstream = 0
22369 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
22370 pstream = int(stream)
22371 else:
22372 pstream = int(cudaStream_t(stream))
22373 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
22374 cdef cyruntime.cudaArray_t cydst
22375 if dst is None:
22376 pdst = 0
22377 elif isinstance(dst, (cudaArray_t,)):
22378 pdst = int(dst)
22379 else:
22380 pdst = int(cudaArray_t(dst))
22381 cydst = <cyruntime.cudaArray_t><void_ptr>pdst
22382 cysrc = _HelperInputVoidPtr(src)
22383 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
22384 cdef cyruntime.cudaMemcpyKind cykind = kind.value
22385 with nogil:
22386 err = cyruntime.cudaMemcpy2DToArrayAsync(cydst, wOffset, hOffset, cysrc_ptr, spitch, width, height, cykind, cystream)
22387 return (_dict_cudaError_t[err],)
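
# --- Editor's note: illustrative sketch (not generated code) copying a
# tightly packed host buffer into an 8-bit single-channel CUDA array; the
# channel descriptor is built by hand so the example stays self-contained.
def _example_cudaMemcpy2DToArrayAsync():
    desc = cudaChannelFormatDesc()
    desc.x, desc.y, desc.z, desc.w = 8, 0, 0, 0
    desc.f = cudaChannelFormatKind.cudaChannelFormatKindUnsigned
    _, arr = cudaMallocArray(desc, 256, 64, 0)
    host = (ctypes.c_char * (256 * 64))()
    _, stream = cudaStreamCreate()
    err, = cudaMemcpy2DToArrayAsync(arr, 0, 0, host, 256, 256, 64,
                                    cudaMemcpyKind.cudaMemcpyHostToDevice,
                                    stream)
    assert err == cudaError_t.cudaSuccess
    cudaStreamSynchronize(stream)
    cudaFreeArray(arr)
    cudaStreamDestroy(stream)
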
22389@cython.embedsignature(True)
22390def cudaMemcpy2DFromArrayAsync(dst, size_t dpitch, src, size_t wOffset, size_t hOffset, size_t width, size_t height, kind not None : cudaMemcpyKind, stream):
22391 """ Copies data between host and device.
22393 Copies a matrix (`height` rows of `width` bytes each) from the CUDA
22394 array `src` starting at `hOffset` rows and `wOffset` bytes from the
22395 upper left corner to the memory area pointed to by `dst`, where `kind`
22396 specifies the direction of the copy, and must be one of
22397 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
22398 :py:obj:`~.cudaMemcpyDeviceToHost`,
22399 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
22400 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
22401 type of transfer is inferred from the pointer values. However,
22402 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
22403 unified virtual addressing. `dpitch` is the width in memory in bytes of
22404 the 2D array pointed to by `dst`, including any padding added to the
22405 end of each row. `wOffset` + `width` must not exceed the width of the
22406 CUDA array `src`. `width` must not exceed `dpitch`.
22407 :py:obj:`~.cudaMemcpy2DFromArrayAsync()` returns an error if `dpitch`
22408 exceeds the maximum allowed.
22410 :py:obj:`~.cudaMemcpy2DFromArrayAsync()` is asynchronous with respect
22411 to the host, so the call may return before the copy is complete. The
22412 copy can optionally be associated with a stream by passing a non-zero
22413 `stream` argument. If `kind` is :py:obj:`~.cudaMemcpyHostToDevice` or
22414 :py:obj:`~.cudaMemcpyDeviceToHost` and `stream` is non-zero, the copy
22415 may overlap with operations in other streams.
22420 Parameters
22421 ----------
22422 dst : Any
22423 Destination memory address
22424 dpitch : size_t
22425 Pitch of destination memory
22426 src : :py:obj:`~.cudaArray_const_t`
22427 Source memory address
22428 wOffset : size_t
22429 Source starting X offset (columns in bytes)
22430 hOffset : size_t
22431 Source starting Y offset (rows)
22432 width : size_t
22433 Width of matrix transfer (columns in bytes)
22434 height : size_t
22435 Height of matrix transfer (rows)
22436 kind : :py:obj:`~.cudaMemcpyKind`
22437 Type of transfer
22438 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
22439 Stream identifier
22441 Returns
22442 -------
22443 cudaError_t
22444 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
22446 See Also
22447 --------
22448 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2DAsync`
22449 """
22450 cdef cyruntime.cudaStream_t cystream
22451 if stream is None:
22452 pstream = 0
22453 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
22454 pstream = int(stream)
22455 else:
22456 pstream = int(cudaStream_t(stream))
22457 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
22458 cdef cyruntime.cudaArray_const_t cysrc
22459 if src is None:
22460 psrc = 0
22461 elif isinstance(src, (cudaArray_const_t,)):
22462 psrc = int(src)
22463 else:
22464 psrc = int(cudaArray_const_t(src))
22465 cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
22466 cydst = _HelperInputVoidPtr(dst)
22467 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
22468 cdef cyruntime.cudaMemcpyKind cykind = kind.value
22469 with nogil:
22470 err = cyruntime.cudaMemcpy2DFromArrayAsync(cydst_ptr, dpitch, cysrc, wOffset, hOffset, width, height, cykind, cystream)
22471 return (_dict_cudaError_t[err],)
22473@cython.embedsignature(True)
22474def cudaMemset(devPtr, int value, size_t count):
22475 """ Initializes or sets device memory to a value.
22477 Fills the first `count` bytes of the memory area pointed to by `devPtr`
22478 with the constant byte value `value`.
22480 Note that this function is asynchronous with respect to the host unless
22481 `devPtr` refers to pinned host memory.
22483 Parameters
22484 ----------
22485 devPtr : Any
22486 Pointer to device memory
22487 value : int
22488 Value to set for each byte of specified memory
22489 count : size_t
22490 Size in bytes to set
22492 Returns
22493 -------
22494 cudaError_t
22495 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
22497 See Also
22498 --------
22499 :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`
22500 """
22501 cydevPtr = _HelperInputVoidPtr(devPtr)
22502 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
22503 with nogil:
22504 err = cyruntime.cudaMemset(cydevPtr_ptr, value, count)
22505 return (_dict_cudaError_t[err],)
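
# --- Editor's note: minimal sketch (not generated code) filling a device
# allocation with a constant byte; the synchronize covers the case where the
# memset runs asynchronously with respect to the host.
def _example_cudaMemset():
    err, dptr = cudaMalloc(1 << 20)
    assert err == cudaError_t.cudaSuccess
    err, = cudaMemset(dptr, 0xFF, 1 << 20)  # set every byte to 0xFF
    assert err == cudaError_t.cudaSuccess
    cudaDeviceSynchronize()
    cudaFree(dptr)
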
22507@cython.embedsignature(True)
22508def cudaMemset2D(devPtr, size_t pitch, int value, size_t width, size_t height):
22509 """ Initializes or sets device memory to a value.
22511 Sets a matrix (`height` rows of `width` bytes each) pointed to by
22512 `devPtr` to the specified value `value`. `pitch` is the width in bytes of
22513 the 2D array pointed to by `devPtr`, including any padding added to the
22514 end of each row. This function performs fastest when the pitch is one
22515 that has been passed back by :py:obj:`~.cudaMallocPitch()`.
22517 Note that this function is asynchronous with respect to the host unless
22518 `devPtr` refers to pinned host memory.
22520 Parameters
22521 ----------
22522 devPtr : Any
22523 Pointer to 2D device memory
22524 pitch : size_t
22525 Pitch in bytes of 2D device memory (unused if `height` is 1)
22526 value : int
22527 Value to set for each byte of specified memory
22528 width : size_t
22529 Width of matrix set (columns in bytes)
22530 height : size_t
22531 Height of matrix set (rows)
22533 Returns
22534 -------
22535 cudaError_t
22536 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
22538 See Also
22539 --------
22540 :py:obj:`~.cudaMemset`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemsetAsync`, :py:obj:`~.cudaMemset2DAsync`, :py:obj:`~.cudaMemset3DAsync`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`
22541 """
22542 cydevPtr = _HelperInputVoidPtr(devPtr)
22543 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
22544 with nogil:
22545 err = cyruntime.cudaMemset2D(cydevPtr_ptr, pitch, value, width, height)
22546 return (_dict_cudaError_t[err],)
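
# --- Editor's note: a sketch (not generated code) pairing cudaMemset2D with
# a pitched allocation, which is the layout the docstring says performs best.
def _example_cudaMemset2D():
    width, height = 200, 100                     # width in bytes, rows
    _, dptr, pitch = cudaMallocPitch(width, height)
    err, = cudaMemset2D(dptr, pitch, 0, width, height)
    assert err == cudaError_t.cudaSuccess
    cudaDeviceSynchronize()
    cudaFree(dptr)
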
22548@cython.embedsignature(True)
22549def cudaMemset3D(pitchedDevPtr not None : cudaPitchedPtr, int value, extent not None : cudaExtent):
22550 """ Initializes or sets device memory to a value.
22552 Initializes each element of a 3D array to the specified value `value`.
22553 The object to initialize is defined by `pitchedDevPtr`. The `pitch`
22554 field of `pitchedDevPtr` is the width in memory in bytes of the 3D
22555 array pointed to by `pitchedDevPtr`, including any padding added to the
22556 end of each row. The `xsize` field specifies the logical width of each
22557 row in bytes, while the `ysize` field specifies the height of each 2D
22558 slice in rows. The `pitch` field of `pitchedDevPtr` is ignored when
22559 `height` and `depth` are both equal to 1.
22561 The extents of the initialized region are specified as a `width` in
22562 bytes, a `height` in rows, and a `depth` in slices.
22564 Extents with `width` greater than or equal to the `xsize` of
22565 `pitchedDevPtr` may perform significantly faster than extents narrower
22566 than the `xsize`. Secondarily, extents with `height` equal to the
22567 `ysize` of `pitchedDevPtr` will perform faster than when the `height`
22568 is shorter than the `ysize`.
22570 This function performs fastest when the `pitchedDevPtr` has been
22571 allocated by :py:obj:`~.cudaMalloc3D()`.
22573 Note that this function is asynchronous with respect to the host unless
22574 `pitchedDevPtr` refers to pinned host memory.
22576 Parameters
22577 ----------
22578 pitchedDevPtr : :py:obj:`~.cudaPitchedPtr`
22579 Pointer to pitched device memory
22580 value : int
22581 Value to set for each byte of specified memory
22582 extent : :py:obj:`~.cudaExtent`
22583 Size parameters for where to set device memory (`width` field in
22584 bytes)
22586 Returns
22587 -------
22588 cudaError_t
22589 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
22591 See Also
22592 --------
22593 :py:obj:`~.cudaMemset`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaMemsetAsync`, :py:obj:`~.cudaMemset2DAsync`, :py:obj:`~.cudaMemset3DAsync`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.make_cudaPitchedPtr`, :py:obj:`~.make_cudaExtent`
22594 """
22595 with nogil:
22596 err = cyruntime.cudaMemset3D(pitchedDevPtr._pvt_ptr[0], value, extent._pvt_ptr[0])
22597 return (_dict_cudaError_t[err],)
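
# --- Editor's note: a sketch (not generated code) zeroing a 3D allocation;
# the extent fields are set directly to keep the example self-contained.
def _example_cudaMemset3D():
    extent = cudaExtent()
    extent.width, extent.height, extent.depth = 256, 32, 8  # width in bytes
    _, pitched = cudaMalloc3D(extent)
    err, = cudaMemset3D(pitched, 0, extent)
    assert err == cudaError_t.cudaSuccess
    cudaDeviceSynchronize()
    cudaFree(pitched.ptr)
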
22599@cython.embedsignature(True)
22600def cudaMemsetAsync(devPtr, int value, size_t count, stream):
22601 """ Initializes or sets device memory to a value.
22603 Fills the first `count` bytes of the memory area pointed to by `devPtr`
22604 with the constant byte value `value`.
22606 :py:obj:`~.cudaMemsetAsync()` is asynchronous with respect to the host,
22607 so the call may return before the memset is complete. The operation can
22608 optionally be associated with a stream by passing a non-zero `stream`
22609 argument. If `stream` is non-zero, the operation may overlap with
22610 operations in other streams.
22612 The device version of this function only handles device to device
22613 copies and cannot be given local or shared pointers.
22615 Parameters
22616 ----------
22617 devPtr : Any
22618 Pointer to device memory
22619 value : int
22620 Value to set for each byte of specified memory
22621 count : size_t
22622 Size in bytes to set
22623 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
22624 Stream identifier
22626 Returns
22627 -------
22628 cudaError_t
22629 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
22631 See Also
22632 --------
22633 :py:obj:`~.cudaMemset`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemset2DAsync`, :py:obj:`~.cudaMemset3DAsync`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32Async`
22634 """
22635 cdef cyruntime.cudaStream_t cystream
22636 if stream is None:
22637 pstream = 0
22638 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
22639 pstream = int(stream)
22640 else:
22641 pstream = int(cudaStream_t(stream))
22642 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
22643 cydevPtr = _HelperInputVoidPtr(devPtr)
22644 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
22645 with nogil:
22646 err = cyruntime.cudaMemsetAsync(cydevPtr_ptr, value, count, cystream)
22647 return (_dict_cudaError_t[err],)
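
# --- Editor's note: a sketch (not generated code) of the stream-ordered
# variant; work launched afterwards in the same stream sees the cleared bytes.
def _example_cudaMemsetAsync():
    _, stream = cudaStreamCreate()
    _, dptr = cudaMalloc(4096)
    err, = cudaMemsetAsync(dptr, 0, 4096, stream)
    assert err == cudaError_t.cudaSuccess
    cudaStreamSynchronize(stream)
    cudaFree(dptr)
    cudaStreamDestroy(stream)
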
22649@cython.embedsignature(True)
22650def cudaMemset2DAsync(devPtr, size_t pitch, int value, size_t width, size_t height, stream):
22651 """ Initializes or sets device memory to a value.
22653 Sets a matrix (`height` rows of `width` bytes each) pointed to by
22654 `devPtr` to the specified value `value`. `pitch` is the width in bytes of
22655 the 2D array pointed to by `devPtr`, including any padding added to the
22656 end of each row. This function performs fastest when the pitch is one
22657 that has been passed back by :py:obj:`~.cudaMallocPitch()`.
22659 :py:obj:`~.cudaMemset2DAsync()` is asynchronous with respect to the
22660 host, so the call may return before the memset is complete. The
22661 operation can optionally be associated with a stream by passing a non-
22662 zero `stream` argument. If `stream` is non-zero, the operation may
22663 overlap with operations in other streams.
22665 The device version of this function only handles device to device
22666 copies and cannot be given local or shared pointers.
22668 Parameters
22669 ----------
22670 devPtr : Any
22671 Pointer to 2D device memory
22672 pitch : size_t
22673 Pitch in bytes of 2D device memory (unused if `height` is 1)
22674 value : int
22675 Value to set for each byte of specified memory
22676 width : size_t
22677 Width of matrix set (columns in bytes)
22678 height : size_t
22679 Height of matrix set (rows)
22680 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
22681 Stream identifier
22683 Returns
22684 -------
22685 cudaError_t
22686 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
22688 See Also
22689 --------
22690 :py:obj:`~.cudaMemset`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemsetAsync`, :py:obj:`~.cudaMemset3DAsync`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32Async`
22691 """
22692 cdef cyruntime.cudaStream_t cystream
22693 if stream is None:
22694 pstream = 0
22695 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
22696 pstream = int(stream)
22697 else:
22698 pstream = int(cudaStream_t(stream))
22699 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
22700 cydevPtr = _HelperInputVoidPtr(devPtr)
22701 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
22702 with nogil:
22703 err = cyruntime.cudaMemset2DAsync(cydevPtr_ptr, pitch, value, width, height, cystream)
22704 return (_dict_cudaError_t[err],)
22706@cython.embedsignature(True)
22707def cudaMemset3DAsync(pitchedDevPtr not None : cudaPitchedPtr, int value, extent not None : cudaExtent, stream):
22708 """ Initializes or sets device memory to a value.
22710 Initializes each element of a 3D array to the specified value `value`.
22711 The object to initialize is defined by `pitchedDevPtr`. The `pitch`
22712 field of `pitchedDevPtr` is the width in memory in bytes of the 3D
22713 array pointed to by `pitchedDevPtr`, including any padding added to the
22714 end of each row. The `xsize` field specifies the logical width of each
22715 row in bytes, while the `ysize` field specifies the height of each 2D
22716 slice in rows. The `pitch` field of `pitchedDevPtr` is ignored when
22717 `height` and `depth` are both equal to 1.
22719 The extents of the initialized region are specified as a `width` in
22720 bytes, a `height` in rows, and a `depth` in slices.
22722 Extents with `width` greater than or equal to the `xsize` of
22723 `pitchedDevPtr` may perform significantly faster than extents narrower
22724 than the `xsize`. Secondarily, extents with `height` equal to the
22725 `ysize` of `pitchedDevPtr` will perform faster than when the `height`
22726 is shorter than the `ysize`.
22728 This function performs fastest when the `pitchedDevPtr` has been
22729 allocated by :py:obj:`~.cudaMalloc3D()`.
22731 :py:obj:`~.cudaMemset3DAsync()` is asynchronous with respect to the
22732 host, so the call may return before the memset is complete. The
22733 operation can optionally be associated with a stream by passing a non-
22734 zero `stream` argument. If `stream` is non-zero, the operation may
22735 overlap with operations in other streams.
22737 The device version of this function only handles device to device
22738 copies and cannot be given local or shared pointers.
22740 Parameters
22741 ----------
22742 pitchedDevPtr : :py:obj:`~.cudaPitchedPtr`
22743 Pointer to pitched device memory
22744 value : int
22745 Value to set for each byte of specified memory
22746 extent : :py:obj:`~.cudaExtent`
22747 Size parameters for where to set device memory (`width` field in
22748 bytes)
22749 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
22750 Stream identifier
22752 Returns
22753 -------
22754 cudaError_t
22755 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
22757 See Also
22758 --------
22759 :py:obj:`~.cudaMemset`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemsetAsync`, :py:obj:`~.cudaMemset2DAsync`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.make_cudaPitchedPtr`, :py:obj:`~.make_cudaExtent`
22760 """
22761 cdef cyruntime.cudaStream_t cystream
22762 if stream is None:
22763 pstream = 0
22764 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
22765 pstream = int(stream)
22766 else:
22767 pstream = int(cudaStream_t(stream))
22768 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
22769 with nogil:
22770 err = cyruntime.cudaMemset3DAsync(pitchedDevPtr._pvt_ptr[0], value, extent._pvt_ptr[0], cystream)
22771 return (_dict_cudaError_t[err],)
22773@cython.embedsignature(True)
22774def cudaMemPrefetchAsync(devPtr, size_t count, location not None : cudaMemLocation, unsigned int flags, stream):
22775 """ Prefetches memory to the specified destination location.
22777 Prefetches memory to the specified destination location. `devPtr` is
22778 the base device pointer of the memory to be prefetched and `location`
22779 specifies the destination location. `count` specifies the number of
22780 bytes to copy. `stream` is the stream in which the operation is
22781 enqueued. The memory range must refer to managed memory allocated via
22782 :py:obj:`~.cudaMallocManaged` or declared via managed variables, or it
22783 may also refer to memory allocated from a managed memory pool, or it
22784 may also refer to system-allocated memory on systems with non-zero
22785 cudaDevAttrPageableMemoryAccess.
22787 Specifying :py:obj:`~.cudaMemLocationTypeDevice` for
22788 :py:obj:`~.cudaMemLocation.type` will prefetch memory to the GPU
22789 specified by device ordinal :py:obj:`~.cudaMemLocation.id`, which must
22790 have a non-zero value for the device attribute
22791 :py:obj:`~.concurrentManagedAccess`. Additionally, `stream` must be
22792 associated with a device that has a non-zero value for the device
22793 attribute :py:obj:`~.concurrentManagedAccess`. Specifying
22794 :py:obj:`~.cudaMemLocationTypeHost` as :py:obj:`~.cudaMemLocation.type`
22795 will prefetch data to host memory. Applications can request prefetching
22796 memory to a specific host NUMA node by specifying
22797 :py:obj:`~.cudaMemLocationTypeHostNuma` for
22798 :py:obj:`~.cudaMemLocation.type` and a valid host NUMA node id in
22799 :py:obj:`~.cudaMemLocation.id`. Users can also request prefetching
22800 memory to the host NUMA node closest to the current thread's CPU by
22801 specifying :py:obj:`~.cudaMemLocationTypeHostNumaCurrent` for
22802 :py:obj:`~.cudaMemLocation.type`. Note when
22803 :py:obj:`~.cudaMemLocation.type` is either
22804 :py:obj:`~.cudaMemLocationTypeHost` or
22805 :py:obj:`~.cudaMemLocationTypeHostNumaCurrent`,
22806 :py:obj:`~.cudaMemLocation.id` will be ignored.
22808 The start address and end address of the memory range will be rounded
22809 down and rounded up respectively to be aligned to CPU page size before
22810 the prefetch operation is enqueued in the stream.
22812 If no physical memory has been allocated for this region, then this
22813 memory region will be populated and mapped on the destination device.
22814 If there's insufficient memory to prefetch the desired region, the
22815 Unified Memory driver may evict pages from other
22816 :py:obj:`~.cudaMallocManaged` allocations to host memory in order to
22817 make room. Device memory allocated using :py:obj:`~.cudaMalloc` or
22818 :py:obj:`~.cudaMallocArray` will not be evicted.
22820 By default, any mappings to the previous location of the migrated pages
22821 are removed and mappings for the new location are only setup on the
22822 destination location. The exact behavior however also depends on the
22823 settings applied to this memory range via :py:obj:`~.cudaMemAdvise` as
22824 described below:
22826 If :py:obj:`~.cudaMemAdviseSetReadMostly` was set on any subset of this
22827 memory range, then that subset will create a read-only copy of the
22828 pages at the destination location. If, however, the destination location is a
22829 host NUMA node, then any pages of that subset that are already in
22830 another host NUMA node will be transferred to the destination.
22832 If :py:obj:`~.cudaMemAdviseSetPreferredLocation` was called on any
22833 subset of this memory range, then the pages will be migrated to
22834 `location` even if `location` is not the preferred location of any
22835 pages in the memory range.
22837 If :py:obj:`~.cudaMemAdviseSetAccessedBy` was called on any subset of
22838 this memory range, then mappings to those pages from all the
22839 appropriate processors are updated to refer to the new location if
22840 establishing such a mapping is possible. Otherwise, those mappings are
22841 cleared.
22843 Note that this API is not required for functionality and only serves to
22844 improve performance by allowing the application to migrate data to a
22845 suitable location before it is accessed. Memory accesses to this range
22846 are always coherent and are allowed even when the data is actively
22847 being migrated.
22849 Note that this function is asynchronous with respect to the host and
22850 all work on other devices.
22852 Parameters
22853 ----------
22854 devPtr : Any
22855 Pointer to be prefetched
22856 count : size_t
22857 Size in bytes
22858 location : :py:obj:`~.cudaMemLocation`
22859 location to prefetch to
22860 flags : unsigned int
22861 Flags for future use, must be zero now.
22862 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
22863 Stream to enqueue prefetch operation
22865 Returns
22866 -------
22867 cudaError_t
22868 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
22870 See Also
22871 --------
22872 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cudaMemAdvise`, :py:obj:`~.cuMemPrefetchAsync`
22873 """
22874 cdef cyruntime.cudaStream_t cystream
22875 if stream is None:
22876 pstream = 0
22877 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
22878 pstream = int(stream)
22879 else:
22880 pstream = int(cudaStream_t(stream))
22881 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
22882 cydevPtr = _HelperInputVoidPtr(devPtr)
22883 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
22884 with nogil:
22885 err = cyruntime.cudaMemPrefetchAsync(cydevPtr_ptr, count, location._pvt_ptr[0], flags, cystream)
22886 return (_dict_cudaError_t[err],)
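
# --- Editor's note: a hedged sketch (not generated code) prefetching a
# managed allocation to device 0; it assumes the device reports a non-zero
# cudaDevAttrConcurrentManagedAccess, as the docstring requires.
def _example_cudaMemPrefetchAsync():
    _, stream = cudaStreamCreate()
    nbytes = 1 << 20
    _, mptr = cudaMallocManaged(nbytes, cudaMemAttachGlobal)
    loc = cudaMemLocation()
    loc.type = cudaMemLocationType.cudaMemLocationTypeDevice
    loc.id = 0
    err, = cudaMemPrefetchAsync(mptr, nbytes, loc, 0, stream)
    assert err == cudaError_t.cudaSuccess
    cudaStreamSynchronize(stream)
    cudaFree(mptr)
    cudaStreamDestroy(stream)
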
22888@cython.embedsignature(True)
22889def cudaMemPrefetchBatchAsync(dptrs : Optional[tuple[Any] | list[Any]], sizes : tuple[int] | list[int], size_t count, prefetchLocs : Optional[tuple[cudaMemLocation] | list[cudaMemLocation]], prefetchLocIdxs : tuple[int] | list[int], size_t numPrefetchLocs, unsigned long long flags, stream):
22890 """ Performs a batch of memory prefetches asynchronously.
22892 Performs a batch of memory prefetches. The batch as a whole executes in
22893 stream order but operations within a batch are not guaranteed to
22894 execute in any specific order. All devices in the system must have a
22895 non-zero value for the device attribute
22896 :py:obj:`~.cudaDevAttrConcurrentManagedAccess`; otherwise the API will
22897 return an error.
22899 The semantics of the individual prefetch operations are as described in
22900 :py:obj:`~.cudaMemPrefetchAsync`.
22902 Performs memory prefetch on address ranges specified in `dptrs` and
22903 `sizes`. Both arrays must be of the same length as specified by
22904 `count`. Each memory range specified must refer to managed memory
22905 allocated via :py:obj:`~.cudaMallocManaged` or declared via managed
22906 variables or it may also refer to system-allocated memory when all
22907 devices have a non-zero value for
22908 :py:obj:`~.cudaDevAttrPageableMemoryAccess`. The prefetch location for
22909 every operation in the batch is specified in the `prefetchLocs` array.
22910 Each entry in this array can apply to more than one operation. This can
22911 be done by specifying in the `prefetchLocIdxs` array, the index of the
22912 first prefetch operation that the corresponding entry in the
22913 `prefetchLocs` array applies to. Both `prefetchLocs` and
22914 `prefetchLocIdxs` must be of the same length as specified by
22915 `numPrefetchLocs`. For example, if a batch has 10 prefetches listed in
22916 dptrs/sizes, the first 4 of which are to be prefetched to one location
22917 and the remaining 6 are to be prefetched to another, then
22918 `numPrefetchLocs` will be 2, `prefetchLocIdxs` will be {0, 4} and
22919 `prefetchLocs` will contain the two locations. Note the first entry in
22920 `prefetchLocIdxs` must always be 0. Also, each entry must be greater
22921 than the previous entry and the last entry should be less than `count`.
22922 Furthermore, `numPrefetchLocs` must be less than or equal to `count`.
22924 Parameters
22925 ----------
22926 dptrs : list[Any]
22927 Array of pointers to be prefetched
22928 sizes : list[int]
22929 Array of sizes for memory prefetch operations.
22930 count : size_t
22931 Size of `dptrs` and `sizes` arrays.
22932 prefetchLocs : list[:py:obj:`~.cudaMemLocation`]
22933 Array of locations to prefetch to.
22934 prefetchLocIdxs : list[int]
22935 Array of indices to specify which operands each entry in the
22936 `prefetchLocs` array applies to. The locations specified in
22937 prefetchLocs[k] will be applied to prefetches starting from
22938 prefetchLocIdxs[k] through prefetchLocIdxs[k+1] - 1. Also
22939 prefetchLocs[numPrefetchLocs - 1] will apply to prefetches starting
22940 from prefetchLocIdxs[numPrefetchLocs - 1] through count - 1.
22941 numPrefetchLocs : size_t
22942 Size of `prefetchLocs` and `prefetchLocIdxs` arrays.
22943 flags : unsigned long long
22944 Flags reserved for future use. Must be zero.
22945 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
22946 The stream to enqueue the operations in. Must not be the legacy
22947 NULL stream.
22949 Returns
22950 -------
22951 cudaError_t
22953 """
22954 cdef cyruntime.cudaStream_t cystream
22955 if stream is None:
22956 pstream = 0
22957 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
22958 pstream = int(stream)
22959 else:
22960 pstream = int(cudaStream_t(stream))
22961 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
22962 if not all(isinstance(_x, int) for _x in prefetchLocIdxs):
22963 raise TypeError("Argument 'prefetchLocIdxs' is not an instance of the expected type (tuple[int, ...] or list[int])")
22964 prefetchLocs = [] if prefetchLocs is None else prefetchLocs
22965 if not all(isinstance(_x, (cudaMemLocation,)) for _x in prefetchLocs):
22966 raise TypeError("Argument 'prefetchLocs' is not an instance of the expected type (tuple[cudaMemLocation, ...] or list[cudaMemLocation])")
22967 if not all(isinstance(_x, int) for _x in sizes):
22968 raise TypeError("Argument 'sizes' is not an instance of the expected type (tuple[int, ...] or list[int])")
22969 dptrs = [] if dptrs is None else dptrs
22970 pylist = [_HelperInputVoidPtr(pydptrs) for pydptrs in dptrs]
22971 cdef _InputVoidPtrPtrHelper voidStarHelperdptrs = _InputVoidPtrPtrHelper(pylist)
22972 cdef void** cydptrs_ptr = <void**><void_ptr>voidStarHelperdptrs.cptr
22973 cdef vector[size_t] cysizes = sizes
22974 if count > <size_t>len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count))
22975 if count > <size_t>len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count))
22976 cdef cyruntime.cudaMemLocation* cyprefetchLocs = NULL
22977 if len(prefetchLocs) > 1:
22978 cyprefetchLocs = <cyruntime.cudaMemLocation*> calloc(len(prefetchLocs), sizeof(cyruntime.cudaMemLocation))
22979 if cyprefetchLocs is NULL:
22980 raise MemoryError('Failed to allocate ' + str(len(prefetchLocs)) + ' x ' + str(sizeof(cyruntime.cudaMemLocation)) + ' bytes')
22981 for idx in range(len(prefetchLocs)):
22982 string.memcpy(&cyprefetchLocs[idx], (<cudaMemLocation>prefetchLocs[idx])._pvt_ptr, sizeof(cyruntime.cudaMemLocation))
22983 elif len(prefetchLocs) == 1:
22984 cyprefetchLocs = (<cudaMemLocation>prefetchLocs[0])._pvt_ptr
22985 cdef vector[size_t] cyprefetchLocIdxs = prefetchLocIdxs
22986 if numPrefetchLocs > <size_t>len(prefetchLocs): raise RuntimeError("List is too small: " + str(len(prefetchLocs)) + " < " + str(numPrefetchLocs))
22987 if numPrefetchLocs > <size_t>len(prefetchLocIdxs): raise RuntimeError("List is too small: " + str(len(prefetchLocIdxs)) + " < " + str(numPrefetchLocs))
22988 with nogil:
22989 err = cyruntime.cudaMemPrefetchBatchAsync(cydptrs_ptr, cysizes.data(), count, cyprefetchLocs, cyprefetchLocIdxs.data(), numPrefetchLocs, flags, cystream)
22990 if len(prefetchLocs) > 1 and cyprefetchLocs is not NULL:
22991 free(cyprefetchLocs)
22992 return (_dict_cudaError_t[err],)
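
# --- Editor's note: a sketch (not generated code) mirroring the indexing
# example in the docstring: two locations, where prefetchLocIdxs == [0, 2]
# sends ranges 0-1 to device 0 and range 2 to the host. Error checking on
# the managed allocations is elided.
def _example_cudaMemPrefetchBatchAsync():
    _, stream = cudaStreamCreate()          # must not be the legacy stream
    ptrs = [cudaMallocManaged(4096, cudaMemAttachGlobal)[1] for _ in range(3)]
    sizes = [4096, 4096, 4096]
    to_dev = cudaMemLocation()
    to_dev.type = cudaMemLocationType.cudaMemLocationTypeDevice
    to_dev.id = 0
    to_host = cudaMemLocation()
    to_host.type = cudaMemLocationType.cudaMemLocationTypeHost
    err, = cudaMemPrefetchBatchAsync(ptrs, sizes, 3, [to_dev, to_host],
                                     [0, 2], 2, 0, stream)
    assert err == cudaError_t.cudaSuccess
    cudaStreamSynchronize(stream)
    for ptr in ptrs:
        cudaFree(ptr)
    cudaStreamDestroy(stream)
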
22994@cython.embedsignature(True)
22995def cudaMemDiscardBatchAsync(dptrs : Optional[tuple[Any] | list[Any]], sizes : tuple[int] | list[int], size_t count, unsigned long long flags, stream):
22996 """ Performs a batch of memory discards asynchronously.
22998 Performs a batch of memory discards. The batch as a whole executes in
22999 stream order but operations within a batch are not guaranteed to
23000 execute in any specific order. All devices in the system must have a
23001 non-zero value for the device attribute
23002 :py:obj:`~.cudaDevAttrConcurrentManagedAccess`; otherwise the API will
23003 return an error.
23005 Discarding a memory range informs the driver that the contents of that
23006 range are no longer useful. Discarding memory ranges allows the driver
23007 to optimize certain data migrations and can also help reduce memory
23008 pressure. This operation can be undone on any part of the range by
23009 either writing to it or prefetching it via
23010 :py:obj:`~.cudaMemPrefetchAsync` or
23011 :py:obj:`~.cudaMemPrefetchBatchAsync`. Reading from a discarded range,
23012 without a subsequent write or prefetch to that part of the range, will
23013 return an indeterminate value. Note that any reads, writes or
23014 prefetches to any part of the memory range that occur simultaneously
23015 with the discard operation result in undefined behavior.
23017 Performs memory discard on address ranges specified in `dptrs` and
23018 `sizes`. Both arrays must be of the same length as specified by
23019 `count`. Each memory range specified must refer to managed memory
23020 allocated via :py:obj:`~.cudaMallocManaged` or declared via managed
23021 variables or it may also refer to system-allocated memory when all
23022 devices have a non-zero value for
23023 :py:obj:`~.cudaDevAttrPageableMemoryAccess`.
23025 Parameters
23026 ----------
23027 dptrs : list[Any]
23028 Array of pointers to be discarded
23029 sizes : list[int]
23030 Array of sizes for memory discard operations.
23031 count : size_t
23032 Size of `dptrs` and `sizes` arrays.
23033 flags : unsigned long long
23034 Flags reserved for future use. Must be zero.
23035 hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
23036 The stream to enqueue the operations in. Must not be legacy NULL
23037 stream.
23039 Returns
23040 -------
23041 cudaError_t
23043 """
23044 cdef cyruntime.cudaStream_t cystream
23045 if stream is None:
23046 pstream = 0
23047 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
23048 pstream = int(stream)
23049 else:
23050 pstream = int(cudaStream_t(stream))
23051 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
23052 if not all(isinstance(_x, int) for _x in sizes):
23053 raise TypeError("Argument 'sizes' is not an instance of the expected type (tuple[int, ...] or list[int])")
23054 dptrs = [] if dptrs is None else dptrs
23055 pylist = [_HelperInputVoidPtr(pydptrs) for pydptrs in dptrs]
23056 cdef _InputVoidPtrPtrHelper voidStarHelperdptrs = _InputVoidPtrPtrHelper(pylist)
23057 cdef void** cydptrs_ptr = <void**><void_ptr>voidStarHelperdptrs.cptr
23058 cdef vector[size_t] cysizes = sizes
23059 if count > <size_t>len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count))
23060 if count > <size_t>len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count))
23061 with nogil:
23062 err = cyruntime.cudaMemDiscardBatchAsync(cydptrs_ptr, cysizes.data(), count, flags, cystream)
23063 return (_dict_cudaError_t[err],)
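
# --- Editor's note: a sketch (not generated code); after the discard, the
# contents of the ranges are indeterminate until rewritten or prefetched.
def _example_cudaMemDiscardBatchAsync():
    _, stream = cudaStreamCreate()
    ptrs = [cudaMallocManaged(4096, cudaMemAttachGlobal)[1] for _ in range(2)]
    err, = cudaMemDiscardBatchAsync(ptrs, [4096, 4096], 2, 0, stream)
    assert err == cudaError_t.cudaSuccess
    cudaStreamSynchronize(stream)
    for ptr in ptrs:
        cudaFree(ptr)
    cudaStreamDestroy(stream)
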
23065@cython.embedsignature(True)
23066def cudaMemDiscardAndPrefetchBatchAsync(dptrs : Optional[tuple[Any] | list[Any]], sizes : tuple[int] | list[int], size_t count, prefetchLocs : Optional[tuple[cudaMemLocation] | list[cudaMemLocation]], prefetchLocIdxs : tuple[int] | list[int], size_t numPrefetchLocs, unsigned long long flags, stream):
23067 """ Performs a batch of memory discards and prefetches asynchronously.
23069 Performs a batch of memory discards followed by prefetches. The batch
23070 as a whole executes in stream order but operations within a batch are
23071 not guaranteed to execute in any specific order. All devices in the
23072 system must have a non-zero value for the device attribute
23073 :py:obj:`~.cudaDevAttrConcurrentManagedAccess`; otherwise the API will
23074 return an error.
23076 Calling :py:obj:`~.cudaMemDiscardAndPrefetchBatchAsync` is semantically
23077 equivalent to calling :py:obj:`~.cudaMemDiscardBatchAsync` followed by
23078 :py:obj:`~.cudaMemPrefetchBatchAsync`, but is more optimal. For more
23079 details on what discarding and prefetching imply, please refer to
23080 :py:obj:`~.cudaMemDiscardBatchAsync` and
23081 :py:obj:`~.cudaMemPrefetchBatchAsync` respectively. Note that any
23082 reads, writes or prefetches to any part of the memory range that occur
23083 simultaneously with this combined discard+prefetch operation result in
23084 undefined behavior.
23086 Performs memory discard and prefetch on address ranges specified in
23087 `dptrs` and `sizes`. Both arrays must be of the same length as
23088 specified by `count`. Each memory range specified must refer to managed
23089 memory allocated via :py:obj:`~.cudaMallocManaged` or declared via
23090 managed variables or it may also refer to system-allocated memory when
23091 all devices have a non-zero value for
23092 :py:obj:`~.cudaDevAttrPageableMemoryAccess`. Every operation in the
23093 batch has to be associated with a valid location to prefetch the
23094 address range to and specified in the `prefetchLocs` array. Each entry
23095 in this array can apply to more than one operation. This can be done by
23096 specifying in the `prefetchLocIdxs` array the index of the first
23097 operation that the corresponding entry in the `prefetchLocs` array
23098 applies to. Both `prefetchLocs` and `prefetchLocIdxs` must be of the
23099 same length as specified by `numPrefetchLocs`. For example, if a batch
23100 has 10 operations listed in dptrs/sizes, the first 6 of which are to be
23101 prefetched to one location and the remaining 4 are to be prefetched to
23102 another, then `numPrefetchLocs` will be 2, `prefetchLocIdxs` will be
23103 {0, 6} and `prefetchLocs` will contain the two sets of locations. Note
23104 the first entry in `prefetchLocIdxs` must always be 0. Also, each entry
23105 must be greater than the previous entry and the last entry should be
23106 less than `count`. Furthermore, `numPrefetchLocs` must be less than
23107 or equal to `count`.
23109 Parameters
23110 ----------
23111 dptrs : list[Any]
23112 Array of pointers to be discarded
23113 sizes : list[int]
23114 Array of sizes for memory discard operations.
23115 count : size_t
23116 Size of `dptrs` and `sizes` arrays.
23117 prefetchLocs : list[:py:obj:`~.cudaMemLocation`]
23118 Array of locations to prefetch to.
23119 prefetchLocIdxs : list[int]
23120 Array of indices to specify which operands each entry in the
23121 `prefetchLocs` array applies to. The locations specified in
23122 prefetchLocs[k] will be applied to operations starting from
23123 prefetchLocIdxs[k] through prefetchLocIdxs[k+1] - 1. Also
23124 prefetchLocs[numPrefetchLocs - 1] will apply to operations starting
23125 from prefetchLocIdxs[numPrefetchLocs - 1] through count - 1.
23126 numPrefetchLocs : size_t
23127 Size of `prefetchLocs` and `prefetchLocIdxs` arrays.
23128 flags : unsigned long long
23129 Flags reserved for future use. Must be zero.
23130 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
23131 The stream to enqueue the operations in. Must not be the legacy
23132 NULL stream.
23134 Returns
23135 -------
23136 cudaError_t
23138 """
23139 cdef cyruntime.cudaStream_t cystream
23140 if stream is None:
23141 pstream = 0
23142 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
23143 pstream = int(stream)
23144 else:
23145 pstream = int(cudaStream_t(stream))
23146 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
23147 if not all(isinstance(_x, int) for _x in prefetchLocIdxs):
23148 raise TypeError("Argument 'prefetchLocIdxs' is not an instance of the expected type (tuple[int, ...] or list[int])")
23149 prefetchLocs = [] if prefetchLocs is None else prefetchLocs
23150 if not all(isinstance(_x, (cudaMemLocation,)) for _x in prefetchLocs):
23151 raise TypeError("Argument 'prefetchLocs' is not an instance of the expected type (tuple[cudaMemLocation, ...] or list[cudaMemLocation])")
23152 if not all(isinstance(_x, int) for _x in sizes):
23153 raise TypeError("Argument 'sizes' is not an instance of the expected type (tuple[int, ...] or list[int])")
23154 dptrs = [] if dptrs is None else dptrs
23155 pylist = [_HelperInputVoidPtr(pydptrs) for pydptrs in dptrs]
23156 cdef _InputVoidPtrPtrHelper voidStarHelperdptrs = _InputVoidPtrPtrHelper(pylist)
23157 cdef void** cydptrs_ptr = <void**><void_ptr>voidStarHelperdptrs.cptr
23158 cdef vector[size_t] cysizes = sizes
23159 if count > <size_t>len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count))
23160 if count > <size_t>len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count))
23161 cdef cyruntime.cudaMemLocation* cyprefetchLocs = NULL
23162 if len(prefetchLocs) > 1:
23163 cyprefetchLocs = <cyruntime.cudaMemLocation*> calloc(len(prefetchLocs), sizeof(cyruntime.cudaMemLocation))
23164 if cyprefetchLocs is NULL:
23165 raise MemoryError('Failed to allocate ' + str(len(prefetchLocs)) + ' x ' + str(sizeof(cyruntime.cudaMemLocation)) + ' bytes')
23166 for idx in range(len(prefetchLocs)):
23167 string.memcpy(&cyprefetchLocs[idx], (<cudaMemLocation>prefetchLocs[idx])._pvt_ptr, sizeof(cyruntime.cudaMemLocation))
23168 elif len(prefetchLocs) == 1:
23169 cyprefetchLocs = (<cudaMemLocation>prefetchLocs[0])._pvt_ptr
23170 cdef vector[size_t] cyprefetchLocIdxs = prefetchLocIdxs
23171 if numPrefetchLocs > <size_t>len(prefetchLocs): raise RuntimeError("List is too small: " + str(len(prefetchLocs)) + " < " + str(numPrefetchLocs))
23172 if numPrefetchLocs > <size_t>len(prefetchLocIdxs): raise RuntimeError("List is too small: " + str(len(prefetchLocIdxs)) + " < " + str(numPrefetchLocs))
23173 with nogil:
23174 err = cyruntime.cudaMemDiscardAndPrefetchBatchAsync(cydptrs_ptr, cysizes.data(), count, cyprefetchLocs, cyprefetchLocIdxs.data(), numPrefetchLocs, flags, cystream)
23175 if len(prefetchLocs) > 1 and cyprefetchLocs is not NULL:
23176 free(cyprefetchLocs)
23177 return (_dict_cudaError_t[err],)
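
# --- Editor's note: a sketch (not generated code) of the fused form; one
# location entry starting at index 0 covers both ranges, so the call is the
# optimized equivalent of a discard batch followed by a prefetch batch.
def _example_cudaMemDiscardAndPrefetchBatchAsync():
    _, stream = cudaStreamCreate()
    ptrs = [cudaMallocManaged(4096, cudaMemAttachGlobal)[1] for _ in range(2)]
    loc = cudaMemLocation()
    loc.type = cudaMemLocationType.cudaMemLocationTypeDevice
    loc.id = 0
    err, = cudaMemDiscardAndPrefetchBatchAsync(ptrs, [4096, 4096], 2,
                                               [loc], [0], 1, 0, stream)
    assert err == cudaError_t.cudaSuccess
    cudaStreamSynchronize(stream)
    for ptr in ptrs:
        cudaFree(ptr)
    cudaStreamDestroy(stream)
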
23179@cython.embedsignature(True)
23180def cudaMemAdvise(devPtr, size_t count, advice not None : cudaMemoryAdvise, location not None : cudaMemLocation):
23181 """ Advise about the usage of a given memory range.
23183 Advise the Unified Memory subsystem about the usage pattern for the
23184 memory range starting at `devPtr` with a size of `count` bytes. The
23185 start address and end address of the memory range will be rounded down
23186 and rounded up respectively to be aligned to CPU page size before the
23187 advice is applied. The memory range must refer to managed memory
23188 allocated via :py:obj:`~.cudaMallocManaged` or declared via managed
23189 variables. The memory range could also refer to system-allocated
23190 pageable memory provided it represents a valid, host-accessible region
23191 of memory and all additional constraints imposed by `advice` as
23192 outlined below are also satisfied. Specifying an invalid system-
23193 allocated pageable memory range results in an error being returned.
23195 The `advice` parameter can take the following values:
23197 - :py:obj:`~.cudaMemAdviseSetReadMostly`: This implies that the data is
23198 mostly going to be read from and only occasionally written to. Any
23199 read accesses from any processor to this region will create a read-
23200 only copy of at least the accessed pages in that processor's memory.
23201 Additionally, if :py:obj:`~.cudaMemPrefetchAsync` or
23202 :py:obj:`~.cudaMemPrefetchBatchAsync` is called on this region, it will
23203 create a read-only copy of the data on the destination processor. If
23204 the target location for :py:obj:`~.cudaMemPrefetchAsync` is a host
23205 NUMA node and a read-only copy already exists on another host NUMA
23206 node, that copy will be migrated to the targeted host NUMA node. If
23207 any processor writes to this region, all copies of the corresponding
23208 page will be invalidated except for the one where the write occurred.
23209 If the writing processor is the CPU and the preferred location of the
23210 page is a host NUMA node, then the page will also be migrated to that
23211 host NUMA node. The `location` argument is ignored for this advice.
23212 Note that for a page to be read-duplicated, the accessing processor
23213 must either be the CPU or a GPU that has a non-zero value for the
23214 device attribute :py:obj:`~.cudaDevAttrConcurrentManagedAccess`.
23215 Also, if a context is created on a device that does not have the
23216 device attribute :py:obj:`~.cudaDevAttrConcurrentManagedAccess` set,
23217 then read-duplication will not occur until all such contexts are
23218 destroyed. If the memory region refers to valid system-allocated
23219 pageable memory, then the accessing device must have a non-zero value
23220 for the device attribute :py:obj:`~.cudaDevAttrPageableMemoryAccess`
23221 for a read-only copy to be created on that device. Note however that
23222 if the accessing device also has a non-zero value for the device
23223 attribute
23224 :py:obj:`~.cudaDevAttrPageableMemoryAccessUsesHostPageTables`, then
23225 setting this advice will not create a read-only copy when that device
23226 accesses this memory region.
23228 - :py:obj:`~.cudaMemAdviseUnsetReadMostly`: Undoes the effect of
23229 :py:obj:`~.cudaMemAdviseSetReadMostly` and also prevents the Unified
23230 Memory driver from attempting heuristic read-duplication on the
23231 memory range. Any read-duplicated copies of the data will be
23232 collapsed into a single copy. The location for the collapsed copy
23233 will be the preferred location if the page has a preferred location
23234 and one of the read-duplicated copies was resident at that location.
23235 Otherwise, the location chosen is arbitrary. Note: The `location`
23236 argument is ignored for this advice.
23238 - :py:obj:`~.cudaMemAdviseSetPreferredLocation`: This advice sets the
23239 preferred location for the data to be the memory belonging to
23240 `location`. When :py:obj:`~.cudaMemLocation.type` is
23241 :py:obj:`~.cudaMemLocationTypeHost`, :py:obj:`~.cudaMemLocation.id`
23242 is ignored and the preferred location is set to be host memory. To
23243 set the preferred location to a specific host NUMA node, applications
23244 must set :py:obj:`~.cudaMemLocation.type` to
23245 :py:obj:`~.cudaMemLocationTypeHostNuma` and
23246 :py:obj:`~.cudaMemLocation.id` must specify the NUMA ID of the host
23247 NUMA node. If :py:obj:`~.cudaMemLocation.type` is set to
23248 :py:obj:`~.cudaMemLocationTypeHostNumaCurrent`,
23249 :py:obj:`~.cudaMemLocation.id` will be ignored and the host NUMA node
23250 closest to the calling thread's CPU will be used as the preferred
23251 location. If :py:obj:`~.cudaMemLocation.type` is a
23252 :py:obj:`~.cudaMemLocationTypeDevice`, then
23253 :py:obj:`~.cudaMemLocation.id` must be a valid device ordinal and the
23254 device must have a non-zero value for the device attribute
23255 :py:obj:`~.cudaDevAttrConcurrentManagedAccess`. Setting the preferred
23256 location does not cause data to migrate to that location immediately.
23257 Instead, it guides the migration policy when a fault occurs on that
23258 memory region. If the data is already in its preferred location and
23259 the faulting processor can establish a mapping without requiring the
23260 data to be migrated, then data migration will be avoided. On the
23261 other hand, if the data is not in its preferred location or if a
23262 direct mapping cannot be established, then it will be migrated to the
23263 processor accessing it. It is important to note that setting the
23264 preferred location does not prevent data prefetching done using
23265 :py:obj:`~.cudaMemPrefetchAsync`. Having a preferred location can
23266 override the page thrash detection and resolution logic in the
23267 Unified Memory driver. Normally, if a page is detected to be
23268 constantly thrashing between for example host and device memory, the
23269 page may eventually be pinned to host memory by the Unified Memory
23270 driver. But if the preferred location is set as device memory, then
23271 the page will continue to thrash indefinitely. If
23272 :py:obj:`~.cudaMemAdviseSetReadMostly` is also set on this memory
23273 region or any subset of it, then the policies associated with that
23274 advice will override the policies of this advice, unless read
23275 accesses from `location` will not result in a read-only copy being
23276 created on that processor, as outlined in the description for the advice
23277 :py:obj:`~.cudaMemAdviseSetReadMostly`. If the memory region refers
23278 to valid system-allocated pageable memory, and
23279 :py:obj:`~.cudaMemLocation.type` is
23280 :py:obj:`~.cudaMemLocationTypeDevice` then
23281 :py:obj:`~.cudaMemLocation.id` must be a valid device that has a non-
23282 zero value for the device attribute
23283 :py:obj:`~.cudaDevAttrPageableMemoryAccess`.
23285 - :py:obj:`~.cudaMemAdviseUnsetPreferredLocation`: Undoes the effect of
23286 :py:obj:`~.cudaMemAdviseSetPreferredLocation` and changes the
23287 preferred location to none. The `location` argument is ignored for
23288 this advice.
23290 - :py:obj:`~.cudaMemAdviseSetAccessedBy`: This advice implies that the
23291 data will be accessed by processor `location`. The
23292 :py:obj:`~.cudaMemLocation.type` must be either
23293 :py:obj:`~.cudaMemLocationTypeDevice` with
23294 :py:obj:`~.cudaMemLocation.id` representing a valid device ordinal or
23295 :py:obj:`~.cudaMemLocationTypeHost` and
23296 :py:obj:`~.cudaMemLocation.id` will be ignored. All other location
23297 types are invalid. If :py:obj:`~.cudaMemLocation.id` is a GPU, then
23298 the device attribute :py:obj:`~.cudaDevAttrConcurrentManagedAccess`
23299 must be non-zero. This advice does not cause data migration and has
23300 no impact on the location of the data per se. Instead, it causes the
23301 data to always be mapped in the specified processor's page tables, as
23302 long as the location of the data permits a mapping to be established.
23303 If the data gets migrated for any reason, the mappings are updated
23304 accordingly. This advice is recommended in scenarios where data
23305 locality is not important, but avoiding faults is. Consider for
23306 example a system containing multiple GPUs with peer-to-peer access
23307 enabled, where the data located on one GPU is occasionally accessed
23308 by peer GPUs. In such scenarios, migrating data over to the other
23309 GPUs is not as important because the accesses are infrequent and the
23310 overhead of migration may be too high. But preventing faults can
23311 still help improve performance, and so having a mapping set up in
23312 advance is useful. Note that on CPU access of this data, the data may
23313 be migrated to host memory because the CPU typically cannot access
23314 device memory directly. Any GPU that had the
23315 :py:obj:`~.cudaMemAdviseSetAccessedBy` flag set for this data will
23316 now have its mapping updated to point to the page in host memory. If
23317 :py:obj:`~.cudaMemAdviseSetReadMostly` is also set on this memory
23318 region or any subset of it, then the policies associated with that
23319 advice will override the policies of this advice. Additionally, if
23320 the preferred location of this memory region or any subset of it is
23321 also `location`, then the policies associated with
23322 :py:obj:`~.cudaMemAdviseSetPreferredLocation` will override the
23323 policies of this advice. If the memory region refers to valid system-
23324 allocated pageable memory, and :py:obj:`~.cudaMemLocation.type` is
23325 :py:obj:`~.cudaMemLocationTypeDevice` then device in
23326 :py:obj:`~.cudaMemLocation.id` must have a non-zero value for the
23327 device attribute :py:obj:`~.cudaDevAttrPageableMemoryAccess`.
23328 Additionally, if :py:obj:`~.cudaMemLocation.id` has a non-zero value
23329 for the device attribute
23330 :py:obj:`~.cudaDevAttrPageableMemoryAccessUsesHostPageTables`, then
23331 this call has no effect.
23333 - :py:obj:`~.cudaMemAdviseUnsetAccessedBy`: Undoes the effect of
23334 :py:obj:`~.cudaMemAdviseSetAccessedBy`. Any mappings to the data from
23335 `location` may be removed at any time causing accesses to result in
23336 non-fatal page faults. If the memory region refers to valid system-
23337 allocated pageable memory, and :py:obj:`~.cudaMemLocation.type` is
23338 :py:obj:`~.cudaMemLocationTypeDevice` then device in
23339 :py:obj:`~.cudaMemLocation.id` must have a non-zero value for the
23340 device attribute :py:obj:`~.cudaDevAttrPageableMemoryAccess`.
23341 Additionally, if :py:obj:`~.cudaMemLocation.id` has a non-zero value
23342 for the device attribute
23343 :py:obj:`~.cudaDevAttrPageableMemoryAccessUsesHostPageTables`, then
23344 this call has no effect.
23346 Parameters
23347 ----------
23348 devPtr : Any
23349 Pointer to memory to set the advice for
23350 count : size_t
23351 Size in bytes of the memory range
23352 advice : :py:obj:`~.cudaMemoryAdvise`
23353 Advice to be applied for the specified memory range
23354 location : :py:obj:`~.cudaMemLocation`
23355 Location to apply the advice for
23357 Returns
23358 -------
23359 cudaError_t
23360 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
23362 See Also
23363 --------
23364 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cudaMemPrefetchAsync`, :py:obj:`~.cuMemAdvise`
23365 """
23366 cydevPtr = _HelperInputVoidPtr(devPtr)
23367 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
23368 cdef cyruntime.cudaMemoryAdvise cyadvice = advice.value
23369 with nogil:
23370 err = cyruntime.cudaMemAdvise(cydevPtr_ptr, count, cyadvice, location._pvt_ptr[0])
23371 return (_dict_cudaError_t[err],)
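# --- Editor's usage sketch (not part of the generated bindings) ---
# A minimal, hedged example of marking a managed range read-mostly via the
# wrapper above. It assumes a CUDA-capable device 0 with
# concurrentManagedAccess; asserts stand in for real error handling.
def _example_cudaMemAdvise():
    nbytes = 1 << 20
    err, dptr = cudaMallocManaged(nbytes, cudaMemAttachGlobal)
    assert err == cudaError_t.cudaSuccess
    loc = cudaMemLocation()
    loc.type = cudaMemLocationType.cudaMemLocationTypeDevice
    loc.id = 0  # the location is ignored for the read-mostly advice
    err, = cudaMemAdvise(dptr, nbytes,
                         cudaMemoryAdvise.cudaMemAdviseSetReadMostly, loc)
    assert err == cudaError_t.cudaSuccess
    err, = cudaFree(dptr)
    assert err == cudaError_t.cudaSuccess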
23373@cython.embedsignature(True)
23374def cudaMemRangeGetAttribute(size_t dataSize, attribute not None : cudaMemRangeAttribute, devPtr, size_t count):
23375 """ Query an attribute of a given memory range.
23377 Query an attribute about the memory range starting at `devPtr` with a
23378 size of `count` bytes. The memory range must refer to managed memory
23379 allocated via :py:obj:`~.cudaMallocManaged` or declared via managed
23380 variables.
23382 The `attribute` parameter can take the following values:
23384 - :py:obj:`~.cudaMemRangeAttributeReadMostly`: If this attribute is
23385 specified, `data` will be interpreted as a 32-bit integer, and
23386 `dataSize` must be 4. The result returned will be 1 if all pages in
23387 the given memory range have read-duplication enabled, or 0 otherwise.
23389 - :py:obj:`~.cudaMemRangeAttributePreferredLocation`: If this attribute
23390 is specified, `data` will be interpreted as a 32-bit integer, and
23391 `dataSize` must be 4. The result returned will be a GPU device id if
23392 all pages in the memory range have that GPU as their preferred
23393 location, or it will be cudaCpuDeviceId if all pages in the memory
23394 range have the CPU as their preferred location, or it will be
23395 cudaInvalidDeviceId if either all the pages don't have the same
23396 preferred location or some of the pages don't have a preferred
23397 location at all. Note that the actual location of the pages in the
23398 memory range at the time of the query may be different from the
23399 preferred location.
23401 - :py:obj:`~.cudaMemRangeAttributeAccessedBy`: If this attribute is
23402 specified, `data` will be interpreted as an array of 32-bit integers,
23403 and `dataSize` must be a non-zero multiple of 4. The result returned
23404 will be a list of device ids that had
23405 :py:obj:`~.cudaMemAdviseSetAccessedBy` set for that entire memory
23406 range. If any device does not have that advice set for the entire
23407 memory range, that device will not be included. If `data` is larger
23408 than the number of devices that have that advice set for that memory
23409 range, cudaInvalidDeviceId will be returned in all the extra space
23410 provided. For example, if `dataSize` is 12 (i.e. `data` has 3 elements)
23411 and only device 0 has the advice set, then the result returned will
23412 be { 0, cudaInvalidDeviceId, cudaInvalidDeviceId }. If `data` is
23413 smaller than the number of devices that have that advice set, then
23414 only as many devices will be returned as can fit in the array. There
23415 is no guarantee on which specific devices will be returned, however.
23417 - :py:obj:`~.cudaMemRangeAttributeLastPrefetchLocation`: If this
23418 attribute is specified, `data` will be interpreted as a 32-bit
23419 integer, and `dataSize` must be 4. The result returned will be the
23420 last location to which all pages in the memory range were prefetched
23421 explicitly via :py:obj:`~.cudaMemPrefetchAsync`. This will either be
23422 a GPU id or cudaCpuDeviceId depending on whether the last location
23423 for prefetch was a GPU or the CPU respectively. If any page in the
23424 memory range was never explicitly prefetched or if all pages were not
23425 prefetched to the same location, cudaInvalidDeviceId will be
23426 returned. Note that this simply returns the last location that the
23427 application requested to prefetch the memory range to. It gives no
23428 indication as to whether the prefetch operation to that location has
23429 completed or even begun.
23431 - :py:obj:`~.cudaMemRangeAttributePreferredLocationType`: If this
23432 attribute is specified, `data` will be interpreted as a
23433 :py:obj:`~.cudaMemLocationType`, and `dataSize` must be
23434 sizeof(cudaMemLocationType). The :py:obj:`~.cudaMemLocationType`
23435 returned will be :py:obj:`~.cudaMemLocationTypeDevice` if all pages
23436 in the memory range have the same GPU as their preferred location, or
23437 :py:obj:`~.cudaMemLocationType` will be
23438 :py:obj:`~.cudaMemLocationTypeHost` if all pages in the memory range
23439 have the CPU as their preferred location, or it will be
23440 :py:obj:`~.cudaMemLocationTypeHostNuma` if all the pages in the
23441 memory range have the same host NUMA node ID as their preferred
23442 location or it will be :py:obj:`~.cudaMemLocationTypeInvalid` if
23443 either all the pages don't have the same preferred location or some
23444 of the pages don't have a preferred location at all. Note that the
23445 actual location type of the pages in the memory range at the time of
23446 the query may be different from the preferred location type.
23448 - :py:obj:`~.cudaMemRangeAttributePreferredLocationId`: If this
23449 attribute is specified, `data` will be interpreted as a 32-bit
23450 integer, and `dataSize` must be 4. If the
23451 :py:obj:`~.cudaMemRangeAttributePreferredLocationType` query for
23452 the same address range returns
23453 :py:obj:`~.cudaMemLocationTypeDevice`, it will be a valid device
23454 ordinal or if it returns :py:obj:`~.cudaMemLocationTypeHostNuma`,
23455 it will be a valid host NUMA node ID or if it returns any other
23456 location type, the id should be ignored.
23458 - :py:obj:`~.cudaMemRangeAttributeLastPrefetchLocationType`: If this
23459 attribute is specified, `data` will be interpreted as a
23460 :py:obj:`~.cudaMemLocationType`, and `dataSize` must be
23461 sizeof(cudaMemLocationType). The result returned will be the last
23462 location type to which all pages in the memory range were prefetched
23463 explicitly via :py:obj:`~.cudaMemPrefetchAsync`. The
23464 :py:obj:`~.cudaMemLocationType` returned will be
23465 :py:obj:`~.cudaMemLocationTypeDevice` if the last prefetch location
23466 was the GPU or :py:obj:`~.cudaMemLocationTypeHost` if it was the CPU
23467 or :py:obj:`~.cudaMemLocationTypeHostNuma` if the last prefetch
23468 location was a specific host NUMA node. If any page in the memory
23469 range was never explicitly prefetched or if all pages were not
23470 prefetched to the same location, :py:obj:`~.cudaMemLocationType` will
23471 be :py:obj:`~.cudaMemLocationTypeInvalid`. Note that this simply
23472 returns the last location type that the application requested to
23473 prefetch the memory range to. It gives no indication as to whether
23474 the prefetch operation to that location has completed or even begun.
23476 - :py:obj:`~.cudaMemRangeAttributeLastPrefetchLocationId`: If this
23477 attribute is specified, `data` will be interpreted as a 32-bit
23478 integer, and `dataSize` must be 4. If the
23479 :py:obj:`~.cudaMemRangeAttributeLastPrefetchLocationType` query for
23480 the same address range returns
23481 :py:obj:`~.cudaMemLocationTypeDevice`, it will be a valid device
23482 ordinal or if it returns :py:obj:`~.cudaMemLocationTypeHostNuma`,
23483 it will be a valid host NUMA node ID or if it returns any other
23484 location type, the id should be ignored.
23486 Parameters
23487 ----------
23488 dataSize : size_t
23489 The size of `data` in bytes
23490 attribute : :py:obj:`~.cudaMemRangeAttribute`
23491 The attribute to query
23492 devPtr : Any
23493 Start of the range to query
23494 count : size_t
23495 Size of the range to query
23497 Returns
23498 -------
23499 cudaError_t
23500 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
23501 data : Any
23502 A pointer to a memory location where the result of the attribute
23503 query will be written.
23505 See Also
23506 --------
23507 :py:obj:`~.cudaMemRangeGetAttributes`, :py:obj:`~.cudaMemPrefetchAsync`, :py:obj:`~.cudaMemAdvise`, :py:obj:`~.cuMemRangeGetAttribute`
23508 """
23509 cdef _HelperCUmem_range_attribute cydata = _HelperCUmem_range_attribute(attribute, dataSize)
23510 cdef void* cydata_ptr = <void*><void_ptr>cydata.cptr
23511 cdef cyruntime.cudaMemRangeAttribute cyattribute = attribute.value
23512 cydevPtr = _HelperInputVoidPtr(devPtr)
23513 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
23514 with nogil:
23515 err = cyruntime.cudaMemRangeGetAttribute(cydata_ptr, dataSize, cyattribute, cydevPtr_ptr, count)
23516 if err != cyruntime.cudaSuccess:
23517 return (_dict_cudaError_t[err], None)
23518 return (_dict_cudaError_t[err], cydata.pyObj())
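# --- Editor's usage sketch (not part of the generated bindings) ---
# A hedged sketch of querying the read-mostly state of a managed range with
# the wrapper above; the result is a 32-bit int, so dataSize must be 4.
# Assumes a CUDA-capable device and uses asserts for brevity.
def _example_cudaMemRangeGetAttribute():
    nbytes = 1 << 20
    err, dptr = cudaMallocManaged(nbytes, cudaMemAttachGlobal)
    assert err == cudaError_t.cudaSuccess
    err, read_mostly = cudaMemRangeGetAttribute(
        4, cudaMemRangeAttribute.cudaMemRangeAttributeReadMostly, dptr, nbytes)
    assert err == cudaError_t.cudaSuccess
    print("read-duplication enabled:", bool(read_mostly))
    err, = cudaFree(dptr)
    assert err == cudaError_t.cudaSuccess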
23520@cython.embedsignature(True)
23521def cudaMemRangeGetAttributes(dataSizes : tuple[int] | list[int], attributes : Optional[tuple[cudaMemRangeAttribute] | list[cudaMemRangeAttribute]], size_t numAttributes, devPtr, size_t count):
23522 """ Query attributes of a given memory range.
23524 Query attributes of the memory range starting at `devPtr` with a size
23525 of `count` bytes. The memory range must refer to managed memory
23526 allocated via :py:obj:`~.cudaMallocManaged` or declared via managed
23527 variables. The `attributes` array will be interpreted to have
23528 `numAttributes` entries. The `dataSizes` array will also be interpreted
23529 to have `numAttributes` entries. The results of the query will be
23530 stored in `data`.
23532 The list of supported attributes are given below. Please refer to
23533 :py:obj:`~.cudaMemRangeGetAttribute` for attribute descriptions and
23534 restrictions.
23536 - :py:obj:`~.cudaMemRangeAttributeReadMostly`
23538 - :py:obj:`~.cudaMemRangeAttributePreferredLocation`
23540 - :py:obj:`~.cudaMemRangeAttributeAccessedBy`
23542 - :py:obj:`~.cudaMemRangeAttributeLastPrefetchLocation`
23544 - :py:obj:`~.cudaMemRangeAttributePreferredLocationType`
23546 - :py:obj:`~.cudaMemRangeAttributePreferredLocationId`
23548 - :py:obj:`~.cudaMemRangeAttributeLastPrefetchLocationType`
23550 - :py:obj:`~.cudaMemRangeAttributeLastPrefetchLocationId`
23552 Parameters
23553 ----------
23554 dataSizes : list[int]
23555 Array containing the sizes of each result
23556 attributes : list[:py:obj:`~.cudaMemRangeAttribute`]
23557 An array of attributes to query (numAttributes and the number of
23558 attributes in this array should match)
23559 numAttributes : size_t
23560 Number of attributes to query
23561 devPtr : Any
23562 Start of the range to query
23563 count : size_t
23564 Size of the range to query
23566 Returns
23567 -------
23568 cudaError_t
23569 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
23570 data : list[Any]
23571 A two-dimensional array containing pointers to memory locations
23572 where the result of each attribute query will be written to.
23574 See Also
23575 --------
23576 :py:obj:`~.cudaMemRangeGetAttribute`, :py:obj:`~.cudaMemAdvise`, :py:obj:`~.cudaMemPrefetchAsync`, :py:obj:`~.cuMemRangeGetAttributes`
23577 """
23578 attributes = [] if attributes is None else attributes
23579 if not all(isinstance(_x, (cudaMemRangeAttribute)) for _x in attributes):
23580 raise TypeError("Argument 'attributes' is not instance of type (expected tuple[cyruntime.cudaMemRangeAttribute] or list[cyruntime.cudaMemRangeAttribute])")
23581 if not all(isinstance(_x, (int)) for _x in dataSizes):
23582 raise TypeError("Argument 'dataSizes' is not instance of type (expected tuple[int] or list[int])")
23583 pylist = [_HelperCUmem_range_attribute(pyattributes, pydataSizes) for (pyattributes, pydataSizes) in zip(attributes, dataSizes)]
23584 cdef _InputVoidPtrPtrHelper voidStarHelperdata = _InputVoidPtrPtrHelper(pylist)
23585 cdef void** cyvoidStarHelper_ptr = <void**><void_ptr>voidStarHelperdata.cptr
23586 cdef vector[size_t] cydataSizes = dataSizes
23587 cdef vector[cyruntime.cudaMemRangeAttribute] cyattributes = [pyattributes.value for pyattributes in (attributes)]
23588 if numAttributes > <size_t>len(dataSizes): raise RuntimeError("List is too small: " + str(len(dataSizes)) + " < " + str(numAttributes))
23589 if numAttributes > <size_t>len(attributes): raise RuntimeError("List is too small: " + str(len(attributes)) + " < " + str(numAttributes))
23590 cydevPtr = _HelperInputVoidPtr(devPtr)
23591 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
23592 with nogil:
23593 err = cyruntime.cudaMemRangeGetAttributes(cyvoidStarHelper_ptr, cydataSizes.data(), cyattributes.data(), numAttributes, cydevPtr_ptr, count)
23594 if err != cyruntime.cudaSuccess:
23595 return (_dict_cudaError_t[err], None)
23596 return (_dict_cudaError_t[err], [obj.pyObj() for obj in pylist])
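# --- Editor's usage sketch (not part of the generated bindings) ---
# A hedged sketch of batching two attribute queries with the wrapper above.
# Both results are 32-bit ints, so each entry of dataSizes is 4. Assumes a
# CUDA-capable device; asserts stand in for real error handling.
def _example_cudaMemRangeGetAttributes():
    nbytes = 1 << 20
    err, dptr = cudaMallocManaged(nbytes, cudaMemAttachGlobal)
    assert err == cudaError_t.cudaSuccess
    attrs = [cudaMemRangeAttribute.cudaMemRangeAttributeReadMostly,
             cudaMemRangeAttribute.cudaMemRangeAttributePreferredLocation]
    err, values = cudaMemRangeGetAttributes([4, 4], attrs, len(attrs),
                                            dptr, nbytes)
    assert err == cudaError_t.cudaSuccess
    print(values)  # one result per queried attribute
    err, = cudaFree(dptr)
    assert err == cudaError_t.cudaSuccess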
23598@cython.embedsignature(True)
23599def cudaMemcpyToArray(dst, size_t wOffset, size_t hOffset, src, size_t count, kind not None : cudaMemcpyKind):
23600 """ Copies data between host and device.
23602 [Deprecated]
23604 Copies `count` bytes from the memory area pointed to by `src` to the
23605 CUDA array `dst` starting at `hOffset` rows and `wOffset` bytes from
23606 the upper left corner, where `kind` specifies the direction of the
23607 copy, and must be one of :py:obj:`~.cudaMemcpyHostToHost`,
23608 :py:obj:`~.cudaMemcpyHostToDevice`, :py:obj:`~.cudaMemcpyDeviceToHost`,
23609 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
23610 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
23611 type of transfer is inferred from the pointer values. However,
23612 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
23613 unified virtual addressing.
23615 Parameters
23616 ----------
23617 dst : :py:obj:`~.cudaArray_t`
23618 Destination memory address
23619 wOffset : size_t
23620 Destination starting X offset (columns in bytes)
23621 hOffset : size_t
23622 Destination starting Y offset (rows)
23623 src : Any
23624 Source memory address
23625 count : size_t
23626 Size in bytes to copy
23627 kind : :py:obj:`~.cudaMemcpyKind`
23628 Type of transfer
23630 Returns
23631 -------
23632 cudaError_t
23633 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
23635 See Also
23636 --------
23637 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpyFromArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpyArrayToArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpyToArrayAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpyFromArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyDtoA`
23638 """
23639 cdef cyruntime.cudaArray_t cydst
23640 if dst is None:
23641 pdst = 0
23642 elif isinstance(dst, (cudaArray_t,)):
23643 pdst = int(dst)
23644 else:
23645 pdst = int(cudaArray_t(dst))
23646 cydst = <cyruntime.cudaArray_t><void_ptr>pdst
23647 cysrc = _HelperInputVoidPtr(src)
23648 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
23649 cdef cyruntime.cudaMemcpyKind cykind = kind.value
23650 with nogil:
23651 err = cyruntime.cudaMemcpyToArray(cydst, wOffset, hOffset, cysrc_ptr, count, cykind)
23652 return (_dict_cudaError_t[err],)
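# --- Editor's usage sketch (not part of the generated bindings) ---
# A hedged sketch of the deprecated upload path: copying host bytes into a
# 1D CUDA array of 256 floats. New code should prefer cudaMemcpy2DToArray;
# this only illustrates the wrapper above. Assumes a CUDA-capable device.
def _example_cudaMemcpyToArray():
    desc = cudaChannelFormatDesc()
    desc.x, desc.y, desc.z, desc.w = 32, 0, 0, 0
    desc.f = cudaChannelFormatKind.cudaChannelFormatKindFloat
    err, arr = cudaMallocArray(desc, 256, 0, 0)
    assert err == cudaError_t.cudaSuccess
    src = bytes(256 * 4)  # any buffer-protocol object works as the host source
    err, = cudaMemcpyToArray(arr, 0, 0, src, len(src),
                             cudaMemcpyKind.cudaMemcpyHostToDevice)
    assert err == cudaError_t.cudaSuccess
    err, = cudaFreeArray(arr)
    assert err == cudaError_t.cudaSuccess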
23654@cython.embedsignature(True)
23655def cudaMemcpyFromArray(dst, src, size_t wOffset, size_t hOffset, size_t count, kind not None : cudaMemcpyKind):
23656 """ Copies data between host and device.
23658 [Deprecated]
23660 Copies `count` bytes from the CUDA array `src` starting at `hOffset`
23661 rows and `wOffset` bytes from the upper left corner to the memory area
23662 pointed to by `dst`, where `kind` specifies the direction of the copy,
23663 and must be one of :py:obj:`~.cudaMemcpyHostToHost`,
23664 :py:obj:`~.cudaMemcpyHostToDevice`, :py:obj:`~.cudaMemcpyDeviceToHost`,
23665 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
23666 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
23667 type of transfer is inferred from the pointer values. However,
23668 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
23669 unified virtual addressing.
23671 Parameters
23672 ----------
23673 dst : Any
23674 Destination memory address
23675 src : :py:obj:`~.cudaArray_const_t`
23676 Source memory address
23677 wOffset : size_t
23678 Source starting X offset (columns in bytes)
23679 hOffset : size_t
23680 Source starting Y offset (rows)
23681 count : size_t
23682 Size in bytes to copy
23683 kind : :py:obj:`~.cudaMemcpyKind`
23684 Type of transfer
23686 Returns
23687 -------
23688 cudaError_t
23689 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
23691 See Also
23692 --------
23693 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpyToArray`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpyArrayToArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpyToArrayAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpyFromArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoD`
23694 """
23695 cdef cyruntime.cudaArray_const_t cysrc
23696 if src is None:
23697 psrc = 0
23698 elif isinstance(src, (cudaArray_const_t,)):
23699 psrc = int(src)
23700 else:
23701 psrc = int(cudaArray_const_t(src))
23702 cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
23703 cydst = _HelperInputVoidPtr(dst)
23704 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
23705 cdef cyruntime.cudaMemcpyKind cykind = kind.value
23706 with nogil:
23707 err = cyruntime.cudaMemcpyFromArray(cydst_ptr, cysrc, wOffset, hOffset, count, cykind)
23708 return (_dict_cudaError_t[err],)
23710@cython.embedsignature(True)
23711def cudaMemcpyArrayToArray(dst, size_t wOffsetDst, size_t hOffsetDst, src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, kind not None : cudaMemcpyKind):
23712 """ Copies data between host and device.
23714 [Deprecated]
23716 Copies `count` bytes from the CUDA array `src` starting at `hOffsetSrc`
23717 rows and `wOffsetSrc` bytes from the upper left corner to the CUDA
23718 array `dst` starting at `hOffsetDst` rows and `wOffsetDst` bytes from
23719 the upper left corner, where `kind` specifies the direction of the
23720 copy, and must be one of :py:obj:`~.cudaMemcpyHostToHost`,
23721 :py:obj:`~.cudaMemcpyHostToDevice`, :py:obj:`~.cudaMemcpyDeviceToHost`,
23722 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
23723 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
23724 type of transfer is inferred from the pointer values. However,
23725 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
23726 unified virtual addressing.
23728 Parameters
23729 ----------
23730 dst : :py:obj:`~.cudaArray_t`
23731 Destination memory address
23732 wOffsetDst : size_t
23733 Destination starting X offset (columns in bytes)
23734 hOffsetDst : size_t
23735 Destination starting Y offset (rows)
23736 src : :py:obj:`~.cudaArray_const_t`
23737 Source memory address
23738 wOffsetSrc : size_t
23739 Source starting X offset (columns in bytes)
23740 hOffsetSrc : size_t
23741 Source starting Y offset (rows)
23742 count : size_t
23743 Size in bytes to copy
23744 kind : :py:obj:`~.cudaMemcpyKind`
23745 Type of transfer
23747 Returns
23748 -------
23749 cudaError_t
23750 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
23752 See Also
23753 --------
23754 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpyToArray`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpyFromArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpyToArrayAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpyFromArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyAtoA`
23755 """
23756 cdef cyruntime.cudaArray_const_t cysrc
23757 if src is None:
23758 psrc = 0
23759 elif isinstance(src, (cudaArray_const_t,)):
23760 psrc = int(src)
23761 else:
23762 psrc = int(cudaArray_const_t(src))
23763 cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
23764 cdef cyruntime.cudaArray_t cydst
23765 if dst is None:
23766 pdst = 0
23767 elif isinstance(dst, (cudaArray_t,)):
23768 pdst = int(dst)
23769 else:
23770 pdst = int(cudaArray_t(dst))
23771 cydst = <cyruntime.cudaArray_t><void_ptr>pdst
23772 cdef cyruntime.cudaMemcpyKind cykind = kind.value
23773 with nogil:
23774 err = cyruntime.cudaMemcpyArrayToArray(cydst, wOffsetDst, hOffsetDst, cysrc, wOffsetSrc, hOffsetSrc, count, cykind)
23775 return (_dict_cudaError_t[err],)
23777@cython.embedsignature(True)
23778def cudaMemcpyToArrayAsync(dst, size_t wOffset, size_t hOffset, src, size_t count, kind not None : cudaMemcpyKind, stream):
23779 """ Copies data between host and device.
23781 [Deprecated]
23783 Copies `count` bytes from the memory area pointed to by `src` to the
23784 CUDA array `dst` starting at `hOffset` rows and `wOffset` bytes from
23785 the upper left corner, where `kind` specifies the direction of the
23786 copy, and must be one of :py:obj:`~.cudaMemcpyHostToHost`,
23787 :py:obj:`~.cudaMemcpyHostToDevice`, :py:obj:`~.cudaMemcpyDeviceToHost`,
23788 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
23789 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
23790 type of transfer is inferred from the pointer values. However,
23791 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
23792 unified virtual addressing.
23794 :py:obj:`~.cudaMemcpyToArrayAsync()` is asynchronous with respect to
23795 the host, so the call may return before the copy is complete. The copy
23796 can optionally be associated with a stream by passing a non-zero `stream`
23797 argument. If `kind` is :py:obj:`~.cudaMemcpyHostToDevice` or
23798 :py:obj:`~.cudaMemcpyDeviceToHost` and `stream` is non-zero, the copy
23799 may overlap with operations in other streams.
23801 Parameters
23802 ----------
23803 dst : :py:obj:`~.cudaArray_t`
23804 Destination memory address
23805 wOffset : size_t
23806 Destination starting X offset (columns in bytes)
23807 hOffset : size_t
23808 Destination starting Y offset (rows)
23809 src : Any
23810 Source memory address
23811 count : size_t
23812 Size in bytes to copy
23813 kind : :py:obj:`~.cudaMemcpyKind`
23814 Type of transfer
23815 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
23816 Stream identifier
23818 Returns
23819 -------
23820 cudaError_t
23821 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
23823 See Also
23824 --------
23825 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpyToArray`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpyFromArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpyArrayToArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpyFromArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpy2DAsync`
23826 """
23827 cdef cyruntime.cudaStream_t cystream
23828 if stream is None:
23829 pstream = 0
23830 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
23831 pstream = int(stream)
23832 else:
23833 pstream = int(cudaStream_t(stream))
23834 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
23835 cdef cyruntime.cudaArray_t cydst
23836 if dst is None:
23837 pdst = 0
23838 elif isinstance(dst, (cudaArray_t,)):
23839 pdst = int(dst)
23840 else:
23841 pdst = int(cudaArray_t(dst))
23842 cydst = <cyruntime.cudaArray_t><void_ptr>pdst
23843 cysrc = _HelperInputVoidPtr(src)
23844 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
23845 cdef cyruntime.cudaMemcpyKind cykind = kind.value
23846 with nogil:
23847 err = cyruntime.cudaMemcpyToArrayAsync(cydst, wOffset, hOffset, cysrc_ptr, count, cykind, cystream)
23848 return (_dict_cudaError_t[err],)
23850@cython.embedsignature(True)
23851def cudaMemcpyFromArrayAsync(dst, src, size_t wOffset, size_t hOffset, size_t count, kind not None : cudaMemcpyKind, stream):
23852 """ Copies data between host and device.
23854 [Deprecated]
23856 Copies `count` bytes from the CUDA array `src` starting at `hOffset`
23857 rows and `wOffset` bytes from the upper left corner to the memory area
23858 pointed to by `dst`, where `kind` specifies the direction of the copy,
23859 and must be one of :py:obj:`~.cudaMemcpyHostToHost`,
23860 :py:obj:`~.cudaMemcpyHostToDevice`, :py:obj:`~.cudaMemcpyDeviceToHost`,
23861 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
23862 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
23863 type of transfer is inferred from the pointer values. However,
23864 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
23865 unified virtual addressing.
23867 :py:obj:`~.cudaMemcpyFromArrayAsync()` is asynchronous with respect to
23868 the host, so the call may return before the copy is complete. The copy
23869 can optionally be associated with a stream by passing a non-zero `stream`
23870 argument. If `kind` is :py:obj:`~.cudaMemcpyHostToDevice` or
23871 :py:obj:`~.cudaMemcpyDeviceToHost` and `stream` is non-zero, the copy
23872 may overlap with operations in other streams.
23874 Parameters
23875 ----------
23876 dst : Any
23877 Destination memory address
23878 src : :py:obj:`~.cudaArray_const_t`
23879 Source memory address
23880 wOffset : size_t
23881 Source starting X offset (columns in bytes)
23882 hOffset : size_t
23883 Source starting Y offset (rows)
23884 count : size_t
23885 Size in bytes to copy
23886 kind : :py:obj:`~.cudaMemcpyKind`
23887 Type of transfer
23888 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
23889 Stream identifier
23891 Returns
23892 -------
23893 cudaError_t
23894 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
23896 See Also
23897 --------
23898 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpyToArray`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpyFromArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpyArrayToArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpyToArrayAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpy2DAsync`
23899 """
23900 cdef cyruntime.cudaStream_t cystream
23901 if stream is None:
23902 pstream = 0
23903 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
23904 pstream = int(stream)
23905 else:
23906 pstream = int(cudaStream_t(stream))
23907 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
23908 cdef cyruntime.cudaArray_const_t cysrc
23909 if src is None:
23910 psrc = 0
23911 elif isinstance(src, (cudaArray_const_t,)):
23912 psrc = int(src)
23913 else:
23914 psrc = int(cudaArray_const_t(src))
23915 cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
23916 cydst = _HelperInputVoidPtr(dst)
23917 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
23918 cdef cyruntime.cudaMemcpyKind cykind = kind.value
23919 with nogil:
23920 err = cyruntime.cudaMemcpyFromArrayAsync(cydst_ptr, cysrc, wOffset, hOffset, count, cykind, cystream)
23921 return (_dict_cudaError_t[err],)
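# --- Editor's usage sketch (not part of the generated bindings) ---
# A hedged sketch of a stream-ordered round trip through a CUDA array using
# the two deprecated async wrappers above. The host buffers here are plain
# (pageable) Python buffers, so the copies will not actually overlap; they
# must stay alive until the stream is synchronized. Assumes a CUDA device.
def _example_cudaMemcpyArrayAsync():
    err, stream = cudaStreamCreate()
    assert err == cudaError_t.cudaSuccess
    desc = cudaChannelFormatDesc()
    desc.x, desc.y, desc.z, desc.w = 8, 0, 0, 0
    desc.f = cudaChannelFormatKind.cudaChannelFormatKindUnsigned
    err, arr = cudaMallocArray(desc, 1024, 0, 0)
    assert err == cudaError_t.cudaSuccess
    src, dst = bytes(1024), bytearray(1024)
    err, = cudaMemcpyToArrayAsync(arr, 0, 0, src, len(src),
                                  cudaMemcpyKind.cudaMemcpyHostToDevice, stream)
    assert err == cudaError_t.cudaSuccess
    err, = cudaMemcpyFromArrayAsync(dst, arr, 0, 0, len(dst),
                                    cudaMemcpyKind.cudaMemcpyDeviceToHost, stream)
    assert err == cudaError_t.cudaSuccess
    err, = cudaStreamSynchronize(stream)
    assert err == cudaError_t.cudaSuccess
    err, = cudaFreeArray(arr)
    err, = cudaStreamDestroy(stream)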
23923@cython.embedsignature(True)
23924def cudaMallocAsync(size_t size, hStream):
23925 """ Allocates memory with stream ordered semantics.
23927 Inserts an allocation operation into `hStream`. A pointer to the
23928 allocated memory is returned immediately in `devPtr`. The allocation must
23929 not be accessed until the allocation operation completes. The
23930 allocation comes from the memory pool associated with the stream's
23931 device.
23933 Parameters
23934 ----------
23935 size : size_t
23936 Number of bytes to allocate
23937 hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
23938 The stream establishing the stream ordering contract and the memory
23939 pool to allocate from
23941 Returns
23942 -------
23943 cudaError_t
23944 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorOutOfMemory`
23945 devPtr : Any
23946 Returned device pointer
23948 See Also
23949 --------
23950 :py:obj:`~.cuMemAllocAsync`, cudaMallocAsync (C++ API), :py:obj:`~.cudaMallocFromPoolAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaDeviceSetMemPool`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceGetMemPool`, :py:obj:`~.cudaMemPoolSetAccess`, :py:obj:`~.cudaMemPoolSetAttribute`, :py:obj:`~.cudaMemPoolGetAttribute`
23952 Notes
23953 -----
23954 The default memory pool of a device contains device memory from that device.
23956 Basic stream ordering allows future work submitted into the same stream to use the allocation. Stream query, stream synchronize, and CUDA events can be used to guarantee that the allocation operation completes before work submitted in a separate stream runs.
23958 During stream capture, this function results in the creation of an allocation node. In this case, the allocation is owned by the graph instead of the memory pool. The memory pool's properties are used to set the node's creation parameters.
23959 """
23960 cdef cyruntime.cudaStream_t cyhStream
23961 if hStream is None:
23962 phStream = 0
23963 elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
23964 phStream = int(hStream)
23965 else:
23966 phStream = int(cudaStream_t(hStream))
23967 cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
23968 cdef void_ptr devPtr = 0
23969 with nogil:
23970 err = cyruntime.cudaMallocAsync(<void**>&devPtr, size, cyhStream)
23971 if err != cyruntime.cudaSuccess:
23972 return (_dict_cudaError_t[err], None)
23973 return (_dict_cudaError_t[err], devPtr)
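# --- Editor's usage sketch (not part of the generated bindings) ---
# A hedged sketch of stream-ordered allocation: the pointer returned by
# cudaMallocAsync may only be used by work ordered after the allocation in
# the stream, and cudaFreeAsync (defined below) releases it in the same
# order. Assumes a CUDA-capable device; asserts replace real error handling.
def _example_cudaMallocAsync():
    err, stream = cudaStreamCreate()
    assert err == cudaError_t.cudaSuccess
    err, dptr = cudaMallocAsync(1 << 20, stream)
    assert err == cudaError_t.cudaSuccess
    # ... enqueue kernels or copies that use dptr in `stream` here ...
    err, = cudaFreeAsync(dptr, stream)
    assert err == cudaError_t.cudaSuccess
    err, = cudaStreamSynchronize(stream)  # allocation and free have completed
    assert err == cudaError_t.cudaSuccess
    err, = cudaStreamDestroy(stream)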
23975@cython.embedsignature(True)
23976def cudaFreeAsync(devPtr, hStream):
23977 """ Frees memory with stream ordered semantics.
23979 Inserts a free operation into `hStream`. The allocation must not be
23980 accessed after stream execution reaches the free. After this API
23981 returns, accessing the memory from any subsequent work launched on the
23982 GPU or querying its pointer attributes results in undefined behavior.
23984 Parameters
23985 ----------
23986 devPtr : Any
23987 Memory to free
23988 hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
23989 The stream establishing the stream ordering promise
23991 Returns
23992 -------
23993 cudaError_t
23994 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
23996 See Also
23997 --------
23998 :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cudaMallocAsync`
24000 Notes
24001 -----
24002 During stream capture, this function results in the creation of a free node and must therefore be passed the address of a graph allocation.
24003 """
24004 cdef cyruntime.cudaStream_t cyhStream
24005 if hStream is None:
24006 phStream = 0
24007 elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
24008 phStream = int(hStream)
24009 else:
24010 phStream = int(cudaStream_t(hStream))
24011 cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
24012 cydevPtr = _HelperInputVoidPtr(devPtr)
24013 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
24014 with nogil:
24015 err = cyruntime.cudaFreeAsync(cydevPtr_ptr, cyhStream)
24016 return (_dict_cudaError_t[err],)
24018@cython.embedsignature(True)
24019def cudaMemPoolTrimTo(memPool, size_t minBytesToKeep):
24020 """ Tries to release memory back to the OS.
24022 Releases memory back to the OS until the pool contains fewer than
24023 minBytesToKeep reserved bytes, or there is no more memory that the
24024 allocator can safely release. The allocator cannot release OS
24025 allocations that back outstanding asynchronous allocations. The OS
24026 allocations may happen at different granularity from the user
24027 allocations.
24029 Parameters
24030 ----------
24031 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
24032 The memory pool to trim
24033 minBytesToKeep : size_t
24034 If the pool has less than minBytesToKeep reserved, the TrimTo
24035 operation is a no-op. Otherwise the pool will be guaranteed to have
24036 at least minBytesToKeep bytes reserved after the operation.
24038 Returns
24039 -------
24040 cudaError_t
24041 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
24043 See Also
24044 --------
24045 :py:obj:`~.cuMemPoolTrimTo`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceGetMemPool`, :py:obj:`~.cudaMemPoolCreate`
24047 Notes
24048 -----
24049 Allocations that have not been freed count as outstanding.
24051 Allocations that have been asynchronously freed but whose completion has not been observed on the host (e.g. by a synchronize) can count as outstanding.
24052 """
24053 cdef cyruntime.cudaMemPool_t cymemPool
24054 if memPool is None:
24055 pmemPool = 0
24056 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
24057 pmemPool = int(memPool)
24058 else:
24059 pmemPool = int(cudaMemPool_t(memPool))
24060 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
24061 with nogil:
24062 err = cyruntime.cudaMemPoolTrimTo(cymemPool, minBytesToKeep)
24063 return (_dict_cudaError_t[err],)
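# --- Editor's usage sketch (not part of the generated bindings) ---
# A hedged sketch of trimming the default pool of device 0 down to zero
# reserved bytes; memory backing outstanding allocations is never released.
def _example_cudaMemPoolTrimTo():
    err, pool = cudaDeviceGetDefaultMemPool(0)
    assert err == cudaError_t.cudaSuccess
    err, = cudaMemPoolTrimTo(pool, 0)
    assert err == cudaError_t.cudaSuccess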
24065@cython.embedsignature(True)
24066def cudaMemPoolSetAttribute(memPool, attr not None : cudaMemPoolAttr, value):
24067 """ Sets attributes of a memory pool.
24069 Supported attributes are:
24071 - :py:obj:`~.cudaMemPoolAttrReleaseThreshold`: (value type =
24072 cuuint64_t) Amount of reserved memory in bytes to hold onto before
24073 trying to release memory back to the OS. When more than the release
24074 threshold bytes of memory are held by the memory pool, the allocator
24075 will try to release memory back to the OS on the next call to stream,
24076 event or context synchronize. (default 0)
24078 - :py:obj:`~.cudaMemPoolReuseFollowEventDependencies`: (value type =
24079 int) Allow :py:obj:`~.cudaMallocAsync` to use memory asynchronously
24080 freed in another stream as long as a stream ordering dependency of
24081 the allocating stream on the free action exists. Cuda events and null
24082 stream interactions can create the required stream ordered
24083 dependencies. (default enabled)
24085 - :py:obj:`~.cudaMemPoolReuseAllowOpportunistic`: (value type = int)
24086 Allow reuse of already completed frees when there is no dependency
24087 between the free and allocation. (default enabled)
24089 - :py:obj:`~.cudaMemPoolReuseAllowInternalDependencies`: (value type =
24090 int) Allow :py:obj:`~.cudaMallocAsync` to insert new stream
24091 dependencies in order to establish the stream ordering required to
24092 reuse a piece of memory released by :py:obj:`~.cudaFreeAsync`
24093 (default enabled).
24095 - :py:obj:`~.cudaMemPoolAttrReservedMemHigh`: (value type = cuuint64_t)
24096 Reset the high watermark that tracks the amount of backing memory
24097 that was allocated for the memory pool. It is illegal to set this
24098 attribute to a non-zero value.
24100 - :py:obj:`~.cudaMemPoolAttrUsedMemHigh`: (value type = cuuint64_t)
24101 Reset the high watermark that tracks the amount of used memory that
24102 was allocated for the memory pool. It is illegal to set this
24103 attribute to a non-zero value.
24105 Parameters
24106 ----------
24107 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
24108 The memory pool to modify
24109 attr : :py:obj:`~.cudaMemPoolAttr`
24110 The attribute to modify
24111 value : Any
24112 Pointer to the value to assign
24114 Returns
24115 -------
24116 cudaError_t
24117 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
24119 See Also
24120 --------
24121 :py:obj:`~.cuMemPoolSetAttribute`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceGetMemPool`, :py:obj:`~.cudaMemPoolCreate`
24122 """
24123 cdef cyruntime.cudaMemPool_t cymemPool
24124 if memPool is None:
24125 pmemPool = 0
24126 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
24127 pmemPool = int(memPool)
24128 else:
24129 pmemPool = int(cudaMemPool_t(memPool))
24130 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
24131 cdef cyruntime.cudaMemPoolAttr cyattr = attr.value
24132 cdef _HelperCUmemPool_attribute cyvalue = _HelperCUmemPool_attribute(attr, value, is_getter=False)
24133 cdef void* cyvalue_ptr = <void*><void_ptr>cyvalue.cptr
24134 with nogil:
24135 err = cyruntime.cudaMemPoolSetAttribute(cymemPool, cyattr, cyvalue_ptr)
24136 return (_dict_cudaError_t[err],)
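# --- Editor's usage sketch (not part of the generated bindings) ---
# A hedged sketch of raising the release threshold of device 0's default
# pool so up to 64 MiB stays cached across synchronizations. Passing the
# value as driver.cuuint64_t mirrors the cuda-python samples; it is an
# assumption that this wrapper accepts that type here.
def _example_cudaMemPoolSetAttribute():
    err, pool = cudaDeviceGetDefaultMemPool(0)
    assert err == cudaError_t.cudaSuccess
    err, = cudaMemPoolSetAttribute(
        pool, cudaMemPoolAttr.cudaMemPoolAttrReleaseThreshold,
        driver.cuuint64_t(64 << 20))
    assert err == cudaError_t.cudaSuccess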
24138@cython.embedsignature(True)
24139def cudaMemPoolGetAttribute(memPool, attr not None : cudaMemPoolAttr):
24140 """ Gets attributes of a memory pool.
24142 Supported attributes are:
24144 - :py:obj:`~.cudaMemPoolAttrReleaseThreshold`: (value type =
24145 cuuint64_t) Amount of reserved memory in bytes to hold onto before
24146 trying to release memory back to the OS. When more than the release
24147 threshold bytes of memory are held by the memory pool, the allocator
24148 will try to release memory back to the OS on the next call to stream,
24149 event or context synchronize. (default 0)
24151 - :py:obj:`~.cudaMemPoolReuseFollowEventDependencies`: (value type =
24152 int) Allow :py:obj:`~.cudaMallocAsync` to use memory asynchronously
24153 freed in another stream as long as a stream ordering dependency of
24154 the allocating stream on the free action exists. Cuda events and null
24155 stream interactions can create the required stream ordered
24156 dependencies. (default enabled)
24158 - :py:obj:`~.cudaMemPoolReuseAllowOpportunistic`: (value type = int)
24159 Allow reuse of already completed frees when there is no dependency
24160 between the free and allocation. (default enabled)
24162 - :py:obj:`~.cudaMemPoolReuseAllowInternalDependencies`: (value type =
24163 int) Allow :py:obj:`~.cudaMallocAsync` to insert new stream
24164 dependencies in order to establish the stream ordering required to
24165 reuse a piece of memory released by :py:obj:`~.cudaFreeAsync`
24166 (default enabled).
24168 - :py:obj:`~.cudaMemPoolAttrReservedMemCurrent`: (value type =
24169 cuuint64_t) Amount of backing memory currently allocated for the
24170 mempool.
24172 - :py:obj:`~.cudaMemPoolAttrReservedMemHigh`: (value type = cuuint64_t)
24173 High watermark of backing memory allocated for the mempool since the
24174 last time it was reset.
24176 - :py:obj:`~.cudaMemPoolAttrUsedMemCurrent`: (value type = cuuint64_t)
24177 Amount of memory from the pool that is currently in use by the
24178 application.
24180 - :py:obj:`~.cudaMemPoolAttrUsedMemHigh`: (value type = cuuint64_t)
24181 High watermark of the amount of memory from the pool that was in use
24182 by the application since the last time it was reset.
24184 Parameters
24185 ----------
24186 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
24187 The memory pool to get attributes of
24188 attr : :py:obj:`~.cudaMemPoolAttr`
24189 The attribute to get
24191 Returns
24192 -------
24193 cudaError_t
24194 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
24195 value : Any
24196 Retrieved value
24198 See Also
24199 --------
24200 :py:obj:`~.cuMemPoolGetAttribute`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceGetMemPool`, :py:obj:`~.cudaMemPoolCreate`
24201 """
24202 cdef cyruntime.cudaMemPool_t cymemPool
24203 if memPool is None:
24204 pmemPool = 0
24205 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
24206 pmemPool = int(memPool)
24207 else:
24208 pmemPool = int(cudaMemPool_t(memPool))
24209 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
24210 cdef cyruntime.cudaMemPoolAttr cyattr = attr.value
24211 cdef _HelperCUmemPool_attribute cyvalue = _HelperCUmemPool_attribute(attr, 0, is_getter=True)
24212 cdef void* cyvalue_ptr = <void*><void_ptr>cyvalue.cptr
24213 with nogil:
24214 err = cyruntime.cudaMemPoolGetAttribute(cymemPool, cyattr, cyvalue_ptr)
24215 if err != cyruntime.cudaSuccess:
24216 return (_dict_cudaError_t[err], None)
24217 return (_dict_cudaError_t[err], cyvalue.pyObj())
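# --- Editor's usage sketch (not part of the generated bindings) ---
# A hedged sketch reading how much backing memory device 0's default pool
# currently reserves; the attribute comes back as an integer-like value.
def _example_cudaMemPoolGetAttribute():
    err, pool = cudaDeviceGetDefaultMemPool(0)
    assert err == cudaError_t.cudaSuccess
    err, reserved = cudaMemPoolGetAttribute(
        pool, cudaMemPoolAttr.cudaMemPoolAttrReservedMemCurrent)
    assert err == cudaError_t.cudaSuccess
    print("reserved bytes:", int(reserved))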
24219@cython.embedsignature(True)
24220def cudaMemPoolSetAccess(memPool, descList : Optional[tuple[cudaMemAccessDesc] | list[cudaMemAccessDesc]], size_t count):
24221 """ Controls visibility of pools between devices.
24223 Parameters
24224 ----------
24225 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
24226 The pool being modified
24227 descList : list[:py:obj:`~.cudaMemAccessDesc`]
24228 Array of access descriptors. Each descriptor instructs the access
24229 to enable for a single gpu
24230 count : size_t
24231 Number of descriptors in the map array.
24233 Returns
24234 -------
24235 cudaError_t
24236 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
24238 See Also
24239 --------
24240 :py:obj:`~.cuMemPoolSetAccess`, :py:obj:`~.cudaMemPoolGetAccess`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`
24241 """
24242 descList = [] if descList is None else descList
24243 if not all(isinstance(_x, (cudaMemAccessDesc,)) for _x in descList):
24244 raise TypeError("Argument 'descList' is not instance of type (expected tuple[cyruntime.cudaMemAccessDesc,] or list[cyruntime.cudaMemAccessDesc,])")
24245 cdef cyruntime.cudaMemPool_t cymemPool
24246 if memPool is None:
24247 pmemPool = 0
24248 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
24249 pmemPool = int(memPool)
24250 else:
24251 pmemPool = int(cudaMemPool_t(memPool))
24252 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
24253 cdef cyruntime.cudaMemAccessDesc* cydescList = NULL
24254 if len(descList) > 1:
24255 cydescList = <cyruntime.cudaMemAccessDesc*> calloc(len(descList), sizeof(cyruntime.cudaMemAccessDesc))
24256 if cydescList is NULL:
24257 raise MemoryError('Failed to allocate length x size memory: ' + str(len(descList)) + 'x' + str(sizeof(cyruntime.cudaMemAccessDesc)))
24258 for idx in range(len(descList)):
24259 string.memcpy(&cydescList[idx], (<cudaMemAccessDesc>descList[idx])._pvt_ptr, sizeof(cyruntime.cudaMemAccessDesc))
24260 elif len(descList) == 1:
24261 cydescList = (<cudaMemAccessDesc>descList[0])._pvt_ptr
24262 if count > <size_t>len(descList): raise RuntimeError("List is too small: " + str(len(descList)) + " < " + str(count))
24263 with nogil:
24264 err = cyruntime.cudaMemPoolSetAccess(cymemPool, cydescList, count)
24265 if len(descList) > 1 and cydescList is not NULL:
24266 free(cydescList)
24267 return (_dict_cudaError_t[err],)
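# --- Editor's usage sketch (not part of the generated bindings) ---
# A hedged sketch granting a hypothetical peer device 1 read-write access
# to device 0's default pool; it assumes a system with two peer-capable
# devices, and uses asserts for brevity.
def _example_cudaMemPoolSetAccess():
    err, pool = cudaDeviceGetDefaultMemPool(0)
    assert err == cudaError_t.cudaSuccess
    desc = cudaMemAccessDesc()
    desc.location.type = cudaMemLocationType.cudaMemLocationTypeDevice
    desc.location.id = 1  # hypothetical peer device ordinal
    desc.flags = cudaMemAccessFlags.cudaMemAccessFlagsProtReadWrite
    err, = cudaMemPoolSetAccess(pool, [desc], 1)
    assert err == cudaError_t.cudaSuccess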
24269@cython.embedsignature(True)
24270def cudaMemPoolGetAccess(memPool, location : Optional[cudaMemLocation]):
24271 """ Returns the accessibility of a pool from a device.
24273 Returns the accessibility of the pool's memory from the specified
24274 location.
24276 Parameters
24277 ----------
24278 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
24279 the pool being queried
24280 location : :py:obj:`~.cudaMemLocation`
24281 the location accessing the pool
24283 Returns
24284 -------
24285 cudaError_t
24287 flags : :py:obj:`~.cudaMemAccessFlags`
24288 the accessibility of the pool from the specified location
24290 See Also
24291 --------
24292 :py:obj:`~.cuMemPoolGetAccess`, :py:obj:`~.cudaMemPoolSetAccess`
24293 """
24294 cdef cyruntime.cudaMemPool_t cymemPool
24295 if memPool is None:
24296 pmemPool = 0
24297 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
24298 pmemPool = int(memPool)
24299 else:
24300 pmemPool = int(cudaMemPool_t(memPool))
24301 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
24302 cdef cyruntime.cudaMemAccessFlags flags
24303 cdef cyruntime.cudaMemLocation* cylocation_ptr = location._pvt_ptr if location is not None else NULL
24304 with nogil:
24305 err = cyruntime.cudaMemPoolGetAccess(&flags, cymemPool, cylocation_ptr)
24306 if err != cyruntime.cudaSuccess:
24307 return (_dict_cudaError_t[err], None)
24308 return (_dict_cudaError_t[err], cudaMemAccessFlags(flags))
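# --- Editor's usage sketch (not part of the generated bindings) ---
# A hedged sketch checking how device 0 may access its own default pool;
# the wrapper above returns the access flags on success.
def _example_cudaMemPoolGetAccess():
    err, pool = cudaDeviceGetDefaultMemPool(0)
    assert err == cudaError_t.cudaSuccess
    loc = cudaMemLocation()
    loc.type = cudaMemLocationType.cudaMemLocationTypeDevice
    loc.id = 0
    err, flags = cudaMemPoolGetAccess(pool, loc)
    assert err == cudaError_t.cudaSuccess
    print("access flags:", flags)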
24310@cython.embedsignature(True)
24311def cudaMemPoolCreate(poolProps : Optional[cudaMemPoolProps]):
24312 """ Creates a memory pool.
24314 Creates a CUDA memory pool and returns the handle in `pool`. The
24315 `poolProps` determines the properties of the pool such as the backing
24316 device and IPC capabilities.
24318 To create a memory pool for host memory not targeting a specific NUMA
24319 node, applications must set
24320 :py:obj:`~.cudaMemPoolProps`::cudaMemLocation::type to
24321 :py:obj:`~.cudaMemLocationTypeHost`.
24322 :py:obj:`~.cudaMemPoolProps`::cudaMemLocation::id is ignored for such
24323 pools. Pools created with the type :py:obj:`~.cudaMemLocationTypeHost`
24324 are not IPC capable and :py:obj:`~.cudaMemPoolProps.handleTypes` must
24325 be 0; any other values will result in
24326 :py:obj:`~.cudaErrorInvalidValue`. To create a memory pool targeting a
24327 specific host NUMA node, applications must set
24328 :py:obj:`~.cudaMemPoolProps`::cudaMemLocation::type to
24329 :py:obj:`~.cudaMemLocationTypeHostNuma` and
24330 :py:obj:`~.cudaMemPoolProps`::cudaMemLocation::id must specify the NUMA
24331 ID of the host memory node. Specifying
24332 :py:obj:`~.cudaMemLocationTypeHostNumaCurrent` as the
24333 :py:obj:`~.cudaMemPoolProps`::cudaMemLocation::type will result in
24334 :py:obj:`~.cudaErrorInvalidValue`. By default, the pool's memory will
24335 be accessible from the device it is allocated on. In the case of pools
24336 created with :py:obj:`~.cudaMemLocationTypeHostNuma` or
24337 :py:obj:`~.cudaMemLocationTypeHost`, their default accessibility will
24338 be from the host CPU. Applications can control the maximum size of the
24339 pool by specifying a non-zero value for
24340 :py:obj:`~.cudaMemPoolProps.maxSize`. If set to 0, the maximum size of
24341 the pool will default to a system dependent value.
24343 Applications that intend to use :py:obj:`~.CU_MEM_HANDLE_TYPE_FABRIC`
24344 based memory sharing must ensure: (1) `nvidia-caps-imex-channels`
24345 character device is created by the driver and is listed under
24346 /proc/devices (2) have at least one IMEX channel file accessible by the
24347 user launching the application.
24349 When exporter and importer CUDA processes have been granted access to
24350 the same IMEX channel, they can securely share memory.
24352 The IMEX channel security model works on a per-user basis, which means
24353 all processes under a user can share memory if the user has access to a
24354 valid IMEX channel. When multi-user isolation is desired, a separate
24355 IMEX channel is required for each user.
24357 These channel files exist in /dev/nvidia-caps-imex-channels/channel*
24358 and can be created using standard OS native calls like mknod on Linux.
24359 For example, to create channel0 with the major number from
24360 /proc/devices, users can execute the following command: `mknod
24361 /dev/nvidia-caps-imex-channels/channel0 c <major number> 0`
24363 To create a managed memory pool, applications must set
24364 :py:obj:`~.cudaMemPoolProps`::cudaMemAllocationType to
24365 :py:obj:`~.cudaMemAllocationTypeManaged`.
24366 :py:obj:`~.cudaMemPoolProps`::cudaMemAllocationHandleType must also be
24367 set to :py:obj:`~.cudaMemHandleTypeNone` since IPC is not supported.
24368 For managed memory pools, :py:obj:`~.cudaMemPoolProps`::cudaMemLocation
24369 will be treated as the preferred location for all allocations created
24370 from the pool. An application can also set
24371 :py:obj:`~.cudaMemLocationTypeNone` to indicate no preferred location.
24372 :py:obj:`~.cudaMemPoolProps.maxSize` must be set to zero for managed
24373 memory pools. :py:obj:`~.cudaMemPoolProps.usage` should be zero as
24374 decompress for managed memory is not supported. For managed memory
24375 pools, all devices on the system must have non-zero
24376 :py:obj:`~.concurrentManagedAccess`. If not, this call returns
24377 :py:obj:`~.cudaErrorNotSupported`.
24379 Parameters
24380 ----------
24381 poolProps : :py:obj:`~.cudaMemPoolProps`
24382 None
24384 Returns
24385 -------
24386 cudaError_t
24387 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
24388 memPool : :py:obj:`~.cudaMemPool_t`
24389 None
24391 See Also
24392 --------
24393 :py:obj:`~.cuMemPoolCreate`, :py:obj:`~.cudaDeviceSetMemPool`, :py:obj:`~.cudaMallocFromPoolAsync`, :py:obj:`~.cudaMemPoolExportToShareableHandle`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceGetMemPool`
24395 Notes
24396 -----
24397 Specifying :py:obj:`~.cudaMemHandleTypeNone` creates a memory pool that will not support IPC.
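
    Examples
    --------
    A minimal sketch of creating a device-local, non-IPC pool (assumes an
    initialized CUDA runtime and a device 0; error checking is elided):

    >>> props = cudaMemPoolProps()
    >>> props.allocType = cudaMemAllocationType.cudaMemAllocationTypePinned
    >>> props.handleTypes = cudaMemAllocationHandleType.cudaMemHandleTypeNone
    >>> props.location.type = cudaMemLocationType.cudaMemLocationTypeDevice
    >>> props.location.id = 0
    >>> err, pool = cudaMemPoolCreate(props)  # doctest: +SKIP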
24398 """
24399 cdef cudaMemPool_t memPool = cudaMemPool_t()
24400 cdef cyruntime.cudaMemPoolProps* cypoolProps_ptr = poolProps._pvt_ptr if poolProps is not None else NULL
24401 with nogil:
24402 err = cyruntime.cudaMemPoolCreate(<cyruntime.cudaMemPool_t*>memPool._pvt_ptr, cypoolProps_ptr)
24403 if err != cyruntime.cudaSuccess:
24404 return (_dict_cudaError_t[err], None)
24405 return (_dict_cudaError_t[err], memPool)
24407@cython.embedsignature(True)
24408def cudaMemPoolDestroy(memPool):
24409 """ Destroys the specified memory pool.
24411 If any pointers obtained from this pool haven't been freed or the pool
24412 has free operations that haven't completed when
24413 :py:obj:`~.cudaMemPoolDestroy` is invoked, the function will return
24414 immediately and the resources associated with the pool will be released
24415 automatically once there are no more outstanding allocations.
24417 Destroying the current mempool of a device sets the default mempool of
24418 that device as the current mempool for that device.
24420 Parameters
24421 ----------
24422 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
24423 None
24425 Returns
24426 -------
24427 cudaError_t
24428 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
24430 See Also
24431 --------
24432 cuMemPoolDestroy, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaDeviceSetMemPool`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceGetMemPool`, :py:obj:`~.cudaMemPoolCreate`
24434 Notes
24435 -----
24436 A device's default memory pool cannot be destroyed.
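
    Examples
    --------
    A minimal sketch (assumes `pool` was returned by
    :py:obj:`~.cudaMemPoolCreate`):

    >>> (err,) = cudaMemPoolDestroy(pool)  # doctest: +SKIP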
24437 """
24438 cdef cyruntime.cudaMemPool_t cymemPool
24439 if memPool is None:
24440 pmemPool = 0
24441 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
24442 pmemPool = int(memPool)
24443 else:
24444 pmemPool = int(cudaMemPool_t(memPool))
24445 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
24446 with nogil:
24447 err = cyruntime.cudaMemPoolDestroy(cymemPool)
24448 return (_dict_cudaError_t[err],)
24450@cython.embedsignature(True)
24451def cudaMemGetDefaultMemPool(location : Optional[cudaMemLocation], typename not None : cudaMemAllocationType):
24452 """ Returns the default memory pool for a given location and allocation type.
24454 The memory location can be one of
24455 :py:obj:`~.cudaMemLocationTypeDevice`,
24456 :py:obj:`~.cudaMemLocationTypeHost` or
24457 :py:obj:`~.cudaMemLocationTypeHostNuma`. The allocation type can be one
24458 of :py:obj:`~.cudaMemAllocationTypePinned` or
24459 :py:obj:`~.cudaMemAllocationTypeManaged`. When the allocation type is
24460 :py:obj:`~.cudaMemAllocationTypeManaged`, the location type can also be
24461 :py:obj:`~.cudaMemLocationTypeNone` to indicate no preferred location
24462 for the managed memory pool. In all other cases, the call returns
24463 :py:obj:`~.cudaErrorInvalidValue`.
24465 Parameters
24466 ----------
24467 location : :py:obj:`~.cudaMemLocation`
24468 None
24469 typename : :py:obj:`~.cudaMemAllocationType`
24470 None
24472 Returns
24473 -------
24474 cudaError_t
24475 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
24476 memPool : :py:obj:`~.cudaMemPool_t`
24477 None
24479 See Also
24480 --------
24481 :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemPoolTrimTo`, :py:obj:`~.cuMemPoolGetAttribute`, :py:obj:`~.cuMemPoolSetAttribute`, cuMemPoolSetAccess, :py:obj:`~.cuMemGetMemPool`, :py:obj:`~.cuMemPoolCreate`
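
    Examples
    --------
    A minimal sketch of querying the default pinned-memory pool of device
    0 (assumes an initialized CUDA runtime; error checking is elided):

    >>> loc = cudaMemLocation()
    >>> loc.type = cudaMemLocationType.cudaMemLocationTypeDevice
    >>> loc.id = 0
    >>> err, pool = cudaMemGetDefaultMemPool(
    ...     loc, cudaMemAllocationType.cudaMemAllocationTypePinned)  # doctest: +SKIP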
24482 """
24483 cdef cudaMemPool_t memPool = cudaMemPool_t()
24484 cdef cyruntime.cudaMemLocation* cylocation_ptr = location._pvt_ptr if location is not None else NULL
24485 cdef cyruntime.cudaMemAllocationType cytypename = typename.value
24486 with nogil:
24487 err = cyruntime.cudaMemGetDefaultMemPool(<cyruntime.cudaMemPool_t*>memPool._pvt_ptr, cylocation_ptr, cytypename)
24488 if err != cyruntime.cudaSuccess:
24489 return (_dict_cudaError_t[err], None)
24490 return (_dict_cudaError_t[err], memPool)
24492@cython.embedsignature(True)
24493def cudaMemGetMemPool(location : Optional[cudaMemLocation], typename not None : cudaMemAllocationType):
24494 """ Gets the current memory pool for a given memory location and allocation type.
24496 The memory location can be one of
24497 :py:obj:`~.cudaMemLocationTypeDevice`,
24498 :py:obj:`~.cudaMemLocationTypeHost` or
24499 :py:obj:`~.cudaMemLocationTypeHostNuma`. The allocation type can be one
24500 of :py:obj:`~.cudaMemAllocationTypePinned` or
24501 :py:obj:`~.cudaMemAllocationTypeManaged`. When the allocation type is
24502 :py:obj:`~.cudaMemAllocationTypeManaged`, the location type can also be
24503 :py:obj:`~.cudaMemLocationTypeNone` to indicate no preferred location
24504 for the managed memory pool. In all other cases, the call returns
24505 :py:obj:`~.cudaErrorInvalidValue`.
24507 Returns the last pool provided to :py:obj:`~.cudaMemSetMemPool` or
24508 :py:obj:`~.cudaDeviceSetMemPool` for this location and allocation type,
24509 or the location's default memory pool if :py:obj:`~.cudaMemSetMemPool`
24510 or :py:obj:`~.cudaDeviceSetMemPool` has never been called for that
24511 allocation type and location. By default, the current mempool of a
24512 location is its default mempool, which can be obtained via
24513 :py:obj:`~.cudaMemGetDefaultMemPool`. Otherwise, the returned pool must
24514 have been set with :py:obj:`~.cudaDeviceSetMemPool`.
24516 Parameters
24517 ----------
24518 location : :py:obj:`~.cudaMemLocation`
24519 None
24520 typename : :py:obj:`~.cudaMemAllocationType`
24521 None
24523 Returns
24524 -------
24525 cudaError_t
24526 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
24527 memPool : :py:obj:`~.cudaMemPool_t`
24528 None
24530 See Also
24531 --------
24532 :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuMemPoolCreate`, :py:obj:`~.cuDeviceSetMemPool`, :py:obj:`~.cuMemSetMemPool`
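
    Examples
    --------
    A minimal sketch of reading back the current pinned-memory pool of
    device 0 (assumes an initialized CUDA runtime):

    >>> loc = cudaMemLocation()
    >>> loc.type = cudaMemLocationType.cudaMemLocationTypeDevice
    >>> loc.id = 0
    >>> err, pool = cudaMemGetMemPool(
    ...     loc, cudaMemAllocationType.cudaMemAllocationTypePinned)  # doctest: +SKIP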
24533 """
24534 cdef cudaMemPool_t memPool = cudaMemPool_t()
24535 cdef cyruntime.cudaMemLocation* cylocation_ptr = location._pvt_ptr if location is not None else NULL
24536 cdef cyruntime.cudaMemAllocationType cytypename = typename.value
24537 with nogil:
24538 err = cyruntime.cudaMemGetMemPool(<cyruntime.cudaMemPool_t*>memPool._pvt_ptr, cylocation_ptr, cytypename)
24539 if err != cyruntime.cudaSuccess:
24540 return (_dict_cudaError_t[err], None)
24541 return (_dict_cudaError_t[err], memPool)
24543@cython.embedsignature(True)
24544def cudaMemSetMemPool(location : Optional[cudaMemLocation], typename not None : cudaMemAllocationType, memPool):
24545 """ Sets the current memory pool for a memory location and allocation type.
24547 The memory location can be one of
24548 :py:obj:`~.cudaMemLocationTypeDevice`,
24549 :py:obj:`~.cudaMemLocationTypeHost` or
24550 :py:obj:`~.cudaMemLocationTypeHostNuma`. The allocation type can be one
24551 of :py:obj:`~.cudaMemAllocationTypePinned` or
24552 :py:obj:`~.cudaMemAllocationTypeManaged`. When the allocation type is
24553 :py:obj:`~.cudaMemAllocationTypeManaged`, the location type can also be
24554 :py:obj:`~.cudaMemLocationTypeNone` to indicate no preferred location
24555 for the managed memory pool. In all other cases, the call returns
24556 :py:obj:`~.cudaErrorInvalidValue`.
24558 When a memory pool is set as the current memory pool, the location
24559 parameter should be the same as the location of the pool. If the
24560 location type or index does not match, the call returns
24561 :py:obj:`~.cudaErrorInvalidValue`. The type of the memory pool should
24562 also match the allocType parameter; otherwise the call returns
24563 :py:obj:`~.cudaErrorInvalidValue`. By default, a memory location's
24564 current memory pool is its default memory pool. If the location type is
24565 :py:obj:`~.cudaMemLocationTypeDevice` and the allocation type is
24566 :py:obj:`~.cudaMemAllocationTypePinned`, then this API is the
24567 equivalent of calling :py:obj:`~.cudaDeviceSetMemPool` with the
24568 location id as the device. For further details on the implications,
24569 please refer to the documentation for :py:obj:`~.cudaDeviceSetMemPool`.
24571 Parameters
24572 ----------
24573 location : :py:obj:`~.cudaMemLocation`
24574 None
24575 typename : :py:obj:`~.cudaMemAllocationType`
24576 None
24577 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
24578 None
24580 Returns
24581 -------
24582 cudaError_t
24583 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
24585 See Also
24586 --------
24587 :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cuMemGetMemPool`, :py:obj:`~.cuMemPoolCreate`, :py:obj:`~.cuMemPoolDestroy`, :py:obj:`~.cuMemAllocFromPoolAsync`
24589 Notes
24590 -----
24591 Use :py:obj:`~.cudaMallocFromPoolAsync` to specify asynchronous allocations from a device different than the one the stream runs on.
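
    Examples
    --------
    A minimal sketch that makes `pool` (a pool created for device 0, e.g.
    with :py:obj:`~.cudaMemPoolCreate`) the current pinned-memory pool of
    that device; the location must match the pool's own location:

    >>> loc = cudaMemLocation()
    >>> loc.type = cudaMemLocationType.cudaMemLocationTypeDevice
    >>> loc.id = 0
    >>> (err,) = cudaMemSetMemPool(
    ...     loc, cudaMemAllocationType.cudaMemAllocationTypePinned, pool)  # doctest: +SKIP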
24592 """
24593 cdef cyruntime.cudaMemPool_t cymemPool
24594 if memPool is None:
24595 pmemPool = 0
24596 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
24597 pmemPool = int(memPool)
24598 else:
24599 pmemPool = int(cudaMemPool_t(memPool))
24600 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
24601 cdef cyruntime.cudaMemLocation* cylocation_ptr = location._pvt_ptr if location is not None else NULL
24602 cdef cyruntime.cudaMemAllocationType cytypename = typename.value
24603 with nogil:
24604 err = cyruntime.cudaMemSetMemPool(cylocation_ptr, cytypename, cymemPool)
24605 return (_dict_cudaError_t[err],)
24607@cython.embedsignature(True)
24608def cudaMallocFromPoolAsync(size_t size, memPool, stream):
24609 """ Allocates memory from a specified pool with stream ordered semantics.
24611 Inserts an allocation operation into `stream`. A pointer to the
24612 allocated memory is returned immediately in *dptr. The allocation must
24613 not be accessed until the allocation operation completes. The
24614 allocation comes from the specified memory pool.
24616 Parameters
24617 ----------
24618 size : size_t
24619 Number of bytes to allocate
24620 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
24621 The pool to allocate from
24622 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
24623 The stream establishing the stream ordering semantic
24625 Returns
24626 -------
24627 cudaError_t
24628 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorOutOfMemory`
24629 ptr : Any
24630 Returned device pointer
24632 See Also
24633 --------
24634 :py:obj:`~.cuMemAllocFromPoolAsync`, cudaMallocAsync (C++ API), :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaMemPoolCreate`, :py:obj:`~.cudaMemPoolSetAccess`, :py:obj:`~.cudaMemPoolSetAttribute`
24636 Notes
24637 -----
24638 During stream capture, this function results in the creation of an allocation node. In this case, the allocation is owned by the graph instead of the memory pool. The memory pool's properties are used to set the node's creation parameters.
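
    Examples
    --------
    A minimal sketch of a stream-ordered allocation from an existing pool
    (`pool` as created with :py:obj:`~.cudaMemPoolCreate`; error checking
    is elided):

    >>> err, stream = cudaStreamCreate()  # doctest: +SKIP
    >>> err, dptr = cudaMallocFromPoolAsync(1 << 20, pool, stream)  # doctest: +SKIP
    >>> (err,) = cudaFreeAsync(dptr, stream)  # doctest: +SKIP
    >>> (err,) = cudaStreamSynchronize(stream)  # doctest: +SKIP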
24639 """
24640 cdef cyruntime.cudaStream_t cystream
24641 if stream is None:
24642 pstream = 0
24643 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
24644 pstream = int(stream)
24645 else:
24646 pstream = int(cudaStream_t(stream))
24647 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
24648 cdef cyruntime.cudaMemPool_t cymemPool
24649 if memPool is None:
24650 pmemPool = 0
24651 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
24652 pmemPool = int(memPool)
24653 else:
24654 pmemPool = int(cudaMemPool_t(memPool))
24655 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
24656 cdef void_ptr ptr = 0
24657 with nogil:
24658 err = cyruntime.cudaMallocFromPoolAsync(<void**>&ptr, size, cymemPool, cystream)
24659 if err != cyruntime.cudaSuccess:
24660 return (_dict_cudaError_t[err], None)
24661 return (_dict_cudaError_t[err], ptr)
24663@cython.embedsignature(True)
24664def cudaMemPoolExportToShareableHandle(memPool, handleType not None : cudaMemAllocationHandleType, unsigned int flags):
24665 """ Exports a memory pool to the requested handle type.
24667 Given an IPC capable mempool, create an OS handle to share the pool
24668 with another process. A recipient process can convert the shareable
24669 handle into a mempool with
24670 :py:obj:`~.cudaMemPoolImportFromShareableHandle`. Individual pointers
24671 can then be shared with the :py:obj:`~.cudaMemPoolExportPointer` and
24672 :py:obj:`~.cudaMemPoolImportPointer` APIs. The implementation of what
24673 the shareable handle is and how it can be transferred is defined by the
24674 requested handle type.
24676 Parameters
24677 ----------
24678 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
24679 pool to export
24680 handleType : :py:obj:`~.cudaMemAllocationHandleType`
24681 the type of handle to create
24682 flags : unsigned int
24683 must be 0
24685 Returns
24686 -------
24687 cudaError_t
24688 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorOutOfMemory`
24689 handle_out : Any
24690 pointer to the location in which to store the requested handle
24692 See Also
24693 --------
24694 :py:obj:`~.cuMemPoolExportToShareableHandle`, :py:obj:`~.cudaMemPoolImportFromShareableHandle`, :py:obj:`~.cudaMemPoolExportPointer`, :py:obj:`~.cudaMemPoolImportPointer`
24696 Notes
24697 -----
24698 To create an IPC-capable mempool, create a mempool with a :py:obj:`~.cudaMemAllocationHandleType` other than :py:obj:`~.cudaMemHandleTypeNone`.
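
    Examples
    --------
    A minimal sketch for a pool whose :py:obj:`~.cudaMemPoolProps.handleTypes`
    included :py:obj:`~.cudaMemHandleTypePosixFileDescriptor`; the returned
    file descriptor must then be sent to the importing process by an OS
    mechanism such as a UNIX domain socket (not shown):

    >>> err, fd = cudaMemPoolExportToShareableHandle(
    ...     pool, cudaMemAllocationHandleType.cudaMemHandleTypePosixFileDescriptor, 0)  # doctest: +SKIP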
24699 """
24700 cdef cyruntime.cudaMemPool_t cymemPool
24701 if memPool is None:
24702 pmemPool = 0
24703 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
24704 pmemPool = int(memPool)
24705 else:
24706 pmemPool = int(cudaMemPool_t(memPool))
24707 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
24708 cdef _HelperCUmemAllocationHandleType cyshareableHandle = _HelperCUmemAllocationHandleType(handleType)
24709 cdef void* cyshareableHandle_ptr = <void*><void_ptr>cyshareableHandle.cptr
24710 cdef cyruntime.cudaMemAllocationHandleType cyhandleType = handleType.value
24711 with nogil:
24712 err = cyruntime.cudaMemPoolExportToShareableHandle(cyshareableHandle_ptr, cymemPool, cyhandleType, flags)
24713 if err != cyruntime.cudaSuccess:
24714 return (_dict_cudaError_t[err], None)
24715 return (_dict_cudaError_t[err], cyshareableHandle.pyObj())
24717@cython.embedsignature(True)
24718def cudaMemPoolImportFromShareableHandle(shareableHandle, handleType not None : cudaMemAllocationHandleType, unsigned int flags):
24719 """ imports a memory pool from a shared handle.
24721 Specific allocations can be imported from the imported pool with
24722 :py:obj:`~.cudaMemPoolImportPointer`.
24724 Parameters
24725 ----------
24726 shareableHandle : Any
24727 OS handle of the pool to open
24728 handleType : :py:obj:`~.cudaMemAllocationHandleType`
24729 The type of handle being imported
24730 flags : unsigned int
24731 must be 0
24733 Returns
24734 -------
24735 cudaError_t
24736 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorOutOfMemory`
24737 pool_out : :py:obj:`~.cudaMemPool_t`
24738 Returned memory pool
24740 See Also
24741 --------
24742 :py:obj:`~.cuMemPoolImportFromShareableHandle`, :py:obj:`~.cudaMemPoolExportToShareableHandle`, :py:obj:`~.cudaMemPoolExportPointer`, :py:obj:`~.cudaMemPoolImportPointer`
24744 Notes
24745 -----
24746 Imported memory pools do not support creating new allocations. As such imported memory pools may not be used in :py:obj:`~.cudaDeviceSetMemPool` or :py:obj:`~.cudaMallocFromPoolAsync` calls.
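
    Examples
    --------
    A minimal sketch for the importing process, where `fd` is a POSIX file
    descriptor received from the exporting process (e.g. over a UNIX
    domain socket):

    >>> err, pool = cudaMemPoolImportFromShareableHandle(
    ...     fd, cudaMemAllocationHandleType.cudaMemHandleTypePosixFileDescriptor, 0)  # doctest: +SKIP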
24747 """
24748 cdef cudaMemPool_t memPool = cudaMemPool_t()
24749 cyshareableHandle = _HelperInputVoidPtr(shareableHandle)
24750 cdef void* cyshareableHandle_ptr = <void*><void_ptr>cyshareableHandle.cptr
24751 cdef cyruntime.cudaMemAllocationHandleType cyhandleType = handleType.value
24752 with nogil:
24753 err = cyruntime.cudaMemPoolImportFromShareableHandle(<cyruntime.cudaMemPool_t*>memPool._pvt_ptr, cyshareableHandle_ptr, cyhandleType, flags)
24754 if err != cyruntime.cudaSuccess:
24755 return (_dict_cudaError_t[err], None)
24756 return (_dict_cudaError_t[err], memPool)
24758@cython.embedsignature(True)
24759def cudaMemPoolExportPointer(ptr):
24760 """ Export data to share a memory pool allocation between processes.
24762 Constructs `shareData_out` for sharing a specific allocation from an
24763 already shared memory pool. The recipient process can import the
24764 allocation with the :py:obj:`~.cudaMemPoolImportPointer` API. The data
24765 is not a handle and may be shared through any IPC mechanism.
24767 Parameters
24768 ----------
24769 ptr : Any
24770 pointer to memory being exported
24772 Returns
24773 -------
24774 cudaError_t
24775 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorOutOfMemory`
24776 shareData_out : :py:obj:`~.cudaMemPoolPtrExportData`
24777 Returned export data
24779 See Also
24780 --------
24781 :py:obj:`~.cuMemPoolExportPointer`, :py:obj:`~.cudaMemPoolExportToShareableHandle`, :py:obj:`~.cudaMemPoolImportFromShareableHandle`, :py:obj:`~.cudaMemPoolImportPointer`
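
    Examples
    --------
    A minimal sketch in the exporting process, where `dptr` was allocated
    with :py:obj:`~.cudaMallocFromPoolAsync` from an IPC-capable pool; the
    returned export data may be sent over any IPC mechanism:

    >>> err, shareData = cudaMemPoolExportPointer(dptr)  # doctest: +SKIP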
24782 """
24783 cdef cudaMemPoolPtrExportData exportData = cudaMemPoolPtrExportData()
24784 cyptr = _HelperInputVoidPtr(ptr)
24785 cdef void* cyptr_ptr = <void*><void_ptr>cyptr.cptr
24786 with nogil:
24787 err = cyruntime.cudaMemPoolExportPointer(<cyruntime.cudaMemPoolPtrExportData*>exportData._pvt_ptr, cyptr_ptr)
24788 if err != cyruntime.cudaSuccess:
24789 return (_dict_cudaError_t[err], None)
24790 return (_dict_cudaError_t[err], exportData)
24792@cython.embedsignature(True)
24793def cudaMemPoolImportPointer(memPool, exportData : Optional[cudaMemPoolPtrExportData]):
24794 """ Import a memory pool allocation from another process.
24796 Returns in `ptr_out` a pointer to the imported memory. The imported
24797 memory must not be accessed before the allocation operation completes
24798 in the exporting process. The imported memory must be freed from all
24799 importing processes before being freed in the exporting process. The
24800 pointer may be freed with cudaFree or cudaFreeAsync. If
24801 :py:obj:`~.cudaFreeAsync` is used, the free must be completed on the
24802 importing process before the free operation on the exporting process.
24804 Parameters
24805 ----------
24806 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
24807 pool from which to import
24808 exportData : :py:obj:`~.cudaMemPoolPtrExportData`
24809 data specifying the memory to import
24811 Returns
24812 -------
24813 cudaError_t
24814 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorOutOfMemory`
24815 ptr_out : Any
24816 pointer to imported memory
24818 See Also
24819 --------
24820 :py:obj:`~.cuMemPoolImportPointer`, :py:obj:`~.cudaMemPoolExportToShareableHandle`, :py:obj:`~.cudaMemPoolImportFromShareableHandle`, :py:obj:`~.cudaMemPoolExportPointer`
24822 Notes
24823 -----
24824 The :py:obj:`~.cudaFreeAsync` API may be used in the exporting process before the :py:obj:`~.cudaFreeAsync` operation completes in its stream as long as the :py:obj:`~.cudaFreeAsync` in the exporting process specifies a stream with a stream dependency on the importing process's :py:obj:`~.cudaFreeAsync`.
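
    Examples
    --------
    A minimal sketch in the importing process, where `pool` was obtained
    with :py:obj:`~.cudaMemPoolImportFromShareableHandle` and `shareData`
    is the :py:obj:`~.cudaMemPoolPtrExportData` produced by the exporter:

    >>> err, dptr = cudaMemPoolImportPointer(pool, shareData)  # doctest: +SKIP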
24825 """
24826 cdef cyruntime.cudaMemPool_t cymemPool
24827 if memPool is None:
24828 pmemPool = 0
24829 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
24830 pmemPool = int(memPool)
24831 else:
24832 pmemPool = int(cudaMemPool_t(memPool))
24833 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
24834 cdef void_ptr ptr = 0
24835 cdef cyruntime.cudaMemPoolPtrExportData* cyexportData_ptr = exportData._pvt_ptr if exportData is not None else NULL
24836 with nogil:
24837 err = cyruntime.cudaMemPoolImportPointer(<void**>&ptr, cymemPool, cyexportData_ptr)
24838 if err != cyruntime.cudaSuccess:
24839 return (_dict_cudaError_t[err], None)
24840 return (_dict_cudaError_t[err], ptr)
24842@cython.embedsignature(True)
24843def cudaPointerGetAttributes(ptr):
24844 """ Returns attributes about a specified pointer.
24846 Returns in `*attributes` the attributes of the pointer `ptr`. If the
24847 pointer was not allocated in, mapped by, or registered with a context
24848 supporting unified addressing, :py:obj:`~.cudaErrorInvalidValue` is
24849 returned.
24851 The :py:obj:`~.cudaPointerAttributes` structure is defined as:
24853 **View CUDA Toolkit Documentation for a C++ code example**
24855 In this structure, the individual fields mean
24857 - :py:obj:`~.cudaPointerAttributes.type` identifies the type of memory. It
24858 can be :py:obj:`~.cudaMemoryTypeUnregistered` for unregistered host
24859 memory, :py:obj:`~.cudaMemoryTypeHost` for registered host memory,
24860 :py:obj:`~.cudaMemoryTypeDevice` for device memory or
24861 :py:obj:`~.cudaMemoryTypeManaged` for managed memory.
24863 - :py:obj:`~.device` is the device against which `ptr` was allocated.
24864 If `ptr` has memory type :py:obj:`~.cudaMemoryTypeDevice` then this
24865 identifies the device on which the memory referred to by `ptr`
24866 physically resides. If `ptr` has memory type
24867 :py:obj:`~.cudaMemoryTypeHost` then this identifies the device which
24868 was current when the allocation was made (and if that device is
24869 deinitialized then this allocation will vanish with that device's
24870 state).
24872 - :py:obj:`~.devicePointer` is the device pointer alias through which
24873 the memory referred to by `ptr` may be accessed on the current
24874 device. If the memory referred to by `ptr` cannot be accessed
24875 directly by the current device then this is NULL.
24877 - :py:obj:`~.hostPointer` is the host pointer alias through which the
24878 memory referred to by `ptr` may be accessed on the host. If the
24879 memory referred to by `ptr` cannot be accessed directly by the host
24880 then this is NULL.
24882 Parameters
24883 ----------
24884 ptr : Any
24885 Pointer to get attributes for
24887 Returns
24888 -------
24889 cudaError_t
24890 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`
24891 attributes : :py:obj:`~.cudaPointerAttributes`
24892 Attributes for the specified pointer
24894 See Also
24895 --------
24896 :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuPointerGetAttributes`
24898 Notes
24899 -----
24900 In CUDA 11.0 and later, passing a host pointer will return :py:obj:`~.cudaMemoryTypeUnregistered` in :py:obj:`~.cudaPointerAttributes.type`, and the call will return :py:obj:`~.cudaSuccess`.
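
    Examples
    --------
    A minimal sketch (assumes an initialized CUDA runtime; error checking
    is elided):

    >>> err, dptr = cudaMalloc(256)  # doctest: +SKIP
    >>> err, attrs = cudaPointerGetAttributes(dptr)  # doctest: +SKIP
    >>> attrs.type == cudaMemoryType.cudaMemoryTypeDevice  # doctest: +SKIP
    True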
24901 """
24902 cdef cudaPointerAttributes attributes = cudaPointerAttributes()
24903 cyptr = _HelperInputVoidPtr(ptr)
24904 cdef void* cyptr_ptr = <void*><void_ptr>cyptr.cptr
24905 with nogil:
24906 err = cyruntime.cudaPointerGetAttributes(<cyruntime.cudaPointerAttributes*>attributes._pvt_ptr, cyptr_ptr)
24907 if err != cyruntime.cudaSuccess:
24908 return (_dict_cudaError_t[err], None)
24909 return (_dict_cudaError_t[err], attributes)
24911@cython.embedsignature(True)
24912def cudaDeviceCanAccessPeer(int device, int peerDevice):
24913 """ Queries if a device may directly access a peer device's memory.
24915 Returns in `*canAccessPeer` a value of 1 if device `device` is capable
24916 of directly accessing memory from `peerDevice` and 0 otherwise. If
24917 direct access of `peerDevice` from `device` is possible, then access
24918 may be enabled by calling :py:obj:`~.cudaDeviceEnablePeerAccess()`.
24920 Parameters
24921 ----------
24922 device : int
24923 Device from which allocations on `peerDevice` are to be directly
24924 accessed.
24925 peerDevice : int
24926 Device on which the allocations to be directly accessed by `device`
24927 reside.
24929 Returns
24930 -------
24931 cudaError_t
24932 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`
24933 canAccessPeer : int
24934 Returned access capability
24936 See Also
24937 --------
24938 :py:obj:`~.cudaDeviceEnablePeerAccess`, :py:obj:`~.cudaDeviceDisablePeerAccess`, :py:obj:`~.cuDeviceCanAccessPeer`
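
    Examples
    --------
    A minimal sketch that checks whether device 0 can directly access
    memory on device 1 (assumes at least two devices):

    >>> err, canAccess = cudaDeviceCanAccessPeer(0, 1)  # doctest: +SKIP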
24939 """
24940 cdef int canAccessPeer = 0
24941 with nogil:
24942 err = cyruntime.cudaDeviceCanAccessPeer(&canAccessPeer, device, peerDevice)
24943 if err != cyruntime.cudaSuccess:
24944 return (_dict_cudaError_t[err], None)
24945 return (_dict_cudaError_t[err], canAccessPeer)
24947@cython.embedsignature(True)
24948def cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags):
24949 """ Enables direct access to memory allocations on a peer device.
24951 On success, all allocations from `peerDevice` will immediately be
24952 accessible by the current device. They will remain accessible until
24953 access is explicitly disabled using
24954 :py:obj:`~.cudaDeviceDisablePeerAccess()` or either device is reset
24955 using :py:obj:`~.cudaDeviceReset()`.
24957 Note that access granted by this call is unidirectional and that in
24958 order to access memory on the current device from `peerDevice`, a
24959 separate symmetric call to :py:obj:`~.cudaDeviceEnablePeerAccess()` is
24960 required.
24962 Note that there are both device-wide and system-wide limitations per
24963 system configuration, as noted in the CUDA Programming Guide under the
24964 section "Peer-to-Peer Memory Access".
24966 Returns :py:obj:`~.cudaErrorInvalidDevice` if
24967 :py:obj:`~.cudaDeviceCanAccessPeer()` indicates that the current device
24968 cannot directly access memory from `peerDevice`.
24970 Returns :py:obj:`~.cudaErrorPeerAccessAlreadyEnabled` if direct access
24971 of `peerDevice` from the current device has already been enabled.
24973 Returns :py:obj:`~.cudaErrorInvalidValue` if `flags` is not 0.
24975 Parameters
24976 ----------
24977 peerDevice : int
24978 Peer device to enable direct access to from the current device
24979 flags : unsigned int
24980 Reserved for future use and must be set to 0
24982 Returns
24983 -------
24984 cudaError_t
24985 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorPeerAccessAlreadyEnabled`, :py:obj:`~.cudaErrorInvalidValue`
24987 See Also
24988 --------
24989 :py:obj:`~.cudaDeviceCanAccessPeer`, :py:obj:`~.cudaDeviceDisablePeerAccess`, :py:obj:`~.cuCtxEnablePeerAccess`
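
    Examples
    --------
    A minimal sketch that enables access to device 1's memory from device
    0, after verifying that it is possible:

    >>> (err,) = cudaSetDevice(0)  # doctest: +SKIP
    >>> err, canAccess = cudaDeviceCanAccessPeer(0, 1)  # doctest: +SKIP
    >>> if canAccess:
    ...     (err,) = cudaDeviceEnablePeerAccess(1, 0)  # doctest: +SKIP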
24990 """
24991 with nogil:
24992 err = cyruntime.cudaDeviceEnablePeerAccess(peerDevice, flags)
24993 return (_dict_cudaError_t[err],)
24995@cython.embedsignature(True)
24996def cudaDeviceDisablePeerAccess(int peerDevice):
24997 """ Disables direct access to memory allocations on a peer device.
24999 Returns :py:obj:`~.cudaErrorPeerAccessNotEnabled` if direct access to
25000 memory on `peerDevice` has not yet been enabled from the current
25001 device.
25003 Parameters
25004 ----------
25005 peerDevice : int
25006 Peer device to disable direct access to
25008 Returns
25009 -------
25010 cudaError_t
25011 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorPeerAccessNotEnabled`, :py:obj:`~.cudaErrorInvalidDevice`
25013 See Also
25014 --------
25015 :py:obj:`~.cudaDeviceCanAccessPeer`, :py:obj:`~.cudaDeviceEnablePeerAccess`, :py:obj:`~.cuCtxDisablePeerAccess`
25016 """
25017 with nogil:
25018 err = cyruntime.cudaDeviceDisablePeerAccess(peerDevice)
25019 return (_dict_cudaError_t[err],)
25021@cython.embedsignature(True)
25022def cudaGraphicsUnregisterResource(resource):
25023 """ Unregisters a graphics resource for access by CUDA.
25025 Unregisters the graphics resource `resource` so it is not accessible by
25026 CUDA unless registered again.
25028 If `resource` is invalid then
25029 :py:obj:`~.cudaErrorInvalidResourceHandle` is returned.
25031 Parameters
25032 ----------
25033 resource : :py:obj:`~.cudaGraphicsResource_t`
25034 Resource to unregister
25036 Returns
25037 -------
25038 cudaError_t
25039 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
25041 See Also
25042 --------
25043 :py:obj:`~.cudaGraphicsD3D9RegisterResource`, :py:obj:`~.cudaGraphicsD3D10RegisterResource`, :py:obj:`~.cudaGraphicsD3D11RegisterResource`, :py:obj:`~.cudaGraphicsGLRegisterBuffer`, :py:obj:`~.cudaGraphicsGLRegisterImage`, :py:obj:`~.cuGraphicsUnregisterResource`
25044 """
25045 cdef cyruntime.cudaGraphicsResource_t cyresource
25046 if resource is None:
25047 presource = 0
25048 elif isinstance(resource, (cudaGraphicsResource_t,)):
25049 presource = int(resource)
25050 else:
25051 presource = int(cudaGraphicsResource_t(resource))
25052 cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
25053 with nogil:
25054 err = cyruntime.cudaGraphicsUnregisterResource(cyresource)
25055 return (_dict_cudaError_t[err],)
25057@cython.embedsignature(True)
25058def cudaGraphicsResourceSetMapFlags(resource, unsigned int flags):
25059 """ Set usage flags for mapping a graphics resource.
25061 Set `flags` for mapping the graphics resource `resource`.
25063 Changes to `flags` will take effect the next time `resource` is mapped.
25064 The `flags` argument may be any of the following:
25066 - :py:obj:`~.cudaGraphicsMapFlagsNone`: Specifies no hints about how
25067 `resource` will be used. It is therefore assumed that CUDA may read
25068 from or write to `resource`.
25070 - :py:obj:`~.cudaGraphicsMapFlagsReadOnly`: Specifies that CUDA will
25071 not write to `resource`.
25073 - :py:obj:`~.cudaGraphicsMapFlagsWriteDiscard`: Specifies that CUDA will not
25074 read from `resource` and will write over the entire contents of
25075 `resource`, so none of the data previously stored in `resource` will
25076 be preserved.
25078 If `resource` is presently mapped for access by CUDA then
25079 :py:obj:`~.cudaErrorUnknown` is returned. If `flags` is not one of the
25080 above values then :py:obj:`~.cudaErrorInvalidValue` is returned.
25082 Parameters
25083 ----------
25084 resource : :py:obj:`~.cudaGraphicsResource_t`
25085 Registered resource to set flags for
25086 flags : unsigned int
25087 Parameters for resource mapping
25089 Returns
25090 -------
25091 cudaError_t
25092 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
25094 See Also
25095 --------
25096 :py:obj:`~.cudaGraphicsMapResources`, :py:obj:`~.cuGraphicsResourceSetMapFlags`
25097 """
25098 cdef cyruntime.cudaGraphicsResource_t cyresource
25099 if resource is None:
25100 presource = 0
25101 elif isinstance(resource, (cudaGraphicsResource_t,)):
25102 presource = int(resource)
25103 else:
25104 presource = int(cudaGraphicsResource_t(resource))
25105 cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
25106 with nogil:
25107 err = cyruntime.cudaGraphicsResourceSetMapFlags(cyresource, flags)
25108 return (_dict_cudaError_t[err],)
25110@cython.embedsignature(True)
25111def cudaGraphicsMapResources(int count, resources, stream):
25112 """ Map graphics resources for access by CUDA.
25114 Maps the `count` graphics resources in `resources` for access by CUDA.
25116 The resources in `resources` may be accessed by CUDA until they are
25117 unmapped. The graphics API from which `resources` were registered
25118 should not access any resources while they are mapped by CUDA. If an
25119 application does so, the results are undefined.
25121 This function provides the synchronization guarantee that any graphics
25122 calls issued before :py:obj:`~.cudaGraphicsMapResources()` will
25123 complete before any subsequent CUDA work issued in `stream` begins.
25125 If `resources` contains any duplicate entries then
25126 :py:obj:`~.cudaErrorInvalidResourceHandle` is returned. If any of
25127 `resources` are presently mapped for access by CUDA then
25128 :py:obj:`~.cudaErrorUnknown` is returned.
25130 Parameters
25131 ----------
25132 count : int
25133 Number of resources to map
25134 resources : :py:obj:`~.cudaGraphicsResource_t`
25135 Resources to map for CUDA
25136 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
25137 Stream for synchronization
25139 Returns
25140 -------
25141 cudaError_t
25142 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
25144 See Also
25145 --------
25146 :py:obj:`~.cudaGraphicsResourceGetMappedPointer`, :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`, :py:obj:`~.cudaGraphicsUnmapResources`, :py:obj:`~.cuGraphicsMapResources`
25147 """
25148 cdef cyruntime.cudaStream_t cystream
25149 if stream is None:
25150 pstream = 0
25151 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
25152 pstream = int(stream)
25153 else:
25154 pstream = int(cudaStream_t(stream))
25155 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
25156 cdef cyruntime.cudaGraphicsResource_t *cyresources
25157 if resources is None:
25158 cyresources = <cyruntime.cudaGraphicsResource_t*><void_ptr>NULL
25159 elif isinstance(resources, (cudaGraphicsResource_t,)):
25160 presources = resources.getPtr()
25161 cyresources = <cyruntime.cudaGraphicsResource_t*><void_ptr>presources
25162 elif isinstance(resources, (int)):
25163 cyresources = <cyruntime.cudaGraphicsResource_t*><void_ptr>resources
25164 else:
25165 raise TypeError("Argument 'resources' is not an instance of the expected types (int, cudaGraphicsResource_t); found " + str(type(resources)))
25166 with nogil:
25167 err = cyruntime.cudaGraphicsMapResources(count, cyresources, cystream)
25168 return (_dict_cudaError_t[err],)
25170@cython.embedsignature(True)
25171def cudaGraphicsUnmapResources(int count, resources, stream):
25172 """ Unmap graphics resources.
25174 Unmaps the `count` graphics resources in `resources`.
25176 Once unmapped, the resources in `resources` may not be accessed by CUDA
25177 until they are mapped again.
25179 This function provides the synchronization guarantee that any CUDA work
25180 issued in `stream` before :py:obj:`~.cudaGraphicsUnmapResources()` will
25181 complete before any subsequently issued graphics work begins.
25183 If `resources` contains any duplicate entries then
25184 :py:obj:`~.cudaErrorInvalidResourceHandle` is returned. If any of
25185 `resources` are not presently mapped for access by CUDA then
25186 :py:obj:`~.cudaErrorUnknown` is returned.
25188 Parameters
25189 ----------
25190 count : int
25191 Number of resources to unmap
25192 resources : :py:obj:`~.cudaGraphicsResource_t`
25193 Resources to unmap
25194 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
25195 Stream for synchronization
25197 Returns
25198 -------
25199 cudaError_t
25200 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
25202 See Also
25203 --------
25204 :py:obj:`~.cudaGraphicsMapResources`, :py:obj:`~.cuGraphicsUnmapResources`
25205 """
25206 cdef cyruntime.cudaStream_t cystream
25207 if stream is None:
25208 pstream = 0
25209 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
25210 pstream = int(stream)
25211 else:
25212 pstream = int(cudaStream_t(stream))
25213 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
25214 cdef cyruntime.cudaGraphicsResource_t *cyresources
25215 if resources is None:
25216 cyresources = <cyruntime.cudaGraphicsResource_t*><void_ptr>NULL
25217 elif isinstance(resources, (cudaGraphicsResource_t,)):
25218 presources = resources.getPtr()
25219 cyresources = <cyruntime.cudaGraphicsResource_t*><void_ptr>presources
25220 elif isinstance(resources, (int)):
25221 cyresources = <cyruntime.cudaGraphicsResource_t*><void_ptr>resources
25222 else:
25223 raise TypeError("Argument 'resources' is not an instance of the expected types (int, cudaGraphicsResource_t); found " + str(type(resources)))
25224 with nogil:
25225 err = cyruntime.cudaGraphicsUnmapResources(count, cyresources, cystream)
25226 return (_dict_cudaError_t[err],)
25228@cython.embedsignature(True)
25229def cudaGraphicsResourceGetMappedPointer(resource):
25230 """ Get an device pointer through which to access a mapped graphics resource.
25232 Returns in `*devPtr` a pointer through which the mapped graphics
25233 resource `resource` may be accessed. Returns in `*size` the size of the
25234 memory in bytes which may be accessed from that pointer. The value set
25235 in `devPtr` may change every time that `resource` is mapped.
25237 If `resource` is not a buffer then it cannot be accessed via a pointer
25238 and :py:obj:`~.cudaErrorUnknown` is returned. If `resource` is not
25239 mapped then :py:obj:`~.cudaErrorUnknown` is returned.
25241 Parameters
25242 ----------
25243 resource : :py:obj:`~.cudaGraphicsResource_t`
25244 None
25246 Returns
25247 -------
25248 cudaError_t
25249 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
25250 devPtr : Any
25251 None
25252 size : int
25253 None
25254 """
25255 cdef cyruntime.cudaGraphicsResource_t cyresource
25256 if resource is None:
25257 presource = 0
25258 elif isinstance(resource, (cudaGraphicsResource_t,)):
25259 presource = int(resource)
25260 else:
25261 presource = int(cudaGraphicsResource_t(resource))
25262 cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
25263 cdef void_ptr devPtr = 0
25264 cdef size_t size = 0
25265 with nogil:
25266 err = cyruntime.cudaGraphicsResourceGetMappedPointer(<void**>&devPtr, &size, cyresource)
25267 if err != cyruntime.cudaSuccess:
25268 return (_dict_cudaError_t[err], None, None)
25269 return (_dict_cudaError_t[err], devPtr, size)
25271@cython.embedsignature(True)
25272def cudaGraphicsSubResourceGetMappedArray(resource, unsigned int arrayIndex, unsigned int mipLevel):
25273 """ Get an array through which to access a subresource of a mapped graphics resource.
25275 Returns in `*array` an array through which the subresource of the
25276 mapped graphics resource `resource` that corresponds to array index
25277 `arrayIndex` and mipmap level `mipLevel` may be accessed. The value set
25278 in `array` may change every time that `resource` is mapped.
25280 If `resource` is not a texture then it cannot be accessed via an array
25281 and :py:obj:`~.cudaErrorUnknown` is returned. If `arrayIndex` is not a
25282 valid array index for `resource` then :py:obj:`~.cudaErrorInvalidValue`
25283 is returned. If `mipLevel` is not a valid mipmap level for `resource`
25284 then :py:obj:`~.cudaErrorInvalidValue` is returned. If `resource` is
25285 not mapped then :py:obj:`~.cudaErrorUnknown` is returned.
25287 Parameters
25288 ----------
25289 resource : :py:obj:`~.cudaGraphicsResource_t`
25290 Mapped resource to access
25291 arrayIndex : unsigned int
25292 Array index for array textures or cubemap face index as defined by
25293 :py:obj:`~.cudaGraphicsCubeFace` for cubemap textures for the
25294 subresource to access
25295 mipLevel : unsigned int
25296 Mipmap level for the subresource to access
25298 Returns
25299 -------
25300 cudaError_t
25301 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
25302 array : :py:obj:`~.cudaArray_t`
25303 Returned array through which a subresource of `resource` may be
25304 accessed
25306 See Also
25307 --------
25308 :py:obj:`~.cudaGraphicsResourceGetMappedPointer`, :py:obj:`~.cuGraphicsSubResourceGetMappedArray`
25309 """
25310 cdef cyruntime.cudaGraphicsResource_t cyresource
25311 if resource is None:
25312 presource = 0
25313 elif isinstance(resource, (cudaGraphicsResource_t,)):
25314 presource = int(resource)
25315 else:
25316 presource = int(cudaGraphicsResource_t(resource))
25317 cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
25318 cdef cudaArray_t array = cudaArray_t()
25319 with nogil:
25320 err = cyruntime.cudaGraphicsSubResourceGetMappedArray(<cyruntime.cudaArray_t*>array._pvt_ptr, cyresource, arrayIndex, mipLevel)
25321 if err != cyruntime.cudaSuccess:
25322 return (_dict_cudaError_t[err], None)
25323 return (_dict_cudaError_t[err], array)
25325@cython.embedsignature(True)
25326def cudaGraphicsResourceGetMappedMipmappedArray(resource):
25327 """ Get a mipmapped array through which to access a mapped graphics resource.
25329 Returns in `*mipmappedArray` a mipmapped array through which the mapped
25330 graphics resource `resource` may be accessed. The value set in
25331 `mipmappedArray` may change every time that `resource` is mapped.
25333 If `resource` is not a texture then it cannot be accessed via an array
25334 and :py:obj:`~.cudaErrorUnknown` is returned. If `resource` is not
25335 mapped then :py:obj:`~.cudaErrorUnknown` is returned.
25337 Parameters
25338 ----------
25339 resource : :py:obj:`~.cudaGraphicsResource_t`
25340 Mapped resource to access
25342 Returns
25343 -------
25344 cudaError_t
25345 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
25346 mipmappedArray : :py:obj:`~.cudaMipmappedArray_t`
25347 Returned mipmapped array through which `resource` may be accessed
25349 See Also
25350 --------
25351 :py:obj:`~.cudaGraphicsResourceGetMappedPointer`, :py:obj:`~.cuGraphicsResourceGetMappedMipmappedArray`
25352 """
25353 cdef cyruntime.cudaGraphicsResource_t cyresource
25354 if resource is None:
25355 presource = 0
25356 elif isinstance(resource, (cudaGraphicsResource_t,)):
25357 presource = int(resource)
25358 else:
25359 presource = int(cudaGraphicsResource_t(resource))
25360 cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
25361 cdef cudaMipmappedArray_t mipmappedArray = cudaMipmappedArray_t()
25362 with nogil:
25363 err = cyruntime.cudaGraphicsResourceGetMappedMipmappedArray(<cyruntime.cudaMipmappedArray_t*>mipmappedArray._pvt_ptr, cyresource)
25364 if err != cyruntime.cudaSuccess:
25365 return (_dict_cudaError_t[err], None)
25366 return (_dict_cudaError_t[err], mipmappedArray)
25368@cython.embedsignature(True)
25369def cudaGetChannelDesc(array):
25370 """ Get the channel descriptor of an array.
25372 Returns in `*desc` the channel descriptor of the CUDA array `array`.
25374 Parameters
25375 ----------
25376 array : :py:obj:`~.cudaArray_const_t`
25377 Memory array on device
25379 Returns
25380 -------
25381 cudaError_t
25382 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
25383 desc : :py:obj:`~.cudaChannelFormatDesc`
25384 Channel format
25386 See Also
25387 --------
25388 :py:obj:`~.cudaCreateChannelDesc (C API)`, :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cudaCreateSurfaceObject`
25389 """
25390 cdef cyruntime.cudaArray_const_t cyarray
25391 if array is None:
25392 parray = 0
25393 elif isinstance(array, (cudaArray_const_t,)):
25394 parray = int(array)
25395 else:
25396 parray = int(cudaArray_const_t(array))
25397 cyarray = <cyruntime.cudaArray_const_t><void_ptr>parray
25398 cdef cudaChannelFormatDesc desc = cudaChannelFormatDesc()
25399 with nogil:
25400 err = cyruntime.cudaGetChannelDesc(<cyruntime.cudaChannelFormatDesc*>desc._pvt_ptr, cyarray)
25401 if err != cyruntime.cudaSuccess:
25402 return (_dict_cudaError_t[err], None)
25403 return (_dict_cudaError_t[err], desc)
25405@cython.embedsignature(True)
25406def cudaCreateChannelDesc(int x, int y, int z, int w, f not None : cudaChannelFormatKind):
25407 """ Returns a channel descriptor using the specified format.
25409 Returns a channel descriptor with format `f` and number of bits of each
25410 component `x`, `y`, `z`, and `w`. The :py:obj:`~.cudaChannelFormatDesc`
25411 is defined as:
25413 **View CUDA Toolkit Documentation for a C++ code example**
25415 where :py:obj:`~.cudaChannelFormatKind` is one of
25416 :py:obj:`~.cudaChannelFormatKindSigned`,
25417 :py:obj:`~.cudaChannelFormatKindUnsigned`, or
25418 :py:obj:`~.cudaChannelFormatKindFloat`.
25420 Parameters
25421 ----------
25422 x : int
25423 X component
25424 y : int
25425 Y component
25426 z : int
25427 Z component
25428 w : int
25429 W component
25430 f : :py:obj:`~.cudaChannelFormatKind`
25431 Channel format
25433 Returns
25434 -------
25435 cudaError_t
25436 :py:obj:`~.cudaSuccess` (this function always succeeds)
25437 :py:obj:`~.cudaChannelFormatDesc`
25438 Channel descriptor with format `f`
25440 See Also
25441 --------
25442 cudaCreateChannelDesc (C++ API), :py:obj:`~.cudaGetChannelDesc`, :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cudaCreateSurfaceObject`
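
    Examples
    --------
    A minimal sketch of a descriptor for a one-channel 32-bit float format:

    >>> err, desc = cudaCreateChannelDesc(
    ...     32, 0, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindFloat)  # doctest: +SKIP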
25443 """
25444 cdef cyruntime.cudaChannelFormatKind cyf = f.value
25445 with nogil:
25446 err = cyruntime.cudaCreateChannelDesc(x, y, z, w, cyf)
25447 cdef cudaChannelFormatDesc wrapper = cudaChannelFormatDesc()
25448 wrapper._pvt_ptr[0] = err
25449 return (cudaError_t.cudaSuccess, wrapper)
25451@cython.embedsignature(True)
25452def cudaCreateTextureObject(pResDesc : Optional[cudaResourceDesc], pTexDesc : Optional[cudaTextureDesc], pResViewDesc : Optional[cudaResourceViewDesc]):
25453 """ Creates a texture object.
25455 Creates a texture object and returns it in `pTexObject`. `pResDesc`
25456 describes the data to texture from. `pTexDesc` describes how the data
25457 should be sampled. `pResViewDesc` is an optional argument that
25458 specifies an alternate format for the data described by `pResDesc`, and
25459 also describes the subresource region to restrict access to when
25460 texturing. `pResViewDesc` can only be specified if the type of resource
25461 is a CUDA array or a CUDA mipmapped array not in a block compressed
25462 format.
25464 Texture objects are only supported on devices of compute capability 3.0
25465 or higher. Additionally, a texture object is an opaque value, and, as
25466 such, should only be accessed through CUDA API calls.
25468 The :py:obj:`~.cudaResourceDesc` structure is defined as:
25470 **View CUDA Toolkit Documentation for a C++ code example**
25472 where:
25474 - :py:obj:`~.cudaResourceDesc.resType` specifies the type of resource
25475 to texture from. :py:obj:`~.cudaResourceType` is defined as:
25477 - **View CUDA Toolkit Documentation for a C++ code example**
25479 If :py:obj:`~.cudaResourceDesc.resType` is set to
25480 :py:obj:`~.cudaResourceTypeArray`,
25481 :py:obj:`~.cudaResourceDesc`::res::array::array must be set to a valid
25482 CUDA array handle.
25484 If :py:obj:`~.cudaResourceDesc.resType` is set to
25485 :py:obj:`~.cudaResourceTypeMipmappedArray`,
25486 :py:obj:`~.cudaResourceDesc`::res::mipmap::mipmap must be set to a
25487 valid CUDA mipmapped array handle and
25488 :py:obj:`~.cudaTextureDesc.normalizedCoords` must be set to true.
25490 If :py:obj:`~.cudaResourceDesc.resType` is set to
25491 :py:obj:`~.cudaResourceTypeLinear`,
25492 :py:obj:`~.cudaResourceDesc`::res::linear::devPtr must be set to a
25493 valid device pointer, that is aligned to
25494 :py:obj:`~.cudaDeviceProp.textureAlignment`.
25495 :py:obj:`~.cudaResourceDesc`::res::linear::desc describes the format
25496 and the number of components per array element.
25497 :py:obj:`~.cudaResourceDesc`::res::linear::sizeInBytes specifies the
25498 size of the array in bytes. The total number of elements in the linear
25499 address range cannot exceed
25500 :py:obj:`~.cudaDeviceGetTexture1DLinearMaxWidth()`. The number of
25501 elements is computed as (sizeInBytes / sizeof(desc)).
25503 If :py:obj:`~.cudaResourceDesc.resType` is set to
25504 :py:obj:`~.cudaResourceTypePitch2D`,
25505 :py:obj:`~.cudaResourceDesc`::res::pitch2D::devPtr must be set to a
25506 valid device pointer, that is aligned to
25507 :py:obj:`~.cudaDeviceProp.textureAlignment`.
25508 :py:obj:`~.cudaResourceDesc`::res::pitch2D::desc describes the format
25509 and the number of components per array element.
25510 :py:obj:`~.cudaResourceDesc`::res::pitch2D::width and
25511 :py:obj:`~.cudaResourceDesc`::res::pitch2D::height specify the width
25512 and height of the array in elements, and cannot exceed
25513 :py:obj:`~.cudaDeviceProp.maxTexture2DLinear`[0] and
25514 :py:obj:`~.cudaDeviceProp.maxTexture2DLinear`[1] respectively.
25515 :py:obj:`~.cudaResourceDesc`::res::pitch2D::pitchInBytes specifies the
25516 pitch between two rows in bytes and has to be aligned to
25517 :py:obj:`~.cudaDeviceProp.texturePitchAlignment`. Pitch cannot exceed
25518 :py:obj:`~.cudaDeviceProp.maxTexture2DLinear`[2].
25520 The :py:obj:`~.cudaTextureDesc` struct is defined as
25522 **View CUDA Toolkit Documentation for a C++ code example**
25524 where
25526 - :py:obj:`~.cudaTextureDesc.addressMode` specifies the addressing mode
25527 for each dimension of the texture data.
25528 :py:obj:`~.cudaTextureAddressMode` is defined as:
25530 - **View CUDA Toolkit Documentation for a C++ code example**
25532 - This is ignored if :py:obj:`~.cudaResourceDesc.resType` is
25533 :py:obj:`~.cudaResourceTypeLinear`. Also, if
25534 :py:obj:`~.cudaTextureDesc.normalizedCoords` is set to zero,
25535 :py:obj:`~.cudaAddressModeWrap` and :py:obj:`~.cudaAddressModeMirror`
25536 won't be supported and will be switched to
25537 :py:obj:`~.cudaAddressModeClamp`.
25539 - :py:obj:`~.cudaTextureDesc.filterMode` specifies the filtering mode
25540 to be used when fetching from the texture.
25541 :py:obj:`~.cudaTextureFilterMode` is defined as:
25543 - **View CUDA Toolkit Documentation for a C++ code example**
25545 - This is ignored if :py:obj:`~.cudaResourceDesc.resType` is
25546 :py:obj:`~.cudaResourceTypeLinear`.
25548 - :py:obj:`~.cudaTextureDesc.readMode` specifies whether integer data
25549 should be converted to floating point or not.
25550 :py:obj:`~.cudaTextureReadMode` is defined as:
25552 - **View CUDA Toolkit Documentation for a C++ code example**
25554 - Note that this applies only to 8-bit and 16-bit integer formats.
25555 A 32-bit integer format would not be promoted, regardless of whether
25556 or not :py:obj:`~.cudaTextureDesc.readMode` is set to
25557 :py:obj:`~.cudaReadModeNormalizedFloat`.
25559 - :py:obj:`~.cudaTextureDesc.sRGB` specifies whether sRGB to linear
25560 conversion should be performed during texture fetch.
25562 - :py:obj:`~.cudaTextureDesc.borderColor` specifies the float values of
25563 the border color, where :py:obj:`~.cudaTextureDesc.borderColor`[0]
25564 contains the value of 'R', :py:obj:`~.cudaTextureDesc.borderColor`[1]
25565 contains the value of 'G', :py:obj:`~.cudaTextureDesc.borderColor`[2]
25566 contains the value of 'B', and :py:obj:`~.cudaTextureDesc.borderColor`[3]
25567 contains the value of 'A'. Note that applications using integer border
25568 color values will need to reinterpret_cast these values to float. The
25569 values are used only when the addressing mode specified by
25570 :py:obj:`~.cudaTextureDesc.addressMode` is cudaAddressModeBorder.
25572 - :py:obj:`~.cudaTextureDesc.normalizedCoords` specifies whether the
25573 texture coordinates will be normalized or not.
25575 - :py:obj:`~.cudaTextureDesc.maxAnisotropy` specifies the maximum
25576 anisotropy ratio to be used when doing anisotropic filtering. This
25577 value will be clamped to the range [1,16].
25579 - :py:obj:`~.cudaTextureDesc.mipmapFilterMode` specifies the filter
25580 mode when the calculated mipmap level lies between two defined mipmap
25581 levels.
25583 - :py:obj:`~.cudaTextureDesc.mipmapLevelBias` specifies the offset to
25584 be applied to the calculated mipmap level.
25586 - :py:obj:`~.cudaTextureDesc.minMipmapLevelClamp` specifies the lower
25587 end of the mipmap level range to clamp access to.
25589 - :py:obj:`~.cudaTextureDesc.maxMipmapLevelClamp` specifies the upper
25590 end of the mipmap level range to clamp access to.
25592 - :py:obj:`~.cudaTextureDesc.disableTrilinearOptimization` specifies
25593 whether the trilinear filtering optimizations will be disabled.
25595 - :py:obj:`~.cudaTextureDesc.seamlessCubemap` specifies whether
25596 seamless cube map filtering is enabled. This flag can only be
25597 specified if the underlying resource is a CUDA array or a CUDA
25598 mipmapped array that was created with the flag
25599 :py:obj:`~.cudaArrayCubemap`. When seamless cube map filtering is
25600 enabled, texture address modes specified by
25601 :py:obj:`~.cudaTextureDesc.addressMode` are ignored. Instead, if the
25602 :py:obj:`~.cudaTextureDesc.filterMode` is set to
25603 :py:obj:`~.cudaFilterModePoint` the address mode
25604 :py:obj:`~.cudaAddressModeClamp` will be applied for all dimensions.
25605 If the :py:obj:`~.cudaTextureDesc.filterMode` is set to
25606 :py:obj:`~.cudaFilterModeLinear` seamless cube map filtering will be
25607 performed when sampling along the cube face borders.
25609 The :py:obj:`~.cudaResourceViewDesc` struct is defined as
25611 **View CUDA Toolkit Documentation for a C++ code example**
25613 where:
25615 - :py:obj:`~.cudaResourceViewDesc.format` specifies how the data
25616 contained in the CUDA array or CUDA mipmapped array should be
25617 interpreted. Note that this can incur a change in size of the texture
25618 data. If the resource view format is a block compressed format, then
25619 the underlying CUDA array or CUDA mipmapped array has to have a
25620 32-bit unsigned integer format with 2 or 4 channels, depending on the
25621 block compressed format. For example, BC1 and BC4 require the underlying
25622 CUDA array to have a 32-bit unsigned int with 2 channels. The other
25623 BC formats require the underlying resource to have the same 32-bit
25624 unsigned int format but with 4 channels.
25626 - :py:obj:`~.cudaResourceViewDesc.width` specifies the new width of the
25627 texture data. If the resource view format is a block compressed
25628 format, this value has to be 4 times the original width of the
25629 resource. For non block compressed formats, this value has to be
25630 equal to that of the original resource.
25632 - :py:obj:`~.cudaResourceViewDesc.height` specifies the new height of
25633 the texture data. If the resource view format is a block compressed
25634 format, this value has to be 4 times the original height of the
25635 resource. For non block compressed formats, this value has to be
25636 equal to that of the original resource.
25638 - :py:obj:`~.cudaResourceViewDesc.depth` specifies the new depth of the
25639 texture data. This value has to be equal to that of the original
25640 resource.
25642 - :py:obj:`~.cudaResourceViewDesc.firstMipmapLevel` specifies the most
25643 detailed mipmap level. This will be the new mipmap level zero. For
25644 non-mipmapped resources, this value has to be
25645 zero. :py:obj:`~.cudaTextureDesc.minMipmapLevelClamp` and
25646 :py:obj:`~.cudaTextureDesc.maxMipmapLevelClamp` will be relative to
25647 this value. For example, if the firstMipmapLevel is set to 2, and a
25648 minMipmapLevelClamp of 1.2 is specified, then the actual minimum
25649 mipmap level clamp will be 3.2.
25651 - :py:obj:`~.cudaResourceViewDesc.lastMipmapLevel` specifies the least
25652 detailed mipmap level. For non-mipmapped resources, this value has to
25653 be zero.
25655 - :py:obj:`~.cudaResourceViewDesc.firstLayer` specifies the first layer
25656 index for layered textures. This will be the new layer zero. For non-
25657 layered resources, this value has to be zero.
25659 - :py:obj:`~.cudaResourceViewDesc.lastLayer` specifies the last layer
25660 index for layered textures. For non-layered resources, this value has
25661 to be zero.
25663 Parameters
25664 ----------
25665 pResDesc : :py:obj:`~.cudaResourceDesc`
25666 Resource descriptor
25667 pTexDesc : :py:obj:`~.cudaTextureDesc`
25668 Texture descriptor
25669 pResViewDesc : :py:obj:`~.cudaResourceViewDesc`
25670 Resource view descriptor
25672 Returns
25673 -------
25674 cudaError_t
25675 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
25676 pTexObject : :py:obj:`~.cudaTextureObject_t`
25677 Texture object to create
25679 See Also
25680 --------
25681 :py:obj:`~.cudaDestroyTextureObject`, :py:obj:`~.cuTexObjectCreate`
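
    Examples
    --------
    A minimal sketch that textures from 1024 floats of linear device
    memory (assumes an initialized CUDA runtime; error checking is
    elided; the descriptor field names follow this module's wrappers):

    >>> err, dptr = cudaMalloc(1024 * 4)  # doctest: +SKIP
    >>> resDesc = cudaResourceDesc()
    >>> resDesc.resType = cudaResourceType.cudaResourceTypeLinear
    >>> resDesc.res.linear.devPtr = dptr  # doctest: +SKIP
    >>> resDesc.res.linear.desc = cudaCreateChannelDesc(
    ...     32, 0, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindFloat)[1]  # doctest: +SKIP
    >>> resDesc.res.linear.sizeInBytes = 1024 * 4
    >>> texDesc = cudaTextureDesc()
    >>> texDesc.readMode = cudaTextureReadMode.cudaReadModeElementType
    >>> err, texObj = cudaCreateTextureObject(resDesc, texDesc, None)  # doctest: +SKIP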
25682 """
25683 cdef cudaTextureObject_t pTexObject = cudaTextureObject_t()
25684 cdef cyruntime.cudaResourceDesc* cypResDesc_ptr = pResDesc._pvt_ptr if pResDesc is not None else NULL
25685 cdef cyruntime.cudaTextureDesc* cypTexDesc_ptr = pTexDesc._pvt_ptr if pTexDesc is not None else NULL
25686 cdef cyruntime.cudaResourceViewDesc* cypResViewDesc_ptr = pResViewDesc._pvt_ptr if pResViewDesc is not None else NULL
25687 with nogil:
25688 err = cyruntime.cudaCreateTextureObject(<cyruntime.cudaTextureObject_t*>pTexObject._pvt_ptr, cypResDesc_ptr, cypTexDesc_ptr, cypResViewDesc_ptr)
25689 if err != cyruntime.cudaSuccess:
25690 return (_dict_cudaError_t[err], None)
25691 return (_dict_cudaError_t[err], pTexObject)
25693@cython.embedsignature(True)
25694def cudaDestroyTextureObject(texObject):
25695 """ Destroys a texture object.
25697 Destroys the texture object specified by `texObject`.
25699 Parameters
25700 ----------
25701 texObject : :py:obj:`~.cudaTextureObject_t`
25702 Texture object to destroy
25704 Returns
25705 -------
25706 cudaError_t
25707 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
25709 See Also
25710 --------
25711 :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cuTexObjectDestroy`
25712 """
25713 cdef cyruntime.cudaTextureObject_t cytexObject
25714 if texObject is None:
25715 ptexObject = 0
25716 elif isinstance(texObject, (cudaTextureObject_t,)):
25717 ptexObject = int(texObject)
25718 else:
25719 ptexObject = int(cudaTextureObject_t(texObject))
25720 cytexObject = <cyruntime.cudaTextureObject_t><void_ptr>ptexObject
25721 with nogil:
25722 err = cyruntime.cudaDestroyTextureObject(cytexObject)
25723 return (_dict_cudaError_t[err],)
25725@cython.embedsignature(True)
25726def cudaGetTextureObjectResourceDesc(texObject):
25727 """ Returns a texture object's resource descriptor.
25729 Returns the resource descriptor for the texture object specified by
25730 `texObject`.
25732 Parameters
25733 ----------
25734 texObject : :py:obj:`~.cudaTextureObject_t`
25735 Texture object
25737 Returns
25738 -------
25739 cudaError_t
25740 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
25741 pResDesc : :py:obj:`~.cudaResourceDesc`
25742 Resource descriptor
25744 See Also
25745 --------
25746 :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cuTexObjectGetResourceDesc`
25747 """
25748 cdef cyruntime.cudaTextureObject_t cytexObject
25749 if texObject is None:
25750 ptexObject = 0
25751 elif isinstance(texObject, (cudaTextureObject_t,)):
25752 ptexObject = int(texObject)
25753 else:
25754 ptexObject = int(cudaTextureObject_t(texObject))
25755 cytexObject = <cyruntime.cudaTextureObject_t><void_ptr>ptexObject
25756 cdef cudaResourceDesc pResDesc = cudaResourceDesc()
25757 with nogil:
25758 err = cyruntime.cudaGetTextureObjectResourceDesc(<cyruntime.cudaResourceDesc*>pResDesc._pvt_ptr, cytexObject)
25759 if err != cyruntime.cudaSuccess:
25760 return (_dict_cudaError_t[err], None)
25761 return (_dict_cudaError_t[err], pResDesc)
25763@cython.embedsignature(True)
25764def cudaGetTextureObjectTextureDesc(texObject):
25765 """ Returns a texture object's texture descriptor.
25767 Returns the texture descriptor for the texture object specified by
25768 `texObject`.
25770 Parameters
25771 ----------
25772 texObject : :py:obj:`~.cudaTextureObject_t`
25773 Texture object
25775 Returns
25776 -------
25777 cudaError_t
25778 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
25779 pTexDesc : :py:obj:`~.cudaTextureDesc`
25780 Texture descriptor
25782 See Also
25783 --------
25784 :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cuTexObjectGetTextureDesc`
25785 """
25786 cdef cyruntime.cudaTextureObject_t cytexObject
25787 if texObject is None:
25788 ptexObject = 0
25789 elif isinstance(texObject, (cudaTextureObject_t,)):
25790 ptexObject = int(texObject)
25791 else:
25792 ptexObject = int(cudaTextureObject_t(texObject))
25793 cytexObject = <cyruntime.cudaTextureObject_t><void_ptr>ptexObject
25794 cdef cudaTextureDesc pTexDesc = cudaTextureDesc()
25795 with nogil:
25796 err = cyruntime.cudaGetTextureObjectTextureDesc(<cyruntime.cudaTextureDesc*>pTexDesc._pvt_ptr, cytexObject)
25797 if err != cyruntime.cudaSuccess:
25798 return (_dict_cudaError_t[err], None)
25799 return (_dict_cudaError_t[err], pTexDesc)
25801@cython.embedsignature(True)
25802def cudaGetTextureObjectResourceViewDesc(texObject):
25803 """ Returns a texture object's resource view descriptor.
25805 Returns the resource view descriptor for the texture object specified
25806 by `texObject`. If no resource view was specified,
25807 :py:obj:`~.cudaErrorInvalidValue` is returned.
25809 Parameters
25810 ----------
25811 texObject : :py:obj:`~.cudaTextureObject_t`
25812 Texture object
25814 Returns
25815 -------
25816 cudaError_t
25817 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
25818 pResViewDesc : :py:obj:`~.cudaResourceViewDesc`
25819 Resource view descriptor
25821 See Also
25822 --------
25823 :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cuTexObjectGetResourceViewDesc`
25824 """
25825 cdef cyruntime.cudaTextureObject_t cytexObject
25826 if texObject is None:
25827 ptexObject = 0
25828 elif isinstance(texObject, (cudaTextureObject_t,)):
25829 ptexObject = int(texObject)
25830 else:
25831 ptexObject = int(cudaTextureObject_t(texObject))
25832 cytexObject = <cyruntime.cudaTextureObject_t><void_ptr>ptexObject
25833 cdef cudaResourceViewDesc pResViewDesc = cudaResourceViewDesc()
25834 with nogil:
25835 err = cyruntime.cudaGetTextureObjectResourceViewDesc(<cyruntime.cudaResourceViewDesc*>pResViewDesc._pvt_ptr, cytexObject)
25836 if err != cyruntime.cudaSuccess:
25837 return (_dict_cudaError_t[err], None)
25838 return (_dict_cudaError_t[err], pResViewDesc)
25840@cython.embedsignature(True)
25841def cudaCreateSurfaceObject(pResDesc : Optional[cudaResourceDesc]):
25842 """ Creates a surface object.
25844 Creates a surface object and returns it in `pSurfObject`. `pResDesc`
25845 describes the data to perform surface load/stores on.
25846 :py:obj:`~.cudaResourceDesc.resType` must be
25847 :py:obj:`~.cudaResourceTypeArray` and
25848    :py:obj:`~.cudaResourceDesc.res.array.array` must be set to a valid
25849 CUDA array handle.
25851 Surface objects are only supported on devices of compute capability 3.0
25852 or higher. Additionally, a surface object is an opaque value, and, as
25853 such, should only be accessed through CUDA API calls.
25855 Parameters
25856 ----------
25857 pResDesc : :py:obj:`~.cudaResourceDesc`
25858 Resource descriptor
25860 Returns
25861 -------
25862 cudaError_t
25863 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidChannelDescriptor`, :py:obj:`~.cudaErrorInvalidResourceHandle`
25864 pSurfObject : :py:obj:`~.cudaSurfaceObject_t`
25865 Surface object to create
25867 See Also
25868 --------
25869 :py:obj:`~.cudaDestroySurfaceObject`, :py:obj:`~.cuSurfObjectCreate`
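    Examples
    --------
    A minimal illustrative sketch (an editorial addition); note that the
    backing array must be created with the cudaArraySurfaceLoadStore flag:

    >>> from cuda.bindings import runtime
    >>> _, fmt = runtime.cudaCreateChannelDesc(
    ...     8, 0, 0, 0, runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsigned)
    >>> _, arr = runtime.cudaMallocArray(fmt, 64, 64, runtime.cudaArraySurfaceLoadStore)
    >>> resDesc = runtime.cudaResourceDesc()
    >>> resDesc.resType = runtime.cudaResourceType.cudaResourceTypeArray
    >>> resDesc.res.array.array = arr
    >>> err, surfObj = runtime.cudaCreateSurfaceObject(resDesc)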
25870 """
25871 cdef cudaSurfaceObject_t pSurfObject = cudaSurfaceObject_t()
25872 cdef cyruntime.cudaResourceDesc* cypResDesc_ptr = pResDesc._pvt_ptr if pResDesc is not None else NULL
25873 with nogil:
25874 err = cyruntime.cudaCreateSurfaceObject(<cyruntime.cudaSurfaceObject_t*>pSurfObject._pvt_ptr, cypResDesc_ptr)
25875 if err != cyruntime.cudaSuccess:
25876 return (_dict_cudaError_t[err], None)
25877 return (_dict_cudaError_t[err], pSurfObject)
25879@cython.embedsignature(True)
25880def cudaDestroySurfaceObject(surfObject):
25881 """ Destroys a surface object.
25883 Destroys the surface object specified by `surfObject`.
25885 Parameters
25886 ----------
25887 surfObject : :py:obj:`~.cudaSurfaceObject_t`
25888 Surface object to destroy
25890 Returns
25891 -------
25892 cudaError_t
25893 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
25895 See Also
25896 --------
25897 :py:obj:`~.cudaCreateSurfaceObject`, :py:obj:`~.cuSurfObjectDestroy`
25898 """
25899 cdef cyruntime.cudaSurfaceObject_t cysurfObject
25900 if surfObject is None:
25901 psurfObject = 0
25902 elif isinstance(surfObject, (cudaSurfaceObject_t,)):
25903 psurfObject = int(surfObject)
25904 else:
25905 psurfObject = int(cudaSurfaceObject_t(surfObject))
25906 cysurfObject = <cyruntime.cudaSurfaceObject_t><void_ptr>psurfObject
25907 with nogil:
25908 err = cyruntime.cudaDestroySurfaceObject(cysurfObject)
25909 return (_dict_cudaError_t[err],)
25911@cython.embedsignature(True)
25912def cudaGetSurfaceObjectResourceDesc(surfObject):
25913 """ Returns a surface object's resource descriptor Returns the resource descriptor for the surface object specified by `surfObject`.
25915 Parameters
25916 ----------
25917 surfObject : :py:obj:`~.cudaSurfaceObject_t`
25918 Surface object
25920 Returns
25921 -------
25922 cudaError_t
25923 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
25924 pResDesc : :py:obj:`~.cudaResourceDesc`
25925 Resource descriptor
25927 See Also
25928 --------
25929 :py:obj:`~.cudaCreateSurfaceObject`, :py:obj:`~.cuSurfObjectGetResourceDesc`
25930 """
25931 cdef cyruntime.cudaSurfaceObject_t cysurfObject
25932 if surfObject is None:
25933 psurfObject = 0
25934 elif isinstance(surfObject, (cudaSurfaceObject_t,)):
25935 psurfObject = int(surfObject)
25936 else:
25937 psurfObject = int(cudaSurfaceObject_t(surfObject))
25938 cysurfObject = <cyruntime.cudaSurfaceObject_t><void_ptr>psurfObject
25939 cdef cudaResourceDesc pResDesc = cudaResourceDesc()
25940 with nogil:
25941 err = cyruntime.cudaGetSurfaceObjectResourceDesc(<cyruntime.cudaResourceDesc*>pResDesc._pvt_ptr, cysurfObject)
25942 if err != cyruntime.cudaSuccess:
25943 return (_dict_cudaError_t[err], None)
25944 return (_dict_cudaError_t[err], pResDesc)
25946@cython.embedsignature(True)
25947def cudaDriverGetVersion():
25948 """ Returns the latest version of CUDA supported by the driver.
25950 Returns in `*driverVersion` the latest version of CUDA supported by the
25951 driver. The version is returned as (1000 * major + 10 * minor). For
25952 example, CUDA 9.2 would be represented by 9020. If no driver is
25953 installed, then 0 is returned as the driver version.
25955 This function automatically returns :py:obj:`~.cudaErrorInvalidValue`
25956 if `driverVersion` is NULL.
25958 Returns
25959 -------
25960 cudaError_t
25961 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
25962 driverVersion : int
25963 Returns the CUDA driver version.
25965 See Also
25966 --------
25967 :py:obj:`~.cudaRuntimeGetVersion`, :py:obj:`~.cuDriverGetVersion`
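    Examples
    --------
    Decoding the packed version number (an editorial addition):

    >>> from cuda.bindings import runtime
    >>> err, ver = runtime.cudaDriverGetVersion()
    >>> major, minor = ver // 1000, (ver % 1000) // 10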
25968 """
25969 cdef int driverVersion = 0
25970 with nogil:
25971 err = cyruntime.cudaDriverGetVersion(&driverVersion)
25972 if err != cyruntime.cudaSuccess:
25973 return (_dict_cudaError_t[err], None)
25974 return (_dict_cudaError_t[err], driverVersion)
25976@cython.embedsignature(True)
25977def cudaRuntimeGetVersion():
25978 """ Returns the CUDA Runtime version.
25980 Returns in `*runtimeVersion` the version number of the current CUDA
25981 Runtime instance. The version is returned as (1000 * major + 10 *
25982 minor). For example, CUDA 9.2 would be represented by 9020.
25984 As of CUDA 12.0, this function no longer initializes CUDA. The purpose
25985 of this API is solely to return a compile-time constant stating the
25986 CUDA Toolkit version in the above format.
25988 This function automatically returns :py:obj:`~.cudaErrorInvalidValue`
25989 if the `runtimeVersion` argument is NULL.
25991 Returns
25992 -------
25993 cudaError_t
25994 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
25995 runtimeVersion : int
25996 Returns the CUDA Runtime version.
25998 See Also
25999 --------
26000 :py:obj:`~.cudaDriverGetVersion`, :py:obj:`~.cuDriverGetVersion`
26001 """
26002 cdef int runtimeVersion = 0
26003 with nogil:
26004 err = cyruntime.cudaRuntimeGetVersion(&runtimeVersion)
26005 if err != cyruntime.cudaSuccess:
26006 return (_dict_cudaError_t[err], None)
26007 return (_dict_cudaError_t[err], runtimeVersion)
26009@cython.embedsignature(True)
26010def cudaLogsRegisterCallback(callbackFunc, userData):
26011 """ Register a callback function to receive error log messages.
26013 Parameters
26014 ----------
26015 callbackFunc : :py:obj:`~.cudaLogsCallback_t`
26016 The function to register as a callback
26017 userData : Any
26018 A generic pointer to user data. This is passed into the callback
26019 function.
26021 Returns
26022 -------
26023 cudaError_t
26024        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26025 callback_out : :py:obj:`~.cudaLogsCallbackHandle`
26026 Optional location to store the callback handle after it is
26027 registered
26028 """
26029 cdef cyruntime.cudaLogsCallback_t cycallbackFunc
26030 if callbackFunc is None:
26031 pcallbackFunc = 0
26032 elif isinstance(callbackFunc, (cudaLogsCallback_t,)):
26033 pcallbackFunc = int(callbackFunc)
26034 else:
26035 pcallbackFunc = int(cudaLogsCallback_t(callbackFunc))
26036 cycallbackFunc = <cyruntime.cudaLogsCallback_t><void_ptr>pcallbackFunc
26037 cyuserData = _HelperInputVoidPtr(userData)
26038 cdef void* cyuserData_ptr = <void*><void_ptr>cyuserData.cptr
26039 cdef cudaLogsCallbackHandle callback_out = cudaLogsCallbackHandle()
26040 with nogil:
26041 err = cyruntime.cudaLogsRegisterCallback(cycallbackFunc, cyuserData_ptr, <cyruntime.cudaLogsCallbackHandle*>callback_out._pvt_ptr)
26042 if err != cyruntime.cudaSuccess:
26043 return (_dict_cudaError_t[err], None)
26044 return (_dict_cudaError_t[err], callback_out)
26046@cython.embedsignature(True)
26047def cudaLogsUnregisterCallback(callback):
26048 """ Unregister a log message callback.
26050 Parameters
26051 ----------
26052 callback : :py:obj:`~.cudaLogsCallbackHandle`
26053 The callback instance to unregister from receiving log messages
26055 Returns
26056 -------
26057 cudaError_t
26058        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26059 """
26060 cdef cyruntime.cudaLogsCallbackHandle cycallback
26061 if callback is None:
26062 pcallback = 0
26063 elif isinstance(callback, (cudaLogsCallbackHandle,)):
26064 pcallback = int(callback)
26065 else:
26066 pcallback = int(cudaLogsCallbackHandle(callback))
26067 cycallback = <cyruntime.cudaLogsCallbackHandle><void_ptr>pcallback
26068 with nogil:
26069 err = cyruntime.cudaLogsUnregisterCallback(cycallback)
26070 return (_dict_cudaError_t[err],)
26072@cython.embedsignature(True)
26073def cudaLogsCurrent(unsigned int flags):
26074 """ Sets log iterator to point to the end of log buffer, where the next message would be written.
26076 Parameters
26077 ----------
26078 flags : unsigned int
26079 Reserved for future use, must be 0
26081 Returns
26082 -------
26083 cudaError_t
26084        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26085 iterator_out : :py:obj:`~.cudaLogIterator`
26086 Location to store an iterator to the current tail of the logs
26087 """
26088 cdef cudaLogIterator iterator_out = cudaLogIterator()
26089 with nogil:
26090 err = cyruntime.cudaLogsCurrent(<cyruntime.cudaLogIterator*>iterator_out._pvt_ptr, flags)
26091 if err != cyruntime.cudaSuccess:
26092 return (_dict_cudaError_t[err], None)
26093 return (_dict_cudaError_t[err], iterator_out)
26095@cython.embedsignature(True)
26096def cudaLogsDumpToFile(iterator : Optional[cudaLogIterator], char* pathToFile, unsigned int flags):
26097 """ Dump accumulated driver logs into a file.
26099 Logs generated by the driver are stored in an internal buffer and can
26100 be copied out using this API. This API dumps all driver logs starting
26101 from `iterator` into `pathToFile` provided.
26103 Parameters
26104 ----------
26105 iterator : :py:obj:`~.cudaLogIterator`
26106 Optional auto-advancing iterator specifying the starting log to
26107 read. NULL value dumps all logs.
26108 pathToFile : bytes
26109 Path to output file for dumping logs
26110 flags : unsigned int
26111 Reserved for future use, must be 0
26113 Returns
26114 -------
26115 cudaError_t
26116        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26117 iterator : :py:obj:`~.cudaLogIterator`
26118 Optional auto-advancing iterator specifying the starting log to
26119 read. NULL value dumps all logs.
26121 Notes
26122 -----
26123 `iterator` is auto-advancing. Dumping logs will update the value of `iterator` to receive the next generated log.
26125    The driver reserves limited memory for storing logs. The oldest logs may be overwritten and become unrecoverable. An indication will appear in the destination output if the logs have been truncated. Dump the logs after each failed API call to mitigate this risk.
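    Examples
    --------
    A sketch of the intended usage (an editorial addition; the output path is
    illustrative):

    >>> from cuda.bindings import runtime
    >>> err, it = runtime.cudaLogsCurrent(0)
    >>> # ... subsequent CUDA calls; after a failure, dump what was logged:
    >>> err, it = runtime.cudaLogsDumpToFile(it, b"/tmp/cuda_logs.txt", 0)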
26126 """
26127 cdef cyruntime.cudaLogIterator* cyiterator = NULL
26128 if iterator is not None:
26129 cyiterator = iterator._pvt_ptr
26130 with nogil:
26131 err = cyruntime.cudaLogsDumpToFile(cyiterator, pathToFile, flags)
26132 if err != cyruntime.cudaSuccess:
26133 return (_dict_cudaError_t[err], None)
26134 return (_dict_cudaError_t[err], iterator)
26136@cython.embedsignature(True)
26137def cudaLogsDumpToMemory(iterator : Optional[cudaLogIterator], char* buffer, size_t size, unsigned int flags):
26138 """ Dump accumulated driver logs into a buffer.
26140 Logs generated by the driver are stored in an internal buffer and can
26141 be copied out using this API. This API dumps driver logs from
26142 `iterator` into `buffer` up to the size specified in `*size`. The
26143    driver will always null-terminate the buffer but there will not be a
26144    null character between log entries, only a newline character. The driver will
26145 then return the actual number of bytes written in `*size`, excluding
26146 the null terminator. If there are no messages to dump, `*size` will be
26147    set to 0 and the function will return :py:obj:`~.cudaSuccess`. If the
26148    provided `buffer` is not large enough to hold any messages, `*size`
26149    will be set to 0 and the function will return
26150    :py:obj:`~.cudaErrorInvalidValue`.
26152 Parameters
26153 ----------
26154 iterator : :py:obj:`~.cudaLogIterator`
26155 Optional auto-advancing iterator specifying the starting log to
26156 read. NULL value dumps all logs.
26157 buffer : bytes
26158 Pointer to dump logs
26159 size : int
26160 See description
26161 flags : unsigned int
26162 Reserved for future use, must be 0
26164 Returns
26165 -------
26166 cudaError_t
26167        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26168 iterator : :py:obj:`~.cudaLogIterator`
26169 Optional auto-advancing iterator specifying the starting log to
26170 read. NULL value dumps all logs.
26171 size : int
26172 See description
26174 Notes
26175 -----
26176 `iterator` is auto-advancing. Dumping logs will update the value of `iterator` to receive the next generated log.
26178    The driver reserves limited memory for storing logs. The maximum size of the buffer is 25600 bytes. The oldest logs may be overwritten and become unrecoverable. An indication will appear in the destination output if the logs have been truncated. Dump the logs after each failed API call to mitigate this risk.
26180 If the provided value in `*size` is not large enough to hold all buffered messages, a message will be added at the head of the buffer indicating this. The driver then computes the number of messages it is able to store in `buffer` and writes it out. The final message in `buffer` will always be the most recent log message as of when the API is called.
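    Examples
    --------
    A sketch of the intended usage (an editorial addition); a writable
    bytearray serves as the destination buffer:

    >>> from cuda.bindings import runtime
    >>> buf = bytearray(4096)
    >>> err, it, n = runtime.cudaLogsDumpToMemory(None, buf, len(buf), 0)
    >>> text = bytes(buf[:n]).decode(errors="replace")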
26181 """
26182 cdef cyruntime.cudaLogIterator* cyiterator = NULL
26183 if iterator is not None:
26184 cyiterator = iterator._pvt_ptr
26185 with nogil:
26186 err = cyruntime.cudaLogsDumpToMemory(cyiterator, buffer, &size, flags)
26187 if err != cyruntime.cudaSuccess:
26188 return (_dict_cudaError_t[err], None, None)
26189 return (_dict_cudaError_t[err], iterator, size)
26191@cython.embedsignature(True)
26192def cudaGraphCreate(unsigned int flags):
26193 """ Creates a graph.
26195 Creates an empty graph, which is returned via `pGraph`.
26197 Parameters
26198 ----------
26199 flags : unsigned int
26200 Graph creation flags, must be 0
26202 Returns
26203 -------
26204 cudaError_t
26205 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
26206 pGraph : :py:obj:`~.cudaGraph_t`
26207 Returns newly created graph
26209 See Also
26210 --------
26211 :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`, :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphDestroy`, :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphClone`
26212 """
26213 cdef cudaGraph_t pGraph = cudaGraph_t()
26214 with nogil:
26215 err = cyruntime.cudaGraphCreate(<cyruntime.cudaGraph_t*>pGraph._pvt_ptr, flags)
26216 if err != cyruntime.cudaSuccess:
26217 return (_dict_cudaError_t[err], None)
26218 return (_dict_cudaError_t[err], pGraph)
26220@cython.embedsignature(True)
26221def cudaGraphAddKernelNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, pNodeParams : Optional[cudaKernelNodeParams]):
26222 """ Creates a kernel execution node and adds it to a graph.
26224 Creates a new kernel execution node and adds it to `graph` with
26225 `numDependencies` dependencies specified via `pDependencies` and
26226 arguments specified in `pNodeParams`. It is possible for
26227 `numDependencies` to be 0, in which case the node will be placed at the
26228 root of the graph. `pDependencies` may not have any duplicate entries.
26229 A handle to the new node will be returned in `pGraphNode`.
26231 The :py:obj:`~.cudaKernelNodeParams` structure is defined as:
26233 **View CUDA Toolkit Documentation for a C++ code example**
26235 When the graph is launched, the node will invoke kernel `func` on a
26236 (`gridDim.x` x `gridDim.y` x `gridDim.z`) grid of blocks. Each block
26237 contains (`blockDim.x` x `blockDim.y` x `blockDim.z`) threads.
26239 `sharedMem` sets the amount of dynamic shared memory that will be
26240 available to each thread block.
26242 Kernel parameters to `func` can be specified in one of two ways:
26244 1) Kernel parameters can be specified via `kernelParams`. If the kernel
26245 has N parameters, then `kernelParams` needs to be an array of N
26246 pointers. Each pointer, from `kernelParams`[0] to `kernelParams`[N-1],
26247 points to the region of memory from which the actual parameter will be
26248 copied. The number of kernel parameters and their offsets and sizes do
26249 not need to be specified as that information is retrieved directly from
26250 the kernel's image.
26252 2) Kernel parameters can also be packaged by the application into a
26253 single buffer that is passed in via `extra`. This places the burden on
26254 the application of knowing each kernel parameter's size and
26255 alignment/padding within the buffer. The `extra` parameter exists to
26256    allow this function to take additional, less commonly used arguments.
26257 `extra` specifies a list of names of extra settings and their
26258 corresponding values. Each extra setting name is immediately followed
26259 by the corresponding value. The list must be terminated with either
26260 NULL or CU_LAUNCH_PARAM_END.
26262 - :py:obj:`~.CU_LAUNCH_PARAM_END`, which indicates the end of the
26263 `extra` array;
26265 - :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_POINTER`, which specifies that the
26266 next value in `extra` will be a pointer to a buffer containing all
26267 the kernel parameters for launching kernel `func`;
26269 - :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_SIZE`, which specifies that the
26270 next value in `extra` will be a pointer to a size_t containing the
26271 size of the buffer specified with
26272 :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_POINTER`;
26274 The error :py:obj:`~.cudaErrorInvalidValue` will be returned if kernel
26275 parameters are specified with both `kernelParams` and `extra` (i.e.
26276 both `kernelParams` and `extra` are non-NULL).
26278 The `kernelParams` or `extra` array, as well as the argument values it
26279 points to, are copied during this call.
26281 Parameters
26282 ----------
26283 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
26284 Graph to which to add the node
26285 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
26286 Dependencies of the node
26287 numDependencies : size_t
26288 Number of dependencies
26289 pNodeParams : :py:obj:`~.cudaKernelNodeParams`
26290 Parameters for the GPU execution node
26292 Returns
26293 -------
26294 cudaError_t
26295 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDeviceFunction`
26296 pGraphNode : :py:obj:`~.cudaGraphNode_t`
26297 Returns newly created node
26299 See Also
26300 --------
26301 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaGraphKernelNodeGetParams`, :py:obj:`~.cudaGraphKernelNodeSetParams`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
26303 Notes
26304 -----
26305 Kernels launched using graphs must not use texture and surface references. Reading or writing through any texture or surface reference is undefined behavior. This restriction does not apply to texture and surface objects.
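    Examples
    --------
    A structural sketch (an editorial addition; `kernel` stands in for a
    valid device function handle, and argument packing via `kernelParams` is
    elided):

    >>> from cuda.bindings import runtime
    >>> err, graph = runtime.cudaGraphCreate(0)
    >>> p = runtime.cudaKernelNodeParams()
    >>> p.func = kernel  # hypothetical device function handle
    >>> p.gridDim.x, p.gridDim.y, p.gridDim.z = 1, 1, 1
    >>> p.blockDim.x, p.blockDim.y, p.blockDim.z = 32, 1, 1
    >>> p.sharedMemBytes = 0
    >>> err, node = runtime.cudaGraphAddKernelNode(graph, None, 0, p)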
26306 """
26307 pDependencies = [] if pDependencies is None else pDependencies
26308 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
26309        raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
26310 cdef cyruntime.cudaGraph_t cygraph
26311 if graph is None:
26312 pgraph = 0
26313 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
26314 pgraph = int(graph)
26315 else:
26316 pgraph = int(cudaGraph_t(graph))
26317 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
26318 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
26319 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
26320 if len(pDependencies) > 1:
26321 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
26322 if cypDependencies is NULL:
26323 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
26324 else:
26325 for idx in range(len(pDependencies)):
26326 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
26327 elif len(pDependencies) == 1:
26328 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
26329 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
26330 cdef cyruntime.cudaKernelNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL
26331 with nogil:
26332 err = cyruntime.cudaGraphAddKernelNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cypNodeParams_ptr)
26333 if len(pDependencies) > 1 and cypDependencies is not NULL:
26334 free(cypDependencies)
26335 if err != cyruntime.cudaSuccess:
26336 return (_dict_cudaError_t[err], None)
26337 return (_dict_cudaError_t[err], pGraphNode)
26339@cython.embedsignature(True)
26340def cudaGraphKernelNodeGetParams(node):
26341 """ Returns a kernel node's parameters.
26343 Returns the parameters of kernel node `node` in `pNodeParams`. The
26344 `kernelParams` or `extra` array returned in `pNodeParams`, as well as
26345 the argument values it points to, are owned by the node. This memory
26346 remains valid until the node is destroyed or its parameters are
26347 modified, and should not be modified directly. Use
26348 :py:obj:`~.cudaGraphKernelNodeSetParams` to update the parameters of
26349 this node.
26351 The params will contain either `kernelParams` or `extra`, according to
26352 which of these was most recently set on the node.
26354 Parameters
26355 ----------
26356 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26357 Node to get the parameters for
26359 Returns
26360 -------
26361 cudaError_t
26362 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDeviceFunction`
26363 pNodeParams : :py:obj:`~.cudaKernelNodeParams`
26364 Pointer to return the parameters
26366 See Also
26367 --------
26368 :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphKernelNodeSetParams`
26369 """
26370 cdef cyruntime.cudaGraphNode_t cynode
26371 if node is None:
26372 pnode = 0
26373 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
26374 pnode = int(node)
26375 else:
26376 pnode = int(cudaGraphNode_t(node))
26377 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
26378 cdef cudaKernelNodeParams pNodeParams = cudaKernelNodeParams()
26379 with nogil:
26380 err = cyruntime.cudaGraphKernelNodeGetParams(cynode, <cyruntime.cudaKernelNodeParams*>pNodeParams._pvt_ptr)
26381 if err != cyruntime.cudaSuccess:
26382 return (_dict_cudaError_t[err], None)
26383 return (_dict_cudaError_t[err], pNodeParams)
26385@cython.embedsignature(True)
26386def cudaGraphKernelNodeSetParams(node, pNodeParams : Optional[cudaKernelNodeParams]):
26387 """ Sets a kernel node's parameters.
26389 Sets the parameters of kernel node `node` to `pNodeParams`.
26391 Parameters
26392 ----------
26393 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26394 Node to set the parameters for
26395 pNodeParams : :py:obj:`~.cudaKernelNodeParams`
26396 Parameters to copy
26398 Returns
26399 -------
26400 cudaError_t
26401 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorMemoryAllocation`
26403 See Also
26404 --------
26405 :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphKernelNodeGetParams`
26406 """
26407 cdef cyruntime.cudaGraphNode_t cynode
26408 if node is None:
26409 pnode = 0
26410 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
26411 pnode = int(node)
26412 else:
26413 pnode = int(cudaGraphNode_t(node))
26414 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
26415 cdef cyruntime.cudaKernelNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL
26416 with nogil:
26417 err = cyruntime.cudaGraphKernelNodeSetParams(cynode, cypNodeParams_ptr)
26418 return (_dict_cudaError_t[err],)
26420@cython.embedsignature(True)
26421def cudaGraphKernelNodeCopyAttributes(hDst, hSrc):
26422 """ Copies attributes from source node to destination node.
26424 Copies attributes from source node `hSrc` to destination node `hDst`.
26425    Both nodes must have the same context.
26427 Parameters
26428 ----------
26429 hDst : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26430 Destination node
26431 hSrc : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26432        Source node. For the list of attributes, see
26433 :py:obj:`~.cudaKernelNodeAttrID`
26435 Returns
26436 -------
26437 cudaError_t
26438 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidContext`
26440 See Also
26441 --------
26442 :py:obj:`~.cudaAccessPolicyWindow`
26443 """
26444 cdef cyruntime.cudaGraphNode_t cyhSrc
26445 if hSrc is None:
26446 phSrc = 0
26447 elif isinstance(hSrc, (cudaGraphNode_t,driver.CUgraphNode)):
26448 phSrc = int(hSrc)
26449 else:
26450 phSrc = int(cudaGraphNode_t(hSrc))
26451 cyhSrc = <cyruntime.cudaGraphNode_t><void_ptr>phSrc
26452 cdef cyruntime.cudaGraphNode_t cyhDst
26453 if hDst is None:
26454 phDst = 0
26455 elif isinstance(hDst, (cudaGraphNode_t,driver.CUgraphNode)):
26456 phDst = int(hDst)
26457 else:
26458 phDst = int(cudaGraphNode_t(hDst))
26459 cyhDst = <cyruntime.cudaGraphNode_t><void_ptr>phDst
26460 with nogil:
26461 err = cyruntime.cudaGraphKernelNodeCopyAttributes(cyhDst, cyhSrc)
26462 return (_dict_cudaError_t[err],)
26464@cython.embedsignature(True)
26465def cudaGraphKernelNodeGetAttribute(hNode, attr not None : cudaKernelNodeAttrID):
26466 """ Queries node attribute.
26468 Queries attribute `attr` from node `hNode` and stores it in
26469 corresponding member of `value_out`.
26471 Parameters
26472 ----------
26473 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26475 attr : :py:obj:`~.cudaKernelNodeAttrID`
26478 Returns
26479 -------
26480 cudaError_t
26481 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
26482 value_out : :py:obj:`~.cudaKernelNodeAttrValue`
26485 See Also
26486 --------
26487 :py:obj:`~.cudaAccessPolicyWindow`
26488 """
26489 cdef cyruntime.cudaGraphNode_t cyhNode
26490 if hNode is None:
26491 phNode = 0
26492 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
26493 phNode = int(hNode)
26494 else:
26495 phNode = int(cudaGraphNode_t(hNode))
26496 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
26497 cdef cyruntime.cudaKernelNodeAttrID cyattr = attr.value
26498 cdef cudaKernelNodeAttrValue value_out = cudaKernelNodeAttrValue()
26499 with nogil:
26500 err = cyruntime.cudaGraphKernelNodeGetAttribute(cyhNode, cyattr, <cyruntime.cudaKernelNodeAttrValue*>value_out._pvt_ptr)
26501 if err != cyruntime.cudaSuccess:
26502 return (_dict_cudaError_t[err], None)
26503 return (_dict_cudaError_t[err], value_out)
26505@cython.embedsignature(True)
26506def cudaGraphKernelNodeSetAttribute(hNode, attr not None : cudaKernelNodeAttrID, value : Optional[cudaKernelNodeAttrValue]):
26507 """ Sets node attribute.
26509 Sets attribute `attr` on node `hNode` from corresponding attribute of
26510 `value`.
26512 Parameters
26513 ----------
26514 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26516 attr : :py:obj:`~.cudaKernelNodeAttrID`
26518 value : :py:obj:`~.cudaKernelNodeAttrValue`
26521 Returns
26522 -------
26523 cudaError_t
26524 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
26526 See Also
26527 --------
26528 :py:obj:`~.cudaAccessPolicyWindow`
26529 """
26530 cdef cyruntime.cudaGraphNode_t cyhNode
26531 if hNode is None:
26532 phNode = 0
26533 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
26534 phNode = int(hNode)
26535 else:
26536 phNode = int(cudaGraphNode_t(hNode))
26537 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
26538 cdef cyruntime.cudaKernelNodeAttrID cyattr = attr.value
26539 cdef cyruntime.cudaKernelNodeAttrValue* cyvalue_ptr = value._pvt_ptr if value is not None else NULL
26540 with nogil:
26541 err = cyruntime.cudaGraphKernelNodeSetAttribute(cyhNode, cyattr, cyvalue_ptr)
26542 return (_dict_cudaError_t[err],)
26544@cython.embedsignature(True)
26545def cudaGraphAddMemcpyNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, pCopyParams : Optional[cudaMemcpy3DParms]):
26546 """ Creates a memcpy node and adds it to a graph.
26548 Creates a new memcpy node and adds it to `graph` with `numDependencies`
26549 dependencies specified via `pDependencies`. It is possible for
26550 `numDependencies` to be 0, in which case the node will be placed at the
26551 root of the graph. `pDependencies` may not have any duplicate entries.
26552 A handle to the new node will be returned in `pGraphNode`.
26554 When the graph is launched, the node will perform the memcpy described
26555 by `pCopyParams`. See :py:obj:`~.cudaMemcpy3D()` for a description of
26556 the structure and its restrictions.
26558 Memcpy nodes have some additional restrictions with regards to managed
26559 memory, if the system contains at least one device which has a zero
26560 value for the device attribute
26561 :py:obj:`~.cudaDevAttrConcurrentManagedAccess`.
26563 Parameters
26564 ----------
26565 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
26566 Graph to which to add the node
26567 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
26568 Dependencies of the node
26569 numDependencies : size_t
26570 Number of dependencies
26571 pCopyParams : :py:obj:`~.cudaMemcpy3DParms`
26572 Parameters for the memory copy
26574 Returns
26575 -------
26576 cudaError_t
26577 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26578 pGraphNode : :py:obj:`~.cudaGraphNode_t`
26579 Returns newly created node
26581 See Also
26582 --------
26583 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaMemcpy3D`, :py:obj:`~.cudaGraphAddMemcpyNodeToSymbol`, :py:obj:`~.cudaGraphAddMemcpyNodeFromSymbol`, :py:obj:`~.cudaGraphAddMemcpyNode1D`, :py:obj:`~.cudaGraphMemcpyNodeGetParams`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemsetNode`
26584 """
26585 pDependencies = [] if pDependencies is None else pDependencies
26586 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
26587        raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
26588 cdef cyruntime.cudaGraph_t cygraph
26589 if graph is None:
26590 pgraph = 0
26591 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
26592 pgraph = int(graph)
26593 else:
26594 pgraph = int(cudaGraph_t(graph))
26595 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
26596 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
26597 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
26598 if len(pDependencies) > 1:
26599 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
26600 if cypDependencies is NULL:
26601 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
26602 else:
26603 for idx in range(len(pDependencies)):
26604 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
26605 elif len(pDependencies) == 1:
26606 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
26607 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
26608 cdef cyruntime.cudaMemcpy3DParms* cypCopyParams_ptr = pCopyParams._pvt_ptr if pCopyParams is not None else NULL
26609 with nogil:
26610 err = cyruntime.cudaGraphAddMemcpyNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cypCopyParams_ptr)
26611 if len(pDependencies) > 1 and cypDependencies is not NULL:
26612 free(cypDependencies)
26613 if err != cyruntime.cudaSuccess:
26614 return (_dict_cudaError_t[err], None)
26615 return (_dict_cudaError_t[err], pGraphNode)
26617@cython.embedsignature(True)
26618def cudaGraphAddMemcpyNode1D(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, dst, src, size_t count, kind not None : cudaMemcpyKind):
26619 """ Creates a 1D memcpy node and adds it to a graph.
26621 Creates a new 1D memcpy node and adds it to `graph` with
26622 `numDependencies` dependencies specified via `pDependencies`. It is
26623 possible for `numDependencies` to be 0, in which case the node will be
26624 placed at the root of the graph. `pDependencies` may not have any
26625 duplicate entries. A handle to the new node will be returned in
26626 `pGraphNode`.
26628 When the graph is launched, the node will copy `count` bytes from the
26629 memory area pointed to by `src` to the memory area pointed to by `dst`,
26630 where `kind` specifies the direction of the copy, and must be one of
26631 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
26632 :py:obj:`~.cudaMemcpyDeviceToHost`,
26633 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
26634 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
26635 type of transfer is inferred from the pointer values. However,
26636 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
26637 unified virtual addressing. Launching a memcpy node with dst and src
26638    pointers that do not match the direction of the copy results in
26639 undefined behavior.
26641 Memcpy nodes have some additional restrictions with regards to managed
26642 memory, if the system contains at least one device which has a zero
26643 value for the device attribute
26644 :py:obj:`~.cudaDevAttrConcurrentManagedAccess`.
26646 Parameters
26647 ----------
26648 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
26649 Graph to which to add the node
26650 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
26651 Dependencies of the node
26652 numDependencies : size_t
26653 Number of dependencies
26654 dst : Any
26655 Destination memory address
26656 src : Any
26657 Source memory address
26658 count : size_t
26659 Size in bytes to copy
26660 kind : :py:obj:`~.cudaMemcpyKind`
26661 Type of transfer
26663 Returns
26664 -------
26665 cudaError_t
26666 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26667 pGraphNode : :py:obj:`~.cudaGraphNode_t`
26668 Returns newly created node
26670 See Also
26671 --------
26672 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphMemcpyNodeGetParams`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphMemcpyNodeSetParams1D`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemsetNode`
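    Examples
    --------
    A minimal host-to-host sketch (an editorial addition; raw addresses are
    passed as integers):

    >>> import numpy as np
    >>> from cuda.bindings import runtime
    >>> src = np.ones(256, dtype=np.uint8)
    >>> dst = np.zeros_like(src)
    >>> err, graph = runtime.cudaGraphCreate(0)
    >>> err, node = runtime.cudaGraphAddMemcpyNode1D(
    ...     graph, None, 0, dst.ctypes.data, src.ctypes.data, src.nbytes,
    ...     runtime.cudaMemcpyKind.cudaMemcpyHostToHost)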
26673 """
26674 pDependencies = [] if pDependencies is None else pDependencies
26675 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
26676        raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
26677 cdef cyruntime.cudaGraph_t cygraph
26678 if graph is None:
26679 pgraph = 0
26680 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
26681 pgraph = int(graph)
26682 else:
26683 pgraph = int(cudaGraph_t(graph))
26684 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
26685 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
26686 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
26687 if len(pDependencies) > 1:
26688 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
26689 if cypDependencies is NULL:
26690 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
26691 else:
26692 for idx in range(len(pDependencies)):
26693 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
26694 elif len(pDependencies) == 1:
26695 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
26696 cydst = _HelperInputVoidPtr(dst)
26697 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
26698 cysrc = _HelperInputVoidPtr(src)
26699 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
26700 cdef cyruntime.cudaMemcpyKind cykind = kind.value
26701 with nogil:
26702 err = cyruntime.cudaGraphAddMemcpyNode1D(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cydst_ptr, cysrc_ptr, count, cykind)
26703 if len(pDependencies) > 1 and cypDependencies is not NULL:
26704 free(cypDependencies)
26705 if err != cyruntime.cudaSuccess:
26706 return (_dict_cudaError_t[err], None)
26707 return (_dict_cudaError_t[err], pGraphNode)
26709@cython.embedsignature(True)
26710def cudaGraphMemcpyNodeGetParams(node):
26711 """ Returns a memcpy node's parameters.
26713 Returns the parameters of memcpy node `node` in `pNodeParams`.
26715 Parameters
26716 ----------
26717 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26718 Node to get the parameters for
26720 Returns
26721 -------
26722 cudaError_t
26723 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26724 pNodeParams : :py:obj:`~.cudaMemcpy3DParms`
26725 Pointer to return the parameters
26727 See Also
26728 --------
26729 :py:obj:`~.cudaMemcpy3D`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`
26730 """
26731 cdef cyruntime.cudaGraphNode_t cynode
26732 if node is None:
26733 pnode = 0
26734 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
26735 pnode = int(node)
26736 else:
26737 pnode = int(cudaGraphNode_t(node))
26738 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
26739 cdef cudaMemcpy3DParms pNodeParams = cudaMemcpy3DParms()
26740 with nogil:
26741 err = cyruntime.cudaGraphMemcpyNodeGetParams(cynode, <cyruntime.cudaMemcpy3DParms*>pNodeParams._pvt_ptr)
26742 if err != cyruntime.cudaSuccess:
26743 return (_dict_cudaError_t[err], None)
26744 return (_dict_cudaError_t[err], pNodeParams)
26746@cython.embedsignature(True)
26747def cudaGraphMemcpyNodeSetParams(node, pNodeParams : Optional[cudaMemcpy3DParms]):
26748 """ Sets a memcpy node's parameters.
26750 Sets the parameters of memcpy node `node` to `pNodeParams`.
26752 Parameters
26753 ----------
26754 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26755 Node to set the parameters for
26756 pNodeParams : :py:obj:`~.cudaMemcpy3DParms`
26757 Parameters to copy
26759 Returns
26760 -------
26761 cudaError_t
26762        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26764 See Also
26765 --------
26766 :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaMemcpy3D`, :py:obj:`~.cudaGraphMemcpyNodeSetParamsToSymbol`, :py:obj:`~.cudaGraphMemcpyNodeSetParamsFromSymbol`, :py:obj:`~.cudaGraphMemcpyNodeSetParams1D`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphMemcpyNodeGetParams`
26767 """
26768 cdef cyruntime.cudaGraphNode_t cynode
26769 if node is None:
26770 pnode = 0
26771 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
26772 pnode = int(node)
26773 else:
26774 pnode = int(cudaGraphNode_t(node))
26775 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
26776 cdef cyruntime.cudaMemcpy3DParms* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL
26777 with nogil:
26778 err = cyruntime.cudaGraphMemcpyNodeSetParams(cynode, cypNodeParams_ptr)
26779 return (_dict_cudaError_t[err],)
26781@cython.embedsignature(True)
26782def cudaGraphMemcpyNodeSetParams1D(node, dst, src, size_t count, kind not None : cudaMemcpyKind):
26783 """ Sets a memcpy node's parameters to perform a 1-dimensional copy.
26785 Sets the parameters of memcpy node `node` to the copy described by the
26786 provided parameters.
26788 When the graph is launched, the node will copy `count` bytes from the
26789 memory area pointed to by `src` to the memory area pointed to by `dst`,
26790 where `kind` specifies the direction of the copy, and must be one of
26791 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
26792 :py:obj:`~.cudaMemcpyDeviceToHost`,
26793 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
26794 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
26795 type of transfer is inferred from the pointer values. However,
26796 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
26797 unified virtual addressing. Launching a memcpy node with dst and src
26798    pointers that do not match the direction of the copy results in
26799 undefined behavior.
26801 Parameters
26802 ----------
26803 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26804 Node to set the parameters for
26805 dst : Any
26806 Destination memory address
26807 src : Any
26808 Source memory address
26809 count : size_t
26810 Size in bytes to copy
26811 kind : :py:obj:`~.cudaMemcpyKind`
26812 Type of transfer
26814 Returns
26815 -------
26816 cudaError_t
26817 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26819 See Also
26820 --------
26821 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphMemcpyNodeGetParams`
26822 """
26823 cdef cyruntime.cudaGraphNode_t cynode
26824 if node is None:
26825 pnode = 0
26826 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
26827 pnode = int(node)
26828 else:
26829 pnode = int(cudaGraphNode_t(node))
26830 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
26831 cydst = _HelperInputVoidPtr(dst)
26832 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
26833 cysrc = _HelperInputVoidPtr(src)
26834 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
26835 cdef cyruntime.cudaMemcpyKind cykind = kind.value
26836 with nogil:
26837 err = cyruntime.cudaGraphMemcpyNodeSetParams1D(cynode, cydst_ptr, cysrc_ptr, count, cykind)
26838 return (_dict_cudaError_t[err],)
26840@cython.embedsignature(True)
26841def cudaGraphAddMemsetNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, pMemsetParams : Optional[cudaMemsetParams]):
26842 """ Creates a memset node and adds it to a graph.
26844 Creates a new memset node and adds it to `graph` with `numDependencies`
26845 dependencies specified via `pDependencies`. It is possible for
26846 `numDependencies` to be 0, in which case the node will be placed at the
26847 root of the graph. `pDependencies` may not have any duplicate entries.
26848 A handle to the new node will be returned in `pGraphNode`.
26850 The element size must be 1, 2, or 4 bytes. When the graph is launched,
26851 the node will perform the memset described by `pMemsetParams`.
26853 Parameters
26854 ----------
26855 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
26856 Graph to which to add the node
26857 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
26858 Dependencies of the node
26859 numDependencies : size_t
26860 Number of dependencies
26861 pMemsetParams : :py:obj:`~.cudaMemsetParams`
26862 Parameters for the memory set
26864 Returns
26865 -------
26866 cudaError_t
26867 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
26868 pGraphNode : :py:obj:`~.cudaGraphNode_t`
26869 Returns newly created node
26871 See Also
26872 --------
26873 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaGraphMemsetNodeGetParams`, :py:obj:`~.cudaGraphMemsetNodeSetParams`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`
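    Examples
    --------
    A minimal sketch (an editorial addition) that zero-fills a 1 KiB device
    allocation through a graph node:

    >>> from cuda.bindings import runtime
    >>> err, graph = runtime.cudaGraphCreate(0)
    >>> err, dptr = runtime.cudaMalloc(1024)
    >>> p = runtime.cudaMemsetParams()
    >>> p.dst = dptr
    >>> p.value = 0
    >>> p.elementSize = 1    # must be 1, 2, or 4
    >>> p.width = 1024       # elements per row
    >>> p.height = 1         # single row, so pitch is ignored
    >>> err, node = runtime.cudaGraphAddMemsetNode(graph, None, 0, p)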
26874 """
26875 pDependencies = [] if pDependencies is None else pDependencies
26876 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
26877        raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
26878 cdef cyruntime.cudaGraph_t cygraph
26879 if graph is None:
26880 pgraph = 0
26881 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
26882 pgraph = int(graph)
26883 else:
26884 pgraph = int(cudaGraph_t(graph))
26885 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
26886 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
26887 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
26888 if len(pDependencies) > 1:
26889 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
26890 if cypDependencies is NULL:
26891 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
26892 else:
26893 for idx in range(len(pDependencies)):
26894 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
26895 elif len(pDependencies) == 1:
26896 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
26897 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
26898 cdef cyruntime.cudaMemsetParams* cypMemsetParams_ptr = pMemsetParams._pvt_ptr if pMemsetParams is not None else NULL
26899 with nogil:
26900 err = cyruntime.cudaGraphAddMemsetNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cypMemsetParams_ptr)
26901 if len(pDependencies) > 1 and cypDependencies is not NULL:
26902 free(cypDependencies)
26903 if err != cyruntime.cudaSuccess:
26904 return (_dict_cudaError_t[err], None)
26905 return (_dict_cudaError_t[err], pGraphNode)
26907@cython.embedsignature(True)
26908def cudaGraphMemsetNodeGetParams(node):
26909 """ Returns a memset node's parameters.
26911 Returns the parameters of memset node `node` in `pNodeParams`.
26913 Parameters
26914 ----------
26915 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26916 Node to get the parameters for
26918 Returns
26919 -------
26920 cudaError_t
26921 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26922 pNodeParams : :py:obj:`~.cudaMemsetParams`
26923 Pointer to return the parameters
26925 See Also
26926 --------
26927 :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaGraphAddMemsetNode`, :py:obj:`~.cudaGraphMemsetNodeSetParams`
26928 """
26929 cdef cyruntime.cudaGraphNode_t cynode
26930 if node is None:
26931 pnode = 0
26932 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
26933 pnode = int(node)
26934 else:
26935 pnode = int(cudaGraphNode_t(node))
26936 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
26937 cdef cudaMemsetParams pNodeParams = cudaMemsetParams()
26938 with nogil:
26939 err = cyruntime.cudaGraphMemsetNodeGetParams(cynode, <cyruntime.cudaMemsetParams*>pNodeParams._pvt_ptr)
26940 if err != cyruntime.cudaSuccess:
26941 return (_dict_cudaError_t[err], None)
26942 return (_dict_cudaError_t[err], pNodeParams)
26944@cython.embedsignature(True)
26945def cudaGraphMemsetNodeSetParams(node, pNodeParams : Optional[cudaMemsetParams]):
26946 """ Sets a memset node's parameters.
26948 Sets the parameters of memset node `node` to `pNodeParams`.
26950 Parameters
26951 ----------
26952 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26953 Node to set the parameters for
26954 pNodeParams : :py:obj:`~.cudaMemsetParams`
26955 Parameters to copy
26957 Returns
26958 -------
26959 cudaError_t
26960 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26962 See Also
26963 --------
26964 :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaGraphAddMemsetNode`, :py:obj:`~.cudaGraphMemsetNodeGetParams`
26965 """
26966 cdef cyruntime.cudaGraphNode_t cynode
26967 if node is None:
26968 pnode = 0
26969 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
26970 pnode = int(node)
26971 else:
26972 pnode = int(cudaGraphNode_t(node))
26973 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
26974 cdef cyruntime.cudaMemsetParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL
26975 with nogil:
26976 err = cyruntime.cudaGraphMemsetNodeSetParams(cynode, cypNodeParams_ptr)
26977 return (_dict_cudaError_t[err],)
26979@cython.embedsignature(True)
26980def cudaGraphAddHostNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, pNodeParams : Optional[cudaHostNodeParams]):
26981 """ Creates a host execution node and adds it to a graph.
26983 Creates a new CPU execution node and adds it to `graph` with
26984 `numDependencies` dependencies specified via `pDependencies` and
26985 arguments specified in `pNodeParams`. It is possible for
26986 `numDependencies` to be 0, in which case the node will be placed at the
26987 root of the graph. `pDependencies` may not have any duplicate entries.
26988 A handle to the new node will be returned in `pGraphNode`.
26990 When the graph is launched, the node will invoke the specified CPU
26991 function. Host nodes are not supported under MPS with pre-Volta GPUs.
26993 Parameters
26994 ----------
26995 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
26996 Graph to which to add the node
26997 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
26998 Dependencies of the node
26999 numDependencies : size_t
27000 Number of dependencies
27001 pNodeParams : :py:obj:`~.cudaHostNodeParams`
27002 Parameters for the host node
27004 Returns
27005 -------
27006 cudaError_t
27007 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`
27008 pGraphNode : :py:obj:`~.cudaGraphNode_t`
27009 Returns newly created node
27011 See Also
27012 --------
27013 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaLaunchHostFunc`, :py:obj:`~.cudaGraphHostNodeGetParams`, :py:obj:`~.cudaGraphHostNodeSetParams`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
27014 """
27015 pDependencies = [] if pDependencies is None else pDependencies
27016 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
27017        raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
27018 cdef cyruntime.cudaGraph_t cygraph
27019 if graph is None:
27020 pgraph = 0
27021 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
27022 pgraph = int(graph)
27023 else:
27024 pgraph = int(cudaGraph_t(graph))
27025 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
27026 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
27027 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
27028 if len(pDependencies) > 1:
27029 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
27030 if cypDependencies is NULL:
27031 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
27032 else:
27033 for idx in range(len(pDependencies)):
27034 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
27035 elif len(pDependencies) == 1:
27036 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
27037 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
27038 cdef cyruntime.cudaHostNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL
27039 with nogil:
27040 err = cyruntime.cudaGraphAddHostNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cypNodeParams_ptr)
27041 if len(pDependencies) > 1 and cypDependencies is not NULL:
27042 free(cypDependencies)
27043 if err != cyruntime.cudaSuccess:
27044 return (_dict_cudaError_t[err], None)
27045 return (_dict_cudaError_t[err], pGraphNode)
27047@cython.embedsignature(True)
27048def cudaGraphHostNodeGetParams(node):
27049 """ Returns a host node's parameters.
27051 Returns the parameters of host node `node` in `pNodeParams`.
27053 Parameters
27054 ----------
27055 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27056 Node to get the parameters for
27058 Returns
27059 -------
27060 cudaError_t
27061 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27062 pNodeParams : :py:obj:`~.cudaHostNodeParams`
27063 Pointer to return the parameters
27065 See Also
27066 --------
27067 :py:obj:`~.cudaLaunchHostFunc`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphHostNodeSetParams`
27068 """
27069 cdef cyruntime.cudaGraphNode_t cynode
27070 if node is None:
27071 pnode = 0
27072 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
27073 pnode = int(node)
27074 else:
27075 pnode = int(cudaGraphNode_t(node))
27076 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
27077 cdef cudaHostNodeParams pNodeParams = cudaHostNodeParams()
27078 with nogil:
27079 err = cyruntime.cudaGraphHostNodeGetParams(cynode, <cyruntime.cudaHostNodeParams*>pNodeParams._pvt_ptr)
27080 if err != cyruntime.cudaSuccess:
27081 return (_dict_cudaError_t[err], None)
27082 return (_dict_cudaError_t[err], pNodeParams)
27084@cython.embedsignature(True)
27085def cudaGraphHostNodeSetParams(node, pNodeParams : Optional[cudaHostNodeParams]):
27086 """ Sets a host node's parameters.
27088 Sets the parameters of host node `node` to `pNodeParams`.
27090 Parameters
27091 ----------
27092 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27093 Node to set the parameters for
27094 pNodeParams : :py:obj:`~.cudaHostNodeParams`
27095 Parameters to copy
27097 Returns
27098 -------
27099 cudaError_t
27100 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27102 See Also
27103 --------
27104 :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaLaunchHostFunc`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphHostNodeGetParams`
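Examples
--------
A small round-trip sketch (assumes `node` is an existing host node;
error checks elided)::

    from cuda.bindings import runtime as cudart

    err, params = cudart.cudaGraphHostNodeGetParams(node)
    params.userData = 0            # retarget the callback's user data
    err, = cudart.cudaGraphHostNodeSetParams(node, params)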
27105 """
27106 cdef cyruntime.cudaGraphNode_t cynode
27107 if node is None:
27108 pnode = 0
27109 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
27110 pnode = int(node)
27111 else:
27112 pnode = int(cudaGraphNode_t(node))
27113 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
27114 cdef cyruntime.cudaHostNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL
27115 with nogil:
27116 err = cyruntime.cudaGraphHostNodeSetParams(cynode, cypNodeParams_ptr)
27117 return (_dict_cudaError_t[err],)
27119@cython.embedsignature(True)
27120def cudaGraphAddChildGraphNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, childGraph):
27121 """ Creates a child graph node and adds it to a graph.
27123 Creates a new node which executes an embedded graph, and adds it to
27124 `graph` with `numDependencies` dependencies specified via
27125 `pDependencies`. It is possible for `numDependencies` to be 0, in which
27126 case the node will be placed at the root of the graph. `pDependencies`
27127 may not have any duplicate entries. A handle to the new node will be
27128 returned in `pGraphNode`.
27130 If `childGraph` contains allocation nodes, free nodes, or conditional
27131 nodes, this call will return an error.
27133 The node executes an embedded child graph. The child graph is cloned in
27134 this call.
27136 Parameters
27137 ----------
27138 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
27139 Graph to which to add the node
27140 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
27141 Dependencies of the node
27142 numDependencies : size_t
27143 Number of dependencies
27144 childGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
27145 The graph to clone into this node
27147 Returns
27148 -------
27149 cudaError_t
27150 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27151 pGraphNode : :py:obj:`~.cudaGraphNode_t`
27152 Returns newly created node
27154 See Also
27155 --------
27156 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphChildGraphNodeGetGraph`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`, :py:obj:`~.cudaGraphClone`
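Examples
--------
A minimal sketch: build a small graph, then embed it in a parent graph
(error checks elided for brevity)::

    from cuda.bindings import runtime as cudart

    err, child = cudart.cudaGraphCreate(0)
    err, _ = cudart.cudaGraphAddEmptyNode(child, None, 0)
    err, parent = cudart.cudaGraphCreate(0)
    # The child graph is cloned here; later changes to `child` do not
    # affect the embedded copy.
    err, node = cudart.cudaGraphAddChildGraphNode(parent, None, 0, child)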
27157 """
27158 cdef cyruntime.cudaGraph_t cychildGraph
27159 if childGraph is None:
27160 pchildGraph = 0
27161 elif isinstance(childGraph, (cudaGraph_t,driver.CUgraph)):
27162 pchildGraph = int(childGraph)
27163 else:
27164 pchildGraph = int(cudaGraph_t(childGraph))
27165 cychildGraph = <cyruntime.cudaGraph_t><void_ptr>pchildGraph
27166 pDependencies = [] if pDependencies is None else pDependencies
27167 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
27168 raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
27169 cdef cyruntime.cudaGraph_t cygraph
27170 if graph is None:
27171 pgraph = 0
27172 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
27173 pgraph = int(graph)
27174 else:
27175 pgraph = int(cudaGraph_t(graph))
27176 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
27177 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
27178 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
27179 if len(pDependencies) > 1:
27180 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
27181 if cypDependencies is NULL:
27182 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
27183 else:
27184 for idx in range(len(pDependencies)):
27185 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
27186 elif len(pDependencies) == 1:
27187 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
27188 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
27189 with nogil:
27190 err = cyruntime.cudaGraphAddChildGraphNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cychildGraph)
27191 if len(pDependencies) > 1 and cypDependencies is not NULL:
27192 free(cypDependencies)
27193 if err != cyruntime.cudaSuccess:
27194 return (_dict_cudaError_t[err], None)
27195 return (_dict_cudaError_t[err], pGraphNode)
27197@cython.embedsignature(True)
27198def cudaGraphChildGraphNodeGetGraph(node):
27199 """ Gets a handle to the embedded graph of a child graph node.
27201 Gets a handle to the embedded graph in a child graph node. This call
27202 does not clone the graph. Changes to the graph will be reflected in the
27203 node, and the node retains ownership of the graph.
27205 Allocation and free nodes cannot be added to the returned graph.
27206 Attempting to do so will return an error.
27208 Parameters
27209 ----------
27210 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27211 Node to get the embedded graph for
27213 Returns
27214 -------
27215 cudaError_t
27216 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27217 pGraph : :py:obj:`~.cudaGraph_t`
27218 Location to store a handle to the graph
27220 See Also
27221 --------
27222 :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphNodeFindInClone`
27223 """
27224 cdef cyruntime.cudaGraphNode_t cynode
27225 if node is None:
27226 pnode = 0
27227 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
27228 pnode = int(node)
27229 else:
27230 pnode = int(cudaGraphNode_t(node))
27231 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
27232 cdef cudaGraph_t pGraph = cudaGraph_t()
27233 with nogil:
27234 err = cyruntime.cudaGraphChildGraphNodeGetGraph(cynode, <cyruntime.cudaGraph_t*>pGraph._pvt_ptr)
27235 if err != cyruntime.cudaSuccess:
27236 return (_dict_cudaError_t[err], None)
27237 return (_dict_cudaError_t[err], pGraph)
27239@cython.embedsignature(True)
27240def cudaGraphAddEmptyNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies):
27241 """ Creates an empty node and adds it to a graph.
27243 Creates a new node which performs no operation, and adds it to `graph`
27244 with `numDependencies` dependencies specified via `pDependencies`. It
27245 is possible for `numDependencies` to be 0, in which case the node will
27246 be placed at the root of the graph. `pDependencies` may not have any
27247 duplicate entries. A handle to the new node will be returned in
27248 `pGraphNode`.
27250 An empty node performs no operation during execution, but can be used
27251 for transitive ordering. For example, a phased execution graph with 2
27252 groups of n nodes with a barrier between them can be represented using
27253 an empty node and 2*n dependency edges, rather than the n^2 dependency
27254 edges needed with no empty node (see the sketch below).
27256 Parameters
27257 ----------
27258 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
27259 Graph to which to add the node
27260 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
27261 Dependencies of the node
27262 numDependencies : size_t
27263 Number of dependencies
27265 Returns
27266 -------
27267 cudaError_t
27268 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27269 pGraphNode : :py:obj:`~.cudaGraphNode_t`
27270 Returns newly created node
27272 See Also
27273 --------
27274 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
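Examples
--------
A sketch of the barrier pattern described above: two phases of `n`
nodes joined through one empty node, giving 2*n edges instead of n^2.
Empty nodes stand in for real work nodes here::

    from cuda.bindings import runtime as cudart

    n = 4
    err, graph = cudart.cudaGraphCreate(0)
    phase1 = [cudart.cudaGraphAddEmptyNode(graph, None, 0)[1] for _ in range(n)]
    # The barrier depends on every node of phase 1 ...
    err, barrier = cudart.cudaGraphAddEmptyNode(graph, phase1, n)
    # ... and every node of phase 2 depends only on the barrier.
    phase2 = [cudart.cudaGraphAddEmptyNode(graph, [barrier], 1)[1] for _ in range(n)]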
27275 """
27276 pDependencies = [] if pDependencies is None else pDependencies
27277 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
27278 raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
27279 cdef cyruntime.cudaGraph_t cygraph
27280 if graph is None:
27281 pgraph = 0
27282 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
27283 pgraph = int(graph)
27284 else:
27285 pgraph = int(cudaGraph_t(graph))
27286 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
27287 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
27288 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
27289 if len(pDependencies) > 1:
27290 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
27291 if cypDependencies is NULL:
27292 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
27293 else:
27294 for idx in range(len(pDependencies)):
27295 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
27296 elif len(pDependencies) == 1:
27297 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
27298 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
27299 with nogil:
27300 err = cyruntime.cudaGraphAddEmptyNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies)
27301 if len(pDependencies) > 1 and cypDependencies is not NULL:
27302 free(cypDependencies)
27303 if err != cyruntime.cudaSuccess:
27304 return (_dict_cudaError_t[err], None)
27305 return (_dict_cudaError_t[err], pGraphNode)
27307@cython.embedsignature(True)
27308def cudaGraphAddEventRecordNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, event):
27309 """ Creates an event record node and adds it to a graph.
27311 Creates a new event record node and adds it to `graph` with
27312 `numDependencies` dependencies specified via `pDependencies` and event
27313 specified in `event`. It is possible for `numDependencies` to be 0, in
27314 which case the node will be placed at the root of the graph.
27315 `pDependencies` may not have any duplicate entries. A handle to the new
27316 node will be returned in `pGraphNode`.
27318 Each launch of the graph will record `event` to capture execution of
27319 the node's dependencies.
27321 These nodes may not be used in loops or conditionals.
27323 Parameters
27324 ----------
27325 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
27326 Graph to which to add the node
27327 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
27328 Dependencies of the node
27329 numDependencies : size_t
27330 Number of dependencies
27331 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
27332 Event for the node
27334 Returns
27335 -------
27336 cudaError_t
27337 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27338 pGraphNode : :py:obj:`~.cudaGraphNode_t`
27339 Returns newly created node
27341 See Also
27342 --------
27343 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
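Examples
--------
A minimal sketch (error checks elided)::

    from cuda.bindings import runtime as cudart

    err, event = cudart.cudaEventCreate()
    err, graph = cudart.cudaGraphCreate(0)
    err, node = cudart.cudaGraphAddEventRecordNode(graph, None, 0, event)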
27344 """
27345 cdef cyruntime.cudaEvent_t cyevent
27346 if event is None:
27347 pevent = 0
27348 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
27349 pevent = int(event)
27350 else:
27351 pevent = int(cudaEvent_t(event))
27352 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
27353 pDependencies = [] if pDependencies is None else pDependencies
27354 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
27355 raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
27356 cdef cyruntime.cudaGraph_t cygraph
27357 if graph is None:
27358 pgraph = 0
27359 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
27360 pgraph = int(graph)
27361 else:
27362 pgraph = int(cudaGraph_t(graph))
27363 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
27364 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
27365 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
27366 if len(pDependencies) > 1:
27367 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
27368 if cypDependencies is NULL:
27369 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
27370 else:
27371 for idx in range(len(pDependencies)):
27372 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
27373 elif len(pDependencies) == 1:
27374 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
27375 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
27376 with nogil:
27377 err = cyruntime.cudaGraphAddEventRecordNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cyevent)
27378 if len(pDependencies) > 1 and cypDependencies is not NULL:
27379 free(cypDependencies)
27380 if err != cyruntime.cudaSuccess:
27381 return (_dict_cudaError_t[err], None)
27382 return (_dict_cudaError_t[err], pGraphNode)
27384@cython.embedsignature(True)
27385def cudaGraphEventRecordNodeGetEvent(node):
27386 """ Returns the event associated with an event record node.
27388 Returns the event of event record node `node` in `event_out`.
27390 Parameters
27391 ----------
27392 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27393 Node to get the event for
27395 Returns
27396 -------
27397 cudaError_t
27398 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27399 event_out : :py:obj:`~.cudaEvent_t`
27400 Pointer to return the event
27402 See Also
27403 --------
27404 :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphEventWaitNodeGetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`
27405 """
27406 cdef cyruntime.cudaGraphNode_t cynode
27407 if node is None:
27408 pnode = 0
27409 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
27410 pnode = int(node)
27411 else:
27412 pnode = int(cudaGraphNode_t(node))
27413 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
27414 cdef cudaEvent_t event_out = cudaEvent_t()
27415 with nogil:
27416 err = cyruntime.cudaGraphEventRecordNodeGetEvent(cynode, <cyruntime.cudaEvent_t*>event_out._pvt_ptr)
27417 if err != cyruntime.cudaSuccess:
27418 return (_dict_cudaError_t[err], None)
27419 return (_dict_cudaError_t[err], event_out)
27421@cython.embedsignature(True)
27422def cudaGraphEventRecordNodeSetEvent(node, event):
27423 """ Sets an event record node's event.
27425 Sets the event of event record node `node` to `event`.
27427 Parameters
27428 ----------
27429 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27430 Node to set the event for
27431 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
27432 Event to use
27434 Returns
27435 -------
27436 cudaError_t
27437 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27439 See Also
27440 --------
27441 :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphEventRecordNodeGetEvent`, :py:obj:`~.cudaGraphEventWaitNodeSetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`
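Examples
--------
A sketch retargeting an existing record node (`node` assumed to exist)
to a freshly created event::

    from cuda.bindings import runtime as cudart

    err, newEvent = cudart.cudaEventCreate()
    err, = cudart.cudaGraphEventRecordNodeSetEvent(node, newEvent)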
27442 """
27443 cdef cyruntime.cudaEvent_t cyevent
27444 if event is None:
27445 pevent = 0
27446 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
27447 pevent = int(event)
27448 else:
27449 pevent = int(cudaEvent_t(event))
27450 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
27451 cdef cyruntime.cudaGraphNode_t cynode
27452 if node is None:
27453 pnode = 0
27454 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
27455 pnode = int(node)
27456 else:
27457 pnode = int(cudaGraphNode_t(node))
27458 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
27459 with nogil:
27460 err = cyruntime.cudaGraphEventRecordNodeSetEvent(cynode, cyevent)
27461 return (_dict_cudaError_t[err],)
27463@cython.embedsignature(True)
27464def cudaGraphAddEventWaitNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, event):
27465 """ Creates an event wait node and adds it to a graph.
27467 Creates a new event wait node and adds it to `graph` with
27468 `numDependencies` dependencies specified via `pDependencies` and event
27469 specified in `event`. It is possible for `numDependencies` to be 0, in
27470 which case the node will be placed at the root of the graph.
27471 `pDependencies` may not have any duplicate entries. A handle to the new
27472 node will be returned in `pGraphNode`.
27474 The graph node will wait for all work captured in `event`. See
27475 :py:obj:`~.cuEventRecord()` for details on what is captured by an
27476 event. The synchronization will be performed efficiently on the device
27477 when applicable. `event` may be from a different context or device than
27478 the launch stream.
27480 These nodes may not be used in loops or conditionals.
27482 Parameters
27483 ----------
27484 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
27485 Graph to which to add the node
27486 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
27487 Dependencies of the node
27488 numDependencies : size_t
27489 Number of dependencies
27490 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
27491 Event for the node
27493 Returns
27494 -------
27495 cudaError_t
27496 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27497 pGraphNode : :py:obj:`~.cudaGraphNode_t`
27498 Returns newly created node
27500 See Also
27501 --------
27502 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
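Examples
--------
A minimal sketch (error checks elided)::

    from cuda.bindings import runtime as cudart

    err, event = cudart.cudaEventCreate()
    err, graph = cudart.cudaGraphCreate(0)
    # Wait on work captured in `event` (typically recorded by another
    # graph launch or stream) before downstream nodes may run.
    err, node = cudart.cudaGraphAddEventWaitNode(graph, None, 0, event)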
27503 """
27504 cdef cyruntime.cudaEvent_t cyevent
27505 if event is None:
27506 pevent = 0
27507 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
27508 pevent = int(event)
27509 else:
27510 pevent = int(cudaEvent_t(event))
27511 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
27512 pDependencies = [] if pDependencies is None else pDependencies
27513 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
27514 raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
27515 cdef cyruntime.cudaGraph_t cygraph
27516 if graph is None:
27517 pgraph = 0
27518 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
27519 pgraph = int(graph)
27520 else:
27521 pgraph = int(cudaGraph_t(graph))
27522 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
27523 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
27524 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
27525 if len(pDependencies) > 1:
27526 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
27527 if cypDependencies is NULL:
27528 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
27529 else:
27530 for idx in range(len(pDependencies)):
27531 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
27532 elif len(pDependencies) == 1:
27533 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
27534 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
27535 with nogil:
27536 err = cyruntime.cudaGraphAddEventWaitNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cyevent)
27537 if len(pDependencies) > 1 and cypDependencies is not NULL:
27538 free(cypDependencies)
27539 if err != cyruntime.cudaSuccess:
27540 return (_dict_cudaError_t[err], None)
27541 return (_dict_cudaError_t[err], pGraphNode)
27543@cython.embedsignature(True)
27544def cudaGraphEventWaitNodeGetEvent(node):
27545 """ Returns the event associated with an event wait node.
27547 Returns the event of event wait node `node` in `event_out`.
27549 Parameters
27550 ----------
27551 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27552 Node to get the event for
27554 Returns
27555 -------
27556 cudaError_t
27557 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27558 event_out : :py:obj:`~.cudaEvent_t`
27559 Pointer to return the event
27561 See Also
27562 --------
27563 :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphEventRecordNodeGetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`
27564 """
27565 cdef cyruntime.cudaGraphNode_t cynode
27566 if node is None:
27567 pnode = 0
27568 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
27569 pnode = int(node)
27570 else:
27571 pnode = int(cudaGraphNode_t(node))
27572 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
27573 cdef cudaEvent_t event_out = cudaEvent_t()
27574 with nogil:
27575 err = cyruntime.cudaGraphEventWaitNodeGetEvent(cynode, <cyruntime.cudaEvent_t*>event_out._pvt_ptr)
27576 if err != cyruntime.cudaSuccess:
27577 return (_dict_cudaError_t[err], None)
27578 return (_dict_cudaError_t[err], event_out)
27580@cython.embedsignature(True)
27581def cudaGraphEventWaitNodeSetEvent(node, event):
27582 """ Sets an event wait node's event.
27584 Sets the event of event wait node `node` to `event`.
27586 Parameters
27587 ----------
27588 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27589 Node to set the event for
27590 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
27591 Event to use
27593 Returns
27594 -------
27595 cudaError_t
27596 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27598 See Also
27599 --------
27600 :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphEventWaitNodeGetEvent`, :py:obj:`~.cudaGraphEventRecordNodeSetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`
27601 """
27602 cdef cyruntime.cudaEvent_t cyevent
27603 if event is None:
27604 pevent = 0
27605 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
27606 pevent = int(event)
27607 else:
27608 pevent = int(cudaEvent_t(event))
27609 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
27610 cdef cyruntime.cudaGraphNode_t cynode
27611 if node is None:
27612 pnode = 0
27613 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
27614 pnode = int(node)
27615 else:
27616 pnode = int(cudaGraphNode_t(node))
27617 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
27618 with nogil:
27619 err = cyruntime.cudaGraphEventWaitNodeSetEvent(cynode, cyevent)
27620 return (_dict_cudaError_t[err],)
27622@cython.embedsignature(True)
27623def cudaGraphAddExternalSemaphoresSignalNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, nodeParams : Optional[cudaExternalSemaphoreSignalNodeParams]):
27624 """ Creates an external semaphore signal node and adds it to a graph.
27626 Creates a new external semaphore signal node and adds it to `graph`
27627 with `numDependencies` dependencies specified via `pDependencies` and
27628 arguments specified in `nodeParams`. It is possible for
27629 `numDependencies` to be 0, in which case the node will be placed at the
27630 root of the graph. `pDependencies` may not have any duplicate entries. A
27631 handle to the new node will be returned in `pGraphNode`.
27633 Performs a signal operation on a set of externally allocated semaphore
27634 objects when the node is launched. The operation(s) will occur after
27635 all of the node's dependencies have completed.
27637 Parameters
27638 ----------
27639 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
27640 Graph to which to add the node
27641 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
27642 Dependencies of the node
27643 numDependencies : size_t
27644 Number of dependencies
27645 nodeParams : :py:obj:`~.cudaExternalSemaphoreSignalNodeParams`
27646 Parameters for the node
27648 Returns
27649 -------
27650 cudaError_t
27651 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27652 pGraphNode : :py:obj:`~.cudaGraphNode_t`
27653 Returns newly created node
27655 See Also
27656 --------
27657 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphExternalSemaphoresSignalNodeGetParams`, :py:obj:`~.cudaGraphExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
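Examples
--------
A hedged sketch. `graph` and `extSem` are assumed to exist (`extSem`
obtained earlier via :py:obj:`~.cudaImportExternalSemaphore`), and the
list-valued assignments to the array fields rely on this binding's
struct helpers; treat this as a sketch, not a verified recipe::

    from cuda.bindings import runtime as cudart

    sigParams = cudart.cudaExternalSemaphoreSignalParams()
    sigParams.params.fence.value = 1
    nodeParams = cudart.cudaExternalSemaphoreSignalNodeParams()
    nodeParams.extSemArray = [extSem]
    nodeParams.paramsArray = [sigParams]
    nodeParams.numExtSems = 1
    err, node = cudart.cudaGraphAddExternalSemaphoresSignalNode(graph, None, 0, nodeParams)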
27658 """
27659 pDependencies = [] if pDependencies is None else pDependencies
27660 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
27661 raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
27662 cdef cyruntime.cudaGraph_t cygraph
27663 if graph is None:
27664 pgraph = 0
27665 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
27666 pgraph = int(graph)
27667 else:
27668 pgraph = int(cudaGraph_t(graph))
27669 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
27670 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
27671 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
27672 if len(pDependencies) > 1:
27673 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
27674 if cypDependencies is NULL:
27675 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
27676 else:
27677 for idx in range(len(pDependencies)):
27678 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
27679 elif len(pDependencies) == 1:
27680 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
27681 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
27682 cdef cyruntime.cudaExternalSemaphoreSignalNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL
27683 with nogil:
27684 err = cyruntime.cudaGraphAddExternalSemaphoresSignalNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cynodeParams_ptr)
27685 if len(pDependencies) > 1 and cypDependencies is not NULL:
27686 free(cypDependencies)
27687 if err != cyruntime.cudaSuccess:
27688 return (_dict_cudaError_t[err], None)
27689 return (_dict_cudaError_t[err], pGraphNode)
27691@cython.embedsignature(True)
27692def cudaGraphExternalSemaphoresSignalNodeGetParams(hNode):
27693 """ Returns an external semaphore signal node's parameters.
27695 Returns the parameters of an external semaphore signal node `hNode` in
27696 `params_out`. The `extSemArray` and `paramsArray` returned in
27697 `params_out` are owned by the node. This memory remains valid until
27698 the node is destroyed or its parameters are modified, and should not be
27699 modified directly. Use
27700 :py:obj:`~.cudaGraphExternalSemaphoresSignalNodeSetParams` to update
27701 the parameters of this node.
27703 Parameters
27704 ----------
27705 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27706 Node to get the parameters for
27708 Returns
27709 -------
27710 cudaError_t
27711 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27712 params_out : :py:obj:`~.cudaExternalSemaphoreSignalNodeParams`
27713 Pointer to return the parameters
27715 See Also
27716 --------
27717 :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaGraphExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
27718 """
27719 cdef cyruntime.cudaGraphNode_t cyhNode
27720 if hNode is None:
27721 phNode = 0
27722 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
27723 phNode = int(hNode)
27724 else:
27725 phNode = int(cudaGraphNode_t(hNode))
27726 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
27727 cdef cudaExternalSemaphoreSignalNodeParams params_out = cudaExternalSemaphoreSignalNodeParams()
27728 with nogil:
27729 err = cyruntime.cudaGraphExternalSemaphoresSignalNodeGetParams(cyhNode, <cyruntime.cudaExternalSemaphoreSignalNodeParams*>params_out._pvt_ptr)
27730 if err != cyruntime.cudaSuccess:
27731 return (_dict_cudaError_t[err], None)
27732 return (_dict_cudaError_t[err], params_out)
27734@cython.embedsignature(True)
27735def cudaGraphExternalSemaphoresSignalNodeSetParams(hNode, nodeParams : Optional[cudaExternalSemaphoreSignalNodeParams]):
27736 """ Sets an external semaphore signal node's parameters.
27738 Sets the parameters of an external semaphore signal node `hNode` to
27739 `nodeParams`.
27741 Parameters
27742 ----------
27743 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27744 Node to set the parameters for
27745 nodeParams : :py:obj:`~.cudaExternalSemaphoreSignalNodeParams`
27746 Parameters to copy
27748 Returns
27749 -------
27750 cudaError_t
27751 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27753 See Also
27754 --------
27755 :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaGraphExternalSemaphoresSignalNodeGetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
27756 """
27757 cdef cyruntime.cudaGraphNode_t cyhNode
27758 if hNode is None:
27759 phNode = 0
27760 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
27761 phNode = int(hNode)
27762 else:
27763 phNode = int(cudaGraphNode_t(hNode))
27764 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
27765 cdef cyruntime.cudaExternalSemaphoreSignalNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL
27766 with nogil:
27767 err = cyruntime.cudaGraphExternalSemaphoresSignalNodeSetParams(cyhNode, cynodeParams_ptr)
27768 return (_dict_cudaError_t[err],)
27770@cython.embedsignature(True)
27771def cudaGraphAddExternalSemaphoresWaitNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, nodeParams : Optional[cudaExternalSemaphoreWaitNodeParams]):
27772 """ Creates an external semaphore wait node and adds it to a graph.
27774 Creates a new external semaphore wait node and adds it to `graph` with
27775 `numDependencies` dependencies specified via `pDependencies` and
27776 arguments specified in `nodeParams`. It is possible for
27777 `numDependencies` to be 0, in which case the node will be placed at the
27778 root of the graph. `pDependencies` may not have any duplicate entries. A
27779 handle to the new node will be returned in `pGraphNode`.
27781 Performs a wait operation on a set of externally allocated semaphore
27782 objects when the node is launched. Nodes that depend on this node will
27783 not be launched until the wait operation has completed.
27785 Parameters
27786 ----------
27787 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
27788 Graph to which to add the node
27789 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
27790 Dependencies of the node
27791 numDependencies : size_t
27792 Number of dependencies
27793 nodeParams : :py:obj:`~.cudaExternalSemaphoreWaitNodeParams`
27794 Parameters for the node
27796 Returns
27797 -------
27798 cudaError_t
27799 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27800 pGraphNode : :py:obj:`~.cudaGraphNode_t`
27801 Returns newly created node
27803 See Also
27804 --------
27805 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphExternalSemaphoresWaitNodeGetParams`, :py:obj:`~.cudaGraphExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
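Examples
--------
Analogous to the signal-node sketch above, with wait parameters
(`graph` and `extSem` assumed to exist; list-valued field assignment is
again an assumption about the binding's struct helpers)::

    from cuda.bindings import runtime as cudart

    waitParams = cudart.cudaExternalSemaphoreWaitParams()
    waitParams.params.fence.value = 1
    nodeParams = cudart.cudaExternalSemaphoreWaitNodeParams()
    nodeParams.extSemArray = [extSem]
    nodeParams.paramsArray = [waitParams]
    nodeParams.numExtSems = 1
    err, node = cudart.cudaGraphAddExternalSemaphoresWaitNode(graph, None, 0, nodeParams)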
27806 """
27807 pDependencies = [] if pDependencies is None else pDependencies
27808 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
27809 raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
27810 cdef cyruntime.cudaGraph_t cygraph
27811 if graph is None:
27812 pgraph = 0
27813 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
27814 pgraph = int(graph)
27815 else:
27816 pgraph = int(cudaGraph_t(graph))
27817 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
27818 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
27819 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
27820 if len(pDependencies) > 1:
27821 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
27822 if cypDependencies is NULL:
27823 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
27824 else:
27825 for idx in range(len(pDependencies)):
27826 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
27827 elif len(pDependencies) == 1:
27828 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
27829 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
27830 cdef cyruntime.cudaExternalSemaphoreWaitNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL
27831 with nogil:
27832 err = cyruntime.cudaGraphAddExternalSemaphoresWaitNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cynodeParams_ptr)
27833 if len(pDependencies) > 1 and cypDependencies is not NULL:
27834 free(cypDependencies)
27835 if err != cyruntime.cudaSuccess:
27836 return (_dict_cudaError_t[err], None)
27837 return (_dict_cudaError_t[err], pGraphNode)
27839@cython.embedsignature(True)
27840def cudaGraphExternalSemaphoresWaitNodeGetParams(hNode):
27841 """ Returns an external semaphore wait node's parameters.
27843 Returns the parameters of an external semaphore wait node `hNode` in
27844 `params_out`. The `extSemArray` and `paramsArray` returned in
27845 `params_out` are owned by the node. This memory remains valid until
27846 the node is destroyed or its parameters are modified, and should not be
27847 modified directly. Use
27848 :py:obj:`~.cudaGraphExternalSemaphoresWaitNodeSetParams` to update
27849 the parameters of this node.
27851 Parameters
27852 ----------
27853 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27854 Node to get the parameters for
27856 Returns
27857 -------
27858 cudaError_t
27859 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27860 params_out : :py:obj:`~.cudaExternalSemaphoreWaitNodeParams`
27861 Pointer to return the parameters
27863 See Also
27864 --------
27865 :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaGraphExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
27866 """
27867 cdef cyruntime.cudaGraphNode_t cyhNode
27868 if hNode is None:
27869 phNode = 0
27870 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
27871 phNode = int(hNode)
27872 else:
27873 phNode = int(cudaGraphNode_t(hNode))
27874 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
27875 cdef cudaExternalSemaphoreWaitNodeParams params_out = cudaExternalSemaphoreWaitNodeParams()
27876 with nogil:
27877 err = cyruntime.cudaGraphExternalSemaphoresWaitNodeGetParams(cyhNode, <cyruntime.cudaExternalSemaphoreWaitNodeParams*>params_out._pvt_ptr)
27878 if err != cyruntime.cudaSuccess:
27879 return (_dict_cudaError_t[err], None)
27880 return (_dict_cudaError_t[err], params_out)
27882@cython.embedsignature(True)
27883def cudaGraphExternalSemaphoresWaitNodeSetParams(hNode, nodeParams : Optional[cudaExternalSemaphoreWaitNodeParams]):
27884 """ Sets an external semaphore wait node's parameters.
27886 Sets the parameters of an external semaphore wait node `hNode` to
27887 `nodeParams`.
27889 Parameters
27890 ----------
27891 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27892 Node to set the parameters for
27893 nodeParams : :py:obj:`~.cudaExternalSemaphoreWaitNodeParams`
27894 Parameters to copy
27896 Returns
27897 -------
27898 cudaError_t
27899 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27901 See Also
27902 --------
27903 :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaGraphExternalSemaphoresWaitNodeGetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
27904 """
27905 cdef cyruntime.cudaGraphNode_t cyhNode
27906 if hNode is None:
27907 phNode = 0
27908 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
27909 phNode = int(hNode)
27910 else:
27911 phNode = int(cudaGraphNode_t(hNode))
27912 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
27913 cdef cyruntime.cudaExternalSemaphoreWaitNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL
27914 with nogil:
27915 err = cyruntime.cudaGraphExternalSemaphoresWaitNodeSetParams(cyhNode, cynodeParams_ptr)
27916 return (_dict_cudaError_t[err],)
27918@cython.embedsignature(True)
27919def cudaGraphAddMemAllocNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, nodeParams : Optional[cudaMemAllocNodeParams]):
27920 """ Creates an allocation node and adds it to a graph.
27922 Creates a new allocation node and adds it to `graph` with
27923 `numDependencies` dependencies specified via `pDependencies` and
27924 arguments specified in `nodeParams`. It is possible for
27925 `numDependencies` to be 0, in which case the node will be placed at the
27926 root of the graph. `pDependencies` may not have any duplicate entries.
27927 A handle to the new node will be returned in `pGraphNode`.
27929 When :py:obj:`~.cudaGraphAddMemAllocNode` creates an allocation node,
27930 it returns the address of the allocation in `nodeParams.dptr`. The
27931 allocation's address remains fixed across instantiations and launches.
27933 If the allocation is freed in the same graph, by creating a free node
27934 using :py:obj:`~.cudaGraphAddMemFreeNode`, the allocation can be
27935 accessed by nodes ordered after the allocation node but before the free
27936 node. These allocations cannot be freed outside the owning graph, and
27937 they can only be freed once in the owning graph.
27939 If the allocation is not freed in the same graph, then it can be
27940 accessed not only by nodes in the graph which are ordered after the
27941 allocation node, but also by stream operations ordered after the
27942 graph's execution but before the allocation is freed.
27944 Allocations which are not freed in the same graph can be freed by:
27946 - passing the allocation to :py:obj:`~.cudaMemFreeAsync` or
27947 :py:obj:`~.cudaMemFree`;
27949 - launching a graph with a free node for that allocation; or
27951 - specifying :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`
27952 during instantiation, which makes each launch behave as though it
27953 called :py:obj:`~.cudaMemFreeAsync` for every unfreed allocation.
27955 It is not possible to free an allocation in both the owning graph and
27956 another graph. If the allocation is freed in the same graph, a free
27957 node cannot be added to another graph. If the allocation is freed in
27958 another graph, a free node can no longer be added to the owning graph.
27960 The following restrictions apply to graphs which contain allocation
27961 and/or memory free nodes:
27963 - Nodes and edges of the graph cannot be deleted.
27965 - The graph can only be used in a child node if the ownership is moved
27966 to the parent.
27968 - Only one instantiation of the graph may exist at any point in time.
27970 - The graph cannot be cloned.
27972 Parameters
27973 ----------
27974 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
27975 Graph to which to add the node
27976 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
27977 Dependencies of the node
27978 numDependencies : size_t
27979 Number of dependencies
27980 nodeParams : :py:obj:`~.cudaMemAllocNodeParams`
27981 Parameters for the node
27983 Returns
27984 -------
27985 cudaError_t
27986 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorOutOfMemory`
27987 pGraphNode : :py:obj:`~.cudaGraphNode_t`
27988 Returns newly created node
27990 See Also
27991 --------
27992 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphAddMemFreeNode`, :py:obj:`~.cudaGraphMemAllocNodeGetParams`, :py:obj:`~.cudaDeviceGraphMemTrim`, :py:obj:`~.cudaDeviceGetGraphMemAttribute`, :py:obj:`~.cudaDeviceSetGraphMemAttribute`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
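Examples
--------
A minimal sketch allocating 1 MiB on device 0 (error checks elided;
field spellings follow this binding's structs)::

    from cuda.bindings import runtime as cudart

    err, graph = cudart.cudaGraphCreate(0)
    params = cudart.cudaMemAllocNodeParams()
    params.poolProps.allocType = cudart.cudaMemAllocationType.cudaMemAllocationTypePinned
    params.poolProps.location.type = cudart.cudaMemLocationType.cudaMemLocationTypeDevice
    params.poolProps.location.id = 0
    params.bytesize = 1 << 20
    err, allocNode = cudart.cudaGraphAddMemAllocNode(graph, None, 0, params)
    dptr = params.dptr    # address is fixed across instantiations and launches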
27993 """
27994 pDependencies = [] if pDependencies is None else pDependencies
27995 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
27996 raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
27997 cdef cyruntime.cudaGraph_t cygraph
27998 if graph is None:
27999 pgraph = 0
28000 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
28001 pgraph = int(graph)
28002 else:
28003 pgraph = int(cudaGraph_t(graph))
28004 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
28005 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
28006 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
28007 if len(pDependencies) > 1:
28008 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
28009 if cypDependencies is NULL:
28010 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
28011 else:
28012 for idx in range(len(pDependencies)):
28013 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
28014 elif len(pDependencies) == 1:
28015 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
28016 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
28017 cdef cyruntime.cudaMemAllocNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL
28018 with nogil:
28019 err = cyruntime.cudaGraphAddMemAllocNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cynodeParams_ptr)
28020 if len(pDependencies) > 1 and cypDependencies is not NULL:
28021 free(cypDependencies)
28022 if err != cyruntime.cudaSuccess:
28023 return (_dict_cudaError_t[err], None)
28024 return (_dict_cudaError_t[err], pGraphNode)
28026@cython.embedsignature(True)
28027def cudaGraphMemAllocNodeGetParams(node):
28028 """ Returns a memory alloc node's parameters.
28030 Returns the parameters of a memory alloc node `node` in `params_out`.
28031 The `poolProps` and `accessDescs` returned in `params_out` are owned
28032 by the node. This memory remains valid until the node is destroyed. The
28033 returned parameters must not be modified.
28035 Parameters
28036 ----------
28037 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
28038 Node to get the parameters for
28040 Returns
28041 -------
28042 cudaError_t
28043 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28044 params_out : :py:obj:`~.cudaMemAllocNodeParams`
28045 Pointer to return the parameters
28047 See Also
28048 --------
28049 :py:obj:`~.cudaGraphAddMemAllocNode`, :py:obj:`~.cudaGraphMemFreeNodeGetParams`
28050 """
28051 cdef cyruntime.cudaGraphNode_t cynode
28052 if node is None:
28053 pnode = 0
28054 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
28055 pnode = int(node)
28056 else:
28057 pnode = int(cudaGraphNode_t(node))
28058 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
28059 cdef cudaMemAllocNodeParams params_out = cudaMemAllocNodeParams()
28060 with nogil:
28061 err = cyruntime.cudaGraphMemAllocNodeGetParams(cynode, <cyruntime.cudaMemAllocNodeParams*>params_out._pvt_ptr)
28062 if err != cyruntime.cudaSuccess:
28063 return (_dict_cudaError_t[err], None)
28064 return (_dict_cudaError_t[err], params_out)
28066@cython.embedsignature(True)
28067def cudaGraphAddMemFreeNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, dptr):
28068 """ Creates a memory free node and adds it to a graph.
28070 Creates a new memory free node and adds it to `graph` with
28071 `numDependencies` dependencies specified via `pDependencies` and
28072 address specified in `dptr`. It is possible for `numDependencies` to be
28073 0, in which case the node will be placed at the root of the graph.
28074 `pDependencies` may not have any duplicate entries. A handle to the new
28075 node will be returned in `pGraphNode`.
28077 :py:obj:`~.cudaGraphAddMemFreeNode` will return
28078 :py:obj:`~.cudaErrorInvalidValue` if the user attempts to free:
28080 - an allocation twice in the same graph.
28082 - an address that was not returned by an allocation node.
28084 - an invalid address.
28086 The following restrictions apply to graphs which contain allocation
28087 and/or memory free nodes:
28089 - Nodes and edges of the graph cannot be deleted.
28091 - The graph can only be used in a child node if the ownership is moved
28092 to the parent.
28094 - Only one instantiation of the graph may exist at any point in time.
28096 - The graph cannot be cloned.
28098 Parameters
28099 ----------
28100 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
28101 Graph to which to add the node
28102 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
28103 Dependencies of the node
28104 numDependencies : size_t
28105 Number of dependencies
28106 dptr : Any
28107 Address of memory to free
28109 Returns
28110 -------
28111 cudaError_t
28112 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorOutOfMemory`
28113 pGraphNode : :py:obj:`~.cudaGraphNode_t`
28114 Returns newly created node
28116 See Also
28117 --------
28118 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphAddMemAllocNode`, :py:obj:`~.cudaGraphMemFreeNodeGetParams`, :py:obj:`~.cudaDeviceGraphMemTrim`, :py:obj:`~.cudaDeviceGetGraphMemAttribute`, :py:obj:`~.cudaDeviceSetGraphMemAttribute`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
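Examples
--------
Continuing the allocation-node sketch above: free the same address
later in the owning graph (`graph`, `allocNode`, and `dptr` assumed
from that sketch)::

    from cuda.bindings import runtime as cudart

    err, freeNode = cudart.cudaGraphAddMemFreeNode(graph, [allocNode], 1, dptr)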
28119 """
28120 pDependencies = [] if pDependencies is None else pDependencies
28121 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
28122 raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
28123 cdef cyruntime.cudaGraph_t cygraph
28124 if graph is None:
28125 pgraph = 0
28126 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
28127 pgraph = int(graph)
28128 else:
28129 pgraph = int(cudaGraph_t(graph))
28130 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
28131 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
28132 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
28133 if len(pDependencies) > 1:
28134 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
28135 if cypDependencies is NULL:
28136 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
28137 else:
28138 for idx in range(len(pDependencies)):
28139 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
28140 elif len(pDependencies) == 1:
28141 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
28142 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
28143 cydptr = _HelperInputVoidPtr(dptr)
28144 cdef void* cydptr_ptr = <void*><void_ptr>cydptr.cptr
28145 with nogil:
28146 err = cyruntime.cudaGraphAddMemFreeNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cydptr_ptr)
28147 if len(pDependencies) > 1 and cypDependencies is not NULL:
28148 free(cypDependencies)
28149 if err != cyruntime.cudaSuccess:
28150 return (_dict_cudaError_t[err], None)
28151 return (_dict_cudaError_t[err], pGraphNode)
28153@cython.embedsignature(True)
28154def cudaGraphMemFreeNodeGetParams(node):
28155 """ Returns a memory free node's parameters.
28157 Returns the address of a memory free node `node` in `dptr_out`.
28159 Parameters
28160 ----------
28161 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
28162 Node to get the parameters for
28164 Returns
28165 -------
28166 cudaError_t
28167 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28168 dptr_out : Any
28169 Pointer to return the device address
28171 See Also
28172 --------
28173 :py:obj:`~.cudaGraphAddMemFreeNode`, :py:obj:`~.cudaGraphMemAllocNodeGetParams`
28174 """
28175 cdef cyruntime.cudaGraphNode_t cynode
28176 if node is None:
28177 pnode = 0
28178 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
28179 pnode = int(node)
28180 else:
28181 pnode = int(cudaGraphNode_t(node))
28182 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
28183 cdef void_ptr dptr_out = 0
28184 cdef void* cydptr_out_ptr = <void*>&dptr_out
28185 with nogil:
28186 err = cyruntime.cudaGraphMemFreeNodeGetParams(cynode, cydptr_out_ptr)
28187 if err != cyruntime.cudaSuccess:
28188 return (_dict_cudaError_t[err], None)
28189 return (_dict_cudaError_t[err], dptr_out)
28191@cython.embedsignature(True)
28192def cudaDeviceGraphMemTrim(int device):
28193 Frees back to the OS any unused memory that was cached on the specified device for use with graphs.
28195 Blocks which are not in use by a graph that is either currently
28196 executing or scheduled to execute are freed back to the operating
28197 system.
28199 Parameters
28200 ----------
28201 device : int
28202 The device for which cached memory should be freed.
28204 Returns
28205 -------
28206 cudaError_t
28207 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28209 See Also
28210 --------
28211 :py:obj:`~.cudaGraphAddMemAllocNode`, :py:obj:`~.cudaGraphAddMemFreeNode`, :py:obj:`~.cudaDeviceGetGraphMemAttribute`, :py:obj:`~.cudaDeviceSetGraphMemAttribute`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`
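Examples
--------
A minimal sketch (added for illustration, not from the CUDA docs;
assumes device 0 is visible and may hold cached graph memory):

>>> from cuda.bindings import runtime
>>> err, = runtime.cudaDeviceGraphMemTrim(0)
>>> assert err == runtime.cudaError_t.cudaSuccess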
28212 """
28213 with nogil:
28214 err = cyruntime.cudaDeviceGraphMemTrim(device)
28215 return (_dict_cudaError_t[err],)
28217@cython.embedsignature(True)
28218def cudaDeviceGetGraphMemAttribute(int device, attr not None : cudaGraphMemAttributeType):
28219 """ Query asynchronous allocation attributes related to graphs.
28221 Valid attributes are:
28223 - :py:obj:`~.cudaGraphMemAttrUsedMemCurrent`: Amount of memory, in
28224 bytes, currently associated with graphs
28226 - :py:obj:`~.cudaGraphMemAttrUsedMemHigh`: High watermark of memory, in
28227 bytes, associated with graphs since the last time it was reset. High
28228 watermark can only be reset to zero.
28230 - :py:obj:`~.cudaGraphMemAttrReservedMemCurrent`: Amount of memory, in
28231 bytes, currently allocated for use by the CUDA graphs asynchronous
28232 allocator.
28234 - :py:obj:`~.cudaGraphMemAttrReservedMemHigh`: High watermark of
28235 memory, in bytes, currently allocated for use by the CUDA graphs
28236 asynchronous allocator.
28238 Parameters
28239 ----------
28240 device : int
28241 Specifies the scope of the query
28242 attr : :py:obj:`~.cudaGraphMemAttributeType`
28243 attribute to get
28245 Returns
28246 -------
28247 cudaError_t
28248 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`
28249 value : Any
28250 retrieved value
28252 See Also
28253 --------
28254 :py:obj:`~.cudaDeviceSetGraphMemAttribute`, :py:obj:`~.cudaGraphAddMemAllocNode`, :py:obj:`~.cudaGraphAddMemFreeNode`, :py:obj:`~.cudaDeviceGraphMemTrim`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`
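Examples
--------
A minimal sketch querying the current graph memory usage on device 0
(illustrative only; attribute values are reported in bytes):

>>> from cuda.bindings import runtime
>>> attr = runtime.cudaGraphMemAttributeType.cudaGraphMemAttrUsedMemCurrent
>>> err, used = runtime.cudaDeviceGetGraphMemAttribute(0, attr)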
28255 """
28256 cdef cyruntime.cudaGraphMemAttributeType cyattr = attr.value
28257 cdef _HelperCUgraphMem_attribute cyvalue = _HelperCUgraphMem_attribute(attr, 0, is_getter=True)
28258 cdef void* cyvalue_ptr = <void*><void_ptr>cyvalue.cptr
28259 with nogil:
28260 err = cyruntime.cudaDeviceGetGraphMemAttribute(device, cyattr, cyvalue_ptr)
28261 if err != cyruntime.cudaSuccess:
28262 return (_dict_cudaError_t[err], None)
28263 return (_dict_cudaError_t[err], cyvalue.pyObj())
28265@cython.embedsignature(True)
28266def cudaDeviceSetGraphMemAttribute(int device, attr not None : cudaGraphMemAttributeType, value):
28267 """ Set asynchronous allocation attributes related to graphs.
28269 Valid attributes are:
28271 - :py:obj:`~.cudaGraphMemAttrUsedMemHigh`: High watermark of memory, in
28272 bytes, associated with graphs since the last time it was reset. High
28273 watermark can only be reset to zero.
28275 - :py:obj:`~.cudaGraphMemAttrReservedMemHigh`: High watermark of
28276 memory, in bytes, currently allocated for use by the CUDA graphs
28277 asynchronous allocator.
28279 Parameters
28280 ----------
28281 device : int
28282 Specifies the scope of the query
28283 attr : :py:obj:`~.cudaGraphMemAttributeType`
28284 attribute to set
28285 value : Any
28286 pointer to value to set
28288 Returns
28289 -------
28290 cudaError_t
28291 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`
28293 See Also
28294 --------
28295 :py:obj:`~.cudaDeviceGetGraphMemAttribute`, :py:obj:`~.cudaGraphAddMemAllocNode`, :py:obj:`~.cudaGraphAddMemFreeNode`, :py:obj:`~.cudaDeviceGraphMemTrim`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`
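Examples
--------
A minimal sketch resetting the used-memory high watermark on device 0
(illustrative only; high watermarks can only be reset to zero):

>>> from cuda.bindings import runtime
>>> attr = runtime.cudaGraphMemAttributeType.cudaGraphMemAttrUsedMemHigh
>>> err, = runtime.cudaDeviceSetGraphMemAttribute(0, attr, 0)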
28296 """
28297 cdef cyruntime.cudaGraphMemAttributeType cyattr = attr.value
28298 cdef _HelperCUgraphMem_attribute cyvalue = _HelperCUgraphMem_attribute(attr, value, is_getter=False)
28299 cdef void* cyvalue_ptr = <void*><void_ptr>cyvalue.cptr
28300 with nogil:
28301 err = cyruntime.cudaDeviceSetGraphMemAttribute(device, cyattr, cyvalue_ptr)
28302 return (_dict_cudaError_t[err],)
28304@cython.embedsignature(True)
28305def cudaGraphClone(originalGraph):
28306 """ Clones a graph.
28308 This function creates a copy of `originalGraph` and returns it in
28309 `pGraphClone`. All parameters are copied into the cloned graph. The
28310 original graph may be modified after this call without affecting the
28311 clone.
28313 Child graph nodes in the original graph are recursively copied into the
28314 clone.
28316 Parameters
28317 ----------
28318 originalGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
28319 Graph to clone
28321 Returns
28322 -------
28323 cudaError_t
28324 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
28325 pGraphClone : :py:obj:`~.cudaGraph_t`
28326 Returns newly created cloned graph
28328 See Also
28329 --------
28330 :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphNodeFindInClone`
28332 Notes
28333 -----
28334 Cloning is not supported for graphs which contain memory allocation nodes, memory free nodes, or conditional nodes.
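Examples
--------
A minimal sketch cloning an empty graph (illustrative only):

>>> from cuda.bindings import runtime
>>> err, graph = runtime.cudaGraphCreate(0)
>>> err, clone = runtime.cudaGraphClone(graph)
>>> err, = runtime.cudaGraphDestroy(clone)
>>> err, = runtime.cudaGraphDestroy(graph)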
28335 """
28336 cdef cyruntime.cudaGraph_t cyoriginalGraph
28337 if originalGraph is None:
28338 poriginalGraph = 0
28339 elif isinstance(originalGraph, (cudaGraph_t,driver.CUgraph)):
28340 poriginalGraph = int(originalGraph)
28341 else:
28342 poriginalGraph = int(cudaGraph_t(originalGraph))
28343 cyoriginalGraph = <cyruntime.cudaGraph_t><void_ptr>poriginalGraph
28344 cdef cudaGraph_t pGraphClone = cudaGraph_t()
28345 with nogil:
28346 err = cyruntime.cudaGraphClone(<cyruntime.cudaGraph_t*>pGraphClone._pvt_ptr, cyoriginalGraph)
28347 if err != cyruntime.cudaSuccess:
28348 return (_dict_cudaError_t[err], None)
28349 return (_dict_cudaError_t[err], pGraphClone)
28351@cython.embedsignature(True)
28352def cudaGraphNodeFindInClone(originalNode, clonedGraph):
28353 """ Finds a cloned version of a node.
28355 This function returns the node in `clonedGraph` corresponding to
28356 `originalNode` in the original graph.
28358 `clonedGraph` must have been cloned from `originalGraph` via
28359 :py:obj:`~.cudaGraphClone`. `originalNode` must have been in
28360 `originalGraph` at the time of the call to :py:obj:`~.cudaGraphClone`,
28361 and the corresponding cloned node in `clonedGraph` must not have been
28362 removed. The cloned node is then returned via `pClonedNode`.
28364 Parameters
28365 ----------
28366 originalNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
28367 Handle to the original node
28368 clonedGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
28369 Cloned graph to query
28371 Returns
28372 -------
28373 cudaError_t
28374 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28375 pNode : :py:obj:`~.cudaGraphNode_t`
28376 Returns handle to the cloned node
28378 See Also
28379 --------
28380 :py:obj:`~.cudaGraphClone`
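Examples
--------
A minimal sketch mapping a node from an original graph into its clone
(illustrative; the empty node stands in for any real workload node):

>>> from cuda.bindings import runtime
>>> err, graph = runtime.cudaGraphCreate(0)
>>> err, node = runtime.cudaGraphAddEmptyNode(graph, None, 0)
>>> err, clone = runtime.cudaGraphClone(graph)
>>> err, clonedNode = runtime.cudaGraphNodeFindInClone(node, clone)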
28381 """
28382 cdef cyruntime.cudaGraph_t cyclonedGraph
28383 if clonedGraph is None:
28384 pclonedGraph = 0
28385 elif isinstance(clonedGraph, (cudaGraph_t,driver.CUgraph)):
28386 pclonedGraph = int(clonedGraph)
28387 else:
28388 pclonedGraph = int(cudaGraph_t(clonedGraph))
28389 cyclonedGraph = <cyruntime.cudaGraph_t><void_ptr>pclonedGraph
28390 cdef cyruntime.cudaGraphNode_t cyoriginalNode
28391 if originalNode is None:
28392 poriginalNode = 0
28393 elif isinstance(originalNode, (cudaGraphNode_t,driver.CUgraphNode)):
28394 poriginalNode = int(originalNode)
28395 else:
28396 poriginalNode = int(cudaGraphNode_t(originalNode))
28397 cyoriginalNode = <cyruntime.cudaGraphNode_t><void_ptr>poriginalNode
28398 cdef cudaGraphNode_t pNode = cudaGraphNode_t()
28399 with nogil:
28400 err = cyruntime.cudaGraphNodeFindInClone(<cyruntime.cudaGraphNode_t*>pNode._pvt_ptr, cyoriginalNode, cyclonedGraph)
28401 if err != cyruntime.cudaSuccess:
28402 return (_dict_cudaError_t[err], None)
28403 return (_dict_cudaError_t[err], pNode)
28405@cython.embedsignature(True)
28406def cudaGraphNodeGetType(node):
28407 """ Returns a node's type.
28409 Returns the node type of `node` in `pType`.
28411 Parameters
28412 ----------
28413 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
28414 Node to query
28416 Returns
28417 -------
28418 cudaError_t
28419 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28420 pType : :py:obj:`~.cudaGraphNodeType`
28421 Pointer to return the node type
28423 See Also
28424 --------
28425 :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphChildGraphNodeGetGraph`, :py:obj:`~.cudaGraphKernelNodeGetParams`, :py:obj:`~.cudaGraphKernelNodeSetParams`, :py:obj:`~.cudaGraphHostNodeGetParams`, :py:obj:`~.cudaGraphHostNodeSetParams`, :py:obj:`~.cudaGraphMemcpyNodeGetParams`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphMemsetNodeGetParams`, :py:obj:`~.cudaGraphMemsetNodeSetParams`
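Examples
--------
A minimal sketch (illustrative; an empty node reports
:py:obj:`~.cudaGraphNodeType.cudaGraphNodeTypeEmpty`):

>>> from cuda.bindings import runtime
>>> err, graph = runtime.cudaGraphCreate(0)
>>> err, node = runtime.cudaGraphAddEmptyNode(graph, None, 0)
>>> err, nodeType = runtime.cudaGraphNodeGetType(node)
>>> assert nodeType == runtime.cudaGraphNodeType.cudaGraphNodeTypeEmpty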
28426 """
28427 cdef cyruntime.cudaGraphNode_t cynode
28428 if node is None:
28429 pnode = 0
28430 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
28431 pnode = int(node)
28432 else:
28433 pnode = int(cudaGraphNode_t(node))
28434 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
28435 cdef cyruntime.cudaGraphNodeType pType
28436 with nogil:
28437 err = cyruntime.cudaGraphNodeGetType(cynode, &pType)
28438 if err != cyruntime.cudaSuccess:
28439 return (_dict_cudaError_t[err], None)
28440 return (_dict_cudaError_t[err], cudaGraphNodeType(pType))
28442@cython.embedsignature(True)
28443def cudaGraphNodeGetContainingGraph(hNode):
28444 """ Returns the graph that contains a given graph node.
28446 Returns the graph that contains `hNode` in `*phGraph`. If `hNode` is
28447 located in a child graph, that child graph is returned.
28449 Parameters
28450 ----------
28451 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
28452 Node to query
28454 Returns
28455 -------
28456 cudaError_t
28457 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28458 phGraph : :py:obj:`~.cudaGraph_t`
28459 Pointer to return the containing graph
28461 See Also
28462 --------
28463 :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphDebugDotPrint`, :py:obj:`~.cudaGraphNodeGetLocalId`, :py:obj:`~.cudaGraphNodeGetToolsId`, :py:obj:`~.cudaGraphGetId`, :py:obj:`~.cudaGraphExecGetId`
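Examples
--------
A minimal sketch (illustrative; for a node added directly to `graph`,
the containing graph compares equal to `graph` itself):

>>> from cuda.bindings import runtime
>>> err, graph = runtime.cudaGraphCreate(0)
>>> err, node = runtime.cudaGraphAddEmptyNode(graph, None, 0)
>>> err, owner = runtime.cudaGraphNodeGetContainingGraph(node)
>>> assert int(owner) == int(graph)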
28464 """
28465 cdef cyruntime.cudaGraphNode_t cyhNode
28466 if hNode is None:
28467 phNode = 0
28468 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
28469 phNode = int(hNode)
28470 else:
28471 phNode = int(cudaGraphNode_t(hNode))
28472 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
28473 cdef cudaGraph_t phGraph = cudaGraph_t()
28474 with nogil:
28475 err = cyruntime.cudaGraphNodeGetContainingGraph(cyhNode, <cyruntime.cudaGraph_t*>phGraph._pvt_ptr)
28476 if err != cyruntime.cudaSuccess:
28477 return (_dict_cudaError_t[err], None)
28478 return (_dict_cudaError_t[err], phGraph)
28480@cython.embedsignature(True)
28481def cudaGraphNodeGetLocalId(hNode):
28482 """ Returns the node id of a given graph node.
28484 Returns the node id of `hNode` in `*nodeId`. The nodeId matches that
28485 referenced by :py:obj:`~.cudaGraphDebugDotPrint`. The local nodeId and
28486 graphId together can uniquely identify the node.
28488 Parameters
28489 ----------
28490 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
28491 Node to query
28493 Returns
28494 -------
28495 cudaError_t
28496 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28497 nodeId : unsigned int
28498 Pointer to return the nodeId
28500 See Also
28501 --------
28502 :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphDebugDotPrint`, :py:obj:`~.cudaGraphNodeGetContainingGraph`, :py:obj:`~.cudaGraphNodeGetToolsId`, :py:obj:`~.cudaGraphGetId`, :py:obj:`~.cudaGraphExecGetId`
28503 """
28504 cdef cyruntime.cudaGraphNode_t cyhNode
28505 if hNode is None:
28506 phNode = 0
28507 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
28508 phNode = int(hNode)
28509 else:
28510 phNode = int(cudaGraphNode_t(hNode))
28511 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
28512 cdef unsigned int nodeId = 0
28513 with nogil:
28514 err = cyruntime.cudaGraphNodeGetLocalId(cyhNode, &nodeId)
28515 if err != cyruntime.cudaSuccess:
28516 return (_dict_cudaError_t[err], None)
28517 return (_dict_cudaError_t[err], nodeId)
28519@cython.embedsignature(True)
28520def cudaGraphNodeGetToolsId(hNode):
28521 """ Returns an id used by tools to identify a given node.
28523 Parameters
28524 ----------
28525 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
28526 Node to query
28528 Returns
28529 -------
28530 cudaError_t
28531 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28532 toolsNodeId : unsigned long long
28533 Pointer to return the id used by tools
28535 See Also
28536 --------
28537 :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphDebugDotPrint`, :py:obj:`~.cudaGraphNodeGetContainingGraph`, :py:obj:`~.cudaGraphNodeGetLocalId`, :py:obj:`~.cudaGraphGetId`, :py:obj:`~.cudaGraphExecGetId`
28538 """
28539 cdef cyruntime.cudaGraphNode_t cyhNode
28540 if hNode is None:
28541 phNode = 0
28542 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
28543 phNode = int(hNode)
28544 else:
28545 phNode = int(cudaGraphNode_t(hNode))
28546 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
28547 cdef unsigned long long toolsNodeId = 0
28548 with nogil:
28549 err = cyruntime.cudaGraphNodeGetToolsId(cyhNode, &toolsNodeId)
28550 if err != cyruntime.cudaSuccess:
28551 return (_dict_cudaError_t[err], None)
28552 return (_dict_cudaError_t[err], toolsNodeId)
28554@cython.embedsignature(True)
28555def cudaGraphGetId(hGraph):
28556 """ Returns the id of a given graph.
28558 Returns the id of `hGraph` in `*graphId`. The value in `*graphId`
28559 matches that referenced by :py:obj:`~.cudaGraphDebugDotPrint`.
28561 Parameters
28562 ----------
28563 hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
28564 Graph to query
28566 Returns
28567 -------
28568 cudaError_t
28569 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28570 graphId : unsigned int
28571 Pointer to return the graphId
28573 See Also
28574 --------
28575 :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphDebugDotPrint`, :py:obj:`~.cudaGraphNodeGetContainingGraph`, :py:obj:`~.cudaGraphNodeGetLocalId`, :py:obj:`~.cudaGraphNodeGetToolsId`, :py:obj:`~.cudaGraphExecGetId`
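Examples
--------
A minimal sketch (illustrative; the id is the one shown in
:py:obj:`~.cudaGraphDebugDotPrint` output):

>>> from cuda.bindings import runtime
>>> err, graph = runtime.cudaGraphCreate(0)
>>> err, graphId = runtime.cudaGraphGetId(graph)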
28576 """
28577 cdef cyruntime.cudaGraph_t cyhGraph
28578 if hGraph is None:
28579 phGraph = 0
28580 elif isinstance(hGraph, (cudaGraph_t,driver.CUgraph)):
28581 phGraph = int(hGraph)
28582 else:
28583 phGraph = int(cudaGraph_t(hGraph))
28584 cyhGraph = <cyruntime.cudaGraph_t><void_ptr>phGraph
28585 cdef unsigned int graphID = 0
28586 with nogil:
28587 err = cyruntime.cudaGraphGetId(cyhGraph, &graphID)
28588 if err != cyruntime.cudaSuccess:
28589 return (_dict_cudaError_t[err], None)
28590 return (_dict_cudaError_t[err], graphID)
28592@cython.embedsignature(True)
28593def cudaGraphExecGetId(hGraphExec):
28594 """ Returns the id of a given graph exec.
28596 Returns the id of `hGraphExec` in `*graphId`. The value in `*graphId`
28597 matches that referenced by :py:obj:`~.cudaGraphDebugDotPrint`.
28599 Parameters
28600 ----------
28601 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
28602 Graph to query
28604 Returns
28605 -------
28606 cudaError_t
28607 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28608 graphId : unsigned int
28609 Pointer to return the graphId
28611 See Also
28612 --------
28613 :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphDebugDotPrint`, :py:obj:`~.cudaGraphNodeGetContainingGraph`, :py:obj:`~.cudaGraphNodeGetLocalId`, :py:obj:`~.cudaGraphNodeGetToolsId`, :py:obj:`~.cudaGraphGetId`
28614 """
28615 cdef cyruntime.cudaGraphExec_t cyhGraphExec
28616 if hGraphExec is None:
28617 phGraphExec = 0
28618 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
28619 phGraphExec = int(hGraphExec)
28620 else:
28621 phGraphExec = int(cudaGraphExec_t(hGraphExec))
28622 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
28623 cdef unsigned int graphID = 0
28624 with nogil:
28625 err = cyruntime.cudaGraphExecGetId(cyhGraphExec, &graphID)
28626 if err != cyruntime.cudaSuccess:
28627 return (_dict_cudaError_t[err], None)
28628 return (_dict_cudaError_t[err], graphID)
28630@cython.embedsignature(True)
28631def cudaGraphGetNodes(graph, size_t numNodes = 0):
28632 """ Returns a graph's nodes.
28634 Returns a list of `graph's` nodes. `nodes` may be NULL, in which case
28635 this function will return the number of nodes in `numNodes`. Otherwise,
28636 `numNodes` entries will be filled in. If `numNodes` is higher than the
28637 actual number of nodes, the remaining entries in `nodes` will be set to
28638 NULL, and the number of nodes actually obtained will be returned in
28639 `numNodes`.
28641 Parameters
28642 ----------
28643 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
28644 Graph to query
28645 numNodes : int
28646 See description
28648 Returns
28649 -------
28650 cudaError_t
28651 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28652 nodes : list[:py:obj:`~.cudaGraphNode_t`]
28653 Pointer to return the nodes
28654 numNodes : int
28655 See description
28657 See Also
28658 --------
28659 :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphNodeGetType`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes`
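Examples
--------
A minimal sketch of the usual two-call pattern: query the count first,
then fetch the handles (illustrative only):

>>> from cuda.bindings import runtime
>>> err, graph = runtime.cudaGraphCreate(0)
>>> err, node = runtime.cudaGraphAddEmptyNode(graph, None, 0)
>>> err, _, count = runtime.cudaGraphGetNodes(graph)
>>> err, nodes, count = runtime.cudaGraphGetNodes(graph, count)
>>> assert count == 1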
28660 """
28661 cdef size_t _graph_length = numNodes
28662 cdef cyruntime.cudaGraph_t cygraph
28663 if graph is None:
28664 pgraph = 0
28665 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
28666 pgraph = int(graph)
28667 else:
28668 pgraph = int(cudaGraph_t(graph))
28669 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
28670 cdef cyruntime.cudaGraphNode_t* cynodes = NULL
28671 pynodes = []
28672 if _graph_length != 0:
28673 cynodes = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
28674 if cynodes is NULL:
28675 raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
28676 with nogil:
28677 err = cyruntime.cudaGraphGetNodes(cygraph, cynodes, &numNodes)
28678 if cudaError_t(err) == cudaError_t(0):
28679 pynodes = [cudaGraphNode_t(init_value=<void_ptr>cynodes[idx]) for idx in range(_graph_length)]
28680 if cynodes is not NULL:
28681 free(cynodes)
28682 if err != cyruntime.cudaSuccess:
28683 return (_dict_cudaError_t[err], None, None)
28684 return (_dict_cudaError_t[err], pynodes, numNodes)
28686@cython.embedsignature(True)
28687def cudaGraphGetRootNodes(graph, size_t pNumRootNodes = 0):
28688 """ Returns a graph's root nodes.
28690 Returns a list of `graph's` root nodes. `pRootNodes` may be NULL, in
28691 which case this function will return the number of root nodes in
28692 `pNumRootNodes`. Otherwise, `pNumRootNodes` entries will be filled in.
28693 If `pNumRootNodes` is higher than the actual number of root nodes, the
28694 remaining entries in `pRootNodes` will be set to NULL, and the number
28695 of nodes actually obtained will be returned in `pNumRootNodes`.
28697 Parameters
28698 ----------
28699 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
28700 Graph to query
28701 pNumRootNodes : int
28702 See description
28704 Returns
28705 -------
28706 cudaError_t
28707 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28708 pRootNodes : list[:py:obj:`~.cudaGraphNode_t`]
28709 Pointer to return the root nodes
28710 pNumRootNodes : int
28711 See description
28713 See Also
28714 --------
28715 :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphNodeGetType`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes`
28716 """
28717 cdef size_t _graph_length = pNumRootNodes
28718 cdef cyruntime.cudaGraph_t cygraph
28719 if graph is None:
28720 pgraph = 0
28721 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
28722 pgraph = int(graph)
28723 else:
28724 pgraph = int(cudaGraph_t(graph))
28725 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
28726 cdef cyruntime.cudaGraphNode_t* cypRootNodes = NULL
28727 pypRootNodes = []
28728 if _graph_length != 0:
28729 cypRootNodes = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
28730 if cypRootNodes is NULL:
28731 raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
28732 with nogil:
28733 err = cyruntime.cudaGraphGetRootNodes(cygraph, cypRootNodes, &pNumRootNodes)
28734 if cudaError_t(err) == cudaError_t(0):
28735 pypRootNodes = [cudaGraphNode_t(init_value=<void_ptr>cypRootNodes[idx]) for idx in range(_graph_length)]
28736 if cypRootNodes is not NULL:
28737 free(cypRootNodes)
28738 if err != cyruntime.cudaSuccess:
28739 return (_dict_cudaError_t[err], None, None)
28740 return (_dict_cudaError_t[err], pypRootNodes, pNumRootNodes)
28742@cython.embedsignature(True)
28743def cudaGraphGetEdges(graph, size_t numEdges = 0):
28744 """ Returns a graph's dependency edges.
28746 Returns a list of `graph's` dependency edges. Edges are returned via
28747 corresponding indices in `from`, `to` and `edgeData`; that is, the node
28748 in `to`[i] has a dependency on the node in `from`[i] with data
28749 `edgeData`[i]. `from` and `to` may both be NULL, in which case this
28750 function only returns the number of edges in `numEdges`. Otherwise,
28751 `numEdges` entries will be filled in. If `numEdges` is higher than the
28752 actual number of edges, the remaining entries in `from` and `to` will
28753 be set to NULL, and the number of edges actually returned will be
28754 written to `numEdges`. `edgeData` may alone be NULL, in which case the
28755 edges must all have default (zeroed) edge data. Attempting a lossy
28756 query via NULL `edgeData` will result in
28757 :py:obj:`~.cudaErrorLossyQuery`. If `edgeData` is non-NULL then `from`
28758 and `to` must be as well.
28760 Parameters
28761 ----------
28762 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
28763 Graph to get the edges from
28764 numEdges : int
28765 See description
28767 Returns
28768 -------
28769 cudaError_t
28770 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorLossyQuery`, :py:obj:`~.cudaErrorInvalidValue`
28771 from : list[:py:obj:`~.cudaGraphNode_t`]
28772 Location to return edge endpoints
28773 to : list[:py:obj:`~.cudaGraphNode_t`]
28774 Location to return edge endpoints
28775 edgeData : list[:py:obj:`~.cudaGraphEdgeData`]
28776 Optional location to return edge data
28777 numEdges : int
28778 See description
28780 See Also
28781 --------
28782 :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphAddDependencies`, :py:obj:`~.cudaGraphRemoveDependencies`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes`
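Examples
--------
A minimal sketch using the same count-then-fetch pattern as
:py:obj:`~.cudaGraphGetNodes` (illustrative; the two empty nodes and
the dependency between them are set up only for demonstration):

>>> from cuda.bindings import runtime
>>> err, graph = runtime.cudaGraphCreate(0)
>>> err, a = runtime.cudaGraphAddEmptyNode(graph, None, 0)
>>> err, b = runtime.cudaGraphAddEmptyNode(graph, None, 0)
>>> err, = runtime.cudaGraphAddDependencies(graph, [a], [b], None, 1)
>>> err, _, _, _, numEdges = runtime.cudaGraphGetEdges(graph)
>>> err, from_, to, edgeData, numEdges = runtime.cudaGraphGetEdges(graph, numEdges)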
28783 """
28784 cdef size_t _graph_length = numEdges
28785 cdef cyruntime.cudaGraph_t cygraph
28786 if graph is None:
28787 pgraph = 0
28788 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
28789 pgraph = int(graph)
28790 else:
28791 pgraph = int(cudaGraph_t(graph))
28792 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
28793 cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL
28794 pyfrom_ = []
28795 if _graph_length != 0:
28796 cyfrom_ = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
28797 if cyfrom_ is NULL:
28798 raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
28799 cdef cyruntime.cudaGraphNode_t* cyto = NULL
28800 pyto = []
28801 if _graph_length != 0:
28802 cyto = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
28803 if cyto is NULL:
28804 raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
28805 cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL
28806 pyedgeData = []
28807 if _graph_length != 0:
28808 cyedgeData = <cyruntime.cudaGraphEdgeData*>calloc(_graph_length, sizeof(cyruntime.cudaGraphEdgeData))
28809 if cyedgeData is NULL:
28810 raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
28811 with nogil:
28812 err = cyruntime.cudaGraphGetEdges(cygraph, cyfrom_, cyto, cyedgeData, &numEdges)
28813 if cudaError_t(err) == cudaError_t(0):
28814 pyfrom_ = [cudaGraphNode_t(init_value=<void_ptr>cyfrom_[idx]) for idx in range(_graph_length)]
28815 if cyfrom_ is not NULL:
28816 free(cyfrom_)
28817 if cudaError_t(err) == cudaError_t(0):
28818 pyto = [cudaGraphNode_t(init_value=<void_ptr>cyto[idx]) for idx in range(_graph_length)]
28819 if cyto is not NULL:
28820 free(cyto)
28821 if cudaError_t(err) == cudaError_t(0):
28822 pyedgeData = [cudaGraphEdgeData(_ptr=<void_ptr>&cyedgeData[idx]) for idx in range(_graph_length)]
28823 if cyedgeData is not NULL:
28824 free(cyedgeData)
28825 if err != cyruntime.cudaSuccess:
28826 return (_dict_cudaError_t[err], None, None, None, None)
28827 return (_dict_cudaError_t[err], pyfrom_, pyto, pyedgeData, numEdges)
28829@cython.embedsignature(True)
28830def cudaGraphNodeGetDependencies(node, size_t pNumDependencies = 0):
28831 """ Returns a node's dependencies.
28833 Returns a list of `node's` dependencies. `pDependencies` may be NULL,
28834 in which case this function will return the number of dependencies in
28835 `pNumDependencies`. Otherwise, `pNumDependencies` entries will be
28836 filled in. If `pNumDependencies` is higher than the actual number of
28837 dependencies, the remaining entries in `pDependencies` will be set to
28838 NULL, and the number of nodes actually obtained will be returned in
28839 `pNumDependencies`.
28841 Note that if an edge has non-zero (non-default) edge data and
28842 `edgeData` is NULL, this API will return
28843 :py:obj:`~.cudaErrorLossyQuery`. If `edgeData` is non-NULL, then
28844 `pDependencies` must be as well.
28846 Parameters
28847 ----------
28848 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
28849 Node to query
28850 pNumDependencies : int
28851 See description
28853 Returns
28854 -------
28855 cudaError_t
28856 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorLossyQuery`, :py:obj:`~.cudaErrorInvalidValue`
28857 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
28858 Pointer to return the dependencies
28859 edgeData : list[:py:obj:`~.cudaGraphEdgeData`]
28860 Optional array to return edge data for each dependency
28861 pNumDependencies : int
28862 See description
28864 See Also
28865 --------
28866 :py:obj:`~.cudaGraphNodeGetDependentNodes`, :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphAddDependencies`, :py:obj:`~.cudaGraphRemoveDependencies`
28867 """
28868 cdef size_t _graph_length = pNumDependencies
28869 cdef cyruntime.cudaGraphNode_t cynode
28870 if node is None:
28871 pnode = 0
28872 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
28873 pnode = int(node)
28874 else:
28875 pnode = int(cudaGraphNode_t(node))
28876 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
28877 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
28878 pypDependencies = []
28879 if _graph_length != 0:
28880 cypDependencies = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
28881 if cypDependencies is NULL:
28882 raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
28883 cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL
28884 pyedgeData = []
28885 if _graph_length != 0:
28886 cyedgeData = <cyruntime.cudaGraphEdgeData*>calloc(_graph_length, sizeof(cyruntime.cudaGraphEdgeData))
28887 if cyedgeData is NULL:
28888 raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
28889 with nogil:
28890 err = cyruntime.cudaGraphNodeGetDependencies(cynode, cypDependencies, cyedgeData, &pNumDependencies)
28891 if cudaError_t(err) == cudaError_t(0):
28892 pypDependencies = [cudaGraphNode_t(init_value=<void_ptr>cypDependencies[idx]) for idx in range(_graph_length)]
28893 if cypDependencies is not NULL:
28894 free(cypDependencies)
28895 if cudaError_t(err) == cudaError_t(0):
28896 pyedgeData = [cudaGraphEdgeData(_ptr=<void_ptr>&cyedgeData[idx]) for idx in range(_graph_length)]
28897 if cyedgeData is not NULL:
28898 free(cyedgeData)
28899 if err != cyruntime.cudaSuccess:
28900 return (_dict_cudaError_t[err], None, None, None)
28901 return (_dict_cudaError_t[err], pypDependencies, pyedgeData, pNumDependencies)
28903@cython.embedsignature(True)
28904def cudaGraphNodeGetDependentNodes(node, size_t pNumDependentNodes = 0):
28905 """ Returns a node's dependent nodes.
28907 Returns a list of `node's` dependent nodes. `pDependentNodes` may be
28908 NULL, in which case this function will return the number of dependent
28909 nodes in `pNumDependentNodes`. Otherwise, `pNumDependentNodes` entries
28910 will be filled in. If `pNumDependentNodes` is higher than the actual
28911 number of dependent nodes, the remaining entries in `pDependentNodes`
28912 will be set to NULL, and the number of nodes actually obtained will be
28913 returned in `pNumDependentNodes`.
28915 Note that if an edge has non-zero (non-default) edge data and
28916 `edgeData` is NULL, this API will return
28917 :py:obj:`~.cudaErrorLossyQuery`. If `edgeData` is non-NULL, then
28918 `pDependentNodes` must be as well.
28920 Parameters
28921 ----------
28922 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
28923 Node to query
28924 pNumDependentNodes : int
28925 See description
28927 Returns
28928 -------
28929 cudaError_t
28930 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorLossyQuery`, :py:obj:`~.cudaErrorInvalidValue`
28931 pDependentNodes : list[:py:obj:`~.cudaGraphNode_t`]
28932 Pointer to return the dependent nodes
28933 edgeData : list[:py:obj:`~.cudaGraphEdgeData`]
28934 Optional pointer to return edge data for dependent nodes
28935 pNumDependentNodes : int
28936 See description
28938 See Also
28939 --------
28940 :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphAddDependencies`, :py:obj:`~.cudaGraphRemoveDependencies`
28941 """
28942 cdef size_t _graph_length = pNumDependentNodes
28943 cdef cyruntime.cudaGraphNode_t cynode
28944 if node is None:
28945 pnode = 0
28946 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
28947 pnode = int(node)
28948 else:
28949 pnode = int(cudaGraphNode_t(node))
28950 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
28951 cdef cyruntime.cudaGraphNode_t* cypDependentNodes = NULL
28952 pypDependentNodes = []
28953 if _graph_length != 0:
28954 cypDependentNodes = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
28955 if cypDependentNodes is NULL:
28956 raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
28957 cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL
28958 pyedgeData = []
28959 if _graph_length != 0:
28960 cyedgeData = <cyruntime.cudaGraphEdgeData*>calloc(_graph_length, sizeof(cyruntime.cudaGraphEdgeData))
28961 if cyedgeData is NULL:
28962 raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
28963 with nogil:
28964 err = cyruntime.cudaGraphNodeGetDependentNodes(cynode, cypDependentNodes, cyedgeData, &pNumDependentNodes)
28965 if cudaError_t(err) == cudaError_t(0):
28966 pypDependentNodes = [cudaGraphNode_t(init_value=<void_ptr>cypDependentNodes[idx]) for idx in range(_graph_length)]
28967 if cypDependentNodes is not NULL:
28968 free(cypDependentNodes)
28969 if cudaError_t(err) == cudaError_t(0):
28970 pyedgeData = [cudaGraphEdgeData(_ptr=<void_ptr>&cyedgeData[idx]) for idx in range(_graph_length)]
28971 if cyedgeData is not NULL:
28972 free(cyedgeData)
28973 if err != cyruntime.cudaSuccess:
28974 return (_dict_cudaError_t[err], None, None, None)
28975 return (_dict_cudaError_t[err], pypDependentNodes, pyedgeData, pNumDependentNodes)
28977@cython.embedsignature(True)
28978def cudaGraphAddDependencies(graph, from_ : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], to : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], edgeData : Optional[tuple[cudaGraphEdgeData] | list[cudaGraphEdgeData]], size_t numDependencies):
28979 """ Adds dependency edges to a graph.
28981 The number of dependencies to be added is defined by `numDependencies`.
28982 Elements in `pFrom` and `pTo` at corresponding indices define a
28983 dependency. Each node in `pFrom` and `pTo` must belong to `graph`.
28985 If `numDependencies` is 0, elements in `pFrom` and `pTo` will be
28986 ignored. Specifying an existing dependency will return an error.
28988 Parameters
28989 ----------
28990 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
28991 Graph to which dependencies are added
28992 from : list[:py:obj:`~.cudaGraphNode_t`]
28993 Array of nodes that provide the dependencies
28994 to : list[:py:obj:`~.cudaGraphNode_t`]
28995 Array of dependent nodes
28996 edgeData : list[:py:obj:`~.cudaGraphEdgeData`]
28997 Optional array of edge data. If NULL, default (zeroed) edge data is
28998 assumed.
28999 numDependencies : size_t
29000 Number of dependencies to be added
29002 Returns
29003 -------
29004 cudaError_t
29005 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
29007 See Also
29008 --------
29009 :py:obj:`~.cudaGraphRemoveDependencies`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes`
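Examples
--------
A minimal sketch making node `b` depend on node `a` (illustrative;
passing None for `edgeData` uses default, zeroed edge data):

>>> from cuda.bindings import runtime
>>> err, graph = runtime.cudaGraphCreate(0)
>>> err, a = runtime.cudaGraphAddEmptyNode(graph, None, 0)
>>> err, b = runtime.cudaGraphAddEmptyNode(graph, None, 0)
>>> err, = runtime.cudaGraphAddDependencies(graph, [a], [b], None, 1)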
29010 """
29011 edgeData = [] if edgeData is None else edgeData
29012 if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in edgeData):
29013 raise TypeError("Argument 'edgeData' is not instance of type (expected tuple[cyruntime.cudaGraphEdgeData,] or list[cyruntime.cudaGraphEdgeData,]")
29014 to = [] if to is None else to
29015 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in to):
29016 raise TypeError("Argument 'to' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
29017 from_ = [] if from_ is None else from_
29018 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in from_):
29019 raise TypeError("Argument 'from_' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
29020 cdef cyruntime.cudaGraph_t cygraph
29021 if graph is None:
29022 pgraph = 0
29023 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
29024 pgraph = int(graph)
29025 else:
29026 pgraph = int(cudaGraph_t(graph))
29027 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
29028 cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL
29029 if len(from_) > 1:
29030 cyfrom_ = <cyruntime.cudaGraphNode_t*> calloc(len(from_), sizeof(cyruntime.cudaGraphNode_t))
29031 if cyfrom_ is NULL:
29032 raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
29033 else:
29034 for idx in range(len(from_)):
29035 cyfrom_[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>from_[idx])._pvt_ptr[0]
29036 elif len(from_) == 1:
29037 cyfrom_ = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>from_[0])._pvt_ptr
29038 cdef cyruntime.cudaGraphNode_t* cyto = NULL
29039 if len(to) > 1:
29040 cyto = <cyruntime.cudaGraphNode_t*> calloc(len(to), sizeof(cyruntime.cudaGraphNode_t))
29041 if cyto is NULL:
29042 raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
29043 else:
29044 for idx in range(len(to)):
29045 cyto[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>to[idx])._pvt_ptr[0]
29046 elif len(to) == 1:
29047 cyto = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>to[0])._pvt_ptr
29048 cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL
29049 if len(edgeData) > 1:
29050 cyedgeData = <cyruntime.cudaGraphEdgeData*> calloc(len(edgeData), sizeof(cyruntime.cudaGraphEdgeData))
29051 if cyedgeData is NULL:
29052 raise MemoryError('Failed to allocate length x size memory: ' + str(len(edgeData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
29053 for idx in range(len(edgeData)):
29054 string.memcpy(&cyedgeData[idx], (<cudaGraphEdgeData>edgeData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData))
29055 elif len(edgeData) == 1:
29056 cyedgeData = (<cudaGraphEdgeData>edgeData[0])._pvt_ptr
29057 with nogil:
29058 err = cyruntime.cudaGraphAddDependencies(cygraph, cyfrom_, cyto, cyedgeData, numDependencies)
29059 if len(from_) > 1 and cyfrom_ is not NULL:
29060 free(cyfrom_)
29061 if len(to) > 1 and cyto is not NULL:
29062 free(cyto)
29063 if len(edgeData) > 1 and cyedgeData is not NULL:
29064 free(cyedgeData)
29065 return (_dict_cudaError_t[err],)
29067@cython.embedsignature(True)
29068def cudaGraphRemoveDependencies(graph, from_ : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], to : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], edgeData : Optional[tuple[cudaGraphEdgeData] | list[cudaGraphEdgeData]], size_t numDependencies):
29069 """ Removes dependency edges from a graph.
29071 The number of `pDependencies` to be removed is defined by
29072 `numDependencies`. Elements in `pFrom` and `pTo` at corresponding
29073 indices define a dependency. Each node in `pFrom` and `pTo` must belong
29074 to `graph`.
29076 If `numDependencies` is 0, elements in `pFrom` and `pTo` will be
29077 ignored. Specifying an edge that does not exist in the graph, with data
29078 matching `edgeData`, results in an error. `edgeData` is nullable, which
29079 is equivalent to passing default (zeroed) data for each edge.
29081 Parameters
29082 ----------
29083 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
29084 Graph from which to remove dependencies
29085 from : list[:py:obj:`~.cudaGraphNode_t`]
29086 Array of nodes that provide the dependencies
29087 to : list[:py:obj:`~.cudaGraphNode_t`]
29088 Array of dependent nodes
29089 edgeData : list[:py:obj:`~.cudaGraphEdgeData`]
29090 Optional array of edge data. If NULL, edge data is assumed to be
29091 default (zeroed).
29092 numDependencies : size_t
29093 Number of dependencies to be removed
29095 Returns
29096 -------
29097 cudaError_t
29098 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
29100 See Also
29101 --------
29102 :py:obj:`~.cudaGraphAddDependencies`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes`
29103 """
29104 edgeData = [] if edgeData is None else edgeData
29105 if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in edgeData):
29106 raise TypeError("Argument 'edgeData' is not instance of type (expected tuple[cyruntime.cudaGraphEdgeData,] or list[cyruntime.cudaGraphEdgeData,]")
29107 to = [] if to is None else to
29108 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in to):
29109 raise TypeError("Argument 'to' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
29110 from_ = [] if from_ is None else from_
29111 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in from_):
29112 raise TypeError("Argument 'from_' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
29113 cdef cyruntime.cudaGraph_t cygraph
29114 if graph is None:
29115 pgraph = 0
29116 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
29117 pgraph = int(graph)
29118 else:
29119 pgraph = int(cudaGraph_t(graph))
29120 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
29121 cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL
29122 if len(from_) > 1:
29123 cyfrom_ = <cyruntime.cudaGraphNode_t*> calloc(len(from_), sizeof(cyruntime.cudaGraphNode_t))
29124 if cyfrom_ is NULL:
29125 raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
29126 else:
29127 for idx in range(len(from_)):
29128 cyfrom_[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>from_[idx])._pvt_ptr[0]
29129 elif len(from_) == 1:
29130 cyfrom_ = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>from_[0])._pvt_ptr
29131 cdef cyruntime.cudaGraphNode_t* cyto = NULL
29132 if len(to) > 1:
29133 cyto = <cyruntime.cudaGraphNode_t*> calloc(len(to), sizeof(cyruntime.cudaGraphNode_t))
29134 if cyto is NULL:
29135 raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
29136 else:
29137 for idx in range(len(to)):
29138 cyto[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>to[idx])._pvt_ptr[0]
29139 elif len(to) == 1:
29140 cyto = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>to[0])._pvt_ptr
29141 cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL
29142 if len(edgeData) > 1:
29143 cyedgeData = <cyruntime.cudaGraphEdgeData*> calloc(len(edgeData), sizeof(cyruntime.cudaGraphEdgeData))
29144 if cyedgeData is NULL:
29145 raise MemoryError('Failed to allocate length x size memory: ' + str(len(edgeData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
29146 for idx in range(len(edgeData)):
29147 string.memcpy(&cyedgeData[idx], (<cudaGraphEdgeData>edgeData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData))
29148 elif len(edgeData) == 1:
29149 cyedgeData = (<cudaGraphEdgeData>edgeData[0])._pvt_ptr
29150 with nogil:
29151 err = cyruntime.cudaGraphRemoveDependencies(cygraph, cyfrom_, cyto, cyedgeData, numDependencies)
29152 if len(from_) > 1 and cyfrom_ is not NULL:
29153 free(cyfrom_)
29154 if len(to) > 1 and cyto is not NULL:
29155 free(cyto)
29156 if len(edgeData) > 1 and cyedgeData is not NULL:
29157 free(cyedgeData)
29158 return (_dict_cudaError_t[err],)
29160@cython.embedsignature(True)
29161def cudaGraphDestroyNode(node):
29162 """ Remove a node from the graph.
29164 Removes `node` from its graph. This operation also severs any
29165 dependencies of other nodes on `node` and vice versa.
29167 Dependencies cannot be removed from graphs which contain allocation or
29168 free nodes. Any attempt to do so will return an error.
29170 Parameters
29171 ----------
29172 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
29173 Node to remove
29175 Returns
29176 -------
29177 cudaError_t
29178 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
29180 See Also
29181 --------
29182 :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
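Examples
--------
A minimal sketch removing a freshly added node (illustrative only):

>>> from cuda.bindings import runtime
>>> err, graph = runtime.cudaGraphCreate(0)
>>> err, node = runtime.cudaGraphAddEmptyNode(graph, None, 0)
>>> err, = runtime.cudaGraphDestroyNode(node)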
29183 """
29184 cdef cyruntime.cudaGraphNode_t cynode
29185 if node is None:
29186 pnode = 0
29187 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
29188 pnode = int(node)
29189 else:
29190 pnode = int(cudaGraphNode_t(node))
29191 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
29192 with nogil:
29193 err = cyruntime.cudaGraphDestroyNode(cynode)
29194 return (_dict_cudaError_t[err],)
29196@cython.embedsignature(True)
29197def cudaGraphInstantiate(graph, unsigned long long flags):
29198 """ Creates an executable graph from a graph.
29200 Instantiates `graph` as an executable graph. The graph is validated for
29201 any structural constraints or intra-node constraints which were not
29202 previously validated. If instantiation is successful, a handle to the
29203 instantiated graph is returned in `pGraphExec`.
29205 The `flags` parameter controls the behavior of instantiation and
29206 subsequent graph launches. Valid flags are:
29208 - :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`, which
29209 configures a graph containing memory allocation nodes to
29210 automatically free any unfreed memory allocations before the graph is
29211 relaunched.
29213 - :py:obj:`~.cudaGraphInstantiateFlagDeviceLaunch`, which configures
29214 the graph for launch from the device. If this flag is passed, the
29215 executable graph handle returned can be used to launch the graph from
29216 both the host and device. This flag cannot be used in conjunction
29217 with :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`.
29219 - :py:obj:`~.cudaGraphInstantiateFlagUseNodePriority`, which causes the
29220 graph to use the priorities from the per-node attributes rather than
29221 the priority of the launch stream during execution. Note that
29222 priorities are only available on kernel nodes, and are copied from
29223 stream priority during stream capture.
29225 If `graph` contains any allocation or free nodes, there can be at most
29226 one executable graph in existence for that graph at a time. An attempt
29227 to instantiate a second executable graph before destroying the first
29228 with :py:obj:`~.cudaGraphExecDestroy` will result in an error. The same
29229 also applies if `graph` contains any device-updatable kernel nodes.
29231 Graphs instantiated for launch on the device have additional
29232 restrictions which do not apply to host graphs:
29234 - The graph's nodes must reside on a single device.
29236 - The graph can only contain kernel nodes, memcpy nodes, memset nodes,
29237 and child graph nodes.
29239 - The graph cannot be empty and must contain at least one kernel,
29240 memcpy, or memset node. Operation-specific restrictions are outlined
29241 below.
29243 - Kernel nodes:
29245 - Use of CUDA Dynamic Parallelism is not permitted.
29247 - Cooperative launches are permitted as long as MPS is not in use.
29249 - Memcpy nodes:
29251 - Only copies involving device memory and/or pinned device-mapped
29252 host memory are permitted.
29254 - Copies involving CUDA arrays are not permitted.
29256 - Both operands must be accessible from the current device, and the
29257 current device must match the device of other nodes in the graph.
29259 If `graph` is not instantiated for launch on the device but contains
29260 kernels which call device-side :py:obj:`~.cudaGraphLaunch()` from
29261 multiple devices, this will result in an error.
29263 Parameters
29264 ----------
29265 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
29266 Graph to instantiate
29267 flags : unsigned long long
29268 Flags to control instantiation. See
29269 :py:obj:`~.cudaGraphInstantiateFlags`.
29271 Returns
29272 -------
29273 cudaError_t
29274 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
29275 pGraphExec : :py:obj:`~.cudaGraphExec_t`
29276 Returns instantiated graph
29278 See Also
29279 --------
29280 :py:obj:`~.cudaGraphInstantiateWithFlags`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphUpload`, :py:obj:`~.cudaGraphLaunch`, :py:obj:`~.cudaGraphExecDestroy`
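Examples
--------
A minimal sketch of the instantiate/launch/destroy life cycle
(illustrative; stream 0 is the default stream, and the empty node
stands in for a captured or hand-built workload):

>>> from cuda.bindings import runtime
>>> err, graph = runtime.cudaGraphCreate(0)
>>> err, node = runtime.cudaGraphAddEmptyNode(graph, None, 0)
>>> err, graphExec = runtime.cudaGraphInstantiate(graph, 0)
>>> err, = runtime.cudaGraphLaunch(graphExec, 0)
>>> err, = runtime.cudaGraphExecDestroy(graphExec)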
29281 """
29282 cdef cyruntime.cudaGraph_t cygraph
29283 if graph is None:
29284 pgraph = 0
29285 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
29286 pgraph = int(graph)
29287 else:
29288 pgraph = int(cudaGraph_t(graph))
29289 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
29290 cdef cudaGraphExec_t pGraphExec = cudaGraphExec_t()
29291 with nogil:
29292 err = cyruntime.cudaGraphInstantiate(<cyruntime.cudaGraphExec_t*>pGraphExec._pvt_ptr, cygraph, flags)
29293 if err != cyruntime.cudaSuccess:
29294 return (_dict_cudaError_t[err], None)
29295 return (_dict_cudaError_t[err], pGraphExec)
29297@cython.embedsignature(True)
29298def cudaGraphInstantiateWithFlags(graph, unsigned long long flags):
29299 """ Creates an executable graph from a graph.
29301 Instantiates `graph` as an executable graph. The graph is validated for
29302 any structural constraints or intra-node constraints which were not
29303 previously validated. If instantiation is successful, a handle to the
29304 instantiated graph is returned in `pGraphExec`.
29306 The `flags` parameter controls the behavior of instantiation and
29307 subsequent graph launches. Valid flags are:
29309 - :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`, which
29310 configures a graph containing memory allocation nodes to
29311 automatically free any unfreed memory allocations before the graph is
29312 relaunched.
29314 - :py:obj:`~.cudaGraphInstantiateFlagDeviceLaunch`, which configures
29315 the graph for launch from the device. If this flag is passed, the
29316 executable graph handle returned can be used to launch the graph from
29317 both the host and device. This flag can only be used on platforms
29318 which support unified addressing. This flag cannot be used in
29319 conjunction with
29320 :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`.
29322 - :py:obj:`~.cudaGraphInstantiateFlagUseNodePriority`, which causes the
29323 graph to use the priorities from the per-node attributes rather than
29324 the priority of the launch stream during execution. Note that
29325 priorities are only available on kernel nodes, and are copied from
29326 stream priority during stream capture.
29328 If `graph` contains any allocation or free nodes, there can be at most
29329 one executable graph in existence for that graph at a time. An attempt
29330 to instantiate a second executable graph before destroying the first
29331 with :py:obj:`~.cudaGraphExecDestroy` will result in an error. The same
29332 also applies if `graph` contains any device-updatable kernel nodes.
29334 If `graph` contains kernels which call device-side
29335 :py:obj:`~.cudaGraphLaunch()` from multiple devices, this will result
29336 in an error.
29338 Graphs instantiated for launch on the device have additional
29339 restrictions which do not apply to host graphs:
29341 - The graph's nodes must reside on a single device.
29343 - The graph can only contain kernel nodes, memcpy nodes, memset nodes,
29344 and child graph nodes.
29346 - The graph cannot be empty and must contain at least one kernel,
29347 memcpy, or memset node. Operation-specific restrictions are outlined
29348 below.
29350 - Kernel nodes:
29352 - Use of CUDA Dynamic Parallelism is not permitted.
29354 - Cooperative launches are permitted as long as MPS is not in use.
29356 - Memcpy nodes:
29358 - Only copies involving device memory and/or pinned device-mapped
29359 host memory are permitted.
29361 - Copies involving CUDA arrays are not permitted.
29363 - Both operands must be accessible from the current device, and the
29364 current device must match the device of other nodes in the graph.
29366 Parameters
29367 ----------
29368 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
29369 Graph to instantiate
29370 flags : unsigned long long
29371 Flags to control instantiation. See
29372 :py:obj:`~.cudaGraphInstantiateFlags`.
29374 Returns
29375 -------
29376 cudaError_t
29377 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
29378 pGraphExec : :py:obj:`~.cudaGraphExec_t`
29379 Returns instantiated graph
29381 See Also
29382 --------
29383 :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphUpload`, :py:obj:`~.cudaGraphLaunch`, :py:obj:`~.cudaGraphExecDestroy`
29384 """
29385 cdef cyruntime.cudaGraph_t cygraph
29386 if graph is None:
29387 pgraph = 0
29388 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
29389 pgraph = int(graph)
29390 else:
29391 pgraph = int(cudaGraph_t(graph))
29392 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
29393 cdef cudaGraphExec_t pGraphExec = cudaGraphExec_t()
29394 with nogil:
29395 err = cyruntime.cudaGraphInstantiateWithFlags(<cyruntime.cudaGraphExec_t*>pGraphExec._pvt_ptr, cygraph, flags)
29396 if err != cyruntime.cudaSuccess:
29397 return (_dict_cudaError_t[err], None)
29398 return (_dict_cudaError_t[err], pGraphExec)
29400@cython.embedsignature(True)
29401def cudaGraphInstantiateWithParams(graph, instantiateParams : Optional[cudaGraphInstantiateParams]):
29402 """ Creates an executable graph from a graph.
29404 Instantiates `graph` as an executable graph according to the
29405 `instantiateParams` structure. The graph is validated for any
29406 structural constraints or intra-node constraints which were not
29407 previously validated. If instantiation is successful, a handle to the
29408 instantiated graph is returned in `pGraphExec`.
29410 `instantiateParams` controls the behavior of instantiation and
29411 subsequent graph launches, as well as returning more detailed
29412 information in the event of an error.
29413 :py:obj:`~.cudaGraphInstantiateParams` is defined as:
29415 **View CUDA Toolkit Documentation for a C++ code example**
29417 The `flags` field controls the behavior of instantiation and subsequent
29418 graph launches. Valid flags are:
29420 - :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`, which
29421 configures a graph containing memory allocation nodes to
29422 automatically free any unfreed memory allocations before the graph is
29423 relaunched.
29425 - :py:obj:`~.cudaGraphInstantiateFlagUpload`, which will perform an
29426 upload of the graph into `uploadStream` once the graph has been
29427 instantiated.
29429 - :py:obj:`~.cudaGraphInstantiateFlagDeviceLaunch`, which configures
29430 the graph for launch from the device. If this flag is passed, the
29431 executable graph handle returned can be used to launch the graph from
29432 both the host and device. This flag can only be used on platforms
29433 which support unified addressing. This flag cannot be used in
29434 conjunction with
29435 :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`.
29437 - :py:obj:`~.cudaGraphInstantiateFlagUseNodePriority`, which causes the
29438 graph to use the priorities from the per-node attributes rather than
29439 the priority of the launch stream during execution. Note that
29440 priorities are only available on kernel nodes, and are copied from
29441 stream priority during stream capture.
29443 If `graph` contains any allocation or free nodes, there can be at most
29444 one executable graph in existence for that graph at a time. An attempt
29445 to instantiate a second executable graph before destroying the first
29446 with :py:obj:`~.cudaGraphExecDestroy` will result in an error. The same
29447 also applies if `graph` contains any device-updatable kernel nodes.
29449 If `graph` contains kernels which call device-side
29450 :py:obj:`~.cudaGraphLaunch()` from multiple devices, this will result
29451 in an error.
29453 Graphs instantiated for launch on the device have additional
29454 restrictions which do not apply to host graphs:
29456 - The graph's nodes must reside on a single device.
29458 - The graph can only contain kernel nodes, memcpy nodes, memset nodes,
29459 and child graph nodes.
29461 - The graph cannot be empty and must contain at least one kernel,
29462 memcpy, or memset node. Operation-specific restrictions are outlined
29463 below.
29465 - Kernel nodes:
29467 - Use of CUDA Dynamic Parallelism is not permitted.
29469 - Cooperative launches are permitted as long as MPS is not in use.
29471 - Memcpy nodes:
29473 - Only copies involving device memory and/or pinned device-mapped
29474 host memory are permitted.
29476 - Copies involving CUDA arrays are not permitted.
29478 - Both operands must be accessible from the current device, and the
29479 current device must match the device of other nodes in the graph.
29481 In the event of an error, the `result_out` and `errNode_out` fields
29482 will contain more information about the nature of the error. Possible
29483 error reporting includes:
29485 - :py:obj:`~.cudaGraphInstantiateError`, if passed an invalid value or
29486 if an unexpected error occurred which is described by the return
29487 value of the function. `errNode_out` will be set to NULL.
29489 - :py:obj:`~.cudaGraphInstantiateInvalidStructure`, if the graph
29490 structure is invalid. `errNode_out` will be set to one of the
29491 offending nodes.
29493 - :py:obj:`~.cudaGraphInstantiateNodeOperationNotSupported`, if the
29494 graph is instantiated for device launch but contains a node of an
29495 unsupported node type, or a node which performs unsupported
29496 operations, such as use of CUDA dynamic parallelism within a kernel
29497 node. `errNode_out` will be set to this node.
29499 - :py:obj:`~.cudaGraphInstantiateMultipleDevicesNotSupported`, if the
29500 graph is instantiated for device launch but a node’s device differs
29501 from that of another node. This error can also be returned if a graph
29502 is not instantiated for device launch and it contains kernels which
29503 call device-side :py:obj:`~.cudaGraphLaunch()` from multiple devices.
29504 `errNode_out` will be set to this node.
29506 If instantiation is successful, `result_out` will be set to
29507 :py:obj:`~.cudaGraphInstantiateSuccess`, and `errNode_out` will be set
29508 to NULL.
29510 Parameters
29511 ----------
29512 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
29513 Graph to instantiate
29514 instantiateParams : :py:obj:`~.cudaGraphInstantiateParams`
29515 Instantiation parameters
29517 Returns
29518 -------
29519 cudaError_t
29520 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
29521 pGraphExec : :py:obj:`~.cudaGraphExec_t`
29522 Returns instantiated graph
29524 See Also
29525 --------
29526 :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphInstantiateWithFlags`, :py:obj:`~.cudaGraphExecDestroy`
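
Examples
--------
An illustrative sketch (not part of the original documentation); `graph`
is assumed to be an existing :py:obj:`~.cudaGraph_t`, e.g. obtained from
stream capture:

>>> from cuda.bindings import runtime
>>> params = runtime.cudaGraphInstantiateParams()
>>> params.flags = runtime.cudaGraphInstantiateFlags.cudaGraphInstantiateFlagAutoFreeOnLaunch
>>> err, graphExec = runtime.cudaGraphInstantiateWithParams(graph, params)
>>> # On failure, params.result_out and params.errNode_out describe the error
>>> assert params.result_out == runtime.cudaGraphInstantiateResult.cudaGraphInstantiateSuccess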
29527 """
29528 cdef cyruntime.cudaGraph_t cygraph
29529 if graph is None:
29530 pgraph = 0
29531 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
29532 pgraph = int(graph)
29533 else:
29534 pgraph = int(cudaGraph_t(graph))
29535 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
29536 cdef cudaGraphExec_t pGraphExec = cudaGraphExec_t()
29537 cdef cyruntime.cudaGraphInstantiateParams* cyinstantiateParams_ptr = instantiateParams._pvt_ptr if instantiateParams is not None else NULL
29538 with nogil:
29539 err = cyruntime.cudaGraphInstantiateWithParams(<cyruntime.cudaGraphExec_t*>pGraphExec._pvt_ptr, cygraph, cyinstantiateParams_ptr)
29540 if err != cyruntime.cudaSuccess:
29541 return (_dict_cudaError_t[err], None)
29542 return (_dict_cudaError_t[err], pGraphExec)
29544@cython.embedsignature(True)
29545def cudaGraphExecGetFlags(graphExec):
29546 """ Query the instantiation flags of an executable graph.
29548 Returns the flags that were passed to instantiation for the given
29549 executable graph. :py:obj:`~.cudaGraphInstantiateFlagUpload` will not
29550 be returned by this API as it does not affect the resulting executable
29551 graph.
29553 Parameters
29554 ----------
29555 graphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
29556 The executable graph to query
29558 Returns
29559 -------
29560 cudaError_t
29561 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
29562 flags : unsigned long long
29563 Returns the instantiation flags
29565 See Also
29566 --------
29567 :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphInstantiateWithFlags`, :py:obj:`~.cudaGraphInstantiateWithParams`
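
Examples
--------
An illustrative sketch; `graphExec` is assumed to be a previously
instantiated executable graph:

>>> from cuda.bindings import runtime
>>> err, flags = runtime.cudaGraphExecGetFlags(graphExec)
>>> # Test for a specific instantiation flag
>>> auto_free = bool(flags & runtime.cudaGraphInstantiateFlags.cudaGraphInstantiateFlagAutoFreeOnLaunch)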
29568 """
29569 cdef cyruntime.cudaGraphExec_t cygraphExec
29570 if graphExec is None:
29571 pgraphExec = 0
29572 elif isinstance(graphExec, (cudaGraphExec_t,driver.CUgraphExec)):
29573 pgraphExec = int(graphExec)
29574 else:
29575 pgraphExec = int(cudaGraphExec_t(graphExec))
29576 cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>pgraphExec
29577 cdef unsigned long long flags = 0
29578 with nogil:
29579 err = cyruntime.cudaGraphExecGetFlags(cygraphExec, &flags)
29580 if err != cyruntime.cudaSuccess:
29581 return (_dict_cudaError_t[err], None)
29582 return (_dict_cudaError_t[err], flags)
29584@cython.embedsignature(True)
29585def cudaGraphExecKernelNodeSetParams(hGraphExec, node, pNodeParams : Optional[cudaKernelNodeParams]):
29586 """ Sets the parameters for a kernel node in the given graphExec.
29588 Sets the parameters of a kernel node in an executable graph
29589 `hGraphExec`. The node is identified by the corresponding node `node`
29590 in the non-executable graph, from which the executable graph was
29591 instantiated.
29593 `node` must not have been removed from the original graph. All
29594 `nodeParams` fields may change, but the following restrictions apply to
29595 `func` updates:
29597 - The owning device of the function cannot change.
29599 - A node whose function originally did not use CUDA dynamic parallelism
29600 cannot be updated to a function which uses CDP.
29602 - A node whose function originally did not make device-side update
29603 calls cannot be updated to a function which makes device-side update
29604 calls.
29606 - If `hGraphExec` was not instantiated for device launch, a node whose
29607 function originally did not use device-side
29608 :py:obj:`~.cudaGraphLaunch()` cannot be updated to a function which
29609 uses device-side :py:obj:`~.cudaGraphLaunch()` unless the node
29610 resides on the same device as nodes which contained such calls at
29611 instantiate-time. If no such calls were present at instantiation,
29612 these updates cannot be performed at all.
29614 The modifications only affect future launches of `hGraphExec`. Already
29615 enqueued or running launches of `hGraphExec` are not affected by this
29616 call. `node` is also not modified by this call.
29618 If `node` is a device-updatable kernel node, the next upload/launch of
29619 `hGraphExec` will overwrite any previous device-side updates.
29620 Additionally, applying host updates to a device-updatable kernel node
29621 while it is being updated from the device will result in undefined
29622 behavior.
29624 Parameters
29625 ----------
29626 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
29627 The executable graph in which to set the specified node
29628 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
29629 Kernel node from the graph from which graphExec was instantiated
29630 pNodeParams : :py:obj:`~.cudaKernelNodeParams`
29631 Updated parameters to set
29633 Returns
29634 -------
29635 cudaError_t
29636 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
29638 See Also
29639 --------
29640 :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
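
Examples
--------
An illustrative sketch of the read-modify-write pattern; `graphExec` and
`node` are assumed to exist from a prior instantiation:

>>> from cuda.bindings import runtime
>>> err, params = runtime.cudaGraphKernelNodeGetParams(node)  # params of the source-graph node
>>> params.gridDim.x = 2 * params.gridDim.x                   # e.g. widen the grid
>>> err, = runtime.cudaGraphExecKernelNodeSetParams(graphExec, node, params)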
29641 """
29642 cdef cyruntime.cudaGraphNode_t cynode
29643 if node is None:
29644 pnode = 0
29645 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
29646 pnode = int(node)
29647 else:
29648 pnode = int(cudaGraphNode_t(node))
29649 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
29650 cdef cyruntime.cudaGraphExec_t cyhGraphExec
29651 if hGraphExec is None:
29652 phGraphExec = 0
29653 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
29654 phGraphExec = int(hGraphExec)
29655 else:
29656 phGraphExec = int(cudaGraphExec_t(hGraphExec))
29657 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
29658 cdef cyruntime.cudaKernelNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL
29659 with nogil:
29660 err = cyruntime.cudaGraphExecKernelNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr)
29661 return (_dict_cudaError_t[err],)
29663@cython.embedsignature(True)
29664def cudaGraphExecMemcpyNodeSetParams(hGraphExec, node, pNodeParams : Optional[cudaMemcpy3DParms]):
29665 """ Sets the parameters for a memcpy node in the given graphExec.
29667 Updates the work represented by `node` in `hGraphExec` as though `node`
29668 had contained `pNodeParams` at instantiation. `node` must remain in the
29669 graph which was used to instantiate `hGraphExec`. Changed edges to and
29670 from `node` are ignored.
29672 The source and destination memory in `pNodeParams` must be allocated
29673 from the same contexts as the original source and destination memory.
29674 Both the instantiation-time memory operands and the memory operands in
29675 `pNodeParams` must be 1-dimensional. Zero-length operations are not
29676 supported.
29678 The modifications only affect future launches of `hGraphExec`. Already
29679 enqueued or running launches of `hGraphExec` are not affected by this
29680 call. `node` is also not modified by this call.
29682 Returns :py:obj:`~.cudaErrorInvalidValue` if the memory operands'
29683 mappings changed or either the original or new memory operands are
29684 multidimensional.
29686 Parameters
29687 ----------
29688 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
29689 The executable graph in which to set the specified node
29690 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
29691 Memcpy node from the graph which was used to instantiate graphExec
29692 pNodeParams : :py:obj:`~.cudaMemcpy3DParms`
29693 Updated parameters to set
29695 Returns
29696 -------
29697 cudaError_t
29698 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
29700 See Also
29701 --------
29702 :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParamsToSymbol`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParamsFromSymbol`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams1D`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
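
Examples
--------
An illustrative sketch; `graphExec` and `node` are assumed to exist, and
any replacement operands must come from the same contexts as the
originals:

>>> from cuda.bindings import runtime
>>> err, p = runtime.cudaGraphMemcpyNodeGetParams(node)  # current copy description
>>> # ...retarget p.srcPtr / p.dstPtr at 1D buffers from the same contexts...
>>> err, = runtime.cudaGraphExecMemcpyNodeSetParams(graphExec, node, p)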
29703 """
29704 cdef cyruntime.cudaGraphNode_t cynode
29705 if node is None:
29706 pnode = 0
29707 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
29708 pnode = int(node)
29709 else:
29710 pnode = int(cudaGraphNode_t(node))
29711 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
29712 cdef cyruntime.cudaGraphExec_t cyhGraphExec
29713 if hGraphExec is None:
29714 phGraphExec = 0
29715 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
29716 phGraphExec = int(hGraphExec)
29717 else:
29718 phGraphExec = int(cudaGraphExec_t(hGraphExec))
29719 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
29720 cdef cyruntime.cudaMemcpy3DParms* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL
29721 with nogil:
29722 err = cyruntime.cudaGraphExecMemcpyNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr)
29723 return (_dict_cudaError_t[err],)
29725@cython.embedsignature(True)
29726def cudaGraphExecMemcpyNodeSetParams1D(hGraphExec, node, dst, src, size_t count, kind not None : cudaMemcpyKind):
29727 """ Sets the parameters for a memcpy node in the given graphExec to perform a 1-dimensional copy.
29729 Updates the work represented by `node` in `hGraphExec` as though `node`
29730 had contained the given params at instantiation. `node` must remain in
29731 the graph which was used to instantiate `hGraphExec`. Changed edges to
29732 and from `node` are ignored.
29734 `src` and `dst` must be allocated from the same contexts as the
29735 original source and destination memory. The instantiation-time memory
29736 operands must be 1-dimensional. Zero-length operations are not
29737 supported.
29739 The modifications only affect future launches of `hGraphExec`. Already
29740 enqueued or running launches of `hGraphExec` are not affected by this
29741 call. `node` is also not modified by this call.
29743 Returns :py:obj:`~.cudaErrorInvalidValue` if the memory operands'
29744 mappings changed or the original memory operands are multidimensional.
29746 Parameters
29747 ----------
29748 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
29749 The executable graph in which to set the specified node
29750 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
29751 Memcpy node from the graph which was used to instantiate graphExec
29752 dst : Any
29753 Destination memory address
29754 src : Any
29755 Source memory address
29756 count : size_t
29757 Size in bytes to copy
29758 kind : :py:obj:`~.cudaMemcpyKind`
29759 Type of transfer
29761 Returns
29762 -------
29763 cudaError_t
29764 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
29766 See Also
29767 --------
29768 :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemcpyNode1D`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphMemcpyNodeSetParams1D`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
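
Examples
--------
An illustrative sketch; `dst`, `src` and `nbytes` are assumed to
describe device buffers (e.g. from :py:obj:`~.cudaMalloc`) allocated
from the same contexts as the node's original operands:

>>> from cuda.bindings import runtime
>>> err, = runtime.cudaGraphExecMemcpyNodeSetParams1D(
...     graphExec, node, dst, src, nbytes,
...     runtime.cudaMemcpyKind.cudaMemcpyDeviceToDevice)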
29769 """
29770 cdef cyruntime.cudaGraphNode_t cynode
29771 if node is None:
29772 pnode = 0
29773 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
29774 pnode = int(node)
29775 else:
29776 pnode = int(cudaGraphNode_t(node))
29777 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
29778 cdef cyruntime.cudaGraphExec_t cyhGraphExec
29779 if hGraphExec is None:
29780 phGraphExec = 0
29781 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
29782 phGraphExec = int(hGraphExec)
29783 else:
29784 phGraphExec = int(cudaGraphExec_t(hGraphExec))
29785 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
29786 cydst = _HelperInputVoidPtr(dst)
29787 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
29788 cysrc = _HelperInputVoidPtr(src)
29789 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
29790 cdef cyruntime.cudaMemcpyKind cykind = kind.value
29791 with nogil:
29792 err = cyruntime.cudaGraphExecMemcpyNodeSetParams1D(cyhGraphExec, cynode, cydst_ptr, cysrc_ptr, count, cykind)
29793 return (_dict_cudaError_t[err],)
29795@cython.embedsignature(True)
29796def cudaGraphExecMemsetNodeSetParams(hGraphExec, node, pNodeParams : Optional[cudaMemsetParams]):
29797 """ Sets the parameters for a memset node in the given graphExec.
29799 Updates the work represented by `node` in `hGraphExec` as though `node`
29800 had contained `pNodeParams` at instantiation. `node` must remain in the
29801 graph which was used to instantiate `hGraphExec`. Changed edges to and
29802 from `node` are ignored.
29804 Zero-sized operations are not supported.
29806 The new destination pointer in `pNodeParams` must be to the same kind
29807 of allocation as the original destination pointer and have the same
29808 context association and device mapping as the original destination
29809 pointer.
29811 Both the value and pointer address may be updated. Changing other
29812 aspects of the memset (width, height, element size or pitch) may cause
29813 the update to be rejected. Specifically, for 2d memsets, all dimension
29814 changes are rejected. For 1d memsets, changes in height are explicitly
29815 rejected and other changes are opportunistically allowed if the
29816 resulting work maps onto the work resources already allocated for the
29817 node.
29819 The modifications only affect future launches of `hGraphExec`. Already
29820 enqueued or running launches of `hGraphExec` are not affected by this
29821 call. `node` is also not modified by this call.
29823 Parameters
29824 ----------
29825 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
29826 The executable graph in which to set the specified node
29827 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
29828 Memset node from the graph which was used to instantiate graphExec
29829 pNodeParams : :py:obj:`~.cudaMemsetParams`
29830 Updated parameters to set
29832 Returns
29833 -------
29834 cudaError_t
29835 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
29837 See Also
29838 --------
29839 :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddMemsetNode`, :py:obj:`~.cudaGraphMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
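
Examples
--------
An illustrative sketch updating only the fill value, which is always
permitted; `graphExec` and `node` are assumed to exist:

>>> from cuda.bindings import runtime
>>> err, p = runtime.cudaGraphMemsetNodeGetParams(node)  # current memset description
>>> p.value = 0xFF                                       # new fill value
>>> err, = runtime.cudaGraphExecMemsetNodeSetParams(graphExec, node, p)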
29840 """
29841 cdef cyruntime.cudaGraphNode_t cynode
29842 if node is None:
29843 pnode = 0
29844 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
29845 pnode = int(node)
29846 else:
29847 pnode = int(cudaGraphNode_t(node))
29848 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
29849 cdef cyruntime.cudaGraphExec_t cyhGraphExec
29850 if hGraphExec is None:
29851 phGraphExec = 0
29852 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
29853 phGraphExec = int(hGraphExec)
29854 else:
29855 phGraphExec = int(cudaGraphExec_t(hGraphExec))
29856 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
29857 cdef cyruntime.cudaMemsetParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL
29858 with nogil:
29859 err = cyruntime.cudaGraphExecMemsetNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr)
29860 return (_dict_cudaError_t[err],)
29862@cython.embedsignature(True)
29863def cudaGraphExecHostNodeSetParams(hGraphExec, node, pNodeParams : Optional[cudaHostNodeParams]):
29864 """ Sets the parameters for a host node in the given graphExec.
29866 Updates the work represented by `node` in `hGraphExec` as though `node`
29867 had contained `pNodeParams` at instantiation. `node` must remain in the
29868 graph which was used to instantiate `hGraphExec`. Changed edges to and
29869 from `node` are ignored.
29871 The modifications only affect future launches of `hGraphExec`. Already
29872 enqueued or running launches of `hGraphExec` are not affected by this
29873 call. `node` is also not modified by this call.
29875 Parameters
29876 ----------
29877 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
29878 The executable graph in which to set the specified node
29879 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
29880 Host node from the graph which was used to instantiate graphExec
29881 pNodeParams : :py:obj:`~.cudaHostNodeParams`
29882 Updated parameters to set
29884 Returns
29885 -------
29886 cudaError_t
29887 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
29889 See Also
29890 --------
29891 :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphHostNodeSetParams`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
29892 """
29893 cdef cyruntime.cudaGraphNode_t cynode
29894 if node is None:
29895 pnode = 0
29896 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
29897 pnode = int(node)
29898 else:
29899 pnode = int(cudaGraphNode_t(node))
29900 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
29901 cdef cyruntime.cudaGraphExec_t cyhGraphExec
29902 if hGraphExec is None:
29903 phGraphExec = 0
29904 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
29905 phGraphExec = int(hGraphExec)
29906 else:
29907 phGraphExec = int(cudaGraphExec_t(hGraphExec))
29908 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
29909 cdef cyruntime.cudaHostNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL
29910 with nogil:
29911 err = cyruntime.cudaGraphExecHostNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr)
29912 return (_dict_cudaError_t[err],)
29914@cython.embedsignature(True)
29915def cudaGraphExecChildGraphNodeSetParams(hGraphExec, node, childGraph):
29916 """ Updates node parameters in the child graph node in the given graphExec.
29918 Updates the work represented by `node` in `hGraphExec` as though the
29919 nodes contained in `node's` graph had the parameters contained in
29920 `childGraph's` nodes at instantiation. `node` must remain in the graph
29921 which was used to instantiate `hGraphExec`. Changed edges to and from
29922 `node` are ignored.
29924 The modifications only affect future launches of `hGraphExec`. Already
29925 enqueued or running launches of `hGraphExec` are not affected by this
29926 call. `node` is also not modified by this call.
29928 The topology of `childGraph`, as well as the node insertion order, must
29929 match that of the graph contained in `node`. See
29930 :py:obj:`~.cudaGraphExecUpdate()` for a list of restrictions on what
29931 can be updated in an instantiated graph. The update is recursive, so
29932 child graph nodes contained within the top level child graph will also
29933 be updated.
29935 Parameters
29936 ----------
29937 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
29938 The executable graph in which to set the specified node
29939 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
29940 Child graph node from the graph which was used to instantiate
graphExec
29941 childGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
29942 The graph supplying the updated parameters
29944 Returns
29945 -------
29946 cudaError_t
29947 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
29949 See Also
29950 --------
29951 :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphChildGraphNodeGetGraph`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
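
Examples
--------
An illustrative sketch; `updatedChildGraph` is assumed to be
topologically identical (including node insertion order) to the graph
embedded in `node`:

>>> from cuda.bindings import runtime
>>> err, = runtime.cudaGraphExecChildGraphNodeSetParams(graphExec, node, updatedChildGraph)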
29952 """
29953 cdef cyruntime.cudaGraph_t cychildGraph
29954 if childGraph is None:
29955 pchildGraph = 0
29956 elif isinstance(childGraph, (cudaGraph_t,driver.CUgraph)):
29957 pchildGraph = int(childGraph)
29958 else:
29959 pchildGraph = int(cudaGraph_t(childGraph))
29960 cychildGraph = <cyruntime.cudaGraph_t><void_ptr>pchildGraph
29961 cdef cyruntime.cudaGraphNode_t cynode
29962 if node is None:
29963 pnode = 0
29964 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
29965 pnode = int(node)
29966 else:
29967 pnode = int(cudaGraphNode_t(node))
29968 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
29969 cdef cyruntime.cudaGraphExec_t cyhGraphExec
29970 if hGraphExec is None:
29971 phGraphExec = 0
29972 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
29973 phGraphExec = int(hGraphExec)
29974 else:
29975 phGraphExec = int(cudaGraphExec_t(hGraphExec))
29976 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
29977 with nogil:
29978 err = cyruntime.cudaGraphExecChildGraphNodeSetParams(cyhGraphExec, cynode, cychildGraph)
29979 return (_dict_cudaError_t[err],)
29981@cython.embedsignature(True)
29982def cudaGraphExecEventRecordNodeSetEvent(hGraphExec, hNode, event):
29983 """ Sets the event for an event record node in the given graphExec.
29985 Sets the event of an event record node in an executable graph
29986 `hGraphExec`. The node is identified by the corresponding node `hNode`
29987 in the non-executable graph, from which the executable graph was
29988 instantiated.
29990 The modifications only affect future launches of `hGraphExec`. Already
29991 enqueued or running launches of `hGraphExec` are not affected by this
29992 call. `hNode` is also not modified by this call.
29994 Parameters
29995 ----------
29996 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
29997 The executable graph in which to set the specified node
29998 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
29999 Event record node from the graph from which graphExec was
30000 instantiated
30001 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
30002 Updated event to use
30004 Returns
30005 -------
30006 cudaError_t
30007 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
30009 See Also
30010 --------
30011 :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphEventRecordNodeGetEvent`, :py:obj:`~.cudaGraphEventWaitNodeSetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
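
Examples
--------
An illustrative sketch swapping in a freshly created event; `graphExec`
and `hNode` are assumed to exist:

>>> from cuda.bindings import runtime
>>> err, newEvent = runtime.cudaEventCreate()
>>> err, = runtime.cudaGraphExecEventRecordNodeSetEvent(graphExec, hNode, newEvent)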
30012 """
30013 cdef cyruntime.cudaEvent_t cyevent
30014 if event is None:
30015 pevent = 0
30016 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
30017 pevent = int(event)
30018 else:
30019 pevent = int(cudaEvent_t(event))
30020 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
30021 cdef cyruntime.cudaGraphNode_t cyhNode
30022 if hNode is None:
30023 phNode = 0
30024 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
30025 phNode = int(hNode)
30026 else:
30027 phNode = int(cudaGraphNode_t(hNode))
30028 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
30029 cdef cyruntime.cudaGraphExec_t cyhGraphExec
30030 if hGraphExec is None:
30031 phGraphExec = 0
30032 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
30033 phGraphExec = int(hGraphExec)
30034 else:
30035 phGraphExec = int(cudaGraphExec_t(hGraphExec))
30036 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
30037 with nogil:
30038 err = cyruntime.cudaGraphExecEventRecordNodeSetEvent(cyhGraphExec, cyhNode, cyevent)
30039 return (_dict_cudaError_t[err],)
30041@cython.embedsignature(True)
30042def cudaGraphExecEventWaitNodeSetEvent(hGraphExec, hNode, event):
30043 """ Sets the event for an event wait node in the given graphExec.
30045 Sets the event of an event wait node in an executable graph
30046 `hGraphExec`. The node is identified by the corresponding node `hNode`
30047 in the non-executable graph, from which the executable graph was
30048 instantiated.
30050 The modifications only affect future launches of `hGraphExec`. Already
30051 enqueued or running launches of `hGraphExec` are not affected by this
30052 call. `hNode` is also not modified by this call.
30054 Parameters
30055 ----------
30056 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
30057 The executable graph in which to set the specified node
30058 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
30059 Event wait node from the graph from which graphExec was
30060 instantiated
30061 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
30062 Updated event to use
30064 Returns
30065 -------
30066 cudaError_t
30067 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
30069 See Also
30070 --------
30071 :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphEventWaitNodeGetEvent`, :py:obj:`~.cudaGraphEventRecordNodeSetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
30072 """
30073 cdef cyruntime.cudaEvent_t cyevent
30074 if event is None:
30075 pevent = 0
30076 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
30077 pevent = int(event)
30078 else:
30079 pevent = int(cudaEvent_t(event))
30080 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
30081 cdef cyruntime.cudaGraphNode_t cyhNode
30082 if hNode is None:
30083 phNode = 0
30084 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
30085 phNode = int(hNode)
30086 else:
30087 phNode = int(cudaGraphNode_t(hNode))
30088 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
30089 cdef cyruntime.cudaGraphExec_t cyhGraphExec
30090 if hGraphExec is None:
30091 phGraphExec = 0
30092 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
30093 phGraphExec = int(hGraphExec)
30094 else:
30095 phGraphExec = int(cudaGraphExec_t(hGraphExec))
30096 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
30097 with nogil:
30098 err = cyruntime.cudaGraphExecEventWaitNodeSetEvent(cyhGraphExec, cyhNode, cyevent)
30099 return (_dict_cudaError_t[err],)
30101@cython.embedsignature(True)
30102def cudaGraphExecExternalSemaphoresSignalNodeSetParams(hGraphExec, hNode, nodeParams : Optional[cudaExternalSemaphoreSignalNodeParams]):
30103 """ Sets the parameters for an external semaphore signal node in the given graphExec.
30105 Sets the parameters of an external semaphore signal node in an
30106 executable graph `hGraphExec`. The node is identified by the
30107 corresponding node `hNode` in the non-executable graph, from which the
30108 executable graph was instantiated.
30110 `hNode` must not have been removed from the original graph.
30112 The modifications only affect future launches of `hGraphExec`. Already
30113 enqueued or running launches of `hGraphExec` are not affected by this
30114 call. `hNode` is also not modified by this call.
30116 Changing `nodeParams->numExtSems` is not supported.
30118 Parameters
30119 ----------
30120 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
30121 The executable graph in which to set the specified node
30122 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
30123 Semaphore signal node from the graph from which graphExec was
30124 instantiated
30125 nodeParams : :py:obj:`~.cudaExternalSemaphoreSignalNodeParams`
30126 Updated parameters to set
30128 Returns
30129 -------
30130 cudaError_t
30131 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
30133 See Also
30134 --------
30135 :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
30136 """
30137 cdef cyruntime.cudaGraphNode_t cyhNode
30138 if hNode is None:
30139 phNode = 0
30140 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
30141 phNode = int(hNode)
30142 else:
30143 phNode = int(cudaGraphNode_t(hNode))
30144 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
30145 cdef cyruntime.cudaGraphExec_t cyhGraphExec
30146 if hGraphExec is None:
30147 phGraphExec = 0
30148 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
30149 phGraphExec = int(hGraphExec)
30150 else:
30151 phGraphExec = int(cudaGraphExec_t(hGraphExec))
30152 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
30153 cdef cyruntime.cudaExternalSemaphoreSignalNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL
30154 with nogil:
30155 err = cyruntime.cudaGraphExecExternalSemaphoresSignalNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr)
30156 return (_dict_cudaError_t[err],)
30158@cython.embedsignature(True)
30159def cudaGraphExecExternalSemaphoresWaitNodeSetParams(hGraphExec, hNode, nodeParams : Optional[cudaExternalSemaphoreWaitNodeParams]):
30160 """ Sets the parameters for an external semaphore wait node in the given graphExec.
30162 Sets the parameters of an external semaphore wait node in an executable
30163 graph `hGraphExec`. The node is identified by the corresponding node
30164 `hNode` in the non-executable graph, from which the executable graph
30165 was instantiated.
30167 `hNode` must not have been removed from the original graph.
30169 The modifications only affect future launches of `hGraphExec`. Already
30170 enqueued or running launches of `hGraphExec` are not affected by this
30171 call. `hNode` is also not modified by this call.
30173 Changing `nodeParams->numExtSems` is not supported.
30175 Parameters
30176 ----------
30177 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
30178 The executable graph in which to set the specified node
30179 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
30180 Semaphore wait node from the graph from which graphExec was
30181 instantiated
30182 nodeParams : :py:obj:`~.cudaExternalSemaphoreWaitNodeParams`
30183 Updated parameters to set
30185 Returns
30186 -------
30187 cudaError_t
30188 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
30190 See Also
30191 --------
30192 :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
30193 """
30194 cdef cyruntime.cudaGraphNode_t cyhNode
30195 if hNode is None:
30196 phNode = 0
30197 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
30198 phNode = int(hNode)
30199 else:
30200 phNode = int(cudaGraphNode_t(hNode))
30201 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
30202 cdef cyruntime.cudaGraphExec_t cyhGraphExec
30203 if hGraphExec is None:
30204 phGraphExec = 0
30205 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
30206 phGraphExec = int(hGraphExec)
30207 else:
30208 phGraphExec = int(cudaGraphExec_t(hGraphExec))
30209 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
30210 cdef cyruntime.cudaExternalSemaphoreWaitNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL
30211 with nogil:
30212 err = cyruntime.cudaGraphExecExternalSemaphoresWaitNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr)
30213 return (_dict_cudaError_t[err],)
30215@cython.embedsignature(True)
30216def cudaGraphNodeSetEnabled(hGraphExec, hNode, unsigned int isEnabled):
30217 """ Enables or disables the specified node in the given graphExec.
30219 Sets `hNode` to be either enabled or disabled. Disabled nodes are
30220 functionally equivalent to empty nodes until they are reenabled.
30221 Existing node parameters are not affected by disabling/enabling the
30222 node.
30224 The node is identified by the corresponding node `hNode` in the non-
30225 executable graph, from which the executable graph was instantiated.
30227 `hNode` must not have been removed from the original graph.
30229 The modifications only affect future launches of `hGraphExec`. Already
30230 enqueued or running launches of `hGraphExec` are not affected by this
30231 call. `hNode` is also not modified by this call.
30233 Parameters
30234 ----------
30235 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
30236 The executable graph in which to set the specified node
30237 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
30238 Node from the graph from which graphExec was instantiated
30239 isEnabled : unsigned int
30240 Node is enabled if != 0, otherwise the node is disabled
30242 Returns
30243 -------
30244 cudaError_t
30245 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
30247 See Also
30248 --------
30249 :py:obj:`~.cudaGraphNodeGetEnabled`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphLaunch`
30251 Notes
30252 -----
30253 Currently only kernel, memset and memcpy nodes are supported.
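
Examples
--------
An illustrative sketch; toggling a node off makes it behave like an
empty node on subsequent launches:

>>> from cuda.bindings import runtime
>>> err, = runtime.cudaGraphNodeSetEnabled(graphExec, hNode, 0)  # disable the node
>>> err, isEnabled = runtime.cudaGraphNodeGetEnabled(graphExec, hNode)
>>> assert isEnabled == 0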
30254 """
30255 cdef cyruntime.cudaGraphNode_t cyhNode
30256 if hNode is None:
30257 phNode = 0
30258 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
30259 phNode = int(hNode)
30260 else:
30261 phNode = int(cudaGraphNode_t(hNode))
30262 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
30263 cdef cyruntime.cudaGraphExec_t cyhGraphExec
30264 if hGraphExec is None:
30265 phGraphExec = 0
30266 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
30267 phGraphExec = int(hGraphExec)
30268 else:
30269 phGraphExec = int(cudaGraphExec_t(hGraphExec))
30270 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
30271 with nogil:
30272 err = cyruntime.cudaGraphNodeSetEnabled(cyhGraphExec, cyhNode, isEnabled)
30273 return (_dict_cudaError_t[err],)
30275@cython.embedsignature(True)
30276def cudaGraphNodeGetEnabled(hGraphExec, hNode):
30277 """ Query whether a node in the given graphExec is enabled.
30279 Sets isEnabled to 1 if `hNode` is enabled, or 0 if `hNode` is disabled.
30281 The node is identified by the corresponding node `hNode` in the non-
30282 executable graph, from which the executable graph was instantiated.
30284 `hNode` must not have been removed from the original graph.
30286 Parameters
30287 ----------
30288 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
30289 The executable graph in which to set the specified node
30290 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
30291 Node from the graph from which graphExec was instantiated
30293 Returns
30294 -------
30295 cudaError_t
30296 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
30297 isEnabled : unsigned int
30298 Location to return the enabled status of the node
30300 See Also
30301 --------
30302 :py:obj:`~.cudaGraphNodeSetEnabled`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphLaunch`
30304 Notes
30305 -----
30306 Currently only kernel, memset and memcpy nodes are supported.
30307 """
30308 cdef cyruntime.cudaGraphNode_t cyhNode
30309 if hNode is None:
30310 phNode = 0
30311 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
30312 phNode = int(hNode)
30313 else:
30314 phNode = int(cudaGraphNode_t(hNode))
30315 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
30316 cdef cyruntime.cudaGraphExec_t cyhGraphExec
30317 if hGraphExec is None:
30318 phGraphExec = 0
30319 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
30320 phGraphExec = int(hGraphExec)
30321 else:
30322 phGraphExec = int(cudaGraphExec_t(hGraphExec))
30323 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
30324 cdef unsigned int isEnabled = 0
30325 with nogil:
30326 err = cyruntime.cudaGraphNodeGetEnabled(cyhGraphExec, cyhNode, &isEnabled)
30327 if err != cyruntime.cudaSuccess:
30328 return (_dict_cudaError_t[err], None)
30329 return (_dict_cudaError_t[err], isEnabled)
30331@cython.embedsignature(True)
30332def cudaGraphExecUpdate(hGraphExec, hGraph):
30333 """ Check whether an executable graph can be updated with a graph and perform the update if possible.
30335 Updates the node parameters in the instantiated graph specified by
30336 `hGraphExec` with the node parameters in a topologically identical
30337 graph specified by `hGraph`.
30339 Limitations:
30341 - Kernel nodes:
30343 - The owning context of the function cannot change.
30345 - A node whose function originally did not use CUDA dynamic
30346 parallelism cannot be updated to a function which uses CDP.
30348 - A node whose function originally did not make device-side update
30349 calls cannot be updated to a function which makes device-side
30350 update calls.
30352 - A cooperative node cannot be updated to a non-cooperative node, and
30353 vice-versa.
30355 - If the graph was instantiated with
30356 cudaGraphInstantiateFlagUseNodePriority, the priority attribute
30357 cannot change. Equality is checked on the originally requested
30358 priority values, before they are clamped to the device's supported
30359 range.
30361 - If `hGraphExec` was not instantiated for device launch, a node
30362 whose function originally did not use device-side
30363 :py:obj:`~.cudaGraphLaunch()` cannot be updated to a function which
30364 uses device-side :py:obj:`~.cudaGraphLaunch()` unless the node
30365 resides on the same device as nodes which contained such calls at
30366 instantiate-time. If no such calls were present at instantiation,
30367 these updates cannot be performed at all.
30369 - Neither `hGraph` nor `hGraphExec` may contain device-updatable
30370 kernel nodes.
30372 - Memset and memcpy nodes:
30374 - The CUDA device(s) to which the operand(s) was allocated/mapped
30375 cannot change.
30377 - The source/destination memory must be allocated from the same
30378 contexts as the original source/destination memory.
30380 - For 2d memsets, only address and assigned value may be updated.
30382 - For 1d memsets, updating dimensions is also allowed, but may fail
30383 if the resulting operation doesn't map onto the work resources
30384 already allocated for the node.
30386 - Additional memcpy node restrictions:
30388 - Changing either the source or destination memory type (i.e.
30389 CU_MEMORYTYPE_DEVICE, CU_MEMORYTYPE_ARRAY, etc.) is not supported.
30391 - Conditional nodes:
30393 - Changing node parameters is not supported.
30395 - Changing parameters of nodes within the conditional body graph is
30396 subject to the rules above.
30398 - Conditional handle flags and default values are updated as part of
30399 the graph update.
30401 Note: The API may add further restrictions in future releases. The
30402 return code should always be checked.
30404 cudaGraphExecUpdate sets the result member of `resultInfo` to
30405 cudaGraphExecUpdateErrorTopologyChanged under the following conditions:
30407 - The count of nodes directly in `hGraphExec` and `hGraph` differ, in
30408 which case resultInfo->errorNode is set to NULL.
30410 - `hGraph` has more exit nodes than `hGraphExec`, in which case
30411 resultInfo->errorNode is set to one of the exit nodes in hGraph.
30413 - A node in `hGraph` has a different number of dependencies than the
30414 node from `hGraphExec` it is paired with, in which case
30415 resultInfo->errorNode is set to the node from `hGraph`.
30417 - A node in `hGraph` has a dependency that does not match with the
30418 corresponding dependency of the paired node from `hGraphExec`.
30419 resultInfo->errorNode will be set to the node from `hGraph`.
30420 resultInfo->errorFromNode will be set to the mismatched dependency.
30421 The dependencies are paired based on edge order and a dependency does
30422 not match when the nodes are already paired based on other edges
30423 examined in the graph.
30425 cudaGraphExecUpdate sets the result member of `resultInfo` to:
30427 - cudaGraphExecUpdateError if passed an invalid value.
30429 - cudaGraphExecUpdateErrorTopologyChanged if the graph topology changed.
30431 - cudaGraphExecUpdateErrorNodeTypeChanged if the type of a node
30432 changed, in which case resultInfo->errorNode is set to the node from
30433 `hGraph`.
30435 - cudaGraphExecUpdateErrorFunctionChanged if the function of a kernel
30436 node changed (CUDA driver < 11.2).
30438 - cudaGraphExecUpdateErrorUnsupportedFunctionChange if the func field
30439 of a kernel changed in an unsupported way (see the note above), in
30440 which case resultInfo->errorNode is set to the node from `hGraph`.
30442 - cudaGraphExecUpdateErrorParametersChanged if any parameters to a node
30443 changed in a way that is not supported, in which case
30444 resultInfo->errorNode is set to the node from `hGraph`.
30446 - cudaGraphExecUpdateErrorAttributesChanged if any attributes of a node
30447 changed in a way that is not supported, in which case
30448 resultInfo->errorNode is set to the node from `hGraph`.
30450 - cudaGraphExecUpdateErrorNotSupported if something about a node is
30451 unsupported, like the node's type or configuration, in which case
30452 resultInfo->errorNode is set to the node from `hGraph`.
30454 If the update fails for a reason not listed above, the result member of
30455 `resultInfo` will be set to cudaGraphExecUpdateError. If the update
30456 succeeds, the result member will be set to cudaGraphExecUpdateSuccess.
30458 cudaGraphExecUpdate returns cudaSuccess when the update was performed
30459 successfully. It returns cudaErrorGraphExecUpdateFailure if the graph
30460 update was not performed because it included changes which violated
30461 constraints specific to instantiated graph update.
30463 Parameters
30464 ----------
30465 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
30466 The instantiated graph to be updated
30467 hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
30468 The graph containing the updated parameters
30470 Returns
30471 -------
30472 cudaError_t
30473 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorGraphExecUpdateFailure`,
30474 resultInfo : :py:obj:`~.cudaGraphExecUpdateResultInfo`
30475 the error info structure
30477 See Also
30478 --------
30479 :py:obj:`~.cudaGraphInstantiate`
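
Examples
--------
An illustrative sketch of the common update-else-reinstantiate pattern;
`graphExec` and a topologically identical `updatedGraph` are assumed to
exist:

>>> from cuda.bindings import runtime
>>> err, resultInfo = runtime.cudaGraphExecUpdate(graphExec, updatedGraph)
>>> if err == runtime.cudaError_t.cudaErrorGraphExecUpdateFailure:
...     # In-place update was rejected; fall back to a fresh instantiation
...     err, graphExec = runtime.cudaGraphInstantiate(updatedGraph, 0)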
30480 """
30481 cdef cyruntime.cudaGraph_t cyhGraph
30482 if hGraph is None:
30483 phGraph = 0
30484 elif isinstance(hGraph, (cudaGraph_t,driver.CUgraph)):
30485 phGraph = int(hGraph)
30486 else:
30487 phGraph = int(cudaGraph_t(hGraph))
30488 cyhGraph = <cyruntime.cudaGraph_t><void_ptr>phGraph
30489 cdef cyruntime.cudaGraphExec_t cyhGraphExec
30490 if hGraphExec is None:
30491 phGraphExec = 0
30492 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
30493 phGraphExec = int(hGraphExec)
30494 else:
30495 phGraphExec = int(cudaGraphExec_t(hGraphExec))
30496 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
30497 cdef cudaGraphExecUpdateResultInfo resultInfo = cudaGraphExecUpdateResultInfo()
30498 with nogil:
30499 err = cyruntime.cudaGraphExecUpdate(cyhGraphExec, cyhGraph, <cyruntime.cudaGraphExecUpdateResultInfo*>resultInfo._pvt_ptr)
30500 if err != cyruntime.cudaSuccess:
30501 return (_dict_cudaError_t[err], None)
30502 return (_dict_cudaError_t[err], resultInfo)
30504@cython.embedsignature(True)
30505def cudaGraphUpload(graphExec, stream):
30506 """ Uploads an executable graph in a stream.
30508 Uploads `graphExec` to the device in `stream` without executing it.
30509 Uploads of the same `graphExec` will be serialized. Each upload is
30510 ordered behind both any previous work in `stream` and any previous
30511 launches of `graphExec`. Uses memory cached by `stream` to back the
30512 allocations owned by `graphExec`.
30514 Parameters
30515 ----------
30516 graphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
30517 Executable graph to upload
30518 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
30519 Stream in which to upload the graph
30521 Returns
30522 -------
30523 cudaError_t
30524 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
30526 See Also
30527 --------
30528 :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphLaunch`, :py:obj:`~.cudaGraphExecDestroy`
30529 """
30530 cdef cyruntime.cudaStream_t cystream
30531 if stream is None:
30532 pstream = 0
30533 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
30534 pstream = int(stream)
30535 else:
30536 pstream = int(cudaStream_t(stream))
30537 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
30538 cdef cyruntime.cudaGraphExec_t cygraphExec
30539 if graphExec is None:
30540 pgraphExec = 0
30541 elif isinstance(graphExec, (cudaGraphExec_t,driver.CUgraphExec)):
30542 pgraphExec = int(graphExec)
30543 else:
30544 pgraphExec = int(cudaGraphExec_t(graphExec))
30545 cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>pgraphExec
30546 with nogil:
30547 err = cyruntime.cudaGraphUpload(cygraphExec, cystream)
30548 return (_dict_cudaError_t[err],)
30550@cython.embedsignature(True)
30551def cudaGraphLaunch(graphExec, stream):
30552 """ Launches an executable graph in a stream.
30554 Executes `graphExec` in `stream`. Only one instance of `graphExec` may
30555 be executing at a time. Each launch is ordered behind both any previous
30556 work in `stream` and any previous launches of `graphExec`. To execute a
30557 graph concurrently, it must be instantiated multiple times into
30558 multiple executable graphs.
30560 If any allocations created by `graphExec` remain unfreed (from a
30561 previous launch) and `graphExec` was not instantiated with
30562 :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`, the launch will
30563 fail with :py:obj:`~.cudaErrorInvalidValue`.
30565 Parameters
30566 ----------
30567 graphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
30568 Executable graph to launch
30569 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
30570 Stream in which to launch the graph
30572 Returns
30573 -------
30574 cudaError_t
30575 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
30577 See Also
30578 --------
30579 :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphUpload`, :py:obj:`~.cudaGraphExecDestroy`
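
Examples
--------
An illustrative end-to-end sketch; `graphExec` is assumed to come from
:py:obj:`~.cudaGraphInstantiate`:

>>> from cuda.bindings import runtime
>>> err, stream = runtime.cudaStreamCreate()
>>> err, = runtime.cudaGraphUpload(graphExec, stream)  # optional: prepay setup cost
>>> err, = runtime.cudaGraphLaunch(graphExec, stream)
>>> err, = runtime.cudaStreamSynchronize(stream)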
30580 """
30581 cdef cyruntime.cudaStream_t cystream
30582 if stream is None:
30583 pstream = 0
30584 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
30585 pstream = int(stream)
30586 else:
30587 pstream = int(cudaStream_t(stream))
30588 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
30589 cdef cyruntime.cudaGraphExec_t cygraphExec
30590 if graphExec is None:
30591 pgraphExec = 0
30592 elif isinstance(graphExec, (cudaGraphExec_t,driver.CUgraphExec)):
30593 pgraphExec = int(graphExec)
30594 else:
30595 pgraphExec = int(cudaGraphExec_t(graphExec))
30596 cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>pgraphExec
30597 with nogil:
30598 err = cyruntime.cudaGraphLaunch(cygraphExec, cystream)
30599 return (_dict_cudaError_t[err],)
30601@cython.embedsignature(True)
30602def cudaGraphExecDestroy(graphExec):
30603 """ Destroys an executable graph.
30605 Destroys the executable graph specified by `graphExec`.
30607 Parameters
30608 ----------
30609 graphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
30610 Executable graph to destroy
30612 Returns
30613 -------
30614 cudaError_t
30615 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
30617 See Also
30618 --------
30619 :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphUpload`, :py:obj:`~.cudaGraphLaunch`
30620 """
30621 cdef cyruntime.cudaGraphExec_t cygraphExec
30622 if graphExec is None:
30623 pgraphExec = 0
30624 elif isinstance(graphExec, (cudaGraphExec_t,driver.CUgraphExec)):
30625 pgraphExec = int(graphExec)
30626 else:
30627 pgraphExec = int(cudaGraphExec_t(graphExec))
30628 cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>pgraphExec
30629 with nogil:
30630 err = cyruntime.cudaGraphExecDestroy(cygraphExec)
30631 return (_dict_cudaError_t[err],)
30633@cython.embedsignature(True)
30634def cudaGraphDestroy(graph):
30635 """ Destroys a graph.
30637 Destroys the graph specified by `graph`, as well as all of its nodes.
30639 Parameters
30640 ----------
30641 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
30642 Graph to destroy
30644 Returns
30645 -------
30646 cudaError_t
30647 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
30649 See Also
30650 --------
30651 :py:obj:`~.cudaGraphCreate`
30652 """
30653 cdef cyruntime.cudaGraph_t cygraph
30654 if graph is None:
30655 pgraph = 0
30656 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
30657 pgraph = int(graph)
30658 else:
30659 pgraph = int(cudaGraph_t(graph))
30660 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
30661 with nogil:
30662 err = cyruntime.cudaGraphDestroy(cygraph)
30663 return (_dict_cudaError_t[err],)
30665@cython.embedsignature(True)
30666def cudaGraphDebugDotPrint(graph, char* path, unsigned int flags):
30667 """ Write a DOT file describing graph structure.
30669 Using the provided `graph`, write to `path` a DOT formatted description
30670 of the graph. By default this includes the graph topology, node types,
30671 node id, kernel names and memcpy direction. `flags` can be specified to
30672 write more detailed information about each node type such as parameter
30673 values, kernel attributes, node and function handles.
30675 Parameters
30676 ----------
30677 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
30678 The graph to create a DOT file from
30679 path : bytes
30680 The path to write the DOT file to
30681 flags : unsigned int
30682 Flags from cudaGraphDebugDotFlags for specifying which additional
30683 node information to write
30685 Returns
30686 -------
30687 cudaError_t
30688 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorOperatingSystem`
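
Examples
--------
An illustrative sketch; note that `path` must be passed as bytes:

>>> from cuda.bindings import runtime
>>> flags = runtime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsVerbose
>>> err, = runtime.cudaGraphDebugDotPrint(graph, b"graph.dot", flags)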
30689 """
30690 cdef cyruntime.cudaGraph_t cygraph
30691 if graph is None:
30692 pgraph = 0
30693 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
30694 pgraph = int(graph)
30695 else:
30696 pgraph = int(cudaGraph_t(graph))
30697 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
30698 with nogil:
30699 err = cyruntime.cudaGraphDebugDotPrint(cygraph, path, flags)
30700 return (_dict_cudaError_t[err],)
30702@cython.embedsignature(True)
30703def cudaUserObjectCreate(ptr, destroy, unsigned int initialRefcount, unsigned int flags):
30704 """ Create a user object.
30706 Create a user object with the specified destructor callback and initial
30707 reference count. The initial references are owned by the caller.
30709 Destructor callbacks cannot make CUDA API calls and should avoid
30710 blocking behavior, as they are executed by a shared internal thread.
30711 Another thread may be signaled to perform such actions, if it does not
30712 block forward progress of tasks scheduled through CUDA.
30714 See CUDA User Objects in the CUDA C++ Programming Guide for more
30715 information on user objects.
30717 Parameters
30718 ----------
30719 ptr : Any
30720 The pointer to pass to the destroy function
30721 destroy : :py:obj:`~.cudaHostFn_t`
30722 Callback to free the user object when it is no longer in use
30723 initialRefcount : unsigned int
30724 The initial refcount to create the object with, typically 1. The
30725 initial references are owned by the calling thread.
30726 flags : unsigned int
30727 Currently it is required to pass
30728 :py:obj:`~.cudaUserObjectNoDestructorSync`, which is the only
30729 defined flag. This indicates that the destroy callback cannot be
30730 waited on by any CUDA API. Users requiring synchronization of the
30731 callback should signal its completion manually.
30733 Returns
30734 -------
30735 cudaError_t
30736 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
30737 object_out : :py:obj:`~.cudaUserObject_t`
30738 Location to return the user object handle
30740 See Also
30741 --------
30742 :py:obj:`~.cudaUserObjectRetain`, :py:obj:`~.cudaUserObjectRelease`, :py:obj:`~.cudaGraphRetainUserObject`, :py:obj:`~.cudaGraphReleaseUserObject`, :py:obj:`~.cudaGraphCreate`
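
Examples
--------
An illustrative sketch using a ctypes callback as the destructor; the
callback object must be kept alive by the caller for as long as the
user object may be destroyed:

>>> import ctypes
>>> from cuda.bindings import runtime
>>> @ctypes.CFUNCTYPE(None, ctypes.c_void_p)
... def _destroy(ptr):
...     pass  # release the resource referenced by ptr here
>>> err, obj = runtime.cudaUserObjectCreate(
...     0, ctypes.cast(_destroy, ctypes.c_void_p).value, 1,
...     runtime.cudaUserObjectFlags.cudaUserObjectNoDestructorSync)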
30743 """
30744 cdef cyruntime.cudaHostFn_t cydestroy
30745 if destroy is None:
30746 pdestroy = 0
30747 elif isinstance(destroy, (cudaHostFn_t,)):
30748 pdestroy = int(destroy)
30749 else:
30750 pdestroy = int(cudaHostFn_t(destroy))
30751 cydestroy = <cyruntime.cudaHostFn_t><void_ptr>pdestroy
30752 cdef cudaUserObject_t object_out = cudaUserObject_t()
30753 cyptr = _HelperInputVoidPtr(ptr)
30754 cdef void* cyptr_ptr = <void*><void_ptr>cyptr.cptr
30755 with nogil:
30756 err = cyruntime.cudaUserObjectCreate(<cyruntime.cudaUserObject_t*>object_out._pvt_ptr, cyptr_ptr, cydestroy, initialRefcount, flags)
30757 if err != cyruntime.cudaSuccess:
30758 return (_dict_cudaError_t[err], None)
30759 return (_dict_cudaError_t[err], object_out)
30761@cython.embedsignature(True)
30762def cudaUserObjectRetain(object, unsigned int count):
30763 """ Retain a reference to a user object.
30765 Retains new references to a user object. The new references are owned
30766 by the caller.
30768 See CUDA User Objects in the CUDA C++ Programming Guide for more
30769 information on user objects.
30771 Parameters
30772 ----------
30773 object : :py:obj:`~.cudaUserObject_t`
30774 The object to retain
30775 count : unsigned int
30776 The number of references to retain, typically 1. Must be nonzero
30777 and not larger than INT_MAX.
30779 Returns
30780 -------
30781 cudaError_t
30782 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
30784 See Also
30785 --------
30786 :py:obj:`~.cudaUserObjectCreate`, :py:obj:`~.cudaUserObjectRelease`, :py:obj:`~.cudaGraphRetainUserObject`, :py:obj:`~.cudaGraphReleaseUserObject`, :py:obj:`~.cudaGraphCreate`
30787 """
30788 cdef cyruntime.cudaUserObject_t cyobject
30789 if object is None:
30790 pobject = 0
30791 elif isinstance(object, (cudaUserObject_t,driver.CUuserObject)):
30792 pobject = int(object)
30793 else:
30794 pobject = int(cudaUserObject_t(object))
30795 cyobject = <cyruntime.cudaUserObject_t><void_ptr>pobject
30796 with nogil:
30797 err = cyruntime.cudaUserObjectRetain(cyobject, count)
30798 return (_dict_cudaError_t[err],)
30800@cython.embedsignature(True)
30801def cudaUserObjectRelease(object, unsigned int count):
30802 """ Release a reference to a user object.
30804 Releases user object references owned by the caller. The object's
30805 destructor is invoked if the reference count reaches zero.
30807 It is undefined behavior to release references not owned by the caller,
30808 or to use a user object handle after all references are released.
30810 See CUDA User Objects in the CUDA C++ Programming Guide for more
30811 information on user objects.
30813 Parameters
30814 ----------
30815 object : :py:obj:`~.cudaUserObject_t`
30816 The object to release
30817 count : unsigned int
30818 The number of references to release, typically 1. Must be nonzero
30819 and not larger than INT_MAX.
30821 Returns
30822 -------
30823 cudaError_t
30824 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
30826 See Also
30827 --------
30828 :py:obj:`~.cudaUserObjectCreate`, :py:obj:`~.cudaUserObjectRetain`, :py:obj:`~.cudaGraphRetainUserObject`, :py:obj:`~.cudaGraphReleaseUserObject`, :py:obj:`~.cudaGraphCreate`
30829 """
30830 cdef cyruntime.cudaUserObject_t cyobject
30831 if object is None:
30832 pobject = 0
30833 elif isinstance(object, (cudaUserObject_t,driver.CUuserObject)):
30834 pobject = int(object)
30835 else:
30836 pobject = int(cudaUserObject_t(object))
30837 cyobject = <cyruntime.cudaUserObject_t><void_ptr>pobject
30838 with nogil:
30839 err = cyruntime.cudaUserObjectRelease(cyobject, count)
30840 return (_dict_cudaError_t[err],)
30842@cython.embedsignature(True)
30843def cudaGraphRetainUserObject(graph, object, unsigned int count, unsigned int flags):
30844 """ Retain a reference to a user object from a graph.
30846 Creates or moves user object references that will be owned by a CUDA
30847 graph.
30849 See CUDA User Objects in the CUDA C++ Programming Guide for more
30850 information on user objects.
30852 Parameters
30853 ----------
30854 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
30855 The graph to associate the reference with
30856 object : :py:obj:`~.cudaUserObject_t`
30857 The user object to retain a reference for
30858 count : unsigned int
30859 The number of references to add to the graph, typically 1. Must be
30860 nonzero and not larger than INT_MAX.
30861 flags : unsigned int
30862 The optional flag :py:obj:`~.cudaGraphUserObjectMove` transfers
30863 references from the calling thread, rather than creating new
30864 references. Pass 0 to create new references.
30866 Returns
30867 -------
30868 cudaError_t
30869 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
30871 See Also
30872 --------
30873 :py:obj:`~.cudaUserObjectCreate`, :py:obj:`~.cudaUserObjectRetain`, :py:obj:`~.cudaUserObjectRelease`, :py:obj:`~.cudaGraphReleaseUserObject`, :py:obj:`~.cudaGraphCreate`
30874 """
30875 cdef cyruntime.cudaUserObject_t cyobject
30876 if object is None:
30877 pobject = 0
30878 elif isinstance(object, (cudaUserObject_t,driver.CUuserObject)):
30879 pobject = int(object)
30880 else:
30881 pobject = int(cudaUserObject_t(object))
30882 cyobject = <cyruntime.cudaUserObject_t><void_ptr>pobject
30883 cdef cyruntime.cudaGraph_t cygraph
30884 if graph is None:
30885 pgraph = 0
30886 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
30887 pgraph = int(graph)
30888 else:
30889 pgraph = int(cudaGraph_t(graph))
30890 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
30891 with nogil:
30892 err = cyruntime.cudaGraphRetainUserObject(cygraph, cyobject, count, flags)
30893 return (_dict_cudaError_t[err],)
30895@cython.embedsignature(True)
30896def cudaGraphReleaseUserObject(graph, object, unsigned int count):
30897 """ Release a user object reference from a graph.
30899 Releases user object references owned by a graph.
30901 See CUDA User Objects in the CUDA C++ Programming Guide for more
30902 information on user objects.
30904 Parameters
30905 ----------
30906 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
30907 The graph that will release the reference
30908 object : :py:obj:`~.cudaUserObject_t`
30909 The user object to release a reference for
30910 count : unsigned int
30911 The number of references to release, typically 1. Must be nonzero
30912 and not larger than INT_MAX.
30914 Returns
30915 -------
30916 cudaError_t
30917 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
30919 See Also
30920 --------
30921 :py:obj:`~.cudaUserObjectCreate`, :py:obj:`~.cudaUserObjectRetain`, :py:obj:`~.cudaUserObjectRelease`, :py:obj:`~.cudaGraphRetainUserObject`, :py:obj:`~.cudaGraphCreate`
30922 """
30923 cdef cyruntime.cudaUserObject_t cyobject
30924 if object is None:
30925 pobject = 0
30926 elif isinstance(object, (cudaUserObject_t,driver.CUuserObject)):
30927 pobject = int(object)
30928 else:
30929 pobject = int(cudaUserObject_t(object))
30930 cyobject = <cyruntime.cudaUserObject_t><void_ptr>pobject
30931 cdef cyruntime.cudaGraph_t cygraph
30932 if graph is None:
30933 pgraph = 0
30934 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
30935 pgraph = int(graph)
30936 else:
30937 pgraph = int(cudaGraph_t(graph))
30938 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
30939 with nogil:
30940 err = cyruntime.cudaGraphReleaseUserObject(cygraph, cyobject, count)
30941 return (_dict_cudaError_t[err],)
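# Editorial sketch: hand the caller's reference over to a graph with
# cudaGraphUserObjectMove, so the graph owns the object's lifetime; destroying
# the graph drops the reference and lets the destructor fire at refcount zero.
def _example_graph_owns_user_object():
    obj = _example_user_object_create()
    err, graph = cudaGraphCreate(0)
    err, = cudaGraphRetainUserObject(
        graph, obj, 1, cudaGraphUserObjectFlags.cudaGraphUserObjectMove)
    assert err == cudaError_t.cudaSuccess
    err, = cudaGraphDestroy(graph)  # the destructor may now run asynchronously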
30943@cython.embedsignature(True)
30944def cudaGraphAddNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], dependencyData : Optional[tuple[cudaGraphEdgeData] | list[cudaGraphEdgeData]], size_t numDependencies, nodeParams : Optional[cudaGraphNodeParams]):
30945 """ Adds a node of arbitrary type to a graph.
30947 Creates a new node in `graph` described by `nodeParams` with
30948 `numDependencies` dependencies specified via `pDependencies`.
30949 `numDependencies` may be 0. `pDependencies` may be NULL if
30950 `numDependencies` is 0. `pDependencies` may not have any duplicate
30951 entries.
30953 `nodeParams` is a tagged union. The node type should be specified in
30954 the `typename` field, and type-specific parameters in the corresponding
30955 union member. All unused bytes - that is, `reserved0` and all bytes
30956 past the utilized union member - must be set to zero. It is recommended
30957 to use brace initialization or memset to ensure all bytes are
30958 initialized.
30960 Note that for some node types, `nodeParams` may contain "out
30961 parameters" which are modified during the call, such as
30962 `nodeParams->alloc.dptr`.
30964 A handle to the new node will be returned in `pGraphNode`.
30966 Parameters
30967 ----------
30968 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
30969 Graph to which to add the node
30970 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
30971 Dependencies of the node
30972 dependencyData : list[:py:obj:`~.cudaGraphEdgeData`]
30973 Optional edge data for the dependencies. If NULL, the data is
30974 assumed to be default (zeroed) for all dependencies.
30975 numDependencies : size_t
30976 Number of dependencies
30977 nodeParams : :py:obj:`~.cudaGraphNodeParams`
30978 Specification of the node
30980 Returns
30981 -------
30982 cudaError_t
30983 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorNotSupported`
30984 pGraphNode : :py:obj:`~.cudaGraphNode_t`
30985 Returns newly created node
30987 See Also
30988 --------
30989 :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphExecNodeSetParams`
30990 """
30991 dependencyData = [] if dependencyData is None else dependencyData
30992 if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in dependencyData):
30993 raise TypeError("Argument 'dependencyData' is not instance of type (expected tuple[cyruntime.cudaGraphEdgeData,] or list[cyruntime.cudaGraphEdgeData,]")
30994 pDependencies = [] if pDependencies is None else pDependencies
30995 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
30996 raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
30997 cdef cyruntime.cudaGraph_t cygraph
30998 if graph is None:
30999 pgraph = 0
31000 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
31001 pgraph = int(graph)
31002 else:
31003 pgraph = int(cudaGraph_t(graph))
31004 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
31005 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
31006 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
31007 if len(pDependencies) > 1:
31008 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
31009 if cypDependencies is NULL:
31010 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
31011 else:
31012 for idx in range(len(pDependencies)):
31013 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
31014 elif len(pDependencies) == 1:
31015 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
31016 cdef cyruntime.cudaGraphEdgeData* cydependencyData = NULL
31017 if len(dependencyData) > 1:
31018 cydependencyData = <cyruntime.cudaGraphEdgeData*> calloc(len(dependencyData), sizeof(cyruntime.cudaGraphEdgeData))
31019 if cydependencyData is NULL:
31020 raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
31021 for idx in range(len(dependencyData)):
31022 string.memcpy(&cydependencyData[idx], (<cudaGraphEdgeData>dependencyData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData))
31023 elif len(dependencyData) == 1:
31024 cydependencyData = (<cudaGraphEdgeData>dependencyData[0])._pvt_ptr
31025 cdef cyruntime.cudaGraphNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL
31026 with nogil:
31027 err = cyruntime.cudaGraphAddNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, cydependencyData, numDependencies, cynodeParams_ptr)
31028 if len(pDependencies) > 1 and cypDependencies is not NULL:
31029 free(cypDependencies)
31030 if len(dependencyData) > 1 and cydependencyData is not NULL:
31031 free(cydependencyData)
31032 if err != cyruntime.cudaSuccess:
31033 return (_dict_cudaError_t[err], None)
31034 return (_dict_cudaError_t[err], pGraphNode)
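# Editorial sketch: add a dependency-free empty node. Following the docstring
# above, the C struct field `type` is assumed to be exposed as `typename`
# (this module's renaming of the Python builtin).
def _example_graph_add_empty_node():
    err, graph = cudaGraphCreate(0)
    params = cudaGraphNodeParams()
    params.typename = cudaGraphNodeType.cudaGraphNodeTypeEmpty
    err, node = cudaGraphAddNode(graph, None, None, 0, params)
    assert err == cudaError_t.cudaSuccess
    cudaGraphDestroy(graph)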
31036@cython.embedsignature(True)
31037def cudaGraphNodeSetParams(node, nodeParams : Optional[cudaGraphNodeParams]):
31038 """ Update's a graph node's parameters.
31040 Sets the parameters of graph node `node` to `nodeParams`. The node type
31041 specified by `nodeParams->type` must match the type of `node`.
31042 `nodeParams` must be fully initialized and all unused bytes (reserved,
31043 padding) zeroed.
31045 Modifying parameters is not supported for node types
31046 cudaGraphNodeTypeMemAlloc and cudaGraphNodeTypeMemFree.
31048 Parameters
31049 ----------
31050 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
31051 Node to set the parameters for
31052 nodeParams : :py:obj:`~.cudaGraphNodeParams`
31053 Parameters to copy
31055 Returns
31056 -------
31057 cudaError_t
31058 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorNotSupported`
31060 See Also
31061 --------
31062 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphExecNodeSetParams`
31063 """
31064 cdef cyruntime.cudaGraphNode_t cynode
31065 if node is None:
31066 pnode = 0
31067 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
31068 pnode = int(node)
31069 else:
31070 pnode = int(cudaGraphNode_t(node))
31071 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
31072 cdef cyruntime.cudaGraphNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL
31073 with nogil:
31074 err = cyruntime.cudaGraphNodeSetParams(cynode, cynodeParams_ptr)
31075 return (_dict_cudaError_t[err],)
31077@cython.embedsignature(True)
31078def cudaGraphExecNodeSetParams(graphExec, node, nodeParams : Optional[cudaGraphNodeParams]):
31079 """ Update's a graph node's parameters in an instantiated graph.
31081 Sets the parameters of a node in an executable graph `graphExec`. The
31082 node is identified by the corresponding node `node` in the non-
31083 executable graph from which the executable graph was instantiated.
31084 `node` must not have been removed from the original graph.
31086 The modifications only affect future launches of `graphExec`. Already
31087 enqueued or running launches of `graphExec` are not affected by this
31088 call. `node` is also not modified by this call.
31090 Allowed changes to parameters on executable graphs are as follows:
31092 **View CUDA Toolkit Documentation for a table example**
31094 Parameters
31095 ----------
31096 graphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
31097 The executable graph in which to update the specified node
31098 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
31099 Corresponding node from the graph from which graphExec was
31100 instantiated
31101 nodeParams : :py:obj:`~.cudaGraphNodeParams`
31102 Updated parameters to set
31104 Returns
31105 -------
31106 cudaError_t
31107 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorNotSupported`
31109 See Also
31110 --------
31111 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
31112 """
31113 cdef cyruntime.cudaGraphNode_t cynode
31114 if node is None:
31115 pnode = 0
31116 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
31117 pnode = int(node)
31118 else:
31119 pnode = int(cudaGraphNode_t(node))
31120 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
31121 cdef cyruntime.cudaGraphExec_t cygraphExec
31122 if graphExec is None:
31123 pgraphExec = 0
31124 elif isinstance(graphExec, (cudaGraphExec_t,driver.CUgraphExec)):
31125 pgraphExec = int(graphExec)
31126 else:
31127 pgraphExec = int(cudaGraphExec_t(graphExec))
31128 cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>pgraphExec
31129 cdef cyruntime.cudaGraphNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL
31130 with nogil:
31131 err = cyruntime.cudaGraphExecNodeSetParams(cygraphExec, cynode, cynodeParams_ptr)
31132 return (_dict_cudaError_t[err],)
31134@cython.embedsignature(True)
31135def cudaGraphConditionalHandleCreate(graph, unsigned int defaultLaunchValue, unsigned int flags):
31136 """ Create a conditional handle.
31138 Creates a conditional handle associated with `graph`.
31140 The conditional handle must be associated with a conditional node in
31141 this graph or one of its children.
31143 Handles not associated with a conditional node may cause graph
31144 instantiation to fail.
31146 Parameters
31147 ----------
31148 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
31149 Graph which will contain the conditional node using this handle.
31150 defaultLaunchValue : unsigned int
31151 Optional initial value for the conditional variable. Applied at the
31152 beginning of each graph execution if cudaGraphCondAssignDefault is
31153 set in `flags`.
31154 flags : unsigned int
31155 Currently must be cudaGraphCondAssignDefault or 0.
31157 Returns
31158 -------
31159 cudaError_t
31160 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
31161 pHandle_out : :py:obj:`~.cudaGraphConditionalHandle`
31162 Pointer used to return the handle to the caller.
31164 See Also
31165 --------
31166 :py:obj:`~.cuGraphAddNode`
31167 """
31168 cdef cyruntime.cudaGraph_t cygraph
31169 if graph is None:
31170 pgraph = 0
31171 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
31172 pgraph = int(graph)
31173 else:
31174 pgraph = int(cudaGraph_t(graph))
31175 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
31176 cdef cudaGraphConditionalHandle pHandle_out = cudaGraphConditionalHandle()
31177 with nogil:
31178 err = cyruntime.cudaGraphConditionalHandleCreate(<cyruntime.cudaGraphConditionalHandle*>pHandle_out._pvt_ptr, cygraph, defaultLaunchValue, flags)
31179 if err != cyruntime.cudaSuccess:
31180 return (_dict_cudaError_t[err], None)
31181 return (_dict_cudaError_t[err], pHandle_out)
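# Editorial sketch: create a handle whose value is reset to 1 at the start of
# every launch of the owning graph. The flag is assumed to live on the
# cudaGraphConditionalHandleFlags IntEnum.
def _example_conditional_handle():
    err, graph = cudaGraphCreate(0)
    err, handle = cudaGraphConditionalHandleCreate(
        graph, 1, cudaGraphConditionalHandleFlags.cudaGraphCondAssignDefault)
    assert err == cudaError_t.cudaSuccess
    return graph, handle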
31183@cython.embedsignature(True)
31184def cudaGraphConditionalHandleCreate_v2(graph, ctx, unsigned int defaultLaunchValue, unsigned int flags):
31185 """ Create a conditional handle.
31187 Creates a conditional handle associated with `graph`.
31189 The conditional handle must be associated with a conditional node in
31190 this graph or one of its children.
31192 Handles not associated with a conditional node may cause graph
31193 instantiation to fail.
31195 Parameters
31196 ----------
31197 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
31198 Graph which will contain the conditional node using this handle.
31199 ctx : :py:obj:`~.cudaExecutionContext_t`
31200 Execution context for the handle and associated conditional node.
31201 If NULL, the current context will be used.
31202 defaultLaunchValue : unsigned int
31203 Optional initial value for the conditional variable. Applied at the
31204 beginning of each graph execution if cudaGraphCondAssignDefault is
31205 set in `flags`.
31206 flags : unsigned int
31207 Currently must be cudaGraphCondAssignDefault or 0.
31209 Returns
31210 -------
31211 cudaError_t
31212 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
31213 pHandle_out : :py:obj:`~.cudaGraphConditionalHandle`
31214 Pointer used to return the handle to the caller.
31216 See Also
31217 --------
31218 :py:obj:`~.cuGraphAddNode`
31219 """
31220 cdef cyruntime.cudaExecutionContext_t cyctx
31221 if ctx is None:
31222 pctx = 0
31223 elif isinstance(ctx, (cudaExecutionContext_t,)):
31224 pctx = int(ctx)
31225 else:
31226 pctx = int(cudaExecutionContext_t(ctx))
31227 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
31228 cdef cyruntime.cudaGraph_t cygraph
31229 if graph is None:
31230 pgraph = 0
31231 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
31232 pgraph = int(graph)
31233 else:
31234 pgraph = int(cudaGraph_t(graph))
31235 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
31236 cdef cudaGraphConditionalHandle pHandle_out = cudaGraphConditionalHandle()
31237 with nogil:
31238 err = cyruntime.cudaGraphConditionalHandleCreate_v2(<cyruntime.cudaGraphConditionalHandle*>pHandle_out._pvt_ptr, cygraph, cyctx, defaultLaunchValue, flags)
31239 if err != cyruntime.cudaSuccess:
31240 return (_dict_cudaError_t[err], None)
31241 return (_dict_cudaError_t[err], pHandle_out)
31243@cython.embedsignature(True)
31244def cudaGetDriverEntryPoint(char* symbol, unsigned long long flags):
31245 """ Returns the requested driver API function pointer.
31247 [Deprecated]
31249 Returns in `**funcPtr` the address of the CUDA driver function for the
31250 requested flags.
31252 For a requested driver symbol, if the CUDA version in which the driver
31253 symbol was introduced is less than or equal to the CUDA runtime
31254 version, the API will return the function pointer to the corresponding
31255 versioned driver function.
31257 The pointer returned by the API should be cast to a function pointer
31258 matching the requested driver function's definition in the API header
31259 file. The function pointer typedef can be picked up from the
31260 corresponding typedefs header file. For example, cudaTypedefs.h
31261 consists of function pointer typedefs for driver APIs defined in
31262 cuda.h.
31264 The API will return :py:obj:`~.cudaSuccess` and set the returned
31265 `funcPtr` if the requested driver function is valid and supported on
31266 the platform.
31268 The API will return :py:obj:`~.cudaSuccess` and set the returned
31269 `funcPtr` to NULL if the requested driver function is not supported on
31270 the platform, no ABI compatible driver function exists for the CUDA
31271 runtime version or if the driver symbol is invalid.
31273 It will also set the optional `driverStatus` to one of the values in
31274 :py:obj:`~.cudaDriverEntryPointQueryResult` with the following
31275 meanings:
31277 - :py:obj:`~.cudaDriverEntryPointSuccess` - The requested symbol was
31278 successfully found based on input arguments and `pfn` is valid
31280 - :py:obj:`~.cudaDriverEntryPointSymbolNotFound` - The requested symbol
31281 was not found
31283 - :py:obj:`~.cudaDriverEntryPointVersionNotSufficent` - The requested
31284 symbol was found but is not supported by the current runtime version
31285 (CUDART_VERSION)
31287 The requested flags can be:
31289 - :py:obj:`~.cudaEnableDefault`: This is the default mode. This is
31290 equivalent to :py:obj:`~.cudaEnablePerThreadDefaultStream` if the
31291 code is compiled with --default-stream per-thread compilation flag or
31292 the macro CUDA_API_PER_THREAD_DEFAULT_STREAM is defined;
31293 :py:obj:`~.cudaEnableLegacyStream` otherwise.
31295 - :py:obj:`~.cudaEnableLegacyStream`: This will enable the search for
31296 all driver symbols that match the requested driver symbol name except
31297 the corresponding per-thread versions.
31299 - :py:obj:`~.cudaEnablePerThreadDefaultStream`: This will enable the
31300 search for all driver symbols that match the requested driver symbol
31301 name including the per-thread versions. If a per-thread version is
31302 not found, the API will return the legacy version of the driver
31303 function.
31305 Parameters
31306 ----------
31307 symbol : bytes
31308 The base name of the driver API function to look for. As an
31309 example, for the driver API :py:obj:`~.cuMemAlloc_v2`, `symbol`
31310 would be cuMemAlloc. Note that the API will use the CUDA runtime
31311 version to return the address to the most recent ABI compatible
31312 driver symbol, :py:obj:`~.cuMemAlloc` or :py:obj:`~.cuMemAlloc_v2`.
31313 flags : unsigned long long
31314 Flags to specify search options.
31316 Returns
31317 -------
31318 cudaError_t
31319 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
31320 funcPtr : Any
31321 Location to return the function pointer to the requested driver
31322 function
31323 driverStatus : :py:obj:`~.cudaDriverEntryPointQueryResult`
31324 Optional location to store the status of finding the symbol from
31325 the driver. See :py:obj:`~.cudaDriverEntryPointQueryResult` for
31326 possible values.
31328 See Also
31329 --------
31330 :py:obj:`~.cuGetProcAddress`
31332 Notes
31333 -----
31334 This API is deprecated and :py:obj:`~.cudaGetDriverEntryPointByVersion` (with a hardcoded :py:obj:`~.cudaVersion`) should be used instead.
31335 """
31336 cdef void_ptr funcPtr = 0
31337 cdef cyruntime.cudaDriverEntryPointQueryResult driverStatus
31338 with nogil:
31339 err = cyruntime.cudaGetDriverEntryPoint(symbol, <void**>&funcPtr, flags, &driverStatus)
31340 if err != cyruntime.cudaSuccess:
31341 return (_dict_cudaError_t[err], None, None)
31342 return (_dict_cudaError_t[err], funcPtr, cudaDriverEntryPointQueryResult(driverStatus))
31344@cython.embedsignature(True)
31345def cudaGetDriverEntryPointByVersion(char* symbol, unsigned int cudaVersion, unsigned long long flags):
31346 """ Returns the requested driver API function pointer by CUDA version.
31348 Returns in `**funcPtr` the address of the CUDA driver function for the
31349 requested flags and CUDA driver version.
31351 The CUDA version is specified as (1000 * major + 10 * minor), so CUDA
31352 11.2 should be specified as 11020. For a requested driver symbol, if
31353 the specified CUDA version is greater than or equal to the CUDA version
31354 in which the driver symbol was introduced, this API will return the
31355 function pointer to the corresponding versioned function. If the
31356 specified CUDA version is greater than the driver version, the API will
31357 return :py:obj:`~.cudaErrorInvalidValue`.
31359 The pointer returned by the API should be cast to a function pointer
31360 matching the requested driver function's definition in the API header
31361 file. The function pointer typedef can be picked up from the
31362 corresponding typedefs header file. For example, cudaTypedefs.h
31363 consists of function pointer typedefs for driver APIs defined in
31364 cuda.h.
31366 If the requested CUDA version is newer than the installed CUDA
31367 Toolkit, the corresponding header file may not provide an appropriate
31368 function pointer typedef, and a custom typedef may be needed to match
31369 the signature of the returned driver function. This can be done by
31370 taking the typedefs from a later toolkit or writing suitably matching
31371 custom function typedefs.
31373 The API will return :py:obj:`~.cudaSuccess` and set the returned
31374 `funcPtr` if the requested driver function is valid and supported on
31375 the platform.
31377 The API will return :py:obj:`~.cudaSuccess` and set the returned
31378 `funcPtr` to NULL if the requested driver function is not supported on
31379 the platform, no ABI compatible driver function exists for the
31380 requested version or if the driver symbol is invalid.
31382 It will also set the optional `driverStatus` to one of the values in
31383 :py:obj:`~.cudaDriverEntryPointQueryResult` with the following
31384 meanings:
31386 - :py:obj:`~.cudaDriverEntryPointSuccess` - The requested symbol was
31387 successfully found based on input arguments and `pfn` is valid
31389 - :py:obj:`~.cudaDriverEntryPointSymbolNotFound` - The requested symbol
31390 was not found
31392 - :py:obj:`~.cudaDriverEntryPointVersionNotSufficent` - The requested
31393 symbol was found but is not supported by the specified version
31394 `cudaVersion`
31396 The requested flags can be:
31398 - :py:obj:`~.cudaEnableDefault`: This is the default mode. This is
31399 equivalent to :py:obj:`~.cudaEnablePerThreadDefaultStream` if the
31400 code is compiled with --default-stream per-thread compilation flag or
31401 the macro CUDA_API_PER_THREAD_DEFAULT_STREAM is defined;
31402 :py:obj:`~.cudaEnableLegacyStream` otherwise.
31404 - :py:obj:`~.cudaEnableLegacyStream`: This will enable the search for
31405 all driver symbols that match the requested driver symbol name except
31406 the corresponding per-thread versions.
31408 - :py:obj:`~.cudaEnablePerThreadDefaultStream`: This will enable the
31409 search for all driver symbols that match the requested driver symbol
31410 name including the per-thread versions. If a per-thread version is
31411 not found, the API will return the legacy version of the driver
31412 function.
31414 Parameters
31415 ----------
31416 symbol : bytes
31417 The base name of the driver API function to look for. As an
31418 example, for the driver API :py:obj:`~.cuMemAlloc_v2`, `symbol`
31419 would be cuMemAlloc.
31420 cudaVersion : unsigned int
31421 The CUDA version to look for the requested driver symbol
31422 flags : unsigned long long
31423 Flags to specify search options.
31425 Returns
31426 -------
31427 cudaError_t
31428 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
31429 funcPtr : Any
31430 Location to return the function pointer to the requested driver
31431 function
31432 driverStatus : :py:obj:`~.cudaDriverEntryPointQueryResult`
31433 Optional location to store the status of finding the symbol from
31434 the driver. See :py:obj:`~.cudaDriverEntryPointQueryResult` for
31435 possible values.
31437 See Also
31438 --------
31439 :py:obj:`~.cuGetProcAddress`
31440 """
31441 cdef void_ptr funcPtr = 0
31442 cdef cyruntime.cudaDriverEntryPointQueryResult driverStatus
31443 with nogil:
31444 err = cyruntime.cudaGetDriverEntryPointByVersion(symbol, <void**>&funcPtr, cudaVersion, flags, &driverStatus)
31445 if err != cyruntime.cudaSuccess:
31446 return (_dict_cudaError_t[err], None, None)
31447 return (_dict_cudaError_t[err], funcPtr, cudaDriverEntryPointQueryResult(driverStatus))
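# Editorial sketch: fetch cuDriverGetVersion at the CUDA 12.0 ABI and call it
# through ctypes. `funcPtr` comes back as a plain integer address;
# cudaEnableDefault is assumed to be the module-level flag constant.
def _example_driver_entry_point():
    err, fptr, status = cudaGetDriverEntryPointByVersion(
        b"cuDriverGetVersion", 12000, cudaEnableDefault)
    if err == cudaError_t.cudaSuccess and fptr:
        # CUresult cuDriverGetVersion(int *driverVersion)
        proto = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.POINTER(ctypes.c_int))
        version = ctypes.c_int(0)
        proto(fptr)(ctypes.byref(version))
        return version.value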
31449@cython.embedsignature(True)
31450def cudaLibraryLoadData(code, jitOptions : Optional[tuple[cudaJitOption] | list[cudaJitOption]], jitOptionsValues : Optional[tuple[Any] | list[Any]], unsigned int numJitOptions, libraryOptions : Optional[tuple[cudaLibraryOption] | list[cudaLibraryOption]], libraryOptionValues : Optional[tuple[Any] | list[Any]], unsigned int numLibraryOptions):
31451 """ Load a library with specified code and options.
31453 Takes a pointer `code` and loads the corresponding library `library`
31454 based on the application-defined library loading mode:
31456 - If module loading is set to EAGER, via the environment variables
31457 described in "Module loading", `library` is loaded eagerly into all
31458 contexts at the time of the call and future contexts at the time of
31459 creation until the library is unloaded with
31460 :py:obj:`~.cudaLibraryUnload()`.
31462 - If the environment variables are set to LAZY, `library` is not
31463 immediately loaded onto all existent contexts and will only be loaded
31464 when a function is needed for that context, such as a kernel launch.
31466 These environment variables are described in the CUDA programming guide
31467 under the "CUDA environment variables" section.
31469 The `code` may be a `cubin` or `fatbin` as output by nvcc, a NULL-
31470 terminated `PTX` (output by nvcc or hand-written), or `Tile`
31471 IR data. A fatbin should also contain relocatable code when doing
31472 separate compilation. Please also see the documentation for nvrtc
31473 (https://docs.nvidia.com/cuda/nvrtc/index.html), nvjitlink
31474 (https://docs.nvidia.com/cuda/nvjitlink/index.html), and nvfatbin
31475 (https://docs.nvidia.com/cuda/nvfatbin/index.html) for more information
31476 on generating loadable code at runtime.
31478 Options are passed as an array via `jitOptions` and any corresponding
31479 parameters are passed in `jitOptionsValues`. The number of total JIT
31480 options is supplied via `numJitOptions`. Any outputs will be returned
31481 via `jitOptionsValues`.
31483 Library load options are passed as an array via `libraryOptions` and
31484 any corresponding parameters are passed in `libraryOptionValues`. The
31485 number of total library load options is supplied via
31486 `numLibraryOptions`.
31488 Parameters
31489 ----------
31490 code : Any
31491 Code to load
31492 jitOptions : list[:py:obj:`~.cudaJitOption`]
31493 Options for JIT
31494 jitOptionsValues : list[Any]
31495 Option values for JIT
31496 numJitOptions : unsigned int
31497 Number of options
31498 libraryOptions : list[:py:obj:`~.cudaLibraryOption`]
31499 Options for loading
31500 libraryOptionValues : list[Any]
31501 Option values for loading
31502 numLibraryOptions : unsigned int
31503 Number of options for loading
31505 Returns
31506 -------
31507 cudaError_t
31508 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInvalidPtx`, :py:obj:`~.cudaErrorUnsupportedPtxVersion`, :py:obj:`~.cudaErrorNoKernelImageForDevice`, :py:obj:`~.cudaErrorSharedObjectSymbolNotFound`, :py:obj:`~.cudaErrorSharedObjectInitFailed`, :py:obj:`~.cudaErrorJitCompilerNotFound`
31509 library : :py:obj:`~.cudaLibrary_t`
31510 Returned library
31512 See Also
31513 --------
31514 :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cudaLibraryUnload`, :py:obj:`~.cuLibraryLoadData`
31515 """
31516 libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues
31517 libraryOptions = [] if libraryOptions is None else libraryOptions
31518 if not all(isinstance(_x, (cudaLibraryOption,)) for _x in libraryOptions):
31519 raise TypeError("Argument 'libraryOptions' is not instance of type (expected tuple[cyruntime.cudaLibraryOption] or list[cyruntime.cudaLibraryOption])")
31520 jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues
31521 jitOptions = [] if jitOptions is None else jitOptions
31522 if not all(isinstance(_x, (cudaJitOption,)) for _x in jitOptions):
31523 raise TypeError("Argument 'jitOptions' is not instance of type (expected tuple[cyruntime.cudaJitOption] or list[cyruntime.cudaJitOption])")
31524 cdef cudaLibrary_t library = cudaLibrary_t()
31525 cycode = _HelperInputVoidPtr(code)
31526 cdef void* cycode_ptr = <void*><void_ptr>cycode.cptr
31527 cdef vector[cyruntime.cudaJitOption] cyjitOptions = [pyjitOptions.value for pyjitOptions in (jitOptions)]
31528 pylist = [_HelperCudaJitOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(jitOptions, jitOptionsValues)]
31529 cdef _InputVoidPtrPtrHelper voidStarHelperjitOptionsValues = _InputVoidPtrPtrHelper(pylist)
31530 cdef void** cyjitOptionsValues_ptr = <void**><void_ptr>voidStarHelperjitOptionsValues.cptr
31531 if numJitOptions > len(jitOptions): raise RuntimeError("List is too small: " + str(len(jitOptions)) + " < " + str(numJitOptions))
31532 if numJitOptions > len(jitOptionsValues): raise RuntimeError("List is too small: " + str(len(jitOptionsValues)) + " < " + str(numJitOptions))
31533 cdef vector[cyruntime.cudaLibraryOption] cylibraryOptions = [pylibraryOptions.value for pylibraryOptions in (libraryOptions)]
31534 pylist = [_HelperCudaLibraryOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(libraryOptions, libraryOptionValues)]
31535 cdef _InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues = _InputVoidPtrPtrHelper(pylist)
31536 cdef void** cylibraryOptionValues_ptr = <void**><void_ptr>voidStarHelperlibraryOptionValues.cptr
31537 if numLibraryOptions > len(libraryOptions): raise RuntimeError("List is too small: " + str(len(libraryOptions)) + " < " + str(numLibraryOptions))
31538 if numLibraryOptions > len(libraryOptionValues): raise RuntimeError("List is too small: " + str(len(libraryOptionValues)) + " < " + str(numLibraryOptions))
31539 with nogil:
31540 err = cyruntime.cudaLibraryLoadData(<cyruntime.cudaLibrary_t*>library._pvt_ptr, cycode_ptr, cyjitOptions.data(), cyjitOptionsValues_ptr, numJitOptions, cylibraryOptions.data(), cylibraryOptionValues_ptr, numLibraryOptions)
31541 if err != cyruntime.cudaSuccess:
31542 return (_dict_cudaError_t[err], None)
31543 return (_dict_cudaError_t[err], library)
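# Editorial sketch: load a minimal NULL-terminated PTX image with no JIT or
# library options. The PTX below is a hand-written no-op kernel; any valid
# image would do.
_EXAMPLE_PTX = b"""
.version 7.0
.target sm_50
.address_size 64
.visible .entry noop() { ret; }
""" + b"\x00"

def _example_library_load_data():
    err, library = cudaLibraryLoadData(
        _EXAMPLE_PTX, None, None, 0, None, None, 0)
    assert err == cudaError_t.cudaSuccess
    return library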
31545@cython.embedsignature(True)
31546def cudaLibraryLoadFromFile(char* fileName, jitOptions : Optional[tuple[cudaJitOption] | list[cudaJitOption]], jitOptionsValues : Optional[tuple[Any] | list[Any]], unsigned int numJitOptions, libraryOptions : Optional[tuple[cudaLibraryOption] | list[cudaLibraryOption]], libraryOptionValues : Optional[tuple[Any] | list[Any]], unsigned int numLibraryOptions):
31547 """ Load a library with specified file and options.
31549 Takes a filename `fileName` and loads the corresponding library
31550 `library` based on the application-defined library loading mode:
31552 - If module loading is set to EAGER, via the environment variables
31553 described in "Module loading", `library` is loaded eagerly into all
31554 contexts at the time of the call and future contexts at the time of
31555 creation until the library is unloaded with
31556 :py:obj:`~.cudaLibraryUnload()`.
31558 - If the environment variables are set to LAZY, `library` is not
31559 immediately loaded onto all existent contexts and will only be loaded
31560 when a function is needed for that context, such as a kernel launch.
31562 These environment variables are described in the CUDA programming guide
31563 under the "CUDA environment variables" section.
31565 The file should be a `cubin`, `PTX`, or `fatbin` file, as output by
31566 nvcc or hand-written, or a `Tile` IR file. A fatbin should also
31568 contain relocatable code when doing separate compilation. Please also
31569 see the documentation for nvrtc
31570 (https://docs.nvidia.com/cuda/nvrtc/index.html), nvjitlink
31571 (https://docs.nvidia.com/cuda/nvjitlink/index.html), and nvfatbin
31572 (https://docs.nvidia.com/cuda/nvfatbin/index.html) for more information
31573 on generating loadable code at runtime.
31575 Options are passed as an array via `jitOptions` and any corresponding
31576 parameters are passed in `jitOptionsValues`. The number of total
31577 options is supplied via `numJitOptions`. Any outputs will be returned
31578 via `jitOptionsValues`.
31580 Library load options are passed as an array via `libraryOptions` and
31581 any corresponding parameters are passed in `libraryOptionValues`. The
31582 number of total library load options is supplied via
31583 `numLibraryOptions`.
31585 Parameters
31586 ----------
31587 fileName : bytes
31588 File to load from
31589 jitOptions : list[:py:obj:`~.cudaJitOption`]
31590 Options for JIT
31591 jitOptionsValues : list[Any]
31592 Option values for JIT
31593 numJitOptions : unsigned int
31594 Number of options
31595 libraryOptions : list[:py:obj:`~.cudaLibraryOption`]
31596 Options for loading
31597 libraryOptionValues : list[Any]
31598 Option values for loading
31599 numLibraryOptions : unsigned int
31600 Number of options for loading
31602 Returns
31603 -------
31604 cudaError_t
31605 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInvalidPtx`, :py:obj:`~.cudaErrorUnsupportedPtxVersion`, :py:obj:`~.cudaErrorNoKernelImageForDevice`, :py:obj:`~.cudaErrorSharedObjectSymbolNotFound`, :py:obj:`~.cudaErrorSharedObjectInitFailed`, :py:obj:`~.cudaErrorJitCompilerNotFound`
31606 library : :py:obj:`~.cudaLibrary_t`
31607 Returned library
31609 See Also
31610 --------
31611 :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cudaLibraryUnload`, :py:obj:`~.cuLibraryLoadFromFile`
31612 """
31613 libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues
31614 libraryOptions = [] if libraryOptions is None else libraryOptions
31615 if not all(isinstance(_x, (cudaLibraryOption,)) for _x in libraryOptions):
31616 raise TypeError("Argument 'libraryOptions' is not instance of type (expected tuple[cyruntime.cudaLibraryOption] or list[cyruntime.cudaLibraryOption])")
31617 jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues
31618 jitOptions = [] if jitOptions is None else jitOptions
31619 if not all(isinstance(_x, (cudaJitOption,)) for _x in jitOptions):
31620 raise TypeError("Argument 'jitOptions' is not instance of type (expected tuple[cyruntime.cudaJitOption] or list[cyruntime.cudaJitOption])")
31621 cdef cudaLibrary_t library = cudaLibrary_t()
31622 cdef vector[cyruntime.cudaJitOption] cyjitOptions = [pyjitOptions.value for pyjitOptions in (jitOptions)]
31623 pylist = [_HelperCudaJitOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(jitOptions, jitOptionsValues)]
31624 cdef _InputVoidPtrPtrHelper voidStarHelperjitOptionsValues = _InputVoidPtrPtrHelper(pylist)
31625 cdef void** cyjitOptionsValues_ptr = <void**><void_ptr>voidStarHelperjitOptionsValues.cptr
31626 if numJitOptions > len(jitOptions): raise RuntimeError("List is too small: " + str(len(jitOptions)) + " < " + str(numJitOptions))
31627 if numJitOptions > len(jitOptionsValues): raise RuntimeError("List is too small: " + str(len(jitOptionsValues)) + " < " + str(numJitOptions))
31628 cdef vector[cyruntime.cudaLibraryOption] cylibraryOptions = [pylibraryOptions.value for pylibraryOptions in (libraryOptions)]
31629 pylist = [_HelperCudaLibraryOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(libraryOptions, libraryOptionValues)]
31630 cdef _InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues = _InputVoidPtrPtrHelper(pylist)
31631 cdef void** cylibraryOptionValues_ptr = <void**><void_ptr>voidStarHelperlibraryOptionValues.cptr
31632 if numLibraryOptions > len(libraryOptions): raise RuntimeError("List is too small: " + str(len(libraryOptions)) + " < " + str(numLibraryOptions))
31633 if numLibraryOptions > len(libraryOptionValues): raise RuntimeError("List is too small: " + str(len(libraryOptionValues)) + " < " + str(numLibraryOptions))
31634 with nogil:
31635 err = cyruntime.cudaLibraryLoadFromFile(<cyruntime.cudaLibrary_t*>library._pvt_ptr, fileName, cyjitOptions.data(), cyjitOptionsValues_ptr, numJitOptions, cylibraryOptions.data(), cylibraryOptionValues_ptr, numLibraryOptions)
31636 if err != cyruntime.cudaSuccess:
31637 return (_dict_cudaError_t[err], None)
31638 return (_dict_cudaError_t[err], library)
31640@cython.embedsignature(True)
31641def cudaLibraryUnload(library):
31642 """ Unloads a library.
31644 Unloads the library specified with `library`
31646 Parameters
31647 ----------
31648 library : :py:obj:`~.cudaLibrary_t`
31649 Library to unload
31651 Returns
31652 -------
31653 cudaError_t
31654 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidValue`
31656 See Also
31657 --------
31658 :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload`
31659 """
31660 cdef cyruntime.cudaLibrary_t cylibrary
31661 if library is None:
31662 plibrary = 0
31663 elif isinstance(library, (cudaLibrary_t,)):
31664 plibrary = int(library)
31665 else:
31666 plibrary = int(cudaLibrary_t(library))
31667 cylibrary = <cyruntime.cudaLibrary_t><void_ptr>plibrary
31668 with nogil:
31669 err = cyruntime.cudaLibraryUnload(cylibrary)
31670 return (_dict_cudaError_t[err],)
31672@cython.embedsignature(True)
31673def cudaLibraryGetKernel(library, char* name):
31674 """ Returns a kernel handle.
31676 Returns in `pKernel` the handle of the kernel with name `name` located
31677 in library `library`. If the kernel is not found, the call returns
31678 :py:obj:`~.cudaErrorSymbolNotFound`.
31680 Parameters
31681 ----------
31682 library : :py:obj:`~.cudaLibrary_t`
31683 Library to retrieve kernel from
31684 name : bytes
31685 Name of kernel to retrieve
31687 Returns
31688 -------
31689 cudaError_t
31690 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorSymbolNotFound`
31691 pKernel : :py:obj:`~.cudaKernel_t`
31692 Returned kernel handle
31694 See Also
31695 --------
31696 :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cudaLibraryUnload`, :py:obj:`~.cuLibraryGetKernel`
31697 """
31698 cdef cyruntime.cudaLibrary_t cylibrary
31699 if library is None:
31700 plibrary = 0
31701 elif isinstance(library, (cudaLibrary_t,)):
31702 plibrary = int(library)
31703 else:
31704 plibrary = int(cudaLibrary_t(library))
31705 cylibrary = <cyruntime.cudaLibrary_t><void_ptr>plibrary
31706 cdef cudaKernel_t pKernel = cudaKernel_t()
31707 with nogil:
31708 err = cyruntime.cudaLibraryGetKernel(<cyruntime.cudaKernel_t*>pKernel._pvt_ptr, cylibrary, name)
31709 if err != cyruntime.cudaSuccess:
31710 return (_dict_cudaError_t[err], None)
31711 return (_dict_cudaError_t[err], pKernel)
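# Editorial sketch: fetch the `noop` kernel from the PTX image loaded in the
# sketch above; the name must be bytes since the wrapper takes a char*.
def _example_library_get_kernel():
    library = _example_library_load_data()
    err, kernel = cudaLibraryGetKernel(library, b"noop")
    assert err == cudaError_t.cudaSuccess
    return library, kernel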
31713@cython.embedsignature(True)
31714def cudaLibraryGetGlobal(library, char* name):
31715 """ Returns a global device pointer.
31717 Returns in `*dptr` and `*bytes` the base pointer and size of the global
31718 with name `name` for the requested library `library` and the current
31719 device. If no global for the requested name `name` exists, the call
31720 returns :py:obj:`~.cudaErrorSymbolNotFound`. One of the parameters
31721 `dptr` or `numbytes` (not both) can be NULL in which case it is
31722 ignored. The returned `dptr` cannot be passed to the Symbol APIs such
31723 as :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`,
31724 :py:obj:`~.cudaGetSymbolAddress`, or :py:obj:`~.cudaGetSymbolSize`.
31726 Parameters
31727 ----------
31728 library : :py:obj:`~.cudaLibrary_t`
31729 Library to retrieve global from
31730 name : bytes
31731 Name of global to retrieve
31733 Returns
31734 -------
31735 cudaError_t
31736 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorSymbolNotFound` :py:obj:`~.cudaErrorDeviceUninitialized`, :py:obj:`~.cudaErrorContextIsDestroyed`
31737 dptr : Any
31738 Returned global device pointer for the requested library
31739 numbytes : int
31740 Returned global size in bytes
31742 See Also
31743 --------
31744 :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cudaLibraryUnload`, :py:obj:`~.cudaLibraryGetManaged`, :py:obj:`~.cuLibraryGetGlobal`
31745 """
31746 cdef cyruntime.cudaLibrary_t cylibrary
31747 if library is None:
31748 plibrary = 0
31749 elif isinstance(library, (cudaLibrary_t,)):
31750 plibrary = int(library)
31751 else:
31752 plibrary = int(cudaLibrary_t(library))
31753 cylibrary = <cyruntime.cudaLibrary_t><void_ptr>plibrary
31754 cdef void_ptr dptr = 0
31755 cdef size_t numbytes = 0
31756 with nogil:
31757 err = cyruntime.cudaLibraryGetGlobal(<void**>&dptr, &numbytes, cylibrary, name)
31758 if err != cyruntime.cudaSuccess:
31759 return (_dict_cudaError_t[err], None, None)
31760 return (_dict_cudaError_t[err], dptr, numbytes)
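# Editorial sketch: look up a device global by name. `counter` is a
# hypothetical symbol the loaded image is assumed to define; a real image
# without it returns cudaErrorSymbolNotFound.
def _example_library_get_global(library):
    err, dptr, numbytes = cudaLibraryGetGlobal(library, b"counter")
    if err == cudaError_t.cudaSuccess:
        return dptr, numbytes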
31762@cython.embedsignature(True)
31763def cudaLibraryGetManaged(library, char* name):
31764 """ Returns a pointer to managed memory.
31766 Returns in `*dptr` and `*bytes` the base pointer and size of the
31767 managed memory with name `name` for the requested library `library`. If
31768 no managed memory with the requested name `name` exists, the call
31769 returns :py:obj:`~.cudaErrorSymbolNotFound`. One of the parameters
31770 `dptr` or `numbytes` (not both) can be NULL in which case it is
31771 ignored. Note that managed memory for library `library` is shared
31772 across devices and is registered when the library is loaded. The
31773 returned `dptr` cannot be passed to the Symbol APIs such as
31774 :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`,
31775 :py:obj:`~.cudaGetSymbolAddress`, or :py:obj:`~.cudaGetSymbolSize`.
31777 Parameters
31778 ----------
31779 library : :py:obj:`~.cudaLibrary_t`
31780 Library to retrieve managed memory from
31781 name : bytes
31782 Name of managed memory to retrieve
31784 Returns
31785 -------
31786 cudaError_t
31787 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorSymbolNotFound`
31788 dptr : Any
31789 Returned pointer to the managed memory
31790 numbytes : int
31791 Returned memory size in bytes
31793 See Also
31794 --------
31795 :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cudaLibraryUnload`, :py:obj:`~.cudaLibraryGetGlobal`, :py:obj:`~.cuLibraryGetManaged`
31796 """
31797 cdef cyruntime.cudaLibrary_t cylibrary
31798 if library is None:
31799 plibrary = 0
31800 elif isinstance(library, (cudaLibrary_t,)):
31801 plibrary = int(library)
31802 else:
31803 plibrary = int(cudaLibrary_t(library))
31804 cylibrary = <cyruntime.cudaLibrary_t><void_ptr>plibrary
31805 cdef void_ptr dptr = 0
31806 cdef size_t numbytes = 0
31807 with nogil:
31808 err = cyruntime.cudaLibraryGetManaged(<void**>&dptr, &numbytes, cylibrary, name)
31809 if err != cyruntime.cudaSuccess:
31810 return (_dict_cudaError_t[err], None, None)
31811 return (_dict_cudaError_t[err], dptr, numbytes)
31813@cython.embedsignature(True)
31814def cudaLibraryGetUnifiedFunction(library, char* symbol):
31815 """ Returns a pointer to a unified function.
31817 Returns in `*fptr` the function pointer to a unified function denoted
31818 by `symbol`. If no unified function with name `symbol` exists, the call
31819 returns :py:obj:`~.cudaErrorSymbolNotFound`. If there is no device with
31820 attribute :py:obj:`~.cudaDeviceProp.unifiedFunctionPointers` present in
31821 the system, the call may return :py:obj:`~.cudaErrorSymbolNotFound`.
31823 Parameters
31824 ----------
31825 library : :py:obj:`~.cudaLibrary_t`
31826 Library to retrieve function pointer memory from
31827 symbol : bytes
31828 Name of function pointer to retrieve
31830 Returns
31831 -------
31832 cudaError_t
31833 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorSymbolNotFound`
31834 fptr : Any
31835 Returned pointer to a unified function
31837 See Also
31838 --------
31839 :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cudaLibraryUnload`, :py:obj:`~.cuLibraryGetUnifiedFunction`
31840 """
31841 cdef cyruntime.cudaLibrary_t cylibrary
31842 if library is None:
31843 plibrary = 0
31844 elif isinstance(library, (cudaLibrary_t,)):
31845 plibrary = int(library)
31846 else:
31847 plibrary = int(cudaLibrary_t(library))
31848 cylibrary = <cyruntime.cudaLibrary_t><void_ptr>plibrary
31849 cdef void_ptr fptr = 0
31850 with nogil:
31851 err = cyruntime.cudaLibraryGetUnifiedFunction(<void**>&fptr, cylibrary, symbol)
31852 if err != cyruntime.cudaSuccess:
31853 return (_dict_cudaError_t[err], None)
31854 return (_dict_cudaError_t[err], fptr)
31856@cython.embedsignature(True)
31857def cudaLibraryGetKernelCount(lib):
31858 """ Returns the number of kernels within a library.
31860 Returns in `count` the number of kernels in `lib`.
31862 Parameters
31863 ----------
31864 lib : :py:obj:`~.cudaLibrary_t`
31865 Library to query
31867 Returns
31868 -------
31869 cudaError_t
31870 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
31871 count : unsigned int
31872 Number of kernels found within the library
31874 See Also
31875 --------
31876 :py:obj:`~.cudaLibraryEnumerateKernels`, :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cuLibraryGetKernelCount`
31877 """
31878 cdef cyruntime.cudaLibrary_t cylib
31879 if lib is None:
31880 plib = 0
31881 elif isinstance(lib, (cudaLibrary_t,)):
31882 plib = int(lib)
31883 else:
31884 plib = int(cudaLibrary_t(lib))
31885 cylib = <cyruntime.cudaLibrary_t><void_ptr>plib
31886 cdef unsigned int count = 0
31887 with nogil:
31888 err = cyruntime.cudaLibraryGetKernelCount(&count, cylib)
31889 if err != cyruntime.cudaSuccess:
31890 return (_dict_cudaError_t[err], None)
31891 return (_dict_cudaError_t[err], count)
31893@cython.embedsignature(True)
31894def cudaLibraryEnumerateKernels(unsigned int numKernels, lib):
31895 """ Retrieve the kernel handles within a library.
31897 Returns in `kernels` a maximum number of `numKernels` kernel handles
31898 within `lib`. The returned kernel handles become invalid when the
31899 library is unloaded.
31901 Parameters
31902 ----------
31903 numKernels : unsigned int
31904 Maximum number of kernel handles that may be returned to the buffer
31905 lib : :py:obj:`~.cudaLibrary_t`
31906 Library to query from
31908 Returns
31909 -------
31910 cudaError_t
31911 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
31912 kernels : list[:py:obj:`~.cudaKernel_t`]
31913 Buffer where the kernel handles are returned to
31915 See Also
31916 --------
31917 :py:obj:`~.cudaLibraryGetKernelCount`, :py:obj:`~.cuLibraryEnumerateKernels`
31918 """
31919 cdef cyruntime.cudaLibrary_t cylib
31920 if lib is None:
31921 plib = 0
31922 elif isinstance(lib, (cudaLibrary_t,)):
31923 plib = int(lib)
31924 else:
31925 plib = int(cudaLibrary_t(lib))
31926 cylib = <cyruntime.cudaLibrary_t><void_ptr>plib
31927 cdef cyruntime.cudaKernel_t* cykernels = NULL
31928 pykernels = []
31929 if numKernels != 0:
31930 cykernels = <cyruntime.cudaKernel_t*>calloc(numKernels, sizeof(cyruntime.cudaKernel_t))
31931 if cykernels is NULL:
31932 raise MemoryError('Failed to allocate length x size memory: ' + str(numKernels) + 'x' + str(sizeof(cyruntime.cudaKernel_t)))
31933 with nogil:
31934 err = cyruntime.cudaLibraryEnumerateKernels(cykernels, numKernels, cylib)
31935 if err == cyruntime.cudaSuccess:
31936 pykernels = [cudaKernel_t(init_value=<void_ptr>cykernels[idx]) for idx in range(numKernels)]
31937 if cykernels is not NULL:
31938 free(cykernels)
31939 if err != cyruntime.cudaSuccess:
31940 return (_dict_cudaError_t[err], None)
31941 return (_dict_cudaError_t[err], pykernels)
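# Editorial sketch: size the enumeration from the kernel count, pairing the
# two wrappers above.
def _example_enumerate_kernels(library):
    err, count = cudaLibraryGetKernelCount(library)
    assert err == cudaError_t.cudaSuccess
    err, kernels = cudaLibraryEnumerateKernels(count, library)
    return kernels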
31943@cython.embedsignature(True)
31944def cudaKernelSetAttributeForDevice(kernel, attr not None : cudaFuncAttribute, int value, int device):
31945 """ Sets information about a kernel.
31947 This call sets the value of a specified attribute `attr` on the kernel
31948 `kernel` for the requested device `device` to an integer value
31949 specified by `value`. This function returns :py:obj:`~.cudaSuccess` if
31950 the new value of the attribute could be successfully set. If the set
31951 fails, this call will return an error. Not all attributes can have
31952 values set. Attempting to set a value on a read-only attribute will
31953 result in an error (:py:obj:`~.cudaErrorInvalidValue`)
31955 Note that attributes set using :py:obj:`~.cudaFuncSetAttribute()` will
31956 override the attribute set by this API irrespective of whether the call
31957 to :py:obj:`~.cudaFuncSetAttribute()` is made before or after this API
31958 call. Because of this and the stricter locking requirements mentioned
31959 below, it is suggested that this call be used during the initialization
31960 path and not on each thread accessing `kernel`, such as on kernel
31961 launches or on the critical path.
31963 Valid values for `attr` are:
31965 - :py:obj:`~.cudaFuncAttributeMaxDynamicSharedMemorySize` - The
31966 requested maximum size in bytes of dynamically-allocated shared
31967 memory. The sum of this value and the function attribute
31968 :py:obj:`~.sharedSizeBytes` cannot exceed the device attribute
31969 :py:obj:`~.cudaDevAttrMaxSharedMemoryPerBlockOptin`. The maximal size
31970 of requestable dynamic shared memory may differ by GPU architecture.
31972 - :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout` - On
31973 devices where the L1 cache and shared memory use the same hardware
31974 resources, this sets the shared memory carveout preference, in
31975 percent of the total shared memory. See
31976 :py:obj:`~.cudaDevAttrMaxSharedMemoryPerMultiprocessor`. This is only
31977 a hint, and the driver can choose a different ratio if required to
31978 execute the function.
31980 - :py:obj:`~.cudaFuncAttributeRequiredClusterWidth`: The required
31981 cluster width in blocks. The width, height, and depth values must
31982 either all be 0 or all be positive. The validity of the cluster
31983 dimensions is checked at launch time. If the value is set during
31984 compile time, it cannot be set at runtime. Setting it at runtime will
31985 return cudaErrorNotPermitted.
31987 - :py:obj:`~.cudaFuncAttributeRequiredClusterHeight`: The required
31988 cluster height in blocks. The width, height, and depth values must
31989 either all be 0 or all be positive. The validity of the cluster
31990 dimensions is checked at launch time. If the value is set during
31991 compile time, it cannot be set at runtime. Setting it at runtime will
31992 return cudaErrorNotPermitted.
31994 - :py:obj:`~.cudaFuncAttributeRequiredClusterDepth`: The required
31995 cluster depth in blocks. The width, height, and depth values must
31996 either all be 0 or all be positive. The validity of the cluster
31997 dimensions is checked at launch time. If the value is set at
31998 compile time, it cannot be set at runtime. Setting it at runtime will
31999 return cudaErrorNotPermitted.
32001 - :py:obj:`~.cudaFuncAttributeNonPortableClusterSizeAllowed`: Indicates
32002 whether the function can be launched with non-portable cluster size.
32003 1 is allowed, 0 is disallowed.
32005 - :py:obj:`~.cudaFuncAttributeClusterSchedulingPolicyPreference`: The
32006 block scheduling policy of a function. The value type is
32007 cudaClusterSchedulingPolicy.
32009 Parameters
32010 ----------
32011 kernel : :py:obj:`~.cudaKernel_t`
32012 Kernel to set attribute of
32013 attr : :py:obj:`~.cudaFuncAttribute`
32014 Attribute requested
32015 value : int
32016 Value to set
32017 device : int
32018 Device to set attribute of
32020 Returns
32021 -------
32022 cudaError_t
32023 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidValue`
32025 See Also
32026 --------
32027 :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cudaLibraryUnload`, :py:obj:`~.cudaLibraryGetKernel`, :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaFuncSetAttribute`, :py:obj:`~.cuKernelSetAttribute`
32029 Notes
32030 -----
32031 The API has stricter locking requirements in comparison to its legacy counterpart :py:obj:`~.cudaFuncSetAttribute()` due to device-wide semantics. If multiple threads are trying to set the same attribute on the same device simultaneously, the attribute setting will depend on the interleavings chosen by the OS scheduler and memory consistency.
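Examples
--------
A minimal sketch (assumes `kern` is a :py:obj:`~.cudaKernel_t` obtained
beforehand, e.g. via :py:obj:`~.cudaLibraryGetKernel`, and that device 0
exists; enum member names follow the C API):
>>> from cuda.bindings import runtime
>>> attr = runtime.cudaFuncAttribute.cudaFuncAttributeMaxDynamicSharedMemorySize
>>> err, = runtime.cudaKernelSetAttributeForDevice(kern, attr, 48 * 1024, 0)
>>> assert err == runtime.cudaError_t.cudaSuccess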
32032 """
32033 cdef cyruntime.cudaKernel_t cykernel
32034 if kernel is None:
32035 pkernel = 0
32036 elif isinstance(kernel, (cudaKernel_t,)):
32037 pkernel = int(kernel)
32038 else:
32039 pkernel = int(cudaKernel_t(kernel))
32040 cykernel = <cyruntime.cudaKernel_t><void_ptr>pkernel
32041 cdef cyruntime.cudaFuncAttribute cyattr = attr.value
32042 with nogil:
32043 err = cyruntime.cudaKernelSetAttributeForDevice(cykernel, cyattr, value, device)
32044 return (_dict_cudaError_t[err],)
32046@cython.embedsignature(True)
32047def cudaDeviceGetDevResource(int device, typename not None : cudaDevResourceType):
32048 """ Get device resources.
32050 Get the `typename` resources available to the `device`. This may often
32051 be the starting point for further partitioning or configuring of
32052 resources.
32054 Note: The API is not supported on 32-bit platforms.
32056 Parameters
32057 ----------
32058 device : int
32059 Device to get resource for
32060 typename : :py:obj:`~.cudaDevResourceType`
32061 Type of resource to retrieve
32063 Returns
32064 -------
32065 cudaError_t
32066 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotPermitted`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidResourceType`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`
32067 resource : :py:obj:`~.cudaDevResource`
32068 Output pointer to a cudaDevResource structure
32070 See Also
32071 --------
32072 :py:obj:`~.cuDeviceGetDevResource`, :py:obj:`~.cudaExecutionCtxGetDevResource`, :py:obj:`~.cudaDevSmResourceSplit`, :py:obj:`~.cudaDevResourceGenerateDesc`
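Examples
--------
A minimal sketch (assumes device 0; the enum member name follows the C
API):
>>> from cuda.bindings import runtime
>>> err, sm = runtime.cudaDeviceGetDevResource(
...     0, runtime.cudaDevResourceType.cudaDevResourceTypeSm)
>>> assert err == runtime.cudaError_t.cudaSuccess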
32073 """
32074 cdef cudaDevResource resource = cudaDevResource()
32075 cdef cyruntime.cudaDevResourceType cytypename = typename.value
32076 with nogil:
32077 err = cyruntime.cudaDeviceGetDevResource(device, <cyruntime.cudaDevResource*>resource._pvt_ptr, cytypename)
32078 if err != cyruntime.cudaSuccess:
32079 return (_dict_cudaError_t[err], None)
32080 return (_dict_cudaError_t[err], resource)
32082@cython.embedsignature(True)
32083def cudaDevSmResourceSplitByCount(unsigned int nbGroups, input_ : Optional[cudaDevResource], unsigned int flags, unsigned int minCount):
32084 """ Splits `cudaDevResourceTypeSm` resources.
32086 Splits `cudaDevResourceTypeSm` resources into `nbGroups`, adhering to
32087 the minimum SM count specified in `minCount` and the usage flags in
32088 `flags`. If `result` is NULL, the API simulates a split and reports
32089 the number of groups that would be created in `nbGroups`. Otherwise,
32090 `nbGroups` must point to the number of elements in `result` and, on
32091 return, the API will overwrite `nbGroups` with the number actually
32092 created. The groups are written to the array in `result`. `nbGroups`
32093 can be less than the total if a smaller number of groups is
32094 needed.
32096 This API is used to spatially partition the input resource. The input
32097 resource needs to come from one of
32098 :py:obj:`~.cudaDeviceGetDevResource`, or
32099 :py:obj:`~.cudaExecutionCtxGetDevResource`. A limitation of the API is
32100 that the output results cannot be split again without first creating a
32101 descriptor and a green context with that descriptor.
32103 When creating the groups, the API will take into account the
32104 performance and functional characteristics of the input resource, and
32105 guarantee a split that will create a disjoint set of symmetrical
32106 partitions. This may yield fewer groups than simply dividing
32107 the total SM count by `minCount`, due to cluster requirements or
32108 alignment and granularity requirements for the minCount. These
32109 requirements can be queried with :py:obj:`~.cudaDeviceGetDevResource`,
32110 or :py:obj:`~.cudaExecutionCtxGetDevResource` for
32111 :py:obj:`~.cudaDevResourceTypeSm`, using the `minSmPartitionSize` and
32112 `smCoscheduledAlignment` fields to determine minimum partition size and
32113 alignment granularity, respectively.
32115 The `remainder` set does not have the same functional or performance
32116 guarantees as the groups in `result`. Its use should be carefully
32117 planned and future partitions of the `remainder` set are discouraged.
32119 The following flags are supported:
32121 - `cudaDevSmResourceSplitIgnoreSmCoscheduling` : Lower the minimum SM
32122 count and alignment, and treat each SM independently of its hierarchy.
32123 This allows more fine-grained partitions, but at the cost of advanced
32124 features (such as large clusters on compute capability 9.0+).
32126 - `cudaDevSmResourceSplitMaxPotentialClusterSize` : Compute Capability
32127 9.0+ only. Attempt to create groups that may allow for maximally
32128 sized thread clusters. This can be queried post green context
32129 creation using :py:obj:`~.cudaOccupancyMaxPotentialClusterSize`.
32131 A successful API call must either have:
32133 - A valid array of `result` pointers of size passed in `nbGroups`, with
32134 `input` of type `cudaDevResourceTypeSm`. Value of `minCount` must be
32135 between 0 and the SM count specified in `input`. `remaining` may be
32136 NULL.
32138 - NULL passed in for `result`, with a valid integer pointer in
32139 `nbGroups` and `input` of type `cudaDevResourceTypeSm`. Value of
32140 `minCount` must be between 0 and the SM count specified in `input`.
32141 `remaining` may be NULL. This queries the number of groups that would
32142 be created by the API.
32144 Note: The API is not supported on 32-bit platforms.
32146 Parameters
32147 ----------
32148 nbGroups : unsigned int
32149 This is a pointer, specifying the number of groups that would be or
32150 should be created as described below.
32151 input : :py:obj:`~.cudaDevResource`
32152 Input SM resource to be split. Must be a valid `cudaDevResourceTypeSm` resource.
32153 flags : unsigned int
32154 Flags specifying how these partitions are used or which constraints
32155 to abide by when splitting the input. Zero is valid for default
32156 behavior.
32157 minCount : unsigned int
32158 Minimum number of SMs required
32160 Returns
32161 -------
32162 cudaError_t
32163 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotPermitted`, :py:obj:`~.cudaErrorInvalidResourceType`, :py:obj:`~.cudaErrorInvalidResourceConfiguration`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`
32164 result : list[:py:obj:`~.cudaDevResource`]
32165 Output array of `cudaDevResource` resources. Can be NULL to query
32166 the number of groups.
32167 nbGroups : unsigned int
32168 This is a pointer, specifying the number of groups that would be or
32169 should be created as described below.
32170 remaining : :py:obj:`~.cudaDevResource`
32171 If the input resource cannot be cleanly split among `nbGroups`, the
32172 remaining is placed in here. Can be omitted (NULL) if the user
32173 does not need the remaining set.
32175 See Also
32176 --------
32177 :py:obj:`~.cuDevSmResourceSplitByCount`, :py:obj:`~.cudaDeviceGetDevResource`, :py:obj:`~.cudaExecutionCtxGetDevResource`, :py:obj:`~.cudaDevResourceGenerateDesc`
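Examples
--------
A minimal query-then-split sketch (assumes device 0):
>>> from cuda.bindings import runtime
>>> _, sm = runtime.cudaDeviceGetDevResource(
...     0, runtime.cudaDevResourceType.cudaDevResourceTypeSm)
>>> # Request up to two groups of at least four SMs each, default flags.
>>> err, groups, nbGroups, remaining = runtime.cudaDevSmResourceSplitByCount(
...     2, sm, 0, 4)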
32178 """
32179 cdef cyruntime.cudaDevResource* cyresult = NULL
32180 pyresult = [cudaDevResource() for idx in range(nbGroups)]
32181 if nbGroups != 0:
32182 cyresult = <cyruntime.cudaDevResource*>calloc(nbGroups, sizeof(cyruntime.cudaDevResource))
32183 if cyresult is NULL:
32184 raise MemoryError('Failed to allocate length x size memory: ' + str(nbGroups) + 'x' + str(sizeof(cyruntime.cudaDevResource)))
32185 cdef unsigned int cynbGroups = nbGroups
32186 cdef cyruntime.cudaDevResource* cyinput__ptr = input_._pvt_ptr if input_ is not None else NULL
32187 cdef cudaDevResource remaining = cudaDevResource()
32188 with nogil:
32189 err = cyruntime.cudaDevSmResourceSplitByCount(cyresult, &cynbGroups, cyinput__ptr, <cyruntime.cudaDevResource*>remaining._pvt_ptr, flags, minCount)
32190 if cudaError_t(err) == cudaError_t(0):
32191 for idx in range(nbGroups):
32192 string.memcpy((<cudaDevResource>pyresult[idx])._pvt_ptr, &cyresult[idx], sizeof(cyruntime.cudaDevResource))
32193 if cyresult is not NULL:
32194 free(cyresult)
32195 if err != cyruntime.cudaSuccess:
32196 return (_dict_cudaError_t[err], None, None, None)
32197 return (_dict_cudaError_t[err], pyresult, cynbGroups, remaining)
32199@cython.embedsignature(True)
32200def cudaDevSmResourceSplit(unsigned int nbGroups, input_ : Optional[cudaDevResource], unsigned int flags, groupParams : Optional[cudaDevSmResourceGroupParams]):
32201 """ Splits a `cudaDevResourceTypeSm` resource into structured groups.
32203 This API will split a resource of :py:obj:`~.cudaDevResourceTypeSm`
32204 into `nbGroups` structured device resource groups (the `result` array),
32205 as well as an optional `remainder`, according to a set of requirements
32206 specified in the `groupParams` array. “Structured” here means that
32207 each `result` group contains SMs that are co-scheduled together.
32208 This co-scheduling can be specified via the `coscheduledSmCount` field
32209 of the `groupParams` structure, while the `smCount` will specify how
32210 many SMs are required in total for that result. The remainder is always
32211 “unstructured”; it does not have any set guarantees with respect to co-
32212 scheduling and those properties will need to either be queried via the
32213 occupancy set of APIs or further split into structured groups by this
32214 API.
32216 The API has a discovery mode for use cases where it is difficult to
32217 know ahead of time what the SM count should be. Discovery happens when
32218 the `smCount` field of a given `groupParams` array entry is set to 0 -
32219 the smCount will be filled in by the API with the derived SM count
32220 according to the provided `groupParams` fields and constraints.
32221 Discovery can be used with both a valid result array and with a NULL
32222 `result` pointer value. The latter is useful in situations where the
32223 smCount will end up being zero, which is an invalid value to create a
32224 result entry with, but allowed for discovery purposes when the `result`
32225 is NULL.
32227 The `groupParams` array is evaluated from index 0 to `nbGroups` - 1.
32228 For each index in the `groupParams` array, the API will evaluate which
32229 SMs may be a good fit based on constraints and assign those SMs to
32230 `result`. This evaluation order is important to consider when using
32231 discovery mode, as it helps discover the remaining SMs.
32233 For a valid call:
32235 - `result` should point to a `cudaDevResource` array of size
32236 `nbGroups`, or alternatively, may be NULL, if the developer wishes
32237 for only the groupParams entries to be updated
32239 - `input` should be a valid :py:obj:`~.cudaDevResourceTypeSm` resource
32240 that originates from querying the execution context, or device.
32242 - The `remainder` group may be NULL.
32244 - There are no API `flags` at this time, so the value passed in should
32245 be 0.
32247 - A :py:obj:`~.cudaDevSmResourceGroupParams` array of size `nbGroups`.
32248 Each entry must be zero-initialized.
32250 - `smCount:` must be either 0 or in the range of [2,inputSmCount]
32251 where inputSmCount is the amount of SMs the `input` resource has.
32252 `smCount` must be a multiple of 2, as well as a multiple of
32253 `coscheduledSmCount`. When assigning SMs to a group (and if results
32254 are expected by having the `result` parameter set), `smCount`
32255 cannot end up with 0 or a value less than `coscheduledSmCount`
32256 otherwise :py:obj:`~.cudaErrorInvalidResourceConfiguration` will be
32257 returned.
32259 - `coscheduledSmCount:` allows grouping SMs together in order to be
32260 able to launch clusters on Compute Architecture 9.0+. The default
32261 value may be queried from the device’s
32262 :py:obj:`~.cudaDevResourceTypeSm` resource (8 on Compute
32263 Architecture 9.0+ and 2 otherwise). The maximum is 32 on Compute
32264 Architecture 9.0+ and 2 otherwise.
32266 - `preferredCoscheduledSmCount:` Attempts to merge
32267 `coscheduledSmCount` groups into larger groups, in order to make
32268 use of `preferredClusterDimensions` on Compute Architecture 10.0+.
32269 The default value is set to `coscheduledSmCount`.
32271 - `flags:`
32273 - `cudaDevSmResourceGroupBackfill:` lets `smCount` be a non-multiple of
32274 `coscheduledSmCount`, filling the difference between SM count and
32275 already assigned co-scheduled groupings with other SMs. This lets any
32276 resulting group behave similarly to the `remainder` group, for example.
32278 Example params and their effect:
32280 A groupParams array element is defined in the following order:
32282 **View CUDA Toolkit Documentation for a C++ code example**
32284 **View CUDA Toolkit Documentation for a C++ code example**
32286 **View CUDA Toolkit Documentation for a C++ code example**
32288 **View CUDA Toolkit Documentation for a C++ code example**
32290 The difference between a catch-all param group as the last entry and
32291 the remainder is in two aspects:
32293 - The remainder may be NULL / _TYPE_INVALID (if there are no SMs
32294 remaining), while a result group must always be valid.
32296 - The remainder does not have a structure, while the result group will
32297 always need to adhere to a structure of coscheduledSmCount (even if
32298 it's just 2), and therefore must always have enough coscheduled SMs to
32299 cover that requirement (even with the
32300 `cudaDevSmResourceGroupBackfill` flag enabled).
32302 Splitting an input into N groups can be accomplished by repeatedly
32303 splitting off 1 group and re-splitting the remainder (a bisect
32304 operation). However, it's recommended to accomplish this with a single
32305 call wherever possible.
32307 Parameters
32308 ----------
32309 nbGroups : unsigned int
32310 Specifies the number of groups in `result` and `groupParams`
32311 input : :py:obj:`~.cudaDevResource`
32312 Input SM resource to be split. Must be a valid
32313 `cudaDevResourceTypeSm` resource.
32314 flags : unsigned int
32315 Flags specifying how the API should behave. The value should be 0
32316 for now.
32317 groupParams : :py:obj:`~.cudaDevSmResourceGroupParams`
32318 Description of how the SMs should be split and assigned to the
32319 corresponding result entry.
32321 Returns
32322 -------
32323 cudaError_t
32324 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotPermitted`, :py:obj:`~.cudaErrorInvalidResourceType`, :py:obj:`~.cudaErrorInvalidResourceConfiguration`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`
32325 result : list[:py:obj:`~.cudaDevResource`]
32326 Output array of `cudaDevResource` resources. Can be NULL, alongside
32327 an smCount of 0, for discovery purpose.
32328 remainder : :py:obj:`~.cudaDevResource`
32329 If splitting the input resource leaves any SMs, the remainder is
32330 placed in here.
32332 See Also
32333 --------
32334 :py:obj:`~.cuDevSmResourceSplit`, :py:obj:`~.cudaDeviceGetDevResource`, :py:obj:`~.cudaExecutionCtxGetDevResource`, :py:obj:`~.cudaDevResourceGenerateDesc`
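Examples
--------
A minimal sketch splitting off one structured group (assumes device 0
and that the :py:obj:`~.cudaDevSmResourceGroupParams` wrapper exposes
the C struct fields as attributes):
>>> from cuda.bindings import runtime
>>> _, sm = runtime.cudaDeviceGetDevResource(
...     0, runtime.cudaDevResourceType.cudaDevResourceTypeSm)
>>> params = runtime.cudaDevSmResourceGroupParams()
>>> params.smCount = 8  # 0 would instead request discovery mode
>>> err, groups, remainder = runtime.cudaDevSmResourceSplit(1, sm, 0, params)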
32335 """
32336 cdef cyruntime.cudaDevResource* cyresult = NULL
32337 pyresult = [cudaDevResource() for idx in range(nbGroups)]
32338 if nbGroups != 0:
32339 cyresult = <cyruntime.cudaDevResource*>calloc(nbGroups, sizeof(cyruntime.cudaDevResource))
32340 if cyresult is NULL:
32341 raise MemoryError('Failed to allocate length x size memory: ' + str(nbGroups) + 'x' + str(sizeof(cyruntime.cudaDevResource)))
32342 cdef cyruntime.cudaDevResource* cyinput__ptr = input_._pvt_ptr if input_ is not None else NULL
32343 cdef cudaDevResource remainder = cudaDevResource()
32344 cdef cyruntime.cudaDevSmResourceGroupParams* cygroupParams_ptr = groupParams._pvt_ptr if groupParams is not None else NULL
32345 with nogil:
32346 err = cyruntime.cudaDevSmResourceSplit(cyresult, nbGroups, cyinput__ptr, <cyruntime.cudaDevResource*>remainder._pvt_ptr, flags, cygroupParams_ptr)
32347 if cudaError_t(err) == cudaError_t(0):
32348 for idx in range(nbGroups):
32349 string.memcpy((<cudaDevResource>pyresult[idx])._pvt_ptr, &cyresult[idx], sizeof(cyruntime.cudaDevResource))
32350 if cyresult is not NULL:
32351 free(cyresult)
32352 if err != cyruntime.cudaSuccess:
32353 return (_dict_cudaError_t[err], None, None)
32354 return (_dict_cudaError_t[err], pyresult, remainder)
32356@cython.embedsignature(True)
32357def cudaDevResourceGenerateDesc(resources : Optional[tuple[cudaDevResource] | list[cudaDevResource]], unsigned int nbResources):
32358 """ Generate a resource descriptor.
32360 Generates a single resource descriptor with the set of resources
32361 specified in `resources`. The generated resource descriptor is
32362 necessary for the creation of green contexts via the
32363 :py:obj:`~.cudaGreenCtxCreate` API. Resources of the same type can be
32364 passed in, provided they meet the requirements as noted below.
32366 A successful API call must have:
32368 - A valid output pointer for the `phDesc` descriptor as well as a valid
32369 array of `resources` pointers, with the array size passed in
32370 `nbResources`. If multiple resources are provided in `resources`, the
32371 device they came from must be the same, otherwise
32372 :py:obj:`~.cudaErrorInvalidResourceConfiguration` is returned. If
32373 multiple resources are provided in `resources` and they are of type
32374 :py:obj:`~.cudaDevResourceTypeSm`, they must be outputs (whether
32375 `result` or `remaining`) from the same split API instance and have
32376 the same smCoscheduledAlignment values, otherwise
32377 :py:obj:`~.cudaErrorInvalidResourceConfiguration` is returned.
32379 Note: The API is not supported on 32-bit platforms.
32381 Parameters
32382 ----------
32383 resources : list[:py:obj:`~.cudaDevResource`]
32384 Array of resources to be included in the descriptor
32385 nbResources : unsigned int
32386 Number of resources passed in `resources`
32388 Returns
32389 -------
32390 cudaError_t
32391 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotPermitted`, :py:obj:`~.cudaErrorInvalidResourceType`, :py:obj:`~.cudaErrorInvalidResourceConfiguration`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorOutOfMemory`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`
32392 phDesc : :py:obj:`~.cudaDevResourceDesc_t`
32393 Output descriptor
32395 See Also
32396 --------
32397 :py:obj:`~.cuDevResourceGenerateDesc`, :py:obj:`~.cudaDeviceGetDevResource`, :py:obj:`~.cudaExecutionCtxGetDevResource`, :py:obj:`~.cudaDevSmResourceSplit`, :py:obj:`~.cudaGreenCtxCreate`
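Examples
--------
A minimal sketch turning one split result into a descriptor (assumes
device 0):
>>> from cuda.bindings import runtime
>>> _, sm = runtime.cudaDeviceGetDevResource(
...     0, runtime.cudaDevResourceType.cudaDevResourceTypeSm)
>>> _, groups, nb, _ = runtime.cudaDevSmResourceSplitByCount(1, sm, 0, 0)
>>> err, desc = runtime.cudaDevResourceGenerateDesc(groups[:1], 1)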
32398 """
32399 resources = [] if resources is None else resources
32400 if not all(isinstance(_x, (cudaDevResource,)) for _x in resources):
32401 raise TypeError("Argument 'resources' is not instance of type (expected tuple[cyruntime.cudaDevResource,] or list[cyruntime.cudaDevResource,])")
32402 cdef cudaDevResourceDesc_t phDesc = cudaDevResourceDesc_t()
32403 cdef cyruntime.cudaDevResource* cyresources = NULL
32404 if len(resources) > 1:
32405 cyresources = <cyruntime.cudaDevResource*> calloc(len(resources), sizeof(cyruntime.cudaDevResource))
32406 if cyresources is NULL:
32407 raise MemoryError('Failed to allocate length x size memory: ' + str(len(resources)) + 'x' + str(sizeof(cyruntime.cudaDevResource)))
32408 for idx in range(len(resources)):
32409 string.memcpy(&cyresources[idx], (<cudaDevResource>resources[idx])._pvt_ptr, sizeof(cyruntime.cudaDevResource))
32410 elif len(resources) == 1:
32411 cyresources = (<cudaDevResource>resources[0])._pvt_ptr
32412 with nogil:
32413 err = cyruntime.cudaDevResourceGenerateDesc(<cyruntime.cudaDevResourceDesc_t*>phDesc._pvt_ptr, cyresources, nbResources)
32414 if len(resources) > 1 and cyresources is not NULL:
32415 free(cyresources)
32416 if err != cyruntime.cudaSuccess:
32417 return (_dict_cudaError_t[err], None)
32418 return (_dict_cudaError_t[err], phDesc)
32420@cython.embedsignature(True)
32421def cudaGreenCtxCreate(desc, int device, unsigned int flags):
32422 """ Creates a green context with a specified set of resources.
32424 This API creates a green context with the resources specified in the
32425 descriptor `desc` and returns it in the handle represented by `phCtx`.
32427 This API retains the device’s primary context for the lifetime of the
32428 green context. The primary context will be released when the green
32429 context is destroyed. To avoid the overhead of repeated initialization
32430 and teardown, it is recommended to explicitly initialize the device's
32431 primary context ahead of time using :py:obj:`~.cudaInitDevice`. This
32432 ensures that the primary context remains initialized throughout the
32433 program’s lifetime, minimizing overhead during green context creation
32434 and destruction.
32436 The API does not create a default stream for the green context.
32437 Developers are expected to create streams explicitly using
32438 :py:obj:`~.cudaExecutionCtxStreamCreate` to submit work to the green
32439 context.
32441 Note: The API is not supported on 32-bit platforms.
32443 Parameters
32444 ----------
32445 desc : :py:obj:`~.cudaDevResourceDesc_t`
32446 Descriptor generated via :py:obj:`~.cudaDevResourceGenerateDesc`
32447 which contains the set of resources to be used
32448 device : int
32449 Device on which to create the green context.
32450 flags : unsigned int
32451 Green context creation flags. Must be 0, currently reserved for
32452 future use.
32454 Returns
32455 -------
32456 cudaError_t
32457 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorNotPermitted`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorOutOfMemory`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`
32458 phCtx : :py:obj:`~.cudaExecutionContext_t`
32459 Pointer for the output handle to the green context
32461 See Also
32462 --------
32463 :py:obj:`~.cudaDeviceGetDevResource`, :py:obj:`~.cudaDevSmResourceSplit`, :py:obj:`~.cudaDevResourceGenerateDesc`, :py:obj:`~.cudaExecutionCtxGetDevResource`, :py:obj:`~.cudaExecutionCtxDestroy`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cudaExecutionCtxStreamCreate`
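Examples
--------
A minimal sketch (assumes `desc` was produced by
:py:obj:`~.cudaDevResourceGenerateDesc` for device 0):
>>> from cuda.bindings import runtime
>>> err, ctx = runtime.cudaGreenCtxCreate(desc, 0, 0)
>>> if err == runtime.cudaError_t.cudaSuccess:
...     _, stream = runtime.cudaExecutionCtxStreamCreate(ctx, 0, 0)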
32464 """
32465 cdef cyruntime.cudaDevResourceDesc_t cydesc
32466 if desc is None:
32467 pdesc = 0
32468 elif isinstance(desc, (cudaDevResourceDesc_t,)):
32469 pdesc = int(desc)
32470 else:
32471 pdesc = int(cudaDevResourceDesc_t(desc))
32472 cydesc = <cyruntime.cudaDevResourceDesc_t><void_ptr>pdesc
32473 cdef cudaExecutionContext_t phCtx = cudaExecutionContext_t()
32474 with nogil:
32475 err = cyruntime.cudaGreenCtxCreate(<cyruntime.cudaExecutionContext_t*>phCtx._pvt_ptr, cydesc, device, flags)
32476 if err != cyruntime.cudaSuccess:
32477 return (_dict_cudaError_t[err], None)
32478 return (_dict_cudaError_t[err], phCtx)
32480@cython.embedsignature(True)
32481def cudaExecutionCtxDestroy(ctx):
32482 """ Destroy a execution context.
32484 Destroys the specified execution context `ctx`. It is the
32485 responsibility of the caller to ensure that no API calls are issued using
32486 `ctx` while :py:obj:`~.cudaExecutionCtxDestroy()` is executing or
32487 afterwards.
32489 If `ctx` is a green context, any resources provisioned for it (that
32490 were initially available via the resource descriptor) are released as
32491 well.
32493 The API does not destroy streams created via
32494 :py:obj:`~.cudaExecutionCtxStreamCreate`. Users are expected to destroy
32495 these streams explicitly using :py:obj:`~.cudaStreamDestroy` to avoid
32496 resource leaks. Once the execution context is destroyed, any subsequent
32497 API calls involving these streams will return
32498 :py:obj:`~.cudaErrorStreamDetached` with the exception of the following
32499 APIs:
32501 - :py:obj:`~.cudaStreamDestroy`. Note this is only supported on CUDA
32502 drivers 13.1 and above.
32504 Additionally, the API will invalidate all active captures on these
32505 streams.
32507 Passing in a `ctx` that was not explicitly created via CUDA Runtime
32508 APIs is not allowed and will result in undefined behavior.
32510 Parameters
32511 ----------
32512 ctx : :py:obj:`~.cudaExecutionContext_t`
32513 Execution context to destroy (required parameter, see note below)
32515 Returns
32516 -------
32517 cudaError_t
32518 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotPermitted`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`
32520 See Also
32521 --------
32522 :py:obj:`~.cudaGreenCtxCreate`
32523 """
32524 cdef cyruntime.cudaExecutionContext_t cyctx
32525 if ctx is None:
32526 pctx = 0
32527 elif isinstance(ctx, (cudaExecutionContext_t,)):
32528 pctx = int(ctx)
32529 else:
32530 pctx = int(cudaExecutionContext_t(ctx))
32531 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
32532 with nogil:
32533 err = cyruntime.cudaExecutionCtxDestroy(cyctx)
32534 return (_dict_cudaError_t[err],)
32536@cython.embedsignature(True)
32537def cudaExecutionCtxGetDevResource(ctx, typename not None : cudaDevResourceType):
32538 """ Get context resources.
32540 Get the `typename` resources available to context represented by `ctx`.
32542 Note: The API is not supported on 32-bit platforms.
32544 Parameters
32545 ----------
32546 ctx : :py:obj:`~.cudaExecutionContext_t`
32547 Execution context to get resource for (required parameter, see note
32548 below)
32549 typename : :py:obj:`~.cudaDevResourceType`
32550 Type of resource to retrieve
32552 Returns
32553 -------
32554 cudaError_t
32555 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorNotPermitted`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`
32556 resource : :py:obj:`~.cudaDevResource`
32557 Output pointer to a cudaDevResource structure
32559 See Also
32560 --------
32561 :py:obj:`~.cudaDeviceGetDevResource`, :py:obj:`~.cudaDevSmResourceSplit`, :py:obj:`~.cudaDevResourceGenerateDesc`, :py:obj:`~.cudaGreenCtxCreate`
32562 """
32563 cdef cyruntime.cudaExecutionContext_t cyctx
32564 if ctx is None:
32565 pctx = 0
32566 elif isinstance(ctx, (cudaExecutionContext_t,)):
32567 pctx = int(ctx)
32568 else:
32569 pctx = int(cudaExecutionContext_t(ctx))
32570 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
32571 cdef cudaDevResource resource = cudaDevResource()
32572 cdef cyruntime.cudaDevResourceType cytypename = typename.value
32573 with nogil:
32574 err = cyruntime.cudaExecutionCtxGetDevResource(cyctx, <cyruntime.cudaDevResource*>resource._pvt_ptr, cytypename)
32575 if err != cyruntime.cudaSuccess:
32576 return (_dict_cudaError_t[err], None)
32577 return (_dict_cudaError_t[err], resource)
32579@cython.embedsignature(True)
32580def cudaExecutionCtxGetDevice(ctx):
32581 """ Returns the device handle for the execution context.
32583 Returns in `device` the handle of the specified execution context's
32584 device. The execution context should not be NULL.
32586 Parameters
32587 ----------
32588 ctx : :py:obj:`~.cudaExecutionContext_t`
32589 Execution context for which to obtain the device (required
32590 parameter, see note below)
32592 Returns
32593 -------
32594 cudaError_t
32595 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotPermitted`
32596 device : int
32597 Returned device handle for the specified execution context
32599 See Also
32600 --------
32601 :py:obj:`~.cudaGreenCtxCreate`, :py:obj:`~.cudaExecutionCtxDestroy`, :py:obj:`~.cuCtxGetDevice`
32602 """
32603 cdef cyruntime.cudaExecutionContext_t cyctx
32604 if ctx is None:
32605 pctx = 0
32606 elif isinstance(ctx, (cudaExecutionContext_t,)):
32607 pctx = int(ctx)
32608 else:
32609 pctx = int(cudaExecutionContext_t(ctx))
32610 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
32611 cdef int device = 0
32612 with nogil:
32613 err = cyruntime.cudaExecutionCtxGetDevice(&device, cyctx)
32614 if err != cyruntime.cudaSuccess:
32615 return (_dict_cudaError_t[err], None)
32616 return (_dict_cudaError_t[err], device)
32618@cython.embedsignature(True)
32619def cudaExecutionCtxGetId(ctx):
32620 """ Returns the unique Id associated with the execution context supplied.
32622 Returns in `ctxId` the unique Id which is associated with a given
32623 context. The Id is unique for the life of the program for this instance
32624 of CUDA. The execution context should not be NULL.
32626 Parameters
32627 ----------
32628 ctx : :py:obj:`~.cudaExecutionContext_t`
32629 Context for which to obtain the Id (required parameter, see note
32630 below)
32632 Returns
32633 -------
32634 cudaError_t
32635 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotPermitted`
32636 ctxId : unsigned long long
32637 Pointer to store the Id of the context
32639 See Also
32640 --------
32641 :py:obj:`~.cudaGreenCtxCreate`, :py:obj:`~.cudaExecutionCtxDestroy`, :py:obj:`~.cudaExecutionCtxGetDevice`, :py:obj:`~.cuCtxGetId`
32642 """
32643 cdef cyruntime.cudaExecutionContext_t cyctx
32644 if ctx is None:
32645 pctx = 0
32646 elif isinstance(ctx, (cudaExecutionContext_t,)):
32647 pctx = int(ctx)
32648 else:
32649 pctx = int(cudaExecutionContext_t(ctx))
32650 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
32651 cdef unsigned long long ctxId = 0
32652 with nogil:
32653 err = cyruntime.cudaExecutionCtxGetId(cyctx, &ctxId)
32654 if err != cyruntime.cudaSuccess:
32655 return (_dict_cudaError_t[err], None)
32656 return (_dict_cudaError_t[err], ctxId)
32658@cython.embedsignature(True)
32659def cudaExecutionCtxStreamCreate(ctx, unsigned int flags, int priority):
32660 """ Creates a stream and initializes it for the given execution context.
32662 The API creates a CUDA stream with the specified `flags` and
32663 `priority`, initializing it with resources as defined at the time of
32664 creating the specified `ctx`. Additionally, the API also enables work
32665 submitted to the stream to be tracked under `ctx`.
32667 The supported values for `flags` are:
32669 - :py:obj:`~.cudaStreamDefault`: Default stream creation flag. This
32670 would be :py:obj:`~.cudaStreamNonBlocking` for streams created on a
32671 green context.
32673 - :py:obj:`~.cudaStreamNonBlocking`: Specifies that work running in the
32674 created stream may run concurrently with work in stream 0 (the NULL
32675 stream), and that the created stream should perform no implicit
32676 synchronization with stream 0
32678 Specifying `priority` affects the scheduling priority of work in the
32679 stream. Priorities provide a hint to preferentially run work with
32680 higher priority when possible, but do not preempt already-running work
32681 or provide any other functional guarantee on execution order.
32682 `priority` follows a convention where lower numbers represent higher
32683 priorities. '0' represents default priority. The range of meaningful
32684 numerical priorities can be queried using
32685 :py:obj:`~.cudaDeviceGetStreamPriorityRange`. If the specified priority
32686 is outside the numerical range returned by
32687 :py:obj:`~.cudaDeviceGetStreamPriorityRange`, it will automatically be
32688 clamped to the lowest or the highest number in the range.
32690 Parameters
32691 ----------
32692 ctx : :py:obj:`~.cudaExecutionContext_t`
32693 Execution context to initialize the stream with (required
32694 parameter, see note below)
32695 flags : unsigned int
32696 Flags for stream creation
32697 priority : int
32698 Stream priority
32700 Returns
32701 -------
32702 cudaError_t
32703 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotPermitted`, :py:obj:`~.cudaErrorOutOfMemory`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`
32704 phStream : :py:obj:`~.cudaStream_t`
32705 Returned stream handle
32707 See Also
32708 --------
32709 :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cudaGreenCtxCreate`, :py:obj:`~.cudaDeviceGetStreamPriorityRange`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cudaStreamGetDevice`, :py:obj:`~.cudaStreamGetDevResource`, :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamAddCallback`
32711 Notes
32712 -----
32713 In the current implementation, only compute kernels launched in priority streams are affected by the stream's priority. Stream priorities have no effect on host-to-device and device-to-host memory operations.
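Examples
--------
A minimal sketch (assumes `ctx` is a green context from
:py:obj:`~.cudaGreenCtxCreate`):
>>> from cuda.bindings import runtime
>>> err, stream = runtime.cudaExecutionCtxStreamCreate(
...     ctx, runtime.cudaStreamNonBlocking, 0)
>>> # ... submit work to `stream` ...
>>> _, = runtime.cudaStreamDestroy(stream)  # destroy before the context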
32714 """
32715 cdef cyruntime.cudaExecutionContext_t cyctx
32716 if ctx is None:
32717 pctx = 0
32718 elif isinstance(ctx, (cudaExecutionContext_t,)):
32719 pctx = int(ctx)
32720 else:
32721 pctx = int(cudaExecutionContext_t(ctx))
32722 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
32723 cdef cudaStream_t phStream = cudaStream_t()
32724 with nogil:
32725 err = cyruntime.cudaExecutionCtxStreamCreate(<cyruntime.cudaStream_t*>phStream._pvt_ptr, cyctx, flags, priority)
32726 if err != cyruntime.cudaSuccess:
32727 return (_dict_cudaError_t[err], None)
32728 return (_dict_cudaError_t[err], phStream)
32730@cython.embedsignature(True)
32731def cudaExecutionCtxSynchronize(ctx):
32732 """ Block for the specified execution context's tasks to complete.
32734 Blocks until the specified execution context has completed all
32735 preceding requested tasks. If the specified execution context is the
32736 device (primary) context obtained via
32737 :py:obj:`~.cudaDeviceGetExecutionCtx`, green contexts that have been
32738 created on the device will also be synchronized.
32740 The API returns an error if one of the preceding tasks failed.
32742 Parameters
32743 ----------
32744 ctx : :py:obj:`~.cudaExecutionContext_t`
32745 Execution context to synchronize (required parameter, see note
32746 below)
32748 Returns
32749 -------
32750 cudaError_t
32751 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorDeviceUninitialized`, :py:obj:`~.cudaErrorInvalidValue`
32753 See Also
32754 --------
32755 :py:obj:`~.cudaGreenCtxCreate`, :py:obj:`~.cudaExecutionCtxDestroy`, :py:obj:`~.cudaDeviceSynchronize`, :py:obj:`~.cuCtxSynchronize_v2`
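Examples
--------
A minimal sketch using the device (primary) execution context (assumes
device 0):
>>> from cuda.bindings import runtime
>>> _, primary = runtime.cudaDeviceGetExecutionCtx(0)
>>> err, = runtime.cudaExecutionCtxSynchronize(primary)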
32756 """
32757 cdef cyruntime.cudaExecutionContext_t cyctx
32758 if ctx is None:
32759 pctx = 0
32760 elif isinstance(ctx, (cudaExecutionContext_t,)):
32761 pctx = int(ctx)
32762 else:
32763 pctx = int(cudaExecutionContext_t(ctx))
32764 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
32765 with nogil:
32766 err = cyruntime.cudaExecutionCtxSynchronize(cyctx)
32767 return (_dict_cudaError_t[err],)
32769@cython.embedsignature(True)
32770def cudaStreamGetDevResource(hStream, typename not None : cudaDevResourceType):
32771 """ Get stream resources.
32773 Get the `typename` resources available to the `hStream` and store them
32774 in `resource`.
32776 Note: The API will return :py:obj:`~.cudaErrorInvalidResourceType` if
32777 `typename` is `cudaDevResourceTypeWorkqueueConfig` or
32778 `cudaDevResourceTypeWorkqueue`.
32780 Parameters
32781 ----------
32782 hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
32783 Stream to get resource for
32784 typename : :py:obj:`~.cudaDevResourceType`
32785 Type of resource to retrieve
32787 Returns
32788 -------
32789 cudaError_t
32790 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorDeviceUninitialized`, :py:obj:`~.cudaErrorInvalidResourceType`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidHandle`, :py:obj:`~.cudaErrorNotPermitted`, :py:obj:`~.cudaErrorCallRequiresNewerDriver`,
32791 resource : :py:obj:`~.cudaDevResource`
32792 Output pointer to a cudaDevResource structure
32794 See Also
32795 --------
32796 :py:obj:`~.cudaGreenCtxCreate`, :py:obj:`~.cudaExecutionCtxStreamCreate`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaDevSmResourceSplit`, :py:obj:`~.cudaDevResourceGenerateDesc`, :py:obj:`~.cuStreamGetDevResource`
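Examples
--------
A minimal sketch (assumes `stream` was created via
:py:obj:`~.cudaExecutionCtxStreamCreate`):
>>> from cuda.bindings import runtime
>>> err, res = runtime.cudaStreamGetDevResource(
...     stream, runtime.cudaDevResourceType.cudaDevResourceTypeSm)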
32797 """
32798 cdef cyruntime.cudaStream_t cyhStream
32799 if hStream is None:
32800 phStream = 0
32801 elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
32802 phStream = int(hStream)
32803 else:
32804 phStream = int(cudaStream_t(hStream))
32805 cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
32806 cdef cudaDevResource resource = cudaDevResource()
32807 cdef cyruntime.cudaDevResourceType cytypename = typename.value
32808 with nogil:
32809 err = cyruntime.cudaStreamGetDevResource(cyhStream, <cyruntime.cudaDevResource*>resource._pvt_ptr, cytypename)
32810 if err != cyruntime.cudaSuccess:
32811 return (_dict_cudaError_t[err], None)
32812 return (_dict_cudaError_t[err], resource)
32814@cython.embedsignature(True)
32815def cudaExecutionCtxRecordEvent(ctx, event):
32816 """ Records an event for the specified execution context.
32818 Captures in `event` all the activities of the execution context `ctx`
32819 at the time of this call. `event` and `ctx` must be from the same CUDA
32820 device, otherwise :py:obj:`~.cudaErrorInvalidHandle` will be returned.
32821 Calls such as :py:obj:`~.cudaEventQuery()` or
32822 :py:obj:`~.cudaExecutionCtxWaitEvent()` will then examine or wait for
32823 completion of the work that was captured. Uses of `ctx` after this call
32824 do not modify `event`. If the execution context passed to `ctx` is the
32825 device (primary) context obtained via
32826 :py:obj:`~.cudaDeviceGetExecutionCtx()`, `event` will capture all the
32827 activities of the green contexts created on the device as well.
32829 Parameters
32830 ----------
32831 ctx : :py:obj:`~.cudaExecutionContext_t`
32832 Execution context to record event for (required parameter, see note
32833 below)
32834 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
32835 Event to record
32837 Returns
32838 -------
32839 cudaError_t
32840 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidHandle`, :py:obj:`~.cudaErrorStreamCaptureUnsupported`
32842 See Also
32843 --------
32844 :py:obj:`~.cudaEventRecord`, :py:obj:`~.cudaExecutionCtxWaitEvent`, :py:obj:`~.cuCtxRecordEvent`, :py:obj:`~.cuGreenCtxRecordEvent`
32846 Notes
32847 -----
32848 The API will return :py:obj:`~.cudaErrorStreamCaptureUnsupported` if the specified execution context `ctx` has a stream in the capture mode. In such a case, the call will invalidate all the conflicting captures.
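Examples
--------
A minimal sketch ordering `ctxB` behind work in `ctxA` (assumes both are
execution contexts on the same device as `evt`):
>>> from cuda.bindings import runtime
>>> _, evt = runtime.cudaEventCreate()
>>> _, = runtime.cudaExecutionCtxRecordEvent(ctxA, evt)
>>> _, = runtime.cudaExecutionCtxWaitEvent(ctxB, evt)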
32849 """
32850 cdef cyruntime.cudaEvent_t cyevent
32851 if event is None:
32852 pevent = 0
32853 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
32854 pevent = int(event)
32855 else:
32856 pevent = int(cudaEvent_t(event))
32857 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
32858 cdef cyruntime.cudaExecutionContext_t cyctx
32859 if ctx is None:
32860 pctx = 0
32861 elif isinstance(ctx, (cudaExecutionContext_t,)):
32862 pctx = int(ctx)
32863 else:
32864 pctx = int(cudaExecutionContext_t(ctx))
32865 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
32866 with nogil:
32867 err = cyruntime.cudaExecutionCtxRecordEvent(cyctx, cyevent)
32868 return (_dict_cudaError_t[err],)
32870@cython.embedsignature(True)
32871def cudaExecutionCtxWaitEvent(ctx, event):
32872 """ Make an execution context wait on an event.
32874 Makes all future work submitted to execution context `ctx` wait for all
32875 work captured in `event`. The synchronization will be performed on the
32876 device and will not block the calling CPU thread. See
32877 :py:obj:`~.cudaExecutionCtxRecordEvent()` for details on what is
32878 captured by an event. If the execution context passed to `ctx` is the
32879 device (primary) context obtained via
32880 :py:obj:`~.cudaDeviceGetExecutionCtx()`, all green contexts created on
32881 the device will wait for `event` as well.
32883 Parameters
32884 ----------
32885 ctx : :py:obj:`~.cudaExecutionContext_t`
32886 Execution context to wait for (required parameter, see note below)
32887 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
32888 Event to wait on
32890 Returns
32891 -------
32892 cudaError_t
32893 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidHandle`, :py:obj:`~.cudaErrorStreamCaptureUnsupported`
32895 See Also
32896 --------
32897 :py:obj:`~.cudaExecutionCtxRecordEvent`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cuCtxWaitEvent`, :py:obj:`~.cuGreenCtxWaitEvent`
32899 Notes
32900 -----
32901 `event` may be from a different execution context or device than `ctx`.
32903 The API will return :py:obj:`~.cudaErrorStreamCaptureUnsupported` and invalidate the capture if the specified event `event` is part of an ongoing capture sequence or if the specified execution context `ctx` has a stream in the capture mode.
32904 """
32905 cdef cyruntime.cudaEvent_t cyevent
32906 if event is None:
32907 pevent = 0
32908 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
32909 pevent = int(event)
32910 else:
32911 pevent = int(cudaEvent_t(event))
32912 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
32913 cdef cyruntime.cudaExecutionContext_t cyctx
32914 if ctx is None:
32915 pctx = 0
32916 elif isinstance(ctx, (cudaExecutionContext_t,)):
32917 pctx = int(ctx)
32918 else:
32919 pctx = int(cudaExecutionContext_t(ctx))
32920 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
32921 with nogil:
32922 err = cyruntime.cudaExecutionCtxWaitEvent(cyctx, cyevent)
32923 return (_dict_cudaError_t[err],)
32925@cython.embedsignature(True)
32926def cudaDeviceGetExecutionCtx(int device):
32927 """ Returns the execution context for a device.
32929 Returns in `ctx` the execution context for the specified device. This
32930 is the device's primary context. The returned context can then be
32931 passed to APIs that take in a cudaExecutionContext_t, enabling explicit
32932 context-based programming without relying on thread-local state.
32934 Passing the returned execution context to
32935 :py:obj:`~.cudaExecutionCtxDestroy()` is not allowed and will result in
32936 undefined behavior.
32938 Parameters
32939 ----------
32940 device : int
32941 Device to get the execution context for
32943 Returns
32944 -------
32945 cudaError_t
32946 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
32947 ctx : :py:obj:`~.cudaExecutionContext_t`
32948 Returns the device execution context
32950 See Also
32951 --------
32952 :py:obj:`~.cudaExecutionCtxGetDevice`, :py:obj:`~.cudaExecutionCtxGetId`
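Examples
--------
A minimal sketch (assumes device 0):
>>> from cuda.bindings import runtime
>>> err, primary = runtime.cudaDeviceGetExecutionCtx(0)
>>> _, dev = runtime.cudaExecutionCtxGetDevice(primary)
>>> dev
0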
32953 """
32954 cdef cudaExecutionContext_t ctx = cudaExecutionContext_t()
32955 with nogil:
32956 err = cyruntime.cudaDeviceGetExecutionCtx(<cyruntime.cudaExecutionContext_t*>ctx._pvt_ptr, device)
32957 if err != cyruntime.cudaSuccess:
32958 return (_dict_cudaError_t[err], None)
32959 return (_dict_cudaError_t[err], ctx)
32961@cython.embedsignature(True)
32962def cudaGetExportTable(pExportTableId : Optional[cudaUUID_t]):
32963 """"""
32964 cdef void_ptr ppExportTable = 0
32965 cdef cyruntime.cudaUUID_t* cypExportTableId_ptr = pExportTableId._pvt_ptr if pExportTableId is not None else NULL
32966 with nogil:
32967 err = cyruntime.cudaGetExportTable(<const void**>&ppExportTable, cypExportTableId_ptr)
32968 if err != cyruntime.cudaSuccess:
32969 return (_dict_cudaError_t[err], None)
32970 return (_dict_cudaError_t[err], ppExportTable)
32972@cython.embedsignature(True)
32973def cudaGetKernel(entryFuncAddr):
32974 """ Get pointer to device kernel that matches entry function `entryFuncAddr`.
32976 Returns in `kernelPtr` the device kernel corresponding to the entry
32977 function `entryFuncAddr`.
32979 Note that it is possible for multiple symbols belonging to
32980 different translation units with the same `entryFuncAddr` to be registered
32981 with this CUDA Runtime, so the order in which the translation units are
32982 loaded and registered with the CUDA Runtime can lead to differing
32983 return pointers in `kernelPtr`. A suggested method of ensuring
32984 uniqueness is to limit the visibility of global device functions by using
32985 the static or hidden visibility attribute in the respective translation
32986 units.
32988 Parameters
32989 ----------
32990 entryFuncAddr : Any
32991 Address of device entry function to search kernel for
32993 Returns
32994 -------
32995 cudaError_t
32996 :py:obj:`~.cudaSuccess`
32997 kernelPtr : :py:obj:`~.cudaKernel_t`
32998 Returns the device kernel
33000 See Also
33001 --------
33002 cudaGetKernel (C++ API)
33003 """
33004 cdef cudaKernel_t kernelPtr = cudaKernel_t()
33005 cyentryFuncAddr = _HelperInputVoidPtr(entryFuncAddr)
33006 cdef void* cyentryFuncAddr_ptr = <void*><void_ptr>cyentryFuncAddr.cptr
33007 with nogil:
33008 err = cyruntime.cudaGetKernel(<cyruntime.cudaKernel_t*>kernelPtr._pvt_ptr, cyentryFuncAddr_ptr)
33009 if err != cyruntime.cudaSuccess:
33010 return (_dict_cudaError_t[err], None)
33011 return (_dict_cudaError_t[err], kernelPtr)
33013@cython.embedsignature(True)
33014def make_cudaPitchedPtr(d, size_t p, size_t xsz, size_t ysz):
33015 """ Returns a :py:obj:`~.cudaPitchedPtr` based on input parameters.
33017 Returns a :py:obj:`~.cudaPitchedPtr` based on the specified input
33018 parameters `d`, `p`, `xsz`, and `ysz`.
33020 Parameters
33021 ----------
33022 d : Any
33023 Pointer to allocated memory
33024 p : size_t
33025 Pitch of allocated memory in bytes
33026 xsz : size_t
33027 Logical width of allocation in elements
33028 ysz : size_t
33029 Logical height of allocation in elements
33031 Returns
33032 -------
33035 :py:obj:`~.cudaPitchedPtr`
33036 :py:obj:`~.cudaPitchedPtr` specified by `d`, `p`, `xsz`, and `ysz`
33038 See Also
33039 --------
33040 make_cudaExtent, make_cudaPos
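Examples
--------
A minimal sketch (assumes the pointer and pitch come from
:py:obj:`~.cudaMallocPitch`):
>>> from cuda.bindings import runtime
>>> err, devPtr, pitch = runtime.cudaMallocPitch(256, 64)
>>> p = runtime.make_cudaPitchedPtr(devPtr, pitch, 256, 64)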
33041 """
33042 cyd = _HelperInputVoidPtr(d)
33043 cdef void* cyd_ptr = <void*><void_ptr>cyd.cptr
33044 with nogil:
33045 err = cyruntime.make_cudaPitchedPtr(cyd_ptr, p, xsz, ysz)
33046 cdef cudaPitchedPtr wrapper = cudaPitchedPtr()
33047 wrapper._pvt_ptr[0] = err
33048 return wrapper
33050@cython.embedsignature(True)
33051def make_cudaPos(size_t x, size_t y, size_t z):
33052 """ Returns a :py:obj:`~.cudaPos` based on input parameters.
33054 Returns a :py:obj:`~.cudaPos` based on the specified input parameters
33055 `x`, `y`, and `z`.
33057 Parameters
33058 ----------
33059 x : size_t
33060 X position
33061 y : size_t
33062 Y position
33063 z : size_t
33064 Z position
33066 Returns
33067 -------
33070 :py:obj:`~.cudaPos`
33071 :py:obj:`~.cudaPos` specified by `x`, `y`, and `z`
33073 See Also
33074 --------
33075 make_cudaExtent, make_cudaPitchedPtr
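Examples
--------
A minimal sketch (a zero position is the usual origin for a 3D copy
described by :py:obj:`~.cudaMemcpy3DParms`):
>>> from cuda.bindings import runtime
>>> pos = runtime.make_cudaPos(0, 0, 0)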
33076 """
33077 with nogil:
33078 err = cyruntime.make_cudaPos(x, y, z)
33079 cdef cudaPos wrapper = cudaPos()
33080 wrapper._pvt_ptr[0] = err
33081 return wrapper
33083@cython.embedsignature(True)
33084def make_cudaExtent(size_t w, size_t h, size_t d):
33085 """ Returns a :py:obj:`~.cudaExtent` based on input parameters.
33087 Returns a :py:obj:`~.cudaExtent` based on the specified input
33088 parameters `w`, `h`, and `d`.
33090 Parameters
33091 ----------
33092 w : size_t
33093 Width in elements when referring to array memory, in bytes when
33094 referring to linear memory
33095 h : size_t
33096 Height in elements
33097 d : size_t
33098 Depth in elements
33100 Returns
33101 -------
33104 :py:obj:`~.cudaExtent`
33105 :py:obj:`~.cudaExtent` specified by `w`, `h`, and `d`
33107 See Also
33108 --------
33109 make_cudaPitchedPtr, make_cudaPos
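Examples
--------
A minimal sketch pairing this helper with :py:obj:`~.cudaMalloc3D`:
>>> from cuda.bindings import runtime
>>> extent = runtime.make_cudaExtent(64, 64, 8)
>>> err, pitchedDevPtr = runtime.cudaMalloc3D(extent)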
33110 """
33111 with nogil:
33112 err = cyruntime.make_cudaExtent(w, h, d)
33113 cdef cudaExtent wrapper = cudaExtent()
33114 wrapper._pvt_ptr[0] = err
33115 return wrapper
33117@cython.embedsignature(True)
33118def cudaGraphicsEGLRegisterImage(image, unsigned int flags):
33119 """ Registers an EGL image.
33121 Registers the EGLImageKHR specified by `image` for access by CUDA. A
33122 handle to the registered object is returned as `pCudaResource`.
33123 Additional Mapping/Unmapping is not required for the registered
33124 resource and :py:obj:`~.cudaGraphicsResourceGetMappedEglFrame` can be
33125 directly called on the `pCudaResource`.
33127 The application will be responsible for synchronizing access to shared
33128 objects. The application must ensure that any pending operations which
33129 access the objects have completed before passing control to CUDA. This
33130 may be accomplished by issuing and waiting for a glFinish command on
33131 all GL contexts (for OpenGL, and likewise for other APIs). The
33132 application will also be responsible for ensuring that any pending
33133 operation on the registered CUDA resource has completed prior to
33134 executing subsequent commands in other APIs accessing the same memory
33135 objects. This can be accomplished by calling cuCtxSynchronize or,
33136 preferably, cuEventSynchronize.
33138 The surface's intended usage is specified using `flags`, as follows:
33140 - :py:obj:`~.cudaGraphicsRegisterFlagsNone`: Specifies no hints about
33141 how this resource will be used. It is therefore assumed that this
33142 resource will be read from and written to by CUDA. This is the
33143 default value.
33145 - :py:obj:`~.cudaGraphicsRegisterFlagsReadOnly`: Specifies that CUDA
33146 will not write to this resource.
33148 - :py:obj:`~.cudaGraphicsRegisterFlagsWriteDiscard`: Specifies that
33149 CUDA will not read from this resource and will write over the entire
33150 contents of the resource, so none of the data previously stored in
33151 the resource will be preserved.
33153 The EGLImageKHR is an object which can be used to create an EGLImage
33154 target resource. It is defined as a void pointer:
33155 ``typedef void* EGLImageKHR``
33157 Parameters
33158 ----------
33159 image : :py:obj:`~.EGLImageKHR`
33160 An EGLImageKHR image which can be used to create target resource.
33161 flags : unsigned int
33162 Map flags
33164 Returns
33165 -------
33166 cudaError_t
33167 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
33168 pCudaResource : :py:obj:`~.cudaGraphicsResource`
33169 Pointer to the returned object handle
33171 See Also
33172 --------
33173 :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsResourceGetMappedEglFrame`, :py:obj:`~.cuGraphicsEGLRegisterImage`
33174 """
33175 cdef cyruntime.EGLImageKHR cyimage
33176 if image is None:
33177 pimage = 0
33178 elif isinstance(image, (EGLImageKHR,)):
33179 pimage = int(image)
33180 else:
33181 pimage = int(EGLImageKHR(image))
33182 cyimage = <cyruntime.EGLImageKHR><void_ptr>pimage
33183 cdef cudaGraphicsResource_t pCudaResource = cudaGraphicsResource_t()
33184 with nogil:
33185 err = cyruntime.cudaGraphicsEGLRegisterImage(pCudaResource._pvt_ptr, cyimage, flags)
33186 if err != cyruntime.cudaSuccess:
33187 return (_dict_cudaError_t[err], None)
33188 return (_dict_cudaError_t[err], pCudaResource)
33190@cython.embedsignature(True)
33191def cudaEGLStreamConsumerConnect(eglStream):
33192 """ Connect CUDA to EGLStream as a consumer.
33194 Connect CUDA as a consumer to EGLStreamKHR specified by `eglStream`.
33196 The EGLStreamKHR is an EGL object that transfers a sequence of image
33197 frames from one API to another.
33199 Parameters
33200 ----------
33201 eglStream : :py:obj:`~.EGLStreamKHR`
33202 EGLStreamKHR handle
33204 Returns
33205 -------
33206 cudaError_t
33207 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
33208 conn : :py:obj:`~.cudaEglStreamConnection`
33209 Pointer to the returned connection handle
33211 See Also
33212 --------
33213 :py:obj:`~.cudaEGLStreamConsumerDisconnect`, :py:obj:`~.cudaEGLStreamConsumerAcquireFrame`, :py:obj:`~.cudaEGLStreamConsumerReleaseFrame`, :py:obj:`~.cuEGLStreamConsumerConnect`
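Examples
--------
A minimal sketch (assumes `egl_stream` is an EGLStreamKHR handle
obtained from an EGL binding):
>>> from cuda.bindings import runtime
>>> err, conn = runtime.cudaEGLStreamConsumerConnect(egl_stream)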
33214 """
33215 cdef cyruntime.EGLStreamKHR cyeglStream
33216 if eglStream is None:
33217 peglStream = 0
33218 elif isinstance(eglStream, (EGLStreamKHR,)):
33219 peglStream = int(eglStream)
33220 else:
33221 peglStream = int(EGLStreamKHR(eglStream))
33222 cyeglStream = <cyruntime.EGLStreamKHR><void_ptr>peglStream
33223 cdef cudaEglStreamConnection conn = cudaEglStreamConnection()
33224 with nogil:
33225 err = cyruntime.cudaEGLStreamConsumerConnect(<cyruntime.cudaEglStreamConnection*>conn._pvt_ptr, cyeglStream)
33226 if err != cyruntime.cudaSuccess:
33227 return (_dict_cudaError_t[err], None)
33228 return (_dict_cudaError_t[err], conn)
33230@cython.embedsignature(True)
33231def cudaEGLStreamConsumerConnectWithFlags(eglStream, unsigned int flags):
33232 """ Connect CUDA to EGLStream as a consumer with given flags.
33234 Connect CUDA as a consumer to the EGLStreamKHR specified by `eglStream` with
33235 specified `flags` defined by :py:obj:`~.cudaEglResourceLocationFlags`.
33237 The flags specify whether the consumer wants to access frames from
33238 system memory or video memory. Default is
33239 :py:obj:`~.cudaEglResourceLocationVidmem`.
33241 Parameters
33242 ----------
33243 eglStream : :py:obj:`~.EGLStreamKHR`
33244 EGLStreamKHR handle
33245 flags : unsigned int
33246 Flags denoting the intended location: system or video memory.
33248 Returns
33249 -------
33250 cudaError_t
33251 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
33252 conn : :py:obj:`~.cudaEglStreamConnection`
33253 Pointer to the returned connection handle
33255 See Also
33256 --------
33257 :py:obj:`~.cudaEGLStreamConsumerDisconnect`, :py:obj:`~.cudaEGLStreamConsumerAcquireFrame`, :py:obj:`~.cudaEGLStreamConsumerReleaseFrame`, :py:obj:`~.cuEGLStreamConsumerConnectWithFlags`
33258 """
33259 cdef cyruntime.EGLStreamKHR cyeglStream
33260 if eglStream is None:
33261 peglStream = 0
33262 elif isinstance(eglStream, (EGLStreamKHR,)):
33263 peglStream = int(eglStream)
33264 else:
33265 peglStream = int(EGLStreamKHR(eglStream))
33266 cyeglStream = <cyruntime.EGLStreamKHR><void_ptr>peglStream
33267 cdef cudaEglStreamConnection conn = cudaEglStreamConnection()
33268 with nogil:
33269 err = cyruntime.cudaEGLStreamConsumerConnectWithFlags(<cyruntime.cudaEglStreamConnection*>conn._pvt_ptr, cyeglStream, flags)
33270 if err != cyruntime.cudaSuccess:
33271 return (_dict_cudaError_t[err], None)
33272 return (_dict_cudaError_t[err], conn)
33274@cython.embedsignature(True)
33275def cudaEGLStreamConsumerDisconnect(conn):
33276 """ Disconnect CUDA as a consumer to EGLStream .
33278 Disconnect CUDA as a consumer to EGLStreamKHR.
33280 Parameters
33281 ----------
33282 conn : :py:obj:`~.cudaEglStreamConnection`
33283 Connection to disconnect.
33285 Returns
33286 -------
33287 cudaError_t
33288 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
33290 See Also
33291 --------
33292 :py:obj:`~.cudaEGLStreamConsumerConnect`, :py:obj:`~.cudaEGLStreamConsumerAcquireFrame`, :py:obj:`~.cudaEGLStreamConsumerReleaseFrame`, :py:obj:`~.cuEGLStreamConsumerDisconnect`
33293 """
33294 cdef cyruntime.cudaEglStreamConnection *cyconn
33295 if conn is None:
33296 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>NULL
33297 elif isinstance(conn, (cudaEglStreamConnection,driver.CUeglStreamConnection)):
33298 pconn = conn.getPtr()
33299 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>pconn
33300 elif isinstance(conn, (int)):
33301 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>conn
33302 else:
33303 raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, runtime.cudaEglStreamConnection'>, found " + str(type(conn)))
33304 with nogil:
33305 err = cyruntime.cudaEGLStreamConsumerDisconnect(cyconn)
33306 return (_dict_cudaError_t[err],)
33308@cython.embedsignature(True)
33309def cudaEGLStreamConsumerAcquireFrame(conn, pCudaResource, pStream, unsigned int timeout):
33310 """ Acquire an image frame from the EGLStream with CUDA as a consumer.
33312 Acquire an image frame from EGLStreamKHR.
33313 :py:obj:`~.cudaGraphicsResourceGetMappedEglFrame` can be called on
33314 `pCudaResource` to get :py:obj:`~.cudaEglFrame`.
33316 Parameters
33317 ----------
33318 conn : :py:obj:`~.cudaEglStreamConnection`
33319 Connection on which to acquire
33320 pCudaResource : :py:obj:`~.cudaGraphicsResource_t`
33321 CUDA resource on which the EGLStream frame will be mapped for use.
33322 pStream : :py:obj:`~.cudaStream_t`
33323 CUDA stream for synchronization and any data migrations implied by
33324 :py:obj:`~.cudaEglResourceLocationFlags`.
33325 timeout : unsigned int
33326 Desired timeout in usec.
33328 Returns
33329 -------
33330 cudaError_t
33331 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`, :py:obj:`~.cudaErrorLaunchTimeout`
33333 See Also
33334 --------
33335 :py:obj:`~.cudaEGLStreamConsumerConnect`, :py:obj:`~.cudaEGLStreamConsumerDisconnect`, :py:obj:`~.cudaEGLStreamConsumerReleaseFrame`, :py:obj:`~.cuEGLStreamConsumerAcquireFrame`
33336 """
33337 cdef cyruntime.cudaStream_t *cypStream
33338 if pStream is None:
33339 cypStream = <cyruntime.cudaStream_t*><void_ptr>NULL
33340 elif isinstance(pStream, (cudaStream_t,driver.CUstream)):
33341 ppStream = pStream.getPtr()
33342 cypStream = <cyruntime.cudaStream_t*><void_ptr>ppStream
33343 elif isinstance(pStream, (int)):
33344 cypStream = <cyruntime.cudaStream_t*><void_ptr>pStream
33345 else:
33346 raise TypeError("Argument 'pStream' is not instance of type (expected <class 'int, runtime.cudaStream_t'>, found " + str(type(pStream)))
33347 cdef cyruntime.cudaGraphicsResource_t *cypCudaResource
33348 if pCudaResource is None:
33349 cypCudaResource = <cyruntime.cudaGraphicsResource_t*><void_ptr>NULL
33350 elif isinstance(pCudaResource, (cudaGraphicsResource_t,)):
33351 ppCudaResource = pCudaResource.getPtr()
33352 cypCudaResource = <cyruntime.cudaGraphicsResource_t*><void_ptr>ppCudaResource
33353 elif isinstance(pCudaResource, (int)):
33354 cypCudaResource = <cyruntime.cudaGraphicsResource_t*><void_ptr>pCudaResource
33355 else:
33356 raise TypeError("Argument 'pCudaResource' is not instance of type (expected <class 'int, runtime.cudaGraphicsResource_t'>, found " + str(type(pCudaResource)))
33357 cdef cyruntime.cudaEglStreamConnection *cyconn
33358 if conn is None:
33359 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>NULL
33360 elif isinstance(conn, (cudaEglStreamConnection,driver.CUeglStreamConnection)):
33361 pconn = conn.getPtr()
33362 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>pconn
33363 elif isinstance(conn, (int)):
33364 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>conn
33365 else:
33366 raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, runtime.cudaEglStreamConnection'>, found " + str(type(conn)))
33367 with nogil:
33368 err = cyruntime.cudaEGLStreamConsumerAcquireFrame(cyconn, cypCudaResource, cypStream, timeout)
33369 return (_dict_cudaError_t[err],)
33371@cython.embedsignature(True)
33372def cudaEGLStreamConsumerReleaseFrame(conn, pCudaResource, pStream):
33373 """ Releases the last frame acquired from the EGLStream.
33375 Release the acquired image frame specified by `pCudaResource` to
33376 EGLStreamKHR.
33378 Parameters
33379 ----------
33380 conn : :py:obj:`~.cudaEglStreamConnection`
33381 Connection on which to release
33382 pCudaResource : :py:obj:`~.cudaGraphicsResource_t`
33383 CUDA resource whose corresponding frame is to be released
33384 pStream : :py:obj:`~.cudaStream_t`
33385 CUDA stream on which release will be done.
33387 Returns
33388 -------
33389 cudaError_t
33390 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
33392 See Also
33393 --------
33394 :py:obj:`~.cudaEGLStreamConsumerConnect`, :py:obj:`~.cudaEGLStreamConsumerDisconnect`, :py:obj:`~.cudaEGLStreamConsumerAcquireFrame`, :py:obj:`~.cuEGLStreamConsumerReleaseFrame`
33395 """
33396 cdef cyruntime.cudaStream_t *cypStream
33397 if pStream is None:
33398 cypStream = <cyruntime.cudaStream_t*><void_ptr>NULL
33399 elif isinstance(pStream, (cudaStream_t,driver.CUstream)):
33400 ppStream = pStream.getPtr()
33401 cypStream = <cyruntime.cudaStream_t*><void_ptr>ppStream
33402 elif isinstance(pStream, (int)):
33403 cypStream = <cyruntime.cudaStream_t*><void_ptr>pStream
33404 else:
33405 raise TypeError("Argument 'pStream' is not instance of type (expected <class 'int, runtime.cudaStream_t'>, found " + str(type(pStream)))
33406 cdef cyruntime.cudaGraphicsResource_t cypCudaResource
33407 if pCudaResource is None:
33408 ppCudaResource = 0
33409 elif isinstance(pCudaResource, (cudaGraphicsResource_t,)):
33410 ppCudaResource = int(pCudaResource)
33411 else:
33412 ppCudaResource = int(cudaGraphicsResource_t(pCudaResource))
33413 cypCudaResource = <cyruntime.cudaGraphicsResource_t><void_ptr>ppCudaResource
33414 cdef cyruntime.cudaEglStreamConnection *cyconn
33415 if conn is None:
33416 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>NULL
33417 elif isinstance(conn, (cudaEglStreamConnection,driver.CUeglStreamConnection)):
33418 pconn = conn.getPtr()
33419 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>pconn
33420 elif isinstance(conn, (int)):
33421 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>conn
33422 else:
33423 raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, runtime.cudaEglStreamConnection'>, found " + str(type(conn)))
33424 with nogil:
33425 err = cyruntime.cudaEGLStreamConsumerReleaseFrame(cyconn, cypCudaResource, cypStream)
33426 return (_dict_cudaError_t[err],)
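# Usage sketch (illustrative): a minimal consumer frame loop pairing
# cudaEGLStreamConsumerAcquireFrame with cudaEGLStreamConsumerReleaseFrame.
# `conn` is an established consumer connection, `stream` a cudaStream_t, and
# the 16000 usec timeout is an arbitrary example value:
#
#     resource = cudaGraphicsResource_t()
#     err, = cudaEGLStreamConsumerAcquireFrame(conn, resource, stream, 16000)
#     if err == cudaError_t.cudaSuccess:
#         err, frame = cudaGraphicsResourceGetMappedEglFrame(resource, 0, 0)
#         ...  # read the cudaEglFrame contents here
#         err, = cudaEGLStreamConsumerReleaseFrame(conn, resource, stream)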
33428@cython.embedsignature(True)
33429def cudaEGLStreamProducerConnect(eglStream, width, height):
33430 """ Connect CUDA to EGLStream as a producer.
33432 Connect CUDA as a producer to the EGLStreamKHR specified by `eglStream`.
33434 The EGLStreamKHR is an EGL object that transfers a sequence of image
33435 frames from one API to another.
33437 Parameters
33438 ----------
33439 eglStream : :py:obj:`~.EGLStreamKHR`
33440 EGLStreamKHR handle
33441 width : :py:obj:`~.EGLint`
33442 width of the image to be submitted to the stream
33443 height : :py:obj:`~.EGLint`
33444 height of the image to be submitted to the stream
33446 Returns
33447 -------
33448 cudaError_t
33449 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
33450 conn : :py:obj:`~.cudaEglStreamConnection`
33451 Pointer to the returned connection handle
33453 See Also
33454 --------
33455 :py:obj:`~.cudaEGLStreamProducerDisconnect`, :py:obj:`~.cudaEGLStreamProducerPresentFrame`, :py:obj:`~.cudaEGLStreamProducerReturnFrame`, :py:obj:`~.cuEGLStreamProducerConnect`
33456 """
33457 cdef cyruntime.EGLint cyheight
33458 if height is None:
33459 pheight = 0
33460 elif isinstance(height, (EGLint,)):
33461 pheight = int(height)
33462 else:
33463 pheight = int(EGLint(height))
33464 cyheight = <cyruntime.EGLint><void_ptr>pheight
33465 cdef cyruntime.EGLint cywidth
33466 if width is None:
33467 pwidth = 0
33468 elif isinstance(width, (EGLint,)):
33469 pwidth = int(width)
33470 else:
33471 pwidth = int(EGLint(width))
33472 cywidth = <cyruntime.EGLint><void_ptr>pwidth
33473 cdef cyruntime.EGLStreamKHR cyeglStream
33474 if eglStream is None:
33475 peglStream = 0
33476 elif isinstance(eglStream, (EGLStreamKHR,)):
33477 peglStream = int(eglStream)
33478 else:
33479 peglStream = int(EGLStreamKHR(eglStream))
33480 cyeglStream = <cyruntime.EGLStreamKHR><void_ptr>peglStream
33481 cdef cudaEglStreamConnection conn = cudaEglStreamConnection()
33482 with nogil:
33483 err = cyruntime.cudaEGLStreamProducerConnect(<cyruntime.cudaEglStreamConnection*>conn._pvt_ptr, cyeglStream, cywidth, cyheight)
33484 if err != cyruntime.cudaSuccess:
33485 return (_dict_cudaError_t[err], None)
33486 return (_dict_cudaError_t[err], conn)
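# Usage sketch (illustrative). Assuming `egl_stream` is a valid EGLStreamKHR
# handle, a producer connection for 1920x1080 frames could be opened with:
#
#     err, conn = cudaEGLStreamProducerConnect(egl_stream, 1920, 1080)
#     assert err == cudaError_t.cudaSuccess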
33488@cython.embedsignature(True)
33489def cudaEGLStreamProducerDisconnect(conn):
33490 """ Disconnect CUDA as a producer to EGLStream .
33492 Disconnect CUDA as a producer to EGLStreamKHR.
33494 Parameters
33495 ----------
33496 conn : :py:obj:`~.cudaEglStreamConnection`
33497 Connection to disconnect.
33499 Returns
33500 -------
33501 cudaError_t
33502 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
33504 See Also
33505 --------
33506 :py:obj:`~.cudaEGLStreamProducerConnect`, :py:obj:`~.cudaEGLStreamProducerPresentFrame`, :py:obj:`~.cudaEGLStreamProducerReturnFrame`, :py:obj:`~.cuEGLStreamProducerDisconnect`
33507 """
33508 cdef cyruntime.cudaEglStreamConnection *cyconn
33509 if conn is None:
33510 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>NULL
33511 elif isinstance(conn, (cudaEglStreamConnection,driver.CUeglStreamConnection)):
33512 pconn = conn.getPtr()
33513 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>pconn
33514 elif isinstance(conn, (int)):
33515 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>conn
33516 else:
33517 raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, runtime.cudaEglStreamConnection'>, found " + str(type(conn)))
33518 with nogil:
33519 err = cyruntime.cudaEGLStreamProducerDisconnect(cyconn)
33520 return (_dict_cudaError_t[err],)
33522@cython.embedsignature(True)
33523def cudaEGLStreamProducerPresentFrame(conn, eglframe not None : cudaEglFrame, pStream):
33524 """ Present a CUDA eglFrame to the EGLStream with CUDA as a producer.
33526 The :py:obj:`~.cudaEglFrame` is defined as:
33528 **View CUDA Toolkit Documentation for a C++ code example**
33530 For :py:obj:`~.cudaEglFrame` of type :py:obj:`~.cudaEglFrameTypePitch`,
33531 the application may present sub-region of a memory allocation. In that
33532 case, :py:obj:`~.cudaPitchedPtr.ptr` will specify the start address of
33533 the sub-region in the allocation and :py:obj:`~.cudaEglPlaneDesc` will
33534 specify the dimensions of the sub-region.
33536 Parameters
33537 ----------
33538 conn : :py:obj:`~.cudaEglStreamConnection`
33539 Connection on which to present the CUDA array
33540 eglframe : :py:obj:`~.cudaEglFrame`
33541 CUDA EGLStream Producer Frame handle to be sent to the consumer over
33542 EglStream.
33543 pStream : :py:obj:`~.cudaStream_t`
33544 CUDA stream on which to present the frame.
33546 Returns
33547 -------
33548 cudaError_t
33549 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
33551 See Also
33552 --------
33553 :py:obj:`~.cudaEGLStreamProducerConnect`, :py:obj:`~.cudaEGLStreamProducerDisconnect`, :py:obj:`~.cudaEGLStreamProducerReturnFrame`, :py:obj:`~.cuEGLStreamProducerPresentFrame`
33554 """
33555 cdef cyruntime.cudaStream_t *cypStream
33556 if pStream is None:
33557 cypStream = <cyruntime.cudaStream_t*><void_ptr>NULL
33558 elif isinstance(pStream, (cudaStream_t,driver.CUstream)):
33559 ppStream = pStream.getPtr()
33560 cypStream = <cyruntime.cudaStream_t*><void_ptr>ppStream
33561 elif isinstance(pStream, (int)):
33562 cypStream = <cyruntime.cudaStream_t*><void_ptr>pStream
33563 else:
33564 raise TypeError("Argument 'pStream' is not instance of type (expected <class 'int, runtime.cudaStream_t'>, found " + str(type(pStream)))
33565 cdef cyruntime.cudaEglStreamConnection *cyconn
33566 if conn is None:
33567 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>NULL
33568 elif isinstance(conn, (cudaEglStreamConnection,driver.CUeglStreamConnection)):
33569 pconn = conn.getPtr()
33570 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>pconn
33571 elif isinstance(conn, (int)):
33572 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>conn
33573 else:
33574 raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, runtime.cudaEglStreamConnection'>, found " + str(type(conn)))
33575 with nogil:
33576 err = cyruntime.cudaEGLStreamProducerPresentFrame(cyconn, eglframe._pvt_ptr[0], cypStream)
33577 return (_dict_cudaError_t[err],)
33579@cython.embedsignature(True)
33580def cudaEGLStreamProducerReturnFrame(conn, eglframe : Optional[cudaEglFrame], pStream):
33581 """ Return the CUDA eglFrame to the EGLStream last released by the consumer.
33583 This API can potentially return cudaErrorLaunchTimeout if the consumer
33584 has not yet returned a frame to the EGL stream. If the timeout is
33585 returned, the application can retry.
33587 Parameters
33588 ----------
33589 conn : :py:obj:`~.cudaEglStreamConnection`
33590 Connection on which to present the CUDA array
33591 eglframe : :py:obj:`~.cudaEglFrame`
33592 CUDA EGLStream Producer Frame handle returned from the consumer over
33593 EglStream.
33594 pStream : :py:obj:`~.cudaStream_t`
33595 CUDA stream on which to return the frame.
33597 Returns
33598 -------
33599 cudaError_t
33600 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorLaunchTimeout`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
33602 See Also
33603 --------
33604 :py:obj:`~.cudaEGLStreamProducerConnect`, :py:obj:`~.cudaEGLStreamProducerDisconnect`, :py:obj:`~.cudaEGLStreamProducerPresentFrame`, :py:obj:`~.cuEGLStreamProducerReturnFrame`
33605 """
33606 cdef cyruntime.cudaStream_t *cypStream
33607 if pStream is None:
33608 cypStream = <cyruntime.cudaStream_t*><void_ptr>NULL
33609 elif isinstance(pStream, (cudaStream_t,driver.CUstream)):
33610 ppStream = pStream.getPtr()
33611 cypStream = <cyruntime.cudaStream_t*><void_ptr>ppStream
33612 elif isinstance(pStream, (int)):
33613 cypStream = <cyruntime.cudaStream_t*><void_ptr>pStream
33614 else:
33615 raise TypeError("Argument 'pStream' is not instance of type (expected <class 'int, runtime.cudaStream_t'>, found " + str(type(pStream)))
33616 cdef cyruntime.cudaEglStreamConnection *cyconn
33617 if conn is None:
33618 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>NULL
33619 elif isinstance(conn, (cudaEglStreamConnection,driver.CUeglStreamConnection)):
33620 pconn = conn.getPtr()
33621 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>pconn
33622 elif isinstance(conn, (int)):
33623 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>conn
33624 else:
33625 raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, runtime.cudaEglStreamConnection'>, found " + str(type(conn)))
33626 cdef cyruntime.cudaEglFrame* cyeglframe_ptr = eglframe._pvt_ptr if eglframe is not None else NULL
33627 with nogil:
33628 err = cyruntime.cudaEGLStreamProducerReturnFrame(cyconn, cyeglframe_ptr, cypStream)
33629 return (_dict_cudaError_t[err],)
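# Usage sketch (illustrative): a minimal producer loop pairing
# cudaEGLStreamProducerPresentFrame with cudaEGLStreamProducerReturnFrame.
# `frame` is assumed to be a fully populated cudaEglFrame; the retry on
# cudaErrorLaunchTimeout follows the note in the docstring above:
#
#     err, = cudaEGLStreamProducerPresentFrame(conn, frame, stream)
#     returned = cudaEglFrame()
#     while True:
#         err, = cudaEGLStreamProducerReturnFrame(conn, returned, stream)
#         if err != cudaError_t.cudaErrorLaunchTimeout:
#             break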
33631@cython.embedsignature(True)
33632def cudaGraphicsResourceGetMappedEglFrame(resource, unsigned int index, unsigned int mipLevel):
33633 """ Get an eglFrame through which to access a registered EGL graphics resource.
33635 Returns in `*eglFrame` an eglFrame pointer through which the registered
33636 graphics resource `resource` may be accessed. This API can only be
33637 called for EGL graphics resources.
33639 The :py:obj:`~.cudaEglFrame` is defined as
33641 **View CUDA Toolkit Documentation for a C++ code example**
33643 Parameters
33644 ----------
33645 resource : :py:obj:`~.cudaGraphicsResource_t`
33646 Registered resource to access.
33647 index : unsigned int
33648 Index for cubemap surfaces.
33649 mipLevel : unsigned int
33650 Mipmap level for the subresource to access.
33652 Returns
33653 -------
33654 cudaError_t
33655 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
33656 eglFrame : :py:obj:`~.cudaEglFrame`
33657 Returned eglFrame.
33659 See Also
33660 --------
33661 :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`, :py:obj:`~.cudaGraphicsResourceGetMappedPointer`, :py:obj:`~.cuGraphicsResourceGetMappedEglFrame`
33663 Notes
33664 -----
33665 Note that in case of multiplanar `*eglFrame`, pitch of only first plane (unsigned int :py:obj:`~.cudaEglPlaneDesc.pitch`) is to be considered by the application.
33666 """
33667 cdef cyruntime.cudaGraphicsResource_t cyresource
33668 if resource is None:
33669 presource = 0
33670 elif isinstance(resource, (cudaGraphicsResource_t,)):
33671 presource = int(resource)
33672 else:
33673 presource = int(cudaGraphicsResource_t(resource))
33674 cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
33675 cdef cudaEglFrame eglFrame = cudaEglFrame()
33676 with nogil:
33677 err = cyruntime.cudaGraphicsResourceGetMappedEglFrame(<cyruntime.cudaEglFrame*>eglFrame._pvt_ptr, cyresource, index, mipLevel)
33678 if err != cyruntime.cudaSuccess:
33679 return (_dict_cudaError_t[err], None)
33680 return (_dict_cudaError_t[err], eglFrame)
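# Usage sketch (illustrative). After a frame has been acquired on `resource`,
# the mapped eglFrame describes its planes; per the note above, only the
# first plane's pitch is meaningful for multiplanar frames (this assumes the
# wrapper mirrors the C struct's `planeDesc` member):
#
#     err, frame = cudaGraphicsResourceGetMappedEglFrame(resource, 0, 0)
#     if err == cudaError_t.cudaSuccess:
#         pitch = frame.planeDesc[0].pitch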
33682@cython.embedsignature(True)
33683def cudaEventCreateFromEGLSync(eglSync, unsigned int flags):
33684 """ Creates an event from EGLSync object.
33686 Creates an event *phEvent from an EGLSyncKHR eglSync with the flags
33687 specified via `flags`. Valid flags include:
33689 - :py:obj:`~.cudaEventDefault`: Default event creation flag.
33691 - :py:obj:`~.cudaEventBlockingSync`: Specifies that the created event
33692 should use blocking synchronization. A CPU thread that uses
33693 :py:obj:`~.cudaEventSynchronize()` to wait on an event created with
33694 this flag will block until the event has actually been completed.
33696 :py:obj:`~.cudaEventRecord` and TimingData are not supported for events
33697 created from EGLSync.
33699 The EGLSyncKHR is an opaque handle to an EGL sync object (`typedef void*
33700 EGLSyncKHR`).
33702 Parameters
33703 ----------
33704 eglSync : :py:obj:`~.EGLSyncKHR`
33705 Opaque handle to EGLSync object
33706 flags : unsigned int
33707 Event creation flags
33709 Returns
33710 -------
33711 cudaError_t
33712 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorLaunchFailure`, :py:obj:`~.cudaErrorMemoryAllocation`
33713 phEvent : :py:obj:`~.cudaEvent_t`
33714 Returns newly created event
33716 See Also
33717 --------
33718 :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`
33719 """
33720 cdef cyruntime.EGLSyncKHR cyeglSync
33721 if eglSync is None:
33722 peglSync = 0
33723 elif isinstance(eglSync, (EGLSyncKHR,)):
33724 peglSync = int(eglSync)
33725 else:
33726 peglSync = int(EGLSyncKHR(eglSync))
33727 cyeglSync = <cyruntime.EGLSyncKHR><void_ptr>peglSync
33728 cdef cudaEvent_t phEvent = cudaEvent_t()
33729 with nogil:
33730 err = cyruntime.cudaEventCreateFromEGLSync(<cyruntime.cudaEvent_t*>phEvent._pvt_ptr, cyeglSync, flags)
33731 if err != cyruntime.cudaSuccess:
33732 return (_dict_cudaError_t[err], None)
33733 return (_dict_cudaError_t[err], phEvent)
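# Usage sketch (illustrative). Assuming `egl_sync` is a valid EGLSyncKHR
# handle, an event can be created and waited on; cudaEventRecord and timing
# are unsupported for such events, per the docstring above:
#
#     err, event = cudaEventCreateFromEGLSync(egl_sync, cudaEventDefault)
#     if err == cudaError_t.cudaSuccess:
#         err, = cudaEventSynchronize(event)
#         err, = cudaEventDestroy(event)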
33735@cython.embedsignature(True)
33736def cudaProfilerStart():
33737 """ Enable profiling.
33739 Enables profile collection by the active profiling tool for the current
33740 context. If profiling is already enabled, then
33741 :py:obj:`~.cudaProfilerStart()` has no effect.
33743 cudaProfilerStart and cudaProfilerStop APIs are used to
33744 programmatically control the profiling granularity by allowing
33745 profiling to be done only on selective pieces of code.
33747 Returns
33748 -------
33749 cudaError_t
33750 :py:obj:`~.cudaSuccess`
33752 See Also
33753 --------
33754 :py:obj:`~.cudaProfilerStop`, :py:obj:`~.cuProfilerStart`
33755 """
33756 with nogil:
33757 err = cyruntime.cudaProfilerStart()
33758 return (_dict_cudaError_t[err],)
33760@cython.embedsignature(True)
33761def cudaProfilerStop():
33762 """ Disable profiling.
33764 Disables profile collection by the active profiling tool for the
33765 current context. If profiling is already disabled, then
33766 :py:obj:`~.cudaProfilerStop()` has no effect.
33768 cudaProfilerStart and cudaProfilerStop APIs are used to
33769 programmatically control the profiling granularity by allowing
33770 profiling to be done only on selective pieces of code.
33772 Returns
33773 -------
33774 cudaError_t
33775 :py:obj:`~.cudaSuccess`
33777 See Also
33778 --------
33779 :py:obj:`~.cudaProfilerStart`, :py:obj:`~.cuProfilerStop`
33780 """
33781 with nogil:
33782 err = cyruntime.cudaProfilerStop()
33783 return (_dict_cudaError_t[err],)
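# Usage sketch (illustrative): bracketing a region of interest so that an
# attached profiler records only the work launched in between:
#
#     err, = cudaProfilerStart()
#     ...  # launch the kernels to be profiled
#     err, = cudaProfilerStop()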
33785@cython.embedsignature(True)
33786def cudaGLGetDevices(unsigned int cudaDeviceCount, deviceList not None : cudaGLDeviceList):
33787 """ Gets the CUDA devices associated with the current OpenGL context.
33789 Returns in `*pCudaDeviceCount` the number of CUDA-compatible devices
33790 corresponding to the current OpenGL context. Also returns in
33791 `*pCudaDevices` at most `cudaDeviceCount` of the CUDA-compatible
33792 devices corresponding to the current OpenGL context. If any of the GPUs
33793 being used by the current OpenGL context are not CUDA capable then the
33794 call will return cudaErrorNoDevice.
33796 Parameters
33797 ----------
33798 cudaDeviceCount : unsigned int
33799 The size of the output device array `pCudaDevices`
33800 deviceList : cudaGLDeviceList
33801 The set of devices to return. This set may be cudaGLDeviceListAll
33802 for all devices, cudaGLDeviceListCurrentFrame for the devices used
33803 to render the current frame (in SLI), or cudaGLDeviceListNextFrame
33804 for the devices used to render the next frame (in SLI).
33806 Returns
33807 -------
33808 cudaError_t
33809 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNoDevice`, :py:obj:`~.cudaErrorInvalidGraphicsContext`, :py:obj:`~.cudaErrorUnknown`
33813 pCudaDeviceCount : unsigned int
33814 Returned number of CUDA devices corresponding to the current OpenGL
33815 context
33816 pCudaDevices : list[int]
33817 Returned CUDA devices corresponding to the current OpenGL context
33819 See Also
33820 --------
33821 :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsMapResources`, :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`, :py:obj:`~.cudaGraphicsResourceGetMappedPointer`, :py:obj:`~.cuGLGetDevices`
33827 Notes
33828 -----
33829 This function is not supported on Mac OS X.
33831 """
33832 cdef unsigned int pCudaDeviceCount = 0
33833 cdef int* cypCudaDevices = NULL
33834 pypCudaDevices = []
33835 if cudaDeviceCount != 0:
33836 cypCudaDevices = <int*>calloc(cudaDeviceCount, sizeof(int))
33837 if cypCudaDevices is NULL:
33838 raise MemoryError('Failed to allocate length x size memory: ' + str(cudaDeviceCount) + 'x' + str(sizeof(int)))
33839 cdef cyruntime.cudaGLDeviceList cydeviceList = deviceList.value
33840 with nogil:
33841 err = cyruntime.cudaGLGetDevices(&pCudaDeviceCount, cypCudaDevices, cudaDeviceCount, cydeviceList)
33842 if cudaError_t(err) == cudaError_t(0):
33843 pypCudaDevices = [<void_ptr>cypCudaDevices[idx] for idx in range(cudaDeviceCount)]
33844 if cypCudaDevices is not NULL:
33845 free(cypCudaDevices)
33846 if err != cyruntime.cudaSuccess:
33847 return (_dict_cudaError_t[err], None, None)
33848 return (_dict_cudaError_t[err], pCudaDeviceCount, pypCudaDevices)
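# Usage sketch (illustrative). With an OpenGL context current on this thread,
# query up to 8 CUDA devices backing it:
#
#     err, count, devs = cudaGLGetDevices(8, cudaGLDeviceList.cudaGLDeviceListAll)
#     if err == cudaError_t.cudaSuccess:
#         print(count, devs)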
33850@cython.embedsignature(True)
33851def cudaGraphicsGLRegisterImage(image, target, unsigned int flags):
33852 """ Register an OpenGL texture or renderbuffer object.
33854 Registers the texture or renderbuffer object specified by `image` for
33855 access by CUDA. A handle to the registered object is returned as
33856 `resource`.
33858 `target` must match the type of the object, and must be one of
33859 :py:obj:`~.GL_TEXTURE_2D`, :py:obj:`~.GL_TEXTURE_RECTANGLE`,
33860 :py:obj:`~.GL_TEXTURE_CUBE_MAP`, :py:obj:`~.GL_TEXTURE_3D`,
33861 :py:obj:`~.GL_TEXTURE_2D_ARRAY`, or :py:obj:`~.GL_RENDERBUFFER`.
33863 The register flags `flags` specify the intended usage, as follows:
33865 - :py:obj:`~.cudaGraphicsRegisterFlagsNone`: Specifies no hints about
33866 how this resource will be used. It is therefore assumed that this
33867 resource will be read from and written to by CUDA. This is the
33868 default value.
33870 - :py:obj:`~.cudaGraphicsRegisterFlagsReadOnly`: Specifies that CUDA
33871 will not write to this resource.
33873 - :py:obj:`~.cudaGraphicsRegisterFlagsWriteDiscard`: Specifies that
33874 CUDA will not read from this resource and will write over the entire
33875 contents of the resource, so none of the data previously stored in
33876 the resource will be preserved.
33878 - :py:obj:`~.cudaGraphicsRegisterFlagsSurfaceLoadStore`: Specifies that
33879 CUDA will bind this resource to a surface reference.
33881 - :py:obj:`~.cudaGraphicsRegisterFlagsTextureGather`: Specifies that
33882 CUDA will perform texture gather operations on this resource.
33884 The following image formats are supported. For brevity's sake, the list
33885 is abbreviated. For example, {GL_R, GL_RG} X {8, 16} expands to the
33886 following 4 formats {GL_R8, GL_R16, GL_RG8, GL_RG16}:
33888 - GL_RED, GL_RG, GL_RGBA, GL_LUMINANCE, GL_ALPHA, GL_LUMINANCE_ALPHA,
33889 GL_INTENSITY
33891 - {GL_R, GL_RG, GL_RGBA} X {8, 16, 16F, 32F, 8UI, 16UI, 32UI, 8I, 16I,
33892 32I}
33894 - {GL_LUMINANCE, GL_ALPHA, GL_LUMINANCE_ALPHA, GL_INTENSITY} X {8, 16,
33895 16F_ARB, 32F_ARB, 8UI_EXT, 16UI_EXT, 32UI_EXT, 8I_EXT, 16I_EXT,
33896 32I_EXT}
33898 The following image classes are currently disallowed:
33900 - Textures with borders
33902 - Multisampled renderbuffers
33904 Parameters
33905 ----------
33906 image : :py:obj:`~.GLuint`
33907 name of texture or renderbuffer object to be registered
33908 target : :py:obj:`~.GLenum`
33909 Identifies the type of object specified by `image`
33910 flags : unsigned int
33911 Register flags
33913 Returns
33914 -------
33915 cudaError_t
33916 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorOperatingSystem`, :py:obj:`~.cudaErrorUnknown`
33917 resource : :py:obj:`~.cudaGraphicsResource`
33918 Pointer to the returned object handle
33920 See Also
33921 --------
33922 :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsMapResources`, :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`, :py:obj:`~.cuGraphicsGLRegisterImage`
33923 """
33924 cdef cyruntime.GLenum cytarget
33925 if target is None:
33926 ptarget = 0
33927 elif isinstance(target, (GLenum,)):
33928 ptarget = int(target)
33929 else:
33930 ptarget = int(GLenum(target))
33931 cytarget = <cyruntime.GLenum><void_ptr>ptarget
33932 cdef cyruntime.GLuint cyimage
33933 if image is None:
33934 pimage = 0
33935 elif isinstance(image, (GLuint,)):
33936 pimage = int(image)
33937 else:
33938 pimage = int(GLuint(image))
33939 cyimage = <cyruntime.GLuint><void_ptr>pimage
33940 cdef cudaGraphicsResource_t resource = cudaGraphicsResource_t()
33941 with nogil:
33942 err = cyruntime.cudaGraphicsGLRegisterImage(resource._pvt_ptr, cyimage, cytarget, flags)
33943 if err != cyruntime.cudaSuccess:
33944 return (_dict_cudaError_t[err], None)
33945 return (_dict_cudaError_t[err], resource)
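# Usage sketch (illustrative). Registering a GL texture for read-only CUDA
# access; 0x0DE1 is the standard GL_TEXTURE_2D enum value, and `tex_id` is
# assumed to be a texture name created in the current GL context:
#
#     flags = cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsReadOnly
#     err, resource = cudaGraphicsGLRegisterImage(tex_id, 0x0DE1, flags)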
33947@cython.embedsignature(True)
33948def cudaGraphicsGLRegisterBuffer(buffer, unsigned int flags):
33949 """ Registers an OpenGL buffer object.
33951 Registers the buffer object specified by `buffer` for access by CUDA. A
33952 handle to the registered object is returned as `resource`. The register
33953 flags `flags` specify the intended usage, as follows:
33955 - :py:obj:`~.cudaGraphicsRegisterFlagsNone`: Specifies no hints about
33956 how this resource will be used. It is therefore assumed that this
33957 resource will be read from and written to by CUDA. This is the
33958 default value.
33960 - :py:obj:`~.cudaGraphicsRegisterFlagsReadOnly`: Specifies that CUDA
33961 will not write to this resource.
33963 - :py:obj:`~.cudaGraphicsRegisterFlagsWriteDiscard`: Specifies that
33964 CUDA will not read from this resource and will write over the entire
33965 contents of the resource, so none of the data previously stored in
33966 the resource will be preserved.
33968 Parameters
33969 ----------
33970 buffer : :py:obj:`~.GLuint`
33971 name of buffer object to be registered
33972 flags : unsigned int
33973 Register flags
33975 Returns
33976 -------
33977 cudaError_t
33978 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorOperatingSystem`, :py:obj:`~.cudaErrorUnknown`
33979 resource : :py:obj:`~.cudaGraphicsResource`
33980 Pointer to the returned object handle
33982 See Also
33983 --------
33984 :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsMapResources`, :py:obj:`~.cudaGraphicsResourceGetMappedPointer`, :py:obj:`~.cuGraphicsGLRegisterBuffer`
33985 """
33986 cdef cyruntime.GLuint cybuffer
33987 if buffer is None:
33988 pbuffer = 0
33989 elif isinstance(buffer, (GLuint,)):
33990 pbuffer = int(buffer)
33991 else:
33992 pbuffer = int(GLuint(buffer))
33993 cybuffer = <cyruntime.GLuint><void_ptr>pbuffer
33994 cdef cudaGraphicsResource_t resource = cudaGraphicsResource_t()
33995 with nogil:
33996 err = cyruntime.cudaGraphicsGLRegisterBuffer(resource._pvt_ptr, cybuffer, flags)
33997 if err != cyruntime.cudaSuccess:
33998 return (_dict_cudaError_t[err], None)
33999 return (_dict_cudaError_t[err], resource)
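# Usage sketch (illustrative). Registering a GL buffer object and obtaining a
# device pointer to it; `vbo` is assumed to be a GL buffer name, and the
# map/unmap and mapped-pointer helpers are the ones defined earlier in this
# module:
#
#     flags = cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsNone
#     err, resource = cudaGraphicsGLRegisterBuffer(vbo, flags)
#     err, = cudaGraphicsMapResources(1, resource, None)
#     err, dev_ptr, size = cudaGraphicsResourceGetMappedPointer(resource)
#     ...  # use dev_ptr from CUDA
#     err, = cudaGraphicsUnmapResources(1, resource, None)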
34001@cython.embedsignature(True)
34002def cudaVDPAUGetDevice(vdpDevice, vdpGetProcAddress):
34003 """ Gets the CUDA device associated with a VdpDevice.
34005 Returns the CUDA device associated with a VdpDevice, if applicable.
34007 Parameters
34008 ----------
34009 vdpDevice : :py:obj:`~.VdpDevice`
34010 A VdpDevice handle
34011 vdpGetProcAddress : :py:obj:`~.VdpGetProcAddress`
34012 VDPAU's VdpGetProcAddress function pointer
34014 Returns
34015 -------
34016 cudaError_t
34017 :py:obj:`~.cudaSuccess`
34018 device : int
34019 Returns the device associated with vdpDevice, or -1 if the device
34020 associated with vdpDevice is not a compute device.
34022 See Also
34023 --------
34024 :py:obj:`~.cudaVDPAUSetVDPAUDevice`, :py:obj:`~.cuVDPAUGetDevice`
34025 """
34026 cdef cyruntime.VdpGetProcAddress *cyvdpGetProcAddress
34027 if vdpGetProcAddress is None:
34028 cyvdpGetProcAddress = <cyruntime.VdpGetProcAddress*><void_ptr>NULL
34029 elif isinstance(vdpGetProcAddress, (VdpGetProcAddress,)):
34030 pvdpGetProcAddress = vdpGetProcAddress.getPtr()
34031 cyvdpGetProcAddress = <cyruntime.VdpGetProcAddress*><void_ptr>pvdpGetProcAddress
34032 elif isinstance(vdpGetProcAddress, (int)):
34033 cyvdpGetProcAddress = <cyruntime.VdpGetProcAddress*><void_ptr>vdpGetProcAddress
34034 else:
34035 raise TypeError("Argument 'vdpGetProcAddress' is not instance of type (expected <class 'int, runtime.VdpGetProcAddress'>, found " + str(type(vdpGetProcAddress)))
34036 cdef cyruntime.VdpDevice cyvdpDevice
34037 if vdpDevice is None:
34038 pvdpDevice = 0
34039 elif isinstance(vdpDevice, (VdpDevice,)):
34040 pvdpDevice = int(vdpDevice)
34041 else:
34042 pvdpDevice = int(VdpDevice(vdpDevice))
34043 cyvdpDevice = <cyruntime.VdpDevice><void_ptr>pvdpDevice
34044 cdef int device = 0
34045 with nogil:
34046 err = cyruntime.cudaVDPAUGetDevice(&device, cyvdpDevice, cyvdpGetProcAddress)
34047 if err != cyruntime.cudaSuccess:
34048 return (_dict_cudaError_t[err], None)
34049 return (_dict_cudaError_t[err], device)
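# Usage sketch (illustrative). `vdp_device` and `vdp_get_proc_address` are
# assumed to come from the host application's VDPAU initialization:
#
#     err, dev = cudaVDPAUGetDevice(vdp_device, vdp_get_proc_address)
#     if err == cudaError_t.cudaSuccess and dev >= 0:
#         err, = cudaSetDevice(dev)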
34051@cython.embedsignature(True)
34052def cudaVDPAUSetVDPAUDevice(int device, vdpDevice, vdpGetProcAddress):
34053 """ Sets a CUDA device to use VDPAU interoperability.
34055 Records `vdpDevice` as the VdpDevice for VDPAU interoperability with
34056 the CUDA device `device` and sets `device` as the current device for
34057 the calling host thread.
34059 This function will immediately initialize the primary context on
34060 `device` if needed.
34062 If `device` has already been initialized then this call will fail with
34063 the error :py:obj:`~.cudaErrorSetOnActiveProcess`. In this case it is
34064 necessary to reset `device` using :py:obj:`~.cudaDeviceReset()` before
34065 VDPAU interoperability on `device` may be enabled.
34067 Parameters
34068 ----------
34069 device : int
34070 Device to use for VDPAU interoperability
34071 vdpDevice : :py:obj:`~.VdpDevice`
34072 The VdpDevice to interoperate with
34073 vdpGetProcAddress : :py:obj:`~.VdpGetProcAddress`
34074 VDPAU's VdpGetProcAddress function pointer
34076 Returns
34077 -------
34078 cudaError_t
34079 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorSetOnActiveProcess`
34081 See Also
34082 --------
34083 :py:obj:`~.cudaGraphicsVDPAURegisterVideoSurface`, :py:obj:`~.cudaGraphicsVDPAURegisterOutputSurface`, :py:obj:`~.cudaDeviceReset`
34084 """
34085 cdef cyruntime.VdpGetProcAddress *cyvdpGetProcAddress
34086 if vdpGetProcAddress is None:
34087 cyvdpGetProcAddress = <cyruntime.VdpGetProcAddress*><void_ptr>NULL
34088 elif isinstance(vdpGetProcAddress, (VdpGetProcAddress,)):
34089 pvdpGetProcAddress = vdpGetProcAddress.getPtr()
34090 cyvdpGetProcAddress = <cyruntime.VdpGetProcAddress*><void_ptr>pvdpGetProcAddress
34091 elif isinstance(vdpGetProcAddress, (int)):
34092 cyvdpGetProcAddress = <cyruntime.VdpGetProcAddress*><void_ptr>vdpGetProcAddress
34093 else:
34094 raise TypeError("Argument 'vdpGetProcAddress' is not instance of type (expected <class 'int, runtime.VdpGetProcAddress'>, found " + str(type(vdpGetProcAddress)))
34095 cdef cyruntime.VdpDevice cyvdpDevice
34096 if vdpDevice is None:
34097 pvdpDevice = 0
34098 elif isinstance(vdpDevice, (VdpDevice,)):
34099 pvdpDevice = int(vdpDevice)
34100 else:
34101 pvdpDevice = int(VdpDevice(vdpDevice))
34102 cyvdpDevice = <cyruntime.VdpDevice><void_ptr>pvdpDevice
34103 with nogil:
34104 err = cyruntime.cudaVDPAUSetVDPAUDevice(device, cyvdpDevice, cyvdpGetProcAddress)
34105 return (_dict_cudaError_t[err],)
34107@cython.embedsignature(True)
34108def cudaGraphicsVDPAURegisterVideoSurface(vdpSurface, unsigned int flags):
34109 """ Register a VdpVideoSurface object.
34111 Registers the VdpVideoSurface specified by `vdpSurface` for access by
34112 CUDA. A handle to the registered object is returned as `resource`. The
34113 surface's intended usage is specified using `flags`, as follows:
34115 - :py:obj:`~.cudaGraphicsMapFlagsNone`: Specifies no hints about how
34116 this resource will be used. It is therefore assumed that this
34117 resource will be read from and written to by CUDA. This is the
34118 default value.
34120 - :py:obj:`~.cudaGraphicsMapFlagsReadOnly`: Specifies that CUDA will
34121 not write to this resource.
34123 - :py:obj:`~.cudaGraphicsMapFlagsWriteDiscard`: Specifies that CUDA
34124 will not read from this resource and will write over the entire
34125 contents of the resource, so none of the data previously stored in
34126 the resource will be preserved.
34128 Parameters
34129 ----------
34130 vdpSurface : :py:obj:`~.VdpVideoSurface`
34131 VDPAU object to be registered
34132 flags : unsigned int
34133 Map flags
34135 Returns
34136 -------
34137 cudaError_t
34138 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
34139 resource : :py:obj:`~.cudaGraphicsResource`
34140 Pointer to the returned object handle
34142 See Also
34143 --------
34144 :py:obj:`~.cudaVDPAUSetVDPAUDevice`, :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`, :py:obj:`~.cuGraphicsVDPAURegisterVideoSurface`
34145 """
34146 cdef cyruntime.VdpVideoSurface cyvdpSurface
34147 if vdpSurface is None:
34148 pvdpSurface = 0
34149 elif isinstance(vdpSurface, (VdpVideoSurface,)):
34150 pvdpSurface = int(vdpSurface)
34151 else:
34152 pvdpSurface = int(VdpVideoSurface(vdpSurface))
34153 cyvdpSurface = <cyruntime.VdpVideoSurface><void_ptr>pvdpSurface
34154 cdef cudaGraphicsResource_t resource = cudaGraphicsResource_t()
34155 with nogil:
34156 err = cyruntime.cudaGraphicsVDPAURegisterVideoSurface(resource._pvt_ptr, cyvdpSurface, flags)
34157 if err != cyruntime.cudaSuccess:
34158 return (_dict_cudaError_t[err], None)
34159 return (_dict_cudaError_t[err], resource)
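# Usage sketch (illustrative). Registering a decoded VDPAU video surface for
# read-only CUDA access; `vdp_surface` is assumed to be a VdpVideoSurface
# created against the device configured via cudaVDPAUSetVDPAUDevice:
#
#     flags = cudaGraphicsMapFlags.cudaGraphicsMapFlagsReadOnly
#     err, resource = cudaGraphicsVDPAURegisterVideoSurface(vdp_surface, flags)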
34161@cython.embedsignature(True)
34162def cudaGraphicsVDPAURegisterOutputSurface(vdpSurface, unsigned int flags):
34163 """ Register a VdpOutputSurface object.
34165 Registers the VdpOutputSurface specified by `vdpSurface` for access by
34166 CUDA. A handle to the registered object is returned as `resource`. The
34167 surface's intended usage is specified using `flags`, as follows:
34169 - :py:obj:`~.cudaGraphicsMapFlagsNone`: Specifies no hints about how
34170 this resource will be used. It is therefore assumed that this
34171 resource will be read from and written to by CUDA. This is the
34172 default value.
34174 - :py:obj:`~.cudaGraphicsMapFlagsReadOnly`: Specifies that CUDA will
34175 not write to this resource.
34177 - :py:obj:`~.cudaGraphicsMapFlagsWriteDiscard`: Specifies that CUDA
34178 will not read from this resource and will write over the entire
34179 contents of the resource, so none of the data previously stored in
34180 the resource will be preserved.
34182 Parameters
34183 ----------
34184 vdpSurface : :py:obj:`~.VdpOutputSurface`
34185 VDPAU object to be registered
34186 flags : unsigned int
34187 Map flags
34189 Returns
34190 -------
34191 cudaError_t
34192 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
34193 resource : :py:obj:`~.cudaGraphicsResource`
34194 Pointer to the returned object handle
34196 See Also
34197 --------
34198 :py:obj:`~.cudaVDPAUSetVDPAUDevice`, :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`, :py:obj:`~.cuGraphicsVDPAURegisterOutputSurface`
34199 """
34200 cdef cyruntime.VdpOutputSurface cyvdpSurface
34201 if vdpSurface is None:
34202 pvdpSurface = 0
34203 elif isinstance(vdpSurface, (VdpOutputSurface,)):
34204 pvdpSurface = int(vdpSurface)
34205 else:
34206 pvdpSurface = int(VdpOutputSurface(vdpSurface))
34207 cyvdpSurface = <cyruntime.VdpOutputSurface><void_ptr>pvdpSurface
34208 cdef cudaGraphicsResource_t resource = cudaGraphicsResource_t()
34209 with nogil:
34210 err = cyruntime.cudaGraphicsVDPAURegisterOutputSurface(resource._pvt_ptr, cyvdpSurface, flags)
34211 if err != cyruntime.cudaSuccess:
34212 return (_dict_cudaError_t[err], None)
34213 return (_dict_cudaError_t[err], resource)
34216@cython.embedsignature(True)
34217def getLocalRuntimeVersion():
34218 """ Returns the CUDA Runtime version of local shared library.
34220 Returns in `*runtimeVersion` the version number of the current CUDA
34221 Runtime instance. The version is returned as (1000 * major + 10 *
34222 minor). For example, CUDA 9.2 would be represented by 9020.
34224 As of CUDA 12.0, this function no longer initializes CUDA. The purpose
34225 of this API is solely to return a compile-time constant stating the
34226 CUDA Toolkit version in the above format.
34228 This function automatically returns :py:obj:`~.cudaErrorInvalidValue`
34229 if the `runtimeVersion` argument is NULL.
34231 Returns
34232 -------
34233 cudaError_t
34234 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
34235 runtimeVersion : int
34236 Returns the CUDA Runtime version.
34238 See Also
34239 --------
34240 :py:obj:`~.cudaDriverGetVersion`, :py:obj:`~.cuDriverGetVersion`
34241 """
34242 cdef int runtimeVersion = 0
34243 err = cyruntime.getLocalRuntimeVersion(&runtimeVersion)
34244 return (cudaError_t(err), runtimeVersion)
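# Usage sketch (illustrative): decoding the packed version number returned by
# getLocalRuntimeVersion():
#
#     err, version = getLocalRuntimeVersion()
#     major, minor = version // 1000, (version % 1000) // 10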
34247cdef class cudaBindingsRuntimeGlobal:
34248 cdef map[void_ptr, void*] _allocated
34250 def __dealloc__(self):
34251 for item in self._allocated:
34252 free(item.second)
34253 self._allocated.clear()
34255cdef cudaBindingsRuntimeGlobal m_global = cudaBindingsRuntimeGlobal()
34258@cython.embedsignature(True)
34259def sizeof(objType):
34260 """ Returns the size of provided CUDA Python structure in bytes
34262 Parameters
34263 ----------
34264 objType : Any
34265 CUDA Python object
34267 Returns
34268 -------
34269 size : int
34270 The size of `objType` in bytes
34271 """
34272 if objType == dim3:
34273 return sizeof(cyruntime.dim3)
34274 if objType == cudaDevResourceDesc_t:
34275 return sizeof(cyruntime.cudaDevResourceDesc_t)
34276 if objType == cudaExecutionContext_t:
34277 return sizeof(cyruntime.cudaExecutionContext_t)
34278 if objType == cudaChannelFormatDesc:
34279 return sizeof(cyruntime.cudaChannelFormatDesc)
34280 if objType == cudaArray_t:
34281 return sizeof(cyruntime.cudaArray_t)
34282 if objType == cudaArray_const_t:
34283 return sizeof(cyruntime.cudaArray_const_t)
34284 if objType == cudaMipmappedArray_t:
34285 return sizeof(cyruntime.cudaMipmappedArray_t)
34286 if objType == cudaMipmappedArray_const_t:
34287 return sizeof(cyruntime.cudaMipmappedArray_const_t)
34288 if objType == cudaArraySparseProperties:
34289 return sizeof(cyruntime.cudaArraySparseProperties)
34290 if objType == cudaArrayMemoryRequirements:
34291 return sizeof(cyruntime.cudaArrayMemoryRequirements)
34292 if objType == cudaPitchedPtr:
34293 return sizeof(cyruntime.cudaPitchedPtr)
34294 if objType == cudaExtent:
34295 return sizeof(cyruntime.cudaExtent)
34296 if objType == cudaPos:
34297 return sizeof(cyruntime.cudaPos)
34298 if objType == cudaMemcpy3DParms:
34299 return sizeof(cyruntime.cudaMemcpy3DParms)
34300 if objType == cudaMemcpyNodeParams:
34301 return sizeof(cyruntime.cudaMemcpyNodeParams)
34302 if objType == cudaMemcpy3DPeerParms:
34303 return sizeof(cyruntime.cudaMemcpy3DPeerParms)
34304 if objType == cudaMemsetParams:
34305 return sizeof(cyruntime.cudaMemsetParams)
34306 if objType == cudaMemsetParamsV2:
34307 return sizeof(cyruntime.cudaMemsetParamsV2)
34308 if objType == cudaAccessPolicyWindow:
34309 return sizeof(cyruntime.cudaAccessPolicyWindow)
34310 if objType == cudaHostFn_t:
34311 return sizeof(cyruntime.cudaHostFn_t)
34312 if objType == cudaHostNodeParams:
34313 return sizeof(cyruntime.cudaHostNodeParams)
34314 if objType == cudaHostNodeParamsV2:
34315 return sizeof(cyruntime.cudaHostNodeParamsV2)
34316 if objType == cudaResourceDesc:
34317 return sizeof(cyruntime.cudaResourceDesc)
34318 if objType == cudaResourceViewDesc:
34319 return sizeof(cyruntime.cudaResourceViewDesc)
34320 if objType == cudaPointerAttributes:
34321 return sizeof(cyruntime.cudaPointerAttributes)
34322 if objType == cudaFuncAttributes:
34323 return sizeof(cyruntime.cudaFuncAttributes)
34324 if objType == cudaMemLocation:
34325 return sizeof(cyruntime.cudaMemLocation)
34326 if objType == cudaMemAccessDesc:
34327 return sizeof(cyruntime.cudaMemAccessDesc)
34328 if objType == cudaMemPoolProps:
34329 return sizeof(cyruntime.cudaMemPoolProps)
34330 if objType == cudaMemPoolPtrExportData:
34331 return sizeof(cyruntime.cudaMemPoolPtrExportData)
34332 if objType == cudaMemAllocNodeParams:
34333 return sizeof(cyruntime.cudaMemAllocNodeParams)
34334 if objType == cudaMemAllocNodeParamsV2:
34335 return sizeof(cyruntime.cudaMemAllocNodeParamsV2)
34336 if objType == cudaMemFreeNodeParams:
34337 return sizeof(cyruntime.cudaMemFreeNodeParams)
34338 if objType == cudaMemcpyAttributes:
34339 return sizeof(cyruntime.cudaMemcpyAttributes)
34340 if objType == cudaOffset3D:
34341 return sizeof(cyruntime.cudaOffset3D)
34342 if objType == cudaMemcpy3DOperand:
34343 return sizeof(cyruntime.cudaMemcpy3DOperand)
34344 if objType == cudaMemcpy3DBatchOp:
34345 return sizeof(cyruntime.cudaMemcpy3DBatchOp)
34346 if objType == CUuuid_st:
34347 return sizeof(cyruntime.CUuuid_st)
34348 if objType == CUuuid:
34349 return sizeof(cyruntime.CUuuid)
34350 if objType == cudaUUID_t:
34351 return sizeof(cyruntime.cudaUUID_t)
34352 if objType == cudaDeviceProp:
34353 return sizeof(cyruntime.cudaDeviceProp)
34354 if objType == cudaIpcEventHandle_st:
34355 return sizeof(cyruntime.cudaIpcEventHandle_st)
34356 if objType == cudaIpcEventHandle_t:
34357 return sizeof(cyruntime.cudaIpcEventHandle_t)
34358 if objType == cudaIpcMemHandle_st:
34359 return sizeof(cyruntime.cudaIpcMemHandle_st)
34360 if objType == cudaIpcMemHandle_t:
34361 return sizeof(cyruntime.cudaIpcMemHandle_t)
34362 if objType == cudaMemFabricHandle_st:
34363 return sizeof(cyruntime.cudaMemFabricHandle_st)
34364 if objType == cudaMemFabricHandle_t:
34365 return sizeof(cyruntime.cudaMemFabricHandle_t)
34366 if objType == cudaExternalMemoryHandleDesc:
34367 return sizeof(cyruntime.cudaExternalMemoryHandleDesc)
34368 if objType == cudaExternalMemoryBufferDesc:
34369 return sizeof(cyruntime.cudaExternalMemoryBufferDesc)
34370 if objType == cudaExternalMemoryMipmappedArrayDesc:
34371 return sizeof(cyruntime.cudaExternalMemoryMipmappedArrayDesc)
34372 if objType == cudaExternalSemaphoreHandleDesc:
34373 return sizeof(cyruntime.cudaExternalSemaphoreHandleDesc)
34374 if objType == cudaExternalSemaphoreSignalParams:
34375 return sizeof(cyruntime.cudaExternalSemaphoreSignalParams)
34376 if objType == cudaExternalSemaphoreWaitParams:
34377 return sizeof(cyruntime.cudaExternalSemaphoreWaitParams)
34378 if objType == cudaDevSmResource:
34379 return sizeof(cyruntime.cudaDevSmResource)
34380 if objType == cudaDevWorkqueueConfigResource:
34381 return sizeof(cyruntime.cudaDevWorkqueueConfigResource)
34382 if objType == cudaDevWorkqueueResource:
34383 return sizeof(cyruntime.cudaDevWorkqueueResource)
34384 if objType == cudaDevSmResourceGroupParams_st:
34385 return sizeof(cyruntime.cudaDevSmResourceGroupParams_st)
34386 if objType == cudaDevSmResourceGroupParams:
34387 return sizeof(cyruntime.cudaDevSmResourceGroupParams)
34388 if objType == cudaDevResource_st:
34389 return sizeof(cyruntime.cudaDevResource_st)
34390 if objType == cudaDevResource:
34391 return sizeof(cyruntime.cudaDevResource)
34392 if objType == cudaStream_t:
34393 return sizeof(cyruntime.cudaStream_t)
34394 if objType == cudaEvent_t:
34395 return sizeof(cyruntime.cudaEvent_t)
34396 if objType == cudaGraphicsResource_t:
34397 return sizeof(cyruntime.cudaGraphicsResource_t)
34398 if objType == cudaExternalMemory_t:
34399 return sizeof(cyruntime.cudaExternalMemory_t)
34400 if objType == cudaExternalSemaphore_t:
34401 return sizeof(cyruntime.cudaExternalSemaphore_t)
34402 if objType == cudaGraph_t:
34403 return sizeof(cyruntime.cudaGraph_t)
34404 if objType == cudaGraphNode_t:
34405 return sizeof(cyruntime.cudaGraphNode_t)
34406 if objType == cudaUserObject_t:
34407 return sizeof(cyruntime.cudaUserObject_t)
34408 if objType == cudaGraphConditionalHandle:
34409 return sizeof(cyruntime.cudaGraphConditionalHandle)
34410 if objType == cudaFunction_t:
34411 return sizeof(cyruntime.cudaFunction_t)
34412 if objType == cudaKernel_t:
34413 return sizeof(cyruntime.cudaKernel_t)
34414 if objType == cudalibraryHostUniversalFunctionAndDataTable:
34415 return sizeof(cyruntime.cudalibraryHostUniversalFunctionAndDataTable)
34416 if objType == cudaLibrary_t:
34417 return sizeof(cyruntime.cudaLibrary_t)
34418 if objType == cudaMemPool_t:
34419 return sizeof(cyruntime.cudaMemPool_t)
34420 if objType == cudaKernelNodeParams:
34421 return sizeof(cyruntime.cudaKernelNodeParams)
34422 if objType == cudaKernelNodeParamsV2:
34423 return sizeof(cyruntime.cudaKernelNodeParamsV2)
34424 if objType == cudaExternalSemaphoreSignalNodeParams:
34425 return sizeof(cyruntime.cudaExternalSemaphoreSignalNodeParams)
34426 if objType == cudaExternalSemaphoreSignalNodeParamsV2:
34427 return sizeof(cyruntime.cudaExternalSemaphoreSignalNodeParamsV2)
34428 if objType == cudaExternalSemaphoreWaitNodeParams:
34429 return sizeof(cyruntime.cudaExternalSemaphoreWaitNodeParams)
34430 if objType == cudaExternalSemaphoreWaitNodeParamsV2:
34431 return sizeof(cyruntime.cudaExternalSemaphoreWaitNodeParamsV2)
34432 if objType == cudaConditionalNodeParams:
34433 return sizeof(cyruntime.cudaConditionalNodeParams)
34434 if objType == cudaChildGraphNodeParams:
34435 return sizeof(cyruntime.cudaChildGraphNodeParams)
34436 if objType == cudaEventRecordNodeParams:
34437 return sizeof(cyruntime.cudaEventRecordNodeParams)
34438 if objType == cudaEventWaitNodeParams:
34439 return sizeof(cyruntime.cudaEventWaitNodeParams)
34440 if objType == cudaGraphNodeParams:
34441 return sizeof(cyruntime.cudaGraphNodeParams)
34442 if objType == cudaGraphEdgeData_st:
34443 return sizeof(cyruntime.cudaGraphEdgeData_st)
34444 if objType == cudaGraphEdgeData:
34445 return sizeof(cyruntime.cudaGraphEdgeData)
34446 if objType == cudaGraphExec_t:
34447 return sizeof(cyruntime.cudaGraphExec_t)
34448 if objType == cudaGraphInstantiateParams_st:
34449 return sizeof(cyruntime.cudaGraphInstantiateParams_st)
34450 if objType == cudaGraphInstantiateParams:
34451 return sizeof(cyruntime.cudaGraphInstantiateParams)
34452 if objType == cudaGraphExecUpdateResultInfo_st:
34453 return sizeof(cyruntime.cudaGraphExecUpdateResultInfo_st)
34454 if objType == cudaGraphExecUpdateResultInfo:
34455 return sizeof(cyruntime.cudaGraphExecUpdateResultInfo)
34456 if objType == cudaGraphDeviceNode_t:
34457 return sizeof(cyruntime.cudaGraphDeviceNode_t)
34458 if objType == cudaGraphKernelNodeUpdate:
34459 return sizeof(cyruntime.cudaGraphKernelNodeUpdate)
34460 if objType == cudaLaunchMemSyncDomainMap_st:
34461 return sizeof(cyruntime.cudaLaunchMemSyncDomainMap_st)
34462 if objType == cudaLaunchMemSyncDomainMap:
34463 return sizeof(cyruntime.cudaLaunchMemSyncDomainMap)
34464 if objType == cudaLaunchAttributeValue:
34465 return sizeof(cyruntime.cudaLaunchAttributeValue)
34466 if objType == cudaLaunchAttribute_st:
34467 return sizeof(cyruntime.cudaLaunchAttribute_st)
34468 if objType == cudaLaunchAttribute:
34469 return sizeof(cyruntime.cudaLaunchAttribute)
34470 if objType == cudaAsyncCallbackHandle_t:
34471 return sizeof(cyruntime.cudaAsyncCallbackHandle_t)
34472 if objType == cudaAsyncNotificationInfo:
34473 return sizeof(cyruntime.cudaAsyncNotificationInfo)
34474 if objType == cudaAsyncNotificationInfo_t:
34475 return sizeof(cyruntime.cudaAsyncNotificationInfo_t)
34476 if objType == cudaAsyncCallback:
34477 return sizeof(cyruntime.cudaAsyncCallback)
34478 if objType == cudaLogsCallbackHandle:
34479 return sizeof(cyruntime.cudaLogsCallbackHandle)
34480 if objType == cudaLogIterator:
34481 return sizeof(cyruntime.cudaLogIterator)
34482 if objType == cudaSurfaceObject_t:
34483 return sizeof(cyruntime.cudaSurfaceObject_t)
34484 if objType == cudaTextureDesc:
34485 return sizeof(cyruntime.cudaTextureDesc)
34486 if objType == cudaTextureObject_t:
34487 return sizeof(cyruntime.cudaTextureObject_t)
34488 if objType == cudaStreamCallback_t:
34489 return sizeof(cyruntime.cudaStreamCallback_t)
34490 if objType == cudaLogsCallback_t:
34491 return sizeof(cyruntime.cudaLogsCallback_t)
34492 if objType == GLenum:
34493 return sizeof(cyruntime.GLenum)
34494 if objType == GLuint:
34495 return sizeof(cyruntime.GLuint)
34496 if objType == EGLImageKHR:
34497 return sizeof(cyruntime.EGLImageKHR)
34498 if objType == EGLStreamKHR:
34499 return sizeof(cyruntime.EGLStreamKHR)
34500 if objType == EGLint:
34501 return sizeof(cyruntime.EGLint)
34502 if objType == EGLSyncKHR:
34503 return sizeof(cyruntime.EGLSyncKHR)
34504 if objType == VdpDevice:
34505 return sizeof(cyruntime.VdpDevice)
34506 if objType == VdpGetProcAddress:
34507 return sizeof(cyruntime.VdpGetProcAddress)
34508 if objType == VdpVideoSurface:
34509 return sizeof(cyruntime.VdpVideoSurface)
34510 if objType == VdpOutputSurface:
34511 return sizeof(cyruntime.VdpOutputSurface)
34512 if objType == cudaStreamAttrValue:
34513 return sizeof(cyruntime.cudaStreamAttrValue)
34514 if objType == cudaKernelNodeAttrValue:
34515 return sizeof(cyruntime.cudaKernelNodeAttrValue)
34516 if objType == cudaEglPlaneDesc_st:
34517 return sizeof(cyruntime.cudaEglPlaneDesc_st)
34518 if objType == cudaEglPlaneDesc:
34519 return sizeof(cyruntime.cudaEglPlaneDesc)
34520 if objType == cudaEglFrame_st:
34521 return sizeof(cyruntime.cudaEglFrame_st)
34522 if objType == cudaEglFrame:
34523 return sizeof(cyruntime.cudaEglFrame)
34524 if objType == cudaEglStreamConnection:
34525 return sizeof(cyruntime.cudaEglStreamConnection)
34526 raise TypeError("Unknown type: " + str(objType))
34528cdef int _add_native_handle_getters() except?-1:
34529 from cuda.bindings.utils import _add_cuda_native_handle_getter
34530 def cudaDevResourceDesc_t_getter(cudaDevResourceDesc_t x): return <uintptr_t><void*><cyruntime.cudaDevResourceDesc_t>(x._pvt_ptr[0])
34531 _add_cuda_native_handle_getter(cudaDevResourceDesc_t, cudaDevResourceDesc_t_getter)
34532 def cudaExecutionContext_t_getter(cudaExecutionContext_t x): return <uintptr_t><void*><cyruntime.cudaExecutionContext_t>(x._pvt_ptr[0])
34533 _add_cuda_native_handle_getter(cudaExecutionContext_t, cudaExecutionContext_t_getter)
34534 def cudaArray_t_getter(cudaArray_t x): return <uintptr_t><void*><cyruntime.cudaArray_t>(x._pvt_ptr[0])
34535 _add_cuda_native_handle_getter(cudaArray_t, cudaArray_t_getter)
34536 def cudaArray_const_t_getter(cudaArray_const_t x): return <uintptr_t><void*><cyruntime.cudaArray_const_t>(x._pvt_ptr[0])
34537 _add_cuda_native_handle_getter(cudaArray_const_t, cudaArray_const_t_getter)
34538 def cudaMipmappedArray_t_getter(cudaMipmappedArray_t x): return <uintptr_t><void*><cyruntime.cudaMipmappedArray_t>(x._pvt_ptr[0])
34539 _add_cuda_native_handle_getter(cudaMipmappedArray_t, cudaMipmappedArray_t_getter)
34540 def cudaMipmappedArray_const_t_getter(cudaMipmappedArray_const_t x): return <uintptr_t><void*><cyruntime.cudaMipmappedArray_const_t>(x._pvt_ptr[0])
34541 _add_cuda_native_handle_getter(cudaMipmappedArray_const_t, cudaMipmappedArray_const_t_getter)
34542 def cudaStream_t_getter(cudaStream_t x): return <uintptr_t><void*><cyruntime.cudaStream_t>(x._pvt_ptr[0])
34543 _add_cuda_native_handle_getter(cudaStream_t, cudaStream_t_getter)
34544 def cudaEvent_t_getter(cudaEvent_t x): return <uintptr_t><void*><cyruntime.cudaEvent_t>(x._pvt_ptr[0])
34545 _add_cuda_native_handle_getter(cudaEvent_t, cudaEvent_t_getter)
34546 def cudaGraphicsResource_t_getter(cudaGraphicsResource_t x): return <uintptr_t><void*><cyruntime.cudaGraphicsResource_t>(x._pvt_ptr[0])
34547 _add_cuda_native_handle_getter(cudaGraphicsResource_t, cudaGraphicsResource_t_getter)
34548 def cudaExternalMemory_t_getter(cudaExternalMemory_t x): return <uintptr_t><void*><cyruntime.cudaExternalMemory_t>(x._pvt_ptr[0])
34549 _add_cuda_native_handle_getter(cudaExternalMemory_t, cudaExternalMemory_t_getter)
34550 def cudaExternalSemaphore_t_getter(cudaExternalSemaphore_t x): return <uintptr_t><void*><cyruntime.cudaExternalSemaphore_t>(x._pvt_ptr[0])
34551 _add_cuda_native_handle_getter(cudaExternalSemaphore_t, cudaExternalSemaphore_t_getter)
34552 def cudaGraph_t_getter(cudaGraph_t x): return <uintptr_t><void*><cyruntime.cudaGraph_t>(x._pvt_ptr[0])
34553 _add_cuda_native_handle_getter(cudaGraph_t, cudaGraph_t_getter)
34554 def cudaGraphNode_t_getter(cudaGraphNode_t x): return <uintptr_t><void*><cyruntime.cudaGraphNode_t>(x._pvt_ptr[0])
34555 _add_cuda_native_handle_getter(cudaGraphNode_t, cudaGraphNode_t_getter)
34556 def cudaUserObject_t_getter(cudaUserObject_t x): return <uintptr_t><void*><cyruntime.cudaUserObject_t>(x._pvt_ptr[0])
34557 _add_cuda_native_handle_getter(cudaUserObject_t, cudaUserObject_t_getter)
34558 def cudaFunction_t_getter(cudaFunction_t x): return <uintptr_t><void*><cyruntime.cudaFunction_t>(x._pvt_ptr[0])
34559 _add_cuda_native_handle_getter(cudaFunction_t, cudaFunction_t_getter)
34560 def cudaKernel_t_getter(cudaKernel_t x): return <uintptr_t><void*><cyruntime.cudaKernel_t>(x._pvt_ptr[0])
34561 _add_cuda_native_handle_getter(cudaKernel_t, cudaKernel_t_getter)
34562 def cudaLibrary_t_getter(cudaLibrary_t x): return <uintptr_t><void*><cyruntime.cudaLibrary_t>(x._pvt_ptr[0])
34563 _add_cuda_native_handle_getter(cudaLibrary_t, cudaLibrary_t_getter)
34564 def cudaMemPool_t_getter(cudaMemPool_t x): return <uintptr_t><void*><cyruntime.cudaMemPool_t>(x._pvt_ptr[0])
34565 _add_cuda_native_handle_getter(cudaMemPool_t, cudaMemPool_t_getter)
34566 def cudaGraphExec_t_getter(cudaGraphExec_t x): return <uintptr_t><void*><cyruntime.cudaGraphExec_t>(x._pvt_ptr[0])
34567 _add_cuda_native_handle_getter(cudaGraphExec_t, cudaGraphExec_t_getter)
34568 def cudaGraphDeviceNode_t_getter(cudaGraphDeviceNode_t x): return <uintptr_t><void*><cyruntime.cudaGraphDeviceNode_t>(x._pvt_ptr[0])
34569 _add_cuda_native_handle_getter(cudaGraphDeviceNode_t, cudaGraphDeviceNode_t_getter)
34570 def cudaAsyncCallbackHandle_t_getter(cudaAsyncCallbackHandle_t x): return <uintptr_t><void*><cyruntime.cudaAsyncCallbackHandle_t>(x._pvt_ptr[0])
34571 _add_cuda_native_handle_getter(cudaAsyncCallbackHandle_t, cudaAsyncCallbackHandle_t_getter)
34572 def cudaLogsCallbackHandle_getter(cudaLogsCallbackHandle x): return <uintptr_t><void*><cyruntime.cudaLogsCallbackHandle>(x._pvt_ptr[0])
34573 _add_cuda_native_handle_getter(cudaLogsCallbackHandle, cudaLogsCallbackHandle_getter)
34574 def EGLImageKHR_getter(EGLImageKHR x): return <uintptr_t><void*><cyruntime.EGLImageKHR>(x._pvt_ptr[0])
34575 _add_cuda_native_handle_getter(EGLImageKHR, EGLImageKHR_getter)
34576 def EGLStreamKHR_getter(EGLStreamKHR x): return <uintptr_t><void*><cyruntime.EGLStreamKHR>(x._pvt_ptr[0])
34577 _add_cuda_native_handle_getter(EGLStreamKHR, EGLStreamKHR_getter)
34578 def EGLSyncKHR_getter(EGLSyncKHR x): return <uintptr_t><void*><cyruntime.EGLSyncKHR>(x._pvt_ptr[0])
34579 _add_cuda_native_handle_getter(EGLSyncKHR, EGLSyncKHR_getter)
34580 def cudaEglStreamConnection_getter(cudaEglStreamConnection x): return <uintptr_t><void*><cyruntime.cudaEglStreamConnection>(x._pvt_ptr[0])
34581 _add_cuda_native_handle_getter(cudaEglStreamConnection, cudaEglStreamConnection_getter)
34582 return 0
34583_add_native_handle_getters()
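# Usage sketch (illustrative). The getters installed above let the public
# helper in cuda.bindings.utils recover the raw address from a wrapper
# object, assuming get_cuda_native_handle is available in this release:
#
#     from cuda.bindings.utils import get_cuda_native_handle
#     err, stream = cudaStreamCreate()
#     handle = get_cuda_native_handle(stream)  # plain int address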