Coverage for cuda / core / experimental / _device.pyx: 92%
413 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-10 01:19 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-10 01:19 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
5cimport cpython
6from libc.stdint cimport uintptr_t
8from cuda.bindings cimport cydriver
9from cuda.core.experimental._utils.cuda_utils cimport HANDLE_RETURN
11import threading
12from typing import Optional, TYPE_CHECKING, Union
14from cuda.core.experimental._context import Context, ContextOptions
15from cuda.core.experimental._event import Event, EventOptions
16from cuda.core.experimental._graph import GraphBuilder
17from cuda.core.experimental._stream import IsStreamT, Stream, StreamOptions
18from cuda.core.experimental._utils.clear_error_support import assert_type
19from cuda.core.experimental._utils.cuda_utils import (
20 ComputeCapability,
21 CUDAError,
22 driver,
23 handle_return,
24 runtime,
25)
26from cuda.core.experimental._stream cimport default_stream
28if TYPE_CHECKING:
29 from cuda.core.experimental._memory import Buffer, MemoryResource
31# TODO: I prefer to type these as "cdef object" and avoid accessing them from within Python,
32# but it seems it is very convenient to expose them for testing purposes...
33_tls = threading.local()
34_lock = threading.Lock()
35cdef bint _is_cuInit = False
38cdef class DeviceProperties:
39 """
40 A class to query various attributes of a CUDA device.
42 Attributes are read-only and provide information about the device.
43 """
44 cdef:
45 int _handle
46 dict _cache
48 def __init__(self, *args, **kwargs):
49 raise RuntimeError("DeviceProperties cannot be instantiated directly. Please use Device APIs.")
51 @classmethod
52 def _init(cls, handle):
53 cdef DeviceProperties self = DeviceProperties.__new__(cls)
54 self._handle = handle
55 self._cache = {}
56 return self
58 cdef inline _get_attribute(self, cydriver.CUdevice_attribute attr):
59 """Retrieve the attribute value directly from the driver."""
60 cdef int val
61 with nogil:
62 HANDLE_RETURN(cydriver.cuDeviceGetAttribute(&val, attr, self._handle))
63 return val
65 cdef _get_cached_attribute(self, attr):
66 """Retrieve the attribute value, using cache if applicable."""
67 if attr not in self._cache:
68 self._cache[attr] = self._get_attribute(attr)
69 return self._cache[attr]
71 @property
72 def max_threads_per_block(self) -> int:
73 """int: Maximum number of threads per block."""
74 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK)
76 @property
77 def max_block_dim_x(self) -> int:
78 """int: Maximum block dimension X."""
79 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X)
81 @property
82 def max_block_dim_y(self) -> int:
83 """int: Maximum block dimension Y."""
84 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y)
86 @property
87 def max_block_dim_z(self) -> int:
88 """int: Maximum block dimension Z."""
89 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z)
91 @property
92 def max_grid_dim_x(self) -> int:
93 """int: Maximum grid dimension X."""
94 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X)
96 @property
97 def max_grid_dim_y(self) -> int:
98 """int: Maximum grid dimension Y."""
99 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y)
101 @property
102 def max_grid_dim_z(self) -> int:
103 """int: Maximum grid dimension Z."""
104 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z)
106 @property
107 def max_shared_memory_per_block(self) -> int:
108 """int: Maximum shared memory available per block in bytes."""
109 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK)
111 @property
112 def total_constant_memory(self) -> int:
113 """int: Memory available on device for constant variables in a CUDA C kernel in bytes."""
114 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY)
116 @property
117 def warp_size(self) -> int:
118 """int: Warp size in threads."""
119 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE)
121 @property
122 def max_pitch(self) -> int:
123 """int: Maximum pitch in bytes allowed by memory copies."""
124 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PITCH)
126 @property
127 def maximum_texture1d_width(self) -> int:
128 """int: Maximum 1D texture width."""
129 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH)
131 @property
132 def maximum_texture1d_linear_width(self) -> int:
133 """int: Maximum width for a 1D texture bound to linear memory."""
134 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH)
136 @property
137 def maximum_texture1d_mipmapped_width(self) -> int:
138 """int: Maximum mipmapped 1D texture width."""
139 return self._get_cached_attribute(
140 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH
141 )
143 @property
144 def maximum_texture2d_width(self) -> int:
145 """int: Maximum 2D texture width."""
146 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH)
148 @property
149 def maximum_texture2d_height(self) -> int:
150 """int: Maximum 2D texture height."""
151 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT)
153 @property
154 def maximum_texture2d_linear_width(self) -> int:
155 """int: Maximum width for a 2D texture bound to linear memory."""
156 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH)
158 @property
159 def maximum_texture2d_linear_height(self) -> int:
160 """int: Maximum height for a 2D texture bound to linear memory."""
161 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT)
163 @property
164 def maximum_texture2d_linear_pitch(self) -> int:
165 """int: Maximum pitch in bytes for a 2D texture bound to linear memory."""
166 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH)
168 @property
169 def maximum_texture2d_mipmapped_width(self) -> int:
170 """int: Maximum mipmapped 2D texture width."""
171 return self._get_cached_attribute(
172 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH
173 )
175 @property
176 def maximum_texture2d_mipmapped_height(self) -> int:
177 """int: Maximum mipmapped 2D texture height."""
178 return self._get_cached_attribute(
179 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT
180 )
182 @property
183 def maximum_texture3d_width(self) -> int:
184 """int: Maximum 3D texture width."""
185 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH)
187 @property
188 def maximum_texture3d_height(self) -> int:
189 """int: Maximum 3D texture height."""
190 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT)
192 @property
193 def maximum_texture3d_depth(self) -> int:
194 """int: Maximum 3D texture depth."""
195 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH)
197 @property
198 def maximum_texture3d_width_alternate(self) -> int:
199 """int: Alternate maximum 3D texture width, 0 if no alternate maximum 3D texture size is supported."""
200 return self._get_cached_attribute(
201 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE
202 )
204 @property
205 def maximum_texture3d_height_alternate(self) -> int:
206 """int: Alternate maximum 3D texture height, 0 if no alternate maximum 3D texture size is supported."""
207 return self._get_cached_attribute(
208 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE
209 )
211 @property
212 def maximum_texture3d_depth_alternate(self) -> int:
213 """int: Alternate maximum 3D texture depth, 0 if no alternate maximum 3D texture size is supported."""
214 return self._get_cached_attribute(
215 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE
216 )
218 @property
219 def maximum_texturecubemap_width(self) -> int:
220 """int: Maximum cubemap texture width or height."""
221 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH)
223 @property
224 def maximum_texture1d_layered_width(self) -> int:
225 """int: Maximum 1D layered texture width."""
226 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH)
228 @property
229 def maximum_texture1d_layered_layers(self) -> int:
230 """int: Maximum layers in a 1D layered texture."""
231 return self._get_cached_attribute(
232 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS
233 )
235 @property
236 def maximum_texture2d_layered_width(self) -> int:
237 """int: Maximum 2D layered texture width."""
238 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH)
240 @property
241 def maximum_texture2d_layered_height(self) -> int:
242 """int: Maximum 2D layered texture height."""
243 return self._get_cached_attribute(
244 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT
245 )
247 @property
248 def maximum_texture2d_layered_layers(self) -> int:
249 """int: Maximum layers in a 2D layered texture."""
250 return self._get_cached_attribute(
251 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS
252 )
254 @property
255 def maximum_texturecubemap_layered_width(self) -> int:
256 """int: Maximum cubemap layered texture width or height."""
257 return self._get_cached_attribute(
258 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH
259 )
261 @property
262 def maximum_texturecubemap_layered_layers(self) -> int:
263 """int: Maximum layers in a cubemap layered texture."""
264 return self._get_cached_attribute(
265 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS
266 )
268 @property
269 def maximum_surface1d_width(self) -> int:
270 """int: Maximum 1D surface width."""
271 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH)
273 @property
274 def maximum_surface2d_width(self) -> int:
275 """int: Maximum 2D surface width."""
276 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH)
278 @property
279 def maximum_surface2d_height(self) -> int:
280 """int: Maximum 2D surface height."""
281 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT)
283 @property
284 def maximum_surface3d_width(self) -> int:
285 """int: Maximum 3D surface width."""
286 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH)
288 @property
289 def maximum_surface3d_height(self) -> int:
290 """int: Maximum 3D surface height."""
291 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT)
293 @property
294 def maximum_surface3d_depth(self) -> int:
295 """int: Maximum 3D surface depth."""
296 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH)
298 @property
299 def maximum_surface1d_layered_width(self) -> int:
300 """int: Maximum 1D layered surface width."""
301 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH)
303 @property
304 def maximum_surface1d_layered_layers(self) -> int:
305 """int: Maximum layers in a 1D layered surface."""
306 return self._get_cached_attribute(
307 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS
308 )
310 @property
311 def maximum_surface2d_layered_width(self) -> int:
312 """int: Maximum 2D layered surface width."""
313 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH)
315 @property
316 def maximum_surface2d_layered_height(self) -> int:
317 """int: Maximum 2D layered surface height."""
318 return self._get_cached_attribute(
319 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT
320 )
322 @property
323 def maximum_surface2d_layered_layers(self) -> int:
324 """int: Maximum layers in a 2D layered surface."""
325 return self._get_cached_attribute(
326 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS
327 )
329 @property
330 def maximum_surfacecubemap_width(self) -> int:
331 """int: Maximum cubemap surface width."""
332 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH)
334 @property
335 def maximum_surfacecubemap_layered_width(self) -> int:
336 """int: Maximum cubemap layered surface width."""
337 return self._get_cached_attribute(
338 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH
339 )
341 @property
342 def maximum_surfacecubemap_layered_layers(self) -> int:
343 """int: Maximum layers in a cubemap layered surface."""
344 return self._get_cached_attribute(
345 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS
346 )
348 @property
349 def max_registers_per_block(self) -> int:
350 """int: Maximum number of 32-bit registers available to a thread block."""
351 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK)
353 @property
354 def clock_rate(self) -> int:
355 """int: Typical clock frequency in kilohertz."""
356 return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CLOCK_RATE)
358 @property
359 def texture_alignment(self) -> int:
360 """int: Alignment requirement for textures."""
361 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT)
363 @property
364 def texture_pitch_alignment(self) -> int:
365 """int: Pitch alignment requirement for textures."""
366 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT)
368 @property
369 def gpu_overlap(self) -> bool:
370 """bool: Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead async_engine_count."""
371 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_OVERLAP))
373 @property
374 def multiprocessor_count(self) -> int:
375 """int: Number of multiprocessors on device."""
376 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT)
378 @property
379 def kernel_exec_timeout(self) -> bool:
380 """bool: Specifies whether there is a run time limit on kernels."""
381 return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT))
383 @property
384 def integrated(self) -> bool:
385 """bool: Device is integrated with host memory."""
386 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_INTEGRATED))
388 @property
389 def can_map_host_memory(self) -> bool:
390 """bool: Device can map host memory into CUDA address space."""
391 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY))
393 @property
394 def compute_mode(self) -> int:
395 """int: Compute mode (See CUcomputemode for details)."""
396 return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE)
398 @property
399 def concurrent_kernels(self) -> bool:
400 """bool: Device can possibly execute multiple kernels concurrently."""
401 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS))
403 @property
404 def ecc_enabled(self) -> bool:
405 """bool: Device has ECC support enabled."""
406 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_ECC_ENABLED))
408 @property
409 def pci_bus_id(self) -> int:
410 """int: PCI bus ID of the device."""
411 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_BUS_ID)
413 @property
414 def pci_device_id(self) -> int:
415 """int: PCI device ID of the device."""
416 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID)
418 @property
419 def pci_domain_id(self) -> int:
420 """int: PCI domain ID of the device."""
421 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID)
423 @property
424 def tcc_driver(self) -> bool:
425 """bool: Device is using TCC driver model."""
426 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TCC_DRIVER))
428 @property
429 def memory_clock_rate(self) -> int:
430 """int: Peak memory clock frequency in kilohertz."""
431 return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE)
433 @property
434 def global_memory_bus_width(self) -> int:
435 """int: Global memory bus width in bits."""
436 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH)
438 @property
439 def l2_cache_size(self) -> int:
440 """int: Size of L2 cache in bytes."""
441 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE)
443 @property
444 def max_threads_per_multiprocessor(self) -> int:
445 """int: Maximum resident threads per multiprocessor."""
446 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR)
448 @property
449 def unified_addressing(self) -> bool:
450 """bool: Device shares a unified address space with the host."""
451 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING))
453 @property
454 def compute_capability_major(self) -> int:
455 """int: Major compute capability version number."""
456 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR)
458 @property
459 def compute_capability_minor(self) -> int:
460 """int: Minor compute capability version number."""
461 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR)
463 @property
464 def global_l1_cache_supported(self) -> bool:
465 """bool: Device supports caching globals in L1."""
466 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED))
468 @property
469 def local_l1_cache_supported(self) -> bool:
470 """bool: Device supports caching locals in L1."""
471 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED))
473 @property
474 def max_shared_memory_per_multiprocessor(self) -> int:
475 """int: Maximum shared memory available per multiprocessor in bytes."""
476 return self._get_cached_attribute(
477 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR
478 )
480 @property
481 def max_registers_per_multiprocessor(self) -> int:
482 """int: Maximum number of 32-bit registers available per multiprocessor."""
483 return self._get_cached_attribute(
484 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR
485 )
487 @property
488 def managed_memory(self) -> bool:
489 """bool: Device can allocate managed memory on this system."""
490 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY))
492 @property
493 def multi_gpu_board(self) -> bool:
494 """bool: Device is on a multi-GPU board."""
495 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD))
497 @property
498 def multi_gpu_board_group_id(self) -> int:
499 """int: Unique id for a group of devices on the same multi-GPU board."""
500 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID)
502 @property
503 def host_native_atomic_supported(self) -> bool:
504 """bool: Link between the device and the host supports all native atomic operations."""
505 return bool(
506 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED)
507 )
509 @property
510 def single_to_double_precision_perf_ratio(self) -> int:
511 """int: Ratio of single precision performance (in floating-point operations per second) to double precision performance."""
512 return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO)
514 @property
515 def pageable_memory_access(self) -> bool:
516 """bool: Device supports coherently accessing pageable memory without calling cudaHostRegister on it."""
517 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS))
519 @property
520 def concurrent_managed_access(self) -> bool:
521 """bool: Device can coherently access managed memory concurrently with the CPU."""
522 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS))
524 @property
525 def compute_preemption_supported(self) -> bool:
526 """bool: Device supports compute preemption."""
527 return bool(
528 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED)
529 )
531 @property
532 def can_use_host_pointer_for_registered_mem(self) -> bool:
533 """bool: Device can access host registered memory at the same virtual address as the CPU."""
534 return bool(
535 self._get_cached_attribute(
536 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM
537 )
538 )
540 # TODO: A few attrs are missing here (NVIDIA/cuda-python#675)
542 @property
543 def cooperative_launch(self) -> bool:
544 """bool: Device supports launching cooperative kernels via cuLaunchCooperativeKernel."""
545 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH))
547 # TODO: A few attrs are missing here (NVIDIA/cuda-python#675)
549 @property
550 def max_shared_memory_per_block_optin(self) -> int:
551 """int: Maximum optin shared memory per block."""
552 return self._get_cached_attribute(
553 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN
554 )
556 @property
557 def pageable_memory_access_uses_host_page_tables(self) -> bool:
558 """bool: Device accesses pageable memory via the host's page tables."""
559 return bool(
560 self._get_cached_attribute(
561 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES
562 )
563 )
565 @property
566 def direct_managed_mem_access_from_host(self) -> bool:
567 """bool: The host can directly access managed memory on the device without migration."""
568 return bool(
569 self._get_cached_attribute(
570 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST
571 )
572 )
574 @property
575 def virtual_memory_management_supported(self) -> bool:
576 """bool: Device supports virtual memory management APIs like cuMemAddressReserve, cuMemCreate, cuMemMap and related APIs."""
577 return bool(
578 self._get_cached_attribute(
579 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED
580 )
581 )
583 @property
584 def handle_type_posix_file_descriptor_supported(self) -> bool:
585 """bool: Device supports exporting memory to a posix file descriptor with cuMemExportToShareableHandle, if requested via cuMemCreate."""
586 return bool(
587 self._get_cached_attribute(
588 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED
589 )
590 )
592 @property
593 def handle_type_win32_handle_supported(self) -> bool:
594 """bool: Device supports exporting memory to a Win32 NT handle with cuMemExportToShareableHandle, if requested via cuMemCreate."""
595 return bool(
596 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED)
597 )
599 @property
600 def handle_type_win32_kmt_handle_supported(self) -> bool:
601 """bool: Device supports exporting memory to a Win32 KMT handle with cuMemExportToShareableHandle, if requested via cuMemCreate."""
602 return bool(
603 self._get_cached_attribute(
604 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED
605 )
606 )
608 @property
609 def max_blocks_per_multiprocessor(self) -> int:
610 """int: Maximum number of blocks per multiprocessor."""
611 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR)
613 @property
614 def generic_compression_supported(self) -> bool:
615 """bool: Device supports compression of memory."""
616 return bool(
617 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED)
618 )
620 @property
621 def max_persisting_l2_cache_size(self) -> int:
622 """int: Maximum L2 persisting lines capacity setting in bytes."""
623 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE)
625 @property
626 def max_access_policy_window_size(self) -> int:
627 """int: Maximum value of CUaccessPolicyWindow.num_bytes."""
628 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE)
630 @property
631 def gpu_direct_rdma_with_cuda_vmm_supported(self) -> bool:
632 """bool: Device supports specifying the GPUDirect RDMA flag with cuMemCreate."""
633 return bool(
634 self._get_cached_attribute(
635 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED
636 )
637 )
639 @property
640 def reserved_shared_memory_per_block(self) -> int:
641 """int: Shared memory reserved by CUDA driver per block in bytes."""
642 return self._get_cached_attribute(
643 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK
644 )
646 @property
647 def sparse_cuda_array_supported(self) -> bool:
648 """bool: Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays."""
649 return bool(
650 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED)
651 )
653 @property
654 def read_only_host_register_supported(self) -> bool:
655 """bool: True if device supports using the cuMemHostRegister flag CU_MEMHOSTERGISTER_READ_ONLY to register memory that must be mapped as read-only to the GPU, False if not."""
656 return bool(
657 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED)
658 )
660 @property
661 def memory_pools_supported(self) -> bool:
662 """bool: Device supports using the cuMemAllocAsync and cuMemPool family of APIs."""
663 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED))
665 @property
666 def gpu_direct_rdma_supported(self) -> bool:
667 """bool: Device supports GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see https://docs.nvidia.com/cuda/gpudirect-rdma for more information)."""
668 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED))
670 @property
671 def gpu_direct_rdma_flush_writes_options(self) -> int:
672 """int: The returned attribute shall be interpreted as a bitmask, where the individual bits are described by the CUflushGPUDirectRDMAWritesOptions enum."""
673 return self._get_cached_attribute(
674 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS
675 )
677 @property
678 def gpu_direct_rdma_writes_ordering(self) -> int:
679 """int: GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated by the returned attribute. See CUGPUDirectRDMAWritesOrdering for the numerical values returned here."""
680 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING)
682 @property
683 def mempool_supported_handle_types(self) -> int:
684 """int: Handle types supported with mempool based IPC."""
685 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES)
687 @property
688 def deferred_mapping_cuda_array_supported(self) -> bool:
689 """bool: Device supports deferred mapping CUDA arrays and CUDA mipmapped arrays."""
690 return bool(
691 self._get_cached_attribute(
692 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED
693 )
694 )
696 @property
697 def numa_config(self) -> int:
698 """int: NUMA configuration of a device: value is of type CUdeviceNumaConfig enum."""
699 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_CONFIG)
701 @property
702 def numa_id(self) -> int:
703 """int: NUMA node ID of the GPU memory."""
704 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_ID)
706 @property
707 def multicast_supported(self) -> bool:
708 """bool: Device supports switch multicast and reduction operations."""
709 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED))
711 @property
712 def surface_alignment(self) -> int:
713 """int: Surface alignment requirement in bytes."""
714 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT)
716 @property
717 def async_engine_count(self) -> int:
718 """int: Number of asynchronous engines."""
719 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT)
721 @property
722 def can_tex2d_gather(self) -> bool:
723 """bool: True if device supports 2D texture gather operations, False if not."""
724 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER))
726 @property
727 def maximum_texture2d_gather_width(self) -> int:
728 """int: Maximum 2D texture gather width."""
729 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH)
731 @property
732 def maximum_texture2d_gather_height(self) -> int:
733 """int: Maximum 2D texture gather height."""
734 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT)
736 @property
737 def stream_priorities_supported(self) -> bool:
738 """bool: True if device supports stream priorities, False if not."""
739 return bool(
740 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED)
741 )
743 @property
744 def can_flush_remote_writes(self) -> bool:
745 """bool: The CU_STREAM_WAIT_VALUE_FLUSH flag and the CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the device. See Stream Memory Operations for additional details."""
746 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES))
748 @property
749 def host_register_supported(self) -> bool:
750 """bool: Device supports host memory registration via cudaHostRegister."""
751 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED))
753 @property
754 def timeline_semaphore_interop_supported(self) -> bool:
755 """bool: External timeline semaphore interop is supported on the device."""
756 return bool(
757 self._get_cached_attribute(
758 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED
759 )
760 )
762 @property
763 def cluster_launch(self) -> bool:
764 """bool: Indicates device supports cluster launch."""
765 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH))
767 @property
768 def can_use_64_bit_stream_mem_ops(self) -> bool:
769 """bool: 64-bit operations are supported in cuStreamBatchMemOp and related MemOp APIs."""
770 return bool(
771 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS)
772 )
774 @property
775 def can_use_stream_wait_value_nor(self) -> bool:
776 """bool: CU_STREAM_WAIT_VALUE_NOR is supported by MemOp APIs."""
777 return bool(
778 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR)
779 )
781 @property
782 def dma_buf_supported(self) -> bool:
783 """bool: Device supports buffer sharing with dma_buf mechanism."""
784 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED))
786 @property
787 def ipc_event_supported(self) -> bool:
788 """bool: Device supports IPC Events."""
789 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED))
791 @property
792 def mem_sync_domain_count(self) -> int:
793 """int: Number of memory domains the device supports."""
794 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEM_SYNC_DOMAIN_COUNT)
796 @property
797 def tensor_map_access_supported(self) -> bool:
798 """bool: Device supports accessing memory using Tensor Map."""
799 return bool(
800 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TENSOR_MAP_ACCESS_SUPPORTED)
801 )
803 @property
804 def handle_type_fabric_supported(self) -> bool:
805 """bool: Device supports exporting memory to a fabric handle with cuMemExportToShareableHandle() or requested with cuMemCreate()."""
806 return bool(
807 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_FABRIC_SUPPORTED)
808 )
810 @property
811 def unified_function_pointers(self) -> bool:
812 """bool: Device supports unified function pointers."""
813 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS))
815 @property
816 def mps_enabled(self) -> bool:
817 """bool: Indicates if contexts created on this device will be shared via MPS."""
818 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MPS_ENABLED))
820 @property
821 def host_numa_id(self) -> int:
822 """int: NUMA ID of the host node closest to the device. Returns -1 when system does not support NUMA."""
823 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID)
825 @property
826 def d3d12_cig_supported(self) -> bool:
827 """bool: Device supports CIG with D3D12."""
828 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED))
830 @property
831 def mem_decompress_algorithm_mask(self) -> int:
832 """int: The returned valued shall be interpreted as a bitmask, where the individual bits are described by the CUmemDecompressAlgorithm enum."""
833 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEM_DECOMPRESS_ALGORITHM_MASK)
835 @property
836 def mem_decompress_maximum_length(self) -> int:
837 """int: The returned valued is the maximum length in bytes of a single decompress operation that is allowed."""
838 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEM_DECOMPRESS_MAXIMUM_LENGTH)
840 @property
841 def vulkan_cig_supported(self) -> bool:
842 """bool: Device supports CIG with Vulkan."""
843 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_VULKAN_CIG_SUPPORTED))
845 @property
846 def gpu_pci_device_id(self) -> int:
847 """int: The combined 16-bit PCI device ID and 16-bit PCI vendor ID."""
848 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_PCI_DEVICE_ID)
850 @property
851 def gpu_pci_subsystem_id(self) -> int:
852 """int: The combined 16-bit PCI subsystem ID and 16-bit PCI subsystem vendor ID."""
853 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_PCI_SUBSYSTEM_ID)
855 @property
856 def host_numa_virtual_memory_management_supported(self) -> bool:
857 """bool: Device supports HOST_NUMA location with the virtual memory management APIs like cuMemCreate, cuMemMap and related APIs."""
858 return bool(
859 self._get_cached_attribute(
860 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NUMA_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED
861 )
862 )
864 @property
865 def host_numa_memory_pools_supported(self) -> bool:
866 """bool: Device supports HOST_NUMA location with the cuMemAllocAsync and cuMemPool family of APIs."""
867 return bool(
868 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NUMA_MEMORY_POOLS_SUPPORTED)
869 )
871 @property
872 def host_numa_multinode_ipc_supported(self) -> bool:
873 """bool: Device supports HOST_NUMA location IPC between nodes in a multi-node system."""
874 return bool(
875 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NUMA_MULTINODE_IPC_SUPPORTED)
876 )
878 @property
879 def host_memory_pools_supported(self) -> bool:
880 """bool: Device suports HOST location with the cuMemAllocAsync and cuMemPool family of APIs."""
881 return bool(
882 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_MEMORY_POOLS_SUPPORTED)
883 )
885 @property
886 def host_virtual_memory_management_supported(self) -> bool:
887 """bool: Device supports HOST location with the virtual memory management APIs like cuMemCreate, cuMemMap and related APIs."""
888 return bool(
889 self._get_cached_attribute(
890 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED
891 )
892 )
894 @property
895 def host_alloc_dma_buf_supported(self) -> bool:
896 """bool: Device supports page-locked host memory buffer sharing with dma_buf mechanism."""
897 return bool(
898 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_ALLOC_DMA_BUF_SUPPORTED)
899 )
901 @property
902 def only_partial_host_native_atomic_supported(self) -> bool:
903 """bool: Link between the device and the host supports only some native atomic operations."""
904 return bool(
905 self._get_cached_attribute(
906 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_ONLY_PARTIAL_HOST_NATIVE_ATOMIC_SUPPORTED
907 )
908 )
911cdef cydriver.CUcontext _get_primary_context(int dev_id) except?NULL:
912 try:
913 primary_ctxs = _tls.primary_ctxs
914 except AttributeError:
915 total = len(_tls.devices)
916 primary_ctxs = _tls.primary_ctxs = [0] * total
917 cdef cydriver.CUcontext ctx = <cydriver.CUcontext><uintptr_t>(primary_ctxs[dev_id])
918 if ctx == NULL:
919 with nogil:
920 HANDLE_RETURN(cydriver.cuDevicePrimaryCtxRetain(&ctx, dev_id))
921 primary_ctxs[dev_id] = <uintptr_t>(ctx)
922 return ctx
925class Device:
926 """Represent a GPU and act as an entry point for cuda.core features.
928 This is a singleton object that helps ensure interoperability
929 across multiple libraries imported in the process to both see
930 and use the same GPU device.
932 While acting as the entry point, many other CUDA resources can be
933 allocated such as streams and buffers. Any :obj:`~_context.Context` dependent
934 resource created through this device, will continue to refer to
935 this device's context.
937 Newly returned :obj:`~_device.Device` objects are thread-local singletons
938 for a specified device.
940 Note
941 ----
942 Will not initialize the GPU.
944 Parameters
945 ----------
946 device_id : int, optional
947 Device ordinal to return a :obj:`~_device.Device` object for.
948 Default value of `None` return the currently used device.
950 """
951 __slots__ = ("_id", "_memory_resource", "_has_inited", "_properties", "_uuid")
953 def __new__(cls, device_id: Device | int | None = None):
954 # Handle device_id argument.
955 if isinstance(device_id, Device):
956 return device_id
957 else:
958 device_id = getattr(device_id, 'device_id', device_id)
960 # Initialize CUDA.
961 global _is_cuInit
962 if _is_cuInit is False:
963 with _lock, nogil:
964 HANDLE_RETURN(cydriver.cuInit(0))
965 _is_cuInit = True
967 # important: creating a Device instance does not initialize the GPU!
968 cdef cydriver.CUdevice dev
969 cdef cydriver.CUcontext ctx
970 if device_id is None:
971 with nogil:
972 err = cydriver.cuCtxGetDevice(&dev)
973 if err == cydriver.CUresult.CUDA_SUCCESS:
974 device_id = int(dev)
975 elif err == cydriver.CUresult.CUDA_ERROR_INVALID_CONTEXT:
976 with nogil:
977 HANDLE_RETURN(cydriver.cuCtxGetCurrent(&ctx))
978 assert <void*>(ctx) == NULL
979 device_id = 0 # cudart behavior
980 else:
981 HANDLE_RETURN(err)
982 elif device_id < 0:
983 raise ValueError(f"device_id must be >= 0, got {device_id}")
985 # ensure Device is singleton
986 cdef int total
987 try:
988 devices = _tls.devices
989 except AttributeError:
990 with nogil:
991 HANDLE_RETURN(cydriver.cuDeviceGetCount(&total))
992 devices = _tls.devices = []
993 for dev_id in range(total):
994 device = super().__new__(cls)
995 device._id = dev_id
996 device._memory_resource = None
997 device._has_inited = False
998 device._properties = None
999 device._uuid = None
1000 devices.append(device)
1002 try:
1003 return devices[device_id]
1004 except IndexError:
1005 raise ValueError(f"device_id must be within [0, {len(devices)}), got {device_id}") from None
1007 def _check_context_initialized(self):
1008 if not self._has_inited:
1009 raise CUDAError(
1010 f"Device {self._id} is not yet initialized, perhaps you forgot to call .set_current() first?"
1011 )
1013 def _get_current_context(self, bint check_consistency=False) -> driver.CUcontext:
1014 cdef cydriver.CUcontext ctx
1015 cdef cydriver.CUdevice dev
1016 cdef cydriver.CUdevice this_dev = self._id
1017 with nogil:
1018 HANDLE_RETURN(cydriver.cuCtxGetCurrent(&ctx))
1019 if ctx == NULL:
1020 raise CUDAError("No context is bound to the calling CPU thread.")
1021 if check_consistency:
1022 HANDLE_RETURN(cydriver.cuCtxGetDevice(&dev))
1023 if dev != this_dev:
1024 raise CUDAError("Internal error (current device is not equal to Device.device_id)")
1025 return driver.CUcontext(<uintptr_t>ctx)
1027 @property
1028 def device_id(self) -> int:
1029 """Return device ordinal."""
1030 return self._id
1032 @property
1033 def pci_bus_id(self) -> str:
1034 """Return a PCI Bus Id string for this device."""
1035 bus_id = handle_return(runtime.cudaDeviceGetPCIBusId(13, self._id))
1036 return bus_id[:12].decode()
1038 def can_access_peer(self, peer: Device | int) -> bool:
1039 """Check if this device can access memory from the specified peer device.
1041 Queries whether peer-to-peer memory access is supported between this
1042 device and the specified peer device.
1044 Parameters
1045 ----------
1046 peer : Device | int
1047 The peer device to check accessibility to. Can be a Device object or device ID.
1048 """
1049 peer = Device(peer)
1050 cdef int d1 = <int> self.device_id
1051 cdef int d2 = <int> peer.device_id
1052 if d1 == d2:
1053 return True
1054 cdef int value = 0
1055 with nogil:
1056 HANDLE_RETURN(cydriver.cuDeviceCanAccessPeer(&value, d1, d2))
1057 return bool(value)
1059 @property
1060 def uuid(self) -> str:
1061 """Return a UUID for the device.
1063 Returns 16-octets identifying the device. If the device is in
1064 MIG mode, returns its MIG UUID which uniquely identifies the
1065 subscribed MIG compute instance.
1067 Note
1068 ----
1069 MIG UUID is only returned when device is in MIG mode and the
1070 driver is older than CUDA 11.4.
1072 The UUID is cached after first access to avoid repeated CUDA API calls.
1074 """
1075 cdef cydriver.CUuuid uuid
1076 cdef cydriver.CUdevice dev
1077 cdef bytes uuid_b
1078 cdef str uuid_hex
1080 if self._uuid is None:
1081 dev = self._id
1082 with nogil:
1083 IF CUDA_CORE_BUILD_MAJOR == "12":
1084 HANDLE_RETURN(cydriver.cuDeviceGetUuid_v2(&uuid, dev))
1085 ELSE: # 13.0+
1086 HANDLE_RETURN(cydriver.cuDeviceGetUuid(&uuid, dev))
1087 uuid_b = cpython.PyBytes_FromStringAndSize(uuid.bytes, sizeof(uuid.bytes))
1088 uuid_hex = uuid_b.hex()
1089 # 8-4-4-4-12
1090 self._uuid = f"{uuid_hex[:8]}-{uuid_hex[8:12]}-{uuid_hex[12:16]}-{uuid_hex[16:20]}-{uuid_hex[20:]}"
1091 return self._uuid
1093 @property
1094 def name(self) -> str:
1095 """Return the device name."""
1096 # Use 256 characters to be consistent with CUDA Runtime
1097 cdef int LENGTH = 256
1098 cdef bytes name = bytes(LENGTH)
1099 cdef char* name_ptr = name
1100 cdef cydriver.CUdevice this_dev = self._id
1101 with nogil:
1102 HANDLE_RETURN(cydriver.cuDeviceGetName(name_ptr, LENGTH, this_dev))
1103 name = name.split(b"\0")[0]
1104 return name.decode()
1106 @property
1107 def properties(self) -> DeviceProperties:
1108 """Return a :obj:`~_device.DeviceProperties` class with information about the device."""
1109 if self._properties is None:
1110 self._properties = DeviceProperties._init(self._id)
1112 return self._properties
1114 @property
1115 def compute_capability(self) -> ComputeCapability:
1116 """Return a named tuple with 2 fields: major and minor."""
1117 cdef DeviceProperties prop = self.properties
1118 if "compute_capability" in prop._cache:
1119 return prop._cache["compute_capability"]
1120 cc = ComputeCapability(prop.compute_capability_major, prop.compute_capability_minor)
1121 prop._cache["compute_capability"] = cc
1122 return cc
1124 @property
1125 def arch(self) -> str:
1126 """Return compute capability as a string (e.g., '75' for CC 7.5)."""
1127 return f"{self.compute_capability.major}{self.compute_capability.minor}"
1129 @property
1130 def context(self) -> Context:
1131 """Return the current :obj:`~_context.Context` associated with this device.
1133 Note
1134 ----
1135 Device must be initialized.
1137 """
1138 self._check_context_initialized()
1139 ctx = self._get_current_context(check_consistency=True)
1140 return Context._from_ctx(ctx, self._id)
1142 @property
1143 def memory_resource(self) -> MemoryResource:
1144 """Return :obj:`~_memory.MemoryResource` associated with this device."""
1145 cdef int attr, device_id
1146 if self._memory_resource is None:
1147 # If the device is in TCC mode, or does not support memory pools for some other reason,
1148 # use the SynchronousMemoryResource which does not use memory pools.
1149 device_id = self._id
1150 with nogil:
1151 HANDLE_RETURN(
1152 cydriver.cuDeviceGetAttribute(
1153 &attr, cydriver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED, device_id
1154 )
1155 )
1156 if attr == 1:
1157 from cuda.core.experimental._memory import DeviceMemoryResource
1158 self._memory_resource = DeviceMemoryResource(self._id)
1159 else:
1160 from cuda.core.experimental._memory import _SynchronousMemoryResource
1161 self._memory_resource = _SynchronousMemoryResource(self._id)
1163 return self._memory_resource
1165 @memory_resource.setter
1166 def memory_resource(self, mr):
1167 from cuda.core.experimental._memory import MemoryResource
1168 assert_type(mr, MemoryResource)
1169 self._memory_resource = mr
1171 @property
1172 def default_stream(self) -> Stream:
1173 """Return default CUDA :obj:`~_stream.Stream` associated with this device.
1175 The type of default stream returned depends on if the environment
1176 variable CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM is set.
1178 If set, returns a per-thread default stream. Otherwise returns
1179 the legacy stream.
1181 """
1182 return default_stream()
1184 def __int__(self):
1185 """Return device_id."""
1186 return self._id
1188 def __repr__(self):
1189 return f"<Device {self._id} ({self.name})>"
1191 def __hash__(self) -> int:
1192 return hash(self.uuid)
1194 def __eq__(self, other) -> bool:
1195 if not isinstance(other, Device):
1196 return NotImplemented
1197 return self._id == other._id
1199 def __reduce__(self):
1200 return Device, (self.device_id,)
1202 def set_current(self, ctx: Context = None) -> Union[Context, None]:
1203 """Set device to be used for GPU executions.
1205 Initializes CUDA and sets the calling thread to a valid CUDA
1206 context. By default the primary context is used, but optional `ctx`
1207 parameter can be used to explicitly supply a :obj:`~_context.Context` object.
1209 Providing a `ctx` causes the previous set context to be popped and returned.
1211 Parameters
1212 ----------
1213 ctx : :obj:`~_context.Context`, optional
1214 Optional context to push onto this device's current thread stack.
1216 Returns
1217 -------
1218 Union[:obj:`~_context.Context`, None], optional
1219 Popped context.
1221 Examples
1222 --------
1223 Acts as an entry point of this object. Users always start a code by
1224 calling this method, e.g.
1226 >>> from cuda.core.experimental import Device
1227 >>> dev0 = Device(0)
1228 >>> dev0.set_current()
1229 >>> # ... do work on device 0 ...
1231 """
1232 cdef cydriver.CUcontext prev_ctx
1233 cdef cydriver.CUcontext curr_ctx
1234 if ctx is not None:
1235 # TODO: revisit once Context is cythonized
1236 assert_type(ctx, Context)
1237 if ctx._id != self._id:
1238 raise RuntimeError(
1239 "the provided context was created on the device with"
1240 f" id={ctx._id}, which is different from the target id={self._id}"
1241 )
1242 # prev_ctx is the previous context
1243 curr_ctx = <cydriver.CUcontext>(ctx._handle)
1244 with nogil:
1245 HANDLE_RETURN(cydriver.cuCtxPopCurrent(&prev_ctx))
1246 HANDLE_RETURN(cydriver.cuCtxPushCurrent(curr_ctx))
1247 self._has_inited = True
1248 if prev_ctx != NULL:
1249 return Context._from_ctx(<uintptr_t>(prev_ctx), self._id)
1250 else:
1251 # use primary ctx
1252 curr_ctx = _get_primary_context(self._id)
1253 with nogil:
1254 HANDLE_RETURN(cydriver.cuCtxSetCurrent(curr_ctx))
1255 self._has_inited = True
1257 def create_context(self, options: ContextOptions = None) -> Context:
1258 """Create a new :obj:`~_context.Context` object.
1260 Note
1261 ----
1262 The newly context will not be set as current.
1264 Parameters
1265 ----------
1266 options : :obj:`~_context.ContextOptions`, optional
1267 Customizable dataclass for context creation options.
1269 Returns
1270 -------
1271 :obj:`~_context.Context`
1272 Newly created context object.
1274 """
1275 raise NotImplementedError("WIP: https://github.com/NVIDIA/cuda-python/issues/189")
1277 def create_stream(self, obj: IsStreamT | None = None, options: StreamOptions | None = None) -> Stream:
1278 """Create a Stream object.
1280 New stream objects can be created in two different ways:
1282 1) Create a new CUDA stream with customizable ``options``.
1283 2) Wrap an existing foreign `obj` supporting the ``__cuda_stream__`` protocol.
1285 Option (2) internally holds a reference to the foreign object
1286 such that the lifetime is managed.
1288 Note
1289 ----
1290 Device must be initialized.
1292 Parameters
1293 ----------
1294 obj : :obj:`~_stream.IsStreamT`, optional
1295 Any object supporting the ``__cuda_stream__`` protocol.
1296 options : :obj:`~_stream.StreamOptions`, optional
1297 Customizable dataclass for stream creation options.
1299 Returns
1300 -------
1301 :obj:`~_stream.Stream`
1302 Newly created stream object.
1304 """
1305 self._check_context_initialized()
1306 return Stream._init(obj=obj, options=options, device_id=self._id)
1308 def create_event(self, options: EventOptions | None = None) -> Event:
1309 """Create an Event object without recording it to a Stream.
1311 Note
1312 ----
1313 Device must be initialized.
1315 Parameters
1316 ----------
1317 options : :obj:`EventOptions`, optional
1318 Customizable dataclass for event creation options.
1320 Returns
1321 -------
1322 :obj:`~_event.Event`
1323 Newly created event object.
1325 """
1326 self._check_context_initialized()
1327 ctx = self._get_current_context()
1328 return Event._init(self._id, ctx, options, True)
1330 def allocate(self, size, stream: Stream | GraphBuilder | None = None) -> Buffer:
1331 """Allocate device memory from a specified stream.
1333 Allocates device memory of `size` bytes on the specified `stream`
1334 using the memory resource currently associated with this Device.
1336 Parameter `stream` is optional, using a default stream by default.
1338 Note
1339 ----
1340 Device must be initialized.
1342 Parameters
1343 ----------
1344 size : int
1345 Number of bytes to allocate.
1346 stream : :obj:`~_stream.Stream`, optional
1347 The stream establishing the stream ordering semantic.
1348 Default value of `None` uses default stream.
1350 Returns
1351 -------
1352 :obj:`~_memory.Buffer`
1353 Newly created buffer object.
1355 """
1356 self._check_context_initialized()
1357 return self.memory_resource.allocate(size, stream)
1359 def sync(self):
1360 """Synchronize the device.
1362 Note
1363 ----
1364 Device must be initialized.
1366 """
1367 self._check_context_initialized()
1368 handle_return(runtime.cudaDeviceSynchronize())
1370 def create_graph_builder(self) -> GraphBuilder:
1371 """Create a new :obj:`~_graph.GraphBuilder` object.
1373 Returns
1374 -------
1375 :obj:`~_graph.GraphBuilder`
1376 Newly created graph builder object.
1378 """
1379 self._check_context_initialized()
1380 return GraphBuilder._init(stream=self.create_stream(), is_stream_owner=True)