Coverage for cuda/core/texture/_array.pyx: 90.87%
208 statements
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-03 01:38 +0000
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-03 01:38 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
5from __future__ import annotations
7cimport cpython
8from libc.stdint cimport intptr_t
9from libc.string cimport memset
11from cuda.bindings cimport cydriver
12from cuda.core._memory._buffer cimport Buffer
13from cuda.core._resource_handles cimport (
14 OpaqueArrayHandle,
15 as_cu,
16 as_intptr,
17 create_array_handle,
18 create_array_handle_owning,
19 create_array_handle_ref,
20 get_last_error,
21)
22from cuda.core._stream cimport Stream, Stream_accept
23from cuda.core._utils.cuda_utils cimport (
24 HANDLE_RETURN,
25 _get_current_device_id,
26)
28from enum import IntEnum
class ArrayFormat(IntEnum):
    """Per-channel element format of an :class:`OpaqueArray`.

    Every member takes the integer value of the matching ``CU_AD_FORMAT_*``
    constant from the driver's ``CUarray_format`` enum.
    """

    # Unsigned integer channels.
    UINT8 = cydriver.CU_AD_FORMAT_UNSIGNED_INT8
    UINT16 = cydriver.CU_AD_FORMAT_UNSIGNED_INT16
    UINT32 = cydriver.CU_AD_FORMAT_UNSIGNED_INT32

    # Signed integer channels.
    INT8 = cydriver.CU_AD_FORMAT_SIGNED_INT8
    INT16 = cydriver.CU_AD_FORMAT_SIGNED_INT16
    INT32 = cydriver.CU_AD_FORMAT_SIGNED_INT32

    # Floating-point channels.
    FLOAT16 = cydriver.CU_AD_FORMAT_HALF
    FLOAT32 = cydriver.CU_AD_FORMAT_FLOAT
# Size in bytes of one single-channel element, keyed by the integer value of
# each ArrayFormat member. Built from a width-grouped table; the resulting
# mapping lists the 1-byte formats first, then 2-byte, then 4-byte.
_FORMAT_ELEM_SIZE = {
    int(fmt): nbytes
    for nbytes, fmts in (
        (1, (ArrayFormat.UINT8, ArrayFormat.INT8)),
        (2, (ArrayFormat.UINT16, ArrayFormat.INT16, ArrayFormat.FLOAT16)),
        (4, (ArrayFormat.UINT32, ArrayFormat.INT32, ArrayFormat.FLOAT32)),
    )
    for fmt in fmts
}
def _validate_format_channels(format, num_channels):
    """Check a ``(format, num_channels)`` pair before it reaches the driver.

    Shared by the array, mipmap, and texture factories. Returns ``None`` when
    the pair is acceptable.

    Raises
    ------
    TypeError
        If ``format`` is not an :class:`ArrayFormat` member.
    ValueError
        If ``num_channels`` is a ``bool`` or is not one of 1, 2, or 4.
    """
    if not isinstance(format, ArrayFormat):
        raise TypeError(f"format must be an ArrayFormat, got {type(format).__name__}")

    # ``True == 1`` in Python, so bools are excluded explicitly before the
    # membership test.
    channels_ok = not isinstance(num_channels, bool) and num_channels in (1, 2, 4)
    if not channels_ok:
        raise ValueError(f"num_channels must be 1, 2, or 4, got {num_channels!r}")
def _validate_array_shape(shape):
    """Normalize ``shape`` to a tuple of ints and validate it.

    Parameters
    ----------
    shape : iterable of int
        One to three extents, each >= 1. Integral values of other numeric
        types (for example ``2.0``) are accepted and converted.

    Returns
    -------
    tuple of int
        The normalized shape.

    Raises
    ------
    TypeError
        If ``shape`` is not iterable, is a ``str``/``bytes``/``bytearray``,
        or contains an element that ``int()`` cannot convert without changing
        its value (``2.7``, ``"3"``, ``None``, ...).
    ValueError
        If the rank is not 1, 2, or 3, or any extent is < 1. A ``ValueError``
        raised by ``int()`` itself (for example on ``"a"``) propagates as is.
    """
    # str/bytes are iterable, so without this guard "32" would be read as the
    # shape (3, 2) and b"\x02\x03" as (2, 3).
    if isinstance(shape, (str, bytes, bytearray)):
        raise TypeError(f"shape must be a tuple of ints, got {type(shape).__name__}")
    try:
        extents = tuple(shape)
        shape_t = tuple(int(s) for s in extents)
    except TypeError as e:
        raise TypeError(f"shape must be a tuple of ints, got {type(shape).__name__}") from e
    if not 1 <= len(shape_t) <= 3:
        raise ValueError(f"shape rank must be 1, 2, or 3, got {len(shape_t)}")
    for i, (given, dim) in enumerate(zip(extents, shape_t)):
        # int() truncates floats and parses digit strings; refuse any
        # conversion that did not preserve the caller's value.
        if dim != given:
            raise TypeError(f"shape[{i}] must be an integer, got {given!r}")
        if dim < 1:
            raise ValueError(f"shape[{i}] must be >= 1, got {dim}")
    return shape_t
cdef void _fill_array_endpoint(
    cydriver.CUDA_MEMCPY3D* p, OpaqueArray arr, bint is_src
) noexcept:
    """Write ``arr`` into the source or destination array fields of ``p``.

    ``is_src`` selects which side of the ``CUDA_MEMCPY3D`` struct is filled.
    The memory type is set to ``CU_MEMORYTYPE_ARRAY`` and the X/Y/Z offsets
    of that side are set to zero.
    """
    cdef cydriver.CUarray raw = as_cu(arr._handle)

    if not is_src:
        p.dstMemoryType = cydriver.CU_MEMORYTYPE_ARRAY
        p.dstArray = raw
        p.dstXInBytes = 0
        p.dstY = 0
        p.dstZ = 0
        return

    p.srcMemoryType = cydriver.CU_MEMORYTYPE_ARRAY
    p.srcArray = raw
    p.srcXInBytes = 0
    p.srcY = 0
    p.srcZ = 0
cdef int _fill_host_endpoint(
    cydriver.CUDA_MEMCPY3D* p,
    object obj,
    bint is_src,
    size_t width_bytes,
    size_t height,
    size_t required,
    cpython.Py_buffer* pybuf_out,
) except -1:
    """Populate the src or dst host fields of ``p`` from a buffer-protocol ``obj``.

    A ``PyBUF_SIMPLE`` view is requested (plus ``PyBUF_WRITABLE`` when ``obj``
    is the destination) and stored in ``pybuf_out``.

    Returns 1 on success, with the view still held: the caller must call
    ``PyBuffer_Release`` on ``pybuf_out``. If the view is smaller than
    ``required`` bytes it is released here before ``ValueError`` is raised,
    so the caller has nothing to release on that path.

    Raises
    ------
    TypeError
        If acquiring the buffer view reports failure through its return value.
    ValueError
        If the view holds fewer than ``required`` bytes.
    """
    cdef int flags = cpython.PyBUF_SIMPLE
    cdef Py_ssize_t available
    if not is_src:
        flags |= cpython.PyBUF_WRITABLE
    # NOTE(review): if the cimported PyObject_GetBuffer is declared
    # ``except -1``, a failed acquisition propagates the C-API exception
    # directly and this branch never runs -- confirm against the .pxd.
    if cpython.PyObject_GetBuffer(obj, pybuf_out, flags) != 0:
        raise TypeError(
            f"Source/destination must be a Buffer or a contiguous "
            f"buffer-protocol object, got {type(obj).__name__}"
        )
    # Read the length while the view is still held; the struct must not be
    # consulted after PyBuffer_Release.
    available = pybuf_out.len
    if <size_t>available < required:
        cpython.PyBuffer_Release(pybuf_out)
        raise ValueError(
            f"Host buffer has {available} bytes, smaller than the array "
            f"extent ({required} bytes)"
        )
    if is_src:
        p.srcMemoryType = cydriver.CU_MEMORYTYPE_HOST
        p.srcHost = pybuf_out.buf
        p.srcPitch = width_bytes
        p.srcHeight = height
        p.srcXInBytes = 0
        p.srcY = 0
        p.srcZ = 0
    else:
        p.dstMemoryType = cydriver.CU_MEMORYTYPE_HOST
        p.dstHost = pybuf_out.buf
        p.dstPitch = width_bytes
        p.dstHeight = height
        p.dstXInBytes = 0
        p.dstY = 0
        p.dstZ = 0
    return 1
cdef int _fill_linear_endpoint(
    cydriver.CUDA_MEMCPY3D* p,
    object obj,
    bint is_src,
    size_t width_bytes,
    size_t height,
    size_t depth,
    cpython.Py_buffer* pybuf_out,
) except -1:
    """Fill the linear (non-array) side of ``p`` from ``obj``.

    A :class:`Buffer` is described as device memory through its ``handle``;
    any other object is handed to :func:`_fill_host_endpoint`.

    Returns 0 for a :class:`Buffer` (``pybuf_out`` untouched), otherwise the
    result of :func:`_fill_host_endpoint`; a non-zero result means
    ``pybuf_out`` holds a view the caller must release.

    Raises ``ValueError`` if a :class:`Buffer` is smaller than
    ``width_bytes * height * depth`` bytes.
    """
    cdef intptr_t dev_ptr
    cdef Buffer dev_buf
    cdef size_t needed = width_bytes * height * depth

    # Anything that is not a Buffer goes through the buffer protocol.
    if not isinstance(obj, Buffer):
        return _fill_host_endpoint(
            p, obj, is_src, width_bytes, height, needed, pybuf_out
        )

    dev_buf = <Buffer>obj
    if <size_t>dev_buf.size < needed:
        raise ValueError(
            f"Buffer size ({dev_buf.size} bytes) is smaller than "
            f"the array extent ({needed} bytes)"
        )

    dev_ptr = int(dev_buf.handle)
    if is_src:
        p.srcMemoryType = cydriver.CU_MEMORYTYPE_DEVICE
        p.srcDevice = <cydriver.CUdeviceptr>dev_ptr
        p.srcPitch = width_bytes
        p.srcHeight = height
        p.srcXInBytes = 0
        p.srcY = 0
        p.srcZ = 0
    else:
        p.dstMemoryType = cydriver.CU_MEMORYTYPE_DEVICE
        p.dstDevice = <cydriver.CUdeviceptr>dev_ptr
        p.dstPitch = width_bytes
        p.dstHeight = height
        p.dstXInBytes = 0
        p.dstY = 0
        p.dstZ = 0
    return 0
cdef _copy3d(OpaqueArray arr, object other, Stream stream, bint to_array):
    """Enqueue a full-extent ``cuMemcpy3DAsync`` between ``arr`` and ``other``.

    ``to_array=True`` copies from ``other`` into ``arr``; ``to_array=False``
    copies from ``arr`` into ``other``. ``stream`` must already be a concrete
    :class:`Stream` (callers coerce via :func:`Stream_accept`).

    NOTE(review): a host buffer view taken on ``other`` is released as soon
    as the copy has been enqueued. Whether the transfer has finished by then
    is not visible from here -- confirm callers synchronize ``stream`` before
    relying on the host object.
    """
    cdef cydriver.CUDA_MEMCPY3D copy_desc
    cdef cpython.Py_buffer host_view
    cdef int view_held = 0
    cdef intptr_t raw_stream
    cdef cydriver.CUstream c_stream

    memset(&copy_desc, 0, sizeof(copy_desc))
    row_bytes, rows, slices = arr._extent_bytes()
    copy_desc.WidthInBytes = <size_t>row_bytes
    copy_desc.Height = <size_t>rows
    copy_desc.Depth = <size_t>slices

    try:
        if not to_array:
            # Array -> linear.
            _fill_array_endpoint(&copy_desc, arr, True)
            view_held = _fill_linear_endpoint(
                &copy_desc, other, False, row_bytes, rows, slices, &host_view
            )
        else:
            # Linear -> array.
            view_held = _fill_linear_endpoint(
                &copy_desc, other, True, row_bytes, rows, slices, &host_view
            )
            _fill_array_endpoint(&copy_desc, arr, False)

        raw_stream = int(stream.handle)
        c_stream = <cydriver.CUstream><void*>raw_stream
        with nogil:
            HANDLE_RETURN(cydriver.cuMemcpy3DAsync(&copy_desc, c_stream))
    finally:
        # Only the host path acquires a Py_buffer; the Buffer path reports 0.
        if view_held:
            cpython.PyBuffer_Release(&host_view)
cdef class OpaqueArray:
    """An opaque, hardware-laid-out GPU allocation for texture/surface access.

    Distinct from :class:`Buffer`: a ``CUarray`` has no exposed device pointer
    and can only be accessed from kernels through a :class:`TextureObject` or
    :class:`SurfaceObject`. Its memory layout is chosen by the driver for 2D/3D
    spatial locality.

    **Copy-only interop.** Because the layout is opaque and there is no linear
    device pointer, an ``OpaqueArray`` cannot expose ``__cuda_array_interface__`` /
    DLPack and cannot be shared zero-copy with NumPy, CuPy, numba-cuda, or
    PyTorch. Moving data in or out is therefore always a copy: use
    :meth:`copy_from` / :meth:`copy_to` against a linear :class:`Buffer` or a
    host buffer-protocol object. There is no allocation helper — allocate the
    linear :class:`Buffer` yourself (e.g. ``mr.allocate(arr.size_bytes,
    stream=s)``) and copy.

    Construct via :meth:`from_descriptor`. Only plain 1D/2D/3D allocations are
    supported in this initial version; layered/cubemap/sparse variants will
    follow once their shape semantics are settled.
    """

    def __init__(self, *args, **kwargs):
        raise RuntimeError(
            "OpaqueArray cannot be instantiated directly. Use OpaqueArray.from_descriptor()."
        )

    @classmethod
    def from_descriptor(cls, *, shape, format, num_channels, is_surface_load_store=False):
        """Allocate a new CUDA array.

        Parameters
        ----------
        shape : tuple of int
            ``(width,)``, ``(width, height)``, or ``(width, height, depth)``
            in elements.
        format : ArrayFormat
            Element format.
        num_channels : int
            Channels per element. Must be 1, 2, or 4.
        is_surface_load_store : bool
            If True, allocate with ``CUDA_ARRAY3D_SURFACE_LDST`` so the array
            can be bound as a :class:`SurfaceObject` for kernel-side writes.
            Default False.

        Returns
        -------
        OpaqueArray

        Raises
        ------
        TypeError, ValueError
            If ``format``, ``num_channels``, or ``shape`` fail validation.
        """
        _validate_format_channels(format, num_channels)
        shape_t = _validate_array_shape(shape)

        cdef cydriver.CUarray_format c_format = <cydriver.CUarray_format><int>format
        cdef cydriver.CUDA_ARRAY3D_DESCRIPTOR desc3d
        cdef int rank = len(shape_t)
        cdef unsigned int flags = (
            cydriver.CUDA_ARRAY3D_SURFACE_LDST if is_surface_load_store else 0
        )

        # cuArray3DCreate handles 1D/2D/3D uniformly (Height/Depth 0 sentinels),
        # so a single descriptor + create_array_handle covers every shape.
        memset(&desc3d, 0, sizeof(desc3d))
        desc3d.Width = <size_t>shape_t[0]
        desc3d.Height = <size_t>(shape_t[1] if rank >= 2 else 0)
        desc3d.Depth = <size_t>(shape_t[2] if rank >= 3 else 0)
        desc3d.Format = c_format
        desc3d.NumChannels = <unsigned int>num_channels
        desc3d.Flags = flags

        cdef OpaqueArrayHandle h = create_array_handle(desc3d)
        if not h:
            HANDLE_RETURN(get_last_error())

        # __new__ bypasses __init__, which always raises.
        cdef OpaqueArray self = cls.__new__(cls)
        self._handle = h
        self._shape = shape_t
        self._format = c_format
        self._num_channels = num_channels
        self._surface_load_store = bool(is_surface_load_store)
        self._device_id = _get_current_device_id()
        return self

    @classmethod
    def _from_handle(cls, intptr_t handle, bint owning, *, device_id=None):
        """Wrap an externally-allocated ``CUarray``.

        Intended for graphics interop (``cuGraphicsSubResourceGetMappedArray``)
        where the array is owned by the graphics API. With ``owning=False`` the
        underlying ``CUarray`` is never destroyed by this object. Shape, format,
        and channel count are queried from the driver. When ``device_id`` is
        None the current device id is recorded.
        """
        cdef cydriver.CUarray raw = <cydriver.CUarray><void*>handle
        cdef OpaqueArrayHandle h
        if owning:
            h = create_array_handle_owning(raw)
        else:
            h = create_array_handle_ref(raw)
        cdef int dev = _get_current_device_id() if device_id is None else int(device_id)
        return _array_from_handle(h, dev)

    @property
    def handle(self):
        """The underlying ``CUarray`` as an integer."""
        return as_intptr(self._handle)

    @property
    def shape(self):
        """Allocation shape, in elements."""
        return self._shape

    @property
    def format(self):
        """The element :class:`ArrayFormat`."""
        return ArrayFormat(self._format)

    @property
    def num_channels(self):
        """Channels per element (1, 2, or 4)."""
        return self._num_channels

    @property
    def element_size(self):
        """Bytes per element (format size * channels)."""
        return _FORMAT_ELEM_SIZE[self._format] * self._num_channels

    @property
    def device(self):
        """The :class:`Device` this array was allocated on."""
        from cuda.core._device import Device
        return Device(self._device_id)

    @property
    def is_surface_load_store(self):
        """True if this array was created with ``CUDA_ARRAY3D_SURFACE_LDST``
        and can be bound as a :class:`SurfaceObject`."""
        return self._surface_load_store

    def _extent_bytes(self):
        """Return (width_bytes, height, depth) for cuMemcpy3D, with height/depth
        normalized to >=1 for lower-rank arrays."""
        cdef int rank = len(self._shape)
        cdef size_t w = <size_t>self._shape[0] * <size_t>(
            _FORMAT_ELEM_SIZE[self._format] * self._num_channels
        )
        cdef size_t h = <size_t>(self._shape[1] if rank >= 2 else 1)
        cdef size_t d = <size_t>(self._shape[2] if rank >= 3 else 1)
        return w, h, d

    def copy_from(self, src, *, stream) -> None:
        """Copy a full-array's worth of data into this array.

        Parameters
        ----------
        src : Buffer or buffer-protocol object
            Source data. Must contain at least ``self.size_bytes`` bytes
            of contiguous data.
        stream : Stream or GraphBuilder
            Stream to issue the copy on. A :class:`~cuda.core.graph.GraphBuilder`
            is accepted so the copy can be captured into a graph.
        """
        _copy3d(self, src, Stream_accept(stream), to_array=True)

    def copy_to(self, dst, *, stream):
        """Copy a full-array's worth of data out of this array.

        Parameters
        ----------
        dst : Buffer or writable buffer-protocol object
            Destination. Must have at least ``self.size_bytes`` bytes of
            writable, contiguous space.
        stream : Stream or GraphBuilder
            Stream to issue the copy on. A :class:`~cuda.core.graph.GraphBuilder`
            is accepted so the copy can be captured into a graph.

        Returns
        -------
        The ``dst`` object, for parity with :meth:`Buffer.copy_to`.
        """
        _copy3d(self, dst, Stream_accept(stream), to_array=False)
        return dst

    @property
    def size_bytes(self):
        """Total bytes of array storage (``prod(shape) * element_size``)."""
        cdef size_t n = 1
        for s in self._shape:
            n *= <size_t>s
        return n * <size_t>(_FORMAT_ELEM_SIZE[self._format] * self._num_channels)

    cpdef close(self):
        """Release this object's reference to the underlying ``CUarray``.

        Destruction (``cuArrayDestroy``) happens via the handle's deleter when
        the last reference is dropped; for a non-owning handle (graphics interop
        or a mipmap-level view) nothing is destroyed. Idempotent: a second call
        (or destruction after ``close()``) is a no-op.
        """
        self._handle.reset()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()

    def __repr__(self):
        # Arrays wrapped by _array_from_handle take their format straight from
        # the driver descriptor, which may be a CUarray_format value that
        # ArrayFormat does not list. repr() must not raise in that case, so
        # fall back to the raw integer.
        try:
            fmt_name = ArrayFormat(self._format).name
        except ValueError:
            fmt_name = f"<unknown:{int(self._format)}>"
        return (
            f"OpaqueArray(shape={self._shape}, "
            f"format={fmt_name}, "
            f"num_channels={self._num_channels})"
        )
cdef OpaqueArray _array_from_handle(OpaqueArrayHandle h, int device_id):
    """Build an :class:`OpaqueArray` around an existing ``OpaqueArrayHandle``.

    Shape, format, channel count, and the surface load/store flag are read
    from the driver with ``cuArray3DGetDescriptor``; ``device_id`` is stored
    as given.

    Any owning/non-owning semantics and parent (mipmap) dependency are already
    captured structurally inside ``h``'s C++ box.
    """
    cdef OpaqueArray arr
    cdef cydriver.CUDA_ARRAY3D_DESCRIPTOR desc
    cdef cydriver.CUarray raw

    # An empty handle means creation failed upstream; surface that error.
    if not h:
        HANDLE_RETURN(get_last_error())

    arr = OpaqueArray.__new__(OpaqueArray)
    arr._handle = h
    arr._device_id = device_id

    raw = as_cu(h)
    with nogil:
        HANDLE_RETURN(cydriver.cuArray3DGetDescriptor(&desc, raw))

    # Rank is inferred from the descriptor: Depth > 0 gives rank 3 (Height is
    # included whatever its value), otherwise Height > 0 gives rank 2.
    dims = [int(desc.Width)]
    if desc.Depth > 0 or desc.Height > 0:
        dims.append(int(desc.Height))
    if desc.Depth > 0:
        dims.append(int(desc.Depth))
    arr._shape = tuple(dims)

    arr._format = desc.Format
    arr._num_channels = desc.NumChannels
    arr._surface_load_store = bool(desc.Flags & cydriver.CUDA_ARRAY3D_SURFACE_LDST)
    return arr