Coverage for cuda / core / _memoryview.pyx: 59.06%
662 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-25 01:07 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-25 01:07 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
5from __future__ import annotations
7from ._dlpack cimport *
8from libc.stdint cimport intptr_t
9from cuda.core._layout cimport _StridedLayout, get_strides_ptr
10from cuda.core._stream import Stream
12import functools
13import warnings
15import numpy
17from cuda.bindings cimport cydriver
18from cuda.core._resource_handles cimport (
19 EventHandle,
20 create_event_handle_noctx,
21 as_cu,
22)
24from cuda.core._utils.cuda_utils import handle_return, driver
25from cuda.core._utils.cuda_utils cimport HANDLE_RETURN
28from cuda.core._memory import Buffer
31try:
32 from ml_dtypes import bfloat16
33except ImportError:
34 bfloat16 = None
36# TODO(leofang): support NumPy structured dtypes
cdef extern from "Python.h":
    # Minimal CPython declarations. PyType_Modified() invalidates a type's
    # cached attribute lookups after its tp_dict is mutated at runtime.
    # NOTE(review): not used in this visible chunk — presumably used further
    # down the file to patch StridedMemoryView's type dict; confirm.
    ctypedef struct PyTypeObject:
        void* tp_dict
    void PyType_Modified(PyTypeObject*)
# Process-wide DLPack exchange API table for StridedMemoryView, plus the
# capsule that publishes a pointer to it under the conventional name
# "dlpack_exchange_api". The struct is statically allocated (module lifetime),
# so the capsule needs no destructor (NULL).
cdef DLPackExchangeAPI _SMV_DLPACK_EXCHANGE_API
# Guards one-time initialization of the table (see _init_smv_dlpack_exchange_api).
cdef bint _SMV_DLPACK_EXCHANGE_API_INITED = False
_SMV_DLPACK_EXCHANGE_API_CAPSULE = cpython.PyCapsule_New(
    <void*>&_SMV_DLPACK_EXCHANGE_API,
    b"dlpack_exchange_api",
    NULL,
)
cdef class StridedMemoryView:
    """A class holding metadata of a strided dense array/tensor.

    A :obj:`StridedMemoryView` instance can be created in three ways:

    1. Using the :obj:`args_viewable_as_strided_memory` decorator (recommended)
    2. Explicit construction relying on DLPack or CUDA Array Interface, see below.
    3. From :obj:`~_memory.Buffer` and shape and size tuples (see
       :meth:`from_buffer` classmethod)

    ``StridedMemoryView(obj, stream_ptr)`` can be used to create a view from
    objects supporting either DLPack (up to v1.0) or CUDA Array Interface
    (CAI) v3. When wrapping an arbitrary object it will try the DLPack protocol
    first, then the CAI protocol. A :obj:`BufferError` is raised if neither is
    supported.

    Since either way would take a consumer stream, for DLPack it is passed to
    ``obj.__dlpack__()`` as-is (except for :obj:`None`, see below); for CAI, a
    stream order will be established between the consumer stream and the
    producer stream (from ``obj.__cuda_array_interface__()["stream"]``), as if
    ``cudaStreamWaitEvent`` is called by this method.

    To opt-out of the stream ordering operation in either DLPack or CAI,
    please pass ``stream_ptr=-1``. Note that this deviates (on purpose)
    from the semantics of ``obj.__dlpack__(stream=None, ...)`` since ``cuda.core``
    does not encourage using the (legacy) default/null stream, but is
    consistent with the CAI's semantics. For DLPack, ``stream=-1`` will be
    internally passed to ``obj.__dlpack__()`` instead.

    Parameters
    ----------
    obj : Any
        Any objects that supports either DLPack (up to v1.0) or CUDA Array
        Interface (v3).
    stream_ptr: int
        The pointer address (as Python `int`) to the **consumer** stream.
        Stream ordering will be properly established unless ``-1`` is passed.

    Attributes
    ----------
    ptr : int
        Pointer to the tensor buffer (as a Python `int`).
    device_id : int
        The device ID for where the tensor is located. It is -1 for CPU tensors
        (meaning those only accessible from the host).
    is_device_accessible : bool
        Whether the tensor data can be accessed on the GPU.
    readonly: bool
        Whether the tensor data can be modified in place.
    exporting_obj : Any
        A reference to the original tensor object that is being viewed.
        If the view is created with :meth:`from_buffer`,
        it will be the Buffer instance passed to the method.
    """

    def __init__(self, obj: object = None, stream_ptr: int | None = None) -> None:
        # Direct construction is deprecated; every path below warns and then
        # delegates to the same helpers that back the classmethod constructors.
        cdef str clsname = self.__class__.__name__
        if obj is not None:
            # populate self's attributes
            if check_has_dlpack(obj):
                warnings.warn(
                    f"Constructing a {clsname} directly from a DLPack-supporting object is deprecated; "
                    "Use `StridedMemoryView.from_dlpack` or `StridedMemoryView.from_any_interface` instead.",
                    DeprecationWarning,
                    stacklevel=2,
                )
                view_as_dlpack(obj, stream_ptr, self)
            else:
                warnings.warn(
                    f"Constructing a {clsname} directly from a CUDA-array-interface-supporting object is deprecated; "
                    "Use `StridedMemoryView.from_cuda_array_interface` or `StridedMemoryView.from_any_interface` instead.",
                    DeprecationWarning,
                    stacklevel=2,
                )
                view_as_cai(obj, stream_ptr, self)
        else:
            warnings.warn(
                f"Constructing an empty {clsname} is deprecated; "
                "use one of the classmethods `from_dlpack`, `from_cuda_array_interface` or `from_any_interface` "
                "to construct a StridedMemoryView from an object",
                DeprecationWarning,
                stacklevel=2,
            )

    @classmethod
    def from_dlpack(cls, obj: object, stream_ptr: int | None=None) -> StridedMemoryView:
        """Create a view from an object supporting the `DLPack <https://dmlc.github.io/dlpack/latest/>`_ protocol.

        Parameters
        ----------
        obj : object
            An object implementing the `DLPack <https://dmlc.github.io/dlpack/latest/>`_ protocol
            (via ``__dlpack__``).
        stream_ptr : int, optional
            Stream pointer for synchronization. If ``None``, no synchronization is performed.
        """
        cdef StridedMemoryView buf = StridedMemoryView.__new__(cls)
        view_as_dlpack(obj, stream_ptr, buf)
        return buf

    @classmethod
    def from_cuda_array_interface(cls, obj: object, stream_ptr: int | None=None) -> StridedMemoryView:
        """Create a view from an object supporting the `__cuda_array_interface__ <https://numba.readthedocs.io/en/stable/cuda/cuda_array_interface.html>`_ protocol.

        Parameters
        ----------
        obj : object
            An object implementing the `__cuda_array_interface__ <https://numba.readthedocs.io/en/stable/cuda/cuda_array_interface.html>`_ protocol.
        stream_ptr : int, optional
            Stream pointer for synchronization. If ``None``, no synchronization is performed.
        """
        cdef StridedMemoryView buf = StridedMemoryView.__new__(cls)
        view_as_cai(obj, stream_ptr, buf)
        return buf

    @classmethod
    def from_array_interface(cls, obj: object) -> StridedMemoryView:
        """Create a view from an object supporting the `__array_interface__ <https://numpy.org/doc/stable/reference/arrays.interface.html>`_ protocol.

        Parameters
        ----------
        obj : object
            An object implementing the `__array_interface__ <https://numpy.org/doc/stable/reference/arrays.interface.html>`_ protocol (e.g., a numpy array).
        """
        cdef StridedMemoryView buf = StridedMemoryView.__new__(cls)
        view_as_array_interface(obj, buf)
        return buf

    @classmethod
    def from_any_interface(cls, obj: object, stream_ptr: int | None = None) -> StridedMemoryView:
        """Create a view by automatically selecting the best available protocol.

        Tries `DLPack <https://dmlc.github.io/dlpack/latest/>`_ first, then falls back to
        `__cuda_array_interface__ <https://numba.readthedocs.io/en/stable/cuda/cuda_array_interface.html>`_.

        Parameters
        ----------
        obj : object
            An object implementing `DLPack <https://dmlc.github.io/dlpack/latest/>`_ or
            `__cuda_array_interface__ <https://numba.readthedocs.io/en/stable/cuda/cuda_array_interface.html>`_.
        stream_ptr : int, optional
            Stream pointer for synchronization. If ``None``, no synchronization is performed.
        """
        if check_has_dlpack(obj):
            return cls.from_dlpack(obj, stream_ptr)
        return cls.from_cuda_array_interface(obj, stream_ptr)

    @classmethod
    def from_buffer(
        cls,
        buffer : Buffer,
        shape : tuple[int, ...],
        strides : tuple[int, ...] | None = None,
        *,
        itemsize : int | None = None,
        dtype : numpy.dtype | None = None,
        is_readonly : bool = False
    ) -> StridedMemoryView:
        """
        Creates a :obj:`StridedMemoryView` instance from a :obj:`~_memory.Buffer` and shape and strides tuples.
        The Buffer can be either allocation coming from a :obj:`MemoryResource` or an external allocation
        wrapped in a :obj:`~_memory.Buffer` object with ``Buffer.from_handle(ptr, size, owner=...)``.

        .. caution::
            When creating a :obj:`StridedMemoryView` from a :obj:`~_memory.Buffer`,
            no synchronization is performed. It is the user's responsibility to ensure
            the data in ``buffer`` is properly synchronized when consuming the view.

        Parameters
        ----------
        buffer : :obj:`~_memory.Buffer`
            The buffer to create the view from.
        shape : :obj:`tuple`
            The number of elements along each dimension of the view.
        strides : :obj:`tuple`, optional
            The strides (in element counts) along each dimension.
            NOTE(review): ``None`` presumably yields a dense layout — confirm
            against :obj:`_StridedLayout`'s handling of ``strides=None``.
        itemsize : int, optional
            Size in bytes of a single element. At least one of ``itemsize``
            and ``dtype`` must be given; when both are, they must agree.
        dtype : :obj:`numpy.dtype`
            Optional dtype.
            If specified, the dtype's itemsize must match the layout's itemsize.
        is_readonly : bool, optional
            Whether to mark the view as readonly.
        """
        cdef StridedMemoryView view = StridedMemoryView.__new__(cls)
        if itemsize is None and dtype is None:
            raise ValueError("Either itemsize or dtype must be specified")
        if itemsize is not None and dtype is not None and itemsize != dtype.itemsize:
            raise ValueError(
                f"itemsize ({itemsize}) does not match dtype.itemsize ({dtype.itemsize})"
            )
        # (itemsize is None XOR dtype is None) OR they are equal
        view_buffer_strided(
            view,
            buffer,
            _StridedLayout(shape=shape, strides=strides, itemsize=getattr(dtype, "itemsize", itemsize)),
            dtype,
            is_readonly,
        )
        return view

    def __dealloc__(self):
        # dl_tensor is non-NULL only when this view consumed a DLPack capsule;
        # in that case the capsule was renamed to the "used" name (see
        # _smv_from_dlpack_capsule) and we must call the producer's deleter.
        if self.dl_tensor == NULL:
            return

        if cpython.PyCapsule_IsValid(
                self.metadata, DLPACK_VERSIONED_TENSOR_USED_NAME):
            data = cpython.PyCapsule_GetPointer(
                self.metadata, DLPACK_VERSIONED_TENSOR_USED_NAME)
            dlm_tensor_ver = <DLManagedTensorVersioned*>data
            dlm_tensor_ver.deleter(dlm_tensor_ver)
        elif cpython.PyCapsule_IsValid(
                self.metadata, DLPACK_TENSOR_USED_NAME):
            data = cpython.PyCapsule_GetPointer(
                self.metadata, DLPACK_TENSOR_USED_NAME)
            dlm_tensor = <DLManagedTensor*>data
            dlm_tensor.deleter(dlm_tensor)

    def view(
        self, layout : _StridedLayout | None = None, dtype : numpy.dtype | None = None
    ) -> StridedMemoryView:
        """
        Creates a new view with adjusted layout and dtype.
        Same as calling :meth:`from_buffer` with the current buffer.

        If both ``layout`` and ``dtype`` are ``None``, ``self`` is returned
        unchanged (no new object is created).
        """
        cdef StridedMemoryView view = StridedMemoryView.__new__(self.__class__)
        if layout is None and dtype is None:
            return self
        if layout is None:
            layout = self.get_layout()
        if dtype is None:
            dtype = self.get_dtype()
        view_buffer_strided(view, self.get_buffer(), layout, dtype, self.readonly)
        return view

    def as_tensor_map(
        self,
        box_dim=None,
        *,
        options=None,
        element_strides=None,
        data_type=None,
        interleave=None,
        swizzle=None,
        l2_promotion=None,
        oob_fill=None,
    ):
        """Create a tiled :obj:`TensorMapDescriptor` from this view.

        This is the public entry point for creating tiled tensor map
        descriptors in ``cuda.core``. Pass either ``box_dim`` and the
        individual keyword arguments directly, or provide bundled tiled
        options via ``options=``.
        """
        # Imported lazily to avoid a circular import at module load time.
        from cuda.core._tensor_map import TensorMapDescriptor

        # Forward only the keywords the caller actually supplied, so that
        # TensorMapDescriptor._from_tiled keeps its own defaults.
        kwargs = {}
        if options is not None:
            kwargs["options"] = options
        if element_strides is not None:
            kwargs["element_strides"] = element_strides
        if data_type is not None:
            kwargs["data_type"] = data_type
        if interleave is not None:
            kwargs["interleave"] = interleave
        if swizzle is not None:
            kwargs["swizzle"] = swizzle
        if l2_promotion is not None:
            kwargs["l2_promotion"] = l2_promotion
        if oob_fill is not None:
            kwargs["oob_fill"] = oob_fill
        return TensorMapDescriptor._from_tiled(self, box_dim, **kwargs)

    def copy_from(
        self, other : StridedMemoryView, stream : Stream,
        allocator = None,
        blocking : bool | None = None,
    ):
        """
        Copies the data from the other view into this view.

        The copy can be performed between following memory spaces:
        host-to-device, device-to-host, device-to-device (on the same device).

        Parameters
        ----------
        other : StridedMemoryView
            The view to copy data from.
        stream : Stream | None, optional
            The stream to schedule the copy on.
        allocator : MemoryResource | None, optional
            If temporary buffers are needed, the specified memory resources
            will be used to allocate the memory. If not specified, default
            resources will be used.
        blocking : bool | None, optional
            Whether the call should block until the copy is complete.
            * ``True``: the ``stream`` is synchronized with the host at the end of the call,
              blocking until the copy is complete.
            * ``False``: if possible, the call returns immediately once the copy is scheduled.
              However, in some cases of host-to-device or device-to-host copies, the call may
              still synchronize with the host if necessary.
            * ``None`` (default):
              * for device-to-device, it defaults to ``False`` (non-blocking),
              * for host-to-device or device-to-host, it defaults to ``True`` (blocking).
        """
        # Not implemented in this build; the docstring documents the intended contract.
        raise NotImplementedError("Sorry, not supported: copy_from")

    def copy_to(
        self, other : StridedMemoryView, stream : Stream | None = None,
        allocator = None,
        blocking : bool | None = None,
    ):
        """
        Copies the data from this view into the ``other`` view.

        For details, see :meth:`copy_from`.
        """
        raise NotImplementedError("Sorry, not supported: copy_to")

    def __dlpack__(
        self,
        *,
        stream: int | None = None,
        max_version: tuple[int, int] | None = None,
        dl_device: tuple[int, int] | None = None,
        copy: bool | None = None,
    ):
        """DLPack producer hook: export this view as a DLPack capsule."""
        # Similar to Buffer.__dlpack__: no implicit synchronization is performed.
        if dl_device is not None:
            raise BufferError("Sorry, not supported: dl_device other than None")
        if copy is True:
            raise BufferError("Sorry, not supported: copy=True")

        # Export the versioned (DLManagedTensorVersioned) form only when the
        # consumer declares support for DLPack >= 1.0 via max_version.
        cdef bint versioned
        if max_version is None:
            versioned = False
        else:
            if not isinstance(max_version, tuple) or len(max_version) != 2:
                raise BufferError(f"Expected max_version tuple[int, int], got {max_version}")
            versioned = max_version >= (1, 0)

        # NOTE: stream is accepted for protocol compatibility but not used.
        cdef object capsule = _smv_make_py_capsule(self, versioned)
        return capsule

    def __dlpack_device__(self) -> tuple[int, int]:
        """DLPack hook: return the (device_type, device_id) pair of this view."""
        cdef _DLDeviceType device_type
        cdef int32_t device_id
        _smv_get_dl_device(self, &device_type, &device_id)
        return (<int>device_type, int(device_id))

    @property
    def _layout(self) -> _StridedLayout:
        """
        The layout of the tensor. For StridedMemoryView created from DLPack or CAI,
        the layout is inferred from the tensor object's metadata.
        """
        return self.get_layout()

    @property
    def size(self) -> int:
        # Total number of elements (product of the shape).
        return self.get_layout().get_volume()

    @property
    def shape(self) -> tuple[int, ...]:
        """
        Shape of the tensor.
        """
        return self.get_layout().get_shape_tuple()

    @property
    def strides(self) -> tuple[int, ...] | None:
        """
        Strides of the tensor (in **counts**, not bytes).
        """
        return self.get_layout().get_strides_tuple()

    @property
    def dtype(self) -> numpy.dtype | None:
        """
        Data type of the tensor.

        Supports standard NumPy dtypes as well as narrow data types (e.g., ``bfloat16``)
        when the optional `ml_dtypes <https://github.com/jax-ml/ml_dtypes>`_ package is
        installed. If ``ml_dtypes`` is not available and such a tensor is encountered,
        a :obj:`NotImplementedError` will be raised.
        """
        return self.get_dtype()

    def __repr__(self):
        return (f"StridedMemoryView(ptr={self.ptr},\n"
              + f"                  shape={self.shape},\n"
              + f"                  strides={self.strides},\n"
              + f"                  itemsize={self._layout.itemsize},\n"
              + f"                  dtype={get_simple_repr(self.dtype)},\n"
              + f"                  device_id={self.device_id},\n"
              + f"                  is_device_accessible={self.is_device_accessible},\n"
              + f"                  readonly={self.readonly},\n"
              + f"                  exporting_obj={get_simple_repr(self.exporting_obj)})")

    cdef inline _StridedLayout get_layout(self):
        # Lazily derive the layout from whichever metadata source populated
        # this view: a DLPack tensor, a CAI dict, or (from from_buffer) a
        # pre-set self._layout.
        if self._layout is None:
            if self.dl_tensor:
                self._layout = layout_from_dlpack(self.dl_tensor)
            elif self.metadata is not None:
                self._layout = layout_from_cai(self.metadata)
            else:
                raise ValueError("Cannot infer layout from the exporting object")
        return self._layout

    cdef inline object get_buffer(self):
        """
        Returns Buffer instance with the underlying data.
        If the SMV was created from a Buffer, it will return the same Buffer instance.
        Otherwise, it will create a new instance with owner set to the exporting object.
        """
        if self._buffer is None:
            if isinstance(self.exporting_obj, Buffer):
                self._buffer = self.exporting_obj
            else:
                # size=0: the wrapped Buffer only carries the pointer; the
                # exporting object keeps the allocation alive via `owner`.
                self._buffer = Buffer.from_handle(self.ptr, 0, owner=self.exporting_obj)
        return self._buffer

    cdef inline object get_dtype(self):
        # Lazily convert dtype metadata; may legitimately stay None when the
        # view was created without dtype information (e.g. itemsize-only).
        if self._dtype is None:
            if self.dl_tensor != NULL:
                self._dtype = dtype_dlpack_to_numpy(&self.dl_tensor.dtype)
            elif self.metadata is not None:
                self._dtype = _typestr2dtype(self.metadata["typestr"])
        return self._dtype
cdef void _smv_pycapsule_deleter(object capsule) noexcept:
    """Capsule destructor for DLPack capsules produced by this module.

    Frees the managed tensor only if the capsule still carries an "unused"
    name, i.e. no consumer ever claimed it; once consumed (renamed to the
    "used" name), freeing becomes the consumer's responsibility.
    """
    cdef DLManagedTensor* dlm_tensor
    cdef DLManagedTensorVersioned* dlm_tensor_ver
    # Do not invoke the deleter on a used capsule.
    if cpython.PyCapsule_IsValid(capsule, DLPACK_TENSOR_UNUSED_NAME):
        dlm_tensor = <DLManagedTensor*>(
            cpython.PyCapsule_GetPointer(capsule, DLPACK_TENSOR_UNUSED_NAME)
        )
        if dlm_tensor.deleter:
            dlm_tensor.deleter(dlm_tensor)
    elif cpython.PyCapsule_IsValid(capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME):
        dlm_tensor_ver = <DLManagedTensorVersioned*>(
            cpython.PyCapsule_GetPointer(capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME)
        )
        if dlm_tensor_ver.deleter:
            dlm_tensor_ver.deleter(dlm_tensor_ver)
cdef inline void _smv_release_export_resources(void* manager_ctx, int64_t* shape_ptr) noexcept with gil:
    """Release the resources held by an exported managed tensor: the
    malloc'd shape/strides buffer and the Python reference (the exporting
    StridedMemoryView) stored in manager_ctx. Both arguments may be NULL.
    """
    if shape_ptr:
        stdlib.free(shape_ptr)
    if manager_ctx:
        cpython.Py_DECREF(<object>manager_ctx)
cdef void _smv_deleter(DLManagedTensor* tensor) noexcept with gil:
    """DLPack deleter for unversioned tensors exported by this module.
    Safe to call with NULL (no-op)."""
    if tensor:
        _smv_release_export_resources(tensor.manager_ctx, tensor.dl_tensor.shape)
        tensor.manager_ctx = NULL
        stdlib.free(tensor)
cdef void _smv_versioned_deleter(DLManagedTensorVersioned* tensor) noexcept with gil:
    """DLPack deleter for versioned (>= 1.0) tensors exported by this module.
    Safe to call with NULL (no-op)."""
    if tensor:
        _smv_release_export_resources(tensor.manager_ctx, tensor.dl_tensor.shape)
        tensor.manager_ctx = NULL
        stdlib.free(tensor)
cdef inline DLManagedTensorVersioned* _smv_allocate_dlm_tensor_versioned() except? NULL:
    """malloc a DLManagedTensorVersioned with its shape and manager_ctx
    pre-cleared so the versioned deleter is safe on a partially-built tensor.
    Raises MemoryError on allocation failure.
    """
    cdef DLManagedTensorVersioned* dlm_tensor_ver = NULL
    dlm_tensor_ver = <DLManagedTensorVersioned*>stdlib.malloc(sizeof(DLManagedTensorVersioned))
    if dlm_tensor_ver == NULL:
        raise MemoryError()
    dlm_tensor_ver.dl_tensor.shape = NULL
    dlm_tensor_ver.manager_ctx = NULL
    return dlm_tensor_ver
cdef inline DLManagedTensor* _smv_allocate_dlm_tensor() except? NULL:
    """malloc a DLManagedTensor with its shape and manager_ctx pre-cleared so
    the deleter is safe on a partially-built tensor. Raises MemoryError on
    allocation failure.
    """
    cdef DLManagedTensor* dlm_tensor = NULL
    dlm_tensor = <DLManagedTensor*>stdlib.malloc(sizeof(DLManagedTensor))
    if dlm_tensor == NULL:
        raise MemoryError()
    dlm_tensor.dl_tensor.shape = NULL
    dlm_tensor.manager_ctx = NULL
    return dlm_tensor
cdef inline int _smv_dtype_numpy_to_dlpack(object dtype_obj, DLDataType* out_dtype) except -1:
    """Translate *dtype_obj* (anything ``numpy.dtype`` accepts) into a DLPack
    ``DLDataType``, written to ``out_dtype``.

    Raises ``BufferError`` for structured, non-native-endian, or otherwise
    unsupported dtypes. Returns 0 on success.
    """
    cdef object np_dtype = numpy.dtype(dtype_obj)
    cdef str kind
    cdef int bits
    cdef uint8_t code

    # Reject dtypes DLPack cannot represent at all.
    if np_dtype.fields is not None:
        raise BufferError("Structured dtypes are not supported for DLPack export")
    if not np_dtype.isnative and np_dtype.byteorder not in ("=", "|"):
        raise BufferError("Non-native-endian dtypes are not supported for DLPack export")

    kind = np_dtype.kind
    bits = np_dtype.itemsize * 8

    if kind == "b":
        # DLPack only defines an 8-bit boolean.
        if bits != 8:
            raise BufferError(f"Unsupported bool dtype itemsize: {np_dtype.itemsize}")
        code = <uint8_t>kDLBool
    elif kind == "i" or kind == "u":
        if bits not in (8, 16, 32, 64):
            label = "signed integer" if kind == "i" else "unsigned integer"
            raise BufferError(f"Unsupported {label} dtype: {np_dtype}")
        code = <uint8_t>kDLInt if kind == "i" else <uint8_t>kDLUInt
    elif kind == "f":
        if bits not in (16, 32, 64):
            raise BufferError(f"Unsupported floating dtype: {np_dtype}")
        code = <uint8_t>kDLFloat
    elif kind == "c":
        if bits not in (64, 128):
            raise BufferError(f"Unsupported complex dtype: {np_dtype}")
        code = <uint8_t>kDLComplex
    else:
        raise BufferError(f"Unsupported dtype for DLPack export: {np_dtype}")

    out_dtype.code = code
    out_dtype.bits = <uint8_t>bits
    out_dtype.lanes = <uint16_t>1
    return 0
cdef inline int _smv_get_dl_device(
    StridedMemoryView view,
    _DLDeviceType* out_device_type,
    int32_t* out_device_id,
) except -1:
    """Determine the DLPack (device_type, device_id) for *view*, written to
    the two out-parameters. Returns 0 on success; raises BufferError when the
    underlying buffer is neither device- nor host-accessible.
    """
    cdef _DLDeviceType device_type
    cdef int32_t device_id
    cdef object buf
    cdef bint d
    cdef bint h
    if view.dl_tensor != NULL:
        # View was created from a DLPack tensor: trust its device metadata.
        device_type = view.dl_tensor.device.device_type
        if device_type == _kDLCUDA:
            device_id = view.dl_tensor.device.device_id
        else:
            # CPU, CUDAHost, and CUDAManaged use device_id=0 in DLPack.
            device_id = 0
    elif view.is_device_accessible:
        # No DLPack metadata: classify via the Buffer's accessibility flags.
        buf = view.get_buffer()
        d = buf.is_device_accessible
        h = buf.is_host_accessible
        if d and (not h):
            device_type = _kDLCUDA
            device_id = buf.device_id
        elif d and h:
            # We do not currently differentiate pinned vs managed here.
            device_type = _kDLCUDAHost
            device_id = 0
        elif (not d) and h:
            device_type = _kDLCPU
            device_id = 0
        else:
            raise BufferError("buffer is neither device-accessible nor host-accessible")
    else:
        # Host-only view.
        device_type = _kDLCPU
        device_id = 0

    out_device_type[0] = device_type
    out_device_id[0] = device_id
    return 0
cdef inline int _smv_setup_dl_tensor_common(
    DLTensor* dl_tensor,
    StridedMemoryView view,
    _StridedLayout layout,
) except -1:
    """Fill the fields of *dl_tensor* that are common to owned and borrowed
    exports: dtype, device, ndim, data pointer, and byte_offset. The shape and
    strides pointers are set by the caller. Raises BufferError if *view* has
    no dtype information.
    """
    cdef object dtype_obj = view.get_dtype()
    if dtype_obj is None:
        raise BufferError(
            "Cannot export StridedMemoryView via DLPack without dtype information; "
            "create the view with dtype specified."
        )
    _smv_dtype_numpy_to_dlpack(dtype_obj, &dl_tensor.dtype)
    _smv_get_dl_device(view, &dl_tensor.device.device_type, &dl_tensor.device.device_id)

    cdef int ndim = layout.base.ndim
    dl_tensor.ndim = ndim
    if layout.get_volume() == 0:
        # Zero-element tensors export a NULL data pointer.
        dl_tensor.data = NULL
    else:
        dl_tensor.data = <void*><intptr_t>view.ptr
    dl_tensor.byte_offset = 0
    return 0
cdef inline int _smv_setup_dl_tensor(DLTensor* dl_tensor, StridedMemoryView view) except -1:
    """Fill *dl_tensor* for an owning export: shape and strides are copied
    into a single malloc'd int64 buffer of length 2*ndim (shape first, then
    strides). Ownership of that buffer passes to the tensor; it is freed via
    dl_tensor.shape in _smv_release_export_resources.
    """
    cdef _StridedLayout layout = view.get_layout()
    _smv_setup_dl_tensor_common(dl_tensor, view, layout)

    cdef int i
    cdef int64_t* shape_strides = NULL
    cdef int64_t* strides_src = NULL
    cdef int ndim = dl_tensor.ndim
    if ndim == 0:
        dl_tensor.shape = NULL
        dl_tensor.strides = NULL
    else:
        # DLPack v1.2+ requires non-NULL strides for ndim != 0.
        shape_strides = <int64_t*>stdlib.malloc(sizeof(int64_t) * 2 * ndim)
        if shape_strides == NULL:
            raise MemoryError()
        try:
            strides_src = get_strides_ptr(layout.base)
            for i in range(ndim):
                shape_strides[i] = layout.base.shape[i]
                shape_strides[i + ndim] = strides_src[i]
        except Exception:
            # Buffer not yet published on dl_tensor; free it here to avoid a leak.
            stdlib.free(shape_strides)
            raise
        dl_tensor.shape = shape_strides
        dl_tensor.strides = shape_strides + ndim
    return 0
cdef inline int _smv_setup_dltensor_borrowed(DLTensor* dl_tensor, StridedMemoryView view) except -1:
    """Fill *dl_tensor* for a non-owning export: shape/strides point directly
    into the view's layout, so the tensor is only valid while *view* (and its
    layout) stay alive. Nothing here needs to be freed by the consumer.
    """
    cdef _StridedLayout layout = view.get_layout()
    _smv_setup_dl_tensor_common(dl_tensor, view, layout)

    if dl_tensor.ndim == 0:
        dl_tensor.shape = NULL
        dl_tensor.strides = NULL
    else:
        dl_tensor.shape = layout.base.shape
        # For temporary/non-owning exchange we provide explicit strides.
        dl_tensor.strides = get_strides_ptr(layout.base)
    return 0
cdef inline int _smv_fill_managed_tensor_versioned(
    DLManagedTensorVersioned* dlm_tensor_ver,
    StridedMemoryView view,
) except -1:
    """Populate a versioned managed tensor for export. Takes a strong
    reference on *view* (stored in manager_ctx); the reference is dropped by
    _smv_versioned_deleter.
    """
    cpython.Py_INCREF(view)
    dlm_tensor_ver.manager_ctx = <void*>view
    dlm_tensor_ver.deleter = _smv_versioned_deleter
    dlm_tensor_ver.version.major = DLPACK_MAJOR_VERSION
    dlm_tensor_ver.version.minor = DLPACK_MINOR_VERSION
    # Propagate the view's read-only flag via the DLPack 1.0 flags field.
    dlm_tensor_ver.flags = DLPACK_FLAG_BITMASK_READ_ONLY if view.readonly else 0
    _smv_setup_dl_tensor(&dlm_tensor_ver.dl_tensor, view)
    return 0
cdef inline int _smv_fill_managed_tensor(
    DLManagedTensor* dlm_tensor,
    StridedMemoryView view,
) except -1:
    """Populate an unversioned managed tensor for export. Takes a strong
    reference on *view* (stored in manager_ctx); the reference is dropped by
    _smv_deleter. Note the pre-1.0 struct has no read-only flag to propagate.
    """
    cpython.Py_INCREF(view)
    dlm_tensor.manager_ctx = <void*>view
    dlm_tensor.deleter = _smv_deleter
    _smv_setup_dl_tensor(&dlm_tensor.dl_tensor, view)
    return 0
cdef object _smv_make_py_capsule(StridedMemoryView view, bint versioned):
    """Build a DLPack capsule exporting *view*, versioned or not per the flag.

    On any failure before the capsule exists, the partially-built tensor is
    torn down here (both deleters tolerate NULL, so calling both is safe);
    after the capsule exists, its destructor owns cleanup.
    """
    cdef DLManagedTensor* dlm_tensor = NULL
    cdef DLManagedTensorVersioned* dlm_tensor_ver = NULL
    cdef object capsule = None
    cdef void* tensor_ptr = NULL
    cdef const char* capsule_name
    try:
        if versioned:
            dlm_tensor_ver = _smv_allocate_dlm_tensor_versioned()
            _smv_fill_managed_tensor_versioned(dlm_tensor_ver, view)
            tensor_ptr = <void*>dlm_tensor_ver
            capsule_name = DLPACK_VERSIONED_TENSOR_UNUSED_NAME
        else:
            dlm_tensor = _smv_allocate_dlm_tensor()
            _smv_fill_managed_tensor(dlm_tensor, view)
            tensor_ptr = <void*>dlm_tensor
            capsule_name = DLPACK_TENSOR_UNUSED_NAME
        capsule = cpython.PyCapsule_New(tensor_ptr, capsule_name, _smv_pycapsule_deleter)
    except Exception:
        if capsule is None:
            # Exactly one of the two pointers can be non-NULL here.
            _smv_deleter(dlm_tensor)
            _smv_versioned_deleter(dlm_tensor_ver)
        raise
    return capsule
cdef inline StridedMemoryView _smv_from_dlpack_capsule(object capsule, object exporting_obj):
    """Consume a DLPack capsule (versioned or not) into a StridedMemoryView.

    Renaming the capsule to its "used" name marks it consumed: the capsule's
    own destructor becomes a no-op and the returned view's __dealloc__ takes
    over calling the producer's deleter. Raises BufferError for invalid
    capsules or unsupported device types.
    """
    cdef void* data = NULL
    cdef DLTensor* dl_tensor = NULL
    cdef DLManagedTensorVersioned* dlm_tensor_ver = NULL
    cdef DLManagedTensor* dlm_tensor = NULL
    cdef bint is_readonly = False
    cdef const char* used_name = NULL
    if cpython.PyCapsule_IsValid(capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME):
        data = cpython.PyCapsule_GetPointer(capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME)
        dlm_tensor_ver = <DLManagedTensorVersioned*>data
        dl_tensor = &dlm_tensor_ver.dl_tensor
        is_readonly = bool((dlm_tensor_ver.flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0)
        used_name = DLPACK_VERSIONED_TENSOR_USED_NAME
    elif cpython.PyCapsule_IsValid(capsule, DLPACK_TENSOR_UNUSED_NAME):
        data = cpython.PyCapsule_GetPointer(capsule, DLPACK_TENSOR_UNUSED_NAME)
        dlm_tensor = <DLManagedTensor*>data
        dl_tensor = &dlm_tensor.dl_tensor
        # The pre-1.0 struct has no flags field; assume writable.
        is_readonly = False
        used_name = DLPACK_TENSOR_USED_NAME
    else:
        raise BufferError("Invalid DLPack capsule")

    cpython.PyCapsule_SetName(capsule, used_name)

    cdef StridedMemoryView view = StridedMemoryView.__new__(StridedMemoryView)
    view.dl_tensor = dl_tensor
    view.metadata = capsule
    # byte_offset must be honored: the logical start may differ from data.
    view.ptr = <intptr_t>(dl_tensor.data) + <intptr_t>(dl_tensor.byte_offset)
    view.readonly = is_readonly
    view.exporting_obj = exporting_obj
    if dl_tensor.device.device_type == _kDLCPU:
        view.device_id = -1
        view.is_device_accessible = False
    elif dl_tensor.device.device_type in (_kDLCUDA, _kDLCUDAHost, _kDLCUDAManaged):
        view.device_id = dl_tensor.device.device_id
        view.is_device_accessible = True
    else:
        raise BufferError("device not supported")
    return view
cdef int _smv_managed_tensor_allocator(
    DLTensor* prototype,
    DLManagedTensorVersioned** out,
    void* error_ctx,
    void (*SetError)(void* error_ctx, const char* kind, const char* message) noexcept,
) noexcept with gil:
    """Exchange-API allocator slot: intentionally unsupported.

    Always reports NotImplementedError both through the C-level SetError
    callback (for non-Python callers) and the Python error indicator, and
    returns -1.
    """
    if out != NULL:
        out[0] = NULL
    if SetError != NULL:
        SetError(error_ctx, b"NotImplementedError", b"managed_tensor_allocator is not supported by StridedMemoryView")
    cpython.PyErr_SetString(NotImplementedError, b"managed_tensor_allocator is not supported by StridedMemoryView")
    return -1
cdef int _smv_managed_tensor_from_py_object_no_sync(
    void* py_object,
    DLManagedTensorVersioned** out,
) noexcept with gil:
    """Exchange-API slot: export a StridedMemoryView (passed as an opaque
    PyObject*) to a freshly allocated DLManagedTensorVersioned in *out*.
    No stream synchronization is performed. Returns 0 on success, -1 with the
    Python error indicator set on failure.
    """
    cdef DLManagedTensorVersioned* dlm_tensor_ver = NULL
    if out == NULL:
        cpython.PyErr_SetString(RuntimeError, b"out cannot be NULL")
        return -1
    out[0] = NULL
    cdef object obj = <object>py_object
    if not isinstance(obj, StridedMemoryView):
        cpython.PyErr_SetString(TypeError, b"py_object must be a StridedMemoryView")
        return -1
    try:
        dlm_tensor_ver = _smv_allocate_dlm_tensor_versioned()
        _smv_fill_managed_tensor_versioned(dlm_tensor_ver, <StridedMemoryView>obj)
    except Exception:
        # Deleter tolerates NULL and partially-built tensors.
        _smv_versioned_deleter(dlm_tensor_ver)
        return -1
    out[0] = dlm_tensor_ver
    return 0
cdef int _smv_managed_tensor_to_py_object_no_sync(
    DLManagedTensorVersioned* tensor,
    void** out_py_object,
) noexcept with gil:
    # DLPack exchange-API slot: wrap an incoming DLManagedTensorVersioned in
    # a new StridedMemoryView, with no stream synchronization. On success the
    # view is written to *out_py_object* carrying one strong reference owned
    # by the caller, and 0 is returned; on failure -1 is returned with a
    # Python exception set.
    cdef object capsule
    cdef object py_view
    if out_py_object == NULL:
        cpython.PyErr_SetString(RuntimeError, b"out_py_object cannot be NULL")
        return -1
    out_py_object[0] = NULL
    if tensor == NULL:
        cpython.PyErr_SetString(RuntimeError, b"tensor cannot be NULL")
        return -1
    try:
        # Hand ownership of the tensor to a capsule so its deleter manages
        # the tensor's lifetime from here on.
        capsule = cpython.PyCapsule_New(
            <void*>tensor,
            DLPACK_VERSIONED_TENSOR_UNUSED_NAME,
            _smv_pycapsule_deleter,
        )
        py_view = _smv_from_dlpack_capsule(capsule, capsule)
        # The caller receives a strong reference through the void* out-param.
        cpython.Py_INCREF(py_view)
        out_py_object[0] = <void*>py_view
    except Exception:
        return -1
    return 0
cdef int _smv_dltensor_from_py_object_no_sync(
    void* py_object,
    DLTensor* out,
) noexcept with gil:
    # DLPack exchange-API slot: fill *out* with a DLTensor describing the
    # given StridedMemoryView, with no stream synchronization. Per the
    # helper's name, the DLTensor borrows from the view — presumably the
    # view must outlive *out*; confirm against _smv_setup_dltensor_borrowed.
    if out == NULL:
        cpython.PyErr_SetString(RuntimeError, b"out cannot be NULL")
        return -1
    cdef object obj = <object>py_object
    if not isinstance(obj, StridedMemoryView):
        cpython.PyErr_SetString(TypeError, b"py_object must be a StridedMemoryView")
        return -1
    try:
        _smv_setup_dltensor_borrowed(out, <StridedMemoryView>obj)
    except Exception:
        return -1
    return 0
cdef int _smv_current_work_stream(
    _DLDeviceType device_type,
    int32_t device_id,
    void** out_current_stream,
) noexcept with gil:
    # DLPack exchange-API slot: report the "current" work stream for the
    # given device. cuda.core keeps no global/current stream state today,
    # so NULL (no stream) is always reported. Returns 0 on success.
    if out_current_stream == NULL:
        cpython.PyErr_SetString(RuntimeError, b"out_current_stream cannot be NULL")
        return -1
    # cuda.core has no global/current stream state today.
    out_current_stream[0] = NULL
    return 0
cdef void _init_smv_dlpack_exchange_api():
    # Populate the module-level DLPack exchange-API table exactly once;
    # subsequent calls are no-ops.
    global _SMV_DLPACK_EXCHANGE_API_INITED
    if not _SMV_DLPACK_EXCHANGE_API_INITED:
        # Advertise the DLPack version this table conforms to.
        _SMV_DLPACK_EXCHANGE_API.header.version.major = DLPACK_MAJOR_VERSION
        _SMV_DLPACK_EXCHANGE_API.header.version.minor = DLPACK_MINOR_VERSION
        _SMV_DLPACK_EXCHANGE_API.header.prev_api = NULL
        # Wire up the per-slot callbacks implemented above.
        _SMV_DLPACK_EXCHANGE_API.managed_tensor_allocator = _smv_managed_tensor_allocator
        _SMV_DLPACK_EXCHANGE_API.managed_tensor_from_py_object_no_sync = _smv_managed_tensor_from_py_object_no_sync
        _SMV_DLPACK_EXCHANGE_API.managed_tensor_to_py_object_no_sync = _smv_managed_tensor_to_py_object_no_sync
        _SMV_DLPACK_EXCHANGE_API.dltensor_from_py_object_no_sync = _smv_dltensor_from_py_object_no_sync
        _SMV_DLPACK_EXCHANGE_API.current_work_stream = _smv_current_work_stream
        _SMV_DLPACK_EXCHANGE_API_INITED = True
# Initialize the exchange-API table at import time, then publish its capsule
# on the StridedMemoryView type.
_init_smv_dlpack_exchange_api()
# cdef classes are immutable types in Cython 3, so inject these attributes
# directly into the type dict.
# Both attribute spellings are published so consumers probing either naming
# convention find the same capsule.
(<dict>(<PyTypeObject*>StridedMemoryView).tp_dict)["__dlpack_c_exchange_api__"] = _SMV_DLPACK_EXCHANGE_API_CAPSULE
(<dict>(<PyTypeObject*>StridedMemoryView).tp_dict)["__c_dlpack_exchange_api__"] = _SMV_DLPACK_EXCHANGE_API_CAPSULE
# Invalidate CPython's type-attribute caches after editing tp_dict directly.
PyType_Modified(<PyTypeObject*>StridedMemoryView)
cdef str get_simple_repr(obj):
    """Return a compact type name for *obj* (or *obj* itself when it is a
    type): bare ``Name`` for builtins, ``module.Name`` otherwise."""
    # TODO: better handling in np.dtype objects
    cdef object cls = obj if isinstance(obj, type) else obj.__class__
    if cls.__module__ in (None, "builtins"):
        return cls.__name__
    return f"{cls.__module__}.{cls.__name__}"
cdef bint check_has_dlpack(obj) except*:
    """Probe *obj*'s data exchange protocols.

    Returns True when the full DLPack protocol is available, False when only
    the CUDA Array Interface is, and raises RuntimeError when neither is.
    """
    cdef bint supports_dlpack = (
        hasattr(obj, "__dlpack__") and hasattr(obj, "__dlpack_device__")
    )
    if supports_dlpack:
        return True
    if hasattr(obj, "__cuda_array_interface__"):
        return False
    raise RuntimeError(
        "the input object does not support any data exchange protocol")
cdef class _StridedMemoryViewProxy:
    """Lazy wrapper around an exchange-protocol-capable object.

    Records at construction which protocol *obj* supports and materializes
    a StridedMemoryView on demand via :meth:`view`.
    """
    cdef readonly:
        object obj
        bint has_dlpack

    def __init__(self, obj):
        self.obj = obj
        self.has_dlpack = check_has_dlpack(obj)

    cpdef StridedMemoryView view(self, stream_ptr=None):
        # Prefer DLPack; fall back to the CUDA Array Interface.
        if self.has_dlpack:
            return StridedMemoryView.from_dlpack(self.obj, stream_ptr)
        return StridedMemoryView.from_cuda_array_interface(self.obj, stream_ptr)
cdef StridedMemoryView view_as_dlpack(obj, stream_ptr, view=None):
    # Build a StridedMemoryView over *obj* via the DLPack protocol.
    #
    # stream_ptr semantics: None is rejected (ambiguous for view());
    # -1 means "no synchronization requested"; any other value is the
    # consumer stream handle passed through to obj.__dlpack__().
    cdef int dldevice, device_id
    cdef bint is_device_accessible, is_readonly
    is_device_accessible = False
    dldevice, device_id = obj.__dlpack_device__()
    if dldevice == _kDLCPU:
        assert device_id == 0
        # CPU memory: report device_id -1 (no CUDA device).
        device_id = -1
        if stream_ptr is None:
            raise BufferError("stream=None is ambiguous with view()")
        elif stream_ptr == -1:
            # No sync requested; __dlpack__ below then receives stream=None.
            stream_ptr = None
    elif dldevice == _kDLCUDA:
        assert device_id >= 0
        is_device_accessible = True
        # no need to check other stream values, it's a pass-through
        if stream_ptr is None:
            raise BufferError("stream=None is ambiguous with view()")
    elif dldevice in (_kDLCUDAHost, _kDLCUDAManaged):
        is_device_accessible = True
        # just do a pass-through without any checks, as pinned/managed memory can be
        # accessed on both host and device
    else:
        raise BufferError("device not supported")

    cdef object capsule
    try:
        # Prefer the DLPack 1.0 versioned capsule by passing max_version.
        capsule = obj.__dlpack__(
            stream=int(stream_ptr) if stream_ptr else None,
            max_version=(DLPACK_MAJOR_VERSION, DLPACK_MINOR_VERSION))
    except TypeError:
        # Exporter predates DLPack 1.0: retry without max_version.
        capsule = obj.__dlpack__(
            stream=int(stream_ptr) if stream_ptr else None)

    cdef void* data = NULL
    cdef DLTensor* dl_tensor
    cdef DLManagedTensorVersioned* dlm_tensor_ver
    cdef DLManagedTensor* dlm_tensor
    cdef const char *used_name
    if cpython.PyCapsule_IsValid(
            capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME):
        data = cpython.PyCapsule_GetPointer(
            capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME)
        dlm_tensor_ver = <DLManagedTensorVersioned*>data
        dl_tensor = &dlm_tensor_ver.dl_tensor
        # Only the versioned struct carries a read-only flag.
        is_readonly = bool((dlm_tensor_ver.flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0)
        used_name = DLPACK_VERSIONED_TENSOR_USED_NAME
    elif cpython.PyCapsule_IsValid(
            capsule, DLPACK_TENSOR_UNUSED_NAME):
        data = cpython.PyCapsule_GetPointer(
            capsule, DLPACK_TENSOR_UNUSED_NAME)
        dlm_tensor = <DLManagedTensor*>data
        dl_tensor = &dlm_tensor.dl_tensor
        is_readonly = False
        used_name = DLPACK_TENSOR_USED_NAME
    else:
        assert False

    # Mark the capsule as consumed so it cannot be imported a second time.
    cpython.PyCapsule_SetName(capsule, used_name)

    cdef StridedMemoryView buf = StridedMemoryView() if view is None else view
    buf.dl_tensor = dl_tensor
    # The capsule owns dl_tensor; keeping it as metadata ties its lifetime
    # to the view.
    buf.metadata = capsule
    # NOTE(review): byte_offset is not added here, unlike the capsule import
    # path elsewhere in this file which uses data + byte_offset — confirm
    # whether exporters reaching this path always report byte_offset == 0.
    buf.ptr = <intptr_t>(dl_tensor.data)
    buf.device_id = device_id
    buf.is_device_accessible = is_device_accessible
    buf.readonly = is_readonly
    buf.exporting_obj = obj

    return buf
@functools.lru_cache
def _typestr2dtype(str typestr):
    """Memoized mapping from an array-interface typestr to a ``numpy.dtype``."""
    dtype = numpy.dtype(typestr)
    return dtype
@functools.lru_cache
def _typestr2itemsize(str typestr):
    """Memoized element size in bytes for an array-interface typestr."""
    dtype = _typestr2dtype(typestr)
    return dtype.itemsize
cdef object dtype_dlpack_to_numpy(DLDataType* dtype):
    """Translate a DLPack ``DLDataType`` into a ``numpy.dtype`` instance.

    Raises
    ------
    NotImplementedError
        For vector dtypes (``lanes != 1``), or for bfloat16 when the
        optional ``ml_dtypes`` package is not installed.
    TypeError
        For unsupported bit widths or dtype codes.
    """
    cdef int bits = dtype.bits
    if dtype.lanes != 1:
        # TODO: return a NumPy structured dtype?
        raise NotImplementedError(
            f'vector dtypes (lanes={dtype.lanes}) is not supported')
    if dtype.code == kDLUInt:
        if bits == 8:
            np_dtype = numpy.uint8
        elif bits == 16:
            np_dtype = numpy.uint16
        elif bits == 32:
            np_dtype = numpy.uint32
        elif bits == 64:
            np_dtype = numpy.uint64
        else:
            raise TypeError('uint{} is not supported.'.format(bits))
    elif dtype.code == kDLInt:
        if bits == 8:
            np_dtype = numpy.int8
        elif bits == 16:
            np_dtype = numpy.int16
        elif bits == 32:
            np_dtype = numpy.int32
        elif bits == 64:
            np_dtype = numpy.int64
        else:
            raise TypeError('int{} is not supported.'.format(bits))
    elif dtype.code == kDLFloat:
        if bits == 16:
            np_dtype = numpy.float16
        elif bits == 32:
            np_dtype = numpy.float32
        elif bits == 64:
            np_dtype = numpy.float64
        else:
            raise TypeError('float{} is not supported.'.format(bits))
    elif dtype.code == kDLComplex:
        # TODO(leofang): support complex32
        if bits == 64:
            np_dtype = numpy.complex64
        elif bits == 128:
            np_dtype = numpy.complex128
        else:
            raise TypeError('complex{} is not supported.'.format(bits))
    elif dtype.code == kDLBool:
        if bits == 8:
            np_dtype = numpy.bool_
        else:
            raise TypeError(f'{bits}-bit bool is not supported')
    elif dtype.code == kDLBfloat:
        # Fix: previously any bit width was accepted and silently mapped to
        # bfloat16; DLPack's bfloat is a 16-bit type, so reject other widths.
        if bits != 16:
            raise TypeError('bfloat{} is not supported.'.format(bits))
        if bfloat16 is not None:
            np_dtype = numpy.dtype("bfloat16")
        else:
            # Fix: the two adjacent string literals previously concatenated
            # without a space ("...`ml_dtypes`to be installed.").
            raise NotImplementedError(
                'Support for bfloat16 within cuda-core requires `ml_dtypes` '
                'to be installed.'
            )
    else:
        raise TypeError('Unsupported dtype. dtype code: {}'.format(dtype.code))

    # We want the dtype object not just the type object
    return numpy.dtype(np_dtype)
cpdef StridedMemoryView view_as_cai(obj, stream_ptr, view=None):
    # Build a StridedMemoryView over *obj* via the CUDA Array Interface v3.
    #
    # stream_ptr semantics mirror view_as_dlpack: None is rejected, -1 skips
    # synchronization, otherwise the consumer stream is ordered after the
    # producer stream advertised in the interface dict.
    cdef dict cai_data = obj.__cuda_array_interface__
    if cai_data["version"] < 3:
        raise BufferError("only CUDA Array Interface v3 or above is supported")
    if cai_data.get("mask") is not None:
        raise BufferError("mask is not supported")
    if stream_ptr is None:
        raise BufferError("stream=None is ambiguous with view()")

    cdef StridedMemoryView buf = StridedMemoryView() if view is None else view
    buf.exporting_obj = obj
    buf.metadata = cai_data
    buf.dl_tensor = NULL
    # Validate shape/strides/typestr eagerly so constructor paths fail fast.
    buf.get_layout()
    buf.ptr, buf.readonly = cai_data["data"]
    buf.is_device_accessible = True
    if buf.ptr != 0:
        # Ask the driver which device owns this pointer.
        buf.device_id = handle_return(
            driver.cuPointerGetAttribute(
                driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL,
                buf.ptr))
    else:
        # Null data pointer (presumably a zero-size export): fall back to
        # the current context's device.
        buf.device_id = handle_return(driver.cuCtxGetDevice())

    cdef intptr_t producer_s, consumer_s
    cdef EventHandle h_event
    stream_ptr = int(stream_ptr)
    if stream_ptr != -1:
        stream = cai_data.get("stream")
        if stream is not None:
            producer_s = <intptr_t>(stream)
            consumer_s = <intptr_t>(stream_ptr)
            # CAI v3 producers must advertise a real stream handle (never 0).
            assert producer_s > 0
            # establish stream order
            if producer_s != consumer_s:
                with nogil:
                    # Record an event on the producer stream and make the
                    # consumer stream wait on it; no host-side blocking.
                    h_event = create_event_handle_noctx(cydriver.CUevent_flags.CU_EVENT_DISABLE_TIMING)
                    HANDLE_RETURN(cydriver.cuEventRecord(
                        as_cu(h_event), <cydriver.CUstream>producer_s))
                    HANDLE_RETURN(cydriver.cuStreamWaitEvent(
                        <cydriver.CUstream>consumer_s, as_cu(h_event), 0))

    return buf
cpdef StridedMemoryView view_as_array_interface(obj, view=None):
    """Wrap an object exposing the NumPy ``__array_interface__`` (v3+) in a
    StridedMemoryView. Host memory; masked interfaces are rejected."""
    cdef dict iface = obj.__array_interface__
    if iface["version"] < 3:
        raise BufferError("only NumPy Array Interface v3 or above is supported")
    if iface.get("mask") is not None:
        raise BufferError("mask is not supported")

    cdef StridedMemoryView result
    result = view if view is not None else StridedMemoryView()
    result.exporting_obj = obj
    result.metadata = iface
    result.dl_tensor = NULL
    # Validate shape/strides/typestr eagerly so constructor paths fail fast.
    result.get_layout()
    result.ptr, result.readonly = iface["data"]
    result.is_device_accessible = False
    result.device_id = handle_return(driver.cuCtxGetDevice())
    return result
def args_viewable_as_strided_memory(tuple arg_indices):
    """
    Decorator that wraps the positional arguments at *arg_indices* in proxy
    objects viewable as :obj:`StridedMemoryView`.

    This lets array/tensor attributes be accessed inside the function body
    while keeping it array-library-agnostic: each wrapped argument becomes
    an (undocumented) proxy regardless of its original source, and a
    :obj:`StridedMemoryView` is obtained by passing the (consumer) stream
    pointer (as a Python `int`) to the proxy's ``view()`` method. For
    example:

    .. code-block:: python

        @args_viewable_as_strided_memory((1,))
        def my_func(arg0, arg1, arg2, stream: Stream):
            # arg1 can be any object supporting DLPack or CUDA Array Interface
            view = arg1.view(stream.handle)
            assert isinstance(view, StridedMemoryView)
            ...

    Parameters
    ----------
    arg_indices : tuple
        The indices of the target positional arguments.
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            cdef int idx
            new_args = list(args)
            for idx in arg_indices:
                new_args[idx] = _StridedMemoryViewProxy(new_args[idx])
            return func(*new_args, **kwargs)
        return wrapper
    return decorator
cdef inline _StridedLayout layout_from_dlpack(DLTensor* dl_tensor):
    """Derive a _StridedLayout from a DLTensor's shape, strides and dtype.

    Raises ValueError when the element width is not a whole number of bytes.
    """
    cdef _StridedLayout layout = _StridedLayout.__new__(_StridedLayout)
    cdef int total_bits = dl_tensor.dtype.bits * dl_tensor.dtype.lanes
    cdef int itemsize = total_bits // 8
    if itemsize * 8 != total_bits:
        raise ValueError("dl_tensor.dtype.bits must be a multiple of 8")
    layout.init_from_ptr(dl_tensor.ndim, dl_tensor.shape, dl_tensor.strides, itemsize)
    return layout
cdef _StridedLayout layout_from_cai(object metadata):
    """Build a _StridedLayout from an array-interface metadata dict
    (keys used: ``shape``, optional ``strides``, ``typestr``)."""
    cdef _StridedLayout result = _StridedLayout.__new__(_StridedLayout)
    # Pull the fields in the same order errors would surface to callers.
    cdef object dims = metadata["shape"]
    cdef object steps = metadata.get("strides")
    cdef int elem_size = _typestr2itemsize(metadata["typestr"])
    result.init_from_tuple(dims, steps, elem_size, True)
    return result
cdef inline intptr_t get_data_ptr(object buffer, _StridedLayout layout) except? 0:
    """Absolute data address: buffer base plus the layout's slice offset."""
    cdef intptr_t base = <intptr_t>(int(buffer.handle))
    return base + layout.get_slice_offset_in_bytes()
cdef inline int view_buffer_strided(
    StridedMemoryView view,
    object buffer,
    _StridedLayout layout,
    object dtype,
    bint is_readonly,
) except -1:
    """Populate *view* so it describes *buffer* through *layout*.

    Validates the optional *dtype* against the layout's itemsize and, for
    buffers owned by a memory resource, that the buffer is large enough
    for the layout. Returns 0 on success.
    """
    if dtype is not None:
        dtype = numpy.dtype(dtype)
        if dtype.itemsize != layout.itemsize:
            raise ValueError(
                f"The dtype's itemsize ({dtype.itemsize}) does not match the layout's "
                f"itemsize ({layout.itemsize})."
            )
    # Check the layout's offset range [min_offset, max_offset] fits
    # within the [0, buffer.size - 1] range.
    # The required_size_in_bytes fails if min_offset < 0.
    # NB. For external memory, both positive and negative offsets can be valid,
    # but for a proper check we'd need to know both size and data offset,
    # while neither is reported by the packages.
    cdef bint owns_allocation = buffer.memory_resource is not None
    if owns_allocation:
        required = layout.get_required_size_in_bytes()
        if buffer.size < required:
            raise ValueError(
                f"Buffer size is too small for the layout. "
                f"Expected at least {required} bytes, "
                f"got {buffer.size} bytes."
            )
    # set the public attributes
    view.ptr = get_data_ptr(buffer, layout)
    view.device_id = buffer.device_id
    view.is_device_accessible = buffer.is_device_accessible
    view.readonly = is_readonly
    view.exporting_obj = view._buffer = buffer
    # no dlpack/cai metadata
    view.dl_tensor = NULL
    view.metadata = None
    # we get the layout from the caller
    view._layout = layout
    view._dtype = dtype
    return 0