Coverage for cuda / core / _memoryview.pyx: 59.06%

662 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-25 01:07 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4  

5from __future__ import annotations 

6  

7from ._dlpack cimport * 

8from libc.stdint cimport intptr_t 

9from cuda.core._layout cimport _StridedLayout, get_strides_ptr 

10from cuda.core._stream import Stream 

11  

12import functools 

13import warnings 

14  

15import numpy 

16  

17from cuda.bindings cimport cydriver 

18from cuda.core._resource_handles cimport ( 

19 EventHandle, 

20 create_event_handle_noctx, 

21 as_cu, 

22) 

23  

24from cuda.core._utils.cuda_utils import handle_return, driver 

25from cuda.core._utils.cuda_utils cimport HANDLE_RETURN 

26  

27  

28from cuda.core._memory import Buffer 

29  

30  

# Optional dependency: ml_dtypes supplies NumPy-compatible narrow dtypes
# (e.g. bfloat16). When it is not installed, ``bfloat16`` stays ``None`` and
# tensors using such dtypes cannot be described (see the ``dtype`` property).
try:
    from ml_dtypes import bfloat16
except ImportError:
    bfloat16 = None

# TODO(leofang): support NumPy structured dtypes


cdef extern from "Python.h":
    # Minimal CPython declarations used to mutate a type's dict at runtime
    # and invalidate the method cache afterwards via PyType_Modified.
    ctypedef struct PyTypeObject:
        void* tp_dict
    void PyType_Modified(PyTypeObject*)


# Module-owned DLPack exchange-API table, exported to other libraries through
# a named PyCapsule. The _INITED flag suggests the struct is populated lazily
# elsewhere in this module — NOTE(review): initialization site not visible in
# this chunk; confirm before relying on the capsule's contents at import time.
cdef DLPackExchangeAPI _SMV_DLPACK_EXCHANGE_API
cdef bint _SMV_DLPACK_EXCHANGE_API_INITED = False
_SMV_DLPACK_EXCHANGE_API_CAPSULE = cpython.PyCapsule_New(
    <void*>&_SMV_DLPACK_EXCHANGE_API,
    b"dlpack_exchange_api",
    NULL,
)

52  

53  

cdef class StridedMemoryView:
    """A class holding metadata of a strided dense array/tensor.

    A :obj:`StridedMemoryView` instance can be created in three ways:

    1. Using the :obj:`args_viewable_as_strided_memory` decorator (recommended)
    2. Explicit construction relying on DLPack or CUDA Array Interface, see below.
    3. From :obj:`~_memory.Buffer` and shape and size tuples (see
       :meth:`from_buffer` classmethod)

    ``StridedMemoryView(obj, stream_ptr)`` can be used to create a view from
    objects supporting either DLPack (up to v1.0) or CUDA Array Interface
    (CAI) v3. When wrapping an arbitrary object it will try the DLPack protocol
    first, then the CAI protocol. A :obj:`BufferError` is raised if neither is
    supported.

    Since either way would take a consumer stream, for DLPack it is passed to
    ``obj.__dlpack__()`` as-is (except for :obj:`None`, see below); for CAI, a
    stream order will be established between the consumer stream and the
    producer stream (from ``obj.__cuda_array_interface__()["stream"]``), as if
    ``cudaStreamWaitEvent`` is called by this method.

    To opt-out of the stream ordering operation in either DLPack or CAI,
    please pass ``stream_ptr=-1``. Note that this deviates (on purpose)
    from the semantics of ``obj.__dlpack__(stream=None, ...)`` since ``cuda.core``
    does not encourage using the (legacy) default/null stream, but is
    consistent with the CAI's semantics. For DLPack, ``stream=-1`` will be
    internally passed to ``obj.__dlpack__()`` instead.

    Parameters
    ----------
    obj : Any
        Any objects that supports either DLPack (up to v1.0) or CUDA Array
        Interface (v3).
    stream_ptr: int
        The pointer address (as Python `int`) to the **consumer** stream.
        Stream ordering will be properly established unless ``-1`` is passed.

    Attributes
    ----------
    ptr : int
        Pointer to the tensor buffer (as a Python `int`).
    device_id : int
        The device ID for where the tensor is located. It is -1 for CPU tensors
        (meaning those only accessible from the host).
    is_device_accessible : bool
        Whether the tensor data can be accessed on the GPU.
    readonly: bool
        Whether the tensor data can be modified in place.
    exporting_obj : Any
        A reference to the original tensor object that is being viewed.
        If the view is created with :meth:`from_buffer`,
        it will be the Buffer instance passed to the method.
    """
    def __init__(self, obj: object = None, stream_ptr: int | None = None) -> None:
        # Direct construction is deprecated in all three forms below; each
        # branch emits a DeprecationWarning pointing at the replacement
        # classmethod, then (for non-None obj) still performs the view.
        cdef str clsname = self.__class__.__name__
        if obj is not None:
            # populate self's attributes
            if check_has_dlpack(obj):
                warnings.warn(
                    f"Constructing a {clsname} directly from a DLPack-supporting object is deprecated; "
                    "Use `StridedMemoryView.from_dlpack` or `StridedMemoryView.from_any_interface` instead.",
                    DeprecationWarning,
                    stacklevel=2,
                )
                view_as_dlpack(obj, stream_ptr, self)
            else:
                warnings.warn(
                    f"Constructing a {clsname} directly from a CUDA-array-interface-supporting object is deprecated; "
                    "Use `StridedMemoryView.from_cuda_array_interface` or `StridedMemoryView.from_any_interface` instead.",
                    DeprecationWarning,
                    stacklevel=2,
                )
                view_as_cai(obj, stream_ptr, self)
        else:
            # Empty construction: warn but leave attributes unpopulated.
            warnings.warn(
                f"Constructing an empty {clsname} is deprecated; "
                "use one of the classmethods `from_dlpack`, `from_cuda_array_interface` or `from_any_interface` "
                "to construct a StridedMemoryView from an object",
                DeprecationWarning,
                stacklevel=2,
            )

    @classmethod
    def from_dlpack(cls, obj: object, stream_ptr: int | None=None) -> StridedMemoryView:
        """Create a view from an object supporting the `DLPack <https://dmlc.github.io/dlpack/latest/>`_ protocol.

        Parameters
        ----------
        obj : object
            An object implementing the `DLPack <https://dmlc.github.io/dlpack/latest/>`_ protocol
            (via ``__dlpack__``).
        stream_ptr : int, optional
            Stream pointer for synchronization. If ``None``, no synchronization is performed.
        """
        # __new__(cls) bypasses the deprecated __init__ path above.
        cdef StridedMemoryView buf = StridedMemoryView.__new__(cls)
        view_as_dlpack(obj, stream_ptr, buf)
        return buf

    @classmethod
    def from_cuda_array_interface(cls, obj: object, stream_ptr: int | None=None) -> StridedMemoryView:
        """Create a view from an object supporting the `__cuda_array_interface__ <https://numba.readthedocs.io/en/stable/cuda/cuda_array_interface.html>`_ protocol.

        Parameters
        ----------
        obj : object
            An object implementing the `__cuda_array_interface__ <https://numba.readthedocs.io/en/stable/cuda/cuda_array_interface.html>`_ protocol.
        stream_ptr : int, optional
            Stream pointer for synchronization. If ``None``, no synchronization is performed.
        """
        cdef StridedMemoryView buf = StridedMemoryView.__new__(cls)
        view_as_cai(obj, stream_ptr, buf)
        return buf

    @classmethod
    def from_array_interface(cls, obj: object) -> StridedMemoryView:
        """Create a view from an object supporting the `__array_interface__ <https://numpy.org/doc/stable/reference/arrays.interface.html>`_ protocol.

        No stream argument is taken: host arrays require no stream ordering.

        Parameters
        ----------
        obj : object
            An object implementing the `__array_interface__ <https://numpy.org/doc/stable/reference/arrays.interface.html>`_ protocol (e.g., a numpy array).
        """
        cdef StridedMemoryView buf = StridedMemoryView.__new__(cls)
        view_as_array_interface(obj, buf)
        return buf

    @classmethod
    def from_any_interface(cls, obj: object, stream_ptr: int | None = None) -> StridedMemoryView:
        """Create a view by automatically selecting the best available protocol.

        Tries `DLPack <https://dmlc.github.io/dlpack/latest/>`_ first, then falls back to
        `__cuda_array_interface__ <https://numba.readthedocs.io/en/stable/cuda/cuda_array_interface.html>`_.

        Parameters
        ----------
        obj : object
            An object implementing `DLPack <https://dmlc.github.io/dlpack/latest/>`_ or
            `__cuda_array_interface__ <https://numba.readthedocs.io/en/stable/cuda/cuda_array_interface.html>`_.
        stream_ptr : int, optional
            Stream pointer for synchronization. If ``None``, no synchronization is performed.
        """
        if check_has_dlpack(obj):
            return cls.from_dlpack(obj, stream_ptr)
        return cls.from_cuda_array_interface(obj, stream_ptr)

    @classmethod
    def from_buffer(
        cls,
        buffer : Buffer,
        shape : tuple[int, ...],
        strides : tuple[int, ...] | None = None,
        *,
        itemsize : int | None = None,
        dtype : numpy.dtype | None = None,
        is_readonly : bool = False
    ) -> StridedMemoryView:
        """
        Creates a :obj:`StridedMemoryView` instance from a :obj:`~_memory.Buffer` and shape and strides tuples.
        The Buffer can be either allocation coming from a :obj:`MemoryResource` or an external allocation
        wrapped in a :obj:`~_memory.Buffer` object with ``Buffer.from_handle(ptr, size, owner=...)``.

        .. caution::
            When creating a :obj:`StridedMemoryView` from a :obj:`~_memory.Buffer`,
            no synchronization is performed. It is the user's responsibility to ensure
            the data in ``buffer`` is properly synchronized when consuming the view.

        Parameters
        ----------
        buffer : :obj:`~_memory.Buffer`
            The buffer to create the view from.
        shape : :obj:`tuple`
            The shape of the view, in element counts per dimension.
        strides : :obj:`tuple`, optional
            The strides of the view, in element counts (not bytes).
            If ``None``, a dense C-contiguous layout is assumed.
        itemsize : int, optional
            The size of one element in bytes. Exactly one of ``itemsize`` or
            ``dtype`` must be given (or both, if they agree).
        dtype : :obj:`numpy.dtype`
            Optional dtype.
            If specified, the dtype's itemsize must match the layout's itemsize.
        is_readonly : bool, optional
            Whether to mark the view as readonly.

        Raises
        ------
        ValueError
            If neither ``itemsize`` nor ``dtype`` is given, or both are given
            but disagree on the element size.
        """
        cdef StridedMemoryView view = StridedMemoryView.__new__(cls)
        if itemsize is None and dtype is None:
            raise ValueError("Either itemsize or dtype must be specified")
        if itemsize is not None and dtype is not None and itemsize != dtype.itemsize:
            raise ValueError(
                f"itemsize ({itemsize}) does not match dtype.itemsize ({dtype.itemsize})"
            )
        # (itemsize is None XOR dtype is None) OR they are equal
        view_buffer_strided(
            view,
            buffer,
            _StridedLayout(shape=shape, strides=strides, itemsize=getattr(dtype, "itemsize", itemsize)),
            dtype,
            is_readonly,
        )
        return view

    def __dealloc__(self):
        # Views not backed by a DLPack tensor (e.g. from_buffer / CAI paths)
        # own no C-side tensor memory and need no cleanup here.
        if self.dl_tensor == NULL:
            return

        # The capsule name was switched to the "used" variant on consumption
        # (see _smv_from_dlpack_capsule / view_as_dlpack), so a valid "used"
        # capsule means we own the managed tensor and must run its deleter.
        if cpython.PyCapsule_IsValid(
                self.metadata, DLPACK_VERSIONED_TENSOR_USED_NAME):
            data = cpython.PyCapsule_GetPointer(
                self.metadata, DLPACK_VERSIONED_TENSOR_USED_NAME)
            dlm_tensor_ver = <DLManagedTensorVersioned*>data
            dlm_tensor_ver.deleter(dlm_tensor_ver)
        elif cpython.PyCapsule_IsValid(
                self.metadata, DLPACK_TENSOR_USED_NAME):
            data = cpython.PyCapsule_GetPointer(
                self.metadata, DLPACK_TENSOR_USED_NAME)
            dlm_tensor = <DLManagedTensor*>data
            dlm_tensor.deleter(dlm_tensor)

    def view(
        self, layout : _StridedLayout | None = None, dtype : numpy.dtype | None = None
    ) -> StridedMemoryView:
        """
        Creates a new view with adjusted layout and dtype.
        Same as calling :meth:`from_buffer` with the current buffer.

        Note: when both arguments are ``None`` the same instance (``self``)
        is returned, not a copy.
        """
        cdef StridedMemoryView view = StridedMemoryView.__new__(self.__class__)
        if layout is None and dtype is None:
            return self
        if layout is None:
            layout = self.get_layout()
        if dtype is None:
            dtype = self.get_dtype()
        view_buffer_strided(view, self.get_buffer(), layout, dtype, self.readonly)
        return view

    def as_tensor_map(
        self,
        box_dim=None,
        *,
        options=None,
        element_strides=None,
        data_type=None,
        interleave=None,
        swizzle=None,
        l2_promotion=None,
        oob_fill=None,
    ):
        """Create a tiled :obj:`TensorMapDescriptor` from this view.

        This is the public entry point for creating tiled tensor map
        descriptors in ``cuda.core``. Pass either ``box_dim`` and the
        individual keyword arguments directly, or provide bundled tiled
        options via ``options=``.
        """
        # Imported lazily to avoid a circular import at module load time.
        from cuda.core._tensor_map import TensorMapDescriptor

        # Forward only the keyword arguments the caller actually supplied,
        # so TensorMapDescriptor._from_tiled can apply its own defaults.
        kwargs = {}
        if options is not None:
            kwargs["options"] = options
        if element_strides is not None:
            kwargs["element_strides"] = element_strides
        if data_type is not None:
            kwargs["data_type"] = data_type
        if interleave is not None:
            kwargs["interleave"] = interleave
        if swizzle is not None:
            kwargs["swizzle"] = swizzle
        if l2_promotion is not None:
            kwargs["l2_promotion"] = l2_promotion
        if oob_fill is not None:
            kwargs["oob_fill"] = oob_fill
        return TensorMapDescriptor._from_tiled(self, box_dim, **kwargs)

    def copy_from(
        self, other : StridedMemoryView, stream : Stream,
        allocator = None,
        blocking : bool | None = None,
    ):
        """
        Copies the data from the other view into this view.

        The copy can be performed between following memory spaces:
        host-to-device, device-to-host, device-to-device (on the same device).

        Parameters
        ----------
        other : StridedMemoryView
            The view to copy data from.
        stream : Stream | None, optional
            The stream to schedule the copy on.
        allocator : MemoryResource | None, optional
            If temporary buffers are needed, the specified memory resources
            will be used to allocate the memory. If not specified, default
            resources will be used.
        blocking : bool | None, optional
            Whether the call should block until the copy is complete.
            * ``True``: the ``stream`` is synchronized with the host at the end of the call,
              blocking until the copy is complete.
            * ``False``: if possible, the call returns immediately once the copy is scheduled.
              However, in some cases of host-to-device or device-to-host copies, the call may
              still synchronize with the host if necessary.
            * ``None`` (default):
              * for device-to-device, it defaults to ``False`` (non-blocking),
              * for host-to-device or device-to-host, it defaults to ``True`` (blocking).
        """
        # Not implemented yet; kept as a documented placeholder of the API.
        raise NotImplementedError("Sorry, not supported: copy_from")

    def copy_to(
        self, other : StridedMemoryView, stream : Stream | None = None,
        allocator = None,
        blocking : bool | None = None,
    ):
        """
        Copies the data from this view into the ``other`` view.

        For details, see :meth:`copy_from`.
        """
        # Not implemented yet; kept as a documented placeholder of the API.
        raise NotImplementedError("Sorry, not supported: copy_to")

    def __dlpack__(
        self,
        *,
        stream: int | None = None,
        max_version: tuple[int, int] | None = None,
        dl_device: tuple[int, int] | None = None,
        copy: bool | None = None,
    ):
        # Similar to Buffer.__dlpack__: no implicit synchronization is performed.
        if dl_device is not None:
            raise BufferError("Sorry, not supported: dl_device other than None")
        if copy is True:
            raise BufferError("Sorry, not supported: copy=True")

        # Per the DLPack Python spec: no max_version means the consumer only
        # understands the legacy (unversioned) capsule.
        cdef bint versioned
        if max_version is None:
            versioned = False
        else:
            if not isinstance(max_version, tuple) or len(max_version) != 2:
                raise BufferError(f"Expected max_version tuple[int, int], got {max_version}")
            versioned = max_version >= (1, 0)

        # NOTE: stream is accepted for protocol compatibility but not used.
        cdef object capsule = _smv_make_py_capsule(self, versioned)
        return capsule

    def __dlpack_device__(self) -> tuple[int, int]:
        # Returns (DLDeviceType, device_id) per the DLPack protocol.
        cdef _DLDeviceType device_type
        cdef int32_t device_id
        _smv_get_dl_device(self, &device_type, &device_id)
        return (<int>device_type, int(device_id))

    @property
    def _layout(self) -> _StridedLayout:
        """
        The layout of the tensor. For StridedMemoryView created from DLPack or CAI,
        the layout is inferred from the tensor object's metadata.
        """
        return self.get_layout()

    @property
    def size(self) -> int:
        """Total number of elements in the tensor."""
        return self.get_layout().get_volume()

    @property
    def shape(self) -> tuple[int, ...]:
        """
        Shape of the tensor.
        """
        return self.get_layout().get_shape_tuple()

    @property
    def strides(self) -> tuple[int, ...] | None:
        """
        Strides of the tensor (in **counts**, not bytes).
        """
        return self.get_layout().get_strides_tuple()

    @property
    def dtype(self) -> numpy.dtype | None:
        """
        Data type of the tensor.

        Supports standard NumPy dtypes as well as narrow data types (e.g., ``bfloat16``)
        when the optional `ml_dtypes <https://github.com/jax-ml/ml_dtypes>`_ package is
        installed. If ``ml_dtypes`` is not available and such a tensor is encountered,
        a :obj:`NotImplementedError` will be raised.
        """
        return self.get_dtype()

    def __repr__(self):
        return (f"StridedMemoryView(ptr={self.ptr},\n"
              + f"                  shape={self.shape},\n"
              + f"                  strides={self.strides},\n"
              + f"                  itemsize={self._layout.itemsize},\n"
              + f"                  dtype={get_simple_repr(self.dtype)},\n"
              + f"                  device_id={self.device_id},\n"
              + f"                  is_device_accessible={self.is_device_accessible},\n"
              + f"                  readonly={self.readonly},\n"
              + f"                  exporting_obj={get_simple_repr(self.exporting_obj)})")

    cdef inline _StridedLayout get_layout(self):
        # Lazily derive and cache the layout: from the DLPack tensor when one
        # is attached, otherwise from the CAI metadata dict.
        if self._layout is None:
            if self.dl_tensor:
                self._layout = layout_from_dlpack(self.dl_tensor)
            elif self.metadata is not None:
                self._layout = layout_from_cai(self.metadata)
            else:
                raise ValueError("Cannot infer layout from the exporting object")
        return self._layout

    cdef inline object get_buffer(self):
        """
        Returns Buffer instance with the underlying data.
        If the SMV was created from a Buffer, it will return the same Buffer instance.
        Otherwise, it will create a new instance with owner set to the exporting object.
        """
        if self._buffer is None:
            if isinstance(self.exporting_obj, Buffer):
                self._buffer = self.exporting_obj
            else:
                # Size is not tracked for foreign exporters, hence 0; the
                # exporting object keeps the allocation alive as `owner`.
                self._buffer = Buffer.from_handle(self.ptr, 0, owner=self.exporting_obj)
        return self._buffer

    cdef inline object get_dtype(self):
        # Lazily derive and cache the dtype from whichever protocol metadata
        # is present. May remain None (e.g. from_buffer without dtype).
        if self._dtype is None:
            if self.dl_tensor != NULL:
                self._dtype = dtype_dlpack_to_numpy(&self.dl_tensor.dtype)
            elif self.metadata is not None:
                self._dtype = _typestr2dtype(self.metadata["typestr"])
        return self._dtype

485  

486  

cdef void _smv_pycapsule_deleter(object capsule) noexcept:
    # Capsule destructor for capsules produced by _smv_make_py_capsule.
    # Runs only if the consumer never took ownership: consumption renames the
    # capsule to the "used" name, making both IsValid checks below fail.
    cdef DLManagedTensor* dlm_tensor
    cdef DLManagedTensorVersioned* dlm_tensor_ver
    # Do not invoke the deleter on a used capsule.
    if cpython.PyCapsule_IsValid(capsule, DLPACK_TENSOR_UNUSED_NAME):
        dlm_tensor = <DLManagedTensor*>(
            cpython.PyCapsule_GetPointer(capsule, DLPACK_TENSOR_UNUSED_NAME)
        )
        if dlm_tensor.deleter:
            dlm_tensor.deleter(dlm_tensor)
    elif cpython.PyCapsule_IsValid(capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME):
        dlm_tensor_ver = <DLManagedTensorVersioned*>(
            cpython.PyCapsule_GetPointer(capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME)
        )
        if dlm_tensor_ver.deleter:
            dlm_tensor_ver.deleter(dlm_tensor_ver)

503  

504  

cdef inline void _smv_release_export_resources(void* manager_ctx, int64_t* shape_ptr) noexcept with gil:
    # Shared teardown for exported managed tensors: frees the malloc'd
    # shape/strides array and drops the reference taken on the exporting
    # StridedMemoryView (stored as manager_ctx). NULL-safe on both inputs.
    if shape_ptr:
        stdlib.free(shape_ptr)
    if manager_ctx:
        cpython.Py_DECREF(<object>manager_ctx)

510  

511  

cdef void _smv_deleter(DLManagedTensor* tensor) noexcept with gil:
    # DLManagedTensor.deleter for unversioned exports. NULL-safe so it can
    # also be called directly on a possibly-unallocated pointer during
    # error cleanup (see _smv_make_py_capsule).
    if tensor:
        _smv_release_export_resources(tensor.manager_ctx, tensor.dl_tensor.shape)
        tensor.manager_ctx = NULL
        stdlib.free(tensor)

517  

518  

cdef void _smv_versioned_deleter(DLManagedTensorVersioned* tensor) noexcept with gil:
    # DLManagedTensorVersioned.deleter for versioned exports. NULL-safe so it
    # can also be called directly on a possibly-unallocated pointer during
    # error cleanup (see _smv_make_py_capsule).
    if tensor:
        _smv_release_export_resources(tensor.manager_ctx, tensor.dl_tensor.shape)
        tensor.manager_ctx = NULL
        stdlib.free(tensor)

524  

525  

cdef inline DLManagedTensorVersioned* _smv_allocate_dlm_tensor_versioned() except? NULL:
    # Allocates a zero-initialized-enough DLManagedTensorVersioned: the shape
    # pointer and manager_ctx are explicitly NULLed so the NULL-safe deleters
    # can be invoked on a partially-initialized struct during error handling.
    cdef DLManagedTensorVersioned* dlm_tensor_ver = NULL
    dlm_tensor_ver = <DLManagedTensorVersioned*>stdlib.malloc(sizeof(DLManagedTensorVersioned))
    if dlm_tensor_ver == NULL:
        raise MemoryError()
    dlm_tensor_ver.dl_tensor.shape = NULL
    dlm_tensor_ver.manager_ctx = NULL
    return dlm_tensor_ver

534  

535  

cdef inline DLManagedTensor* _smv_allocate_dlm_tensor() except? NULL:
    # Unversioned counterpart of _smv_allocate_dlm_tensor_versioned; see
    # the note there about NULLing fields for safe partial-init cleanup.
    cdef DLManagedTensor* dlm_tensor = NULL
    dlm_tensor = <DLManagedTensor*>stdlib.malloc(sizeof(DLManagedTensor))
    if dlm_tensor == NULL:
        raise MemoryError()
    dlm_tensor.dl_tensor.shape = NULL
    dlm_tensor.manager_ctx = NULL
    return dlm_tensor

544  

545  

cdef inline int _smv_dtype_numpy_to_dlpack(object dtype_obj, DLDataType* out_dtype) except -1:
    # Translates a NumPy dtype (or anything numpy.dtype() accepts) into a
    # DLPack DLDataType, writing the result into out_dtype. Raises
    # BufferError for dtypes DLPack cannot represent. Returns 0 on success.
    cdef object np_dtype = numpy.dtype(dtype_obj)
    if np_dtype.fields is not None:
        raise BufferError("Structured dtypes are not supported for DLPack export")
    # NOTE: the second clause is redundant (non-native dtypes never report
    # byteorder "=" or "|") but harmless; kept for defensive clarity.
    if not np_dtype.isnative and np_dtype.byteorder not in ("=", "|"):
        raise BufferError("Non-native-endian dtypes are not supported for DLPack export")

    cdef str kind = np_dtype.kind
    cdef int bits = np_dtype.itemsize * 8
    cdef uint8_t code
    if kind == "b":
        if bits != 8:
            raise BufferError(f"Unsupported bool dtype itemsize: {np_dtype.itemsize}")
        code = <uint8_t>kDLBool
    elif kind == "i":
        if bits not in (8, 16, 32, 64):
            raise BufferError(f"Unsupported signed integer dtype: {np_dtype}")
        code = <uint8_t>kDLInt
    elif kind == "u":
        if bits not in (8, 16, 32, 64):
            raise BufferError(f"Unsupported unsigned integer dtype: {np_dtype}")
        code = <uint8_t>kDLUInt
    elif kind == "f":
        if bits not in (16, 32, 64):
            raise BufferError(f"Unsupported floating dtype: {np_dtype}")
        code = <uint8_t>kDLFloat
    elif kind == "c":
        if bits not in (64, 128):
            raise BufferError(f"Unsupported complex dtype: {np_dtype}")
        code = <uint8_t>kDLComplex
    else:
        # Covers datetimes, strings, objects, and custom kinds (NOTE(review):
        # ml_dtypes' bfloat16 reports a non-standard kind and would land here
        # too — confirm whether bfloat16 export is intended to be supported).
        raise BufferError(f"Unsupported dtype for DLPack export: {np_dtype}")

    out_dtype.code = code
    out_dtype.bits = <uint8_t>bits
    out_dtype.lanes = <uint16_t>1
    return 0

583  

584  

cdef inline int _smv_get_dl_device(
    StridedMemoryView view,
    _DLDeviceType* out_device_type,
    int32_t* out_device_id,
) except -1:
    # Determines the DLPack (device_type, device_id) pair for a view.
    # Preference order: the attached DLPack tensor's own device; otherwise
    # the backing Buffer's accessibility flags; otherwise plain CPU.
    # Returns 0 on success, raises BufferError if the buffer is accessible
    # from neither host nor device.
    cdef _DLDeviceType device_type
    cdef int32_t device_id
    cdef object buf
    cdef bint d
    cdef bint h
    if view.dl_tensor != NULL:
        device_type = view.dl_tensor.device.device_type
        if device_type == _kDLCUDA:
            device_id = view.dl_tensor.device.device_id
        else:
            # CPU, CUDAHost, and CUDAManaged use device_id=0 in DLPack.
            device_id = 0
    elif view.is_device_accessible:
        buf = view.get_buffer()
        d = buf.is_device_accessible
        h = buf.is_host_accessible
        if d and (not h):
            device_type = _kDLCUDA
            device_id = buf.device_id
        elif d and h:
            # We do not currently differentiate pinned vs managed here.
            device_type = _kDLCUDAHost
            device_id = 0
        elif (not d) and h:
            device_type = _kDLCPU
            device_id = 0
        else:
            raise BufferError("buffer is neither device-accessible nor host-accessible")
    else:
        device_type = _kDLCPU
        device_id = 0

    out_device_type[0] = device_type
    out_device_id[0] = device_id
    return 0

625  

626  

cdef inline int _smv_setup_dl_tensor_common(
    DLTensor* dl_tensor,
    StridedMemoryView view,
    _StridedLayout layout,
) except -1:
    # Fills the layout-independent DLTensor fields (dtype, device, ndim,
    # data, byte_offset) shared by the owning and borrowing export paths.
    # shape/strides are left for the caller to populate. Returns 0 on success.
    cdef object dtype_obj = view.get_dtype()
    if dtype_obj is None:
        raise BufferError(
            "Cannot export StridedMemoryView via DLPack without dtype information; "
            "create the view with dtype specified."
        )
    _smv_dtype_numpy_to_dlpack(dtype_obj, &dl_tensor.dtype)
    _smv_get_dl_device(view, &dl_tensor.device.device_type, &dl_tensor.device.device_id)

    cdef int ndim = layout.base.ndim
    dl_tensor.ndim = ndim
    if layout.get_volume() == 0:
        # Zero-element tensors export a NULL data pointer.
        dl_tensor.data = NULL
    else:
        dl_tensor.data = <void*><intptr_t>view.ptr
    dl_tensor.byte_offset = 0
    return 0

649  

650  

cdef inline int _smv_setup_dl_tensor(DLTensor* dl_tensor, StridedMemoryView view) except -1:
    # Owning export path: copies shape and strides into a single malloc'd
    # int64_t[2*ndim] array (shape first, strides second) whose lifetime is
    # tied to the managed tensor and released by the export deleters.
    cdef _StridedLayout layout = view.get_layout()
    _smv_setup_dl_tensor_common(dl_tensor, view, layout)

    cdef int i
    cdef int64_t* shape_strides = NULL
    cdef int64_t* strides_src = NULL
    cdef int ndim = dl_tensor.ndim
    if ndim == 0:
        dl_tensor.shape = NULL
        dl_tensor.strides = NULL
    else:
        # DLPack v1.2+ requires non-NULL strides for ndim != 0.
        shape_strides = <int64_t*>stdlib.malloc(sizeof(int64_t) * 2 * ndim)
        if shape_strides == NULL:
            raise MemoryError()
        try:
            strides_src = get_strides_ptr(layout.base)
            for i in range(ndim):
                shape_strides[i] = layout.base.shape[i]
                shape_strides[i + ndim] = strides_src[i]
        except Exception:
            # Free eagerly: dl_tensor.shape is not yet set, so the deleters
            # would not know about this allocation.
            stdlib.free(shape_strides)
            raise
        dl_tensor.shape = shape_strides
        dl_tensor.strides = shape_strides + ndim
    return 0

678  

679  

cdef inline int _smv_setup_dltensor_borrowed(DLTensor* dl_tensor, StridedMemoryView view) except -1:
    # Borrowing export path: points shape/strides directly at the layout's
    # internal arrays, with no copies. The resulting DLTensor is only valid
    # while `view` (and its layout) stays alive — caller must guarantee that.
    cdef _StridedLayout layout = view.get_layout()
    _smv_setup_dl_tensor_common(dl_tensor, view, layout)

    if dl_tensor.ndim == 0:
        dl_tensor.shape = NULL
        dl_tensor.strides = NULL
    else:
        dl_tensor.shape = layout.base.shape
        # For temporary/non-owning exchange we provide explicit strides.
        dl_tensor.strides = get_strides_ptr(layout.base)
    return 0

692  

693  

cdef inline int _smv_fill_managed_tensor_versioned(
    DLManagedTensorVersioned* dlm_tensor_ver,
    StridedMemoryView view,
) except -1:
    # Populates a freshly-allocated versioned managed tensor. Takes a strong
    # reference on `view` (released by _smv_versioned_deleter via
    # _smv_release_export_resources). The INCREF happens before any operation
    # that can raise, so the deleter can always balance it.
    cpython.Py_INCREF(view)
    dlm_tensor_ver.manager_ctx = <void*>view
    dlm_tensor_ver.deleter = _smv_versioned_deleter
    dlm_tensor_ver.version.major = DLPACK_MAJOR_VERSION
    dlm_tensor_ver.version.minor = DLPACK_MINOR_VERSION
    dlm_tensor_ver.flags = DLPACK_FLAG_BITMASK_READ_ONLY if view.readonly else 0
    _smv_setup_dl_tensor(&dlm_tensor_ver.dl_tensor, view)
    return 0

706  

707  

cdef inline int _smv_fill_managed_tensor(
    DLManagedTensor* dlm_tensor,
    StridedMemoryView view,
) except -1:
    # Unversioned counterpart of _smv_fill_managed_tensor_versioned (legacy
    # DLPack has no version/flags fields, so readonly cannot be conveyed).
    cpython.Py_INCREF(view)
    dlm_tensor.manager_ctx = <void*>view
    dlm_tensor.deleter = _smv_deleter
    _smv_setup_dl_tensor(&dlm_tensor.dl_tensor, view)
    return 0

717  

718  

cdef object _smv_make_py_capsule(StridedMemoryView view, bint versioned):
    # Builds the PyCapsule returned by StridedMemoryView.__dlpack__:
    # a versioned DLManagedTensorVersioned capsule when `versioned`,
    # otherwise a legacy DLManagedTensor capsule. On any failure before the
    # capsule exists, both deleters are invoked for cleanup — each is
    # NULL-safe, and exactly one of the two pointers can be non-NULL.
    cdef DLManagedTensor* dlm_tensor = NULL
    cdef DLManagedTensorVersioned* dlm_tensor_ver = NULL
    cdef object capsule = None
    cdef void* tensor_ptr = NULL
    cdef const char* capsule_name
    try:
        if versioned:
            dlm_tensor_ver = _smv_allocate_dlm_tensor_versioned()
            _smv_fill_managed_tensor_versioned(dlm_tensor_ver, view)
            tensor_ptr = <void*>dlm_tensor_ver
            capsule_name = DLPACK_VERSIONED_TENSOR_UNUSED_NAME
        else:
            dlm_tensor = _smv_allocate_dlm_tensor()
            _smv_fill_managed_tensor(dlm_tensor, view)
            tensor_ptr = <void*>dlm_tensor
            capsule_name = DLPACK_TENSOR_UNUSED_NAME
        capsule = cpython.PyCapsule_New(tensor_ptr, capsule_name, _smv_pycapsule_deleter)
    except Exception:
        # Once the capsule exists it owns the tensor (its destructor will
        # clean up); only free manually when capsule creation never happened.
        if capsule is None:
            _smv_deleter(dlm_tensor)
            _smv_versioned_deleter(dlm_tensor_ver)
        raise
    return capsule

743  

744  

cdef inline StridedMemoryView _smv_from_dlpack_capsule(object capsule, object exporting_obj):
    # Consumes a producer's DLPack capsule (versioned or legacy) and wraps it
    # in a StridedMemoryView. Ownership transfer follows the DLPack contract:
    # the capsule is renamed to its "used" name so its destructor becomes a
    # no-op, and the view's __dealloc__ runs the managed tensor's deleter.
    cdef void* data = NULL
    cdef DLTensor* dl_tensor = NULL
    cdef DLManagedTensorVersioned* dlm_tensor_ver = NULL
    cdef DLManagedTensor* dlm_tensor = NULL
    cdef bint is_readonly = False
    cdef const char* used_name = NULL
    if cpython.PyCapsule_IsValid(capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME):
        data = cpython.PyCapsule_GetPointer(capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME)
        dlm_tensor_ver = <DLManagedTensorVersioned*>data
        dl_tensor = &dlm_tensor_ver.dl_tensor
        is_readonly = bool((dlm_tensor_ver.flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0)
        used_name = DLPACK_VERSIONED_TENSOR_USED_NAME
    elif cpython.PyCapsule_IsValid(capsule, DLPACK_TENSOR_UNUSED_NAME):
        data = cpython.PyCapsule_GetPointer(capsule, DLPACK_TENSOR_UNUSED_NAME)
        dlm_tensor = <DLManagedTensor*>data
        dl_tensor = &dlm_tensor.dl_tensor
        # Legacy (unversioned) DLPack cannot express readonly-ness.
        is_readonly = False
        used_name = DLPACK_TENSOR_USED_NAME
    else:
        raise BufferError("Invalid DLPack capsule")

    # Mark the capsule as consumed so its own destructor won't double-free.
    cpython.PyCapsule_SetName(capsule, used_name)

    cdef StridedMemoryView view = StridedMemoryView.__new__(StridedMemoryView)
    view.dl_tensor = dl_tensor
    view.metadata = capsule
    view.ptr = <intptr_t>(dl_tensor.data) + <intptr_t>(dl_tensor.byte_offset)
    view.readonly = is_readonly
    view.exporting_obj = exporting_obj
    if dl_tensor.device.device_type == _kDLCPU:
        view.device_id = -1
        view.is_device_accessible = False
    elif dl_tensor.device.device_type in (_kDLCUDA, _kDLCUDAHost, _kDLCUDAManaged):
        view.device_id = dl_tensor.device.device_id
        view.is_device_accessible = True
    else:
        raise BufferError("device not supported")
    return view

784  

785  

cdef int _smv_managed_tensor_allocator(
    DLTensor* prototype,
    DLManagedTensorVersioned** out,
    void* error_ctx,
    void (*SetError)(void* error_ctx, const char* kind, const char* message) noexcept,
) noexcept with gil:
    """Exchange-API allocator slot; StridedMemoryView does not allocate tensors.

    Always fails: the error is reported both through the caller's SetError
    callback (when one is provided) and as a pending Python
    NotImplementedError, then -1 is returned. *out* is cleared first so the
    caller never observes a stale pointer.
    """
    if out != NULL:
        out[0] = NULL
    if SetError != NULL:
        SetError(error_ctx, b"NotImplementedError", b"managed_tensor_allocator is not supported by StridedMemoryView")
    cpython.PyErr_SetString(NotImplementedError, b"managed_tensor_allocator is not supported by StridedMemoryView")
    return -1

798  

799  

cdef int _smv_managed_tensor_from_py_object_no_sync(
    void* py_object,
    DLManagedTensorVersioned** out,
) noexcept with gil:
    """Export a StridedMemoryView (passed as ``void*``) as a versioned managed tensor.

    On success writes the freshly allocated tensor to ``out[0]`` and returns 0.
    On failure returns -1 with a Python exception set; the partially built
    tensor is handed to the versioned deleter.
    """
    cdef DLManagedTensorVersioned* dlm_tensor_ver = NULL
    if out == NULL:
        cpython.PyErr_SetString(RuntimeError, b"out cannot be NULL")
        return -1
    out[0] = NULL
    cdef object obj = <object>py_object
    if not isinstance(obj, StridedMemoryView):
        cpython.PyErr_SetString(TypeError, b"py_object must be a StridedMemoryView")
        return -1
    try:
        dlm_tensor_ver = _smv_allocate_dlm_tensor_versioned()
        _smv_fill_managed_tensor_versioned(dlm_tensor_ver, <StridedMemoryView>obj)
    except Exception:
        # NOTE(review): assumes the deleter tolerates a NULL or partially
        # filled tensor (allocation may have failed) — confirm.
        _smv_versioned_deleter(dlm_tensor_ver)
        return -1
    out[0] = dlm_tensor_ver
    return 0

821  

822  

cdef int _smv_managed_tensor_to_py_object_no_sync(
    DLManagedTensorVersioned* tensor,
    void** out_py_object,
) noexcept with gil:
    """Wrap a versioned managed tensor as a StridedMemoryView (new reference).

    Ownership of *tensor* moves into a fresh capsule whose deleter will
    release it; the resulting view keeps the capsule alive as both its
    metadata and its exporting object. On success a new strong reference is
    written to ``out_py_object[0]`` and 0 is returned; on failure -1 is
    returned with a Python exception set.
    """
    cdef object capsule
    cdef object py_view
    if out_py_object == NULL:
        cpython.PyErr_SetString(RuntimeError, b"out_py_object cannot be NULL")
        return -1
    out_py_object[0] = NULL
    if tensor == NULL:
        cpython.PyErr_SetString(RuntimeError, b"tensor cannot be NULL")
        return -1
    try:
        capsule = cpython.PyCapsule_New(
            <void*>tensor,
            DLPACK_VERSIONED_TENSOR_UNUSED_NAME,
            _smv_pycapsule_deleter,
        )
        # The capsule doubles as the exporting object so it lives exactly as
        # long as the view does.
        py_view = _smv_from_dlpack_capsule(capsule, capsule)
        cpython.Py_INCREF(py_view)  # caller receives a strong reference
        out_py_object[0] = <void*>py_view
    except Exception:
        return -1
    return 0

848  

849  

cdef int _smv_dltensor_from_py_object_no_sync(
    void* py_object,
    DLTensor* out,
) noexcept with gil:
    """Fill *out* with a borrowed DLTensor describing the given view.

    The tensor borrows the view's memory and metadata (no ownership
    transfer), so *out* must not outlive the StridedMemoryView. Returns 0
    on success, -1 with a Python exception set on failure.
    """
    if out == NULL:
        cpython.PyErr_SetString(RuntimeError, b"out cannot be NULL")
        return -1
    cdef object obj = <object>py_object
    if not isinstance(obj, StridedMemoryView):
        cpython.PyErr_SetString(TypeError, b"py_object must be a StridedMemoryView")
        return -1
    try:
        _smv_setup_dltensor_borrowed(out, <StridedMemoryView>obj)
    except Exception:
        return -1
    return 0

866  

867  

cdef int _smv_current_work_stream(
    _DLDeviceType device_type,
    int32_t device_id,
    void** out_current_stream,
) noexcept with gil:
    """Report the "current" work stream for a device.

    cuda.core keeps no global/current-stream state, so NULL is reported for
    every device. Returns 0 on success; -1 (with a RuntimeError pending)
    when the output pointer is missing.
    """
    if out_current_stream != NULL:
        # No current-stream notion exists; NULL is the only answer.
        out_current_stream[0] = NULL
        return 0
    cpython.PyErr_SetString(RuntimeError, b"out_current_stream cannot be NULL")
    return -1

879  

880  

cdef void _init_smv_dlpack_exchange_api():
    """Populate the module-level DLPackExchangeAPI struct (idempotent).

    Fills in the protocol version header and the function-pointer table
    implemented above. The INITED flag is set last, so a half-filled table
    is never mistaken for a completed one.
    """
    global _SMV_DLPACK_EXCHANGE_API_INITED
    if _SMV_DLPACK_EXCHANGE_API_INITED:
        return
    _SMV_DLPACK_EXCHANGE_API.header.version.major = DLPACK_MAJOR_VERSION
    _SMV_DLPACK_EXCHANGE_API.header.version.minor = DLPACK_MINOR_VERSION
    _SMV_DLPACK_EXCHANGE_API.header.prev_api = NULL
    _SMV_DLPACK_EXCHANGE_API.managed_tensor_allocator = _smv_managed_tensor_allocator
    _SMV_DLPACK_EXCHANGE_API.managed_tensor_from_py_object_no_sync = _smv_managed_tensor_from_py_object_no_sync
    _SMV_DLPACK_EXCHANGE_API.managed_tensor_to_py_object_no_sync = _smv_managed_tensor_to_py_object_no_sync
    _SMV_DLPACK_EXCHANGE_API.dltensor_from_py_object_no_sync = _smv_dltensor_from_py_object_no_sync
    _SMV_DLPACK_EXCHANGE_API.current_work_stream = _smv_current_work_stream
    _SMV_DLPACK_EXCHANGE_API_INITED = True

894  

895  

# Initialize the function-pointer table at import time, then publish the
# capsule on the type.
_init_smv_dlpack_exchange_api()
# cdef classes are immutable types in Cython 3, so inject these attributes
# directly into the type dict.
# NOTE(review): both attribute spellings point at the same capsule —
# presumably for compatibility with consumers that look up either name;
# confirm which one is canonical.
(<dict>(<PyTypeObject*>StridedMemoryView).tp_dict)["__dlpack_c_exchange_api__"] = _SMV_DLPACK_EXCHANGE_API_CAPSULE
(<dict>(<PyTypeObject*>StridedMemoryView).tp_dict)["__c_dlpack_exchange_api__"] = _SMV_DLPACK_EXCHANGE_API_CAPSULE
# Tell CPython the type dict changed so cached lookups are invalidated.
PyType_Modified(<PyTypeObject*>StridedMemoryView)

902  

903  

cdef str get_simple_repr(obj):
    """Return a short qualified name for *obj* (or for its class).

    Builtins and classes without a module render as the bare class name;
    everything else is prefixed with its defining module.
    """
    # TODO: better handling in np.dtype objects
    cdef object cls = obj if isinstance(obj, type) else obj.__class__
    if cls.__module__ in (None, "builtins"):
        return cls.__name__
    return f"{cls.__module__}.{cls.__name__}"

917  

918  

919  

cdef bint check_has_dlpack(obj) except*:
    """Classify the data-exchange protocol supported by *obj*.

    Returns True when *obj* implements DLPack (both ``__dlpack__`` and
    ``__dlpack_device__``), False when it only exposes the CUDA Array
    Interface, and raises RuntimeError when it supports neither.
    """
    if hasattr(obj, "__dlpack__") and hasattr(obj, "__dlpack_device__"):
        return True
    if hasattr(obj, "__cuda_array_interface__"):
        return False
    raise RuntimeError(
        "the input object does not support any data exchange protocol")

930  

931  

cdef class _StridedMemoryViewProxy:
    """Lazy wrapper deferring StridedMemoryView creation until ``view()``.

    At construction time the wrapped object is classified (DLPack vs. CUDA
    Array Interface); the actual view is produced on demand with the
    consumer's stream pointer.
    """

    cdef readonly:
        object obj        # the wrapped array/tensor object
        bint has_dlpack   # True when obj speaks DLPack

    def __init__(self, obj):
        self.obj = obj
        self.has_dlpack = check_has_dlpack(obj)

    cpdef StridedMemoryView view(self, stream_ptr=None):
        if not self.has_dlpack:
            return StridedMemoryView.from_cuda_array_interface(self.obj, stream_ptr)
        return StridedMemoryView.from_dlpack(self.obj, stream_ptr)

946  

947  

cdef StridedMemoryView view_as_dlpack(obj, stream_ptr, view=None):
    """Create (or fill) a StridedMemoryView from a DLPack-capable object.

    Queries ``obj.__dlpack_device__()`` to determine the device, validates
    the consumer stream, exports a capsule via ``obj.__dlpack__()`` (trying
    the versioned protocol first), and populates the view from the capsule.

    Parameters
    ----------
    obj
        Exporter implementing ``__dlpack__`` and ``__dlpack_device__``.
    stream_ptr
        Consumer stream pointer. ``None`` raises BufferError (ambiguous
        with ``view()``); ``-1`` on CPU means "no stream" and is mapped to
        ``None`` before calling ``__dlpack__``.
    view
        Optional existing StridedMemoryView to fill in place.
    """
    cdef int dldevice, device_id
    cdef bint is_device_accessible, is_readonly
    is_device_accessible = False
    dldevice, device_id = obj.__dlpack_device__()
    if dldevice == _kDLCPU:
        assert device_id == 0
        device_id = -1  # sentinel: data lives on the host
        if stream_ptr is None:
            raise BufferError("stream=None is ambiguous with view()")
        elif stream_ptr == -1:
            stream_ptr = None  # -1 means "no stream" for CPU exporters
    elif dldevice == _kDLCUDA:
        assert device_id >= 0
        is_device_accessible = True
        # no need to check other stream values, it's a pass-through
        if stream_ptr is None:
            raise BufferError("stream=None is ambiguous with view()")
    elif dldevice in (_kDLCUDAHost, _kDLCUDAManaged):
        is_device_accessible = True
        # just do a pass-through without any checks, as pinned/managed memory can be
        # accessed on both host and device
    else:
        raise BufferError("device not supported")

    cdef object capsule
    try:
        # Prefer the versioned protocol; exporters whose __dlpack__ does not
        # accept max_version raise TypeError, so fall back to the legacy call.
        capsule = obj.__dlpack__(
            stream=int(stream_ptr) if stream_ptr else None,
            max_version=(DLPACK_MAJOR_VERSION, DLPACK_MINOR_VERSION))
    except TypeError:
        capsule = obj.__dlpack__(
            stream=int(stream_ptr) if stream_ptr else None)

    cdef void* data = NULL
    cdef DLTensor* dl_tensor
    cdef DLManagedTensorVersioned* dlm_tensor_ver
    cdef DLManagedTensor* dlm_tensor
    cdef const char *used_name
    if cpython.PyCapsule_IsValid(
            capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME):
        # Versioned tensor: the read-only bit lives in the flags bitmask.
        data = cpython.PyCapsule_GetPointer(
            capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME)
        dlm_tensor_ver = <DLManagedTensorVersioned*>data
        dl_tensor = &dlm_tensor_ver.dl_tensor
        is_readonly = bool((dlm_tensor_ver.flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0)
        used_name = DLPACK_VERSIONED_TENSOR_USED_NAME
    elif cpython.PyCapsule_IsValid(
            capsule, DLPACK_TENSOR_UNUSED_NAME):
        # Legacy tensor: no flags field, so it is treated as writable.
        data = cpython.PyCapsule_GetPointer(
            capsule, DLPACK_TENSOR_UNUSED_NAME)
        dlm_tensor = <DLManagedTensor*>data
        dl_tensor = &dlm_tensor.dl_tensor
        is_readonly = False
        used_name = DLPACK_TENSOR_USED_NAME
    else:
        assert False

    # Rename the capsule so it cannot be consumed a second time.
    cpython.PyCapsule_SetName(capsule, used_name)

    cdef StridedMemoryView buf = StridedMemoryView() if view is None else view
    buf.dl_tensor = dl_tensor
    buf.metadata = capsule  # keeps the capsule (and the tensor) alive
    # Fix: DLPack addresses the first element at data + byte_offset; the
    # offset was previously dropped here, unlike _smv_from_dlpack_capsule.
    buf.ptr = <intptr_t>(dl_tensor.data) + <intptr_t>(dl_tensor.byte_offset)
    buf.device_id = device_id
    buf.is_device_accessible = is_device_accessible
    buf.readonly = is_readonly
    buf.exporting_obj = obj

    return buf

1018  

1019  

@functools.lru_cache
def _typestr2dtype(str typestr):
    """Cached conversion of an array-interface typestr to a numpy.dtype."""
    dtype = numpy.dtype(typestr)
    return dtype

1023  

1024  

@functools.lru_cache
def _typestr2itemsize(str typestr):
    """Cached element size (in bytes) for an array-interface typestr."""
    dtype = _typestr2dtype(typestr)
    return dtype.itemsize

1028  

1029  

cdef object dtype_dlpack_to_numpy(DLDataType* dtype):
    """Map a DLPack DLDataType to the corresponding numpy.dtype instance.

    Only scalar (``lanes == 1``) dtypes are supported. bfloat16 requires the
    optional ``ml_dtypes`` package (imported at module load, which registers
    the dtype with NumPy).

    Raises
    ------
    NotImplementedError
        For vector dtypes, or bfloat16 without ``ml_dtypes`` installed.
    TypeError
        For unsupported bit widths or type codes.
    """
    cdef int bits = dtype.bits
    if dtype.lanes != 1:
        # TODO: return a NumPy structured dtype?
        raise NotImplementedError(
            f'vector dtypes (lanes={dtype.lanes}) is not supported')
    if dtype.code == kDLUInt:
        if bits == 8:
            np_dtype = numpy.uint8
        elif bits == 16:
            np_dtype = numpy.uint16
        elif bits == 32:
            np_dtype = numpy.uint32
        elif bits == 64:
            np_dtype = numpy.uint64
        else:
            raise TypeError('uint{} is not supported.'.format(bits))
    elif dtype.code == kDLInt:
        if bits == 8:
            np_dtype = numpy.int8
        elif bits == 16:
            np_dtype = numpy.int16
        elif bits == 32:
            np_dtype = numpy.int32
        elif bits == 64:
            np_dtype = numpy.int64
        else:
            raise TypeError('int{} is not supported.'.format(bits))
    elif dtype.code == kDLFloat:
        if bits == 16:
            np_dtype = numpy.float16
        elif bits == 32:
            np_dtype = numpy.float32
        elif bits == 64:
            np_dtype = numpy.float64
        else:
            raise TypeError('float{} is not supported.'.format(bits))
    elif dtype.code == kDLComplex:
        # TODO(leofang): support complex32
        if bits == 64:
            np_dtype = numpy.complex64
        elif bits == 128:
            np_dtype = numpy.complex128
        else:
            raise TypeError('complex{} is not supported.'.format(bits))
    elif dtype.code == kDLBool:
        if bits == 8:
            np_dtype = numpy.bool_
        else:
            raise TypeError(f'{bits}-bit bool is not supported')
    elif dtype.code == kDLBfloat:
        if bfloat16 is not None:
            np_dtype = numpy.dtype("bfloat16")
        else:
            # Fix: the two adjacent literals previously concatenated to
            # "...requires `ml_dtypes`to be installed." (missing space).
            raise NotImplementedError(
                'Support for bfloat16 within cuda-core requires `ml_dtypes` '
                'to be installed.'
            )
    else:
        raise TypeError('Unsupported dtype. dtype code: {}'.format(dtype.code))

    # We want the dtype object not just the type object
    return numpy.dtype(np_dtype)

1093  

1094  

cpdef StridedMemoryView view_as_cai(obj, stream_ptr, view=None):
    """Create (or fill) a StridedMemoryView from a CUDA Array Interface object.

    Requires CAI v3+ without a mask. Layout metadata is validated eagerly
    via ``get_layout()``. Unless ``stream_ptr == -1``, stream order is
    established between the producer stream advertised by the exporter and
    the given consumer stream (via an event record + wait) when they differ.

    Raises ``BufferError`` for unsupported interface versions, masked
    arrays, or ``stream_ptr is None`` (ambiguous with ``view()``).
    """
    cdef dict cai_data = obj.__cuda_array_interface__
    if cai_data["version"] < 3:
        raise BufferError("only CUDA Array Interface v3 or above is supported")
    if cai_data.get("mask") is not None:
        raise BufferError("mask is not supported")
    if stream_ptr is None:
        raise BufferError("stream=None is ambiguous with view()")

    cdef StridedMemoryView buf = StridedMemoryView() if view is None else view
    buf.exporting_obj = obj
    buf.metadata = cai_data
    buf.dl_tensor = NULL
    # Validate shape/strides/typestr eagerly so constructor paths fail fast.
    buf.get_layout()
    buf.ptr, buf.readonly = cai_data["data"]
    buf.is_device_accessible = True
    if buf.ptr != 0:
        # Ask the driver which device owns this pointer.
        buf.device_id = handle_return(
            driver.cuPointerGetAttribute(
                driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL,
                buf.ptr))
    else:
        # Null pointer (e.g. zero-size data): fall back to the current device.
        buf.device_id = handle_return(driver.cuCtxGetDevice())

    cdef intptr_t producer_s, consumer_s
    cdef EventHandle h_event
    stream_ptr = int(stream_ptr)
    if stream_ptr != -1:
        stream = cai_data.get("stream")
        if stream is not None:
            producer_s = <intptr_t>(stream)
            consumer_s = <intptr_t>(stream_ptr)
            assert producer_s > 0
            # establish stream order
            if producer_s != consumer_s:
                with nogil:
                    # Record on the producer stream; make the consumer wait.
                    h_event = create_event_handle_noctx(cydriver.CUevent_flags.CU_EVENT_DISABLE_TIMING)
                    HANDLE_RETURN(cydriver.cuEventRecord(
                        as_cu(h_event), <cydriver.CUstream>producer_s))
                    HANDLE_RETURN(cydriver.cuStreamWaitEvent(
                        <cydriver.CUstream>consumer_s, as_cu(h_event), 0))

    return buf

1139  

1140  

cpdef StridedMemoryView view_as_array_interface(obj, view=None):
    """Create (or fill) a StridedMemoryView from a NumPy Array Interface object.

    Host-only path: the resulting view is marked not device accessible.
    Requires interface v3+ without a mask; layout metadata is validated
    eagerly via ``get_layout()``.
    """
    cdef dict data = obj.__array_interface__
    if data["version"] < 3:
        raise BufferError("only NumPy Array Interface v3 or above is supported")
    if data.get("mask") is not None:
        raise BufferError("mask is not supported")

    cdef StridedMemoryView buf = StridedMemoryView() if view is None else view
    buf.exporting_obj = obj
    buf.metadata = data
    buf.dl_tensor = NULL
    # Validate shape/strides/typestr eagerly so constructor paths fail fast.
    buf.get_layout()
    buf.ptr, buf.readonly = data["data"]
    buf.is_device_accessible = False
    # NOTE(review): device_id is set to the *current* CUDA device even though
    # the data is host-resident — confirm this is intentional.
    buf.device_id = handle_return(driver.cuCtxGetDevice())
    return buf

1158  

1159  

def args_viewable_as_strided_memory(tuple arg_indices):
    """
    Decorator to create proxy objects to :obj:`StridedMemoryView` for the
    specified positional arguments.

    This allows array/tensor attributes to be accessed inside the function
    implementation, while keeping the function body array-library-agnostic (if
    desired).

    Inside the decorated function, the specified arguments become instances
    of an (undocumented) proxy type, regardless of its original source. A
    :obj:`StridedMemoryView` instance can be obtained by passing the (consumer)
    stream pointer (as a Python `int`) to the proxies's ``view()`` method. For
    example:

    .. code-block:: python

        @args_viewable_as_strided_memory((1,))
        def my_func(arg0, arg1, arg2, stream: Stream):
            # arg1 can be any object supporting DLPack or CUDA Array Interface
            view = arg1.view(stream.handle)
            assert isinstance(view, StridedMemoryView)
            ...

    Parameters
    ----------
    arg_indices : tuple
        The indices of the target positional arguments.
    """
    def decorator(func):
        @functools.wraps(func)
        def inner(*args, **kwargs):
            cdef int idx
            # Replace each targeted positional argument with a lazy proxy.
            proxied = list(args)
            for idx in arg_indices:
                proxied[idx] = _StridedMemoryViewProxy(proxied[idx])
            return func(*proxied, **kwargs)
        return inner
    return decorator

1199  

1200  

cdef inline _StridedLayout layout_from_dlpack(DLTensor* dl_tensor):
    """Construct a _StridedLayout describing the given DLTensor.

    The element size is derived from ``dtype.bits * dtype.lanes``; a
    ValueError is raised when that bit count is not a whole number of bytes.
    """
    cdef int total_bits = dl_tensor.dtype.bits * dl_tensor.dtype.lanes
    cdef int nbytes = total_bits >> 3
    if (nbytes << 3) != total_bits:
        raise ValueError("dl_tensor.dtype.bits must be a multiple of 8")
    cdef _StridedLayout result = _StridedLayout.__new__(_StridedLayout)
    result.init_from_ptr(dl_tensor.ndim, dl_tensor.shape, dl_tensor.strides, nbytes)
    return result

1209  

1210  

cdef _StridedLayout layout_from_cai(object metadata):
    """Build a _StridedLayout from a CUDA/NumPy array-interface dict.

    Reads ``"shape"``, the optional ``"strides"`` entry, and ``"typestr"``
    (for the element size) from *metadata*.
    """
    cdef object dims = metadata["shape"]
    cdef object byte_strides = metadata.get("strides")
    cdef int elem_size = _typestr2itemsize(metadata["typestr"])
    cdef _StridedLayout out = _StridedLayout.__new__(_StridedLayout)
    out.init_from_tuple(dims, byte_strides, elem_size, True)
    return out

1218  

1219  

cdef inline intptr_t get_data_ptr(object buffer, _StridedLayout layout) except? 0:
    """Absolute address of the first element: buffer base + layout offset."""
    cdef intptr_t base = <intptr_t>(int(buffer.handle))
    return base + layout.get_slice_offset_in_bytes()

1222  

1223  

cdef inline int view_buffer_strided(
    StridedMemoryView view,
    object buffer,
    _StridedLayout layout,
    object dtype,
    bint is_readonly,
) except -1:
    """Populate *view* in place so it describes *buffer* through *layout*.

    Parameters
    ----------
    view : StridedMemoryView
        The view to fill; its dlpack/CAI metadata fields are cleared.
    buffer
        Buffer-like object exposing ``handle``, ``size``, ``device_id``,
        ``is_device_accessible`` and ``memory_resource``.
    layout : _StridedLayout
        Shape/strides/itemsize description; also supplies the data offset.
    dtype
        Optional dtype; its itemsize must match the layout's.
    is_readonly : bint
        Whether the resulting view is read-only.

    Raises ``ValueError`` on itemsize mismatch or when an allocated buffer
    is smaller than the layout requires. Returns 0 on success.
    """
    if dtype is not None:
        dtype = numpy.dtype(dtype)
        if dtype.itemsize != layout.itemsize:
            raise ValueError(
                f"The dtype's itemsize ({dtype.itemsize}) does not match the layout's "
                f"itemsize ({layout.itemsize})."
            )
    # Check the layout's offset range [min_offset, max_offset] fits
    # within the [0, buffer.size - 1] range.
    # The required_size_in_bytes fails if min_offset < 0.
    # NB. For external memory, both positive and negative offsets can be valid,
    # but for a proper check we'd need to know both size and data offset,
    # while neither is reported by the packages.
    cdef bint is_allocated = buffer.memory_resource is not None
    if is_allocated and buffer.size < layout.get_required_size_in_bytes():
        raise ValueError(
            f"Buffer size is too small for the layout. "
            f"Expected at least {layout.get_required_size_in_bytes()} bytes, "
            f"got {buffer.size} bytes."
        )
    # set the public attributes
    view.ptr = get_data_ptr(buffer, layout)
    view.device_id = buffer.device_id
    view.is_device_accessible = buffer.is_device_accessible
    view.readonly = is_readonly
    view.exporting_obj = view._buffer = buffer
    # no dlpack/cai metadata
    view.dl_tensor = NULL
    view.metadata = None
    # we get the layout from the caller
    view._layout = layout
    view._dtype = dtype
    return 0