Coverage for cuda/core/_memoryview.pyx: 85.07%

710 statements  

« prev     ^ index     » next       coverage.py v7.15.0, created at 2026-07-03 01:38 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4  

5from __future__ import annotations 

6  

7from ._dlpack cimport * 

8from ._dlpack import classify_dl_device 

9from libc.stdint cimport intptr_t 

10from cuda.core._layout cimport _StridedLayout, get_strides_ptr 

11from cuda.core._stream import Stream 

12  

13import ctypes 

14import functools 

15import sys 

16import warnings 

17from collections.abc import Callable # no-cython-lint # used in string annotations below 

18from typing import Any # no-cython-lint # used in string annotations below 

19  

20import numpy 

21  

22from cuda.bindings cimport cydriver 

23from cuda.core._resource_handles cimport ( 

24 EventHandle, 

25 create_event_handle_noctx, 

26 as_cu, 

27) 

28  

29from cuda.core._utils.cuda_utils import handle_return, driver 

30from cuda.core._utils.cuda_utils cimport HANDLE_RETURN 

31  

32  

33from cuda.core._memory import Buffer 

34  

35  

36# --------------------------------------------------------------------------- 

37# Lazy tensor bridge (avoids loading _tensor_bridge.so until torch is used) 

38# --------------------------------------------------------------------------- 

39  

# The imported ``cuda.core._tensor_bridge`` module. Stays None until
# _get_tensor_bridge() loads it on first use.
cdef object _tensor_bridge = None
# Memo for the torch-tensor check, keyed by type: type(obj) -> True/False.
# A type is classified once and never examined again.
cdef dict _torch_type_cache = {}
# Memo for the torch version check. Tri-state:
# None = not checked yet, True/False = outcome of the check.
cdef object _torch_version_ok = None

46  

cdef inline bint _torch_version_check():
    """Return True if 2.3 <= torch <= 2.12 (known AOTI ABI range).

    Lower bound: AOTI functions we use were introduced in PyTorch 2.3.
    Upper bound: the ``pyobj_to_aten_handle`` trick relies on the
    THPVariable struct layout (PyObject_HEAD followed by at::Tensor cdata)
    and the identity ``AtenTensorHandle == at::Tensor*``.  Both are
    undocumented internals that could change in a future PyTorch version.
    We cap at the latest version we have tested against; unknown versions
    fall back to the standard DLPack/CAI paths.  Bump the upper bound
    after verifying a new PyTorch release.

    The verdict is memoized in ``_torch_version_ok`` once a version string
    has actually been inspected.  "torch is not usable yet" (not imported,
    or no ``__version__`` attribute available) is reported as False
    *without* being memoized, so that a later ``import torch`` is still
    picked up.
    """
    global _torch_version_ok
    if _torch_version_ok is not None:
        return <bint>_torch_version_ok
    torch = sys.modules.get("torch")
    if torch is None:
        # Do not cache: torch may simply not have been imported yet.
        return False
    version = getattr(torch, "__version__", None)
    if version is None:
        # Module object without a version (e.g. still initialising);
        # answer "no" for now but re-check on the next call.
        return False
    try:
        # Only the "major.minor" prefix matters; split the string once.
        parts = version.split(".")
        _torch_version_ok = (2, 3) <= (int(parts[0]), int(parts[1])) <= (2, 12)
    except (AttributeError, TypeError, ValueError, IndexError):
        # Unparseable or non-string version: treat as unsupported.
        _torch_version_ok = False
    return <bint>_torch_version_ok

73  

74  

cdef inline bint _is_torch_tensor(object obj):
    """Return True if ``obj`` should take the torch tensor-bridge fast path.

    The answer is memoized per ``type(obj)`` in ``_torch_type_cache``.

    A type qualifies only if all of the following hold:

    * its ``__module__`` starts with ``"torch"`` and the object exposes
      ``data_ptr`` (cheap pre-filter that never imports torch),
    * it is a subclass of ``torch.Tensor`` as found in ``sys.modules``,
    * the loaded torch version passes ``_torch_version_check()``.

    The subclass test matters because the bridge relies on the in-memory
    layout of tensor objects (see ``_torch_version_check``): an object that
    merely looks like a tensor (for instance another ``torch*`` type that
    also has ``data_ptr``) must not be routed there.  Anything rejected
    here simply falls back to the regular DLPack/CAI code paths.
    """
    cdef type tp = type(obj)
    cdef object cached = _torch_type_cache.get(tp)
    if cached is not None:
        return <bint>cached
    cdef str mod = tp.__module__ or ""
    cdef object tensor_cls = None
    cdef bint result = False
    if mod.startswith("torch") and hasattr(obj, "data_ptr"):
        # Look torch up in sys.modules only; never trigger an import here.
        tensor_cls = getattr(sys.modules.get("torch"), "Tensor", None)
        result = (
            isinstance(tensor_cls, type)
            and issubclass(tp, tensor_cls)
            and _torch_version_check()
        )
    _torch_type_cache[tp] = result  # setdefault not needed for bools
    return result

85  

86  

cdef object _get_tensor_bridge():
    """Return the ``cuda.core._tensor_bridge`` module, loading it on first use.

    On the first call the shared library behind ``torch._C`` is opened with
    ``RTLD_GLOBAL`` before the bridge module is imported; the imported
    module is then kept in the module-level ``_tensor_bridge`` variable and
    returned directly on later calls.

    Raises
    ------
    RuntimeError
        If ``torch._C`` is not present in ``sys.modules``.
    """
    global _tensor_bridge
    if _tensor_bridge is None:
        torch_C = sys.modules.get("torch._C")
        if torch_C is None:
            raise RuntimeError(
                "torch._C is not loaded; cannot initialise the tensor bridge. "
                "Make sure PyTorch is imported before passing a torch.Tensor.")
        # Bootstrap the AOTI symbols before importing the bridge extension.
        ctypes.CDLL(torch_C.__file__, mode=ctypes.RTLD_GLOBAL)
        from cuda.core import _tensor_bridge as tb
        _tensor_bridge = tb
    return _tensor_bridge

101  

102  

103try: 

104 from ml_dtypes import bfloat16 

105except ImportError: 

106 bfloat16 = None 

107  

108# TODO(leofang): support NumPy structured dtypes 

109  

110  

# Minimal declarations from the CPython C API: just the ``tp_dict`` slot of
# a type object and ``PyType_Modified``.
cdef extern from "Python.h":
    ctypedef struct PyTypeObject:
        void* tp_dict
    void PyType_Modified(PyTypeObject*)

115  

116  

# Module-level DLPack exchange API struct shared through the capsule below.
cdef DLPackExchangeAPI _SMV_DLPACK_EXCHANGE_API
# Starts out False; presumably flipped once the struct above has been
# filled in (the initialisation code is not in this part of the file).
cdef bint _SMV_DLPACK_EXCHANGE_API_INITED = False
# Capsule named "dlpack_exchange_api" wrapping a pointer to the struct.
# No destructor is registered (NULL): the struct is static module state.
_SMV_DLPACK_EXCHANGE_API_CAPSULE = cpython.PyCapsule_New(
    <void*>&_SMV_DLPACK_EXCHANGE_API,
    b"dlpack_exchange_api",
    NULL,
)

124  

125  

cdef class StridedMemoryView:
    """A class holding metadata of a strided dense array/tensor.

    A :obj:`StridedMemoryView` instance can be created in three ways:

    1. Using the :obj:`args_viewable_as_strided_memory` decorator (recommended)
    2. Explicit construction relying on DLPack or CUDA Array Interface, see below.
    3. From :obj:`~_memory.Buffer` and shape and strides tuples (see
       :meth:`from_buffer` classmethod)

    ``StridedMemoryView(obj, stream_ptr)`` can be used to create a view from
    objects supporting either DLPack (up to v1.0) or CUDA Array Interface
    (CAI) v3. When wrapping an arbitrary object it will try the DLPack protocol
    first, then the CAI protocol. A :obj:`BufferError` is raised if neither is
    supported.

    Since either way would take a consumer stream, for DLPack it is passed to
    ``obj.__dlpack__()`` as-is (except for :obj:`None`, see below); for CAI, a
    stream order will be established between the consumer stream and the
    producer stream (from ``obj.__cuda_array_interface__()["stream"]``), as if
    ``cudaStreamWaitEvent`` is called by this method.

    To opt-out of the stream ordering operation in either DLPack or CAI,
    please pass ``stream_ptr=-1``. Note that this deviates (on purpose)
    from the semantics of ``obj.__dlpack__(stream=None, ...)`` since ``cuda.core``
    does not encourage using the (legacy) default/null stream, but is
    consistent with the CAI's semantics. For DLPack, ``stream=-1`` will be
    internally passed to ``obj.__dlpack__()`` instead.

    Parameters
    ----------
    obj : Any
        Any object that supports either DLPack (up to v1.0) or CUDA Array
        Interface (v3).
    stream_ptr: int
        The pointer address (as Python `int`) to the **consumer** stream.
        Stream ordering will be properly established unless ``-1`` is passed.


    Attributes
    ----------
    ptr : int
        Pointer to the tensor buffer (as a Python `int`).
    device_id : int
        The device ID for where the tensor is located. It is -1 for CPU tensors
        (meaning those only accessible from the host).
    is_device_accessible : bool
        Whether the tensor data can be accessed on the GPU.
    readonly: bool
        Whether the tensor data is read-only, i.e. must not be modified
        in place.
    exporting_obj : Any
        A reference to the original tensor object that is being viewed.
        If the view is created with :meth:`from_buffer`,
        it will be the Buffer instance passed to the method.

    """
    def __init__(self, obj: object = None, stream_ptr: int | None = None) -> None:
        cdef str clsname = self.__class__.__name__
        if obj is not None:
            # populate self's attributes
            if check_has_dlpack(obj):
                warnings.warn(
                    f"Constructing a {clsname} directly from a DLPack-supporting object is deprecated; "
                    "Use `StridedMemoryView.from_dlpack` or `StridedMemoryView.from_any_interface` instead.",
                    DeprecationWarning,
                    stacklevel=2,
                )
                view_as_dlpack(obj, stream_ptr, self)
            else:
                warnings.warn(
                    f"Constructing a {clsname} directly from a CUDA-array-interface-supporting object is deprecated; "
                    "Use `StridedMemoryView.from_cuda_array_interface` or `StridedMemoryView.from_any_interface` instead.",
                    DeprecationWarning,
                    stacklevel=2,
                )
                view_as_cai(obj, stream_ptr, self)
        else:
            warnings.warn(
                f"Constructing an empty {clsname} is deprecated; "
                "use one of the classmethods `from_dlpack`, `from_cuda_array_interface` or `from_any_interface` "
                "to construct a StridedMemoryView from an object",
                DeprecationWarning,
                stacklevel=2,
            )

    @classmethod
    def from_dlpack(cls, obj: object, stream_ptr: int | None=None) -> StridedMemoryView:
        """Create a view from an object supporting the `DLPack <https://dmlc.github.io/dlpack/latest/>`_ protocol.

        Parameters
        ----------
        obj : object
            An object implementing the `DLPack <https://dmlc.github.io/dlpack/latest/>`_ protocol
            (via ``__dlpack__``).
        stream_ptr : int, optional
            Stream pointer for synchronization. If ``None``, no synchronization is performed.
        """
        cdef StridedMemoryView buf = StridedMemoryView.__new__(cls)
        if _is_torch_tensor(obj):
            # torch tensors bypass the protocol and go through the bridge
            _get_tensor_bridge().view_as_torch_tensor(obj, stream_ptr, buf)
            return buf
        view_as_dlpack(obj, stream_ptr, buf)
        return buf

    @classmethod
    def from_cuda_array_interface(cls, obj: object, stream_ptr: int | None=None) -> StridedMemoryView:
        """Create a view from an object supporting the `__cuda_array_interface__ <https://numba.readthedocs.io/en/stable/cuda/cuda_array_interface.html>`_ protocol.

        Parameters
        ----------
        obj : object
            An object implementing the `__cuda_array_interface__ <https://numba.readthedocs.io/en/stable/cuda/cuda_array_interface.html>`_ protocol.
        stream_ptr : int, optional
            Stream pointer for synchronization. If ``None``, no synchronization is performed.
        """
        cdef StridedMemoryView buf = StridedMemoryView.__new__(cls)
        if _is_torch_tensor(obj):
            # torch tensors bypass the protocol and go through the bridge
            _get_tensor_bridge().view_as_torch_tensor(obj, stream_ptr, buf)
            return buf
        view_as_cai(obj, stream_ptr, buf)
        return buf

    @classmethod
    def from_array_interface(cls, obj: object) -> StridedMemoryView:
        """Create a view from an object supporting the `__array_interface__ <https://numpy.org/doc/stable/reference/arrays.interface.html>`_ protocol.

        Parameters
        ----------
        obj : object
            An object implementing the `__array_interface__ <https://numpy.org/doc/stable/reference/arrays.interface.html>`_ protocol (e.g., a numpy array).
        """
        cdef StridedMemoryView buf = StridedMemoryView.__new__(cls)
        if _is_torch_tensor(obj):
            # No consumer stream is involved on this path.
            _get_tensor_bridge().view_as_torch_tensor(obj, None, buf)
            return buf
        view_as_array_interface(obj, buf)
        return buf

    @classmethod
    def from_any_interface(cls, obj: object, stream_ptr: int | None = None) -> StridedMemoryView:
        """Create a view by automatically selecting the best available protocol.

        Tries `DLPack <https://dmlc.github.io/dlpack/latest/>`_ first, then falls back to
        `__cuda_array_interface__ <https://numba.readthedocs.io/en/stable/cuda/cuda_array_interface.html>`_.
        ``torch.Tensor`` objects are transparently handled via a fast AOTI path
        regardless of which protocol is selected.

        Parameters
        ----------
        obj : object
            An object implementing `DLPack <https://dmlc.github.io/dlpack/latest/>`_ or
            `__cuda_array_interface__ <https://numba.readthedocs.io/en/stable/cuda/cuda_array_interface.html>`_.
        stream_ptr : int, optional
            Stream pointer for synchronization. If ``None``, no synchronization is performed.
        """
        if check_has_dlpack(obj):
            return cls.from_dlpack(obj, stream_ptr)
        return cls.from_cuda_array_interface(obj, stream_ptr)

    @classmethod
    def from_buffer(
        cls,
        buffer : Buffer,
        shape : tuple[int, ...],
        strides : tuple[int, ...] | None = None,
        *,
        itemsize : int | None = None,
        dtype : numpy.dtype | None = None,
        is_readonly : bool = False
    ) -> StridedMemoryView:
        """
        Creates a :obj:`StridedMemoryView` instance from a :obj:`~_memory.Buffer` and shape and strides tuples.
        The Buffer can be either allocation coming from a :obj:`MemoryResource` or an external allocation
        wrapped in a :obj:`~_memory.Buffer` object with ``Buffer.from_handle(ptr, size, owner=...)``.

        .. caution::
            When creating a :obj:`StridedMemoryView` from a :obj:`~_memory.Buffer`,
            no synchronization is performed. It is the user's responsibility to ensure
            the data in ``buffer`` is properly synchronized when consuming the view.

        Parameters
        ----------
        buffer : :obj:`~_memory.Buffer`
            The buffer to create the view from.
        shape : :obj:`tuple`
            Shape of the view; used to build the view's layout.
        strides : :obj:`tuple`, optional
            Strides of the view; passed to the layout together with ``shape``.
        itemsize : int, optional
            Size of a single element. At least one of ``itemsize`` and ``dtype``
            must be given; when both are given, ``itemsize`` must equal
            ``dtype.itemsize``.
        dtype : :obj:`numpy.dtype`
            Optional dtype.
            If specified, the dtype's itemsize must match the layout's itemsize.
        is_readonly : bool, optional
            Whether to mark the view as readonly.

        Raises
        ------
        ValueError
            If neither ``itemsize`` nor ``dtype`` is given, or if both are
            given and they disagree.
        """
        cdef StridedMemoryView view = StridedMemoryView.__new__(cls)
        if itemsize is None and dtype is None:
            raise ValueError("Either itemsize or dtype must be specified")
        if itemsize is not None and dtype is not None and itemsize != dtype.itemsize:
            raise ValueError(
                f"itemsize ({itemsize}) does not match dtype.itemsize ({dtype.itemsize})"
            )
        # (itemsize is None XOR dtype is None) OR they are equal
        view_buffer_strided(
            view,
            buffer,
            # dtype wins when present; with dtype=None getattr falls back to itemsize
            _StridedLayout(shape=shape, strides=strides, itemsize=getattr(dtype, "itemsize", itemsize)),
            dtype,
            is_readonly,
        )
        return view

    def __dealloc__(self) -> None:
        cdef DLManagedTensorVersioned* dlm_tensor_ver
        cdef DLManagedTensor* dlm_tensor
        # Nothing to release unless the view wraps a consumed DLPack tensor.
        if self.dl_tensor == NULL:
            return

        if cpython.PyCapsule_IsValid(
                self.metadata, DLPACK_VERSIONED_TENSOR_USED_NAME):
            dlm_tensor_ver = <DLManagedTensorVersioned*>cpython.PyCapsule_GetPointer(
                self.metadata, DLPACK_VERSIONED_TENSOR_USED_NAME)
            # DLPack permits a NULL deleter; never call through it
            # (same guard as in _smv_pycapsule_deleter).
            if dlm_tensor_ver.deleter:
                dlm_tensor_ver.deleter(dlm_tensor_ver)
        elif cpython.PyCapsule_IsValid(
                self.metadata, DLPACK_TENSOR_USED_NAME):
            dlm_tensor = <DLManagedTensor*>cpython.PyCapsule_GetPointer(
                self.metadata, DLPACK_TENSOR_USED_NAME)
            if dlm_tensor.deleter:
                dlm_tensor.deleter(dlm_tensor)

    def view(
        self, layout : _StridedLayout | None = None, dtype : numpy.dtype | None = None
    ) -> StridedMemoryView:
        """
        Creates a new view with adjusted layout and dtype.
        Same as calling :meth:`from_buffer` with the current buffer.

        If neither ``layout`` nor ``dtype`` is given, ``self`` is returned.
        An omitted ``layout`` or ``dtype`` defaults to the current one.
        """
        cdef StridedMemoryView view
        if layout is None and dtype is None:
            return self
        if layout is None:
            layout = self.get_layout()
        if dtype is None:
            dtype = self.get_dtype()
        # Allocate only once we know a new view is really needed.
        view = StridedMemoryView.__new__(self.__class__)
        view_buffer_strided(view, self.get_buffer(), layout, dtype, self.readonly)
        return view

    def as_tensor_map(
        self,
        box_dim: tuple[int, ...] | None = None,
        *,
        options: object = None,
        element_strides: tuple[int, ...] | None = None,
        data_type: object = None,
        interleave: object = None,
        swizzle: object = None,
        l2_promotion: object = None,
        oob_fill: object = None,
    ) -> object:
        """Create a tiled :obj:`TensorMapDescriptor` from this view.

        This is the public entry point for creating tiled tensor map
        descriptors in ``cuda.core``. Pass either ``box_dim`` and the
        individual keyword arguments directly, or provide bundled tiled
        options via ``options=``.

        Keyword arguments left at ``None`` are not forwarded.
        """
        from cuda.core._tensor_map import TensorMapDescriptor

        candidates = (
            ("options", options),
            ("element_strides", element_strides),
            ("data_type", data_type),
            ("interleave", interleave),
            ("swizzle", swizzle),
            ("l2_promotion", l2_promotion),
            ("oob_fill", oob_fill),
        )
        # Forward only what the caller actually supplied.
        kwargs = {key: value for key, value in candidates if value is not None}
        return TensorMapDescriptor._from_tiled(self, box_dim, **kwargs)

    def copy_from(
        self,
        other: StridedMemoryView,
        stream: Stream,
        allocator: object = None,
        blocking: bool | None = None,
    ) -> None:
        """
        Copies the data from the other view into this view.

        The copy can be performed between following memory spaces:
        host-to-device, device-to-host, device-to-device (on the same device).

        .. note::
            Not implemented yet: calling this method currently always raises
            :obj:`NotImplementedError`.

        Parameters
        ----------
        other : StridedMemoryView
            The view to copy data from.
        stream : Stream
            The stream to schedule the copy on.
        allocator : MemoryResource | None, optional
            If temporary buffers are needed, the specified memory resources
            will be used to allocate the memory. If not specified, default
            resources will be used.
        blocking : bool | None, optional
            Whether the call should block until the copy is complete.

            * ``True``: the ``stream`` is synchronized with the host at the end of the call,
              blocking until the copy is complete.
            * ``False``: if possible, the call returns immediately once the copy is scheduled.
              However, in some cases of host-to-device or device-to-host copies, the call may
              still synchronize with the host if necessary.
            * ``None`` (default):

              * for device-to-device, it defaults to ``False`` (non-blocking),
              * for host-to-device or device-to-host, it defaults to ``True`` (blocking).
        """
        raise NotImplementedError("Sorry, not supported: copy_from")

    def copy_to(
        self,
        other: StridedMemoryView,
        stream: Stream | None = None,
        allocator: object = None,
        blocking: bool | None = None,
    ) -> None:
        """
        Copies the data from this view into the ``other`` view.

        For details, see :meth:`copy_from`.

        .. note::
            Not implemented yet: calling this method currently always raises
            :obj:`NotImplementedError`.
        """
        raise NotImplementedError("Sorry, not supported: copy_to")

    def __dlpack__(
        self,
        *,
        stream: int | None = None,
        max_version: tuple[int, int] | None = None,
        dl_device: tuple[int, int] | None = None,
        copy: bool | None = None,
    ) -> object:
        """Export this view as a DLPack capsule.

        ``stream`` is accepted for protocol compatibility but not used.
        A versioned capsule is produced when ``max_version >= (1, 0)``;
        with ``max_version=None`` or a smaller version an unversioned
        capsule is produced.

        Raises
        ------
        BufferError
            If ``dl_device`` is not ``None``, if ``copy`` is ``True``, or if
            ``max_version`` is not a 2-tuple.
        """
        # Similar to Buffer.__dlpack__: no implicit synchronization is performed.
        if dl_device is not None:
            raise BufferError("Sorry, not supported: dl_device other than None")
        if copy is True:
            raise BufferError("Sorry, not supported: copy=True")

        cdef bint versioned
        if max_version is None:
            versioned = False
        else:
            if not isinstance(max_version, tuple) or len(max_version) != 2:
                raise BufferError(f"Expected max_version tuple[int, int], got {max_version}")
            versioned = max_version >= (1, 0)

        # NOTE: stream is accepted for protocol compatibility but not used.
        cdef object capsule = _smv_make_py_capsule(self, versioned)
        return capsule

    def __dlpack_device__(self) -> tuple[int, int]:
        """Return the ``(device_type, device_id)`` pair of this view."""
        cdef _DLDeviceType device_type
        cdef int32_t device_id
        _smv_get_dl_device(self, &device_type, &device_id)
        return (<int>device_type, int(device_id))

    @property
    def _layout(self) -> _StridedLayout:
        """
        The layout of the tensor. For StridedMemoryView created from DLPack or CAI,
        the layout is inferred from the tensor object's metadata.
        """
        return self.get_layout()

    @property
    def size(self) -> int:
        """
        Volume of the tensor's layout, as reported by the layout object.
        """
        return self.get_layout().get_volume()

    @property
    def shape(self) -> tuple[int, ...]:
        """
        Shape of the tensor.
        """
        return self.get_layout().get_shape_tuple()

    @property
    def strides(self) -> tuple[int, ...] | None:
        """
        Strides of the tensor (in **counts**, not bytes).
        """
        return self.get_layout().get_strides_tuple()

    @property
    def dtype(self) -> numpy.dtype | None:
        """
        Data type of the tensor.

        Supports standard NumPy dtypes as well as narrow data types (e.g., ``bfloat16``)
        when the optional `ml_dtypes <https://github.com/jax-ml/ml_dtypes>`_ package is
        installed. If ``ml_dtypes`` is not available and such a tensor is encountered,
        a :obj:`NotImplementedError` will be raised.
        """
        return self.get_dtype()

    def __repr__(self) -> str:
        return (f"StridedMemoryView(ptr={self.ptr},\n"
                + f" shape={self.shape},\n"
                + f" strides={self.strides},\n"
                + f" itemsize={self._layout.itemsize},\n"
                + f" dtype={get_simple_repr(self.dtype)},\n"
                + f" device_id={self.device_id},\n"
                + f" is_device_accessible={self.is_device_accessible},\n"
                + f" readonly={self.readonly},\n"
                + f" exporting_obj={get_simple_repr(self.exporting_obj)})")

    cdef inline _StridedLayout get_layout(self):
        """
        Returns the layout, computing and caching it on first access:
        from the DLPack tensor if there is one, otherwise from the stored
        metadata. Raises ValueError when neither is available.
        """
        if self._layout is None:
            if self.dl_tensor:
                self._layout = layout_from_dlpack(self.dl_tensor)
            elif self.metadata is not None:
                self._layout = layout_from_cai(self.metadata)
            else:
                raise ValueError("Cannot infer layout from the exporting object")
        return self._layout

    cdef inline object get_buffer(self):
        """
        Returns Buffer instance with the underlying data.
        If the SMV was created from a Buffer, it will return the same Buffer instance.
        Otherwise, it will create a new instance with owner set to the exporting object.
        """
        if self._buffer is None:
            if isinstance(self.exporting_obj, Buffer):
                self._buffer = self.exporting_obj
            else:
                self._buffer = Buffer.from_handle(self.ptr, 0, owner=self.exporting_obj)
        return self._buffer

    cdef inline object get_dtype(self):
        """
        Returns the dtype, resolving and caching it on first access.
        The result stays None when there is neither a DLPack tensor nor
        metadata to derive it from.
        """
        if self._dtype is None:
            if self.dl_tensor != NULL:
                self._dtype = dtype_dlpack_to_numpy(&self.dl_tensor.dtype)
            elif isinstance(self.metadata, int):
                # AOTI dtype code stored by the torch tensor bridge
                self._dtype = _get_tensor_bridge().resolve_aoti_dtype(
                    self.metadata)
            elif self.metadata is not None:
                self._dtype = _typestr2dtype(self.metadata["typestr"])
        return self._dtype

576  

577  

cdef void _smv_pycapsule_deleter(object capsule) noexcept:
    """Capsule destructor: release the managed tensor of an *unused* capsule.

    Only capsules still carrying one of the "unused" names are handled;
    for any other capsule name nothing is done, so the deleter is never
    invoked on a used capsule.
    """
    cdef DLManagedTensor* legacy
    cdef DLManagedTensorVersioned* versioned
    if cpython.PyCapsule_IsValid(capsule, DLPACK_TENSOR_UNUSED_NAME):
        legacy = <DLManagedTensor*>cpython.PyCapsule_GetPointer(
            capsule, DLPACK_TENSOR_UNUSED_NAME)
        # The deleter may be NULL; only call it when set.
        if legacy.deleter:
            legacy.deleter(legacy)
        return
    if cpython.PyCapsule_IsValid(capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME):
        versioned = <DLManagedTensorVersioned*>cpython.PyCapsule_GetPointer(
            capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME)
        if versioned.deleter:
            versioned.deleter(versioned)

594  

595  

cdef inline void _smv_release_export_resources(void* manager_ctx, int64_t* shape_ptr) noexcept with gil:
    """Release what an exported tensor holds on to.

    Frees ``shape_ptr`` and drops one reference on the Python object that
    ``manager_ctx`` points to. Either argument may be NULL, in which case
    it is skipped. Runs with the GIL held.
    """
    if shape_ptr != NULL:
        stdlib.free(shape_ptr)
    if manager_ctx != NULL:
        cpython.Py_DECREF(<object>manager_ctx)

601  

602  

cdef void _smv_deleter(DLManagedTensor* tensor) noexcept with gil:
    """Deleter for an unversioned managed tensor; a NULL ``tensor`` is a no-op.

    Releases the shape array and the manager context, then frees the
    struct itself.
    """
    if tensor == NULL:
        return
    _smv_release_export_resources(tensor.manager_ctx, tensor.dl_tensor.shape)
    # Clear the context pointer before the struct goes away.
    tensor.manager_ctx = NULL
    stdlib.free(tensor)

608  

609  

cdef void _smv_versioned_deleter(DLManagedTensorVersioned* tensor) noexcept with gil:
    """Deleter for a versioned managed tensor; a NULL ``tensor`` is a no-op.

    Releases the shape array and the manager context, then frees the
    struct itself.
    """
    if tensor == NULL:
        return
    _smv_release_export_resources(tensor.manager_ctx, tensor.dl_tensor.shape)
    # Clear the context pointer before the struct goes away.
    tensor.manager_ctx = NULL
    stdlib.free(tensor)

615  

616  

cdef inline DLManagedTensorVersioned* _smv_allocate_dlm_tensor_versioned() except? NULL:
    """Allocate a ``DLManagedTensorVersioned`` with ``malloc``.

    Only ``dl_tensor.shape`` and ``manager_ctx`` are initialised (to NULL);
    every other field is left for the caller to fill in.

    Raises
    ------
    MemoryError
        If the allocation fails.
    """
    cdef DLManagedTensorVersioned* managed = <DLManagedTensorVersioned*>stdlib.malloc(
        sizeof(DLManagedTensorVersioned))
    if managed == NULL:
        raise MemoryError()
    # NULL these two so that cleanup can tell they were never set.
    managed.dl_tensor.shape = NULL
    managed.manager_ctx = NULL
    return managed

625  

626  

cdef inline DLManagedTensor* _smv_allocate_dlm_tensor() except? NULL:
    """Allocate a ``DLManagedTensor`` with ``malloc``.

    Only ``dl_tensor.shape`` and ``manager_ctx`` are initialised (to NULL);
    every other field is left for the caller to fill in.

    Raises
    ------
    MemoryError
        If the allocation fails.
    """
    cdef DLManagedTensor* managed = <DLManagedTensor*>stdlib.malloc(
        sizeof(DLManagedTensor))
    if managed == NULL:
        raise MemoryError()
    # NULL these two so that cleanup can tell they were never set.
    managed.dl_tensor.shape = NULL
    managed.manager_ctx = NULL
    return managed

635  

636  

cdef inline int _smv_dtype_numpy_to_dlpack(object dtype_obj, DLDataType* out_dtype) except -1:
    """Translate a NumPy dtype into a DLPack ``DLDataType``.

    ``dtype_obj`` is anything ``numpy.dtype()`` accepts. On success the
    ``code``, ``bits`` and ``lanes`` (always 1) fields of ``out_dtype`` are
    filled in and 0 is returned.

    Accepted kinds and widths (in bits): bool (8), signed and unsigned
    integers (8/16/32/64), floats (16/32/64), complex (64/128).

    Raises
    ------
    BufferError
        For structured dtypes, non-native byte order, or any kind/width
        outside the list above.
    """
    cdef object np_dtype = numpy.dtype(dtype_obj)
    if np_dtype.fields is not None:
        raise BufferError("Structured dtypes are not supported for DLPack export")
    if not (np_dtype.isnative or np_dtype.byteorder in ("=", "|")):
        raise BufferError("Non-native-endian dtypes are not supported for DLPack export")

    cdef str kind_char = np_dtype.kind
    cdef int nbits = np_dtype.itemsize * 8
    cdef uint8_t type_code
    if kind_char == "b":
        if nbits != 8:
            raise BufferError(f"Unsupported bool dtype itemsize: {np_dtype.itemsize}")
        type_code = <uint8_t>kDLBool
    elif kind_char == "i":
        if nbits not in (8, 16, 32, 64):
            raise BufferError(f"Unsupported signed integer dtype: {np_dtype}")
        type_code = <uint8_t>kDLInt
    elif kind_char == "u":
        if nbits not in (8, 16, 32, 64):
            raise BufferError(f"Unsupported unsigned integer dtype: {np_dtype}")
        type_code = <uint8_t>kDLUInt
    elif kind_char == "f":
        if nbits not in (16, 32, 64):
            raise BufferError(f"Unsupported floating dtype: {np_dtype}")
        type_code = <uint8_t>kDLFloat
    elif kind_char == "c":
        if nbits not in (64, 128):
            raise BufferError(f"Unsupported complex dtype: {np_dtype}")
        type_code = <uint8_t>kDLComplex
    else:
        raise BufferError(f"Unsupported dtype for DLPack export: {np_dtype}")

    # Scalar element types only: a single lane.
    out_dtype.lanes = <uint16_t>1
    out_dtype.bits = <uint8_t>nbits
    out_dtype.code = type_code
    return 0

674  

675  

cdef inline int _smv_get_dl_device(
    StridedMemoryView view,
    _DLDeviceType* out_device_type,
    int32_t* out_device_id,
) except -1:
    """Determine the DLPack device (type, id) to report for ``view``.

    Three sources are tried in order:

    1. the ``DLTensor`` the view was imported from, when present;
    2. ``classify_dl_device`` applied to the view's buffer, when the view is
       device accessible;
    3. otherwise CPU with device id 0.

    Both output pointers are written only at the end; returns 0.
    """
    # Start from the CPU fallback and override it below.
    cdef _DLDeviceType device_type = _kDLCPU
    cdef int32_t device_id = 0
    cdef object buf

    if view.dl_tensor != NULL:
        device_type = view.dl_tensor.device.device_type
        # CPU, CUDAHost, and CUDAManaged use device_id=0 in DLPack, so only
        # plain CUDA tensors carry their ordinal over.
        if device_type == _kDLCUDA:
            device_id = view.dl_tensor.device.device_id
    elif view.is_device_accessible:
        buf = view.get_buffer()
        dev_type, dev_id = classify_dl_device(buf)
        device_type = <_DLDeviceType>dev_type
        device_id = <int32_t>dev_id

    out_device_type[0] = device_type
    out_device_id[0] = device_id
    return 0

703  

704  

cdef inline int _smv_setup_dl_tensor_common(
    DLTensor* dl_tensor,
    StridedMemoryView view,
    _StridedLayout layout,
) except -1:
    """Fill the ``DLTensor`` fields that do not depend on shape/stride ownership.

    Writes ``dtype``, ``device``, ``ndim``, ``data`` and ``byte_offset``;
    ``shape`` and ``strides`` are left to the caller. ``data`` is NULL when the
    layout's volume is zero, otherwise ``view.ptr``. ``byte_offset`` is
    always 0.

    Returns 0 on success.

    Raises
    ------
    BufferError
        If the view has no dtype, or the dtype cannot be expressed in DLPack.
    """
    cdef object dtype_obj = view.get_dtype()
    if dtype_obj is None:
        raise BufferError(
            "Cannot export StridedMemoryView via DLPack without dtype information; "
            "create the view with dtype specified."
        )
    _smv_dtype_numpy_to_dlpack(dtype_obj, &dl_tensor.dtype)
    _smv_get_dl_device(view, &dl_tensor.device.device_type, &dl_tensor.device.device_id)

    dl_tensor.ndim = layout.base.ndim
    if layout.get_volume() != 0:
        dl_tensor.data = <void*><intptr_t>view.ptr
    else:
        dl_tensor.data = NULL
    dl_tensor.byte_offset = 0
    return 0

727  

728  

cdef inline int _smv_setup_dl_tensor(DLTensor* dl_tensor, StridedMemoryView view) except -1:
    """Fill ``dl_tensor`` from ``view`` with heap-allocated shape and strides.

    For ``ndim != 0`` a single ``malloc`` block of ``2 * ndim`` int64 values is
    allocated: the first half holds the shape and the second half the strides,
    so ``dl_tensor.shape`` is the pointer that has to be freed later. For
    ``ndim == 0`` both pointers are NULL and nothing is allocated.

    Returns 0 on success.

    Raises
    ------
    MemoryError
        If the shape/strides block cannot be allocated.
    BufferError
        Propagated from ``_smv_setup_dl_tensor_common``.
    """
    cdef _StridedLayout layout = view.get_layout()
    cdef int axis
    cdef int ndim
    cdef int64_t* block = NULL
    cdef int64_t* src_strides = NULL

    _smv_setup_dl_tensor_common(dl_tensor, view, layout)
    ndim = dl_tensor.ndim

    if ndim == 0:
        dl_tensor.shape = NULL
        dl_tensor.strides = NULL
        return 0

    # DLPack v1.2+ requires non-NULL strides for ndim != 0.
    block = <int64_t*>stdlib.malloc(sizeof(int64_t) * 2 * ndim)
    if block == NULL:
        raise MemoryError()
    try:
        src_strides = get_strides_ptr(layout.base)
        for axis in range(ndim):
            block[axis] = layout.base.shape[axis]
            block[ndim + axis] = src_strides[axis]
    except Exception:
        # Nothing references the block yet, so release it before propagating.
        stdlib.free(block)
        raise
    dl_tensor.shape = block
    dl_tensor.strides = block + ndim
    return 0

756  

757  

cdef inline int _smv_setup_dltensor_borrowed(DLTensor* dl_tensor, StridedMemoryView view) except -1:
    """Fill ``dl_tensor`` from ``view`` without allocating shape/strides.

    ``shape`` and ``strides`` point straight into the view's layout
    (``layout.base.shape`` and ``get_strides_ptr(layout.base)``), so nothing is
    allocated here. For ``ndim == 0`` both are NULL.

    Returns 0 on success; errors from ``_smv_setup_dl_tensor_common`` propagate.
    """
    cdef _StridedLayout layout = view.get_layout()
    _smv_setup_dl_tensor_common(dl_tensor, view, layout)

    if dl_tensor.ndim != 0:
        dl_tensor.shape = layout.base.shape
        # For temporary/non-owning exchange we provide explicit strides.
        dl_tensor.strides = get_strides_ptr(layout.base)
        return 0

    dl_tensor.shape = NULL
    dl_tensor.strides = NULL
    return 0

770  

771  

cdef inline int _smv_fill_managed_tensor_versioned(
    DLManagedTensorVersioned* dlm_tensor_ver,
    StridedMemoryView view,
) except -1:
    """Populate a versioned managed tensor so that it keeps ``view`` alive.

    A new reference to ``view`` is taken and stored in ``manager_ctx``, the
    deleter, DLPack version and read-only flag are set, and finally the
    embedded ``DLTensor`` is filled by ``_smv_setup_dl_tensor``.

    Returns 0 on success. If ``_smv_setup_dl_tensor`` raises, the reference and
    the fields written so far stay in place; callers in this file respond by
    invoking ``_smv_versioned_deleter`` on the struct.
    """
    # Take the reference first so manager_ctx never holds a borrowed pointer.
    cpython.Py_INCREF(view)
    dlm_tensor_ver.manager_ctx = <void*>view
    dlm_tensor_ver.deleter = _smv_versioned_deleter

    dlm_tensor_ver.version.major = DLPACK_MAJOR_VERSION
    dlm_tensor_ver.version.minor = DLPACK_MINOR_VERSION
    if view.readonly:
        dlm_tensor_ver.flags = DLPACK_FLAG_BITMASK_READ_ONLY
    else:
        dlm_tensor_ver.flags = 0

    _smv_setup_dl_tensor(&dlm_tensor_ver.dl_tensor, view)
    return 0

784  

785  

cdef inline int _smv_fill_managed_tensor(
    DLManagedTensor* dlm_tensor,
    StridedMemoryView view,
) except -1:
    """Populate an unversioned managed tensor so that it keeps ``view`` alive.

    Takes a new reference to ``view`` for ``manager_ctx``, installs
    ``_smv_deleter`` and fills the embedded ``DLTensor`` through
    ``_smv_setup_dl_tensor``. The unversioned struct has no flags field, so the
    view's read-only state is not recorded.

    Returns 0 on success; errors from ``_smv_setup_dl_tensor`` propagate with
    the reference still held by ``manager_ctx``.
    """
    # Take the reference before publishing the pointer in manager_ctx.
    cpython.Py_INCREF(view)
    dlm_tensor.manager_ctx = <void*>view
    dlm_tensor.deleter = _smv_deleter

    _smv_setup_dl_tensor(&dlm_tensor.dl_tensor, view)
    return 0

795  

796  

cdef object _smv_make_py_capsule(StridedMemoryView view, bint versioned):
    """Export ``view`` as a DLPack ``PyCapsule``.

    Parameters
    ----------
    view : StridedMemoryView
        The view to export; the managed tensor holds a reference to it.
    versioned : bint
        If true, build a ``DLManagedTensorVersioned`` capsule, otherwise the
        legacy ``DLManagedTensor`` capsule.

    Returns
    -------
    object
        A capsule carrying the "unused" DLPack name for the chosen flavor, with
        ``_smv_pycapsule_deleter`` as its destructor.

    Any exception raised while building the tensor or the capsule is re-raised
    after both deleters have been invoked on whatever was allocated.
    """
    cdef DLManagedTensor* legacy = NULL
    cdef DLManagedTensorVersioned* modern = NULL
    cdef object capsule = None
    try:
        if versioned:
            modern = _smv_allocate_dlm_tensor_versioned()
            _smv_fill_managed_tensor_versioned(modern, view)
            capsule = cpython.PyCapsule_New(
                <void*>modern,
                DLPACK_VERSIONED_TENSOR_UNUSED_NAME,
                _smv_pycapsule_deleter,
            )
        else:
            legacy = _smv_allocate_dlm_tensor()
            _smv_fill_managed_tensor(legacy, view)
            capsule = cpython.PyCapsule_New(
                <void*>legacy,
                DLPACK_TENSOR_UNUSED_NAME,
                _smv_pycapsule_deleter,
            )
    except Exception:
        # No capsule owns the struct yet, so clean up by hand. At most one of
        # the two pointers is non-NULL here.
        # NOTE(review): relies on both deleters accepting NULL - confirm.
        if capsule is None:
            _smv_deleter(legacy)
            _smv_versioned_deleter(modern)
        raise
    return capsule

821  

822  

cdef inline StridedMemoryView _smv_from_dlpack_capsule(object capsule, object exporting_obj):
    """Consume a DLPack capsule and wrap its tensor in a ``StridedMemoryView``.

    Both the versioned and the legacy capsule flavors are accepted. The capsule
    is renamed to the matching "used" name, stored as the view's ``metadata``
    and its ``DLTensor`` is referenced from ``view.dl_tensor``. ``view.ptr``
    is ``data + byte_offset``.

    Legacy capsules carry no flags and are treated as writable.

    Raises
    ------
    BufferError
        If ``capsule`` is not an unused DLPack capsule, or the tensor lives on
        a device other than CPU, CUDA, CUDAHost or CUDAManaged.
    """
    cdef DLTensor* tensor = NULL
    cdef DLManagedTensorVersioned* versioned = NULL
    cdef DLManagedTensor* legacy = NULL
    cdef bint readonly = False
    cdef const char* consumed_name = NULL
    cdef StridedMemoryView view

    if cpython.PyCapsule_IsValid(capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME):
        versioned = <DLManagedTensorVersioned*>cpython.PyCapsule_GetPointer(
            capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME)
        tensor = &versioned.dl_tensor
        readonly = (versioned.flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0
        consumed_name = DLPACK_VERSIONED_TENSOR_USED_NAME
    elif cpython.PyCapsule_IsValid(capsule, DLPACK_TENSOR_UNUSED_NAME):
        legacy = <DLManagedTensor*>cpython.PyCapsule_GetPointer(
            capsule, DLPACK_TENSOR_UNUSED_NAME)
        tensor = &legacy.dl_tensor
        readonly = False
        consumed_name = DLPACK_TENSOR_USED_NAME
    else:
        raise BufferError("Invalid DLPack capsule")

    # Mark the capsule as consumed before handing its tensor to the view.
    cpython.PyCapsule_SetName(capsule, consumed_name)

    view = StridedMemoryView.__new__(StridedMemoryView)
    view.dl_tensor = tensor
    view.metadata = capsule
    view.ptr = <intptr_t>(tensor.data) + <intptr_t>(tensor.byte_offset)
    view.readonly = readonly
    view.exporting_obj = exporting_obj

    if tensor.device.device_type == _kDLCPU:
        view.device_id = -1
        view.is_device_accessible = False
        return view
    if tensor.device.device_type in (_kDLCUDA, _kDLCUDAHost, _kDLCUDAManaged):
        view.device_id = tensor.device.device_id
        view.is_device_accessible = True
        return view
    raise BufferError("device not supported")

862  

863  

cdef int _smv_managed_tensor_allocator(
    DLTensor* prototype,
    DLManagedTensorVersioned** out,
    void* error_ctx,
    void (*SetError)(void* error_ctx, const char* kind, const char* message) noexcept,
) noexcept with gil:
    """Exchange-API allocator hook; always fails because it is unsupported.

    ``out[0]`` is cleared when ``out`` is given, the failure is reported through
    ``SetError`` when that callback is given, and a ``NotImplementedError`` is
    set as the Python error. Always returns -1; ``prototype`` is not used.
    """
    cdef const char* message = b"managed_tensor_allocator is not supported by StridedMemoryView"
    if out != NULL:
        out[0] = NULL
    if SetError != NULL:
        SetError(error_ctx, b"NotImplementedError", message)
    cpython.PyErr_SetString(NotImplementedError, message)
    return -1

876  

877  

cdef int _smv_managed_tensor_from_py_object_no_sync(
    void* py_object,
    DLManagedTensorVersioned** out,
) noexcept with gil:
    """Exchange-API hook: export a ``StridedMemoryView`` as a managed tensor.

    Parameters
    ----------
    py_object : void*
        Borrowed pointer to a ``StridedMemoryView`` instance.
    out : DLManagedTensorVersioned**
        Receives the newly allocated tensor on success; set to NULL on every
        failure path that is reached with a non-NULL ``out``.

    Returns
    -------
    int
        0 on success, -1 on failure with a Python exception set
        (``RuntimeError`` for NULL arguments, ``TypeError`` for a wrong object
        type, otherwise whatever the export raised).
    """
    cdef DLManagedTensorVersioned* dlm_tensor_ver = NULL
    cdef object obj
    if out == NULL:
        cpython.PyErr_SetString(RuntimeError, b"out cannot be NULL")
        return -1
    out[0] = NULL
    # Casting a NULL pointer to ``object`` would crash on the first refcount
    # operation, so reject it the same way as the other NULL arguments.
    if py_object == NULL:
        cpython.PyErr_SetString(RuntimeError, b"py_object cannot be NULL")
        return -1
    obj = <object>py_object
    if not isinstance(obj, StridedMemoryView):
        cpython.PyErr_SetString(TypeError, b"py_object must be a StridedMemoryView")
        return -1
    try:
        dlm_tensor_ver = _smv_allocate_dlm_tensor_versioned()
        _smv_fill_managed_tensor_versioned(dlm_tensor_ver, <StridedMemoryView>obj)
    except Exception as exc:
        _smv_versioned_deleter(dlm_tensor_ver)
        # Catching the exception cleared the error indicator. Put it back so
        # the C caller that sees -1 also finds a Python exception set.
        cpython.PyErr_SetObject(type(exc), exc)
        return -1
    out[0] = dlm_tensor_ver
    return 0

899  

900  

cdef int _smv_managed_tensor_to_py_object_no_sync(
    DLManagedTensorVersioned* tensor,
    void** out_py_object,
) noexcept with gil:
    """Exchange-API hook: wrap a managed tensor in a ``StridedMemoryView``.

    The tensor is placed in a versioned DLPack capsule which is then consumed
    by ``_smv_from_dlpack_capsule``; the capsule doubles as the view's
    exporting object.

    Parameters
    ----------
    tensor : DLManagedTensorVersioned*
        The tensor to wrap.
    out_py_object : void**
        Receives a new (owned) reference to the view on success; set to NULL
        on every failure path that is reached with a non-NULL pointer.

    Returns
    -------
    int
        0 on success, -1 on failure with a Python exception set.
    """
    cdef object capsule
    cdef object py_view
    if out_py_object == NULL:
        cpython.PyErr_SetString(RuntimeError, b"out_py_object cannot be NULL")
        return -1
    out_py_object[0] = NULL
    if tensor == NULL:
        cpython.PyErr_SetString(RuntimeError, b"tensor cannot be NULL")
        return -1
    try:
        capsule = cpython.PyCapsule_New(
            <void*>tensor,
            DLPACK_VERSIONED_TENSOR_UNUSED_NAME,
            _smv_pycapsule_deleter,
        )
        py_view = _smv_from_dlpack_capsule(capsule, capsule)
        # The caller receives an owned reference through the raw pointer.
        cpython.Py_INCREF(py_view)
        out_py_object[0] = <void*>py_view
    except Exception as exc:
        # Catching the exception cleared the error indicator. Put it back so
        # the C caller that sees -1 also finds a Python exception set.
        cpython.PyErr_SetObject(type(exc), exc)
        return -1
    return 0

926  

927  

cdef int _smv_dltensor_from_py_object_no_sync(
    void* py_object,
    DLTensor* out,
) noexcept with gil:
    """Exchange-API hook: fill a non-owning ``DLTensor`` from a view.

    ``out`` is populated by ``_smv_setup_dltensor_borrowed``, so its shape and
    strides point into the view's layout rather than into memory owned by
    ``out``.

    Parameters
    ----------
    py_object : void*
        Borrowed pointer to a ``StridedMemoryView`` instance.
    out : DLTensor*
        The struct to fill.

    Returns
    -------
    int
        0 on success, -1 on failure with a Python exception set
        (``RuntimeError`` for NULL arguments, ``TypeError`` for a wrong object
        type, otherwise whatever the export raised).
    """
    cdef object obj
    if out == NULL:
        cpython.PyErr_SetString(RuntimeError, b"out cannot be NULL")
        return -1
    # Casting a NULL pointer to ``object`` would crash on the first refcount
    # operation, so reject it the same way as the other NULL arguments.
    if py_object == NULL:
        cpython.PyErr_SetString(RuntimeError, b"py_object cannot be NULL")
        return -1
    obj = <object>py_object
    if not isinstance(obj, StridedMemoryView):
        cpython.PyErr_SetString(TypeError, b"py_object must be a StridedMemoryView")
        return -1
    try:
        _smv_setup_dltensor_borrowed(out, <StridedMemoryView>obj)
    except Exception as exc:
        # Catching the exception cleared the error indicator. Put it back so
        # the C caller that sees -1 also finds a Python exception set.
        cpython.PyErr_SetObject(type(exc), exc)
        return -1
    return 0

944  

945  

cdef int _smv_current_work_stream(
    _DLDeviceType device_type,
    int32_t device_id,
    void** out_current_stream,
) noexcept with gil:
    """Exchange-API hook: report the current work stream for a device.

    Always reports NULL regardless of ``device_type`` and ``device_id``.
    Returns 0, or -1 with a ``RuntimeError`` set when ``out_current_stream`` is
    NULL.
    """
    if out_current_stream != NULL:
        # cuda.core has no global/current stream state today.
        out_current_stream[0] = NULL
        return 0
    cpython.PyErr_SetString(RuntimeError, b"out_current_stream cannot be NULL")
    return -1

957  

958  

cdef void _init_smv_dlpack_exchange_api():
    """Populate the module-level DLPack exchange API struct exactly once.

    Sets the header (DLPack version, no previous API) and installs the five
    ``_smv_*`` callbacks. Later calls return immediately because the
    ``_SMV_DLPACK_EXCHANGE_API_INITED`` flag is already set.
    """
    global _SMV_DLPACK_EXCHANGE_API_INITED
    if _SMV_DLPACK_EXCHANGE_API_INITED:
        return

    # Header.
    _SMV_DLPACK_EXCHANGE_API.header.prev_api = NULL
    _SMV_DLPACK_EXCHANGE_API.header.version.major = DLPACK_MAJOR_VERSION
    _SMV_DLPACK_EXCHANGE_API.header.version.minor = DLPACK_MINOR_VERSION

    # Function table.
    _SMV_DLPACK_EXCHANGE_API.managed_tensor_allocator = _smv_managed_tensor_allocator
    _SMV_DLPACK_EXCHANGE_API.managed_tensor_from_py_object_no_sync = _smv_managed_tensor_from_py_object_no_sync
    _SMV_DLPACK_EXCHANGE_API.managed_tensor_to_py_object_no_sync = _smv_managed_tensor_to_py_object_no_sync
    _SMV_DLPACK_EXCHANGE_API.dltensor_from_py_object_no_sync = _smv_dltensor_from_py_object_no_sync
    _SMV_DLPACK_EXCHANGE_API.current_work_stream = _smv_current_work_stream

    _SMV_DLPACK_EXCHANGE_API_INITED = True

972  

973  

# Fill in the exchange API struct at import time, before it is published on
# the type below.
_init_smv_dlpack_exchange_api()
# cdef classes are immutable types in Cython 3, so inject these attributes
# directly into the type dict.
# The same capsule object is registered under both attribute names.
(<dict>(<PyTypeObject*>StridedMemoryView).tp_dict)["__dlpack_c_exchange_api__"] = _SMV_DLPACK_EXCHANGE_API_CAPSULE
(<dict>(<PyTypeObject*>StridedMemoryView).tp_dict)["__c_dlpack_exchange_api__"] = _SMV_DLPACK_EXCHANGE_API_CAPSULE
# The type dict was edited behind the interpreter's back; notify CPython so
# its attribute lookup cache for the type is invalidated.
PyType_Modified(<PyTypeObject*>StridedMemoryView)

980  

981  

cdef str get_simple_repr(obj):
    """Return a short ``module.ClassName`` label for ``obj``.

    ``obj`` may be a class or an instance; for an instance its ``__class__``
    is used. The module prefix is omitted when the class's ``__module__`` is
    ``None`` or ``"builtins"``.
    """
    # TODO: better handling in np.dtype objects
    cdef object cls = obj if isinstance(obj, type) else obj.__class__
    if cls.__module__ in (None, "builtins"):
        return cls.__name__
    return f"{cls.__module__}.{cls.__name__}"

995  

996  

997  

cdef bint check_has_dlpack(obj) except*:
    """Tell which exchange protocol ``obj`` should be imported through.

    Returns True when ``obj`` exposes both ``__dlpack__`` and
    ``__dlpack_device__``, and False when it only offers
    ``__cuda_array_interface__``. DLPack wins when both protocols are present.

    Raises
    ------
    BufferError
        If ``obj`` supports neither protocol.
    """
    if hasattr(obj, "__dlpack__") and hasattr(obj, "__dlpack_device__"):
        return True
    if hasattr(obj, "__cuda_array_interface__"):
        return False
    raise BufferError(
        "the input object does not support any data exchange protocol")

1008  

1009  

cdef class _StridedMemoryViewProxy:
    """Deferred handle that turns a wrapped object into a ``StridedMemoryView``.

    The exchange protocol is detected once at construction; the actual view is
    only created when ``view()`` is called with the consumer stream.
    """
    cdef readonly:
        # The wrapped array/tensor object.
        object obj
        # True: import through DLPack; False: through CUDA Array Interface.
        bint has_dlpack

    def __init__(self, obj: object) -> None:
        # check_has_dlpack raises BufferError for unsupported objects, so an
        # invalid argument is rejected here rather than in view().
        self.obj = obj
        self.has_dlpack = check_has_dlpack(obj)

    cpdef StridedMemoryView view(self, stream_ptr=None):
        """Create the view of the wrapped object for ``stream_ptr``."""
        if not self.has_dlpack:
            return StridedMemoryView.from_cuda_array_interface(self.obj, stream_ptr)
        return StridedMemoryView.from_dlpack(self.obj, stream_ptr)

1024  

1025  

cdef StridedMemoryView view_as_dlpack(obj, stream_ptr, view=None):
    """Import ``obj`` through DLPack into a ``StridedMemoryView``.

    Parameters
    ----------
    obj : object
        Producer implementing ``__dlpack__`` and ``__dlpack_device__``.
    stream_ptr : int or None
        Consumer stream handed to ``obj.__dlpack__``. ``None`` is rejected for
        CPU and CUDA producers; for CPU producers ``-1`` is translated to
        "no stream". Falsy values are forwarded to the producer as ``None``.
    view : StridedMemoryView, optional
        Existing view to populate; a new one is created when omitted.

    Returns
    -------
    StridedMemoryView
        The populated view. ``ptr`` is ``data + byte_offset`` of the
        producer's ``DLTensor``; ``device_id`` is -1 for CPU tensors.

    Raises
    ------
    BufferError
        For an unsupported device, an ambiguous ``stream_ptr=None``, or a
        capsule that is not an unused DLPack capsule.
    """
    cdef int dldevice, device_id
    cdef bint is_device_accessible, is_readonly
    is_device_accessible = False
    dldevice, device_id = obj.__dlpack_device__()
    if dldevice == _kDLCPU:
        assert device_id == 0
        device_id = -1
        if stream_ptr is None:
            raise BufferError("stream=None is ambiguous with view()")
        elif stream_ptr == -1:
            stream_ptr = None
    elif dldevice == _kDLCUDA:
        assert device_id >= 0
        is_device_accessible = True
        # no need to check other stream values, it's a pass-through
        if stream_ptr is None:
            raise BufferError("stream=None is ambiguous with view()")
    elif dldevice in (_kDLCUDAHost, _kDLCUDAManaged):
        is_device_accessible = True
        # just do a pass-through without any checks, as pinned/managed memory can be
        # accessed on both host and device
    else:
        raise BufferError("device not supported")

    cdef object dl_stream = int(stream_ptr) if stream_ptr else None
    cdef object capsule
    try:
        capsule = obj.__dlpack__(
            stream=dl_stream,
            max_version=(DLPACK_MAJOR_VERSION, DLPACK_MINOR_VERSION))
    except TypeError:
        # Producers that do not accept ``max_version`` raise TypeError; retry
        # with the legacy signature.
        capsule = obj.__dlpack__(stream=dl_stream)

    cdef void* data = NULL
    cdef DLTensor* dl_tensor
    cdef DLManagedTensorVersioned* dlm_tensor_ver
    cdef DLManagedTensor* dlm_tensor
    cdef const char *used_name
    if cpython.PyCapsule_IsValid(
            capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME):
        data = cpython.PyCapsule_GetPointer(
            capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME)
        dlm_tensor_ver = <DLManagedTensorVersioned*>data
        dl_tensor = &dlm_tensor_ver.dl_tensor
        is_readonly = bool((dlm_tensor_ver.flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0)
        used_name = DLPACK_VERSIONED_TENSOR_USED_NAME
    elif cpython.PyCapsule_IsValid(
            capsule, DLPACK_TENSOR_UNUSED_NAME):
        data = cpython.PyCapsule_GetPointer(
            capsule, DLPACK_TENSOR_UNUSED_NAME)
        dlm_tensor = <DLManagedTensor*>data
        dl_tensor = &dlm_tensor.dl_tensor
        # Legacy capsules carry no flags and are treated as writable.
        is_readonly = False
        used_name = DLPACK_TENSOR_USED_NAME
    else:
        # Raise instead of asserting: with assertions disabled the code below
        # would otherwise run with ``dl_tensor``/``used_name`` uninitialized.
        raise BufferError("Invalid DLPack capsule")

    # Mark the capsule as consumed before handing its tensor to the view.
    cpython.PyCapsule_SetName(capsule, used_name)

    cdef StridedMemoryView buf = StridedMemoryView() if view is None else view
    buf.dl_tensor = dl_tensor
    buf.metadata = capsule
    # The tensor's first element lives at ``data + byte_offset``; include the
    # offset, as ``_smv_from_dlpack_capsule`` does.
    buf.ptr = <intptr_t>(dl_tensor.data) + <intptr_t>(dl_tensor.byte_offset)
    buf.device_id = device_id
    buf.is_device_accessible = is_device_accessible
    buf.readonly = is_readonly
    buf.exporting_obj = obj

    return buf

1096  

1097  

@functools.lru_cache
def _typestr2dtype(str typestr) -> numpy.dtype:
    """Return the NumPy dtype for an array-interface ``typestr`` (cached)."""
    resolved = numpy.dtype(typestr)
    return resolved

1101  

1102  

@functools.lru_cache
def _typestr2itemsize(str typestr) -> int:
    """Return the element size in bytes for an array-interface ``typestr`` (cached)."""
    dtype = _typestr2dtype(typestr)
    return dtype.itemsize

1106  

1107  

cdef object dtype_dlpack_to_numpy(DLDataType* dtype):
    """Convert a single-lane ``DLDataType`` into a ``numpy.dtype`` instance.

    Supported: uint/int 8-64, float 16-64, complex 64/128, 8-bit bool, and
    16-bit bfloat when the module-level ``bfloat16`` is not ``None``.

    Raises
    ------
    NotImplementedError
        For vector dtypes (``lanes != 1``), or for bfloat16 when ``bfloat16``
        is ``None``.
    TypeError
        For an unknown type code or an unsupported bit width.
    """
    cdef int bits = dtype.bits
    if dtype.lanes != 1:
        # TODO: return a NumPy structured dtype?
        raise NotImplementedError(
            f'vector dtypes (lanes={dtype.lanes}) is not supported')
    if dtype.code == kDLUInt:
        if bits == 8:
            np_dtype = numpy.uint8
        elif bits == 16:
            np_dtype = numpy.uint16
        elif bits == 32:
            np_dtype = numpy.uint32
        elif bits == 64:
            np_dtype = numpy.uint64
        else:
            raise TypeError(f'uint{bits} is not supported.')
    elif dtype.code == kDLInt:
        if bits == 8:
            np_dtype = numpy.int8
        elif bits == 16:
            np_dtype = numpy.int16
        elif bits == 32:
            np_dtype = numpy.int32
        elif bits == 64:
            np_dtype = numpy.int64
        else:
            raise TypeError(f'int{bits} is not supported.')
    elif dtype.code == kDLFloat:
        if bits == 16:
            np_dtype = numpy.float16
        elif bits == 32:
            np_dtype = numpy.float32
        elif bits == 64:
            np_dtype = numpy.float64
        else:
            raise TypeError(f'float{bits} is not supported.')
    elif dtype.code == kDLComplex:
        # TODO(leofang): support complex32
        if bits == 64:
            np_dtype = numpy.complex64
        elif bits == 128:
            np_dtype = numpy.complex128
        else:
            raise TypeError(f'complex{bits} is not supported.')
    elif dtype.code == kDLBool:
        if bits == 8:
            np_dtype = numpy.bool_
        else:
            raise TypeError(f'{bits}-bit bool is not supported')
    elif dtype.code == kDLBfloat:
        # Only the 16-bit variant maps onto the "bfloat16" NumPy dtype; reject
        # other widths instead of silently mislabeling them.
        if bits != 16:
            raise TypeError(f'bfloat{bits} is not supported.')
        if bfloat16 is not None:
            np_dtype = numpy.dtype("bfloat16")
        else:
            raise NotImplementedError(
                'Support for bfloat16 within cuda-core requires `ml_dtypes` '
                'to be installed.'
            )
    else:
        raise TypeError(f'Unsupported dtype. dtype code: {dtype.code}')

    # We want the dtype object not just the type object
    return numpy.dtype(np_dtype)

1171  

1172  

cpdef StridedMemoryView view_as_cai(obj, stream_ptr, view=None):
    """Import ``obj`` through the CUDA Array Interface into a ``StridedMemoryView``.

    Parameters
    ----------
    obj : object
        Producer exposing ``__cuda_array_interface__`` (v3 or above, no mask).
    stream_ptr : int
        Consumer stream handle; ``-1`` skips stream ordering. ``None`` is
        rejected.
    view : StridedMemoryView, optional
        Existing view to populate; a new one is created when omitted.

    Returns
    -------
    StridedMemoryView
        The populated view. ``device_id`` comes from the pointer's device
        ordinal, or from the current context's device for a null pointer.

    Raises
    ------
    BufferError
        For an interface version below 3, a mask, or ``stream_ptr=None``.
    """
    cdef dict cai_data = obj.__cuda_array_interface__
    cdef StridedMemoryView buf
    cdef intptr_t producer_s, consumer_s
    cdef EventHandle h_event
    cdef object producer_stream

    if cai_data["version"] < 3:
        raise BufferError("only CUDA Array Interface v3 or above is supported")
    if cai_data.get("mask") is not None:
        raise BufferError("mask is not supported")
    if stream_ptr is None:
        raise BufferError("stream=None is ambiguous with view()")

    buf = StridedMemoryView() if view is None else view
    buf.exporting_obj = obj
    buf.metadata = cai_data
    buf.dl_tensor = NULL
    # Validate shape/strides/typestr eagerly so constructor paths fail fast.
    buf.get_layout()
    buf.ptr, buf.readonly = cai_data["data"]
    buf.is_device_accessible = True
    if buf.ptr == 0:
        # A null pointer has no device attribute to query; fall back to the
        # current context's device.
        buf.device_id = handle_return(driver.cuCtxGetDevice())
    else:
        buf.device_id = handle_return(
            driver.cuPointerGetAttribute(
                driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL,
                buf.ptr))

    stream_ptr = int(stream_ptr)
    if stream_ptr == -1:
        # The consumer asked for no stream ordering.
        return buf

    producer_stream = cai_data.get("stream")
    if producer_stream is None:
        if _is_torch_tensor(obj):
            # PyTorch's __cuda_array_interface__ reports version 2 and
            # omits the "stream" field, so the standard CAI sync path
            # below is a no-op for torch tensors. This is unsafe: the
            # consumer has no guarantee that the producer's work is
            # visible. We fix this by querying PyTorch's current CUDA
            # stream via the AOTI stable C ABI and performing the same
            # event-based stream ordering.
            # NOTE(review): the version check at the top rejects version < 3,
            # so an object reporting version 2 never reaches this branch -
            # confirm how torch tensors are expected to get here.
            _get_tensor_bridge().sync_torch_stream(
                buf.device_id, <intptr_t>(stream_ptr))
        return buf

    producer_s = <intptr_t>(producer_stream)
    consumer_s = <intptr_t>(stream_ptr)
    assert producer_s > 0
    # establish stream order
    if producer_s != consumer_s:
        with nogil:
            h_event = create_event_handle_noctx(cydriver.CUevent_flags.CU_EVENT_DISABLE_TIMING)
            HANDLE_RETURN(cydriver.cuEventRecord(
                as_cu(h_event), <cydriver.CUstream>producer_s))
            HANDLE_RETURN(cydriver.cuStreamWaitEvent(
                <cydriver.CUstream>consumer_s, as_cu(h_event), 0))

    return buf

1227  

1228  

cpdef StridedMemoryView view_as_array_interface(obj, view=None):
    """Import ``obj`` through the NumPy array interface into a ``StridedMemoryView``.

    Parameters
    ----------
    obj : object
        Producer exposing ``__array_interface__`` (v3 or above, no mask).
    view : StridedMemoryView, optional
        Existing view to populate; a new one is created when omitted.

    Returns
    -------
    StridedMemoryView
        The populated view, marked as not device accessible.

    Raises
    ------
    BufferError
        For an interface version below 3, or when a mask is present.
    """
    cdef dict iface = obj.__array_interface__
    cdef StridedMemoryView buf

    if iface["version"] < 3:
        raise BufferError("only NumPy Array Interface v3 or above is supported")
    if iface.get("mask") is not None:
        raise BufferError("mask is not supported")

    buf = StridedMemoryView() if view is None else view
    buf.exporting_obj = obj
    buf.metadata = iface
    buf.dl_tensor = NULL
    # Validate shape/strides/typestr eagerly so constructor paths fail fast.
    buf.get_layout()
    buf.ptr, buf.readonly = iface["data"]
    buf.is_device_accessible = False
    # NOTE(review): the DLPack import paths in this file use device_id = -1 for
    # CPU data, whereas this host path stores the current context's device -
    # confirm this is intended.
    buf.device_id = handle_return(driver.cuCtxGetDevice())
    return buf

1246  

1247  

def args_viewable_as_strided_memory(arg_indices: tuple[int, ...]) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    """
    Decorator to create proxy objects to :obj:`StridedMemoryView` for the
    specified positional arguments.

    This allows array/tensor attributes to be accessed inside the function
    implementation, while keeping the function body array-library-agnostic (if
    desired).

    Inside the decorated function, the specified arguments become instances
    of an (undocumented) proxy type, regardless of its original source. A
    :obj:`StridedMemoryView` instance can be obtained by passing the (consumer)
    stream pointer (as a Python `int`) to the proxy's ``view()`` method. For
    example:

    .. code-block:: python

        @args_viewable_as_strided_memory((1,))
        def my_func(arg0, arg1, arg2, stream: Stream):
            # arg1 can be any object supporting DLPack or CUDA Array Interface
            view = arg1.view(stream.handle)
            assert isinstance(view, StridedMemoryView)
            ...

    Parameters
    ----------
    arg_indices : tuple
        The indices of the target positional arguments.
    """
    def wrapped_func_with_indices(func: "Callable") -> "Callable":
        @functools.wraps(func)
        def wrapped_func(*args, **kwargs) -> object:
            # Positional arguments arrive as a tuple; copy them into a list so
            # the selected slots can be replaced by proxies.
            proxied = list(args)
            cdef int idx
            for idx in arg_indices:
                proxied[idx] = _StridedMemoryViewProxy(proxied[idx])
            return func(*proxied, **kwargs)
        return wrapped_func
    return wrapped_func_with_indices

1287  

1288  

cdef inline _StridedLayout layout_from_dlpack(DLTensor* dl_tensor):
    """Build a ``_StridedLayout`` from a ``DLTensor``'s ndim, shape and strides.

    The item size is ``bits * lanes`` converted to bytes.

    Raises
    ------
    ValueError
        If ``bits * lanes`` is not a whole number of bytes.
    """
    cdef _StridedLayout layout = _StridedLayout.__new__(_StridedLayout)
    cdef int nbits = dl_tensor.dtype.bits * dl_tensor.dtype.lanes
    # The low three bits are set exactly when nbits is not a multiple of 8.
    if (nbits & 7) != 0:
        raise ValueError("dl_tensor.dtype.bits must be a multiple of 8")
    layout.init_from_ptr(dl_tensor.ndim, dl_tensor.shape, dl_tensor.strides, nbits >> 3)
    return layout

1297  

1298  

cdef _StridedLayout layout_from_cai(object metadata):
    """Build a ``_StridedLayout`` from array-interface style ``metadata``.

    Reads the required ``"shape"`` and ``"typestr"`` entries and the optional
    ``"strides"`` entry (``None`` when absent); the item size is derived from
    ``"typestr"``.
    """
    cdef _StridedLayout layout = _StridedLayout.__new__(_StridedLayout)
    layout.init_from_tuple(
        metadata["shape"],
        metadata.get("strides"),
        _typestr2itemsize(metadata["typestr"]),
        True,
    )
    return layout

1306  

1307  

cdef inline intptr_t get_data_ptr(object buffer, _StridedLayout layout) except? 0:
    """Return the buffer's handle advanced by the layout's slice offset in bytes."""
    cdef intptr_t base = <intptr_t>(int(buffer.handle))
    return base + layout.get_slice_offset_in_bytes()

1310  

1311  

cdef inline int view_buffer_strided(
    StridedMemoryView view,
    object buffer,
    _StridedLayout layout,
    object dtype,
    bint is_readonly,
) except -1:
    """Initialize ``view`` as a strided window over ``buffer``.

    Parameters
    ----------
    view : StridedMemoryView
        The view to populate; nothing is written to it unless every check
        passes.
    buffer : object
        The backing buffer; its ``handle``, ``size``, ``device_id``,
        ``is_device_accessible`` and ``memory_resource`` are read.
    layout : _StridedLayout
        Layout to attach to the view.
    dtype : object
        Optional dtype, normalized through ``numpy.dtype``; its item size has
        to equal the layout's.
    is_readonly : bint
        Stored as the view's ``readonly`` flag.

    Returns 0 on success.

    Raises
    ------
    ValueError
        If the dtype's item size differs from the layout's, or if a buffer
        with a memory resource is smaller than the layout requires.
    """
    if dtype is not None:
        dtype = numpy.dtype(dtype)
        if dtype.itemsize != layout.itemsize:
            raise ValueError(
                f"The dtype's itemsize ({dtype.itemsize}) does not match the layout's "
                f"itemsize ({layout.itemsize})."
            )

    # Check the layout's offset range [min_offset, max_offset] fits
    # within the [0, buffer.size - 1] range.
    # The required_size_in_bytes fails if min_offset < 0.
    # NB. For external memory, both positive and negative offsets can be valid,
    # but for a proper check we'd need to know both size and data offset,
    # while neither is reported by the packages.
    if buffer.memory_resource is not None:
        if buffer.size < layout.get_required_size_in_bytes():
            raise ValueError(
                f"Buffer size is too small for the layout. "
                f"Expected at least {layout.get_required_size_in_bytes()} bytes, "
                f"got {buffer.size} bytes."
            )

    # set the public attributes
    view.ptr = get_data_ptr(buffer, layout)
    view.device_id = buffer.device_id
    view.is_device_accessible = buffer.is_device_accessible
    view.readonly = is_readonly
    view.exporting_obj = buffer
    view._buffer = buffer
    # no dlpack/cai metadata
    view.dl_tensor = NULL
    view.metadata = None
    # we get the layout from the caller
    view._layout = layout
    view._dtype = dtype
    return 0

1351 return 0 2x y w cbdbebfbgbhbibjbkblbmbnbobpbqbrbsbtbubvbwbxbybzbAbBbCbDbEbFbGbHbabbbJbO 9 H B C D E F G