Coverage for cuda/core/experimental/_module.py: 92% (250 statements) — report generated by coverage.py v7.13.0 at 2025-12-10 01:19 +0000.

1# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4 

5import weakref 

6from collections import namedtuple 

7from typing import Union 

8from warnings import warn 

9 

10from cuda.core.experimental._device import Device 

11from cuda.core.experimental._launch_config import LaunchConfig, _to_native_launch_config 

12from cuda.core.experimental._stream import Stream 

13from cuda.core.experimental._utils.clear_error_support import ( 

14 assert_type, 

15 assert_type_str_or_bytes_like, 

16 raise_code_path_meant_to_be_unreachable, 

17) 

18from cuda.core.experimental._utils.cuda_utils import driver, get_binding_version, handle_return, precondition 

19 

# Driver-API loader table. The "old" backend maps onto the cuModule* APIs,
# which exist on every supported driver/binding combination. A "new"
# cuLibrary*-based backend entry is appended by _lazy_init() when the
# bindings and driver are recent enough to provide it.
_backend = {
    "old": {
        "file": driver.cuModuleLoad,
        "data": driver.cuModuleLoadDataEx,
        "kernel": driver.cuModuleGetFunction,
        "attribute": driver.cuFuncGetAttribute,
    },
}

28 

29 

# TODO: revisit this treatment for py313t builds
# Module-level state populated once by _lazy_init() on first use.
_inited = False  # True after _lazy_init() has completed
_py_major_ver = None  # major version of the cuda-python bindings
_driver_ver = None  # CUDA driver version as an int, e.g. 12040 for 12.4
_kernel_ctypes = None  # driver kernel handle types accepted by Kernel._from_obj

35 

36 

def _lazy_init():
    """Populate the module-level backend/version state on first use.

    Idempotent: subsequent calls return immediately once initialization
    has completed.
    """
    global _inited, _py_major_ver, _driver_ver, _kernel_ctypes
    if _inited:
        return

    # Which driver bindings are available depends on the cuda-python version.
    _py_major_ver, _ = get_binding_version()
    if _py_major_ver < 12:
        _kernel_ctypes = (driver.CUfunction,)
    else:
        _backend["new"] = {
            "file": driver.cuLibraryLoadFromFile,
            "data": driver.cuLibraryLoadData,
            "kernel": driver.cuLibraryGetKernel,
            "attribute": driver.cuKernelGetAttribute,
        }
        _kernel_ctypes = (driver.CUfunction, driver.CUkernel)
    _driver_ver = handle_return(driver.cuDriverGetVersion())
    # Kernel parameter introspection needs both the new bindings and a
    # driver of at least 12.4.
    if _py_major_ver >= 12 and _driver_ver >= 12040:
        _backend["new"]["paraminfo"] = driver.cuKernelGetParamInfo
    _inited = True

59 

60 

class KernelAttributes:
    """Per-device, cached, read-only attributes of a :obj:`~_module.Kernel`.

    Instances are created internally via :meth:`_init`; direct construction
    is disallowed. Attribute values are fetched from the driver once per
    (device, attribute) pair and cached thereafter.
    """

    def __new__(self, *args, **kwargs):
        raise RuntimeError("KernelAttributes cannot be instantiated directly. Please use Kernel APIs.")

    # NOTE: this was previously a plain class attribute named ``slots``,
    # which has no effect; ``__slots__`` is what suppresses the per-instance
    # __dict__, matching the convention used by Kernel and ObjectCode.
    __slots__ = ("_kernel", "_cache", "_backend_version", "_loader")

    @classmethod
    def _init(cls, kernel):
        self = super().__new__(cls)
        # Hold the kernel weakly so this helper does not keep it alive.
        self._kernel = weakref.ref(kernel)
        self._cache = {}

        self._backend_version = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old"
        self._loader = _backend[self._backend_version]
        return self

    def _get_cached_attribute(
        self, device_id: Device | int | None, attribute: driver.CUfunction_attribute
    ) -> int:
        """Helper function to get a cached attribute or fetch and cache it if not present."""
        device_id = Device(device_id).device_id
        cache_key = device_id, attribute
        # cache_key doubles as a unique sentinel for "not cached".
        result = self._cache.get(cache_key, cache_key)
        if result is not cache_key:
            return result
        kernel = self._kernel()
        if kernel is None:
            raise RuntimeError("Cannot access kernel attributes for expired Kernel object")
        if self._backend_version == "new":
            result = handle_return(self._loader["attribute"](attribute, kernel._handle, device_id))
        else:  # "old" backend
            warn(
                "Device ID argument is ignored when getting attribute from kernel when cuda version < 12. ",
                RuntimeWarning,
                stacklevel=2,
            )
            result = handle_return(self._loader["attribute"](attribute, kernel._handle))
        self._cache[cache_key] = result
        return result

    def max_threads_per_block(self, device_id: Device | int | None = None) -> int:
        """int : The maximum number of threads per block.
        This attribute is read-only."""
        return self._get_cached_attribute(
            device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
        )

    def shared_size_bytes(self, device_id: Device | int | None = None) -> int:
        """int : The size in bytes of statically-allocated shared memory required by this function.
        This attribute is read-only."""
        return self._get_cached_attribute(device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES)

    def const_size_bytes(self, device_id: Device | int | None = None) -> int:
        """int : The size in bytes of user-allocated constant memory required by this function.
        This attribute is read-only."""
        return self._get_cached_attribute(device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES)

    def local_size_bytes(self, device_id: Device | int | None = None) -> int:
        """int : The size in bytes of local memory used by each thread of this function.
        This attribute is read-only."""
        return self._get_cached_attribute(device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES)

    def num_regs(self, device_id: Device | int | None = None) -> int:
        """int : The number of registers used by each thread of this function.
        This attribute is read-only."""
        return self._get_cached_attribute(device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_NUM_REGS)

    def ptx_version(self, device_id: Device | int | None = None) -> int:
        """int : The PTX virtual architecture version for which the function was compiled.
        This attribute is read-only."""
        return self._get_cached_attribute(device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_PTX_VERSION)

    def binary_version(self, device_id: Device | int | None = None) -> int:
        """int : The binary architecture version for which the function was compiled.
        This attribute is read-only."""
        return self._get_cached_attribute(device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_BINARY_VERSION)

    def cache_mode_ca(self, device_id: Device | int | None = None) -> bool:
        """bool : Whether the function has been compiled with user specified option "-Xptxas --dlcm=ca" set.
        This attribute is read-only."""
        return bool(self._get_cached_attribute(device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_CACHE_MODE_CA))

    def max_dynamic_shared_size_bytes(self, device_id: Device | int | None = None) -> int:
        """int : The maximum size in bytes of dynamically-allocated shared memory that can be used
        by this function."""
        return self._get_cached_attribute(
            device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES
        )

    def preferred_shared_memory_carveout(self, device_id: Device | int | None = None) -> int:
        """int : The shared memory carveout preference, in percent of the total shared memory."""
        return self._get_cached_attribute(
            device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
        )

    def cluster_size_must_be_set(self, device_id: Device | int | None = None) -> bool:
        """bool : The kernel must launch with a valid cluster size specified.
        This attribute is read-only."""
        return bool(
            self._get_cached_attribute(
                device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET
            )
        )

    def required_cluster_width(self, device_id: Device | int | None = None) -> int:
        """int : The required cluster width in blocks."""
        return self._get_cached_attribute(
            device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH
        )

    def required_cluster_height(self, device_id: Device | int | None = None) -> int:
        """int : The required cluster height in blocks."""
        return self._get_cached_attribute(
            device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT
        )

    def required_cluster_depth(self, device_id: Device | int | None = None) -> int:
        """int : The required cluster depth in blocks."""
        return self._get_cached_attribute(
            device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH
        )

    def non_portable_cluster_size_allowed(self, device_id: Device | int | None = None) -> bool:
        """bool : Whether the function can be launched with non-portable cluster size."""
        return bool(
            self._get_cached_attribute(
                device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED
            )
        )

    def cluster_scheduling_policy_preference(self, device_id: Device | int | None = None) -> int:
        """int : The block scheduling policy of a function."""
        return self._get_cached_attribute(
            device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE
        )

194 

195 

# Result of KernelOccupancy.max_potential_block_size. The namedtuple typename
# matches the binding name so that instances are picklable (pickle resolves
# the class by its __qualname__ in this module) and repr is not misleading;
# previously the typename was the unrelated "MaxPotential".
MaxPotentialBlockSizeOccupancyResult = namedtuple(
    "MaxPotentialBlockSizeOccupancyResult", ("min_grid_size", "max_block_size")
)

197 

198 

class KernelOccupancy:
    """Occupancy queries for a compiled kernel.

    Instances are created internally via :meth:`_init`; direct construction
    is disallowed. Use :attr:`Kernel.occupancy` to obtain one.
    """

    def __new__(self, *args, **kwargs):
        raise RuntimeError("KernelOccupancy cannot be instantiated directly. Please use Kernel APIs.")

    # NOTE: this was previously a plain class attribute named ``slots``,
    # which has no effect; ``__slots__`` is what suppresses the per-instance
    # __dict__, matching the convention used by Kernel and ObjectCode.
    __slots__ = ("_handle",)

    @classmethod
    def _init(cls, handle):
        self = super().__new__(cls)
        # The raw driver kernel handle used by the cuOccupancy* queries below.
        self._handle = handle

        return self

    def max_active_blocks_per_multiprocessor(self, block_size: int, dynamic_shared_memory_size: int) -> int:
        """Occupancy of the kernel.

        Returns the maximum number of active blocks per multiprocessor for this kernel.

        Parameters
        ----------
        block_size: int
            Block size parameter used to launch this kernel.
        dynamic_shared_memory_size: int
            The amount of dynamic shared memory in bytes needed by block.
            Use `0` if block does not need shared memory.

        Returns
        -------
        int
            The maximum number of active blocks per multiprocessor.

        Note
        ----
        The fraction of the product of maximum number of active blocks per multiprocessor
        and the block size to the maximum number of threads per multiprocessor is known as
        theoretical multiprocessor utilization (occupancy).

        """
        return handle_return(
            driver.cuOccupancyMaxActiveBlocksPerMultiprocessor(self._handle, block_size, dynamic_shared_memory_size)
        )

    def max_potential_block_size(
        self, dynamic_shared_memory_needed: Union[int, driver.CUoccupancyB2DSize], block_size_limit: int
    ) -> MaxPotentialBlockSizeOccupancyResult:
        """MaxPotentialBlockSizeOccupancyResult: Suggested launch configuration for reasonable occupancy.

        Returns the minimum grid size needed to achieve the maximum occupancy and
        the maximum block size that can achieve the maximum occupancy.

        Parameters
        ----------
        dynamic_shared_memory_needed: Union[int, driver.CUoccupancyB2DSize]
            The amount of dynamic shared memory in bytes needed by block.
            Use `0` if block does not need shared memory. Use C-callable
            represented by :obj:`~driver.CUoccupancyB2DSize` to encode
            amount of needed dynamic shared memory which varies depending
            on the block size.
        block_size_limit: int
            Known upper limit on the kernel block size. Use `0` to indicate
            the maximum block size permitted by the device / kernel instead

        Returns
        -------
        :obj:`~MaxPotentialBlockSizeOccupancyResult`
            An object with `min_grid_size` and `max_block_size` attributes encoding
            the suggested launch configuration.

        Note
        ----
        Please be advised that use of C-callable that requires Python Global
        Interpreter Lock may lead to deadlocks.

        """
        if isinstance(dynamic_shared_memory_needed, int):
            # Fixed shared-memory requirement: pass size directly, no callback.
            min_grid_size, max_block_size = handle_return(
                driver.cuOccupancyMaxPotentialBlockSize(
                    self._handle, None, dynamic_shared_memory_needed, block_size_limit
                )
            )
        elif isinstance(dynamic_shared_memory_needed, driver.CUoccupancyB2DSize):
            # Block-size-dependent requirement: pass the C callback pointer.
            min_grid_size, max_block_size = handle_return(
                driver.cuOccupancyMaxPotentialBlockSize(
                    self._handle, dynamic_shared_memory_needed.getPtr(), 0, block_size_limit
                )
            )
        else:
            raise TypeError(
                "dynamic_shared_memory_needed expected to have type int, or CUoccupancyB2DSize, "
                f"got {type(dynamic_shared_memory_needed)}"
            )
        return MaxPotentialBlockSizeOccupancyResult(min_grid_size=min_grid_size, max_block_size=max_block_size)

    def available_dynamic_shared_memory_per_block(self, num_blocks_per_multiprocessor: int, block_size: int) -> int:
        """Dynamic shared memory available per block for given launch configuration.

        The amount of dynamic shared memory per block, in bytes, for given kernel launch configuration.

        Parameters
        ----------
        num_blocks_per_multiprocessor: int
            Number of blocks to be concurrently executing on a multiprocessor.
        block_size: int
            Block size parameter used to launch this kernel.

        Returns
        -------
        int
            Dynamic shared memory available per block for given launch configuration.
        """
        return handle_return(
            driver.cuOccupancyAvailableDynamicSMemPerBlock(self._handle, num_blocks_per_multiprocessor, block_size)
        )

    def max_potential_cluster_size(self, config: LaunchConfig, stream: Stream | None = None) -> int:
        """Maximum potential cluster size.

        The maximum potential cluster size for this kernel and given launch configuration.

        Parameters
        ----------
        config: :obj:`~_launch_config.LaunchConfig`
            Kernel launch configuration. Cluster dimensions in the configuration are ignored.
        stream: :obj:`~Stream`, optional
            The stream on which this kernel is to be launched.

        Returns
        -------
        int
            The maximum cluster size that can be launched for this kernel and launch configuration.
        """
        drv_cfg = _to_native_launch_config(config)
        if stream is not None:
            drv_cfg.hStream = stream.handle
        return handle_return(driver.cuOccupancyMaxPotentialClusterSize(self._handle, drv_cfg))

    def max_active_clusters(self, config: LaunchConfig, stream: Stream | None = None) -> int:
        """Maximum number of active clusters on the target device.

        The maximum number of clusters that could concurrently execute on the target device.

        Parameters
        ----------
        config: :obj:`~_launch_config.LaunchConfig`
            Kernel launch configuration.
        stream: :obj:`~Stream`, optional
            The stream on which this kernel is to be launched.

        Returns
        -------
        int
            The maximum number of clusters that could co-exist on the target device.
        """
        drv_cfg = _to_native_launch_config(config)
        if stream is not None:
            drv_cfg.hStream = stream.handle
        return handle_return(driver.cuOccupancyMaxActiveClusters(self._handle, drv_cfg))

358 

359 

# (offset, size) record describing one kernel parameter's placement, in bytes,
# within the argument buffer.
ParamInfo = namedtuple("ParamInfo", ("offset", "size"))

361 

362 

class Kernel:
    """Represent a compiled kernel that has been loaded onto the device.

    Kernel instances can be executed by passing them directly into the
    :func:`~launch` function.

    Directly creating a :obj:`~_module.Kernel` is not supported; instances
    should instead be obtained through a :obj:`~_module.ObjectCode` object.

    """

    __slots__ = ("_handle", "_module", "_attributes", "_occupancy", "__weakref__")

    def __new__(self, *args, **kwargs):
        raise RuntimeError("Kernel objects cannot be instantiated directly. Please use ObjectCode APIs.")

    @classmethod
    def _from_obj(cls, obj, mod):
        # Internal constructor: wrap a driver kernel handle together with the
        # ObjectCode that owns it.
        assert_type(obj, _kernel_ctypes)
        assert_type(mod, ObjectCode)
        kernel = super().__new__(cls)
        kernel._handle = obj
        kernel._module = mod
        # Helper objects are created lazily on first property access.
        kernel._attributes = None
        kernel._occupancy = None
        return kernel

    @property
    def attributes(self) -> KernelAttributes:
        """Get the read-only attributes of this kernel."""
        if self._attributes is None:
            self._attributes = KernelAttributes._init(self)
        return self._attributes

    def _get_arguments_info(self, param_info=False) -> tuple[int, list[ParamInfo]]:
        # Count the kernel's arguments (and optionally collect their
        # offset/size) by probing successive positions with cuKernelGetParamInfo.
        attr_impl = self.attributes
        if attr_impl._backend_version != "new":
            raise NotImplementedError("New backend is required")
        if "paraminfo" not in attr_impl._loader:
            raise NotImplementedError(
                "Driver version 12.4 or newer is required for this function. "
                f"Using driver version {_driver_ver // 1000}.{(_driver_ver % 1000) // 10}"
            )
        position = 0
        collected = []
        while True:
            result = attr_impl._loader["paraminfo"](self._handle, position)
            if result[0] != driver.CUresult.CUDA_SUCCESS:
                break
            if param_info:
                collected.append(ParamInfo(offset=result[1], size=result[2]))
            position += 1
        # Probing one past the last argument is expected to fail with
        # CUDA_ERROR_INVALID_VALUE; any other status is a genuine error.
        if result[0] != driver.CUresult.CUDA_ERROR_INVALID_VALUE:
            handle_return(result)
        return position, collected

    @property
    def num_arguments(self) -> int:
        """int : The number of arguments of this function"""
        count, _ = self._get_arguments_info()
        return count

    @property
    def arguments_info(self) -> list[ParamInfo]:
        """list[ParamInfo]: (offset, size) for each argument of this function"""
        _, infos = self._get_arguments_info(param_info=True)
        return infos

    @property
    def occupancy(self) -> KernelOccupancy:
        """Get the occupancy information for launching this kernel."""
        if self._occupancy is None:
            self._occupancy = KernelOccupancy._init(self._handle)
        return self._occupancy

    # TODO: implement from_handle()

440 

441 

# Accepted representations of compiled code: in-memory bytes/bytearray, or a
# str file path to on-disk code.
CodeTypeT = Union[bytes, bytearray, str]

443 

444 

class ObjectCode:
    """Represent a compiled program to be loaded onto the device.

    This object provides a unified interface for different types of
    compiled programs that will be loaded onto the device.

    Note
    ----
    This class has no default constructor. If you already have a cubin that you would
    like to load, use the :meth:`from_cubin` alternative constructor. Constructing directly
    from all other possible code types should be avoided in favor of compilation through
    :class:`~cuda.core.experimental.Program`

    Note
    ----
    Usage under CUDA 11.x will only load to the current device
    context.
    """

    __slots__ = ("_handle", "_backend_version", "_code_type", "_module", "_loader", "_sym_map", "_name")
    _supported_code_type = ("cubin", "ptx", "ltoir", "fatbin", "object", "library")

    def __new__(self, *args, **kwargs):
        raise RuntimeError(
            "ObjectCode objects cannot be instantiated directly. "
            "Please use ObjectCode APIs (from_cubin, from_ptx) or Program APIs (compile)."
        )

    @classmethod
    def _init(cls, module, code_type, *, name: str = "", symbol_mapping: dict | None = None):
        # Internal constructor shared by all from_* factories and Program.
        self = super().__new__(cls)
        assert code_type in self._supported_code_type, f"{code_type=} is not supported"
        _lazy_init()

        # handle is assigned during _lazy_load
        self._handle = None

        # Pick the cuLibrary*-based loader when both bindings and driver are
        # CUDA 12+; otherwise fall back to the cuModule* loader.
        self._backend_version = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old"
        self._loader = _backend[self._backend_version]

        self._code_type = code_type
        self._module = module  # raw code (bytes-like) or file path (str)
        self._sym_map = {} if symbol_mapping is None else symbol_mapping
        self._name = name

        return self

    @classmethod
    def _reduce_helper(self, module, code_type, name, symbol_mapping):
        # just for forwarding kwargs
        return ObjectCode._init(module, code_type, name=name, symbol_mapping=symbol_mapping)

    def __reduce__(self):
        # Pickle the constructor inputs, not the driver handle; the module is
        # re-loaded lazily after unpickling.
        return ObjectCode._reduce_helper, (self._module, self._code_type, self._name, self._sym_map)

    @staticmethod
    def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
        """Create an :class:`ObjectCode` instance from an existing cubin.

        Parameters
        ----------
        module : Union[bytes, str]
            Either a bytes object containing the in-memory cubin to load, or
            a file path string pointing to the on-disk cubin to load.
        name : Optional[str]
            A human-readable identifier representing this code object.
        symbol_mapping : Optional[dict]
            A dictionary specifying how the unmangled symbol names (as keys)
            should be mapped to the mangled names before trying to retrieve
            them (default to no mappings).
        """
        return ObjectCode._init(module, "cubin", name=name, symbol_mapping=symbol_mapping)

    @staticmethod
    def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
        """Create an :class:`ObjectCode` instance from an existing PTX.

        Parameters
        ----------
        module : Union[bytes, str]
            Either a bytes object containing the in-memory ptx code to load, or
            a file path string pointing to the on-disk ptx file to load.
        name : Optional[str]
            A human-readable identifier representing this code object.
        symbol_mapping : Optional[dict]
            A dictionary specifying how the unmangled symbol names (as keys)
            should be mapped to the mangled names before trying to retrieve
            them (default to no mappings).
        """
        return ObjectCode._init(module, "ptx", name=name, symbol_mapping=symbol_mapping)

    @staticmethod
    def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
        """Create an :class:`ObjectCode` instance from an existing LTOIR.

        Parameters
        ----------
        module : Union[bytes, str]
            Either a bytes object containing the in-memory ltoir code to load, or
            a file path string pointing to the on-disk ltoir file to load.
        name : Optional[str]
            A human-readable identifier representing this code object.
        symbol_mapping : Optional[dict]
            A dictionary specifying how the unmangled symbol names (as keys)
            should be mapped to the mangled names before trying to retrieve
            them (default to no mappings).
        """
        return ObjectCode._init(module, "ltoir", name=name, symbol_mapping=symbol_mapping)

    @staticmethod
    def from_fatbin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
        """Create an :class:`ObjectCode` instance from an existing fatbin.

        Parameters
        ----------
        module : Union[bytes, str]
            Either a bytes object containing the in-memory fatbin to load, or
            a file path string pointing to the on-disk fatbin to load.
        name : Optional[str]
            A human-readable identifier representing this code object.
        symbol_mapping : Optional[dict]
            A dictionary specifying how the unmangled symbol names (as keys)
            should be mapped to the mangled names before trying to retrieve
            them (default to no mappings).
        """
        return ObjectCode._init(module, "fatbin", name=name, symbol_mapping=symbol_mapping)

    @staticmethod
    def from_object(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
        """Create an :class:`ObjectCode` instance from an existing object code.

        Parameters
        ----------
        module : Union[bytes, str]
            Either a bytes object containing the in-memory object code to load, or
            a file path string pointing to the on-disk object code to load.
        name : Optional[str]
            A human-readable identifier representing this code object.
        symbol_mapping : Optional[dict]
            A dictionary specifying how the unmangled symbol names (as keys)
            should be mapped to the mangled names before trying to retrieve
            them (default to no mappings).
        """
        return ObjectCode._init(module, "object", name=name, symbol_mapping=symbol_mapping)

    @staticmethod
    def from_library(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
        """Create an :class:`ObjectCode` instance from an existing library.

        Parameters
        ----------
        module : Union[bytes, str]
            Either a bytes object containing the in-memory library to load, or
            a file path string pointing to the on-disk library to load.
        name : Optional[str]
            A human-readable identifier representing this code object.
        symbol_mapping : Optional[dict]
            A dictionary specifying how the unmangled symbol names (as keys)
            should be mapped to the mangled names before trying to retrieve
            them (default to no mappings).
        """
        return ObjectCode._init(module, "library", name=name, symbol_mapping=symbol_mapping)

    # TODO: do we want to unload in a finalizer? Probably not..

    def _lazy_load_module(self, *args, **kwargs):
        # Load the module into the driver on first need (used as a
        # @precondition on get_kernel and handle). No-op once loaded.
        if self._handle is not None:
            return
        module = self._module
        assert_type_str_or_bytes_like(module)
        if isinstance(module, str):
            # str is a file path; the extra empty lists/zeros are the (unused)
            # jit/library option arrays and their counts for the new backend.
            if self._backend_version == "new":
                self._handle = handle_return(self._loader["file"](module.encode(), [], [], 0, [], [], 0))
            else:  # "old" backend
                self._handle = handle_return(self._loader["file"](module.encode()))
            return
        if isinstance(module, (bytes, bytearray)):
            # bytes-like is in-memory code.
            if self._backend_version == "new":
                self._handle = handle_return(self._loader["data"](module, [], [], 0, [], [], 0))
            else:  # "old" backend
                self._handle = handle_return(self._loader["data"](module, 0, [], []))
            return
        raise_code_path_meant_to_be_unreachable()

    @precondition(_lazy_load_module)
    def get_kernel(self, name) -> Kernel:
        """Return the :obj:`~_module.Kernel` of a specified name from this object code.

        Parameters
        ----------
        name : Any
            Name of the kernel to retrieve.

        Returns
        -------
        :obj:`~_module.Kernel`
            Newly created kernel object.

        """
        supported_code_types = ("cubin", "ptx", "fatbin")
        if self._code_type not in supported_code_types:
            raise RuntimeError(f'Unsupported code type "{self._code_type}" ({supported_code_types=})')
        # Prefer the user-provided unmangled->mangled mapping; otherwise the
        # name is assumed to already be the (mangled) symbol and is encoded.
        try:
            name = self._sym_map[name]
        except KeyError:
            name = name.encode()

        data = handle_return(self._loader["kernel"](self._handle, name))
        return Kernel._from_obj(data, self)

    @property
    def code(self) -> CodeTypeT:
        """Return the underlying code object."""
        return self._module

    @property
    def name(self) -> str:
        """Return a human-readable name of this code object."""
        return self._name

    @property
    def code_type(self) -> str:
        """Return the type of the underlying code object."""
        return self._code_type

    @property
    @precondition(_lazy_load_module)
    def handle(self):
        """Return the underlying handle object.

        .. caution::

            This handle is a Python object. To get the memory address of the underlying C
            handle, call ``int(ObjectCode.handle)``.
        """
        return self._handle