Coverage for cuda/core/experimental/_module.py: 92% (250 statements) — report generated by coverage.py v7.13.0 at 2025-12-10 01:19 +0000.

1# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4 

5import weakref 

6from collections import namedtuple 

7from typing import Union 

8from warnings import warn 

9 

10from cuda.core.experimental._device import Device 

11from cuda.core.experimental._launch_config import LaunchConfig, _to_native_launch_config 

12from cuda.core.experimental._stream import Stream 

13from cuda.core.experimental._utils.clear_error_support import ( 

14 assert_type, 

15 assert_type_str_or_bytes_like, 

16 raise_code_path_meant_to_be_unreachable, 

17) 

18from cuda.core.experimental._utils.cuda_utils import driver, get_binding_version, handle_return, precondition 

19 

# Driver-API loader table. The "old" backend maps onto the cuModule* APIs,
# which exist on every supported driver/binding combination. A "new"
# cuLibrary*-based backend entry is appended by _lazy_init() when the
# bindings and driver are recent enough to provide it.
_backend = {
    "old": {
        "file": driver.cuModuleLoad,
        "data": driver.cuModuleLoadDataEx,
        "kernel": driver.cuModuleGetFunction,
        "attribute": driver.cuFuncGetAttribute,
    },
}

28 

29 

# TODO: revisit this treatment for py313t builds
# Module-level state populated once by _lazy_init() on first use.
_inited = False  # True after _lazy_init() has completed
_py_major_ver = None  # major version of the cuda-python bindings
_driver_ver = None  # CUDA driver version as an int, e.g. 12040 for 12.4
_kernel_ctypes = None  # driver kernel handle types accepted by Kernel._from_obj

35 

36 

def _lazy_init():
    """Populate the module-level backend/version state on first use.

    Idempotent: subsequent calls return immediately once initialization
    has completed.
    """
    global _inited, _py_major_ver, _driver_ver, _kernel_ctypes
    if _inited:
        return

    # Which driver bindings are available depends on the cuda-python version.
    _py_major_ver, _ = get_binding_version()
    if _py_major_ver < 12:
        _kernel_ctypes = (driver.CUfunction,)
    else:
        _backend["new"] = {
            "file": driver.cuLibraryLoadFromFile,
            "data": driver.cuLibraryLoadData,
            "kernel": driver.cuLibraryGetKernel,
            "attribute": driver.cuKernelGetAttribute,
        }
        _kernel_ctypes = (driver.CUfunction, driver.CUkernel)
    _driver_ver = handle_return(driver.cuDriverGetVersion())
    # Kernel parameter introspection needs both the new bindings and a
    # driver of at least 12.4.
    if _py_major_ver >= 12 and _driver_ver >= 12040:
        _backend["new"]["paraminfo"] = driver.cuKernelGetParamInfo
    _inited = True

59 

60 

class KernelAttributes:
    """Per-device, cached, read-only attributes of a :obj:`~_module.Kernel`.

    Instances are created internally via :meth:`_init`; direct construction
    is disallowed. Attribute values are fetched from the driver once per
    (device, attribute) pair and cached thereafter.
    """

    def __new__(self, *args, **kwargs):
        raise RuntimeError("KernelAttributes cannot be instantiated directly. Please use Kernel APIs.")

    # NOTE: this was previously a plain class attribute named ``slots``,
    # which has no effect; ``__slots__`` is what suppresses the per-instance
    # __dict__, matching the convention used by Kernel and ObjectCode.
    __slots__ = ("_kernel", "_cache", "_backend_version", "_loader")

    @classmethod
    def _init(cls, kernel):
        self = super().__new__(cls)
        # Hold the kernel weakly so this helper does not keep it alive.
        self._kernel = weakref.ref(kernel)
        self._cache = {}

        self._backend_version = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old"
        self._loader = _backend[self._backend_version]
        return self

    def _get_cached_attribute(
        self, device_id: Device | int | None, attribute: driver.CUfunction_attribute
    ) -> int:
        """Helper function to get a cached attribute or fetch and cache it if not present."""
        device_id = Device(device_id).device_id
        cache_key = device_id, attribute
        # cache_key doubles as a unique sentinel for "not cached".
        result = self._cache.get(cache_key, cache_key)
        if result is not cache_key:
            return result
        kernel = self._kernel()
        if kernel is None:
            raise RuntimeError("Cannot access kernel attributes for expired Kernel object")
        if self._backend_version == "new":
            result = handle_return(self._loader["attribute"](attribute, kernel._handle, device_id))
        else:  # "old" backend
            warn(
                "Device ID argument is ignored when getting attribute from kernel when cuda version < 12. ",
                RuntimeWarning,
                stacklevel=2,
            )
            result = handle_return(self._loader["attribute"](attribute, kernel._handle))
        self._cache[cache_key] = result
        return result

    def max_threads_per_block(self, device_id: Device | int | None = None) -> int:
        """int : The maximum number of threads per block.
        This attribute is read-only."""
        return self._get_cached_attribute(
            device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
        )

    def shared_size_bytes(self, device_id: Device | int | None = None) -> int:
        """int : The size in bytes of statically-allocated shared memory required by this function.
        This attribute is read-only."""
        return self._get_cached_attribute(device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES)

    def const_size_bytes(self, device_id: Device | int | None = None) -> int:
        """int : The size in bytes of user-allocated constant memory required by this function.
        This attribute is read-only."""
        return self._get_cached_attribute(device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES)

    def local_size_bytes(self, device_id: Device | int | None = None) -> int:
        """int : The size in bytes of local memory used by each thread of this function.
        This attribute is read-only."""
        return self._get_cached_attribute(device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES)

    def num_regs(self, device_id: Device | int | None = None) -> int:
        """int : The number of registers used by each thread of this function.
        This attribute is read-only."""
        return self._get_cached_attribute(device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_NUM_REGS)

    def ptx_version(self, device_id: Device | int | None = None) -> int:
        """int : The PTX virtual architecture version for which the function was compiled.
        This attribute is read-only."""
        return self._get_cached_attribute(device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_PTX_VERSION)

    def binary_version(self, device_id: Device | int | None = None) -> int:
        """int : The binary architecture version for which the function was compiled.
        This attribute is read-only."""
        return self._get_cached_attribute(device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_BINARY_VERSION)

    def cache_mode_ca(self, device_id: Device | int | None = None) -> bool:
        """bool : Whether the function has been compiled with user specified option "-Xptxas --dlcm=ca" set.
        This attribute is read-only."""
        return bool(self._get_cached_attribute(device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_CACHE_MODE_CA))

    def max_dynamic_shared_size_bytes(self, device_id: Device | int | None = None) -> int:
        """int : The maximum size in bytes of dynamically-allocated shared memory that can be used
        by this function."""
        return self._get_cached_attribute(
            device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES
        )

    def preferred_shared_memory_carveout(self, device_id: Device | int | None = None) -> int:
        """int : The shared memory carveout preference, in percent of the total shared memory."""
        return self._get_cached_attribute(
            device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
        )

    def cluster_size_must_be_set(self, device_id: Device | int | None = None) -> bool:
        """bool : The kernel must launch with a valid cluster size specified.
        This attribute is read-only."""
        return bool(
            self._get_cached_attribute(
                device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET
            )
        )

    def required_cluster_width(self, device_id: Device | int | None = None) -> int:
        """int : The required cluster width in blocks."""
        return self._get_cached_attribute(
            device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH
        )

    def required_cluster_height(self, device_id: Device | int | None = None) -> int:
        """int : The required cluster height in blocks."""
        return self._get_cached_attribute(
            device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT
        )

    def required_cluster_depth(self, device_id: Device | int | None = None) -> int:
        """int : The required cluster depth in blocks."""
        return self._get_cached_attribute(
            device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH
        )

    def non_portable_cluster_size_allowed(self, device_id: Device | int | None = None) -> bool:
        """bool : Whether the function can be launched with non-portable cluster size."""
        return bool(
            self._get_cached_attribute(
                device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED
            )
        )

    def cluster_scheduling_policy_preference(self, device_id: Device | int | None = None) -> int:
        """int : The block scheduling policy of a function."""
        return self._get_cached_attribute(
            device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE
        )

194 

195 

# Result of KernelOccupancy.max_potential_block_size. The namedtuple typename
# matches the binding name so that instances are picklable (pickle resolves
# the class by its __qualname__ in this module) and repr is not misleading;
# previously the typename was the unrelated "MaxPotential".
MaxPotentialBlockSizeOccupancyResult = namedtuple(
    "MaxPotentialBlockSizeOccupancyResult", ("min_grid_size", "max_block_size")
)

197 

198 

class KernelOccupancy:
    """Occupancy queries for a compiled kernel.

    Instances are created internally via :meth:`_init`; direct construction
    is disallowed. Use :attr:`Kernel.occupancy` to obtain one.
    """

    def __new__(self, *args, **kwargs):
        raise RuntimeError("KernelOccupancy cannot be instantiated directly. Please use Kernel APIs.")

    # NOTE: this was previously a plain class attribute named ``slots``,
    # which has no effect; ``__slots__`` is what suppresses the per-instance
    # __dict__, matching the convention used by Kernel and ObjectCode.
    __slots__ = ("_handle",)

    @classmethod
    def _init(cls, handle):
        self = super().__new__(cls)
        # The raw driver kernel handle used by the cuOccupancy* queries below.
        self._handle = handle

        return self

    def max_active_blocks_per_multiprocessor(self, block_size: int, dynamic_shared_memory_size: int) -> int:
        """Occupancy of the kernel.

        Returns the maximum number of active blocks per multiprocessor for this kernel.

        Parameters
        ----------
        block_size: int
            Block size parameter used to launch this kernel.
        dynamic_shared_memory_size: int
            The amount of dynamic shared memory in bytes needed by block.
            Use `0` if block does not need shared memory.

        Returns
        -------
        int
            The maximum number of active blocks per multiprocessor.

        Note
        ----
        The fraction of the product of maximum number of active blocks per multiprocessor
        and the block size to the maximum number of threads per multiprocessor is known as
        theoretical multiprocessor utilization (occupancy).

        """
        return handle_return(
            driver.cuOccupancyMaxActiveBlocksPerMultiprocessor(self._handle, block_size, dynamic_shared_memory_size)
        )

    def max_potential_block_size(
        self, dynamic_shared_memory_needed: Union[int, driver.CUoccupancyB2DSize], block_size_limit: int
    ) -> MaxPotentialBlockSizeOccupancyResult:
        """MaxPotentialBlockSizeOccupancyResult: Suggested launch configuration for reasonable occupancy.

        Returns the minimum grid size needed to achieve the maximum occupancy and
        the maximum block size that can achieve the maximum occupancy.

        Parameters
        ----------
        dynamic_shared_memory_needed: Union[int, driver.CUoccupancyB2DSize]
            The amount of dynamic shared memory in bytes needed by block.
            Use `0` if block does not need shared memory. Use C-callable
            represented by :obj:`~driver.CUoccupancyB2DSize` to encode
            amount of needed dynamic shared memory which varies depending
            on the block size.
        block_size_limit: int
            Known upper limit on the kernel block size. Use `0` to indicate
            the maximum block size permitted by the device / kernel instead

        Returns
        -------
        :obj:`~MaxPotentialBlockSizeOccupancyResult`
            An object with `min_grid_size` and `max_block_size` attributes encoding
            the suggested launch configuration.

        Note
        ----
        Please be advised that use of C-callable that requires Python Global
        Interpreter Lock may lead to deadlocks.

        """
        if isinstance(dynamic_shared_memory_needed, int):
            # Fixed shared-memory requirement: pass size directly, no callback.
            min_grid_size, max_block_size = handle_return(
                driver.cuOccupancyMaxPotentialBlockSize(
                    self._handle, None, dynamic_shared_memory_needed, block_size_limit
                )
            )
        elif isinstance(dynamic_shared_memory_needed, driver.CUoccupancyB2DSize):
            # Block-size-dependent requirement: pass the C callback pointer.
            min_grid_size, max_block_size = handle_return(
                driver.cuOccupancyMaxPotentialBlockSize(
                    self._handle, dynamic_shared_memory_needed.getPtr(), 0, block_size_limit
                )
            )
        else:
            raise TypeError(
                "dynamic_shared_memory_needed expected to have type int, or CUoccupancyB2DSize, "
                f"got {type(dynamic_shared_memory_needed)}"
            )
        return MaxPotentialBlockSizeOccupancyResult(min_grid_size=min_grid_size, max_block_size=max_block_size)

    def available_dynamic_shared_memory_per_block(self, num_blocks_per_multiprocessor: int, block_size: int) -> int:
        """Dynamic shared memory available per block for given launch configuration.

        The amount of dynamic shared memory per block, in bytes, for given kernel launch configuration.

        Parameters
        ----------
        num_blocks_per_multiprocessor: int
            Number of blocks to be concurrently executing on a multiprocessor.
        block_size: int
            Block size parameter used to launch this kernel.

        Returns
        -------
        int
            Dynamic shared memory available per block for given launch configuration.
        """
        return handle_return(
            driver.cuOccupancyAvailableDynamicSMemPerBlock(self._handle, num_blocks_per_multiprocessor, block_size)
        )

    def max_potential_cluster_size(self, config: LaunchConfig, stream: Stream | None = None) -> int:
        """Maximum potential cluster size.

        The maximum potential cluster size for this kernel and given launch configuration.

        Parameters
        ----------
        config: :obj:`~_launch_config.LaunchConfig`
            Kernel launch configuration. Cluster dimensions in the configuration are ignored.
        stream: :obj:`~Stream`, optional
            The stream on which this kernel is to be launched.

        Returns
        -------
        int
            The maximum cluster size that can be launched for this kernel and launch configuration.
        """
        drv_cfg = _to_native_launch_config(config)
        if stream is not None:
            drv_cfg.hStream = stream.handle
        return handle_return(driver.cuOccupancyMaxPotentialClusterSize(self._handle, drv_cfg))

    def max_active_clusters(self, config: LaunchConfig, stream: Stream | None = None) -> int:
        """Maximum number of active clusters on the target device.

        The maximum number of clusters that could concurrently execute on the target device.

        Parameters
        ----------
        config: :obj:`~_launch_config.LaunchConfig`
            Kernel launch configuration.
        stream: :obj:`~Stream`, optional
            The stream on which this kernel is to be launched.

        Returns
        -------
        int
            The maximum number of clusters that could co-exist on the target device.
        """
        drv_cfg = _to_native_launch_config(config)
        if stream is not None:
            drv_cfg.hStream = stream.handle
        return handle_return(driver.cuOccupancyMaxActiveClusters(self._handle, drv_cfg))

358 

359 

# (offset, size) record describing one kernel parameter's placement, in bytes,
# within the argument buffer.
ParamInfo = namedtuple("ParamInfo", ("offset", "size"))

361 

362 

class Kernel:
    """Represent a compiled kernel that has been loaded onto the device.

    Kernel instances can be executed by passing them directly into the
    :func:`~launch` function.

    Directly creating a :obj:`~_module.Kernel` is not supported; instances
    should instead be obtained through a :obj:`~_module.ObjectCode` object.

    """

    __slots__ = ("_handle", "_module", "_attributes", "_occupancy", "__weakref__")

    def __new__(self, *args, **kwargs):
        raise RuntimeError("Kernel objects cannot be instantiated directly. Please use ObjectCode APIs.")

    @classmethod
    def _from_obj(cls, obj, mod):
        # Internal constructor: wrap a driver kernel handle together with the
        # ObjectCode that owns it.
        assert_type(obj, _kernel_ctypes)
        assert_type(mod, ObjectCode)
        kernel = super().__new__(cls)
        kernel._handle = obj
        kernel._module = mod
        # Helper objects are created lazily on first property access.
        kernel._attributes = None
        kernel._occupancy = None
        return kernel

    @property
    def attributes(self) -> KernelAttributes:
        """Get the read-only attributes of this kernel."""
        if self._attributes is None:
            self._attributes = KernelAttributes._init(self)
        return self._attributes

    def _get_arguments_info(self, param_info=False) -> tuple[int, list[ParamInfo]]:
        # Count the kernel's arguments (and optionally collect their
        # offset/size) by probing successive positions with cuKernelGetParamInfo.
        attr_impl = self.attributes
        if attr_impl._backend_version != "new":
            raise NotImplementedError("New backend is required")
        if "paraminfo" not in attr_impl._loader:
            raise NotImplementedError(
                "Driver version 12.4 or newer is required for this function. "
                f"Using driver version {_driver_ver // 1000}.{(_driver_ver % 1000) // 10}"
            )
        position = 0
        collected = []
        while True:
            result = attr_impl._loader["paraminfo"](self._handle, position)
            if result[0] != driver.CUresult.CUDA_SUCCESS:
                break
            if param_info:
                collected.append(ParamInfo(offset=result[1], size=result[2]))
            position += 1
        # Probing one past the last argument is expected to fail with
        # CUDA_ERROR_INVALID_VALUE; any other status is a genuine error.
        if result[0] != driver.CUresult.CUDA_ERROR_INVALID_VALUE:
            handle_return(result)
        return position, collected

    @property
    def num_arguments(self) -> int:
        """int : The number of arguments of this function"""
        count, _ = self._get_arguments_info()
        return count

    @property
    def arguments_info(self) -> list[ParamInfo]:
        """list[ParamInfo]: (offset, size) for each argument of this function"""
        _, infos = self._get_arguments_info(param_info=True)
        return infos

    @property
    def occupancy(self) -> KernelOccupancy:
        """Get the occupancy information for launching this kernel."""
        if self._occupancy is None:
            self._occupancy = KernelOccupancy._init(self._handle)
        return self._occupancy

    # TODO: implement from_handle()

440 

441 

# Accepted representations of compiled code: in-memory bytes/bytearray, or a
# str file path to on-disk code.
CodeTypeT = Union[bytes, bytearray, str]

443 

444 

class ObjectCode:
    """Represent a compiled program to be loaded onto the device.

    This object provides a unified interface for different types of
    compiled programs that will be loaded onto the device.

    Note
    ----
    This class has no default constructor. If you already have a cubin that you would
    like to load, use the :meth:`from_cubin` alternative constructor. Constructing directly
    from all other possible code types should be avoided in favor of compilation through
    :class:`~cuda.core.experimental.Program`

    Note
    ----
    Usage under CUDA 11.x will only load to the current device
    context.
    """

    __slots__ = ("_handle", "_backend_version", "_code_type", "_module", "_loader", "_sym_map", "_name")
    _supported_code_type = ("cubin", "ptx", "ltoir", "fatbin", "object", "library")

    def __new__(self, *args, **kwargs):
        raise RuntimeError(
            "ObjectCode objects cannot be instantiated directly. "
            "Please use ObjectCode APIs (from_cubin, from_ptx) or Program APIs (compile)."
        )

    @classmethod
    def _init(cls, module, code_type, *, name: str = "", symbol_mapping: dict | None = None):
        # Internal constructor shared by all from_* factories and Program.
        self = super().__new__(cls)
        assert code_type in self._supported_code_type, f"{code_type=} is not supported"
        _lazy_init()

        # handle is assigned during _lazy_load
        self._handle = None

        # Pick the cuLibrary*-based loader when both bindings and driver are
        # CUDA 12+; otherwise fall back to the cuModule* loader.
        self._backend_version = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old"
        self._loader = _backend[self._backend_version]

        self._code_type = code_type
        self._module = module  # raw code (bytes-like) or file path (str)
        self._sym_map = {} if symbol_mapping is None else symbol_mapping
        self._name = name

        return self

    @classmethod
    def _reduce_helper(self, module, code_type, name, symbol_mapping):
        # just for forwarding kwargs
        return ObjectCode._init(module, code_type, name=name, symbol_mapping=symbol_mapping)

    def __reduce__(self):
        # Pickle the constructor inputs, not the driver handle; the module is
        # re-loaded lazily after unpickling.
        return ObjectCode._reduce_helper, (self._module, self._code_type, self._name, self._sym_map)

    @staticmethod
    def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
        """Create an :class:`ObjectCode` instance from an existing cubin.

        Parameters
        ----------
        module : Union[bytes, str]
            Either a bytes object containing the in-memory cubin to load, or
            a file path string pointing to the on-disk cubin to load.
        name : Optional[str]
            A human-readable identifier representing this code object.
        symbol_mapping : Optional[dict]
            A dictionary specifying how the unmangled symbol names (as keys)
            should be mapped to the mangled names before trying to retrieve
            them (default to no mappings).
        """
        return ObjectCode._init(module, "cubin", name=name, symbol_mapping=symbol_mapping)

    @staticmethod
    def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
        """Create an :class:`ObjectCode` instance from an existing PTX.

        Parameters
        ----------
        module : Union[bytes, str]
            Either a bytes object containing the in-memory ptx code to load, or
            a file path string pointing to the on-disk ptx file to load.
        name : Optional[str]
            A human-readable identifier representing this code object.
        symbol_mapping : Optional[dict]
            A dictionary specifying how the unmangled symbol names (as keys)
            should be mapped to the mangled names before trying to retrieve
            them (default to no mappings).
        """
        return ObjectCode._init(module, "ptx", name=name, symbol_mapping=symbol_mapping)

    @staticmethod
    def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
        """Create an :class:`ObjectCode` instance from an existing LTOIR.

        Parameters
        ----------
        module : Union[bytes, str]
            Either a bytes object containing the in-memory ltoir code to load, or
            a file path string pointing to the on-disk ltoir file to load.
        name : Optional[str]
            A human-readable identifier representing this code object.
        symbol_mapping : Optional[dict]
            A dictionary specifying how the unmangled symbol names (as keys)
            should be mapped to the mangled names before trying to retrieve
            them (default to no mappings).
        """
        return ObjectCode._init(module, "ltoir", name=name, symbol_mapping=symbol_mapping)

    @staticmethod
    def from_fatbin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
        """Create an :class:`ObjectCode` instance from an existing fatbin.

        Parameters
        ----------
        module : Union[bytes, str]
            Either a bytes object containing the in-memory fatbin to load, or
            a file path string pointing to the on-disk fatbin to load.
        name : Optional[str]
            A human-readable identifier representing this code object.
        symbol_mapping : Optional[dict]
            A dictionary specifying how the unmangled symbol names (as keys)
            should be mapped to the mangled names before trying to retrieve
            them (default to no mappings).
        """
        return ObjectCode._init(module, "fatbin", name=name, symbol_mapping=symbol_mapping)

    @staticmethod
    def from_object(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
        """Create an :class:`ObjectCode` instance from an existing object code.

        Parameters
        ----------
        module : Union[bytes, str]
            Either a bytes object containing the in-memory object code to load, or
            a file path string pointing to the on-disk object code to load.
        name : Optional[str]
            A human-readable identifier representing this code object.
        symbol_mapping : Optional[dict]
            A dictionary specifying how the unmangled symbol names (as keys)
            should be mapped to the mangled names before trying to retrieve
            them (default to no mappings).
        """
        return ObjectCode._init(module, "object", name=name, symbol_mapping=symbol_mapping)

    @staticmethod
    def from_library(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
        """Create an :class:`ObjectCode` instance from an existing library.

        Parameters
        ----------
        module : Union[bytes, str]
            Either a bytes object containing the in-memory library to load, or
            a file path string pointing to the on-disk library to load.
        name : Optional[str]
            A human-readable identifier representing this code object.
        symbol_mapping : Optional[dict]
            A dictionary specifying how the unmangled symbol names (as keys)
            should be mapped to the mangled names before trying to retrieve
            them (default to no mappings).
        """
        return ObjectCode._init(module, "library", name=name, symbol_mapping=symbol_mapping)

    # TODO: do we want to unload in a finalizer? Probably not..

    def _lazy_load_module(self, *args, **kwargs):
        # Load the module into the driver on first need (used as a
        # @precondition on get_kernel and handle). No-op once loaded.
        if self._handle is not None:
            return
        module = self._module
        assert_type_str_or_bytes_like(module)
        if isinstance(module, str):
            # str is a file path; the extra empty lists/zeros are the (unused)
            # jit/library option arrays and their counts for the new backend.
            if self._backend_version == "new":
                self._handle = handle_return(self._loader["file"](module.encode(), [], [], 0, [], [], 0))
            else:  # "old" backend
                self._handle = handle_return(self._loader["file"](module.encode()))
            return
        if isinstance(module, (bytes, bytearray)):
            # bytes-like is in-memory code.
            if self._backend_version == "new":
                self._handle = handle_return(self._loader["data"](module, [], [], 0, [], [], 0))
            else:  # "old" backend
                self._handle = handle_return(self._loader["data"](module, 0, [], []))
            return
        raise_code_path_meant_to_be_unreachable()

    @precondition(_lazy_load_module)
    def get_kernel(self, name) -> Kernel:
        """Return the :obj:`~_module.Kernel` of a specified name from this object code.

        Parameters
        ----------
        name : Any
            Name of the kernel to retrieve.

        Returns
        -------
        :obj:`~_module.Kernel`
            Newly created kernel object.

        """
        supported_code_types = ("cubin", "ptx", "fatbin")
        if self._code_type not in supported_code_types:
            raise RuntimeError(f'Unsupported code type "{self._code_type}" ({supported_code_types=})')
        # Prefer the user-provided unmangled->mangled mapping; otherwise the
        # name is assumed to already be the (mangled) symbol and is encoded.
        try:
            name = self._sym_map[name]
        except KeyError:
            name = name.encode()

        data = handle_return(self._loader["kernel"](self._handle, name))
        return Kernel._from_obj(data, self)

    @property
    def code(self) -> CodeTypeT:
        """Return the underlying code object."""
        return self._module

    @property
    def name(self) -> str:
        """Return a human-readable name of this code object."""
        return self._name

    @property
    def code_type(self) -> str:
        """Return the type of the underlying code object."""
        return self._code_type

    @property
    @precondition(_lazy_load_module)
    def handle(self):
        """Return the underlying handle object.

        .. caution::

            This handle is a Python object. To get the memory address of the underlying C
            handle, call ``int(ObjectCode.handle)``.
        """
        return self._handle