Coverage for cuda / core / experimental / _module.py: 92%
250 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-10 01:19 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
5import weakref
6from collections import namedtuple
7from typing import Union
8from warnings import warn
10from cuda.core.experimental._device import Device
11from cuda.core.experimental._launch_config import LaunchConfig, _to_native_launch_config
12from cuda.core.experimental._stream import Stream
13from cuda.core.experimental._utils.clear_error_support import (
14 assert_type,
15 assert_type_str_or_bytes_like,
16 raise_code_path_meant_to_be_unreachable,
17)
18from cuda.core.experimental._utils.cuda_utils import driver, get_binding_version, handle_return, precondition
# Dispatch tables mapping loader operations ("file", "data", "kernel",
# "attribute") to CUDA driver API calls.  The "old" backend uses the
# cuModule* APIs; a "new" cuLibrary*-based table is added by _lazy_init()
# when the cuda-python binding major version is >= 12.
_backend = {
    "old": {
        "file": driver.cuModuleLoad,
        "data": driver.cuModuleLoadDataEx,
        "kernel": driver.cuModuleGetFunction,
        "attribute": driver.cuFuncGetAttribute,
    },
}
# TODO: revisit this treatment for py313t builds
# Module-level state populated once by _lazy_init().
_inited = False  # whether _lazy_init() has already run
_py_major_ver = None  # major version of the cuda-python binding (set by _lazy_init)
_driver_ver = None  # CUDA driver version as an int, e.g. 12040 (set by _lazy_init)
_kernel_ctypes = None  # driver types accepted as kernel handles (set by _lazy_init)
def _lazy_init():
    """Perform one-time module setup: version queries and backend tables.

    Subsequent calls are no-ops.  Populates ``_py_major_ver``, ``_driver_ver``,
    ``_kernel_ctypes`` and, when the binding supports it, ``_backend["new"]``.
    """
    global _inited
    if _inited:
        return

    global _py_major_ver, _driver_ver, _kernel_ctypes
    _driver_ver = handle_return(driver.cuDriverGetVersion())
    # Which driver APIs are exposed depends on the cuda-python binding version.
    _py_major_ver, _ = get_binding_version()
    if _py_major_ver < 12:
        _kernel_ctypes = (driver.CUfunction,)
    else:
        _kernel_ctypes = (driver.CUfunction, driver.CUkernel)
        new_table = {
            "file": driver.cuLibraryLoadFromFile,
            "data": driver.cuLibraryLoadData,
            "kernel": driver.cuLibraryGetKernel,
            "attribute": driver.cuKernelGetAttribute,
        }
        # Parameter introspection needs a sufficiently new driver as well.
        if _driver_ver >= 12040:
            new_table["paraminfo"] = driver.cuKernelGetParamInfo
        _backend["new"] = new_table
    _inited = True
class KernelAttributes:
    """Per-device, read-only attribute accessors for a :obj:`Kernel`.

    Instances are created lazily through :meth:`_init` (used by
    ``Kernel.attributes``); direct construction is disallowed.
    """

    def __new__(self, *args, **kwargs):
        raise RuntimeError("KernelAttributes cannot be instantiated directly. Please use Kernel APIs.")

    # NOTE(review): this looks like it was meant to be ``__slots__`` -- as
    # written it is an ordinary class attribute with no effect.  Confirm intent.
    slots = ("_kernel", "_cache", "_backend_version", "_loader")

    @classmethod
    def _init(cls, kernel):
        """Build an instance weakly bound to *kernel* and select a driver backend."""
        self = super().__new__(cls)
        # Weak reference so this object does not keep the Kernel alive.
        self._kernel = weakref.ref(kernel)
        self._cache = {}  # (device_id, attribute) -> fetched attribute value

        self._backend_version = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old"
        self._loader = _backend[self._backend_version]
        return self

    def _get_cached_attribute(self, device_id: Device | int | None, attribute: driver.CUfunction_attribute) -> int:
        """Helper function to get a cached attribute or fetch and cache it if not present."""
        device_id = Device(device_id).device_id
        cache_key = device_id, attribute
        # The key itself doubles as the "missing" sentinel, so one dict lookup
        # distinguishes a hit from a miss even for falsy cached values.
        result = self._cache.get(cache_key, cache_key)
        if result is not cache_key:
            return result
        kernel = self._kernel()
        if kernel is None:
            raise RuntimeError("Cannot access kernel attributes for expired Kernel object")
        if self._backend_version == "new":
            result = handle_return(self._loader["attribute"](attribute, kernel._handle, device_id))
        else:  # "old" backend
            warn(
                "Device ID argument is ignored when getting attribute from kernel when cuda version < 12. ",
                RuntimeWarning,
                stacklevel=2,
            )
            result = handle_return(self._loader["attribute"](attribute, kernel._handle))
        self._cache[cache_key] = result
        return result

    def max_threads_per_block(self, device_id: Device | int | None = None) -> int:
        """int : The maximum number of threads per block.
        This attribute is read-only."""
        return self._get_cached_attribute(
            device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
        )

    def shared_size_bytes(self, device_id: Device | int | None = None) -> int:
        """int : The size in bytes of statically-allocated shared memory required by this function.
        This attribute is read-only."""
        return self._get_cached_attribute(device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES)

    def const_size_bytes(self, device_id: Device | int | None = None) -> int:
        """int : The size in bytes of user-allocated constant memory required by this function.
        This attribute is read-only."""
        return self._get_cached_attribute(device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES)

    def local_size_bytes(self, device_id: Device | int | None = None) -> int:
        """int : The size in bytes of local memory used by each thread of this function.
        This attribute is read-only."""
        return self._get_cached_attribute(device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES)

    def num_regs(self, device_id: Device | int | None = None) -> int:
        """int : The number of registers used by each thread of this function.
        This attribute is read-only."""
        return self._get_cached_attribute(device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_NUM_REGS)

    def ptx_version(self, device_id: Device | int | None = None) -> int:
        """int : The PTX virtual architecture version for which the function was compiled.
        This attribute is read-only."""
        return self._get_cached_attribute(device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_PTX_VERSION)

    def binary_version(self, device_id: Device | int | None = None) -> int:
        """int : The binary architecture version for which the function was compiled.
        This attribute is read-only."""
        return self._get_cached_attribute(device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_BINARY_VERSION)

    def cache_mode_ca(self, device_id: Device | int | None = None) -> bool:
        """bool : Whether the function has been compiled with user specified option "-Xptxas --dlcm=ca" set.
        This attribute is read-only."""
        return bool(self._get_cached_attribute(device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_CACHE_MODE_CA))

    def max_dynamic_shared_size_bytes(self, device_id: Device | int | None = None) -> int:
        """int : The maximum size in bytes of dynamically-allocated shared memory that can be used
        by this function."""
        return self._get_cached_attribute(
            device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES
        )

    def preferred_shared_memory_carveout(self, device_id: Device | int | None = None) -> int:
        """int : The shared memory carveout preference, in percent of the total shared memory."""
        return self._get_cached_attribute(
            device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
        )

    def cluster_size_must_be_set(self, device_id: Device | int | None = None) -> bool:
        """bool : The kernel must launch with a valid cluster size specified.
        This attribute is read-only."""
        return bool(
            self._get_cached_attribute(
                device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET
            )
        )

    def required_cluster_width(self, device_id: Device | int | None = None) -> int:
        """int : The required cluster width in blocks."""
        return self._get_cached_attribute(
            device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH
        )

    def required_cluster_height(self, device_id: Device | int | None = None) -> int:
        """int : The required cluster height in blocks."""
        return self._get_cached_attribute(
            device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT
        )

    def required_cluster_depth(self, device_id: Device | int | None = None) -> int:
        """int : The required cluster depth in blocks."""
        return self._get_cached_attribute(
            device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH
        )

    def non_portable_cluster_size_allowed(self, device_id: Device | int | None = None) -> bool:
        """bool : Whether the function can be launched with non-portable cluster size."""
        return bool(
            self._get_cached_attribute(
                device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED
            )
        )

    def cluster_scheduling_policy_preference(self, device_id: Device | int | None = None) -> int:
        """int : The block scheduling policy of a function."""
        return self._get_cached_attribute(
            device_id, driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE
        )
# Result of KernelOccupancy.max_potential_block_size: the minimum grid size
# needed for maximum occupancy and the corresponding maximum block size.
# The typename matches the bound name so that repr() is informative and
# pickling can resolve the class (the previous typename "MaxPotential" did
# not exist as a module attribute, making instances unpicklable).
MaxPotentialBlockSizeOccupancyResult = namedtuple(
    "MaxPotentialBlockSizeOccupancyResult", ("min_grid_size", "max_block_size")
)
class KernelOccupancy:
    """Occupancy-related queries for a compiled kernel.

    Instances are created lazily through :meth:`_init` (used by
    ``Kernel.occupancy``); direct construction is disallowed.
    """

    def __new__(self, *args, **kwargs):
        raise RuntimeError("KernelOccupancy cannot be instantiated directly. Please use Kernel APIs.")

    # NOTE(review): this looks like it was meant to be ``__slots__`` -- as
    # written it is an ordinary class attribute with no effect.  Confirm intent.
    slots = ("_handle",)

    @classmethod
    def _init(cls, handle):
        """Wrap the driver kernel *handle* for occupancy queries."""
        self = super().__new__(cls)
        self._handle = handle

        return self

    def max_active_blocks_per_multiprocessor(self, block_size: int, dynamic_shared_memory_size: int) -> int:
        """Occupancy of the kernel.

        Returns the maximum number of active blocks per multiprocessor for this kernel.

        Parameters
        ----------
        block_size: int
            Block size parameter used to launch this kernel.
        dynamic_shared_memory_size: int
            The amount of dynamic shared memory in bytes needed by block.
            Use `0` if block does not need shared memory.

        Returns
        -------
        int
            The maximum number of active blocks per multiprocessor.

        Note
        ----
        The fraction of the product of maximum number of active blocks per multiprocessor
        and the block size to the maximum number of threads per multiprocessor is known as
        theoretical multiprocessor utilization (occupancy).

        """
        return handle_return(
            driver.cuOccupancyMaxActiveBlocksPerMultiprocessor(self._handle, block_size, dynamic_shared_memory_size)
        )

    def max_potential_block_size(
        self, dynamic_shared_memory_needed: Union[int, driver.CUoccupancyB2DSize], block_size_limit: int
    ) -> MaxPotentialBlockSizeOccupancyResult:
        """MaxPotentialBlockSizeOccupancyResult: Suggested launch configuration for reasonable occupancy.

        Returns the minimum grid size needed to achieve the maximum occupancy and
        the maximum block size that can achieve the maximum occupancy.

        Parameters
        ----------
        dynamic_shared_memory_needed: Union[int, driver.CUoccupancyB2DSize]
            The amount of dynamic shared memory in bytes needed by block.
            Use `0` if block does not need shared memory. Use C-callable
            represented by :obj:`~driver.CUoccupancyB2DSize` to encode
            amount of needed dynamic shared memory which varies depending
            on the block size.
        block_size_limit: int
            Known upper limit on the kernel block size. Use `0` to indicate
            the maximum block size permitted by the device / kernel instead

        Returns
        -------
        :obj:`~MaxPotentialBlockSizeOccupancyResult`
            An object with `min_grid_size` and `max_block_size` attributes encoding
            the suggested launch configuration.

        Note
        ----
        Please be advised that use of C-callable that requires Python Global
        Interpreter Lock may lead to deadlocks.

        """
        # An int means a fixed dynamic-shared-memory amount; a CUoccupancyB2DSize
        # is a C-callable computing the amount as a function of block size.
        if isinstance(dynamic_shared_memory_needed, int):
            min_grid_size, max_block_size = handle_return(
                driver.cuOccupancyMaxPotentialBlockSize(
                    self._handle, None, dynamic_shared_memory_needed, block_size_limit
                )
            )
        elif isinstance(dynamic_shared_memory_needed, driver.CUoccupancyB2DSize):
            min_grid_size, max_block_size = handle_return(
                driver.cuOccupancyMaxPotentialBlockSize(
                    self._handle, dynamic_shared_memory_needed.getPtr(), 0, block_size_limit
                )
            )
        else:
            raise TypeError(
                "dynamic_shared_memory_needed expected to have type int, or CUoccupancyB2DSize, "
                f"got {type(dynamic_shared_memory_needed)}"
            )
        return MaxPotentialBlockSizeOccupancyResult(min_grid_size=min_grid_size, max_block_size=max_block_size)

    def available_dynamic_shared_memory_per_block(self, num_blocks_per_multiprocessor: int, block_size: int) -> int:
        """Dynamic shared memory available per block for given launch configuration.

        The amount of dynamic shared memory per block, in bytes, for given kernel launch configuration.

        Parameters
        ----------
        num_blocks_per_multiprocessor: int
            Number of blocks to be concurrently executing on a multiprocessor.
        block_size: int
            Block size parameter used to launch this kernel.

        Returns
        -------
        int
            Dynamic shared memory available per block for given launch configuration.
        """
        return handle_return(
            driver.cuOccupancyAvailableDynamicSMemPerBlock(self._handle, num_blocks_per_multiprocessor, block_size)
        )

    def max_potential_cluster_size(self, config: LaunchConfig, stream: Stream | None = None) -> int:
        """Maximum potential cluster size.

        The maximum potential cluster size for this kernel and given launch configuration.

        Parameters
        ----------
        config: :obj:`~_launch_config.LaunchConfig`
            Kernel launch configuration. Cluster dimensions in the configuration are ignored.
        stream: :obj:`~Stream`, optional
            The stream on which this kernel is to be launched.

        Returns
        -------
        int
            The maximum cluster size that can be launched for this kernel and launch configuration.
        """
        drv_cfg = _to_native_launch_config(config)
        if stream is not None:
            drv_cfg.hStream = stream.handle
        return handle_return(driver.cuOccupancyMaxPotentialClusterSize(self._handle, drv_cfg))

    def max_active_clusters(self, config: LaunchConfig, stream: Stream | None = None) -> int:
        """Maximum number of active clusters on the target device.

        The maximum number of clusters that could concurrently execute on the target device.

        Parameters
        ----------
        config: :obj:`~_launch_config.LaunchConfig`
            Kernel launch configuration.
        stream: :obj:`~Stream`, optional
            The stream on which this kernel is to be launched.

        Returns
        -------
        int
            The maximum number of clusters that could co-exist on the target device.
        """
        drv_cfg = _to_native_launch_config(config)
        if stream is not None:
            drv_cfg.hStream = stream.handle
        return handle_return(driver.cuOccupancyMaxActiveClusters(self._handle, drv_cfg))
# Byte offset and size of a single kernel parameter, as reported by the
# driver's kernel parameter introspection (see Kernel._get_arguments_info).
ParamInfo = namedtuple("ParamInfo", ["offset", "size"])
class Kernel:
    """Represent a compiled kernel that had been loaded onto the device.

    Kernel instances can be executed when passed directly into the
    :func:`~launch` function.

    Directly creating a :obj:`~_module.Kernel` is not supported, and they
    should instead be created through a :obj:`~_module.ObjectCode` object.

    """

    __slots__ = ("_handle", "_module", "_attributes", "_occupancy", "__weakref__")

    def __new__(self, *args, **kwargs):
        raise RuntimeError("Kernel objects cannot be instantiated directly. Please use ObjectCode APIs.")

    @classmethod
    def _from_obj(cls, obj, mod):
        """Wrap driver kernel object *obj* retrieved from ObjectCode *mod*."""
        assert_type(obj, _kernel_ctypes)
        assert_type(mod, ObjectCode)
        ker = super().__new__(cls)
        ker._handle = obj
        ker._module = mod  # keep the owning ObjectCode alive
        ker._attributes = None  # KernelAttributes, built lazily
        ker._occupancy = None  # KernelOccupancy, built lazily
        return ker

    @property
    def attributes(self) -> KernelAttributes:
        """Get the read-only attributes of this kernel."""
        if self._attributes is None:
            self._attributes = KernelAttributes._init(self)
        return self._attributes

    def _get_arguments_info(self, param_info=False) -> tuple[int, list[ParamInfo]]:
        """Count the kernel's parameters (and optionally collect their
        offset/size) by probing the driver's paraminfo API.

        Requires the "new" backend and a driver that exposes paraminfo.
        """
        attr_impl = self.attributes
        if attr_impl._backend_version != "new":
            raise NotImplementedError("New backend is required")
        if "paraminfo" not in attr_impl._loader:
            raise NotImplementedError(
                "Driver version 12.4 or newer is required for this function. "
                f"Using driver version {_driver_ver // 1000}.{(_driver_ver % 1000) // 10}"
            )
        arg_pos = 0
        param_info_data = []
        # Probe successive argument indices until the driver rejects one.
        while True:
            result = attr_impl._loader["paraminfo"](self._handle, arg_pos)
            if result[0] != driver.CUresult.CUDA_SUCCESS:
                break
            if param_info:
                p_info = ParamInfo(offset=result[1], size=result[2])
                param_info_data.append(p_info)
            arg_pos = arg_pos + 1
        # CUDA_ERROR_INVALID_VALUE marks the expected end of the argument
        # list; any other status is a real error and is raised here.
        if result[0] != driver.CUresult.CUDA_ERROR_INVALID_VALUE:
            handle_return(result)
        return arg_pos, param_info_data

    @property
    def num_arguments(self) -> int:
        """int : The number of arguments of this function"""
        num_args, _ = self._get_arguments_info()
        return num_args

    @property
    def arguments_info(self) -> list[ParamInfo]:
        """list[ParamInfo]: (offset, size) for each argument of this function"""
        _, param_info = self._get_arguments_info(param_info=True)
        return param_info

    @property
    def occupancy(self) -> KernelOccupancy:
        """Get the occupancy information for launching this kernel."""
        if self._occupancy is None:
            self._occupancy = KernelOccupancy._init(self._handle)
        return self._occupancy

    # TODO: implement from_handle()
# Types accepted as a code payload: in-memory bytes/bytearray or a file path string.
CodeTypeT = Union[bytes, bytearray, str]
class ObjectCode:
    """Represent a compiled program to be loaded onto the device.

    This object provides a unified interface for different types of
    compiled programs that will be loaded onto the device.

    Note
    ----
    This class has no default constructor. If you already have a cubin that you would
    like to load, use the :meth:`from_cubin` alternative constructor. Constructing directly
    from all other possible code types should be avoided in favor of compilation through
    :class:`~cuda.core.experimental.Program`

    Note
    ----
    Usage under CUDA 11.x will only load to the current device
    context.
    """

    __slots__ = ("_handle", "_backend_version", "_code_type", "_module", "_loader", "_sym_map", "_name")
    _supported_code_type = ("cubin", "ptx", "ltoir", "fatbin", "object", "library")

    def __new__(self, *args, **kwargs):
        raise RuntimeError(
            "ObjectCode objects cannot be instantiated directly. "
            "Please use ObjectCode APIs (from_cubin, from_ptx) or Program APIs (compile)."
        )

    @classmethod
    def _init(cls, module, code_type, *, name: str = "", symbol_mapping: dict | None = None):
        """Shared constructor backing all ``from_*`` factories.

        *module* is kept as-is (bytes or file path); the driver module/library
        itself is loaded lazily by :meth:`_lazy_load_module`.
        """
        self = super().__new__(cls)
        assert code_type in self._supported_code_type, f"{code_type=} is not supported"
        _lazy_init()

        # handle is assigned during _lazy_load
        self._handle = None

        self._backend_version = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old"
        self._loader = _backend[self._backend_version]

        self._code_type = code_type
        self._module = module
        self._sym_map = {} if symbol_mapping is None else symbol_mapping
        self._name = name

        return self

    @classmethod
    def _reduce_helper(self, module, code_type, name, symbol_mapping):
        # just for forwarding kwargs
        return ObjectCode._init(module, code_type, name=name, symbol_mapping=symbol_mapping)

    def __reduce__(self):
        # Pickle support: re-create from the raw module payload; the driver
        # handle is intentionally not serialized (it is re-loaded lazily).
        return ObjectCode._reduce_helper, (self._module, self._code_type, self._name, self._sym_map)

    @staticmethod
    def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
        """Create an :class:`ObjectCode` instance from an existing cubin.

        Parameters
        ----------
        module : Union[bytes, str]
            Either a bytes object containing the in-memory cubin to load, or
            a file path string pointing to the on-disk cubin to load.
        name : Optional[str]
            A human-readable identifier representing this code object.
        symbol_mapping : Optional[dict]
            A dictionary specifying how the unmangled symbol names (as keys)
            should be mapped to the mangled names before trying to retrieve
            them (default to no mappings).
        """
        return ObjectCode._init(module, "cubin", name=name, symbol_mapping=symbol_mapping)

    @staticmethod
    def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
        """Create an :class:`ObjectCode` instance from an existing PTX.

        Parameters
        ----------
        module : Union[bytes, str]
            Either a bytes object containing the in-memory ptx code to load, or
            a file path string pointing to the on-disk ptx file to load.
        name : Optional[str]
            A human-readable identifier representing this code object.
        symbol_mapping : Optional[dict]
            A dictionary specifying how the unmangled symbol names (as keys)
            should be mapped to the mangled names before trying to retrieve
            them (default to no mappings).
        """
        return ObjectCode._init(module, "ptx", name=name, symbol_mapping=symbol_mapping)

    @staticmethod
    def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
        """Create an :class:`ObjectCode` instance from an existing LTOIR.

        Parameters
        ----------
        module : Union[bytes, str]
            Either a bytes object containing the in-memory ltoir code to load, or
            a file path string pointing to the on-disk ltoir file to load.
        name : Optional[str]
            A human-readable identifier representing this code object.
        symbol_mapping : Optional[dict]
            A dictionary specifying how the unmangled symbol names (as keys)
            should be mapped to the mangled names before trying to retrieve
            them (default to no mappings).
        """
        return ObjectCode._init(module, "ltoir", name=name, symbol_mapping=symbol_mapping)

    @staticmethod
    def from_fatbin(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
        """Create an :class:`ObjectCode` instance from an existing fatbin.

        Parameters
        ----------
        module : Union[bytes, str]
            Either a bytes object containing the in-memory fatbin to load, or
            a file path string pointing to the on-disk fatbin to load.
        name : Optional[str]
            A human-readable identifier representing this code object.
        symbol_mapping : Optional[dict]
            A dictionary specifying how the unmangled symbol names (as keys)
            should be mapped to the mangled names before trying to retrieve
            them (default to no mappings).
        """
        return ObjectCode._init(module, "fatbin", name=name, symbol_mapping=symbol_mapping)

    @staticmethod
    def from_object(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
        """Create an :class:`ObjectCode` instance from an existing object code.

        Parameters
        ----------
        module : Union[bytes, str]
            Either a bytes object containing the in-memory object code to load, or
            a file path string pointing to the on-disk object code to load.
        name : Optional[str]
            A human-readable identifier representing this code object.
        symbol_mapping : Optional[dict]
            A dictionary specifying how the unmangled symbol names (as keys)
            should be mapped to the mangled names before trying to retrieve
            them (default to no mappings).
        """
        return ObjectCode._init(module, "object", name=name, symbol_mapping=symbol_mapping)

    @staticmethod
    def from_library(module: Union[bytes, str], *, name: str = "", symbol_mapping: dict | None = None) -> "ObjectCode":
        """Create an :class:`ObjectCode` instance from an existing library.

        Parameters
        ----------
        module : Union[bytes, str]
            Either a bytes object containing the in-memory library to load, or
            a file path string pointing to the on-disk library to load.
        name : Optional[str]
            A human-readable identifier representing this code object.
        symbol_mapping : Optional[dict]
            A dictionary specifying how the unmangled symbol names (as keys)
            should be mapped to the mangled names before trying to retrieve
            them (default to no mappings).
        """
        return ObjectCode._init(module, "library", name=name, symbol_mapping=symbol_mapping)

    # TODO: do we want to unload in a finalizer? Probably not..

    def _lazy_load_module(self, *args, **kwargs):
        """Load the module/library into the driver on first use.

        No-op when already loaded.  Dispatches on the payload type: a str is
        treated as a file path, bytes/bytearray as in-memory code.
        """
        if self._handle is not None:
            return
        module = self._module
        assert_type_str_or_bytes_like(module)
        if isinstance(module, str):
            if self._backend_version == "new":
                self._handle = handle_return(self._loader["file"](module.encode(), [], [], 0, [], [], 0))
            else:  # "old" backend
                self._handle = handle_return(self._loader["file"](module.encode()))
            return
        if isinstance(module, (bytes, bytearray)):
            if self._backend_version == "new":
                self._handle = handle_return(self._loader["data"](module, [], [], 0, [], [], 0))
            else:  # "old" backend
                self._handle = handle_return(self._loader["data"](module, 0, [], []))
            return
        raise_code_path_meant_to_be_unreachable()

    @precondition(_lazy_load_module)
    def get_kernel(self, name) -> Kernel:
        """Return the :obj:`~_module.Kernel` of a specified name from this object code.

        Parameters
        ----------
        name : Any
            Name of the kernel to retrieve.

        Returns
        -------
        :obj:`~_module.Kernel`
            Newly created kernel object.

        """
        supported_code_types = ("cubin", "ptx", "fatbin")
        if self._code_type not in supported_code_types:
            raise RuntimeError(f'Unsupported code type "{self._code_type}" ({supported_code_types=})')
        # Translate an unmangled name through the symbol map when available;
        # otherwise pass the encoded name to the driver as-is.
        try:
            name = self._sym_map[name]
        except KeyError:
            name = name.encode()

        data = handle_return(self._loader["kernel"](self._handle, name))
        return Kernel._from_obj(data, self)

    @property
    def code(self) -> CodeTypeT:
        """Return the underlying code object."""
        return self._module

    @property
    def name(self) -> str:
        """Return a human-readable name of this code object."""
        return self._name

    @property
    def code_type(self) -> str:
        """Return the type of the underlying code object."""
        return self._code_type

    @property
    @precondition(_lazy_load_module)
    def handle(self):
        """Return the underlying handle object.

        .. caution::

            This handle is a Python object. To get the memory address of the underlying C
            handle, call ``int(ObjectCode.handle)``.
        """
        return self._handle