Coverage for cuda / core / experimental / _launcher.pyx: 92%

38 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-10 01:19 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4from cuda.core.experimental._launch_config cimport LaunchConfig, _to_native_launch_config 

5from cuda.core.experimental._stream cimport Stream_accept 

6  

7  

8from cuda.core.experimental._kernel_arg_handler import ParamHolder 

9from cuda.core.experimental._module import Kernel 

10from cuda.core.experimental._stream import Stream 

11from cuda.core.experimental._utils.clear_error_support import assert_type 

12from cuda.core.experimental._utils.cuda_utils import ( 

13 _reduce_3_tuple, 

14 check_or_create_options, 

15 driver, 

16 get_binding_version, 

17 handle_return, 

18) 

19  

# TODO: revisit this treatment for py313t builds
# One-time-initialization cache shared with _lazy_init():
#   _inited — True once the version probe below has run (guards re-entry)
#   _use_ex — True/False/None; whether driver.cuLaunchKernelEx may be used
#             (set by _lazy_init from the driver and binding versions)
_inited = False
_use_ex = None

23  

24  

def _lazy_init():
    """Probe driver/binding versions once and cache whether the ``Ex`` launch API is usable."""
    global _inited, _use_ex
    if _inited:
        return
    # cuLaunchKernelEx requires both a CUDA 11.8+ driver and a cuda-python
    # binding built against 11.8+; either one alone is not enough.
    binding_ver = get_binding_version()
    driver_ver = handle_return(driver.cuDriverGetVersion())
    _use_ex = (driver_ver >= 11080) and (binding_ver >= (11, 8))
    _inited = True

36  

37  

def launch(stream: Stream | GraphBuilder | IsStreamT, config: LaunchConfig, kernel: Kernel, *kernel_args):
    """Launch a :obj:`~_module.Kernel` with the given launch-time configuration.

    Parameters
    ----------
    stream : :obj:`~_stream.Stream` | :obj:`~_graph.GraphBuilder`
        The stream establishing the stream ordering semantic of a
        launch.
    config : :obj:`LaunchConfig`
        Launch configurations inline with options provided by
        :obj:`~_launcher.LaunchConfig` dataclass.
    kernel : :obj:`~_module.Kernel`
        Kernel to launch.
    *kernel_args : Any
        Variable length argument list that is provided to the
        launching kernel.

    """
    stream = Stream_accept(stream, allow_stream_protocol=True)
    assert_type(kernel, Kernel)
    _lazy_init()
    config = check_or_create_options(LaunchConfig, config, "launch config")

    # Marshal the Python-level arguments into a C-compatible pointer array.
    # The holder must stay referenced until the launch call returns.
    # TODO: can we ensure kernel_args is valid/safe to use here?
    # TODO: merge with HelperKernelParams?
    holder = ParamHolder(kernel_args)
    arg_ptr = holder.ptr

    # Note: CUkernel can still be launched via the old cuLaunchKernel and we do not care
    # about the CUfunction/CUkernel difference (which depends on whether the "old" or
    # "new" module loading APIs are in use). _use_ex reflects both the binding and the
    # driver version, mainly so we can pick the more feature-rich "Ex" API when present.
    if not _use_ex:
        # Legacy entry point for old drivers/bindings.
        # TODO: check if config has any unsupported attrs
        gx, gy, gz = config.grid
        bx, by, bz = config.block
        handle_return(
            driver.cuLaunchKernel(
                int(kernel._handle), gx, gy, gz, bx, by, bz, config.shmem_size, stream.handle, arg_ptr, 0
            )
        )
        return

    native_cfg = _to_native_launch_config(config)
    native_cfg.hStream = stream.handle
    if config.cooperative_launch:
        _check_cooperative_launch(kernel, config, stream)
    handle_return(driver.cuLaunchKernelEx(native_cfg, int(kernel._handle), arg_ptr, 0))

85  

86  

cdef _check_cooperative_launch(kernel: Kernel, config: LaunchConfig, stream: Stream):
    """Raise ``ValueError`` if ``config.grid`` exceeds the cooperative-launch limit.

    Cooperative launches require every block to be resident simultaneously, so
    the grid may not exceed (max active blocks per SM) * (number of SMs).
    """
    device = stream.device
    blocks_per_sm = kernel.occupancy.max_active_blocks_per_multiprocessor(
        _reduce_3_tuple(config.block), config.shmem_size
    )
    max_grid_size = blocks_per_sm * device.properties.multiprocessor_count
    if _reduce_3_tuple(config.grid) > max_grid_size:
        # For now let's try not to be smart and adjust the grid size behind users' back.
        # We explicitly ask users to adjust.
        x, y, z = config.grid
        raise ValueError(f"The specified grid size ({x} * {y} * {z}) exceeds the limit ({max_grid_size})")