Coverage for cuda / core / _launcher.pyx: 100.00%
24 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-22 01:37 +0000
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-22 01:37 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
5from libc.stdint cimport uintptr_t
7from cuda.bindings cimport cydriver
9from cuda.core._launch_config cimport LaunchConfig
10from cuda.core._kernel_arg_handler cimport ParamHolder
11from cuda.core._module cimport Kernel
12from cuda.core._resource_handles cimport as_cu
13from cuda.core._stream cimport Stream_accept, Stream
14from cuda.core._utils.cuda_utils cimport (
15 check_or_create_options,
16 HANDLE_RETURN,
17)
18from cuda.core._module import Kernel
19from cuda.core._stream import Stream
20from math import prod
def launch(stream: Stream | GraphBuilder | IsStreamType, config: LaunchConfig, kernel: Kernel, *kernel_args):
    """Launch a :obj:`~_module.Kernel` on a stream using the given launch configuration.

    Parameters
    ----------
    stream : :obj:`~_stream.Stream` | :obj:`~graph.GraphBuilder`
        Object that determines the stream ordering of the launch. It is
        converted with ``Stream_accept(..., allow_stream_protocol=True)``
        before use.
    config : :obj:`LaunchConfig`
        Launch configuration. It is passed through
        ``check_or_create_options(LaunchConfig, config, "launch config")``
        before being turned into the native driver launch config.
    kernel : :obj:`~_module.Kernel`
        Kernel to launch.
    *kernel_args : Any
        Positional arguments forwarded to the kernel; they are wrapped in a
        ``ParamHolder`` before the launch.

    Notes
    -----
    When ``config.is_cooperative`` is true, ``_check_cooperative_launch`` is
    invoked before the driver call and may raise :obj:`ValueError`.

    """
    cdef Stream launch_stream = Stream_accept(stream, allow_stream_protocol=True)
    cdef LaunchConfig launch_conf = check_or_create_options(LaunchConfig, config, "launch config")

    # TODO: can we ensure kernel_args is valid/safe to use here?
    # TODO: merge with HelperKernelParams?
    # `holder` stays referenced until the function returns; `raw_args` is
    # obtained from its `ptr` attribute.
    cdef ParamHolder holder = ParamHolder(kernel_args)
    cdef void** raw_args = <void**><uintptr_t>(holder.ptr)

    # Resolve the driver-level function handle from the kernel object.
    cdef Kernel typed_kernel = <Kernel>kernel
    cdef cydriver.CUfunction cu_func = <cydriver.CUfunction>as_cu(typed_kernel._h_kernel)

    # Build the native launch config and attach the target stream handle.
    native_cfg = launch_conf._to_native_launch_config()
    native_cfg.hStream = as_cu(launch_stream._h_stream)

    # Cooperative launches get a grid-size check before reaching the driver.
    if launch_conf.is_cooperative:
        _check_cooperative_launch(kernel, launch_conf, launch_stream)

    # The driver call itself runs with the GIL released.
    with nogil:
        HANDLE_RETURN(cydriver.cuLaunchKernelEx(&native_cfg, cu_func, raw_args, NULL))
cdef _check_cooperative_launch(kernel: Kernel, config: LaunchConfig, stream: Stream):
    # Reject a cooperative launch whose total grid size (product of
    # ``config.grid``) is larger than
    # ``max_active_blocks_per_multiprocessor(...) * multiprocessor_count``
    # for the device associated with ``stream``. Raises ValueError in that
    # case; otherwise returns without doing anything.
    sm_count = stream.device.properties.multiprocessor_count
    blocks_per_sm = kernel.occupancy.max_active_blocks_per_multiprocessor(
        prod(config.block), config.shmem_size
    )
    max_grid_size = blocks_per_sm * sm_count

    if prod(config.grid) <= max_grid_size:
        return

    # For now let's try not to be smart and adjust the grid size behind users' back.
    # We explicitly ask users to adjust.
    x, y, z = config.grid
    raise ValueError(f"The specified grid size ({x} * {y} * {z}) exceeds the limit ({max_grid_size})")