Coverage for cuda/core/_launcher.pyx: 92.59%

27 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-13 01:38 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4  

5from libc.stdint cimport uintptr_t 

6  

7from cuda.bindings cimport cydriver 

8  

9from cuda.core._launch_config cimport LaunchConfig 

10from cuda.core._kernel_arg_handler cimport ParamHolder 

11from cuda.core._module cimport Kernel 

12from cuda.core._resource_handles cimport as_cu 

13from cuda.core._stream cimport Stream_accept, Stream 

14from cuda.core._utils.cuda_utils cimport ( 

15 check_or_create_options, 

16 HANDLE_RETURN, 

17) 

18from cuda.core._module import Kernel 

19from cuda.core._stream import Stream 

20from math import prod 

21from typing import TYPE_CHECKING 

22  

23if TYPE_CHECKING: 

24 from cuda.core.graph import GraphBuilder 

25 from cuda.core.typing import IsStreamType 

26  

27  

def launch(
    stream: Stream | GraphBuilder | IsStreamType,
    config: LaunchConfig,
    kernel: Kernel,
    *kernel_args
) -> None:
    """Launch a :obj:`~_module.Kernel` on a stream with a launch-time configuration.

    Parameters
    ----------
    stream : :obj:`~_stream.Stream` | :obj:`~graph.GraphBuilder`
        The stream establishing the stream ordering semantic of the
        launch. It is resolved through ``Stream_accept`` with the stream
        protocol allowed.
    config : :obj:`LaunchConfig`
        Launch configuration. It is passed through
        ``check_or_create_options`` before being turned into the native
        driver launch configuration.
    kernel : :obj:`~_module.Kernel`
        Kernel to launch.
    *kernel_args : Any
        Variable length argument list that is handed to the kernel being
        launched.

    Raises
    ------
    ValueError
        If ``config`` requests a cooperative launch and the total number
        of blocks in the grid exceeds the limit computed by
        ``_check_cooperative_launch``.

    Notes
    -----
    The return code of ``cuLaunchKernelEx`` is routed through
    ``HANDLE_RETURN``; presumably a driver failure surfaces as an
    exception from there (confirm against ``_utils.cuda_utils``).
    """
    cdef Stream launch_stream = Stream_accept(stream, allow_stream_protocol=True)
    cdef LaunchConfig launch_conf = check_or_create_options(LaunchConfig, config, "launch config")

    # TODO: can we ensure kernel_args is valid/safe to use here?
    # TODO: merge with HelperKernelParams?
    # NOTE(review): packed_args presumably owns the buffer behind ``.ptr``, so
    # it has to stay referenced until the driver call below has returned.
    cdef ParamHolder packed_args = ParamHolder(kernel_args)
    cdef void** raw_args = <void**><uintptr_t>(packed_args.ptr)

    # Pull the raw driver function handle out of the Kernel object.
    cdef cydriver.CUfunction cu_func = <cydriver.CUfunction>as_cu((<Kernel>kernel)._h_kernel)

    # Build the native launch configuration and bind it to the target stream.
    native_cfg = launch_conf._to_native_launch_config()
    native_cfg.hStream = as_cu(launch_stream._h_stream)

    # Cooperative launches are validated up front so that an oversized grid is
    # reported as a ValueError before the driver is invoked.
    if launch_conf.is_cooperative:
        _check_cooperative_launch(kernel, launch_conf, launch_stream)

    with nogil:
        HANDLE_RETURN(cydriver.cuLaunchKernelEx(&native_cfg, cu_func, raw_args, NULL))

69  

70  

cdef _check_cooperative_launch(kernel: Kernel, config: LaunchConfig, stream: Stream):
    """Reject a cooperative launch whose grid holds more blocks than the limit.

    The limit is the kernel's maximum number of active blocks per
    multiprocessor (queried for the configured block size and shared memory
    size) multiplied by the multiprocessor count of the stream's device.

    Raises
    ------
    ValueError
        If the product of the grid dimensions exceeds that limit.
    """
    sm_count = stream.device.properties.multiprocessor_count
    blocks_per_sm = kernel.occupancy.max_active_blocks_per_multiprocessor(
        prod(config.block), config.shmem_size
    )
    max_grid_size = blocks_per_sm * sm_count

    if prod(config.grid) <= max_grid_size:
        return

    # For now let's try not to be smart and adjust the grid size behind users' back.
    # We explicitly ask users to adjust.
    x, y, z = config.grid
    raise ValueError(f"The specified grid size ({x} * {y} * {z}) exceeds the limit ({max_grid_size})")