Coverage for cuda/core/_kernel_arg_handler.pyx: 83.33%

222 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-13 01:38 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4  

5from cpython.mem cimport PyMem_Malloc, PyMem_Free 

6from libc.stdint cimport (intptr_t, 

7 int8_t, int16_t, int32_t, int64_t, 

8 uint8_t, uint16_t, uint32_t, uint64_t,) 

9from libcpp cimport bool as cpp_bool 

10from libcpp.complex cimport complex as cpp_complex 

11from libcpp cimport nullptr 

12from libcpp cimport vector 

13  

14import ctypes 

15from typing import Sequence, Any 

16  

17import numpy 

18  

19from cuda.core._memory import Buffer 

20from cuda.core._tensor_map import TensorMapDescriptor as _TensorMapDescriptor_py 

21from cuda.core._tensor_map cimport TensorMapDescriptor 

22from cuda.core.graph._graph_definition cimport GraphCondition 

23from cuda.core._utils.cuda_utils import driver 

24from cuda.bindings cimport cydriver 

25  

26  

# Short aliases for the two C++ complex specializations (from libcpp.complex,
# cimported above as ``cpp_complex``). They are members of the
# ``supported_type`` fused type and select the complex branches of
# ``prepare_arg``.
ctypedef cpp_complex.complex[float] cpp_single_complex
ctypedef cpp_complex.complex[double] cpp_double_complex

29  

30  

31# We need an identifier for fp16 for copying scalars on the host. This is a minimal 

32# implementation borrowed from cuda_fp16.h. 

cdef extern from *:
    """
    #if __cplusplus >= 201103L
    #define __CUDA_ALIGN__(n) alignas(n) /* C++11 kindly gives us a keyword for this */
    #else
    #if defined(__GNUC__)
    #define __CUDA_ALIGN__(n) __attribute__ ((aligned(n)))
    #elif defined(_MSC_VER)
    #define __CUDA_ALIGN__(n) __declspec(align(n))
    #else
    #define __CUDA_ALIGN__(n)
    #endif /* defined(__GNUC__) */
    #endif /* __cplusplus >= 201103L */

    typedef struct __CUDA_ALIGN__(2) {
        /**
         * Storage field contains bits representation of the \p half floating-point number.
         */
        unsigned short x;
    } __half_raw;
    """
    # Cython-side declaration of the struct defined in the verbatim C block
    # above: a single 16-bit field ``x``. ``prepare_arg`` writes the raw bit
    # pattern of a numpy.float16 scalar into it.
    ctypedef struct __half_raw:
        unsigned short x

56  

57  

# The set of C scalar types that ``prepare_arg`` can be specialized for.
# Callers in this file pick the specialization explicitly with
# ``prepare_arg[<type>](...)``.
ctypedef fused supported_type:
    cpp_bool
    int8_t
    int16_t
    int32_t
    int64_t
    uint8_t
    uint16_t
    uint32_t
    uint64_t
    __half_raw           # raw bits of a half-precision float
    float
    double
    intptr_t             # used for pointer addresses passed as Python ints
    cpp_single_complex
    cpp_double_complex

74  

75  

# Module-level aliases for the ctypes/numpy scalar classes (and the Python-side
# TensorMapDescriptor class). Binding them once here means the argument
# dispatch code compares against a cached object instead of performing an
# attribute lookup on ``ctypes``/``numpy`` for every argument.
cdef:
    # ctypes scalar classes
    object ctypes_bool = ctypes.c_bool
    object ctypes_int8 = ctypes.c_int8
    object ctypes_int16 = ctypes.c_int16
    object ctypes_int32 = ctypes.c_int32
    object ctypes_int64 = ctypes.c_int64
    object ctypes_uint8 = ctypes.c_uint8
    object ctypes_uint16 = ctypes.c_uint16
    object ctypes_uint32 = ctypes.c_uint32
    object ctypes_uint64 = ctypes.c_uint64
    object ctypes_float = ctypes.c_float
    object ctypes_double = ctypes.c_double

    # numpy scalar classes
    object numpy_bool = numpy.bool_
    object numpy_int8 = numpy.int8
    object numpy_int16 = numpy.int16
    object numpy_int32 = numpy.int32
    object numpy_int64 = numpy.int64
    object numpy_uint8 = numpy.uint8
    object numpy_uint16 = numpy.uint16
    object numpy_uint32 = numpy.uint32
    object numpy_uint64 = numpy.uint64
    object numpy_float16 = numpy.float16
    object numpy_float32 = numpy.float32
    object numpy_float64 = numpy.float64
    object numpy_complex64 = numpy.complex64
    object numpy_complex128 = numpy.complex128

    # Python-level class object used for the exact-type check on tensor maps
    object tensor_map_descriptor_type = _TensorMapDescriptor_py


# limitation due to cython/cython#534
ctypedef void* voidptr

109  

110  

111# Cython can't infer the overload without at least one input argument with fused type 

cdef inline int prepare_arg(
        vector.vector[void*]& data,
        vector.vector[void*]& data_addresses,
        arg,  # important: keep it a Python object and don't cast
        const size_t idx,
        const supported_type* __unused=NULL) except -1:
    """Copy the scalar ``arg`` into freshly allocated host storage.

    The storage is sized for the selected ``supported_type`` specialization.
    Its address is written to ``data_addresses[idx]`` (the pointer handed to
    the launch) and to ``data[idx]`` (so the owner of ``data`` can release it
    with ``PyMem_Free``).

    ``__unused`` is never read; it exists only so that the signature contains
    an argument of the fused type, which Cython needs to resolve the
    specialization.

    Returns 0 on success. Raises ``MemoryError`` if the allocation fails, and
    propagates whatever the Python-to-C conversion of ``arg`` raises.
    """
    cdef void* ptr = PyMem_Malloc(sizeof(supported_type))
    if ptr == NULL:
        # PyMem_Malloc reports failure by returning NULL; writing through it
        # below would crash the process.
        raise MemoryError()
    # Record ownership *before* converting ``arg``: the conversions below can
    # raise (e.g. an out-of-range integer), and the allocation must still be
    # reachable through ``data`` so that it gets freed instead of leaked.
    data[idx] = ptr  # for later dealloc
    # note: this should also work once ctypes has complex support:
    # python/cpython#121248
    if supported_type is cpp_single_complex:
        (<supported_type*>ptr)[0] = cpp_complex.complex[float](arg.real, arg.imag)
    elif supported_type is cpp_double_complex:
        (<supported_type*>ptr)[0] = cpp_complex.complex[double](arg.real, arg.imag)
    elif supported_type is __half_raw:
        # Reinterpret the float16 scalar as a 16-bit integer to obtain its raw
        # bit pattern; no numeric conversion takes place.
        (<supported_type*>ptr).x = <int16_t>(arg.view(numpy_int16))
    else:
        (<supported_type*>ptr)[0] = <supported_type>(arg)
    data_addresses[idx] = ptr  # take the address to the scalar
    return 0

132  

133  

cdef inline int prepare_tensor_map_arg(
        vector.vector[void*]& data,
        vector.vector[void*]& data_addresses,
        TensorMapDescriptor arg,
        const size_t idx) except -1:
    """Register a TensorMapDescriptor as the kernel argument at ``idx``.

    No copy is made and ``data`` is left untouched: only
    ``data_addresses[idx]`` is set, to the pointer returned by
    ``arg._get_data_ptr()``. Always returns 0.
    """
    # cuLaunchKernel copies argument bytes during launch, so a TensorMap
    # descriptor can point directly at its internal CUtensorMap storage.
    cdef void* descriptor_storage = arg._get_data_ptr()
    data_addresses[idx] = descriptor_storage
    return 0

143  

144  

cdef inline int prepare_ctypes_arg(
        vector.vector[void*]& data,
        vector.vector[void*]& data_addresses,
        arg,
        const size_t idx) except -1:
    """Try to stage ``arg`` as a ctypes scalar kernel argument.

    When ``arg`` is an instance of one of the cached ctypes scalar classes,
    its ``.value`` is forwarded to the matching ``prepare_arg`` specialization
    and that call's result (0) is returned. Returns 1 when ``arg`` matches
    none of them, so the caller can try another strategy.
    """
    cdef object exact_type = type(arg)

    # Fast path: identity comparison against the exact ctypes classes.
    if exact_type is ctypes_bool:
        return prepare_arg[cpp_bool](data, data_addresses, arg.value, idx)
    if exact_type is ctypes_int8:
        return prepare_arg[int8_t](data, data_addresses, arg.value, idx)
    if exact_type is ctypes_int16:
        return prepare_arg[int16_t](data, data_addresses, arg.value, idx)
    if exact_type is ctypes_int32:
        return prepare_arg[int32_t](data, data_addresses, arg.value, idx)
    if exact_type is ctypes_int64:
        return prepare_arg[int64_t](data, data_addresses, arg.value, idx)
    if exact_type is ctypes_uint8:
        return prepare_arg[uint8_t](data, data_addresses, arg.value, idx)
    if exact_type is ctypes_uint16:
        return prepare_arg[uint16_t](data, data_addresses, arg.value, idx)
    if exact_type is ctypes_uint32:
        return prepare_arg[uint32_t](data, data_addresses, arg.value, idx)
    if exact_type is ctypes_uint64:
        return prepare_arg[uint64_t](data, data_addresses, arg.value, idx)
    if exact_type is ctypes_float:
        return prepare_arg[float](data, data_addresses, arg.value, idx)
    if exact_type is ctypes_double:
        return prepare_arg[double](data, data_addresses, arg.value, idx)

    # Slow path: no exact match, so fall back to `isinstance`, which also
    # accepts subclasses. The order mirrors the fast path.
    if isinstance(arg, ctypes_bool):
        return prepare_arg[cpp_bool](data, data_addresses, arg.value, idx)
    if isinstance(arg, ctypes_int8):
        return prepare_arg[int8_t](data, data_addresses, arg.value, idx)
    if isinstance(arg, ctypes_int16):
        return prepare_arg[int16_t](data, data_addresses, arg.value, idx)
    if isinstance(arg, ctypes_int32):
        return prepare_arg[int32_t](data, data_addresses, arg.value, idx)
    if isinstance(arg, ctypes_int64):
        return prepare_arg[int64_t](data, data_addresses, arg.value, idx)
    if isinstance(arg, ctypes_uint8):
        return prepare_arg[uint8_t](data, data_addresses, arg.value, idx)
    if isinstance(arg, ctypes_uint16):
        return prepare_arg[uint16_t](data, data_addresses, arg.value, idx)
    if isinstance(arg, ctypes_uint32):
        return prepare_arg[uint32_t](data, data_addresses, arg.value, idx)
    if isinstance(arg, ctypes_uint64):
        return prepare_arg[uint64_t](data, data_addresses, arg.value, idx)
    if isinstance(arg, ctypes_float):
        return prepare_arg[float](data, data_addresses, arg.value, idx)
    if isinstance(arg, ctypes_double):
        return prepare_arg[double](data, data_addresses, arg.value, idx)

    # Not a ctypes scalar this module knows about.
    return 1

199  

200  

cdef inline int prepare_numpy_arg(
        vector.vector[void*]& data,
        vector.vector[void*]& data_addresses,
        arg,
        const size_t idx) except -1:
    """Try to stage ``arg`` as a numpy scalar kernel argument.

    When ``arg`` is an instance of one of the cached numpy scalar classes, it
    is forwarded as-is to the matching ``prepare_arg`` specialization and that
    call's result (0) is returned. Returns 1 when ``arg`` matches none of
    them, so the caller can try another strategy.
    """
    cdef object exact_type = type(arg)

    # Fast path: identity comparison against the exact numpy scalar classes.
    if exact_type is numpy_bool:
        return prepare_arg[cpp_bool](data, data_addresses, arg, idx)
    if exact_type is numpy_int8:
        return prepare_arg[int8_t](data, data_addresses, arg, idx)
    if exact_type is numpy_int16:
        return prepare_arg[int16_t](data, data_addresses, arg, idx)
    if exact_type is numpy_int32:
        return prepare_arg[int32_t](data, data_addresses, arg, idx)
    if exact_type is numpy_int64:
        return prepare_arg[int64_t](data, data_addresses, arg, idx)
    if exact_type is numpy_uint8:
        return prepare_arg[uint8_t](data, data_addresses, arg, idx)
    if exact_type is numpy_uint16:
        return prepare_arg[uint16_t](data, data_addresses, arg, idx)
    if exact_type is numpy_uint32:
        return prepare_arg[uint32_t](data, data_addresses, arg, idx)
    if exact_type is numpy_uint64:
        return prepare_arg[uint64_t](data, data_addresses, arg, idx)
    if exact_type is numpy_float16:
        return prepare_arg[__half_raw](data, data_addresses, arg, idx)
    if exact_type is numpy_float32:
        return prepare_arg[float](data, data_addresses, arg, idx)
    if exact_type is numpy_float64:
        return prepare_arg[double](data, data_addresses, arg, idx)
    if exact_type is numpy_complex64:
        return prepare_arg[cpp_single_complex](data, data_addresses, arg, idx)
    if exact_type is numpy_complex128:
        return prepare_arg[cpp_double_complex](data, data_addresses, arg, idx)

    # Slow path: no exact match, so fall back to `isinstance`, which also
    # accepts subclasses. The order mirrors the fast path.
    if isinstance(arg, numpy_bool):
        return prepare_arg[cpp_bool](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_int8):
        return prepare_arg[int8_t](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_int16):
        return prepare_arg[int16_t](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_int32):
        return prepare_arg[int32_t](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_int64):
        return prepare_arg[int64_t](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_uint8):
        return prepare_arg[uint8_t](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_uint16):
        return prepare_arg[uint16_t](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_uint32):
        return prepare_arg[uint32_t](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_uint64):
        return prepare_arg[uint64_t](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_float16):
        return prepare_arg[__half_raw](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_float32):
        return prepare_arg[float](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_float64):
        return prepare_arg[double](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_complex64):
        return prepare_arg[cpp_single_complex](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_complex128):
        return prepare_arg[cpp_double_complex](data, data_addresses, arg, idx)

    # Not a numpy scalar this module knows about.
    return 1

267  

268  

cdef class ParamHolder:
    """Pack a sequence of Python kernel arguments into an array of pointers.

    For each element of ``kernel_args`` the constructor stores a pointer to
    the argument's value in ``self.data_addresses``. The address of that array
    is exposed as the integer ``self.ptr`` (0 when there are no arguments).
    Scalars are copied into ``PyMem_Malloc`` storage by ``prepare_arg``; those
    allocations are tracked in ``self.data`` and released in ``__dealloc__``.

    Accepted argument kinds, as dispatched in ``__init__``:

    * ``Buffer`` -- its ``handle`` (a Python int address, or an object
      exposing ``getPtr()``)
    * ``bool`` / ``int`` / ``float`` / ``complex`` -- staged as C++ bool,
      ``intptr_t``, ``double`` and double-precision complex respectively
    * ``TensorMapDescriptor``
    * numpy and ctypes scalars
    * ``driver.CUgraphConditionalHandle`` and ``GraphCondition``

    Anything else raises ``TypeError``.
    """

    def __init__(self, kernel_args: Sequence[Any]) -> None:
        if len(kernel_args) == 0:
            self.ptr = 0
            return

        cdef size_t n_args = len(kernel_args)
        cdef size_t i
        cdef int not_prepared
        cdef object arg_type
        # ``data`` starts out all-NULL so that __dealloc__ only frees the slots
        # that prepare_arg actually filled in.
        self.data = vector.vector[voidptr](n_args, nullptr)
        self.data_addresses = vector.vector[voidptr](n_args)
        for i, arg in enumerate(kernel_args):
            arg_type = type(arg)
            if arg_type is Buffer:
                # we need the address of where the actual buffer address is stored
                if type(arg.handle) is int:
                    # see note below on handling int arguments
                    prepare_arg[intptr_t](self.data, self.data_addresses, arg.handle, i)
                    continue
                else:
                    # it's a CUdeviceptr:
                    self.data_addresses[i] = <void*><intptr_t>(arg.handle.getPtr())
                    continue
            elif arg_type is bool:
                prepare_arg[cpp_bool](self.data, self.data_addresses, arg, i)
                continue
            elif arg_type is int:
                # Here's the dilemma: We want to have a fast path to pass in Python
                # integers as pointer addresses, but one could also (mistakenly) pass
                # it with the intention of passing a scalar integer. It's a mistake
                # because a Python int is ambiguous (arbitrary width). Our judgement
                # call here is to treat it as a pointer address, without any warning!
                prepare_arg[intptr_t](self.data, self.data_addresses, arg, i)
                continue
            elif arg_type is float:
                prepare_arg[double](self.data, self.data_addresses, arg, i)
                continue
            elif arg_type is complex:
                prepare_arg[cpp_double_complex](self.data, self.data_addresses, arg, i)
                continue
            elif arg_type is tensor_map_descriptor_type:
                prepare_tensor_map_arg(self.data, self.data_addresses, <TensorMapDescriptor>arg, i)
                continue

            # Neither helper raises for an unknown type; each returns nonzero
            # to signal "not handled" so the next strategy can be tried.
            not_prepared = prepare_numpy_arg(self.data, self.data_addresses, arg, i)
            if not_prepared:
                not_prepared = prepare_ctypes_arg(self.data, self.data_addresses, arg, i)
            if not_prepared:
                # TODO: revisit this treatment if we decide to cythonize cuda.core
                if arg_type is driver.CUgraphConditionalHandle:
                    prepare_arg[cydriver.CUgraphConditionalHandle](
                        self.data, self.data_addresses, <intptr_t>int(arg), i)
                    continue
                elif arg_type is GraphCondition:
                    prepare_arg[cydriver.CUgraphConditionalHandle](
                        self.data, self.data_addresses,
                        <intptr_t><unsigned long long>(<GraphCondition>arg)._c_handle, i)
                    continue
                # If no exact types are found, fallback to slower `isinstance` check
                elif isinstance(arg, Buffer):
                    if isinstance(arg.handle, int):
                        prepare_arg[intptr_t](self.data, self.data_addresses, arg.handle, i)
                        continue
                    else:
                        self.data_addresses[i] = <void*><intptr_t>(arg.handle.getPtr())
                        continue
                elif isinstance(arg, bool):
                    prepare_arg[cpp_bool](self.data, self.data_addresses, arg, i)
                    continue
                elif isinstance(arg, int):
                    prepare_arg[intptr_t](self.data, self.data_addresses, arg, i)
                    continue
                elif isinstance(arg, float):
                    prepare_arg[double](self.data, self.data_addresses, arg, i)
                    continue
                elif isinstance(arg, complex):
                    prepare_arg[cpp_double_complex](self.data, self.data_addresses, arg, i)
                    continue
                elif isinstance(arg, driver.CUgraphConditionalHandle):
                    # Convert the handle object to an integer explicitly, the
                    # same way the exact-type branch above does, rather than
                    # relying on implicit object-to-C-integer coercion inside
                    # prepare_arg.
                    prepare_arg[cydriver.CUgraphConditionalHandle](
                        self.data, self.data_addresses, <intptr_t>int(arg), i)
                    continue
                elif isinstance(arg, GraphCondition):
                    prepare_arg[cydriver.CUgraphConditionalHandle](
                        self.data, self.data_addresses,
                        <intptr_t><unsigned long long>(<GraphCondition>arg)._c_handle, i)
                    continue
                # TODO: support ctypes/numpy struct
                raise TypeError("the argument is of unsupported type: " + str(type(arg)))

        # Keep a reference to the original arguments. Some entries of
        # data_addresses point into objects owned by those arguments rather
        # than into copies (presumably this is what keeps them valid -- confirm).
        self.kernel_args = kernel_args
        self.ptr = <intptr_t>self.data_addresses.data()

    def __dealloc__(self) -> None:
        # Release every scalar copy made by prepare_arg. Slots that were never
        # filled are still NULL and are skipped.
        for data in self.data:
            if data:
                PyMem_Free(data)