Coverage for cuda/core/_kernel_arg_handler.pyx

3# SPDX-License-Identifier: Apache-2.0

5from cpython.mem cimport PyMem_Malloc, PyMem_Free

6from libc.stdint cimport (intptr_t,

7 int8_t, int16_t, int32_t, int64_t,

8 uint8_t, uint16_t, uint32_t, uint64_t,)

9from libcpp cimport bool as cpp_bool

10from libcpp.complex cimport complex as cpp_complex

11from libcpp cimport nullptr

12from libcpp cimport vector

14import ctypes (empty)

16import numpy (empty)

18from cuda.core._memory import Buffer (empty)

19from cuda.core._tensor_map import TensorMapDescriptor as _TensorMapDescriptor_py (empty)

20from cuda.core._tensor_map cimport TensorMapDescriptor

21from cuda.core.graph._graph_definition cimport GraphCondition

22from cuda.core._utils.cuda_utils import driver (empty)

23from cuda.bindings cimport cydriver

# Short aliases for the two C++ std::complex instantiations that are members
# of the `supported_type` fused type and are used as `prepare_arg` specializations.
ctypedef cpp_complex.complex[float] cpp_single_complex
ctypedef cpp_complex.complex[double] cpp_double_complex

# We need an identifier for fp16 for copying scalars on the host. This is a minimal
# implementation borrowed from cuda_fp16.h.
#
# The verbatim C block below defines a 2-byte-aligned struct with a single
# `unsigned short` field holding the raw bits of a half-precision value; the
# Cython-side `ctypedef struct` mirrors it so the field can be written from Cython.
cdef extern from *:
    """
    #if __cplusplus >= 201103L
    #define __CUDA_ALIGN__(n) alignas(n)    /* C++11 kindly gives us a keyword for this */
    #else
    #if defined(__GNUC__)
    #define __CUDA_ALIGN__(n) __attribute__ ((aligned(n)))
    #elif defined(_MSC_VER)
    #define __CUDA_ALIGN__(n) __declspec(align(n))
    #else
    #define __CUDA_ALIGN__(n)
    #endif /* defined(__GNUC__) */
    #endif /* __cplusplus >= 201103L */

    typedef struct __CUDA_ALIGN__(2) {
        /**
         * Storage field contains bits representation of the \p half floating-point number.
         */
        unsigned short x;
    } __half_raw;
    """
    ctypedef struct __half_raw:
        unsigned short x

# Every C/C++ scalar type that `prepare_arg` can be specialized for. Each
# member corresponds to one `prepare_arg[...]` instantiation used by the
# dispatch functions in this file.
ctypedef fused supported_type:
    cpp_bool
    int8_t
    int16_t
    int32_t
    int64_t
    uint8_t
    uint16_t
    uint32_t
    uint64_t
    __half_raw
    float
    double
    intptr_t
    cpp_single_complex
    cpp_double_complex

# cache ctypes/numpy type objects to avoid attribute access
# (the dispatch functions compare `type(arg)` against these module-level
# objects by identity before falling back to `isinstance`)
cdef object ctypes_bool = ctypes.c_bool
cdef object ctypes_int8 = ctypes.c_int8
cdef object ctypes_int16 = ctypes.c_int16
cdef object ctypes_int32 = ctypes.c_int32
cdef object ctypes_int64 = ctypes.c_int64
cdef object ctypes_uint8 = ctypes.c_uint8
cdef object ctypes_uint16 = ctypes.c_uint16
cdef object ctypes_uint32 = ctypes.c_uint32
cdef object ctypes_uint64 = ctypes.c_uint64
cdef object ctypes_float = ctypes.c_float
cdef object ctypes_double = ctypes.c_double
cdef object numpy_bool = numpy.bool_
cdef object numpy_int8 = numpy.int8
cdef object numpy_int16 = numpy.int16
cdef object numpy_int32 = numpy.int32
cdef object numpy_int64 = numpy.int64
cdef object numpy_uint8 = numpy.uint8
cdef object numpy_uint16 = numpy.uint16
cdef object numpy_uint32 = numpy.uint32
cdef object numpy_uint64 = numpy.uint64
cdef object numpy_float16 = numpy.float16
cdef object numpy_float32 = numpy.float32
cdef object numpy_float64 = numpy.float64
cdef object numpy_complex64 = numpy.complex64
cdef object numpy_complex128 = numpy.complex128


# Python-level TensorMapDescriptor class, cached for the exact-type check
# performed when packing kernel arguments.
cdef object tensor_map_descriptor_type = _TensorMapDescriptor_py

104

105

# limitation due to cython/cython#534
# (a plain name for `void*` so it can be used as a template argument, as in
# `vector.vector[voidptr](...)`)
ctypedef void* voidptr

108

109

# Copy one scalar kernel argument into freshly allocated host memory.
#
# The Python object `arg` is converted to the C type selected by the fused
# specialization and written into a `PyMem_Malloc`'d slot. The slot's address
# is stored both in `data_addresses[idx]` (the address handed to the launch)
# and in `data[idx]` (so the owner can `PyMem_Free` it later).
#
# Returns 0 on success; raises (returning -1) if allocation or conversion fails.
#
# Cython can't infer the overload without at least one input argument with fused type
cdef inline int prepare_arg(
        vector.vector[void*]& data,
        vector.vector[void*]& data_addresses,
        arg,  # important: keep it a Python object and don't cast
        const size_t idx,
        const supported_type* __unused=NULL) except -1:
    cdef void* ptr = PyMem_Malloc(sizeof(supported_type))
    if ptr == NULL:
        # PyMem_Malloc signals failure by returning NULL without setting an
        # exception; writing through it would be undefined behaviour.
        raise MemoryError()
    # Record ownership *before* converting `arg`: the conversions below can
    # raise (e.g. overflow, missing attribute), and the slot must already be
    # tracked in `data` so that it is released instead of leaked.
    data[idx] = ptr  # for later dealloc
    # note: this should also work once ctypes has complex support:
    # python/cpython#121248
    if supported_type is cpp_single_complex:
        (<supported_type*>ptr)[0] = cpp_complex.complex[float](arg.real, arg.imag)
    elif supported_type is cpp_double_complex:
        (<supported_type*>ptr)[0] = cpp_complex.complex[double](arg.real, arg.imag)
    elif supported_type is __half_raw:
        # reinterpret the fp16 value as a 16-bit integer to copy its raw bits
        (<supported_type*>ptr).x = <int16_t>(arg.view(numpy_int16))
    else:
        (<supported_type*>ptr)[0] = <supported_type>(arg)
    data_addresses[idx] = ptr  # take the address to the scalar
    return 0

131

132

# Register a TensorMapDescriptor as kernel argument number `idx`.
# Always returns 0.
cdef inline int prepare_tensor_map_arg(
        vector.vector[void*]& data,
        vector.vector[void*]& data_addresses,
        TensorMapDescriptor arg,
        const size_t idx) except -1:
    # cuLaunchKernel copies argument bytes during launch, so a TensorMap
    # descriptor can point directly at its internal CUtensorMap storage.
    # Nothing is written to `data[idx]`: no host copy is allocated here.
    cdef void* descriptor_ptr = arg._get_data_ptr()
    data_addresses[idx] = descriptor_ptr
    return 0

142

143

# Pack a ctypes scalar as kernel argument number `idx`.
#
# Returns 0 when `arg` was recognised and packed (via `prepare_arg` on its
# `.value`), 1 when `arg` is not one of the handled ctypes scalar types.
cdef inline int prepare_ctypes_arg(
        vector.vector[void*]& data,
        vector.vector[void*]& data_addresses,
        arg,
        const size_t idx) except -1:
    cdef object kind = type(arg)

    # First pass: cheap identity comparison against the cached type objects.
    if kind is ctypes_bool:
        return prepare_arg[cpp_bool](data, data_addresses, arg.value, idx)
    if kind is ctypes_int8:
        return prepare_arg[int8_t](data, data_addresses, arg.value, idx)
    if kind is ctypes_int16:
        return prepare_arg[int16_t](data, data_addresses, arg.value, idx)
    if kind is ctypes_int32:
        return prepare_arg[int32_t](data, data_addresses, arg.value, idx)
    if kind is ctypes_int64:
        return prepare_arg[int64_t](data, data_addresses, arg.value, idx)
    if kind is ctypes_uint8:
        return prepare_arg[uint8_t](data, data_addresses, arg.value, idx)
    if kind is ctypes_uint16:
        return prepare_arg[uint16_t](data, data_addresses, arg.value, idx)
    if kind is ctypes_uint32:
        return prepare_arg[uint32_t](data, data_addresses, arg.value, idx)
    if kind is ctypes_uint64:
        return prepare_arg[uint64_t](data, data_addresses, arg.value, idx)
    if kind is ctypes_float:
        return prepare_arg[float](data, data_addresses, arg.value, idx)
    if kind is ctypes_double:
        return prepare_arg[double](data, data_addresses, arg.value, idx)

    # Second pass: no exact match, so fall back to the slower `isinstance`
    # test, in the same order as above.
    if isinstance(arg, ctypes_bool):
        return prepare_arg[cpp_bool](data, data_addresses, arg.value, idx)
    if isinstance(arg, ctypes_int8):
        return prepare_arg[int8_t](data, data_addresses, arg.value, idx)
    if isinstance(arg, ctypes_int16):
        return prepare_arg[int16_t](data, data_addresses, arg.value, idx)
    if isinstance(arg, ctypes_int32):
        return prepare_arg[int32_t](data, data_addresses, arg.value, idx)
    if isinstance(arg, ctypes_int64):
        return prepare_arg[int64_t](data, data_addresses, arg.value, idx)
    if isinstance(arg, ctypes_uint8):
        return prepare_arg[uint8_t](data, data_addresses, arg.value, idx)
    if isinstance(arg, ctypes_uint16):
        return prepare_arg[uint16_t](data, data_addresses, arg.value, idx)
    if isinstance(arg, ctypes_uint32):
        return prepare_arg[uint32_t](data, data_addresses, arg.value, idx)
    if isinstance(arg, ctypes_uint64):
        return prepare_arg[uint64_t](data, data_addresses, arg.value, idx)
    if isinstance(arg, ctypes_float):
        return prepare_arg[float](data, data_addresses, arg.value, idx)
    if isinstance(arg, ctypes_double):
        return prepare_arg[double](data, data_addresses, arg.value, idx)

    # Not a ctypes scalar this function handles; let the caller try elsewhere.
    return 1

198

199

# Pack a numpy scalar as kernel argument number `idx`.
#
# Returns 0 when `arg` was recognised and packed via `prepare_arg`, 1 when
# `arg` is not one of the handled numpy scalar types.
cdef inline int prepare_numpy_arg(
        vector.vector[void*]& data,
        vector.vector[void*]& data_addresses,
        arg,
        const size_t idx) except -1:
    cdef object kind = type(arg)

    # First pass: cheap identity comparison against the cached type objects.
    if kind is numpy_bool:
        return prepare_arg[cpp_bool](data, data_addresses, arg, idx)
    if kind is numpy_int8:
        return prepare_arg[int8_t](data, data_addresses, arg, idx)
    if kind is numpy_int16:
        return prepare_arg[int16_t](data, data_addresses, arg, idx)
    if kind is numpy_int32:
        return prepare_arg[int32_t](data, data_addresses, arg, idx)
    if kind is numpy_int64:
        return prepare_arg[int64_t](data, data_addresses, arg, idx)
    if kind is numpy_uint8:
        return prepare_arg[uint8_t](data, data_addresses, arg, idx)
    if kind is numpy_uint16:
        return prepare_arg[uint16_t](data, data_addresses, arg, idx)
    if kind is numpy_uint32:
        return prepare_arg[uint32_t](data, data_addresses, arg, idx)
    if kind is numpy_uint64:
        return prepare_arg[uint64_t](data, data_addresses, arg, idx)
    if kind is numpy_float16:
        return prepare_arg[__half_raw](data, data_addresses, arg, idx)
    if kind is numpy_float32:
        return prepare_arg[float](data, data_addresses, arg, idx)
    if kind is numpy_float64:
        return prepare_arg[double](data, data_addresses, arg, idx)
    if kind is numpy_complex64:
        return prepare_arg[cpp_single_complex](data, data_addresses, arg, idx)
    if kind is numpy_complex128:
        return prepare_arg[cpp_double_complex](data, data_addresses, arg, idx)

    # Second pass: no exact match, so fall back to the slower `isinstance`
    # test, in the same order as above.
    if isinstance(arg, numpy_bool):
        return prepare_arg[cpp_bool](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_int8):
        return prepare_arg[int8_t](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_int16):
        return prepare_arg[int16_t](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_int32):
        return prepare_arg[int32_t](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_int64):
        return prepare_arg[int64_t](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_uint8):
        return prepare_arg[uint8_t](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_uint16):
        return prepare_arg[uint16_t](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_uint32):
        return prepare_arg[uint32_t](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_uint64):
        return prepare_arg[uint64_t](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_float16):
        return prepare_arg[__half_raw](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_float32):
        return prepare_arg[float](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_float64):
        return prepare_arg[double](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_complex64):
        return prepare_arg[cpp_single_complex](data, data_addresses, arg, idx)
    if isinstance(arg, numpy_complex128):
        return prepare_arg[cpp_double_complex](data, data_addresses, arg, idx)

    # Not a numpy scalar this function handles; let the caller try elsewhere.
    return 1

266

267

cdef class ParamHolder:
    # Packs a sequence of Python-level kernel arguments into an array of
    # per-argument addresses and exposes that array's address as `self.ptr`.
    #
    # NOTE(review): the attributes `data`, `data_addresses`, `kernel_args` and
    # `ptr` are not declared in this block; presumably they live in the
    # companion .pxd -- confirm.

    def __init__(self, kernel_args):
        # Build the argument-address array for `kernel_args`.
        #
        # Each argument is dispatched on its type: exact-type checks first,
        # then numpy scalars, then ctypes scalars, then `isinstance` fallbacks.
        # Raises TypeError for an argument of unsupported type.
        if len(kernel_args) == 0:
            # nothing to pack; `ptr` is 0 and `data` stays empty
            self.ptr = 0
            return

        cdef size_t n_args = len(kernel_args)
        cdef size_t i
        cdef int not_prepared
        cdef object arg_type
        # `data` holds the PyMem_Malloc'd slots to free in __dealloc__; entries
        # left at nullptr own nothing. `data_addresses` is what `ptr` points to.
        self.data = vector.vector[voidptr](n_args, nullptr)
        self.data_addresses = vector.vector[voidptr](n_args)
        for i, arg in enumerate(kernel_args):
            arg_type = type(arg)
            if arg_type is Buffer:
                # we need the address of where the actual buffer address is stored
                if type(arg.handle) is int:
                    # see note below on handling int arguments
                    prepare_arg[intptr_t](self.data, self.data_addresses, arg.handle, i)
                    continue
                else:
                    # it's a CUdeviceptr:
                    self.data_addresses[i] = <void*><intptr_t>(arg.handle.getPtr())
                    continue
            elif arg_type is bool:
                prepare_arg[cpp_bool](self.data, self.data_addresses, arg, i)
                continue
            elif arg_type is int:
                # Here's the dilemma: We want to have a fast path to pass in Python
                # integers as pointer addresses, but one could also (mistakenly) pass
                # it with the intention of passing a scalar integer. It's a mistake
                # because a Python int is ambiguous (arbitrary width). Our judgement
                # call here is to treat it as a pointer address, without any warning!
                prepare_arg[intptr_t](self.data, self.data_addresses, arg, i)
                continue
            elif arg_type is float:
                prepare_arg[double](self.data, self.data_addresses, arg, i)
                continue
            elif arg_type is complex:
                prepare_arg[cpp_double_complex](self.data, self.data_addresses, arg, i)
                continue
            elif arg_type is tensor_map_descriptor_type:
                prepare_tensor_map_arg(self.data, self.data_addresses, <TensorMapDescriptor>arg, i)
                continue

            # Both helpers return 0 when they packed the argument, 1 otherwise.
            not_prepared = prepare_numpy_arg(self.data, self.data_addresses, arg, i)
            if not_prepared:
                not_prepared = prepare_ctypes_arg(self.data, self.data_addresses, arg, i)
            if not_prepared:
                # TODO: revisit this treatment if we decide to cythonize cuda.core
                if arg_type is driver.CUgraphConditionalHandle:
                    prepare_arg[cydriver.CUgraphConditionalHandle](
                        self.data, self.data_addresses, <intptr_t>int(arg), i)
                    continue
                elif arg_type is GraphCondition:
                    prepare_arg[cydriver.CUgraphConditionalHandle](
                        self.data, self.data_addresses,
                        <intptr_t><unsigned long long>(<GraphCondition>arg)._c_handle, i)
                    continue
                # If no exact types are found, fallback to slower `isinstance` check
                elif isinstance(arg, Buffer):
                    if isinstance(arg.handle, int):
                        prepare_arg[intptr_t](self.data, self.data_addresses, arg.handle, i)
                        continue
                    else:
                        self.data_addresses[i] = <void*><intptr_t>(arg.handle.getPtr())
                        continue
                elif isinstance(arg, bool):
                    prepare_arg[cpp_bool](self.data, self.data_addresses, arg, i)
                    continue
                elif isinstance(arg, int):
                    prepare_arg[intptr_t](self.data, self.data_addresses, arg, i)
                    continue
                elif isinstance(arg, float):
                    prepare_arg[double](self.data, self.data_addresses, arg, i)
                    continue
                elif isinstance(arg, complex):
                    prepare_arg[cpp_double_complex](self.data, self.data_addresses, arg, i)
                    continue
                elif isinstance(arg, driver.CUgraphConditionalHandle):
                    # Convert through int() exactly like the exact-type branch
                    # above, rather than handing the wrapper object to the C
                    # integer conversion directly.
                    prepare_arg[cydriver.CUgraphConditionalHandle](
                        self.data, self.data_addresses, <intptr_t>int(arg), i)
                    continue
                elif isinstance(arg, GraphCondition):
                    prepare_arg[cydriver.CUgraphConditionalHandle](
                        self.data, self.data_addresses,
                        <intptr_t><unsigned long long>(<GraphCondition>arg)._c_handle, i)
                    continue
                # TODO: support ctypes/numpy struct
                raise TypeError("the argument is of unsupported type: " + str(type(arg)))

        # Keep a reference to the arguments: some entries of `data_addresses`
        # point into storage obtained from the argument objects themselves
        # (`getPtr()`, `_get_data_ptr()`) rather than into copies made here.
        self.kernel_args = kernel_args
        self.ptr = <intptr_t>self.data_addresses.data()

    def __dealloc__(self):
        # Release every host slot recorded in `data`; null entries are
        # arguments for which nothing was allocated.
        for data in self.data:
            if data:
                PyMem_Free(data)

Coverage for cuda / core / _kernel_arg_handler.pyx: 85.52%

221 statements