Coverage for cuda / core / experimental / _utils / driver_cu_result_explanations.py: 100%

1 statements

coverage.py v7.13.0, created at 2025-12-10 01:19 +0000

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

# To regenerate the dictionary below run:
# ../../../../../toolshed/reformat_cuda_enums_as_py.py /usr/local/cuda/include/cuda.h
# Replace the dictionary below with the output.
# Also update the CUDA Toolkit version number below.

# ruff: noqa: E501
# CUDA Toolkit v13.1.0
DRIVER_CU_RESULT_EXPLANATIONS = {

    0: (
        "The API call returned with no errors. In the case of query calls, this"
        " also means that the operation being queried is complete (see"
        " ::cuEventQuery() and ::cuStreamQuery())."
    ),
    1: (
        "This indicates that one or more of the parameters passed to the API call"
        " is not within an acceptable range of values."
    ),
    2: (
        "The API call failed because it was unable to allocate enough memory or"
        " other resources to perform the requested operation."
    ),
    3: (
        "This indicates that the CUDA driver has not been initialized with"
        " ::cuInit() or that initialization has failed."
    ),
    4: "This indicates that the CUDA driver is in the process of shutting down.",

    5: (
        "This indicates that the profiler is not initialized for this run. This can"
        " happen when the application is running with external profiling tools"
        " like the Visual Profiler."
    ),

    6: (
        "This error return is deprecated as of CUDA 5.0. It is no longer an error"
        " to attempt to enable/disable the profiling via ::cuProfilerStart or"
        " ::cuProfilerStop without initialization."
    ),
    7: (
        "This error return is deprecated as of CUDA 5.0. It is no longer an error"
        " to call cuProfilerStart() when profiling is already enabled."
    ),
    8: (
        "This error return is deprecated as of CUDA 5.0. It is no longer an error"
        " to call cuProfilerStop() when profiling is already disabled."
    ),

    34: (
        "This indicates that the CUDA driver that the application has loaded is a"
        " stub library. Applications that run with the stub rather than a real"
        " driver loaded will result in CUDA API calls returning this error."
    ),
    36: (
        "This indicates that the API call requires a newer CUDA driver than the one"
        " currently installed. Users should install an updated NVIDIA CUDA driver"
        " to allow the API call to succeed."
    ),
    46: (
        "This indicates that the requested CUDA device is unavailable at the current"
        " time. Devices are often unavailable due to use of"
        " ::CU_COMPUTEMODE_EXCLUSIVE_PROCESS or ::CU_COMPUTEMODE_PROHIBITED."
    ),

    100: ("This indicates that no CUDA-capable devices were detected by the installed CUDA driver."),
    101: (
        "This indicates that the device ordinal supplied by the user does not"
        " correspond to a valid CUDA device or that the action requested is"
        " invalid for the specified device."
    ),
    102: "This error indicates that the Grid license is not applied.",
    200: ("This indicates that the device kernel image is invalid. This can also indicate an invalid CUDA module."),
    201: (
        "This most frequently indicates that there is no context bound to the"
        " current thread. This can also be returned if the context passed to an"
        " API call is not a valid handle (such as a context that has had"
        " ::cuCtxDestroy() invoked on it). This can also be returned if a user"
        " mixes different API versions (i.e. 3010 context with 3020 API calls)."
        " See ::cuCtxGetApiVersion() for more details."
        " This can also be returned if the green context passed to an API call"
        " was not converted to a ::CUcontext using ::cuCtxFromGreenCtx API."
    ),
    202: (
        "This indicated that the context being supplied as a parameter to the"
        " API call was already the active context."
        " This error return is deprecated as of CUDA 3.2. It is no longer an"
        " error to attempt to push the active context via ::cuCtxPushCurrent()."
    ),
    205: "This indicates that a map or register operation has failed.",
    206: "This indicates that an unmap or unregister operation has failed.",
    207: ("This indicates that the specified array is currently mapped and thus cannot be destroyed."),
    208: "This indicates that the resource is already mapped.",
    209: (
        "This indicates that there is no kernel image available that is suitable"
        " for the device. This can occur when a user specifies code generation"
        " options for a particular CUDA source file that do not include the"
        " corresponding device configuration."
    ),
    210: "This indicates that a resource has already been acquired.",
    211: "This indicates that a resource is not mapped.",
    212: ("This indicates that a mapped resource is not available for access as an array."),
    213: ("This indicates that a mapped resource is not available for access as a pointer."),
    214: ("This indicates that an uncorrectable ECC error was detected during execution."),
    215: ("This indicates that the ::CUlimit passed to the API call is not supported by the active device."),
    216: (
        "This indicates that the ::CUcontext passed to the API call can"
        " only be bound to a single CPU thread at a time but is already"
        " bound to a CPU thread."
    ),
    217: ("This indicates that peer access is not supported across the given devices."),
    218: "This indicates that a PTX JIT compilation failed.",
    219: "This indicates an error with OpenGL or DirectX context.",
    220: ("This indicates that an uncorrectable NVLink error was detected during the execution."),
    221: "This indicates that the PTX JIT compiler library was not found.",
    222: "This indicates that the provided PTX was compiled with an unsupported toolchain.",
    223: "This indicates that the PTX JIT compilation was disabled.",
    224: ("This indicates that the ::CUexecAffinityType passed to the API call is not supported by the active device."),

    225: (
        "This indicates that the code to be compiled by the PTX JIT contains an unsupported call to cudaDeviceSynchronize."
    ),

    226: (
        "This indicates that an exception occurred on the device that is now"
        " contained by the GPU's error containment capability. Common causes are -"
        " a. Certain types of invalid accesses of peer GPU memory over NVLink"
        " b. Certain classes of hardware errors"
        " This leaves the process in an inconsistent state and any further CUDA"
        " work will return the same error. To continue using CUDA, the process must"
        " be terminated and relaunched."
    ),
    300: (
        "This indicates that the device kernel source is invalid. This includes"
        " compilation/linker errors encountered in device code or user error."
    ),
    301: "This indicates that the file specified was not found.",
    302: "This indicates that a link to a shared object failed to resolve.",
    303: "This indicates that initialization of a shared object failed.",
    304: "This indicates that an OS call failed.",
    400: (
        "This indicates that a resource handle passed to the API call was not"
        " valid. Resource handles are opaque types like ::CUstream and ::CUevent."
    ),
    401: (
        "This indicates that a resource required by the API call is not in a"
        " valid state to perform the requested operation."
    ),

    402: (
        "This indicates an attempt was made to introspect an object in a way that"
        " would discard semantically important information. This is either due to"
        " the object using functionality newer than the API version used to"
        " introspect it or omission of optional return arguments."
    ),

    500: (
        "This indicates that a named symbol was not found. Examples of symbols"
        " are global/constant variable names, driver function names, texture names,"
        " and surface names."
    ),
    600: (
        "This indicates that asynchronous operations issued previously have not"
        " completed yet. This result is not actually an error, but must be indicated"
        " differently than ::CUDA_SUCCESS (which indicates completion). Calls that"
        " may return this value include ::cuEventQuery() and ::cuStreamQuery()."
    ),
    700: (
        "While executing a kernel, the device encountered a"
        " load or store instruction on an invalid memory address."
        " This leaves the process in an inconsistent state and any further CUDA work"
        " will return the same error. To continue using CUDA, the process must be terminated"
        " and relaunched."
    ),
    701: (
        "This indicates that a launch did not occur because it did not have"
        " appropriate resources. This error usually indicates that the user has"
        " attempted to pass too many arguments to the device kernel, or the"
        " kernel launch specifies too many threads for the kernel's register"
        " count. Passing arguments of the wrong size (i.e. a 64-bit pointer"
        " when a 32-bit int is expected) is equivalent to passing too many"
        " arguments and can also result in this error."
    ),
    702: (
        "This indicates that the device kernel took too long to execute. This can"
        " only occur if timeouts are enabled - see the device attribute"
        " ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information."
        " This leaves the process in an inconsistent state and any further CUDA work"
        " will return the same error. To continue using CUDA, the process must be terminated"
        " and relaunched."
    ),
    703: ("This error indicates a kernel launch that uses an incompatible texturing mode."),
    704: (
        "This error indicates that a call to ::cuCtxEnablePeerAccess() is"
        " trying to re-enable peer access to a context which has already"
        " had peer access to it enabled."
    ),
    705: (
        "This error indicates that ::cuCtxDisablePeerAccess() is"
        " trying to disable peer access which has not been enabled yet"
        " via ::cuCtxEnablePeerAccess()."
    ),
    708: ("This error indicates that the primary context for the specified device has already been initialized."),
    709: (
        "This error indicates that the context current to the calling thread"
        " has been destroyed using ::cuCtxDestroy, or is a primary context which"
        " has not yet been initialized."
    ),
    710: (
        "A device-side assert triggered during kernel execution. The context"
        " cannot be used anymore, and must be destroyed. All existing device"
        " memory allocations from this context are invalid and must be"
        " reconstructed if the program is to continue using CUDA."
    ),
    711: (
        "This error indicates that the hardware resources required to enable"
        " peer access have been exhausted for one or more of the devices"
        " passed to ::cuCtxEnablePeerAccess()."
    ),
    712: ("This error indicates that the memory range passed to ::cuMemHostRegister() has already been registered."),
    713: (
        "This error indicates that the pointer passed to ::cuMemHostUnregister()"
        " does not correspond to any currently registered memory region."
    ),
    714: (
        "While executing a kernel, the device encountered a stack error."
        " This can be due to stack corruption or exceeding the stack size limit."
        " This leaves the process in an inconsistent state and any further CUDA work"
        " will return the same error. To continue using CUDA, the process must be terminated"
        " and relaunched."
    ),
    715: (
        "While executing a kernel, the device encountered an illegal instruction."
        " This leaves the process in an inconsistent state and any further CUDA work"
        " will return the same error. To continue using CUDA, the process must be terminated"
        " and relaunched."
    ),
    716: (
        "While executing a kernel, the device encountered a load or store instruction"
        " on a memory address which is not aligned."
        " This leaves the process in an inconsistent state and any further CUDA work"
        " will return the same error. To continue using CUDA, the process must be terminated"
        " and relaunched."
    ),
    717: (
        "While executing a kernel, the device encountered an instruction"
        " which can only operate on memory locations in certain address spaces"
        " (global, shared, or local), but was supplied a memory address not"
        " belonging to an allowed address space."
        " This leaves the process in an inconsistent state and any further CUDA work"
        " will return the same error. To continue using CUDA, the process must be terminated"
        " and relaunched."
    ),
    718: (
        "While executing a kernel, the device program counter wrapped its address space."
        " This leaves the process in an inconsistent state and any further CUDA work"
        " will return the same error. To continue using CUDA, the process must be terminated"
        " and relaunched."
    ),
    719: (
        "An exception occurred on the device while executing a kernel. Common"
        " causes include dereferencing an invalid device pointer and accessing"
        " out of bounds shared memory. Less common cases can be system specific - more"
        " information about these cases can be found in the system specific user guide."
        " This leaves the process in an inconsistent state and any further CUDA work"
        " will return the same error. To continue using CUDA, the process must be terminated"
        " and relaunched."
    ),
    720: (
        "This error indicates that the number of blocks launched per grid for a kernel that was"
        " launched via either ::cuLaunchCooperativeKernel or ::cuLaunchCooperativeKernelMultiDevice"
        " exceeds the maximum number of blocks as allowed by ::cuOccupancyMaxActiveBlocksPerMultiprocessor"
        " or ::cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors"
        " as specified by the device attribute ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT."
    ),
    721: (
        "An exception occurred on the device while exiting a kernel using tensor memory: the"
        " tensor memory was not completely deallocated. This leaves the process in an inconsistent"
        " state and any further CUDA work will return the same error. To continue using CUDA, the"
        " process must be terminated and relaunched."
    ),
    800: "This error indicates that the attempted operation is not permitted.",
    801: ("This error indicates that the attempted operation is not supported on the current system or device."),
    802: (
        "This error indicates that the system is not yet ready to start any CUDA"
        " work. To continue using CUDA, verify the system configuration is in a"
        " valid state and all required driver daemons are actively running."
        " More information about this error can be found in the system specific"
        " user guide."
    ),
    803: (
        "This error indicates that there is a mismatch between the versions of"
        " the display driver and the CUDA driver. Refer to the compatibility documentation"
        " for supported versions."
    ),
    804: (
        "This error indicates that the system was upgraded to run with forward compatibility"
        " but the visible hardware detected by CUDA does not support this configuration."
        " Refer to the compatibility documentation for the supported hardware matrix or ensure"
        " that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES"
        " environment variable."
    ),

    805: "This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.",
    806: "This error indicates that the remote procedure call between the MPS server and the MPS client failed.",
    807: (
        "This error indicates that the MPS server is not ready to accept new MPS client requests."
        " This error can be returned when the MPS server is in the process of recovering from a fatal failure."
    ),
    808: "This error indicates that the hardware resources required to create an MPS client have been exhausted.",
    809: "This error indicates that the hardware resources required to support device connections have been exhausted.",
    810: "This error indicates that the MPS client has been terminated by the server. To continue using CUDA, the process must be terminated and relaunched.",
    811: "This error indicates that the module is using CUDA Dynamic Parallelism, but the current configuration, like MPS, does not support it.",
    812: "This error indicates that a module contains an unsupported interaction between different versions of CUDA Dynamic Parallelism.",

    900: ("This error indicates that the operation is not permitted when the stream is capturing."),
    901: (
        "This error indicates that the current capture sequence on the stream"
        " has been invalidated due to a previous error."
    ),
    902: (
        "This error indicates that the operation would have resulted in a merge of two independent capture sequences."
    ),
    903: "This error indicates that the capture was not initiated in this stream.",
    904: ("This error indicates that the capture sequence contains a fork that was not joined to the primary stream."),
    905: (
        "This error indicates that a dependency would have been created which"
        " crosses the capture sequence boundary. Only implicit in-stream ordering"
        " dependencies are allowed to cross the boundary."
    ),
    906: ("This error indicates a disallowed implicit dependency on a current capture sequence from cudaStreamLegacy."),
    907: (
        "This error indicates that the operation is not permitted on an event which"
        " was last recorded in a capturing stream."
    ),
    908: (
        "A stream capture sequence not initiated with the ::CU_STREAM_CAPTURE_MODE_RELAXED"
        " argument to ::cuStreamBeginCapture was passed to ::cuStreamEndCapture in a"
        " different thread."
    ),
    909: "This error indicates that the timeout specified for the wait operation has lapsed.",
    910: (
        "This error indicates that the graph update was not performed because it included"
        " changes which violated constraints specific to instantiated graph update."
    ),
    911: (
        "This indicates that an async error has occurred in a device outside of CUDA."
        " If CUDA was waiting for an external device's signal before consuming shared data,"
        " the external device signaled an error indicating that the data is not valid for"
        " consumption. This leaves the process in an inconsistent state and any further CUDA"
        " work will return the same error. To continue using CUDA, the process must be"
        " terminated and relaunched."
    ),

    912: "Indicates a kernel launch error due to cluster misconfiguration.",
    913: ("Indicates a function handle is not loaded when calling an API that requires a loaded function."),
    914: ("This error indicates one or more resources passed in are not valid resource types for the operation."),
    915: ("This error indicates one or more resources are insufficient or non-applicable for the operation."),
    916: ("This error indicates that an error happened during the key rotation sequence."),
    917: (
        "This error indicates that the requested operation is not permitted because the"
        " stream is in a detached state. This can occur if the green context associated"
        " with the stream has been destroyed, limiting the stream's operational capabilities."
    ),
    999: "This indicates that an unknown internal error has occurred.",
}
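A minimal sketch of how this mapping is typically consumed: translating a raw `CUresult` integer into a human-readable message, with a fallback for codes missing from the table. The helper name `explain_cu_result` is hypothetical (it is not part of this module), and a small subset of the dictionary is inlined so the sketch runs standalone.

```python
# Inlined subset of DRIVER_CU_RESULT_EXPLANATIONS so this sketch is self-contained.
EXPLANATIONS_SUBSET = {
    0: (
        "The API call returned with no errors. In the case of query calls, this"
        " also means that the operation being queried is complete (see"
        " ::cuEventQuery() and ::cuStreamQuery())."
    ),
    999: "This indicates that an unknown internal error has occurred.",
}


def explain_cu_result(code: int, table=EXPLANATIONS_SUBSET) -> str:
    """Return the explanation for a CUresult code, or a fallback for unknown codes.

    Hypothetical helper for illustration only; the real module simply exposes
    the dictionary.
    """
    return table.get(code, f"No explanation available for CUresult code {code}.")


print(explain_cu_result(999))  # known code -> explanation string
print(explain_cu_result(42))   # unknown code -> fallback message
```

Keeping the fallback in the caller (rather than raising `KeyError`) mirrors how error-reporting paths usually want a best-effort message even for codes introduced by a newer toolkit than the one the table was generated from.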