Coverage for cuda/core/utils/_program_cache/_keys.py: 90.20%
245 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-13 01:38 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
5"""Cache-key construction.
7A backend-strategy hierarchy (:class:`_KeyBackend`) owns the per-code-type
8guard / fingerprint / version-probe logic; :func:`make_program_cache_key`
9dispatches to the right backend and assembles the digest.
10"""
12from __future__ import annotations
14import abc
15import collections.abc
16import hashlib
17from typing import Any, Callable, Sequence
19# Mutual-dependency contract: this module imports ProgramOptions from
20# cuda.core._program at module level, and cuda.core._program imports
21# ProgramCacheResource / make_program_cache_key from cuda.core.utils
22# only via deferred imports inside ``Program.compile``. Adding a
23# top-level ``from cuda.core.utils import ...`` to _program.pyx would
24# turn this into a real import cycle -- keep the import in _program.pyx
25# deferred (or import the symbols from the leaf submodule directly).
26from cuda.core._program import ProgramOptions
27from cuda.core._utils.cuda_utils import (
28 driver as _driver,
29)
30from cuda.core._utils.cuda_utils import (
31 handle_return as _handle_return,
32)
33from cuda.core._utils.cuda_utils import (
34 nvrtc as _nvrtc,
35)
37# Bump when the key schema changes in a way that invalidates existing caches.
38_KEY_SCHEMA_VERSION = 2
40_VALID_CODE_TYPES = frozenset({"c++", "ptx", "nvvm"})
41_VALID_TARGET_TYPES = frozenset({"ptx", "cubin", "ltoir"})
43# code_type -> allowed target_type set, mirroring Program.compile's
44# SUPPORTED_TARGETS matrix in _program.pyx.
45_SUPPORTED_TARGETS_BY_CODE_TYPE = {
46 "c++": frozenset({"ptx", "cubin", "ltoir"}),
47 "ptx": frozenset({"cubin", "ptx"}),
48 "nvvm": frozenset({"ptx", "ltoir"}),
49}
52# Map each ProgramOptions field that reaches the Linker via
53# _translate_program_options (see cuda_core/cuda/core/_program.pyx) to the
54# gate the Linker uses to turn it into a flag (see
55# ``_prepare_nvjitlink_options`` and ``_prepare_driver_options`` in
56# _linker.pyx). All other fields on ProgramOptions are NVRTC-only and must
57# NOT perturb a PTX cache key: a PTX compile with a shared ProgramOptions
58# that happens to set include_path/pch/frandom_seed would otherwise miss the
59# cache unnecessarily. Collapsing inputs through these gates means
60# semantically-equivalent configurations (``debug=False`` vs ``None``,
61# ``time=True`` vs ``time="path"``) hash to the same cache key instead of
62# forcing spurious misses. Single source of truth: every reader iterates
63# this dict, so adding a field here is enough -- there is no parallel
64# field-name list to keep in sync.
65def _gate_presence(v: Any) -> bool:
66 return v is not None 1mnopqrsaDbtuvwxyijklzAEFGHIJKghcdefBC
69def _gate_truthy(v: Any) -> bool:
70 return bool(v) 1mnopqrsaDbtuvwxyijklzAEFGHIJKghcdefBC
73def _gate_is_true(v: Any) -> bool:
74 return v is True 1mnopqrsaDbtuvwxyijklzAEFGHIJKghcdefBC
77def _gate_tristate_bool(v: Any) -> bool | None:
78 return None if v is None else bool(v) 1mnopqrsDbtuvwxyzAEFGHIJKghcdefBC
81def _gate_identity(v: Any) -> Any:
82 return v 1mnopqrsaDbtuvwxyijklzAEFGHIJKghcdefBC
85def _gate_ptxas_options(v: Any) -> Any:
86 # ``_prepare_nvjitlink_options`` emits one ``-Xptxas=<s>`` per element, and
87 # treats ``str`` as a single-element sequence. Canonicalize to a tuple so
88 # ``"-v"`` / ``["-v"]`` / ``("-v",)`` all hash the same. An empty sequence
89 # emits no flags, so collapse it to ``None`` too.
90 #
91 # Order is preserved on purpose: ptxas accepts ordering-sensitive flag
92 # pairs (e.g. ``-O2`` after ``-O3`` lowers the active level), so
93 # ``["-v", "-O2"]`` and ``["-O2", "-v"]`` are not guaranteed to produce
94 # identical bytes. We accept the spurious miss when callers reorder
95 # flags; treating order as semantic keeps the cache safe in the
96 # ordering-sensitive case.
97 if v is None: 1mnopqrsaDbtuvwxyijklzAEFGHIJKghcdefBC
98 return None 1mnopqrsaDbtuvwxyijklzAEFGHIJKcdBC
99 if isinstance(v, str): 1ghcdef
100 return ("-Xptxas=" + v,) 1ef
101 if isinstance(v, collections.abc.Sequence): 1ghcdef
102 if len(v) == 0: 1ghcdef
103 return None 1gcd
104 return tuple(f"-Xptxas={s}" for s in v) 1hef
105 return v
108_LINKER_FIELD_GATES = {
109 "name": _gate_identity,
110 "arch": _gate_identity,
111 "max_register_count": _gate_identity,
112 "time": _gate_presence, # linker emits ``-time`` iff value is not None
113 "link_time_optimization": _gate_truthy,
114 "debug": _gate_truthy,
115 "lineinfo": _gate_truthy,
116 "ftz": _gate_tristate_bool,
117 "prec_div": _gate_tristate_bool,
118 "prec_sqrt": _gate_tristate_bool,
119 "fma": _gate_tristate_bool,
120 "split_compile": _gate_identity,
121 "ptxas_options": _gate_ptxas_options,
122 "no_cache": _gate_is_true,
123}
126# LinkerOptions fields the ``cuLink`` driver backend silently ignores
127# (emits only a DeprecationWarning; no actual flag reaches the compiler).
128# When the driver backend is active, collapse them to a single sentinel in
129# the fingerprint so nvJitLink<->driver parity of ``ObjectCode`` doesn't
130# cause cache misses from otherwise-equivalent configurations.
131_DRIVER_IGNORED_LINKER_FIELDS = frozenset({"ftz", "prec_div", "prec_sqrt", "fma"})
def _linker_option_fingerprint(options: ProgramOptions, *, use_driver_linker: bool | None) -> list[bytes]:
    """Fingerprint the ProgramOptions fields the Linker consumes, per backend.

    Every field listed in ``_LINKER_FIELD_GATES`` is read from ``options``
    (missing attributes read as ``None``), run through its gate, and
    rendered as ``b"<name>=<repr of gated value>"``. Gating makes
    equivalent inputs (e.g. ``debug=False`` / ``None``) produce the same
    bytes.

    When ``use_driver_linker`` is exactly ``True``, the fields in
    ``_DRIVER_IGNORED_LINKER_FIELDS`` are rendered as
    ``b"<name>=<driver-ignored>"`` without consulting their value.
    ``False`` and ``None`` (backend could not be probed) both leave those
    fields un-collapsed, which is the conservative choice.

    Entries are returned in ``_LINKER_FIELD_GATES`` iteration order.
    """
    collapse_ignored = use_driver_linker is True

    def _render(field: str, gate: Callable[[Any], Any]) -> bytes:
        # Driver-ignored fields never have their gate or value evaluated.
        if collapse_ignored and field in _DRIVER_IGNORED_LINKER_FIELDS:
            return f"{field}=<driver-ignored>".encode()
        gated_value = gate(getattr(options, field, None))
        return f"{field}={gated_value!r}".encode()

    return [_render(field, gate) for field, gate in _LINKER_FIELD_GATES.items()]
156# ProgramOptions fields that map to LinkerOptions fields the cuLink (driver)
157# backend rejects outright (see _prepare_driver_options in _linker.pyx).
158# ``split_compile_extended`` exists on LinkerOptions but is not exposed via
159# ProgramOptions / _translate_program_options, so it cannot reach the driver
160# linker from the cache path and is omitted here.
161_DRIVER_LINKER_UNSUPPORTED_FIELDS = ("time", "ptxas_options", "split_compile")
def _driver_version() -> int:
    """Return the value of ``cuDriverGetVersion`` (unwrapped by ``_handle_return``) as an ``int``."""
    reported = _handle_return(_driver.cuDriverGetVersion())
    return int(reported)
def _nvrtc_version() -> tuple[int, int]:
    """Return the ``(major, minor)`` pair from ``nvrtcVersion`` with both parts coerced to ``int``."""
    major, minor = _handle_return(_nvrtc.nvrtcVersion())
    version_pair = (int(major), int(minor))
    return version_pair
173def _linker_backend_and_version(use_driver: bool) -> tuple[str, str]:
174 """Return ``(backend, version)`` for the linker used on PTX inputs.
176 ``use_driver`` is the result of ``_decide_nvjitlink_or_driver()`` and
177 must be passed in so a single ``make_program_cache_key`` call shares
178 one probe across :meth:`_LinkerBackend.validate`,
179 :meth:`option_fingerprint`, and :meth:`hash_version_probe` (otherwise
180 a transient probe flap could write inconsistent fields into the same
181 key).
183 Raises any underlying probe exception. ``make_program_cache_key`` catches
184 and mixes the exception's class name into the digest, so the same probe
185 failure produces the same key across processes -- the cache stays
186 persistent in broken environments, while never sharing a key with a
187 working probe (``_probe_failed`` label vs. ``driver``/``nvrtc``/...).
189 nvJitLink version lookup goes through ``sys.modules`` first so we hit the
190 same module ``_decide_nvjitlink_or_driver()`` already loaded. That keeps
191 fingerprinting aligned with whichever ``cuda.bindings.nvjitlink`` import
192 path the linker actually uses.
193 """
194 import sys 1mnopqrsabtuvwxyijklzAghcdefBC
196 if use_driver: 1mnopqrsabtuvwxyijklzAghcdefBC
197 return ("driver", str(_driver_version())) 1aijkl
198 nvjitlink = sys.modules.get("cuda.bindings.nvjitlink") 1mnopqrsbtuvwxyzAghcdefBC
199 if nvjitlink is None: 1mnopqrsbtuvwxyzAghcdefBC
200 from cuda.bindings import nvjitlink as _nvjitlink
202 nvjitlink = _nvjitlink
204 return ("nvJitLink", str(nvjitlink.version())) 1mnopqrsbtuvwxyzAghcdefBC
def _nvvm_fingerprint() -> str:
    """Build a stable identifier for the loaded NVVM toolchain.

    The result has the form ``lib=<major>.<minor>;ir=<a>.<b>.<c>.<d>``,
    combining ``module.version()`` (libNVVM library version) with the four
    values from ``module.ir_version()``. The library version is the primary
    invalidation lever: a libNVVM patch upgrade can change codegen without
    changing the IR major/minor, so keying on the IR values alone would
    reuse stale entries. The IR values are kept as defence in depth.

    The module is obtained through ``_get_nvvm_module()`` so this probe is
    subject to the same availability gate as real NVVM compilation; any
    failure propagates to the caller.
    """
    from cuda.core._program import _get_nvvm_module

    nvvm = _get_nvvm_module()
    lib_major, lib_minor = nvvm.version()  # type: ignore[attr-defined]
    ir_major, ir_minor, dbg_major, dbg_minor = nvvm.ir_version()  # type: ignore[attr-defined]
    lib_part = f"lib={lib_major}.{lib_minor}"
    ir_part = f"ir={ir_major}.{ir_minor}.{dbg_major}.{dbg_minor}"
    return f"{lib_part};{ir_part}"
230# ProgramOptions fields that reference external files whose *contents* the
231# cache key cannot observe without reading the filesystem. Callers that set
232# any of these must supply an ``extra_digest`` covering the dependency surface
233# (e.g. a hash over all reachable headers / PCH bytes).
234_EXTERNAL_CONTENT_OPTIONS = (
235 "include_path",
236 "pre_include",
237 "pch",
238 "use_pch",
239 "pch_dir",
240)
242# ProgramOptions fields whose compilation effect is not captured in the
243# returned ``ObjectCode`` -- they produce a filesystem artifact as a side
244# effect. A cache hit skips compilation, so that artifact would never be
245# written. Reject these outright: the persistent cache is for pure ObjectCode
246# reuse, not for replaying compile-time side effects.
247# * create_pch -- writes a PCH file (NVRTC).
248# * time -- writes NVRTC timing info to a file.
249# * fdevice_time_trace -- writes a device-compilation time trace file (NVRTC).
250# These are all NVRTC-specific; the Linker's ``-time`` logs to the info log
251# (not a file) and NVVM explicitly rejects all three at compile time. The
252# side-effect guard is therefore gated on ``backend == "nvrtc"`` below.
253_SIDE_EFFECT_OPTIONS = ("create_pch", "time", "fdevice_time_trace")
256# ProgramOptions fields gated by plain truthiness in ``_program.pyx`` (the
257# compiler writes the flag only when the value is truthy).
258_BOOLEAN_OPTION_FIELDS = frozenset({"pch"})
260# Fields whose compiler emission requires ``isinstance(value, str)`` or a
261# non-empty sequence; anything else (``False``, ``int``, ``None``, ``[]``)
262# is silently ignored at compile time.
263_STR_OR_SEQUENCE_OPTION_FIELDS = frozenset({"include_path", "pre_include"})
def _option_is_set(options: ProgramOptions, name: str) -> bool:
    """Decide whether ``options.<name>`` counts as "set", per field shape.

    The rules follow how ``_program.pyx`` gates option emission:

    - A missing attribute or ``None`` is never set.
    - Fields in ``_BOOLEAN_OPTION_FIELDS`` (``pch``) are set only when truthy.
    - Fields in ``_STR_OR_SEQUENCE_OPTION_FIELDS`` (``include_path``,
      ``pre_include``) are set for any ``str`` (including the empty string)
      or any non-empty ``collections.abc.Sequence``; other values
      (``False``, ``int``, empty sequence) are ignored by the compiler and
      therefore not set.
    - Every other field (``create_pch``, ``time``, ``fdevice_time_trace``,
      ``use_pch``, ``pch_dir``) is set for any non-``None`` value, because
      the compiler emits ``--flag=<value>`` even for ``False`` / ``""`` /
      ``0``.
    """
    value = getattr(options, name, None)
    if value is None:
        return False
    if name in _BOOLEAN_OPTION_FIELDS:
        return bool(value)
    if name not in _STR_OR_SEQUENCE_OPTION_FIELDS:
        # Path/string-shaped field: any non-None value is emitted.
        return True
    # ``str`` is tested before ``Sequence`` so an empty string still counts,
    # matching the order of checks in ``_prepare_nvrtc_options_impl``.
    if isinstance(value, str):
        return True
    return isinstance(value, collections.abc.Sequence) and len(value) > 0
298def _hash_probe_failure(update: Callable[[str, bytes], None], label: str, exc: BaseException) -> None:
299 """Mix a probe failure into the digest under a stable, content-free label.
301 Hashing only the exception's CLASS NAME (not its message) keeps the
302 digest stable across repeated calls within one process (e.g. NVVM's
303 loader reports different messages on first vs. cached-failure attempts)
304 AND across processes that hit the same failure mode. The
305 ``_probe_failed`` label differs from every backend's success label, so a
306 broken environment never collides with a working one -- the cache
307 "fails closed" between broken and working environments while staying
308 persistent within either.
309 """
310 update(f"{label}_probe_failed", type(exc).__name__.encode()) 1abT
313class _KeyBackend(abc.ABC):
314 """Strategy for deriving the cache key for one ``Program`` ``code_type``.
316 Each subclass owns the backend-specific guard logic, code coercion,
317 option fingerprinting, name-expression handling, version probing, and
318 extra-payload hashing. The orchestrator :func:`make_program_cache_key`
319 validates the code_type / target_type pair, dispatches to the right
320 backend, and assembles the digest.
321 """
323 @abc.abstractmethod
324 def validate(self, options: ProgramOptions, target_type: str, extra_digest: bytes | None) -> None:
325 """Reject inputs the cache cannot key safely.
327 Raises ``ValueError`` for options that have compile-time side
328 effects, options that pull in external file content the cache
329 can't observe, or any other backend-specific invariants.
330 """
332 def encode_code(self, code: object, code_type: str) -> bytes:
333 """Coerce ``code`` to bytes. Default rejects bytes-like input
334 (only NVVM accepts it; ``Program()`` does the same)."""
335 if isinstance(code, str): 2W M N O P Q R m n o p q @ [ X Y Z r s 0 1 2 V ] 3 4 5 6 a D b T t u 7 v U S 8 w x 9 ! # $ % jby i j k l z A E F G H I J K g h c d e f kbeblbwbfbgbhbib' B ( ) C * + , - . / : ; = ?
336 return code.encode("utf-8") 2W M N O P Q R m n o p q @ [ X Y Z r s 0 1 2 V ] 3 4 5 6 a D b T t u 7 v U S 8 w x 9 ! # $ % jby i j k l z A E F G H I J K g h c d e f ebfbgbhbib' B ( ) C * + , - . / : ; = ?
337 if isinstance(code, (bytes, bytearray)): 2kblbwb
338 raise TypeError( 2lbwb
339 f"code must be str for code_type={code_type!r}; bytes/bytearray are only accepted for code_type='nvvm'."
340 )
341 raise TypeError(f"code must be str or bytes, got {type(code).__name__}") 2kb
343 @abc.abstractmethod
344 def option_fingerprint(self, options: ProgramOptions, target_type: str) -> list[bytes]:
345 """Fingerprint of the ``ProgramOptions`` fields that reach the compiler."""
347 def encode_name_expressions(self, name_expressions: Sequence[Any]) -> tuple[bytes, ...] | None: # noqa: ARG002
348 """Sorted, type-tagged name expressions, or ``None`` if the
349 backend does not consume them.
351 ``None`` means the orchestrator emits no ``names_count`` /
352 ``name`` entries at all (a backend that ignores
353 ``name_expressions`` should never have them perturb its key). An
354 empty tuple means the backend supports them but the caller
355 passed none -- the orchestrator still emits ``names_count=0`` so
356 the schema is stable across "absent" and "empty".
357 """
358 return None 2_ m n o p q r s { a D b | t } u v ~ w x abbbdb` ^ y i j k l z A E F G H I J K g h c d e f B cbC
360 @abc.abstractmethod
361 def hash_version_probe(self, update: Callable[[str, bytes], None]) -> None:
362 """Mix the runtime/compiler version probe into the digest via
363 ``update(label, payload)``. On probe failure, mix
364 ``_hash_probe_failure(update, "<label>", exc)`` instead so the
365 digest is stable across processes hitting the same failure
366 mode.
367 """
369 def hash_extra_payload(self, options: ProgramOptions, update: Callable[[str, bytes], None]) -> None: # noqa: B027 1LWMNOPQRmnopq@[XYZrs012V]3456aDbTtu7vUS8wx9!#$%yijklzAEFGHIJKghcdef'B()C*+,-./:;=?
370 """Mix backend-specific extras (e.g. NVVM ``extra_sources`` /
371 ``use_libdevice``). Default: nothing.
372 """
class _NvrtcBackend(_KeyBackend):
    """Key backend for the NVRTC path (registered for ``code_type='c++'``)."""

    def validate(self, options: ProgramOptions, target_type: str, extra_digest: bytes | None) -> None:  # noqa: ARG002
        """Reject option sets the NVRTC cache path cannot key safely.

        Raises ``ValueError`` when, in this order:

        1. any ``_SIDE_EFFECT_OPTIONS`` field is set -- a cache hit skips
           compilation, so the artifact would never be written;
        2. ``extra_sources`` is not ``None`` -- it is NVVM-only, and this
           gives the same error as a real compile would;
        3. ``extra_digest`` is ``None`` and any ``_EXTERNAL_CONTENT_OPTIONS``
           field is set;
        4. ``extra_digest`` is ``None`` and ``options.name`` is a string
           containing ``/`` or ``\\``.
        """
        # The side-effect guard is NVRTC-specific: the Linker's ``-time``
        # logs to the info log rather than a file, and NVVM rejects all
        # three options at compile time.
        side_effects = list(filter(lambda field: _option_is_set(options, field), _SIDE_EFFECT_OPTIONS))
        if side_effects:
            listed = ", ".join(side_effects)
            raise ValueError(
                f"make_program_cache_key() refuses to build a key for options that "
                f"have compile-time side effects ({listed}); a "
                f"cache hit skips compilation, so the side effect would not occur. "
                f"Disable the option, or compile directly without the cache."
            )

        if getattr(options, "extra_sources", None) is not None:
            raise ValueError(
                "extra_sources is only valid for code_type='nvvm'; Program() rejects it for code_type='c++'."
            )

        # With an extra_digest the caller has vouched for external content.
        if extra_digest is not None:
            return

        # Only NVRTC reads this external content: PTX compiles drop these
        # options in ``_translate_program_options`` and NVVM rejects them.
        external = list(filter(lambda field: _option_is_set(options, field), _EXTERNAL_CONTENT_OPTIONS))
        if external:
            listed = ", ".join(external)
            raise ValueError(
                f"make_program_cache_key() refuses to build a key for options that "
                f"pull in external file content ({listed}) without an "
                f"extra_digest; compute a digest over the header/PCH bytes the "
                f"compile will read and pass it as extra_digest=..."
            )

        # NVRTC uses ``options.name`` as the source filename and resolves
        # quoted ``#include "x.h"`` directives relative to its directory
        # component. That directory's contents are invisible to the key, so
        # a name with a directory component needs an ``extra_digest`` just
        # like ``include_path`` does.
        options_name = getattr(options, "name", None)
        if isinstance(options_name, str) and any(sep in options_name for sep in ("/", "\\")):
            raise ValueError(
                f"make_program_cache_key() refuses to build a key for options.name="
                f"{options_name!r} (NVRTC source-filename with a directory "
                f"component) without an extra_digest; NVRTC resolves quoted "
                f"#include directives relative to that directory, so a digest "
                f"covering the headers it may pull in must be supplied."
            )

    def option_fingerprint(self, options: ProgramOptions, target_type: str) -> list[bytes]:
        """Return ``options.as_bytes("nvrtc", target_type)``, the real NVRTC compile-time flag surface."""
        return options.as_bytes("nvrtc", target_type)

    def encode_name_expressions(self, name_expressions: Sequence[Any]) -> tuple[bytes, ...]:
        """Return the name expressions type-tagged (``b"s:"`` / ``b"b:"``) and sorted.

        ``"foo"`` and ``b"foo"`` get distinct tags because ``Program.compile``
        records the original Python object as the
        ``ObjectCode.symbol_mapping`` key; a cached ObjectCode whose key type
        differs from what a later ``get_kernel`` passes would silently miss.

        Raises ``TypeError`` for ``bytearray`` and for any other non-str,
        non-bytes element. ``bytearray`` is refused because
        ``Program.compile`` uses each element as a dict key and bytearray is
        unhashable; accepting it would let the cache serve hits for inputs
        the uncached path cannot handle.
        """
        tagged: list[bytes] = []
        for expr in name_expressions:
            if isinstance(expr, str):
                tagged.append(b"s:" + expr.encode("utf-8"))
            elif isinstance(expr, bytes):
                tagged.append(b"b:" + expr)
            elif isinstance(expr, bytearray):
                raise TypeError(
                    "name_expressions elements must be str or bytes; "
                    "bytearray is not accepted because Program.compile uses "
                    "each element as a dict key and bytearray is unhashable."
                )
            else:
                raise TypeError(f"name_expressions elements must be str or bytes; got {type(expr).__name__}")
        tagged.sort()
        return tuple(tagged)

    def hash_version_probe(self, update: Callable[[str, bytes], None]) -> None:
        """Mix ``nvrtc=<major>.<minor>`` into the digest, or a probe-failure label if the probe raises."""
        try:
            major, minor = _nvrtc_version()
        except Exception as exc:
            _hash_probe_failure(update, "nvrtc", exc)
        else:
            update("nvrtc", f"{major}.{minor}".encode("ascii"))
466_DECISION_UNSET = object()
class _LinkerBackend(_KeyBackend):
    """Key backend for the Linker path (registered for ``code_type='ptx'``)."""

    def __init__(self) -> None:
        # The linker-backend decision (and any probe failure) is remembered
        # for the lifetime of this instance so ``validate``,
        # ``option_fingerprint`` and ``hash_version_probe`` all see the same
        # answer. Without that, a transient probe flap mid-call could mint a
        # key whose option fingerprint and version probe disagree on which
        # linker is in use.
        self._cached_decision_exc: BaseException | None = None
        self._cached_decision = _DECISION_UNSET

    def _decide_driver(self) -> bool | None:
        """Return the memoized linker-backend decision, probing on first use.

        ``True`` means the cuLink driver linker will be used, ``False``
        means nvJitLink, and ``None`` means the probe raised. In the failure
        case the exception is kept in ``_cached_decision_exc`` so
        :meth:`hash_version_probe` can mix a ``_probe_failed`` taint into the
        digest instead of a backend label.
        """
        if self._cached_decision is not _DECISION_UNSET:
            return self._cached_decision  # type: ignore[return-value]
        try:
            from cuda.core._linker import _decide_nvjitlink_or_driver

            decision = _decide_nvjitlink_or_driver()
        except Exception as exc:
            self._cached_decision_exc = exc
            decision = None
        self._cached_decision = decision
        return self._cached_decision  # type: ignore[return-value]

    def validate(self, options: ProgramOptions, target_type: str, extra_digest: bytes | None) -> None:  # noqa: ARG002
        """Reject option sets the PTX/Linker path cannot compile here.

        Raises ``ValueError`` if ``extra_sources`` is not ``None``, or if the
        driver (cuLink) backend is selected and any field in
        ``_DRIVER_LINKER_UNSUPPORTED_FIELDS`` is not ``None``.
        """
        if getattr(options, "extra_sources", None) is not None:
            raise ValueError(
                "extra_sources is only valid for code_type='nvvm'; Program() rejects it for code_type='ptx'."
            )

        # PTX compiles go through the Linker. Only the driver backend
        # rejects extra options, so nothing more to check for nvJitLink.
        # A failed probe (``None``) is skipped too: we cannot tell which
        # backend will run, and the failed-probe taint added by
        # ``hash_version_probe`` already poisons the key.
        if self._decide_driver() is not True:
            return

        # Mirror ``_prepare_driver_options``'s exact gate, which is
        # ``is not None``: ``time=False`` or ``ptxas_options=[]`` is still a
        # rejection. The truthiness-based ``_option_is_set`` helper must NOT
        # be used here.
        unsupported = []
        for field in _DRIVER_LINKER_UNSUPPORTED_FIELDS:
            if getattr(options, field, None) is not None:
                unsupported.append(field)
        if unsupported:
            listed = ", ".join(unsupported)
            raise ValueError(
                f"the cuLink driver linker does not support these options: "
                f"{listed}; Program.compile() would reject this "
                f"configuration before producing an ObjectCode."
            )

    def option_fingerprint(self, options: ProgramOptions, target_type: str) -> list[bytes]:  # noqa: ARG002
        """Fingerprint only the ProgramOptions fields the Linker reads.

        For PTX inputs the Linker sees a subset of ProgramOptions (see
        ``_translate_program_options`` in _program.pyx). Restricting the
        fingerprint to that subset stops shared ProgramOptions carrying
        NVRTC-only flags (``include_path``, ``pch_*``, ``frandom_seed``, ...)
        from forcing spurious cache misses on PTX.
        """
        backend_decision = self._decide_driver()
        return _linker_option_fingerprint(options, use_driver_linker=backend_decision)

    def hash_version_probe(self, update: Callable[[str, bytes], None]) -> None:
        """Mix the linker backend name and version into the digest.

        Emits ``linker_backend`` and ``linker_version`` on success. If the
        backend decision failed, or the version lookup raises, a ``linker``
        probe-failure label is mixed in instead.

        Only cuLink (the driver-backed linker) goes through the CUDA driver
        for codegen; nvJitLink is a separate library, so a driver upgrade
        under it does not change the compiled bytes and the driver version
        is left out there. With the driver backend, the driver version is
        what ``_linker_backend_and_version`` returns, so it still reaches
        the digest via ``linker_version``.
        """
        use_driver = self._decide_driver()
        if use_driver is None:
            assert self._cached_decision_exc is not None
            _hash_probe_failure(update, "linker", self._cached_decision_exc)
            return
        try:
            backend_name, backend_version = _linker_backend_and_version(use_driver)
        except Exception as exc:
            _hash_probe_failure(update, "linker", exc)
        else:
            update("linker_backend", backend_name.encode("ascii"))
            update("linker_version", backend_version.encode("ascii"))
class _NvvmBackend(_KeyBackend):
    """Key backend for the NVVM path (registered for ``code_type='nvvm'``)."""

    def encode_code(self, code: object, code_type: str) -> bytes:  # noqa: ARG002
        """Coerce ``code`` to bytes; NVVM accepts ``str`` and bytes-like input (matching ``Program()``).

        Raises ``TypeError`` for any other type.
        """
        if isinstance(code, (bytes, bytearray)):
            return bytes(code)
        if isinstance(code, str):
            return code.encode("utf-8")
        raise TypeError(f"code must be str or bytes, got {type(code).__name__}")

    def validate(self, options: ProgramOptions, target_type: str, extra_digest: bytes | None) -> None:  # noqa: ARG002
        """Require an ``extra_digest`` whenever ``use_libdevice`` is truthy.

        NVVM with ``use_libdevice=True`` reads external libdevice bitcode at
        compile time (see Program_init in _program.pyx). The file is resolved
        from the active toolkit, so a changed CUDA_HOME or a libdevice
        upgrade changes the linked output without touching any input the key
        observes. The caller must therefore fingerprint the libdevice bytes
        via ``extra_digest`` (or disable ``use_libdevice``).

        Raises ``ValueError`` when ``extra_digest`` is ``None`` and
        ``use_libdevice`` is truthy.
        """
        if extra_digest is not None:
            return
        if not getattr(options, "use_libdevice", None):
            return
        raise ValueError(
            "make_program_cache_key() refuses to build an NVVM key with "
            "use_libdevice=True and no extra_digest: the linked libdevice "
            "bitcode can change out from under a cached ObjectCode. Pass an "
            "extra_digest that fingerprints the libdevice file you intend "
            "to link against, or disable use_libdevice."
        )

    def option_fingerprint(self, options: ProgramOptions, target_type: str) -> list[bytes]:
        """Return ``options.as_bytes("nvvm", target_type)``."""
        return options.as_bytes("nvvm", target_type)

    def hash_version_probe(self, update: Callable[[str, bytes], None]) -> None:
        """Mix the NVVM toolchain fingerprint into the digest, or a probe-failure label if it raises."""
        try:
            fingerprint = _nvvm_fingerprint()
        except Exception as exc:
            _hash_probe_failure(update, "nvvm", exc)
        else:
            update("nvvm", fingerprint.encode("ascii"))

    def hash_extra_payload(self, options: ProgramOptions, update: Callable[[str, bytes], None]) -> None:
        """Mix ``extra_sources`` and ``use_libdevice`` into the digest.

        ``extra_sources`` is hashed in caller-provided order on purpose. NVVM
        module linking is order-dependent in the general case (overlapping
        symbols, weak definitions, definition order can change which body
        wins), so sorting by source name would give one key to two compiles
        whose outputs may legitimately differ. Preserving caller order is
        the safe default unless order-insensitivity is proven.
        """
        extra_sources = getattr(options, "extra_sources", None)
        if extra_sources:
            update("extra_sources_count", str(len(extra_sources)).encode("ascii"))
            for entry in extra_sources:
                # Expected shape is a (name, source) pair; anything else is
                # hashed whole via its ``str()`` form.
                if not (isinstance(entry, (tuple, list)) and len(entry) == 2):
                    update("extra_source", str(entry).encode("utf-8"))
                    continue
                source_name, source = entry
                update("extra_source_name", str(source_name).encode("utf-8"))
                if isinstance(source, str):
                    payload = source.encode("utf-8")
                elif isinstance(source, (bytes, bytearray)):
                    payload = bytes(source)
                else:
                    payload = str(source).encode("utf-8")
                update("extra_source_code", payload)

        # ``use_libdevice`` is gated on truthiness to match Program_init's
        # gate, so ``False`` and ``None`` collapse to the same key.
        if getattr(options, "use_libdevice", None):
            update("use_libdevice", b"1")
# Registry mapping each supported ``code_type`` to its backend *class* (not an
# instance). ``make_program_cache_key`` instantiates a fresh backend per call
# so backends like ``_LinkerBackend`` can cache per-call probe results on
# ``self`` without leaking that state across calls.
_BACKENDS_BY_CODE_TYPE: dict[str, type[_KeyBackend]] = {
    "c++": _NvrtcBackend,
    "ptx": _LinkerBackend,
    "nvvm": _NvvmBackend,
}
def make_program_cache_key(
    *,
    code: str | bytes,
    code_type: str,
    options: ProgramOptions,
    target_type: str,
    name_expressions: Sequence[str | bytes | bytearray] = (),
    extra_digest: bytes | None = None,
) -> bytes:
    """Build a stable cache key from compile inputs.

    Parameters
    ----------
    code:
        Source text. ``str`` is encoded as UTF-8.
    code_type:
        One of ``"c++"``, ``"ptx"``, ``"nvvm"``. Matched case-insensitively.
    options:
        A :class:`cuda.core.ProgramOptions`. Its ``arch`` must be set (the
        default ``ProgramOptions.__post_init__`` populates it from the current
        device).
    target_type:
        One of ``"ptx"``, ``"cubin"``, ``"ltoir"``. Matched
        case-insensitively.
    name_expressions:
        Optional iterable of mangled-name lookups. Order is not significant.
        Elements may be ``str`` or ``bytes``; ``"foo"`` and ``b"foo"`` produce
        distinct keys because ``Program.compile`` records the original Python
        object as the ``ObjectCode.symbol_mapping`` key, and ``get_kernel``
        lookups must use the same type the cache key recorded. ``bytearray``
        is rejected because ``Program.compile`` stores each element as a
        dict key and ``bytearray`` is unhashable.
    extra_digest:
        Caller-supplied bytes mixed into the key. Required whenever
        :class:`cuda.core.ProgramOptions` sets any option that pulls in
        external file content (``include_path``, ``pre_include``, ``pch``,
        ``use_pch``, ``pch_dir``) -- the cache cannot read those files on
        the caller's behalf, so the caller must fingerprint the header /
        PCH surface and pass it here. Callers may pass this for other
        inputs too (embedded kernels, generated sources, etc.). ``int`` and
        ``str`` values are rejected: encode them to ``bytes`` first (for
        example ``hashlib.sha256(...).digest()`` or ``text.encode()``).

    Returns
    -------
    bytes
        An opaque bytes digest suitable for use as a cache key.

    Raises
    ------
    ValueError
        If ``code_type`` or ``target_type`` is not one of the supported
        values, or if ``target_type`` is not a valid target for
        ``code_type``.
    TypeError
        If ``extra_digest`` is an ``int`` or a ``str``.
    ValueError
        If ``options`` sets an option with compile-time side effects (such
        as ``create_pch``) -- a cache hit skips compilation, so the side
        effect would not occur.
    ValueError
        If ``extra_digest`` is ``None`` while ``options`` sets any option
        whose compilation effect depends on external file content that the
        key cannot otherwise observe.

    Examples
    --------
    For most workflows you should not call ``make_program_cache_key``
    yourself -- pass ``cache=`` to :meth:`cuda.core.Program.compile`,
    which derives the key, returns the cached
    :class:`~cuda.core.ObjectCode` on hit, and stores the compile
    result on miss::

        from cuda.core import Program, ProgramOptions
        from cuda.core.utils import FileStreamProgramCache

        source = 'extern "C" __global__ void k(int *a){ *a = 1; }'
        options = ProgramOptions(arch="sm_80")

        with FileStreamProgramCache() as cache:
            obj = Program(source, "c++", options=options).compile("cubin", cache=cache)

    Call ``make_program_cache_key`` directly when the compile inputs
    require an ``extra_digest`` (the cache cannot read external file
    content on the caller's behalf) -- ``Program.compile(cache=...)``
    refuses those inputs with a ``ValueError`` pointing here::

        from cuda.core import ObjectCode
        from cuda.core.utils import FileStreamProgramCache, make_program_cache_key

        with FileStreamProgramCache() as cache:
            key = make_program_cache_key(
                code=source,
                code_type="c++",
                options=options,
                target_type="cubin",
                extra_digest=fingerprint_headers(options.include_path),
            )
            data = cache.get(key)
            if data is None:
                obj = Program(source, "c++", options=options).compile("cubin")
                cache[key] = obj  # extracts bytes(obj.code)
            else:
                obj = ObjectCode.from_cubin(data)

    The cache stores raw binary bytes -- cubin / PTX / LTO-IR with no
    pickle, JSON, or framing -- so entry files are directly consumable
    by external NVIDIA tools (``cuobjdump``, ``nvdisasm``, ...). Note
    that an :class:`~cuda.core.ObjectCode` round-tripped through the
    cache loses ``symbol_mapping``: callers that compile with
    ``name_expressions`` and rely on ``get_kernel(name_expression)``
    after a cache hit must either compile fresh or look up the mangled
    symbol explicitly.

    Options that read external files (``include_path``, ``pre_include``,
    ``pch``, ``use_pch``, ``pch_dir``; ``use_libdevice=True`` on the NVVM
    path; and on NVRTC, an ``options.name`` with a directory component,
    which NVRTC uses for relative-include resolution) require
    ``extra_digest`` -- fingerprint the bytes the compiler will pull in
    and pass that digest so changes to those files force a cache miss.
    Options that have compile-time side effects (``create_pch``,
    ``time``, ``fdevice_time_trace``) cannot be cached and raise
    ``ValueError``; compile directly, or disable the flag, for those
    cases.
    """
    # Mirror Program.compile (_program.pyx lowercases code_type at Program
    # init and target_type at the top of compile); a caller that passes
    # "PTX" or "C++" must get the same routing and the same cache key as
    # the lowercase form.
    code_type = code_type.lower() if isinstance(code_type, str) else code_type
    target_type = target_type.lower() if isinstance(target_type, str) else target_type
    if code_type not in _VALID_CODE_TYPES:
        raise ValueError(f"code_type={code_type!r} is not supported (must be one of {sorted(_VALID_CODE_TYPES)})")
    if target_type not in _VALID_TARGET_TYPES:
        raise ValueError(f"target_type={target_type!r} is not supported (must be one of {sorted(_VALID_TARGET_TYPES)})")
    supported_for_code = _SUPPORTED_TARGETS_BY_CODE_TYPE[code_type]
    if target_type not in supported_for_code:
        raise ValueError(
            f"target_type={target_type!r} is not valid for code_type={code_type!r}"
            f" (supported: {sorted(supported_for_code)}). Program.compile() rejects"
            f" this combination, so caching a key for it is meaningless."
        )

    # ``extra_digest`` is passed through ``bytes(...)`` below. For an ``int``
    # that call does not encode the value: ``bytes(n)`` builds ``n`` zero
    # bytes, so an integer digest (a CRC, ``hash(...)``) would be replaced by
    # a zero-filled buffer of that length -- potentially gigabytes -- or fail
    # for negative values. ``bytes(str)`` raises an opaque "string argument
    # without an encoding". Reject both up front with an actionable message.
    # (``bool`` is an ``int`` subclass and is rejected too.)
    if isinstance(extra_digest, (int, str)):
        raise TypeError(
            f"extra_digest must be bytes (or another bytes-like object) or None, "
            f"got {type(extra_digest).__name__}"
        )

    backend = _BACKENDS_BY_CODE_TYPE[code_type]()
    backend.validate(options, target_type, extra_digest)

    code_bytes = backend.encode_code(code, code_type)
    option_bytes = backend.option_fingerprint(options, target_type)
    name_tags = backend.encode_name_expressions(name_expressions)

    # IMPORTANT: Must use a FIPS-approved hash algorithm (SHA-2 family).
    # FIPS-enforcing systems can disable non-approved hashlib algorithms
    # (for example blake2b) at the OpenSSL level. See #2043.
    hasher = hashlib.sha256(usedforsecurity=False)

    def _update(label: str, payload: bytes) -> None:
        # Each field is framed as label || 8-byte big-endian length || payload
        # so adjacent payloads cannot run together.
        hasher.update(label.encode("ascii"))
        hasher.update(len(payload).to_bytes(8, "big"))
        hasher.update(payload)

    # NOTE: the order of the ``_update`` calls below is part of the key
    # schema; reordering them changes every digest.
    _update("schema", str(_KEY_SCHEMA_VERSION).encode("ascii"))
    backend.hash_version_probe(_update)
    _update("code_type", code_type.encode("ascii"))
    _update("target_type", target_type.encode("ascii"))
    _update("code", code_bytes)
    _update("option_count", str(len(option_bytes)).encode("ascii"))
    for opt in option_bytes:
        _update("option", bytes(opt))
    if name_tags is not None:
        # ``encode_name_expressions`` returns ``None`` from backends that
        # ignore name_expressions and a (possibly-empty) tuple from those
        # that consume them. Hashing ``names_count=0`` for the latter
        # keeps the schema stable across "absent" and "empty" inputs.
        _update("names_count", str(len(name_tags)).encode("ascii"))
        for n in name_tags:
            _update("name", n)
    backend.hash_extra_payload(options, _update)

    # ``Program.compile()`` propagates ``options.name`` onto the returned
    # ObjectCode, so two compiles identical in everything but name produce
    # ObjectCodes that differ in their public ``name`` attribute. The key
    # must reflect that or a cache hit could hand back an entry with the
    # wrong name. Universal across backends. PTX additionally hashes
    # ``name`` via ``_linker_option_fingerprint`` (the linker reads it),
    # so for the linker path the value is mixed in twice under
    # different labels. The redundancy is harmless -- distinct labels
    # mean it cannot collide -- and the universal hash here keeps the
    # ``options.name`` invariant in one place rather than per-backend.
    options_name = getattr(options, "name", None)
    if options_name is not None:
        _update("options_name", str(options_name).encode("utf-8"))

    if extra_digest is not None:
        _update("extra_digest", bytes(extra_digest))

    return hasher.digest()