Coverage for cuda / pathfinder / _dynamic_libs / load_nvidia_dynamic_lib.py: 98.82%
85 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-09 01:05 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-09 01:05 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2# SPDX-License-Identifier: Apache-2.0
4from __future__ import annotations
6import functools
7import json
8import struct
9import sys
10from typing import TYPE_CHECKING
12from cuda.pathfinder._dynamic_libs.canary_probe_subprocess import probe_canary_abs_path_and_print_json
13from cuda.pathfinder._dynamic_libs.lib_descriptor import LIB_DESCRIPTORS
14from cuda.pathfinder._dynamic_libs.load_dl_common import (
15 DynamicLibNotAvailableError,
16 DynamicLibNotFoundError,
17 DynamicLibUnknownError,
18 LoadedDL,
19 load_dependencies,
20)
21from cuda.pathfinder._dynamic_libs.platform_loader import LOADER
22from cuda.pathfinder._dynamic_libs.search_steps import (
23 EARLY_FIND_STEPS,
24 LATE_FIND_STEPS,
25 SearchContext,
26 derive_ctk_root,
27 find_via_ctk_root,
28 run_find_steps,
29)
30from cuda.pathfinder._utils.platform_aware import IS_WINDOWS
31from cuda.pathfinder._utils.spawned_process_runner import run_in_spawned_child_process
33if TYPE_CHECKING:
34 from cuda.pathfinder._dynamic_libs.lib_descriptor import LibDescriptor
# Every library name that has an entry in LIB_DESCRIPTORS, regardless of
# category (CTK, third-party, driver) or platform availability.
_ALL_KNOWN_LIBNAMES: frozenset[str] = frozenset(LIB_DESCRIPTORS)

# The subset of known names whose descriptor lists at least one loadable
# file name for the current platform (Windows DLLs or Linux sonames).
_ALL_SUPPORTED_LIBNAMES: frozenset[str] = frozenset(
    libname
    for libname, descriptor in LIB_DESCRIPTORS.items()
    if (descriptor.windows_dlls if IS_WINDOWS else descriptor.linux_sonames)
)

# Human-readable platform label used in error messages.
_PLATFORM_NAME = "Windows" if IS_WINDOWS else "Linux"

# Names whose descriptor is marked as packaged with the NVIDIA display
# driver. Such libraries are always on the system linker path, so they skip
# every CTK search step (site-packages, conda, CUDA_HOME, canary) and go
# straight to system search.
_DRIVER_ONLY_LIBNAMES = frozenset(
    libname for libname, descriptor in LIB_DESCRIPTORS.items() if descriptor.packaged_with == "driver"
)
def _load_driver_lib_no_cache(desc: LibDescriptor) -> LoadedDL:
    """Load a driver-packaged NVIDIA library using system search only.

    Driver libraries (libcuda, libnvidia-ml) ship with the display driver
    rather than the CUDA Toolkit and are always on the system linker path,
    so none of the CTK search steps (site-packages, conda, CUDA_HOME,
    canary) are attempted here.

    Args:
        desc: Descriptor of the driver library to load.

    Returns:
        The handle of a copy already loaded in this process if the loader
        reports one, otherwise the result of the loader's system search.

    Raises:
        DynamicLibNotFoundError: If the library is neither already loaded
            nor loadable via system search.
    """
    # ``False``: no on-disk path was located beforehand for driver libs.
    loaded = LOADER.check_if_already_loaded_from_elsewhere(desc, False)
    if loaded is None:
        loaded = LOADER.load_with_system_search(desc)
    if loaded is None:
        raise DynamicLibNotFoundError(
            f'"{desc.name}" is an NVIDIA driver library and can only be found via'
            f" system search. Ensure the NVIDIA display driver is installed."
        )
    return loaded
@functools.cache
def _resolve_system_loaded_abs_path_in_subprocess(libname: str) -> str | None:
    """Ask a spawned child process for the absolute path of a canary library.

    The child runs ``probe_canary_abs_path_and_print_json`` for ``libname``
    and is expected to print a JSON payload on stdout: either a string (the
    absolute path) or ``null``. Return values are memoized per ``libname``
    by ``functools.cache``.

    Args:
        libname: Name of the canary library to probe.

    Returns:
        The decoded string payload, or ``None`` when the payload is JSON
        ``null``.

    Raises:
        RuntimeError: If the child wrote no non-blank stdout line, the last
            non-blank line is not valid JSON, or the decoded payload is
            neither a string nor ``None``.
    """
    # NOTE(review): ``rethrow=True`` presumably propagates child failures and
    # ``timeout`` is presumably in seconds -- confirm against
    # run_in_spawned_child_process.
    probe_result = run_in_spawned_child_process(
        probe_canary_abs_path_and_print_json,
        args=(libname,),
        timeout=10.0,
        rethrow=True,
    )

    # Only the last non-blank stdout line is treated as the payload, so any
    # earlier output lines are ignored.
    lines = [text for text in probe_result.stdout.splitlines() if text.strip()]
    if not lines:
        raise RuntimeError(f"Canary probe child process produced no stdout payload for {libname!r}")

    try:
        payload = json.loads(lines[-1])
    except json.JSONDecodeError:
        raise RuntimeError(
            f"Canary probe child process emitted invalid JSON payload for {libname!r}: {lines[-1]!r}"
        ) from None

    # Anything other than a string or null is a protocol violation.
    if payload is not None and not isinstance(payload, str):
        raise RuntimeError(f"Canary probe child process emitted unexpected payload for {libname!r}: {payload!r}")
    return payload
def _try_ctk_root_canary(ctx: SearchContext) -> str | None:
    """Locate the target library via a CTK root derived from a canary library.

    Each canary anchor name configured on ``ctx.desc`` is tried in order:
    its absolute path is resolved in a subprocess, a CTK root is derived
    from that path, and the target library is searched for under that root.
    The first anchor for which all three steps succeed wins.

    Args:
        ctx: Search context for the library being located.

    Returns:
        The absolute path of the located library as a string, or ``None``
        if no anchor leads to a hit.
    """
    for anchor_libname in ctx.desc.ctk_root_canary_anchor_libnames:
        anchor_abs_path = _resolve_system_loaded_abs_path_in_subprocess(anchor_libname)
        # Each stage runs only when the previous one produced a value.
        candidate_root = None if anchor_abs_path is None else derive_ctk_root(anchor_abs_path)
        located = None if candidate_root is None else find_via_ctk_root(ctx, candidate_root)
        if located is not None:
            return str(located.abs_path)
    return None
def _load_lib_no_cache(libname: str) -> LoadedDL:
    """Run the full, uncached search-and-load cascade for ``libname``.

    Driver-only libraries are delegated to ``_load_driver_lib_no_cache``.
    For all other libraries the sequence is:

    1. Early find steps (pip wheels, conda) to locate the file on disk.
    2. Already-loaded check, followed by loading of dependencies.
    3. Load from the early hit if there was one; otherwise system search,
       then the late find steps, then the CTK-root canary fallback for
       descriptors that configure canary anchors.

    Args:
        libname: Key into ``LIB_DESCRIPTORS``.

    Returns:
        The loaded library.

    Raises:
        Whatever ``ctx.raise_not_found()`` raises when every step fails.
    """
    descriptor = LIB_DESCRIPTORS[libname]
    if libname in _DRIVER_ONLY_LIBNAMES:
        return _load_driver_lib_no_cache(descriptor)

    search_ctx = SearchContext(descriptor)

    # Step 1: look for the library file on disk (pip wheels, conda).
    early_hit = run_find_steps(search_ctx, EARLY_FIND_STEPS)
    found_on_disk = early_hit is not None

    # Step 2: already-loaded check and dependency loading. On Windows the
    # "have we found a path?" flag decides whether AddDllDirectory
    # side-effects are applied. Dependencies are loaded before returning,
    # even when the library itself is already loaded.
    preloaded = LOADER.check_if_already_loaded_from_elsewhere(descriptor, found_on_disk)
    load_dependencies(descriptor, load_nvidia_dynamic_lib)
    if preloaded is not None:
        return preloaded

    # Step 3: load from the early hit, else system search, then late find.
    if early_hit is not None:
        return LOADER.load_with_abs_path(descriptor, early_hit.abs_path, early_hit.found_via)

    system_loaded = LOADER.load_with_system_search(descriptor)
    if system_loaded is not None:
        return system_loaded

    late_hit = run_find_steps(search_ctx, LATE_FIND_STEPS)
    if late_hit is not None:
        return LOADER.load_with_abs_path(descriptor, late_hit.abs_path, late_hit.found_via)

    # Last resort: derive the CTK root from a canary library, if configured.
    if descriptor.ctk_root_canary_anchor_libnames:
        canary_abs_path = _try_ctk_root_canary(search_ctx)
        if canary_abs_path is not None:
            return LOADER.load_with_abs_path(descriptor, canary_abs_path, "system-ctk-root")

    search_ctx.raise_not_found()
@functools.cache
def load_nvidia_dynamic_lib(libname: str) -> LoadedDL:
    """Locate and load an NVIDIA dynamic library given its short name.

    Args:
        libname (str): Short library name, for example ``"cudart"`` or
            ``"nvvm"``.

    Returns:
        LoadedDL: Holds the OS library handle and the absolute path.

    **Important:**

    **Do not close the returned handle.** Never pass
    ``LoadedDL._handle_uint`` to ``dlclose`` (Linux) or ``FreeLibrary``
    (Windows).

    **Why:** results are memoized with ``functools.cache`` and shared by the
    whole process. Closing the handle may unload the module while other code
    is still using it, causing crashes or subtle failures.

    This holds on both Linux and Windows. Background is in issue #1011:
    https://github.com/NVIDIA/cuda-python/issues/1011

    Raises:
        DynamicLibUnknownError: ``libname`` is not a recognized library name.
        DynamicLibNotAvailableError: ``libname`` is recognized but has no
            supported form on this platform.
        DynamicLibNotFoundError: The library could not be found or loaded.
        RuntimeError: The running Python is not 64-bit.

    Search order:
        0. **Already loaded in the current process**

           - When another component has already loaded a matching library,
             its absolute path and handle are returned and the remaining
             steps are skipped.

        1. **NVIDIA Python wheels**

           - Installed distributions (``site-packages``) are scanned for
             libraries shipped in NVIDIA wheels.

        2. **Conda environment**

           - Conda installations are detected through ``CONDA_PREFIX``,
             which activated conda environments define automatically (see
             https://docs.conda.io/projects/conda-build/en/stable/user-guide/environment-variables.html).

        3. **OS default mechanisms**

           - The native loader is used as a fallback:

             - Linux: ``dlopen()``

             - Windows: ``LoadLibraryW()``

           - System-wide CUDA Toolkit (CTK) installs that update the system
             configuration are typically picked up through:

             - Linux: ``/etc/ld.so.conf.d/*cuda*.conf``

             - Windows: ``C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\vX.Y\\bin``
               on the system ``PATH``.

        4. **Environment variables**

           - ``CUDA_HOME`` or ``CUDA_PATH`` (checked in that order), if set.

        5. **CTK root canary probe (discoverable libs only)**

           - For selected libraries whose shared object is not on the
             standard linker path (currently ``nvvm``), the CTK root is
             derived by system-loading a well-known CTK canary library in a
             subprocess; the target is then searched relative to that root.

    **Driver libraries** (``"cuda"``, ``"nvml"``):

        These belong to the NVIDIA display driver, not the CUDA Toolkit, and
        are always on the system linker path. Their search is reduced to:

        0. Already loaded in the current process
        1. OS default mechanisms (``dlopen`` / ``LoadLibraryW``)

        All CTK-specific steps (site-packages, conda, ``CUDA_HOME``, canary
        probe) are skipped.

    Notes:
        Each library is resolved **independently**. Nothing currently
        guarantees that several libraries are all resolved from the same
        location.
    """
    # The size of a C pointer, in bytes, tells us the interpreter's bitness.
    pointer_size_bits = 8 * struct.calcsize("P")
    if pointer_size_bits != 64:
        bitness_message = (
            f"cuda.pathfinder.load_nvidia_dynamic_lib() requires 64-bit Python."
            f" Currently running: {pointer_size_bits}-bit Python"
            f" {sys.version_info.major}.{sys.version_info.minor}"
        )
        raise RuntimeError(bitness_message)

    # Reject names with no descriptor at all before checking platform support.
    if libname not in _ALL_KNOWN_LIBNAMES:
        unknown_message = f"Unknown library name: {libname!r}. Known names: {sorted(_ALL_KNOWN_LIBNAMES)}"
        raise DynamicLibUnknownError(unknown_message)

    if libname not in _ALL_SUPPORTED_LIBNAMES:
        unavailable_message = (
            f"Library name {libname!r} is known but not available on {_PLATFORM_NAME}. "
            f"Supported names on {_PLATFORM_NAME}: {sorted(_ALL_SUPPORTED_LIBNAMES)}"
        )
        raise DynamicLibNotAvailableError(unavailable_message)

    return _load_lib_no_cache(libname)