Coverage for cuda / pathfinder / _dynamic_libs / load_dl_linux.py: 75%
103 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-10 01:19 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-10 01:19 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2# SPDX-License-Identifier: Apache-2.0
4import contextlib
5import ctypes
6import ctypes.util
7import os
8from typing import cast
10from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL
11from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import (
12 LIBNAMES_REQUIRING_RTLD_DEEPBIND,
13 SUPPORTED_LINUX_SONAMES,
14)
# Default dlopen flags for libraries loaded by this module: bind all symbols
# at load time (RTLD_NOW) and expose them to subsequently loaded libraries
# (RTLD_GLOBAL).
CDLL_MODE = os.RTLD_GLOBAL | os.RTLD_NOW
def _load_libdl() -> ctypes.CDLL:
    """Open libdl and return its ctypes handle.

    Returns:
        The loaded libdl as a `ctypes.CDLL`.

    Raises:
        RuntimeError: If neither the discovered name nor the fallback SONAME
            can be loaded.
    """
    soname = ctypes.util.find_library("dl")
    if not soname:
        # On a normal glibc-based Linux system find_library("dl") yields
        # something like "libdl.so.2". Minimal or stripped-down environments
        # (no ldconfig/gcc, incomplete linker cache) can get None back even
        # though libdl is present, so fall back to the stable SONAME.
        soname = "libdl.so.2"
    try:
        libdl = ctypes.CDLL(soname)
    except OSError as exc:
        raise RuntimeError(f"Could not load {soname!r} (required for dlinfo/dlerror on Linux)") from exc
    return libdl
# Module-level libdl handle; it is opened as a side effect of importing this module.
LIBDL = _load_libdl()

# dlerror(): takes no arguments and returns a C string. The error string is
# thread-local and is cleared after it has been read.
LIBDL.dlerror.argtypes = []
LIBDL.dlerror.restype = ctypes.c_char_p

# dlinfo(handle, request, info): returns an int status code.
LIBDL.dlinfo.argtypes = [ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p]
LIBDL.dlinfo.restype = ctypes.c_int

# dlinfo request codes. These first appeared in 2004-era glibc and are
# universally correct on Linux for all practical purposes.
RTLD_DI_ORIGIN = 6
RTLD_DI_LINKMAP = 2
class _LinkMapLNameView(ctypes.Structure):
    """
    Prefix-only view of glibc's `struct link_map` used **solely** to read `l_name`.

    Background:
    - `dlinfo(handle, RTLD_DI_LINKMAP, ...)` returns a `struct link_map*`.
    - The first few members of `struct link_map` (including `l_name`) have been
      stable on glibc for decades and are documented as debugger-visible.
    - We only need the offset/layout of `l_name`, not the full struct.

    Safety constraints:
    - This is a **partial** definition (prefix). It must only be used via a pointer
      returned by `dlinfo(...)`.
    - Do **not** instantiate it or pass it **by value** to any C function.
    - Do **not** access any members beyond those declared here.
    - Do **not** rely on `ctypes.sizeof(_LinkMapLNameView)` for allocation.

    Rationale:
    - Defining only the leading fields avoids depending on internal/unstable
      tail members while keeping code more readable than raw pointer arithmetic.
    """

    # Only the two leading members are declared; the order fixes the offset
    # at which `l_name` is read.
    _fields_ = (
        ("l_addr", ctypes.c_void_p),  # ElfW(Addr)
        ("l_name", ctypes.c_char_p),  # char*
    )
# Defensive check, mainly to document the layout invariants we depend on:
# `l_addr` sits at offset 0 and `l_name` follows exactly one pointer later.
assert (_LinkMapLNameView.l_addr.offset, _LinkMapLNameView.l_name.offset) == (
    0,
    ctypes.sizeof(ctypes.c_void_p),
)
def _dl_last_error() -> str | None:
    """Return the message currently reported by `dlerror()`, if any.

    Returns:
        The decoded error text, or None when `dlerror()` yields nothing
        (NULL or an empty string), i.e. no error is pending.
    """
    raw = cast(bytes | None, LIBDL.dlerror())
    # Decoding never raises: undecodable bytes are mapped to U+DC80..U+DCFF.
    return raw.decode("utf-8", "surrogateescape") if raw else None
def l_name_for_dynamic_library(libname: str, handle: ctypes.CDLL) -> str:
    """Return `link_map->l_name` for a loaded library.

    Args:
        libname: Library name; used only to label error messages.
        handle: The loaded library to query via `dlinfo(RTLD_DI_LINKMAP)`.

    Returns:
        The `l_name` string, decoded with `os.fsdecode`.

    Raises:
        OSError: If `dlinfo` reports failure, leaves the link_map pointer
            NULL, or yields a NULL/empty `l_name`.
    """
    link_map_ptr = ctypes.POINTER(_LinkMapLNameView)()
    status = LIBDL.dlinfo(ctypes.c_void_p(handle._handle), RTLD_DI_LINKMAP, ctypes.byref(link_map_ptr))
    if status != 0:
        detail = _dl_last_error()
        suffix = f": {detail}" if detail else ""
        raise OSError(f"dlinfo failed for {libname=!r} (rc={status})" + suffix)
    if not link_map_ptr:  # dlinfo reported success but the pointer is still NULL
        raise OSError(f"dlinfo returned NULL link_map pointer for {libname=!r}")

    raw_name = link_map_ptr.contents.l_name
    if not raw_name:
        raise OSError(f"dlinfo returned empty link_map->l_name for {libname=!r}")

    decoded = os.fsdecode(raw_name)
    if not decoded:
        raise OSError(f"dlinfo returned empty l_name string for {libname=!r}")
    return decoded
def l_origin_for_dynamic_library(libname: str, handle: ctypes.CDLL) -> str:
    """Return the origin string `dlinfo(RTLD_DI_ORIGIN)` reports for a loaded library.

    Args:
        libname: Library name; used only to label error messages.
        handle: The loaded library to query.

    Returns:
        The origin string written by `dlinfo`, decoded with `os.fsdecode`.

    Raises:
        OSError: If `dlinfo` reports failure or writes an empty string.
    """
    # dlinfo writes the origin into this caller-provided buffer.
    origin_buf = ctypes.create_string_buffer(4096)
    status = LIBDL.dlinfo(ctypes.c_void_p(handle._handle), RTLD_DI_ORIGIN, origin_buf)
    if status != 0:
        detail = _dl_last_error()
        suffix = f": {detail}" if detail else ""
        raise OSError(f"dlinfo failed for {libname=!r} (rc={status})" + suffix)

    origin = os.fsdecode(origin_buf.value)
    if origin:
        return origin
    raise OSError(f"dlinfo returned empty l_origin string for {libname=!r}")
def abs_path_for_dynamic_library(libname: str, handle: ctypes.CDLL) -> str:
    """Build the path of a loaded library from its dlinfo origin and file name.

    Args:
        libname: Library name; forwarded to the helpers for error messages.
        handle: The loaded library to query.

    Returns:
        The origin directory joined with the basename of `l_name`.

    Raises:
        OSError: Propagated from the `l_name` / `l_origin` lookups.
    """
    # The l_name lookup runs first, so its error wins if both would fail.
    file_name = os.path.basename(l_name_for_dynamic_library(libname, handle))
    origin_dir = l_origin_for_dynamic_library(libname, handle)
    return os.path.join(origin_dir, file_name)
def get_candidate_sonames(libname: str) -> list[str]:
    """List the names to try for `libname`, in search order.

    Args:
        libname: Library name used as the key into `SUPPORTED_LINUX_SONAMES`.

    Returns:
        The tabulated SONAMEs in reverse of their tabulated order, followed
        by the generic `lib<libname>.so`. For a `libname` with no tabulated
        entry, only the generic name is returned.
    """
    tabulated = SUPPORTED_LINUX_SONAMES.get(libname, ())
    # Reversing the tabulated names gives a newest-first search order.
    return [*reversed(tabulated), f"lib{libname}.so"]
def check_if_already_loaded_from_elsewhere(libname: str, _have_abs_path: bool) -> LoadedDL | None:
    """Report whether one of the candidate SONAMEs is already loaded in this process.

    Args:
        libname: The name of the library to look for
        _have_abs_path: Not used by this implementation

    Returns:
        A LoadedDL for the first candidate that opens with RTLD_NOLOAD,
        or None if no candidate does

    Raises:
        OSError: If a candidate opens but its path cannot be determined via dlinfo
    """
    for candidate in get_candidate_sonames(libname):
        try:
            existing = ctypes.CDLL(candidate, mode=os.RTLD_NOLOAD)
        except OSError:
            # This name could not be opened with RTLD_NOLOAD; try the next one.
            continue
        resolved_path = abs_path_for_dynamic_library(libname, existing)
        return LoadedDL(resolved_path, True, existing._handle, "was-already-loaded-from-elsewhere")
    return None
def _load_lib(libname: str, filename: str) -> ctypes.CDLL:
    """Open `filename` with the module's default flags.

    RTLD_DEEPBIND is added when `libname` is listed in
    `LIBNAMES_REQUIRING_RTLD_DEEPBIND`.

    Args:
        libname: Library name used for the DEEPBIND lookup.
        filename: SONAME or path handed to `ctypes.CDLL`.

    Returns:
        The opened library.

    Raises:
        OSError: If `ctypes.CDLL` cannot open `filename`.
    """
    needs_deepbind = libname in LIBNAMES_REQUIRING_RTLD_DEEPBIND
    flags = (CDLL_MODE | os.RTLD_DEEPBIND) if needs_deepbind else CDLL_MODE
    return ctypes.CDLL(filename, flags)
def load_with_system_search(libname: str) -> LoadedDL | None:
    """Try to load a library using system search paths.

    Candidate names come from `get_candidate_sonames` and are tried in that
    order; the first one that can be opened is returned.

    Args:
        libname: The name of the library to load

    Returns:
        A LoadedDL object if successful, None if the library cannot be loaded

    Raises:
        OSError: If a candidate is loaded but its path cannot be determined via dlinfo
    """
    for soname in get_candidate_sonames(libname):
        try:
            handle = _load_lib(libname, soname)
        except OSError:
            # This name could not be opened; try the next candidate.
            continue
        # Kept outside the try on purpose: a dlinfo failure for a library that
        # did load must propagate instead of being treated as "not found".
        # abs_path_for_dynamic_library returns a str or raises, so no None
        # check is needed here.
        abs_path = abs_path_for_dynamic_library(libname, handle)
        return LoadedDL(abs_path, False, handle._handle, "system-search")
    return None
def _work_around_known_bugs(libname: str, found_path: str) -> None:
    """Pre-load a dependency for one known-broken nvrtc wheel.

    Does nothing unless `libname` is "nvrtc", the file name of `found_path`
    is "libnvrtc.so.13", and "libnvrtc-builtins.so.13.0" exists next to it.

    Args:
        libname: The name of the library about to be loaded
        found_path: The path the library is about to be loaded from
    """
    if libname != "nvrtc":
        return

    # Work around bug/oversight in
    # nvidia_cuda_nvrtc-13.0.48-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl
    # Issue: libnvrtc.so.13 RUNPATH is not set.
    # This workaround is highly specific
    # - for simplicity.
    # - to not mask bugs in future nvidia-cuda-nvrtc releases.
    # - because a more general workaround is complicated.
    lib_dir, lib_file = os.path.split(found_path)
    if lib_file != "libnvrtc.so.13":
        return

    builtins_path = os.path.join(lib_dir, "libnvrtc-builtins.so.13.0")
    if not os.path.isfile(builtins_path):
        return

    # In case of failure, defer to primary load, which is almost certain to fail, too.
    with contextlib.suppress(OSError):
        ctypes.CDLL(builtins_path, CDLL_MODE)
def load_with_abs_path(libname: str, found_path: str, found_via: str | None = None) -> LoadedDL:
    """Load a dynamic library from the given path.

    Known packaging bugs are worked around (see `_work_around_known_bugs`)
    before the library itself is opened.

    Args:
        libname: The name of the library to load
        found_path: The absolute path to the library file
        found_via: Passed through unchanged to the returned LoadedDL

    Returns:
        A LoadedDL object representing the loaded library

    Raises:
        RuntimeError: If the library cannot be loaded
    """
    _work_around_known_bugs(libname, found_path)
    try:
        lib = _load_lib(libname, found_path)
    except OSError as exc:
        raise RuntimeError(f"Failed to dlopen {found_path}: {exc}") from exc
    else:
        return LoadedDL(found_path, False, lib._handle, found_via)