Coverage for cuda / pathfinder / _dynamic_libs / load_dl_linux.py: 74.76%

103 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-08 01:07 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# SPDX-License-Identifier: Apache-2.0 

3 

4from __future__ import annotations 

5 

6import contextlib 

7import ctypes 

8import ctypes.util 

9import os 

10from typing import TYPE_CHECKING, cast 

11 

12from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL 

13 

14if TYPE_CHECKING: 

15 from cuda.pathfinder._dynamic_libs.lib_descriptor import LibDescriptor 

16 

# Default dlopen flags for the libraries loaded by this module:
# resolve all symbols immediately and export them to subsequently loaded libraries.
CDLL_MODE = os.RTLD_GLOBAL | os.RTLD_NOW

18 

19 

def _load_libdl() -> ctypes.CDLL:
    """Open libdl and return its ctypes handle.

    Returns:
        The loaded ``ctypes.CDLL`` for libdl.

    Raises:
        RuntimeError: If the library cannot be opened (chained from the ``OSError``).
    """
    # On ordinary glibc systems find_library("dl") yields something like
    # "libdl.so.2". Minimal or stripped-down environments (no ldconfig/gcc,
    # incomplete linker cache) may yield None although libdl is present, so
    # the stable SONAME is used as the fallback.
    soname = ctypes.util.find_library("dl")
    if not soname:
        soname = "libdl.so.2"
    try:
        libdl = ctypes.CDLL(soname)
    except OSError as exc:
        raise RuntimeError(f"Could not load {soname!r} (required for dlinfo/dlerror on Linux)") from exc
    return libdl

30 

31 

# Module-wide libdl handle; the prototypes below are attached to it once at import time.
LIBDL = _load_libdl()

# dlinfo: takes (void *handle, int request, void *info) and returns an int status
LIBDL.dlinfo.restype = ctypes.c_int
LIBDL.dlinfo.argtypes = [ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p]

# dlerror: takes no arguments (thread-local error string; cleared after read)
LIBDL.dlerror.restype = ctypes.c_char_p
LIBDL.dlerror.argtypes = []

# dlinfo request codes.
# First appeared in 2004-era glibc. Universally correct on Linux for all practical purposes.
RTLD_DI_ORIGIN = 6
RTLD_DI_LINKMAP = 2

45 

46 

class _LinkMapLNameView(ctypes.Structure):
    """
    Prefix-only view of glibc's `struct link_map` used **solely** to read `l_name`.

    Background:
      - `dlinfo(handle, RTLD_DI_LINKMAP, ...)` returns a `struct link_map*`.
      - The first few members of `struct link_map` (including `l_name`) have been
        stable on glibc for decades and are documented as debugger-visible.
      - We only need the offset/layout of `l_name`, not the full struct.

    Safety constraints:
      - This is a **partial** definition (prefix). It must only be used via a pointer
        returned by `dlinfo(...)`.
      - Do **not** instantiate it or pass it **by value** to any C function.
      - Do **not** access any members beyond those declared here.
      - Do **not** rely on `ctypes.sizeof(_LinkMapLNameView)` for allocation.

    Rationale:
      - Defining only the leading fields avoids depending on internal/unstable
        tail members while keeping code more readable than raw pointer arithmetic.
    """

    _fields_ = (
        ("l_addr", ctypes.c_void_p),  # ElfW(Addr)
        ("l_name", ctypes.c_char_p),  # char*
    )


# Defensive assertions, mainly to document the invariants we depend on
assert _LinkMapLNameView.l_addr.offset == 0
assert _LinkMapLNameView.l_name.offset == ctypes.sizeof(ctypes.c_void_p)

78 

79 

def _dl_last_error() -> str | None:
    """Fetch the pending ``dlerror()`` message as text.

    Returns:
        The decoded message, or ``None`` when ``dlerror()`` reports nothing.
    """
    raw = cast(bytes | None, LIBDL.dlerror())
    # surrogateescape never raises; undecodable bytes are mapped to U+DC80..U+DCFF
    return raw.decode("utf-8", "surrogateescape") if raw else None

86 

87 

def l_name_for_dynamic_library(libname: str, handle: ctypes.CDLL) -> str:
    """Read ``link_map->l_name`` for an already-loaded library via ``dlinfo``.

    Args:
        libname: Library name; used only to label error messages.
        handle: ctypes handle of the loaded library.

    Returns:
        The filesystem-decoded ``l_name`` string.

    Raises:
        OSError: If ``dlinfo`` reports failure, leaves the link-map pointer
            NULL, or yields an empty name.
    """
    link_map_ptr = ctypes.POINTER(_LinkMapLNameView)()
    raw_handle = ctypes.c_void_p(handle._handle)
    status = LIBDL.dlinfo(raw_handle, RTLD_DI_LINKMAP, ctypes.byref(link_map_ptr))
    if status != 0:
        detail = _dl_last_error()
        suffix = f": {detail}" if detail else ""
        raise OSError(f"dlinfo failed for {libname=!r} (rc={status})" + suffix)
    if not link_map_ptr:  # dlinfo succeeded but left the pointer NULL
        raise OSError(f"dlinfo returned NULL link_map pointer for {libname=!r}")

    name_bytes = link_map_ptr.contents.l_name
    if not name_bytes:
        raise OSError(f"dlinfo returned empty link_map->l_name for {libname=!r}")

    decoded = os.fsdecode(name_bytes)
    if not decoded:
        raise OSError(f"dlinfo returned empty l_name string for {libname=!r}")
    return decoded

106 

107 

def l_origin_for_dynamic_library(libname: str, handle: ctypes.CDLL) -> str:
    """Query ``dlinfo(RTLD_DI_ORIGIN)`` for an already-loaded library.

    Args:
        libname: Library name; used only to label error messages.
        handle: ctypes handle of the loaded library.

    Returns:
        The filesystem-decoded origin string written by ``dlinfo``.

    Raises:
        OSError: If ``dlinfo`` reports failure or writes an empty string.
    """
    # dlinfo writes the origin string into this caller-provided 4096-byte buffer.
    origin_buf = ctypes.create_string_buffer(4096)
    raw_handle = ctypes.c_void_p(handle._handle)
    status = LIBDL.dlinfo(raw_handle, RTLD_DI_ORIGIN, origin_buf)
    if status != 0:
        detail = _dl_last_error()
        suffix = f": {detail}" if detail else ""
        raise OSError(f"dlinfo failed for {libname=!r} (rc={status})" + suffix)

    origin = os.fsdecode(origin_buf.value)
    if origin:
        return origin
    raise OSError(f"dlinfo returned empty l_origin string for {libname=!r}")

120 

121 

def abs_path_for_dynamic_library(libname: str, handle: ctypes.CDLL) -> str:
    """Build the path of a loaded library from its dlinfo origin and l_name.

    Args:
        libname: Library name; forwarded to the helpers for error messages.
        handle: ctypes handle of the loaded library.

    Returns:
        The origin directory joined with the basename of ``l_name``.

    Raises:
        OSError: Propagated from the ``l_name`` / ``l_origin`` lookups.
    """
    filename = os.path.basename(l_name_for_dynamic_library(libname, handle))
    directory = l_origin_for_dynamic_library(libname, handle)
    return os.path.join(directory, filename)

126 

127 

def _candidate_sonames(desc: LibDescriptor) -> list[str]:
    """List the sonames to try for ``desc``, in search order.

    The tabulated ``desc.linux_sonames`` are reversed to get a new -> old
    order, and the unversioned ``lib<name>.so`` is tried last.
    """
    return [*reversed(desc.linux_sonames), f"lib{desc.name}.so"]

133 

134 

def check_if_already_loaded_from_elsewhere(desc: LibDescriptor, _have_abs_path: bool) -> LoadedDL | None:
    """Report whether one of the library's candidate sonames is already loaded.

    Args:
        desc: Descriptor for the library of interest.
        _have_abs_path: Not used by this implementation.

    Returns:
        A LoadedDL for the first candidate soname that opens with
        ``RTLD_NOLOAD``, or None if none of them does.
    """
    for candidate in _candidate_sonames(desc):
        try:
            # RTLD_NOLOAD: only obtain a handle if the library is already resident.
            existing = ctypes.CDLL(candidate, mode=os.RTLD_NOLOAD)
        except OSError:
            continue
        resolved_path = abs_path_for_dynamic_library(desc.name, existing)
        return LoadedDL(
            resolved_path,
            True,
            existing._handle,
            "was-already-loaded-from-elsewhere",
        )
    return None

149 

150 

def _load_lib(desc: LibDescriptor, filename: str) -> ctypes.CDLL:
    """Open ``filename`` with the module's default flags.

    ``RTLD_DEEPBIND`` is added when ``desc.requires_rtld_deepbind`` is set.
    An ``OSError`` from ``ctypes.CDLL`` is not handled here.
    """
    flags = (CDLL_MODE | os.RTLD_DEEPBIND) if desc.requires_rtld_deepbind else CDLL_MODE
    return ctypes.CDLL(filename, flags)

156 

157 

def load_with_system_search(desc: LibDescriptor) -> LoadedDL | None:
    """Try to load a library using system search paths.

    The candidate sonames for ``desc`` are tried in order; the first one that
    loads is returned.

    Args:
        desc: Descriptor for the library to load.

    Returns:
        A LoadedDL object if successful, None if none of the candidate
        sonames can be loaded.

    Raises:
        OSError: If a candidate loads but its path cannot be determined
            via dlinfo.
    """
    for soname in _candidate_sonames(desc):
        try:
            handle = _load_lib(desc, soname)
        except OSError:
            # Not loadable under this soname; move on to the next candidate.
            continue
        # Resolved outside the try block so that dlinfo failures are not
        # mistaken for "library not found" and silently skipped.
        abs_path = abs_path_for_dynamic_library(desc.name, handle)
        return LoadedDL(abs_path, False, handle._handle, "system-search")
    return None

181 

182 

def _work_around_known_bugs(libname: str, found_path: str) -> None:
    """Pre-load ``libnvrtc-builtins.so.13.0`` next to ``libnvrtc.so.13``, if present.

    Work around bug/oversight in
    nvidia_cuda_nvrtc-13.0.48-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl
    Issue: libnvrtc.so.13 RUNPATH is not set.

    This workaround is highly specific
      - for simplicity.
      - to not mask bugs in future nvidia-cuda-nvrtc releases.
      - because a more general workaround is complicated.

    Args:
        libname: Name of the library about to be loaded.
        found_path: Path at which that library was found.
    """
    if libname != "nvrtc":
        return
    lib_dir, lib_file = os.path.split(found_path)
    if lib_file != "libnvrtc.so.13":
        return
    builtins_path = os.path.join(lib_dir, "libnvrtc-builtins.so.13.0")
    if not os.path.isfile(builtins_path):
        return
    # In case of failure, defer to primary load, which is almost certain to fail, too.
    with contextlib.suppress(OSError):
        ctypes.CDLL(builtins_path, CDLL_MODE)

200 

201 

def load_with_abs_path(desc: LibDescriptor, found_path: str, found_via: str | None = None) -> LoadedDL:
    """Load a dynamic library from an explicit path.

    Known packaging bugs are worked around first (see
    ``_work_around_known_bugs``), then the library itself is opened.

    Args:
        desc: Descriptor for the library to load.
        found_path: The absolute path to the library file.
        found_via: Label indicating how the path was discovered.

    Returns:
        A LoadedDL object representing the loaded library.

    Raises:
        RuntimeError: If the library cannot be loaded.
    """
    _work_around_known_bugs(desc.name, found_path)
    try:
        lib = _load_lib(desc, found_path)
    except OSError as exc:
        raise RuntimeError(f"Failed to dlopen {found_path}: {exc}") from exc
    return LoadedDL(found_path, False, lib._handle, found_via)