Coverage for cuda / pathfinder / _dynamic_libs / load_dl_linux.py: 75%

103 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-10 01:19 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# SPDX-License-Identifier: Apache-2.0 

3 

4import contextlib 

5import ctypes 

6import ctypes.util 

7import os 

8from typing import cast 

9 

10from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL 

11from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import ( 

12 LIBNAMES_REQUIRING_RTLD_DEEPBIND, 

13 SUPPORTED_LINUX_SONAMES, 

14) 

15 

# Default dlopen() flags used by this module: resolve all symbols at load time
# (RTLD_NOW) and make them visible to subsequently loaded libraries (RTLD_GLOBAL).
CDLL_MODE = os.RTLD_GLOBAL | os.RTLD_NOW

17 

18 

def _load_libdl() -> ctypes.CDLL:
    """Open libdl and return the ctypes handle for it.

    Returns:
        The ctypes.CDLL object for libdl.

    Raises:
        RuntimeError: If the library cannot be opened (chained from the OSError).
    """
    # On a regular glibc-based Linux system find_library("dl") yields something
    # like "libdl.so.2". In minimal or stripped-down environments (no
    # ldconfig/gcc, incomplete linker cache) it can yield None even though libdl
    # is present, so the stable SONAME is used as the fallback.
    soname = ctypes.util.find_library("dl")
    if not soname:
        soname = "libdl.so.2"
    try:
        libdl = ctypes.CDLL(soname)
    except OSError as exc:
        raise RuntimeError(f"Could not load {soname!r} (required for dlinfo/dlerror on Linux)") from exc
    return libdl

29 

30 

# Module-wide libdl handle, opened once at import time.
LIBDL = _load_libdl()

# Request codes passed to dlinfo().
# First appeared in 2004-era glibc. Universally correct on Linux for all practical purposes.
RTLD_DI_LINKMAP = 2
RTLD_DI_ORIGIN = 6

# dlinfo: declared to ctypes as (void*, int, void*) -> int
LIBDL.dlinfo.restype = ctypes.c_int
LIBDL.dlinfo.argtypes = [ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p]

# dlerror: takes no arguments and returns char*
# (thread-local error string; cleared after read)
LIBDL.dlerror.restype = ctypes.c_char_p
LIBDL.dlerror.argtypes = []

44 

45 

class _LinkMapLNameView(ctypes.Structure):
    """
    Prefix-only view of glibc's `struct link_map` used **solely** to read `l_name`.

    Background:
    - `dlinfo(handle, RTLD_DI_LINKMAP, ...)` returns a `struct link_map*`.
    - The first few members of `struct link_map` (including `l_name`) have been
      stable on glibc for decades and are documented as debugger-visible.
    - We only need the offset/layout of `l_name`, not the full struct.

    Safety constraints:
    - This is a **partial** definition (prefix). It must only be used via a pointer
      returned by `dlinfo(...)`.
    - Do **not** instantiate it or pass it **by value** to any C function.
    - Do **not** access any members beyond those declared here.
    - Do **not** rely on `ctypes.sizeof(_LinkMapLNameView)` for allocation.

    Rationale:
    - Defining only the leading fields avoids depending on internal/unstable
      tail members while keeping code more readable than raw pointer arithmetic.
    """

    _fields_ = (
        ("l_addr", ctypes.c_void_p),  # ElfW(Addr)
        ("l_name", ctypes.c_char_p),  # char*
    )


# Defensive assertions, mainly to document the invariants we depend on:
# `l_addr` is the first member and `l_name` follows it at one pointer width.
assert _LinkMapLNameView.l_addr.offset == 0
assert _LinkMapLNameView.l_name.offset == ctypes.sizeof(ctypes.c_void_p)

77 

78 

def _dl_last_error() -> str | None:
    """Return the pending dlerror() message as text, or None if there is none.

    Returns:
        The decoded message, or None when dlerror() yields NULL or an empty string.
    """
    raw = cast(bytes | None, LIBDL.dlerror())
    if raw:
        # Never raises; undecodable bytes are mapped to U+DC80..U+DCFF
        return raw.decode("utf-8", "surrogateescape")
    return None  # no pending error

85 

86 

def l_name_for_dynamic_library(libname: str, handle: ctypes.CDLL) -> str:
    """Return the `l_name` recorded in the link_map of an already-loaded library.

    Args:
        libname: Library name; used only in error messages.
        handle: ctypes handle of the loaded library to query.

    Returns:
        The `l_name` string, decoded with os.fsdecode().

    Raises:
        OSError: If dlinfo() reports failure, yields a NULL link_map pointer,
            or the resulting name is missing or empty.
    """
    link_map_ptr = ctypes.POINTER(_LinkMapLNameView)()
    status = LIBDL.dlinfo(ctypes.c_void_p(handle._handle), RTLD_DI_LINKMAP, ctypes.byref(link_map_ptr))
    if status != 0:
        detail = _dl_last_error()
        suffix = f": {detail}" if detail else ""
        raise OSError(f"dlinfo failed for {libname=!r} (rc={status})" + suffix)
    if not link_map_ptr:  # dlinfo left the link_map pointer NULL
        raise OSError(f"dlinfo returned NULL link_map pointer for {libname=!r}")

    raw_name = link_map_ptr.contents.l_name
    if not raw_name:
        raise OSError(f"dlinfo returned empty link_map->l_name for {libname=!r}")

    decoded = os.fsdecode(raw_name)
    if not decoded:
        raise OSError(f"dlinfo returned empty l_name string for {libname=!r}")

    return decoded

105 

106 

def l_origin_for_dynamic_library(libname: str, handle: ctypes.CDLL) -> str:
    """Return the origin string that dlinfo(RTLD_DI_ORIGIN) reports for a loaded library.

    Args:
        libname: Library name; used only in error messages.
        handle: ctypes handle of the loaded library to query.

    Returns:
        The origin string, decoded with os.fsdecode().

    Raises:
        OSError: If dlinfo() reports failure or the origin string is empty.
    """
    # NOTE(review): dlinfo() is not told the size of this buffer; 4096 is
    # presumably chosen to match PATH_MAX -- confirm.
    origin_buf = ctypes.create_string_buffer(4096)
    status = LIBDL.dlinfo(ctypes.c_void_p(handle._handle), RTLD_DI_ORIGIN, origin_buf)
    if status != 0:
        detail = _dl_last_error()
        suffix = f": {detail}" if detail else ""
        raise OSError(f"dlinfo failed for {libname=!r} (rc={status})" + suffix)

    origin = os.fsdecode(origin_buf.value)
    if not origin:
        raise OSError(f"dlinfo returned empty l_origin string for {libname=!r}")

    return origin

119 

120 

def abs_path_for_dynamic_library(libname: str, handle: ctypes.CDLL) -> str:
    """Build the path of a loaded library from its dlinfo origin and l_name.

    Args:
        libname: Library name; forwarded to the dlinfo helpers for error messages.
        handle: ctypes handle of the loaded library to query.

    Returns:
        The origin reported by dlinfo joined with the basename of `l_name`.

    Raises:
        OSError: Propagated from the dlinfo helpers when a query fails.
    """
    file_name = os.path.basename(l_name_for_dynamic_library(libname, handle))
    origin_dir = l_origin_for_dynamic_library(libname, handle)
    return os.path.join(origin_dir, file_name)

125 

126 

def get_candidate_sonames(libname: str) -> list[str]:
    """List the SONAMEs to try for `libname`, in search order.

    Args:
        libname: Library name used as the key into SUPPORTED_LINUX_SONAMES.

    Returns:
        The tabulated SONAMEs in reverse of their tabulated order, followed by
        the generic `lib<libname>.so` as the last candidate.
    """
    tabulated = SUPPORTED_LINUX_SONAMES.get(libname, ())
    # Reverse tabulated names to achieve new → old search order.
    return [*reversed(tabulated), f"lib{libname}.so"]

132 

133 

def check_if_already_loaded_from_elsewhere(libname: str, _have_abs_path: bool) -> LoadedDL | None:
    """Look for a candidate SONAME of `libname` that can be opened with RTLD_NOLOAD.

    Args:
        libname: The name of the library to look for.
        _have_abs_path: Not used by this implementation.

    Returns:
        A LoadedDL for the first candidate SONAME that opens with RTLD_NOLOAD,
        or None if every candidate raises OSError.

    Raises:
        OSError: Propagated from abs_path_for_dynamic_library() if the path of
            the opened library cannot be determined.
    """
    for candidate in get_candidate_sonames(libname):
        try:
            existing = ctypes.CDLL(candidate, mode=os.RTLD_NOLOAD)
        except OSError:
            continue
        resolved_path = abs_path_for_dynamic_library(libname, existing)
        return LoadedDL(resolved_path, True, existing._handle, "was-already-loaded-from-elsewhere")
    return None

145 

146 

def _load_lib(libname: str, filename: str) -> ctypes.CDLL:
    """Open `filename` via ctypes with the flags appropriate for `libname`.

    Args:
        libname: Library name; decides whether RTLD_DEEPBIND is added.
        filename: SONAME or path handed to ctypes.CDLL.

    Returns:
        The ctypes.CDLL handle for the opened library.
    """
    if libname in LIBNAMES_REQUIRING_RTLD_DEEPBIND:
        return ctypes.CDLL(filename, CDLL_MODE | os.RTLD_DEEPBIND)
    return ctypes.CDLL(filename, CDLL_MODE)

152 

153 

def load_with_system_search(libname: str) -> LoadedDL | None:
    """Try to load a library using system search paths.

    The candidate SONAMEs from get_candidate_sonames() are tried in order and
    the first one that loads is returned.

    Args:
        libname: The name of the library to load

    Returns:
        A LoadedDL object if successful, None if no candidate could be loaded

    Raises:
        OSError: If a candidate was loaded but its path could not be
            determined via dlinfo
    """
    for soname in get_candidate_sonames(libname):
        try:
            handle = _load_lib(libname, soname)
        except OSError:
            # This candidate is not loadable; move on to the next one.
            continue
        # abs_path_for_dynamic_library() returns a str or raises OSError,
        # so no None check is needed here.
        abs_path = abs_path_for_dynamic_library(libname, handle)
        return LoadedDL(abs_path, False, handle._handle, "system-search")
    return None

177 

178 

def _work_around_known_bugs(libname: str, found_path: str) -> None:
    """Pre-load libnvrtc-builtins.so.13.0 when about to load libnvrtc.so.13.

    Does nothing unless `libname` is "nvrtc", the basename of `found_path` is
    "libnvrtc.so.13", and the builtins library exists in the same directory.

    Args:
        libname: The name of the library about to be loaded.
        found_path: The path of the library file about to be loaded.
    """
    if libname != "nvrtc":
        return

    # Work around bug/oversight in
    # nvidia_cuda_nvrtc-13.0.48-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl
    # Issue: libnvrtc.so.13 RUNPATH is not set.
    # This workaround is highly specific
    # - for simplicity.
    # - to not mask bugs in future nvidia-cuda-nvrtc releases.
    # - because a more general workaround is complicated.
    parent_dir, file_name = os.path.split(found_path)
    if file_name != "libnvrtc.so.13":
        return

    builtins_path = os.path.join(parent_dir, "libnvrtc-builtins.so.13.0")
    if not os.path.isfile(builtins_path):
        return

    # In case of failure, defer to primary load, which is almost certain to fail, too.
    with contextlib.suppress(OSError):
        ctypes.CDLL(builtins_path, CDLL_MODE)

196 

197 

def load_with_abs_path(libname: str, found_path: str, found_via: str | None = None) -> LoadedDL:
    """Load a dynamic library from the given path.

    Known packaging bugs are worked around first (see _work_around_known_bugs).

    Args:
        libname: The name of the library to load
        found_path: The absolute path to the library file
        found_via: Passed through unchanged as the last LoadedDL argument;
            presumably describes how the path was found -- verify against callers

    Returns:
        A LoadedDL object representing the loaded library

    Raises:
        RuntimeError: If the library cannot be loaded
    """
    _work_around_known_bugs(libname, found_path)
    try:
        loaded = _load_lib(libname, found_path)
    except OSError as exc:
        raise RuntimeError(f"Failed to dlopen {found_path}: {exc}") from exc
    else:
        return LoadedDL(found_path, False, loaded._handle, found_via)