1# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2# SPDX-License-Identifier: Apache-2.0
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16import logging
17import os
18import threading
19from collections.abc import Callable, Iterator
20from typing import Any, Optional, Union
21from urllib.parse import ParseResult, urlparse
22
23from .client import StorageClient
24from .config import RESERVED_POSIX_PROFILE_NAME, SUPPORTED_IMPLICIT_PROFILE_PROTOCOLS, PathMapping, StorageClientConfig
25from .file import ObjectFile, PosixFile
26from .telemetry import Telemetry
27from .types import MSC_PROTOCOL, ExecutionMode, ObjectMetadata, PatternList, SignerType, SymlinkHandling, SyncResult
28
# Factory used to create Telemetry instances for clients built by the shortcuts; None until one is set.
_TELEMETRY_PROVIDER: Optional[Callable[[], Telemetry]] = None
# Held by set_telemetry_provider() while it rebinds _TELEMETRY_PROVIDER.
_TELEMETRY_PROVIDER_LOCK = threading.Lock()
# StorageClient instances created by resolve_storage_client(), keyed by profile name.
_STORAGE_CLIENT_CACHE: dict[str, StorageClient] = {}
# Held while a new client is created and inserted into _STORAGE_CLIENT_CACHE.
_STORAGE_CLIENT_CACHE_LOCK = threading.Lock()
# Path mappings keyed by the value of the MSC_CONFIG environment variable (None when it is unset).
_PATH_MAPPING_CACHE: dict[Optional[str], PathMapping] = {}
# Held while a path mapping is loaded and inserted into _PATH_MAPPING_CACHE.
_PATH_MAPPING_CACHE_LOCK = threading.Lock()
# PID recorded at import time and refreshed by _reinitialize_after_fork(); compared with os.getpid()
# to detect that the current process is a fork.
_PROCESS_ID = os.getpid()

logger = logging.getLogger(__name__)
38
39
def _reinitialize_after_fork() -> None:
    """
    Reset module-level state inside a freshly forked child process.

    What happens, in order:

    * The storage client cache and the path mapping cache are emptied in place.
    * Every module lock is replaced by a brand-new ``threading.Lock`` so the child never
      inherits whatever lock state the parent had at fork time.
    * The tracked process ID is updated to the child's PID.

    The telemetry provider itself is deliberately kept (it is supposed to be a top-level
    Python function); only the lock guarding it is replaced.
    """
    # Only names that get rebound need a ``global`` declaration; the caches are mutated in place.
    global _STORAGE_CLIENT_CACHE_LOCK, _PATH_MAPPING_CACHE_LOCK, _TELEMETRY_PROVIDER_LOCK, _PROCESS_ID

    # Forget everything that was cached by the parent process.
    for inherited_cache in (_STORAGE_CLIENT_CACHE, _PATH_MAPPING_CACHE):
        inherited_cache.clear()

    # Hand out fresh, unlocked locks.
    _STORAGE_CLIENT_CACHE_LOCK = threading.Lock()
    _PATH_MAPPING_CACHE_LOCK = threading.Lock()
    _TELEMETRY_PROVIDER_LOCK = threading.Lock()

    # Remember which process now owns this state.
    _PROCESS_ID = os.getpid()
64
65
def _check_and_reinitialize_if_forked() -> None:
    """
    Detect a fork by PID comparison and reset module state when one is found.

    Acts as a fallback for systems where ``os.register_at_fork`` is not available: if the
    current PID differs from the recorded one, :py:func:`_reinitialize_after_fork` is run.
    """
    # Same process as the one that recorded the PID: nothing to do.
    if os.getpid() == _PROCESS_ID:
        return

    _reinitialize_after_fork()
78
79
# Guarded with hasattr because os.register_at_fork may be unavailable; when it exists, module
# state is reset automatically in every forked child via _reinitialize_after_fork.
if hasattr(os, "register_at_fork"):
    os.register_at_fork(after_in_child=_reinitialize_after_fork)
82
83
[docs]
def get_telemetry_provider() -> Optional[Callable[[], Telemetry]]:
    """
    Return the factory used to create :py:class:`Telemetry` instances for storage clients created by shortcuts.

    :return: The currently registered provider function (whatever was last passed to
        :py:func:`set_telemetry_provider`).
    """
    # A plain read of a module global needs no ``global`` declaration.
    return _TELEMETRY_PROVIDER
93
94
[docs]
def set_telemetry_provider(telemetry_provider: Optional[Callable[[], Telemetry]]) -> None:
    """
    Register the factory used to create :py:class:`Telemetry` instances for storage clients created by shortcuts.

    :param telemetry_provider: A function that returns a telemetry instance. It must be defined at the
        top level of a module so that it works with pickling.
    """
    # Only the provider is rebound here; the lock is merely read.
    global _TELEMETRY_PROVIDER

    with _TELEMETRY_PROVIDER_LOCK:
        _TELEMETRY_PROVIDER = telemetry_provider
106
107
108def _build_full_path(original_url: str, pr: ParseResult) -> str:
109 """
110 Helper function to construct the full path from a parsed URL, including query and fragment.
111
112 :param original_url: The original URL before parsing
113 :param pr: The parsed URL result from urlparse
114 :return: The complete path including query and fragment if present
115 """
116 path = pr.path
117 if pr.query:
118 path += "?" + pr.query
119 elif original_url.endswith("?"):
120 path += "?" # handle the glob pattern that has a trailing question mark
121 if pr.fragment:
122 path += "#" + pr.fragment
123 return path
124
125
def _resolve_msc_url(url: str) -> tuple[str, str]:
    """
    Split an MSC URL into its profile name and path.

    The network-location part of the URL is the profile; the remainder (including any query or
    fragment text) is the path, with a single leading slash removed.

    :param url: The MSC URL to resolve (``msc://profile/path``).
    :return: A tuple of ``(profile_name, path)``.
    """
    parsed = urlparse(url)
    full_path = _build_full_path(url, parsed)
    # Strip at most one leading "/" so the path is relative to the profile.
    relative_path = full_path[1:] if full_path.startswith("/") else full_path
    return parsed.netloc, relative_path
139
140
def _read_cached_path_mapping() -> PathMapping:
    """
    Return the path mapping for the current ``MSC_CONFIG`` value, loading it at most once per value.

    Path mapping checks run on every non-MSC shortcut call (POSIX paths included). Caching the result
    keeps that hot path from repeatedly loading and validating the full MSC config, while direct callers
    of ``StorageClientConfig.read_path_mapping`` keep its uncached behavior.
    """
    config_key = os.getenv("MSC_CONFIG", None)

    # Fast path: answer from the cache without taking the lock.
    if config_key in _PATH_MAPPING_CACHE:
        return _PATH_MAPPING_CACHE[config_key]

    with _PATH_MAPPING_CACHE_LOCK:
        # Check again under the lock: another thread may have filled the entry in the meantime.
        if config_key not in _PATH_MAPPING_CACHE:
            _PATH_MAPPING_CACHE[config_key] = StorageClientConfig.read_path_mapping()
        return _PATH_MAPPING_CACHE[config_key]
160
161
def _resolve_non_msc_url(url: str) -> tuple[str, str]:
    """
    Resolve a non-MSC URL to a profile name and path.

    Resolution process:

    1. Consult the (cached) path mapping; if it has an entry matching ``url``, that entry wins.
    2. ``file://`` URLs and absolute POSIX paths use the reserved POSIX profile (``__filesystem__``).
    3. Strings without a URL scheme are treated as relative paths and made absolute with
       ``os.path.realpath`` under the reserved POSIX profile.
    4. Anything else must use a supported protocol and name a bucket; it is mapped to an implicit
       profile called ``_<protocol>-<bucket>``.

    For implicit profiles the path keeps any ``?`` / ``#`` text from the URL (object keys and glob
    patterns may legitimately contain these characters), matching how ``msc://`` and ``file://``
    URLs are handled.

    :param url: The non-MSC URL to resolve.
    :return: A tuple of ``(profile_name, path)``.
    :raises ValueError: If the protocol is not supported, or the URL has no bucket name.
    """
    # An explicit path mapping, when present and matching, takes precedence over everything else.
    path_mapping = _read_cached_path_mapping()
    if path_mapping:
        possible_mapping = path_mapping.find_mapping(url)
        if possible_mapping:
            return possible_mapping  # already a (profile name, path) pair

    # Local files go through the reserved POSIX profile.
    if url.startswith("file://"):
        return RESERVED_POSIX_PROFILE_NAME, _build_full_path(url, urlparse(url))
    if url.startswith("/"):
        return RESERVED_POSIX_PROFILE_NAME, os.path.normpath(url)

    pr = urlparse(url)
    protocol = pr.scheme.lower()

    # No scheme at all: a relative path, translated to an absolute one.
    if not protocol:
        return RESERVED_POSIX_PROFILE_NAME, os.path.realpath(url)

    # Only a fixed set of protocols can be turned into implicit profiles.
    if protocol not in SUPPORTED_IMPLICIT_PROFILE_PROTOCOLS:
        supported_protocols = ", ".join([f"{p}://" for p in SUPPORTED_IMPLICIT_PROFILE_PROTOCOLS])
        raise ValueError(
            f'Unknown URL "{url}", expecting "{MSC_PROTOCOL}" or a supported protocol ({supported_protocols}) or a POSIX path'
        )

    # The implicit profile name has the format _protocol-bucket, so a bucket is mandatory.
    bucket = pr.netloc
    if not bucket:
        raise ValueError(f'Invalid URL "{url}", bucket name is required for {protocol}:// URLs')

    profile_name = f"_{protocol}-{bucket}"

    # Use the full path rather than pr.path alone: urlparse moves everything after "?" / "#" into
    # the query / fragment, which would silently truncate keys and glob patterns such as
    # "s3://bucket/file-?.tar". The leading slash is removed to make the path bucket-relative.
    path = _build_full_path(url, pr)
    if path.startswith("/"):
        path = path[1:]

    return profile_name, path
218
219
[docs]
def resolve_storage_client(url: str) -> tuple[StorageClient, str]:
    """
    Build and return a :py:class:`multistorageclient.StorageClient` instance based on the provided URL or path.

    This function parses the given URL or path and determines the appropriate storage profile and path.
    It supports URLs with the protocol ``msc://``, as well as POSIX paths or ``file://`` URLs for local file
    system access. If the profile has already been instantiated, it returns the cached client. Otherwise,
    it creates a new :py:class:`StorageClient` and caches it.

    The function also supports implicit profiles for non-MSC URLs. When a non-MSC URL is provided (like s3://,
    gs://, ais://, file://), MSC will infer the storage provider based on the URL protocol and create an implicit
    profile with the naming convention "_protocol-bucket" (e.g., "_s3-bucket1", "_gs-bucket1").

    Path mappings defined in the MSC configuration are applied before creating implicit profiles.
    This allows for explicit mappings between source paths and destination MSC profiles.

    This function is fork-safe: after a fork, the cache is automatically cleared and new client instances
    are created in the child process to avoid sharing stale connections or file descriptors.

    :param url: The storage location, which can be:
        - A URL in the format ``msc://profile/path`` for object storage.
        - A local file system path (absolute POSIX path) or a ``file://`` URL.
        - A non-MSC URL with a supported protocol (s3://, gs://, ais://).

    :return: A tuple containing the :py:class:`multistorageclient.StorageClient` instance and the parsed path.

    :raises ValueError: If the URL's protocol is neither ``msc`` nor a valid local file system path
        or a supported non-MSC protocol.
    """
    # Fallback fork detection; resets caches and locks when running in a forked child.
    _check_and_reinitialize_if_forked()

    # pathlib.Path collapses "msc://" into "msc:/"; restore the double slash. Only the leading
    # prefix is rewritten (count=1) so that a later "msc:/" inside the path is left untouched.
    if url.startswith("msc:/") and not url.startswith("msc://"):
        url = url.replace("msc:/", "msc://", 1)

    # Resolve the URL to a profile name and path
    profile, path = _resolve_msc_url(url) if url.startswith(MSC_PROTOCOL) else _resolve_non_msc_url(url)

    # Fast path: the profile has already been instantiated
    if profile in _STORAGE_CLIENT_CACHE:
        return _STORAGE_CLIENT_CACHE[profile], path

    # Create a new StorageClient instance and cache it
    with _STORAGE_CLIENT_CACHE_LOCK:
        # Check again under the lock: another thread may have created the client in the meantime.
        if profile in _STORAGE_CLIENT_CACHE:
            return _STORAGE_CLIENT_CACHE[profile], path

        client = StorageClient(
            config=StorageClientConfig.from_file(profile=profile, telemetry_provider=get_telemetry_provider())
        )
        _STORAGE_CLIENT_CACHE[profile] = client

    return client, path
276
277
[docs]
def open(url: str, mode: str = "rb", **kwargs: Any) -> Union[PosixFile, ObjectFile]:
    """
    Open the file located at ``url`` in the requested mode.

    The URL is resolved to a :py:class:`multistorageclient.StorageClient` (retrieved from the cache or
    newly built) and the open call is delegated to that client.

    :param url: The URL of the file to open. (example: ``msc://profile/prefix/dataset.tar``)
    :param mode: The file mode to open the file in.
    :param kwargs: Extra keyword arguments passed through unchanged to the client's ``open``.

    :return: A file-like object that allows interaction with the file.

    :raises ValueError: If the URL cannot be resolved to a storage profile.
    """
    storage_client, resolved_path = resolve_storage_client(url)
    file_handle = storage_client.open(resolved_path, mode, **kwargs)
    return file_handle
294
295
[docs]
def glob(pattern: str, attribute_filter_expression: Optional[str] = None) -> list[str]:
    """
    Return the files that match a glob-style pattern.

    The pattern is resolved to a :py:class:`multistorageclient.StorageClient`, which performs the
    actual matching. Results carry a URL prefix except when a non-``msc://`` pattern resolves to the
    reserved POSIX profile, in which case they are returned without one.

    :param pattern: The glob-style pattern to match files. (example: ``msc://profile/prefix/**/*.tar``)
    :param attribute_filter_expression: The attribute filter expression to apply to the result.

    :return: A list of file paths matching the pattern.

    :raises ValueError: If the pattern cannot be resolved to a storage profile.
    """
    storage_client, resolved_pattern = resolve_storage_client(pattern)

    # A plain local pattern (no msc:// prefix, served by the POSIX profile) gets plain results back.
    is_plain_posix_pattern = (
        not pattern.startswith(MSC_PROTOCOL) and storage_client.profile == RESERVED_POSIX_PROFILE_NAME
    )
    return storage_client.glob(
        resolved_pattern,
        include_url_prefix=not is_plain_posix_pattern,
        attribute_filter_expression=attribute_filter_expression,
    )
316
317
[docs]
def upload_file(url: str, local_path: str, attributes: Optional[dict[str, Any]] = None) -> None:
    """
    Upload a local file to the location named by ``url``.

    The URL is resolved to a :py:class:`multistorageclient.StorageClient` (retrieved or built), and
    the upload is delegated to that client.

    :param url: The URL of the file. (example: ``msc://profile/prefix/dataset.tar``)
    :param local_path: The local path of the file.
    :param attributes: Optional attributes forwarded unchanged to the client's ``upload_file``.

    :raises ValueError: If the URL cannot be resolved to a storage profile.
    """
    storage_client, remote_path = resolve_storage_client(url)
    return storage_client.upload_file(remote_path=remote_path, local_path=local_path, attributes=attributes)
333
334
[docs]
def download_file(url: str, local_path: str) -> None:
    """
    Download the file located at ``url`` to a local path.

    The URL is resolved to a :py:class:`multistorageclient.StorageClient` (retrieved or built), and
    the download is delegated to that client.

    :param url: The URL of the file to download. (example: ``msc://profile/prefix/dataset.tar``)
    :param local_path: The local path where the file should be downloaded.

    :raises ValueError: If the URL cannot be resolved to a storage profile.
    """
    storage_client, remote_path = resolve_storage_client(url)
    return storage_client.download_file(remote_path=remote_path, local_path=local_path)
350
351
[docs]
def is_empty(url: str) -> bool:
    """
    Tell whether the location named by ``url`` contains no objects.

    :param url: The URL to check, typically pointing to a storage location.
    :return: ``True`` if there are no objects/files under this URL, ``False`` otherwise.

    :raises ValueError: If the URL cannot be resolved to a storage profile.
    """
    storage_client, resolved_path = resolve_storage_client(url)
    return storage_client.is_empty(resolved_path)
363
364
[docs]
def is_file(url: str) -> bool:
    """
    Tell whether ``url`` points to a file (rather than a directory or folder).

    The URL is resolved to a :py:class:`multistorageclient.StorageClient` (retrieved or built), and
    the check is delegated to that client.

    :param url: The URL to check the existence of a file. (example: ``msc://profile/prefix/dataset.tar``)
    :return: The result of the client's ``is_file`` check for the resolved path.
    """
    storage_client, resolved_path = resolve_storage_client(url)
    return storage_client.is_file(path=resolved_path)
377
378
[docs]
def sync(
    source_url: str,
    target_url: str,
    delete_unmatched_files: bool = False,
    execution_mode: ExecutionMode = ExecutionMode.LOCAL,
    patterns: Optional[PatternList] = None,
    preserve_source_attributes: bool = False,
    ignore_hidden: bool = True,
    dryrun: bool = False,
    dryrun_output_path: Optional[str] = None,
    symlink_handling: SymlinkHandling = SymlinkHandling.FOLLOW,
) -> SyncResult:
    """
    Synchronize files from a source storage location to a target storage location.

    Both URLs are resolved to storage clients; the target client then pulls from the source client
    via ``sync_from``.

    :param source_url: The URL for the source storage.
    :param target_url: The URL for the target storage.
    :param delete_unmatched_files: Whether to delete files at the target that are not present at the source.
    :param execution_mode: The execution mode to use. Currently supports "local" and "ray".
    :param patterns: PatternList for include/exclude filtering. If None, all files are included.
    :param preserve_source_attributes: Whether to preserve source file metadata attributes during
        synchronization. When False (default), only file content is copied. When True, custom metadata
        attributes are also preserved.

        .. warning::
            **Performance Impact**: When enabled without a ``metadata_provider`` configured, this will make
            a HEAD request for each object to retrieve attributes, which can significantly impact performance
            on large-scale sync operations. For production use at scale, configure a ``metadata_provider``
            in your storage profile.
    :param ignore_hidden: Whether to ignore hidden files and directories (starting with dot). Default is True.
    :param dryrun: If True, only enumerate and compare objects without performing any copy/delete operations.
        The returned :py:class:`SyncResult` will include a :py:class:`DryrunResult` with paths to JSONL files.
    :param dryrun_output_path: Directory to write dryrun JSONL files into. If None (default), a temporary
        directory is created automatically. Ignored when dryrun is False.
    :param symlink_handling: How to handle symbolic links during sync.
        :py:attr:`SymlinkHandling.FOLLOW` (default) dereferences symlinks and copies the target's bytes.
        :py:attr:`SymlinkHandling.SKIP` excludes symlinks from the sync.
        :py:attr:`SymlinkHandling.PRESERVE` recreates symlinks on the target via
        :py:meth:`AbstractStorageClient.make_symlink` instead of copying bytes (required for
        round-trip preservation of symlinks).
    :return: The :py:class:`SyncResult` produced by the target client's ``sync_from``.
    """
    # Resolve the source first, then the target (same order as the arguments).
    src_client, src_path = resolve_storage_client(source_url)
    dst_client, dst_path = resolve_storage_client(target_url)

    # The target side drives the sync; delete_unmatched_files is passed positionally.
    sync_result = dst_client.sync_from(
        src_client,
        src_path,
        dst_path,
        delete_unmatched_files,
        execution_mode=execution_mode,
        patterns=patterns,
        preserve_source_attributes=preserve_source_attributes,
        ignore_hidden=ignore_hidden,
        dryrun=dryrun,
        dryrun_output_path=dryrun_output_path,
        symlink_handling=symlink_handling,
    )
    return sync_result
433
434
[docs]
def sync_replicas(
    source_url: str,
    replica_indices: Optional[list[int]] = None,
    delete_unmatched_files: bool = False,
    execution_mode: ExecutionMode = ExecutionMode.LOCAL,
    patterns: Optional[PatternList] = None,
    ignore_hidden: bool = True,
    symlink_handling: SymlinkHandling = SymlinkHandling.FOLLOW,
) -> None:
    """
    Synchronize files from the source storage to its replicas.

    The source URL is resolved to a storage client, and the work is delegated to that client's
    ``sync_replicas``.

    :param source_url: The URL for the source storage.
    :param replica_indices: Specify the indices of the replicas to sync to. If not provided, all replicas
        will be synced. Index starts from 0.
    :param delete_unmatched_files: Whether to delete files at the replicas that are not present at the source.
    :param execution_mode: The execution mode to use. Currently supports "local" and "ray".
    :param patterns: PatternList for include/exclude filtering. If None, all files are included.
    :param ignore_hidden: Whether to ignore hidden files and directories (starting with dot). Default is True.
    :param symlink_handling: How to handle symbolic links during sync.
        :py:attr:`SymlinkHandling.FOLLOW` (default) dereferences symlinks and copies the target's bytes.
        :py:attr:`SymlinkHandling.SKIP` excludes symlinks from the sync.
        :py:attr:`SymlinkHandling.PRESERVE` recreates symlinks on each replica via
        :py:meth:`AbstractStorageClient.make_symlink` instead of copying bytes.
    """
    src_client, src_path = resolve_storage_client(source_url)
    src_client.sync_replicas(
        src_path,
        replica_indices=replica_indices,
        delete_unmatched_files=delete_unmatched_files,
        execution_mode=execution_mode,
        patterns=patterns,
        ignore_hidden=ignore_hidden,
        symlink_handling=symlink_handling,
    )
469
470
[docs]
def list(
    url: str,
    start_after: Optional[str] = None,
    end_at: Optional[str] = None,
    include_directories: bool = False,
    attribute_filter_expression: Optional[str] = None,
    show_attributes: bool = False,
    follow_symlinks: Optional[bool] = None,
    patterns: Optional[PatternList] = None,
    symlink_handling: SymlinkHandling = SymlinkHandling.FOLLOW,
) -> Iterator[ObjectMetadata]:
    """
    List the contents found under a URL prefix.

    The URL is resolved to a :py:class:`multistorageclient.StorageClient`, whose ``list`` method is
    called with ``include_url_prefix=True``.

    :param url: The prefix to list objects under.
    :param start_after: The key to start after (i.e. exclusive). An object with this key doesn't have to exist.
    :param end_at: The key to end at (i.e. inclusive). An object with this key doesn't have to exist.
    :param include_directories: Whether to include directories in the result. When True, directories are
        returned alongside objects.
    :param attribute_filter_expression: The attribute filter expression to apply to the result.
    :param show_attributes: Whether to return attributes in the result.
    :param follow_symlinks: **Deprecated.** Use ``symlink_handling`` instead.
    :param patterns: PatternList for include/exclude filtering. If None, all files are included.
    :param symlink_handling: How to handle symbolic links. Only applicable for POSIX file storage.
    :return: An iterator of :py:class:`ObjectMetadata` objects representing the files (and optionally
        directories) accessible under the specified URL prefix. The returned keys will always be prefixed
        with msc://.
    """
    storage_client, prefix = resolve_storage_client(url)
    listing = storage_client.list(
        path=prefix,
        start_after=start_after,
        end_at=end_at,
        include_directories=include_directories,
        include_url_prefix=True,
        attribute_filter_expression=attribute_filter_expression,
        show_attributes=show_attributes,
        follow_symlinks=follow_symlinks,
        patterns=patterns,
        symlink_handling=symlink_handling,
    )
    return listing
513
514
[docs]
def list_recursive(
    url: str,
    start_after: Optional[str] = None,
    end_at: Optional[str] = None,
    max_workers: int = 32,
    look_ahead: int = 2,
    follow_symlinks: Optional[bool] = None,
    patterns: Optional[PatternList] = None,
    symlink_handling: SymlinkHandling = SymlinkHandling.FOLLOW,
) -> Iterator[ObjectMetadata]:
    """
    Recursively list the files found under a URL.

    The URL is resolved to a :py:class:`multistorageclient.StorageClient`, whose ``list_recursive``
    method is called with ``include_url_prefix=True``.

    :param url: The path to list objects under.
    :param start_after: The key to start after (i.e. exclusive). An object with this key doesn't have to exist.
    :param end_at: The key to end at (i.e. inclusive). An object with this key doesn't have to exist.
    :param max_workers: Maximum concurrent workers for provider-level recursive listing.
    :param look_ahead: Prefixes to buffer per worker for provider-level recursive listing.
    :param follow_symlinks: **Deprecated.** Use ``symlink_handling`` instead.
    :param patterns: PatternList for include/exclude filtering. If None, all files are included.
    :param symlink_handling: How to handle symbolic links during listing.
    :return: An iterator of :py:class:`ObjectMetadata` objects representing files accessible under the
        specified URL path. The returned keys use the same URL-prefix behavior as
        :py:meth:`multistorageclient.list`.
    """
    storage_client, root_path = resolve_storage_client(url)
    listing = storage_client.list_recursive(
        path=root_path,
        start_after=start_after,
        end_at=end_at,
        max_workers=max_workers,
        look_ahead=look_ahead,
        include_url_prefix=True,
        follow_symlinks=follow_symlinks,
        patterns=patterns,
        symlink_handling=symlink_handling,
    )
    return listing
554
555
[docs]
def write(url: str, body: bytes, attributes: Optional[dict[str, Any]] = None) -> None:
    """
    Write an object to the storage provider at the location named by ``url``.

    :param url: The path where the object should be written.
    :param body: The content to write to the object.
    :param attributes: Optional attributes forwarded unchanged to the client's ``write``.
    """
    storage_client, object_path = resolve_storage_client(url)
    storage_client.write(path=object_path, body=body, attributes=attributes)
565
566
[docs]
def make_symlink(url: str, target_url: str) -> None:
    """
    Create a symbolic link at ``url`` that points to ``target_url``.

    Both URLs must resolve to the same storage profile; this is verified by checking that both
    resolve to the very same client object.

    :param url: The URL where the symlink will be created.
    :param target_url: The URL of the target that the symlink points to.
    :raises ValueError: If the two URLs resolve to different storage profiles.
    """
    link_client, link_path = resolve_storage_client(url)
    destination_client, destination_path = resolve_storage_client(target_url)

    # Same cached client object on both sides means same profile: safe to link.
    if link_client is destination_client:
        link_client.make_symlink(path=link_path, target=destination_path)
        return

    raise ValueError("Cannot create cross-profile symlink: url and target_url must belong to the same profile.")
582
583
[docs]
def delete(url: str, recursive: bool = False) -> None:
    """
    Delete the object(s) named by ``url`` from the storage provider.

    The URL is resolved to a :py:class:`multistorageclient.StorageClient`, and the deletion is
    delegated to that client for the resolved path.

    :param url: The URL of the object to delete. (example: ``msc://profile/prefix/file.txt``)
    :param recursive: Whether to delete objects in the path recursively.
    """
    storage_client, target_path = resolve_storage_client(url)
    storage_client.delete(target_path, recursive=recursive)
596
597
[docs]
def info(url: str) -> ObjectMetadata:
    """
    Fetch metadata about the object stored at ``url``.

    :param url: The URL of the object to retrieve information about. (example: ``msc://profile/prefix/file.txt``)

    :return: An :py:class:`ObjectMetadata` object representing the object's metadata.
    """
    storage_client, object_path = resolve_storage_client(url)
    metadata = storage_client.info(object_path)
    return metadata
608
609
618
619
[docs]
def generate_presigned_url(
    url: str,
    *,
    method: str = "GET",
    signer_type: Optional[SignerType] = None,
    signer_options: Optional[dict[str, Any]] = None,
) -> str:
    """
    Produce a pre-signed URL that grants temporary access to the object at *url*.

    The storage URL is resolved to a client, which performs the actual signing.

    :param url: The storage URL. (example: ``msc://profile/prefix/file.bin``)
    :param method: The HTTP method the URL should authorise (e.g. ``"GET"``, ``"PUT"``).
    :param signer_type: The signing backend to use. ``None`` means the provider's native signer.
    :param signer_options: Backend-specific options forwarded to the signer.
    :return: A pre-signed URL string.
    """
    storage_client, object_path = resolve_storage_client(url)
    presigned_url = storage_client.generate_presigned_url(
        object_path,
        method=method,
        signer_type=signer_type,
        signer_options=signer_options,
    )
    return presigned_url