# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.# SPDX-License-Identifier: Apache-2.0## Licensed under the Apache License, Version 2.0 (the "License");# you may not use this file except in compliance with the License.# You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing, software# distributed under the License is distributed on an "AS IS" BASIS,# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.# See the License for the specific language governing permissions and# limitations under the License.importthreadingfrompathlibimportPathfromtypingimportDict,List,Tuple,Unionfromurllib.parseimporturlparsefrom.clientimportStorageClientfrom.configimportStorageClientConfigfrom.fileimportObjectFile,PosixFilefrom.typesimportDEFAULT_POSIX_PROFILE_NAME,MSC_PROTOCOL,MSC_PROTOCOL_NAME_instance_cache:Dict[str,StorageClient]={}_cache_lock=threading.Lock()
def _restore_full_path(parsed) -> str:
    """
    Rebuild the complete path of a parsed URL.

    ``urlparse`` treats ``;``, ``?`` and ``#`` as delimiters and moves whatever follows them out of
    ``path`` into ``params``, ``query`` and ``fragment``. Those characters are legal in object keys,
    POSIX file names and glob patterns (``?`` is a wildcard), so the pieces are glued back on here.

    A delimiter followed by nothing (e.g. a trailing ``?``) cannot be recovered because ``urlparse``
    reports it as an empty component.

    :param parsed: The result of ``urllib.parse.urlparse``.
    :return: The path including any params, query and fragment text.
    """
    full_path = parsed.path
    if parsed.params:
        full_path += ";" + parsed.params
    if parsed.query:
        full_path += "?" + parsed.query
    if parsed.fragment:
        full_path += "#" + parsed.fragment
    return full_path


def resolve_storage_client(url: str) -> Tuple[StorageClient, str]:
    """
    Build and return a :py:class:`multistorageclient.StorageClient` instance based on the provided URL or path.

    This function parses the given URL or path and determines the appropriate storage profile and path.
    It supports URLs with the protocol ``msc://``, as well as POSIX paths or ``file://`` URLs for local file
    system access. If the profile has already been instantiated, it returns the cached client. Otherwise,
    it creates a new :py:class:`StorageClient` and caches it.

    Characters that ``urlparse`` would interpret as URL delimiters (``;``, ``?``, ``#``) are kept as part
    of the returned path, so keys and glob patterns containing them are not truncated.

    :param url: The storage location, which can be:
                - A URL in the format ``msc://profile/path`` for object storage.
                - A local file system path (absolute POSIX path) or a ``file://`` URL.

    :return: A tuple containing the :py:class:`multistorageclient.StorageClient` instance and the parsed path.

    :raises ValueError: If the URL's protocol is neither ``msc`` nor a valid local file system path.
    """
    pr = urlparse(url)
    full_path = _restore_full_path(pr)

    if pr.scheme == MSC_PROTOCOL_NAME:
        profile = pr.netloc
        # The path is relative to the profile, so remove the single leading slash.
        path = full_path[1:] if full_path.startswith("/") else full_path
    elif pr.scheme in ("", "file"):
        if not Path(full_path).is_absolute():
            raise ValueError(f'Invalid POSIX path "{url}", only absolute path is allowed')
        profile = DEFAULT_POSIX_PROFILE_NAME
        path = full_path
    else:
        raise ValueError(f'Unknown URL "{url}", expecting "{MSC_PROTOCOL}" or a POSIX path')

    # Fast path: an already-built client is returned without taking the lock.
    if profile in _instance_cache:
        return _instance_cache[profile], path

    with _cache_lock:
        # Check again under the lock: another thread may have created the client
        # between the unlocked check above and acquiring the lock.
        if profile in _instance_cache:
            return _instance_cache[profile], path

        client = StorageClient(config=StorageClientConfig.from_file(profile=profile))
        _instance_cache[profile] = client

    return client, path
def open(url: str, mode: str = "rb") -> Union[PosixFile, ObjectFile]:
    """
    Open the file located at ``url`` in the requested mode.

    The URL is resolved with :py:func:`resolve_storage_client`, which returns the
    :py:class:`multistorageclient.StorageClient` for the URL's profile (cached or newly built)
    together with the path inside that profile. The file is then opened through that client.

    :param url: The URL of the file to open. (example: ``msc://profile/prefix/dataset.tar``)
    :param mode: The file mode to open the file in. Defaults to ``"rb"``.

    :return: A file-like object that allows interaction with the file.

    :raises ValueError: If ``url`` is neither an ``msc`` URL nor an absolute POSIX path / ``file://`` URL.
    """
    storage_client, resolved_path = resolve_storage_client(url)
    file_obj = storage_client.open(resolved_path, mode)
    return file_obj
def glob(pattern: str) -> List[str]:
    """
    List the files that match a glob-style pattern.

    The pattern is resolved with :py:func:`resolve_storage_client` to obtain the
    :py:class:`multistorageclient.StorageClient` for its profile and the pattern path within
    that profile. The client's ``glob`` is called with ``include_url_prefix=True``.

    :param pattern: The glob-style pattern to match files. (example: ``msc://profile/prefix/**/*.tar``)

    :return: A list of file paths matching the pattern.

    :raises ValueError: If ``pattern`` is neither an ``msc`` URL nor an absolute POSIX path / ``file://`` URL.
    """
    storage_client, pattern_path = resolve_storage_client(pattern)
    matches = storage_client.glob(pattern_path, include_url_prefix=True)
    return matches
def upload_file(url: str, local_path: str) -> None:
    """
    Upload a local file to the location identified by ``url``.

    The URL is resolved with :py:func:`resolve_storage_client` to obtain the
    :py:class:`multistorageclient.StorageClient` for its profile and the remote path within
    that profile; the upload itself is delegated to that client.

    :param url: The URL of the file. (example: ``msc://profile/prefix/dataset.tar``)
    :param local_path: The local path of the file.

    :raises ValueError: If ``url`` is neither an ``msc`` URL nor an absolute POSIX path / ``file://`` URL.
    """
    storage_client, remote_path = resolve_storage_client(url)
    # The client's result is passed through unchanged.
    outcome = storage_client.upload_file(remote_path=remote_path, local_path=local_path)
    return outcome
def download_file(url: str, local_path: str) -> None:
    """
    Download the file identified by ``url`` to a local path.

    The URL is resolved with :py:func:`resolve_storage_client` to obtain the
    :py:class:`multistorageclient.StorageClient` for its profile and the remote path within
    that profile; the download itself is delegated to that client.

    :param url: The URL of the file to download. (example: ``msc://profile/prefix/dataset.tar``)
    :param local_path: The local path where the file should be downloaded.

    :raises ValueError: If ``url`` is neither an ``msc`` URL nor an absolute POSIX path / ``file://`` URL.
    """
    storage_client, remote_path = resolve_storage_client(url)
    # The client's result is passed through unchanged.
    outcome = storage_client.download_file(remote_path=remote_path, local_path=local_path)
    return outcome
def is_empty(url: str) -> bool:
    """
    Check whether the location identified by ``url`` contains any objects.

    The URL is resolved with :py:func:`resolve_storage_client`, and the check is delegated to the
    resulting :py:class:`multistorageclient.StorageClient`.

    :param url: The URL to check, typically pointing to a storage location.

    :return: ``True`` if there are no objects/files under this URL, ``False`` otherwise.

    :raises ValueError: If ``url`` is neither an ``msc`` URL nor an absolute POSIX path / ``file://`` URL.
    """
    storage_client, location = resolve_storage_client(url)
    nothing_found = storage_client.is_empty(location)
    return nothing_found
def is_file(url: str) -> bool:
    """
    Check whether ``url`` points to a file (rather than a directory or folder).

    The URL is resolved with :py:func:`resolve_storage_client`, and the check is delegated to the
    resulting :py:class:`multistorageclient.StorageClient`.

    :param url: The URL to check the existence of a file. (example: ``msc://profile/prefix/dataset.tar``)

    :return: The result of the client's ``is_file`` check for the resolved path.

    :raises ValueError: If ``url`` is neither an ``msc`` URL nor an absolute POSIX path / ``file://`` URL.
    """
    storage_client, target_path = resolve_storage_client(url)
    found = storage_client.is_file(path=target_path)
    return found