Source code for multistorageclient.types

  1# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  2# SPDX-License-Identifier: Apache-2.0
  3#
  4# Licensed under the Apache License, Version 2.0 (the "License");
  5# you may not use this file except in compliance with the License.
  6# You may obtain a copy of the License at
  7#
  8# http://www.apache.org/licenses/LICENSE-2.0
  9#
 10# Unless required by applicable law or agreed to in writing, software
 11# distributed under the License is distributed on an "AS IS" BASIS,
 12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13# See the License for the specific language governing permissions and
 14# limitations under the License.
 15
 16from __future__ import annotations
 17
 18from abc import ABC, abstractmethod
 19from collections.abc import Iterator
 20from dataclasses import asdict, dataclass, field
 21from datetime import datetime, timezone
 22from enum import Enum
 23from typing import IO, Any, NamedTuple, Optional, Tuple, Union
 24
 25from dateutil.parser import parse as dateutil_parser
 26
# URL scheme used to address objects through MSC (e.g. "msc://profile/path").
MSC_PROTOCOL_NAME = "msc"
MSC_PROTOCOL = MSC_PROTOCOL_NAME + "://"

# Defaults consumed by RetryConfig below.
DEFAULT_RETRY_ATTEMPTS = 3
DEFAULT_RETRY_DELAY = 1.0
DEFAULT_RETRY_BACKOFF_MULTIPLIER = 2.0

# datetime.min is a naive datetime.
#
# This creates issues when doing datetime.astimezone(timezone.utc) since it assumes the local timezone for the naive datetime.
# If the local timezone is offset behind UTC, it attempts to subtract off the offset which goes below the representable limit (i.e. an underflow).
# A `ValueError: year 0 is out of range` is thrown as a result.
# Use this timezone-aware minimum instead of datetime.min as a sentinel "earliest" timestamp.
AWARE_DATETIME_MIN = datetime.min.replace(tzinfo=timezone.utc)
 41
class SignerType(str, Enum):
    """Supported signer backends for presigned URL generation."""

    # Provider-native S3 presigned URLs.
    S3 = "s3"
    # CloudFront signed URLs.
    CLOUDFRONT = "cloudfront"
@dataclass
class Credentials:
    """
    A data class representing the credentials needed to access a storage provider.
    """

    #: The access key for authentication.
    access_key: str
    #: The secret key for authentication.
    secret_key: str
    #: An optional security token for temporary credentials.
    token: Optional[str]
    #: The expiration time of the credentials in ISO 8601 format.
    expiration: Optional[str]
    #: A dictionary for storing custom key-value pairs.
    custom_fields: dict[str, Any] = field(default_factory=dict)

    def is_expired(self) -> bool:
        """
        Checks if the credentials are expired based on the expiration time.

        Credentials without an expiration are treated as non-expiring.

        :return: ``True`` if the credentials are expired, ``False`` otherwise.
        """
        if not self.expiration:
            return False
        expiry = dateutil_parser(self.expiration)
        if expiry.tzinfo is None:
            # dateutil returns a naive datetime when the timestamp carries no
            # offset; comparing naive vs. aware raises TypeError, so interpret
            # offset-less expirations as UTC.
            expiry = expiry.replace(tzinfo=timezone.utc)
        return expiry <= datetime.now(tz=timezone.utc)

    def get_custom_field(self, key: str, default: Any = None) -> Any:
        """
        Retrieves a value from custom fields by its key.

        :param key: The key to look up in custom fields.
        :param default: The default value to return if the key is not found.
        :return: The value associated with the key, or the default value if not found.
        """
        return self.custom_fields.get(key, default)
@dataclass
class ObjectMetadata:
    """
    A data class that represents the metadata associated with an object stored in a cloud storage service. This metadata
    includes both required and optional information about the object.
    """

    #: Relative path of the object.
    key: str
    #: The size of the object in bytes.
    content_length: int
    #: The timestamp indicating when the object was last modified.
    last_modified: datetime
    #: Entry type; "file" by default, may also be "directory".
    type: str = "file"
    #: The MIME type of the object.
    content_type: Optional[str] = field(default=None)
    #: The entity tag (ETag) of the object.
    etag: Optional[str] = field(default=None)
    #: The storage class of the object.
    storage_class: Optional[str] = field(default=None)
    #: Custom attributes attached to the object, if any.
    metadata: Optional[dict[str, Any]] = field(default=None)

    @staticmethod
    def from_dict(data: dict) -> "ObjectMetadata":
        """
        Creates an ObjectMetadata instance from a dictionary (parsed from JSON).

        :param data: Mapping with at least ``key``, ``content_length`` and ``last_modified``.
        :return: The reconstructed metadata object.
        :raises ValueError: If a required field is missing.
        """
        # Validate the cheap required field first, before paying for timestamp parsing.
        key = data.get("key")
        if key is None:
            raise ValueError("Missing required field: 'key'")
        try:
            last_modified = dateutil_parser(data["last_modified"])
            return ObjectMetadata(
                key=key,
                content_length=data["content_length"],
                last_modified=last_modified,
                type=data.get("type", "file"),  # default to file
                content_type=data.get("content_type"),
                etag=data.get("etag"),
                storage_class=data.get("storage_class"),
                metadata=data.get("metadata"),
            )
        except KeyError as e:
            # Name the absent field instead of raising a generic message.
            raise ValueError(f"Missing required field: {e}") from e

    def to_dict(self) -> dict:
        """
        Serializes to a JSON-friendly dict, normalizing the timestamp to UTC
        ISO-8601 and dropping fields whose value is ``None``.
        """
        data = asdict(self)
        data["last_modified"] = self.last_modified.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
        return {k: v for k, v in data.items() if v is not None}
class CredentialsProvider(ABC):
    """
    Abstract base class for providing credentials to access a storage provider.

    Implementations are expected to return cached credentials from
    :py:meth:`get_credentials` and renew them via :py:meth:`refresh_credentials`.
    """

    @abstractmethod
    def get_credentials(self) -> Credentials:
        """
        Retrieves the current credentials.

        :return: The current credentials used for authentication.
        """
        pass

    @abstractmethod
    def refresh_credentials(self) -> None:
        """
        Refreshes the credentials if they are expired or about to expire.
        """
        pass
@dataclass
class Range:
    """
    A data class that represents a byte range for read operations.
    """

    #: The start offset in bytes.
    offset: int
    #: The number of bytes to read.
    size: int
class StorageProvider(ABC):
    """
    Abstract base class for interacting with a storage provider.
    """

    @abstractmethod
    def put_object(
        self,
        path: str,
        body: bytes,
        if_match: Optional[str] = None,
        if_none_match: Optional[str] = None,
        attributes: Optional[dict[str, str]] = None,
    ) -> None:
        """
        Uploads an object to the storage provider.

        :param path: The path where the object will be stored.
        :param body: The content of the object to store.
        :param if_match: Optional If-Match value for conditional upload.
        :param if_none_match: Optional If-None-Match value for conditional upload.
        :param attributes: The attributes to add to the file.
        """
        pass

    @abstractmethod
    def get_object(self, path: str, byte_range: Optional[Range] = None) -> bytes:
        """
        Retrieves an object from the storage provider.

        :param path: The path where the object is stored.
        :param byte_range: Optional byte range (offset, length) to read.
        :return: The content of the retrieved object.
        """
        pass

    @abstractmethod
    def copy_object(self, src_path: str, dest_path: str) -> None:
        """
        Copies an object from source to destination in the storage provider.

        :param src_path: The path of the source object to copy.
        :param dest_path: The path of the destination.
        """
        pass

    @abstractmethod
    def delete_object(self, path: str, if_match: Optional[str] = None) -> None:
        """
        Deletes an object from the storage provider.

        :param path: The path of the object to delete.
        :param if_match: Optional if-match value to use for conditional deletion.
        """
        pass

    @abstractmethod
    def delete_objects(self, paths: list[str]) -> None:
        """
        Deletes multiple objects from the storage provider.

        :param paths: A list of paths of objects to delete.
        """
        pass

    @abstractmethod
    def get_object_metadata(self, path: str, strict: bool = True) -> ObjectMetadata:
        """
        Retrieves metadata or information about an object stored in the provider.

        :param path: The path of the object.
        :param strict: When ``True``, performs additional validation to determine whether the path refers to a directory.

        :return: A metadata object containing the information about the object.
        """
        pass

    @abstractmethod
    def list_objects(
        self,
        path: str,
        start_after: Optional[str] = None,
        end_at: Optional[str] = None,
        include_directories: bool = False,
        attribute_filter_expression: Optional[str] = None,
        show_attributes: bool = False,
        follow_symlinks: bool = True,
    ) -> Iterator[ObjectMetadata]:
        """
        Lists objects in the storage provider under the specified path.

        :param path: The path to list objects under. The path must be a valid file or subdirectory path, cannot be partial or just "prefix".
        :param start_after: The key to start after (i.e. exclusive). An object with this key doesn't have to exist.
        :param end_at: The key to end at (i.e. inclusive). An object with this key doesn't have to exist.
        :param include_directories: Whether to include directories in the result. When ``True``, directories are returned alongside objects.
        :param attribute_filter_expression: The attribute filter expression to apply to the result.
        :param show_attributes: Whether to return attributes in the result. There will be performance impact if this is True as now we need to get object metadata for each object.
        :param follow_symlinks: Whether to follow symbolic links. Only applicable for POSIX file storage providers.

        :return: An iterator over objects metadata under the specified path.
        """
        pass

    @abstractmethod
    def list_objects_recursive(
        self,
        path: str = "",
        start_after: Optional[str] = None,
        end_at: Optional[str] = None,
        max_workers: int = 32,
        look_ahead: int = 2,
        follow_symlinks: bool = True,
    ) -> Iterator[ObjectMetadata]:
        """
        Lists files recursively in the storage provider under the specified path.

        :param path: The path to list objects under.
        :param start_after: The key to start after (i.e. exclusive). An object with this key doesn't have to exist.
        :param end_at: The key to end at (i.e. inclusive). An object with this key doesn't have to exist.
        :param max_workers: Maximum concurrent workers for provider-level recursive listing.
        :param look_ahead: Prefixes to buffer per worker for provider-level recursive listing.
        :param follow_symlinks: Whether to follow symbolic links. Only applicable for POSIX file storage providers.
        :return: An iterator over object metadata under the specified path.
        """
        pass

    @abstractmethod
    def upload_file(self, remote_path: str, f: Union[str, IO], attributes: Optional[dict[str, str]] = None) -> None:
        """
        Uploads a file from the local file system to the storage provider.

        :param remote_path: The path where the object will be stored.
        :param f: The source file to upload. This can either be a string representing the local
            file path, or a file-like object (e.g., an open file handle).
        :param attributes: The attributes to add to the file if a new file is created.
        """
        pass

    @abstractmethod
    def download_file(self, remote_path: str, f: Union[str, IO], metadata: Optional[ObjectMetadata] = None) -> None:
        """
        Downloads a file from the storage provider to the local file system.

        :param remote_path: The path of the file to download.
        :param f: The destination for the downloaded file. This can either be a string representing
            the local file path where the file will be saved, or a file-like object to write the
            downloaded content into.
        :param metadata: Metadata about the object to download.
        """
        pass

    @abstractmethod
    def glob(self, pattern: str, attribute_filter_expression: Optional[str] = None) -> list[str]:
        """
        Matches and retrieves a list of object keys in the storage provider that match the specified pattern.

        :param pattern: The pattern to match object keys against, supporting wildcards (e.g., ``*.txt``).
        :param attribute_filter_expression: The attribute filter expression to apply to the result.

        :return: A list of object keys that match the specified pattern.
        """
        pass

    @abstractmethod
    def is_file(self, path: str) -> bool:
        """
        Checks whether the specified key in the storage provider points to a file (as opposed to a folder or directory).

        :param path: The path to check.

        :return: ``True`` if the key points to a file, ``False`` if it points to a directory or folder.
        """
        pass

    # Concrete by design: providers without presigned-URL support inherit the
    # NotImplementedError behavior instead of being forced to implement a stub.
    def generate_presigned_url(
        self,
        path: str,
        *,
        method: str = "GET",
        signer_type: Optional[SignerType] = None,
        signer_options: Optional[dict[str, Any]] = None,
    ) -> str:
        """
        Generate a pre-signed URL granting temporary access to the object at *path*.

        :param path: The object path within the storage provider.
        :param method: The HTTP method the URL should authorise (e.g. ``"GET"``, ``"PUT"``).
        :param signer_type: The signing backend to use. ``None`` means the provider's native signer.
        :param signer_options: Backend-specific options forwarded to the signer.
        :return: A pre-signed URL string.
        :raises NotImplementedError: If this storage provider does not support presigned URLs.
        """
        raise NotImplementedError(f"{type(self).__name__} does not support presigned URL generation.")
class ResolvedPathState(str, Enum):
    """
    Enum representing the state of a resolved path.
    """

    EXISTS = "exists"  # File currently exists
    DELETED = "deleted"  # File existed before but has been deleted
    UNTRACKED = "untracked"  # File never existed or was never tracked
class ResolvedPath(NamedTuple):
    """
    Result of resolving a virtual path to a physical path.

    :param physical_path: The physical path in storage backend
    :param state: The state of the path (EXISTS, DELETED, or UNTRACKED)
    :param profile: Optional profile name for routing in CompositeStorageClient.
        None means use current client's storage provider.
        String means route to named child StorageClient.

    State meanings:
    - EXISTS: File currently exists in metadata
    - DELETED: File existed before but has been deleted (soft delete)
    - UNTRACKED: File never existed or was never tracked
    """

    # The physical path in the storage backend.
    physical_path: str
    # Lifecycle state of the path (see class docstring).
    state: ResolvedPathState
    # Child-profile name for routing; None targets the current client's provider.
    profile: Optional[str] = None

    @property
    def exists(self) -> bool:
        """Backward compatibility property: True if state is EXISTS."""
        return self.state == ResolvedPathState.EXISTS
class MetadataProvider(ABC):
    """
    Abstract base class for accessing file metadata.

    Implementations map user-facing logical paths to physical storage paths and
    track pending changes that are persisted via :py:meth:`commit_updates`.
    """

    @abstractmethod
    def list_objects(
        self,
        path: str,
        start_after: Optional[str] = None,
        end_at: Optional[str] = None,
        include_directories: bool = False,
        attribute_filter_expression: Optional[str] = None,
        show_attributes: bool = False,
    ) -> Iterator[ObjectMetadata]:
        """
        Lists objects in the metadata provider under the specified path.

        :param path: The path to list objects under. The path must be a valid file or subdirectory path, cannot be partial or just "prefix".
        :param start_after: The key to start after (i.e. exclusive). An object with this key doesn't have to exist.
        :param end_at: The key to end at (i.e. inclusive). An object with this key doesn't have to exist.
        :param include_directories: Whether to include directories in the result. When ``True``, directories are returned alongside objects.
        :param attribute_filter_expression: The attribute filter expression to apply to the result.
        :param show_attributes: Whether to return attributes in the result. Depending on implementation, there may be a performance impact if this is set to ``True``.

        :return: An iterator over object metadata under the specified path.
        """
        pass

    @abstractmethod
    def get_object_metadata(self, path: str, include_pending: bool = False) -> ObjectMetadata:
        """
        Retrieves metadata or information about an object or directory stored in the provider.

        If the path does not match a file, implementations should check whether the path
        represents a valid directory (i.e. files exist under the path prefix) and return
        directory metadata accordingly.

        :param path: The path of the object or directory.
        :param include_pending: Whether to include metadata that is not yet committed.

        :return: A metadata object containing the information about the object or directory.
        :raises FileNotFoundError: If no object or directory exists at the specified path.
        """
        pass

    @abstractmethod
    def glob(self, pattern: str, attribute_filter_expression: Optional[str] = None) -> list[str]:
        """
        Matches and retrieves a list of object keys in the storage provider that match the specified pattern.

        :param pattern: The pattern to match object keys against, supporting wildcards (e.g., ``*.txt``).
        :param attribute_filter_expression: The attribute filter expression to apply to the result.

        :return: A list of object keys that match the specified pattern.
        """
        pass

    @abstractmethod
    def realpath(self, logical_path: str) -> ResolvedPath:
        """
        Resolves a logical path to its physical storage path.

        This method checks if the object exists in the committed state and returns
        the appropriate physical path with the current state of the path.

        :param logical_path: The user-facing logical path

        :return: ResolvedPath with physical_path and state:
            - ResolvedPathState.EXISTS: File currently exists
            - ResolvedPathState.UNTRACKED: File never existed
            - ResolvedPathState.DELETED: File was deleted
            If state is EXISTS, physical_path is the committed storage path.
            Otherwise, physical_path is typically the logical_path as fallback.
        """
        pass

    @abstractmethod
    def generate_physical_path(self, logical_path: str, for_overwrite: bool = False) -> ResolvedPath:
        """
        Generates a physical storage path for writing a new or overwritten object.

        This method is used for write operations to determine where the object should
        be physically stored. Implementations can use this to:
        - Generate UUID-based paths for deduplication
        - Create versioned paths (file-v1.txt, file-v2.txt) for time travel
        - Implement path rewriting strategies

        :param logical_path: The user-facing logical path
        :param for_overwrite: When ``True``, indicates the path is for overwriting an existing object.
            Implementations may generate unique paths for overwrites to support versioning.

        :return: ResolvedPath with physical_path for writing. The exists flag indicates
            whether the logical path currently exists in committed state (for overwrite scenarios).
        """
        pass

    @abstractmethod
    def add_file(self, path: str, metadata: ObjectMetadata) -> None:
        """
        Add a file to be tracked by the :py:class:`MetadataProvider`. Does not have to be
        reflected in listing until a :py:meth:`MetadataProvider.commit_updates` forces a persist.
        This function must tolerate duplicate calls (idempotent behavior).

        :param path: User-supplied virtual path
        :param metadata: physical file metadata from StorageProvider
        """
        pass

    @abstractmethod
    def remove_file(self, path: str) -> None:
        """
        Remove a file tracked by the :py:class:`MetadataProvider`. Does not have to be
        reflected in listing until a :py:meth:`MetadataProvider.commit_updates` forces a persist.
        This function must tolerate duplicate calls (idempotent behavior).

        :param path: User-supplied virtual path
        """
        pass

    @abstractmethod
    def commit_updates(self) -> None:
        """
        Commit any newly added files, used in conjunction with :py:meth:`MetadataProvider.add_file`.
        :py:class:`MetadataProvider` will persistently record any metadata changes.
        """
        pass

    @abstractmethod
    def is_writable(self) -> bool:
        """
        Returns ``True`` if the :py:class:`MetadataProvider` supports writes else ``False``.
        """
        pass

    @abstractmethod
    def allow_overwrites(self) -> bool:
        """
        Returns ``True`` if the :py:class:`MetadataProvider` allows overwriting existing files else ``False``.
        When ``True``, :py:meth:`add_file` will not raise an error if the file already exists.
        """
        pass

    @abstractmethod
    def should_use_soft_delete(self) -> bool:
        """
        Returns ``True`` if the :py:class:`MetadataProvider` should use soft-delete behavior else ``False``.

        When ``True``, delete operations will only mark files as deleted in metadata without removing
        the physical data from storage. The file will return :py:class:`ResolvedPathState.DELETED` state
        when queried and will not appear in listings.

        When ``False``, delete operations will remove both the metadata and the physical file from storage
        (hard delete).
        """
        pass
@dataclass
class StorageProviderConfig:
    """
    A data class that represents the configuration needed to initialize a storage provider.
    """

    #: The name or type of the storage provider (e.g., ``s3``, ``gcs``, ``oci``, ``azure``).
    type: str
    #: Additional options required to configure the storage provider (e.g., endpoint URLs, region, etc.).
    options: Optional[dict[str, Any]] = None
@dataclass
class StorageBackend:
    """
    Represents configuration for a single storage backend.
    """

    #: Configuration used to construct the backend's storage provider.
    storage_provider_config: StorageProviderConfig
    #: Optional credentials provider for authenticating with the backend.
    credentials_provider: Optional[CredentialsProvider] = None
    #: Replica configurations associated with this backend, if any.
    replicas: list["Replica"] = field(default_factory=list)
class ProviderBundle(ABC):
    """
    Abstract base class that serves as a container for various providers (storage, credentials, and metadata)
    that interact with a storage service. The :py:class:`ProviderBundle` abstracts access to these providers, allowing for
    flexible implementations of cloud storage solutions.
    """

    @property
    @abstractmethod
    def storage_provider_config(self) -> StorageProviderConfig:
        """
        :return: The configuration for the storage provider, which includes the provider
            name/type and additional options.
        """
        pass

    @property
    @abstractmethod
    def credentials_provider(self) -> Optional[CredentialsProvider]:
        """
        :return: The credentials provider responsible for managing authentication credentials
            required to access the storage service.
        """
        pass

    @property
    @abstractmethod
    def metadata_provider(self) -> Optional[MetadataProvider]:
        """
        :return: The metadata provider responsible for retrieving metadata about objects in the storage service.
        """
        pass

    @property
    @abstractmethod
    def replicas(self) -> list["Replica"]:
        """
        :return: The replicas configuration for this provider bundle, if any.
        """
        pass
class ProviderBundleV2(ABC):
    """
    Abstract base class that serves as a container for various providers (storage, credentials, and metadata)
    that interact with one or multiple storage services. The :py:class:`ProviderBundleV2` abstracts access to these providers, allowing for
    flexible implementations of cloud storage solutions.
    """

    @property
    @abstractmethod
    def storage_backends(self) -> dict[str, StorageBackend]:
        """
        :return: Mapping of storage backend name -> StorageBackend. Must have at least one backend.
        """
        pass

    @property
    @abstractmethod
    def metadata_provider(self) -> Optional[MetadataProvider]:
        """
        :return: The metadata provider responsible for retrieving metadata about objects in the storage service. If there are multiple backends, this is required.
        """
        pass
@dataclass
class RetryConfig:
    """
    A data class that represents the configuration for retry strategy.
    """

    #: The number of attempts before giving up. Must be at least 1.
    attempts: int = DEFAULT_RETRY_ATTEMPTS
    #: The base delay (in seconds) for exponential backoff. Must be a non-negative value.
    delay: float = DEFAULT_RETRY_DELAY
    #: The backoff multiplier for exponential backoff. Must be at least 1.0.
    backoff_multiplier: float = DEFAULT_RETRY_BACKOFF_MULTIPLIER

    def __post_init__(self) -> None:
        # Validate the configured values; raise on the first violation found,
        # in declaration order.
        validations = (
            (self.attempts >= 1, "Attempts must be at least 1."),
            (self.delay >= 0, "Delay must be a non-negative number."),
            (self.backoff_multiplier >= 1.0, "Backoff multiplier must be at least 1.0."),
        )
        for is_valid, message in validations:
            if not is_valid:
                raise ValueError(message)
class RetryableError(Exception):
    """
    Exception raised for errors that should trigger a retry.
    """

    pass
class PreconditionFailedError(Exception):
    """
    Exception raised when a precondition fails. e.g. if-match, if-none-match, etc.
    """

    pass
class NotModifiedError(Exception):
    """
    Raised when a conditional operation fails because the resource has not been modified.

    This typically occurs when using if-none-match with a specific generation/etag
    and the resource's current generation/etag matches the specified one.
    """

    pass
class SourceVersionCheckMode(Enum):
    """
    Enum for controlling source version checking behavior.
    """

    INHERIT = "inherit"  # Inherit from configuration (cache config)
    ENABLE = "enable"  # Always check source version
    DISABLE = "disable"  # Never check source version
@dataclass
class Replica:
    """
    A tier of storage that can be used to store data.
    """

    #: Profile name identifying the replica's storage configuration.
    replica_profile: str
    #: Priority used to order replicas for reads (lower is presumably preferred — confirm against callers).
    read_priority: int
@dataclass
class AutoCommitConfig:
    """
    A data class that represents the configuration for auto commit.

    Converted to a ``@dataclass`` for consistency with the other configuration
    classes in this module; the generated ``__init__`` matches the previous
    hand-written signature ``(interval_minutes=None, at_exit=False)``.
    """

    #: The interval in minutes for auto commit; ``None`` disables interval-based commits.
    interval_minutes: Optional[float] = None
    #: If ``True``, commit on program exit.
    at_exit: bool = False
class ExecutionMode(Enum):
    """
    Enum for controlling execution mode in sync operations.
    """

    # Run the sync in the local process.
    LOCAL = "local"
    # Distribute the sync via Ray.
    RAY = "ray"
[docs] 744class PatternType(Enum): 745 """ 746 Type of pattern operation for include/exclude filtering. 747 """ 748 749 INCLUDE = "include" 750 EXCLUDE = "exclude"
751 752 753# Type alias for pattern matching 754PatternList = list[Tuple[PatternType, str]] 755 756
@dataclass
class DryrunResult:
    """
    Holds references to JSONL files produced by a dryrun sync operation.

    Each file contains one JSON object per line, matching the :py:class:`ObjectMetadata`
    serialization format (see :py:meth:`ObjectMetadata.to_dict` / :py:meth:`ObjectMetadata.from_dict`).

    The caller is responsible for cleaning up the files when they are no longer needed.
    """

    #: Path to a JSONL file listing source objects that would be added to the target.
    files_to_add: str
    #: Path to a JSONL file listing target objects that would be deleted.
    files_to_delete: str
@dataclass
class SyncResult:
    """
    A data class that represents the summary of a sync operation.
    """

    #: The total number of work units tracked for progress (including files from both source and target after filtering). Each work unit represents an ADD or DELETE operation.
    total_work_units: int = 0
    #: The total number of files processed to the target.
    total_files_added: int = 0
    #: The total number of files deleted from the target.
    total_files_deleted: int = 0
    #: The total number of bytes transferred to the target.
    total_bytes_added: int = 0
    #: The total number of bytes deleted from the target.
    total_bytes_deleted: int = 0
    #: The total time taken to process the sync operation.
    total_time_seconds: float = 0.0
    #: Dryrun details with paths to JSONL files. ``None`` for normal (non-dryrun) sync operations.
    dryrun: Optional[DryrunResult] = None

    def __str__(self) -> str:
        """Render a human-readable, multi-line summary of the sync statistics."""
        title = "Sync dryrun statistics:" if self.dryrun else "Sync statistics:"
        summary = [
            title,
            f" Work units: {self.total_work_units}",
            f" Files added: {self.total_files_added}",
            f" Files deleted: {self.total_files_deleted}",
            f" Bytes added: {self.total_bytes_added}",
            f" Bytes deleted: {self.total_bytes_deleted}",
            f" Time elapsed: {self.total_time_seconds:.2f}s",
        ]
        if self.dryrun:
            # Dryrun output additionally points at the generated JSONL manifests.
            summary.append(f" Files to add: {self.dryrun.files_to_add}")
            summary.append(f" Files to delete: {self.dryrun.files_to_delete}")
        return "\n".join(summary)
[docs] 811class SyncError(RuntimeError): 812 """ 813 Exception raised when errors occur during a sync operation. 814 815 This exception includes the partial SyncResult showing what was accomplished 816 before the error occurred, allowing users to understand the state of the sync. 817 818 :param message: The error message describing what went wrong. 819 :param sync_result: The partial SyncResult with statistics from the failed sync operation. 820 """ 821 822 def __init__(self, message: str, sync_result: SyncResult): 823 super().__init__(message) 824 self.sync_result = sync_result 825 826 def __str__(self) -> str: 827 sync_stats = str(self.sync_result).replace("Sync statistics:", "Partial sync statistics:") 828 return f"{super().__str__()}\n\n{sync_stats}"