1# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2# SPDX-License-Identifier: Apache-2.0
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16import importlib.util
17import io
18import os
19import tempfile
20from collections.abc import Callable, Iterator
21from typing import IO, Any, Optional, TypeVar, Union
22
23from huggingface_hub import CommitOperationCopy, HfApi
24from huggingface_hub.errors import EntryNotFoundError, HfHubHTTPError, RepositoryNotFoundError, RevisionNotFoundError
25from huggingface_hub.hf_api import RepoFile, RepoFolder
26
27from ..telemetry import Telemetry
28from ..types import (
29 AWARE_DATETIME_MIN,
30 Credentials,
31 CredentialsProvider,
32 ObjectMetadata,
33 Range,
34 RetryableError,
35 SymlinkHandling,
36)
37from ..utils import safe_makedirs
38from .base import BaseStorageProvider
39
# Return type of the callable wrapped by ``HuggingFaceStorageProvider._translate_errors``.
_T = TypeVar("_T")

# Provider name handed to ``BaseStorageProvider`` as ``provider_name``.
PROVIDER = "huggingface"

# Raised (as ValueError) by ``_validate_hf_transfer_availability`` when the
# HF_HUB_ENABLE_HF_TRANSFER environment variable is truthy but the
# ``hf_transfer`` package cannot be found.
HF_TRANSFER_UNAVAILABLE_ERROR_MESSAGE = (
    "Fast transfer using 'hf_transfer' is enabled (HF_HUB_ENABLE_HF_TRANSFER=1) "
    "but 'hf_transfer' package is not available in your environment. "
    "Either install hf_transfer with 'pip install hf_transfer' or "
    "disable it by setting HF_HUB_ENABLE_HF_TRANSFER=0"
)
50
51
[docs]
class HuggingFaceCredentialsProvider(CredentialsProvider):
    """
    :py:class:`multistorageclient.types.CredentialsProvider` implementation that serves a fixed HuggingFace access token.
    """

    def __init__(self, access_token: str):
        """
        Stores the access token that :py:meth:`get_credentials` hands back.

        :param access_token: The HuggingFace access token for authentication.
        """
        self.token = access_token

    def get_credentials(self) -> Credentials:
        """
        Builds the credentials object for the stored token.

        Only ``token`` carries data: the access/secret key fields are empty strings and no expiration is set.

        :return: The credentials used for HuggingFace authentication.
        """
        stored_token = self.token
        return Credentials(access_key="", secret_key="", token=stored_token, expiration=None)

    def refresh_credentials(self) -> None:
        """
        Does nothing.

        Note: HuggingFace tokens typically don't expire, so there is nothing to refresh.
        """
        return None
85
86
[docs]
87class HuggingFaceStorageProvider(BaseStorageProvider):
88 """
89 A concrete implementation of the :py:class:`multistorageclient.types.StorageProvider` for interacting with HuggingFace Hub repositories.
90 """
91
92 def __init__(
93 self,
94 repository_id: str,
95 repo_type: str = "model",
96 base_path: str = "",
97 repo_revision: str = "main",
98 credentials_provider: Optional[CredentialsProvider] = None,
99 config_dict: Optional[dict[str, Any]] = None,
100 telemetry_provider: Optional[Callable[[], Telemetry]] = None,
101 ):
102 """
103 Initializes the :py:class:`HuggingFaceStorageProvider` with repository information and optional credentials provider.
104
105 :param repository_id: The HuggingFace repository ID (e.g., 'username/repo-name').
106 :param repo_type: The type of repository ('dataset', 'model', 'space'). Defaults to 'model'.
107 :param base_path: The root prefix path within the repository where all operations will be scoped.
108 :param repo_revision: The git revision (branch, tag, or commit) to use. Defaults to 'main'.
109 :param credentials_provider: The provider to retrieve HuggingFace credentials.
110 :param config_dict: Resolved MSC config.
111 :param telemetry_provider: A function that provides a telemetry instance.
112 """
113
114 # Validate repo_type
115 allowed_repo_types = {"dataset", "model", "space"}
116 if repo_type not in allowed_repo_types:
117 raise ValueError(f"Invalid repo_type '{repo_type}'. Must be one of: {allowed_repo_types}")
118
119 # Validate repository_id format
120 if not repository_id or "/" not in repository_id:
121 raise ValueError(f"Invalid repository_id '{repository_id}'. Expected format: 'username/repo-name'")
122
123 self._validate_hf_transfer_availability()
124
125 super().__init__(
126 base_path=base_path,
127 provider_name=PROVIDER,
128 config_dict=config_dict,
129 telemetry_provider=telemetry_provider,
130 )
131
132 self._repository_id = repository_id
133 self._repo_type = repo_type
134 self._repo_revision = repo_revision
135 self._credentials_provider = credentials_provider
136
137 self._hf_client: HfApi = self._create_hf_api_client()
138
def _create_hf_api_client(self) -> HfApi:
    """
    Builds the HuggingFace API client used by every operation of this provider.

    The token is read once from the credentials provider, if one was supplied.
    Without a credentials provider the client is created with ``token=None``.

    :return: Configured HfApi client instance.
    """
    provider = self._credentials_provider
    token = provider.get_credentials().token if provider else None
    return HfApi(token=token)
155
156 def _validate_hf_transfer_availability(self) -> None:
157 """
158 Validates that hf_transfer is available if it's enabled via environment variables.
159
160 Raises:
161 ValueError: If hf_transfer is enabled but not available.
162 """
163 # Check if hf_transfer is enabled via environment variable
164 hf_transfer_enabled = os.environ.get("HF_HUB_ENABLE_HF_TRANSFER", "").lower() in ("1", "on", "true", "yes")
165
166 if hf_transfer_enabled and importlib.util.find_spec("hf_transfer") is None:
167 raise ValueError(HF_TRANSFER_UNAVAILABLE_ERROR_MESSAGE)
168
169 def _parse_rate_limit_headers(self, response) -> str:
170 """
171 Parses HuggingFace rate limit headers and returns formatted information.
172
173 HuggingFace returns rate limit information in these headers:
174 - RateLimit: "api";r=0;t=142
175 - r = requests remaining in the current window
176 - t = seconds until rate limit resets
177 - RateLimit-Policy: "fixed window";"api";q=10000;w=300
178 - q = total requests allowed per window
179 - w = window size in seconds
180
181 Reference: https://huggingface.co/docs/hub/rate-limits
182
183 :param response: The HTTP response object containing rate limit headers.
184 :return: Formatted string with rate limit information, or empty string if headers not found.
185 """
186
187 try:
188 headers = response.headers
189 except Exception:
190 return ""
191
192 rate_limit_info = []
193
194 # Note: HTTP headers are case-insensitive, but we use the canonical casing from HF docs
195 if "RateLimit" in headers:
196 rate_limit = headers["RateLimit"]
197 # Extract r (remaining) and t (time until reset)
198 remaining = None
199 reset_seconds = None
200
201 parts = rate_limit.split(";")
202 for part in parts:
203 part = part.strip()
204 if part.startswith("r="):
205 try:
206 remaining = int(part[2:])
207 except ValueError:
208 pass
209 elif part.startswith("t="):
210 try:
211 reset_seconds = int(part[2:])
212 except ValueError:
213 pass
214
215 if remaining is not None:
216 rate_limit_info.append(f"Requests remaining in current window: {remaining}")
217 if reset_seconds is not None:
218 rate_limit_info.append(f"Rate limit resets in: {reset_seconds} seconds")
219
220 if "RateLimit-Policy" in headers:
221 policy = headers["RateLimit-Policy"]
222 # Extract q (quota) and w (window size)
223 quota = None
224 window_seconds = None
225
226 parts = policy.split(";")
227 for part in parts:
228 part = part.strip()
229 if part.startswith("q="):
230 try:
231 quota = int(part[2:])
232 except ValueError:
233 pass
234 elif part.startswith("w="):
235 try:
236 window_seconds = int(part[2:])
237 except ValueError:
238 pass
239
240 if quota is not None and window_seconds is not None:
241 window_minutes = window_seconds / 60
242 rate_limit_info.append(f"Rate limit policy: {quota} requests per {window_minutes:.0f}-minute window")
243
244 if rate_limit_info:
245 return " | ".join(rate_limit_info)
246
247 return ""
248
249 def _translate_errors(
250 self,
251 func: Callable[[], _T],
252 operation: str,
253 repo_id: str,
254 path: str,
255 ) -> _T:
256 """
257 Translates HuggingFace errors into standardized exceptions with retry logic.
258
259 Parses HuggingFace rate limit headers (RateLimit and RateLimit-Policy) to provide
260 detailed information about rate limiting to users. See https://huggingface.co/docs/hub/rate-limits
261
262 :param func: The function that performs the actual HuggingFace operation.
263 :param operation: The type of operation being performed (e.g., "upload", "download", "delete").
264 :param repo_id: The HuggingFace repository ID.
265 :param path: The path of the object within the repository.
266 :return: The result of the HuggingFace operation.
267 :raises RetryableError: For transient errors that can be retried (429, 503, connection errors).
268 :raises FileNotFoundError: When the requested resource is not found.
269 :raises RuntimeError: For other non-retryable errors.
270 """
271 try:
272 return func()
273 except RepositoryNotFoundError as error:
274 raise FileNotFoundError(
275 f"Repository not found or access denied: {repo_id}. "
276 f"Verify the repository exists and you have access permissions."
277 ) from error
278 except RevisionNotFoundError as error:
279 raise FileNotFoundError(
280 f"Revision '{self._repo_revision}' not found in repository {repo_id}. "
281 f"Verify the branch, tag, or commit exists."
282 ) from error
283 except EntryNotFoundError as error:
284 raise FileNotFoundError(f"File not found in HuggingFace repository: {path}") from error
285 except FileNotFoundError:
286 raise
287 except HfHubHTTPError as error:
288 # Extract status code and parse rate limit headers
289 # Don't use hasattr() - it's unreliable with response objects
290 status_code = None
291 response = None
292
293 try:
294 response = error.response
295 if response is not None:
296 status_code = response.status_code
297 except AttributeError:
298 pass
299
300 rate_limit_info = self._parse_rate_limit_headers(response)
301 quota_suffix = f" | {rate_limit_info}" if rate_limit_info else ""
302
303 error_info = f"repo_id: {repo_id}, path: {path}, status_code: {status_code}, error: {error}"
304
305 if status_code == 404:
306 raise FileNotFoundError(f"Object {repo_id}/{path} does not exist. {error_info}") from error
307 elif status_code == 409:
308 raise RetryableError(f"Conflict Error for {repo_id}. {error_info}{quota_suffix}") from error
309 elif status_code == 429:
310 base_message = f"Rate limit exceeded when {operation} object(s) at {repo_id}/{path}. {error_info}"
311 raise RetryableError(f"{base_message}{quota_suffix}") from error
312 elif status_code == 503:
313 raise RetryableError(
314 f"Service unavailable when {operation} object(s) at {repo_id}/{path}. {error_info}{quota_suffix}"
315 ) from error
316 elif status_code in (408, 500, 502, 504):
317 raise RetryableError(
318 f"Transient error ({status_code}) when {operation} object(s) at {repo_id}/{path}. {error_info}{quota_suffix}"
319 ) from error
320 else:
321 raise RuntimeError(
322 f"HuggingFace API error during {operation} of {path}: {error}{quota_suffix}"
323 ) from error
324 except (ConnectionError, TimeoutError, OSError) as error:
325 raise RetryableError(
326 f"Connection error when {operation} object(s) at {repo_id}/{path}, error type: {type(error).__name__}"
327 ) from error
328 except Exception as error:
329 raise RuntimeError(f"Unexpected error during {operation} of {path}: {error}") from error
330
331 def _put_object(
332 self,
333 path: str,
334 body: bytes,
335 if_match: Optional[str] = None,
336 if_none_match: Optional[str] = None,
337 attributes: Optional[dict[str, str]] = None,
338 ) -> int:
339 """
340 Uploads an object to the HuggingFace repository.
341
342 :param path: The path where the object will be stored in the repository.
343 :param body: The content of the object to store.
344 :param if_match: Optional ETag for conditional uploads (not supported by HuggingFace).
345 :param if_none_match: Optional ETag for conditional uploads (not supported by HuggingFace).
346 :param attributes: Optional attributes for the object (not supported by HuggingFace).
347 :return: Data size in bytes.
348 :raises RuntimeError: If HuggingFace client is not initialized or API errors occur.
349 :raises ValueError: If client attempts to create a directory.
350 :raises ValueError: If conditional upload parameters are provided (not supported).
351 """
352 if not self._hf_client:
353 raise RuntimeError("HuggingFace client not initialized")
354
355 if if_match is not None or if_none_match is not None:
356 raise ValueError(
357 "HuggingFace provider does not support conditional uploads. "
358 "if_match and if_none_match parameters are not supported."
359 )
360
361 if attributes is not None:
362 raise ValueError(
363 "HuggingFace provider does not support custom object attributes. "
364 "Use commit messages or repository metadata instead."
365 )
366
367 if path.endswith("/"):
368 raise ValueError(
369 "HuggingFace Storage Provider does not support explicit directory creation. "
370 "Directories are created implicitly when files are uploaded to paths within them."
371 )
372
373 path = self._normalize_path(path)
374
375 def _invoke_api():
376 with tempfile.NamedTemporaryFile(delete=False) as temp_file:
377 temp_file.write(body)
378 temp_file_path = temp_file.name
379
380 try:
381 self._hf_client.upload_file(
382 path_or_fileobj=temp_file_path,
383 path_in_repo=path,
384 repo_id=self._repository_id,
385 repo_type=self._repo_type,
386 revision=self._repo_revision,
387 commit_message=f"Upload {path}",
388 commit_description=None,
389 create_pr=False,
390 )
391
392 return len(body)
393
394 finally:
395 os.unlink(temp_file_path)
396
397 return self._translate_errors(_invoke_api, "PUT", self._repository_id, path)
398
399 def _get_object(self, path: str, byte_range: Optional[Range] = None) -> bytes:
400 """
401 Retrieves an object from the HuggingFace repository.
402
403 :param path: The path of the object to retrieve from the repository.
404 :param byte_range: Optional byte range for partial content (not supported by HuggingFace).
405 :return: The content of the retrieved object.
406 :raises RuntimeError: If HuggingFace client is not initialized or API errors occur.
407 :raises ValueError: If a byte range is requested (HuggingFace doesn't support range reads).
408 :raises FileNotFoundError: If the file doesn't exist in the repository.
409 """
410
411 if not self._hf_client:
412 raise RuntimeError("HuggingFace client not initialized")
413
414 if byte_range is not None:
415 raise ValueError(
416 "HuggingFace provider does not support partial range reads. "
417 f"Requested range: offset={byte_range.offset}, size={byte_range.size}. "
418 "To read the entire file, call get_object() without the byte_range parameter."
419 )
420
421 path = self._normalize_path(path)
422
423 def _invoke_api():
424 with tempfile.TemporaryDirectory() as temp_dir:
425 downloaded_path = self._hf_client.hf_hub_download(
426 repo_id=self._repository_id,
427 filename=path,
428 repo_type=self._repo_type,
429 revision=self._repo_revision,
430 local_dir=temp_dir,
431 )
432
433 with open(downloaded_path, "rb") as f:
434 data = f.read()
435
436 return data
437
438 return self._translate_errors(_invoke_api, "GET", self._repository_id, path)
439
440 def _copy_object(self, src_path: str, dest_path: str) -> int:
441 """
442 Copies an object within the HuggingFace repository using server-side copy.
443
444 .. note::
445 Copy behavior is size-dependent: files ≥10MB are copied remotely via
446 metadata (LFS), while files <10MB are downloaded and re-uploaded.
447
448 :param src_path: The source path of the object to copy.
449 :param dest_path: The destination path for the copied object.
450 :return: Data size in bytes.
451 :raises RuntimeError: If HuggingFace client is not initialized or API errors occur.
452 :raises FileNotFoundError: If the source file doesn't exist.
453 """
454 if not self._hf_client:
455 raise RuntimeError("HuggingFace client not initialized")
456
457 src_path = self._normalize_path(src_path)
458 dest_path = self._normalize_path(dest_path)
459
460 src_object = self._get_object_metadata(src_path)
461
462 def _invoke_api():
463 operations = [
464 CommitOperationCopy(
465 src_path_in_repo=src_path,
466 path_in_repo=dest_path,
467 )
468 ]
469
470 self._hf_client.create_commit(
471 repo_id=self._repository_id,
472 operations=operations,
473 commit_message=f"Copy {src_path} to {dest_path}",
474 repo_type=self._repo_type,
475 revision=self._repo_revision,
476 )
477
478 return src_object.content_length
479
480 return self._translate_errors(_invoke_api, "COPY", self._repository_id, f"{src_path} to {dest_path}")
481
482 def _delete_object(self, path: str, if_match: Optional[str] = None) -> None:
483 """
484 Deletes an object from the HuggingFace repository.
485
486 :param path: The path of the object to delete from the repository.
487 :param if_match: Optional ETag for conditional deletion (not supported by HuggingFace).
488 :raises RuntimeError: If HuggingFace client is not initialized or API errors occur.
489 :raises ValueError: If conditional deletion parameters are provided (not supported).
490 :raises FileNotFoundError: If the file doesn't exist in the repository.
491 """
492 if not self._hf_client:
493 raise RuntimeError("HuggingFace client not initialized")
494
495 if if_match is not None:
496 raise ValueError(
497 "HuggingFace provider does not support conditional deletion. if_match parameter is not supported."
498 )
499
500 path = self._normalize_path(path)
501
502 def _invoke_api():
503 self._hf_client.delete_file(
504 path_in_repo=path,
505 repo_id=self._repository_id,
506 repo_type=self._repo_type,
507 revision=self._repo_revision,
508 commit_message=f"Delete {path}",
509 )
510
511 self._translate_errors(_invoke_api, "DELETE", self._repository_id, path)
512
def _item_to_metadata(self, item: Union[RepoFile, RepoFolder]) -> ObjectMetadata:
    """
    Convert a RepoFile or RepoFolder into ObjectMetadata.

    Files map to ``type="file"`` with their size and blob id as ETag; anything else maps to
    ``type="directory"`` with size 0 and the tree id as ETag.

    ``last_modified`` is taken from the item's last commit date when the item carries one
    (the listing calls in this provider request ``expand=True``); otherwise it falls back to
    ``AWARE_DATETIME_MIN``.

    :param item: The RepoFile or RepoFolder item from HuggingFace API.
    :return: ObjectMetadata representing the item.
    """
    from datetime import timezone

    # ``last_commit`` is absent/None when the item was fetched without ``expand=True``.
    last_commit = getattr(item, "last_commit", None)
    commit_date = getattr(last_commit, "date", None)
    if commit_date is None:
        last_modified = AWARE_DATETIME_MIN
    elif commit_date.tzinfo is None:
        # Keep the value timezone-aware like AWARE_DATETIME_MIN so comparisons never mix
        # naive and aware datetimes. NOTE(review): assumes a naive date is UTC — confirm.
        last_modified = commit_date.replace(tzinfo=timezone.utc)
    else:
        last_modified = commit_date

    is_file = isinstance(item, RepoFile)
    return ObjectMetadata(
        key=item.path,
        type="file" if is_file else "directory",
        content_length=item.size if is_file else 0,
        last_modified=last_modified,
        etag=item.blob_id if is_file else item.tree_id,
        content_type=None,
        storage_class=None,
        metadata=None,
    )
546
547 def _make_symlink(self, path: str, target: str) -> None:
548 """
549 Not supported. HuggingFace repositories are read-only through this provider.
550
551 :raises NotImplementedError: Always.
552 """
553 raise NotImplementedError("HuggingFace provider does not support symlink creation.")
554
555 def _get_object_metadata(self, path: str, strict: bool = True) -> ObjectMetadata:
556 """
557 Retrieves metadata for an object in the HuggingFace repository.
558
559 :param path: The path of the object to get metadata for.
560 :param strict: Whether to raise an error if the object doesn't exist.
561 :return: Metadata about the object.
562 :raises RuntimeError: If HuggingFace client is not initialized or API errors occur.
563 :raises FileNotFoundError: If the file doesn't exist and strict=True.
564 """
565 if not self._hf_client:
566 raise RuntimeError("HuggingFace client not initialized")
567
568 path = self._normalize_path(path)
569
570 def _invoke_api():
571 items = self._hf_client.get_paths_info(
572 repo_id=self._repository_id,
573 paths=[path],
574 repo_type=self._repo_type,
575 revision=self._repo_revision,
576 expand=True,
577 )
578
579 if not items:
580 raise FileNotFoundError(f"File not found in HuggingFace repository: {path}")
581
582 item = items[0]
583 return self._item_to_metadata(item)
584
585 try:
586 return self._translate_errors(_invoke_api, "HEAD", self._repository_id, path)
587 except FileNotFoundError as error:
588 if strict:
589 dir_path = path.rstrip("/") + "/"
590 if self._is_dir(dir_path):
591 return ObjectMetadata(
592 key=dir_path,
593 type="directory",
594 content_length=0,
595 last_modified=AWARE_DATETIME_MIN,
596 etag=None,
597 content_type=None,
598 storage_class=None,
599 metadata=None,
600 )
601 raise error
602
def _list_objects(
    self,
    path: str,
    start_after: Optional[str] = None,
    end_at: Optional[str] = None,
    include_directories: bool = False,
    symlink_handling: SymlinkHandling = SymlinkHandling.FOLLOW,
) -> Iterator[ObjectMetadata]:
    """
    Lists objects in the HuggingFace repository under the specified path.

    If ``path`` names a single file, only that file's metadata is yielded. Otherwise the repository
    tree under ``path`` is fetched in full and filtered. When ``include_directories`` is False the
    tree is fetched recursively and only files are yielded; when True it is fetched non-recursively
    and both folders and files are yielded. A path that does not exist yields nothing.

    :param path: The path to list objects under.
    :param start_after: The key to start listing after (exclusive, used as cursor).
    :param end_at: The key to end listing at (inclusive, used as cursor).
    :param include_directories: Whether to include directories in the listing.
    :param symlink_handling: Accepted for interface compatibility; not used by this implementation.
    :return: An iterator over object metadata for objects under the specified path.
    :raises RuntimeError: If HuggingFace client is not initialized or API errors occur.

    .. note::
        HuggingFace Hub API does not natively support pagination parameters.
        This implementation fetches all items and uses cursor-based filtering,
        which may impact performance for large repositories. The ordering is
        directory-first, then files, with lexicographical ordering within each group.
    """
    if not self._hf_client:
        raise RuntimeError("HuggingFace client not initialized")

    path = self._normalize_path(path)
    directory = path.rstrip("/")

    # A path that resolves to a file is listed as just that file.
    try:
        single = self._get_object_metadata(directory, strict=False)
        if single and single.type == "file":
            yield single
            return
    except FileNotFoundError:
        pass

    def _invoke_api():
        tree = self._hf_client.list_repo_tree(
            repo_id=self._repository_id,
            path_in_repo=(directory + "/") if directory else None,
            repo_type=self._repo_type,
            revision=self._repo_revision,
            expand=True,
            recursive=not include_directories,
        )
        # Materialize inside the wrapped call so lazy API errors are translated too.
        return list(tree)

    try:
        items = self._translate_errors(_invoke_api, "LIST", self._repository_id, path)

        # Cursors are matched by key equality because HuggingFace returns items with
        # directory-first ordering (not pure lexicographical).
        awaiting_start = start_after is not None

        for item in items:
            metadata = self._item_to_metadata(item)
            key = metadata.key

            if awaiting_start:
                # Skip everything up to and including the start_after key.
                if key == start_after:
                    awaiting_start = False
                continue

            is_listed_folder = include_directories and isinstance(item, RepoFolder)
            if is_listed_folder or isinstance(item, RepoFile):
                yield metadata

            # end_at is inclusive: stop right after handling the matching key.
            if end_at is not None and key == end_at:
                break

    except FileNotFoundError:
        # Directory doesn't exist - return empty (matches POSIX behavior)
        pass
689
690 def _upload_file(self, remote_path: str, f: Union[str, IO], attributes: Optional[dict[str, str]] = None) -> int:
691 """
692 Uploads a file to the HuggingFace repository.
693
694 :param remote_path: The remote path where the file will be stored in the repository.
695 :param f: File path or file object to upload.
696 :param attributes: Optional attributes for the file (not supported by HuggingFace).
697 :return: Data size in bytes.
698 :raises RuntimeError: If HuggingFace client is not initialized or API errors occur.
699 :raises ValueError: If client attempts to create a directory.
700 :raises ValueError: If custom attributes are provided (not supported).
701 """
702 if not self._hf_client:
703 raise RuntimeError("HuggingFace client not initialized")
704
705 if attributes is not None:
706 raise ValueError(
707 "HuggingFace provider does not support custom file attributes. "
708 "Use commit messages or repository metadata instead."
709 )
710
711 if remote_path.endswith("/"):
712 raise ValueError(
713 "HuggingFace Storage Provider does not support explicit directory creation. "
714 "Directories are created implicitly when files are uploaded to paths within them."
715 )
716
717 remote_path = self._normalize_path(remote_path)
718
719 def _invoke_api():
720 if isinstance(f, str):
721 file_size = os.path.getsize(f)
722
723 self._hf_client.upload_file(
724 path_or_fileobj=f,
725 path_in_repo=remote_path,
726 repo_id=self._repository_id,
727 repo_type=self._repo_type,
728 revision=self._repo_revision,
729 commit_message=f"Upload {remote_path}",
730 commit_description=None,
731 create_pr=False,
732 )
733
734 return file_size
735
736 else:
737 content = f.read()
738
739 if isinstance(content, str):
740 content_bytes = content.encode("utf-8")
741 else:
742 content_bytes = content
743
744 # Create temporary file since HfAPI.upload_file requires BinaryIO, not generic IO
745 with tempfile.NamedTemporaryFile(delete=False) as temp_file:
746 temp_file.write(content_bytes)
747 temp_file_path = temp_file.name
748
749 try:
750 self._hf_client.upload_file(
751 path_or_fileobj=temp_file_path,
752 path_in_repo=remote_path,
753 repo_id=self._repository_id,
754 repo_type=self._repo_type,
755 revision=self._repo_revision,
756 commit_message=f"Upload {remote_path}",
757 create_pr=False,
758 )
759
760 return len(content_bytes)
761
762 finally:
763 os.unlink(temp_file_path)
764
765 return self._translate_errors(_invoke_api, "PUT", self._repository_id, remote_path)
766
767 def _download_file(self, remote_path: str, f: Union[str, IO], metadata: Optional[ObjectMetadata] = None) -> int:
768 """
769 Downloads a file from the HuggingFace repository.
770
771 :param remote_path: The remote path of the file to download from the repository.
772 :param f: Local file path or file object to write to.
773 :param metadata: Optional object metadata (not used in this implementation).
774 :return: Data size in bytes.
775 """
776 if not self._hf_client:
777 raise RuntimeError("HuggingFace client not initialized")
778
779 remote_path = self._normalize_path(remote_path)
780
781 def _invoke_api():
782 if isinstance(f, str):
783 parent_dir = os.path.dirname(f)
784 if parent_dir:
785 safe_makedirs(parent_dir)
786
787 target_dir = parent_dir if parent_dir else "."
788 downloaded_path = self._hf_client.hf_hub_download(
789 repo_id=self._repository_id,
790 filename=remote_path,
791 repo_type=self._repo_type,
792 revision=self._repo_revision,
793 local_dir=target_dir,
794 )
795
796 if os.path.abspath(downloaded_path) != os.path.abspath(f):
797 os.rename(downloaded_path, f)
798
799 return os.path.getsize(f)
800
801 else:
802 with tempfile.TemporaryDirectory() as temp_dir:
803 downloaded_path = self._hf_client.hf_hub_download(
804 repo_id=self._repository_id,
805 filename=remote_path,
806 repo_type=self._repo_type,
807 revision=self._repo_revision,
808 local_dir=temp_dir,
809 )
810
811 with open(downloaded_path, "rb") as src:
812 data = src.read()
813 if isinstance(f, io.TextIOBase):
814 f.write(data.decode("utf-8"))
815 else:
816 f.write(data)
817
818 return len(data)
819
820 return self._translate_errors(_invoke_api, "GET", self._repository_id, remote_path)
821
822 def _is_dir(self, path: str) -> bool:
823 """
824 Helper method to check if a path is a directory.
825
826 :param path: The path to check.
827 :return: True if the path appears to be a directory (has files under it).
828 """
829 path = path.rstrip("/")
830 if not path:
831 # The root of the repo is always a directory
832 return True
833
834 try:
835 path_info = self._hf_client.get_paths_info(
836 repo_id=self._repository_id,
837 paths=[path],
838 repo_type=self._repo_type,
839 revision=self._repo_revision,
840 )
841
842 if not path_info:
843 return False
844
845 return isinstance(path_info[0], RepoFolder)
846
847 except RepositoryNotFoundError as e:
848 raise FileNotFoundError(
849 f"Repository not found or access denied: {self._repository_id}. "
850 f"Verify the repository exists and you have access permissions."
851 ) from e
852 except RevisionNotFoundError as e:
853 raise FileNotFoundError(
854 f"Revision '{self._repo_revision}' not found in repository {self._repository_id}. "
855 f"Verify the branch, tag, or commit exists."
856 ) from e
857 except IndexError:
858 return False
859 except Exception as e:
860 raise Exception(f"Unexpected error: {e}")
861
862 def _normalize_path(self, path: str) -> str:
863 """
864 Normalize path for HuggingFace API by removing leading slashes.
865 HuggingFace expects relative paths within repositories.
866 """
867 return path.lstrip("/")