1# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2# SPDX-License-Identifier: Apache-2.0
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16import importlib.util
17import io
18import os
19import tempfile
20from collections.abc import Callable, Iterator
21from typing import IO, Any, Optional, TypeVar, Union
22
23from huggingface_hub import CommitOperationCopy, HfApi
24from huggingface_hub.errors import EntryNotFoundError, HfHubHTTPError, RepositoryNotFoundError, RevisionNotFoundError
25from huggingface_hub.hf_api import RepoFile, RepoFolder
26
27from ..telemetry import Telemetry
28from ..types import AWARE_DATETIME_MIN, Credentials, CredentialsProvider, ObjectMetadata, Range, RetryableError
29from ..utils import safe_makedirs
30from .base import BaseStorageProvider
31
# Generic return type of the callables wrapped by the provider's error-translation helper.
_T = TypeVar("_T")

# Provider name handed to the base storage provider.
PROVIDER = "huggingface"

# Raised (as a ValueError message) when HF_HUB_ENABLE_HF_TRANSFER is switched on
# but the optional ``hf_transfer`` package cannot be imported.
HF_TRANSFER_UNAVAILABLE_ERROR_MESSAGE = " ".join(
    (
        "Fast transfer using 'hf_transfer' is enabled (HF_HUB_ENABLE_HF_TRANSFER=1)",
        "but 'hf_transfer' package is not available in your environment.",
        "Either install hf_transfer with 'pip install hf_transfer' or",
        "disable it by setting HF_HUB_ENABLE_HF_TRANSFER=0",
    )
)
42
43
[docs]
class HuggingFaceCredentialsProvider(CredentialsProvider):
    """
    A :py:class:`multistorageclient.types.CredentialsProvider` that serves a fixed HuggingFace access token.
    """

    def __init__(self, access_token: str):
        """
        Stores the access token that will be handed out by :py:meth:`get_credentials`.

        :param access_token: The HuggingFace access token for authentication.
        """
        self.token = access_token

    def get_credentials(self) -> Credentials:
        """
        Builds the credentials object for HuggingFace authentication.

        Only the ``token`` field carries information; the key fields are empty strings and
        no expiration is set.

        :return: The current credentials used for HuggingFace authentication.
        """
        credentials = Credentials(
            access_key="",
            secret_key="",
            token=self.token,
            expiration=None,
        )
        return credentials

    def refresh_credentials(self) -> None:
        """
        Refreshes the credentials if they are expired or about to expire.

        Note: HuggingFace tokens typically don't expire, so this is a no-op.
        """
        return None
77
78
[docs]
class HuggingFaceStorageProvider(BaseStorageProvider):
    """
    A concrete implementation of the :py:class:`multistorageclient.types.StorageProvider` for interacting with HuggingFace Hub repositories.

    All operations target a single repository (``repository_id`` / ``repo_type``) at a fixed
    revision. HuggingFace Hub errors are funnelled through :py:meth:`_translate_errors` so callers
    see ``FileNotFoundError``, ``RetryableError`` or ``RuntimeError`` instead of hub-specific types.
    """

    def __init__(
        self,
        repository_id: str,
        repo_type: str = "model",
        base_path: str = "",
        repo_revision: str = "main",
        credentials_provider: Optional[CredentialsProvider] = None,
        config_dict: Optional[dict[str, Any]] = None,
        telemetry_provider: Optional[Callable[[], Telemetry]] = None,
    ):
        """
        Initializes the :py:class:`HuggingFaceStorageProvider` with repository information and optional credentials provider.

        :param repository_id: The HuggingFace repository ID (e.g., 'username/repo-name').
        :param repo_type: The type of repository ('dataset', 'model', 'space'). Defaults to 'model'.
        :param base_path: The root prefix path within the repository where all operations will be scoped.
        :param repo_revision: The git revision (branch, tag, or commit) to use. Defaults to 'main'.
        :param credentials_provider: The provider to retrieve HuggingFace credentials.
        :param config_dict: Resolved MSC config.
        :param telemetry_provider: A function that provides a telemetry instance.
        :raises ValueError: If ``repo_type`` or ``repository_id`` is invalid, or if hf_transfer is
            enabled through the environment but not installed.
        """

        # Validate repo_type
        allowed_repo_types = {"dataset", "model", "space"}
        if repo_type not in allowed_repo_types:
            raise ValueError(f"Invalid repo_type '{repo_type}'. Must be one of: {allowed_repo_types}")

        # Validate repository_id format
        if not repository_id or "/" not in repository_id:
            raise ValueError(f"Invalid repository_id '{repository_id}'. Expected format: 'username/repo-name'")

        # Fail fast on a misconfigured environment before any client is created.
        self._validate_hf_transfer_availability()

        super().__init__(
            base_path=base_path,
            provider_name=PROVIDER,
            config_dict=config_dict,
            telemetry_provider=telemetry_provider,
        )

        self._repository_id = repository_id
        self._repo_type = repo_type
        self._repo_revision = repo_revision
        self._credentials_provider = credentials_provider

        self._hf_client: HfApi = self._create_hf_api_client()

    def _create_hf_api_client(self) -> HfApi:
        """
        Creates and configures the HuggingFace API client.

        Initializes the HfApi client with authentication token if credentials are provided,
        otherwise creates an unauthenticated client for public repositories.

        :return: Configured HfApi client instance.
        """
        token = None
        if self._credentials_provider:
            creds = self._credentials_provider.get_credentials()
            token = creds.token

        return HfApi(token=token)

    def _validate_hf_transfer_availability(self) -> None:
        """
        Validates that hf_transfer is available if it's enabled via environment variables.

        :raises ValueError: If hf_transfer is enabled but not available.
        """
        # Check if hf_transfer is enabled via environment variable
        hf_transfer_enabled = os.environ.get("HF_HUB_ENABLE_HF_TRANSFER", "").lower() in ("1", "on", "true", "yes")

        if hf_transfer_enabled and importlib.util.find_spec("hf_transfer") is None:
            raise ValueError(HF_TRANSFER_UNAVAILABLE_ERROR_MESSAGE)

    @staticmethod
    def _parse_int_params(header_value: str, names: tuple[str, ...]) -> dict[str, int]:
        """
        Extracts integer ``name=value`` parameters from a ``;``-separated header value.

        Parameters that are not listed in ``names`` or whose value is not an integer are skipped.

        :param header_value: Raw header value, e.g. ``"api";r=0;t=142``.
        :param names: Parameter names to extract.
        :return: Mapping from parameter name to its parsed integer value.
        """
        parsed: dict[str, int] = {}
        for part in header_value.split(";"):
            name, separator, raw_value = part.strip().partition("=")
            if not separator or name not in names:
                continue
            try:
                parsed[name] = int(raw_value)
            except ValueError:
                # Malformed number: keep whatever was parsed so far for this name.
                continue
        return parsed

    def _parse_rate_limit_headers(self, response: Any) -> str:
        """
        Parses HuggingFace rate limit headers and returns formatted information.

        HuggingFace returns rate limit information in these headers:
        - RateLimit: "api";r=0;t=142
            - r = requests remaining in the current window
            - t = seconds until rate limit resets
        - RateLimit-Policy: "fixed window";"api";q=10000;w=300
            - q = total requests allowed per window
            - w = window size in seconds

        Reference: https://huggingface.co/docs/hub/rate-limits

        :param response: The HTTP response object containing rate limit headers (may be ``None``).
        :return: Formatted string with rate limit information, or empty string if headers not found.
        """
        # Best effort only: this runs while another error is being reported, so a missing or
        # odd response object must never raise from here.
        try:
            headers = response.headers
        except Exception:
            return ""

        if not headers:
            return ""

        rate_limit_info = []

        # Note: HTTP headers are case-insensitive, but we use the canonical casing from HF docs
        if "RateLimit" in headers:
            usage = self._parse_int_params(headers["RateLimit"], ("r", "t"))
            if "r" in usage:
                rate_limit_info.append(f"Requests remaining in current window: {usage['r']}")
            if "t" in usage:
                rate_limit_info.append(f"Rate limit resets in: {usage['t']} seconds")

        if "RateLimit-Policy" in headers:
            policy = self._parse_int_params(headers["RateLimit-Policy"], ("q", "w"))
            # The policy line is only meaningful when both quota and window are known.
            if "q" in policy and "w" in policy:
                window_minutes = policy["w"] / 60
                rate_limit_info.append(
                    f"Rate limit policy: {policy['q']} requests per {window_minutes:.0f}-minute window"
                )

        return " | ".join(rate_limit_info)

    def _translate_errors(
        self,
        func: Callable[[], _T],
        operation: str,
        repo_id: str,
        path: str,
    ) -> _T:
        """
        Translates HuggingFace errors into standardized exceptions with retry logic.

        Parses HuggingFace rate limit headers (RateLimit and RateLimit-Policy) to provide
        detailed information about rate limiting to users. See https://huggingface.co/docs/hub/rate-limits

        :param func: The function that performs the actual HuggingFace operation.
        :param operation: The type of operation being performed (e.g., "upload", "download", "delete").
        :param repo_id: The HuggingFace repository ID.
        :param path: The path of the object within the repository.
        :return: The result of the HuggingFace operation.
        :raises RetryableError: For transient errors that can be retried (408, 409, 429, 5xx, connection errors).
        :raises FileNotFoundError: When the requested resource is not found.
        :raises RuntimeError: For other non-retryable errors.
        """
        try:
            return func()
        except RepositoryNotFoundError as error:
            raise FileNotFoundError(
                f"Repository not found or access denied: {repo_id}. "
                f"Verify the repository exists and you have access permissions."
            ) from error
        except RevisionNotFoundError as error:
            raise FileNotFoundError(
                f"Revision '{self._repo_revision}' not found in repository {repo_id}. "
                f"Verify the branch, tag, or commit exists."
            ) from error
        except EntryNotFoundError as error:
            raise FileNotFoundError(f"File not found in HuggingFace repository: {path}") from error
        except FileNotFoundError:
            # Already in the standardized form (e.g. raised by the wrapped callable itself).
            raise
        except HfHubHTTPError as error:
            # Extract status code and parse rate limit headers
            # Don't use hasattr() - it's unreliable with response objects
            status_code = None
            response = None

            try:
                response = error.response
                if response is not None:
                    status_code = response.status_code
            except AttributeError:
                pass

            rate_limit_info = self._parse_rate_limit_headers(response)
            quota_suffix = f" | {rate_limit_info}" if rate_limit_info else ""

            error_info = f"repo_id: {repo_id}, path: {path}, status_code: {status_code}, error: {error}"

            if status_code == 404:
                raise FileNotFoundError(f"Object {repo_id}/{path} does not exist. {error_info}") from error
            elif status_code == 409:
                raise RetryableError(f"Conflict Error for {repo_id}. {error_info}{quota_suffix}") from error
            elif status_code == 429:
                base_message = f"Rate limit exceeded when {operation} object(s) at {repo_id}/{path}. {error_info}"
                raise RetryableError(f"{base_message}{quota_suffix}") from error
            elif status_code == 503:
                raise RetryableError(
                    f"Service unavailable when {operation} object(s) at {repo_id}/{path}. {error_info}{quota_suffix}"
                ) from error
            elif status_code in (408, 500, 502, 504):
                raise RetryableError(
                    f"Transient error ({status_code}) when {operation} object(s) at {repo_id}/{path}. {error_info}{quota_suffix}"
                ) from error
            else:
                raise RuntimeError(
                    f"HuggingFace API error during {operation} of {path}: {error}{quota_suffix}"
                ) from error
        except (ConnectionError, TimeoutError, OSError) as error:
            raise RetryableError(
                f"Connection error when {operation} object(s) at {repo_id}/{path}, error type: {type(error).__name__}"
            ) from error
        except Exception as error:
            raise RuntimeError(f"Unexpected error during {operation} of {path}: {error}") from error

    def _upload_local_file(self, local_path: str, remote_path: str) -> None:
        """
        Uploads one local file to ``remote_path`` in the repository as a single commit.

        :param local_path: Path of the file on the local filesystem.
        :param remote_path: Normalized destination path inside the repository.
        """
        self._hf_client.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=remote_path,
            repo_id=self._repository_id,
            repo_type=self._repo_type,
            revision=self._repo_revision,
            commit_message=f"Upload {remote_path}",
            commit_description=None,
            create_pr=False,
        )

    def _upload_bytes(self, data: bytes, remote_path: str) -> int:
        """
        Uploads in-memory bytes to ``remote_path`` by spooling them through a temporary file.

        :param data: The content to upload.
        :param remote_path: Normalized destination path inside the repository.
        :return: Data size in bytes.
        """
        # NOTE(review): the upload goes through a file path rather than the raw bytes, as the
        # original implementation did - presumably so path-based transfer backends stay usable; confirm.
        # delete=False so the file can be closed before the upload re-opens it by name.
        temp_file = tempfile.NamedTemporaryFile(delete=False)
        try:
            with temp_file:
                temp_file.write(data)
            self._upload_local_file(temp_file.name, remote_path)
        finally:
            # Covers both a failed write and a failed upload, so no temp file is left behind.
            os.unlink(temp_file.name)
        return len(data)

    def _download_to_dir(self, remote_path: str, local_dir: str) -> str:
        """
        Downloads ``remote_path`` at the configured revision into ``local_dir``.

        :param remote_path: Normalized path of the file inside the repository.
        :param local_dir: Local directory handed to ``hf_hub_download`` as ``local_dir``.
        :return: Local path of the downloaded file as reported by ``hf_hub_download``.
        """
        return self._hf_client.hf_hub_download(
            repo_id=self._repository_id,
            filename=remote_path,
            repo_type=self._repo_type,
            revision=self._repo_revision,
            local_dir=local_dir,
        )

    def _put_object(
        self,
        path: str,
        body: bytes,
        if_match: Optional[str] = None,
        if_none_match: Optional[str] = None,
        attributes: Optional[dict[str, str]] = None,
    ) -> int:
        """
        Uploads an object to the HuggingFace repository.

        :param path: The path where the object will be stored in the repository.
        :param body: The content of the object to store.
        :param if_match: Optional ETag for conditional uploads (not supported by HuggingFace).
        :param if_none_match: Optional ETag for conditional uploads (not supported by HuggingFace).
        :param attributes: Optional attributes for the object (not supported by HuggingFace).
        :return: Data size in bytes.
        :raises RuntimeError: If HuggingFace client is not initialized or API errors occur.
        :raises ValueError: If client attempts to create a directory.
        :raises ValueError: If conditional upload parameters or attributes are provided (not supported).
        """
        if not self._hf_client:
            raise RuntimeError("HuggingFace client not initialized")

        if if_match is not None or if_none_match is not None:
            raise ValueError(
                "HuggingFace provider does not support conditional uploads. "
                "if_match and if_none_match parameters are not supported."
            )

        if attributes is not None:
            raise ValueError(
                "HuggingFace provider does not support custom object attributes. "
                "Use commit messages or repository metadata instead."
            )

        if path.endswith("/"):
            raise ValueError(
                "HuggingFace Storage Provider does not support explicit directory creation. "
                "Directories are created implicitly when files are uploaded to paths within them."
            )

        path = self._normalize_path(path)

        def _invoke_api() -> int:
            return self._upload_bytes(body, path)

        return self._translate_errors(_invoke_api, "PUT", self._repository_id, path)

    def _get_object(self, path: str, byte_range: Optional[Range] = None) -> bytes:
        """
        Retrieves an object from the HuggingFace repository.

        :param path: The path of the object to retrieve from the repository.
        :param byte_range: Optional byte range for partial content (not supported by HuggingFace).
        :return: The content of the retrieved object.
        :raises RuntimeError: If HuggingFace client is not initialized or API errors occur.
        :raises ValueError: If a byte range is requested (HuggingFace doesn't support range reads).
        :raises FileNotFoundError: If the file doesn't exist in the repository.
        """
        if not self._hf_client:
            raise RuntimeError("HuggingFace client not initialized")

        if byte_range is not None:
            raise ValueError(
                "HuggingFace provider does not support partial range reads. "
                f"Requested range: offset={byte_range.offset}, size={byte_range.size}. "
                "To read the entire file, call get_object() without the byte_range parameter."
            )

        path = self._normalize_path(path)

        def _invoke_api() -> bytes:
            # The download lands in a throwaway directory; only the bytes leave this function.
            with tempfile.TemporaryDirectory() as temp_dir:
                downloaded_path = self._download_to_dir(path, temp_dir)
                with open(downloaded_path, "rb") as downloaded:
                    return downloaded.read()

        return self._translate_errors(_invoke_api, "GET", self._repository_id, path)

    def _copy_object(self, src_path: str, dest_path: str) -> int:
        """
        Copies an object within the HuggingFace repository using server-side copy.

        .. note::
            Copy behavior is size-dependent: files ≥10MB are copied remotely via
            metadata (LFS), while files <10MB are downloaded and re-uploaded.

        :param src_path: The source path of the object to copy.
        :param dest_path: The destination path for the copied object.
        :return: Data size in bytes.
        :raises RuntimeError: If HuggingFace client is not initialized or API errors occur.
        :raises FileNotFoundError: If the source file doesn't exist.
        """
        if not self._hf_client:
            raise RuntimeError("HuggingFace client not initialized")

        src_path = self._normalize_path(src_path)
        dest_path = self._normalize_path(dest_path)

        # Looked up first: it both verifies the source exists and supplies the returned size.
        src_object = self._get_object_metadata(src_path)

        def _invoke_api() -> int:
            operations = [
                CommitOperationCopy(
                    src_path_in_repo=src_path,
                    path_in_repo=dest_path,
                )
            ]

            self._hf_client.create_commit(
                repo_id=self._repository_id,
                operations=operations,
                commit_message=f"Copy {src_path} to {dest_path}",
                repo_type=self._repo_type,
                revision=self._repo_revision,
            )

            return src_object.content_length

        return self._translate_errors(_invoke_api, "COPY", self._repository_id, f"{src_path} to {dest_path}")

    def _delete_object(self, path: str, if_match: Optional[str] = None) -> None:
        """
        Deletes an object from the HuggingFace repository.

        :param path: The path of the object to delete from the repository.
        :param if_match: Optional ETag for conditional deletion (not supported by HuggingFace).
        :raises RuntimeError: If HuggingFace client is not initialized or API errors occur.
        :raises ValueError: If conditional deletion parameters are provided (not supported).
        :raises FileNotFoundError: If the file doesn't exist in the repository.
        """
        if not self._hf_client:
            raise RuntimeError("HuggingFace client not initialized")

        if if_match is not None:
            raise ValueError(
                "HuggingFace provider does not support conditional deletion. if_match parameter is not supported."
            )

        path = self._normalize_path(path)

        def _invoke_api() -> None:
            self._hf_client.delete_file(
                path_in_repo=path,
                repo_id=self._repository_id,
                repo_type=self._repo_type,
                revision=self._repo_revision,
                commit_message=f"Delete {path}",
            )

        self._translate_errors(_invoke_api, "DELETE", self._repository_id, path)

    def _item_to_metadata(self, item: Union[RepoFile, RepoFolder]) -> ObjectMetadata:
        """
        Convert a RepoFile or RepoFolder into ObjectMetadata.

        Files use their blob id as ETag and report their size; folders use their tree id and a
        zero length. ``last_modified`` is always ``AWARE_DATETIME_MIN``.

        :param item: The RepoFile or RepoFolder item from HuggingFace API.
        :return: ObjectMetadata representing the item.
        """
        if isinstance(item, RepoFile):
            object_type, content_length, etag = "file", item.size, item.blob_id
        else:
            object_type, content_length, etag = "directory", 0, item.tree_id

        return ObjectMetadata(
            key=item.path,
            type=object_type,
            content_length=content_length,
            last_modified=AWARE_DATETIME_MIN,
            etag=etag,
            content_type=None,
            storage_class=None,
            metadata=None,
        )

    def _get_object_metadata(self, path: str, strict: bool = True) -> ObjectMetadata:
        """
        Retrieves metadata for an object in the HuggingFace repository.

        :param path: The path of the object to get metadata for.
        :param strict: When True and the lookup fails, the path is additionally probed as a
            directory before the ``FileNotFoundError`` is propagated.
        :return: Metadata about the object.
        :raises RuntimeError: If HuggingFace client is not initialized or API errors occur.
        :raises FileNotFoundError: If the path resolves to neither a file nor a directory.
        """
        if not self._hf_client:
            raise RuntimeError("HuggingFace client not initialized")

        path = self._normalize_path(path)

        def _invoke_api() -> ObjectMetadata:
            items = self._hf_client.get_paths_info(
                repo_id=self._repository_id,
                paths=[path],
                repo_type=self._repo_type,
                revision=self._repo_revision,
                expand=True,
            )

            if not items:
                raise FileNotFoundError(f"File not found in HuggingFace repository: {path}")

            return self._item_to_metadata(items[0])

        try:
            return self._translate_errors(_invoke_api, "HEAD", self._repository_id, path)
        except FileNotFoundError:
            if strict:
                dir_path = path.rstrip("/") + "/"
                if self._is_dir(dir_path):
                    return ObjectMetadata(
                        key=dir_path,
                        type="directory",
                        content_length=0,
                        last_modified=AWARE_DATETIME_MIN,
                        etag=None,
                        content_type=None,
                        storage_class=None,
                        metadata=None,
                    )
            raise

    def _list_objects(
        self,
        path: str,
        start_after: Optional[str] = None,
        end_at: Optional[str] = None,
        include_directories: bool = False,
        follow_symlinks: bool = True,
    ) -> Iterator[ObjectMetadata]:
        """
        Lists objects in the HuggingFace repository under the specified path.

        :param path: The path to list objects under.
        :param start_after: The key to start listing after (exclusive, used as cursor).
        :param end_at: The key to end listing at (inclusive, used as cursor).
        :param include_directories: Whether to include directories in the listing. When True the
            listing is non-recursive; when False it is recursive and yields files only.
        :param follow_symlinks: Accepted for interface compatibility; not used by this implementation.
        :return: An iterator over object metadata for objects under the specified path.
        :raises RuntimeError: If HuggingFace client is not initialized or API errors occur.

        .. note::
            HuggingFace Hub API does not natively support pagination parameters.
            This implementation fetches all items and uses cursor-based filtering,
            which may impact performance for large repositories. The ordering is
            directory-first, then files, with lexicographical ordering within each group.
        """
        if not self._hf_client:
            raise RuntimeError("HuggingFace client not initialized")

        path = self._normalize_path(path)

        # A path that names a single file lists as exactly that file.
        try:
            metadata = self._get_object_metadata(path.rstrip("/"), strict=False)
            if metadata and metadata.type == "file":
                yield metadata
                return
        except FileNotFoundError:
            pass

        def _invoke_api() -> list:
            dir_path = path.rstrip("/")

            repo_items = self._hf_client.list_repo_tree(
                repo_id=self._repository_id,
                path_in_repo=dir_path + "/" if dir_path else None,
                repo_type=self._repo_type,
                revision=self._repo_revision,
                expand=True,
                recursive=not include_directories,
            )

            # Materialize inside the wrapped call so hub errors raised lazily are translated too.
            return list(repo_items)

        try:
            items = self._translate_errors(_invoke_api, "LIST", self._repository_id, path)
        except FileNotFoundError:
            # Directory doesn't exist - return empty (matches POSIX behavior)
            return

        # Use cursor-based pagination because HuggingFace returns items with
        # directory-first ordering (not pure lexicographical).
        awaiting_cursor = start_after is not None

        for item in items:
            metadata = self._item_to_metadata(item)
            key = metadata.key

            if awaiting_cursor:
                # Skip everything up to and including the start_after key.
                if key == start_after:
                    awaiting_cursor = False
                continue

            if isinstance(item, RepoFile) or (include_directories and isinstance(item, RepoFolder)):
                yield metadata

            # end_at is inclusive: stop right after the matching key, whether or not it was yielded.
            if end_at is not None and key == end_at:
                break

    def _upload_file(self, remote_path: str, f: Union[str, IO], attributes: Optional[dict[str, str]] = None) -> int:
        """
        Uploads a file to the HuggingFace repository.

        :param remote_path: The remote path where the file will be stored in the repository.
        :param f: File path or file object to upload. Text content read from a file object is
            encoded as UTF-8.
        :param attributes: Optional attributes for the file (not supported by HuggingFace).
        :return: Data size in bytes.
        :raises RuntimeError: If HuggingFace client is not initialized or API errors occur.
        :raises ValueError: If client attempts to create a directory.
        :raises ValueError: If custom attributes are provided (not supported).
        """
        if not self._hf_client:
            raise RuntimeError("HuggingFace client not initialized")

        if attributes is not None:
            raise ValueError(
                "HuggingFace provider does not support custom file attributes. "
                "Use commit messages or repository metadata instead."
            )

        if remote_path.endswith("/"):
            raise ValueError(
                "HuggingFace Storage Provider does not support explicit directory creation. "
                "Directories are created implicitly when files are uploaded to paths within them."
            )

        remote_path = self._normalize_path(remote_path)

        def _invoke_api() -> int:
            if isinstance(f, str):
                # Size is read before uploading so a missing local file fails before any network call.
                file_size = os.path.getsize(f)
                self._upload_local_file(f, remote_path)
                return file_size

            content = f.read()
            content_bytes = content.encode("utf-8") if isinstance(content, str) else content

            # Create temporary file since HfAPI.upload_file requires BinaryIO, not generic IO
            return self._upload_bytes(content_bytes, remote_path)

        return self._translate_errors(_invoke_api, "PUT", self._repository_id, remote_path)

    def _download_file(self, remote_path: str, f: Union[str, IO], metadata: Optional[ObjectMetadata] = None) -> int:
        """
        Downloads a file from the HuggingFace repository.

        :param remote_path: The remote path of the file to download from the repository.
        :param f: Local file path or file object to write to. Text-mode file objects receive the
            content decoded as UTF-8.
        :param metadata: Optional object metadata (not used in this implementation).
        :return: Data size in bytes.
        :raises RuntimeError: If HuggingFace client is not initialized or API errors occur.
        :raises FileNotFoundError: If the file doesn't exist in the repository.
        """
        if not self._hf_client:
            raise RuntimeError("HuggingFace client not initialized")

        remote_path = self._normalize_path(remote_path)

        def _invoke_api() -> int:
            if isinstance(f, str):
                parent_dir = os.path.dirname(f)
                if parent_dir:
                    safe_makedirs(parent_dir)

                # Stage the download in a private directory next to the destination so that the
                # repo-relative sub-directories and download bookkeeping created by
                # hf_hub_download never end up in (or collide with) the caller's directory.
                # Staging beside the target keeps the final move on one filesystem.
                with tempfile.TemporaryDirectory(dir=parent_dir or ".") as staging_dir:
                    downloaded_path = self._download_to_dir(remote_path, staging_dir)
                    # os.replace overwrites an existing destination on every platform.
                    os.replace(downloaded_path, f)

                return os.path.getsize(f)

            with tempfile.TemporaryDirectory() as temp_dir:
                downloaded_path = self._download_to_dir(remote_path, temp_dir)
                with open(downloaded_path, "rb") as src:
                    data = src.read()

            if isinstance(f, io.TextIOBase):
                f.write(data.decode("utf-8"))
            else:
                f.write(data)

            return len(data)

        return self._translate_errors(_invoke_api, "GET", self._repository_id, remote_path)

    def _is_dir(self, path: str) -> bool:
        """
        Helper method to check if a path is a directory.

        :param path: The path to check.
        :return: True if the path is the repository root or resolves to a folder in the repository.
        :raises FileNotFoundError: If the repository or revision doesn't exist.
        :raises RetryableError: For transient HuggingFace errors (rate limits, connection errors).
        :raises RuntimeError: For other HuggingFace API errors.
        """
        path = path.rstrip("/")
        if not path:
            # The root of the repo is always a directory
            return True

        def _invoke_api() -> bool:
            path_info = self._hf_client.get_paths_info(
                repo_id=self._repository_id,
                paths=[path],
                repo_type=self._repo_type,
                revision=self._repo_revision,
            )

            # An empty result means the path does not exist at all.
            return bool(path_info) and isinstance(path_info[0], RepoFolder)

        # Same translation as every other operation, so transient failures stay retryable and
        # the original exception is preserved as the cause.
        return self._translate_errors(_invoke_api, "HEAD", self._repository_id, path)

    def _normalize_path(self, path: str) -> str:
        """
        Normalize path for HuggingFace API by removing leading slashes.
        HuggingFace expects relative paths within repositories.

        :param path: The path to normalize.
        :return: The path without leading ``/`` characters.
        """
        return path.lstrip("/")