Source code for nv_ingest_api.util.logging.sanitize

# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

from typing import Any, Mapping, MutableMapping, Sequence, Set

try:
    # Pydantic is optional at runtime for this helper; import if available
    from pydantic import BaseModel  # type: ignore
except Exception:  # pragma: no cover - pydantic always present in this repo
    BaseModel = None  # type: ignore


_DEFAULT_SENSITIVE_KEYS: Set[str] = {
    "access_token",
    "api_key",
    "authorization",
    "auth_token",
    "client_secret",
    "hf_access_token",
    "hugging_face_access_token",
    "password",
    "refresh_token",
    "secret",
    "ssl_cert",
    "x-api-key",
}

_REDACTION = "***REDACTED***"


def _is_mapping(obj: Any) -> bool:
    try:
        return isinstance(obj, Mapping)
    except Exception:
        return False


def _is_sequence(obj: Any) -> bool:
    # Exclude strings/bytes from sequences we want to traverse
    return isinstance(obj, Sequence) and not isinstance(obj, (str, bytes, bytearray))


[docs] def sanitize_for_logging( data: Any, sensitive_keys: Set[str] | None = None, redaction: str = _REDACTION, ) -> Any: """ Recursively sanitize common secret fields from dicts, lists, tuples, and Pydantic models. - Key comparison is case-insensitive and matches exact keys only. - Does not mutate input; returns a sanitized deep copy. - For Pydantic BaseModel instances, uses model_dump() before redaction. """ keys = {k.lower() for k in (sensitive_keys or _DEFAULT_SENSITIVE_KEYS)} # Handle Pydantic models without importing pydantic at module import time if BaseModel is not None and isinstance(data, BaseModel): # type: ignore[arg-type] try: return sanitize_for_logging(data.model_dump(), keys, redaction) except Exception: # Fall through and try generic handling below pass # Dict-like if _is_mapping(data): out: MutableMapping[str, Any] = type(data)() # preserve mapping type where possible for k, v in data.items(): # type: ignore[assignment] key_lower = str(k).lower() if key_lower in keys: out[k] = redaction else: out[k] = sanitize_for_logging(v, keys, redaction) return out # List/Tuple/Sequence if _is_sequence(data): return type(data)(sanitize_for_logging(v, keys, redaction) for v in data) # Fallback: return as-is return data