Source code for nv_ingest_api.internal.schemas.extract.extract_html_schema

# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0


import logging

from pydantic import ConfigDict, BaseModel

logger = logging.getLogger(__name__)


[docs] class HtmlExtractorSchema(BaseModel): """ Configuration schema for the Html extractor settings. Parameters ---------- max_queue_size : int, default=1 The maximum number of items allowed in the processing queue. n_workers : int, default=16 The number of worker threads to use for processing. raise_on_failure : bool, default=False A flag indicating whether to raise an exception on processing failure. """ max_queue_size: int = 1 n_workers: int = 16 raise_on_failure: bool = False model_config = ConfigDict(extra="forbid")