Source code for tensorrt_llm.scheduling_params

from dataclasses import dataclass
from typing import List, Optional, Tuple

AgentHierarchy = List[Tuple[str, int]]


[docs] @dataclass(slots=True, kw_only=True) class SchedulingParams: """Schedule parameters. Args: attention_dp_rank (int): The rank of target attention dp attention_dp_relax (bool): Whether to allow the request to be scheduled to other attention dp for better throughput. Defaults to True. agent_hierarchy (AgentHierarchy): Path of (agent_type, node_id) tuples identifying this request's position in an agent execution tree. Used by the batch scheduler for hierarchy-aware scheduling. """ attention_dp_rank: Optional[int] = None attention_dp_relax: bool = True agent_hierarchy: Optional[AgentHierarchy] = None