DeepSeek R1 Reasoning Parser

Refer to the trtllm-serve documentation for starting a server.

Source: NVIDIA/TensorRT-LLM.

 1#! /usr/bin/env bash
 2
 3cat >./extra-llm-api-config.yml <<EOF
 4cuda_graph_config:
 5    enable_padding: true
 6    max_batch_size: 512
 7enable_attention_dp: true
 8kv_cache_config:
 9    dtype: fp8
10    free_gpu_memory_fraction: 0.8
11stream_interval: 10
12moe_config:
13    backend: DEEPGEMM
14EOF
15
16trtllm-serve \
17    deepseek-ai/DeepSeek-R1 \
18    --host localhost --port 8000 \
19    --trust_remote_code \
20    --max_batch_size 1024 --max_num_tokens 8192 \
21    --tp_size 8 --ep_size 8 --pp_size 1 \
22    --extra_llm_api_options ./extra-llm-api-config.yml \
23    --reasoning_parser deepseek-r1