DeepSeek-R1 Reasoning Parser
Refer to the trtllm-serve documentation for general instructions on starting a server. The example below starts DeepSeek-R1 with the reasoning parser enabled.
Source: NVIDIA/TensorRT-LLM.
#!/usr/bin/env bash

trtllm-serve \
    deepseek-ai/DeepSeek-R1 \
    --host localhost --port 8000 \
    --backend pytorch \
    --max_batch_size 161 --max_num_tokens 1160 \
    --tp_size 8 --ep_size 8 --pp_size 1 \
    --kv_cache_free_gpu_memory_fraction 0.95 \
    --extra_llm_api_options ./extra-llm-api-config.yml \
    --reasoning_parser deepseek-r1
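
Once the server is running, it exposes an OpenAI-compatible endpoint, and the deepseek-r1 reasoning parser separates the model's <think> trace from the final answer in each chat completion. The request below is a minimal sketch against the server started above; the field that carries the separated reasoning (referred to here as reasoning_content) is an assumption that may differ across TensorRT-LLM versions, so inspect the actual response from your server.

# Query the local trtllm-serve instance started above.
curl -s http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "deepseek-ai/DeepSeek-R1",
        "messages": [{"role": "user", "content": "How many prime numbers are smaller than 20?"}],
        "max_tokens": 512
      }'
# With --reasoning_parser deepseek-r1, the assistant message is expected to carry
# the chain of thought separately from the answer (e.g. in a reasoning_content
# field); without the parser, the raw <think>...</think> text stays inline.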