# GenAI-Perf Client
Refer to the trtllm-serve documentation for starting a server.
Source: NVIDIA/TensorRT-LLM.
#!/usr/bin/env bash
# Profile an OpenAI-compatible chat endpoint (e.g. one started with
# trtllm-serve) using NVIDIA's genai-perf client.
#
# Sends 100 requests at 10 req/s with synthetic prompts of exactly 128
# input tokens, requesting exactly 128 output tokens (stddev 0 makes both
# lengths deterministic), and writes results to my_profile_export.json.
#
# Assumes: a server listening on localhost:8000 and the `genai-perf` tool
# on PATH. The --random-seed makes synthetic prompt generation repeatable.
set -euo pipefail

genai-perf profile \
  -m TinyLlama-1.1B-Chat-v1.0 \
  --tokenizer TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
  --service-kind openai \
  --endpoint-type chat \
  --random-seed 123 \
  --synthetic-input-tokens-mean 128 \
  --synthetic-input-tokens-stddev 0 \
  --output-tokens-mean 128 \
  --output-tokens-stddev 0 \
  --request-count 100 \
  --request-rate 10 \
  --profile-export-file my_profile_export.json \
  --url localhost:8000 \
  --streaming