# Genai Perf Client

Refer to the trtllm-serve documentation for starting a server.

Source: NVIDIA/TensorRT-LLM.

 1#! /usr/bin/env bash
 2
 3genai-perf profile \
 4    -m TinyLlama-1.1B-Chat-v1.0 \
 5    --tokenizer TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
 6    --endpoint-type chat \
 7    --random-seed 123 \
 8    --synthetic-input-tokens-mean 128 \
 9    --synthetic-input-tokens-stddev 0 \
10    --output-tokens-mean 128 \
11    --output-tokens-stddev 0 \
12    --request-count 100 \
13    --request-rate 10 \
14    --profile-export-file my_profile_export.json \
15    --url localhost:8000 \
16    --streaming