GenAI-Perf Client

Refer to the trtllm-serve documentation for instructions on starting a server; the profiling command below assumes an OpenAI-compatible endpoint listening at localhost:8000.
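As a minimal sketch (assuming the same TinyLlama checkpoint that is profiled below, and trtllm-serve's default port of 8000), launching the server might look like this; consult the trtllm-serve documentation for the authoritative options:

#! /usr/bin/env bash

# Minimal sketch: serve the model behind an OpenAI-compatible endpoint.
# trtllm-serve listens on localhost:8000 by default; adjust as needed.
trtllm-serve TinyLlama/TinyLlama-1.1B-Chat-v1.0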

Source: NVIDIA/TensorRT-LLM.

#! /usr/bin/env bash

# Profile TinyLlama-1.1B-Chat-v1.0 served at localhost:8000, sending 100
# streaming chat requests at 10 requests/second with fixed-length (128-token)
# synthetic inputs and outputs, and export the results to JSON.
genai-perf profile \
    -m TinyLlama-1.1B-Chat-v1.0 \
    --tokenizer TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
    --service-kind openai \
    --endpoint-type chat \
    --random-seed 123 \
    --synthetic-input-tokens-mean 128 \
    --synthetic-input-tokens-stddev 0 \
    --output-tokens-mean 128 \
    --output-tokens-stddev 0 \
    --request-count 100 \
    --request-rate 10 \
    --profile-export-file my_profile_export.json \
    --url localhost:8000 \
    --streaming
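When the run completes, genai-perf prints a summary of latency and throughput metrics (for example, time to first token, inter-token latency, and request throughput) to the console and writes per-request records to my_profile_export.json under its artifacts directory. A hypothetical quick check of the export (the exact artifacts layout can vary across genai-perf versions, and jq is assumed to be installed):

# Hypothetical example: locate the export under the artifacts directory and
# list its top-level keys with jq to confirm the run produced data.
find artifacts -name my_profile_export.json -exec jq 'keys' {} \;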