LLM MGMN trtllm-bench

Source: NVIDIA/TensorRT-LLM.

#!/bin/bash
#SBATCH -A <account>
#SBATCH -p <partition>
#SBATCH -t 01:00:00
#SBATCH -N 2
#SBATCH --ntasks-per-node=8
#SBATCH -o logs/trtllm-bench.out
#SBATCH -e logs/trtllm-bench.err
#SBATCH -J trtllm-bench

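# 2 nodes x 8 tasks per node = 16 MPI ranks, matching --tp 16 below.
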
### Run trtllm-bench with the PyTorch backend on Slurm

# NOTE: this feature is experimental and may not work on all systems.
# trtllm-llmapi-launch is a script that launches the LLM-API code on
# Slurm-like systems, and supports multi-node and multi-GPU setups.

# Note that the number of MPI processes must match the model world size,
# e.g. for tensor_parallel_size=16 you may use 2 nodes with 8 GPUs each,
# 4 nodes with 4 GPUs each, or other combinations.

# The container image should have tensorrt_llm installed, or you need to
# install it in the task.

# The following variables are expected to be set in the environment:
# You can set them via --export in the srun/sbatch command.
#   CONTAINER_IMAGE: the container image to use; it should have tensorrt_llm
#      installed, or install it in the task (e.g. via PROLOGUE).
#   MOUNT_DIR: the directory to mount in the container
#   MOUNT_DEST: the destination directory in the container
#   WORKDIR: the working directory in the container
#   SOURCE_ROOT: the path to the TensorRT-LLM source
#   PROLOGUE: the commands to run before the main task
#   LOCAL_MODEL: the local model directory to use. NOTE: downloading from HF
#      is not supported in Slurm mode; you need to download the model and put
#      it in the LOCAL_MODEL directory.
#   MODEL_NAME: the model name, passed to trtllm-bench via --model
#   EXTRA_ARGS: optional extra arguments appended to the trtllm-bench command

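# Example submission (illustrative values only; adjust for your cluster):
#   export CONTAINER_IMAGE=/images/trtllm.sqsh
#   export MOUNT_DIR=/shared MOUNT_DEST=/shared WORKDIR=/shared/bench
#   export SOURCE_ROOT=/shared/TensorRT-LLM
#   export MODEL_NAME=<hf-model-name> LOCAL_MODEL=/shared/models/<model>
#   sbatch --export=ALL llm_mgmn_trtllm_bench.sh
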
export prepare_dataset="$SOURCE_ROOT/benchmarks/cpp/prepare_dataset.py"
export data_path="$WORKDIR/token-norm-dist.txt"

echo "Preparing dataset..."
srun -l \
    -N 1 \
    -n 1 \
    --container-image=${CONTAINER_IMAGE} \
    --container-name="prepare-name" \
    --container-mounts=${MOUNT_DIR}:${MOUNT_DEST} \
    --container-workdir=${WORKDIR} \
    --export=ALL \
    --mpi=pmix \
    bash -c "
        $PROLOGUE
        python3 $prepare_dataset \
            --tokenizer=$LOCAL_MODEL \
            --stdout token-norm-dist \
            --num-requests=100 \
            --input-mean=128 \
            --output-mean=128 \
            --input-stdev=0 \
            --output-stdev=0 > $data_path
    "
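
# The generated dataset contains 100 synthetic requests; with --input-stdev=0
# and --output-stdev=0 the token-length distribution collapses to exactly 128
# input and 128 output tokens per request.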

echo "Running benchmark..."
# Launch the trtllm-bench job with the trtllm-llmapi-launch command.

srun -l \
    --container-image=${CONTAINER_IMAGE} \
    --container-mounts=${MOUNT_DIR}:${MOUNT_DEST} \
    --container-workdir=${WORKDIR} \
    --export=ALL,PYTHONPATH=${SOURCE_ROOT} \
    --mpi=pmix \
    bash -c "
        set -ex
        $PROLOGUE
        export PATH=$PATH:~/.local/bin

        # This is optional: write extra LLM-API options to a YAML file.
        cat > /tmp/pytorch_extra_args.txt << EOF
pytorch_backend_config:
    use_cuda_graph: false
    enable_overlap_scheduler: true
    cuda_graph_padding_enabled: false
    print_iter_log: true
enable_attention_dp: false
EOF

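        # trtllm-llmapi-launch runs the LLM-API code on every MPI rank (16
        # here, matching --tp 16). Note that the exact keys accepted via
        # --extra_llm_api_options can vary across TensorRT-LLM releases, so
        # adjust the YAML above to your installed version.
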
        # launch the benchmark
        trtllm-llmapi-launch \
            trtllm-bench \
            --model $MODEL_NAME \
            --model_path $LOCAL_MODEL \
            throughput \
            --dataset $data_path \
            --backend pytorch \
            --tp 16 \
            --extra_llm_api_options /tmp/pytorch_extra_args.txt \
            $EXTRA_ARGS
    "
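
Per the #SBATCH -o/-e directives above, stdout and stderr go to logs/trtllm-bench.out and logs/trtllm-bench.err; make sure the logs/ directory exists before submitting, since Slurm will not create it for you.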