Run trtllm-bench with pytorch backend on Slurm#
Source NVIDIA/TensorRT-LLM.
1#!/bin/bash
2#SBATCH -A <account>
3#SBATCH -p <partition>
4#SBATCH -t 01:00:00
5#SBATCH -N 2
6#SBATCH --ntasks-per-node=8
7#SBATCH -o logs/trtllm-bench.out
8#SBATCH -e logs/trtllm-bench.err
9#SBATCH -J trtllm-bench
10
11
12# NOTE, this feature is experimental and may not work on all systems.
13# The trtllm-llmapi-launch is a script that launches the LLM-API code on
14# Slurm-like systems, and can support multi-node and multi-GPU setups.
15
16# Note that, the number of MPI processes should be the same as the model world
17# size. e.g. For tensor_parallel_size=16, you may use 2 nodes with 8 gpus for
18# each, or 4 nodes with 4 gpus for each or other combinations.
19
20# This docker image should have tensorrt_llm installed, or you need to install
21# it in the task.
22
23# The following variables are expected to be set in the environment:
24# You can set them via --export in the srun/sbatch command.
25# CONTAINER_IMAGE: the docker image to use, you'd better install tensorrt_llm in it, or install it in the task.
26# MOUNT_DIR: the directory to mount in the container
27# MOUNT_DEST: the destination directory in the container
28# WORKDIR: the working directory in the container
29# SOURCE_ROOT: the path to the TensorRT-LLM source
30# PROLOGUE: the prologue to run before the script
31# LOCAL_MODEL: the local model directory to use, NOTE: downloading from HF is
32# not supported in Slurm mode, you need to download the model and put it in
33# the LOCAL_MODEL directory.
34
35export prepare_dataset="$SOURCE_ROOT/benchmarks/cpp/prepare_dataset.py"
36export data_path="$WORKDIR/token-norm-dist.txt"
37
38echo "Preparing dataset..."
39srun -l \
40 -N 1 \
41 -n 1 \
42 --container-image=${CONTAINER_IMAGE} \
43 --container-name="prepare-name" \
44 --container-mounts=${MOUNT_DIR}:${MOUNT_DEST} \
45 --container-workdir=${WORKDIR} \
46 --export=ALL \
47 --mpi=pmix \
48 bash -c "
49 $PROLOGUE
50 python3 $prepare_dataset \
51 --tokenizer=$LOCAL_MODEL \
52 --stdout token-norm-dist \
53 --num-requests=100 \
54 --input-mean=128 \
55 --output-mean=128 \
56 --input-stdev=0 \
57 --output-stdev=0 > $data_path
58 "
59
60echo "Running benchmark..."
61# Just launch trtllm-bench job with trtllm-llmapi-launch command.
62
63srun -l \
64 --container-image=${CONTAINER_IMAGE} \
65 --container-mounts=${MOUNT_DIR}:${MOUNT_DEST} \
66 --container-workdir=${WORKDIR} \
67 --export=ALL,PYTHONPATH=${SOURCE_ROOT} \
68 --mpi=pmix \
69 bash -c "
70 set -ex
71 $PROLOGUE
72 export PATH=$PATH:~/.local/bin
73
74 # This is optional
75 cat > /tmp/pytorch_extra_args.txt << EOF
76print_iter_log: true
77enable_attention_dp: false
78EOF
79
80 # launch the benchmark
81 trtllm-llmapi-launch \
82 trtllm-bench \
83 --model $MODEL_NAME \
84 --model_path $LOCAL_MODEL \
85 throughput \
86 --dataset $data_path \
87 --backend pytorch \
88 --tp 16 \
89 --extra_llm_api_options /tmp/pytorch_extra_args.txt \
90 $EXTRA_ARGS
91 "