Run trtllm-serve with the PyTorch backend on Slurm

Source: NVIDIA/TensorRT-LLM.

#!/bin/bash
#SBATCH -A <account>
#SBATCH -p <partition>
#SBATCH -t 01:00:00
#SBATCH -N 2
#SBATCH --ntasks-per-node=8
#SBATCH -o logs/trtllm-serve.out
#SBATCH -e logs/trtllm-serve.err
#SBATCH -J trtllm-serve


# NOTE: This feature is experimental and may not work on all systems.
# trtllm-llmapi-launch is a script that launches the LLM-API code on
# Slurm-like systems and supports multi-node, multi-GPU setups.

# The number of MPI processes must equal the model world size. For example,
# with tensor_parallel_size=16 you may use 2 nodes with 8 GPUs each,
# 4 nodes with 4 GPUs each, or any other combination.

# The container image should have tensorrt_llm installed, or you need to
# install it in the task.

# The following variables are expected to be set in the environment;
# you can set them via --export in the srun/sbatch command:
#   CONTAINER_IMAGE: the container image to use; it should have tensorrt_llm
#      installed, or you need to install it in the task.
#   MOUNT_DIR: the host directory to mount into the container
#   MOUNT_DEST: the mount destination directory inside the container
#   WORKDIR: the working directory in the container
#   SOURCE_ROOT: the path to the TensorRT-LLM source
#   PROLOGUE: commands to run before the script
#   LOCAL_MODEL: the local model directory to use. NOTE: downloading from
#      Hugging Face is not supported in Slurm mode; download the model
#      beforehand and place it in the LOCAL_MODEL directory.
#   ADDITIONAL_OPTIONS: extra arguments appended to the trtllm-serve
#      command (optional).

echo "Starting trtllm-serve..."
# Launch the trtllm-serve job through the trtllm-llmapi-launch command.
srun -l \
    --container-image=${CONTAINER_IMAGE} \
    --container-mounts=${MOUNT_DIR}:${MOUNT_DEST} \
    --container-workdir=${WORKDIR} \
    --export=ALL,PYTHONPATH=${SOURCE_ROOT} \
    --mpi=pmix \
    bash -c "
        set -ex
        $PROLOGUE
        export PATH=$PATH:~/.local/bin

        trtllm-llmapi-launch \
            trtllm-serve $LOCAL_MODEL \
            --tp_size 16 \
            --backend pytorch \
            --host 0.0.0.0 \
            ${ADDITIONAL_OPTIONS}
    "