OpenAI Chat Client

Refer to the trtllm-serve documentation for starting a server.

Source: NVIDIA/TensorRT-LLM.

 1### OpenAI Chat Client
 2
 3from openai import OpenAI
 4
 5client = OpenAI(
 6    base_url="http://localhost:8000/v1",
 7    api_key="tensorrt_llm",
 8)
 9
10# Single image inference
11response = client.chat.completions.create(
12    model="Qwen2-VL-7B-Instruct",
13    messages=[{
14        "role": "system",
15        "content": "you are a helpful assistant"
16    }, {
17        "role":
18        "user",
19        "content": [{
20            "type": "text",
21            "text": "Describe the natural environment in the image."
22        }, {
23            "type": "image_url",
24            "image_url": {
25                "url":
26                "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/seashore.png"
27            }
28        }]
29    }],
30    max_tokens=64,
31)
32print(response)
33
34# TODO
35# multi-image inference
36# video inference