OpenAI Chat Client
Refer to the trtllm-serve documentation for starting a server.
Source: NVIDIA/TensorRT-LLM.
### OpenAI Chat Client
#
# Example client for a trtllm-serve OpenAI-compatible endpoint.
# Sends a single multimodal (text + image) chat-completion request and
# prints the raw response object.
#
# NOTE(review): assumes a trtllm-serve instance is already listening on
# localhost:8000 serving the Qwen2-VL-7B-Instruct model — confirm against
# the trtllm-serve documentation before running.

from openai import OpenAI

# The api_key is a placeholder: trtllm-serve does not validate it, but the
# OpenAI client library requires a non-empty value.
client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="tensorrt_llm",
)

# Single image inference: one user turn containing a text part and an
# image_url part, per the OpenAI vision message format.
response = client.chat.completions.create(
    model="Qwen2-VL-7B-Instruct",
    messages=[
        {
            "role": "system",
            "content": "you are a helpful assistant",
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Describe the natural environment in the image.",
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/seashore.png",
                    },
                },
            ],
        },
    ],
    max_tokens=64,
)
print(response)

# TODO
# multi-image inference
# video inference