OpenAI Chat Client
Refer to the trtllm-serve documentation for starting a server.
Source: NVIDIA/TensorRT-LLM.
### OpenAI Chat Client

from openai import OpenAI

from tensorrt_llm.inputs import encode_base64_content_from_url

# Connect to a locally running trtllm-serve instance via its OpenAI-compatible
# endpoint. The api_key value is required by the OpenAI client constructor but
# is not validated by the local server.
client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="tensorrt_llm",
)
11
# SINGLE IMAGE INFERENCE
# One text prompt plus one image referenced by URL; the server fetches and
# preprocesses the image before running the vision-language model.
response = client.chat.completions.create(
    model="Qwen2.5-VL-3B-Instruct",
    messages=[{
        "role": "system",
        "content": "you are a helpful assistant"
    }, {
        "role": "user",
        "content": [{
            "type": "text",
            "text": "Describe the natural environment in the image."
        }, {
            "type": "image_url",
            "image_url": {
                "url":
                "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/seashore.png"
            }
        }]
    }],
    max_tokens=64,
)
print(response)
35
# MULTI IMAGE INFERENCE
# A single user turn may carry several image_url parts; the model receives
# both images and is asked to compare them.
response = client.chat.completions.create(
    model="Qwen2.5-VL-3B-Instruct",
    messages=[{
        "role": "system",
        "content": "you are a helpful assistant"
    }, {
        "role": "user",
        "content": [{
            "type": "text",
            "text": "Tell me the difference between two images"
        }, {
            "type": "image_url",
            "image_url": {
                "url":
                "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"
            }
        }, {
            "type": "image_url",
            "image_url": {
                "url":
                "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/seashore.png"
            }
        }]
    }],
    max_tokens=64,
)
print(response)
65
# SINGLE VIDEO INFERENCE
# Videos use the "video_url" content part (a TensorRT-LLM extension to the
# OpenAI chat schema); the server downloads and samples frames from the clip.
response = client.chat.completions.create(
    model="Qwen2.5-VL-3B-Instruct",
    messages=[{
        "role": "system",
        "content": "you are a helpful assistant"
    }, {
        "role": "user",
        "content": [{
            "type": "text",
            "text": "Tell me what you see in the video briefly."
        }, {
            "type": "video_url",
            "video_url": {
                "url":
                "https://huggingface.co/datasets/Efficient-Large-Model/VILA-inference-demos/resolve/main/OAI-sora-tokyo-walk.mp4"
            }
        }]
    }],
    max_tokens=64,
)
print(response)
89
# IMAGE EMBED INFERENCE
# Download the image on the client side and embed it inline as a base64
# data URL, so the server does not need outbound network access to fetch it.
image64 = encode_base64_content_from_url(
    "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/seashore.png"
)
response = client.chat.completions.create(
    model="Qwen2.5-VL-3B-Instruct",
    messages=[{
        "role": "system",
        "content": "you are a helpful assistant"
    }, {
        "role": "user",
        "content": [{
            "type": "text",
            "text": "Describe the natural environment in the image."
        }, {
            "type": "image_url",
            "image_url": {
                # Data-URL scheme: media type then the base64 payload.
                "url": "data:image/png;base64," + image64
            }
        }]
    }],
    max_tokens=64,
)
print(response)