OpenAI Chat Client for Multimodal
Refer to the trtllm-serve documentation for starting an OpenAI-compatible server. The client below assumes a server listening at http://localhost:8000 with a multimodal model loaded.
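For example, a server for the model used below might be launched with a command along these lines (the model name, host, and port here are assumptions taken from the client code; consult the trtllm-serve documentation for the exact flags that apply to your setup):

trtllm-serve Qwen/Qwen2.5-VL-3B-Instruct --host localhost --port 8000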
Source: NVIDIA/TensorRT-LLM.
from openai import OpenAI

from tensorrt_llm.inputs import encode_base64_content_from_url

client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="tensorrt_llm",
)

# SINGLE IMAGE INFERENCE: one image passed by URL
response = client.chat.completions.create(
    model="Qwen2.5-VL-3B-Instruct",
    messages=[{
        "role": "system",
        "content": "you are a helpful assistant"
    }, {
        "role": "user",
        "content": [{
            "type": "text",
            "text": "Describe the natural environment in the image."
        }, {
            "type": "image_url",
            "image_url": {
                "url": "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/seashore.png"
            }
        }]
    }],
    max_tokens=64,
)
print(response)

# MULTI IMAGE INFERENCE: two images passed by URL in a single user turn
response = client.chat.completions.create(
    model="Qwen2.5-VL-3B-Instruct",
    messages=[{
        "role": "system",
        "content": "you are a helpful assistant"
    }, {
        "role": "user",
        "content": [{
            "type": "text",
            "text": "Tell me the difference between two images"
        }, {
            "type": "image_url",
            "image_url": {
                "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"
            }
        }, {
            "type": "image_url",
            "image_url": {
                "url": "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/seashore.png"
            }
        }]
    }],
    max_tokens=64,
)
print(response)

# SINGLE VIDEO INFERENCE: one video passed by URL
response = client.chat.completions.create(
    model="Qwen2.5-VL-3B-Instruct",
    messages=[{
        "role": "system",
        "content": "you are a helpful assistant"
    }, {
        "role": "user",
        "content": [{
            "type": "text",
            "text": "Tell me what you see in the video briefly."
        }, {
            "type": "video_url",
            "video_url": {
                "url": "https://huggingface.co/datasets/Efficient-Large-Model/VILA-inference-demos/resolve/main/OAI-sora-tokyo-walk.mp4"
            }
        }]
    }],
    max_tokens=64,
)
print(response)

# IMAGE EMBED INFERENCE: the image is fetched, base64-encoded, and sent inline as a data URL
image64 = encode_base64_content_from_url(
    "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/seashore.png"
)
response = client.chat.completions.create(
    model="Qwen2.5-VL-3B-Instruct",
    messages=[{
        "role": "system",
        "content": "you are a helpful assistant"
    }, {
        "role": "user",
        "content": [{
            "type": "text",
            "text": "Describe the natural environment in the image."
        }, {
            "type": "image_url",
            "image_url": {
                "url": "data:image/png;base64," + image64
            }
        }]
    }],
    max_tokens=64,
)
print(response)
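Each request above prints the full ChatCompletion response object. To show only the generated text, read the first choice's message content via the standard OpenAI SDK accessor:

print(response.choices[0].message.content)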