OpenAI Chat Client

Refer to the trtllm-serve documentation for starting a server.
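For example (a minimal sketch; the exact model path and options depend on your deployment), a server for the Qwen2.5-VL model used below could be launched with:

trtllm-serve Qwen/Qwen2.5-VL-3B-Instruct --host localhost --port 8000

This assumes the checkpoint is pulled from the Hugging Face Hub and that the OpenAI-compatible endpoint listens on port 8000, matching the base_url passed to the client below.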

Source: NVIDIA/TensorRT-LLM.

### OpenAI Chat Client

from openai import OpenAI

from tensorrt_llm.inputs import encode_base64_content_from_url

# Connect to the OpenAI-compatible endpoint exposed by trtllm-serve.
# The openai client requires an API key even for a local server, so a
# placeholder value is passed.
client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="tensorrt_llm",
)
# Single-image inference: the user message pairs a text prompt with one
# image passed by URL.
response = client.chat.completions.create(
    model="Qwen2.5-VL-3B-Instruct",
    messages=[
        {
            "role": "system",
            "content": "you are a helpful assistant",
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Describe the natural environment in the image.",
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/seashore.png"
                    },
                },
            ],
        },
    ],
    max_tokens=64,
)
print(response)
# Multi-image inference: several image_url parts can be sent in a single
# user message.
response = client.chat.completions.create(
    model="Qwen2.5-VL-3B-Instruct",
    messages=[
        {
            "role": "system",
            "content": "you are a helpful assistant",
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Tell me the difference between the two images.",
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"
                    },
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/seashore.png"
                    },
                },
            ],
        },
    ],
    max_tokens=64,
)
print(response)
# Single-video inference: the video_url content type is accepted by the
# trtllm-serve multimodal endpoint.
response = client.chat.completions.create(
    model="Qwen2.5-VL-3B-Instruct",
    messages=[
        {
            "role": "system",
            "content": "you are a helpful assistant",
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Tell me what you see in the video briefly.",
                },
                {
                    "type": "video_url",
                    "video_url": {
                        "url": "https://huggingface.co/datasets/Efficient-Large-Model/VILA-inference-demos/resolve/main/OAI-sora-tokyo-walk.mp4"
                    },
                },
            ],
        },
    ],
    max_tokens=64,
)
print(response)
# Base64-embedded image inference: fetch the image once and inline it as a
# data URL instead of passing a remote link.
image64 = encode_base64_content_from_url(
    "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/seashore.png"
)
response = client.chat.completions.create(
    model="Qwen2.5-VL-3B-Instruct",
    messages=[
        {
            "role": "system",
            "content": "you are a helpful assistant",
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Describe the natural environment in the image.",
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "data:image/png;base64," + image64
                    },
                },
            ],
        },
    ],
    max_tokens=64,
)
print(response)
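Each print(response) above dumps the entire ChatCompletion object. To show only the generated text, the standard OpenAI response fields can be read instead, for example:

# Print just the assistant's reply from the most recent response.
print(response.choices[0].message.content)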