OpenAI Chat Client

Refer to the trtllm-serve documentation for starting a server.

Source: https://github.com/NVIDIA/TensorRT-LLM/blob/4e69c14f732a6e6afce4f71616db5b5cd2b10530/examples/serve/openai_chat_client.py

 1
 2from openai import OpenAI
 3
 4client = OpenAI(
 5    base_url="http://localhost:8000/v1",
 6    api_key="tensorrt_llm",
 7)
 8
 9response = client.chat.completions.create(
10    model="TinyLlama-1.1B-Chat-v1.0",
11    messages=[{
12        "role": "system",
13        "content": "you are a helpful assistant"
14    }, {
15        "role": "user",
16        "content": "Where is New York?"
17    }],
18    max_tokens=20,
19)
20print(response)