OpenAI Chat Client

Refer to the trtllm-serve documentation for starting a server.

Source: NVIDIA/TensorRT-LLM.

 1### OpenAI Chat Client
 2
 3from openai import OpenAI
 4
 5client = OpenAI(
 6    base_url="http://localhost:8000/v1",
 7    api_key="tensorrt_llm",
 8)
 9
10response = client.chat.completions.create(
11    model="TinyLlama-1.1B-Chat-v1.0",
12    messages=[{
13        "role": "system",
14        "content": "you are a helpful assistant"
15    }, {
16        "role": "user",
17        "content": "Where is New York?"
18    }],
19    max_tokens=20,
20)
21print(response)