OpenAI Completion Client

Refer to the trtllm-serve documentation for starting a server.

Source: NVIDIA/TensorRT-LLM.

 1
 2from openai import OpenAI
 3
 4client = OpenAI(
 5    base_url="http://localhost:8000/v1",
 6    api_key="tensorrt_llm",
 7)
 8
 9response = client.completions.create(
10    model="TinyLlama-1.1B-Chat-v1.0",
11    prompt="Where is New York?",
12    max_tokens=20,
13)
14print(response)