OpenAI Completion Client
Refer to the trtllm-serve documentation for starting a server; this example expects an OpenAI-compatible endpoint at http://localhost:8000/v1.
Source: NVIDIA/TensorRT-LLM.
### OpenAI Completion Client

from openai import OpenAI

# Point the standard OpenAI client at the local trtllm-serve endpoint.
# The api_key value is a placeholder; the local server does not validate it.
client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="tensorrt_llm",
)

# Send a text completion request to the served model.
response = client.completions.create(
    model="TinyLlama-1.1B-Chat-v1.0",
    prompt="Where is New York?",
    max_tokens=20,
)
print(response)
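The call above prints the full Completion object. If you only want the generated text, or want tokens as they are produced, a minimal sketch using the same openai client is shown below; it assumes the server supports the standard OpenAI streaming protocol for /v1/completions.

# Print only the generated text from the response above.
print(response.choices[0].text)

# Streaming variant: chunks arrive incrementally as tokens are generated.
stream = client.completions.create(
    model="TinyLlama-1.1B-Chat-v1.0",
    prompt="Where is New York?",
    max_tokens=20,
    stream=True,
)
for chunk in stream:
    if chunk.choices:
        print(chunk.choices[0].text or "", end="", flush=True)
print()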