# OpenAI Completion Client with JSON Schema
Refer to the trtllm-serve documentation for starting a server.
Source: NVIDIA/TensorRT-LLM.
# Example: request structured (JSON-schema-constrained) output from a local
# trtllm-serve endpoint through the OpenAI chat-completions client.
from openai import OpenAI

# Point the SDK at the local trtllm-serve instance. The api_key is a
# placeholder required by the OpenAI client; the server does not validate it.
client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="tensorrt_llm",
)

response = client.chat.completions.create(
    model="TinyLlama-1.1B-Chat-v1.0",
    messages=[
        {
            "role": "system",
            "content": "you are a helpful assistant"
        },
        {
            "role": "user",
            # Plain string literal — nothing to interpolate, so no f-prefix.
            "content": "Give me the information of the biggest city of China in the JSON format.",
        },
    ],
    max_tokens=100,
    temperature=0,  # greedy decoding keeps the structured output deterministic
    # Constrain generation to a JSON object with both fields required.
    response_format={
        "type": "json",
        "schema": {
            "type": "object",
            "properties": {
                "name": {
                    "type": "string"
                },
                "population": {
                    "type": "integer"
                },
            },
            "required": ["name", "population"],
        },
    },
    # chat_template_kwargs is a serving parameter, not a JSON-Schema keyword;
    # inside the schema it would be silently ignored. Send it via extra_body
    # so the server actually receives enable_thinking=False.
    extra_body={"chat_template_kwargs": {
        "enable_thinking": False
    }},
)
print(response.choices[0].message.content)