OpenAI Completion Client with JSON Schema

Refer to the trtllm-serve documentation for starting a server.

Source: NVIDIA/TensorRT-LLM.

 1
 2from openai import OpenAI
 3
 4client = OpenAI(
 5    base_url="http://localhost:8000/v1",
 6    api_key="tensorrt_llm",
 7)
 8
 9response = client.chat.completions.create(
10    model="TinyLlama-1.1B-Chat-v1.0",
11    messages=[{
12        "role": "system",
13        "content": "you are a helpful assistant"
14    }, {
15        "role":
16        "user",
17        "content":
18        f"Give me the information of the biggest city of China in the JSON format.",
19    }],
20    max_tokens=100,
21    temperature=0,
22    response_format={
23        "type": "json",
24        "schema": {
25            "type": "object",
26            "properties": {
27                "name": {
28                    "type": "string"
29                },
30                "population": {
31                    "type": "integer"
32                },
33            },
34            "required": ["name", "population"],
35            "chat_template_kwargs": {
36                "enable_thinking": False
37            }
38        }
39    },
40)
41print(response.choices[0].message.content)