B200_NVL |
Min Latency |
1024 / 1024 |
4 |
1k1k_tp1_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc4.yaml
|
2xB200_NVL |
Low Latency |
1024 / 1024 |
4 |
1k1k_tp2_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc4.yaml
|
4xB200_NVL |
Low Latency |
1024 / 1024 |
4 |
1k1k_tp4_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc4.yaml
|
8xB200_NVL |
Low Latency |
1024 / 1024 |
4 |
1k1k_tp8_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc4.yaml
|
B200_NVL |
Low Latency |
1024 / 1024 |
8 |
1k1k_tp1_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc8.yaml
|
2xB200_NVL |
Low Latency |
1024 / 1024 |
8 |
1k1k_tp2_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc8.yaml
|
4xB200_NVL |
Low Latency |
1024 / 1024 |
8 |
1k1k_tp4_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc8.yaml
|
8xB200_NVL |
Low Latency |
1024 / 1024 |
8 |
1k1k_tp8_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc8.yaml
|
B200_NVL |
Low Latency |
1024 / 1024 |
16 |
1k1k_tp1_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc16.yaml
|
2xB200_NVL |
Low Latency |
1024 / 1024 |
16 |
1k1k_tp2_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc16.yaml
|
4xB200_NVL |
High Throughput |
1024 / 1024 |
16 |
1k1k_tp4_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc16.yaml
|
8xB200_NVL |
High Throughput |
1024 / 1024 |
16 |
1k1k_tp8_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc16.yaml
|
B200_NVL |
High Throughput |
1024 / 1024 |
32 |
1k1k_tp1_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc32.yaml
|
2xB200_NVL |
High Throughput |
1024 / 1024 |
32 |
1k1k_tp2_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc32.yaml
|
4xB200_NVL |
High Throughput |
1024 / 1024 |
32 |
1k1k_tp4_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc32.yaml
|
8xB200_NVL |
High Throughput |
1024 / 1024 |
32 |
1k1k_tp8_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc32.yaml
|
B200_NVL |
High Throughput |
1024 / 1024 |
64 |
1k1k_tp1_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc64.yaml
|
2xB200_NVL |
High Throughput |
1024 / 1024 |
64 |
1k1k_tp2_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc64.yaml
|
4xB200_NVL |
High Throughput |
1024 / 1024 |
64 |
1k1k_tp4_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc64.yaml
|
8xB200_NVL |
Max Throughput |
1024 / 1024 |
64 |
1k1k_tp8_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc64.yaml
|
B200_NVL |
Min Latency |
1024 / 8192 |
4 |
1k8k_tp1_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc4.yaml
|
2xB200_NVL |
Low Latency |
1024 / 8192 |
4 |
1k8k_tp2_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc4.yaml
|
4xB200_NVL |
Low Latency |
1024 / 8192 |
4 |
1k8k_tp4_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc4.yaml
|
8xB200_NVL |
Low Latency |
1024 / 8192 |
4 |
1k8k_tp8_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc4.yaml
|
B200_NVL |
Low Latency |
1024 / 8192 |
8 |
1k8k_tp1_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc8.yaml
|
2xB200_NVL |
Low Latency |
1024 / 8192 |
8 |
1k8k_tp2_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc8.yaml
|
4xB200_NVL |
Low Latency |
1024 / 8192 |
8 |
1k8k_tp4_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc8.yaml
|
8xB200_NVL |
Low Latency |
1024 / 8192 |
8 |
1k8k_tp8_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc8.yaml
|
B200_NVL |
Low Latency |
1024 / 8192 |
16 |
1k8k_tp1_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc16.yaml
|
2xB200_NVL |
Low Latency |
1024 / 8192 |
16 |
1k8k_tp2_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc16.yaml
|
4xB200_NVL |
High Throughput |
1024 / 8192 |
16 |
1k8k_tp4_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc16.yaml
|
8xB200_NVL |
High Throughput |
1024 / 8192 |
16 |
1k8k_tp8_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc16.yaml
|
B200_NVL |
High Throughput |
1024 / 8192 |
32 |
1k8k_tp1_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc32.yaml
|
2xB200_NVL |
High Throughput |
1024 / 8192 |
32 |
1k8k_tp2_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc32.yaml
|
4xB200_NVL |
High Throughput |
1024 / 8192 |
32 |
1k8k_tp4_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc32.yaml
|
8xB200_NVL |
High Throughput |
1024 / 8192 |
32 |
1k8k_tp8_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc32.yaml
|
B200_NVL |
High Throughput |
1024 / 8192 |
64 |
1k8k_tp1_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc64.yaml
|
2xB200_NVL |
High Throughput |
1024 / 8192 |
64 |
1k8k_tp2_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc64.yaml
|
4xB200_NVL |
High Throughput |
1024 / 8192 |
64 |
1k8k_tp4_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc64.yaml
|
8xB200_NVL |
Max Throughput |
1024 / 8192 |
64 |
1k8k_tp8_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc64.yaml
|
B200_NVL |
Min Latency |
8192 / 1024 |
4 |
8k1k_tp1_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc4.yaml
|
2xB200_NVL |
Low Latency |
8192 / 1024 |
4 |
8k1k_tp2_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc4.yaml
|
4xB200_NVL |
Low Latency |
8192 / 1024 |
4 |
8k1k_tp4_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc4.yaml
|
8xB200_NVL |
Low Latency |
8192 / 1024 |
4 |
8k1k_tp8_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc4.yaml
|
B200_NVL |
Low Latency |
8192 / 1024 |
8 |
8k1k_tp1_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc8.yaml
|
2xB200_NVL |
Low Latency |
8192 / 1024 |
8 |
8k1k_tp2_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc8.yaml
|
4xB200_NVL |
Low Latency |
8192 / 1024 |
8 |
8k1k_tp4_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc8.yaml
|
8xB200_NVL |
Low Latency |
8192 / 1024 |
8 |
8k1k_tp8_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc8.yaml
|
B200_NVL |
Low Latency |
8192 / 1024 |
16 |
8k1k_tp1_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc16.yaml
|
2xB200_NVL |
Low Latency |
8192 / 1024 |
16 |
8k1k_tp2_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc16.yaml
|
4xB200_NVL |
High Throughput |
8192 / 1024 |
16 |
8k1k_tp4_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc16.yaml
|
8xB200_NVL |
High Throughput |
8192 / 1024 |
16 |
8k1k_tp8_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc16.yaml
|
B200_NVL |
High Throughput |
8192 / 1024 |
32 |
8k1k_tp1_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc32.yaml
|
2xB200_NVL |
High Throughput |
8192 / 1024 |
32 |
8k1k_tp2_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc32.yaml
|
4xB200_NVL |
High Throughput |
8192 / 1024 |
32 |
8k1k_tp4_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc32.yaml
|
8xB200_NVL |
High Throughput |
8192 / 1024 |
32 |
8k1k_tp8_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc32.yaml
|
B200_NVL |
High Throughput |
8192 / 1024 |
64 |
8k1k_tp1_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc64.yaml
|
2xB200_NVL |
High Throughput |
8192 / 1024 |
64 |
8k1k_tp2_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc64.yaml
|
4xB200_NVL |
High Throughput |
8192 / 1024 |
64 |
8k1k_tp4_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc64.yaml
|
8xB200_NVL |
Max Throughput |
8192 / 1024 |
64 |
8k1k_tp8_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc64.yaml
|
H200_SXM |
Min Latency |
1024 / 1024 |
4 |
1k1k_tp1_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc4.yaml
|
2xH200_SXM |
Low Latency |
1024 / 1024 |
4 |
1k1k_tp2_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc4.yaml
|
4xH200_SXM |
Low Latency |
1024 / 1024 |
4 |
1k1k_tp4_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc4.yaml
|
8xH200_SXM |
Low Latency |
1024 / 1024 |
4 |
1k1k_tp8_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml
|
H200_SXM |
Low Latency |
1024 / 1024 |
8 |
1k1k_tp1_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc8.yaml
|
2xH200_SXM |
Low Latency |
1024 / 1024 |
8 |
1k1k_tp2_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc8.yaml
|
4xH200_SXM |
Low Latency |
1024 / 1024 |
8 |
1k1k_tp4_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc8.yaml
|
8xH200_SXM |
Low Latency |
1024 / 1024 |
8 |
1k1k_tp8_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc8.yaml
|
H200_SXM |
Low Latency |
1024 / 1024 |
16 |
1k1k_tp1_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc16.yaml
|
2xH200_SXM |
Low Latency |
1024 / 1024 |
16 |
1k1k_tp2_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc16.yaml
|
4xH200_SXM |
High Throughput |
1024 / 1024 |
16 |
1k1k_tp4_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc16.yaml
|
8xH200_SXM |
High Throughput |
1024 / 1024 |
16 |
1k1k_tp8_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc16.yaml
|
H200_SXM |
High Throughput |
1024 / 1024 |
32 |
1k1k_tp1_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc32.yaml
|
2xH200_SXM |
High Throughput |
1024 / 1024 |
32 |
1k1k_tp2_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc32.yaml
|
4xH200_SXM |
High Throughput |
1024 / 1024 |
32 |
1k1k_tp4_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc32.yaml
|
8xH200_SXM |
High Throughput |
1024 / 1024 |
32 |
1k1k_tp8_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc32.yaml
|
H200_SXM |
High Throughput |
1024 / 1024 |
64 |
1k1k_tp1_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc64.yaml
|
2xH200_SXM |
High Throughput |
1024 / 1024 |
64 |
1k1k_tp2_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc64.yaml
|
4xH200_SXM |
High Throughput |
1024 / 1024 |
64 |
1k1k_tp4_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc64.yaml
|
8xH200_SXM |
Max Throughput |
1024 / 1024 |
64 |
1k1k_tp8_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc64.yaml
|
H200_SXM |
Min Latency |
1024 / 8192 |
4 |
1k8k_tp1_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc4.yaml
|
2xH200_SXM |
Low Latency |
1024 / 8192 |
4 |
1k8k_tp2_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc4.yaml
|
4xH200_SXM |
Low Latency |
1024 / 8192 |
4 |
1k8k_tp4_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc4.yaml
|
8xH200_SXM |
Low Latency |
1024 / 8192 |
4 |
1k8k_tp8_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc4.yaml
|
H200_SXM |
Low Latency |
1024 / 8192 |
8 |
1k8k_tp1_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc8.yaml
|
2xH200_SXM |
Low Latency |
1024 / 8192 |
8 |
1k8k_tp2_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc8.yaml
|
4xH200_SXM |
Low Latency |
1024 / 8192 |
8 |
1k8k_tp4_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc8.yaml
|
8xH200_SXM |
Low Latency |
1024 / 8192 |
8 |
1k8k_tp8_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc8.yaml
|
H200_SXM |
Low Latency |
1024 / 8192 |
16 |
1k8k_tp1_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc16.yaml
|
2xH200_SXM |
Low Latency |
1024 / 8192 |
16 |
1k8k_tp2_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc16.yaml
|
4xH200_SXM |
High Throughput |
1024 / 8192 |
16 |
1k8k_tp4_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc16.yaml
|
8xH200_SXM |
High Throughput |
1024 / 8192 |
16 |
1k8k_tp8_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc16.yaml
|
H200_SXM |
High Throughput |
1024 / 8192 |
32 |
1k8k_tp1_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc32.yaml
|
2xH200_SXM |
High Throughput |
1024 / 8192 |
32 |
1k8k_tp2_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc32.yaml
|
4xH200_SXM |
High Throughput |
1024 / 8192 |
32 |
1k8k_tp4_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc32.yaml
|
8xH200_SXM |
High Throughput |
1024 / 8192 |
32 |
1k8k_tp8_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc32.yaml
|
H200_SXM |
High Throughput |
1024 / 8192 |
64 |
1k8k_tp1_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc64.yaml
|
2xH200_SXM |
High Throughput |
1024 / 8192 |
64 |
1k8k_tp2_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc64.yaml
|
4xH200_SXM |
High Throughput |
1024 / 8192 |
64 |
1k8k_tp4_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc64.yaml
|
8xH200_SXM |
Max Throughput |
1024 / 8192 |
64 |
1k8k_tp8_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc64.yaml
|
H200_SXM |
Min Latency |
8192 / 1024 |
4 |
8k1k_tp1_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc4.yaml
|
2xH200_SXM |
Low Latency |
8192 / 1024 |
4 |
8k1k_tp2_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc4.yaml
|
4xH200_SXM |
Low Latency |
8192 / 1024 |
4 |
8k1k_tp4_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml
|
8xH200_SXM |
Low Latency |
8192 / 1024 |
4 |
8k1k_tp8_conc4.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc4.yaml
|
H200_SXM |
Low Latency |
8192 / 1024 |
8 |
8k1k_tp1_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc8.yaml
|
2xH200_SXM |
Low Latency |
8192 / 1024 |
8 |
8k1k_tp2_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc8.yaml
|
4xH200_SXM |
Low Latency |
8192 / 1024 |
8 |
8k1k_tp4_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml
|
8xH200_SXM |
Low Latency |
8192 / 1024 |
8 |
8k1k_tp8_conc8.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc8.yaml
|
H200_SXM |
Low Latency |
8192 / 1024 |
16 |
8k1k_tp1_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc16.yaml
|
2xH200_SXM |
Low Latency |
8192 / 1024 |
16 |
8k1k_tp2_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml
|
4xH200_SXM |
High Throughput |
8192 / 1024 |
16 |
8k1k_tp4_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml
|
8xH200_SXM |
High Throughput |
8192 / 1024 |
16 |
8k1k_tp8_conc16.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc16.yaml
|
H200_SXM |
High Throughput |
8192 / 1024 |
32 |
8k1k_tp1_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc32.yaml
|
2xH200_SXM |
High Throughput |
8192 / 1024 |
32 |
8k1k_tp2_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc32.yaml
|
4xH200_SXM |
High Throughput |
8192 / 1024 |
32 |
8k1k_tp4_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml
|
8xH200_SXM |
High Throughput |
8192 / 1024 |
32 |
8k1k_tp8_conc32.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc32.yaml
|
H200_SXM |
High Throughput |
8192 / 1024 |
64 |
8k1k_tp1_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc64.yaml
|
2xH200_SXM |
High Throughput |
8192 / 1024 |
64 |
8k1k_tp2_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc64.yaml
|
4xH200_SXM |
High Throughput |
8192 / 1024 |
64 |
8k1k_tp4_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml
|
8xH200_SXM |
Max Throughput |
8192 / 1024 |
64 |
8k1k_tp8_conc64.yaml |
trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc64.yaml
|