Metrics#
-
class BaseMetrics#
Base class for performance metrics.
Provides common interface and total runs tracking.
Subclassed by trt_edgellm::metrics::EagleGenerationMetrics, trt_edgellm::metrics::LLMGenerationMetrics, trt_edgellm::metrics::LLMPrefillMetrics, trt_edgellm::metrics::MultimodalMetrics, trt_edgellm::metrics::OmniTalkerMetrics
-
class LLMPrefillMetrics : public trt_edgellm::metrics::BaseMetrics#
LLM prefill stage metrics.
Tracks reused and computed tokens during prefill.
Public Functions
-
inline void recordRun(int64_t reused, int64_t computed) noexcept#
Record a prefill run.
- Parameters:
reused – Number of reused tokens
computed – Number of computed tokens
-
inline void recordRun(int64_t reused, int64_t computed) noexcept#
-
class LLMGenerationMetrics : public trt_edgellm::metrics::BaseMetrics#
LLM generation stage metrics.
Tracks generated tokens during decoding.
Public Functions
-
inline void recordRun(int64_t generated) noexcept#
Record a generation run.
- Parameters:
generated – Number of generated tokens
Public Members
-
int64_t generatedTokens = {0}#
Total number of generated tokens.
-
inline void recordRun(int64_t generated) noexcept#
-
class MultimodalMetrics : public trt_edgellm::metrics::BaseMetrics#
Multimodal processing stage metrics.
Tracks image and audio processing statistics.
Public Functions
-
inline void recordRun(int64_t imageCount, int64_t imageTokens, int64_t audioCount = 0, int64_t audioTokens = 0)#
Record a multimodal processing run.
- Parameters:
imageCount – Number of images processed
imageTokens – Number of image tokens generated
audioCount – Number of audio clips processed (optional, for Qwen3-Omni)
audioTokens – Number of audio tokens generated (optional, for Qwen3-Omni)
Public Members
-
int64_t totalImages = {0}#
Total number of processed images.
-
int64_t totalImageTokens = {0}#
Total number of image tokens generated.
-
int64_t totalAudios = {0}#
Total number of processed audio clips (Qwen3-Omni).
-
int64_t totalAudioTokens = {0}#
Total number of audio tokens generated (Qwen3-Omni).
-
class EagleGenerationMetrics : public trt_edgellm::metrics::BaseMetrics#
Eagle speculative decoding generation metrics.
Tracks iterations and tokens generated during Eagle spec-decode.
Public Functions
-
inline void recordRun(int64_t iterations, int64_t generatedTokens)#
Record an Eagle generation run.
- Parameters:
iterations – Number of iterations
generatedTokens – Number of generated tokens
-
class OmniTalkerMetrics : public trt_edgellm::metrics::BaseMetrics#
Omni Talker pipeline metrics.
Tracks audio frame generation, RVQ codes, prefill time, and exit reason.
Public Functions
-
inline void recordRun(int64_t frames, int64_t rvqCodes, float prefillMs, int32_t prefillSeqLen, std::string const &exit, bool streaming)#
Public Members
-
int64_t totalFrames = {0}#
Total audio frames generated (each frame = numCodesPerFrame RVQ codes)
-
int64_t totalRvqCodes = {0}#
Total RVQ codes generated (frames * numCodesPerFrame).
-
float prefillGpuTimeMs = {0}#
Talker prefill GPU time in milliseconds.
-
int32_t prefillSeqLength = {0}#
Talker prefill input sequence length.
-
std::string exitReason#
Exit reason: “eos” or “max_length”.
-
bool isStreaming = {false}#
Whether streaming mode was used.
-
struct OmniLatencyMetrics#
Omni audio latency metrics.
Tracks time to first audio code (TTFA), real-time factor (RTF), and audio output info. Time to first playable audio (TTFPA) is derived at JSON output time from talker_generation + code2wav stage times.
Public Members
-
float timeToFirstAudioCodeMs = {0}#
Time in milliseconds from request start until the first codec token is sampled (includes Thinker).
-
float timeToFirstPlayableAudioMs = {0}#
Time in milliseconds from request start until the first playable audio chunk is complete.
-
float endToEndMs = {0}#
Time in milliseconds from request start until all audio output is complete.
-
float realTimeFactor = {0}#
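Real-time factor: audio_duration / talker_generation_time (< 1.0 = faster than real-time). NOTE (to confirm against the implementation): as written, the formula and the interpretation disagree — with audio_duration in the numerator, values above 1.0 would mean faster than real-time; the “< 1.0 = faster” reading matches the conventional definition, talker_generation_time / audio_duration.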
-
float audioDurationSeconds = {0}#
Total audio output duration in seconds.
-
int64_t audioSamples = {0}#
Total audio output samples.
-
int32_t sampleRate = {24000}#
Audio sample rate.
-
float timeToFirstAudioCodeMs = {0}#