Metrics#

class BaseMetrics#

Base class for performance metrics.

Provides a common interface and tracks the total number of runs.

Subclassed by trt_edgellm::metrics::EagleGenerationMetrics, trt_edgellm::metrics::LLMGenerationMetrics, trt_edgellm::metrics::LLMPrefillMetrics, trt_edgellm::metrics::MultimodalMetrics, trt_edgellm::metrics::OmniTalkerMetrics

Public Functions

virtual ~BaseMetrics() noexcept = default#

Virtual destructor.

inline int64_t getTotalRuns() const noexcept#

Get total number of runs.

Returns:

Total runs count

class LLMPrefillMetrics : public trt_edgellm::metrics::BaseMetrics#

LLM prefill stage metrics.

Tracks reused and computed tokens during prefill.

Public Functions

inline void recordRun(int64_t reused, int64_t computed) noexcept#

Record a prefill run.

Parameters:
  • reused – Number of reused tokens

  • computed – Number of computed tokens

Public Members

int64_t reusedTokens = {0}#

Number of reused tokens from cache.

int64_t computedTokens = {0}#

Number of newly computed tokens.

class LLMGenerationMetrics : public trt_edgellm::metrics::BaseMetrics#

LLM generation stage metrics.

Tracks generated tokens during decoding.

Public Functions

inline void recordRun(int64_t generated) noexcept#

Record a generation run.

Parameters:

generated – Number of generated tokens

Public Members

int64_t generatedTokens = {0}#

Total number of generated tokens.

class MultimodalMetrics : public trt_edgellm::metrics::BaseMetrics#

Multimodal processing stage metrics.

Tracks image and audio processing statistics.

Public Functions

inline void recordRun(
int64_t imageCount,
int64_t imageTokens,
int64_t audioCount = 0,
int64_t audioTokens = 0
) noexcept#

Record a multimodal processing run.

Parameters:
  • imageCount – Number of images processed

  • imageTokens – Number of image tokens generated

  • audioCount – Number of audio clips processed (optional, for Qwen3-Omni)

  • audioTokens – Number of audio tokens generated (optional, for Qwen3-Omni)

Public Members

int64_t totalImages = {0}#

Total number of processed images.

int64_t totalImageTokens = {0}#

Total number of image tokens generated.

int64_t totalAudios = {0}#

Total number of processed audio clips (Qwen3-Omni).

int64_t totalAudioTokens = {0}#

Total number of audio tokens generated (Qwen3-Omni).

class EagleGenerationMetrics : public trt_edgellm::metrics::BaseMetrics#

Eagle speculative decoding generation metrics.

Tracks iterations and tokens generated during Eagle spec-decode.

Public Functions

inline void recordRun(
int64_t iterations,
int64_t generatedTokens
) noexcept#

Record an Eagle generation run.

Parameters:
  • iterations – Number of iterations

  • generatedTokens – Number of generated tokens

Public Members

int64_t totalIterations = {0}#

Total number of Eagle iterations.

int64_t totalGeneratedTokens = {0}#

Total number of generated tokens.

class OmniTalkerMetrics : public trt_edgellm::metrics::BaseMetrics#

Omni Talker pipeline metrics.

Tracks audio frame generation, RVQ codes, prefill time, and exit reason.

Public Functions

inline void recordRun(
int64_t frames,
int64_t rvqCodes,
float prefillMs,
int32_t prefillSeqLen,
std::string const &exit,
bool streaming
) noexcept#

Public Members

int64_t totalFrames = {0}#

Total audio frames generated (each frame = numCodesPerFrame RVQ codes)

int64_t totalRvqCodes = {0}#

Total RVQ codes generated (frames * numCodesPerFrame).

float prefillGpuTimeMs = {0}#

Talker prefill GPU time in milliseconds.

int32_t prefillSeqLength = {0}#

Talker prefill input sequence length.

std::string exitReason#

Reason generation stopped: "eos" or "max_length".

bool isStreaming = {false}#

Whether streaming mode was used.

struct OmniLatencyMetrics#

Omni audio latency metrics.

Tracks time to first audio code (TTFA), real-time factor (RTF), and audio output info. Time to first playable audio (TTFPA) is derived at JSON output time from talker_generation + code2wav stage times.

Public Members

float timeToFirstAudioCodeMs = {0}#

Time in milliseconds from request start until the first codec token is sampled (includes Thinker).

float timeToFirstPlayableAudioMs = {0}#

Request start to first playable audio chunk complete.

float endToEndMs = {0}#

Request start to all audio output complete.

float realTimeFactor = {0}#

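audio_duration / talker_generation_time. NOTE(review): the original text says "< 1.0 = faster than real-time", but with this ratio a value above 1.0 means audio is produced faster than real time; the "< 1.0" reading matches the inverse ratio (talker_generation_time / audio_duration). Confirm which ratio the implementation computes.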

float audioDurationSeconds = {0}#

Total audio output duration in seconds.

int64_t audioSamples = {0}#

Total audio output samples.

int32_t sampleRate = {24000}#

Audio sample rate.