Audio Runner#

class Qwen3OmniAudioRunner : public trt_edgellm::rt::MultimodalRunner#

Runner for the Qwen3-Omni audio encoder.

This class handles audio preprocessing and encoder inference for the Qwen3-Omni model.

Public Functions

Qwen3OmniAudioRunner(
std::string const &engineDir,
cudaStream_t stream
)#

Constructor for Qwen3OmniAudioRunner.

Parameters:
  • engineDir[in] Directory containing the audio encoder engine

  • stream[in] CUDA stream for execution

Throws:

std::runtime_error – if engine loading fails or configuration is invalid

~Qwen3OmniAudioRunner() noexcept = default#
virtual bool preprocess(
rt::LLMGenerationRequest const &request,
std::vector<std::vector<int32_t>> &batchedInputIds,
tokenizer::Tokenizer const *tokenizer,
rt::Tensor &ropeRotaryCosSinDevice,
cudaStream_t stream
) override#

Preprocess multimodal input including audio and text.

Parameters:
  • request[in] LLM generation request containing audio and text

  • batchedInputIds[inout] Batched input token IDs after preprocessing

  • tokenizer[in] Tokenizer for text processing

  • ropeRotaryCosSinDevice[inout] RoPE rotary position encoding cache

  • stream[in] CUDA stream for execution

Returns:

True if preprocessing succeeded, false otherwise

virtual bool infer(cudaStream_t stream) override#

Run inference on the audio encoder.

Parameters:

stream[in] CUDA stream for execution

Returns:

True if inference succeeded, false otherwise

virtual bool validateAndFillConfig(
std::string const &engineDir
) override#

Validate and load the configuration from a JSON file.

Parameters:

engineDir[in] Path to engine directory

Returns:

True if configuration is valid and loaded successfully, false otherwise

virtual bool allocateBuffer(cudaStream_t stream) override#

Allocate buffers for inference.

Parameters:

stream[in] CUDA stream for execution

Returns:

True if allocation succeeded, false otherwise

virtual rt::Tensor &getOutputEmbedding() override#

Get audio embeddings from encoder output.

Returns:

Reference to audio embedding tensor

virtual bool preprocessSystemPrompt(
std::string const &systemPrompt,
tokenizer::Tokenizer const *tokenizer,
rt::Tensor &ropeRotaryCosSinDevice,
cudaStream_t stream
) override#

Initialize sequential MRope cache for system prompt KVCache saving.

For audio-only MRope models (e.g. Qwen3-ASR), initialize sequential MRope cache since no vision runner will fill it. When a vision runner is present, this is a no-op because QwenViTRunner::preprocessSystemPrompt handles MRope initialization.

Parameters:
  • systemPrompt[in] System prompt text

  • tokenizer[in] Tokenizer instance

  • ropeRotaryCosSinDevice[inout] RoPE cache tensor

  • stream[in] CUDA stream

Returns:

True on success, false on failure

struct AudioConfig#

Configuration for Qwen3-Omni audio encoder.

Public Members

int32_t melBins = {128}#

Number of mel-frequency bins.

int32_t audioFeatureDim = {2560}#

Audio feature dimension (output embedding size)

int32_t nWindow = {100}#

Window size for audio chunking.

int32_t nWindowInfer = {100}#

Inference window size.

int32_t audioTokenId = {151675}#

<|audio_pad|> token ID

int32_t audioBosTokenId = {151669}#

<|audio_start|> token ID

int32_t audioEosTokenId = {151670}#

<|audio_end|> token ID

float mropeTheta = {0.0F}#

Multi-dimensional RoPE theta (0 = no MRope)