Audio Runner#

class Qwen3OmniAudioRunner : public trt_edgellm::rt::MultimodalRunner#

Runner for the Qwen3-Omni audio encoder.

This class handles audio preprocessing and encoder inference for the Qwen3-Omni model.

Public Functions

Qwen3OmniAudioRunner(
std::string const &engineDir,
cudaStream_t stream
)#

Constructor for Qwen3OmniAudioRunner.

Parameters:
  • engineDir[in] Directory containing the audio encoder engine

  • stream[in] CUDA stream for execution

Throws:

std::runtime_error – if engine loading fails or configuration is invalid

~Qwen3OmniAudioRunner() noexcept = default#
virtual bool preprocess(
rt::LLMGenerationRequest const &request,
std::vector<std::vector<int32_t>> &batchedInputIds,
tokenizer::Tokenizer const *tokenizer,
rt::Tensor &ropeRotaryCosSinDevice,
cudaStream_t stream
) override#

Preprocess multimodal input including audio and text.

Parameters:
  • request[in] LLM generation request containing audio and text

  • batchedInputIds[inout] Batched input token IDs after preprocessing

  • tokenizer[in] Tokenizer for text processing

  • ropeRotaryCosSinDevice[inout] RoPE rotary position encoding cache

  • stream[in] CUDA stream for execution

Returns:

True if preprocessing succeeded, false otherwise

virtual bool infer(cudaStream_t stream) override#

Run inference on the audio encoder.

Parameters:

stream[in] CUDA stream for execution

Returns:

True if inference succeeded, false otherwise

virtual bool validateAndFillConfig(
std::string const &engineDir
) override#

Validate and load the configuration from a JSON file.

Parameters:

engineDir[in] Path to engine directory

Returns:

True if configuration is valid and loaded successfully, false otherwise

virtual bool allocateBuffer(cudaStream_t stream) override#

Allocate buffers for inference.

Parameters:

stream[in] CUDA stream for execution

Returns:

True if allocation succeeded, false otherwise

virtual rt::Tensor &getOutputEmbedding() override#

Get audio embeddings from encoder output.

Returns:

Reference to audio embedding tensor

virtual bool preprocessSystemPrompt(
std::string const &systemPrompt,
tokenizer::Tokenizer const *tokenizer,
rt::Tensor &ropeRotaryCosSinDevice,
cudaStream_t stream
) override#

Initialize sequential MRope cache for system prompt KVCache saving.

For audio-only MRope models (e.g. Qwen3-ASR), initialize sequential MRope cache since no vision runner will fill it. When a vision runner is present, this is a no-op because QwenViTRunner::preprocessSystemPrompt handles MRope initialization.

Parameters:
  • systemPrompt[in] System prompt text

  • tokenizer[in] Tokenizer instance

  • ropeRotaryCosSinDevice[inout] RoPE cache tensor

  • stream[in] CUDA stream

Returns:

True on success, false on failure

struct AudioConfig#

Configuration for Qwen3-Omni audio encoder.

Public Members

int32_t melBins = {128}#

Number of mel-frequency bins.

int32_t audioFeatureDim = {2560}#

Audio feature dimension (output embedding size)

int32_t nWindow = {100}#

Window size for audio chunking.

int32_t nWindowInfer = {100}#

Inference window size.

int32_t audioTokenId = {151675}#

<|audio_pad|> token ID

int32_t audioBosTokenId = {151669}#

<|audio_start|> token ID

int32_t audioEosTokenId = {151670}#

<|audio_end|> token ID

float mropeTheta = {0.0F}#

Multi-dimensional RoPE theta (0 = no MRope)