Audio Runner#
-
class Qwen3OmniAudioRunner : public trt_edgellm::rt::MultimodalRunner#
Runner for Qwen3-Omni audio encoder.
This class handles audio preprocessing and encoder inference for Qwen3-Omni model.
Public Functions
-
Qwen3OmniAudioRunner(std::string const &engineDir, cudaStream_t stream)#
Constructor for Qwen3OmniAudioRunner.
- Parameters:
engineDir – [in] Directory containing the audio encoder engine
stream – [in] CUDA stream for execution
- Throws:
std::runtime_error – if engine loading fails or configuration is invalid
-
~Qwen3OmniAudioRunner() noexcept = default#
-
virtual bool preprocess(rt::LLMGenerationRequest const &request, std::vector<std::vector<int32_t>> &batchedInputIds, tokenizer::Tokenizer const *tokenizer, rt::Tensor &ropeRotaryCosSinDevice, cudaStream_t stream)#
Preprocess multimodal input including audio and text.
- Parameters:
request – [in] LLM generation request containing audio and text
batchedInputIds – [inout] Batched input token IDs after preprocessing
tokenizer – [in] Tokenizer for text processing
ropeRotaryCosSinDevice – [inout] RoPE rotary position encoding cache
stream – [in] CUDA stream for execution
- Returns:
True if preprocessing succeeded, false otherwise
-
virtual bool infer(cudaStream_t stream) override#
Run inference on the audio encoder.
- Parameters:
stream – [in] CUDA stream for execution
- Returns:
True if inference succeeded, false otherwise
-
virtual bool validateAndFillConfig(std::string const &engineDir)#
Validate and load configuration from JSON file.
- Parameters:
engineDir – [in] Path to engine directory
- Returns:
True if configuration is valid and loaded successfully, false otherwise
-
virtual bool allocateBuffer(cudaStream_t stream) override#
Allocate buffers for inference.
- Parameters:
stream – [in] CUDA stream for execution
- Returns:
True if allocation succeeded, false otherwise
-
virtual rt::Tensor &getOutputEmbedding() override#
Get audio embeddings from encoder output.
- Returns:
Reference to audio embedding tensor
-
virtual bool preprocessSystemPrompt(std::string const &systemPrompt, tokenizer::Tokenizer const *tokenizer, rt::Tensor &ropeRotaryCosSinDevice, cudaStream_t stream)#
Initialize sequential MRope cache for system prompt KVCache saving.
For audio-only MRope models (e.g. Qwen3-ASR), initialize sequential MRope cache since no vision runner will fill it. When a vision runner is present, this is a no-op because QwenViTRunner::preprocessSystemPrompt handles MRope initialization.
- Parameters:
systemPrompt – [in] System prompt text
tokenizer – [in] Tokenizer instance
ropeRotaryCosSinDevice – [inout] RoPE cache tensor
stream – [in] CUDA stream
- Returns:
True on success, false on failure
-
struct AudioConfig#
Configuration for Qwen3-Omni audio encoder.
Public Members
-
int32_t melBins = {128}#
Number of mel-frequency bins.
-
int32_t audioFeatureDim = {2560}#
Audio feature dimension (output embedding size)
-
int32_t nWindow = {100}#
Window size for audio chunking.
-
int32_t nWindowInfer = {100}#
Inference window size.
-
int32_t audioTokenId = {151675}#
<|audio_pad|> token ID
-
int32_t audioBosTokenId = {151669}#
<|audio_start|> token ID
-
int32_t audioEosTokenId = {151670}#
<|audio_end|> token ID
-
float mropeTheta = {0.0F}#
Multi-dimensional RoPE theta (0 = no MRope)
-
int32_t melBins = {128}#