Nemotron Omni Audio Runner
-
class NemotronOmniAudioRunner : public trt_edgellm::rt::MultimodalRunner#
Runner for Nemotron-Omni Parakeet audio encoder.
The encoder is exported and built at fixed batch=1: the Conformer’s depthwise convolution is a local operator that ignores attention masks, so cross-clip batching with padding silently corrupts short-clip outputs at their right edge.
preprocess() walks every audio clip in the request batch and invokes the encoder once per clip, writing the encoded rows directly into mAudioEmbedding at sequential offsets. infer() is a no-op.
Public Functions
- NemotronOmniAudioRunner(
- std::string const &engineDir,
- cudaStream_t stream
Constructor for NemotronOmniAudioRunner.
- Parameters:
engineDir – [in] Directory containing the audio encoder engine
stream – [in] CUDA stream for execution
- Throws:
std::runtime_error – if engine loading fails or configuration is invalid
-
~NemotronOmniAudioRunner() noexcept = default#
- virtual bool preprocess(
- rt::LLMGenerationRequest const &request,
- std::vector<std::vector<int32_t>> &batchedInputIds,
- tokenizer::Tokenizer const *tokenizer,
- rt::OptionalOutputTensor mropeCosSinOut,
- cudaStream_t stream,
- bool imageOnly = false
Preprocess multimodal input including audio and text.
- Parameters:
request – [in] LLM generation request containing audio and text
batchedInputIds – [inout] Batched input token IDs after preprocessing
tokenizer – [in] Tokenizer for text processing
mropeCosSinOut – [inout] MRoPE cos/sin output tensor (unused by this model)
stream – [in] CUDA stream for execution
- Returns:
True if preprocessing succeeded, false otherwise
-
virtual bool infer(cudaStream_t stream) override#
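Run inference on the audio encoder. For this runner the encoder is already invoked once per clip inside preprocess(), so infer() is a no-op.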
- Parameters:
stream – [in] CUDA stream for execution
- Returns:
True if inference succeeded, false otherwise
- virtual bool validateAndFillConfig(
- std::string const &engineDir
Validate and load configuration from JSON file.
- Parameters:
engineDir – [in] Path to engine directory
- Returns:
True if configuration is valid and loaded successfully, false otherwise
-
virtual bool allocateBuffer(cudaStream_t stream) override#
Allocate buffers for inference.
- Parameters:
stream – [in] CUDA stream for execution
- Returns:
True if allocation succeeded, false otherwise
-
struct NemotronOmniAudioConfig#
Configuration for Nemotron-Omni Parakeet audio encoder.
Public Members
-
int32_t melBins = {0}#
Number of mel-frequency bins.
-
int32_t audioFeatureDim = {0}#
Audio feature dimension (LLM hidden size)
-
int32_t subsamplingFactor = {0}#
Parakeet subsampling factor.
-
int32_t samplingRate = {0}#
Audio sampling rate.
-
int32_t soundContextTokenId = {0}#
<so_embedding> token ID
-
int32_t vocabSize = {0}#
Vocabulary size (audio token ID offset)
-
int32_t melBins = {0}#