Int4 MoE Plugin#
-
class Int4MoePlugin : public nvinfer1::IPluginV3, public nvinfer1::IPluginV3OneCore, public nvinfer1::IPluginV3OneBuild, public nvinfer1::IPluginV3OneRuntime#
TensorRT plugin for INT4 Mixture of Experts (MoE)
Implements efficient INT4-quantized matrix multiplication for Mixture of Experts (MoE) layers.
Public Functions
- Int4MoePlugin(
- std::string const &name,
- int32_t const numExperts,
- int32_t const topK,
- int32_t const hiddenSize,
- int32_t const moeInterSize,
- nvinfer1::ActivationType const activationType,
- int32_t const quantizationGroupSize
Construct INT4 Mixture of Experts (MoE) plugin.
- Parameters:
name – Layer name
numExperts – Number of experts
topK – Number of top-ranked experts to select (K)
hiddenSize – Hidden size
moeInterSize – Intermediate size of the MoE layer
activationType – Activation type
quantizationGroupSize – Quantization group size
- Int4MoePlugin(
- std::string const &name,
- nvinfer1::PluginFieldCollection const *fc
Construct from field collection.
- Parameters:
name – Layer name
fc – Plugin field collection
-
Int4MoePlugin() = delete#
Deleted default constructor.
-
Int4MoePlugin(Int4MoePlugin const&) = delete#
Deleted copy constructor.
-
~Int4MoePlugin() noexcept override#
Destructor.
- nvinfer1::IPluginCapability *getCapabilityInterface(
- nvinfer1::PluginCapabilityType type
Return the plugin capability interface for the given type.
-
nvinfer1::IPluginV3 *clone() noexcept override#
Clone the plugin for use in another network.
- Returns:
Cloned plugin instance
-
char const *getPluginName() const noexcept override#
Get plugin name.
- Returns:
Plugin name string
-
char const *getPluginVersion() const noexcept override#
Get plugin version.
- Returns:
Version string
-
char const *getPluginNamespace() const noexcept override#
Get plugin namespace.
- Returns:
Namespace string
-
int32_t getNbOutputs() const noexcept override#
Get number of output tensors.
- Returns:
Number of outputs (1)
- int32_t getOutputDataTypes(
- nvinfer1::DataType *outputTypes,
- int32_t nbOutputs,
- nvinfer1::DataType const *inputTypes,
- int32_t nbInputs
Get output tensor data types.
- Parameters:
outputTypes – Output array for data types
nbOutputs – Number of outputs
inputTypes – Input data types
nbInputs – Number of inputs
- Returns:
0 on success, non-zero on error
- int32_t getOutputShapes(
- nvinfer1::DimsExprs const *inputs,
- int32_t nbInputs,
- nvinfer1::DimsExprs const *shapeInputs,
- int32_t nbShapeInputs,
- nvinfer1::DimsExprs *outputs,
- int32_t nbOutputs,
- nvinfer1::IExprBuilder &exprBuilder
Get output tensor shapes.
- Parameters:
inputs – Input dimensions
nbInputs – Number of inputs
shapeInputs – Shape tensor inputs
nbShapeInputs – Number of shape inputs
outputs – Output dimensions
nbOutputs – Number of outputs
exprBuilder – Expression builder for dynamic shapes
- Returns:
0 on success, non-zero on error
- bool supportsFormatCombination(
- int32_t pos,
- nvinfer1::DynamicPluginTensorDesc const *inOut,
- int32_t nbInputs,
- int32_t nbOutputs
Check if format combination is supported.
- Parameters:
pos – Position in input/output array
inOut – Input and output tensor descriptors
nbInputs – Number of inputs
nbOutputs – Number of outputs
- Returns:
True if supported
- int32_t configurePlugin(
- nvinfer1::DynamicPluginTensorDesc const *in,
- int32_t nbInputs,
- nvinfer1::DynamicPluginTensorDesc const *out,
- int32_t nbOutputs
Configure plugin with tensor descriptions.
- Parameters:
in – Input tensor descriptors
nbInputs – Number of inputs
out – Output tensor descriptors
nbOutputs – Number of outputs
- Returns:
0 on success, non-zero on error
- size_t getWorkspaceSize(
- nvinfer1::DynamicPluginTensorDesc const *inputs,
- int32_t nbInputs,
- nvinfer1::DynamicPluginTensorDesc const *outputs,
- int32_t nbOutputs
Get workspace size required for execution.
- Parameters:
inputs – Input tensor descriptors
nbInputs – Number of inputs
outputs – Output tensor descriptors
nbOutputs – Number of outputs
- Returns:
Workspace size in bytes
- int32_t enqueue(
- nvinfer1::PluginTensorDesc const *inputDesc,
- nvinfer1::PluginTensorDesc const *outputDesc,
- void const *const *inputs,
- void *const *outputs,
- void *workspace,
- cudaStream_t stream
Execute the plugin.
- Parameters:
inputDesc – Input tensor descriptors
outputDesc – Output tensor descriptors
inputs – Input tensor pointers
outputs – Output tensor pointers
workspace – Workspace pointer
stream – CUDA stream
- Returns:
0 on success, non-zero on error
- int32_t onShapeChange(
- nvinfer1::PluginTensorDesc const *in,
- int32_t nbInputs,
- nvinfer1::PluginTensorDesc const *out,
- int32_t nbOutputs
Called when input/output shapes change during runtime.
- Parameters:
in – Input tensor descriptors
nbInputs – Number of inputs
out – Output tensor descriptors
nbOutputs – Number of outputs
- Returns:
0 on success, non-zero on error
- nvinfer1::IPluginV3 *attachToContext(
- nvinfer1::IPluginResourceContext *context
Attach plugin to an execution context.
- Parameters:
context – Plugin resource context
- Returns:
Cloned plugin attached to context
- nvinfer1::PluginFieldCollection const *getFieldsToSerialize()
Get plugin fields for serialization.
- Returns:
Field collection for serialization
-
void setPluginNamespace(char const *pluginNamespace) noexcept#
Set plugin namespace.
- Parameters:
pluginNamespace – Namespace string
-
class Int4MoePluginCreator : public nvinfer1::IPluginCreatorV3One#
Factory for creating Int4MoePlugin instances.
Handles plugin registration and creation in TensorRT.
Public Functions
-
Int4MoePluginCreator()#
Constructor.
-
~Int4MoePluginCreator() override = default#
Destructor.
-
char const *getPluginName() const noexcept override#
Get plugin name.
- Returns:
Plugin name string
-
char const *getPluginVersion() const noexcept override#
Get plugin version.
- Returns:
Version string
- nvinfer1::PluginFieldCollection const *getFieldNames()
Get plugin field names.
- Returns:
Field collection
-
char const *getPluginNamespace() const noexcept override#
Get plugin namespace.
- Returns:
Namespace string
-
void setPluginNamespace(char const *pluginNamespace) noexcept#
Set plugin namespace.
- Parameters:
pluginNamespace – Namespace string
- nvinfer1::IPluginV3 *createPlugin(
- char const *name,
- nvinfer1::PluginFieldCollection const *fc,
- nvinfer1::TensorRTPhase phase
Create plugin from field collection.
- Parameters:
name – Plugin name
fc – Field collection with parameters
phase – TensorRT phase (build or runtime)
- Returns:
Created plugin instance
-
Int4MoePluginCreator()#