Nvfp4 Moe Plugin#
-
class Nvfp4MoePlugin : public nvinfer1::IPluginV3, public nvinfer1::IPluginV3OneCore, public nvinfer1::IPluginV3OneBuild, public nvinfer1::IPluginV3OneRuntime#
TensorRT plugin for NVFP4 MoE: FP16 activations with on-the-fly NVFP4 quantization. SM100/SM101/SM110 use the split FC1/FC2 CuTeDSL path.
Weight layout: FC1 is the 64-row up/gate interleave
[up_chunk(64), gate_chunk(64), up_chunk(64), ...] (the layout the split FC1 kernel reads natively). For the SM12x fused path, see the sibling NvFP4MoEPluginGeforce plugin, which consumes the plain [up_all, gate_all] concat layout.
Note
This plugin is only supported on SM100, SM101, and SM110.
Note
This plugin supports FP16 I/O only.
Note
The split FC1/FC2 path supports swiglu and relu2 with E=128, 0 < top_k <= 8.
Public Functions
- Nvfp4MoePlugin(
- std::string const &name,
- int32_t numExperts,
- int32_t topK,
- int32_t hiddenSize,
- int32_t moeInterSize,
- int32_t activationType,
- int32_t nGroup,
- int32_t topkGroup,
- int32_t normTopkProb,
- float routedScalingFactor,
- int32_t routingMode,
- int32_t backend,
- int32_t maxRoutedRows,
- int32_t ioDtype
- Nvfp4MoePlugin(
- std::string const &name,
- nvinfer1::PluginFieldCollection const *fc
-
Nvfp4MoePlugin() = delete#
-
Nvfp4MoePlugin(Nvfp4MoePlugin const&) = delete#
-
~Nvfp4MoePlugin() noexcept override#
- nvinfer1::IPluginCapability *getCapabilityInterface(
- nvinfer1::PluginCapabilityType type
-
nvinfer1::IPluginV3 *clone() noexcept override#
-
char const *getPluginName() const noexcept override#
-
char const *getPluginVersion() const noexcept override#
-
char const *getPluginNamespace() const noexcept override#
-
int32_t getNbOutputs() const noexcept override#
- int32_t getOutputDataTypes(
- nvinfer1::DataType *outputTypes,
- int32_t nbOutputs,
- nvinfer1::DataType const *inputTypes,
- int32_t nbInputs
- int32_t getOutputShapes(
- nvinfer1::DimsExprs const *inputs,
- int32_t nbInputs,
- nvinfer1::DimsExprs const *shapeInputs,
- int32_t nbShapeInputs,
- nvinfer1::DimsExprs *outputs,
- int32_t nbOutputs,
- nvinfer1::IExprBuilder &exprBuilder
- bool supportsFormatCombination(
- int32_t pos,
- nvinfer1::DynamicPluginTensorDesc const *inOut,
- int32_t nbInputs,
- int32_t nbOutputs
- int32_t configurePlugin(
- nvinfer1::DynamicPluginTensorDesc const *in,
- int32_t nbInputs,
- nvinfer1::DynamicPluginTensorDesc const *out,
- int32_t nbOutputs
- size_t getWorkspaceSize(
- nvinfer1::DynamicPluginTensorDesc const *inputs,
- int32_t nbInputs,
- nvinfer1::DynamicPluginTensorDesc const *outputs,
- int32_t nbOutputs
- int32_t enqueue(
- nvinfer1::PluginTensorDesc const *inputDesc,
- nvinfer1::PluginTensorDesc const *outputDesc,
- void const *const *inputs,
- void *const *outputs,
- void *workspace,
- cudaStream_t stream
- int32_t onShapeChange(
- nvinfer1::PluginTensorDesc const *in,
- int32_t nbInputs,
- nvinfer1::PluginTensorDesc const *out,
- int32_t nbOutputs
- nvinfer1::IPluginV3 *attachToContext(
- nvinfer1::IPluginResourceContext *context
- nvinfer1::PluginFieldCollection const *getFieldsToSerialize(
-
void setPluginNamespace(char const *pluginNamespace) noexcept#
-
class Nvfp4MoePluginCreator : public nvinfer1::IPluginCreatorV3One#
Plugin creator — parses PluginFieldCollection into the attributes above, registers under TensorRT’s default namespace, exposes name “Nvfp4MoePlugin” / version “1”.
Public Functions
-
Nvfp4MoePluginCreator()#
-
~Nvfp4MoePluginCreator() override = default#
-
char const *getPluginName() const noexcept override#
-
char const *getPluginVersion() const noexcept override#
- nvinfer1::PluginFieldCollection const *getFieldNames(
-
char const *getPluginNamespace() const noexcept override#
-
void setPluginNamespace(char const *pluginNamespace) noexcept#
- nvinfer1::IPluginV3 *createPlugin(
- char const *name,
- nvinfer1::PluginFieldCollection const *fc,
- nvinfer1::TensorRTPhase phase
-
Nvfp4MoePluginCreator()#