TensorRT Model Optimizer
Getting Started
Overview
Installation
Quick Start: Quantization
Quick Start: Quantization (Windows)
Quick Start: Pruning
Quick Start: Distillation
Quick Start: Sparsity
Guides
Quantization
Pruning
NAS
Distillation
Sparsity
Saving & Restoring
Speculative Decoding
Deployment
TensorRT-LLM Deployment
DirectML Deployment
Examples
All GitHub Examples
ResNet20 on CIFAR-10: Pruning
HF BERT: Prune, Distill & Quantize
Reference
Changelog
modelopt API
Support
Contact us
FAQs
TensorRT Model Optimizer
Index
Index
_ | A | B | C | D | E | F | G | H | I | K | L | M | N | O | P | Q | R | S | T | U | V | W | Y | Z
_
__init__() (AttentionConfig method)
(AWQClipHelper method)
(AWQLiteHelper method)
(BaseQuantizedTensor method)
(BaseSearcher method)
(CalibrationDataProvider method)
(Clip method)
(ConcatNodeProcessor method)
(ConcatSymbol method)
(ConcatSymbol.Input method)
(ConvConfig method)
(CustomSentencePieceTokenizer method)
(DecoderLayerConfig method)
(DistillationLossBalancer method)
(DynamicModule method)
(DynamicSpace method)
(EmbeddingConfig method)
(enable_modelopt_patches method)
(ExpertConfig method)
(GradientDataManager method)
(GraphCollection method)
(HistogramCalibrator method)
(Hparam method)
(LayernormConfig method)
(LinearActConfig method)
(LinearConfig method)
(LLM method)
(LogitsDistillationLoss method)
(MaxCalibrator method)
(MedusaHeadConfig method)
(MGDLoss method)
(MLPConfig method)
(ModelConfig method)
(ModeloptStateManager method)
(MOEConfig method)
(NFSWorkspace method)
(no_modelopt_patches method)
(QDQConvTranspose method)
(QDQNormalization method)
(QKVConfig method)
(QuantRecipe method)
(QuantRecipeHparam method)
(RandomDataProvider method)
(RecurrentConfig method)
(ResBlock method)
(RgLruConfig method)
(RNNLayerForward method)
(RobustTracer method)
(SearchSpace method)
(SequentialQuantizer method)
(set_modelopt_patches_enabled method)
(StaticLossBalancer method)
(Symbol method)
(SymDepth method)
(SymInfo method)
(SymMap method)
(TensorQuantizer method)
(Timer method)
(VFRNNForward method)
__new__() (LayerNormPositionType method)
(LayerNormType method)
(MLPType method)
(QTensorWrapper static method)
A
activation_scaling_factor (LinearConfig attribute)
(QKVConfig property)
active (ConcatTracedHp property)
(Hparam property)
(QuantRecipeHparam property)
active_slice (ConcatTracedHp property)
(Hparam property)
ActiveSlice (Hparam attribute)
AdaptiveAvgPool1d (in module modelopt.torch.quantization.nn.modules.quant_pooling)
AdaptiveAvgPool2d (in module modelopt.torch.quantization.nn.modules.quant_pooling)
AdaptiveAvgPool3d (in module modelopt.torch.quantization.nn.modules.quant_pooling)
add_fp16_fp32_cast() (in module modelopt.onnx.quantization.graph_utils)
add_mode() (ModeloptStateManager method)
add_sym_info() (SymMap method)
additional_algorithm (RealQuantizeConfig attribute)
adjust_attn_amax_values() (in module modelopt.torch.export.scaling_factor_utils)
after_search() (BaseSearcher method)
(IterativeSearcher method)
(SparseGPTSearcher method)
after_step() (BinarySearcher method)
(EvolveSearcher method)
(IterativeSearcher method)
algorithm (QuantizeConfig attribute)
alibi_bias_max (DecoderLayerConfig attribute)
all_input_quantizers_disabled (QuantRNNBase property)
alpha (SmoothQuantCalibConfig attribute)
alpha_step (AWQClipHelper attribute)
(AWQLiteCalibConfig attribute)
(AWQLiteHelper attribute)
amax (TensorQuantizer property)
amaxs (MaxCalibrator property)
analyze_symbols() (in module modelopt.torch.trace.analyzer)
apply_mode() (in module modelopt.torch.opt.conversion)
apply_residual_connection_post_layernorm (DecoderLayerConfig attribute)
attention (DecoderLayerConfig attribute)
attention_head_size (DecoderLayerConfig attribute)
attention_layernorm (DecoderLayerConfig attribute)
AttentionConfig (class in modelopt.torch.export.model_config)
attn_logit_softcapping (DecoderLayerConfig attribute)
attn_replacing_linear (DecoderLayerConfig attribute)
auto_quantize() (in module modelopt.torch.quantization.model_quant)
AutoNASModeDescriptor (class in modelopt.torch.nas.mode)
AutoNASPatchManager (class in modelopt.torch.nas.autonas)
AutoQuantizeSearcher (class in modelopt.torch.quantization.algorithms)
AvgPool1d (in module modelopt.torch.quantization.nn.modules.quant_pooling)
AvgPool2d (in module modelopt.torch.quantization.nn.modules.quant_pooling)
AvgPool3d (in module modelopt.torch.quantization.nn.modules.quant_pooling)
awq_block_size (LinearConfig attribute)
(QKVConfig property)
AWQClipHelper (class in modelopt.onnx.quantization.int4)
AWQLiteHelper (class in modelopt.onnx.quantization.int4)
axis (QuantizerAttributeConfig attribute)
(TensorQuantizer property)
B
backend() (in module modelopt.torch.utils.distributed)
backward() (ClipFunction static method)
(DynamicBlockQuantizationFunction static method)
(FakeAffineTensorQuantFunction static method)
(FakeTensorQuantFunction static method)
(LegacyFakeTensorQuantFunction static method)
(ScaledE4M3Function static method)
(TensorQuantFunction static method)
barrier() (in module modelopt.torch.utils.distributed)
BaseQuantizedTensor (class in modelopt.torch.quantization.qtensor.base_qtensor)
BaseSearcher (class in modelopt.torch.opt.searcher)
BaseSparseSearcher (class in modelopt.torch.sparsity.searcher)
batch_decode() (CustomSentencePieceTokenizer method)
batch_encode_plus() (CustomSentencePieceTokenizer method)
before_search() (AutoQuantizeSearcher method)
(BaseSearcher method)
(BinarySearcher method)
(EvolveSearcher method)
(GradientBinarySearcher method)
(IterativeSearcher method)
(MCoreGPTMinitronSearcher method)
(SparseGPTSearcher method)
before_step() (BinarySearcher method)
(EvolveSearcher method)
(IterativeSearcher method)
best (AutoQuantizeSearcher attribute)
(IterativeSearcher attribute)
best_history (IterativeSearcher attribute)
bias (ConvConfig attribute)
(LayernormConfig attribute)
(LinearConfig attribute)
(QKVConfig property)
BinarySearcher (class in modelopt.torch.prune.fastnas)
block_config (DecoderLayerConfig attribute)
block_embedding (ModelConfig attribute)
block_sizes (QuantizerAttributeConfig attribute)
(TensorQuantizer property)
blocksparse_block_size (DecoderLayerConfig attribute)
blocksparse_homo_head_pattern (DecoderLayerConfig attribute)
blocksparse_num_local_blocks (DecoderLayerConfig attribute)
blocksparse_vertical_stride (DecoderLayerConfig attribute)
build_attention_config() (in module modelopt.torch.export.layer_utils)
build_conv_config() (in module modelopt.torch.export.layer_utils)
build_decoder_config() (in module modelopt.torch.export.layer_utils)
build_embedding_config() (in module modelopt.torch.export.layer_utils)
build_layernorm_config() (in module modelopt.torch.export.layer_utils)
build_linear_config() (in module modelopt.torch.export.layer_utils)
build_medusa_heads_config() (in module modelopt.torch.export.layer_utils)
build_mlp_config() (in module modelopt.torch.export.layer_utils)
build_moe_config() (in module modelopt.torch.export.layer_utils)
build_non_residual_input_map() (in module modelopt.onnx.quantization.graph_utils)
build_qkv() (in module modelopt.torch.export.layer_utils)
build_recurrent_config() (in module modelopt.torch.export.layer_utils)
build_stacked_experts() (in module modelopt.torch.export.layer_utils)
C
calib (ExportConfig attribute)
calibrate() (in module modelopt.torch.quantization.model_calib)
calibrate_weights() (in module modelopt.torch.quantization.calib.histogram)
CalibrationDataProvider (class in modelopt.onnx.quantization.calib_utils)
calibrator (QuantizerAttributeConfig attribute)
candidate (IterativeSearcher attribute)
candidate_stats (AutoQuantizeSearcher attribute)
candidates (EvolveSearcher attribute)
centroid() (in module modelopt.torch.utils.random)
chatglm_version (DecoderLayerConfig attribute)
check_mode() (ModeloptStateManager method)
check_model_compatibility() (in module modelopt.torch.export.layer_utils)
check_weight_shape_valid() (in module modelopt.torch.export.postprocess)
choice() (in module modelopt.torch.utils.random)
choices (Hparam property)
cl_type (Symbol property)
classify_partition_nodes() (in module modelopt.onnx.quantization.graph_utils)
clean_up_after_set_from_modelopt_state() (TensorQuantizer method)
clear_cuda_cache() (in module modelopt.torch.utils.perf)
Clip (class in modelopt.torch.quantization.nn.modules.clip)
clip_qkv (AttentionConfig attribute)
(DecoderLayerConfig attribute)
ClipFunction (class in modelopt.torch.quantization.nn.functional)
clone() (set_modelopt_patches_enabled method)
collect() (HistogramCalibrator method)
(MaxCalibrator method)
compare_dict() (in module modelopt.torch.utils.network)
compression (QuantRecipe property)
compute_amax() (HistogramCalibrator method)
(MaxCalibrator method)
compute_kd_loss() (DistillationModel method)
compute_valid_1d_patterns() (in module modelopt.torch.sparsity.magnitude)
concat_sym (ConcatSymbol.Input property)
ConcatNodeProcessor (class in modelopt.torch.trace.modules.concat)
ConcatSymbol (class in modelopt.torch.trace.modules.concat)
ConcatSymbol.Input (class in modelopt.torch.trace.modules.concat)
ConcatTracedHp (class in modelopt.torch.nas.hparams.concat)
config (BaseSearcher attribute)
(QuantRecipe property)
config() (DynamicSpace method)
config_class (AutoNASModeDescriptor property)
(ExportModeDescriptor property)
(ExportSparseModeDescriptor property)
(ExportStudentModeDescriptor property)
(FastNASModeDescriptor property)
(GradNASModeDescriptor property)
(KnowledgeDistillationModeDescriptor property)
(MCoreGPTMinitronModeDescriptor property)
(MedusaModeDescriptor property)
(QuantizeExportModeDescriptor property)
(QuantizeModeDescriptor property)
(SparseGPTModeDescriptor property)
(SparseMagnitudeModeDescriptor property)
configure_ort() (in module modelopt.onnx.quantization.ort_utils)
constraints (BaseSearcher attribute)
constraints_func (IterativeSearcher attribute)
construct_forward_loop() (BaseSearcher method)
Conv1d (in module modelopt.torch.quantization.nn.modules.quant_conv)
conv1d (RecurrentConfig attribute)
Conv2d (in module modelopt.torch.quantization.nn.modules.quant_conv)
Conv3d (in module modelopt.torch.quantization.nn.modules.quant_conv)
ConvConfig (class in modelopt.torch.export.model_config)
convert (AutoNASModeDescriptor property)
(ExportModeDescriptor property)
(ExportSparseModeDescriptor property)
(ExportStudentModeDescriptor property)
(FastNASModeDescriptor property)
(KnowledgeDistillationModeDescriptor property)
(MedusaModeDescriptor property)
(QuantizeExportModeDescriptor property)
(QuantizeModeDescriptor property)
(SparseMagnitudeModeDescriptor property)
convert() (ConcatSymbol.Input static method)
(DynamicModule class method)
(in module modelopt.torch.distill.distillation)
(in module modelopt.torch.nas.conversion)
(in module modelopt.torch.speculative.speculative_decoding)
convert_autonas_searchspace() (in module modelopt.torch.nas.autonas)
convert_fastnas_searchspace() (in module modelopt.torch.prune.fastnas)
convert_fp16_io() (in module modelopt.onnx.quantization.graph_utils)
convert_searchspace() (in module modelopt.torch.nas.autonas)
convert_sparse_model() (in module modelopt.torch.sparsity.mode)
convert_state_dict_amax_to_scales() (in module modelopt.torch.export.scaling_factor_utils)
convert_to_dynamic() (DynamicSpace method)
convert_to_medusa_model() (in module modelopt.torch.speculative.medusa.conversion)
convert_to_tensorrt_llm_config() (in module modelopt.torch.export.tensorrt_llm_utils)
convert_to_transformer_engine() (in module modelopt.torch.export.transformer_engine)
ConvTranspose1d (in module modelopt.torch.quantization.nn.modules.quant_conv)
ConvTranspose2d (in module modelopt.torch.quantization.nn.modules.quant_conv)
ConvTranspose3d (in module modelopt.torch.quantization.nn.modules.quant_conv)
create_asp_mask() (in module modelopt.torch.sparsity.magnitude)
create_forward_loop() (in module modelopt.torch.utils.dataset_utils)
create_inference_session() (in module modelopt.onnx.quantization.ort_utils)
create_linked_copy() (ConcatSymbol.Input method)
create_param_grad_clear_hook() (in module modelopt.torch.utils.network)
create_sgpt_mask() (in module modelopt.torch.sparsity.sparsegpt)
criterion (KDLossConfig attribute)
cross_attention (DecoderLayerConfig attribute)
cross_attention_layernorm (DecoderLayerConfig attribute)
customize_rule() (ModeloptBaseRule class method)
CustomSentencePieceTokenizer (class in modelopt.deploy.llm.nemo_utils)
D
deactivate() (in module modelopt.torch.quantization.quant_modules)
debug (AWQClipCalibConfig attribute)
(AWQFullCalibConfig attribute)
(AWQLiteCalibConfig attribute)
decode() (CustomSentencePieceTokenizer method)
decoder_type (DecoderLayerConfig attribute)
DecoderLayerConfig (class in modelopt.torch.export.model_config)
default_quant_desc_input (QuantInputBase attribute)
(QuantRNNBase attribute)
default_quant_desc_output (QuantInputBase attribute)
default_quant_desc_weight (QuantConv1d attribute)
(QuantConv2d attribute)
(QuantConv3d attribute)
(QuantConvTranspose1d attribute)
(QuantConvTranspose2d attribute)
(QuantConvTranspose3d attribute)
(QuantLinear attribute)
(QuantLinearConvBase attribute)
(QuantRNNBase attribute)
default_search_config (AutoQuantizeSearcher property)
(BaseSearcher property)
(BaseSparseSearcher property)
(EvolveSearcher property)
(GradientBinarySearcher property)
(IterativeSearcher property)
(MCoreGPTMinitronSearcher property)
(SparseGPTSearcher property)
default_state_dict (AutoQuantizeSearcher property)
(BaseSearcher property)
(BaseSparseSearcher property)
(BinarySearcher property)
(EvolveSearcher property)
(IterativeSearcher property)
(MCoreGPTMinitronSearcher property)
dense (AttentionConfig attribute)
dense_attention_every_n_layers (DecoderLayerConfig attribute)
deployment (BaseSearcher attribute)
DeprecatedError
DepthHparam (class in modelopt.torch.nas.hparams.container)
dequantize() (BaseQuantizedTensor method)
(INT4QTensor method)
(NF4QTensor method)
(NVFP4QTensor method)
(TensorQuantizer method)
dim() (QTensorWrapper method)
disable() (ConcatSymbol method)
(SequentialQuantizer method)
(Symbol method)
(SymDepth method)
(TensorQuantizer method)
disable_calib() (TensorQuantizer method)
disable_clip() (TensorQuantizer method)
disable_folding_pqs_to_weights() (QuantRecipe static method)
disable_quant() (TensorQuantizer method)
disable_quantizer() (in module modelopt.torch.quantization.model_quant)
DistillationLossBalancer (class in modelopt.torch.distill.loss_balancers)
DistillationModel (class in modelopt.torch.distill.distillation_model)
double_quantization() (NF4QTensor class method)
dq_tensor() (in module modelopt.onnx.quantization.int4)
dtype (ModelConfig attribute)
dummy_input (BaseSearcher attribute)
dup_kv_weight() (in module modelopt.torch.export.layer_utils)
duplicate_shared_constants() (in module modelopt.onnx.utils)
DynamicBlockQuantizationFunction (class in modelopt.torch.quantization.tensor_quant)
DynamicModule (class in modelopt.torch.opt.dynamic)
DynamicSpace (class in modelopt.torch.opt.dynamic)
E
early_stop() (BinarySearcher method)
(IterativeSearcher method)
elastic_dims (Symbol property)
emb_scale_by_sqrt_dim (DecoderLayerConfig attribute)
EmbeddingConfig (class in modelopt.torch.export.model_config)
enable (QuantizerAttributeConfig attribute)
enable() (TensorQuantizer method)
enable_calib() (TensorQuantizer method)
enable_clip() (TensorQuantizer method)
enable_huggingface_checkpointing() (in module modelopt.torch.opt.plugins.huggingface)
enable_modelopt_patches (class in modelopt.torch.nas.utils)
enable_onnx_export() (in module modelopt.torch.quantization.quant_modules)
enable_quant() (TensorQuantizer method)
enable_quantizer() (in module modelopt.torch.quantization.model_quant)
enc_dec (ModelConfig attribute)
encode() (CustomSentencePieceTokenizer method)
encoder_head_size (ModelConfig attribute)
encoder_hidden_size (ModelConfig attribute)
encoder_num_heads (ModelConfig attribute)
enforce_order() (Hparam method)
eos_token (CustomSentencePieceTokenizer property)
eos_token_id (CustomSentencePieceTokenizer property)
eps (LayernormConfig attribute)
eval_score() (BaseSearcher method)
EvolveSearcher (class in modelopt.torch.nas.autonas)
expand_node_names_from_patterns() (in module modelopt.onnx.quantization.graph_utils)
ExpertConfig (class in modelopt.torch.export.model_config)
experts (MOEConfig attribute)
export() (DynamicModule method)
(DynamicSpace method)
(in module modelopt.torch.distill.distillation)
(in module modelopt.torch.nas.conversion)
(in module modelopt.torch.sparsity.sparsification)
(SearchSpace method)
export_amax() (TensorQuantizer method)
export_fp8() (in module modelopt.torch.quantization.export_onnx)
export_hf() (in module modelopt.torch.export.unified_export_hf)
export_hf_checkpoint() (in module modelopt.torch.export.unified_export_hf)
export_int8() (in module modelopt.torch.quantization.export_onnx)
export_mode (AutoNASModeDescriptor property)
(FastNASModeDescriptor property)
(KnowledgeDistillationModeDescriptor property)
(QuantizeModeDescriptor property)
(SparseMagnitudeModeDescriptor property)
export_searchspace() (in module modelopt.torch.nas.autonas)
export_sparse() (in module modelopt.torch.sparsity.mode)
export_tensorrt_llm_checkpoint() (in module modelopt.torch.export.model_config_export)
export_to_vllm() (in module modelopt.torch.export.vllm)
export_torch_mode() (in module modelopt.torch.quantization.utils)
ExportModeDescriptor (class in modelopt.torch.nas.mode)
ExportSparseModeDescriptor (class in modelopt.torch.sparsity.mode)
ExportStudentModeDescriptor (class in modelopt.torch.distill.mode)
expose_minimal_state_dict (KDLossConfig attribute)
extra_repr() (DynamicModule method)
(TensorQuantizer method)
F
failure_msg() (GraphCollection method)
(RobustTracer method)
fake_quant (QuantizerAttributeConfig attribute)
(TensorQuantizer property)
fake_quant_impl() (in module modelopt.torch.quantization.tensor_quant)
FakeAffineTensorQuantFunction (class in modelopt.torch.quantization.tensor_quant)
FakeTensorQuantFunction (class in modelopt.torch.quantization.tensor_quant)
FastNASModeDescriptor (class in modelopt.torch.prune.mode)
FastNASPatchManager (class in modelopt.torch.prune.fastnas)
fc (ExpertConfig attribute)
(MLPConfig attribute)
(MOEConfig property)
ffn_hidden_size_local (DecoderLayerConfig property)
fill() (in module modelopt.torch.sparsity.magnitude)
filter_quantizable_kgen_heads() (in module modelopt.onnx.quantization.graph_utils)
final_logit_softcapping (DecoderLayerConfig attribute)
find_fp8_mha_partitions() (in module modelopt.onnx.quantization.graph_utils)
find_fusible_partitions() (in module modelopt.onnx.quantization.partitioning)
find_hardcoded_patterns() (in module modelopt.onnx.quantization.partitioning)
find_layer_norm_partitions() (in module modelopt.onnx.quantization.partitioning)
find_lowest_common_ancestor() (in module modelopt.onnx.utils)
find_mha_partitions() (in module modelopt.onnx.quantization.graph_utils)
(in module modelopt.onnx.quantization.partitioning)
find_nodes_from_matmul_to_exclude() (in module modelopt.onnx.quantization.graph_utils)
find_nodes_from_mha_to_exclude() (in module modelopt.onnx.quantization.graph_utils)
find_nodes_to_exclude() (in module modelopt.onnx.quantization.graph_utils)
find_non_quantizable_partitions_from_patterns() (in module modelopt.onnx.quantization.partitioning)
find_quantizable_nodes() (in module modelopt.onnx.quantization.partitioning)
find_scales() (in module modelopt.onnx.quantization.int4)
fold_pqs_to_weights() (QuantRecipe static method)
fold_weight() (in module modelopt.torch.quantization.model_quant)
force_assign() (DynamicModule method)
forward() (Clip method)
(ClipFunction static method)
(DistillationLossBalancer method)
(DistillationModel method)
(DynamicBlockQuantizationFunction static method)
(FakeAffineTensorQuantFunction static method)
(FakeTensorQuantFunction static method)
(LegacyFakeTensorQuantFunction static method)
(LogitsDistillationLoss method)
(MGDLoss method)
(QuantInputBase method)
(QuantLinearConvBase method)
(QuantRNNBase method)
(ResBlock method)
(ScaledE4M3Function static method)
(StaticLossBalancer method)
(TensorQuantFunction static method)
(TensorQuantizer method)
(VFRNNForward method)
forward_loop (BaseSearcher attribute)
freeze() (DynamicModule method)
freeze_parameters() (in module modelopt.torch.quantization.optim)
from_quantized_weight() (in module modelopt.torch.export.model_config_utils)
functionals_to_replace (QuantRNNBase property)
FusedGatedMLP (MLPType attribute)
G
gate (MLPConfig attribute)
GatedMLP (MLPType attribute)
gegelu_limit (DecoderLayerConfig attribute)
gen_random_inputs() (in module modelopt.onnx.utils)
generate() (SearchSpace method)
generate_context_logits() (LLM method)
generate_search_space() (in module modelopt.torch.nas.search_space)
generate_text() (LLM method)
generate_tokens() (LLM method)
get() (ModeloptBaseConfig method)
(TracedHpRegistry class method)
get_act_scale() (in module modelopt.onnx.quantization.int4)
get_act_to_weight_map_and_act_to_wa_pack_map() (in module modelopt.onnx.quantization.int4)
get_activation_scaling_factor() (in module modelopt.torch.export.layer_utils)
(NVFP4QTensor class method)
get_all_input_names() (in module modelopt.onnx.utils)
get_batch_size() (in module modelopt.onnx.utils)
get_batch_size_from_bytes() (in module modelopt.onnx.utils)
get_child_nodes() (in module modelopt.onnx.utils)
get_config_class() (ModeloptStateManager static method)
get_configs_parallel() (in module modelopt.torch.export.distribute)
get_cuda_ext() (in module modelopt.torch.quantization.extensions)
get_cuda_ext_fp8() (in module modelopt.torch.quantization.extensions)
get_cuda_ext_mx() (in module modelopt.torch.quantization.extensions)
get_cuda_memory_stats() (in module modelopt.torch.utils.perf)
get_custom_layers() (in module modelopt.onnx.quantization.trt_utils)
get_data_parallel_group() (in module modelopt.torch.utils.distributed)
get_dataset_dataloader() (in module modelopt.torch.utils.dataset_utils)
get_field_name_from_key() (ModeloptBaseConfig method)
get_fusible_backbone() (in module modelopt.onnx.quantization.graph_utils)
get_hparam() (DynamicModule method)
(DynamicSpace method)
(in module modelopt.torch.opt.utils)
get_input_names() (in module modelopt.onnx.utils)
get_input_names_from_bytes() (in module modelopt.onnx.utils)
get_input_shapes() (in module modelopt.onnx.utils)
get_input_shapes_from_bytes() (in module modelopt.onnx.utils)
get_kv_cache_dtype() (in module modelopt.torch.export.layer_utils)
get_kv_cache_scaling_factor() (in module modelopt.torch.export.layer_utils)
get_kwargs_for_create_model_with_rules() (in module modelopt.torch.opt.config)
get_model_attributes() (in module modelopt.torch.utils.network)
get_modelopt_state() (SequentialQuantizer method)
(TensorQuantizer method)
get_module_device() (in module modelopt.torch.utils.network)
get_nemo_tokenizer() (in module modelopt.deploy.llm.nemo_utils)
get_next() (CalibrationDataProvider method)
(RandomDataProvider method)
get_nmprune_info() (in module modelopt.torch.sparsity.magnitude)
get_node_names() (in module modelopt.onnx.utils)
get_node_names_from_bytes() (in module modelopt.onnx.utils)
get_output_names() (in module modelopt.onnx.utils)
get_output_names_from_bytes() (in module modelopt.onnx.utils)
get_output_shapes() (in module modelopt.onnx.utils)
get_parallel_state() (in module modelopt.torch.quantization.utils)
get_parent_child_nodes_map() (in module modelopt.onnx.quantization.int4)
get_parent_nodes() (in module modelopt.onnx.utils)
get_prequant_scaling_factor() (in module modelopt.torch.export.layer_utils)
get_qkv_and_avg_prequant_scale() (in module modelopt.torch.export.layer_utils)
get_qtensor() (QTensorWrapper method)
get_quantizable_op_types() (in module modelopt.onnx.quantization.ort_utils)
get_quantization_format() (in module modelopt.torch.export.layer_utils)
get_quantized_rnn_layer_forward() (in module modelopt.torch.quantization.nn.modules.quant_rnn)
get_quantized_rnn_layer_variable_len_forward() (in module modelopt.torch.quantization.nn.modules.quant_rnn)
get_quantized_rnn_layer_variable_len_reverse_forward() (in module modelopt.torch.quantization.nn.modules.quant_rnn)
get_rule_type() (ModeloptBaseRule class method)
get_same_padding() (in module modelopt.torch.utils.network)
get_scale() (in module modelopt.onnx.quantization.int4)
get_scaling_factor() (in module modelopt.torch.export.layer_utils)
get_skiped_output_layers() (in module modelopt.onnx.quantization.partitioning)
get_sliced_tensor() (in module modelopt.torch.nas.modules.utils)
get_sliced_tensor_by_slices() (in module modelopt.torch.nas.modules.utils)
get_subnet_config() (in module modelopt.torch.nas.utils)
get_symbol() (SymMap method)
get_tensor_consumer_nodes() (in module modelopt.onnx.quantization.graph_utils)
get_tensor_parallel_group() (in module modelopt.torch.utils.distributed)
get_tensor_producer_nodes() (in module modelopt.onnx.quantization.graph_utils)
get_tensors_parallel() (in module modelopt.torch.export.distribute)
get_tokenzier() (in module modelopt.deploy.llm.nemo_utils)
get_transformer_layers() (in module modelopt.torch.export.layer_utils)
get_unwrapped_name() (in module modelopt.torch.utils.network)
get_variable_inputs() (in module modelopt.onnx.utils)
get_weight_block_size() (in module modelopt.torch.export.layer_utils)
get_weight_scale() (in module modelopt.onnx.quantization.int4)
get_weight_scaling_factor() (in module modelopt.torch.export.layer_utils)
get_weight_scaling_factor_2() (in module modelopt.torch.export.layer_utils)
get_weights_scaling_factor() (NVFP4QTensor class method)
get_weights_scaling_factor_2() (NVFP4QTensor class method)
get_weights_scaling_factor_and_amax() (in module modelopt.torch.export.scaling_factor_utils)
get_x_w_mean_for_subgraph() (in module modelopt.onnx.quantization.int4)
gradient_checkpointing_enable_contexts (AutoQuantizeSearcher attribute)
GradientBinarySearcher (class in modelopt.torch.prune.gradnas)
GradientDataManager (class in modelopt.torch.prune.gradnas)
gradnas_score_func() (GradientBinarySearcher static method)
GradNASModeDescriptor (class in modelopt.torch.prune.mode)
GraphCollection (class in modelopt.torch.trace.tracer)
group_parameters() (in module modelopt.torch.quantization.optim)
GroupNorm (LayerNormType attribute)
H
has_const_input() (in module modelopt.onnx.quantization.graph_utils)
has_path_type() (in module modelopt.onnx.quantization.graph_utils)
has_score (BaseSearcher property)
has_state (ModeloptStateManager property)
hidden_act (LinearActConfig attribute)
(MLPConfig attribute)
(ModelConfig property)
(MOEConfig attribute)
hidden_size (DecoderLayerConfig property)
(EmbeddingConfig property)
(ModelConfig property)
hide_loss_modules() (DistillationModel method)
hide_teacher_model() (DistillationModel method)
HistogramCalibrator (class in modelopt.torch.quantization.calib.histogram)
history (IterativeSearcher attribute)
Hparam (class in modelopt.torch.opt.hparam)
hparam_names_for_search (BinarySearcher property)
(GradientBinarySearcher property)
hparam_types_for_search (BinarySearcher property)
I
import_scales_from_calib_cache() (in module modelopt.onnx.quantization.calib_utils)
Importance (Hparam attribute)
importance (Hparam property)
(QuantRecipeHparam property)
ImportanceEstimator (Hparam attribute)
INCOMING (Symbol.CLType attribute)
inference_flops() (in module modelopt.torch.nas.utils)
init_learn_amax() (TensorQuantizer method)
init_model_from_model_like() (in module modelopt.torch.utils.network)
initialize() (in module modelopt.torch.quantization.quant_modules)
initialize_from() (TracedHp class method)
(TracedHpRegistry class method)
initialize_quantizer_with_dummy_states() (QuantLinearConvBase static method)
initialize_real_qtensor_with_dummy_weight() (QuantLinearConvBase static method)
input_gate (RgLruConfig attribute)
input_layernorm (DecoderLayerConfig attribute)
input_quantizer (QuantInputBase attribute)
input_syms (ConcatSymbol property)
insert_dq_nodes() (in module modelopt.onnx.quantization.qdq_utils)
insert_fp8_mha_casts() (in module modelopt.onnx.quantization.graph_utils)
insert_hparams_after_merge_rules() (AutoQuantizeSearcher class method)
insert_matmul_casts() (in module modelopt.onnx.quantization.graph_utils)
insert_pre_quant_scale_nodes() (in module modelopt.onnx.quantization.qdq_utils)
insert_qdq_nodes() (in module modelopt.onnx.quantization.qdq_utils)
INT4QTensor (class in modelopt.torch.quantization.qtensor.int4_tensor)
int8_to_fp8() (in module modelopt.onnx.quantization.fp8)
invert() (in module modelopt.torch.sparsity.sparsegpt)
is_attention() (in module modelopt.torch.export.layer_utils)
is_available() (in module modelopt.torch.utils.distributed)
is_binary_op() (in module modelopt.onnx.op_types)
is_channels_last() (in module modelopt.torch.utils.network)
is_configurable (Hparam property)
is_configurable() (DynamicSpace method)
(in module modelopt.torch.opt.utils)
is_const_input() (in module modelopt.onnx.quantization.graph_utils)
is_constant (ConcatSymbol property)
(Symbol property)
is_control_flow_op() (in module modelopt.onnx.op_types)
is_conversion_op() (in module modelopt.onnx.op_types)
is_converted() (ModeloptStateManager class method)
is_copy_op() (in module modelopt.onnx.op_types)
is_cross_layer (Symbol property)
is_dangling (Symbol property)
is_decoder_list() (in module modelopt.torch.export.layer_utils)
is_default_quantizable_op_by_ort() (in module modelopt.onnx.op_types)
is_dynamic (Symbol property)
is_dynamic() (DynamicSpace method)
(in module modelopt.torch.opt.utils)
is_embedding() (in module modelopt.torch.export.layer_utils)
is_enabled (TensorQuantizer property)
is_export_mode (ExportModeDescriptor property)
(ExportSparseModeDescriptor property)
(ExportStudentModeDescriptor property)
(QuantizeExportModeDescriptor property)
is_failed() (GraphCollection method)
(RobustTracer method)
is_free (Symbol property)
is_fusible_reduction_op() (in module modelopt.onnx.op_types)
is_fusible_scaling_op() (in module modelopt.onnx.op_types)
is_generator_op() (in module modelopt.onnx.op_types)
is_incoming (Symbol property)
is_initialized (NFSWorkspace property)
is_initialized() (in module modelopt.torch.utils.distributed)
is_irregular_mem_access_op() (in module modelopt.onnx.op_types)
is_layernorm() (in module modelopt.torch.export.layer_utils)
is_leaf_module() (RobustTracer method)
is_linear() (in module modelopt.torch.export.layer_utils)
is_linear_op() (in module modelopt.onnx.op_types)
is_master() (in module modelopt.torch.utils.distributed)
is_mlp() (in module modelopt.torch.export.layer_utils)
is_modelopt_patches_enabled() (in module modelopt.torch.nas.utils)
is_modifier_op() (in module modelopt.onnx.op_types)
is_moe() (in module modelopt.torch.export.layer_utils)
is_multiclass_op() (in module modelopt.onnx.op_types)
is_mx_format (TensorQuantizer property)
is_non_reshape_copy_op() (in module modelopt.onnx.op_types)
is_normalization_op() (in module modelopt.onnx.op_types)
is_outgoing (Symbol property)
is_parallel() (in module modelopt.torch.utils.network)
is_pointwise_or_elementwise_op() (in module modelopt.onnx.op_types)
is_pooling_or_window_op() (in module modelopt.onnx.op_types)
is_quantized() (in module modelopt.torch.quantization.utils)
is_quantized_column_parallel_linear() (in module modelopt.torch.quantization.utils)
is_quantized_layer_with_weight() (in module modelopt.torch.quantization.utils)
is_quantized_row_parallel_linear() (in module modelopt.torch.quantization.utils)
is_quantlinear() (in module modelopt.torch.export.layer_utils)
is_recurrent() (in module modelopt.torch.export.layer_utils)
is_recurrent_op() (in module modelopt.onnx.op_types)
is_registered_leaf() (RobustTracer class method)
is_searchable (ConcatSymbol property)
(Symbol property)
is_selection_op() (in module modelopt.onnx.op_types)
is_sequence_op() (in module modelopt.onnx.op_types)
is_shape_op() (in module modelopt.onnx.op_types)
is_shape_preserving (SymInfo property)
is_shape_preserving() (SymMap method)
is_skippable() (SymDepth method)
is_sortable (Hparam property)
(Symbol property)
is_special_node() (ConcatNodeProcessor method)
is_tensorrt_llm_0_8_or_9() (in module modelopt.torch.export.tensorrt_llm_utils)
is_torch_library_supported() (in module modelopt.torch.quantization.utils)
is_unary_op() (in module modelopt.onnx.op_types)
is_unvisited() (GraphCollection method)
(RobustTracer method)
is_valid_onnx_model() (in module modelopt.onnx.utils)
items() (ModeloptBaseConfig method)
(SymMap method)
iter_num (IterativeSearcher attribute)
IterativeSearcher (class in modelopt.torch.nas.autonas)
K
k (QKVConfig attribute)
keys() (ModeloptBaseConfig method)
KnowledgeDistillationModeDescriptor (class in modelopt.torch.distill.mode)
kv_cache_dtype (AttentionConfig attribute)
kv_cache_scaling_factor (AttentionConfig attribute)
L
last_mode (ModeloptStateManager property)
layer_types (DecoderLayerConfig attribute)
LayerNorm (LayerNormType attribute)
layernorm_type (LayernormConfig attribute)
LayernormConfig (class in modelopt.torch.export.model_config)
LayerNormPositionType (class in modelopt.torch.export.tensorrt_llm_type)
LayerNormType (class in modelopt.torch.export.tensorrt_llm_type)
layers (ModelConfig attribute)
learn_amax (QuantizerAttributeConfig attribute)
LegacyFakeTensorQuantFunction (class in modelopt.torch.quantization.tensor_quant)
Linear (in module modelopt.torch.quantization.nn.modules.quant_linear)
linear (LinearActConfig attribute)
linear_out (RecurrentConfig attribute)
linear_type (LinearConfig attribute)
linear_x (RecurrentConfig attribute)
linear_y (RecurrentConfig attribute)
LinearActConfig (class in modelopt.torch.export.model_config)
LinearConfig (class in modelopt.torch.export.model_config)
link_to() (ConcatSymbol method)
(ConcatSymbol.Input method)
(Symbol method)
(SymDepth method)
list_closest_to_median() (in module modelopt.torch.utils.list)
LLM (class in modelopt.deploy.llm.generate)
lm_head (MedusaHeadConfig attribute)
(ModelConfig attribute)
ln_embed (ModelConfig attribute)
ln_f (ModelConfig attribute)
load_calib_amax() (TensorQuantizer method)
load_cpp_extension() (in module modelopt.torch.utils.cpp_extension)
load_onnx_model() (in module modelopt.onnx.quantization.trt_utils)
load_search_checkpoint() (BaseSearcher method)
(BinarySearcher method)
load_state_dict() (DistillationModel method)
(ModeloptStateManager method)
local_vocab_size (EmbeddingConfig property)
logits_soft_cap (DecoderLayerConfig attribute)
LogitsDistillationLoss (class in modelopt.torch.distill.losses)
longrope_long_mscale (DecoderLayerConfig attribute)
longrope_scaling_long_factors (DecoderLayerConfig attribute)
longrope_scaling_short_factors (DecoderLayerConfig attribute)
longrope_short_mscale (DecoderLayerConfig attribute)
loss_balancer (DistillationModel property)
(KDLossConfig attribute)
loss_modules (DistillationModel property)
lstm_cell_with_proj() (in module modelopt.torch.quantization.nn.modules.quant_rnn)
M
m4n2_1d() (in module modelopt.torch.sparsity.magnitude)
MagnitudeSearcher (class in modelopt.torch.sparsity.magnitude)
make_divisible() (in module modelopt.torch.utils.network)
make_gs_awq_scale() (in module modelopt.onnx.quantization.qdq_utils)
make_gs_dequantize_node() (in module modelopt.onnx.quantization.qdq_utils)
make_gs_dequantize_output() (in module modelopt.onnx.quantization.qdq_utils)
make_gs_pre_quant_scale_node() (in module modelopt.onnx.quantization.qdq_utils)
make_gs_pre_quant_scale_output() (in module modelopt.onnx.quantization.qdq_utils)
make_gs_quantize_node() (in module modelopt.onnx.quantization.qdq_utils)
make_gs_quantize_output() (in module modelopt.onnx.quantization.qdq_utils)
make_gs_quantized_weight() (in module modelopt.onnx.quantization.qdq_utils)
make_gs_scale() (in module modelopt.onnx.quantization.qdq_utils)
make_gs_zp() (in module modelopt.onnx.quantization.qdq_utils)
match() (in module modelopt.torch.utils.graph)
match_parameters() (in module modelopt.torch.quantization.optim)
max (Hparam property)
max_beam_width (LLM property)
max_co_batch_size (AWQClipCalibConfig attribute)
max_degrade (BinarySearcher attribute)
max_depth (SymDepth property)
max_input_len (LLM property)
max_position_embeddings (DecoderLayerConfig attribute)
(ModelConfig property)
max_tokens_per_batch (AWQClipCalibConfig attribute)
maxbound (TensorQuantizer property)
MaxCalibrator (class in modelopt.torch.quantization.calib.max)
MaxPool1d (in module modelopt.torch.quantization.nn.modules.quant_pooling)
MaxPool2d (in module modelopt.torch.quantization.nn.modules.quant_pooling)
MaxPool3d (in module modelopt.torch.quantization.nn.modules.quant_pooling)
MCoreGPTMinitronModeDescriptor (class in modelopt.torch.prune.mode)
MCoreGPTMinitronSearcher (class in modelopt.torch.prune.mcore_gpt_minitron)
medusa_heads (ModelConfig attribute)
medusa_layers (MedusaHeadConfig attribute)
medusa_num_heads (MedusaConfig attribute)
medusa_num_layers (MedusaConfig attribute)
MedusaHeadConfig (class in modelopt.torch.export.model_config)
MedusaModeDescriptor (class in modelopt.torch.speculative.mode)
MedusaModel (class in modelopt.torch.speculative.medusa.medusa_model)
merge_fc1_gate() (in module modelopt.torch.export.model_config_utils)
merge_qkv() (in module modelopt.torch.export.model_config_utils)
merged_fc1_gate (MLPConfig attribute)
method (QuantizeAlgorithmConfig attribute)
MGDLoss (class in modelopt.torch.distill.losses)
middle_value (BinarySearcher attribute)
min (Hparam property)
min_alpha (AWQClipHelper attribute)
min_clip_ratio (AWQClipCalibConfig attribute)
min_degrade (BinarySearcher attribute)
min_depth (SymDepth property)
mlp (DecoderLayerConfig attribute)
MLP (MLPType attribute)
mlp_layernorm (DecoderLayerConfig attribute)
mlp_replacing_linear (DecoderLayerConfig attribute)
MLPConfig (class in modelopt.torch.export.model_config)
MLPType (class in modelopt.torch.export.tensorrt_llm_type)
mn_1d_best() (in module modelopt.torch.sparsity.magnitude)
model (BaseSearcher attribute)
model_config_from_dict() (in module modelopt.torch.export.model_config_utils)
model_config_to_dict() (in module modelopt.torch.export.model_config_utils)
model_dump() (KDLossConfig method)
(ModeloptBaseConfig method)
model_dump_json() (ModeloptBaseConfig method)
model_to() (in module modelopt.torch.utils.network)
ModelConfig (class in modelopt.torch.export.model_config)
modelopt.deploy
module
modelopt.deploy.llm
module
modelopt.deploy.llm.generate
module
modelopt.deploy.llm.nemo_utils
module
modelopt.onnx
module
modelopt.onnx.op_types
module
modelopt.onnx.quantization
module
modelopt.onnx.quantization.calib_utils
module
modelopt.onnx.quantization.extensions
module
modelopt.onnx.quantization.fp8
module
modelopt.onnx.quantization.graph_utils
module
modelopt.onnx.quantization.gs_patching
module
modelopt.onnx.quantization.int4
module
modelopt.onnx.quantization.int8
module
modelopt.onnx.quantization.operators
module
modelopt.onnx.quantization.ort_patching
module
modelopt.onnx.quantization.ort_utils
module
modelopt.onnx.quantization.partitioning
module
modelopt.onnx.quantization.qdq_utils
module
modelopt.onnx.quantization.quant_utils
module
modelopt.onnx.quantization.trt_utils
module
modelopt.onnx.utils
module
modelopt.torch
module
modelopt.torch.distill
module
modelopt.torch.distill.config
module
modelopt.torch.distill.distillation
module
modelopt.torch.distill.distillation_model
module
modelopt.torch.distill.loss_balancers
module
modelopt.torch.distill.losses
module
modelopt.torch.distill.mode
module
modelopt.torch.distill.plugins
module
modelopt.torch.distill.registry
module
modelopt.torch.export
module
modelopt.torch.export.distribute
module
modelopt.torch.export.hf_config_map
module
modelopt.torch.export.layer_utils
module
modelopt.torch.export.model_config
module
modelopt.torch.export.model_config_export
module
modelopt.torch.export.model_config_utils
module
modelopt.torch.export.postprocess
module
modelopt.torch.export.scaling_factor_utils
module
modelopt.torch.export.tensorrt_llm_type
module
modelopt.torch.export.tensorrt_llm_utils
module
modelopt.torch.export.transformer_engine
module
modelopt.torch.export.unified_export_hf
module
modelopt.torch.export.vllm
module
modelopt.torch.nas
module
modelopt.torch.nas.algorithms
module
modelopt.torch.nas.autonas
module
modelopt.torch.nas.config
module
modelopt.torch.nas.conversion
module
modelopt.torch.nas.hparams
module
modelopt.torch.nas.hparams.concat
module
modelopt.torch.nas.hparams.container
module
modelopt.torch.nas.mode
module
modelopt.torch.nas.modules
module
modelopt.torch.nas.modules.container
module
modelopt.torch.nas.modules.conv
module
modelopt.torch.nas.modules.linear
module
modelopt.torch.nas.modules.norm
module
modelopt.torch.nas.modules.utils
module
modelopt.torch.nas.plugins
module
modelopt.torch.nas.registry
module
modelopt.torch.nas.search_space
module
modelopt.torch.nas.traced_hp
module
modelopt.torch.nas.utils
module
modelopt.torch.opt
module
modelopt.torch.opt.config
module
modelopt.torch.opt.conversion
module
modelopt.torch.opt.dynamic
module
modelopt.torch.opt.hparam
module
modelopt.torch.opt.mode
module
modelopt.torch.opt.plugins
module
modelopt.torch.opt.plugins.huggingface
module
modelopt.torch.opt.searcher
module
modelopt.torch.opt.utils
module
modelopt.torch.prune
module
modelopt.torch.prune.config
module
modelopt.torch.prune.fastnas
module
modelopt.torch.prune.gradnas
module
modelopt.torch.prune.mcore_gpt_minitron
module
modelopt.torch.prune.mode
module
modelopt.torch.prune.plugins
module
modelopt.torch.prune.pruning
module
modelopt.torch.quantization
module
modelopt.torch.quantization.algorithms
module
modelopt.torch.quantization.calib
module
modelopt.torch.quantization.calib.calibrator
module
modelopt.torch.quantization.calib.histogram
module
modelopt.torch.quantization.calib.max
module
modelopt.torch.quantization.config
module
modelopt.torch.quantization.conversion
module
modelopt.torch.quantization.export_onnx
module
modelopt.torch.quantization.extensions
module
modelopt.torch.quantization.mode
module
modelopt.torch.quantization.model_calib
module
modelopt.torch.quantization.model_quant
module
modelopt.torch.quantization.nn
module
modelopt.torch.quantization.nn.functional
module
modelopt.torch.quantization.nn.modules
module
modelopt.torch.quantization.nn.modules.clip
module
modelopt.torch.quantization.nn.modules.quant_activations
module
modelopt.torch.quantization.nn.modules.quant_batchnorm
module
modelopt.torch.quantization.nn.modules.quant_conv
module
modelopt.torch.quantization.nn.modules.quant_instancenorm
module
modelopt.torch.quantization.nn.modules.quant_linear
module
modelopt.torch.quantization.nn.modules.quant_module
module
modelopt.torch.quantization.nn.modules.quant_pooling
module
modelopt.torch.quantization.nn.modules.quant_rnn
module
modelopt.torch.quantization.nn.modules.tensor_quantizer
module
modelopt.torch.quantization.optim
module
modelopt.torch.quantization.plugins
module
modelopt.torch.quantization.qtensor
module
modelopt.torch.quantization.qtensor.base_qtensor
module
modelopt.torch.quantization.qtensor.int4_tensor
module
modelopt.torch.quantization.qtensor.nf4_tensor
module
modelopt.torch.quantization.qtensor.nvfp4_tensor
module
modelopt.torch.quantization.quant_modules
module
modelopt.torch.quantization.tensor_quant
module
modelopt.torch.quantization.utils
module
modelopt.torch.sparsity
module
modelopt.torch.sparsity.config
module
modelopt.torch.sparsity.magnitude
module
modelopt.torch.sparsity.mode
module
modelopt.torch.sparsity.module
module
modelopt.torch.sparsity.plugins
module
modelopt.torch.sparsity.searcher
module
modelopt.torch.sparsity.sparsegpt
module
modelopt.torch.sparsity.sparsification
module
modelopt.torch.speculative
module
modelopt.torch.speculative.config
module
modelopt.torch.speculative.medusa
module
modelopt.torch.speculative.medusa.conversion
module
modelopt.torch.speculative.medusa.medusa_model
module
modelopt.torch.speculative.mode
module
modelopt.torch.speculative.plugins
module
modelopt.torch.speculative.speculative_decoding
module
modelopt.torch.trace
module
modelopt.torch.trace.analyzer
module
modelopt.torch.trace.modules
module
modelopt.torch.trace.modules.concat
module
modelopt.torch.trace.modules.nn
module
modelopt.torch.trace.plugins
module
modelopt.torch.trace.symbols
module
modelopt.torch.trace.tracer
module
modelopt.torch.utils
module
modelopt.torch.utils.cpp_extension
module
modelopt.torch.utils.dataset_utils
module
modelopt.torch.utils.distributed
module
modelopt.torch.utils.graph
module
modelopt.torch.utils.list
module
modelopt.torch.utils.logging
module
modelopt.torch.utils.network
module
modelopt.torch.utils.perf
module
modelopt.torch.utils.random
module
modelopt.torch.utils.tensor
module
modelopt_state() (in module modelopt.torch.opt.conversion)
ModeloptField() (in module modelopt.torch.opt.config)
ModeloptStateManager (class in modelopt.torch.opt.conversion)
modes_with_states() (ModeloptStateManager method)
modify() (DistillationModel method)
(DynamicModule method)
(MedusaModel method)
(SparseModule method)
module
modelopt.deploy
modelopt.deploy.llm
modelopt.deploy.llm.generate
modelopt.deploy.llm.nemo_utils
modelopt.onnx
modelopt.onnx.op_types
modelopt.onnx.quantization
modelopt.onnx.quantization.calib_utils
modelopt.onnx.quantization.extensions
modelopt.onnx.quantization.fp8
modelopt.onnx.quantization.graph_utils
modelopt.onnx.quantization.gs_patching
modelopt.onnx.quantization.int4
modelopt.onnx.quantization.int8
modelopt.onnx.quantization.operators
modelopt.onnx.quantization.ort_patching
modelopt.onnx.quantization.ort_utils
modelopt.onnx.quantization.partitioning
modelopt.onnx.quantization.qdq_utils
modelopt.onnx.quantization.quant_utils
modelopt.onnx.quantization.trt_utils
modelopt.onnx.utils
modelopt.torch
modelopt.torch.distill
modelopt.torch.distill.config
modelopt.torch.distill.distillation
modelopt.torch.distill.distillation_model
modelopt.torch.distill.loss_balancers
modelopt.torch.distill.losses
modelopt.torch.distill.mode
modelopt.torch.distill.plugins
modelopt.torch.distill.registry
modelopt.torch.export
modelopt.torch.export.distribute
modelopt.torch.export.hf_config_map
modelopt.torch.export.layer_utils
modelopt.torch.export.model_config
modelopt.torch.export.model_config_export
modelopt.torch.export.model_config_utils
modelopt.torch.export.postprocess
modelopt.torch.export.scaling_factor_utils
modelopt.torch.export.tensorrt_llm_type
modelopt.torch.export.tensorrt_llm_utils
modelopt.torch.export.transformer_engine
modelopt.torch.export.unified_export_hf
modelopt.torch.export.vllm
modelopt.torch.nas
modelopt.torch.nas.algorithms
modelopt.torch.nas.autonas
modelopt.torch.nas.config
modelopt.torch.nas.conversion
modelopt.torch.nas.hparams
modelopt.torch.nas.hparams.concat
modelopt.torch.nas.hparams.container
modelopt.torch.nas.mode
modelopt.torch.nas.modules
modelopt.torch.nas.modules.container
modelopt.torch.nas.modules.conv
modelopt.torch.nas.modules.linear
modelopt.torch.nas.modules.norm
modelopt.torch.nas.modules.utils
modelopt.torch.nas.plugins
modelopt.torch.nas.registry
modelopt.torch.nas.search_space
modelopt.torch.nas.traced_hp
modelopt.torch.nas.utils
modelopt.torch.opt
modelopt.torch.opt.config
modelopt.torch.opt.conversion
modelopt.torch.opt.dynamic
modelopt.torch.opt.hparam
modelopt.torch.opt.mode
modelopt.torch.opt.plugins
modelopt.torch.opt.plugins.huggingface
modelopt.torch.opt.searcher
modelopt.torch.opt.utils
modelopt.torch.prune
modelopt.torch.prune.config
modelopt.torch.prune.fastnas
modelopt.torch.prune.gradnas
modelopt.torch.prune.mcore_gpt_minitron
modelopt.torch.prune.mode
modelopt.torch.prune.plugins
modelopt.torch.prune.pruning
modelopt.torch.quantization
modelopt.torch.quantization.algorithms
modelopt.torch.quantization.calib
modelopt.torch.quantization.calib.calibrator
modelopt.torch.quantization.calib.histogram
modelopt.torch.quantization.calib.max
modelopt.torch.quantization.config
modelopt.torch.quantization.conversion
modelopt.torch.quantization.export_onnx
modelopt.torch.quantization.extensions
modelopt.torch.quantization.mode
modelopt.torch.quantization.model_calib
modelopt.torch.quantization.model_quant
modelopt.torch.quantization.nn
modelopt.torch.quantization.nn.functional
modelopt.torch.quantization.nn.modules
modelopt.torch.quantization.nn.modules.clip
modelopt.torch.quantization.nn.modules.quant_activations
modelopt.torch.quantization.nn.modules.quant_batchnorm
modelopt.torch.quantization.nn.modules.quant_conv
modelopt.torch.quantization.nn.modules.quant_instancenorm
modelopt.torch.quantization.nn.modules.quant_linear
modelopt.torch.quantization.nn.modules.quant_module
modelopt.torch.quantization.nn.modules.quant_pooling
modelopt.torch.quantization.nn.modules.quant_rnn
modelopt.torch.quantization.nn.modules.tensor_quantizer
modelopt.torch.quantization.optim
modelopt.torch.quantization.plugins
modelopt.torch.quantization.qtensor
modelopt.torch.quantization.qtensor.base_qtensor
modelopt.torch.quantization.qtensor.int4_tensor
modelopt.torch.quantization.qtensor.nf4_tensor
modelopt.torch.quantization.qtensor.nvfp4_tensor
modelopt.torch.quantization.quant_modules
modelopt.torch.quantization.tensor_quant
modelopt.torch.quantization.utils
modelopt.torch.sparsity
modelopt.torch.sparsity.config
modelopt.torch.sparsity.magnitude
modelopt.torch.sparsity.mode
modelopt.torch.sparsity.module
modelopt.torch.sparsity.plugins
modelopt.torch.sparsity.searcher
modelopt.torch.sparsity.sparsegpt
modelopt.torch.sparsity.sparsification
modelopt.torch.speculative
modelopt.torch.speculative.config
modelopt.torch.speculative.medusa
modelopt.torch.speculative.medusa.conversion
modelopt.torch.speculative.medusa.medusa_model
modelopt.torch.speculative.mode
modelopt.torch.speculative.plugins
modelopt.torch.speculative.speculative_decoding
modelopt.torch.trace
modelopt.torch.trace.analyzer
modelopt.torch.trace.modules
modelopt.torch.trace.modules.concat
modelopt.torch.trace.modules.nn
modelopt.torch.trace.plugins
modelopt.torch.trace.symbols
modelopt.torch.trace.tracer
modelopt.torch.utils
modelopt.torch.utils.cpp_extension
modelopt.torch.utils.dataset_utils
modelopt.torch.utils.distributed
modelopt.torch.utils.graph
modelopt.torch.utils.list
modelopt.torch.utils.logging
modelopt.torch.utils.network
modelopt.torch.utils.perf
modelopt.torch.utils.random
modelopt.torch.utils.tensor
moe_num_experts (DecoderLayerConfig attribute)
moe_renorm_mode (DecoderLayerConfig attribute)
moe_top_k (DecoderLayerConfig attribute)
moe_tp_mode (DecoderLayerConfig attribute)
MOEConfig (class in modelopt.torch.export.model_config)
mup_attn_multiplier (DecoderLayerConfig attribute)
mup_embedding_multiplier (DecoderLayerConfig attribute)
mup_use_scaling (DecoderLayerConfig attribute)
mup_width_multiplier (DecoderLayerConfig attribute)
N
naive_quantization() (in module modelopt.torch.export.model_config_utils)
name (AutoNASModeDescriptor property)
(ExportModeDescriptor property)
(ExportSparseModeDescriptor property)
(ExportStudentModeDescriptor property)
(FastNASModeDescriptor property)
(GradNASModeDescriptor property)
(KnowledgeDistillationModeDescriptor property)
(MCoreGPTMinitronModeDescriptor property)
(MedusaModeDescriptor property)
(QuantizeExportModeDescriptor property)
(QuantizeModeDescriptor property)
(SparseGPTModeDescriptor property)
(SparseMagnitudeModeDescriptor property)
name_onnx_nodes() (in module modelopt.onnx.utils)
named_dynamic_modules() (DynamicSpace method)
(in module modelopt.torch.opt.utils)
named_hparams() (DynamicModule method)
(DynamicSpace method)
(in module modelopt.torch.opt.utils)
named_modules() (SymMap method)
named_sym_dicts() (SymMap method)
named_symbols() (SymMap method)
narrow_range (QuantizerAttributeConfig attribute)
(TensorQuantizer property)
new_decoder_architecture (DecoderLayerConfig attribute)
next_modes (AutoNASModeDescriptor property)
(FastNASModeDescriptor property)
(KnowledgeDistillationModeDescriptor property)
(QuantizeModeDescriptor property)
(SparseMagnitudeModeDescriptor property)
NF4QTensor (class in modelopt.torch.quantization.qtensor.nf4_tensor)
NFSWorkspace (class in modelopt.torch.export.distribute)
nn_batchnorm1d (AutoNASConfig attribute)
(FastNASConfig attribute)
(GradNASConfig attribute)
nn_batchnorm2d (AutoNASConfig attribute)
(FastNASConfig attribute)
(GradNASConfig attribute)
nn_batchnorm3d (AutoNASConfig attribute)
(FastNASConfig attribute)
(GradNASConfig attribute)
nn_conv1d (AutoNASConfig attribute)
(FastNASConfig attribute)
(GradNASConfig attribute)
nn_conv2d (AutoNASConfig attribute)
(FastNASConfig attribute)
(GradNASConfig attribute)
(SparseGPTConfig attribute)
(SparseMagnitudeConfig attribute)
nn_conv3d (AutoNASConfig attribute)
(FastNASConfig attribute)
(GradNASConfig attribute)
nn_convtranspose1d (AutoNASConfig attribute)
(FastNASConfig attribute)
(GradNASConfig attribute)
nn_convtranspose2d (AutoNASConfig attribute)
(FastNASConfig attribute)
(GradNASConfig attribute)
nn_convtranspose3d (AutoNASConfig attribute)
(FastNASConfig attribute)
(GradNASConfig attribute)
nn_groupnorm (AutoNASConfig attribute)
(FastNASConfig attribute)
(GradNASConfig attribute)
nn_instancenorm1d (AutoNASConfig attribute)
(FastNASConfig attribute)
(GradNASConfig attribute)
nn_instancenorm2d (AutoNASConfig attribute)
(FastNASConfig attribute)
(GradNASConfig attribute)
nn_instancenorm3d (AutoNASConfig attribute)
(FastNASConfig attribute)
(GradNASConfig attribute)
nn_layernorm (AutoNASConfig attribute)
(FastNASConfig attribute)
(GradNASConfig attribute)
nn_linear (AutoNASConfig attribute)
(FastNASConfig attribute)
(GradNASConfig attribute)
(SparseGPTConfig attribute)
(SparseMagnitudeConfig attribute)
nn_sequential (AutoNASConfig attribute)
nn_syncbatchnorm (AutoNASConfig attribute)
(FastNASConfig attribute)
(GradNASConfig attribute)
no_modelopt_patches (class in modelopt.torch.nas.utils)
no_stdout() (in module modelopt.torch.utils.logging)
NONE (Symbol.CLType attribute)
num2hrb() (in module modelopt.torch.utils.logging)
num_attention_heads (DecoderLayerConfig attribute)
(ModelConfig property)
num_bits (QuantizerAttributeConfig attribute)
(QuantRecipe property)
(TensorQuantizer property)
num_kv_heads (DecoderLayerConfig attribute)
(ModelConfig property)
num_medusa_heads (ModelConfig attribute)
num_medusa_layers (ModelConfig attribute)
num_satisfied (IterativeSearcher attribute)
numpy_to_torch() (in module modelopt.torch.utils.tensor)
NVFP4QTensor (class in modelopt.torch.quantization.qtensor.nvfp4_tensor)
O
original (Hparam property)
original() (in module modelopt.torch.utils.random)
original_cls (DynamicModule property)
original_max_position_embeddings (DecoderLayerConfig attribute)
original_meta_tensor (BaseQuantizedTensor attribute)
original_score (BinarySearcher attribute)
OUTGOING (Symbol.CLType attribute)
output_quantizer (QuantInputBase attribute)
P
pack_float32_to_4bit_cpp_based() (in module modelopt.onnx.quantization.quant_utils)
pack_float32_to_4bit_optimized() (in module modelopt.onnx.quantization.quant_utils)
pack_linear_weights() (in module modelopt.torch.export.model_config_utils)
pack_real_quantize_weight() (in module modelopt.torch.quantization.qtensor.base_qtensor)
pad_embedding_lm_head() (in module modelopt.torch.export.postprocess)
pad_token (CustomSentencePieceTokenizer property)
pad_token_id (CustomSentencePieceTokenizer property)
pad_weights() (in module modelopt.torch.export.model_config_utils)
parallel_attention (DecoderLayerConfig attribute)
param_num() (in module modelopt.torch.utils.network)
param_num_from_forward() (in module modelopt.torch.utils.network)
parent (Symbol property)
parse_shapes_spec() (in module modelopt.onnx.utils)
partial_rotary_factor (DecoderLayerConfig attribute)
patch_gs_modules() (in module modelopt.onnx.quantization.gs_patching)
patch_ort_modules() (in module modelopt.onnx.quantization.ort_patching)
pipeline_parallel (ModelConfig attribute)
pop() (SymMap method)
population (EvolveSearcher attribute)
position_embedding (ModelConfig attribute)
post_feedforward_layernorm (DecoderLayerConfig attribute)
post_layernorm (DecoderLayerConfig attribute)
(LayerNormPositionType attribute)
post_process() (ConcatNodeProcessor method)
postprocess_amax() (in module modelopt.torch.quantization.model_calib)
postprocess_model_config() (in module modelopt.torch.export.postprocess)
postprocess_tensors() (in module modelopt.torch.export.postprocess)
pre_feedforward_layernorm (DecoderLayerConfig attribute)
pre_layernorm (LayerNormPositionType attribute)
pre_quant_scale (TensorQuantizer property)
precompile() (in module modelopt.torch.quantization.extensions)
prepare() (in module modelopt.torch.sparsity.sparsegpt)
prepare_enc_dec_decoder_layer() (in module modelopt.torch.export.tensorrt_llm_utils)
prepare_enc_dec_export_dir() (in module modelopt.torch.export.tensorrt_llm_utils)
prequant_scaling_factor (LinearConfig attribute)
(QKVConfig property)
print_quant_summary() (in module modelopt.torch.quantization.model_quant)
print_rank_0() (in module modelopt.torch.utils.logging)
print_search_space_summary() (in module modelopt.torch.nas.utils)
print_stat() (in module modelopt.onnx.quantization.graph_utils)
print_summary() (SearchSpace method)
process() (ConcatNodeProcessor method)
process_gradient() (GradientDataManager method)
process_layer_quant_config() (in module modelopt.torch.export.model_config_utils)
profile() (in module modelopt.torch.nas.algorithms)
proj (ExpertConfig attribute)
(MLPConfig attribute)
prune() (in module modelopt.torch.prune.pruning)
(SymMap method)
Q
q (QKVConfig attribute)
qdq_to_dq() (in module modelopt.onnx.quantization.qdq_utils)
QDQConvTranspose (class in modelopt.onnx.quantization.operators)
QDQNormalization (class in modelopt.onnx.quantization.operators)
qkv (AttentionConfig attribute)
QKVConfig (class in modelopt.torch.export.model_config)
QTensorWrapper (class in modelopt.torch.quantization.qtensor.base_qtensor)
quant_cfg (QuantizeConfig attribute)
quant_tensor() (in module modelopt.onnx.quantization.int4)
quant_weight_inplace() (in module modelopt.torch.quantization.optim)
QuantAdaptiveAvgPool1d (class in modelopt.torch.quantization.nn.modules.quant_pooling)
QuantAdaptiveAvgPool2d (class in modelopt.torch.quantization.nn.modules.quant_pooling)
QuantAdaptiveAvgPool3d (class in modelopt.torch.quantization.nn.modules.quant_pooling)
QuantAvgPool1d (class in modelopt.torch.quantization.nn.modules.quant_pooling)
QuantAvgPool2d (class in modelopt.torch.quantization.nn.modules.quant_pooling)
QuantAvgPool3d (class in modelopt.torch.quantization.nn.modules.quant_pooling)
QuantConv1d (class in modelopt.torch.quantization.nn.modules.quant_conv)
QuantConv2d (class in modelopt.torch.quantization.nn.modules.quant_conv)
QuantConv3d (class in modelopt.torch.quantization.nn.modules.quant_conv)
QuantConvTranspose1d (class in modelopt.torch.quantization.nn.modules.quant_conv)
QuantConvTranspose2d (class in modelopt.torch.quantization.nn.modules.quant_conv)
QuantConvTranspose3d (class in modelopt.torch.quantization.nn.modules.quant_conv)
QuantInputBase (class in modelopt.torch.quantization.nn.modules.quant_module)
QuantInstanceNorm1d (class in modelopt.torch.quantization.nn.modules.quant_instancenorm)
QuantInstanceNorm2d (class in modelopt.torch.quantization.nn.modules.quant_instancenorm)
QuantInstanceNorm3d (class in modelopt.torch.quantization.nn.modules.quant_instancenorm)
quantization (ConvConfig attribute)
(DecoderLayerConfig attribute)
(EmbeddingConfig attribute)
(LayernormConfig attribute)
(LinearConfig attribute)
(MLPConfig property)
(ModelConfig attribute)
(QKVConfig property)
quantize() (BaseQuantizedTensor class method)
(in module modelopt.onnx.quantization)
(in module modelopt.onnx.quantization.fp8)
(in module modelopt.onnx.quantization.int4)
(in module modelopt.onnx.quantization.int8)
(in module modelopt.torch.quantization.model_quant)
(INT4QTensor class method)
(NF4QTensor class method)
(NVFP4QTensor class method)
(QDQConvTranspose method)
(QDQNormalization method)
quantize_op_abstract() (in module modelopt.torch.quantization.tensor_quant)
quantize_rtn() (in module modelopt.onnx.quantization.int4)
quantize_weight() (QuantLinearConvBase method)
(QuantRNNBase method)
quantized_cell_forward() (in module modelopt.torch.quantization.nn.modules.quant_rnn)
quantized_data (BaseQuantizedTensor attribute)
(INT4QTensor attribute)
(NF4QTensor attribute)
QuantizeExportModeDescriptor (class in modelopt.torch.quantization.mode)
QuantizeModeDescriptor (class in modelopt.torch.quantization.mode)
QuantLinear (class in modelopt.torch.quantization.nn.modules.quant_linear)
QuantLinearConvBase (class in modelopt.torch.quantization.nn.modules.quant_module)
QuantMaxPool1d (class in modelopt.torch.quantization.nn.modules.quant_pooling)
QuantMaxPool2d (class in modelopt.torch.quantization.nn.modules.quant_pooling)
QuantMaxPool3d (class in modelopt.torch.quantization.nn.modules.quant_pooling)
QuantRecipe (class in modelopt.torch.quantization.algorithms)
QuantRecipeHparam (class in modelopt.torch.quantization.algorithms)
QuantRNNBase (class in modelopt.torch.quantization.nn.modules.quant_rnn)
QuantRNNFullBase (class in modelopt.torch.quantization.nn.modules.quant_rnn)
query_pre_attn_scalar (DecoderLayerConfig attribute)
qwen_type (DecoderLayerConfig attribute)
R
random() (in module modelopt.torch.utils.random)
RandomDataProvider (class in modelopt.onnx.quantization.calib_utils)
randomize_weights() (in module modelopt.onnx.utils)
randomize_weights_onnx_bytes() (in module modelopt.onnx.utils)
RandomSearcher (class in modelopt.torch.nas.autonas)
rank (ModelConfig attribute)
rank() (in module modelopt.torch.utils.distributed)
read_configs_and_weights_from_rank() (NFSWorkspace method)
record_call_module() (RobustTracer method)
recurrent (DecoderLayerConfig attribute)
recurrent_gate (RgLruConfig attribute)
recurrent_param (RgLruConfig attribute)
RecurrentConfig (class in modelopt.torch.export.model_config)
recursive_trace() (GraphCollection method)
(in module modelopt.torch.trace.tracer)
reduce_amax() (in module modelopt.torch.quantization.utils)
register() (in module modelopt.torch.quantization.conversion)
(SymMap class method)
(TracedHpRegistry class method)
register_default() (ModeloptBaseRuleConfig class method)
register_gradient_checkpointing_enable_context() (AutoQuantizeSearcher class method)
register_importance() (Hparam method)
register_leaf() (RobustTracer class method)
rel_attn_max_distance (DecoderLayerConfig attribute)
rel_attn_num_buckets (DecoderLayerConfig attribute)
rel_attn_table (AttentionConfig attribute)
remove_bn() (in module modelopt.torch.utils.network)
remove_partial_input_qdq() (in module modelopt.onnx.quantization.graph_utils)
remove_weights_data() (in module modelopt.onnx.utils)
replace_forward() (in module modelopt.torch.nas.utils)
replace_function() (in module modelopt.torch.quantization.utils)
replace_quant_module() (in module modelopt.torch.quantization.conversion)
replace_scale_values() (in module modelopt.onnx.quantization.qdq_utils)
replace_sequential_quantizer_with_single_quantizer() (SequentialQuantizer static method)
report_memory() (in module modelopt.torch.utils.perf)
ResBlock (class in modelopt.torch.speculative.medusa.medusa_model)
reset() (ConcatNodeProcessor method)
(HistogramCalibrator method)
(MaxCalibrator method)
reset_amax() (SequentialQuantizer method)
(TensorQuantizer method)
reset_dynamic_attributes() (DynamicModule method)
reset_search() (BaseSearcher method)
reshape_1d() (in module modelopt.torch.sparsity.magnitude)
residual_layernorm (DecoderLayerConfig attribute)
residual_mlp (DecoderLayerConfig attribute)
resmooth_and_get_scale_and_amax() (in module modelopt.torch.export.scaling_factor_utils)
resmooth_weights_and_get_scales() (NVFP4QTensor class method)
resolve_dependencies() (TracedHp method)
restore (AutoNASModeDescriptor property)
(ExportModeDescriptor property)
(ExportSparseModeDescriptor property)
(ExportStudentModeDescriptor property)
(FastNASModeDescriptor property)
(KnowledgeDistillationModeDescriptor property)
(MedusaModeDescriptor property)
(QuantizeExportModeDescriptor property)
(QuantizeModeDescriptor property)
(SparseMagnitudeModeDescriptor property)
restore() (in module modelopt.torch.opt.conversion)
restore_autonas_searchspace() (in module modelopt.torch.nas.autonas)
restore_export() (in module modelopt.torch.nas.autonas)
restore_export_sparse() (in module modelopt.torch.sparsity.mode)
restore_fastnas_searchspace() (in module modelopt.torch.prune.fastnas)
restore_from_modelopt_state() (in module modelopt.torch.opt.conversion)
restore_medusa_model() (in module modelopt.torch.speculative.medusa.conversion)
restore_model_config() (in module modelopt.torch.export.model_config_utils)
restore_searchspace() (in module modelopt.torch.nas.autonas)
restore_sparse_model() (in module modelopt.torch.sparsity.mode)
rg_lru (RecurrentConfig attribute)
RgLruConfig (class in modelopt.torch.export.model_config)
RmsNorm (LayerNormType attribute)
rnn_hidden_size (DecoderLayerConfig attribute)
RNNLayerForward (class in modelopt.torch.quantization.nn.modules.quant_rnn)
RobustTracer (class in modelopt.torch.trace.tracer)
rope_ratio (DecoderLayerConfig attribute)
rope_scaling (DecoderLayerConfig attribute)
rotary_base (DecoderLayerConfig attribute)
rotary_dim (AttentionConfig attribute)
rotary_pct (DecoderLayerConfig attribute)
router (MOEConfig attribute)
rtn() (in module modelopt.onnx.quantization.int4)
rules (AutoQuantizeSearcher attribute)
run_awq_scale_search_per_node() (in module modelopt.onnx.quantization.int4)
run_awq_scale_search_per_subgraph() (in module modelopt.onnx.quantization.int4)
run_forward_loop() (in module modelopt.torch.utils.network)
run_search() (AutoQuantizeSearcher method)
(BaseSearcher method)
(BaseSparseSearcher method)
(IterativeSearcher method)
(MCoreGPTMinitronSearcher method)
run_step() (IterativeSearcher method)
S
sample() (BinarySearcher method)
(EvolveSearcher method)
(in module modelopt.torch.nas.utils)
(in module modelopt.torch.utils.random)
(IterativeSearcher method)
(RandomSearcher method)
(SearchSpace method)
sample_during_training (AutoNASPatchManager property)
(FastNASPatchManager property)
samples (IterativeSearcher attribute)
sanitize_dummy_weight() (QuantLinearConvBase static method)
sanitize_search_config() (AutoQuantizeSearcher method)
(BaseSearcher method)
(BaseSparseSearcher method)
(GradientBinarySearcher method)
(IterativeSearcher method)
(MCoreGPTMinitronSearcher method)
save() (in module modelopt.torch.opt.conversion)
save_onnx() (in module modelopt.onnx.utils)
save_onnx_bytes_to_dir() (in module modelopt.onnx.utils)
save_search_checkpoint() (BaseSearcher method)
scaled_e4m3_impl() (in module modelopt.torch.quantization.tensor_quant)
ScaledE4M3Function (class in modelopt.torch.quantization.tensor_quant)
score (GradientDataManager property)
search() (BaseSearcher method)
(in module modelopt.torch.nas.algorithms)
search_algorithm (AutoNASModeDescriptor property)
(FastNASModeDescriptor property)
(GradNASModeDescriptor property)
(MCoreGPTMinitronModeDescriptor property)
(SparseGPTModeDescriptor property)
(SparseMagnitudeModeDescriptor property)
search_space_size() (in module modelopt.torch.opt.utils)
SearchSpace (class in modelopt.torch.nas.search_space)
select() (DynamicSpace method)
(in module modelopt.torch.nas.utils)
self_attention (DecoderLayerConfig attribute)
self_attention_layernorm (DecoderLayerConfig attribute)
sensitivity_map (BinarySearcher attribute)
seq_length (DecoderLayerConfig attribute)
SequentialQuantizer (class in modelopt.torch.quantization.nn.modules.tensor_quantizer)
set_data_parallel_group() (in module modelopt.torch.utils.distributed)
set_from_attribute_config() (SequentialQuantizer method)
(TensorQuantizer method)
set_from_modelopt_state() (TensorQuantizer method)
set_mask() (SparseModule method)
set_modelopt_patches_enabled (class in modelopt.torch.nas.utils)
set_quantizer_attribute() (in module modelopt.torch.quantization.conversion)
set_quantizer_by_cfg() (in module modelopt.torch.quantization.conversion)
set_quantizer_by_cfg_context() (in module modelopt.torch.quantization.conversion)
set_skippable() (SymDepth method)
set_student_loss_reduction_fn() (DistillationLossBalancer method)
set_submodule() (in module modelopt.torch.utils.network)
set_symbol() (SymMap method)
set_tensor_parallel_group() (in module modelopt.torch.utils.distributed)
SETUP_GRADIENT_FUNC (GradientBinarySearcher attribute)
share_embedding_table (ModelConfig attribute)
shrink_step (AWQClipCalibConfig attribute)
shuffle() (in module modelopt.torch.utils.random)
silence_matched_warnings() (in module modelopt.torch.utils.logging)
size() (DynamicSpace method)
(in module modelopt.torch.utils.distributed)
skippable_idxs (SymDepth property)
sort_parameters() (SearchSpace method)
SparseGPTModeDescriptor (class in modelopt.torch.sparsity.mode)
SparseGPTSearcher (class in modelopt.torch.sparsity.sparsegpt)
SparseMagnitudeModeDescriptor (class in modelopt.torch.sparsity.mode)
SparseModule (class in modelopt.torch.sparsity.module)
sparsify() (in module modelopt.torch.sparsity.sparsification)
split_config_and_weights() (in module modelopt.torch.export.model_config_utils)
standardize_constructor_args() (in module modelopt.torch.utils.network)
standardize_model_args() (in module modelopt.torch.utils.network)
standardize_model_like_tuple() (in module modelopt.torch.utils.network)
standardize_named_model_args() (in module modelopt.torch.utils.network)
start() (Timer method)
state_dict() (BaseSearcher method)
(DistillationModel method)
(ModeloptStateManager method)
StaticLossBalancer (class in modelopt.torch.distill.loss_balancers)
stats() (in module modelopt.torch.utils.list)
step_size (TensorQuantizer property)
stop() (Timer method)
strict (ExportConfig attribute)
SUPPORTED_HPARAMS (MCoreGPTMinitronSearcher attribute)
Symbol (class in modelopt.torch.trace.symbols)
Symbol.CLType (class in modelopt.torch.trace.symbols)
symbolic() (FakeTensorQuantFunction static method)
(ScaledE4M3Function static method)
(TensorQuantFunction static method)
SymDepth (class in modelopt.torch.trace.modules.nn)
SymDict (SymInfo attribute)
SymInfo (class in modelopt.torch.trace.symbols)
SymMap (class in modelopt.torch.trace.symbols)
SymRegisterFunc (SymMap attribute)
sync_amax_across_distributed_group() (TensorQuantizer method)
T
teacher_model (DistillationModel property)
(KDLossConfig attribute)
tensor_parallel (ModelConfig attribute)
tensor_quantizer_iterator() (SequentialQuantizer static method)
TensorQuantFunction (class in modelopt.torch.quantization.tensor_quant)
TensorQuantizer (class in modelopt.torch.quantization.nn.modules.tensor_quantizer)
Timer (class in modelopt.torch.utils.perf)
to() (QTensorWrapper method)
to_quantized_weight() (in module modelopt.torch.export.model_config_utils)
torch_detach() (in module modelopt.torch.utils.tensor)
torch_to() (in module modelopt.torch.utils.tensor)
torch_to_numpy() (in module modelopt.torch.utils.tensor)
torch_to_tensorrt_llm_checkpoint() (in module modelopt.torch.export.model_config_export)
trace() (RobustTracer method)
TracedHp (class in modelopt.torch.nas.traced_hp)
TracedHpRegistry (class in modelopt.torch.nas.traced_hp)
transfer_state_dict() (ModeloptStateManager class method)
trt_high_precision_dtype (QuantizerAttributeConfig attribute)
(TensorQuantizer property)
type (QuantizerAttributeConfig attribute)
U
udpate_domain() (in module modelopt.onnx.utils)
unregister() (in module modelopt.torch.quantization.conversion)
(SymMap class method)
(TracedHpRegistry class method)
unregister_default() (ModeloptBaseRuleConfig class method)
unregister_leaf() (RobustTracer class method)
unsigned (QuantizerAttributeConfig attribute)
(TensorQuantizer property)
unwrap_model() (in module modelopt.torch.utils.network)
update() (ModeloptBaseConfig method)
update_autonas_metadata() (in module modelopt.torch.nas.autonas)
update_best_params() (AWQClipHelper method)
(AWQLiteHelper method)
update_for_new_mode (AutoNASModeDescriptor property)
(FastNASModeDescriptor property)
(KnowledgeDistillationModeDescriptor property)
(QuantizeModeDescriptor property)
(SparseMagnitudeModeDescriptor property)
update_for_save (AutoNASModeDescriptor property)
(FastNASModeDescriptor property)
(QuantizeModeDescriptor property)
(SparseMagnitudeModeDescriptor property)
update_last_state_before_new_mode() (ModeloptStateManager method)
update_last_state_before_save() (ModeloptStateManager method)
update_lm_head_quantization() (in module modelopt.torch.export.postprocess)
update_sparse_metadata() (in module modelopt.torch.sparsity.mode)
use_alibi (DecoderLayerConfig attribute)
use_cache (DecoderLayerConfig attribute)
use_trt_qdq_ops() (in module modelopt.onnx.quantization.qdq_utils)
V
v (QKVConfig attribute)
val2list() (in module modelopt.torch.utils.list)
val2tuple() (in module modelopt.torch.utils.list)
validate_batch_size() (in module modelopt.onnx.utils)
validate_onnx() (in module modelopt.onnx.utils)
validate_rule() (ModeloptBaseRule class method)
values() (ModeloptBaseConfig method)
version (ModelConfig attribute)
VFRNNForward (class in modelopt.torch.quantization.nn.modules.quant_rnn)
vocab_embedding (ModelConfig attribute)
vocab_size (ModelConfig attribute)
vocab_size_padded (ModelConfig property)
W
weight (ConvConfig attribute)
(EmbeddingConfig attribute)
(LayernormConfig attribute)
(LinearConfig attribute)
(QKVConfig property)
weight_quantizer (QuantLinearConvBase attribute)
(QuantRNNBase attribute)
weights_scaling_factor (LinearConfig attribute)
(QKVConfig property)
weights_scaling_factor_2 (LinearConfig attribute)
(QKVConfig property)
weights_to_npz() (in module modelopt.torch.export.tensorrt_llm_utils)
write_configs_and_weights() (NFSWorkspace method)
Y
y_bias (RecurrentConfig attribute)
Z
zero_grad() (in module modelopt.torch.utils.network)