TensorRT Model Optimizer
0.13.0
Getting Started
Overview
Installation
Quick Start: Quantization
Quick Start: Sparsity
Optimization Guides
Quantization
Sparsity
Deployment
TensorRT-LLM Deployment
Examples
All ModelOpt Examples
Reference
Changelog
modelopt API
Support
Contact us
FAQs
TensorRT Model Optimizer
Index
Index
_
|
A
|
B
|
C
|
D
|
E
|
F
|
G
|
H
|
I
|
K
|
L
|
M
|
N
|
O
|
P
|
Q
|
R
|
S
|
T
|
U
|
V
|
W
|
Y
|
Z
_
__init__() (AttentionConfig method)
(AWQClipHelper method)
(BaseQuantizedTensor method)
(BaseSearcher method)
(CalibrationDataProvider method)
(Clip method)
(ConvConfig method)
(CustomSentencePieceTokenizer method)
(DecoderLayerConfig method)
(DynamicModule method)
(DynamicSpace method)
(EmbeddingConfig method)
(ExpertConfig method)
(HistogramCalibrator method)
(Hparam method)
(LayernormConfig method)
(LinearActConfig method)
(LinearConfig method)
(LLM method)
(MaxCalibrator method)
(MedusaHeadConfig method)
(MLPConfig method)
(ModelConfig method)
(ModeloptStateManager method)
(MOEConfig method)
(NFSWorkspace method)
(QDQConvTranspose method)
(QDQNormalization method)
(QKVConfig method)
(RandomDataProvider method)
(RecurrentConfig method)
(RgLruConfig method)
(ScaledQuantDescriptor method)
(SequentialQuantizer method)
(TensorQuantizer method)
(Timer method)
A
activation_scaling_factor (LinearConfig attribute)
(QKVConfig property)
active (Hparam property)
active_slice (Hparam property)
ActiveSlice (Hparam attribute)
AdaptiveAvgPool1d (in module modelopt.torch.quantization.nn.modules.quant_pooling)
AdaptiveAvgPool2d (in module modelopt.torch.quantization.nn.modules.quant_pooling)
AdaptiveAvgPool3d (in module modelopt.torch.quantization.nn.modules.quant_pooling)
add_mode() (ModeloptStateManager method)
after_search() (BaseSearcher method)
(SparseGPTSearcher method)
algorithm (QuantizeConfig attribute)
alibi_bias_max (DecoderLayerConfig attribute)
alpha_step (AWQClipHelper attribute)
alphas (AWQClipHelper attribute)
amax (ScaledQuantDescriptor property)
(TensorQuantizer property)
amaxs (MaxCalibrator property)
apply_mode() (in module modelopt.torch.opt.conversion)
apply_residual_connection_post_layernorm (DecoderLayerConfig attribute)
attention (DecoderLayerConfig attribute)
attention_head_size (DecoderLayerConfig attribute)
AttentionConfig (class in modelopt.torch.export.model_config)
AvgPool1d (in module modelopt.torch.quantization.nn.modules.quant_pooling)
AvgPool2d (in module modelopt.torch.quantization.nn.modules.quant_pooling)
AvgPool3d (in module modelopt.torch.quantization.nn.modules.quant_pooling)
awq_block_size (LinearConfig attribute)
(QKVConfig property)
AWQClipHelper (class in modelopt.onnx.quantization.int4)
axis (ScaledQuantDescriptor property)
(TensorQuantizer property)
B
backend() (in module modelopt.torch.utils.distributed)
backward() (ClipFunction static method)
(FakeAffineTensorQuantFunction static method)
(FakeTensorQuantFunction static method)
(LegacyFakeTensorQuantFunction static method)
(ScaledE4M3Function static method)
(TensorQuantFunction static method)
barrier() (in module modelopt.torch.utils.distributed)
BaseQuantizedTensor (class in modelopt.torch.quantization.qtensor.base_qtensor)
BaseSearcher (class in modelopt.torch.opt.searcher)
BaseSparseSearcher (class in modelopt.torch.sparsity.searcher)
batch_decode() (CustomSentencePieceTokenizer method)
batch_encode_plus() (CustomSentencePieceTokenizer method)
before_search() (BaseSearcher method)
(SparseGPTSearcher method)
bias (ConvConfig attribute)
(LayernormConfig attribute)
(LinearConfig attribute)
(QKVConfig property)
block_sizes (ScaledQuantDescriptor property)
(TensorQuantizer property)
blocksparse_block_size (DecoderLayerConfig attribute)
blocksparse_homo_head_pattern (DecoderLayerConfig attribute)
blocksparse_num_local_blocks (DecoderLayerConfig attribute)
blocksparse_vertical_stride (DecoderLayerConfig attribute)
build_attention_config() (in module modelopt.torch.export.layer_utils)
build_conv_config() (in module modelopt.torch.export.layer_utils)
build_decoder_config() (in module modelopt.torch.export.layer_utils)
build_embedding_config() (in module modelopt.torch.export.layer_utils)
build_layernorm_config() (in module modelopt.torch.export.layer_utils)
build_linear_config() (in module modelopt.torch.export.layer_utils)
build_medusa_heads_config() (in module modelopt.torch.export.layer_utils)
build_mlp_config() (in module modelopt.torch.export.layer_utils)
build_moe_config() (in module modelopt.torch.export.layer_utils)
build_non_residual_input_map() (in module modelopt.onnx.quantization.graph_utils)
build_qkv() (in module modelopt.torch.export.layer_utils)
build_recurrent_config() (in module modelopt.torch.export.layer_utils)
build_stacked_experts() (in module modelopt.torch.export.layer_utils)
build_tensorrt_llm() (in module modelopt.deploy.llm.model_config_trt)
build_tensorrt_llm_rank() (in module modelopt.deploy.llm.model_config_trt)
C
calib_method (ScaledQuantDescriptor property)
calibrate() (in module modelopt.torch.quantization.model_calib)
calibrate_weights() (in module modelopt.torch.quantization.calib.histogram)
CalibrationDataProvider (class in modelopt.onnx.quantization.calib_utils)
centroid() (in module modelopt.torch.utils.random)
check_mode() (ModeloptStateManager method)
check_model_compatibility() (in module modelopt.torch.export.layer_utils)
check_weight_shape_valid() (in module modelopt.torch.export.postprocess)
choice() (in module modelopt.torch.utils.random)
choices (Hparam property)
classify_partition_nodes() (in module modelopt.onnx.quantization.graph_utils)
clean_up_after_set_from_modelopt_state() (TensorQuantizer method)
clear_cuda_cache() (in module modelopt.torch.utils.perf)
Clip (class in modelopt.torch.quantization.nn.modules.clip)
clip_qkv (AttentionConfig attribute)
ClipFunction (class in modelopt.torch.quantization.nn.functional)
collect() (HistogramCalibrator method)
(MaxCalibrator method)
compare_dict() (in module modelopt.torch.utils.network)
compute_amax() (HistogramCalibrator method)
(MaxCalibrator method)
compute_valid_1d_patterns() (in module modelopt.torch.sparsity.magnitude)
config (BaseSearcher attribute)
config() (DynamicSpace method)
config_class (ExportSparseModeDescriptor property)
(QuantizeExportModeDescriptor property)
(QuantizeModeDescriptor property)
(SparseGPTModeDescriptor property)
(SparseMagnitudeModeDescriptor property)
configure_ort() (in module modelopt.onnx.quantization.ort_utils)
constraints (BaseSearcher attribute)
construct_forward_loop() (BaseSearcher method)
Conv1d (in module modelopt.torch.quantization.nn.modules.quant_conv)
conv1d (RecurrentConfig attribute)
Conv2d (in module modelopt.torch.quantization.nn.modules.quant_conv)
Conv3d (in module modelopt.torch.quantization.nn.modules.quant_conv)
ConvConfig (class in modelopt.torch.export.model_config)
convert (ExportSparseModeDescriptor property)
(QuantizeExportModeDescriptor property)
(QuantizeModeDescriptor property)
(SparseMagnitudeModeDescriptor property)
convert() (DynamicModule class method)
convert_sparse_model() (in module modelopt.torch.sparsity.mode)
convert_to_dynamic() (DynamicSpace method)
convert_to_tensorrt_llm_config() (in module modelopt.torch.export.tensorrt_llm_utils)
convert_to_transformer_engine() (in module modelopt.torch.export.transformer_engine)
ConvTranspose1d (in module modelopt.torch.quantization.nn.modules.quant_conv)
ConvTranspose2d (in module modelopt.torch.quantization.nn.modules.quant_conv)
ConvTranspose3d (in module modelopt.torch.quantization.nn.modules.quant_conv)
create_asp_mask() (in module modelopt.torch.sparsity.magnitude)
create_forward_loop() (in module modelopt.torch.utils.dataset_utils)
create_inference_session() (in module modelopt.onnx.quantization.ort_utils)
create_sgpt_mask() (in module modelopt.torch.sparsity.sparsegpt)
customize_rule() (ModeloptBaseRule class method)
CustomSentencePieceTokenizer (class in modelopt.deploy.llm.nemo_utils)
D
deactivate() (in module modelopt.torch.quantization.quant_modules)
decode() (CustomSentencePieceTokenizer method)
decoder_type (DecoderLayerConfig attribute)
DecoderLayerConfig (class in modelopt.torch.export.model_config)
default_quant_desc_input (QuantInputBase attribute)
default_quant_desc_output (QuantInputBase attribute)
default_quant_desc_weight (QuantConv1d attribute)
(QuantConv2d attribute)
(QuantConv3d attribute)
(QuantConvTranspose1d attribute)
(QuantConvTranspose2d attribute)
(QuantConvTranspose3d attribute)
(QuantLinear attribute)
(QuantLinearConvBase attribute)
default_search_config (BaseSearcher property)
(BaseSparseSearcher property)
(SparseGPTSearcher property)
default_state_dict (BaseSearcher property)
(BaseSparseSearcher property)
dense (AttentionConfig attribute)
dense_attention_every_n_layers (DecoderLayerConfig attribute)
deployment (BaseSearcher attribute)
DeprecatedError
dequantize() (BaseQuantizedTensor method)
(INT4QTensor method)
(NF4QTensor method)
(TensorQuantizer method)
dict() (ScaledQuantDescriptor method)
disable() (SequentialQuantizer method)
(TensorQuantizer method)
disable_calib() (TensorQuantizer method)
disable_clip() (TensorQuantizer method)
disable_quant() (TensorQuantizer method)
disable_quantizer() (in module modelopt.torch.quantization.model_quant)
double_quantization() (NF4QTensor class method)
dq_tensor() (in module modelopt.onnx.quantization.int4)
dtype (ModelConfig attribute)
dummy_input (BaseSearcher attribute)
duplicate_shared_constants() (in module modelopt.onnx.utils)
dynamic (ScaledQuantDescriptor property)
DynamicModule (class in modelopt.torch.opt.dynamic)
DynamicSpace (class in modelopt.torch.opt.dynamic)
E
emb_scale_by_sqrt_dim (DecoderLayerConfig attribute)
EmbeddingConfig (class in modelopt.torch.export.model_config)
enable() (TensorQuantizer method)
enable_calib() (TensorQuantizer method)
enable_clip() (TensorQuantizer method)
enable_onnx_export() (in module modelopt.torch.quantization.quant_modules)
enable_quant() (TensorQuantizer method)
enable_quantizer() (in module modelopt.torch.quantization.model_quant)
encode() (CustomSentencePieceTokenizer method)
enforce_order() (Hparam method)
eos_token (CustomSentencePieceTokenizer property)
eos_token_id (CustomSentencePieceTokenizer property)
eps (LayernormConfig attribute)
eval_score() (BaseSearcher method)
ExpertConfig (class in modelopt.torch.export.model_config)
experts (MOEConfig attribute)
export() (DynamicModule method)
(DynamicSpace method)
(in module modelopt.torch.sparsity.sparsification)
export_amax() (TensorQuantizer method)
export_mode (QuantizeModeDescriptor property)
(SparseMagnitudeModeDescriptor property)
export_sparse() (in module modelopt.torch.sparsity.mode)
export_tensorrt_llm_checkpoint() (in module modelopt.torch.export.model_config_export)
export_to_vllm() (in module modelopt.torch.export.vllm)
export_torch_mode() (in module modelopt.torch.quantization.utils)
ExportSparseModeDescriptor (class in modelopt.torch.sparsity.mode)
extra_repr() (DynamicModule method)
(TensorQuantizer method)
F
fake_quant (ScaledQuantDescriptor property)
(TensorQuantizer property)
FakeAffineTensorQuantFunction (class in modelopt.torch.quantization.tensor_quant)
FakeTensorQuantFunction (class in modelopt.torch.quantization.tensor_quant)
fc (ExpertConfig attribute)
(MLPConfig attribute)
(MOEConfig property)
ffn_hidden_size_local (DecoderLayerConfig property)
fill() (in module modelopt.torch.sparsity.magnitude)
filter_quantizable_kgen_heads() (in module modelopt.onnx.quantization.graph_utils)
find_fusible_partitions() (in module modelopt.onnx.quantization.partitioning)
find_hardcoded_patterns() (in module modelopt.onnx.quantization.partitioning)
find_layer_norm_partitions() (in module modelopt.onnx.quantization.partitioning)
find_lowest_common_ancestor() (in module modelopt.onnx.utils)
find_mha_partitions() (in module modelopt.onnx.quantization.partitioning)
find_nodes_to_exclude() (in module modelopt.onnx.quantization.graph_utils)
find_non_quantizable_partitions_from_patterns() (in module modelopt.onnx.quantization.partitioning)
find_quantizable_nodes() (in module modelopt.onnx.quantization.partitioning)
find_scales() (in module modelopt.onnx.quantization.int4)
fold_weight() (in module modelopt.torch.quantization.model_quant)
force_assign() (DynamicModule method)
forward() (Clip method)
(ClipFunction static method)
(FakeAffineTensorQuantFunction static method)
(FakeTensorQuantFunction static method)
(LegacyFakeTensorQuantFunction static method)
(QuantInputBase method)
(QuantLinearConvBase method)
(ScaledE4M3Function static method)
(TensorQuantFunction static method)
(TensorQuantizer method)
forward_loop (BaseSearcher attribute)
freeze() (DynamicModule method)
freeze_parameters() (in module modelopt.torch.quantization.optim)
from_quantized_weight() (in module modelopt.torch.export.model_config_utils)
G
gate (MLPConfig attribute)
gegelu_limit (DecoderLayerConfig attribute)
gen_random_inputs() (in module modelopt.onnx.utils)
generate_text() (LLM method)
generate_tokens() (LLM method)
get() (ModeloptBaseConfig method)
get_activation_scaling_factor() (in module modelopt.torch.export.layer_utils)
get_all_input_names() (in module modelopt.onnx.utils)
get_batch_size() (in module modelopt.onnx.utils)
get_batch_size_from_bytes() (in module modelopt.onnx.utils)
get_block_quant_axes_and_sizes() (ScaledQuantDescriptor static method)
get_child_nodes() (in module modelopt.onnx.utils)
get_config_class() (ModeloptStateManager static method)
get_configs_parallel() (in module modelopt.torch.export.distribute)
get_cuda_ext() (in module modelopt.torch.quantization.extensions)
get_cuda_ext_fp8() (in module modelopt.torch.quantization.extensions)
get_cuda_memory_stats() (in module modelopt.torch.utils.perf)
get_data_parallel_group() (in module modelopt.torch.utils.distributed)
get_dataset_dataloader() (in module modelopt.torch.utils.dataset_utils)
get_field_name_from_key() (ModeloptBaseConfig method)
get_fusible_backbone() (in module modelopt.onnx.quantization.graph_utils)
get_hparam() (DynamicModule method)
(DynamicSpace method)
get_input_names() (in module modelopt.onnx.utils)
get_input_names_from_bytes() (in module modelopt.onnx.utils)
get_input_shapes() (in module modelopt.onnx.utils)
get_input_shapes_from_bytes() (in module modelopt.onnx.utils)
get_kv_cache_dtype() (in module modelopt.torch.export.layer_utils)
get_kv_cache_scaling_factor() (in module modelopt.torch.export.layer_utils)
get_kwargs_for_create_model_with_rules() (in module modelopt.torch.opt.config)
get_model_attributes() (in module modelopt.torch.utils.network)
get_modelopt_state() (SequentialQuantizer method)
(TensorQuantizer method)
get_module_device() (in module modelopt.torch.utils.network)
get_nemo_tokenizer() (in module modelopt.deploy.llm.nemo_utils)
get_next() (CalibrationDataProvider method)
(RandomDataProvider method)
get_nmprune_info() (in module modelopt.torch.sparsity.magnitude)
get_node_names() (in module modelopt.onnx.utils)
get_node_names_from_bytes() (in module modelopt.onnx.utils)
get_output_names() (in module modelopt.onnx.utils)
get_output_names_from_bytes() (in module modelopt.onnx.utils)
get_output_shapes() (in module modelopt.onnx.utils)
get_parent_nodes() (in module modelopt.onnx.utils)
get_prequant_scaling_factor() (in module modelopt.torch.export.layer_utils)
get_quantizable_op_types() (in module modelopt.onnx.quantization.ort_utils)
get_rule_type() (ModeloptBaseRule class method)
get_same_padding() (in module modelopt.torch.utils.network)
get_scaling_factor() (in module modelopt.torch.export.layer_utils)
get_skiped_output_layers() (in module modelopt.onnx.quantization.partitioning)
get_tensor_parallel_group() (in module modelopt.torch.utils.distributed)
get_tensors_parallel() (in module modelopt.torch.export.distribute)
get_tokenzier() (in module modelopt.deploy.llm.nemo_utils)
get_transformer_layers() (in module modelopt.torch.export.layer_utils)
get_variable_inputs() (in module modelopt.onnx.utils)
get_weight_block_size() (in module modelopt.torch.export.layer_utils)
get_weight_scaling_factor() (in module modelopt.torch.export.layer_utils)
get_weight_scaling_factor_2() (in module modelopt.torch.export.layer_utils)
get_weights_scaling_factor() (in module modelopt.torch.export.scaling_factor_utils)
group_parameters() (in module modelopt.torch.quantization.optim)
H
has_const_input() (in module modelopt.onnx.quantization.graph_utils)
has_path_type() (in module modelopt.onnx.quantization.graph_utils)
has_score (BaseSearcher property)
has_state (ModeloptStateManager property)
hidden_act (LinearActConfig attribute)
(MLPConfig attribute)
(ModelConfig property)
(MOEConfig attribute)
hidden_size (DecoderLayerConfig property)
(EmbeddingConfig property)
(ModelConfig property)
HistogramCalibrator (class in modelopt.torch.quantization.calib.histogram)
Hparam (class in modelopt.torch.opt.hparam)
I
import_scales_from_calib_cache() (in module modelopt.onnx.quantization.calib_utils)
Importance (Hparam attribute)
importance (Hparam property)
ImportanceEstimator (Hparam attribute)
init_learn_amax() (TensorQuantizer method)
init_model_from_model_like() (in module modelopt.torch.utils.network)
initialize() (in module modelopt.torch.quantization.quant_modules)
initialize_quantizer_with_dummy_states() (QuantLinearConvBase static method)
input_gate (RgLruConfig attribute)
input_layernorm (DecoderLayerConfig attribute)
input_quantizer (QuantInputBase attribute)
insert_dq_nodes() (in module modelopt.onnx.quantization.qdq_utils)
insert_qdq_nodes() (in module modelopt.onnx.quantization.qdq_utils)
INT4QTensor (class in modelopt.torch.quantization.qtensor.int4_tensor)
invert() (in module modelopt.torch.sparsity.sparsegpt)
is_attention() (in module modelopt.torch.export.layer_utils)
is_available() (in module modelopt.torch.utils.distributed)
is_binary_op() (in module modelopt.onnx.op_types)
is_channels_last() (in module modelopt.torch.utils.network)
is_configurable (Hparam property)
is_configurable() (DynamicSpace method)
(in module modelopt.torch.opt.utils)
is_const_input() (in module modelopt.onnx.quantization.graph_utils)
is_control_flow_op() (in module modelopt.onnx.op_types)
is_conversion_op() (in module modelopt.onnx.op_types)
is_converted() (ModeloptStateManager class method)
is_copy_op() (in module modelopt.onnx.op_types)
is_decoder_list() (in module modelopt.torch.export.layer_utils)
is_default_quantizable_op_by_ort() (in module modelopt.onnx.op_types)
is_dynamic() (DynamicSpace method)
(in module modelopt.torch.opt.utils)
is_embedding() (in module modelopt.torch.export.layer_utils)
is_enabled (TensorQuantizer property)
is_export_mode (ExportSparseModeDescriptor property)
(QuantizeExportModeDescriptor property)
is_fusible_reduction_op() (in module modelopt.onnx.op_types)
is_generator_op() (in module modelopt.onnx.op_types)
is_initialized (NFSWorkspace property)
is_initialized() (in module modelopt.torch.utils.distributed)
is_irregular_mem_access_op() (in module modelopt.onnx.op_types)
is_layernorm() (in module modelopt.torch.export.layer_utils)
is_linear() (in module modelopt.torch.export.layer_utils)
is_linear_op() (in module modelopt.onnx.op_types)
is_master() (in module modelopt.torch.utils.distributed)
is_mlp() (in module modelopt.torch.export.layer_utils)
is_modifier_op() (in module modelopt.onnx.op_types)
is_moe() (in module modelopt.torch.export.layer_utils)
is_multiclass_op() (in module modelopt.onnx.op_types)
is_non_reshape_copy_op() (in module modelopt.onnx.op_types)
is_normalization_op() (in module modelopt.onnx.op_types)
is_parallel() (in module modelopt.torch.utils.network)
is_pointwise_or_elementwise_op() (in module modelopt.onnx.op_types)
is_pooling_or_window_op() (in module modelopt.onnx.op_types)
is_quantized() (in module modelopt.torch.quantization.utils)
is_quantized_column_parallel_linear() (in module modelopt.torch.quantization.utils)
is_quantized_layer_with_weight() (in module modelopt.torch.quantization.utils)
is_quantized_row_parallel_linear() (in module modelopt.torch.quantization.utils)
is_recurrent() (in module modelopt.torch.export.layer_utils)
is_recurrent_op() (in module modelopt.onnx.op_types)
is_selection_op() (in module modelopt.onnx.op_types)
is_sequence_op() (in module modelopt.onnx.op_types)
is_shape_op() (in module modelopt.onnx.op_types)
is_sortable (Hparam property)
is_tensorrt_llm_0_8_or_9() (in module modelopt.torch.export.tensorrt_llm_utils)
is_torch_library_supported() (in module modelopt.torch.quantization.utils)
is_unary_op() (in module modelopt.onnx.op_types)
is_valid_onnx_model() (in module modelopt.onnx.utils)
items() (ModeloptBaseConfig method)
K
k (QKVConfig attribute)
keys() (ModeloptBaseConfig method)
kv_cache_dtype (AttentionConfig attribute)
kv_cache_scaling_factor (AttentionConfig attribute)
L
last_mode (ModeloptStateManager property)
layer_types (DecoderLayerConfig attribute)
layernorm_type (LayernormConfig attribute)
LayernormConfig (class in modelopt.torch.export.model_config)
layers (ModelConfig attribute)
learn_amax (ScaledQuantDescriptor property)
LegacyFakeTensorQuantFunction (class in modelopt.torch.quantization.tensor_quant)
Linear (in module modelopt.torch.quantization.nn.modules.quant_linear)
linear (LinearActConfig attribute)
linear_out (RecurrentConfig attribute)
linear_type (LinearConfig attribute)
linear_x (RecurrentConfig attribute)
linear_y (RecurrentConfig attribute)
LinearActConfig (class in modelopt.torch.export.model_config)
LinearConfig (class in modelopt.torch.export.model_config)
list_closest_to_median() (in module modelopt.torch.utils.list)
LLM (class in modelopt.deploy.llm.generate)
lm_head (MedusaHeadConfig attribute)
(ModelConfig attribute)
ln_embed (ModelConfig attribute)
ln_f (ModelConfig attribute)
load_calib_amax() (TensorQuantizer method)
load_cpp_extension() (in module modelopt.torch.utils.cpp_extension)
load_search_checkpoint() (BaseSearcher method)
load_state_dict() (ModeloptStateManager method)
local_vocab_size (EmbeddingConfig property)
logits_soft_cap (DecoderLayerConfig attribute)
longrope_long_mscale (DecoderLayerConfig attribute)
longrope_scaling_long_factors (DecoderLayerConfig attribute)
longrope_scaling_short_factors (DecoderLayerConfig attribute)
longrope_short_mscale (DecoderLayerConfig attribute)
M
m4n2_1d() (in module modelopt.torch.sparsity.magnitude)
MagnitudeSearcher (class in modelopt.torch.sparsity.magnitude)
make_divisible() (in module modelopt.torch.utils.network)
make_gs_dequantize_node() (in module modelopt.onnx.quantization.qdq_utils)
make_gs_dequantize_output() (in module modelopt.onnx.quantization.qdq_utils)
make_gs_quantize_node() (in module modelopt.onnx.quantization.qdq_utils)
make_gs_quantize_output() (in module modelopt.onnx.quantization.qdq_utils)
make_gs_quantized_weight() (in module modelopt.onnx.quantization.qdq_utils)
make_gs_scale() (in module modelopt.onnx.quantization.qdq_utils)
make_gs_zp() (in module modelopt.onnx.quantization.qdq_utils)
match() (in module modelopt.torch.utils.graph)
match_parameters() (in module modelopt.torch.quantization.optim)
max (Hparam property)
max_beam_width (LLM property)
max_input_len (LLM property)
max_position_embeddings (DecoderLayerConfig attribute)
(ModelConfig property)
maxbound (TensorQuantizer property)
MaxCalibrator (class in modelopt.torch.quantization.calib.max)
MaxPool1d (in module modelopt.torch.quantization.nn.modules.quant_pooling)
MaxPool2d (in module modelopt.torch.quantization.nn.modules.quant_pooling)
MaxPool3d (in module modelopt.torch.quantization.nn.modules.quant_pooling)
medusa_heads (ModelConfig attribute)
medusa_layers (MedusaHeadConfig attribute)
MedusaHeadConfig (class in modelopt.torch.export.model_config)
merge_fc1_gate() (in module modelopt.torch.export.model_config_utils)
merge_qkv() (in module modelopt.torch.export.model_config_utils)
merged_fc1_gate (MLPConfig attribute)
min (Hparam property)
min_alpha (AWQClipHelper attribute)
mlp (DecoderLayerConfig attribute)
mlp_layernorm (DecoderLayerConfig attribute)
MLPConfig (class in modelopt.torch.export.model_config)
mn_1d_best() (in module modelopt.torch.sparsity.magnitude)
model (BaseSearcher attribute)
model_config_from_dict() (in module modelopt.torch.export.model_config_utils)
model_config_to_dict() (in module modelopt.torch.export.model_config_utils)
model_dump() (ModeloptBaseConfig method)
model_dump_json() (ModeloptBaseConfig method)
model_name (DecoderLayerConfig attribute)
model_to() (in module modelopt.torch.utils.network)
ModelConfig (class in modelopt.torch.export.model_config)
modelopt.deploy
module
modelopt.deploy.llm
module
modelopt.deploy.llm.generate
module
modelopt.deploy.llm.model_config_trt
module
modelopt.deploy.llm.nemo_utils
module
modelopt.onnx
module
modelopt.onnx.op_types
module
modelopt.onnx.quantization
module
modelopt.onnx.quantization.calib_utils
module
modelopt.onnx.quantization.extensions
module
modelopt.onnx.quantization.fp8
module
modelopt.onnx.quantization.graph_utils
module
modelopt.onnx.quantization.gs_patching
module
modelopt.onnx.quantization.int4
module
modelopt.onnx.quantization.int8
module
modelopt.onnx.quantization.operators
module
modelopt.onnx.quantization.ort_patching
module
modelopt.onnx.quantization.ort_utils
module
modelopt.onnx.quantization.partitioning
module
modelopt.onnx.quantization.qdq_utils
module
modelopt.onnx.quantization.quant_utils
module
modelopt.onnx.quantization.quantize
module
modelopt.onnx.utils
module
modelopt.torch
module
modelopt.torch.export
module
modelopt.torch.export.distribute
module
modelopt.torch.export.hf_config_map
module
modelopt.torch.export.layer_utils
module
modelopt.torch.export.model_config
module
modelopt.torch.export.model_config_export
module
modelopt.torch.export.model_config_utils
module
modelopt.torch.export.postprocess
module
modelopt.torch.export.scaling_factor_utils
module
modelopt.torch.export.tensorrt_llm_utils
module
modelopt.torch.export.transformer_engine
module
modelopt.torch.export.vllm
module
modelopt.torch.opt
module
modelopt.torch.opt.config
module
modelopt.torch.opt.conversion
module
modelopt.torch.opt.dynamic
module
modelopt.torch.opt.hparam
module
modelopt.torch.opt.mode
module
modelopt.torch.opt.plugins
module
modelopt.torch.opt.searcher
module
modelopt.torch.opt.utils
module
modelopt.torch.quantization
module
modelopt.torch.quantization.calib
module
modelopt.torch.quantization.calib.calibrator
module
modelopt.torch.quantization.calib.histogram
module
modelopt.torch.quantization.calib.max
module
modelopt.torch.quantization.config
module
modelopt.torch.quantization.conversion
module
modelopt.torch.quantization.extensions
module
modelopt.torch.quantization.mode
module
modelopt.torch.quantization.model_calib
module
modelopt.torch.quantization.model_quant
module
modelopt.torch.quantization.nn
module
modelopt.torch.quantization.nn.functional
module
modelopt.torch.quantization.nn.modules
module
modelopt.torch.quantization.nn.modules.clip
module
modelopt.torch.quantization.nn.modules.quant_activations
module
modelopt.torch.quantization.nn.modules.quant_batchnorm
module
modelopt.torch.quantization.nn.modules.quant_conv
module
modelopt.torch.quantization.nn.modules.quant_instancenorm
module
modelopt.torch.quantization.nn.modules.quant_linear
module
modelopt.torch.quantization.nn.modules.quant_module
module
modelopt.torch.quantization.nn.modules.quant_pooling
module
modelopt.torch.quantization.nn.modules.tensor_quantizer
module
modelopt.torch.quantization.optim
module
modelopt.torch.quantization.plugins
module
modelopt.torch.quantization.qtensor
module
modelopt.torch.quantization.qtensor.base_qtensor
module
modelopt.torch.quantization.qtensor.int4_tensor
module
modelopt.torch.quantization.qtensor.nf4_tensor
module
modelopt.torch.quantization.quant_modules
module
modelopt.torch.quantization.tensor_quant
module
modelopt.torch.quantization.utils
module
modelopt.torch.sparsity
module
modelopt.torch.sparsity.config
module
modelopt.torch.sparsity.magnitude
module
modelopt.torch.sparsity.mode
module
modelopt.torch.sparsity.module
module
modelopt.torch.sparsity.plugins
module
modelopt.torch.sparsity.searcher
module
modelopt.torch.sparsity.sparsegpt
module
modelopt.torch.sparsity.sparsification
module
modelopt.torch.utils
module
modelopt.torch.utils.cpp_extension
module
modelopt.torch.utils.dataset_utils
module
modelopt.torch.utils.distributed
module
modelopt.torch.utils.graph
module
modelopt.torch.utils.list
module
modelopt.torch.utils.logging
module
modelopt.torch.utils.network
module
modelopt.torch.utils.perf
module
modelopt.torch.utils.random
module
modelopt.torch.utils.tensor
module
modelopt_state() (in module modelopt.torch.opt.conversion)
ModeloptField() (in module modelopt.torch.opt.config)
ModeloptStateManager (class in modelopt.torch.opt.conversion)
modes_with_states() (ModeloptStateManager method)
modify() (DynamicModule method)
(SparseModule method)
module
modelopt.deploy
modelopt.deploy.llm
modelopt.deploy.llm.generate
modelopt.deploy.llm.model_config_trt
modelopt.deploy.llm.nemo_utils
modelopt.onnx
modelopt.onnx.op_types
modelopt.onnx.quantization
modelopt.onnx.quantization.calib_utils
modelopt.onnx.quantization.extensions
modelopt.onnx.quantization.fp8
modelopt.onnx.quantization.graph_utils
modelopt.onnx.quantization.gs_patching
modelopt.onnx.quantization.int4
modelopt.onnx.quantization.int8
modelopt.onnx.quantization.operators
modelopt.onnx.quantization.ort_patching
modelopt.onnx.quantization.ort_utils
modelopt.onnx.quantization.partitioning
modelopt.onnx.quantization.qdq_utils
modelopt.onnx.quantization.quant_utils
modelopt.onnx.quantization.quantize
modelopt.onnx.utils
modelopt.torch
modelopt.torch.export
modelopt.torch.export.distribute
modelopt.torch.export.hf_config_map
modelopt.torch.export.layer_utils
modelopt.torch.export.model_config
modelopt.torch.export.model_config_export
modelopt.torch.export.model_config_utils
modelopt.torch.export.postprocess
modelopt.torch.export.scaling_factor_utils
modelopt.torch.export.tensorrt_llm_utils
modelopt.torch.export.transformer_engine
modelopt.torch.export.vllm
modelopt.torch.opt
modelopt.torch.opt.config
modelopt.torch.opt.conversion
modelopt.torch.opt.dynamic
modelopt.torch.opt.hparam
modelopt.torch.opt.mode
modelopt.torch.opt.plugins
modelopt.torch.opt.searcher
modelopt.torch.opt.utils
modelopt.torch.quantization
modelopt.torch.quantization.calib
modelopt.torch.quantization.calib.calibrator
modelopt.torch.quantization.calib.histogram
modelopt.torch.quantization.calib.max
modelopt.torch.quantization.config
modelopt.torch.quantization.conversion
modelopt.torch.quantization.extensions
modelopt.torch.quantization.mode
modelopt.torch.quantization.model_calib
modelopt.torch.quantization.model_quant
modelopt.torch.quantization.nn
modelopt.torch.quantization.nn.functional
modelopt.torch.quantization.nn.modules
modelopt.torch.quantization.nn.modules.clip
modelopt.torch.quantization.nn.modules.quant_activations
modelopt.torch.quantization.nn.modules.quant_batchnorm
modelopt.torch.quantization.nn.modules.quant_conv
modelopt.torch.quantization.nn.modules.quant_instancenorm
modelopt.torch.quantization.nn.modules.quant_linear
modelopt.torch.quantization.nn.modules.quant_module
modelopt.torch.quantization.nn.modules.quant_pooling
modelopt.torch.quantization.nn.modules.tensor_quantizer
modelopt.torch.quantization.optim
modelopt.torch.quantization.plugins
modelopt.torch.quantization.qtensor
modelopt.torch.quantization.qtensor.base_qtensor
modelopt.torch.quantization.qtensor.int4_tensor
modelopt.torch.quantization.qtensor.nf4_tensor
modelopt.torch.quantization.quant_modules
modelopt.torch.quantization.tensor_quant
modelopt.torch.quantization.utils
modelopt.torch.sparsity
modelopt.torch.sparsity.config
modelopt.torch.sparsity.magnitude
modelopt.torch.sparsity.mode
modelopt.torch.sparsity.module
modelopt.torch.sparsity.plugins
modelopt.torch.sparsity.searcher
modelopt.torch.sparsity.sparsegpt
modelopt.torch.sparsity.sparsification
modelopt.torch.utils
modelopt.torch.utils.cpp_extension
modelopt.torch.utils.dataset_utils
modelopt.torch.utils.distributed
modelopt.torch.utils.graph
modelopt.torch.utils.list
modelopt.torch.utils.logging
modelopt.torch.utils.network
modelopt.torch.utils.perf
modelopt.torch.utils.random
modelopt.torch.utils.tensor
moe_num_experts (DecoderLayerConfig attribute)
moe_renorm_mode (DecoderLayerConfig attribute)
moe_top_k (DecoderLayerConfig attribute)
moe_tp_mode (DecoderLayerConfig attribute)
MOEConfig (class in modelopt.torch.export.model_config)
mup_attn_multiplier (DecoderLayerConfig attribute)
mup_embedding_multiplier (DecoderLayerConfig attribute)
mup_use_scaling (DecoderLayerConfig attribute)
mup_width_multiplier (DecoderLayerConfig attribute)
N
naive_quantization() (in module modelopt.torch.export.model_config_utils)
name (ExportSparseModeDescriptor property)
(QuantizeExportModeDescriptor property)
(QuantizeModeDescriptor property)
(ScaledQuantDescriptor property)
(SparseGPTModeDescriptor property)
(SparseMagnitudeModeDescriptor property)
name_onnx_nodes() (in module modelopt.onnx.utils)
named_dynamic_modules() (DynamicSpace method)
named_hparams() (DynamicModule method)
(DynamicSpace method)
(in module modelopt.torch.opt.utils)
narrow_range (ScaledQuantDescriptor property)
(TensorQuantizer property)
new_decoder_architecture (DecoderLayerConfig attribute)
next_modes (QuantizeModeDescriptor property)
(SparseMagnitudeModeDescriptor property)
NF4QTensor (class in modelopt.torch.quantization.qtensor.nf4_tensor)
NFSWorkspace (class in modelopt.torch.export.distribute)
nn_conv2d (SparseGPTConfig attribute)
(SparseMagnitudeConfig attribute)
nn_linear (SparseGPTConfig attribute)
(SparseMagnitudeConfig attribute)
no_stdout() (in module modelopt.torch.utils.logging)
num2hrb() (in module modelopt.torch.utils.logging)
num_attention_heads (DecoderLayerConfig attribute)
(ModelConfig property)
num_bits (ScaledQuantDescriptor property)
(TensorQuantizer property)
num_kv_heads (DecoderLayerConfig attribute)
(ModelConfig property)
num_medusa_heads (ModelConfig attribute)
num_medusa_layers (ModelConfig attribute)
numpy_to_torch() (in module modelopt.torch.utils.tensor)
O
original (Hparam property)
original() (in module modelopt.torch.utils.random)
original_cls (DynamicModule property)
original_max_position_embeddings (DecoderLayerConfig attribute)
original_meta_tensor (BaseQuantizedTensor attribute)
output_quantizer (QuantInputBase attribute)
P
pack_float32_to_4bit_cpp_based() (in module modelopt.onnx.quantization.quant_utils)
pack_float32_to_4bit_optimized() (in module modelopt.onnx.quantization.quant_utils)
pack_linear_weights() (in module modelopt.torch.export.model_config_utils)
pad_embedding_lm_head() (in module modelopt.torch.export.postprocess)
pad_token (CustomSentencePieceTokenizer property)
pad_token_id (CustomSentencePieceTokenizer property)
pad_weights() (in module modelopt.torch.export.model_config_utils)
parallel_attention (DecoderLayerConfig attribute)
param_num() (in module modelopt.torch.utils.network)
param_num_from_forward() (in module modelopt.torch.utils.network)
partial_rotary_factor (DecoderLayerConfig attribute)
patch_gs_modules() (in module modelopt.onnx.quantization.gs_patching)
patch_ort_modules() (in module modelopt.onnx.quantization.ort_patching)
pipeline_parallel (ModelConfig attribute)
position_embedding (ModelConfig attribute)
post_layernorm (DecoderLayerConfig attribute)
postprocess_amax() (in module modelopt.torch.quantization.model_calib)
postprocess_model_config() (in module modelopt.torch.export.postprocess)
postprocess_tensors() (in module modelopt.torch.export.postprocess)
pre_quant_scale (TensorQuantizer property)
prepare() (in module modelopt.torch.sparsity.sparsegpt)
prequant_scaling_factor (LinearConfig attribute)
(QKVConfig property)
print_quant_summary() (in module modelopt.torch.quantization.model_quant)
print_rank_0() (in module modelopt.torch.utils.logging)
print_stat() (in module modelopt.onnx.quantization.graph_utils)
proj (ExpertConfig attribute)
(MLPConfig attribute)
Q
q (QKVConfig attribute)
QDQConvTranspose (class in modelopt.onnx.quantization.operators)
QDQNormalization (class in modelopt.onnx.quantization.operators)
qkv (AttentionConfig attribute)
QKVConfig (class in modelopt.torch.export.model_config)
quant_cfg (QuantizeConfig attribute)
quant_tensor() (in module modelopt.onnx.quantization.int4)
quant_weight_inplace() (in module modelopt.torch.quantization.optim)
QuantAdaptiveAvgPool1d (class in modelopt.torch.quantization.nn.modules.quant_pooling)
QuantAdaptiveAvgPool2d (class in modelopt.torch.quantization.nn.modules.quant_pooling)
QuantAdaptiveAvgPool3d (class in modelopt.torch.quantization.nn.modules.quant_pooling)
QuantAvgPool1d (class in modelopt.torch.quantization.nn.modules.quant_pooling)
QuantAvgPool2d (class in modelopt.torch.quantization.nn.modules.quant_pooling)
QuantAvgPool3d (class in modelopt.torch.quantization.nn.modules.quant_pooling)
QuantConv1d (class in modelopt.torch.quantization.nn.modules.quant_conv)
QuantConv2d (class in modelopt.torch.quantization.nn.modules.quant_conv)
QuantConv3d (class in modelopt.torch.quantization.nn.modules.quant_conv)
QuantConvTranspose1d (class in modelopt.torch.quantization.nn.modules.quant_conv)
QuantConvTranspose2d (class in modelopt.torch.quantization.nn.modules.quant_conv)
QuantConvTranspose3d (class in modelopt.torch.quantization.nn.modules.quant_conv)
QuantDescriptor (in module modelopt.torch.quantization.tensor_quant)
QuantInputBase (class in modelopt.torch.quantization.nn.modules.quant_module)
QuantInstanceNorm1d (class in modelopt.torch.quantization.nn.modules.quant_instancenorm)
QuantInstanceNorm2d (class in modelopt.torch.quantization.nn.modules.quant_instancenorm)
QuantInstanceNorm3d (class in modelopt.torch.quantization.nn.modules.quant_instancenorm)
quantization (DecoderLayerConfig attribute)
(ModelConfig attribute)
quantize() (BaseQuantizedTensor class method)
(in module modelopt.onnx.quantization.fp8)
(in module modelopt.onnx.quantization.int4)
(in module modelopt.onnx.quantization.int8)
(in module modelopt.onnx.quantization.quantize)
(in module modelopt.torch.quantization.model_quant)
(INT4QTensor class method)
(NF4QTensor class method)
(QDQConvTranspose method)
(QDQNormalization method)
quantize_awq_clip() (in module modelopt.onnx.quantization.int4)
quantize_rtn() (in module modelopt.onnx.quantization.int4)
quantize_weight() (QuantLinearConvBase method)
quantized_data (BaseQuantizedTensor attribute)
(INT4QTensor attribute)
(NF4QTensor attribute)
QuantizeExportModeDescriptor (class in modelopt.torch.quantization.mode)
QuantizeModeDescriptor (class in modelopt.torch.quantization.mode)
QuantLinear (class in modelopt.torch.quantization.nn.modules.quant_linear)
QuantLinearConvBase (class in modelopt.torch.quantization.nn.modules.quant_module)
QuantMaxPool1d (class in modelopt.torch.quantization.nn.modules.quant_pooling)
QuantMaxPool2d (class in modelopt.torch.quantization.nn.modules.quant_pooling)
QuantMaxPool3d (class in modelopt.torch.quantization.nn.modules.quant_pooling)
qwen_type (DecoderLayerConfig attribute)
R
random() (in module modelopt.torch.utils.random)
RandomDataProvider (class in modelopt.onnx.quantization.calib_utils)
randomize_weights() (in module modelopt.onnx.utils)
randomize_weights_onnx_bytes() (in module modelopt.onnx.utils)
rank (ModelConfig attribute)
rank() (in module modelopt.torch.utils.distributed)
read_configs_and_weights_from_rank() (NFSWorkspace method)
recurrent (DecoderLayerConfig attribute)
recurrent_gate (RgLruConfig attribute)
recurrent_param (RgLruConfig attribute)
RecurrentConfig (class in modelopt.torch.export.model_config)
reduce_amax() (in module modelopt.torch.quantization.utils)
register() (in module modelopt.torch.quantization.conversion)
register_default() (ModeloptBaseRuleConfig class method)
register_importance() (Hparam method)
remove_bn() (in module modelopt.torch.utils.network)
remove_partial_input_qdq() (in module modelopt.onnx.quantization.graph_utils)
remove_weights_data() (in module modelopt.onnx.utils)
replace_function() (in module modelopt.torch.quantization.utils)
replace_quant_module() (in module modelopt.torch.quantization.conversion)
replace_scale_values() (in module modelopt.onnx.quantization.qdq_utils)
replace_sequential_quantizer_with_single_quantizer() (SequentialQuantizer static method)
report_memory() (in module modelopt.torch.utils.perf)
reset() (HistogramCalibrator method)
(MaxCalibrator method)
reset_amax() (TensorQuantizer method)
reset_dynamic_attributes() (DynamicModule method)
reset_search() (BaseSearcher method)
reshape_1d() (in module modelopt.torch.sparsity.magnitude)
residual_layernorm (DecoderLayerConfig attribute)
residual_mlp (DecoderLayerConfig attribute)
resmooth_and_get_scale() (in module modelopt.torch.export.scaling_factor_utils)
restore (ExportSparseModeDescriptor property)
(QuantizeExportModeDescriptor property)
(QuantizeModeDescriptor property)
(SparseMagnitudeModeDescriptor property)
restore() (in module modelopt.torch.opt.conversion)
restore_export_sparse() (in module modelopt.torch.sparsity.mode)
restore_from_modelopt_state() (in module modelopt.torch.opt.conversion)
restore_model_config() (in module modelopt.torch.export.model_config_utils)
restore_sparse_model() (in module modelopt.torch.sparsity.mode)
rg_lru (RecurrentConfig attribute)
RgLruConfig (class in modelopt.torch.export.model_config)
rnn_hidden_size (DecoderLayerConfig attribute)
rope_ratio (DecoderLayerConfig attribute)
rotary_base (DecoderLayerConfig attribute)
rotary_dim (AttentionConfig attribute)
rotary_pct (DecoderLayerConfig attribute)
router (MOEConfig attribute)
rtn() (in module modelopt.onnx.quantization.int4)
run_forward_loop() (in module modelopt.torch.utils.network)
run_search() (BaseSearcher method)
(BaseSparseSearcher method)
S
sample() (in module modelopt.torch.utils.random)
sanitize_search_config() (BaseSearcher method)
(BaseSparseSearcher method)
save() (in module modelopt.torch.opt.conversion)
save_onnx() (in module modelopt.onnx.utils)
save_onnx_bytes_to_dir() (in module modelopt.onnx.utils)
save_search_checkpoint() (BaseSearcher method)
scale (TensorQuantizer property)
scale_amax (ScaledQuantDescriptor property)
scaled_e4m3_abstract() (in module modelopt.torch.quantization.tensor_quant)
ScaledE4M3Function (class in modelopt.torch.quantization.tensor_quant)
ScaledQuantDescriptor (class in modelopt.torch.quantization.tensor_quant)
search() (BaseSearcher method)
search_algorithm (SparseGPTModeDescriptor property)
(SparseMagnitudeModeDescriptor property)
search_space_size() (in module modelopt.torch.opt.utils)
select() (DynamicSpace method)
seq_length (DecoderLayerConfig attribute)
SequentialQuantizer (class in modelopt.torch.quantization.nn.modules.tensor_quantizer)
set_data_parallel_group() (in module modelopt.torch.utils.distributed)
set_from_attribute_dict() (SequentialQuantizer method)
(TensorQuantizer method)
set_from_modelopt_state() (TensorQuantizer method)
set_mask() (SparseModule method)
set_quantizer_attribute() (in module modelopt.torch.quantization.conversion)
set_quantizer_by_cfg() (in module modelopt.torch.quantization.conversion)
set_submodule() (in module modelopt.torch.utils.network)
set_tensor_parallel_group() (in module modelopt.torch.utils.distributed)
share_embedding_table (ModelConfig attribute)
shuffle() (in module modelopt.torch.utils.random)
size() (DynamicSpace method)
(in module modelopt.torch.utils.distributed)
SparseGPTModeDescriptor (class in modelopt.torch.sparsity.mode)
SparseGPTSearcher (class in modelopt.torch.sparsity.sparsegpt)
SparseMagnitudeModeDescriptor (class in modelopt.torch.sparsity.mode)
SparseModule (class in modelopt.torch.sparsity.module)
sparsify() (in module modelopt.torch.sparsity.sparsification)
split_config_and_weights() (in module modelopt.torch.export.model_config_utils)
standardize_constructor_args() (in module modelopt.torch.utils.network)
standardize_model_args() (in module modelopt.torch.utils.network)
standardize_model_like_tuple() (in module modelopt.torch.utils.network)
standardize_named_model_args() (in module modelopt.torch.utils.network)
start() (Timer method)
state_dict() (BaseSearcher method)
(ModeloptStateManager method)
stats() (in module modelopt.torch.utils.list)
step_size (TensorQuantizer property)
stop() (Timer method)
symbolic() (FakeTensorQuantFunction static method)
(ScaledE4M3Function static method)
(TensorQuantFunction static method)
sync_amax_across_distributed_group() (TensorQuantizer method)
T
tensor_parallel (ModelConfig attribute)
tensor_quantizer_iterator() (SequentialQuantizer static method)
TensorQuantFunction (class in modelopt.torch.quantization.tensor_quant)
TensorQuantizer (class in modelopt.torch.quantization.nn.modules.tensor_quantizer)
Timer (class in modelopt.torch.utils.perf)
to_quantized_weight() (in module modelopt.torch.export.model_config_utils)
torch_detach() (in module modelopt.torch.utils.tensor)
torch_to() (in module modelopt.torch.utils.tensor)
torch_to_numpy() (in module modelopt.torch.utils.tensor)
torch_to_tensorrt_llm_checkpoint() (in module modelopt.torch.export.model_config_export)
transfer_state_dict() (ModeloptStateManager class method)
U
unregister() (in module modelopt.torch.quantization.conversion)
unregister_default() (ModeloptBaseRuleConfig class method)
unsigned (ScaledQuantDescriptor property)
(TensorQuantizer property)
unwrap_model() (in module modelopt.torch.utils.network)
update() (ModeloptBaseConfig method)
update_best_params() (AWQClipHelper method)
update_for_new_mode (QuantizeModeDescriptor property)
(SparseMagnitudeModeDescriptor property)
update_for_save (QuantizeModeDescriptor property)
(SparseMagnitudeModeDescriptor property)
update_last_state_before_new_mode() (ModeloptStateManager method)
update_last_state_before_save() (ModeloptStateManager method)
update_sparse_metadata() (in module modelopt.torch.sparsity.mode)
use_alibi (DecoderLayerConfig attribute)
use_cache (DecoderLayerConfig attribute)
use_trt_qdq_ops() (in module modelopt.onnx.quantization.qdq_utils)
V
v (QKVConfig attribute)
val2list() (in module modelopt.torch.utils.list)
val2tuple() (in module modelopt.torch.utils.list)
validate_batch_size() (in module modelopt.onnx.utils)
validate_onnx() (in module modelopt.onnx.utils)
validate_rule() (ModeloptBaseRule class method)
values() (ModeloptBaseConfig method)
version (ModelConfig attribute)
vocab_embedding (ModelConfig attribute)
vocab_size (ModelConfig attribute)
vocab_size_padded (ModelConfig property)
W
weight (ConvConfig attribute)
(EmbeddingConfig attribute)
(LayernormConfig attribute)
(LinearConfig attribute)
(QKVConfig property)
weight_quantizer (QuantLinearConvBase attribute)
weights_scaling_factor (LinearConfig attribute)
(QKVConfig property)
weights_scaling_factor_2 (LinearConfig attribute)
(QKVConfig property)
weights_to_npz() (in module modelopt.torch.export.tensorrt_llm_utils)
write_configs_and_weights() (NFSWorkspace method)
Y
y_bias (RecurrentConfig attribute)
Z
zero_grad() (in module modelopt.torch.utils.network)