All modules for which code is available
- tensorrt_llm._torch.async_llm
- tensorrt_llm._torch.auto_deploy.transform.graph_module_visualizer
- tensorrt_llm._torch.auto_deploy.transform.interface
- tensorrt_llm._torch.auto_deploy.transform.library.attention
- tensorrt_llm._torch.auto_deploy.transform.library.build_model
- tensorrt_llm._torch.auto_deploy.transform.library.cleanup_identity_dtype_cast
- tensorrt_llm._torch.auto_deploy.transform.library.cleanup_input_constraints
- tensorrt_llm._torch.auto_deploy.transform.library.cleanup_noop_add
- tensorrt_llm._torch.auto_deploy.transform.library.cleanup_noop_slice
- tensorrt_llm._torch.auto_deploy.transform.library.collectives
- tensorrt_llm._torch.auto_deploy.transform.library.compile_model
- tensorrt_llm._torch.auto_deploy.transform.library.eliminate_redundant_transposes
- tensorrt_llm._torch.auto_deploy.transform.library.export_to_gm
- tensorrt_llm._torch.auto_deploy.transform.library.fuse_causal_conv
- tensorrt_llm._torch.auto_deploy.transform.library.fuse_gdn_gating
- tensorrt_llm._torch.auto_deploy.transform.library.fuse_mamba_a_log
- tensorrt_llm._torch.auto_deploy.transform.library.fuse_quant
- tensorrt_llm._torch.auto_deploy.transform.library.fuse_relu2_quant_nvfp4
- tensorrt_llm._torch.auto_deploy.transform.library.fuse_rmsnorm_quant_fp8
- tensorrt_llm._torch.auto_deploy.transform.library.fuse_rmsnorm_quant_nvfp4
- tensorrt_llm._torch.auto_deploy.transform.library.fuse_rope_into_trtllm_attention
- tensorrt_llm._torch.auto_deploy.transform.library.fuse_rope_mla
- tensorrt_llm._torch.auto_deploy.transform.library.fuse_silu_mul
- tensorrt_llm._torch.auto_deploy.transform.library.fuse_swiglu
- tensorrt_llm._torch.auto_deploy.transform.library.fuse_trtllm_attention_quant_fp8
- tensorrt_llm._torch.auto_deploy.transform.library.fused_add_rms_norm
- tensorrt_llm._torch.auto_deploy.transform.library.fused_moe
- tensorrt_llm._torch.auto_deploy.transform.library.fused_moe_mxfp4
- tensorrt_llm._torch.auto_deploy.transform.library.fusion
- tensorrt_llm._torch.auto_deploy.transform.library.gather_logits_before_lm_head
- tensorrt_llm._torch.auto_deploy.transform.library.hidden_states
- tensorrt_llm._torch.auto_deploy.transform.library.kvcache
- tensorrt_llm._torch.auto_deploy.transform.library.kvcache_transformers
- tensorrt_llm._torch.auto_deploy.transform.library.l2_norm
- tensorrt_llm._torch.auto_deploy.transform.library.load_weights
- tensorrt_llm._torch.auto_deploy.transform.library.mlir_elementwise_fusion
- tensorrt_llm._torch.auto_deploy.transform.library.moe_routing
- tensorrt_llm._torch.auto_deploy.transform.library.mrope_delta_cache
- tensorrt_llm._torch.auto_deploy.transform.library.multi_stream_attn
- tensorrt_llm._torch.auto_deploy.transform.library.multi_stream_gemm
- tensorrt_llm._torch.auto_deploy.transform.library.multi_stream_moe
- tensorrt_llm._torch.auto_deploy.transform.library.quantization
- tensorrt_llm._torch.auto_deploy.transform.library.quantize_moe
- tensorrt_llm._torch.auto_deploy.transform.library.rms_norm
- tensorrt_llm._torch.auto_deploy.transform.library.rope
- tensorrt_llm._torch.auto_deploy.transform.library.sharding
- tensorrt_llm._torch.auto_deploy.transform.library.sharding_ir
- tensorrt_llm._torch.auto_deploy.transform.library.ssm_cache
- tensorrt_llm._torch.auto_deploy.transform.library.visualization
- tensorrt_llm._torch.auto_deploy.transform.optimizer
- tensorrt_llm._torch.auto_deploy.transform.pipeline_cache.pipeline_cache
- tensorrt_llm.bindings.executor
- tensorrt_llm.builder
- tensorrt_llm.disaggregated_params
- tensorrt_llm.executor.request
- tensorrt_llm.executor.result
- tensorrt_llm.executor.utils
- tensorrt_llm.functional
- tensorrt_llm.layers.activation
- tensorrt_llm.layers.attention
- tensorrt_llm.layers.cast
- tensorrt_llm.layers.conv
- tensorrt_llm.layers.embedding
- tensorrt_llm.layers.linear
- tensorrt_llm.layers.mlp
- tensorrt_llm.layers.normalization
- tensorrt_llm.layers.pooling
- tensorrt_llm.llmapi.build_cache
- tensorrt_llm.llmapi.llm
- tensorrt_llm.llmapi.llm_args
- tensorrt_llm.llmapi.mm_encoder
- tensorrt_llm.llmapi.mpi_session
- tensorrt_llm.llmapi.thinking_budget
- tensorrt_llm.models.baichuan.model
- tensorrt_llm.models.bert.model
- tensorrt_llm.models.bloom.model
- tensorrt_llm.models.chatglm.config
- tensorrt_llm.models.chatglm.model
- tensorrt_llm.models.clip.model
- tensorrt_llm.models.cogvlm.config
- tensorrt_llm.models.cogvlm.model
- tensorrt_llm.models.commandr.model
- tensorrt_llm.models.dbrx.config
- tensorrt_llm.models.dbrx.model
- tensorrt_llm.models.deepseek_v1.model
- tensorrt_llm.models.deepseek_v2.model
- tensorrt_llm.models.dit.model
- tensorrt_llm.models.eagle.model
- tensorrt_llm.models.enc_dec.model
- tensorrt_llm.models.falcon.config
- tensorrt_llm.models.falcon.model
- tensorrt_llm.models.gemma.config
- tensorrt_llm.models.gemma.model
- tensorrt_llm.models.gpt.config
- tensorrt_llm.models.gpt.model
- tensorrt_llm.models.gptj.config
- tensorrt_llm.models.gptj.model
- tensorrt_llm.models.gptneox.model
- tensorrt_llm.models.llama.config
- tensorrt_llm.models.llama.model
- tensorrt_llm.models.mamba.model
- tensorrt_llm.models.medusa.config
- tensorrt_llm.models.medusa.model
- tensorrt_llm.models.mllama.model
- tensorrt_llm.models.mmdit_sd3.model
- tensorrt_llm.models.modeling_utils
- tensorrt_llm.models.mpt.model
- tensorrt_llm.models.multimodal_encoders.config
- tensorrt_llm.models.multimodal_encoders.model
- tensorrt_llm.models.opt.model
- tensorrt_llm.models.phi.model
- tensorrt_llm.models.phi3.model
- tensorrt_llm.models.recurrentgemma.model
- tensorrt_llm.models.redrafter.model
- tensorrt_llm.plugin.plugin
- tensorrt_llm.quantization.mode
- tensorrt_llm.quantization.quantize_by_modelopt
- tensorrt_llm.runtime.enc_dec_model_runner
- tensorrt_llm.runtime.generation
- tensorrt_llm.runtime.kv_cache_manager
- tensorrt_llm.runtime.model_runner
- tensorrt_llm.runtime.model_runner_cpp
- tensorrt_llm.runtime.multimodal_model_runner
- tensorrt_llm.runtime.session
- tensorrt_llm.sampling_params
- tensorrt_llm.scheduling_params