All modules for which code is available
- tensorrt_llm.bindings
- tensorrt_llm.builder
- tensorrt_llm.executor
- tensorrt_llm.functional
- tensorrt_llm.layers.activation
- tensorrt_llm.layers.attention
- tensorrt_llm.layers.cast
- tensorrt_llm.layers.conv
- tensorrt_llm.layers.embedding
- tensorrt_llm.layers.linear
- tensorrt_llm.layers.mlp
- tensorrt_llm.layers.normalization
- tensorrt_llm.layers.pooling
- tensorrt_llm.llmapi.build_cache
- tensorrt_llm.llmapi.llm
- tensorrt_llm.llmapi.llm_utils
- tensorrt_llm.models.baichuan.model
- tensorrt_llm.models.bert.model
- tensorrt_llm.models.bloom.model
- tensorrt_llm.models.chatglm.config
- tensorrt_llm.models.chatglm.model
- tensorrt_llm.models.cogvlm.config
- tensorrt_llm.models.cogvlm.model
- tensorrt_llm.models.commandr.model
- tensorrt_llm.models.dbrx.config
- tensorrt_llm.models.dbrx.model
- tensorrt_llm.models.deepseek_v1.model
- tensorrt_llm.models.deepseek_v2.model
- tensorrt_llm.models.dit.model
- tensorrt_llm.models.eagle.model
- tensorrt_llm.models.enc_dec.model
- tensorrt_llm.models.falcon.config
- tensorrt_llm.models.falcon.model
- tensorrt_llm.models.gemma.config
- tensorrt_llm.models.gemma.model
- tensorrt_llm.models.gpt.config
- tensorrt_llm.models.gpt.model
- tensorrt_llm.models.gptj.config
- tensorrt_llm.models.gptj.model
- tensorrt_llm.models.gptneox.model
- tensorrt_llm.models.llama.config
- tensorrt_llm.models.llama.model
- tensorrt_llm.models.mamba.model
- tensorrt_llm.models.medusa.config
- tensorrt_llm.models.medusa.model
- tensorrt_llm.models.mllama.model
- tensorrt_llm.models.modeling_utils
- tensorrt_llm.models.mpt.model
- tensorrt_llm.models.opt.model
- tensorrt_llm.models.phi.model
- tensorrt_llm.models.phi3.model
- tensorrt_llm.models.recurrentgemma.model
- tensorrt_llm.models.redrafter.model
- tensorrt_llm.plugin.plugin
- tensorrt_llm.quantization.mode
- tensorrt_llm.quantization.quantize_by_modelopt
- tensorrt_llm.runtime.enc_dec_model_runner
- tensorrt_llm.runtime.generation
- tensorrt_llm.runtime.kv_cache_manager
- tensorrt_llm.runtime.model_runner
- tensorrt_llm.runtime.model_runner_cpp
- tensorrt_llm.runtime.multimodal_model_runner
- tensorrt_llm.runtime.session
- tensorrt_llm.sampling_params