All modules for which code is available
- tensorrt_llm.functional
- tensorrt_llm.layers.activation
- tensorrt_llm.layers.attention
- tensorrt_llm.layers.cast
- tensorrt_llm.layers.conv
- tensorrt_llm.layers.embedding
- tensorrt_llm.layers.linear
- tensorrt_llm.layers.mlp
- tensorrt_llm.layers.normalization
- tensorrt_llm.layers.pooling
- tensorrt_llm.models.baichuan.model
- tensorrt_llm.models.bert.model
- tensorrt_llm.models.bloom.model
- tensorrt_llm.models.chatglm.model
- tensorrt_llm.models.enc_dec.model
- tensorrt_llm.models.falcon.model
- tensorrt_llm.models.gemma.model
- tensorrt_llm.models.gpt.model
- tensorrt_llm.models.gptj.model
- tensorrt_llm.models.gptneox.model
- tensorrt_llm.models.llama.model
- tensorrt_llm.models.mamba.model
- tensorrt_llm.models.medusa.model
- tensorrt_llm.models.modeling_utils
- tensorrt_llm.models.mpt.model
- tensorrt_llm.models.opt.model
- tensorrt_llm.models.phi.model
- tensorrt_llm.models.quantized.quant
- tensorrt_llm.models.qwen.model
- tensorrt_llm.plugin.plugin
- tensorrt_llm.quantization.mode
- tensorrt_llm.quantization.quantize_by_ammo
- tensorrt_llm.runtime.generation
- tensorrt_llm.runtime.kv_cache_manager
- tensorrt_llm.runtime.model_runner
- tensorrt_llm.runtime.model_runner_cpp
- tensorrt_llm.runtime.session