Decoder Registry

class DecoderRegistry

Public Functions

DecoderRegistry(
    DecodingRuntimeContext &runtime,
    DecoderRegistryConfig const &config
)
DecodingStrategy &select(
    LLMGenerationRequest const &request
) const noexcept
DecodingStrategy &cachePrimingStrategy() const noexcept
bool captureCudaGraphs(cudaStream_t stream) const
int64_t getRequiredContextMemorySize() const noexcept
void setContextMemory(Tensor &memory) const
inline bool hasSpeculativeDecoder() const noexcept
inline char const *speculativeDecoderName() const noexcept
struct DecoderRegistryConfig

Public Members

std::filesystem::path engineDir
std::optional<SpecDecodeDraftingConfig> draftingConfig
cudaStream_t stream = {}