Vanilla Decoder

class VanillaDecoder : public trt_edgellm::rt::DecodingStrategy

Public Functions

explicit VanillaDecoder(DecodingRuntimeContext &runtime)
inline virtual DecodingStrategyKind kind() const noexcept override
inline virtual char const *name() const noexcept override
inline virtual bool isSpeculative() const noexcept override
inline virtual char const *unsupportedReason(LLMGenerationRequest const&) const noexcept override

Checks whether this strategy can handle the given request.

Returns:

nullptr if the request is supported; otherwise, a human-readable string explaining why it is not.

virtual bool decodeStep(DecodingInferenceContext &context) override
virtual bool captureCudaGraphs(cudaStream_t stream) override
inline virtual int64_t getRequiredContextMemorySize() const noexcept override
inline virtual void setContextMemory(Tensor&) override
inline virtual bool hasSystemPromptKVCache(SystemPromptCacheKey const&) const override
inline virtual void restoreSystemPromptKVCache(SystemPromptCacheKey const&, int32_t, cudaStream_t) override
inline virtual bool runSystemPromptPrefill(DecodingInferenceContext&) override
inline virtual void saveSystemPromptKVCache(SystemPromptCacheKey const&, std::string const&, std::vector<tokenizer::Rank> const&, int32_t, cudaStream_t) override
inline virtual void resetForNewSequences(Tensor&, cudaStream_t) override
inline virtual void onBatchEvict(std::vector<int32_t> const&, int32_t, int32_t, Tensor&, cudaStream_t) override