Vanilla Decoder

class VanillaDecoder : public trt_edgellm::rt::DecodingStrategy

Public Functions

explicit VanillaDecoder(DecodingRuntimeContext &runtime)#

inline virtual DecodingStrategyKind kind() const noexcept override#

inline virtual char const *name() const noexcept override#

inline virtual bool isSpeculative() const noexcept override#

inline virtual char const *unsupportedReason( LLMGenerationRequest const& ) const noexcept override#

Check whether this strategy can handle the given request.

Returns: nullptr if the request is supported; otherwise, a human-readable string explaining why it is not.

virtual bool decodeStep(DecodingInferenceContext &context) override#

virtual bool captureCudaGraphs(cudaStream_t stream) override#

inline virtual int64_t getRequiredContextMemorySize( ) const noexcept override#

inline virtual void setContextMemory(Tensor&) override#

inline virtual bool hasSystemPromptKVCache( SystemPromptCacheKey const& ) const override#

inline virtual void restoreSystemPromptKVCache( SystemPromptCacheKey const&, int32_t, cudaStream_t ) override#

inline virtual bool runSystemPromptPrefill( DecodingInferenceContext& ) override#

inline virtual void saveSystemPromptKVCache( SystemPromptCacheKey const&, std::string const&, std::vector<tokenizer::Rank> const&, int32_t, cudaStream_t ) override#

inline virtual void resetForNewSequences( Tensor&, cudaStream_t ) override#

inline virtual void onBatchEvict( std::vector<int32_t> const&, int32_t, int32_t, Tensor&, cudaStream_t ) override#