Vanilla Decoder
-
class VanillaDecoder : public trt_edgellm::rt::DecodingStrategy#
Public Functions
-
explicit VanillaDecoder(DecodingRuntimeContext &runtime)#
-
inline virtual DecodingStrategyKind kind() const noexcept override#
-
inline virtual char const *name() const noexcept override#
-
inline virtual bool isSpeculative() const noexcept override#
-
inline virtual char const *unsupportedReason(LLMGenerationRequest const&)
Check whether this strategy can handle the given request.
- Returns:
nullptr if supported; a human-readable reason string if not.
-
virtual bool decodeStep(DecodingInferenceContext &context) override#
-
virtual bool captureCudaGraphs(cudaStream_t stream) override#
- inline virtual int64_t getRequiredContextMemorySize(
-
inline virtual bool hasSystemPromptKVCache(SystemPromptCacheKey const&)
-
inline virtual void restoreSystemPromptKVCache(SystemPromptCacheKey const&, int32_t, cudaStream_t)
- inline virtual bool runSystemPromptPrefill( ) override#
-
inline virtual void saveSystemPromptKVCache(SystemPromptCacheKey const&, std::string const&, std::vector<tokenizer::Rank> const&, int32_t, cudaStream_t)
-
explicit VanillaDecoder(DecodingRuntimeContext &runtime)#