EAGLE Decoder
-
class EagleDecoder : public trt_edgellm::rt::DecodingStrategy
Public Functions
- EagleDecoder(
- DecodingRuntimeContext &runtime,
- std::filesystem::path const &engineDir,
- SpecDecodeDraftingConfig const &draftingConfig,
- cudaStream_t stream
- )
-
inline virtual DecodingStrategyKind kind() const noexcept override
-
inline virtual char const *name() const noexcept override
-
inline virtual bool isSpeculative() const noexcept override
- virtual char const *unsupportedReason(
- LLMGenerationRequest const &request
Checks whether this strategy can handle the given request.
- Returns:
nullptr if the request is supported; otherwise, a human-readable string giving the reason it is not.
-
virtual bool decodeStep(DecodingInferenceContext &context) override
-
virtual bool captureCudaGraphs(cudaStream_t stream) override
- virtual int64_t getRequiredContextMemorySize(
- virtual bool hasSystemPromptKVCache(
- SystemPromptCacheKey const &key
- virtual void restoreSystemPromptKVCache(
- SystemPromptCacheKey const &key,
- int32_t batchIdx,
- cudaStream_t stream
- virtual bool runSystemPromptPrefill(
- DecodingInferenceContext &context
- virtual void saveSystemPromptKVCache(
- SystemPromptCacheKey const &key,
- std::string const &prompt,
- std::vector<tokenizer::Rank> const &tokenizedPrompt,
- int32_t promptIdsLength,
- cudaStream_t stream