EAGLE Util Kernels
-
void trt_edgellm::kernel::prepareEaglePrefillInputs(
- rt::Tensor const &sequenceContextLengths,
- rt::Tensor &selectTokenIndices,
- cudaStream_t stream
)
-
void trt_edgellm::kernel::prepareEagleDraftProposalInputs(
- rt::Tensor const &draftTreeMask,
- rt::Tensor const &draftTreeLength,
- rt::Tensor const &sequenceStartIndices,
- rt::Tensor &packedDraftTreeMask,
- rt::Tensor &tensorPositionIndices,
- rt::Tensor &selectTokenIndices,
- rt::Tensor &sequenceContextLengths,
- cudaStream_t stream
)
-
void trt_edgellm::kernel::prepareEagleAcceptDecodeTokenInputs(
- rt::Tensor const &sequenceStartIndices,
- rt::Tensor const &acceptedTokenNums,
- rt::Tensor &packedTreeMask,
- rt::Tensor &tensorPositionIndices,
- rt::Tensor &selectTokenIndices,
- rt::Tensor &sequenceContextLengths,
- cudaStream_t stream
)
-
void trt_edgellm::kernel::prepareEagleBaseTreeDecodingInputs(
- rt::Tensor const &baseTreeDecodingMask,
- rt::Tensor const &sequenceStartIndices,
- rt::Tensor &packedBaseTreeDecodingMask,
- rt::Tensor &tensorPositionIndices,
- rt::Tensor &selectTokenIndices,
- rt::Tensor &sequenceContextLengths,
- cudaStream_t stream
)
-
void trt_edgellm::kernel::eagleBaseCommitKVCacheAndAssembleHiddenState(
- rt::Tensor const &acceptedIndices,
- rt::Tensor const &acceptLengths,
- rt::Tensor const &kvCacheLengths,
- rt::Tensor &kvCacheBuffer,
- rt::Tensor &hiddenState,
- cudaStream_t stream
)
-
void trt_edgellm::kernel::initializeDraftTreeTables(
- rt::Tensor const &selectedIndices,
- rt::Tensor const &logProb,
- rt::Tensor const &rootTokens,
- rt::Tensor const &vocabMappingTable,
- rt::Tensor &draftIdFullTable,
- rt::Tensor &draftScoreFullTable,
- rt::Tensor &draftParentFullTable,
- int32_t const draftTopK,
- cudaStream_t stream
)
-
void trt_edgellm::kernel::assembleInitialDraftTreeInput(
- rt::Tensor const &draftIdFullTable,
- rt::Tensor const &draftHiddenStatesOutput,
- rt::Tensor &inputIds,
- rt::Tensor &draftHiddenStatesInput,
- rt::Tensor &draftTreeLength,
- rt::Tensor &draftTreeMask,
- int32_t const draftTopK,
- cudaStream_t stream
)
-
void trt_edgellm::kernel::assembleDraftTreeInput(
- rt::Tensor const &draftIdTable,
- rt::Tensor const &draftHiddenOutput,
- rt::Tensor const &selectedIndices,
- rt::Tensor &inputIds,
- rt::Tensor &draftHiddenStatesInput,
- rt::Tensor &draftTreeLength,
- rt::Tensor &draftTreeMask,
- int32_t const draftTopK,
- int32_t const round,
- cudaStream_t stream
)
-
void trt_edgellm::kernel::assembleInitialIntermediateData(
- rt::Tensor const &logProbs,
- rt::Tensor &intermediateParents,
- rt::Tensor &intermediateScores,
- int32_t const draftTopK,
- cudaStream_t stream
)
-
void trt_edgellm::kernel::assembleIntermediateData(
- rt::Tensor const &cuLogProbs,
- rt::Tensor const &selectedIndices,
- rt::Tensor &intermediateScores,
- rt::Tensor &intermediateParents,
- int32_t const draftTopK,
- int32_t const round,
- cudaStream_t stream
)
-
void trt_edgellm::kernel::computeCuScoresAndTranslateToken(
- rt::Tensor const &selectedIndices,
- rt::Tensor const &logProbs,
- rt::Tensor const &intermediateScores,
- rt::Tensor const &vocabMappingTable,
- rt::Tensor &draftIdTable,
- rt::Tensor &draftScoreTable,
- int32_t const draftTopK,
- cudaStream_t stream
)
-
void trt_edgellm::kernel::updateDraftTreeFullTables(
- rt::Tensor const &draftIdTable,
- rt::Tensor const &draftScoreTable,
- rt::Tensor const &intermediateParents,
- rt::Tensor &draftIdFullTable,
- rt::Tensor &draftScoreFullTable,
- rt::Tensor &draftParentFullTable,
- int32_t const draftTopK,
- int32_t const round,
- cudaStream_t stream
)
-
void trt_edgellm::kernel::constructVerificationDraftTree(
- rt::Tensor const &draftIdFullTable,
- rt::Tensor const &draftParentFullTable,
- rt::Tensor const &selectedIndices,
- rt::Tensor &inputIds,
- rt::Tensor &draftTreeMask,
- cudaStream_t stream
)