EAGLE Util Kernels

void trt_edgellm::kernel::prepareEaglePrefillInputs(
rt::Tensor const &sequenceContextLengths,
rt::Tensor &selectTokenIndices,
cudaStream_t stream
)
void trt_edgellm::kernel::prepareEagleDraftProposalInputs(
rt::Tensor const &draftTreeMask,
rt::Tensor const &draftTreeLength,
rt::Tensor const &sequenceStartIndices,
rt::Tensor &packedDraftTreeMask,
rt::Tensor &tensorPositionIndices,
rt::Tensor &selectTokenIndices,
rt::Tensor &sequenceContextLengths,
cudaStream_t stream
)
void trt_edgellm::kernel::prepareEagleAcceptDecodeTokenInputs(
rt::Tensor const &sequenceStartIndices,
rt::Tensor const &acceptedTokenNums,
rt::Tensor &packedTreeMask,
rt::Tensor &tensorPositionIndices,
rt::Tensor &selectTokenIndices,
rt::Tensor &sequenceContextLengths,
cudaStream_t stream
)
void trt_edgellm::kernel::prepareEagleBaseTreeDecodingInputs(
rt::Tensor const &baseTreeDecodingMask,
rt::Tensor const &sequenceStartIndices,
rt::Tensor &packedBaseTreeDecodingMask,
rt::Tensor &tensorPositionIndices,
rt::Tensor &selectTokenIndices,
rt::Tensor &sequenceContextLengths,
cudaStream_t stream
)
void trt_edgellm::kernel::eagleBaseCommitKVCacheAndAssembleHiddenState(
rt::Tensor const &acceptedIndices,
rt::Tensor const &acceptLengths,
rt::Tensor const &kvCacheLengths,
rt::Tensor &kvCacheBuffer,
rt::Tensor &hiddenState,
cudaStream_t stream
)
void trt_edgellm::kernel::initializeDraftTreeTables(
rt::Tensor const &selectedIndices,
rt::Tensor const &logProb,
rt::Tensor const &rootTokens,
rt::Tensor const &vocabMappingTable,
rt::Tensor &draftIdFullTable,
rt::Tensor &draftScoreFullTable,
rt::Tensor &draftParentFullTable,
int32_t const draftTopK,
cudaStream_t stream
)
void trt_edgellm::kernel::assembleInitialDraftTreeInput(
rt::Tensor const &draftIdFullTable,
rt::Tensor const &draftHiddenStatesOutput,
rt::Tensor &inputIds,
rt::Tensor &draftHiddenStatesInput,
rt::Tensor &draftTreeLength,
rt::Tensor &draftTreeMask,
int32_t const draftTopK,
cudaStream_t stream
)
void trt_edgellm::kernel::assembleDraftTreeInput(
rt::Tensor const &draftIdTable,
rt::Tensor const &draftHiddenOutput,
rt::Tensor const &selectedIndices,
rt::Tensor &inputIds,
rt::Tensor &draftHiddenStatesInput,
rt::Tensor &draftTreeLength,
rt::Tensor &draftTreeMask,
int32_t const draftTopK,
int32_t const round,
cudaStream_t stream
)
void trt_edgellm::kernel::assembleInitialIntermediateData(
rt::Tensor const &logProbs,
rt::Tensor &intermediateParents,
rt::Tensor &intermediateScores,
int32_t const draftTopK,
cudaStream_t stream
)
void trt_edgellm::kernel::assembleIntermediateData(
rt::Tensor const &cuLogProbs,
rt::Tensor const &selectedIndices,
rt::Tensor &intermediateScores,
rt::Tensor &intermediateParents,
int32_t const draftTopK,
int32_t const round,
cudaStream_t stream
)
void trt_edgellm::kernel::computeCuScoresAndTranslateToken(
rt::Tensor const &selectedIndices,
rt::Tensor const &logProbs,
rt::Tensor const &intermediateScores,
rt::Tensor const &vocabMappingTable,
rt::Tensor &draftIdTable,
rt::Tensor &draftScoreTable,
int32_t const draftTopK,
cudaStream_t stream
)
void trt_edgellm::kernel::updateDraftTreeFullTables(
rt::Tensor const &draftIdTable,
rt::Tensor const &draftScoreTable,
rt::Tensor const &intermediateParents,
rt::Tensor &draftIdFullTable,
rt::Tensor &draftScoreFullTable,
rt::Tensor &draftParentFullTable,
int32_t const draftTopK,
int32_t const round,
cudaStream_t stream
)
void trt_edgellm::kernel::constructVerificationDraftTree(
rt::Tensor const &draftIdFullTable,
rt::Tensor const &draftParentFullTable,
rt::Tensor const &selectedIndices,
rt::Tensor &inputIds,
rt::Tensor &draftTreeMask,
cudaStream_t stream
)