Causal Conv1d

void mamba_ssm::invokeCausalConv1d(
trt_edgellm::rt::Tensor const &x,
trt_edgellm::rt::Tensor const &weight,
trt_edgellm::rt::OptionalInputTensor bias,
trt_edgellm::rt::Tensor &out,
int32_t stride,
int32_t padding,
int32_t dilation,
cudaStream_t stream
)

Prefill causal depthwise conv1d.

x: [batch, seq_len, dim]
weight: [dim, 1, width]
bias: [dim] (optional)
out: [batch, out_seq_len, dim]

void mamba_ssm::invokeCausalConv1dDecode(
trt_edgellm::rt::Tensor const &convState,
trt_edgellm::rt::Tensor const &weight,
trt_edgellm::rt::OptionalInputTensor bias,
trt_edgellm::rt::Tensor &out,
cudaStream_t stream
)

Decode-mode conv1d for a single token, computed as a dot product of convState and weight over the width axis.

convState: [batch, dim, width]
weight: [dim, 1, width]
bias: [dim] (optional)
out: [batch, 1, dim]

void mamba_ssm::invokeCaptureConvState(
trt_edgellm::rt::Tensor const &x,
trt_edgellm::rt::Tensor &convState,
cudaStream_t stream
)

Capture conv state from prefill input.

x: [batch, seq_len, dim]
convState: [batch, dim, width] (output; must be zero-initialized before the call)

void mamba_ssm::invokeConvStateShiftInsert(
trt_edgellm::rt::Tensor &convState,
trt_edgellm::rt::Tensor const &newCol,
cudaStream_t stream
)

Shift convState left by one position along the width axis and insert the new values at position width-1.

convState: [batch, dim, width] (updated in place)
newCol: [batch, 1, dim] (the new single-token input)