Adding New Operations

See also

The architecture guide provides an overview of the codebase.

Adding a new operation involves implementing the trace operation, implementing the frontend API, and adding tests for both.

Let’s implement Top-K:

Implementing The Trace Operation

Trace operations implement the TraceOp interface and are located under nvtripy/trace/ops.

Note

TensorRTOps.td defines the tensorrt dialect. Refer to that file for details on each operation.

 1# nvtripy/trace/ops/top_k.py
 2from dataclasses import dataclass
 3
 4from mlir_tensorrt.compiler.dialects import tensorrt
 5from nvtripy.common import datatype
 6from nvtripy.trace.ops.base import TraceOp
 7
 8
 9@dataclass(repr=False)
10class TopK(TraceOp):
11    # Attributes of the operation are added to the constructor by default.
12    # Use `dataclasses.field(..., init=False)` to avoid that.
13    k: int
14    dim: int
15
16    def infer_rank(self):
17        # Top-K does not change the rank of its input
18        rank = self.inputs[0].rank
19        self.outputs[0].rank = rank
20        self.outputs[1].rank = rank
21
22    def infer_dtypes(self):
23        # First output is top-k values, second is indices
24        self.outputs[0].dtype = self.inputs[0].dtype
25        self.outputs[1].dtype = datatype.int32
26
27    # This is only required if `num_outputs != 1`:
28    def get_num_outputs(self):
29        return 2
30
31    def to_mlir(self, inputs, outputs):
32        # This method should *not* access `self.inputs` or `self.outputs`, only
33        # `inputs` and `outputs`. The former are trace tensors while the latter
34        # are MLIR operands.
35        #
36        # NOTE: If the MLIR API returned only a single tensor, we would need to
37        # wrap it in a list.
38        return tensorrt.top_k(
39            inputs[0], self.k, self.dim, tensorrt.TopKOperationAttr.get("kMAX")
40        )

We can add tests under tests/trace/ops/:

 1# tests/trace/ops/top_k.py
 2import nvtripy as tp
 3
 4from nvtripy.trace.ops.top_k import TopK
 5
 6
 7class TestTopK:
 8    def test_infer_rank(self):
 9        inp = tp.ones((2, 2, 3))
10        values, indices = TopK([inp.trace_tensor], dim=2, k=2).outputs
11        assert values.rank == inp.rank
12        assert indices.rank == inp.rank
13
14    def test_infer_dtypes(self):
15        inp = tp.ones((2, 2, 3))
16        values, indices = TopK([inp.trace_tensor], dim=2, k=2).outputs
17        assert values.dtype == inp.dtype
18        assert indices.dtype == tp.int32

Implementing The Frontend API

Frontend APIs are implemented in nvtripy/frontend/ops.

They should:

  1. Use @export.public_api to export themselves into the nvtripy module. This also:

    • Controls where the API is documented.

    • Enables type checking and function overloading.

  2. Use @wrappers.interface to express data type constraints.

  3. Include documentation, including at least one code example.

 1# nvtripy/frontend/ops/top_k.py
 2from typing import Optional, Tuple
 3
 4from nvtripy import export
 5from nvtripy.trace.ops.top_k import TopK
 6from nvtripy.utils import wrappers
 7from nvtripy.frontend.ops import utils as op_utils
 8
 9
10@export.public_api(document_under="operations/functions")
11@wrappers.interface(
12    dtype_constraints={"input": "T1", wrappers.RETURN_VALUE: ["T1", "T2"]},
13    dtype_variables={
14        "T1": ["float32", "float16", "bfloat16", "int32", "int64"],
15        "T2": ["int32"],
16    },
17)
18def top_k(
19    input: "nvtripy.Tensor", k: int, dim: int
20) -> Tuple["nvtripy.Tensor", "nvtripy.Tensor"]:
21    # See docs/README.md for more information on how to write docstrings
22    """
23    Returns the top-k values in the tensor and their
24    indices along the specified dimension.
25
26    Args:
27        input: The input tensor.
28        k: The number of values to take.
29        dim: The dimension along which to find the top-k values.
30
31    Returns:
32        The top-k values and indices
33
34    .. code-block:: python
35        :linenos:
36
37        inp = tp.iota((1, 5), dim=1)
38        values, indices = tp.top_k(inp, k=2, dim=1)
39
40        assert tp.equal(values, tp.Tensor([[4.0, 3.0]]))
41        assert tp.equal(indices, tp.Tensor([[4, 3]]))
42    """
43    # The `process_dim` helper performs bounds checking and handles
44    # negative dimensions:
45    dim = op_utils.process_dim(dim, input.rank)
46
47    # The variadic arguments to `create_op` should match the attributes
48    # of the trace operation.
49    return op_utils.create_op(TopK, [input], k=k, dim=dim)

We can add tests in tests/frontend/ops/ to test the frontend function, e.g. parameter bounds checking:

 1# tests/frontend/ops/test_top_k.py
 2import nvtripy as tp
 3from tests import helper
 4
 5
 6class TestTopK:
 7    def test_invalid_dim(self):
 8        inp = tp.ones((5, 5))
 9        with helper.raises(
10            tp.TripyException, match="Dimension argument is out of bounds"
11        ):
12            values, indices = tp.top_k(inp, k=1, dim=3)

We can add integration tests in tests/integration/ to test end-to-end functionality and accuracy:

 1# tests/integration/test_top_k.py
 2import nvtripy as tp
 3
 4
 5# When implementing a real operation, we would likely want
 6# more exhaustive testing:
 7def test_top_k():
 8    # TensorRT requires 2 dimensions for top-k:
 9    inp = tp.unsqueeze(tp.arange(5) + 2.0, dim=1)
10    values, indices = tp.top_k(inp, k=1, dim=0)
11
12    # The last value in `arange` will be the largest:
13    assert tp.equal(values, tp.Tensor([[6.0]]))
14    assert tp.equal(indices, tp.Tensor([[4]]))