transpose

transpose#

Transpose an operator into a tensor. This is equivalent to permuting the dimensions of an operator in reverse order. Using transpose() potentially allows for higher performance than calling permute() since it’s not lazily evaluated and can use an optimized implementation.

template<typename T>
__MATX_INLINE__ auto matx::transpose(const T &op)#

Operator to transpose the dimensions of a tensor or operator.

Each dimension must appear in the dims array exactly once.

This operator can appear as an rvalue or lvalue.

Template Parameters:

T – Input operator/tensor type

Parameters:

op – Input operator

Returns:

transposed operator

Examples#

// ExecType is an executor type (e.g. matx::cudaExecutor for executing on the GPU).
ExecType exec{};

const index_t m = 3;
const index_t n = 5;
const index_t p = 7;

// TestType is the tensor data type
tensor_t<TestType, 3> t3  ({m,n,p});
tensor_t<TestType, 3> t3t ({p,n,m});
tensor_t<TestType, 3> t3tm({m,p,n});

for (index_t i = 0; i < m; i++) {
  for (index_t j = 0; j < n; j++) {
    for (index_t k = 0; k < p; k++) {
      t3(i, j, k) = static_cast<detail::value_promote_t<TestType>>(i*n*p + j*p + k);
    }
  }
}

(t3t = transpose(t3)).run(exec);
(t3tm = transpose_matrix(t3)).run(exec);

exec.sync();
if constexpr (std::is_same_v<ExecType,cudaExecutor>) {
  ASSERT_EQ(cudaGetLastError(), cudaSuccess);
}

for (index_t i = 0; i < m; i++) {
  for (index_t j = 0; j < n; j++) {
    for (index_t k = 0; k < p; k++) {
      // transpose() permutes all dimensions whereas transpose_matrix() only permutes the
      // last two dimensions.
      EXPECT_EQ(t3(i,j,k), t3t(k,j,i));
      EXPECT_EQ(t3(i,j,k), t3tm(i,k,j));
    }
  }
}