38 CUTLASS_DEVICE dim3
swizzle() {
// Swizzle the block index: this implementation returns blockIdx as-is, with no remapping.
return blockIdx; }
43 assert(OutputTile[0] == 1 && OutputTile[1] == 1);
44 assert((problem_size[0] * problem_size[1] * problem_size[2]) % OutputTile[2] == 0);
46 grid.x = problem_size[0] * problem_size[1] * problem_size[2]
53 assert(SubTile[0] == 1 && SubTile[1] == 1);
57 return threadblock_offset;
Definition: reduction/threadblock_swizzle.h:33
Definition: aligned_buffer.h:35
CUTLASS_HOST_DEVICE DefaultBlockSwizzle()
Constructor.
Definition: reduction/threadblock_swizzle.h:35
A Coord is a coordinate of arbitrary rank into a tensor or matrix.
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:387
CUTLASS_DEVICE dim3 swizzle()
Swizzle the block index.
Definition: reduction/threadblock_swizzle.h:38
CUTLASS_HOST_DEVICE dim3 get_grid_layout(Coord< 3 > const &problem_size, Coord< 3 > const &OutputTile)
Definition: reduction/threadblock_swizzle.h:41
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
CUTLASS_DEVICE Coord< 3 > get_threadblock_offset(Coord< 3 > const &SubTile)
Definition: reduction/threadblock_swizzle.h:52