CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Public Member Functions | List of all members
cutlass::gemm::threadblock::GemvBatchedStridedThreadblockDefaultSwizzle Struct Reference

Threadblock swizzling function for batched GEMVs.

#include <threadblock_swizzle.h>

Public Member Functions

CUTLASS_HOST_DEVICE BatchedGemmCoord get_tiled_shape (BatchedGemmCoord problem_size, BatchedGemmCoord tile_size) const
 Returns the shape of the problem in units of logical tiles. More...
 
CUTLASS_HOST_DEVICE dim3 get_grid_shape (BatchedGemmCoord tiled_shape) const
 Computes CUDA grid dimensions given a size in units of logical tiles. More...
 
CUTLASS_DEVICE BatchedGemmCoord get_tile_offset () const
 Obtains the threadblock offset (in units of threadblock-scoped tiles). More...
 
CUTLASS_DEVICE int get_batch_tile_idx () const
 Gets the batch tile index. More...
 
CUTLASS_DEVICE int get_batch_idx () const
 Gets the absolute batch index. More...
 

Member Function Documentation

CUTLASS_DEVICE int cutlass::gemm::threadblock::GemvBatchedStridedThreadblockDefaultSwizzle::get_batch_idx ( ) const
inline
CUTLASS_DEVICE int cutlass::gemm::threadblock::GemvBatchedStridedThreadblockDefaultSwizzle::get_batch_tile_idx ( ) const
inline
CUTLASS_HOST_DEVICE dim3 cutlass::gemm::threadblock::GemvBatchedStridedThreadblockDefaultSwizzle::get_grid_shape (BatchedGemmCoord tiled_shape) const
inline
CUTLASS_DEVICE BatchedGemmCoord cutlass::gemm::threadblock::GemvBatchedStridedThreadblockDefaultSwizzle::get_tile_offset ( ) const
inline
CUTLASS_HOST_DEVICE BatchedGemmCoord cutlass::gemm::threadblock::GemvBatchedStridedThreadblockDefaultSwizzle::get_tiled_shape (BatchedGemmCoord problem_size, BatchedGemmCoord tile_size) const
inline

The documentation for this struct was generated from the following file: