/// Prints the contents of per-thread fragments to stdout for debugging.
///
/// The first N threads of each thread block print the first M elements of
/// their own fragment, visiting every S-th element.  Each element is converted
/// to `float` and printed with "%.0f", so fractional parts are not shown.
///
/// This function contains block-wide barriers (`__syncthreads()`), so it must
/// be called by every thread of the block, with the same N, M and S.
///
/// \param frag  Fragment to dump; must provide `size()`, `operator[]` and a
///              nested `value_type` convertible to `float`.
/// \param N     Number of threads that print.  0 (the default) selects all
///              threads of the block.  Must be in [0, threads per block].
/// \param M     Number of leading elements each thread prints.  0 (the
///              default) selects the whole fragment.  Must be in
///              [0, frag.size()].
/// \param S     Element stride.  Must be in [1, M] after M's default has been
///              resolved.
///
/// On an invalid argument, thread 0 of block 0 prints a diagnostic and every
/// thread returns without dumping anything.
template <typename Fragment>
CUTLASS_DEVICE void dump_fragment(Fragment const& frag, int N = 0, int M = 0,
                                  int S = 1) {
  int total_threads = blockDim.x * blockDim.y * blockDim.z;

  // Linearized block index within the grid and thread index within the block.
  int block_id =
      blockIdx.x + blockIdx.y * gridDim.x + gridDim.x * gridDim.y * blockIdx.z;
  int thread_id = (threadIdx.z * (blockDim.x * blockDim.y)) +
                  (threadIdx.y * blockDim.x) + threadIdx.x;

  // Only one thread of the whole grid reports argument errors, so a bad call
  // produces a single line of output instead of one per thread.
  bool const is_reporter = (thread_id == 0 && block_id == 0);

  // Note: 0 is accepted here (it means "all threads") even though the message
  // text below advertises the range as starting at 1.
  if (N < 0 || N > total_threads) {
    if (is_reporter)
      printf("Thread number N = %d should between [1, %d].\n", N,
             total_threads);

    __syncthreads();

    return;
  }

  int total_elements = frag.size();

  // As with N, 0 is accepted and means "all elements".
  if (M < 0 || M > total_elements) {
    if (is_reporter)
      printf("Element number M = %d should between [1, %d].\n", M,
             total_elements);

    __syncthreads();

    return;
  }

  // Resolve the "dump everything" defaults.
  if (N == 0) N = total_threads;

  if (M == 0) M = total_elements;

  // S >= 1 is also what guarantees the element loop below terminates.
  if (S < 1 || S > M) {
    if (is_reporter)
      printf("Stride S = %d should between [1, %d].\n", S, M);

    __syncthreads();

    return;
  }

  if (is_reporter)
    printf("\n*******************Dumping the fragments*******************\n\n");

  // Threads take turns: in iteration `tid` only the thread with that index
  // prints, and the barrier at the end of each iteration keeps the other
  // threads of the block from running ahead to their own turn.
  CUTLASS_PRAGMA_NO_UNROLL
  for (int tid = 0; tid < N; ++tid) {
    if (tid == thread_id) {
      // Label: thread block, warp (tid / 32) and lane within the warp.
      printf("TB%d W%d T%d: ", block_id, tid / 32, tid & 31);

      CUTLASS_PRAGMA_NO_UNROLL
      for (int i = 0; i < M; i += S) {
        printf("%.0f ", float(typename Fragment::value_type(frag[i])));
      }

      printf("\n");
    }

    __syncthreads();
  }

  if (is_reporter)
    printf("\n***********************************************************\n\n");

  __syncthreads();

  return;
}
/// Number of bytes printed per output row by dump_shmem().
#define SHMEM_ROW_SIZE 128

/// Prints a range of memory (intended for shared memory) to stdout for
/// debugging.
///
/// Thread 0 of each thread block prints `size` elements starting at `ptr`,
/// laid out in rows of SHMEM_ROW_SIZE bytes and visiting every S-th element of
/// each row.  Each element is converted to `float` and printed with "%.0f".
///
/// This function contains block-wide barriers (`__syncthreads()`), so it must
/// be called by every thread of the block, with the same arguments.
///
/// \param ptr   First element to dump.  Must not be null.
/// \param size  Number of elements to dump.  Must be at least 1.
/// \param S     Element stride within a row.  Must be in
///              [1, SHMEM_ROW_SIZE / sizeof(Element)].
///
/// On an invalid argument, thread 0 of block 0 prints a diagnostic and every
/// thread returns without dumping anything.
template <typename Element>
CUTLASS_DEVICE void dump_shmem(Element const* ptr, size_t size, int S = 1) {
  // Linearized block index within the grid and thread index within the block.
  int block_id =
      blockIdx.x + blockIdx.y * gridDim.x + gridDim.x * gridDim.y * blockIdx.z;
  int thread_id = (threadIdx.z * (blockDim.x * blockDim.y)) +
                  (threadIdx.y * blockDim.x) + threadIdx.x;

  // Only one thread of the whole grid reports argument errors.
  bool const is_reporter = (thread_id == 0 && block_id == 0);

  if (ptr == nullptr) {
    if (is_reporter) printf("ptr is null.\n");

    __syncthreads();

    return;
  }

  if (size < 1) {
    if (is_reporter)
      printf("Element size is less than 1\n");

    __syncthreads();

    return;
  }

  // How many elements fit in one printed row.  This is 0 when Element is
  // wider than SHMEM_ROW_SIZE; the stride check below rejects that case, so
  // the row loop never runs with a zero step.
  int row_elements = int(SHMEM_ROW_SIZE / sizeof(Element));

  if (S < 1 || S > row_elements) {
    if (is_reporter)
      printf("Stride S = %d should between [1, %d].\n", S, row_elements);

    __syncthreads();

    return;
  }

  // Wait for the whole block so that writes issued by other threads before
  // this call are visible to the printing thread.
  __syncthreads();

  if (thread_id == 0) {
    printf("\n********Dumping the shared memory of TB %d*******\n\n", block_id);

    size_t const row_step = size_t(row_elements);
    size_t const stride = size_t(S);

    for (size_t row_begin = 0; row_begin < size; row_begin += row_step) {
      // Clamp the final row so that nothing at or beyond ptr[size] is read
      // when `size` is not a multiple of the row width.
      size_t const remaining = size - row_begin;
      size_t const row_end =
          row_begin + (remaining < row_step ? remaining : row_step);

      for (size_t idx = row_begin; idx < row_end; idx += stride) {
        printf("%.0f ", float(ptr[idx]));
      }

      printf("\n");
    }

    printf("\n***********************************************************\n\n");
  }

  __syncthreads();

  return;
}
Definition: aligned_buffer.h:35
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
CUTLASS_DEVICE void dump_fragment(Fragment const &frag, int N=0, int M=0, int S=1)
Definition: device_dump.h:48
#define CUTLASS_PRAGMA_NO_UNROLL
Definition: cutlass.h:111
#define SHMEM_ROW_SIZE
Definition: device_dump.h:120
Basic include for CUTLASS.
CUTLASS_DEVICE void dump_shmem(Element const *ptr, size_t size, int S=1)
Definition: device_dump.h:125