61 wait_thread(thread_id < 0 || thread_id == 0),
70 asm volatile (
"ld.global.cg.s32 %0, [%1];\n" :
"=r"(
state) :
"l"(lock));
81 void wait(
int status = 0) {
84 while (state != status) {
103 asm volatile (
"st.global.cg.s32 [%0], %1;\n" : :
"l"(
lock),
"r"(status));
Definition: aligned_buffer.h:35
Defines common types used for all GEMM-like operators.
CUTLASS_HOST_DEVICE Semaphore(int *lock_, int thread_id)
Implements a semaphore to wait for a flag to reach a given value.
Definition: semaphore.h:59
CUTLASS_DEVICE void fetch()
Permit fetching the synchronization mechanism early.
Definition: semaphore.h:68
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe to use in a union.
Defines a Shape template for matrix tiles.
CUTLASS_DEVICE int get_state() const
Gets the internal state.
Definition: semaphore.h:75
bool wait_thread
Definition: semaphore.h:52
AlignedBuffer is a container for trivially copyable elements suitable for use in unions and shared memory.
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
Top-level include for all CUTLASS numeric types.
CTA-wide semaphore for inter-CTA synchronization.
Definition: semaphore.h:48
CUTLASS_DEVICE void release(int status=0)
Updates the lock with the given result.
Definition: semaphore.h:98
CUTLASS_DEVICE void wait(int status=0)
Waits until the semaphore is equal to the given value.
Definition: semaphore.h:81
Basic include for CUTLASS.
int state
Definition: semaphore.h:53
int * lock
Definition: semaphore.h:51