tcgen05.cp
PTX ISA: tcgen05.cp
tcgen05.cp.cta_group::1.128x256b
// tcgen05.cp.cta_group.128x256b [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::1` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x256b(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::2.128x256b
// tcgen05.cp.cta_group.128x256b [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::2` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x256b(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::1.4x256b
// tcgen05.cp.cta_group.4x256b [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::1` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_4x256b(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::2.4x256b
// tcgen05.cp.cta_group.4x256b [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::2` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_4x256b(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::1.128x128b
// tcgen05.cp.cta_group.128x128b [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::1` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x128b(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::2.128x128b
// tcgen05.cp.cta_group.128x128b [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::2` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x128b(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::1.64x128b.warpx2::02_13
// tcgen05.cp.cta_group.64x128b.warpx2::02_13 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::1` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_02_13(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::2.64x128b.warpx2::02_13
// tcgen05.cp.cta_group.64x128b.warpx2::02_13 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::2` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_02_13(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::1.64x128b.warpx2::01_23
// tcgen05.cp.cta_group.64x128b.warpx2::01_23 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::1` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_01_23(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::2.64x128b.warpx2::01_23
// tcgen05.cp.cta_group.64x128b.warpx2::01_23 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::2` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_01_23(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::1.32x128b.warpx4
// tcgen05.cp.cta_group.32x128b.warpx4 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::1` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_32x128b_warpx4(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::2.32x128b.warpx4
// tcgen05.cp.cta_group.32x128b.warpx4 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::2` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_32x128b_warpx4(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::1.128x256b.b8x16.b6x16_p32
// tcgen05.cp.cta_group.128x256b.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::1` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x256b_b8x16_b6x16_p32(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::2.128x256b.b8x16.b6x16_p32
// tcgen05.cp.cta_group.128x256b.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::2` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x256b_b8x16_b6x16_p32(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::1.4x256b.b8x16.b6x16_p32
// tcgen05.cp.cta_group.4x256b.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::1` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_4x256b_b8x16_b6x16_p32(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::2.4x256b.b8x16.b6x16_p32
// tcgen05.cp.cta_group.4x256b.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::2` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_4x256b_b8x16_b6x16_p32(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::1.128x128b.b8x16.b6x16_p32
// tcgen05.cp.cta_group.128x128b.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::1` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x128b_b8x16_b6x16_p32(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::2.128x128b.b8x16.b6x16_p32
// tcgen05.cp.cta_group.128x128b.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::2` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x128b_b8x16_b6x16_p32(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::1.64x128b.warpx2::02_13.b8x16.b6x16_p32
// tcgen05.cp.cta_group.64x128b.warpx2::02_13.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::1` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_02_13_b8x16_b6x16_p32(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::2.64x128b.warpx2::02_13.b8x16.b6x16_p32
// tcgen05.cp.cta_group.64x128b.warpx2::02_13.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::2` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_02_13_b8x16_b6x16_p32(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::1.64x128b.warpx2::01_23.b8x16.b6x16_p32
// tcgen05.cp.cta_group.64x128b.warpx2::01_23.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::1` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_01_23_b8x16_b6x16_p32(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::2.64x128b.warpx2::01_23.b8x16.b6x16_p32
// tcgen05.cp.cta_group.64x128b.warpx2::01_23.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::2` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_01_23_b8x16_b6x16_p32(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::1.32x128b.warpx4.b8x16.b6x16_p32
// tcgen05.cp.cta_group.32x128b.warpx4.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::1` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_32x128b_warpx4_b8x16_b6x16_p32(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::2.32x128b.warpx4.b8x16.b6x16_p32
// tcgen05.cp.cta_group.32x128b.warpx4.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::2` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_32x128b_warpx4_b8x16_b6x16_p32(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::1.128x256b.b8x16.b4x16_p64
// tcgen05.cp.cta_group.128x256b.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::1` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x256b_b8x16_b4x16_p64(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::2.128x256b.b8x16.b4x16_p64
// tcgen05.cp.cta_group.128x256b.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::2` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x256b_b8x16_b4x16_p64(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::1.4x256b.b8x16.b4x16_p64
// tcgen05.cp.cta_group.4x256b.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::1` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_4x256b_b8x16_b4x16_p64(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::2.4x256b.b8x16.b4x16_p64
// tcgen05.cp.cta_group.4x256b.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::2` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_4x256b_b8x16_b4x16_p64(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::1.128x128b.b8x16.b4x16_p64
// tcgen05.cp.cta_group.128x128b.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::1` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x128b_b8x16_b4x16_p64(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::2.128x128b.b8x16.b4x16_p64
// tcgen05.cp.cta_group.128x128b.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::2` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x128b_b8x16_b4x16_p64(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::1.64x128b.warpx2::02_13.b8x16.b4x16_p64
// tcgen05.cp.cta_group.64x128b.warpx2::02_13.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::1` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_02_13_b8x16_b4x16_p64(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::2.64x128b.warpx2::02_13.b8x16.b4x16_p64
// tcgen05.cp.cta_group.64x128b.warpx2::02_13.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::2` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_02_13_b8x16_b4x16_p64(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::1.64x128b.warpx2::01_23.b8x16.b4x16_p64
// tcgen05.cp.cta_group.64x128b.warpx2::01_23.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::1` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_01_23_b8x16_b4x16_p64(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::2.64x128b.warpx2::01_23.b8x16.b4x16_p64
// tcgen05.cp.cta_group.64x128b.warpx2::01_23.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::2` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_01_23_b8x16_b4x16_p64(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::1.32x128b.warpx4.b8x16.b4x16_p64
// tcgen05.cp.cta_group.32x128b.warpx4.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::1` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_32x128b_warpx4_b8x16_b4x16_p64(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);
tcgen05.cp.cta_group::2.32x128b.warpx4.b8x16.b4x16_p64
// tcgen05.cp.cta_group.32x128b.warpx4.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Wrapper declaration for the PTX form above, listed here under the
// `.cta_group::2` heading. One template covers both `.cta_group` values.
//   cta_group: tag object; its Cta_Group template argument picks the qualifier.
//   taddr:     32-bit value for the `[taddr]` operand.
//   s_desc:    64-bit value for the `s_desc` operand.
// No return value. Operand semantics: see the PTX ISA entry for tcgen05.cp.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_32x128b_warpx4_b8x16_b4x16_p64(
cuda::ptx::cta_group_t<Cta_Group> cta_group,
uint32_t taddr,
uint64_t s_desc);