tcgen05.cp

tcgen05.cp.cta_group::1.128x256b

// tcgen05.cp.cta_group.128x256b [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::1` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): operand encoding and copy semantics are not visible in this
// listing -- confirm against the PTX ISA before use.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x256b(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::2.128x256b

// tcgen05.cp.cta_group.128x256b [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::2` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): operand encoding and copy semantics are not visible in this
// listing -- confirm against the PTX ISA before use.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x256b(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::1.4x256b

// tcgen05.cp.cta_group.4x256b [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::1` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): operand encoding and copy semantics are not visible in this
// listing -- confirm against the PTX ISA before use.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_4x256b(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::2.4x256b

// tcgen05.cp.cta_group.4x256b [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::2` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): operand encoding and copy semantics are not visible in this
// listing -- confirm against the PTX ISA before use.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_4x256b(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::1.128x128b

// tcgen05.cp.cta_group.128x128b [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::1` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): operand encoding and copy semantics are not visible in this
// listing -- confirm against the PTX ISA before use.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x128b(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::2.128x128b

// tcgen05.cp.cta_group.128x128b [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::2` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): operand encoding and copy semantics are not visible in this
// listing -- confirm against the PTX ISA before use.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x128b(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::1.64x128b.warpx2::02_13

// tcgen05.cp.cta_group.64x128b.warpx2::02_13 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::1` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.warpx2::02_13` qualifier and the copy
// semantics are not visible in this listing -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_02_13(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::2.64x128b.warpx2::02_13

// tcgen05.cp.cta_group.64x128b.warpx2::02_13 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::2` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.warpx2::02_13` qualifier and the copy
// semantics are not visible in this listing -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_02_13(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::1.64x128b.warpx2::01_23

// tcgen05.cp.cta_group.64x128b.warpx2::01_23 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::1` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.warpx2::01_23` qualifier and the copy
// semantics are not visible in this listing -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_01_23(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::2.64x128b.warpx2::01_23

// tcgen05.cp.cta_group.64x128b.warpx2::01_23 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::2` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.warpx2::01_23` qualifier and the copy
// semantics are not visible in this listing -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_01_23(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::1.32x128b.warpx4

// tcgen05.cp.cta_group.32x128b.warpx4 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::1` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.warpx4` qualifier and the copy
// semantics are not visible in this listing -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_32x128b_warpx4(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::2.32x128b.warpx4

// tcgen05.cp.cta_group.32x128b.warpx4 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::2` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.warpx4` qualifier and the copy
// semantics are not visible in this listing -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_32x128b_warpx4(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::1.128x256b.b8x16.b6x16_p32

// tcgen05.cp.cta_group.128x256b.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::1` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.b8x16.b6x16_p32` qualifiers and the copy
// semantics are not visible in this listing -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x256b_b8x16_b6x16_p32(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::2.128x256b.b8x16.b6x16_p32

// tcgen05.cp.cta_group.128x256b.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::2` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.b8x16.b6x16_p32` qualifiers and the copy
// semantics are not visible in this listing -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x256b_b8x16_b6x16_p32(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::1.4x256b.b8x16.b6x16_p32

// tcgen05.cp.cta_group.4x256b.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::1` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.b8x16.b6x16_p32` qualifiers and the copy
// semantics are not visible in this listing -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_4x256b_b8x16_b6x16_p32(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::2.4x256b.b8x16.b6x16_p32

// tcgen05.cp.cta_group.4x256b.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::2` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.b8x16.b6x16_p32` qualifiers and the copy
// semantics are not visible in this listing -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_4x256b_b8x16_b6x16_p32(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::1.128x128b.b8x16.b6x16_p32

// tcgen05.cp.cta_group.128x128b.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::1` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.b8x16.b6x16_p32` qualifiers and the copy
// semantics are not visible in this listing -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x128b_b8x16_b6x16_p32(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::2.128x128b.b8x16.b6x16_p32

// tcgen05.cp.cta_group.128x128b.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::2` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.b8x16.b6x16_p32` qualifiers and the copy
// semantics are not visible in this listing -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x128b_b8x16_b6x16_p32(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::1.64x128b.warpx2::02_13.b8x16.b6x16_p32

// tcgen05.cp.cta_group.64x128b.warpx2::02_13.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::1` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.warpx2::02_13.b8x16.b6x16_p32` qualifiers
// and the copy semantics are not visible here -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_02_13_b8x16_b6x16_p32(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::2.64x128b.warpx2::02_13.b8x16.b6x16_p32

// tcgen05.cp.cta_group.64x128b.warpx2::02_13.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::2` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.warpx2::02_13.b8x16.b6x16_p32` qualifiers
// and the copy semantics are not visible here -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_02_13_b8x16_b6x16_p32(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::1.64x128b.warpx2::01_23.b8x16.b6x16_p32

// tcgen05.cp.cta_group.64x128b.warpx2::01_23.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::1` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.warpx2::01_23.b8x16.b6x16_p32` qualifiers
// and the copy semantics are not visible here -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_01_23_b8x16_b6x16_p32(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::2.64x128b.warpx2::01_23.b8x16.b6x16_p32

// tcgen05.cp.cta_group.64x128b.warpx2::01_23.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::2` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.warpx2::01_23.b8x16.b6x16_p32` qualifiers
// and the copy semantics are not visible here -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_01_23_b8x16_b6x16_p32(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::1.32x128b.warpx4.b8x16.b6x16_p32

// tcgen05.cp.cta_group.32x128b.warpx4.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::1` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.warpx4.b8x16.b6x16_p32` qualifiers and
// the copy semantics are not visible here -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_32x128b_warpx4_b8x16_b6x16_p32(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::2.32x128b.warpx4.b8x16.b6x16_p32

// tcgen05.cp.cta_group.32x128b.warpx4.b8x16.b6x16_p32 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::2` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.warpx4.b8x16.b6x16_p32` qualifiers and
// the copy semantics are not visible here -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_32x128b_warpx4_b8x16_b6x16_p32(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::1.128x256b.b8x16.b4x16_p64

// tcgen05.cp.cta_group.128x256b.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::1` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.b8x16.b4x16_p64` qualifiers and the copy
// semantics are not visible in this listing -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x256b_b8x16_b4x16_p64(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::2.128x256b.b8x16.b4x16_p64

// tcgen05.cp.cta_group.128x256b.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::2` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.b8x16.b4x16_p64` qualifiers and the copy
// semantics are not visible in this listing -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x256b_b8x16_b4x16_p64(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::1.4x256b.b8x16.b4x16_p64

// tcgen05.cp.cta_group.4x256b.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::1` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.b8x16.b4x16_p64` qualifiers and the copy
// semantics are not visible in this listing -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_4x256b_b8x16_b4x16_p64(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::2.4x256b.b8x16.b4x16_p64

// tcgen05.cp.cta_group.4x256b.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::2` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.b8x16.b4x16_p64` qualifiers and the copy
// semantics are not visible in this listing -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_4x256b_b8x16_b4x16_p64(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::1.128x128b.b8x16.b4x16_p64

// tcgen05.cp.cta_group.128x128b.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::1` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.b8x16.b4x16_p64` qualifiers and the copy
// semantics are not visible in this listing -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x128b_b8x16_b4x16_p64(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::2.128x128b.b8x16.b4x16_p64

// tcgen05.cp.cta_group.128x128b.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::2` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.b8x16.b4x16_p64` qualifiers and the copy
// semantics are not visible in this listing -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_128x128b_b8x16_b4x16_p64(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::1.64x128b.warpx2::02_13.b8x16.b4x16_p64

// tcgen05.cp.cta_group.64x128b.warpx2::02_13.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::1` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.warpx2::02_13.b8x16.b4x16_p64` qualifiers
// and the copy semantics are not visible here -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_02_13_b8x16_b4x16_p64(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::2.64x128b.warpx2::02_13.b8x16.b4x16_p64

// tcgen05.cp.cta_group.64x128b.warpx2::02_13.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::2` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.warpx2::02_13.b8x16.b4x16_p64` qualifiers
// and the copy semantics are not visible here -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_02_13_b8x16_b4x16_p64(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::1.64x128b.warpx2::01_23.b8x16.b4x16_p64

// tcgen05.cp.cta_group.64x128b.warpx2::01_23.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::1` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.warpx2::01_23.b8x16.b4x16_p64` qualifiers
// and the copy semantics are not visible here -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_01_23_b8x16_b4x16_p64(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::2.64x128b.warpx2::01_23.b8x16.b4x16_p64

// tcgen05.cp.cta_group.64x128b.warpx2::01_23.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::2` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.warpx2::01_23.b8x16.b4x16_p64` qualifiers
// and the copy semantics are not visible here -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_64x128b_warpx2_01_23_b8x16_b4x16_p64(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::1.32x128b.warpx4.b8x16.b4x16_p64

// tcgen05.cp.cta_group.32x128b.warpx4.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::1` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.warpx4.b8x16.b4x16_p64` qualifiers and
// the copy semantics are not visible here -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_32x128b_warpx4_b8x16_b4x16_p64(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);

tcgen05.cp.cta_group::2.32x128b.warpx4.b8x16.b4x16_p64

// tcgen05.cp.cta_group.32x128b.warpx4.b8x16.b4x16_p64 [taddr], s_desc; // PTX ISA 86, SM_100a, SM_101a
// .cta_group = { .cta_group::1, .cta_group::2 }
//
// Device-only declaration (no body shown) of the wrapper for the instruction
// above; this entry is listed under the `.cta_group::2` heading.
//   cta_group : tag object typed on Cta_Group (presumably one of the
//               .cta_group values listed above).
//   taddr     : 32-bit value supplied as the [taddr] operand.
//   s_desc    : 64-bit value supplied as the s_desc operand.
// NOTE(review): the meaning of the `.warpx4.b8x16.b4x16_p64` qualifiers and
// the copy semantics are not visible here -- confirm against the PTX ISA.
template <cuda::ptx::dot_cta_group Cta_Group>
__device__ static inline void tcgen05_cp_32x128b_warpx4_b8x16_b4x16_p64(
  cuda::ptx::cta_group_t<Cta_Group> cta_group,
  uint32_t taddr,
  uint64_t s_desc);