tcgen05.st

tcgen05.st.sync.aligned.16x64b.x1.b32

// tcgen05.st.sync.aligned.16x64b.x1.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x64b, .x1): `values` is an array of exactly 1 element.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x64b(
  uint32_t taddr,
  const B32 (&values)[1]);

tcgen05.st.sync.aligned.16x64b.x1.unpack::16b.b32

// tcgen05.st.sync.aligned.16x64b.x1.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x64b, .x1, .unpack::16b): `values` is an array of exactly 1 element.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x64b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[1]);

tcgen05.st.sync.aligned.16x64b.x2.b32

// tcgen05.st.sync.aligned.16x64b.x2.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x64b, .x2): `values` is an array of exactly 2 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x64b(
  uint32_t taddr,
  const B32 (&values)[2]);

tcgen05.st.sync.aligned.16x64b.x2.unpack::16b.b32

// tcgen05.st.sync.aligned.16x64b.x2.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x64b, .x2, .unpack::16b): `values` is an array of exactly 2 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x64b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[2]);

tcgen05.st.sync.aligned.16x64b.x4.b32

// tcgen05.st.sync.aligned.16x64b.x4.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x64b, .x4): `values` is an array of exactly 4 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x64b(
  uint32_t taddr,
  const B32 (&values)[4]);

tcgen05.st.sync.aligned.16x64b.x4.unpack::16b.b32

// tcgen05.st.sync.aligned.16x64b.x4.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x64b, .x4, .unpack::16b): `values` is an array of exactly 4 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x64b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[4]);

tcgen05.st.sync.aligned.16x64b.x8.b32

// tcgen05.st.sync.aligned.16x64b.x8.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x64b, .x8): `values` is an array of exactly 8 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x64b(
  uint32_t taddr,
  const B32 (&values)[8]);

tcgen05.st.sync.aligned.16x64b.x8.unpack::16b.b32

// tcgen05.st.sync.aligned.16x64b.x8.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x64b, .x8, .unpack::16b): `values` is an array of exactly 8 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x64b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[8]);

tcgen05.st.sync.aligned.16x64b.x16.b32

// tcgen05.st.sync.aligned.16x64b.x16.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x64b, .x16): `values` is an array of exactly 16 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x64b(
  uint32_t taddr,
  const B32 (&values)[16]);

tcgen05.st.sync.aligned.16x64b.x16.unpack::16b.b32

// tcgen05.st.sync.aligned.16x64b.x16.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x64b, .x16, .unpack::16b): `values` is an array of exactly 16 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x64b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[16]);

tcgen05.st.sync.aligned.16x64b.x32.b32

// tcgen05.st.sync.aligned.16x64b.x32.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x64b, .x32): `values` is an array of exactly 32 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x64b(
  uint32_t taddr,
  const B32 (&values)[32]);

tcgen05.st.sync.aligned.16x64b.x32.unpack::16b.b32

// tcgen05.st.sync.aligned.16x64b.x32.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x64b, .x32, .unpack::16b): `values` is an array of exactly 32 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x64b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[32]);

tcgen05.st.sync.aligned.16x64b.x64.b32

// tcgen05.st.sync.aligned.16x64b.x64.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x64b, .x64): `values` is an array of exactly 64 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x64b(
  uint32_t taddr,
  const B32 (&values)[64]);

tcgen05.st.sync.aligned.16x64b.x64.unpack::16b.b32

// tcgen05.st.sync.aligned.16x64b.x64.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x64b, .x64, .unpack::16b): `values` is an array of exactly 64 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x64b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[64]);

tcgen05.st.sync.aligned.16x64b.x128.b32

// tcgen05.st.sync.aligned.16x64b.x128.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x64b, .x128): `values` is an array of exactly 128 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x64b(
  uint32_t taddr,
  const B32 (&values)[128]);

tcgen05.st.sync.aligned.16x64b.x128.unpack::16b.b32

// tcgen05.st.sync.aligned.16x64b.x128.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x64b, .x128, .unpack::16b): `values` is an array of exactly 128 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x64b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[128]);

tcgen05.st.sync.aligned.16x128b.x1.b32

// tcgen05.st.sync.aligned.16x128b.x1.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x128b, .x1): `values` is an array of exactly 2 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x128b(
  uint32_t taddr,
  const B32 (&values)[2]);

tcgen05.st.sync.aligned.16x128b.x1.unpack::16b.b32

// tcgen05.st.sync.aligned.16x128b.x1.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x128b, .x1, .unpack::16b): `values` is an array of exactly 2 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x128b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[2]);

tcgen05.st.sync.aligned.16x128b.x2.b32

// tcgen05.st.sync.aligned.16x128b.x2.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x128b, .x2): `values` is an array of exactly 4 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x128b(
  uint32_t taddr,
  const B32 (&values)[4]);

tcgen05.st.sync.aligned.16x128b.x2.unpack::16b.b32

// tcgen05.st.sync.aligned.16x128b.x2.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x128b, .x2, .unpack::16b): `values` is an array of exactly 4 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x128b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[4]);

tcgen05.st.sync.aligned.16x128b.x4.b32

// tcgen05.st.sync.aligned.16x128b.x4.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x128b, .x4): `values` is an array of exactly 8 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x128b(
  uint32_t taddr,
  const B32 (&values)[8]);

tcgen05.st.sync.aligned.16x128b.x4.unpack::16b.b32

// tcgen05.st.sync.aligned.16x128b.x4.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x128b, .x4, .unpack::16b): `values` is an array of exactly 8 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x128b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[8]);

tcgen05.st.sync.aligned.16x128b.x8.b32

// tcgen05.st.sync.aligned.16x128b.x8.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x128b, .x8): `values` is an array of exactly 16 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x128b(
  uint32_t taddr,
  const B32 (&values)[16]);

tcgen05.st.sync.aligned.16x128b.x8.unpack::16b.b32

// tcgen05.st.sync.aligned.16x128b.x8.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x128b, .x8, .unpack::16b): `values` is an array of exactly 16 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x128b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[16]);

tcgen05.st.sync.aligned.16x128b.x16.b32

// tcgen05.st.sync.aligned.16x128b.x16.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x128b, .x16): `values` is an array of exactly 32 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x128b(
  uint32_t taddr,
  const B32 (&values)[32]);

tcgen05.st.sync.aligned.16x128b.x16.unpack::16b.b32

// tcgen05.st.sync.aligned.16x128b.x16.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x128b, .x16, .unpack::16b): `values` is an array of exactly 32 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x128b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[32]);

tcgen05.st.sync.aligned.16x128b.x32.b32

// tcgen05.st.sync.aligned.16x128b.x32.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x128b, .x32): `values` is an array of exactly 64 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x128b(
  uint32_t taddr,
  const B32 (&values)[64]);

tcgen05.st.sync.aligned.16x128b.x32.unpack::16b.b32

// tcgen05.st.sync.aligned.16x128b.x32.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x128b, .x32, .unpack::16b): `values` is an array of exactly 64 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x128b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[64]);

tcgen05.st.sync.aligned.16x128b.x64.b32

// tcgen05.st.sync.aligned.16x128b.x64.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x128b, .x64): `values` is an array of exactly 128 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x128b(
  uint32_t taddr,
  const B32 (&values)[128]);

tcgen05.st.sync.aligned.16x128b.x64.unpack::16b.b32

// tcgen05.st.sync.aligned.16x128b.x64.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x128b, .x64, .unpack::16b): `values` is an array of exactly 128 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x128b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[128]);

tcgen05.st.sync.aligned.16x256b.x1.b32

// tcgen05.st.sync.aligned.16x256b.x1.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x256b, .x1): `values` is an array of exactly 4 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x256b(
  uint32_t taddr,
  const B32 (&values)[4]);

tcgen05.st.sync.aligned.16x256b.x1.unpack::16b.b32

// tcgen05.st.sync.aligned.16x256b.x1.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x256b, .x1, .unpack::16b): `values` is an array of exactly 4 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x256b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[4]);

tcgen05.st.sync.aligned.16x256b.x2.b32

// tcgen05.st.sync.aligned.16x256b.x2.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x256b, .x2): `values` is an array of exactly 8 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x256b(
  uint32_t taddr,
  const B32 (&values)[8]);

tcgen05.st.sync.aligned.16x256b.x2.unpack::16b.b32

// tcgen05.st.sync.aligned.16x256b.x2.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x256b, .x2, .unpack::16b): `values` is an array of exactly 8 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x256b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[8]);

tcgen05.st.sync.aligned.16x256b.x4.b32

// tcgen05.st.sync.aligned.16x256b.x4.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x256b, .x4): `values` is an array of exactly 16 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x256b(
  uint32_t taddr,
  const B32 (&values)[16]);

tcgen05.st.sync.aligned.16x256b.x4.unpack::16b.b32

// tcgen05.st.sync.aligned.16x256b.x4.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x256b, .x4, .unpack::16b): `values` is an array of exactly 16 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x256b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[16]);

tcgen05.st.sync.aligned.16x256b.x8.b32

// tcgen05.st.sync.aligned.16x256b.x8.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x256b, .x8): `values` is an array of exactly 32 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x256b(
  uint32_t taddr,
  const B32 (&values)[32]);

tcgen05.st.sync.aligned.16x256b.x8.unpack::16b.b32

// tcgen05.st.sync.aligned.16x256b.x8.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x256b, .x8, .unpack::16b): `values` is an array of exactly 32 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x256b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[32]);

tcgen05.st.sync.aligned.16x256b.x16.b32

// tcgen05.st.sync.aligned.16x256b.x16.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x256b, .x16): `values` is an array of exactly 64 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x256b(
  uint32_t taddr,
  const B32 (&values)[64]);

tcgen05.st.sync.aligned.16x256b.x16.unpack::16b.b32

// tcgen05.st.sync.aligned.16x256b.x16.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x256b, .x16, .unpack::16b): `values` is an array of exactly 64 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x256b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[64]);

tcgen05.st.sync.aligned.16x256b.x32.b32

// tcgen05.st.sync.aligned.16x256b.x32.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x256b, .x32): `values` is an array of exactly 128 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x256b(
  uint32_t taddr,
  const B32 (&values)[128]);

tcgen05.st.sync.aligned.16x256b.x32.unpack::16b.b32

// tcgen05.st.sync.aligned.16x256b.x32.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x256b, .x32, .unpack::16b): `values` is an array of exactly 128 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x256b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[128]);

tcgen05.st.sync.aligned.32x32b.x1.b32

// tcgen05.st.sync.aligned.32x32b.x1.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.32x32b, .x1): `values` is an array of exactly 1 element.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_32x32b(
  uint32_t taddr,
  const B32 (&values)[1]);

tcgen05.st.sync.aligned.32x32b.x1.unpack::16b.b32

// tcgen05.st.sync.aligned.32x32b.x1.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.32x32b, .x1, .unpack::16b): `values` is an array of exactly 1 element.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_32x32b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[1]);

tcgen05.st.sync.aligned.32x32b.x2.b32

// tcgen05.st.sync.aligned.32x32b.x2.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.32x32b, .x2): `values` is an array of exactly 2 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_32x32b(
  uint32_t taddr,
  const B32 (&values)[2]);

tcgen05.st.sync.aligned.32x32b.x2.unpack::16b.b32

// tcgen05.st.sync.aligned.32x32b.x2.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.32x32b, .x2, .unpack::16b): `values` is an array of exactly 2 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_32x32b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[2]);

tcgen05.st.sync.aligned.32x32b.x4.b32

// tcgen05.st.sync.aligned.32x32b.x4.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.32x32b, .x4): `values` is an array of exactly 4 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_32x32b(
  uint32_t taddr,
  const B32 (&values)[4]);

tcgen05.st.sync.aligned.32x32b.x4.unpack::16b.b32

// tcgen05.st.sync.aligned.32x32b.x4.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.32x32b, .x4, .unpack::16b): `values` is an array of exactly 4 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_32x32b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[4]);

tcgen05.st.sync.aligned.32x32b.x8.b32

// tcgen05.st.sync.aligned.32x32b.x8.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.32x32b, .x8): `values` is an array of exactly 8 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_32x32b(
  uint32_t taddr,
  const B32 (&values)[8]);

tcgen05.st.sync.aligned.32x32b.x8.unpack::16b.b32

// tcgen05.st.sync.aligned.32x32b.x8.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.32x32b, .x8, .unpack::16b): `values` is an array of exactly 8 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_32x32b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[8]);

tcgen05.st.sync.aligned.32x32b.x16.b32

// tcgen05.st.sync.aligned.32x32b.x16.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.32x32b, .x16): `values` is an array of exactly 16 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_32x32b(
  uint32_t taddr,
  const B32 (&values)[16]);

tcgen05.st.sync.aligned.32x32b.x16.unpack::16b.b32

// tcgen05.st.sync.aligned.32x32b.x16.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.32x32b, .x16, .unpack::16b): `values` is an array of exactly 16 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_32x32b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[16]);

tcgen05.st.sync.aligned.32x32b.x32.b32

// tcgen05.st.sync.aligned.32x32b.x32.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.32x32b, .x32): `values` is an array of exactly 32 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_32x32b(
  uint32_t taddr,
  const B32 (&values)[32]);

tcgen05.st.sync.aligned.32x32b.x32.unpack::16b.b32

// tcgen05.st.sync.aligned.32x32b.x32.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.32x32b, .x32, .unpack::16b): `values` is an array of exactly 32 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_32x32b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[32]);

tcgen05.st.sync.aligned.32x32b.x64.b32

// tcgen05.st.sync.aligned.32x32b.x64.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.32x32b, .x64): `values` is an array of exactly 64 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_32x32b(
  uint32_t taddr,
  const B32 (&values)[64]);

tcgen05.st.sync.aligned.32x32b.x64.unpack::16b.b32

// tcgen05.st.sync.aligned.32x32b.x64.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.32x32b, .x64, .unpack::16b): `values` is an array of exactly 64 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_32x32b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[64]);

tcgen05.st.sync.aligned.32x32b.x128.b32

// tcgen05.st.sync.aligned.32x32b.x128.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.32x32b, .x128): `values` is an array of exactly 128 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_32x32b(
  uint32_t taddr,
  const B32 (&values)[128]);

tcgen05.st.sync.aligned.32x32b.x128.unpack::16b.b32

// tcgen05.st.sync.aligned.32x32b.x128.unpack::16b.b32 [taddr], values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.32x32b, .x128, .unpack::16b): `values` is an array of exactly 128 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_32x32b_unpack_16b(
  uint32_t taddr,
  const B32 (&values)[128]);

tcgen05.st.sync.aligned.16x32bx2.x1.b32

// tcgen05.st.sync.aligned.16x32bx2.x1.b32 [taddr], immHalfSplitoff, values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x32bx2, .x1): `values` is an array of exactly 1 element.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
// immHalfSplitoff has type cuda::ptx::n32_t<N32>, with N32 an int template parameter.
template <int N32, typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x32bx2(
  uint32_t taddr,
  cuda::ptx::n32_t<N32> immHalfSplitoff,
  const B32 (&values)[1]);

tcgen05.st.sync.aligned.16x32bx2.x1.unpack::16b.b32

// tcgen05.st.sync.aligned.16x32bx2.x1.unpack::16b.b32 [taddr], immHalfSplitoff, values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x32bx2, .x1, .unpack::16b): `values` is an array of exactly 1 element.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
// immHalfSplitoff has type cuda::ptx::n32_t<N32>, with N32 an int template parameter.
template <int N32, typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x32bx2_unpack_16b(
  uint32_t taddr,
  cuda::ptx::n32_t<N32> immHalfSplitoff,
  const B32 (&values)[1]);

tcgen05.st.sync.aligned.16x32bx2.x2.b32

// tcgen05.st.sync.aligned.16x32bx2.x2.b32 [taddr], immHalfSplitoff, values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x32bx2, .x2): `values` is an array of exactly 2 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
// immHalfSplitoff has type cuda::ptx::n32_t<N32>, with N32 an int template parameter.
template <int N32, typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x32bx2(
  uint32_t taddr,
  cuda::ptx::n32_t<N32> immHalfSplitoff,
  const B32 (&values)[2]);

tcgen05.st.sync.aligned.16x32bx2.x2.unpack::16b.b32

// tcgen05.st.sync.aligned.16x32bx2.x2.unpack::16b.b32 [taddr], immHalfSplitoff, values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x32bx2, .x2, .unpack::16b): `values` is an array of exactly 2 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
// immHalfSplitoff has type cuda::ptx::n32_t<N32>, with N32 an int template parameter.
template <int N32, typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x32bx2_unpack_16b(
  uint32_t taddr,
  cuda::ptx::n32_t<N32> immHalfSplitoff,
  const B32 (&values)[2]);

tcgen05.st.sync.aligned.16x32bx2.x4.b32

// tcgen05.st.sync.aligned.16x32bx2.x4.b32 [taddr], immHalfSplitoff, values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x32bx2, .x4): `values` is an array of exactly 4 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
// immHalfSplitoff has type cuda::ptx::n32_t<N32>, with N32 an int template parameter.
template <int N32, typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x32bx2(
  uint32_t taddr,
  cuda::ptx::n32_t<N32> immHalfSplitoff,
  const B32 (&values)[4]);

tcgen05.st.sync.aligned.16x32bx2.x4.unpack::16b.b32

// tcgen05.st.sync.aligned.16x32bx2.x4.unpack::16b.b32 [taddr], immHalfSplitoff, values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x32bx2, .x4, .unpack::16b): `values` is an array of exactly 4 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
// immHalfSplitoff has type cuda::ptx::n32_t<N32>, with N32 an int template parameter.
template <int N32, typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x32bx2_unpack_16b(
  uint32_t taddr,
  cuda::ptx::n32_t<N32> immHalfSplitoff,
  const B32 (&values)[4]);

tcgen05.st.sync.aligned.16x32bx2.x8.b32

// tcgen05.st.sync.aligned.16x32bx2.x8.b32 [taddr], immHalfSplitoff, values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x32bx2, .x8): `values` is an array of exactly 8 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
// immHalfSplitoff has type cuda::ptx::n32_t<N32>, with N32 an int template parameter.
template <int N32, typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x32bx2(
  uint32_t taddr,
  cuda::ptx::n32_t<N32> immHalfSplitoff,
  const B32 (&values)[8]);

tcgen05.st.sync.aligned.16x32bx2.x8.unpack::16b.b32

// tcgen05.st.sync.aligned.16x32bx2.x8.unpack::16b.b32 [taddr], immHalfSplitoff, values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x32bx2, .x8, .unpack::16b): `values` is an array of exactly 8 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
// immHalfSplitoff has type cuda::ptx::n32_t<N32>, with N32 an int template parameter.
template <int N32, typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x32bx2_unpack_16b(
  uint32_t taddr,
  cuda::ptx::n32_t<N32> immHalfSplitoff,
  const B32 (&values)[8]);

tcgen05.st.sync.aligned.16x32bx2.x16.b32

// tcgen05.st.sync.aligned.16x32bx2.x16.b32 [taddr], immHalfSplitoff, values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x32bx2, .x16): `values` is an array of exactly 16 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
// immHalfSplitoff has type cuda::ptx::n32_t<N32>, with N32 an int template parameter.
template <int N32, typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x32bx2(
  uint32_t taddr,
  cuda::ptx::n32_t<N32> immHalfSplitoff,
  const B32 (&values)[16]);

tcgen05.st.sync.aligned.16x32bx2.x16.unpack::16b.b32

// tcgen05.st.sync.aligned.16x32bx2.x16.unpack::16b.b32 [taddr], immHalfSplitoff, values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x32bx2, .x16, .unpack::16b): `values` is an array of exactly 16 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
// immHalfSplitoff has type cuda::ptx::n32_t<N32>, with N32 an int template parameter.
template <int N32, typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x32bx2_unpack_16b(
  uint32_t taddr,
  cuda::ptx::n32_t<N32> immHalfSplitoff,
  const B32 (&values)[16]);

tcgen05.st.sync.aligned.16x32bx2.x32.b32

// tcgen05.st.sync.aligned.16x32bx2.x32.b32 [taddr], immHalfSplitoff, values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x32bx2, .x32): `values` is an array of exactly 32 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
// immHalfSplitoff has type cuda::ptx::n32_t<N32>, with N32 an int template parameter.
template <int N32, typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x32bx2(
  uint32_t taddr,
  cuda::ptx::n32_t<N32> immHalfSplitoff,
  const B32 (&values)[32]);

tcgen05.st.sync.aligned.16x32bx2.x32.unpack::16b.b32

// tcgen05.st.sync.aligned.16x32bx2.x32.unpack::16b.b32 [taddr], immHalfSplitoff, values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x32bx2, .x32, .unpack::16b): `values` is an array of exactly 32 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
// immHalfSplitoff has type cuda::ptx::n32_t<N32>, with N32 an int template parameter.
template <int N32, typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x32bx2_unpack_16b(
  uint32_t taddr,
  cuda::ptx::n32_t<N32> immHalfSplitoff,
  const B32 (&values)[32]);

tcgen05.st.sync.aligned.16x32bx2.x64.b32

// tcgen05.st.sync.aligned.16x32bx2.x64.b32 [taddr], immHalfSplitoff, values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x32bx2, .x64): `values` is an array of exactly 64 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
// immHalfSplitoff has type cuda::ptx::n32_t<N32>, with N32 an int template parameter.
template <int N32, typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x32bx2(
  uint32_t taddr,
  cuda::ptx::n32_t<N32> immHalfSplitoff,
  const B32 (&values)[64]);

tcgen05.st.sync.aligned.16x32bx2.x64.unpack::16b.b32

// tcgen05.st.sync.aligned.16x32bx2.x64.unpack::16b.b32 [taddr], immHalfSplitoff, values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x32bx2, .x64, .unpack::16b): `values` is an array of exactly 64 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
// immHalfSplitoff has type cuda::ptx::n32_t<N32>, with N32 an int template parameter.
template <int N32, typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x32bx2_unpack_16b(
  uint32_t taddr,
  cuda::ptx::n32_t<N32> immHalfSplitoff,
  const B32 (&values)[64]);

tcgen05.st.sync.aligned.16x32bx2.x128.b32

// tcgen05.st.sync.aligned.16x32bx2.x128.b32 [taddr], immHalfSplitoff, values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x32bx2, .x128): `values` is an array of exactly 128 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
// immHalfSplitoff has type cuda::ptx::n32_t<N32>, with N32 an int template parameter.
template <int N32, typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x32bx2(
  uint32_t taddr,
  cuda::ptx::n32_t<N32> immHalfSplitoff,
  const B32 (&values)[128]);

tcgen05.st.sync.aligned.16x32bx2.x128.unpack::16b.b32

// tcgen05.st.sync.aligned.16x32bx2.x128.unpack::16b.b32 [taddr], immHalfSplitoff, values; // PTX ISA 86, SM_100a, SM_101a
// Declaration paired with the PTX form above (.16x32bx2, .x128, .unpack::16b): `values` is an array of exactly 128 elements.
// B32 is any type with sizeof(B32) == 4 (enable_if_t constraint); taddr is the 32-bit [taddr] operand.
// immHalfSplitoff has type cuda::ptx::n32_t<N32>, with N32 an int template parameter.
template <int N32, typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline void tcgen05_st_16x32bx2_unpack_16b(
  uint32_t taddr,
  cuda::ptx::n32_t<N32> immHalfSplitoff,
  const B32 (&values)[128]);