cuda::experimental::stf::thread_hierarchy#

template<auto... spec>
class thread_hierarchy#

This describes a hierarchy of threads used to implement a launch construct.

It corresponds to a thread_hierarchy_spec that has been mapped onto the execution place, and provides simple mechanisms at different levels of the hierarchy:

  • getting the rank of the calling thread, and the size, at a given level of the hierarchy

  • synchronizing all threads in a specific level

  • getting a local storage attached to a specific level.

This class is intended to be passed by value from the host to CUDA kernels, so it does not contain pointers or indirections (except in the implementation of system-wide barriers).

Public Functions

thread_hierarchy() = default#
template<bool outer_sync, size_t outer_width>
inline thread_hierarchy(
const thread_hierarchy<outer_sync, outer_width, spec...> &outer,
)#
inline thread_hierarchy(
int devid,
interpreted_execution_policy<spec...> &p,
)#

This takes an interpreted_execution_policy, which is the mapping of a spec onto the hardware, and generates a thread_hierarchy object that can be passed to kernels as an argument.

inline const ::std::array<size_t, 3> &get_config() const#
inline size_t rank(int level, int root_level) const#
inline size_t size(int level, int root_level) const#
inline size_t size(int level = int(depth) - 1) const#
inline size_t rank(int level = int(depth) - 1) const#
inline void sync(int level = 0)#
template<typename T, typename ...Others>
inline auto remove_first_tuple_element(
const ::std::tuple<T, Others...> &t,
)#
template<typename shape_t, typename P, typename ...sub_partitions>
inline auto apply_partition(
const shape_t &s,
const ::std::tuple<P, sub_partitions...> &t,
) const#
template<typename shape_t>
inline auto apply_partition(
const shape_t &s,
) const#
inline auto inner() const#

Get the inner thread hierarchy (starting one level down).

Returns:

A thread_hierarchy instantiated with spec without its first two arguments.

template<typename T>
inline slice<T> storage(int level)#
inline void set_device_tmp(void *addr)#
inline void set_system_tmp(void *addr)#
inline void set_devid(int d)#

Public Static Functions

static inline constexpr size_t static_width(size_t level)#

Get the statically-specified width at a specific level.

Parameters:

level – The level

Returns:

The width (0 if width is dynamic)

Friends

friend class thread_hierarchy