cuda::experimental::stf::thread_hierarchy
Defined in include/cuda/experimental/__stf/internal/interpreted_execution_policy.cuh
-
template<auto... spec>
class thread_hierarchy
This describes a hierarchy of threads used to implement a launch construct.
It corresponds to a thread_hierarchy_spec which was mapped onto the execution place, and provides simple mechanisms at different levels in the hierarchy:
getting the rank and the size of the calling thread in the hierarchy
synchronizing all threads in a specific level
getting a local storage attached to a specific level.
This class is intended to be passed by value from the host to CUDA kernels, so it does not contain pointers or indirection (except in the implementation of system-wide barriers).
Public Functions
-
thread_hierarchy() = default
-
template<bool outer_sync, size_t outer_width>
inline thread_hierarchy(const thread_hierarchy<outer_sync, outer_width, spec...> &outer)
-
inline thread_hierarchy(int devid, interpreted_execution_policy<spec...> &p)
This takes an interpreted_execution_policy which is the mapping of a spec on the hardware, and generates a thread_hierarchy object that can be passed to kernels as an argument.
-
inline const ::std::array<size_t, 3> &get_config() const
-
inline size_t rank(int level, int root_level) const
-
inline size_t size(int level, int root_level) const
-
inline size_t size(int level = int(depth) - 1) const
-
inline size_t rank(int level = int(depth) - 1) const
-
inline void sync(int level = 0)
-
template<typename T, typename ...Others>
inline auto remove_first_tuple_element(const ::std::tuple<T, Others...> &t)
-
template<typename shape_t, typename P, typename ...sub_partitions>
inline auto apply_partition(const shape_t &s, const ::std::tuple<P, sub_partitions...> &t) const
-
inline auto inner() const
Get the inner thread hierarchy (starting one level down)
- Returns
thread_hierarchy instantiated with spec sans the first two arguments
-
inline void set_device_tmp(void *addr)
-
inline void set_system_tmp(void *addr)
-
inline void set_devid(int d)
Public Static Functions
-
static inline constexpr size_t static_width(size_t level)
Get the statically-specified width at a specific level.
- Parameters
level – The level
- Returns
The width (0 if width is dynamic)
Friends
- friend class thread_hierarchy