cuda::experimental::stf::interpreted_execution_policy
Defined in include/cuda/experimental/__stf/internal/interpreted_execution_policy.cuh
-
template<auto... spec>
class interpreted_execution_policy
This corresponds to an execution_policy_spec (e.g. par(con(32))) which we map onto an execution place.
The goal of this intermediate class is to take the high-level spec description and compute how the different levels are mapped onto the machine, and in particular how levels are mapped to the CUDA hierarchy (threads, blocks, devices).
Public Types
-
using thread_hierarchy_t = thread_hierarchy<spec...>
Public Functions
-
interpreted_execution_policy() = default
-
~interpreted_execution_policy() = default
-
template<typename Fun>
interpreted_execution_policy(const thread_hierarchy_spec<spec...> &p, const exec_place &where, const Fun &f)
-
inline void set_level_mem(int level, size_t size)
-
inline size_t get_level_mem(int level) const
-
inline void set_level_sync(int level, bool sync)
-
inline bool get_level_sync(size_t level) const
-
inline size_t width(int l) const
-
inline const auto &get_levels() const
-
inline ::std::array<size_t, 3> get_config() const
-
inline ::std::array<size_t, 3> get_mem_config() const
-
inline bool need_cooperative_kernel_launch() const
-
inline void set_system_mem(void *addr)
-
inline void *get_system_mem() const
Public Members
-
reserved::cooperative_group_system cg_system
-
class level
Each level of the interpreted policy is a vector which describes how the level is spread across the machine.
For example, a level could be (128 threads), or it could be (4 blocks) x (32 threads). In the latter case, the level is described as a vector of two sub-entries.
-
using thread_hierarchy_t = thread_hierarchy<spec...>