Defined in include/cuda/experimental/__stf/stream/stream_ctx.cuh
class stream_ctx : public cuda::experimental::stf::backend_ctx<stream_ctx>
This class describes a CUDASTF execution context where CUDA streams and CUDA events are used as synchronization primitives.
This class is copyable, movable, and can be passed by value
Unnamed Group
inline stream_ctx(async_resources_handle handle = async_resources_handle(nullptr))
This type is copyable, assignable, and movable. However, copies have reference semantics.
inline stream_ctx(cudaStream_t user_stream, async_resources_handle handle = async_resources_handle(nullptr))
This type is copyable, assignable, and movable. However, copies have reference semantics.
Public Types
using task_type = stream_task<>
template<typename T>
using data_interface = typename streamed_interface_of<T>::type
Definition for the underlying implementation of data_interface<T>.
- Template Parameters
T –
Public Functions
inline void set_user_stream(cudaStream_t user_stream)
inline ::std::string to_string() const
template<typename ...Deps>
inline stream_task<Deps...> task(exec_place e_place, task_dep<Deps>... deps)
Creates a task on the specified execution place.
template<typename ...Deps>
inline deferred_stream_task<Deps...> deferred_task(exec_place e_place, task_dep<Deps>... deps)
template<typename ...Deps>
inline deferred_stream_task<Deps...> deferred_task(task_dep<Deps>... deps)
inline cudaStream_t task_fence()
inline void finalize()
inline float get_submission_time_ms() const
inline void submit()
inline void change_epoch()
template<typename S, typename ...Deps>
inline auto deferred_parallel_for(exec_place e_place, S shape, task_dep<Deps>... deps)
template<typename S, typename ...Deps>
inline auto deferred_parallel_for(S shape, task_dep<Deps>... deps)
template<typename T>
inline auto wait(cuda::experimental::stf::logical_data<T> &ldata)
inline cuda::experimental::stf::logical_data<T> logical_data(shape_of<T> shape)
Returns a logical_data<T> object with the given shape, tied to this graph. Initial data place is invalid.
- Template Parameters
T – Underlying type for the logical data object
- Parameters
shape – shape of the created object
- Returns
logical_data<T> usable with this graph
inline auto logical_data(T prototype, data_place dplace = data_place::host)
inline auto logical_data(T (&array)[n], data_place dplace = data_place::host)
inline auto logical_data(size_t elements, Sizes... more_sizes)
inline auto logical_data(T *p, size_t n, data_place dplace = data_place::host)
inline auto logical_token()
inline frozen_logical_data<T> freeze(cuda::experimental::stf::logical_data<T> d, access_mode m = access_mode::read, data_place where = data_place::invalid)
inline auto task(task_dep<Deps>... deps)
Creates a typed task on the current CUDA device.
- Returns
An instantiation of task with the appropriate arguments, suitable for use with operator->*
inline auto host_launch(task_dep<Deps>... deps)
Creates an object able to launch a lambda function on the host.
- Template Parameters
Deps – Dependency types
- Parameters
deps – dependencies
- Returns
host_launch_scope<Deps...> ready for the ->* operator
inline auto launch(thread_hierarchy_spec_t spec, exec_place e_place, task_dep<Deps>... deps)
inline auto launch(exec_place_host, task_dep<Deps>... deps)
inline auto launch(exec_place e_place, task_dep<Deps>... deps)
inline auto repeat(size_t count)
inline auto repeat(::std::function<bool()> condition)
inline auto parallel_for(exec_place e_place, S shape, Deps... deps)
inline auto parallel_for(partitioner_t, exec_place e_place, S shape, Deps... deps)
auto parallel_for(exec_place_grid e_place, S shape, Deps... deps) = delete
inline auto parallel_for(partitioner_t p, exec_place_grid e_place, S shape, Deps... deps)
inline explicit operator bool() const
inline bool operator==(const backend_ctx_untyped &rhs) const
inline bool operator!=(const backend_ctx_untyped &rhs) const
inline async_resources_handle &async_resources() const
inline auto &get_stack()
inline bool reordering_tasks() const
inline auto &get_composite_cache()
inline ::std::pair<exec_place, bool> schedule_task(const task &t) const
inline void reorder_tasks(::std::vector<int> &tasks, ::std::unordered_map<int, reserved::reorderer_payload> &task_map)
inline void increment_task_count()
inline size_t task_count() const
inline void set_allocator(block_allocator_untyped custom)
inline void set_uncached_allocator(block_allocator_untyped custom)
inline auto &get_allocator()
inline const auto &get_allocator() const
inline auto &get_default_allocator()
inline auto &get_uncached_allocator()
inline void update_uncached_allocator(block_allocator_untyped uncached_allocator)
inline void attach_allocator(block_allocator_untyped a)
inline void add_transfer(const data_place &src_node, const data_place &dst_node, size_t s)
inline bool generate_event_symbols() const
inline cudaGraph_t graph() const
inline event_list stream_to_event_list(cudaStream_t stream, ::std::string event_symbol) const
inline size_t epoch() const
inline impl &get_state()
inline const impl &get_state() const
inline const auto &get_dot() const
inline auto &get_dot()
template<typename parent_ctx_t>
inline void set_parent_ctx(parent_ctx_t &parent_ctx)
inline void dot_push_section(::std::string symbol) const
inline void dot_pop_section() const
inline auto dot_section(::std::string symbol) const
inline auto get_phase() const
inline void set_phase(backend_ctx_untyped::phase p)
inline bool has_start_events() const
inline const event_list &get_start_events() const
inline void pop_affinity() const
inline const ::std::vector<::std::shared_ptr<exec_place>> &current_affinity() const
inline const exec_place &current_exec_place() const
inline bool has_affinity() const
inline exec_place default_exec_place() const
inline auto pick_dstream()
inline cudaStream_t pick_stream()
Public Members
bool blocking_finalize = true
template<typename ...Data>
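class deferred_host_launch_scope : public cuda::experimental::stf::deferred_stream_task<>
Set the symbol of the task (describes the set_symbol overloads listed next). This is used for profiling and debugging.
- Parameters
s – symbol to assign to the task
- Returns
the scope object, by lvalue or rvalue reference depending on the overload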
inline deferred_host_launch_scope &set_symbol(::std::string s) &
inline deferred_host_launch_scope &&set_symbol(::std::string s) &&
template<typename shape_t, typename P, typename ...Data>
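class deferred_parallel_for_scope : public cuda::experimental::stf::deferred_stream_task<>
Set the symbol of the task (describes the set_symbol overloads listed next). This is used for profiling and debugging.
- Parameters
s – symbol to assign to the task
- Returns
the scope object, by lvalue or rvalue reference depending on the overload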
inline deferred_parallel_for_scope &set_symbol(::std::string s) &
inline deferred_parallel_for_scope &&set_symbol(::std::string s) &&
Public Functions
inline deferred_parallel_for_scope(stream_ctx &ctx, exec_place e_place, shape_t shape, task_dep<Data>... deps)
inline void populate_deps_scheduling_info()
inline stream_ctx(async_resources_handle handle = async_resources_handle(nullptr))