cuda::experimental::stf::stream_ctx

Defined in include/cuda/experimental/__stf/stream/stream_ctx.cuh

class stream_ctx : public cuda::experimental::stf::backend_ctx<stream_ctx>

This class describes a CUDASTF execution context where CUDA streams and CUDA events are used as synchronization primitives.

This class is copyable, movable, and can be passed by value.

Unnamed Group

inline stream_ctx(async_resources_handle handle = async_resources_handle(nullptr))

This type is copyable, assignable, and movable. However, copies have reference semantics.

inline stream_ctx(cudaStream_t user_stream, async_resources_handle handle = async_resources_handle(nullptr))

This type is copyable, assignable, and movable. However, copies have reference semantics.

Public Types

using task_type = stream_task<>
template<typename T>
using data_interface = typename streamed_interface_of<T>::type

Definition for the underlying implementation of data_interface<T>

Template Parameters

T

enum class phase

Current context status.

We keep track of the status of the context so that we do not make API calls at an inappropriate time, such as synchronizing twice.

Values:

enumerator setup
enumerator submitted
enumerator finalized

Public Functions

inline void set_user_stream(cudaStream_t user_stream)
inline ::std::string to_string() const
template<typename ...Deps>
inline stream_task<Deps...> task(exec_place e_place, task_dep<Deps>... deps)

Creates a task on the specified execution place.

template<typename ...Deps>
inline deferred_stream_task<Deps...> deferred_task(exec_place e_place, task_dep<Deps>... deps)
template<typename ...Deps>
inline deferred_stream_task<Deps...> deferred_task(task_dep<Deps>... deps)
template<typename ...Deps>
inline auto deferred_host_launch(task_dep<Deps>... deps)
inline cudaStream_t task_fence()
inline void finalize()
inline float get_submission_time_ms() const
inline void submit()
inline void change_epoch()
template<typename S, typename ...Deps>
inline auto deferred_parallel_for(exec_place e_place, S shape, task_dep<Deps>... deps)
template<typename S, typename ...Deps>
inline auto deferred_parallel_for(S shape, task_dep<Deps>... deps)
template<typename T>
inline auto wait(cuda::experimental::stf::logical_data<T> &ldata)
inline cuda::experimental::stf::logical_data<T> logical_data(shape_of<T> shape)

Returns a logical_data object with the given shape, tied to this context.

Initial data place is invalid.

Template Parameters

T – Underlying type for the logical data object

Parameters

shape – shape of the created object

Returns

logical_data<T> usable with this context

inline auto logical_data(T prototype, data_place dplace = data_place::host)
inline auto logical_data(T (&array)[n], data_place dplace = data_place::host)
inline auto logical_data(size_t elements, Sizes... more_sizes)
inline auto logical_data(T *p, size_t n, data_place dplace = data_place::host)
inline auto logical_token()
inline frozen_logical_data<T> freeze(cuda::experimental::stf::logical_data<T> d, access_mode m = access_mode::read, data_place where = data_place::invalid)
inline auto task(task_dep<Deps>... deps)

Creates a typed task on the current CUDA device.

Returns

An instantiation of task with the appropriate arguments, suitable for use with operator->*.

inline auto host_launch(task_dep<Deps>... deps)

Creates an object able to launch a lambda function on the host.

Template Parameters

Deps – Dependency types

Parameters

deps – dependencies

Returns

host_launch_scope<Deps...> ready for the ->* operator

inline auto cuda_kernel(task_dep<Deps>... deps)
inline auto cuda_kernel_chain(task_dep<Deps>... deps)
inline auto launch(thread_hierarchy_spec_t spec, exec_place e_place, task_dep<Deps>... deps)
inline auto launch(exec_place_host, task_dep<Deps>... deps)
inline auto launch(exec_place e_place, task_dep<Deps>... deps)
inline auto launch(task_dep<Deps>... deps)
inline auto repeat(size_t count)
inline auto repeat(::std::function<bool()> condition)
inline auto parallel_for(exec_place e_place, S shape, Deps... deps)
inline auto parallel_for(partitioner_t, exec_place e_place, S shape, Deps... deps)
auto parallel_for(exec_place_grid e_place, S shape, Deps... deps) = delete
inline auto parallel_for(partitioner_t p, exec_place_grid e_place, S shape, Deps... deps)
inline auto parallel_for(S shape, task_dep<Deps, Ops, flags>... deps)
inline explicit operator bool() const
inline bool operator==(const backend_ctx_untyped &rhs) const
inline bool operator!=(const backend_ctx_untyped &rhs) const
inline async_resources_handle &async_resources() const
inline auto &get_stack()
inline bool reordering_tasks() const
inline auto &get_composite_cache()
inline ::std::pair<exec_place, bool> schedule_task(const task &t) const
inline void reorder_tasks(::std::vector<int> &tasks, ::std::unordered_map<int, reserved::reorderer_payload> &task_map)
inline void increment_task_count()
inline size_t task_count() const
inline void set_allocator(block_allocator_untyped custom)
inline void set_uncached_allocator(block_allocator_untyped custom)
inline auto &get_allocator()
inline const auto &get_allocator() const
inline auto &get_default_allocator()
inline auto &get_uncached_allocator()
inline void update_uncached_allocator(block_allocator_untyped uncached_allocator)
inline void attach_allocator(block_allocator_untyped a)
inline void add_transfer(const data_place &src_node, const data_place &dst_node, size_t s)
inline bool generate_event_symbols() const
inline cudaGraph_t graph() const
inline event_list stream_to_event_list(cudaStream_t stream, ::std::string event_symbol) const
inline size_t epoch() const
inline impl &get_state()
inline const impl &get_state() const
inline const auto &get_dot() const
inline auto &get_dot()
template<typename parent_ctx_t>
inline void set_parent_ctx(parent_ctx_t &parent_ctx)
inline void dot_push_section(::std::string symbol) const
inline void dot_pop_section() const
inline auto dot_section(::std::string symbol) const
inline auto get_phase() const
inline void set_phase(backend_ctx_untyped::phase p)
inline bool has_start_events() const
inline const event_list &get_start_events() const
inline void push_affinity(::std::vector<::std::shared_ptr<exec_place>> p) const
inline void push_affinity(::std::shared_ptr<exec_place> p) const
inline void pop_affinity() const
inline const ::std::vector<::std::shared_ptr<exec_place>> &current_affinity() const
inline const exec_place &current_exec_place() const
inline bool has_affinity() const
inline exec_place default_exec_place() const
inline auto pick_dstream()
inline cudaStream_t pick_stream()

Public Members

bool blocking_finalize = true
template<typename ...Data>
class deferred_host_launch_scope : public cuda::experimental::stf::deferred_stream_task<>

Set the symbol of the task. This is used for profiling and debugging.

Parameters

s – symbol of the task

Returns

deferred_host_launch_scope&

inline deferred_host_launch_scope &set_symbol(::std::string s) &
inline deferred_host_launch_scope &&set_symbol(::std::string s) &&

Public Functions

inline deferred_host_launch_scope(stream_ctx &ctx, task_dep<Data>... deps)
inline void populate_deps_scheduling_info()
template<typename Fun>
inline void operator->*(Fun fun)
template<typename shape_t, typename P, typename ...Data>
class deferred_parallel_for_scope : public cuda::experimental::stf::deferred_stream_task<>

Set the symbol of the task. This is used for profiling and debugging.

Parameters

s – symbol of the task

Returns

deferred_parallel_for_scope&

inline deferred_parallel_for_scope &set_symbol(::std::string s) &
inline deferred_parallel_for_scope &&set_symbol(::std::string s) &&

Public Functions

inline deferred_parallel_for_scope(stream_ctx &ctx, exec_place e_place, shape_t shape, task_dep<Data>... deps)
inline void populate_deps_scheduling_info()
template<typename Fun>
inline void operator->*(Fun fun)