cuda::experimental::stf::stream_ctx#
-
class stream_ctx : public cuda::experimental::stf::backend_ctx<stream_ctx>#
This class describes a CUDASTF execution context where CUDA streams and CUDA events are used as synchronization primitives.
This class is copyable, movable, and can be passed by value.
Unnamed Group
- inline stream_ctx(async_resources_handle handle = async_resources_handle(nullptr))#
This type is copyable, assignable, and movable.
However, copies have reference semantics.
- inline stream_ctx(cudaStream_t user_stream, async_resources_handle handle = async_resources_handle(nullptr))#
This type is copyable, assignable, and movable.
However, copies have reference semantics.
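For illustration, a minimal usage sketch of both constructors (assuming the usual <cuda/experimental/stf.cuh> header; identifiers such as user_stream are placeholders):

```cpp
#include <cuda/experimental/stf.cuh>

using namespace cuda::experimental::stf;

int main() {
  // Context that manages its own pool of CUDA streams and events
  stream_ctx ctx;
  // ... create logical data and submit tasks here ...
  ctx.finalize(); // waits for all outstanding work before returning

  // Context attached to a user-provided CUDA stream
  cudaStream_t user_stream;
  cudaStreamCreate(&user_stream);
  stream_ctx ctx2(user_stream);
  // ... submit tasks ...
  ctx2.finalize();
  cudaStreamDestroy(user_stream);
  return 0;
}
```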
Public Types
-
using task_type = stream_task<>#
-
template<typename T>
using data_interface = typename streamed_interface_of<T>::type#
Definition for the underlying implementation of data_interface<T>.
- Template Parameters:
T – Type for which the streamed data interface is defined
Public Functions
-
inline void set_user_stream(cudaStream_t user_stream)#
-
template<typename ...Deps>
inline stream_task<Deps...> task(exec_place e_place, task_dep<Deps>... deps)#
Creates a task on the specified execution place.
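A hedged sketch of placing a task on a specific device (lX, lY, axpy and alpha are illustrative and assumed to be defined elsewhere):

```cpp
// The task runs on device 1; the body receives the task's CUDA stream followed by
// one data instance per dependency, in the order the dependencies were listed.
ctx.task(exec_place::device(1), lX.read(), lY.rw())->*[&](cudaStream_t s, auto dX, auto dY) {
  axpy<<<16, 128, 0, s>>>(alpha, dX, dY);
};
```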
-
template<typename ...Deps>
inline deferred_stream_task<Deps...> deferred_task(exec_place e_place, task_dep<Deps>... deps)#
-
template<typename ...Deps>
inline deferred_stream_task<Deps...> deferred_task(task_dep<Deps>... deps)#
-
inline cudaStream_t fence()#
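A hedged sketch of combining fence() with regular CUDA calls (user_kernel is a placeholder): the returned stream is expected to depend on all operations submitted to the context so far.

```cpp
// Order unrelated CUDA work after everything submitted to the context so far.
cudaStream_t s = ctx.fence();
user_kernel<<<128, 64, 0, s>>>();
cudaStreamSynchronize(s); // blocks until prior STF work and the kernel complete
```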
-
inline void finalize()#
-
inline float get_submission_time_ms() const#
-
inline void submit()#
-
inline void change_stage()#
-
template<typename S, typename ...Deps>
inline auto deferred_parallel_for(exec_place e_place, S shape, task_dep<Deps>... deps)#
-
template<typename S, typename ...Deps>
inline auto deferred_parallel_for(S shape, task_dep<Deps>... deps)#
-
template<typename T>
inline auto wait(cuda::experimental::stf::logical_data<T> &ldata)#
-
template<typename T>
inline cuda::experimental::stf::logical_data<T> logical_data(shape_of<T> shape)#
Returns a logical_data object with the given shape, tied to this context. The initial data place is invalid (a usage sketch follows the overloads below).
- Template Parameters:
T – Underlying type for the logical data object
- Parameters:
shape – shape of the created object
- Returns:
logical_data<T> usable with this context
- inline auto logical_data(T prototype, data_place dplace = data_place::host())#
- inline auto logical_data(T (&array)[n], data_place dplace = data_place::host())#
-
inline auto logical_data(size_t elements, Sizes... more_sizes)#
- inline auto logical_data(T *p, size_t n, data_place dplace = data_place::host())#
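A brief sketch of the logical_data overloads above (X and the sizes are illustrative; the element type of the extent-based overload is given explicitly):

```cpp
double X[128];

auto lX = ctx.logical_data(X);                             // from a stack array, host data place
auto lP = ctx.logical_data(&X[0], 128);                    // from a raw pointer and element count
auto lA = ctx.logical_data(shape_of<slice<double>>(1024)); // from a shape, no initial content
auto lB = ctx.logical_data<int>(64, 64);                   // 2D logical data from element counts
```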
-
inline auto token()#
- inline frozen_logical_data<T> freeze(cuda::experimental::stf::logical_data<T> d, access_mode m = access_mode::read, data_place where = data_place::invalid(), bool user_freeze = true)#
-
inline auto task(task_dep<Deps>... deps)#
Creates a typed task on the current CUDA device.
- Returns:
An instantiation of task with the appropriate arguments, suitable for use with operator->*.
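A hedged sketch of the typical pattern (axpy, alpha, lX and lY are illustrative): the dependencies determine what the body receives, and the body launches work on the stream it is given.

```cpp
__global__ void axpy(double a, slice<const double> x, slice<double> y) {
  int tid      = blockIdx.x * blockDim.x + threadIdx.x;
  int nthreads = gridDim.x * blockDim.x;
  for (size_t i = tid; i < x.size(); i += nthreads) {
    y(i) += a * x(i);
  }
}

// Task on the current device: read lX, read-write lY
ctx.task(lX.read(), lY.rw())->*[&](cudaStream_t s, auto dX, auto dY) {
  axpy<<<16, 128, 0, s>>>(alpha, dX, dY);
};
```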
-
inline auto host_launch(task_dep<Deps>... deps)#
Creates an object able to launch a lambda function on the host.
- Template Parameters:
Deps – Dependency types
- Parameters:
deps – dependencies
- Returns:
host_launch_scope<Deps...> ready for the ->* operator
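A hedged sketch (lY is illustrative): the host lambda receives one data instance per dependency and runs once the data is available on the host.

```cpp
ctx.host_launch(lY.read())->*[](auto hY) {
  double sum = 0.0;
  for (size_t i = 0; i < hY.size(); i++) {
    sum += hY(i); // hY is a host-accessible instance of lY
  }
  printf("sum = %f\n", sum);
};
```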
- inline auto launch(thread_hierarchy_spec_t spec, exec_place e_place, task_dep<Deps>... deps)#
-
inline auto launch(exec_place_host, task_dep<Deps>... deps)#
-
inline auto launch(exec_place e_place, task_dep<Deps>... deps)#
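A hedged sketch of the launch construct (alpha, lX and lY are illustrative): the extended device lambda receives a thread-hierarchy handle plus one data instance per dependency; th.rank() and th.size() give a global thread index and count.

```cpp
ctx.launch(lX.read(), lY.rw())->*[=] __device__(auto th, auto dX, auto dY) {
  for (size_t i = th.rank(); i < dX.size(); i += th.size()) {
    dY(i) += alpha * dX(i);
  }
};
```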
-
inline auto repeat(size_t count)#
-
inline auto repeat(::std::function<bool()> condition)#
-
inline auto parallel_for(exec_place_t e_place, S shape, Deps... deps)#
- inline auto parallel_for(partitioner_t p, exec_place_t e_place, S shape, Deps... deps)#
- auto parallel_for(exec_place_grid e_place, S shape, Deps... deps)#
-
inline auto parallel_for(S shape, Deps... deps)#
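A hedged sketch (alpha, lX and lY are illustrative): the device lambda is invoked once per index of the shape, followed by one data instance per dependency.

```cpp
ctx.parallel_for(lY.shape(), lX.read(), lY.rw())->*[=] __device__(size_t i, auto dX, auto dY) {
  dY(i) += alpha * dX(i);
};
```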
-
inline explicit operator bool() const#
-
inline bool operator==(const backend_ctx_untyped &rhs) const#
-
inline bool operator!=(const backend_ctx_untyped &rhs) const#
-
inline async_resources_handle &async_resources() const#
-
inline bool reordering_tasks() const#
-
inline auto &get_composite_cache()#
- inline ::std::pair<exec_place, bool> schedule_task(const task &t)#
- inline void reorder_tasks(::std::vector<int> &tasks, ::std::unordered_map<int, reserved::reorderer_payload> &task_map)#
-
inline void increment_task_count()#
-
inline void increment_finished_task_count()#
-
inline size_t task_count() const#
-
inline void set_allocator(block_allocator_untyped custom)#
-
inline void set_uncached_allocator(block_allocator_untyped custom)#
-
inline auto &get_allocator()#
-
inline const auto &get_allocator() const#
-
inline auto &get_default_allocator()#
-
inline auto &get_uncached_allocator()#
- inline void update_uncached_allocator(block_allocator_untyped uncached_allocator)#
-
inline void attach_allocator(block_allocator_untyped a)#
- inline void add_transfer(const data_place &src_node, const data_place &dst_node, size_t s)#
-
inline bool generate_event_symbols() const#
-
inline void enable_logical_data_stats()#
-
inline cudaGraph_t graph() const#
-
inline void set_graph_cache_policy(::std::function<bool()> policy)#
-
inline auto get_graph_cache_policy() const#
-
inline executable_graph_cache_stat *graph_get_cache_stat()#
- inline event_list stream_to_event_list(cudaStream_t stream, ::std::string event_symbol)#
-
inline size_t stage() const#
-
inline ::std::string to_string() const#
-
inline bool track_dangling_events() const#
-
inline impl &get_state()#
-
inline const impl &get_state() const#
-
inline const auto &get_dot() const#
-
inline auto &get_dot()#
-
template<typename parent_ctx_t>
inline void set_parent_ctx(parent_ctx_t &parent_ctx)#
-
inline auto dot_section(::std::string symbol) const#
-
inline auto get_phase() const#
-
inline void set_phase(backend_ctx_untyped::phase p)#
-
inline bool has_start_events() const#
-
inline const event_list &get_start_events() const#
- inline void push_affinity(::std::vector<::std::shared_ptr<exec_place>> p) const#
-
inline void pop_affinity() const#
- inline const ::std::vector<::std::shared_ptr<exec_place>> &current_affinity() const#
-
inline const exec_place &current_exec_place() const#
-
inline bool has_affinity() const#
-
inline exec_place default_exec_place() const#
-
inline auto pick_dstream()#
-
inline cudaStream_t pick_stream()#
Public Members
-
bool blocking_finalize = true#
-
template<typename ...Data>
class deferred_host_launch_scope : public cuda::experimental::stf::deferred_stream_task<>#
-
inline deferred_host_launch_scope &set_symbol(::std::string s) &#
Set the symbol of the task. This is used for profiling and debugging.
- Parameters:
s – symbol of the task
- Returns:
A reference to this scope
-
inline deferred_host_launch_scope &&set_symbol(::std::string s) &&#
-
template<typename shape_t, typename P, typename ...Data>
class deferred_parallel_for_scope : public cuda::experimental::stf::deferred_stream_task<>#
-
inline deferred_parallel_for_scope &set_symbol(::std::string s) &#
Set the symbol of the task. This is used for profiling and debugging.
- Parameters:
s – symbol of the task
- Returns:
A reference to this scope
-
inline deferred_parallel_for_scope &&set_symbol(::std::string s) &&#
Public Functions
- inline deferred_parallel_for_scope(stream_ctx &ctx, exec_place e_place, shape_t shape, task_dep<Data>... deps)#
-
inline void populate_deps_scheduling_info()#