cuda::experimental::stf::graph_ctx#

class graph_ctx : public cuda::experimental::stf::backend_ctx<graph_ctx>#

A graph context, which is a CUDA graph that is built automatically from the submitted tasks.
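
A minimal usage sketch (hedged: the include path, the axpy kernel, and the launch configuration below are illustrative assumptions, not part of this reference). Each task submitted to the context is recorded as part of the underlying CUDA graph, and finalize() submits that graph and waits for its completion.

#include <cuda/experimental/stf.cuh>

using namespace cuda::experimental::stf;

__global__ void axpy(double a, slice<const double> x, slice<double> y) {
  for (size_t i = threadIdx.x + blockIdx.x * blockDim.x; i < x.size(); i += blockDim.x * gridDim.x) {
    y(i) += a * x(i);
  }
}

int main() {
  graph_ctx ctx;

  double X[128], Y[128];
  for (size_t i = 0; i < 128; i++) {
    X[i] = double(i);
    Y[i] = 0.0;
  }

  auto lX = ctx.logical_data(X);
  auto lY = ctx.logical_data(Y);

  // Each task becomes part of the CUDA graph managed by ctx.
  ctx.task(lX.read(), lY.rw())->*[](cudaStream_t s, auto dX, auto dY) {
    axpy<<<8, 128, 0, s>>>(2.0, dX, dY);
  };

  ctx.finalize(); // submit the graph and wait for completion
  return 0;
}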

Unnamed Group

inline graph_ctx(
async_resources_handle handle = async_resources_handle(nullptr),
)#

This type is copyable, assignable, and movable.

However, copies have reference semantics.

inline graph_ctx(
cudaStream_t user_stream,
async_resources_handle handle = async_resources_handle(nullptr),
)#

This type is copyable, assignable, and movable.

However, copies have reference semantics.

inline graph_ctx(cudaGraph_t g)#

Constructor taking a user-provided graph. The user code must not destroy this graph afterwards.
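
A hedged sketch of this mode (the data size, the host lambda body, and the manual instantiation below are illustrative): task nodes are recorded into the user-provided graph, and the caller retrieves the populated graph through finalize_as_graph() rather than destroying it.

cudaGraph_t g;
cudaGraphCreate(&g, 0);

graph_ctx ctx(g); // task nodes are recorded into the user-provided graph

double X[256];
auto lX = ctx.logical_data(X);

// Fill the data from a host task, added to the graph as well.
ctx.host_launch(lX.write())->*[](auto hX) {
  for (size_t i = 0; i < hX.size(); i++) {
    hX(i) = 1.0;
  }
};

// Hand the populated graph back; do not call cudaGraphDestroy on it.
::std::shared_ptr<cudaGraph_t> built = ctx.finalize_as_graph();

cudaGraphExec_t exec;
cudaGraphInstantiate(&exec, *built, 0); // CUDA 12 three-argument form
cudaStream_t stream;
cudaStreamCreate(&stream);
cudaGraphLaunch(exec, stream);
cudaStreamSynchronize(stream);
cudaGraphExecDestroy(exec);
cudaStreamDestroy(stream);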

Public Types

using task_type = graph_task<>#
template<typename T>
using data_interface = typename graphed_interface_of<T>::type#

Definition for the underlying implementation of data_interface<T>.

Template Parameters:

T

enum class phase#

Current context status.

We keep track of the status of the context so that we do not make API calls at an inappropriate time, such as synchronizing twice.

Values:

enumerator setup#
enumerator submitted#
enumerator finalized#

Public Functions

template<typename ...Deps>
inline auto task(
exec_place e_place,
task_dep<Deps>... deps,
)#

Creates a typed task on the specified execution place.
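
For example (a fragment, reusing ctx, lX, lY and the axpy kernel from the first sketch; the device index is illustrative), a task can be pinned to a specific device by passing an execution place:

// The task body runs with device 1 current; the data instances it
// receives live on that device.
ctx.task(exec_place::device(1), lX.read(), lY.rw())->*[](cudaStream_t s, auto dX, auto dY) {
  axpy<<<8, 128, 0, s>>>(3.0, dX, dY);
};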

inline cudaStream_t fence()#
inline void finalize()#
inline void submit(cudaStream_t stream = nullptr)#
inline void change_stage()#
inline ::std::shared_ptr<cudaGraph_t> get_shared_graph() const#

Get the “support” graph associated with the context.

inline cudaGraph_t &get_graph() const#
inline size_t get_graph_stage() const#
inline auto get_exec_graph() const#
inline ::std::shared_ptr<cudaGraph_t> finalize_as_graph()#
inline ::std::shared_ptr<cudaGraphExec_t> instantiate()#
inline void display_graph_info(cudaGraph_t g)#
inline void print_to_dot(
const ::std::string &filename,
enum cudaGraphDebugDotFlags flags = cudaGraphDebugDotFlags(0),
)#
template<typename T>
inline auto wait(
cuda::experimental::stf::logical_data<T> &ldata,
)#
inline cuda::experimental::stf::logical_data<T> logical_data(
shape_of<T> shape,
)#

Returns a logical_data object with the given shape, tied to this graph.

The initial data place is invalid because the object is created from a shape only, with no initial content.

Template Parameters:

T – Underlying type for the logical data object

Parameters:

shape – shape of the created object

Returns:

logical_data<T> usable with this graph
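
A short fragment (reusing ctx from the first sketch; compiling the device lambda requires nvcc's --extended-lambda flag): since a shape-only logical data has no initial content, its first use should be write-only, for example with parallel_for:

// 1024 doubles described by shape only: no backing host copy exists yet.
auto lZ = ctx.logical_data(shape_of<slice<double>>(1024));

// The first access is write-only, filling the data on the current device.
ctx.parallel_for(lZ.shape(), lZ.write())->*[] __device__ (size_t i, auto z) {
  z(i) = 0.0;
};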

inline auto logical_data(
T prototype,
data_place dplace = data_place::host(),
)#
inline auto logical_data(
T (&array)[n],
data_place dplace = data_place::host(),
)#
inline auto logical_data(size_t elements, Sizes... more_sizes)#
inline auto logical_data(
T *p,
size_t n,
data_place dplace = data_place::host(),
)#
inline auto token()#
inline frozen_logical_data<T> freeze(
cuda::experimental::stf::logical_data<T> d,
access_mode m = access_mode::read,
data_place where = data_place::invalid(),
bool user_freeze = true,
)#
inline auto task(task_dep<Deps>... deps)#

Creates a typed task on the current CUDA device.

Returns:

An instantiation of task with the appropriate arguments, suitable for use with operator->*.
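
A fragment reusing the names from the first sketch; without an explicit execution place the task targets the current CUDA device. The set_symbol() call is an assumption here, used in CUDASTF samples to name tasks for debugging output:

ctx.task(lX.read(), lY.rw()).set_symbol("axpy_current_device")->*[](cudaStream_t s, auto dX, auto dY) {
  axpy<<<8, 128, 0, s>>>(1.0, dX, dY);
};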

inline auto host_launch(task_dep<Deps>... deps)#

Creates an object able to launch a lambda function on the host.

Template Parameters:

Deps – Dependency types

Parameters:

deps – dependencies

Returns:

host_launch_scope<Deps...> ready for the ->* operator
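
A fragment reusing lY from the first sketch: the lambda executes on the host and receives host-accessible instances of its dependencies.

// Check the result on the host once the device work it depends on is done.
ctx.host_launch(lY.read())->*[](auto hY) {
  double sum = 0.0;
  for (size_t i = 0; i < hY.size(); i++) {
    sum += hY(i);
  }
  printf("sum of Y = %f\n", sum);
};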

inline auto cuda_kernel(task_dep<Deps>... deps)#
inline auto cuda_kernel_chain(task_dep<Deps>... deps)#
inline auto launch(
thread_hierarchy_spec_t spec,
exec_place e_place,
task_dep<Deps>... deps,
)#
inline auto launch(exec_place_host, task_dep<Deps>... deps)#
inline auto launch(exec_place e_place, task_dep<Deps>... deps)#
inline auto launch(task_dep<Deps>... deps)#
inline auto repeat(size_t count)#
inline auto repeat(::std::function<bool()> condition)#
inline auto parallel_for(exec_place_t e_place, S shape, Deps... deps)#
inline auto parallel_for(
partitioner_t p,
exec_place_t e_place,
S shape,
Deps... deps,
)#
auto parallel_for(
exec_place_grid e_place,
S shape,
Deps... deps,
) = delete#
inline auto parallel_for(S shape, Deps... deps)#
inline explicit operator bool() const#
inline bool operator==(const backend_ctx_untyped &rhs) const#
inline bool operator!=(const backend_ctx_untyped &rhs) const#
inline async_resources_handle &async_resources() const#
inline bool reordering_tasks() const#
inline auto &get_composite_cache()#
inline ::std::pair<exec_place, bool> schedule_task(
const task &t,
) const#
inline void reorder_tasks(
::std::vector<int> &tasks,
::std::unordered_map<int, reserved::reorderer_payload> &task_map,
)#
inline void increment_task_count()#
inline void increment_finished_task_count()#
inline size_t task_count() const#
inline void set_allocator(block_allocator_untyped custom)#
inline void set_uncached_allocator(block_allocator_untyped custom)#
inline auto &get_allocator()#
inline const auto &get_allocator() const#
inline auto &get_default_allocator()#
inline auto &get_uncached_allocator()#
inline void update_uncached_allocator(
block_allocator_untyped uncached_allocator,
)#
inline void attach_allocator(block_allocator_untyped a)#
inline void add_transfer(
const data_place &src_node,
const data_place &dst_node,
size_t s,
)#
inline bool generate_event_symbols() const#
inline void enable_logical_data_stats()#
inline cudaGraph_t graph() const#
inline void set_graph_cache_policy(::std::function<bool()> policy)#
inline auto get_graph_cache_policy() const#
inline executable_graph_cache_stat *graph_get_cache_stat()#
inline event_list stream_to_event_list(
cudaStream_t stream,
::std::string event_symbol,
) const#
inline size_t stage() const#
inline ::std::string to_string() const#
inline bool track_dangling_events() const#
inline impl &get_state()#
inline const impl &get_state() const#
inline const auto &get_dot() const#
inline auto &get_dot()#
template<typename parent_ctx_t>
inline void set_parent_ctx(
parent_ctx_t &parent_ctx,
)#
inline auto dot_section(::std::string symbol) const#
inline auto get_phase() const#
inline void set_phase(backend_ctx_untyped::phase p)#
inline bool has_start_events() const#
inline const event_list &get_start_events() const#
inline void push_affinity(
::std::vector<::std::shared_ptr<exec_place>> p,
) const#
inline void push_affinity(::std::shared_ptr<exec_place> p) const#
inline void pop_affinity() const#
inline const ::std::vector<::std::shared_ptr<exec_place>> &current_affinity(
) const#
inline const exec_place &current_exec_place() const#
inline bool has_affinity() const#
inline exec_place default_exec_place() const#
inline auto pick_dstream()#
inline cudaStream_t pick_stream()#

Friends

inline friend cudaGraph_t ctx_to_graph(backend_ctx_untyped &ctx)#
inline friend size_t ctx_to_graph_stage(backend_ctx_untyped &ctx)#