
Defined in include/cuda/experimental/__stf/graph/graph_ctx.cuh

class graph_ctx : public cuda::experimental::stf::backend_ctx<graph_ctx>

A graph context, which is a CUDA graph that we can automatically build using tasks.

Unnamed Group

inline graph_ctx(async_resources_handle handle = async_resources_handle(nullptr))

This type is copyable, assignable, and movable. However, copies have reference semantics.

inline graph_ctx(cudaStream_t user_stream, async_resources_handle handle = async_resources_handle(nullptr))

This type is copyable, assignable, and movable. However, copies have reference semantics.

inline graph_ctx(cudaGraph_t g)

Constructor taking a user-provided graph. User code is not supposed to destroy the graph later.

Public Types

using task_type = graph_task<>
template<typename T>
using data_interface = typename graphed_interface_of<T>::type

Definition for the underlying implementation of data_interface<T>

Template Parameters


enum class phase

Current context status

We keep track of the status of the context so that we do not make API calls at an inappropriate time, such as synchronizing twice.


enumerator setup
enumerator submitted
enumerator finalized

Public Functions

inline ::std::string to_string() const
template<typename ...Deps>
inline auto task(exec_place e_place, task_dep<Deps>... deps)

Creates a typed task on the specified execution place.

inline cudaStream_t task_fence()
inline void finalize()
inline void submit(cudaStream_t stream = nullptr)
inline void change_epoch()
inline ::std::shared_ptr<cudaGraph_t> get_shared_graph() const

Get the “support” graph associated with the context.

inline cudaGraph_t &get_graph() const
inline size_t get_graph_epoch() const
inline auto get_exec_graph() const
inline ::std::shared_ptr<cudaGraph_t> finalize_as_graph()
inline ::std::shared_ptr<cudaGraphExec_t> instantiate()
inline void display_graph_info(cudaGraph_t g)
inline void print_to_dot(const ::std::string &filename, enum cudaGraphDebugDotFlags flags = cudaGraphDebugDotFlags(0))
template<typename T>
inline auto wait(cuda::experimental::stf::logical_data<T> &ldata)
inline cuda::experimental::stf::logical_data<T> logical_data(shape_of<T> shape)

Returns a logical_data object with the given shape, tied to this graph.

Initial data place is invalid.

Template Parameters

T – Underlying type for the logical data object


Parameters

shape – shape of the created object


Returns

logical_data<T> usable with this graph

inline auto logical_data(T prototype, data_place dplace = data_place::host)
inline auto logical_data(T (&array)[n], data_place dplace = data_place::host)
inline auto logical_data(size_t elements, Sizes... more_sizes)
inline auto logical_data(T *p, size_t n, data_place dplace = data_place::host)
inline auto logical_token()
inline frozen_logical_data<T> freeze(cuda::experimental::stf::logical_data<T> d, access_mode m = access_mode::read, data_place where = data_place::invalid)
inline auto task(task_dep<Deps>... deps)

Creates a typed task on the current CUDA device.


Returns

An instantiation of task with the appropriate arguments, suitable for use with operator->*.

inline auto host_launch(task_dep<Deps>... deps)

Creates an object able to launch a lambda function on the host.

Template Parameters

Deps – Dependency types


Parameters

deps – dependencies


Returns

host_launch_scope<Deps...> ready for the ->* operator

inline auto cuda_kernel(task_dep<Deps>... deps)
inline auto cuda_kernel_chain(task_dep<Deps>... deps)
inline auto launch(thread_hierarchy_spec_t spec, exec_place e_place, task_dep<Deps>... deps)
inline auto launch(exec_place_host, task_dep<Deps>... deps)
inline auto launch(exec_place e_place, task_dep<Deps>... deps)
inline auto launch(task_dep<Deps>... deps)
inline auto repeat(size_t count)
inline auto repeat(::std::function<bool()> condition)
inline auto parallel_for(exec_place e_place, S shape, Deps... deps)
inline auto parallel_for(partitioner_t, exec_place e_place, S shape, Deps... deps)
auto parallel_for(exec_place_grid e_place, S shape, Deps... deps) = delete
inline auto parallel_for(partitioner_t p, exec_place_grid e_place, S shape, Deps... deps)
inline auto parallel_for(S shape, task_dep<Deps, Ops, flags>... deps)
inline explicit operator bool() const
inline bool operator==(const backend_ctx_untyped &rhs) const
inline bool operator!=(const backend_ctx_untyped &rhs) const
inline async_resources_handle &async_resources() const
inline auto &get_stack()
inline bool reordering_tasks() const
inline auto &get_composite_cache()
inline ::std::pair<exec_place, bool> schedule_task(const task &t) const
inline void reorder_tasks(::std::vector<int> &tasks, ::std::unordered_map<int, reserved::reorderer_payload> &task_map)
inline void increment_task_count()
inline size_t task_count() const
inline void set_allocator(block_allocator_untyped custom)
inline void set_uncached_allocator(block_allocator_untyped custom)
inline auto &get_allocator()
inline const auto &get_allocator() const
inline auto &get_default_allocator()
inline auto &get_uncached_allocator()
inline void update_uncached_allocator(block_allocator_untyped uncached_allocator)
inline void attach_allocator(block_allocator_untyped a)
inline void add_transfer(const data_place &src_node, const data_place &dst_node, size_t s)
inline bool generate_event_symbols() const
inline cudaGraph_t graph() const
inline event_list stream_to_event_list(cudaStream_t stream, ::std::string event_symbol) const
inline size_t epoch() const
inline impl &get_state()
inline const impl &get_state() const
inline const auto &get_dot() const
inline auto &get_dot()
template<typename parent_ctx_t>
inline void set_parent_ctx(parent_ctx_t &parent_ctx)
inline void dot_push_section(::std::string symbol) const
inline void dot_pop_section() const
inline auto dot_section(::std::string symbol) const
inline auto get_phase() const
inline void set_phase(backend_ctx_untyped::phase p)
inline bool has_start_events() const
inline const event_list &get_start_events() const
inline void push_affinity(::std::vector<::std::shared_ptr<exec_place>> p) const
inline void push_affinity(::std::shared_ptr<exec_place> p) const
inline void pop_affinity() const
inline const ::std::vector<::std::shared_ptr<exec_place>> &current_affinity() const
inline const exec_place &current_exec_place() const
inline bool has_affinity() const
inline exec_place default_exec_place() const
inline auto pick_dstream()
inline cudaStream_t pick_stream()

Public Static Functions

static inline bool try_updating_executable_graph(cudaGraphExec_t exec_graph, cudaGraph_t graph)


inline friend cudaGraph_t ctx_to_graph(backend_ctx_untyped &ctx)
inline friend size_t ctx_to_graph_epoch(backend_ctx_untyped &ctx)