cuda::experimental::stf::graph_ctx

Defined in include/cuda/experimental/__stf/graph/graph_ctx.cuh

class graph_ctx : public cuda::experimental::stf::backend_ctx<graph_ctx>

A graph context, which is a CUDA graph that we can automatically build using tasks.

Unnamed Group

inline graph_ctx(async_resources_handle handle = async_resources_handle(nullptr)): This type is copyable, assignable, and movable. However, copies have reference semantics.

inline graph_ctx(cudaStream_t user_stream, async_resources_handle handle = async_resources_handle(nullptr)): This type is copyable, assignable, and movable. However, copies have reference semantics.

inline graph_ctx(cudaGraph_t g): Constructor taking a user-provided graph. User code is not supposed to destroy the graph later.

Public Types

using task_type = graph_task<>

template<typename T> using data_interface = typename graphed_interface_of<T>::type

Definition for the underlying implementation of data_interface<T>

Template Parameters: T – Type for which the data interface is defined (resolved through graphed_interface_of<T>)

enum class phase

Current context status.

We keep track of the status of the context so that we do not make API calls at an inappropriate time, such as synchronizing twice.

Values:

enumerator setup

enumerator submitted

enumerator finalized

Public Functions

template<typename ...Deps> inline auto task(exec_place e_place, task_dep<Deps>... deps): Creates a typed task on the specified execution place.

inline cudaStream_t fence()

inline void finalize()

inline void submit(cudaStream_t stream = nullptr)

inline void change_stage()

inline ::std::shared_ptr<cudaGraph_t> get_shared_graph() const: Get the “support” graph associated with the context.

inline cudaGraph_t &get_graph() const

inline size_t get_graph_stage() const

inline auto get_exec_graph() const

inline ::std::shared_ptr<cudaGraph_t> finalize_as_graph()

inline ::std::shared_ptr<cudaGraphExec_t> instantiate()

inline void display_graph_info(cudaGraph_t g)

inline void print_to_dot(const ::std::string &filename, enum cudaGraphDebugDotFlags flags = cudaGraphDebugDotFlags(0))

template<typename T> inline auto wait(cuda::experimental::stf::logical_data<T> &ldata)

inline cuda::experimental::stf::logical_data<T> logical_data(shape_of<T> shape)

Returns a logical_data object with the given shape, tied to this graph.

Initial data place is invalid.

Template Parameters: T – Underlying type for the logical data object
Parameters: shape – shape of the created object
Returns: logical_data<T> usable with this graph

inline auto logical_data(T prototype, data_place dplace = data_place::host())

inline auto logical_data(T (&array)[n], data_place dplace = data_place::host())

inline auto logical_data(size_t elements, Sizes... more_sizes)

inline auto logical_data(T *p, size_t n, data_place dplace = data_place::host())

inline auto token()

inline frozen_logical_data<T> freeze(cuda::experimental::stf::logical_data<T> d, access_mode m = access_mode::read, data_place where = data_place::invalid(), bool user_freeze = true)

inline auto task(task_dep<Deps>... deps)

Creates a typed task on the current CUDA device.

Returns: An instantiation of task with the appropriate arguments, suitable for use with operator->*.

inline auto host_launch(task_dep<Deps>... deps)

Creates an object able to launch a lambda function on the host.

Template Parameters: Deps – Dependency types
Parameters: deps – dependencies
Returns: host_launch_scope<Deps...> ready for the ->* operator

inline auto cuda_kernel(task_dep<Deps>... deps)

inline auto cuda_kernel_chain(task_dep<Deps>... deps)

inline auto launch(thread_hierarchy_spec_t spec, exec_place e_place, task_dep<Deps>... deps)

inline auto launch(exec_place_host, task_dep<Deps>... deps)

inline auto launch(exec_place e_place, task_dep<Deps>... deps)

inline auto launch(task_dep<Deps>... deps)

inline auto repeat(size_t count)

inline auto repeat(::std::function<bool()> condition)

inline auto parallel_for(exec_place_t e_place, S shape, Deps... deps)

inline auto parallel_for(partitioner_t, exec_place_t e_place, S shape, Deps... deps)

auto parallel_for(exec_place_grid e_place, S shape, Deps... deps) = delete

inline auto parallel_for(S shape, task_dep<Deps, Ops, flags>... deps)

inline explicit operator bool() const

inline bool operator==(const backend_ctx_untyped &rhs) const

inline bool operator!=(const backend_ctx_untyped &rhs) const

inline async_resources_handle &async_resources() const

inline bool reordering_tasks() const

inline auto &get_composite_cache()

inline ::std::pair<exec_place, bool> schedule_task(const task &t) const

inline void reorder_tasks(::std::vector<int> &tasks, ::std::unordered_map<int, reserved::reorderer_payload> &task_map)

inline void increment_task_count()

inline void increment_finished_task_count()

inline size_t task_count() const

inline void set_allocator(block_allocator_untyped custom)

inline void set_uncached_allocator(block_allocator_untyped custom)

inline auto &get_allocator()

inline const auto &get_allocator() const

inline auto &get_default_allocator()

inline auto &get_uncached_allocator()

inline void update_uncached_allocator(block_allocator_untyped uncached_allocator)

inline void attach_allocator(block_allocator_untyped a)

inline void add_transfer(const data_place &src_node, const data_place &dst_node, size_t s)

inline bool generate_event_symbols() const

inline void enable_logical_data_stats()

inline cudaGraph_t graph() const

inline void set_graph_cache_policy(::std::function<bool()> policy)

inline auto get_graph_cache_policy() const

inline executable_graph_cache_stat *graph_get_cache_stat()

inline event_list stream_to_event_list(cudaStream_t stream, ::std::string event_symbol) const

inline size_t stage() const

inline ::std::string to_string() const

inline bool track_dangling_events() const

inline impl &get_state()

inline const impl &get_state() const

inline const auto &get_dot() const

inline auto &get_dot()

template<typename parent_ctx_t> inline void set_parent_ctx(parent_ctx_t &parent_ctx)

inline auto dot_section(::std::string symbol) const

inline auto get_phase() const

inline void set_phase(backend_ctx_untyped::phase p)

inline bool has_start_events() const

inline const event_list &get_start_events() const

inline void push_affinity(::std::vector<::std::shared_ptr<exec_place>> p) const

inline void push_affinity(::std::shared_ptr<exec_place> p) const

inline void pop_affinity() const

inline const ::std::vector<::std::shared_ptr<exec_place>> &current_affinity() const

inline const exec_place &current_exec_place() const

inline bool has_affinity() const

inline exec_place default_exec_place() const

inline auto pick_dstream()

inline cudaStream_t pick_stream()

Friends

inline friend cudaGraph_t ctx_to_graph(backend_ctx_untyped &ctx)

inline friend size_t ctx_to_graph_stage(backend_ctx_untyped &ctx)