cuda::experimental::stf::backend_ctx#

template<typename Engine> class backend_ctx : public cuda::experimental::stf::backend_ctx_untyped #

This is a placeholder class that gathers the common utilities needed to design a backend context.

The state of the backend itself is stored elsewhere.

Public Types

enum class phase#

Current status of the context.

We keep track of the status of the context so that API calls are not made at an inappropriate time, such as synchronizing twice.

Values:

enumerator setup#

enumerator submitted#

enumerator finalized#

Public Functions

inline backend_ctx(::std::shared_ptr<impl> impl)#

~backend_ctx() = default#

template<typename T> inline cuda::experimental::stf::logical_data<T> logical_data( shape_of<T> shape, )#

Returns a logical_data object with the given shape, tied to this context.

Initial data place is invalid.

Template Parameters: T – Underlying type for the logical data object
Parameters: shape – Shape of the created object
Returns: logical_data<T> usable with this context

template<typename T> inline auto logical_data( T prototype, data_place dplace = data_place::host(), )#

template<typename T, size_t n> inline auto logical_data( T (&array)[n], data_place dplace = data_place::host(), )#

template<typename T, typename ...Sizes> inline auto logical_data( size_t elements, Sizes... more_sizes, )#

template<typename T> inline auto logical_data( T *p, size_t n, data_place dplace = data_place::host(), )#

inline auto token()#

template<typename T> inline frozen_logical_data<T> freeze( cuda::experimental::stf::logical_data<T> d, access_mode m = access_mode::read, data_place where = data_place::invalid(), bool user_freeze = true, )#

template<typename ...Deps> inline auto task(task_dep<Deps>... deps)#

Creates a typed task on the current CUDA device.

Returns: An instantiation of task with the appropriate arguments, suitable for use with operator->*.

template<typename ...Deps> inline auto host_launch( task_dep<Deps>... deps, )#

Creates an object able to launch a lambda function on the host.

Template Parameters: Deps – Dependency types
Parameters: deps – Dependencies
Returns: host_launch_scope<Deps...> ready for the ->* operator

template<typename ...Deps> inline auto cuda_kernel( task_dep<Deps>... deps, )#

template<typename ...Deps> inline auto cuda_kernel_chain( task_dep<Deps>... deps, )#

template<typename thread_hierarchy_spec_t, typename ...Deps> inline auto launch( thread_hierarchy_spec_t spec, exec_place e_place, task_dep<Deps>... deps, )#

template<typename ...Deps> inline auto launch( exec_place_host, task_dep<Deps>... deps, )#

template<typename ...Deps> inline auto launch( exec_place e_place, task_dep<Deps>... deps, )#

template<typename ...Deps> inline auto launch( task_dep<Deps>... deps, )#

inline auto repeat(size_t count)#

inline auto repeat(::std::function<bool()> condition)#

template<typename exec_place_t, typename S, typename ...Deps, typename = ::std::enable_if_t<::std::is_base_of_v<exec_place, exec_place_t>>> inline auto parallel_for( exec_place_t e_place, S shape, Deps... deps, )#

template<typename partitioner_t, typename exec_place_t, typename S, typename ...Deps, typename = ::std::enable_if_t<std::is_base_of_v<exec_place, exec_place_t>>> inline auto parallel_for( partitioner_t p, exec_place_t e_place, S shape, Deps... deps, )#

template<typename S, typename ...Deps> auto parallel_for( exec_place_grid e_place, S shape, Deps... deps, ) = delete#

template<typename S, typename ...Deps> inline auto parallel_for( S shape, Deps... deps, )#

inline explicit operator bool() const#

inline bool operator==(const backend_ctx_untyped &rhs) const#

inline bool operator!=(const backend_ctx_untyped &rhs) const#

inline async_resources_handle &async_resources() const#

inline bool reordering_tasks() const#

inline auto &get_composite_cache()#

inline ::std::pair<exec_place, bool> schedule_task( const task &t, ) const#

inline void reorder_tasks( ::std::vector<int> &tasks, ::std::unordered_map<int, reserved::reorderer_payload> &task_map, )#

inline void increment_task_count()#

inline void increment_finished_task_count()#

inline size_t task_count() const#

inline void set_allocator(block_allocator_untyped custom)#

inline void set_uncached_allocator(block_allocator_untyped custom)#

inline auto &get_allocator()#

inline const auto &get_allocator() const#

inline auto &get_default_allocator()#

inline auto &get_uncached_allocator()#

inline void update_uncached_allocator( block_allocator_untyped uncached_allocator, )#

inline void attach_allocator(block_allocator_untyped a)#

inline void add_transfer( const data_place &src_node, const data_place &dst_node, size_t s, )#

inline bool generate_event_symbols() const#

inline void enable_logical_data_stats()#

inline cudaGraph_t graph() const#

inline void set_graph_cache_policy(::std::function<bool()> policy)#

inline auto get_graph_cache_policy() const#

inline executable_graph_cache_stat *graph_get_cache_stat()#

inline event_list stream_to_event_list( cudaStream_t stream, ::std::string event_symbol, ) const#

inline size_t stage() const#

inline ::std::string to_string() const#

inline bool track_dangling_events() const#

inline impl &get_state()#

inline const impl &get_state() const#

inline const auto &get_dot() const#

inline auto &get_dot()#

template<typename parent_ctx_t> inline void set_parent_ctx( parent_ctx_t &parent_ctx, )#

inline auto dot_section(::std::string symbol) const#

inline auto get_phase() const#

inline void set_phase(backend_ctx_untyped::phase p)#

inline bool has_start_events() const#

inline const event_list &get_start_events() const#

inline void push_affinity( ::std::vector<::std::shared_ptr<exec_place>> p, ) const#

inline void push_affinity(::std::shared_ptr<exec_place> p) const#

inline void pop_affinity() const#

inline const ::std::vector<::std::shared_ptr<exec_place>> &current_affinity( ) const#

inline const exec_place &current_exec_place() const#

inline bool has_affinity() const#

inline exec_place default_exec_place() const#

inline auto pick_dstream()#

inline cudaStream_t pick_stream()#