cuda::experimental::stf::context

Defined in include/cuda/experimental/__stf/internal/context.cuh

class context

Generic context implementation.

Public Functions

context() = default: Default constructor for the context class.

inline context(cudaStream_t stream, async_resources_handle handle = async_resources_handle(nullptr))

Constructs a stream context with a CUDA stream and an optional asynchronous resource handle.

Parameters

stream – The CUDA stream to be used in the context.
handle – Optional asynchronous resource handle.

inline context(async_resources_handle handle)

Constructs a stream context with an asynchronous resource handle.

Parameters: handle – The asynchronous resource handle.

inline context(stream_ctx ctx)

Constructs a context from a stream context.

Parameters: ctx – The stream context from which the context is constructed.

inline context(graph_ctx ctx)

Constructs a context from a graph context.

Parameters: ctx – The graph context from which the context is constructed.

template<typename Ctx> inline context &operator=(Ctx ctx)

Assigns a specific context type to the context.

Template Parameters: Ctx – The type of the context to be assigned.
Parameters: ctx – The context to be assigned.
Returns: Reference to the updated context.

inline ::std::string to_string() const

Converts the context to a string representation.

Returns: A string representation of the context.

inline auto stream_to_event_list(cudaStream_t stream, ::std::string str) const: Returns an event list which depends on the completion of work in the stream.

inline void set_graph_cache_policy(::std::function<bool()> policy)

inline auto get_graph_cache_policy() const

inline executable_graph_cache_stat *graph_get_cache_stat()

inline size_t task_count() const: Returns the number of tasks created since the context was created or since the last fence (if any).

template<typename T, typename ...Sizes> inline auto logical_data(size_t elements, Sizes... othersizes)

Creates logical data with specified sizes.

Template Parameters

T – The type of the logical data.
Sizes – The sizes of the logical data dimensions.

Parameters

elements – The number of elements.
othersizes – The sizes of other dimensions.

template<typename P0, typename ...Ps> inline auto logical_data(P0 &&p0, Ps&&... ps)

Creates logical data with specified parameters.

Template Parameters

P0 – The type of the first parameter.
Ps – The types of the other parameters.

Parameters

p0 – The first parameter.
ps – The other parameters.

inline auto token()

template<typename T> inline frozen_logical_data<T> freeze(::cuda::experimental::stf::logical_data<T> d, access_mode m = access_mode::read, data_place where = data_place::invalid(), bool user_freeze = true)

template<typename T> inline auto logical_data(T *p, size_t n, data_place dplace = data_place::host())

Creates logical data from a pointer and size.

Template Parameters

T – The type of the logical data.

Parameters

p – The pointer to the data.
n – The number of elements.
dplace – The data place of the logical data (default is host).

Returns

The created logical data.

template<typename ...Deps> inline unified_task<Deps...> task(exec_place e_place, task_dep<Deps>... deps)

template<typename ...Deps> inline unified_task<Deps...> task(task_dep<Deps>... deps)

template<typename ...Deps> inline auto host_launch(task_dep<Deps>... deps)

template<typename ...Deps> inline auto cuda_kernel(task_dep<Deps>... deps)

template<typename ...Deps> inline auto cuda_kernel(exec_place e_place, task_dep<Deps>... deps)

template<typename ...Deps> inline auto cuda_kernel_chain(task_dep<Deps>... deps)

template<typename ...Deps> inline auto cuda_kernel_chain(exec_place e_place, task_dep<Deps>... deps)

inline auto repeat(size_t count)

inline auto repeat(::std::function<bool()> condition)

inline cudaStream_t fence()

inline void finalize()

inline void submit()

inline void set_allocator(block_allocator_untyped custom_allocator)

inline void attach_allocator(block_allocator_untyped custom_allocator)

inline void update_uncached_allocator(block_allocator_untyped custom)

inline void change_stage()

inline ::std::shared_ptr<reserved::per_ctx_dot> get_dot()

template<typename T> inline auto wait(::cuda::experimental::stf::logical_data<T> &ldata)

template<typename parent_ctx_t> inline void set_parent_ctx(parent_ctx_t &parent_ctx)

inline void enable_logical_data_stats()

inline auto dot_section(::std::string symbol) const: RAII-style description of a new section in the DOT file identified by its symbol.

inline bool is_graph_ctx() const

inline async_resources_handle &async_resources() const

inline void push_affinity(::std::vector<::std::shared_ptr<exec_place>> p) const

inline void push_affinity(::std::shared_ptr<exec_place> p) const

inline void pop_affinity() const

inline const ::std::vector<::std::shared_ptr<exec_place>> &current_affinity() const

inline const exec_place &current_exec_place() const

inline bool has_affinity() const

inline exec_place default_exec_place() const

Determines the default execution place for a given context, i.e. the execution place that is used when no place is explicitly provided.

By default, the current device is selected, unless an affinity was set in the context, in which case the first execution place in the current affinity is used.

Returns: The execution place used by constructs where the place is implicit.

inline graph_ctx to_graph_ctx() const

inline cudaStream_t pick_stream()

Gets a CUDA stream from the stream pool associated with the context.

This helper is intended to avoid creating CUDA streams manually. Using this stream after the context has been finalized is undefined behaviour.

Public Members

::std::variant<stream_ctx, graph_ctx> payload