cuda::experimental::hierarchy_dimensions_fragment

Defined in /home/runner/work/cccl/cccl/cudax/include/cuda/experimental/__hierarchy/hierarchy_dimensions.cuh

template<typename BottomUnit, typename ...Levels>
struct hierarchy_dimensions_fragment

Type representing a hierarchy of CUDA threads.

This type combines a number of level_dimensions objects to represent dimensions of a (possibly partial) hierarchy of CUDA threads. It supports accessing individual levels or queries combining dimensions of multiple levels. This type should not be created directly; the make_hierarchy or make_hierarchy_fragment functions should be used instead. For every level, the unit for its dimensions is implied by the next level in the hierarchy, except for the last level, for which it is the BottomUnit template argument. In case the BottomUnit type is thread_level, the hierarchy is considered complete and there exists an alias template for it named hierarchy_dimensions, that only takes the Levels… template argument.

Snippet

#include <cudax/hierarchy_dimensions.cuh>

auto hierarchy = make_hierarchy(grid_dims(256), block_dims<8, 8, 8>());
assert(hierarchy.level(grid).dims.x == 256);
static_assert(hierarchy.count(thread, block) == 8 * 8 * 8);

Template Parameters
  • BottomUnit – Type indicating what is the unit of the last level in the hierarchy

  • Levels – Template parameter pack with the types of levels in the hierarchy, must be level_dimensions instances or types derived from it

Public Types

template<typename Unit, typename Level>
using extents_type = decltype(::cuda::std::apply(::cuda::std::declval<detail::hierarchy_extents_helper<Unit>>(), levels_range_static<Unit, Level>(::cuda::std::declval<decltype(levels)>())))

Public Functions

inline constexpr hierarchy_dimensions_fragment(const Levels&... ls) noexcept
inline constexpr hierarchy_dimensions_fragment(Levels&&... ls) noexcept
inline constexpr hierarchy_dimensions_fragment(const BottomUnit&, const Levels&... ls) noexcept
inline constexpr hierarchy_dimensions_fragment(const BottomUnit&, Levels&&... ls) noexcept
inline constexpr hierarchy_dimensions_fragment(const ::cuda::std::tuple<Levels...> &ls) noexcept
inline constexpr hierarchy_dimensions_fragment(::cuda::std::tuple<Levels...> &&ls) noexcept
inline constexpr hierarchy_dimensions_fragment(const BottomUnit &unit, const ::cuda::std::tuple<Levels...> &ls) noexcept
inline constexpr hierarchy_dimensions_fragment(const BottomUnit &unit, ::cuda::std::tuple<Levels...> &&ls) noexcept
template<typename Unit, typename Level>
inline constexpr auto fragment(const Unit& = Unit(), const Level& = Level()) const noexcept

Get a fragment of this hierarchy.

This member function can be used to get a fragment of the hierarchy it is called on. It returns a hierarchy_dimensions_fragment that includes levels starting with the level specified in Level and ending with a level before Unit. Together with the hierarchy_add_level function, it can be used to create a new hierarchy that is a modification of an existing hierarchy.

Snippet

#include <cudax/hierarchy_dimensions.cuh>

auto hierarchy = make_hierarchy(grid_dims(256), cluster_dims<4>(), block_dims<8, 8, 8>());
auto fragment = hierarchy.fragment(block, grid);
auto new_hierarchy = hierarchy_add_level(fragment, block_dims<128>());
static_assert(new_hierarchy.count(thread, block) == 128);

Template Parameters
  • Unit – Type indicating what should be the unit of the resulting fragment

  • Level – Type indicating what should be the top most level of the resulting fragment

template<typename Unit = BottomUnit, typename Level = typename detail::get_first_level_type<Levels...>::type::level_type>
inline constexpr auto extents(const Unit& = Unit(), const Level& = Level()) const noexcept

Returns extents of multi-dimensional index space of a specified range of levels in this hierarchy.

Each dimension in the returned extents is a product of the corresponding dimension in extents of each level in the range between Level and Unit. The returned hierarchy_query_result type can be used like cuda::std::extents or dim3. Unit and Level need to be levels present in this hierarchy.

Snippet

#include <cudax/hierarchy_dimensions.cuh>
#include <cassert>

using namespace cuda::experimental;

auto hierarchy = make_hierarchy(grid_dims(256), cluster_dims<4>(), block_dims<8, 8, 8>());
static_assert(hierarchy.extents(thread, cluster).extent(0) == 4 * 8);
static_assert(hierarchy.extents(thread, cluster).extent(1) == 8);
static_assert(hierarchy.extents(thread, cluster).extent(2) == 8);

// Using default arguments:
assert(hierarchy.extents().extent(0) == 256 * 4 * 8);
assert(hierarchy.extents(cluster).extent(0) == 256);

Template Parameters
  • Unit – Specifies the unit of the requested extents

  • Level – Specifies at what CUDA hierarchy level the extents are requested

template<typename Unit = BottomUnit, typename Level = typename detail::get_first_level_type<Levels...>::type::level_type>
inline constexpr auto count(const Unit& = Unit(), const Level& = Level()) const noexcept

Returns a count of specified entities at a level in this hierarchy.

This function returns a product of all dimensions of each level in the range between Level and Unit. Unit and Level need to be levels present in this hierarchy.

Snippet

#include <cudax/hierarchy_dimensions.cuh>
#include <cassert>

using namespace cuda::experimental;

auto hierarchy = make_hierarchy(grid_dims(256), cluster_dims<4>(), block_dims<8, 8, 8>());
static_assert(hierarchy.count(thread, cluster) == 4 * 8 * 8 * 8);

// Using default arguments:
assert(hierarchy.count() == 256 * 4 * 8 * 8 * 8);
assert(hierarchy.count(cluster) == 256);

Template Parameters
  • Unit – Specifies what should be counted

  • Level – Specifies at what level the count should happen

template<typename Unit = BottomUnit, typename Level = typename detail::get_first_level_type<Levels...>::type::level_type>
inline constexpr auto index(const Unit& = Unit(), const Level& = Level()) const noexcept

Returns a 3-dimensional index of an entity the calling thread belongs to in a hierarchy level.

Returned index is in line with intrinsic CUDA indexing like threadIdx and blockIdx, extended to more unit/level combinations. Returns a hierarchy_query_result object, which can be used like cuda::std::extents or dim3. This query will use any statically available information in the hierarchy to simplify index calculation compared to the index function operating only on level types (for example if extent of a certain dimension is 1, then index will be statically 0). Unit and Level need to be present in the hierarchy. Available only in device code.

Snippet

#include <cudax/hierarchy_dimensions.cuh>
#include <cassert>

using namespace cuda::experimental;

template <typename Dimensions>
__global__ void kernel(Dimensions dims)
{
    // Can be called with the instances of level types
    auto thread_index_in_block = dims.index(thread, block);
    assert(thread_index_in_block == threadIdx);
    // With default arguments:
    auto block_index_in_grid = dims.index(block);
    assert(block_index_in_grid == blockIdx);

    // Or using the level types as template arguments
    auto thread_index_in_grid = dims.template index<thread_level, grid_level>();
}

Template Parameters
  • Unit – Specifies the entity that the index is requested for

  • Level – Specifies at what hierarchy level the index is requested

template<typename Unit = BottomUnit, typename Level = typename detail::get_first_level_type<Levels...>::type::level_type>
inline constexpr auto rank(const Unit& = Unit(), const Level& = Level()) const noexcept

Ranks an entity the calling thread belongs to in a hierarchy level.

Returns a unique numeric rank within Level of the Unit that the calling thread belongs to. Returned rank is always in the range 0 to count - 1. This query will use any statically available information in the hierarchy to simplify rank calculation compared to the rank function operating only on level types. Unit and Level need to be present in the hierarchy. Available only in device code.

Snippet

#include <cudax/hierarchy_dimensions.cuh>

using namespace cuda::experimental;

template <typename Dimensions>
__global__ void kernel(Dimensions dims)
{
    // Can be called with the instances of level types
    int thread_rank_in_block = dims.rank(thread, block);
    // With default arguments:
    int block_rank_in_grid = dims.rank(block);

    // Or using the level types as template arguments
    int thread_rank_in_grid = dims.template rank<thread_level, grid_level>();
}

Template Parameters
  • Unit – Specifies the entity that the rank is requested for

  • Level – Specifies at what level the rank is requested

template<typename Level>
inline constexpr auto level(const Level&) const noexcept

Returns level description associated with a specified hierarchy level in this hierarchy.

This function returns a copy of the object associated with the specified level, that was passed into the hierarchy on its creation. Level needs to be a level present in this hierarchy.

Snippet

#include <cudax/hierarchy_dimensions.cuh>

using namespace cuda::experimental;

auto hierarchy = make_hierarchy(grid_dims(256), cluster_dims<4>(), block_dims<8, 8, 8>());
static_assert(decltype(hierarchy.level(cluster).dims)::static_extent(0) == 4);

Template Parameters

  • Level – Specifies the requested level

Public Members

::cuda::std::tuple<Levels...> levels

Public Static Functions

template<typename Unit = BottomUnit, typename Level = typename detail::get_first_level_type<Levels...>::type::level_type>
static inline constexpr auto static_count(const Unit& = Unit(), const Level& = Level()) noexcept

Returns a compile time count of specified entities at a level in this hierarchy type.

This function returns a product of all dimensions of each level in the range between Level and Unit, if all of those dimensions are specified statically. If at least one of them is a dynamic value, this function returns cuda::std::dynamic_extent instead. Unit and Level need to be levels present in this hierarchy.

Snippet

#include <cudax/hierarchy_dimensions.cuh>
#include <cassert>

using namespace cuda::experimental;

auto hierarchy = make_hierarchy(grid_dims(256), cluster_dims<4>(), block_dims<8, 8, 8>());
static_assert(hierarchy.static_count(thread, cluster) == 4 * 8 * 8 * 8);

// Using default arguments:
assert(hierarchy.static_count() == cuda::std::dynamic_extent);

Template Parameters
  • Unit – Specifies what should be counted

  • Level – Specifies at what level the count should happen