The cuda::mr::resource concept

The std::pmr::memory_resource feature provides only a single allocate interface, which is sufficient for homogeneous memory systems. However, CUDA provides both synchronous and stream-ordered allocation.

With std::pmr::memory_resource there is no way to tell whether a memory resource can utilize stream-ordered allocations. Even if the application knows it can, there is no way to properly tell the memory resource to use stream-ordered allocation. Ideally, this should not be something discovered through an assert at run time, but should be checked by the compiler.

The cuda::mr::resource concept provides basic type checks to ensure that a given memory resource provides the expected allocate / deallocate interface and is also equality comparable, which covers the whole API surface of std::pmr::memory_resource. See below for different memory resources and potential pitfalls.

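To demonstrate, the following example defines several resources, only some of which are valid implementations of the cuda::mr::resource concept. The accompanying static_assert declarations check at compile time which resources satisfy the concept; using one of the invalid resources where the concept is required will result in a compile-time error.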

// Minimal stub exposing every member the resource example relies on:
// allocate, deallocate, and both equality operators.
struct valid_resource {
  // Stub allocation: the two size arguments are ignored and a null pointer is returned.
  void* allocate(std::size_t /*bytes*/, std::size_t /*alignment*/)
  {
    return nullptr;
  }

  // Stub deallocation: does nothing and is declared noexcept.
  void deallocate(void* /*ptr*/, std::size_t /*bytes*/, std::size_t /*alignment*/) noexcept
  {}

  // All instances compare equal.
  bool operator==(const valid_resource& /*other*/) const
  {
    return true;
  }

  // NOTE: C++20 added operator rewrite rules, so operator!= is not required there.
  // When compiled as C++14 / C++17, however, operator!= must be defined explicitly.
  bool operator!=(const valid_resource& other) const
  {
    return !(*this == other);
  }
};
// Compile-time check that valid_resource (allocate, deallocate, operator==, operator!=) models cuda::mr::resource.
static_assert(cuda::mr::resource<valid_resource>, "");

// Placeholder type unrelated to std::size_t; used to build deliberately mismatched signatures.
struct invalid_argument {};

// Deliberately invalid resource: allocate() takes invalid_argument as its first
// parameter instead of std::size_t. deallocate() and the comparison operators
// have the same shape as in valid_resource, so allocate() is the only deviation.
struct invalid_allocate_argument {
  void* allocate(invalid_argument, std::size_t) { return nullptr; }
  void deallocate(void*, std::size_t, std::size_t) noexcept {}
  // const-qualified so that const objects can be compared.
  bool operator==(const invalid_allocate_argument&) const { return true; }
  // Required when compiled as C++14 / C++17, which have no rewritten operator!= candidates.
  bool operator!=(const invalid_allocate_argument&) const { return false; }
};
// Asserts that the concept rejects invalid_allocate_argument, whose allocate() takes invalid_argument instead of std::size_t.
static_assert(!cuda::mr::resource<invalid_allocate_argument>, "");

// Deliberately invalid resource: allocate() returns int instead of void*.
// deallocate() and the comparison operators have the same shape as in
// valid_resource, so the return type of allocate() is the only deviation.
struct invalid_allocate_return {
  int allocate(std::size_t, std::size_t) { return 42; }
  void deallocate(void*, std::size_t, std::size_t) noexcept {}
  // const-qualified so that const objects can be compared.
  bool operator==(const invalid_allocate_return&) const { return true; }
  // Required when compiled as C++14 / C++17, which have no rewritten operator!= candidates.
  bool operator!=(const invalid_allocate_return&) const { return false; }
};
// Asserts that the concept rejects invalid_allocate_return, whose allocate() returns int rather than void*.
static_assert(!cuda::mr::resource<invalid_allocate_return>, "");

// Deliberately invalid resource: deallocate() takes invalid_argument as its
// second parameter instead of std::size_t. allocate() and the comparison
// operators have the same shape as in valid_resource, so deallocate() is the
// only deviation.
struct invalid_deallocate_argument {
  void* allocate(std::size_t, std::size_t) { return nullptr; }
  void deallocate(void*, invalid_argument, std::size_t) noexcept {}
  // const-qualified so that const objects can be compared.
  bool operator==(const invalid_deallocate_argument&) const { return true; }
  // Required when compiled as C++14 / C++17, which have no rewritten operator!= candidates.
  bool operator!=(const invalid_deallocate_argument&) const { return false; }
};
// Asserts that the concept rejects invalid_deallocate_argument, whose deallocate() takes invalid_argument instead of std::size_t.
static_assert(!cuda::mr::resource<invalid_deallocate_argument>, "");

// Provides allocate / deallocate but declares no comparison operators at all.
struct non_comparable {
  // Stub allocation: arguments are ignored and a null pointer is returned.
  void* allocate(std::size_t /*bytes*/, std::size_t /*alignment*/)
  {
    return nullptr;
  }

  // Stub deallocation: does nothing and is declared noexcept.
  void deallocate(void* /*ptr*/, std::size_t /*bytes*/, std::size_t /*alignment*/) noexcept
  {}
};
// Asserts that the concept rejects non_comparable, which defines neither operator== nor operator!=.
static_assert(!cuda::mr::resource<non_comparable>, "");

// Deliberately invalid resource: only operator!= is defined, operator== is missing.
// allocate() and deallocate() have the same shape as in valid_resource.
struct non_eq_comparable {
  void* allocate(std::size_t, std::size_t) { return nullptr; }
  void deallocate(void*, std::size_t, std::size_t) noexcept {}
  // const-qualified so that the absent operator== is the only comparison deficiency.
  bool operator!=(const non_eq_comparable&) const { return false; }
};
// Asserts that the concept rejects non_eq_comparable, which defines operator!= but no operator==.
static_assert(!cuda::mr::resource<non_eq_comparable>, "");

In addition to the std::pmr::memory_resource interface, the cuda::mr::async_resource concept verifies that a memory resource also satisfies the allocate_async / deallocate_async interface. Requiring both the PMR interface and the async interface is a deliberate design decision.

// Stub resource exposing both the synchronous members (allocate / deallocate)
// and the stream-taking members (allocate_async / deallocate_async), plus
// both equality operators.
struct valid_resource {
  // Synchronous stub allocation: always returns a null pointer.
  void* allocate(std::size_t /*bytes*/, std::size_t /*alignment*/)
  {
    return nullptr;
  }

  // Synchronous stub deallocation: does nothing and is declared noexcept.
  void deallocate(void* /*ptr*/, std::size_t /*bytes*/, std::size_t /*alignment*/) noexcept
  {}

  // Stream-taking stub allocation: always returns a null pointer.
  void* allocate_async(std::size_t /*bytes*/, std::size_t /*alignment*/, cuda::stream_ref /*stream*/)
  {
    return nullptr;
  }

  // Stream-taking stub deallocation: does nothing.
  void deallocate_async(void* /*ptr*/, std::size_t /*bytes*/, std::size_t /*alignment*/, cuda::stream_ref /*stream*/)
  {}

  // All instances compare equal.
  bool operator==(const valid_resource& /*other*/) const
  {
    return true;
  }

  bool operator!=(const valid_resource& other) const
  {
    return !(*this == other);
  }
};
// Compile-time check that valid_resource, which also provides allocate_async / deallocate_async, models cuda::mr::async_resource.
static_assert(cuda::mr::async_resource<valid_resource>, "");

A library can easily decide whether to use the async interface:

// Requests `size` bytes with alignment `align` from `resource`.
// If MemoryResource models cuda::mr::async_resource, allocate_async is called
// with `stream`; otherwise the call falls back to allocate and `stream` is unused.
template<class MemoryResource>
    requires cuda::mr::resource<MemoryResource>
void* maybe_allocate_async(MemoryResource& resource, std::size_t size, std::size_t align, cuda::stream_ref stream) {
    constexpr bool supports_async = cuda::mr::async_resource<MemoryResource>;
    if constexpr (!supports_async) {
        return resource.allocate(size, align);
    } else {
        return resource.allocate_async(size, align, stream);
    }
}

Putting them together

Applications and libraries may want to combine type checks for arbitrary properties with the {async_}resource concept. The {async_}resource_with concept allows checking resources for arbitrary properties.

// Property tag used to query a resource's required alignment.
// value_type matches the type returned by the get_property overload below.
struct required_alignment {
    using value_type = std::size_t;
};

// Example resource that advertises the required_alignment property through a
// friend get_property overload found by argument-dependent lookup.
struct my_memory_resource {
    void* allocate(std::size_t, std::size_t) { return nullptr; }
    void deallocate(void*, std::size_t, std::size_t) noexcept {}
    bool operator==(const my_memory_resource&) const { return true; }
    bool operator!=(const my_memory_resource&) const { return false; }

    // The tag type is spelled `::required_alignment` on purpose: this class also
    // declares a data member named `required_alignment`, and using the
    // unqualified name as a type here would change its meaning once the member
    // is declared (ill-formed; rejected by GCC). If the tag is moved into a
    // namespace, qualify it with that namespace instead.
    friend constexpr std::size_t get_property(const my_memory_resource& resource, ::required_alignment) noexcept {
        return resource.required_alignment;
    }

    // Value reported by get_property(resource, required_alignment{}).
    std::size_t required_alignment;
};

// Requests `size` bytes from `resource`, choosing both the allocation entry
// point and the alignment at compile time:
//   - allocate_async is used when MemoryResource models cuda::mr::async_resource,
//     otherwise allocate is used and `stream` is ignored;
//   - the alignment comes from get_property(resource, required_alignment{}) when
//     the resource advertises that property, otherwise my_default_alignment is used.
template<class MemoryResource>
    requires cuda::mr::resource<MemoryResource>
void* maybe_allocate_async_check_alignment(MemoryResource& resource, std::size_t size, cuda::stream_ref stream) {
    if constexpr(cuda::mr::async_resource_with<MemoryResource, required_alignment>) {
        // get_property takes a tag *object*, so the tag type must be instantiated.
        return resource.allocate_async(size, get_property(resource, required_alignment{}), stream);
    } else if constexpr (cuda::mr::async_resource<MemoryResource>) {
        return resource.allocate_async(size, my_default_alignment, stream);
    } else if constexpr (cuda::mr::resource_with<MemoryResource, required_alignment>) {
        return resource.allocate(size, get_property(resource, required_alignment{}));
    } else {
        return resource.allocate(size, my_default_alignment);
    }
}

// Potentially more concise: pick the alignment first, then the allocation entry point.
//
// The alignment defaults to my_default_alignment and is replaced by
// get_property(resource, required_alignment{}) when the resource advertises the
// required_alignment property. The property query sits inside `if constexpr`
// so it is only instantiated for resources that provide it, and `align` is a
// plain runtime value because it is read from the `resource` argument.
template<class MemoryResource>
    requires cuda::mr::resource<MemoryResource>
void* maybe_allocate_async_check_alignment2(MemoryResource& resource, std::size_t size, cuda::stream_ref stream) {
    std::size_t align = my_default_alignment;
    if constexpr (cuda::mr::resource_with<MemoryResource, required_alignment>) {
        // get_property takes a tag *object*, so the tag type must be instantiated.
        align = get_property(resource, required_alignment{});
    }

    if constexpr(cuda::mr::async_resource<MemoryResource>) {
        return resource.allocate_async(size, align, stream);
    } else {
        return resource.allocate(size, align);
    }
}