uninitialized_buffer.cuh

File members: include/cuda/experimental/__container/uninitialized_buffer.cuh
//===----------------------------------------------------------------------===//
//
// Part of CUDA Experimental in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef __CUDAX__CONTAINERS_UNINITIALIZED_BUFFER_H
#define __CUDAX__CONTAINERS_UNINITIALIZED_BUFFER_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
#  pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
#  pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
#  pragma system_header
#endif // no system header

#include <cuda/__memory_resource/properties.h>
#include <cuda/std/__memory/align.h>
#include <cuda/std/__new/launder.h>
#include <cuda/std/__type_traits/type_set.h>
#include <cuda/std/__utility/exchange.h>
#include <cuda/std/__utility/move.h>
#include <cuda/std/__utility/swap.h>
#include <cuda/std/span>

#include <cuda/experimental/__memory_resource/any_resource.cuh>
#include <cuda/experimental/__memory_resource/properties.cuh>

#include <cuda/std/__cccl/prologue.h>

#if defined(LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE)

namespace cuda::experimental
{

template <class _Tp, class... _Properties>
class uninitialized_buffer
{
private:
  static_assert(_CUDA_VMR::__contains_execution_space_property<_Properties...>,
                "The properties of cuda::experimental::uninitialized_buffer must contain at least one execution space "
                "property!");

  using __resource = ::cuda::experimental::any_resource<_Properties...>;

  __resource __mr_;
  size_t __count_ = 0;
  void* __buf_    = nullptr;

  template <class, class...>
  friend class uninitialized_buffer;

  template <class... _OtherProperties>
  static constexpr bool __properties_match =
    !_CCCL_TRAIT(_CUDA_VSTD::is_same,
                 _CUDA_VSTD::__make_type_set<_Properties...>,
                 _CUDA_VSTD::__make_type_set<_OtherProperties...>)
    && _CUDA_VSTD::__type_set_contains_v<_CUDA_VSTD::__make_type_set<_OtherProperties...>, _Properties...>;

  [[nodiscard]] _CCCL_HIDE_FROM_ABI static constexpr size_t __get_allocation_size(const size_t __count) noexcept
  {
    constexpr size_t __alignment = alignof(_Tp);
    return (__count * sizeof(_Tp) + (__alignment - 1)) & ~(__alignment - 1);
  }

  [[nodiscard]] _CCCL_HIDE_FROM_ABI _Tp* __get_data() const noexcept
  {
    constexpr size_t __alignment = alignof(_Tp);
    size_t __space               = __get_allocation_size(__count_);
    void* __ptr                  = __buf_;
    return _CUDA_VSTD::launder(
      static_cast<_Tp*>(_CUDA_VSTD::align(__alignment, __count_ * sizeof(_Tp), __ptr, __space)));
  }

  template <class _Tp2 = _Tp>
  [[nodiscard]] _CCCL_HIDE_FROM_ABI friend auto
  __cudax_launch_transform(::cuda::stream_ref, uninitialized_buffer& __self) noexcept
    _CCCL_TRAILING_REQUIRES(_CUDA_VSTD::span<_Tp>)(
      _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<device_accessible, _Properties...>)
  {
    return {__self.__get_data(), __self.size()};
  }

  template <class _Tp2 = _Tp>
  [[nodiscard]] _CCCL_HIDE_FROM_ABI friend auto
  __cudax_launch_transform(::cuda::stream_ref, const uninitialized_buffer& __self) noexcept
    _CCCL_TRAILING_REQUIRES(_CUDA_VSTD::span<const _Tp>)(
      _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<device_accessible, _Properties...>)
  {
    return {__self.__get_data(), __self.size()};
  }

public:
  using value_type      = _Tp;
  using reference       = _Tp&;
  using const_reference = const _Tp&;
  using pointer         = _Tp*;
  using const_pointer   = const _Tp*;
  using size_type       = size_t;

  _CCCL_HIDE_FROM_ABI uninitialized_buffer(__resource __mr, const size_t __count)
      : __mr_(_CUDA_VSTD::move(__mr))
      , __count_(__count)
      , __buf_(__count_ == 0 ? nullptr : __mr_.allocate(__get_allocation_size(__count_)))
  {}

  _CCCL_HIDE_FROM_ABI uninitialized_buffer(const uninitialized_buffer&)            = delete;
  _CCCL_HIDE_FROM_ABI uninitialized_buffer& operator=(const uninitialized_buffer&) = delete;

  _CCCL_HIDE_FROM_ABI uninitialized_buffer(uninitialized_buffer&& __other) noexcept
      : __mr_(_CUDA_VSTD::move(__other.__mr_))
      , __count_(_CUDA_VSTD::exchange(__other.__count_, 0))
      , __buf_(_CUDA_VSTD::exchange(__other.__buf_, nullptr))
  {}

  _CCCL_TEMPLATE(class... _OtherProperties)
  _CCCL_REQUIRES(__properties_match<_OtherProperties...>)
  _CCCL_HIDE_FROM_ABI uninitialized_buffer(uninitialized_buffer<_Tp, _OtherProperties...>&& __other) noexcept
      : __mr_(_CUDA_VSTD::move(__other.__mr_))
      , __count_(_CUDA_VSTD::exchange(__other.__count_, 0))
      , __buf_(_CUDA_VSTD::exchange(__other.__buf_, nullptr))
  {}

  _CCCL_HIDE_FROM_ABI uninitialized_buffer& operator=(uninitialized_buffer&& __other) noexcept
  {
    if (this == _CUDA_VSTD::addressof(__other))
    {
      return *this;
    }

    if (__buf_)
    {
      __mr_.deallocate(__buf_, __get_allocation_size(__count_));
    }

    __mr_    = _CUDA_VSTD::move(__other.__mr_);
    __count_ = _CUDA_VSTD::exchange(__other.__count_, 0);
    __buf_   = _CUDA_VSTD::exchange(__other.__buf_, nullptr);
    return *this;
  }

  _CCCL_HIDE_FROM_ABI void destroy()
  {
    if (__buf_)
    {
      __mr_.deallocate(__buf_, __get_allocation_size(__count_));
      __buf_   = nullptr;
      __count_ = 0;
    }
    auto __tmp_mr = _CUDA_VSTD::move(__mr_);
  }

  _CCCL_HIDE_FROM_ABI ~uninitialized_buffer()
  {
    destroy();
  }

  [[nodiscard]] _CCCL_HIDE_FROM_ABI pointer begin() noexcept
  {
    return __get_data();
  }

  [[nodiscard]] _CCCL_HIDE_FROM_ABI const_pointer begin() const noexcept
  {
    return __get_data();
  }

  [[nodiscard]] _CCCL_HIDE_FROM_ABI pointer end() noexcept
  {
    return __get_data() + __count_;
  }

  [[nodiscard]] _CCCL_HIDE_FROM_ABI const_pointer end() const noexcept
  {
    return __get_data() + __count_;
  }

  [[nodiscard]] _CCCL_HIDE_FROM_ABI pointer data() noexcept
  {
    return __get_data();
  }

  [[nodiscard]] _CCCL_HIDE_FROM_ABI const_pointer data() const noexcept
  {
    return __get_data();
  }

  [[nodiscard]] _CCCL_HIDE_FROM_ABI constexpr size_type size() const noexcept
  {
    return __count_;
  }

  [[nodiscard]] _CCCL_HIDE_FROM_ABI constexpr size_type size_bytes() const noexcept
  {
    return __count_ * sizeof(_Tp);
  }

  [[nodiscard]] _CCCL_HIDE_FROM_ABI const __resource& memory_resource() const noexcept
  {
    return __mr_;
  }

#  ifndef _CCCL_DOXYGEN_INVOKED // friend functions are currently broken
  _CCCL_TEMPLATE(class _Property)
  _CCCL_REQUIRES((!property_with_value<_Property>) _CCCL_AND _CUDA_VSTD::__is_included_in_v<_Property, _Properties...>)
  _CCCL_HIDE_FROM_ABI friend constexpr void get_property(const uninitialized_buffer&, _Property) noexcept {}
#  endif // _CCCL_DOXYGEN_INVOKED

  _CCCL_HIDE_FROM_ABI uninitialized_buffer __replace_allocation(const size_t __count)
  {
    // Create a new buffer with a reference to the stored memory resource and swap allocation information
    uninitialized_buffer __ret{resource_ref<_Properties...>{__mr_}, __count};
    _CUDA_VSTD::swap(__count_, __ret.__count_);
    _CUDA_VSTD::swap(__buf_, __ret.__buf_);
    return __ret;
  }
};

template <class _Tp>
using uninitialized_device_buffer = uninitialized_buffer<_Tp, device_accessible>;

} // namespace cuda::experimental

#endif // LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE

#include <cuda/std/__cccl/epilogue.h>

#endif //__CUDAX__CONTAINERS_UNINITIALIZED_BUFFER_H