// include/cuda/experimental/__device/all_devices.cuh
// File members: include/cuda/experimental/__device/all_devices.cuh
//===----------------------------------------------------------------------===//
//
// Part of CUDA Experimental in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//
#ifndef _CUDAX__DEVICE_ALL_DEVICES
#define _CUDAX__DEVICE_ALL_DEVICES
#include <cuda/__cccl_config>
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header
#include <cuda/std/__cuda/api_wrapper.h>
#include <cuda/std/cassert>
#include <cuda/std/detail/libcxx/include/stdexcept>
#include <cuda/experimental/__device/device.cuh>
#include <vector>
namespace cuda::experimental
{
namespace detail
{
//! @brief An immutable, random-access "view" of every CUDA device present in
//! the system. Iterators and element accessors return `const device&` only;
//! the underlying vector is created lazily on first use (see `__devices()`).
class all_devices
{
public:
  // Types mirror those of the underlying ::std::vector<device>. Both
  // `iterator` and `const_iterator` are const iterators: the list is
  // read-only to clients.
  using size_type = ::std::vector<device>::size_type;
  using iterator = ::std::vector<device>::const_iterator;
  using const_iterator = ::std::vector<device>::const_iterator;
  all_devices() = default;
  //! @brief Unchecked element access; asserts in debug builds if `__i` is out of range.
  _CCCL_NODISCARD const device& operator[](size_type __i) const noexcept;
  //! @brief Checked element access; throws std::out_of_range if `__i` is out of range.
  _CCCL_NODISCARD const device& at(size_type __i) const;
  //! @brief Number of CUDA devices visible to the runtime.
  _CCCL_NODISCARD size_type size() const;
  _CCCL_NODISCARD iterator begin() const noexcept;
  _CCCL_NODISCARD iterator end() const noexcept;
private:
  // Forward-iterator used to construct the device vector in place,
  // one element per device ordinal (defined below).
  struct __initializer_iterator;
  // Lazily-initialized singleton list of all devices.
  static const ::std::vector<device>& __devices();
};
//! @brief A forward iterator over device ordinals used to build the global
//! device vector in place. Dereferencing yields an `__emplace_device` proxy
//! holding the current ordinal, so `vector`'s range constructor can emplace
//! one `device` per ordinal without `device` being copyable.
struct all_devices::__initializer_iterator
{
  using value_type        = __emplace_device;
  using reference         = __emplace_device;
  using iterator_category = ::std::forward_iterator_tag;
  using difference_type   = int;
  using pointer           = __emplace_device;

  int __id_; // current device ordinal

  // Both dereference operators hand back a proxy by value.
  __emplace_device operator*() const noexcept
  {
    return __emplace_device{__id_};
  }

  __emplace_device operator->() const noexcept
  {
    return __emplace_device{__id_};
  }

  // Pre-increment: advance to the next ordinal.
  __initializer_iterator& operator++() noexcept
  {
    ++__id_;
    return *this;
  }

  // Post-increment: return the iterator's prior state.
  __initializer_iterator operator++(int) noexcept
  {
    __initializer_iterator __prev = *this;
    ++__id_;
    return __prev;
  }

  // Iterators compare equal exactly when they refer to the same ordinal.
  bool operator==(const __initializer_iterator& __other) const noexcept
  {
    return __id_ == __other.__id_;
  }

  bool operator!=(const __initializer_iterator& __other) const noexcept
  {
    return !(*this == __other);
  }
};
//! @brief Unchecked access to the device at index `__id_`.
//! @pre `__id_ < size()` — enforced only by a debug-mode assertion.
_CCCL_NODISCARD inline const device& all_devices::operator[](size_type __id_) const noexcept
{
  _CCCL_ASSERT(__id_ < size(), "cuda::experimental::all_devices::subscript device index out of range");
  const auto& __list = __devices();
  return __list[__id_];
}
//! @brief Bounds-checked access to the device at index `__id_`.
//! @throws std::out_of_range when `__id_` is not a valid device index.
_CCCL_NODISCARD inline const device& all_devices::at(size_type __id_) const
{
  const auto& __list = __devices();
  if (__id_ < __list.size())
  {
    return __list[__id_];
  }
  _CUDA_VSTD::__throw_out_of_range("device index out of range");
}
//! @brief Returns the number of CUDA devices visible to the runtime.
_CCCL_NODISCARD inline all_devices::size_type all_devices::size() const
{
  const auto& __list = __devices();
  return __list.size();
}
//! @brief Const iterator to the first device in the global list.
_CCCL_NODISCARD inline all_devices::iterator all_devices::begin() const noexcept
{
  const auto& __list = __devices();
  return __list.begin();
}
//! @brief Const iterator one past the last device in the global list.
_CCCL_NODISCARD inline all_devices::iterator all_devices::end() const noexcept
{
  const auto& __list = __devices();
  return __list.end();
}
//! @brief Lazily builds and returns the singleton vector of all devices.
//! The vector is populated exactly once (thread-safe via the C++11
//! function-local static guarantee) by emplacing one `device` per ordinal
//! reported by `cudaGetDeviceCount`.
inline const ::std::vector<device>& all_devices::__devices()
{
  static const ::std::vector<device> __list = []() -> ::std::vector<device> {
    int __device_count = 0;
    _CCCL_TRY_CUDA_API(::cudaGetDeviceCount, "failed to get the count of CUDA devices", &__device_count);
    __initializer_iterator __first{0};
    __initializer_iterator __last{__device_count};
    return {__first, __last};
  }();
  return __list;
}
} // namespace detail
//! @brief Global, stateless handle enumerating all CUDA devices in the system.
//! `constexpr` is valid because `all_devices` holds no data members; the actual
//! device list is materialized lazily inside `detail::all_devices::__devices()`.
inline constexpr detail::all_devices devices{};
//! @brief Returns the architecture traits of the device this `device_ref`
//! refers to, looked up via the global `devices` list.
inline const arch_traits_t& device_ref::arch_traits() const
{
  const device& __dev = devices[get()];
  return __dev.arch_traits();
}
//! @brief Returns every other device that has bidirectional peer access
//! with this one. The calling device itself is never included.
_CCCL_NODISCARD inline ::std::vector<device_ref> device_ref::get_peers() const
{
  ::std::vector<device_ref> __peers;
  __peers.reserve(devices.size());
  for (const device& __candidate : devices)
  {
    // Skip the device this API is called on. The main use case for this API
    // is enable/disable peer access. While enable peer access can be called
    // on the device on which memory resides, disable peer access will
    // error-out. Usage of the peer access control is smoother when *this is
    // excluded; callers needing the full peer group (for cases other than
    // peer access control) can simply .push_back() this device afterwards.
    if (__candidate == *this)
    {
      continue;
    }
    // While in almost all practical applications peer access should be
    // symmetrical, it is possible to build a system with one-directional
    // peer access — check both directions here just to be safe.
    if (has_peer_access_to(__candidate) && __candidate.has_peer_access_to(*this))
    {
      __peers.push_back(__candidate);
    }
  }
  return __peers;
}
} // namespace cuda::experimental
#endif // _CUDAX__DEVICE_ALL_DEVICES