reducer.cuh

File members: include/cuda/experimental/__stf/internal/reducer.cuh
//===----------------------------------------------------------------------===//
//
// Part of CUDASTF in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#pragma once

#include <cuda/__cccl_config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
#  pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
#  pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
#  pragma system_header
#endif // no system header

#include <cuda/std/__cccl/execution_space.h>

#include <algorithm> // for std::min and std::max
#include <limits>

namespace cuda::experimental::stf::reducer
{

template <typename T>
class sum
{
public:
  static __host__ __device__ void init_op(T& dst)
  {
    dst = static_cast<T>(0);
  }

  static __host__ __device__ void apply_op(T& dst, const T& src)
  {
    dst += src;
  }
};

template <typename T>
class maxval
{
public:
  static __host__ __device__ void init_op(T& dst)
  {
    dst = ::std::numeric_limits<T>::lowest();
  }

  static __host__ __device__ void apply_op(T& dst, const T& src)
  {
    dst = ::std::max(dst, src);
  }
};

template <typename T>
class minval
{
public:
  static __host__ __device__ void init_op(T& dst)
  {
    dst = ::std::numeric_limits<T>::max();
  }

  static __host__ __device__ void apply_op(T& dst, const T& src)
  {
    dst = ::std::min(dst, src);
  }
};

template <typename T>
class product
{
public:
  static __host__ __device__ void init_op(T& dst)
  {
    dst = static_cast<T>(1);
  }

  static __host__ __device__ void apply_op(T& dst, const T& src)
  {
    dst *= src;
  }
};

template <typename T>
class bitwise_and
{
public:
  static __host__ __device__ void init_op(T& dst)
  {
    dst = ~static_cast<T>(0);
  }

  static __host__ __device__ void apply_op(T& dst, const T& src)
  {
    dst &= src;
  }
};

template <typename T>
class bitwise_or
{
public:
  static __host__ __device__ void init_op(T& dst)
  {
    dst = static_cast<T>(0);
  }

  static __host__ __device__ void apply_op(T& dst, const T& src)
  {
    dst |= src;
  }
};

template <typename T>
class bitwise_xor
{
public:
  static __host__ __device__ void init_op(T& dst)
  {
    dst = static_cast<T>(0);
  }

  static __host__ __device__ void apply_op(T& dst, const T& src)
  {
    dst ^= src;
  }
};

template <typename T>
class logical_and
{
public:
  static __host__ __device__ void init_op(T& dst)
  {
    dst = true; // Logical AND identity
  }

  static __host__ __device__ void apply_op(T& dst, const T& src)
  {
    dst = dst && src;
  }
};

template <typename T>
class logical_or
{
public:
  static __host__ __device__ void init_op(T& dst)
  {
    dst = false; // Logical OR identity
  }

  static __host__ __device__ void apply_op(T& dst, const T& src)
  {
    dst = dst || src;
  }
};

} // end namespace cuda::experimental::stf::reducer