cuda::mul_sat_overflow#

Defined in <cuda/numeric> header.

namespace cuda {

template <class T>
struct overflow_result;

template <class T>
[[nodiscard]] __host__ __device__ constexpr
overflow_result<T> mul_sat_overflow(T lhs, T rhs) noexcept; // (1)

template <class T>
[[nodiscard]] __host__ __device__ constexpr
bool mul_sat_overflow(T& result, T lhs, T rhs) noexcept; // (2)

} // namespace cuda

The function cuda::mul_sat_overflow performs saturating multiplication of two values lhs and rhs with overflow checking.

Parameters

  • result: The result of the saturating multiplication (2).

  • lhs: The left-hand side operand (1, 2).

  • rhs: The right-hand side operand (1, 2).

Return value

  1. Returns an overflow_result object containing the result of the saturating multiplication and a boolean indicating whether an overflow or underflow occurred.

  2. Returns true if an overflow or underflow occurred, false otherwise.

Constraints

Performance considerations

  • Functionality is implemented by correcting the cuda::mul_overflow result in case of overflow/underflow.

  • Unsigned computations are generally faster than signed computations.

Example#

#include <cuda/numeric>
#include <cuda/std/cassert>
#include <cuda/std/limits>

__global__ void kernel()
{
    constexpr auto int_max = cuda::std::numeric_limits<int>::max();
    constexpr auto int_min = cuda::std::numeric_limits<int>::min();

    const auto result = cuda::mul_sat_overflow(int_max, int_min); // saturated
    assert(result.value == int_min);
    assert(result.overflow);

    int value;
    if (cuda::mul_sat_overflow(value, 4, 8))
    {
        assert(false); // shouldn't be reached
    }
    assert(value == 32);
}

int main()
{
    kernel<<<1, 1>>>();
    cudaDeviceSynchronize();
}

See it on Godbolt 🔗