cub::DeviceFind#
-
struct DeviceFind#
Public Static Functions
-
template<typename InputIteratorT, typename OutputIteratorT, typename ScanOpT, typename NumItemsT>
static inline cudaError_t FindIf( - void *d_temp_storage,
- size_t &temp_storage_bytes,
- InputIteratorT d_in,
- OutputIteratorT d_out,
- ScanOpT scan_op,
- NumItemsT num_items,
- cudaStream_t stream = nullptr
Finds the first element in the input sequence that satisfies the given predicate.
The search terminates at the first element where the predicate evaluates to true.
The index of the found element is written to
d_out.If no element satisfies the predicate,
num_itemsis written tod_out.The range
[d_out, d_out + 1)shall not overlap[d_in, d_in + num_items)in any way.When
d_temp_storageisnullptr, no work is done and the required allocation size is returned intemp_storage_bytes. See Determining Temporary Storage Requirements for usage guidance.
Added in version 3.3.0.
Snippet#
The code snippet below illustrates the finding of the first element that satisfies the predicate.
struct is_greater_than_t { int threshold; __host__ __device__ bool operator()(int value) const { return value > threshold; } };
constexpr int num_items = 8; thrust::device_vector<int> d_in = {0, 1, 2, 3, 4, 5, 6, 7}; thrust::device_vector<int> d_out(1, thrust::no_init); is_greater_than_t predicate{4}; size_t temp_storage_bytes = 0; cub::DeviceFind::FindIf(nullptr, temp_storage_bytes, d_in.begin(), d_out.begin(), predicate, num_items); thrust::device_vector<char> temp_storage(temp_storage_bytes, thrust::no_init); cub::DeviceFind::FindIf( thrust::raw_pointer_cast(temp_storage.data()), temp_storage_bytes, d_in.begin(), d_out.begin(), predicate, num_items); int expected = 5;
- Template Parameters:
InputIteratorT – [inferred] Random-access input iterator type for reading input items (may be a simple pointer type)
OutputIteratorT – [inferred] Random-access output iterator type for writing the result index (may be a simple pointer type)
ScanOpT – [inferred] Unary predicate functor type having member
bool operator()(const T &a)NumItemsT – [inferred] An integral type representing the number of input elements
- Parameters:
d_temp_storage – [in] Device-accessible allocation of temporary storage. When
nullptr, the required allocation size is written totemp_storage_bytesand no work is done.temp_storage_bytes – [inout] Reference to size in bytes of
d_temp_storageallocationd_in – [in] Random-access iterator to the input sequence of data items
d_out – [out] Random-access iterator to the output location for the index of the found element
scan_op – [in] Unary predicate functor for determining whether an element satisfies the search condition
num_items – [in] Total number of input items (i.e., the length of
d_in)stream – [in]
[optional] CUDA stream to launch kernels within. Default is stream0.
-
template<typename RangeIteratorT, typename RangeNumItemsT, typename ValuesIteratorT, typename ValuesNumItemsT, typename OutputIteratorT, typename CompareOpT>
static inline cudaError_t LowerBound( - void *d_temp_storage,
- size_t &temp_storage_bytes,
- RangeIteratorT d_range,
- RangeNumItemsT range_num_items,
- ValuesIteratorT d_values,
- ValuesNumItemsT values_num_items,
- OutputIteratorT d_output,
- CompareOpT comp,
- cudaStream_t stream = nullptr
Overview#
For each
valuein[d_values, d_values + values_num_items), performs a binary search in the range[d_range, d_range + range_num_items), usingcompas the comparator to find the iterator to the element of said range which is not ordered beforevalue.The range
[first, last)must be sorted consistently withcomp.
Added in version 3.3.0.
Snippet#
The code snippet below illustrates the lower bound search.
thrust::device_vector<int> d_range = {0, 2, 4, 6, 8}; thrust::device_vector<int> d_values = {1, 3, 5, 7}; thrust::device_vector<int> d_output(4); size_t temp_storage_bytes = 0; cub::DeviceFind::LowerBound( nullptr, temp_storage_bytes, d_range.begin(), static_cast<int>(d_range.size()), d_values.begin(), static_cast<int>(d_values.size()), d_output.begin(), cuda::std::less{}); thrust::device_vector<char> temp_storage(temp_storage_bytes, thrust::no_init); cub::DeviceFind::LowerBound( thrust::raw_pointer_cast(temp_storage.data()), temp_storage_bytes, d_range.begin(), static_cast<int>(d_range.size()), d_values.begin(), static_cast<int>(d_values.size()), d_output.begin(), cuda::std::less{}); thrust::device_vector<int> expected = {1, 2, 3, 4};
- Template Parameters:
RangeIteratorT – is a model of Random Access Iterator, whose value type forms a Relation with the value type of
ValuesIteratorTusingCompareOpTas the predicate.RangeNumItemsT – is an integral type representing the number of elements in the range to be searched.
ValuesIteratorT – is a model of Random Access Iterator, whose value type forms a Relation with the value type of
RangeIteratorTusingCompareOpTas the predicate.ValuesNumItemsT – is a model of integral type representing the number of elements in the range of values to be searched for.
OutputIteratorT – is a model of Random Access Iterator, whose value type is assignable from
RangeIteratorT’s difference type.CompareOpT – is a model of Strict Weak Ordering, which forms a Relation with the value types of
RangeIteratorTandValuesIteratorT.
- Parameters:
d_temp_storage – [in] Device-accessible allocation of temporary storage. When
nullptr, the required allocation size is written totemp_storage_bytesand no work is done.temp_storage_bytes – [inout] Reference to size in bytes of
d_temp_storageallocationd_range – [in] Iterator to the beginning of the ordered range to be searched.
range_num_items – [in] Number of elements in the ordered range to be searched.
d_values – [in] Iterator to the beginning of the range of values to be searched for.
values_num_items – [in] Number of elements in the range of values to be searched for.
d_output – [out] Iterator to the beginning of the output range.
comp – [in] Comparison function object which returns true if its first argument is ordered before the second in the Strict Weak Ordering of the range to be searched.
stream – [in] [optional] CUDA stream to launch kernels within. Default is stream0.
-
template<typename RangeIteratorT, typename RangeNumItemsT, typename ValuesIteratorT, typename ValuesNumItemsT, typename OutputIteratorT, typename CompareOpT>
static inline cudaError_t UpperBound( - void *d_temp_storage,
- size_t &temp_storage_bytes,
- RangeIteratorT d_range,
- RangeNumItemsT range_num_items,
- ValuesIteratorT d_values,
- ValuesNumItemsT values_num_items,
- OutputIteratorT d_output,
- CompareOpT comp,
- cudaStream_t stream = nullptr
Overview#
For each
valuein[d_values, d_values + values_num_items), performs a binary search in the range[d_range, d_range + range_num_items), usingcompas the comparator to find the iterator to the element of said range which is ordered aftervalue.The range
[d_range, d_range + range_num_items)must be sorted consistently withcomp.
Added in version 3.3.0.
Snippet#
The code snippet below illustrates the upper bound search.
thrust::device_vector<int> d_range = {0, 2, 4, 6, 8}; thrust::device_vector<int> d_values = {1, 3, 5, 7}; thrust::device_vector<int> d_output(4); size_t temp_storage_bytes = 0; cub::DeviceFind::UpperBound( nullptr, temp_storage_bytes, d_range.begin(), static_cast<int>(d_range.size()), d_values.begin(), static_cast<int>(d_values.size()), d_output.begin(), cuda::std::less{}); thrust::device_vector<char> temp_storage(temp_storage_bytes, thrust::no_init); cub::DeviceFind::UpperBound( thrust::raw_pointer_cast(temp_storage.data()), temp_storage_bytes, d_range.begin(), static_cast<int>(d_range.size()), d_values.begin(), static_cast<int>(d_values.size()), d_output.begin(), cuda::std::less{}); thrust::device_vector<int> expected = {1, 2, 3, 4};
- Template Parameters:
RangeIteratorT – is a model of Random Access Iterator, whose value type forms a Relation with the value type of
ValuesIteratorTusingCompareOpTas the predicate.RangeNumItemsT – is an integral type representing the number of elements in the range to be searched.
ValuesIteratorT – is a model of Random Access Iterator, whose value type forms a Relation with the value type of
RangeIteratorTusingCompareOpTas the predicate.ValuesNumItemsT – is a model of integral type representing the number of elements in the range of values to be searched for.
OutputIteratorT – is a model of Random Access Iterator, whose value type is assignable from
RangeIteratorT’s difference type.CompareOpT – is a model of Strict Weak Ordering, which forms a Relation with the value types of
RangeIteratorTandValuesIteratorT.
- Parameters:
d_temp_storage – [in] Device-accessible allocation of temporary storage. When
nullptr, the required allocation size is written totemp_storage_bytesand no work is done.temp_storage_bytes – [inout] Reference to size in bytes of
d_temp_storageallocationd_range – [in] Iterator to the beginning of the ordered range to be searched.
range_num_items – [in] Number of elements in the ordered range to be searched.
d_values – [in] Iterator to the beginning of the range of values to be searched for.
values_num_items – [in] Number of elements in the range of values to be searched for.
d_output – [out] Iterator to the beginning of the output range.
comp – [in] Comparison function object which returns true if its first argument is ordered before the second in the Strict Weak Ordering of the range to be searched.
stream – [in] [optional] CUDA stream to launch kernels within. Default is stream0.
-
template<typename InputIteratorT, typename OutputIteratorT, typename ScanOpT, typename NumItemsT, typename EnvT = ::cuda::std::execution::env<>>
static inline cudaError_t FindIf( - InputIteratorT d_in,
- OutputIteratorT d_out,
- ScanOpT scan_op,
- NumItemsT num_items,
- EnvT env = {}
Finds the first element in the input sequence that satisfies the given predicate.
Added in version 3.4.0: First appears in CUDA Toolkit 13.4.
This is an environment-based API that allows customization of:
Stream: Query via
cuda::get_streamMemory resource: Query via
cuda::mr::get_memory_resourceThe search terminates at the first element where the predicate evaluates to true.
The index of the found element is written to
d_out.If no element satisfies the predicate,
num_itemsis written tod_out.The range
[d_out, d_out + 1)shall not overlap[d_in, d_in + num_items)in any way.
Snippet#
The code snippet below illustrates the finding of the first element that satisfies the predicate.
struct is_greater_than_t { int threshold; __host__ __device__ bool operator()(int value) const { return value > threshold; } };
struct is_greater_than_t { int threshold; __host__ __device__ bool operator()(int value) const { return value > threshold; } };
- Template Parameters:
InputIteratorT – [inferred] Random-access input iterator type for reading input items (may be a simple pointer type)
OutputIteratorT – [inferred] Random-access output iterator type for writing the result index (may be a simple pointer type)
ScanOpT – [inferred] Unary predicate functor type having member
bool operator()(const T &a)NumItemsT – [inferred] An integral type representing the number of input elements
EnvT – [inferred] Environment type (e.g.,
cuda::std::execution::env<...>)
- Parameters:
d_in – [in] Random-access iterator to the input sequence of data items
d_out – [out] Random-access iterator to the output location for the index of the found element
scan_op – [in] Unary predicate functor for determining whether an element satisfies the search condition
num_items – [in] Total number of input items (i.e., the length of
d_in)env – [in]
[optional] Execution environment. Default is
cuda::std::execution::env{}.
-
template<typename RangeIteratorT, typename RangeNumItemsT, typename ValuesIteratorT, typename ValuesNumItemsT, typename OutputIteratorT, typename CompareOpT, typename EnvT = ::cuda::std::execution::env<>>
static inline cudaError_t LowerBound( - RangeIteratorT d_range,
- RangeNumItemsT range_num_items,
- ValuesIteratorT d_values,
- ValuesNumItemsT values_num_items,
- OutputIteratorT d_output,
- CompareOpT comp,
- EnvT env = {}
For each
valuein[d_values, d_values + values_num_items), performs a binary search in the range[d_range, d_range + range_num_items), usingcompas the comparator to find the iterator to the element of said range which is not ordered beforevalue.Added in version 3.4.0: First appears in CUDA Toolkit 13.4.
This is an environment-based API that allows customization of:
Stream: Query via
cuda::get_streamMemory resource: Query via
cuda::mr::get_memory_resourceThe range
[d_range, d_range + range_num_items)must be sorted consistently withcomp.
Snippet#
The code snippet below illustrates the lower bound search.
thrust::device_vector<int> d_range = {0, 2, 4, 6, 8}; thrust::device_vector<int> d_values = {1, 3, 5, 7}; thrust::device_vector<int> d_output(4); cuda::stream stream{cuda::devices[0]}; cuda::stream_ref stream_ref{stream}; auto error = cub::DeviceFind::LowerBound( d_range.begin(), static_cast<int>(d_range.size()), d_values.begin(), static_cast<int>(d_values.size()), d_output.begin(), cuda::std::less{}, stream_ref); if (error != cudaSuccess) { std::cerr << "cub::DeviceFind::LowerBound failed with status: " << error << '\n'; } thrust::device_vector<int> expected = {1, 2, 3, 4};
- Template Parameters:
RangeIteratorT – is a model of Random Access Iterator, whose value type forms a Relation with the value type of
ValuesIteratorTusingCompareOpTas the predicate.RangeNumItemsT – is an integral type representing the number of elements in the range to be searched.
ValuesIteratorT – is a model of Random Access Iterator, whose value type forms a Relation with the value type of
RangeIteratorTusingCompareOpTas the predicate.ValuesNumItemsT – is a model of integral type representing the number of elements in the range of values to be searched for.
OutputIteratorT – is a model of Random Access Iterator, whose value type is assignable from
RangeIteratorT’s difference type.CompareOpT – is a model of Strict Weak Ordering, which forms a Relation with the value types of
RangeIteratorTandValuesIteratorT.EnvT – [inferred] Environment type (e.g.,
cuda::std::execution::env<...>)
- Parameters:
d_range – [in] Iterator to the beginning of the ordered range to be searched.
range_num_items – [in] Number of elements in the ordered range to be searched.
d_values – [in] Iterator to the beginning of the range of values to be searched for.
values_num_items – [in] Number of elements in the range of values to be searched for.
d_output – [out] Iterator to the beginning of the output range.
comp – [in] Comparison function object which returns true if its first argument is ordered before the second in the Strict Weak Ordering of the range to be searched.
env – [in]
[optional] Execution environment. Default is
cuda::std::execution::env{}.
-
template<typename RangeIteratorT, typename RangeNumItemsT, typename ValuesIteratorT, typename ValuesNumItemsT, typename OutputIteratorT, typename CompareOpT, typename EnvT = ::cuda::std::execution::env<>>
static inline cudaError_t UpperBound( - RangeIteratorT d_range,
- RangeNumItemsT range_num_items,
- ValuesIteratorT d_values,
- ValuesNumItemsT values_num_items,
- OutputIteratorT d_output,
- CompareOpT comp,
- EnvT env = {}
For each
valuein[d_values, d_values + values_num_items), performs a binary search in the range[d_range, d_range + range_num_items), usingcompas the comparator to find the iterator to the element of said range which is ordered aftervalue.Added in version 3.4.0: First appears in CUDA Toolkit 13.4.
This is an environment-based API that allows customization of:
Stream: Query via
cuda::get_streamMemory resource: Query via
cuda::mr::get_memory_resourceThe range
[d_range, d_range + range_num_items)must be sorted consistently withcomp.
Snippet#
The code snippet below illustrates the upper bound search.
thrust::device_vector<int> d_range = {0, 2, 4, 6, 8}; thrust::device_vector<int> d_values = {1, 3, 5, 7}; thrust::device_vector<int> d_output(4); cuda::stream stream{cuda::devices[0]}; cuda::stream_ref stream_ref{stream}; auto error = cub::DeviceFind::UpperBound( d_range.begin(), static_cast<int>(d_range.size()), d_values.begin(), static_cast<int>(d_values.size()), d_output.begin(), cuda::std::less{}, stream_ref); if (error != cudaSuccess) { std::cerr << "cub::DeviceFind::UpperBound failed with status: " << error << '\n'; } thrust::device_vector<int> expected = {1, 2, 3, 4};
- Template Parameters:
RangeIteratorT – is a model of Random Access Iterator, whose value type forms a Relation with the value type of
ValuesIteratorTusingCompareOpTas the predicate.RangeNumItemsT – is an integral type representing the number of elements in the range to be searched.
ValuesIteratorT – is a model of Random Access Iterator, whose value type forms a Relation with the value type of
RangeIteratorTusingCompareOpTas the predicate.ValuesNumItemsT – is a model of integral type representing the number of elements in the range of values to be searched for.
OutputIteratorT – is a model of Random Access Iterator, whose value type is assignable from
RangeIteratorT’s difference type.CompareOpT – is a model of Strict Weak Ordering, which forms a Relation with the value types of
RangeIteratorTandValuesIteratorT.EnvT – [inferred] Environment type (e.g.,
cuda::std::execution::env<...>)
- Parameters:
d_range – [in] Iterator to the beginning of the ordered range to be searched.
range_num_items – [in] Number of elements in the ordered range to be searched.
d_values – [in] Iterator to the beginning of the range of values to be searched for.
values_num_items – [in] Number of elements in the range of values to be searched for.
d_output – [out] Iterator to the beginning of the output range.
comp – [in] Comparison function object which returns true if its first argument is ordered before the second in the Strict Weak Ordering of the range to be searched.
env – [in]
[optional] Execution environment. Default is
cuda::std::execution::env{}.
-
template<typename InputIteratorT, typename OutputIteratorT, typename ScanOpT, typename NumItemsT>