cub::ThreadLoad

Defined in /home/runner/work/cccl/cccl/cub/cub/thread/thread_load.cuh

template<CacheLoadModifier MODIFIER, typename InputIteratorT>
cub::detail::value_t<InputIteratorT> cub::ThreadLoad(InputIteratorT itr)

Thread utility for reading memory using cub::CacheLoadModifier cache modifiers.

Can be used to load any data type.

Example

#include <cub/cub.cuh>   // or equivalently <cub/thread/thread_load.cuh>

// 32-bit load using cache-at-all-levels modifier
int *d_in;
int val = cub::ThreadLoad<cub::LOAD_CA>(d_in + threadIdx.x);

// 16-bit load using default modifier
short *d_in;
short val = cub::ThreadLoad<cub::LOAD_DEFAULT>(d_in + threadIdx.x);

// 256-bit load using cache-volatile modifier
double4 *d_in;
double4 val = cub::ThreadLoad<cub::LOAD_CV>(d_in + threadIdx.x);

// 32-bit load of a user-defined struct using cache-streaming modifier
struct TestFoo { bool a; short b; };
TestFoo *d_struct;
TestFoo val = cub::ThreadLoad<cub::LOAD_CS>(d_struct + threadIdx.x);

Template Parameters
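  • MODIFIER – cub::CacheLoadModifier enumeration selecting the cache behavior of the load (must be specified explicitly; it cannot be inferred from the argument)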

  • InputIteratorT – [inferred] Input iterator type (may be a simple pointer type)