CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
|
Classes | |
struct | allocation |
Device allocation abstraction that tracks size and capacity. More... | |
Functions | |
template<typename T > | |
T * | allocate (size_t count=1) |
Allocate a buffer of count elements of type T on the current CUDA device. More... | |
template<typename T > | |
void | free (T *ptr) |
Free the buffer pointed to by ptr . More... | |
template<typename T > | |
void | copy (T *dst, T const *src, size_t count, cudaMemcpyKind kind) |
template<typename T > | |
void | copy_to_device (T *dst, T const *src, size_t count=1) |
template<typename T > | |
void | copy_to_host (T *dst, T const *src, size_t count=1) |
template<typename T > | |
void | copy_device_to_device (T *dst, T const *src, size_t count=1) |
template<typename T > | |
void | copy_host_to_host (T *dst, T const *src, size_t count=1) |
template<typename OutputIterator , typename T > | |
void | insert_to_host (OutputIterator begin, OutputIterator end, T const *device_begin) |
Copies elements from device memory to host-side range. More... | |
template<typename T , typename InputIterator > | |
void | insert_to_device (T *device_begin, InputIterator begin, InputIterator end) |
Copies elements to device memory from host-side range. More... | |
T * cutlass::device_memory::allocate (size_t count=1)
void cutlass::device_memory::copy (T *dst, T const *src, size_t count, cudaMemcpyKind kind)
void cutlass::device_memory::copy_device_to_device (T *dst, T const *src, size_t count=1)
void cutlass::device_memory::copy_host_to_host (T *dst, T const *src, size_t count=1)
void cutlass::device_memory::copy_to_device (T *dst, T const *src, size_t count=1)
void cutlass::device_memory::copy_to_host (T *dst, T const *src, size_t count=1)
void cutlass::device_memory::free (T *ptr)
void cutlass::device_memory::insert_to_device (T *device_begin, InputIterator begin, InputIterator end)
void cutlass::device_memory::insert_to_host (OutputIterator begin, OutputIterator end, T const *device_begin)