34 namespace device_memory {
44 size_t bytes =
sizeof(T) * count;
46 cudaError_t cuda_error = cudaMalloc((
void**)&ptr, bytes);
47 if (cuda_error != cudaSuccess) {
58 cudaError_t cuda_error = (cudaFree(ptr));
59 if (cuda_error != cudaSuccess) {
70 void copy(T* dst, T
const* src,
size_t count, cudaMemcpyKind kind) {
72 if (bytes == 0 && count > 0)
74 cudaError_t cuda_error = (cudaMemcpy(dst, src, bytes, kind));
75 if (cuda_error != cudaSuccess) {
82 copy(dst, src, count, cudaMemcpyHostToDevice);
87 copy(dst, src, count, cudaMemcpyDeviceToHost);
92 copy(dst, src, count, cudaMemcpyDeviceToDevice);
97 copy(dst, src, count, cudaMemcpyHostToHost);
101 template <
typename OutputIterator,
typename T>
102 void insert_to_host(OutputIterator begin, OutputIterator end, T
const* device_begin) {
103 size_t elements = end - begin;
108 template <
typename T,
typename InputIterator>
110 size_t elements = end - begin;
119 template <
typename T>
124 cudaError_t cuda_error = (cudaFree(ptr));
125 if (cuda_error != cudaSuccess) {
162 T*
get()
const {
return smart_ptr.
get(); }
177 void reset(T* _ptr,
size_t _capacity) {
178 smart_ptr.
reset(_ptr);
179 capacity = _capacity;
Definition: aligned_buffer.h:35
allocation(size_t _capacity)
Constructor: allocates capacity elements on the current CUDA device.
Definition: device_memory.h:151
void insert_to_device(T *device_begin, InputIterator begin, InputIterator end)
Copies elements to device memory from host-side range.
Definition: device_memory.h:109
deleter & get_deleter()
Returns the deleter object which would be used for destruction of the managed object.
Definition: device_memory.h:186
void copy_to_device(T *dst, T const *src, size_t count=1)
Definition: device_memory.h:81
void copy(T *dst, T const *src, size_t count, cudaMemcpyKind kind)
Definition: device_memory.h:70
void operator()(T *ptr)
Definition: device_memory.h:123
T * get() const
Returns a pointer to the managed object.
Definition: device_memory.h:162
void reset()
Deletes the managed object and resets capacity to zero.
Definition: device_memory.h:171
Delete functor for CUDA device memory.
Definition: device_memory.h:122
T * release()
Releases ownership of the managed object (without deleting it) and resets capacity to zero.
Definition: device_memory.h:165
T * allocate(size_t count=1)
Allocate a buffer of count elements of type T on the current CUDA device.
Definition: device_memory.h:42
platform::unique_ptr< T, deleter > smart_ptr
Smart pointer.
Definition: device_memory.h:141
size_t capacity
Number of elements of T allocated on the current CUDA device.
Definition: device_memory.h:138
Defines the size of an element in bits.
Definition: numeric_types.h:42
void copy_host_to_host(T *dst, T const *src, size_t count=1)
Definition: device_memory.h:96
C++ exception semantics for CUDA error codes.
Top-level include for all CUTLASS numeric types.
T * operator->() const
Returns a pointer to the object owned by *this.
Definition: device_memory.h:183
void copy_to_host(T *dst, T const *src, size_t count=1)
Definition: device_memory.h:86
~allocation()
Destructor.
Definition: device_memory.h:159
const deleter & get_deleter() const
Returns the deleter object which would be used for destruction of the managed object (const overload).
Definition: device_memory.h:189
C++ exception wrapper for CUDA cudaError_t.
Definition: exceptions.h:36
allocation & operator=(allocation const &p)
Copies a device-side memory allocation.
Definition: device_memory.h:192
allocation()
Constructor: allocates no memory.
Definition: device_memory.h:148
void reset(T *_ptr, size_t _capacity)
Deletes the managed object, if owned, and replaces its reference with the given pointer and capacity.
Definition: device_memory.h:177
void free(T *ptr)
Free the buffer pointed to by ptr.
Definition: device_memory.h:56
void insert_to_host(OutputIterator begin, OutputIterator end, T const *device_begin)
Copies elements from device memory to host-side range.
Definition: device_memory.h:102
Device allocation abstraction that tracks size and capacity.
Definition: device_memory.h:120
allocation(allocation const &p)
Copy constructor.
Definition: device_memory.h:154
void copy_device_to_device(T *dst, T const *src, size_t count=1)
Definition: device_memory.h:91