cutlass/device__memory_8h_source.html

 /******************************************************************************
  * Copyright (c) 2011-2019, NVIDIA CORPORATION.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are not permitted.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  ******************************************************************************/

 #pragma once

 #include <memory>

 #include "cutlass/platform/platform.h"
 #include "cutlass/numeric_types.h"
 #include "exceptions.h"

 namespace cutlass {
 namespace device_memory {

 /******************************************************************************
  * Allocation lifetime
  ******************************************************************************/

 template <typename T>
 T* allocate(size_t count = 1) {
   T* ptr = 0;
   size_t bytes = sizeof(T) * count;

   cudaError_t cuda_error = cudaMalloc((void**)&ptr, bytes);
   if (cuda_error != cudaSuccess) {
     throw cuda_exception("Failed to allocate memory", cuda_error);
   }

   return ptr;
 }

 template <typename T>
 void free(T* ptr) {
   if (ptr) {
     cudaError_t cuda_error = (cudaFree(ptr));
     if (cuda_error != cudaSuccess) {
       throw cuda_exception("Failed to free device memory", cuda_error);
     }
   }
 }

 /******************************************************************************
  * Data movement
  ******************************************************************************/

 /// Copies \p count elements of type T between buffers with cudaMemcpy().
 ///
 /// The byte count is derived from sizeof_bits<T>::value and rounded up to a
 /// whole byte, so a trailing, partially filled byte of sub-byte elements is
 /// still transferred.
 ///
 /// \param dst    Destination buffer.
 /// \param src    Source buffer.
 /// \param count  Number of elements to copy.
 /// \param kind   Direction of the transfer, forwarded to cudaMemcpy().
 /// \throws cuda_exception if cudaMemcpy() reports an error.
 template <typename T>
 void copy(T* dst, T const* src, size_t count, cudaMemcpyKind kind) {
   size_t const bits = static_cast<size_t>(sizeof_bits<T>::value);

   // ceil(count * bits / 8), evaluated as whole groups of eight elements plus
   // a remainder so that the full bit count is never formed (it could wrap
   // size_t even when the byte count itself is representable).
   size_t bytes = (count / 8) * bits + ((count % 8) * bits + 7) / 8;

   cudaError_t cuda_error = (cudaMemcpy(dst, src, bytes, kind));
   if (cuda_error != cudaSuccess) {
     throw cuda_exception("cudaMemcpy() failed", cuda_error);
   }
 }

 template <typename T>
 void copy_to_device(T* dst, T const* src, size_t count = 1) {
   copy(dst, src, count, cudaMemcpyHostToDevice);
 }

 template <typename T>
 void copy_to_host(T* dst, T const* src, size_t count = 1) {
   copy(dst, src, count, cudaMemcpyDeviceToHost);
 }

 template <typename T>
 void copy_device_to_device(T* dst, T const* src, size_t count = 1) {
   copy(dst, src, count, cudaMemcpyDeviceToDevice);
 }

 template <typename T>
 void copy_host_to_host(T* dst, T const* src, size_t count = 1) {
   copy(dst, src, count, cudaMemcpyHostToHost);
 }

 /// Copies elements from device memory into the host-side range [begin, end).
 ///
 /// \param begin         Iterator to the first host element to overwrite.
 /// \param end           Iterator one past the last host element.
 /// \param device_begin  Pointer to the first device element to read.
 ///
 /// The element count is computed as `end - begin` and the range is handed to
 /// copy_to_host() as a raw pointer to its first element, so the iterators
 /// presumably must refer to contiguous storage -- confirm at call sites.
 template <typename OutputIterator, typename T>
 void insert_to_host(OutputIterator begin, OutputIterator end, T const* device_begin) {
   // An empty range has no first element: dereferencing `begin` below would be
   // undefined behavior, and there is nothing to copy anyway.
   if (begin == end) {
     return;
   }

   size_t elements = end - begin;
   copy_to_host(&*begin, device_begin, elements);
 }

 /// Copies elements from the host-side range [begin, end) into device memory.
 ///
 /// \param device_begin  Pointer to the first device element to overwrite.
 /// \param begin         Iterator to the first host element to read.
 /// \param end           Iterator one past the last host element.
 ///
 /// The element count is computed as `end - begin` and the range is handed to
 /// copy_to_device() as a raw pointer to its first element, so the iterators
 /// presumably must refer to contiguous storage -- confirm at call sites.
 template <typename T, typename InputIterator>
 void insert_to_device(T* device_begin, InputIterator begin, InputIterator end) {
   // An empty range has no first element: dereferencing `begin` below would be
   // undefined behavior, and there is nothing to copy anyway.
   if (begin == end) {
     return;
   }

   size_t elements = end - begin;
   copy_to_device(device_begin, &*begin, elements);
 }

 /******************************************************************************
  * "Smart" device memory allocation
  ******************************************************************************/

 /// Device allocation abstraction: owns a buffer obtained from allocate<T>()
 /// and records its capacity in elements.
 template <typename T>
 struct allocation {
   /// Delete functor for CUDA device memory.
   struct deleter {
     /// Frees \p ptr with cudaFree(). A failure is deliberately swallowed:
     /// this functor runs from reset()/destruction paths and must not throw.
     void operator()(T* ptr) {
       cudaError_t cuda_error = (cudaFree(ptr));
       if (cuda_error != cudaSuccess) {
         // noexcept
         //                throw cuda_exception("cudaFree() failed", cuda_error);
         return;
       }
     }
   };

   //
   // Data members
   //

   /// Number of elements of T allocated.
   size_t capacity;

   /// Smart pointer owning the buffer; releases it through `deleter`.
   platform::unique_ptr<T, deleter> smart_ptr;

   //
   // Methods
   //

   /// Constructor: allocates no memory.
   allocation() : capacity(0) {}

   /// Constructor: allocates \p _capacity elements via allocate<T>().
   /// Initializers are listed in member declaration order (capacity, then
   /// smart_ptr), which is the order in which they actually run.
   allocation(size_t _capacity) : capacity(_capacity), smart_ptr(allocate<T>(_capacity)) {}

   /// Copy constructor: allocates a buffer of the same capacity as \p p and
   /// copies its contents device-to-device.
   allocation(allocation const &p): capacity(p.capacity), smart_ptr(allocate<T>(p.capacity)) {
     copy_device_to_device(smart_ptr.get(), p.get(), capacity);
   }

   /// Destructor: releases the managed buffer, if any.
   ~allocation() { reset(); }

   /// Returns a pointer to the managed buffer.
   T* get() const { return smart_ptr.get(); }

   /// Releases ownership of the managed buffer (without freeing it) and resets
   /// capacity to zero. The caller becomes responsible for the returned pointer.
   T* release() {
     capacity = 0;
     return smart_ptr.release();
   }

   /// Frees the managed buffer and resets capacity to zero.
   void reset() {
     capacity = 0;
     smart_ptr.reset();
   }

   /// Frees the managed buffer, if owned, and takes over \p _ptr together with
   /// the capacity the caller states for it.
   void reset(T* _ptr, size_t _capacity) {
     smart_ptr.reset(_ptr);
     capacity = _capacity;
   }

   /// Returns a pointer to the buffer owned by *this.
   T* operator->() const { return smart_ptr.get(); }

   /// Returns the deleter object used to destroy the managed buffer.
   deleter& get_deleter() { return smart_ptr.get_deleter(); }

   /// Returns the deleter object used to destroy the managed buffer (const).
   const deleter& get_deleter() const { return smart_ptr.get_deleter(); }

   /// Copy assignment: makes *this hold a copy of the contents of \p p.
   /// The existing buffer is reused when the capacities already match;
   /// otherwise a new buffer of p.capacity elements replaces it.
   allocation & operator=(allocation const &p) {
     // Self-assignment: the contents are already identical, so skip the
     // device copy of the buffer onto itself.
     if (this == &p) {
       return *this;
     }

     if (capacity != p.capacity) {
       // allocate<T>() runs before the old buffer is released, so a failed
       // allocation leaves *this untouched.
       smart_ptr.reset(allocate<T>(p.capacity));
       capacity = p.capacity;
     }
     copy_device_to_device(smart_ptr.get(), p.get(), capacity);
     return *this;
   }
 };

 }  // namespace device_memory
 }  // namespace cutlass
cutlass
Definition: aligned_buffer.h:35

cutlass::device_memory::allocation::allocation
allocation(size_t _capacity)
Constructor: allocates capacity elements on the current CUDA device.
Definition: device_memory.h:151

cutlass::device_memory::insert_to_device
void insert_to_device(T *device_begin, InputIterator begin, InputIterator end)
Copies elements to device memory from host-side range.
Definition: device_memory.h:109

cutlass::device_memory::allocation::get_deleter
deleter & get_deleter()
Returns the deleter object which would be used for destruction of the managed object.
Definition: device_memory.h:186

cutlass::device_memory::copy_to_device
void copy_to_device(T *dst, T const *src, size_t count=1)
Definition: device_memory.h:81

cutlass::device_memory::copy
void copy(T *dst, T const *src, size_t count, cudaMemcpyKind kind)
Definition: device_memory.h:70

cutlass::device_memory::allocation::deleter::operator()
void operator()(T *ptr)
Definition: device_memory.h:123

cutlass::device_memory::allocation::get
T * get() const
Returns a pointer to the managed object.
Definition: device_memory.h:162

cutlass::device_memory::allocation::reset
void reset()
Deletes the managed object and resets capacity to zero.
Definition: device_memory.h:171

platform.h
C++ features that may be otherwise unimplemented for CUDA device functions.

cutlass::device_memory::allocation::deleter
Delete functor for CUDA device memory.
Definition: device_memory.h:122

cutlass::platform::unique_ptr
std::unique_ptr
Definition: platform.h:712

cutlass::device_memory::allocation::release
T * release()
Releases the ownership of the managed object (without deleting) and resets capacity to zero.
Definition: device_memory.h:165

cutlass::platform::unique_ptr::get
pointer get() const noexcept
Returns a pointer to the managed object or nullptr if no object is owned.
Definition: platform.h:735

cutlass::device_memory::allocate
T * allocate(size_t count=1)
Allocate a buffer of count elements of type T on the current CUDA device.
Definition: device_memory.h:42

cutlass::device_memory::allocation::smart_ptr
platform::unique_ptr< T, deleter > smart_ptr
Smart pointer.
Definition: device_memory.h:141

cutlass::device_memory::allocation::capacity
size_t capacity
Number of elements of T allocated on the current CUDA device.
Definition: device_memory.h:138

cutlass::sizeof_bits
Defines the size of an element in bits.
Definition: numeric_types.h:42

cutlass::device_memory::copy_host_to_host
void copy_host_to_host(T *dst, T const *src, size_t count=1)
Definition: device_memory.h:96

exceptions.h
C++ exception semantics for CUDA error codes.

numeric_types.h
Top-level include for all CUTLASS numeric types.

cutlass::platform::unique_ptr::get_deleter
Deleter & get_deleter() noexcept
Returns the deleter object.
Definition: platform.h:757

cutlass::device_memory::allocation::operator->
T * operator->() const
Returns a pointer to the object owned by *this.
Definition: device_memory.h:183

cutlass::platform::unique_ptr::reset
void reset(pointer p=pointer()) noexcept
Replaces the managed object, deleting the old object.
Definition: platform.h:745

cutlass::device_memory::copy_to_host
void copy_to_host(T *dst, T const *src, size_t count=1)
Definition: device_memory.h:86

cutlass::device_memory::allocation::~allocation
~allocation()
Destructor.
Definition: device_memory.h:159

cutlass::device_memory::allocation::get_deleter
const deleter & get_deleter() const
Returns the deleter object which would be used for destruction of the managed object (const).
Definition: device_memory.h:189

cutlass::cuda_exception
C++ exception wrapper for CUDA cudaError_t.
Definition: exceptions.h:36

cutlass::device_memory::allocation::operator=
allocation & operator=(allocation const &p)
Copies a device-side memory allocation.
Definition: device_memory.h:192

cutlass::device_memory::allocation::allocation
allocation()
Constructor: allocates no memory.
Definition: device_memory.h:148

cutlass::device_memory::allocation::reset
void reset(T *_ptr, size_t _capacity)
Deletes the managed object, if owned, and replaces its reference with a given pointer and capacity.
Definition: device_memory.h:177

cutlass::device_memory::free
void free(T *ptr)
Free the buffer pointed to by ptr.
Definition: device_memory.h:56

cutlass::device_memory::insert_to_host
void insert_to_host(OutputIterator begin, OutputIterator end, T const *device_begin)
Copies elements from device memory to host-side range.
Definition: device_memory.h:102

cutlass::device_memory::allocation
Device allocation abstraction that owns a device buffer and tracks its capacity in elements.
Definition: device_memory.h:120

cutlass::platform::unique_ptr::release
pointer release() noexcept
Releases ownership of the managed object, if any.
Definition: platform.h:738

cutlass::device_memory::allocation::allocation
allocation(allocation const &p)
Copy constructor.
Definition: device_memory.h:154

cutlass::device_memory::copy_device_to_device
void copy_device_to_device(T *dst, T const *src, size_t count=1)
Definition: device_memory.h:91