CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
|
Host tensor.
#include <host_tensor.h>
Public Types | |
using | Element = Element_ |
Data type of individual access. More... | |
using | Layout = Layout_ |
Mapping function from logical coordinate to linear memory. More... | |
using | Index = typename Layout::Index |
Index type. More... | |
using | LongIndex = typename Layout::LongIndex |
Long index used for pointer offsets. More... | |
using | TensorCoord = typename Layout::TensorCoord |
Coordinate in logical tensor space. More... | |
using | Stride = typename Layout::Stride |
Layout's stride vector. More... | |
using | TensorRef = TensorRef< Element, Layout > |
Tensor reference to device memory. More... | |
using | ConstTensorRef = typename TensorRef::ConstTensorRef |
Tensor reference to constant device memory. More... | |
using | TensorView = TensorView< Element, Layout > |
Tensor view to device memory. More... | |
using | ConstTensorView = typename TensorView::ConstTensorView |
Tensor view to constant device memory. More... | |
using | Reference = typename TensorRef::Reference |
Reference to element in tensor. More... | |
using | ConstReference = typename ConstTensorRef::Reference |
Constant reference to element in tensor. More... | |
Public Member Functions | |
HostTensor () | |
Default constructor. More... | |
HostTensor (TensorCoord const &extent, bool device_backed=true) | |
Constructs a tensor given an extent. Assumes a packed layout. More... | |
HostTensor (TensorCoord const &extent, Layout const &layout, bool device_backed=true) | |
Constructs a tensor given an extent and layout. More... | |
~HostTensor () | |
void | reset () |
Clears the HostTensor allocation to size/capacity = 0. More... | |
void | reserve (size_t count, bool device_backed_=true) |
Resizes internal memory allocations without affecting layout or extent. More... | |
void | reset (TensorCoord const &extent, Layout const &layout, bool device_backed_=true) |
void | reset (TensorCoord const &extent, bool device_backed_=true) |
void | resize (TensorCoord const &extent, Layout const &layout, bool device_backed_=true) |
void | resize (TensorCoord const &extent, bool device_backed_=true) |
size_t | size () const |
Returns the number of elements stored in the host tensor. More... | |
LongIndex | capacity () const |
Returns the logical capacity based on extent and layout. May differ from size(). More... | |
Element * | host_data () |
Gets pointer to host data. More... | |
Element * | host_data_ptr_offset (LongIndex ptr_element_offset) |
Gets pointer to host data with a pointer offset. More... | |
Reference | host_data (LongIndex idx) |
Gets a reference to an element in host memory. More... | |
Element const * | host_data () const |
Gets pointer to host data. More... | |
ConstReference | host_data (LongIndex idx) const |
Gets a constant reference to an element in host memory. More... | |
Element * | device_data () |
Gets pointer to device data. More... | |
Element * | device_data_ptr_offset (LongIndex ptr_element_offset) |
Gets pointer to device data with a pointer offset. More... | |
Element const * | device_data () const |
Gets pointer to device data. More... | |
TensorRef | host_ref (LongIndex ptr_element_offset=0) |
Accesses the tensor reference pointing to data. More... | |
ConstTensorRef | host_ref (LongIndex ptr_element_offset=0) const |
Accesses the tensor reference pointing to data. More... | |
TensorRef | device_ref (LongIndex ptr_element_offset=0) |
Accesses the tensor reference pointing to data. More... | |
ConstTensorRef | device_ref (LongIndex ptr_element_offset=0) const |
Accesses the tensor reference pointing to data. More... | |
TensorView | host_view (LongIndex ptr_element_offset=0) |
Accesses the tensor view pointing to data. More... | |
ConstTensorView | host_view (LongIndex ptr_element_offset=0) const |
Accesses the tensor view pointing to data. More... | |
TensorView | device_view (LongIndex ptr_element_offset=0) |
Accesses the tensor view pointing to data. More... | |
ConstTensorView | device_view (LongIndex ptr_element_offset=0) const |
Accesses the tensor view pointing to data. More... | |
bool | device_backed () const |
Returns true if device memory is allocated. More... | |
Layout & | layout () |
Returns the layout object. More... | |
Layout | layout () const |
Returns the layout object. More... | |
Stride | stride () const |
Returns the layout object's stride vector. More... | |
Stride & | stride () |
Returns the layout object's stride vector. More... | |
Index | stride (int dim) const |
Returns the layout object's stride in a given physical dimension. More... | |
Index & | stride (int dim) |
Returns the layout object's stride in a given physical dimension. More... | |
LongIndex | offset (TensorCoord const &coord) const |
Computes the offset of an index from the origin of the tensor. More... | |
Reference | at (TensorCoord const &coord) |
Returns a reference to the element at the logical Coord in host memory. More... | |
ConstReference | at (TensorCoord const &coord) const |
Returns a const reference to the element at the logical Coord in host memory. More... | |
TensorCoord | extent () const |
Returns the extent of the tensor. More... | |
TensorCoord & | extent () |
Returns the extent of the tensor. More... | |
void | sync_host () |
Copies data from device to host. More... | |
void | sync_device () |
Copies data from host to device. More... | |
void | copy_in_device_to_host (Element const *ptr_device, LongIndex count=-1) |
Copy data from a caller-supplied device pointer into host memory. More... | |
void | copy_in_device_to_device (Element const *ptr_device, LongIndex count=-1) |
Copy data from a caller-supplied device pointer into device memory. More... | |
void | copy_in_host_to_device (Element const *ptr_host, LongIndex count=-1) |
Copy data from a caller-supplied host pointer into device memory. More... | |
void | copy_in_host_to_host (Element const *ptr_host, LongIndex count=-1) |
Copy data from a caller-supplied host pointer into host memory. More... | |
void | copy_out_device_to_host (Element *ptr_host, LongIndex count=-1) const |
Copy data from device memory to a caller-supplied host pointer. More... | |
void | copy_out_device_to_device (Element *ptr_device, LongIndex count=-1) const |
Copy data from device memory to a caller-supplied device pointer. More... | |
void | copy_out_host_to_device (Element *ptr_device, LongIndex count=-1) const |
Copy data from host memory to a caller-supplied device pointer. More... | |
void | copy_out_host_to_host (Element *ptr_host, LongIndex count=-1) const |
Copy data from host memory to a caller-supplied host pointer. More... | |
Static Public Attributes | |
static int const | kRank = Layout::kRank |
Logical rank of tensor index space. More... | |
static int const | kElementsPerStoredItem = (sizeof_bits<Element>::value < 8 ? sizeof(Element) * 8 / sizeof_bits<Element>::value : 1) |
Used to handle packing of subbyte elements. More... | |
using cutlass::HostTensor< Element_, Layout_ >::ConstReference = typename ConstTensorRef::Reference |
using cutlass::HostTensor< Element_, Layout_ >::ConstTensorRef = typename TensorRef::ConstTensorRef |
using cutlass::HostTensor< Element_, Layout_ >::ConstTensorView = typename TensorView::ConstTensorView |
using cutlass::HostTensor< Element_, Layout_ >::Element = Element_ |
using cutlass::HostTensor< Element_, Layout_ >::Index = typename Layout::Index |
using cutlass::HostTensor< Element_, Layout_ >::Layout = Layout_ |
using cutlass::HostTensor< Element_, Layout_ >::LongIndex = typename Layout::LongIndex |
using cutlass::HostTensor< Element_, Layout_ >::Reference = typename TensorRef::Reference |
using cutlass::HostTensor< Element_, Layout_ >::Stride = typename Layout::Stride |
using cutlass::HostTensor< Element_, Layout_ >::TensorCoord = typename Layout::TensorCoord |
using cutlass::HostTensor< Element_, Layout_ >::TensorRef = TensorRef<Element, Layout> |
using cutlass::HostTensor< Element_, Layout_ >::TensorView = TensorView<Element, Layout> |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline
count | number of elements to transfer; if negative, entire tensor is overwritten. |
ptr_device | source device memory |
|
inline
count | number of elements to transfer; if negative, entire tensor is overwritten. |
ptr_device | source device memory |
|
inline
count | number of elements to transfer; if negative, entire tensor is overwritten. |
ptr_host | source host memory |
|
inline
count | number of elements to transfer; if negative, entire tensor is overwritten. |
ptr_host | source host memory |
|
inline
count | number of elements to transfer; if negative, entire tensor is transferred. |
ptr_device | destination device memory |
|
inline
count | number of elements to transfer; if negative, entire tensor is transferred. |
ptr_host | destination host memory |
|
inline
count | number of elements to transfer; if negative, entire tensor is transferred. |
ptr_device | destination device memory |
|
inline
count | number of elements to transfer; if negative, entire tensor is transferred. |
ptr_host | destination host memory |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline
device_backed_ | if true, device memory is also allocated |
count | size of tensor in elements |
|
inline |
|
inline |
Updates the extent and layout of the HostTensor. Allocates memory according to the new extent and layout.
device_backed_ | if true, device memory is also allocated. |
extent | extent of logical tensor |
layout | layout object of tensor |
|
inline |
Updates the extent and layout of the HostTensor. Allocates memory according to the new extent and layout. Assumes a packed tensor configuration.
device_backed_ | if true, device memory is also allocated. |
extent | extent of logical tensor |
|
inline |
Changes the size of the logical tensor. Only allocates memory if new capacity exceeds reserved capacity. To force allocation, call reset().
device_backed_ | if true, device memory is also allocated. |
extent | extent of logical tensor |
layout | layout object of tensor |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
static |
|
static |