CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
host_tensor.h
/***************************************************************************************************
 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are permitted
 * provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright notice, this list of
 *       conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright notice, this list of
 *       conditions and the following disclaimer in the documentation and/or other materials
 *       provided with the distribution.
 *     * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
 *       to endorse or promote products derived from this software without specific prior written
 *       permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 **************************************************************************************************/
#pragma once

#include <vector>

#include "cutlass/cutlass.h"
#include "cutlass/matrix_traits.h"
#include "cutlass/tensor_ref.h"
#include "cutlass/tensor_view.h"

#include "device_memory.h"

namespace cutlass {

/////////////////////////////////////////////////////////////////////////////////////////////////

/// Host tensor
template <
  /// Data type of element stored within the tensor
  typename Element_,
  /// Maps a logical coordinate to linear memory (concept: Layout)
  typename Layout_
>
class HostTensor {
public:

  /// Data type of individual access
  using Element = Element_;

  /// Mapping function from logical coordinate to linear memory
  using Layout = Layout_;

  /// Logical rank of tensor index space
  static int const kRank = Layout::kRank;

  /// Index type
  using Index = typename Layout::Index;

  /// Long index used for pointer offsets
  using LongIndex = typename Layout::LongIndex;

  /// Coordinate in logical tensor space
  using TensorCoord = typename Layout::TensorCoord;

  /// Layout's stride vector
  using Stride = typename Layout::Stride;

  /// Tensor reference to device memory
  using TensorRef = TensorRef<Element, Layout>;

  /// Tensor reference to constant device memory
  using ConstTensorRef = typename TensorRef::ConstTensorRef;

  /// Tensor view of device memory
  using TensorView = TensorView<Element, Layout>;

  /// Tensor view of constant device memory
  using ConstTensorView = typename TensorView::ConstTensorView;

  /// Reference to element in tensor
  using Reference = typename TensorRef::Reference;

  /// Constant reference to element in tensor
  using ConstReference = typename ConstTensorRef::Reference;

  /// Used to handle packing of subbyte elements
  static int const kElementsPerStoredItem =
      (sizeof_bits<Element>::value < 8 ? (8 / sizeof_bits<Element>::value) : 1);

 private:

  //
  // Data members
  //

  /// Extent of tensor in logical dimensions
  TensorCoord extent_;

  /// Layout object
  Layout layout_;

  /// Host-side memory allocation
  std::vector<Element> host_;

  /// Device-side memory allocation
  device_memory::allocation<Element> device_;

 public:
  //
  // Device and Host Methods
  //

  /// Default constructor
  HostTensor() {}

  /// Constructs a tensor given an extent. Assumes a packed layout.
  HostTensor(
    TensorCoord const &extent,
    bool device_backed = true
  ) {

    this->reset(extent, Layout::packed(extent), device_backed);
  }

  /// Constructs a tensor given an extent and layout.
  HostTensor(
    TensorCoord const &extent,
    Layout const &layout,
    bool device_backed = true
  ) {

    this->reset(extent, layout, device_backed);
  }

  ~HostTensor() {}

  /// Clears the HostTensor allocation to size/capacity = 0.
  void reset() {
    extent_ = TensorCoord();
    layout_ = Layout::packed(extent_);

    host_.clear();
    device_.reset();
  }

  /// Resizes internal memory allocations without affecting layout or extent.
  void reserve(
    size_t count,
    bool device_backed_ = true) {

    device_.reset();
    host_.clear();

    count /= kElementsPerStoredItem;

    host_.resize(count);

    // Allocate memory
    Element* device_memory = nullptr;
    if (device_backed_) {
      device_memory = device_memory::allocate<Element>(count);
    }
    device_.reset(device_memory, device_backed_ ? count : 0);
  }

  /// Updates the extent and layout of the HostTensor and allocates memory accordingly.
  void reset(
    TensorCoord const &extent,
    Layout const &layout,
    bool device_backed_ = true) {

    extent_ = extent;
    layout_ = layout;

    reserve(size_t(layout_.capacity(extent_)), device_backed_);
  }

  /// Updates the extent of the HostTensor and allocates memory assuming a packed layout.
  void reset(
    TensorCoord const &extent,
    bool device_backed_ = true) {

    reset(extent, Layout::packed(extent), device_backed_);
  }

  /// Changes the logical extent and layout. Allocates memory only if the new capacity exceeds
  /// the current reservation; call reset() to force reallocation.
  void resize(
    TensorCoord const &extent,
    Layout const &layout,
    bool device_backed_ = true) {

    extent_ = extent;
    layout_ = layout;

    LongIndex new_size = LongIndex(layout_.capacity(extent_));

    if (static_cast<decltype(host_.size())>(new_size) > host_.size()) {
      reserve(size_t(new_size), device_backed_);
    }
  }

  /// Changes the logical extent assuming a packed layout. Allocates memory only if the new
  /// capacity exceeds the current reservation.
  void resize(
    TensorCoord const &extent,
    bool device_backed_ = true) {

    resize(extent, Layout::packed(extent), device_backed_);
  }

  /// Returns the number of elements stored in the host tensor
  size_t size() const {
    return host_.size() * kElementsPerStoredItem;
  }

  /// Returns the logical capacity based on extent and layout. May differ from size().
  LongIndex capacity() const {
    return layout_.capacity(extent_) * kElementsPerStoredItem;
  }

  /// Gets pointer to host data
  Element * host_data() { return host_.data(); }

  /// Gets pointer to host data with a pointer offset
  Element * host_data_ptr_offset(LongIndex ptr_element_offset) { return host_.data() + ptr_element_offset; }

  /// Gets a reference to an element in host memory
  Reference host_data(LongIndex idx) {
    return ReferenceFactory<Element>::get(host_data(), idx);
  }

  /// Gets pointer to host data
  Element const * host_data() const { return host_.data(); }

  /// Gets a constant reference to an element in host memory
  ConstReference host_data(LongIndex idx) const {
    return ReferenceFactory<Element const>::get(host_data(), idx);
  }

  /// Gets pointer to device data
  Element * device_data() { return device_.get(); }

  /// Gets pointer to device data with a pointer offset
  Element * device_data_ptr_offset(LongIndex ptr_element_offset) { return device_.get() + ptr_element_offset; }

  /// Gets pointer to device data
  Element const * device_data() const { return device_.get(); }

  /// Accesses a TensorRef pointing to host memory
  TensorRef host_ref(LongIndex ptr_element_offset=0) { return TensorRef(host_data_ptr_offset(ptr_element_offset), layout_); }

  /// Accesses a constant TensorRef pointing to host memory
  ConstTensorRef host_ref(LongIndex ptr_element_offset=0) const { return ConstTensorRef(host_data() + ptr_element_offset, layout_); }

  /// Accesses a TensorRef pointing to device memory
  TensorRef device_ref(LongIndex ptr_element_offset=0) {
    return TensorRef(device_data_ptr_offset(ptr_element_offset), layout_);
  }

  /// Accesses a constant TensorRef pointing to device memory
  ConstTensorRef device_ref(LongIndex ptr_element_offset=0) const {
    return ConstTensorRef(device_data() + ptr_element_offset, layout_);
  }

  /// Accesses a TensorView pointing to host memory
  TensorView host_view(LongIndex ptr_element_offset=0) {
    return TensorView(host_data_ptr_offset(ptr_element_offset), layout_, extent_);
  }

  /// Accesses a constant TensorView pointing to host memory
  ConstTensorView host_view(LongIndex ptr_element_offset=0) const {
    return ConstTensorView(host_data() + ptr_element_offset, layout_, extent_);
  }

  /// Accesses a TensorView pointing to device memory
  TensorView device_view(LongIndex ptr_element_offset=0) {
    return TensorView(device_data_ptr_offset(ptr_element_offset), layout_, extent_);
  }

  /// Accesses a constant TensorView pointing to device memory
  ConstTensorView device_view(LongIndex ptr_element_offset=0) const {
    return ConstTensorView(device_data() + ptr_element_offset, layout_, extent_);
  }

  /// Returns true if device memory is allocated
  bool device_backed() const {
    return device_.get() != nullptr;
  }

  /// Returns the layout object
  Layout & layout() {
    return layout_;
  }

  /// Returns the layout object
  Layout layout() const {
    return layout_;
  }

  /// Returns the layout object's stride vector
  Stride stride() const {
    return layout_.stride();
  }

  /// Returns the layout object's stride vector
  Stride & stride() {
    return layout_.stride();
  }

  /// Returns the layout object's stride in a given physical dimension
  Index stride(int dim) const {
    return layout_.stride().at(dim);
  }

  /// Returns the layout object's stride in a given physical dimension
  Index & stride(int dim) {
    return layout_.stride().at(dim);
  }

  /// Computes the offset of an index from the origin of the tensor
  LongIndex offset(TensorCoord const& coord) const {
    return layout_(coord);
  }

  /// Returns a reference to the element at the logical Coord in host memory
  Reference at(TensorCoord const& coord) {
    return host_data(offset(coord));
  }

  /// Returns a const reference to the element at the logical Coord in host memory
  ConstReference at(TensorCoord const& coord) const {
    return host_data(offset(coord));
  }

  /// Returns the extent of the tensor
  TensorCoord extent() const {
    return extent_;
  }

  /// Returns the extent of the tensor
  TensorCoord & extent() {
    return extent_;
  }

  /// Copies data from device to host
  void sync_host() {
    if (device_backed()) {
      device_memory::copy_to_host(
          host_data(), device_data(), size());
    }
  }

  /// Copies data from host to device
  void sync_device() {
    if (device_backed()) {
      device_memory::copy_to_device(
          device_data(), host_data(), size());
    }
  }

  /// Copies data from a caller-supplied device pointer into host memory.
  void copy_in_device_to_host(
    Element const* ptr_device,
    LongIndex count = -1) {

    if (count < 0) {
      count = capacity();
    }
    else {
      count = __NV_STD_MIN(capacity(), count);
    }
    device_memory::copy_to_host(
        host_data(), ptr_device, count);
  }

  /// Copies data from a caller-supplied device pointer into device memory.
  void copy_in_device_to_device(
    Element const* ptr_device,
    LongIndex count = -1) {

    if (count < 0) {
      count = capacity();
    }
    else {
      count = __NV_STD_MIN(capacity(), count);
    }
    device_memory::copy_device_to_device(
        device_data(), ptr_device, count);
  }

  /// Copies data from a caller-supplied host pointer into device memory.
  void copy_in_host_to_device(
    Element const* ptr_host,
    LongIndex count = -1) {

    if (count < 0) {
      count = capacity();
    }
    else {
      count = __NV_STD_MIN(capacity(), count);
    }
    device_memory::copy_to_device(
        device_data(), ptr_host, count);
  }

  /// Copies data from a caller-supplied host pointer into host memory.
  void copy_in_host_to_host(
    Element const* ptr_host,
    LongIndex count = -1) {

    if (count < 0) {
      count = capacity();
    }
    else {
      count = __NV_STD_MIN(capacity(), count);
    }
    device_memory::copy_host_to_host(
        host_data(), ptr_host, count);
  }

  /// Copies data from device memory to a caller-supplied host pointer.
  void copy_out_device_to_host(
    Element * ptr_host,
    LongIndex count = -1) const {

    if (count < 0) {
      count = capacity();
    }
    else {
      count = __NV_STD_MIN(capacity(), count);
    }
    device_memory::copy_to_host(
        ptr_host, device_data(), count);
  }

  /// Copies data from device memory to a caller-supplied device pointer.
  void copy_out_device_to_device(
    Element * ptr_device,
    LongIndex count = -1) const {

    if (count < 0) {
      count = capacity();
    }
    else {
      count = __NV_STD_MIN(capacity(), count);
    }
    device_memory::copy_device_to_device(
        ptr_device, device_data(), count);
  }

  /// Copies data from host memory to a caller-supplied device pointer.
  void copy_out_host_to_device(
    Element * ptr_device,
    LongIndex count = -1) const {

    if (count < 0) {
      count = capacity();
    }
    else {
      count = __NV_STD_MIN(capacity(), count);
    }
    device_memory::copy_to_device(
        ptr_device, host_data(), count);
  }

  /// Copies data from host memory to a caller-supplied host pointer.
  void copy_out_host_to_host(
    Element * ptr_host,
    LongIndex count = -1) const {

    if (count < 0) {
      count = capacity();
    }
    else {
      count = __NV_STD_MIN(capacity(), count);
    }
    device_memory::copy_host_to_host(
        ptr_host, host_data(), count);
  }
};

/////////////////////////////////////////////////////////////////////////////////////////////////

} // namespace cutlass
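
A minimal usage sketch follows; it is not part of host_tensor.h. It assumes the conventional CUTLASS utility include path (cutlass/util/host_tensor.h) and the cutlass::layout::ColumnMajor layout and MatrixCoord accessors from the core library; adjust these names to match your tree.

// Example: allocate a column-major matrix on host and device, fill it on the host,
// and mirror it to device memory. The include paths and the ColumnMajor layout type
// are assumptions about the surrounding CUTLASS tree, not definitions from this header.
#include <iostream>

#include "cutlass/layout/matrix.h"        // assumed location of cutlass::layout::ColumnMajor
#include "cutlass/util/host_tensor.h"     // this file, via the usual utility include path

int main() {

  // Allocate a 128x64 tensor backed by both host and device memory.
  cutlass::HostTensor<float, cutlass::layout::ColumnMajor> tensor({128, 64});

  // Write elements through the logical-coordinate accessor; this touches host memory only.
  for (int row = 0; row < tensor.extent().row(); ++row) {
    for (int col = 0; col < tensor.extent().column(); ++col) {
      tensor.at({row, col}) = float(row) - float(col);
    }
  }

  // Mirror host memory into the device allocation. device_ref() / device_view() can then be
  // passed to kernels expecting TensorRef / TensorView arguments.
  tensor.sync_device();

  // ... launch kernels operating on tensor.device_data() ...

  // Copy device results back before reading them on the host.
  tensor.sync_host();

  std::cout << tensor.at({0, 0}) << std::endl;
  return 0;
}

Constructing the tensor with device_backed=false skips the device allocation entirely; in that case device_backed() returns false, sync_host() and sync_device() become no-ops, and only the host-side accessors are meaningful.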