// cub/grid/grid_even_share.cuh
// File members: cub/grid/grid_even_share.cuh
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/
#pragma once
#include <cub/config.cuh>
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header
#include <cub/grid/grid_mapping.cuh>
#include <cub/util_math.cuh>
#include <cub/util_type.cuh>
CUB_NAMESPACE_BEGIN
/**
 * @brief Descriptor that splits `num_items` input items, grouped into tiles, among the
 *        thread blocks of a grid so that every block receives roughly the same number
 *        of tiles.
 *
 * Usage is two-phase: `DispatchInit()` computes the grid-wide partitioning, and one of
 * the `BlockInit()` overloads then fills in `block_offset`, `block_end` and
 * `block_stride` for a particular thread block.
 *
 * @tparam OffsetT Integer type used for item counts and offsets
 */
template <typename OffsetT>
struct GridEvenShare
{
private:
  /// Number of tiles needed to cover the input: ceil(num_items / tile_items)
  int total_tiles;

  /// Number of thread blocks that receive one tile more than the average
  int big_shares;

  /// Number of items in a big share (normal_share_items + tile_items)
  OffsetT big_share_items;

  /// Number of items in a normal share (avg_tiles_per_block * tile_items)
  OffsetT normal_share_items;

  /// Extra items handed out to the big shares (big_shares * tile_items); added to
  /// `block_id * normal_share_items` to locate the start of a normal share
  OffsetT normal_base_offset;

public:
  /// Total number of input items, as passed to DispatchInit()
  OffsetT num_items;

  /// Grid size in thread blocks: min(total_tiles, max_grid_size)
  int grid_size;

  /// Offset of the first item assigned to the thread block (set by BlockInit())
  OffsetT block_offset;

  /// Offset one past the last item assigned to the thread block (set by BlockInit())
  OffsetT block_end;

  /// Distance between consecutive tiles consumed by the thread block (set by BlockInit())
  OffsetT block_stride;

  /**
   * @brief Default constructor; zero-initializes every field.
   */
  _CCCL_HOST_DEVICE _CCCL_FORCEINLINE GridEvenShare()
      : total_tiles(0)
      , big_shares(0)
      , big_share_items(0)
      , normal_share_items(0)
      , normal_base_offset(0)
      , num_items(0)
      , grid_size(0)
      , block_offset(0)
      , block_end(0)
      , block_stride(0)
  {}

  /**
   * @brief Computes the grid-wide partitioning of the input.
   *
   * `block_offset` and `block_end` are both set to `num_items_` (an empty, past-the-end
   * range) until a BlockInit() overload assigns the block its actual range.
   *
   * @param[in] num_items_    Total number of input items
   * @param[in] max_grid_size Upper bound on the number of thread blocks
   * @param[in] tile_items    Number of items per tile. Used as a divisor, so it is
   *                          assumed to be positive (not checked here).
   *
   * @note The tile count is narrowed to `int`; the caller is presumably expected to
   *       choose `tile_items` such that it fits.
   */
  _CCCL_HOST_DEVICE _CCCL_FORCEINLINE void DispatchInit(OffsetT num_items_, int max_grid_size, int tile_items)
  {
    this->block_offset = num_items_; // Initialize past-the-end
    this->block_end    = num_items_; // Initialize past-the-end
    this->num_items    = num_items_;
    this->total_tiles  = static_cast<int>(::cuda::ceil_div(num_items_, tile_items));
    this->grid_size    = CUB_MIN(total_tiles, max_grid_size);

    // An empty problem (num_items_ == 0) or a non-positive max_grid_size yields
    // grid_size <= 0. Skip the division in that case: dividing by zero is undefined.
    const int avg_tiles_per_block = (grid_size > 0) ? (total_tiles / grid_size) : 0;

    // leftover grains go to big blocks:
    this->big_shares = total_tiles - (avg_tiles_per_block * grid_size);

    // Widen to OffsetT *before* multiplying: the per-block item count may exceed the
    // range of int even though the tile count itself fits.
    const OffsetT tile_extent = static_cast<OffsetT>(tile_items);
    this->normal_share_items  = static_cast<OffsetT>(avg_tiles_per_block) * tile_extent;
    this->normal_base_offset  = static_cast<OffsetT>(big_shares) * tile_extent;
    this->big_share_items     = normal_share_items + tile_extent;
  }

  /**
   * @brief Assigns thread block `block_id` one contiguous range of items
   *        (GRID_MAPPING_RAKE), consumed with a stride of one tile.
   *
   * The first `big_shares` blocks receive `big_share_items` items each; the remaining
   * blocks with `block_id < total_tiles` receive `normal_share_items`, clamped so that
   * the range does not extend past `num_items`. Any other block keeps the range that
   * was previously stored (past-the-end after DispatchInit()).
   *
   * @tparam TILE_ITEMS Number of items per tile
   * @param[in] block_id Index of the thread block within the grid
   */
  template <int TILE_ITEMS>
  _CCCL_DEVICE _CCCL_FORCEINLINE void BlockInit(int block_id, Int2Type<GRID_MAPPING_RAKE> /*strategy_tag*/)
  {
    block_stride = TILE_ITEMS;
    if (block_id < big_shares)
    {
      // This thread block gets a big share of grains (avg_tiles_per_block + 1)
      block_offset = static_cast<OffsetT>(block_id) * big_share_items;
      block_end    = block_offset + big_share_items;
    }
    else if (block_id < total_tiles)
    {
      // This thread block gets a normal share of grains (avg_tiles_per_block)
      block_offset = normal_base_offset + (static_cast<OffsetT>(block_id) * normal_share_items);
      // Avoid generating values greater than num_items, as it may cause overflow
      block_end = block_offset + CUB_MIN(num_items - block_offset, normal_share_items);
    }
    // Else default past-the-end
  }

  /**
   * @brief Assigns thread block `block_id` an interleaved set of tiles
   *        (GRID_MAPPING_STRIP_MINE): it starts at tile `block_id` and advances by
   *        `grid_size` tiles at a time until `num_items`.
   *
   * @tparam TILE_ITEMS Number of items per tile
   * @param[in] block_id Index of the thread block within the grid
   */
  template <int TILE_ITEMS>
  _CCCL_DEVICE _CCCL_FORCEINLINE void BlockInit(int block_id, Int2Type<GRID_MAPPING_STRIP_MINE> /*strategy_tag*/)
  {
    // Widen before multiplying so that large grids / tiles do not overflow int.
    block_stride = static_cast<OffsetT>(grid_size) * TILE_ITEMS;
    block_offset = static_cast<OffsetT>(block_id) * TILE_ITEMS;
    block_end    = num_items;
  }

  /**
   * @brief Initializes the calling thread block's range using `blockIdx.x` as the
   *        block index and `STRATEGY` to select the mapping.
   *
   * @tparam TILE_ITEMS Number of items per tile
   * @tparam STRATEGY   Grid mapping strategy used to pick the BlockInit() overload
   */
  template <int TILE_ITEMS, GridMappingStrategy STRATEGY>
  _CCCL_DEVICE _CCCL_FORCEINLINE void BlockInit()
  {
    BlockInit<TILE_ITEMS>(static_cast<int>(blockIdx.x), Int2Type<STRATEGY>());
  }

  /**
   * @brief Initializes the thread block's range from explicit bounds, with a stride of
   *        one tile.
   *
   * @tparam TILE_ITEMS Number of items per tile
   * @param[in] block_offset Offset of the first item for this thread block
   * @param[in] block_end    Offset one past the last item for this thread block
   */
  template <int TILE_ITEMS>
  _CCCL_DEVICE _CCCL_FORCEINLINE void BlockInit(OffsetT block_offset, OffsetT block_end)
  {
    // Parameters shadow the members of the same name; qualify with this->.
    this->block_offset = block_offset;
    this->block_end    = block_end;
    this->block_stride = TILE_ITEMS;
  }
};
CUB_NAMESPACE_END