Defines the optimal thread map for TensorOp accumulator layouts.
#include <default_thread_map_volta_tensor_op.h>