include/cuda/experimental/__stf/internal/hooks.cuh
File members: include/cuda/experimental/__stf/internal/hooks.cuh
//===----------------------------------------------------------------------===//
//
// Part of CUDASTF in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//
#pragma once
#include <cuda/__cccl_config>
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header
#include <cuda/experimental/__stf/internal/task_dep.cuh>
#include <vector>
namespace cuda::experimental::stf
{
namespace reserved
{
/*
* When we dump the content of the logical data, we put them in files which are
* automatically named according to this counter. By having such a determinitic
* counter shared by all contexts, we can compare the content of files with the
* same index during different executions.
*/
class dump_hook_cnt : public reserved::meyers_singleton<dump_hook_cnt>
{
protected:
dump_hook_cnt()
{
cnt = 0;
}
~dump_hook_cnt() = default;
public:
static int get()
{
return instance().cnt++;
}
private:
int cnt;
};
} // namespace reserved
template <typename Unknown, size_t... i>
void data_dump(Unknown, ::std::ostream& file = ::std::cerr)
{
file << "Dunno how to dump object of type " << type_name<Unknown> << ".\n";
}
template <typename Unknown, size_t... i>
size_t data_hash(Unknown)
{
return 0;
}
namespace reserved
{
inline void create_dump_dir(const ::std::string& dump_dir)
{
// Create the directory (no-op if it already exists)
if (::std::filesystem::create_directories(dump_dir))
{
//::std::cout << "Directory \"" << dump_dir << "\" was created successfully." << ::std::endl;
}
else
{
if (!::std::filesystem::exists(dump_dir))
{
::std::cerr << "An error occurred while trying to create the dump_dir \"" << dump_dir << "\"." << ::std::endl;
abort();
}
}
}
inline void ensure_directory_exists(const ::std::string& dir_path)
{
// Check if the directory exists
if (!::std::filesystem::exists(dir_path))
{
::std::cerr << "Directory \"" << dir_path << "\" does not exist." << ::std::endl;
abort();
}
}
/* Compute a vector of hooks to dump modified logical data (using
* typed-erased hooks). This will generate one host_launch task for each
* modified logical data after task submission. */
template <typename ctxt_t, typename... Deps>
static ::std::vector<::std::function<void()>> get_dump_hooks(ctxt_t* ctx, const task_dep<Deps>&... deps)
{
::std::vector<::std::function<void()>> hooks;
// If the CUDASTF_AUTO_DUMP is not set, or set to 0, we don't save the content
const char* dump_str = ::std::getenv("CUDASTF_AUTO_DUMP");
const bool dump = dump_str && atoi(dump_str) != 0;
const char* compare_str = ::std::getenv("CUDASTF_AUTO_COMPARE");
const bool compare = compare_str && atoi(compare_str) != 0;
if (!dump && !compare)
{
return hooks;
}
const bool hash_only = ::std::getenv("CUDASTF_AUTO_DUMP_ONLY_HASH");
// Where do we write dumped content ? We postpone the creation of this
// directory to the first time we need to create a directory to avoid
// creating an empty dir if no data was dumped
const char* dump_dir_env = ::std::getenv("CUDASTF_AUTO_DUMP_DIR");
const ::std::string dump_dir = (dump_dir_env != nullptr) ? dump_dir_env : "dump/";
// For every dependency, we create a hook to dump the content of the
// logical data if it was modified.
::cuda::experimental::stf::each_in_pack(
[&](const auto& dep) {
auto dep_ld = dep.get_data();
if (dep.get_access_mode() != access_mode::read && dep_ld.get_auto_dump())
{
const auto ro_dep = dep.as_read_mode();
/* We either make sure the directory exists or lazily create it if
* we need to add content when dumping data */
if (compare)
{
ensure_directory_exists(dump_dir);
}
else
{
create_dump_dir(dump_dir);
}
// Create a hook that will be executed after the submission of the
// tasks: this will submit a host callback to write the content in
// a file
auto h = [ctx, ro_dep, dump_dir, hash_only, compare]() {
// Get the next counter (to have a repeatable order)
const int cnt = reserved::dump_hook_cnt::get();
const ::std::string filePath = dump_dir + "/" + ::std::to_string(cnt);
if (compare)
{
// Instead of using a host callback which might have had
// better performance, we use a task and a synchronization
// because it is easier to break on errors with a debugger
// when a mismatch is found.
ctx->task(exec_place::host, ro_dep).set_symbol("compare " + ::std::to_string(cnt))
->*[filePath](cudaStream_t stream, auto s) {
cuda_safe_call(cudaStreamSynchronize(stream));
::std::ifstream f(filePath);
if (!f.is_open())
{
::std::cerr << "Failed to open " << filePath << ::std::endl;
abort();
}
size_t saved_hash;
f >> saved_hash;
f.close();
size_t computed_hash = data_hash(s);
if (computed_hash != saved_hash)
{
::std::cerr << "Hash mismatch : computed = " << computed_hash << ", saved = " << saved_hash
<< " in " << filePath << ::std::endl;
if (getenv("CUDASTF_AUTO_COMPARE_ABORT_ON_ERRORS"))
{
abort();
}
}
};
}
else
{
ctx->host_launch(ro_dep).set_symbol("dump " + ::std::to_string(cnt))->*[filePath, hash_only](auto s) {
::std::ofstream f(filePath);
if (!f.is_open())
{
::std::cerr << "Failed to open " << filePath << ::std::endl;
abort();
}
// Compute a hash of the content, to easily compare equality
const size_t hsh = data_hash(s);
f << hsh << ::std::endl;
if (!hash_only)
{
// Dump the actual data content (may be very large)
data_dump(s, f);
}
f.close();
};
}
};
hooks.push_back(mv(h));
}
},
deps...);
return hooks;
}
} // end namespace reserved
} // end namespace cuda::experimental::stf