File members: include/cuda/experimental/__stf/internal/hooks.cuh
// Part of CUDASTF in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES.
#pragma once
#include <cuda/__cccl_config>
# pragma GCC system_header
# pragma clang system_header
# pragma system_header
#endif // no system header
#include <cuda/experimental/__stf/internal/task_dep.cuh>
#include <vector>
namespace cuda::experimental::stf
namespace reserved
* When we dump the content of the logical data, we put them in files which are
* automatically named according to this counter. By having such a determinitic
* counter shared by all contexts, we can compare the content of files with the
* same index during different executions.
class dump_hook_cnt : public reserved::meyers_singleton<dump_hook_cnt>
cnt = 0;
~dump_hook_cnt() = default;
static int get()
return instance().cnt++;
int cnt;
} // namespace reserved
template <typename Unknown, size_t... i>
void data_dump(Unknown, ::std::ostream& file = ::std::cerr)
file << "Dunno how to dump object of type " << type_name<Unknown> << ".\n";
/**
 * Generic `data_hash`: the argument is ignored and the hash is always 0.
 *
 * The `size_t... i` parameter pack is not used by this implementation.
 *
 * @return Always 0.
 */
template <typename Unknown, size_t... i>
size_t data_hash(Unknown)
{
  return 0;
}
namespace reserved
/**
 * Creates the directory `dump_dir`, including any missing parent directories.
 *
 * Nothing is reported when the directory was created or already exists. If no
 * directory was created and the path still does not exist, a message is
 * printed on `::std::cerr`.
 *
 * The throwing overloads of `::std::filesystem::create_directories` and
 * `::std::filesystem::exists` are used, so failures reported by the operating
 * system surface as `::std::filesystem::filesystem_error`.
 *
 * @param dump_dir Path of the directory to create.
 */
inline void create_dump_dir(const ::std::string& dump_dir)
{
  // Create the directory (returns false, without error, if it already exists)
  if (::std::filesystem::create_directories(dump_dir))
  {
    //::std::cout << "Directory \"" << dump_dir << "\" was created successfully." << ::std::endl;
    return;
  }

  // Nothing was created: this is only a problem if the path is still missing
  if (!::std::filesystem::exists(dump_dir))
  {
    ::std::cerr << "An error occurred while trying to create the dump_dir \"" << dump_dir << "\"." << ::std::endl;
  }
}
/**
 * Checks that `dir_path` exists and reports on `::std::cerr` when it does not.
 *
 * NOTE(review): a missing directory is only reported; the function then
 * returns normally. Confirm that callers do not rely on a hard stop here.
 *
 * @param dir_path Path expected to exist.
 */
inline void ensure_directory_exists(const ::std::string& dir_path)
{
  // Check if the directory exists
  if (!::std::filesystem::exists(dir_path))
  {
    ::std::cerr << "Directory \"" << dir_path << "\" does not exist." << ::std::endl;
  }
}
/* Compute a vector of hooks to dump modified logical data (using
 * type-erased hooks). This will generate one host_launch task for each
 * modified logical data after task submission.
 *
 * Environment variables read here:
 *  - CUDASTF_AUTO_DUMP: dumping is enabled when set to a non-zero integer
 *  - CUDASTF_AUTO_COMPARE: comparison is enabled when set to a non-zero integer
 *  - CUDASTF_AUTO_DUMP_ONLY_HASH: when defined, only the hash is written
 *  - CUDASTF_AUTO_DUMP_DIR: directory holding the files (defaults to "dump/")
 *
 * The returned vector is empty when neither dumping nor comparison is
 * enabled. */
template <typename ctxt_t, typename... Deps>
static ::std::vector<::std::function<void()>> get_dump_hooks(ctxt_t* ctx, const task_dep<Deps>&... deps)
::std::vector<::std::function<void()>> hooks;
// If the CUDASTF_AUTO_DUMP is not set, or set to 0, we don't save the content
// (atoi also yields 0 for text that does not start with a number)
const char* dump_str = ::std::getenv("CUDASTF_AUTO_DUMP");
const bool dump = dump_str && atoi(dump_str) != 0;
// Same rule for CUDASTF_AUTO_COMPARE, which selects the branch that reads a
// previously saved hash from the file instead of writing one
const char* compare_str = ::std::getenv("CUDASTF_AUTO_COMPARE");
const bool compare = compare_str && atoi(compare_str) != 0;
// Neither feature is enabled: hand back the empty vector
if (!dump && !compare)
return hooks;
// NOTE(review): unlike the two flags above, this one is true as soon as the
// variable is defined, whatever its value (even "0") - confirm this is intended
const bool hash_only = ::std::getenv("CUDASTF_AUTO_DUMP_ONLY_HASH");
// Where do we write dumped content ? We postpone the creation of this
// directory to the first time we need to create a directory to avoid
// creating an empty dir if no data was dumped
const char* dump_dir_env = ::std::getenv("CUDASTF_AUTO_DUMP_DIR");
const ::std::string dump_dir = (dump_dir_env != nullptr) ? dump_dir_env : "dump/";
// For every dependency, we create a hook to dump the content of the
// logical data if it was modified.
[&](const auto& dep) {
auto dep_ld = dep.get_data();
// Skip dependencies accessed in read mode, and logical data for which
// get_auto_dump() returns false
if (dep.get_access_mode() != access_mode::read && dep_ld.get_auto_dump())
// The hook is built on dep.as_read_mode() rather than on dep itself
const auto ro_dep = dep.as_read_mode();
/* We either make sure the directory exists or lazily create it if
 * we need to add content when dumping data */
if (compare)
// Create a hook that will be executed after the submission of the
// tasks: this will submit a host callback to write the content in
// a file
// Everything is captured by value, so the hook does not refer to the
// locals of this function
auto h = [ctx, ro_dep, dump_dir, hash_only, compare]() {
// Get the next counter (to have a repeatable order)
const int cnt = reserved::dump_hook_cnt::get();
// The file is named after the counter value, inside dump_dir
const ::std::string filePath = dump_dir + "/" + ::std::to_string(cnt);
if (compare)
// Instead of using a host callback which might have had
// better performance, we use a task and a synchronization
// because it is easier to break on errors with a debugger
// when a mismatch is found.
ctx->task(exec_place::host, ro_dep).set_symbol("compare " + ::std::to_string(cnt))
->*[filePath](cudaStream_t stream, auto s) {
::std::ifstream f(filePath);
if (!f.is_open())
::std::cerr << "Failed to open " << filePath << ::std::endl;
// The saved hash is the first value stored in the file
// NOTE(review): the result of the extraction below is not checked -
// confirm saved_hash cannot be used while still uninitialized
size_t saved_hash;
f >> saved_hash;
// Recompute the hash from the current content and compare both values
size_t computed_hash = data_hash(s);
if (computed_hash != saved_hash)
::std::cerr << "Hash mismatch : computed = " << computed_hash << ", saved = " << saved_hash
<< " in " << filePath << ::std::endl;
// Dump path: a host_launch task writes the hash, and optionally the content
ctx->host_launch(ro_dep).set_symbol("dump " + ::std::to_string(cnt))->*[filePath, hash_only](auto s) {
::std::ofstream f(filePath);
if (!f.is_open())
::std::cerr << "Failed to open " << filePath << ::std::endl;
// Compute a hash of the content, to easily compare equality
// The hash goes on the first line, which is what the compare path reads back
const size_t hsh = data_hash(s);
f << hsh << ::std::endl;
if (!hash_only)
// Dump the actual data content (may be very large)
data_dump(s, f);
return hooks;
} // end namespace reserved
} // end namespace cuda::experimental::stf