# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from enum import Enum
from typing import Union, Sequence, Dict, Any, Callable, List, TYPE_CHECKING, TypeAlias, Optional
import json
import numbers
import math
import warnings
import copy
import torch
import numpy as np
# Type aliases for commonly used complex types
TensorData: TypeAlias = Union[torch.Tensor, Any]
TensorDataStructure: TypeAlias = Union[
TensorData, Sequence[Union[TensorData, Sequence, Dict]], Dict[str, Union[TensorData, Sequence, Dict]]
]
FormattedTensorData: TypeAlias = Union['TensorDumper._TensorWithFormat', Any]
FormattedDataStructure: TypeAlias = Union[
FormattedTensorData,
Sequence[Union[FormattedTensorData, Sequence, Dict]],
Dict[str, Union[FormattedTensorData, Sequence, Dict]],
]
GenericDataStructure: TypeAlias = Union[
Any, Sequence[Union[Any, Sequence, Dict]], Dict[str, Union[Any, Sequence, Dict]]
]
# Additional type aliases for common patterns
OptionalSequence: TypeAlias = Optional[Sequence[int]]
OptionalTypeDict: TypeAlias = Optional[Dict[str, 'TensorDumper.Type']]
OptionalPermuteDict: TypeAlias = Optional[Dict[str, OptionalSequence]]
OptionalPath: TypeAlias = Optional[str]
if TYPE_CHECKING:
try:
from accvlab.batching_helpers import RaggedBatch
except ImportError:
# Dummy to avoid errors in case the `RaggedBatch` class is not available.
# This can happen if the `batching_helpers` package is not installed.
# In this case, it is still possible to use the `TensorDumper` class, but
# dumping of `RaggedBatch` data is not supported.
class RaggedBatch:
pass
if __name__ != "__main__":
from .singleton_base import SingletonBase
else:
from singleton_base import SingletonBase
class TensorDumper(SingletonBase):
'''Singleton class for dumping tensor & gradient data to a directory and comparing to previously dumped
data.
This class provides a way to dump tensor data to a directory in a structured format.
The dumper is able to dump tensors, gradients, :class:`RaggedBatch` objects, as well as data with
user-defined & auto-applied converters. Furthermore, it supports custom processing prior to dumping (e.g.
converting of bounding boxes to images containing the bounding boxes), which is performed only if the
dumper is enabled, and does not incur overhead if the dumper is not enabled.
Main JSON files are created for each dump (one for the data and one for the gradients). The individual
tensors (or converted data) can be stored inside the main JSON file, or in separate binary/image files
(can be configured, and can vary for individual data entries). In case of the binary/image files, the main
JSON file contains a reference to the file, and the file is stored in the same directory as the main JSON
file.
The dumper can also be used to compare to previously dumped data, to detect mismatches. This can be useful
for debugging e.g. to rerun the same code multiple times, while always comparing to the same dumped data.
This can be used when modifying (e.g. optimizing) the implementation, or to check for determinism.
Important:
The dumper is a singleton, so that it can be used in different source files without having to pass the
instance around.
Note:
The comparison is only supported if all data is dumped in the ``Type.JSON`` format. This can be
enforced by calling :meth:`set_dump_type_for_all` before dumping/comparing the data, which makes it
easy to switch between dumping for manual inspection and dumping for comparison.
Note:
When in the disabled state, all dumping-related methods (dump, add data, compare to dumped data etc)
are empty methods, which means they have no effect and minimal overhead.
Note:
When obtaining an object via ``TensorDumper()``, the existing singleton is returned if it was
already created.
If parameters are provided when calling ``TensorDumper()``, this will enable the dumper
(equivalent to calling :meth:`enable`). Note that enabling can only be done once, and will
lead to an error if attempted a second time.
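Example:
    A minimal sketch of the intended workflow (directory and data names are illustrative)::

        dumper = TensorDumper("/tmp/tensor_dumps")  # first call enables the singleton
        dumper.add_tensor_data("inputs", {"image": image}, TensorDumper.Type.JSON)
        dumper.dump()

    In any other source file, the same (already enabled) instance can be obtained by
    calling ``TensorDumper()`` without arguments.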
'''
class Type(Enum):
'''Dump format types.
The format type determines how tensor data is serialized when dumped.
Note:
For binary types (``BINARY``, ``IMAGE_RGB``, ``IMAGE_BGR``, ``IMAGE_I``), entries are added to the
main JSON file indicating the filenames of the stored data. Also, files containing meta-data are
created and stored in the same directory. For ``BINARY``, the meta-data is the shape and dtype of
the tensor. For ``IMAGE_*``, the meta-data is the original range of the image data (min and max
value) and the image format (RGB, BGR, Intensity).
Note:
For ``BINARY`` and ``IMAGE_*`` formats, the filenames of the stored data and meta-data
files follow this scheme:
- blob/image data: ``[<main_json_file_name>]<path_to_data_in_dumped_structure>.<file_type>``
- meta-data: ``[<main_json_file_name>]<path_to_data_in_dumped_structure>.<file_type>.meta.json``
Note:
For images containing multiple channels, the color channel is the last dimension. If this is not
the case, permutation of the axes needs to be applied to move the color channel to the last
dimension. The permutation can be applied using the ``permute_axes`` parameter, e.g. of
:meth:`add_tensor_data`.
If a tensor contains more than the necessary number of dimensions (3 for color images,
2 for grayscale images), the leading dimensions are treated as iterating over the images,
and multiple images are dumped (with the indices of the leading dimensions indicated in the
filename).
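Example:
    Illustration of the filename scheme: for a tensor added under path ``inputs.image`` and
    dumped with ``BINARY`` into the main file ``tensors.json``, the resulting files would be
    (illustrative)::

        [tensors.json]inputs.image.bin
        [tensors.json]inputs.image.bin.meta.json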
'''
#: Tensor data is serialized into the JSON file as nested lists.
#: Suitable for small tensors and provides human-readable output.
JSON = 0
#: Tensor data saved as binary files with metadata in separate JSON files.
#: Efficient for large tensors; preserves exact numerical precision.
BINARY = 1
#: Tensor data converted to PNG image format (RGB, 3 channels).
#: Channel must be the last dimension; permute axes if necessary.
IMAGE_RGB = 2
#: Tensor data converted to PNG image format (BGR, 3 channels).
#: Channel must be the last dimension; permute axes if necessary.
IMAGE_BGR = 3
#: Tensor data converted to PNG image format (grayscale).
#: Single channel; no explicit channel dimension.
IMAGE_I = 4
@classmethod
def is_image(cls, dump_type: 'TensorDumper.Type') -> bool:
return dump_type in [cls.IMAGE_RGB, cls.IMAGE_BGR, cls.IMAGE_I]
class _ComparisonConfig:
def __init__(
self,
eps_numerical_data: float = 1e-6,
num_errors_per_tensor_to_show: int = 1,
allow_missing_data_in_current: bool = False,
):
self.eps_numerical_data = eps_numerical_data
self.num_errors_per_tensor_to_show = num_errors_per_tensor_to_show
self.allow_missing_data_in_current = allow_missing_data_in_current
class _TensorWithFormat:
def __init__(self, tensor: Any, dump_type: 'TensorDumper.Type', permute_axes: OptionalSequence):
self.tensor = tensor
self.dump_type = dump_type
self.permute_axes = permute_axes
class _CustomEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, np.integer):
return int(obj)
elif isinstance(obj, np.complexfloating):
return complex(obj)
elif isinstance(obj, np.floating):
return float(obj)
elif isinstance(obj, np.ndarray):
return obj.tolist()
elif isinstance(obj, torch.Tensor):
return obj.tolist()
elif isinstance(obj, torch.dtype):
return str(obj)
elif isinstance(obj, type):
return str(obj)
else:
return super().default(obj)
def __init__(self, *args, **kwargs):
'''
Args:
dump_dir: The directory to dump the data to. If provided, the dumper will be enabled automatically.
If not provided, the dumper will be disabled and can be enabled later by calling :meth:`enable`.
'''
if not hasattr(self, '_initialized'):
try:
import accvlab.batching_helpers
except ImportError:
warnings.warn(
"`accvlab.batching_helpers` is not available. Dumping of `RaggedBatch` data is not supported."
)
self._initialized = True
self._enabled = False
self._SET_DOCSTRINGS_OF_ENABLED_METHOD_VARIANTS()
if len(args) > 0 or len(kwargs) > 0:
self.enable(*args, **kwargs)
def enable(self, dump_dir: str):
'''Enable the TensorDumper singleton.
This method can be called only once and enables the TensorDumper singleton.
Any use of the singleton before enabling it is ignored.
Args:
dump_dir: The directory to dump the data to.
'''
if self._enabled:
raise RuntimeError("`TensorDumper` is already enabled. Can only be enabled once.")
self._dump_dir = dump_dir
self._dump_count = 0
self._tensor_struct = {}
self._grad_struct = {}
self._grad_computed = False
self._enabled = True
self._after_dump_count_actions = {}
self._custom_converters = {np.ndarray: lambda x: torch.from_numpy(x)}
# Set the methods
self.add_tensor_data = self._add_tensor_data_enabled
self.add_grad_data = self._add_grad_data_enabled
self.set_dump_type_for_all = self._set_dump_type_for_all_enabled
self.dump = self._dump_enabled
self.compare_to_dumped_data = self._compare_to_dumped_data_enabled
self.set_gradients = self._set_gradients_enabled
self.reset_dump_count = self._reset_dump_count_enabled
self.perform_after_dump_count = self._perform_after_dump_count_enabled
self.register_custom_converter = self._register_custom_converter_enabled
self.enable_ragged_batch_dumping = self._enable_ragged_batch_dumping_enabled
self.run_if_enabled = self._run_if_enabled_enabled
@property
def is_enabled(self) -> bool:
'''Whether the TensorDumper is enabled'''
return self._enabled
def add_tensor_data(
self,
path: str,
data: TensorDataStructure,
dump_type: 'TensorDumper.Type',
dump_type_override: OptionalTypeDict = None,
permute_axes: OptionalSequence = None,
permute_axes_override: OptionalPermuteDict = None,
exclude: Union[Sequence[str], None] = None,
):
'''
Add tensor data to the dump.
The data is formatted and inserted into the dump structure.
Args:
path: Path where the data will be inserted. If the path does not exist, it will be created.
If `data` is a dictionary, the path may be already present in the structure,
but the direct children of `data` need to be non-existent in the element the path points to.
If `data` is not a dictionary, the path must not be present in the structure and the
data will be inserted at the path.
data: The tensor data to add
dump_type: The type of dump to use
dump_type_override: A dictionary mapping names to dump types.
If a name is present in the dictionary, the dump type for all tensors with that name in the path
(i.e. either the name itself or the name of a parent along the path) will be overridden with the
value in the dictionary. If multiple names match the path, the match closest to the tensor
(i.e. further inside the structure) is used.
If ``None``, no override is applied.
permute_axes: Permutation of axes to apply to the tensor data.
If ``None``, no permutation is applied.
permute_axes_override: A dictionary mapping names to permute axes.
If a name is present in the dictionary, the permute axes for all tensors with that name in the path
(i.e. either the name itself or the name of a parent along the path) will be overridden with the
value in the dictionary. If multiple names match the path, the match closest to the tensor
(i.e. further inside the structure) is used.
If ``None``, no override is applied.
exclude: List of entries to exclude from the dump. These entries are specified by name and
may apply to any level of the data structure.
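Example:
    A minimal sketch (names, shapes and overrides are illustrative)::

        dumper = TensorDumper()
        dumper.add_tensor_data(
            "batch.inputs",
            {"image": image_tensor, "labels": label_tensor},
            dump_type=TensorDumper.Type.BINARY,
            dump_type_override={"image": TensorDumper.Type.IMAGE_RGB},
            permute_axes_override={"image": (1, 2, 0)},  # CHW -> HWC for the image
        )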
'''
# Empty method to minimize overhead if not enabled. Will be replaced when enabling.
pass
def add_grad_data(
self,
path: str,
data: TensorDataStructure,
dump_type: 'TensorDumper.Type',
dump_type_override: OptionalTypeDict = None,
permute_grad_axes: OptionalSequence = None,
permute_grad_axes_override: OptionalPermuteDict = None,
exclude: Union[Sequence[str], None] = None,
):
'''Add gradient data of the given tensor(s) to dump.
Note that if this method is called, :meth:`set_gradients` must be called before dumping the next time.
The gradients are computed using :func:`torch.autograd.grad`, and do not influence the gradients
as computed/used elsewhere in the code (e.g. in the training loop).
Note that tensors which do not require gradients or which are not part of the computation graph
can be included in the dump, but no actual gradients will be computed for them. Instead,
a note will be written to the json dump in case that ``requires_grad`` is ``False``. If the tensor
is not part of the computation graph, the written gradient will be ``null``, and no image/binary file
will be written for that tensor regardless of the ``dump_type`` setting.
Args:
path: Path where the gradient data will be inserted. See :meth:`add_tensor_data` for more details.
data: The tensor data for which to dump the gradients.
dump_type: The type of dump to use
dump_type_override: A dictionary mapping names to dump types.
If a name is present in the dictionary, the dump type for all gradients of tensors with that
name in the path (i.e. either the name itself or the name of a parent along the path) will be
overridden with the value in the dictionary. If multiple names match the path, the match
closest to the tensor (i.e. further inside the structure) is used.
If ``None``, no override is applied.
permute_grad_axes: Permutation of axes to apply to the gradient data.
If ``None``, no permutation is applied.
permute_grad_axes_override: A dictionary mapping names to permute axes.
If a name is present in the dictionary, the permute axes for all gradients of tensors with
that name in the path (i.e. either the name itself or the name of a parent along the path)
will be overridden with the value in the dictionary. If multiple names match the path, the
match closest to the tensor (i.e. further inside the structure) is used.
If ``None``, no override is applied.
exclude: List of entries to exclude from the dump. These entries are specified by name and
may apply to any level of the data structure.
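Example:
    A minimal sketch (``prediction`` and ``loss`` are illustrative and assumed to be part
    of the same computation graph)::

        dumper = TensorDumper()
        dumper.add_grad_data("model.outputs", {"prediction": prediction}, TensorDumper.Type.JSON)
        dumper.set_gradients(loss)  # must be called before the next dump
        dumper.dump()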
'''
# Empty method to minimize overhead if not enabled. Will be replaced when enabling.
pass
def set_dump_type_for_all(
self, dump_type: 'TensorDumper.Type', include_tensors: bool = True, include_grads: bool = True
):
'''Set the dump type for all tensors and gradients.
This method is e.g. useful to quickly change the dump type to ``Type.JSON`` to generate reference data
for comparison (using :meth:`compare_to_dumped_data`) without the need to go through the code and change the dump type
for each tensor manually.
Important:
This method only sets the dump type for data which has already been added.
Data which is added after this method is called will not be affected.
Args:
dump_type: The type of dump to use
include_tensors: Whether to include tensors in the dump
include_grads: Whether to include gradients in the dump
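Example:
    A minimal sketch for forcing the JSON format when generating reference data
    (``image`` is an illustrative tensor)::

        dumper.add_tensor_data("inputs", {"image": image}, TensorDumper.Type.IMAGE_RGB)
        dumper.set_dump_type_for_all(TensorDumper.Type.JSON)  # overrides the already-added data
        dumper.dump()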
'''
# Empty method to minimize overhead if not enabled. Will be replaced when enabling.
pass
def dump(self):
'''Dump the data to the dump directory.'''
# Empty method to minimize overhead if not enabled. Will be replaced when enabling.
pass
def compare_to_dumped_data(
self,
eps_numerical_data: float = 1e-6,
num_errors_per_tensor_to_show: int = 1,
allow_missing_data_in_current: bool = False,
as_warning: bool = False,
):
'''Compare the data to previously dumped data.
In case of a mismatch, a ``ValueError`` is raised with a detailed error message.
Important:
Only comparisons to data stored in the JSON format (``Type.JSON``) are supported.
Therefore, the reference data must be stored with the ``Type.JSON`` format both
when generating the reference data and when comparing to it.
An easy way to ensure that the reference data is stored in the JSON format without
modifying multiple places in the code is to call :meth:`set_dump_type_for_all`
when generating the reference data.
Note:
The comparison can be set to allow missing data in the current data by setting ``allow_missing_data_in_current`` to ``True``.
This is e.g. useful if the current data is based on an implementation in progress, so that some of the data is not yet available.
In this case, the comparison will not raise an error if the current data is missing some data which is present in the reference data.
Instead, a warning will be printed.
Args:
eps_numerical_data: The numerical tolerance for the comparison of numerical data.
num_errors_per_tensor_to_show: The number of most significant errors to show per tensor.
allow_missing_data_in_current: If ``True``, the comparison will not raise an error if the current data is missing
some data which is present in the reference data.
as_warning: If ``True``, no error is raised in case of a mismatch and instead, a warning is printed.
If ``False``, an error is raised.
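Example:
    A minimal sketch (``prediction`` and the ``generate_reference`` flag are illustrative);
    the first run dumps the reference, later runs of the same code compare against it::

        dumper.add_tensor_data("outputs", {"prediction": prediction}, TensorDumper.Type.JSON)
        if generate_reference:
            dumper.dump()
        else:
            dumper.compare_to_dumped_data(eps_numerical_data=1e-5, as_warning=True)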
'''
# Empty method to minimize overhead if not enabled. Will be replaced when enabling.
pass
def set_gradients(self, function_values: Union[torch.Tensor, List[torch.Tensor]]):
'''Set gradients for the tensors in the dump.
The gradients are computed using :func:`torch.autograd.grad`, and do not influence the gradients
computed elsewhere (e.g. in the training loop).
This method must be called before dumping if :meth:`add_grad_data` was called since the last dump.
Args:
function_values: The value(s) of the function(s) to compute the gradients for.
This can be a single tensor or a list of tensors.
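Example:
    A minimal sketch, assuming ``w`` requires gradients and ``loss`` is computed from it::

        dumper.add_grad_data("params", {"w": w}, TensorDumper.Type.JSON)
        loss = (w * x).sum()
        dumper.set_gradients(loss)
        dumper.dump()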
'''
# Empty method to minimize overhead if not enabled. Will be replaced when enabling.
pass
def reset_dump_count(self):
'''Reset the dump count.
Important:
Resetting the dump count means that:
- In case of dumping: the next dump will overwrite a previous dump (starting from the first dump).
- In case of comparing to previously dumped data: the next comparison will start from the first dump.
This method is useful for debugging e.g. to rerun the same code multiple times to check for
determinism, while always comparing to the same dumped data.
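Example:
    A minimal sketch of a determinism check, assuming the reference data was dumped in a
    previous run (``run_model_and_add_data`` is an illustrative placeholder)::

        for _ in range(3):
            run_model_and_add_data(dumper)
            dumper.compare_to_dumped_data()
            dumper.reset_dump_count()  # always compare against the first dump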
'''
# Empty method to minimize overhead if not enabled. Will be replaced when enabling.
pass
def perform_after_dump_count(self, count: int, action: Callable):
'''Perform an action once a given dump count is reached.
The action is executed directly after the dump which causes the dump count to reach ``count``
(i.e. after ``count`` dumps have been performed).
Args:
count: The dump count after which to perform the action.
action: The action to perform. The function must take no arguments.
'''
# Empty method to minimize overhead if not enabled. Will be replaced when enabling.
pass
def register_custom_converter(self, data_type: type, converter_func: Callable):
'''Register a custom converter for a given data type.
This method can be used to register a custom converter function for a given data type.
The converter function must take a single argument of type ``data_type`` and return one of the following,
or a nested list/dict structure containing elements of the following types:
- either a JSON-serializable object,
- or a tensor,
- or a numpy array,
- or an object for which a custom converter is registered
The conversion is performed iteratively, so that chains of conversions can be followed through.
The conversion is performed before any other processing steps. This means that if the converter returns
tensors, these are handled in the same way as tensors which are directly added to the dumper.
Note:
This is useful when the data to dump is not JSON-serializable by default. This may e.g. be the case
for custom data types which are used in the training.
Args:
data_type: The type of the data to convert.
converter_func: The function to use for converting the data.
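Example:
    A minimal sketch for a hypothetical ``BoundingBoxes`` container::

        class BoundingBoxes:
            def __init__(self, boxes: torch.Tensor, labels: List[str]):
                self.boxes = boxes
                self.labels = labels

        dumper.register_custom_converter(
            BoundingBoxes, lambda b: {"boxes": b.boxes, "labels": b.labels}
        )
        # `BoundingBoxes` instances in the added data are now converted automatically.
        dumper.add_tensor_data("detections", {"result": bboxes}, TensorDumper.Type.JSON)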
'''
# Empty method to minimize overhead if not enabled. Will be replaced when enabling.
pass
def enable_ragged_batch_dumping(self, as_per_sample: bool = False):
'''Enable dumping of :class:`RaggedBatch` data.
Note:
It is possible to dump some :class:`RaggedBatch` data as per sample, and some as a :class:`RaggedBatch`
structure.
This can be achieved by calling this method multiple times with different values for ``as_per_sample``,
before adding the data which should be dumped with the desired format.
Args:
as_per_sample: If ``True``, the :class:`RaggedBatch` data is dumped as per sample. Otherwise, it is dumped
as a :class:`RaggedBatch` structure.
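Example:
    A minimal sketch mixing both formats (``ragged_a`` and ``ragged_b`` are illustrative
    :class:`RaggedBatch` instances)::

        dumper.enable_ragged_batch_dumping(as_per_sample=True)
        dumper.add_tensor_data("per_sample", {"a": ragged_a}, TensorDumper.Type.JSON)
        dumper.enable_ragged_batch_dumping(as_per_sample=False)
        dumper.add_tensor_data("as_descriptor", {"b": ragged_b}, TensorDumper.Type.JSON)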
'''
# Empty method to minimize overhead if not enabled. Will be replaced when enabling.
pass
def run_if_enabled(self, func: Callable[[], None]):
'''Run a function if the TensorDumper is enabled.
This method can be used to run a function only if the TensorDumper is enabled.
This is useful to avoid running code which is only relevant for debugging.
The typical use-case for this method is the dumping of data which needs
to be pre-processed first (e.g. drawing of bounding boxes into an image).
This is done as follows:
- Encapsulate the pre-processing logic in a function (inside the function
which uses the dumper). Note that this means that ``func`` closes over
the data accessible in that function and therefore does not need to take
any arguments. The function ``func`` should:
- Perform any debugging-related pre-processing needed
- Add the pre-processed data to the dump (e.g. using :meth:`add_tensor_data`)
- Call :meth:`run_if_enabled` with the function ``func`` as its argument. This will ensure
that the pre-processing is only performed if the dumper is enabled. Otherwise, the
pre-processing is omitted, and there is no overhead (apart from calling an empty function).
Args:
func: The function to run. The function must take no arguments.
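Example:
    A minimal sketch (``draw_boxes_into_image``, ``image`` and ``boxes`` are illustrative)::

        def dump_boxes_visualization():
            image_with_boxes = draw_boxes_into_image(image, boxes)
            TensorDumper().add_tensor_data(
                "debug", {"boxes_vis": image_with_boxes}, TensorDumper.Type.IMAGE_RGB
            )

        TensorDumper().run_if_enabled(dump_boxes_visualization)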
'''
# Empty method to minimize overhead if not enabled. Will be replaced when enabling.
pass
# ===== Methods for Setting the Doc-strings of the Enabled Variants of the Methods =====
@classmethod
def _SET_DOCSTRINGS_OF_ENABLED_METHOD_VARIANTS(cls):
'''Set the docstrings of the enabled method variants.
This is done to ensure that the correct docstring is present in the methods
once the TensorDumper is enabled, and the original (disabled) methods are
replaced by the enabled variants.
'''
cls._add_tensor_data_enabled.__doc__ = cls.add_tensor_data.__doc__
cls._add_grad_data_enabled.__doc__ = cls.add_grad_data.__doc__
cls._set_dump_type_for_all_enabled.__doc__ = cls.set_dump_type_for_all.__doc__
cls._dump_enabled.__doc__ = cls.dump.__doc__
cls._compare_to_dumped_data_enabled.__doc__ = cls.compare_to_dumped_data.__doc__
cls._set_gradients_enabled.__doc__ = cls.set_gradients.__doc__
cls._reset_dump_count_enabled.__doc__ = cls.reset_dump_count.__doc__
cls._perform_after_dump_count_enabled.__doc__ = cls.perform_after_dump_count.__doc__
cls._register_custom_converter_enabled.__doc__ = cls.register_custom_converter.__doc__
cls._enable_ragged_batch_dumping_enabled.__doc__ = cls.enable_ragged_batch_dumping.__doc__
cls._run_if_enabled_enabled.__doc__ = cls.run_if_enabled.__doc__
# ===== Enabled Variants of the Methods =====
def _add_tensor_data_enabled(
self,
path: str,
data: TensorDataStructure,
dump_type: 'TensorDumper.Type',
dump_type_override: OptionalTypeDict = None,
permute_axes: OptionalSequence = None,
permute_axes_override: OptionalPermuteDict = None,
exclude: Union[Sequence[str], None] = None,
):
'''TEMPORARY DOCSTRING
This is the enabled variant of the corresponding method (same name without the leading `_` and trailing `_enabled`).
This docstring will be replaced with the docstring of the corresponding method when an instance is requested
for the first time.
'''
if exclude is not None:
data = TensorDumper._exclude_elements(data, exclude)
if len(self._custom_converters) > 0:
data = TensorDumper._get_with_custom_converters_applied(data, self._custom_converters)
data_with_format = TensorDumper._format_data_elements(
data, dump_type, dump_type_override, permute_axes, permute_axes_override
)
TensorDumper._insert_at_path(self._tensor_struct, path, data_with_format)
def _add_grad_data_enabled(
self,
path: str,
data: TensorDataStructure,
dump_type: 'TensorDumper.Type',
dump_type_override: OptionalTypeDict = None,
permute_grad_axes: OptionalSequence = None,
permute_grad_axes_override: OptionalPermuteDict = None,
exclude: Union[Sequence[str], None] = None,
):
'''TEMPORARY DOCSTRING
This is the enabled variant of the corresponding method (same name without the leading `_` and trailing `_enabled`).
This docstring will be replaced with the docstring of the corresponding method when an instance is requested
for the first time.
'''
if exclude is not None:
data = TensorDumper._exclude_elements(data, exclude)
if len(self._custom_converters) > 0:
data = TensorDumper._get_with_custom_converters_applied(data, self._custom_converters)
for_grads_with_format = TensorDumper._format_data_elements(
data, dump_type, dump_type_override, permute_grad_axes, permute_grad_axes_override
)
TensorDumper._insert_at_path(self._grad_struct, path, for_grads_with_format)
def _set_dump_type_for_all_enabled(
self, dump_type: 'TensorDumper.Type', include_tensors: bool = True, include_grads: bool = True
):
'''TEMPORARY DOCSTRING
This is the enabled variant of the corresponding method (same name without the leading `_` and trailing `_enabled`).
This docstring will be replaced with the docstring of the corresponding method when an instance is requested
for the first time.
'''
def set_dump_type(data: TensorDumper._TensorWithFormat) -> TensorDumper._TensorWithFormat:
data.dump_type = dump_type
return data
if include_tensors:
self._tensor_struct = TensorDumper._traverse_and_apply(
self._tensor_struct, TensorDumper._TensorWithFormat, set_dump_type
)
if include_grads:
self._grad_struct = TensorDumper._traverse_and_apply(
self._grad_struct, TensorDumper._TensorWithFormat, set_dump_type
)
def _dump_enabled(self):
'''TEMPORARY DOCSTRING
This is the enabled variant of the corresponding method (same name without the leading `_` and trailing `_enabled`).
This docstring will be replaced with the docstring of the corresponding method when an instance is requested
for the first time.
'''
self._dump_struct(self._tensor_struct, "tensors")
if len(self._grad_struct) > 0:
if not self._grad_computed:
raise ValueError(
"Gradients were not computed. Call `set_gradients` before dumping if any gradients are included."
)
self._dump_struct(self._grad_struct, "grads")
self._tensor_struct = {}
self._grad_struct = {}
self._grad_computed = False
self._dump_count += 1
if self._dump_count in self._after_dump_count_actions:
self._after_dump_count_actions[self._dump_count]()
def _compare_to_dumped_data_enabled(
self,
eps_numerical_data: float = 1e-6,
num_errors_per_tensor_to_show: int = 1,
allow_missing_data_in_current: bool = False,
as_warning: bool = False,
):
'''TEMPORARY DOCSTRING
This is the enabled variant of the corresponding method (same name without the leading `_` and trailing `_enabled`).
This docstring will be replaced with the docstring of the corresponding method when an instance is requested
for the first time.
'''
# Create config from parameters
config = TensorDumper._ComparisonConfig(
eps_numerical_data=eps_numerical_data,
num_errors_per_tensor_to_show=num_errors_per_tensor_to_show,
allow_missing_data_in_current=allow_missing_data_in_current,
)
is_tensor_data_consistent = self._compare_to_dumped_data(
self._tensor_struct,
"tensors",
config,
as_warning,
)
has_grad_data = len(self._grad_struct) > 0
if has_grad_data:
if not self._grad_computed:
raise ValueError(
"Gradients were not computed. Call `set_gradients` before comparing to previously dumped data."
)
is_grad_data_consistent = self._compare_to_dumped_data(
self._grad_struct,
"grads",
config,
as_warning,
)
else:
is_grad_data_consistent = True
if is_tensor_data_consistent:
print(
f"`TensorDumper:` Tensor data is consistent with previously dumped data for dump {self._dump_count}."
)
if has_grad_data and is_grad_data_consistent:
print(
f"`TensorDumper:` Grad data is consistent with previously dumped data for dump {self._dump_count}."
)
self._tensor_struct = {}
self._grad_struct = {}
self._grad_computed = False
self._dump_count += 1
def _set_gradients_enabled(self, function_values: Union[torch.Tensor, List[torch.Tensor]]):
'''TEMPORARY DOCSTRING
This is the enabled variant of the corresponding method (same name without the leading `_` and trailing `_enabled`).
This docstring will be replaced with the docstring of the corresponding method when an instance is requested
for the first time.
'''
self._grad_struct = self._compute_and_set_gradients(self._grad_struct, function_values)
self._grad_computed = True
def _reset_dump_count_enabled(self):
'''TEMPORARY DOCSTRING
This is the enabled variant of the corresponding method (same name without the leading `_` and trailing `_enabled`).
This docstring will be replaced with the docstring of the corresponding method when an instance is requested
for the first time.
'''
self._dump_count = 0
def _perform_after_dump_count_enabled(self, count: int, action: Callable):
'''TEMPORARY DOCSTRING
This is the enabled variant of the corresponding method (same name without the leading `_` and trailing `_enabled`).
This docstring will be replaced with the docstring of the corresponding method when an instance is requested
for the first time.
'''
self._after_dump_count_actions[count] = action
def _register_custom_converter_enabled(self, data_type: type, converter_func: Callable):
'''TEMPORARY DOCSTRING
This is the enabled variant of the corresponding method (same name without the leading `_` and trailing `_enabled`).
This docstring will be replaced with the docstring of the corresponding method when an instance is requested
for the first time.
'''
self._custom_converters[data_type] = converter_func
def _enable_ragged_batch_dumping_enabled(self, as_per_sample: bool = False):
'''TEMPORARY DOCSTRING
This is the enabled variant of the corresponding method (same name without the leading `_` and trailing `_enabled`).
This docstring will be replaced with the docstring of the corresponding method when an instance is requested
for the first time.
'''
# Check if the RaggedBatch class is available and raise an error if not.
try:
from accvlab.batching_helpers import RaggedBatch
except ImportError:
raise ImportError(
"The `accvlab.batching_helpers` package is not installed. Please install it to use ragged batch dumping."
)
# Convert to per-sample format.
def convert_to_per_sample(data: 'RaggedBatch') -> List[torch.Tensor]:
res = data.split()
return res
# Convert to RaggedBatch descriptor format.
def convert_to_descriptor(data: 'RaggedBatch') -> Dict[str, Union[torch.Tensor, int]]:
res = {
"data": data.tensor,
"sample_sizes": data.sample_sizes,
"non_uniform_dim": data.non_uniform_dim,
"num_batch_dims": data.num_batch_dims,
}
return res
# Register the selected converter.
if as_per_sample:
self.register_custom_converter(RaggedBatch, convert_to_per_sample)
else:
self.register_custom_converter(RaggedBatch, convert_to_descriptor)
def _run_if_enabled_enabled(self, func: Callable):
'''TEMPORARY DOCSTRING
This is the enabled variant of the corresponding method (same name without the leading `_` and trailing `_enabled`).
This docstring will be replaced with the docstring of the corresponding method when an instance is requested
for the first time.
'''
# Note that the `_run_if_enabled_enabled` method is only called if the TensorDumper is enabled. Therefore,
# no further checks are needed and we can directly call the passed function.
func()
# ===== Private Helper Methods =====
@staticmethod
def _exclude_elements(
data: TensorDataStructure,
exclude: Sequence[str],
) -> TensorDataStructure:
if isinstance(data, Dict):
return {
key: TensorDumper._exclude_elements(data[key], exclude)
for key in data.keys()
if key not in exclude
}
else:
return data
@staticmethod
def _split_multi_image_data(
data: FormattedDataStructure,
) -> FormattedDataStructure:
def split_dims_inner(
data: torch.Tensor, num_dims_to_split: int, wrapper: TensorDumper._TensorWithFormat
) -> Union[TensorDumper._TensorWithFormat, List]:
if num_dims_to_split == 0:
wrapper = copy.deepcopy(wrapper)
wrapper.tensor = data
return wrapper
else:
res = [
split_dims_inner(data[i], num_dims_to_split - 1, wrapper) for i in range(data.shape[0])
]
return res
# Create a wrapper for the resulting tensors. Note that:
# - The tensor is set to None, as it will be replaced by the split tensors
# - The permutation is set to None, as it will be applied as part of this function
# for the tensors which are split, and the wrapper is not used for any other tensors
def get_image_wrapper(dump_type: 'TensorDumper.Type') -> TensorDumper._TensorWithFormat:
return TensorDumper._TensorWithFormat(None, dump_type, None)
def split_dims(data: TensorDumper._TensorWithFormat) -> Union[TensorDumper._TensorWithFormat, List]:
if TensorDumper.Type.is_image(data.dump_type):
image_num_dims = 2 if data.dump_type == TensorDumper.Type.IMAGE_I else 3
if data.tensor.ndim > image_num_dims:
data_to_split = data.tensor
if data.permute_axes is not None:
data_to_split = torch.permute(data_to_split, data.permute_axes)
res = split_dims_inner(
data_to_split, data.tensor.ndim - image_num_dims, get_image_wrapper(data.dump_type)
)
return res
else:
return data
else:
return data
res = TensorDumper._traverse_and_apply(data, TensorDumper._TensorWithFormat, split_dims)
return res
def _compute_and_set_gradients(
self,
struct_with_tensors: TensorDataStructure,
function_values: Union[torch.Tensor, List[torch.Tensor]],
) -> TensorDataStructure:
list_of_tensors = []
def traverse_and_collect(
data: FormattedDataStructure,
list_of_tensors: List[torch.Tensor],
):
if isinstance(data, TensorDumper._TensorWithFormat):
list_of_tensors.append(data.tensor)
elif isinstance(data, Sequence) and not isinstance(data, str):
for item in data:
traverse_and_collect(item, list_of_tensors)
elif isinstance(data, Dict):
for key in data.keys():
traverse_and_collect(data[key], list_of_tensors)
def traverse_and_replace_by_grad(
data: FormattedDataStructure,
list_of_tensors: List[torch.Tensor],
) -> FormattedDataStructure:
if isinstance(data, TensorDumper._TensorWithFormat):
to_set = list_of_tensors.pop(0)
if to_set is None:
data = None
else:
data.tensor = to_set
return data
elif isinstance(data, Sequence) and not isinstance(data, str):
res = []
for item in data:
res.append(traverse_and_replace_by_grad(item, list_of_tensors))
return res
elif isinstance(data, Dict):
res = {}
for key in data.keys():
res[key] = traverse_and_replace_by_grad(data[key], list_of_tensors)
return res
else:
return data
def replace_element_not_requiring_grad(
data: TensorDumper._TensorWithFormat,
) -> Union[TensorDumper._TensorWithFormat, str]:
if not data.tensor.requires_grad:
data = "`.requires_grad == False`"
return data
struct_with_tensors = TensorDumper._traverse_and_apply(
struct_with_tensors, TensorDumper._TensorWithFormat, replace_element_not_requiring_grad
)
traverse_and_collect(struct_with_tensors, list_of_tensors)
if isinstance(function_values, torch.Tensor):
function_values = [function_values]
grads = torch.autograd.grad(function_values, list_of_tensors, retain_graph=True, allow_unused=True)
grads = list(grads)
struct_with_tensors = traverse_and_replace_by_grad(struct_with_tensors, grads)
return struct_with_tensors
def _get_dump_dir(self) -> str:
return f"{self._dump_dir}/{self._dump_count}"
def _get_json_filename(self, type_of_struct: str) -> str:
return f"{type_of_struct}.json"
def _dump_struct(self, struct_to_dump: Union[Sequence, Dict], type_of_struct: str):
struct_to_dump = TensorDumper._split_multi_image_data(struct_to_dump)
json_struct, binary_files = TensorDumper._apply_format_and_get(struct_to_dump)
dump_dir = self._get_dump_dir()
self._ensure_dir_exists(dump_dir)
json_file_name = self._get_json_filename(type_of_struct)
with open(f"{dump_dir}/{json_file_name}", "w") as f:
json.dump(json_struct, f, cls=TensorDumper._CustomEncoder, indent=2)
for file_name, file_data in binary_files.items():
dump_type = file_data["dump_type"]
if dump_type == TensorDumper.Type.BINARY:
TensorDumper._dump_binary(f"{dump_dir}/[{json_file_name}]{file_name}", file_data["data"])
elif TensorDumper.Type.is_image(dump_type):
TensorDumper._dump_image(
f"{dump_dir}/[{json_file_name}]{file_name}", file_data["data"], dump_type
)
else:
raise ValueError(f"Unsupported file type: {file_name}")
@staticmethod
def _walk_and_compare(
dumped_data: Union[Any, Sequence, Dict],
json_struct_to_compare: Union[Any, Sequence, Dict],
non_tensor_struct: Optional[Union[Any, Sequence, Dict]],
curr_path: str,
is_parent_tensor: bool,
config: 'TensorDumper._ComparisonConfig',
) -> List['TensorDumper._ComparisonError']:
class ComparisonError:
def __init__(self, message: str, weight: float):
self.message = message
self.weight = weight
def order_errors_by_weight(
errors: List['TensorDumper._ComparisonError'],
) -> List['TensorDumper._ComparisonError']:
return sorted(errors, key=lambda error: error.weight, reverse=True)
def get_path_to_show(path: str) -> str:
if len(path) == 0:
return ". (i.e. root)"
elif ":" in path:
return (path.replace(":", "[") + "]")[1:]
else:
return path[1:]
def get_child_path(curr_path: str, key: str, is_self_tensor: bool, is_parent_tensor: bool) -> str:
if not is_parent_tensor and is_self_tensor:
child_path = f"{curr_path}:{key}"
elif is_self_tensor:
child_path = f"{curr_path},{key}"
else:
child_path = f"{curr_path}.{key}"
return child_path
if isinstance(dumped_data, Dict):
res = []
is_self_tensor = non_tensor_struct is None
for key in dumped_data.keys():
if key not in json_struct_to_compare:
# Key is present in the dumped reference but missing in the current data.
if config.allow_missing_data_in_current:
warnings.warn(
f"`TensorDumper`: Key '{key}' at path: {get_path_to_show(curr_path)} is present in the dumped reference but missing in the current data."
)
else:
res.append(
ComparisonError(
f" Key '{key}' at path: {get_path_to_show(curr_path)} is present in the dumped reference but missing in the current data",
math.inf,
)
)
continue
is_child_tensor = is_self_tensor or key not in non_tensor_struct
non_tensor_struct_child = non_tensor_struct[key] if not is_child_tensor else None
r = TensorDumper._walk_and_compare(
dumped_data[key],
json_struct_to_compare[key],
non_tensor_struct_child,
get_child_path(curr_path, key, is_self_tensor, is_parent_tensor),
is_self_tensor,
config,
)
if not is_self_tensor and is_child_tensor:
r = order_errors_by_weight(r)
r = r[: config.num_errors_per_tensor_to_show] if len(r) > 0 else []
res.extend(r)
for key in json_struct_to_compare.keys():
if key not in dumped_data:
res.append(
ComparisonError(
f" Key '{key}' is present in the current data at path: {get_path_to_show(curr_path)} but missing in the dumped reference",
math.inf,
)
)
return res
elif isinstance(dumped_data, Sequence) and not isinstance(dumped_data, str):
res = []
is_self_tensor = non_tensor_struct is None
if len(dumped_data) != len(json_struct_to_compare):
res.append(
ComparisonError(
f" Length mismatch at path: {get_path_to_show(curr_path)}\n Dumped data: {dumped_data}\n Struct to compare: {json_struct_to_compare}",
math.inf,
)
)
return res
for i in range(len(dumped_data)):
is_child_tensor = is_self_tensor or i not in non_tensor_struct
non_tensor_struct_child = non_tensor_struct[i] if not is_child_tensor else None
r = TensorDumper._walk_and_compare(
dumped_data[i],
json_struct_to_compare[i],
non_tensor_struct_child,
get_child_path(curr_path, i, is_self_tensor, is_parent_tensor),
is_self_tensor,
config,
)
if not is_self_tensor and is_child_tensor:
r = order_errors_by_weight(r)
r = r[: config.num_errors_per_tensor_to_show] if len(r) > 0 else []
res.extend(r)
return res
elif isinstance(dumped_data, numbers.Number) and isinstance(json_struct_to_compare, numbers.Number):
if abs(dumped_data - json_struct_to_compare) > config.eps_numerical_data:
difference = abs(json_struct_to_compare - dumped_data)
return [
ComparisonError(
f" Numerical mismatch at path: {get_path_to_show(curr_path)}\n Dumped data: {dumped_data}\n Struct to compare: {json_struct_to_compare}\n Difference (current - dumped): {json_struct_to_compare - dumped_data}",
difference,
)
]
else:
return []
else:
if dumped_data != json_struct_to_compare:
return [
ComparisonError(
f" Mismatch at path: {get_path_to_show(curr_path)}\n Dumped data: {dumped_data}\n Struct to compare: {json_struct_to_compare}",
0.0,
)
]
else:
return []
def _compare_to_dumped_data(
self,
struct_to_compare: Union[Sequence, Dict],
type_of_struct: str,
config: 'TensorDumper._ComparisonConfig',
as_warning: bool = False,
) -> bool:
non_tensor_struct = TensorDumper._get_non_tensor_structure(struct_to_compare)
json_struct, binary_files = TensorDumper._apply_format_and_get(struct_to_compare)
# Dump json_struct to a string and read it back to ensure format consistency with previously dumped data
json_struct_str = json.dumps(json_struct, cls=TensorDumper._CustomEncoder, indent=2)
json_struct = json.loads(json_struct_str)
if len(binary_files) > 0:
first_file = list(binary_files.keys())[0]
first_file_split_at_extension = first_file.rsplit(".", 1)
first_file_no_extension, extension = first_file_split_at_extension
raise ValueError(
f"Cannot compare to dumped data with binary or image format.\nFound image or binary format at: {first_file_no_extension}\nwith format: {extension}\nPlease use the JSON format when dumping the data for comparison."
)
json_file_name = self._get_json_filename(type_of_struct)
json_file_path = f"{self._get_dump_dir()}/{json_file_name}"
try:
with open(json_file_path, "r") as f:
dumped_data = json.load(f)
except FileNotFoundError:
raise FileNotFoundError(
f"No previously dumped data found for [{type_of_struct}] data for dump {self._dump_count}\nunder file path: {json_file_path}.\nDump the data first before comparing to previously dumped data."
)
res_errors = self._walk_and_compare(
dumped_data,
json_struct,
non_tensor_struct,
"",
False,
config,
)
if len(res_errors) > 0:
error_message = "\n".join([error.message for error in res_errors])
error_message = (
f"NOTE: The following errors were found for the dumped [{type_of_struct}] data for dump {self._dump_count}.\n"
f" Up to {config.num_errors_per_tensor_to_show} most significant errors are shown per tensor.\n"
+ error_message
)
error_message = f"Comparison of data with previously dumped data failed for [{type_of_struct}] data for dump {self._dump_count}.\n{error_message}"
if as_warning:
warnings.warn(error_message)
else:
raise ValueError(error_message)
return False
else:
return True
@staticmethod
def _dump_binary(file_name: str, file_data: torch.Tensor, add_meta_info: bool = True):
if add_meta_info:
file_meta_info = {
"file_data_shape": file_data.shape,
"file_data_dtype": file_data.dtype,
}
with open(f"{file_name}.meta.json", "w") as f:
json.dump(file_meta_info, f, indent=2, cls=TensorDumper._CustomEncoder)
data = file_data.detach().cpu().contiguous().numpy().tobytes()
with open(file_name, 'wb') as f:
f.write(data)
@staticmethod
def _dump_image(
file_name: str, file_data: torch.Tensor, dump_type: 'TensorDumper.Type', add_meta_info: bool = True
):
try:
import cv2
except ImportError as exc:
raise ImportError(
"OpenCV (cv2) is not installed, but is required for dumping images via TensorDumper.\n"
"Please install the ACCV-Lab packages with optional dependencies enabled. "
"For details, see the Installation Guide (section on installation with optional dependencies)."
) from exc
def ensure_image_range_and_get_orig_range(image: torch.Tensor) -> tuple[torch.Tensor, List[float]]:
min_val = image.min()
max_val = image.max()
offset = -min_val
diff = max_val - min_val
scaling = (255 / diff) if diff > 0 else 1
res = (image + offset) * scaling
return res, [min_val.item(), max_val.item()]
assert (dump_type == TensorDumper.Type.IMAGE_I and file_data.ndim == 2) or (
dump_type != TensorDumper.Type.IMAGE_I and file_data.ndim == 3
), f"Number of image dimensions does not match the dump type for file:\n{file_name}.\nImage data has {file_data.ndim} dimensions; dump type is {dump_type}."
assert (
dump_type == TensorDumper.Type.IMAGE_I or file_data.shape[-1] == 3
), f"Color image must have 3 channels, but image to be dumped to:\n{file_name}\nhas {file_data.shape[-1]} channels."
file_data, orig_range = ensure_image_range_and_get_orig_range(file_data)
file_data = file_data.detach().cpu().contiguous().numpy().astype(np.uint8)
if dump_type == TensorDumper.Type.IMAGE_RGB:
file_data = cv2.cvtColor(file_data, cv2.COLOR_RGB2BGR)
cv2.imwrite(file_name, file_data)
if add_meta_info:
if dump_type == TensorDumper.Type.IMAGE_RGB:
image_format = "RGB"
elif dump_type == TensorDumper.Type.IMAGE_BGR:
image_format = "BGR"
elif dump_type == TensorDumper.Type.IMAGE_I:
image_format = "Intensity"
else:
raise ValueError(f"Unsupported image format: {dump_type}")
file_meta_info = {
"original_range": orig_range,
"image_format": image_format,
}
with open(f"{file_name}.meta.json", "w") as f:
json.dump(file_meta_info, f, cls=TensorDumper._CustomEncoder, indent=2)
@staticmethod
def _ensure_dir_exists(dir_name: str):
os.makedirs(dir_name, exist_ok=True)
@staticmethod
def _traverse_and_apply(
data: GenericDataStructure,
data_type_element: type,
func_element: Callable,
) -> GenericDataStructure:
if isinstance(data, data_type_element):
return func_element(data)
else:
if isinstance(data, Sequence) and not isinstance(data, str):
res = [
TensorDumper._traverse_and_apply(item, data_type_element, func_element) for item in data
]
return res
elif isinstance(data, Dict):
res = {
key: TensorDumper._traverse_and_apply(data[key], data_type_element, func_element)
for key in data.keys()
}
return res
else:
return data
@staticmethod
def _traverse_remember_waypoints_and_apply(
data: GenericDataStructure,
data_element_type: type,
default_param: Any,
param_for_waypoints: Dict[str, Any],
func_element: Callable,
) -> GenericDataStructure:
if isinstance(data, data_element_type):
return func_element(data, default_param)
elif isinstance(data, Sequence) and not isinstance(data, str):
return [
TensorDumper._traverse_remember_waypoints_and_apply(
item, data_element_type, default_param, param_for_waypoints, func_element
)
for item in data
]
elif isinstance(data, Dict):
res = {}
for key, value in data.items():
if str(key) in param_for_waypoints:
res[key] = TensorDumper._traverse_remember_waypoints_and_apply(
value,
data_element_type,
param_for_waypoints[str(key)],
param_for_waypoints,
func_element,
)
else:
res[key] = TensorDumper._traverse_remember_waypoints_and_apply(
value, data_element_type, default_param, param_for_waypoints, func_element
)
return res
else:
return data
@staticmethod
def _format_data_elements(
data: TensorDataStructure,
dump_type: 'TensorDumper.Type',
dump_type_override: OptionalTypeDict = None,
permute_axes: OptionalSequence = None,
permute_axes_override: OptionalPermuteDict = None,
) -> FormattedDataStructure:
def change_permute_axes(
data: TensorDumper._TensorWithFormat, selected_permute_axes: OptionalSequence
) -> TensorDumper._TensorWithFormat:
data.permute_axes = selected_permute_axes
return data
# If no overrides are provided, we can just apply the default dump type and permute axes to all tensors
if dump_type_override is None and permute_axes_override is None:
data = TensorDumper._traverse_and_apply(
data, torch.Tensor, lambda x: TensorDumper._TensorWithFormat(x, dump_type, permute_axes)
)
return data
# If dump type overrides are provided, we need to traverse the data and apply the overrides to the tensors
elif dump_type_override is not None:
data = TensorDumper._traverse_remember_waypoints_and_apply(
data,
torch.Tensor,
dump_type,
dump_type_override,
lambda x, selected_dump_type: TensorDumper._TensorWithFormat(
x, selected_dump_type, permute_axes
),
)
# If permute axes overrides are provided additionally,
# we need to make a second pass and apply the overrides
# on the already converted data
if permute_axes_override is not None:
data = TensorDumper._traverse_remember_waypoints_and_apply(
data,
TensorDumper._TensorWithFormat,
permute_axes,
permute_axes_override,
change_permute_axes,
)
return data
# The `else` case is when only permute axes overrides are provided.
# In this case, we can apply them directly to the original data
# and do not need a second pass.
else:
data = TensorDumper._traverse_remember_waypoints_and_apply(
data,
torch.Tensor,
permute_axes,
permute_axes_override,
lambda x, selected_perm_axes: TensorDumper._TensorWithFormat(
x, dump_type, selected_perm_axes
),
)
return data
@staticmethod
def _apply_format_and_get(
data: FormattedDataStructure,
path: OptionalPath = None,
) -> tuple[Union[Any, Sequence, Dict], Dict[str, Dict[str, Union[torch.Tensor, 'TensorDumper.Type']]]]:
def get_item_path(item_key: Union[str, int]) -> str:
return f"{path}.{item_key}" if path is not None else str(item_key)
res_data = None
res_files = {}
if isinstance(data, TensorDumper._TensorWithFormat):
if data.permute_axes is not None:
tensor = data.tensor.permute(data.permute_axes)
else:
tensor = data.tensor
if data.dump_type == TensorDumper.Type.JSON:
res_data = tensor
res_files = {}
elif data.dump_type == TensorDumper.Type.BINARY:
res_data = f"{path}.bin"
res_files = {res_data: {"data": tensor, "dump_type": data.dump_type}}
elif TensorDumper.Type.is_image(data.dump_type):
res_data = f"{path}.png"
res_files = {res_data: {"data": tensor, "dump_type": data.dump_type}}
else:
raise ValueError(f"Unsupported dump type: {data.dump_type}")
return res_data, res_files
elif isinstance(data, Sequence) and not isinstance(data, str):
res_data = []
for i, item in enumerate(data):
path_i = get_item_path(i)
item_data, item_files = TensorDumper._apply_format_and_get(item, path_i)
res_data.append(item_data)
res_files.update(item_files)
return res_data, res_files
elif isinstance(data, Dict):
res_data = {}
for key, value in data.items():
path_key = get_item_path(key)
item_data, item_files = TensorDumper._apply_format_and_get(value, path_key)
res_data[key] = item_data
res_files.update(item_files)
return res_data, res_files
else:
return data, {}
@staticmethod
def _get_non_tensor_structure(
data: FormattedDataStructure,
) -> FormattedDataStructure:
if isinstance(data, TensorDumper._TensorWithFormat):
return None
elif isinstance(data, Sequence) and not isinstance(data, str):
# Note that we are using dictionaries instead of lists here,
# because this allows us to store only non-tensor elements while
# preserving the indices of the elements.
res = {
i: result
for i, item in enumerate(data)
if (result := TensorDumper._get_non_tensor_structure(item)) is not None
}
return res
elif isinstance(data, dict):
res = {
key: result
for key, value in data.items()
if (result := TensorDumper._get_non_tensor_structure(value)) is not None
}
return res
else:
return data
@staticmethod
def _insert_at_path(
data: Union[Sequence[Union[Sequence, Dict]], Dict[str, Union[Sequence, Dict]]],
path: str,
value: Union[
Union[_TensorWithFormat, Any],
Sequence[Union[_TensorWithFormat, Any, Sequence, Dict]],
Dict[str, Union[_TensorWithFormat, Any, Sequence, Dict]],
],
):
path_parts = path.split(".")
curr_data = data
parent = None
# Make sure the path exists
for part in path_parts:
if isinstance(curr_data, Sequence):
assert part.isdigit(), f"Path part {part} is not a number, but parent is a sequence"
part = int(part)
assert part < len(curr_data), f"Path part {part} is out of bounds (parent is a sequence)"
elif isinstance(curr_data, Dict):
# Create an intermediate dictionary for this path part if it does not exist yet
if not (part in curr_data):
curr_data[part] = {}
parent = curr_data
curr_data = curr_data[part]
# Insert the new element
# We can only insert into a dictionary
assert isinstance(
curr_data, Dict
), f"Path `{path}` points to an existing element which is not a dictionary. Cannot insert there."
# If we are inserting a dictionary, we can insert into non-empty or empty dicts
if isinstance(value, Dict):
for key in value.keys():
assert (
key not in curr_data
), f"Path `{path}` has an existing element with key `{key}`. Cannot insert element from `value` with the same key."
curr_data[key] = value[key]
# If we are inserting a tensor or a sequence, we can only insert into an empty dict
elif isinstance(value, (TensorDumper._TensorWithFormat, Sequence)):
assert parent is not None, "Can only insert dictionaries at the root level."
assert (
len(curr_data) == 0
), f"Path part `{path}` points to an existing non-empty dictionary. Cannot insert tensors or sequences as this would overwrite the existing elements."
parent[path_parts[-1]] = value
else:
raise ValueError(f"Unsupported data type: {type(value)}")
@staticmethod
def _get_with_custom_converters_applied(
data: Union[Any, Sequence, Dict], custom_converters: Dict[type, Callable]
) -> Union[Any, Sequence, Dict]:
was_changed = False
def get_with_custom_converters_applied_inner(
data: Union[Any, Sequence, Dict], custom_converters: Dict[type, Callable]
) -> Union[Any, Sequence, Dict]:
nonlocal was_changed
if isinstance(data, dict):
return {
key: get_with_custom_converters_applied_inner(value, custom_converters)
for key, value in data.items()
}
elif isinstance(data, Sequence) and not isinstance(data, str):
return [get_with_custom_converters_applied_inner(item, custom_converters) for item in data]
else:
data_type = type(data)
if data_type in custom_converters:
was_changed = True
return custom_converters[data_type](data)
else:
return data
do_iterate = True
while do_iterate:
data = get_with_custom_converters_applied_inner(data, custom_converters)
do_iterate = was_changed
was_changed = False
return data