Timer#

class TimerSession#

RAII timer session for automatic cleanup.

Automatically stops timing when the session goes out of scope.

Public Functions

inline TimerSession(std::function<void()> onEnd)#

Construct active session with callback.

Parameters:

onEnd – Callback to execute on destruction

inline TimerSession(std::nullptr_t)#

Construct inactive session.

inline ~TimerSession()#

Destructor - executes the callback if the session is active.

TimerSession(TimerSession const&) = delete#

Deleted copy constructor.

TimerSession &operator=(TimerSession const&) = delete#

Deleted copy assignment.

inline TimerSession(TimerSession &&other) noexcept#

Move constructor.

inline TimerSession &operator=(TimerSession &&other) noexcept#

Move assignment operator.

class Timer#

CUDA timer with RAII and deferred calculation.

Provides stage-based timing using CUDA events with automatic cleanup.

Public Functions

Timer() = default#

Default constructor.

~Timer() = default#

Destructor.

void reset()#

Reset all timing data.

TimerSession startStage(
std::string const &stageId,
cudaStream_t stream
)#

Start timing a stage with automatic cleanup.

Parameters:
  • stageId – Stage identifier

  • stream – CUDA stream (default: 0)

Returns:

RAII session that stops timing on destruction

std::optional<StageTimingData> getTimingData(
std::string const &stageId
) const#

Get timing data for a stage.

Parameters:

stageId – Stage identifier

Returns:

Timing data if available, nullopt otherwise

std::unordered_map<std::string, StageTimingData> const &getAllTimingData(
) const#

Get all timing data.

Returns:

Map of stage IDs to timing data

struct TimerPair#

CUDA event pair for timing.

Lazily initialized timer pair using CUDA events.

Public Functions

inline void initialize()#

Initialize CUDA events.

inline ~TimerPair()#

Destructor - destroys CUDA events.

TimerPair() = default#

Default constructor.

TimerPair(TimerPair const&) = delete#

Deleted copy constructor.

TimerPair &operator=(TimerPair const&) = delete#

Deleted copy assignment.

inline TimerPair(TimerPair &&other) noexcept#

Move constructor.

inline TimerPair &operator=(TimerPair &&other) noexcept#

Move assignment operator.

Public Members

cudaEvent_t gpuStart = {nullptr}#

Start event.

cudaEvent_t gpuEnd = {nullptr}#

End event.

bool hasStarted = {false}#

Whether timing has started.

bool isInitialized = {false}#

Whether events are initialized.

struct StageTimingData#

Stage timing data.

Stores raw timing measurements and calculates derived values on-demand.

Public Functions

inline void addTiming(float timeMs)#

Add timing measurement.

Parameters:

timeMs – Time in milliseconds

inline void reset()#

Reset all timing data.

inline float getTotalGpuTimeMs() const#

Calculate total GPU time.

Returns:

Total time in milliseconds

inline float getAverageTimeMs() const#

Calculate average time per run.

Returns:

Average time in milliseconds

inline int64_t getTotalRuns() const#

Get total number of runs.

Returns:

Run count

Public Members

std::vector<float> gpuTimesMs#

GPU time measurements in milliseconds.

bool trt_edgellm::getProfilingEnabled()#

Global profiling control flag. When false, no profiling data (metrics or timing) will be recorded. This is useful to exclude warmup runs from benchmark statistics.

void trt_edgellm::setProfilingEnabled(bool enabled)#