NVTX C API Reference v3
NVIDIA Tools Extension Library
|
Data Structures | |
union | nvtxMemRegionRef_t |
A reference to a memory region (by pointer or handle). Which member of the union will be determined by a type or flag field outside. More... | |
struct | nvtxMemVirtualRangeDesc_v1 |
struct | nvtxMemHeapDesc_v1 |
structure to describe a heap in process virtual memory. More... | |
struct | nvtxMemRegionsRegisterBatch_v1 |
Register a region of memory inside of a heap. More... | |
struct | nvtxMemRegionsResizeBatch_v1 |
Resize a region of memory inside of a heap. More... | |
struct | nvtxMemRegionsUnregisterBatch_v1 |
Unregister a region of memory inside of a heap. More... | |
struct | nvtxMemRegionNameDesc_v1 |
struct | nvtxMemRegionsNameBatch_v1 |
struct | nvtxMemPermissionsAssignRegionDesc_v1 |
struct | nvtxMemPermissionsAssignBatch_v1 |
Macros | |
#define | NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE (NVTX_STATIC_CAST(nvtxMemHeapHandle_t, 0)) |
To indicate the full process virtual address space as a heap for functions where a nvtxMemHeapHandle_t is accepted. | |
#define | NVTX_MEM_HEAP_USAGE_TYPE_SUB_ALLOCATOR 0x1 |
This heap is a sub-allocator. | |
#define | NVTX_MEM_HEAP_USAGE_TYPE_LAYOUT 0x2 |
This is a heap of memory that has an explicit layout. | |
#define | NVTX_MEM_TYPE_VIRTUAL_ADDRESS 0x1 |
Standard process userspace virtual addresses for linear allocations. | |
#define | NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE (NVTX_STATIC_CAST(nvtxMemPermissionsHandle_t, 0)) |
To indicate you are modifying permissions to the process-wide full virtual address space. | |
#define | NVTX_MEM_PERMISSIONS_CREATE_FLAGS_NONE 0x0 |
#define | NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_READ 0x1 |
#define | NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_WRITE 0x2 |
#define | NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_ATOMIC 0x4 |
#define | NVTX_MEM_REGION_REF_TYPE_UNKNOWN 0x0 |
#define | NVTX_MEM_REGION_REF_TYPE_POINTER 0x1 |
#define | NVTX_MEM_REGION_REF_TYPE_HANDLE 0x2 |
#define | NVTX_MEM_PERMISSIONS_REGION_FLAGS_NONE 0x0 |
There are no permissions for this memory. | |
#define | NVTX_MEM_PERMISSIONS_REGION_FLAGS_READ 0x1 |
The memory is readable. | |
#define | NVTX_MEM_PERMISSIONS_REGION_FLAGS_WRITE 0x2 |
The memory is writable. | |
#define | NVTX_MEM_PERMISSIONS_REGION_FLAGS_ATOMIC 0x4 |
The memory is for atomic RW. | |
#define | NVTX_MEM_PERMISSIONS_REGION_FLAGS_RESET 0x8 |
The memory access permissions are reset for a region. | |
#define | NVTX_MEM_PERMISSIONS_BIND_FLAGS_NONE 0x0 |
#define | NVTX_MEM_PERMISSIONS_BIND_FLAGS_STRICT_WRITE 0x2 |
Upon binding, with the thread, exclude parent scope write regions instead of overlaying on top of them. | |
#define | NVTX_MEM_PERMISSIONS_BIND_FLAGS_STRICT_READ 0x1 |
Upon binding, with the thread, exclude parent scope read regions instead of overlaying on top of them. | |
#define | NVTX_MEM_PERMISSIONS_BIND_FLAGS_STRICT_ATOMIC 0x4 |
Upon binding, with the thread, exclude parent scope atomic RW regions instead of overlaying on top of them. | |
#define | NVTX_MEM_PERMISSIONS_BIND_SCOPE_UNKNOWN 0x0 |
#define | NVTX_MEM_PERMISSIONS_BIND_SCOPE_CPU_THREAD 0x1 |
Bind to thread scope. In this case, tools should validate that local thread's execution is honoring the permissions as well as the state of NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE at the time of binding. If this is not bound then NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE should be used to validate the memory. | |
#define | NVTX_MEM_PERMISSIONS_BIND_SCOPE_CUDA_STREAM 0x2 |
Bind to CUDA stream scope. | |
Typedefs | |
typedef nvtxMemHeap_t * | nvtxMemHeapHandle_t |
A handle returned by a tool to represent a memory heap. | |
typedef nvtxMemRegion_t * | nvtxMemRegionHandle_t |
A handle returned by a tool to represent a memory region. | |
typedef nvtxMemPermissions_t * | nvtxMemPermissionsHandle_t |
A handle returned by a tool to represent a memory permissions mask. | |
typedef nvtxMemVirtualRangeDesc_v1 | nvtxMemVirtualRangeDesc_t |
typedef nvtxMemHeapDesc_v1 | nvtxMemHeapDesc_t |
typedef nvtxMemRegionsRegisterBatch_v1 | nvtxMemRegionsRegisterBatch_t |
typedef nvtxMemRegionsResizeBatch_v1 | nvtxMemRegionsResizeBatch_t |
typedef nvtxMemRegionsUnregisterBatch_v1 | nvtxMemRegionsUnregisterBatch_t |
typedef nvtxMemRegionNameDesc_v1 | nvtxMemRegionNameDesc_t |
typedef nvtxMemRegionsNameBatch_v1 | nvtxMemRegionsNameBatch_t |
typedef nvtxMemPermissionsAssignRegionDesc_v1 | nvtxMemPermissionsAssignRegionDesc_t |
typedef nvtxMemPermissionsAssignBatch_v1 | nvtxMemPermissionsAssignBatch_t |
Functions | |
NVTX_DECLSPEC nvtxMemHeapHandle_t NVTX_API | nvtxMemHeapRegister (nvtxDomainHandle_t domain, nvtxMemHeapDesc_t const *desc) |
Create a memory heap to represent an object or range of memory that will be further sub-divided into regions. | |
NVTX_DECLSPEC void NVTX_API | nvtxMemHeapUnregister (nvtxDomainHandle_t domain, nvtxMemHeapHandle_t heap) |
Destroy a memory heap. | |
NVTX_DECLSPEC void NVTX_API | nvtxMemHeapReset (nvtxDomainHandle_t domain, nvtxMemHeapHandle_t heap) |
Resetting the memory heap wipes out any changes, as if it were a fresh heap. | |
NVTX_DECLSPEC void NVTX_API | nvtxMemRegionsRegister (nvtxDomainHandle_t domain, nvtxMemRegionsRegisterBatch_t const *desc) |
Register a region of memory inside of a heap of linear process virtual memory. | |
NVTX_DECLSPEC void NVTX_API | nvtxMemRegionsResize (nvtxDomainHandle_t domain, nvtxMemRegionsResizeBatch_t const *desc) |
Resize a region of memory inside of a heap of linear process virtual memory. | |
NVTX_DECLSPEC void NVTX_API | nvtxMemRegionsUnregister (nvtxDomainHandle_t domain, nvtxMemRegionsUnregisterBatch_t const *desc) |
Unregistration for regions of process virtual memory. | |
NVTX_DECLSPEC void NVTX_API | nvtxMemRegionsName (nvtxDomainHandle_t domain, nvtxMemRegionsNameBatch_t const *desc) |
Name or rename a region. | |
NVTX_DECLSPEC void NVTX_API | nvtxMemPermissionsAssign (nvtxDomainHandle_t domain, nvtxMemPermissionsAssignBatch_t const *desc) |
Change the permissions of a region of process virtual memory. | |
NVTX_DECLSPEC nvtxMemPermissionsHandle_t NVTX_API | nvtxMemPermissionsCreate (nvtxDomainHandle_t domain, int32_t creationflags) |
Create a permissions object for fine grain thread-local control in multi-threading scenarios. | |
NVTX_DECLSPEC void NVTX_API | nvtxMemPermissionsDestroy (nvtxDomainHandle_t domain, nvtxMemPermissionsHandle_t permissionsHandle) |
Destroy the permissions object. | |
NVTX_DECLSPEC void NVTX_API | nvtxMemPermissionsReset (nvtxDomainHandle_t domain, nvtxMemPermissionsHandle_t permissionsHandle) |
Reset the permissions object back to its created state. | |
NVTX_DECLSPEC void NVTX_API | nvtxMemPermissionsBind (nvtxDomainHandle_t domain, nvtxMemPermissionsHandle_t permissions, uint32_t bindScope, uint32_t bindFlags) |
Bind the permissions object into a particular scope on the caller thread. | |
NVTX_DECLSPEC void NVTX_API | nvtxMemPermissionsUnbind (nvtxDomainHandle_t domain, uint32_t bindScope) |
Unbind the permissions object bound to the caller thread. | |
See page PAGE_MEMORY.
#define NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE (NVTX_STATIC_CAST(nvtxMemHeapHandle_t, 0)) |
To indicate the full process virtual address space as a heap for functions where a nvtxMemHeapHandle_t is accepted.
The heap by default is always read-write-execute permissions without creating regions. Regions created in this heap have read-write access by default but not execute.
Definition at line 110 of file nvToolsExtMem.h.
#define NVTX_MEM_HEAP_USAGE_TYPE_LAYOUT 0x2 |
This is a heap of memory that has an explicit layout.
The layout could be static or dynamic (calculated). This often represents an algorithm's structures that are packed together. By default this heap is assumed to be accessible for scopes where the memory is naturally accessible by hardware. Regions may be used to further annotate or restrict access. A tool may have an option to be more strict, but special consideration must be made for NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE.
The behavior of this usage is similar to NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE but a tool can use it to track special behaviors and reservation.
Memory in a heap with this usage has read-write permissions by default but not execute without creating regions. Regions created in this heap have the same default permission access.
Definition at line 134 of file nvToolsExtMem.h.
#define NVTX_MEM_HEAP_USAGE_TYPE_SUB_ALLOCATOR 0x1 |
This heap is a sub-allocator.
Heap created with this usage should not be accessed by the user until regions are registered. Regions from a heap with this usage have read-write access by default but not execute.
Definition at line 117 of file nvToolsExtMem.h.
#define NVTX_MEM_PERMISSIONS_BIND_FLAGS_NONE 0x0 |
Definition at line 589 of file nvToolsExtMem.h.
#define NVTX_MEM_PERMISSIONS_BIND_FLAGS_STRICT_ATOMIC 0x4 |
Upon binding, with the thread, exclude parent scope atomic RW regions instead of overlaying on top of them.
EX After eliminating any errors from read and write, a developer may choose to ensure that atomics are in their own region, removing standard read/write, and replacing with this strict atomic only access. This way they know that conventional reads or writes will not cause unexpected issues.
Definition at line 612 of file nvToolsExtMem.h.
#define NVTX_MEM_PERMISSIONS_BIND_FLAGS_STRICT_READ 0x1 |
Upon binding, with the thread, exclude parent scope read regions instead of overlaying on top of them.
EX After eliminating any errors when applying strict writes, a developer may then choose to annotate and enforce strict read behaviors in segments of code.
Definition at line 603 of file nvToolsExtMem.h.
#define NVTX_MEM_PERMISSIONS_BIND_FLAGS_STRICT_WRITE 0x2 |
Upon binding, with the thread, exclude parent scope write regions instead of overlaying on top of them.
EX A developer may choose to first prevent all writes except the ones specified to avoid OOB writes, since there are typically fewer regions written to than read from.
Definition at line 596 of file nvToolsExtMem.h.
#define NVTX_MEM_PERMISSIONS_BIND_SCOPE_CPU_THREAD 0x1 |
Bind to thread scope. In this case, tools should validate that local thread's execution is honoring the permissions as well as the state of NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE at the time of binding. If this is not bound then NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE should be used to validate the memory.
Not all tools will support every scope, such as a GPU sanitizer.
Definition at line 624 of file nvToolsExtMem.h.
#define NVTX_MEM_PERMISSIONS_BIND_SCOPE_CUDA_STREAM 0x2 |
Bind to CUDA stream scope.
In this case, the tool should validate that work enqueued to a CUDA stream, when it executes, respects the permissions of the permissions object at the point of binding, as well as the appropriate nvtxMemCudaGetDevicePermissions at the time of binding. If this is not bound then nvtxMemCudaGetDevicePermissions at the time of stream enqueue should be used to validate the memory.
This could apply to work done either on the GPU, like a kernel launch, or to CPU-based callbacks like cudaStreamAddCallback, if the tool supports it.
Binding applies locally to a CPU thread so that if N CPU threads are enqueuing work to the same stream (like the default stream), there cannot be a race condition between thread binding and launching their work. I.e., users should expect the permissions bound in the thread to be honored by the subsequent work (launches, copies, etc.) invoked from the CPU thread until unbound.
Definition at line 644 of file nvToolsExtMem.h.
#define NVTX_MEM_PERMISSIONS_BIND_SCOPE_UNKNOWN 0x0 |
Definition at line 615 of file nvToolsExtMem.h.
#define NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_ATOMIC 0x4 |
Definition at line 161 of file nvToolsExtMem.h.
#define NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_READ 0x1 |
Definition at line 159 of file nvToolsExtMem.h.
#define NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_WRITE 0x2 |
Definition at line 160 of file nvToolsExtMem.h.
#define NVTX_MEM_PERMISSIONS_CREATE_FLAGS_NONE 0x0 |
Definition at line 158 of file nvToolsExtMem.h.
#define NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE (NVTX_STATIC_CAST(nvtxMemPermissionsHandle_t, 0)) |
To indicate you are modifying permissions to the process-wide full virtual address space.
This is a companion object to NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE.
Definition at line 156 of file nvToolsExtMem.h.
#define NVTX_MEM_PERMISSIONS_REGION_FLAGS_ATOMIC 0x4 |
The memory is for atomic RW.
Definition at line 501 of file nvToolsExtMem.h.
#define NVTX_MEM_PERMISSIONS_REGION_FLAGS_NONE 0x0 |
There are no permissions for this memory.
Definition at line 492 of file nvToolsExtMem.h.
#define NVTX_MEM_PERMISSIONS_REGION_FLAGS_READ 0x1 |
The memory is readable.
Definition at line 495 of file nvToolsExtMem.h.
#define NVTX_MEM_PERMISSIONS_REGION_FLAGS_RESET 0x8 |
The memory access permissions are reset for a region.
This is as if never set, rather than documented defaults. As a result, any flags indicating how unspecified regions are handled will affect this area.
This should not be used with READ, WRITE, nor ATOMIC, as those flags would have no effect.
Definition at line 511 of file nvToolsExtMem.h.
#define NVTX_MEM_PERMISSIONS_REGION_FLAGS_WRITE 0x2 |
The memory is writable.
Definition at line 498 of file nvToolsExtMem.h.
#define NVTX_MEM_REGION_REF_TYPE_HANDLE 0x2 |
Definition at line 413 of file nvToolsExtMem.h.
#define NVTX_MEM_REGION_REF_TYPE_POINTER 0x1 |
Definition at line 412 of file nvToolsExtMem.h.
#define NVTX_MEM_REGION_REF_TYPE_UNKNOWN 0x0 |
Definition at line 411 of file nvToolsExtMem.h.
#define NVTX_MEM_TYPE_VIRTUAL_ADDRESS 0x1 |
Standard process userspace virtual addresses for linear allocations.
APIs that map into this space, such as CUDA UVA should use this type.
Relevant functions: cudaMalloc, cudaMallocManaged, cudaHostAlloc, cudaMallocHost. NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE is supported.
nvtxMemHeapRegister receives a heapDesc of type nvtxMemVirtualRangeDesc_t
Definition at line 147 of file nvToolsExtMem.h.
typedef nvtxMemHeapDesc_v1 nvtxMemHeapDesc_t |
Definition at line 297 of file nvToolsExtMem.h.
typedef nvtxMemHeap_t* nvtxMemHeapHandle_t |
A handle returned by a tool to represent a memory heap.
Definition at line 173 of file nvToolsExtMem.h.
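typedef nvtxMemPermissionsAssignBatch_v1 nvtxMemPermissionsAssignBatch_t |
Definition at line 539 of file nvToolsExtMem.h.
typedef nvtxMemPermissionsAssignRegionDesc_v1 nvtxMemPermissionsAssignRegionDesc_t |
Definition at line 521 of file nvToolsExtMem.h.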
typedef nvtxMemPermissions_t* nvtxMemPermissionsHandle_t |
A handle returned by a tool to represent a memory permissions mask.
Definition at line 204 of file nvToolsExtMem.h.
typedef nvtxMemRegion_t* nvtxMemRegionHandle_t |
A handle returned by a tool to represent a memory region.
Definition at line 184 of file nvToolsExtMem.h.
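typedef nvtxMemRegionNameDesc_v1 nvtxMemRegionNameDesc_t |
Definition at line 468 of file nvToolsExtMem.h.
typedef nvtxMemRegionsNameBatch_v1 nvtxMemRegionsNameBatch_t |
Definition at line 483 of file nvToolsExtMem.h.
typedef nvtxMemRegionsRegisterBatch_v1 nvtxMemRegionsRegisterBatch_t |
Definition at line 363 of file nvToolsExtMem.h.
typedef nvtxMemRegionsResizeBatch_v1 nvtxMemRegionsResizeBatch_t |
Definition at line 402 of file nvToolsExtMem.h.
typedef nvtxMemRegionsUnregisterBatch_v1 nvtxMemRegionsUnregisterBatch_t |
Definition at line 445 of file nvToolsExtMem.h.
typedef nvtxMemVirtualRangeDesc_v1 nvtxMemVirtualRangeDesc_t |
Definition at line 212 of file nvToolsExtMem.h.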
NVTX_DECLSPEC nvtxMemHeapHandle_t NVTX_API nvtxMemHeapRegister | ( | nvtxDomainHandle_t | domain, |
nvtxMemHeapDesc_t const * | desc | ||
) |
Create a memory heap to represent an object or range of memory that will be further sub-divided into regions.
The handle used to address the heap will depend on the heap's type. Where the heap is virtual memory accessible, the address of the heap's memory itself is its handle. This will likewise be returned from the function.
For more advanced types, where the heap is not virtual memory accessible, the tools may be responsible for returning a void const * that uniquely identifies the object. Please see the description of each heap type for more details on whether this is expected to be uniquely generated by the tool or otherwise.
NVTX_DECLSPEC void NVTX_API nvtxMemHeapReset | ( | nvtxDomainHandle_t | domain, |
nvtxMemHeapHandle_t | heap | ||
) |
Resetting the memory heap wipes out any changes, as if it were a fresh heap.
This includes invalidating all regions and their handles.
NVTX_DECLSPEC void NVTX_API nvtxMemPermissionsBind | ( | nvtxDomainHandle_t | domain, |
nvtxMemPermissionsHandle_t | permissions, | ||
uint32_t | bindScope, | ||
uint32_t | bindFlags | ||
) |
Bind the permissions object into a particular scope on the caller thread.
Permissions do not take effect until binding. Binding permissions is a thread-local activity that overrides global behaviors. This is to avoid multi-threaded race conditions.
The scope dictates what type of processing it applies to, and when in some cases. EX1: NVTX_MEM_PERMISSIONS_BIND_SCOPE_CPU_THREAD applies to CPU code accessing memory while bound. EX2: NVTX_MEM_PERMISSIONS_BIND_SCOPE_CUDA_STREAM applies to CUDA streams, and the permissions must be recorded and applied when the work in the stream dequeues to execute. In this case it could be GPU or CPU, if the tool supports both.
Bind can be called again on the same object and thread to take any updates to the specified permission object or the inherited properties.
Bind flags support changing how the binding process inherits region access control. In the case of thread scope this is NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE and from CUDA_STREAM this is nvtxMemCudaGetDevicePermissions. Choosing stricter modes allows the user to further reduce the access with less work, since memory, by default, behaves as natural until the NVTX annotations instruct a tool to treat it another way. See strict flags for more details.
Also see nvtxMemPermissionsUnbind
NVTX_DECLSPEC nvtxMemPermissionsHandle_t NVTX_API nvtxMemPermissionsCreate | ( | nvtxDomainHandle_t | domain, |
int32_t | creationflags | ||
) |
Create a permissions object for fine grain thread-local control in multi-threading scenarios.
Unlike the global permissions object (NVTX_MEM_PERMISSIONS_HANDLE_PROCESS_WIDE), a new permissions object is empty. There are no regions registered to it, so more memory is accessible if bound (bind) without calls to nvtxMemPermissionsSetAccess* first. The permissions are not active until nvtxMemPermissionsBind. See nvtxMemPermissionsBind for more details.
Use the flags NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_* to control how the regions in this permission object will interact with global permissions when bound. You may choose to either replace global memory regions setting or overlay on top of them. The most common uses are as follows:
NVTX_MEM_PERMISSIONS_CREATE_FLAGS_EXCLUDE_GLOBAL_WRITE
Also see nvtxMemPermissionsBind & nvtxMemPermissionsSetAccess*.
NVTX_DECLSPEC void NVTX_API nvtxMemPermissionsDestroy | ( | nvtxDomainHandle_t | domain, |
nvtxMemPermissionsHandle_t | permissionsHandle | ||
) |
Destroy the permissions object.
If bound(bind), destroy will also unbind it.
NVTX_DECLSPEC void NVTX_API nvtxMemPermissionsUnbind | ( | nvtxDomainHandle_t | domain, |
uint32_t | bindScope | ||
) |
Unbind the permissions object bound to the caller thread.
Upon unbind, the thread local permissions for a scope are restored to the default behavior defined by the scope.
NVTX_DECLSPEC void NVTX_API nvtxMemRegionsUnregister | ( | nvtxDomainHandle_t | domain, |
nvtxMemRegionsUnregisterBatch_t const * | desc | ||
) |
Unregistration for regions of process virtual memory.
This is not necessary if the NVTX heap destroy function has already been called on the heap that contains this object.