NVTX/doxygen/nv_tools_ext_mem_cuda_rt_8h_source.html

/*

 * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 *

 * Licensed under the Apache License, Version 2.0 (the "License");

 * you may not use this file except in compliance with the License.

 * You may obtain a copy of the License at

 *

 *     http://www.apache.org/licenses/LICENSE-2.0

 *

 * Unless required by applicable law or agreed to in writing, software

 * distributed under the License is distributed on an "AS IS" BASIS,

 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

 * See the License for the specific language governing permissions and

 * limitations under the License.

 *

 * Licensed under the Apache License v2.0 with LLVM Exceptions.

 * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.

 */


/* Optional system-header treatment: when the includer defines
 * NVTX_AS_SYSTEM_HEADER, emit the system_header pragma that matches the
 * detected compiler (clang, then GCC-compatible compilers or compilers
 * defining __NVCOMPILER, then MSVC). If none of those compiler macros is
 * defined, no pragma is emitted. */
#if defined(NVTX_AS_SYSTEM_HEADER)

#if defined(__clang__)

#pragma clang system_header

#elif defined(__GNUC__) || defined(__NVCOMPILER)

#pragma GCC system_header

#elif defined(_MSC_VER)

#pragma system_header

#endif

#endif


#include "nvToolsExtMem.h"


#include "cuda.h"

#include "cuda_runtime.h"


#ifdef __cplusplus

extern "C" {

#endif /* __cplusplus */


#ifndef NVTX_MEM_CUDART_CONTENTS_V1

#define NVTX_MEM_CUDART_CONTENTS_V1


/* Memory type identifier with value 0x11.
 * NOTE(review): presumably the NVTX_MEM_TYPE_* value that pairs with
 * nvtxMemCudaArrayRangeDesc_t below -- confirm against the NVTX memory API. */
#define NVTX_MEM_TYPE_CUDA_ARRAY 0x11


/* Structure to describe memory in a CUDA array object referenced through a
 * cudaArray_t handle.
 *
 * Layout: a header of two 16-bit fields and one 32-bit reserved field,
 * followed by the array handle and two three-element size_t vectors.
 * NOTE(review): offset[] and extent[] presumably give the start and the size
 * of the described range along up to three array dimensions; units and
 * dimension order are not stated in this header -- confirm. */
typedef struct nvtxMemCudaArrayRangeDesc_v1

{

    uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */

    uint16_t structSize; /* Size of the structure. */

    uint32_t reserved0; /* Reserved; required value is not stated here. */

    cudaArray_t  src; /* Array handle the range refers to. */

    size_t offset[3]; /* Three-component offset (see NOTE above). */

    size_t extent[3]; /* Three-component extent (see NOTE above). */

} nvtxMemCudaArrayRangeDesc_v1;


/* Unversioned alias, currently bound to the _v1 layout. */
typedef nvtxMemCudaArrayRangeDesc_v1 nvtxMemCudaArrayRangeDesc_t;


/* Memory type identifier with value 0x12.
 * NOTE(review): presumably the NVTX_MEM_TYPE_* value that pairs with
 * nvtxMemCuArrayRangeDesc_t below -- confirm against the NVTX memory API. */
#define NVTX_MEM_TYPE_CU_ARRAY 0x12


/* Structure to describe memory in a CUDA array object referenced through a
 * CUarray handle. Field-for-field it mirrors nvtxMemCudaArrayRangeDesc_v1,
 * differing only in the type of the src handle.
 *
 * NOTE(review): offset[] and extent[] presumably give the start and the size
 * of the described range along up to three array dimensions; units and
 * dimension order are not stated in this header -- confirm. */
typedef struct nvtxMemCuArrayRangeDesc_v1

{

    uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */

    uint16_t structSize; /* Size of the structure. */

    uint32_t reserved0; /* Reserved; required value is not stated here. */

    CUarray  src; /* Array handle the range refers to. */

    size_t offset[3]; /* Three-component offset (see NOTE above). */

    size_t extent[3]; /* Three-component extent (see NOTE above). */

} nvtxMemCuArrayRangeDesc_v1;


/* Unversioned alias, currently bound to the _v1 layout. */
typedef nvtxMemCuArrayRangeDesc_v1 nvtxMemCuArrayRangeDesc_t;


/* Reserving 0x2-0xF for more common types */


/* Sentinel that may be passed as the devicePeer argument of
 * nvtxMemCudaSetPeerAccess in place of a concrete device number.
 * The value is parenthesized so the negative literal always expands as a
 * single primary expression, regardless of the tokens around the macro use. */
#define NVTX_MEM_CUDA_PEER_ALL_DEVICES (-1)


/* Returns a memory-permissions handle for the given NVTX domain.
 *
 * domain: NVTX domain the request is made in.
 * Returns: an nvtxMemPermissionsHandle_t.
 *
 * NOTE(review): the generated documentation describes the result as the
 * permission object representing the CUDA runtime device or CUDA driver
 * context, which is word-for-word the brief of
 * nvtxMemCudaGetDeviceWidePermissions -- confirm what the process-wide
 * object covers. */
NVTX_DECLSPEC nvtxMemPermissionsHandle_t NVTX_API nvtxMemCudaGetProcessWidePermissions(

    nvtxDomainHandle_t domain);


/* Returns a memory-permissions handle for one device within the given NVTX
 * domain. The generated documentation describes the result as the permission
 * object that represents the CUDA runtime device or CUDA driver context.
 *
 * domain: NVTX domain the request is made in.
 * device: device selector. NOTE(review): presumably a CUDA device number such
 *         as the one reported by cudaGetDevice() -- confirm.
 * Returns: an nvtxMemPermissionsHandle_t. */
NVTX_DECLSPEC nvtxMemPermissionsHandle_t NVTX_API nvtxMemCudaGetDeviceWidePermissions(

    nvtxDomainHandle_t domain,

    int device);


/* Changes the default behavior for all memory mapped in from a particular
 * device (brief taken from the generated documentation).
 *
 * domain:      NVTX domain the call is made in.
 * permissions: permissions handle the change is applied to.
 *              NOTE(review): presumably one obtained from
 *              nvtxMemCudaGetProcessWidePermissions or
 *              nvtxMemCudaGetDeviceWidePermissions -- confirm.
 * devicePeer:  peer device number, or NVTX_MEM_CUDA_PEER_ALL_DEVICES.
 * flags:       NVTX_MEM_PERMISSIONS_REGION_FLAGS_* bits. */
NVTX_DECLSPEC void NVTX_API nvtxMemCudaSetPeerAccess(

    nvtxDomainHandle_t domain,

    nvtxMemPermissionsHandle_t permissions,

    int devicePeer, /* device number such as from cudaGetDevice() or NVTX_MEM_CUDA_PEER_ALL_DEVICES */

    uint32_t flags); /* NVTX_MEM_PERMISSIONS_REGION_FLAGS_* */


/* Batch descriptor used to mark memory ranges as initialized; it is the type
 * of the desc argument of nvtxMemCudaMarkInitialized.
 *
 * The batch is expressed as a pointer to an array of region descriptors plus
 * the element count and the size of each element.
 * NOTE(review): the concrete descriptor type behind regionDescElements is
 * presumably selected by regionType -- confirm against the NVTX memory API. */
typedef struct nvtxMemMarkInitializedBatch_v1

{

    uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */

    uint16_t structSize; /* Size of the structure. */


    uint32_t regionType; /* NVTX_MEM_TYPE_* */


    size_t regionDescCount; /* Number of descriptors in regionDescElements. */

    size_t regionDescElementSize; /* Size of one descriptor element; NOTE(review): presumably in bytes -- confirm. */

    void const* regionDescElements; /* this will also become the handle for this region */


} nvtxMemMarkInitializedBatch_v1;


/* Unversioned alias, currently bound to the _v1 layout. */
typedef nvtxMemMarkInitializedBatch_v1 nvtxMemMarkInitializedBatch_t;


/* Submits a batch of region descriptors to be marked as initialized, in the
 * context of a CUDA stream. The batch descriptor type is documented as
 * "Mark memory ranges as initialized".
 *
 * domain:            NVTX domain the call is made in.
 * stream:            CUDA stream associated with the marking.
 *                    NOTE(review): ordering semantics relative to work on the
 *                    stream are not stated in this header -- confirm.
 * isPerThreadStream: boolean passed as uint8_t; 0 means false, any other
 *                    value means true.
 * desc:              read-only batch descriptor.
 *
 * NOTE(review): the generated brief for this function reads "Register a
 * region of memory inside of a heap of linear process virtual memory", which
 * looks copied from a different API -- confirm and correct upstream. */
NVTX_DECLSPEC void NVTX_API nvtxMemCudaMarkInitialized(

    nvtxDomainHandle_t domain,

    cudaStream_t stream,

    uint8_t isPerThreadStream, /* 0 for false, otherwise true */

    nvtxMemMarkInitializedBatch_t const* desc);


#endif /* NVTX_MEM_CUDART_CONTENTS_V1 */


/* On compilers defining __GNUC__, switch default symbol visibility to
 * "internal" for everything declared until the matching pop below. */
#ifdef __GNUC__

#pragma GCC visibility push(internal)

#endif


/* Pull in the implementation unless the includer opted out by defining
 * NVTX_NO_IMPL. The guard macro is defined only for the duration of the
 * include and removed again immediately afterwards. */
#ifndef NVTX_NO_IMPL

#define NVTX_EXT_IMPL_MEM_CUDART_GUARD /* Ensure other headers cannot be included directly */

#include "nvtxDetail/nvtxExtImplMemCudaRt_v1.h"

#undef NVTX_EXT_IMPL_MEM_CUDART_GUARD

#endif /*NVTX_NO_IMPL*/


/* Restore the visibility that was in effect before the push above. */
#ifdef __GNUC__

#pragma GCC visibility pop

#endif


#ifdef __cplusplus

}

#endif /* __cplusplus */

nvtxMemCudaMarkInitialized
NVTX_DECLSPEC void NVTX_API nvtxMemCudaMarkInitialized(nvtxDomainHandle_t domain, cudaStream_t stream, uint8_t isPerThreadStream, nvtxMemMarkInitializedBatch_t const *desc)
Register a region of memory inside of a heap of linear process virtual memory.

nvtxMemCudaGetDeviceWidePermissions
NVTX_DECLSPEC nvtxMemPermissionsHandle_t NVTX_API nvtxMemCudaGetDeviceWidePermissions(nvtxDomainHandle_t domain, int device)
Get the permission object that represents the CUDA runtime device or CUDA driver context.

nvtxMemCudaSetPeerAccess
NVTX_DECLSPEC void NVTX_API nvtxMemCudaSetPeerAccess(nvtxDomainHandle_t domain, nvtxMemPermissionsHandle_t permissions, int devicePeer, uint32_t flags)
Change the default behavior for all memory mapped in from a particular device.

nvtxMemCudaGetProcessWidePermissions
NVTX_DECLSPEC nvtxMemPermissionsHandle_t NVTX_API nvtxMemCudaGetProcessWidePermissions(nvtxDomainHandle_t domain)
Get the permission object that represents the CUDA runtime device or CUDA driver context.

nvtxMemPermissionsHandle_t
nvtxMemPermissions_t * nvtxMemPermissionsHandle_t
A handle returned by a tool to represent a memory permissions mask.
Definition nvToolsExtMem.h:204

nvtxMemCuArrayRangeDesc_v1
structure to describe memory in a CUDA array object
Definition nvToolsExtMemCudaRt.h:89

nvtxMemCudaArrayRangeDesc_v1
structure to describe memory in a CUDA array object
Definition nvToolsExtMemCudaRt.h:63

nvtxMemMarkInitializedBatch_v1
Mark memory ranges as initialized.
Definition nvToolsExtMemCudaRt.h:173