Coverage for cuda / core / _memory / _memory_pool.pyx: 69.06%
181 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-08 01:07 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-08 01:07 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
5from __future__ import annotations
7from libc.limits cimport ULLONG_MAX
8from libc.stdint cimport uintptr_t
9from libc.string cimport memset
10from cpython.mem cimport PyMem_Malloc, PyMem_Free
12from cuda.bindings cimport cydriver
13from cuda.core._memory._buffer cimport Buffer, Buffer_from_deviceptr_handle, MemoryResource
14from cuda.core._memory cimport _ipc
15from cuda.core._stream cimport default_stream, Stream_accept, Stream
16from cuda.core._resource_handles cimport (
17 MemoryPoolHandle,
18 DevicePtrHandle,
19 create_mempool_handle,
20 create_mempool_handle_ref,
21 get_device_mempool,
22 deviceptr_alloc_from_pool,
23 as_cu,
24 as_py,
25)
27from cuda.core._utils.cuda_utils cimport (
28 HANDLE_RETURN,
29)
31import platform # no-cython-lint
33from cuda.core._utils.cuda_utils import driver
cdef class _MemPoolOptions:
    """Internal option bag describing how a ``_MemPool`` obtains its pool.

    Defaults describe "reuse the device's current pool" with IPC disabled.
    """

    def __cinit__(self):
        self._ipc_enabled = False  # when True, the created pool exports an IPC handle
        self._max_size = 0  # maximum pool size in bytes (only used when creating a pool)
        # Location/type start as INVALID sentinels; callers are expected to set them.
        self._location = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_INVALID
        self._type = cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_INVALID
        self._use_current = True  # True: borrow an existing pool; False: create a new one
cdef class _MemPoolAttributes:
    """Provides access to memory pool attributes."""

    def __init__(self, *args, **kwargs):
        # Not user-constructible: a valid MemoryPoolHandle is required, so
        # instances are only built through the static _init factory below.
        raise RuntimeError("_MemPoolAttributes cannot be instantiated directly. Please use MemoryResource APIs.")

    @staticmethod
    cdef _MemPoolAttributes _init(MemoryPoolHandle h_pool):
        # Factory that bypasses __init__ (which unconditionally raises).
        cdef _MemPoolAttributes self = _MemPoolAttributes.__new__(_MemPoolAttributes)
        self._h_pool = h_pool
        return self

    def __repr__(self):
        # Renders every public property; note each property access below
        # performs a live driver query, so this is not a cached snapshot.
        return f"{self.__class__.__name__}(%s)" % ", ".join(
            f"{attr}={getattr(self, attr)}" for attr in dir(self)
            if not attr.startswith("_")
        )

    cdef int _getattribute(self, cydriver.CUmemPool_attribute attr_enum, void* value) except?-1:
        # Shared helper: query one pool attribute into caller-provided storage.
        # `value` must point to the C type matching `attr_enum` (int or cuuint64_t).
        with nogil:
            HANDLE_RETURN(cydriver.cuMemPoolGetAttribute(as_cu(self._h_pool), attr_enum, value))
        return 0

    @property
    def reuse_follow_event_dependencies(self):
        """Allow memory to be reused when there are event dependencies between streams."""
        cdef int value
        self._getattribute(cydriver.CUmemPool_attribute.CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES, &value)
        return bool(value)

    @property
    def reuse_allow_opportunistic(self):
        """Allow reuse of completed frees without dependencies."""
        cdef int value
        self._getattribute(cydriver.CUmemPool_attribute.CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC, &value)
        return bool(value)

    @property
    def reuse_allow_internal_dependencies(self):
        """Allow insertion of new stream dependencies for memory reuse."""
        cdef int value
        self._getattribute(cydriver.CUmemPool_attribute.CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES, &value)
        return bool(value)

    @property
    def release_threshold(self):
        """Amount of reserved memory to hold before OS release."""
        cdef cydriver.cuuint64_t value
        self._getattribute(cydriver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD, &value)
        return int(value)

    @property
    def reserved_mem_current(self):
        """Current amount of backing memory allocated."""
        cdef cydriver.cuuint64_t value
        self._getattribute(cydriver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT, &value)
        return int(value)

    @property
    def reserved_mem_high(self):
        """High watermark of backing memory allocated."""
        cdef cydriver.cuuint64_t value
        self._getattribute(cydriver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH, &value)
        return int(value)

    @property
    def used_mem_current(self):
        """Current amount of memory in use."""
        cdef cydriver.cuuint64_t value
        self._getattribute(cydriver.CUmemPool_attribute.CU_MEMPOOL_ATTR_USED_MEM_CURRENT, &value)
        return int(value)

    @property
    def used_mem_high(self):
        """High watermark of memory in use."""
        cdef cydriver.cuuint64_t value
        self._getattribute(cydriver.CUmemPool_attribute.CU_MEMPOOL_ATTR_USED_MEM_HIGH, &value)
        return int(value)
cdef class _MemPool(MemoryResource):
    """Memory resource backed by a CUDA stream-ordered memory pool (``CUmemoryPool``).

    Depending on the options passed at construction, the instance either
    borrows an existing (current/default) pool or creates and owns a new one.
    """

    def __cinit__(self):
        # Sentinel/empty state; __init__ populates the real values.
        self._dev_id = cydriver.CU_DEVICE_INVALID
        self._mempool_owned = False
        self._ipc_data = None
        self._attributes = None
        self._peer_accessible_by = ()

    def __init__(self, int device_id, _MemPoolOptions opts):
        if opts._use_current:
            # Borrow the device's existing pool (not owned; close() is a no-op on it).
            _MP_init_current(self, device_id, opts)
        else:
            # Create a new pool that this resource owns and will destroy.
            _MP_init_create(self, device_id, opts)

    def __dealloc__(self):
        _MP_close(self)

    def close(self):
        """
        Close the device memory resource and destroy the associated memory pool
        if owned.
        """
        _MP_close(self)

    def allocate(self, size_t size, stream: Stream | GraphBuilder | None = None) -> Buffer:
        """Allocate a buffer of the requested size.

        Parameters
        ----------
        size : int
            The size of the buffer to allocate, in bytes.
        stream : :obj:`~_stream.Stream` | :obj:`~_graph.GraphBuilder`, optional
            The stream on which to perform the allocation asynchronously.
            If None, an internal stream is used.

        Returns
        -------
        Buffer
            The allocated buffer object, which is accessible on the device that this memory
            resource was created for.
        """
        if self.is_mapped:
            # IPC mappings are import-only views; only the exporting process allocates.
            raise TypeError("Cannot allocate from a mapped IPC-enabled memory resource")
        stream = Stream_accept(stream) if stream is not None else default_stream()
        return _MP_allocate(self, size, <Stream> stream)

    def deallocate(self, ptr: DevicePointerT, size_t size, stream: Stream | GraphBuilder | None = None):
        """Deallocate a buffer previously allocated by this resource.

        Parameters
        ----------
        ptr : :obj:`~_memory.DevicePointerT`
            The pointer or handle to the buffer to deallocate.
        size : int
            The size of the buffer to deallocate, in bytes.
        stream : :obj:`~_stream.Stream` | :obj:`~_graph.GraphBuilder`, optional
            The stream on which to perform the deallocation asynchronously.
            If the buffer is deallocated without an explicit stream, the allocation stream
            is used.
        """
        stream = Stream_accept(stream) if stream is not None else default_stream()
        _MP_deallocate(self, <uintptr_t>ptr, size, <Stream> stream)

    @property
    def attributes(self) -> _MemPoolAttributes:
        """Memory pool attributes."""
        # Lazily create and cache the attribute accessor.
        if self._attributes is None:
            self._attributes = _MemPoolAttributes._init(self._h_pool)
        return self._attributes

    @property
    def device_id(self) -> int:
        """The associated device ordinal."""
        return self._dev_id

    @property
    def handle(self) -> object:
        """Handle to the underlying memory pool."""
        return as_py(self._h_pool)

    @property
    def is_handle_owned(self) -> bool:
        """Whether the memory resource handle is owned. If False, ``close`` has no effect."""
        return self._mempool_owned

    @property
    def peer_accessible_by(self):
        """
        Get or set the devices that can access allocations from this memory
        pool. Access can be modified at any time and affects all allocations
        from this memory pool.

        Returns a tuple of sorted device IDs that currently have peer access to
        allocations from this memory pool.

        When setting, accepts a sequence of Device objects or device IDs.
        Setting to an empty sequence revokes all peer access.

        Examples
        --------
        >>> dmr = DeviceMemoryResource(0)
        >>> dmr.peer_accessible_by = [1]  # Grant access to device 1
        >>> assert dmr.peer_accessible_by == (1,)
        >>> dmr.peer_accessible_by = []  # Revoke access
        """
        return self._peer_accessible_by

    @peer_accessible_by.setter
    def peer_accessible_by(self, devices):
        """Set which devices can access this memory pool."""
        from .._device import Device

        # Convert all devices to device IDs.
        cdef set[int] target_ids = {Device(dev).device_id for dev in devices}
        target_ids.discard(self._dev_id)  # exclude this device from peer access list
        this_dev = Device(self._dev_id)
        # Validate peer capability up front so we fail before mutating anything.
        cdef list bad = [dev for dev in target_ids if not this_dev.can_access_peer(dev)]
        if bad:
            raise ValueError(f"Device {self._dev_id} cannot access peer(s): {', '.join(map(str, bad))}")
        # Compute the delta against the currently granted set; only devices
        # whose access actually changes are included in the driver call.
        cdef set[int] cur_ids = set(self._peer_accessible_by)
        cdef set[int] to_add = target_ids - cur_ids
        cdef set[int] to_rm = cur_ids - target_ids
        cdef size_t count = len(to_add) + len(to_rm)  # transaction size
        cdef cydriver.CUmemAccessDesc* access_desc = NULL
        cdef size_t i = 0

        if count > 0:
            access_desc = <cydriver.CUmemAccessDesc*>PyMem_Malloc(count * sizeof(cydriver.CUmemAccessDesc))
            if access_desc == NULL:
                raise MemoryError("Failed to allocate memory for access descriptors")

            try:
                # Grants first ...
                for dev_id in to_add:
                    access_desc[i].flags = cydriver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_READWRITE
                    access_desc[i].location.type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
                    access_desc[i].location.id = dev_id
                    i += 1

                # ... then revocations, all applied in one driver call below.
                for dev_id in to_rm:
                    access_desc[i].flags = cydriver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_NONE
                    access_desc[i].location.type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
                    access_desc[i].location.id = dev_id
                    i += 1

                with nogil:
                    HANDLE_RETURN(cydriver.cuMemPoolSetAccess(as_cu(self._h_pool), access_desc, count))
            finally:
                if access_desc != NULL:
                    PyMem_Free(access_desc)

        # FIX: sort before storing. `target_ids` is a set, so `tuple(target_ids)`
        # had arbitrary ordering, contradicting the getter's documented promise
        # of "a tuple of sorted device IDs".
        self._peer_accessible_by = tuple(sorted(target_ids))

    @property
    def is_ipc_enabled(self) -> bool:
        """Whether this memory resource has IPC enabled."""
        return self._ipc_data is not None

    @property
    def is_mapped(self) -> bool:
        """
        Whether this is a mapping of an IPC-enabled memory resource from
        another process. If True, allocation is not permitted.
        """
        return self._ipc_data is not None and self._ipc_data._is_mapped

    @property
    def uuid(self) -> uuid.UUID | None:
        """
        A universally unique identifier for this memory resource. Meaningful
        only for IPC-enabled memory resources.
        """
        # None when IPC is disabled (no _ipc_data).
        return getattr(self._ipc_data, 'uuid', None)
301# _MemPool Implementation
302# -----------------------
cdef int _MP_init_current(_MemPool self, int dev_id, _MemPoolOptions opts) except?-1:
    # Initialize `self` by borrowing an already-existing pool (the device's
    # current pool, or the driver-provided pool for host / host-NUMA / managed
    # locations). The handle is NOT owned: _MP_close will not destroy it.
    # Get the current memory pool.
    cdef cydriver.cuuint64_t current_threshold
    cdef cydriver.cuuint64_t max_threshold = ULLONG_MAX
    cdef cydriver.CUmemLocation loc
    cdef cydriver.CUmemoryPool pool

    self._dev_id = dev_id
    self._mempool_owned = False

    if opts._type == cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED \
            and opts._location == cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE:
        # Ordinary device-local pool: a real device ordinal is required.
        assert dev_id >= 0
        self._h_pool = get_device_mempool(dev_id)

        # Set a higher release threshold to improve performance when there are
        # no active allocations. By default, the release threshold is 0, which
        # means memory is immediately released back to the OS when there are no
        # active suballocations, causing performance issues.
        with nogil:
            HANDLE_RETURN(
                cydriver.cuMemPoolGetAttribute(
                    as_cu(self._h_pool),
                    cydriver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,
                    &current_threshold
                )
            )
            # Only override the driver default (0); respect any value a user
            # has already configured on the pool.
            if current_threshold == 0:
                HANDLE_RETURN(cydriver.cuMemPoolSetAttribute(
                    as_cu(self._h_pool),
                    cydriver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,
                    &max_threshold
                ))
    elif opts._type == cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED \
            and opts._location == cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST:
        # Pinned host pool; cuMemGetMemPool with a HOST location needs CUDA 13+
        # bindings, hence the compile-time guard.
        IF CUDA_CORE_BUILD_MAJOR >= 13:
            assert dev_id == -1  # host pools carry no device ordinal
            loc.id = dev_id
            loc.type = opts._location
            with nogil:
                HANDLE_RETURN(cydriver.cuMemGetMemPool(&pool, &loc, opts._type))
            # Non-owning reference: destruction is the driver's responsibility.
            self._h_pool = create_mempool_handle_ref(pool)
        ELSE:
            raise RuntimeError("not supported")
    elif opts._type == cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED \
            and opts._location == cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST_NUMA:
        IF CUDA_CORE_BUILD_MAJOR >= 13:
            # NOTE(review): dev_id here is the NUMA node id, presumably pinned
            # to node 0 by the caller — confirm against call sites.
            assert dev_id == 0
            loc.id = 0
            loc.type = opts._location
            with nogil:
                HANDLE_RETURN(cydriver.cuMemGetMemPool(&pool, &loc, opts._type))
            self._h_pool = create_mempool_handle_ref(pool)
        ELSE:
            raise RuntimeError("not supported")
    else:
        IF CUDA_CORE_BUILD_MAJOR >= 13:
            if opts._type == cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED:
                # Managed memory pools
                loc.id = dev_id
                loc.type = opts._location
                with nogil:
                    HANDLE_RETURN(cydriver.cuMemGetMemPool(&pool, &loc, opts._type))
                self._h_pool = create_mempool_handle_ref(pool)
            else:
                # Unreachable: callers must pass a supported type/location pair.
                assert False
        ELSE:
            assert False

    return 0
cdef int _MP_init_create(_MemPool self, int dev_id, _MemPoolOptions opts) except?-1:
    # Initialize `self` by creating a brand-new pool from `opts`.
    # The resulting handle IS owned: _MP_close destroys the pool.
    cdef cydriver.CUmemPoolProps properties
    # Zero the props struct so unset fields take driver defaults.
    memset(&properties, 0, sizeof(cydriver.CUmemPoolProps))

    cdef bint ipc_enabled = opts._ipc_enabled
    properties.allocType = opts._type
    # IPC requires an exportable handle type; otherwise request none.
    properties.handleTypes = _ipc.IPC_HANDLE_TYPE if ipc_enabled else cydriver.CUmemAllocationHandleType.CU_MEM_HANDLE_TYPE_NONE
    properties.location.id = dev_id
    properties.location.type = opts._location
    # managed memory does not support maxSize as of CUDA 13.0
    IF CUDA_CORE_BUILD_MAJOR >= 13:
        if properties.allocType != cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED:
            properties.maxSize = opts._max_size
    ELSE:
        properties.maxSize = opts._max_size

    self._dev_id = dev_id
    self._mempool_owned = True

    self._h_pool = create_mempool_handle(properties)

    if ipc_enabled:
        # Export the pool immediately so the shareable handle / uuid are
        # available for the lifetime of this resource.
        alloc_handle = _ipc.MP_export_mempool(self)
        self._ipc_data = _ipc.IPCDataForMR(alloc_handle, False)

    return 0
# Raise an exception if the given stream is capturing.
# A result of CU_STREAM_CAPTURE_STATUS_INVALIDATED is considered an error.
cdef inline int check_not_capturing(cydriver.CUstream s) except?-1 nogil:
    # Query the capture status; HANDLE_RETURN surfaces driver failures
    # (including the INVALIDATED status reported as an error result).
    cdef cydriver.CUstreamCaptureStatus status
    HANDLE_RETURN(cydriver.cuStreamIsCapturing(s, &status))
    # Anything other than "not capturing" forbids stream-ordered pool ops.
    if status != cydriver.CUstreamCaptureStatus.CU_STREAM_CAPTURE_STATUS_NONE:
        raise RuntimeError("_MemPool cannot perform memory operations on "
                           "a capturing stream (consider using GraphMemoryResource).")
cdef inline Buffer _MP_allocate(_MemPool self, size_t size, Stream stream):
    # Perform a stream-ordered allocation of `size` bytes from this pool and
    # wrap the resulting device pointer in a Buffer that records `self` as its
    # memory resource.
    cdef cydriver.CUstream cu_stream = as_cu(stream._h_stream)
    cdef DevicePtrHandle h_devptr
    with nogil:
        # Stream-ordered allocation is illegal during graph capture.
        check_not_capturing(cu_stream)
        h_devptr = deviceptr_alloc_from_pool(size, self._h_pool, stream._h_stream)
    if not h_devptr:
        raise RuntimeError("Failed to allocate memory from pool")
    return Buffer_from_deviceptr_handle(h_devptr, size, self, None)
cdef inline void _MP_deallocate(
    _MemPool self, uintptr_t ptr, size_t size, Stream stream
) noexcept nogil:
    # Stream-ordered free of a pool allocation. `size` is accepted for
    # interface symmetry with allocate but is not needed by cuMemFreeAsync.
    # NOTE(review): declared `noexcept`, so errors raised via HANDLE_RETURN are
    # reported and swallowed rather than propagated — presumably deliberate
    # for deallocation/teardown paths; confirm.
    cdef cydriver.CUstream s = as_cu(stream._h_stream)
    cdef cydriver.CUdeviceptr devptr = <cydriver.CUdeviceptr>ptr
    cdef cydriver.CUresult r
    with nogil:
        r = cydriver.cuMemFreeAsync(devptr, s)
        # CUDA_ERROR_INVALID_CONTEXT is ignored — presumably the context was
        # already torn down (e.g. interpreter shutdown); confirm intent.
        if r != cydriver.CUDA_ERROR_INVALID_CONTEXT:
            HANDLE_RETURN(r)
cdef inline _MP_close(_MemPool self):
    # Idempotent teardown: a second call sees an empty handle and returns.
    if not self._h_pool:
        return

    # This works around nvbug 5698116. When a memory pool handle is recycled
    # the new handle inherits the peer access state of the previous handle.
    if self._peer_accessible_by:
        self.peer_accessible_by = []

    # Reset members in declaration order.
    # The RAII deleter handles nvbug 5698116 workaround (clears peer access)
    # and calls cuMemPoolDestroy if this is an owning handle.
    self._h_pool.reset()
    self._dev_id = cydriver.CU_DEVICE_INVALID
    self._mempool_owned = False
    self._ipc_data = None
    self._attributes = None
    self._peer_accessible_by = ()