Skip to content

Commit 10cf241

Browse files
Copilot and leofang committed
Add additional performance optimizations to cythonized Buffer
Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
1 parent 4fb04c5 commit 10cf241

File tree

1 file changed

+21
-12
lines changed

1 file changed

+21
-12
lines changed

cuda_core/cuda/core/experimental/_memory.pyx

Lines changed: 21 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -101,22 +101,25 @@ cdef class Buffer:
101101
@property
102102
def is_device_accessible(self) -> bool:
103103
"""Return True if this buffer can be accessed by the GPU, otherwise False."""
104-
if self._mnff.mr is not None:
105-
return self._mnff.mr.is_device_accessible
104+
cdef object mr = self._mnff.mr
105+
if mr is not None:
106+
return mr.is_device_accessible
106107
raise NotImplementedError("WIP: Currently this property only supports buffers with associated MemoryResource")
107108

108109
@property
109110
def is_host_accessible(self) -> bool:
110111
"""Return True if this buffer can be accessed by the CPU, otherwise False."""
111-
if self._mnff.mr is not None:
112-
return self._mnff.mr.is_host_accessible
112+
cdef object mr = self._mnff.mr
113+
if mr is not None:
114+
return mr.is_host_accessible
113115
raise NotImplementedError("WIP: Currently this property only supports buffers with associated MemoryResource")
114116

115117
@property
116118
def device_id(self) -> int:
117119
"""Return the device ordinal of this buffer."""
118-
if self._mnff.mr is not None:
119-
return self._mnff.mr.device_id
120+
cdef object mr = self._mnff.mr
121+
if mr is not None:
122+
return mr.device_id
120123
raise NotImplementedError("WIP: Currently this property only supports buffers with associated MemoryResource")
121124

122125
def copy_to(self, dst: Buffer = None, *, stream: Stream) -> Buffer:
@@ -141,11 +144,14 @@ cdef class Buffer:
141144
if self._mnff.mr is None:
142145
raise ValueError("a destination buffer must be provided (this buffer does not have a memory_resource)")
143146
dst = self._mnff.mr.allocate(self._mnff.size, stream)
144-
if dst._mnff.size != self._mnff.size:
147+
148+
cdef int src_size = self._mnff.size
149+
cdef int dst_size = dst._mnff.size
150+
if dst_size != src_size:
145151
raise ValueError(
146-
f"buffer sizes mismatch between src and dst (sizes are: src={self._mnff.size}, dst={dst._mnff.size})"
152+
f"buffer sizes mismatch between src and dst (sizes are: src={src_size}, dst={dst_size})"
147153
)
148-
handle_return(driver.cuMemcpyAsync(dst._mnff.ptr, self._mnff.ptr, self._mnff.size, stream.handle))
154+
handle_return(driver.cuMemcpyAsync(dst._mnff.ptr, self._mnff.ptr, src_size, stream.handle))
149155
return dst
150156

151157
def copy_from(self, src: Buffer, *, stream: Stream):
@@ -162,11 +168,14 @@ cdef class Buffer:
162168
"""
163169
if stream is None:
164170
raise ValueError("stream must be provided")
165-
if src._mnff.size != self._mnff.size:
171+
172+
cdef int src_size = src._mnff.size
173+
cdef int dst_size = self._mnff.size
174+
if src_size != dst_size:
166175
raise ValueError(
167-
f"buffer sizes mismatch between src and dst (sizes are: src={src._mnff.size}, dst={self._mnff.size})"
176+
f"buffer sizes mismatch between src and dst (sizes are: src={src_size}, dst={dst_size})"
168177
)
169-
handle_return(driver.cuMemcpyAsync(self._mnff.ptr, src._mnff.ptr, self._mnff.size, stream.handle))
178+
handle_return(driver.cuMemcpyAsync(self._mnff.ptr, src._mnff.ptr, src_size, stream.handle))
170179

171180
def __dlpack__(
172181
self,

0 commit comments

Comments
 (0)