
Commit bf590e4

leofang and kkraus14 authored
Make a few memory management objects public + Miscellaneous doc updates (#693)
* Make Buffer, DeviceMemoryResource, LegacyPinnedMemoryResource, and MemoryResource public APIs
* Add docs
* Enhance typing in Buffer, MR, Stream, ...
* More docs
* Fix buffer destructor; highlight that each MR needs to clarify the handling of stream=None; more docs
* Fix create_stream docstring
* Narrower error
* Fix typo
* Reduce the applicability of __cuda_stream__

Co-authored-by: Keith Kraus <keith.j.kraus@gmail.com>
1 parent 314060c commit bf590e4

19 files changed: 360 additions, 143 deletions


cuda_core/cuda/core/experimental/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -14,6 +14,7 @@
 from cuda.core.experimental._launch_config import LaunchConfig
 from cuda.core.experimental._launcher import launch
 from cuda.core.experimental._linker import Linker, LinkerOptions
+from cuda.core.experimental._memory import Buffer, DeviceMemoryResource, LegacyPinnedMemoryResource, MemoryResource
 from cuda.core.experimental._module import ObjectCode
 from cuda.core.experimental._program import Program, ProgramOptions
 from cuda.core.experimental._stream import Stream, StreamOptions
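
With this change the memory-management types can be imported from the public namespace. A minimal sketch of what that enables, assuming the existing MemoryResource.allocate and Buffer.close interfaces (everything past the import line is illustrative, not taken from this diff):

    from cuda.core.experimental import Buffer, Device, DeviceMemoryResource, MemoryResource

    dev = Device()
    dev.set_current()
    stream = dev.create_stream()

    mr: MemoryResource = DeviceMemoryResource(dev.device_id)  # pool-backed device memory resource
    buf: Buffer = mr.allocate(1024, stream=stream)            # stream-ordered allocation
    buf.close(stream)                                         # stream-ordered deallocation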

cuda_core/cuda/core/experimental/_device.py

Lines changed: 9 additions & 9 deletions
@@ -8,8 +8,8 @@
 from cuda.core.experimental._context import Context, ContextOptions
 from cuda.core.experimental._event import Event, EventOptions
 from cuda.core.experimental._graph import GraphBuilder
-from cuda.core.experimental._memory import Buffer, MemoryResource, _DefaultAsyncMempool, _SynchronousMemoryResource
-from cuda.core.experimental._stream import Stream, StreamOptions, default_stream
+from cuda.core.experimental._memory import Buffer, DeviceMemoryResource, MemoryResource, _SynchronousMemoryResource
+from cuda.core.experimental._stream import IsStreamT, Stream, StreamOptions, default_stream
 from cuda.core.experimental._utils.clear_error_support import assert_type
 from cuda.core.experimental._utils.cuda_utils import (
     ComputeCapability,
@@ -1004,7 +1004,7 @@ def __new__(cls, device_id: Optional[int] = None):
                 )
             )
         ) == 1:
-            dev._mr = _DefaultAsyncMempool(dev_id)
+            dev._mr = DeviceMemoryResource(dev_id)
         else:
             dev._mr = _SynchronousMemoryResource(dev_id)

@@ -1207,13 +1207,13 @@ def create_context(self, options: ContextOptions = None) -> Context:
         raise NotImplementedError("WIP: https://github.com/NVIDIA/cuda-python/issues/189")

     @precondition(_check_context_initialized)
-    def create_stream(self, obj=None, options: StreamOptions = None) -> Stream:
+    def create_stream(self, obj: Optional[IsStreamT] = None, options: StreamOptions = None) -> Stream:
         """Create a Stream object.

         New stream objects can be created in two different ways:

-        1) Create a new CUDA stream with customizable `options`.
-        2) Wrap an existing foreign `obj` supporting the __cuda_stream__ protocol.
+        1) Create a new CUDA stream with customizable ``options``.
+        2) Wrap an existing foreign `obj` supporting the ``__cuda_stream__`` protocol.

         Option (2) internally holds a reference to the foreign object
         such that the lifetime is managed.
@@ -1224,8 +1224,8 @@ def create_stream(self, obj=None, options: StreamOptions = None) -> Stream:

         Parameters
         ----------
-        obj : Any, optional
-            Any object supporting the __cuda_stream__ protocol.
+        obj : :obj:`~_stream.IsStreamT`, optional
+            Any object supporting the ``__cuda_stream__`` protocol.
         options : :obj:`~_stream.StreamOptions`, optional
             Customizable dataclass for stream creation options.

@@ -1259,7 +1259,7 @@ def create_event(self, options: Optional[EventOptions] = None) -> Event:
         return Event._init(self._id, self.context._handle, options)

     @precondition(_check_context_initialized)
-    def allocate(self, size, stream=None) -> Buffer:
+    def allocate(self, size, stream: Optional[Stream] = None) -> Buffer:
         """Allocate device memory from a specified stream.

         Allocates device memory of `size` bytes on the specified `stream`
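
The updated create_stream signature accepts any object implementing the ``__cuda_stream__`` protocol. A minimal sketch of such a foreign object, assuming the protocol returns a (version, stream-address) tuple; the ForeignStream class is hypothetical and not part of this diff:

    from cuda.core.experimental import Device

    class ForeignStream:  # hypothetical third-party stream wrapper
        def __init__(self, ptr: int):
            self._ptr = ptr

        def __cuda_stream__(self):
            # assumed protocol shape: (protocol version, stream pointer as a Python int)
            return (0, self._ptr)

    dev = Device()
    dev.set_current()
    s1 = dev.create_stream()                                # option (1): brand-new CUDA stream
    s2 = dev.create_stream(ForeignStream(int(s1.handle)))   # option (2): wrap a foreign object
    buf = dev.allocate(4096, stream=s1)                     # stream-ordered Buffer allocation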

cuda_core/cuda/core/experimental/_launch_config.py

Lines changed: 2 additions & 1 deletion
@@ -50,7 +50,8 @@ class LaunchConfig:
     shmem_size : int, optional
         Dynamic shared-memory size per thread block in bytes.
         (Default to size 0)
-
+    cooperative_launch : bool, optional
+        Whether this config can be used to launch a cooperative kernel.
     """

     # TODO: expand LaunchConfig to include other attributes
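
For reference, a sketch of how the documented fields fit together (the grid and block values are arbitrary examples):

    from cuda.core.experimental import LaunchConfig

    config = LaunchConfig(
        grid=(4, 1, 1),           # blocks per grid
        block=(256, 1, 1),        # threads per block
        shmem_size=0,             # dynamic shared memory per block, in bytes (default 0)
        cooperative_launch=True,  # the newly documented flag for cooperative kernels
    )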

cuda_core/cuda/core/experimental/_launcher.py

Lines changed: 12 additions & 9 deletions
@@ -2,11 +2,12 @@
 #
 # SPDX-License-Identifier: Apache-2.0

+from typing import Union

 from cuda.core.experimental._kernel_arg_handler import ParamHolder
 from cuda.core.experimental._launch_config import LaunchConfig, _to_native_launch_config
 from cuda.core.experimental._module import Kernel
-from cuda.core.experimental._stream import Stream
+from cuda.core.experimental._stream import IsStreamT, Stream, _try_to_get_stream_ptr
 from cuda.core.experimental._utils.clear_error_support import assert_type
 from cuda.core.experimental._utils.cuda_utils import (
     _reduce_3_tuple,
@@ -34,7 +35,7 @@ def _lazy_init():
     _inited = True


-def launch(stream, config, kernel, *kernel_args):
+def launch(stream: Union[Stream, IsStreamT], config: LaunchConfig, kernel: Kernel, *kernel_args):
     """Launches a :obj:`~_module.Kernel`
     object with launch-time configuration.

@@ -43,7 +44,7 @@ def launch(stream, config, kernel, *kernel_args):
     stream : :obj:`~_stream.Stream`
         The stream establishing the stream ordering semantic of a
         launch.
-    config : :obj:`~_launcher.LaunchConfig`
+    config : :obj:`LaunchConfig`
         Launch configurations inline with options provided by
         :obj:`~_launcher.LaunchConfig` dataclass.
     kernel : :obj:`~_module.Kernel`
@@ -55,13 +56,15 @@ def launch(stream, config, kernel, *kernel_args):
     """
     if stream is None:
         raise ValueError("stream cannot be None, stream must either be a Stream object or support __cuda_stream__")
-    if not isinstance(stream, Stream):
+    try:
+        stream_handle = stream.handle
+    except AttributeError:
         try:
-            stream = Stream._init(stream)
-        except Exception as e:
+            stream_handle = _try_to_get_stream_ptr(stream)
+        except Exception:
             raise ValueError(
                 f"stream must either be a Stream object or support __cuda_stream__ (got {type(stream)})"
-            ) from e
+            ) from None
     assert_type(kernel, Kernel)
     _lazy_init()
     config = check_or_create_options(LaunchConfig, config, "launch config")
@@ -78,15 +81,15 @@ def launch(stream, config, kernel, *kernel_args):
     # rich.
     if _use_ex:
         drv_cfg = _to_native_launch_config(config)
-        drv_cfg.hStream = stream.handle
+        drv_cfg.hStream = stream_handle
         if config.cooperative_launch:
             _check_cooperative_launch(kernel, config, stream)
         handle_return(driver.cuLaunchKernelEx(drv_cfg, int(kernel._handle), args_ptr, 0))
     else:
         # TODO: check if config has any unsupported attrs
         handle_return(
             driver.cuLaunchKernel(
-                int(kernel._handle), *config.grid, *config.block, config.shmem_size, stream.handle, args_ptr, 0
+                int(kernel._handle), *config.grid, *config.block, config.shmem_size, stream_handle, args_ptr, 0
             )
         )
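
A usage sketch of the relaxed stream argument: launch() now resolves the stream handle from either a Stream or any object exposing ``__cuda_stream__``. The kernel below is illustrative only; the Program/ObjectCode calls follow this package's public API but are not part of this diff:

    from cuda.core.experimental import Device, LaunchConfig, Program, ProgramOptions, launch

    dev = Device()
    dev.set_current()
    stream = dev.create_stream()

    # Compile a trivial kernel (illustrative only).
    code = 'extern "C" __global__ void noop() {}'
    arch = "".join(str(x) for x in dev.compute_capability)  # e.g. "90"
    prog = Program(code, code_type="c++", options=ProgramOptions(arch=f"sm_{arch}"))
    kernel = prog.compile("cubin").get_kernel("noop")

    # Any Stream (or __cuda_stream__-providing object) is accepted as the first argument.
    launch(stream, LaunchConfig(grid=1, block=1), kernel)
    stream.sync()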
