Skip to content

Commit 1ac7d2c

Browse files
Expose ObjectCode as public API + prune unnecessary input arguments (#435)
1 parent 19563d5 commit 1ac7d2c

File tree

9 files changed

+92
-85
lines changed

9 files changed

+92
-85
lines changed

cuda_core/cuda/core/experimental/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from cuda.core.experimental._event import EventOptions
88
from cuda.core.experimental._launcher import LaunchConfig, launch
99
from cuda.core.experimental._linker import Linker, LinkerOptions
10+
from cuda.core.experimental._module import ObjectCode
1011
from cuda.core.experimental._program import Program, ProgramOptions
1112
from cuda.core.experimental._stream import Stream, StreamOptions
1213
from cuda.core.experimental._system import System

cuda_core/cuda/core/experimental/_event.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,7 @@ def close(self):
6565
__slots__ = ("__weakref__", "_mnff", "_timing_disabled", "_busy_waited")
6666

6767
def __init__(self):
68-
raise NotImplementedError(
69-
"directly creating an Event object can be ambiguous. Please call call Stream.record()."
70-
)
68+
raise NotImplementedError("directly creating an Event object can be ambiguous. Please call Stream.record().")
7169

7270
@staticmethod
7371
def _init(options: Optional[EventOptions] = None):

cuda_core/cuda/core/experimental/_linker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ def link(self, target_type) -> ObjectCode:
439439
addr, size = handle_return(_driver.cuLinkComplete(self._mnff.handle))
440440
code = (ctypes.c_char * size).from_address(addr)
441441

442-
return ObjectCode(bytes(code), target_type)
442+
return ObjectCode._init(bytes(code), target_type)
443443

444444
def get_error_log(self) -> str:
445445
"""Get the error log generated by the linker.

cuda_core/cuda/core/experimental/_module.py

Lines changed: 47 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#
33
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
44

5-
5+
from typing import Optional, Union
66
from warnings import warn
77

88
from cuda.core.experimental._utils import driver, get_binding_version, handle_return, precondition
@@ -213,47 +213,42 @@ def attributes(self):
213213

214214

215215
class ObjectCode:
216-
"""Represent a compiled program that was loaded onto the device.
216+
"""Represent a compiled program to be loaded onto the device.
217217
218218
This object provides a unified interface for different types of
219-
compiled programs that are loaded onto the device.
219+
compiled programs that will be loaded onto the device.
220220
221-
Loads the module library with specified module code and JIT options.
221+
Note
222+
----
223+
This class has no default constructor. If you already have a cubin that you would
224+
like to load, use the :meth:`from_cubin` alternative constructor. For all other
225+
possible code types (e.g. "ptx"), only :class:`~cuda.core.experimental.Program`
226+
accepts them and returns an :class:`ObjectCode` instance with its
227+
:meth:`~cuda.core.experimental.Program.compile` method.
222228
223229
Note
224230
----
225231
Usage under CUDA 11.x will only load to the current device
226232
context.
227-
228-
Parameters
229-
----------
230-
module : Union[bytes, str]
231-
Either a bytes object containing the module to load, or
232-
a file path string containing that module for loading.
233-
code_type : Any
234-
String of the compiled type.
235-
Supported options are "ptx", "cubin", "ltoir" and "fatbin".
236-
jit_options : Optional
237-
Mapping of JIT options to use during module loading.
238-
(Default to no options)
239-
symbol_mapping : Optional
240-
Keyword argument dictionary specifying how symbol names
241-
should be mapped before trying to retrieve them.
242-
(Default to no mappings)
243-
244233
"""
245234

246-
__slots__ = ("_handle", "_backend_version", "_jit_options", "_code_type", "_module", "_loader", "_sym_map")
235+
__slots__ = ("_handle", "_backend_version", "_code_type", "_module", "_loader", "_sym_map")
247236
_supported_code_type = ("cubin", "ptx", "ltoir", "fatbin")
248237

249-
def __init__(self, module, code_type, jit_options=None, *, symbol_mapping=None):
250-
if code_type not in self._supported_code_type:
251-
raise ValueError
238+
def __init__(self):
239+
raise NotImplementedError(
240+
"directly creating an ObjectCode object can be ambiguous. Please either call Program.compile() "
241+
"or one of the ObjectCode.from_*() constructors"
242+
)
243+
244+
@staticmethod
245+
def _init(module, code_type, *, symbol_mapping: Optional[dict] = None):
246+
self = ObjectCode.__new__(ObjectCode)
247+
assert code_type in self._supported_code_type, f"{code_type=} is not supported"
252248
_lazy_init()
253249

254250
# handle is assigned during _lazy_load
255251
self._handle = None
256-
self._jit_options = jit_options
257252

258253
self._backend_version = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old"
259254
self._loader = _backend[self._backend_version]
@@ -262,42 +257,41 @@ def __init__(self, module, code_type, jit_options=None, *, symbol_mapping=None):
262257
self._module = module
263258
self._sym_map = {} if symbol_mapping is None else symbol_mapping
264259

260+
return self
261+
262+
@staticmethod
263+
def from_cubin(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode":
264+
"""Create an :class:`ObjectCode` instance from an existing cubin.
265+
266+
Parameters
267+
----------
268+
module : Union[bytes, str]
269+
Either a bytes object containing the in-memory cubin to load, or
270+
a file path string pointing to the on-disk cubin to load.
271+
symbol_mapping : Optional[dict]
272+
A dictionary specifying how the unmangled symbol names (as keys)
273+
should be mapped to the mangled names before trying to retrieve
274+
them (default to no mappings).
275+
"""
276+
return ObjectCode._init(module, "cubin", symbol_mapping=symbol_mapping)
277+
265278
# TODO: do we want to unload in a finalizer? Probably not..
266279

267280
def _lazy_load_module(self, *args, **kwargs):
268281
if self._handle is not None:
269282
return
270-
jit_options = self._jit_options
271283
module = self._module
272284
if isinstance(module, str):
273-
# TODO: this option is only taken by the new library APIs, but we have
274-
# a bug that we can't easily support it just yet (NVIDIA/cuda-python#73).
275-
if jit_options is not None:
276-
raise ValueError
277-
self._handle = handle_return(self._loader["file"](module))
285+
if self._backend_version == "new":
286+
self._handle = handle_return(self._loader["file"](module.encode(), [], [], 0, [], [], 0))
287+
else: # "old" backend
288+
self._handle = handle_return(self._loader["file"](module.encode()))
278289
else:
279290
assert isinstance(module, bytes)
280-
if jit_options is None:
281-
jit_options = {}
282291
if self._backend_version == "new":
283-
args = (
284-
module,
285-
list(jit_options.keys()),
286-
list(jit_options.values()),
287-
len(jit_options),
288-
# TODO: support library options
289-
[],
290-
[],
291-
0,
292-
)
292+
self._handle = handle_return(self._loader["data"](module, [], [], 0, [], [], 0))
293293
else: # "old" backend
294-
args = (
295-
module,
296-
len(jit_options),
297-
list(jit_options.keys()),
298-
list(jit_options.values()),
299-
)
300-
self._handle = handle_return(self._loader["data"](*args))
294+
self._handle = handle_return(self._loader["data"](module, 0, [], []))
301295

302296
@precondition(_lazy_load_module)
303297
def get_kernel(self, name):
@@ -314,12 +308,12 @@ def get_kernel(self, name):
314308
Newly created kernel object.
315309
316310
"""
311+
if self._code_type not in ("cubin", "ptx", "fatbin"):
312+
raise RuntimeError(f"get_kernel() is not supported for {self._code_type}")
317313
try:
318314
name = self._sym_map[name]
319315
except KeyError:
320316
name = name.encode()
321317

322318
data = handle_return(self._loader["kernel"](self._handle, name))
323319
return Kernel._from_obj(data, self)
324-
325-
# TODO: implement from_handle()

cuda_core/cuda/core/experimental/_program.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None):
386386
if not isinstance(code, str):
387387
raise TypeError("ptx Program expects code argument to be a string")
388388
self._linker = Linker(
389-
ObjectCode(code.encode(), code_type), options=self._translate_program_options(options)
389+
ObjectCode._init(code.encode(), code_type), options=self._translate_program_options(options)
390390
)
391391
self._backend = "linker"
392392
else:
@@ -472,7 +472,7 @@ def compile(self, target_type, name_expressions=(), logs=None):
472472
handle_return(nvrtc.nvrtcGetProgramLog(self._mnff.handle, log), handle=self._mnff.handle)
473473
logs.write(log.decode())
474474

475-
return ObjectCode(data, target_type, symbol_mapping=symbol_mapping)
475+
return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping)
476476

477477
if self._backend == "linker":
478478
return self._linker.link(target_type)

cuda_core/docs/source/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ CUDA compilation toolchain
3232

3333
Program
3434
Linker
35+
ObjectCode
3536

3637
:template: dataclass.rst
3738

cuda_core/docs/source/release/0.2.0-notes.rst

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
``cuda.core`` 0.2.0 Release Notes
44
=================================
55

6-
Released on <TODO>, 2024
6+
Released on <TODO>, 2025
77

88
Highlights
99
----------
@@ -12,14 +12,19 @@ Highlights
1212
- Add :class:`~DeviceProperties` to provide pythonic access to device properties.
1313
- Add kernel attributes to :class:`~Kernel`
1414

15-
Limitations
16-
-----------
17-
18-
- <TODO>
19-
2015
Breaking Changes
2116
----------------
2217

2318
- Change ``__cuda_stream__`` from attribute to method
2419
- The :meth:`~Program.compile` method no longer accepts the ``options`` argument. Instead, you can optionally pass an instance of :class:`~ProgramOptions` to the constructor of :obj:`~Program`.
25-
- :meth: `~Device.properties` now provides an instance of :class:`~DeviceProperties` instead of a dictionary.
20+
- :meth:`~Device.properties` now provides an instance of :class:`~DeviceProperties` instead of a dictionary.
21+
22+
New features
23+
------------
24+
25+
- Expose :class:`ObjectCode` as a public API, which allows loading cubins from memory or disk. To load any other code type, please continue using :class:`Program`.
26+
27+
Limitations
28+
-----------
29+
30+
- <TODO>

cuda_core/examples/simple_multi_gpu_example.py

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,8 @@
3434
}
3535
"""
3636
arch0 = "".join(f"{i}" for i in dev0.compute_capability)
37-
prog_add = Program(code_add, code_type="c++")
38-
mod_add = prog_add.compile(
39-
"cubin",
40-
options=(
41-
"-std=c++17",
42-
"-arch=sm_" + arch0,
43-
),
44-
)
37+
prog_add = Program(code_add, code_type="c++", options={"std": "c++17", "arch": f"sm_{arch0}"})
38+
mod_add = prog_add.compile("cubin")
4539
ker_add = mod_add.get_kernel("vector_add")
4640

4741
# Set GPU 1
@@ -63,14 +57,8 @@
6357
}
6458
"""
6559
arch1 = "".join(f"{i}" for i in dev1.compute_capability)
66-
prog_sub = Program(code_sub, code_type="c++")
67-
mod_sub = prog_sub.compile(
68-
"cubin",
69-
options=(
70-
"-std=c++17",
71-
"-arch=sm_" + arch1,
72-
),
73-
)
60+
prog_sub = Program(code_sub, code_type="c++", options={"std": "c++17", "arch": f"sm_{arch1}"})
61+
mod_sub = prog_sub.compile("cubin")
7462
ker_sub = mod_sub.get_kernel("vector_sub")
7563

7664

cuda_core/tests/test_module.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import pytest
1111
from conftest import can_load_generated_ptx
1212

13-
from cuda.core.experimental import Program, ProgramOptions, system
13+
from cuda.core.experimental import ObjectCode, Program, ProgramOptions, system
1414

1515

1616
@pytest.fixture(scope="function")
@@ -37,7 +37,7 @@ def get_saxpy_kernel(init_cuda):
3737
)
3838

3939
# run in single precision
40-
return mod.get_kernel("saxpy<float>")
40+
return mod.get_kernel("saxpy<float>"), mod
4141

4242

4343
@pytest.mark.xfail(not can_load_generated_ptx(), reason="PTX version too new")
@@ -72,7 +72,7 @@ def test_get_kernel(init_cuda):
7272
],
7373
)
7474
def test_read_only_kernel_attributes(get_saxpy_kernel, attr, expected_type):
75-
kernel = get_saxpy_kernel
75+
kernel, _ = get_saxpy_kernel
7676
method = getattr(kernel.attributes, attr)
7777
# get the value without providing a device ordinal
7878
value = method()
@@ -82,3 +82,23 @@ def test_read_only_kernel_attributes(get_saxpy_kernel, attr, expected_type):
8282
for device in system.devices:
8383
value = method(device.device_id)
8484
assert isinstance(value, expected_type), f"Expected {attr} to be of type {expected_type}, but got {type(value)}"
85+
86+
87+
def test_object_code_load_cubin(get_saxpy_kernel):
88+
_, mod = get_saxpy_kernel
89+
cubin = mod._module
90+
sym_map = mod._sym_map
91+
assert isinstance(cubin, bytes)
92+
mod = ObjectCode.from_cubin(cubin, symbol_mapping=sym_map)
93+
mod.get_kernel("saxpy<double>") # force loading
94+
95+
96+
def test_object_code_load_cubin_from_file(get_saxpy_kernel, tmp_path):
97+
_, mod = get_saxpy_kernel
98+
cubin = mod._module
99+
sym_map = mod._sym_map
100+
assert isinstance(cubin, bytes)
101+
cubin_file = tmp_path / "test.cubin"
102+
cubin_file.write_bytes(cubin)
103+
mod = ObjectCode.from_cubin(str(cubin_file), symbol_mapping=sym_map)
104+
mod.get_kernel("saxpy<double>") # force loading

0 commit comments

Comments
 (0)