
Commit ee16510

Authored by Copilot, leofang, and kkraus14
Add Device.arch property for convenient compute capability string access (#877)
* Initial plan
* Add Device.arch property and update examples to use it
* Inline dev.arch calls in f-strings per PR feedback
* Add release note for Device.arch property
* Use f-string instead of "".join for Device.arch property

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
Co-authored-by: kkraus14 <3665167+kkraus14@users.noreply.github.com>
1 parent bb1fe80 commit ee16510

10 files changed: +26 −17 lines changed

cuda_core/cuda/core/experimental/_device.py

Lines changed: 5 additions & 0 deletions
@@ -1112,6 +1112,11 @@ def compute_capability(self) -> ComputeCapability:
         self.properties._cache["compute_capability"] = cc
         return cc
 
+    @property
+    def arch(self) -> str:
+        """Return compute capability as a string (e.g., '75' for CC 7.5)."""
+        return f"{self.compute_capability.major}{self.compute_capability.minor}"
+
     @property
     def context(self) -> Context:
         """Return the current :obj:`~_context.Context` associated with this device.

cuda_core/docs/source/release/0.X.Y-notes.rst

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ Breaking Changes
 New features
 ------------
 
-None.
+- Added :attr:`Device.arch` property that returns the compute capability as a string (e.g., '75' for CC 7.5), providing a convenient alternative to manually concatenating the compute capability tuple.
 
 
 New examples

cuda_core/examples/cuda_graphs.py

Lines changed: 1 addition & 2 deletions
@@ -53,8 +53,7 @@ def main():
     cp.cuda.ExternalStream(int(stream.handle)).use()
 
     # Compile the program
-    arch = "".join(f"{i}" for i in dev.compute_capability)
-    program_options = ProgramOptions(std="c++17", arch=f"sm_{arch}")
+    program_options = ProgramOptions(std="c++17", arch=f"sm_{dev.arch}")
     prog = Program(code, code_type="c++", options=program_options)
     mod = prog.compile(
         "cubin", name_expressions=("vector_add<float>", "vector_multiply<float>", "vector_subtract<float>")

cuda_core/examples/memory_ops.py

Lines changed: 1 addition & 2 deletions
@@ -54,8 +54,7 @@
 cp.cuda.ExternalStream(int(stream.handle)).use()
 
 # Compile kernel
-arch = "".join(f"{i}" for i in dev.compute_capability)
-program_options = ProgramOptions(std="c++17", arch=f"sm_{arch}")
+program_options = ProgramOptions(std="c++17", arch=f"sm_{dev.arch}")
 prog = Program(code, code_type="c++", options=program_options)
 mod = prog.compile("cubin")
 kernel = mod.get_kernel("memory_ops")

cuda_core/examples/pytorch_example.py

Lines changed: 1 addition & 2 deletions
@@ -51,8 +51,7 @@ def __cuda_stream__(self):
 s = dev.create_stream(PyTorchStreamWrapper(pt_stream))
 
 # prepare program
-arch = "".join(f"{i}" for i in dev.compute_capability)
-program_options = ProgramOptions(std="c++11", arch=f"sm_{arch}")
+program_options = ProgramOptions(std="c++11", arch=f"sm_{dev.arch}")
 prog = Program(code, code_type="c++", options=program_options)
 mod = prog.compile(
     "cubin",

cuda_core/examples/saxpy.py

Lines changed: 1 addition & 2 deletions
@@ -38,8 +38,7 @@
 s = dev.create_stream()
 
 # prepare program
-arch = "".join(f"{i}" for i in dev.compute_capability)
-program_options = ProgramOptions(std="c++11", arch=f"sm_{arch}")
+program_options = ProgramOptions(std="c++11", arch=f"sm_{dev.arch}")
 prog = Program(code, code_type="c++", options=program_options)
 
 # Note the use of the `name_expressions` argument to specify the template

cuda_core/examples/simple_multi_gpu_example.py

Lines changed: 2 additions & 4 deletions
@@ -40,8 +40,7 @@
 }
 }
 """
-arch0 = "".join(f"{i}" for i in dev0.compute_capability)
-prog_add = Program(code_add, code_type="c++", options={"std": "c++17", "arch": f"sm_{arch0}"})
+prog_add = Program(code_add, code_type="c++", options={"std": "c++17", "arch": f"sm_{dev0.arch}"})
 mod_add = prog_add.compile("cubin")
 ker_add = mod_add.get_kernel("vector_add")
 
@@ -63,8 +62,7 @@
 }
 }
 """
-arch1 = "".join(f"{i}" for i in dev1.compute_capability)
-prog_sub = Program(code_sub, code_type="c++", options={"std": "c++17", "arch": f"sm_{arch1}"})
+prog_sub = Program(code_sub, code_type="c++", options={"std": "c++17", "arch": f"sm_{dev1.arch}"})
 mod_sub = prog_sub.compile("cubin")
 ker_sub = mod_sub.get_kernel("vector_sub")
 

cuda_core/examples/strided_memory_view_gpu.py

Lines changed: 1 addition & 2 deletions
@@ -103,8 +103,7 @@ def run():
     # To know the GPU's compute capability, we need to identify which GPU to use.
     dev = Device(0)
     dev.set_current()
-    arch = "".join(f"{i}" for i in dev.compute_capability)
-    gpu_prog = Program(gpu_code, code_type="c++", options=ProgramOptions(arch=f"sm_{arch}", std="c++11"))
+    gpu_prog = Program(gpu_code, code_type="c++", options=ProgramOptions(arch=f"sm_{dev.arch}", std="c++11"))
     mod = gpu_prog.compile(target_type="cubin")
     gpu_ker = mod.get_kernel(func_name)
 

cuda_core/examples/vector_add.py

Lines changed: 1 addition & 2 deletions
@@ -33,8 +33,7 @@
 s = dev.create_stream()
 
 # prepare program
-arch = "".join(f"{i}" for i in dev.compute_capability)
-program_options = ProgramOptions(std="c++17", arch=f"sm_{arch}")
+program_options = ProgramOptions(std="c++17", arch=f"sm_{dev.arch}")
 prog = Program(code, code_type="c++", options=program_options)
 mod = prog.compile("cubin", name_expressions=("vector_add<float>",))
 

cuda_core/tests/test_device.py

Lines changed: 12 additions & 0 deletions
@@ -105,6 +105,18 @@ def test_compute_capability():
     assert device.compute_capability == expected_cc
 
 
+def test_arch():
+    device = Device()
+    # Test that arch returns the same as the old pattern
+    expected_arch = "".join(f"{i}" for i in device.compute_capability)
+    assert device.arch == expected_arch
+    # Test that it's a string
+    assert isinstance(device.arch, str)
+    # Test that it matches the expected format (e.g., "75" for CC 7.5)
+    cc = device.compute_capability
+    assert device.arch == f"{cc.major}{cc.minor}"
+
+
 cuda_base_properties = [
     ("max_threads_per_block", int),
     ("max_block_dim_x", int),
