Skip to content

Commit f161652

Browse files
committed
Add a brief description for each of the examples
1 parent 1d1171f commit f161652

File tree

9 files changed

+58
-11
lines changed

9 files changed

+58
-11
lines changed

cuda_core/examples/jit_lto_fractal.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
# ################################################################################
66
#
7-
# This demo aims to illustrate a couple takeaways:
7+
# This demo illustrates:
88
#
99
# 1. How to use the JIT LTO feature provided by the Linker class to link multiple objects together
1010
# 2. That linking allows for libraries to modify workflows dynamically at runtime

cuda_core/examples/pytorch_example.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,16 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5-
## Usage: pip install "cuda-core[cu12]"
6-
## python python_example.py
5+
# ################################################################################
6+
#
7+
# This demo illustrates how to use `cuda.core` to compile a CUDA kernel
8+
# and launch it using PyTorch tensors as inputs.
9+
#
10+
# ## Usage: pip install "cuda-core[cu12]"
11+
# ## python pytorch_example.py
12+
#
13+
# ################################################################################
14+
715
import sys
816

917
import torch

cuda_core/examples/saxpy.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,15 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
# ################################################################################
6+
#
7+
# This demo illustrates how to use `cuda.core` to compile a templated CUDA kernel
8+
# and launch it using `cupy` arrays as inputs. This is a simple example of a
9+
# templated kernel, where the kernel is instantiated for both `float` and `double`
10+
# data types.
11+
#
12+
# ################################################################################
13+
514
import sys
615

716
import cupy as cp
@@ -32,6 +41,10 @@
3241
arch = "".join(f"{i}" for i in dev.compute_capability)
3342
program_options = ProgramOptions(std="c++11", arch=f"sm_{arch}")
3443
prog = Program(code, code_type="c++", options=program_options)
44+
45+
# Note the use of the `name_expressions` argument to specify the template
46+
# instantiations of the kernel that we will use. For non-templated kernels,
47+
# `name_expressions` will simply contain the name of the kernels.
3548
mod = prog.compile(
3649
"cubin",
3750
logs=sys.stdout,

cuda_core/examples/show_device_properties.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,13 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
# ################################################################################
6+
#
7+
# This demo illustrates how to use `cuda.core` to show the properties of the
8+
# CUDA devices in the system.
9+
#
10+
# ################################################################################
11+
512
import sys
613

714
from cuda.core.experimental import Device, system

cuda_core/examples/simple_multi_gpu_example.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,13 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
# ################################################################################
6+
#
7+
# This demo illustrates how to use `cuda.core` to compile and launch kernels
8+
# on multiple GPUs.
9+
#
10+
# ################################################################################
11+
512
import sys
613

714
import cupy as cp

cuda_core/examples/strided_memory_view_cpu.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4,14 +4,13 @@
44

55
# ################################################################################
66
#
7-
# This demo aims to illustrate two takeaways:
7+
# This demo illustrates:
88
#
99
# 1. The similarity between CPU and GPU JIT-compilation with C++ sources
1010
# 2. How to use StridedMemoryView to interface with foreign C/C++ functions
1111
#
12-
# To facilitate this demo, we use cffi (https://cffi.readthedocs.io/) for the CPU
13-
# path, which can be easily installed from pip or conda following their instructions.
14-
# We also use NumPy/CuPy as the CPU/GPU array container.
12+
# This demo uses cffi (https://cffi.readthedocs.io/) for the CPU path, which can be
13+
# easily installed from pip or conda following their instructions.
1514
#
1615
# ################################################################################
1716

cuda_core/examples/strided_memory_view_gpu.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4,14 +4,13 @@
44

55
# ################################################################################
66
#
7-
# This demo aims to illustrate two takeaways:
7+
# This demo illustrates:
88
#
99
# 1. The similarity between CPU and GPU JIT-compilation with C++ sources
1010
# 2. How to use StridedMemoryView to interface with foreign C/C++ functions
1111
#
12-
# To facilitate this demo, we use cffi (https://cffi.readthedocs.io/) for the CPU
13-
# path, which can be easily installed from pip or conda following their instructions.
14-
# We also use NumPy/CuPy as the CPU/GPU array container.
12+
# This demo uses cffi (https://cffi.readthedocs.io/) for the CPU path, which can be
13+
# easily installed from pip or conda following their instructions.
1514
#
1615
# ################################################################################
1716

cuda_core/examples/thread_block_cluster.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,13 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
# ################################################################################
6+
#
7+
# This demo illustrates the use of thread block clusters in the CUDA launch
8+
# configuration.
9+
#
10+
# ################################################################################
11+
512
import os
613
import sys
714

cuda_core/examples/vector_add.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,13 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
# ################################################################################
6+
#
7+
# This demo illustrates how to use `cuda.core` to compile and launch a simple
8+
# vector addition kernel.
9+
#
10+
# ################################################################################
11+
512
import cupy as cp
613

714
from cuda.core.experimental import Device, LaunchConfig, Program, ProgramOptions, launch

0 commit comments

Comments (0)