From 8d8f02d5ecd54310c243119d67d28b63aeb5d251 Mon Sep 17 00:00:00 2001 From: krasow Date: Mon, 13 Apr 2026 10:37:41 -0500 Subject: [PATCH 1/8] update gray-scott example and benchmark tables. --- docs/src/benchmark.md | 35 ++++++++++++++++++++++++----------- examples/gray-scott.jl | 19 +++++++++---------- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/docs/src/benchmark.md b/docs/src/benchmark.md index f4a0b845..0fd4d63b 100644 --- a/docs/src/benchmark.md +++ b/docs/src/benchmark.md @@ -15,9 +15,18 @@ Code Outline: mul!(C, A, B) ``` -| GEMM Efficiency | GEMM GFLOPS | -|---|---| -| ![GEMM Efficiency](images/gemm_efficiency.svg) | ![GEMM GFLOPS](images/gemm_gflops.svg) | +```@raw html + + + + + + + + + +
GEMM EfficiencyGEMM GFLOPS
GEMM EfficiencyGEMM GFLOPS
+``` ## Monte-Carlo Integration @@ -29,9 +38,18 @@ integrand = (x) -> exp.(-x.^2) val = (V/N) * sum(integrand(x)) ``` -| MC Efficiency | MC GFLOPS | -|---|---| -| ![MC Efficiency](images/mc_eff.svg) | ![MC GFLOPS](images/mc_ops.svg) | +```@raw html + + + + + + + + + +
MC EfficiencyMC GFLOPS
MC EfficiencyMC GFLOPS
+``` ## Gray-Scott (2D) @@ -145,8 +163,3 @@ To generate a weak scaling plot, you must increment the problem size in proporti As part of a more complete benchmark we ran our code on up to 8 A100 GPUs (single-node) and compared it to the Python library cuPyNumeric as well as a custom implementation using CUDA.jl. From these resutls we can see that cuNumeric.jl is capable of scaling and saturating the GPU memory bandwidth for matrix multiplication. - - -| GEMM Efficiency | GEMM GFLOPS | -|---|---| -| ![GEMM Efficiency](images/gemm_efficiency.svg) | ![GEMM GFLOPS](images/gemm_gflops.svg) | diff --git a/examples/gray-scott.jl b/examples/gray-scott.jl index f5865705..b7eae81c 100644 --- a/examples/gray-scott.jl +++ b/examples/gray-scott.jl @@ -1,5 +1,5 @@ using cuNumeric -# using Plots +using Plots struct Params{T} dx::T @@ -63,7 +63,7 @@ function step!(u, v, u_new, v_new, args::Params) end function gray_scott() - #anim = Animation() + anim = Animation() N = 100 dims = (N, N) @@ -78,8 +78,8 @@ function gray_scott() u_new = cuNumeric.zeros(dims) v_new = cuNumeric.zeros(dims) - u[1:15, 1:15] = cuNumeric.rand(15, 15) - v[1:15, 1:15] = cuNumeric.rand(15, 15) + u[1:15, 1:15] = cuNumeric.rand(Float32, 15, 15) + v[1:15, 1:15] = cuNumeric.rand(Float32, 15, 15) for n in 1:n_steps step!(u, v, u_new, v_new, args) @@ -88,13 +88,12 @@ function gray_scott() u, u_new = u_new, u v, v_new = v_new, v - # if n%frame_interval == 0 - # u_cpu = u[:, :] - # heatmap(u_cpu, clims=(0, 1)) - # frame(anim) - # end + if n%frame_interval == 0 + heatmap(Array(u); clims=(0, 1)) + frame(anim) + end end - # gif(anim, "gray-scott.gif", fps=10) + gif(anim, "gray-scott.gif"; fps=10) return u, v end From 365a71e2dab895e08ddd50fd82ce733d154bfda6 Mon Sep 17 00:00:00 2001 From: krasow Date: Mon, 13 Apr 2026 10:44:46 -0500 Subject: [PATCH 2/8] try this to resolve pathing --- .githash | 2 +- docs/src/benchmark.md | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.githash b/.githash index 6dd09371..0790f09d 100644 --- a/.githash +++ b/.githash @@ -1 +1 @@ -17adad8a6f9dfd2a79e3a9297020abeedfe5a10f +8d8f02d5ecd54310c243119d67d28b63aeb5d251 diff --git a/docs/src/benchmark.md b/docs/src/benchmark.md index 0fd4d63b..705c066b 100644 --- a/docs/src/benchmark.md +++ b/docs/src/benchmark.md @@ -22,8 +22,8 @@ mul!(C, A, B) GEMM GFLOPS - GEMM Efficiency - GEMM GFLOPS + GEMM Efficiency + GEMM GFLOPS ``` @@ -45,8 +45,8 @@ val = (V/N) * sum(integrand(x)) MC GFLOPS - MC Efficiency - MC GFLOPS + MC Efficiency + MC GFLOPS ``` From 5bf4142aa0184eaf5f7ad30102c3f01d56470f03 Mon Sep 17 00:00:00 2001 From: krasow Date: Mon, 13 Apr 2026 10:51:01 -0500 Subject: [PATCH 3/8] table for grayscott --- .githash | 2 +- docs/src/benchmark.md | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/.githash b/.githash index 0790f09d..bb49e2ac 100644 --- a/.githash +++ b/.githash @@ -1 +1 @@ -8d8f02d5ecd54310c243119d67d28b63aeb5d251 +365a71e2dab895e08ddd50fd82ce733d154bfda6 diff --git a/docs/src/benchmark.md b/docs/src/benchmark.md index 705c066b..ac082766 100644 --- a/docs/src/benchmark.md +++ b/docs/src/benchmark.md @@ -56,7 +56,16 @@ val = (V/N) * sum(integrand(x)) Solving a PDE requires halo-exchanges and lots of data movement. In this benchmark we fall an order of magnitude short of the `ImplicitGlobalGrid.jl` library which specifically targets multi-node, multi-GPU halo exchanges. We attribute this to the lack of kernel fusion in cuNumeric.jl -![GS GFLOPS](images/gs_gflops_diffeq.svg) +```@raw html + + + + + + + +
GS GFLOPS
GS GFLOPS
+``` # Benchmarking cuNumeric.jl Programs From 6d0226cf6ad35b25acfe88d70ddb5c04744abab1 Mon Sep 17 00:00:00 2001 From: krasow Date: Mon, 13 Apr 2026 13:04:02 -0500 Subject: [PATCH 4/8] try target _self embedded docs --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 543d857a..5ce8bfa2 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,8 @@ cuNumeric.versioninfo() ``` > [!WARNING] -> Starting more than one instance of cuNumeric.jl can lead to a hard-crash. The default hardware configuration reserves all available resources. See the [hardware configuration](https://julialegate.github.io/cuNumeric.jl/dev/perf#Setting-Hardware-Configuration) documentation to learn more. +> Starting more than one instance of cuNumeric.jl can lead to a hard-crash. The default hardware configuration reserves all available resources. See the +hardware configuration documentation to learn more. ### Monte-Carlo Example ```julia From 9950a5d36f45587eaf232b2a2233e1d5e4a26713 Mon Sep 17 00:00:00 2001 From: krasow Date: Mon, 13 Apr 2026 13:09:38 -0500 Subject: [PATCH 5/8] @raw html --- .githash | 2 +- README.md | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.githash b/.githash index bb49e2ac..a4bcf63b 100644 --- a/.githash +++ b/.githash @@ -1 +1 @@ -365a71e2dab895e08ddd50fd82ce733d154bfda6 +6d0226cf6ad35b25acfe88d70ddb5c04744abab1 diff --git a/README.md b/README.md index 5ce8bfa2..58d703e6 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,10 @@ cuNumeric.versioninfo() > [!WARNING] > Starting more than one instance of cuNumeric.jl can lead to a hard-crash. The default hardware configuration reserves all available resources. See the -hardware configuration documentation to learn more. +```@raw html +hardware configuration +``` +documentation to learn more. ### Monte-Carlo Example ```julia From 17dd3a32814f67f872146030a6fbe3c382c5d97d Mon Sep 17 00:00:00 2001 From: krasow Date: Mon, 13 Apr 2026 13:37:30 -0500 Subject: [PATCH 6/8] try this maybe? --- .githash | 2 +- README.md | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.githash b/.githash index a4bcf63b..0998d6e3 100644 --- a/.githash +++ b/.githash @@ -1 +1 @@ -6d0226cf6ad35b25acfe88d70ddb5c04744abab1 +9950a5d36f45587eaf232b2a2233e1d5e4a26713 diff --git a/README.md b/README.md index 58d703e6..eb89231e 100644 --- a/README.md +++ b/README.md @@ -29,10 +29,10 @@ cuNumeric.versioninfo() > [!WARNING] > Starting more than one instance of cuNumeric.jl can lead to a hard-crash. The default hardware configuration reserves all available resources. See the -```@raw html -hardware configuration -``` -documentation to learn more. +> ```@raw html +> hardware configuration +> ``` +> documentation to learn more. ### Monte-Carlo Example ```julia From ff81537a0c8e23806869eef5c28c235b0dc3fbbe Mon Sep 17 00:00:00 2001 From: krasow Date: Mon, 13 Apr 2026 13:48:59 -0500 Subject: [PATCH 7/8] try this --- .githash | 2 +- README.md | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/.githash b/.githash index 0998d6e3..39b2b76b 100644 --- a/.githash +++ b/.githash @@ -1 +1 @@ -9950a5d36f45587eaf232b2a2233e1d5e4a26713 +17dd3a32814f67f872146030a6fbe3c382c5d97d diff --git a/README.md b/README.md index eb89231e..1b0bbc1d 100644 --- a/README.md +++ b/README.md @@ -28,11 +28,7 @@ cuNumeric.versioninfo() ``` > [!WARNING] -> Starting more than one instance of cuNumeric.jl can lead to a hard-crash. The default hardware configuration reserves all available resources. See the -> ```@raw html -> hardware configuration -> ``` -> documentation to learn more. +> Starting more than one instance of cuNumeric.jl can lead to a hard-crash. The default hardware configuration reserves all available resources. See the ```@raw html hardware configuration``` documentation to learn more. ### Monte-Carlo Example ```julia From f8ed86641daae74068b5e82a029a5a2e36e9ce04 Mon Sep 17 00:00:00 2001 From: krasow Date: Mon, 13 Apr 2026 13:58:06 -0500 Subject: [PATCH 8/8] remove the link completely --- .githash | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.githash b/.githash index 39b2b76b..cee7a114 100644 --- a/.githash +++ b/.githash @@ -1 +1 @@ -17dd3a32814f67f872146030a6fbe3c382c5d97d +ff81537a0c8e23806869eef5c28c235b0dc3fbbe diff --git a/README.md b/README.md index 1b0bbc1d..88703e7c 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ cuNumeric.versioninfo() ``` > [!WARNING] -> Starting more than one instance of cuNumeric.jl can lead to a hard-crash. The default hardware configuration reserves all available resources. See the ```@raw html hardware configuration``` documentation to learn more. +> Starting more than one instance of cuNumeric.jl can lead to a hard-crash. The default hardware configuration reserves all available resources. For more details, please visit our hardware configuration documentation. ### Monte-Carlo Example ```julia