JuliaLegate · krasow · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026
diff --git a/.githash b/.githash
@@ -1 +1 @@
-17adad8a6f9dfd2a79e3a9297020abeedfe5a10f
+ff81537a0c8e23806869eef5c28c235b0dc3fbbe
diff --git a/README.md b/README.md
@@ -28,7 +28,7 @@ cuNumeric.versioninfo()
 ```
 
 > [!WARNING]
-> Starting more than one instance of cuNumeric.jl can lead to a hard-crash. The default hardware configuration reserves all available resources. See the [hardware configuration](https://julialegate.github.io/cuNumeric.jl/dev/perf#Setting-Hardware-Configuration) documentation to learn more.
+> Starting more than one instance of cuNumeric.jl can lead to a hard-crash. The default hardware configuration reserves all available resources. For more details, please visit our hardware configuration documentation.
 
 ### Monte-Carlo Example
 ```julia

diff --git a/docs/src/benchmark.md b/docs/src/benchmark.md
@@ -15,9 +15,18 @@ Code Outline:
 mul!(C, A, B)
 ```
 
-| GEMM Efficiency | GEMM GFLOPS |
-|---|---|
-| ![GEMM Efficiency](images/gemm_efficiency.svg) | ![GEMM GFLOPS](images/gemm_gflops.svg) |
+```@raw html
+<table>
+  <tr>
+    <th>GEMM Efficiency</th>
+    <th>GEMM GFLOPS</th>
+  </tr>
+  <tr>
+    <td><img src="./images/gemm_efficiency.svg" alt="GEMM Efficiency"/></td>
+    <td><img src="./images/gemm_gflops.svg" alt="GEMM GFLOPS"/></td>
+  </tr>
+</table>
+```
 
 ## Monte-Carlo Integration
 
@@ -29,16 +38,34 @@ integrand = (x) -> exp.(-x.^2)
 val = (V/N) * sum(integrand(x))
 ```
 
-| MC Efficiency | MC GFLOPS |
-|---|---|
-| ![MC Efficiency](images/mc_eff.svg) | ![MC GFLOPS](images/mc_ops.svg) |
+```@raw html
+<table>
+  <tr>
+    <th>MC Efficiency</th>
+    <th>MC GFLOPS</th>
+  </tr>
+  <tr>
+    <td><img src="./images/mc_eff.svg" alt="MC Efficiency"/></td>
+    <td><img src="./images/mc_ops.svg" alt="MC GFLOPS"/></td>
+  </tr>
+</table>
+```
 
 
 ## Gray-Scott (2D)
 
 Solving a PDE requires halo exchanges and lots of data movement. In this benchmark, we fall an order of magnitude short of the `ImplicitGlobalGrid.jl` library, which specifically targets multi-node, multi-GPU halo exchanges. We attribute this to the lack of kernel fusion in cuNumeric.jl.
 
-![GS GFLOPS](images/gs_gflops_diffeq.svg)
+```@raw html
+<table>
+  <tr>
+    <th>GS GFLOPS</th>
+  </tr>
+  <tr>
+    <td><img src="./images/gs_gflops_diffeq.svg" alt="GS GFLOPS"/></td>
+  </tr>
+</table>
+```
 
 
 # Benchmarking cuNumeric.jl Programs
@@ -145,8 +172,3 @@ To generate a weak scaling plot, you must increment the problem size in proporti
 
 
 As part of a more complete benchmark, we ran our code on up to 8 A100 GPUs (single-node) and compared it to the Python library cuPyNumeric as well as a custom implementation using CUDA.jl. From these results we can see that cuNumeric.jl is capable of scaling and saturating the GPU memory bandwidth for matrix multiplication.
-
-
-| GEMM Efficiency | GEMM GFLOPS |
-|---|---|
-| ![GEMM Efficiency](images/gemm_efficiency.svg) | ![GEMM GFLOPS](images/gemm_gflops.svg) |
diff --git a/examples/gray-scott.jl b/examples/gray-scott.jl
@@ -1,5 +1,5 @@
 using cuNumeric
-# using Plots
+using Plots
 
 struct Params{T}
     dx::T
@@ -63,7 +63,7 @@ function step!(u, v, u_new, v_new, args::Params)
 end
 
 function gray_scott()
-    #anim = Animation()
+    anim = Animation()
 
     N = 100
     dims = (N, N)
@@ -78,8 +78,8 @@ function gray_scott()
     u_new = cuNumeric.zeros(dims)
     v_new = cuNumeric.zeros(dims)
 
-    u[1:15, 1:15] = cuNumeric.rand(15, 15)
-    v[1:15, 1:15] = cuNumeric.rand(15, 15)
+    u[1:15, 1:15] = cuNumeric.rand(Float32, 15, 15)
+    v[1:15, 1:15] = cuNumeric.rand(Float32, 15, 15)
 
     for n in 1:n_steps
         step!(u, v, u_new, v_new, args)
@@ -88,13 +88,12 @@ function gray_scott()
         u, u_new = u_new, u
         v, v_new = v_new, v
 
-        # if n%frame_interval == 0
-        #     u_cpu = u[:, :]
-        #     heatmap(u_cpu, clims=(0, 1))
-        #     frame(anim)
-        # end
+        if n%frame_interval == 0
+            heatmap(Array(u); clims=(0, 1))
+            frame(anim)
+        end
     end
-    # gif(anim, "gray-scott.gif", fps=10)
+    gif(anim, "gray-scott.gif"; fps=10)
     return u, v
 end
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		17adad8a6f9dfd2a79e3a9297020abeedfe5a10f
		ff81537a0c8e23806869eef5c28c235b0dc3fbbe