From 8fd4726516fad00d349a53660168a0678f32bde2 Mon Sep 17 00:00:00 2001 From: krasow Date: Tue, 7 Apr 2026 22:40:37 -0500 Subject: [PATCH] pre-commit --- .JuliaFormatter.toml | 2 +- .gitattributes | 2 +- .githash | 2 +- .github/workflows/TagBot.yml | 2 +- .github/workflows/ci.yml | 8 ++-- .github/workflows/conda.yml | 4 +- .github/workflows/container.yml | 4 +- .github/workflows/developer.yml | 2 +- .github/workflows/docs.yml | 2 +- .gitignore | 5 +- .pre-commit-config.yaml | 23 +++++++++ Dockerfile | 6 +-- README.md | 4 +- benchmark/cpp_matmul/CMakeLists.txt | 2 +- benchmark/cpp_matmul/build.sh | 2 +- codecov.yml | 2 +- deps/build.jl | 2 +- deps/version.jl | 2 +- docs/src/.vitepress/config.mts | 14 +++--- docs/src/.vitepress/theme/index.ts | 8 ++-- docs/src/.vitepress/theme/style.css | 6 +-- docs/src/benchmark.md | 11 ++--- docs/src/components/AuthorBadge.vue | 2 +- docs/src/components/Authors.vue | 8 ++-- docs/src/components/VersionPicker.vue | 2 +- docs/src/dev.md | 4 +- docs/src/examples.md | 12 ++--- docs/src/install.md | 14 +++--- docs/src/perf.md | 6 +-- docs/src/usage.md | 2 +- examples/custom_cuda.jl | 1 - examples/gray-scott.jl | 63 +++++++++++++++---------- examples/gray-scott.py | 12 ++--- examples/integrate.jl | 8 ++-- ext/CUDAExt/CUDAExt.jl | 2 +- lib/cunumeric_jl_wrapper/CMakeLists.txt | 2 +- lib/cunumeric_jl_wrapper/VERSION | 2 +- lib/cunumeric_jl_wrapper/src/memory.cpp | 2 +- lib/cunumeric_jl_wrapper/src/types.cpp | 2 +- scripts/README.md | 12 ++--- scripts/build_cpp_wrapper.sh | 4 +- scripts/install_cxxwrap.sh | 6 +-- scripts/test_container.sh | 2 +- src/cuNumeric.jl | 6 +-- src/ndarray/binary.jl | 6 +-- src/ndarray/broadcast.jl | 14 +++--- src/ndarray/detail/ndarray.jl | 6 +-- src/ndarray/ndarray.jl | 17 +++---- src/ndarray/unary.jl | 6 +-- src/scoping.jl | 6 +-- src/utilities/cuda_stubs.jl | 32 ++++++------- src/utilities/preference.jl | 2 +- src/utilities/version.jl | 3 +- src/warnings.jl | 50 +++++++++++--------- test/runtests.jl | 2 +- test/tests/axpy.jl | 6 +-- test/tests/axpy_advanced.jl | 8 ++-- test/tests/binary_tests.jl | 2 +- test/tests/cuda/vecadd.jl | 2 +- test/tests/elementwise.jl | 2 +- test/tests/gemm.jl | 23 ++++----- test/tests/linalg.jl | 2 +- test/tests/scoping-advanced.jl | 4 +- test/tests/slicing.jl | 6 ++- test/tests/stability.jl | 4 +- test/tests/unary_tests.jl | 21 ++++----- 66 files changed, 273 insertions(+), 240 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml index 4935005d..8aa52541 100644 --- a/.JuliaFormatter.toml +++ b/.JuliaFormatter.toml @@ -1,3 +1,3 @@ style = "blue" join_lines_based_on_source=true -margin=100 \ No newline at end of file +margin=100 diff --git a/.gitattributes b/.gitattributes index e334746f..87c816cb 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,3 @@ # Auto detect text files and perform LF normalization * text=auto -.githash merge=ours \ No newline at end of file +.githash merge=ours diff --git a/.githash b/.githash index a7d75d44..87bef491 100644 --- a/.githash +++ b/.githash @@ -1 +1 @@ -c35ce35cb9a555d168b58c4f3a17ed146d258f94 +224cac24f15449a338abebe1bc17ba74c07d9f5c diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml index 4d2483cf..a3de268c 100644 --- a/.github/workflows/TagBot.yml +++ b/.github/workflows/TagBot.yml @@ -27,4 +27,4 @@ jobs: steps: - uses: JuliaRegistries/TagBot@v1 with: - token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d25c19fb..e3b9ff34 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -51,16 +51,16 @@ jobs: # - julia: '1.12' # allow-failure: true continue-on-error: ${{ matrix.allow-failure || false }} - + steps: - uses: actions/checkout@v4 - + - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.julia }} - + - uses: julia-actions/cache@v2 - + - name: Run tests env: GPUTESTS: "0" # parsed by runtests.jl diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 4106223e..2ea05928 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -78,7 +78,7 @@ # - name: Setup Conda env # run: | # conda create -n myenv -c conda-forge -c legate cupynumeric=25.11.00 python=3.11 elfutils libdwarf -y - + # - name: Load Julia packages from cache # id: julia-cache # uses: julia-actions/cache@v2 @@ -99,7 +99,7 @@ # MYENV_PATH=$CONDA_PATH/envs/myenv # echo "LD_PRELOAD=$MYENV_PATH/lib/libdw.so.1:$MYENV_PATH/lib/libdwarf.so:$MYENV_PATH/lib/libelf.so.1:$MYENV_PATH/lib/libgomp.so.1:$MYENV_PATH/lib/libstdc++.so.6:$LD_PRELOAD" >> $GITHUB_ENV # echo "MYENV_PATH=$MYENV_PATH" >> $GITHUB_ENV - + # - name: Setup cuNumeric.jl and Legate.jl # run: | # conda run --no-capture-output -n myenv julia --color=yes -e ' diff --git a/.github/workflows/container.yml b/.github/workflows/container.yml index b817d514..00d5d995 100644 --- a/.github/workflows/container.yml +++ b/.github/workflows/container.yml @@ -115,7 +115,7 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - + - name: Cache Docker layers uses: actions/cache@v4 with: @@ -167,4 +167,4 @@ jobs: docker stop ${{ steps.pkg.outputs.ref }} || true docker rm ${{ steps.pkg.outputs.ref }} || true docker image rm ${{ steps.meta.outputs.tags }} || true - fi \ No newline at end of file + fi diff --git a/.github/workflows/developer.yml b/.github/workflows/developer.yml index 1f93d4d7..10c5094b 100644 --- a/.github/workflows/developer.yml +++ b/.github/workflows/developer.yml @@ -80,7 +80,7 @@ jobs: wget https://github.com/Kitware/CMake/releases/download/v3.30.7/cmake-3.30.7-linux-x86_64.sh --no-check-certificate sh cmake-3.30.7-linux-x86_64.sh --skip-license --prefix=$HOME/.local echo "$HOME/.local/bin" >> $GITHUB_PATH - + - name: Load Julia packages from cache id: julia-cache uses: julia-actions/cache@v2 diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index c01eac94..e3322451 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -54,4 +54,4 @@ jobs: run: julia --color=yes --project=docs docs/make.jl env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} \ No newline at end of file + DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} diff --git a/.gitignore b/.gitignore index bae220a5..0046a57e 100644 --- a/.gitignore +++ b/.gitignore @@ -11,7 +11,7 @@ build_wrapper.sh logging logging/* -debug +debug debug/* compile_wrapper.sh @@ -44,7 +44,7 @@ deps/lapacke_build/ deps/deps.jl # generated by Julia build system -build.log +build.log env.log .localenv @@ -82,4 +82,3 @@ node_modules *.exe *.out *.app - diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..7a201053 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,23 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + + - repo: https://github.com/pre-commit/mirrors-clang-format + rev: v14.0.0 + hooks: + - id: clang-format + types_or: [c++, c] + + - repo: local + hooks: + - id: julia-formatter + name: JuliaFormatter + description: "Run JuliaFormatter on staged Julia files." + entry: julia --project=. -e 'using JuliaFormatter; format(ARGS, verbose=true)' + language: system + types: [julia] diff --git a/Dockerfile b/Dockerfile index 48bf80d9..00f91978 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,7 @@ ENV CUDA_VERSION_MAJOR_MINOR="${CUDA_MAJOR}.${CUDA_MINOR}" ARG REF=main ENV REF=${REF} # using bash -SHELL ["/bin/bash", "-c"] +SHELL ["/bin/bash", "-c"] ENV DEBIAN_FRONTEND=noninteractive # force turn off legate auto config for precompilation. @@ -16,7 +16,7 @@ ENV LEGATE_AUTO_CONFIG=0 # much of the CUDA.jl setup is from Tim Besard # CUDA.jl Dockerfile https://github.com/JuliaGPU/CUDA.jl/blob/master/Dockerfile -# Thank you Tim for the reccomendation. +# Thank you Tim for the reccomendation. ARG JULIA_CPU_TARGET=native ENV JULIA_CPU_TARGET=${JULIA_CPU_TARGET} @@ -101,7 +101,7 @@ EOF RUN apt-get clean && \ rm -rf /var/lib/apt/lists/* -ENV LEGATE_AUTO_CONFIG=1 +ENV LEGATE_AUTO_CONFIG=1 ENTRYPOINT source /etc/.env && exec /bin/bash WORKDIR /workspace diff --git a/README.md b/README.md index 22660c7d..acf691fe 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ The cuNumeric.jl package wraps the [cuPyNumeric](https://github.com/nv-legate/cupynumeric) C++ API from NVIDIA to bring simple distributed computing on GPUs and CPUs to Julia! We provide a simple array abstraction, the `NDArray`, which supports most of the operations you would expect from a normal Julia array. -> [!WARNING] +> [!WARNING] > cuNumeric.jl is under active development. This is an alpha API and is subject to change. Stability is not guaranteed until the first official release. We are actively working to improve the build experience to be more seamless and Julia-friendly. ### Quick Start @@ -39,7 +39,7 @@ x_max = 10.0f0 domain = [-x_max, x_max] Ω = domain[2] - domain[1] -samples = Ω*cuNumeric.rand(N) .- x_max +samples = Ω*cuNumeric.rand(N) .- x_max estimate = (Ω/N) * sum(integrand(samples)) println("Monte-Carlo Estimate: $(estimate)") diff --git a/benchmark/cpp_matmul/CMakeLists.txt b/benchmark/cpp_matmul/CMakeLists.txt index 9ac064c2..d547f62d 100644 --- a/benchmark/cpp_matmul/CMakeLists.txt +++ b/benchmark/cpp_matmul/CMakeLists.txt @@ -12,6 +12,6 @@ endif() find_package(cupynumeric REQUIRED) -add_executable(matmulfp32_test main.cpp) +add_executable(matmulfp32_test main.cpp) target_link_libraries(matmulfp32_test PRIVATE cupynumeric::cupynumeric) install(TARGETS matmulfp32_test DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/cmake-install") diff --git a/benchmark/cpp_matmul/build.sh b/benchmark/cpp_matmul/build.sh index 1ceefefa..d7463c34 100755 --- a/benchmark/cpp_matmul/build.sh +++ b/benchmark/cpp_matmul/build.sh @@ -4,4 +4,4 @@ cupynumeric_root=`python -c 'import cupynumeric.install_info as i; from pathlib echo "Using cuPyNumeric at $cupynumeric_root" cmake -S . -B build -D legate_ROOT="$legate_root" -D cupynumeric_ROOT="$cupynumeric_root" -D CMAKE_BUILD_TYPE=Debug -cmake --build build --parallel 8 --verbose \ No newline at end of file +cmake --build build --parallel 8 --verbose diff --git a/codecov.yml b/codecov.yml index 1158dd29..ed622191 100644 --- a/codecov.yml +++ b/codecov.yml @@ -8,4 +8,4 @@ coverage: status: patch: false project: false - changes: false \ No newline at end of file + changes: false diff --git a/deps/build.jl b/deps/build.jl index 5e025de9..d53e576f 100644 --- a/deps/build.jl +++ b/deps/build.jl @@ -1,4 +1,4 @@ -#= Copyright 2026 Northwestern University, +#= Copyright 2026 Northwestern University, * Carnegie Mellon University University * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/deps/version.jl b/deps/version.jl index 43838584..54e2e09b 100644 --- a/deps/version.jl +++ b/deps/version.jl @@ -1,4 +1,4 @@ -#= Copyright 2026 Northwestern University, +#= Copyright 2026 Northwestern University, * Carnegie Mellon University University * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/docs/src/.vitepress/config.mts b/docs/src/.vitepress/config.mts index bf4470e6..ee0f9704 100644 --- a/docs/src/.vitepress/config.mts +++ b/docs/src/.vitepress/config.mts @@ -39,7 +39,7 @@ export default defineConfig({ // ['script', {src: '/versions.js'], for custom domains, I guess if deploy_url is available. ['script', {src: `${baseTemp.base}siteinfo.js`}] ], - + vite: { define: { __DEPLOY_ABSPATH__: JSON.stringify('REPLACE_ME_DOCUMENTER_VITEPRESS_DEPLOY_ABSPATH'), @@ -50,18 +50,18 @@ export default defineConfig({ } }, optimizeDeps: { - exclude: [ + exclude: [ '@nolebase/vitepress-plugin-enhanced-readabilities/client', 'vitepress', '@nolebase/ui', - ], - }, - ssr: { - noExternal: [ + ], + }, + ssr: { + noExternal: [ // If there are other packages that need to be processed by Vite, you can add them here. '@nolebase/vitepress-plugin-enhanced-readabilities', '@nolebase/ui', - ], + ], }, }, markdown: { diff --git a/docs/src/.vitepress/theme/index.ts b/docs/src/.vitepress/theme/index.ts index 061f26a1..60bb2456 100644 --- a/docs/src/.vitepress/theme/index.ts +++ b/docs/src/.vitepress/theme/index.ts @@ -3,9 +3,9 @@ import { h } from 'vue' import DefaultTheme from 'vitepress/theme' import type { Theme as ThemeConfig } from 'vitepress' -import { - NolebaseEnhancedReadabilitiesMenu, - NolebaseEnhancedReadabilitiesScreenMenu, +import { + NolebaseEnhancedReadabilitiesMenu, + NolebaseEnhancedReadabilitiesScreenMenu, } from '@nolebase/vitepress-plugin-enhanced-readabilities/client' import VersionPicker from "@/VersionPicker.vue" @@ -36,4 +36,4 @@ export const Theme: ThemeConfig = { app.component('Authors', Authors) } } -export default Theme \ No newline at end of file +export default Theme diff --git a/docs/src/.vitepress/theme/style.css b/docs/src/.vitepress/theme/style.css index 07fa8727..936322d1 100644 --- a/docs/src/.vitepress/theme/style.css +++ b/docs/src/.vitepress/theme/style.css @@ -24,7 +24,7 @@ https://github.com/vuejs/vitepress/blob/main/src/client/theme-default/styles/var --vp-font-family-mono: JuliaMono-Regular, monospace; } -/* Disable contextual alternates (kind of like ligatures but different) in monospace, +/* Disable contextual alternates (kind of like ligatures but different) in monospace, which turns `/>` to an up arrow and `|>` (the Julia pipe symbol) to an up arrow as well. */ .mono-no-substitutions { font-family: "JuliaMono-Regular", monospace; @@ -98,7 +98,7 @@ font-feature-settings: "calt" off; -145deg, #9558b282 30%, #3798269a 30%, - #cb3d33e3 + #cb3d33e3 ); --vp-home-hero-image-filter: blur(40px); } @@ -172,4 +172,4 @@ mjx-container { mjx-container > svg { margin: auto; display: inline-block; -} \ No newline at end of file +} diff --git a/docs/src/benchmark.md b/docs/src/benchmark.md index 6da5b589..d47fb94e 100644 --- a/docs/src/benchmark.md +++ b/docs/src/benchmark.md @@ -21,7 +21,7 @@ GEMM Efficiency | GEMM GFLOPS ## Monte-Carlo Integration -Monte-Carlo integration is embaressingly parallel and should scale perfectly. We do not know the exact number of operations in `exp` so the GFLOPs is off by a constant factor. +Monte-Carlo integration is embaressingly parallel and should scale perfectly. We do not know the exact number of operations in `exp` so the GFLOPs is off by a constant factor. Code Outline: ```julia @@ -45,7 +45,7 @@ Solving a PDE requires halo-exchanges and lots of data movement. In this benchma Since there is no programatic way to set the hardware configuration (as of 24.11) benchmarking cuNumeric.jl code is a bit tedious. As an introduction, we walk through a benchmark of matrix multiplication (SGEMM). All the code for this benchmark can be found in the `cuNumeric.jl/pkg/benchmark` directory. -> [!WARNING] +> [!WARNING] > We do not commit to maintaining the benchmark scripts, due to difficulty programatically configuring legate and API overturn as we work on cuNumeric v1.0. The general principles used should work, even if the code does not. @@ -73,14 +73,14 @@ function total_space(N, M) end ``` -We cannot use rely on common benchmark tools in Julia like [BenchmarkTools.jl](https://github.com/JuliaCI/BenchmarkTools.jl) or [ChairMarks.jl](https://github.com/LilithHafner/Chairmarks.jl) or even the built in `Base.@time` macro. The asynchronous nature of operations on NDArrays means that function calls will execute almost immediately and program execution must be blocked to properly time a kernel. It is technically possible to time NDArray operations with something like [BenchmarkTools.jl](https://github.com/JuliaCI/BenchmarkTools.jl) by adding a blocking operation (e.g., accessing the result), but the allocations reported by these tools will never be correct and it is safer to use the timing functionality from CuPyNumeric. +We cannot use rely on common benchmark tools in Julia like [BenchmarkTools.jl](https://github.com/JuliaCI/BenchmarkTools.jl) or [ChairMarks.jl](https://github.com/LilithHafner/Chairmarks.jl) or even the built in `Base.@time` macro. The asynchronous nature of operations on NDArrays means that function calls will execute almost immediately and program execution must be blocked to properly time a kernel. It is technically possible to time NDArray operations with something like [BenchmarkTools.jl](https://github.com/JuliaCI/BenchmarkTools.jl) by adding a blocking operation (e.g., accessing the result), but the allocations reported by these tools will never be correct and it is safer to use the timing functionality from CuPyNumeric. -The timer built into CuPyNumeric blocks execution until all Legate operations preceding the call that generated the timing object complete. We provide two timing utilities: `get_time_microseconds` and `get_time_nanoseconds`. +The timer built into CuPyNumeric blocks execution until all Legate operations preceding the call that generated the timing object complete. We provide two timing utilities: `get_time_microseconds` and `get_time_nanoseconds`. Now we can write the benchmark code. There are two more parameters we need to set: the number of samples, `n_samples`, and the number of warm-up samples, `n_warnup`. With all this the benchmark loop is: ```julia -using LinearAlgebra +using LinearAlgebra using cuNumeric function gemm_cunumeric(N, M, n_samples, n_warmup) @@ -150,4 +150,3 @@ As part of a more complete benchmark we ran our code on up to 8 A100 GPUs (singl GEMM Efficiency | GEMM GFLOPS :-------------------------:|:-------------------------: ![GEMM Efficiency](images/gemm_efficiency.svg) | ![GEMM GFLOPS](images/gemm_gflops.svg) - diff --git a/docs/src/components/AuthorBadge.vue b/docs/src/components/AuthorBadge.vue index a64b0afd..2679c013 100644 --- a/docs/src/components/AuthorBadge.vue +++ b/docs/src/components/AuthorBadge.vue @@ -136,4 +136,4 @@ const getColor = (name) => { box-shadow: 0 0 0 1.25px rgba(248, 248, 247, 0.4); transition: all 0.2s ease; } - \ No newline at end of file + diff --git a/docs/src/components/Authors.vue b/docs/src/components/Authors.vue index ee7920b4..2ce9e19b 100644 --- a/docs/src/components/Authors.vue +++ b/docs/src/components/Authors.vue @@ -10,14 +10,14 @@ /> - + - + \ No newline at end of file + diff --git a/docs/src/components/VersionPicker.vue b/docs/src/components/VersionPicker.vue index d03b2e84..67f889d0 100644 --- a/docs/src/components/VersionPicker.vue +++ b/docs/src/components/VersionPicker.vue @@ -122,4 +122,4 @@ onMounted(() => { .VPVersionPicker:hover :deep(button .text) { color: var(--vp-c-text-2) !important; } - \ No newline at end of file + diff --git a/docs/src/dev.md b/docs/src/dev.md index 1bc332fd..4f133524 100644 --- a/docs/src/dev.md +++ b/docs/src/dev.md @@ -2,9 +2,9 @@ To contribute to cuNumeric.jl, we recommend cloning the repository and adding it to one of your existing environments with `Pkg.develop`. ```bash -git clone https://github.com/JuliaLegate/cuNumeric.jl.git +git clone https://github.com/JuliaLegate/cuNumeric.jl.git julia --project=. -e 'using Pkg; Pkg.develop(path = "cuNumeric.jl/lib/CNPreferences")' julia --project=. -e 'using Pkg; Pkg.develop(path = "cuNumeric.jl")' julia --project=. -e 'using CNPreferences; CNPreferences.use_developer_mode()' julia --project=. -e 'using Pkg; Pkg.build()' -``` \ No newline at end of file +``` diff --git a/docs/src/examples.md b/docs/src/examples.md index 18dcaf0e..005aa4ef 100644 --- a/docs/src/examples.md +++ b/docs/src/examples.md @@ -43,9 +43,9 @@ x_max = 10.0f0 domain = [-x_max, x_max] Ω = domain[2] - domain[1] -samples = Ω*cuNumeric.rand(N) .- x_max +samples = Ω*cuNumeric.rand(N) .- x_max -# Reductions return 0D NDArrays instead +# Reductions return 0D NDArrays instead # of a scalar to avoid blocking runtime estimate = (Ω/N) * sum(integrand(samples)) @@ -90,9 +90,9 @@ function step!(u, v, u_new, v_new, args::Params) (args.f+args.k).*v[2:end-1, 2:end-1]) # 2-D Laplacian of f using array slicing, excluding boundaries # For an N x N array f, f_lap is the Nend x Nend array in the "middle" - u_lap = ((u[3:end, 2:end-1] - 2*u[2:end-1, 2:end-1] + u[1:end-2, 2:end-1]) ./ args.dx^2 + u_lap = ((u[3:end, 2:end-1] - 2*u[2:end-1, 2:end-1] + u[1:end-2, 2:end-1]) ./ args.dx^2 + (u[2:end-1, 3:end] - 2*u[2:end-1, 2:end-1] + u[2:end-1, 1:end-2]) ./ args.dx^2) - v_lap = ((v[3:end, 2:end-1] - 2*v[2:end-1, 2:end-1] + v[1:end-2, 2:end-1]) ./ args.dx^2 + v_lap = ((v[3:end, 2:end-1] - 2*v[2:end-1, 2:end-1] + v[1:end-2, 2:end-1]) ./ args.dx^2 + (v[2:end-1, 3:end] - 2*v[2:end-1, 2:end-1] + v[2:end-1, 1:end-2]) ./ args.dx^2) # Forward-Euler time step for all points except the boundaries @@ -124,8 +124,8 @@ function gray_scott() for n in 1:n_steps step!(u, v, u_new, v_new, args) - # update u and v - # this doesn't copy, this switching references + # update u and v + # this doesn't copy, this switching references u, u_new = u_new, u v, v_new = v_new, v diff --git a/docs/src/install.md b/docs/src/install.md index b9e0451b..a8c9c1e7 100644 --- a/docs/src/install.md +++ b/docs/src/install.md @@ -40,18 +40,18 @@ pkg> add CNPreferences ``` ## Developer mode -> [!TIP] +> [!TIP] > This gives the most flexibility in installs. It is meant for developing on cuNumeric.jl. We support using a custom install version of cupynumeric. See https://docs.nvidia.com/cupynumeric/latest/installation.html for details about different install configurations, or building cupynumeric from source. We require that you have a g++ capable compiler of C++ 20, and a recent version CMake >= 3.26. -To use developer mode, +To use developer mode, ```julia using CNPreferences; CNPreferences.use_developer_mode(; use_cunumeric_jll=true, cunumeric_path=nothing) ``` -By default `use_cunumeric_jll` will be set to true. However, you can set a custom branch and/or use a custom path of cupynumeric. By setting `use_cunumeric_jll=false`, you can set `cunumeric_path` to your custom install. +By default `use_cunumeric_jll` will be set to true. However, you can set a custom branch and/or use a custom path of cupynumeric. By setting `use_cunumeric_jll=false`, you can set `cunumeric_path` to your custom install. ```julia using CNPreferences; CNPreferences.use_developer_mode(;use_cunumeric_jll=false, cunumeric_path="/path/to/cupynumeric/root") @@ -59,8 +59,8 @@ using CNPreferences; CNPreferences.use_developer_mode(;use_cunumeric_jll=false, ## Link Against Existing Conda Environment -> [!WARNING] -> This feature is not passing our CI currently. Please use with caution. We are failing to currently match proper versions of .so libraries together. Our hope is to get this functional for users already using Legate within conda. +> [!WARNING] +> This feature is not passing our CI currently. Please use with caution. We are failing to currently match proper versions of .so libraries together. Our hope is to get this functional for users already using Legate within conda. Note, you need conda >= 24.1 to install the conda package. More installation details are found [here](https://docs.nvidia.com/cupynumeric/latest/installation.html). @@ -70,7 +70,7 @@ conda create -n myenv -c conda-forge -c cupynumeric # into an existing environment conda install -c conda-forge -c cupynumerice ``` -Once you have the conda package installed, you can activate here. +Once you have the conda package installed, you can activate here. ```bash conda activate [conda-env-with-cupynumeric] ``` @@ -79,4 +79,4 @@ To update `LocalPreferences.toml` so that a local conda environment is used as t ```julia using CNPreferences; CNPreferences.use_conda("conda-env-with-legate"); Pkg.build() -``` \ No newline at end of file +``` diff --git a/docs/src/perf.md b/docs/src/perf.md index d3a71e18..71411858 100644 --- a/docs/src/perf.md +++ b/docs/src/perf.md @@ -1,10 +1,10 @@ # Performance Tips ## Avoid Scalar Indexing -Accessing elements of an NDArray one at a time (e.g., `arr[5]`) is slow and should be avoided. Indexing like this requires data to be trasfered between device and host and maybe even communicated across nodes. Scalar indexing will emit an error which can be opted out of with `@allowscalar` or `allwoscalar() do ... end`. Several functions in the existing API invoke scalar indexing and are intended for testing (e.g., the `==` operator). +Accessing elements of an NDArray one at a time (e.g., `arr[5]`) is slow and should be avoided. Indexing like this requires data to be trasfered between device and host and maybe even communicated across nodes. Scalar indexing will emit an error which can be opted out of with `@allowscalar` or `allwoscalar() do ... end`. Several functions in the existing API invoke scalar indexing and are intended for testing (e.g., the `==` operator). ## Avoid Implicit Promotion -Mixing integral types of different size (e.g., `Float64` and `Float32`) will result in implicit promotion of the smaller type to the larger types. This creates a copy of the data and hurts performance. Implicit promotion from a smaller integral type to a larger integral type will emit an error which can be opted out of with `@allowpromotion` or `allowpromotion() do ... end`. This error is common when mixing literals with `NDArrays`. By default a floating point literal (i.e., 1.0) is `Float64` but the default type of an `NDArray` is `Float32`. +Mixing integral types of different size (e.g., `Float64` and `Float32`) will result in implicit promotion of the smaller type to the larger types. This creates a copy of the data and hurts performance. Implicit promotion from a smaller integral type to a larger integral type will emit an error which can be opted out of with `@allowpromotion` or `allowpromotion() do ... end`. This error is common when mixing literals with `NDArrays`. By default a floating point literal (i.e., 1.0) is `Float64` but the default type of an `NDArray` is `Float32`. ## Kernel Fusion -cuPyNumeric does not fuse independent operations automatically, even in broadcast expressions. This is a priority for a future release. \ No newline at end of file +cuPyNumeric does not fuse independent operations automatically, even in broadcast expressions. This is a priority for a future release. diff --git a/docs/src/usage.md b/docs/src/usage.md index ee4af5ea..464e2f80 100644 --- a/docs/src/usage.md +++ b/docs/src/usage.md @@ -9,4 +9,4 @@ There is no programatic way to set the hardware configuration used by CuPyNumeri These variables must be set before launching the Julia instance running cuNumeric.jl. We recommend setting `export LEGATE_SHOW_CONFIG=1` so that the hardware configuration will be printed when Legate starts. This output is automatically captured and relayed to the user. -To manually set the hardware configuration, `export LEGATE_AUTO_CONFIG=0`, and then define your own config with something like `export LEGATE_CONFIG="--gpus 1 --cpus 10 --ompthreads 10"`. We recommend using the default memory configuration for your machine and only settings the `gpus`, `cpus` and `ompthreads`. More details about the Legate configuration can be found in the [NVIDIA Legate documentation](https://docs.nvidia.com/legate/latest/usage.html#resource-allocation). If you know where Legate is installed on your computer you can also run `legate --help` for more detailed information. \ No newline at end of file +To manually set the hardware configuration, `export LEGATE_AUTO_CONFIG=0`, and then define your own config with something like `export LEGATE_CONFIG="--gpus 1 --cpus 10 --ompthreads 10"`. We recommend using the default memory configuration for your machine and only settings the `gpus`, `cpus` and `ompthreads`. More details about the Legate configuration can be found in the [NVIDIA Legate documentation](https://docs.nvidia.com/legate/latest/usage.html#resource-allocation). If you know where Legate is installed on your computer you can also run `legate --help` for more detailed information. diff --git a/examples/custom_cuda.jl b/examples/custom_cuda.jl index 18675c4f..6092642b 100644 --- a/examples/custom_cuda.jl +++ b/examples/custom_cuda.jl @@ -11,7 +11,6 @@ function kernel_add(a, b, c, N) return nothing end - function kernel_sin(a, b, N) i = (blockIdx().x - 1i32) * blockDim().x + threadIdx().x if i <= N diff --git a/examples/gray-scott.jl b/examples/gray-scott.jl index 319e9fd2..f5865705 100644 --- a/examples/gray-scott.jl +++ b/examples/gray-scott.jl @@ -15,32 +15,48 @@ struct Params{T} end function bc!(u_new, v_new, u, v) - u_new[:,1] = u[:,end-1] - u_new[:,end] = u[:,2] - u_new[1,:] = u[end-1,:] - u_new[end,:] = u[2,:] - v_new[:,1] = v[:,end-1] - v_new[:,end] = v[:,2] - v_new[1,:] = v[end-1,:] - v_new[end,:] = v[2,:] + u_new[:, 1] = u[:, end - 1] + u_new[:, end] = u[:, 2] + u_new[1, :] = u[end - 1, :] + u_new[end, :] = u[2, :] + v_new[:, 1] = v[:, end - 1] + v_new[:, end] = v[:, 2] + v_new[1, :] = v[end - 1, :] + v_new[end, :] = v[2, :] end function step!(u, v, u_new, v_new, args::Params) # calculate F_u and F_v functions - F_u = ((-u[2:end-1, 2:end-1].*(v[2:end-1, 2:end-1] .^ 2)) .+ - args.f*(1.0f0 .- u[2:end-1, 2:end-1])) - F_v = ((u[2:end-1, 2:end-1].*(v[2:end-1, 2:end-1] .^ 2)) .- - (args.f+args.k).*v[2:end-1, 2:end-1]) + F_u = ( + (-u[2:(end - 1), 2:(end - 1)] .* (v[2:(end - 1), 2:(end - 1)] .^ 2)) .+ + args.f*(1.0f0 .- u[2:(end - 1), 2:(end - 1)]) + ) + F_v = ( + (u[2:(end - 1), 2:(end - 1)] .* (v[2:(end - 1), 2:(end - 1)] .^ 2)) .- + (args.f+args.k) .* v[2:(end - 1), 2:(end - 1)] + ) # 2-D Laplacian of f using array slicing, excluding boundaries # For an N x N array f, f_lap is the Nend x Nend array in the "middle" - u_lap = ((u[3:end, 2:end-1] - 2*u[2:end-1, 2:end-1] + u[1:end-2, 2:end-1]) ./ args.dx^2 - + (u[2:end-1, 3:end] - 2*u[2:end-1, 2:end-1] + u[2:end-1, 1:end-2]) ./ args.dx^2) - v_lap = ((v[3:end, 2:end-1] - 2*v[2:end-1, 2:end-1] + v[1:end-2, 2:end-1]) ./ args.dx^2 - + (v[2:end-1, 3:end] - 2*v[2:end-1, 2:end-1] + v[2:end-1, 1:end-2]) ./ args.dx^2) + u_lap = ( + (u[3:end, 2:(end - 1)] - 2*u[2:(end - 1), 2:(end - 1)] + u[1:(end - 2), 2:(end - 1)]) ./ + args.dx^2 + + + (u[2:(end - 1), 3:end] - 2*u[2:(end - 1), 2:(end - 1)] + u[2:(end - 1), 1:(end - 2)]) ./ + args.dx^2 + ) + v_lap = ( + (v[3:end, 2:(end - 1)] - 2*v[2:(end - 1), 2:(end - 1)] + v[1:(end - 2), 2:(end - 1)]) ./ + args.dx^2 + + + (v[2:(end - 1), 3:end] - 2*v[2:(end - 1), 2:(end - 1)] + v[2:(end - 1), 1:(end - 2)]) ./ + args.dx^2 + ) # Forward-Euler time step for all points except the boundaries - u_new[2:end-1, 2:end-1] = ((args.c_u * u_lap) + F_u) * args.dt + u[2:end-1, 2:end-1] - v_new[2:end-1, 2:end-1] = ((args.c_v * v_lap) + F_v) * args.dt + v[2:end-1, 2:end-1] + u_new[2:(end - 1), 2:(end - 1)] = + ((args.c_u * u_lap) + F_u) * args.dt + u[2:(end - 1), 2:(end - 1)] + v_new[2:(end - 1), 2:(end - 1)] = + ((args.c_v * v_lap) + F_v) * args.dt + v[2:(end - 1), 2:(end - 1)] # Apply periodic boundary conditions bc!(u_new, v_new, u, v) @@ -62,13 +78,13 @@ function gray_scott() u_new = cuNumeric.zeros(dims) v_new = cuNumeric.zeros(dims) - u[1:15,1:15] = cuNumeric.rand(15,15) - v[1:15,1:15] = cuNumeric.rand(15,15) + u[1:15, 1:15] = cuNumeric.rand(15, 15) + v[1:15, 1:15] = cuNumeric.rand(15, 15) for n in 1:n_steps step!(u, v, u_new, v_new, args) - # update u and v - # this doesn't copy, this switching references + # update u and v + # this doesn't copy, this switching references u, u_new = u_new, u v, v_new = v_new, v @@ -80,7 +96,6 @@ function gray_scott() end # gif(anim, "gray-scott.gif", fps=10) return u, v - end -u, v = gray_scott() \ No newline at end of file +u, v = gray_scott() diff --git a/examples/gray-scott.py b/examples/gray-scott.py index 3ea7c7a6..dce4e6ca 100644 --- a/examples/gray-scott.py +++ b/examples/gray-scott.py @@ -8,18 +8,18 @@ # import matplotlib.pyplot as plt def greyScottSys(u, v, dx, dt, c_u, c_v, f, k): - # u,v are arrays + # u,v are arrays # dx,dt are space and time steps # c_u, c_v, f, k are constant paramaters - + #create new u array u_new = np.zeros_like(u) v_new = np.zeros_like(v) - + #calculate F_u and F_v functions F_u = (-u[1:-1,1:-1]*(v[1:-1,1:-1]**2)) + f*(1-u[1:-1,1:-1]) F_v = (u[1:-1,1:-1]*(v[1:-1,1:-1]**2)) - (f+k)*v[1:-1,1:-1] - + # 2-D Laplacian of f using array slicing, excluding boundaries # For an N x N array f, f_lap is the N-1 x N-1 array in the "middle" u_lap = (u[2:,1:-1] - 2*u[1:-1,1:-1] + u[:-2,1:-1]) / dx**2\ @@ -69,10 +69,10 @@ def greyScottSys(u, v, dx, dt, c_u, c_v, f, k): # build a list of images for n in range(n_steps) : - + ## This may need to be changed. u,v = greyScottSys(u, v, dx, dt, c_u, c_v, f, k) - + # ## Store frames when n is a multiple of frame_interval # if n%frame_interval == 0: # im = plt.imshow(u, vmin=0, vmax=1) # Show a plot of u. diff --git a/examples/integrate.jl b/examples/integrate.jl index 0c941de4..fd03436e 100644 --- a/examples/integrate.jl +++ b/examples/integrate.jl @@ -3,7 +3,7 @@ using cuNumeric # Note that we do not yet support broadcasting # custom functions, so the braodcasting MUST # be done inside the function -integrand = (x) -> exp.(-x.^2) +integrand = (x) -> exp.(-x .^ 2) N = 1_000_000 @@ -11,11 +11,11 @@ x_max = 10.0f0 domain = [-x_max, x_max] Ω = domain[2] - domain[1] -samples = Ω*cuNumeric.rand(N) .- x_max +samples = Ω*cuNumeric.rand(N) .- x_max -# Reductions return 0D NDArrays instead +# Reductions return 0D NDArrays instead # of a scalar to avoid blocking runtime estimate = (Ω/N) * sum(integrand(samples)) println("Monte-Carlo Estimate: $(estimate)") -println("Analytical: $(sqrt(pi))") \ No newline at end of file +println("Analytical: $(sqrt(pi))") diff --git a/ext/CUDAExt/CUDAExt.jl b/ext/CUDAExt/CUDAExt.jl index ad1381a2..e991aac6 100644 --- a/ext/CUDAExt/CUDAExt.jl +++ b/ext/CUDAExt/CUDAExt.jl @@ -3,7 +3,7 @@ module CUDAExt using Random using CUDA using Legate: Legate -import CxxWrap +using CxxWrap: CxxWrap using cuNumeric: cuNumeric import cuNumeric: @cuda_task, @launch, NDArray diff --git a/lib/cunumeric_jl_wrapper/CMakeLists.txt b/lib/cunumeric_jl_wrapper/CMakeLists.txt index 87848a77..2da543ab 100644 --- a/lib/cunumeric_jl_wrapper/CMakeLists.txt +++ b/lib/cunumeric_jl_wrapper/CMakeLists.txt @@ -95,4 +95,4 @@ if(HAVE_CUDA) target_include_directories(${C_INTERFACE_LIB} PRIVATE ${CUDAToolkit_INCLUDE_DIRS}) endif() -install(TARGETS ${C_INTERFACE_LIB} DESTINATION lib) \ No newline at end of file +install(TARGETS ${C_INTERFACE_LIB} DESTINATION lib) diff --git a/lib/cunumeric_jl_wrapper/VERSION b/lib/cunumeric_jl_wrapper/VERSION index 3252f073..f07e60ca 100644 --- a/lib/cunumeric_jl_wrapper/VERSION +++ b/lib/cunumeric_jl_wrapper/VERSION @@ -1 +1 @@ -25.10.3 \ No newline at end of file +25.10.3 diff --git a/lib/cunumeric_jl_wrapper/src/memory.cpp b/lib/cunumeric_jl_wrapper/src/memory.cpp index 3e346977..9efe0fb6 100644 --- a/lib/cunumeric_jl_wrapper/src/memory.cpp +++ b/lib/cunumeric_jl_wrapper/src/memory.cpp @@ -129,4 +129,4 @@ uint64_t nda_query_total_host_memory() { Realm::Memory::SYSTEM_MEM); } -} // extern "C" \ No newline at end of file +} // extern "C" diff --git a/lib/cunumeric_jl_wrapper/src/types.cpp b/lib/cunumeric_jl_wrapper/src/types.cpp index 940aa5d7..2e73ebd5 100644 --- a/lib/cunumeric_jl_wrapper/src/types.cpp +++ b/lib/cunumeric_jl_wrapper/src/types.cpp @@ -161,4 +161,4 @@ void wrap_binary_ops(jlcxx::Module& mod) { CuPyNumericBinaryOpCode::CUPYNUMERIC_BINOP_RIGHT_SHIFT); mod.set_const("SUBTRACT", CuPyNumericBinaryOpCode::CUPYNUMERIC_BINOP_SUBTRACT); -} \ No newline at end of file +} diff --git a/scripts/README.md b/scripts/README.md index 14a37b4f..4fa1d0c4 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -1,14 +1,14 @@ ## Scripts -After initializing the git submodules, we need to install them. -- `install_cxxwrap.sh` WARNING: This will overwrite `/home/user/.julia/dev/libcxxwrap_julia_jll/override`. +After initializing the git submodules, we need to install them. +- `install_cxxwrap.sh` WARNING: This will overwrite `/home/user/.julia/dev/libcxxwrap_julia_jll/override`. We struggled compiling the wrapper package with g++ `(Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0` and CUDA `cuda_12.3.r12.3/compiler.33567101_0`. -The error is shown below. This seems to have mismatch with the atomic ref C++ versioning; however, we weren't able to pinpoint the exact issue. We have patched legion with `legion_redop.inl`. This is a temporary "hack" solution. +The error is shown below. This seems to have mismatch with the atomic ref C++ versioning; however, we weren't able to pinpoint the exact issue. We have patched legion with `legion_redop.inl`. This is a temporary "hack" solution. ``` In file included from /usr/include/c++/11/bits/shared_ptr_atomic.h:33, @@ -31,7 +31,7 @@ gmake: *** [Makefile:136: all] Error 2 ``` -This error was shown for each reduction operator in `legion_redop.inl` for complex types. The unmodified code of the Sum reduction is below. We would enter `#if defined(__cpp_lib_atomic_ref) && (__cpp_lib_atomic_ref >= 201806L)`; however, the compiler failed the static assert when `std::atomic_ref atomic(lhs);` was used. We have added a patch `patch_legion.sh` for the various default reductions where it does the TypePunning case. This will copy our patched file into the conda installed path of cupynumeric/legion. +This error was shown for each reduction operator in `legion_redop.inl` for complex types. The unmodified code of the Sum reduction is below. We would enter `#if defined(__cpp_lib_atomic_ref) && (__cpp_lib_atomic_ref >= 201806L)`; however, the compiler failed the static assert when `std::atomic_ref atomic(lhs);` was used. We have added a patch `patch_legion.sh` for the various default reductions where it does the TypePunning case. This will copy our patched file into the conda installed path of cupynumeric/legion. ``` #ifdef LEGION_REDOP_COMPLEX @@ -75,7 +75,3 @@ This error was shown for each reduction operator in `legion_redop.inl` for compl #endif #endif ``` - - - - diff --git a/scripts/build_cpp_wrapper.sh b/scripts/build_cpp_wrapper.sh index baacc85e..aae6f432 100755 --- a/scripts/build_cpp_wrapper.sh +++ b/scripts/build_cpp_wrapper.sh @@ -33,11 +33,11 @@ CUNUMERIC_WRAPPER_SOURCE=$CUNUMERICJL_ROOT_DIR/lib/cunumeric_jl_wrapper BUILD_DIR=$CUNUMERIC_WRAPPER_SOURCE/build if [[ ! -d "$BUILD_DIR" ]]; then - mkdir -p $BUILD_DIR + mkdir -p $BUILD_DIR fi if [[ ! -d "$INSTALL_DIR" ]]; then - mkdir -p $INSTALL_DIR + mkdir -p $INSTALL_DIR fi echo $LEGATE_ROOT_DIR diff --git a/scripts/install_cxxwrap.sh b/scripts/install_cxxwrap.sh index 370ebdb5..ba430d50 100755 --- a/scripts/install_cxxwrap.sh +++ b/scripts/install_cxxwrap.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2025 Northwestern University, +# Copyright 2025 Northwestern University, # Carnegie Mellon University University # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -79,9 +79,9 @@ julia -e 'using Pkg; Pkg.activate("."); Pkg.develop(PackageSpec(name="libcxxwrap # JULIA_CXXWRAP_OVERRIDE=$JULIA_CXXWRAP/override/ # Delete the default JLL installation of cxxwrap_julia -rm -rf $JULIA_CXXWRAP +rm -rf $JULIA_CXXWRAP mkdir $JULIA_CXXWRAP cmake -S $JULIA_CXXWRAP_SRC -B $JULIA_CXXWRAP -DJulia_EXECUTABLE=$JULIA_PATH -DCMAKE_BUILD_TYPE=Release cd $JULIA_CXXWRAP -make -j 16 \ No newline at end of file +make -j 16 diff --git a/scripts/test_container.sh b/scripts/test_container.sh index 8ccf1b01..e4319138 100755 --- a/scripts/test_container.sh +++ b/scripts/test_container.sh @@ -4,4 +4,4 @@ export LEGATE_AUTO_CONFIG=0 export LEGATE_SHOW_CONFIG=1 export LEGATE_CONFIG="--cpus 1 --utility 1 --sysmem 4000" -julia -e 'using Pkg; Pkg.test("cuNumeric")' \ No newline at end of file +julia -e 'using Pkg; Pkg.test("cuNumeric")' diff --git a/src/cuNumeric.jl b/src/cuNumeric.jl index 0e5c15d1..e9407207 100644 --- a/src/cuNumeric.jl +++ b/src/cuNumeric.jl @@ -1,4 +1,4 @@ -#= Copyright 2026 Northwestern University, +#= Copyright 2026 Northwestern University, * Carnegie Mellon University University * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -155,7 +155,7 @@ include("ndarray/binary.jl") # scoping macro include("scoping.jl") -# Utilities +# Utilities include("utilities/version.jl") include("utilities/cuda_stubs.jl") include("util.jl") @@ -199,7 +199,7 @@ function _start_runtime() # AA = ArgcArgv([Base.julia_cmd()[1]]) cuNumeric.initialize_cunumeric(AA.argc, getargv(AA)) - # setup /src/memory.jl + # setup /src/memory.jl cuNumeric.init_gc!() Base.atexit(my_on_exit) diff --git a/src/ndarray/binary.jl b/src/ndarray/binary.jl index a8715240..125c0df1 100644 --- a/src/ndarray/binary.jl +++ b/src/ndarray/binary.jl @@ -15,7 +15,7 @@ The following binary operations are supported and can be applied elementwise to • `>=` • `==` • `!=` - • `atan` + • `atan` • `hypot` • `max` • `min` @@ -40,7 +40,7 @@ A .^ 2 """ # Still missing: -# # Base.copysign => cuNumeric.COPYSIGN, #* ANNOYING TO TEST +# # Base.copysign => cuNumeric.COPYSIGN, #* ANNOYING TO TEST # #missing => cuNumeric.fmod, #same as mod in Julia? # # Base.isapprox => cuNumeric.ISCLOSE, #* HANDLE rtol, atol kwargs!!! # # Base.ldexp => cuNumeric.LDEXP, #* LHS FLOATS, RHS INTS @@ -197,7 +197,7 @@ for (julia_fn, op_code) in binary_op_map end # Some functions always return floats even when given integers -# in the case where the output is determined to be float, but +# in the case where the output is determined to be float, but # the input is integer, we first promote the input to float. for (julia_fn, op_code) in floaty_binary_op_map @eval begin diff --git a/src/ndarray/broadcast.jl b/src/ndarray/broadcast.jl index 50524343..fcf0a3e5 100644 --- a/src/ndarray/broadcast.jl +++ b/src/ndarray/broadcast.jl @@ -47,8 +47,8 @@ function __broadcast(f::Function, _, args...) ) end -# Get depth of Broadcast tree recursively -# Need to call instantiate first +# Get depth of Broadcast tree recursively +# Need to call instantiate first bcast_depth(bc::Base.Broadcast.Broadcasted) = maximum(bcast_depth, bc.args, init=0) + 1; bcast_depth(::Any) = 0 @@ -60,7 +60,7 @@ function Base.Broadcast.materialize(bc::Broadcasted{<:NDArrayStyle}) #* This be the place to inject kernel fusion via CUDA.jl #* Use the function in Base.Broadcast.flatten(bc). - #* How can we check all the funcs in this expr + #* How can we check all the funcs in this expr #* are supported by CUDA? return unravel_broadcast_tree(bc) @@ -89,7 +89,7 @@ function unravel_broadcast_tree(bc::Broadcasted) # Recursively materialize/unravel any nested broadcasts # until we reach a Broadcasted expression with only # NDArray or scalar arguments. - # This is the OPPOSITE of kernel fusion + # This is the OPPOSITE of kernel fusion materialized_args = __materialize.(bc.args) # Handle type promotion @@ -102,14 +102,14 @@ function unravel_broadcast_tree(bc::Broadcasted) out = similar(NDArray{T_OUT}, axes(bc)) # If the operation, "bc.f", is supported by cuNumeric, this - # dispatches to a function calling the C-API. + # dispatches to a function calling the C-API. # If not it falls back to a pass-through that just calls # the Julia function and assumes the user defined a function - # composed of supported operations. + # composed of supported operations. return __broadcast(bc.f, out, in_args...) end -# Support .= +# Support .= function Base.copyto!(dest::NDArray{T,N}, bc::Broadcasted{<:NDArrayStyle{N}}) where {T,N} # Moves result from broadcast (src) to dest. src array is no longer valid #! THIS ENABLES FOOT GUN IF USER SPECIFIES INTEGER ARRAY AT OUTPUT diff --git a/src/ndarray/detail/ndarray.jl b/src/ndarray/detail/ndarray.jl index 7186baa6..bb045a43 100644 --- a/src/ndarray/detail/ndarray.jl +++ b/src/ndarray/detail/ndarray.jl @@ -30,7 +30,7 @@ abstract type AbstractNDArray{T<:SUPPORTED_TYPES,N} end @doc""" The NDArray type represents a multi-dimensional array in cuNumeric. -It is a wrapper around a Legate array and provides various methods for array manipulation and operations. +It is a wrapper around a Legate array and provides various methods for array manipulation and operations. Finalizer calls `nda_destroy_array` to clean up the underlying Legate array when the NDArray is garbage collected. """ mutable struct NDArray{T,N,PADDED} <: AbstractNDArray{T,N} @@ -81,7 +81,7 @@ NDArray(ptr::NDArray_t; T=get_julia_type(ptr), N::Integer=get_n_dim(ptr)) = NDAr NDArray(value::T) where {T<:SUPPORTED_TYPES} = nda_full_array((), value) -# construction +# construction function nda_zeros_array(dims::Dims{N}, ::Type{T}) where {T,N} shape = collect(UInt64, dims) legate_type = Legate.to_legate_type(T) @@ -194,7 +194,7 @@ function nda_move(dst::NDArray{T,N}, src::NDArray{T,N}) where {T,N} register_free!(dst.nbytes) end -# operations +# operations function nda_binary_op!(out::NDArray, op_code::BinaryOpCode, rhs1::NDArray, rhs2::NDArray) ccall((:nda_binary_op, libnda), Cvoid, (NDArray_t, BinaryOpCode, NDArray_t, NDArray_t), diff --git a/src/ndarray/ndarray.jl b/src/ndarray/ndarray.jl index 21bfbd31..c3764ac7 100644 --- a/src/ndarray/ndarray.jl +++ b/src/ndarray/ndarray.jl @@ -1,4 +1,4 @@ -#= Copyright 2026 Northwestern University, +#= Copyright 2026 Northwestern University, * Carnegie Mellon University University * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -94,7 +94,7 @@ Base.copy(arr::NDArray) = nda_copy(arr) Assign the contents of `other` to `arr` element-wise. -This function overwrites the data in `arr` with the values from `other`. +This function overwrites the data in `arr` with the values from `other`. Both arrays must have the same shape. # Examples @@ -215,7 +215,7 @@ size(arr) size(arr, 2) ``` """ -Base.size(arr::NDArray{<:Any, N}) where N = cuNumeric.shape(arr) +Base.size(arr::NDArray{<:Any,N}) where {N} = cuNumeric.shape(arr) Base.size(arr::NDArray, dim::Int) = Base.size(arr)[dim] @doc""" @@ -290,7 +290,7 @@ end Overloads `Base.getindex` and `Base.setindex!` to support multidimensional indexing and slicing on `cuNumeric.NDArray`s. -Slicing supports combinations of `Int`, `UnitRange`, and `Colon()` for selecting ranges of rows and columns. +Slicing supports combinations of `Int`, `UnitRange`, and `Colon()` for selecting ranges of rows and columns. The use of all colons (`arr[:]`, `arr[:, :]`, etc.) returns a new Julia `Array` containing a copy of the data. Assignment also supports: @@ -511,7 +511,6 @@ falses(dims::Dims) = cuNumeric.fill(false, dims) falses(dims::Int...) = cuNumeric.fill(false, dims) falses(dim::Int) = cuNumeric.fill(false, dim) - @doc""" cuNumeric.zeros([T=Float32,] dims::Int...) cuNumeric.zeros([T=Float32,] dims::Tuple) @@ -526,7 +525,7 @@ cuNumeric.zeros(Float64, 3) cuNumeric.zeros(Int32, (2,3)) ``` """ -function zeros(::Type{T}, dims::Dims{N}) where {T<:SUPPORTED_TYPES, N} +function zeros(::Type{T}, dims::Dims{N}) where {T<:SUPPORTED_TYPES,N} return nda_zeros_array(dims, T) end @@ -614,7 +613,9 @@ A = cuNumeric.zeros(2, 2); cuNumeric.rand!(A) ``` """ Random.rand!(arr::NDArray{Float64}) = cuNumeric.nda_random(arr, 0) -Random.rand!(arr::NDArray{T}) where T = error("rand! only supports NDArray{Float64} for now. Cast with cuNumeric.as_type.") +function Random.rand!(arr::NDArray{T}) where {T} + error("rand! only supports NDArray{Float64} for now. Cast with cuNumeric.as_type.") +end function rand(::Type{T}, dims::Dims) where {T<:AbstractFloat} arrfp64 = cuNumeric.nda_random_array(dims) @@ -648,7 +649,7 @@ end #*USNTABLE USE Val{false} IF WE REALLY WANT THIS FLAG function reshape(arr::NDArray, i::Int...; copy::Bool=false) - return reshape(arr, i; copy = copy) + return reshape(arr, i; copy=copy) end # Ignore the scalar indexing here... diff --git a/src/ndarray/unary.jl b/src/ndarray/unary.jl index 3b12e73d..6f9e22ca 100644 --- a/src/ndarray/unary.jl +++ b/src/ndarray/unary.jl @@ -100,7 +100,7 @@ global const unary_op_map_no_args = Dict{Function,UnaryOpCode}( ### SPECIAL CASES ### -# Needed to support != +# Needed to support != Base.:(!)(input::NDArray{Bool,0}) = nda_unary_op!(similar(input), cuNumeric.LOGICAL_NOT, input) Base.:(!)(input::NDArray{Bool,1}) = nda_unary_op!(similar(input), cuNumeric.LOGICAL_NOT, input) @@ -165,7 +165,7 @@ for (julia_fn, op_code) in unary_op_map_no_args end # Some functions always return floats even when given integers -# in the case where the output is determined to be float, but +# in the case where the output is determined to be float, but # the input is integer, we first promote the input to float. for (julia_fn, op_code) in floaty_unary_ops_no_args @eval begin @@ -190,7 +190,7 @@ end # Base.clamp => Int(cuNumeric.CLIP), #* HAS EXTRA ARGS # Base.floor => cuNumeric.FLOOR, #! Doesnt support Bool, I do not feel like dealing with this right now... # Base.trunc => Int(cuNumeric.TRUNC) #* HAS EXTRA ARGS -# missing => Int(cuNumeric.RINT), #figure out which version of round +# missing => Int(cuNumeric.RINT), #figure out which version of round # missing => Int(cuNumeric.ROUND), #figure out which version of round # ) diff --git a/src/scoping.jl b/src/scoping.jl index 9caf4777..ecac81ca 100644 --- a/src/scoping.jl +++ b/src/scoping.jl @@ -5,9 +5,9 @@ export @cunumeric @doc""" @cunumeric expr -Wraps a block of code so that all temporary `NDArray` allocations -(e.g. from slicing or function calls) are tracked and safely freed -at the end of the block. Ensures proper cleanup of GPU memory by +Wraps a block of code so that all temporary `NDArray` allocations +(e.g. from slicing or function calls) are tracked and safely freed +at the end of the block. Ensures proper cleanup of GPU memory by inserting `maybe_insert_delete` calls automatically. """ macro cunumeric(block) diff --git a/src/utilities/cuda_stubs.jl b/src/utilities/cuda_stubs.jl index b824c059..caead31a 100644 --- a/src/utilities/cuda_stubs.jl +++ b/src/utilities/cuda_stubs.jl @@ -1,5 +1,5 @@ ## This file contains stubs for methods implemented in -## the CUDA package extensions not implemented +## the CUDA package extensions not implemented ## elsewhere in the package. export @cuda_task, @launch @@ -17,32 +17,32 @@ end """ @cuda_task(f(args...)) -Compile a Julia GPU kernel to PTX, register it with the Legate runtime, +Compile a Julia GPU kernel to PTX, register it with the Legate runtime, and return a `CUDATask` object for later launch. # Arguments - `f` — The name of the Julia CUDA.jl GPU kernel function to compile. -- `args...` — Example arguments to the kernel, used to determine the +- `args...` — Example arguments to the kernel, used to determine the argument type signature when generating PTX. # Description This macro automates the process of: -1. Inferring the CUDA argument types for the given `args` using +1. Inferring the CUDA argument types for the given `args` using `map_ndarray_cuda_types`. -2. Using `CUDA.code_ptx` to compile the specified GPU kernel +2. Using `CUDA.code_ptx` to compile the specified GPU kernel (`f`) into raw PTX text for the inferred types. -3. Extracting the kernel's function symbol name from the PTX using +3. Extracting the kernel's function symbol name from the PTX using `extract_kernel_name`. -4. Registering the compiled PTX and kernel name with the Legate runtime +4. Registering the compiled PTX and kernel name with the Legate runtime via `ptx_task`, making it available for GPU execution. 5. Returning a `CUDATask` struct that stores the kernel name and type signature, which can be used to configure and launch the kernel later. # Notes - The `args...` are not executed; they are used solely for type inference. -- This macro is intended for use with the Legate runtime and +- This macro is intended for use with the Legate runtime and assumes a CUDA context is available. -- Make sure your kernel code is GPU-compatible and does not rely on +- Make sure your kernel code is GPU-compatible and does not rely on unsupported Julia features. # Example @@ -58,7 +58,7 @@ macro cuda_task end Launch a GPU kernel (previously registered via [`@cuda_task`](@ref)) through the Legate runtime. # Keywords -- `task` — A `CUDATask` object, typically returned by [`@cuda_task`](@ref). +- `task` — A `CUDATask` object, typically returned by [`@cuda_task`](@ref). - `blocks` — Tuple or single element specifying the CUDA grid dimensions. Defaults to `(1,)`. - `threads` — Tuple or single element specifying the CUDA block dimensions. Defaults to `(256,)`. - `inputs` — Tuple or single element of input NDArray objects. @@ -66,20 +66,20 @@ Launch a GPU kernel (previously registered via [`@cuda_task`](@ref)) through th - `scalars` — Tuple or single element of scalar values. # Description -The `@launch` macro validates the provided keywords, ensuring only -the allowed set (`:task`, `:blocks`, `:threads`, `:inputs`, `:outputs`, `:scalars`) -are present. It then expands to a call to `cuNumeric.launch`, +The `@launch` macro validates the provided keywords, ensuring only +the allowed set (`:task`, `:blocks`, `:threads`, `:inputs`, `:outputs`, `:scalars`) +are present. It then expands to a call to `cuNumeric.launch`, passing the given arguments to the Legate runtime for execution. -This macro is meant to provide a concise, declarative syntax for -launching GPU kernels, separating kernel compilation (via `@cuda_task`) +This macro is meant to provide a concise, declarative syntax for +launching GPU kernels, separating kernel compilation (via `@cuda_task`) from execution configuration. # Notes - `task` **must** be a kernel registered with the runtime, usually from `@cuda_task`. - All keyword arguments must be specified as assignments, e.g. `blocks=(2,2)` not positional arguments. - Defaults are chosen for single-block, 256-thread 1D launches. -- The macro escapes its body so that the values of inputs/outputs/scalars are captured +- The macro escapes its body so that the values of inputs/outputs/scalars are captured from the surrounding scope at macro expansion time. # Example diff --git a/src/utilities/preference.jl b/src/utilities/preference.jl index 2342efe9..bb2effb9 100644 --- a/src/utilities/preference.jl +++ b/src/utilities/preference.jl @@ -1,4 +1,4 @@ -#= Copyright 2026 Northwestern University, +#= Copyright 2026 Northwestern University, * Carnegie Mellon University University * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/utilities/version.jl b/src/utilities/version.jl index 08802867..b2cf79fa 100644 --- a/src/utilities/version.jl +++ b/src/utilities/version.jl @@ -26,7 +26,6 @@ Prints the cuNumeric build configuration summary, including package metadata, Julia and compiler version, and paths to core dependencies. """ function versioninfo(io::IO=stdout) - name = string(Base.nameof(@__MODULE__)) version = string(Base.pkgversion(cuNumeric)) compiler = get_cxx_version(CUPYNUMERIC_LIB_PATH) @@ -80,7 +79,7 @@ function versioninfo(io::IO=stdout) Wrappers: cuNumeric $(CUPYNUMERIC_WRAPPER_LIBDIR) Legate $liblegatewrapper - + Modes: cuNumeric: $(CNPreferences.MODE) Legate: $(LegatePreferences.MODE) diff --git a/src/warnings.jl b/src/warnings.jl index 4d2e0b74..c993571e 100644 --- a/src/warnings.jl +++ b/src/warnings.jl @@ -10,7 +10,6 @@ export allowpromotion, @allowpromotion, assertpromotion, allowscalar, @allowscal const requested_scalar_indexing = Ref{Union{Nothing,ScalarIndexing}}(nothing) const requested_implicit_promotion = Ref{Union{Nothing,ImplicitPromotion}}(nothing) - const _repl_frontend_task = Ref{Union{Nothing,Missing,Task}}() function repl_frontend_task() if !isassigned(_repl_frontend_task) @@ -50,7 +49,6 @@ end default_implicit_promotion() = PromotionDisallowed - """ assertscalar(op::String) @@ -70,7 +68,7 @@ function assertscalar(op::String) behavior = behavior::ScalarIndexing if behavior === ScalarAllowed # fast path - return + return nothing end _assertscalar(op, behavior) @@ -82,7 +80,7 @@ end Assert that a certain operation `op` performs promotion to a wider type. If this is not allowed, an error will be thrown ([`assertpromotion`](@ref)). """ -function assertpromotion(op, ::Type{FROM}, ::Type{TO}) where {FROM, TO} +function assertpromotion(op, ::Type{FROM}, ::Type{TO}) where {FROM,TO} behavior = get(task_local_storage(), :ImplicitPromotion, nothing) if behavior === nothing behavior = requested_implicit_promotion[] @@ -95,7 +93,7 @@ function assertpromotion(op, ::Type{FROM}, ::Type{TO}) where {FROM, TO} behavior = behavior::ImplicitPromotion if behavior === PromotionAllowed # fast path - return + return nothing end _assertpromotion(op, behavior, FROM, TO) @@ -109,10 +107,10 @@ end task_local_storage(:ScalarIndexing, ScalarWarned) end - return + return nothing end -@noinline function _assertpromotion(op, behavior, ::Type{FROM}, ::Type{TO}) where {FROM, TO} +@noinline function _assertpromotion(op, behavior, ::Type{FROM}, ::Type{TO}) where {FROM,TO} if behavior == PromotionDisallowed errordouble(op, FROM, TO) elseif behavior == PromotionWarn @@ -120,7 +118,7 @@ end task_local_storage(:ImplicitPromotion, PromotionWarned) end - return + return nothing end function scalardesc(op) @@ -132,9 +130,9 @@ function scalardesc(op) to enable scalar iteration globally or for the operations in question.""" end -function promotiondesc(op, ::Type{FROM}, ::Type{TO}) where {FROM, TO} - desc = """Invocation of $op resulted in implicit promotion of an NDArray from $(FROM) to - wider type: $(TO). This is typically caused by mixing NDArrays or literals +function promotiondesc(op, ::Type{FROM}, ::Type{TO}) where {FROM,TO} + desc = """Invocation of $op resulted in implicit promotion of an NDArray from $(FROM) to + wider type: $(TO). This is typically caused by mixing NDArrays or literals with different precision. This can cause extra copies of data and is slow. If you want to allow implicit promotion to wider types, use `allowpromotion` or `@allowpromotion` @@ -147,7 +145,7 @@ end $desc""") end -@noinline function warnsdouble(op, ::Type{FROM}, ::Type{TO}) where {FROM, TO} +@noinline function warnsdouble(op, ::Type{FROM}, ::Type{TO}) where {FROM,TO} desc = promotiondesc(op, FROM, TO) @warn("""Promotiong data to wider type on task $(current_task()). $desc""") @@ -159,7 +157,7 @@ end $desc""") end -@noinline function errordouble(op, ::Type{FROM}, ::Type{TO}) where {FROM, TO} +@noinline function errordouble(op, ::Type{FROM}, ::Type{TO}) where {FROM,TO} desc = promotiondesc(op, FROM, TO) error("""Implicit promotion to wider type is disallowed. $desc""") @@ -170,9 +168,9 @@ end # this problem will be introduced in https://github.com/JuliaLang/julia/pull/39217 macro __tryfinally(ex, fin) Expr(:tryfinally, - :($(esc(ex))), - :($(esc(fin))) - ) + :($(esc(ex))), + :($(esc(fin))), + ) end """ @@ -200,7 +198,7 @@ function allowscalar(allow::Bool=true) setting = allow ? ScalarAllowed : ScalarDisallowed task_local_storage(:ScalarIndexing, setting) requested_scalar_indexing[] = setting - return + return nothing end """ @@ -219,7 +217,7 @@ allowpromotion function allowpromotion(f::Base.Callable, allow::Bool=true) setting = allow ? PromotionAllowed : PromotionDisallowed task_local_storage(f, :ImplicitPromotion, setting) - return + return nothing end function allowpromotion(allow::Bool=true) @@ -230,7 +228,7 @@ function allowpromotion(allow::Bool=true) setting = allow ? PromotionAllowed : PromotionDisallowed task_local_storage(:ImplicitPromotion, setting) requested_implicit_promotion[] = setting - return + return nothing end """ @@ -247,8 +245,11 @@ macro allowscalar(ex) local tls_value = get(task_local_storage(), :ScalarIndexing, nothing) task_local_storage(:ScalarIndexing, ScalarAllowed) @__tryfinally($(esc(ex)), - isnothing(tls_value) ? delete!(task_local_storage(), :ScalarIndexing) - : task_local_storage(:ScalarIndexing, tls_value)) + if isnothing(tls_value) + delete!(task_local_storage(), :ScalarIndexing) + else + task_local_storage(:ScalarIndexing, tls_value) + end) end end @@ -266,7 +267,10 @@ macro allowpromotion(ex) local tls_value = get(task_local_storage(), :ImplicitPromotion, nothing) task_local_storage(:ImplicitPromotion, PromotionAllowed) @__tryfinally($(esc(ex)), - isnothing(tls_value) ? delete!(task_local_storage(), :ImplicitPromotion) - : task_local_storage(:ImplicitPromotion, tls_value)) + if isnothing(tls_value) + delete!(task_local_storage(), :ImplicitPromotion) + else + task_local_storage(:ImplicitPromotion, tls_value) + end) end end diff --git a/test/runtests.jl b/test/runtests.jl index bad87b28..9e73ce71 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,4 @@ -#= Copyright 2025 Northwestern University, +#= Copyright 2025 Northwestern University, * Carnegie Mellon University University * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/test/tests/axpy.jl b/test/tests/axpy.jl index 4ab7a754..0b5452cb 100644 --- a/test/tests/axpy.jl +++ b/test/tests/axpy.jl @@ -1,4 +1,4 @@ -#= Copyright 2025 Northwestern University, +#= Copyright 2025 Northwestern University, * Carnegie Mellon University University * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,8 +18,8 @@ =# #= Purpose of test: daxpy - -- Focused on double 2 dimenional. Does not test other types or dims. - -- NDArray intialization + -- Focused on double 2 dimenional. Does not test other types or dims. + -- NDArray intialization -- NDArray writing and reading scalar indexing -- shows both [i, j] and [(i, j)] working -- NDArray addition and multiplication diff --git a/test/tests/axpy_advanced.jl b/test/tests/axpy_advanced.jl index fe87d513..d4a285e1 100644 --- a/test/tests/axpy_advanced.jl +++ b/test/tests/axpy_advanced.jl @@ -1,4 +1,4 @@ -#= Copyright 2025 Northwestern University, +#= Copyright 2025 Northwestern University, * Carnegie Mellon University University * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -21,10 +21,10 @@ -- add overloading support for [double/float scalar] * NDArray -- equavalence operator between a cuNumeric and Julia array without looping -- result == (α_cpu * x_cpu + y_cpu) - -- (α_cpu * x_cpu + y_cpu) == + -- (α_cpu * x_cpu + y_cpu) == -- NDArray copy method allocates a new NDArray and copies all elements -- NDArray assign method assigns the contents from one NDArray to another NDArray - -- x[:] colon notation for reading entire 1D NDArray to a Julia array + -- x[:] colon notation for reading entire 1D NDArray to a Julia array -- x[:, :] colon notation for reading entire 2D NDArray to a Julia array -- x[:, :] colon notation for filling entire 2D NDArray with scalar -- reshape method. we test a reshape from NxN to N*N @@ -111,7 +111,7 @@ function axpy_advanced(T, N) result = α .* x .+ y - # check results + # check results @test is_same(result, (α * x_cpu + y_cpu)) @test is_same(α * x_cpu + y_cpu, result) # LHS and RHS switched end diff --git a/test/tests/binary_tests.jl b/test/tests/binary_tests.jl index ea69da43..0af177d6 100644 --- a/test/tests/binary_tests.jl +++ b/test/tests/binary_tests.jl @@ -22,7 +22,7 @@ function test_binary_function_set(func_dict, T, N) @testset "$func" for func in keys(func_dict) - # This is tested separately + # This is tested separately func == Base.:(^) && continue (func in skip) && continue diff --git a/test/tests/cuda/vecadd.jl b/test/tests/cuda/vecadd.jl index 77aa9a24..ea01e7ab 100644 --- a/test/tests/cuda/vecadd.jl +++ b/test/tests/cuda/vecadd.jl @@ -116,7 +116,7 @@ function cuda_unaryop(max_diff) end task = cuNumeric.@cuda_task kernel_sin(a, b, UInt32(N)) - # TODO explore getting inplace ops working. + # TODO explore getting inplace ops working. cuNumeric.@launch task=task threads=threads blocks=blocks inputs=a outputs=b scalars=UInt32(N) @test @allowscalar cuNumeric.compare(b, b_cpu, atol(Float32), rtol(Float32)) diff --git a/test/tests/elementwise.jl b/test/tests/elementwise.jl index 0a378a5d..5f0d76bd 100644 --- a/test/tests/elementwise.jl +++ b/test/tests/elementwise.jl @@ -1,4 +1,4 @@ -#= Copyright 2025 Northwestern University, +#= Copyright 2025 Northwestern University, * Carnegie Mellon University University * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/test/tests/gemm.jl b/test/tests/gemm.jl index 57fdc12b..e0f47984 100644 --- a/test/tests/gemm.jl +++ b/test/tests/gemm.jl @@ -1,4 +1,4 @@ -#= Copyright 2025 Northwestern University, +#= Copyright 2025 Northwestern University, * Carnegie Mellon University University * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,20 +17,17 @@ * Ethan Meitz =# - - function gemm(N, M, T, max_diff) - if T == Bool - a = cuNumeric.trues(5,5) - b = cuNumeric.as_type(cuNumeric.trues(5,5), Float32) - c = cuNumeric.as_type(cuNumeric.trues(5,5), Float64) + a = cuNumeric.trues(5, 5) + b = cuNumeric.as_type(cuNumeric.trues(5, 5), Float32) + c = cuNumeric.as_type(cuNumeric.trues(5, 5), Float64) @test_throws ArgumentError a * a # Bool * Bool not supported @allowpromotion d = a * b @allowpromotion e = a * c @test @allowscalar cuNumeric.compare(5 * ones(Float32, 5, 5), d, 0.0, max_diff) @test @allowscalar cuNumeric.compare(5 * ones(Float64, 5, 5), e, 0.0, max_diff) - return + return nothing end if T <: Integer @@ -40,10 +37,10 @@ function gemm(N, M, T, max_diff) b_jl = ones(Float32, 5, 5) @test_throws ArgumentError a * a @test @allowscalar cuNumeric.compare(a_jl * b_jl, a * b, 0.0, max_diff) - return + return nothing end - dims_to_test = [(N,N), (N, M), (M, N)] + dims_to_test = [(N, N), (N, M), (M, N)] @testset for dims in dims_to_test # Base julia arrays @@ -54,7 +51,7 @@ function gemm(N, M, T, max_diff) # cunumeric arrays A = cuNumeric.zeros(T, dims[1], dims[2]) B = cuNumeric.zeros(T, dims[2], dims[1]) - C_out = cuNumeric.zeros(T, dims[1], dims[1]) + C_out = cuNumeric.zeros(T, dims[1], dims[1]) # Initialize NDArrays with random values # used in Julia arrays @@ -79,8 +76,8 @@ function gemm(N, M, T, max_diff) LinearAlgebra.mul!(C_out, A, B) allowscalar() do - @test isapprox(C, C_cpu, rtol = max_diff) - @test isapprox(C, C_out, rtol = max_diff) + @test isapprox(C, C_cpu, rtol=max_diff) + @test isapprox(C, C_out, rtol=max_diff) if T != Float64 C_wider = cuNumeric.zeros(Float64, dims[1], dims[1]) diff --git a/test/tests/linalg.jl b/test/tests/linalg.jl index 93bd6783..32a18100 100644 --- a/test/tests/linalg.jl +++ b/test/tests/linalg.jl @@ -1,4 +1,4 @@ -#= Copyright 2026 Northwestern University, +#= Copyright 2026 Northwestern University, * Carnegie Mellon University University * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/test/tests/scoping-advanced.jl b/test/tests/scoping-advanced.jl index 781c28e7..e2cc412e 100644 --- a/test/tests/scoping-advanced.jl +++ b/test/tests/scoping-advanced.jl @@ -19,7 +19,7 @@ end function step(u, v, u_new, v_new, args::ParamsGS) @cunumeric begin # calculate F_u and F_v functions - # currently we don't have NDArray^x working yet. + # currently we don't have NDArray^x working yet. F_u = ( ( -u[2:(end - 1), 2:(end - 1)] .* @@ -76,7 +76,7 @@ end # same as above but without @cunumeric macro function step_base(u, v, u_new, v_new, args::ParamsGS) # calculate F_u and F_v functions - # currently we don't have NDArray^x working yet. + # currently we don't have NDArray^x working yet. F_u = ( ( -u[2:(end - 1), 2:(end - 1)] .* diff --git a/test/tests/slicing.jl b/test/tests/slicing.jl index ce09750a..0a8c439c 100644 --- a/test/tests/slicing.jl +++ b/test/tests/slicing.jl @@ -29,7 +29,9 @@ struct Params{T} f::T k::T - function Params(::Type{T}, dx=T(0.1), c_u=T(1.0), c_v=T(0.3), f=T(0.03), k=T(0.06)) where {T <: AbstractFloat} + function Params( + ::Type{T}, dx=T(0.1), c_u=T(1.0), c_v=T(0.3), f=T(0.03), k=T(0.06) + ) where {T<:AbstractFloat} new{T}(dx, dx/T(5), c_u, c_v, f, k) end end @@ -72,7 +74,7 @@ end # gray scott function step(u, v, u_new, v_new, args::Params) # calculate F_u and F_v functions - # currently we don't have NDArray^x working yet. + # currently we don't have NDArray^x working yet. F_u = ( ( -u[2:(end - 1), 2:(end - 1)] .* diff --git a/test/tests/stability.jl b/test/tests/stability.jl index d097454a..9f929946 100644 --- a/test/tests/stability.jl +++ b/test/tests/stability.jl @@ -25,7 +25,7 @@ end for constructor in (:trues, :falses) @eval begin @inferred cuNumeric.$(constructor)(5) - @inferred cuNumeric.$(constructor)((5,4)) + @inferred cuNumeric.$(constructor)((5, 4)) @inferred cuNumeric.$(constructor)(3, 4, 5) end end @@ -74,4 +74,4 @@ end @inferred a .+ b @inferred a ./ b @inferred ((a .* b) .+ a) .* 2.0f0 -end \ No newline at end of file +end diff --git a/test/tests/unary_tests.jl b/test/tests/unary_tests.jl index 4e349c7b..734826e7 100644 --- a/test/tests/unary_tests.jl +++ b/test/tests/unary_tests.jl @@ -9,21 +9,19 @@ const SPECIAL_DOMAINS = Dict( Base.sqrt => :positive, ) - function test_unary_operation(func, julia_arr, cunumeric_arr, T) - T_OUT = Base.promote_op(func, T) - + # Pre-allocate output arrays cunumeric_in_place = cuNumeric.zeros(T_OUT, size(julia_arr)...) - + # Compute results using different methods julia_res = func.(julia_arr) - + cunumeric_res = func.(cunumeric_arr) cunumeric_in_place .= func.(cunumeric_arr) cunumeric_res2 = map(func, cunumeric_arr) - + allowscalar() do @test cuNumeric.compare(julia_res, cunumeric_in_place, atol(T_OUT), rtol(T_OUT)) @test cuNumeric.compare(julia_res, cunumeric_res, atol(T_OUT), rtol(T_OUT)) @@ -32,12 +30,11 @@ function test_unary_operation(func, julia_arr, cunumeric_arr, T) end function test_unary_function_set(func_dict, T, N) - default_generator = (T == Bool) ? :uniform : :unit_interval skip_on_integer = (Base.atanh, Base.atan, Base.acos, Base.asin) skip_on_bool = (Base.:(-), skip_on_integer...) - + @testset "$func" for func in keys(func_dict) # The are only defined for like 3 integers (-1, 0, 1) so just skip them @@ -56,9 +53,11 @@ function test_unary_function_set(func_dict, T, N) skip && continue julia_arr_1D, julia_arr_2D = make_julia_arrays(T, N, domain_type) - cunumeric_arr_1D, cunumeric_arr_2D = make_cunumeric_arrays([julia_arr_1D], [julia_arr_2D], T, N) - + cunumeric_arr_1D, cunumeric_arr_2D = make_cunumeric_arrays( + [julia_arr_1D], [julia_arr_2D], T, N + ) + test_unary_operation(func, julia_arr_1D, cunumeric_arr_1D, T) test_unary_operation(func, julia_arr_2D, cunumeric_arr_2D, T) end -end \ No newline at end of file +end