From f17aabb5b3b4fc37504acd006190d00fb58df95e Mon Sep 17 00:00:00 2001 From: ejmeitz Date: Wed, 25 Feb 2026 13:32:03 -0600 Subject: [PATCH 1/8] working on instabilities --- examples/custom_cuda.jl | 4 +-- src/ndarray/detail/ndarray.jl | 23 ++++++------ src/ndarray/ndarray.jl | 68 ++++++++++++++++++++--------------- 3 files changed, 54 insertions(+), 41 deletions(-) diff --git a/examples/custom_cuda.jl b/examples/custom_cuda.jl index 7c6d8511..18675c4f 100644 --- a/examples/custom_cuda.jl +++ b/examples/custom_cuda.jl @@ -24,8 +24,8 @@ N = 1024 threads = 256 blocks = cld(N, threads) -a = cuNumeric.full(N, 1.0f0) -b = cuNumeric.full(N, 2.0f0) +a = cuNumeric.fill(1.0f0, N) +b = cuNumeric.fill(2.0f0, N) c = cuNumeric.ones(Float32, N) # task = cuNumeric.@cuda_task kernel_add(a, b, c, UInt32(1)) diff --git a/src/ndarray/detail/ndarray.jl b/src/ndarray/detail/ndarray.jl index 5ecc10ea..f124bc80 100644 --- a/src/ndarray/detail/ndarray.jl +++ b/src/ndarray/detail/ndarray.jl @@ -77,28 +77,28 @@ end # return NDArray(ptr, T = T, n_dim = 1) # end -NDArray(value::T) where {T<:SUPPORTED_TYPES} = nda_full_array(UInt64[], value) +NDArray(value::T) where {T<:SUPPORTED_TYPES} = nda_full_array((), value) # construction -function nda_zeros_array(shape::Vector{UInt64}, ::Type{T}) where {T} - n_dim = Int32(length(shape)) +function nda_zeros_array(dims::Dims{N}, ::Type{T}) where {T, N} + shape = collect(UInt64, dims) legate_type = Legate.to_legate_type(T) ptr = ccall((:nda_zeros_array, libnda), NDArray_t, (Int32, Ptr{UInt64}, Legate.LegateTypeAllocated), - n_dim, shape, legate_type) - return NDArray(ptr; T=T, n_dim=n_dim) + Int32(N), shape, legate_type) + return NDArray(ptr; T=T, n_dim=N) end -function nda_full_array(shape::Vector{UInt64}, value::T) where {T} - n_dim = Int32(length(shape)) +function nda_full_array(dims::Dims{N}, value::T) where {T, N} + shape = collect(UInt64, dims) type = Legate.to_legate_type(T) ptr = ccall((:nda_full_array, libnda), NDArray_t, (Int32, Ptr{UInt64}, Legate.LegateTypeAllocated, Ptr{Cvoid}), - n_dim, shape, type, Ref(value)) + Int32(N), shape, type, Ref(value)) - return NDArray(ptr; T=T, n_dim=n_dim) + return NDArray(ptr; T=T, n_dim=N) end function nda_random(arr::NDArray, gen_code) @@ -408,7 +408,10 @@ end Return the size of the given `NDArray`. This will include the padded size. """ -padded_shape(arr::NDArray) = Tuple(Int.(cuNumeric.nda_array_shape(arr))) +function padded_shape(arr::NDArray{<:Any,N}) where {N} + shp = cuNumeric.nda_array_shape(arr) + return ntuple(i -> Int(shp[i]), Val(N)) +end @doc""" shape(arr::NDArray) diff --git a/src/ndarray/ndarray.jl b/src/ndarray/ndarray.jl index 02ec68e5..f214eca6 100644 --- a/src/ndarray/ndarray.jl +++ b/src/ndarray/ndarray.jl @@ -220,7 +220,7 @@ size(arr) size(arr, 2) ``` """ -Base.size(arr::NDArray) = cuNumeric.shape(arr) +Base.size(arr::NDArray{<:Any, N}) where N = cuNumeric.shape(arr) Base.size(arr::NDArray, dim::Int) = Base.size(arr)[dim] @doc""" @@ -253,12 +253,16 @@ Base.IndexStyle(::NDArray) = IndexCartesian() function Base.show(io::IO, arr::NDArray{T,0}) where {T} println(io, "0-dimensional NDArray{$(T),0}") - print(io, arr[]) #! should I assert scalar?? + allowscalar() do + print(io, arr[]) + end end function Base.show(io::IO, ::MIME"text/plain", arr::NDArray{T,0}) where {T} println(io, "0-dimensional NDArray{$(T),0}") - print(io, arr[]) #! should I assert scalar?? + allowscalar() do + print(io, arr[]) + end end function Base.show(io::IO, arr::NDArray{T,D}) where {T,D} @@ -304,7 +308,7 @@ Assignment also supports: # Examples ```@repl -A = cuNumeric.full((3, 3), 1.0); +A = cuNumeric.fill(1.0, (3, 3)); A[1, 2] A[1:2, 2:3] = cuNumeric.ones(2, 2); A[:, 1] = 5.0; @@ -461,25 +465,27 @@ Base.fill!(arr::NDArray{T}, val::T) where {T} = nda_fill_array(arr, val) #### INITIALIZATION OF NDARRAYS #### @doc""" - cuNumeric.full(dims::Tuple, val) - cuNumeric.full(dim::Int, val) + cuNumeric.fill(val::T, dims::Dims) + cuNumeric.fill(val::T, dims::Int...) Create an `NDArray` filled with the scalar value `val`, with the shape specified by `dims`. # Examples ```@repl -cuNumeric.full((2, 3), 7.5) -cuNumeric.full(4, 0) +cuNumeric.fill(7.5, (2, 3)) +cuNumeric.fill(0, 4) ``` """ -function full(dims::Dims, val::T) where {T<:SUPPORTED_TYPES} - shape = collect(UInt64, dims) - return nda_full_array(shape, val) +function fill(val::T, dims::Dims) where {T<:SUPPORTED_TYPES} + return nda_full_array(dims, val) end -function full(dim::Int, val::T) where {T<:SUPPORTED_TYPES} - shape = UInt64[dim] - return nda_full_array(shape, val) +function fill(val::T, dims::Int...) where {T<:SUPPORTED_TYPES} + return fill(val, dims) +end + +function fill(val::T, dim::Int) where {T<:SUPPORTED_TYPES} + return fill(val, (dim,)) end @doc""" @@ -494,9 +500,9 @@ Create an `NDArray` filled with the true, with the shape specified by `dims`. cuNumeric.trues(2, 3) ``` """ -trues(dim::Int) = cuNumeric.full(dim, true) -trues(dims::Dims) = cuNumeric.full(dims, true) -trues(dims::Int...) = cuNumeric.full(dims, true) +trues(dim::Int) = cuNumeric.fill(true, dim) +trues(dims::Dims) = cuNumeric.fill(true, dims) +trues(dims::Int...) = cuNumeric.fill(true, dims) @doc""" cuNumeric.falses(dims::Tuple, val) @@ -510,9 +516,10 @@ Create an `NDArray` filled with the false, with the shape specified by `dims`. cuNumeric.falses(2, 3) ``` """ -falses(dim::Int) = cuNumeric.full(dim, false) -falses(dims::Dims) = cuNumeric.full(dims, false) -falses(dims::Int...) = cuNumeric.full(dims, false) +falses(dims::Dims) = cuNumeric.fill(false, dims) +falses(dims::Int...) = cuNumeric.fill(false, dims) +falses(dim::Int) = cuNumeric.fill(false, dim) + @doc""" cuNumeric.zeros([T=Float32,] dims::Int...) @@ -528,9 +535,8 @@ cuNumeric.zeros(Float64, 3) cuNumeric.zeros(Int32, (2,3)) ``` """ -function zeros(::Type{T}, dims::Dims) where {T<:SUPPORTED_TYPES} - shape = collect(UInt64, dims) - return nda_zeros_array(shape, T) +function zeros(::Type{T}, dims::Dims{N}) where {T<:SUPPORTED_TYPES, N} + return nda_zeros_array(dims, T) end function zeros(::Type{T}, dims::Int...) where {T<:SUPPORTED_TYPES} @@ -546,15 +552,16 @@ function zeros(dims::Int...) end function zeros(::Type{T}) where {T} - return nda_zeros_array(UInt64[], T) + return nda_zeros_array((), T) end function zeros() return zeros(DEFAULT_FLOAT) end -function zeros_like(arr::NDArray) - return zeros(eltype(arr), Base.size(arr)) +#* TYPE USNTABLE CAUSE SIZE IS RIGHT NOW +function zeros_like(arr::NDArray{T,N}) where {T,N} + return zeros(T, Base.size(arr)) end @doc""" @@ -572,7 +579,7 @@ cuNumeric.ones(Int32, (2, 3)) ``` """ function ones(::Type{T}, dims::Dims) where {T} - return full(dims, T(1)) + return nda_full_array(dims, T(1)) end function ones(::Type{T}, dims::Int...) where {T} @@ -587,12 +594,13 @@ function ones(dims::Int...) return ones(DEFAULT_FLOAT, dims) end +#* UNSTABLE function ones(::Type{T}) where {T} - return full((), T(1)) + return cuNumeric.fill((), T(1)) end function ones() - return zeros(DEFAULT_FLOAT) + return ones(DEFAULT_FLOAT) end @doc""" @@ -645,11 +653,13 @@ reshape(arr, 12) ``` """ +#*USNTABLE USE Val{false} IF WE REALLY WANT THIS FLAG function reshape(arr::NDArray, i::Dims{N}; copy::Bool=false) where {N} reshaped = nda_reshape_array(arr, UInt64.(collect(i))) return copy ? copy(reshaped) : reshaped end +#*USNTABLE USE Val{false} IF WE REALLY WANT THIS FLAG function reshape(arr::NDArray, i::Int64; copy::Bool=false) reshaped = nda_reshape_array(arr, UInt64.([i])) return copy ? copy(reshaped) : reshaped From eb0d844cbc932b2c0aba9a92a90d2fc275e06446 Mon Sep 17 00:00:00 2001 From: ejmeitz Date: Wed, 25 Feb 2026 15:01:06 -0600 Subject: [PATCH 2/8] zeros, ones, fill type stable --- src/ndarray/binary.jl | 16 ++++----- src/ndarray/detail/ndarray.jl | 62 +++++++++++++++-------------------- src/ndarray/ndarray.jl | 3 +- src/ndarray/unary.jl | 18 +++++----- 4 files changed, 44 insertions(+), 55 deletions(-) diff --git a/src/ndarray/binary.jl b/src/ndarray/binary.jl index 10974206..a8715240 100644 --- a/src/ndarray/binary.jl +++ b/src/ndarray/binary.jl @@ -87,7 +87,7 @@ function Base.:(-)(rhs1::NDArray{A,N}, rhs2::NDArray{B,N}) where {A,B,N} promote_shape(size(rhs1), size(rhs2)) T_OUT = __checked_promote_op(-, A, B) out = cuNumeric.zeros(T_OUT, size(rhs1)) - return nda_binary_op( + return nda_binary_op!( out, cuNumeric.SUBTRACT, unchecked_promote_arr(rhs1, T_OUT), @@ -100,7 +100,7 @@ function Base.:(+)(rhs1::NDArray{A,N}, rhs2::NDArray{B,N}) where {A,B,N} promote_shape(size(rhs1), size(rhs2)) T_OUT = __checked_promote_op(+, A, B) out = cuNumeric.zeros(T_OUT, size(rhs1)) - return nda_binary_op( + return nda_binary_op!( out, cuNumeric.ADD, unchecked_promote_arr(rhs1, T_OUT), unchecked_promote_arr(rhs2, T_OUT) ) end @@ -108,7 +108,7 @@ end function Base.:(*)(val::V, arr::NDArray{A}) where {A,V} T = __my_promote_type(A, V) out = cuNumeric.zeros(T, size(arr)) - return nda_binary_op(out, cuNumeric.MULTIPLY, NDArray(T(val)), unchecked_promote_arr(arr, T)) + return nda_binary_op!(out, cuNumeric.MULTIPLY, NDArray(T(val)), unchecked_promote_arr(arr, T)) end function Base.:(*)(arr::NDArray{A}, val::V) where {A,V} @@ -191,7 +191,7 @@ for (julia_fn, op_code) in binary_op_map @inline function __broadcast( f::typeof($(julia_fn)), out::NDArray, rhs1::NDArray{T}, rhs2::NDArray{T} ) where {T} - return nda_binary_op(out, $(op_code), rhs1, rhs2) + return nda_binary_op!(out, $(op_code), rhs1, rhs2) end end end @@ -204,7 +204,7 @@ for (julia_fn, op_code) in floaty_binary_op_map @inline function __broadcast( f::typeof($(julia_fn)), out::NDArray, rhs1::NDArray{T}, rhs2::NDArray{T} ) where {T} - return nda_binary_op(out, $(op_code), rhs1, rhs2) + return nda_binary_op!(out, $(op_code), rhs1, rhs2) end # If input is not already float, promote to that @@ -220,7 +220,7 @@ end f::typeof(Base.:(+)), out::NDArray{O}, rhs1::NDArray{Bool}, rhs2::NDArray{Bool} ) where {O<:Integer} assertpromotion(".+", Bool, O) - return nda_binary_op( + return nda_binary_op!( out, cuNumeric.ADD, unchecked_promote_arr(rhs1, O), unchecked_promote_arr(rhs2, O) ) end @@ -229,7 +229,7 @@ end f::typeof(Base.:(-)), out::NDArray{O}, rhs1::NDArray{Bool}, rhs2::NDArray{Bool} ) where {O<:Integer} assertpromotion(".-", Bool, O) - return nda_binary_op( + return nda_binary_op!( out, cuNumeric.SUBTRACT, unchecked_promote_arr(rhs1, O), unchecked_promote_arr(rhs2, O) ) end @@ -250,7 +250,7 @@ end @inline function __broadcast( f::typeof(Base.literal_pow), out::NDArray, _, input::NDArray{T}, power::NDArray{T} ) where {T} - return nda_binary_op(out, cuNumeric.POWER, input, power) + return nda_binary_op!(out, cuNumeric.POWER, input, power) end # This is more "Julian" since a user expects map to broadcast diff --git a/src/ndarray/detail/ndarray.jl b/src/ndarray/detail/ndarray.jl index f124bc80..2dac42dd 100644 --- a/src/ndarray/detail/ndarray.jl +++ b/src/ndarray/detail/ndarray.jl @@ -28,21 +28,19 @@ get_n_dim(ptr::NDArray_t) = Int(ccall((:nda_array_dim, libnda), Int32, (NDArray_ abstract type AbstractNDArray{T<:SUPPORTED_TYPES,N} end @doc""" -**Internal API** - The NDArray type represents a multi-dimensional array in cuNumeric. It is a wrapper around a Legate array and provides various methods for array manipulation and operations. Finalizer calls `nda_destroy_array` to clean up the underlying Legate array when the NDArray is garbage collected. """ -mutable struct NDArray{T,N} <: AbstractNDArray{T,N} +mutable struct NDArray{T, N, PADDED} <: AbstractNDArray{T,N} ptr::NDArray_t nbytes::Int64 - padding::Union{Nothing,NTuple{N,Int}} where {N} + padding::Union{Nothing,NTuple{N,Int}} - function NDArray(ptr::NDArray_t; T=get_julia_type(ptr), n_dim=get_n_dim(ptr)) + function NDArray(ptr::NDArray_t, ::Type{T}, ::Val{N}) where {T, N} nbytes = cuNumeric.nda_nbytes(ptr) cuNumeric.register_alloc!(nbytes) - handle = new{T,Int(n_dim)}(ptr, nbytes, nothing) + handle = new{T,N, false}(ptr, nbytes, nothing) finalizer(handle) do h cuNumeric.nda_destroy_array(h.ptr) cuNumeric.register_free!(h.nbytes) @@ -51,6 +49,9 @@ mutable struct NDArray{T,N} <: AbstractNDArray{T,N} end end +# Dynamic fallback, not great but required if we cannot infer things +NDArray(ptr::NDArray_t) = NDArray(ptr, get_julia_type(ptr), Val(get_n_dim(ptr))) + # struct WrappedNDArray{T,N} <: AbstractNDArray{T,N} # ndarr::NDArray{T,N} # jlarr::Array{T,N} @@ -86,7 +87,7 @@ function nda_zeros_array(dims::Dims{N}, ::Type{T}) where {T, N} ptr = ccall((:nda_zeros_array, libnda), NDArray_t, (Int32, Ptr{UInt64}, Legate.LegateTypeAllocated), Int32(N), shape, legate_type) - return NDArray(ptr; T=T, n_dim=N) + return NDArray(ptr, T, Val(N)) end function nda_full_array(dims::Dims{N}, value::T) where {T, N} @@ -98,7 +99,7 @@ function nda_full_array(dims::Dims{N}, value::T) where {T, N} (Int32, Ptr{UInt64}, Legate.LegateTypeAllocated, Ptr{Cvoid}), Int32(N), shape, type, Ref(value)) - return NDArray(ptr; T=T, n_dim=N) + return NDArray(ptr, T, Val(N)) end function nda_random(arr::NDArray, gen_code) @@ -112,14 +113,14 @@ function nda_random_array(shape::Vector{UInt64}) ptr = ccall((:nda_random_array, libnda), NDArray_t, (Int32, Ptr{UInt64}), n_dim, shape) - return NDArray(ptr; n_dim=n_dim) + return NDArray(ptr, get_julia_type(ptr), Val(n_dim)) end function nda_get_slice(arr::NDArray{T,N}, slices::Vector{Slice}) where {T,N} ptr = ccall((:nda_get_slice, libnda), NDArray_t, (NDArray_t, Ptr{Slice}, Cint), arr.ptr, pointer(slices), length(slices)) - return NDArray(ptr; T=T, n_dim=N) + return NDArray(ptr, T, Val(N)) end # queries @@ -147,7 +148,7 @@ function nda_reshape_array(arr::NDArray{T}, newshape::Vector{UInt64}) where {T} ptr = ccall((:nda_reshape_array, libnda), NDArray_t, (NDArray_t, Int32, Ptr{UInt64}), arr.ptr, n_dim, newshape) - return NDArray(ptr; T=T, n_dim=n_dim) + return NDArray(ptr, T, Val(n_dim)) end function nda_astype(arr::NDArray{OLD_T,N}, ::Type{NEW_T}) where {OLD_T,NEW_T,N} @@ -156,7 +157,7 @@ function nda_astype(arr::NDArray{OLD_T,N}, ::Type{NEW_T}) where {OLD_T,NEW_T,N} NDArray_t, (NDArray_t, Legate.LegateTypeAllocated), arr.ptr, type) - return NDArray(ptr; T=NEW_T, n_dim=N) + return NDArray(ptr, NEW_T, Val(N)) end function nda_fill_array(arr::NDArray{T}, value::T) where {T} @@ -193,14 +194,14 @@ function nda_move(dst::NDArray{T,N}, src::NDArray{T,N}) where {T,N} end # operations -function nda_binary_op(out::NDArray, op_code::BinaryOpCode, rhs1::NDArray, rhs2::NDArray) +function nda_binary_op!(out::NDArray, op_code::BinaryOpCode, rhs1::NDArray, rhs2::NDArray) ccall((:nda_binary_op, libnda), Cvoid, (NDArray_t, BinaryOpCode, NDArray_t, NDArray_t), out.ptr, op_code, rhs1.ptr, rhs2.ptr) return out end -function nda_unary_op(out::NDArray, op_code::UnaryOpCode, input::NDArray) +function nda_unary_op!(out::NDArray, op_code::UnaryOpCode, input::NDArray) ccall((:nda_unary_op, libnda), Cvoid, (NDArray_t, UnaryOpCode, NDArray_t), out.ptr, op_code, input.ptr) @@ -218,7 +219,7 @@ function nda_array_equal(rhs1::NDArray{T,N}, rhs2::NDArray{T,N}) where {T,N} ptr = ccall((:nda_array_equal, libnda), NDArray_t, (NDArray_t, NDArray_t), rhs1.ptr, rhs2.ptr) - return NDArray(ptr; T=Bool, n_dim=1) + return NDArray(ptr, Bool, Val(1)) end function nda_diag(arr::NDArray, k::Int32) @@ -255,7 +256,7 @@ function nda_multiply_scalar(rhs1::NDArray{T,N}, value::T) where {T,N} ptr = ccall((:nda_multiply_scalar, libnda), NDArray_t, (NDArray_t, Legate.LegateTypeAllocated, Ptr{Cvoid}), rhs1.ptr, type, Ref(value)) - return NDArray(ptr; T=T, n_dim=N) + return NDArray(ptr, T, Val(N)) end function nda_add_scalar(rhs1::NDArray{T,N}, value::T) where {T,N} @@ -264,7 +265,7 @@ function nda_add_scalar(rhs1::NDArray{T,N}, value::T) where {T,N} ptr = ccall((:nda_add_scalar, libnda), NDArray_t, (NDArray_t, Legate.LegateTypeAllocated, Ptr{Cvoid}), rhs1.ptr, type, Ref(value)) - return NDArray(ptr; T=T, n_dim=N) + return NDArray(ptr, T, Val(N)) end function nda_three_dot_arg(rhs1::NDArray{T}, rhs2::NDArray{T}, out::NDArray{T}) where {T} @@ -286,7 +287,7 @@ function nda_eye(rows::Int32, ::Type{T}) where {T} ptr = ccall((:nda_eye, libnda), NDArray_t, (Int32, Legate.LegateTypeAllocated), rows, legate_type) - return NDArray(ptr; T=T, n_dim=2) + return NDArray(ptr, T, Val(2)) end function nda_trace( @@ -297,7 +298,7 @@ function nda_trace( NDArray_t, (NDArray_t, Int32, Int32, Int32, Legate.LegateTypeAllocated), arr.ptr, offset, a1, a2, legate_type) - return NDArray(ptr; T=T, n_dim=1) + return NDArray(ptr, T, Val(1)) end function nda_transpose(arr::NDArray) @@ -317,7 +318,7 @@ function nda_attach_external(arr::AbstractArray{T,N}) where {T,N} NDArray_t, (Ptr{Cvoid}, UInt64, Int32, Ptr{UInt64}, Legate.LegateTypeAllocated), ptr, nbytes, N, shape, legate_type) - return NDArray(nda_ptr; T=T, n_dim=N) + return NDArray(nda_ptr, T, Val(N)) end # return underlying logical store to the NDArray obj @@ -401,17 +402,6 @@ function slice_array(slices::Vararg{Tuple{Union{Int,Nothing},Union{Int,Nothing}} return v end -@doc""" - padded_shape(arr::NDArray) - -**Internal API** - -Return the size of the given `NDArray`. This will include the padded size. -""" -function padded_shape(arr::NDArray{<:Any,N}) where {N} - shp = cuNumeric.nda_array_shape(arr) - return ntuple(i -> Int(shp[i]), Val(N)) -end @doc""" shape(arr::NDArray) @@ -420,11 +410,11 @@ end Return the size of the given `NDArray`. """ -function shape(arr::NDArray) - if !isnothing(arr.padding) - return arr.padding - end - return cuNumeric.padded_shape(arr) +shape(arr::NDArray{<:Any, N, true}) where N = arr.padding + +function shape(arr::NDArray{<:Any, N, false}) where {N} + shp = cuNumeric.nda_array_shape(arr) + return ntuple(i -> Int(shp[i]), Val(N)) end @doc""" diff --git a/src/ndarray/ndarray.jl b/src/ndarray/ndarray.jl index f214eca6..5c8b5b6e 100644 --- a/src/ndarray/ndarray.jl +++ b/src/ndarray/ndarray.jl @@ -594,9 +594,8 @@ function ones(dims::Int...) return ones(DEFAULT_FLOAT, dims) end -#* UNSTABLE function ones(::Type{T}) where {T} - return cuNumeric.fill((), T(1)) + return cuNumeric.fill(T(1), ()) end function ones() diff --git a/src/ndarray/unary.jl b/src/ndarray/unary.jl index 4158b010..3b12e73d 100644 --- a/src/ndarray/unary.jl +++ b/src/ndarray/unary.jl @@ -101,13 +101,13 @@ global const unary_op_map_no_args = Dict{Function,UnaryOpCode}( ### SPECIAL CASES ### # Needed to support != -Base.:(!)(input::NDArray{Bool,0}) = nda_unary_op(similar(input), cuNumeric.LOGICAL_NOT, input) -Base.:(!)(input::NDArray{Bool,1}) = nda_unary_op(similar(input), cuNumeric.LOGICAL_NOT, input) +Base.:(!)(input::NDArray{Bool,0}) = nda_unary_op!(similar(input), cuNumeric.LOGICAL_NOT, input) +Base.:(!)(input::NDArray{Bool,1}) = nda_unary_op!(similar(input), cuNumeric.LOGICAL_NOT, input) # Non-broadcasted version of negation function Base.:(-)(input::NDArray{T}) where {T} out = cuNumeric.zeros(T, size(input)) - return nda_unary_op(out, cuNumeric.NEGATIVE, input) + return nda_unary_op!(out, cuNumeric.NEGATIVE, input) end function Base.:(-)(input::NDArray{Bool}) @@ -121,7 +121,7 @@ end @inline function __broadcast( f::typeof(Base.literal_pow), out::NDArray{O}, _, input::NDArray{T}, ::Type{Val{2}} ) where {T,O} - return nda_unary_op(out, cuNumeric.SQUARE, input) + return nda_unary_op!(out, cuNumeric.SQUARE, input) end @inline function __broadcast( @@ -129,13 +129,13 @@ end ) where {O} nda_move(out, O(1) ./ checked_promote_arr(input, O)) #! REPLACE WITH RECIP ONCE FIXED return out - # return nda_unary_op(out, cuNumeric.RECIPROCAL, input) + # return nda_unary_op!(out, cuNumeric.RECIPROCAL, input) end @inline function __broadcast(::typeof(Base.inv), out::NDArray{O}, input::NDArray) where {O} nda_move(out, O(1) ./ checked_promote_arr(input, O)) #! REPLACE WITH RECIP ONCE FIXED return out - # return nda_unary_op(out, cuNumeric.RECIPROCAL, checked_promote_arr(input,O)) + # return nda_unary_op!(out, cuNumeric.RECIPROCAL, checked_promote_arr(input,O)) end #! NEEDS TO SUPPORT inv and ^ -1 @@ -150,7 +150,7 @@ end # Only supported for Bools @inline function __broadcast(f::typeof(Base.:(!)), out::NDArray{Bool}, input::NDArray{Bool}) - return nda_unary_op(out, cuNumeric.LOGICAL_NOT, input) + return nda_unary_op!(out, cuNumeric.LOGICAL_NOT, input) end # Generate hidden broadcasted version of unary ops. @@ -159,7 +159,7 @@ for (julia_fn, op_code) in unary_op_map_no_args @inline function __broadcast( f::typeof($julia_fn), out::NDArray{T}, input::NDArray{T} ) where {T} - return nda_unary_op(out, $(op_code), input) + return nda_unary_op!(out, $(op_code), input) end end end @@ -172,7 +172,7 @@ for (julia_fn, op_code) in floaty_unary_ops_no_args @inline function __broadcast( f::typeof($julia_fn), out::NDArray{T}, input::NDArray{T} ) where {T} - return nda_unary_op(out, $(op_code), input) + return nda_unary_op!(out, $(op_code), input) end # If input is not already float, promote to that From 6e7c36afcfef15f4aedb5f571a997a2c9416dec3 Mon Sep 17 00:00:00 2001 From: ejmeitz Date: Wed, 25 Feb 2026 15:15:09 -0600 Subject: [PATCH 3/8] start rand --- src/ndarray/detail/ndarray.jl | 10 +++++----- src/ndarray/ndarray.jl | 5 ++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/ndarray/detail/ndarray.jl b/src/ndarray/detail/ndarray.jl index 2dac42dd..7e216fcb 100644 --- a/src/ndarray/detail/ndarray.jl +++ b/src/ndarray/detail/ndarray.jl @@ -50,7 +50,7 @@ mutable struct NDArray{T, N, PADDED} <: AbstractNDArray{T,N} end # Dynamic fallback, not great but required if we cannot infer things -NDArray(ptr::NDArray_t) = NDArray(ptr, get_julia_type(ptr), Val(get_n_dim(ptr))) +NDArray(ptr::NDArray_t; T = get_julia_type(ptr), N::Integer = get_n_dim(ptr)) = NDArray(ptr, T, Val(N)) # struct WrappedNDArray{T,N} <: AbstractNDArray{T,N} # ndarr::NDArray{T,N} @@ -108,12 +108,12 @@ function nda_random(arr::NDArray, gen_code) arr.ptr, Int32(gen_code)) end -function nda_random_array(shape::Vector{UInt64}) - n_dim = Int32(length(shape)) +function nda_random_array(dims::Dims{N}) where {N} + shape = collect(UInt64, dims) ptr = ccall((:nda_random_array, libnda), NDArray_t, (Int32, Ptr{UInt64}), - n_dim, shape) - return NDArray(ptr, get_julia_type(ptr), Val(n_dim)) + Int32(N), shape) + return NDArray(ptr, Float64, Val(N)) #* T is always Float64 cause of cupynumeric end function nda_get_slice(arr::NDArray{T,N}, slices::Vector{Slice}) where {T,N} diff --git a/src/ndarray/ndarray.jl b/src/ndarray/ndarray.jl index 5c8b5b6e..71337c37 100644 --- a/src/ndarray/ndarray.jl +++ b/src/ndarray/ndarray.jl @@ -624,14 +624,13 @@ A = cuNumeric.zeros(2, 2); cuNumeric.rand!(A) ``` """ Random.rand!(arr::NDArray{Float64}) = cuNumeric.nda_random(arr, 0) -rand(::Type{NDArray}, dims::Dims) = cuNumeric.nda_random_array(UInt64.(collect(dims))) +rand(::Type{NDArray}, dims::Dims) = cuNumeric.nda_random_array(dims) rand(::Type{NDArray}, dims::Int...) = cuNumeric.rand(NDArray, dims) rand(dims::Dims) = cuNumeric.rand(NDArray, dims) rand(dims::Int...) = cuNumeric.rand(NDArray, dims) function rand(::Type{T}, dims::Dims) where {T<:AbstractFloat} - arrfp64 = cuNumeric.nda_random_array(UInt64.(collect(dims))) - # if T == Float64, as_type should do minimial work # TODO check this. + arrfp64 = cuNumeric.nda_random_array(dims) return cuNumeric.as_type(arrfp64, T) end From 42cdb1727ee3a2ba033a9c68f2d032c434d0e7ee Mon Sep 17 00:00:00 2001 From: ejmeitz Date: Wed, 25 Feb 2026 16:45:55 -0600 Subject: [PATCH 4/8] rand/rand! and most broadcasting stable --- src/ndarray/ndarray.jl | 1 - test/tests/stability.jl | 23 +++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 test/tests/stability.jl diff --git a/src/ndarray/ndarray.jl b/src/ndarray/ndarray.jl index 71337c37..53f2a5a5 100644 --- a/src/ndarray/ndarray.jl +++ b/src/ndarray/ndarray.jl @@ -559,7 +559,6 @@ function zeros() return zeros(DEFAULT_FLOAT) end -#* TYPE USNTABLE CAUSE SIZE IS RIGHT NOW function zeros_like(arr::NDArray{T,N}) where {T,N} return zeros(T, Base.size(arr)) end diff --git a/test/tests/stability.jl b/test/tests/stability.jl new file mode 100644 index 00000000..5ef3d3fb --- /dev/null +++ b/test/tests/stability.jl @@ -0,0 +1,23 @@ +@testset "Stability" begin + + @testset "core" begin + # size, shape, NDArray constructor + end + + @testset "construction" begin + # zeros, zeros_like, ones, rand, fill, trues, falses + end + + @testset "indexing" begin + # getindex, setindex!, copy, copyto!, fill!, as_type + end + + @testset "arithmetic" begin + # +, -, *, /, ^, %, &, |, ⊻ + end + + @testset "linear algebra" begin + # mul!, dot, norm, det, inv, pinv, eig, svd, lu, qr, cholesky + end + +end \ No newline at end of file From cd4da3739fa45a61a88fb4e8a5f1aad29a20b0d8 Mon Sep 17 00:00:00 2001 From: ejmeitz Date: Mon, 2 Mar 2026 14:38:37 -0600 Subject: [PATCH 5/8] add tests for type stability --- src/ndarray/detail/ndarray.jl | 8 ++-- src/ndarray/ndarray.jl | 7 ++-- test/runtests.jl | 4 ++ test/tests/stability.jl | 73 +++++++++++++++++++++++++++++++---- 4 files changed, 77 insertions(+), 15 deletions(-) diff --git a/src/ndarray/detail/ndarray.jl b/src/ndarray/detail/ndarray.jl index 7e216fcb..7d0c8c2b 100644 --- a/src/ndarray/detail/ndarray.jl +++ b/src/ndarray/detail/ndarray.jl @@ -143,12 +143,12 @@ function nda_array_shape(arr::NDArray) end # modify -function nda_reshape_array(arr::NDArray{T}, newshape::Vector{UInt64}) where {T} - n_dim = Int32(length(newshape)) +function nda_reshape_array(arr::NDArray{T}, newdims::Dims{N}) where {T, N} + newshape = collect(UInt64, newdims) ptr = ccall((:nda_reshape_array, libnda), NDArray_t, (NDArray_t, Int32, Ptr{UInt64}), - arr.ptr, n_dim, newshape) - return NDArray(ptr, T, Val(n_dim)) + arr.ptr, Int32(N), newshape) + return NDArray(ptr, T, Val(N)) end function nda_astype(arr::NDArray{OLD_T,N}, ::Type{NEW_T}) where {OLD_T,NEW_T,N} diff --git a/src/ndarray/ndarray.jl b/src/ndarray/ndarray.jl index 53f2a5a5..9993d903 100644 --- a/src/ndarray/ndarray.jl +++ b/src/ndarray/ndarray.jl @@ -652,14 +652,13 @@ reshape(arr, 12) #*USNTABLE USE Val{false} IF WE REALLY WANT THIS FLAG function reshape(arr::NDArray, i::Dims{N}; copy::Bool=false) where {N} - reshaped = nda_reshape_array(arr, UInt64.(collect(i))) + reshaped = nda_reshape_array(arr, i) return copy ? copy(reshaped) : reshaped end #*USNTABLE USE Val{false} IF WE REALLY WANT THIS FLAG -function reshape(arr::NDArray, i::Int64; copy::Bool=false) - reshaped = nda_reshape_array(arr, UInt64.([i])) - return copy ? copy(reshaped) : reshaped +function reshape(arr::NDArray, i::Int...; copy::Bool=false) + return reshape(arr, i; copy = copy) end # Ignore the scalar indexing here... diff --git a/test/runtests.jl b/test/runtests.jl index 60137c4c..4df77506 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -380,6 +380,10 @@ end end end +@testset verbose = true "Type Stability" begin + include("tests/stability.jl") +end + @testset verbose = true "Scoping" begin N = 100 diff --git a/test/tests/stability.jl b/test/tests/stability.jl index 5ef3d3fb..010e251b 100644 --- a/test/tests/stability.jl +++ b/test/tests/stability.jl @@ -1,23 +1,82 @@ @testset "Stability" begin @testset "core" begin - # size, shape, NDArray constructor + a = cuNumeric.zeros(5) + b = cuNumeric.zeros(Float64, 3, 4) + @inferred size(a) + @inferred size(b) + @inferred cuNumeric.shape(a) + @inferred cuNumeric.shape(b) end @testset "construction" begin - # zeros, zeros_like, ones, rand, fill, trues, falses + # zeros, zeros_like, ones, rand, fill, trues, falses\ + for constructor in (:zeros, :ones) + @eval begin + @inferred cuNumeric.$(constructor)(Float64, 3, 2) + @inferred cuNumeric.$(constructor)(Float64, (3, 4)) + @inferred cuNumeric.$(constructor)(3, 5, 6) + @inferred cuNumeric.$(constructor)((3,)) + @inferred cuNumeric.$(constructor)() + @inferred cuNumeric.$(constructor)(Int64) + end + end + a = cuNumeric.zeros(Float64, 5, 3) + @inferred cuNumeric.zeros_like(a) + + for constructor in (:trues, :falses) + @eval begin + @inferred cuNumeric.$(constructor)(5) + @inferred cuNumeric.$(constructor)((5,4)) + @inferred cuNumeric.$(constructor)(3, 4, 5) + end + end + + @inferred cuNumeric.fill(2.0, 3, 4) + @inferred cuNumeric.fill(2, (3, 4)) + @inferred cuNumeric.fill(2.0, 3) + + @inferred cuNumeric.rand(4, 3) + @inferred cuNumeric.rand(Float32, 5) + end + + @testset "conversion" begin + # cast to array, as_type + a = cuNumeric.zeros(Float64, 5, 5) + @inferred Array(a) + @inferred Array{Float32}(a) + @inferred cuNumeric.as_type(a, Float32) + @inferred cuNumeric.as_type(a, Int64) end @testset "indexing" begin # getindex, setindex!, copy, copyto!, fill!, as_type - end + a = cuNumeric.zeros(Float32, 5, 5) + b = cuNumeric.zeros(Int32, 11) - @testset "arithmetic" begin - # +, -, *, /, ^, %, &, |, ⊻ + @inferred a[1:3, 1:3] + @inferred a[2, 1:3] + @inferred a[1, 1:3] .+ b[1:3] + @inferred b[1:5] + # @inferred a[1:3, 1:end] + allowscalar() do + @inferred a[1, 2] + end end - @testset "linear algebra" begin - # mul!, dot, norm, det, inv, pinv, eig, svd, lu, qr, cholesky + @testset "broadcasting" begin + a = cuNumeric.ones(Float32, 3, 3) + b = cuNumeric.ones(Int32, 3, 3) + @inferred 5 .* a + @inferred 5.0f0 .* a + @inferred 5 * a + @inferred 5.0f0 * a + + @inferred a .* b + @inferred a .+ b + @inferred a ./ b + @inferred ((a .* b) .+ a) .* 2.0f0 end + end \ No newline at end of file From 4f90b13ec5f5a47c59d24efde24b5fabe8bb71bb Mon Sep 17 00:00:00 2001 From: ejmeitz Date: Mon, 2 Mar 2026 14:42:15 -0600 Subject: [PATCH 6/8] this? --- test/tests/stability.jl | 131 +++++++++++++++++++--------------------- 1 file changed, 63 insertions(+), 68 deletions(-) diff --git a/test/tests/stability.jl b/test/tests/stability.jl index 010e251b..137ac9e9 100644 --- a/test/tests/stability.jl +++ b/test/tests/stability.jl @@ -1,82 +1,77 @@ -@testset "Stability" begin +@testset "core" begin + a = cuNumeric.zeros(5) + b = cuNumeric.zeros(Float64, 3, 4) + @inferred size(a) + @inferred size(b) + @inferred cuNumeric.shape(a) + @inferred cuNumeric.shape(b) +end - @testset "core" begin - a = cuNumeric.zeros(5) - b = cuNumeric.zeros(Float64, 3, 4) - @inferred size(a) - @inferred size(b) - @inferred cuNumeric.shape(a) - @inferred cuNumeric.shape(b) - end - - @testset "construction" begin - # zeros, zeros_like, ones, rand, fill, trues, falses\ - for constructor in (:zeros, :ones) - @eval begin - @inferred cuNumeric.$(constructor)(Float64, 3, 2) - @inferred cuNumeric.$(constructor)(Float64, (3, 4)) - @inferred cuNumeric.$(constructor)(3, 5, 6) - @inferred cuNumeric.$(constructor)((3,)) - @inferred cuNumeric.$(constructor)() - @inferred cuNumeric.$(constructor)(Int64) - end +@testset "construction" begin + # zeros, zeros_like, ones, rand, fill, trues, falses\ + for constructor in (:zeros, :ones) + @eval begin + @inferred cuNumeric.$(constructor)(Float64, 3, 2) + @inferred cuNumeric.$(constructor)(Float64, (3, 4)) + @inferred cuNumeric.$(constructor)(3, 5, 6) + @inferred cuNumeric.$(constructor)((3,)) + @inferred cuNumeric.$(constructor)() + @inferred cuNumeric.$(constructor)(Int64) end - a = cuNumeric.zeros(Float64, 5, 3) - @inferred cuNumeric.zeros_like(a) + end + a = cuNumeric.zeros(Float64, 5, 3) + @inferred cuNumeric.zeros_like(a) - for constructor in (:trues, :falses) - @eval begin - @inferred cuNumeric.$(constructor)(5) - @inferred cuNumeric.$(constructor)((5,4)) - @inferred cuNumeric.$(constructor)(3, 4, 5) - end + for constructor in (:trues, :falses) + @eval begin + @inferred cuNumeric.$(constructor)(5) + @inferred cuNumeric.$(constructor)((5,4)) + @inferred cuNumeric.$(constructor)(3, 4, 5) end - - @inferred cuNumeric.fill(2.0, 3, 4) - @inferred cuNumeric.fill(2, (3, 4)) - @inferred cuNumeric.fill(2.0, 3) - - @inferred cuNumeric.rand(4, 3) - @inferred cuNumeric.rand(Float32, 5) end - @testset "conversion" begin - # cast to array, as_type - a = cuNumeric.zeros(Float64, 5, 5) - @inferred Array(a) - @inferred Array{Float32}(a) - @inferred cuNumeric.as_type(a, Float32) - @inferred cuNumeric.as_type(a, Int64) - end + @inferred cuNumeric.fill(2.0, 3, 4) + @inferred cuNumeric.fill(2, (3, 4)) + @inferred cuNumeric.fill(2.0, 3) - @testset "indexing" begin - # getindex, setindex!, copy, copyto!, fill!, as_type - a = cuNumeric.zeros(Float32, 5, 5) - b = cuNumeric.zeros(Int32, 11) + @inferred cuNumeric.rand(4, 3) + @inferred cuNumeric.rand(Float32, 5) +end - @inferred a[1:3, 1:3] - @inferred a[2, 1:3] - @inferred a[1, 1:3] .+ b[1:3] - @inferred b[1:5] - # @inferred a[1:3, 1:end] - allowscalar() do - @inferred a[1, 2] - end - end +@testset "conversion" begin + # cast to array, as_type + a = cuNumeric.zeros(Float64, 5, 5) + @inferred Array(a) + @inferred Array{Float32}(a) + @inferred cuNumeric.as_type(a, Float32) + @inferred cuNumeric.as_type(a, Int64) +end - @testset "broadcasting" begin - a = cuNumeric.ones(Float32, 3, 3) - b = cuNumeric.ones(Int32, 3, 3) - @inferred 5 .* a - @inferred 5.0f0 .* a - @inferred 5 * a - @inferred 5.0f0 * a +@testset "indexing" begin + # getindex, setindex!, copy, copyto!, fill!, as_type + a = cuNumeric.zeros(Float32, 5, 5) + b = cuNumeric.zeros(Int32, 11) - @inferred a .* b - @inferred a .+ b - @inferred a ./ b - @inferred ((a .* b) .+ a) .* 2.0f0 + @inferred a[1:3, 1:3] + @inferred a[2, 1:3] + @inferred a[1, 1:3] .+ b[1:3] + @inferred b[1:5] + # @inferred a[1:3, 1:end] + allowscalar() do + @inferred a[1, 2] end +end +@testset "broadcasting" begin + a = cuNumeric.ones(Float32, 3, 3) + b = cuNumeric.ones(Int32, 3, 3) + @inferred 5 .* a + @inferred 5.0f0 .* a + @inferred 5 * a + @inferred 5.0f0 * a + @inferred a .* b + @inferred a .+ b + @inferred a ./ b + @inferred ((a .* b) .+ a) .* 2.0f0 end \ No newline at end of file From d95f968e7a702c53be212c22fed51c5c865c58dc Mon Sep 17 00:00:00 2001 From: ejmeitz Date: Mon, 2 Mar 2026 14:50:36 -0600 Subject: [PATCH 7/8] all verbose tests --- test/tests/stability.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/tests/stability.jl b/test/tests/stability.jl index 137ac9e9..d097454a 100644 --- a/test/tests/stability.jl +++ b/test/tests/stability.jl @@ -1,4 +1,4 @@ -@testset "core" begin +@testset verbose = true "core" begin a = cuNumeric.zeros(5) b = cuNumeric.zeros(Float64, 3, 4) @inferred size(a) @@ -7,7 +7,7 @@ @inferred cuNumeric.shape(b) end -@testset "construction" begin +@testset verbose = true "construction" begin # zeros, zeros_like, ones, rand, fill, trues, falses\ for constructor in (:zeros, :ones) @eval begin @@ -38,7 +38,7 @@ end @inferred cuNumeric.rand(Float32, 5) end -@testset "conversion" begin +@testset verbose = true "conversion" begin # cast to array, as_type a = cuNumeric.zeros(Float64, 5, 5) @inferred Array(a) @@ -47,7 +47,7 @@ end @inferred cuNumeric.as_type(a, Int64) end -@testset "indexing" begin +@testset verbose = true "indexing" begin # getindex, setindex!, copy, copyto!, fill!, as_type a = cuNumeric.zeros(Float32, 5, 5) b = cuNumeric.zeros(Int32, 11) @@ -62,7 +62,7 @@ end end end -@testset "broadcasting" begin +@testset verbose = true "broadcasting" begin a = cuNumeric.ones(Float32, 3, 3) b = cuNumeric.ones(Int32, 3, 3) @inferred 5 .* a From 5b962417d801fd12f3137c49dcb540073fea4dcc Mon Sep 17 00:00:00 2001 From: ejmeitz Date: Mon, 2 Mar 2026 14:59:18 -0600 Subject: [PATCH 8/8] fix ND printing bug --- src/ndarray/ndarray.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ndarray/ndarray.jl b/src/ndarray/ndarray.jl index 9993d903..b703ceb9 100644 --- a/src/ndarray/ndarray.jl +++ b/src/ndarray/ndarray.jl @@ -267,7 +267,7 @@ end function Base.show(io::IO, arr::NDArray{T,D}) where {T,D} println(io, "NDArray{$(T),$(D)}") - Base.print_matrix(io, Array(arr)) + Base.print_array(io, Array(arr)) end function Base.show(io::IO, ::MIME"text/plain", arr::NDArray{T}) where {T}