diff --git a/examples/custom_cuda.jl b/examples/custom_cuda.jl index 7c6d8511..18675c4f 100644 --- a/examples/custom_cuda.jl +++ b/examples/custom_cuda.jl @@ -24,8 +24,8 @@ N = 1024 threads = 256 blocks = cld(N, threads) -a = cuNumeric.full(N, 1.0f0) -b = cuNumeric.full(N, 2.0f0) +a = cuNumeric.fill(1.0f0, N) +b = cuNumeric.fill(2.0f0, N) c = cuNumeric.ones(Float32, N) # task = cuNumeric.@cuda_task kernel_add(a, b, c, UInt32(1)) diff --git a/src/ndarray/binary.jl b/src/ndarray/binary.jl index 10974206..a8715240 100644 --- a/src/ndarray/binary.jl +++ b/src/ndarray/binary.jl @@ -87,7 +87,7 @@ function Base.:(-)(rhs1::NDArray{A,N}, rhs2::NDArray{B,N}) where {A,B,N} promote_shape(size(rhs1), size(rhs2)) T_OUT = __checked_promote_op(-, A, B) out = cuNumeric.zeros(T_OUT, size(rhs1)) - return nda_binary_op( + return nda_binary_op!( out, cuNumeric.SUBTRACT, unchecked_promote_arr(rhs1, T_OUT), @@ -100,7 +100,7 @@ function Base.:(+)(rhs1::NDArray{A,N}, rhs2::NDArray{B,N}) where {A,B,N} promote_shape(size(rhs1), size(rhs2)) T_OUT = __checked_promote_op(+, A, B) out = cuNumeric.zeros(T_OUT, size(rhs1)) - return nda_binary_op( + return nda_binary_op!( out, cuNumeric.ADD, unchecked_promote_arr(rhs1, T_OUT), unchecked_promote_arr(rhs2, T_OUT) ) end @@ -108,7 +108,7 @@ end function Base.:(*)(val::V, arr::NDArray{A}) where {A,V} T = __my_promote_type(A, V) out = cuNumeric.zeros(T, size(arr)) - return nda_binary_op(out, cuNumeric.MULTIPLY, NDArray(T(val)), unchecked_promote_arr(arr, T)) + return nda_binary_op!(out, cuNumeric.MULTIPLY, NDArray(T(val)), unchecked_promote_arr(arr, T)) end function Base.:(*)(arr::NDArray{A}, val::V) where {A,V} @@ -191,7 +191,7 @@ for (julia_fn, op_code) in binary_op_map @inline function __broadcast( f::typeof($(julia_fn)), out::NDArray, rhs1::NDArray{T}, rhs2::NDArray{T} ) where {T} - return nda_binary_op(out, $(op_code), rhs1, rhs2) + return nda_binary_op!(out, $(op_code), rhs1, rhs2) end end end @@ -204,7 +204,7 @@ for (julia_fn, op_code) in 
floaty_binary_op_map @inline function __broadcast( f::typeof($(julia_fn)), out::NDArray, rhs1::NDArray{T}, rhs2::NDArray{T} ) where {T} - return nda_binary_op(out, $(op_code), rhs1, rhs2) + return nda_binary_op!(out, $(op_code), rhs1, rhs2) end # If input is not already float, promote to that @@ -220,7 +220,7 @@ end f::typeof(Base.:(+)), out::NDArray{O}, rhs1::NDArray{Bool}, rhs2::NDArray{Bool} ) where {O<:Integer} assertpromotion(".+", Bool, O) - return nda_binary_op( + return nda_binary_op!( out, cuNumeric.ADD, unchecked_promote_arr(rhs1, O), unchecked_promote_arr(rhs2, O) ) end @@ -229,7 +229,7 @@ end f::typeof(Base.:(-)), out::NDArray{O}, rhs1::NDArray{Bool}, rhs2::NDArray{Bool} ) where {O<:Integer} assertpromotion(".-", Bool, O) - return nda_binary_op( + return nda_binary_op!( out, cuNumeric.SUBTRACT, unchecked_promote_arr(rhs1, O), unchecked_promote_arr(rhs2, O) ) end @@ -250,7 +250,7 @@ end @inline function __broadcast( f::typeof(Base.literal_pow), out::NDArray, _, input::NDArray{T}, power::NDArray{T} ) where {T} - return nda_binary_op(out, cuNumeric.POWER, input, power) + return nda_binary_op!(out, cuNumeric.POWER, input, power) end # This is more "Julian" since a user expects map to broadcast diff --git a/src/ndarray/detail/ndarray.jl b/src/ndarray/detail/ndarray.jl index 5ecc10ea..7d0c8c2b 100644 --- a/src/ndarray/detail/ndarray.jl +++ b/src/ndarray/detail/ndarray.jl @@ -28,21 +28,19 @@ get_n_dim(ptr::NDArray_t) = Int(ccall((:nda_array_dim, libnda), Int32, (NDArray_ abstract type AbstractNDArray{T<:SUPPORTED_TYPES,N} end @doc""" -**Internal API** - The NDArray type represents a multi-dimensional array in cuNumeric. It is a wrapper around a Legate array and provides various methods for array manipulation and operations. Finalizer calls `nda_destroy_array` to clean up the underlying Legate array when the NDArray is garbage collected. 
""" -mutable struct NDArray{T,N} <: AbstractNDArray{T,N} +mutable struct NDArray{T, N, PADDED} <: AbstractNDArray{T,N} ptr::NDArray_t nbytes::Int64 - padding::Union{Nothing,NTuple{N,Int}} where {N} + padding::Union{Nothing,NTuple{N,Int}} - function NDArray(ptr::NDArray_t; T=get_julia_type(ptr), n_dim=get_n_dim(ptr)) + function NDArray(ptr::NDArray_t, ::Type{T}, ::Val{N}) where {T, N} nbytes = cuNumeric.nda_nbytes(ptr) cuNumeric.register_alloc!(nbytes) - handle = new{T,Int(n_dim)}(ptr, nbytes, nothing) + handle = new{T,N, false}(ptr, nbytes, nothing) finalizer(handle) do h cuNumeric.nda_destroy_array(h.ptr) cuNumeric.register_free!(h.nbytes) @@ -51,6 +49,9 @@ mutable struct NDArray{T,N} <: AbstractNDArray{T,N} end end +# Dynamic fallback, not great but required if we cannot infer things +NDArray(ptr::NDArray_t; T = get_julia_type(ptr), N::Integer = get_n_dim(ptr)) = NDArray(ptr, T, Val(N)) + # struct WrappedNDArray{T,N} <: AbstractNDArray{T,N} # ndarr::NDArray{T,N} # jlarr::Array{T,N} @@ -77,28 +78,28 @@ end # return NDArray(ptr, T = T, n_dim = 1) # end -NDArray(value::T) where {T<:SUPPORTED_TYPES} = nda_full_array(UInt64[], value) +NDArray(value::T) where {T<:SUPPORTED_TYPES} = nda_full_array((), value) # construction -function nda_zeros_array(shape::Vector{UInt64}, ::Type{T}) where {T} - n_dim = Int32(length(shape)) +function nda_zeros_array(dims::Dims{N}, ::Type{T}) where {T, N} + shape = collect(UInt64, dims) legate_type = Legate.to_legate_type(T) ptr = ccall((:nda_zeros_array, libnda), NDArray_t, (Int32, Ptr{UInt64}, Legate.LegateTypeAllocated), - n_dim, shape, legate_type) - return NDArray(ptr; T=T, n_dim=n_dim) + Int32(N), shape, legate_type) + return NDArray(ptr, T, Val(N)) end -function nda_full_array(shape::Vector{UInt64}, value::T) where {T} - n_dim = Int32(length(shape)) +function nda_full_array(dims::Dims{N}, value::T) where {T, N} + shape = collect(UInt64, dims) type = Legate.to_legate_type(T) ptr = ccall((:nda_full_array, libnda), NDArray_t, (Int32, 
Ptr{UInt64}, Legate.LegateTypeAllocated, Ptr{Cvoid}), - n_dim, shape, type, Ref(value)) + Int32(N), shape, type, Ref(value)) - return NDArray(ptr; T=T, n_dim=n_dim) + return NDArray(ptr, T, Val(N)) end function nda_random(arr::NDArray, gen_code) @@ -107,19 +108,19 @@ function nda_random(arr::NDArray, gen_code) arr.ptr, Int32(gen_code)) end -function nda_random_array(shape::Vector{UInt64}) - n_dim = Int32(length(shape)) +function nda_random_array(dims::Dims{N}) where {N} + shape = collect(UInt64, dims) ptr = ccall((:nda_random_array, libnda), NDArray_t, (Int32, Ptr{UInt64}), - n_dim, shape) - return NDArray(ptr; n_dim=n_dim) + Int32(N), shape) + return NDArray(ptr, Float64, Val(N)) #* T is always Float64 cause of cupynumeric end function nda_get_slice(arr::NDArray{T,N}, slices::Vector{Slice}) where {T,N} ptr = ccall((:nda_get_slice, libnda), NDArray_t, (NDArray_t, Ptr{Slice}, Cint), arr.ptr, pointer(slices), length(slices)) - return NDArray(ptr; T=T, n_dim=N) + return NDArray(ptr, T, Val(N)) end # queries @@ -142,12 +143,12 @@ function nda_array_shape(arr::NDArray) end # modify -function nda_reshape_array(arr::NDArray{T}, newshape::Vector{UInt64}) where {T} - n_dim = Int32(length(newshape)) +function nda_reshape_array(arr::NDArray{T}, newdims::Dims{N}) where {T, N} + newshape = collect(UInt64, newdims) ptr = ccall((:nda_reshape_array, libnda), NDArray_t, (NDArray_t, Int32, Ptr{UInt64}), - arr.ptr, n_dim, newshape) - return NDArray(ptr; T=T, n_dim=n_dim) + arr.ptr, Int32(N), newshape) + return NDArray(ptr, T, Val(N)) end function nda_astype(arr::NDArray{OLD_T,N}, ::Type{NEW_T}) where {OLD_T,NEW_T,N} @@ -156,7 +157,7 @@ function nda_astype(arr::NDArray{OLD_T,N}, ::Type{NEW_T}) where {OLD_T,NEW_T,N} NDArray_t, (NDArray_t, Legate.LegateTypeAllocated), arr.ptr, type) - return NDArray(ptr; T=NEW_T, n_dim=N) + return NDArray(ptr, NEW_T, Val(N)) end function nda_fill_array(arr::NDArray{T}, value::T) where {T} @@ -193,14 +194,14 @@ function nda_move(dst::NDArray{T,N}, 
src::NDArray{T,N}) where {T,N} end # operations -function nda_binary_op(out::NDArray, op_code::BinaryOpCode, rhs1::NDArray, rhs2::NDArray) +function nda_binary_op!(out::NDArray, op_code::BinaryOpCode, rhs1::NDArray, rhs2::NDArray) ccall((:nda_binary_op, libnda), Cvoid, (NDArray_t, BinaryOpCode, NDArray_t, NDArray_t), out.ptr, op_code, rhs1.ptr, rhs2.ptr) return out end -function nda_unary_op(out::NDArray, op_code::UnaryOpCode, input::NDArray) +function nda_unary_op!(out::NDArray, op_code::UnaryOpCode, input::NDArray) ccall((:nda_unary_op, libnda), Cvoid, (NDArray_t, UnaryOpCode, NDArray_t), out.ptr, op_code, input.ptr) @@ -218,7 +219,7 @@ function nda_array_equal(rhs1::NDArray{T,N}, rhs2::NDArray{T,N}) where {T,N} ptr = ccall((:nda_array_equal, libnda), NDArray_t, (NDArray_t, NDArray_t), rhs1.ptr, rhs2.ptr) - return NDArray(ptr; T=Bool, n_dim=1) + return NDArray(ptr, Bool, Val(1)) end function nda_diag(arr::NDArray, k::Int32) @@ -255,7 +256,7 @@ function nda_multiply_scalar(rhs1::NDArray{T,N}, value::T) where {T,N} ptr = ccall((:nda_multiply_scalar, libnda), NDArray_t, (NDArray_t, Legate.LegateTypeAllocated, Ptr{Cvoid}), rhs1.ptr, type, Ref(value)) - return NDArray(ptr; T=T, n_dim=N) + return NDArray(ptr, T, Val(N)) end function nda_add_scalar(rhs1::NDArray{T,N}, value::T) where {T,N} @@ -264,7 +265,7 @@ function nda_add_scalar(rhs1::NDArray{T,N}, value::T) where {T,N} ptr = ccall((:nda_add_scalar, libnda), NDArray_t, (NDArray_t, Legate.LegateTypeAllocated, Ptr{Cvoid}), rhs1.ptr, type, Ref(value)) - return NDArray(ptr; T=T, n_dim=N) + return NDArray(ptr, T, Val(N)) end function nda_three_dot_arg(rhs1::NDArray{T}, rhs2::NDArray{T}, out::NDArray{T}) where {T} @@ -286,7 +287,7 @@ function nda_eye(rows::Int32, ::Type{T}) where {T} ptr = ccall((:nda_eye, libnda), NDArray_t, (Int32, Legate.LegateTypeAllocated), rows, legate_type) - return NDArray(ptr; T=T, n_dim=2) + return NDArray(ptr, T, Val(2)) end function nda_trace( @@ -297,7 +298,7 @@ function nda_trace( NDArray_t, 
(NDArray_t, Int32, Int32, Int32, Legate.LegateTypeAllocated), arr.ptr, offset, a1, a2, legate_type) - return NDArray(ptr; T=T, n_dim=1) + return NDArray(ptr, T, Val(1)) end function nda_transpose(arr::NDArray) @@ -317,7 +318,7 @@ function nda_attach_external(arr::AbstractArray{T,N}) where {T,N} NDArray_t, (Ptr{Cvoid}, UInt64, Int32, Ptr{UInt64}, Legate.LegateTypeAllocated), ptr, nbytes, N, shape, legate_type) - return NDArray(nda_ptr; T=T, n_dim=N) + return NDArray(nda_ptr, T, Val(N)) end # return underlying logical store to the NDArray obj @@ -401,14 +402,6 @@ function slice_array(slices::Vararg{Tuple{Union{Int,Nothing},Union{Int,Nothing}} return v end -@doc""" - padded_shape(arr::NDArray) - -**Internal API** - -Return the size of the given `NDArray`. This will include the padded size. -""" -padded_shape(arr::NDArray) = Tuple(Int.(cuNumeric.nda_array_shape(arr))) @doc""" shape(arr::NDArray) @@ -417,11 +410,11 @@ padded_shape(arr::NDArray) = Tuple(Int.(cuNumeric.nda_array_shape(arr))) Return the size of the given `NDArray`. """ -function shape(arr::NDArray) - if !isnothing(arr.padding) - return arr.padding - end - return cuNumeric.padded_shape(arr) +shape(arr::NDArray{<:Any, N, true}) where N = arr.padding + +function shape(arr::NDArray{<:Any, N, false}) where {N} + shp = cuNumeric.nda_array_shape(arr) + return ntuple(i -> Int(shp[i]), Val(N)) end @doc""" diff --git a/src/ndarray/ndarray.jl b/src/ndarray/ndarray.jl index 02ec68e5..b703ceb9 100644 --- a/src/ndarray/ndarray.jl +++ b/src/ndarray/ndarray.jl @@ -220,7 +220,7 @@ size(arr) size(arr, 2) ``` """ -Base.size(arr::NDArray) = cuNumeric.shape(arr) +Base.size(arr::NDArray{<:Any, N}) where N = cuNumeric.shape(arr) Base.size(arr::NDArray, dim::Int) = Base.size(arr)[dim] @doc""" @@ -253,17 +253,21 @@ Base.IndexStyle(::NDArray) = IndexCartesian() function Base.show(io::IO, arr::NDArray{T,0}) where {T} println(io, "0-dimensional NDArray{$(T),0}") - print(io, arr[]) #! should I assert scalar?? 
+ allowscalar() do + print(io, arr[]) + end end function Base.show(io::IO, ::MIME"text/plain", arr::NDArray{T,0}) where {T} println(io, "0-dimensional NDArray{$(T),0}") - print(io, arr[]) #! should I assert scalar?? + allowscalar() do + print(io, arr[]) + end end function Base.show(io::IO, arr::NDArray{T,D}) where {T,D} println(io, "NDArray{$(T),$(D)}") - Base.print_matrix(io, Array(arr)) + Base.print_array(io, Array(arr)) end function Base.show(io::IO, ::MIME"text/plain", arr::NDArray{T}) where {T} @@ -304,7 +308,7 @@ Assignment also supports: # Examples ```@repl -A = cuNumeric.full((3, 3), 1.0); +A = cuNumeric.fill(1.0, (3, 3)); A[1, 2] A[1:2, 2:3] = cuNumeric.ones(2, 2); A[:, 1] = 5.0; @@ -461,25 +465,27 @@ Base.fill!(arr::NDArray{T}, val::T) where {T} = nda_fill_array(arr, val) #### INITIALIZATION OF NDARRAYS #### @doc""" - cuNumeric.full(dims::Tuple, val) - cuNumeric.full(dim::Int, val) + cuNumeric.fill(val::T, dims::Dims) + cuNumeric.fill(val::T, dims::Int...) Create an `NDArray` filled with the scalar value `val`, with the shape specified by `dims`. # Examples ```@repl -cuNumeric.full((2, 3), 7.5) -cuNumeric.full(4, 0) +cuNumeric.fill(7.5, (2, 3)) +cuNumeric.fill(0, 4) ``` """ -function full(dims::Dims, val::T) where {T<:SUPPORTED_TYPES} - shape = collect(UInt64, dims) - return nda_full_array(shape, val) +function fill(val::T, dims::Dims) where {T<:SUPPORTED_TYPES} + return nda_full_array(dims, val) end -function full(dim::Int, val::T) where {T<:SUPPORTED_TYPES} - shape = UInt64[dim] - return nda_full_array(shape, val) +function fill(val::T, dims::Int...) where {T<:SUPPORTED_TYPES} + return fill(val, dims) +end + +function fill(val::T, dim::Int) where {T<:SUPPORTED_TYPES} + return fill(val, (dim,)) end @doc""" @@ -494,9 +500,9 @@ Create an `NDArray` filled with the true, with the shape specified by `dims`. cuNumeric.trues(2, 3) ``` """ -trues(dim::Int) = cuNumeric.full(dim, true) -trues(dims::Dims) = cuNumeric.full(dims, true) -trues(dims::Int...) 
= cuNumeric.full(dims, true) +trues(dim::Int) = cuNumeric.fill(true, dim) +trues(dims::Dims) = cuNumeric.fill(true, dims) +trues(dims::Int...) = cuNumeric.fill(true, dims) @doc""" cuNumeric.falses(dims::Tuple, val) @@ -510,9 +516,10 @@ Create an `NDArray` filled with the false, with the shape specified by `dims`. cuNumeric.falses(2, 3) ``` """ -falses(dim::Int) = cuNumeric.full(dim, false) -falses(dims::Dims) = cuNumeric.full(dims, false) -falses(dims::Int...) = cuNumeric.full(dims, false) +falses(dims::Dims) = cuNumeric.fill(false, dims) +falses(dims::Int...) = cuNumeric.fill(false, dims) +falses(dim::Int) = cuNumeric.fill(false, dim) + @doc""" cuNumeric.zeros([T=Float32,] dims::Int...) @@ -528,9 +535,8 @@ cuNumeric.zeros(Float64, 3) cuNumeric.zeros(Int32, (2,3)) ``` """ -function zeros(::Type{T}, dims::Dims) where {T<:SUPPORTED_TYPES} - shape = collect(UInt64, dims) - return nda_zeros_array(shape, T) +function zeros(::Type{T}, dims::Dims{N}) where {T<:SUPPORTED_TYPES, N} + return nda_zeros_array(dims, T) end function zeros(::Type{T}, dims::Int...) where {T<:SUPPORTED_TYPES} @@ -546,15 +552,15 @@ function zeros(dims::Int...) end function zeros(::Type{T}) where {T} - return nda_zeros_array(UInt64[], T) + return nda_zeros_array((), T) end function zeros() return zeros(DEFAULT_FLOAT) end -function zeros_like(arr::NDArray) - return zeros(eltype(arr), Base.size(arr)) +function zeros_like(arr::NDArray{T,N}) where {T,N} + return zeros(T, Base.size(arr)) end @doc""" @@ -572,7 +578,7 @@ cuNumeric.ones(Int32, (2, 3)) ``` """ function ones(::Type{T}, dims::Dims) where {T} - return full(dims, T(1)) + return nda_full_array(dims, T(1)) end function ones(::Type{T}, dims::Int...) where {T} @@ -588,11 +594,11 @@ function ones(dims::Int...) 
end function ones(::Type{T}) where {T} - return full((), T(1)) + return cuNumeric.fill(T(1), ()) end function ones() - return zeros(DEFAULT_FLOAT) + return ones(DEFAULT_FLOAT) end @doc""" @@ -617,14 +623,13 @@ A = cuNumeric.zeros(2, 2); cuNumeric.rand!(A) ``` """ Random.rand!(arr::NDArray{Float64}) = cuNumeric.nda_random(arr, 0) -rand(::Type{NDArray}, dims::Dims) = cuNumeric.nda_random_array(UInt64.(collect(dims))) +rand(::Type{NDArray}, dims::Dims) = cuNumeric.nda_random_array(dims) rand(::Type{NDArray}, dims::Int...) = cuNumeric.rand(NDArray, dims) rand(dims::Dims) = cuNumeric.rand(NDArray, dims) rand(dims::Int...) = cuNumeric.rand(NDArray, dims) function rand(::Type{T}, dims::Dims) where {T<:AbstractFloat} - arrfp64 = cuNumeric.nda_random_array(UInt64.(collect(dims))) - # if T == Float64, as_type should do minimial work # TODO check this. + arrfp64 = cuNumeric.nda_random_array(dims) return cuNumeric.as_type(arrfp64, T) end @@ -645,14 +650,15 @@ reshape(arr, 12) ``` """ +#* UNSTABLE: use Val{false} if we really want this flag function reshape(arr::NDArray, i::Dims{N}; copy::Bool=false) where {N} - reshaped = nda_reshape_array(arr, UInt64.(collect(i))) + reshaped = nda_reshape_array(arr, i) return copy ? copy(reshaped) : reshaped end -function reshape(arr::NDArray, i::Int64; copy::Bool=false) - reshaped = nda_reshape_array(arr, UInt64.([i])) - return copy ? copy(reshaped) : reshaped +#* UNSTABLE: use Val{false} if we really want this flag +function reshape(arr::NDArray, i::Int...; copy::Bool=false) + return reshape(arr, i; copy = copy) end # Ignore the scalar indexing here... 
diff --git a/src/ndarray/unary.jl b/src/ndarray/unary.jl index 4158b010..3b12e73d 100644 --- a/src/ndarray/unary.jl +++ b/src/ndarray/unary.jl @@ -101,13 +101,13 @@ global const unary_op_map_no_args = Dict{Function,UnaryOpCode}( ### SPECIAL CASES ### # Needed to support != -Base.:(!)(input::NDArray{Bool,0}) = nda_unary_op(similar(input), cuNumeric.LOGICAL_NOT, input) -Base.:(!)(input::NDArray{Bool,1}) = nda_unary_op(similar(input), cuNumeric.LOGICAL_NOT, input) +Base.:(!)(input::NDArray{Bool,0}) = nda_unary_op!(similar(input), cuNumeric.LOGICAL_NOT, input) +Base.:(!)(input::NDArray{Bool,1}) = nda_unary_op!(similar(input), cuNumeric.LOGICAL_NOT, input) # Non-broadcasted version of negation function Base.:(-)(input::NDArray{T}) where {T} out = cuNumeric.zeros(T, size(input)) - return nda_unary_op(out, cuNumeric.NEGATIVE, input) + return nda_unary_op!(out, cuNumeric.NEGATIVE, input) end function Base.:(-)(input::NDArray{Bool}) @@ -121,7 +121,7 @@ end @inline function __broadcast( f::typeof(Base.literal_pow), out::NDArray{O}, _, input::NDArray{T}, ::Type{Val{2}} ) where {T,O} - return nda_unary_op(out, cuNumeric.SQUARE, input) + return nda_unary_op!(out, cuNumeric.SQUARE, input) end @inline function __broadcast( @@ -129,13 +129,13 @@ end ) where {O} nda_move(out, O(1) ./ checked_promote_arr(input, O)) #! REPLACE WITH RECIP ONCE FIXED return out - # return nda_unary_op(out, cuNumeric.RECIPROCAL, input) + # return nda_unary_op!(out, cuNumeric.RECIPROCAL, input) end @inline function __broadcast(::typeof(Base.inv), out::NDArray{O}, input::NDArray) where {O} nda_move(out, O(1) ./ checked_promote_arr(input, O)) #! REPLACE WITH RECIP ONCE FIXED return out - # return nda_unary_op(out, cuNumeric.RECIPROCAL, checked_promote_arr(input,O)) + # return nda_unary_op!(out, cuNumeric.RECIPROCAL, checked_promote_arr(input,O)) end #! 
NEEDS TO SUPPORT inv and ^ -1 @@ -150,7 +150,7 @@ end # Only supported for Bools @inline function __broadcast(f::typeof(Base.:(!)), out::NDArray{Bool}, input::NDArray{Bool}) - return nda_unary_op(out, cuNumeric.LOGICAL_NOT, input) + return nda_unary_op!(out, cuNumeric.LOGICAL_NOT, input) end # Generate hidden broadcasted version of unary ops. @@ -159,7 +159,7 @@ for (julia_fn, op_code) in unary_op_map_no_args @inline function __broadcast( f::typeof($julia_fn), out::NDArray{T}, input::NDArray{T} ) where {T} - return nda_unary_op(out, $(op_code), input) + return nda_unary_op!(out, $(op_code), input) end end end @@ -172,7 +172,7 @@ for (julia_fn, op_code) in floaty_unary_ops_no_args @inline function __broadcast( f::typeof($julia_fn), out::NDArray{T}, input::NDArray{T} ) where {T} - return nda_unary_op(out, $(op_code), input) + return nda_unary_op!(out, $(op_code), input) end # If input is not already float, promote to that diff --git a/test/runtests.jl b/test/runtests.jl index 60137c4c..4df77506 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -380,6 +380,10 @@ end end end +@testset verbose = true "Type Stability" begin + include("tests/stability.jl") +end + @testset verbose = true "Scoping" begin N = 100 diff --git a/test/tests/stability.jl b/test/tests/stability.jl new file mode 100644 index 00000000..d097454a --- /dev/null +++ b/test/tests/stability.jl @@ -0,0 +1,77 @@ +@testset verbose = true "core" begin + a = cuNumeric.zeros(5) + b = cuNumeric.zeros(Float64, 3, 4) + @inferred size(a) + @inferred size(b) + @inferred cuNumeric.shape(a) + @inferred cuNumeric.shape(b) +end + +@testset verbose = true "construction" begin + # zeros, zeros_like, ones, rand, fill, trues, falses\ + for constructor in (:zeros, :ones) + @eval begin + @inferred cuNumeric.$(constructor)(Float64, 3, 2) + @inferred cuNumeric.$(constructor)(Float64, (3, 4)) + @inferred cuNumeric.$(constructor)(3, 5, 6) + @inferred cuNumeric.$(constructor)((3,)) + @inferred cuNumeric.$(constructor)() + 
@inferred cuNumeric.$(constructor)(Int64) + end + end + a = cuNumeric.zeros(Float64, 5, 3) + @inferred cuNumeric.zeros_like(a) + + for constructor in (:trues, :falses) + @eval begin + @inferred cuNumeric.$(constructor)(5) + @inferred cuNumeric.$(constructor)((5,4)) + @inferred cuNumeric.$(constructor)(3, 4, 5) + end + end + + @inferred cuNumeric.fill(2.0, 3, 4) + @inferred cuNumeric.fill(2, (3, 4)) + @inferred cuNumeric.fill(2.0, 3) + + @inferred cuNumeric.rand(4, 3) + @inferred cuNumeric.rand(Float32, 5) +end + +@testset verbose = true "conversion" begin + # cast to array, as_type + a = cuNumeric.zeros(Float64, 5, 5) + @inferred Array(a) + @inferred Array{Float32}(a) + @inferred cuNumeric.as_type(a, Float32) + @inferred cuNumeric.as_type(a, Int64) +end + +@testset verbose = true "indexing" begin + # getindex, setindex!, copy, copyto!, fill!, as_type + a = cuNumeric.zeros(Float32, 5, 5) + b = cuNumeric.zeros(Int32, 11) + + @inferred a[1:3, 1:3] + @inferred a[2, 1:3] + @inferred a[1, 1:3] .+ b[1:3] + @inferred b[1:5] + # @inferred a[1:3, 1:end] + allowscalar() do + @inferred a[1, 2] + end +end + +@testset verbose = true "broadcasting" begin + a = cuNumeric.ones(Float32, 3, 3) + b = cuNumeric.ones(Int32, 3, 3) + @inferred 5 .* a + @inferred 5.0f0 .* a + @inferred 5 * a + @inferred 5.0f0 * a + + @inferred a .* b + @inferred a .+ b + @inferred a ./ b + @inferred ((a .* b) .+ a) .* 2.0f0 +end \ No newline at end of file