#= Patch context carried over verbatim from the surrounding diff (not Julia code).
   The module below needs the StructArrays dependency that this hunk adds.

diff --git a/Project.toml b/Project.toml
index c237d42a..de530589 100644
--- a/Project.toml
+++ b/Project.toml
@@ -12,6 +12,7 @@ MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
 PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
+StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
 TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"

 [weakdeps]
@@ -39,6 +40,7 @@ MacroTools = "0.4, 0.5"
 Optim = "0.19, 1"
 PrecompileTools = "1"
 Reexport = "1"
+StructArrays = "0.7.1"
 SymbolicUtils = "0.19, ^1.0.5, 2, 3"
 Zygote = "0.7"
 julia = "1.10"
diff --git a/src/ArrayNode.jl b/src/ArrayNode.jl
new file mode 100644
index 00000000..2195a4e1
--- /dev/null
+++ b/src/ArrayNode.jl
@@ -0,0 +1,457 @@
=#
module ArrayNodeModule

# Expression nodes stored in a struct-of-arrays arena.
#
# An `ArrayTree` owns the storage; an `ArrayNode` is an immutable `(tree, idx)` handle
# into it. Index 0 is the "no child" sentinel, so valid node indices are
# `1:length(tree.nodes)` and the capacity is bounded by `typemax(UInt16)`.

using ..NodeModule: AbstractExpressionNode, Nullable
using ..UtilsModule: Undefined
using StructArrays: StructArray, StructVector

import ..NodeModule:
    constructorof,
    with_type_parameters,
    with_max_degree,
    default_allocator,
    get_children,
    set_children!,
    unsafe_get_children,
    copy_node

export ArrayNode

# Minimum capacity of the arena created when a node is built without an allocator.
const DEFAULT_TREE_CAPACITY = 64

# Per-node record; `ArrayTree` stores one array per field of this struct.
struct NodeData{T,D}
    degree::UInt8
    constant::Bool
    val::T
    feature::UInt16
    op::UInt8
    children::NTuple{D,UInt16}
end

# Empty record: a constant leaf holding `zero(T)` with every child slot null.
function NodeData{T,D}() where {T,D}
    return NodeData{T,D}(
        UInt8(0), true, zero(T), UInt16(0), UInt8(0), ntuple(_ -> UInt16(0), Val(D))
    )
end

# Arena of nodes plus a stack-style free list.
#
# Fields:
# - `nodes`: struct-of-arrays storage, one slot per node.
# - `root_idx`: index of the first node allocated in this arena (0 while empty).
#   NOTE(review): with a shared allocator this is usually a leaf, not the expression root.
# - `n_nodes`: number of currently allocated slots.
# - `free_list` / `free_count`: `free_list[1:free_count]` are the free slot indices;
#   allocation pops from the end.
mutable struct ArrayTree{T,D,S<:StructVector{NodeData{T,D}}}
    const nodes::S
    root_idx::UInt16
    n_nodes::UInt16
    const free_list::Vector{UInt16}
    free_count::UInt16

    # Wrap existing storage. Every slot starts out free; callers that pre-populate
    # `nodes` (e.g. `copy_node`) fix up the bookkeeping afterwards.
    function ArrayTree{T,D}(nodes::S) where {T,D,S<:StructVector{NodeData{T,D}}}
        n = length(nodes)
        n <= typemax(UInt16) || throw(
            ArgumentError("ArrayTree capacity must be at most $(typemax(UInt16)), got $n")
        )
        free_list = collect(UInt16(1):UInt16(n))
        return new{T,D,S}(nodes, UInt16(0), UInt16(0), free_list, UInt16(n))
    end

    # Allocate uninitialised storage for `n` nodes. `array_type` is the vector
    # constructor used for each field array; it must support `array_type{E}(undef, n)`.
    function ArrayTree{T,D}(n::Int; array_type::Type{<:AbstractVector}=Vector) where {T,D}
        0 <= n <= typemax(UInt16) || throw(
            ArgumentError("ArrayTree capacity must be in 0:$(typemax(UInt16)), got $n")
        )
        nodes = if array_type === Vector
            StructVector{NodeData{T,D}}(undef, n)
        else
            StructVector{NodeData{T,D}}((
                degree=array_type{UInt8}(undef, n),
                constant=array_type{Bool}(undef, n),
                val=array_type{T}(undef, n),
                feature=array_type{UInt16}(undef, n),
                op=array_type{UInt8}(undef, n),
                children=array_type{NTuple{D,UInt16}}(undef, n),
            ))
        end
        return ArrayTree{T,D}(nodes)
    end
end

# Handle to slot `idx` of `tree`. All node data is reached through
# `getproperty`/`setproperty!`, so use `getfield` for the raw fields.
struct ArrayNode{T,D,S} <: AbstractExpressionNode{T,D}
    tree::ArrayTree{T,D,S}
    idx::UInt16
end

# Wrap a raw child slot: index 0 becomes a null `Nullable` (carrying `n` as the
# poison payload), anything else a handle into the same tree.
@inline function _wrap_child(n::ArrayNode{T,D,S}, child_idx::UInt16) where {T,D,S}
    return if child_idx == 0
        Nullable(true, n)
    else
        Nullable(false, ArrayNode{T,D,S}(getfield(n, :tree), child_idx))
    end
end

# Index that `child` should have inside `tree`: its own index when it already lives
# there, otherwise the index of a freshly copied subtree. Null/unknown values map to 0.
@inline function _child_index!(tree::ArrayTree, child::ArrayNode)
    child_tree = getfield(child, :tree)
    child_idx = getfield(child, :idx)
    return child_tree === tree ? child_idx : copy_subtree!(tree, child_tree, child_idx)
end
@inline function _child_index!(tree::ArrayTree, child::Nullable)
    return child.null ? UInt16(0) : _child_index!(tree, child.x)
end
@inline _child_index!(::ArrayTree, ::Any) = UInt16(0)

# Number of slots `copy_subtree!` would allocate for the given child / subtree.
_subtree_size(child::ArrayNode) = _subtree_size(getfield(child, :tree), getfield(child, :idx))
_subtree_size(child::Nullable) = child.null ? 0 : _subtree_size(child.x)
_subtree_size(::Any) = 0
function _subtree_size(tree::ArrayTree, idx::UInt16)
    idx == 0 && return 0
    nodes = getfield(tree, :nodes)
    slots = nodes.children[idx]
    total = 1
    for i in 1:nodes.degree[idx]
        total += _subtree_size(tree, slots[i])
    end
    return total
end

# Capacity for an implicitly created arena: room for the new node plus everything
# that will be copied into it, never less than `DEFAULT_TREE_CAPACITY`.
function _fresh_capacity(kids)
    needed = isnothing(kids) ? 1 : 1 + sum(_subtree_size, kids; init=0)
    return clamp(needed, DEFAULT_TREE_CAPACITY, Int(typemax(UInt16)))
end

# Field-style read access. `:children` returns `D` `Nullable`s; `:l`/`:r` return the
# first/second child and raise if that slot is null.
@inline function Base.getproperty(n::ArrayNode{T,D,S}, k::Symbol) where {T,D,S}
    tree = getfield(n, :tree)
    idx = getfield(n, :idx)
    k === :tree && return tree
    k === :idx && return idx
    nodes = getfield(tree, :nodes)

    if k === :degree
        return @inbounds nodes.degree[idx]
    elseif k === :constant
        return @inbounds nodes.constant[idx]
    elseif k === :val
        return @inbounds nodes.val[idx]
    elseif k === :feature
        return @inbounds nodes.feature[idx]
    elseif k === :op
        return @inbounds nodes.op[idx]
    elseif k === :children
        slots = @inbounds nodes.children[idx]
        return ntuple(i -> _wrap_child(n, slots[i]), Val(D))
    elseif k === :l
        slots = @inbounds nodes.children[idx]
        child_idx = slots[1]
        child_idx == 0 && error("No left child")
        return ArrayNode{T,D,S}(tree, child_idx)
    elseif k === :r
        slots = @inbounds nodes.children[idx]
        # The tuple index stays bounds-checked so a unary-only node type (D == 1)
        # raises instead of reading out of range.
        child_idx = slots[2]
        child_idx == 0 && error("No right child")
        return ArrayNode{T,D,S}(tree, child_idx)
    else
        error("Unknown field $k")
    end
end

# Store `child` in child slot `slot` of node `idx`. A child that lives in another
# tree is copied into `tree` first, matching `set_children!`.
function _set_child_slot!(tree::ArrayTree, idx::UInt16, slot::Int, child::ArrayNode)
    child_idx = _child_index!(tree, child)
    children = getfield(tree, :nodes).children
    children[idx] = Base.setindex(children[idx], child_idx, slot)
    return nothing
end

# Field-style write access. Returns `v`, like the default `setproperty!`.
@inline function Base.setproperty!(n::ArrayNode{T,D,S}, k::Symbol, v) where {T,D,S}
    tree = getfield(n, :tree)
    idx = getfield(n, :idx)
    nodes = getfield(tree, :nodes)

    if k === :degree
        @inbounds nodes.degree[idx] = v
    elseif k === :constant
        @inbounds nodes.constant[idx] = v
    elseif k === :val
        @inbounds nodes.val[idx] = v
    elseif k === :feature
        @inbounds nodes.feature[idx] = v
    elseif k === :op
        @inbounds nodes.op[idx] = v
    elseif k === :l
        v isa ArrayNode || error("Cannot set left child to non-ArrayNode")
        _set_child_slot!(tree, idx, 1, v)
    elseif k === :r
        v isa ArrayNode || error("Cannot set right child to non-ArrayNode")
        _set_child_slot!(tree, idx, 2, v)
    else
        error("Cannot set field $k")
    end
    return v
end

# Allocation management

# Pop a free slot index; raises when the arena is exhausted.
@inline function allocate_node!(tree::ArrayTree)
    tree.free_count == 0 && error("ArrayTree full")
    idx = tree.free_list[tree.free_count]
    tree.free_count -= 1
    tree.n_nodes += 1
    return idx
end

# Push `idx` back onto the free list and return the new node count.
# There is no double-free detection.
@inline function free_node!(tree::ArrayTree, idx::UInt16)
    tree.free_count += 1
    tree.free_list[tree.free_count] = idx
    return tree.n_nodes -= 1
end

# Default constructors: build a default node inside a fresh arena of capacity `n`.
function ArrayNode{T,D}(n::Int; array_type::Type{<:AbstractVector}=Vector) where {T,D}
    return ArrayNode{T,D}(Undefined; allocator=ArrayTree{T,D}(n; array_type=array_type))
end
function ArrayNode{T}(n::Int; array_type::Type{<:AbstractVector}=Vector) where {T}
    return ArrayNode{T,2}(n; array_type=array_type)
end

# Keyword constructors for partial type signatures. The `S` parameter of the
# three-parameter form is ignored; storage is decided by the allocator.
ArrayNode{T,D}(; kwargs...) where {T,D} = ArrayNode{T,D}(Undefined; kwargs...)
ArrayNode{T,D,S}(; kwargs...) where {T,D,S} = ArrayNode{T,D}(Undefined; kwargs...)
ArrayNode{T}(; kwargs...) where {T} = ArrayNode{T,2}(; kwargs...)

# Children for an operator node: `(l,)`, `(l, r)`, or the explicit `children`
# collection when `l` is not given (a lone `r` is ignored, as before).
@inline function _operator_children(l, r, children)
    if !isnothing(l) && isnothing(r)
        return (l,)
    elseif !isnothing(l) && !isnothing(r)
        return (l, r)
    else
        return children
    end
end

# Constructor with keyword arguments - matches Node interface.
#
# Precedence is `val`, then `feature`, then `op` with children; anything else
# yields a constant leaf holding `zero(T)`. The node is placed in `allocator` when
# that is an `ArrayTree`, otherwise in a new arena sized to hold the node and every
# child subtree that has to be copied into it. Children living in another tree are
# copied; children already in the target tree are linked.
#
# Throws `ArgumentError` when more than `D` children are supplied.
function ArrayNode{T,D}(
    ::Type{T1};
    val=nothing,
    feature=nothing,
    op=nothing,
    l=nothing,
    r=nothing,
    children=nothing,
    allocator=nothing,
) where {T,D,T1}
    wants_leaf = !isnothing(val) || !isnothing(feature)
    kids = (isnothing(op) || wants_leaf) ? nothing : _operator_children(l, r, children)

    # Validate before allocating so a rejected call does not leak a slot.
    if !isnothing(kids) && length(kids) > D
        throw(
            ArgumentError(
                "got $(length(kids)) children, but ArrayNode{$T,$D} holds at most $D"
            ),
        )
    end

    tree = if allocator isa ArrayTree
        allocator
    else
        ArrayTree{T,D}(_fresh_capacity(kids))
    end

    idx = allocate_node!(tree)
    # Only the first node allocated in an arena is recorded as its root.
    if tree.n_nodes == 1
        tree.root_idx = idx
    end

    nodes = getfield(tree, :nodes)
    if !isnothing(val)
        nodes.degree[idx] = 0
        nodes.constant[idx] = true
        nodes.val[idx] = val
        nodes.children[idx] = ntuple(_ -> UInt16(0), Val(D))
    elseif !isnothing(feature)
        nodes.degree[idx] = 0
        nodes.constant[idx] = false
        nodes.feature[idx] = feature
        nodes.children[idx] = ntuple(_ -> UInt16(0), Val(D))
    elseif !isnothing(kids)
        nodes.degree[idx] = length(kids)
        # Keep `constant` deterministic on operator nodes instead of leaving
        # whatever the uninitialised storage happened to contain.
        nodes.constant[idx] = false
        nodes.op[idx] = op
        nodes.children[idx] = ntuple(
            i -> i <= length(kids) ? _child_index!(tree, kids[i]) : UInt16(0), Val(D)
        )
    else
        # Default: empty constant
        nodes.degree[idx] = 0
        nodes.constant[idx] = true
        nodes.val[idx] = zero(T)
        nodes.children[idx] = ntuple(_ -> UInt16(0), Val(D))
    end
    return ArrayNode{T,D,typeof(nodes)}(tree, idx)
end

# Recursively copy the subtree rooted at `src_idx` of `src` into freshly allocated
# slots of `dst`; returns the index of the copied root in `dst`. Only the first
# `degree` child slots are followed; the remaining slots are written as null.
function copy_subtree!(
    dst::ArrayTree{T,D}, src::ArrayTree{T,D}, src_idx::UInt16
) where {T,D}
    dst_idx = allocate_node!(dst)
    from = getfield(src, :nodes)
    to = getfield(dst, :nodes)

    degree = @inbounds from.degree[src_idx]
    @inbounds begin
        to.degree[dst_idx] = degree
        to.constant[dst_idx] = from.constant[src_idx]
        to.feature[dst_idx] = from.feature[src_idx]
        to.op[dst_idx] = from.op[src_idx]
    end
    # For non-isbits `T` the value slot of a feature/operator node may never have
    # been assigned; reading it would throw `UndefRefError`.
    if isbitstype(T) || isassigned(from.val, Int(src_idx))
        @inbounds to.val[dst_idx] = from.val[src_idx]
    end

    slots = @inbounds from.children[src_idx]
    to.children[dst_idx] = ntuple(Val(D)) do i
        child_idx = slots[i]
        (i <= degree && child_idx > 0) ? copy_subtree!(dst, src, child_idx) : UInt16(0)
    end

    return dst_idx
end

constructorof(::Type{<:ArrayNode}) = ArrayNode
# A bare `ArrayNode` defaults to binary; a type that already fixes `D` keeps it.
with_type_parameters(::Type{<:ArrayNode}, ::Type{T}) where {T} = ArrayNode{T,2}
function with_type_parameters(::Type{<:ArrayNode{<:Any,D}}, ::Type{T}) where {D,T}
    return ArrayNode{T,D}
end
with_max_degree(::Type{<:ArrayNode{T,D}}, ::Val{D2}) where {T,D,D2} = ArrayNode{T,D2}
# Fresh 32-slot arena for the given node type (with or without the storage parameter).
function default_allocator(
    ::Type{<:ArrayNode{T,D}}; array_type::Type{<:AbstractVector}=Vector
) where {T,D}
    return ArrayTree{T,D}(32; array_type=array_type)
end

# get_children and set_children!

# All `D` child slots as `Nullable`s (null where the slot is empty).
function unsafe_get_children(n::ArrayNode{T,D,S}) where {T,D,S}
    tree = getfield(n, :tree)
    idx = getfield(n, :idx)
    slots = @inbounds tree.nodes.children[idx]
    return ntuple(i -> _wrap_child(n, slots[i]), Val(D))
end

# First `d` children as plain handles. Slots are not checked for null, so `d` must
# not exceed the node's degree.
function get_children(n::ArrayNode{T,D,S}, ::Val{d}) where {T,D,S,d}
    tree = getfield(n, :tree)
    idx = getfield(n, :idx)
    slots = @inbounds tree.nodes.children[idx]
    return ntuple(i -> ArrayNode{T,D,S}(tree, slots[i]), Val(Int(d)))
end

# Replace the child slots of `n` with `cs` (nodes or `Nullable`s), padding with null
# up to `D`. Children from other trees are copied into `n`'s tree. The node's
# `degree` is not touched.
function set_children!(n::ArrayNode{T,D,S}, cs::Tuple) where {T,D,S}
    tree = getfield(n, :tree)
    idx = getfield(n, :idx)
    slots = ntuple(i -> i <= length(cs) ? _child_index!(tree, cs[i]) : UInt16(0), Val(D))
    tree.nodes.children[idx] = slots
    return nothing
end

# Helper to mark nodes as reachable from a given root. Already-marked nodes are
# skipped, so shared subtrees are visited once.
function mark_reachable!(
    reachable::Vector{Bool}, tree::ArrayTree{T,D}, idx::UInt16
) where {T,D}
    (idx == 0 || reachable[idx]) && return nothing
    reachable[idx] = true
    nodes = getfield(tree, :nodes)
    slots = @inbounds nodes.children[idx]
    degree = @inbounds nodes.degree[idx]
    for i in 1:degree
        mark_reachable!(reachable, tree, slots[i])
    end
    return nothing
end

# Copy
#
# Returns a handle to the same index in a new arena of equal capacity and the same
# storage kind (each field array is duplicated with `copy`). Only the nodes
# reachable from `n` count as allocated in the copy; every other slot goes back on
# the free list. `break_sharing` is accepted for interface compatibility and ignored.
function copy_node(n::ArrayNode{T,D,S}; break_sharing::Val{BS}=Val(false)) where {T,D,S,BS}
    src_tree = getfield(n, :tree)
    idx = getfield(n, :idx)
    src = getfield(src_tree, :nodes)

    nodes = StructVector{NodeData{T,D}}((
        degree=copy(src.degree),
        constant=copy(src.constant),
        val=copy(src.val),
        feature=copy(src.feature),
        op=copy(src.op),
        children=copy(src.children),
    ))
    new_tree = ArrayTree{T,D}(nodes)

    # `root_idx` of the source is merely its first allocation, so it cannot tell a
    # full-tree copy from a subtree copy; always rebuild the bookkeeping.
    capacity = length(nodes)
    reachable = fill(false, capacity)
    mark_reachable!(reachable, new_tree, idx)

    free_count = 0
    for i in 1:capacity
        if !reachable[i]
            free_count += 1
            new_tree.free_list[free_count] = UInt16(i)
        end
    end
    new_tree.free_count = free_count
    new_tree.n_nodes = capacity - free_count
    new_tree.root_idx = idx

    return ArrayNode{T,D,typeof(nodes)}(new_tree, idx)
end

Base.copy(n::ArrayNode) = copy_node(n)

end # module

#= Patch context carried over verbatim from the surrounding diff (not Julia code);
   this hunk continues on the next line of the patch.

diff --git a/src/DynamicExpressions.jl b/src/DynamicExpressions.jl
index 355e7b98..bc1a1b77 100644
--- a/src/DynamicExpressions.jl
+++ b/src/DynamicExpressions.jl
@@ -8,6 +8,7 @@ using DispatchDoctor: @stable, @unstable
 include("ExtensionInterface.jl")
 include("OperatorEnum.jl")
=#
#= Patch context carried over verbatim from the surrounding diff (not Julia code);
   the first hunk continues from the previous line of the patch.

 include("Node.jl")
+    include("ArrayNode.jl")
 include("NodeUtils.jl")
 include("NodePreallocation.jl")
 include("Strings.jl")
@@ -50,6 +51,7 @@ import .ValueInterfaceModule:
     tree_mapreduce,
     filter_map,
     filter_map!
+import .ArrayNodeModule: ArrayNode
 import .NodePreallocationModule: allocate_container, copy_into!
 import .NodeModule:
     constructorof,
diff --git a/test/Project.toml b/test/Project.toml
index 61ff11c1..4aacc196 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,10 +1,12 @@
 [deps]
+AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a"
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 Bumper = "8ce10254-0962-460f-a3d8-1f77fea1446e"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
 DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 DispatchDoctor = "8d63f2c5-f18a-4cf2-ba9d-b3f60fc568c8"
+FixedSizeArrays = "3821ddf9-e5b5-40d5-8e25-6813ab96b5e2"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 Interfaces = "85a1e053-f937-4924-92a5-1367d23b7b87"
 JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
diff --git a/test/test_array_node.jl b/test/test_array_node.jl
new file mode 100644
index 00000000..6a734bcd
--- /dev/null
+++ b/test/test_array_node.jl
@@ -0,0 +1,530 @@
=#

# Runs the generic node-interface checks against Vector-backed ArrayNodes.
@testitem "ArrayNode interface with Vector" begin
    using DynamicExpressions
    using DynamicExpressions: NodeInterface
    using Interfaces: Interfaces
    const ArrayNode = DynamicExpressions.ArrayNode

    x1 = ArrayNode{Float64,2,Vector}(; feature=1)
    x2 = ArrayNode{Float64,2,Vector}(; feature=2)

    operators = OperatorEnum(; binary_operators=[+, *], unary_operators=[sin])

    # Same shapes as the trees used in test_node_interface.jl.
    tree_branch_deg2 = ArrayNode{Float64,2,Vector}(;
        op=1,
        l=x1,
        r=ArrayNode{Float64,2,Vector}(;
            op=1,
            l=ArrayNode{Float64,2,Vector}(;
                op=2, l=x2, r=ArrayNode{Float64,2,Vector}(; val=3.5)
            ),
        ),
    ) # x1 + sin(x2 * 3.5)

    tree_branch_deg1 = ArrayNode{Float64,2,Vector}(; op=1, l=x1) # sin(x1)
    tree_leaf_feature = x1
    tree_leaf_constant = ArrayNode{Float64,2,Vector}(; val=1.0)

    @test Interfaces.test(
        NodeInterface,
        ArrayNode,
        [tree_branch_deg2, tree_branch_deg1, tree_leaf_feature, tree_leaf_constant],
    )
end

# NOTE(review): despite its name this item builds exactly the same Vector-backed
# trees as the item above (the `Vector` type parameter is ignored by the keyword
# constructor), so no custom storage is exercised here. Custom storage is covered by
# the FixedSizeArrays item further down - confirm whether this item should be
# rewritten or dropped.
@testitem "ArrayNode with custom array type" begin
    using DynamicExpressions
    using DynamicExpressions: NodeInterface
    using Interfaces: Interfaces
    const ArrayNode = DynamicExpressions.ArrayNode

    x1 = ArrayNode{Float64,2,Vector}(; feature=1)
    x2 = ArrayNode{Float64,2,Vector}(; feature=2)

    operators = OperatorEnum(; binary_operators=[+, *], unary_operators=[sin])

    tree_branch_deg2 = ArrayNode{Float64,2,Vector}(;
        op=1,
        l=x1,
        r=ArrayNode{Float64,2,Vector}(;
            op=1,
            l=ArrayNode{Float64,2,Vector}(;
                op=2, l=x2, r=ArrayNode{Float64,2,Vector}(; val=3.5)
            ),
        ),
    )

    tree_branch_deg1 = ArrayNode{Float64,2,Vector}(; op=1, l=x1)
    tree_leaf_feature = x1
    tree_leaf_constant = ArrayNode{Float64,2,Vector}(; val=1.0)

    @test Interfaces.test(
        NodeInterface,
        ArrayNode,
        [tree_branch_deg2, tree_branch_deg1, tree_leaf_feature, tree_leaf_constant],
    )
end

# Interface checks for maximum degrees 3, 4 and 5.
@testitem "ArrayNode interface on n-arity nodes" begin
    using DynamicExpressions
    using DynamicExpressions: NodeInterface
    using Interfaces: Interfaces
    const ArrayNode = DynamicExpressions.ArrayNode

    for D in (3, 4, 5)
        x = [ArrayNode{Float64,D,Vector}(; feature=i) for i in 1:3]
        operator_tuple = ((sin, cos, exp), (+, *, /, -), (fma, clamp), (max, min), ())
        # One `degree => operators` pair for each degree 1 through D that has operators.
        pairs = [i => operator_tuple[i] for i in 1:D if !isempty(operator_tuple[i])]
        operators =
            isempty(pairs) ? OperatorEnum(1 => ()) : OperatorEnum(pairs[1], pairs[2:end]...)
        DynamicExpressions.OperatorEnumConstructionModule.empty_all_globals!()

        let tree = ArrayNode{Float64,D,Vector}(; op=2, children=(x[1], x[2])) # *
            if D > 2
                fma_idx = 1
                tree = ArrayNode{Float64,D,Vector}(;
                    op=fma_idx, children=(tree, x[1], x[2])
                ) # fma
            end
            if D > 3
                idx_max = 1
                tree = ArrayNode{Float64,D,Vector}(;
                    op=idx_max, children=(tree, x[1], x[2], x[3])
                ) # max
            end
            @test Interfaces.test(NodeInterface, ArrayNode, [tree])
        end
    end
end

# Property access, construction, copy, hash, printing and evaluation on one small tree.
@testitem "ArrayNode basic operations" begin
    using DynamicExpressions
    using DynamicExpressions: OperatorEnum
    const ArrayNode = DynamicExpressions.ArrayNode

    x1 = ArrayNode{Float64,2}(; feature=1)
    x2 = ArrayNode{Float64,2}(; feature=2)
    c = ArrayNode{Float64,2}(; val=3.5)

    # Leaf properties
    @test x1.degree == 0
    @test x1.feature == 1
    @test !x1.constant

    @test c.degree == 0
    @test c.val == 3.5
    @test c.constant

    # Tree construction
    mul = ArrayNode{Float64,2}(; op=3, l=x2, r=c)
    @test mul.degree == 2
    @test mul.op == 3

    sin_expr = ArrayNode{Float64,2}(; op=1, l=mul)
    @test sin_expr.degree == 1

    tree = ArrayNode{Float64,2}(; op=1, l=x1, r=sin_expr)
    @test tree.degree == 2

    # Copy
    tree_copy = copy(tree)
    @test tree == tree_copy
    @test tree !== tree_copy

    # Hash
    @test hash(tree) == hash(tree_copy)

    # count_nodes: tree, x1, sin_expr, mul, x2, c
    @test count_nodes(tree) == 6

    # String conversion
    operators = OperatorEnum(; binary_operators=[+, -, *, /], unary_operators=[sin, cos])
    @test string_tree(tree, operators) == "x1 + sin(x2 * 3.5)"

    # Evaluation
    X = [1.0 2.0; 0.5 1.0] # 2 features, 2 samples
    result, complete = eval_tree_array(tree, X, operators)
    expected = X[1, :] .+ sin.(X[2, :] .* 3.5)
    @test complete
    @test all(abs.(result .- expected) .< 1e-10)
end

# Static allocation checks on property access with a preallocated arena.
@testitem "ArrayNode allocation tests" begin
    using DynamicExpressions
    using DynamicExpressions: OperatorEnum, eval_tree_array
    using AllocCheck: @check_allocs
    const ArrayNode = DynamicExpressions.ArrayNode

    allocator = DynamicExpressions.ArrayNodeModule.ArrayTree{Float64,2}(100)
    x1 = ArrayNode{Float64,2}(; feature=1, allocator=allocator)
    x2 = ArrayNode{Float64,2}(; feature=2, allocator=allocator)
    c = ArrayNode{Float64,2}(; val=3.5, allocator=allocator)

    mul = ArrayNode{Float64,2}(; op=3, l=x2, r=c, allocator=allocator)
    sin_expr = ArrayNode{Float64,2}(; op=1, l=mul, allocator=allocator)
    tree = ArrayNode{Float64,2}(; op=1, l=x1, r=sin_expr, allocator=allocator)

    operators = OperatorEnum(; binary_operators=[+, -, *, /], unary_operators=[sin, cos])

    # `@check_allocs` functions throw when called if they may allocate, so calling
    # them is the allocation check; the comparisons additionally pin the values.
    @check_allocs get_degree(n) = n.degree
    @check_allocs get_val(n) = n.val
    @check_allocs get_feature(n) = n.feature
    @check_allocs get_op(n) = n.op

    @test get_degree(tree) == 2
    @test get_val(c) == 3.5
    @test get_feature(x1) == 1
    @test get_op(tree) == 1

    @check_allocs count_nodes_test(t) = count_nodes(t)
    @test count_nodes_test(tree) == 6

    # Sum of the degrees of every node, via `get_children`.
    function traverse_tree(n::ArrayNode)
        total = n.degree
        if n.degree > 0
            children = DynamicExpressions.NodeModule.get_children(n, Val(Int(n.degree)))
            for child in children
                total += traverse_tree(child)
            end
        end
        return total
    end

    # The previous `@check_allocs traverse_tree(tree) = traverse_tree(tree)` only
    # defined a never-called forwarding method, so it checked nothing. Assert the
    # traversal result instead: degrees are 2 (tree) + 1 (sin_expr) + 2 (mul) = 5.
    # NOTE(review): `Val(Int(n.degree))` is a runtime value, so a static no-allocation
    # check of this traversal would presumably fail - confirm before adding one.
    @test traverse_tree(tree) == 5
end

# ArrayNode trees wrapped in `Expression`.
@testitem "ArrayNode with Expressions" begin
    using DynamicExpressions
    using DynamicExpressions: Expression
    const ArrayNode = DynamicExpressions.ArrayNode

    x1 = ArrayNode{Float64,2}(; feature=1)
    c = ArrayNode{Float64,2}(; val=2.0)
    tree = ArrayNode{Float64,2}(; op=1, l=x1, r=c)

    operators = OperatorEnum(; binary_operators=[+, -, *, /])

    expr = Expression(tree; operators=operators, variable_names=["x1", "x2"])
    @test string(expr) == "x1 + 2.0"

    X = [1.0 2.0 3.0] # 1 feature, 3 samples
    result = expr(X)
    expected = vec(X .+ 2.0) # vector, to match the result shape
    @test all(abs.(result .- expected) .< 1e-10)
end

# Construction, evaluation and allocation checks with FixedSizeVector storage.
@testitem "ArrayNode with FixedSizeArrays backing storage" begin
    using DynamicExpressions
    using DynamicExpressions: OperatorEnum
    using FixedSizeArrays
    using AllocCheck: @check_allocs
    const ArrayNode = DynamicExpressions.ArrayNode

    # Arena capacity used throughout this item.
    const N = 100

    allocator = DynamicExpressions.ArrayNodeModule.ArrayTree{Float64,2}(
        N; array_type=FixedSizeVector
    )

    # The backing arrays must really be FixedSizeArrays.
    @test allocator.nodes.degree isa FixedSizeArray{UInt8}
    @test allocator.nodes.val isa FixedSizeArray{Float64}
    @test allocator.nodes.feature isa FixedSizeArray{UInt16}

    x1 = ArrayNode{Float64,2}(; feature=1, allocator=allocator)
    x2 = ArrayNode{Float64,2}(; feature=2, allocator=allocator)
    c = ArrayNode{Float64,2}(; val=3.5, allocator=allocator)

    mul = ArrayNode{Float64,2}(; op=3, l=x2, r=c, allocator=allocator)
    sin_expr = ArrayNode{Float64,2}(; op=1, l=mul, allocator=allocator)
    tree = ArrayNode{Float64,2}(; op=1, l=x1, r=sin_expr, allocator=allocator)

    @test tree.degree == 2
    @test x1.feature == 1
    @test c.val == 3.5

    operators = OperatorEnum(; binary_operators=[+, -, *, /], unary_operators=[sin, cos])
    X = [1.0 2.0; 0.5 1.0] # 2 features, 2 samples
    result, complete = eval_tree_array(tree, X, operators)
    expected = X[1, :] .+ sin.(X[2, :] .* 3.5)
    @test all(abs.(result .- expected) .< 1e-10)

    # Property access must stay allocation-free with this storage as well.
    @check_allocs get_degree(n) = n.degree
    @check_allocs get_val(n) = n.val
    @check_allocs get_feature(n) = n.feature

    @test get_degree(tree) == 2
    @test get_val(c) == 3.5
    @test get_feature(x1) == 1

    @test count_nodes(tree) == 6

    # Creating a node in preallocated storage must not allocate.
    @check_allocs create_node(alloc, f) = ArrayNode{Float64,2}(; feature=f, allocator=alloc)
    new_node = create_node(allocator, 5)
    @test new_node.feature == 5
end

# Cross-checks ArrayNode against Node on randomly generated trees.
@testitem "ArrayNode vs Node comparison with random trees" begin
    using DynamicExpressions
    using DynamicExpressions: Node, OperatorEnum
    using Random: MersenneTwister
    include("tree_gen_utils.jl")

    const ArrayNode = DynamicExpressions.ArrayNode

    # Rebuild a `Node` tree node-by-node inside the arena `alloc`.
    function node_to_array_node(n::Node, alloc)
        if n.degree == 0
            return if n.constant
                ArrayNode{Float64,2}(; val=n.val, allocator=alloc)
            else
                ArrayNode{Float64,2}(; feature=n.feature, allocator=alloc)
            end
        elseif n.degree == 1
            child = node_to_array_node(n.l, alloc)
            return ArrayNode{Float64,2}(; op=n.op, l=child, allocator=alloc)
        else # degree == 2
            left = node_to_array_node(n.l, alloc)
            right = node_to_array_node(n.r, alloc)
            return ArrayNode{Float64,2}(; op=n.op, l=left, r=right, allocator=alloc)
        end
    end

    operators_configs = [
        OperatorEnum(; binary_operators=[+, -, *, /], unary_operators=[sin, cos]),
        OperatorEnum(; binary_operators=[+, *], unary_operators=[-, abs]),
        OperatorEnum(;
            binary_operators=[+, -, *, /, ^], unary_operators=[sin, cos, exp, log]
        ),
    ]

    for operators in operators_configs
        rng = MersenneTwister(42)
        nfeatures = 3

        for tree_size in [5, 10, 20]
            for _ in 1:5 # several random trees of each size
                node_tree = gen_random_tree_fixed_size(
                    tree_size, operators, nfeatures, Float64, Node, rng
                )

                # Twice the tree size: the tree is converted a second time below
                # (hash test) into the same arena.
                allocator = DynamicExpressions.ArrayNodeModule.ArrayTree{Float64,2}(
                    tree_size * 2
                )
                array_tree = node_to_array_node(node_tree, allocator)

                # Test 1: node count
                @test DynamicExpressions.count_nodes(node_tree) ==
                    DynamicExpressions.count_nodes(array_tree)

                # Test 2: string representation
                node_str = DynamicExpressions.string_tree(node_tree, operators)
                array_str = DynamicExpressions.string_tree(array_tree, operators)
                @test node_str == array_str

                # Test 3: evaluation on random data
                X = randn(rng, nfeatures, 10)
                node_result, node_ok = DynamicExpressions.eval_tree_array(
                    node_tree, X, operators
                )
                array_result, array_ok = DynamicExpressions.eval_tree_array(
                    array_tree, X, operators
                )

                @test node_ok == array_ok
                if node_ok && array_ok
                    # NaNs must appear in the same places; other entries must agree.
                    @test all(isnan.(node_result) .== isnan.(array_result))
                    valid_idx = .!isnan.(node_result) .& .!isnan.(array_result)
                    if any(valid_idx)
                        @test all(
                            abs.(node_result[valid_idx] .- array_result[valid_idx]) .< 1e-10
                        )
                    end
                end

                # Test 4: two equivalent trees hash identically
                array_tree2 = node_to_array_node(node_tree, allocator)
                @test hash(array_tree) == hash(array_tree2)

                # Test 5: copy
                array_copy = copy(array_tree)
                @test array_copy == array_tree
                @test array_copy !== array_tree
                @test DynamicExpressions.count_nodes(array_copy) ==
                    DynamicExpressions.count_nodes(array_tree)
            end
        end
    end
end

# `tree_mapreduce` and `any` must agree between Node and ArrayNode.
@testitem "ArrayNode tree_mapreduce operations" begin
    using DynamicExpressions
    using DynamicExpressions: Node, OperatorEnum, tree_mapreduce
    using Random: MersenneTwister
    include("tree_gen_utils.jl")

    const ArrayNode = DynamicExpressions.ArrayNode

    # Rebuild a `Node` tree node-by-node inside the arena `alloc`.
    function node_to_array_node(n::Node, alloc)
        if n.degree == 0
            return if n.constant
                ArrayNode{Float64,2}(; val=n.val, allocator=alloc)
            else
                ArrayNode{Float64,2}(; feature=n.feature, allocator=alloc)
            end
        elseif n.degree == 1
            child = node_to_array_node(n.l, alloc)
            return ArrayNode{Float64,2}(; op=n.op, l=child, allocator=alloc)
        else
            left = node_to_array_node(n.l, alloc)
            right = node_to_array_node(n.r, alloc)
            return ArrayNode{Float64,2}(; op=n.op, l=left, r=right, allocator=alloc)
        end
    end

    operators = OperatorEnum(; binary_operators=[+, -, *], unary_operators=[sin, -])
    rng = MersenneTwister(123)
    nfeatures = 2

    for tree_size in [5, 10, 15]
        node_tree = gen_random_tree_fixed_size(
            tree_size, operators, nfeatures, Float64, Node, rng
        )

        allocator = DynamicExpressions.ArrayNodeModule.ArrayTree{Float64,2}(tree_size * 2)
        array_tree = node_to_array_node(node_tree, allocator)

        # 1. Count constants. Only leaves are inspected: `constant` is never
        #    assigned on operator nodes, so reading it there is not meaningful.
        count_constants = t -> (t.degree == 0 && t.constant) ? 1 : 0
        node_const_count = tree_mapreduce(count_constants, +, node_tree, Int)
        array_const_count = tree_mapreduce(count_constants, +, array_tree, Int)
        @test node_const_count == array_const_count

        # 2. Count features
        count_features = t -> (!t.constant && t.degree == 0) ? 1 : 0
        node_feat_count = tree_mapreduce(count_features, +, node_tree, Int)
        array_feat_count = tree_mapreduce(count_features, +, array_tree, Int)
        @test node_feat_count == array_feat_count

        # 3. Maximum over per-node values (every node maps to 1)
        depth_fn = t -> 1
        max_fn = (a, b...) -> maximum((a, b...))
        node_depth = tree_mapreduce(depth_fn, max_fn, node_tree, Int)
        array_depth = tree_mapreduce(depth_fn, max_fn, array_tree, Int)
        @test node_depth == array_depth

        # 4. Any operator node with operator index 1 (of either arity)
        has_op1 = t -> (t.degree > 0 && t.op == 1)
        node_has_op1 = DynamicExpressions.any(has_op1, node_tree)
        array_has_op1 = DynamicExpressions.any(has_op1, array_tree)
        @test node_has_op1 == array_has_op1
    end
end

# A copy must not share storage with the tree it was copied from.
@testitem "ArrayNode copy has no array aliasing" begin
    using DynamicExpressions
    const ArrayNode = DynamicExpressions.ArrayNode

    x1 = ArrayNode{Float64,2,Vector}(; feature=1)
    x2 = ArrayNode{Float64,2,Vector}(; feature=2)
    tree = ArrayNode{Float64,2,Vector}(;
        op=1,
        l=ArrayNode{Float64,2,Vector}(;
            op=2, l=x1, r=ArrayNode{Float64,2,Vector}(; val=3.5)
        ),
        r=x2,
    )

    # Test 1: copy of the whole tree. Mutate the very constant that is then
    # inspected in the original, so aliasing would actually be detected.
    tree_copy = copy(tree)
    tree_copy.l.r.val = 888.0

    @test tree_copy.l.r.val == 888.0
    @test tree.l.r.val == 3.5

    # The backing arrays must be distinct objects.
    orig_tree = tree.tree
    copy_tree = tree_copy.tree
    @test orig_tree !== copy_tree
    @test orig_tree.nodes.val !== copy_tree.nodes.val
    @test orig_tree.nodes.degree !== copy_tree.nodes.degree
    @test orig_tree.nodes.children !== copy_tree.nodes.children

    # Test 2: copy of a subtree (non-root node)
    subtree = tree.l
    subtree_copy = copy(subtree)
    subtree_copy.r.val = 777.0

    @test subtree_copy.r.val == 777.0
    @test tree.l.r.val == 3.5
    @test subtree.r.val == 3.5

    subtree_copy_tree = subtree_copy.tree
    @test orig_tree !== subtree_copy_tree
    @test orig_tree.nodes.val !== subtree_copy_tree.nodes.val

    # Test 3: structure is preserved
    @test copy(tree) == tree
    @test copy(subtree) == subtree
end

#= Patch context carried over verbatim from the surrounding diff (not Julia code).

diff --git a/test/unittest.jl b/test/unittest.jl
index 78e0dcd7..41a17bf6 100644
--- a/test/unittest.jl
+++ b/test/unittest.jl
@@ -129,6 +129,7 @@ include("test_parse.jl")
 include("test_parametric_expression.jl")
 include("test_operator_construction_edgecases.jl")
 include("test_node_interface.jl")
+include("test_array_node.jl")
 include("test_expression_math.jl")
 include("test_structured_expression.jl")
 include("test_zygote_gradient_wrapper.jl")
=#