diff --git a/src/ArrayDiff.jl b/src/ArrayDiff.jl index 1224763..1f08c1e 100644 --- a/src/ArrayDiff.jl +++ b/src/ArrayDiff.jl @@ -50,6 +50,7 @@ import NaNMath: include("Coloring/Coloring.jl") include("graph_tools.jl") +include("sizes.jl") include("types.jl") include("utils.jl") diff --git a/src/reverse_mode.jl b/src/reverse_mode.jl index 84cc358..faa14a5 100644 --- a/src/reverse_mode.jl +++ b/src/reverse_mode.jl @@ -109,20 +109,22 @@ function _forward_eval( # children, so a backwards pass through f.nodes is a forward pass through # the tree. children_arr = SparseArrays.rowvals(f.adj) + fill!(f.partials_storage, zero(T)) for k in length(f.nodes):-1:1 node = f.nodes[k] - f.partials_storage[k] = zero(T) + # Storage index if scalar + j = last(_storage_range(f.sizes, k)) if node.type == Nonlinear.NODE_VARIABLE - f.forward_storage[k] = x[node.index] + f.forward_storage[j] = x[node.index] # This should never happen, because we will have replaced these by now. # elseif node.type == Nonlinear.NODE_MOI_VARIABLE # f.forward_storage[k] = x[node.index] elseif node.type == Nonlinear.NODE_VALUE - f.forward_storage[k] = f.const_values[node.index] + f.forward_storage[j] = f.const_values[node.index] elseif node.type == Nonlinear.NODE_SUBEXPRESSION - f.forward_storage[k] = d.subexpression_forward_values[node.index] + f.forward_storage[j] = d.subexpression_forward_values[node.index] elseif node.type == Nonlinear.NODE_PARAMETER - f.forward_storage[k] = d.data.parameters[node.index] + f.forward_storage[j] = d.data.parameters[node.index] elseif node.type == Nonlinear.NODE_CALL_MULTIVARIATE children_indices = SparseArrays.nzrange(f.adj, k) N = length(children_indices) @@ -130,23 +132,28 @@ function _forward_eval( # With appropriate benchmarking, the special-cased if-statements can # be removed in favor of the generic user-defined function case. if node.index == 1 # :+ - tmp_sum = zero(T) - for c_idx in children_indices - @inbounds ix = children_arr[c_idx] - @inbounds f.partials_storage[ix] = one(T) - @inbounds tmp_sum += f.forward_storage[ix] + for j in _eachindex(f.sizes, k) + tmp_sum = zero(T) + for c_idx in children_indices + ix = children_arr[c_idx] + _setindex!(f.partials_storage, one(T), f.sizes, ix, j) + f.partials_storage[ix] = one(T) + tmp_sum += _getindex(f.forward_storage, f.sizes, ix, j) + end + _setindex!(f.forward_storage, tmp_sum, f.sizes, k, j) end - f.forward_storage[k] = tmp_sum elseif node.index == 2 # :- @assert N == 2 child1 = first(children_indices) @inbounds ix1 = children_arr[child1] @inbounds ix2 = children_arr[child1+1] - @inbounds tmp_sub = f.forward_storage[ix1] - @inbounds tmp_sub -= f.forward_storage[ix2] - @inbounds f.partials_storage[ix1] = one(T) - @inbounds f.partials_storage[ix2] = -one(T) - f.forward_storage[k] = tmp_sub + for j in _eachindex(f.sizes, k) + tmp_sub = _getindex(f.forward_storage, f.sizes, ix1, j) + tmp_sub -= _getindex(f.forward_storage, f.sizes, ix2, j) + _setindex!(f.partials_storage, one(T), f.sizes, ix1, j) + _setindex!(f.partials_storage, -one(T), f.sizes, ix2, j) + _setindex!(f.forward_storage, tmp_sub, f.sizes, k, j) + end elseif node.index == 3 # :* tmp_prod = one(T) for c_idx in children_indices @@ -221,7 +228,7 @@ function _forward_eval( @inbounds f.partials_storage[children_arr[idx1+2]] = !(condition == 1) f.forward_storage[k] = ifelse(condition == 1, lhs, rhs) - else + else # atan, min, max or vect f_input = _UnsafeVectorView(d.jac_storage, N) ∇f = _UnsafeVectorView(d.user_output_buffer, N) for (r, i) in enumerate(children_indices) diff --git a/src/sizes.jl b/src/sizes.jl new file mode 100644 index 0000000..b745156 --- /dev/null +++ b/src/sizes.jl @@ -0,0 +1,176 @@ +""" + struct Sizes + ndims::Vector{Int} + size_offset::Vector{Int} + size::Vector{Int} + storage_offset::Vector{Int} + end + +The node at index `k` is an array of `ndims[k]` dimensions and size `sizes[size_offset[k] .+ (1:ndims[k])]`. +Note that `size_offset` is a nonincreasing vector so that `sizes` can be filled in a forward pass, +which goes through the nodes in decreasing index order. +""" +struct Sizes + ndims::Vector{Int} + size_offset::Vector{Int} + size::Vector{Int} + storage_offset::Vector{Int} +end + +function _size(sizes::Sizes, k::Int, dim::Int) + return sizes.size[sizes.size_offset[k]+dim] +end + +function _size(sizes::Sizes, k::Int) + return view(sizes.size, sizes.size_offset[k] .+ Base.OneTo(sizes.ndims[k])) +end + +function _length(sizes::Sizes, k::Int) + if sizes.ndims[k] == 0 + return 1 + else + return prod(_size(sizes, k)) + end +end + +_eachindex(sizes::Sizes, k) = Base.OneTo(_length(sizes, k)) + +_length(sizes::Sizes) = sizes.storage_offset[end] + +function _storage_range(sizes::Sizes, k::Int) + return sizes.storage_offset[k] .+ _eachindex(sizes, k) +end + +function _getindex(x, sizes::Sizes, k::Int, j) + return x[sizes.storage_offset[k]+j] +end + +function _setindex!(x, value, sizes::Sizes, k::Int, j) + return x[sizes.storage_offset[k]+j] = value +end + +# /!\ Can only be called in decreasing `k` order +function _add_size!(sizes::Sizes, k::Int, size::Tuple) + sizes.ndims[k] = length(size) + sizes.size_offset[k] = length(sizes.size) + append!(sizes.size, size) + return +end + +function _copy_size!(sizes::Sizes, k::Int, child::Int) + sizes.ndims[k] = sizes.ndims[child] + sizes.size_offset[k] = length(sizes.size) + for i in (sizes.size_offset[child] .+ Base.OneTo(sizes.ndims[child])) + push!(sizes.size, sizes.size[i]) + end + return +end + +function _assert_scalar_children(sizes, children_arr, children_indices, op) + for c_idx in children_indices + @inbounds ix = children_arr[c_idx] + # We don't support nested vectors of vectors, + # we only support real numbers and array of real numbers + @assert sizes.ndims[ix] == 0 "Array argument when expected scalar argument for operator `$op`" + end +end + +function _infer_sizes( + nodes::Vector{Nonlinear.Node}, + adj::SparseArrays.SparseMatrixCSC{Bool,Int}, +) + sizes = Sizes( + zeros(Int, length(nodes)), + zeros(Int, length(nodes)), + Int[], + zeros(Int, length(nodes) + 1), + ) + children_arr = SparseArrays.rowvals(adj) + for k in length(nodes):-1:1 + node = nodes[k] + children_indices = SparseArrays.nzrange(adj, k) + N = length(children_indices) + if node.type == Nonlinear.NODE_CALL_MULTIVARIATE + if !( + node.index in + eachindex(MOI.Nonlinear.DEFAULT_MULTIVARIATE_OPERATORS) + ) + # TODO user-defined operators + continue + end + op = MOI.Nonlinear.DEFAULT_MULTIVARIATE_OPERATORS[node.index] + if op == :vect + _assert_scalar_children( + sizes, + children_arr, + children_indices, + op, + ) + _add_size!(sizes, k, (N,)) + elseif op == :dot + # TODO assert all arguments have same size + elseif op == :+ || op == :- + # TODO assert all arguments have same size + _copy_size!(sizes, k, children_arr[first(children_indices)]) + elseif op == :* + # TODO assert compatible sizes and all ndims should be 0 or 2 + first_matrix = findfirst(children_indices) do i + return !iszero(sizes.ndims[children_arr[i]]) + end + if !isnothing(first_matrix) + last_matrix = findfirst(children_indices) do i + return !iszero(sizes.ndims[children_arr[i]]) + end + _add_size!( + sizes, + k, + ( + _size(sizes, first_matrix, 1), + _size(sizes, last_matrix, sizes.ndims[last_matrix]), + ), + ) + end + elseif op == :^ || op == :/ + @assert N == 2 + _assert_scalar_children( + sizes, + children_arr, + children_indices[2:end], + op, + ) + _copy_size!(sizes, k, children_arr[first(children_indices)]) + else + _assert_scalar_children( + sizes, + children_arr, + children_indices, + op, + ) + end + elseif node.type == Nonlinear.NODE_CALL_UNIVARIATE + if !( + node.index in + eachindex(MOI.Nonlinear.DEFAULT_UNIVARIATE_OPERATORS) + ) + # TODO user-defined operators + continue + end + @assert N == 1 + op = MOI.Nonlinear.DEFAULT_UNIVARIATE_OPERATORS[node.index] + if op == :+ || op == :- + _copy_size!(sizes, k, children_arr[first(children_indices)]) + else + _assert_scalar_children( + sizes, + children_arr, + children_indices, + op, + ) + end + end + end + for k in eachindex(nodes) + sizes.storage_offset[k+1] = sizes.storage_offset[k] + _length(sizes, k) + end + return sizes +end diff --git a/src/types.jl b/src/types.jl index 5dad1b8..4f1f19a 100644 --- a/src/types.jl +++ b/src/types.jl @@ -7,6 +7,7 @@ struct _SubexpressionStorage nodes::Vector{Nonlinear.Node} adj::SparseArrays.SparseMatrixCSC{Bool,Int} + sizes::Sizes const_values::Vector{Float64} forward_storage::Vector{Float64} partials_storage::Vector{Float64} @@ -21,10 +22,12 @@ struct _SubexpressionStorage partials_storage_ϵ::Vector{Float64}, linearity::Linearity, ) - N = length(nodes) + sizes = _infer_sizes(nodes, adj) + N = _length(sizes) return new( nodes, adj, + _infer_sizes(nodes, adj), const_values, zeros(N), # forward_storage, zeros(N), # partials_storage, diff --git a/test/ArrayDiff.jl b/test/ArrayDiff.jl new file mode 100644 index 0000000..f920107 --- /dev/null +++ b/test/ArrayDiff.jl @@ -0,0 +1,87 @@ +module TestArrayDiff + +using Test +import LinearAlgebra +import SparseArrays + +import MathOptInterface as MOI +const Nonlinear = MOI.Nonlinear + +import ArrayDiff +const Coloring = ArrayDiff.Coloring + +function runtests() + for name in names(@__MODULE__; all = true) + if startswith("$(name)", "test_") + @testset "$(name)" begin + getfield(@__MODULE__, name)() + end + end + end + return +end + +function test_objective_quadratic_univariate() + x = MOI.VariableIndex(1) + scalar = Nonlinear.Model() + Nonlinear.set_objective(model, :($x * $x)) + vector = Nonlinear.Model() + Nonlinear.set_objective(vector, :([$x] * [$x])) + evaluator = Nonlinear.Evaluator(model, ArrayDiff.Mode(), [x]) + MOI.initialize(evaluator, [:Grad, :Jac, :Hess]) + @test MOI.eval_objective(evaluator, [1.2]) == 1.2^2 + 1 + g = [NaN] + MOI.eval_objective_gradient(evaluator, g, [1.2]) + @test g == [2.4] + @test MOI.hessian_objective_structure(evaluator) == [(1, 1)] + H = [NaN] + MOI.eval_hessian_objective(evaluator, H, [1.2]) + @test H == [2.0] + @test MOI.hessian_lagrangian_structure(evaluator) == [(1, 1)] + H = [NaN] + MOI.eval_hessian_lagrangian(evaluator, H, [1.2], 1.5, Float64[]) + @test H == 1.5 .* [2.0] + MOI.eval_hessian_lagrangian_product( + evaluator, + H, + [1.2], + [1.2], + 1.5, + Float64[], + ) + @test H == [1.5 * 2.0 * 1.2] + return +end + +end # module + +TestArrayDiff.runtests() + +import MathOptInterface as MOI +import ArrayDiff +using Test +const Nonlinear = MOI.Nonlinear +model = Nonlinear.Model() +x = MOI.VariableIndex(1) +Nonlinear.set_objective(model, :(dot([$x], [$x]))) +evaluator = Nonlinear.Evaluator(model, ArrayDiff.Mode(), [x]) +MOI.initialize(evaluator, [:Grad]) +sizes = evaluator.backend.objective.expr.sizes +@test MOI.eval_objective(evaluator, [1.2]) == 1.2^2 +@test sizes.ndims == [0, 1, 0, 1, 0] +@test sizes.size_offset == [0, 1, 0, 0, 0] +@test sizes.size == [1, 1] +@test sizes.storage_offset == [0, 1, 2, 3, 4, 5] + +y = MOI.VariableIndex(1) +Nonlinear.set_objective(model, :(dot([$x, $y] - [1, 2], -[1, 2] + [$x, $y]))) +MOI.initialize(evaluator, [:Grad]) +sizes = evaluator.backend.objective.expr.sizes +@test MOI.eval_objective(evaluator, [1.2]) == 1.2^2 +@test sizes.ndims == [0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0] +@test sizes.size_offset == [0, 6, 5, 0, 0, 4, 0, 0, 3, 2, 1, 0, 0, 0, 0, 0] +@test sizes.size == [2, 2, 2, 2, 2, 2, 2] +@test sizes.storage_offset == + [0, 1, 3, 5, 6, 7, 9, 10, 11, 13, 15, 17, 18, 19, 21, 22, 23] +g = [NaN] +MOI.eval_objective_gradient(evaluator, g, [1.2]) diff --git a/test/runtests.jl b/test/runtests.jl index ab66e5e..e290998 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1 +1,3 @@ include("ReverseAD.jl") + +import SparseArrays