diff --git a/Project.toml b/Project.toml index 3e86a39..b7409b2 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "DualArrays" uuid = "429e4e16-f749-45f3-beec-30742fae38ce" -authors = ["Sheehan Olver "] version = "0.2.0" +authors = ["Sheehan Olver "] [deps] ArrayLayouts = "4c555306-a7a7-4459-81d9-ec55ddd5c99a" @@ -10,6 +10,7 @@ DiffRules = "b552c78f-8df3-52c6-915a-8e097449b14b" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2" [compat] DiffRules = "1.15.1" @@ -17,6 +18,7 @@ ForwardDiff = "1.2.2" Lux = "1.29.1" Plots = "1.41.1" SparseArrays = "1.10" +TensorOperations = "5.5.2" [extras] BandedMatrices = "aae01518-5342-5314-be14-df237901396f" diff --git a/src/DualArrays.jl b/src/DualArrays.jl index c169778..19ba52b 100644 --- a/src/DualArrays.jl +++ b/src/DualArrays.jl @@ -6,16 +6,16 @@ A Julia package for efficient automatic differentiation using dual numbers and d This package provides: - `Dual`: A dual number type for storing values and their derivatives - `DualVector`: A vector of dual numbers represented with a Jacobian matrix - +- `DualMatrix`: A matrix of dual numbers represented with a Jacobian tensor` Differentiation rules are mostly provided by ChainRules.jl. """ module DualArrays -export DualVector, Dual, jacobian +export DualVector, DualMatrix, Dual, jacobian -import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \ +import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \, eltype, transpose, permutedims -using LinearAlgebra, ArrayLayouts, FillArrays, DiffRules +using LinearAlgebra, ArrayLayouts, FillArrays, DiffRules, TensorOperations include("types.jl") include("indexing.jl") diff --git a/src/arithmetic.jl b/src/arithmetic.jl index afcfcbc..cf26666 100644 --- a/src/arithmetic.jl +++ b/src/arithmetic.jl @@ -36,6 +36,9 @@ function diff_fn(f, n) return map(dx -> makepartials(dx, syms), d) end +# We redefine one here since it is required for the derivative of + and -. +one(x::Dual) = Dual(one(x.value), zero(x.partials)) + for (_, f, n) in DiffRules.diffrules(filter_modules=(:Base,)) partials = diff_fn(f, n) if n == 1 @@ -78,9 +81,9 @@ for (_, f, n) in DiffRules.diffrules(filter_modules=(:Base,)) return DualVector(val, jac) end # Must have Base.$f in order not to import everything - @eval Base.$f(x::Dual, y::Dual) = Dual($f(x.value, y.value), $p1(x.value, y.value) * x.partials + $p2(x.value, y.value) * y.partials) - @eval Base.$f(x::Dual, y::Real) = Dual($f(x.value, y), $p1(x.value, y) * x.partials) - @eval Base.$f(x::Real, y::Dual) = Dual($f(x, y.value), $p2(x, y.value) * y.partials) + @eval Base.$f(x::Dual, y::Dual) = Dual($f(x.value, y.value), $p1(x.value, y.value) .* x.partials + $p2(x.value, y.value) .* y.partials) + @eval Base.$f(x::Dual, y::Real) = Dual($f(x.value, y), $p1(x.value, y) .* x.partials) + @eval Base.$f(x::Real, y::Dual) = Dual($f(x, y.value), $p2(x, y.value) .* y.partials) end end @@ -96,4 +99,66 @@ LinearAlgebra.dot(x::DualVector, y::AbstractVector) = Dual(dot(x.value, y), tran LinearAlgebra.dot(x::AbstractVector, y::DualVector) = Dual(dot(x, y.value), transpose(y.jacobian) * x) # solve -\(x::AbstractMatrix, y::DualVector) = DualVector(x \ y.value, x \ y.jacobian) \ No newline at end of file +\(x::AbstractMatrix, y::DualVector) = DualVector(x \ y.value, ArrayOperator{1}(x \ y.jacobian.data)) + +""" +----------------------- +Multiplication with ArrayOperators +----------------------- + +* for ArrayOperators generalises from matrix/vector +multiplication and uses tensor contraction provided by +TensorOperations.jl. Let an (A, B) ArrayOperator denote +an ArrayOperator with output dimension A and input dimension B +(i.e an ArrayOperator{A+B, T, A, B} for some type T). We only +allow multiplication between an (A, B) ArrayOperator and a (B, C) +ArrayOperator, with the result being an (A, C) ArrayOperator. +This generalises from: + +-An inner product: a row vector * a column vector -> a scalar: + (0, 1) * (1, 0) -> (0, 0) + +-An outer product: a column vector * a row vector -> a matrix: + (1, 0) * (0, 1) -> (1, 1) + +- matrix/vector multiplication: a matrix * a column vector -> a column vector: + (1, 1) * (1, 0) -> (1, 0) + +We can treat multiplication between ArrayOperators and regular arrays as multiplication +between two ArrayOperators: given an (A, B) ArrayOperator and an L-array, we can fix it as +having input dimension B and infer C = L - B. We use this contraction rule to multiply the two. +""" + +# Helper function to perform tensor contraction between x and y +function _contract(x, y, A, B, C) + x_idx = (ntuple(i -> i, A), ntuple(i -> i + A, B)) + y_idx = (ntuple(i -> i, B), ntuple(i -> i + B, C)) + ret_idx = (ntuple(i -> i, A), ntuple(i -> i + A, C)) + + return TensorOperations.tensorcontract(x, x_idx, false, y, y_idx, false, ret_idx, 1) +end + +function *(x::ArrayOperator{<:Any, <:Any, A, B}, y::ArrayOperator{<:Any, <:Any, B, C}) where {A, B, C} + return ArrayOperator{A}(_contract(x.data, y.data, A, B, C)) +end + +function *(x::ArrayOperator{<:Any, <:Any, A, B}, y::AbstractArray{<:Any, L}) where {A, B, L} + C = L - B + return ArrayOperator{A}(_contract(x.data, y, A, B, C)) +end + +function *(x::AbstractArray{<:Any, L}, y::ArrayOperator{<:Any, <:Any, B, C}) where {L, B, C} + A = L - B + return ArrayOperator{A}(_contract(x, y.data, A, B, C)) +end + +# Extra required arithmetic for ArrayOperators +Base.:+(x::ArrayOperator, y::ArrayOperator) = x .+ y +Base.:-(x::ArrayOperator, y::ArrayOperator) = x .- y +Base.:-(x::ArrayOperator) = (-).(x) + +Base.:*(a::Number, t::ArrayOperator{L, T, N, M}) where {L, T, N, M} = + ArrayOperator{L, promote_type(typeof(a), T), N, M}(a .* t.data) +Base.:*(t::ArrayOperator, a::Number) = a * t + + diff --git a/src/indexing.jl b/src/indexing.jl index 7ff7f53..822a693 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -3,31 +3,87 @@ using ArrayLayouts, FillArrays, LinearAlgebra, SparseArrays sparse_getindex(a...) = layout_getindex(a...) +sparse_getindex(d::DualMatrix, a...) = getindex(d, a...) sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D, 2)) sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D, 1)) + +# We need a system of indexing that takes two tuples +# of length N and M. We must then return a ArrayOperator whose new input and output dimensions +# are inferred from how many of the arguments in each respective tuple are integers. + +# For the purposes of DualArrays.jl, we only need to index the input dimensions +# +# Example: +# +# Consider a DualVector with standard matrix Jacobian (ArrayOperator{2, T, 1, 1}): +# +# d = a + Bϵ +# +# If we index d[1], we have: +# +# d[1] = a[1] + B[1, :]ϵ +# +# Where B[1, :] indexes the input dimension of B +Idx = Union{Int, Colon, AbstractUnitRange} +count_ints(t::Tuple) = count(x -> x isa Int, t) + +function getindex(t::ArrayOperator{<:Any, T, N, M}, i::NTuple{N, Idx}, j::NTuple{M, Idx}) where {T, N, M} + # new value of M is inferred from the ArrayOperator constructor and the order of + # indexing the underlying array. + newN = N - count_ints(i) + ArrayOperator{newN}(sparse_getindex(t.data, i..., j...)) +end + +function getindex(t::ArrayOperator{<:Any, T, N, M}, i::NTuple{N, Idx}, ::Colon) where {T, N, M} + # new value of M is inferred from the ArrayOperator constructor and the order of + # indexing the underlying array. + newN = N - count_ints(i) + ArrayOperator{newN}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) +end +# Integer indexing always returns a scalar. +getindex(t::ArrayOperator, i::Vararg{Int}) = getindex(t.data, i...) +# Indexing with only slices/ranges returns a similar tensor +getindex(t::ArrayOperator{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N}(sparse_getindex(t.data, i...)) + """ -Extract a single Dual number from a DualVector at position y. +Extract a single Dual number from a DualArray. """ -function Base.getindex(x::DualVector, y::Int) - Dual(x.value[y], sparse_getindex(x.jacobian, y, :)) +function getindex(x::DualArray, args::Vararg{Int}) + Dual(x.value[args...], x.jacobian[args, :]) end """ -Extract a sub-DualVector from a DualVector using a range. +Extract a single Dual number from a DualMatrix at position (y, z). """ -function Base.getindex(x::DualVector, y::UnitRange) - newval = x.value[y] - newjac = sparse_getindex(x.jacobian, y, :) - DualVector(newval, newjac) +function getindex(x::DualMatrix, y::Int, z::Int) + # We collapse the epsilons into a single vector of partials. + Dual(x.value[y, z], vec(sparse_getindex(x.jacobian, y, z, :, :))) end """ -Return the size of the DualVector (length of the value vector). +Extract a row slice from a DualMatrix, returning a DualVector. + +NOTE: Since a DualVector requires a 2-dimensional Jacobian, and slicing a 4-tensor returns a 3-tensor, +we implicitly 'flatten' the matrix of epsilons. This is done by the reshape. + +For example, in a 2x2 case, [ϵ₁₁ ϵ₁₂;ϵ₂₁ ϵ₂₂] becomes [ϵ₁, ϵ₂, ϵ₃, ϵ₄]. """ -Base.size(x::DualVector) = (length(x.value),) +function getindex(x::DualMatrix, y::Int, ::Colon) + n = size(x.value, 2) + DualVector(x.value[y, :], reshape(sparse_getindex(x.jacobian, y, :, :, :), n, :)) +end """ -Return the axes of the DualVector. +Extract a sub-DualVector from a DualVector using a range. """ -Base.axes(x::DualVector) = axes(x.value) \ No newline at end of file +function getindex(x::DualVector, y::UnitRange) + newval = x.value[y] + newjac = x.jacobian[y, :] + DualVector(newval, newjac) +end + +# DualArray array interface +for op in (:size, :axes) + @eval $op(a::DualArray) = $op(a.value) +end \ No newline at end of file diff --git a/src/types.jl b/src/types.jl index 5367511..780e934 100644 --- a/src/types.jl +++ b/src/types.jl @@ -1,28 +1,134 @@ # Core type definitions for DualArrays.jl """ - Dual{T, Partials <: AbstractVector{T}} <: Real +This type represents a linear map from an M-array to an N-array, with a N+M=L-dimensional +array as its underlying data. This can be thought of analogously to an L-tensor equipped +with a contraction pattern characterised by (N, M). Specifically, the tensor has dimensions +a₁ x a₂ x ... x a_N x b₁ x b₂ x ... x b_M and maps an M-array of shape (b₁, b₂, ..., b_M) +to an N-array of shape (a₁, a₂, ..., a_N) by contracting over the last M indices. + +We have: +-L is the dimensionality of the tensor +-T is the element type of the tensor +-N is the dimensionality of the input array/number of lower indices +-M is the dimensionality of the output array/number of upper indices + +We enforce L = N + M by inferring M in the constructor. + +In the context of DualArrays.jl, a DualArray can be thought of as + +a + Jϵ + +Where a is an N-array of real numbers, J is an N+M=tensor and ϵ is an M-array of dual parts. +In the simplest case, where N = 0, we have a Dual number with dual parts arranged in an M-array. +""" +struct ArrayOperator{L, T, N, M} + data::AbstractArray{T, L} +end + +# Constructor to wrap an array with a tensor, given a contraction rule represented by N +function ArrayOperator{N}(data::AbstractArray{T, L}) where {L, T, N} + ArrayOperator{L, T, N, L - N}(data) +end + +# Helper convert function +_convert_array(::Type{T}, a::AbstractArray) where {T} = T.(a) +_convert_array(::Type{T}, t::ArrayOperator{L, S, N, M}) where {T, L, S, N, M} = ArrayOperator{L, T, N, M}(_convert_array(T, t.data)) + +# Basic array interface +for op in (:size, :axes, :iterate) + @eval begin + ($op)(t::ArrayOperator) = ($op)(t.data) + ($op)(t::ArrayOperator, i...) = ($op)(t.data, i...) + end +end + +# Since ArrayOperator is not an AbstractArray we define these manually +eltype(t::ArrayOperator) = eltype(t.data) + +Base.Broadcast.broadcastable(t::ArrayOperator) = t + +transpose(t::ArrayOperator) = transpose(t.data) +sum(t::ArrayOperator; kwargs...) = sum(t.data; kwargs...) + +==(a::ArrayOperator, b::ArrayOperator) = a.data == b.data +==(a::ArrayOperator, b::AbstractArray) = a.data == b +==(a::AbstractArray, b::ArrayOperator) = a == b.data +isapprox(a::ArrayOperator, b::ArrayOperator; kwargs...) = isapprox(a.data, b.data; kwargs...) +isapprox(a::ArrayOperator, b::AbstractArray; kwargs...) = isapprox(a.data, b; kwargs...) +isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(b, a) + +# Below we define a broadcast style for ArrayOperators and override copy and copyto! +# This allows all arithmetic/broadcasting with ArrayOperators to be handled by the +# underlying logic of the array contained in the struct, while ensuring that +# all results stay as a ArrayOperator. + +struct ArrayOperatorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{0} end + +# N is the input dimension of the tensor being broadcasted. +# For he result of the broadcast we will choose to preserve the highest input dimension. +Base.BroadcastStyle(::Type{<:ArrayOperator{<:Any, <:Any, N, <:Any}}) where {N} = ArrayOperatorBroadcastStyle{N}() +Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{M}) where {N, M} = ArrayOperatorBroadcastStyle{N}() +Base.BroadcastStyle(::Broadcast.DefaultArrayStyle{M}, ::ArrayOperatorBroadcastStyle{N}) where {N, M} = ArrayOperatorBroadcastStyle{N}() +Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::ArrayOperatorBroadcastStyle{M}) where {N, M} = ArrayOperatorBroadcastStyle{max(N, M)}() + +# Helper functions to help define broadcasting/arithmetic with ArrayOperators. +# By converting a broadcast involving ArrayOperators into a broadcast +# involving the underlying arrays. +_unwrap(t::ArrayOperator) = t.data +_unwrap(bc::Broadcast.Broadcasted) = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) +_unwrap(x) = x +_unwrap_args(args::Tuple) = map(_unwrap, args) + +# copy ensures that arithmetic involving a ArrayOperator returns a ArrayOperator +function Base.copy(bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{N}}) where {N} + # We create a Broadcasted of the underlying arrays and create a ArrayOperator containing + # the evaluated broadcast + databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) + ArrayOperator{N}(copy(Broadcast.flatten(databroadcast))) +end + +# copyto adds support for .= +function Base.copyto!(dest::ArrayOperator, bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{N}}) where {N} + # As above + databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) + copyto!(dest.data, Broadcast.flatten(databroadcast)) + dest +end + +""" + Dual{T, Partials <: AbstractArray{T}} <: Real A dual number type that stores a value and its partials (derivatives). # Fields - `value::T`: The primal value -- `partials::Partials`: The partial derivatives as a vector +- `partials::Partials`: The partial derivatives stored as an array + +NOTE: Partials will soon be in the ArrayOperator format. """ -struct Dual{T, Partials <: AbstractVector{T}} <: Real +struct Dual{T, Partials <: AbstractArray{T}} <: Real value::T partials::Partials end +function Dual(value::T, partials::AbstractArray{S}) where {S, T} + T2 = promote_type(T, S) + Dual(convert(T2, value), _convert_array(T2, partials)) +end + +function Dual(value::T, partials::ArrayOperator{L, S, 0, M}) where {L, S, M, T} + T2 = promote_type(T, S) + Dual(convert(T2, value), _convert_array(T2, partials).data) +end + """ - DualVector{T, M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}} + DualVector{T, V <: AbstractVector{T}, M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}} Represents a vector of dual numbers given by: values + jacobian * [ε₁, …, εₙ] -For now the entries just return the values when indexed. - # Fields - `value::Vector{T}`: The primal values - `jacobian::M`: The Jacobian matrix containing partial derivatives @@ -32,29 +138,36 @@ For now the entries just return the values when indexed. Constructs a DualVector, ensuring that the vector length matches the number of rows in the Jacobian. """ -struct DualVector{T, V <: AbstractVector{T},M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}} - value::V - jacobian::M +struct DualArray{T, N , A <: AbstractArray{T,N},J <: (ArrayOperator{L, T, N, M} where {L, M})} <: AbstractVector{Dual{T}} + value::A + jacobian::J - function DualVector(value::V, jacobian::M) where {T, V <: AbstractVector{T}, M <: AbstractMatrix{T}} - if size(jacobian, 1) != length(value) - x, y = length(value), size(jacobian, 1) - throw(ArgumentError("vector length must match number of rows in jacobian.\n" * - "vector length: $x\n" * - "no. of jacobian rows: $y")) + function DualArray(value::A, jacobian::J) where {T, N, A <: AbstractArray{T,N}, J <: (ArrayOperator{L, T, N, M} where {L, M})} + if size(value) != ntuple(i -> size(jacobian, i), N) + throw(ArgumentError("Length of value vector must match number of rows in Jacobian.")) end - new{T,V, M}(value, jacobian) + new{T,N, A, J}(value, jacobian) end end """ Constructor that forces type compatibility """ -function DualVector(value::AbstractVector, jacobian::AbstractMatrix) +function DualArray(value::AbstractArray, jacobian::ArrayOperator) T = promote_type(eltype(value), eltype(jacobian)) - DualVector(convert(Vector{T}, value), convert(AbstractMatrix{T}, jacobian)) + DualArray(_convert_array(T, value), _convert_array(T, jacobian)) +end + +# Helper function to define DualArrays with AbstractArray jacobians +function DualArray(value::AbstractArray{S, M}, jacobian::AbstractArray{T, N}) where {S, T, N, M} + DualArray(value, ArrayOperator{M}(jacobian)) end +const DualVector = DualArray{T, 1} where {T} +const DualMatrix = DualArray{T, 2} where {T} + +DualVector(value::AbstractVector, jacobian) = DualArray(value, jacobian) +DualMatrix(value::AbstractMatrix, jacobian) = DualArray(value, jacobian) # Basic equality for Dual numbers ==(a::Dual, b::Dual) = a.value == b.value && a.partials == b.partials isapprox(a::Dual, b::Dual) = isapprox(a.value, b.value) && isapprox(a.partials, b.partials) \ No newline at end of file diff --git a/src/utilities.jl b/src/utilities.jl index 080155f..726af37 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -10,8 +10,8 @@ end # Helper functions for vcat operations _jacobian(d::Dual) = permutedims(d.partials) -_jacobian(d::DualVector) = d.jacobian -_jacobian(d::DualVector, ::Int) = d.jacobian +_jacobian(d::DualVector) = d.jacobian.data +_jacobian(d::DualVector, ::Int) = d.jacobian.data _jacobian(x::Number, N::Int) = Zeros(typeof(x), 1, N) _value(v::AbstractVector) = v @@ -49,7 +49,7 @@ end """ Custom display method for DualVectors. """ -Base.show(io::IO, ::MIME"text/plain", x::DualVector) = +show(io::IO, ::MIME"text/plain", x::DualArray) = (print(io, x.value); print(io, " + "); print(io, x.jacobian); print(io, "𝛜")) """ diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl new file mode 100644 index 0000000..9857e44 --- /dev/null +++ b/test/array_operator_test.jl @@ -0,0 +1,59 @@ +using Test, LinearAlgebra +using DualArrays: ArrayOperator + +@testset "ArrayOperator" begin + t = ArrayOperator{1}([1 2 3;4 5 6;7 8 9]) + + @testset "Equality" begin + @test t == [1 2 3;4 5 6;7 8 9] + @test [1 2 3;4 5 6;7 8 9] == t + @test isapprox(t, [1 2 3;4 5 6;7 8 9]) + @test isapprox([1 2 3;4 5 6;7 8 9], t) + end + @testset "Indexing" begin + @test t[1,1] == 1 + @test t[(1,), (:,)] == ArrayOperator{0}([1, 2, 3]) + @test t[1:2, 1:2] == ArrayOperator{1}([1 2;4 5]) + + s = ArrayOperator{2}(ones(2,2,2)) + @test s[(1,1), (:,)] == ArrayOperator{0}([1, 1]) + @test s[(1,:), (:,)] == ArrayOperator{1}([1 1;1 1]) + @test s[(1,2), :] == ArrayOperator{0}([1, 1]) + end + + @testset "Arithmetic" begin + @test t + t isa ArrayOperator + @test t + t == ArrayOperator{1}([2 4 6;8 10 12;14 16 18]) + + @test t .* 2 isa ArrayOperator + @test t .* 2 == ArrayOperator{1}([2 4 6;8 10 12;14 16 18]) + @test t .* [1,2,3] == ArrayOperator{1}([1 2 3;8 10 12;21 24 27]) + + t1 = ArrayOperator{0}([1, 2, 3]) + t2 = ArrayOperator{1}([1 2 3]) + t3 = ArrayOperator{0}([2]) + + @test t1 .+ ones(3, 3) == [2 2 2;3 3 3; 4 4 4] + @test t1 .* t2 == ArrayOperator{1}([1 2 3;2 4 6;3 6 9]) + @test t1 .* t2 .+ t3 == ArrayOperator{1}([3 4 5;4 6 8;5 8 11]) + + @test sin.(t1) == ArrayOperator{0}(sin.([1, 2, 3])) + + s = ArrayOperator{1}(zeros(3, 3)) + + result = (s .= t .+ 1) + + @test result isa ArrayOperator + @test result.data == s.data + @test s == ArrayOperator{1}([2 3 4; 5 6 7; 8 9 10]) + + @test t * t == ArrayOperator{1}([30 36 42;66 81 96; 102 126 150]) + @test t * [1, 0, 0] == ArrayOperator{1}([1,4,7]) + @test [1 0 0] * t == ArrayOperator{1}([1 2 3]) + + @test t * 2 == [2 4 6;8 10 12;14 16 18] + @test 2 * t == [2 4 6;8 10 12;14 16 18] + + @test s - t == ones(3, 3) + end +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 31da00c..2d2b0c8 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,14 +1,25 @@ using DualArrays, Test, LinearAlgebra, ForwardDiff, BandedMatrices -using DualArrays: Dual +using DualArrays: ArrayOperator @testset "DualArrays" begin @testset "Type Definition" begin @test_throws ArgumentError DualVector([1,2],I(3)) + @test Dual(1.0, [1, 2, 3]).partials == [1.0, 2.0, 3.0] end @testset "Indexing" begin - v = DualVector([1, 2, 3], [1 2 3; 4 5 6;7 8 9]) + v = DualVector([1., 2, 3], [1 2 3; 4 5 6;7 8 9]) + m = DualMatrix([1 2;3 4], zeros(2, 2, 2)) + + @test size(v) == (3,) + @test axes(v) == (Base.OneTo(3),) + + @test size(m) == (2, 2) + @test axes(m) == (Base.OneTo(2), Base.OneTo(2)) + + @test m[1, 1] == Dual(1, [0, 0]) + @test v[1] isa Dual @test v[1] == Dual(1,[1,2,3]) @test v[2] == Dual(2,[4,5,6]) @@ -22,10 +33,16 @@ using DualArrays: Dual n = 10 v = DualVector(1:n, I(n)) - @test v[2:end].jacobian isa BandedMatrix + @test v[2:end].jacobian.data isa BandedMatrix @test sum(v[1:end-1] .* v[2:end]).partials == ForwardDiff.gradient(v -> sum(v[1:end-1] .* v[2:end]), 1:n) end + + @testset "Indexing (DualMatrix)" begin + M = DualMatrix([1 2; 3 4], zeros(2, 2, 2, 2)) + @test M[1, 1] == Dual(1, [0, 0, 0, 0]) + @test M[1, :] == DualVector([1, 2], zeros(2, 4)) + end @testset "Arithmetic (DualVector)" begin v = DualVector([1, 2, 3], [1 2 3; 4 5 6;7 8 9]) @@ -86,7 +103,36 @@ using DualArrays: Dual @test vcat(x) == DualVector([1], [1 2 3]) @test vcat(x, x) == DualVector([1, 1], [1 2 3;1 2 3]) @test vcat(x, y) == DualVector([1, 2, 3], [1 2 3;4 5 6;7 8 9]) + + z = DualVector([2, 3], [1 0; 0 1]) + @test vcat(1, z).jacobian.data == [0 0; 1 0; 0 1] + @test vcat(z, 1).jacobian.data == [1 0; 0 1; 0 0] + end + + @testset "show" begin + d = DualVector([1.0, 2.0], [1 0; 0 1]) + s = repr(MIME"text/plain"(), d) + @test occursin(" + ", s) + @test endswith(s, "𝛜") + end + + @testset "Nested Dual Numbers" begin + # Example of finding Hessian of f(x) = x[1]*x[2] + # Through nested dual numbers + d = DualVector([1, 2], sparse(I, 2, 2)) + D = DualMatrix(sparse(I, 2, 2), zeros(2, 2, 2, 1)) + d2 = DualVector(d, D) + val = d2[1] * d2[2] + @test val isa Dual + @test val.value isa Dual + @test val.partials isa DualVector + + @test val.value.value == 2 # f(x) + @test val.value.partials == [2, 1] # ∇f(x) + @test val.partials.value == [2, 1] # ∇f(x) + @test val.partials.jacobian == [0 1; 1 0] # Hessian of f(x) end include("broadcast_test.jl") + include("array_operator_test.jl") end \ No newline at end of file