From 7e509e011431ab6a81dd26a4bd34879d34bccd9f Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Fri, 10 Apr 2026 14:31:30 +0100 Subject: [PATCH 01/28] initial commit --- src/indexing.jl | 14 ++++++++++++ src/types.jl | 56 +++++++++++++++++++++++++++++++++++++++++++++--- test/runtests.jl | 1 + 3 files changed, 68 insertions(+), 3 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index 7ff7f53..e64a7c0 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -6,6 +6,20 @@ sparse_getindex(a...) = layout_getindex(a...) sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D, 2)) sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D, 1)) +""" +For the purposes of DualArrays.jl, we need to be able to index on the input indices +(corresponding to the the primal values) only. This way, when we index, we still get the +same 'arrangement' of dual parts. Example in the DualVector case (where the Jacobian is +a Tensor{2, Float64, 1, 1} acting on a vector ϵ): + +d = a + Jϵ +d[1] = a[1] + J[1,:]ϵ + +J[1, :] is a row vector so it can still be applied to our ϵ. +""" +getindex(t::Tensor{L, T, N, M}, i::NTuple{N, Int}, ::Colon) where {L, T, N, M} = sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...) +getindex(t::Tensor, i...) = t.data[i...] + """ Extract a single Dual number from a DualVector at position y. """ diff --git a/src/types.jl b/src/types.jl index 5367511..c8c3104 100644 --- a/src/types.jl +++ b/src/types.jl @@ -1,5 +1,45 @@ # Core type definitions for DualArrays.jl +""" +This type represents a Tensor along with its contraction rule. +A tensor with its contraction rule represents a linear map from an N-array to an M-array, +with 0-arrays considered scalars, 1-arrays vectors, etc. + +We have: +-L is the dimensionality of the tensor +-T is the element type of the tensor +-N is the dimensionality of the input array/number of lower indices +-M is the dimensionality of the output array/number of upper indices + +We enforce L = N + M. + +In the context of DualArrays.jl, a DualArray (currently only a vector) can be thought of as + +a + Jϵ + +Where a is an M-array of real numbers, J is an N+M=tensor and ϵ is an N-array of dual parts. +In the simplest case, where M = 0, we have a Dual number with dual parts arranged in an N-array. +""" +struct Tensor{L, T, N, M} <: AbstractArray{T, L} + data::AbstractArray{T, L} +end + +function Tensor{N,M}(data::AbstractArray{T, L}) where {L, T, N, M} + if L != N + M + throw(ArgumentError("Tensor order must be equivalent to N + M.")) + end + Tensor{L, T, N, M}(data) +end + +Base.convert(::Type{Tensor{L, T, N, M}}, tensor::Tensor{L, S, N, M}) where {L, T, N, M, S} = + Tensor{N, M}(convert.(T, tensor.data)) + +for op in (:size, :axes) + @eval begin + ($op)(t::Tensor) = ($op)(t.data) + end +end + """ Dual{T, Partials <: AbstractVector{T}} <: Real @@ -7,13 +47,23 @@ A dual number type that stores a value and its partials (derivatives). # Fields - `value::T`: The primal value -- `partials::Partials`: The partial derivatives as a vector +- `partials::Partials`: The partial derivatives as a tensor mapping to a scalar """ -struct Dual{T, Partials <: AbstractVector{T}} <: Real +struct Dual{T, Partials <: (Tensor{L, T, 0, M} where {L, M})} <: Real value::T partials::Partials end +function Dual(value::T, partials::Tensor{L, S, 0, M}) where {S, T, L, M} + T2 = promote_type(T, S) + Dual{T2, Tensor{L, T2, 0, M}}(convert(T2, value), convert(Tensor{L, T2, 0, M}, partials)) +end + +# Helper function to define Duals from an AbstractArray +function Dual(value, partials::AbstractArray{T, N}) where {T, N} + Dual(value, Tensor{0, N}(partials)) +end + """ DualVector{T, M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}} @@ -32,7 +82,7 @@ For now the entries just return the values when indexed. Constructs a DualVector, ensuring that the vector length matches the number of rows in the Jacobian. """ -struct DualVector{T, V <: AbstractVector{T},M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}} +struct DualVector{T, V <: AbstractVector{T},M <: (Tensor{L, T, 1, M} where {L, M})} <: AbstractVector{Dual{T}} value::V jacobian::M diff --git a/test/runtests.jl b/test/runtests.jl index 31da00c..7fd2ac4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,6 +5,7 @@ using DualArrays: Dual @testset "Type Definition" begin @test_throws ArgumentError DualVector([1,2],I(3)) + @test Dual(1.0, [1, 2, 3]).partials == Tensor{0, 1}([1.0, 2.0, 3.0]) end @testset "Indexing" begin From a91f7aba9fe20fd385722e50735ef00a38972089 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Mon, 13 Apr 2026 13:02:04 +0100 Subject: [PATCH 02/28] fixes and add tests --- src/indexing.jl | 21 +++++++----- src/types.jl | 84 ++++++++++++++++++++++++++++++++++----------- src/utilities.jl | 2 +- test/runtests.jl | 7 ++-- test/tensor_test.jl | 29 ++++++++++++++++ 5 files changed, 111 insertions(+), 32 deletions(-) create mode 100644 test/tensor_test.jl diff --git a/src/indexing.jl b/src/indexing.jl index e64a7c0..3cae792 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -16,32 +16,37 @@ d = a + Jϵ d[1] = a[1] + J[1,:]ϵ J[1, :] is a row vector so it can still be applied to our ϵ. + +Note that a slice of a tensor done in this way will reduce the input dimension to 0. """ -getindex(t::Tensor{L, T, N, M}, i::NTuple{N, Int}, ::Colon) where {L, T, N, M} = sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...) -getindex(t::Tensor, i...) = t.data[i...] +getindex(t::Tensor{<:Any, <:Any, N, M}, i::NTuple{N, Int}, ::Colon) where {N, M} = Tensor{0}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) +# Integer indexing always returns a scalar. +getindex(t::Tensor, i::Vararg{Int}) = getindex(t.data, i...) +# Indexing with only slices/ranges returns a similar tensor +getindex(t::Tensor{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = Tensor{N}(sparse_getindex(t.data, i...)) """ Extract a single Dual number from a DualVector at position y. """ -function Base.getindex(x::DualVector, y::Int) - Dual(x.value[y], sparse_getindex(x.jacobian, y, :)) +function getindex(x::DualVector, y::Int) + Dual(x.value[y], x.jacobian[(y,), :]) end """ Extract a sub-DualVector from a DualVector using a range. """ -function Base.getindex(x::DualVector, y::UnitRange) +function getindex(x::DualVector, y::UnitRange) newval = x.value[y] - newjac = sparse_getindex(x.jacobian, y, :) + newjac = x.jacobian[y, :] DualVector(newval, newjac) end """ Return the size of the DualVector (length of the value vector). """ -Base.size(x::DualVector) = (length(x.value),) +size(x::DualVector) = (length(x.value),) """ Return the axes of the DualVector. """ -Base.axes(x::DualVector) = axes(x.value) \ No newline at end of file +axes(x::DualVector) = axes(x.value) \ No newline at end of file diff --git a/src/types.jl b/src/types.jl index c8c3104..2f22fc0 100644 --- a/src/types.jl +++ b/src/types.jl @@ -11,7 +11,7 @@ We have: -N is the dimensionality of the input array/number of lower indices -M is the dimensionality of the output array/number of upper indices -We enforce L = N + M. +We enforce L = N + M by inferring M in the constructor. In the context of DualArrays.jl, a DualArray (currently only a vector) can be thought of as @@ -24,22 +24,64 @@ struct Tensor{L, T, N, M} <: AbstractArray{T, L} data::AbstractArray{T, L} end -function Tensor{N,M}(data::AbstractArray{T, L}) where {L, T, N, M} - if L != N + M - throw(ArgumentError("Tensor order must be equivalent to N + M.")) - end - Tensor{L, T, N, M}(data) +# Constructor to wrap an array with a tensor, given a contraction rule represented by N +function Tensor{N}(data::AbstractArray{T, L}) where {L, T, N} + Tensor{L, T, N, L - N}(data) end +# Helper convert function +_convert_array(::Type{T}, a::AbstractArray) where {T} = T.(a) +_convert_array(::Type{T}, t::Tensor{L, S, N, M}) where {T, L, S, N, M} = Tensor{L, T, N, M}(_convert_array(T, t.data)) + Base.convert(::Type{Tensor{L, T, N, M}}, tensor::Tensor{L, S, N, M}) where {L, T, N, M, S} = - Tensor{N, M}(convert.(T, tensor.data)) + Tensor{L, T, N, M}(_convert_array(T, tensor.data)) +# Basic array interface for op in (:size, :axes) @eval begin ($op)(t::Tensor) = ($op)(t.data) + ($op)(t::Tensor, i...) = ($op)(t.data, i...) end end +# Below we define a broadcast style for Tensors and override copy and copyto! +# This allows all arithmetic/broadcasting with Tensors to be handled by the +# underlying logic of the array contained in the struct, while ensuring that +# all results stay as a Tensor. + +struct TensorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{1} end +TensorBroadcastStyle{N}(::Val{M}) where {N, M} = TensorBroadcastStyle{N}() + +# N is the input dimension of the tensor being broadcasted. +# For he result of the broadcast we will choose to preserve the highest input dimension. +Base.BroadcastStyle(::Type{<:Tensor{<:Any, <:Any, N, <:Any}}) where {N} = TensorBroadcastStyle{N}() +Base.BroadcastStyle(::TensorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{0}) where {N} = TensorBroadcastStyle{N}() +Base.BroadcastStyle(::TensorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{1}) where {N} = TensorBroadcastStyle{N}() +Base.BroadcastStyle(::TensorBroadcastStyle{N}, ::TensorBroadcastStyle{M}) where {N, M} = TensorBroadcastStyle{max(N, M)}() + +# Helper functions to help define broadcasting/arithmetic with Tensors. +# By converting a broadcast involving Tensors into a broadcast +# involving the underlying arrays. +_unwrap(t::Tensor) = t.data +_unwrap(x) = x +_unwrap_args(args::Tuple) = map(_unwrap, args) + +# copy ensures that arithmetic involving a Tensor returns a Tensor +function Base.copy(bc::Broadcast.Broadcasted{TensorBroadcastStyle{N}}) where {N} + # We create a Broadcasted of the underlying arrays and create a Tensor containing + # the evaluated broadcast + databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) + Tensor{N}(copy(Broadcast.flatten(databroadcast))) +end + +# copyto adds support for .= +function Base.copyto!(dest::Tensor, bc::Broadcast.Broadcasted{TensorBroadcastStyle}) + # As above + databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) + copyto!(dest.data, Broadcast.flatten(databroadcast)) + dest +end + """ Dual{T, Partials <: AbstractVector{T}} <: Real @@ -61,7 +103,7 @@ end # Helper function to define Duals from an AbstractArray function Dual(value, partials::AbstractArray{T, N}) where {T, N} - Dual(value, Tensor{0, N}(partials)) + Dual(value, Tensor{0}(partials)) end """ @@ -82,27 +124,29 @@ For now the entries just return the values when indexed. Constructs a DualVector, ensuring that the vector length matches the number of rows in the Jacobian. """ -struct DualVector{T, V <: AbstractVector{T},M <: (Tensor{L, T, 1, M} where {L, M})} <: AbstractVector{Dual{T}} +struct DualVector{T, V <: AbstractVector{T},J <: (Tensor{L, T, 1, M} where {L, M})} <: AbstractVector{Dual{T}} value::V - jacobian::M - - function DualVector(value::V, jacobian::M) where {T, V <: AbstractVector{T}, M <: AbstractMatrix{T}} - if size(jacobian, 1) != length(value) - x, y = length(value), size(jacobian, 1) - throw(ArgumentError("vector length must match number of rows in jacobian.\n" * - "vector length: $x\n" * - "no. of jacobian rows: $y")) + jacobian::J + + function DualVector(value::V, jacobian::J) where {T, V <: AbstractVector{T}, J <: (Tensor{L, T, 1, M} where {L, M})} + if length(value) != size(jacobian, 1) + throw(ArgumentError("Length of value vector must match number of rows in Jacobian.")) end - new{T,V, M}(value, jacobian) + new{T, V, J}(value, jacobian) end end """ Constructor that forces type compatibility """ -function DualVector(value::AbstractVector, jacobian::AbstractMatrix) +function DualVector(value::AbstractVector, jacobian::Tensor) T = promote_type(eltype(value), eltype(jacobian)) - DualVector(convert(Vector{T}, value), convert(AbstractMatrix{T}, jacobian)) + DualVector(_convert_array(T, value), _convert_array(T, jacobian)) +end + +# Helper function to define DualVectors with AbstractArray jacobians +function DualVector(value::AbstractVector, jacobian::AbstractArray{T, N}) where {T, N} + DualVector(value, Tensor{1}(jacobian)) end # Basic equality for Dual numbers diff --git a/src/utilities.jl b/src/utilities.jl index 080155f..e892b95 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -49,7 +49,7 @@ end """ Custom display method for DualVectors. """ -Base.show(io::IO, ::MIME"text/plain", x::DualVector) = +show(io::IO, ::MIME"text/plain", x::DualVector) = (print(io, x.value); print(io, " + "); print(io, x.jacobian); print(io, "𝛜")) """ diff --git a/test/runtests.jl b/test/runtests.jl index 7fd2ac4..68a1d81 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,11 +1,11 @@ using DualArrays, Test, LinearAlgebra, ForwardDiff, BandedMatrices -using DualArrays: Dual +using DualArrays: Tensor @testset "DualArrays" begin @testset "Type Definition" begin @test_throws ArgumentError DualVector([1,2],I(3)) - @test Dual(1.0, [1, 2, 3]).partials == Tensor{0, 1}([1.0, 2.0, 3.0]) + @test Dual(1.0, [1, 2, 3]).partials == Tensor{0}([1.0, 2.0, 3.0]) end @testset "Indexing" begin @@ -23,7 +23,7 @@ using DualArrays: Dual n = 10 v = DualVector(1:n, I(n)) - @test v[2:end].jacobian isa BandedMatrix + @test v[2:end].jacobian.data isa BandedMatrix @test sum(v[1:end-1] .* v[2:end]).partials == ForwardDiff.gradient(v -> sum(v[1:end-1] .* v[2:end]), 1:n) end @@ -90,4 +90,5 @@ using DualArrays: Dual end include("broadcast_test.jl") + include("tensor_test.jl") end \ No newline at end of file diff --git a/test/tensor_test.jl b/test/tensor_test.jl new file mode 100644 index 0000000..ad25701 --- /dev/null +++ b/test/tensor_test.jl @@ -0,0 +1,29 @@ +using Test, LinearAlgebra +using DualArrays: Tensor + +@testset "Tensor" begin + t = Tensor{1}([1 2 3;4 5 6;7 8 9]) + @testset "Indexing" begin + @test t[1,1] == 1 + @test t[(1,), :] == Tensor{0}([1, 2, 3]) + @test t[1:2, 1:2] == Tensor{1}([1 2;4 5]) + end + + @testset "Arithmetic" begin + @test t + t isa Tensor + @test t + t == Tensor{1}([2 4 6;8 10 12;14 16 18]) + + @test t .* 2 isa Tensor + @test t .* 2 == Tensor{1}([2 4 6;8 10 12;14 16 18]) + @test t .* [1,2,3] == Tensor{1}([1 2 3;8 10 12;21 24 27]) + + t1 = Tensor{0}([1, 2, 3]) + t2 = Tensor{1}([1 2 3]) + t3 = Tensor{0}([2]) + + @test t1 .* t2 == Tensor{1}([1 2 3;2 4 6;3 6 9]) + @test t1 .* t2 .+ t3 == Tensor{1}([3 4 5;4 6 8;5 8 11]) + + @test sin.(t1) == Tensor{0}(sin.([1, 2, 3])) + end +end \ No newline at end of file From f7bd0e4542539b08203019e073838fbb5b8015ca Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Mon, 13 Apr 2026 16:00:08 +0100 Subject: [PATCH 03/28] initial commit --- src/DualArrays.jl | 2 +- src/indexing.jl | 14 ++++++-------- src/types.jl | 28 ++++++++++++++++++++++++++++ src/utilities.jl | 2 +- 4 files changed, 36 insertions(+), 10 deletions(-) diff --git a/src/DualArrays.jl b/src/DualArrays.jl index c169778..a04d7fb 100644 --- a/src/DualArrays.jl +++ b/src/DualArrays.jl @@ -11,7 +11,7 @@ Differentiation rules are mostly provided by ChainRules.jl. """ module DualArrays -export DualVector, Dual, jacobian +export DualMatrix, DualVector, Dual, jacobian import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \ diff --git a/src/indexing.jl b/src/indexing.jl index 3cae792..8bf18af 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -41,12 +41,10 @@ function getindex(x::DualVector, y::UnitRange) DualVector(newval, newjac) end -""" -Return the size of the DualVector (length of the value vector). -""" -size(x::DualVector) = (length(x.value),) +function getindex(x::DualMatrix, y::Vararg{Int}) + Dual(x.value[y...], x.jacobian[y..., :]) +end -""" -Return the axes of the DualVector. -""" -axes(x::DualVector) = axes(x.value) \ No newline at end of file +# Array interface for DualArray +size(x::DualArray) = (length(x.value),) +axes(x::DualArray) = axes(x.value) \ No newline at end of file diff --git a/src/types.jl b/src/types.jl index 2f22fc0..b091f2b 100644 --- a/src/types.jl +++ b/src/types.jl @@ -149,6 +149,34 @@ function DualVector(value::AbstractVector, jacobian::AbstractArray{T, N}) where DualVector(value, Tensor{1}(jacobian)) end +""" + DualMatrix{T, M <: AbstractMatrix{T}, J } <: AbstractMatrix{Dual{T}} +""" + +struct DualMatrix{T, M <: AbstractMatrix{T}, J <: (Tensor{L, T, 2, M} where {L, M})} <: AbstractMatrix{Dual{T}} + value::M + jacobian::J + + function DualMatrix(value::M, jacobian::J) where {T, M <: AbstractMatrix{T}, J <: (Tensor{L, T, 2, N} where {L, N})} + if size(value, 1) != size(jacobian, 1) || size(value, 2) != size(jacobian, 2) + throw(ArgumentError("Dimensions of value matrix must match inner dimensions of Jacobian tensor.")) + end + new{T, M, J}(value, jacobian) + end +end + +function DualMatrix(value::AbstractMatrix, jacobian::Tensor) + T = promote_type(eltype(value), eltype(jacobian)) + DualMatrix(_convert_array(T, value), _convert_array(T, jacobian)) +end + +function DualMatrix(value::AbstractMatrix, jacobian::AbstractArray{T, N}) where {T, N} + DualMatrix(value, Tensor{2}(jacobian)) +end + +# For convenience +DualArray = Union{DualVector, DualMatrix} + # Basic equality for Dual numbers ==(a::Dual, b::Dual) = a.value == b.value && a.partials == b.partials isapprox(a::Dual, b::Dual) = isapprox(a.value, b.value) && isapprox(a.partials, b.partials) \ No newline at end of file diff --git a/src/utilities.jl b/src/utilities.jl index e892b95..7e49d9d 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -49,7 +49,7 @@ end """ Custom display method for DualVectors. """ -show(io::IO, ::MIME"text/plain", x::DualVector) = +show(io::IO, ::MIME"text/plain", x::DualArray) = (print(io, x.value); print(io, " + "); print(io, x.jacobian); print(io, "𝛜")) """ From 23e1633188ea3465e1a8950001bb0bd964637b87 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Mon, 13 Apr 2026 17:31:38 +0100 Subject: [PATCH 04/28] promotion and display fixes --- src/indexing.jl | 2 +- src/types.jl | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index 8bf18af..e3e7df3 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -46,5 +46,5 @@ function getindex(x::DualMatrix, y::Vararg{Int}) end # Array interface for DualArray -size(x::DualArray) = (length(x.value),) +size(x::DualArray) = size(x.value) axes(x::DualArray) = axes(x.value) \ No newline at end of file diff --git a/src/types.jl b/src/types.jl index b091f2b..eb3a90e 100644 --- a/src/types.jl +++ b/src/types.jl @@ -32,6 +32,8 @@ end # Helper convert function _convert_array(::Type{T}, a::AbstractArray) where {T} = T.(a) _convert_array(::Type{T}, t::Tensor{L, S, N, M}) where {T, L, S, N, M} = Tensor{L, T, N, M}(_convert_array(T, t.data)) +_convert_array(::Type{Dual{T}}, a::DualVector) where {T} = DualVector(_convert_array(T, a.value), _convert_array(T, a.jacobian)) +_convert_array(::Type{Dual{T}}, a::DualMatrix) where {T} = DualMatrix(_convert_array(T, a.value), _convert_array(T, a.jacobian)) Base.convert(::Type{Tensor{L, T, N, M}}, tensor::Tensor{L, S, N, M}) where {L, T, N, M, S} = Tensor{L, T, N, M}(_convert_array(T, tensor.data)) @@ -179,4 +181,7 @@ DualArray = Union{DualVector, DualMatrix} # Basic equality for Dual numbers ==(a::Dual, b::Dual) = a.value == b.value && a.partials == b.partials -isapprox(a::Dual, b::Dual) = isapprox(a.value, b.value) && isapprox(a.partials, b.partials) \ No newline at end of file +isapprox(a::Dual, b::Dual) = isapprox(a.value, b.value) && isapprox(a.partials, b.partials) + +# Type promotion on Dual +Base.promote_rule(::Type{Dual{T1}}, ::Type{Dual{T2}}) where {T1, T2} = Dual{promote_type(T1, T2)} \ No newline at end of file From 875ef60a3d08a698927d05ee575446ed4a254f71 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Mon, 13 Apr 2026 18:10:41 +0100 Subject: [PATCH 05/28] fix indexing --- src/indexing.jl | 32 ++++++++++++++++++++------------ test/tensor_test.jl | 7 ++++++- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index 3cae792..b41d2d3 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -6,20 +6,28 @@ sparse_getindex(a...) = layout_getindex(a...) sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D, 2)) sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D, 1)) -""" -For the purposes of DualArrays.jl, we need to be able to index on the input indices -(corresponding to the the primal values) only. This way, when we index, we still get the -same 'arrangement' of dual parts. Example in the DualVector case (where the Jacobian is -a Tensor{2, Float64, 1, 1} acting on a vector ϵ): -d = a + Jϵ -d[1] = a[1] + J[1,:]ϵ +# We need a system of indexing that takes two tuples +# of length N and M. We must then return a Tensor whose new input and output dimensions +# are inferred from how many of the arguments in each respective tuple are integers. -J[1, :] is a row vector so it can still be applied to our ϵ. +# For the purposes of DualArrays.jl, we only need to index the input dimensions +Idx = Union{Int, Colon, AbstractUnitRange} +count_ints(t::Tuple) = count(x -> x isa Int, t) -Note that a slice of a tensor done in this way will reduce the input dimension to 0. -""" -getindex(t::Tensor{<:Any, <:Any, N, M}, i::NTuple{N, Int}, ::Colon) where {N, M} = Tensor{0}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) +function getindex(t::Tensor{<:Any, T, N, M}, i::NTuple{N, Idx}, j::NTuple{M, Idx}) where {T, N, M} + # new value of M is inferred from the Tensor constructor and the order of + # indexing the underlying array. + newN = N - count_ints(i) + Tensor{newN}(sparse_getindex(t.data, i..., j...)) +end + +function getindex(t::Tensor{<:Any, T, N, M}, i::NTuple{N, Idx}, ::Colon) where {T, N, M} + # new value of M is inferred from the Tensor constructor and the order of + # indexing the underlying array. + newN = N - count_ints(i) + Tensor{newN}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) +end # Integer indexing always returns a scalar. getindex(t::Tensor, i::Vararg{Int}) = getindex(t.data, i...) # Indexing with only slices/ranges returns a similar tensor @@ -29,7 +37,7 @@ getindex(t::Tensor{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) wher Extract a single Dual number from a DualVector at position y. """ function getindex(x::DualVector, y::Int) - Dual(x.value[y], x.jacobian[(y,), :]) + Dual(x.value[y], x.jacobian[(y,), (:,)]) end """ diff --git a/test/tensor_test.jl b/test/tensor_test.jl index ad25701..8ce7572 100644 --- a/test/tensor_test.jl +++ b/test/tensor_test.jl @@ -5,8 +5,13 @@ using DualArrays: Tensor t = Tensor{1}([1 2 3;4 5 6;7 8 9]) @testset "Indexing" begin @test t[1,1] == 1 - @test t[(1,), :] == Tensor{0}([1, 2, 3]) + @test t[(1,), (:,)] == Tensor{0}([1, 2, 3]) @test t[1:2, 1:2] == Tensor{1}([1 2;4 5]) + + s = Tensor{2}(ones(2,2,2)) + @test s[(1,1), (:,)] == Tensor{0}([1, 1]) + @test s[(1,:), (:,)] == Tensor{1}([1 1;1 1]) + @test s[(1,2), :] == Tensor{0}([1, 1]) end @testset "Arithmetic" begin From f96ba5cfc90fcc29c15ed9ba2d5a36350f40b138 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Tue, 14 Apr 2026 12:13:23 +0100 Subject: [PATCH 06/28] fixes with tests --- examples/nestedduals.jl | 72 +++++++++++++++++++++++++++++++++++++++++ src/arithmetic.jl | 20 ++++++++++++ src/indexing.jl | 30 +++++++++++++---- src/types.jl | 22 +++++++++---- test/broadcast_test.jl | 3 ++ test/runtests.jl | 14 +++++++- 6 files changed, 147 insertions(+), 14 deletions(-) create mode 100644 examples/nestedduals.jl diff --git a/examples/nestedduals.jl b/examples/nestedduals.jl new file mode 100644 index 0000000..79e1597 --- /dev/null +++ b/examples/nestedduals.jl @@ -0,0 +1,72 @@ +### +# +# IDEA: Can we have a DualVector inside a DualVector in order to +# achieve 'nested dual numbers' thereby achieving higher order derivatives? +# +# BACKGROUND: +# +# Consider a number with two dual parts, ϵ and ϵ' such that ϵϵ' ≠ 0. +# We can represent such a number as follows: +# +# a + bϵ + cϵ' + dϵϵ' +# +# Consider an arbitrary function f: ℝ → ℝ given by f(x) = x^n. +# We can evaluate f at the above dual number as follows: +# +# f(a + bϵ + cϵ' + dϵϵ') = +# a^n + n a^(n-1) b ϵ + n a^(n-1) c ϵ' + (n a^(n-1) d + n(n-1) a^(n-2) b c) ϵϵ' +# +# Fix a ∈ ℝ. We observe that if we fix b = 1, c = 1, d = 0, we have that the +# coefficient of ϵϵ' is equivalent to f''(a). Thus using dual numbers like this +# Gives us a way to compute second derivatives. +# +# We now generalise to the vector case. Consider vectors of dual parts ϵ and ϵ' +# Such that ϵᵢϵ'ⱼ ≠ 0 for all i, j. A dual vector in these can be written as +# +# (a + Bϵ) + (C + Dϵ)ϵ' +# +# Where a is a value of reals, B is the matrix of coefficients of ϵ, +# C is the coefficients of ϵ' and D is the 3-tensor of coefficients of ϵϵ', +# (one dimension for each of a, ϵ and ϵ'. This is expanded by applying D to ϵ +# to first obtain a matrix via tensor contraction, and then applying this to ϵ') +# +# Analogously, if we fix B = I, C = I, D = 0, we have that the resulting 3-tensor corresponding +# to ϵϵ' is the Hessians of f at each element of a concatenated. If we consider a +# vector to scalar function, this will be a matrix that is just the Hessian. +# +# We now implement an example using f(x) = x[1] * x[2] below. + +f(x) = x[1] * x[2] + +using DualArrays +# a + Bϵ +x = DualVector([2, 3], [1 0; 0 1]) + +# C + Dϵ +y = DualMatrix([1 0;0 1], zeros(2,2,2)) + +# Setup the nested dual vector (a + Bϵ) + (C + Dϵ)ϵ' +z = DualVector(x, y) + +# This will return a Dual number: +# a + b^Tϵ + (c + Dϵ)ϵ' +result = f(z) + +# equivalent to a +value = result.value.value +# equivalent to b +gradient = result.value.partials +# equivalent to c +gradient2 = result.partials.data.value +# equivalent to d +hessian = result.partials.data.jacobian + +# We expect this to be 6 +print("Value: ", value, "\n") +# We expect this to be [3, 2] +print("Gradient: ", gradient, "\n") +# We expect this to be the same +print("Gradient: ", gradient2, "\n") +# We expect this to be [0 1; 1 0] +print("Hessian: ", hessian, "\n") + diff --git a/src/arithmetic.jl b/src/arithmetic.jl index afcfcbc..021a67f 100644 --- a/src/arithmetic.jl +++ b/src/arithmetic.jl @@ -1,5 +1,11 @@ # Arithmetic operations for DualArrays.jl +# Tensor transpose +Base.transpose(t::Tensor{<:Any, <:Any, <:N, <:Any}) where {N} = Tensor{N}(transpose(t.data)) +# Special case: Transpose of a vector becomes a matrix +# TODO: Technically a row vector is a Tensor{1, T, 1, 0} +Base.transpose(t::Tensor{1, <:Any, 0, <:Any}) = Tensor{1}(transpose(t.data)) + """ Addition/Subtraction of DualVectors. """ @@ -14,6 +20,10 @@ end Matrix multiplication with a DualVector. """ *(x::AbstractMatrix, y::DualVector) = DualVector(x * y.value, x * y.jacobian) +*(x::Number, t::Tensor) = x .* t +*(t::Tensor, x::Number) = t .* x +*(x::Dual, t::Tensor{<:Any, <:Any, N, <:Any}) where {N} = Tensor{N}(x .* t.data) +*(t::Tensor{<:Any, <:Any, N, <:Any}, x::Dual) where {N} = Tensor{N}(t.data .* x) """ this section attempts to define broadcasting rules on DualVectors for functions @@ -77,6 +87,16 @@ for (_, f, n) in DiffRules.diffrules(filter_modules=(:Base,)) jac = $p1.(x.value, y.value) .* x.jacobian .+ $p2.(x.value, y.value) .* y.jacobian return DualVector(val, jac) end + @eval function broadcasted(::typeof($f), x::Dual, y::AbstractVector) + val = $f.(x.value, y) + jac = $p1.(x.value, y) .* transpose(x.partials) + return DualVector(val, jac) + end + @eval function broadcasted(::typeof($f), x::AbstractVector, y::Dual) + val = $f.(x, y.value) + jac = $p2.(x, y.value) .* transpose(y.partials) + return DualVector(val, jac) + end # Must have Base.$f in order not to import everything @eval Base.$f(x::Dual, y::Dual) = Dual($f(x.value, y.value), $p1(x.value, y.value) * x.partials + $p2(x.value, y.value) * y.partials) @eval Base.$f(x::Dual, y::Real) = Dual($f(x.value, y), $p1(x.value, y) * x.partials) diff --git a/src/indexing.jl b/src/indexing.jl index e3e3080..c3010a9 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -5,7 +5,7 @@ using ArrayLayouts, FillArrays, LinearAlgebra, SparseArrays sparse_getindex(a...) = layout_getindex(a...) sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D, 2)) sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D, 1)) - +sparse_getindex(x::DualArray, I...) = getindex(x, I...) # We need a system of indexing that takes two tuples # of length N and M. We must then return a Tensor whose new input and output dimensions @@ -33,11 +33,11 @@ getindex(t::Tensor, i::Vararg{Int}) = getindex(t.data, i...) # Indexing with only slices/ranges returns a similar tensor getindex(t::Tensor{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = Tensor{N}(sparse_getindex(t.data, i...)) -""" -Extract a single Dual number from a DualVector at position y. -""" +# Integer indexing for a DualVector. +# If the jacobian is a Tensor, we use the Tensor indexing on input dimensions +# Otherwise if it is a function getindex(x::DualVector, y::Int) - Dual(x.value[y], x.jacobian[(y,), (:,)]) + Dual(x.value[y], x.jacobian[(y,), :]) end """ @@ -50,7 +50,25 @@ function getindex(x::DualVector, y::UnitRange) end function getindex(x::DualMatrix, y::Vararg{Int}) - Dual(x.value[y...], x.jacobian[y..., :]) + Dual(x.value[y...], x.jacobian[y, :]) +end + +function getindex(x::DualMatrix, y::Int, ::Colon) + newval = x.value[y, :] + newjac = x.jacobian[(y,:), :] + DualVector(newval, newjac) +end + +function getindex(x::DualMatrix, ::Colon, y::Int) + newval = x.value[:, y] + newjac = x.jacobian[(:,y), :] + DualVector(newval, newjac) +end + +function getindex(x::DualMatrix, y::Vararg{Union{Colon, UnitRange}}) + newval = x.value[y...] + newjac = x.jacobian[y, :] + DualMatrix(newval, newjac) end # Array interface for DualArray diff --git a/src/types.jl b/src/types.jl index eb3a90e..1e45177 100644 --- a/src/types.jl +++ b/src/types.jl @@ -30,10 +30,9 @@ function Tensor{N}(data::AbstractArray{T, L}) where {L, T, N} end # Helper convert function +_convert_array(::Type{T}, a::AbstractArray{T}) where {T} = a _convert_array(::Type{T}, a::AbstractArray) where {T} = T.(a) _convert_array(::Type{T}, t::Tensor{L, S, N, M}) where {T, L, S, N, M} = Tensor{L, T, N, M}(_convert_array(T, t.data)) -_convert_array(::Type{Dual{T}}, a::DualVector) where {T} = DualVector(_convert_array(T, a.value), _convert_array(T, a.jacobian)) -_convert_array(::Type{Dual{T}}, a::DualMatrix) where {T} = DualMatrix(_convert_array(T, a.value), _convert_array(T, a.jacobian)) Base.convert(::Type{Tensor{L, T, N, M}}, tensor::Tensor{L, S, N, M}) where {L, T, N, M, S} = Tensor{L, T, N, M}(_convert_array(T, tensor.data)) @@ -71,16 +70,22 @@ _unwrap_args(args::Tuple) = map(_unwrap, args) # copy ensures that arithmetic involving a Tensor returns a Tensor function Base.copy(bc::Broadcast.Broadcasted{TensorBroadcastStyle{N}}) where {N} # We create a Broadcasted of the underlying arrays and create a Tensor containing - # the evaluated broadcast - databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) - Tensor{N}(copy(Broadcast.flatten(databroadcast))) + # the evaluated broadcast. We check if Base.broadcasted is a Broadcasted + # or is overriden such as with DualArrays + databroadcast = Base.broadcasted(bc.f, _unwrap_args(bc.args)...) + result = databroadcast isa Broadcast.Broadcasted ? copy(Broadcast.flatten(databroadcast)) : databroadcast + Tensor{N}(result) end # copyto adds support for .= function Base.copyto!(dest::Tensor, bc::Broadcast.Broadcasted{TensorBroadcastStyle}) # As above - databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) - copyto!(dest.data, Broadcast.flatten(databroadcast)) + databroadcast = Base.broadcasted(bc.f, _unwrap_args(bc.args)...) + if databroadcast isa Broadcast.Broadcasted + copyto!(dest.data, Broadcast.flatten(databroadcast)) + else + copyto!(dest.data, databroadcast) + end dest end @@ -179,6 +184,9 @@ end # For convenience DualArray = Union{DualVector, DualMatrix} +_convert_array(::Type{Dual{T}}, a::DualVector) where {T} = DualVector(_convert_array(T, a.value), _convert_array(T, a.jacobian)) +_convert_array(::Type{Dual{T}}, a::DualMatrix) where {T} = DualMatrix(_convert_array(T, a.value), _convert_array(T, a.jacobian)) + # Basic equality for Dual numbers ==(a::Dual, b::Dual) = a.value == b.value && a.partials == b.partials isapprox(a::Dual, b::Dual) = isapprox(a.value, b.value) && isapprox(a.partials, b.partials) diff --git a/test/broadcast_test.jl b/test/broadcast_test.jl index f87d9f4..bcab4ad 100644 --- a/test/broadcast_test.jl +++ b/test/broadcast_test.jl @@ -71,4 +71,7 @@ using DualArrays, Test, SparseArrays, LinearAlgebra @test s.jacobian ≈ [0.0 -2.0 -3.0; -1.0 -1.0 -3.0; -1.0 -2.0 -2.0] @test m.jacobian ≈ [3.0 2.0 3.0; 2.0 6.0 6.0; 3.0 6.0 11.0] @test div.jacobian ≈ [0.25 -0.5 -0.75; -0.5 -0.5 -1.5; -0.75 -1.5 -1.75] + + a = [1,2,3] + @test a .* x == DualVector([2,4,6], [1 2 3;2 4 6;3 6 9]) end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 68a1d81..2a05253 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,7 +8,7 @@ using DualArrays: Tensor @test Dual(1.0, [1, 2, 3]).partials == Tensor{0}([1.0, 2.0, 3.0]) end - @testset "Indexing" begin + @testset "Indexing (Vector)" begin v = DualVector([1, 2, 3], [1 2 3; 4 5 6;7 8 9]) @test v[1] isa Dual @test v[1] == Dual(1,[1,2,3]) @@ -28,6 +28,18 @@ using DualArrays: Tensor @test sum(v[1:end-1] .* v[2:end]).partials == ForwardDiff.gradient(v -> sum(v[1:end-1] .* v[2:end]), 1:n) end + @testset "Indexing (Matrix)" begin + m = DualMatrix([1 2 3;4 5 6;7 8 9], ones(3,3,3)) + + @test m[1,1] isa Dual + @test m[1,1] == Dual(1, [1, 1, 1]) + + @test m[1, :] isa DualVector + @test m[1, :] == DualVector([1, 2, 3], [1 1 1; 1 1 1; 1 1 1]) + + @test m[1:2, 1:2] == DualMatrix([1 2; 4 5], ones(2,2,3)) + end + @testset "Arithmetic (DualVector)" begin v = DualVector([1, 2, 3], [1 2 3; 4 5 6;7 8 9]) w = v + v From 0b7bd6501b4fa74f3caead3fee78268cbfee4551 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Tue, 14 Apr 2026 12:34:54 +0100 Subject: [PATCH 07/28] small corrections --- src/indexing.jl | 2 +- src/types.jl | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index b41d2d3..c03a5de 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -37,7 +37,7 @@ getindex(t::Tensor{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) wher Extract a single Dual number from a DualVector at position y. """ function getindex(x::DualVector, y::Int) - Dual(x.value[y], x.jacobian[(y,), (:,)]) + Dual(x.value[y], x.jacobian[(y,), :]) end """ diff --git a/src/types.jl b/src/types.jl index 2f22fc0..a17656a 100644 --- a/src/types.jl +++ b/src/types.jl @@ -2,7 +2,7 @@ """ This type represents a Tensor along with its contraction rule. -A tensor with its contraction rule represents a linear map from an N-array to an M-array, +A tensor with its contraction rule represents a linear map from an M-array to an N-array, with 0-arrays considered scalars, 1-arrays vectors, etc. We have: @@ -17,8 +17,8 @@ In the context of DualArrays.jl, a DualArray (currently only a vector) can be th a + Jϵ -Where a is an M-array of real numbers, J is an N+M=tensor and ϵ is an N-array of dual parts. -In the simplest case, where M = 0, we have a Dual number with dual parts arranged in an N-array. +Where a is an N-array of real numbers, J is an N+M=tensor and ϵ is an M-array of dual parts. +In the simplest case, where N = 0, we have a Dual number with dual parts arranged in an M-array. """ struct Tensor{L, T, N, M} <: AbstractArray{T, L} data::AbstractArray{T, L} From f8b03b25f8b3d8f18d0ef501f434df40c2de4f40 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Tue, 14 Apr 2026 12:52:45 +0100 Subject: [PATCH 08/28] improve test coverage --- src/types.jl | 2 +- test/runtests.jl | 11 +++++++++++ test/tensor_test.jl | 8 ++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/types.jl b/src/types.jl index a17656a..e67cca5 100644 --- a/src/types.jl +++ b/src/types.jl @@ -75,7 +75,7 @@ function Base.copy(bc::Broadcast.Broadcasted{TensorBroadcastStyle{N}}) where {N} end # copyto adds support for .= -function Base.copyto!(dest::Tensor, bc::Broadcast.Broadcasted{TensorBroadcastStyle}) +function Base.copyto!(dest::Tensor, bc::Broadcast.Broadcasted{TensorBroadcastStyle{N}}) where {N} # As above databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) copyto!(dest.data, Broadcast.flatten(databroadcast)) diff --git a/test/runtests.jl b/test/runtests.jl index 68a1d81..aafd661 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -10,6 +10,10 @@ using DualArrays: Tensor @testset "Indexing" begin v = DualVector([1, 2, 3], [1 2 3; 4 5 6;7 8 9]) + + @test size(v) == (3,) + @test axes(v) == (Base.OneTo(3),) + @test v[1] isa Dual @test v[1] == Dual(1,[1,2,3]) @test v[2] == Dual(2,[4,5,6]) @@ -88,6 +92,13 @@ using DualArrays: Tensor @test vcat(x, x) == DualVector([1, 1], [1 2 3;1 2 3]) @test vcat(x, y) == DualVector([1, 2, 3], [1 2 3;4 5 6;7 8 9]) end + + @testset "show" begin + d = DualVector([1.0, 2.0], [1 0; 0 1]) + s = repr(MIME"text/plain"(), d) + @test occursin(" + ", s) + @test endswith(s, "𝛜") + end include("broadcast_test.jl") include("tensor_test.jl") diff --git a/test/tensor_test.jl b/test/tensor_test.jl index 8ce7572..70e1570 100644 --- a/test/tensor_test.jl +++ b/test/tensor_test.jl @@ -30,5 +30,13 @@ using DualArrays: Tensor @test t1 .* t2 .+ t3 == Tensor{1}([3 4 5;4 6 8;5 8 11]) @test sin.(t1) == Tensor{0}(sin.([1, 2, 3])) + + s = Tensor{1}(zeros(3, 3)) + + result = (s .= t .+ 1) + + @test result isa Tensor + @test result.data == s.data + @test s == Tensor{1}([2 3 4; 5 6 7; 8 9 10]) end end \ No newline at end of file From 70097f6f78ca22dbb6044de4d109f022dc2ff38d Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Tue, 14 Apr 2026 12:56:55 +0100 Subject: [PATCH 09/28] add indexing example --- src/indexing.jl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/indexing.jl b/src/indexing.jl index c03a5de..dc2dbf0 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -12,6 +12,18 @@ sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, siz # are inferred from how many of the arguments in each respective tuple are integers. # For the purposes of DualArrays.jl, we only need to index the input dimensions +# +# Example: +# +# Consider a DualVector with standard matrix Jacobian (Tensor{2, T, 1, 1}): +# +# d = a + Bϵ +# +# If we index d[1], we have: +# +# d[1] = a[1] + B[1, :]ϵ +# +# Where B[1, :] indexes the input dimension of B Idx = Union{Int, Colon, AbstractUnitRange} count_ints(t::Tuple) = count(x -> x isa Int, t) From c085dea01ea66c47aa4a2de0dc70a27d195ad848 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Tue, 14 Apr 2026 13:21:57 +0100 Subject: [PATCH 10/28] small fixes --- examples/nestedduals.jl | 7 ++++--- src/arithmetic.jl | 4 ---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/examples/nestedduals.jl b/examples/nestedduals.jl index 79e1597..feefdfd 100644 --- a/examples/nestedduals.jl +++ b/examples/nestedduals.jl @@ -30,9 +30,10 @@ # (one dimension for each of a, ϵ and ϵ'. This is expanded by applying D to ϵ # to first obtain a matrix via tensor contraction, and then applying this to ϵ') # -# Analogously, if we fix B = I, C = I, D = 0, we have that the resulting 3-tensor corresponding -# to ϵϵ' is the Hessians of f at each element of a concatenated. If we consider a -# vector to scalar function, this will be a matrix that is just the Hessian. +# Analogously, if we fix B = I, C = I, D = 0 and apply a function f: ℝⁿ → ℝᵐ +# the coefficient of ϵϵ' will be an n x n x m tensor T such that T[:, :, i] +# is the Hessian of the i-th result of f. More simply, if we consider a +# function f: ℝⁿ → ℝ, the coefficient of ϵϵ' will be the Hessian of f. # # We now implement an example using f(x) = x[1] * x[2] below. diff --git a/src/arithmetic.jl b/src/arithmetic.jl index 021a67f..9357e12 100644 --- a/src/arithmetic.jl +++ b/src/arithmetic.jl @@ -20,10 +20,6 @@ end Matrix multiplication with a DualVector. """ *(x::AbstractMatrix, y::DualVector) = DualVector(x * y.value, x * y.jacobian) -*(x::Number, t::Tensor) = x .* t -*(t::Tensor, x::Number) = t .* x -*(x::Dual, t::Tensor{<:Any, <:Any, N, <:Any}) where {N} = Tensor{N}(x .* t.data) -*(t::Tensor{<:Any, <:Any, N, <:Any}, x::Dual) where {N} = Tensor{N}(t.data .* x) """ this section attempts to define broadcasting rules on DualVectors for functions From 8aee1f5d50b5ebf57ad93dbbe453bccc718d2087 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Tue, 14 Apr 2026 15:53:17 +0100 Subject: [PATCH 11/28] improve coverage --- examples/nestedduals.jl | 5 +++++ src/arithmetic.jl | 10 ++++++++++ src/indexing.jl | 19 +++++++++++++++++-- src/types.jl | 4 +++- test/runtests.jl | 20 +++++++++++++++++--- test/tensor_test.jl | 28 +++++++++++++++++++++++++++- 6 files changed, 79 insertions(+), 7 deletions(-) diff --git a/examples/nestedduals.jl b/examples/nestedduals.jl index feefdfd..21e8523 100644 --- a/examples/nestedduals.jl +++ b/examples/nestedduals.jl @@ -35,6 +35,11 @@ # is the Hessian of the i-th result of f. More simply, if we consider a # function f: ℝⁿ → ℝ, the coefficient of ϵϵ' will be the Hessian of f. # +# In DualArrays.jl, this can be achieved by setting up a DualVector with a DualVector +# value and DualMatrix jacobian. Applying f: ℝⁿ → ℝ will give us a Dual with a Dual +# value and a DualVector vector partials. The jacobian of the DualVector partials +# is our Hessian. +# # We now implement an example using f(x) = x[1] * x[2] below. f(x) = x[1] * x[2] diff --git a/src/arithmetic.jl b/src/arithmetic.jl index 9357e12..c05715d 100644 --- a/src/arithmetic.jl +++ b/src/arithmetic.jl @@ -65,6 +65,16 @@ for (_, f, n) in DiffRules.diffrules(filter_modules=(:Base,)) jac = $p2.(x, y.value) .* y.jacobian return DualVector(val, jac) end + @eval function broadcasted(::typeof($f), x::DualVector, y::AbstractVector) + val = $f.(x.value, y) + jac = $p1.(x.value, y) .* x.jacobian + return DualVector(val, jac) + end + @eval function broadcasted(::typeof($f), x::AbstractVector, y::DualVector) + val = $f.(x, y.value) + jac = $p2.(x, y.value) .* y.jacobian + return DualVector(val, jac) + end @eval function broadcasted(::typeof($f), x::DualVector, y::Dual) val = $f.(x.value, y.value) # Product rule diff --git a/src/indexing.jl b/src/indexing.jl index 044f5bf..0be13c7 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -2,10 +2,12 @@ using ArrayLayouts, FillArrays, LinearAlgebra, SparseArrays + sparse_getindex(a...) = layout_getindex(a...) + +# TODO: should we move these? sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D, 2)) sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D, 1)) -sparse_getindex(x::DualArray, I...) = getindex(x, I...) # We need a system of indexing that takes two tuples # of length N and M. We must then return a Tensor whose new input and output dimensions @@ -85,4 +87,17 @@ end # Array interface for DualArray size(x::DualArray) = size(x.value) -axes(x::DualArray) = axes(x.value) \ No newline at end of file +axes(x::DualArray) = axes(x.value) + +# Copyto! (For broadcast assignments) +function Base.copyto!(dest::DualVector, src::DualVector) + copyto!(dest.value, src.value) + copyto!(dest.jacobian.data, src.jacobian.data) + return dest +end + +function Base.copyto!(dest::DualMatrix, src::DualMatrix) + copyto!(dest.value, src.value) + copyto!(dest.jacobian.data, src.jacobian.data) + return dest +end \ No newline at end of file diff --git a/src/types.jl b/src/types.jl index 21eed0d..a129dae 100644 --- a/src/types.jl +++ b/src/types.jl @@ -48,7 +48,9 @@ end # Below we define a broadcast style for Tensors and override copy and copyto! # This allows all arithmetic/broadcasting with Tensors to be handled by the # underlying logic of the array contained in the struct, while ensuring that -# all results stay as a Tensor. +# all results stay as a Tensor. This is with the exception of DualArrays, +# Where we expect the overloaded broadcasted to materialize DualArrays +# Using the Tensors obtained from below. struct TensorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{1} end TensorBroadcastStyle{N}(::Val{M}) where {N, M} = TensorBroadcastStyle{N}() diff --git a/test/runtests.jl b/test/runtests.jl index 90cd18c..1aa8146 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,4 @@ -using DualArrays, Test, LinearAlgebra, ForwardDiff, BandedMatrices +using DualArrays, Test, LinearAlgebra, ForwardDiff, BandedMatrices, FillArrays using DualArrays: Tensor @testset "DualArrays" begin @@ -6,10 +6,12 @@ using DualArrays: Tensor @testset "Type Definition" begin @test_throws ArgumentError DualVector([1,2],I(3)) @test Dual(1.0, [1, 2, 3]).partials == Tensor{0}([1.0, 2.0, 3.0]) + + @test_throws ArgumentError DualMatrix([1 2; 3 4], ones(3,3,3)) end @testset "Indexing (Vector)" begin - v = DualVector([1, 2, 3], [1 2 3; 4 5 6;7 8 9]) + v = DualVector([1., 2, 3], [1 2 3; 4 5 6;7 8 9]) @test size(v) == (3,) @test axes(v) == (Base.OneTo(3),) @@ -21,6 +23,9 @@ using DualArrays: Tensor @test_throws BoundsError v[4] @test v == DualVector([1,2, 3], [1 2 3; 4 5 6;7 8 9]) + v2 = DualVector([1., 2, 3], Diagonal(ones(3))) + @test v2[1] == Dual(1, OneElement(1.0, 1, 3)) + x,y = v[1:2],v[2:3] @test x == DualVector([1,2],[1 2 3;4 5 6]) @test y == DualVector([2,3],[4 5 6;7 8 9]) @@ -39,7 +44,8 @@ using DualArrays: Tensor @test m[1,1] == Dual(1, [1, 1, 1]) @test m[1, :] isa DualVector - @test m[1, :] == DualVector([1, 2, 3], [1 1 1; 1 1 1; 1 1 1]) + @test m[1, :] == DualVector([1, 2, 3], ones(3, 3)) + @test m[:, 1] == DualVector([1, 4, 7], ones(3, 3)) @test m[1:2, 1:2] == DualMatrix([1 2; 4 5], ones(2,2,3)) end @@ -111,6 +117,14 @@ using DualArrays: Tensor @test occursin(" + ", s) @test endswith(s, "𝛜") end + + @testset "Nested Duals" begin + d = DualVector([2, 3], [1 0;0 1]) + d1 = DualMatrix([1.0 0; 0 1], zeros(2, 2, 2)) + d2 = DualVector(d, d1) + + @test d2 isa DualVector + end include("broadcast_test.jl") include("tensor_test.jl") diff --git a/test/tensor_test.jl b/test/tensor_test.jl index 70e1570..d4dae29 100644 --- a/test/tensor_test.jl +++ b/test/tensor_test.jl @@ -1,4 +1,4 @@ -using Test, LinearAlgebra +using Test, LinearAlgebra, FillArrays using DualArrays: Tensor @testset "Tensor" begin @@ -38,5 +38,31 @@ using DualArrays: Tensor @test result isa Tensor @test result.data == s.data @test s == Tensor{1}([2 3 4; 5 6 7; 8 9 10]) + + # Use FillArrays to test copyto! with a non-standard broadcast. + a = Tensor{1}(Zeros(3)) + b = Tensor{1}(zeros(3)) + + bc = Base.Broadcast.broadcasted(+, a, 1) + + expected = collect(a.data .+ 1) + copyto!(b, bc) + @test b.data == expected + end + @testset "Tensor + DualVector" begin + d = DualVector([1, 2], [1 0; 0 1]) + t = Tensor{1}([1, 1]) + + r = d .+ t + @test r isa DualVector + @test r == [Dual(2, [1, 0]), Dual(3, [0, 1])] + + r2 = t .+ d + @test r2 isa DualVector + @test r2 == r + + s = DualVector(zeros(2), zeros(2, 2)) + s .= d .+ t + @test s == r end end \ No newline at end of file From 07cf173d19c0b212163fcc4825c91e856ce0b4f6 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Tue, 14 Apr 2026 16:54:40 +0100 Subject: [PATCH 12/28] fix coverage again --- src/indexing.jl | 6 ------ test/broadcast_test.jl | 1 + 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index 0be13c7..3e29fda 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -94,10 +94,4 @@ function Base.copyto!(dest::DualVector, src::DualVector) copyto!(dest.value, src.value) copyto!(dest.jacobian.data, src.jacobian.data) return dest -end - -function Base.copyto!(dest::DualMatrix, src::DualMatrix) - copyto!(dest.value, src.value) - copyto!(dest.jacobian.data, src.jacobian.data) - return dest end \ No newline at end of file diff --git a/test/broadcast_test.jl b/test/broadcast_test.jl index bcab4ad..957f63c 100644 --- a/test/broadcast_test.jl +++ b/test/broadcast_test.jl @@ -74,4 +74,5 @@ using DualArrays, Test, SparseArrays, LinearAlgebra a = [1,2,3] @test a .* x == DualVector([2,4,6], [1 2 3;2 4 6;3 6 9]) + @test x .* a == DualVector([2,4,6], [1 2 3;2 4 6;3 6 9]) end \ No newline at end of file From 1a17a05e3ce0e13e68ac3bf193348122258cc830 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Thu, 23 Apr 2026 22:01:35 +0100 Subject: [PATCH 13/28] rename to ArrayOperator --- src/indexing.jl | 20 +++++----- src/types.jl | 74 +++++++++++++++++++------------------ test/array_operator_test.jl | 42 +++++++++++++++++++++ test/runtests.jl | 6 +-- test/tensor_test.jl | 42 --------------------- 5 files changed, 93 insertions(+), 91 deletions(-) create mode 100644 test/array_operator_test.jl delete mode 100644 test/tensor_test.jl diff --git a/src/indexing.jl b/src/indexing.jl index dc2dbf0..bc78038 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -8,14 +8,14 @@ sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, siz # We need a system of indexing that takes two tuples -# of length N and M. We must then return a Tensor whose new input and output dimensions +# of length N and M. We must then return a ArrayOperator whose new input and output dimensions # are inferred from how many of the arguments in each respective tuple are integers. # For the purposes of DualArrays.jl, we only need to index the input dimensions # # Example: # -# Consider a DualVector with standard matrix Jacobian (Tensor{2, T, 1, 1}): +# Consider a DualVector with standard matrix Jacobian (ArrayOperator{2, T, 1, 1}): # # d = a + Bϵ # @@ -27,23 +27,23 @@ sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, siz Idx = Union{Int, Colon, AbstractUnitRange} count_ints(t::Tuple) = count(x -> x isa Int, t) -function getindex(t::Tensor{<:Any, T, N, M}, i::NTuple{N, Idx}, j::NTuple{M, Idx}) where {T, N, M} - # new value of M is inferred from the Tensor constructor and the order of +function getindex(t::ArrayOperator{<:Any, T, N, M}, i::NTuple{N, Idx}, j::NTuple{M, Idx}) where {T, N, M} + # new value of M is inferred from the ArrayOperator constructor and the order of # indexing the underlying array. newN = N - count_ints(i) - Tensor{newN}(sparse_getindex(t.data, i..., j...)) + ArrayOperator{newN, T, newN, M}(sparse_getindex(t.data, i..., j...)) end -function getindex(t::Tensor{<:Any, T, N, M}, i::NTuple{N, Idx}, ::Colon) where {T, N, M} - # new value of M is inferred from the Tensor constructor and the order of +function getindex(t::ArrayOperator{<:Any, T, N, M}, i::NTuple{N, Idx}, ::Colon) where {T, N, M} + # new value of M is inferred from the ArrayOperator constructor and the order of # indexing the underlying array. newN = N - count_ints(i) - Tensor{newN}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) + ArrayOperator{newN, T, newN, M}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) end # Integer indexing always returns a scalar. -getindex(t::Tensor, i::Vararg{Int}) = getindex(t.data, i...) +getindex(t::ArrayOperator, i::Vararg{Int}) = getindex(t.data, i...) # Indexing with only slices/ranges returns a similar tensor -getindex(t::Tensor{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = Tensor{N}(sparse_getindex(t.data, i...)) +getindex(t::ArrayOperator{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N, T, N, M}(sparse_getindex(t.data, i...)) """ Extract a single Dual number from a DualVector at position y. diff --git a/src/types.jl b/src/types.jl index e67cca5..e7577a7 100644 --- a/src/types.jl +++ b/src/types.jl @@ -1,9 +1,11 @@ # Core type definitions for DualArrays.jl """ -This type represents a Tensor along with its contraction rule. -A tensor with its contraction rule represents a linear map from an M-array to an N-array, -with 0-arrays considered scalars, 1-arrays vectors, etc. +This type represents a linear map from an M-array to an N-array, with a N+M=L-dimensional +array as its underlying data. This can be thought of analogously to an L-tensor equipped +with a contraction pattern characterised by (N, M). Specifically, the tensor has dimensions +a₁ x a₂ x ... x a_N x b₁ x b₂ x ... x b_M and maps an M-array of shape (b₁, b₂, ..., b_M) +to an N-array of shape (a₁, a₂, ..., a_N) by contracting over the last M indices. We have: -L is the dimensionality of the tensor @@ -13,69 +15,69 @@ We have: We enforce L = N + M by inferring M in the constructor. -In the context of DualArrays.jl, a DualArray (currently only a vector) can be thought of as +In the context of DualArrays.jl, a DualArray can be thought of as a + Jϵ Where a is an N-array of real numbers, J is an N+M=tensor and ϵ is an M-array of dual parts. In the simplest case, where N = 0, we have a Dual number with dual parts arranged in an M-array. """ -struct Tensor{L, T, N, M} <: AbstractArray{T, L} +struct ArrayOperator{L, T, N, M} <: AbstractArray{T, L} data::AbstractArray{T, L} end # Constructor to wrap an array with a tensor, given a contraction rule represented by N -function Tensor{N}(data::AbstractArray{T, L}) where {L, T, N} - Tensor{L, T, N, L - N}(data) +function ArrayOperator{N}(data::AbstractArray{T, L}) where {L, T, N} + ArrayOperator{L, T, N, L - N}(data) end # Helper convert function _convert_array(::Type{T}, a::AbstractArray) where {T} = T.(a) -_convert_array(::Type{T}, t::Tensor{L, S, N, M}) where {T, L, S, N, M} = Tensor{L, T, N, M}(_convert_array(T, t.data)) +_convert_array(::Type{T}, t::ArrayOperator{L, S, N, M}) where {T, L, S, N, M} = ArrayOperator{L, T, N, M}(_convert_array(T, t.data)) -Base.convert(::Type{Tensor{L, T, N, M}}, tensor::Tensor{L, S, N, M}) where {L, T, N, M, S} = - Tensor{L, T, N, M}(_convert_array(T, tensor.data)) +Base.convert(::Type{ArrayOperator{L, T, N, M}}, tensor::ArrayOperator{L, S, N, M}) where {L, T, N, M, S} = + ArrayOperator{L, T, N, M}(_convert_array(T, tensor.data)) # Basic array interface for op in (:size, :axes) @eval begin - ($op)(t::Tensor) = ($op)(t.data) - ($op)(t::Tensor, i...) = ($op)(t.data, i...) + ($op)(t::ArrayOperator) = ($op)(t.data) + ($op)(t::ArrayOperator, i...) = ($op)(t.data, i...) end end -# Below we define a broadcast style for Tensors and override copy and copyto! -# This allows all arithmetic/broadcasting with Tensors to be handled by the +# Below we define a broadcast style for ArrayOperators and override copy and copyto! +# This allows all arithmetic/broadcasting with ArrayOperators to be handled by the # underlying logic of the array contained in the struct, while ensuring that -# all results stay as a Tensor. +# all results stay as a ArrayOperator. -struct TensorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{1} end -TensorBroadcastStyle{N}(::Val{M}) where {N, M} = TensorBroadcastStyle{N}() +struct ArrayOperatorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{1} end +ArrayOperatorBroadcastStyle{N}(::Val{M}) where {N, M} = ArrayOperatorBroadcastStyle{N}() # N is the input dimension of the tensor being broadcasted. # For he result of the broadcast we will choose to preserve the highest input dimension. -Base.BroadcastStyle(::Type{<:Tensor{<:Any, <:Any, N, <:Any}}) where {N} = TensorBroadcastStyle{N}() -Base.BroadcastStyle(::TensorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{0}) where {N} = TensorBroadcastStyle{N}() -Base.BroadcastStyle(::TensorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{1}) where {N} = TensorBroadcastStyle{N}() -Base.BroadcastStyle(::TensorBroadcastStyle{N}, ::TensorBroadcastStyle{M}) where {N, M} = TensorBroadcastStyle{max(N, M)}() +Base.BroadcastStyle(::Type{<:ArrayOperator{<:Any, <:Any, N, <:Any}}) where {N} = ArrayOperatorBroadcastStyle{N}() +Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{0}) where {N} = ArrayOperatorBroadcastStyle{N}() +Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{1}) where {N} = ArrayOperatorBroadcastStyle{N}() +Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::ArrayOperatorBroadcastStyle{M}) where {N, M} = ArrayOperatorBroadcastStyle{max(N, M)}() -# Helper functions to help define broadcasting/arithmetic with Tensors. -# By converting a broadcast involving Tensors into a broadcast +# Helper functions to help define broadcasting/arithmetic with ArrayOperators. +# By converting a broadcast involving ArrayOperators into a broadcast # involving the underlying arrays. -_unwrap(t::Tensor) = t.data +_unwrap(t::ArrayOperator) = t.data _unwrap(x) = x _unwrap_args(args::Tuple) = map(_unwrap, args) -# copy ensures that arithmetic involving a Tensor returns a Tensor -function Base.copy(bc::Broadcast.Broadcasted{TensorBroadcastStyle{N}}) where {N} - # We create a Broadcasted of the underlying arrays and create a Tensor containing +# copy ensures that arithmetic involving a ArrayOperator returns a ArrayOperator +function Base.copy(bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{N}}) where {N} + # We create a Broadcasted of the underlying arrays and create a ArrayOperator containing # the evaluated broadcast databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) - Tensor{N}(copy(Broadcast.flatten(databroadcast))) + ArrayOperator{N}(copy(Broadcast.flatten(databroadcast))) end # copyto adds support for .= -function Base.copyto!(dest::Tensor, bc::Broadcast.Broadcasted{TensorBroadcastStyle{N}}) where {N} +function Base.copyto!(dest::ArrayOperator, bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{N}}) where {N} # As above databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) copyto!(dest.data, Broadcast.flatten(databroadcast)) @@ -91,19 +93,19 @@ A dual number type that stores a value and its partials (derivatives). - `value::T`: The primal value - `partials::Partials`: The partial derivatives as a tensor mapping to a scalar """ -struct Dual{T, Partials <: (Tensor{L, T, 0, M} where {L, M})} <: Real +struct Dual{T, Partials <: (ArrayOperator{L, T, 0, M} where {L, M})} <: Real value::T partials::Partials end -function Dual(value::T, partials::Tensor{L, S, 0, M}) where {S, T, L, M} +function Dual(value::T, partials::ArrayOperator{L, S, 0, M}) where {S, T, L, M} T2 = promote_type(T, S) - Dual{T2, Tensor{L, T2, 0, M}}(convert(T2, value), convert(Tensor{L, T2, 0, M}, partials)) + Dual{T2, ArrayOperator{L, T2, 0, M}}(convert(T2, value), convert(ArrayOperator{L, T2, 0, M}, partials)) end # Helper function to define Duals from an AbstractArray function Dual(value, partials::AbstractArray{T, N}) where {T, N} - Dual(value, Tensor{0}(partials)) + Dual(value, ArrayOperator{0}(partials)) end """ @@ -124,7 +126,7 @@ For now the entries just return the values when indexed. Constructs a DualVector, ensuring that the vector length matches the number of rows in the Jacobian. """ -struct DualVector{T, V <: AbstractVector{T},J <: (Tensor{L, T, 1, M} where {L, M})} <: AbstractVector{Dual{T}} +struct DualVector{T, V <: AbstractVector{T},J <: (ArrayOperator{L, T, 1, M} where {L, M})} <: AbstractVector{Dual{T}} value::V jacobian::J @@ -139,14 +141,14 @@ end """ Constructor that forces type compatibility """ -function DualVector(value::AbstractVector, jacobian::Tensor) +function DualVector(value::AbstractVector, jacobian::ArrayOperator) T = promote_type(eltype(value), eltype(jacobian)) DualVector(_convert_array(T, value), _convert_array(T, jacobian)) end # Helper function to define DualVectors with AbstractArray jacobians function DualVector(value::AbstractVector, jacobian::AbstractArray{T, N}) where {T, N} - DualVector(value, Tensor{1}(jacobian)) + DualVector(value, ArrayOperator{1}(jacobian)) end # Basic equality for Dual numbers diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl new file mode 100644 index 0000000..57e9969 --- /dev/null +++ b/test/array_operator_test.jl @@ -0,0 +1,42 @@ +using Test, LinearAlgebra +using DualArrays: ArrayOperator + +@testset "Tensor" begin + t = ArrayOperator{1}([1 2 3;4 5 6;7 8 9]) + @testset "Indexing" begin + @test t[1,1] == 1 + @test t[(1,), (:,)] == ArrayOperator{0}([1, 2, 3]) + @test t[1:2, 1:2] == ArrayOperator{1}([1 2;4 5]) + + s = ArrayOperator{2}(ones(2,2,2)) + @test s[(1,1), (:,)] == ArrayOperator{0}([1, 1]) + @test s[(1,:), (:,)] == ArrayOperator{1}([1 1;1 1]) + @test s[(1,2), :] == ArrayOperator{0}([1, 1]) + end + + @testset "Arithmetic" begin + @test t + t isa ArrayOperator + @test t + t == ArrayOperator{1}([2 4 6;8 10 12;14 16 18]) + + @test t .* 2 isa ArrayOperator + @test t .* 2 == ArrayOperator{1}([2 4 6;8 10 12;14 16 18]) + @test t .* [1,2,3] == ArrayOperator{1}([1 2 3;8 10 12;21 24 27]) + + t1 = ArrayOperator{0}([1, 2, 3]) + t2 = ArrayOperator{1}([1 2 3]) + t3 = ArrayOperator{0}([2]) + + @test t1 .* t2 == ArrayOperator{1}([1 2 3;2 4 6;3 6 9]) + @test t1 .* t2 .+ t3 == ArrayOperator{1}([3 4 5;4 6 8;5 8 11]) + + @test sin.(t1) == ArrayOperator{0}(sin.([1, 2, 3])) + + s = ArrayOperator{1}(zeros(3, 3)) + + result = (s .= t .+ 1) + + @test result isa ArrayOperator + @test result.data == s.data + @test s == ArrayOperator{1}([2 3 4; 5 6 7; 8 9 10]) + end +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index aafd661..3875011 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,11 +1,11 @@ using DualArrays, Test, LinearAlgebra, ForwardDiff, BandedMatrices -using DualArrays: Tensor +using DualArrays: ArrayOperator @testset "DualArrays" begin @testset "Type Definition" begin @test_throws ArgumentError DualVector([1,2],I(3)) - @test Dual(1.0, [1, 2, 3]).partials == Tensor{0}([1.0, 2.0, 3.0]) + @test Dual(1.0, [1, 2, 3]).partials == ArrayOperator{0}([1.0, 2.0, 3.0]) end @testset "Indexing" begin @@ -101,5 +101,5 @@ using DualArrays: Tensor end include("broadcast_test.jl") - include("tensor_test.jl") + include("array_operator_test.jl") end \ No newline at end of file diff --git a/test/tensor_test.jl b/test/tensor_test.jl deleted file mode 100644 index 70e1570..0000000 --- a/test/tensor_test.jl +++ /dev/null @@ -1,42 +0,0 @@ -using Test, LinearAlgebra -using DualArrays: Tensor - -@testset "Tensor" begin - t = Tensor{1}([1 2 3;4 5 6;7 8 9]) - @testset "Indexing" begin - @test t[1,1] == 1 - @test t[(1,), (:,)] == Tensor{0}([1, 2, 3]) - @test t[1:2, 1:2] == Tensor{1}([1 2;4 5]) - - s = Tensor{2}(ones(2,2,2)) - @test s[(1,1), (:,)] == Tensor{0}([1, 1]) - @test s[(1,:), (:,)] == Tensor{1}([1 1;1 1]) - @test s[(1,2), :] == Tensor{0}([1, 1]) - end - - @testset "Arithmetic" begin - @test t + t isa Tensor - @test t + t == Tensor{1}([2 4 6;8 10 12;14 16 18]) - - @test t .* 2 isa Tensor - @test t .* 2 == Tensor{1}([2 4 6;8 10 12;14 16 18]) - @test t .* [1,2,3] == Tensor{1}([1 2 3;8 10 12;21 24 27]) - - t1 = Tensor{0}([1, 2, 3]) - t2 = Tensor{1}([1 2 3]) - t3 = Tensor{0}([2]) - - @test t1 .* t2 == Tensor{1}([1 2 3;2 4 6;3 6 9]) - @test t1 .* t2 .+ t3 == Tensor{1}([3 4 5;4 6 8;5 8 11]) - - @test sin.(t1) == Tensor{0}(sin.([1, 2, 3])) - - s = Tensor{1}(zeros(3, 3)) - - result = (s .= t .+ 1) - - @test result isa Tensor - @test result.data == s.data - @test s == Tensor{1}([2 3 4; 5 6 7; 8 9 10]) - end -end \ No newline at end of file From 02860bb05785338213b7725cf7f61876b61211a9 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Fri, 24 Apr 2026 18:02:24 +0100 Subject: [PATCH 14/28] fix bugs, implement dualarray --- src/indexing.jl | 6 +++--- src/types.jl | 25 +++++++++++++++---------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index bc78038..d59193b 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -31,19 +31,19 @@ function getindex(t::ArrayOperator{<:Any, T, N, M}, i::NTuple{N, Idx}, j::NTuple # new value of M is inferred from the ArrayOperator constructor and the order of # indexing the underlying array. newN = N - count_ints(i) - ArrayOperator{newN, T, newN, M}(sparse_getindex(t.data, i..., j...)) + ArrayOperator{newN}(sparse_getindex(t.data, i..., j...)) end function getindex(t::ArrayOperator{<:Any, T, N, M}, i::NTuple{N, Idx}, ::Colon) where {T, N, M} # new value of M is inferred from the ArrayOperator constructor and the order of # indexing the underlying array. newN = N - count_ints(i) - ArrayOperator{newN, T, newN, M}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) + ArrayOperator{newN}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) end # Integer indexing always returns a scalar. getindex(t::ArrayOperator, i::Vararg{Int}) = getindex(t.data, i...) # Indexing with only slices/ranges returns a similar tensor -getindex(t::ArrayOperator{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N, T, N, M}(sparse_getindex(t.data, i...)) +getindex(t::ArrayOperator{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N}(sparse_getindex(t.data, i...)) """ Extract a single Dual number from a DualVector at position y. diff --git a/src/types.jl b/src/types.jl index e7577a7..03314e7 100644 --- a/src/types.jl +++ b/src/types.jl @@ -126,31 +126,36 @@ For now the entries just return the values when indexed. Constructs a DualVector, ensuring that the vector length matches the number of rows in the Jacobian. """ -struct DualVector{T, V <: AbstractVector{T},J <: (ArrayOperator{L, T, 1, M} where {L, M})} <: AbstractVector{Dual{T}} - value::V +struct DualArray{T, N , A <: AbstractArray{T,N},J <: (ArrayOperator{L, T, N, M} where {L, M})} <: AbstractVector{Dual{T}} + value::A jacobian::J - function DualVector(value::V, jacobian::J) where {T, V <: AbstractVector{T}, J <: (Tensor{L, T, 1, M} where {L, M})} - if length(value) != size(jacobian, 1) + function DualArray(value::A, jacobian::J) where {T, N, A <: AbstractArray{T,N}, J <: (ArrayOperator{L, T, N, M} where {L, M})} + if size(value) != ntuple(i -> size(jacobian, i), N) throw(ArgumentError("Length of value vector must match number of rows in Jacobian.")) end - new{T, V, J}(value, jacobian) + new{T,N, A, J}(value, jacobian) end end """ Constructor that forces type compatibility """ -function DualVector(value::AbstractVector, jacobian::ArrayOperator) +function DualArray(value::AbstractArray, jacobian::ArrayOperator) T = promote_type(eltype(value), eltype(jacobian)) - DualVector(_convert_array(T, value), _convert_array(T, jacobian)) + DualArray(_convert_array(T, value), _convert_array(T, jacobian)) end -# Helper function to define DualVectors with AbstractArray jacobians -function DualVector(value::AbstractVector, jacobian::AbstractArray{T, N}) where {T, N} - DualVector(value, ArrayOperator{1}(jacobian)) +# Helper function to define DualArrays with AbstractArray jacobians +function DualArray(value::AbstractArray{S, M}, jacobian::AbstractArray{T, N}) where {S, T, N, M} + DualArray(value, ArrayOperator{M}(jacobian)) end +const DualVector = DualArray{T, 1} where {T} +const DualMatrix = DualArray{T, 2} where {T} + +DualVector(value::AbstractVector, jacobian) = DualArray(value, jacobian) +DualMatrix(value::AbstractMatrix, jacobian) = DualArray(value, jacobian) # Basic equality for Dual numbers ==(a::Dual, b::Dual) = a.value == b.value && a.partials == b.partials isapprox(a::Dual, b::Dual) = isapprox(a.value, b.value) && isapprox(a.partials, b.partials) \ No newline at end of file From a54c7f781cbd2361fa1d4082651cc61e2808152b Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Sat, 25 Apr 2026 13:58:57 +0100 Subject: [PATCH 15/28] change dual to have abstractarray partials --- src/DualArrays.jl | 4 ++-- src/types.jl | 11 +++-------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/DualArrays.jl b/src/DualArrays.jl index c169778..9375853 100644 --- a/src/DualArrays.jl +++ b/src/DualArrays.jl @@ -6,12 +6,12 @@ A Julia package for efficient automatic differentiation using dual numbers and d This package provides: - `Dual`: A dual number type for storing values and their derivatives - `DualVector`: A vector of dual numbers represented with a Jacobian matrix - +- `DualMatrix`: A matrix of dual numbers represented with a Jacobian tensor` Differentiation rules are mostly provided by ChainRules.jl. """ module DualArrays -export DualVector, Dual, jacobian +export DualVector, DualMatrix, Dual, jacobian import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \ diff --git a/src/types.jl b/src/types.jl index 03314e7..6757855 100644 --- a/src/types.jl +++ b/src/types.jl @@ -93,19 +93,14 @@ A dual number type that stores a value and its partials (derivatives). - `value::T`: The primal value - `partials::Partials`: The partial derivatives as a tensor mapping to a scalar """ -struct Dual{T, Partials <: (ArrayOperator{L, T, 0, M} where {L, M})} <: Real +struct Dual{T, Partials <: AbstractArray{T}} <: Real value::T partials::Partials end -function Dual(value::T, partials::ArrayOperator{L, S, 0, M}) where {S, T, L, M} +function Dual(value::T, partials::AbstractArray{S}) where {S, T} T2 = promote_type(T, S) - Dual{T2, ArrayOperator{L, T2, 0, M}}(convert(T2, value), convert(ArrayOperator{L, T2, 0, M}, partials)) -end - -# Helper function to define Duals from an AbstractArray -function Dual(value, partials::AbstractArray{T, N}) where {T, N} - Dual(value, ArrayOperator{0}(partials)) + Dual(convert(T2, value), _convert_array(T2, partials)) end """ From ecbaba8dadbc31a66a8bd620de000bcd94defbaf Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Sat, 25 Apr 2026 18:53:15 +0100 Subject: [PATCH 16/28] add * for ArrayOperator, remove <: AbstractArray and fix resulting bugs --- Project.toml | 4 ++- src/DualArrays.jl | 4 +-- src/arithmetic.jl | 64 ++++++++++++++++++++++++++++++++++++++++++++++- src/types.jl | 33 +++++++++++++++++++----- src/utilities.jl | 4 +-- test/runtests.jl | 2 +- 6 files changed, 98 insertions(+), 13 deletions(-) diff --git a/Project.toml b/Project.toml index 3e86a39..b7409b2 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "DualArrays" uuid = "429e4e16-f749-45f3-beec-30742fae38ce" -authors = ["Sheehan Olver "] version = "0.2.0" +authors = ["Sheehan Olver "] [deps] ArrayLayouts = "4c555306-a7a7-4459-81d9-ec55ddd5c99a" @@ -10,6 +10,7 @@ DiffRules = "b552c78f-8df3-52c6-915a-8e097449b14b" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2" [compat] DiffRules = "1.15.1" @@ -17,6 +18,7 @@ ForwardDiff = "1.2.2" Lux = "1.29.1" Plots = "1.41.1" SparseArrays = "1.10" +TensorOperations = "5.5.2" [extras] BandedMatrices = "aae01518-5342-5314-be14-df237901396f" diff --git a/src/DualArrays.jl b/src/DualArrays.jl index 9375853..19ba52b 100644 --- a/src/DualArrays.jl +++ b/src/DualArrays.jl @@ -13,9 +13,9 @@ module DualArrays export DualVector, DualMatrix, Dual, jacobian -import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \ +import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \, eltype, transpose, permutedims -using LinearAlgebra, ArrayLayouts, FillArrays, DiffRules +using LinearAlgebra, ArrayLayouts, FillArrays, DiffRules, TensorOperations include("types.jl") include("indexing.jl") diff --git a/src/arithmetic.jl b/src/arithmetic.jl index afcfcbc..1effd91 100644 --- a/src/arithmetic.jl +++ b/src/arithmetic.jl @@ -96,4 +96,66 @@ LinearAlgebra.dot(x::DualVector, y::AbstractVector) = Dual(dot(x.value, y), tran LinearAlgebra.dot(x::AbstractVector, y::DualVector) = Dual(dot(x, y.value), transpose(y.jacobian) * x) # solve -\(x::AbstractMatrix, y::DualVector) = DualVector(x \ y.value, x \ y.jacobian) \ No newline at end of file +\(x::AbstractMatrix, y::DualVector) = DualVector(x \ y.value, ArrayOperator{1}(x \ y.jacobian.data)) + +""" +----------------------- +Multiplication with ArrayOperators +----------------------- + +* for ArrayOperators generalises from matrix/vector +multiplication and uses tensor contraction provided by +TensorOperations.jl. Let an (A, B) ArrayOperator denote +an ArrayOperator with output dimension A and input dimension B +(i.e an ArrayOperator{A+B, T, A, B} for some type T). We only +allow multiplication between an (A, B) ArrayOperator and a (B, C) +ArrayOperator, with the result being an (A, C) ArrayOperator. +This generalises from: + +-An inner product: a row vector * a column vector -> a scalar: + (0, 1) * (1, 0) -> (0, 0) + +-An outer product: a column vector * a row vector -> a matrix: + (1, 0) * (0, 1) -> (1, 1) + +- matrix/vector multiplication: a matrix * a column vector -> a column vector: + (1, 1) * (1, 0) -> (1, 0) + +We can treat multiplication between ArrayOperators and regular arrays as multiplication +between two ArrayOperators: given an (A, B) ArrayOperator and an L-array, we can fix it as +having input dimension B and infer C = L - B. We use this contraction rule to multiply the two. +""" + +# Helper function to perform tensor contraction between x and y +function _contract(x, y, A, B, C) + x_idx = (ntuple(i -> i, A), ntuple(i -> i + A, B)) + y_idx = (ntuple(i -> i, B), ntuple(i -> i + B, C)) + ret_idx = (ntuple(i -> i, A), ntuple(i -> i + A, C)) + + return TensorOperations.tensorcontract(x, x_idx, false, y, y_idx, false, ret_idx, 1) +end + +function *(x::ArrayOperator{<:Any, <:Any, A, B}, y::ArrayOperator{<:Any, <:Any, B, C}) where {A, B, C} + return ArrayOperator{A}(_contract(x.data, y.data, A, B, C)) +end + +function *(x::ArrayOperator{<:Any, <:Any, A, B}, y::AbstractArray{<:Any, L}) where {A, B, L} + C = L - B + return ArrayOperator{A}(_contract(x.data, y, A, B, C)) +end + +function *(x::AbstractArray{<:Any, L}, y::ArrayOperator{<:Any, <:Any, B, C}) where {L, B, C} + A = L - B + return ArrayOperator{A}(_contract(x, y.data, A, B, C)) +end + +# Extra required arithmetic for ArrayOperators +Base.:+(x::ArrayOperator, y::ArrayOperator) = x .+ y +Base.:-(x::ArrayOperator, y::ArrayOperator) = x .- y +Base.:-(x::ArrayOperator) = (-).(x) + +Base.:*(a::Number, t::ArrayOperator{L, T, N, M}) where {L, T, N, M} = + ArrayOperator{L, promote_type(typeof(a), T), N, M}(a .* t.data) +Base.:*(t::ArrayOperator, a::Number) = a * t + + diff --git a/src/types.jl b/src/types.jl index 6757855..e592488 100644 --- a/src/types.jl +++ b/src/types.jl @@ -22,7 +22,7 @@ a + Jϵ Where a is an N-array of real numbers, J is an N+M=tensor and ϵ is an M-array of dual parts. In the simplest case, where N = 0, we have a Dual number with dual parts arranged in an M-array. """ -struct ArrayOperator{L, T, N, M} <: AbstractArray{T, L} +struct ArrayOperator{L, T, N, M} data::AbstractArray{T, L} end @@ -39,13 +39,28 @@ Base.convert(::Type{ArrayOperator{L, T, N, M}}, tensor::ArrayOperator{L, S, N, M ArrayOperator{L, T, N, M}(_convert_array(T, tensor.data)) # Basic array interface -for op in (:size, :axes) +for op in (:size, :axes, :iterate) @eval begin ($op)(t::ArrayOperator) = ($op)(t.data) ($op)(t::ArrayOperator, i...) = ($op)(t.data, i...) end end +# Since ArrayOperator is not an AbstractArray we define these manually +eltype(t::ArrayOperator) = eltype(t.data) + +Base.Broadcast.broadcastable(t::ArrayOperator) = t + +transpose(t::ArrayOperator) = transpose(t.data) +sum(t::ArrayOperator; kwargs...) = sum(t.data; kwargs...) + +==(a::ArrayOperator, b::ArrayOperator) = a.data == b.data +==(a::ArrayOperator, b::AbstractArray) = a.data == b +==(a::AbstractArray, b::ArrayOperator) = a == b.data +isapprox(a::ArrayOperator, b::ArrayOperator; kwargs...) = isapprox(a.data, b.data; kwargs...) +isapprox(a::ArrayOperator, b::AbstractArray; kwargs...) = isapprox(a.data, b; kwargs...) +isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(a, b.data; kwargs...) + # Below we define a broadcast style for ArrayOperators and override copy and copyto! # This allows all arithmetic/broadcasting with ArrayOperators to be handled by the # underlying logic of the array contained in the struct, while ensuring that @@ -57,14 +72,15 @@ ArrayOperatorBroadcastStyle{N}(::Val{M}) where {N, M} = ArrayOperatorBroadcastSt # N is the input dimension of the tensor being broadcasted. # For he result of the broadcast we will choose to preserve the highest input dimension. Base.BroadcastStyle(::Type{<:ArrayOperator{<:Any, <:Any, N, <:Any}}) where {N} = ArrayOperatorBroadcastStyle{N}() -Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{0}) where {N} = ArrayOperatorBroadcastStyle{N}() -Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{1}) where {N} = ArrayOperatorBroadcastStyle{N}() +Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{M}) where {N, M} = ArrayOperatorBroadcastStyle{N}() +Base.BroadcastStyle(::Broadcast.DefaultArrayStyle{M}, ::ArrayOperatorBroadcastStyle{N}) where {N, M} = ArrayOperatorBroadcastStyle{N}() Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::ArrayOperatorBroadcastStyle{M}) where {N, M} = ArrayOperatorBroadcastStyle{max(N, M)}() # Helper functions to help define broadcasting/arithmetic with ArrayOperators. # By converting a broadcast involving ArrayOperators into a broadcast # involving the underlying arrays. _unwrap(t::ArrayOperator) = t.data +_unwrap(bc::Broadcast.Broadcasted) = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) _unwrap(x) = x _unwrap_args(args::Tuple) = map(_unwrap, args) @@ -85,13 +101,13 @@ function Base.copyto!(dest::ArrayOperator, bc::Broadcast.Broadcasted{ArrayOperat end """ - Dual{T, Partials <: AbstractVector{T}} <: Real + Dual{T, Partials <: AbstractArray{T}} <: Real A dual number type that stores a value and its partials (derivatives). # Fields - `value::T`: The primal value -- `partials::Partials`: The partial derivatives as a tensor mapping to a scalar +- `partials::Partials`: The partial derivatives stored as an array """ struct Dual{T, Partials <: AbstractArray{T}} <: Real value::T @@ -103,6 +119,11 @@ function Dual(value::T, partials::AbstractArray{S}) where {S, T} Dual(convert(T2, value), _convert_array(T2, partials)) end +function Dual(value::T, partials::ArrayOperator{L, S, 0, M}) where {L, S, M, T} + T2 = promote_type(T, S) + Dual(convert(T2, value), _convert_array(T2, partials).data) +end + """ DualVector{T, M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}} diff --git a/src/utilities.jl b/src/utilities.jl index e892b95..91d3c20 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -10,8 +10,8 @@ end # Helper functions for vcat operations _jacobian(d::Dual) = permutedims(d.partials) -_jacobian(d::DualVector) = d.jacobian -_jacobian(d::DualVector, ::Int) = d.jacobian +_jacobian(d::DualVector) = d.jacobian.data +_jacobian(d::DualVector, ::Int) = d.jacobian.data _jacobian(x::Number, N::Int) = Zeros(typeof(x), 1, N) _value(v::AbstractVector) = v diff --git a/test/runtests.jl b/test/runtests.jl index 3875011..9b87f05 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,7 +5,7 @@ using DualArrays: ArrayOperator @testset "Type Definition" begin @test_throws ArgumentError DualVector([1,2],I(3)) - @test Dual(1.0, [1, 2, 3]).partials == ArrayOperator{0}([1.0, 2.0, 3.0]) + @test Dual(1.0, [1, 2, 3]).partials == [1.0, 2.0, 3.0] end @testset "Indexing" begin From e356ef728001023d2dd6c3f6d26b316803c88c14 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Mon, 27 Apr 2026 14:54:10 +0100 Subject: [PATCH 17/28] add tests, improve coverage --- src/indexing.jl | 19 +++++++------------ src/types.jl | 4 +++- src/utilities.jl | 2 +- test/array_operator_test.jl | 25 ++++++++++++++++++++++++- test/runtests.jl | 8 +++++++- 5 files changed, 42 insertions(+), 16 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index d59193b..189b0aa 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -46,10 +46,10 @@ getindex(t::ArrayOperator, i::Vararg{Int}) = getindex(t.data, i...) getindex(t::ArrayOperator{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N}(sparse_getindex(t.data, i...)) """ -Extract a single Dual number from a DualVector at position y. +Extract a single Dual number from a DualArray. """ -function getindex(x::DualVector, y::Int) - Dual(x.value[y], x.jacobian[(y,), :]) +function getindex(x::DualArray, args::Vararg{Int}) + Dual(x.value[args...], x.jacobian[args, :]) end """ @@ -61,12 +61,7 @@ function getindex(x::DualVector, y::UnitRange) DualVector(newval, newjac) end -""" -Return the size of the DualVector (length of the value vector). -""" -size(x::DualVector) = (length(x.value),) - -""" -Return the axes of the DualVector. -""" -axes(x::DualVector) = axes(x.value) \ No newline at end of file +# DualArray array interface +for op in (:size, :axes) + @eval $op(a::DualArray) = $op(a.value) +end \ No newline at end of file diff --git a/src/types.jl b/src/types.jl index e592488..725e2b1 100644 --- a/src/types.jl +++ b/src/types.jl @@ -66,7 +66,7 @@ isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(a, b.data; kw # underlying logic of the array contained in the struct, while ensuring that # all results stay as a ArrayOperator. -struct ArrayOperatorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{1} end +struct ArrayOperatorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{0} end ArrayOperatorBroadcastStyle{N}(::Val{M}) where {N, M} = ArrayOperatorBroadcastStyle{N}() # N is the input dimension of the tensor being broadcasted. @@ -108,6 +108,8 @@ A dual number type that stores a value and its partials (derivatives). # Fields - `value::T`: The primal value - `partials::Partials`: The partial derivatives stored as an array + +NOTE: Partials will soon be in the ArrayOperator format. """ struct Dual{T, Partials <: AbstractArray{T}} <: Real value::T diff --git a/src/utilities.jl b/src/utilities.jl index 91d3c20..726af37 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -49,7 +49,7 @@ end """ Custom display method for DualVectors. """ -show(io::IO, ::MIME"text/plain", x::DualVector) = +show(io::IO, ::MIME"text/plain", x::DualArray) = (print(io, x.value); print(io, " + "); print(io, x.jacobian); print(io, "𝛜")) """ diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl index 57e9969..4fa3ff3 100644 --- a/test/array_operator_test.jl +++ b/test/array_operator_test.jl @@ -1,8 +1,21 @@ using Test, LinearAlgebra using DualArrays: ArrayOperator -@testset "Tensor" begin +@testset "ArrayOperator" begin t = ArrayOperator{1}([1 2 3;4 5 6;7 8 9]) + + @testset "Equality" begin + @test t == [1 2 3;4 5 6;7 8 9] + @test [1 2 3;4 5 6;7 8 9] == t + @test isapprox(t, [1 2 3;4 5 6;7 8 9]) + end + @testset "BroadcastStyle Val constructor" begin + # A plain broadcast operation; broadcast machinery should dimension-adapt the + # ArrayOperator broadcast style via ArrayOperatorBroadcastStyle{N}(::Val{M}). + u = t .+ ones(3, 3) + @test u isa ArrayOperator + @test u == ArrayOperator{1}([2 3 4; 5 6 7; 8 9 10]) + end @testset "Indexing" begin @test t[1,1] == 1 @test t[(1,), (:,)] == ArrayOperator{0}([1, 2, 3]) @@ -26,6 +39,7 @@ using DualArrays: ArrayOperator t2 = ArrayOperator{1}([1 2 3]) t3 = ArrayOperator{0}([2]) + @test t1 .+ ones(3) == ArrayOperator{0}([2, 3, 4]) @test t1 .* t2 == ArrayOperator{1}([1 2 3;2 4 6;3 6 9]) @test t1 .* t2 .+ t3 == ArrayOperator{1}([3 4 5;4 6 8;5 8 11]) @@ -38,5 +52,14 @@ using DualArrays: ArrayOperator @test result isa ArrayOperator @test result.data == s.data @test s == ArrayOperator{1}([2 3 4; 5 6 7; 8 9 10]) + + @test t * t == ArrayOperator{1}([30 36 42;66 81 96; 102 126 150]) + @test t * [1, 0, 0] == ArrayOperator{1}([1,4,7]) + @test [1 0 0] * t == ArrayOperator{1}([1 2 3]) + + @test t * 2 == [2 4 6;8 10 12;14 16 18] + @test 2 * t == [2 4 6;8 10 12;14 16 18] + + @test s - t == ones(3, 3) end end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 9b87f05..144ed01 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,11 +9,17 @@ using DualArrays: ArrayOperator end @testset "Indexing" begin - v = DualVector([1, 2, 3], [1 2 3; 4 5 6;7 8 9]) + v = DualVector([1., 2, 3], [1 2 3; 4 5 6;7 8 9]) + m = DualMatrix([1 2;3 4], zeros(2, 2, 2)) @test size(v) == (3,) @test axes(v) == (Base.OneTo(3),) + @test size(m) == (2, 2) + @test axes(m) == (Base.OneTo(2), Base.OneTo(2)) + + @test m[1, 1] == Dual(1, [0, 0]) + @test v[1] isa Dual @test v[1] == Dual(1,[1,2,3]) @test v[2] == Dual(2,[4,5,6]) From 13ef5b8de9b97cf907f72df69984ff5b25df842c Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Mon, 27 Apr 2026 15:12:39 +0100 Subject: [PATCH 18/28] small additions --- src/types.jl | 5 +---- test/array_operator_test.jl | 9 +-------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/src/types.jl b/src/types.jl index 725e2b1..08cb494 100644 --- a/src/types.jl +++ b/src/types.jl @@ -35,9 +35,6 @@ end _convert_array(::Type{T}, a::AbstractArray) where {T} = T.(a) _convert_array(::Type{T}, t::ArrayOperator{L, S, N, M}) where {T, L, S, N, M} = ArrayOperator{L, T, N, M}(_convert_array(T, t.data)) -Base.convert(::Type{ArrayOperator{L, T, N, M}}, tensor::ArrayOperator{L, S, N, M}) where {L, T, N, M, S} = - ArrayOperator{L, T, N, M}(_convert_array(T, tensor.data)) - # Basic array interface for op in (:size, :axes, :iterate) @eval begin @@ -59,7 +56,7 @@ sum(t::ArrayOperator; kwargs...) = sum(t.data; kwargs...) ==(a::AbstractArray, b::ArrayOperator) = a == b.data isapprox(a::ArrayOperator, b::ArrayOperator; kwargs...) = isapprox(a.data, b.data; kwargs...) isapprox(a::ArrayOperator, b::AbstractArray; kwargs...) = isapprox(a.data, b; kwargs...) -isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(a, b.data; kwargs...) +isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(b, a) # Below we define a broadcast style for ArrayOperators and override copy and copyto! # This allows all arithmetic/broadcasting with ArrayOperators to be handled by the diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl index 4fa3ff3..0194b3a 100644 --- a/test/array_operator_test.jl +++ b/test/array_operator_test.jl @@ -9,13 +9,6 @@ using DualArrays: ArrayOperator @test [1 2 3;4 5 6;7 8 9] == t @test isapprox(t, [1 2 3;4 5 6;7 8 9]) end - @testset "BroadcastStyle Val constructor" begin - # A plain broadcast operation; broadcast machinery should dimension-adapt the - # ArrayOperator broadcast style via ArrayOperatorBroadcastStyle{N}(::Val{M}). - u = t .+ ones(3, 3) - @test u isa ArrayOperator - @test u == ArrayOperator{1}([2 3 4; 5 6 7; 8 9 10]) - end @testset "Indexing" begin @test t[1,1] == 1 @test t[(1,), (:,)] == ArrayOperator{0}([1, 2, 3]) @@ -39,7 +32,7 @@ using DualArrays: ArrayOperator t2 = ArrayOperator{1}([1 2 3]) t3 = ArrayOperator{0}([2]) - @test t1 .+ ones(3) == ArrayOperator{0}([2, 3, 4]) + @test t1 .+ ones(3, 3) == [2 2 2;3 3 3; 4 4 4] @test t1 .* t2 == ArrayOperator{1}([1 2 3;2 4 6;3 6 9]) @test t1 .* t2 .+ t3 == ArrayOperator{1}([3 4 5;4 6 8;5 8 11]) From 514b34aaa63cf30994bd31f77dbe219528176a72 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Mon, 27 Apr 2026 15:27:01 +0100 Subject: [PATCH 19/28] fix coverage --- src/types.jl | 1 - test/array_operator_test.jl | 1 + test/runtests.jl | 4 ++++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/types.jl b/src/types.jl index 08cb494..82330bf 100644 --- a/src/types.jl +++ b/src/types.jl @@ -64,7 +64,6 @@ isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(b, a) # all results stay as a ArrayOperator. struct ArrayOperatorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{0} end -ArrayOperatorBroadcastStyle{N}(::Val{M}) where {N, M} = ArrayOperatorBroadcastStyle{N}() # N is the input dimension of the tensor being broadcasted. # For he result of the broadcast we will choose to preserve the highest input dimension. diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl index 0194b3a..9857e44 100644 --- a/test/array_operator_test.jl +++ b/test/array_operator_test.jl @@ -8,6 +8,7 @@ using DualArrays: ArrayOperator @test t == [1 2 3;4 5 6;7 8 9] @test [1 2 3;4 5 6;7 8 9] == t @test isapprox(t, [1 2 3;4 5 6;7 8 9]) + @test isapprox([1 2 3;4 5 6;7 8 9], t) end @testset "Indexing" begin @test t[1,1] == 1 diff --git a/test/runtests.jl b/test/runtests.jl index 144ed01..2f4c8ed 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -97,6 +97,10 @@ using DualArrays: ArrayOperator @test vcat(x) == DualVector([1], [1 2 3]) @test vcat(x, x) == DualVector([1, 1], [1 2 3;1 2 3]) @test vcat(x, y) == DualVector([1, 2, 3], [1 2 3;4 5 6;7 8 9]) + + z = DualVector([2, 3], [1 0; 0 1]) + @test vcat(1, z).jacobian.data == [0 0; 1 0; 0 1] + @test vcat(z, 1).jacobian.data == [1 0; 0 1; 0 0] end @testset "show" begin From a65cd5ee9fb12d87ff280ff7275054c1196a7f7f Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Tue, 28 Apr 2026 09:42:44 +0100 Subject: [PATCH 20/28] fixes --- src/arithmetic.jl | 4 ++-- src/indexing.jl | 1 + src/types.jl | 9 +++++++-- test/runtests.jl | 4 +--- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/arithmetic.jl b/src/arithmetic.jl index c227687..ab32cad 100644 --- a/src/arithmetic.jl +++ b/src/arithmetic.jl @@ -1,10 +1,10 @@ # Arithmetic operations for DualArrays.jl # Tensor transpose -Base.transpose(t::Tensor{<:Any, <:Any, <:N, <:Any}) where {N} = Tensor{N}(transpose(t.data)) +transpose(t::ArrayOperator{<:Any, <:Any, <:N, <:Any}) where {N} = ArrayOperator{N}(transpose(t.data)) # Special case: Transpose of a vector becomes a matrix # TODO: Technically a row vector is a Tensor{1, T, 1, 0} -Base.transpose(t::Tensor{1, <:Any, 0, <:Any}) = Tensor{1}(transpose(t.data)) +transpose(t::ArrayOperator{1, <:Any, 0, <:Any}) = ArrayOperator{1}(transpose(t.data)) """ Addition/Subtraction of DualVectors. diff --git a/src/indexing.jl b/src/indexing.jl index a1b53cd..7c38e39 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -8,6 +8,7 @@ sparse_getindex(a...) = layout_getindex(a...) # TODO: should we move these? sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D, 2)) sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D, 1)) +sparse_getindex(d::DualArray, args...) = d[args...] # We need a system of indexing that takes two tuples # of length N and M. We must then return a ArrayOperator whose new input and output dimensions diff --git a/src/types.jl b/src/types.jl index 4110846..ebf4f08 100644 --- a/src/types.jl +++ b/src/types.jl @@ -36,6 +36,7 @@ _convert_array(::Type{T}, a::AbstractArray{T}) where {T} = a _convert_array(::Type{T}, a::AbstractArray) where {T} = T.(a) _convert_array(::Type{T}, t::ArrayOperator{L, S, N, M}) where {T, L, S, N, M} = ArrayOperator{L, T, N, M}(_convert_array(T, t.data)) + # Basic array interface for op in (:size, :axes, :iterate) @eval begin @@ -49,7 +50,6 @@ eltype(t::ArrayOperator) = eltype(t.data) Base.Broadcast.broadcastable(t::ArrayOperator) = t -transpose(t::ArrayOperator) = transpose(t.data) sum(t::ArrayOperator; kwargs...) = sum(t.data; kwargs...) ==(a::ArrayOperator, b::ArrayOperator) = a.data == b.data @@ -82,7 +82,7 @@ _unwrap(x) = x _unwrap_args(args::Tuple) = map(_unwrap, args) # copy ensures that arithmetic involving a Tensor returns a Tensor -function Base.copy(bc::Broadcast.Broadcasted{TensorBroadcastStyle{N}}) where {N} +function Base.copy(bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{N}}) where {N} # We create a Broadcasted of the underlying arrays and create a Tensor containing # the evaluated broadcast. We check if Base.broadcasted is a Broadcasted # or is overriden such as with DualArrays @@ -129,6 +129,8 @@ function Dual(value::T, partials::ArrayOperator{L, S, 0, M}) where {L, S, M, T} Dual(convert(T2, value), _convert_array(T2, partials).data) end +Dual(value::T, partials::ArrayOperator{L, T}) where {L, T} = Dual(value, partials.data) + """ DualVector{T, M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}} @@ -175,6 +177,9 @@ end const DualVector = DualArray{T, 1} where {T} const DualMatrix = DualArray{T, 2} where {T} +_convert_array(::Type{Dual{T}}, a::DualVector) where {T} = DualVector(_convert_array(T, a.value), _convert_array(T, a.jacobian)) +_convert_array(::Type{Dual{T}}, a::DualMatrix) where {T} = DualMatrix(_convert_array(T, a.value), _convert_array(T, a.jacobian)) + DualVector(value::AbstractVector, jacobian) = DualArray(value, jacobian) DualMatrix(value::AbstractMatrix, jacobian) = DualArray(value, jacobian) # Basic equality for Dual numbers diff --git a/test/runtests.jl b/test/runtests.jl index 15ec635..5319ef2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,4 @@ -using DualArrays, Test, LinearAlgebra, ForwardDiff, BandedMatrices +using DualArrays, Test, LinearAlgebra, ForwardDiff, BandedMatrices, FillArrays using DualArrays: ArrayOperator @testset "DualArrays" begin @@ -50,8 +50,6 @@ using DualArrays: ArrayOperator @test m[1, :] isa DualVector @test m[1, :] == DualVector([1, 2, 3], ones(3, 3)) @test m[:, 1] == DualVector([1, 4, 7], ones(3, 3)) - - @test m[1:2, 1:2] == DualMatrix([1 2; 4 5], ones(2,2,3)) end @testset "Arithmetic (DualVector)" begin From cf3c93068cd021bf9af4a97be02d54a913526b68 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Tue, 5 May 2026 12:08:09 +0100 Subject: [PATCH 21/28] rethink broadcasting --- src/types.jl | 57 ++++++++++++++++++++++++++++--------- test/array_operator_test.jl | 4 ++- 2 files changed, 46 insertions(+), 15 deletions(-) diff --git a/src/types.jl b/src/types.jl index 82330bf..962804d 100644 --- a/src/types.jl +++ b/src/types.jl @@ -58,19 +58,48 @@ isapprox(a::ArrayOperator, b::ArrayOperator; kwargs...) = isapprox(a.data, b.dat isapprox(a::ArrayOperator, b::AbstractArray; kwargs...) = isapprox(a.data, b; kwargs...) isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(b, a) -# Below we define a broadcast style for ArrayOperators and override copy and copyto! -# This allows all arithmetic/broadcasting with ArrayOperators to be handled by the -# underlying logic of the array contained in the struct, while ensuring that -# all results stay as a ArrayOperator. - -struct ArrayOperatorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{0} end - -# N is the input dimension of the tensor being broadcasted. -# For he result of the broadcast we will choose to preserve the highest input dimension. -Base.BroadcastStyle(::Type{<:ArrayOperator{<:Any, <:Any, N, <:Any}}) where {N} = ArrayOperatorBroadcastStyle{N}() -Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{M}) where {N, M} = ArrayOperatorBroadcastStyle{N}() -Base.BroadcastStyle(::Broadcast.DefaultArrayStyle{M}, ::ArrayOperatorBroadcastStyle{N}) where {N, M} = ArrayOperatorBroadcastStyle{N}() -Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::ArrayOperatorBroadcastStyle{M}) where {N, M} = ArrayOperatorBroadcastStyle{max(N, M)}() +# -------------------- +# Broadcasting with ArrayOperators +# -------------------- +# +# We want broadcasting on ArrayOperators to behave +# as it would on the underlying array, but preserving the +# ArrayOperator{N, M, T, L} type with the correct type parameters. +# T and L are preserved or inferred from type promotion. +# When the highest order is an ArrayOperator, N and M of that +# ArrayOperator are preserved. We do not support binary broadcasts +# for cases where: +# +# 1. The highest order is not an ArrayOperator +# 2. We have an (N, M) ArrayOperator and an (N2, M2) ArrayOperator +# with N + M = N2 + M2 but (N, M) != (N2, M2). +# +# As inferring N and M of the resulting ArrayOperator is ambiguous. + +# We define a custom broadcast style. +# We inherit from the L-array broadcast style and require output dimension N +# as extra information. +struct ArrayOperatorBroadcastStyle{L, N} <: Broadcast.AbstractArrayStyle{L} end + +Base.BroadcastStyle(::Type{<:ArrayOperator{L, <:Any, N, <:Any}}) where {L, N} = ArrayOperatorBroadcastStyle{L, N}() +function Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{L, N}, ::Broadcast.DefaultArrayStyle{M}) where {L, N, M} + # Julia optimises these checks at compile time. + if L >= M + ArrayOperatorBroadcastStyle{L, N}() + else + throw(ArgumentError("Ambiguous output dimension for resulting ArrayOperator")) + end +end +Base.BroadcastStyle(a::Broadcast.DefaultArrayStyle{M}, b::ArrayOperatorBroadcastStyle{L, N}) where {L, N, M} = Base.BroadcastStyle(b, a) +function Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{L1, N1},::ArrayOperatorBroadcastStyle{L2, N2}) where {L1, N1, L2, N2} + if L1 > L2 + ArrayOperatorBroadcastStyle{L1, N1}() + elseif L2 > L1 + ArrayOperatorBroadcastStyle{L2, N2}() + else + throw(ArgumentError("Ambiguous output dimension for resulting ArrayOperator")) + end +end # Helper functions to help define broadcasting/arithmetic with ArrayOperators. # By converting a broadcast involving ArrayOperators into a broadcast @@ -89,7 +118,7 @@ function Base.copy(bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{N}}) wh end # copyto adds support for .= -function Base.copyto!(dest::ArrayOperator, bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{N}}) where {N} +function Base.copyto!(dest::ArrayOperator, bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{L, N}}) where {L, N} # As above databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) copyto!(dest.data, Broadcast.flatten(databroadcast)) diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl index 9857e44..76ddf94 100644 --- a/test/array_operator_test.jl +++ b/test/array_operator_test.jl @@ -33,7 +33,7 @@ using DualArrays: ArrayOperator t2 = ArrayOperator{1}([1 2 3]) t3 = ArrayOperator{0}([2]) - @test t1 .+ ones(3, 3) == [2 2 2;3 3 3; 4 4 4] + @test_throws ArgumentError t1 .+ ones(3, 3) @test t1 .* t2 == ArrayOperator{1}([1 2 3;2 4 6;3 6 9]) @test t1 .* t2 .+ t3 == ArrayOperator{1}([3 4 5;4 6 8;5 8 11]) @@ -55,5 +55,7 @@ using DualArrays: ArrayOperator @test 2 * t == [2 4 6;8 10 12;14 16 18] @test s - t == ones(3, 3) + + @test_throws ArgumentError ArrayOperator{1}(ones(3,3)) .* ArrayOperator{0}(ones(3,3)) end end \ No newline at end of file From 75b0953e8ef9d903ff9590e1a099e662bb99b57c Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Tue, 5 May 2026 12:26:47 +0100 Subject: [PATCH 22/28] fix parameter ordering --- src/arithmetic.jl | 10 +++++----- src/indexing.jl | 6 +++--- src/types.jl | 16 ++++++++-------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/arithmetic.jl b/src/arithmetic.jl index 1effd91..323bc03 100644 --- a/src/arithmetic.jl +++ b/src/arithmetic.jl @@ -135,16 +135,16 @@ function _contract(x, y, A, B, C) return TensorOperations.tensorcontract(x, x_idx, false, y, y_idx, false, ret_idx, 1) end -function *(x::ArrayOperator{<:Any, <:Any, A, B}, y::ArrayOperator{<:Any, <:Any, B, C}) where {A, B, C} +function *(x::ArrayOperator{A, B, <:Any, <:Any}, y::ArrayOperator{B, C, <:Any, <:Any}) where {A, B, C} return ArrayOperator{A}(_contract(x.data, y.data, A, B, C)) end -function *(x::ArrayOperator{<:Any, <:Any, A, B}, y::AbstractArray{<:Any, L}) where {A, B, L} +function *(x::ArrayOperator{A, B, <:Any, <:Any}, y::AbstractArray{<:Any, L}) where {A, B, L} C = L - B return ArrayOperator{A}(_contract(x.data, y, A, B, C)) end -function *(x::AbstractArray{<:Any, L}, y::ArrayOperator{<:Any, <:Any, B, C}) where {L, B, C} +function *(x::AbstractArray{<:Any, L}, y::ArrayOperator{B, C, <:Any, <:Any}) where {L, B, C} A = L - B return ArrayOperator{A}(_contract(x, y.data, A, B, C)) end @@ -154,8 +154,8 @@ Base.:+(x::ArrayOperator, y::ArrayOperator) = x .+ y Base.:-(x::ArrayOperator, y::ArrayOperator) = x .- y Base.:-(x::ArrayOperator) = (-).(x) -Base.:*(a::Number, t::ArrayOperator{L, T, N, M}) where {L, T, N, M} = - ArrayOperator{L, promote_type(typeof(a), T), N, M}(a .* t.data) +Base.:*(a::Number, t::ArrayOperator{N, M, T, L}) where {N, M, T, L} = + ArrayOperator{N, M, promote_type(typeof(a), T), L}(a .* t.data) Base.:*(t::ArrayOperator, a::Number) = a * t diff --git a/src/indexing.jl b/src/indexing.jl index 189b0aa..e6c9d56 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -27,14 +27,14 @@ sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, siz Idx = Union{Int, Colon, AbstractUnitRange} count_ints(t::Tuple) = count(x -> x isa Int, t) -function getindex(t::ArrayOperator{<:Any, T, N, M}, i::NTuple{N, Idx}, j::NTuple{M, Idx}) where {T, N, M} +function getindex(t::ArrayOperator{N, M, <:Any, <:Any}, i::NTuple{N, Idx}, j::NTuple{M, Idx}) where {N, M} # new value of M is inferred from the ArrayOperator constructor and the order of # indexing the underlying array. newN = N - count_ints(i) ArrayOperator{newN}(sparse_getindex(t.data, i..., j...)) end -function getindex(t::ArrayOperator{<:Any, T, N, M}, i::NTuple{N, Idx}, ::Colon) where {T, N, M} +function getindex(t::ArrayOperator{N, M, <:Any, <:Any}, i::NTuple{N, Idx}, ::Colon) where {N, M} # new value of M is inferred from the ArrayOperator constructor and the order of # indexing the underlying array. newN = N - count_ints(i) @@ -43,7 +43,7 @@ end # Integer indexing always returns a scalar. getindex(t::ArrayOperator, i::Vararg{Int}) = getindex(t.data, i...) # Indexing with only slices/ranges returns a similar tensor -getindex(t::ArrayOperator{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N}(sparse_getindex(t.data, i...)) +getindex(t::ArrayOperator{N, M, <:Any, <:Any}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N}(sparse_getindex(t.data, i...)) """ Extract a single Dual number from a DualArray. diff --git a/src/types.jl b/src/types.jl index 962804d..bd97444 100644 --- a/src/types.jl +++ b/src/types.jl @@ -22,18 +22,18 @@ a + Jϵ Where a is an N-array of real numbers, J is an N+M=tensor and ϵ is an M-array of dual parts. In the simplest case, where N = 0, we have a Dual number with dual parts arranged in an M-array. """ -struct ArrayOperator{L, T, N, M} +struct ArrayOperator{N, M, T, L} data::AbstractArray{T, L} end # Constructor to wrap an array with a tensor, given a contraction rule represented by N function ArrayOperator{N}(data::AbstractArray{T, L}) where {L, T, N} - ArrayOperator{L, T, N, L - N}(data) + ArrayOperator{N, L - N, T, L}(data) end # Helper convert function _convert_array(::Type{T}, a::AbstractArray) where {T} = T.(a) -_convert_array(::Type{T}, t::ArrayOperator{L, S, N, M}) where {T, L, S, N, M} = ArrayOperator{L, T, N, M}(_convert_array(T, t.data)) +_convert_array(::Type{T}, t::ArrayOperator{N, M, S, L}) where {T, N, M, S, L} = ArrayOperator{N, M, T, L}(_convert_array(T, t.data)) # Basic array interface for op in (:size, :axes, :iterate) @@ -81,7 +81,7 @@ isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(b, a) # as extra information. struct ArrayOperatorBroadcastStyle{L, N} <: Broadcast.AbstractArrayStyle{L} end -Base.BroadcastStyle(::Type{<:ArrayOperator{L, <:Any, N, <:Any}}) where {L, N} = ArrayOperatorBroadcastStyle{L, N}() +Base.BroadcastStyle(::Type{<:ArrayOperator{N, <:Any, <:Any, L}}) where {L, N} = ArrayOperatorBroadcastStyle{L, N}() function Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{L, N}, ::Broadcast.DefaultArrayStyle{M}) where {L, N, M} # Julia optimises these checks at compile time. if L >= M @@ -110,7 +110,7 @@ _unwrap(x) = x _unwrap_args(args::Tuple) = map(_unwrap, args) # copy ensures that arithmetic involving a ArrayOperator returns a ArrayOperator -function Base.copy(bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{N}}) where {N} +function Base.copy(bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{L, N}}) where {L, N} # We create a Broadcasted of the underlying arrays and create a ArrayOperator containing # the evaluated broadcast databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) @@ -146,7 +146,7 @@ function Dual(value::T, partials::AbstractArray{S}) where {S, T} Dual(convert(T2, value), _convert_array(T2, partials)) end -function Dual(value::T, partials::ArrayOperator{L, S, 0, M}) where {L, S, M, T} +function Dual(value::T, partials::ArrayOperator{0, M, S, L}) where {L, S, M, T} T2 = promote_type(T, S) Dual(convert(T2, value), _convert_array(T2, partials).data) end @@ -169,11 +169,11 @@ For now the entries just return the values when indexed. Constructs a DualVector, ensuring that the vector length matches the number of rows in the Jacobian. """ -struct DualArray{T, N , A <: AbstractArray{T,N},J <: (ArrayOperator{L, T, N, M} where {L, M})} <: AbstractVector{Dual{T}} +struct DualArray{T, N , A <: AbstractArray{T,N},J <: (ArrayOperator{N, M, T, L} where {L, M})} <: AbstractVector{Dual{T}} value::A jacobian::J - function DualArray(value::A, jacobian::J) where {T, N, A <: AbstractArray{T,N}, J <: (ArrayOperator{L, T, N, M} where {L, M})} + function DualArray(value::A, jacobian::J) where {T, N, A <: AbstractArray{T,N}, J <: (ArrayOperator{N, M, T, L} where {L, M})} if size(value) != ntuple(i -> size(jacobian, i), N) throw(ArgumentError("Length of value vector must match number of rows in Jacobian.")) end From 62e4f433f21c54eab064ee179ff3ea3ab7a43f50 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Tue, 5 May 2026 12:35:54 +0100 Subject: [PATCH 23/28] fix equality --- src/types.jl | 9 +++------ test/array_operator_test.jl | 14 +++++++------- test/broadcast_test.jl | 18 +++++++++--------- 3 files changed, 19 insertions(+), 22 deletions(-) diff --git a/src/types.jl b/src/types.jl index bd97444..32ae8ea 100644 --- a/src/types.jl +++ b/src/types.jl @@ -51,12 +51,9 @@ Base.Broadcast.broadcastable(t::ArrayOperator) = t transpose(t::ArrayOperator) = transpose(t.data) sum(t::ArrayOperator; kwargs...) = sum(t.data; kwargs...) -==(a::ArrayOperator, b::ArrayOperator) = a.data == b.data -==(a::ArrayOperator, b::AbstractArray) = a.data == b -==(a::AbstractArray, b::ArrayOperator) = a == b.data -isapprox(a::ArrayOperator, b::ArrayOperator; kwargs...) = isapprox(a.data, b.data; kwargs...) -isapprox(a::ArrayOperator, b::AbstractArray; kwargs...) = isapprox(a.data, b; kwargs...) -isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(b, a) +# Equality is only defined for two ArrayOperators of the same (N, M). +==(a::ArrayOperator{N, M}, b::ArrayOperator{N, M}) where {N, M} = a.data == b.data +isapprox(a::ArrayOperator{N, M}, b::ArrayOperator{N, M}; kwargs...) where {N, M} = isapprox(a.data, b.data; kwargs...) # -------------------- # Broadcasting with ArrayOperators diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl index 76ddf94..4ae5e30 100644 --- a/test/array_operator_test.jl +++ b/test/array_operator_test.jl @@ -5,10 +5,10 @@ using DualArrays: ArrayOperator t = ArrayOperator{1}([1 2 3;4 5 6;7 8 9]) @testset "Equality" begin - @test t == [1 2 3;4 5 6;7 8 9] - @test [1 2 3;4 5 6;7 8 9] == t - @test isapprox(t, [1 2 3;4 5 6;7 8 9]) - @test isapprox([1 2 3;4 5 6;7 8 9], t) + @test t == ArrayOperator{1}([1 2 3;4 5 6;7 8 9]) + @test isapprox(t, ArrayOperator{1}([1 2 3;4 5 6;7 8 9])) + @test t != [1 2 3;4 5 6;7 8 9] + @test t != ArrayOperator{0}([1 2 3;4 5 6;7 8 9]) end @testset "Indexing" begin @test t[1,1] == 1 @@ -51,10 +51,10 @@ using DualArrays: ArrayOperator @test t * [1, 0, 0] == ArrayOperator{1}([1,4,7]) @test [1 0 0] * t == ArrayOperator{1}([1 2 3]) - @test t * 2 == [2 4 6;8 10 12;14 16 18] - @test 2 * t == [2 4 6;8 10 12;14 16 18] + @test t * 2 == ArrayOperator{1}([2 4 6;8 10 12;14 16 18]) + @test 2 * t == ArrayOperator{1}([2 4 6;8 10 12;14 16 18]) - @test s - t == ones(3, 3) + @test s - t == ArrayOperator{1}(ones(3, 3)) @test_throws ArgumentError ArrayOperator{1}(ones(3,3)) .* ArrayOperator{0}(ones(3,3)) end diff --git a/test/broadcast_test.jl b/test/broadcast_test.jl index f87d9f4..e368e08 100644 --- a/test/broadcast_test.jl +++ b/test/broadcast_test.jl @@ -22,11 +22,11 @@ using DualArrays, Test, SparseArrays, LinearAlgebra @test l.value ≈ log.([1.0, 2.0, 3.0]) @test a.value ≈ tanh.([1.0, 2.0, 3.0]) - @test s.jacobian ≈ Diagonal(cos.([1.0, 2.0, 3.0])) - @test c.jacobian ≈ Diagonal(-sin.([1.0, 2.0, 3.0])) - @test e.jacobian ≈ Diagonal(exp.([1.0, 2.0, 3.0])) - @test l.jacobian ≈ Diagonal(1.0 ./ [1.0, 2.0, 3.0]) - @test a.jacobian ≈ Diagonal(1.0 .- tanh.([1.0, 2.0, 3.0]).^2) + @test s.jacobian.data ≈ Diagonal(cos.([1.0, 2.0, 3.0])) + @test c.jacobian.data ≈ Diagonal(-sin.([1.0, 2.0, 3.0])) + @test e.jacobian.data ≈ Diagonal(exp.([1.0, 2.0, 3.0])) + @test l.jacobian.data ≈ Diagonal(1.0 ./ [1.0, 2.0, 3.0]) + @test a.jacobian.data ≈ Diagonal(1.0 .- tanh.([1.0, 2.0, 3.0]).^2) # Test broadcasting with binary operations a = d .+ 2.0 @@ -67,8 +67,8 @@ using DualArrays, Test, SparseArrays, LinearAlgebra @test m.value ≈ [2.0, 4.0, 6.0] @test div.value ≈ [0.5, 1.0, 1.5] - @test a.jacobian ≈ [2.0 2.0 3.0; 1.0 3.0 3.0; 1.0 2.0 4.0] - @test s.jacobian ≈ [0.0 -2.0 -3.0; -1.0 -1.0 -3.0; -1.0 -2.0 -2.0] - @test m.jacobian ≈ [3.0 2.0 3.0; 2.0 6.0 6.0; 3.0 6.0 11.0] - @test div.jacobian ≈ [0.25 -0.5 -0.75; -0.5 -0.5 -1.5; -0.75 -1.5 -1.75] + @test a.jacobian.data ≈ [2.0 2.0 3.0; 1.0 3.0 3.0; 1.0 2.0 4.0] + @test s.jacobian.data ≈ [0.0 -2.0 -3.0; -1.0 -1.0 -3.0; -1.0 -2.0 -2.0] + @test m.jacobian.data ≈ [3.0 2.0 3.0; 2.0 6.0 6.0; 3.0 6.0 11.0] + @test div.jacobian.data ≈ [0.25 -0.5 -0.75; -0.5 -0.5 -1.5; -0.75 -1.5 -1.75] end \ No newline at end of file From 0178c078d166c352396ff2a40a03334ec5c523f2 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Thu, 7 May 2026 22:43:23 +0100 Subject: [PATCH 24/28] fixes --- examples/nestedduals.jl | 4 ++-- src/arithmetic.jl | 6 +++--- src/types.jl | 2 +- src/utilities.jl | 4 +++- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/examples/nestedduals.jl b/examples/nestedduals.jl index 21e8523..8aa224f 100644 --- a/examples/nestedduals.jl +++ b/examples/nestedduals.jl @@ -63,9 +63,9 @@ value = result.value.value # equivalent to b gradient = result.value.partials # equivalent to c -gradient2 = result.partials.data.value +gradient2 = result.partials.value # equivalent to d -hessian = result.partials.data.jacobian +hessian = result.partials.jacobian.data # We expect this to be 6 print("Value: ", value, "\n") diff --git a/src/arithmetic.jl b/src/arithmetic.jl index 46e685e..cd4670e 100644 --- a/src/arithmetic.jl +++ b/src/arithmetic.jl @@ -1,10 +1,10 @@ # Arithmetic operations for DualArrays.jl # Tensor transpose -transpose(t::ArrayOperator{<:Any, <:Any, <:N, <:Any}) where {N} = ArrayOperator{N}(transpose(t.data)) +transpose(t::ArrayOperator{N, M}) where {N, M} = ArrayOperator{M}(transpose(t.data)) # Special case: Transpose of a vector becomes a matrix -# TODO: Technically a row vector is a Tensor{1, T, 1, 0} -transpose(t::ArrayOperator{1, <:Any, 0, <:Any}) = ArrayOperator{1}(transpose(t.data)) +# TODO: Technically a row vector is a Tensor{0, 1, T, 1} +transpose(t::ArrayOperator{1, 0, <:Any, <:Any}) = ArrayOperator{1}(transpose(t.data)) """ Addition/Subtraction of DualVectors. diff --git a/src/types.jl b/src/types.jl index e3e0190..cb4324c 100644 --- a/src/types.jl +++ b/src/types.jl @@ -178,7 +178,7 @@ For now the entries just return the values when indexed. Constructs a DualVector, ensuring that the vector length matches the number of rows in the Jacobian. """ -struct DualArray{T, N , A <: AbstractArray{T,N},J <: (ArrayOperator{N, M, T, L} where {L, M})} <: AbstractArray{Dual{T}} +struct DualArray{T, N , A <: AbstractArray{T,N},J <: (ArrayOperator{N, M, T, L} where {L, M})} <: AbstractArray{Dual{T}, N} value::A jacobian::J diff --git a/src/utilities.jl b/src/utilities.jl index 726af37..8c19492 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -50,7 +50,9 @@ end Custom display method for DualVectors. """ show(io::IO, ::MIME"text/plain", x::DualArray) = - (print(io, x.value); print(io, " + "); print(io, x.jacobian); print(io, "𝛜")) + (show(io, x.value); print(io, " + "); show(io, x.jacobian); print(io, "𝛜")) + +show(io::IO, x::DualArray) = show(io, MIME"text/plain"(), x) """ Utility function to compute the jacobian of a function `f` at point `x`. From 513469ff09817fae784318039daa3eb5b44cadba Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Thu, 7 May 2026 23:19:06 +0100 Subject: [PATCH 25/28] improve coverage --- src/arithmetic.jl | 3 --- src/indexing.jl | 6 ------ src/utilities.jl | 4 +--- test/array_operator_test.jl | 5 +++++ test/runtests.jl | 16 ++++++++++++++++ 5 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/arithmetic.jl b/src/arithmetic.jl index cd4670e..1358c44 100644 --- a/src/arithmetic.jl +++ b/src/arithmetic.jl @@ -2,9 +2,6 @@ # Tensor transpose transpose(t::ArrayOperator{N, M}) where {N, M} = ArrayOperator{M}(transpose(t.data)) -# Special case: Transpose of a vector becomes a matrix -# TODO: Technically a row vector is a Tensor{0, 1, T, 1} -transpose(t::ArrayOperator{1, 0, <:Any, <:Any}) = ArrayOperator{1}(transpose(t.data)) """ Addition/Subtraction of DualVectors. diff --git a/src/indexing.jl b/src/indexing.jl index ad0a916..9662149 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -82,12 +82,6 @@ function getindex(x::DualMatrix, y::Vararg{Union{Colon, UnitRange}}) DualMatrix(newval, newjac) end -# Copyto! (For broadcast assignments) -function Base.copyto!(dest::DualVector, src::DualVector) - copyto!(dest.value, src.value) - copyto!(dest.jacobian.data, src.jacobian.data) - return dest -end # DualArray array interface for op in (:size, :axes) @eval $op(a::DualArray) = $op(a.value) diff --git a/src/utilities.jl b/src/utilities.jl index 8c19492..bea6ee4 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -14,13 +14,11 @@ _jacobian(d::DualVector) = d.jacobian.data _jacobian(d::DualVector, ::Int) = d.jacobian.data _jacobian(x::Number, N::Int) = Zeros(typeof(x), 1, N) -_value(v::AbstractVector) = v _value(d::DualVector) = d.value -_value(d::Dual) = d.value _value(x::Number) = x _size(x::Real) = 1 -_size(x::DualVector) = length(x.value) +_size(x::DualVector) = size(x.jacobian.data, 2) """ Vertically concatenate Dual numbers and DualVectors. diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl index 4ae5e30..6bc9f1c 100644 --- a/test/array_operator_test.jl +++ b/test/array_operator_test.jl @@ -58,4 +58,9 @@ using DualArrays: ArrayOperator @test_throws ArgumentError ArrayOperator{1}(ones(3,3)) .* ArrayOperator{0}(ones(3,3)) end + + @testset "Transpose" begin + @test transpose(t) == ArrayOperator{1}([1 4 7;2 5 8;3 6 9]) + @test transpose(ArrayOperator{0}([1, 2, 3])) == ArrayOperator{1}([1 2 3]) + end end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 5319ef2..8ac5fcc 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -41,6 +41,11 @@ using DualArrays: ArrayOperator @test sum(v[1:end-1] .* v[2:end]).partials == ForwardDiff.gradient(v -> sum(v[1:end-1] .* v[2:end]), 1:n) end + @testset "Sparse Indexing" begin + d = DualVector([1,2,3], I(3)) + @test d[1].partials isa OneElement + @test d[1].partials == OneElement(1.0, 1, 3) + end @testset "Indexing (Matrix)" begin m = DualMatrix([1 2 3;4 5 6;7 8 9], ones(3,3,3)) @@ -50,6 +55,8 @@ using DualArrays: ArrayOperator @test m[1, :] isa DualVector @test m[1, :] == DualVector([1, 2, 3], ones(3, 3)) @test m[:, 1] == DualVector([1, 4, 7], ones(3, 3)) + + @test m[1:2, 1:2] == DualMatrix([1 2;4 5], ones(2, 2, 3)) end @testset "Arithmetic (DualVector)" begin @@ -65,6 +72,9 @@ using DualArrays: ArrayOperator @test sum(x .* y) isa Dual @test sum(x .* y) == Dual(5,[16,23,30]) + + @test y .* [1,2] == DualVector([2, 6], [4 5 6;14 16 18]) + @test [1,2] .* y == DualVector([2, 6], [4 5 6;14 16 18]) end @testset "Arithmetic (Dual)" begin @@ -83,6 +93,9 @@ using DualArrays: ArrayOperator @test sin(a) == Dual(sin(2), cos(2) * [1, 2, 3]) @test cos(b) == Dual(cos(3), -sin(3) * [4, 5, 6]) + + @test a .* [1, 2] == DualVector([2, 4], [1 2 3; 2 4 6]) + @test [1, 2] .* a == DualVector([2, 4], [1 2 3; 2 4 6]) end @testset "Dot product" begin @@ -130,6 +143,9 @@ using DualArrays: ArrayOperator d2 = DualVector(d, d1) @test d2 isa DualVector + + @test d2[1].value == Dual(2, [1, 0]) + @test d2[1].partials == DualVector([1.0, 0], zeros(2, 2)) end include("broadcast_test.jl") From 165bb3db834c65b7008a0a010ee935cbf5d9b049 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Thu, 7 May 2026 23:34:17 +0100 Subject: [PATCH 26/28] fix coverage --- src/types.jl | 2 -- test/array_operator_test.jl | 10 +++++++++- test/runtests.jl | 1 + 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/types.jl b/src/types.jl index cb4324c..0edc889 100644 --- a/src/types.jl +++ b/src/types.jl @@ -158,8 +158,6 @@ end # Lets us declare duals with a column vector as well as a row vector. Dual(value::T, partials::ArrayOperator{N, 0, S, L}) where {L, S, N, T} = Dual(value, ArrayOperator{0}(partials.data)) -Dual(value::T, partials::ArrayOperator{L, T}) where {L, T} = Dual(value, partials.data) - """ DualVector{T, M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}} diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl index 6bc9f1c..201526b 100644 --- a/test/array_operator_test.jl +++ b/test/array_operator_test.jl @@ -1,5 +1,6 @@ using Test, LinearAlgebra using DualArrays: ArrayOperator +using FillArrays @testset "ArrayOperator" begin t = ArrayOperator{1}([1 2 3;4 5 6;7 8 9]) @@ -46,7 +47,6 @@ using DualArrays: ArrayOperator @test result isa ArrayOperator @test result.data == s.data @test s == ArrayOperator{1}([2 3 4; 5 6 7; 8 9 10]) - @test t * t == ArrayOperator{1}([30 36 42;66 81 96; 102 126 150]) @test t * [1, 0, 0] == ArrayOperator{1}([1,4,7]) @test [1 0 0] * t == ArrayOperator{1}([1 2 3]) @@ -63,4 +63,12 @@ using DualArrays: ArrayOperator @test transpose(t) == ArrayOperator{1}([1 4 7;2 5 8;3 6 9]) @test transpose(ArrayOperator{0}([1, 2, 3])) == ArrayOperator{1}([1 2 3]) end + + @testset "Arithmetic with nonstandard array" begin + o = OneElement(1, 1, 3) + a = ArrayOperator{1}(o) + b = ArrayOperator{1}(zeros(3)) + b .= a .* 3 + @test b == ArrayOperator{1}(OneElement(3, 1, 3)) + end end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 8ac5fcc..8bb6a8c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -133,6 +133,7 @@ using DualArrays: ArrayOperator @testset "show" begin d = DualVector([1.0, 2.0], [1 0; 0 1]) s = repr(MIME"text/plain"(), d) + @test repr(d) == s @test occursin(" + ", s) @test endswith(s, "𝛜") end From 05eb51a2e4e206ef71fd64ee4113ce3b28e4af39 Mon Sep 17 00:00:00 2001 From: Sheehan Olver Date: Fri, 8 May 2026 11:41:27 +0100 Subject: [PATCH 27/28] Update indexing.jl --- src/indexing.jl | 38 -------------------------------------- 1 file changed, 38 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index 888dda0..0ceb7fc 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -10,44 +10,6 @@ sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, siz sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D, 1)) sparse_getindex(d::DualArray, args...) = d[args...] -# We need a system of indexing that takes two tuples -# of length N and M. We must then return a ArrayOperator whose new input and output dimensions -# are inferred from how many of the arguments in each respective tuple are integers. - -# For the purposes of DualArrays.jl, we only need to index the input dimensions -# -# Example: -# -# Consider a DualVector with standard matrix Jacobian (ArrayOperator{2, T, 1, 1}): -# -# d = a + Bϵ -# -# If we index d[1], we have: -# -# d[1] = a[1] + B[1, :]ϵ -# -# Where B[1, :] indexes the input dimension of B -Idx = Union{Int, Colon, AbstractUnitRange} -count_ints(t::Tuple) = count(x -> x isa Int, t) - -function getindex(t::ArrayOperator{N, M, <:Any, <:Any}, i::NTuple{N, Idx}, j::NTuple{M, Idx}) where {N, M} - # new value of M is inferred from the ArrayOperator constructor and the order of - # indexing the underlying array. - newN = N - count_ints(i) - ArrayOperator{newN}(sparse_getindex(t.data, i..., j...)) -end - -function getindex(t::ArrayOperator{N, M, <:Any, <:Any}, i::NTuple{N, Idx}, ::Colon) where {N, M} - # new value of M is inferred from the ArrayOperator constructor and the order of - # indexing the underlying array. - newN = N - count_ints(i) - ArrayOperator{newN}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) -end -# Integer indexing always returns a scalar. -getindex(t::ArrayOperator, i::Vararg{Int}) = getindex(t.data, i...) -# Indexing with only slices/ranges returns a similar tensor -getindex(t::ArrayOperator{N, M, <:Any, <:Any}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N}(sparse_getindex(t.data, i...)) - # We need a system of indexing that takes two tuples # of length N and M. We must then return a ArrayOperator whose new input and output dimensions From 97f5005759100867b9dcfffbab484317a06c16c9 Mon Sep 17 00:00:00 2001 From: Sheehan Olver Date: Fri, 8 May 2026 11:57:12 +0100 Subject: [PATCH 28/28] improve constructors and test example --- examples/nestedduals.jl | 2 +- src/DualArrays.jl | 2 +- src/types.jl | 33 ++++++++++++++++++++++++++++----- test/runtests.jl | 6 +++++- 4 files changed, 35 insertions(+), 8 deletions(-) diff --git a/examples/nestedduals.jl b/examples/nestedduals.jl index 8aa224f..51af36d 100644 --- a/examples/nestedduals.jl +++ b/examples/nestedduals.jl @@ -42,9 +42,9 @@ # # We now implement an example using f(x) = x[1] * x[2] below. +using DualArrays f(x) = x[1] * x[2] -using DualArrays # a + Bϵ x = DualVector([2, 3], [1 0; 0 1]) diff --git a/src/DualArrays.jl b/src/DualArrays.jl index b639121..a5d2230 100644 --- a/src/DualArrays.jl +++ b/src/DualArrays.jl @@ -11,7 +11,7 @@ Differentiation rules are mostly provided by ChainRules.jl. """ module DualArrays -export DualVector, DualMatrix, Dual, jacobian +export DualVector, DualMatrix, Dual, jacobian, ArrayOperator import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \, eltype, transpose, permutedims diff --git a/src/types.jl b/src/types.jl index 24789b9..fb516a0 100644 --- a/src/types.jl +++ b/src/types.jl @@ -48,7 +48,6 @@ eltype(::Type{<:ArrayOperator{N, M, T}}) where {N,M,T} = T Base.Broadcast.broadcastable(t::ArrayOperator) = t -transpose(t::ArrayOperator) = transpose(t.data) sum(t::ArrayOperator; kwargs...) = sum(t.data; kwargs...) # Equality is only defined for two ArrayOperators of the same (N, M). @@ -182,7 +181,7 @@ struct DualArray{T, N, A <: AbstractArray{T,N}, J <: (ArrayOperator{N, M, T, L} value::A jacobian::J - function DualArray(value::A, jacobian::J) where {T, N, A <: AbstractArray{T,N}, J <: (ArrayOperator{N, M, T, L} where {L, M})} + function DualArray{T,N,A,J}(value::A, jacobian::J) where {T, N, A <: AbstractArray{T,N}, J <: (ArrayOperator{N, M, T, L} where {L, M})} if size(value) != ntuple(i -> size(jacobian, i), N) throw(ArgumentError("Length of value vector must match number of rows in Jacobian.")) end @@ -190,12 +189,17 @@ struct DualArray{T, N, A <: AbstractArray{T,N}, J <: (ArrayOperator{N, M, T, L} end end +DualArray{T,N}(value::A, jacobian::J) where {T, N, A <: AbstractArray{T,N}, J <: (ArrayOperator{N, M, T, L} where {L, M})} = + DualArray{T,N,A,J}(value, jacobian) + +DualArray{T,N}(value, jacobian) where {T, N} = DualArray{T,N}(elconvert(T, value), elconvert(T, jacobian)) + """ Constructor that forces type compatibility """ function DualArray(value::AbstractArray, jacobian::ArrayOperator) T = promote_type(eltype(value), eltype(jacobian)) - DualArray(elconvert(T, value), elconvert(T, jacobian)) + DualArray{T}(value, jacobian) end # Helper function to define DualArrays with AbstractArray jacobians @@ -206,11 +210,30 @@ end const DualVector = DualArray{T, 1} where {T} const DualMatrix = DualArray{T, 2} where {T} +function DualVector(value::AbstractArray, jacobian::ArrayOperator) + T = promote_type(eltype(value), eltype(jacobian)) + DualVector{T}(value, jacobian) +end + +function DualMatrix(value::AbstractArray, jacobian::ArrayOperator) + T = promote_type(eltype(value), eltype(jacobian)) + DualMatrix{T}(value, jacobian) +end + + + +function DualVector(value::AbstractVector{S}, jacobian::AbstractArray{T, N}) where {S, T, N} + DualVector(value, ArrayOperator{1}(jacobian)) +end + +function DualMatrix(value::AbstractMatrix{S}, jacobian::AbstractArray{T, N}) where {S, T, N} + DualMatrix(value, ArrayOperator{2}(jacobian)) +end + + elconvert(::Type{Dual{T}}, a::DualVector) where {T} = DualVector(elconvert(T, a.value), elconvert(T, a.jacobian)) elconvert(::Type{Dual{T}}, a::DualMatrix) where {T} = DualMatrix(elconvert(T, a.value), elconvert(T, a.jacobian)) -DualVector(value::AbstractVector, jacobian) = DualArray(value, jacobian) -DualMatrix(value::AbstractMatrix, jacobian) = DualArray(value, jacobian) # Basic equality for Dual numbers ==(a::Dual, b::Dual) = a.value == b.value && a.partials == b.partials isapprox(a::Dual, b::Dual) = isapprox(a.value, b.value) && isapprox(a.partials, b.partials) diff --git a/test/runtests.jl b/test/runtests.jl index 8bb6a8c..ab24156 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -151,4 +151,8 @@ using DualArrays: ArrayOperator include("broadcast_test.jl") include("array_operator_test.jl") -end \ No newline at end of file +end + + +### test examples +include("../examples/nestedduals.jl") \ No newline at end of file