From 7e509e011431ab6a81dd26a4bd34879d34bccd9f Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Fri, 10 Apr 2026 14:31:30 +0100 Subject: [PATCH 01/21] initial commit --- src/indexing.jl | 14 ++++++++++++ src/types.jl | 56 +++++++++++++++++++++++++++++++++++++++++++++--- test/runtests.jl | 1 + 3 files changed, 68 insertions(+), 3 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index 7ff7f53..e64a7c0 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -6,6 +6,20 @@ sparse_getindex(a...) = layout_getindex(a...) sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D, 2)) sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D, 1)) +""" +For the purposes of DualArrays.jl, we need to be able to index on the input indices +(corresponding to the the primal values) only. This way, when we index, we still get the +same 'arrangement' of dual parts. Example in the DualVector case (where the Jacobian is +a Tensor{2, Float64, 1, 1} acting on a vector ϵ): + +d = a + Jϵ +d[1] = a[1] + J[1,:]ϵ + +J[1, :] is a row vector so it can still be applied to our ϵ. +""" +getindex(t::Tensor{L, T, N, M}, i::NTuple{N, Int}, ::Colon) where {L, T, N, M} = sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...) +getindex(t::Tensor, i...) = t.data[i...] + """ Extract a single Dual number from a DualVector at position y. """ diff --git a/src/types.jl b/src/types.jl index 5367511..c8c3104 100644 --- a/src/types.jl +++ b/src/types.jl @@ -1,5 +1,45 @@ # Core type definitions for DualArrays.jl +""" +This type represents a Tensor along with its contraction rule. +A tensor with its contraction rule represents a linear map from an N-array to an M-array, +with 0-arrays considered scalars, 1-arrays vectors, etc. + +We have: +-L is the dimensionality of the tensor +-T is the element type of the tensor +-N is the dimensionality of the input array/number of lower indices +-M is the dimensionality of the output array/number of upper indices + +We enforce L = N + M. + +In the context of DualArrays.jl, a DualArray (currently only a vector) can be thought of as + +a + Jϵ + +Where a is an M-array of real numbers, J is an N+M=tensor and ϵ is an N-array of dual parts. +In the simplest case, where M = 0, we have a Dual number with dual parts arranged in an N-array. +""" +struct Tensor{L, T, N, M} <: AbstractArray{T, L} + data::AbstractArray{T, L} +end + +function Tensor{N,M}(data::AbstractArray{T, L}) where {L, T, N, M} + if L != N + M + throw(ArgumentError("Tensor order must be equivalent to N + M.")) + end + Tensor{L, T, N, M}(data) +end + +Base.convert(::Type{Tensor{L, T, N, M}}, tensor::Tensor{L, S, N, M}) where {L, T, N, M, S} = + Tensor{N, M}(convert.(T, tensor.data)) + +for op in (:size, :axes) + @eval begin + ($op)(t::Tensor) = ($op)(t.data) + end +end + """ Dual{T, Partials <: AbstractVector{T}} <: Real @@ -7,13 +47,23 @@ A dual number type that stores a value and its partials (derivatives). # Fields - `value::T`: The primal value -- `partials::Partials`: The partial derivatives as a vector +- `partials::Partials`: The partial derivatives as a tensor mapping to a scalar """ -struct Dual{T, Partials <: AbstractVector{T}} <: Real +struct Dual{T, Partials <: (Tensor{L, T, 0, M} where {L, M})} <: Real value::T partials::Partials end +function Dual(value::T, partials::Tensor{L, S, 0, M}) where {S, T, L, M} + T2 = promote_type(T, S) + Dual{T2, Tensor{L, T2, 0, M}}(convert(T2, value), convert(Tensor{L, T2, 0, M}, partials)) +end + +# Helper function to define Duals from an AbstractArray +function Dual(value, partials::AbstractArray{T, N}) where {T, N} + Dual(value, Tensor{0, N}(partials)) +end + """ DualVector{T, M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}} @@ -32,7 +82,7 @@ For now the entries just return the values when indexed. Constructs a DualVector, ensuring that the vector length matches the number of rows in the Jacobian. """ -struct DualVector{T, V <: AbstractVector{T},M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}} +struct DualVector{T, V <: AbstractVector{T},M <: (Tensor{L, T, 1, M} where {L, M})} <: AbstractVector{Dual{T}} value::V jacobian::M diff --git a/test/runtests.jl b/test/runtests.jl index 31da00c..7fd2ac4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,6 +5,7 @@ using DualArrays: Dual @testset "Type Definition" begin @test_throws ArgumentError DualVector([1,2],I(3)) + @test Dual(1.0, [1, 2, 3]).partials == Tensor{0, 1}([1.0, 2.0, 3.0]) end @testset "Indexing" begin From a91f7aba9fe20fd385722e50735ef00a38972089 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Mon, 13 Apr 2026 13:02:04 +0100 Subject: [PATCH 02/21] fixes and add tests --- src/indexing.jl | 21 +++++++----- src/types.jl | 84 ++++++++++++++++++++++++++++++++++----------- src/utilities.jl | 2 +- test/runtests.jl | 7 ++-- test/tensor_test.jl | 29 ++++++++++++++++ 5 files changed, 111 insertions(+), 32 deletions(-) create mode 100644 test/tensor_test.jl diff --git a/src/indexing.jl b/src/indexing.jl index e64a7c0..3cae792 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -16,32 +16,37 @@ d = a + Jϵ d[1] = a[1] + J[1,:]ϵ J[1, :] is a row vector so it can still be applied to our ϵ. + +Note that a slice of a tensor done in this way will reduce the input dimension to 0. """ -getindex(t::Tensor{L, T, N, M}, i::NTuple{N, Int}, ::Colon) where {L, T, N, M} = sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...) -getindex(t::Tensor, i...) = t.data[i...] +getindex(t::Tensor{<:Any, <:Any, N, M}, i::NTuple{N, Int}, ::Colon) where {N, M} = Tensor{0}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) +# Integer indexing always returns a scalar. +getindex(t::Tensor, i::Vararg{Int}) = getindex(t.data, i...) +# Indexing with only slices/ranges returns a similar tensor +getindex(t::Tensor{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = Tensor{N}(sparse_getindex(t.data, i...)) """ Extract a single Dual number from a DualVector at position y. """ -function Base.getindex(x::DualVector, y::Int) - Dual(x.value[y], sparse_getindex(x.jacobian, y, :)) +function getindex(x::DualVector, y::Int) + Dual(x.value[y], x.jacobian[(y,), :]) end """ Extract a sub-DualVector from a DualVector using a range. """ -function Base.getindex(x::DualVector, y::UnitRange) +function getindex(x::DualVector, y::UnitRange) newval = x.value[y] - newjac = sparse_getindex(x.jacobian, y, :) + newjac = x.jacobian[y, :] DualVector(newval, newjac) end """ Return the size of the DualVector (length of the value vector). """ -Base.size(x::DualVector) = (length(x.value),) +size(x::DualVector) = (length(x.value),) """ Return the axes of the DualVector. """ -Base.axes(x::DualVector) = axes(x.value) \ No newline at end of file +axes(x::DualVector) = axes(x.value) \ No newline at end of file diff --git a/src/types.jl b/src/types.jl index c8c3104..2f22fc0 100644 --- a/src/types.jl +++ b/src/types.jl @@ -11,7 +11,7 @@ We have: -N is the dimensionality of the input array/number of lower indices -M is the dimensionality of the output array/number of upper indices -We enforce L = N + M. +We enforce L = N + M by inferring M in the constructor. In the context of DualArrays.jl, a DualArray (currently only a vector) can be thought of as @@ -24,22 +24,64 @@ struct Tensor{L, T, N, M} <: AbstractArray{T, L} data::AbstractArray{T, L} end -function Tensor{N,M}(data::AbstractArray{T, L}) where {L, T, N, M} - if L != N + M - throw(ArgumentError("Tensor order must be equivalent to N + M.")) - end - Tensor{L, T, N, M}(data) +# Constructor to wrap an array with a tensor, given a contraction rule represented by N +function Tensor{N}(data::AbstractArray{T, L}) where {L, T, N} + Tensor{L, T, N, L - N}(data) end +# Helper convert function +_convert_array(::Type{T}, a::AbstractArray) where {T} = T.(a) +_convert_array(::Type{T}, t::Tensor{L, S, N, M}) where {T, L, S, N, M} = Tensor{L, T, N, M}(_convert_array(T, t.data)) + Base.convert(::Type{Tensor{L, T, N, M}}, tensor::Tensor{L, S, N, M}) where {L, T, N, M, S} = - Tensor{N, M}(convert.(T, tensor.data)) + Tensor{L, T, N, M}(_convert_array(T, tensor.data)) +# Basic array interface for op in (:size, :axes) @eval begin ($op)(t::Tensor) = ($op)(t.data) + ($op)(t::Tensor, i...) = ($op)(t.data, i...) end end +# Below we define a broadcast style for Tensors and override copy and copyto! +# This allows all arithmetic/broadcasting with Tensors to be handled by the +# underlying logic of the array contained in the struct, while ensuring that +# all results stay as a Tensor. + +struct TensorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{1} end +TensorBroadcastStyle{N}(::Val{M}) where {N, M} = TensorBroadcastStyle{N}() + +# N is the input dimension of the tensor being broadcasted. +# For he result of the broadcast we will choose to preserve the highest input dimension. +Base.BroadcastStyle(::Type{<:Tensor{<:Any, <:Any, N, <:Any}}) where {N} = TensorBroadcastStyle{N}() +Base.BroadcastStyle(::TensorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{0}) where {N} = TensorBroadcastStyle{N}() +Base.BroadcastStyle(::TensorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{1}) where {N} = TensorBroadcastStyle{N}() +Base.BroadcastStyle(::TensorBroadcastStyle{N}, ::TensorBroadcastStyle{M}) where {N, M} = TensorBroadcastStyle{max(N, M)}() + +# Helper functions to help define broadcasting/arithmetic with Tensors. +# By converting a broadcast involving Tensors into a broadcast +# involving the underlying arrays. +_unwrap(t::Tensor) = t.data +_unwrap(x) = x +_unwrap_args(args::Tuple) = map(_unwrap, args) + +# copy ensures that arithmetic involving a Tensor returns a Tensor +function Base.copy(bc::Broadcast.Broadcasted{TensorBroadcastStyle{N}}) where {N} + # We create a Broadcasted of the underlying arrays and create a Tensor containing + # the evaluated broadcast + databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) + Tensor{N}(copy(Broadcast.flatten(databroadcast))) +end + +# copyto adds support for .= +function Base.copyto!(dest::Tensor, bc::Broadcast.Broadcasted{TensorBroadcastStyle}) + # As above + databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) + copyto!(dest.data, Broadcast.flatten(databroadcast)) + dest +end + """ Dual{T, Partials <: AbstractVector{T}} <: Real @@ -61,7 +103,7 @@ end # Helper function to define Duals from an AbstractArray function Dual(value, partials::AbstractArray{T, N}) where {T, N} - Dual(value, Tensor{0, N}(partials)) + Dual(value, Tensor{0}(partials)) end """ @@ -82,27 +124,29 @@ For now the entries just return the values when indexed. Constructs a DualVector, ensuring that the vector length matches the number of rows in the Jacobian. """ -struct DualVector{T, V <: AbstractVector{T},M <: (Tensor{L, T, 1, M} where {L, M})} <: AbstractVector{Dual{T}} +struct DualVector{T, V <: AbstractVector{T},J <: (Tensor{L, T, 1, M} where {L, M})} <: AbstractVector{Dual{T}} value::V - jacobian::M - - function DualVector(value::V, jacobian::M) where {T, V <: AbstractVector{T}, M <: AbstractMatrix{T}} - if size(jacobian, 1) != length(value) - x, y = length(value), size(jacobian, 1) - throw(ArgumentError("vector length must match number of rows in jacobian.\n" * - "vector length: $x\n" * - "no. of jacobian rows: $y")) + jacobian::J + + function DualVector(value::V, jacobian::J) where {T, V <: AbstractVector{T}, J <: (Tensor{L, T, 1, M} where {L, M})} + if length(value) != size(jacobian, 1) + throw(ArgumentError("Length of value vector must match number of rows in Jacobian.")) end - new{T,V, M}(value, jacobian) + new{T, V, J}(value, jacobian) end end """ Constructor that forces type compatibility """ -function DualVector(value::AbstractVector, jacobian::AbstractMatrix) +function DualVector(value::AbstractVector, jacobian::Tensor) T = promote_type(eltype(value), eltype(jacobian)) - DualVector(convert(Vector{T}, value), convert(AbstractMatrix{T}, jacobian)) + DualVector(_convert_array(T, value), _convert_array(T, jacobian)) +end + +# Helper function to define DualVectors with AbstractArray jacobians +function DualVector(value::AbstractVector, jacobian::AbstractArray{T, N}) where {T, N} + DualVector(value, Tensor{1}(jacobian)) end # Basic equality for Dual numbers diff --git a/src/utilities.jl b/src/utilities.jl index 080155f..e892b95 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -49,7 +49,7 @@ end """ Custom display method for DualVectors. """ -Base.show(io::IO, ::MIME"text/plain", x::DualVector) = +show(io::IO, ::MIME"text/plain", x::DualVector) = (print(io, x.value); print(io, " + "); print(io, x.jacobian); print(io, "𝛜")) """ diff --git a/test/runtests.jl b/test/runtests.jl index 7fd2ac4..68a1d81 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,11 +1,11 @@ using DualArrays, Test, LinearAlgebra, ForwardDiff, BandedMatrices -using DualArrays: Dual +using DualArrays: Tensor @testset "DualArrays" begin @testset "Type Definition" begin @test_throws ArgumentError DualVector([1,2],I(3)) - @test Dual(1.0, [1, 2, 3]).partials == Tensor{0, 1}([1.0, 2.0, 3.0]) + @test Dual(1.0, [1, 2, 3]).partials == Tensor{0}([1.0, 2.0, 3.0]) end @testset "Indexing" begin @@ -23,7 +23,7 @@ using DualArrays: Dual n = 10 v = DualVector(1:n, I(n)) - @test v[2:end].jacobian isa BandedMatrix + @test v[2:end].jacobian.data isa BandedMatrix @test sum(v[1:end-1] .* v[2:end]).partials == ForwardDiff.gradient(v -> sum(v[1:end-1] .* v[2:end]), 1:n) end @@ -90,4 +90,5 @@ using DualArrays: Dual end include("broadcast_test.jl") + include("tensor_test.jl") end \ No newline at end of file diff --git a/test/tensor_test.jl b/test/tensor_test.jl new file mode 100644 index 0000000..ad25701 --- /dev/null +++ b/test/tensor_test.jl @@ -0,0 +1,29 @@ +using Test, LinearAlgebra +using DualArrays: Tensor + +@testset "Tensor" begin + t = Tensor{1}([1 2 3;4 5 6;7 8 9]) + @testset "Indexing" begin + @test t[1,1] == 1 + @test t[(1,), :] == Tensor{0}([1, 2, 3]) + @test t[1:2, 1:2] == Tensor{1}([1 2;4 5]) + end + + @testset "Arithmetic" begin + @test t + t isa Tensor + @test t + t == Tensor{1}([2 4 6;8 10 12;14 16 18]) + + @test t .* 2 isa Tensor + @test t .* 2 == Tensor{1}([2 4 6;8 10 12;14 16 18]) + @test t .* [1,2,3] == Tensor{1}([1 2 3;8 10 12;21 24 27]) + + t1 = Tensor{0}([1, 2, 3]) + t2 = Tensor{1}([1 2 3]) + t3 = Tensor{0}([2]) + + @test t1 .* t2 == Tensor{1}([1 2 3;2 4 6;3 6 9]) + @test t1 .* t2 .+ t3 == Tensor{1}([3 4 5;4 6 8;5 8 11]) + + @test sin.(t1) == Tensor{0}(sin.([1, 2, 3])) + end +end \ No newline at end of file From 875ef60a3d08a698927d05ee575446ed4a254f71 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Mon, 13 Apr 2026 18:10:41 +0100 Subject: [PATCH 03/21] fix indexing --- src/indexing.jl | 32 ++++++++++++++++++++------------ test/tensor_test.jl | 7 ++++++- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index 3cae792..b41d2d3 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -6,20 +6,28 @@ sparse_getindex(a...) = layout_getindex(a...) sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D, 2)) sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D, 1)) -""" -For the purposes of DualArrays.jl, we need to be able to index on the input indices -(corresponding to the the primal values) only. This way, when we index, we still get the -same 'arrangement' of dual parts. Example in the DualVector case (where the Jacobian is -a Tensor{2, Float64, 1, 1} acting on a vector ϵ): -d = a + Jϵ -d[1] = a[1] + J[1,:]ϵ +# We need a system of indexing that takes two tuples +# of length N and M. We must then return a Tensor whose new input and output dimensions +# are inferred from how many of the arguments in each respective tuple are integers. -J[1, :] is a row vector so it can still be applied to our ϵ. +# For the purposes of DualArrays.jl, we only need to index the input dimensions +Idx = Union{Int, Colon, AbstractUnitRange} +count_ints(t::Tuple) = count(x -> x isa Int, t) -Note that a slice of a tensor done in this way will reduce the input dimension to 0. -""" -getindex(t::Tensor{<:Any, <:Any, N, M}, i::NTuple{N, Int}, ::Colon) where {N, M} = Tensor{0}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) +function getindex(t::Tensor{<:Any, T, N, M}, i::NTuple{N, Idx}, j::NTuple{M, Idx}) where {T, N, M} + # new value of M is inferred from the Tensor constructor and the order of + # indexing the underlying array. + newN = N - count_ints(i) + Tensor{newN}(sparse_getindex(t.data, i..., j...)) +end + +function getindex(t::Tensor{<:Any, T, N, M}, i::NTuple{N, Idx}, ::Colon) where {T, N, M} + # new value of M is inferred from the Tensor constructor and the order of + # indexing the underlying array. + newN = N - count_ints(i) + Tensor{newN}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) +end # Integer indexing always returns a scalar. getindex(t::Tensor, i::Vararg{Int}) = getindex(t.data, i...) # Indexing with only slices/ranges returns a similar tensor @@ -29,7 +37,7 @@ getindex(t::Tensor{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) wher Extract a single Dual number from a DualVector at position y. """ function getindex(x::DualVector, y::Int) - Dual(x.value[y], x.jacobian[(y,), :]) + Dual(x.value[y], x.jacobian[(y,), (:,)]) end """ diff --git a/test/tensor_test.jl b/test/tensor_test.jl index ad25701..8ce7572 100644 --- a/test/tensor_test.jl +++ b/test/tensor_test.jl @@ -5,8 +5,13 @@ using DualArrays: Tensor t = Tensor{1}([1 2 3;4 5 6;7 8 9]) @testset "Indexing" begin @test t[1,1] == 1 - @test t[(1,), :] == Tensor{0}([1, 2, 3]) + @test t[(1,), (:,)] == Tensor{0}([1, 2, 3]) @test t[1:2, 1:2] == Tensor{1}([1 2;4 5]) + + s = Tensor{2}(ones(2,2,2)) + @test s[(1,1), (:,)] == Tensor{0}([1, 1]) + @test s[(1,:), (:,)] == Tensor{1}([1 1;1 1]) + @test s[(1,2), :] == Tensor{0}([1, 1]) end @testset "Arithmetic" begin From 0b7bd6501b4fa74f3caead3fee78268cbfee4551 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Tue, 14 Apr 2026 12:34:54 +0100 Subject: [PATCH 04/21] small corrections --- src/indexing.jl | 2 +- src/types.jl | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index b41d2d3..c03a5de 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -37,7 +37,7 @@ getindex(t::Tensor{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) wher Extract a single Dual number from a DualVector at position y. """ function getindex(x::DualVector, y::Int) - Dual(x.value[y], x.jacobian[(y,), (:,)]) + Dual(x.value[y], x.jacobian[(y,), :]) end """ diff --git a/src/types.jl b/src/types.jl index 2f22fc0..a17656a 100644 --- a/src/types.jl +++ b/src/types.jl @@ -2,7 +2,7 @@ """ This type represents a Tensor along with its contraction rule. -A tensor with its contraction rule represents a linear map from an N-array to an M-array, +A tensor with its contraction rule represents a linear map from an M-array to an N-array, with 0-arrays considered scalars, 1-arrays vectors, etc. We have: @@ -17,8 +17,8 @@ In the context of DualArrays.jl, a DualArray (currently only a vector) can be th a + Jϵ -Where a is an M-array of real numbers, J is an N+M=tensor and ϵ is an N-array of dual parts. -In the simplest case, where M = 0, we have a Dual number with dual parts arranged in an N-array. +Where a is an N-array of real numbers, J is an N+M=tensor and ϵ is an M-array of dual parts. +In the simplest case, where N = 0, we have a Dual number with dual parts arranged in an M-array. """ struct Tensor{L, T, N, M} <: AbstractArray{T, L} data::AbstractArray{T, L} From f8b03b25f8b3d8f18d0ef501f434df40c2de4f40 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Tue, 14 Apr 2026 12:52:45 +0100 Subject: [PATCH 05/21] improve test coverage --- src/types.jl | 2 +- test/runtests.jl | 11 +++++++++++ test/tensor_test.jl | 8 ++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/types.jl b/src/types.jl index a17656a..e67cca5 100644 --- a/src/types.jl +++ b/src/types.jl @@ -75,7 +75,7 @@ function Base.copy(bc::Broadcast.Broadcasted{TensorBroadcastStyle{N}}) where {N} end # copyto adds support for .= -function Base.copyto!(dest::Tensor, bc::Broadcast.Broadcasted{TensorBroadcastStyle}) +function Base.copyto!(dest::Tensor, bc::Broadcast.Broadcasted{TensorBroadcastStyle{N}}) where {N} # As above databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) copyto!(dest.data, Broadcast.flatten(databroadcast)) diff --git a/test/runtests.jl b/test/runtests.jl index 68a1d81..aafd661 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -10,6 +10,10 @@ using DualArrays: Tensor @testset "Indexing" begin v = DualVector([1, 2, 3], [1 2 3; 4 5 6;7 8 9]) + + @test size(v) == (3,) + @test axes(v) == (Base.OneTo(3),) + @test v[1] isa Dual @test v[1] == Dual(1,[1,2,3]) @test v[2] == Dual(2,[4,5,6]) @@ -88,6 +92,13 @@ using DualArrays: Tensor @test vcat(x, x) == DualVector([1, 1], [1 2 3;1 2 3]) @test vcat(x, y) == DualVector([1, 2, 3], [1 2 3;4 5 6;7 8 9]) end + + @testset "show" begin + d = DualVector([1.0, 2.0], [1 0; 0 1]) + s = repr(MIME"text/plain"(), d) + @test occursin(" + ", s) + @test endswith(s, "𝛜") + end include("broadcast_test.jl") include("tensor_test.jl") diff --git a/test/tensor_test.jl b/test/tensor_test.jl index 8ce7572..70e1570 100644 --- a/test/tensor_test.jl +++ b/test/tensor_test.jl @@ -30,5 +30,13 @@ using DualArrays: Tensor @test t1 .* t2 .+ t3 == Tensor{1}([3 4 5;4 6 8;5 8 11]) @test sin.(t1) == Tensor{0}(sin.([1, 2, 3])) + + s = Tensor{1}(zeros(3, 3)) + + result = (s .= t .+ 1) + + @test result isa Tensor + @test result.data == s.data + @test s == Tensor{1}([2 3 4; 5 6 7; 8 9 10]) end end \ No newline at end of file From 70097f6f78ca22dbb6044de4d109f022dc2ff38d Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Tue, 14 Apr 2026 12:56:55 +0100 Subject: [PATCH 06/21] add indexing example --- src/indexing.jl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/indexing.jl b/src/indexing.jl index c03a5de..dc2dbf0 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -12,6 +12,18 @@ sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, siz # are inferred from how many of the arguments in each respective tuple are integers. # For the purposes of DualArrays.jl, we only need to index the input dimensions +# +# Example: +# +# Consider a DualVector with standard matrix Jacobian (Tensor{2, T, 1, 1}): +# +# d = a + Bϵ +# +# If we index d[1], we have: +# +# d[1] = a[1] + B[1, :]ϵ +# +# Where B[1, :] indexes the input dimension of B Idx = Union{Int, Colon, AbstractUnitRange} count_ints(t::Tuple) = count(x -> x isa Int, t) From 1a17a05e3ce0e13e68ac3bf193348122258cc830 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Thu, 23 Apr 2026 22:01:35 +0100 Subject: [PATCH 07/21] rename to ArrayOperator --- src/indexing.jl | 20 +++++----- src/types.jl | 74 +++++++++++++++++++------------------ test/array_operator_test.jl | 42 +++++++++++++++++++++ test/runtests.jl | 6 +-- test/tensor_test.jl | 42 --------------------- 5 files changed, 93 insertions(+), 91 deletions(-) create mode 100644 test/array_operator_test.jl delete mode 100644 test/tensor_test.jl diff --git a/src/indexing.jl b/src/indexing.jl index dc2dbf0..bc78038 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -8,14 +8,14 @@ sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, siz # We need a system of indexing that takes two tuples -# of length N and M. We must then return a Tensor whose new input and output dimensions +# of length N and M. We must then return a ArrayOperator whose new input and output dimensions # are inferred from how many of the arguments in each respective tuple are integers. # For the purposes of DualArrays.jl, we only need to index the input dimensions # # Example: # -# Consider a DualVector with standard matrix Jacobian (Tensor{2, T, 1, 1}): +# Consider a DualVector with standard matrix Jacobian (ArrayOperator{2, T, 1, 1}): # # d = a + Bϵ # @@ -27,23 +27,23 @@ sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, siz Idx = Union{Int, Colon, AbstractUnitRange} count_ints(t::Tuple) = count(x -> x isa Int, t) -function getindex(t::Tensor{<:Any, T, N, M}, i::NTuple{N, Idx}, j::NTuple{M, Idx}) where {T, N, M} - # new value of M is inferred from the Tensor constructor and the order of +function getindex(t::ArrayOperator{<:Any, T, N, M}, i::NTuple{N, Idx}, j::NTuple{M, Idx}) where {T, N, M} + # new value of M is inferred from the ArrayOperator constructor and the order of # indexing the underlying array. newN = N - count_ints(i) - Tensor{newN}(sparse_getindex(t.data, i..., j...)) + ArrayOperator{newN, T, newN, M}(sparse_getindex(t.data, i..., j...)) end -function getindex(t::Tensor{<:Any, T, N, M}, i::NTuple{N, Idx}, ::Colon) where {T, N, M} - # new value of M is inferred from the Tensor constructor and the order of +function getindex(t::ArrayOperator{<:Any, T, N, M}, i::NTuple{N, Idx}, ::Colon) where {T, N, M} + # new value of M is inferred from the ArrayOperator constructor and the order of # indexing the underlying array. newN = N - count_ints(i) - Tensor{newN}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) + ArrayOperator{newN, T, newN, M}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) end # Integer indexing always returns a scalar. -getindex(t::Tensor, i::Vararg{Int}) = getindex(t.data, i...) +getindex(t::ArrayOperator, i::Vararg{Int}) = getindex(t.data, i...) # Indexing with only slices/ranges returns a similar tensor -getindex(t::Tensor{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = Tensor{N}(sparse_getindex(t.data, i...)) +getindex(t::ArrayOperator{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N, T, N, M}(sparse_getindex(t.data, i...)) """ Extract a single Dual number from a DualVector at position y. diff --git a/src/types.jl b/src/types.jl index e67cca5..e7577a7 100644 --- a/src/types.jl +++ b/src/types.jl @@ -1,9 +1,11 @@ # Core type definitions for DualArrays.jl """ -This type represents a Tensor along with its contraction rule. -A tensor with its contraction rule represents a linear map from an M-array to an N-array, -with 0-arrays considered scalars, 1-arrays vectors, etc. +This type represents a linear map from an M-array to an N-array, with a N+M=L-dimensional +array as its underlying data. This can be thought of analogously to an L-tensor equipped +with a contraction pattern characterised by (N, M). Specifically, the tensor has dimensions +a₁ x a₂ x ... x a_N x b₁ x b₂ x ... x b_M and maps an M-array of shape (b₁, b₂, ..., b_M) +to an N-array of shape (a₁, a₂, ..., a_N) by contracting over the last M indices. We have: -L is the dimensionality of the tensor @@ -13,69 +15,69 @@ We have: We enforce L = N + M by inferring M in the constructor. -In the context of DualArrays.jl, a DualArray (currently only a vector) can be thought of as +In the context of DualArrays.jl, a DualArray can be thought of as a + Jϵ Where a is an N-array of real numbers, J is an N+M=tensor and ϵ is an M-array of dual parts. In the simplest case, where N = 0, we have a Dual number with dual parts arranged in an M-array. """ -struct Tensor{L, T, N, M} <: AbstractArray{T, L} +struct ArrayOperator{L, T, N, M} <: AbstractArray{T, L} data::AbstractArray{T, L} end # Constructor to wrap an array with a tensor, given a contraction rule represented by N -function Tensor{N}(data::AbstractArray{T, L}) where {L, T, N} - Tensor{L, T, N, L - N}(data) +function ArrayOperator{N}(data::AbstractArray{T, L}) where {L, T, N} + ArrayOperator{L, T, N, L - N}(data) end # Helper convert function _convert_array(::Type{T}, a::AbstractArray) where {T} = T.(a) -_convert_array(::Type{T}, t::Tensor{L, S, N, M}) where {T, L, S, N, M} = Tensor{L, T, N, M}(_convert_array(T, t.data)) +_convert_array(::Type{T}, t::ArrayOperator{L, S, N, M}) where {T, L, S, N, M} = ArrayOperator{L, T, N, M}(_convert_array(T, t.data)) -Base.convert(::Type{Tensor{L, T, N, M}}, tensor::Tensor{L, S, N, M}) where {L, T, N, M, S} = - Tensor{L, T, N, M}(_convert_array(T, tensor.data)) +Base.convert(::Type{ArrayOperator{L, T, N, M}}, tensor::ArrayOperator{L, S, N, M}) where {L, T, N, M, S} = + ArrayOperator{L, T, N, M}(_convert_array(T, tensor.data)) # Basic array interface for op in (:size, :axes) @eval begin - ($op)(t::Tensor) = ($op)(t.data) - ($op)(t::Tensor, i...) = ($op)(t.data, i...) + ($op)(t::ArrayOperator) = ($op)(t.data) + ($op)(t::ArrayOperator, i...) = ($op)(t.data, i...) end end -# Below we define a broadcast style for Tensors and override copy and copyto! -# This allows all arithmetic/broadcasting with Tensors to be handled by the +# Below we define a broadcast style for ArrayOperators and override copy and copyto! +# This allows all arithmetic/broadcasting with ArrayOperators to be handled by the # underlying logic of the array contained in the struct, while ensuring that -# all results stay as a Tensor. +# all results stay as a ArrayOperator. -struct TensorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{1} end -TensorBroadcastStyle{N}(::Val{M}) where {N, M} = TensorBroadcastStyle{N}() +struct ArrayOperatorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{1} end +ArrayOperatorBroadcastStyle{N}(::Val{M}) where {N, M} = ArrayOperatorBroadcastStyle{N}() # N is the input dimension of the tensor being broadcasted. # For he result of the broadcast we will choose to preserve the highest input dimension. -Base.BroadcastStyle(::Type{<:Tensor{<:Any, <:Any, N, <:Any}}) where {N} = TensorBroadcastStyle{N}() -Base.BroadcastStyle(::TensorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{0}) where {N} = TensorBroadcastStyle{N}() -Base.BroadcastStyle(::TensorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{1}) where {N} = TensorBroadcastStyle{N}() -Base.BroadcastStyle(::TensorBroadcastStyle{N}, ::TensorBroadcastStyle{M}) where {N, M} = TensorBroadcastStyle{max(N, M)}() +Base.BroadcastStyle(::Type{<:ArrayOperator{<:Any, <:Any, N, <:Any}}) where {N} = ArrayOperatorBroadcastStyle{N}() +Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{0}) where {N} = ArrayOperatorBroadcastStyle{N}() +Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{1}) where {N} = ArrayOperatorBroadcastStyle{N}() +Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::ArrayOperatorBroadcastStyle{M}) where {N, M} = ArrayOperatorBroadcastStyle{max(N, M)}() -# Helper functions to help define broadcasting/arithmetic with Tensors. -# By converting a broadcast involving Tensors into a broadcast +# Helper functions to help define broadcasting/arithmetic with ArrayOperators. +# By converting a broadcast involving ArrayOperators into a broadcast # involving the underlying arrays. -_unwrap(t::Tensor) = t.data +_unwrap(t::ArrayOperator) = t.data _unwrap(x) = x _unwrap_args(args::Tuple) = map(_unwrap, args) -# copy ensures that arithmetic involving a Tensor returns a Tensor -function Base.copy(bc::Broadcast.Broadcasted{TensorBroadcastStyle{N}}) where {N} - # We create a Broadcasted of the underlying arrays and create a Tensor containing +# copy ensures that arithmetic involving a ArrayOperator returns a ArrayOperator +function Base.copy(bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{N}}) where {N} + # We create a Broadcasted of the underlying arrays and create a ArrayOperator containing # the evaluated broadcast databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) - Tensor{N}(copy(Broadcast.flatten(databroadcast))) + ArrayOperator{N}(copy(Broadcast.flatten(databroadcast))) end # copyto adds support for .= -function Base.copyto!(dest::Tensor, bc::Broadcast.Broadcasted{TensorBroadcastStyle{N}}) where {N} +function Base.copyto!(dest::ArrayOperator, bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{N}}) where {N} # As above databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) copyto!(dest.data, Broadcast.flatten(databroadcast)) @@ -91,19 +93,19 @@ A dual number type that stores a value and its partials (derivatives). - `value::T`: The primal value - `partials::Partials`: The partial derivatives as a tensor mapping to a scalar """ -struct Dual{T, Partials <: (Tensor{L, T, 0, M} where {L, M})} <: Real +struct Dual{T, Partials <: (ArrayOperator{L, T, 0, M} where {L, M})} <: Real value::T partials::Partials end -function Dual(value::T, partials::Tensor{L, S, 0, M}) where {S, T, L, M} +function Dual(value::T, partials::ArrayOperator{L, S, 0, M}) where {S, T, L, M} T2 = promote_type(T, S) - Dual{T2, Tensor{L, T2, 0, M}}(convert(T2, value), convert(Tensor{L, T2, 0, M}, partials)) + Dual{T2, ArrayOperator{L, T2, 0, M}}(convert(T2, value), convert(ArrayOperator{L, T2, 0, M}, partials)) end # Helper function to define Duals from an AbstractArray function Dual(value, partials::AbstractArray{T, N}) where {T, N} - Dual(value, Tensor{0}(partials)) + Dual(value, ArrayOperator{0}(partials)) end """ @@ -124,7 +126,7 @@ For now the entries just return the values when indexed. Constructs a DualVector, ensuring that the vector length matches the number of rows in the Jacobian. """ -struct DualVector{T, V <: AbstractVector{T},J <: (Tensor{L, T, 1, M} where {L, M})} <: AbstractVector{Dual{T}} +struct DualVector{T, V <: AbstractVector{T},J <: (ArrayOperator{L, T, 1, M} where {L, M})} <: AbstractVector{Dual{T}} value::V jacobian::J @@ -139,14 +141,14 @@ end """ Constructor that forces type compatibility """ -function DualVector(value::AbstractVector, jacobian::Tensor) +function DualVector(value::AbstractVector, jacobian::ArrayOperator) T = promote_type(eltype(value), eltype(jacobian)) DualVector(_convert_array(T, value), _convert_array(T, jacobian)) end # Helper function to define DualVectors with AbstractArray jacobians function DualVector(value::AbstractVector, jacobian::AbstractArray{T, N}) where {T, N} - DualVector(value, Tensor{1}(jacobian)) + DualVector(value, ArrayOperator{1}(jacobian)) end # Basic equality for Dual numbers diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl new file mode 100644 index 0000000..57e9969 --- /dev/null +++ b/test/array_operator_test.jl @@ -0,0 +1,42 @@ +using Test, LinearAlgebra +using DualArrays: ArrayOperator + +@testset "Tensor" begin + t = ArrayOperator{1}([1 2 3;4 5 6;7 8 9]) + @testset "Indexing" begin + @test t[1,1] == 1 + @test t[(1,), (:,)] == ArrayOperator{0}([1, 2, 3]) + @test t[1:2, 1:2] == ArrayOperator{1}([1 2;4 5]) + + s = ArrayOperator{2}(ones(2,2,2)) + @test s[(1,1), (:,)] == ArrayOperator{0}([1, 1]) + @test s[(1,:), (:,)] == ArrayOperator{1}([1 1;1 1]) + @test s[(1,2), :] == ArrayOperator{0}([1, 1]) + end + + @testset "Arithmetic" begin + @test t + t isa ArrayOperator + @test t + t == ArrayOperator{1}([2 4 6;8 10 12;14 16 18]) + + @test t .* 2 isa ArrayOperator + @test t .* 2 == ArrayOperator{1}([2 4 6;8 10 12;14 16 18]) + @test t .* [1,2,3] == ArrayOperator{1}([1 2 3;8 10 12;21 24 27]) + + t1 = ArrayOperator{0}([1, 2, 3]) + t2 = ArrayOperator{1}([1 2 3]) + t3 = ArrayOperator{0}([2]) + + @test t1 .* t2 == ArrayOperator{1}([1 2 3;2 4 6;3 6 9]) + @test t1 .* t2 .+ t3 == ArrayOperator{1}([3 4 5;4 6 8;5 8 11]) + + @test sin.(t1) == ArrayOperator{0}(sin.([1, 2, 3])) + + s = ArrayOperator{1}(zeros(3, 3)) + + result = (s .= t .+ 1) + + @test result isa ArrayOperator + @test result.data == s.data + @test s == ArrayOperator{1}([2 3 4; 5 6 7; 8 9 10]) + end +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index aafd661..3875011 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,11 +1,11 @@ using DualArrays, Test, LinearAlgebra, ForwardDiff, BandedMatrices -using DualArrays: Tensor +using DualArrays: ArrayOperator @testset "DualArrays" begin @testset "Type Definition" begin @test_throws ArgumentError DualVector([1,2],I(3)) - @test Dual(1.0, [1, 2, 3]).partials == Tensor{0}([1.0, 2.0, 3.0]) + @test Dual(1.0, [1, 2, 3]).partials == ArrayOperator{0}([1.0, 2.0, 3.0]) end @testset "Indexing" begin @@ -101,5 +101,5 @@ using DualArrays: Tensor end include("broadcast_test.jl") - include("tensor_test.jl") + include("array_operator_test.jl") end \ No newline at end of file diff --git a/test/tensor_test.jl b/test/tensor_test.jl deleted file mode 100644 index 70e1570..0000000 --- a/test/tensor_test.jl +++ /dev/null @@ -1,42 +0,0 @@ -using Test, LinearAlgebra -using DualArrays: Tensor - -@testset "Tensor" begin - t = Tensor{1}([1 2 3;4 5 6;7 8 9]) - @testset "Indexing" begin - @test t[1,1] == 1 - @test t[(1,), (:,)] == Tensor{0}([1, 2, 3]) - @test t[1:2, 1:2] == Tensor{1}([1 2;4 5]) - - s = Tensor{2}(ones(2,2,2)) - @test s[(1,1), (:,)] == Tensor{0}([1, 1]) - @test s[(1,:), (:,)] == Tensor{1}([1 1;1 1]) - @test s[(1,2), :] == Tensor{0}([1, 1]) - end - - @testset "Arithmetic" begin - @test t + t isa Tensor - @test t + t == Tensor{1}([2 4 6;8 10 12;14 16 18]) - - @test t .* 2 isa Tensor - @test t .* 2 == Tensor{1}([2 4 6;8 10 12;14 16 18]) - @test t .* [1,2,3] == Tensor{1}([1 2 3;8 10 12;21 24 27]) - - t1 = Tensor{0}([1, 2, 3]) - t2 = Tensor{1}([1 2 3]) - t3 = Tensor{0}([2]) - - @test t1 .* t2 == Tensor{1}([1 2 3;2 4 6;3 6 9]) - @test t1 .* t2 .+ t3 == Tensor{1}([3 4 5;4 6 8;5 8 11]) - - @test sin.(t1) == Tensor{0}(sin.([1, 2, 3])) - - s = Tensor{1}(zeros(3, 3)) - - result = (s .= t .+ 1) - - @test result isa Tensor - @test result.data == s.data - @test s == Tensor{1}([2 3 4; 5 6 7; 8 9 10]) - end -end \ No newline at end of file From 02860bb05785338213b7725cf7f61876b61211a9 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Fri, 24 Apr 2026 18:02:24 +0100 Subject: [PATCH 08/21] fix bugs, implement dualarray --- src/indexing.jl | 6 +++--- src/types.jl | 25 +++++++++++++++---------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index bc78038..d59193b 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -31,19 +31,19 @@ function getindex(t::ArrayOperator{<:Any, T, N, M}, i::NTuple{N, Idx}, j::NTuple # new value of M is inferred from the ArrayOperator constructor and the order of # indexing the underlying array. newN = N - count_ints(i) - ArrayOperator{newN, T, newN, M}(sparse_getindex(t.data, i..., j...)) + ArrayOperator{newN}(sparse_getindex(t.data, i..., j...)) end function getindex(t::ArrayOperator{<:Any, T, N, M}, i::NTuple{N, Idx}, ::Colon) where {T, N, M} # new value of M is inferred from the ArrayOperator constructor and the order of # indexing the underlying array. newN = N - count_ints(i) - ArrayOperator{newN, T, newN, M}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) + ArrayOperator{newN}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) end # Integer indexing always returns a scalar. getindex(t::ArrayOperator, i::Vararg{Int}) = getindex(t.data, i...) # Indexing with only slices/ranges returns a similar tensor -getindex(t::ArrayOperator{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N, T, N, M}(sparse_getindex(t.data, i...)) +getindex(t::ArrayOperator{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N}(sparse_getindex(t.data, i...)) """ Extract a single Dual number from a DualVector at position y. diff --git a/src/types.jl b/src/types.jl index e7577a7..03314e7 100644 --- a/src/types.jl +++ b/src/types.jl @@ -126,31 +126,36 @@ For now the entries just return the values when indexed. Constructs a DualVector, ensuring that the vector length matches the number of rows in the Jacobian. """ -struct DualVector{T, V <: AbstractVector{T},J <: (ArrayOperator{L, T, 1, M} where {L, M})} <: AbstractVector{Dual{T}} - value::V +struct DualArray{T, N , A <: AbstractArray{T,N},J <: (ArrayOperator{L, T, N, M} where {L, M})} <: AbstractVector{Dual{T}} + value::A jacobian::J - function DualVector(value::V, jacobian::J) where {T, V <: AbstractVector{T}, J <: (Tensor{L, T, 1, M} where {L, M})} - if length(value) != size(jacobian, 1) + function DualArray(value::A, jacobian::J) where {T, N, A <: AbstractArray{T,N}, J <: (ArrayOperator{L, T, N, M} where {L, M})} + if size(value) != ntuple(i -> size(jacobian, i), N) throw(ArgumentError("Length of value vector must match number of rows in Jacobian.")) end - new{T, V, J}(value, jacobian) + new{T,N, A, J}(value, jacobian) end end """ Constructor that forces type compatibility """ -function DualVector(value::AbstractVector, jacobian::ArrayOperator) +function DualArray(value::AbstractArray, jacobian::ArrayOperator) T = promote_type(eltype(value), eltype(jacobian)) - DualVector(_convert_array(T, value), _convert_array(T, jacobian)) + DualArray(_convert_array(T, value), _convert_array(T, jacobian)) end -# Helper function to define DualVectors with AbstractArray jacobians -function DualVector(value::AbstractVector, jacobian::AbstractArray{T, N}) where {T, N} - DualVector(value, ArrayOperator{1}(jacobian)) +# Helper function to define DualArrays with AbstractArray jacobians +function DualArray(value::AbstractArray{S, M}, jacobian::AbstractArray{T, N}) where {S, T, N, M} + DualArray(value, ArrayOperator{M}(jacobian)) end +const DualVector = DualArray{T, 1} where {T} +const DualMatrix = DualArray{T, 2} where {T} + +DualVector(value::AbstractVector, jacobian) = DualArray(value, jacobian) +DualMatrix(value::AbstractMatrix, jacobian) = DualArray(value, jacobian) # Basic equality for Dual numbers ==(a::Dual, b::Dual) = a.value == b.value && a.partials == b.partials isapprox(a::Dual, b::Dual) = isapprox(a.value, b.value) && isapprox(a.partials, b.partials) \ No newline at end of file From a54c7f781cbd2361fa1d4082651cc61e2808152b Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Sat, 25 Apr 2026 13:58:57 +0100 Subject: [PATCH 09/21] change dual to have abstractarray partials --- src/DualArrays.jl | 4 ++-- src/types.jl | 11 +++-------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/DualArrays.jl b/src/DualArrays.jl index c169778..9375853 100644 --- a/src/DualArrays.jl +++ b/src/DualArrays.jl @@ -6,12 +6,12 @@ A Julia package for efficient automatic differentiation using dual numbers and d This package provides: - `Dual`: A dual number type for storing values and their derivatives - `DualVector`: A vector of dual numbers represented with a Jacobian matrix - +- `DualMatrix`: A matrix of dual numbers represented with a Jacobian tensor` Differentiation rules are mostly provided by ChainRules.jl. """ module DualArrays -export DualVector, Dual, jacobian +export DualVector, DualMatrix, Dual, jacobian import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \ diff --git a/src/types.jl b/src/types.jl index 03314e7..6757855 100644 --- a/src/types.jl +++ b/src/types.jl @@ -93,19 +93,14 @@ A dual number type that stores a value and its partials (derivatives). - `value::T`: The primal value - `partials::Partials`: The partial derivatives as a tensor mapping to a scalar """ -struct Dual{T, Partials <: (ArrayOperator{L, T, 0, M} where {L, M})} <: Real +struct Dual{T, Partials <: AbstractArray{T}} <: Real value::T partials::Partials end -function Dual(value::T, partials::ArrayOperator{L, S, 0, M}) where {S, T, L, M} +function Dual(value::T, partials::AbstractArray{S}) where {S, T} T2 = promote_type(T, S) - Dual{T2, ArrayOperator{L, T2, 0, M}}(convert(T2, value), convert(ArrayOperator{L, T2, 0, M}, partials)) -end - -# Helper function to define Duals from an AbstractArray -function Dual(value, partials::AbstractArray{T, N}) where {T, N} - Dual(value, ArrayOperator{0}(partials)) + Dual(convert(T2, value), _convert_array(T2, partials)) end """ From ecbaba8dadbc31a66a8bd620de000bcd94defbaf Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Sat, 25 Apr 2026 18:53:15 +0100 Subject: [PATCH 10/21] add * for ArrayOperator, remove <: AbstractArray and fix resulting bugs --- Project.toml | 4 ++- src/DualArrays.jl | 4 +-- src/arithmetic.jl | 64 ++++++++++++++++++++++++++++++++++++++++++++++- src/types.jl | 33 +++++++++++++++++++----- src/utilities.jl | 4 +-- test/runtests.jl | 2 +- 6 files changed, 98 insertions(+), 13 deletions(-) diff --git a/Project.toml b/Project.toml index 3e86a39..b7409b2 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "DualArrays" uuid = "429e4e16-f749-45f3-beec-30742fae38ce" -authors = ["Sheehan Olver "] version = "0.2.0" +authors = ["Sheehan Olver "] [deps] ArrayLayouts = "4c555306-a7a7-4459-81d9-ec55ddd5c99a" @@ -10,6 +10,7 @@ DiffRules = "b552c78f-8df3-52c6-915a-8e097449b14b" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2" [compat] DiffRules = "1.15.1" @@ -17,6 +18,7 @@ ForwardDiff = "1.2.2" Lux = "1.29.1" Plots = "1.41.1" SparseArrays = "1.10" +TensorOperations = "5.5.2" [extras] BandedMatrices = "aae01518-5342-5314-be14-df237901396f" diff --git a/src/DualArrays.jl b/src/DualArrays.jl index 9375853..19ba52b 100644 --- a/src/DualArrays.jl +++ b/src/DualArrays.jl @@ -13,9 +13,9 @@ module DualArrays export DualVector, DualMatrix, Dual, jacobian -import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \ +import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \, eltype, transpose, permutedims -using LinearAlgebra, ArrayLayouts, FillArrays, DiffRules +using LinearAlgebra, ArrayLayouts, FillArrays, DiffRules, TensorOperations include("types.jl") include("indexing.jl") diff --git a/src/arithmetic.jl b/src/arithmetic.jl index afcfcbc..1effd91 100644 --- a/src/arithmetic.jl +++ b/src/arithmetic.jl @@ -96,4 +96,66 @@ LinearAlgebra.dot(x::DualVector, y::AbstractVector) = Dual(dot(x.value, y), tran LinearAlgebra.dot(x::AbstractVector, y::DualVector) = Dual(dot(x, y.value), transpose(y.jacobian) * x) # solve -\(x::AbstractMatrix, y::DualVector) = DualVector(x \ y.value, x \ y.jacobian) \ No newline at end of file +\(x::AbstractMatrix, y::DualVector) = DualVector(x \ y.value, ArrayOperator{1}(x \ y.jacobian.data)) + +""" +----------------------- +Multiplication with ArrayOperators +----------------------- + +* for ArrayOperators generalises from matrix/vector +multiplication and uses tensor contraction provided by +TensorOperations.jl. Let an (A, B) ArrayOperator denote +an ArrayOperator with output dimension A and input dimension B +(i.e an ArrayOperator{A+B, T, A, B} for some type T). We only +allow multiplication between an (A, B) ArrayOperator and a (B, C) +ArrayOperator, with the result being an (A, C) ArrayOperator. +This generalises from: + +-An inner product: a row vector * a column vector -> a scalar: + (0, 1) * (1, 0) -> (0, 0) + +-An outer product: a column vector * a row vector -> a matrix: + (1, 0) * (0, 1) -> (1, 1) + +- matrix/vector multiplication: a matrix * a column vector -> a column vector: + (1, 1) * (1, 0) -> (1, 0) + +We can treat multiplication between ArrayOperators and regular arrays as multiplication +between two ArrayOperators: given an (A, B) ArrayOperator and an L-array, we can fix it as +having input dimension B and infer C = L - B. We use this contraction rule to multiply the two. +""" + +# Helper function to perform tensor contraction between x and y +function _contract(x, y, A, B, C) + x_idx = (ntuple(i -> i, A), ntuple(i -> i + A, B)) + y_idx = (ntuple(i -> i, B), ntuple(i -> i + B, C)) + ret_idx = (ntuple(i -> i, A), ntuple(i -> i + A, C)) + + return TensorOperations.tensorcontract(x, x_idx, false, y, y_idx, false, ret_idx, 1) +end + +function *(x::ArrayOperator{<:Any, <:Any, A, B}, y::ArrayOperator{<:Any, <:Any, B, C}) where {A, B, C} + return ArrayOperator{A}(_contract(x.data, y.data, A, B, C)) +end + +function *(x::ArrayOperator{<:Any, <:Any, A, B}, y::AbstractArray{<:Any, L}) where {A, B, L} + C = L - B + return ArrayOperator{A}(_contract(x.data, y, A, B, C)) +end + +function *(x::AbstractArray{<:Any, L}, y::ArrayOperator{<:Any, <:Any, B, C}) where {L, B, C} + A = L - B + return ArrayOperator{A}(_contract(x, y.data, A, B, C)) +end + +# Extra required arithmetic for ArrayOperators +Base.:+(x::ArrayOperator, y::ArrayOperator) = x .+ y +Base.:-(x::ArrayOperator, y::ArrayOperator) = x .- y +Base.:-(x::ArrayOperator) = (-).(x) + +Base.:*(a::Number, t::ArrayOperator{L, T, N, M}) where {L, T, N, M} = + ArrayOperator{L, promote_type(typeof(a), T), N, M}(a .* t.data) +Base.:*(t::ArrayOperator, a::Number) = a * t + + diff --git a/src/types.jl b/src/types.jl index 6757855..e592488 100644 --- a/src/types.jl +++ b/src/types.jl @@ -22,7 +22,7 @@ a + Jϵ Where a is an N-array of real numbers, J is an N+M=tensor and ϵ is an M-array of dual parts. In the simplest case, where N = 0, we have a Dual number with dual parts arranged in an M-array. """ -struct ArrayOperator{L, T, N, M} <: AbstractArray{T, L} +struct ArrayOperator{L, T, N, M} data::AbstractArray{T, L} end @@ -39,13 +39,28 @@ Base.convert(::Type{ArrayOperator{L, T, N, M}}, tensor::ArrayOperator{L, S, N, M ArrayOperator{L, T, N, M}(_convert_array(T, tensor.data)) # Basic array interface -for op in (:size, :axes) +for op in (:size, :axes, :iterate) @eval begin ($op)(t::ArrayOperator) = ($op)(t.data) ($op)(t::ArrayOperator, i...) = ($op)(t.data, i...) end end +# Since ArrayOperator is not an AbstractArray we define these manually +eltype(t::ArrayOperator) = eltype(t.data) + +Base.Broadcast.broadcastable(t::ArrayOperator) = t + +transpose(t::ArrayOperator) = transpose(t.data) +sum(t::ArrayOperator; kwargs...) = sum(t.data; kwargs...) + +==(a::ArrayOperator, b::ArrayOperator) = a.data == b.data +==(a::ArrayOperator, b::AbstractArray) = a.data == b +==(a::AbstractArray, b::ArrayOperator) = a == b.data +isapprox(a::ArrayOperator, b::ArrayOperator; kwargs...) = isapprox(a.data, b.data; kwargs...) +isapprox(a::ArrayOperator, b::AbstractArray; kwargs...) = isapprox(a.data, b; kwargs...) +isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(a, b.data; kwargs...) + # Below we define a broadcast style for ArrayOperators and override copy and copyto! # This allows all arithmetic/broadcasting with ArrayOperators to be handled by the # underlying logic of the array contained in the struct, while ensuring that @@ -57,14 +72,15 @@ ArrayOperatorBroadcastStyle{N}(::Val{M}) where {N, M} = ArrayOperatorBroadcastSt # N is the input dimension of the tensor being broadcasted. # For he result of the broadcast we will choose to preserve the highest input dimension. Base.BroadcastStyle(::Type{<:ArrayOperator{<:Any, <:Any, N, <:Any}}) where {N} = ArrayOperatorBroadcastStyle{N}() -Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{0}) where {N} = ArrayOperatorBroadcastStyle{N}() -Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{1}) where {N} = ArrayOperatorBroadcastStyle{N}() +Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{M}) where {N, M} = ArrayOperatorBroadcastStyle{N}() +Base.BroadcastStyle(::Broadcast.DefaultArrayStyle{M}, ::ArrayOperatorBroadcastStyle{N}) where {N, M} = ArrayOperatorBroadcastStyle{N}() Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::ArrayOperatorBroadcastStyle{M}) where {N, M} = ArrayOperatorBroadcastStyle{max(N, M)}() # Helper functions to help define broadcasting/arithmetic with ArrayOperators. # By converting a broadcast involving ArrayOperators into a broadcast # involving the underlying arrays. _unwrap(t::ArrayOperator) = t.data +_unwrap(bc::Broadcast.Broadcasted) = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) _unwrap(x) = x _unwrap_args(args::Tuple) = map(_unwrap, args) @@ -85,13 +101,13 @@ function Base.copyto!(dest::ArrayOperator, bc::Broadcast.Broadcasted{ArrayOperat end """ - Dual{T, Partials <: AbstractVector{T}} <: Real + Dual{T, Partials <: AbstractArray{T}} <: Real A dual number type that stores a value and its partials (derivatives). # Fields - `value::T`: The primal value -- `partials::Partials`: The partial derivatives as a tensor mapping to a scalar +- `partials::Partials`: The partial derivatives stored as an array """ struct Dual{T, Partials <: AbstractArray{T}} <: Real value::T @@ -103,6 +119,11 @@ function Dual(value::T, partials::AbstractArray{S}) where {S, T} Dual(convert(T2, value), _convert_array(T2, partials)) end +function Dual(value::T, partials::ArrayOperator{L, S, 0, M}) where {L, S, M, T} + T2 = promote_type(T, S) + Dual(convert(T2, value), _convert_array(T2, partials).data) +end + """ DualVector{T, M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}} diff --git a/src/utilities.jl b/src/utilities.jl index e892b95..91d3c20 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -10,8 +10,8 @@ end # Helper functions for vcat operations _jacobian(d::Dual) = permutedims(d.partials) -_jacobian(d::DualVector) = d.jacobian -_jacobian(d::DualVector, ::Int) = d.jacobian +_jacobian(d::DualVector) = d.jacobian.data +_jacobian(d::DualVector, ::Int) = d.jacobian.data _jacobian(x::Number, N::Int) = Zeros(typeof(x), 1, N) _value(v::AbstractVector) = v diff --git a/test/runtests.jl b/test/runtests.jl index 3875011..9b87f05 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,7 +5,7 @@ using DualArrays: ArrayOperator @testset "Type Definition" begin @test_throws ArgumentError DualVector([1,2],I(3)) - @test Dual(1.0, [1, 2, 3]).partials == ArrayOperator{0}([1.0, 2.0, 3.0]) + @test Dual(1.0, [1, 2, 3]).partials == [1.0, 2.0, 3.0] end @testset "Indexing" begin From e356ef728001023d2dd6c3f6d26b316803c88c14 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Mon, 27 Apr 2026 14:54:10 +0100 Subject: [PATCH 11/21] add tests, improve coverage --- src/indexing.jl | 19 +++++++------------ src/types.jl | 4 +++- src/utilities.jl | 2 +- test/array_operator_test.jl | 25 ++++++++++++++++++++++++- test/runtests.jl | 8 +++++++- 5 files changed, 42 insertions(+), 16 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index d59193b..189b0aa 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -46,10 +46,10 @@ getindex(t::ArrayOperator, i::Vararg{Int}) = getindex(t.data, i...) getindex(t::ArrayOperator{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N}(sparse_getindex(t.data, i...)) """ -Extract a single Dual number from a DualVector at position y. +Extract a single Dual number from a DualArray. """ -function getindex(x::DualVector, y::Int) - Dual(x.value[y], x.jacobian[(y,), :]) +function getindex(x::DualArray, args::Vararg{Int}) + Dual(x.value[args...], x.jacobian[args, :]) end """ @@ -61,12 +61,7 @@ function getindex(x::DualVector, y::UnitRange) DualVector(newval, newjac) end -""" -Return the size of the DualVector (length of the value vector). -""" -size(x::DualVector) = (length(x.value),) - -""" -Return the axes of the DualVector. -""" -axes(x::DualVector) = axes(x.value) \ No newline at end of file +# DualArray array interface +for op in (:size, :axes) + @eval $op(a::DualArray) = $op(a.value) +end \ No newline at end of file diff --git a/src/types.jl b/src/types.jl index e592488..725e2b1 100644 --- a/src/types.jl +++ b/src/types.jl @@ -66,7 +66,7 @@ isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(a, b.data; kw # underlying logic of the array contained in the struct, while ensuring that # all results stay as a ArrayOperator. -struct ArrayOperatorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{1} end +struct ArrayOperatorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{0} end ArrayOperatorBroadcastStyle{N}(::Val{M}) where {N, M} = ArrayOperatorBroadcastStyle{N}() # N is the input dimension of the tensor being broadcasted. @@ -108,6 +108,8 @@ A dual number type that stores a value and its partials (derivatives). # Fields - `value::T`: The primal value - `partials::Partials`: The partial derivatives stored as an array + +NOTE: Partials will soon be in the ArrayOperator format. """ struct Dual{T, Partials <: AbstractArray{T}} <: Real value::T diff --git a/src/utilities.jl b/src/utilities.jl index 91d3c20..726af37 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -49,7 +49,7 @@ end """ Custom display method for DualVectors. """ -show(io::IO, ::MIME"text/plain", x::DualVector) = +show(io::IO, ::MIME"text/plain", x::DualArray) = (print(io, x.value); print(io, " + "); print(io, x.jacobian); print(io, "𝛜")) """ diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl index 57e9969..4fa3ff3 100644 --- a/test/array_operator_test.jl +++ b/test/array_operator_test.jl @@ -1,8 +1,21 @@ using Test, LinearAlgebra using DualArrays: ArrayOperator -@testset "Tensor" begin +@testset "ArrayOperator" begin t = ArrayOperator{1}([1 2 3;4 5 6;7 8 9]) + + @testset "Equality" begin + @test t == [1 2 3;4 5 6;7 8 9] + @test [1 2 3;4 5 6;7 8 9] == t + @test isapprox(t, [1 2 3;4 5 6;7 8 9]) + end + @testset "BroadcastStyle Val constructor" begin + # A plain broadcast operation; broadcast machinery should dimension-adapt the + # ArrayOperator broadcast style via ArrayOperatorBroadcastStyle{N}(::Val{M}). + u = t .+ ones(3, 3) + @test u isa ArrayOperator + @test u == ArrayOperator{1}([2 3 4; 5 6 7; 8 9 10]) + end @testset "Indexing" begin @test t[1,1] == 1 @test t[(1,), (:,)] == ArrayOperator{0}([1, 2, 3]) @@ -26,6 +39,7 @@ using DualArrays: ArrayOperator t2 = ArrayOperator{1}([1 2 3]) t3 = ArrayOperator{0}([2]) + @test t1 .+ ones(3) == ArrayOperator{0}([2, 3, 4]) @test t1 .* t2 == ArrayOperator{1}([1 2 3;2 4 6;3 6 9]) @test t1 .* t2 .+ t3 == ArrayOperator{1}([3 4 5;4 6 8;5 8 11]) @@ -38,5 +52,14 @@ using DualArrays: ArrayOperator @test result isa ArrayOperator @test result.data == s.data @test s == ArrayOperator{1}([2 3 4; 5 6 7; 8 9 10]) + + @test t * t == ArrayOperator{1}([30 36 42;66 81 96; 102 126 150]) + @test t * [1, 0, 0] == ArrayOperator{1}([1,4,7]) + @test [1 0 0] * t == ArrayOperator{1}([1 2 3]) + + @test t * 2 == [2 4 6;8 10 12;14 16 18] + @test 2 * t == [2 4 6;8 10 12;14 16 18] + + @test s - t == ones(3, 3) end end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 9b87f05..144ed01 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,11 +9,17 @@ using DualArrays: ArrayOperator end @testset "Indexing" begin - v = DualVector([1, 2, 3], [1 2 3; 4 5 6;7 8 9]) + v = DualVector([1., 2, 3], [1 2 3; 4 5 6;7 8 9]) + m = DualMatrix([1 2;3 4], zeros(2, 2, 2)) @test size(v) == (3,) @test axes(v) == (Base.OneTo(3),) + @test size(m) == (2, 2) + @test axes(m) == (Base.OneTo(2), Base.OneTo(2)) + + @test m[1, 1] == Dual(1, [0, 0]) + @test v[1] isa Dual @test v[1] == Dual(1,[1,2,3]) @test v[2] == Dual(2,[4,5,6]) From 13ef5b8de9b97cf907f72df69984ff5b25df842c Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Mon, 27 Apr 2026 15:12:39 +0100 Subject: [PATCH 12/21] small additions --- src/types.jl | 5 +---- test/array_operator_test.jl | 9 +-------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/src/types.jl b/src/types.jl index 725e2b1..08cb494 100644 --- a/src/types.jl +++ b/src/types.jl @@ -35,9 +35,6 @@ end _convert_array(::Type{T}, a::AbstractArray) where {T} = T.(a) _convert_array(::Type{T}, t::ArrayOperator{L, S, N, M}) where {T, L, S, N, M} = ArrayOperator{L, T, N, M}(_convert_array(T, t.data)) -Base.convert(::Type{ArrayOperator{L, T, N, M}}, tensor::ArrayOperator{L, S, N, M}) where {L, T, N, M, S} = - ArrayOperator{L, T, N, M}(_convert_array(T, tensor.data)) - # Basic array interface for op in (:size, :axes, :iterate) @eval begin @@ -59,7 +56,7 @@ sum(t::ArrayOperator; kwargs...) = sum(t.data; kwargs...) ==(a::AbstractArray, b::ArrayOperator) = a == b.data isapprox(a::ArrayOperator, b::ArrayOperator; kwargs...) = isapprox(a.data, b.data; kwargs...) isapprox(a::ArrayOperator, b::AbstractArray; kwargs...) = isapprox(a.data, b; kwargs...) -isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(a, b.data; kwargs...) +isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(b, a) # Below we define a broadcast style for ArrayOperators and override copy and copyto! # This allows all arithmetic/broadcasting with ArrayOperators to be handled by the diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl index 4fa3ff3..0194b3a 100644 --- a/test/array_operator_test.jl +++ b/test/array_operator_test.jl @@ -9,13 +9,6 @@ using DualArrays: ArrayOperator @test [1 2 3;4 5 6;7 8 9] == t @test isapprox(t, [1 2 3;4 5 6;7 8 9]) end - @testset "BroadcastStyle Val constructor" begin - # A plain broadcast operation; broadcast machinery should dimension-adapt the - # ArrayOperator broadcast style via ArrayOperatorBroadcastStyle{N}(::Val{M}). - u = t .+ ones(3, 3) - @test u isa ArrayOperator - @test u == ArrayOperator{1}([2 3 4; 5 6 7; 8 9 10]) - end @testset "Indexing" begin @test t[1,1] == 1 @test t[(1,), (:,)] == ArrayOperator{0}([1, 2, 3]) @@ -39,7 +32,7 @@ using DualArrays: ArrayOperator t2 = ArrayOperator{1}([1 2 3]) t3 = ArrayOperator{0}([2]) - @test t1 .+ ones(3) == ArrayOperator{0}([2, 3, 4]) + @test t1 .+ ones(3, 3) == [2 2 2;3 3 3; 4 4 4] @test t1 .* t2 == ArrayOperator{1}([1 2 3;2 4 6;3 6 9]) @test t1 .* t2 .+ t3 == ArrayOperator{1}([3 4 5;4 6 8;5 8 11]) From 514b34aaa63cf30994bd31f77dbe219528176a72 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Mon, 27 Apr 2026 15:27:01 +0100 Subject: [PATCH 13/21] fix coverage --- src/types.jl | 1 - test/array_operator_test.jl | 1 + test/runtests.jl | 4 ++++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/types.jl b/src/types.jl index 08cb494..82330bf 100644 --- a/src/types.jl +++ b/src/types.jl @@ -64,7 +64,6 @@ isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(b, a) # all results stay as a ArrayOperator. struct ArrayOperatorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{0} end -ArrayOperatorBroadcastStyle{N}(::Val{M}) where {N, M} = ArrayOperatorBroadcastStyle{N}() # N is the input dimension of the tensor being broadcasted. # For he result of the broadcast we will choose to preserve the highest input dimension. diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl index 0194b3a..9857e44 100644 --- a/test/array_operator_test.jl +++ b/test/array_operator_test.jl @@ -8,6 +8,7 @@ using DualArrays: ArrayOperator @test t == [1 2 3;4 5 6;7 8 9] @test [1 2 3;4 5 6;7 8 9] == t @test isapprox(t, [1 2 3;4 5 6;7 8 9]) + @test isapprox([1 2 3;4 5 6;7 8 9], t) end @testset "Indexing" begin @test t[1,1] == 1 diff --git a/test/runtests.jl b/test/runtests.jl index 144ed01..2f4c8ed 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -97,6 +97,10 @@ using DualArrays: ArrayOperator @test vcat(x) == DualVector([1], [1 2 3]) @test vcat(x, x) == DualVector([1, 1], [1 2 3;1 2 3]) @test vcat(x, y) == DualVector([1, 2, 3], [1 2 3;4 5 6;7 8 9]) + + z = DualVector([2, 3], [1 0; 0 1]) + @test vcat(1, z).jacobian.data == [0 0; 1 0; 0 1] + @test vcat(z, 1).jacobian.data == [1 0; 0 1; 0 0] end @testset "show" begin From cf3c93068cd021bf9af4a97be02d54a913526b68 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Tue, 5 May 2026 12:08:09 +0100 Subject: [PATCH 14/21] rethink broadcasting --- src/types.jl | 57 ++++++++++++++++++++++++++++--------- test/array_operator_test.jl | 4 ++- 2 files changed, 46 insertions(+), 15 deletions(-) diff --git a/src/types.jl b/src/types.jl index 82330bf..962804d 100644 --- a/src/types.jl +++ b/src/types.jl @@ -58,19 +58,48 @@ isapprox(a::ArrayOperator, b::ArrayOperator; kwargs...) = isapprox(a.data, b.dat isapprox(a::ArrayOperator, b::AbstractArray; kwargs...) = isapprox(a.data, b; kwargs...) isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(b, a) -# Below we define a broadcast style for ArrayOperators and override copy and copyto! -# This allows all arithmetic/broadcasting with ArrayOperators to be handled by the -# underlying logic of the array contained in the struct, while ensuring that -# all results stay as a ArrayOperator. - -struct ArrayOperatorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{0} end - -# N is the input dimension of the tensor being broadcasted. -# For he result of the broadcast we will choose to preserve the highest input dimension. -Base.BroadcastStyle(::Type{<:ArrayOperator{<:Any, <:Any, N, <:Any}}) where {N} = ArrayOperatorBroadcastStyle{N}() -Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{M}) where {N, M} = ArrayOperatorBroadcastStyle{N}() -Base.BroadcastStyle(::Broadcast.DefaultArrayStyle{M}, ::ArrayOperatorBroadcastStyle{N}) where {N, M} = ArrayOperatorBroadcastStyle{N}() -Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::ArrayOperatorBroadcastStyle{M}) where {N, M} = ArrayOperatorBroadcastStyle{max(N, M)}() +# -------------------- +# Broadcasting with ArrayOperators +# -------------------- +# +# We want broadcasting on ArrayOperators to behave +# as it would on the underlying array, but preserving the +# ArrayOperator{N, M, T, L} type with the correct type parameters. +# T and L are preserved or inferred from type promotion. +# When the highest order is an ArrayOperator, N and M of that +# ArrayOperator are preserved. We do not support binary broadcasts +# for cases where: +# +# 1. The highest order is not an ArrayOperator +# 2. We have an (N, M) ArrayOperator and an (N2, M2) ArrayOperator +# with N + M = N2 + M2 but (N, M) != (N2, M2). +# +# As inferring N and M of the resulting ArrayOperator is ambiguous. + +# We define a custom broadcast style. +# We inherit from the L-array broadcast style and require output dimension N +# as extra information. +struct ArrayOperatorBroadcastStyle{L, N} <: Broadcast.AbstractArrayStyle{L} end + +Base.BroadcastStyle(::Type{<:ArrayOperator{L, <:Any, N, <:Any}}) where {L, N} = ArrayOperatorBroadcastStyle{L, N}() +function Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{L, N}, ::Broadcast.DefaultArrayStyle{M}) where {L, N, M} + # Julia optimises these checks at compile time. + if L >= M + ArrayOperatorBroadcastStyle{L, N}() + else + throw(ArgumentError("Ambiguous output dimension for resulting ArrayOperator")) + end +end +Base.BroadcastStyle(a::Broadcast.DefaultArrayStyle{M}, b::ArrayOperatorBroadcastStyle{L, N}) where {L, N, M} = Base.BroadcastStyle(b, a) +function Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{L1, N1},::ArrayOperatorBroadcastStyle{L2, N2}) where {L1, N1, L2, N2} + if L1 > L2 + ArrayOperatorBroadcastStyle{L1, N1}() + elseif L2 > L1 + ArrayOperatorBroadcastStyle{L2, N2}() + else + throw(ArgumentError("Ambiguous output dimension for resulting ArrayOperator")) + end +end # Helper functions to help define broadcasting/arithmetic with ArrayOperators. # By converting a broadcast involving ArrayOperators into a broadcast @@ -89,7 +118,7 @@ function Base.copy(bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{N}}) wh end # copyto adds support for .= -function Base.copyto!(dest::ArrayOperator, bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{N}}) where {N} +function Base.copyto!(dest::ArrayOperator, bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{L, N}}) where {L, N} # As above databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) copyto!(dest.data, Broadcast.flatten(databroadcast)) diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl index 9857e44..76ddf94 100644 --- a/test/array_operator_test.jl +++ b/test/array_operator_test.jl @@ -33,7 +33,7 @@ using DualArrays: ArrayOperator t2 = ArrayOperator{1}([1 2 3]) t3 = ArrayOperator{0}([2]) - @test t1 .+ ones(3, 3) == [2 2 2;3 3 3; 4 4 4] + @test_throws ArgumentError t1 .+ ones(3, 3) @test t1 .* t2 == ArrayOperator{1}([1 2 3;2 4 6;3 6 9]) @test t1 .* t2 .+ t3 == ArrayOperator{1}([3 4 5;4 6 8;5 8 11]) @@ -55,5 +55,7 @@ using DualArrays: ArrayOperator @test 2 * t == [2 4 6;8 10 12;14 16 18] @test s - t == ones(3, 3) + + @test_throws ArgumentError ArrayOperator{1}(ones(3,3)) .* ArrayOperator{0}(ones(3,3)) end end \ No newline at end of file From 75b0953e8ef9d903ff9590e1a099e662bb99b57c Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Tue, 5 May 2026 12:26:47 +0100 Subject: [PATCH 15/21] fix parameter ordering --- src/arithmetic.jl | 10 +++++----- src/indexing.jl | 6 +++--- src/types.jl | 16 ++++++++-------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/arithmetic.jl b/src/arithmetic.jl index 1effd91..323bc03 100644 --- a/src/arithmetic.jl +++ b/src/arithmetic.jl @@ -135,16 +135,16 @@ function _contract(x, y, A, B, C) return TensorOperations.tensorcontract(x, x_idx, false, y, y_idx, false, ret_idx, 1) end -function *(x::ArrayOperator{<:Any, <:Any, A, B}, y::ArrayOperator{<:Any, <:Any, B, C}) where {A, B, C} +function *(x::ArrayOperator{A, B, <:Any, <:Any}, y::ArrayOperator{B, C, <:Any, <:Any}) where {A, B, C} return ArrayOperator{A}(_contract(x.data, y.data, A, B, C)) end -function *(x::ArrayOperator{<:Any, <:Any, A, B}, y::AbstractArray{<:Any, L}) where {A, B, L} +function *(x::ArrayOperator{A, B, <:Any, <:Any}, y::AbstractArray{<:Any, L}) where {A, B, L} C = L - B return ArrayOperator{A}(_contract(x.data, y, A, B, C)) end -function *(x::AbstractArray{<:Any, L}, y::ArrayOperator{<:Any, <:Any, B, C}) where {L, B, C} +function *(x::AbstractArray{<:Any, L}, y::ArrayOperator{B, C, <:Any, <:Any}) where {L, B, C} A = L - B return ArrayOperator{A}(_contract(x, y.data, A, B, C)) end @@ -154,8 +154,8 @@ Base.:+(x::ArrayOperator, y::ArrayOperator) = x .+ y Base.:-(x::ArrayOperator, y::ArrayOperator) = x .- y Base.:-(x::ArrayOperator) = (-).(x) -Base.:*(a::Number, t::ArrayOperator{L, T, N, M}) where {L, T, N, M} = - ArrayOperator{L, promote_type(typeof(a), T), N, M}(a .* t.data) +Base.:*(a::Number, t::ArrayOperator{N, M, T, L}) where {N, M, T, L} = + ArrayOperator{N, M, promote_type(typeof(a), T), L}(a .* t.data) Base.:*(t::ArrayOperator, a::Number) = a * t diff --git a/src/indexing.jl b/src/indexing.jl index 189b0aa..e6c9d56 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -27,14 +27,14 @@ sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, siz Idx = Union{Int, Colon, AbstractUnitRange} count_ints(t::Tuple) = count(x -> x isa Int, t) -function getindex(t::ArrayOperator{<:Any, T, N, M}, i::NTuple{N, Idx}, j::NTuple{M, Idx}) where {T, N, M} +function getindex(t::ArrayOperator{N, M, <:Any, <:Any}, i::NTuple{N, Idx}, j::NTuple{M, Idx}) where {N, M} # new value of M is inferred from the ArrayOperator constructor and the order of # indexing the underlying array. newN = N - count_ints(i) ArrayOperator{newN}(sparse_getindex(t.data, i..., j...)) end -function getindex(t::ArrayOperator{<:Any, T, N, M}, i::NTuple{N, Idx}, ::Colon) where {T, N, M} +function getindex(t::ArrayOperator{N, M, <:Any, <:Any}, i::NTuple{N, Idx}, ::Colon) where {N, M} # new value of M is inferred from the ArrayOperator constructor and the order of # indexing the underlying array. newN = N - count_ints(i) @@ -43,7 +43,7 @@ end # Integer indexing always returns a scalar. getindex(t::ArrayOperator, i::Vararg{Int}) = getindex(t.data, i...) # Indexing with only slices/ranges returns a similar tensor -getindex(t::ArrayOperator{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N}(sparse_getindex(t.data, i...)) +getindex(t::ArrayOperator{N, M, <:Any, <:Any}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N}(sparse_getindex(t.data, i...)) """ Extract a single Dual number from a DualArray. diff --git a/src/types.jl b/src/types.jl index 962804d..bd97444 100644 --- a/src/types.jl +++ b/src/types.jl @@ -22,18 +22,18 @@ a + Jϵ Where a is an N-array of real numbers, J is an N+M=tensor and ϵ is an M-array of dual parts. In the simplest case, where N = 0, we have a Dual number with dual parts arranged in an M-array. """ -struct ArrayOperator{L, T, N, M} +struct ArrayOperator{N, M, T, L} data::AbstractArray{T, L} end # Constructor to wrap an array with a tensor, given a contraction rule represented by N function ArrayOperator{N}(data::AbstractArray{T, L}) where {L, T, N} - ArrayOperator{L, T, N, L - N}(data) + ArrayOperator{N, L - N, T, L}(data) end # Helper convert function _convert_array(::Type{T}, a::AbstractArray) where {T} = T.(a) -_convert_array(::Type{T}, t::ArrayOperator{L, S, N, M}) where {T, L, S, N, M} = ArrayOperator{L, T, N, M}(_convert_array(T, t.data)) +_convert_array(::Type{T}, t::ArrayOperator{N, M, S, L}) where {T, N, M, S, L} = ArrayOperator{N, M, T, L}(_convert_array(T, t.data)) # Basic array interface for op in (:size, :axes, :iterate) @@ -81,7 +81,7 @@ isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(b, a) # as extra information. struct ArrayOperatorBroadcastStyle{L, N} <: Broadcast.AbstractArrayStyle{L} end -Base.BroadcastStyle(::Type{<:ArrayOperator{L, <:Any, N, <:Any}}) where {L, N} = ArrayOperatorBroadcastStyle{L, N}() +Base.BroadcastStyle(::Type{<:ArrayOperator{N, <:Any, <:Any, L}}) where {L, N} = ArrayOperatorBroadcastStyle{L, N}() function Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{L, N}, ::Broadcast.DefaultArrayStyle{M}) where {L, N, M} # Julia optimises these checks at compile time. if L >= M @@ -110,7 +110,7 @@ _unwrap(x) = x _unwrap_args(args::Tuple) = map(_unwrap, args) # copy ensures that arithmetic involving a ArrayOperator returns a ArrayOperator -function Base.copy(bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{N}}) where {N} +function Base.copy(bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{L, N}}) where {L, N} # We create a Broadcasted of the underlying arrays and create a ArrayOperator containing # the evaluated broadcast databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) @@ -146,7 +146,7 @@ function Dual(value::T, partials::AbstractArray{S}) where {S, T} Dual(convert(T2, value), _convert_array(T2, partials)) end -function Dual(value::T, partials::ArrayOperator{L, S, 0, M}) where {L, S, M, T} +function Dual(value::T, partials::ArrayOperator{0, M, S, L}) where {L, S, M, T} T2 = promote_type(T, S) Dual(convert(T2, value), _convert_array(T2, partials).data) end @@ -169,11 +169,11 @@ For now the entries just return the values when indexed. Constructs a DualVector, ensuring that the vector length matches the number of rows in the Jacobian. """ -struct DualArray{T, N , A <: AbstractArray{T,N},J <: (ArrayOperator{L, T, N, M} where {L, M})} <: AbstractVector{Dual{T}} +struct DualArray{T, N , A <: AbstractArray{T,N},J <: (ArrayOperator{N, M, T, L} where {L, M})} <: AbstractVector{Dual{T}} value::A jacobian::J - function DualArray(value::A, jacobian::J) where {T, N, A <: AbstractArray{T,N}, J <: (ArrayOperator{L, T, N, M} where {L, M})} + function DualArray(value::A, jacobian::J) where {T, N, A <: AbstractArray{T,N}, J <: (ArrayOperator{N, M, T, L} where {L, M})} if size(value) != ntuple(i -> size(jacobian, i), N) throw(ArgumentError("Length of value vector must match number of rows in Jacobian.")) end From 62e4f433f21c54eab064ee179ff3ea3ab7a43f50 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Tue, 5 May 2026 12:35:54 +0100 Subject: [PATCH 16/21] fix equality --- src/types.jl | 9 +++------ test/array_operator_test.jl | 14 +++++++------- test/broadcast_test.jl | 18 +++++++++--------- 3 files changed, 19 insertions(+), 22 deletions(-) diff --git a/src/types.jl b/src/types.jl index bd97444..32ae8ea 100644 --- a/src/types.jl +++ b/src/types.jl @@ -51,12 +51,9 @@ Base.Broadcast.broadcastable(t::ArrayOperator) = t transpose(t::ArrayOperator) = transpose(t.data) sum(t::ArrayOperator; kwargs...) = sum(t.data; kwargs...) -==(a::ArrayOperator, b::ArrayOperator) = a.data == b.data -==(a::ArrayOperator, b::AbstractArray) = a.data == b -==(a::AbstractArray, b::ArrayOperator) = a == b.data -isapprox(a::ArrayOperator, b::ArrayOperator; kwargs...) = isapprox(a.data, b.data; kwargs...) -isapprox(a::ArrayOperator, b::AbstractArray; kwargs...) = isapprox(a.data, b; kwargs...) -isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(b, a) +# Equality is only defined for two ArrayOperators of the same (N, M). +==(a::ArrayOperator{N, M}, b::ArrayOperator{N, M}) where {N, M} = a.data == b.data +isapprox(a::ArrayOperator{N, M}, b::ArrayOperator{N, M}; kwargs...) where {N, M} = isapprox(a.data, b.data; kwargs...) # -------------------- # Broadcasting with ArrayOperators diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl index 76ddf94..4ae5e30 100644 --- a/test/array_operator_test.jl +++ b/test/array_operator_test.jl @@ -5,10 +5,10 @@ using DualArrays: ArrayOperator t = ArrayOperator{1}([1 2 3;4 5 6;7 8 9]) @testset "Equality" begin - @test t == [1 2 3;4 5 6;7 8 9] - @test [1 2 3;4 5 6;7 8 9] == t - @test isapprox(t, [1 2 3;4 5 6;7 8 9]) - @test isapprox([1 2 3;4 5 6;7 8 9], t) + @test t == ArrayOperator{1}([1 2 3;4 5 6;7 8 9]) + @test isapprox(t, ArrayOperator{1}([1 2 3;4 5 6;7 8 9])) + @test t != [1 2 3;4 5 6;7 8 9] + @test t != ArrayOperator{0}([1 2 3;4 5 6;7 8 9]) end @testset "Indexing" begin @test t[1,1] == 1 @@ -51,10 +51,10 @@ using DualArrays: ArrayOperator @test t * [1, 0, 0] == ArrayOperator{1}([1,4,7]) @test [1 0 0] * t == ArrayOperator{1}([1 2 3]) - @test t * 2 == [2 4 6;8 10 12;14 16 18] - @test 2 * t == [2 4 6;8 10 12;14 16 18] + @test t * 2 == ArrayOperator{1}([2 4 6;8 10 12;14 16 18]) + @test 2 * t == ArrayOperator{1}([2 4 6;8 10 12;14 16 18]) - @test s - t == ones(3, 3) + @test s - t == ArrayOperator{1}(ones(3, 3)) @test_throws ArgumentError ArrayOperator{1}(ones(3,3)) .* ArrayOperator{0}(ones(3,3)) end diff --git a/test/broadcast_test.jl b/test/broadcast_test.jl index f87d9f4..e368e08 100644 --- a/test/broadcast_test.jl +++ b/test/broadcast_test.jl @@ -22,11 +22,11 @@ using DualArrays, Test, SparseArrays, LinearAlgebra @test l.value ≈ log.([1.0, 2.0, 3.0]) @test a.value ≈ tanh.([1.0, 2.0, 3.0]) - @test s.jacobian ≈ Diagonal(cos.([1.0, 2.0, 3.0])) - @test c.jacobian ≈ Diagonal(-sin.([1.0, 2.0, 3.0])) - @test e.jacobian ≈ Diagonal(exp.([1.0, 2.0, 3.0])) - @test l.jacobian ≈ Diagonal(1.0 ./ [1.0, 2.0, 3.0]) - @test a.jacobian ≈ Diagonal(1.0 .- tanh.([1.0, 2.0, 3.0]).^2) + @test s.jacobian.data ≈ Diagonal(cos.([1.0, 2.0, 3.0])) + @test c.jacobian.data ≈ Diagonal(-sin.([1.0, 2.0, 3.0])) + @test e.jacobian.data ≈ Diagonal(exp.([1.0, 2.0, 3.0])) + @test l.jacobian.data ≈ Diagonal(1.0 ./ [1.0, 2.0, 3.0]) + @test a.jacobian.data ≈ Diagonal(1.0 .- tanh.([1.0, 2.0, 3.0]).^2) # Test broadcasting with binary operations a = d .+ 2.0 @@ -67,8 +67,8 @@ using DualArrays, Test, SparseArrays, LinearAlgebra @test m.value ≈ [2.0, 4.0, 6.0] @test div.value ≈ [0.5, 1.0, 1.5] - @test a.jacobian ≈ [2.0 2.0 3.0; 1.0 3.0 3.0; 1.0 2.0 4.0] - @test s.jacobian ≈ [0.0 -2.0 -3.0; -1.0 -1.0 -3.0; -1.0 -2.0 -2.0] - @test m.jacobian ≈ [3.0 2.0 3.0; 2.0 6.0 6.0; 3.0 6.0 11.0] - @test div.jacobian ≈ [0.25 -0.5 -0.75; -0.5 -0.5 -1.5; -0.75 -1.5 -1.75] + @test a.jacobian.data ≈ [2.0 2.0 3.0; 1.0 3.0 3.0; 1.0 2.0 4.0] + @test s.jacobian.data ≈ [0.0 -2.0 -3.0; -1.0 -1.0 -3.0; -1.0 -2.0 -2.0] + @test m.jacobian.data ≈ [3.0 2.0 3.0; 2.0 6.0 6.0; 3.0 6.0 11.0] + @test div.jacobian.data ≈ [0.25 -0.5 -0.75; -0.5 -0.5 -1.5; -0.75 -1.5 -1.75] end \ No newline at end of file From 6d2fb7b3a1ad5c06e436fc6b9a1460a9aa5c7262 Mon Sep 17 00:00:00 2001 From: Sheehan Olver Date: Fri, 8 May 2026 11:10:53 +0100 Subject: [PATCH 17/21] Update arithmetic.jl --- src/arithmetic.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/arithmetic.jl b/src/arithmetic.jl index 323bc03..fee37b4 100644 --- a/src/arithmetic.jl +++ b/src/arithmetic.jl @@ -135,16 +135,16 @@ function _contract(x, y, A, B, C) return TensorOperations.tensorcontract(x, x_idx, false, y, y_idx, false, ret_idx, 1) end -function *(x::ArrayOperator{A, B, <:Any, <:Any}, y::ArrayOperator{B, C, <:Any, <:Any}) where {A, B, C} +function *(x::ArrayOperator{A, B}, y::ArrayOperator{B, C}) where {A, B, C} return ArrayOperator{A}(_contract(x.data, y.data, A, B, C)) end -function *(x::ArrayOperator{A, B, <:Any, <:Any}, y::AbstractArray{<:Any, L}) where {A, B, L} +function *(x::ArrayOperator{A, B}, y::AbstractArray{<:Any, L}) where {A, B, L} C = L - B return ArrayOperator{A}(_contract(x.data, y, A, B, C)) end -function *(x::AbstractArray{<:Any, L}, y::ArrayOperator{B, C, <:Any, <:Any}) where {L, B, C} +function *(x::AbstractArray{<:Any, L}, y::ArrayOperator{B, C}) where {L, B, C} A = L - B return ArrayOperator{A}(_contract(x, y.data, A, B, C)) end From 900e4873fa5ce48572c303aa121b8b0d1d7bae2c Mon Sep 17 00:00:00 2001 From: Sheehan Olver Date: Fri, 8 May 2026 11:11:52 +0100 Subject: [PATCH 18/21] Update indexing.jl --- src/indexing.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index e6c9d56..16ed280 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -27,14 +27,14 @@ sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, siz Idx = Union{Int, Colon, AbstractUnitRange} count_ints(t::Tuple) = count(x -> x isa Int, t) -function getindex(t::ArrayOperator{N, M, <:Any, <:Any}, i::NTuple{N, Idx}, j::NTuple{M, Idx}) where {N, M} +function getindex(t::ArrayOperator{N, M}, i::NTuple{N, Idx}, j::NTuple{M, Idx}) where {N, M} # new value of M is inferred from the ArrayOperator constructor and the order of # indexing the underlying array. newN = N - count_ints(i) ArrayOperator{newN}(sparse_getindex(t.data, i..., j...)) end -function getindex(t::ArrayOperator{N, M, <:Any, <:Any}, i::NTuple{N, Idx}, ::Colon) where {N, M} +function getindex(t::ArrayOperator{N, M}, i::NTuple{N, Idx}, ::Colon) where {N, M} # new value of M is inferred from the ArrayOperator constructor and the order of # indexing the underlying array. newN = N - count_ints(i) @@ -43,7 +43,7 @@ end # Integer indexing always returns a scalar. getindex(t::ArrayOperator, i::Vararg{Int}) = getindex(t.data, i...) # Indexing with only slices/ranges returns a similar tensor -getindex(t::ArrayOperator{N, M, <:Any, <:Any}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N}(sparse_getindex(t.data, i...)) +getindex(t::ArrayOperator{N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N}(sparse_getindex(t.data, i...)) """ Extract a single Dual number from a DualArray. From cafa16b57571f129f9c32a23f38076a05a901a63 Mon Sep 17 00:00:00 2001 From: Sheehan Olver Date: Fri, 8 May 2026 11:16:57 +0100 Subject: [PATCH 19/21] _convert_array -> elconvert --- src/DualArrays.jl | 2 ++ src/types.jl | 9 ++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/DualArrays.jl b/src/DualArrays.jl index 19ba52b..b639121 100644 --- a/src/DualArrays.jl +++ b/src/DualArrays.jl @@ -17,6 +17,8 @@ import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, conve using LinearAlgebra, ArrayLayouts, FillArrays, DiffRules, TensorOperations +import FillArrays: elconvert + include("types.jl") include("indexing.jl") include("arithmetic.jl") diff --git a/src/types.jl b/src/types.jl index 32ae8ea..bfa597e 100644 --- a/src/types.jl +++ b/src/types.jl @@ -32,8 +32,7 @@ function ArrayOperator{N}(data::AbstractArray{T, L}) where {L, T, N} end # Helper convert function -_convert_array(::Type{T}, a::AbstractArray) where {T} = T.(a) -_convert_array(::Type{T}, t::ArrayOperator{N, M, S, L}) where {T, N, M, S, L} = ArrayOperator{N, M, T, L}(_convert_array(T, t.data)) +elconvert(::Type{T}, t::ArrayOperator{N, M, S, L}) where {T, N, M, S, L} = ArrayOperator{N, M, T, L}(elconvert(T, t.data)) # Basic array interface for op in (:size, :axes, :iterate) @@ -140,12 +139,12 @@ end function Dual(value::T, partials::AbstractArray{S}) where {S, T} T2 = promote_type(T, S) - Dual(convert(T2, value), _convert_array(T2, partials)) + Dual(convert(T2, value), elconvert(T2, partials)) end function Dual(value::T, partials::ArrayOperator{0, M, S, L}) where {L, S, M, T} T2 = promote_type(T, S) - Dual(convert(T2, value), _convert_array(T2, partials).data) + Dual(convert(T2, value), elconvert(T2, partials).data) end """ @@ -183,7 +182,7 @@ Constructor that forces type compatibility """ function DualArray(value::AbstractArray, jacobian::ArrayOperator) T = promote_type(eltype(value), eltype(jacobian)) - DualArray(_convert_array(T, value), _convert_array(T, jacobian)) + DualArray(elconvert(T, value), elconvert(T, jacobian)) end # Helper function to define DualArrays with AbstractArray jacobians From e595a7f879b59290594ddf435612c4ebe02ffb03 Mon Sep 17 00:00:00 2001 From: Sheehan Olver Date: Fri, 8 May 2026 11:21:39 +0100 Subject: [PATCH 20/21] add eltype for type --- src/types.jl | 1 + test/array_operator_test.jl | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/src/types.jl b/src/types.jl index bfa597e..594b16d 100644 --- a/src/types.jl +++ b/src/types.jl @@ -44,6 +44,7 @@ end # Since ArrayOperator is not an AbstractArray we define these manually eltype(t::ArrayOperator) = eltype(t.data) +eltype(::Type{<:ArrayOperator{N, M, T}}) where {N,M,T} = T Base.Broadcast.broadcastable(t::ArrayOperator) = t diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl index 4ae5e30..8af28cd 100644 --- a/test/array_operator_test.jl +++ b/test/array_operator_test.jl @@ -4,6 +4,10 @@ using DualArrays: ArrayOperator @testset "ArrayOperator" begin t = ArrayOperator{1}([1 2 3;4 5 6;7 8 9]) + @testset "eltype" begin + @test eltype(t) == eltype(typeof(t)) == Int + end + @testset "Equality" begin @test t == ArrayOperator{1}([1 2 3;4 5 6;7 8 9]) @test isapprox(t, ArrayOperator{1}([1 2 3;4 5 6;7 8 9])) From 2e4e543036fbc515ba456badf11680b3e4441d0b Mon Sep 17 00:00:00 2001 From: Sheehan Olver Date: Fri, 8 May 2026 11:29:08 +0100 Subject: [PATCH 21/21] Update types.jl --- src/types.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/types.jl b/src/types.jl index 594b16d..d9c32fe 100644 --- a/src/types.jl +++ b/src/types.jl @@ -130,11 +130,12 @@ A dual number type that stores a value and its partials (derivatives). # Fields - `value::T`: The primal value - `partials::Partials`: The partial derivatives stored as an array - -NOTE: Partials will soon be in the ArrayOperator format. """ struct Dual{T, Partials <: AbstractArray{T}} <: Real value::T + # represents an ArrayOperator from an array to a scalar. + # This is the transpose of the data stored by an ArrayOperator, so that + # in the simple vector case we only store a vector (not a row-vector). partials::Partials end @@ -166,7 +167,7 @@ For now the entries just return the values when indexed. Constructs a DualVector, ensuring that the vector length matches the number of rows in the Jacobian. """ -struct DualArray{T, N , A <: AbstractArray{T,N},J <: (ArrayOperator{N, M, T, L} where {L, M})} <: AbstractVector{Dual{T}} +struct DualArray{T, N, A <: AbstractArray{T,N}, J <: (ArrayOperator{N, M, T, L} where {L, M})} <: AbstractVector{Dual{T}} value::A jacobian::J