From 423675a7448ebf40f4f7b97ec37d8663e85d68f9 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Thu, 5 Mar 2026 15:22:03 +0000 Subject: [PATCH 01/16] initial commit --- Project.toml | 8 -------- src/DualArrays.jl | 2 +- src/types.jl | 30 ++++++++++++++++++++++++++---- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/Project.toml b/Project.toml index 43f0322..65c10b2 100644 --- a/Project.toml +++ b/Project.toml @@ -8,22 +8,15 @@ ArrayLayouts = "4c555306-a7a7-4459-81d9-ec55ddd5c99a" BandedMatrices = "aae01518-5342-5314-be14-df237901396f" DiffRules = "b552c78f-8df3-52c6-915a-8e097449b14b" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" -ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" -ProfileView = "c46f51b8-102a-5cf2-8d2c-8597cb0e0da7" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" -StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" [compat] DiffRules = "1.15.1" -DifferentialEquations = "7.17.0" ForwardDiff = "1.2.2" Lux = "1.29.1" Plots = "1.41.1" -ProfileView = "1.10.2" SparseArrays = "1.10" -StaticArrays = "1.9.16" [extras] BandedMatrices = "aae01518-5342-5314-be14-df237901396f" @@ -36,6 +29,5 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -dev = ["Revise"] examples = ["Plots", "DifferentialEquations", "ForwardDiff", "BandedMatrices", "Lux"] test = ["Test", "ForwardDiff", "BandedMatrices", "SparseArrays"] diff --git a/src/DualArrays.jl b/src/DualArrays.jl index 2d30c3d..dd8642b 100644 --- a/src/DualArrays.jl +++ b/src/DualArrays.jl @@ -11,7 +11,7 @@ Differentiation rules are mostly provided by ChainRules.jl. """ module DualArrays -export DualVector, Dual, jacobian +export DualVector, Dual, jacobian, DualMatrix import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox diff --git a/src/types.jl b/src/types.jl index 5367511..b950738 100644 --- a/src/types.jl +++ b/src/types.jl @@ -15,14 +15,12 @@ struct Dual{T, Partials <: AbstractVector{T}} <: Real end """ - DualVector{T, M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}} + DualVector{T, V <: AbstractVector{T}, M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}} Represents a vector of dual numbers given by: values + jacobian * [ε₁, …, εₙ] -For now the entries just return the values when indexed. - # Fields - `value::Vector{T}`: The primal values - `jacobian::M`: The Jacobian matrix containing partial derivatives @@ -52,7 +50,31 @@ Constructor that forces type compatibility """ function DualVector(value::AbstractVector, jacobian::AbstractMatrix) T = promote_type(eltype(value), eltype(jacobian)) - DualVector(convert(Vector{T}, value), convert(AbstractMatrix{T}, jacobian)) + DualVector(T.(value), T.(jacobian)) +end + +""" +DualMatrix{T, M <: AbstractMatrix{T}, A <: AbstractArray{T, 4}} <: AbstractMatrix{Dual{T}} + +Represents a matrix of dual numbers given by: + +values + jacobian * [ϵ₁₁ ϵ₁₂ … ϵ₁ₘ + ϵ₂₁ ϵ₂₂ … ϵ₂ₘ + ⋮ ⋮ ⋱ ⋮ + ϵₙ₁ ϵₙ₂ … ϵₙₘ] + +Where the jacobian here is a 4D tensor. This is contracted with with the matrix of ϵs +as follows: + +Dᵢⱼ = ∑ₖₗ Jᵢⱼₖₗ ϵₖₗ + +Where J is the jacobian tensor, ϵ is the matrix of ϵs, and D is the resulting matrix of +tangents. +""" + +struct DualMatrix{T, M <: AbstractMatrix{T}, A <: AbstractArray{T, 4}} <: AbstractMatrix{Dual{T}} + value::M + jacobian::A end # Basic equality for Dual numbers From 7c36f01a54a3e33e6e358905513d2f74a048e5d8 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Thu, 5 Mar 2026 19:43:03 +0000 Subject: [PATCH 02/16] implement DualMatrix --- src/DualArrays.jl | 2 +- src/arithmetic.jl | 11 ++++++++--- src/indexing.jl | 39 +++++++++++++++++++++++++++++---------- src/types.jl | 36 +++++++++++++++++++++++++++++++++++- src/utilities.jl | 2 +- 5 files changed, 74 insertions(+), 16 deletions(-) diff --git a/src/DualArrays.jl b/src/DualArrays.jl index dd8642b..def4ff9 100644 --- a/src/DualArrays.jl +++ b/src/DualArrays.jl @@ -13,7 +13,7 @@ module DualArrays export DualVector, Dual, jacobian, DualMatrix -import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox +import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, promote_type, convert using LinearAlgebra, ArrayLayouts, FillArrays, DiffRules diff --git a/src/arithmetic.jl b/src/arithmetic.jl index 998b50b..23ffb2d 100644 --- a/src/arithmetic.jl +++ b/src/arithmetic.jl @@ -36,6 +36,11 @@ function diff_fn(f, n) return map(dx -> makepartials(dx, syms), d) end +_one(x) = one(x) +_one(x::Dual) = Dual(one(x.value), zero(x.partials)) + +DiffRules.@define_diffrule Base.:+(x, y) = :(_one(x)), :(_one(y)) + for (_, f, n) in DiffRules.diffrules(filter_modules=(:Base,)) partials = diff_fn(f, n) if n == 1 @@ -78,9 +83,9 @@ for (_, f, n) in DiffRules.diffrules(filter_modules=(:Base,)) return DualVector(val, jac) end # Must have Base.$f in order not to import everything - @eval Base.$f(x::Dual, y::Dual) = Dual($f(x.value, y.value), $p1(x.value, y.value) * x.partials + $p2(x.value, y.value) * y.partials) - @eval Base.$f(x::Dual, y::Real) = Dual($f(x.value, y), $p1(x.value, y) * x.partials) - @eval Base.$f(x::Real, y::Dual) = Dual($f(x, y.value), $p2(x, y.value) * y.partials) + @eval Base.$f(x::Dual, y::Dual) = Dual($f(x.value, y.value), $p1(x.value, y.value) .* x.partials + $p2(x.value, y.value) .* y.partials) + @eval Base.$f(x::Dual, y::Real) = Dual($f(x.value, y), $p1(x.value, y) .* x.partials) + @eval Base.$f(x::Real, y::Dual) = Dual($f(x, y.value), $p2(x, y.value) .* y.partials) end end diff --git a/src/indexing.jl b/src/indexing.jl index 7ff7f53..e9d7bad 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -3,31 +3,50 @@ using ArrayLayouts, FillArrays, LinearAlgebra, SparseArrays sparse_getindex(a...) = layout_getindex(a...) +sparse_getindex(d::DualMatrix, a...) = getindex(d, a...) sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D, 2)) sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D, 1)) """ Extract a single Dual number from a DualVector at position y. """ -function Base.getindex(x::DualVector, y::Int) +function getindex(x::DualVector, y::Int) Dual(x.value[y], sparse_getindex(x.jacobian, y, :)) end """ -Extract a sub-DualVector from a DualVector using a range. +Extract a single Dual number from a DualMatrix at position (y, z). """ -function Base.getindex(x::DualVector, y::UnitRange) - newval = x.value[y] - newjac = sparse_getindex(x.jacobian, y, :) - DualVector(newval, newjac) +function getindex(x::DualMatrix, y::Int, z::Int) + # We collapse the epsilons into a single vector of partials. + Dual(x.value[y, z], vec(sparse_getindex(x.jacobian, y, z, :, :))) end """ -Return the size of the DualVector (length of the value vector). +Extract a row slice from a DualMatrix, returning a DualVector. + +NOTE: Since a DualVector requires a 2-dimensional Jacobian, and slicing a 4-tensor returns a 3-tensor, +we implicitly 'flatten' the matrix of epsilons. This is done by the reshape. + +For example, in a 2x2 case, [ϵ₁₁ ϵ₁₂;ϵ₂₁ ϵ₂₂] becomes [ϵ₁, ϵ₂, ϵ₃, ϵ₄]. """ -Base.size(x::DualVector) = (length(x.value),) +function getindex(x::DualMatrix, y::Int, ::Colon) + n = size(x.value, 2) + DualVector(x.value[y, :], reshape(sparse_getindex(x.jacobian, y, :, :, :), n, :)) +end """ -Return the axes of the DualVector. +Extract a sub-DualVector from a DualVector using a range. """ -Base.axes(x::DualVector) = axes(x.value) \ No newline at end of file +function getindex(x::DualVector, y::UnitRange) + newval = x.value[y] + newjac = sparse_getindex(x.jacobian, y, :) + DualVector(newval, newjac) +end + +# Base array interface +for f in (:size, :axes) + @eval function $f(x::DualArray) + $f(x.value) + end +end \ No newline at end of file diff --git a/src/types.jl b/src/types.jl index b950738..50a0c86 100644 --- a/src/types.jl +++ b/src/types.jl @@ -14,6 +14,20 @@ struct Dual{T, Partials <: AbstractVector{T}} <: Real partials::Partials end +function Dual(value::T, partials::AbstractVector{S}) where {T, S} + T2 = promote_type(T, S) + val = T2(value) + partials = _convert(T2, partials) + Dual(val, partials) +end + +Dual{T, P}(x) where {T, P <: AbstractVector{T}} = + Dual{T, P}(convert(T, x), convert(P, T[])) + +promote_type(::Type{<:Dual{T1}}, ::Type{<:Dual{T2}}) where {T1, T2} = + Dual{promote_type(T1, T2)} + + """ DualVector{T, V <: AbstractVector{T}, M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}} @@ -45,14 +59,29 @@ struct DualVector{T, V <: AbstractVector{T},M <: AbstractMatrix{T}} <: AbstractV end end +_convert(T::Type, x::AbstractArray) = T.(x) +_convert(::Type{Dual{T}}, x::DualVector) where {T} = DualVector(_convert(T, x.value), _convert(T, x.jacobian)) + + """ Constructor that forces type compatibility """ function DualVector(value::AbstractVector, jacobian::AbstractMatrix) T = promote_type(eltype(value), eltype(jacobian)) - DualVector(T.(value), T.(jacobian)) + value_T = _convert(T, value) + jacobian_T = _convert(T, jacobian) + DualVector(value_T, jacobian_T) end +function DualMatrix(value::AbstractMatrix, jacobian::AbstractArray{<:Any, 4}) + T = promote_type(eltype(value), eltype(jacobian)) + value_T = _convert(T, value) + jacobian_T = _convert(T, jacobian) + DualMatrix(value_T, jacobian_T) +end + + + """ DualMatrix{T, M <: AbstractMatrix{T}, A <: AbstractArray{T, 4}} <: AbstractMatrix{Dual{T}} @@ -77,6 +106,11 @@ struct DualMatrix{T, M <: AbstractMatrix{T}, A <: AbstractArray{T, 4}} <: Abstra jacobian::A end +_convert(::Type{Dual{T}}, x::DualMatrix) where {T} = DualMatrix(_convert(T, x.value), _convert(T, x.jacobian)) + +# Alias for convenience +DualArray = Union{DualVector, DualMatrix} + # Basic equality for Dual numbers ==(a::Dual, b::Dual) = a.value == b.value && a.partials == b.partials isapprox(a::Dual, b::Dual) = isapprox(a.value, b.value) && isapprox(a.partials, b.partials) \ No newline at end of file diff --git a/src/utilities.jl b/src/utilities.jl index 080155f..112b792 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -49,7 +49,7 @@ end """ Custom display method for DualVectors. """ -Base.show(io::IO, ::MIME"text/plain", x::DualVector) = +Base.show(io::IO, ::MIME"text/plain", x::DualArray) = (print(io, x.value); print(io, " + "); print(io, x.jacobian); print(io, "𝛜")) """ From 374f054f4adb3913a9395ce6e3a8d66214229038 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Thu, 5 Mar 2026 20:08:11 +0000 Subject: [PATCH 03/16] add tests --- src/DualArrays.jl | 2 +- src/arithmetic.jl | 6 ++---- src/types.jl | 18 +++++++----------- test/runtests.jl | 25 ++++++++++++++++++++++++- 4 files changed, 34 insertions(+), 17 deletions(-) diff --git a/src/DualArrays.jl b/src/DualArrays.jl index def4ff9..6bb0572 100644 --- a/src/DualArrays.jl +++ b/src/DualArrays.jl @@ -13,7 +13,7 @@ module DualArrays export DualVector, Dual, jacobian, DualMatrix -import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, promote_type, convert +import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, promote_type, convert, one using LinearAlgebra, ArrayLayouts, FillArrays, DiffRules diff --git a/src/arithmetic.jl b/src/arithmetic.jl index 23ffb2d..374be6b 100644 --- a/src/arithmetic.jl +++ b/src/arithmetic.jl @@ -36,10 +36,8 @@ function diff_fn(f, n) return map(dx -> makepartials(dx, syms), d) end -_one(x) = one(x) -_one(x::Dual) = Dual(one(x.value), zero(x.partials)) - -DiffRules.@define_diffrule Base.:+(x, y) = :(_one(x)), :(_one(y)) +# We redefine one here since it is required for the derivative of + and -. +one(x::Dual) = Dual(one(x.value), zero(x.partials)) for (_, f, n) in DiffRules.diffrules(filter_modules=(:Base,)) partials = diff_fn(f, n) diff --git a/src/types.jl b/src/types.jl index 50a0c86..a75a62a 100644 --- a/src/types.jl +++ b/src/types.jl @@ -62,10 +62,6 @@ end _convert(T::Type, x::AbstractArray) = T.(x) _convert(::Type{Dual{T}}, x::DualVector) where {T} = DualVector(_convert(T, x.value), _convert(T, x.jacobian)) - -""" -Constructor that forces type compatibility -""" function DualVector(value::AbstractVector, jacobian::AbstractMatrix) T = promote_type(eltype(value), eltype(jacobian)) value_T = _convert(T, value) @@ -73,13 +69,6 @@ function DualVector(value::AbstractVector, jacobian::AbstractMatrix) DualVector(value_T, jacobian_T) end -function DualMatrix(value::AbstractMatrix, jacobian::AbstractArray{<:Any, 4}) - T = promote_type(eltype(value), eltype(jacobian)) - value_T = _convert(T, value) - jacobian_T = _convert(T, jacobian) - DualMatrix(value_T, jacobian_T) -end - """ @@ -106,6 +95,13 @@ struct DualMatrix{T, M <: AbstractMatrix{T}, A <: AbstractArray{T, 4}} <: Abstra jacobian::A end +function DualMatrix(value::AbstractMatrix{T}, jacobian::AbstractArray{S, 4}) where {T, S} + T2 = promote_type(T, S) + val = _convert(T2, value) + jac = _convert(T2, jacobian) + DualMatrix(val, jac) +end + _convert(::Type{Dual{T}}, x::DualMatrix) where {T} = DualMatrix(_convert(T, x.value), _convert(T, x.jacobian)) # Alias for convenience diff --git a/test/runtests.jl b/test/runtests.jl index 729b4ed..16fd8c6 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,4 @@ -using DualArrays, Test, LinearAlgebra, ForwardDiff, BandedMatrices +using DualArrays, Test, LinearAlgebra, ForwardDiff, BandedMatrices, SparseArrays using DualArrays: Dual @testset "DualArrays" begin @@ -26,6 +26,12 @@ using DualArrays: Dual @test sum(v[1:end-1] .* v[2:end]).partials == ForwardDiff.gradient(v -> sum(v[1:end-1] .* v[2:end]), 1:n) end + + @testset "Indexing (DualMatrix)" begin + M = DualMatrix([1 2; 3 4], zeros(2, 2, 2, 2)) + @test M[1, 1] == Dual(1, [0, 0, 0, 0]) + @test M[1, :] == DualVector([1, 2], zeros(2, 4)) + end @testset "Arithmetic (DualVector)" begin v = DualVector([1, 2, 3], [1 2 3; 4 5 6;7 8 9]) @@ -81,6 +87,23 @@ using DualArrays: Dual @test vcat(x, x) == DualVector([1, 1], [1 2 3;1 2 3]) @test vcat(x, y) == DualVector([1, 2, 3], [1 2 3;4 5 6;7 8 9]) end + + @testset "Nested Dual Numbers" begin + # Example of finding Hessian of f(x) = x[1]*x[2] + # Through nested dual numbers + d = DualVector([1, 2], sparse(I, 2, 2)) + D = DualMatrix(sparse(I, 2, 2), zeros(2, 2, 2, 1)) + d2 = DualVector(d, D) + val = d2[1] * d2[2] + @test val isa Dual + @test val.value isa Dual + @test val.partials isa DualVector + + @test val.value.value == 2 # f(x) + @test val.value.partials == [2, 1] # ∇f(x) + @test val.partials.value == [2, 1] # ∇f(x) + @test val.partials.jacobian == [0 1; 1 0] # Hessian of f(x) + end include("broadcast_test.jl") end \ No newline at end of file From 7e509e011431ab6a81dd26a4bd34879d34bccd9f Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Fri, 10 Apr 2026 14:31:30 +0100 Subject: [PATCH 04/16] initial commit --- src/indexing.jl | 14 ++++++++++++ src/types.jl | 56 +++++++++++++++++++++++++++++++++++++++++++++--- test/runtests.jl | 1 + 3 files changed, 68 insertions(+), 3 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index 7ff7f53..e64a7c0 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -6,6 +6,20 @@ sparse_getindex(a...) = layout_getindex(a...) sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D, 2)) sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D, 1)) +""" +For the purposes of DualArrays.jl, we need to be able to index on the input indices +(corresponding to the the primal values) only. This way, when we index, we still get the +same 'arrangement' of dual parts. Example in the DualVector case (where the Jacobian is +a Tensor{2, Float64, 1, 1} acting on a vector ϵ): + +d = a + Jϵ +d[1] = a[1] + J[1,:]ϵ + +J[1, :] is a row vector so it can still be applied to our ϵ. +""" +getindex(t::Tensor{L, T, N, M}, i::NTuple{N, Int}, ::Colon) where {L, T, N, M} = sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...) +getindex(t::Tensor, i...) = t.data[i...] + """ Extract a single Dual number from a DualVector at position y. """ diff --git a/src/types.jl b/src/types.jl index 5367511..c8c3104 100644 --- a/src/types.jl +++ b/src/types.jl @@ -1,5 +1,45 @@ # Core type definitions for DualArrays.jl +""" +This type represents a Tensor along with its contraction rule. +A tensor with its contraction rule represents a linear map from an N-array to an M-array, +with 0-arrays considered scalars, 1-arrays vectors, etc. + +We have: +-L is the dimensionality of the tensor +-T is the element type of the tensor +-N is the dimensionality of the input array/number of lower indices +-M is the dimensionality of the output array/number of upper indices + +We enforce L = N + M. + +In the context of DualArrays.jl, a DualArray (currently only a vector) can be thought of as + +a + Jϵ + +Where a is an M-array of real numbers, J is an N+M=tensor and ϵ is an N-array of dual parts. +In the simplest case, where M = 0, we have a Dual number with dual parts arranged in an N-array. +""" +struct Tensor{L, T, N, M} <: AbstractArray{T, L} + data::AbstractArray{T, L} +end + +function Tensor{N,M}(data::AbstractArray{T, L}) where {L, T, N, M} + if L != N + M + throw(ArgumentError("Tensor order must be equivalent to N + M.")) + end + Tensor{L, T, N, M}(data) +end + +Base.convert(::Type{Tensor{L, T, N, M}}, tensor::Tensor{L, S, N, M}) where {L, T, N, M, S} = + Tensor{N, M}(convert.(T, tensor.data)) + +for op in (:size, :axes) + @eval begin + ($op)(t::Tensor) = ($op)(t.data) + end +end + """ Dual{T, Partials <: AbstractVector{T}} <: Real @@ -7,13 +47,23 @@ A dual number type that stores a value and its partials (derivatives). # Fields - `value::T`: The primal value -- `partials::Partials`: The partial derivatives as a vector +- `partials::Partials`: The partial derivatives as a tensor mapping to a scalar """ -struct Dual{T, Partials <: AbstractVector{T}} <: Real +struct Dual{T, Partials <: (Tensor{L, T, 0, M} where {L, M})} <: Real value::T partials::Partials end +function Dual(value::T, partials::Tensor{L, S, 0, M}) where {S, T, L, M} + T2 = promote_type(T, S) + Dual{T2, Tensor{L, T2, 0, M}}(convert(T2, value), convert(Tensor{L, T2, 0, M}, partials)) +end + +# Helper function to define Duals from an AbstractArray +function Dual(value, partials::AbstractArray{T, N}) where {T, N} + Dual(value, Tensor{0, N}(partials)) +end + """ DualVector{T, M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}} @@ -32,7 +82,7 @@ For now the entries just return the values when indexed. Constructs a DualVector, ensuring that the vector length matches the number of rows in the Jacobian. """ -struct DualVector{T, V <: AbstractVector{T},M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}} +struct DualVector{T, V <: AbstractVector{T},M <: (Tensor{L, T, 1, M} where {L, M})} <: AbstractVector{Dual{T}} value::V jacobian::M diff --git a/test/runtests.jl b/test/runtests.jl index 31da00c..7fd2ac4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,6 +5,7 @@ using DualArrays: Dual @testset "Type Definition" begin @test_throws ArgumentError DualVector([1,2],I(3)) + @test Dual(1.0, [1, 2, 3]).partials == Tensor{0, 1}([1.0, 2.0, 3.0]) end @testset "Indexing" begin From a91f7aba9fe20fd385722e50735ef00a38972089 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Mon, 13 Apr 2026 13:02:04 +0100 Subject: [PATCH 05/16] fixes and add tests --- src/indexing.jl | 21 +++++++----- src/types.jl | 84 ++++++++++++++++++++++++++++++++++----------- src/utilities.jl | 2 +- test/runtests.jl | 7 ++-- test/tensor_test.jl | 29 ++++++++++++++++ 5 files changed, 111 insertions(+), 32 deletions(-) create mode 100644 test/tensor_test.jl diff --git a/src/indexing.jl b/src/indexing.jl index e64a7c0..3cae792 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -16,32 +16,37 @@ d = a + Jϵ d[1] = a[1] + J[1,:]ϵ J[1, :] is a row vector so it can still be applied to our ϵ. + +Note that a slice of a tensor done in this way will reduce the input dimension to 0. """ -getindex(t::Tensor{L, T, N, M}, i::NTuple{N, Int}, ::Colon) where {L, T, N, M} = sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...) -getindex(t::Tensor, i...) = t.data[i...] +getindex(t::Tensor{<:Any, <:Any, N, M}, i::NTuple{N, Int}, ::Colon) where {N, M} = Tensor{0}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) +# Integer indexing always returns a scalar. +getindex(t::Tensor, i::Vararg{Int}) = getindex(t.data, i...) +# Indexing with only slices/ranges returns a similar tensor +getindex(t::Tensor{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = Tensor{N}(sparse_getindex(t.data, i...)) """ Extract a single Dual number from a DualVector at position y. """ -function Base.getindex(x::DualVector, y::Int) - Dual(x.value[y], sparse_getindex(x.jacobian, y, :)) +function getindex(x::DualVector, y::Int) + Dual(x.value[y], x.jacobian[(y,), :]) end """ Extract a sub-DualVector from a DualVector using a range. """ -function Base.getindex(x::DualVector, y::UnitRange) +function getindex(x::DualVector, y::UnitRange) newval = x.value[y] - newjac = sparse_getindex(x.jacobian, y, :) + newjac = x.jacobian[y, :] DualVector(newval, newjac) end """ Return the size of the DualVector (length of the value vector). """ -Base.size(x::DualVector) = (length(x.value),) +size(x::DualVector) = (length(x.value),) """ Return the axes of the DualVector. """ -Base.axes(x::DualVector) = axes(x.value) \ No newline at end of file +axes(x::DualVector) = axes(x.value) \ No newline at end of file diff --git a/src/types.jl b/src/types.jl index c8c3104..2f22fc0 100644 --- a/src/types.jl +++ b/src/types.jl @@ -11,7 +11,7 @@ We have: -N is the dimensionality of the input array/number of lower indices -M is the dimensionality of the output array/number of upper indices -We enforce L = N + M. +We enforce L = N + M by inferring M in the constructor. In the context of DualArrays.jl, a DualArray (currently only a vector) can be thought of as @@ -24,22 +24,64 @@ struct Tensor{L, T, N, M} <: AbstractArray{T, L} data::AbstractArray{T, L} end -function Tensor{N,M}(data::AbstractArray{T, L}) where {L, T, N, M} - if L != N + M - throw(ArgumentError("Tensor order must be equivalent to N + M.")) - end - Tensor{L, T, N, M}(data) +# Constructor to wrap an array with a tensor, given a contraction rule represented by N +function Tensor{N}(data::AbstractArray{T, L}) where {L, T, N} + Tensor{L, T, N, L - N}(data) end +# Helper convert function +_convert_array(::Type{T}, a::AbstractArray) where {T} = T.(a) +_convert_array(::Type{T}, t::Tensor{L, S, N, M}) where {T, L, S, N, M} = Tensor{L, T, N, M}(_convert_array(T, t.data)) + Base.convert(::Type{Tensor{L, T, N, M}}, tensor::Tensor{L, S, N, M}) where {L, T, N, M, S} = - Tensor{N, M}(convert.(T, tensor.data)) + Tensor{L, T, N, M}(_convert_array(T, tensor.data)) +# Basic array interface for op in (:size, :axes) @eval begin ($op)(t::Tensor) = ($op)(t.data) + ($op)(t::Tensor, i...) = ($op)(t.data, i...) end end +# Below we define a broadcast style for Tensors and override copy and copyto! +# This allows all arithmetic/broadcasting with Tensors to be handled by the +# underlying logic of the array contained in the struct, while ensuring that +# all results stay as a Tensor. + +struct TensorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{1} end +TensorBroadcastStyle{N}(::Val{M}) where {N, M} = TensorBroadcastStyle{N}() + +# N is the input dimension of the tensor being broadcasted. +# For he result of the broadcast we will choose to preserve the highest input dimension. +Base.BroadcastStyle(::Type{<:Tensor{<:Any, <:Any, N, <:Any}}) where {N} = TensorBroadcastStyle{N}() +Base.BroadcastStyle(::TensorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{0}) where {N} = TensorBroadcastStyle{N}() +Base.BroadcastStyle(::TensorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{1}) where {N} = TensorBroadcastStyle{N}() +Base.BroadcastStyle(::TensorBroadcastStyle{N}, ::TensorBroadcastStyle{M}) where {N, M} = TensorBroadcastStyle{max(N, M)}() + +# Helper functions to help define broadcasting/arithmetic with Tensors. +# By converting a broadcast involving Tensors into a broadcast +# involving the underlying arrays. +_unwrap(t::Tensor) = t.data +_unwrap(x) = x +_unwrap_args(args::Tuple) = map(_unwrap, args) + +# copy ensures that arithmetic involving a Tensor returns a Tensor +function Base.copy(bc::Broadcast.Broadcasted{TensorBroadcastStyle{N}}) where {N} + # We create a Broadcasted of the underlying arrays and create a Tensor containing + # the evaluated broadcast + databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) + Tensor{N}(copy(Broadcast.flatten(databroadcast))) +end + +# copyto adds support for .= +function Base.copyto!(dest::Tensor, bc::Broadcast.Broadcasted{TensorBroadcastStyle}) + # As above + databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) + copyto!(dest.data, Broadcast.flatten(databroadcast)) + dest +end + """ Dual{T, Partials <: AbstractVector{T}} <: Real @@ -61,7 +103,7 @@ end # Helper function to define Duals from an AbstractArray function Dual(value, partials::AbstractArray{T, N}) where {T, N} - Dual(value, Tensor{0, N}(partials)) + Dual(value, Tensor{0}(partials)) end """ @@ -82,27 +124,29 @@ For now the entries just return the values when indexed. Constructs a DualVector, ensuring that the vector length matches the number of rows in the Jacobian. """ -struct DualVector{T, V <: AbstractVector{T},M <: (Tensor{L, T, 1, M} where {L, M})} <: AbstractVector{Dual{T}} +struct DualVector{T, V <: AbstractVector{T},J <: (Tensor{L, T, 1, M} where {L, M})} <: AbstractVector{Dual{T}} value::V - jacobian::M - - function DualVector(value::V, jacobian::M) where {T, V <: AbstractVector{T}, M <: AbstractMatrix{T}} - if size(jacobian, 1) != length(value) - x, y = length(value), size(jacobian, 1) - throw(ArgumentError("vector length must match number of rows in jacobian.\n" * - "vector length: $x\n" * - "no. of jacobian rows: $y")) + jacobian::J + + function DualVector(value::V, jacobian::J) where {T, V <: AbstractVector{T}, J <: (Tensor{L, T, 1, M} where {L, M})} + if length(value) != size(jacobian, 1) + throw(ArgumentError("Length of value vector must match number of rows in Jacobian.")) end - new{T,V, M}(value, jacobian) + new{T, V, J}(value, jacobian) end end """ Constructor that forces type compatibility """ -function DualVector(value::AbstractVector, jacobian::AbstractMatrix) +function DualVector(value::AbstractVector, jacobian::Tensor) T = promote_type(eltype(value), eltype(jacobian)) - DualVector(convert(Vector{T}, value), convert(AbstractMatrix{T}, jacobian)) + DualVector(_convert_array(T, value), _convert_array(T, jacobian)) +end + +# Helper function to define DualVectors with AbstractArray jacobians +function DualVector(value::AbstractVector, jacobian::AbstractArray{T, N}) where {T, N} + DualVector(value, Tensor{1}(jacobian)) end # Basic equality for Dual numbers diff --git a/src/utilities.jl b/src/utilities.jl index 080155f..e892b95 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -49,7 +49,7 @@ end """ Custom display method for DualVectors. """ -Base.show(io::IO, ::MIME"text/plain", x::DualVector) = +show(io::IO, ::MIME"text/plain", x::DualVector) = (print(io, x.value); print(io, " + "); print(io, x.jacobian); print(io, "𝛜")) """ diff --git a/test/runtests.jl b/test/runtests.jl index 7fd2ac4..68a1d81 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,11 +1,11 @@ using DualArrays, Test, LinearAlgebra, ForwardDiff, BandedMatrices -using DualArrays: Dual +using DualArrays: Tensor @testset "DualArrays" begin @testset "Type Definition" begin @test_throws ArgumentError DualVector([1,2],I(3)) - @test Dual(1.0, [1, 2, 3]).partials == Tensor{0, 1}([1.0, 2.0, 3.0]) + @test Dual(1.0, [1, 2, 3]).partials == Tensor{0}([1.0, 2.0, 3.0]) end @testset "Indexing" begin @@ -23,7 +23,7 @@ using DualArrays: Dual n = 10 v = DualVector(1:n, I(n)) - @test v[2:end].jacobian isa BandedMatrix + @test v[2:end].jacobian.data isa BandedMatrix @test sum(v[1:end-1] .* v[2:end]).partials == ForwardDiff.gradient(v -> sum(v[1:end-1] .* v[2:end]), 1:n) end @@ -90,4 +90,5 @@ using DualArrays: Dual end include("broadcast_test.jl") + include("tensor_test.jl") end \ No newline at end of file diff --git a/test/tensor_test.jl b/test/tensor_test.jl new file mode 100644 index 0000000..ad25701 --- /dev/null +++ b/test/tensor_test.jl @@ -0,0 +1,29 @@ +using Test, LinearAlgebra +using DualArrays: Tensor + +@testset "Tensor" begin + t = Tensor{1}([1 2 3;4 5 6;7 8 9]) + @testset "Indexing" begin + @test t[1,1] == 1 + @test t[(1,), :] == Tensor{0}([1, 2, 3]) + @test t[1:2, 1:2] == Tensor{1}([1 2;4 5]) + end + + @testset "Arithmetic" begin + @test t + t isa Tensor + @test t + t == Tensor{1}([2 4 6;8 10 12;14 16 18]) + + @test t .* 2 isa Tensor + @test t .* 2 == Tensor{1}([2 4 6;8 10 12;14 16 18]) + @test t .* [1,2,3] == Tensor{1}([1 2 3;8 10 12;21 24 27]) + + t1 = Tensor{0}([1, 2, 3]) + t2 = Tensor{1}([1 2 3]) + t3 = Tensor{0}([2]) + + @test t1 .* t2 == Tensor{1}([1 2 3;2 4 6;3 6 9]) + @test t1 .* t2 .+ t3 == Tensor{1}([3 4 5;4 6 8;5 8 11]) + + @test sin.(t1) == Tensor{0}(sin.([1, 2, 3])) + end +end \ No newline at end of file From 875ef60a3d08a698927d05ee575446ed4a254f71 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Mon, 13 Apr 2026 18:10:41 +0100 Subject: [PATCH 06/16] fix indexing --- src/indexing.jl | 32 ++++++++++++++++++++------------ test/tensor_test.jl | 7 ++++++- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index 3cae792..b41d2d3 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -6,20 +6,28 @@ sparse_getindex(a...) = layout_getindex(a...) sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D, 2)) sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D, 1)) -""" -For the purposes of DualArrays.jl, we need to be able to index on the input indices -(corresponding to the the primal values) only. This way, when we index, we still get the -same 'arrangement' of dual parts. Example in the DualVector case (where the Jacobian is -a Tensor{2, Float64, 1, 1} acting on a vector ϵ): -d = a + Jϵ -d[1] = a[1] + J[1,:]ϵ +# We need a system of indexing that takes two tuples +# of length N and M. We must then return a Tensor whose new input and output dimensions +# are inferred from how many of the arguments in each respective tuple are integers. -J[1, :] is a row vector so it can still be applied to our ϵ. +# For the purposes of DualArrays.jl, we only need to index the input dimensions +Idx = Union{Int, Colon, AbstractUnitRange} +count_ints(t::Tuple) = count(x -> x isa Int, t) -Note that a slice of a tensor done in this way will reduce the input dimension to 0. -""" -getindex(t::Tensor{<:Any, <:Any, N, M}, i::NTuple{N, Int}, ::Colon) where {N, M} = Tensor{0}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) +function getindex(t::Tensor{<:Any, T, N, M}, i::NTuple{N, Idx}, j::NTuple{M, Idx}) where {T, N, M} + # new value of M is inferred from the Tensor constructor and the order of + # indexing the underlying array. + newN = N - count_ints(i) + Tensor{newN}(sparse_getindex(t.data, i..., j...)) +end + +function getindex(t::Tensor{<:Any, T, N, M}, i::NTuple{N, Idx}, ::Colon) where {T, N, M} + # new value of M is inferred from the Tensor constructor and the order of + # indexing the underlying array. + newN = N - count_ints(i) + Tensor{newN}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) +end # Integer indexing always returns a scalar. getindex(t::Tensor, i::Vararg{Int}) = getindex(t.data, i...) # Indexing with only slices/ranges returns a similar tensor @@ -29,7 +37,7 @@ getindex(t::Tensor{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) wher Extract a single Dual number from a DualVector at position y. """ function getindex(x::DualVector, y::Int) - Dual(x.value[y], x.jacobian[(y,), :]) + Dual(x.value[y], x.jacobian[(y,), (:,)]) end """ diff --git a/test/tensor_test.jl b/test/tensor_test.jl index ad25701..8ce7572 100644 --- a/test/tensor_test.jl +++ b/test/tensor_test.jl @@ -5,8 +5,13 @@ using DualArrays: Tensor t = Tensor{1}([1 2 3;4 5 6;7 8 9]) @testset "Indexing" begin @test t[1,1] == 1 - @test t[(1,), :] == Tensor{0}([1, 2, 3]) + @test t[(1,), (:,)] == Tensor{0}([1, 2, 3]) @test t[1:2, 1:2] == Tensor{1}([1 2;4 5]) + + s = Tensor{2}(ones(2,2,2)) + @test s[(1,1), (:,)] == Tensor{0}([1, 1]) + @test s[(1,:), (:,)] == Tensor{1}([1 1;1 1]) + @test s[(1,2), :] == Tensor{0}([1, 1]) end @testset "Arithmetic" begin From 0b7bd6501b4fa74f3caead3fee78268cbfee4551 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Tue, 14 Apr 2026 12:34:54 +0100 Subject: [PATCH 07/16] small corrections --- src/indexing.jl | 2 +- src/types.jl | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index b41d2d3..c03a5de 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -37,7 +37,7 @@ getindex(t::Tensor{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) wher Extract a single Dual number from a DualVector at position y. """ function getindex(x::DualVector, y::Int) - Dual(x.value[y], x.jacobian[(y,), (:,)]) + Dual(x.value[y], x.jacobian[(y,), :]) end """ diff --git a/src/types.jl b/src/types.jl index 2f22fc0..a17656a 100644 --- a/src/types.jl +++ b/src/types.jl @@ -2,7 +2,7 @@ """ This type represents a Tensor along with its contraction rule. -A tensor with its contraction rule represents a linear map from an N-array to an M-array, +A tensor with its contraction rule represents a linear map from an M-array to an N-array, with 0-arrays considered scalars, 1-arrays vectors, etc. We have: @@ -17,8 +17,8 @@ In the context of DualArrays.jl, a DualArray (currently only a vector) can be th a + Jϵ -Where a is an M-array of real numbers, J is an N+M=tensor and ϵ is an N-array of dual parts. -In the simplest case, where M = 0, we have a Dual number with dual parts arranged in an N-array. +Where a is an N-array of real numbers, J is an N+M=tensor and ϵ is an M-array of dual parts. +In the simplest case, where N = 0, we have a Dual number with dual parts arranged in an M-array. """ struct Tensor{L, T, N, M} <: AbstractArray{T, L} data::AbstractArray{T, L} From f8b03b25f8b3d8f18d0ef501f434df40c2de4f40 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Tue, 14 Apr 2026 12:52:45 +0100 Subject: [PATCH 08/16] improve test coverage --- src/types.jl | 2 +- test/runtests.jl | 11 +++++++++++ test/tensor_test.jl | 8 ++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/types.jl b/src/types.jl index a17656a..e67cca5 100644 --- a/src/types.jl +++ b/src/types.jl @@ -75,7 +75,7 @@ function Base.copy(bc::Broadcast.Broadcasted{TensorBroadcastStyle{N}}) where {N} end # copyto adds support for .= -function Base.copyto!(dest::Tensor, bc::Broadcast.Broadcasted{TensorBroadcastStyle}) +function Base.copyto!(dest::Tensor, bc::Broadcast.Broadcasted{TensorBroadcastStyle{N}}) where {N} # As above databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) copyto!(dest.data, Broadcast.flatten(databroadcast)) diff --git a/test/runtests.jl b/test/runtests.jl index 68a1d81..aafd661 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -10,6 +10,10 @@ using DualArrays: Tensor @testset "Indexing" begin v = DualVector([1, 2, 3], [1 2 3; 4 5 6;7 8 9]) + + @test size(v) == (3,) + @test axes(v) == (Base.OneTo(3),) + @test v[1] isa Dual @test v[1] == Dual(1,[1,2,3]) @test v[2] == Dual(2,[4,5,6]) @@ -88,6 +92,13 @@ using DualArrays: Tensor @test vcat(x, x) == DualVector([1, 1], [1 2 3;1 2 3]) @test vcat(x, y) == DualVector([1, 2, 3], [1 2 3;4 5 6;7 8 9]) end + + @testset "show" begin + d = DualVector([1.0, 2.0], [1 0; 0 1]) + s = repr(MIME"text/plain"(), d) + @test occursin(" + ", s) + @test endswith(s, "𝛜") + end include("broadcast_test.jl") include("tensor_test.jl") diff --git a/test/tensor_test.jl b/test/tensor_test.jl index 8ce7572..70e1570 100644 --- a/test/tensor_test.jl +++ b/test/tensor_test.jl @@ -30,5 +30,13 @@ using DualArrays: Tensor @test t1 .* t2 .+ t3 == Tensor{1}([3 4 5;4 6 8;5 8 11]) @test sin.(t1) == Tensor{0}(sin.([1, 2, 3])) + + s = Tensor{1}(zeros(3, 3)) + + result = (s .= t .+ 1) + + @test result isa Tensor + @test result.data == s.data + @test s == Tensor{1}([2 3 4; 5 6 7; 8 9 10]) end end \ No newline at end of file From 70097f6f78ca22dbb6044de4d109f022dc2ff38d Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Tue, 14 Apr 2026 12:56:55 +0100 Subject: [PATCH 09/16] add indexing example --- src/indexing.jl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/indexing.jl b/src/indexing.jl index c03a5de..dc2dbf0 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -12,6 +12,18 @@ sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, siz # are inferred from how many of the arguments in each respective tuple are integers. # For the purposes of DualArrays.jl, we only need to index the input dimensions +# +# Example: +# +# Consider a DualVector with standard matrix Jacobian (Tensor{2, T, 1, 1}): +# +# d = a + Bϵ +# +# If we index d[1], we have: +# +# d[1] = a[1] + B[1, :]ϵ +# +# Where B[1, :] indexes the input dimension of B Idx = Union{Int, Colon, AbstractUnitRange} count_ints(t::Tuple) = count(x -> x isa Int, t) From 1a17a05e3ce0e13e68ac3bf193348122258cc830 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Thu, 23 Apr 2026 22:01:35 +0100 Subject: [PATCH 10/16] rename to ArrayOperator --- src/indexing.jl | 20 +++++----- src/types.jl | 74 +++++++++++++++++++------------------ test/array_operator_test.jl | 42 +++++++++++++++++++++ test/runtests.jl | 6 +-- test/tensor_test.jl | 42 --------------------- 5 files changed, 93 insertions(+), 91 deletions(-) create mode 100644 test/array_operator_test.jl delete mode 100644 test/tensor_test.jl diff --git a/src/indexing.jl b/src/indexing.jl index dc2dbf0..bc78038 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -8,14 +8,14 @@ sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, siz # We need a system of indexing that takes two tuples -# of length N and M. We must then return a Tensor whose new input and output dimensions +# of length N and M. We must then return a ArrayOperator whose new input and output dimensions # are inferred from how many of the arguments in each respective tuple are integers. # For the purposes of DualArrays.jl, we only need to index the input dimensions # # Example: # -# Consider a DualVector with standard matrix Jacobian (Tensor{2, T, 1, 1}): +# Consider a DualVector with standard matrix Jacobian (ArrayOperator{2, T, 1, 1}): # # d = a + Bϵ # @@ -27,23 +27,23 @@ sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, siz Idx = Union{Int, Colon, AbstractUnitRange} count_ints(t::Tuple) = count(x -> x isa Int, t) -function getindex(t::Tensor{<:Any, T, N, M}, i::NTuple{N, Idx}, j::NTuple{M, Idx}) where {T, N, M} - # new value of M is inferred from the Tensor constructor and the order of +function getindex(t::ArrayOperator{<:Any, T, N, M}, i::NTuple{N, Idx}, j::NTuple{M, Idx}) where {T, N, M} + # new value of M is inferred from the ArrayOperator constructor and the order of # indexing the underlying array. newN = N - count_ints(i) - Tensor{newN}(sparse_getindex(t.data, i..., j...)) + ArrayOperator{newN, T, newN, M}(sparse_getindex(t.data, i..., j...)) end -function getindex(t::Tensor{<:Any, T, N, M}, i::NTuple{N, Idx}, ::Colon) where {T, N, M} - # new value of M is inferred from the Tensor constructor and the order of +function getindex(t::ArrayOperator{<:Any, T, N, M}, i::NTuple{N, Idx}, ::Colon) where {T, N, M} + # new value of M is inferred from the ArrayOperator constructor and the order of # indexing the underlying array. newN = N - count_ints(i) - Tensor{newN}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) + ArrayOperator{newN, T, newN, M}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) end # Integer indexing always returns a scalar. -getindex(t::Tensor, i::Vararg{Int}) = getindex(t.data, i...) +getindex(t::ArrayOperator, i::Vararg{Int}) = getindex(t.data, i...) # Indexing with only slices/ranges returns a similar tensor -getindex(t::Tensor{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = Tensor{N}(sparse_getindex(t.data, i...)) +getindex(t::ArrayOperator{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N, T, N, M}(sparse_getindex(t.data, i...)) """ Extract a single Dual number from a DualVector at position y. diff --git a/src/types.jl b/src/types.jl index e67cca5..e7577a7 100644 --- a/src/types.jl +++ b/src/types.jl @@ -1,9 +1,11 @@ # Core type definitions for DualArrays.jl """ -This type represents a Tensor along with its contraction rule. -A tensor with its contraction rule represents a linear map from an M-array to an N-array, -with 0-arrays considered scalars, 1-arrays vectors, etc. +This type represents a linear map from an M-array to an N-array, with a N+M=L-dimensional +array as its underlying data. This can be thought of analogously to an L-tensor equipped +with a contraction pattern characterised by (N, M). Specifically, the tensor has dimensions +a₁ x a₂ x ... x a_N x b₁ x b₂ x ... x b_M and maps an M-array of shape (b₁, b₂, ..., b_M) +to an N-array of shape (a₁, a₂, ..., a_N) by contracting over the last M indices. We have: -L is the dimensionality of the tensor @@ -13,69 +15,69 @@ We have: We enforce L = N + M by inferring M in the constructor. -In the context of DualArrays.jl, a DualArray (currently only a vector) can be thought of as +In the context of DualArrays.jl, a DualArray can be thought of as a + Jϵ Where a is an N-array of real numbers, J is an N+M=tensor and ϵ is an M-array of dual parts. In the simplest case, where N = 0, we have a Dual number with dual parts arranged in an M-array. """ -struct Tensor{L, T, N, M} <: AbstractArray{T, L} +struct ArrayOperator{L, T, N, M} <: AbstractArray{T, L} data::AbstractArray{T, L} end # Constructor to wrap an array with a tensor, given a contraction rule represented by N -function Tensor{N}(data::AbstractArray{T, L}) where {L, T, N} - Tensor{L, T, N, L - N}(data) +function ArrayOperator{N}(data::AbstractArray{T, L}) where {L, T, N} + ArrayOperator{L, T, N, L - N}(data) end # Helper convert function _convert_array(::Type{T}, a::AbstractArray) where {T} = T.(a) -_convert_array(::Type{T}, t::Tensor{L, S, N, M}) where {T, L, S, N, M} = Tensor{L, T, N, M}(_convert_array(T, t.data)) +_convert_array(::Type{T}, t::ArrayOperator{L, S, N, M}) where {T, L, S, N, M} = ArrayOperator{L, T, N, M}(_convert_array(T, t.data)) -Base.convert(::Type{Tensor{L, T, N, M}}, tensor::Tensor{L, S, N, M}) where {L, T, N, M, S} = - Tensor{L, T, N, M}(_convert_array(T, tensor.data)) +Base.convert(::Type{ArrayOperator{L, T, N, M}}, tensor::ArrayOperator{L, S, N, M}) where {L, T, N, M, S} = + ArrayOperator{L, T, N, M}(_convert_array(T, tensor.data)) # Basic array interface for op in (:size, :axes) @eval begin - ($op)(t::Tensor) = ($op)(t.data) - ($op)(t::Tensor, i...) = ($op)(t.data, i...) + ($op)(t::ArrayOperator) = ($op)(t.data) + ($op)(t::ArrayOperator, i...) = ($op)(t.data, i...) end end -# Below we define a broadcast style for Tensors and override copy and copyto! -# This allows all arithmetic/broadcasting with Tensors to be handled by the +# Below we define a broadcast style for ArrayOperators and override copy and copyto! +# This allows all arithmetic/broadcasting with ArrayOperators to be handled by the # underlying logic of the array contained in the struct, while ensuring that -# all results stay as a Tensor. +# all results stay as a ArrayOperator. -struct TensorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{1} end -TensorBroadcastStyle{N}(::Val{M}) where {N, M} = TensorBroadcastStyle{N}() +struct ArrayOperatorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{1} end +ArrayOperatorBroadcastStyle{N}(::Val{M}) where {N, M} = ArrayOperatorBroadcastStyle{N}() # N is the input dimension of the tensor being broadcasted. # For he result of the broadcast we will choose to preserve the highest input dimension. -Base.BroadcastStyle(::Type{<:Tensor{<:Any, <:Any, N, <:Any}}) where {N} = TensorBroadcastStyle{N}() -Base.BroadcastStyle(::TensorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{0}) where {N} = TensorBroadcastStyle{N}() -Base.BroadcastStyle(::TensorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{1}) where {N} = TensorBroadcastStyle{N}() -Base.BroadcastStyle(::TensorBroadcastStyle{N}, ::TensorBroadcastStyle{M}) where {N, M} = TensorBroadcastStyle{max(N, M)}() +Base.BroadcastStyle(::Type{<:ArrayOperator{<:Any, <:Any, N, <:Any}}) where {N} = ArrayOperatorBroadcastStyle{N}() +Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{0}) where {N} = ArrayOperatorBroadcastStyle{N}() +Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{1}) where {N} = ArrayOperatorBroadcastStyle{N}() +Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::ArrayOperatorBroadcastStyle{M}) where {N, M} = ArrayOperatorBroadcastStyle{max(N, M)}() -# Helper functions to help define broadcasting/arithmetic with Tensors. -# By converting a broadcast involving Tensors into a broadcast +# Helper functions to help define broadcasting/arithmetic with ArrayOperators. +# By converting a broadcast involving ArrayOperators into a broadcast # involving the underlying arrays. -_unwrap(t::Tensor) = t.data +_unwrap(t::ArrayOperator) = t.data _unwrap(x) = x _unwrap_args(args::Tuple) = map(_unwrap, args) -# copy ensures that arithmetic involving a Tensor returns a Tensor -function Base.copy(bc::Broadcast.Broadcasted{TensorBroadcastStyle{N}}) where {N} - # We create a Broadcasted of the underlying arrays and create a Tensor containing +# copy ensures that arithmetic involving a ArrayOperator returns a ArrayOperator +function Base.copy(bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{N}}) where {N} + # We create a Broadcasted of the underlying arrays and create a ArrayOperator containing # the evaluated broadcast databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) - Tensor{N}(copy(Broadcast.flatten(databroadcast))) + ArrayOperator{N}(copy(Broadcast.flatten(databroadcast))) end # copyto adds support for .= -function Base.copyto!(dest::Tensor, bc::Broadcast.Broadcasted{TensorBroadcastStyle{N}}) where {N} +function Base.copyto!(dest::ArrayOperator, bc::Broadcast.Broadcasted{ArrayOperatorBroadcastStyle{N}}) where {N} # As above databroadcast = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) copyto!(dest.data, Broadcast.flatten(databroadcast)) @@ -91,19 +93,19 @@ A dual number type that stores a value and its partials (derivatives). - `value::T`: The primal value - `partials::Partials`: The partial derivatives as a tensor mapping to a scalar """ -struct Dual{T, Partials <: (Tensor{L, T, 0, M} where {L, M})} <: Real +struct Dual{T, Partials <: (ArrayOperator{L, T, 0, M} where {L, M})} <: Real value::T partials::Partials end -function Dual(value::T, partials::Tensor{L, S, 0, M}) where {S, T, L, M} +function Dual(value::T, partials::ArrayOperator{L, S, 0, M}) where {S, T, L, M} T2 = promote_type(T, S) - Dual{T2, Tensor{L, T2, 0, M}}(convert(T2, value), convert(Tensor{L, T2, 0, M}, partials)) + Dual{T2, ArrayOperator{L, T2, 0, M}}(convert(T2, value), convert(ArrayOperator{L, T2, 0, M}, partials)) end # Helper function to define Duals from an AbstractArray function Dual(value, partials::AbstractArray{T, N}) where {T, N} - Dual(value, Tensor{0}(partials)) + Dual(value, ArrayOperator{0}(partials)) end """ @@ -124,7 +126,7 @@ For now the entries just return the values when indexed. Constructs a DualVector, ensuring that the vector length matches the number of rows in the Jacobian. """ -struct DualVector{T, V <: AbstractVector{T},J <: (Tensor{L, T, 1, M} where {L, M})} <: AbstractVector{Dual{T}} +struct DualVector{T, V <: AbstractVector{T},J <: (ArrayOperator{L, T, 1, M} where {L, M})} <: AbstractVector{Dual{T}} value::V jacobian::J @@ -139,14 +141,14 @@ end """ Constructor that forces type compatibility """ -function DualVector(value::AbstractVector, jacobian::Tensor) +function DualVector(value::AbstractVector, jacobian::ArrayOperator) T = promote_type(eltype(value), eltype(jacobian)) DualVector(_convert_array(T, value), _convert_array(T, jacobian)) end # Helper function to define DualVectors with AbstractArray jacobians function DualVector(value::AbstractVector, jacobian::AbstractArray{T, N}) where {T, N} - DualVector(value, Tensor{1}(jacobian)) + DualVector(value, ArrayOperator{1}(jacobian)) end # Basic equality for Dual numbers diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl new file mode 100644 index 0000000..57e9969 --- /dev/null +++ b/test/array_operator_test.jl @@ -0,0 +1,42 @@ +using Test, LinearAlgebra +using DualArrays: ArrayOperator + +@testset "Tensor" begin + t = ArrayOperator{1}([1 2 3;4 5 6;7 8 9]) + @testset "Indexing" begin + @test t[1,1] == 1 + @test t[(1,), (:,)] == ArrayOperator{0}([1, 2, 3]) + @test t[1:2, 1:2] == ArrayOperator{1}([1 2;4 5]) + + s = ArrayOperator{2}(ones(2,2,2)) + @test s[(1,1), (:,)] == ArrayOperator{0}([1, 1]) + @test s[(1,:), (:,)] == ArrayOperator{1}([1 1;1 1]) + @test s[(1,2), :] == ArrayOperator{0}([1, 1]) + end + + @testset "Arithmetic" begin + @test t + t isa ArrayOperator + @test t + t == ArrayOperator{1}([2 4 6;8 10 12;14 16 18]) + + @test t .* 2 isa ArrayOperator + @test t .* 2 == ArrayOperator{1}([2 4 6;8 10 12;14 16 18]) + @test t .* [1,2,3] == ArrayOperator{1}([1 2 3;8 10 12;21 24 27]) + + t1 = ArrayOperator{0}([1, 2, 3]) + t2 = ArrayOperator{1}([1 2 3]) + t3 = ArrayOperator{0}([2]) + + @test t1 .* t2 == ArrayOperator{1}([1 2 3;2 4 6;3 6 9]) + @test t1 .* t2 .+ t3 == ArrayOperator{1}([3 4 5;4 6 8;5 8 11]) + + @test sin.(t1) == ArrayOperator{0}(sin.([1, 2, 3])) + + s = ArrayOperator{1}(zeros(3, 3)) + + result = (s .= t .+ 1) + + @test result isa ArrayOperator + @test result.data == s.data + @test s == ArrayOperator{1}([2 3 4; 5 6 7; 8 9 10]) + end +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index aafd661..3875011 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,11 +1,11 @@ using DualArrays, Test, LinearAlgebra, ForwardDiff, BandedMatrices -using DualArrays: Tensor +using DualArrays: ArrayOperator @testset "DualArrays" begin @testset "Type Definition" begin @test_throws ArgumentError DualVector([1,2],I(3)) - @test Dual(1.0, [1, 2, 3]).partials == Tensor{0}([1.0, 2.0, 3.0]) + @test Dual(1.0, [1, 2, 3]).partials == ArrayOperator{0}([1.0, 2.0, 3.0]) end @testset "Indexing" begin @@ -101,5 +101,5 @@ using DualArrays: Tensor end include("broadcast_test.jl") - include("tensor_test.jl") + include("array_operator_test.jl") end \ No newline at end of file diff --git a/test/tensor_test.jl b/test/tensor_test.jl deleted file mode 100644 index 70e1570..0000000 --- a/test/tensor_test.jl +++ /dev/null @@ -1,42 +0,0 @@ -using Test, LinearAlgebra -using DualArrays: Tensor - -@testset "Tensor" begin - t = Tensor{1}([1 2 3;4 5 6;7 8 9]) - @testset "Indexing" begin - @test t[1,1] == 1 - @test t[(1,), (:,)] == Tensor{0}([1, 2, 3]) - @test t[1:2, 1:2] == Tensor{1}([1 2;4 5]) - - s = Tensor{2}(ones(2,2,2)) - @test s[(1,1), (:,)] == Tensor{0}([1, 1]) - @test s[(1,:), (:,)] == Tensor{1}([1 1;1 1]) - @test s[(1,2), :] == Tensor{0}([1, 1]) - end - - @testset "Arithmetic" begin - @test t + t isa Tensor - @test t + t == Tensor{1}([2 4 6;8 10 12;14 16 18]) - - @test t .* 2 isa Tensor - @test t .* 2 == Tensor{1}([2 4 6;8 10 12;14 16 18]) - @test t .* [1,2,3] == Tensor{1}([1 2 3;8 10 12;21 24 27]) - - t1 = Tensor{0}([1, 2, 3]) - t2 = Tensor{1}([1 2 3]) - t3 = Tensor{0}([2]) - - @test t1 .* t2 == Tensor{1}([1 2 3;2 4 6;3 6 9]) - @test t1 .* t2 .+ t3 == Tensor{1}([3 4 5;4 6 8;5 8 11]) - - @test sin.(t1) == Tensor{0}(sin.([1, 2, 3])) - - s = Tensor{1}(zeros(3, 3)) - - result = (s .= t .+ 1) - - @test result isa Tensor - @test result.data == s.data - @test s == Tensor{1}([2 3 4; 5 6 7; 8 9 10]) - end -end \ No newline at end of file From 02860bb05785338213b7725cf7f61876b61211a9 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Fri, 24 Apr 2026 18:02:24 +0100 Subject: [PATCH 11/16] fix bugs, implement dualarray --- src/indexing.jl | 6 +++--- src/types.jl | 25 +++++++++++++++---------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index bc78038..d59193b 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -31,19 +31,19 @@ function getindex(t::ArrayOperator{<:Any, T, N, M}, i::NTuple{N, Idx}, j::NTuple # new value of M is inferred from the ArrayOperator constructor and the order of # indexing the underlying array. newN = N - count_ints(i) - ArrayOperator{newN, T, newN, M}(sparse_getindex(t.data, i..., j...)) + ArrayOperator{newN}(sparse_getindex(t.data, i..., j...)) end function getindex(t::ArrayOperator{<:Any, T, N, M}, i::NTuple{N, Idx}, ::Colon) where {T, N, M} # new value of M is inferred from the ArrayOperator constructor and the order of # indexing the underlying array. newN = N - count_ints(i) - ArrayOperator{newN, T, newN, M}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) + ArrayOperator{newN}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...)) end # Integer indexing always returns a scalar. getindex(t::ArrayOperator, i::Vararg{Int}) = getindex(t.data, i...) # Indexing with only slices/ranges returns a similar tensor -getindex(t::ArrayOperator{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N, T, N, M}(sparse_getindex(t.data, i...)) +getindex(t::ArrayOperator{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N}(sparse_getindex(t.data, i...)) """ Extract a single Dual number from a DualVector at position y. diff --git a/src/types.jl b/src/types.jl index e7577a7..03314e7 100644 --- a/src/types.jl +++ b/src/types.jl @@ -126,31 +126,36 @@ For now the entries just return the values when indexed. Constructs a DualVector, ensuring that the vector length matches the number of rows in the Jacobian. """ -struct DualVector{T, V <: AbstractVector{T},J <: (ArrayOperator{L, T, 1, M} where {L, M})} <: AbstractVector{Dual{T}} - value::V +struct DualArray{T, N , A <: AbstractArray{T,N},J <: (ArrayOperator{L, T, N, M} where {L, M})} <: AbstractVector{Dual{T}} + value::A jacobian::J - function DualVector(value::V, jacobian::J) where {T, V <: AbstractVector{T}, J <: (Tensor{L, T, 1, M} where {L, M})} - if length(value) != size(jacobian, 1) + function DualArray(value::A, jacobian::J) where {T, N, A <: AbstractArray{T,N}, J <: (ArrayOperator{L, T, N, M} where {L, M})} + if size(value) != ntuple(i -> size(jacobian, i), N) throw(ArgumentError("Length of value vector must match number of rows in Jacobian.")) end - new{T, V, J}(value, jacobian) + new{T,N, A, J}(value, jacobian) end end """ Constructor that forces type compatibility """ -function DualVector(value::AbstractVector, jacobian::ArrayOperator) +function DualArray(value::AbstractArray, jacobian::ArrayOperator) T = promote_type(eltype(value), eltype(jacobian)) - DualVector(_convert_array(T, value), _convert_array(T, jacobian)) + DualArray(_convert_array(T, value), _convert_array(T, jacobian)) end -# Helper function to define DualVectors with AbstractArray jacobians -function DualVector(value::AbstractVector, jacobian::AbstractArray{T, N}) where {T, N} - DualVector(value, ArrayOperator{1}(jacobian)) +# Helper function to define DualArrays with AbstractArray jacobians +function DualArray(value::AbstractArray{S, M}, jacobian::AbstractArray{T, N}) where {S, T, N, M} + DualArray(value, ArrayOperator{M}(jacobian)) end +const DualVector = DualArray{T, 1} where {T} +const DualMatrix = DualArray{T, 2} where {T} + +DualVector(value::AbstractVector, jacobian) = DualArray(value, jacobian) +DualMatrix(value::AbstractMatrix, jacobian) = DualArray(value, jacobian) # Basic equality for Dual numbers ==(a::Dual, b::Dual) = a.value == b.value && a.partials == b.partials isapprox(a::Dual, b::Dual) = isapprox(a.value, b.value) && isapprox(a.partials, b.partials) \ No newline at end of file From a54c7f781cbd2361fa1d4082651cc61e2808152b Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Sat, 25 Apr 2026 13:58:57 +0100 Subject: [PATCH 12/16] change dual to have abstractarray partials --- src/DualArrays.jl | 4 ++-- src/types.jl | 11 +++-------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/DualArrays.jl b/src/DualArrays.jl index c169778..9375853 100644 --- a/src/DualArrays.jl +++ b/src/DualArrays.jl @@ -6,12 +6,12 @@ A Julia package for efficient automatic differentiation using dual numbers and d This package provides: - `Dual`: A dual number type for storing values and their derivatives - `DualVector`: A vector of dual numbers represented with a Jacobian matrix - +- `DualMatrix`: A matrix of dual numbers represented with a Jacobian tensor` Differentiation rules are mostly provided by ChainRules.jl. """ module DualArrays -export DualVector, Dual, jacobian +export DualVector, DualMatrix, Dual, jacobian import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \ diff --git a/src/types.jl b/src/types.jl index 03314e7..6757855 100644 --- a/src/types.jl +++ b/src/types.jl @@ -93,19 +93,14 @@ A dual number type that stores a value and its partials (derivatives). - `value::T`: The primal value - `partials::Partials`: The partial derivatives as a tensor mapping to a scalar """ -struct Dual{T, Partials <: (ArrayOperator{L, T, 0, M} where {L, M})} <: Real +struct Dual{T, Partials <: AbstractArray{T}} <: Real value::T partials::Partials end -function Dual(value::T, partials::ArrayOperator{L, S, 0, M}) where {S, T, L, M} +function Dual(value::T, partials::AbstractArray{S}) where {S, T} T2 = promote_type(T, S) - Dual{T2, ArrayOperator{L, T2, 0, M}}(convert(T2, value), convert(ArrayOperator{L, T2, 0, M}, partials)) -end - -# Helper function to define Duals from an AbstractArray -function Dual(value, partials::AbstractArray{T, N}) where {T, N} - Dual(value, ArrayOperator{0}(partials)) + Dual(convert(T2, value), _convert_array(T2, partials)) end """ From ecbaba8dadbc31a66a8bd620de000bcd94defbaf Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Sat, 25 Apr 2026 18:53:15 +0100 Subject: [PATCH 13/16] add * for ArrayOperator, remove <: AbstractArray and fix resulting bugs --- Project.toml | 4 ++- src/DualArrays.jl | 4 +-- src/arithmetic.jl | 64 ++++++++++++++++++++++++++++++++++++++++++++++- src/types.jl | 33 +++++++++++++++++++----- src/utilities.jl | 4 +-- test/runtests.jl | 2 +- 6 files changed, 98 insertions(+), 13 deletions(-) diff --git a/Project.toml b/Project.toml index 3e86a39..b7409b2 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "DualArrays" uuid = "429e4e16-f749-45f3-beec-30742fae38ce" -authors = ["Sheehan Olver "] version = "0.2.0" +authors = ["Sheehan Olver "] [deps] ArrayLayouts = "4c555306-a7a7-4459-81d9-ec55ddd5c99a" @@ -10,6 +10,7 @@ DiffRules = "b552c78f-8df3-52c6-915a-8e097449b14b" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2" [compat] DiffRules = "1.15.1" @@ -17,6 +18,7 @@ ForwardDiff = "1.2.2" Lux = "1.29.1" Plots = "1.41.1" SparseArrays = "1.10" +TensorOperations = "5.5.2" [extras] BandedMatrices = "aae01518-5342-5314-be14-df237901396f" diff --git a/src/DualArrays.jl b/src/DualArrays.jl index 9375853..19ba52b 100644 --- a/src/DualArrays.jl +++ b/src/DualArrays.jl @@ -13,9 +13,9 @@ module DualArrays export DualVector, DualMatrix, Dual, jacobian -import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \ +import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \, eltype, transpose, permutedims -using LinearAlgebra, ArrayLayouts, FillArrays, DiffRules +using LinearAlgebra, ArrayLayouts, FillArrays, DiffRules, TensorOperations include("types.jl") include("indexing.jl") diff --git a/src/arithmetic.jl b/src/arithmetic.jl index afcfcbc..1effd91 100644 --- a/src/arithmetic.jl +++ b/src/arithmetic.jl @@ -96,4 +96,66 @@ LinearAlgebra.dot(x::DualVector, y::AbstractVector) = Dual(dot(x.value, y), tran LinearAlgebra.dot(x::AbstractVector, y::DualVector) = Dual(dot(x, y.value), transpose(y.jacobian) * x) # solve -\(x::AbstractMatrix, y::DualVector) = DualVector(x \ y.value, x \ y.jacobian) \ No newline at end of file +\(x::AbstractMatrix, y::DualVector) = DualVector(x \ y.value, ArrayOperator{1}(x \ y.jacobian.data)) + +""" +----------------------- +Multiplication with ArrayOperators +----------------------- + +* for ArrayOperators generalises from matrix/vector +multiplication and uses tensor contraction provided by +TensorOperations.jl. Let an (A, B) ArrayOperator denote +an ArrayOperator with output dimension A and input dimension B +(i.e an ArrayOperator{A+B, T, A, B} for some type T). We only +allow multiplication between an (A, B) ArrayOperator and a (B, C) +ArrayOperator, with the result being an (A, C) ArrayOperator. +This generalises from: + +-An inner product: a row vector * a column vector -> a scalar: + (0, 1) * (1, 0) -> (0, 0) + +-An outer product: a column vector * a row vector -> a matrix: + (1, 0) * (0, 1) -> (1, 1) + +- matrix/vector multiplication: a matrix * a column vector -> a column vector: + (1, 1) * (1, 0) -> (1, 0) + +We can treat multiplication between ArrayOperators and regular arrays as multiplication +between two ArrayOperators: given an (A, B) ArrayOperator and an L-array, we can fix it as +having input dimension B and infer C = L - B. We use this contraction rule to multiply the two. +""" + +# Helper function to perform tensor contraction between x and y +function _contract(x, y, A, B, C) + x_idx = (ntuple(i -> i, A), ntuple(i -> i + A, B)) + y_idx = (ntuple(i -> i, B), ntuple(i -> i + B, C)) + ret_idx = (ntuple(i -> i, A), ntuple(i -> i + A, C)) + + return TensorOperations.tensorcontract(x, x_idx, false, y, y_idx, false, ret_idx, 1) +end + +function *(x::ArrayOperator{<:Any, <:Any, A, B}, y::ArrayOperator{<:Any, <:Any, B, C}) where {A, B, C} + return ArrayOperator{A}(_contract(x.data, y.data, A, B, C)) +end + +function *(x::ArrayOperator{<:Any, <:Any, A, B}, y::AbstractArray{<:Any, L}) where {A, B, L} + C = L - B + return ArrayOperator{A}(_contract(x.data, y, A, B, C)) +end + +function *(x::AbstractArray{<:Any, L}, y::ArrayOperator{<:Any, <:Any, B, C}) where {L, B, C} + A = L - B + return ArrayOperator{A}(_contract(x, y.data, A, B, C)) +end + +# Extra required arithmetic for ArrayOperators +Base.:+(x::ArrayOperator, y::ArrayOperator) = x .+ y +Base.:-(x::ArrayOperator, y::ArrayOperator) = x .- y +Base.:-(x::ArrayOperator) = (-).(x) + +Base.:*(a::Number, t::ArrayOperator{L, T, N, M}) where {L, T, N, M} = + ArrayOperator{L, promote_type(typeof(a), T), N, M}(a .* t.data) +Base.:*(t::ArrayOperator, a::Number) = a * t + + diff --git a/src/types.jl b/src/types.jl index 6757855..e592488 100644 --- a/src/types.jl +++ b/src/types.jl @@ -22,7 +22,7 @@ a + Jϵ Where a is an N-array of real numbers, J is an N+M=tensor and ϵ is an M-array of dual parts. In the simplest case, where N = 0, we have a Dual number with dual parts arranged in an M-array. """ -struct ArrayOperator{L, T, N, M} <: AbstractArray{T, L} +struct ArrayOperator{L, T, N, M} data::AbstractArray{T, L} end @@ -39,13 +39,28 @@ Base.convert(::Type{ArrayOperator{L, T, N, M}}, tensor::ArrayOperator{L, S, N, M ArrayOperator{L, T, N, M}(_convert_array(T, tensor.data)) # Basic array interface -for op in (:size, :axes) +for op in (:size, :axes, :iterate) @eval begin ($op)(t::ArrayOperator) = ($op)(t.data) ($op)(t::ArrayOperator, i...) = ($op)(t.data, i...) end end +# Since ArrayOperator is not an AbstractArray we define these manually +eltype(t::ArrayOperator) = eltype(t.data) + +Base.Broadcast.broadcastable(t::ArrayOperator) = t + +transpose(t::ArrayOperator) = transpose(t.data) +sum(t::ArrayOperator; kwargs...) = sum(t.data; kwargs...) + +==(a::ArrayOperator, b::ArrayOperator) = a.data == b.data +==(a::ArrayOperator, b::AbstractArray) = a.data == b +==(a::AbstractArray, b::ArrayOperator) = a == b.data +isapprox(a::ArrayOperator, b::ArrayOperator; kwargs...) = isapprox(a.data, b.data; kwargs...) +isapprox(a::ArrayOperator, b::AbstractArray; kwargs...) = isapprox(a.data, b; kwargs...) +isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(a, b.data; kwargs...) + # Below we define a broadcast style for ArrayOperators and override copy and copyto! # This allows all arithmetic/broadcasting with ArrayOperators to be handled by the # underlying logic of the array contained in the struct, while ensuring that @@ -57,14 +72,15 @@ ArrayOperatorBroadcastStyle{N}(::Val{M}) where {N, M} = ArrayOperatorBroadcastSt # N is the input dimension of the tensor being broadcasted. # For he result of the broadcast we will choose to preserve the highest input dimension. Base.BroadcastStyle(::Type{<:ArrayOperator{<:Any, <:Any, N, <:Any}}) where {N} = ArrayOperatorBroadcastStyle{N}() -Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{0}) where {N} = ArrayOperatorBroadcastStyle{N}() -Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{1}) where {N} = ArrayOperatorBroadcastStyle{N}() +Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::Broadcast.DefaultArrayStyle{M}) where {N, M} = ArrayOperatorBroadcastStyle{N}() +Base.BroadcastStyle(::Broadcast.DefaultArrayStyle{M}, ::ArrayOperatorBroadcastStyle{N}) where {N, M} = ArrayOperatorBroadcastStyle{N}() Base.BroadcastStyle(::ArrayOperatorBroadcastStyle{N}, ::ArrayOperatorBroadcastStyle{M}) where {N, M} = ArrayOperatorBroadcastStyle{max(N, M)}() # Helper functions to help define broadcasting/arithmetic with ArrayOperators. # By converting a broadcast involving ArrayOperators into a broadcast # involving the underlying arrays. _unwrap(t::ArrayOperator) = t.data +_unwrap(bc::Broadcast.Broadcasted) = Broadcast.Broadcasted(bc.f, _unwrap_args(bc.args), bc.axes) _unwrap(x) = x _unwrap_args(args::Tuple) = map(_unwrap, args) @@ -85,13 +101,13 @@ function Base.copyto!(dest::ArrayOperator, bc::Broadcast.Broadcasted{ArrayOperat end """ - Dual{T, Partials <: AbstractVector{T}} <: Real + Dual{T, Partials <: AbstractArray{T}} <: Real A dual number type that stores a value and its partials (derivatives). # Fields - `value::T`: The primal value -- `partials::Partials`: The partial derivatives as a tensor mapping to a scalar +- `partials::Partials`: The partial derivatives stored as an array """ struct Dual{T, Partials <: AbstractArray{T}} <: Real value::T @@ -103,6 +119,11 @@ function Dual(value::T, partials::AbstractArray{S}) where {S, T} Dual(convert(T2, value), _convert_array(T2, partials)) end +function Dual(value::T, partials::ArrayOperator{L, S, 0, M}) where {L, S, M, T} + T2 = promote_type(T, S) + Dual(convert(T2, value), _convert_array(T2, partials).data) +end + """ DualVector{T, M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}} diff --git a/src/utilities.jl b/src/utilities.jl index e892b95..91d3c20 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -10,8 +10,8 @@ end # Helper functions for vcat operations _jacobian(d::Dual) = permutedims(d.partials) -_jacobian(d::DualVector) = d.jacobian -_jacobian(d::DualVector, ::Int) = d.jacobian +_jacobian(d::DualVector) = d.jacobian.data +_jacobian(d::DualVector, ::Int) = d.jacobian.data _jacobian(x::Number, N::Int) = Zeros(typeof(x), 1, N) _value(v::AbstractVector) = v diff --git a/test/runtests.jl b/test/runtests.jl index 3875011..9b87f05 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,7 +5,7 @@ using DualArrays: ArrayOperator @testset "Type Definition" begin @test_throws ArgumentError DualVector([1,2],I(3)) - @test Dual(1.0, [1, 2, 3]).partials == ArrayOperator{0}([1.0, 2.0, 3.0]) + @test Dual(1.0, [1, 2, 3]).partials == [1.0, 2.0, 3.0] end @testset "Indexing" begin From e356ef728001023d2dd6c3f6d26b316803c88c14 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Mon, 27 Apr 2026 14:54:10 +0100 Subject: [PATCH 14/16] add tests, improve coverage --- src/indexing.jl | 19 +++++++------------ src/types.jl | 4 +++- src/utilities.jl | 2 +- test/array_operator_test.jl | 25 ++++++++++++++++++++++++- test/runtests.jl | 8 +++++++- 5 files changed, 42 insertions(+), 16 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index d59193b..189b0aa 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -46,10 +46,10 @@ getindex(t::ArrayOperator, i::Vararg{Int}) = getindex(t.data, i...) getindex(t::ArrayOperator{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N}(sparse_getindex(t.data, i...)) """ -Extract a single Dual number from a DualVector at position y. +Extract a single Dual number from a DualArray. """ -function getindex(x::DualVector, y::Int) - Dual(x.value[y], x.jacobian[(y,), :]) +function getindex(x::DualArray, args::Vararg{Int}) + Dual(x.value[args...], x.jacobian[args, :]) end """ @@ -61,12 +61,7 @@ function getindex(x::DualVector, y::UnitRange) DualVector(newval, newjac) end -""" -Return the size of the DualVector (length of the value vector). -""" -size(x::DualVector) = (length(x.value),) - -""" -Return the axes of the DualVector. -""" -axes(x::DualVector) = axes(x.value) \ No newline at end of file +# DualArray array interface +for op in (:size, :axes) + @eval $op(a::DualArray) = $op(a.value) +end \ No newline at end of file diff --git a/src/types.jl b/src/types.jl index e592488..725e2b1 100644 --- a/src/types.jl +++ b/src/types.jl @@ -66,7 +66,7 @@ isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(a, b.data; kw # underlying logic of the array contained in the struct, while ensuring that # all results stay as a ArrayOperator. -struct ArrayOperatorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{1} end +struct ArrayOperatorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{0} end ArrayOperatorBroadcastStyle{N}(::Val{M}) where {N, M} = ArrayOperatorBroadcastStyle{N}() # N is the input dimension of the tensor being broadcasted. @@ -108,6 +108,8 @@ A dual number type that stores a value and its partials (derivatives). # Fields - `value::T`: The primal value - `partials::Partials`: The partial derivatives stored as an array + +NOTE: Partials will soon be in the ArrayOperator format. """ struct Dual{T, Partials <: AbstractArray{T}} <: Real value::T diff --git a/src/utilities.jl b/src/utilities.jl index 91d3c20..726af37 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -49,7 +49,7 @@ end """ Custom display method for DualVectors. """ -show(io::IO, ::MIME"text/plain", x::DualVector) = +show(io::IO, ::MIME"text/plain", x::DualArray) = (print(io, x.value); print(io, " + "); print(io, x.jacobian); print(io, "𝛜")) """ diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl index 57e9969..4fa3ff3 100644 --- a/test/array_operator_test.jl +++ b/test/array_operator_test.jl @@ -1,8 +1,21 @@ using Test, LinearAlgebra using DualArrays: ArrayOperator -@testset "Tensor" begin +@testset "ArrayOperator" begin t = ArrayOperator{1}([1 2 3;4 5 6;7 8 9]) + + @testset "Equality" begin + @test t == [1 2 3;4 5 6;7 8 9] + @test [1 2 3;4 5 6;7 8 9] == t + @test isapprox(t, [1 2 3;4 5 6;7 8 9]) + end + @testset "BroadcastStyle Val constructor" begin + # A plain broadcast operation; broadcast machinery should dimension-adapt the + # ArrayOperator broadcast style via ArrayOperatorBroadcastStyle{N}(::Val{M}). + u = t .+ ones(3, 3) + @test u isa ArrayOperator + @test u == ArrayOperator{1}([2 3 4; 5 6 7; 8 9 10]) + end @testset "Indexing" begin @test t[1,1] == 1 @test t[(1,), (:,)] == ArrayOperator{0}([1, 2, 3]) @@ -26,6 +39,7 @@ using DualArrays: ArrayOperator t2 = ArrayOperator{1}([1 2 3]) t3 = ArrayOperator{0}([2]) + @test t1 .+ ones(3) == ArrayOperator{0}([2, 3, 4]) @test t1 .* t2 == ArrayOperator{1}([1 2 3;2 4 6;3 6 9]) @test t1 .* t2 .+ t3 == ArrayOperator{1}([3 4 5;4 6 8;5 8 11]) @@ -38,5 +52,14 @@ using DualArrays: ArrayOperator @test result isa ArrayOperator @test result.data == s.data @test s == ArrayOperator{1}([2 3 4; 5 6 7; 8 9 10]) + + @test t * t == ArrayOperator{1}([30 36 42;66 81 96; 102 126 150]) + @test t * [1, 0, 0] == ArrayOperator{1}([1,4,7]) + @test [1 0 0] * t == ArrayOperator{1}([1 2 3]) + + @test t * 2 == [2 4 6;8 10 12;14 16 18] + @test 2 * t == [2 4 6;8 10 12;14 16 18] + + @test s - t == ones(3, 3) end end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 9b87f05..144ed01 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,11 +9,17 @@ using DualArrays: ArrayOperator end @testset "Indexing" begin - v = DualVector([1, 2, 3], [1 2 3; 4 5 6;7 8 9]) + v = DualVector([1., 2, 3], [1 2 3; 4 5 6;7 8 9]) + m = DualMatrix([1 2;3 4], zeros(2, 2, 2)) @test size(v) == (3,) @test axes(v) == (Base.OneTo(3),) + @test size(m) == (2, 2) + @test axes(m) == (Base.OneTo(2), Base.OneTo(2)) + + @test m[1, 1] == Dual(1, [0, 0]) + @test v[1] isa Dual @test v[1] == Dual(1,[1,2,3]) @test v[2] == Dual(2,[4,5,6]) From 13ef5b8de9b97cf907f72df69984ff5b25df842c Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Mon, 27 Apr 2026 15:12:39 +0100 Subject: [PATCH 15/16] small additions --- src/types.jl | 5 +---- test/array_operator_test.jl | 9 +-------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/src/types.jl b/src/types.jl index 725e2b1..08cb494 100644 --- a/src/types.jl +++ b/src/types.jl @@ -35,9 +35,6 @@ end _convert_array(::Type{T}, a::AbstractArray) where {T} = T.(a) _convert_array(::Type{T}, t::ArrayOperator{L, S, N, M}) where {T, L, S, N, M} = ArrayOperator{L, T, N, M}(_convert_array(T, t.data)) -Base.convert(::Type{ArrayOperator{L, T, N, M}}, tensor::ArrayOperator{L, S, N, M}) where {L, T, N, M, S} = - ArrayOperator{L, T, N, M}(_convert_array(T, tensor.data)) - # Basic array interface for op in (:size, :axes, :iterate) @eval begin @@ -59,7 +56,7 @@ sum(t::ArrayOperator; kwargs...) = sum(t.data; kwargs...) ==(a::AbstractArray, b::ArrayOperator) = a == b.data isapprox(a::ArrayOperator, b::ArrayOperator; kwargs...) = isapprox(a.data, b.data; kwargs...) isapprox(a::ArrayOperator, b::AbstractArray; kwargs...) = isapprox(a.data, b; kwargs...) -isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(a, b.data; kwargs...) +isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(b, a) # Below we define a broadcast style for ArrayOperators and override copy and copyto! # This allows all arithmetic/broadcasting with ArrayOperators to be handled by the diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl index 4fa3ff3..0194b3a 100644 --- a/test/array_operator_test.jl +++ b/test/array_operator_test.jl @@ -9,13 +9,6 @@ using DualArrays: ArrayOperator @test [1 2 3;4 5 6;7 8 9] == t @test isapprox(t, [1 2 3;4 5 6;7 8 9]) end - @testset "BroadcastStyle Val constructor" begin - # A plain broadcast operation; broadcast machinery should dimension-adapt the - # ArrayOperator broadcast style via ArrayOperatorBroadcastStyle{N}(::Val{M}). - u = t .+ ones(3, 3) - @test u isa ArrayOperator - @test u == ArrayOperator{1}([2 3 4; 5 6 7; 8 9 10]) - end @testset "Indexing" begin @test t[1,1] == 1 @test t[(1,), (:,)] == ArrayOperator{0}([1, 2, 3]) @@ -39,7 +32,7 @@ using DualArrays: ArrayOperator t2 = ArrayOperator{1}([1 2 3]) t3 = ArrayOperator{0}([2]) - @test t1 .+ ones(3) == ArrayOperator{0}([2, 3, 4]) + @test t1 .+ ones(3, 3) == [2 2 2;3 3 3; 4 4 4] @test t1 .* t2 == ArrayOperator{1}([1 2 3;2 4 6;3 6 9]) @test t1 .* t2 .+ t3 == ArrayOperator{1}([3 4 5;4 6 8;5 8 11]) From 514b34aaa63cf30994bd31f77dbe219528176a72 Mon Sep 17 00:00:00 2001 From: Max Vassiliev Date: Mon, 27 Apr 2026 15:27:01 +0100 Subject: [PATCH 16/16] fix coverage --- src/types.jl | 1 - test/array_operator_test.jl | 1 + test/runtests.jl | 4 ++++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/types.jl b/src/types.jl index 08cb494..82330bf 100644 --- a/src/types.jl +++ b/src/types.jl @@ -64,7 +64,6 @@ isapprox(a::AbstractArray, b::ArrayOperator; kwargs...) = isapprox(b, a) # all results stay as a ArrayOperator. struct ArrayOperatorBroadcastStyle{N} <: Broadcast.AbstractArrayStyle{0} end -ArrayOperatorBroadcastStyle{N}(::Val{M}) where {N, M} = ArrayOperatorBroadcastStyle{N}() # N is the input dimension of the tensor being broadcasted. # For he result of the broadcast we will choose to preserve the highest input dimension. diff --git a/test/array_operator_test.jl b/test/array_operator_test.jl index 0194b3a..9857e44 100644 --- a/test/array_operator_test.jl +++ b/test/array_operator_test.jl @@ -8,6 +8,7 @@ using DualArrays: ArrayOperator @test t == [1 2 3;4 5 6;7 8 9] @test [1 2 3;4 5 6;7 8 9] == t @test isapprox(t, [1 2 3;4 5 6;7 8 9]) + @test isapprox([1 2 3;4 5 6;7 8 9], t) end @testset "Indexing" begin @test t[1,1] == 1 diff --git a/test/runtests.jl b/test/runtests.jl index 144ed01..2f4c8ed 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -97,6 +97,10 @@ using DualArrays: ArrayOperator @test vcat(x) == DualVector([1], [1 2 3]) @test vcat(x, x) == DualVector([1, 1], [1 2 3;1 2 3]) @test vcat(x, y) == DualVector([1, 2, 3], [1 2 3;4 5 6;7 8 9]) + + z = DualVector([2, 3], [1 0; 0 1]) + @test vcat(1, z).jacobian.data == [0 0; 1 0; 0 1] + @test vcat(z, 1).jacobian.data == [1 0; 0 1; 0 0] end @testset "show" begin