Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "DualArrays"
uuid = "429e4e16-f749-45f3-beec-30742fae38ce"
authors = ["Sheehan Olver <solver@mac.com>"]
version = "0.2.0"
authors = ["Sheehan Olver <solver@mac.com>"]

[deps]
ArrayLayouts = "4c555306-a7a7-4459-81d9-ec55ddd5c99a"
Expand All @@ -10,13 +10,15 @@ DiffRules = "b552c78f-8df3-52c6-915a-8e097449b14b"
FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2"

[compat]
DiffRules = "1.15.1"
ForwardDiff = "1.2.2"
Lux = "1.29.1"
Plots = "1.41.1"
SparseArrays = "1.10"
TensorOperations = "5.5.2"

[extras]
BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
Expand Down
8 changes: 4 additions & 4 deletions src/DualArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@ A Julia package for efficient automatic differentiation using dual numbers and d
This package provides:
- `Dual`: A dual number type for storing values and their derivatives
- `DualVector`: A vector of dual numbers represented with a Jacobian matrix

- `DualMatrix`: A matrix of dual numbers represented with a Jacobian tensor`
Differentiation rules are mostly provided by ChainRules.jl.
"""
module DualArrays

export DualVector, Dual, jacobian
export DualVector, DualMatrix, Dual, jacobian

import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \
import Base: +, -, ==, getindex, size, axes, broadcasted, show, sum, vcat, convert, *, isapprox, \, eltype, transpose, permutedims

using LinearAlgebra, ArrayLayouts, FillArrays, DiffRules
using LinearAlgebra, ArrayLayouts, FillArrays, DiffRules, TensorOperations

include("types.jl")
include("indexing.jl")
Expand Down
73 changes: 69 additions & 4 deletions src/arithmetic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ function diff_fn(f, n)
return map(dx -> makepartials(dx, syms), d)
end

# We redefine one here since it is required for the derivative of + and -.
one(x::Dual) = Dual(one(x.value), zero(x.partials))

for (_, f, n) in DiffRules.diffrules(filter_modules=(:Base,))
partials = diff_fn(f, n)
if n == 1
Expand Down Expand Up @@ -78,9 +81,9 @@ for (_, f, n) in DiffRules.diffrules(filter_modules=(:Base,))
return DualVector(val, jac)
end
# Must have Base.$f in order not to import everything
@eval Base.$f(x::Dual, y::Dual) = Dual($f(x.value, y.value), $p1(x.value, y.value) * x.partials + $p2(x.value, y.value) * y.partials)
@eval Base.$f(x::Dual, y::Real) = Dual($f(x.value, y), $p1(x.value, y) * x.partials)
@eval Base.$f(x::Real, y::Dual) = Dual($f(x, y.value), $p2(x, y.value) * y.partials)
@eval Base.$f(x::Dual, y::Dual) = Dual($f(x.value, y.value), $p1(x.value, y.value) .* x.partials + $p2(x.value, y.value) .* y.partials)
@eval Base.$f(x::Dual, y::Real) = Dual($f(x.value, y), $p1(x.value, y) .* x.partials)
@eval Base.$f(x::Real, y::Dual) = Dual($f(x, y.value), $p2(x, y.value) .* y.partials)
end
end

Expand All @@ -96,4 +99,66 @@ LinearAlgebra.dot(x::DualVector, y::AbstractVector) = Dual(dot(x.value, y), tran
LinearAlgebra.dot(x::AbstractVector, y::DualVector) = Dual(dot(x, y.value), transpose(y.jacobian) * x)

# solve
\(x::AbstractMatrix, y::DualVector) = DualVector(x \ y.value, x \ y.jacobian)
\(x::AbstractMatrix, y::DualVector) = DualVector(x \ y.value, ArrayOperator{1}(x \ y.jacobian.data))

"""
-----------------------
Multiplication with ArrayOperators
-----------------------

* for ArrayOperators generalises from matrix/vector
multiplication and uses tensor contraction provided by
TensorOperations.jl. Let an (A, B) ArrayOperator denote
an ArrayOperator with output dimension A and input dimension B
(i.e an ArrayOperator{A+B, T, A, B} for some type T). We only
allow multiplication between an (A, B) ArrayOperator and a (B, C)
ArrayOperator, with the result being an (A, C) ArrayOperator.
This generalises from:

-An inner product: a row vector * a column vector -> a scalar:
(0, 1) * (1, 0) -> (0, 0)

-An outer product: a column vector * a row vector -> a matrix:
(1, 0) * (0, 1) -> (1, 1)

- matrix/vector multiplication: a matrix * a column vector -> a column vector:
(1, 1) * (1, 0) -> (1, 0)

We can treat multiplication between ArrayOperators and regular arrays as multiplication
between two ArrayOperators: given an (A, B) ArrayOperator and an L-array, we can fix it as
having input dimension B and infer C = L - B. We use this contraction rule to multiply the two.
"""

# Helper function to perform tensor contraction between x and y
function _contract(x, y, A, B, C)
x_idx = (ntuple(i -> i, A), ntuple(i -> i + A, B))
y_idx = (ntuple(i -> i, B), ntuple(i -> i + B, C))
ret_idx = (ntuple(i -> i, A), ntuple(i -> i + A, C))

return TensorOperations.tensorcontract(x, x_idx, false, y, y_idx, false, ret_idx, 1)
end

function *(x::ArrayOperator{<:Any, <:Any, A, B}, y::ArrayOperator{<:Any, <:Any, B, C}) where {A, B, C}
return ArrayOperator{A}(_contract(x.data, y.data, A, B, C))
end

function *(x::ArrayOperator{<:Any, <:Any, A, B}, y::AbstractArray{<:Any, L}) where {A, B, L}
C = L - B
return ArrayOperator{A}(_contract(x.data, y, A, B, C))
end

function *(x::AbstractArray{<:Any, L}, y::ArrayOperator{<:Any, <:Any, B, C}) where {L, B, C}
A = L - B
return ArrayOperator{A}(_contract(x, y.data, A, B, C))
end

# Extra required arithmetic for ArrayOperators
Base.:+(x::ArrayOperator, y::ArrayOperator) = x .+ y
Base.:-(x::ArrayOperator, y::ArrayOperator) = x .- y
Base.:-(x::ArrayOperator) = (-).(x)

Base.:*(a::Number, t::ArrayOperator{L, T, N, M}) where {L, T, N, M} =
ArrayOperator{L, promote_type(typeof(a), T), N, M}(a .* t.data)
Base.:*(t::ArrayOperator, a::Number) = a * t


80 changes: 68 additions & 12 deletions src/indexing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,87 @@
using ArrayLayouts, FillArrays, LinearAlgebra, SparseArrays

sparse_getindex(a...) = layout_getindex(a...)
sparse_getindex(d::DualMatrix, a...) = getindex(d, a...)
sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D, 2))
sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D, 1))


# We need a system of indexing that takes two tuples
# of length N and M. We must then return a ArrayOperator whose new input and output dimensions
# are inferred from how many of the arguments in each respective tuple are integers.

# For the purposes of DualArrays.jl, we only need to index the input dimensions
#
# Example:
#
# Consider a DualVector with standard matrix Jacobian (ArrayOperator{2, T, 1, 1}):
#
# d = a + Bϵ
#
# If we index d[1], we have:
#
# d[1] = a[1] + B[1, :]ϵ
#
# Where B[1, :] indexes the input dimension of B
Idx = Union{Int, Colon, AbstractUnitRange}
count_ints(t::Tuple) = count(x -> x isa Int, t)

function getindex(t::ArrayOperator{<:Any, T, N, M}, i::NTuple{N, Idx}, j::NTuple{M, Idx}) where {T, N, M}
# new value of M is inferred from the ArrayOperator constructor and the order of
# indexing the underlying array.
newN = N - count_ints(i)
ArrayOperator{newN}(sparse_getindex(t.data, i..., j...))
end

function getindex(t::ArrayOperator{<:Any, T, N, M}, i::NTuple{N, Idx}, ::Colon) where {T, N, M}
# new value of M is inferred from the ArrayOperator constructor and the order of
# indexing the underlying array.
newN = N - count_ints(i)
ArrayOperator{newN}(sparse_getindex(t.data, i..., ntuple(_ -> Colon(), M)...))
end
# Integer indexing always returns a scalar.
getindex(t::ArrayOperator, i::Vararg{Int}) = getindex(t.data, i...)
# Indexing with only slices/ranges returns a similar tensor
getindex(t::ArrayOperator{<:Any, <:Any, N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N}(sparse_getindex(t.data, i...))

"""
Extract a single Dual number from a DualVector at position y.
Extract a single Dual number from a DualArray.
"""
function Base.getindex(x::DualVector, y::Int)
Dual(x.value[y], sparse_getindex(x.jacobian, y, :))
function getindex(x::DualArray, args::Vararg{Int})
Dual(x.value[args...], x.jacobian[args, :])
end

"""
Extract a sub-DualVector from a DualVector using a range.
Extract a single Dual number from a DualMatrix at position (y, z).
"""
function Base.getindex(x::DualVector, y::UnitRange)
newval = x.value[y]
newjac = sparse_getindex(x.jacobian, y, :)
DualVector(newval, newjac)
function getindex(x::DualMatrix, y::Int, z::Int)
# We collapse the epsilons into a single vector of partials.
Dual(x.value[y, z], vec(sparse_getindex(x.jacobian, y, z, :, :)))
end

"""
Return the size of the DualVector (length of the value vector).
Extract a row slice from a DualMatrix, returning a DualVector.

NOTE: Since a DualVector requires a 2-dimensional Jacobian, and slicing a 4-tensor returns a 3-tensor,
we implicitly 'flatten' the matrix of epsilons. This is done by the reshape.

For example, in a 2x2 case, [ϵ₁₁ ϵ₁₂;ϵ₂₁ ϵ₂₂] becomes [ϵ₁, ϵ₂, ϵ₃, ϵ₄].
"""
Base.size(x::DualVector) = (length(x.value),)
function getindex(x::DualMatrix, y::Int, ::Colon)
n = size(x.value, 2)
DualVector(x.value[y, :], reshape(sparse_getindex(x.jacobian, y, :, :, :), n, :))
end

"""
Return the axes of the DualVector.
Extract a sub-DualVector from a DualVector using a range.
"""
Base.axes(x::DualVector) = axes(x.value)
function getindex(x::DualVector, y::UnitRange)
newval = x.value[y]
newjac = x.jacobian[y, :]
DualVector(newval, newjac)
end

# DualArray array interface
for op in (:size, :axes)
@eval $op(a::DualArray) = $op(a.value)
end
Loading
Loading