From fe4e350ad232f477ac3b193ab472474799bf5732 Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Tue, 24 Feb 2026 20:52:11 -0600 Subject: [PATCH 1/2] Update the interface for DI.jl --- Project.toml | 10 +- docs/Project.toml | 2 +- docs/src/backend.md | 20 ++-- docs/src/predefined.md | 2 +- src/ADNLPModels.jl | 4 +- src/di.jl | 209 ++++++++++++++++++++++++++++++++++++++ src/predefined_backend.jl | 16 +++ test/runtests.jl | 38 ++++++- test/utils.jl | 36 ------- 9 files changed, 283 insertions(+), 54 deletions(-) create mode 100644 src/di.jl delete mode 100644 test/utils.jl diff --git a/Project.toml b/Project.toml index 337ebfde..65149d9f 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ version = "0.8.13" [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" +DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6" @@ -19,10 +20,11 @@ Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" ADNLPModelsEnzymeExt = "Enzyme" [compat] -ADTypes = "1.2.1" -ForwardDiff = "1" -NLPModels = "0.21.5" -ReverseDiff = "1" +ADTypes = "1.21.0" +DifferentiationInterface = "0.7.16" +ForwardDiff = "1.3.2" +NLPModels = "0.21.8" +ReverseDiff = "1.16.2" SparseConnectivityTracer = "1" SparseMatrixColorings = "0.4.21" Enzyme = "0.13.129" diff --git a/docs/Project.toml b/docs/Project.toml index a08a60e3..da1daa95 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -19,7 +19,7 @@ Documenter = "1.0" ManualNLPModels = "0.2" NLPModels = "0.21.5" NLPModelsJuMP = "0.13" -OptimizationProblems = "0.8" +OptimizationProblems = "0.9" Percival = "0.7" Plots = "1" SolverBenchmark = "0.6" diff --git a/docs/src/backend.md b/docs/src/backend.md index 3b6393b0..dea5b5a8 100644 --- a/docs/src/backend.md +++ b/docs/src/backend.md @@ -1,21 +1,21 @@ # How to switch backend in ADNLPModels `ADNLPModels` allows the use of different 
backends to compute the derivatives required within NLPModel API. -It uses `ForwardDiff.jl`, `ReverseDiff.jl`, and more via optional depencies. +It uses `ForwardDiff.jl`, `ReverseDiff.jl`, and more via optional dependencies. The backend information is in a structure [`ADNLPModels.ADModelBackend`](@ref) in the attribute `adbackend` of a `ADNLPModel`, it can also be accessed with [`get_adbackend`](@ref). The functions used internally to define the NLPModel API and the possible backends are defined in the following table: -| Functions | FowardDiff backends | ReverseDiff backends | Enzyme backend | Sparse backend | -| --------- | ------------------- | -------------------- | -------------- | -------------- | -| `gradient` and `gradient!` | `ForwardDiffADGradient`/`GenericForwardDiffADGradient` | `ReverseDiffADGradient`/`GenericReverseDiffADGradient` | `EnzymeReverseADGradient` | -- | -| `jacobian` | `ForwardDiffADJacobian` | `ReverseDiffADJacobian` | `SparseEnzymeADJacobian` | `SparseADJacobian` | -| `hessian` | `ForwardDiffADHessian` | `ReverseDiffADHessian` | `SparseEnzymeADHessian` | `SparseADHessian`/`SparseReverseADHessian` | -| `Jprod` | `ForwardDiffADJprod`/`GenericForwardDiffADJprod` | `ReverseDiffADJprod`/`GenericReverseDiffADJprod` | `EnzymeReverseADJprod` | -- | -| `Jtprod` | `ForwardDiffADJtprod`/`GenericForwardDiffADJtprod` | `ReverseDiffADJtprod`/`GenericReverseDiffADJtprod` | `EnzymeReverseADJtprod` | -- | -| `Hvprod` | `ForwardDiffADHvprod`/`GenericForwardDiffADHvprod` | `ReverseDiffADHvprod`/`GenericReverseDiffADHvprod` | `EnzymeReverseADHvprod` | -- | -| `directional_second_derivative` | `ForwardDiffADGHjvprod` | -- | -- | -- | +| Functions | ForwardDiff backends | ReverseDiff backends | Enzyme backend | Sparse backend | DI backend | +| --------- | ------------------- | -------------------- | -------------- | -------------- | -------------- | +| `gradient` and `gradient!` | `ForwardDiffADGradient`/`GenericForwardDiffADGradient` |
`ReverseDiffADGradient`/`GenericReverseDiffADGradient` | `EnzymeReverseADGradient` | -- | `DIADGradient` | +| `jacobian` | `ForwardDiffADJacobian` | `ReverseDiffADJacobian` | `SparseEnzymeADJacobian` | `SparseADJacobian` | `DIADJacobian` / `SparseDIADJacobian` | +| `hessian` | `ForwardDiffADHessian` | `ReverseDiffADHessian` | `SparseEnzymeADHessian` | `SparseADHessian`/`SparseReverseADHessian` | `DIADHessian` / `SparseDIADHessian` | +| `Jprod` | `ForwardDiffADJprod`/`GenericForwardDiffADJprod` | `ReverseDiffADJprod`/`GenericReverseDiffADJprod` | `EnzymeReverseADJprod` | -- | `DIADJprod` | +| `Jtprod` | `ForwardDiffADJtprod`/`GenericForwardDiffADJtprod` | `ReverseDiffADJtprod`/`GenericReverseDiffADJtprod` | `EnzymeReverseADJtprod` | -- | `DIADJtprod` | +| `Hvprod` | `ForwardDiffADHvprod`/`GenericForwardDiffADHvprod` | `ReverseDiffADHvprod`/`GenericReverseDiffADHvprod` | `EnzymeReverseADHvprod` | -- | `DIADHvprod` | +| `directional_second_derivative` | `ForwardDiffADGHjvprod` | -- | -- | -- | -- | The functions `hess_structure!`, `hess_coord!`, `jac_structure!` and `jac_coord!` defined in `ad.jl` are generic to all the backends for now. diff --git a/docs/src/predefined.md b/docs/src/predefined.md index 5f778ff8..3e8d58f6 100644 --- a/docs/src/predefined.md +++ b/docs/src/predefined.md @@ -40,7 +40,7 @@ ADNLPModels.predefined_backend[:optimized] The backend `:generic` focuses on backend that make no assumptions on the element type, see [Creating an ADNLPModels backend that supports multiple precisions](https://jso.dev/tutorials/generic-adnlpmodels/). -It is possible to use these pre-defined backends using the keyword argument `backend` when instantiating the model. +It is possible to use these pre-defined backends by using the keyword argument `backend` when instantiating the model.
```@example ex1 nlp = ADNLPModel!(f, x0, lvar, uvar, c!, lcon, ucon, backend = :optimized) diff --git a/src/ADNLPModels.jl b/src/ADNLPModels.jl index f89c384e..4d16301d 100644 --- a/src/ADNLPModels.jl +++ b/src/ADNLPModels.jl @@ -4,7 +4,8 @@ module ADNLPModels using LinearAlgebra, SparseArrays # external -using ADTypes: ADTypes, AbstractColoringAlgorithm, AbstractSparsityDetector +import DifferentiationInterface +using ADTypes: ADTypes, AbstractADType, AbstractColoringAlgorithm, AbstractSparsityDetector, AutoForwardDiff, AutoReverseDiff using SparseConnectivityTracer: TracerSparsityDetector using SparseMatrixColorings using ForwardDiff, ReverseDiff @@ -27,6 +28,7 @@ include("sparse_hessian.jl") include("forward.jl") include("reverse.jl") include("enzyme.jl") +include("di.jl") include("predefined_backend.jl") include("nlp.jl") diff --git a/src/di.jl b/src/di.jl new file mode 100644 index 00000000..9c63cd81 --- /dev/null +++ b/src/di.jl @@ -0,0 +1,209 @@ +struct DIADGradient{B, E} <: ADBackend + backend::B + prep::E +end + +function DIADGradient( + nvar::Integer, + f, + ncon::Integer = 0, + c::Function = (args...) -> []; + x0::AbstractVector = rand(nvar), + backend = AutoReverseDiff(), + kwargs..., +) + prep = DifferentiationInterface.prepare_gradient(f, backend, x0) + return DIADGradient(backend, prep) +end + +function gradient(b::DIADGradient, f, x) + g = DifferentiationInterface.gradient(f, b.prep, b.backend, x) + return g +end + +function gradient!(b::DIADGradient, g, f, x) + DifferentiationInterface.gradient!(f, g, b.prep, b.backend, x) + return g +end + +struct DIADJprod{B, E} <: ADBackend + backend::B + prep::E +end + +function DIADJprod( + nvar::Integer, + f, + ncon::Integer = 0, + c::Function = (args...) 
-> []; + x0::AbstractVector = rand(nvar), + backend = AutoReverseDiff(), + kwargs..., +) + dy = similar(x0, ncon) + dx = similar(x0, nvar) + prep = DifferentiationInterface.prepare_pushforward(c, dy, backend, x0, dx) + return DIADJprod(backend, prep) +end + +function Jprod!(b::DIADJprod, Jv, c, x, v, ::Val) + DifferentiationInterface.pushforward!(c, Jv, b.prep, b.backend, x, v) + return Jv +end + +struct DIADJtprod{B, E} <: ADBackend + backend::B + prep::E +end + +function DIADJtprod( + nvar::Integer, + f, + ncon::Integer = 0, + c::Function = (args...) -> []; + x0::AbstractVector = rand(nvar), + backend = AutoReverseDiff(), + kwargs..., +) + dx = similar(x0, nvar) + dy = similar(x0, ncon) + prep = DifferentiationInterface.prepare_pullback(c, dx, backend, x0, dy) + return DIADJtprod(backend, prep) +end + +function Jtprod!(b::DIADJtprod, Jtv, c, x, v, ::Val) + DifferentiationInterface.pullback!(c, Jtv, b.prep, b.backend, x, v) + return Jtv +end + +struct DIADJacobian{B, E} <: ADBackend + backend::B + prep::E +end + +function DIADJacobian( + nvar::Integer, + f, + ncon::Integer = 0, + c::Function = (args...) -> []; + x0::AbstractVector = rand(nvar), + backend = AutoForwardDiff(), + kwargs..., +) + y = similar(x0, ncon) + prep = DifferentiationInterface.prepare_jacobian(c, y, backend, x0) + return DIADJacobian(backend, prep) +end + +function jacobian(b::DIADJacobian, c, x) + J = DifferentiationInterface.jacobian(c, b.prep, b.backend, x) + return J +end + +struct SparseDIADJacobian{B, E} <: ADBackend + backend::B + prep::E +end + +function SparseDIADJacobian( + nvar::Integer, + f, + ncon::Integer = 0, + c::Function = (args...) 
-> []; + x0::AbstractVector = rand(nvar), + coloring_algorithm::AbstractColoringAlgorithm = GreedyColoringAlgorithm{:direct}( + postprocessing = true, + ), + detector::AbstractSparsityDetector = TracerSparsityDetector(), + backend = AutoForwardDiff(), + kwargs..., +) + y = similar(x0, ncon) + sparse_backend = DifferentiationInterface.AutoSparse(backend, sparsity_detector=detector, coloring_algorithm=coloring_algorithm) + prep = DifferentiationInterface.prepare_jacobian(c, y, sparse_backend, x0) + return SparseDIADJacobian(sparse_backend, prep) +end + +function jacobian(b::SparseDIADJacobian, c, x) + J = DifferentiationInterface.jacobian(c, b.prep, b.backend, x) + return J +end + +struct DIADHvprod{B, E} <: ADBackend + backend::B + prep::E +end + +function DIADHvprod( + nvar::Integer, + f, + ncon::Integer = 0, + c::Function = (args...) -> []; + x0::AbstractVector = rand(nvar), + backend = AutoReverseDiff(), + kwargs..., +) + tx = similar(x0) + prep = DifferentiationInterface.prepare_hvp(f, backend, x0, tx) + return DIADHvprod(backend, prep) +end + +function Hvprod!(b::DIADHvprod, Hv, f, x, v, ::Val) + DifferentiationInterface.hvp!(f, Hv, b.prep, b.backend, x, v) + return Hv +end + +struct DIADHessian{B, E} <: ADBackend + backend::B + prep::E +end + +function DIADHessian( + nvar::Integer, + f, + ncon::Integer = 0, + c::Function = (args...) -> []; + x0::AbstractVector = rand(nvar), + first_backend = AutoReverseDiff(), + second_backend = AutoForwardDiff(), + kwargs..., +) + backend = DifferentiationInterface.SecondOrder(second_backend, first_backend) + prep = DifferentiationInterface.prepare_hessian(f, backend, x0) + return DIADHessian(backend, prep) +end + +function hessian(b::DIADHessian, f, x) + H = DifferentiationInterface.hessian(f, b.prep, b.backend, x) + return H +end + +struct SparseDIADHessian{B, E} <: ADBackend + backend::B + prep::E +end + +function SparseDIADHessian( + nvar::Integer, + f, + ncon::Integer = 0, + c::Function = (args...) 
-> []; + x0::AbstractVector = rand(nvar), + coloring_algorithm::AbstractColoringAlgorithm = GreedyColoringAlgorithm{:substitution}( + postprocessing = true, + ), + detector::AbstractSparsityDetector = TracerSparsityDetector(), + first_backend = AutoReverseDiff(), + second_backend = AutoForwardDiff(), + kwargs..., +) + backend = DifferentiationInterface.SecondOrder(second_backend, first_backend) + sparse_backend = DifferentiationInterface.AutoSparse(backend, sparsity_detector=detector, coloring_algorithm=coloring_algorithm) + prep = DifferentiationInterface.prepare_hessian(f, sparse_backend, x0) # prepare with the same (sparse) backend that is stored and later passed to `hessian` + return SparseDIADHessian(sparse_backend, prep) +end + +function hessian(b::SparseDIADHessian, f, x) + H = DifferentiationInterface.hessian(f, b.prep, b.backend, x) + return H +end diff --git a/src/predefined_backend.jl b/src/predefined_backend.jl index b55f1cac..4b1ff3f1 100644 --- a/src/predefined_backend.jl +++ b/src/predefined_backend.jl @@ -58,11 +58,27 @@ enzyme_backend = Dict( :hessian_residual_backend => SparseEnzymeADHessian, ) +di_backend = Dict( + :gradient_backend => DIADGradient, + :jprod_backend => DIADJprod, + :jtprod_backend => DIADJtprod, + :hprod_backend => DIADHvprod, + :jacobian_backend => DIADJacobian, # SparseDIADJacobian, + :hessian_backend => DIADHessian, # SparseDIADHessian, + :ghjvprod_backend => EmptyADbackend, + :jprod_residual_backend => DIADJprod, + :jtprod_residual_backend => DIADJtprod, + :hprod_residual_backend => DIADHvprod, + :jacobian_residual_backend => DIADJacobian, # SparseDIADJacobian, + :hessian_residual_backend => DIADHessian, # SparseDIADHessian, +) + predefined_backend = Dict( :default => default_backend, :optimized => optimized_backend, :generic => generic_backend, :enzyme => enzyme_backend, + :di => di_backend, ) """ diff --git a/test/runtests.jl b/test/runtests.jl index e9e3c7ac..827e509b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -96,7 +96,43 @@ for problem in NLPModelsTest.nls_problems
include("nls/problems/$(lowercase(problem)).jl") end -include("utils.jl") +ReverseDiffAD(nvar, f) = ADNLPModels.ADModelBackend( + nvar, + f, + gradient_backend = ADNLPModels.ReverseDiffADGradient, + hprod_backend = ADNLPModels.ReverseDiffADHvprod, + jprod_backend = ADNLPModels.ReverseDiffADJprod, + jtprod_backend = ADNLPModels.ReverseDiffADJtprod, + jacobian_backend = ADNLPModels.ReverseDiffADJacobian, + hessian_backend = ADNLPModels.ReverseDiffADHessian, +) + +function test_getter_setter(nlp) + @test get_adbackend(nlp) == nlp.adbackend + if typeof(nlp) <: ADNLPModel + set_adbackend!(nlp, ReverseDiffAD(nlp.meta.nvar, nlp.f)) + elseif typeof(nlp) <: ADNLSModel + function F(x; nequ = nlp.nls_meta.nequ) + Fx = similar(x, nequ) + nlp.F!(Fx, x) + return Fx + end + set_adbackend!(nlp, ReverseDiffAD(nlp.meta.nvar, x -> sum(F(x) .^ 2))) + end + @test typeof(get_adbackend(nlp).gradient_backend) <: ADNLPModels.ReverseDiffADGradient + @test typeof(get_adbackend(nlp).hprod_backend) <: ADNLPModels.ReverseDiffADHvprod + @test typeof(get_adbackend(nlp).hessian_backend) <: ADNLPModels.ReverseDiffADHessian + set_adbackend!( + nlp, + gradient_backend = ADNLPModels.ForwardDiffADGradient, + jtprod_backend = ADNLPModels.GenericForwardDiffADJtprod(), + ) + @test typeof(get_adbackend(nlp).gradient_backend) <: ADNLPModels.ForwardDiffADGradient + @test typeof(get_adbackend(nlp).hprod_backend) <: ADNLPModels.ReverseDiffADHvprod + @test typeof(get_adbackend(nlp).jtprod_backend) <: ADNLPModels.GenericForwardDiffADJtprod + @test typeof(get_adbackend(nlp).hessian_backend) <: ADNLPModels.ReverseDiffADHessian +end + include("nlp/basic.jl") include("nlp/nlpmodelstest.jl") include("nls/basic.jl") diff --git a/test/utils.jl b/test/utils.jl deleted file mode 100644 index 7246354b..00000000 --- a/test/utils.jl +++ /dev/null @@ -1,36 +0,0 @@ -ReverseDiffAD(nvar, f) = ADNLPModels.ADModelBackend( - nvar, - f, - gradient_backend = ADNLPModels.ReverseDiffADGradient, - hprod_backend = 
ADNLPModels.ReverseDiffADHvprod, - jprod_backend = ADNLPModels.ReverseDiffADJprod, - jtprod_backend = ADNLPModels.ReverseDiffADJtprod, - jacobian_backend = ADNLPModels.ReverseDiffADJacobian, - hessian_backend = ADNLPModels.ReverseDiffADHessian, -) - -function test_getter_setter(nlp) - @test get_adbackend(nlp) == nlp.adbackend - if typeof(nlp) <: ADNLPModel - set_adbackend!(nlp, ReverseDiffAD(nlp.meta.nvar, nlp.f)) - elseif typeof(nlp) <: ADNLSModel - function F(x; nequ = nlp.nls_meta.nequ) - Fx = similar(x, nequ) - nlp.F!(Fx, x) - return Fx - end - set_adbackend!(nlp, ReverseDiffAD(nlp.meta.nvar, x -> sum(F(x) .^ 2))) - end - @test typeof(get_adbackend(nlp).gradient_backend) <: ADNLPModels.ReverseDiffADGradient - @test typeof(get_adbackend(nlp).hprod_backend) <: ADNLPModels.ReverseDiffADHvprod - @test typeof(get_adbackend(nlp).hessian_backend) <: ADNLPModels.ReverseDiffADHessian - set_adbackend!( - nlp, - gradient_backend = ADNLPModels.ForwardDiffADGradient, - jtprod_backend = ADNLPModels.GenericForwardDiffADJtprod(), - ) - @test typeof(get_adbackend(nlp).gradient_backend) <: ADNLPModels.ForwardDiffADGradient - @test typeof(get_adbackend(nlp).hprod_backend) <: ADNLPModels.ReverseDiffADHvprod - @test typeof(get_adbackend(nlp).jtprod_backend) <: ADNLPModels.GenericForwardDiffADJtprod - @test typeof(get_adbackend(nlp).hessian_backend) <: ADNLPModels.ReverseDiffADHessian -end From 08f82121f63eb75ffc8b37e5b3f9b2e818acb7cf Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Tue, 24 Feb 2026 22:29:56 -0600 Subject: [PATCH 2/2] wip --- src/di.jl | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/di.jl b/src/di.jl index 9c63cd81..45cf6b50 100644 --- a/src/di.jl +++ b/src/di.jl @@ -40,14 +40,15 @@ function DIADJprod( backend = AutoReverseDiff(), kwargs..., ) + T = eltype(x0) dy = similar(x0, ncon) - dx = similar(x0, nvar) + dx = ntuple(_ -> zero(T), nvar) prep = DifferentiationInterface.prepare_pushforward(c, dy, backend, x0, dx) return 
DIADJprod(backend, prep) end function Jprod!(b::DIADJprod, Jv, c, x, v, ::Val) - DifferentiationInterface.pushforward!(c, Jv, b.prep, b.backend, x, v) + DifferentiationInterface.pushforward!(c, Jv, b.prep, b.backend, x, Tuple(v)) return Jv end @@ -65,14 +66,15 @@ function DIADJtprod( backend = AutoReverseDiff(), kwargs..., ) + T = eltype(x0) dx = similar(x0, nvar) - dy = similar(x0, ncon) + dy = ntuple(_ -> zero(T), ncon) prep = DifferentiationInterface.prepare_pullback(c, dx, backend, x0, dy) return DIADJtprod(backend, prep) end function Jtprod!(b::DIADJtprod, Jtv, c, x, v, ::Val) - DifferentiationInterface.pullback!(c, Jtv, b.prep, b.backend, x, v) + DifferentiationInterface.pullback!(c, Jtv, b.prep, b.backend, x, Tuple(v)) return Jtv end @@ -143,13 +145,14 @@ function DIADHvprod( backend = AutoReverseDiff(), kwargs..., ) - tx = similar(x0) + T = eltype(x0) + tx = ntuple(_ -> zero(T), nvar) prep = DifferentiationInterface.prepare_hvp(f, backend, x0, tx) return DIADHvprod(backend, prep) end function Hvprod!(b::DIADHvprod, Hv, f, x, v, ::Val) - DifferentiationInterface.hvp!(f, Hv, b.prep, b.backend, x, v) + DifferentiationInterface.hvp!(f, Hv, b.prep, b.backend, x, Tuple(v)) return Hv end