diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 33d28bb9..6bf42f74 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -23,16 +23,3 @@ steps: julia --color=yes --project=test -e 'using Pkg; Pkg.add("Enzyme"); Pkg.develop(path="."); Pkg.instantiate()' julia --color=yes --project=test -e 'include("test/enzyme.jl")' timeout_in_minutes: 30 - - - label: "CPUs -- Zygote.jl" - plugins: - - JuliaCI/julia#v1: - version: "1.10" - agents: - queue: "juliaecosystem" - os: "linux" - arch: "x86_64" - command: | - julia --color=yes --project=test -e 'using Pkg; Pkg.add("Zygote"); Pkg.develop(path="."); Pkg.instantiate()' - julia --color=yes --project=test -e 'include("test/zygote.jl")' - timeout_in_minutes: 30 diff --git a/Project.toml b/Project.toml index 19fa264c..337ebfde 100644 --- a/Project.toml +++ b/Project.toml @@ -7,18 +7,25 @@ ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6" -Requires = "ae029012-a4dd-5104-9daa-d747884805df" ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" SparseConnectivityTracer = "9f842d2f-2579-4b1d-911e-f412cf18a3f5" SparseMatrixColorings = "0a514795-09f3-496d-8182-132a7b665d35" +[weakdeps] +Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" + +[extensions] +ADNLPModelsEnzymeExt = "Enzyme" + [compat] ADTypes = "1.2.1" -ForwardDiff = "0.9, 0.10, 1" +ForwardDiff = "1" NLPModels = "0.21.5" -Requires = "1" ReverseDiff = "1" -SparseConnectivityTracer = "1.0" +SparseConnectivityTracer = "1" SparseMatrixColorings = "0.4.21" +Enzyme = "0.13.129" +LinearAlgebra = "1.10" +SparseArrays = "1.10" julia = "1.10" diff --git a/docs/Project.toml b/docs/Project.toml index 07fa2fb6..a08a60e3 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -12,12 +12,11 @@ Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" 
SolverBenchmark = "581a75fa-a23a-52d0-a590-d6201de2218a" SparseConnectivityTracer = "9f842d2f-2579-4b1d-911e-f412cf18a3f5" SparseMatrixColorings = "0a514795-09f3-496d-8182-132a7b665d35" -Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] DataFrames = "1" Documenter = "1.0" -ManualNLPModels = "0.1" +ManualNLPModels = "0.2" NLPModels = "0.21.5" NLPModelsJuMP = "0.13" OptimizationProblems = "0.8" @@ -26,4 +25,3 @@ Plots = "1" SolverBenchmark = "0.6" SparseConnectivityTracer = "1.0" SparseMatrixColorings = "0.4.21" -Zygote = "0.6.62" diff --git a/docs/src/backend.md b/docs/src/backend.md index 35637a64..3b6393b0 100644 --- a/docs/src/backend.md +++ b/docs/src/backend.md @@ -7,15 +7,15 @@ The backend information is in a structure [`ADNLPModels.ADModelBackend`](@ref) i The functions used internally to define the NLPModel API and the possible backends are defined in the following table: -| Functions | FowardDiff backends | ReverseDiff backends | Zygote backends | Enzyme backend | Sparse backend | -| --------- | ------------------- | -------------------- | --------------- | -------------- | -------------- | -| `gradient` and `gradient!` | `ForwardDiffADGradient`/`GenericForwardDiffADGradient` | `ReverseDiffADGradient`/`GenericReverseDiffADGradient` | `ZygoteADGradient` | `EnzymeReverseADGradient` | -- | -| `jacobian` | `ForwardDiffADJacobian` | `ReverseDiffADJacobian` | `ZygoteADJacobian` | `SparseEnzymeADJacobian` | `SparseADJacobian` | -| `hessian` | `ForwardDiffADHessian` | `ReverseDiffADHessian` | `ZygoteADHessian` | `SparseEnzymeADHessian` | `SparseADHessian`/`SparseReverseADHessian` | -| `Jprod` | `ForwardDiffADJprod`/`GenericForwardDiffADJprod` | `ReverseDiffADJprod`/`GenericReverseDiffADJprod` | `ZygoteADJprod` | `EnzymeReverseADJprod` | -- | -| `Jtprod` | `ForwardDiffADJtprod`/`GenericForwardDiffADJtprod` | `ReverseDiffADJtprod`/`GenericReverseDiffADJtprod` | `ZygoteADJtprod` | `EnzymeReverseADJtprod` | -- | -| `Hvprod` | 
`ForwardDiffADHvprod`/`GenericForwardDiffADHvprod` | `ReverseDiffADHvprod`/`GenericReverseDiffADHvprod` | -- | `EnzymeReverseADHvprod` | -- | -| `directional_second_derivative` | `ForwardDiffADGHjvprod` | -- | -- | -- | -- | +| Functions | ForwardDiff backends | ReverseDiff backends | Enzyme backend | Sparse backend | +| --------- | ------------------- | -------------------- | -------------- | -------------- | +| `gradient` and `gradient!` | `ForwardDiffADGradient`/`GenericForwardDiffADGradient` | `ReverseDiffADGradient`/`GenericReverseDiffADGradient` | `EnzymeReverseADGradient` | -- | +| `jacobian` | `ForwardDiffADJacobian` | `ReverseDiffADJacobian` | `SparseEnzymeADJacobian` | `SparseADJacobian` | +| `hessian` | `ForwardDiffADHessian` | `ReverseDiffADHessian` | `SparseEnzymeADHessian` | `SparseADHessian`/`SparseReverseADHessian` | +| `Jprod` | `ForwardDiffADJprod`/`GenericForwardDiffADJprod` | `ReverseDiffADJprod`/`GenericReverseDiffADJprod` | `EnzymeReverseADJprod` | -- | +| `Jtprod` | `ForwardDiffADJtprod`/`GenericForwardDiffADJtprod` | `ReverseDiffADJtprod`/`GenericReverseDiffADJtprod` | `EnzymeReverseADJtprod` | -- | +| `Hvprod` | `ForwardDiffADHvprod`/`GenericForwardDiffADHvprod` | `ReverseDiffADHvprod`/`GenericReverseDiffADHvprod` | `EnzymeReverseADHvprod` | -- | +| `directional_second_derivative` | `ForwardDiffADGHjvprod` | -- | -- | -- | The functions `hess_structure!`, `hess_coord!`, `jac_structure!` and `jac_coord!` defined in `ad.jl` are generic to all the backends for now. diff --git a/docs/src/index.md b/docs/src/index.md index d89db6c0..fb164d10 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -24,8 +24,8 @@ ADNLPModels.jl functionalities are extended by other packages that are not autom In other words, you sometimes need to load the desired package separately to access some functionalities. 
```julia -using ADNLPModels # load only the default functionalities -using Zygote # load the Zygote backends +using ADNLPModels # load only the default functionalities +using Enzyme # load the Enzyme backends ``` Versions compatibility for the extensions are available in the file `test/Project.toml`. diff --git a/docs/src/predefined.md b/docs/src/predefined.md index 14e49cf2..5f778ff8 100644 --- a/docs/src/predefined.md +++ b/docs/src/predefined.md @@ -56,5 +56,3 @@ get_adbackend(nlp) !!! danger The interface for Enzyme.jl is still under development. - -The backend `:zygote` focuses on backend based on [Zygote.jl](https://github.com/FluxML/Zygote.jl). diff --git a/ext/ADNLPModelsEnzymeExt.jl b/ext/ADNLPModelsEnzymeExt.jl new file mode 100644 index 00000000..97537260 --- /dev/null +++ b/ext/ADNLPModelsEnzymeExt.jl @@ -0,0 +1,436 @@ +module ADNLPModelsEnzymeExt + +using SparseArrays +using ADNLPModels, NLPModels +using SparseMatrixColorings +using Enzyme + +function _gradient!(dx, f, x) + Enzyme.make_zero!(dx) + Enzyme.autodiff( + Enzyme.set_runtime_activity(Enzyme.Reverse), + f, + Enzyme.Active, + Enzyme.Duplicated(x, dx), + ) + return nothing +end + +function _hvp!(res, f, x, v) + Enzyme.autodiff( + Enzyme.set_runtime_activity(Enzyme.Forward), + _gradient!, + res, + Enzyme.Const(f), + Enzyme.Duplicated(x, v), + ) + return nothing +end + +function _gradient!(dx, ℓ, x, y, obj_weight, cx) + Enzyme.make_zero!(dx) + dcx = Enzyme.make_zero(cx) + Enzyme.autodiff( + Enzyme.set_runtime_activity(Enzyme.Reverse), + ℓ, + Enzyme.Active, + Enzyme.Duplicated(x, dx), + Enzyme.Const(y), + Enzyme.Const(obj_weight), + Enzyme.Duplicated(cx, dcx), + ) + return nothing +end + +function _hvp!(res, ℓ, x, v, y, obj_weight, cx) + dcx = Enzyme.make_zero(cx) + Enzyme.autodiff( + Enzyme.set_runtime_activity(Enzyme.Forward), + _gradient!, + res, + Enzyme.Const(ℓ), + Enzyme.Duplicated(x, v), + Enzyme.Const(y), + Enzyme.Const(obj_weight), + Enzyme.Duplicated(cx, dcx), + ) + return nothing +end + 
+function ADNLPModels.gradient(::ADNLPModels.EnzymeReverseADGradient, f, x) + g = Enzyme.make_zero(x) # shadow must start at zero: reverse mode accumulates into it + Enzyme.autodiff( + Enzyme.set_runtime_activity(Enzyme.Reverse), + Enzyme.Const(f), + Enzyme.Active, + Enzyme.Duplicated(x, g), + ) + return g +end + +function ADNLPModels.gradient!(::ADNLPModels.EnzymeReverseADGradient, g, f, x) + Enzyme.make_zero!(g) + Enzyme.autodiff(Enzyme.Reverse, Enzyme.Const(f), Enzyme.Active, Enzyme.Duplicated(x, g)) + return g +end + +ADNLPModels.jacobian(::ADNLPModels.EnzymeReverseADJacobian, f, x) = Enzyme.jacobian(Enzyme.Reverse, f, x) + +function ADNLPModels.hessian(b::ADNLPModels.EnzymeReverseADHessian, f, x) + T = eltype(x) + n = length(x) + hess = zeros(T, n, n) + fill!(b.seed, zero(T)) + for i = 1:n + b.seed[i] = one(T) + grad = Enzyme.make_zero(x) + _hvp!(Enzyme.DuplicatedNoNeed(grad, b.Hv), f, x, b.seed) + view(hess, :, i) .= b.Hv + b.seed[i] = zero(T) + end + return hess +end + +function ADNLPModels.Jprod!(b::ADNLPModels.EnzymeReverseADJprod, Jv, c!, x, v, ::Val) + copyto!(b.xbuf, x) + copyto!(b.vbuf, v) + Enzyme.autodiff( + Enzyme.Forward, + Enzyme.Const(c!), + Enzyme.Duplicated(b.cx, b.jvbuf), + Enzyme.Duplicated(b.xbuf, b.vbuf), + ) + copyto!(Jv, b.jvbuf) + return Jv +end + +# Wrapper that calls c!(y, x) but returns nothing. +# Enzyme reverse mode requires functions to return nothing (not their output array), +# otherwise it errors with "Duplicated Returns not yet handled". 
+function _void_c!(c!, y, x) + c!(y, x) + return nothing +end + +function ADNLPModels.Jtprod!(b::ADNLPModels.EnzymeReverseADJtprod, Jtv, c!, x, v, ::Val) + copyto!(b.xbuf, x) + copyto!(b.vbuf, v) + Enzyme.make_zero!(b.jtvbuf) + Enzyme.autodiff( + Enzyme.Reverse, + Enzyme.Const(_void_c!), + Enzyme.Const(c!), + Enzyme.Duplicated(b.cx, b.vbuf), + Enzyme.Duplicated(b.xbuf, b.jtvbuf), + ) + copyto!(Jtv, b.jtvbuf) + return Jtv +end + +function ADNLPModels.Hvprod!( + b::ADNLPModels.EnzymeReverseADHvprod, + Hv, + x, + v, + ℓ_unused, + ::Val{:lag}, + y, + obj_weight::Real = one(eltype(x)), +) + copyto!(b.xbuf, x) + copyto!(b.vbuf, v) + _hvp!(Enzyme.DuplicatedNoNeed(b.grad, b.hvbuf), b.ℓ, b.xbuf, b.vbuf, y, obj_weight, b.cx) + copyto!(Hv, b.hvbuf) + return Hv +end + +function ADNLPModels.Hvprod!( + b::ADNLPModels.EnzymeReverseADHvprod, + Hv, + x, + v, + f_unused, + ::Val{:obj}, + obj_weight::Real = one(eltype(x)), +) + copyto!(b.xbuf, x) + copyto!(b.vbuf, v) + _hvp!(Enzyme.DuplicatedNoNeed(b.grad, b.hvbuf), b.f, b.xbuf, b.vbuf) + @. Hv = obj_weight * b.hvbuf + return Hv +end + +# jth_hprod: Hessian-vector product for the j-th constraint. +# Uses the Lagrangian with y = e_j (unit vector) and obj_weight = 0, +# avoiding the closure x -> c(x)[j] that Enzyme can't handle. +function NLPModels.hprod!( + b::ADNLPModels.EnzymeReverseADHvprod, + nlp::ADNLPModels.ADModel, + x::AbstractVector, + v::AbstractVector, + j::Integer, + Hv::AbstractVector, +) + copyto!(b.xbuf, x) + copyto!(b.vbuf, v) + b.cx .= 0 + # Build y = e_{j-nlin} (unit vector for nonlinear constraint index) + b.ybuf .= 0 + k = 0 + for i in nlp.meta.nln + k += 1 + if i == j + b.ybuf[k] = one(eltype(x)) + break + end + end + _hvp!( + Enzyme.DuplicatedNoNeed(b.grad, b.hvbuf), + b.ℓ, + b.xbuf, + b.vbuf, + b.ybuf, + zero(eltype(x)), + b.cx, + ) + copyto!(Hv, b.hvbuf) + return Hv +end + +# hprod_residual: Hessian-vector product for the i-th residual. +# Uses forward-over-reverse on F_i(x) = F(x)[i]. 
+function NLPModels.hprod_residual!( + b::ADNLPModels.EnzymeReverseADHvprod, + nls::ADNLPModels.AbstractADNLSModel, + x::AbstractVector, + v::AbstractVector, + i::Integer, + Hv::AbstractVector, +) + F = ADNLPModels.get_F(nls) # out-of-place version + Fi(x) = F(x)[i] + copyto!(b.xbuf, x) + copyto!(b.vbuf, v) + _hvp!(Enzyme.DuplicatedNoNeed(b.grad, b.hvbuf), Fi, b.xbuf, b.vbuf) + copyto!(Hv, b.hvbuf) + return Hv +end + +# Sparse Jacobian +function ADNLPModels.get_nln_nnzj(b::ADNLPModels.SparseEnzymeADJacobian, nvar, ncon) + length(b.rowval) +end + +function NLPModels.jac_structure!( + b::ADNLPModels.SparseEnzymeADJacobian, + nlp::ADNLPModels.ADModel, + rows::AbstractVector{<:Integer}, + cols::AbstractVector{<:Integer}, +) + rows .= b.rowval + for i = 1:(nlp.meta.nvar) + for j = b.colptr[i]:(b.colptr[i + 1] - 1) + cols[j] = i + end + end + return rows, cols +end + +function sparse_jac_coord!( + c!::Function, + b::ADNLPModels.SparseEnzymeADJacobian, + x::AbstractVector, + vals::AbstractVector, +) + # SparseMatrixColorings.jl requires a SparseMatrixCSC for the decompression + A = SparseMatrixCSC(b.ncon, b.nvar, b.colptr, b.rowval, b.nzval) + + # Enzyme.Duplicated requires primal and shadow to have the same type. + # Copy x into a pre-allocated buffer to ensure type match with b.v. 
+ copyto!(b.xbuf, x) + + groups = column_groups(b.result_coloring) + for (icol, cols) in enumerate(groups) + # Update the seed + b.v .= 0 + for col in cols + b.v[col] = 1 + end + + # b.compressed_jacobian is just a vector Jv here + # We don't use the vector mode + Enzyme.autodiff( + Enzyme.Forward, + Enzyme.Const(c!), + Enzyme.Duplicated(b.cx, b.compressed_jacobian), + Enzyme.Duplicated(b.xbuf, b.v), + ) + + # Update the columns of the Jacobian that have the color `icol` + decompress_single_color!(A, b.compressed_jacobian, icol, b.result_coloring) + end + vals .= b.nzval + return vals +end + +function NLPModels.jac_coord!( + b::ADNLPModels.SparseEnzymeADJacobian, + nlp::ADNLPModels.ADModel, + x::AbstractVector, + vals::AbstractVector, +) + sparse_jac_coord!(nlp.c!, b, x, vals) + return vals +end + +function NLPModels.jac_structure_residual!( + b::ADNLPModels.SparseEnzymeADJacobian, + nls::ADNLPModels.AbstractADNLSModel, + rows::AbstractVector{<:Integer}, + cols::AbstractVector{<:Integer}, +) + rows .= b.rowval + for i = 1:(nls.meta.nvar) + for j = b.colptr[i]:(b.colptr[i + 1] - 1) + cols[j] = i + end + end + return rows, cols +end + +function NLPModels.jac_coord_residual!( + b::ADNLPModels.SparseEnzymeADJacobian, + nls::ADNLPModels.AbstractADNLSModel, + x::AbstractVector, + vals::AbstractVector, +) + sparse_jac_coord!(nls.F!, b, x, vals) + return vals +end + +# Sparse Hessian +function ADNLPModels.get_nln_nnzh(b::ADNLPModels.SparseEnzymeADHessian, nvar) + return length(b.rowval) +end + +function NLPModels.hess_structure!( + b::ADNLPModels.SparseEnzymeADHessian, + nlp::ADNLPModels.ADModel, + rows::AbstractVector{<:Integer}, + cols::AbstractVector{<:Integer}, +) + rows .= b.rowval + for i = 1:(nlp.meta.nvar) + for j = b.colptr[i]:(b.colptr[i + 1] - 1) + cols[j] = i + end + end + return rows, cols +end + +function sparse_hess_coord!( + b::ADNLPModels.SparseEnzymeADHessian, + x::AbstractVector, + obj_weight, + y::AbstractVector, + vals::AbstractVector, +) + # 
SparseMatrixColorings.jl requires a SparseMatrixCSC for the decompression + A = SparseMatrixCSC(b.nvar, b.nvar, b.colptr, b.rowval, b.nzval) + + # Enzyme.Duplicated requires primal and shadow to have the same type. + # Copy x into a pre-allocated buffer to ensure type match with b.v. + copyto!(b.xbuf, x) + + groups = column_groups(b.result_coloring) + for (icol, cols) in enumerate(groups) + # Update the seed + b.v .= 0 + for col in cols + b.v[col] = 1 + end + + _hvp!( + Enzyme.DuplicatedNoNeed(b.grad, b.compressed_hessian_icol), + b.ℓ, + b.xbuf, + b.v, + y, + obj_weight, + b.cx, + ) + + if b.coloring_mode == :direct + # Update the coefficients of the lower triangular part of the Hessian that are related to the color `icol` + decompress_single_color!(A, b.compressed_hessian_icol, icol, b.result_coloring, :L) + end + if b.coloring_mode == :substitution + view(b.compressed_hessian, :, icol) .= b.compressed_hessian_icol + end + end + if b.coloring_mode == :substitution + decompress!(A, b.compressed_hessian, b.result_coloring, :L) + end + vals .= b.nzval + return vals +end + +function NLPModels.hess_coord!( + b::ADNLPModels.SparseEnzymeADHessian, + nlp::ADNLPModels.ADModel, + x::AbstractVector, + y::AbstractVector, + obj_weight::Real, + vals::AbstractVector, +) + sparse_hess_coord!(b, x, obj_weight, y, vals) +end + +# Could be optimized! +function NLPModels.hess_coord!( + b::ADNLPModels.SparseEnzymeADHessian, + nlp::ADNLPModels.ADModel, + x::AbstractVector, + obj_weight::Real, + vals::AbstractVector, +) + b.y .= 0 + sparse_hess_coord!(b, x, obj_weight, b.y, vals) +end + +function NLPModels.hess_coord!( + b::ADNLPModels.SparseEnzymeADHessian, + nlp::ADNLPModels.ADModel, + x::AbstractVector, + j::Integer, + vals::AbstractVector, +) + for (w, k) in enumerate(nlp.meta.nln) + b.y[w] = k == j ? 
1 : 0 + end + obj_weight = zero(eltype(x)) + sparse_hess_coord!(b, x, obj_weight, b.y, vals) + return vals +end + +function NLPModels.hess_structure_residual!( + b::ADNLPModels.SparseEnzymeADHessian, + nls::ADNLPModels.AbstractADNLSModel, + rows::AbstractVector{<:Integer}, + cols::AbstractVector{<:Integer}, +) + return hess_structure!(b, nls, rows, cols) +end + +function NLPModels.hess_coord_residual!( + b::ADNLPModels.SparseEnzymeADHessian, + nls::ADNLPModels.AbstractADNLSModel, + x::AbstractVector, + v::AbstractVector, + vals::AbstractVector, +) + obj_weight = zero(eltype(x)) + sparse_hess_coord!(b, x, obj_weight, v, vals) +end + +end diff --git a/src/ADNLPModels.jl b/src/ADNLPModels.jl index 9874adf8..f89c384e 100644 --- a/src/ADNLPModels.jl +++ b/src/ADNLPModels.jl @@ -11,7 +11,6 @@ using ForwardDiff, ReverseDiff # JSO using NLPModels -using Requires abstract type AbstractADNLPModel{T, S} <: AbstractNLPModel{T, S} end abstract type AbstractADNLSModel{T, S} <: AbstractNLSModel{T, S} end @@ -28,7 +27,6 @@ include("sparse_hessian.jl") include("forward.jl") include("reverse.jl") include("enzyme.jl") -include("zygote.jl") include("predefined_backend.jl") include("nlp.jl") diff --git a/src/enzyme.jl b/src/enzyme.jl index de603868..687315ce 100644 --- a/src/enzyme.jl +++ b/src/enzyme.jl @@ -326,435 +326,3 @@ function SparseEnzymeADHessian( xbuf, ) end - -@init begin - @require Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" begin - function _gradient!(dx, f, x) - Enzyme.make_zero!(dx) - Enzyme.autodiff( - Enzyme.set_runtime_activity(Enzyme.Reverse), - f, - Enzyme.Active, - Enzyme.Duplicated(x, dx), - ) - return nothing - end - - function _hvp!(res, f, x, v) - Enzyme.autodiff( - Enzyme.set_runtime_activity(Enzyme.Forward), - _gradient!, - res, - Enzyme.Const(f), - Enzyme.Duplicated(x, v), - ) - return nothing - end - - function _gradient!(dx, ℓ, x, y, obj_weight, cx) - Enzyme.make_zero!(dx) - dcx = Enzyme.make_zero(cx) - Enzyme.autodiff( - 
Enzyme.set_runtime_activity(Enzyme.Reverse), - ℓ, - Enzyme.Active, - Enzyme.Duplicated(x, dx), - Enzyme.Const(y), - Enzyme.Const(obj_weight), - Enzyme.Duplicated(cx, dcx), - ) - return nothing - end - - function _hvp!(res, ℓ, x, v, y, obj_weight, cx) - dcx = Enzyme.make_zero(cx) - Enzyme.autodiff( - Enzyme.set_runtime_activity(Enzyme.Forward), - _gradient!, - res, - Enzyme.Const(ℓ), - Enzyme.Duplicated(x, v), - Enzyme.Const(y), - Enzyme.Const(obj_weight), - Enzyme.Duplicated(cx, dcx), - ) - return nothing - end - - function ADNLPModels.gradient(::EnzymeReverseADGradient, f, x) - g = similar(x) - Enzyme.autodiff( - Enzyme.set_runtime_activity(Enzyme.Reverse), - Enzyme.Const(f), - Enzyme.Active, - Enzyme.Duplicated(x, g), - ) - return g - end - - function ADNLPModels.gradient!(::EnzymeReverseADGradient, g, f, x) - Enzyme.make_zero!(g) - Enzyme.autodiff(Enzyme.Reverse, Enzyme.Const(f), Enzyme.Active, Enzyme.Duplicated(x, g)) - return g - end - - jacobian(::EnzymeReverseADJacobian, f, x) = Enzyme.jacobian(Enzyme.Reverse, f, x) - - function hessian(b::EnzymeReverseADHessian, f, x) - T = eltype(x) - n = length(x) - hess = zeros(T, n, n) - fill!(b.seed, zero(T)) - for i = 1:n - b.seed[i] = one(T) - grad = Enzyme.make_zero(x) - _hvp!(Enzyme.DuplicatedNoNeed(grad, b.Hv), f, x, b.seed) - view(hess, :, i) .= b.Hv - b.seed[i] = zero(T) - end - return hess - end - - function Jprod!(b::EnzymeReverseADJprod, Jv, c!, x, v, ::Val) - copyto!(b.xbuf, x) - copyto!(b.vbuf, v) - Enzyme.autodiff( - Enzyme.Forward, - Enzyme.Const(c!), - Enzyme.Duplicated(b.cx, b.jvbuf), - Enzyme.Duplicated(b.xbuf, b.vbuf), - ) - copyto!(Jv, b.jvbuf) - return Jv - end - - # Wrapper that calls c!(y, x) but returns nothing. - # Enzyme reverse mode requires functions to return nothing (not their output array), - # otherwise it errors with "Duplicated Returns not yet handled". 
- function _void_c!(c!, y, x) - c!(y, x) - return nothing - end - - function Jtprod!(b::EnzymeReverseADJtprod, Jtv, c!, x, v, ::Val) - copyto!(b.xbuf, x) - copyto!(b.vbuf, v) - Enzyme.make_zero!(b.jtvbuf) - Enzyme.autodiff( - Enzyme.Reverse, - Enzyme.Const(_void_c!), - Enzyme.Const(c!), - Enzyme.Duplicated(b.cx, b.vbuf), - Enzyme.Duplicated(b.xbuf, b.jtvbuf), - ) - copyto!(Jtv, b.jtvbuf) - return Jtv - end - - function Hvprod!( - b::EnzymeReverseADHvprod, - Hv, - x, - v, - ℓ_unused, - ::Val{:lag}, - y, - obj_weight::Real = one(eltype(x)), - ) - copyto!(b.xbuf, x) - copyto!(b.vbuf, v) - _hvp!(Enzyme.DuplicatedNoNeed(b.grad, b.hvbuf), b.ℓ, b.xbuf, b.vbuf, y, obj_weight, b.cx) - copyto!(Hv, b.hvbuf) - return Hv - end - - function Hvprod!( - b::EnzymeReverseADHvprod, - Hv, - x, - v, - f_unused, - ::Val{:obj}, - obj_weight::Real = one(eltype(x)), - ) - copyto!(b.xbuf, x) - copyto!(b.vbuf, v) - _hvp!(Enzyme.DuplicatedNoNeed(b.grad, b.hvbuf), b.f, b.xbuf, b.vbuf) - @. Hv = obj_weight * b.hvbuf - return Hv - end - - # jth_hprod: Hessian-vector product for the j-th constraint. - # Uses the Lagrangian with y = e_j (unit vector) and obj_weight = 0, - # avoiding the closure x -> c(x)[j] that Enzyme can't handle. - function NLPModels.hprod!( - b::EnzymeReverseADHvprod, - nlp::ADModel, - x::AbstractVector, - v::AbstractVector, - j::Integer, - Hv::AbstractVector, - ) - copyto!(b.xbuf, x) - copyto!(b.vbuf, v) - b.cx .= 0 - # Build y = e_{j-nlin} (unit vector for nonlinear constraint index) - b.ybuf .= 0 - k = 0 - for i in nlp.meta.nln - k += 1 - if i == j - b.ybuf[k] = one(eltype(x)) - break - end - end - _hvp!( - Enzyme.DuplicatedNoNeed(b.grad, b.hvbuf), - b.ℓ, - b.xbuf, - b.vbuf, - b.ybuf, - zero(eltype(x)), - b.cx, - ) - copyto!(Hv, b.hvbuf) - return Hv - end - - # hprod_residual: Hessian-vector product for the i-th residual. - # Uses forward-over-reverse on F_i(x) = F(x)[i]. 
- function NLPModels.hprod_residual!( - b::EnzymeReverseADHvprod, - nls::AbstractADNLSModel, - x::AbstractVector, - v::AbstractVector, - i::Integer, - Hv::AbstractVector, - ) - F = get_F(nls) # out-of-place version - Fi(x) = F(x)[i] - copyto!(b.xbuf, x) - copyto!(b.vbuf, v) - _hvp!(Enzyme.DuplicatedNoNeed(b.grad, b.hvbuf), Fi, b.xbuf, b.vbuf) - copyto!(Hv, b.hvbuf) - return Hv - end - - # Sparse Jacobian - function get_nln_nnzj(b::SparseEnzymeADJacobian, nvar, ncon) - length(b.rowval) - end - - function NLPModels.jac_structure!( - b::SparseEnzymeADJacobian, - nlp::ADModel, - rows::AbstractVector{<:Integer}, - cols::AbstractVector{<:Integer}, - ) - rows .= b.rowval - for i = 1:(nlp.meta.nvar) - for j = b.colptr[i]:(b.colptr[i + 1] - 1) - cols[j] = i - end - end - return rows, cols - end - - function sparse_jac_coord!( - c!::Function, - b::SparseEnzymeADJacobian, - x::AbstractVector, - vals::AbstractVector, - ) - # SparseMatrixColorings.jl requires a SparseMatrixCSC for the decompression - A = SparseMatrixCSC(b.ncon, b.nvar, b.colptr, b.rowval, b.nzval) - - # Enzyme.Duplicated requires primal and shadow to have the same type. - # Copy x into a pre-allocated buffer to ensure type match with b.v. 
- copyto!(b.xbuf, x) - - groups = column_groups(b.result_coloring) - for (icol, cols) in enumerate(groups) - # Update the seed - b.v .= 0 - for col in cols - b.v[col] = 1 - end - - # b.compressed_jacobian is just a vector Jv here - # We don't use the vector mode - Enzyme.autodiff( - Enzyme.Forward, - Enzyme.Const(c!), - Enzyme.Duplicated(b.cx, b.compressed_jacobian), - Enzyme.Duplicated(b.xbuf, b.v), - ) - - # Update the columns of the Jacobian that have the color `icol` - decompress_single_color!(A, b.compressed_jacobian, icol, b.result_coloring) - end - vals .= b.nzval - return vals - end - - function NLPModels.jac_coord!( - b::SparseEnzymeADJacobian, - nlp::ADModel, - x::AbstractVector, - vals::AbstractVector, - ) - sparse_jac_coord!(nlp.c!, b, x, vals) - return vals - end - - function NLPModels.jac_structure_residual!( - b::SparseEnzymeADJacobian, - nls::AbstractADNLSModel, - rows::AbstractVector{<:Integer}, - cols::AbstractVector{<:Integer}, - ) - rows .= b.rowval - for i = 1:(nls.meta.nvar) - for j = b.colptr[i]:(b.colptr[i + 1] - 1) - cols[j] = i - end - end - return rows, cols - end - - function NLPModels.jac_coord_residual!( - b::SparseEnzymeADJacobian, - nls::AbstractADNLSModel, - x::AbstractVector, - vals::AbstractVector, - ) - sparse_jac_coord!(nls.F!, b, x, vals) - return vals - end - - # Sparse Hessian - function get_nln_nnzh(b::SparseEnzymeADHessian, nvar) - return length(b.rowval) - end - - function NLPModels.hess_structure!( - b::SparseEnzymeADHessian, - nlp::ADModel, - rows::AbstractVector{<:Integer}, - cols::AbstractVector{<:Integer}, - ) - rows .= b.rowval - for i = 1:(nlp.meta.nvar) - for j = b.colptr[i]:(b.colptr[i + 1] - 1) - cols[j] = i - end - end - return rows, cols - end - - function sparse_hess_coord!( - b::SparseEnzymeADHessian, - x::AbstractVector, - obj_weight, - y::AbstractVector, - vals::AbstractVector, - ) - # SparseMatrixColorings.jl requires a SparseMatrixCSC for the decompression - A = SparseMatrixCSC(b.nvar, b.nvar, b.colptr, 
b.rowval, b.nzval) - - # Enzyme.Duplicated requires primal and shadow to have the same type. - # Copy x into a pre-allocated buffer to ensure type match with b.v. - copyto!(b.xbuf, x) - - groups = column_groups(b.result_coloring) - for (icol, cols) in enumerate(groups) - # Update the seed - b.v .= 0 - for col in cols - b.v[col] = 1 - end - - _hvp!( - Enzyme.DuplicatedNoNeed(b.grad, b.compressed_hessian_icol), - b.ℓ, - b.xbuf, - b.v, - y, - obj_weight, - b.cx, - ) - - if b.coloring_mode == :direct - # Update the coefficients of the lower triangular part of the Hessian that are related to the color `icol` - decompress_single_color!(A, b.compressed_hessian_icol, icol, b.result_coloring, :L) - end - if b.coloring_mode == :substitution - view(b.compressed_hessian, :, icol) .= b.compressed_hessian_icol - end - end - if b.coloring_mode == :substitution - decompress!(A, b.compressed_hessian, b.result_coloring, :L) - end - vals .= b.nzval - return vals - end - - function NLPModels.hess_coord!( - b::SparseEnzymeADHessian, - nlp::ADModel, - x::AbstractVector, - y::AbstractVector, - obj_weight::Real, - vals::AbstractVector, - ) - sparse_hess_coord!(b, x, obj_weight, y, vals) - end - - # Could be optimized! - function NLPModels.hess_coord!( - b::SparseEnzymeADHessian, - nlp::ADModel, - x::AbstractVector, - obj_weight::Real, - vals::AbstractVector, - ) - b.y .= 0 - sparse_hess_coord!(b, x, obj_weight, b.y, vals) - end - - function NLPModels.hess_coord!( - b::SparseEnzymeADHessian, - nlp::ADModel, - x::AbstractVector, - j::Integer, - vals::AbstractVector, - ) - for (w, k) in enumerate(nlp.meta.nln) - b.y[w] = k == j ? 
1 : 0 - end - obj_weight = zero(eltype(x)) - sparse_hess_coord!(b, x, obj_weight, b.y, vals) - return vals - end - - function NLPModels.hess_structure_residual!( - b::SparseEnzymeADHessian, - nls::AbstractADNLSModel, - rows::AbstractVector{<:Integer}, - cols::AbstractVector{<:Integer}, - ) - return hess_structure!(b, nls, rows, cols) - end - - function NLPModels.hess_coord_residual!( - b::SparseEnzymeADHessian, - nls::AbstractADNLSModel, - x::AbstractVector, - v::AbstractVector, - vals::AbstractVector, - ) - obj_weight = zero(eltype(x)) - sparse_hess_coord!(b, x, obj_weight, v, vals) - end - end -end diff --git a/src/nlp.jl b/src/nlp.jl index 37315c24..c517880a 100644 --- a/src/nlp.jl +++ b/src/nlp.jl @@ -63,7 +63,6 @@ One can specify a new backend with the keyword arguments `backend::ADNLPModels.A There are three pre-coded backends: - the default `ForwardDiffAD`. - `ReverseDiffAD`. -- `ZygoteDiffAD` accessible after loading `Zygote.jl` in your environment. For an advanced usage, one can define its own backend and redefine the API as done in [ADNLPModels.jl/src/forward.jl](https://github.com/JuliaSmoothOptimizers/ADNLPModels.jl/blob/main/src/forward.jl). # Examples @@ -74,9 +73,6 @@ x0 = ones(3) nvar = 3 ADNLPModel(f, x0) # uses the default ForwardDiffAD backend. ADNLPModel(f, x0; backend = ADNLPModels.ReverseDiffAD) # uses ReverseDiffAD backend. - -using Zygote -ADNLPModel(f, x0; backend = ADNLPModels.ZygoteAD) ``` ```julia @@ -87,9 +83,6 @@ c(x) = [1x[1] + x[2]; x[2]] nvar, ncon = 3, 2 ADNLPModel(f, x0, c, zeros(ncon), zeros(ncon)) # uses the default ForwardDiffAD backend. ADNLPModel(f, x0, c, zeros(ncon), zeros(ncon); backend = ADNLPModels.ReverseDiffAD) # uses ReverseDiffAD backend. 
- -using Zygote -ADNLPModel(f, x0, c, zeros(ncon), zeros(ncon); backend = ADNLPModels.ZygoteAD) ``` For in-place constraints function, use one of the following constructors: diff --git a/src/nls.jl b/src/nls.jl index 8484479a..79ca6aba 100644 --- a/src/nls.jl +++ b/src/nls.jl @@ -71,7 +71,6 @@ One can specify a new backend with the keyword arguments `backend::ADNLPModels.A There are three pre-coded backends: - the default `ForwardDiffAD`. - `ReverseDiffAD`. -- `ZygoteDiffAD` accessible after loading `Zygote.jl` in your environment. For an advanced usage, one can define its own backend and redefine the API as done in [ADNLPModels.jl/src/forward.jl](https://github.com/JuliaSmoothOptimizers/ADNLPModels.jl/blob/main/src/forward.jl). # Examples @@ -83,9 +82,6 @@ x0 = ones(3) nvar = 3 ADNLSModel(F, x0, nequ) # uses the default ForwardDiffAD backend. ADNLSModel(F, x0, nequ; backend = ADNLPModels.ReverseDiffAD) # uses ReverseDiffAD backend. - -using Zygote -ADNLSModel(F, x0, nequ; backend = ADNLPModels.ZygoteAD) ``` ```julia @@ -97,9 +93,6 @@ c(x) = [1x[1] + x[2]; x[2]] nvar, ncon = 3, 2 ADNLSModel(F, x0, nequ, c, zeros(ncon), zeros(ncon)) # uses the default ForwardDiffAD backend. ADNLSModel(F, x0, nequ, c, zeros(ncon), zeros(ncon); backend = ADNLPModels.ReverseDiffAD) # uses ReverseDiffAD backend. 
- -using Zygote -ADNLSModel(F, x0, nequ, c, zeros(ncon), zeros(ncon); backend = ADNLPModels.ZygoteAD) ``` For in-place constraints and residual function, use one of the following constructors: diff --git a/src/predefined_backend.jl b/src/predefined_backend.jl index 463e8c59..b55f1cac 100644 --- a/src/predefined_backend.jl +++ b/src/predefined_backend.jl @@ -58,27 +58,11 @@ enzyme_backend = Dict( :hessian_residual_backend => SparseEnzymeADHessian, ) -zygote_backend = Dict( - :gradient_backend => ZygoteADGradient, - :jprod_backend => ZygoteADJprod, - :jtprod_backend => ZygoteADJtprod, - :hprod_backend => ForwardDiffADHvprod, - :jacobian_backend => ZygoteADJacobian, - :hessian_backend => ZygoteADHessian, - :ghjvprod_backend => ForwardDiffADGHjvprod, - :jprod_residual_backend => ZygoteADJprod, - :jtprod_residual_backend => ZygoteADJtprod, - :hprod_residual_backend => ForwardDiffADHvprod, - :jacobian_residual_backend => ZygoteADJacobian, - :hessian_residual_backend => ZygoteADHessian, -) - predefined_backend = Dict( :default => default_backend, :optimized => optimized_backend, :generic => generic_backend, :enzyme => enzyme_backend, - :zygote => zygote_backend, ) """ diff --git a/src/zygote.jl b/src/zygote.jl deleted file mode 100644 index 63358a7e..00000000 --- a/src/zygote.jl +++ /dev/null @@ -1,119 +0,0 @@ -struct ZygoteADGradient <: ADBackend end -struct ZygoteADJacobian <: ImmutableADbackend - nnzj::Int -end -struct ZygoteADHessian <: ImmutableADbackend - nnzh::Int -end -struct ZygoteADJprod <: ImmutableADbackend end -struct ZygoteADJtprod <: ImmutableADbackend end - -@init begin - @require Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" begin - # See https://fluxml.ai/Zygote.jl/latest/limitations/ - function get_immutable_c(nlp::ADModel) - function c(x; nnln = nlp.meta.nnln) - c = Zygote.Buffer(x, nnln) - nlp.c!(c, x) - return copy(c) - end - return c - end - get_c(nlp::ADModel, ::ImmutableADbackend) = get_immutable_c(nlp) - - function 
get_immutable_F(nls::AbstractADNLSModel) - function F(x; nequ = nls.nls_meta.nequ) - Fx = Zygote.Buffer(x, nequ) - nls.F!(Fx, x) - return copy(Fx) - end - return F - end - get_F(nls::AbstractADNLSModel, ::ImmutableADbackend) = get_immutable_F(nls) - - function ZygoteADGradient( - nvar::Integer, - f, - ncon::Integer = 0, - c::Function = (args...) -> []; - kwargs..., - ) - return ZygoteADGradient() - end - function gradient(::ZygoteADGradient, f, x) - g = Zygote.gradient(f, x)[1] - return g === nothing ? zero(x) : g - end - function gradient!(::ZygoteADGradient, g, f, x) - _g = Zygote.gradient(f, x)[1] - g .= _g === nothing ? 0 : _g - end - - function ZygoteADJacobian( - nvar::Integer, - f, - ncon::Integer = 0, - c::Function = (args...) -> []; - kwargs..., - ) - @assert nvar > 0 - nnzj = nvar * ncon - return ZygoteADJacobian(nnzj) - end - function jacobian(::ZygoteADJacobian, f, x) - return Zygote.jacobian(f, x)[1] - end - - function ZygoteADHessian( - nvar::Integer, - f, - ncon::Integer = 0, - c::Function = (args...) -> []; - kwargs..., - ) - @assert nvar > 0 - nnzh = nvar * (nvar + 1) / 2 - return ZygoteADHessian(nnzh) - end - function hessian(b::ZygoteADHessian, f, x) - return jacobian( - ForwardDiffADJacobian(length(x), f, x0 = x), - x -> gradient(ZygoteADGradient(), f, x), - x, - ) - end - - function ZygoteADJprod( - nvar::Integer, - f, - ncon::Integer = 0, - c::Function = (args...) -> []; - kwargs..., - ) - return ZygoteADJprod() - end - function Jprod!(::ZygoteADJprod, Jv, f, x, v, ::Val) - Jv .= vec(Zygote.jacobian(t -> f(x + t * v), 0)[1]) - return Jv - end - - function ZygoteADJtprod( - nvar::Integer, - f, - ncon::Integer = 0, - c::Function = (args...) 
-> []; - kwargs..., - ) - return ZygoteADJtprod() - end - function Jtprod!(::ZygoteADJtprod, Jtv, f, x, v, ::Val) - g = Zygote.gradient(x -> dot(f(x), v), x)[1] - if g === nothing - Jtv .= zero(x) - else - Jtv .= g - end - return Jtv - end - end -end diff --git a/test/Project.toml b/test/Project.toml index e6ae782e..bedc0e9a 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -11,10 +11,10 @@ SparseMatrixColorings = "0a514795-09f3-496d-8182-132a7b665d35" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] -ForwardDiff = "0.10" -ManualNLPModels = "0.1" -NLPModels = "0.21" +ForwardDiff = "1" +ManualNLPModels = "0.2" +NLPModels = "0.21.5" NLPModelsModifiers = "0.7" NLPModelsTest = "0.10" ReverseDiff = "1" -SparseMatrixColorings = "0.4.0" +SparseMatrixColorings = "0.4.21" diff --git a/test/runtests.jl b/test/runtests.jl index 21f8dfa4..e9e3c7ac 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -103,27 +103,23 @@ include("nls/basic.jl") include("nls/nlpmodelstest.jl") @testset "Basic NLP tests using $backend " for backend in keys(ADNLPModels.predefined_backend) - (backend == :zygote) && continue (backend == :enzyme) && continue test_autodiff_model("$backend", backend = backend) end @testset "Checking NLPModelsTest (NLP) tests with $backend" for backend in keys(ADNLPModels.predefined_backend) - (backend == :zygote) && continue (backend == :enzyme) && continue nlp_nlpmodelstest(backend) end @testset "Basic NLS tests using $backend " for backend in keys(ADNLPModels.predefined_backend) - (backend == :zygote) && continue (backend == :enzyme) && continue autodiff_nls_test("$backend", backend = backend) end @testset "Checking NLPModelsTest (NLS) tests with $backend" for backend in keys(ADNLPModels.predefined_backend) - (backend == :zygote) && continue (backend == :enzyme) && continue nls_nlpmodelstest(backend) end diff --git a/test/zygote.jl b/test/zygote.jl deleted file mode 100644 index 023c217d..00000000 --- a/test/zygote.jl +++ /dev/null @@ -1,80 +0,0 @@ 
-using LinearAlgebra, SparseArrays, Test -using ADNLPModels, ManualNLPModels, NLPModels, NLPModelsModifiers, NLPModelsTest -using ADNLPModels: - gradient, gradient!, jacobian, hessian, Jprod!, Jtprod!, directional_second_derivative, Hvprod! - -for problem in NLPModelsTest.nlp_problems ∪ ["GENROSE"] - include("nlp/problems/$(lowercase(problem)).jl") -end -for problem in NLPModelsTest.nls_problems - include("nls/problems/$(lowercase(problem)).jl") -end - -ZygoteAD() = ADNLPModels.ADModelBackend( - ADNLPModels.ZygoteADGradient(), - ADNLPModels.GenericForwardDiffADHvprod(), - ADNLPModels.ZygoteADJprod(), - ADNLPModels.ZygoteADJtprod(), - ADNLPModels.ZygoteADJacobian(0), - ADNLPModels.ZygoteADHessian(0), - ADNLPModels.ForwardDiffADGHjvprod(), - ADNLPModels.EmptyADbackend(), - ADNLPModels.EmptyADbackend(), - ADNLPModels.EmptyADbackend(), - ADNLPModels.EmptyADbackend(), - ADNLPModels.EmptyADbackend(), -) - -function test_autodiff_backend_error() - @testset "Error without loading package - $backend" for backend in [:ZygoteAD] - adbackend = eval(backend)() - @test_throws ArgumentError gradient(adbackend.gradient_backend, sum, [1.0]) - @test_throws ArgumentError gradient!(adbackend.gradient_backend, [1.0], sum, [1.0]) - @test_throws ArgumentError jacobian(adbackend.jacobian_backend, identity, [1.0]) - @test_throws ArgumentError hessian(adbackend.hessian_backend, sum, [1.0]) - @test_throws ArgumentError Jprod!( - adbackend.jprod_backend, - [1.0], - [1.0], - identity, - [1.0], - Val(:c), - ) - @test_throws ArgumentError Jtprod!( - adbackend.jtprod_backend, - [1.0], - [1.0], - identity, - [1.0], - Val(:c), - ) - end -end - -# Test the argument error without loading the packages -test_autodiff_backend_error() - -# Automatically loads the code for Zygote with Requires -import Zygote - -include("utils.jl") -include("nlp/basic.jl") -include("nls/basic.jl") -include("nlp/nlpmodelstest.jl") -include("nls/nlpmodelstest.jl") - -@testset "Basic NLP tests using $backend " for backend in 
(:zygote,) - test_autodiff_model("$backend", backend = backend) -end - -@testset "Checking NLPModelsTest (NLP) tests with $backend" for backend in (:zygote,) - nlp_nlpmodelstest(backend) -end - -@testset "Basic NLS tests using $backend " for backend in (:zygote,) - autodiff_nls_test("$backend", backend = backend) -end - -@testset "Checking NLPModelsTest (NLS) tests with $backend" for backend in (:zygote,) - nls_nlpmodelstest(backend) -end