Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
f86642e
Initial plan
Copilot May 25, 2026
a20e171
Add ForwardDiff extension using ForwardDiff's public API
Copilot May 25, 2026
0d98973
Fix CI: format ext files and route DI cache structural test through A…
yebai May 25, 2026
3f9ad39
Apply scrutinise findings to ForwardDiff extension
yebai May 25, 2026
cb6e14c
Wire ext/forwarddiff into CI
yebai May 25, 2026
dc313f7
Unify FD gradient/Jacobian/Hessian caches into FDCache{A}
yebai May 25, 2026
ea03f72
Honor AutoForwardDiff tag and probe the problem once in prepare
yebai May 25, 2026
45b1d5e
Tidy two minor redundancies in the FD extension
yebai May 25, 2026
29050c0
Bump to 0.15.2 with HISTORY entry for the FD extension
yebai May 25, 2026
e5d253b
Inline _fd_target and clarify why _fd_call must stay top-level
yebai May 25, 2026
3be777f
Share :allocations and :type_stability groups via AbstractPPLTestExt
yebai May 25, 2026
c246d4a
Apply scrutinise findings to the shared test groups
yebai May 25, 2026
97788ad
Fix CI: format the ext, gate :allocations on Julia 1.10, add :context…
yebai May 25, 2026
0bfe1e8
Skip ForwardDiff.checktag so custom Tag sentinels work in hot paths
yebai May 25, 2026
268e545
Honor caller atol in the :context group's gradient assertion
yebai May 25, 2026
c35ac4a
Unify the conformance harness into TestCase + run_testcase
yebai May 25, 2026
4246f6a
Move TestCase to the Test extension; one Val-dispatched generator
yebai May 25, 2026
650d443
Merge :vector and :context runners
yebai May 25, 2026
80d8ade
Document the remaining allocations_safe=false reasons
yebai May 25, 2026
1ea0a78
Drop Julia 1.10 broken-marker on scalar-gradient allocations
yebai May 25, 2026
d84606a
Skip Mooncake :alloc checks on Julia 1.10 — they're resolver-flaky
yebai May 25, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ jobs:
matrix:
label:
- ext/differentiationinterface
- ext/forwarddiff
- ext/mooncake
version:
- '1'
Expand Down
4 changes: 4 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 0.15.2

Added `AbstractPPLForwardDiffExt`, a direct ForwardDiff path for `AutoForwardDiff` (gradient, Jacobian, Hessian, `context`, chunk size, custom `tag`).

## 0.15.1

Added Hessian support to the AD interface. Pass `order=2` to `prepare(adtype, problem, x)` to build a Hessian-capable evaluator. The new `value_gradient_and_hessian!!(prepared, x)` then returns `(value, gradient, hessian)` in a single call. Both the DifferentiationInterface and Mooncake extensions implement this.
Expand Down
7 changes: 6 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ uuid = "7a57a42e-76ec-4ea3-a279-07e840d6d9cf"
keywords = ["probabilistic programming"]
license = "MIT"
desc = "Common interfaces for probabilistic programming"
version = "0.15.1"
version = "0.15.2"

[deps]
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
Expand All @@ -19,14 +19,17 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"

[weakdeps]
DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[extensions]
AbstractPPLDifferentiationInterfaceExt = ["DifferentiationInterface"]
AbstractPPLDistributionsExt = ["Distributions", "LinearAlgebra"]
AbstractPPLForwardDiffExt = ["ForwardDiff", "DiffResults"]
AbstractPPLMooncakeExt = ["Mooncake"]
AbstractPPLTestExt = ["Test"]

Expand All @@ -36,8 +39,10 @@ AbstractMCMC = "2, 3, 4, 5"
Accessors = "0.1"
BangBang = "0.4"
DensityInterface = "0.4"
DiffResults = "1"
DifferentiationInterface = "0.6, 0.7"
Distributions = "0.25"
ForwardDiff = "0.10, 1"
JSON = "0.19 - 0.21, 1"
LinearAlgebra = "<0.0.1, 1"
MacroTools = "0.5"
Expand Down
225 changes: 225 additions & 0 deletions ext/AbstractPPLForwardDiffExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
module AbstractPPLForwardDiffExt

using AbstractPPL: AbstractPPL
using AbstractPPL.Evaluators: Evaluators, Prepared, VectorEvaluator, _ad_output_arity
using ADTypes: AutoForwardDiff
using ForwardDiff: ForwardDiff
using DiffResults: DiffResults

# Chunk selection for the ForwardDiff configs. The chunk size is the `CS` type
# parameter of `AutoForwardDiff{CS}`: with `CS === nothing` the choice is left
# to ForwardDiff (derived from the input `x`); otherwise the requested size is
# used verbatim and the input is ignored.
function _fd_chunk(::AutoForwardDiff{nothing}, x)
    return ForwardDiff.Chunk(x)
end
function _fd_chunk(::AutoForwardDiff{CS}, _) where {CS}
    return ForwardDiff.Chunk{CS}()
end

# Tag selection for the `*Config` constructors. When the caller set
# `adtype.tag` (e.g. for nested differentiation) that tag is used as-is. When
# it is `nothing` (the ADTypes default) we build `Tag(target, eltype(x))`, which
# is what ForwardDiff's config constructors would default to themselves.
@inline function _fd_tag(adtype::AutoForwardDiff, target, x)
    user_tag = adtype.tag
    if user_tag === nothing
        return ForwardDiff.Tag(target, eltype(x))
    end
    return user_tag
end

# Cache carried by a ForwardDiff-backed `Prepared` evaluator.
#
# Type parameter `A` is a `Symbol`, one of `:scalar`, `:vector` or `:hessian`.
# It records the output arity for order=1 preps and marks order=2 preps as
# `:hessian`, so the hot paths and the arity-mismatch errors are selected by
# dispatch instead of by a runtime branch.
#
# Fields:
# - `result` / `config`: the DiffResults buffer and ForwardDiff config for the
#   primary operation (gradient, Jacobian or Hessian, depending on `A`).
# - `gradient_result` / `gradient_config`: filled only for `:hessian` caches, so
#   `value_and_gradient!!` on an order=2 prep can skip the O(n²) Hessian work.
#   They default to `nothing`.
#
# `result === nothing` marks an empty (length-zero) input: hot paths dispatch on
# `FDCache{A,Nothing}` and return before making any ForwardDiff call. The stored
# `result` aliases the arrays handed back by `value_and_*!!`, as permitted by
# the `!!` contract.
struct FDCache{A,R,C,GR,GC}
    result::R
    config::C
    gradient_result::GR
    gradient_config::GC

    # Only `A` is given explicitly; the remaining parameters are taken from the
    # concrete types of the supplied field values.
    function FDCache{A}(
        result, config, gradient_result=nothing, gradient_config=nothing
    ) where {A}
        return new{
            A,
            typeof(result),
            typeof(config),
            typeof(gradient_result),
            typeof(gradient_config),
        }(
            result, config, gradient_result, gradient_config
        )
    end
end

"""
prepare(adtype::AutoForwardDiff, problem, x; check_dims=true, context::Tuple=(), order=1)

Prepare a ForwardDiff gradient, Jacobian, or Hessian evaluator for a vector
input. `order=1` (default) picks gradient/Jacobian by output arity; `order=2`
builds Hessian machinery and requires a scalar-valued problem. `context` and
`check_dims` follow the base `prepare` contract.
"""
function AbstractPPL.prepare(
    adtype::AutoForwardDiff,
    problem,
    x::AbstractVector{<:Real};
    check_dims::Bool=true,
    context::Tuple=(),
    order::Int=1,
)
    Evaluators._validate_ad_order(order)
    evaluator = AbstractPPL.prepare(
        problem, x; check_dims=check_dims, context=context
    )::VectorEvaluator

    # Evaluate the problem exactly once at prep time. The value decides the
    # output arity and, for vector outputs, doubles as the prototype for the
    # Jacobian result buffers.
    probe = evaluator(x)
    arity = _ad_output_arity(probe)

    # Ingredients shared by every ForwardDiff config built below.
    chunk = _fd_chunk(adtype, x)
    target = Base.Fix2(_fd_call, evaluator)
    tag = _fd_tag(adtype, target, x)

    n = length(x)
    T = eltype(x)

    # Order 2: Hessian machinery plus a dedicated gradient cache, so that a
    # gradient-only call on this prep does not pay for the Hessian.
    if order == 2
        arity === :scalar || Evaluators._throw_hessian_needs_scalar()
        if n == 0
            # Empty input: `nothing` result/config is the sentinel the hot
            # paths dispatch on.
            empty_cache = FDCache{:hessian}(nothing, nothing)
            return Prepared(adtype, evaluator, empty_cache, Val(2))
        end
        hess_buffers = (similar(x), similar(x, n, n))
        hess_result = DiffResults.MutableDiffResult(zero(T), hess_buffers)
        hess_config = ForwardDiff.HessianConfig(target, hess_result, x, chunk, tag)
        grad_result = DiffResults.MutableDiffResult(zero(T), (similar(x),))
        grad_config = ForwardDiff.GradientConfig(target, x, chunk, tag)
        hess_cache = FDCache{:hessian}(
            hess_result, hess_config, grad_result, grad_config
        )
        return Prepared(adtype, evaluator, hess_cache, Val(2))
    end

    # Order 1, scalar output: gradient.
    if arity === :scalar
        if n == 0
            return Prepared(adtype, evaluator, FDCache{:scalar}(nothing, nothing))
        end
        grad_result = DiffResults.MutableDiffResult(zero(T), (similar(x),))
        grad_config = ForwardDiff.GradientConfig(target, x, chunk, tag)
        return Prepared(adtype, evaluator, FDCache{:scalar}(grad_result, grad_config))
    end

    # Order 1, non-scalar output: Jacobian, with buffers shaped after the probe.
    if n == 0
        return Prepared(adtype, evaluator, FDCache{:vector}(nothing, nothing))
    end
    m = length(probe)
    jac_result = DiffResults.MutableDiffResult(similar(probe), (similar(probe, m, n),))
    jac_config = ForwardDiff.JacobianConfig(target, x, chunk, tag)
    return Prepared(adtype, evaluator, FDCache{:vector}(jac_result, jac_config))
end

# Differentiation target body: call the evaluator's function on `x`, splatting
# the stored context after it.
#
# This must remain a top-level function so that `typeof(_fd_call)` is the same
# in `prepare` and in every hot path. ForwardDiff's `*Config` keys its `Tag` on
# the target type; closures created inside different methods would have
# different types, so the `Base.Fix2(_fd_call, evaluator)` rebuilt on each call
# would no longer match the config captured at prep time.
@inline function _fd_call(x, evaluator::VectorEvaluator)
    return evaluator.f(x, evaluator.context...)
end

# `Val(false)` on every hot-path call below skips `ForwardDiff.checktag`. A
# user-supplied `adtype.tag` (e.g. DynamicPPL's `DynamicPPLTag` sentinel for
# nested AD) has a tag-type parameter that does not equal `typeof(target)`, so
# the default check would error. The tag's role is only to label the outer
# Dual scope; the config we built at prep time already encodes the right tag.

# Empty-input gradient (scalar or order=2 prep whose cache holds no result):
# validate the input, evaluate the problem directly and return a zero-length
# gradient with the element type of `x`. ForwardDiff is never called.
@inline function AbstractPPL.value_and_gradient!!(
    p::Prepared{
        <:AutoForwardDiff,
        <:VectorEvaluator,
        <:Union{FDCache{:scalar,Nothing},FDCache{:hessian,Nothing}},
    },
    x::AbstractVector{T},
) where {T<:Real}
    evaluator = p.evaluator
    Evaluators._check_ad_input(evaluator, x)
    value = evaluator(x)
    empty_gradient = Vector{T}(undef, 0)
    return (value, empty_gradient)
end

# Gradient for a scalar-output, order=1 prep. The returned value and gradient
# are read out of the cached DiffResults buffer, so they alias cache storage.
# `Val(false)` disables ForwardDiff's tag check on this call.
@inline function AbstractPPL.value_and_gradient!!(
    p::Prepared{<:AutoForwardDiff,<:VectorEvaluator,<:FDCache{:scalar}},
    x::AbstractVector{<:Real},
)
    Evaluators._check_ad_input(p.evaluator, x)
    buffer = p.cache.result
    target = Base.Fix2(_fd_call, p.evaluator)
    ForwardDiff.gradient!(buffer, target, x, p.cache.config, Val(false))
    value = DiffResults.value(buffer)
    gradient = DiffResults.gradient(buffer)
    return (value, gradient)
end

# Gradient on an order=2 prep. The order=1 gradient contract is served from the
# separate gradient buffer/config stored in the `:hessian` cache, which avoids
# the O(n²) Hessian computation. `Val(false)` disables ForwardDiff's tag check.
@inline function AbstractPPL.value_and_gradient!!(
    p::Prepared{<:AutoForwardDiff,<:VectorEvaluator,<:FDCache{:hessian}},
    x::AbstractVector{<:Real},
)
    Evaluators._check_ad_input(p.evaluator, x)
    buffer = p.cache.gradient_result
    target = Base.Fix2(_fd_call, p.evaluator)
    ForwardDiff.gradient!(buffer, target, x, p.cache.gradient_config, Val(false))
    value = DiffResults.value(buffer)
    gradient = DiffResults.gradient(buffer)
    return (value, gradient)
end

# Arity mismatch: a gradient was requested from a vector-output prep. The
# rejection is keyed on the `:vector` cache tag, so dispatch picks this failure
# path at compile time; neither argument is inspected.
@inline function AbstractPPL.value_and_gradient!!(
    _prepared::Prepared{<:AutoForwardDiff,<:VectorEvaluator,<:FDCache{:vector}},
    _x::AbstractVector{<:Real},
)
    return Evaluators._throw_gradient_needs_scalar()
end

# Arity mismatch: a Jacobian was requested from a scalar-output prep (either
# order=1 `:scalar` or order=2 `:hessian`). Selected purely by dispatch on the
# cache tag; neither argument is inspected.
@inline function AbstractPPL.value_and_jacobian!!(
    _prepared::Prepared{
        <:AutoForwardDiff,<:VectorEvaluator,<:Union{FDCache{:scalar},FDCache{:hessian}}
    },
    _x::AbstractVector{<:Real},
)
    return Evaluators._throw_jacobian_needs_vector()
end

# Empty-input Jacobian: validate the input, evaluate the problem directly and
# return a `length(value) × 0` matrix allocated via `similar(x, ...)`.
# ForwardDiff is never called.
@inline function AbstractPPL.value_and_jacobian!!(
    p::Prepared{<:AutoForwardDiff,<:VectorEvaluator,<:FDCache{:vector,Nothing}},
    x::AbstractVector{<:Real},
)
    evaluator = p.evaluator
    Evaluators._check_ad_input(evaluator, x)
    y = evaluator(x)
    empty_jacobian = similar(x, length(y), 0)
    return (y, empty_jacobian)
end

# Jacobian for a vector-output prep. The returned value and Jacobian are read
# out of the cached DiffResults buffer, so they alias cache storage.
# `Val(false)` disables ForwardDiff's tag check on this call.
@inline function AbstractPPL.value_and_jacobian!!(
    p::Prepared{<:AutoForwardDiff,<:VectorEvaluator,<:FDCache{:vector}},
    x::AbstractVector{<:Real},
)
    Evaluators._check_ad_input(p.evaluator, x)
    buffer = p.cache.result
    target = Base.Fix2(_fd_call, p.evaluator)
    ForwardDiff.jacobian!(buffer, target, x, p.cache.config, Val(false))
    value = DiffResults.value(buffer)
    jacobian = DiffResults.jacobian(buffer)
    return (value, jacobian)
end

# A Hessian was requested from an order=1 prep (`:scalar` or `:vector` cache).
# Selected purely by dispatch on the cache tag; neither argument is inspected.
@inline function AbstractPPL.value_gradient_and_hessian!!(
    _prepared::Prepared{
        <:AutoForwardDiff,<:VectorEvaluator,<:Union{FDCache{:scalar},FDCache{:vector}}
    },
    _x::AbstractVector{<:Real},
)
    return Evaluators._throw_hessian_needs_order_2_prep()
end

# Empty-input Hessian: validate the input, evaluate the problem directly and
# return a zero-length gradient plus a 0×0 Hessian. ForwardDiff is never called.
@inline function AbstractPPL.value_gradient_and_hessian!!(
    p::Prepared{<:AutoForwardDiff,<:VectorEvaluator,<:FDCache{:hessian,Nothing}},
    x::AbstractVector{T},
) where {T<:Real}
    evaluator = p.evaluator
    Evaluators._check_ad_input(evaluator, x)
    value = evaluator(x)
    empty_gradient = Vector{T}(undef, 0)
    empty_hessian = similar(x, 0, 0)
    return (value, empty_gradient, empty_hessian)
end

# Value, gradient and Hessian from an order=2 prep in a single ForwardDiff
# call. All three outputs are read out of the cached DiffResults buffer, so
# they alias cache storage. `Val(false)` disables ForwardDiff's tag check.
@inline function AbstractPPL.value_gradient_and_hessian!!(
    p::Prepared{<:AutoForwardDiff,<:VectorEvaluator,<:FDCache{:hessian}},
    x::AbstractVector{<:Real},
)
    Evaluators._check_ad_input(p.evaluator, x)
    buffer = p.cache.result
    target = Base.Fix2(_fd_call, p.evaluator)
    ForwardDiff.hessian!(buffer, target, x, p.cache.config, Val(false))
    value = DiffResults.value(buffer)
    gradient = DiffResults.gradient(buffer)
    hessian = DiffResults.hessian(buffer)
    return (value, gradient, hessian)
end

end # module
Loading
Loading