TuringLang · yebai · May 25, 2026 · May 25, 2026 · May 25, 2026 · May 25, 2026
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -61,6 +61,7 @@ jobs:
       matrix:
         label:
           - ext/differentiationinterface
+          - ext/forwarddiff
           - ext/mooncake
         version:
           - '1'

diff --git a/HISTORY.md b/HISTORY.md
@@ -1,3 +1,7 @@
+## 0.15.2
+
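+Added `AbstractPPLForwardDiffExt`, which handles `AutoForwardDiff` by calling ForwardDiff directly. It supports gradients, Jacobians, and Hessians, and honours `context`, the chunk size, and a custom `tag`. The extension is loaded when both ForwardDiff and DiffResults are available.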
+
 ## 0.15.1
 
 Added Hessian support to the AD interface. Pass `order=2` to `prepare(adtype, problem, x)` to build a Hessian-capable evaluator. The new `value_gradient_and_hessian!!(prepared, x)` then returns `(value, gradient, hessian)` in a single call. Both the DifferentiationInterface and Mooncake extensions implement this.

diff --git a/Project.toml b/Project.toml
@@ -3,7 +3,7 @@ uuid = "7a57a42e-76ec-4ea3-a279-07e840d6d9cf"
 keywords = ["probabilistic programming"]
 license = "MIT"
 desc = "Common interfaces for probabilistic programming"
-version = "0.15.1"
+version = "0.15.2"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
@@ -19,14 +19,17 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 
 [weakdeps]
+DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
 DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [extensions]
 AbstractPPLDifferentiationInterfaceExt = ["DifferentiationInterface"]
 AbstractPPLDistributionsExt = ["Distributions", "LinearAlgebra"]
+AbstractPPLForwardDiffExt = ["ForwardDiff", "DiffResults"]
 AbstractPPLMooncakeExt = ["Mooncake"]
 AbstractPPLTestExt = ["Test"]
 
@@ -36,8 +39,10 @@ AbstractMCMC = "2, 3, 4, 5"
 Accessors = "0.1"
 BangBang = "0.4"
 DensityInterface = "0.4"
+DiffResults = "1"
 DifferentiationInterface = "0.6, 0.7"
 Distributions = "0.25"
+ForwardDiff = "0.10, 1"
 JSON = "0.19 - 0.21, 1"
 LinearAlgebra = "<0.0.1, 1"
 MacroTools = "0.5"

diff --git a/ext/AbstractPPLForwardDiffExt.jl b/ext/AbstractPPLForwardDiffExt.jl
@@ -0,0 +1,225 @@
+module AbstractPPLForwardDiffExt
+
+using AbstractPPL: AbstractPPL
+using AbstractPPL.Evaluators: Evaluators, Prepared, VectorEvaluator, _ad_output_arity
+using ADTypes: AutoForwardDiff
+using ForwardDiff: ForwardDiff
+using DiffResults: DiffResults
+
+# Select the ForwardDiff chunk for input `x`. The chunk size is the `CS` type
+# parameter of `AutoForwardDiff{CS}`: when it is `nothing`, ForwardDiff chooses
+# one from `x`; otherwise `x` is ignored and the requested size is used as-is.
+function _fd_chunk(::AutoForwardDiff{nothing}, x)
+    return ForwardDiff.Chunk(x)
+end
+function _fd_chunk(::AutoForwardDiff{CS}, _) where {CS}
+    return ForwardDiff.Chunk{CS}()
+end
+
+# Resolve the tag handed to the ForwardDiff `*Config` constructors. A tag set on
+# `adtype` (e.g. for nested differentiation) wins; when `adtype.tag` is
+# `nothing` (the ADTypes default) fall back to `Tag(target, eltype(x))`, which
+# mirrors ForwardDiff's own per-constructor default.
+@inline function _fd_tag(adtype::AutoForwardDiff, target, x)
+    user_tag = adtype.tag
+    user_tag === nothing || return user_tag
+    return ForwardDiff.Tag(target, eltype(x))
+end
+
+# Per-`Prepared` ForwardDiff workspace.
+#
+# The leading parameter `A::Symbol` is one of `:scalar`, `:vector` or
+# `:hessian`. It records the output arity for order=1 preps and the order
+# itself for order=2 preps (`:hessian`), so the hot paths and the
+# arity-mismatch rejections are selected by dispatch rather than by a runtime
+# branch.
+#
+# Fields:
+#   * `result` / `config` — the primary `DiffResults` buffer and ForwardDiff
+#     config (gradient, Jacobian or Hessian, depending on `A`).
+#   * `gradient_result` / `gradient_config` — filled in only for `:hessian`
+#     caches, so `value_and_gradient!!` on an order=2 prep can avoid the O(n²)
+#     Hessian work; `nothing` otherwise.
+#
+# `result === nothing` marks a cache prepared for an empty input: methods
+# dispatching on `FDCache{A,Nothing}` return before making any ForwardDiff call.
+# The arrays inside `result` are the very arrays returned by `value_and_*!!`
+# (the `!!` contract), so callers must copy them if they need to keep them.
+struct FDCache{A,R,C,GR,GC}
+    result::R
+    config::C
+    gradient_result::GR
+    gradient_config::GC
+    # Canonical constructor: only `A` is spelled out, the rest is inferred.
+    function FDCache{A}(res::R, cfg::C, grad_res::GR, grad_cfg::GC) where {A,R,C,GR,GC}
+        return new{A,R,C,GR,GC}(res, cfg, grad_res, grad_cfg)
+    end
+    # Shorter forms: omitted gradient slots default to `nothing`.
+    FDCache{A}(res, cfg) where {A} = FDCache{A}(res, cfg, nothing, nothing)
+    FDCache{A}(res, cfg, grad_res) where {A} = FDCache{A}(res, cfg, grad_res, nothing)
+end
+
+"""
+    prepare(adtype::AutoForwardDiff, problem, x; check_dims=true, context::Tuple=(), order=1)
+
+Prepare a ForwardDiff gradient, Jacobian, or Hessian evaluator for a vector
+input.
+
+# Arguments
+- `adtype`: selects the chunk size (its type parameter) and, optionally, a
+  custom `tag`; both are baked into the ForwardDiff config built here.
+- `problem`: forwarded to the base `prepare(problem, x; check_dims, context)`,
+  which must return a `VectorEvaluator`.
+- `x`: prototype input. Its element type and length size the result buffers.
+
+# Keywords
+- `check_dims`, `context`: follow the base `prepare` contract.
+- `order`: `1` (default) picks gradient or Jacobian by output arity; `2` builds
+  Hessian machinery and requires a scalar-valued problem.
+
+# Returns
+A `Prepared` whose cache holds the ForwardDiff config and result buffers. For
+an empty `x` the cache is a sentinel (`result === nothing`) and no ForwardDiff
+object is constructed.
+
+# Throws
+Whatever `Evaluators._validate_ad_order` raises for an unsupported `order`, and
+the error from `Evaluators._throw_hessian_needs_scalar` when `order == 2` but
+the problem is not scalar-valued.
+"""
+function AbstractPPL.prepare(
+    adtype::AutoForwardDiff,
+    problem,
+    x::AbstractVector{<:Real};
+    check_dims::Bool=true,
+    context::Tuple=(),
+    order::Int=1,
+)
+    Evaluators._validate_ad_order(order)
+    evaluator = AbstractPPL.prepare(problem, x; check_dims, context)::VectorEvaluator
+    # Probe the output once: the value classifies arity, and the vector branch
+    # reuses it as the Jacobian-result prototype.
+    y_probe = evaluator(x)
+    arity = _ad_output_arity(y_probe)
+    # A Hessian only makes sense for scalar outputs; reject before building
+    # anything (this also covers the empty-input case below).
+    order == 2 && arity !== :scalar && Evaluators._throw_hessian_needs_scalar()
+
+    # Empty input: hand back the sentinel cache *before* touching ForwardDiff.
+    # Chunk selection and config construction are not guaranteed to accept
+    # length-zero inputs, and none of that machinery is used by the
+    # `FDCache{A,Nothing}` hot paths anyway.
+    if isempty(x)
+        if order == 2
+            empty_cache = FDCache{:hessian}(nothing, nothing)
+            return Prepared(adtype, evaluator, empty_cache, Val(2))
+        elseif arity === :scalar
+            return Prepared(adtype, evaluator, FDCache{:scalar}(nothing, nothing))
+        else
+            return Prepared(adtype, evaluator, FDCache{:vector}(nothing, nothing))
+        end
+    end
+
+    chunk = _fd_chunk(adtype, x)
+    # Same target expression as the hot paths use, so its type matches the
+    # config built here.
+    target = Base.Fix2(_fd_call, evaluator)
+    tag = _fd_tag(adtype, target, x)
+
+    if order == 2
+        hess_result = DiffResults.MutableDiffResult(
+            zero(eltype(x)), (similar(x), similar(x, length(x), length(x)))
+        )
+        hess_config = ForwardDiff.HessianConfig(target, hess_result, x, chunk, tag)
+        # Dedicated gradient buffers let `value_and_gradient!!` on an order=2
+        # prep skip the Hessian computation.
+        grad_result = DiffResults.MutableDiffResult(zero(eltype(x)), (similar(x),))
+        grad_config = ForwardDiff.GradientConfig(target, x, chunk, tag)
+        cache = FDCache{:hessian}(hess_result, hess_config, grad_result, grad_config)
+        return Prepared(adtype, evaluator, cache, Val(2))
+    elseif arity === :scalar
+        result = DiffResults.MutableDiffResult(zero(eltype(x)), (similar(x),))
+        config = ForwardDiff.GradientConfig(target, x, chunk, tag)
+        return Prepared(adtype, evaluator, FDCache{:scalar}(result, config))
+    else
+        result = DiffResults.MutableDiffResult(
+            similar(y_probe), (similar(y_probe, length(y_probe), length(x)),)
+        )
+        config = ForwardDiff.JacobianConfig(target, x, chunk, tag)
+        return Prepared(adtype, evaluator, FDCache{:vector}(result, config))
+    end
+end
+
+# Call the wrapped function of a `VectorEvaluator` on `x`, splatting the stored
+# context after it. Defined at module top level on purpose: ForwardDiff keys a
+# `*Config`'s `Tag` on the target's type, so `prepare` and every hot path must
+# build `Base.Fix2(_fd_call, evaluator)` from the *same* function object. A
+# closure created separately inside each method would have a distinct type per
+# method and would no longer match the config captured at prep time.
+@inline function _fd_call(x, evaluator::VectorEvaluator)
+    return evaluator.f(x, evaluator.context...)
+end
+
+# `Val(false)` on every hot-path call below skips `ForwardDiff.checktag`. A
+# user-supplied `adtype.tag` (e.g. DynamicPPL's `DynamicPPLTag` sentinel for
+# nested AD) has a tag-type parameter that does not equal `typeof(target)`, so
+# the default check would error. The tag's role is only to label the outer
+# Dual scope; the config we built at prep time already encodes the right tag.
+
+# Empty-input gradient (order=1 scalar prep or order=2 prep): validate `x`,
+# evaluate the problem directly and return a zero-length gradient without
+# calling into ForwardDiff.
+@inline function AbstractPPL.value_and_gradient!!(
+    p::Prepared{
+        <:AutoForwardDiff,
+        <:VectorEvaluator,
+        <:Union{FDCache{:scalar,Nothing},FDCache{:hessian,Nothing}},
+    },
+    x::AbstractVector{T},
+) where {T<:Real}
+    evaluator = p.evaluator
+    Evaluators._check_ad_input(evaluator, x)
+    value = evaluator(x)
+    return (value, Vector{T}())
+end
+
+# Gradient of a scalar-valued problem using the config and buffers built by
+# `prepare`. The returned gradient is the cache's own array (the `!!`
+# contract), so it is overwritten by the next call.
+@inline function AbstractPPL.value_and_gradient!!(
+    p::Prepared{<:AutoForwardDiff,<:VectorEvaluator,<:FDCache{:scalar}},
+    x::AbstractVector{<:Real},
+)
+    Evaluators._check_ad_input(p.evaluator, x)
+    cache = p.cache
+    target = Base.Fix2(_fd_call, p.evaluator)
+    # `Val(false)`: skip the tag check; the prep-time config carries the tag.
+    ForwardDiff.gradient!(cache.result, target, x, cache.config, Val(false))
+    buffer = cache.result
+    return (DiffResults.value(buffer), DiffResults.gradient(buffer))
+end
+
+# An order=2 prep can serve plain gradient requests too: it uses the separate
+# gradient result/config stored in the cache at prep time, so no O(n²) Hessian
+# work is done here.
+@inline function AbstractPPL.value_and_gradient!!(
+    p::Prepared{<:AutoForwardDiff,<:VectorEvaluator,<:FDCache{:hessian}},
+    x::AbstractVector{<:Real},
+)
+    Evaluators._check_ad_input(p.evaluator, x)
+    cache = p.cache
+    target = Base.Fix2(_fd_call, p.evaluator)
+    # `Val(false)`: skip the tag check; the prep-time config carries the tag.
+    ForwardDiff.gradient!(
+        cache.gradient_result, target, x, cache.gradient_config, Val(false)
+    )
+    buffer = cache.gradient_result
+    return (DiffResults.value(buffer), DiffResults.gradient(buffer))
+end
+
+# Gradient requested from a prep built for a vector-valued problem. The
+# mismatch is encoded in the cache tag (`:vector`), so this rejection is picked
+# by dispatch rather than by a runtime check.
+@inline AbstractPPL.value_and_gradient!!(
+    ::Prepared{<:AutoForwardDiff,<:VectorEvaluator,<:FDCache{:vector}},
+    ::AbstractVector{<:Real},
+) = Evaluators._throw_gradient_needs_scalar()
+
+# Jacobian requested from a prep built for a scalar-valued problem (order=1
+# `:scalar` or order=2 `:hessian`): always an error, selected by dispatch.
+@inline AbstractPPL.value_and_jacobian!!(
+    ::Prepared{
+        <:AutoForwardDiff,<:VectorEvaluator,<:Union{FDCache{:scalar},FDCache{:hessian}}
+    },
+    ::AbstractVector{<:Real},
+) = Evaluators._throw_jacobian_needs_vector()
+
+# Empty-input Jacobian: validate `x`, evaluate the problem directly and return
+# a `length(value) × 0` matrix without calling into ForwardDiff.
+@inline function AbstractPPL.value_and_jacobian!!(
+    p::Prepared{<:AutoForwardDiff,<:VectorEvaluator,<:FDCache{:vector,Nothing}},
+    x::AbstractVector{<:Real},
+)
+    evaluator = p.evaluator
+    Evaluators._check_ad_input(evaluator, x)
+    y = evaluator(x)
+    empty_jacobian = similar(x, length(y), 0)
+    return (y, empty_jacobian)
+end
+
+# Jacobian of a vector-valued problem using the config and buffers built by
+# `prepare`. Both returned arrays belong to the cache (the `!!` contract) and
+# are overwritten by the next call.
+@inline function AbstractPPL.value_and_jacobian!!(
+    p::Prepared{<:AutoForwardDiff,<:VectorEvaluator,<:FDCache{:vector}},
+    x::AbstractVector{<:Real},
+)
+    Evaluators._check_ad_input(p.evaluator, x)
+    cache = p.cache
+    target = Base.Fix2(_fd_call, p.evaluator)
+    # `Val(false)`: skip the tag check; the prep-time config carries the tag.
+    ForwardDiff.jacobian!(cache.result, target, x, cache.config, Val(false))
+    buffer = cache.result
+    return (DiffResults.value(buffer), DiffResults.jacobian(buffer))
+end
+
+# Hessian requested from an order=1 prep (`:scalar` or `:vector` cache): always
+# an error, selected by dispatch. Hessians need `prepare(...; order=2)`.
+@inline AbstractPPL.value_gradient_and_hessian!!(
+    ::Prepared{
+        <:AutoForwardDiff,<:VectorEvaluator,<:Union{FDCache{:scalar},FDCache{:vector}}
+    },
+    ::AbstractVector{<:Real},
+) = Evaluators._throw_hessian_needs_order_2_prep()
+
+# Empty-input Hessian: validate `x`, evaluate the problem directly and return a
+# zero-length gradient plus a 0×0 Hessian without calling into ForwardDiff.
+@inline function AbstractPPL.value_gradient_and_hessian!!(
+    p::Prepared{<:AutoForwardDiff,<:VectorEvaluator,<:FDCache{:hessian,Nothing}},
+    x::AbstractVector{T},
+) where {T<:Real}
+    evaluator = p.evaluator
+    Evaluators._check_ad_input(evaluator, x)
+    value = evaluator(x)
+    empty_gradient = Vector{T}()
+    empty_hessian = similar(x, 0, 0)
+    return (value, empty_gradient, empty_hessian)
+end
+
+# Value, gradient and Hessian in one ForwardDiff pass, using the Hessian config
+# and buffers built by an order=2 `prepare`. All returned arrays belong to the
+# cache (the `!!` contract) and are overwritten by the next call.
+@inline function AbstractPPL.value_gradient_and_hessian!!(
+    p::Prepared{<:AutoForwardDiff,<:VectorEvaluator,<:FDCache{:hessian}},
+    x::AbstractVector{<:Real},
+)
+    Evaluators._check_ad_input(p.evaluator, x)
+    cache = p.cache
+    target = Base.Fix2(_fd_call, p.evaluator)
+    # `Val(false)`: skip the tag check; the prep-time config carries the tag.
+    ForwardDiff.hessian!(cache.result, target, x, cache.config, Val(false))
+    buffer = cache.result
+    value = DiffResults.value(buffer)
+    gradient = DiffResults.gradient(buffer)
+    hessian = DiffResults.hessian(buffer)
+    return (value, gradient, hessian)
+end
+
+end # module