From 064113e0b493ebc05b5ce80817061fa189910ca1 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Mon, 20 Apr 2026 15:43:45 +0100
Subject: [PATCH 01/41] Remove DifferentiationInterface dependency; use
 AbstractPPL AD interface

- remove DifferentiationInterface from Project.toml deps and compat
- replace DI gradient preparation/evaluation with AbstractPPL.prepare and AbstractPPL.value_and_gradient
- reuse LogDensityAt and closures without local AbstractPPL.prepare piracy
- guard the Mooncake precompile workload until AbstractPPLMooncakeExt is loaded
- pin AbstractPPL to the evaluator-interface branch in [sources] for this environment

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 Project.toml                 |  7 +++---
 ext/DynamicPPLMooncakeExt.jl | 19 ++++++++++------
 src/logdensityfunction.jl    | 42 +++++++++---------------------------
 src/test_utils/ad.jl         |  1 -
 4 files changed, 26 insertions(+), 43 deletions(-)

diff --git a/Project.toml b/Project.toml
index 9c3268bc8..40ece9ad2 100644
--- a/Project.toml
+++ b/Project.toml
@@ -12,7 +12,6 @@ Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
 Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de"
 Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
 ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
-DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
@@ -28,6 +27,9 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
+[sources]
+AbstractPPL = {url = "https://github.com/TuringLang/AbstractPPL.jl", rev = "evaluator-interface"}
+
 [weakdeps]
 EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
@@ -42,7 +44,7 @@ DynamicPPLEnzymeCoreExt = ["EnzymeCore"]
 DynamicPPLForwardDiffExt = ["ForwardDiff"]
 DynamicPPLMCMCChainsExt = ["MCMCChains"]
 DynamicPPLMarginalLogDensitiesExt = ["MarginalLogDensities"]
-DynamicPPLMooncakeExt = ["Mooncake", "DifferentiationInterface"]
+DynamicPPLMooncakeExt = ["Mooncake"]
 DynamicPPLReverseDiffExt = ["ReverseDiff"]
 
 [compat]
@@ -55,7 +57,6 @@ Bijectors = "0.15.17"
 Chairmarks = "1.3.1"
 Compat = "4"
 ConstructionBase = "1.5.4"
-DifferentiationInterface = "0.6.41, 0.7"
 Distributions = "0.25"
 DocStringExtensions = "0.9"
 EnzymeCore = "0.6 - 0.8"
diff --git a/ext/DynamicPPLMooncakeExt.jl b/ext/DynamicPPLMooncakeExt.jl
index 9760d9f4b..e1368952b 100644
--- a/ext/DynamicPPLMooncakeExt.jl
+++ b/ext/DynamicPPLMooncakeExt.jl
@@ -1,6 +1,7 @@
 module DynamicPPLMooncakeExt
 
 using DynamicPPL: DynamicPPL, is_transformed
+using AbstractPPL: AbstractPPL
 using Mooncake: Mooncake
 
 # These are purely optimisations (although quite significant ones sometimes, especially for
@@ -15,17 +16,21 @@ Mooncake.@zero_derivative Mooncake.DefaultCtx Tuple{
 
 using DynamicPPL: @model, LinkAll, getlogjoint_internal, LogDensityFunction
 using ADTypes: AutoMooncake
-import DifferentiationInterface
 using Distributions: Normal, InverseGamma, Beta
 using PrecompileTools: @setup_workload, @compile_workload
 @setup_workload begin
     @compile_workload begin
-        for dist in (Normal(), InverseGamma(2, 3), Beta(2, 2))
-            @model f() = x ~ dist
-            ldf = LogDensityFunction(
-                f(), getlogjoint_internal, LinkAll(); adtype=AutoMooncake()
-            )
-            DynamicPPL.LogDensityProblems.logdensity_and_gradient(ldf, [0.5])
+        # Julia does not guarantee transitive extensions are loaded while this
+        # extension precompiles, so skip the workload unless Mooncake's
+        # AbstractPPL methods are already available.
+        if !isnothing(Base.get_extension(AbstractPPL, :AbstractPPLMooncakeExt))
+            for dist in (Normal(), InverseGamma(2, 3), Beta(2, 2))
+                @model f() = x ~ dist
+                ldf = LogDensityFunction(
+                    f(), getlogjoint_internal, LinkAll(); adtype=AutoMooncake()
+                )
+                DynamicPPL.LogDensityProblems.logdensity_and_gradient(ldf, [0.5])
+            end
         end
     end
 end
diff --git a/src/logdensityfunction.jl b/src/logdensityfunction.jl
index b1ae82f2b..d1ce87bf4 100644
--- a/src/logdensityfunction.jl
+++ b/src/logdensityfunction.jl
@@ -23,7 +23,6 @@ using ADTypes: ADTypes
 using BangBang: BangBang
 using AbstractPPL: AbstractPPL, VarName
 using LogDensityProblems: LogDensityProblems
-import DifferentiationInterface as DI
 using Random: Random
 
 """
@@ -178,7 +177,7 @@ struct LogDensityFunction{
     L<:AbstractTransformStrategy,
     F,
     VNT<:VarNamedTuple,
-    ADP<:Union{Nothing,DI.GradientPrep},
+    ADP,
     # type of the vector passed to logdensity functions
     X<:AbstractVector,
     AC<:AccumulatorTuple,
@@ -246,12 +245,15 @@ struct LogDensityFunction{
         else
             # Make backend-specific tweaks to the adtype
             adtype = DynamicPPL.tweak_adtype(adtype, model, x)
-            args = (model, getlogdensity, all_ranges, transform_strategy, accs)
-            if _use_closure(adtype)
-                DI.prepare_gradient(LogDensityAt(args...), adtype, x)
+            lda = LogDensityAt(model, getlogdensity, all_ranges, transform_strategy, accs)
+            problem = if _use_closure(adtype)
+                lda
             else
-                DI.prepare_gradient(logdensity_at, adtype, x, map(DI.Constant, args)...)
+                let lda = lda
+                    params -> lda(params)
+                end
             end
+            AbstractPPL.prepare(adtype, problem, x)
         end
         return new{
             typeof(model),
@@ -426,32 +428,8 @@ function LogDensityProblems.logdensity_and_gradient(
     # `params` has to be converted to the same vector type that was used for AD preparation,
     # otherwise the preparation will not be valid.
     params = convert(get_input_vector_type(ldf), params)
-    return if _use_closure(ldf.adtype)
-        DI.value_and_gradient(
-            LogDensityAt(
-                ldf.model,
-                ldf._getlogdensity,
-                ldf._varname_ranges,
-                ldf.transform_strategy,
-                ldf._accs,
-            ),
-            ldf._adprep,
-            ldf.adtype,
-            params,
-        )
-    else
-        DI.value_and_gradient(
-            logdensity_at,
-            ldf._adprep,
-            ldf.adtype,
-            params,
-            DI.Constant(ldf.model),
-            DI.Constant(ldf._getlogdensity),
-            DI.Constant(ldf._varname_ranges),
-            DI.Constant(ldf.transform_strategy),
-            DI.Constant(ldf._accs),
-        )
-    end
+    # Choice between LogDensityAt and closure was fixed at prepare time.
+    return AbstractPPL.value_and_gradient(ldf._adprep, params)
 end
 
 function LogDensityProblems.capabilities(::Type{<:LogDensityFunction{M,Nothing}}) where {M}
diff --git a/src/test_utils/ad.jl b/src/test_utils/ad.jl
index 8c9f96491..820a5093c 100644
--- a/src/test_utils/ad.jl
+++ b/src/test_utils/ad.jl
@@ -2,7 +2,6 @@ module AD
 
 using ADTypes: AbstractADType, AutoForwardDiff
 using Chairmarks: @be
-import DifferentiationInterface as DI
 using DocStringExtensions
 using DynamicPPL:
     DynamicPPL,

From d59e645f4942f358fd0da84091ff4a5c74865b5a Mon Sep 17 00:00:00 2001
From: Hong Ge <3279477+yebai@users.noreply.github.com>
Date: Thu, 23 Apr 2026 19:28:01 +0100
Subject: [PATCH 02/41] Use structural AbstractPPL AD prep for
 LogDensityFunction (#1365)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix https://github.com/TuringLang/DynamicPPL.jl/issues/1364

TODO
- [x] use `main` as the base, since benchmarking would otherwise fail; this needs to
be changed back to #1364 before merging.
- ~~Julia 1.10’s Pkg does not reliably apply the root [sources] override
during build/test, so min CI fails.~~

This change switches `LogDensityFunction` AD preparation to pass a
structural `LogDensityAt` problem directly into
`AbstractPPL.prepare(...)` instead of wrapping it in backend-specific
anonymous closures. It also removes the old `_use_closure` machinery,
since the structural problem object now provides the stable one-argument
evaluator shape that AD backends need.

The accompanying test coverage adds a focused `ReverseDiff` check to
confirm that `AutoReverseDiff(; compile=true)` still retains a compiled
tape and can be reused across repeated `logdensity_and_gradient` calls.

Benchmarks


```
  +----------------------+------------+--------------+---------+--------------+
  | Backend              | PR grad μs | main grad μs |   Ratio | Summary      |
  +----------------------+------------+--------------+---------+--------------+
  | ForwardDiff          |      0.322 |        0.322 |  1.001x | 0.1% slower  |
  | Forward Mooncake     |      0.900 |        2.601 |  0.346x | 2.89x faster |
  | Forward Enzyme       |      0.789 |        1.051 |  0.751x | 24.9% faster |
  | ReverseDiff          |     14.239 |       15.225 |  0.935x | 6.5% faster  |
  | ReverseDiff compiled |      5.355 |        5.872 |  0.912x | 8.8% faster  |
  | Reverse Mooncake     |      0.892 |        0.928 |  0.961x | 3.9% faster  |
  | Reverse Enzyme       |      1.253 |        1.268 |  0.988x | 1.2% faster  |
  +----------------------+------------+--------------+---------+--------------+
```

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 benchmarks/Project.toml                  |  2 +-
 docs/Project.toml                        |  3 ++
 ext/DynamicPPLMarginalLogDensitiesExt.jl |  4 +-
 src/logdensityfunction.jl                | 53 +++---------------------
 src/test_utils/ad.jl                     |  4 --
 test/Project.toml                        |  5 +++
 test/floattypes/Project.toml             |  2 +-
 test/integration/enzyme/Project.toml     |  4 +-
 test/logdensityfunction.jl               | 30 ++++++++++++++
 test/runtests.jl                         |  1 +
 10 files changed, 51 insertions(+), 57 deletions(-)

diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml
index 6adb27efa..9a2b2afcc 100644
--- a/benchmarks/Project.toml
+++ b/benchmarks/Project.toml
@@ -18,7 +18,7 @@ ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 
 [sources]
-DynamicPPL = {path = "../"}
+DynamicPPL = {path = ".."}
 
 [compat]
 ADTypes = "1.14.0"
diff --git a/docs/Project.toml b/docs/Project.toml
index 288ae162a..38261c0a4 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -22,6 +22,9 @@ OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
 
+[sources]
+AbstractPPL = {url = "https://github.com/TuringLang/AbstractPPL.jl", rev = "evaluator-interface"}
+
 [compat]
 ADTypes = "1"
 AbstractMCMC = "5"
diff --git a/ext/DynamicPPLMarginalLogDensitiesExt.jl b/ext/DynamicPPLMarginalLogDensitiesExt.jl
index 905ee168b..ae5f28444 100644
--- a/ext/DynamicPPLMarginalLogDensitiesExt.jl
+++ b/ext/DynamicPPLMarginalLogDensitiesExt.jl
@@ -144,7 +144,7 @@ accs = DynamicPPL.OnlyAccsVarInfo((
     DynamicPPL.RawValueAccumulator(false),
     # ... whatever else you need
 ))
-_, accs = DynamicPPL.init!!(rng, model, oavi, init_strategy, DynamicPPL.UnlinkAll())
+_, accs = DynamicPPL.init!!(rng, model, accs, init_strategy, DynamicPPL.UnlinkAll())
 ```
 
 You can then extract all the updated data from `accs` using DynamicPPL's existing API (see
@@ -178,7 +178,7 @@ retcode: Success
 u: 1-element Vector{Float64}:
  4.88281250001733e-5
 
-julia> # Get the an initialisation strategy representing the mode of `y`.
+julia> # Get an initialisation strategy representing the mode of `y`.
        init_strategy = InitFromVector(mld, opt_solution.u);
 
 julia> # Evaluate the model with this initialisation strategy.
diff --git a/src/logdensityfunction.jl b/src/logdensityfunction.jl
index 135fcab73..524f1e676 100644
--- a/src/logdensityfunction.jl
+++ b/src/logdensityfunction.jl
@@ -32,7 +32,7 @@ using Random: Random
         vi_vnt_or_tfm_strategy=_default_vnt(model, UnlinkAll()),
         accs::Union{NTuple{<:Any,AbstractAccumulator},AccumulatorTuple}=DynamicPPL.ldf_accs(getlogdensity);
         adtype::Union{ADTypes.AbstractADType,Nothing}=nothing,
-        fix_transform::Bool=false,
+        fix_transforms::Bool=false,
     )
 
 A struct which contains a model, along with all the information necessary to:
@@ -260,17 +260,12 @@ struct LogDensityFunction{
         else
             # Make backend-specific tweaks to the adtype
             adtype = DynamicPPL.tweak_adtype(adtype, model, x)
-            lda = LogDensityAt(
+            problem = LogDensityAt(
                 model, getlogdensity, ranges_and_transforms, transform_strategy, accs
             )
-            problem = if _use_closure(adtype)
-                lda
-            else
-                let lda = lda
-                    params -> lda(params)
-                end
-            end
-            AbstractPPL.prepare(adtype, problem, x)
+            # `x` was just constructed from the same range metadata stored in `problem`,
+            # so the AD wrapper can skip its hot-path dimension validation.
+            AbstractPPL.prepare(adtype, problem, x; check_dims=false)
         end
         return new{
             typeof(model),
@@ -473,7 +468,6 @@ function LogDensityProblems.logdensity_and_gradient(
     # `params` has to be converted to the same vector type that was used for AD preparation,
     # otherwise the preparation will not be valid.
     params = convert(get_input_vector_type(ldf), params)
-    # Choice between LogDensityAt and closure was fixed at prepare time.
     return AbstractPPL.value_and_gradient(ldf._adprep, params)
 end
 
@@ -505,43 +499,6 @@ By default, this just returns the input unchanged.
 """
 tweak_adtype(adtype::ADTypes.AbstractADType, ::Model, ::AbstractVector) = adtype
 
-"""
-    _use_closure(adtype::ADTypes.AbstractADType)
-
-In LogDensityProblems, we want to calculate the derivative of `logdensity(f, x)` with
-respect to x, where f is the model (in our case LogDensityFunction or its arguments ) and is
-a constant. However, DifferentiationInterface generally expects a single-argument function
-g(x) to differentiate.
-
-There are two ways of dealing with this:
-
-1. Construct a closure over the model, i.e. let g = Base.Fix1(logdensity, f)
-
-2. Use a constant DI.Context. This lets us pass a two-argument function to DI, as long as we
-   also give it the 'inactive argument' (i.e. the model) wrapped in `DI.Constant`.
-
-The relative performance of the two approaches, however, depends on the AD backend used.
-Some benchmarks are provided here: https://github.com/TuringLang/DynamicPPL.jl/pull/1172
-
-This function is used to determine whether a given AD backend should use a closure or a
-constant. If `use_closure(adtype)` returns `true`, then the closure approach will be used.
-By default, this function returns `false`, i.e. the constant approach will be used.
-"""
-# For these AD backends both closure and no closure work, but it is just faster to not use a
-# closure (see link in the docstring).
-_use_closure(::ADTypes.AutoForwardDiff) = false
-_use_closure(::ADTypes.AutoMooncake) = false
-_use_closure(::ADTypes.AutoMooncakeForward) = false
-# For ReverseDiff, with the compiled tape, you _must_ use a closure because otherwise with
-# DI.Constant arguments the tape will always be recompiled upon each call to
-# value_and_gradient. For non-compiled ReverseDiff, it is faster to not use a closure.
-_use_closure(::ADTypes.AutoReverseDiff{compile}) where {compile} = compile
-# For AutoEnzyme it allows us to avoid setting function_annotation
-_use_closure(::ADTypes.AutoEnzyme) = false
-# Since for most backends it's faster to not use a closure, we set that as the default
-# for unknown AD backends
-_use_closure(::ADTypes.AbstractADType) = false
-
 ######################################################
 # Helper functions to extract ranges and link status #
 ######################################################
diff --git a/src/test_utils/ad.jl b/src/test_utils/ad.jl
index 4c145943d..42ac9203d 100644
--- a/src/test_utils/ad.jl
+++ b/src/test_utils/ad.jl
@@ -339,8 +339,6 @@ function run_ad(
 
     # Calculate log-density and gradient with the backend of interest
     value, grad = logdensity_and_gradient(ldf, params)
-    # collect(): https://github.com/JuliaDiff/DifferentiationInterface.jl/issues/754
-    grad = collect(grad)
     verbose && println("       actual : $((value, grad))")
 
     # Test correctness
@@ -357,8 +355,6 @@ function run_ad(
                 model, getlogdensity, transform_strategy; adtype=test.adtype
             )
             value_true, grad_true = logdensity_and_gradient(ldf_reference, params)
-            # collect(): https://github.com/JuliaDiff/DifferentiationInterface.jl/issues/754
-            grad_true = collect(grad_true)
         end
         # Perform testing
         verbose && println("     expected : $((value_true, grad_true))")
diff --git a/test/Project.toml b/test/Project.toml
index 73cff23ed..2c635ed19 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -14,6 +14,7 @@ DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
 Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -30,6 +31,10 @@ Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
+[sources]
+AbstractPPL = {rev = "evaluator-interface", url = "https://github.com/TuringLang/AbstractPPL.jl"}
+DynamicPPL = {path = ".."}
+
 [compat]
 ADTypes = "1"
 AbstractMCMC = "5.10"
diff --git a/test/floattypes/Project.toml b/test/floattypes/Project.toml
index 02a770fe7..e47e1ebf4 100644
--- a/test/floattypes/Project.toml
+++ b/test/floattypes/Project.toml
@@ -7,4 +7,4 @@ LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
-DynamicPPL = {path = "../../"}
+DynamicPPL = {path = "../.."}
diff --git a/test/integration/enzyme/Project.toml b/test/integration/enzyme/Project.toml
index c26655fae..c673319b1 100644
--- a/test/integration/enzyme/Project.toml
+++ b/test/integration/enzyme/Project.toml
@@ -1,9 +1,11 @@
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
+AbstractPPL = "7a57a42e-76ec-4ea3-a279-07e840d6d9cf"
 DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
 Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
-DynamicPPL = {path = "../../../"}
+AbstractPPL = {rev = "evaluator-interface", url = "https://github.com/TuringLang/AbstractPPL.jl"}
+DynamicPPL = {path = "../../.."}
diff --git a/test/logdensityfunction.jl b/test/logdensityfunction.jl
index 0b956dfc1..5ccdb48a1 100644
--- a/test/logdensityfunction.jl
+++ b/test/logdensityfunction.jl
@@ -485,6 +485,36 @@ end
         end
     end
 
+    # Compiled ReverseDiff prep should be observable as lower repeated-call allocations.
+    @testset "ReverseDiff compiled prep reduces repeated-call allocations" begin
+        @model f() = x ~ Normal()
+        ldf_compiled = LogDensityFunction(
+            f(), getlogjoint_internal, LinkAll(); adtype=AutoReverseDiff(; compile=true)
+        )
+        ldf_uncompiled = LogDensityFunction(
+            f(), getlogjoint_internal, LinkAll(); adtype=AutoReverseDiff(; compile=false)
+        )
+        params = rand(ldf_compiled)
+
+        LogDensityProblems.logdensity_and_gradient(ldf_compiled, params)
+        LogDensityProblems.logdensity_and_gradient(ldf_uncompiled, params)
+
+        function repeated_call_allocs(ldf, params)
+            GC.gc()
+            before = Base.gc_num()
+            for _ in 1:100
+                LogDensityProblems.logdensity_and_gradient(ldf, params)
+            end
+            after = Base.gc_num()
+            return Base.GC_Diff(after, before).allocd
+        end
+
+        allocs_compiled = repeated_call_allocs(ldf_compiled, params)
+        allocs_uncompiled = repeated_call_allocs(ldf_uncompiled, params)
+
+        @test allocs_compiled < allocs_uncompiled
+    end
+
     # Test that various different ways of specifying array types as arguments work with all
     # ADTypes.
     @testset "Array argument types" begin
diff --git a/test/runtests.jl b/test/runtests.jl
index 1ba744c3f..edde98144 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,4 +1,5 @@
 using Documenter: Documenter
+using DifferentiationInterface
 using DynamicPPL: DynamicPPL
 using Random: Random
 using Test: @testset, @test_throws

From a11e9cc1b7b5ac59a9759c0dfb753f78bda3f2fd Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Sun, 26 Apr 2026 23:00:37 +0100
Subject: [PATCH 03/41] Split AD integration tests

---
 .github/workflows/CI.yml                      | 34 +++++++++++
 src/transformed_values.jl                     |  8 ++-
 test/Project.toml                             |  7 +--
 .../Project.toml                              | 21 +++++++
 test/integration/reversediff/Project.toml     | 19 +++++++
 test/integration/reversediff/main.jl          | 50 ++++++++++++++++
 test/logdensityfunction.jl                    | 57 ++++---------------
 test/runtests.jl                              |  2 -
 8 files changed, 143 insertions(+), 55 deletions(-)
 create mode 100644 test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml
 create mode 100644 test/integration/reversediff/Project.toml
 create mode 100644 test/integration/reversediff/main.jl

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 906f34e0f..bc1c708f1 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -80,3 +80,37 @@ jobs:
           files: lcov.info
           token: ${{ secrets.CODECOV_TOKEN }}
           fail_ci_if_error: true
+
+  reversediff:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+
+      - uses: julia-actions/setup-julia@v2
+        with:
+          version: "1"
+
+      - uses: julia-actions/cache@v3
+
+      - name: Run AD with ReverseDiff on demo models
+        working-directory: test/integration/reversediff
+        run: |
+          julia --project=. --color=yes -e 'using Pkg; Pkg.instantiate()'
+          julia --project=. --color=yes main.jl
+
+  marginallogdensities:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+
+      - uses: julia-actions/setup-julia@v2
+        with:
+          version: "1"
+
+      - uses: julia-actions/cache@v3
+
+      - name: Run MarginalLogDensities integration tests
+        working-directory: test/ext/DynamicPPLMarginalLogDensitiesExt
+        run: |
+          julia --project=. --color=yes -e 'using Pkg; Pkg.instantiate()'
+          julia --project=. --color=yes ../DynamicPPLMarginalLogDensitiesExt.jl
diff --git a/src/transformed_values.jl b/src/transformed_values.jl
index 6063a5417..9210db6a0 100644
--- a/src/transformed_values.jl
+++ b/src/transformed_values.jl
@@ -99,8 +99,9 @@ get_internal_value(tv::TransformedValue) = tv.value
 Create a new `TransformedValue` with the same transformation as `tv`, but with
 internal value `new_val`.
 """
-set_internal_value(tv::TransformedValue, new_val) =
+function set_internal_value(tv::TransformedValue, new_val)
     TransformedValue(new_val, get_transform(tv))
+end
 
 """
     DynamicPPL.get_raw_value(tv::TransformedValue)
@@ -347,6 +348,11 @@ function apply_transform_strategy(
         fwd_transform = inverse(target.transform)
         transformed_value, logjac = with_logabsdet_jacobian(fwd_transform, raw_value)
         transformed_tv = TransformedValue(transformed_value, target)
+        # TODO: Check whether this should return `logjac` rather than
+        # `logjac - inv_logjac`. When `tv` is already `DynamicLink` and the target is a
+        # link-equivalent `FixedTransform`, the accumulator should represent only the
+        # target transform's log-Jacobian. Subtracting the inverse-link Jacobian here may
+        # double-count the link correction.
         (raw_value, transformed_tv, logjac - inv_logjac)
     else
         error("unknown target transform: $target")
diff --git a/test/Project.toml b/test/Project.toml
index 2c635ed19..68ee8b196 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -9,7 +9,6 @@ Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
 Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de"
 Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
-DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
 Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
@@ -20,19 +19,18 @@ InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
 MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d"
-MarginalLogDensities = "f0c3360a-fb8d-11e9-1194-5521fd7ee392"
 Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
 OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
 OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
 AbstractPPL = {rev = "evaluator-interface", url = "https://github.com/TuringLang/AbstractPPL.jl"}
+Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = ".."}
 
 [compat]
@@ -45,7 +43,6 @@ BangBang = "0.4"
 Bijectors = "0.15.17"
 Chairmarks = "1"
 Combinatorics = "1"
-DifferentiationInterface = "0.6.41, 0.7"
 DimensionalData = "0.30"
 Distributions = "0.25"
 Documenter = "1"
@@ -53,10 +50,8 @@ ForwardDiff = "0.10.12, 1"
 InvertedIndices = "1"
 LogDensityProblems = "2"
 MCMCChains = "7.2.1"
-MarginalLogDensities = "0.4"
 Mooncake = "0.4, 0.5"
 OffsetArrays = "1"
 OrderedCollections = "1"
-ReverseDiff = "1"
 StableRNGs = "1"
 julia = "1.10"
diff --git a/test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml b/test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml
new file mode 100644
index 000000000..7a01092a0
--- /dev/null
+++ b/test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml
@@ -0,0 +1,21 @@
+[deps]
+ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
+AbstractPPL = "7a57a42e-76ec-4ea3-a279-07e840d6d9cf"
+Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+MarginalLogDensities = "f0c3360a-fb8d-11e9-1194-5521fd7ee392"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[sources]
+AbstractPPL = {rev = "evaluator-interface", url = "https://github.com/TuringLang/AbstractPPL.jl"}
+Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
+DynamicPPL = {path = "../../.."}
+
+[compat]
+ADTypes = "1"
+Bijectors = "0.15.17"
+Distributions = "0.25"
+ForwardDiff = "0.10.12, 1"
+MarginalLogDensities = "0.4"
diff --git a/test/integration/reversediff/Project.toml b/test/integration/reversediff/Project.toml
new file mode 100644
index 000000000..d6a6e2204
--- /dev/null
+++ b/test/integration/reversediff/Project.toml
@@ -0,0 +1,19 @@
+[deps]
+ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
+AbstractPPL = "7a57a42e-76ec-4ea3-a279-07e840d6d9cf"
+DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
+DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
+ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[sources]
+AbstractPPL = {rev = "evaluator-interface", url = "https://github.com/TuringLang/AbstractPPL.jl"}
+DynamicPPL = {path = "../../.."}
+
+[compat]
+DifferentiationInterface = "0.6.41, 0.7"
+ReverseDiff = "1"
diff --git a/test/integration/reversediff/main.jl b/test/integration/reversediff/main.jl
new file mode 100644
index 000000000..44fffbadc
--- /dev/null
+++ b/test/integration/reversediff/main.jl
@@ -0,0 +1,50 @@
+using ADTypes: AutoReverseDiff
+using DifferentiationInterface
+using DynamicPPL
+using DynamicPPL.TestUtils: ALL_MODELS
+using DynamicPPL.TestUtils.AD: run_ad
+using Distributions: Normal
+using ForwardDiff: ForwardDiff  # run_ad uses FD for correctness test
+using LogDensityProblems: LogDensityProblems
+using ReverseDiff: ReverseDiff
+using Test: @test, @testset
+
+ADTYPES = (
+    ("ReverseDiff", AutoReverseDiff(; compile=false)),
+    ("ReverseDiffCompiled", AutoReverseDiff(; compile=true)),
+)
+
+@testset "$ad_key" for (ad_key, ad_type) in ADTYPES
+    @testset "$(model.f)" for model in ALL_MODELS
+        @test run_ad(model, ad_type) isa Any
+    end
+end
+
+@testset "ReverseDiff compiled prep reduces repeated-call allocations" begin
+    @model f() = x ~ Normal()
+    ldf_compiled = LogDensityFunction(
+        f(), getlogjoint_internal, LinkAll(); adtype=AutoReverseDiff(; compile=true)
+    )
+    ldf_uncompiled = LogDensityFunction(
+        f(), getlogjoint_internal, LinkAll(); adtype=AutoReverseDiff(; compile=false)
+    )
+    params = rand(ldf_compiled)
+
+    LogDensityProblems.logdensity_and_gradient(ldf_compiled, params)
+    LogDensityProblems.logdensity_and_gradient(ldf_uncompiled, params)
+
+    function repeated_call_allocs(ldf, params)
+        GC.gc()
+        before = Base.gc_num()
+        for _ in 1:100
+            LogDensityProblems.logdensity_and_gradient(ldf, params)
+        end
+        after = Base.gc_num()
+        return Base.GC_Diff(after, before).allocd
+    end
+
+    allocs_compiled = repeated_call_allocs(ldf_compiled, params)
+    allocs_uncompiled = repeated_call_allocs(ldf_uncompiled, params)
+
+    @test allocs_compiled < allocs_uncompiled
+end
diff --git a/test/logdensityfunction.jl b/test/logdensityfunction.jl
index bb210c111..2efc6e32d 100644
--- a/test/logdensityfunction.jl
+++ b/test/logdensityfunction.jl
@@ -14,7 +14,6 @@ using Random: Xoshiro
 using StableRNGs: StableRNG
 
 using ForwardDiff: ForwardDiff
-using ReverseDiff: ReverseDiff
 using Mooncake: Mooncake
 
 @testset "LogDensityFunction: constructors" begin
@@ -193,12 +192,12 @@ end
         struct ErrorAccumulatorException <: Exception end
         struct ErrorAccumulator <: DynamicPPL.AbstractAccumulator end
         DynamicPPL.accumulator_name(::ErrorAccumulator) = :ERROR
-        DynamicPPL.accumulate_assume!!(
-            ::ErrorAccumulator, ::Any, ::Any, ::Any, ::VarName, ::Distribution, ::Any
-        ) = throw(ErrorAccumulatorException())
-        DynamicPPL.accumulate_observe!!(
-            ::ErrorAccumulator, ::Distribution, ::Any, ::Union{VarName,Nothing}, ::Any
-        ) = throw(ErrorAccumulatorException())
+        DynamicPPL.accumulate_assume!!(::ErrorAccumulator, ::Any, ::Any, ::Any, ::VarName, ::Distribution, ::Any) = throw(
+            ErrorAccumulatorException()
+        )
+        DynamicPPL.accumulate_observe!!(::ErrorAccumulator, ::Distribution, ::Any, ::Union{VarName,Nothing}, ::Any) = throw(
+            ErrorAccumulatorException()
+        )
         DynamicPPL.reset(ea::ErrorAccumulator) = ea
         Base.copy(ea::ErrorAccumulator) = ea
         # Construct an LDF
@@ -457,11 +456,7 @@ end
     # Used as the ground truth that others are compared against.
     ref_adtype = AutoForwardDiff()
 
-    test_adtypes = [
-        AutoReverseDiff(; compile=false),
-        AutoReverseDiff(; compile=true),
-        AutoMooncake(; config=nothing),
-    ]
+    test_adtypes = [AutoForwardDiff(), AutoMooncake(; config=nothing)]
 
     @testset "Correctness" begin
         @testset "$(m.f)" for m in DynamicPPL.TestUtils.ALL_MODELS
@@ -501,36 +496,6 @@ end
         end
     end
 
-    # Compiled ReverseDiff prep should be observable as lower repeated-call allocations.
-    @testset "ReverseDiff compiled prep reduces repeated-call allocations" begin
-        @model f() = x ~ Normal()
-        ldf_compiled = LogDensityFunction(
-            f(), getlogjoint_internal, LinkAll(); adtype=AutoReverseDiff(; compile=true)
-        )
-        ldf_uncompiled = LogDensityFunction(
-            f(), getlogjoint_internal, LinkAll(); adtype=AutoReverseDiff(; compile=false)
-        )
-        params = rand(ldf_compiled)
-
-        LogDensityProblems.logdensity_and_gradient(ldf_compiled, params)
-        LogDensityProblems.logdensity_and_gradient(ldf_uncompiled, params)
-
-        function repeated_call_allocs(ldf, params)
-            GC.gc()
-            before = Base.gc_num()
-            for _ in 1:100
-                LogDensityProblems.logdensity_and_gradient(ldf, params)
-            end
-            after = Base.gc_num()
-            return Base.GC_Diff(after, before).allocd
-        end
-
-        allocs_compiled = repeated_call_allocs(ldf_compiled, params)
-        allocs_uncompiled = repeated_call_allocs(ldf_uncompiled, params)
-
-        @test allocs_compiled < allocs_uncompiled
-    end
-
     # Test that various different ways of specifying array types as arguments work with all
     # ADTypes.
     @testset "Array argument types" begin
@@ -541,7 +506,7 @@ end
             return LogDensityProblems.logdensity_and_gradient(ldf, m[:])
         end
 
-        @model function scalar_matrix_model(::Type{T}=Float64) where {T<:Real}
+        @model function scalar_matrix_model((::Type{T})=Float64) where {T<:Real}
             m = Matrix{T}(undef, 2, 3)
             return m ~ filldist(MvNormal(zeros(2), I), 3)
         end
@@ -550,14 +515,14 @@ end
             scalar_matrix_model, test_m, ref_adtype
         )
 
-        @model function matrix_model(::Type{T}=Matrix{Float64}) where {T}
+        @model function matrix_model((::Type{T})=Matrix{Float64}) where {T}
             m = T(undef, 2, 3)
             return m ~ filldist(MvNormal(zeros(2), I), 3)
         end
 
         matrix_model_reference = eval_logp_and_grad(matrix_model, test_m, ref_adtype)
 
-        @model function scalar_array_model(::Type{T}=Float64) where {T<:Real}
+        @model function scalar_array_model((::Type{T})=Float64) where {T<:Real}
             m = Array{T}(undef, 2, 3)
             return m ~ filldist(MvNormal(zeros(2), I), 3)
         end
@@ -566,7 +531,7 @@ end
             scalar_array_model, test_m, ref_adtype
         )
 
-        @model function array_model(::Type{T}=Array{Float64}) where {T}
+        @model function array_model((::Type{T})=Array{Float64}) where {T}
             m = T(undef, 2, 3)
             return m ~ filldist(MvNormal(zeros(2), I), 3)
         end
diff --git a/test/runtests.jl b/test/runtests.jl
index edde98144..bdd14521c 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,5 +1,4 @@
 using Documenter: Documenter
-using DifferentiationInterface
 using DynamicPPL: DynamicPPL
 using Random: Random
 using Test: @testset, @test_throws
@@ -50,7 +49,6 @@ Random.seed!(100)
         include("transformed_values.jl")
         include("logdensityfunction.jl")
         @testset "extensions" begin
-            include("ext/DynamicPPLMarginalLogDensitiesExt.jl")
             include("ext/DynamicPPLMCMCChainsExt.jl")
         end
         @testset "ad" begin

From 00969e072aa5e93c8b56636c4386c01a267249fc Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Sun, 26 Apr 2026 23:10:50 +0100
Subject: [PATCH 04/41] Fix benchmark dependency sources

---
 benchmarks/Project.toml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml
index 9a2b2afcc..4ff2471e7 100644
--- a/benchmarks/Project.toml
+++ b/benchmarks/Project.toml
@@ -4,6 +4,8 @@ version = "0.1.0"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
+AbstractPPL = "7a57a42e-76ec-4ea3-a279-07e840d6d9cf"
+Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
 Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
@@ -18,10 +20,14 @@ ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 
 [sources]
+AbstractPPL = {rev = "evaluator-interface", url = "https://github.com/TuringLang/AbstractPPL.jl"}
+Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = ".."}
 
 [compat]
 ADTypes = "1.14.0"
+AbstractPPL = "0.14"
+Bijectors = "0.15.17"
 Chairmarks = "1.3.1"
 Distributions = "0.25.117"
 DynamicPPL = "0.41"

From ad29168cb11fdf85492ab8ca042ed64b878f6770 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Sun, 26 Apr 2026 23:12:06 +0100
Subject: [PATCH 05/41] Format logdensityfunction test

---
 test/logdensityfunction.jl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/test/logdensityfunction.jl b/test/logdensityfunction.jl
index 2efc6e32d..36bc98ab2 100644
--- a/test/logdensityfunction.jl
+++ b/test/logdensityfunction.jl
@@ -192,12 +192,12 @@ end
         struct ErrorAccumulatorException <: Exception end
         struct ErrorAccumulator <: DynamicPPL.AbstractAccumulator end
         DynamicPPL.accumulator_name(::ErrorAccumulator) = :ERROR
-        DynamicPPL.accumulate_assume!!(::ErrorAccumulator, ::Any, ::Any, ::Any, ::VarName, ::Distribution, ::Any) = throw(
-            ErrorAccumulatorException()
-        )
-        DynamicPPL.accumulate_observe!!(::ErrorAccumulator, ::Distribution, ::Any, ::Union{VarName,Nothing}, ::Any) = throw(
-            ErrorAccumulatorException()
-        )
+        DynamicPPL.accumulate_assume!!(
+            ::ErrorAccumulator, ::Any, ::Any, ::Any, ::VarName, ::Distribution, ::Any
+        ) = throw(ErrorAccumulatorException())
+        DynamicPPL.accumulate_observe!!(
+            ::ErrorAccumulator, ::Distribution, ::Any, ::Union{VarName,Nothing}, ::Any
+        ) = throw(ErrorAccumulatorException())
         DynamicPPL.reset(ea::ErrorAccumulator) = ea
         Base.copy(ea::ErrorAccumulator) = ea
         # Construct an LDF

From 33f262cd2538bd5234b75ffe3d44a0411e8612ad Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Mon, 27 Apr 2026 22:28:51 +0100
Subject: [PATCH 06/41] format

---
 src/transformed_values.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformed_values.jl b/src/transformed_values.jl
index 9210db6a0..f77afa682 100644
--- a/src/transformed_values.jl
+++ b/src/transformed_values.jl
@@ -100,7 +100,7 @@ Create a new `TransformedValue` with the same transformation as `tv`, but with
 internal value `new_val`.
 """
 function set_internal_value(tv::TransformedValue, new_val)
-    TransformedValue(new_val, get_transform(tv))
+    return TransformedValue(new_val, get_transform(tv))
 end
 
 """

From 187bd3d9c693d14ecccff5b20f11978cb3698842 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Wed, 29 Apr 2026 22:23:14 +0100
Subject: [PATCH 07/41] Add DifferentiationInterface to benchmarks

---
 benchmarks/Project.toml                | 2 ++
 benchmarks/src/DynamicPPLBenchmarks.jl | 1 +
 2 files changed, 3 insertions(+)

diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml
index 4ff2471e7..25e9b0ac1 100644
--- a/benchmarks/Project.toml
+++ b/benchmarks/Project.toml
@@ -8,6 +8,7 @@ AbstractPPL = "7a57a42e-76ec-4ea3-a279-07e840d6d9cf"
 Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
 Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
 Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
@@ -30,6 +31,7 @@ AbstractPPL = "0.14"
 Bijectors = "0.15.17"
 Chairmarks = "1.3.1"
 Distributions = "0.25.117"
+DifferentiationInterface = "0.7"
 DynamicPPL = "0.41"
 Enzyme = "0.13"
 ForwardDiff = "1"
diff --git a/benchmarks/src/DynamicPPLBenchmarks.jl b/benchmarks/src/DynamicPPLBenchmarks.jl
index f4cc1511e..9bc3f7897 100644
--- a/benchmarks/src/DynamicPPLBenchmarks.jl
+++ b/benchmarks/src/DynamicPPLBenchmarks.jl
@@ -4,6 +4,7 @@ using DynamicPPL: VarInfo, VarName, LinkAll, UnlinkAll
 using DynamicPPL: DynamicPPL
 using DynamicPPL.TestUtils.AD: run_ad, NoTest
 using ADTypes: ADTypes
+using DifferentiationInterface: DifferentiationInterface
 using LogDensityProblems: LogDensityProblems
 
 using ForwardDiff: ForwardDiff

From aa3af76a4d79a736839cbd7c67bcc7b7b5dfeb44 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Mon, 4 May 2026 21:57:33 +0100
Subject: [PATCH 08/41] Use AbstractPPL gradient bang API

Update LogDensityFunction gradient evaluation to match the AbstractPPL evaluator interface and make the floattypes environment resolve that source branch explicitly.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 src/logdensityfunction.jl    | 2 +-
 test/floattypes/Project.toml | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/logdensityfunction.jl b/src/logdensityfunction.jl
index e8b6b7759..0340ae7fe 100644
--- a/src/logdensityfunction.jl
+++ b/src/logdensityfunction.jl
@@ -541,7 +541,7 @@ function LogDensityProblems.logdensity_and_gradient(
     # `params` has to be converted to the same vector type that was used for AD preparation,
     # otherwise the preparation will not be valid.
     params = convert(get_input_vector_type(ldf), params)
-    return AbstractPPL.value_and_gradient(ldf._adprep, params)
+    return AbstractPPL.value_and_gradient!!(ldf._adprep, params)
 end
 
 function LogDensityProblems.capabilities(::Type{<:LogDensityFunction{M,Nothing}}) where {M}
diff --git a/test/floattypes/Project.toml b/test/floattypes/Project.toml
index e47e1ebf4..c8772eed3 100644
--- a/test/floattypes/Project.toml
+++ b/test/floattypes/Project.toml
@@ -1,5 +1,6 @@
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
+AbstractPPL = "7a57a42e-76ec-4ea3-a279-07e840d6d9cf"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
@@ -7,4 +8,5 @@ LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
+AbstractPPL = {rev = "evaluator-interface", url = "https://github.com/TuringLang/AbstractPPL.jl"}
 DynamicPPL = {path = "../.."}

From ecfe04d257cfc103049a9b9daa75e56f24539fe4 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Thu, 30 Apr 2026 09:19:12 +0100
Subject: [PATCH 09/41] Simplify benchmarks: single CI job, absolute
 log-density times

Drops the base-vs-head comparison entirely. The benchmark workflow now
runs once on the PR head, on a pinned `ubuntu-22.04` runner, and reports
absolute log-density times plus gradient/log-density ratios in the
posted comment. Output schema follows Mooncake's bench harness; readers
compare against recent main-branch comments to spot regressions.

Noise reduction in `run_ad`: per-sample incremental GC teardown and a
full GC before each measurement keep accumulated garbage from triggering
mid-sample collections that inflate individual samples. Adds a
`benchmark_seconds` knob for tightening the median estimate. Also
removes the synthetic reference timing that normalised eval times
against a non-DPPL function.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/Benchmarking.yml     | 102 +++--------
 benchmarks/Project.toml                |   4 +-
 benchmarks/README.md                   |   6 +
 benchmarks/benchmarks.jl               | 226 ++++++-------------------
 benchmarks/src/DynamicPPLBenchmarks.jl |  16 +-
 benchmarks/src/Models.jl               |  13 +-
 src/test_utils/ad.jl                   |  29 +++-
 7 files changed, 124 insertions(+), 272 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 48bd72875..4e5433c96 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -4,36 +4,11 @@ on:
   pull_request:
 
 jobs:
-  benchmark-base:
-    runs-on: ubuntu-latest
-    outputs:
-      results: ${{ steps.benchmark.outputs.results }}
-      sha: ${{ steps.benchmark.outputs.sha }}
-    steps:
-      - uses: actions/checkout@v6
-        with:
-          ref: ${{ github.base_ref }}
-      - uses: julia-actions/setup-julia@v3
-        with:
-          version: '1.11'
-      - uses: julia-actions/cache@v3
-
-      - name: Run benchmarks
-        id: benchmark
-        working-directory: ./benchmarks
-        run: |
-          # github output can't handle more than 1 line, hence the tail
-          julia --project=. -e 'using Pkg; Pkg.instantiate()'
-          results=$(julia --project=. benchmarks.jl json | tail -n 1 || true)
-          echo $results
-          echo "results=$results" >> "$GITHUB_OUTPUT"
-          echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
-
-  benchmark-head:
-    runs-on: ubuntu-latest
-    outputs:
-      results: ${{ steps.benchmark.outputs.results }}
-      sha: ${{ steps.benchmark.outputs.sha }}
+  benchmark:
+    # Pinned (rather than `ubuntu-latest`) so that successive runs land on the
+    # same VM family. GitHub silently rotates `latest`, which changes the noise
+    # floor between runs and makes timings hard to compare across PRs.
+    runs-on: ubuntu-22.04
     steps:
       - uses: actions/checkout@v6
         with:
@@ -44,57 +19,26 @@ jobs:
       - uses: julia-actions/cache@v3
 
       - name: Run benchmarks
-        id: benchmark
         working-directory: ./benchmarks
         run: |
-          # github output can't handle more than 1 line, hence the tail
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
-          results=$(julia --project=. benchmarks.jl json | tail -n 1 || true)
-          echo $results
-          echo "results=$results" >> "$GITHUB_OUTPUT"
-          echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
-
-  combine-results:
-    runs-on: ubuntu-latest
-    needs: [benchmark-base, benchmark-head]
-    steps:
-      - uses: actions/checkout@v6
-        with:
-          ref: ${{ github.event.pull_request.head.sha }}
-      - uses: julia-actions/setup-julia@v3
-        with:
-          version: '1.11'
-      - uses: julia-actions/cache@v3
 
-      - name: Combine benchmark results
-        working-directory: ./benchmarks
-        run: |
           version_info=$(julia -e 'using InteractiveUtils; versioninfo()')
-          echo "$version_info"
-          echo "VERSION_INFO<<EOF" >> $GITHUB_ENV
-          echo "$version_info" >> $GITHUB_ENV
-          echo "EOF" >> $GITHUB_ENV
 
-          # save outputs of previous jobs to json file
-          echo "Base results"
-          echo "--------------------------------------------------------"
-          echo '${{needs.benchmark-base.outputs.results}}'
-          echo '${{needs.benchmark-base.outputs.results}}' > base.json
-          echo "Head results"
-          echo "--------------------------------------------------------"
-          echo '${{needs.benchmark-head.outputs.results}}'
-          echo '${{needs.benchmark-head.outputs.results}}' > head.json
-
-          # combine them and save the output as an env var for later steps
-          julia --project=. -e 'using Pkg; Pkg.instantiate()'
-          results=$(julia --project=. benchmarks.jl combine head.json base.json)
-          echo "Combined results"
-          echo "--------------------------------------------------------"
-          echo "$results"
-
-          echo "BENCHMARK_OUTPUT<<EOF" >> $GITHUB_ENV
-          echo "$results" >> $GITHUB_ENV
-          echo "EOF" >> $GITHUB_ENV
+          # Capture the markdown-mode benchmark output. The `tee` keeps it in
+          # the workflow log too, so a failure during comment posting does not
+          # lose the numbers.
+          results_file=$(mktemp)
+          julia --project=. benchmarks.jl markdown | tee "$results_file"
+
+          {
+            echo "VERSION_INFO<<DPPL_BENCH_EOF"
+            echo "$version_info"
+            echo "DPPL_BENCH_EOF"
+            echo "BENCHMARK_OUTPUT<<DPPL_BENCH_EOF"
+            cat "$results_file"
+            echo "DPPL_BENCH_EOF"
+          } >> "$GITHUB_ENV"
 
       - name: Find existing benchmark comment
         uses: peter-evans/find-comment@v4
@@ -102,6 +46,7 @@ jobs:
         with:
           issue-number: ${{ github.event.pull_request.number }}
           comment-author: github-actions[bot]
+          body-includes: Benchmark Report
 
       - name: Create or update benchmark comment
         uses: peter-evans/create-or-update-comment@v5
@@ -110,8 +55,11 @@ jobs:
           body: |
             ## Benchmark Report
 
-            - this PR's head: `${{ needs.benchmark-head.outputs.sha }}`
-            - base branch: `${{ needs.benchmark-base.outputs.sha }}`
+            - this PR's head: `${{ github.event.pull_request.head.sha }}`
+
+            Absolute log-density times and grad/log-density ratios are
+            reported. To judge whether a PR helps or hurts, compare against
+            the latest comment on a recent main-branch PR run.
 
             ### Computer Information
             ```
diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml
index 6adb27efa..7481755dd 100644
--- a/benchmarks/Project.toml
+++ b/benchmarks/Project.toml
@@ -9,7 +9,6 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
 Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
-JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
 Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
@@ -18,7 +17,7 @@ ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 
 [sources]
-DynamicPPL = {path = "../"}
+DynamicPPL = {path = ".."}
 
 [compat]
 ADTypes = "1.14.0"
@@ -27,7 +26,6 @@ Distributions = "0.25.117"
 DynamicPPL = "0.41"
 Enzyme = "0.13"
 ForwardDiff = "1"
-JSON = "1.3.0"
 LogDensityProblems = "2.1.2"
 Mooncake = "0.4, 0.5"
 PrettyTables = "3"
diff --git a/benchmarks/README.md b/benchmarks/README.md
index ad70b7c03..4c94c3a98 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -3,3 +3,9 @@ To run the benchmarks locally, run this from the root directory of the repositor
 ```sh
 julia --project=benchmarks benchmarks/benchmarks.jl
 ```
+
+This prints absolute log-density times and gradient/log-density ratios for a
+fixed set of model × AD-backend combinations. It is run on each PR by the
+`Benchmarking` CI workflow, which posts the resulting table as a comment.
+There is no base-vs-head comparison: judge regressions by comparing against
+the most recent main-branch run in the comment history.
diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index 5be32fdef..f6289c8be 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -1,40 +1,48 @@
-using Pkg
-
-using Chairmarks: @be, median
+using Chairmarks: median
 using DynamicPPLBenchmarks: Models, benchmark, model_dimension
-using JSON: JSON
-using PrettyTables: pretty_table, fmt__printf, EmptyCells, MultiColumn, TextTableFormat
+using PrettyTables: pretty_table
 using Printf: @sprintf
 using StableRNGs: StableRNG
 
 rng = StableRNG(23)
 
-colnames = ["Model", "Dim", "AD Backend", "Linked", "t(eval)/t(ref)", "t(grad)/t(eval)"]
-function print_results(results_table; to_json=false)
-    if to_json
-        # Print to the given file as JSON
-        results_array = [
-            Dict(colnames[i] => results_table[j][i] for i in eachindex(colnames)) for
-            j in eachindex(results_table)
-        ]
-        # do not use pretty=true, as GitHub Actions expects no linebreaks
-        JSON.json(stdout, results_array)
-        println()
-    else
-        # Pretty-print to terminal
-        table_matrix = hcat(Iterators.map(collect, zip(results_table...))...)
-        return pretty_table(
-            table_matrix;
-            column_labels=colnames,
-            backend=:text,
-            formatters=[fmt__printf("%.1f", [6, 7])],
-            fit_table_in_display_horizontally=false,
-            fit_table_in_display_vertically=false,
-        )
+# Schema follows Mooncake's bench output: absolute log-density time plus the
+# gradient/log-density ratio. We deliberately do not compare against the base
+# branch — readers eyeball regressions across the PR-comment history instead.
+# Cf. https://github.com/chalk-lab/Mooncake.jl/blob/main/bench/run_benchmarks.jl
+const COLNAMES = [
+    "Model", "Dim", "AD Backend", "Linked", "t(logdensity)", "t(grad)/t(logdensity)"
+]
+
+# Adapted from Mooncake's bench harness.
+fix_sig_fig(t) = string(round(t; sigdigits=3))
+function format_time(t::Float64)
+    t < 1e-6 && return fix_sig_fig(t * 1e9) * " ns"
+    t < 1e-3 && return fix_sig_fig(t * 1e6) * " μs"
+    t < 1 && return fix_sig_fig(t * 1e3) * " ms"
+    return fix_sig_fig(t) * " s"
+end
+format_time(::Missing) = "err"
+
+format_ratio(x::Float64) = @sprintf("%.2f", x)
+format_ratio(::Missing) = "err"
+
+function print_results(results_table)
+    isempty(results_table) && return println("No benchmark results obtained.")
+    display_rows = map(results_table) do row
+        (row[1], row[2], row[3], row[4], format_time(row[5]), format_ratio(row[6]))
     end
+    table_matrix = hcat(Iterators.map(collect, zip(display_rows...))...)
+    return pretty_table(
+        table_matrix;
+        column_labels=COLNAMES,
+        backend=:text,
+        fit_table_in_display_horizontally=false,
+        fit_table_in_display_vertically=false,
+    )
 end
 
-function run(; to_json=false)
+function run(; markdown::Bool=false)
     # Create DynamicPPL.Model instances to run benchmarks on.
     smorgasbord_instance = Models.smorgasbord(randn(rng, 100), randn(rng, 100))
     loop_univariate1k, multivariate1k = begin
@@ -55,7 +63,6 @@ function run(; to_json=false)
         Models.lda(2, d, w)
     end
 
-    # Specify the combinations to test:
     # (Model Name, model instance, AD backend, linked)
     chosen_combinations = [
         (
@@ -78,26 +85,17 @@ function run(; to_json=false)
         ("LDA", lda_instance, :reversediff, true),
     ]
 
-    # Time running a model-like function that does not use DynamicPPL, as a reference point.
-    # Eval timings will be relative to this.
-    reference_time = begin
-        obs = randn(rng)
-        median(@be Models.simple_assume_observe_non_model(obs)).time
-    end
-    @info "Reference evaluation time: $(reference_time) seconds"
-
     results_table = Tuple{
         String,Int,String,Bool,Union{Float64,Missing},Union{Float64,Missing}
     }[]
 
     for (model_name, model, adbackend, islinked) in chosen_combinations
         @info "Running benchmark for $model_name, $adbackend, $islinked"
-        relative_eval_time, relative_ad_eval_time = try
+        logdensity_time, grad_over_logdensity = try
             results = benchmark(model, adbackend, islinked)
-            @info " t(eval) = $(results.primal_time)"
-            @info " t(grad) = $(results.grad_time)"
-            (results.primal_time / reference_time),
-            (results.grad_time / results.primal_time)
+            @info " t(logdensity) = $(results.primal_time)"
+            @info " t(grad)       = $(results.grad_time)"
+            (results.primal_time, results.grad_time / results.primal_time)
         catch e
             @info "benchmark errored: $e"
             missing, missing
@@ -109,144 +107,26 @@ function run(; to_json=false)
                 model_dimension(model, islinked),
                 string(adbackend),
                 islinked,
-                relative_eval_time,
-                relative_ad_eval_time,
+                logdensity_time,
+                grad_over_logdensity,
             ),
         )
-        print_results(results_table; to_json=to_json)
     end
-    print_results(results_table; to_json=to_json)
-    return nothing
-end
 
-struct TestCase
-    model_name::String
-    dim::Integer
-    ad_backend::String
-    linked::Bool
-    TestCase(d::Dict{String,Any}) = new((d[c] for c in colnames[1:4])...)
-end
-function combine(head_filename::String, base_filename::String)
-    head_results = try
-        JSON.parsefile(head_filename, Vector{Dict{String,Any}})
-    catch
-        Dict{String,Any}[]
-    end
-    @info "Loaded $(length(head_results)) results from $head_filename"
-    base_results = try
-        JSON.parsefile(base_filename, Vector{Dict{String,Any}})
-    catch
-        Dict{String,Any}[]
-    end
-    @info "Loaded $(length(base_results)) results from $base_filename"
-    # Identify unique combinations of (Model, Dim, AD Backend, Linked)
-    head_testcases = Dict(
-        TestCase(d) => (d[colnames[5]], d[colnames[6]]) for d in head_results
-    )
-    base_testcases = Dict(
-        TestCase(d) => (d[colnames[5]], d[colnames[6]]) for d in base_results
-    )
-    all_testcases = union(Set(keys(head_testcases)), Set(keys(base_testcases)))
-    @info "$(length(all_testcases)) unique test cases found"
-    sorted_testcases = sort(
-        collect(all_testcases); by=(c -> (c.model_name, c.linked, c.ad_backend))
-    )
-    results_table = Tuple{
-        String,
-        Int,
-        String,
-        Bool,
-        String,
-        String,
-        String,
-        String,
-        String,
-        String,
-        String,
-        String,
-        String,
-    }[]
-    sublabels = ["base", "this PR", "speedup"]
-    results_colnames = [
-        [
-            EmptyCells(4),
-            MultiColumn(3, "t(eval) / t(ref)"),
-            MultiColumn(3, "t(grad) / t(eval)"),
-            MultiColumn(3, "t(grad) / t(ref)"),
-        ],
-        [colnames[1:4]..., sublabels..., sublabels..., sublabels...],
-    ]
-    sprint_float(x::Float64) = @sprintf("%.2f", x)
-    sprint_float(m::Missing) = "err"
-    for c in sorted_testcases
-        head_eval, head_grad = get(head_testcases, c, (missing, missing))
-        base_eval, base_grad = get(base_testcases, c, (missing, missing))
-        # If the benchmark errored, it will return `missing` in the `run()` function above.
-        # The issue with this is that JSON serialisation converts it to `null`, and then
-        # when reading back from JSON, it becomes `nothing` instead of `missing`!
-        head_eval = head_eval === nothing ? missing : head_eval
-        head_grad = head_grad === nothing ? missing : head_grad
-        base_eval = base_eval === nothing ? missing : base_eval
-        base_grad = base_grad === nothing ? missing : base_grad
-        # Finally that lets us do this division safely
-        speedup_eval = base_eval / head_eval
-        speedup_grad = base_grad / head_grad
-        # As well as this multiplication, which is t(grad) / t(ref)
-        head_grad_vs_ref = head_grad * head_eval
-        base_grad_vs_ref = base_grad * base_eval
-        speedup_grad_vs_ref = base_grad_vs_ref / head_grad_vs_ref
-        push!(
-            results_table,
-            (
-                c.model_name,
-                c.dim,
-                c.ad_backend,
-                c.linked,
-                sprint_float(base_eval),
-                sprint_float(head_eval),
-                sprint_float(speedup_eval),
-                sprint_float(base_grad),
-                sprint_float(head_grad),
-                sprint_float(speedup_grad),
-                sprint_float(base_grad_vs_ref),
-                sprint_float(head_grad_vs_ref),
-                sprint_float(speedup_grad_vs_ref),
-            ),
-        )
-    end
-    # Pretty-print to terminal
-    if isempty(results_table)
-        println("No benchmark results obtained.")
-    else
-        table_matrix = hcat(Iterators.map(collect, zip(results_table...))...)
-        println("```")
-        pretty_table(
-            table_matrix;
-            column_labels=results_colnames,
-            backend=:text,
-            fit_table_in_display_horizontally=false,
-            fit_table_in_display_vertically=false,
-            table_format=TextTableFormat(;
-                horizontal_line_at_merged_column_labels=true,
-                horizontal_lines_at_data_rows=collect(3:3:length(results_table)),
-            ),
-        )
-        println("```")
-    end
+    # Markdown mode wraps the text table in a fenced block so it renders
+    # monospaced when posted as a PR comment.
+    markdown && println("```")
+    print_results(results_table)
+    markdown && println("```")
+    return nothing
 end
 
-# The command-line arguments are used on CI purposes.
-# Run with `julia --project=. benchmarks.jl json` to run benchmarks and output JSON to
-# stdout
-# Run with `julia --project=. benchmarks.jl combine head.json base.json` to combine two JSON
-# files
-if length(ARGS) == 3 && ARGS[1] == "combine"
-    combine(ARGS[2], ARGS[3])
-elseif ARGS == ["json"]
-    run(; to_json=true)
+# Run with `julia --project=. benchmarks.jl markdown` to emit a fenced text
+# table to stdout, suitable for pasting into a PR comment. Run with no
+# arguments to pretty-print to the terminal.
+if ARGS == ["markdown"]
+    run(; markdown=true)
 elseif ARGS == []
-    # When running locally just omit the argument and it will just benchmark and print to
-    # terminal.
     run()
 else
     error("invalid arguments: $(ARGS)")
diff --git a/benchmarks/src/DynamicPPLBenchmarks.jl b/benchmarks/src/DynamicPPLBenchmarks.jl
index f4cc1511e..cbeb951cc 100644
--- a/benchmarks/src/DynamicPPLBenchmarks.jl
+++ b/benchmarks/src/DynamicPPLBenchmarks.jl
@@ -53,22 +53,28 @@ function to_backend(x::Union{AbstractString,Symbol})
 end
 
 """
-    benchmark(model, adbackend::Symbol, islinked::Bool)
+    benchmark(model, adbackend::Symbol, islinked::Bool; seconds::Real=2)
 
-Benchmark evaluation and gradient calculation for `model` using the selected AD backend.
+Benchmark log-density evaluation and gradient calculation for `model` using the
+selected AD backend.
 
-The AD backend should be specified as a Symbol (e.g. `:forwarddiff`, `:reversediff`, `:zygote`).
+`adbackend` is a Symbol key into `SYMBOL_TO_BACKEND` (e.g. `:forwarddiff`,
+`:reversediff`, `:reversediff_compiled`, `:mooncake`, `:enzyme`).
 
 `islinked` determines whether to link the VarInfo for evaluation.
+
+`seconds` is the per-measurement time budget passed to Chairmarks; the default
+doubles `run_ad`'s `benchmark_seconds` default (`1`) to tighten the median estimate.
 """
-function benchmark(model, adbackend::Symbol, islinked::Bool)
+function benchmark(model, adbackend::Symbol, islinked::Bool; seconds::Real=2)
     transform_strategy = islinked ? LinkAll() : UnlinkAll()
     return run_ad(
         model,
         to_backend(adbackend);
         rng=StableRNG(23),
-        transform_strategy=transform_strategy,
+        transform_strategy,
         benchmark=true,
+        benchmark_seconds=seconds,
         test=NoTest(),
         verbose=false,
     )
diff --git a/benchmarks/src/Models.jl b/benchmarks/src/Models.jl
index 76d4b2e93..4fa386f6a 100644
--- a/benchmarks/src/Models.jl
+++ b/benchmarks/src/Models.jl
@@ -20,17 +20,8 @@ using Distributions:
 using DynamicPPL: DynamicPPL, @model, to_submodel
 using LinearAlgebra: cholesky
 
-export simple_assume_observe_non_model,
-    simple_assume_observe, smorgasbord, loop_univariate, multivariate, parent, dynamic, lda
-
-# This one is like simple_assume_observe, but explicitly does not use DynamicPPL.
-# Other runtimes are normalised by this one's runtime.
-function simple_assume_observe_non_model(obs)
-    x = rand(Normal())
-    logp = logpdf(Normal(), x)
-    logp += logpdf(Normal(x, 1), obs)
-    return (; logp=logp, x=x)
-end
+export simple_assume_observe,
+    smorgasbord, loop_univariate, multivariate, parent, dynamic, lda
 
 """
 A simple model that does one scalar assumption and one scalar observation.
diff --git a/src/test_utils/ad.jl b/src/test_utils/ad.jl
index 40336d4fe..349c61e4b 100644
--- a/src/test_utils/ad.jl
+++ b/src/test_utils/ad.jl
@@ -293,7 +293,10 @@ Everything else is optional, and can be categorised into several groups:
    When enabled, the time taken to evaluate logp as well as its gradient is
    measured using Chairmarks.jl, and the `ADResult` object returned will
    contain `grad_time` and `primal_time` fields with the median times (in
-   seconds).
+   seconds). The `benchmark_seconds` keyword (default `1`) sets the time
+   budget passed to Chairmarks for each of the two measurements; raising it
+   collects more samples and yields a tighter median estimate at the cost
+   of a longer run.
 
 1. _Whether to output extra logging information._
 
@@ -314,6 +317,7 @@ function run_ad(
     adtype::AbstractADType;
     test::Union{AbstractADCorrectnessTestSetting,Bool}=WithBackend(),
     benchmark::Bool=false,
+    benchmark_seconds::Real=1,
     atol::AbstractFloat=100 * eps(),
     rtol::AbstractFloat=sqrt(eps()),
     getlogdensity::Function=getlogjoint_internal,
@@ -370,15 +374,34 @@ function run_ad(
 
     # Benchmark
     grad_time, primal_time = if benchmark
+        # Per-sample incremental GC keeps accumulated garbage from triggering a
+        # full collection mid-sample, which would inflate that sample several-
+        # fold. Auto-tuned `evals` (not pinned to 1) batches enough calls per
+        # sample that fast log-densities clear `time_ns`'s real precision floor
+        # (tens of ns on Linux/macOS) instead of reading as zero. Pattern
+        # borrowed from Mooncake's bench harness:
+        # https://github.com/chalk-lab/Mooncake.jl/blob/main/bench/run_benchmarks.jl
         logdensity(ldf, params)  # Warm-up
-        primal_benchmark = @be logdensity($ldf, $params)
+        GC.gc(true)
+        primal_benchmark = @be(
+            _,
+            logdensity($ldf, $params),
+            _ -> GC.gc(false),
+            seconds = benchmark_seconds,
+        )
         if verbose
             print("   evaluation : ")
             show(stdout, MIME("text/plain"), median(primal_benchmark))
             println()
         end
         logdensity_and_gradient(ldf, params)  # Warm-up
-        grad_benchmark = @be logdensity_and_gradient($ldf, $params)
+        GC.gc(true)
+        grad_benchmark = @be(
+            _,
+            logdensity_and_gradient($ldf, $params),
+            _ -> GC.gc(false),
+            seconds = benchmark_seconds,
+        )
         if verbose
             print("     gradient : ")
             show(stdout, MIME("text/plain"), median(grad_benchmark))

From d89c1e66b7b63c6b65d87f2fe88a22df10b9a0e2 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Mon, 4 May 2026 22:27:03 +0100
Subject: [PATCH 10/41] Collapse benchmarks/src into a single script

Inline `Models.jl` and `DynamicPPLBenchmarks.jl` into `benchmarks.jl` and
convert `benchmarks/Project.toml` from a package to a flat environment,
mirroring Mooncake.jl's `bench/run_benchmarks.jl` layout.

Also: take dim from `length(r.params)` (run_ad already constructed the LDF)
so models are no longer evaluated twice on the success path; switch results
to NamedTuples so `print_results` reads `r.name`/`r.dim`/...; extract
`transform_strategy(islinked)` helper; drop unused imports.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 benchmarks/Project.toml                |   5 +-
 benchmarks/README.md                   |  11 +-
 benchmarks/benchmarks.jl               | 314 ++++++++++++++++++-------
 benchmarks/src/DynamicPPLBenchmarks.jl |  83 -------
 benchmarks/src/Models.jl               | 147 ------------
 5 files changed, 230 insertions(+), 330 deletions(-)
 delete mode 100644 benchmarks/src/DynamicPPLBenchmarks.jl
 delete mode 100644 benchmarks/src/Models.jl

diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml
index 7481755dd..1440ba40e 100644
--- a/benchmarks/Project.toml
+++ b/benchmarks/Project.toml
@@ -1,7 +1,3 @@
-name = "DynamicPPLBenchmarks"
-uuid = "d94a1522-c11e-44a7-981a-42bf5dc1a001"
-version = "0.1.0"
-
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de"
@@ -13,6 +9,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
 Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
 PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
+Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 
diff --git a/benchmarks/README.md b/benchmarks/README.md
index 4c94c3a98..67a2cca43 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -1,11 +1,10 @@
-To run the benchmarks locally, run this from the root directory of the repository:
+Run from the repository root:
 
 ```sh
+julia --project=benchmarks -e 'using Pkg; Pkg.instantiate()'
 julia --project=benchmarks benchmarks/benchmarks.jl
 ```
 
-This prints absolute log-density times and gradient/log-density ratios for a
-fixed set of model × AD-backend combinations. Run on each PR by the
-`Benchmarking` CI workflow, which posts the resulting table as a comment.
-There is no base-vs-head comparison: judge regressions by comparing against
-the most recent main-branch run in the comment history.
+The `Benchmarking` CI workflow runs this on each PR and posts the table as a
+comment. There is no base-vs-head comparison: judge regressions by comparing
+against the most recent main-branch run in the comment history.
diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index f6289c8be..49f13b712 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -1,20 +1,184 @@
-using Chairmarks: median
-using DynamicPPLBenchmarks: Models, benchmark, model_dimension
+using ADTypes: ADTypes
+using Distributions:
+    Categorical,
+    Dirichlet,
+    Exponential,
+    Gamma,
+    InverseWishart,
+    LKJCholesky,
+    Normal,
+    product_distribution,
+    truncated
+using DynamicPPL: DynamicPPL, @model, to_submodel, VarInfo, LinkAll, UnlinkAll
+using DynamicPPL.TestUtils.AD: run_ad, NoTest
+using Enzyme: Enzyme
+using ForwardDiff: ForwardDiff
+using LinearAlgebra: cholesky
+using Mooncake: Mooncake
 using PrettyTables: pretty_table
 using Printf: @sprintf
+using ReverseDiff: ReverseDiff
 using StableRNGs: StableRNG
 
-rng = StableRNG(23)
+#
+#  Models
+#
 
-# Schema follows Mooncake's bench output: absolute log-density time plus the
-# gradient/log-density ratio. We deliberately do not compare against the base
-# branch — readers eyeball regressions across the PR-comment history instead.
-# Cf. https://github.com/chalk-lab/Mooncake.jl/blob/main/bench/run_benchmarks.jl
+"One scalar assumption, one scalar observation."
+@model function simple_assume_observe(obs)
+    x ~ Normal()
+    obs ~ Normal(x, 1)
+    return (; x=x)
+end
+
+"""
+Covers many DynamicPPL features: scalar/vector/multivariate variables, `~`,
+`.~`, loops, allocated vectors, and observations as both arguments and literals.
+"""
+@model function smorgasbord(x, y, ::Type{TV}=Vector{Float64}) where {TV}
+    @assert length(x) == length(y)
+    m ~ truncated(Normal(); lower=0)
+    means ~ product_distribution(fill(Exponential(m), length(x)))
+    stds = TV(undef, length(x))
+    stds .~ Gamma(1, 1)
+    for i in 1:length(x)
+        x[i] ~ Normal(means[i], stds[i])
+    end
+    y ~ product_distribution(map((mean, std) -> Normal(mean, std), means, stds))
+    0.0 ~ Normal(sum(y), 1)
+    return (; m=m, means=means, stds=stds)
+end
+
+"`num_dims` univariate normals via a loop. Condition on `o` after instantiation."
+@model function loop_univariate(num_dims, ::Type{TV}=Vector{Float64}) where {TV}
+    a = TV(undef, num_dims)
+    o = TV(undef, num_dims)
+    for i in 1:num_dims
+        a[i] ~ Normal(0, 1)
+    end
+    m = sum(a)
+    for i in 1:num_dims
+        o[i] ~ Normal(m, 1)
+    end
+    return (; a=a)
+end
+
+"As `loop_univariate`, but using `product_distribution` instead of loops."
+@model function multivariate(num_dims, ::Type{TV}=Vector{Float64}) where {TV}
+    a = TV(undef, num_dims)
+    o = TV(undef, num_dims)
+    a ~ product_distribution(fill(Normal(0, 1), num_dims))
+    m = sum(a)
+    o ~ product_distribution(fill(Normal(m, 1), num_dims))
+    return (; a=a)
+end
+
+@model function _sub()
+    x ~ Normal()
+    return x
+end
+
+"As `simple_assume_observe`, but with the assumed RV inside a submodel."
+@model function parent(obs)
+    x ~ to_submodel(_sub())
+    obs ~ Normal(x, 1)
+    return (; x=x)
+end
+
+"Variables whose support varies under linking, or otherwise nontrivial bijectors."
+@model function dynamic(::Type{T}=Vector{Float64}) where {T}
+    eta ~ truncated(Normal(); lower=0.0, upper=0.1)
+    mat1 ~ LKJCholesky(4, eta)
+    mat2 ~ InverseWishart(3.2, cholesky([1.0 0.5; 0.5 1.0]))
+    return (; eta=eta, mat1=mat1, mat2=mat2)
+end
+
+"Latent Dirichlet Allocation."
+@model function lda(K, d, w)
+    V = length(unique(w))
+    D = length(unique(d))
+    N = length(d)
+    @assert length(w) == N
+
+    ϕ = Vector{Vector{Real}}(undef, K)
+    for i in 1:K
+        ϕ[i] ~ Dirichlet(ones(V) / V)
+    end
+
+    θ = Vector{Vector{Real}}(undef, D)
+    for i in 1:D
+        θ[i] ~ Dirichlet(ones(K) / K)
+    end
+
+    z = zeros(Int, N)
+    for i in 1:N
+        z[i] ~ Categorical(θ[d[i]])
+        w[i] ~ Categorical(ϕ[d[i]])
+    end
+    return (; ϕ=ϕ, θ=θ, z=z)
+end
+
+#
+#  Benchmark harness
+#
+
+# Copied from TuringBenchmarking.jl.
+const SYMBOL_TO_BACKEND = Dict(
+    :forwarddiff => ADTypes.AutoForwardDiff(),
+    :reversediff => ADTypes.AutoReverseDiff(; compile=false),
+    :reversediff_compiled => ADTypes.AutoReverseDiff(; compile=true),
+    :mooncake => ADTypes.AutoMooncake(; config=nothing),
+    :enzyme => ADTypes.AutoEnzyme(;
+        mode=Enzyme.set_runtime_activity(Enzyme.Reverse),
+        function_annotation=Enzyme.Const,
+    ),
+)
+
+transform_strategy(islinked) = islinked ? LinkAll() : UnlinkAll()
+
+"Dimension of `model`, accounting for linking. Used as a fallback when `benchmark` errors."
+function model_dimension(model, islinked)
+    vi = last(
+        DynamicPPL.init!!(
+            StableRNG(23),
+            model,
+            VarInfo(),
+            DynamicPPL.InitFromPrior(),
+            transform_strategy(islinked),
+        ),
+    )
+    return length(vi[:])
+end
+
+"""
+    benchmark(model, adbackend, islinked; seconds=2)
+
+Time log-density and gradient evaluation for `model` with the given AD backend.
+`seconds` is the per-measurement time budget forwarded to Chairmarks (double
+`run_ad`'s default of `1`, to tighten the median estimate).
+"""
+function benchmark(model, adbackend::Symbol, islinked::Bool; seconds::Real=2)
+    return run_ad(
+        model,
+        SYMBOL_TO_BACKEND[adbackend];
+        rng=StableRNG(23),
+        transform_strategy=transform_strategy(islinked),
+        benchmark=true,
+        benchmark_seconds=seconds,
+        test=NoTest(),
+        verbose=false,
+    )
+end
+
+#
+#  Reporting
+#
+
+# https://github.com/chalk-lab/Mooncake.jl/blob/main/bench/run_benchmarks.jl
 const COLNAMES = [
     "Model", "Dim", "AD Backend", "Linked", "t(logdensity)", "t(grad)/t(logdensity)"
 ]
 
-# Adapted from Mooncake's bench harness.
 fix_sig_fig(t) = string(round(t; sigdigits=3))
 function format_time(t::Float64)
     t < 1e-6 && return fix_sig_fig(t * 1e9) * " ns"
@@ -27,14 +191,14 @@ format_time(::Missing) = "err"
 format_ratio(x::Float64) = @sprintf("%.2f", x)
 format_ratio(::Missing) = "err"
 
-function print_results(results_table)
-    isempty(results_table) && return println("No benchmark results obtained.")
-    display_rows = map(results_table) do row
-        (row[1], row[2], row[3], row[4], format_time(row[5]), format_ratio(row[6]))
+function print_results(results)
+    isempty(results) && return println("No benchmark results obtained.")
+    rows = map(results) do r
+        (r.name, r.dim, r.adbackend, r.islinked, format_time(r.t_logd), format_ratio(r.ratio))
     end
-    table_matrix = hcat(Iterators.map(collect, zip(display_rows...))...)
+    matrix = hcat(Iterators.map(collect, zip(rows...))...)
     return pretty_table(
-        table_matrix;
+        matrix;
         column_labels=COLNAMES,
         backend=:text,
         fit_table_in_display_horizontally=false,
@@ -42,92 +206,62 @@ function print_results(results_table)
     )
 end
 
-function run(; markdown::Bool=false)
-    # Create DynamicPPL.Model instances to run benchmarks on.
-    smorgasbord_instance = Models.smorgasbord(randn(rng, 100), randn(rng, 100))
-    loop_univariate1k, multivariate1k = begin
-        data_1k = randn(rng, 1_000)
-        loop = Models.loop_univariate(length(data_1k)) | (; o=data_1k)
-        multi = Models.multivariate(length(data_1k)) | (; o=data_1k)
-        loop, multi
-    end
-    loop_univariate10k, multivariate10k = begin
-        data_10k = randn(rng, 10_000)
-        loop = Models.loop_univariate(length(data_10k)) | (; o=data_10k)
-        multi = Models.multivariate(length(data_10k)) | (; o=data_10k)
-        loop, multi
-    end
-    lda_instance = begin
-        w = [1, 2, 3, 2, 1, 1]
-        d = [1, 1, 1, 2, 2, 2]
-        Models.lda(2, d, w)
-    end
+#
+#  Main
+#
 
-    # (Model Name, model instance, AD backend, linked)
-    chosen_combinations = [
-        (
-            "Simple assume observe",
-            Models.simple_assume_observe(randn(rng)),
-            :forwarddiff,
-            false,
-        ),
-        ("Smorgasbord", smorgasbord_instance, :forwarddiff, false),
-        ("Smorgasbord", smorgasbord_instance, :forwarddiff, true),
-        ("Smorgasbord", smorgasbord_instance, :reversediff, true),
-        ("Smorgasbord", smorgasbord_instance, :mooncake, true),
-        ("Smorgasbord", smorgasbord_instance, :enzyme, true),
-        ("Loop univariate 1k", loop_univariate1k, :mooncake, true),
-        ("Multivariate 1k", multivariate1k, :mooncake, true),
-        ("Loop univariate 10k", loop_univariate10k, :mooncake, true),
-        ("Multivariate 10k", multivariate10k, :mooncake, true),
-        ("Dynamic", Models.dynamic(), :mooncake, true),
-        ("Submodel", Models.parent(randn(rng)), :mooncake, true),
-        ("LDA", lda_instance, :reversediff, true),
+function build_combinations(rng)
+    smorg = smorgasbord(randn(rng, 100), randn(rng, 100))
+    combos = [
+        ("Simple assume observe", simple_assume_observe(randn(rng)), :forwarddiff, false),
+        ("Smorgasbord", smorg, :forwarddiff, false),
+        ("Smorgasbord", smorg, :forwarddiff, true),
+        ("Smorgasbord", smorg, :reversediff, true),
+        ("Smorgasbord", smorg, :mooncake, true),
+        ("Smorgasbord", smorg, :enzyme, true),
     ]
+    for n in (1_000, 10_000)
+        data = randn(rng, n)
+        loop = loop_univariate(n) | (; o=data)
+        multi = multivariate(n) | (; o=data)
+        push!(combos, ("Loop univariate $(n ÷ 1_000)k", loop, :mooncake, true))
+        push!(combos, ("Multivariate $(n ÷ 1_000)k", multi, :mooncake, true))
+    end
+    lda_inst = lda(2, [1, 1, 1, 2, 2, 2], [1, 2, 3, 2, 1, 1])
+    push!(combos, ("Dynamic", dynamic(), :mooncake, true))
+    push!(combos, ("Submodel", parent(randn(rng)), :mooncake, true))
+    push!(combos, ("LDA", lda_inst, :reversediff, true))
+    return combos
+end
 
-    results_table = Tuple{
-        String,Int,String,Bool,Union{Float64,Missing},Union{Float64,Missing}
-    }[]
-
-    for (model_name, model, adbackend, islinked) in chosen_combinations
-        @info "Running benchmark for $model_name, $adbackend, $islinked"
-        logdensity_time, grad_over_logdensity = try
-            results = benchmark(model, adbackend, islinked)
-            @info " t(logdensity) = $(results.primal_time)"
-            @info " t(grad)       = $(results.grad_time)"
-            (results.primal_time, results.grad_time / results.primal_time)
+function run(; markdown::Bool=false)
+    combinations = build_combinations(StableRNG(23))
+    results = []
+    for (name, model, adbackend, islinked) in combinations
+        @info "Running benchmark for $name, $adbackend, $islinked"
+        dim, t_logd, ratio = try
+            r = benchmark(model, adbackend, islinked)
+            @info " t(logdensity) = $(r.primal_time)"
+            @info " t(grad)       = $(r.grad_time)"
+            (length(r.params), r.primal_time, r.grad_time / r.primal_time)
         catch e
             @info "benchmark errored: $e"
-            missing, missing
+            (model_dimension(model, islinked), missing, missing)
         end
-        push!(
-            results_table,
-            (
-                model_name,
-                model_dimension(model, islinked),
-                string(adbackend),
-                islinked,
-                logdensity_time,
-                grad_over_logdensity,
-            ),
-        )
+        push!(results, (; name, dim, adbackend=string(adbackend), islinked, t_logd, ratio))
     end
-
-    # Markdown mode wraps the text table in a fenced block so it renders
-    # monospaced when posted as a PR comment.
     markdown && println("```")
-    print_results(results_table)
+    print_results(results)
     markdown && println("```")
     return nothing
 end
 
-# Run with `julia --project=. benchmarks.jl markdown` to emit a fenced text
-# table to stdout, suitable for pasting into a PR comment. Run with no
-# arguments to pretty-print to the terminal.
-if ARGS == ["markdown"]
-    run(; markdown=true)
-elseif ARGS == []
-    run()
-else
-    error("invalid arguments: $(ARGS)")
+if abspath(PROGRAM_FILE) == @__FILE__
+    if ARGS == ["markdown"]
+        run(; markdown=true)
+    elseif ARGS == []
+        run()
+    else
+        error("invalid arguments: $(ARGS)")
+    end
 end
diff --git a/benchmarks/src/DynamicPPLBenchmarks.jl b/benchmarks/src/DynamicPPLBenchmarks.jl
deleted file mode 100644
index cbeb951cc..000000000
--- a/benchmarks/src/DynamicPPLBenchmarks.jl
+++ /dev/null
@@ -1,83 +0,0 @@
-module DynamicPPLBenchmarks
-
-using DynamicPPL: VarInfo, VarName, LinkAll, UnlinkAll
-using DynamicPPL: DynamicPPL
-using DynamicPPL.TestUtils.AD: run_ad, NoTest
-using ADTypes: ADTypes
-using LogDensityProblems: LogDensityProblems
-
-using ForwardDiff: ForwardDiff
-using ReverseDiff: ReverseDiff
-using Mooncake: Mooncake
-using Enzyme: Enzyme
-using StableRNGs: StableRNG
-
-include("./Models.jl")
-using .Models: Models
-export Models, benchmark, model_dimension
-
-"""
-    model_dimension(model, islinked)
-
-Return the dimension of `model`, accounting for linking, if any.
-"""
-function model_dimension(model, islinked)
-    tfm_strategy = islinked ? DynamicPPL.LinkAll() : DynamicPPL.UnlinkAll()
-    vi = last(
-        DynamicPPL.init!!(
-            StableRNG(23), model, VarInfo(), DynamicPPL.InitFromPrior(), tfm_strategy
-        ),
-    )
-    return length(vi[:])
-end
-
-# Utility functions for representing AD backends using symbols.
-# Copied from TuringBenchmarking.jl.
-const SYMBOL_TO_BACKEND = Dict(
-    :forwarddiff => ADTypes.AutoForwardDiff(),
-    :reversediff => ADTypes.AutoReverseDiff(; compile=false),
-    :reversediff_compiled => ADTypes.AutoReverseDiff(; compile=true),
-    :mooncake => ADTypes.AutoMooncake(; config=nothing),
-    :enzyme => ADTypes.AutoEnzyme(;
-        mode=Enzyme.set_runtime_activity(Enzyme.Reverse),
-        function_annotation=Enzyme.Const,
-    ),
-)
-
-to_backend(x) = error("Unknown backend: $x")
-to_backend(x::ADTypes.AbstractADType) = x
-function to_backend(x::Union{AbstractString,Symbol})
-    k = Symbol(lowercase(string(x)))
-    haskey(SYMBOL_TO_BACKEND, k) || error("Unknown backend: $x")
-    return SYMBOL_TO_BACKEND[k]
-end
-
-"""
-    benchmark(model, adbackend::Symbol, islinked::Bool; seconds::Real=2)
-
-Benchmark log-density evaluation and gradient calculation for `model` using the
-selected AD backend.
-
-`adbackend` is a Symbol key into `SYMBOL_TO_BACKEND` (e.g. `:forwarddiff`,
-`:reversediff`, `:reversediff_compiled`, `:mooncake`, `:enzyme`).
-
-`islinked` determines whether to link the VarInfo for evaluation.
-
-`seconds` is the per-measurement time budget passed to Chairmarks; the default
-doubles Chairmarks' own default to tighten the median estimate.
-"""
-function benchmark(model, adbackend::Symbol, islinked::Bool; seconds::Real=2)
-    transform_strategy = islinked ? LinkAll() : UnlinkAll()
-    return run_ad(
-        model,
-        to_backend(adbackend);
-        rng=StableRNG(23),
-        transform_strategy,
-        benchmark=true,
-        benchmark_seconds=seconds,
-        test=NoTest(),
-        verbose=false,
-    )
-end
-
-end
diff --git a/benchmarks/src/Models.jl b/benchmarks/src/Models.jl
deleted file mode 100644
index 4fa386f6a..000000000
--- a/benchmarks/src/Models.jl
+++ /dev/null
@@ -1,147 +0,0 @@
-"""
-Models for benchmarking Turing.jl.
-
-Each model returns a NamedTuple of all the random variables in the model that are not
-observed.
-"""
-module Models
-
-using Distributions:
-    Categorical,
-    Dirichlet,
-    Exponential,
-    Gamma,
-    LKJCholesky,
-    InverseWishart,
-    Normal,
-    logpdf,
-    product_distribution,
-    truncated
-using DynamicPPL: DynamicPPL, @model, to_submodel
-using LinearAlgebra: cholesky
-
-export simple_assume_observe,
-    smorgasbord, loop_univariate, multivariate, parent, dynamic, lda
-
-"""
-A simple model that does one scalar assumption and one scalar observation.
-"""
-@model function simple_assume_observe(obs)
-    x ~ Normal()
-    obs ~ Normal(x, 1)
-    return (; x=x)
-end
-
-"""
-A short model that tries to cover many DynamicPPL features.
-
-Includes scalar, vector univariate, and multivariate variables; ~, .~, and loops; allocating
-a variable vector; observations passed as arguments, and as literals.
-"""
-@model function smorgasbord(x, y, ::Type{TV}=Vector{Float64}) where {TV}
-    @assert length(x) == length(y)
-    m ~ truncated(Normal(); lower=0)
-    means ~ product_distribution(fill(Exponential(m), length(x)))
-    stds = TV(undef, length(x))
-    stds .~ Gamma(1, 1)
-    for i in 1:length(x)
-        x[i] ~ Normal(means[i], stds[i])
-    end
-    y ~ product_distribution(map((mean, std) -> Normal(mean, std), means, stds))
-    0.0 ~ Normal(sum(y), 1)
-    return (; m=m, means=means, stds=stds)
-end
-
-"""
-A model that loops over two vectors of univariate normals of length `num_dims`.
-
-The second variable, `o`, is meant to be conditioned on after model instantiation.
-
-See `multivariate` for a version that uses `product_distribution` rather than loops.
-"""
-@model function loop_univariate(num_dims, ::Type{TV}=Vector{Float64}) where {TV}
-    a = TV(undef, num_dims)
-    o = TV(undef, num_dims)
-    for i in 1:num_dims
-        a[i] ~ Normal(0, 1)
-    end
-    m = sum(a)
-    for i in 1:num_dims
-        o[i] ~ Normal(m, 1)
-    end
-    return (; a=a)
-end
-
-"""
-A model with two multivariate normal distributed variables of dimension `num_dims`.
-
-The second variable, `o`, is meant to be conditioned on after model instantiation.
-
-See `loop_univariate` for a version that uses loops rather than `product_distribution`.
-"""
-@model function multivariate(num_dims, ::Type{TV}=Vector{Float64}) where {TV}
-    a = TV(undef, num_dims)
-    o = TV(undef, num_dims)
-    a ~ product_distribution(fill(Normal(0, 1), num_dims))
-    m = sum(a)
-    o ~ product_distribution(fill(Normal(m, 1), num_dims))
-    return (; a=a)
-end
-
-"""
-A submodel for `parent`. Not exported.
-"""
-@model function sub()
-    x ~ Normal()
-    return x
-end
-
-"""
-Like simple_assume_observe, but with a submodel for the assumed random variable.
-"""
-@model function parent(obs)
-    x ~ to_submodel(sub())
-    obs ~ Normal(x, 1)
-    return (; x=x)
-end
-
-"""
-A model with random variables that have changing support under linking, or otherwise
-complicated bijectors.
-"""
-@model function dynamic(::Type{T}=Vector{Float64}) where {T}
-    eta ~ truncated(Normal(); lower=0.0, upper=0.1)
-    mat1 ~ LKJCholesky(4, eta)
-    mat2 ~ InverseWishart(3.2, cholesky([1.0 0.5; 0.5 1.0]))
-    return (; eta=eta, mat1=mat1, mat2=mat2)
-end
-
-"""
-A simple Linear Discriminant Analysis model.
-"""
-@model function lda(K, d, w)
-    V = length(unique(w))
-    D = length(unique(d))
-    N = length(d)
-    @assert length(w) == N
-
-    ϕ = Vector{Vector{Real}}(undef, K)
-    for i in 1:K
-        ϕ[i] ~ Dirichlet(ones(V) / V)
-    end
-
-    θ = Vector{Vector{Real}}(undef, D)
-    for i in 1:D
-        θ[i] ~ Dirichlet(ones(K) / K)
-    end
-
-    z = zeros(Int, N)
-
-    for i in 1:N
-        z[i] ~ Categorical(θ[d[i]])
-        w[i] ~ Categorical(ϕ[d[i]])
-    end
-    return (; ϕ=ϕ, θ=θ, z=z)
-end
-
-end

From 5849eb63a5caea098af0096aef04d6489ac5a699 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Mon, 4 May 2026 22:46:45 +0100
Subject: [PATCH 11/41] Match Mooncake's bench harness for the AD-benchmarking
 flow
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Run a full cross-product of the 9 model configs × 4 AD backends ×
{linked, unlinked} = 72 rows, ordered model → linked → backend so each
model's eight rows are adjacent for side-by-side inspection.
`:reversediff_compiled` is excluded because compiled tapes are
input-dependent and silently produce wrong gradients on
parameter-dependent control flow (see CLAUDE.md).

Per-row logging mirrors Mooncake's `bench/run_benchmarks.jl`: an
`(i / N, name, (linked = …))` header, the backend on its own line,
then `t(logdensity)` / `t(grad)` formatted with units. `model_dimension`
is now defensive (returns `missing` on init failures) and the table
formats `missing` dims as `err`, so combos that crash during dimension
lookup still produce a well-formed row instead of derailing the run.

Also: add a `setup` stage to `run_ad`'s Chairmarks pipeline that
deep-copies `params` per sample, matching Mooncake's harness — setup
runs before the timed window, so the copy is excluded from
measurements. Widen `combos` to a typed `Tuple{...}[]` so it accepts
models with non-default contexts (e.g. the `condition`-wrapped
`loop_univariate`/`multivariate` rows).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 benchmarks/benchmarks.jl | 83 +++++++++++++++++++++++++---------------
 src/test_utils/ad.jl     | 11 ++++--
 2 files changed, 59 insertions(+), 35 deletions(-)

diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index 49f13b712..6a8d1cab8 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -138,16 +138,20 @@ transform_strategy(islinked) = islinked ? LinkAll() : UnlinkAll()
 
 "Dimension of `model`, accounting for linking. Used as a fallback when `benchmark` errors."
 function model_dimension(model, islinked)
-    vi = last(
-        DynamicPPL.init!!(
-            StableRNG(23),
-            model,
-            VarInfo(),
-            DynamicPPL.InitFromPrior(),
-            transform_strategy(islinked),
-        ),
-    )
-    return length(vi[:])
+    return try
+        vi = last(
+            DynamicPPL.init!!(
+                StableRNG(23),
+                model,
+                VarInfo(),
+                DynamicPPL.InitFromPrior(),
+                transform_strategy(islinked),
+            ),
+        )
+        length(vi[:])
+    catch
+        missing
+    end
 end
 
 """
@@ -191,10 +195,20 @@ format_time(::Missing) = "err"
 format_ratio(x::Float64) = @sprintf("%.2f", x)
 format_ratio(::Missing) = "err"
 
+format_dim(d::Integer) = string(d)
+format_dim(::Missing) = "err"
+
 function print_results(results)
     isempty(results) && return println("No benchmark results obtained.")
     rows = map(results) do r
-        (r.name, r.dim, r.adbackend, r.islinked, format_time(r.t_logd), format_ratio(r.ratio))
+        (
+            r.name,
+            format_dim(r.dim),
+            r.adbackend,
+            r.islinked,
+            format_time(r.t_logd),
+            format_ratio(r.ratio),
+        )
     end
     matrix = hcat(Iterators.map(collect, zip(rows...))...)
     return pretty_table(
@@ -210,42 +224,49 @@ end
 #  Main
 #
 
+# Backends compared on every model. `:reversediff_compiled` is excluded because
+# compiled tapes are input-dependent and silently produce wrong gradients on
+# models with parameter-dependent control flow (see CLAUDE.md).
+const BACKENDS = (:forwarddiff, :reversediff, :mooncake, :enzyme)
+
 function build_combinations(rng)
     smorg = smorgasbord(randn(rng, 100), randn(rng, 100))
-    combos = [
-        ("Simple assume observe", simple_assume_observe(randn(rng)), :forwarddiff, false),
-        ("Smorgasbord", smorg, :forwarddiff, false),
-        ("Smorgasbord", smorg, :forwarddiff, true),
-        ("Smorgasbord", smorg, :reversediff, true),
-        ("Smorgasbord", smorg, :mooncake, true),
-        ("Smorgasbord", smorg, :enzyme, true),
+    models = Tuple{String,DynamicPPL.Model}[
+        ("Simple assume observe", simple_assume_observe(randn(rng))), ("Smorgasbord", smorg)
     ]
     for n in (1_000, 10_000)
         data = randn(rng, n)
-        loop = loop_univariate(n) | (; o=data)
-        multi = multivariate(n) | (; o=data)
-        push!(combos, ("Loop univariate $(n ÷ 1_000)k", loop, :mooncake, true))
-        push!(combos, ("Multivariate $(n ÷ 1_000)k", multi, :mooncake, true))
+        push!(models, ("Loop univariate $(n ÷ 1_000)k", loop_univariate(n) | (; o=data)))
+        push!(models, ("Multivariate $(n ÷ 1_000)k", multivariate(n) | (; o=data)))
+    end
+    push!(models, ("Dynamic", dynamic()))
+    push!(models, ("Submodel", parent(randn(rng))))
+    push!(models, ("LDA", lda(2, [1, 1, 1, 2, 2, 2], [1, 2, 3, 2, 1, 1])))
+
+    # Order: model → linked → backend, so each model's eight rows are adjacent
+    # and inspecting one model side-by-side across backends/links is trivial.
+    combos = Tuple{String,DynamicPPL.Model,Symbol,Bool}[]
+    for (name, model) in models, islinked in (false, true), backend in BACKENDS
+        push!(combos, (name, model, backend, islinked))
     end
-    lda_inst = lda(2, [1, 1, 1, 2, 2, 2], [1, 2, 3, 2, 1, 1])
-    push!(combos, ("Dynamic", dynamic(), :mooncake, true))
-    push!(combos, ("Submodel", parent(randn(rng)), :mooncake, true))
-    push!(combos, ("LDA", lda_inst, :reversediff, true))
     return combos
 end
 
 function run(; markdown::Bool=false)
     combinations = build_combinations(StableRNG(23))
+    total = length(combinations)
     results = []
-    for (name, model, adbackend, islinked) in combinations
-        @info "Running benchmark for $name, $adbackend, $islinked"
+    for (i, (name, model, adbackend, islinked)) in enumerate(combinations)
+        # Mooncake-style header: index/total, then model + config, then backend.
+        @info "$i / $total", name, (; linked=islinked)
+        @info adbackend
         dim, t_logd, ratio = try
             r = benchmark(model, adbackend, islinked)
-            @info " t(logdensity) = $(r.primal_time)"
-            @info " t(grad)       = $(r.grad_time)"
+            @info "  t(logdensity) = $(format_time(r.primal_time))"
+            @info "  t(grad)       = $(format_time(r.grad_time))"
             (length(r.params), r.primal_time, r.grad_time / r.primal_time)
         catch e
-            @info "benchmark errored: $e"
+            @info "  errored: $(sprint(showerror, e))"
             (model_dimension(model, islinked), missing, missing)
         end
         push!(results, (; name, dim, adbackend=string(adbackend), islinked, t_logd, ratio))
diff --git a/src/test_utils/ad.jl b/src/test_utils/ad.jl
index 502be6fa5..4ff8dc461 100644
--- a/src/test_utils/ad.jl
+++ b/src/test_utils/ad.jl
@@ -376,11 +376,14 @@ function run_ad(
         # (tens of ns on Linux/macOS) instead of reading as zero. Pattern
         # borrowed from Mooncake's bench harness:
         # https://github.com/chalk-lab/Mooncake.jl/blob/main/bench/run_benchmarks.jl
+        # Per-sample `setup` deep-copies `params` so each sample starts from a
+        # fresh input buffer, matching Mooncake's bench harness. (Setup runs
+        # before the timed window, so the copy is excluded from measurements.)
         logdensity(ldf, params)  # Warm-up
         GC.gc(true)
         primal_benchmark = @be(
-            _,
-            logdensity($ldf, $params),
+            deepcopy($params),
+            logdensity($ldf, _),
             _ -> GC.gc(false),
             seconds = benchmark_seconds,
         )
@@ -392,8 +395,8 @@ function run_ad(
         logdensity_and_gradient(ldf, params)  # Warm-up
         GC.gc(true)
         grad_benchmark = @be(
-            _,
-            logdensity_and_gradient($ldf, $params),
+            deepcopy($params),
+            logdensity_and_gradient($ldf, _),
             _ -> GC.gc(false),
             seconds = benchmark_seconds,
         )

From f31e912729b92efe419bdf669680f27232ca5930 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Mon, 4 May 2026 23:11:57 +0100
Subject: [PATCH 12/41] Skip ill-defined LDA-unlinked rows; surface a
 Smorgasbord gist + collapsible full table

LDA's discrete `Categorical` RVs make `linked = false` ill-defined for
gradient-based AD, so all four backends previously errored on those
rows. Skip them at combination time, leaving 68 rows.

In markdown mode, emit a `### Gist: Smorgasbord` block with just that
model's eight rows (Smorgasbord covers the broadest set of DPPL
features, so it is the most informative single row band), then put the
full 68-row table inside `<details><summary>` so it is collapsed by
default in GitHub PR comments. Plain (non-markdown) output is
unchanged. Drop the now-redundant `### Benchmark Results` heading from
the workflow body since the script emits its own.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/Benchmarking.yml |  1 -
 benchmarks/benchmarks.jl           | 33 +++++++++++++++++++++++++++---
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 4e5433c96..09923acf4 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -65,7 +65,6 @@ jobs:
             ```
             ${{ env.VERSION_INFO }}
             ```
-            ### Benchmark Results
 
             ${{ env.BENCHMARK_OUTPUT }}
 
diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index 6a8d1cab8..c3b47f201 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -247,11 +247,20 @@ function build_combinations(rng)
     # and inspecting one model side-by-side across backends/links is trivial.
     combos = Tuple{String,DynamicPPL.Model,Symbol,Bool}[]
     for (name, model) in models, islinked in (false, true), backend in BACKENDS
+        # LDA's discrete Categorical RVs make `linked = false` ill-defined for
+        # gradient-based AD (every backend errors), so the row is omitted.
+        name == "LDA" && !islinked && continue
         push!(combos, (name, model, backend, islinked))
     end
     return combos
 end
 
+# Representative model whose 8 rows are surfaced as the at-a-glance "gist"
+# in markdown mode. `Smorgasbord` covers the broadest set of DPPL features
+# (scalar/vector/multivariate variables, `~`, `.~`, loops, observations as
+# both arguments and literals), so it is the most informative single row band.
+const GIST_MODEL = "Smorgasbord"
+
 function run(; markdown::Bool=false)
     combinations = build_combinations(StableRNG(23))
     total = length(combinations)
@@ -271,9 +280,27 @@ function run(; markdown::Bool=false)
         end
         push!(results, (; name, dim, adbackend=string(adbackend), islinked, t_logd, ratio))
     end
-    markdown && println("```")
-    print_results(results)
-    markdown && println("```")
+    if markdown
+        gist = filter(r -> r.name == GIST_MODEL, results)
+        if !isempty(gist)
+            println("### Gist: ", GIST_MODEL)
+            println()
+            println("```")
+            print_results(gist)
+            println("```")
+            println()
+        end
+        println("<details>")
+        println("<summary>Full table (", length(results), " rows)</summary>")
+        println()
+        println("```")
+        print_results(results)
+        println("```")
+        println()
+        println("</details>")
+    else
+        print_results(results)
+    end
     return nothing
 end
 

From 49b1837d80e380c47c2d312497e2674102f187b3 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Mon, 4 May 2026 23:39:34 +0100
Subject: [PATCH 13/41] Add DifferentiationInterface to benchmarks env

Required as a direct dep so the benchmarks project resolves cleanly
without a manual `Pkg.resolve()` after `Pkg.instantiate()`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 benchmarks/Project.toml  | 2 ++
 benchmarks/benchmarks.jl | 1 +
 2 files changed, 3 insertions(+)

diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml
index e80a64305..537d9f5f1 100644
--- a/benchmarks/Project.toml
+++ b/benchmarks/Project.toml
@@ -3,6 +3,7 @@ ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 AbstractPPL = "7a57a42e-76ec-4ea3-a279-07e840d6d9cf"
 Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
 Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de"
+DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
 Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
@@ -25,6 +26,7 @@ ADTypes = "1.14.0"
 AbstractPPL = "0.14"
 Bijectors = "0.15.17"
 Chairmarks = "1.3.1"
+DifferentiationInterface = "0.7"
 Distributions = "0.25.117"
 DynamicPPL = "0.41"
 Enzyme = "0.13"
diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index c3b47f201..9398a1cca 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -9,6 +9,7 @@ using Distributions:
     Normal,
     product_distribution,
     truncated
+using DifferentiationInterface: DifferentiationInterface
 using DynamicPPL: DynamicPPL, @model, to_submodel, VarInfo, LinkAll, UnlinkAll
 using DynamicPPL.TestUtils.AD: run_ad, NoTest
 using Enzyme: Enzyme

From dfe042e39029a780f15fb9c3e55d1a50502f2e5a Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 5 May 2026 10:53:10 +0100
Subject: [PATCH 14/41] Restructure benchmark PR comment; pin remaining
 setup-julia to v3

The Benchmarking comment now reads top-down as: SHA in the heading,
Smorgasbord gist as a level-3 section, explanatory paragraph, full
table as a level-3 section, and computer info as a foldable <details>
with an inline <b>-styled summary (avoids markdown inside <details>,
which renders inconsistently). The two integration jobs added to
CI.yml were still on setup-julia@v2; bumped to @v3 for consistency
with the main job.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/Benchmarking.yml | 18 +++++++-----------
 .github/workflows/CI.yml           |  4 ++--
 benchmarks/benchmarks.jl           | 13 ++++++++-----
 3 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 09923acf4..fc5ddc3a2 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -53,20 +53,16 @@ jobs:
         with:
           issue-number: ${{ github.event.pull_request.number }}
           body: |
-            ## Benchmark Report
+            ## Benchmark Report (${{ github.event.pull_request.head.sha }})
 
-            - this PR's head: `${{ github.event.pull_request.head.sha }}`
-
-            Absolute log-density times and grad/log-density ratios are
-            reported. To judge whether a PR helps or hurts, compare against
-            the latest comment on a recent main-branch PR run.
+            ${{ env.BENCHMARK_OUTPUT }}
 
-            ### Computer Information
-            ```
+            <details>
+            <summary><b>Computer Information</b></summary>
+            <pre>
             ${{ env.VERSION_INFO }}
-            ```
-
-            ${{ env.BENCHMARK_OUTPUT }}
+            </pre>
+            </details>
 
           comment-id: ${{ steps.find_comment.outputs.comment-id }}
           edit-mode: replace
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 51ffeeb8d..7e45eda74 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -86,7 +86,7 @@ jobs:
     steps:
       - uses: actions/checkout@v6
 
-      - uses: julia-actions/setup-julia@v2
+      - uses: julia-actions/setup-julia@v3
         with:
           version: "1"
 
@@ -103,7 +103,7 @@ jobs:
     steps:
       - uses: actions/checkout@v6
 
-      - uses: julia-actions/setup-julia@v2
+      - uses: julia-actions/setup-julia@v3
         with:
           version: "1"
 
diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index 9398a1cca..5c7867fae 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -284,21 +284,24 @@ function run(; markdown::Bool=false)
     if markdown
         gist = filter(r -> r.name == GIST_MODEL, results)
         if !isempty(gist)
-            println("### Gist: ", GIST_MODEL)
+            println("### ", GIST_MODEL)
             println()
             println("```")
             print_results(gist)
             println("```")
             println()
         end
-        println("<details>")
-        println("<summary>Full table (", length(results), " rows)</summary>")
+        println(
+            "Absolute log-density times and grad/log-density ratios are\n" *
+            "reported. To judge whether a PR helps or hurts, compare against\n" *
+            "the latest comment on a recent main-branch PR run.",
+        )
+        println()
+        println("### Full table (", length(results), " rows)")
         println()
         println("```")
         print_results(results)
         println("```")
-        println()
-        println("</details>")
     else
         print_results(results)
     end

From e2d5161f61105bb9e7f45119ae959c1acdd281e8 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 5 May 2026 11:09:05 +0100
Subject: [PATCH 15/41] Bench PR head and main side-by-side; tolerate
 main-bench failure

Splits Benchmarking.yml into three jobs (benchmark-pr, benchmark-main,
post-comment). The PR-comment now shows PR-head numbers up top and
main's numbers in a foldout below, with a column legend and labelled
SHAs. If benchmark-main fails (e.g. transitionally before this branch
lands on main, since main's bench script does not yet support markdown
mode), post-comment still posts PR-head numbers and notes the main job
result inline.

Workflow hardening: concurrency group cancels superseded PR runs,
60min timeout per bench job, explicit pull-requests: write permission
so the job token can post comments (fork-PR tokens stay read-only by default).

Body assembly moved out of the YAML literal block scalar into a shell
heredoc using `body-path:` (peter-evans pattern), avoiding env-var
multi-line interpolation. Bench script no longer prints the obsolete
"compare against main-branch PR run" paragraph -- main is now in the
same comment.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/Benchmarking.yml | 138 +++++++++++++++++++++++------
 benchmarks/benchmarks.jl           |   6 --
 2 files changed, 109 insertions(+), 35 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index fc5ddc3a2..ae515de5f 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -3,12 +3,26 @@ name: Benchmarking
 on:
   pull_request:
 
+# Grants the job token the scope `peter-evans/create-or-update-comment` needs.
+# NOTE: on `pull_request` runs from forks GitHub still issues a read-only token.
+permissions:
+  pull-requests: write
+
+# Cancel in-flight runs on the same PR when a new commit arrives. Benchmark
+# jobs are slow (~10min each), so back-to-back force-pushes would otherwise
+# spawn parallel runs that race to post the comment.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
+
 jobs:
-  benchmark:
-    # Pinned (rather than `ubuntu-latest`) so that successive runs land on the
-    # same VM family. GitHub silently rotates `latest`, which changes the noise
-    # floor between runs and makes timings hard to compare across PRs.
+  benchmark-pr:
+    # OS pinned (rather than `ubuntu-latest`) so that successive runs land on
+    # the same VM family — GitHub silently rotates `latest` and the noise
+    # floor changes between runs. Julia version pinned for the same reason:
+    # comparing timings under different compiler versions is meaningless.
     runs-on: ubuntu-22.04
+    timeout-minutes: 60
     steps:
       - uses: actions/checkout@v6
         with:
@@ -22,23 +36,100 @@ jobs:
         working-directory: ./benchmarks
         run: |
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
+          julia -e 'using InteractiveUtils; versioninfo()' > version_info.txt
+          # `tee` mirrors the table into the workflow log; `pipefail` stops it masking a Julia failure.
+          set -o pipefail; julia --project=. benchmarks.jl markdown | tee results.md
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-pr
+          path: |
+            benchmarks/results.md
+            benchmarks/version_info.txt
+
+  benchmark-main:
+    # Tracks main's moving HEAD — the displayed main SHA may shift between
+    # successive re-runs of the same PR if main advances in the interim.
+    runs-on: ubuntu-22.04
+    timeout-minutes: 60
+    outputs:
+      sha: ${{ steps.mainsha.outputs.sha }}
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          ref: main
+      - id: mainsha
+        run: echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
+      - uses: julia-actions/setup-julia@v3
+        with:
+          version: '1.11'
+      - uses: julia-actions/cache@v3
+
+      - name: Run benchmarks
+        working-directory: ./benchmarks
+        run: |
+          julia --project=. -e 'using Pkg; Pkg.instantiate()'
+          set -o pipefail; julia --project=. benchmarks.jl markdown | tee results.md
 
-          version_info=$(julia -e 'using InteractiveUtils; versioninfo()')
+      - uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-main
+          path: benchmarks/results.md
 
-          # Capture the markdown-mode benchmark output. The `tee` keeps it in
-          # the workflow log too, so a failure during comment posting does not
-          # lose the numbers.
-          results_file=$(mktemp)
-          julia --project=. benchmarks.jl markdown | tee "$results_file"
+  post-comment:
+    needs: [benchmark-pr, benchmark-main]
+    # Post the comment as long as the PR-head bench succeeded. If the main
+    # bench failed (e.g. transitionally, before this PR's bench changes are on
+    # main), the comment still goes up with a note in place of main's numbers.
+    if: ${{ !cancelled() && needs.benchmark-pr.result == 'success' }}
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          name: benchmark-pr
+          path: head
+      - uses: actions/download-artifact@v4
+        if: needs.benchmark-main.result == 'success'
+        with:
+          name: benchmark-main
+          path: main
 
+      - name: Build comment body
+        run: |
+          head_sha='${{ github.event.pull_request.head.sha }}'
+          main_sha='${{ needs.benchmark-main.outputs.sha }}'
+          main_status='${{ needs.benchmark-main.result }}'
           {
-            echo "VERSION_INFO<<DPPL_BENCH_EOF"
-            echo "$version_info"
-            echo "DPPL_BENCH_EOF"
-            echo "BENCHMARK_OUTPUT<<DPPL_BENCH_EOF"
-            cat "$results_file"
-            echo "DPPL_BENCH_EOF"
-          } >> "$GITHUB_ENV"
+            echo "## Benchmark Report"
+            echo ""
+            echo "**PR head:** \`${head_sha}\`  "
+            if [[ "$main_status" == "success" ]]; then
+              echo "**Main:** \`${main_sha}\` (foldout below)"
+            else
+              echo "**Main:** benchmark job did not succeed (\`${main_status}\`) — see workflow logs"
+            fi
+            echo ""
+            echo "\`t(logdensity)\`: wall-clock time per log-density evaluation."
+            echo "\`t(grad)/t(logdensity)\`: AD overhead ratio, lower is better."
+            echo ""
+            cat head/results.md
+            echo ""
+            if [[ "$main_status" == "success" ]]; then
+              echo "<details>"
+              echo "<summary><b>Main branch results</b></summary>"
+              echo ""
+              cat main/results.md
+              echo ""
+              echo "</details>"
+              echo ""
+            fi
+            echo "<details>"
+            echo "<summary><b>Computer Information</b></summary>"
+            echo "<pre>"
+            cat head/version_info.txt
+            echo "</pre>"
+            echo "</details>"
+          } > body.md
 
       - name: Find existing benchmark comment
         uses: peter-evans/find-comment@v4
@@ -52,17 +143,6 @@ jobs:
         uses: peter-evans/create-or-update-comment@v5
         with:
           issue-number: ${{ github.event.pull_request.number }}
-          body: |
-            ## Benchmark Report (${{ github.event.pull_request.head.sha }})
-
-            ${{ env.BENCHMARK_OUTPUT }}
-
-            <details>
-            <summary><b>Computer Information</b></summary>
-            <pre>
-            ${{ env.VERSION_INFO }}
-            </pre>
-            </details>
-
+          body-path: body.md
           comment-id: ${{ steps.find_comment.outputs.comment-id }}
           edit-mode: replace
diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index 5c7867fae..935941a0f 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -291,12 +291,6 @@ function run(; markdown::Bool=false)
             println("```")
             println()
         end
-        println(
-            "Absolute log-density times and grad/log-density ratios are\n" *
-            "reported. To judge whether a PR helps or hurts, compare against\n" *
-            "the latest comment on a recent main-branch PR run.",
-        )
-        println()
         println("### Full table (", length(results), " rows)")
         println()
         println("```")

From 35b7b89c8727754e1680d17045261f01c5f41a06 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 5 May 2026 12:32:09 +0100
Subject: [PATCH 16/41] Collapse full benchmark table into a <details> foldout

The 68-row table dominates the PR comment vertically. Wrap it in a
<details><summary> block so it is collapsed by default, leaving the
Smorgasbord gist as the at-a-glance view.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 benchmarks/benchmarks.jl | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index 935941a0f..9f2ee4246 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -291,11 +291,14 @@ function run(; markdown::Bool=false)
             println("```")
             println()
         end
-        println("### Full table (", length(results), " rows)")
+        println("<details>")
+        println("<summary>Full table (", length(results), " rows)</summary>")
         println()
         println("```")
         print_results(results)
         println("```")
+        println()
+        println("</details>")
     else
         print_results(results)
     end

From f7a116cc1273cd76cfca3758163251a8bb52e1ae Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 5 May 2026 17:10:24 +0100
Subject: [PATCH 17/41] Pivot benchmark table by AD backend; restructure PR
 comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Long-form table (one row per (model, linked, backend)) becomes a
pivoted table where each (Model, Dim, Linked) row spans all four AD
backends as columns. 68 rows collapse to 17, the gist/full-table split
goes away (single unified table), and the gist constant + foldout
wrapper in benchmarks/benchmarks.jl are dropped.

`t(logdensity)` does not depend on the AD backend, so the four primal
samples per group are noise around a common value — take the minimum
as the most stable estimate. Per-backend `err` cells render
independently of the primal column.

Workflow comment restructure to match: SHA goes inline in the H2
title, "PR head"/"Main" preamble lines drop, main-branch results live
under their own H3 with a "Click to see" foldout (and a fallback H3
note if benchmark-main failed), Computer Information is its own H3.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/Benchmarking.yml | 21 ++++----
 benchmarks/benchmarks.jl           | 80 ++++++++++++++++--------------
 2 files changed, 55 insertions(+), 46 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index ae515de5f..ebb986e59 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -100,14 +100,7 @@ jobs:
           main_sha='${{ needs.benchmark-main.outputs.sha }}'
           main_status='${{ needs.benchmark-main.result }}'
           {
-            echo "## Benchmark Report"
-            echo ""
-            echo "**PR head:** \`${head_sha}\`  "
-            if [[ "$main_status" == "success" ]]; then
-              echo "**Main:** \`${main_sha}\` (foldout below)"
-            else
-              echo "**Main:** benchmark job did not succeed (\`${main_status}\`) — see workflow logs"
-            fi
+            echo "## Benchmark Report (${head_sha})"
             echo ""
             echo "\`t(logdensity)\`: wall-clock time per log-density evaluation."
             echo "\`t(grad)/t(logdensity)\`: AD overhead ratio, lower is better."
@@ -115,16 +108,24 @@ jobs:
             cat head/results.md
             echo ""
             if [[ "$main_status" == "success" ]]; then
+              echo "### Main branch results (${main_sha})"
+              echo ""
               echo "<details>"
-              echo "<summary><b>Main branch results</b></summary>"
+              echo "<summary>Click to see.</summary>"
               echo ""
               cat main/results.md
               echo ""
               echo "</details>"
               echo ""
+            else
+              echo "### Main branch results"
+              echo ""
+              echo "Benchmark job for main did not succeed (\`${main_status}\`) — see workflow logs."
+              echo ""
             fi
+            echo "### Computer Information"
             echo "<details>"
-            echo "<summary><b>Computer Information</b></summary>"
+            echo "<summary>Click to see.</summary>"
             echo "<pre>"
             cat head/version_info.txt
             echo "</pre>"
diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index 9f2ee4246..9c9f47cde 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -179,11 +179,6 @@ end
 #  Reporting
 #
 
-# https://github.com/chalk-lab/Mooncake.jl/blob/main/bench/run_benchmarks.jl
-const COLNAMES = [
-    "Model", "Dim", "AD Backend", "Linked", "t(logdensity)", "t(grad)/t(logdensity)"
-]
-
 fix_sig_fig(t) = string(round(t; sigdigits=3))
 function format_time(t::Float64)
     t < 1e-6 && return fix_sig_fig(t * 1e9) * " ns"
@@ -199,22 +194,55 @@ format_ratio(::Missing) = "err"
 format_dim(d::Integer) = string(d)
 format_dim(::Missing) = "err"
 
+# Pivot so each (Model, Dim, Linked) row spans all backends. A long-form table
+# (one row per (model, linked, backend)) reads as four near-duplicate rows
+# differing only in the backend column; pivoting puts the backends side-by-side
+# where the ratios are actually compared. `t(logdensity)` does not depend on
+# the AD backend (it is the primal model evaluation), so the four primal
+# samples per group are noise around a common value — take the minimum, which
+# is the most stable estimate (see `run_ad`'s benchmark docstring).
+function pivot(results, backends)
+    keys_in_order = Tuple{String,Bool}[]
+    seen = Set{Tuple{String,Bool}}()
+    for r in results
+        k = (r.name, r.islinked)
+        if !(k in seen)
+            push!(seen, k)
+            push!(keys_in_order, k)
+        end
+    end
+    return map(keys_in_order) do (name, islinked)
+        rows = filter(r -> r.name == name && r.islinked == islinked, results)
+        primals = collect(skipmissing(r.t_logd for r in rows))
+        primal = isempty(primals) ? missing : minimum(primals)
+        ratios = Dict{String,Union{Float64,Missing}}(
+            string(b) => missing for b in backends
+        )
+        for r in rows
+            ratios[r.adbackend] = r.ratio
+        end
+        (; name, dim=first(rows).dim, islinked, primal, ratios)
+    end
+end
+
 function print_results(results)
     isempty(results) && return println("No benchmark results obtained.")
-    rows = map(results) do r
-        (
-            r.name,
-            format_dim(r.dim),
-            r.adbackend,
-            r.islinked,
-            format_time(r.t_logd),
-            format_ratio(r.ratio),
-        )
+    pivoted = pivot(results, BACKENDS)
+    backend_strs = [string(b) for b in BACKENDS]
+    n_cols = 4 + length(backend_strs)
+    matrix = Matrix{Any}(undef, length(pivoted), n_cols)
+    for (i, g) in enumerate(pivoted)
+        matrix[i, 1] = g.name
+        matrix[i, 2] = format_dim(g.dim)
+        matrix[i, 3] = g.islinked
+        matrix[i, 4] = format_time(g.primal)
+        for (j, b) in enumerate(backend_strs)
+            matrix[i, 4 + j] = format_ratio(g.ratios[b])
+        end
     end
-    matrix = hcat(Iterators.map(collect, zip(rows...))...)
     return pretty_table(
         matrix;
-        column_labels=COLNAMES,
+        column_labels=["Model", "Dim", "Linked", "t(logdensity)", backend_strs...],
         backend=:text,
         fit_table_in_display_horizontally=false,
         fit_table_in_display_vertically=false,
@@ -256,12 +284,6 @@ function build_combinations(rng)
     return combos
 end
 
-# Representative model whose 8 rows are surfaced as the at-a-glance "gist"
-# in markdown mode. `Smorgasbord` covers the broadest set of DPPL features
-# (scalar/vector/multivariate variables, `~`, `.~`, loops, observations as
-# both arguments and literals), so it is the most informative single row band.
-const GIST_MODEL = "Smorgasbord"
-
 function run(; markdown::Bool=false)
     combinations = build_combinations(StableRNG(23))
     total = length(combinations)
@@ -282,23 +304,9 @@ function run(; markdown::Bool=false)
         push!(results, (; name, dim, adbackend=string(adbackend), islinked, t_logd, ratio))
     end
     if markdown
-        gist = filter(r -> r.name == GIST_MODEL, results)
-        if !isempty(gist)
-            println("### ", GIST_MODEL)
-            println()
-            println("```")
-            print_results(gist)
-            println("```")
-            println()
-        end
-        println("<details>")
-        println("<summary>Full table (", length(results), " rows)</summary>")
-        println()
         println("```")
         print_results(results)
         println("```")
-        println()
-        println("</details>")
     else
         print_results(results)
     end

From 09c012d648f99b0f29f2b3ed13545d359fb65a93 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 5 May 2026 17:14:51 +0100
Subject: [PATCH 18/41] Tighten benchmark PR comment template
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the two-line legend with a single italic caption scoped only
to the AD-backend columns (the `t(logdensity)` header is
self-explanatory). Title becomes `## Benchmarks @ sha` and the Main /
Environment sections collapse into single-line `<details>` foldouts —
no separate H3 headings. Failure case stays an italic inline note.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/Benchmarking.yml | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index ebb986e59..c45498b06 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -100,32 +100,24 @@ jobs:
           main_sha='${{ needs.benchmark-main.outputs.sha }}'
           main_status='${{ needs.benchmark-main.result }}'
           {
-            echo "## Benchmark Report (${head_sha})"
+            echo "## Benchmarks @ ${head_sha}"
             echo ""
-            echo "\`t(logdensity)\`: wall-clock time per log-density evaluation."
-            echo "\`t(grad)/t(logdensity)\`: AD overhead ratio, lower is better."
+            echo "_AD-backend columns: \`t(grad)/t(logdensity)\`, lower is better._"
             echo ""
             cat head/results.md
             echo ""
             if [[ "$main_status" == "success" ]]; then
-              echo "### Main branch results (${main_sha})"
-              echo ""
-              echo "<details>"
-              echo "<summary>Click to see.</summary>"
+              echo "<details><summary>Main @ ${main_sha}</summary>"
               echo ""
               cat main/results.md
               echo ""
               echo "</details>"
               echo ""
             else
-              echo "### Main branch results"
-              echo ""
-              echo "Benchmark job for main did not succeed (\`${main_status}\`) — see workflow logs."
+              echo "_Main bench did not succeed (\`${main_status}\`); see workflow logs._"
               echo ""
             fi
-            echo "### Computer Information"
-            echo "<details>"
-            echo "<summary>Click to see.</summary>"
+            echo "<details><summary>Environment</summary>"
             echo "<pre>"
             cat head/version_info.txt
             echo "</pre>"

From 8982438a8d9f4b329e39cd291bbfc5f1771b9974 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 5 May 2026 18:27:49 +0100
Subject: [PATCH 19/41] Restructure benchmark report table formatting

Replace the PrettyTables benchmark report with a manual text formatter
modeled on posteriordb-bench: top/bottom `=` rules, centered `eval`
and `gradient` banners, dashed subgroup underlines, and a stub of
Model/dim/linked columns. Keep the current pivoted data shape, with a
shared `primal` column and backend ratio columns labelled FwdDiff,
RvsDiff, Mooncake, and Enzyme.

While there, simplify the renderer by formatting rows once up front and
using a single backend key/label table as the source of truth. Update
the PR comment caption to explain that `primal` is shared
`t(logdensity)` and the backend columns are `t(grad)/t(logdensity)`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/Benchmarking.yml |  2 +-
 benchmarks/benchmarks.jl           | 89 +++++++++++++++++++++++-------
 2 files changed, 69 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index c45498b06..fe67ae238 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -102,7 +102,7 @@ jobs:
           {
             echo "## Benchmarks @ ${head_sha}"
             echo ""
-            echo "_AD-backend columns: \`t(grad)/t(logdensity)\`, lower is better._"
+            echo "_`primal` is shared \`t(logdensity)\`; AD-backend columns are \`t(grad)/t(logdensity)\` (lower is better)._"
             echo ""
             cat head/results.md
             echo ""
diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index 9c9f47cde..cab363811 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -16,7 +16,6 @@ using Enzyme: Enzyme
 using ForwardDiff: ForwardDiff
 using LinearAlgebra: cholesky
 using Mooncake: Mooncake
-using PrettyTables: pretty_table
 using Printf: @sprintf
 using ReverseDiff: ReverseDiff
 using StableRNGs: StableRNG
@@ -215,9 +214,7 @@ function pivot(results, backends)
         rows = filter(r -> r.name == name && r.islinked == islinked, results)
         primals = collect(skipmissing(r.t_logd for r in rows))
         primal = isempty(primals) ? missing : minimum(primals)
-        ratios = Dict{String,Union{Float64,Missing}}(
-            string(b) => missing for b in backends
-        )
+        ratios = Dict{String,Union{Float64,Missing}}(string(b) => missing for b in backends)
         for r in rows
             ratios[r.adbackend] = r.ratio
         end
@@ -228,25 +225,75 @@ end
 function print_results(results)
     isempty(results) && return println("No benchmark results obtained.")
     pivoted = pivot(results, BACKENDS)
-    backend_strs = [string(b) for b in BACKENDS]
-    n_cols = 4 + length(backend_strs)
-    matrix = Matrix{Any}(undef, length(pivoted), n_cols)
-    for (i, g) in enumerate(pivoted)
-        matrix[i, 1] = g.name
-        matrix[i, 2] = format_dim(g.dim)
-        matrix[i, 3] = g.islinked
-        matrix[i, 4] = format_time(g.primal)
-        for (j, b) in enumerate(backend_strs)
-            matrix[i, 4 + j] = format_ratio(g.ratios[b])
-        end
+    backend_info = [
+        (key="forwarddiff", label="FwdDiff"),
+        (key="reversediff", label="RvsDiff"),
+        (key="mooncake", label="Mooncake"),
+        (key="enzyme", label="Enzyme"),
+    ]
+
+    rows = map(pivoted) do g
+        ratios = [format_ratio(g.ratios[b.key]) for b in backend_info]
+        (
+            name=g.name,
+            dim=format_dim(g.dim),
+            linked=string(g.islinked),
+            primal=format_time(g.primal),
+            ratios,
+        )
     end
-    return pretty_table(
-        matrix;
-        column_labels=["Model", "Dim", "Linked", "t(logdensity)", backend_strs...],
-        backend=:text,
-        fit_table_in_display_horizontally=false,
-        fit_table_in_display_vertically=false,
+
+    name_w = max(length("Model"), maximum(textwidth(r.name) for r in rows)) + 1
+    dim_w = max(length("dim"), maximum(textwidth(r.dim) for r in rows)) + 2
+    linked_w = max(length("linked"), maximum(textwidth(r.linked) for r in rows)) + 2
+    primal_w = max(length("primal"), maximum(textwidth(r.primal) for r in rows)) + 2
+    ratio_ws = [
+        max(length(b.label), maximum(textwidth(r.ratios[i]) for r in rows)) + 2 for
+        (i, b) in enumerate(backend_info)
+    ]
+
+    gap = "  "
+    gap_w = textwidth(gap)
+    stub_w = name_w + dim_w + linked_w + 2 * gap_w
+    eval_w = primal_w
+    grad_w = sum(ratio_ws) + gap_w * (length(ratio_ws) - 1)
+    total_w = stub_w + gap_w + eval_w + gap_w + grad_w
+
+    center(s, w) = lpad(rpad(s, div(w + textwidth(s), 2)), w)
+    println(repeat("=", total_w))
+    println(
+        rpad("", stub_w) * gap * center("eval", eval_w) * gap * center("gradient", grad_w)
     )
+    println(rpad("", stub_w) * gap * repeat("-", eval_w) * gap * repeat("-", grad_w))
+
+    header =
+        rpad("Model", name_w) *
+        gap *
+        lpad("dim", dim_w) *
+        gap *
+        lpad("linked", linked_w) *
+        gap *
+        lpad("primal", primal_w) *
+        gap *
+        join((lpad(b.label, w) for (b, w) in zip(backend_info, ratio_ws)), gap)
+    println(header)
+    println(repeat("-", total_w))
+
+    for r in rows
+        row =
+            rpad(r.name, name_w) *
+            gap *
+            lpad(r.dim, dim_w) *
+            gap *
+            lpad(r.linked, linked_w) *
+            gap *
+            lpad(r.primal, primal_w) *
+            gap *
+            join((lpad(x, w) for (x, w) in zip(r.ratios, ratio_ws)), gap)
+        println(row)
+    end
+    println(repeat("=", total_w))
+    return nothing
 end
 
 #

From adf76470e95715ef280e07d342c1b0bf19e71a9b Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 5 May 2026 21:06:36 +0100
Subject: [PATCH 20/41] Sync benchmark workflow comment body with benchmarks
 branch

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .github/workflows/Benchmarking.yml | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index fe67ae238..8de69b6c1 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -102,11 +102,11 @@ jobs:
           {
             echo "## Benchmarks @ ${head_sha}"
             echo ""
-            echo "_`primal` is shared \`t(logdensity)\`; AD-backend columns are \`t(grad)/t(logdensity)\` (lower is better)._"
-            echo ""
             cat head/results.md
             echo ""
             if [[ "$main_status" == "success" ]]; then
+              echo "Each row times one of DynamicPPL's reference models on this PR's head. \`Dim\` is the parameter count; \`Linked\` is \`true\` when parameters have been mapped to unconstrained space. \`t(logdensity)\` is the wall-clock time for one log-density evaluation. The AD (automatic differentiation) backend columns express gradient time as a multiple of \`t(logdensity)\` — a value of \`10\` means computing the gradient takes 10× as long as the log-density. Lower is better throughout; \`err\` means the backend errored on that model. Compare against \`main\` below to spot regressions."
+              echo ""
               echo "<details><summary>Main @ ${main_sha}</summary>"
               echo ""
               cat main/results.md
@@ -114,7 +114,9 @@ jobs:
               echo "</details>"
               echo ""
             else
-              echo "_Main bench did not succeed (\`${main_status}\`); see workflow logs._"
+              echo "Each row times one of DynamicPPL's reference models on this PR's head. \`Dim\` is the parameter count; \`Linked\` is \`true\` when parameters have been mapped to unconstrained space. \`t(logdensity)\` is the wall-clock time for one log-density evaluation. The AD (automatic differentiation) backend columns express gradient time as a multiple of \`t(logdensity)\` — a value of \`10\` means computing the gradient takes 10× as long as the log-density. Lower is better throughout; \`err\` means the backend errored on that model."
+              echo ""
+              echo "Main benchmark job did not succeed (\`${main_status}\`) — see workflow logs."
               echo ""
             fi
             echo "<details><summary>Environment</summary>"

From f307d428a7d268e066ce3639a8dd21ee58e8803f Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 5 May 2026 22:36:49 +0100
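Subject: [PATCH 21/41] Anchor benchmark comment with markers; move notes to
 README

- wrap the PR comment body in `<!-- benchmark-report:begin -->` /
  `<!-- benchmark-report:end -->` markers and match on the begin marker when
  looking for an existing comment
- declare `contents: read` and `issues: write` workflow permissions explicitly
- replace the inline column explanation with a short header linking to a new
  "Interpreting results" section in benchmarks/README.md
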
---
 .github/workflows/Benchmarking.yml | 18 +++++++++++-------
 benchmarks/README.md               | 20 ++++++++++++++++++--
 2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 8de69b6c1..fcfcf70fa 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -3,9 +3,9 @@ name: Benchmarking
 on:
   pull_request:
 
-# Needed so `peter-evans/create-or-update-comment` can post on PRs from forks
-# (default GITHUB_TOKEN is read-only for fork pull_requests).
 permissions:
+  contents: read
+  issues: write
   pull-requests: write
 
 # Cancel in-flight runs on the same PR when a new commit arrives. Benchmark
@@ -100,13 +100,18 @@ jobs:
           main_sha='${{ needs.benchmark-main.outputs.sha }}'
           main_status='${{ needs.benchmark-main.result }}'
           {
+            echo "<!-- benchmark-report:begin -->"
             echo "## Benchmarks @ ${head_sha}"
             echo ""
+            echo "### Performance"
+            echo ""
+            echo "Performance Ratio:"
+            echo "Ratio of time to compute gradient and time to compute log-density."
+            echo "Warning: results are very approximate! See [benchmark notes](https://github.com/TuringLang/DynamicPPL.jl/tree/main/benchmarks#interpreting-results) for more context."
+            echo ""
             cat head/results.md
             echo ""
             if [[ "$main_status" == "success" ]]; then
-              echo "Each row times one of DynamicPPL's reference models on this PR's head. \`Dim\` is the parameter count; \`Linked\` is \`true\` when parameters have been mapped to unconstrained space. \`t(logdensity)\` is the wall-clock time for one log-density evaluation. The AD (automatic differentiation) backend columns express gradient time as a multiple of \`t(logdensity)\` — a value of \`10\` means computing the gradient takes 10× as long as the log-density. Lower is better throughout; \`err\` means the backend errored on that model. Compare against \`main\` below to spot regressions."
-              echo ""
               echo "<details><summary>Main @ ${main_sha}</summary>"
               echo ""
               cat main/results.md
@@ -114,8 +119,6 @@ jobs:
               echo "</details>"
               echo ""
             else
-              echo "Each row times one of DynamicPPL's reference models on this PR's head. \`Dim\` is the parameter count; \`Linked\` is \`true\` when parameters have been mapped to unconstrained space. \`t(logdensity)\` is the wall-clock time for one log-density evaluation. The AD (automatic differentiation) backend columns express gradient time as a multiple of \`t(logdensity)\` — a value of \`10\` means computing the gradient takes 10× as long as the log-density. Lower is better throughout; \`err\` means the backend errored on that model."
-              echo ""
               echo "Main benchmark job did not succeed (\`${main_status}\`) — see workflow logs."
               echo ""
             fi
@@ -124,6 +127,7 @@ jobs:
             cat head/version_info.txt
             echo "</pre>"
             echo "</details>"
+            echo "<!-- benchmark-report:end -->"
           } > body.md
 
       - name: Find existing benchmark comment
@@ -132,7 +136,7 @@ jobs:
         with:
           issue-number: ${{ github.event.pull_request.number }}
           comment-author: github-actions[bot]
-          body-includes: Benchmark Report
+          body-includes: '<!-- benchmark-report:begin -->'
 
       - name: Create or update benchmark comment
         uses: peter-evans/create-or-update-comment@v5
diff --git a/benchmarks/README.md b/benchmarks/README.md
index 67a2cca43..ceddcdbda 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -1,3 +1,5 @@
+# Benchmarks
+
 Run from the repository root:
 
 ```sh
@@ -6,5 +8,19 @@ julia --project=benchmarks benchmarks/benchmarks.jl
 ```
 
 The `Benchmarking` CI workflow runs this on each PR and posts the table as a
-comment. There is no base-vs-head comparison: judge regressions by comparing
-against the most recent main-branch run in the comment history.
+comment.
+
+## Interpreting results
+
+Each row times one of DynamicPPL's reference models. `Dim` is the parameter
+count. `Linked` is `true` when parameters have been mapped to unconstrained
+space. `t(logdensity)` is the wall-clock time for one log-density evaluation.
+
+The AD backend columns are performance ratios: each value is the gradient time
+divided by `t(logdensity)`. For example, a value of `10` means computing the
+gradient takes 10 times as long as evaluating the log-density. Lower is better.
+`err` means the backend errored on that model.
+
+The CI comment shows the PR head table first and, when available, includes a
+collapsed `main` table for comparison. Treat the numbers as approximate and use
+the `main` table to spot likely regressions.

From 83e72a7baa5534105591b6c748665e72126c4adf Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 5 May 2026 23:07:17 +0100
Subject: [PATCH 22/41] Clarify noisy benchmark ratios

---
 .github/workflows/Benchmarking.yml |  2 ++
 benchmarks/README.md               |  5 +++++
 benchmarks/benchmarks.jl           | 15 ++++++++++-----
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index fcfcf70fa..885462692 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -111,6 +111,8 @@ jobs:
             echo ""
             cat head/results.md
             echo ""
+            echo "Rows marked \`*\` have \`t(logdensity)\` below about 100 ns; their ratios can be dominated by timer floor, fixed overhead, and run-to-run variation. For those rows, raw \`t(grad)\` is more meaningful than \`t(grad)/t(logdensity)\`."
+            echo ""
             if [[ "$main_status" == "success" ]]; then
               echo "<details><summary>Main @ ${main_sha}</summary>"
               echo ""
diff --git a/benchmarks/README.md b/benchmarks/README.md
index ceddcdbda..5c266a2d4 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -21,6 +21,11 @@ divided by `t(logdensity)`. For example, a value of `10` means computing the
 gradient takes 10 times as long as evaluating the log-density. Lower is better.
 `err` means the backend errored on that model.
 
+If `t(logdensity)` is below about 100 ns, ratios are often dominated by timer
+floor and fixed overhead. For those rows, raw `t(grad)` is more meaningful than
+`t(grad)/t(logdensity)`. These microbenchmarks can also vary noticeably across
+runs.
+
 The CI comment shows the PR head table first and, when available, includes a
 collapsed `main` table for comparison. Treat the numbers as approximate and use
 the `main` table to spot likely regressions.
diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index cab363811..4c260cec1 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -35,7 +35,7 @@ end
 Covers many DynamicPPL features: scalar/vector/multivariate variables, `~`,
 `.~`, loops, allocated vectors, and observations as both arguments and literals.
 """
-@model function smorgasbord(x, y, ::Type{TV}=Vector{Float64}) where {TV}
+@model function smorgasbord(x, y, (::Type{TV})=Vector{Float64}) where {TV}
     @assert length(x) == length(y)
     m ~ truncated(Normal(); lower=0)
     means ~ product_distribution(fill(Exponential(m), length(x)))
@@ -50,7 +50,7 @@ Covers many DynamicPPL features: scalar/vector/multivariate variables, `~`,
 end
 
 "`num_dims` univariate normals via a loop. Condition on `o` after instantiation."
-@model function loop_univariate(num_dims, ::Type{TV}=Vector{Float64}) where {TV}
+@model function loop_univariate(num_dims, (::Type{TV})=Vector{Float64}) where {TV}
     a = TV(undef, num_dims)
     o = TV(undef, num_dims)
     for i in 1:num_dims
@@ -64,7 +64,7 @@ end
 end
 
 "As `loop_univariate`, but using `product_distribution` instead of loops."
-@model function multivariate(num_dims, ::Type{TV}=Vector{Float64}) where {TV}
+@model function multivariate(num_dims, (::Type{TV})=Vector{Float64}) where {TV}
     a = TV(undef, num_dims)
     o = TV(undef, num_dims)
     a ~ product_distribution(fill(Normal(0, 1), num_dims))
@@ -86,7 +86,7 @@ end
 end
 
 "Variables whose support varies under linking, or otherwise nontrivial bijectors."
-@model function dynamic(::Type{T}=Vector{Float64}) where {T}
+@model function dynamic((::Type{T})=Vector{Float64}) where {T}
     eta ~ truncated(Normal(); lower=0.0, upper=0.1)
     mat1 ~ LKJCholesky(4, eta)
     mat2 ~ InverseWishart(3.2, cholesky([1.0 0.5; 0.5 1.0]))
@@ -193,6 +193,11 @@ format_ratio(::Missing) = "err"
 format_dim(d::Integer) = string(d)
 format_dim(::Missing) = "err"
 
+const TINY_PRIMAL_THRESHOLD_SECONDS = 100e-9
+
+is_tiny_primal(t::Float64) = t < TINY_PRIMAL_THRESHOLD_SECONDS
+is_tiny_primal(::Missing) = false
+
 # Pivot so each (Model, Dim, Linked) row spans all backends. A long-form table
 # (one row per (model, linked, backend)) reads as four near-duplicate rows
 # differing only in the backend column; pivoting puts the backends side-by-side
@@ -235,7 +240,7 @@ function print_results(results)
     rows = map(pivoted) do g
         ratios = [format_ratio(g.ratios[b.key]) for b in backend_info]
         (
-            name=g.name,
+            name=is_tiny_primal(g.primal) ? "$(g.name)*" : g.name,
             dim=format_dim(g.dim),
             linked=string(g.islinked),
             primal=format_time(g.primal),

From e8efe38f6bddf50c13489a33bc255d14e223f394 Mon Sep 17 00:00:00 2001
From: Hong Ge <3279477+yebai@users.noreply.github.com>
Date: Wed, 6 May 2026 23:50:09 +0100
Subject: [PATCH 23/41] Update Project.toml

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 0c065cc8c..81321045f 100644
--- a/Project.toml
+++ b/Project.toml
@@ -53,7 +53,7 @@ DynamicPPLReverseDiffExt = ["ReverseDiff"]
 [compat]
 ADTypes = "1"
 AbstractMCMC = "5.14"
-AbstractPPL = "0.14.1"
+AbstractPPL = "0.15"
 Accessors = "0.1"
 BangBang = "0.4.1"
 Bijectors = "0.15.17"

From 6cd4d1e781a4ff96abb4844cc3179399f1a52906 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Wed, 13 May 2026 11:27:11 +0100
Subject: [PATCH 24/41] Bump AbstractPPL compat to 0.15 in test, docs, and
 benchmarks envs

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 AGENTS.md               | 1 +
 benchmarks/Project.toml | 2 +-
 docs/Project.toml       | 2 +-
 test/Project.toml       | 2 +-
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index c051f5269..639f7c084 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -14,6 +14,7 @@ DynamicPPL builds on AbstractPPL.jl for shared PPL interfaces such as `VarName`,
 
   - CI also runs Aqua.jl quality checks and doctests.
   - Test files are self-contained: use package imports, not relative imports or `include()`, so they run individually with TestPicker.jl.
+  - Always refresh each environment (`Pkg.update()` / `up`) before tests or doc builds — a stale manifest can cause subtle resolution and loading issues.
   - Formatting is JuliaFormatter v1 (Blue style), enforced by CI:
     
     ```bash
diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml
index 537d9f5f1..4eda0fcbb 100644
--- a/benchmarks/Project.toml
+++ b/benchmarks/Project.toml
@@ -23,7 +23,7 @@ DynamicPPL = {path = ".."}
 
 [compat]
 ADTypes = "1.14.0"
-AbstractPPL = "0.14"
+AbstractPPL = "0.15"
 Bijectors = "0.15.17"
 Chairmarks = "1.3.1"
 DifferentiationInterface = "0.7"
diff --git a/docs/Project.toml b/docs/Project.toml
index 38261c0a4..da8c87408 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -28,7 +28,7 @@ AbstractPPL = {url = "https://github.com/TuringLang/AbstractPPL.jl", rev = "eval
 [compat]
 ADTypes = "1"
 AbstractMCMC = "5"
-AbstractPPL = "0.14"
+AbstractPPL = "0.15"
 Accessors = "0.1"
 BangBang = "0.4"
 Bijectors = "0.15.17"
diff --git a/test/Project.toml b/test/Project.toml
index 67a996136..66defcb9c 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -37,7 +37,7 @@ DynamicPPL = {path = ".."}
 [compat]
 ADTypes = "1"
 AbstractMCMC = "5.10"
-AbstractPPL = "0.14"
+AbstractPPL = "0.15"
 Accessors = "0.1"
 Aqua = "0.8"
 ComponentArrays = "0.15"

From 66f5aeddf5f3e116e7920d80d417e334736c4432 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Wed, 13 May 2026 12:02:00 +0100
Subject: [PATCH 25/41] Add Bijectors source override to envs that lack it

Registered Bijectors 0.15.17+ caps AbstractPPL below 0.15, so the
main package and the integration/floattypes/docs envs must pull
Bijectors from its replace-di-with-abstractppl branch in order to
resolve alongside the AbstractPPL evaluator-interface branch.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 Project.toml                              | 1 +
 docs/Project.toml                         | 1 +
 test/floattypes/Project.toml              | 1 +
 test/integration/enzyme/Project.toml      | 1 +
 test/integration/reversediff/Project.toml | 1 +
 5 files changed, 5 insertions(+)

diff --git a/Project.toml b/Project.toml
index 81321045f..3ff3f5b2d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -30,6 +30,7 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
 AbstractPPL = {url = "https://github.com/TuringLang/AbstractPPL.jl", rev = "evaluator-interface"}
+Bijectors = {url = "https://github.com/TuringLang/Bijectors.jl", rev = "replace-di-with-abstractppl"}
 
 [weakdeps]
 ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
diff --git a/docs/Project.toml b/docs/Project.toml
index da8c87408..56ddc96af 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -24,6 +24,7 @@ StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
 
 [sources]
 AbstractPPL = {url = "https://github.com/TuringLang/AbstractPPL.jl", rev = "evaluator-interface"}
+Bijectors = {url = "https://github.com/TuringLang/Bijectors.jl", rev = "replace-di-with-abstractppl"}
 
 [compat]
 ADTypes = "1"
diff --git a/test/floattypes/Project.toml b/test/floattypes/Project.toml
index c8772eed3..1a2ef9227 100644
--- a/test/floattypes/Project.toml
+++ b/test/floattypes/Project.toml
@@ -9,4 +9,5 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
 AbstractPPL = {rev = "evaluator-interface", url = "https://github.com/TuringLang/AbstractPPL.jl"}
+Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = "../.."}
diff --git a/test/integration/enzyme/Project.toml b/test/integration/enzyme/Project.toml
index c673319b1..66ccf7056 100644
--- a/test/integration/enzyme/Project.toml
+++ b/test/integration/enzyme/Project.toml
@@ -8,4 +8,5 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
 AbstractPPL = {rev = "evaluator-interface", url = "https://github.com/TuringLang/AbstractPPL.jl"}
+Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = "../../.."}
diff --git a/test/integration/reversediff/Project.toml b/test/integration/reversediff/Project.toml
index d6a6e2204..e1b38102f 100644
--- a/test/integration/reversediff/Project.toml
+++ b/test/integration/reversediff/Project.toml
@@ -12,6 +12,7 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
 AbstractPPL = {rev = "evaluator-interface", url = "https://github.com/TuringLang/AbstractPPL.jl"}
+Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = "../../.."}
 
 [compat]

From 64557cca098ad575d28875998527dc6a2ae89a91 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Wed, 13 May 2026 12:08:49 +0100
Subject: [PATCH 26/41] Add Bijectors to deps in floattypes and integration
 envs

The [sources] override for Bijectors requires Bijectors to be
listed in [deps]; Pkg rejects sources for packages not present
in deps or extras.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 test/floattypes/Project.toml              | 1 +
 test/integration/enzyme/Project.toml      | 1 +
 test/integration/reversediff/Project.toml | 1 +
 3 files changed, 3 insertions(+)

diff --git a/test/floattypes/Project.toml b/test/floattypes/Project.toml
index 1a2ef9227..87872f218 100644
--- a/test/floattypes/Project.toml
+++ b/test/floattypes/Project.toml
@@ -1,6 +1,7 @@
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 AbstractPPL = "7a57a42e-76ec-4ea3-a279-07e840d6d9cf"
+Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
diff --git a/test/integration/enzyme/Project.toml b/test/integration/enzyme/Project.toml
index 66ccf7056..7da8e842d 100644
--- a/test/integration/enzyme/Project.toml
+++ b/test/integration/enzyme/Project.toml
@@ -1,6 +1,7 @@
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 AbstractPPL = "7a57a42e-76ec-4ea3-a279-07e840d6d9cf"
+Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
 DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
 Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
diff --git a/test/integration/reversediff/Project.toml b/test/integration/reversediff/Project.toml
index e1b38102f..8c4ab031d 100644
--- a/test/integration/reversediff/Project.toml
+++ b/test/integration/reversediff/Project.toml
@@ -1,6 +1,7 @@
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 AbstractPPL = "7a57a42e-76ec-4ea3-a279-07e840d6d9cf"
+Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
 DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"

From 59344819eadfdc463f43b7fc5010f467857dcaca Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Wed, 13 May 2026 15:25:01 +0100
Subject: [PATCH 27/41] Inline LogDensityProblems entry points

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/logdensityfunction.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/logdensityfunction.jl b/src/logdensityfunction.jl
index 0340ae7fe..b19feae7a 100644
--- a/src/logdensityfunction.jl
+++ b/src/logdensityfunction.jl
@@ -522,7 +522,7 @@ function (f::LogDensityAt)(params::AbstractVector{<:Real})
     )
 end
 
-function LogDensityProblems.logdensity(
+@inline function LogDensityProblems.logdensity(
     ldf::LogDensityFunction, params::AbstractVector{<:Real}
 )
     return logdensity_at(
@@ -535,7 +535,7 @@ function LogDensityProblems.logdensity(
     )
 end
 
-function LogDensityProblems.logdensity_and_gradient(
+@inline function LogDensityProblems.logdensity_and_gradient(
     ldf::LogDensityFunction, params::AbstractVector{<:Real}
 )
     # `params` has to be converted to the same vector type that was used for AD preparation,

From 52cdddc85601755d0f0453f4da008406bc301215 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Wed, 13 May 2026 16:08:48 +0100
Subject: [PATCH 28/41] Load DifferentiationInterface in AD-using test envs

AbstractPPL's `prepare(::AbstractADType, ...)` method lives in its
DifferentiationInterface extension, so test envs that exercise the AD
path with ForwardDiff/Enzyme need DI present and imported.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 test/Project.toml                    | 2 ++
 test/floattypes/Project.toml         | 1 +
 test/floattypes/main.jl              | 3 ++-
 test/integration/enzyme/Project.toml | 1 +
 test/integration/enzyme/main.jl      | 1 +
 test/logdensityfunction.jl           | 1 +
 6 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/test/Project.toml b/test/Project.toml
index 66defcb9c..d2d17f5ac 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -9,6 +9,7 @@ Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
 Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de"
 Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
+DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
 Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
@@ -45,6 +46,7 @@ BangBang = "0.4"
 Bijectors = "0.15.17"
 Chairmarks = "1"
 Combinatorics = "1"
+DifferentiationInterface = "0.6.41, 0.7"
 DimensionalData = "0.30"
 Distributions = "0.25"
 Documenter = "1"
diff --git a/test/floattypes/Project.toml b/test/floattypes/Project.toml
index 87872f218..ff6cf0f90 100644
--- a/test/floattypes/Project.toml
+++ b/test/floattypes/Project.toml
@@ -2,6 +2,7 @@
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 AbstractPPL = "7a57a42e-76ec-4ea3-a279-07e840d6d9cf"
 Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
+DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
diff --git a/test/floattypes/main.jl b/test/floattypes/main.jl
index 235bf5b52..69974245a 100644
--- a/test/floattypes/main.jl
+++ b/test/floattypes/main.jl
@@ -7,7 +7,8 @@
 #
 # and this should be looped over for `f64`, `f32`, `f16`, and `min`.
 
-using DynamicPPL, LogDensityProblems, ForwardDiff, Distributions, ADTypes, Test
+using DynamicPPL,
+    DifferentiationInterface, LogDensityProblems, ForwardDiff, Distributions, ADTypes, Test
 
 function floattypestr_to_type(floattypestr)
     if floattypestr == "f64"
diff --git a/test/integration/enzyme/Project.toml b/test/integration/enzyme/Project.toml
index 7da8e842d..e22e6d0cc 100644
--- a/test/integration/enzyme/Project.toml
+++ b/test/integration/enzyme/Project.toml
@@ -2,6 +2,7 @@
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 AbstractPPL = "7a57a42e-76ec-4ea3-a279-07e840d6d9cf"
 Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
+DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
 Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
diff --git a/test/integration/enzyme/main.jl b/test/integration/enzyme/main.jl
index ec589ae39..1a762f0fe 100644
--- a/test/integration/enzyme/main.jl
+++ b/test/integration/enzyme/main.jl
@@ -1,6 +1,7 @@
 using DynamicPPL.TestUtils: ALL_MODELS
 using DynamicPPL.TestUtils.AD: run_ad
 using ADTypes: AutoEnzyme
+using DifferentiationInterface: DifferentiationInterface
 using Test: @test, @testset
 import Enzyme: set_runtime_activity, Forward, Reverse, Const
 using ForwardDiff: ForwardDiff  # run_ad uses FD for correctness test
diff --git a/test/logdensityfunction.jl b/test/logdensityfunction.jl
index b07798c24..a6ec92773 100644
--- a/test/logdensityfunction.jl
+++ b/test/logdensityfunction.jl
@@ -13,6 +13,7 @@ using LogDensityProblems: LogDensityProblems
 using Random: Xoshiro
 using StableRNGs: StableRNG
 
+using DifferentiationInterface: DifferentiationInterface
 using ForwardDiff: ForwardDiff
 using Mooncake: Mooncake
 

From ed4d85224cf23609e25c27d59090e4f4d4cf672a Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Wed, 13 May 2026 16:18:46 +0100
Subject: [PATCH 29/41] Re-collect AD gradients in run_ad

Some backends (notably Enzyme) return gradients as non-Vector types
(e.g. Enzyme.TupleArray). ADResult expects Vector, so collect() before
storing.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/test_utils/ad.jl | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/test_utils/ad.jl b/src/test_utils/ad.jl
index 4ff8dc461..4daf6f702 100644
--- a/src/test_utils/ad.jl
+++ b/src/test_utils/ad.jl
@@ -343,6 +343,8 @@ function run_ad(
 
     # Calculate log-density and gradient with the backend of interest
     value, grad = logdensity_and_gradient(ldf, params)
+    # collect(): some backends (e.g. Enzyme) return non-Vector gradients
+    grad = collect(grad)
     verbose && println("       actual : $((value, grad))")
 
     # Test correctness
@@ -359,6 +361,7 @@ function run_ad(
                 model, getlogdensity, transform_strategy; adtype=test.adtype
             )
             value_true, grad_true = logdensity_and_gradient(ldf_reference, params)
+            grad_true = collect(grad_true)
         end
         # Perform testing
         verbose && println("     expected : $((value_true, grad_true))")

From 1bfa6e6596dcba05e031e3fd99484ed66b11e01f Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Wed, 13 May 2026 17:53:40 +0100
Subject: [PATCH 30/41] Pass raw gradient target to AbstractPPL.prepare

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/logdensityfunction.jl | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/logdensityfunction.jl b/src/logdensityfunction.jl
index b19feae7a..e607e754b 100644
--- a/src/logdensityfunction.jl
+++ b/src/logdensityfunction.jl
@@ -231,7 +231,16 @@ struct LogDensityFunction{
             )
             # `x` was just constructed from the same range metadata stored in `problem`,
             # so the AD wrapper can skip its hot-path dimension validation.
-            AbstractPPL.prepare(adtype, problem, x; check_dims=false)
+            contexts = (
+                model, getlogdensity, ranges_and_transforms, transform_strategy, accs
+            )
+            AbstractPPL.prepare(
+                adtype,
+                problem,
+                x;
+                check_dims=false,
+                raw_gradient_target=(logdensity_at, contexts),
+            )
         end
         return new{
             typeof(model),

From cd1cedf245a506147034da7fab667a48823f095b Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Wed, 13 May 2026 17:58:16 +0100
Subject: [PATCH 31/41] Bind raw_gradient_target to problem fields

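Reading from `problem`'s fields keeps the AD prep target in sync if
`LogDensityAt`'s shape ever changes, instead of duplicating the 5-tuple.

Also in this commit:
- bump the package version from 0.41.7 to 0.42
- drop `contents: read` and `issues: write` from the Benchmarking
  workflow permissions, leaving only `pull-requests: write`
- reword the `collect(grad)` comment in `run_ad`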

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/Benchmarking.yml |  2 --
 Project.toml                       |  2 +-
 src/logdensityfunction.jl          | 14 ++++++++++----
 src/test_utils/ad.jl               |  2 +-
 4 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 63dfc13d3..5088d5b98 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -4,8 +4,6 @@ on:
   pull_request:
 
 permissions:
-  contents: read
-  issues: write
   pull-requests: write
 
 # Cancel in-flight runs on the same PR when a new commit arrives. Benchmark
diff --git a/Project.toml b/Project.toml
index 3ff3f5b2d..df46dca1c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "DynamicPPL"
 uuid = "366bfd00-2699-11ea-058f-f148b4cae6d8"
-version = "0.41.7"
+version = "0.42"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
diff --git a/src/logdensityfunction.jl b/src/logdensityfunction.jl
index e607e754b..ef490ab05 100644
--- a/src/logdensityfunction.jl
+++ b/src/logdensityfunction.jl
@@ -231,15 +231,21 @@ struct LogDensityFunction{
             )
             # `x` was just constructed from the same range metadata stored in `problem`,
             # so the AD wrapper can skip its hot-path dimension validation.
-            contexts = (
-                model, getlogdensity, ranges_and_transforms, transform_strategy, accs
-            )
             AbstractPPL.prepare(
                 adtype,
                 problem,
                 x;
                 check_dims=false,
-                raw_gradient_target=(logdensity_at, contexts),
+                raw_gradient_target=(
+                    logdensity_at,
+                    (
+                        problem.model,
+                        problem.getlogdensity,
+                        problem.varname_ranges,
+                        problem.transform_strategy,
+                        problem.accs,
+                    ),
+                ),
             )
         end
         return new{
diff --git a/src/test_utils/ad.jl b/src/test_utils/ad.jl
index 4daf6f702..7b879f3ac 100644
--- a/src/test_utils/ad.jl
+++ b/src/test_utils/ad.jl
@@ -343,7 +343,7 @@ function run_ad(
 
     # Calculate log-density and gradient with the backend of interest
     value, grad = logdensity_and_gradient(ldf, params)
-    # collect(): some backends (e.g. Enzyme) return non-Vector gradients
+    # Some AD backends (e.g. Enzyme) return non-Vector gradients; normalise to Vector.
     grad = collect(grad)
     verbose && println("       actual : $((value, grad))")
 

From 94c1b853431cc679eecb5eab16db22519c012a69 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Wed, 13 May 2026 18:12:43 +0100
Subject: [PATCH 32/41] Bump DynamicPPL compat to 0.42 in benchmarks and docs
 envs

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 benchmarks/Project.toml | 2 +-
 docs/Project.toml       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml
index 4eda0fcbb..0117ae6be 100644
--- a/benchmarks/Project.toml
+++ b/benchmarks/Project.toml
@@ -28,7 +28,7 @@ Bijectors = "0.15.17"
 Chairmarks = "1.3.1"
 DifferentiationInterface = "0.7"
 Distributions = "0.25.117"
-DynamicPPL = "0.41"
+DynamicPPL = "0.42"
 Enzyme = "0.13"
 ForwardDiff = "1"
 LogDensityProblems = "2.1.2"
diff --git a/docs/Project.toml b/docs/Project.toml
index 56ddc96af..7e7b26428 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -40,7 +40,7 @@ Distributions = "0.25"
 Documenter = "1"
 DocumenterInterLinks = "1"
 DocumenterMermaid = "0.1, 0.2"
-DynamicPPL = "0.41"
+DynamicPPL = "0.42"
 FillArrays = "0.13, 1"
 ForwardDiff = "0.10, 1"
 LogDensityProblems = "2"

From 07879c86fb5bae35ef81f820b99075b7465aa618 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Mon, 18 May 2026 14:30:19 +0100
Subject: [PATCH 33/41] Switch AD prep to context-based AbstractPPL.prepare API

Pass the LogDensityFunction state as a constant `context` tuple to
`AbstractPPL.prepare(adtype, logdensity_internal, x; ...)` instead of
building a `LogDensityAt` problem object and routing through the
Mooncake-only `raw_gradient_target` keyword. This restores a single
prep entry point that both the DI and Mooncake AbstractPPL extensions
can handle.

Rename `logdensity_at` to `logdensity_internal` to reflect that it is
the implementation behind `LogDensityProblems.logdensity(ldf, params)`
rather than a user-facing wrapper. Keep `logdensity_at` as a const
alias so existing references still resolve.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/logdensityfunction.jl | 30 +++++++++++-------------------
 1 file changed, 11 insertions(+), 19 deletions(-)

diff --git a/src/logdensityfunction.jl b/src/logdensityfunction.jl
index ef490ab05..374c8cf6a 100644
--- a/src/logdensityfunction.jl
+++ b/src/logdensityfunction.jl
@@ -226,26 +226,13 @@ struct LogDensityFunction{
         else
             # Make backend-specific tweaks to the adtype
             adtype = DynamicPPL.tweak_adtype(adtype, model, x)
-            problem = LogDensityAt(
+            context = (
                 model, getlogdensity, ranges_and_transforms, transform_strategy, accs
             )
-            # `x` was just constructed from the same range metadata stored in `problem`,
+            # `x` was just constructed from the same range metadata stored in `context`,
             # so the AD wrapper can skip its hot-path dimension validation.
             AbstractPPL.prepare(
-                adtype,
-                problem,
-                x;
-                check_dims=false,
-                raw_gradient_target=(
-                    logdensity_at,
-                    (
-                        problem.model,
-                        problem.getlogdensity,
-                        problem.varname_ranges,
-                        problem.transform_strategy,
-                        problem.accs,
-                    ),
-                ),
+                adtype, logdensity_internal, x; check_dims=false, context=context
             )
         end
         return new{
@@ -474,7 +461,7 @@ ldf_accs(::typeof(getlogprior)) = AccumulatorTuple((LogPriorAccumulator(),))
 ldf_accs(::typeof(getloglikelihood)) = AccumulatorTuple((LogLikelihoodAccumulator(),))
 
 """
-    logdensity_at(
+    logdensity_internal(
         params::AbstractVector{<:Real},
         model::Model,
         getlogdensity::Any,
@@ -484,9 +471,10 @@ ldf_accs(::typeof(getloglikelihood)) = AccumulatorTuple((LogLikelihoodAccumulato
     )
 
 Calculate the log density at the given `params`, using the provided information extracted
-from a `LogDensityFunction`.
+from a `LogDensityFunction`. This is the internal implementation behind
+`LogDensityProblems.logdensity(ldf, params)`.
 """
-function logdensity_at(
+function logdensity_internal(
     params::AbstractVector{<:Real},
     model::Model,
     getlogdensity::Any,
@@ -501,6 +489,9 @@ function logdensity_at(
     return getlogdensity(vi)
 end
 
+# Backwards-compatible alias for the previous name.
+const logdensity_at = logdensity_internal
+
 """
     LogDensityAt(
         model::Model,
@@ -514,6 +505,7 @@ A callable struct that behaves in the same way as `logdensity_at`, but stores th
 other information internally. Having two separate functions/structs allows for better
 performance with AD backends.
 """
+# TODO: remove this compatibility wrapper in the next breaking release.
 struct LogDensityAt{
     M<:Model,F,V<:VarNamedTuple,L<:AbstractTransformStrategy,A<:AccumulatorTuple
 }

From 6876b4e811745b4c17d78350989053a258aaee24 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 19 May 2026 12:57:48 +0100
Subject: [PATCH 34/41] Point AbstractPPL source to main branch in all envs

Switch the `evaluator-interface` revision overrides in every Project.toml
to `main` now that the evaluator API has landed there. Also drop redundant
parens in a `@model` Type default in benchmarks/benchmarks.jl.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 Project.toml                                            | 2 +-
 benchmarks/Project.toml                                 | 2 +-
 benchmarks/benchmarks.jl                                | 2 +-
 docs/Project.toml                                       | 2 +-
 test/Project.toml                                       | 2 +-
 test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml | 2 +-
 test/floattypes/Project.toml                            | 2 +-
 test/integration/enzyme/Project.toml                    | 2 +-
 test/integration/reversediff/Project.toml               | 2 +-
 9 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/Project.toml b/Project.toml
index df46dca1c..7b1bae954 100644
--- a/Project.toml
+++ b/Project.toml
@@ -29,7 +29,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
-AbstractPPL = {url = "https://github.com/TuringLang/AbstractPPL.jl", rev = "evaluator-interface"}
+AbstractPPL = {url = "https://github.com/TuringLang/AbstractPPL.jl", rev = "main"}
 Bijectors = {url = "https://github.com/TuringLang/Bijectors.jl", rev = "replace-di-with-abstractppl"}
 
 [weakdeps]
diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml
index 0117ae6be..ba2f95d7f 100644
--- a/benchmarks/Project.toml
+++ b/benchmarks/Project.toml
@@ -17,7 +17,7 @@ ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 
 [sources]
-AbstractPPL = {rev = "evaluator-interface", url = "https://github.com/TuringLang/AbstractPPL.jl"}
+AbstractPPL = {rev = "main", url = "https://github.com/TuringLang/AbstractPPL.jl"}
 Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = ".."}
 
diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index 4c260cec1..0201307b8 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -86,7 +86,7 @@ end
 end
 
 "Variables whose support varies under linking, or otherwise nontrivial bijectors."
-@model function dynamic((::Type{T})=Vector{Float64}) where {T}
+@model function dynamic(::Type{T}=Vector{Float64}) where {T}
     eta ~ truncated(Normal(); lower=0.0, upper=0.1)
     mat1 ~ LKJCholesky(4, eta)
     mat2 ~ InverseWishart(3.2, cholesky([1.0 0.5; 0.5 1.0]))
diff --git a/docs/Project.toml b/docs/Project.toml
index 7e7b26428..3bcec387e 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -23,7 +23,7 @@ StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
 
 [sources]
-AbstractPPL = {url = "https://github.com/TuringLang/AbstractPPL.jl", rev = "evaluator-interface"}
+AbstractPPL = {url = "https://github.com/TuringLang/AbstractPPL.jl", rev = "main"}
 Bijectors = {url = "https://github.com/TuringLang/Bijectors.jl", rev = "replace-di-with-abstractppl"}
 
 [compat]
diff --git a/test/Project.toml b/test/Project.toml
index d2d17f5ac..b82894974 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -31,7 +31,7 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
 
 [sources]
-AbstractPPL = {rev = "evaluator-interface", url = "https://github.com/TuringLang/AbstractPPL.jl"}
+AbstractPPL = {rev = "main", url = "https://github.com/TuringLang/AbstractPPL.jl"}
 Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = ".."}
 
diff --git a/test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml b/test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml
index 7a01092a0..e7e2e8d42 100644
--- a/test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml
+++ b/test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml
@@ -9,7 +9,7 @@ MarginalLogDensities = "f0c3360a-fb8d-11e9-1194-5521fd7ee392"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
-AbstractPPL = {rev = "evaluator-interface", url = "https://github.com/TuringLang/AbstractPPL.jl"}
+AbstractPPL = {rev = "main", url = "https://github.com/TuringLang/AbstractPPL.jl"}
 Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = "../../.."}
 
diff --git a/test/floattypes/Project.toml b/test/floattypes/Project.toml
index ff6cf0f90..d6b1c0259 100644
--- a/test/floattypes/Project.toml
+++ b/test/floattypes/Project.toml
@@ -10,6 +10,6 @@ LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
-AbstractPPL = {rev = "evaluator-interface", url = "https://github.com/TuringLang/AbstractPPL.jl"}
+AbstractPPL = {rev = "main", url = "https://github.com/TuringLang/AbstractPPL.jl"}
 Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = "../.."}
diff --git a/test/integration/enzyme/Project.toml b/test/integration/enzyme/Project.toml
index e22e6d0cc..f9053924f 100644
--- a/test/integration/enzyme/Project.toml
+++ b/test/integration/enzyme/Project.toml
@@ -9,6 +9,6 @@ ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
-AbstractPPL = {rev = "evaluator-interface", url = "https://github.com/TuringLang/AbstractPPL.jl"}
+AbstractPPL = {rev = "main", url = "https://github.com/TuringLang/AbstractPPL.jl"}
 Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = "../../.."}
diff --git a/test/integration/reversediff/Project.toml b/test/integration/reversediff/Project.toml
index 8c4ab031d..c76a86805 100644
--- a/test/integration/reversediff/Project.toml
+++ b/test/integration/reversediff/Project.toml
@@ -12,7 +12,7 @@ ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
-AbstractPPL = {rev = "evaluator-interface", url = "https://github.com/TuringLang/AbstractPPL.jl"}
+AbstractPPL = {rev = "main", url = "https://github.com/TuringLang/AbstractPPL.jl"}
 Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = "../../.."}
 

From bf6c85b484ca49bcbe4d50198e41ac60f0355d43 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 19 May 2026 12:57:53 +0100
Subject: [PATCH 35/41] Deprecate LogDensityAt; return an
 AbstractPPL.Evaluators.VectorEvaluator

Replace the LogDensityAt struct with a deprecation shim that emits
`Base.depwarn` and returns an `AbstractPPL.Evaluators.VectorEvaluator`
wrapping a closure over `logdensity_internal`. The new path is the
sanctioned one now that AD prep flows through `AbstractPPL.prepare`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/logdensityfunction.jl | 55 +++++++++++++++++++++------------------
 1 file changed, 30 insertions(+), 25 deletions(-)

diff --git a/src/logdensityfunction.jl b/src/logdensityfunction.jl
index 374c8cf6a..8b53a4ba2 100644
--- a/src/logdensityfunction.jl
+++ b/src/logdensityfunction.jl
@@ -495,38 +495,43 @@ const logdensity_at = logdensity_internal
 """
     LogDensityAt(
         model::Model,
-        getlogdensity::Any,
+        getlogdensity,
         varname_ranges::VarNamedTuple,
         transform_strategy::AbstractTransformStrategy,
         accs::AccumulatorTuple,
     )
 
-A callable struct that behaves in the same way as `logdensity_at`, but stores the model and
-other information internally. Having two separate functions/structs allows for better
-performance with AD backends.
+!!! warning "Deprecated"
+    `LogDensityAt` is retained as a compatibility shim and emits a deprecation
+    warning. It returns an `AbstractPPL.Evaluators.VectorEvaluator` whose call
+    forwards to [`DynamicPPL.logdensity_internal`](@ref). New code should
+    construct a `VectorEvaluator` (or `AbstractPPL.Evaluators.Prepared` via
+    `AbstractPPL.prepare`) directly.
 """
-# TODO: remove this compatibility wrapper in the next breaking release.
-struct LogDensityAt{
-    M<:Model,F,V<:VarNamedTuple,L<:AbstractTransformStrategy,A<:AccumulatorTuple
-}
-    model::M
-    getlogdensity::F
-    varname_ranges::V
-    transform_strategy::L
-    accs::A
-
-    function LogDensityAt(
-        model::M, getlogdensity::F, varname_ranges::V, transform_strategy::L, accs::A
-    ) where {M,F,V,L,A}
-        return new{M,F,V,L,A}(
-            model, getlogdensity, varname_ranges, transform_strategy, accs
-        )
-    end
-end
-function (f::LogDensityAt)(params::AbstractVector{<:Real})
-    return logdensity_at(
-        params, f.model, f.getlogdensity, f.varname_ranges, f.transform_strategy, f.accs
+function LogDensityAt(
+    model::Model,
+    getlogdensity,
+    varname_ranges::VarNamedTuple,
+    transform_strategy::AbstractTransformStrategy,
+    accs::AccumulatorTuple,
+)
+    Base.depwarn(
+        "`DynamicPPL.LogDensityAt` is deprecated; wrap a closure over " *
+        "`DynamicPPL.logdensity_internal` in `AbstractPPL.Evaluators.VectorEvaluator`, " *
+        "or call `AbstractPPL.prepare` on a `LogDensityFunction`.",
+        :LogDensityAt,
     )
+    f =
+        let m = model,
+            g = getlogdensity,
+            r = varname_ranges,
+            t = transform_strategy,
+            a = accs
+
+            params -> logdensity_internal(params, m, g, r, t, a)
+        end
+    dim = mapreduce(rat -> length(rat.range), +, values(varname_ranges); init=0)
+    return AbstractPPL.Evaluators.VectorEvaluator(f, dim)
 end
 
 @inline function LogDensityProblems.logdensity(

From db0ac059affe8264cd6d83ff2004cc2b54eb004d Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 19 May 2026 13:02:14 +0100
Subject: [PATCH 36/41] Use AbstractPPL.prepare context kwarg in LogDensityAt
 deprecation

Replace the closure-over-state form with the new `context::Tuple` kwarg
on `AbstractPPL.prepare`, which threads constants through to the problem
function. Equivalent semantics, no closure construction.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/logdensityfunction.jl | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/src/logdensityfunction.jl b/src/logdensityfunction.jl
index 8b53a4ba2..8cb4606ee 100644
--- a/src/logdensityfunction.jl
+++ b/src/logdensityfunction.jl
@@ -504,9 +504,8 @@ const logdensity_at = logdensity_internal
 !!! warning "Deprecated"
     `LogDensityAt` is retained as a compatibility shim and emits a deprecation
     warning. It returns an `AbstractPPL.Evaluators.VectorEvaluator` whose call
-    forwards to [`DynamicPPL.logdensity_internal`](@ref). New code should
-    construct a `VectorEvaluator` (or `AbstractPPL.Evaluators.Prepared` via
-    `AbstractPPL.prepare`) directly.
+    forwards to [`DynamicPPL.logdensity_internal`](@ref). New code should call
+    `AbstractPPL.prepare(logdensity_internal, x; context=...)` directly.
 """
 function LogDensityAt(
     model::Model,
@@ -516,22 +515,14 @@ function LogDensityAt(
     accs::AccumulatorTuple,
 )
     Base.depwarn(
-        "`DynamicPPL.LogDensityAt` is deprecated; wrap a closure over " *
-        "`DynamicPPL.logdensity_internal` in `AbstractPPL.Evaluators.VectorEvaluator`, " *
-        "or call `AbstractPPL.prepare` on a `LogDensityFunction`.",
+        "`DynamicPPL.LogDensityAt` is deprecated; call " *
+        "`AbstractPPL.prepare(DynamicPPL.logdensity_internal, x; context=...)` " *
+        "instead.",
         :LogDensityAt,
     )
-    f =
-        let m = model,
-            g = getlogdensity,
-            r = varname_ranges,
-            t = transform_strategy,
-            a = accs
-
-            params -> logdensity_internal(params, m, g, r, t, a)
-        end
     dim = mapreduce(rat -> length(rat.range), +, values(varname_ranges); init=0)
-    return AbstractPPL.Evaluators.VectorEvaluator(f, dim)
+    context = (model, getlogdensity, varname_ranges, transform_strategy, accs)
+    return AbstractPPL.prepare(logdensity_internal, zeros(dim); context=context)
 end
 
 @inline function LogDensityProblems.logdensity(

From 54a082013190664d3adfa4157a74683eee0f80b3 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 19 May 2026 13:03:51 +0100
Subject: [PATCH 37/41] Skip dims check in LogDensityAt deprecation shim

Pass `check_dims=false` to match the pre-deprecation `LogDensityAt`'s
unchecked call behavior; callers that relied on it shouldn't suddenly
hit a `DimensionMismatch`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/logdensityfunction.jl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/logdensityfunction.jl b/src/logdensityfunction.jl
index 8cb4606ee..d31f50186 100644
--- a/src/logdensityfunction.jl
+++ b/src/logdensityfunction.jl
@@ -522,7 +522,9 @@ function LogDensityAt(
     )
     dim = mapreduce(rat -> length(rat.range), +, values(varname_ranges); init=0)
     context = (model, getlogdensity, varname_ranges, transform_strategy, accs)
-    return AbstractPPL.prepare(logdensity_internal, zeros(dim); context=context)
+    return AbstractPPL.prepare(
+        logdensity_internal, zeros(dim); check_dims=false, context=context
+    )
 end
 
 @inline function LogDensityProblems.logdensity(

From aaf1e3f5567cc777efa99318070be2bbef48bfe8 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 19 May 2026 13:16:32 +0100
Subject: [PATCH 38/41] Consolidate integration test envs under test/ext

Move test/integration/{enzyme,reversediff}/ to
test/ext/{DynamicPPLEnzymeCoreExt,DynamicPPLReverseDiffExt}/, and fold the
MarginalLogDensities split (a subdirectory env plus a sibling script) into a
single directory, test/ext/DynamicPPLMarginalLogDensitiesExt/, containing
both Project.toml and main.jl.

Each integration env now follows the same layout. CI.yml updated accordingly.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/CI.yml                                      | 4 ++--
 .../enzyme => ext/DynamicPPLEnzymeCoreExt}/Project.toml       | 0
 .../enzyme => ext/DynamicPPLEnzymeCoreExt}/main.jl            | 0
 .../main.jl}                                                  | 0
 .../reversediff => ext/DynamicPPLReverseDiffExt}/Project.toml | 0
 .../reversediff => ext/DynamicPPLReverseDiffExt}/main.jl      | 0
 6 files changed, 2 insertions(+), 2 deletions(-)
 rename test/{integration/enzyme => ext/DynamicPPLEnzymeCoreExt}/Project.toml (100%)
 rename test/{integration/enzyme => ext/DynamicPPLEnzymeCoreExt}/main.jl (100%)
 rename test/ext/{DynamicPPLMarginalLogDensitiesExt.jl => DynamicPPLMarginalLogDensitiesExt/main.jl} (100%)
 rename test/{integration/reversediff => ext/DynamicPPLReverseDiffExt}/Project.toml (100%)
 rename test/{integration/reversediff => ext/DynamicPPLReverseDiffExt}/main.jl (100%)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 7e45eda74..c9766e0cf 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -93,7 +93,7 @@ jobs:
       - uses: julia-actions/cache@v3
 
       - name: Run AD with ReverseDiff on demo models
-        working-directory: test/integration/reversediff
+        working-directory: test/ext/DynamicPPLReverseDiffExt
         run: |
           julia --project=. --color=yes -e 'using Pkg; Pkg.instantiate()'
           julia --project=. --color=yes main.jl
@@ -113,4 +113,4 @@ jobs:
         working-directory: test/ext/DynamicPPLMarginalLogDensitiesExt
         run: |
           julia --project=. --color=yes -e 'using Pkg; Pkg.instantiate()'
-          julia --project=. --color=yes ../DynamicPPLMarginalLogDensitiesExt.jl
+          julia --project=. --color=yes main.jl
diff --git a/test/integration/enzyme/Project.toml b/test/ext/DynamicPPLEnzymeCoreExt/Project.toml
similarity index 100%
rename from test/integration/enzyme/Project.toml
rename to test/ext/DynamicPPLEnzymeCoreExt/Project.toml
diff --git a/test/integration/enzyme/main.jl b/test/ext/DynamicPPLEnzymeCoreExt/main.jl
similarity index 100%
rename from test/integration/enzyme/main.jl
rename to test/ext/DynamicPPLEnzymeCoreExt/main.jl
diff --git a/test/ext/DynamicPPLMarginalLogDensitiesExt.jl b/test/ext/DynamicPPLMarginalLogDensitiesExt/main.jl
similarity index 100%
rename from test/ext/DynamicPPLMarginalLogDensitiesExt.jl
rename to test/ext/DynamicPPLMarginalLogDensitiesExt/main.jl
diff --git a/test/integration/reversediff/Project.toml b/test/ext/DynamicPPLReverseDiffExt/Project.toml
similarity index 100%
rename from test/integration/reversediff/Project.toml
rename to test/ext/DynamicPPLReverseDiffExt/Project.toml
diff --git a/test/integration/reversediff/main.jl b/test/ext/DynamicPPLReverseDiffExt/main.jl
similarity index 100%
rename from test/integration/reversediff/main.jl
rename to test/ext/DynamicPPLReverseDiffExt/main.jl

From a142a58b78ec9014b8494084c9ca6270acbf0c8b Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 19 May 2026 16:08:00 +0100
Subject: [PATCH 39/41] Drop AbstractPPL source pins now that 0.15 is
 registered

Remove the `AbstractPPL = {url = ..., rev = "main"}` overrides from all
eight Project.toml files. Compat is already at 0.15, so the resolver
picks up the registered release.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 Project.toml                                            | 1 -
 benchmarks/Project.toml                                 | 1 -
 docs/Project.toml                                       | 1 -
 test/Project.toml                                       | 1 -
 test/ext/DynamicPPLEnzymeCoreExt/Project.toml           | 1 -
 test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml | 1 -
 test/ext/DynamicPPLReverseDiffExt/Project.toml          | 1 -
 test/floattypes/Project.toml                            | 1 -
 8 files changed, 8 deletions(-)

diff --git a/Project.toml b/Project.toml
index 7b1bae954..f8f5516a6 100644
--- a/Project.toml
+++ b/Project.toml
@@ -29,7 +29,6 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
-AbstractPPL = {url = "https://github.com/TuringLang/AbstractPPL.jl", rev = "main"}
 Bijectors = {url = "https://github.com/TuringLang/Bijectors.jl", rev = "replace-di-with-abstractppl"}
 
 [weakdeps]
diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml
index ba2f95d7f..2b1fb2aff 100644
--- a/benchmarks/Project.toml
+++ b/benchmarks/Project.toml
@@ -17,7 +17,6 @@ ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 
 [sources]
-AbstractPPL = {rev = "main", url = "https://github.com/TuringLang/AbstractPPL.jl"}
 Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = ".."}
 
diff --git a/docs/Project.toml b/docs/Project.toml
index 3bcec387e..b750c1aa8 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -23,7 +23,6 @@ StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
 
 [sources]
-AbstractPPL = {url = "https://github.com/TuringLang/AbstractPPL.jl", rev = "main"}
 Bijectors = {url = "https://github.com/TuringLang/Bijectors.jl", rev = "replace-di-with-abstractppl"}
 
 [compat]
diff --git a/test/Project.toml b/test/Project.toml
index b82894974..fc68e90dd 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -31,7 +31,6 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
 
 [sources]
-AbstractPPL = {rev = "main", url = "https://github.com/TuringLang/AbstractPPL.jl"}
 Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = ".."}
 
diff --git a/test/ext/DynamicPPLEnzymeCoreExt/Project.toml b/test/ext/DynamicPPLEnzymeCoreExt/Project.toml
index f9053924f..9c7c6dff2 100644
--- a/test/ext/DynamicPPLEnzymeCoreExt/Project.toml
+++ b/test/ext/DynamicPPLEnzymeCoreExt/Project.toml
@@ -9,6 +9,5 @@ ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
-AbstractPPL = {rev = "main", url = "https://github.com/TuringLang/AbstractPPL.jl"}
 Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = "../../.."}
diff --git a/test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml b/test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml
index e7e2e8d42..7f43a16b5 100644
--- a/test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml
+++ b/test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml
@@ -9,7 +9,6 @@ MarginalLogDensities = "f0c3360a-fb8d-11e9-1194-5521fd7ee392"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
-AbstractPPL = {rev = "main", url = "https://github.com/TuringLang/AbstractPPL.jl"}
 Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = "../../.."}
 
diff --git a/test/ext/DynamicPPLReverseDiffExt/Project.toml b/test/ext/DynamicPPLReverseDiffExt/Project.toml
index c76a86805..2b22d4c71 100644
--- a/test/ext/DynamicPPLReverseDiffExt/Project.toml
+++ b/test/ext/DynamicPPLReverseDiffExt/Project.toml
@@ -12,7 +12,6 @@ ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
-AbstractPPL = {rev = "main", url = "https://github.com/TuringLang/AbstractPPL.jl"}
 Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = "../../.."}
 
diff --git a/test/floattypes/Project.toml b/test/floattypes/Project.toml
index d6b1c0259..8be272ce4 100644
--- a/test/floattypes/Project.toml
+++ b/test/floattypes/Project.toml
@@ -10,6 +10,5 @@ LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
-AbstractPPL = {rev = "main", url = "https://github.com/TuringLang/AbstractPPL.jl"}
 Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = "../.."}

From 02f6bc2503f317edeb24d9d0eef4e2eb1e0ca15a Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Thu, 21 May 2026 20:45:09 +0100
Subject: [PATCH 40/41] Drop Bijectors source pins now that 0.16 is registered

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 Project.toml                                            | 5 +----
 benchmarks/Project.toml                                 | 3 +--
 docs/Project.toml                                       | 5 +----
 test/Project.toml                                       | 3 +--
 test/ext/DynamicPPLEnzymeCoreExt/Project.toml           | 1 -
 test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml | 3 +--
 test/ext/DynamicPPLReverseDiffExt/Project.toml          | 1 -
 test/floattypes/Project.toml                            | 1 -
 8 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/Project.toml b/Project.toml
index f8f5516a6..07fdb81ea 100644
--- a/Project.toml
+++ b/Project.toml
@@ -28,9 +28,6 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
-[sources]
-Bijectors = {url = "https://github.com/TuringLang/Bijectors.jl", rev = "replace-di-with-abstractppl"}
-
 [weakdeps]
 ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
 EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
@@ -56,7 +53,7 @@ AbstractMCMC = "5.14"
 AbstractPPL = "0.15"
 Accessors = "0.1"
 BangBang = "0.4.1"
-Bijectors = "0.15.17"
+Bijectors = "0.16"
 Chairmarks = "1.3.1"
 Compat = "4"
 ComponentArrays = "0.15"
diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml
index 2b1fb2aff..8133937b0 100644
--- a/benchmarks/Project.toml
+++ b/benchmarks/Project.toml
@@ -17,13 +17,12 @@ ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 
 [sources]
-Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = ".."}
 
 [compat]
 ADTypes = "1.14.0"
 AbstractPPL = "0.15"
-Bijectors = "0.15.17"
+Bijectors = "0.16"
 Chairmarks = "1.3.1"
 DifferentiationInterface = "0.7"
 Distributions = "0.25.117"
diff --git a/docs/Project.toml b/docs/Project.toml
index b750c1aa8..17659122c 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -22,16 +22,13 @@ OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
 
-[sources]
-Bijectors = {url = "https://github.com/TuringLang/Bijectors.jl", rev = "replace-di-with-abstractppl"}
-
 [compat]
 ADTypes = "1"
 AbstractMCMC = "5"
 AbstractPPL = "0.15"
 Accessors = "0.1"
 BangBang = "0.4"
-Bijectors = "0.15.17"
+Bijectors = "0.16"
 Chairmarks = "1"
 ChangesOfVariables = "0.1"
 DimensionalData = "0.30"
diff --git a/test/Project.toml b/test/Project.toml
index fc68e90dd..a14ce0bb1 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -31,7 +31,6 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
 
 [sources]
-Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = ".."}
 
 [compat]
@@ -42,7 +41,7 @@ Accessors = "0.1"
 Aqua = "0.8"
 ComponentArrays = "0.15"
 BangBang = "0.4"
-Bijectors = "0.15.17"
+Bijectors = "0.16"
 Chairmarks = "1"
 Combinatorics = "1"
 DifferentiationInterface = "0.6.41, 0.7"
diff --git a/test/ext/DynamicPPLEnzymeCoreExt/Project.toml b/test/ext/DynamicPPLEnzymeCoreExt/Project.toml
index 9c7c6dff2..7a3d097ac 100644
--- a/test/ext/DynamicPPLEnzymeCoreExt/Project.toml
+++ b/test/ext/DynamicPPLEnzymeCoreExt/Project.toml
@@ -9,5 +9,4 @@ ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
-Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = "../../.."}
diff --git a/test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml b/test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml
index 7f43a16b5..c541036d1 100644
--- a/test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml
+++ b/test/ext/DynamicPPLMarginalLogDensitiesExt/Project.toml
@@ -9,12 +9,11 @@ MarginalLogDensities = "f0c3360a-fb8d-11e9-1194-5521fd7ee392"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
-Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = "../../.."}
 
 [compat]
 ADTypes = "1"
-Bijectors = "0.15.17"
+Bijectors = "0.16"
 Distributions = "0.25"
 ForwardDiff = "0.10.12, 1"
 MarginalLogDensities = "0.4"
diff --git a/test/ext/DynamicPPLReverseDiffExt/Project.toml b/test/ext/DynamicPPLReverseDiffExt/Project.toml
index 2b22d4c71..53dbe7915 100644
--- a/test/ext/DynamicPPLReverseDiffExt/Project.toml
+++ b/test/ext/DynamicPPLReverseDiffExt/Project.toml
@@ -12,7 +12,6 @@ ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
-Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = "../../.."}
 
 [compat]
diff --git a/test/floattypes/Project.toml b/test/floattypes/Project.toml
index 8be272ce4..5201c4ee0 100644
--- a/test/floattypes/Project.toml
+++ b/test/floattypes/Project.toml
@@ -10,5 +10,4 @@ LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [sources]
-Bijectors = {rev = "replace-di-with-abstractppl", url = "https://github.com/TuringLang/Bijectors.jl"}
 DynamicPPL = {path = "../.."}

From 3bdeb74281b9b4e92f4aec36070f30bc30955788 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Thu, 21 May 2026 21:03:54 +0100
Subject: [PATCH 41/41] Point Enzyme CI workflow at relocated integration test

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/Enzyme.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/Enzyme.yml b/.github/workflows/Enzyme.yml
index 5a1d3d069..318b8ec9a 100644
--- a/.github/workflows/Enzyme.yml
+++ b/.github/workflows/Enzyme.yml
@@ -29,7 +29,7 @@ jobs:
       - uses: julia-actions/cache@v3
 
       - name: Run AD with Enzyme on demo models
-        working-directory: test/integration/enzyme
+        working-directory: test/ext/DynamicPPLEnzymeCoreExt
         run: |
           julia --project=. --color=yes -e 'using Pkg; Pkg.instantiate()'
           julia --project=. --color=yes main.jl