From a946b33009ef6d413e42941ce101a431b41bda6c Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 5 May 2026 11:32:44 +0100
Subject: [PATCH 1/7] Cherry-pick benchmarks updates from adproblems-interface

Add DifferentiationInterface, AbstractPPL and Bijectors to the
benchmarks env and adopt the flatter markdown layout (no <details>
wrapper, no "Gist:" prefix). AbstractPPL and Bijectors use their
released versions rather than the fork-branch sources used on
adproblems-interface.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 benchmarks/Project.toml  | 6 ++++++
 benchmarks/benchmarks.jl | 8 +++-----
 2 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml
index 1440ba40e..fa5052254 100644
--- a/benchmarks/Project.toml
+++ b/benchmarks/Project.toml
@@ -1,6 +1,9 @@
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
+AbstractPPL = "7a57a42e-76ec-4ea3-a279-07e840d6d9cf"
+Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
 Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de"
+DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
 Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
@@ -18,7 +21,10 @@ DynamicPPL = {path = ".."}
 
 [compat]
 ADTypes = "1.14.0"
+AbstractPPL = "0.14"
+Bijectors = "0.15.17"
 Chairmarks = "1.3.1"
+DifferentiationInterface = "0.7"
 Distributions = "0.25.117"
 DynamicPPL = "0.41"
 Enzyme = "0.13"
diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index c3b47f201..935941a0f 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -9,6 +9,7 @@ using Distributions:
     Normal,
     product_distribution,
     truncated
+using DifferentiationInterface: DifferentiationInterface
 using DynamicPPL: DynamicPPL, @model, to_submodel, VarInfo, LinkAll, UnlinkAll
 using DynamicPPL.TestUtils.AD: run_ad, NoTest
 using Enzyme: Enzyme
@@ -283,21 +284,18 @@ function run(; markdown::Bool=false)
     if markdown
         gist = filter(r -> r.name == GIST_MODEL, results)
         if !isempty(gist)
-            println("### Gist: ", GIST_MODEL)
+            println("### ", GIST_MODEL)
             println()
             println("```")
             print_results(gist)
             println("```")
             println()
         end
-        println("<details>")
-        println("<summary>Full table (", length(results), " rows)</summary>")
+        println("### Full table (", length(results), " rows)")
         println()
         println("```")
         print_results(results)
         println("```")
-        println()
-        println("</details>")
     else
         print_results(results)
     end

From df6de192dd0761d80946591bbacefcd6a7c710c3 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 5 May 2026 11:35:09 +0100
Subject: [PATCH 2/7] Cherry-pick Benchmarking.yml from adproblems-interface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pairs with the prior commit's benchmarks.jl markdown changes — the new
workflow benches PR head and main side-by-side and wraps main's table
in <details> on the CI side.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/Benchmarking.yml | 143 ++++++++++++++++++++++-------
 1 file changed, 109 insertions(+), 34 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 4e5433c96..ae515de5f 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -3,12 +3,26 @@ name: Benchmarking
 on:
   pull_request:
 
+# Needed so `peter-evans/create-or-update-comment` can post the PR comment when
+# the default GITHUB_TOKEN is read-only; fork PRs normally stay read-only anyway.
+permissions:
+  pull-requests: write
+
+# Cancel in-flight runs on the same PR when a new commit arrives. Benchmark
+# jobs are slow (~10min each), so back-to-back force-pushes would otherwise
+# spawn parallel runs that race to post the comment.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
+
 jobs:
-  benchmark:
-    # Pinned (rather than `ubuntu-latest`) so that successive runs land on the
-    # same VM family. GitHub silently rotates `latest`, which changes the noise
-    # floor between runs and makes timings hard to compare across PRs.
+  benchmark-pr:
+    # OS pinned (rather than `ubuntu-latest`) so that successive runs land on
+    # the same VM family — GitHub silently rotates `latest` and the noise
+    # floor changes between runs. Julia version pinned for the same reason:
+    # comparing timings under different compiler versions is meaningless.
     runs-on: ubuntu-22.04
+    timeout-minutes: 60
     steps:
       - uses: actions/checkout@v6
         with:
@@ -22,23 +36,100 @@ jobs:
         working-directory: ./benchmarks
         run: |
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
+          julia -e 'using InteractiveUtils; versioninfo()' > version_info.txt
+          # `tee` so the table also appears in the workflow log at-a-glance.
+          julia --project=. benchmarks.jl markdown | tee results.md
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-pr
+          path: |
+            benchmarks/results.md
+            benchmarks/version_info.txt
+
+  benchmark-main:
+    # Tracks main's moving HEAD — the displayed main SHA may shift between
+    # successive re-runs of the same PR if main advances in the interim.
+    runs-on: ubuntu-22.04
+    timeout-minutes: 60
+    outputs:
+      sha: ${{ steps.mainsha.outputs.sha }}
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          ref: main
+      - id: mainsha
+        run: echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
+      - uses: julia-actions/setup-julia@v3
+        with:
+          version: '1.11'
+      - uses: julia-actions/cache@v3
+
+      - name: Run benchmarks
+        working-directory: ./benchmarks
+        run: |
+          julia --project=. -e 'using Pkg; Pkg.instantiate()'
+          julia --project=. benchmarks.jl markdown | tee results.md
 
-          version_info=$(julia -e 'using InteractiveUtils; versioninfo()')
+      - uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-main
+          path: benchmarks/results.md
 
-          # Capture the markdown-mode benchmark output. The `tee` keeps it in
-          # the workflow log too, so a failure during comment posting does not
-          # lose the numbers.
-          results_file=$(mktemp)
-          julia --project=. benchmarks.jl markdown | tee "$results_file"
+  post-comment:
+    needs: [benchmark-pr, benchmark-main]
+    # Post the comment as long as the PR-head bench succeeded. If the main
+    # bench failed (e.g. temporarily, until this PR's bench changes land on
+    # main), the comment still goes up with a note in place of main's numbers.
+    if: ${{ !cancelled() && needs.benchmark-pr.result == 'success' }}
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          name: benchmark-pr
+          path: head
+      - uses: actions/download-artifact@v4
+        if: needs.benchmark-main.result == 'success'
+        with:
+          name: benchmark-main
+          path: main
 
+      - name: Build comment body
+        run: |
+          head_sha='${{ github.event.pull_request.head.sha }}'
+          main_sha='${{ needs.benchmark-main.outputs.sha }}'
+          main_status='${{ needs.benchmark-main.result }}'
           {
-            echo "VERSION_INFO<<DPPL_BENCH_EOF"
-            echo "$version_info"
-            echo "DPPL_BENCH_EOF"
-            echo "BENCHMARK_OUTPUT<<DPPL_BENCH_EOF"
-            cat "$results_file"
-            echo "DPPL_BENCH_EOF"
-          } >> "$GITHUB_ENV"
+            echo "## Benchmark Report"
+            echo ""
+            echo "**PR head:** \`${head_sha}\`  "
+            if [[ "$main_status" == "success" ]]; then
+              echo "**Main:** \`${main_sha}\` (foldout below)"
+            else
+              echo "**Main:** benchmark job did not succeed (\`${main_status}\`) — see workflow logs"
+            fi
+            echo ""
+            echo "\`t(logdensity)\`: wall-clock time per log-density evaluation."
+            echo "\`t(grad)/t(logdensity)\`: AD overhead ratio, lower is better."
+            echo ""
+            cat head/results.md
+            echo ""
+            if [[ "$main_status" == "success" ]]; then
+              echo "<details>"
+              echo "<summary><b>Main branch results</b></summary>"
+              echo ""
+              cat main/results.md
+              echo ""
+              echo "</details>"
+              echo ""
+            fi
+            echo "<details>"
+            echo "<summary><b>Computer Information</b></summary>"
+            echo "<pre>"
+            cat head/version_info.txt
+            echo "</pre>"
+            echo "</details>"
+          } > body.md
 
       - name: Find existing benchmark comment
         uses: peter-evans/find-comment@v4
@@ -52,22 +143,6 @@ jobs:
         uses: peter-evans/create-or-update-comment@v5
         with:
           issue-number: ${{ github.event.pull_request.number }}
-          body: |
-            ## Benchmark Report
-
-            - this PR's head: `${{ github.event.pull_request.head.sha }}`
-
-            Absolute log-density times and grad/log-density ratios are
-            reported. To judge whether a PR helps or hurts, compare against
-            the latest comment on a recent main-branch PR run.
-
-            ### Computer Information
-            ```
-            ${{ env.VERSION_INFO }}
-            ```
-            ### Benchmark Results
-
-            ${{ env.BENCHMARK_OUTPUT }}
-
+          body-path: body.md
           comment-id: ${{ steps.find_comment.outputs.comment-id }}
           edit-mode: replace

From ab6ac7cb371e22223e550e682410d6e8b9049bf8 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 5 May 2026 18:27:49 +0100
Subject: [PATCH 3/7] Restructure benchmark report table formatting

Replace the PrettyTables benchmark report with a manual text formatter
modeled on posteriordb-bench: top/bottom `=` rules, centered `eval`
and `gradient` banners, dashed subgroup underlines, and a stub of
Model/dim/linked columns. Keep the current pivoted data shape, with a
shared `primal` column and backend ratio columns labelled FwdDiff,
RvsDiff, Mooncake, and Enzyme.

While there, simplify the renderer by formatting rows once up front and
using a single backend key/label table as the source of truth. Update
the PR comment caption to explain that `primal` is shared
`t(logdensity)` and the backend columns are `t(grad)/t(logdensity)`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/Benchmarking.yml |   3 +-
 benchmarks/benchmarks.jl           | 115 +++++++++++++++++++++++------
 2 files changed, 95 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index ae515de5f..6866ea098 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -109,8 +109,7 @@ jobs:
               echo "**Main:** benchmark job did not succeed (\`${main_status}\`) — see workflow logs"
             fi
             echo ""
-            echo "\`t(logdensity)\`: wall-clock time per log-density evaluation."
-            echo "\`t(grad)/t(logdensity)\`: AD overhead ratio, lower is better."
+            echo "_`primal` is shared \`t(logdensity)\`; AD-backend columns are \`t(grad)/t(logdensity)\` (lower is better)._"
             echo ""
             cat head/results.md
             echo ""
diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index 935941a0f..a9561002d 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -16,7 +16,6 @@ using Enzyme: Enzyme
 using ForwardDiff: ForwardDiff
 using LinearAlgebra: cholesky
 using Mooncake: Mooncake
-using PrettyTables: pretty_table
 using Printf: @sprintf
 using ReverseDiff: ReverseDiff
 using StableRNGs: StableRNG
@@ -179,11 +178,6 @@ end
 #  Reporting
 #
 
-# https://github.com/chalk-lab/Mooncake.jl/blob/main/bench/run_benchmarks.jl
-const COLNAMES = [
-    "Model", "Dim", "AD Backend", "Linked", "t(logdensity)", "t(grad)/t(logdensity)"
-]
-
 fix_sig_fig(t) = string(round(t; sigdigits=3))
 function format_time(t::Float64)
     t < 1e-6 && return fix_sig_fig(t * 1e9) * " ns"
@@ -199,26 +193,105 @@ format_ratio(::Missing) = "err"
 format_dim(d::Integer) = string(d)
 format_dim(::Missing) = "err"
 
+# Pivot so each (Model, Dim, Linked) row spans all backends. A long-form table
+# (one row per (model, linked, backend)) reads as four near-duplicate rows
+# differing only in the backend column; pivoting puts the backends side-by-side
+# where the ratios are actually compared. `t(logdensity)` does not depend on
+# the AD backend (it is the primal model evaluation), so the four primal
+# samples per group are noise around a common value — take the minimum, which
+# is the most stable estimate (see `run_ad`'s benchmark docstring).
+function pivot(results, backends)
+    keys_in_order = Tuple{String,Bool}[]
+    seen = Set{Tuple{String,Bool}}()
+    for r in results
+        k = (r.name, r.islinked)
+        if !(k in seen)
+            push!(seen, k)
+            push!(keys_in_order, k)
+        end
+    end
+    return map(keys_in_order) do (name, islinked)
+        rows = filter(r -> r.name == name && r.islinked == islinked, results)
+        primals = collect(skipmissing(r.t_logd for r in rows))
+        primal = isempty(primals) ? missing : minimum(primals)
+        ratios = Dict{String,Union{Float64,Missing}}(string(b) => missing for b in backends)
+        for r in rows
+            ratios[r.adbackend] = r.ratio
+        end
+        (; name, dim=first(rows).dim, islinked, primal, ratios)
+    end
+end
+
 function print_results(results)
     isempty(results) && return println("No benchmark results obtained.")
-    rows = map(results) do r
+    pivoted = pivot(results, BACKENDS)
+    backend_info = [
+        (key="forwarddiff", label="FwdDiff"),
+        (key="reversediff", label="RvsDiff"),
+        (key="mooncake", label="Mooncake"),
+        (key="enzyme", label="Enzyme"),
+    ]
+
+    rows = map(pivoted) do g
+        ratios = [format_ratio(g.ratios[b.key]) for b in backend_info]
         (
-            r.name,
-            format_dim(r.dim),
-            r.adbackend,
-            r.islinked,
-            format_time(r.t_logd),
-            format_ratio(r.ratio),
+            name = g.name,
+            dim = format_dim(g.dim),
+            linked = string(g.islinked),
+            primal = format_time(g.primal),
+            ratios,
         )
     end
-    matrix = hcat(Iterators.map(collect, zip(rows...))...)
-    return pretty_table(
-        matrix;
-        column_labels=COLNAMES,
-        backend=:text,
-        fit_table_in_display_horizontally=false,
-        fit_table_in_display_vertically=false,
-    )
+
+    name_w = max(length("Model"), maximum(textwidth(r.name) for r in rows)) + 1
+    dim_w = max(length("dim"), maximum(textwidth(r.dim) for r in rows)) + 2
+    linked_w = max(length("linked"), maximum(textwidth(r.linked) for r in rows)) + 2
+    primal_w = max(length("primal"), maximum(textwidth(r.primal) for r in rows)) + 2
+    ratio_ws = [
+        max(length(b.label), maximum(textwidth(r.ratios[i]) for r in rows)) + 2 for
+        (i, b) in enumerate(backend_info)
+    ]
+
+    gap = "  "
+    gap_w = textwidth(gap)
+    stub_w = name_w + dim_w + linked_w + 2 * gap_w
+    eval_w = primal_w
+    grad_w = sum(ratio_ws) + gap_w * (length(ratio_ws) - 1)
+    total_w = stub_w + gap_w + eval_w + gap_w + grad_w
+
+    center(s, w) = lpad(rpad(s, div(w + textwidth(s), 2)), w)
+    println(repeat("=", total_w))
+    println(rpad("", stub_w) * gap * center("eval", eval_w) * gap * center("gradient", grad_w))
+    println(rpad("", stub_w) * gap * repeat("-", eval_w) * gap * repeat("-", grad_w))
+
+    header =
+        rpad("Model", name_w) *
+        gap *
+        lpad("dim", dim_w) *
+        gap *
+        lpad("linked", linked_w) *
+        gap *
+        lpad("primal", primal_w) *
+        gap *
+        join((lpad(b.label, w) for (b, w) in zip(backend_info, ratio_ws)), gap)
+    println(header)
+    println(repeat("-", total_w))
+
+    for r in rows
+        row =
+            rpad(r.name, name_w) *
+            gap *
+            lpad(r.dim, dim_w) *
+            gap *
+            lpad(r.linked, linked_w) *
+            gap *
+            lpad(r.primal, primal_w) *
+            gap *
+            join((lpad(x, w) for (x, w) in zip(r.ratios, ratio_ws)), gap)
+        println(row)
+    end
+    println(repeat("=", total_w))
+    return nothing
 end
 
 #

From 70cf3158f984b4149366492fef5732cb803cee6b Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 5 May 2026 18:33:05 +0100
Subject: [PATCH 4/7] format

---
 benchmarks/benchmarks.jl | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index a9561002d..e6f0f99fb 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -235,10 +235,10 @@ function print_results(results)
     rows = map(pivoted) do g
         ratios = [format_ratio(g.ratios[b.key]) for b in backend_info]
         (
-            name = g.name,
-            dim = format_dim(g.dim),
-            linked = string(g.islinked),
-            primal = format_time(g.primal),
+            name=g.name,
+            dim=format_dim(g.dim),
+            linked=string(g.islinked),
+            primal=format_time(g.primal),
             ratios,
         )
     end
@@ -261,7 +261,9 @@ function print_results(results)
 
     center(s, w) = lpad(rpad(s, div(w + textwidth(s), 2)), w)
     println(repeat("=", total_w))
-    println(rpad("", stub_w) * gap * center("eval", eval_w) * gap * center("gradient", grad_w))
+    println(
+        rpad("", stub_w) * gap * center("eval", eval_w) * gap * center("gradient", grad_w)
+    )
     println(rpad("", stub_w) * gap * repeat("-", eval_w) * gap * repeat("-", grad_w))
 
     header =

From fc7df54d8d9d7e7b699ad1e8f9e5f860e173102a Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 5 May 2026 21:01:35 +0100
Subject: [PATCH 5/7] Clarify benchmark PR comment for general audience

Restructure the comment so the table comes first, followed by a
single paragraph explaining what each column means and how to read
the AD backend ratios. Update the surrounding workflow text:

- "## Benchmark Report" + separate PR head/Main lines collapsed into
  a single "## Benchmarks @ <sha>" heading.
- Foldout summaries shortened to "Main @ <sha>" and "Environment".
- Comparison hint ("compare against `main`") only appears when the
  baseline foldout is actually available.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/Benchmarking.yml | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 6866ea098..918c225b7 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -100,30 +100,26 @@ jobs:
           main_sha='${{ needs.benchmark-main.outputs.sha }}'
           main_status='${{ needs.benchmark-main.result }}'
           {
-            echo "## Benchmark Report"
-            echo ""
-            echo "**PR head:** \`${head_sha}\`  "
-            if [[ "$main_status" == "success" ]]; then
-              echo "**Main:** \`${main_sha}\` (foldout below)"
-            else
-              echo "**Main:** benchmark job did not succeed (\`${main_status}\`) — see workflow logs"
-            fi
-            echo ""
-            echo "_`primal` is shared \`t(logdensity)\`; AD-backend columns are \`t(grad)/t(logdensity)\` (lower is better)._"
+            echo "## Benchmarks @ ${head_sha}"
             echo ""
             cat head/results.md
             echo ""
             if [[ "$main_status" == "success" ]]; then
-              echo "<details>"
-              echo "<summary><b>Main branch results</b></summary>"
+              echo "Each row times one of DynamicPPL's reference models on this PR's head. \`Dim\` is the parameter count; \`Linked\` is \`true\` when parameters have been mapped to unconstrained space. \`t(logdensity)\` is the wall-clock time for one log-density evaluation. The AD (automatic differentiation) backend columns express gradient time as a multiple of \`t(logdensity)\` — a value of \`10\` means computing the gradient takes 10× as long as the log-density. Lower is better throughout; \`err\` means the backend errored on that model. Compare against \`main\` below to spot regressions."
+              echo ""
+              echo "<details><summary>Main @ ${main_sha}</summary>"
               echo ""
               cat main/results.md
               echo ""
               echo "</details>"
               echo ""
+            else
+              echo "Each row times one of DynamicPPL's reference models on this PR's head. \`Dim\` is the parameter count; \`Linked\` is \`true\` when parameters have been mapped to unconstrained space. \`t(logdensity)\` is the wall-clock time for one log-density evaluation. The AD (automatic differentiation) backend columns express gradient time as a multiple of \`t(logdensity)\` — a value of \`10\` means computing the gradient takes 10× as long as the log-density. Lower is better throughout; \`err\` means the backend errored on that model."
+              echo ""
+              echo "_Main benchmark job did not succeed (\`${main_status}\`) — see workflow logs._"
+              echo ""
             fi
-            echo "<details>"
-            echo "<summary><b>Computer Information</b></summary>"
+            echo "<details><summary>Environment</summary>"
             echo "<pre>"
             cat head/version_info.txt
             echo "</pre>"

From 1971c91bc67fd455f5407e3935611ce3ceaac842 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 5 May 2026 21:07:24 +0100
Subject: [PATCH 6/7] Use plain text for benchmark main-job failure note

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .github/workflows/Benchmarking.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 918c225b7..8de69b6c1 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -116,7 +116,7 @@ jobs:
             else
               echo "Each row times one of DynamicPPL's reference models on this PR's head. \`Dim\` is the parameter count; \`Linked\` is \`true\` when parameters have been mapped to unconstrained space. \`t(logdensity)\` is the wall-clock time for one log-density evaluation. The AD (automatic differentiation) backend columns express gradient time as a multiple of \`t(logdensity)\` — a value of \`10\` means computing the gradient takes 10× as long as the log-density. Lower is better throughout; \`err\` means the backend errored on that model."
               echo ""
-              echo "_Main benchmark job did not succeed (\`${main_status}\`) — see workflow logs._"
+              echo "Main benchmark job did not succeed (\`${main_status}\`) — see workflow logs."
               echo ""
             fi
             echo "<details><summary>Environment</summary>"

From 8f4888568c78328b7c878a441db84bfc4cadbed6 Mon Sep 17 00:00:00 2001
From: Hong Ge <hg344@cam.ac.uk>
Date: Tue, 5 May 2026 21:40:52 +0100
Subject: [PATCH 7/7] Print only the full benchmark table in markdown mode

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 benchmarks/benchmarks.jl | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index e6f0f99fb..cab363811 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -331,12 +331,6 @@ function build_combinations(rng)
     return combos
 end
 
-# Representative model whose 8 rows are surfaced as the at-a-glance "gist"
-# in markdown mode. `Smorgasbord` covers the broadest set of DPPL features
-# (scalar/vector/multivariate variables, `~`, `.~`, loops, observations as
-# both arguments and literals), so it is the most informative single row band.
-const GIST_MODEL = "Smorgasbord"
-
 function run(; markdown::Bool=false)
     combinations = build_combinations(StableRNG(23))
     total = length(combinations)
@@ -357,17 +351,6 @@ function run(; markdown::Bool=false)
         push!(results, (; name, dim, adbackend=string(adbackend), islinked, t_logd, ratio))
     end
     if markdown
-        gist = filter(r -> r.name == GIST_MODEL, results)
-        if !isempty(gist)
-            println("### ", GIST_MODEL)
-            println()
-            println("```")
-            print_results(gist)
-            println("```")
-            println()
-        end
-        println("### Full table (", length(results), " rows)")
-        println()
         println("```")
         print_results(results)
         println("```")