diff --git a/DESCRIPTION b/DESCRIPTION index 1151c2f..76e4473 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,11 +31,9 @@ Imports: cli, rlang Suggests: - ggplot2, knitr, microbenchmark, quarto, - scales, TDA, tdaunif, tinysnapshot, diff --git a/vignettes/.gitignore b/vignettes/.gitignore index ea00d00..2fa6440 100644 --- a/vignettes/.gitignore +++ b/vignettes/.gitignore @@ -2,3 +2,5 @@ *.R *.log *_files + +/.quarto/ diff --git a/vignettes/validation-benchmark.qmd b/vignettes/validation-benchmark.qmd index bf526fd..5d61357 100644 --- a/vignettes/validation-benchmark.qmd +++ b/vignettes/validation-benchmark.qmd @@ -4,7 +4,7 @@ vignette: > %\VignetteIndexEntry{Validation and Benchmark of Wasserstein Distances} %\VignetteEngine{quarto::html} %\VignetteEncoding{UTF-8} - %\VignetteDepends{TDA,microbenchmark,ggplot2,scales} + %\VignetteDepends{TDA,microbenchmark} knitr: opts_chunk: collapse: true @@ -20,13 +20,13 @@ persistence diagrams and their implementations in {phutil}, adapted from 2. Benchmark the implementations against those provided by {TDA} (adapted from Dionysus). -In addition to {phutil}, we use {ggplot2} to visualize the benchmark results. -We will also access the {tdaunif} package to generate larger point clouds and the {microbenchmark} package to perform benchmark tests. +In addition to {phutil}, we access the {tdaunif} package to generate larger point clouds and the {microbenchmark} package to perform benchmark tests. ```{r} #| label: setup +#| include: false library(phutil) -library(ggplot2) +init_par <- par() ``` ## Definitions @@ -139,7 +139,7 @@ abline(a = 0, b = 1) points(X, pch = 1) points(Y, pch = 5) segments(X[, 1], X[, 2], c(2, Y[, 1]), c(2, Y[, 2]), lty = 2) -par(oldpar) +par(mar = init_par$mar) ``` Based on these observations, we get this expression for the Wasserstein distance @@ -311,30 +311,43 @@ we convert the results into seconds ahead of formatting the axis in seconds. 
```{r} #| label: fig-benchmark-large #| fig-width: 8 -#| fig-height: 3 +#| fig-height: 4 #| fig-align: 'center' #| fig-retina: 2 #| fig-cap: "Benchmark comparison of Dionysus via {TDA} and Hera via {phutil} on -#| large persistence diagrams: Violin plots of runtime distributions on a common -#| scale." +#| large persistence diagrams: Jitter plots of runtime distributions +#| (time measured in seconds)." -bm_all <- transform(bm_all, expr = as.character(expr), time = unlist(time)) +bm_all <- transform( +  bm_all, +  expr = factor(as.character(expr), levels = c("TDA", "phutil")), +  time = unlist(time) * 1e-9 +) bm_all <- subset(bm_all, select = c(expr, degree, power, time)) -ggplot(bm_all, aes(x = time * 10e-9, y = expr)) + -  facet_grid( -    rows = vars(power), cols = vars(degree), -    labeller = label_both -  ) + -  geom_violin() + -  scale_x_continuous( -    transform = "log10", -    labels = scales::label_timespan(units = "secs") -  ) + -  labs(x = NULL, y = NULL) +xrans <- lapply(seq(0, 2), function(d) range(subset(bm_all, degree == d, time))) +par(mfcol = c(3, 3), mar = c(2, 2, 2, 2) + .1) +for (d in seq(0, 2)) for (p in c(1, 2, Inf)) { +  bm_d_p <- subset(bm_all, degree == d & power == p) +  plot( +    x = bm_d_p$time, xlim = xrans[[d + 1]], +    y = jitter(as.integer(bm_d_p$expr)), yaxt = "n", +    pch = 19 +  ) +  axis(2, at = c(1, 2), labels = levels(bm_d_p$expr)) +  if (p == 1) axis( +    3, at = mean(xrans[[d+1]]), +    tick = FALSE, labels = paste("degree: ", d), padj = 0 +  ) +  if (d == 2) axis( +    4, at = 1.5, +    tick = FALSE, labels = paste("power: ", p), padj = 0 +  ) +} +par(mfcol = init_par$mfcol) ``` We note that Dionysus via {TDA} clearly outperforms Hera via {phutil} on degree-1 PDs, which in these cases have many fewer features. However, the tables are turned in degree 0, in which the PDs have many more features---which, when present, dominate the total computational cost. 
-(The implementations are more evenly matched on the degree-2 PDs, which may have to do with many of them being empty.) +(The implementations are more evenly matched on the least-costly degree-2 PDs, which may have to do with many of them being empty.) While by no means exhaustive and not necessarily representative, these results suggest that Hera via {phutil} scales more efficiently than Dionysus via {TDA} and should therefore be preferred for projects involving more feature-rich data sets. ## References