From ec0ba22e697e5c98a7683bafa2326332ffac1473 Mon Sep 17 00:00:00 2001 From: Markus Schirp Date: Tue, 14 Apr 2026 20:41:57 +0000 Subject: [PATCH 1/2] Make Backend::pull_image fallible with PullError MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously `pull_image` panicked on any failure via `.unwrap()`. The upcoming cache-walk-back flow needs to distinguish "the registry does not have this tag" from "something else went wrong" so it can fall through to an older cache stage without silently swallowing auth or network errors. Introduce `PullError::{NotFound, Other}` and a pure `classify_pull_result` helper that maps (exit status, stderr bytes) to the variant. NotFound is detected via the OCI distribution-spec `MANIFEST_UNKNOWN` error code as rendered on stderr by docker, podman, and skopeo — the substring `"manifest unknown"`. The const comment documents why this is load-bearing string matching and why every better-looking alternative (engine sockets, CLI --json flags, manifest-inspect, direct registry HTTP) was rejected. `pull_image_if_absent` now propagates the Result. The three existing callers (two in pg-ephemeral/tests/base.rs, two in ociman/tests/integration.rs, one in the ociman internal backend::tests module) all `.unwrap()` — they want must-succeed semantics. Reference fields in PullError are boxed to stay under clippy::result-large-err's 128-byte threshold. Unit tests cover the classifier directly with canned stderr strings for the four interesting cases: success, podman not-found, docker not-found, auth failure, network error. No network, no daemon, no registry required. --- ociman/src/backend.rs | 242 +++++++++++++++++++++++++++++++++++-- pg-ephemeral/tests/base.rs | 4 +- 2 files changed, 231 insertions(+), 15 deletions(-) diff --git a/ociman/src/backend.rs b/ociman/src/backend.rs index 3ec588ca..01e3fc7c 100644 --- a/ociman/src/backend.rs +++ b/ociman/src/backend.rs @@ -1,5 +1,121 @@ use cmd_proc::*; +/// Substring used to detect the OCI distribution-spec `MANIFEST_UNKNOWN` +/// error code as rendered on stderr by docker, podman, and skopeo when a +/// registry reports that a tag does not exist. +/// +/// **This is load-bearing string matching and we do not like it.** We fall +/// back on it because there is no better option available today: +/// +/// - Neither `docker pull` nor `podman pull` has a `--json` / `--format` +/// flag. The CLIs only expose human-readable stderr. +/// - Exit codes are useless: both tools return `1` (or `125` for podman) +/// for every failure mode — not-found, auth, network, tls — without +/// discrimination. +/// - The docker/podman engine REST APIs do stream NDJSON, but the error +/// `message` / `errorDetail.message` fields contain the same human +/// string (`... manifest unknown`) — the daemons do not surface the +/// registry's structured error code — so switching to the socket would +/// just move the substring match from stderr to a JSON field, at the +/// cost of a ~400KB HTTP client dependency. No actual signal gain. +/// - `docker manifest inspect` still requires `experimental: enabled` as +/// of Docker 28, and `podman manifest inspect` is local-only. `skopeo +/// inspect` is clean but is a separate binary not always installed +/// (Docker Desktop ships without it). +/// - The only path to a clean spec-defined signal is talking to the +/// registry HTTP API directly, which means reimplementing bearer-token +/// auth, cred-helper integration, and auth challenges — substantial +/// work for a library that otherwise just shells out to the CLI. +/// +/// The OCI Distribution Spec v1.1.0 defines the `MANIFEST_UNKNOWN` error +/// code (code-7) that registries MUST return when a manifest is absent: +/// +/// +/// **However the spec does not mandate this stderr string.** The spec only +/// mandates the uppercase `code` field in the registry's JSON response; +/// the human-readable `message` field is OPTIONAL and its content is +/// unspecified. The lowercase `"manifest unknown"` substring this constant +/// matches is a de-facto convention that docker, podman, and skopeo all +/// happen to use when rendering the error to stderr. If a future CLI +/// version changes its wording, this constant must be updated and a +/// corresponding test will break. +const MANIFEST_UNKNOWN_STDERR_SIGNAL: &str = "manifest unknown"; + +#[derive(Debug, thiserror::Error)] +pub enum PullError { + // The `reference` field is boxed because `image::Reference` is ~176 + // bytes (Name + Vec of PathComponents + Tag + Digest), and + // `clippy::result-large-err` trips on anything >128 bytes inside a + // `Result::Err`. + #[error("image not found in registry: {reference}")] + NotFound { + reference: Box, + }, + #[error("pull failed for {reference}: {message}")] + Other { + reference: Box, + message: String, + }, + #[error(transparent)] + ImagePresent(#[from] ImagePresentError), +} + +/// Turn a pull subprocess exit status + captured stderr into a +/// [`PullError`] (or [`Ok`] on success). +/// +/// Split out from [`Backend::pull_image`] so it can be unit-tested with +/// canned stderr bytes — no network, no daemon, no registry. +fn classify_pull_result( + reference: &crate::image::Reference, + success: bool, + stderr: &[u8], +) -> Result<(), PullError> { + if success { + return Ok(()); + } + + let stderr = String::from_utf8_lossy(stderr); + if stderr.contains(MANIFEST_UNKNOWN_STDERR_SIGNAL) { + Err(PullError::NotFound { + reference: Box::new(reference.clone()), + }) + } else { + Err(PullError::Other { + reference: Box::new(reference.clone()), + message: stderr.trim().to_string(), + }) + } +} + +#[derive(Debug, thiserror::Error)] +#[error("push failed for {reference}: {message}")] +pub struct PushError { + // Boxed for the same reason as `PullError`'s reference fields — see + // that type's comment. + pub reference: Box, + pub message: String, +} + +/// Turn a push subprocess exit status + captured stderr into a +/// [`PushError`] (or [`Ok`] on success). +/// +/// Split out from [`Backend::push_image`] for the same reason as +/// [`classify_pull_result`] — unit-testable without a network or daemon. +fn classify_push_result( + reference: &crate::image::Reference, + success: bool, + stderr: &[u8], +) -> Result<(), PushError> { + if success { + return Ok(()); + } + + Err(PushError { + reference: Box::new(reference.clone()), + message: String::from_utf8_lossy(stderr).trim().to_string(), + }) +} + #[derive(Copy, Clone, Debug, Eq, PartialEq, serde::Deserialize, clap::ValueEnum)] #[serde(rename_all = "snake_case")] pub enum Selection { @@ -212,33 +328,54 @@ impl Backend { .unwrap(); } - /// Pull an image from a registry - pub async fn pull_image(&self, reference: &crate::image::Reference) { - self.command() + /// Pull an image from a registry. + /// + /// Stdout streams to the parent so users see layer progress. Stderr is + /// captured and parsed to distinguish [`PullError::NotFound`] (registry + /// reports `manifest unknown`) from other failures. + pub async fn pull_image(&self, reference: &crate::image::Reference) -> Result<(), PullError> { + let output = self + .command() .arguments(["pull", &reference.to_string()]) - .status() + .stderr_capture() + .accept_nonzero_exit() + .run() .await .unwrap(); + + classify_pull_result(reference, output.status.success(), &output.bytes) } - /// Pull an image only if it's not already present. + /// Pull an image only if it's not already present locally. pub async fn pull_image_if_absent( &self, reference: &crate::image::Reference, - ) -> Result<(), ImagePresentError> { - if !self.is_image_present(reference).await? { - self.pull_image(reference).await; + ) -> Result<(), PullError> { + if self.is_image_present(reference).await? { + Ok(()) + } else { + self.pull_image(reference).await } - Ok(()) } - /// Push an image to a registry - pub async fn push_image(&self, reference: &crate::image::Reference) { - self.command() + /// Push an image to a registry. + /// + /// Stdout streams to the parent so users see upload progress. Stderr + /// is captured and surfaced as [`PushError`] on non-zero exit. Unlike + /// pull, there's no useful sub-discrimination here: every push failure + /// (auth, network, rate limit, missing local image) collapses into the + /// same "it didn't upload" outcome as far as callers are concerned. + pub async fn push_image(&self, reference: &crate::image::Reference) -> Result<(), PushError> { + let output = self + .command() .arguments(["push", &reference.to_string()]) - .status() + .stderr_capture() + .accept_nonzero_exit() + .run() .await .unwrap(); + + classify_push_result(reference, output.status.success(), &output.bytes) } pub async fn remove_image(&self, reference: &crate::image::Reference) { @@ -1038,6 +1175,85 @@ pub mod resolve { mod tests { use super::*; + fn pull_test_reference() -> crate::image::Reference { + "ghcr.io/myorg/pg-ephemeral/main:abc123".parse().unwrap() + } + + #[test] + fn test_classify_pull_result_success() { + let reference = pull_test_reference(); + assert!(classify_pull_result(&reference, true, b"").is_ok()); + } + + #[test] + fn test_classify_pull_result_not_found_podman() { + let reference = pull_test_reference(); + // Representative podman stderr for a non-existent tag. + let stderr = b"Error: initializing source docker://ghcr.io/myorg/pg-ephemeral/main:abc123: reading manifest abc123 in ghcr.io/myorg/pg-ephemeral/main: manifest unknown"; + match classify_pull_result(&reference, false, stderr) { + Err(PullError::NotFound { reference: r }) => assert_eq!(*r, reference), + other => panic!("expected PullError::NotFound, got {other:?}"), + } + } + + #[test] + fn test_classify_pull_result_not_found_docker() { + let reference = pull_test_reference(); + // Representative docker stderr for a non-existent tag. + let stderr = b"Error response from daemon: manifest for ghcr.io/myorg/pg-ephemeral/main:abc123 not found: manifest unknown: manifest unknown"; + match classify_pull_result(&reference, false, stderr) { + Err(PullError::NotFound { reference: r }) => assert_eq!(*r, reference), + other => panic!("expected PullError::NotFound, got {other:?}"), + } + } + + #[test] + fn test_classify_pull_result_auth_failure_is_other() { + let reference = pull_test_reference(); + // Auth failure must NOT be misclassified as NotFound. + let stderr = b"Error response from daemon: pull access denied for ghcr.io/myorg/pg-ephemeral/main, repository does not exist or may require 'docker login': denied: requested access to the resource is denied"; + match classify_pull_result(&reference, false, stderr) { + Err(PullError::Other { + reference: r, + message, + }) => { + assert_eq!(*r, reference); + assert!(message.contains("denied")); + } + other => panic!("expected PullError::Other, got {other:?}"), + } + } + + #[test] + fn test_classify_pull_result_network_error_is_other() { + let reference = pull_test_reference(); + let stderr = b"Error response from daemon: Get https://ghcr.io/v2/: dial tcp: lookup ghcr.io: no such host"; + let result = classify_pull_result(&reference, false, stderr); + assert!(matches!(result, Err(PullError::Other { .. }))); + } + + #[test] + fn test_classify_push_result_success() { + let reference = pull_test_reference(); + assert!(classify_push_result(&reference, true, b"").is_ok()); + } + + #[test] + fn test_classify_push_result_failure_captures_stderr() { + let reference = pull_test_reference(); + let stderr = b"unauthorized: authentication required\n"; + match classify_push_result(&reference, false, stderr) { + Err(PushError { + reference: r, + message, + }) => { + assert_eq!(*r, reference); + assert_eq!(message, "unauthorized: authentication required"); + } + Ok(()) => panic!("expected PushError"), + } + } + #[tokio::test] async fn test_container_resolver_localhost() { let backend = crate::test_backend_setup!(); diff --git a/pg-ephemeral/tests/base.rs b/pg-ephemeral/tests/base.rs index 90ef5d08..5b0e773e 100644 --- a/pg-ephemeral/tests/base.rs +++ b/pg-ephemeral/tests/base.rs @@ -5,7 +5,7 @@ async fn pull_test_images() { let backend = ociman::test_backend_setup!(); let default_image: ociman::image::Reference = (&pg_ephemeral::Image::default()).into(); - backend.pull_image(&default_image).await; + backend.pull_image(&default_image).await.unwrap(); for image in [ &*common::POSTGRES_IMAGE, @@ -13,7 +13,7 @@ async fn pull_test_images() { &*common::NODE_IMAGE, &*ociman::testing::ALPINE_LATEST_IMAGE, ] { - backend.pull_image(image).await; + backend.pull_image(image).await.unwrap(); } } From dfa1f4fd421dbd8bbfb5f54722019d60ad08e383 Mon Sep 17 00:00:00 2001 From: Markus Schirp Date: Tue, 14 Apr 2026 20:47:14 +0000 Subject: [PATCH 2/2] Add cache push/pull support --- .github/workflows/ci.yml | 18 + manager/src/pg_ephemeral.rs | 12 + manager/src/pg_ephemeral/github_actions.rs | 18 + .../github_actions/cache_registry.rs | 307 ++++++++++++++++++ ociman/CHANGELOG.md | 16 + ociman/Cargo.toml | 2 +- ociman/src/backend.rs | 4 +- ociman/src/testing.rs | 186 +++++++++++ ociman/tests/registry.rs | 94 ++++++ pg-ephemeral/CHANGELOG.md | 19 ++ pg-ephemeral/README.md | 73 ++++- pg-ephemeral/src/cli.rs | 10 + pg-ephemeral/src/cli/cache.rs | 25 ++ pg-ephemeral/src/config.rs | 16 + pg-ephemeral/src/definition.rs | 119 +++++++ pg-ephemeral/src/seed.rs | 56 +++- pg-ephemeral/tests/base.rs | 11 + pg-ephemeral/tests/cache.rs | 151 +++++++++ 18 files changed, 1123 insertions(+), 14 deletions(-) create mode 100644 manager/src/pg_ephemeral/github_actions.rs create mode 100644 manager/src/pg_ephemeral/github_actions/cache_registry.rs create mode 100644 ociman/tests/registry.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9c452e3f..5f3089c5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,6 +7,12 @@ env: jobs: build: + permissions: + contents: read + # Required by the cache-registry end-to-end test, which pushes + # test cache images to ghcr.io/${{ github.repository }}/pg-ephemeral-cache-test + # using GITHUB_TOKEN. + packages: write strategy: matrix: os: @@ -87,6 +93,18 @@ jobs: - name: Run doctests run: cargo --verbose test --doc --all-features --release + # End-to-end cache registry round-trip test. Pushes and pulls against + # ghcr.io using GITHUB_TOKEN. Gated to a single linux-musl matrix + # entry: the test is a registry integration check, not a platform + # compat check, so running it on every matrix target would be waste. + - name: Log in to ghcr.io for cache-registry test + if: ${{ matrix.os.cargo_build_target == 'x86_64-unknown-linux-musl' }} + run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin + + - name: Run cache-registry end-to-end test + if: ${{ matrix.os.cargo_build_target == 'x86_64-unknown-linux-musl' }} + run: target/${{ matrix.os.cargo_build_target }}/release/manager pg-ephemeral github-actions cache-registry test + - name: Set up Ruby 3.4 for macOS gem build if: ${{ endsWith(matrix.os.cargo_build_target, '-darwin') }} uses: ruby/setup-ruby@0cb964fd540e0a24c900370abf38a33466142735 # v1.305.0 diff --git a/manager/src/pg_ephemeral.rs b/manager/src/pg_ephemeral.rs index b57e1489..8815e0c2 100644 --- a/manager/src/pg_ephemeral.rs +++ b/manager/src/pg_ephemeral.rs @@ -1,3 +1,4 @@ +pub(crate) mod github_actions; pub(crate) mod npm; pub(crate) mod ruby; @@ -13,6 +14,16 @@ pub(crate) enum Command { #[clap(subcommand)] command: npm::Command, }, + /// GitHub Actions-only commands + /// + /// Subcommands under this namespace assume they run from a GitHub Actions + /// workflow with the environment, secrets, and pre-setup (e.g. + /// `docker login`) that the workflow provides. They are not expected to + /// work from a vanilla local checkout. + GithubActions { + #[clap(subcommand)] + command: github_actions::Command, + }, /// Sync all generated files with Rust source of truth Sync { /// Fail if git is dirty after syncing (for CI verification) @@ -26,6 +37,7 @@ impl Command { match self { Self::Ruby { command } => command.run().await, Self::Npm { command } => command.run().await, + Self::GithubActions { command } => command.run().await, Self::Sync { reject_dirty } => { ruby::sync(*reject_dirty).await?; npm::sync(*reject_dirty).await?; diff --git a/manager/src/pg_ephemeral/github_actions.rs b/manager/src/pg_ephemeral/github_actions.rs new file mode 100644 index 00000000..8ae28de1 --- /dev/null +++ b/manager/src/pg_ephemeral/github_actions.rs @@ -0,0 +1,18 @@ +pub(crate) mod cache_registry; + +#[derive(Debug, clap::Parser)] +pub(crate) enum Command { + /// Cache registry end-to-end tests. + CacheRegistry { + #[clap(subcommand)] + command: cache_registry::Command, + }, +} + +impl Command { + pub(crate) async fn run(&self) -> Result<(), Box> { + match self { + Self::CacheRegistry { command } => command.run().await, + } + } +} diff --git a/manager/src/pg_ephemeral/github_actions/cache_registry.rs b/manager/src/pg_ephemeral/github_actions/cache_registry.rs new file mode 100644 index 00000000..a583dfdd --- /dev/null +++ b/manager/src/pg_ephemeral/github_actions/cache_registry.rs @@ -0,0 +1,307 @@ +use std::path::{Path, PathBuf}; + +use cmd_proc::EnvVariableName; + +const ENV_GITHUB_REPOSITORY: EnvVariableName = + EnvVariableName::from_static_or_panic("GITHUB_REPOSITORY"); +const ENV_GITHUB_RUN_ID: EnvVariableName = EnvVariableName::from_static_or_panic("GITHUB_RUN_ID"); + +#[derive(Debug, thiserror::Error)] +pub(crate) enum Error { + #[error( + "pg-ephemeral binary not found at {path}. Build it first with `cargo build --release --package pg-ephemeral`." + )] + BinaryMissing { path: PathBuf }, + #[error("unsupported host platform: {arch}-{os}")] + UnsupportedHostPlatform { + arch: &'static str, + os: &'static str, + }, + #[error("failed to prepare scratch directory at {path}")] + Scratch { + path: PathBuf, + #[source] + source: std::io::Error, + }, + #[error("pg-ephemeral {args} failed")] + PgEphemeral { + args: String, + #[source] + source: cmd_proc::CommandError, + }, + #[error("failed to parse cache status JSON")] + StatusParse(#[source] serde_json::Error), + #[error("cache status JSON missing required field: {path}")] + StatusShape { path: &'static str }, + #[error( + "cache status sanity check failed: seed {seed} expected status {expected}, got {actual}" + )] + StatusMismatch { + seed: String, + expected: &'static str, + actual: String, + }, + #[error("cache status returned no seeds — test configuration produced an empty chain")] + EmptySeeds, +} + +#[derive(Debug, clap::Parser)] +pub(crate) enum Command { + /// Run the end-to-end cache registry round trip. + /// + /// Populates a small test cache, pushes it to ghcr.io, clears the + /// local store, pulls it back, and verifies the tip becomes a local + /// hit again. Assumes `docker login ghcr.io` has already been done + /// by the caller (the CI workflow handles this via `GITHUB_TOKEN`). + Test, +} + +impl Command { + pub(crate) async fn run(&self) -> Result<(), Box> { + match self { + Self::Test => Ok(test().await?), + } + } +} + +/// Construct the ghcr.io registry path the test uses. +/// +/// On GitHub Actions, `GITHUB_REPOSITORY` is set to `owner/repo` (e.g. +/// `mbj/mrs`). Locally, we fall back to `mbj/mrs` so the command is at +/// least invocable off-CI for debugging. +fn cache_registry() -> String { + let repository = + std::env::var(ENV_GITHUB_REPOSITORY.as_str()).unwrap_or_else(|_| "mbj/mrs".to_string()); + format!("ghcr.io/{repository}/pg-ephemeral-cache-test") +} + +/// Generate a unique, validly-shaped instance name for this run. +/// +/// `InstanceName` only allows `[a-z0-9-]`, no leading/trailing dash, +/// max 63 bytes. We use `GITHUB_RUN_ID` (numeric, always valid) when +/// running on CI and fall back to the local process id otherwise. +fn instance_name() -> String { + let suffix = std::env::var(ENV_GITHUB_RUN_ID.as_str()) + .unwrap_or_else(|_| format!("local-{}", std::process::id())); + format!("ci-{suffix}") +} + +fn pg_ephemeral_binary() -> Result { + let workspace_root = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("manager crate has a parent directory") + .to_path_buf(); + + // Match the cargo target the current manager binary was built with. + // When manager runs from the CI release build, `CARGO_BUILD_TARGET` + // is set via the workflow env. Locally, fall back to the native host + // triple. + let rust_target = match std::env::var("CARGO_BUILD_TARGET") { + Ok(value) => value, + Err(_) => match (std::env::consts::ARCH, std::env::consts::OS) { + ("x86_64", "linux") => "x86_64-unknown-linux-musl".to_string(), + ("aarch64", "linux") => "aarch64-unknown-linux-musl".to_string(), + ("aarch64", "macos") => "aarch64-apple-darwin".to_string(), + (arch, os) => { + return Err(Error::UnsupportedHostPlatform { arch, os }); + } + }, + }; + + let path = workspace_root + .join("target") + .join(&rust_target) + .join("release") + .join("pg-ephemeral"); + + if !path.exists() { + return Err(Error::BinaryMissing { path }); + } + + Ok(path) +} + +/// Build a fresh `database.toml` + seed files inside a scratch directory. +/// The caller is responsible for cleanup. +fn prepare_test_directory(registry: &str, instance_name: &str) -> Result { + let dir = std::env::temp_dir().join(format!( + "pg-ephemeral-ci-cache-registry-test-{}", + std::process::id() + )); + + let wrap = |source: std::io::Error| Error::Scratch { + path: dir.clone(), + source, + }; + + // Start from a clean slate in case a prior local run left crumbs. + if dir.exists() { + std::fs::remove_dir_all(&dir).map_err(wrap)?; + } + std::fs::create_dir_all(&dir).map_err(wrap)?; + + std::fs::write( + dir.join("schema.sql"), + "CREATE TABLE cache_registry_test (id INTEGER PRIMARY KEY);\n", + ) + .map_err(wrap)?; + + std::fs::write( + dir.join("data.sql"), + "INSERT INTO cache_registry_test (id) VALUES (42);\n", + ) + .map_err(wrap)?; + + let toml = indoc::formatdoc! {r#" + image = "17.1" + cache_registry = "{registry}" + + [instances.{instance_name}.seeds.schema] + type = "sql-file" + path = "schema.sql" + + [instances.{instance_name}.seeds.data] + type = "sql-file" + path = "data.sql" + "#}; + std::fs::write(dir.join("database.toml"), toml).map_err(wrap)?; + + Ok(dir) +} + +fn display_args(args: &[&str]) -> String { + args.join(" ") +} + +async fn run_pg_ephemeral(binary: &Path, working_dir: &Path, args: &[&str]) -> Result<(), Error> { + // `cmd_proc::Command::status` returns `Err(CommandError)` on any + // non-zero exit unless `accept_nonzero_exit()` is set, so we just + // propagate. + cmd_proc::Command::new(binary) + .arguments(args) + .working_directory(working_dir) + .status() + .await + .map_err(|source| Error::PgEphemeral { + args: display_args(args), + source, + }) +} + +async fn capture_pg_ephemeral( + binary: &Path, + working_dir: &Path, + args: &[&str], +) -> Result { + cmd_proc::Command::new(binary) + .arguments(args) + .working_directory(working_dir) + .stdout_capture() + .string() + .await + .map_err(|source| Error::PgEphemeral { + args: display_args(args), + source, + }) +} + +fn parse_cache_status(json: &str) -> Result { + serde_json::from_str(json).map_err(Error::StatusParse) +} + +fn assert_all_stages_have_status( + status_json: &serde_json::Value, + expected: &'static str, +) -> Result<(), Error> { + let seeds = status_json["seeds"].as_array().ok_or(Error::StatusShape { + path: "seeds (expected array)", + })?; + if seeds.is_empty() { + return Err(Error::EmptySeeds); + } + + for seed in seeds { + let name = seed["name"].as_str().ok_or(Error::StatusShape { + path: "seeds[].name", + })?; + let status = seed["status"].as_str().ok_or(Error::StatusShape { + path: "seeds[].status", + })?; + if status != expected { + return Err(Error::StatusMismatch { + seed: name.to_string(), + expected, + actual: status.to_string(), + }); + } + } + Ok(()) +} + +fn assert_tip_hit(status_json: &serde_json::Value) -> Result<(), Error> { + let seeds = status_json["seeds"].as_array().ok_or(Error::StatusShape { + path: "seeds (expected array)", + })?; + let tip = seeds.last().ok_or(Error::EmptySeeds)?; + let name = tip["name"].as_str().ok_or(Error::StatusShape { + path: "seeds[last].name", + })?; + let status = tip["status"].as_str().ok_or(Error::StatusShape { + path: "seeds[last].status", + })?; + if status != "hit" { + return Err(Error::StatusMismatch { + seed: name.to_string(), + expected: "hit", + actual: status.to_string(), + }); + } + Ok(()) +} + +async fn test() -> Result<(), Error> { + let registry = cache_registry(); + let instance = instance_name(); + let binary = pg_ephemeral_binary()?; + + log::info!("Using cache_registry: {registry}"); + log::info!("Using instance name: {instance}"); + log::info!("Using pg-ephemeral: {}", binary.display()); + + let dir = prepare_test_directory(®istry, &instance)?; + let cache_args: &[&str] = &["cache", "--instance", instance.as_str()]; + + let populate_args: Vec<&str> = [cache_args, &["populate"]].concat(); + let push_args: Vec<&str> = [cache_args, &["push"]].concat(); + let reset_args: Vec<&str> = [cache_args, &["reset", "--force"]].concat(); + let pull_args: Vec<&str> = [cache_args, &["pull"]].concat(); + let status_args: Vec<&str> = [cache_args, &["status", "--json"]].concat(); + + log::info!("Step 1/6: cache populate"); + run_pg_ephemeral(&binary, &dir, &populate_args).await?; + + log::info!("Step 2/6: cache push"); + run_pg_ephemeral(&binary, &dir, &push_args).await?; + + log::info!("Step 3/6: cache reset --force (clear local cache)"); + run_pg_ephemeral(&binary, &dir, &reset_args).await?; + + log::info!("Step 4/6: verify cache is empty locally"); + let status_after_reset = capture_pg_ephemeral(&binary, &dir, &status_args).await?; + let parsed = parse_cache_status(&status_after_reset)?; + assert_all_stages_have_status(&parsed, "miss")?; + + log::info!("Step 5/6: cache pull (should walk back and land on tip)"); + run_pg_ephemeral(&binary, &dir, &pull_args).await?; + + log::info!("Step 6/6: verify tip is now a local hit"); + let status_after_pull = capture_pg_ephemeral(&binary, &dir, &status_args).await?; + let parsed = parse_cache_status(&status_after_pull)?; + assert_tip_hit(&parsed)?; + + log::info!("Cleanup: remove scratch directory"); + let _ = std::fs::remove_dir_all(&dir); + + log::info!("Cache registry end-to-end test PASSED"); + Ok(()) +} diff --git a/ociman/CHANGELOG.md b/ociman/CHANGELOG.md index 5d972c68..47d0d55f 100644 --- a/ociman/CHANGELOG.md +++ b/ociman/CHANGELOG.md @@ -88,6 +88,22 @@ that wanted both flags must chain `.tty().interactive()`. The split enables binary stdin/stdout pipe-through (e.g. `pg_dump`, `pg_restore`) without TTY line-buffering / CRLF translation corrupting the stream. +- `ociman::testing::REGISTRY_IMAGE` — `registry:2` reference for the + prefetch test alongside `ALPINE_LATEST_IMAGE`. +- `ociman::testing::EphemeralRegistry` — spins up a `registry:2` container + on an ephemeral host port for integration tests. Fallible `start` + (returns `EphemeralRegistryStartError` on host-port read or readiness + timeout). `reference(path, tag)` takes typed `reference::Path` and + `reference::Tag` and constructs the `image::Reference` directly. The + parsed `Domain` is exposed via `domain()`. Cleanup is via an explicit + `async fn shutdown(self)` — there is no `Drop` impl, so a panicking test + leaks the container (cleared by ` container prune`). `push` and + `pull` route through the same `classify_pull_result` / + `classify_push_result` helpers as `Backend::pull_image` / + `Backend::push_image`, but conditionally pass `--tls-verify=false` on + Podman since Podman intentionally does not treat localhost as + insecure-by-default (containers/image#544). Production push/pull are + unchanged. ## 0.5.0 diff --git a/ociman/Cargo.toml b/ociman/Cargo.toml index db0cff14..d597a7f6 100644 --- a/ociman/Cargo.toml +++ b/ociman/Cargo.toml @@ -23,7 +23,7 @@ cmd-proc.workspace = true dirs.workspace = true hex.workspace = true ipnet.workspace = true -tokio.workspace = true +tokio = { workspace = true, features = ["net", "time"] } log.workspace = true nom.workspace = true rustix.workspace = true diff --git a/ociman/src/backend.rs b/ociman/src/backend.rs index 01e3fc7c..6df868a3 100644 --- a/ociman/src/backend.rs +++ b/ociman/src/backend.rs @@ -65,7 +65,7 @@ pub enum PullError { /// /// Split out from [`Backend::pull_image`] so it can be unit-tested with /// canned stderr bytes — no network, no daemon, no registry. -fn classify_pull_result( +pub(crate) fn classify_pull_result( reference: &crate::image::Reference, success: bool, stderr: &[u8], @@ -101,7 +101,7 @@ pub struct PushError { /// /// Split out from [`Backend::push_image`] for the same reason as /// [`classify_pull_result`] — unit-testable without a network or daemon. -fn classify_push_result( +pub(crate) fn classify_push_result( reference: &crate::image::Reference, success: bool, stderr: &[u8], diff --git a/ociman/src/testing.rs b/ociman/src/testing.rs index 705660bc..01a39887 100644 --- a/ociman/src/testing.rs +++ b/ociman/src/testing.rs @@ -2,10 +2,16 @@ //! //! This module provides helpers for writing tests that interact with container runtimes. +use std::time::{Duration, Instant}; + /// Alpine latest image used by tests. pub static ALPINE_LATEST_IMAGE: std::sync::LazyLock = std::sync::LazyLock::new(|| "alpine:latest".parse().unwrap()); +/// OCI distribution registry image used by tests that need an ephemeral registry. +pub static REGISTRY_IMAGE: std::sync::LazyLock = + std::sync::LazyLock::new(|| "registry:2".parse().unwrap()); + #[allow(clippy::test_attr_in_doctest)] /// Check if the current platform is not supported for container tests /// @@ -118,3 +124,183 @@ macro_rules! test_backend_setup { .expect("No container backend detected") }}; } + +const REGISTRY_CONTAINER_PORT: u16 = 5000; +const READY_TIMEOUT: Duration = Duration::from_secs(30); +const READY_POLL_INTERVAL: Duration = Duration::from_millis(100); + +#[derive(Debug, thiserror::Error)] +pub enum EphemeralRegistryStartError { + #[error("failed to read registry host port: {0}")] + ReadHostPort(#[from] crate::ReadHostTcpPortError), + #[error("registry at {host} did not accept TCP connections within {timeout:?}")] + ReadyTimeout { + host: crate::reference::Domain, + timeout: Duration, + }, +} + +/// A `registry:2` container running on an ephemeral host port. +/// +/// Spins up a `registry:2` container for integration tests, exposes a +/// helper to mint references against it, and provides an explicit async +/// [`shutdown`] for teardown. There is no `Drop` impl: if a test panics +/// before reaching `shutdown`, the container leaks. The runtime daemon +/// retains it until the next ` container prune` (or until the +/// host is restarted). +/// +/// The registry serves plain HTTP. Docker auto-treats localhost as +/// insecure; Podman does not (intentionally — see +/// ), so the [`pull`] and +/// [`push`] methods add `--tls-verify=false` when the backend is Podman. +/// Production [`Backend::pull_image`] and [`Backend::push_image`] are +/// unchanged and never set that flag. +/// +/// [`pull`]: Self::pull +/// [`push`]: Self::push +/// [`shutdown`]: Self::shutdown +/// [`Backend::pull_image`]: crate::Backend::pull_image +/// [`Backend::push_image`]: crate::Backend::push_image +#[derive(Debug)] +pub struct EphemeralRegistry { + backend: crate::Backend, + container: crate::Container, + domain: crate::reference::Domain, +} + +impl EphemeralRegistry { + /// Start a fresh registry container and wait for it to accept TCP + /// connections on the published host port. + pub async fn start(backend: &crate::Backend) -> Result { + let container = crate::container::Definition::new(backend.clone(), REGISTRY_IMAGE.clone()) + .remove() + .publish(crate::Publish::tcp(REGISTRY_CONTAINER_PORT)) + .run_detached() + .await; + + let host_port = container + .read_host_tcp_port(REGISTRY_CONTAINER_PORT) + .await?; + + // PortNumber and DomainComponent lack public constructors, so the + // typed Domain is recovered via parse on the synthesised string. + // This happens once per registry, not per minted reference. + let domain: crate::reference::Domain = format!("localhost:{host_port}").parse().unwrap(); + + wait_for_ready(&domain).await?; + + Ok(Self { + backend: backend.clone(), + container, + domain, + }) + } + + /// Returns the domain (host + port) the registry is reachable at. + #[must_use] + pub fn domain(&self) -> &crate::reference::Domain { + &self.domain + } + + /// Build an image reference targeted at this registry. + #[must_use] + pub fn reference( + &self, + path: crate::reference::Path, + tag: crate::reference::Tag, + ) -> crate::image::Reference { + crate::image::Reference { + name: crate::reference::Name { + domain: Some(self.domain.clone()), + path, + }, + tag: Some(tag), + digest: None, + } + } + + /// Push an image to this registry, skipping TLS verification on Podman. + /// + /// Routes the result through the same classification logic as + /// [`Backend::push_image`](crate::Backend::push_image) so error + /// pinning tests still validate the production code path. + pub async fn push( + &self, + reference: &crate::image::Reference, + ) -> Result<(), crate::backend::PushError> { + let output = self + .registry_command("push", reference) + .stderr_capture() + .accept_nonzero_exit() + .run() + .await + .unwrap(); + + crate::backend::classify_push_result(reference, output.status.success(), &output.bytes) + } + + /// Pull an image from this registry, skipping TLS verification on Podman. + /// + /// Routes the result through the same classification logic as + /// [`Backend::pull_image`](crate::Backend::pull_image). + pub async fn pull( + &self, + reference: &crate::image::Reference, + ) -> Result<(), crate::backend::PullError> { + let output = self + .registry_command("pull", reference) + .stderr_capture() + .accept_nonzero_exit() + .run() + .await + .unwrap(); + + crate::backend::classify_pull_result(reference, output.status.success(), &output.bytes) + } + + /// Remove the registry container. + /// + /// Consumes the registry. Returns the underlying command error if the + /// runtime CLI exits non-zero. The container was started with `--rm` + /// so the daemon auto-removes it once the force-rm completes. + pub async fn shutdown(self) -> Result<(), cmd_proc::CommandError> { + self.backend + .command() + .arguments(["container", "rm", "--force"]) + .argument(self.container.id()) + .status() + .await + } + + fn registry_command( + &self, + subcommand: &'static str, + reference: &crate::image::Reference, + ) -> cmd_proc::Command { + let command = self.backend.command().argument(subcommand); + let command = match self.backend { + crate::Backend::Podman { .. } => command.argument("--tls-verify=false"), + crate::Backend::Docker { .. } => command, + }; + command.argument(reference.to_string()) + } +} + +async fn wait_for_ready( + domain: &crate::reference::Domain, +) -> Result<(), EphemeralRegistryStartError> { + let address = domain.to_string(); + let deadline = Instant::now() + READY_TIMEOUT; + loop { + if tokio::net::TcpStream::connect(&address).await.is_ok() { + return Ok(()); + } + if Instant::now() > deadline { + return Err(EphemeralRegistryStartError::ReadyTimeout { + host: domain.clone(), + timeout: READY_TIMEOUT, + }); + } + tokio::time::sleep(READY_POLL_INTERVAL).await; + } +} diff --git a/ociman/tests/registry.rs b/ociman/tests/registry.rs new file mode 100644 index 00000000..561ae749 --- /dev/null +++ b/ociman/tests/registry.rs @@ -0,0 +1,94 @@ +//! Integration tests against an ephemeral OCI distribution registry. +//! +//! Each test stands up a fresh `registry:2` container on an ephemeral host +//! port via [`ociman::testing::EphemeralRegistry`], so tests do not share +//! registry state and can run in parallel. Push and pull traffic to the +//! registry goes through the helper's `push`/`pull` methods so Podman +//! skips TLS verification for the plain-HTTP local registry; production +//! [`Backend::push_image`]/[`Backend::pull_image`] are not involved in +//! those calls but share their classification logic, so error pinning +//! still validates the production path. +//! +//! Tests must call [`EphemeralRegistry::shutdown`] explicitly. There is +//! no Drop fallback — a panicking test leaks the container. + +use ociman::backend::PullError; + +#[tokio::test] +async fn test_push_pull_round_trip() { + let backend = ociman::test_backend_setup!(); + let registry = ociman::testing::EphemeralRegistry::start(&backend) + .await + .unwrap(); + + let source = ociman::testing::ALPINE_LATEST_IMAGE.clone(); + let target = registry.reference( + "ociman-test/round-trip".parse().unwrap(), + "v1".parse().unwrap(), + ); + + backend.pull_image_if_absent(&source).await.unwrap(); + backend.tag_image(&source, &target).await; + + registry.push(&target).await.unwrap(); + + // Force-remove so the subsequent pull is the only path to a present + // image — proves the bytes round-tripped through the registry. + backend.remove_image_force(&target).await; + assert!(!backend.is_image_present(&target).await.unwrap()); + + registry.pull(&target).await.unwrap(); + assert!(backend.is_image_present(&target).await.unwrap()); + + backend.remove_image_force(&target).await; + registry.shutdown().await.unwrap(); +} + +#[tokio::test] +async fn test_pull_image_not_found_real_registry() { + let backend = ociman::test_backend_setup!(); + let registry = ociman::testing::EphemeralRegistry::start(&backend) + .await + .unwrap(); + + // Never pushed — registry must reject with `manifest unknown`. + let missing = registry.reference( + "ociman-test/missing".parse().unwrap(), + "v1".parse().unwrap(), + ); + + match registry.pull(&missing).await { + Err(PullError::NotFound { reference }) => assert_eq!(*reference, missing), + other => panic!("expected PullError::NotFound, got {other:?}"), + } + + registry.shutdown().await.unwrap(); +} + +#[tokio::test] +async fn test_pull_image_if_absent_skips_when_present() { + let backend = ociman::test_backend_setup!(); + let registry = ociman::testing::EphemeralRegistry::start(&backend) + .await + .unwrap(); + + let source = ociman::testing::ALPINE_LATEST_IMAGE.clone(); + let target = registry.reference( + "ociman-test/short-circuit".parse().unwrap(), + "v1".parse().unwrap(), + ); + + backend.pull_image_if_absent(&source).await.unwrap(); + backend.tag_image(&source, &target).await; + registry.push(&target).await.unwrap(); + + // Image is now present locally. Shut down the registry to prove the + // next call cannot reach it; if `pull_image_if_absent` actually skips + // the network when the image is present, this still succeeds. + registry.shutdown().await.unwrap(); + + backend.pull_image_if_absent(&target).await.unwrap(); + assert!(backend.is_image_present(&target).await.unwrap()); + + backend.remove_image_force(&target).await; +} diff --git a/pg-ephemeral/CHANGELOG.md b/pg-ephemeral/CHANGELOG.md index f6fcea50..40a1a638 100644 --- a/pg-ephemeral/CHANGELOG.md +++ b/pg-ephemeral/CHANGELOG.md @@ -171,6 +171,25 @@ an image without re-deriving it from a `Definition`. ## 0.3.2 +### Added + +- `cache_registry` config field and `--cache-registry` CLI flag. When set, + all cache image references are prefixed with the given OCI registry name + (e.g. `ghcr.io/myorg`), so cache images can be pushed and pulled to share + warm cache state across machines. The cache key hash is unaffected — + different registries do not fragment the cache. +- `pg-ephemeral cache pull` subcommand. Walks the seed chain from tip + backwards and pulls the newest stage that exists in the configured + registry, then stops. Requires `cache_registry` to be set. +- `pg-ephemeral cache push` subcommand. Pushes every locally-cached + stage to the configured registry. Requires `cache_registry` to be set. + +### Changed + +- CLI errors are now printed using their user-facing `Display` form + (e.g. "Error: cache_registry must be set …") instead of the internal + `Debug` form (e.g. "CacheSync(RegistryNotSet)"). + ### Fixed - `bundle exec pg-ephemeral` now invokes the binary shipped with the Ruby gem diff --git a/pg-ephemeral/README.md b/pg-ephemeral/README.md index a97df448..2da9a9cf 100644 --- a/pg-ephemeral/README.md +++ b/pg-ephemeral/README.md @@ -69,6 +69,7 @@ cache = { type = "none" } |--------------------------|----------------------------------------------------------------------| | `image` | PostgreSQL version / image tag (e.g. `"17.1"`) | | `backend` | `"docker"`, `"podman"`, or omit for auto-detection (see below) | +| `cache_registry` | OCI registry prefix for cache images (e.g. `"ghcr.io/myorg"`). See [Sharing cache across machines](#sharing-cache-across-machines). | | `ssl_config` | SSL configuration with `hostname` field ([example](https://github.com/mbj/mrs/tree/main/pg-ephemeral/examples/08-ssl)) | | `wait_available_timeout` | How long to wait for PostgreSQL to accept connections (e.g. `"30s"`) | @@ -185,6 +186,12 @@ pg-ephemeral cache inspect pg-ephemeral/main: # Pre-populate the cache without running an interactive session pg-ephemeral cache populate +# Pull cache images from the configured registry (requires cache_registry) +pg-ephemeral cache pull + +# Push locally-cached stages to the configured registry (requires cache_registry) +pg-ephemeral cache push + # Remove cached images pg-ephemeral cache reset @@ -210,6 +217,57 @@ into the cache key; the strategy layers additional inputs on top: | `{ type = "key-script", script = "..." }` | Run a script; its stdout is folded into the cache key alongside the seed content. | | `{ type = "none" }` | Disable caching. Breaks the cache chain for this and all subsequent seeds. | +### Sharing cache across machines + +By default, cache images are named `pg-ephemeral/:` and live +only in the local Docker/Podman image store. Set `cache_registry` to a remote +OCI registry prefix and every cache image gains that prefix, so references +become push/pullable addresses in a registry you can share across machines +(CI runners, developer laptops, production build hosts): + +```toml +image = "17.1" +cache_registry = "ghcr.io/myorg" + +[instances.main.seeds.schema] +type = "sql-file" +path = "schema.sql" +``` + +The `cache_registry` value can be any valid OCI registry name — just a host +(`ghcr.io`), a host plus namespace (`ghcr.io/myorg`), or a private registry +(`registry.example.com:5000/team`). `pg-ephemeral cache status` will now +report references like `ghcr.io/myorg/pg-ephemeral/main:`. + +**The cache key hash is not affected by `cache_registry`.** Two machines +pointed at different registries still compute the same hex for the same +content, and switching a project from no registry to a registry (or between +registries) does not invalidate any existing cache. + +Once `cache_registry` is set, use the two dedicated subcommands to move +cache images between the local image store and the remote registry: + +```sh +# Pull the newest cached stage from the registry that exists remotely. +# Walks the seed chain from tip backwards and stops on the first hit. +pg-ephemeral cache pull + +# Populate anything still missing locally, then push everything that's +# now cached locally to the registry. Typical CI shape: +pg-ephemeral cache pull && pg-ephemeral cache populate && pg-ephemeral cache push +``` + +Registry authentication is handled entirely by the underlying container +CLI (`docker login`, `podman login`, or cred-helper integration) — no +pg-ephemeral-specific setup required. + +You can override the registry on a single invocation with `--cache-registry` +without editing `database.toml`: + +```sh +pg-ephemeral --cache-registry ghcr.io/myorg cache pull +``` + ## Rust Library pg-ephemeral can be used as a Rust library for integration tests or any code that needs @@ -447,7 +505,7 @@ Commands: shell Run an interactive shell host Operations executed on the host (psql, run-env, shell, schema-dump) container Operations executed inside the container (psql, run-env, shell, schema-dump) - cache Cache management (status, credentials, inspect, populate, reset) + cache Cache management (status, credentials, inspect, populate, pull, push, reset) integration-server Run integration server (pipe-based control protocol) list List defined instances meta Backend introspection (info) @@ -461,12 +519,13 @@ executes as the host user. Use `host ` for a host-side process, or When invoked with no subcommand, pg-ephemeral defaults to `psql`. Options: - --config-file Config file path (default: database.toml) - --no-config-file Use defaults, ignore any config file - --backend Override backend (docker, podman) - --image Override PostgreSQL image - --ssl-hostname Enable SSL with the specified hostname - --instance Target instance (default: main) + --config-file Config file path (default: database.toml) + --no-config-file Use defaults, ignore any config file + --backend Override backend (docker, podman) + --cache-registry Override cache_registry from config (e.g. ghcr.io/myorg) + --image Override PostgreSQL image + --ssl-hostname Enable SSL with the specified hostname + --instance Target instance (default: main) ``` ## How it compares to testcontainers diff --git a/pg-ephemeral/src/cli.rs b/pg-ephemeral/src/cli.rs index e44f66e7..175337d0 100644 --- a/pg-ephemeral/src/cli.rs +++ b/pg-ephemeral/src/cli.rs @@ -20,6 +20,8 @@ pub enum Error { Container(#[from] crate::container::Error), #[error(transparent)] EnvVariableValue(#[from] cmd_proc::EnvVariableValueError), + #[error(transparent)] + CacheSync(#[from] crate::definition::CacheSyncError), #[error("Unknown instance: {0}")] UnknownInstance(InstanceName), #[error("Instance {instance} has no seeds; cache credentials requires a cacheable seed")] @@ -106,6 +108,13 @@ pub struct App { /// If the autodetection fails exits with an error. #[arg(long)] backend: Option, + /// Overwrite cache registry + /// + /// When set, all cache image references are prefixed with this registry + /// name (e.g. `ghcr.io/myorg`), enabling push/pull against a remote + /// registry. Does not affect cache key hashing. + #[arg(long)] + cache_registry: Option, /// Overwrite image #[arg(long)] image: Option, @@ -120,6 +129,7 @@ impl App { pub async fn run(&self) -> Result<(), Error> { let overwrites = crate::config::InstanceDefinition { backend: self.backend, + cache_registry: self.cache_registry.clone(), image: self.image.clone(), parameters: pg_client::parameter::Map::new(), seeds: indexmap::IndexMap::new(), diff --git a/pg-ephemeral/src/cli/cache.rs b/pg-ephemeral/src/cli/cache.rs index 6517814c..876b8de0 100644 --- a/pg-ephemeral/src/cli/cache.rs +++ b/pg-ephemeral/src/cli/cache.rs @@ -39,6 +39,17 @@ pub enum Command { #[arg(long = "seed-name")] seed_name: Option, }, + /// Pull cache images from the configured registry. + /// + /// Walks the seed chain from tip backwards and pulls the newest stage + /// that exists remotely. Requires `cache_registry` to be set. + Pull, + /// Push all locally-cached stages to the configured registry. + /// + /// Pushes every stage currently stored locally (status "hit"). Stages + /// not yet populated locally are skipped. Requires `cache_registry` to + /// be set. + Push, } impl Command { @@ -156,6 +167,20 @@ impl Command { .map_err(crate::container::Error::SerializeImageMetadata)?; println!("{json}"); } + Self::Pull => { + let definition = super::get_instance(instance_map, instance_name)? + .definition(instance_name) + .await + .unwrap(); + definition.pull_cache(instance_name).await?; + } + Self::Push => { + let definition = super::get_instance(instance_map, instance_name)? + .definition(instance_name) + .await + .unwrap(); + definition.push_cache(instance_name).await?; + } } Ok(()) } diff --git a/pg-ephemeral/src/config.rs b/pg-ephemeral/src/config.rs index 5eb2ce10..e709d6ea 100644 --- a/pg-ephemeral/src/config.rs +++ b/pg-ephemeral/src/config.rs @@ -7,6 +7,7 @@ use crate::seed::{Command, Seed, SeedCacheConfig, SeedName}; pub struct Instance { pub application_name: Option, pub backend: ociman::backend::Selection, + pub cache_registry: Option, pub database: pg_client::Database, pub parameters: pg_client::parameter::Map, pub seeds: indexmap::IndexMap, @@ -23,6 +24,7 @@ impl Instance { Self { backend, application_name: None, + cache_registry: None, parameters: pg_client::parameter::Map::new(), seeds: indexmap::IndexMap::new(), ssl_config: None, @@ -42,6 +44,7 @@ impl Instance { instance_name: instance_name.clone(), application_name: self.application_name.clone(), backend: self.backend.resolve().await?, + cache_registry: self.cache_registry.clone(), database: self.database.clone(), parameters: self.parameters.clone(), seeds: self.seeds.clone(), @@ -312,6 +315,7 @@ pub struct SslConfigDefinition { #[serde(deny_unknown_fields)] pub struct InstanceDefinition { pub backend: Option, + pub cache_registry: Option, pub image: Option, #[serde(default)] pub parameters: pg_client::parameter::Map, @@ -327,6 +331,7 @@ impl InstanceDefinition { pub fn empty() -> Self { Self { backend: None, + cache_registry: None, image: None, parameters: pg_client::parameter::Map::new(), seeds: indexmap::IndexMap::new(), @@ -362,6 +367,13 @@ impl InstanceDefinition { .or(defaults.backend) .unwrap_or(ociman::backend::Selection::Auto); + let cache_registry = overwrites + .cache_registry + .as_ref() + .or(self.cache_registry.as_ref()) + .or(defaults.cache_registry.as_ref()) + .cloned(); + let seeds = self .seeds .into_iter() @@ -386,6 +398,7 @@ impl InstanceDefinition { Ok(Instance { application_name: None, backend, + cache_registry, database: pg_client::Database::POSTGRES, parameters: self.parameters, seeds, @@ -403,6 +416,7 @@ impl InstanceDefinition { pub struct Config { image: Option, backend: Option, + cache_registry: Option, ssl_config: Option, #[serde(default, with = "humantime_serde")] wait_available_timeout: Option, @@ -414,6 +428,7 @@ impl std::default::Default for Config { Self { image: Some(Image::default()), backend: None, + cache_registry: None, ssl_config: None, wait_available_timeout: None, instances: None, @@ -508,6 +523,7 @@ impl Config { ) -> Result { let defaults = InstanceDefinition { backend: self.backend, + cache_registry: self.cache_registry.clone(), image: self.image.clone(), parameters: pg_client::parameter::Map::new(), seeds: indexmap::IndexMap::new(), diff --git a/pg-ephemeral/src/definition.rs b/pg-ephemeral/src/definition.rs index 5a29103d..a7beba9d 100644 --- a/pg-ephemeral/src/definition.rs +++ b/pg-ephemeral/src/definition.rs @@ -15,6 +15,22 @@ pub enum SeedApplyError { EnvVariableValue(#[from] cmd_proc::EnvVariableValueError), } +/// Errors from cache sync operations ([`Definition::pull_cache`] / +/// [`Definition::push_cache`]). +#[derive(Debug, thiserror::Error)] +pub enum CacheSyncError { + #[error( + "cache_registry must be set in database.toml or via --cache-registry to use this command" + )] + RegistryNotSet, + #[error(transparent)] + SeedLoad(#[from] LoadError), + #[error(transparent)] + Pull(#[from] ociman::backend::PullError), + #[error(transparent)] + Push(#[from] ociman::backend::PushError), +} + #[derive(Clone, Debug, PartialEq)] pub enum SslConfig { Generated { @@ -71,6 +87,7 @@ pub struct Definition { pub instance_name: crate::InstanceName, pub application_name: Option, pub backend: ociman::Backend, + pub cache_registry: Option, pub database: pg_client::Database, pub parameters: pg_client::parameter::Map, pub seeds: indexmap::IndexMap, @@ -100,6 +117,7 @@ impl Definition { instance_name, backend, application_name: None, + cache_registry: None, parameters: pg_client::parameter::Map::new(), seeds: indexmap::IndexMap::new(), ssl_config: None, @@ -168,6 +186,7 @@ impl Definition { &self.seeds, &self.backend, instance_name, + self.cache_registry.as_ref(), ) .await } @@ -424,6 +443,106 @@ impl Definition { Ok((previous_cache_reference.cloned(), Vec::new())) } + /// Pull cache images from the configured registry by walking the seed + /// chain from tip backwards. + /// + /// Returns as soon as any cacheable stage lands locally: + /// - Already present locally (Hit) → return immediately, nothing to pull. + /// - Missing locally (Miss) → attempt to pull from the registry; on + /// success return, on [`PullError::NotFound`] walk to the next older + /// stage, on [`PullError::Other`] abort. + /// - Uncacheable → skip and continue walking. + /// + /// Returns `Ok(())` even if no cached stage was found in the registry — + /// the caller can tell the difference via logs. + /// + /// # Errors + /// + /// Returns [`CacheSyncError::RegistryNotSet`] if the definition has no + /// `cache_registry` configured. + pub async fn pull_cache( + &self, + instance_name: &crate::InstanceName, + ) -> Result<(), CacheSyncError> { + if self.cache_registry.is_none() { + return Err(CacheSyncError::RegistryNotSet); + } + + let loaded_seeds = self.load_seeds(instance_name).await?; + let seeds: Vec<&LoadedSeed> = loaded_seeds.iter_seeds().collect(); + + for seed in seeds.iter().rev() { + use crate::seed::CacheStatus; + match seed.cache_status() { + CacheStatus::Uncacheable => { + log::debug!("cache pull: skipping uncacheable seed {}", seed.name()); + continue; + } + CacheStatus::Hit { reference, .. } => { + log::info!( + "cache pull: {} already present locally at {reference}", + seed.name() + ); + return Ok(()); + } + CacheStatus::Miss { reference, .. } => { + log::info!("cache pull: attempting {reference}"); + match self.backend.pull_image(reference).await { + Ok(()) => { + log::info!("cache pull: pulled {reference}"); + return Ok(()); + } + Err(ociman::backend::PullError::NotFound { .. }) => { + log::debug!("cache pull: {reference} not in registry, walking back"); + continue; + } + Err(error) => return Err(error.into()), + } + } + } + } + + log::info!("cache pull: no cached stage found in registry"); + Ok(()) + } + + /// Push all locally-cached stages to the configured registry. + /// + /// Iterates the seed chain in order and pushes every stage whose local + /// cache status is [`CacheStatus::Hit`](crate::seed::CacheStatus::Hit). + /// [`CacheStatus::Miss`](crate::seed::CacheStatus::Miss) and + /// [`CacheStatus::Uncacheable`](crate::seed::CacheStatus::Uncacheable) + /// stages are skipped. Aborts on the first push failure. + /// + /// # Errors + /// + /// Returns [`CacheSyncError::RegistryNotSet`] if the definition has no + /// `cache_registry` configured. + pub async fn push_cache( + &self, + instance_name: &crate::InstanceName, + ) -> Result<(), CacheSyncError> { + if self.cache_registry.is_none() { + return Err(CacheSyncError::RegistryNotSet); + } + + let loaded_seeds = self.load_seeds(instance_name).await?; + let mut pushed_any = false; + + for seed in loaded_seeds.iter_seeds() { + if let crate::seed::CacheStatus::Hit { reference, .. } = seed.cache_status() { + log::info!("cache push: pushing {reference}"); + self.backend.push_image(reference).await?; + pushed_any = true; + } + } + + if !pushed_any { + log::info!("cache push: no locally cached stages to push"); + } + Ok(()) + } + pub async fn run_integration_server( &self, result_fd: std::os::fd::RawFd, diff --git a/pg-ephemeral/src/seed.rs b/pg-ephemeral/src/seed.rs index 4b778617..1d5a5c0a 100644 --- a/pg-ephemeral/src/seed.rs +++ b/pg-ephemeral/src/seed.rs @@ -163,6 +163,40 @@ mod seed_hash_tests { } } +const PG_EPHEMERAL_COMPONENT: ociman::reference::PathComponent = + ociman::reference::PathComponent::from_static_or_panic("pg-ephemeral"); + +fn build_cache_reference( + cache_registry: Option<&ociman::reference::Name>, + instance_name: &crate::InstanceName, + cache_key: &SeedHash, +) -> ociman::Reference { + let instance_component: ociman::reference::PathComponent = + instance_name.as_str().parse().unwrap(); + + let (domain, path) = match cache_registry { + Some(registry) => ( + registry.domain.clone(), + registry + .path + .clone() + .extended(PG_EPHEMERAL_COMPONENT.clone()) + .extended(instance_component), + ), + None => ( + None, + ociman::reference::Path::from(PG_EPHEMERAL_COMPONENT.clone()) + .extended(instance_component), + ), + }; + + ociman::Reference { + name: ociman::reference::Name { domain, path }, + tag: Some(cache_key.0.into()), + digest: None, + } +} + #[derive(Clone, Debug, PartialEq)] pub enum CacheStatus { Hit { @@ -186,13 +220,12 @@ impl CacheStatus { cache_key: Option, backend: &ociman::Backend, instance_name: &crate::InstanceName, + cache_registry: Option<&ociman::reference::Name>, ) -> Result { let Some(hash) = cache_key else { return Ok(Self::Uncacheable); }; - let reference: ociman::Reference = format!("pg-ephemeral/{instance_name}:{hash}") - .parse() - .unwrap(); + let reference = build_cache_reference(cache_registry, instance_name, &hash); // Single inspect round-trip determines presence and (on Hit) returns // the labels in one call. NotFound from the underlying inspect is the // documented absence signal, so we map it to Miss instead of an error. @@ -522,6 +555,7 @@ impl Seed { hash_chain: &mut HashChain, backend: &ociman::Backend, instance_name: &crate::InstanceName, + cache_registry: Option<&ociman::reference::Name>, ) -> Result { match self { Seed::SqlFile { path } => { @@ -539,6 +573,7 @@ impl Seed { hash_chain.cache_key(), backend, instance_name, + cache_registry, ) .await?, name, @@ -579,6 +614,7 @@ impl Seed { hash_chain.cache_key(), backend, instance_name, + cache_registry, ) .await?, name, @@ -611,6 +647,7 @@ impl Seed { hash_chain.cache_key(), backend, instance_name, + cache_registry, ) .await?, name, @@ -630,6 +667,7 @@ impl Seed { hash_chain.cache_key(), backend, instance_name, + cache_registry, ) .await?, name, @@ -644,6 +682,7 @@ impl Seed { hash_chain.cache_key(), backend, instance_name, + cache_registry, ) .await?, name, @@ -658,6 +697,7 @@ impl Seed { hash_chain.cache_key(), backend, instance_name, + cache_registry, ) .await?, name, @@ -685,6 +725,7 @@ impl Seed { hash_chain.cache_key(), backend, instance_name, + cache_registry, ) .await?, name, @@ -869,6 +910,7 @@ impl<'a> LoadedSeeds<'a> { seeds: &indexmap::IndexMap, backend: &ociman::Backend, instance_name: &crate::InstanceName, + cache_registry: Option<&ociman::reference::Name>, ) -> Result { let mut hash_chain = HashChain::new(); let mut loaded_seeds = Vec::new(); @@ -900,7 +942,13 @@ impl<'a> LoadedSeeds<'a> { for (name, seed) in seeds { let loaded_seed = seed - .load(name.clone(), &mut hash_chain, backend, instance_name) + .load( + name.clone(), + &mut hash_chain, + backend, + instance_name, + cache_registry, + ) .await?; loaded_seeds.push(loaded_seed); } diff --git a/pg-ephemeral/tests/base.rs b/pg-ephemeral/tests/base.rs index 5b0e773e..2547d8e4 100644 --- a/pg-ephemeral/tests/base.rs +++ b/pg-ephemeral/tests/base.rs @@ -12,6 +12,7 @@ async fn pull_test_images() { &*common::RUBY_IMAGE, &*common::NODE_IMAGE, &*ociman::testing::ALPINE_LATEST_IMAGE, + &*ociman::testing::REGISTRY_IMAGE, ] { backend.pull_image(image).await.unwrap(); } @@ -93,6 +94,7 @@ fn test_config_file() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Docker, + cache_registry: None, database: pg_client::Database::POSTGRES, parameters: pg_client::parameter::Map::new(), seeds: indexmap::IndexMap::new(), @@ -108,6 +110,7 @@ fn test_config_file() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Podman, + cache_registry: None, database: pg_client::Database::POSTGRES, parameters: pg_client::parameter::Map::new(), seeds: indexmap::IndexMap::new(), @@ -133,6 +136,7 @@ fn test_config_file() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Docker, + cache_registry: None, database: pg_client::Database::POSTGRES, parameters: pg_client::parameter::Map::new(), seeds: indexmap::IndexMap::new(), @@ -148,6 +152,7 @@ fn test_config_file() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Docker, + cache_registry: None, database: pg_client::Database::POSTGRES, parameters: pg_client::parameter::Map::new(), seeds: indexmap::IndexMap::new(), @@ -163,6 +168,7 @@ fn test_config_file() { "tests/database.toml", &pg_ephemeral::config::InstanceDefinition { backend: Some(ociman::backend::Selection::Docker), + cache_registry: None, image: Some("18.0".parse().unwrap()), parameters: pg_client::parameter::Map::new(), seeds: indexmap::IndexMap::new(), @@ -182,6 +188,7 @@ fn test_config_file_no_explicit_instance() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Docker, + cache_registry: None, database: pg_client::Database::POSTGRES, parameters: pg_client::parameter::Map::new(), seeds: indexmap::IndexMap::new(), @@ -205,6 +212,7 @@ fn test_config_file_no_explicit_instance() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Podman, + cache_registry: None, database: pg_client::Database::POSTGRES, parameters: pg_client::parameter::Map::new(), seeds: indexmap::IndexMap::new(), @@ -219,6 +227,7 @@ fn test_config_file_no_explicit_instance() { "tests/database_no_explicit_instance.toml", &pg_ephemeral::config::InstanceDefinition { backend: Some(ociman::backend::Selection::Podman), + cache_registry: None, image: Some("18.0".parse().unwrap()), parameters: pg_client::parameter::Map::new(), seeds: indexmap::IndexMap::new(), @@ -250,6 +259,7 @@ fn test_config_ssl() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Docker, + cache_registry: None, database: pg_client::Database::POSTGRES, parameters: pg_client::parameter::Map::new(), seeds: indexmap::IndexMap::new(), @@ -617,6 +627,7 @@ fn test_config_image_with_sha256_digest() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Docker, + cache_registry: None, database: pg_client::Database::POSTGRES, parameters: pg_client::parameter::Map::new(), seeds: indexmap::IndexMap::new(), diff --git a/pg-ephemeral/tests/cache.rs b/pg-ephemeral/tests/cache.rs index e8f55be6..1ffad6f7 100644 --- a/pg-ephemeral/tests/cache.rs +++ b/pg-ephemeral/tests/cache.rs @@ -390,6 +390,157 @@ async fn test_cache_status_change_with_image() { assert_ne!(stdout2, stdout1); } +#[tokio::test] +async fn test_cache_registry_prefixes_reference_without_changing_hash() { + let _backend = ociman::test_backend_setup!(); + let dir = TestDir::new("cache-registry-test"); + + // Same schema.sql + image as `test_cache_status_deterministic` and + // `test_cache_status_uncacheable_reason`, so the schema seed's + // reference hash is fixed and we can assert against exact JSON. + dir.write_file("schema.sql", "CREATE TABLE users (id INTEGER PRIMARY KEY);"); + + fn expected_with_cache_image(cache_image: &str) -> serde_json::Value { + serde_json::json!({ + "instance": "main", + "base_image": "17.1", + "version": "0.4.0", + "summary": { + "total": 1, + "hits": 0, + "misses": 1, + "uncacheable": 0, + }, + "seeds": [ + { + "name": "schema", + "type": "sql-file", + "status": "miss", + "cache_image": cache_image, + }, + ], + }) + } + + // Baseline: no cache_registry. + dir.write_file( + "database.toml", + indoc::indoc! {r#" + image = "17.1" + + [instances.main.seeds.schema] + type = "sql-file" + path = "schema.sql" + "#}, + ); + let baseline = run_pg_ephemeral(&["cache", "status", "--json"], &dir.path).await; + let baseline: serde_json::Value = serde_json::from_str(&baseline).unwrap(); + assert_eq!( + baseline, + expected_with_cache_image( + "pg-ephemeral/main:8ee3896ee958931123af048077d74fd9758b4dd494450f29e11f909f2ed8160a", + ), + ); + + // Same config plus cache_registry: every other field must be + // byte-identical, only `cache_image` gains the registry prefix. + dir.write_file( + "database.toml", + indoc::indoc! {r#" + image = "17.1" + cache_registry = "ghcr.io/mbj" + + [instances.main.seeds.schema] + type = "sql-file" + path = "schema.sql" + "#}, + ); + let prefixed = run_pg_ephemeral(&["cache", "status", "--json"], &dir.path).await; + let prefixed: serde_json::Value = serde_json::from_str(&prefixed).unwrap(); + assert_eq!( + prefixed, + expected_with_cache_image( + "ghcr.io/mbj/pg-ephemeral/main:8ee3896ee958931123af048077d74fd9758b4dd494450f29e11f909f2ed8160a", + ), + ); +} + +async fn run_pg_ephemeral_expect_failure( + args: &[&str], + current_dir: &std::path::Path, +) -> (String, String) { + let pg_ephemeral_bin = env!("CARGO_BIN_EXE_pg-ephemeral"); + let output = cmd_proc::Command::new(pg_ephemeral_bin) + .arguments(args) + .working_directory(current_dir) + .stdout_capture() + .stderr_capture() + .accept_nonzero_exit() + .run() + .await + .unwrap(); + + assert!( + !output.status.success(), + "expected pg-ephemeral {} to fail, but it succeeded\nstdout:\n{}", + args.join(" "), + String::from_utf8_lossy(&output.stdout) + ); + + ( + String::from_utf8_lossy(&output.stdout).into_owned(), + String::from_utf8_lossy(&output.stderr).into_owned(), + ) +} + +#[tokio::test] +async fn test_cache_pull_without_registry_errors() { + let _backend = ociman::test_backend_setup!(); + let dir = TestDir::new("cache-pull-no-registry"); + + dir.write_file("schema.sql", "CREATE TABLE users (id INTEGER PRIMARY KEY);"); + dir.write_file( + "database.toml", + indoc::indoc! {r#" + image = "17.1" + + [instances.main.seeds.schema] + type = "sql-file" + path = "schema.sql" + "#}, + ); + + let (_stdout, stderr) = run_pg_ephemeral_expect_failure(&["cache", "pull"], &dir.path).await; + assert!( + stderr.contains("cache_registry must be set"), + "expected registry-not-set error, got stderr:\n{stderr}" + ); +} + +#[tokio::test] +async fn test_cache_push_without_registry_errors() { + let _backend = ociman::test_backend_setup!(); + let dir = TestDir::new("cache-push-no-registry"); + + dir.write_file("schema.sql", "CREATE TABLE users (id INTEGER PRIMARY KEY);"); + dir.write_file( + "database.toml", + indoc::indoc! {r#" + image = "17.1" + + [instances.main.seeds.schema] + type = "sql-file" + path = "schema.sql" + "#}, + ); + + let (_stdout, stderr) = run_pg_ephemeral_expect_failure(&["cache", "push"], &dir.path).await; + assert!( + stderr.contains("cache_registry must be set"), + "expected registry-not-set error, got stderr:\n{stderr}" + ); +} + #[tokio::test] async fn test_cache_status_chain_propagates() { let _backend = ociman::test_backend_setup!();