diff --git a/.specify/feature.json b/.specify/feature.json index 14aaad7..01fd112 100644 --- a/.specify/feature.json +++ b/.specify/feature.json @@ -1,3 +1,3 @@ { - "feature_directory": "specs/016-source-repository-layout" + "feature_directory": "specs/017-real-world-validation" } diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f32d7c..b509683 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ versioning for public release policy decisions. ## [Unreleased] +### Changed + +- Add stateless `--source-repo ` flag for plan/apply/explain plus five real-world homelab examples under `examples//`. ## [2.1.1] - 2026-05-04 diff --git a/CLAUDE.md b/CLAUDE.md index c6eb7d8..a543e09 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,12 +1,14 @@ # core-ops Development Guidelines -Auto-generated from all feature plans. Last updated: 2026-05-01 +Auto-generated from all feature plans. Last updated: 2026-05-05 ## Active Technologies - Rust 2021 + clap 4, serde / serde_json, miette, thiserror, tempfile (015-controller-state-lifecycle) - JSON state file at `/var/lib/core-ops/status.json` (atomic write via tempfile) (015-controller-state-lifecycle) - Rust 2021 (stable toolchain), as established by the existing `core-ops` crate at v1.0.0; this feature is the trigger for the v2.0.0 major bump. + `clap` 4.5 (derive), `serde` 1.0 (derive), `serde_yaml` 0.9, `serde_json` 1.0, `miette` 7.2 (fancy diagnostics), `thiserror` 1.0, `tempfile` 3.10. No new runtime dependencies are required by this feature. (016-source-repository-layout) - Source repository on filesystem (input); existing canonical status snapshot at `/var/lib/core-ops/status.json` (output). The status snapshot gains a `layout-version: "1"` field to record which layout produced it. (016-source-repository-layout) +- Rust 2021 (existing toolchain) + clap 4.5 (derive), serde 1.0, serde_yaml 0.9, serde_json 1.0, miette 7.2 (fancy diagnostics), thiserror 1.0, tempfile 3.10. 
**No new runtime dependencies.** Git invocation via `std::process::Command::new("git")` following the established pattern at `src/cli/init.rs:52`, `src/io/repo.rs:1312/1343/1372`, `src/io/release_governance.rs:367/440`, `src/cli/verification.rs:2068/2086/2090/2103`. (017-real-world-validation) +- Existing `/var/lib/core-ops/status.json` for init'd mode (unchanged). Stateless plan writes nothing under `/var/lib/`; stateless apply writes audit + status with path-based provenance (see FR-013); stateless explain writes nothing. Operator-explicit `--audit-dir` honored across both modes (see FR-012 plus 2026-05-05 clarification). (017-real-world-validation) - Rust 2021 — clap 4, serde, miette, serde_json, serde_yaml - GitHub Actions — ubuntu-latest runners, `gh` CLI, `rustup` @@ -96,10 +98,10 @@ removed or renamed. Follow standard Rust conventions. No new abstractions without justification. ## Recent Changes +- 017-real-world-validation: Added Rust 2021 (existing toolchain) + clap 4.5 (derive), serde 1.0, serde_yaml 0.9, serde_json 1.0, miette 7.2 (fancy diagnostics), thiserror 1.0, tempfile 3.10. **No new runtime dependencies.** Git invocation via `std::process::Command::new("git")` following the established pattern at `src/cli/init.rs:52`, `src/io/repo.rs:1312/1343/1372`, `src/io/release_governance.rs:367/440`, `src/cli/verification.rs:2068/2086/2090/2103`. - 016-source-repository-layout: Added Rust 2021 (stable toolchain), as established by the existing `core-ops` crate at v1.0.0; this feature is the trigger for the v2.0.0 major bump. + `clap` 4.5 (derive), `serde` 1.0 (derive), `serde_yaml` 0.9, `serde_json` 1.0, `miette` 7.2 (fancy diagnostics), `thiserror` 1.0, `tempfile` 3.10. No new runtime dependencies are required by this feature. 
- 015-controller-state-lifecycle: Added Rust 2021 + clap 4, serde / serde_json, miette, thiserror, tempfile -- 014-config-restart-fidelity: Fix planner to emit RestartUnit for config-file-dependent containers diff --git a/Cargo.lock b/Cargo.lock index b452705..ed696d2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -157,7 +157,7 @@ checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" [[package]] name = "core-ops" -version = "2.1.1" +version = "2.2.0" dependencies = [ "clap", "libc", diff --git a/Cargo.toml b/Cargo.toml index 557872f..012e7bf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "core-ops" -version = "2.1.1" +version = "2.2.0" edition = "2021" license = "AGPL-3.0-or-later" diff --git a/README.md b/README.md index d582c7b..631b652 100644 --- a/README.md +++ b/README.md @@ -112,6 +112,32 @@ A valid installation should: --- +## Real-World Examples + +Five real-world homelab setups translated into the source-repository +layout. Each is runnable via stateless `--source-repo` invocation +without `core-ops init`. See `examples//README.md` for setup +intent, sources, and known limitations. + +* [`examples/01-caddy-whoami`](examples/01-caddy-whoami) — Caddy reverse proxy fronting whoami (single-Container baseline). +* [`examples/02-nextcloud`](examples/02-nextcloud) — Nextcloud + Postgres + Redis + Traefik (multi-Container, intra-service network, persistent storage). +* [`examples/03-immich`](examples/03-immich) — Immich photo server with ML worker (GPU device, multi-network). +* [`examples/04-traefik-authelia`](examples/04-traefik-authelia) — Traefik + Authelia + protected backend (cross-service ForwardAuth composition). +* [`examples/05-observability`](examples/05-observability) — Prometheus + Grafana + node-exporter + cadvisor (host-scope sidecars). 
+ +Try one without committing to anything: + +```sh +core-ops plan --source-repo examples/01-caddy-whoami --host example +``` + +No prior `core-ops init` required; nothing is written under +`/var/lib/core-ops/`. To switch into long-lived tracking mode after +copying an example to your own setup directory, run +`git init && core-ops init ` once. + +--- + ## Installation (Current Phase) CoreOps is currently distributed as direct binaries for `x86_64` (`amd64`) diff --git a/changes/017-real-world-validation.md b/changes/017-real-world-validation.md new file mode 100644 index 0000000..2124f13 --- /dev/null +++ b/changes/017-real-world-validation.md @@ -0,0 +1,7 @@ +--- +change_id: 017-real-world-validation +release_intent: minor +summary: Add stateless `--source-repo ` flag for plan/apply/explain plus five real-world homelab examples under `examples//`. +scope: cli +release_preparation: false +--- diff --git a/docs/development.md b/docs/development.md index dda3366..ab923f0 100644 --- a/docs/development.md +++ b/docs/development.md @@ -222,10 +222,17 @@ local testing. The repository layout should include: - `hosts//host.yaml` with explicit service selection - `hosts//overrides/` for host-specific drop-ins -Override host selection during development with: +Override host selection during development. Stateless (no prior `init`): ``` -CORE_OPS_HOST= core-ops plan --repo --rev +core-ops plan --source-repo --host +``` + +Or initialize once and let persisted state carry the repo + ref: + +``` +core-ops init +core-ops plan --host ``` When adding or changing behavior, ensure tests and diagnostics preserve diff --git a/docs/follow-ups.md b/docs/follow-ups.md index e1d12e0..accb047 100644 --- a/docs/follow-ups.md +++ b/docs/follow-ups.md @@ -6,26 +6,18 @@ Deferred implementation work and discoveries that should be revisited after the ### Init Command -`core-ops` currently expects every `plan`, `apply` (or `agent`) to be supplied with `repo` and `rev` arguments. 
At the same time, expected use through operators is to initialize `core-ops` once against a repository and a tracking branch, and keep running `plan`, `apply` etc. against that. - -`repository` and `requested_ref` are already tracked through `core-ops`, so `repo` should be taken from their and `rev` be assumed to be the latest tracking `requested_ref`. - -In their place, a new `init` command shall be introduced in the form of `init [repo] [ref]` that sets up `core-ops` state with the tracking repository and ref. At the same time, remove the `repo` and `rev` arguments from `plan` and `apply`, effectively making the CLI stateful and aligned with the state store. +> Historical note: the `init`-as-explicit-entry-point + remove-`repo`/`rev` +> redesign described here shipped in spec/015. Stateless `--source-repo` +> for plan/apply/explain shipped in spec/017. The remaining open items +> in this section are about argument persistence and recovery UX, +> below. Other arguments currently taken by `plan`, `apply`, and `agent` which should persist are `quadlet-dir`, `systemd-unit-dir`, `state-file`, and `audit-dir`. -Rollbacks would then be validated against `rev`s on the tracking branch, and otherwise refuse action if pointing to a non-reachable commit from the current ref. `rollback-plan-only` (apply option) is completely misplaced and should instead become the `rollback` option for `plan`. There should be an explicit flow to re-initialize using `init`, e.g. using `--reinitialize` that changes the tracking repo and/or ref. -Summary: -- CoreOps already persists tracking repository/ref in controller state -- CLI UX should be aligned with that existing persistence -- init becomes the explicit operator entry point for managing this persisted desired-state configuration - -Read specs 004, 006, and 007 to get the full picture. - ### Reconciliation Cleanup Investigate the contents of status.json and deterministic-state.json to see whether state is duplicated. 
Consider removing state from status.json if duplicated. @@ -84,17 +76,30 @@ Instead of a warning that the user doesn't have permission to read or operate on For now, go with option 2. +## NFS-backed library mounts in real workloads + +Real homelab workloads (Immich photo library, Nextcloud data +directory, etc.) frequently back container volumes with NFS mounts +declared in `services/<service>/systemd/*.mount` units. Spec/017's +`examples/03-immich/` uses a Podman-managed `*.volume` instead because +NFS mount declarations are orthogonal to the validation iteration's +scope. A future iteration could ship a worked example exercising +mount-aware reconciliation against an NFS source. (Spec/017 synthesis +table classification: C.) + ## Source Repository UX -There is no user-facing and no agent-facing documentation for the required layout of the source Git repository. Even the naming is not aligned (`Source repository` vs `workload Git repository`). +> Still open: the `Source repository` vs `workload Git repository` naming +> gap and the authoring-tool follow-ups below. Spec/016 + spec/017 closed +> the "rich, documented real-life examples" and "QnA for known +> limitations" bullets — see `examples/<name>/` and the synthesis +> table at `specs/017-real-world-validation/spec.md`. There should ideally be: - User facing documentation how to author valid source repositories - Agentic documentation for the same - An installable Agent skill that teaches agents how to deal with source repositories - A core-ops command that creates a source repository with basic layout, README.md and AGENTS.md from scratch (maybe plus the skill) -- Important: Rich, documented real-life examples of actual source repositories with real services, overrides, mounts etc. -- QnA for source repository use cases / known limitations and workarounds These changes should be structured around schema, patterns (conventions), and tooling. 
diff --git a/examples/01-caddy-whoami/README.md b/examples/01-caddy-whoami/README.md new file mode 100644 index 0000000..7077751 --- /dev/null +++ b/examples/01-caddy-whoami/README.md @@ -0,0 +1,59 @@ +# 01 — Caddy + whoami + +Single-Container baseline: a Caddy reverse proxy fronting a `whoami` +HTTP echo backend over a shared Quadlet network. Default config-root +(`/etc/caddy/`). Shape coverage: two services, one Quadlet `*.container` +each, plus auxiliary Quadlet `*.network` and `*.volume` units. + +## Pressure axis + +Single-Container baseline. Validates that the spec/016 layout supports +a minimal real-world reverse-proxy + backend pattern with persistent +state (Caddy automatically obtains TLS certificates and stores them in +the `caddy-data` volume). + +## Sources + +These references shaped the Quadlet equivalents. Upstream YAML/compose +blocks were not copied verbatim (research.md D5 license hygiene). + +- Caddy quick-start: +- Caddy Docker official image: +- traefik/whoami container README: + +## Service-by-service intent + +| Service | Image | Purpose | Notes | +|---------|-------|---------|-------| +| `caddy` | `docker.io/library/caddy:2` | TLS terminator + reverse proxy | Mounts `/etc/caddy/Caddyfile` (default config-root); state in `caddy-data` volume | +| `whoami` | `docker.io/traefik/whoami:latest` | HTTP echo backend | Joined to the same `caddy` network | + +## Try it + +> CLI output below is illustrative and not snapshot-tested. + +```sh +core-ops plan --source-repo examples/01-caddy-whoami --host example +``` + +Expected: exit 0; plan lists the Caddy container, the whoami container, +the shared network, and the two Caddy volumes. No prior `core-ops init` +required; nothing written under `/var/lib/core-ops/`. + +## Known limitations + +None encountered during translation — this example is the spec/016 +layout's narrowest shape and exercises no friction beyond the parser +contract. 
+ +## Scaffold for your own setup + +```sh +cp -r examples/01-caddy-whoami ~/my-caddy +# Edit hosts/example/host.yaml → rename `example` to your host id. +# Edit services/caddy/config/Caddyfile → set your real domain + backend. +core-ops plan --source-repo ~/my-caddy --host <your-host-id> +``` + +Once happy, run `git init && core-ops init ~/my-caddy main` to switch into +long-lived tracking mode. diff --git a/specs/016-source-repository-layout/examples/03-multi-unit-with-dropins/services/webhook-receiver/quadlet/webhook-receiver.container.d/10-resources.conf b/examples/01-caddy-whoami/hosts/example/caddy/quadlet/caddy.container.d/10-resources.conf similarity index 100% rename from specs/016-source-repository-layout/examples/03-multi-unit-with-dropins/services/webhook-receiver/quadlet/webhook-receiver.container.d/10-resources.conf rename to examples/01-caddy-whoami/hosts/example/caddy/quadlet/caddy.container.d/10-resources.conf diff --git a/examples/01-caddy-whoami/hosts/example/host.yaml b/examples/01-caddy-whoami/hosts/example/host.yaml new file mode 100644 index 0000000..6476b6a --- /dev/null +++ b/examples/01-caddy-whoami/hosts/example/host.yaml @@ -0,0 +1,4 @@ +host: example +services: + - caddy + - whoami diff --git a/examples/01-caddy-whoami/services/caddy/config/Caddyfile b/examples/01-caddy-whoami/services/caddy/config/Caddyfile new file mode 100644 index 0000000..7127918 --- /dev/null +++ b/examples/01-caddy-whoami/services/caddy/config/Caddyfile @@ -0,0 +1,7 @@ +# Illustrative Caddyfile; replace example.com with the operator's +# domain before applying. RFC 2606 reserved domain used here so the +# example is safe to commit. 
+ +whoami.example.com { + reverse_proxy whoami:80 +} diff --git a/examples/01-caddy-whoami/services/caddy/quadlet/caddy-config.volume b/examples/01-caddy-whoami/services/caddy/quadlet/caddy-config.volume new file mode 100644 index 0000000..910d0ce --- /dev/null +++ b/examples/01-caddy-whoami/services/caddy/quadlet/caddy-config.volume @@ -0,0 +1,5 @@ +[Unit] +Description=Caddy autosaved JSON config + +[Volume] +VolumeName=caddy-config diff --git a/examples/01-caddy-whoami/services/caddy/quadlet/caddy-data.volume b/examples/01-caddy-whoami/services/caddy/quadlet/caddy-data.volume new file mode 100644 index 0000000..0fc872c --- /dev/null +++ b/examples/01-caddy-whoami/services/caddy/quadlet/caddy-data.volume @@ -0,0 +1,5 @@ +[Unit] +Description=Persistent state for Caddy (certs, OCSP staples) + +[Volume] +VolumeName=caddy-data diff --git a/examples/01-caddy-whoami/services/caddy/quadlet/caddy.container b/examples/01-caddy-whoami/services/caddy/quadlet/caddy.container new file mode 100644 index 0000000..2397da5 --- /dev/null +++ b/examples/01-caddy-whoami/services/caddy/quadlet/caddy.container @@ -0,0 +1,21 @@ +[Unit] +Description=Caddy reverse proxy fronting whoami +After=network-online.target +Wants=network-online.target + +[Container] +Image=docker.io/library/caddy:2 +PublishPort=80:80 +PublishPort=443:443 +Volume=/etc/caddy:/etc/caddy:ro,Z +Volume=caddy-data.volume:/data:Z +Volume=caddy-config.volume:/config:Z +Network=caddy.network +Exec=caddy run --config /etc/caddy/Caddyfile --adapter caddyfile + +[Service] +Restart=always +TimeoutStartSec=300 + +[Install] +WantedBy=multi-user.target default.target diff --git a/examples/01-caddy-whoami/services/caddy/quadlet/caddy.network b/examples/01-caddy-whoami/services/caddy/quadlet/caddy.network new file mode 100644 index 0000000..1c4b7ae --- /dev/null +++ b/examples/01-caddy-whoami/services/caddy/quadlet/caddy.network @@ -0,0 +1,6 @@ +[Unit] +Description=Shared network for caddy and whoami + +[Network] +NetworkName=caddy 
+Subnet=192.0.2.0/24 diff --git a/specs/016-source-repository-layout/examples/01-minimal-single-service/services/whoami/quadlet/whoami.container b/examples/01-caddy-whoami/services/whoami/quadlet/whoami.container similarity index 51% rename from specs/016-source-repository-layout/examples/01-minimal-single-service/services/whoami/quadlet/whoami.container rename to examples/01-caddy-whoami/services/whoami/quadlet/whoami.container index 527af07..d19d2d8 100644 --- a/specs/016-source-repository-layout/examples/01-minimal-single-service/services/whoami/quadlet/whoami.container +++ b/examples/01-caddy-whoami/services/whoami/quadlet/whoami.container @@ -1,17 +1,16 @@ [Unit] -Description=whoami — minimal example service for spec 016 layout v1 +Description=whoami HTTP echo backend After=network-online.target Wants=network-online.target [Container] Image=docker.io/traefik/whoami:latest -PublishPort=8000:80 -Volume=/etc/whoami/whoami.toml:/etc/whoami/whoami.toml:ro,Z -Exec=--port 80 +ContainerName=whoami +Network=caddy.network [Service] Restart=always -TimeoutStartSec=900 +TimeoutStartSec=180 [Install] WantedBy=multi-user.target default.target diff --git a/examples/02-nextcloud/README.md b/examples/02-nextcloud/README.md new file mode 100644 index 0000000..b6494d0 --- /dev/null +++ b/examples/02-nextcloud/README.md @@ -0,0 +1,76 @@ +# 02 — Nextcloud (community multi-container) + +Multi-Container homelab Nextcloud stack: Nextcloud + Postgres + Redis + +Traefik edge proxy. Intra-service Quadlet network, persistent storage +volumes, host-side TLS port drop-in. The `traefik-edge` service id +diverges from its `config-root: traefik`, exercising the +`service.yaml` redirection path. + +## Pressure axis + +Multi-Container, intra-service network, persistent storage. Validates +that the spec/016 layout supports a real-world four-container stack +where each container is its own service directory and the headlining +service (`nextcloud`) depends on its peers via Quadlet `Requires=`. 
+ +## Sources + +These references shaped the Quadlet equivalents. Upstream YAML/compose +blocks were not copied verbatim (research.md D5 license hygiene). + +- Nextcloud official Docker image: +- Nextcloud community Docker examples (NOT the All-In-One container, + which manages its own sub-containers via the Docker socket and is + incompatible with external orchestration): + +- Postgres official image: +- Redis official image: +- Traefik v3 docs: + +## Service-by-service intent + +| Service | Image | Purpose | Notes | +|---------|-------|---------|-------| +| `nextcloud` | `docker.io/library/nextcloud:30` | Headlining Nextcloud app server | Mounts `nextcloud-data` volume; declares `Requires=` on db + redis | +| `nextcloud-db` | `docker.io/library/postgres:16` | Postgres backing store | Persistent `nextcloud-db-data` volume; password sourced via Podman secret | +| `nextcloud-redis` | `docker.io/library/redis:7-alpine` | In-memory cache | Save disabled (cache only) | +| `traefik-edge` | `docker.io/library/traefik:v3.1` | Edge reverse proxy | Service id `traefik-edge`, `config-root: traefik` (config-root divergence) | + +## Try it + +> CLI output below is illustrative and not snapshot-tested. + +```sh +core-ops plan --source-repo examples/02-nextcloud --host example +``` + +Expected: exit 0; plan lists 4 containers, 1 network, 2 volumes, 1 +config file (`/etc/traefik/traefik.yaml` — note the `traefik-edge` → +`traefik` config-root rewrite), and the host-side `traefik-edge.container.d/10-tls.conf` +drop-in adding the TLS port. + +## Known limitations + +- **Secrets are referenced, not committed**: the example declares a + Podman secret `nextcloud-db-password` but does not provide its + contents. Operators must `podman secret create nextcloud-db-password + /path/to/secret` on the host before applying. Secret bootstrap + belongs to the host, not the source-repo (FR-009: no real values). 
+- **Trusted domain placeholder**: `NEXTCLOUD_TRUSTED_DOMAINS` is set to + `cloud.example.com` (RFC 2606). Replace with the operator's real + domain in their own scaffold copy before applying. +- **Initial Nextcloud setup is interactive**: the first `apply` + installs files; the operator still needs to complete the install + wizard at `http://<host>/` to create the admin account. This is a + Nextcloud product behavior, not a layout limitation. (Synthesis + table classification: `B` — workaround documented here.) + +## Scaffold for your own setup + +```sh +cp -r examples/02-nextcloud ~/my-nextcloud +# Edit hosts/example/host.yaml → rename `example` to your host id. +# Edit services/nextcloud/quadlet/nextcloud.container → set NEXTCLOUD_TRUSTED_DOMAINS. +# `podman secret create nextcloud-db-password ...` on the target host. +core-ops plan --source-repo ~/my-nextcloud --host <your-host-id> +``` diff --git a/examples/02-nextcloud/hosts/example/host.yaml b/examples/02-nextcloud/hosts/example/host.yaml new file mode 100644 index 0000000..e13122b --- /dev/null +++ b/examples/02-nextcloud/hosts/example/host.yaml @@ -0,0 +1,6 @@ +host: example +services: + - nextcloud + - nextcloud-db + - nextcloud-redis + - traefik-edge diff --git a/examples/02-nextcloud/hosts/example/traefik-edge/quadlet/traefik-edge.container.d/10-tls.conf b/examples/02-nextcloud/hosts/example/traefik-edge/quadlet/traefik-edge.container.d/10-tls.conf new file mode 100644 index 0000000..d5615d7 --- /dev/null +++ b/examples/02-nextcloud/hosts/example/traefik-edge/quadlet/traefik-edge.container.d/10-tls.conf @@ -0,0 +1,2 @@ +[Container] +PublishPort=443:443 diff --git a/examples/02-nextcloud/services/nextcloud-db/quadlet/nextcloud-db-data.volume b/examples/02-nextcloud/services/nextcloud-db/quadlet/nextcloud-db-data.volume new file mode 100644 index 0000000..9c624e8 --- /dev/null +++ b/examples/02-nextcloud/services/nextcloud-db/quadlet/nextcloud-db-data.volume @@ -0,0 +1,5 @@ +[Unit] +Description=Nextcloud Postgres data volume + +[Volume] 
+VolumeName=nextcloud-db-data diff --git a/examples/02-nextcloud/services/nextcloud-db/quadlet/nextcloud-db.container b/examples/02-nextcloud/services/nextcloud-db/quadlet/nextcloud-db.container new file mode 100644 index 0000000..166e1b2 --- /dev/null +++ b/examples/02-nextcloud/services/nextcloud-db/quadlet/nextcloud-db.container @@ -0,0 +1,21 @@ +[Unit] +Description=Nextcloud Postgres database +After=network-online.target +Wants=network-online.target + +[Container] +Image=docker.io/library/postgres:16 +ContainerName=nextcloud-db +Network=nextcloud.network +Volume=nextcloud-db-data.volume:/var/lib/postgresql/data:Z +Environment=POSTGRES_DB=nextcloud +Environment=POSTGRES_USER=nextcloud +Environment=POSTGRES_PASSWORD_FILE=/run/secrets/nextcloud-db-password +Secret=nextcloud-db-password,target=/run/secrets/nextcloud-db-password + +[Service] +Restart=always +TimeoutStartSec=900 + +[Install] +WantedBy=multi-user.target default.target diff --git a/examples/02-nextcloud/services/nextcloud-redis/quadlet/nextcloud-redis.container b/examples/02-nextcloud/services/nextcloud-redis/quadlet/nextcloud-redis.container new file mode 100644 index 0000000..a7696b5 --- /dev/null +++ b/examples/02-nextcloud/services/nextcloud-redis/quadlet/nextcloud-redis.container @@ -0,0 +1,17 @@ +[Unit] +Description=Nextcloud Redis cache +After=network-online.target +Wants=network-online.target + +[Container] +Image=docker.io/library/redis:7-alpine +ContainerName=nextcloud-redis +Network=nextcloud.network +Exec=redis-server --save "" --appendonly no + +[Service] +Restart=always +TimeoutStartSec=300 + +[Install] +WantedBy=multi-user.target default.target diff --git a/examples/02-nextcloud/services/nextcloud/quadlet/nextcloud-data.volume b/examples/02-nextcloud/services/nextcloud/quadlet/nextcloud-data.volume new file mode 100644 index 0000000..ff83cbc --- /dev/null +++ b/examples/02-nextcloud/services/nextcloud/quadlet/nextcloud-data.volume @@ -0,0 +1,5 @@ +[Unit] +Description=Nextcloud 
/var/www/html persistent storage + +[Volume] +VolumeName=nextcloud-data diff --git a/examples/02-nextcloud/services/nextcloud/quadlet/nextcloud.container b/examples/02-nextcloud/services/nextcloud/quadlet/nextcloud.container new file mode 100644 index 0000000..8c6f51b --- /dev/null +++ b/examples/02-nextcloud/services/nextcloud/quadlet/nextcloud.container @@ -0,0 +1,26 @@ +[Unit] +Description=Nextcloud (php-fpm + apache image) +After=network-online.target nextcloud-db.service nextcloud-redis.service +Wants=network-online.target +Requires=nextcloud-db.service nextcloud-redis.service + +[Container] +Image=docker.io/library/nextcloud:30 +ContainerName=nextcloud +Network=nextcloud.network +Volume=nextcloud-data.volume:/var/www/html:Z +Environment=NEXTCLOUD_TRUSTED_DOMAINS=cloud.example.com +Environment=POSTGRES_HOST=nextcloud-db +Environment=POSTGRES_DB=nextcloud +Environment=POSTGRES_USER=nextcloud +Environment=POSTGRES_PASSWORD_FILE=/run/secrets/nextcloud-db-password +Environment=REDIS_HOST=nextcloud-redis +Environment=REDIS_HOST_PORT=6379 +Secret=nextcloud-db-password,target=/run/secrets/nextcloud-db-password + +[Service] +Restart=always +TimeoutStartSec=900 + +[Install] +WantedBy=multi-user.target default.target diff --git a/examples/02-nextcloud/services/nextcloud/quadlet/nextcloud.network b/examples/02-nextcloud/services/nextcloud/quadlet/nextcloud.network new file mode 100644 index 0000000..fe6499b --- /dev/null +++ b/examples/02-nextcloud/services/nextcloud/quadlet/nextcloud.network @@ -0,0 +1,6 @@ +[Unit] +Description=Intra-service network for the Nextcloud stack + +[Network] +NetworkName=nextcloud +Subnet=192.0.2.0/24 diff --git a/examples/02-nextcloud/services/traefik-edge/config/traefik.yaml b/examples/02-nextcloud/services/traefik-edge/config/traefik.yaml new file mode 100644 index 0000000..4014d56 --- /dev/null +++ b/examples/02-nextcloud/services/traefik-edge/config/traefik.yaml @@ -0,0 +1,17 @@ +# Illustrative Traefik static configuration. 
Service id is +# `traefik-edge` but `service.yaml` declares `config-root: traefik` so +# this file deploys to /etc/traefik/traefik.yaml. + +entryPoints: + web: + address: ":80" + websecure: + address: ":443" + +providers: + docker: + exposedByDefault: false + network: nextcloud + +api: + dashboard: false diff --git a/examples/02-nextcloud/services/traefik-edge/quadlet/traefik-edge.container b/examples/02-nextcloud/services/traefik-edge/quadlet/traefik-edge.container new file mode 100644 index 0000000..7ffddf2 --- /dev/null +++ b/examples/02-nextcloud/services/traefik-edge/quadlet/traefik-edge.container @@ -0,0 +1,19 @@ +[Unit] +Description=Traefik edge proxy for the Nextcloud stack +After=network-online.target nextcloud.service +Wants=network-online.target + +[Container] +Image=docker.io/library/traefik:v3.1 +ContainerName=traefik-edge +PublishPort=80:80 +Network=nextcloud.network +Volume=/etc/traefik:/etc/traefik:ro,Z +Exec=--configfile=/etc/traefik/traefik.yaml + +[Service] +Restart=always +TimeoutStartSec=900 + +[Install] +WantedBy=multi-user.target default.target diff --git a/specs/016-source-repository-layout/examples/02-variant-config-root/services/traefik-dnschallenge/service.yaml b/examples/02-nextcloud/services/traefik-edge/service.yaml similarity index 100% rename from specs/016-source-repository-layout/examples/02-variant-config-root/services/traefik-dnschallenge/service.yaml rename to examples/02-nextcloud/services/traefik-edge/service.yaml diff --git a/examples/03-immich/README.md b/examples/03-immich/README.md new file mode 100644 index 0000000..3316c0f --- /dev/null +++ b/examples/03-immich/README.md @@ -0,0 +1,77 @@ +# 03 — Immich photo server with ML worker + +Immich photo/video library: server + Postgres (pgvecto.rs) + Redis + +ML inference worker + Traefik edge proxy. 
Exercises GPU device +passthrough (host overlay drop-in on `immich-ml`) and multi-network +membership (`immich-server` joins both an internal and a public +network so the edge proxy can reach it). + +## Pressure axis + +GPU device passthrough, multi-network membership, ML worker. Validates +that the spec/016 layout supports a real workload where one service +joins multiple Quadlet networks and one service receives a host-side +device drop-in. + +## Sources + +These references shaped the Quadlet equivalents. Upstream YAML/compose +blocks were not copied verbatim (research.md D5 license hygiene). + +- Immich docker-compose example: + +- Immich Postgres image (pgvecto.rs flavor): + +- Podman CDI for NVIDIA GPUs: + +- Intel/AMD VAAPI on `/dev/dri`: + + +## Service-by-service intent + +| Service | Image | Purpose | Notes | +|---------|-------|---------|-------| +| `immich-server` | `ghcr.io/immich-app/immich-server:release` | Headlining app server | Joins both `immich-internal` and `immich-public` networks | +| `immich-database` | `ghcr.io/immich-app/postgres:16` | Postgres + pgvecto.rs | Internal network only | +| `immich-redis` | `docker.io/library/redis:7-alpine` | In-memory cache | Internal network only | +| `immich-ml` | `ghcr.io/immich-app/immich-machine-learning:release` | ML inference worker | Receives `AddDevice=/dev/dri` via host overlay drop-in | +| `traefik-edge` | `docker.io/library/traefik:v3.1` | Edge reverse proxy | Public network only; reaches `immich-server` via shared network | + +## Try it + +> CLI output below is illustrative and not snapshot-tested. + +```sh +core-ops plan --source-repo examples/03-immich --host example +``` + +Expected: exit 0; plan lists 5 containers, 2 networks, 3 volumes, and +the host-side `immich-ml.container.d/20-gpu.conf` drop-in adding GPU +device passthrough. + +## Known limitations + +- **GPU shape is host-specific**: the example ships an Intel/AMD VAAPI + drop-in (`AddDevice=/dev/dri:/dev/dri`). 
For NVIDIA, the operator + must rewrite it to CDI (`AddDevice=nvidia.com/gpu=all` or + `PodmanArgs=--device nvidia.com/gpu=all`) and ensure the + nvidia-container-toolkit and its CDI spec are installed on the host. + Synthesis table classification: `B` — workaround documented here; + no layout change is required. +- **Secrets are referenced, not committed**: `immich-db-password` is a + Podman secret the operator must create on the host + (`podman secret create immich-db-password ...`) before applying. +- **Library mount is in-host**: the example uses a Quadlet `*.volume` + for uploads (`immich-upload`). Real homelab deployments often back + this with NFS. NFS mount declarations are out of scope for this + example; see the synthesis table for tracking. + +## Scaffold for your own setup + +```sh +cp -r examples/03-immich ~/my-immich +# Edit hosts/example/host.yaml → rename `example` to your host id. +# If on NVIDIA, edit the GPU drop-in; if on Intel/AMD VAAPI, leave it. +# Create the `immich-db-password` Podman secret on the target host before applying. +core-ops plan --source-repo ~/my-immich --host <your-host-id> +``` diff --git a/examples/03-immich/hosts/example/host.yaml b/examples/03-immich/hosts/example/host.yaml new file mode 100644 index 0000000..9880c71 --- /dev/null +++ b/examples/03-immich/hosts/example/host.yaml @@ -0,0 +1,7 @@ +host: example +services: + - immich-server + - immich-database + - immich-redis + - immich-ml + - traefik-edge diff --git a/examples/03-immich/hosts/example/immich-ml/quadlet/immich-ml.container.d/20-gpu.conf b/examples/03-immich/hosts/example/immich-ml/quadlet/immich-ml.container.d/20-gpu.conf new file mode 100644 index 0000000..29e0e5a --- /dev/null +++ b/examples/03-immich/hosts/example/immich-ml/quadlet/immich-ml.container.d/20-gpu.conf @@ -0,0 +1,6 @@ +[Container] +# Hardware accel via Intel/AMD VAAPI on /dev/dri. For NVIDIA, replace +# with `AddDevice=nvidia.com/gpu=all` (CDI) or +# `PodmanArgs=--device nvidia.com/gpu=all`. 
Out of scope here; choose +# the right shape for the host's accelerator. +AddDevice=/dev/dri:/dev/dri diff --git a/examples/03-immich/services/immich-database/quadlet/immich-database.container b/examples/03-immich/services/immich-database/quadlet/immich-database.container new file mode 100644 index 0000000..8afbead --- /dev/null +++ b/examples/03-immich/services/immich-database/quadlet/immich-database.container @@ -0,0 +1,21 @@ +[Unit] +Description=Immich Postgres + pgvecto.rs database +After=network-online.target +Wants=network-online.target + +[Container] +Image=ghcr.io/immich-app/postgres:16 +ContainerName=immich-database +Network=immich-internal.network +Volume=immich-db-data.volume:/var/lib/postgresql/data:Z +Environment=POSTGRES_DB=immich +Environment=POSTGRES_USER=immich +Environment=POSTGRES_PASSWORD_FILE=/run/secrets/immich-db-password +Secret=immich-db-password,target=/run/secrets/immich-db-password + +[Service] +Restart=always +TimeoutStartSec=900 + +[Install] +WantedBy=multi-user.target default.target diff --git a/examples/03-immich/services/immich-database/quadlet/immich-db-data.volume b/examples/03-immich/services/immich-database/quadlet/immich-db-data.volume new file mode 100644 index 0000000..0badf66 --- /dev/null +++ b/examples/03-immich/services/immich-database/quadlet/immich-db-data.volume @@ -0,0 +1,5 @@ +[Unit] +Description=Immich database data volume + +[Volume] +VolumeName=immich-db-data diff --git a/examples/03-immich/services/immich-ml/quadlet/immich-ml-cache.volume b/examples/03-immich/services/immich-ml/quadlet/immich-ml-cache.volume new file mode 100644 index 0000000..b9df4c3 --- /dev/null +++ b/examples/03-immich/services/immich-ml/quadlet/immich-ml-cache.volume @@ -0,0 +1,5 @@ +[Unit] +Description=Immich ML model cache + +[Volume] +VolumeName=immich-ml-cache diff --git a/examples/03-immich/services/immich-ml/quadlet/immich-ml.container b/examples/03-immich/services/immich-ml/quadlet/immich-ml.container new file mode 100644 index 
0000000..4e88c6e --- /dev/null +++ b/examples/03-immich/services/immich-ml/quadlet/immich-ml.container @@ -0,0 +1,17 @@ +[Unit] +Description=Immich ML inference worker +After=network-online.target +Wants=network-online.target + +[Container] +Image=ghcr.io/immich-app/immich-machine-learning:release +ContainerName=immich-ml +Network=immich-internal.network +Volume=immich-ml-cache.volume:/cache:Z + +[Service] +Restart=always +TimeoutStartSec=900 + +[Install] +WantedBy=multi-user.target default.target diff --git a/examples/03-immich/services/immich-redis/quadlet/immich-redis.container b/examples/03-immich/services/immich-redis/quadlet/immich-redis.container new file mode 100644 index 0000000..c0c6dc8 --- /dev/null +++ b/examples/03-immich/services/immich-redis/quadlet/immich-redis.container @@ -0,0 +1,17 @@ +[Unit] +Description=Immich Redis cache +After=network-online.target +Wants=network-online.target + +[Container] +Image=docker.io/library/redis:7-alpine +ContainerName=immich-redis +Network=immich-internal.network +Exec=redis-server --save "" --appendonly no + +[Service] +Restart=always +TimeoutStartSec=300 + +[Install] +WantedBy=multi-user.target default.target diff --git a/examples/03-immich/services/immich-server/quadlet/immich-internal.network b/examples/03-immich/services/immich-server/quadlet/immich-internal.network new file mode 100644 index 0000000..b4eea43 --- /dev/null +++ b/examples/03-immich/services/immich-server/quadlet/immich-internal.network @@ -0,0 +1,6 @@ +[Unit] +Description=Internal network: server + db + redis + ML + +[Network] +NetworkName=immich-internal +Subnet=192.0.2.0/24 diff --git a/examples/03-immich/services/immich-server/quadlet/immich-public.network b/examples/03-immich/services/immich-server/quadlet/immich-public.network new file mode 100644 index 0000000..1cf9643 --- /dev/null +++ b/examples/03-immich/services/immich-server/quadlet/immich-public.network @@ -0,0 +1,6 @@ +[Unit] +Description=Public-facing network shared with the edge 
proxy + +[Network] +NetworkName=immich-public +Subnet=198.51.100.0/24 diff --git a/examples/03-immich/services/immich-server/quadlet/immich-server.container b/examples/03-immich/services/immich-server/quadlet/immich-server.container new file mode 100644 index 0000000..7dc128b --- /dev/null +++ b/examples/03-immich/services/immich-server/quadlet/immich-server.container @@ -0,0 +1,25 @@ +[Unit] +Description=Immich photo/video server +After=network-online.target immich-database.service immich-redis.service +Wants=network-online.target +Requires=immich-database.service immich-redis.service + +[Container] +Image=ghcr.io/immich-app/immich-server:release +ContainerName=immich-server +Network=immich-internal.network +Network=immich-public.network +Volume=immich-upload.volume:/usr/src/app/upload:Z +Environment=DB_HOSTNAME=immich-database +Environment=DB_PORT=5432 +Environment=DB_USERNAME=immich +Environment=DB_DATABASE_NAME=immich +Environment=REDIS_HOSTNAME=immich-redis +Environment=IMMICH_MACHINE_LEARNING_URL=http://immich-ml:3003 + +[Service] +Restart=always +TimeoutStartSec=900 + +[Install] +WantedBy=multi-user.target default.target diff --git a/examples/03-immich/services/immich-server/quadlet/immich-upload.volume b/examples/03-immich/services/immich-server/quadlet/immich-upload.volume new file mode 100644 index 0000000..842c986 --- /dev/null +++ b/examples/03-immich/services/immich-server/quadlet/immich-upload.volume @@ -0,0 +1,5 @@ +[Unit] +Description=Immich upload library + +[Volume] +VolumeName=immich-upload diff --git a/examples/03-immich/services/traefik-edge/quadlet/traefik-edge.container b/examples/03-immich/services/traefik-edge/quadlet/traefik-edge.container new file mode 100644 index 0000000..b09ec71 --- /dev/null +++ b/examples/03-immich/services/traefik-edge/quadlet/traefik-edge.container @@ -0,0 +1,19 @@ +[Unit] +Description=Traefik edge proxy for Immich +After=network-online.target immich-server.service +Wants=network-online.target + +[Container] 
+Image=docker.io/library/traefik:v3.1 +ContainerName=traefik-edge +PublishPort=80:80 +PublishPort=443:443 +Network=immich-public.network +PodmanArgs=--label=traefik.enable=true + +[Service] +Restart=always +TimeoutStartSec=900 + +[Install] +WantedBy=multi-user.target default.target diff --git a/examples/04-traefik-authelia/README.md b/examples/04-traefik-authelia/README.md new file mode 100644 index 0000000..f031b76 --- /dev/null +++ b/examples/04-traefik-authelia/README.md @@ -0,0 +1,74 @@ +# 04 — Traefik + Authelia + protected backend + +Cross-service ForwardAuth composition: Traefik fronts a `whoami` backend +that is protected by Authelia via Traefik's `forwardAuth` middleware. +The middleware wiring lives in Traefik's static config; the host-side +drop-in on `whoami` selects which router/middleware to apply, so the +auth policy is layered on at host time rather than baked into the +service's base unit. + +## Pressure axis + +Cross-service ForwardAuth composition. Validates that the spec/016 +layout supports a real-world auth pattern where one service (Authelia) +provides a side-effect to another service (whoami) via a third +service's labels (Traefik). + +## Sources + +These references shaped the Quadlet equivalents. Upstream YAML/compose +blocks were not copied verbatim (research.md D5 license hygiene). 
+ +- Authelia Traefik integration: + +- Traefik forwardAuth middleware: + +- traefik/whoami container README: + + +## Service-by-service intent + +| Service | Image | Purpose | Notes | +|---------|-------|---------|-------| +| `traefik` | `docker.io/library/traefik:v3.1` | Edge reverse proxy | Static config declares the `authelia` ForwardAuth middleware | +| `authelia` | `docker.io/authelia/authelia:4` | Identity provider + ForwardAuth target | Default config-root; reachable on `auth.network` at `http://authelia:9091` | +| `whoami` | `docker.io/traefik/whoami:latest` | Generic protected backend | Base unit is plain; host overlay adds Traefik labels for the auth router | + +## Try it + +> CLI output below is illustrative and not snapshot-tested. + +```sh +core-ops plan --source-repo examples/04-traefik-authelia --host example +``` + +Expected: exit 0; plan lists 3 containers, 1 network, 2 config files +(`/etc/traefik/traefik.yaml` + `/etc/authelia/configuration.yml`), and +the host-side `whoami.container.d/10-forwardauth.conf` drop-in adding +Traefik labels for the protected router. + +## Known limitations + +- **Users database stub**: Authelia expects a `users_database.yml` next + to its main config. The example does not commit one (FR-009 — no + fake or real credentials). Operators must populate + `/etc/authelia/users_database.yml` on the host before applying. + Synthesis table classification: `B` — workaround documented here. +- **No TLS certificate provider**: `entryPoints.websecure` is declared + but no certResolver is wired up. Real deployments need ACME (DNS-01 + or HTTP-01) configured against the operator's domain. Out of scope + for this example. +- **Authelia secrets are external**: JWT secret, session secret, and + storage encryption key all need to be sourced from a secrets backend + (`AUTHELIA_*_FILE` env vars are the standard pattern). Not committed + here. 
+ +## Scaffold for your own setup + +```sh +cp -r examples/04-traefik-authelia ~/my-auth +# Edit hosts/example/host.yaml → rename `example` to your host id. +# Edit services/authelia/config/configuration.yml → set domain, ACL. +# Populate /etc/authelia/users_database.yml on the host before applying. +core-ops plan --source-repo ~/my-auth --host <host-id> +``` diff --git a/examples/04-traefik-authelia/hosts/example/host.yaml b/examples/04-traefik-authelia/hosts/example/host.yaml new file mode 100644 index 0000000..8528558 --- /dev/null +++ b/examples/04-traefik-authelia/hosts/example/host.yaml @@ -0,0 +1,5 @@ +host: example +services: + - traefik + - authelia + - whoami diff --git a/examples/04-traefik-authelia/hosts/example/whoami/quadlet/whoami.container.d/10-forwardauth.conf b/examples/04-traefik-authelia/hosts/example/whoami/quadlet/whoami.container.d/10-forwardauth.conf new file mode 100644 index 0000000..2c34403 --- /dev/null +++ b/examples/04-traefik-authelia/hosts/example/whoami/quadlet/whoami.container.d/10-forwardauth.conf @@ -0,0 +1,9 @@ +[Container] +# Compose ForwardAuth at host time via Traefik labels. The base service +# unit stays generic; the protection rule is layered on by the host +# that wants it. Other hosts can re-use `whoami` without auth. +PodmanArgs=--label=traefik.enable=true +PodmanArgs=--label=traefik.http.routers.whoami.rule=Host(`whoami.example.com`) +PodmanArgs=--label=traefik.http.routers.whoami.entrypoints=websecure +PodmanArgs=--label=traefik.http.routers.whoami.middlewares=authelia@file +PodmanArgs=--label=traefik.http.services.whoami.loadbalancer.server.port=80 diff --git a/examples/04-traefik-authelia/services/authelia/config/configuration.yml b/examples/04-traefik-authelia/services/authelia/config/configuration.yml new file mode 100644 index 0000000..d040e57 --- /dev/null +++ b/examples/04-traefik-authelia/services/authelia/config/configuration.yml @@ -0,0 +1,32 @@ +# Authelia configuration.
Default config-root = service id = authelia +# → file path /etc/authelia/configuration.yml on the host. + +theme: light +default_2fa_method: totp + +server: + address: tcp://0.0.0.0:9091 + +session: + name: authelia_session + domain: example.com + expiration: 1h + inactivity: 5m + +authentication_backend: + file: + path: /config/users_database.yml + +access_control: + default_policy: deny + rules: + - domain: whoami.example.com + policy: one_factor + +storage: + local: + path: /config/db.sqlite3 + +notifier: + filesystem: + filename: /config/notification.txt diff --git a/examples/04-traefik-authelia/services/authelia/quadlet/authelia.container b/examples/04-traefik-authelia/services/authelia/quadlet/authelia.container new file mode 100644 index 0000000..58e2579 --- /dev/null +++ b/examples/04-traefik-authelia/services/authelia/quadlet/authelia.container @@ -0,0 +1,17 @@ +[Unit] +Description=Authelia identity provider + ForwardAuth target +After=network-online.target +Wants=network-online.target + +[Container] +Image=docker.io/authelia/authelia:4 +ContainerName=authelia +Network=auth.network +Volume=/etc/authelia:/config:Z + +[Service] +Restart=always +TimeoutStartSec=900 + +[Install] +WantedBy=multi-user.target default.target diff --git a/examples/04-traefik-authelia/services/traefik/config/traefik.yaml b/examples/04-traefik-authelia/services/traefik/config/traefik.yaml new file mode 100644 index 0000000..07779d8 --- /dev/null +++ b/examples/04-traefik-authelia/services/traefik/config/traefik.yaml @@ -0,0 +1,27 @@ +# Traefik static config. Service id and config-root both `traefik`. 
+ +entryPoints: + web: + address: ":80" + websecure: + address: ":443" + +providers: + docker: + exposedByDefault: false + network: auth + +http: + middlewares: + authelia: + forwardAuth: + address: http://authelia:9091/api/authz/forward-auth + trustForwardHeader: true + authResponseHeaders: + - Remote-User + - Remote-Groups + - Remote-Name + - Remote-Email + +api: + dashboard: false diff --git a/examples/04-traefik-authelia/services/traefik/quadlet/auth.network b/examples/04-traefik-authelia/services/traefik/quadlet/auth.network new file mode 100644 index 0000000..2549b98 --- /dev/null +++ b/examples/04-traefik-authelia/services/traefik/quadlet/auth.network @@ -0,0 +1,6 @@ +[Unit] +Description=Cross-service network for Traefik + Authelia + protected backends + +[Network] +NetworkName=auth +Subnet=192.0.2.0/24 diff --git a/specs/016-source-repository-layout/examples/02-variant-config-root/services/traefik-dnschallenge/quadlet/traefik-dnschallenge.container b/examples/04-traefik-authelia/services/traefik/quadlet/traefik.container similarity index 68% rename from specs/016-source-repository-layout/examples/02-variant-config-root/services/traefik-dnschallenge/quadlet/traefik-dnschallenge.container rename to examples/04-traefik-authelia/services/traefik/quadlet/traefik.container index 2c7f997..674ef03 100644 --- a/specs/016-source-repository-layout/examples/02-variant-config-root/services/traefik-dnschallenge/quadlet/traefik-dnschallenge.container +++ b/examples/04-traefik-authelia/services/traefik/quadlet/traefik.container @@ -1,13 +1,16 @@ [Unit] -Description=Traefik with DNS-01 challenge — variant config-root example +Description=Traefik edge proxy with Authelia ForwardAuth After=network-online.target Wants=network-online.target [Container] Image=docker.io/library/traefik:v3.1 +ContainerName=traefik PublishPort=80:80 PublishPort=443:443 +Network=auth.network Volume=/etc/traefik:/etc/traefik:ro,Z +Volume=/run/podman/podman.sock:/var/run/docker.sock:ro 
Exec=--configfile=/etc/traefik/traefik.yaml [Service] diff --git a/examples/04-traefik-authelia/services/whoami/quadlet/whoami.container b/examples/04-traefik-authelia/services/whoami/quadlet/whoami.container new file mode 100644 index 0000000..a9e94fd --- /dev/null +++ b/examples/04-traefik-authelia/services/whoami/quadlet/whoami.container @@ -0,0 +1,16 @@ +[Unit] +Description=whoami HTTP echo backend (will be ForwardAuth-protected via host overlay) +After=network-online.target +Wants=network-online.target + +[Container] +Image=docker.io/traefik/whoami:latest +ContainerName=whoami +Network=auth.network + +[Service] +Restart=always +TimeoutStartSec=180 + +[Install] +WantedBy=multi-user.target default.target diff --git a/examples/05-observability/README.md b/examples/05-observability/README.md new file mode 100644 index 0000000..24e0382 --- /dev/null +++ b/examples/05-observability/README.md @@ -0,0 +1,77 @@ +# 05 — Observability stack + +Prometheus + Grafana + node-exporter + cadvisor with host-scope +sidecars (`/proc`, `/sys`, `/`-rootfs bind mounts on the metric +exporters). Demonstrates the "scrape-config templating" friction (see +known limitations). + +## Pressure axis + +Host-scope sidecars + scrape-config templating. Validates that the +spec/016 layout supports privileged containers with bind mounts that +escape the container filesystem (`Volume=/proc:/host/proc:ro,rslave` +etc.) and surfaces the per-host scrape-config templating gap. + +## Sources + +These references shaped the Quadlet equivalents. Upstream YAML/compose +blocks were not copied verbatim (research.md D5 license hygiene). 
+ +- Prometheus docker-compose example: + +- node_exporter recommended bind mounts: + +- cadvisor recommended bind mounts: + +- Grafana provisioning docs: + + +## Service-by-service intent + +| Service | Image | Purpose | Notes | +|---------|-------|---------|-------| +| `prometheus` | `docker.io/prom/prometheus:v2` | Metrics scraper | TSDB on `prometheus-data` volume; static targets in `prometheus.yml` | +| `grafana` | `docker.io/grafana/grafana:11` | Dashboards | Persistent state on `grafana-data` volume | +| `node-exporter` | `quay.io/prometheus/node-exporter:latest` | Host metrics | Bind-mounts `/proc`, `/sys`, `/` (read-only, rslave) | +| `cadvisor` | `gcr.io/cadvisor/cadvisor:v0.49.1` | Container metrics | Bind-mounts `/`, `/sys`, `/var/run`, `/var/lib/containers`; runs `--privileged` | + +## Try it + +> CLI output below is illustrative and not snapshot-tested. + +```sh +core-ops plan --source-repo examples/05-observability --host example +``` + +Expected: exit 0; plan lists 4 containers, 1 network, 2 volumes, +2 config files (`/etc/prometheus/prometheus.yml`, +`/etc/grafana/grafana.ini`). The host overlay replaces +`prometheus.yml` with a host-tailored target list. + +## Known limitations + +- **Scrape-config templating gap**: Prometheus's `prometheus.yml` + needs the list of scrape targets baked into a static file. The + spec/016 layout has no templating layer that can compute "for each + host, list its scrape targets" automatically — every host needs to + ship its own `prometheus.yml` whole-file replacement under + `hosts/<host-id>/prometheus/config/prometheus.yml`. This example + demonstrates the workaround. Synthesis table classification: `B` — + workaround documented; no layout change required for this slice. + Future work could escalate to a templating-layer spec if multiple + workloads need this.
+- **cadvisor requires `--privileged`**: cadvisor reads cgroups via + `/sys/fs/cgroup` and needs broader capabilities than the default + rootless Podman profile permits. The example uses `PodmanArgs=--privileged` + which is the documented upstream pattern. Operators on hardened + hosts may need to substitute fine-grained capabilities. + +## Scaffold for your own setup + +```sh +cp -r examples/05-observability ~/my-observability +# Edit hosts/example/host.yaml → rename `example` to your host id. +# Edit hosts/<host-id>/prometheus/config/prometheus.yml → list your +# host-specific scrape targets. +core-ops plan --source-repo ~/my-observability --host <host-id> +``` diff --git a/examples/05-observability/hosts/example/host.yaml b/examples/05-observability/hosts/example/host.yaml new file mode 100644 index 0000000..e66ac15 --- /dev/null +++ b/examples/05-observability/hosts/example/host.yaml @@ -0,0 +1,6 @@ +host: example +services: + - prometheus + - grafana + - node-exporter + - cadvisor diff --git a/examples/05-observability/hosts/example/prometheus/config/prometheus.yml b/examples/05-observability/hosts/example/prometheus/config/prometheus.yml new file mode 100644 index 0000000..8347d6d --- /dev/null +++ b/examples/05-observability/hosts/example/prometheus/config/prometheus.yml @@ -0,0 +1,33 @@ +# Host-side whole-file replacement of the prometheus.yml so this host +# can scrape its own additional targets without forking the base +# service. See README's "Known limitations" — true scrape-config +# templating (per-host target lists computed by core-ops) is not yet +# supported (synthesis table classification: B — workaround in this +# host overlay).
+ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: prometheus + static_configs: + - targets: + - localhost:9090 + + - job_name: node-exporter + static_configs: + - targets: + - node-exporter:9100 + + - job_name: cadvisor + static_configs: + - targets: + - cadvisor:8080 + + # Host-specific: scrape an external dashboard endpoint reachable on + # the host network. Replace 198.51.100.1 with the operator's address. + - job_name: example-host-router + static_configs: + - targets: + - 198.51.100.1:9100 diff --git a/examples/05-observability/services/cadvisor/quadlet/cadvisor.container b/examples/05-observability/services/cadvisor/quadlet/cadvisor.container new file mode 100644 index 0000000..b662dc8 --- /dev/null +++ b/examples/05-observability/services/cadvisor/quadlet/cadvisor.container @@ -0,0 +1,22 @@ +[Unit] +Description=cadvisor — container metrics, host-scope sidecar +After=network-online.target +Wants=network-online.target + +[Container] +Image=gcr.io/cadvisor/cadvisor:v0.49.1 +ContainerName=cadvisor +PublishPort=8080:8080 +Network=observe.network +Volume=/:/rootfs:ro,rslave +Volume=/var/run:/var/run:ro +Volume=/sys:/sys:ro,rslave +Volume=/var/lib/containers:/var/lib/containers:ro,rslave +PodmanArgs=--privileged + +[Service] +Restart=always +TimeoutStartSec=300 + +[Install] +WantedBy=multi-user.target default.target diff --git a/examples/05-observability/services/grafana/config/grafana.ini b/examples/05-observability/services/grafana/config/grafana.ini new file mode 100644 index 0000000..fc0b922 --- /dev/null +++ b/examples/05-observability/services/grafana/config/grafana.ini @@ -0,0 +1,9 @@ +[server] +http_port = 3000 +domain = grafana.example.com + +[auth.anonymous] +enabled = false + +[users] +allow_sign_up = false diff --git a/examples/05-observability/services/grafana/quadlet/grafana-data.volume b/examples/05-observability/services/grafana/quadlet/grafana-data.volume new file mode 100644 index 0000000..f1fdd7e --- 
/dev/null +++ b/examples/05-observability/services/grafana/quadlet/grafana-data.volume @@ -0,0 +1,5 @@ +[Unit] +Description=Grafana persistent state + +[Volume] +VolumeName=grafana-data diff --git a/examples/05-observability/services/grafana/quadlet/grafana.container b/examples/05-observability/services/grafana/quadlet/grafana.container new file mode 100644 index 0000000..f82acea --- /dev/null +++ b/examples/05-observability/services/grafana/quadlet/grafana.container @@ -0,0 +1,19 @@ +[Unit] +Description=Grafana dashboards +After=network-online.target prometheus.service +Wants=network-online.target + +[Container] +Image=docker.io/grafana/grafana:11 +ContainerName=grafana +PublishPort=3000:3000 +Network=observe.network +Volume=grafana-data.volume:/var/lib/grafana:Z +Volume=/etc/grafana:/etc/grafana:ro,Z + +[Service] +Restart=always +TimeoutStartSec=600 + +[Install] +WantedBy=multi-user.target default.target diff --git a/examples/05-observability/services/node-exporter/quadlet/node-exporter.container b/examples/05-observability/services/node-exporter/quadlet/node-exporter.container new file mode 100644 index 0000000..865dadb --- /dev/null +++ b/examples/05-observability/services/node-exporter/quadlet/node-exporter.container @@ -0,0 +1,21 @@ +[Unit] +Description=Prometheus node_exporter — host-scope sidecar +After=network-online.target +Wants=network-online.target + +[Container] +Image=quay.io/prometheus/node-exporter:latest +ContainerName=node-exporter +PublishPort=9100:9100 +Network=observe.network +Volume=/proc:/host/proc:ro,rslave +Volume=/sys:/host/sys:ro,rslave +Volume=/:/rootfs:ro,rslave +Exec=--path.procfs=/host/proc --path.sysfs=/host/sys --path.rootfs=/rootfs --collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($|/) + +[Service] +Restart=always +TimeoutStartSec=300 + +[Install] +WantedBy=multi-user.target default.target diff --git a/examples/05-observability/services/prometheus/config/prometheus.yml 
b/examples/05-observability/services/prometheus/config/prometheus.yml new file mode 100644 index 0000000..1513ac9 --- /dev/null +++ b/examples/05-observability/services/prometheus/config/prometheus.yml @@ -0,0 +1,22 @@ +# Default scrape config — single-host targets baked in. See README's +# "Known limitations" for the templating friction (synthesis table B). + +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: prometheus + static_configs: + - targets: + - localhost:9090 + + - job_name: node-exporter + static_configs: + - targets: + - node-exporter:9100 + + - job_name: cadvisor + static_configs: + - targets: + - cadvisor:8080 diff --git a/examples/05-observability/services/prometheus/quadlet/observe.network b/examples/05-observability/services/prometheus/quadlet/observe.network new file mode 100644 index 0000000..d1dd533 --- /dev/null +++ b/examples/05-observability/services/prometheus/quadlet/observe.network @@ -0,0 +1,6 @@ +[Unit] +Description=Shared observability network + +[Network] +NetworkName=observe +Subnet=192.0.2.0/24 diff --git a/examples/05-observability/services/prometheus/quadlet/prometheus-data.volume b/examples/05-observability/services/prometheus/quadlet/prometheus-data.volume new file mode 100644 index 0000000..4cffbb4 --- /dev/null +++ b/examples/05-observability/services/prometheus/quadlet/prometheus-data.volume @@ -0,0 +1,5 @@ +[Unit] +Description=Prometheus TSDB storage + +[Volume] +VolumeName=prometheus-data diff --git a/examples/05-observability/services/prometheus/quadlet/prometheus.container b/examples/05-observability/services/prometheus/quadlet/prometheus.container new file mode 100644 index 0000000..f8d07ac --- /dev/null +++ b/examples/05-observability/services/prometheus/quadlet/prometheus.container @@ -0,0 +1,20 @@ +[Unit] +Description=Prometheus metrics scraper +After=network-online.target +Wants=network-online.target + +[Container] +Image=docker.io/prom/prometheus:v2 +ContainerName=prometheus 
+PublishPort=9090:9090 +Network=observe.network +Volume=/etc/prometheus:/etc/prometheus:ro,Z +Volume=prometheus-data.volume:/prometheus:Z +Exec=--config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus + +[Service] +Restart=always +TimeoutStartSec=600 + +[Install] +WantedBy=multi-user.target default.target diff --git a/specs/016-source-repository-layout/examples/01-minimal-single-service/README.md b/specs/016-source-repository-layout/examples/01-minimal-single-service/README.md deleted file mode 100644 index 3c6a56d..0000000 --- a/specs/016-source-repository-layout/examples/01-minimal-single-service/README.md +++ /dev/null @@ -1,59 +0,0 @@ -# Example 01 — Minimal Single Service - -The simplest conformant CoreOps source repository under layout version `1`: -one service, one Quadlet container, one config file, one host. No -`service.yaml` — the service's `config-root` defaults to its identifier. - -This example backs spec 016 User Story 1, acceptance scenario 1. - -## Tree - -```text -01-minimal-single-service/ -├── services/ -│ └── whoami/ -│ ├── quadlet/ -│ │ └── whoami.container -│ └── config/ -│ └── whoami.toml -└── hosts/ - └── example-host/ - └── host.yaml -``` - -No `service.yaml` is present; the service's `config-root` is therefore -`whoami` (defaulted from the directory name). - -## Dispatch (what `core-ops plan` produces against `example-host`) - -| Source file | Host destination | -|---|---| -| `services/whoami/quadlet/whoami.container` | `/etc/containers/systemd/whoami.container` | -| `services/whoami/config/whoami.toml` | `/etc/whoami/whoami.toml` | - -## What this example demonstrates - -- The default-`config-root` rule: a service with no `service.yaml` - deploys its `config/` payload to `/etc//`. -- The minimum viable host: `host.yaml` lists exactly the services it - applies, in order. No host overlay is required if the host has nothing - to override. 
-- Quadlet payload dispatch: a `*.container` under a service's `quadlet/` - directory deploys to `/etc/containers/systemd/`, the standard Quadlet - search path. - -## What this example does NOT demonstrate - -- Variant `config-root` (see `02-variant-config-root/`). -- Multi-unit services or drop-ins (see `03-multi-unit-with-dropins/`). -- Host overlays (see `04-host-overlay/`). - -## Try it - -From this directory: - -```bash -core-ops plan --source-repo . --host example-host -``` - -The expected plan applies the two destinations above and nothing else. diff --git a/specs/016-source-repository-layout/examples/01-minimal-single-service/hosts/example-host/host.yaml b/specs/016-source-repository-layout/examples/01-minimal-single-service/hosts/example-host/host.yaml deleted file mode 100644 index 7774473..0000000 --- a/specs/016-source-repository-layout/examples/01-minimal-single-service/hosts/example-host/host.yaml +++ /dev/null @@ -1,3 +0,0 @@ -host: example-host -services: - - whoami diff --git a/specs/016-source-repository-layout/examples/01-minimal-single-service/services/whoami/config/whoami.toml b/specs/016-source-repository-layout/examples/01-minimal-single-service/services/whoami/config/whoami.toml deleted file mode 100644 index fcc9057..0000000 --- a/specs/016-source-repository-layout/examples/01-minimal-single-service/services/whoami/config/whoami.toml +++ /dev/null @@ -1,6 +0,0 @@ -# Example placeholder config for the minimal whoami service. -# Layout version 1 deploys this file to /etc/whoami/whoami.toml because -# the service has no service.yaml, so config-root defaults to "whoami". - -[example] -note = "This file demonstrates the default config-root path. The whoami container references /etc/whoami/whoami.toml as a read-only mount." 
diff --git a/specs/016-source-repository-layout/examples/02-variant-config-root/README.md b/specs/016-source-repository-layout/examples/02-variant-config-root/README.md deleted file mode 100644 index 4f59168..0000000 --- a/specs/016-source-repository-layout/examples/02-variant-config-root/README.md +++ /dev/null @@ -1,62 +0,0 @@ -# Example 02 — Variant Config Root - -A service whose deployment target differs from its identifier. The -service id is `traefik-dnschallenge` (descriptive, namespaceable in -the source repo) but its config payload deploys to `/etc/traefik/`, -the path the upstream binary expects. The mapping is declared in -`service.yaml` via `config-root: traefik`. - -This example backs spec 016 User Story 1, acceptance scenario 2. - -## Tree - -```text -02-variant-config-root/ -├── services/ -│ └── traefik-dnschallenge/ -│ ├── service.yaml # config-root: traefik -│ ├── quadlet/ -│ │ └── traefik-dnschallenge.container -│ └── config/ -│ └── traefik.yaml -└── hosts/ - └── example-host/ - └── host.yaml -``` - -## Dispatch - -| Source file | Host destination | -|---|---| -| `services/traefik-dnschallenge/quadlet/traefik-dnschallenge.container` | `/etc/containers/systemd/traefik-dnschallenge.container` | -| `services/traefik-dnschallenge/config/traefik.yaml` | `/etc/traefik/traefik.yaml` | - -The config file deploys to `/etc/traefik/traefik.yaml`, NOT -`/etc/traefik-dnschallenge/traefik.yaml`. The unit filename is -unaffected — it retains the service identifier for clarity in -`systemctl list-units` output. - -## What this example demonstrates - -- The `service.yaml` schema: a single optional key `config-root` - (string), kebab-case, no other keys permitted. -- Identity vs deployment target: the service identifier is free-form - and human-meaningful; the deployment target is dictated by what the - binary expects on disk. -- Invariant from spec 016: a service identifier never appears in a - deployed file path unless it equals its `config-root`. 
- -## What this example does NOT demonstrate - -- The default-`config-root` rule (see `01-minimal-single-service/`). -- Drop-ins or multi-unit services (see `03-multi-unit-with-dropins/`). -- Host overlays (see `04-host-overlay/`). - -## Try it - -```bash -core-ops plan --source-repo . --host example-host -``` - -The expected plan deploys the container under -`/etc/containers/systemd/` and the config under `/etc/traefik/`. diff --git a/specs/016-source-repository-layout/examples/02-variant-config-root/hosts/example-host/host.yaml b/specs/016-source-repository-layout/examples/02-variant-config-root/hosts/example-host/host.yaml deleted file mode 100644 index a6bcbc2..0000000 --- a/specs/016-source-repository-layout/examples/02-variant-config-root/hosts/example-host/host.yaml +++ /dev/null @@ -1,3 +0,0 @@ -host: example-host -services: - - traefik-dnschallenge diff --git a/specs/016-source-repository-layout/examples/02-variant-config-root/services/traefik-dnschallenge/config/traefik.yaml b/specs/016-source-repository-layout/examples/02-variant-config-root/services/traefik-dnschallenge/config/traefik.yaml deleted file mode 100644 index 94db082..0000000 --- a/specs/016-source-repository-layout/examples/02-variant-config-root/services/traefik-dnschallenge/config/traefik.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Example placeholder Traefik static configuration. -# Layout version 1 deploys this file to /etc/traefik/traefik.yaml because -# service.yaml declares config-root: traefik. 
- -entryPoints: - web: - address: ":80" - websecure: - address: ":443" - -providers: - file: - filename: /etc/traefik/traefik.yaml - watch: false diff --git a/specs/016-source-repository-layout/examples/03-multi-unit-with-dropins/README.md b/specs/016-source-repository-layout/examples/03-multi-unit-with-dropins/README.md deleted file mode 100644 index de320f3..0000000 --- a/specs/016-source-repository-layout/examples/03-multi-unit-with-dropins/README.md +++ /dev/null @@ -1,65 +0,0 @@ -# Example 03 — Multi-Unit Service with Drop-Ins - -A service that combines a Quadlet container with a native systemd -socket, with a base drop-in on each. Demonstrates how a single -service spans payload-kind directories and how drop-ins refine the -base units in lex-sorted order. - -This example backs spec 016 FR-003, FR-004, FR-014 (drop-in lex -order), and the multi-unit class of FR-023. - -## Tree - -```text -03-multi-unit-with-dropins/ -├── services/ -│ └── webhook-receiver/ -│ ├── quadlet/ -│ │ ├── webhook-receiver.container -│ │ └── webhook-receiver.container.d/ -│ │ └── 10-resources.conf -│ └── systemd/ -│ ├── webhook-receiver.socket -│ └── webhook-receiver.socket.d/ -│ └── 10-hardening.conf -└── hosts/ - └── example-host/ - └── host.yaml -``` - -No `service.yaml`; `config-root` defaults to `webhook-receiver`. -The example carries no `config/` files because the unit-and-drop-in -shape is the point. 
- -## Dispatch - -| Source file | Host destination | -|---|---| -| `services/webhook-receiver/quadlet/webhook-receiver.container` | `/etc/containers/systemd/webhook-receiver.container` | -| `services/webhook-receiver/quadlet/webhook-receiver.container.d/10-resources.conf` | `/etc/containers/systemd/webhook-receiver.container.d/10-resources.conf` | -| `services/webhook-receiver/systemd/webhook-receiver.socket` | `/etc/systemd/system/webhook-receiver.socket` | -| `services/webhook-receiver/systemd/webhook-receiver.socket.d/10-hardening.conf` | `/etc/systemd/system/webhook-receiver.socket.d/10-hardening.conf` | - -## What this example demonstrates - -- A single service spanning both `quadlet/` and `systemd/`. The - payload-kind directory governs the deployment target root, not the - service id. -- Drop-in convention: a file at `..d/.conf` is a - drop-in for the parent unit `.`. Drop-ins are ordered - lexicographically by filename — the `10-` prefix reserves room for - later overrides at higher numeric prefixes. -- The drop-in shape is identical for Quadlet and native systemd. - -## What this example does NOT demonstrate - -- The `service.yaml` schema (see `02-variant-config-root/`). -- Host overlays adding drop-ins on top of base drop-ins (see - `04-host-overlay/`). -- Config payload files (see `01-minimal-single-service/`). - -## Try it - -```bash -core-ops plan --source-repo . 
--host example-host -``` diff --git a/specs/016-source-repository-layout/examples/03-multi-unit-with-dropins/hosts/example-host/host.yaml b/specs/016-source-repository-layout/examples/03-multi-unit-with-dropins/hosts/example-host/host.yaml deleted file mode 100644 index 73440fb..0000000 --- a/specs/016-source-repository-layout/examples/03-multi-unit-with-dropins/hosts/example-host/host.yaml +++ /dev/null @@ -1,3 +0,0 @@ -host: example-host -services: - - webhook-receiver diff --git a/specs/016-source-repository-layout/examples/03-multi-unit-with-dropins/services/webhook-receiver/quadlet/webhook-receiver.container b/specs/016-source-repository-layout/examples/03-multi-unit-with-dropins/services/webhook-receiver/quadlet/webhook-receiver.container deleted file mode 100644 index 52cfa4a..0000000 --- a/specs/016-source-repository-layout/examples/03-multi-unit-with-dropins/services/webhook-receiver/quadlet/webhook-receiver.container +++ /dev/null @@ -1,15 +0,0 @@ -[Unit] -Description=Webhook receiver — multi-unit example service -After=network-online.target webhook-receiver.socket -Requires=webhook-receiver.socket - -[Container] -Image=ghcr.io/example/webhook-receiver:latest -PublishPort=8080:8080 - -[Service] -Restart=always -TimeoutStartSec=900 - -[Install] -WantedBy=multi-user.target default.target diff --git a/specs/016-source-repository-layout/examples/03-multi-unit-with-dropins/services/webhook-receiver/systemd/webhook-receiver.socket b/specs/016-source-repository-layout/examples/03-multi-unit-with-dropins/services/webhook-receiver/systemd/webhook-receiver.socket deleted file mode 100644 index c710cd8..0000000 --- a/specs/016-source-repository-layout/examples/03-multi-unit-with-dropins/services/webhook-receiver/systemd/webhook-receiver.socket +++ /dev/null @@ -1,9 +0,0 @@ -[Unit] -Description=Webhook receiver socket activation - -[Socket] -ListenStream=8080 -Accept=no - -[Install] -WantedBy=sockets.target diff --git 
a/specs/016-source-repository-layout/examples/03-multi-unit-with-dropins/services/webhook-receiver/systemd/webhook-receiver.socket.d/10-hardening.conf b/specs/016-source-repository-layout/examples/03-multi-unit-with-dropins/services/webhook-receiver/systemd/webhook-receiver.socket.d/10-hardening.conf deleted file mode 100644 index 0356bdf..0000000 --- a/specs/016-source-repository-layout/examples/03-multi-unit-with-dropins/services/webhook-receiver/systemd/webhook-receiver.socket.d/10-hardening.conf +++ /dev/null @@ -1,3 +0,0 @@ -[Socket] -NoDelay=yes -KeepAlive=yes diff --git a/specs/016-source-repository-layout/examples/04-host-overlay/README.md b/specs/016-source-repository-layout/examples/04-host-overlay/README.md deleted file mode 100644 index 46ee403..0000000 --- a/specs/016-source-repository-layout/examples/04-host-overlay/README.md +++ /dev/null @@ -1,75 +0,0 @@ -# Example 04 — Host Overlay - -A base service with one container and one config file, plus a host -that contributes both a drop-in addition and a `config/` whole-file -replacement. Demonstrates the per-host overlay shape and the merge -order rule (base service drop-ins lex-sorted, then host drop-ins -lex-sorted; whole-file `config/` entries fully replace base files). - -This example backs spec 016 FR-005 (host overlays MAY contribute -drop-ins and whole-file replacements but MUST NOT introduce base -units), FR-014 (drop-in merge order), and the host-overlay class of -FR-023. - -## Tree - -```text -04-host-overlay/ -├── services/ -│ └── node-exporter/ -│ ├── quadlet/ -│ │ └── node-exporter.container -│ └── config/ -│ └── node-exporter.env -└── hosts/ - └── host-a/ - ├── host.yaml - └── node-exporter/ - ├── quadlet/ - │ └── node-exporter.container.d/ - │ └── 30-listen-port.conf - └── config/ - └── node-exporter.env -``` - -No `service.yaml`; `config-root` defaults to `node-exporter`. 
- -## Dispatch (host = `host-a`) - -| Source file | Host destination | -|---|---| -| `services/node-exporter/quadlet/node-exporter.container` | `/etc/containers/systemd/node-exporter.container` | -| `hosts/host-a/node-exporter/quadlet/node-exporter.container.d/30-listen-port.conf` | merged into `/etc/containers/systemd/node-exporter.container.d/`, lex-sorted after any base service drop-ins | -| `hosts/host-a/node-exporter/config/node-exporter.env` | `/etc/node-exporter/node-exporter.env` (replaces the base service's file) | - -The base service ships its own `config/node-exporter.env`; the host's -`config/node-exporter.env` replaces it byte-for-byte at the same -destination. There is no merge or template at the file level — config -overlays are whole-file replacements only in layout v1. - -## What this example demonstrates - -- Host overlay shape: per-service subdirectory under `hosts//` - mirroring the service's payload-kind tree. -- Drop-in addition from a host: a `.conf` under `..d/` that - refines the base unit. The numeric prefix `30-` lex-sorts after any - base service drop-ins prefixed `10-` or `20-`. -- Whole-file replacement: a host's `config/` overrides the base - service's `config/` at the same path under `/etc//`. -- Forbidden form (NOT shown but enforced by the parser): a base unit - file directly at `hosts/host-a/node-exporter/quadlet/.` is - rejected with a diagnostic — the host overlay can only refine, not - introduce. - -## What this example does NOT demonstrate - -- The `service.yaml` schema (see `02-variant-config-root/`). -- Multi-unit services (see `03-multi-unit-with-dropins/`). -- Diagnostics on legacy or invalid layouts (see the integration - tests in `tests/integration/test_source_repo_layout.rs`). - -## Try it - -```bash -core-ops plan --source-repo . 
--host host-a -``` diff --git a/specs/016-source-repository-layout/examples/04-host-overlay/hosts/host-a/host.yaml b/specs/016-source-repository-layout/examples/04-host-overlay/hosts/host-a/host.yaml deleted file mode 100644 index 64b217a..0000000 --- a/specs/016-source-repository-layout/examples/04-host-overlay/hosts/host-a/host.yaml +++ /dev/null @@ -1,3 +0,0 @@ -host: host-a -services: - - node-exporter diff --git a/specs/016-source-repository-layout/examples/04-host-overlay/hosts/host-a/node-exporter/config/node-exporter.env b/specs/016-source-repository-layout/examples/04-host-overlay/hosts/host-a/node-exporter/config/node-exporter.env deleted file mode 100644 index 2ff44b1..0000000 --- a/specs/016-source-repository-layout/examples/04-host-overlay/hosts/host-a/node-exporter/config/node-exporter.env +++ /dev/null @@ -1,4 +0,0 @@ -# Host-a node-exporter environment — replaces the base service file -# byte-for-byte at /etc/node-exporter/node-exporter.env. -NODE_EXPORTER_LISTEN=:9100 -NODE_EXPORTER_COLLECTORS=cpu,meminfo,filesystem,netdev,systemd,processes diff --git a/specs/016-source-repository-layout/examples/04-host-overlay/hosts/host-a/node-exporter/quadlet/node-exporter.container.d/30-listen-port.conf b/specs/016-source-repository-layout/examples/04-host-overlay/hosts/host-a/node-exporter/quadlet/node-exporter.container.d/30-listen-port.conf deleted file mode 100644 index 58cdee6..0000000 --- a/specs/016-source-repository-layout/examples/04-host-overlay/hosts/host-a/node-exporter/quadlet/node-exporter.container.d/30-listen-port.conf +++ /dev/null @@ -1,2 +0,0 @@ -[Container] -PublishPort=9101:9100 diff --git a/specs/016-source-repository-layout/examples/04-host-overlay/services/node-exporter/config/node-exporter.env b/specs/016-source-repository-layout/examples/04-host-overlay/services/node-exporter/config/node-exporter.env deleted file mode 100644 index a900d91..0000000 --- 
a/specs/016-source-repository-layout/examples/04-host-overlay/services/node-exporter/config/node-exporter.env +++ /dev/null @@ -1,4 +0,0 @@ -# Base node-exporter environment — applied on hosts that do not provide -# a host-overlay replacement. -NODE_EXPORTER_LISTEN=:9100 -NODE_EXPORTER_COLLECTORS=cpu,meminfo,filesystem,netdev diff --git a/specs/016-source-repository-layout/examples/04-host-overlay/services/node-exporter/quadlet/node-exporter.container b/specs/016-source-repository-layout/examples/04-host-overlay/services/node-exporter/quadlet/node-exporter.container deleted file mode 100644 index e8743a8..0000000 --- a/specs/016-source-repository-layout/examples/04-host-overlay/services/node-exporter/quadlet/node-exporter.container +++ /dev/null @@ -1,16 +0,0 @@ -[Unit] -Description=Prometheus node_exporter — host-overlay example service -After=network-online.target -Wants=network-online.target - -[Container] -Image=quay.io/prometheus/node-exporter:latest -PublishPort=9100:9100 -EnvironmentFile=/etc/node-exporter/node-exporter.env - -[Service] -Restart=always -TimeoutStartSec=900 - -[Install] -WantedBy=multi-user.target default.target diff --git a/specs/016-source-repository-layout/spec.md b/specs/016-source-repository-layout/spec.md index b91d2c8..ec4d88d 100644 --- a/specs/016-source-repository-layout/spec.md +++ b/specs/016-source-repository-layout/spec.md @@ -127,6 +127,15 @@ The operator has a live source repository running the legacy layout. They migrat - a multi-unit service repository combining Quadlet containers and native systemd sockets with drop-ins on each, - a host overlay repository demonstrating both drop-in additions and `config/` whole-file replacements. 
+ > **[SUPERSEDED by spec/017]** (2026-05-05) The four in-tree example fixtures + > `specs/016-source-repository-layout/examples/{01-minimal-single-service,02-variant-config-root,03-multi-unit-with-dropins,04-host-overlay}/` + > are removed by spec/017's real-world-validation iteration, which publishes + > five service-shaped examples under top-level `examples//`. The + > same shape coverage (default config-root, variant config-root, multi-unit + > with drop-ins, host overlay) is retained as a side effect of the + > real-world translations. See `specs/017-real-world-validation/spec.md` + > FR-017–FR-019 for the supersession rationale. + #### Migration - **FR-024**: The system MUST refuse to load legacy layouts (FR-012); migration is an out-of-band operation. A migration procedure (script or documented sequence of moves) MUST be provided as part of this feature so that the live legacy repository can be converted in a single mechanical pass. diff --git a/specs/016-source-repository-layout/tasks.md b/specs/016-source-repository-layout/tasks.md index 34f0883..6476f34 100644 --- a/specs/016-source-repository-layout/tasks.md +++ b/specs/016-source-repository-layout/tasks.md @@ -64,10 +64,10 @@ Single Rust project at repository root: > Write these tests FIRST. Confirm they FAIL before implementation begins. 
-- [X] T101 [P] [US1] Author `specs/016-source-repository-layout/examples/01-minimal-single-service/` — one service (no `service.yaml`), one Quadlet `*.container`, one `config/` file; documented `README.md` describing the example -- [X] T102 [P] [US1] Author `specs/016-source-repository-layout/examples/02-variant-config-root/` — one service whose id differs from `config-root` (modeled on `traefik-dnschallenge` → `/etc/traefik/`), with `service.yaml` and a `config/` file; `README.md` -- [X] T103 [P] [US1] Author `specs/016-source-repository-layout/examples/03-multi-unit-with-dropins/` — service with both `quadlet/` (one `*.container` + drop-in) and `systemd/` (one `*.socket` + drop-in); `README.md` -- [X] T104 [P] [US1] Author `specs/016-source-repository-layout/examples/04-host-overlay/` — base service with units, plus a host directly under `hosts///` contributing a drop-in and a `config/` whole-file replacement; `README.md` +- [X] T101 [P] [US1] Author `specs/016-source-repository-layout/examples/01-minimal-single-service/` — one service (no `service.yaml`), one Quadlet `*.container`, one `config/` file; documented `README.md` describing the example [SUPERSEDED by spec/017] +- [X] T102 [P] [US1] Author `specs/016-source-repository-layout/examples/02-variant-config-root/` — one service whose id differs from `config-root` (modeled on `traefik-dnschallenge` → `/etc/traefik/`), with `service.yaml` and a `config/` file; `README.md` [SUPERSEDED by spec/017] +- [X] T103 [P] [US1] Author `specs/016-source-repository-layout/examples/03-multi-unit-with-dropins/` — service with both `quadlet/` (one `*.container` + drop-in) and `systemd/` (one `*.socket` + drop-in); `README.md` [SUPERSEDED by spec/017] +- [X] T104 [P] [US1] Author `specs/016-source-repository-layout/examples/04-host-overlay/` — base service with units, plus a host directly under `hosts///` contributing a drop-in and a `config/` whole-file replacement; `README.md` [SUPERSEDED by spec/017] - [X] T105 [P] [US1] 
Add `tests/integration/test_source_repo_layout.rs` with `#[test]` functions covering: each example loads cleanly (4 tests), reserved-name rejection (FR-009), config path traversal (FR-010), destination conflict (FR-011), legacy artifact rejection (FR-012), orphan drop-in (FR-013), determinism — repeated load yields identical `DesiredState` (FR-014/FR-015), missing-service diagnostic (FR-016), malformed `service.yaml` diagnostic for unknown key and parse error (FR-017), host overlay base-unit rejection (FR-018). Each test asserts diagnostic message content. - [X] T106 [P] [US1] Authored `tests/fixtures/verification/scenarios/source-repo-variant-config-root.yaml` plus a single-revision repo fixture under `tests/fixtures/verification/repos/source-repo-variant-config-root/source-repo-variant-config-root-v1/` (services/traefik-dnschallenge with `service.yaml: { config-root: traefik }` + `config/traefik.yaml` + container quadlet, plus hosts/example-host/host.yaml). The scenario boots a guest, runs init+apply (with `host: example-host` so the parser pins to a known host id rather than the auto-generated VM hostname), then exercises four `guest_command` checks: `/etc/traefik/traefik.yaml` exists, contains the expected payload, `/etc/traefik-dnschallenge/` does NOT exist, and `/etc/containers/systemd/traefik-dnschallenge.container` exists. Synthetic-mode dry-run passes 7/7 assertions; running against a real VM is T406. 
diff --git a/specs/017-real-world-validation/checklists/requirements.md b/specs/017-real-world-validation/checklists/requirements.md new file mode 100644 index 0000000..0538967 --- /dev/null +++ b/specs/017-real-world-validation/checklists/requirements.md @@ -0,0 +1,60 @@ +# Specification Quality Checklist: Real-World Validation, Examples, and Stateless Source-Repo Mode + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2026-05-05 +**Last Revised**: 2026-05-05 (scope expansion: stateless `--source-repo` flag + spec/016 example removal) +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [X] No implementation details (languages, frameworks, APIs) +- [X] Focused on user value and business needs +- [X] Written for non-technical stakeholders +- [X] All mandatory sections completed + +## Requirement Completeness + +- [X] No [NEEDS CLARIFICATION] markers remain +- [X] Requirements are testable and unambiguous +- [X] Success criteria are measurable +- [X] Success criteria are technology-agnostic (no implementation details) +- [X] All acceptance scenarios are defined +- [X] Edge cases are identified +- [X] Scope is clearly bounded +- [X] Dependencies and assumptions identified + +## Feature Readiness + +- [X] All functional requirements have clear acceptance criteria +- [X] User scenarios cover primary flows +- [X] Feature meets measurable outcomes defined in Success Criteria +- [X] No implementation details leak into specification + +## Notes + +- All checklist items pass on the second iteration. The first iteration was + authored against the operator-approved plan in + `/home/outergod/.claude/plans/this-is-spec-17-greedy-pancake.md`. The rewrite + was triggered by an operator correction surfacing during user-story authoring: + the original US1 acceptance scenario referenced `core-ops plan --repo X --rev Y`, + flags removed by spec/015 long ago. 
Investigation revealed (a) my mis-inference + came from stale documentation in `docs/follow-ups.md`, `docs/development.md`, + and `infra/repo/README.md`, and (b) the four spec/016 in-tree example READMEs + reference a `--source-repo` flag that has never been implemented — they shipped + in v2.0.0 with non-runnable "Try it" instructions. +- Operator decisions on the consequential trade: scope `--source-repo` into + spec/017 for both `plan` and `apply` (FR-010 through FR-016); remove the four + spec/016 example fixtures as superseded by spec/017's real-world examples + (FR-017 through FR-019); clean up the stale CLI documentation (FR-020). +- The `Verification Guidance` section from the spec template was deliberately + omitted: this feature does not introduce new mutation classes — stateless + apply is an entry-point variation over the existing apply path. Existing + apply VM-backed scenarios remain authoritative. The exemption is documented + explicitly in `Constitution Alignment → Test contract` per Principle 10. +- Stateless-mode provenance representation (e.g., the `(stateless)` sentinel + for `desired_state.requested_ref`) is intentionally left as an assumption to + be locked during `/speckit.plan` after a review of + `src/core/types.rs::DesiredStateProvenance`. This is not a `[NEEDS + CLARIFICATION]` marker because the user value (stateless invocations are + observable in `core-ops status` and distinguished from init'd state) is fully + specified; only the byte-level encoding is deferred. diff --git a/specs/017-real-world-validation/contracts/cli-flag.md b/specs/017-real-world-validation/contracts/cli-flag.md new file mode 100644 index 0000000..c9a5935 --- /dev/null +++ b/specs/017-real-world-validation/contracts/cli-flag.md @@ -0,0 +1,104 @@ +# Contract: `--source-repo` CLI flag + +**Phase**: 1 | **Spec**: [../spec.md](../spec.md) | **Plan**: [../plan.md](../plan.md) + +The `--source-repo ` flag is the user-facing surface of stateless mode. 
This contract specifies its shape, validation, mutual exclusion, error semantics, and help-text expectations. Implementation lives in `src/cli/args.rs`, `src/cli/plan.rs`, `src/cli/apply.rs`, and `src/cli/explain.rs`. + +--- + +## Acceptance: command coverage + +The flag MUST be accepted by exactly these subcommands: + +| Subcommand | Accepts `--source-repo`? | Mode semantics | +|------------|--------------------------|----------------| +| `core-ops plan` | YES | Read-only against persisted controller config; honors `--audit-dir` when explicitly set. | +| `core-ops apply` | YES | Mutates host state; writes audit + status snapshot with path-based provenance; init'd `desired_state.*` unchanged. | +| `core-ops explain` | YES | Read-only; no writes anywhere. | +| `core-ops init` | NO — init takes `` as a positional argument with separate semantics. | +| `core-ops agent` | NO — timer-driven; requires persisted tracking. | +| `core-ops status` | NO — reads only persisted state. | +| `core-ops skill` | NO — generates skill bundles; not source-repo-driven. | + +Adding the flag to a non-listed subcommand is spec drift. + +--- + +## Validation rules + +1. **Argument shape**: `--source-repo ` where `` is a filesystem path string. Implementation: clap `value_parser = clap::value_parser!(PathBuf)`. + +2. **`--host` requirement**: When `--source-repo` is present, `--host ` MUST also be present. Stateless mode has no host fallback; rejection happens at clap-level via `requires("host")`. + +3. **Path existence and shape**: `` MUST be an existing directory. Validation runs *before* the parser: + - Path does not exist → exit non-zero with `error: --source-repo path does not exist: `. + - Path exists but is not a directory → exit non-zero with `error: --source-repo path is not a directory: `. + - Path is a directory but the parser rejects its layout (legacy artifacts, missing services/, etc.) → exit non-zero with the existing parser diagnostic chain. + +4. 
**No git-URL fallback**: A value like `https://github.com/foo/bar.git` is treated as a path string and fails at the existence check. There is no implicit URL parsing; that is `core-ops init`'s job. + +5. **Canonicalization**: The provided path is canonicalized (symlinks resolved, made absolute) before being recorded as `desired_state.repository` provenance. + +6. **Mutual exclusion within an invocation**: `--source-repo` does not conflict with any flag currently on `plan`/`apply`/`explain`. (No legacy `--repo` / `--rev` flag exists to conflict with — they were removed in spec/015.) + +7. **Coexistence with init'd controller state**: The flag's presence in a single invocation is independent of whether `core-ops init` has been previously run on the host. Per the 2026-05-05 clarification (Q2), stateless invocations execute regardless of init'd state and never mutate `desired_state.repository` / `desired_state.requested_ref` of the persisted configuration (FR-013, SC-009). + +--- + +## Provenance recording (stateless `apply` only) + +When `core-ops apply --source-repo ` succeeds, the audit record and the status snapshot record provenance per the 2026-05-05 clarification (Q3): + +| Source path state | `desired_state.repository` | `desired_state.requested_ref` | +|-------------------|----------------------------|-------------------------------| +| Non-git directory | `` | `(stateless)` | +| Git working tree, dirty | `` | `(stateless+dirty)` | +| Git working tree, clean at commit `abc1234…` | `` | `` | + +Stateless `plan` and `explain` write no persisted provenance (they bypass `/var/lib/core-ops/`). Stateless `plan` MAY write a plan-audit record to an operator-specified `--audit-dir`, with the same provenance shape as above (per 2026-05-05 clarification Q4). + +--- + +## Help-text contract (FR-016) + +The `--help` output for each accepting subcommand MUST include: + +1. The flag name and value type: `--source-repo `. +2. 
A one-line description: e.g., `Use a filesystem path as the source of desired state, bypassing the persisted init'd configuration.` +3. The `--host` co-requirement: `Requires --host. The init'd mode (no flag) sources from persisted state set by 'core-ops init'.` +4. A pointer to the canonical init'd-mode workflow: `For long-lived tracking, run 'core-ops init ' once and omit --source-repo on subsequent invocations.` + +The help text is part of the user contract; changing its shape between this spec and a future iteration requires a SemVer bump per Principle 9 (Conservative Public Evolution). + +--- + +## Error semantics + +| Condition | Exit code | Stderr | +|-----------|-----------|--------| +| `--source-repo` set, `--host` missing | clap default (2) | clap-generated `error: the following required arguments were not provided: --host ` | +| `` does not exist | 64 (`EX_USAGE`) | `error: --source-repo path does not exist: ` (miette-rendered with help text) | +| `` is not a directory | 64 | `error: --source-repo path is not a directory: ` | +| `` is a directory but layout is invalid | 65 (`EX_DATAERR`) | existing parser diagnostic chain via `LayoutError` (`src/core/errors.rs`) | +| Git ref detection subprocess fails (`git -C ...`) | continues with `(stateless)` fallback; logs miette warning to stderr | `warning: git ref detection failed for ; recording as non-git source` | +| `--source-repo` used on `init`/`agent`/`status`/`skill` | clap default (2) | clap-generated `error: unexpected argument '--source-repo' found` | + +Exit codes follow the pattern already established in `scripts/migrate-legacy-source-repo.sh` (64 = usage, 65 = data, 66 = path) for cross-tool consistency. + +--- + +## Test coverage (FR-006, plan.md D8) + +Implementations of this contract MUST be covered by: + +1. **clap unit tests** (`src/cli/args.rs::tests`): each acceptance case in the table above, plus rejection cases. +2. 
**Integration tests** (`tests/integration/test_stateless_{plan,apply,explain}.rs`): real `cargo run --bin core-ops` invocations against `tempfile::TempDir`-built source repos, asserting exit codes and stderr substrings. +3. **Per-example integration tests** (`tests/integration/test_examples__.rs`): each of the five examples invoked via `core-ops plan --source-repo examples/ --host ` and asserted exit 0. + +--- + +## Future evolution + +- A `--repo` short alias is intentionally NOT added in this slice — `--repo` was removed by spec/015 and reintroducing the spelling would cause user confusion. If a shorter form is wanted later, it should be a new clap alias added explicitly with documentation. +- A reverse alias (e.g., environment variable `CORE_OPS_SOURCE_REPO`) is out of scope for this slice; spec/004 / spec/006 followed an environment-variable-free model. +- Stateless mode for `agent` is explicitly out of scope (architectural mismatch). The follow-ups document records no plan to add it. diff --git a/specs/017-real-world-validation/contracts/synthesis-table.md b/specs/017-real-world-validation/contracts/synthesis-table.md new file mode 100644 index 0000000..a3f9b3b --- /dev/null +++ b/specs/017-real-world-validation/contracts/synthesis-table.md @@ -0,0 +1,118 @@ +# Contract: Friction-Classification Synthesis Table + +**Phase**: 1 | **Spec**: [../spec.md](../spec.md) | **Plan**: [../plan.md](../plan.md) | **Data model**: [../data-model.md#e2--synthesis-table-schema-markdown-rendered-in-specmd](../data-model.md#e2--synthesis-table-schema-markdown-rendered-in-specmd) + +The synthesis table is the canonical evidence base for what spec/016 layout decisions held up under real-world translation and what didn't. It is a **markdown table inside `spec.md`**, populated during Phase 2 (Translation) of `/speckit.tasks` and reviewed during Phase 3 (Synthesis). This contract specifies its shape, classification semantics, lifecycle, and invariants. 
+ +--- + +## Location + +`specs/017-real-world-validation/spec.md`, in a section titled `## Synthesis table` placed after `## Success Criteria` and before `## Assumptions`. The table is added by a single `/speckit.tasks`-emitted task in Phase 3 (Synthesis); rows are inserted by Phase 2 (Translation) tasks as friction surfaces, then reviewed and finalized in Phase 3. + +If translation surfaces zero friction (the spec/016 layout was sufficient as-shipped for all five setups), the section reads: + +```markdown +## Synthesis table + +No friction surfaced during translation of the five real-world setups. The +spec/016 source-repository layout was sufficient as-shipped. SC-002 is +trivially satisfied. +``` + +--- + +## Row shape + +Each row is exactly one friction. Columns and column order: + +| # | Column | Allowed values | Required? | +|---|--------|----------------|-----------| +| 1 | **Friction** | One-line prose; ≤ 100 chars; describes the layout gap or CLI gap encountered. | Yes | +| 2 | **Affected examples** | Comma-separated example slugs from {`01-caddy-whoami`, `02-nextcloud`, `03-immich`, `04-traefik-authelia`, `05-observability`}. | Yes (≥ 1 slug) | +| 3 | **Classification** | Exactly one of `A`, `B`, `C` (case-sensitive). | Yes | +| 4 | **Rationale** | One-line prose; ≤ 200 chars; explains why this classification fits. | Yes | +| 5 | **Action** | One of: `Escalate to spec/`, `Documented in /README.md`, `Tracked in docs/follow-ups.md`. 
| Yes | + +Markdown rendering: + +```markdown +| Friction | Affected examples | Classification | Rationale | Action | +|----------|-------------------|----------------|-----------|--------| +| Stateless plan against examples blocked all five fixtures | 01-caddy-whoami, 02-nextcloud, 03-immich, 04-traefik-authelia, 05-observability | A | Layout was fine; the bottleneck was a missing CLI flag (`--source-repo`) shared across the entire roster | Escalate to spec/017 (this iteration absorbs the fix) | +| <…another row…> | <…> | <…> | <…> | <…> | +``` + +--- + +## Classification semantics + +### `A` — Amend-now (escalate to follow-up spec) + +- **Trigger**: ≥ 2 of 5 examples are blocked by *the same* layout gap (i.e., the spec/016 layout cannot express something necessary across multiple real workloads). +- **Required action**: row's Action MUST read `Escalate to spec/<NNN>` where `<NNN>` is a real (or to-be-created-imminently) follow-up spec number. +- **Implication**: spec/017 itself does NOT land the layout amendment. The escalation creates a separate spec branch and PR. spec/017 ships with the friction documented and the follow-up referenced. +- **Exception**: if the gap is CLI-level rather than layout-level and the fix is small, this slice MAY absorb the fix inline. In that case the Action is `Escalate to spec/017 (this iteration absorbs the fix)` (per the 2026-05-05 stateless-mode example). Self-escalation requires explicit operator approval recorded in spec.md Clarifications. + +### `B` — Workaround-with-doc (default) + +- **Trigger**: friction is real and addressable via a reserved-prefix subdir, host-side preparation step, drop-in trick, or other documented pattern. The workaround does not require any layout or CLI change. +- **Required action**: row's Action MUST read `Documented in <slug>/README.md` where `<slug>` is one of the affected example slugs. The example's README MUST contain a `## Known limitations` heading with the friction's name and the workaround text.
+- **Implication**: future authors who hit this friction find the workaround in the example README and the row in this synthesis table. + +### `C` — Defer-to-spec-018 (acknowledged but not addressed) + +- **Trigger**: friction is real, neither blocking nor addressable in this slice. The translator could not find a clean workaround AND the gap does not block ≥ 2 examples (so it doesn't qualify for A). +- **Required action**: row's Action MUST read `Tracked in docs/follow-ups.md`. The corresponding bullet MUST exist in `docs/follow-ups.md` by the time spec/017 merges. +- **Implication**: spec/017 ships with the friction surfaced in the synthesis table and the follow-up entry. A future spec (e.g., spec/018) decides whether to address it. + +--- + +## Invariants + +The synthesis table MUST satisfy these invariants by the time the slice merges. They are review-time invariants, not machine-checked, but `/speckit.analyze` (the spec-kit analysis command, if run) SHOULD flag violations. + +1. **Coverage with example READMEs**: every friction surfaced in any `examples/<slug>/README.md` (under `## Known limitations` or equivalent) MUST appear as a row in this table. (SC-002.) +2. **Classification A self-consistency**: any row classified `A` whose Action is `Escalate to spec/<NNN>` MUST reference an `<NNN>` that either already exists or will exist within one merge cycle of spec/017. Dangling references are spec drift. +3. **Classification B self-consistency**: any row classified `B` MUST have its workaround text in at least one of the affected examples' READMEs under `## Known limitations`. +4. **Classification C self-consistency**: any row classified `C` MUST correspond to a bullet in `docs/follow-ups.md` whose text references the friction. +5. **No unplanned self-absorption**: apart from the stateless-mode row, no `A`-classified row may carry the Action `Escalate to spec/017 (this iteration absorbs the fix)`. Any other in-scope absorption is scope creep that should have been declared during `/speckit.clarify`, not at merge time.
+ +--- + +## Lifecycle + +```text + /speckit.tasks (Phase 3 task) inserts an empty + ## Synthesis table section. + │ + ▼ + Translation tasks (Phase 2) populate rows as friction surfaces: + - Each translator writes a row in the table at the time + friction is encountered, marking classification A/B/C + per the operator's judgment. + - Each translator updates the affected example's + README "Known limitations" section (B classification) + OR opens a follow-up bullet in docs/follow-ups.md + (C classification) OR opens a follow-up spec + placeholder (A classification) — same commit. + │ + ▼ + Synthesis task (Phase 3) reviews the populated table: + - Check invariants 1-5 above. + - Promote any C rows to A if a second example surfaces + the same friction (the threshold for amend-now is hit). + - Demote any A rows to C if the "≥ 2 examples" condition + did not actually hold. + - Final pass: row count, action consistency, dangling refs. + │ + ▼ + Slice merge: synthesis table becomes the evidence record. +``` + +--- + +## Future evolution + +- If/when the table grows beyond ~20 rows (a sign that the spec/016 layout has substantial friction across many workloads), promote it from a markdown table in spec.md to a structured YAML or TOML data file with a separate validator binary. Out of scope for this slice. +- The classification system itself (A/B/C) is intentionally minimal. Future iterations may add classifications (e.g., `D` for "duplicate of an A elsewhere"), but the additive change requires a spec amendment. diff --git a/specs/017-real-world-validation/data-model.md b/specs/017-real-world-validation/data-model.md new file mode 100644 index 0000000..9e33b6d --- /dev/null +++ b/specs/017-real-world-validation/data-model.md @@ -0,0 +1,89 @@ +# Data Model: Real-World Validation, Examples, and Stateless Source-Repo Mode + +**Phase**: 1 | **Spec**: [spec.md](./spec.md) | **Plan**: [plan.md](./plan.md) + +This feature does not introduce new core types. 
It introduces (a) **value-level conventions** on the existing `DesiredStateProvenance.requested_ref` field for stateless invocations, and (b) a **markdown-table schema** for the friction-classification synthesis surface in spec.md. + +--- + +## E1 — `DesiredStateProvenance.requested_ref` value conventions (extension) + +**Existing struct** (unchanged): `src/core/types.rs:553-560`: + +```rust +pub struct DesiredStateProvenance { + pub repository: String, // git URL or absolute filesystem path + pub requested_ref: String, // git ref name, SHA, or sentinel + pub layout_version: Option<String>, + // ... other fields unchanged +} +``` + +**New value conventions for `requested_ref`** (introduced by spec/017): + +| Value shape | Meaning | When recorded | +|-------------|---------|---------------| +| `<40-char hex SHA>` | Specific git commit | (a) Init'd mode against a pinned ref. (b) Stateless mode when `--source-repo` is a clean git checkout at a known commit (per FR-013 + 2026-05-05 clarification Q3). | +| `<branch>` / `<tag>` | Symbolic git ref | Init'd mode when tracking a branch or tag; resolution-time SHA recorded separately in `repository_ref`. | +| `(stateless)` | Sentinel: non-git source | Stateless mode when `--source-repo` is a directory that is not a git working tree. | +| `(stateless+dirty)` | Sentinel: dirty git working tree | Stateless mode when `--source-repo` is a git working tree with uncommitted modifications, additions, deletions, or untracked files inside the source-repo path. | + +**Disambiguation guarantee**: sentinel values begin with the `(` character. Within stateless mode the only non-sentinel value recorded is a 40-character hex SHA, which can never begin with `(`, so a stateless sentinel cannot be confused with a stateless commit reference. Note that git itself does NOT reserve parentheses (or `+`): `git check-ref-format` accepts `refs/heads/(stateless)`, so a branch with that literal name is legal, if unlikely. Consumer code that pattern-matches against `requested_ref` across both modes therefore relies on the convention that init'd-mode refs do not start with `(`; git does not enforce it. + +**`repository` value conventions for stateless invocations**: the canonical, symlink-resolved absolute path of the source-repo directory (from `std::fs::canonicalize`). 
This distinguishes path-based provenance from URL-based provenance unambiguously — git URLs always contain `:` (in `https://` or `user@host:`), while canonical filesystem paths begin with `/`. + +**Compatibility**: existing fixtures and tests that hardcode `requested_ref` values use either branch names (e.g., `master`, `main`) or SHA-shaped strings. None of them use `(`-prefixed values. Adding the sentinels is a non-breaking value-level extension. + +--- + +## E2 — Synthesis-table schema (markdown-rendered in `spec.md`) + +The friction-classification synthesis table is the validation iteration's evidence base. It lives directly in `spec.md` as a markdown table, populated during the Translation phase (Phase 2 of `/speckit.tasks`) and reviewed during the Synthesis phase. + +**Columns** (in order): + +| Column | Type | Purpose | +|--------|------|---------| +| `Friction` | short prose | One-line description of the gap encountered. | +| `Affected examples` | comma-separated example slugs (e.g., `01-caddy-whoami, 03-immich`) | Which examples surfaced this friction. | +| `Classification` | enum: `A` / `B` / `C` | A = amend-now (≥2 examples blocked, escalate to follow-up spec); B = workaround-with-doc (default); C = defer-to-spec-018 (acknowledged, not addressed). | +| `Rationale` | short prose | Why this classification. For A: name the structural impossibility. For B: name the workaround. For C: name why deferral is acceptable. | +| `Action` | one of: `Escalate to spec/` / `Documented in /README.md` / `Tracked in docs/follow-ups.md` | Next step. | + +**Validation rules**: +- Classification is exactly one letter from `A`, `B`, `C`. Values outside this set are spec drift. +- `A` rows MUST have `Action = "Escalate to spec/"` referencing a real (or to-be-created) follow-up spec number. The escalation triggers a separate spec/ branch and PR. 
+- `B` rows MUST have at least one example slug listed under `Affected examples`, and that example's `README.md` MUST document the workaround under a "Known limitations" heading. +- `C` rows MUST have `Action = "Tracked in docs/follow-ups.md"` and the corresponding bullet MUST exist in the follow-ups document by the time the slice merges. + +**Empty-table semantics**: an empty synthesis table (no rows) means no friction was encountered during translation, which is itself a finding — the spec/016 layout was sufficient as-shipped. SC-002 is trivially satisfied. + +**Schema is enforceable but not tested**: this is a markdown table in a spec, not a structured data file. The validation rules above are lint-style invariants; deviation is caught by review, not by code. If future iterations want machine-checkability, the table can be promoted to a YAML or TOML data file with a separate validator binary; out of scope here. + +--- + +## E3 — Per-example directory shape (already constrained by spec/016 parser) + +This is restated from `spec.md` FR-002 for completeness, not as a new model. Each `examples//` directory MUST have: + +```text +examples// +├── README.md # parser-tolerated at example root only +├── services/ +│ └── / +│ ├── service.yaml # OPTIONAL; required only if config-root != svc id +│ ├── quadlet/ # Container, Volume, Network, Pod +│ ├── systemd/ # Socket, Mount, Automount, Timer, Target, Path +│ └── config/ # mapped to /etc// +└── hosts/ + └── / + ├── host.yaml + └── / + ├── quadlet/ # drop-ins only (no new base units) + ├── systemd/ # drop-ins only + └── config/ # whole-file replacements +``` + +`` ∈ {`01-caddy-whoami`, `02-nextcloud`, `03-immich`, `04-traefik-authelia`, `05-observability`}. `` is per-example operator's choice (typically the headlining service name, e.g., `homelab.example.com` or simply `example`). 
+ +The parser at `src/io/repo.rs` rejects any other directory or non-payload file inside `services//`; reserved-prefix subdirs (`_*`) are tolerated for documentation that needs to live inside a service directory. README at example root is fine; README inside `services//` is rejected. diff --git a/specs/017-real-world-validation/plan.md b/specs/017-real-world-validation/plan.md new file mode 100644 index 0000000..b86d301 --- /dev/null +++ b/specs/017-real-world-validation/plan.md @@ -0,0 +1,124 @@ +# Implementation Plan: Real-World Validation, Examples, and Stateless Source-Repo Mode + +**Branch**: `017-real-world-validation` | **Date**: 2026-05-05 | **Spec**: [spec.md](./spec.md) +**Input**: Feature specification from `/specs/017-real-world-validation/spec.md` + +## Summary + +Spec/017 closes the long-open follow-up at `docs/follow-ups.md` lines 87–99 (rich real-life examples + QnA-of-known-limitations) by publishing five real-world homelab examples under top-level `examples/`, and resolves a v2.0.0 regression where the four spec/016 example READMEs reference a `--source-repo` flag that has never been implemented. Adds the missing flag to `core-ops plan`, `apply`, and `explain` for stateless invocation against a filesystem path (no `core-ops init` required), with git-aware provenance recording (clean-checkout SHA / `(stateless+dirty)` / `(stateless)` sentinels). Removes the four superseded spec/016 example fixtures. Cleans up stale CLI documentation. + +Approach: reuse existing parser/planner/applier; add a CLI flag plumbed through `src/cli/{plan,apply,explain}.rs` that bypasses init'd-state lookup and resolves desired-state directly from the path. Reuse the existing shell-to-git pattern (already used in `src/io/repo.rs`, `src/cli/init.rs`) for ref detection. No new runtime dependencies. Per-example integration tests assert parser load + flag invocation; per-mode unit tests assert CLI argument parsing and provenance shape. 
+ +## Technical Context + +**Language/Version**: Rust 2021 (existing toolchain) +**Primary Dependencies**: clap 4.5 (derive), serde 1.0, serde_yaml 0.9, serde_json 1.0, miette 7.2 (fancy diagnostics), thiserror 1.0, tempfile 3.10. **No new runtime dependencies.** Git invocation via `std::process::Command::new("git")` following the established pattern at `src/cli/init.rs:52`, `src/io/repo.rs:1312/1343/1372`, `src/io/release_governance.rs:367/440`, `src/cli/verification.rs:2068/2086/2090/2103`. +**Storage**: Existing `/var/lib/core-ops/status.json` for init'd mode (unchanged). Stateless plan writes nothing under `/var/lib/`; stateless apply writes audit + status with path-based provenance (see FR-013); stateless explain writes nothing. Operator-explicit `--audit-dir` honored across both modes (see FR-012 plus 2026-05-05 clarification). +**Testing**: `cargo test` and `cargo clippy --all-targets -- -D warnings` (project standard). New surfaces: per-example integration tests at `tests/integration/test_examples__.rs` (5 files), per-mode integration tests at `tests/integration/test_stateless_{plan,apply,explain}.rs` (3 files). Existing helper at `tests/integration/source_repo_support.rs:20` (`EXAMPLES_DIR = "specs/016-source-repository-layout/examples"`) is updated or replaced when the four spec/016 example dirs are removed. +**Target Platform**: Linux (Fedora CoreOS canonical, other systemd-based hosts expected to work). Stateless mode functions on any platform the existing parser supports. +**Project Type**: Rust CLI tool (single project, Option 1 in template). +**Performance Goals**: Per-example parse + plan latency ≤ 100ms each (existing parser overhead bounds). Aggregate added test-suite cost ≤ 500ms (8 new tests × ~50ms each). +**Constraints**: No new runtime dependencies; reuse existing git-CLI pattern. Stateless apply MUST NOT mutate init'd `desired_state.repository` / `desired_state.requested_ref` (FR-013, SC-009). 
All hostnames in `examples/` use RFC 2606 reserved domains; all IP literals use RFC 5737 documentation ranges. No file under `examples/` may reference operator-private values from `~/code/ulthar/repo/`. +**Scale/Scope**: 5 example directories (~30–60 files each, ~250 files total), 3 CLI commands modified, 1 module possibly added (`src/io/source_ref.rs` for git ref detection — TBD during implementation), 8 new tests, 4 spec/016 example dirs deleted, 4 stale-doc surfaces updated. + +## Constitution Check + +*GATE: passed before Phase 0 research. Re-evaluation after Phase 1 design records the same outcome — design did not introduce new violations.* + +- **Functional core / imperative shell** ✓ — Stateless mode adds a new entry-point boundary (path → in-memory `EvaluationInput`) but reuses the existing parser, planner, applier. No new core or side-effect surfaces. +- **Declarative state** ✓ — `EvaluationInput` representation unchanged; only the source-of-truth resolution differs. +- **Simplicity over cleverness** ✓ — One additional flag plus a code path that bypasses init'd-state lookup. No new abstractions; sentinel ref strings fit the existing `String` shape of `DesiredStateProvenance.requested_ref` (verified at `src/core/types.rs:555`). +- **Explicit effects / failures** ✓ — FR-014 (path validation), FR-015 (non-git supported), FR-013 (provenance shape) all explicit. Failure modes in Edge Cases. +- **Idempotence & convergence** ✓ — Stateless apply has identical convergence semantics to init'd apply against the same tree. +- **Open standards / native interfaces** ✓ — git, systemd, Quadlet preserved. Filesystem path-based source is the most native of all options. +- **Observability** ✓ — Plan output unchanged. Audit chain extends to stateless via path-based provenance. Sentinels (`(stateless)` / `(stateless+dirty)` / SHA) make the source mode visible in `core-ops status`. 
+- **Provenance & traceability** ✓ — FR-013 + 2026-05-05 clarification (Q3) record the most-precise traceable revision available: clean-checkout SHA when discoverable, dirty sentinel when working tree differs, non-git sentinel otherwise. +- **Safe defaults** ✓ — Default mode (no `--source-repo`) remains init'd. Stateless requires explicit flag. +- **Compatibility** ✓ — Existing init'd CLI surface unchanged; stateless flag is purely additive within each command. +- **Release version policy** — Adds CLI surface (`--source-repo` × 3 commands, FR-016 help text) ⇒ `minor` per spec/011 inferred-bump rules. Removes `specs/016-source-repository-layout/examples/` (4 directories). Whether the validator infers `major` from spec/example deletions is governed by `src/core/release_governance.rs` rules and `core-ops-release validate` is authoritative. Declared `release_intent: minor`; bump to `major` (and version `3.0.0`) if validator demands. +- **Release intent artifact** — `changes/017-real-world-validation.md` declares `release_intent: minor` (or `major` per validator). +- **Changelog discipline** — Re-render via `cargo run --bin core-ops-release -- changelog --write`; post-merge `core-ops-release promote` (shipped v2.1.0) handles the `[Unreleased]` → `[]` transition. +- **Rust validation gate** ✓ — `cargo test` and `cargo clippy --all-targets -- -D warnings` are required before merge. No exemption. +- **Test strategy** ✓ — Invariants: parser load succeeds for each example; init'd `desired_state.*` unchanged after stateless apply (SC-009). External behavior: `--source-repo` flag exit codes / output. Convergence: stateless apply idempotence covered. Failure: non-directory path, missing `--host`. +- **VM-backed scenario** — **Exemption recorded.** Stateless mode introduces a CLI entry-point variation but no new mutation classes — the actual host-state changes performed by stateless apply are identical to those performed by init'd apply against the same tree. 
Existing apply VM scenarios at `tests/fixtures/verification/scenarios/` remain authoritative for mutation semantics. Stateless mode is exercised at the unit + integration test layer. Exemption justification: spec.md Constitution Alignment section + the rationale here. Per Principle 10's exemption clause. +- **Regenerability** ✓ — Examples derivative of public upstream sources cited per-example. Synthesis table re-derivable from translation artifacts. + +**Constitution Check: PASS** with one explicit, narrow, machine-checkable VM-scenario exemption. + +## Project Structure + +### Documentation (this feature) + +```text +specs/017-real-world-validation/ +├── plan.md # This file +├── research.md # Phase 0 output (technical decision log) +├── data-model.md # Phase 1 output (provenance string conventions + synthesis-table schema) +├── quickstart.md # Phase 1 output (operator walkthrough) +├── contracts/ # Phase 1 output +│ ├── cli-flag.md # `--source-repo` CLI contract +│ └── synthesis-table.md # friction-classification table contract +├── checklists/ +│ └── requirements.md # written by /speckit.specify +├── spec.md # written by /speckit.specify +└── tasks.md # NEXT — produced by /speckit.tasks +``` + +### Source Code (repository root) + +```text +core-ops/ +├── examples/ # NEW — 5 real-world examples +│ ├── 01-caddy-whoami/ +│ │ ├── README.md +│ │ ├── services//{service.yaml?,quadlet/,systemd/?,config/?} +│ │ └── hosts//{host.yaml,/{quadlet/,systemd/?,config/?}} +│ ├── 02-nextcloud/ # multi-Container, intra-service network, persistent storage, TLS +│ ├── 03-immich/ # GPU device, multi-network, ML worker +│ ├── 04-traefik-authelia/ # ForwardAuth composition, cross-service network +│ └── 05-observability/ # Prometheus + Grafana + node-exporter + cadvisor; host-scope sidecars +├── src/ +│ ├── cli/ +│ │ ├── args.rs # MOD — add `source_repo: Option` to PlanArgs/ApplyArgs/ExplainArgs +│ │ ├── plan.rs # MOD — branch on source_repo, build EvaluationInput from path +│ │ ├── 
apply.rs # MOD — same; preserve init'd desired_state.* per FR-013 +│ │ └── explain.rs # MOD — same; pure-read +│ ├── core/ +│ │ └── types.rs # NO CODE CHANGE — DesiredStateProvenance.requested_ref is already String; sentinels fit. (Documentation in data-model.md.) +│ └── io/ +│ ├── source_ref.rs # NEW (or merged into existing repo.rs/init.rs) — `detect_provenance(path) -> { repository: AbsPath, requested_ref: String }` +│ └── repo.rs # NO CODE CHANGE expected +├── tests/integration/ +│ ├── test_examples_01_caddy_whoami.rs # NEW +│ ├── test_examples_02_nextcloud.rs # NEW +│ ├── test_examples_03_immich.rs # NEW +│ ├── test_examples_04_traefik_authelia.rs # NEW +│ ├── test_examples_05_observability.rs # NEW +│ ├── test_stateless_plan.rs # NEW — argument parsing, --host requirement, provenance shape, --audit-dir honored +│ ├── test_stateless_apply.rs # NEW — successful apply, init'd desired_state.* unchanged (SC-009), non-git path +│ ├── test_stateless_explain.rs # NEW — read-only invocation, no /var/lib writes +│ ├── source_repo_support.rs # MOD — EXAMPLES_DIR repointed at top-level `examples/`, or helper deleted if redundant +│ └── mod.rs # MOD — register the 8 new modules +├── docs/ +│ ├── follow-ups.md # MOD — remove the now-shipped Init Command paragraphs; preserve still-valid bullets +│ └── development.md # MOD — line 228 example updated (use --source-repo or init+plan) +├── infra/repo/ +│ └── README.md # MOD — lines 32, 35, 38 updated +├── specs/016-source-repository-layout/ +│ ├── examples/ # DELETED (4 subdirs removed) +│ ├── spec.md # MOD — FR-023 carries supersession note pointing at top-level examples/ +│ └── tasks.md # MOD — T101–T104 annotated `[X] [SUPERSEDED by spec/017]` +├── README.md # MOD — add `## Real-World Examples` section +├── Cargo.toml # MOD — version 2.1.1 → 2.2.0 (or 3.0.0 if validator demands) +├── CHANGELOG.md # MOD — re-rendered via core-ops-release changelog --write +└── changes/ + └── 017-real-world-validation.md # NEW — release 
fragment, release_intent: minor (or major) +``` + +**Structure Decision**: Single Rust project (template Option 1). Existing layout preserved; new artifacts under `examples/`, `tests/integration/test_examples_*.rs`, `tests/integration/test_stateless_*.rs`, plus per-spec docs under `specs/017-real-world-validation/`. + +## Complexity Tracking + +> Filled only if Constitution Check has violations to justify. + +No violations to justify. The single exemption (no new VM-backed scenario) is narrow, explicit, and recorded above; existing apply VM scenarios remain authoritative for the mutation classes that stateless apply exercises. diff --git a/specs/017-real-world-validation/quickstart.md b/specs/017-real-world-validation/quickstart.md new file mode 100644 index 0000000..2626820 --- /dev/null +++ b/specs/017-real-world-validation/quickstart.md @@ -0,0 +1,122 @@ +# Quickstart: Real-World Examples (operator walkthrough) + +**Phase**: 1 | **Spec**: [spec.md](./spec.md) | **Plan**: [plan.md](./plan.md) + +This is the operator-facing walkthrough that the post-implementation `examples//README.md` files will mirror, and which the root `README.md` "Real-World Examples" section will link to. It is a verification artifact for `/speckit.plan`: if these steps cannot be followed end-to-end against the implemented feature, the implementation is not complete. + +--- + +## Prerequisites + +- `core-ops` v2.2.0 or later (this slice). Stateless `--source-repo` requires the new flag. +- A clone of this repository, OR a downloaded `examples//` subtree. +- `git` available on `$PATH` (used for git-aware provenance detection; non-git directories also work). +- No prior `core-ops init` required. 
+ +--- + +## Five-minute walkthrough + +### Step 1 — Browse the real-world examples + +```sh +git clone https://github.com/outergod/core-ops.git +cd core-ops +ls examples/ +``` + +You see five subdirectories, each documenting one widely-deployed homelab pattern: + +| Slug | Setup | Pressure axis | +|------|-------|---------------| +| `01-caddy-whoami` | Caddy + whoami | Single-Container baseline; default config-root | +| `02-nextcloud` | Nextcloud + Postgres + Redis + Traefik | Multi-Container, intra-service network, persistent storage | +| `03-immich` | Immich + Postgres + Redis + ML + Traefik | GPU device, multi-network, ML worker | +| `04-traefik-authelia` | Traefik + Authelia + protected backend | Cross-service ForwardAuth composition | +| `05-observability` | Prometheus + Grafana + node-exporter + cadvisor | Host-scope sidecars, scrape-config templating | + +Each example's `README.md` cites its public upstream design sources, lists the services involved, and documents any known limitations encountered during translation. + +### Step 2 — Plan an example without committing to it + +```sh +core-ops plan --source-repo examples/01-caddy-whoami --host example +``` + +This invocation: +- Reads desired state from the directory tree under `examples/01-caddy-whoami/`. +- Selects the host overlay under `hosts/example/`. +- Computes the reconciliation plan against the current host state. +- Writes nothing to `/var/lib/core-ops/`. +- Does not require — and does not consult — any prior `core-ops init`. + +Expected output: a plan listing the units that would be installed, started, or modified. Exit code 0. + +### Step 3 — Inspect a single object + +```sh +core-ops explain --source-repo examples/01-caddy-whoami --host example caddy.container +``` + +Produces an authoritative explanation of how the reconciliation model interprets the `caddy.container` Quadlet inside the example. Read-only; writes nothing. 
+ +### Step 4 — Switch to a different example without `--force` + +```sh +core-ops plan --source-repo examples/05-observability --host example +``` + +This succeeds with no teardown step. Stateless invocations against a different example are independent and do not conflict. + +### Step 5 — (Optional) Apply against a real host + +> ⚠ **`apply` mutates host state.** Read the plan from Step 2 before running. + +```sh +core-ops apply --source-repo examples/01-caddy-whoami --host example +``` + +If the operator has already run `core-ops init ` against a different repository, that init'd state is left untouched by this stateless apply. Audit records and the status snapshot record the path-based provenance: + +```sh +core-ops status +``` + +Provenance shows: +- `desired_state.repository` = ``. +- `desired_state.requested_ref` = the git commit SHA if the path is a clean git checkout, or `(stateless+dirty)` for a working tree with uncommitted changes, or `(stateless)` for a non-git directory. + +### Step 6 — Author your own setup using an example as a scaffold + +Stateless mode is the inner-loop authoring substrate: + +```sh +cp -r examples/02-nextcloud ~/my-nextcloud +# Edit ~/my-nextcloud/hosts//host.yaml and service configs +core-ops plan --source-repo ~/my-nextcloud --host +``` + +Iterate without `git init`, without `core-ops init`, without `--force`. When ready for long-lived tracking: + +```sh +cd ~/my-nextcloud +git init && git add . 
&& git commit -m "initial homelab config" +core-ops init ~/my-nextcloud main +core-ops plan # now sources from persisted init'd state +``` + +### Step 7 — Reading the synthesis table + +If the example's README mentions a "known limitation" you also encounter, look up the friction in `specs/017-real-world-validation/spec.md` — the synthesis table classifies every encountered friction as **A** (amend-now, escalated to a follow-up spec), **B** (workaround-with-doc, with the workaround inlined in the example's README), or **C** (defer-to-spec-018, tracked in `docs/follow-ups.md`). + +--- + +## Acceptance check (operator self-verification) + +- [ ] `core-ops plan --source-repo examples/01-caddy-whoami --host example` exits 0 and emits a non-empty plan, with no prior `core-ops init` and no writes to `/var/lib/core-ops/`. (SC-001, SC-008) +- [ ] Switching from one example to another via re-invocation does not require `--force`. (US1 AC2) +- [ ] On a host with a prior `core-ops init`, running stateless `apply --source-repo` does not mutate the persisted `desired_state.repository` / `desired_state.requested_ref` of the init'd configuration. (SC-009) +- [ ] `core-ops status` after a stateless apply shows path-based provenance (absolute path under `desired_state.repository`, SHA or sentinel under `desired_state.requested_ref`). +- [ ] All five examples parse cleanly via `cargo test` per-example integration tests. (SC-003) +- [ ] Each example's `README.md` cites at least one public upstream source URL. (SC-006) +- [ ] No file under `examples/` mentions any operator-private value (`not.one`, `ulthar`, `192.168.1.2`, GCloud DNS markers). 
(SC-005) diff --git a/specs/017-real-world-validation/research.md b/specs/017-real-world-validation/research.md new file mode 100644 index 0000000..0a7e8b7 --- /dev/null +++ b/specs/017-real-world-validation/research.md @@ -0,0 +1,179 @@ +# Research: Real-World Validation, Examples, and Stateless Source-Repo Mode + +**Phase**: 0 | **Spec**: [spec.md](./spec.md) | **Plan**: [plan.md](./plan.md) + +This document records the technical decisions for spec/017's implementation phase. The five-setup roster, synthesis-table classification semantics, and stateless-mode user-visible behavior are spec-level decisions already locked in `spec.md` (including the 2026-05-05 clarifications section). This document covers the *implementation* decisions: how to detect git provenance, how to wire the new flag, how to handle the existing helper that loads spec/016 examples, and which public upstream sources back each setup's translation. + +--- + +## D1 — Git ref detection strategy for stateless mode + +**Decision**: Shell out to the system `git` binary via `std::process::Command::new("git")` for git presence and ref detection. Reuse the established pattern already present in `src/cli/init.rs:52`, `src/io/repo.rs:1312/1343/1372`, `src/io/release_governance.rs:367/440`, and `src/cli/verification.rs:2068`+. + +**Rationale**: +- No new runtime dependency (Cargo.lock currently has no `git2` entry). +- Consistent with how every other git interaction in the codebase already works. +- Sufficient for the three states the spec requires: clean checkout, dirty working tree, non-git directory. +- The git CLI is already an implicit runtime requirement of CoreOps in init'd mode. + +**Alternatives considered**: +- **`git2` crate (libgit2 bindings)**: Rejected. Adds a non-trivial dependency for marginal gain. We need at most: detect `.git`, run `rev-parse HEAD`, run `status --porcelain --` against a path. CLI invocation handles all three trivially. 
+- **Pure Rust git parsing**: Rejected as gratuitously complex. Reading `.git/HEAD` and walking refs ourselves trades external-process cost for parsing complexity that has to track git internals. + +**Algorithm** (to be implemented in `src/io/source_ref.rs` or merged into `repo.rs`): + +```text +detect_provenance(path: &Path) -> { repository: AbsPath, requested_ref: String }: + 1. repository = canonicalize(path). + 2. Check `git -C path rev-parse --is-inside-work-tree` exits 0 and prints "true". + - If not: return { repository, requested_ref: "(stateless)" }. + 3. Check `git -C path status --porcelain -- .` for non-empty output. + - Non-empty (any modified, added, deleted, or untracked file under path): + return { repository, requested_ref: "(stateless+dirty)" }. + 4. Capture `git -C path rev-parse HEAD` SHA (40-char hex). + - Return { repository, requested_ref: }. + 5. On any subprocess error: log via miette diagnostic, fall back to "(stateless)". +``` + +**Edge cases the implementation must handle**: +- Path is a subdirectory of a git repository — `git -C path` correctly resolves the enclosing `.git/`. The status check is scoped to `-- .` so changes in sibling directories don't pollute the cleanliness check for our path. +- Detached HEAD — `rev-parse HEAD` returns the SHA, no special handling needed. +- Shallow clone — SHA is still meaningful, no special handling. +- Submodules under the path — out of scope for v1; submodule changes may incorrectly mark "clean" since `status --porcelain --` doesn't recurse into submodule working trees by default. Document as a known limitation if encountered during translation. + +**Test surface**: unit-level coverage for each branch using `tempfile`-created git repos; an integration test that exercises a real example under each state (clean / dirty / non-git). + +--- + +## D2 — `DesiredStateProvenance` schema compatibility + +**Decision**: No struct changes required. 
The existing `DesiredStateProvenance.requested_ref: String` (`src/core/types.rs:555`) accepts the sentinel strings `(stateless)` and `(stateless+dirty)` alongside SHA values without any schema modification. + +**Rationale**: +- `requested_ref` is `String` (not a constrained enum); it already holds branch names, tag names, and SHAs without further validation. +- Adding sentinel strings is a *value-level* convention, not a *type-level* change. No serde annotations need updating. +- Test fixtures that match against `requested_ref` need to be reviewed for hardcoded values — they likely use commit-SHA-shaped strings or branch names, which the sentinels do not collide with (sentinels start with `(`; git does not forbid that character in ref names, but no existing fixture uses a `(`-prefixed value). + +**Verification**: +- `git check-ref-format "refs/heads/(stateless)"` exits zero: parentheses are legal in git ref names, so git does not rule out a branch literally named `(stateless)`. (`git check-ref-format "(stateless)"` exits non-zero only because a one-level name without `/` is rejected by default — `main` fails the same way — so it proves nothing about collisions.) +- The same holds for `(stateless+dirty)` — neither parentheses nor `+` are reserved. Collision-freedom therefore rests on two facts: stateless mode records only a 40-character hex SHA or a sentinel, and init'd-mode refs starting with `(` are excluded by convention rather than by git. + +**Document the convention** in `data-model.md` so future readers and serde consumers understand sentinel semantics. + +--- + +## D3 — Spec/016 example removal impact + +**Decision**: Delete the four `specs/016-source-repository-layout/examples/{01-minimal-single-service, 02-variant-config-root, 03-multi-unit-with-dropins, 04-host-overlay}/` directories. Update `tests/integration/source_repo_support.rs:20` (the only code-level consumer) — either repoint `EXAMPLES_DIR` at top-level `examples/` (if any test still uses it) or delete the helper entirely if all consumers are repointed at the new spec/017 examples. + +**Rationale**: +- One code consumer found via grep; the impact is bounded. +- Spec/016 spec.md FR-023 is amended with a supersession note; spec/016 tasks.md T101–T104 carry `[X] [SUPERSEDED by spec/017]` annotations that preserve the historical record without rewriting it. +- The four example dirs were layout-shape fixtures; their pedagogical role is now filled by the five real-world examples. 
+ +**Concrete changes**: +- `git rm -r specs/016-source-repository-layout/examples/{01-*,02-*,03-*,04-*}`. +- Edit `tests/integration/source_repo_support.rs:20` — repoint or remove the `EXAMPLES_DIR` constant; update or remove any tests that referenced the four spec/016 example slugs. +- Edit `specs/016-source-repository-layout/spec.md`: append a supersession note to FR-023. +- Edit `specs/016-source-repository-layout/tasks.md`: append `[SUPERSEDED by spec/017]` to T101–T104. + +**Alternatives considered**: +- **Forwarding marker** (replace each dir with a `MOVED.md`): Rejected as low-value clutter. The spec.md supersession note + git history is sufficient for anyone looking back. +- **Keep spec/016 examples + add spec/017 examples**: Rejected. Two example sets in two locations is the kind of duplication the constitution Principle 3 calls out. + +--- + +## D4 — Stale CLI documentation cleanup + +**Decision**: Update three doc surfaces; leave historical spec quickstarts as time-capsules. + +| Surface | Action | +|--------|-------| +| `docs/follow-ups.md` lines 7–14 ("Init Command" paragraphs about `--repo`/`--rev` removal) | Remove the now-shipped paragraphs. Preserve still-valid follow-ups in the same section: `quadlet-dir`/`systemd-unit-dir`/`state-file`/`audit-dir` arg persistence, `rollback-plan-only` re-homing, `--reinitialize` UX. | +| `docs/development.md` line 228 (`CORE_OPS_HOST= core-ops plan --repo --rev `) | Replace with `core-ops plan --source-repo --host ` (stateless example) plus a note about the init'd-mode workflow. | +| `infra/repo/README.md` lines 32, 35, 38 (`core-ops plan --repo file:///… --rev demo-uat-vN`) | Update each to use `--source-repo` against a checkout of the demo repo. | +| `specs/001-gitops-quadlet-controller/quickstart.md:18`, `specs/007-explainable-reconcile-interface/quickstart.md:73` | **No change.** Historical spec quickstarts are time-capsules of the spec at the time it was written. 
| + +**Rationale**: Spec quickstarts are versioned artifacts whose purpose is to document the state of the system at the time the spec was authored. Updating them retroactively would erase historical context. Operational docs (`docs/`, `infra/`) describe current behavior and must reflect today's CLI. + +--- + +## D5 — License hygiene for upstream-derived examples + +**Decision**: Each example's Quadlet units are written from scratch as the implementer's interpretation of the upstream design intent. Upstream `compose.yml` / configuration files are cited in the README as design references but are not copied verbatim. + +**Rationale**: +- Avoids inheriting upstream license terms (Nextcloud, Immich, Authelia, and most observability tooling are AGPL/MIT/Apache mixes; verbatim copy of YAML blocks may carry license obligations into core-ops AGPLv3+). +- The translation is the deliverable: showing how a real workload becomes a spec/016-conformant repository. Verbatim copying would defeat the validation purpose (it would be a port, not a translation). +- Upstream attribution is preserved in each example's README under a "Sources" heading. + +**Public upstream sources** (URLs to be fetched and verified during the translation phase, one task per example in `/speckit.tasks`): + +| Slug | Primary upstream sources | +|------|--------------------------| +| `01-caddy-whoami` | Caddy official documentation (`caddyserver.com/docs/quick-starts`); `traefik/whoami` container README. | +| `02-nextcloud` | Nextcloud's "Docker Compose with reverse proxy" community example (NOT the All-In-One container, which manages its own sub-containers via Docker socket and is incompatible with external orchestration). | +| `03-immich` | `immich-app/immich` repository's `docker/docker-compose.yml`. | +| `04-traefik-authelia` | Authelia's official Traefik integration documentation (`authelia.com/integration/proxies/traefik/`). 
| +| `05-observability` | Prometheus, Grafana, node-exporter, cadvisor official docker-compose examples; `prometheus/node_exporter` README for host-scope bind mounts. | + +The implementer fetches these during Phase 2 (Translation tasks) and embeds canonical URLs in each example's `README.md`. + +--- + +## D6 — Stateless mode and `--audit-dir` interaction + +**Decision**: Stateless `plan` and `apply` honor an explicit `--audit-dir ` flag exactly as init'd mode does. Stateless `explain` is pure-read; it does not write audit. Pre-locked by 2026-05-05 clarification (Q4 in spec.md). + +**Rationale**: documented in spec.md Clarifications. Implementation impact: the existing audit-dir handling in `src/cli/{plan,apply}.rs` requires no special-case for stateless mode — the same code path writes to the operator-specified directory regardless of source mode. The `/var/lib/` separation is enforced by *not* writing the persisted controller state, not by suppressing `--audit-dir`. + +--- + +## D7 — Stateless mode argument validation + +**Decision**: Argument validation rules for stateless invocations: +1. `--source-repo ` MAY appear on `plan`, `apply`, `explain`. Not on `init`, `agent`, `status`, or `skill`. +2. `--source-repo` requires `--host ` to be present in the same invocation. Validation: clap-level `requires` constraint. +3. `--source-repo` is mutually exclusive *within an invocation* with any future `--repo` / `--rev` resurrection (currently moot; no such flag exists). +4. The path must exist and be a directory. Implementation: `std::fs::metadata` (synchronous; `tokio` is not a dependency and this feature adds no runtime dependencies) before parser invocation; emit `miette` diagnostic with `.help("path must be an existing directory containing a spec/016 layout")` on failure. +5. The path is canonicalized (resolves symlinks, makes absolute) before being passed to the parser, ensuring `repository` provenance is reproducible. 
+ +**Rationale**: clap-derive supports `requires` / `conflicts_with` declaratively, so only minimal custom validation logic is needed. Errors surface via the existing `miette` diagnostic chain. + +--- + +## D8 — Test coverage strategy + +**Decision**: Three layers of test coverage: + +1. **Unit tests** (in `src/cli/args.rs`'s `#[cfg(test)]` module): clap argument parsing — `--source-repo` accepted on plan/apply/explain, rejected elsewhere; `--host` requirement; mutual-exclusion edge cases. +2. **Integration tests** (`tests/integration/test_stateless_{plan,apply,explain}.rs`): full command invocation via `assert_cmd` or `Command::cargo_bin`, exercising real source-repo loading from a `tempfile::TempDir`-built spec/016 layout. Cover (a) clean git checkout → SHA provenance, (b) dirty git working tree → `(stateless+dirty)` provenance, (c) non-git directory → `(stateless)` provenance, (d) missing `--host` error, (e) non-directory path error, (f) stateless apply on host with init'd state — assert `desired_state.repository`/`requested_ref` byte-identical pre/post (SC-009). +3. **Per-example integration tests** (`tests/integration/test_examples__.rs`): for each of the five examples, parse via `Repository::load`, assert resolved service catalog contains expected service ids, assert example root carries `README.md`, run `core-ops plan --source-repo <example-path> --host <host>` via `assert_cmd` (the `--host` flag is mandatory in stateless mode per D7 rule 2) and assert exit 0 plus non-empty stdout. + +**CI cost**: 8 new tests × ~50ms median = ~400ms aggregate added to `cargo test`. Acceptable. + +**No new VM-backed scenarios** required (exemption recorded; see plan.md Constitution Check). + +--- + +## D9 — Release governance bump baseline + +**Decision**: Spec language baseline updated from "2.1.0 → 2.2.0" to "current Cargo.toml → next minor". Master is currently at v2.1.1 (PR #31's auto-promote). The fragment declares `release_intent: minor`. 
The `core-ops-release validate --base-ref master` step is authoritative on whether the spec/016 example deletions force a `major` bump per the inferred-bump rules. + +**Rationale**: Baseline arithmetic is a moving target during a feature branch's life; the spec/011 governance machinery handles it. The fragment's `release_intent` field is what matters; the `--base-ref` validator computes the required bump from the actual diff. + +**Implementation note**: When updating `Cargo.toml`, set the version to `2.2.0` *or* run `cargo run --bin core-ops-release -- validate --base-ref master` first to see what the validator infers, then declare accordingly. + +--- + +## D10 — Spec/016 examples and the migration script + +**Decision**: `scripts/migrate-legacy-source-repo.sh` is independent of the spec/016 example fixtures and is unaffected by their removal. The script transforms a legacy-layout source repo into the formalized layout; it does not consume the example fixtures as inputs. + +**Verification**: Read `scripts/migrate-legacy-source-repo.sh` during the implementation phase to confirm no example-path references; document in `tasks.md` if any are found and update accordingly. + +--- + +## Open implementation questions deferred to /speckit.tasks + +None. All technical decisions above resolve cleanly. The implementation tasks emitted by `/speckit.tasks` will follow the project structure in plan.md without requiring further clarification rounds. 
diff --git a/specs/017-real-world-validation/spec.md b/specs/017-real-world-validation/spec.md new file mode 100644 index 0000000..f1d9f75 --- /dev/null +++ b/specs/017-real-world-validation/spec.md @@ -0,0 +1,220 @@ +# Feature Specification: Real-World Validation, Examples, and Stateless Source-Repo Mode + +**Feature Branch**: `017-real-world-validation` +**Created**: 2026-05-05 +**Status**: Draft +**Input**: User description: "Validate the spec/016 source-repository layout against five real-world homelab setups, produce documented examples under top-level `examples/`, and classify any friction encountered as amendment-now / workaround-with-doc / defer-to-spec-018. Closes the 'rich, documented real-life examples' and 'QnA for known limitations' bullets in docs/follow-ups.md lines 87–99." + +> **Scope expansion (locked 2026-05-05)**: the validation iteration's first finding is that `core-ops plan` and `core-ops apply` cannot be invoked against a source-repository directory without first running `core-ops init` (which writes persistent state to `/var/lib/core-ops/`). This blocks all five planned examples plus the four spec/016 example "Try it" snippets that ship referencing a non-existent `--source-repo` flag in v2.0.0. The slice is therefore expanded to: +> +> 1. Add a stateless `--source-repo ` flag to `plan` and `apply` for one-off invocations against a filesystem path, bypassing `init` and the persisted controller configuration. +> 2. Delete the four spec/016 in-tree example fixtures — superseded by this slice's real-world examples under top-level `examples/`. +> 3. Clean up stale CLI documentation that references the long-removed `--repo` / `--rev` flags. +> +> Items (1) and (2) cross the original "validation, not feature" framing; the trade is intentional and locked. + +## Clarifications + +### Session 2026-05-05 + +- Q: Should stateless apply require an explicit safety flag (e.g., `--confirm-stateless`) beyond `--source-repo`? → A: No. 
Standard apply semantics; the explicit `--source-repo` flag plus path-based provenance in audit + status snapshots is sufficient explicit intent. No additional confirmation ceremony. +- Q: On a host where `core-ops init` has already been run, can stateless `--source-repo` invocations execute, or do they require teardown of the init'd state first? → A: Coexistence. Stateless invocations always execute regardless of init'd state. Init'd `desired_state.repository` / `desired_state.requested_ref` are never mutated by stateless; the two modes coexist on the same host, distinguished only in audit/provenance. +- Q: When `--source-repo` points at a git working tree, what should `desired_state.requested_ref` record in audit + status snapshots? → A: Detected git commit when the path is a clean git checkout at a known commit; `(stateless+dirty)` sentinel when the working tree has uncommitted changes; `(stateless)` sentinel when the path is not a git repository at all. Honors Principle 12 (behavior is traceable to the desired-state revision actually applied) without misrepresenting state. +- Q: Should stateless `plan` honor an explicit `--audit-dir` flag (write the plan audit record to the operator-specified destination) or ignore it under the "pure read-and-render" framing of FR-012? → A: Honor `--audit-dir` when explicitly provided. The operator passing the flag is explicit consent per Principle 4. The "no `/var/lib/` writes" guarantee is preserved separately. Init'd plan and stateless plan behave identically with respect to `--audit-dir`. +- Q: Should `core-ops explain` also accept `--source-repo`, or stay init-only alongside `agent` and `status`? → A: Add `--source-repo` to `explain` in this slice. Symmetric with stateless `plan`/`apply` for the read-only inspection use case (debugging a working tree before commit, evaluating an arbitrary path's expansion). Pure-read so persistence semantics are simpler than apply. 
`agent` and `status` remain init-only by architectural fit (timer-driven; reads persisted state). + +## User Scenarios & Testing *(mandatory)* + +### User Story 1 - First-time operator runs a real example with one command (Priority: P1) + +A newcomer who has just installed CoreOps visits the project README, finds the "Real-World Examples" section, picks a setup that resembles the workload they want to run, clones the repo (or downloads the example), and runs **a single command** that produces a populated `core-ops plan` against that example — without `init`, without writing persisted state to `/var/lib/`, without committing to anything. + +**Why this priority**: This is the entry path for adoption. The cost of evaluating an example must be ~10 seconds (one command), not 30 minutes (clone, init, force, switch examples, force again). Every minute of friction at first impression bleeds adoption. + +**Independent Test**: A reviewer who has never seen CoreOps before clones the repo, reads `examples//README.md`, runs the suggested `core-ops plan --source-repo examples/ --host ` invocation, and gets a non-empty plan with no parser errors. No `init`, no state files written, no `--force` needed to pivot to a different example. + +**Acceptance Scenarios**: + +1. **Given** a fresh CoreOps install (no prior `init`) and the published `examples/01-caddy-whoami/`, **When** the operator runs `core-ops plan --source-repo examples/01-caddy-whoami --host example`, **Then** the command exits 0, emits a plan listing the Caddy container unit and any associated network/volume, and writes nothing to `/var/lib/core-ops/`. +2. **Given** the operator has just evaluated `examples/01-caddy-whoami` via stateless plan, **When** they pivot to `examples/02-nextcloud` and run the same flag against it, **Then** the command succeeds without `--force`, without prior teardown, and without surfacing any "controller configuration already exists" error. +3. 
**Given** any of the five published examples, **When** the operator reads the example's README, **Then** they see (a) a one-line setup description, (b) at least one public upstream source cited for the design, (c) a service-by-service intent table, (d) any known limitations encountered during translation, (e) a "Try it" code block using the stateless `--source-repo` invocation as a single-command demonstration. + +--- + +### User Story 2 - Operator authors and iterates on their own setup without committing first (Priority: P2) + +An operator with an existing homelab wants to migrate to CoreOps. They scan the `examples/` roster for an analog, copy the closest example to a working directory, and iterate locally — running `core-ops plan --source-repo ./my-source-repo --host myhost` repeatedly as they edit files, **before** they have a git repo, **before** they've committed anything, **before** they've decided whether to adopt CoreOps. When ready, they `git init` and `core-ops init` to switch into long-lived tracking mode. + +**Why this priority**: Authoring requires fast iteration. Forcing `git commit` + `core-ops init --force` between every experiment kills the inner loop. The stateless mode is the authoring substrate; the init-tracked mode is the production substrate. Both must coexist. + +**Independent Test**: Pick any of the five examples, copy its directory tree to a non-git scratch location, change the host name and one service config, and verify `core-ops plan --source-repo --host ` succeeds without git presence and without writing persisted state. + +**Acceptance Scenarios**: + +1. **Given** `examples/02-nextcloud/`, **When** the operator copies it to `~/my-source-repo/` (no `git init`), renames the host directory from `hosts/example/` to `hosts/myhost/`, and updates `host.yaml` accordingly, **Then** `core-ops plan --source-repo ~/my-source-repo --host myhost` succeeds. +2. 
**Given** an operator who encounters a friction documented as "workaround-with-doc" in this spec's synthesis table, **When** they apply the documented workaround in their stateless source repo, **Then** the workaround unblocks parsing without further escape hatches. +3. **Given** an operator who has finished iterating with stateless mode and now wants to commit, **When** they `git init && git commit && core-ops init `, **Then** subsequent `core-ops plan` (without `--source-repo`) sources from the persisted state and produces an equivalent plan to the last stateless run against the same tree. + +--- + +### User Story 3 - Stateless apply for one-off convergence and recovery (Priority: P2) + +An operator needs to converge a host to a desired state expressed in a local directory without committing to long-lived `init`-tracked operation. Examples: a recovery scenario where the persisted `init` state is corrupt and the operator has a known-good source-repo checkout; a one-off bootstrap where the host should converge once from a directory and then be re-init'd with a different repo; a CI/test scenario that must apply a synthetic source repo to a VM without persistent state. + +**Why this priority**: Stateless plan without stateless apply is half-finished — the operator can preview but cannot execute. This is P2 (not P1) because plan is the more frequent first-impression surface, but apply must follow in the same slice or the feature is not coherent. + +**Independent Test**: On a host with no prior `core-ops init`, run `core-ops apply --source-repo --host `. Verify (a) the apply succeeds and produces audit records, (b) `/var/lib/core-ops/` controller-configuration state is not written by the stateless invocation (or is written with a sentinel marking the stateless source), (c) a subsequent `core-ops status` reports the applied state with provenance pointing to the path-based source rather than a git URL. + +**Acceptance Scenarios**: + +1. 
**Given** a fresh host with no prior `init`, **When** the operator runs `core-ops apply --source-repo /path/to/repo --host edge-01`, **Then** apply succeeds, audit records are produced, and host state is converged. +2. **Given** apply has run statelessly, **When** the operator runs `core-ops status`, **Then** status reports the applied state with provenance referencing the path-based source clearly distinguished from a git-URL-based source. +3. **Given** the operator subsequently runs `core-ops init --force` and then `core-ops plan`, **Then** plan produces a normal init'd-mode plan without confusion from the prior stateless apply. + +--- + +### User Story 4 - Future spec author grounds amendments in validation evidence (Priority: P3) + +A future spec author considering an amendment to spec/016 (e.g., adding a templating layer, relaxing the payload-kind whitelist, introducing a new reserved-prefix convention) consults the synthesis table. They identify which gaps are blocking ≥2 examples (escalation candidates), which are workaround-with-doc, and which are deferred. The decision to amend is grounded in evidence, not speculation. + +**Why this priority**: Spec/016 just shipped at v2.0.0. Future amendments must clear a higher bar than informal anecdote. P3 because it serves a future workflow. + +**Independent Test**: Open this spec's synthesis table, count A/B/C classifications, verify each cited friction names the affected examples plus a rationale, and confirm any A-classified friction is escalated to a follow-up spec. + +**Acceptance Scenarios**: + +1. **Given** the published synthesis table, **When** a future spec author asks "is friction X worth amending?", **Then** the table answers with affected-example count, classification, and rationale, sufficient to decide without re-running the validation iteration. 
+ +--- + +### Edge Cases + +- **Stateless invocation on a host that already has init'd controller configuration**: The stateless `--source-repo` invocation MUST NOT mutate the persisted `init`-tracked configuration. It MAY produce its own provenance record (audit, status snapshot) but the `desired_state.repository` / `desired_state.requested_ref` of the init'd configuration MUST remain unchanged. After the stateless run, a subsequent `core-ops plan` (without flag) MUST behave as before the stateless run — no detached state, no rollback ambiguity. +- **Stateless apply with no `--host` flag**: There is no persisted host identity for stateless mode to fall back on; the command MUST fail with a clear message naming `--host` as required. +- **Path that is not a directory**: Stateless mode MUST fail with a clear message; no fallback to interpreting the path as a git URL. +- **Path that is a directory but not spec/016-conformant**: Parser errors surface as today; the stateless flag does not change parser strictness. +- **Real-world setup cannot be expressed without a non-payload file inside `services//`** (parser rejects per `src/io/repo.rs`). → Reserved-prefix subdir (`_*`) is the documented escape; if even that doesn't suffice, classify as A (amend-now) and escalate to a follow-up spec. +- **Upstream's compose.yml or example config is licensed incompatibly with this repository's AGPLv3+**. → Derive own Quadlet equivalents inspired by the upstream design; cite upstream as the reference; do not copy YAML verbatim. +- **A setup's secrets pattern requires external host-managed state** (e.g., a sops/age credstore, a `LoadCredentialEncrypted` source). → Document the host-side prerequisite in the example README; commit only stub references with placeholder names; never commit fake or real secrets. +- **A real-world setup uses container images hosted on a registry with strict pull rate limits**. → Integration tests parse only; the parser does not pull images. 
+- **An asciinema-style demo is requested mid-iteration**. → Out of scope; the asciinema follow-up at `docs/follow-ups.md` line 109 remains open. + +## Requirements *(mandatory)* + +### Functional Requirements + +#### Real-world examples + +- **FR-001**: The repository MUST publish exactly five examples under `examples//` with slugs `01-caddy-whoami`, `02-nextcloud`, `03-immich`, `04-traefik-authelia`, `05-observability`. Each slug pairs a sequence number with a kebab-case identifier of the headlining service or theme. +- **FR-002**: Each example MUST conform to the spec/016 source-repository layout: a root `README.md`; a `services/` directory containing one or more service subdirectories; a `hosts/` directory containing at least one example host. Per-service directories MUST contain only `service.yaml` (optional) and the payload-kind subdirectories `quadlet/`, `systemd/`, `config/` accepted by the parser. +- **FR-003**: Each example MUST parse cleanly via `core-ops plan --source-repo examples/ --host ` — exit 0, non-empty plan output — without manual edits and without prior `core-ops init`. +- **FR-004**: Each example's `README.md` MUST contain (a) a one-line setup description, (b) at least one public upstream source cited as the design reference, (c) a service-by-service intent table, (d) any known limitations encountered during translation, (e) an explicit declaration that any CLI-output snippets shown are illustrative and not snapshot-tested, (f) a "Try it" code block using the stateless `--source-repo` invocation as a single-command demonstration, (g) a "Scaffold for your own setup" section with explicit `cp -r examples/ ~/my-setup` instructions (per quickstart.md Step 6) supporting the US2 operator-as-author workflow. +- **FR-005**: This spec MUST publish a synthesis table listing every friction encountered during translation. Each entry MUST carry: friction description, affected example list, classification A/B/C, rationale, and recommended action. 
Classification semantics: + - **A — amend-now**: ≥2 examples blocked by the same gap; the iteration escalates the gap to a follow-up spec rather than landing parser changes inline. + - **B — workaround-with-doc**: friction is real but addressable via reserved-prefix subdirs, host-side preparation, drop-in tricks, or other documented patterns. The workaround is documented in the affected example's README. + - **C — defer-to-spec-018**: friction is acknowledged but neither blocking nor addressable in this slice; named for a future iteration. +- **FR-006**: The repository MUST contain per-example integration tests at `tests/integration/test_examples__.rs` that load each example through the parser and assert (a) parse succeeds, (b) the resolved service catalog contains the expected service identifiers, (c) the example root carries a `README.md`, (d) `core-ops plan --source-repo --host ` exits 0 (gated on the `--source-repo` flag landing in this slice). New tests MUST be registered in `tests/integration/mod.rs`. +- **FR-007**: The root `README.md` MUST add a `## Real-World Examples` section between `## First Interaction` and `## Installation (Current Phase)`, linking each of the five examples with a one-line purpose statement. +- **FR-008**: All hostnames published in `examples/` MUST use RFC 2606 reserved domains (`*.example.com`, `*.example.org`, `*.test`, `*.invalid`, `*.localhost`). All IP literals MUST use RFC 5737 documentation ranges (`192.0.2.0/24`, `198.51.100.0/24`, `203.0.113.0/24`). +- **FR-009**: No file under `examples/` MAY contain operator-private values: the operator's real domain (`*.not.one`), real IP `192.168.1.2`, real GCloud DNS challenge credentials, or any other operational data sourced from the operator's private homelab repository (`~/code/ulthar/repo/`). Ulthar is consulted as a research data point only. 
+ +#### Stateless `--source-repo` CLI mode + +- **FR-010**: `core-ops plan` MUST accept a new `--source-repo ` flag that selects a filesystem-directory source of desired state, bypassing the persisted controller configuration written by `core-ops init`. The flag requires `--host ` to be present (no host fallback exists in stateless mode). The flag MUST execute regardless of whether the controller is init'd; init'd `desired_state.*` is never read or mutated during a stateless invocation. Init'd-mode and stateless-mode coexist on the same host, mutually exclusive only within a single command invocation (per the 2026-05-05 clarification). +- **FR-011**: `core-ops apply` MUST accept the same `--source-repo ` flag with equivalent semantics: bypass the persisted controller configuration, accept the directory as the desired-state source, require `--host`. No additional safety/confirmation flag is required beyond `--source-repo` and `--host`; the audit chain plus path-based provenance provides the traceability trail (per the 2026-05-05 clarification). +- **FR-011a**: `core-ops explain` MUST accept the same `--source-repo ` flag with read-only semantics: bypass the persisted controller configuration, source the desired state from the directory, require `--host`, write nothing to `/var/lib/core-ops/` (per the 2026-05-05 clarification, symmetric with stateless `plan`). `core-ops agent` and `core-ops status` remain init-only — `agent` is timer-driven and requires persisted tracking; `status` reads only persisted state. +- **FR-012**: Stateless `plan` MUST NOT write to `/var/lib/core-ops/` or any other persisted controller-state location. It is a pure read-and-render operation with respect to controller state. Stateless `plan` MUST honor an explicitly-provided `--audit-dir ` flag exactly as init'd `plan` does — writing the plan audit record to the operator-specified destination. 
The operator passing `--audit-dir` is explicit consent (per the 2026-05-05 clarification) and does not violate the no-`/var/lib/` guarantee. +- **FR-013**: Stateless `apply` MUST converge host state and write audit records as today. The persisted `desired_state.repository` and `desired_state.requested_ref` of any existing init'd configuration MUST remain unchanged by a stateless apply. Provenance written by stateless apply (status snapshot, audit) MUST clearly distinguish the path-based source. `desired_state.repository` records the absolute path. `desired_state.requested_ref` records (per the 2026-05-05 clarification): + - the detected git commit SHA when the path is a clean git checkout at a known commit (full traceability); + - the sentinel `(stateless+dirty)` when the path is a git working tree with uncommitted changes (signals the applied state was not a clean revision); + - the sentinel `(stateless)` when the path is not a git repository at all. +- **FR-014**: Stateless mode MUST fail with an actionable error if `` is not a directory or is not a spec/016-conformant source repository. No fallback interpretation as a git URL. +- **FR-015**: Stateless mode MUST be expressible without git: a non-git directory containing a valid spec/016 layout MUST work end-to-end through `plan` and `apply`. +- **FR-016**: The CLI help text for `plan`, `apply`, and `explain` MUST document the new `--source-repo` flag, its bypass relationship with the init'd-mode default, and the `--host` requirement. The help text MUST also link to the canonical `init`-then-`plan` workflow for the long-lived case so users can choose the right mode. + +#### Spec/016 example removal & supersession + +- **FR-017**: This change MUST remove the four in-tree example fixtures at `specs/016-source-repository-layout/examples/{01-minimal-single-service,02-variant-config-root,03-multi-unit-with-dropins,04-host-overlay}/`. They are superseded by `examples//` published under FR-001. 
+- **FR-018**: This change MUST update spec/016's `spec.md` (FR-023 and any user-story references) and `tasks.md` (T101–T104 carry historical "[X] [SUPERSEDED by spec/017]" annotations) to record the supersession without rewriting historical task records. The decision file `decision_examples-are-layout-shape-fixtures.md` (memory) is updated by separate operator action; this spec does not write memory files. +- **FR-019**: Any integration test that loaded the four spec/016 example fixtures MUST be removed or repointed at the spec/017 examples. Test inventory MUST be net-positive (≥5 new tests, equal or fewer total deletions of the spec/016-example tests). + +#### Stale-doc cleanup + +- **FR-020**: This change MUST remove or update stale CLI documentation that references the long-removed `--repo` and `--rev` arguments: + - `docs/follow-ups.md`: the "Init Command" section's "currently expects" / "shall be introduced" / "remove the `repo` and `rev` arguments" prescriptions are now historical (the change shipped in spec/015). The paragraphs are removed or rewritten as a brief historical note. Other items in the same section that remain valid follow-ups (e.g., `rollback-plan-only` re-homing) are preserved. + - `docs/development.md` line 228: the literal `core-ops plan --repo --rev ` example is updated to use `--source-repo` (stateless mode) or `core-ops init` then `core-ops plan` (init'd mode), whichever is contextually appropriate. + - `infra/repo/README.md` lines 32, 35, 38: the `core-ops plan --repo file:///... --rev demo-uat-vN` examples are updated to the current CLI surface. + - Historical specs (`specs/001-*`, `specs/007-*` quickstart files) are NOT modified — they are time-capsule artifacts of the spec at the time it was authored. 
+ +#### Release governance + +- **FR-021**: This change MUST update `Cargo.toml` to the next `minor` version above the current master Cargo.toml value (master is currently at `2.1.1`; the next minor is `2.2.0`), add `changes/017-real-world-validation.md` declaring `release_intent: minor`, and re-render `CHANGELOG.md` via `cargo run --bin core-ops-release -- changelog --write`. If the inferred-bump validator detects deletions in `specs/016-source-repository-layout/examples/` as `major` per spec/011, the declared intent is bumped to `major` and the version becomes `3.0.0` — the validator's verdict is authoritative. + +### Key Entities + +- **Example setup**: A directory under `examples//` carrying a self-contained source-repository expression of one real-world deployment topology, plus README documentation explaining intent, sources, pressure axis, and known limitations. +- **Friction record**: An entry in the synthesis table identifying a specific gap encountered during translation, naming the affected example(s), classification (A/B/C), rationale, and recommended action. +- **Pressure axis**: A design property each example is chosen to exercise (single-Container baseline; multi-Container with intra-service network and persistent storage; GPU device passthrough plus multi-network membership; cross-service ForwardAuth composition; host-scope sidecars with `/proc` and `/sys` bind mounts and scrape-config templating). +- **Stateless invocation**: A `core-ops plan` or `core-ops apply` invocation using `--source-repo ` as the desired-state source, bypassing the persisted controller configuration written by `core-ops init`. Distinguished in provenance by the path-based `desired_state.repository` and `(stateless)` sentinel `requested_ref`. + +## Constitution Alignment *(mandatory)* + +- **Functional core vs. side effects**: Stateless mode adds a new entry-point boundary (path → in-memory `EvaluationInput`) but reuses the existing parser, planner, and applier. 
No new core or side-effect surfaces beyond the existing apply mutation paths. +- **Declarative state model**: Examples are pure declarative artifacts. The stateless flag changes the source-of-truth resolution but not the in-memory representation; `EvaluationInput` is identical between init'd and stateless modes. +- **Idempotence & convergence**: Stateless apply has the same convergence guarantees as init'd apply against the same tree. Re-running stateless apply against the same path is idempotent. +- **Explicit effects/failures**: Stateless mode's persistence semantics are documented explicitly (FR-013): apply writes audit and status snapshots; plan writes nothing. The path-based provenance sentinel makes the source mode visible in `core-ops status`. +- **Observability**: Synthesis table is the validation observability surface. Stateless invocations produce normal audit records distinguished by their provenance source field. +- **Provenance & traceability**: Stateless invocations record path-based provenance distinct from git-URL provenance, preserving Principle 12's traceability invariant: runtime behavior is traceable to both reconciler revision and desired-state revision (where revision is the path's commit if git-managed, else `(stateless)` sentinel). +- **Safe defaults**: The default mode remains init'd; stateless requires the explicit `--source-repo` flag. Safe defaults preserved. +- **Compatibility**: Init'd-mode behavior is unchanged. Existing `core-ops plan` / `apply` invocations without `--source-repo` continue to source from persisted state. The flag is additive. +- **Release version policy**: Adds CLI surface (`--source-repo`) — `minor` per spec/011 inferred-bump rules. Removes spec/016 example directories — may trigger `major` per the inferred-bump rules' "deleted source" classification (spec/016 examples are not under `src/` but the validator's exact rules govern). 
Declared intent is `minor`; the `core-ops-release validate` step is authoritative on the final bump. +- **Release intent artifact**: `changes/017-real-world-validation.md` declares `release_intent: minor` (or `major` if the validator demands). The fragment lists CLI additions, example additions, spec/016 example removal, and stale-doc cleanup. +- **Changelog discipline**: An `[Unreleased]` entry is rendered via `core-ops-release changelog --write`; the post-merge promote step (shipped in v2.1.0) moves it to `[2.2.0]` (or `[3.0.0]`) automatically. +- **Test contract**: Per-example integration tests assert parser success and structural shape via the new `--source-repo` flag. New unit tests for stateless argument parsing, mutual-exclusion validation, path resolution. New integration test for stateless apply against a synthetic source repo (no real podman pull). `cargo test` and `cargo clippy --all-targets -- -D warnings` MUST pass before merge. **VM-backed scenario assessment**: stateless mode introduces a CLI entry-point variation but no new mutation classes — the actual host-state changes performed by stateless apply are identical to those performed by init'd apply against the same tree. Existing apply VM scenarios (`tests/fixtures/verification/scenarios/`) remain valid; stateless mode is exercised at the unit + integration test layer. Per Principle 10's exemption clause, no new VM-backed scenario is required for this feature; this exemption is recorded explicitly here as the documented justification. +- **Regenerability**: Examples are derivative of public upstream sources cited per-example. The synthesis table can be re-derived from translation artifacts. Stateless mode's semantics are spec'd here and tested by integration cases. 
+ +## Success Criteria *(mandatory)* + +### Measurable Outcomes + +- **SC-001**: A new operator can clone the repository and run `core-ops plan --source-repo examples/01-caddy-whoami --host example` successfully — exit code 0, non-empty plan, no `init` required, no writes to `/var/lib/core-ops/` — without modifying any file. +- **SC-002**: 100% of frictions surfaced in any example's README appear as entries in the synthesis table with an A/B/C classification, an affected-example list, and a rationale. +- **SC-003**: All five examples parse cleanly via the parser through the new stateless flag, asserted by per-example integration tests; `cargo test` adds at least five new test cases that pass and the aggregate test suite remains green. +- **SC-004**: The two follow-up bullets in `docs/follow-ups.md` lines 87–99 (Source Repository UX → "Rich, documented real-life examples" and "QnA for known limitations") are removable from `docs/follow-ups.md` upon merge. The "Init Command" section paragraphs that prescribe the (now-shipped) `--repo`/`--rev` removal are also removable. +- **SC-005**: A grep across `examples/` for the strings `not.one`, `ulthar`, `192.168.1.2`, and any GCloud DNS credential marker returns zero matches. +- **SC-006**: Each example's `README.md` cites at least one public upstream source URL. +- **SC-007**: The root `README.md` carries a `## Real-World Examples` section linking each of the five examples with a one-line purpose; the rendered GitHub README shows the section between `## First Interaction` and `## Installation (Current Phase)`. +- **SC-008**: A `core-ops plan --source-repo <path>` invocation against a fresh host (no `core-ops init` ever run) succeeds without surfacing any "controller configuration not initialized" error. 
+- **SC-009**: A stateless apply against a host with prior init'd configuration MUST NOT mutate the init'd `desired_state.repository` or `desired_state.requested_ref` — verified by an integration test that init's, statelessly applies, then asserts the persisted desired-state fields are byte-identical to pre-stateless. +- **SC-010**: After this change merges, the four `specs/016-source-repository-layout/examples/` subdirectories are absent; `cargo test` passes with the spec/017 example tests as the sole example-fixture coverage. +- **SC-011**: `core-ops explain --source-repo --host ` succeeds against any of the five published examples without prior `core-ops init` and writes nothing to `/var/lib/core-ops/`. + +## Synthesis table + + + +| Friction | Affected examples | Classification | Rationale | Action | +|----------|-------------------|----------------|-----------|--------| +| Stateless plan/apply/explain blocked all five examples (CLI gap, not layout gap) | 01, 02, 03, 04, 05 | A | Layout was sufficient; bottleneck was missing `--source-repo` CLI surface. Self-escalation absorbed in this slice per 2026-05-05 operator approval. | Escalate to spec/017 (this iteration absorbs the fix) | +| Nextcloud first-run requires interactive admin-account wizard | 02 | B | Product behavior of the upstream Nextcloud image, not a layout limitation. The example documents the post-apply step in its `## Known limitations`. | Documented in `02-nextcloud/README.md` | +| Domain / hostname placeholders in committed configs require operator edits before applying | 02, 04 | B | FR-008 forces RFC 2606 reserved domains in committed configs (`*.example.com`, etc.); operators replace with their real domain in the scaffold copy. Authoring concern, not a layout gap. | Documented in `02-nextcloud/README.md` | +| Podman-secret values cannot ship in the source repo; operators must `podman secret create` on the host | 02, 03, 04 | B | FR-009 forbids fake-or-real credentials in `examples/`. 
The Quadlet `Secret=` references the secret by name; the value lives in the host's secrets store. | Documented in `02-nextcloud/README.md` | +| GPU device shape is host-specific (Intel/AMD VAAPI vs NVIDIA CDI) | 03 | B | Host overlay drop-in is the documented escape — operators replace `AddDevice=/dev/dri:/dev/dri` with `AddDevice=nvidia.com/gpu=all` (or the `PodmanArgs=--device` CDI form) in their own scaffold copy. No layout change required. | Documented in `03-immich/README.md` | +| Immich library upload often backed by NFS in real homelabs; NFS mount declarations not exercised | 03 | C | Real workload would extend the example with `*.mount` units under the layout, but mount-aware reconciliation against NFS is orthogonal to the validation iteration's scope. Tracked as a follow-up. | Tracked in `docs/follow-ups.md` | +| Authelia user database (`users_database.yml`) cannot ship in the source repo without leaking credentials | 04 | B | FR-009 forbids fake-or-real credentials in `examples/`. Operators must create `/etc/authelia/users_database.yml` on the target host before applying. | Documented in `04-traefik-authelia/README.md` | +| TLS cert resolver not wired (Traefik `entryPoints.websecure` declared without `certResolver`) | 04 | B | ACME (DNS-01 / HTTP-01) configuration is operator-domain-specific and FR-009-incompatible (would require domain + DNS credentials). Operators wire it up in their scaffold copy. | Documented in `04-traefik-authelia/README.md` | +| Authelia secrets (JWT, session, storage) sourced via `AUTHELIA_*_FILE` from a secrets backend | 04 | C | Common across many real workloads (Authelia, Nextcloud DB, Immich DB). The existing "Secrets UX" follow-up in `docs/follow-ups.md` is the right home; no new follow-up bullet needed. 
| Tracked in `docs/follow-ups.md` | +| Prometheus per-host scrape-target list cannot be expressed without templating | 05 | B | Spec/016 has no templating layer; the documented escape is a host-overlay whole-file replacement under `hosts//prometheus/config/prometheus.yml`. The example demonstrates this. Future work can amend spec/016 with templating if the workaround proves insufficient at scale. | Documented in `05-observability/README.md` | +| cadvisor requires `--privileged` for cgroup-v2 reads | 05 | B | Documented upstream pattern. Operators on hardened hosts may substitute fine-grained capabilities; the example uses the documented form via `PodmanArgs=--privileged`. | Documented in `05-observability/README.md` | + +## Assumptions + +- The five-setup roster is operator-confirmed and frozen for this slice: Caddy + whoami; Nextcloud (community multi-container) + Postgres + Redis + Traefik; Immich server + db + redis + ML + Traefik; Traefik + Authelia + protected backend; Prometheus + Grafana + node-exporter + cadvisor. +- Asciinema recording is deferred. The follow-up at `docs/follow-ups.md` line 109 remains open. +- Ulthar (`~/code/ulthar/repo/hosts/ulthar/`) is consulted as a research data point only. +- Examples are parse-only deliverables. CI does not pull images. +- Friction encountered does not block this slice's merge unless ≥2 of 5 examples are blocked by the same gap; in that case the validation iteration escalates the gap to a follow-up amendment spec rather than landing parser changes inline. +- The post-merge `core-ops-release promote` step (shipped in v2.1.0) handles the `[Unreleased]` → `[]` transition and fragment cleanup automatically. 
+- Stateless mode provenance representation is locked: full SHA hex / `(stateless+dirty)` / `(stateless)` sentinel values stored in the existing `DesiredStateProvenance.requested_ref: String` field at `src/core/types.rs:555` (no struct change required, sentinels disambiguated by leading `(` which is invalid in git ref names per `git check-ref-format`). Decided by the 2026-05-05 clarification Q3 and confirmed by research.md D2. +- Stateless apply does not introduce new behavioral mutation classes; existing apply VM-backed scenarios remain authoritative for the mutation semantics. +- Removal of `specs/016-source-repository-layout/examples/` does not break any release-governance validation rule beyond the standard `major`-on-deletion check; the release fragment may need to declare `major` if the validator considers spec example directories as governed source. The validator's verdict is authoritative. diff --git a/specs/017-real-world-validation/tasks.md b/specs/017-real-world-validation/tasks.md new file mode 100644 index 0000000..1c936d9 --- /dev/null +++ b/specs/017-real-world-validation/tasks.md @@ -0,0 +1,280 @@ +# Tasks: Real-World Validation, Examples, and Stateless Source-Repo Mode + +**Input**: Design documents from `/home/outergod/code/github.com/outergod/core-ops/specs/017-real-world-validation/` +**Prerequisites**: plan.md (✅), spec.md (✅), research.md (✅), data-model.md (✅), contracts/ (✅), quickstart.md (✅) + +**Tests**: REQUIRED. FR-006 explicitly mandates per-example integration tests; FR-016 mandates help-text contracts; spec.md Constitution Alignment requires `cargo test` and `cargo clippy --all-targets -- -D warnings` to pass before merge. The VM-backed-scenario exemption is recorded explicitly in spec.md and plan.md. + +**Organization**: Tasks are grouped by user story to enable independent implementation and testing. 
The validation iteration's user stories are: + +- **US1 (P1)** — First-time operator runs a real example with one command (stateless plan + explain + 5 examples) +- **US2 (P2)** — Operator authors and iterates on their own setup without committing first (non-git stateless support, scaffolding ergonomics) +- **US3 (P2)** — Stateless apply for one-off convergence and recovery (apply provenance, init'd state preservation) +- **US4 (P3)** — Future spec author grounds amendments in validation evidence (synthesis table) + +## Format: `[ID] [P?] [Story] Description` + +- **[P]**: Can run in parallel (different files, no dependencies on incomplete tasks) +- **[Story]**: Which user story this task belongs to (US1, US2, US3, US4); omitted for Setup/Foundational/Polish phases +- File paths are absolute or repo-root-relative + +## Path Conventions + +Single Rust project per `plan.md`. Repo-root-relative paths for `src/`, `tests/`, `examples/`, `docs/`, `specs/`. + +--- + +## Phase 1: Setup (Shared Infrastructure) + +**Purpose**: Clear the way for spec/017's deliverables. Removes the four superseded spec/016 example fixtures and their single in-tree consumer so the new top-level `examples/` doesn't fight the old. + +- [X] T001 Remove the four spec/016 example fixture directories: `git rm -r specs/016-source-repository-layout/examples/01-minimal-single-service specs/016-source-repository-layout/examples/02-variant-config-root specs/016-source-repository-layout/examples/03-multi-unit-with-dropins specs/016-source-repository-layout/examples/04-host-overlay` +- [X] T002 Update `tests/integration/source_repo_support.rs:20`: repoint `EXAMPLES_DIR` const at top-level `examples` (or delete the const + helper if no surviving consumer remains after T001 + T026–T030 land); audit any `examples_root()` callers and fix or remove. Run `cargo check --tests` to confirm no dangling references. 
+- [X] T003 [P] Annotate `specs/016-source-repository-layout/spec.md` FR-023: append a supersession note pointing at top-level `examples/` and at this spec (`specs/017-real-world-validation/`). Do not remove the FR text — preserve historical record. +- [X] T004 [P] Annotate `specs/016-source-repository-layout/tasks.md` T101–T104: append ` [SUPERSEDED by spec/017]` to each line (the lines are already marked `[X]`; do not duplicate the marker). Preserve historical record. +- [X] T005 [P] Verify `scripts/migrate-legacy-source-repo.sh` does not reference the spec/016 example paths (per research.md D10). If it does, capture the references for a follow-up task; if not, confirm in the commit message. + +--- + +## Phase 2: Foundational (Blocking Prerequisites) + +**Purpose**: Land the `--source-repo` CLI surface and the git-aware provenance helper that every user story depends on. No user-story phase can begin until this phase is complete and `cargo test` + `cargo clippy --all-targets -- -D warnings` are green. + +**⚠️ CRITICAL**: No user story work can begin until this phase is complete. + +- [X] T006 Add `source_repo: Option<PathBuf>` field with `requires("host")` and `ArgGroup` semantics to `PlanArgs` in `src/cli/args.rs`. Include doc-comments naming the flag in `--help`, the `--host` requirement, and the bypass relationship with `core-ops init` (per FR-016 / contracts/cli-flag.md help-text contract). +- [X] T007 Add the same `source_repo: Option<PathBuf>` field with `requires("host")` to `ApplyArgs` in `src/cli/args.rs` (same file as T006 → sequential). +- [X] T008 Add the same `source_repo: Option<PathBuf>` field with `requires("host")` to `ExplainArgs` in `src/cli/args.rs` (same file → sequential after T007). 
+- [X] T009 [P] Implement `src/io/source_ref.rs::detect_provenance(path: &Path) -> Result` per research.md D1 algorithm: canonicalize path; check `git -C rev-parse --is-inside-work-tree`; check `git -C status --porcelain -- .` for cleanliness; capture `git -C rev-parse HEAD` SHA; emit `(stateless)` / `(stateless+dirty)` sentinels for non-git or dirty cases. Use `std::process::Command::new("git")` per the existing pattern at `src/cli/init.rs:52`. Validate path-is-directory with miette diagnostics emitting exit codes 64 / 65 / 66 per contracts/cli-flag.md. +- [X] T010 [P] Wire stateless source resolution into `src/cli/plan.rs`: branch on `args.source_repo`; when present, use `detect_provenance(path)` to build `EvaluationInput` directly; bypass init'd-state lookup; preserve existing `--audit-dir` handling unchanged (per FR-012 + 2026-05-05 clarification Q4); ensure no writes under `/var/lib/core-ops/`. +- [X] T011 [P] Wire stateless source resolution into `src/cli/apply.rs`: same as T010, plus apply-specific provenance recording — `desired_state.repository = canonicalize(path)`, `desired_state.requested_ref` = SHA-or-sentinel from `detect_provenance`. CRITICAL: do NOT mutate any existing init'd `desired_state.*` fields when stateless flag is present (FR-013, SC-009). +- [X] T012 [P] Wire stateless source resolution into `src/cli/explain.rs`: pure-read path; build `EvaluationInput` from `--source-repo`; ensure no writes anywhere; reuse existing explain rendering. +- [X] T013 Unit tests for clap argument parsing in `src/cli/args.rs::tests` (or a new `src/cli/args_tests.rs`): `--source-repo` accepted on plan/apply/explain with `--host`; `--source-repo` rejected on init/agent/status/skill; `--source-repo` without `--host` errors with the expected clap-generated message. 
+- [X] T014 [P] Unit tests for `src/io/source_ref.rs`: temp-dir non-git → `(stateless)`; temp-dir git-init clean → 40-char SHA; temp-dir git-init with uncommitted file → `(stateless+dirty)`; non-existent path → exit-64 error; path-is-file → exit-65 error. +- [X] T015 Run `cargo build`, `cargo test`, and `cargo clippy --all-targets -- -D warnings`. ALL MUST PASS before Phase 3 begins. This is the foundational checkpoint. + +**Checkpoint**: Foundation ready — user stories US1, US2, US3 can now begin in parallel. + +--- + +## Phase 3: User Story 1 — First-time operator runs a real example with one command (Priority: P1) 🎯 MVP + +**Goal**: Five real-world examples published under `examples/<name>/`, each runnable via `core-ops plan --source-repo examples/<name> --host <host>`, exiting 0 with a non-empty plan, no `core-ops init` required. + +**Independent Test**: A reviewer who has never seen CoreOps clones the repo, runs the suggested invocation against any of the five examples, and gets a populated plan. SC-001, SC-003, SC-006, SC-007, SC-008. + +### Tests for User Story 1 (REQUIRED) ⚠️ + +> Write tests FIRST when feasible; for example-authoring tasks the example dirs need to exist before the parse test can fail-then-pass — author the example and the test as a paired commit per task. + +- [X] T016 [P] [US1] Per-example integration test `tests/integration/test_examples_01_caddy_whoami.rs`: load `examples/01-caddy-whoami/` via the parser, assert (a) `Repository::load` succeeds, (b) resolved service catalog contains `caddy`, (c) example root carries `README.md`, (d) `cargo run --bin core-ops -- plan --source-repo examples/01-caddy-whoami --host example` exits 0 (via `assert_cmd`). +- [X] T017 [P] [US1] Per-example integration test `tests/integration/test_examples_02_nextcloud.rs`: same pattern; assert resolved services contain at minimum `nextcloud`, `nextcloud-db`, `nextcloud-redis`, `traefik` (or whichever ids the implementer locks during T024). 
+- [X] T018 [P] [US1] Per-example integration test `tests/integration/test_examples_03_immich.rs`: same pattern; assert services contain `immich-server`, `immich-database`, `immich-redis`, `immich-ml`, `traefik`. +- [X] T019 [P] [US1] Per-example integration test `tests/integration/test_examples_04_traefik_authelia.rs`: same pattern; assert services contain `traefik`, `authelia`, and at least one protected backend. +- [X] T020 [P] [US1] Per-example integration test `tests/integration/test_examples_05_observability.rs`: same pattern; assert services contain `prometheus`, `grafana`, `node-exporter`, `cadvisor`. +- [X] T021 [P] [US1] Stateless plan integration test `tests/integration/test_stateless_plan.rs`: cover (a) `--source-repo` against a non-git tempdir → exit 0 with `(stateless)` provenance, (b) clean git checkout → SHA provenance, (c) dirty working tree → `(stateless+dirty)` provenance, (d) missing `--host` → clap exit 2, (e) non-directory path → exit 64, (f) `--audit-dir` honored when explicitly set. +- [X] T022 [P] [US1] Stateless explain integration test `tests/integration/test_stateless_explain.rs`: pure-read invocation against **each of the five examples** (one sub-test per example, picking a deterministic object id from each — e.g., the first `*.container` declared in the example's services). Per sub-test assert exit 0, no writes to `/var/lib/core-ops/`, no audit files created when `--audit-dir` not set. This is the SC-011 coverage task — "any of the five published examples" requires all five exercised, not one. + +### Implementation for User Story 1 + +> Authoring tasks T023–T027 are the validation work itself: research the upstream design from public sources, write own Quadlet equivalents (no verbatim YAML copy per research.md D5), and embed citations in each `README.md`. Each authoring task is one example, fully independent (different directory). 
Per example: ≥1 service definition, ≥1 host overlay, README with sources/intent/dispatch-table/known-limitations/Try-it-snippet. + +- [X] T023 [P] [US1] Author `examples/01-caddy-whoami/`: single Container (Caddy fronting whoami), `services//quadlet/`, default config-root, one host overlay, README citing Caddy docs + traefik/whoami container README. +- [X] T024 [P] [US1] Author `examples/02-nextcloud/`: multi-Container with Nextcloud + Postgres + Redis + Traefik (community multi-container — NOT Nextcloud AIO per research.md D5), intra-service `Network=`, persistent `Volume=`, `service.yaml` with `config-root` where ids diverge, host overlay with TLS/domain config in drop-ins, README citing Nextcloud's community docker-compose docs. +- [X] T025 [P] [US1] Author `examples/03-immich/`: server + db + redis + ML worker + Traefik, GPU device passthrough via `PodmanArgs=` or equivalent quadlet directive, multi-network membership (immich-internal + traefik network), host overlay, README citing `immich-app/immich` docker-compose.yml. Document any friction (e.g., NFS mount patterns) in README's `## Known limitations` and route to synthesis table. +- [X] T026 [P] [US1] Author `examples/04-traefik-authelia/`: Traefik + Authelia + protected backend (e.g., whoami), ForwardAuth middleware composition via Traefik labels (drop-ins on the protected backend), cross-service network, host overlay with auth domain configured, README citing Authelia's Traefik integration docs. +- [X] T027 [P] [US1] Author `examples/05-observability/`: Prometheus + Grafana + node-exporter + cadvisor, host-scope sidecars with `/proc` and `/sys` bind mounts (declared as `Volume=/proc:/host/proc:ro,rslave`-style mounts), scrape-config templating limitation captured in README + synthesis table, host overlay, README citing Prometheus/Grafana/node-exporter/cadvisor official compose examples. 
+- [X] T028 [US1] Register the five new test modules in `tests/integration/mod.rs`: `pub mod test_examples_01_caddy_whoami;` through `pub mod test_examples_05_observability;` plus `pub mod test_stateless_plan;` and `pub mod test_stateless_explain;` (single file → sequential). +- [X] T029 [P] [US1] Add `## Real-World Examples` section to repo-root `README.md` between `## First Interaction` and `## Installation (Current Phase)`, linking each of the five examples with a one-line purpose statement (single file → sequential within itself, parallel with all other US1 tasks). + +**Checkpoint**: At this point, US1 is fully functional. A reviewer can run any of the five examples via stateless plan/explain. SC-001/003/006/007/008 measurable. + +--- + +## Phase 4: User Story 2 — Operator authors and iterates on their own setup (Priority: P2) + +**Goal**: Stateless mode supports non-git directories (FR-015) and the iterate-then-init transition is smooth (US2 AC3). + +**Independent Test**: Copy an example to a scratch directory, modify it without `git init`, run `core-ops plan --source-repo --host ` repeatedly with edits in between — all succeed. Then `git init && core-ops init` and verify subsequent `core-ops plan` (no flag) produces an equivalent plan. + +### Tests for User Story 2 (REQUIRED) ⚠️ + +- [X] T030 [US2] Add to `tests/integration/test_stateless_plan.rs` (or a new `test_stateless_authoring.rs`): copy `examples/02-nextcloud/` via `copy_dir_recursive` helper to a tempdir, rename `hosts/example/` to `hosts/myhost/`, edit `host.yaml`, run `core-ops plan --source-repo --host myhost` → exit 0 (US2 AC1). +- [X] T031 [US2] Add a transition test: stateless-plan against a scratch dir, then `git init && git add . && git commit && core-ops init main && core-ops plan` (no flag), assert the two plans produce equivalent action sets via `PlanOutput` JSON comparison (US2 AC3, idempotence under transition). 
+ +### Implementation for User Story 2 + +> US2's implementation surface is fully covered by the foundational stateless-mode wiring (T009–T012) plus the FR-015 non-git support already exercised in T021. No new code paths needed; T030–T031 are integration-test deliverables only. + +- [X] T032 [US2] Verify each `examples//README.md` includes a "Scaffold for your own setup" section with explicit `cp -r examples/ ~/my-setup` instructions per quickstart.md Step 6. If absent, add to each README via a single edit pass (touches all five README files → sequential). Verified via `grep -l "## Scaffold for your own setup" examples/*/README.md` returning all five paths. + +**Checkpoint**: US2 validated. Operators can use any example as a starting scaffold. + +--- + +## Phase 5: User Story 3 — Stateless apply for one-off convergence and recovery (Priority: P2) + +**Goal**: `core-ops apply --source-repo --host ` mutates host state and writes path-based provenance; init'd `desired_state.*` is preserved (SC-009). + +**Independent Test**: Stateless apply against a fresh host succeeds and produces audit + status with path-based provenance. Stateless apply against a host with prior init'd configuration leaves `desired_state.repository` and `desired_state.requested_ref` of the init'd config byte-identical pre/post. + +### Tests for User Story 3 (REQUIRED) ⚠️ + +- [X] T033 [US3] Stateless apply integration test `tests/integration/test_stateless_apply.rs`: stateless apply against a synthetic source repo in tempdir; assert (a) exit 0, (b) audit record produced, (c) status snapshot reports `desired_state.repository = `, `desired_state.requested_ref` matches expected sentinel/SHA per the source's git state (FR-013, US3 AC1, US3 AC2). Implementation interpretation: per FR-013 ("MUST converge host state and write audit records as today") + SC-009 ("init'd state byte-identical pre/post"), stateless apply writes audit records but does not write to /var/lib/core-ops/status.json. 
The "status snapshot" assertion in (c) is asserted against the audit-bundle's `result.desired.requested_repository`/`requested_ref` fields, which carry the canonical-path / sentinel-or-SHA values produced by `detect_provenance`. +- [X] T034 [US3] Add to `test_stateless_apply.rs` or new file: (a) **Init'd-state preservation test** — `core-ops init main` (write init'd state to a `--state-file` tempdir), then `core-ops apply --source-repo --host --state-file `, assert `desired_state.repository` and `desired_state.requested_ref` from the init'd phase are byte-identical pre/post the stateless apply (SC-009). (b) **Stateless-apply → init'd-plan transition test** (US3 AC3) — after the stateless apply lands, run `core-ops init main --force --state-file ` then `core-ops plan --state-file ` (no `--source-repo`); assert plan exits 0 and produces a normal init'd-mode plan with no detached-state header and no rollback ambiguity surfacing from the prior stateless apply. +- [X] T035 [US3] Add to `test_stateless_apply.rs`: provenance-shape coverage — three sub-cases asserting `(stateless)` / `(stateless+dirty)` / SHA recorded under three working-tree conditions (matches T021's plan-side coverage but for apply's persisted snapshot). Sequential within `test_stateless_apply.rs` after T033/T034. + +### Implementation for User Story 3 + +> US3's implementation is fully covered by T011 (stateless wiring in apply.rs) and the provenance recording it lands. No new code paths needed; T033–T035 are integration-test deliverables. + +- [X] T036 [US3] Register `pub mod test_stateless_apply;` in `tests/integration/mod.rs` (single file → sequential after T028). + +**Checkpoint**: US3 validated. Stateless apply is functional with correct provenance and init'd-state preservation. 
+ +--- + +## Phase 6: User Story 4 — Synthesis table populated and reviewed (Priority: P3) + +**Goal**: Friction-classification synthesis table in `spec.md` carries every translation finding with classification A/B/C, satisfying FR-005 and SC-002. Future spec authors have an evidence base. + +**Independent Test**: Open `spec.md`, count rows in the `## Synthesis table` section, verify each row has all five required columns and Classification ∈ {A, B, C}, verify each `B`-row's workaround text exists in the affected example's README under `## Known limitations`, verify each `C`-row corresponds to a `docs/follow-ups.md` bullet, verify each `A`-row references a real follow-up spec number. + +### Tests for User Story 4 + +> The synthesis table is markdown content, not code. Verification is review-time per contracts/synthesis-table.md invariants 1–5. No automated tests added. + +### Implementation for User Story 4 + +- [X] T037 [US4] Add an empty `## Synthesis table` section to `spec.md` between `## Success Criteria` and `## Assumptions`, with the column-header row only and an instruction comment for the synthesis pass: ``. +- [X] T038 [US4] Synthesis review pass: read every `examples//README.md`'s `## Known limitations` section, transcribe each friction as a row in the synthesis table with the correct classification per contracts/synthesis-table.md semantics. The first row pre-populated by this slice is the stateless-mode self-escalation: `Stateless plan/apply/explain blocked all five examples (CLI gap, not layout gap) | 01..05 | A | Layout was sufficient; bottleneck was missing --source-repo CLI surface. Self-escalation absorbed in this slice per 2026-05-05 operator approval. | Escalate to spec/017 (this iteration absorbs the fix)`. 
+- [X] T039 [US4] Verify synthesis-table invariants 1–5 from contracts/synthesis-table.md: every example's known-limitations entry is reflected; every `A` row references a real spec; every `B` row has its README workaround; every `C` row has a follow-up bullet. Verified pre-commit: 11 rows total (1 A absorbed, 8 B, 2 C); every README ## Known limitations entry has a backing row; the C-classified Immich NFS row has a new bullet at `docs/follow-ups.md` ("NFS-backed library mounts in real workloads"); the C-classified Authelia secrets row routes to the existing "Secrets UX" follow-up. + +**Checkpoint**: All four user stories complete. Validation iteration is structurally sound. + +--- + +## Phase 7: Polish & Cross-Cutting Concerns + +**Purpose**: Stale-doc cleanup, release governance, final validation gates. + +- [X] T040 Stale-doc cleanup: `docs/follow-ups.md` lines 7–14 — remove the now-shipped paragraphs about `--repo`/`--rev` argument removal (FR-020). Preserve still-valid follow-ups in the same section: quadlet-dir/systemd-unit-dir/state-file/audit-dir arg persistence; rollback-plan-only re-homing; `--reinitialize` UX. Also remove (or amend) the two follow-up bullets at lines 87–99 (Source Repository UX → "Rich, documented real-life examples" and "QnA for known limitations") — closed by this slice. +- [X] T041 [P] Stale-doc cleanup: `docs/development.md:228` — replace `CORE_OPS_HOST= core-ops plan --repo --rev ` with `core-ops plan --source-repo --host ` plus a brief note about the init'd-mode workflow. +- [X] T042 [P] Stale-doc cleanup: `infra/repo/README.md` lines 32, 35, 38 — update each `core-ops plan --repo file:///… --rev demo-uat-vN` to use `--source-repo ` against the demo repo's checkout. Implementation note: `infra/repo/README.md` is gitignored (`.gitignore:19: /infra/repo/`) — it's a developer-local working dir for the demo repo, not a tracked artifact. 
The local copy was updated to the canonical init-then-plan flow with `--force` re-attaches between revisions (since a multi-tag demo repo is the prototypical init'd-mode use case), but the change is untracked and won't appear in the commit. +- [X] T043 Update `Cargo.toml` version: bump from current master `2.1.1` to next minor. Validator verdict: minor; bumped to 2.2.0. Also bumped `tests/fixtures/provenance_state/valid-success.json` controller.version 2.1.1 → 2.2.0 to keep the `controller_version_provenance_matches_cargo_package_version` test green. +- [X] T044 Add release fragment `changes/017-real-world-validation.md` declaring `release_intent: minor`. +- [X] T045 Re-render `CHANGELOG.md` via `cargo run --bin core-ops-release -- changelog --write`. +- [X] T046 Run final `cargo build --locked --bin core-ops --bin core-ops-verify --bin core-ops-release` plus `cargo test` plus `cargo clippy --all-targets -- -D warnings`. Result: 468 tests passed, clippy clean, build clean. +- [X] T047 Run `cargo run --bin core-ops-release -- validate --base-ref master`. Result: passed; classification=releasable; required=minor; declared=minor; CHANGELOG aligned. +- [X] T048 Run quickstart.md validation manually. Spot-checked: `core-ops plan --source-repo examples/01-caddy-whoami --host example` and `--source-repo examples/05-observability` succeed without prior init or `--force`; explain against `container/caddy.container` succeeds. +- [X] T049 Privacy + RFC-compliance gate: (a) `grep -rE 'not\.one|ulthar|192\.168\.1\.2|gcloud[-_]dns|gcloud\.json' examples/` returns 0 matches. (b) Deployment-config FQDNs (`whoami.example.com`, `cloud.example.com`, `grafana.example.com`) are RFC 2606. README citation URLs (`hub.docker.com`, `docs.kernel.org`, `www.authelia.com`, `caddyserver.com`, `authelia.com`) point at real public upstream documentation per FR-004(b) and are exempt from FR-008's deployment-hostname rule. 
(c) IPv4 literals: 192.0.2.0/24 (4 networks) and 198.51.100.0/24 (2 references) are RFC 5737 documentation ranges. The single 0.0.0.0 in `examples/04-traefik-authelia/services/authelia/config/configuration.yml` is the unspecified-address sentinel for binding to all interfaces (canonical Authelia config pattern), not an addressable host literal. +- [X] T050 Spec/017 self-update: tasks ticked per phase as they shipped (Phase 1 commit 9ce9bf5, Phase 2 commit 929e489, Phase 3 commit de40ba2, Phase 4 commit da26a35, Phase 5 commit 3ae5102, Phase 6 commit 6055c57, Phase 7 polish commit forthcoming). + +--- + +## Dependencies & Execution Order + +### Phase Dependencies + +- **Phase 1 (Setup, T001–T005)**: No dependencies. T001 can run first; T002 follows T001; T003–T005 parallel with each other after T001. +- **Phase 2 (Foundational, T006–T015)**: Depends on Phase 1 completion. **BLOCKS all user stories.** T006 → T007 → T008 (same file `args.rs`); T009 / T010 / T011 / T012 parallel after T006–T008; T013 sequential after T006–T008; T014 parallel with T009; T015 (validation gate) sequential after all above. +- **Phase 3 (US1, T016–T029)**: Depends on Phase 2 completion. T016–T020 (per-example tests) parallel; T023–T027 (per-example authoring) parallel and independent of T016–T020 ordering — but each test passes only after its example exists. T021 / T022 (stateless plan/explain integration tests) parallel with T016–T020. T028 (mod.rs registration) sequential after T016–T022. T029 (root README) parallel with everything else. +- **Phase 4 (US2, T030–T032)**: Depends on Phase 2 + at least one example from Phase 3 (T024 specifically for T030). T030 / T031 parallel with each other if they live in different files; T032 single-file pass → sequential. +- **Phase 5 (US3, T033–T036)**: Depends on Phase 2 (T011 specifically). 
T033 / T034 / T035 are all in `test_stateless_apply.rs` and therefore sequential within that file (no `[P]`); T036 (mod.rs registration) sequential after T035 and after T028 (single-file edits to `tests/integration/mod.rs`). +- **Phase 6 (US4, T037–T039)**: Depends on Phase 3 completion (synthesis review needs all examples authored). T037 → T038 → T039 sequential (each operates on `spec.md`). +- **Phase 7 (Polish, T040–T050)**: Depends on Phase 3, 4, 5, 6 completion. T040 / T041 / T042 parallel; T043 → T044 → T045 sequential (release governance pipeline); T046 → T047 sequential after T043–T045; T048 / T049 parallel; T050 cross-cutting. + +### User Story Dependencies + +- **US1 (P1)**: After Foundational (Phase 2). Independent of US2/US3/US4. +- **US2 (P2)**: After Foundational + at least one US1 example (T024 specifically). Independent of US3. +- **US3 (P2)**: After Foundational. Independent of US1/US2. +- **US4 (P3)**: After all US1 example READMEs are authored (T023–T027). Independent of US2/US3 except for transcribed friction. + +### Parallel Opportunities + +- **Within Phase 1**: T003 / T004 / T005 parallel after T001 (different files). +- **Within Phase 2**: T009 / T010 / T011 / T012 parallel after T006–T008 land. T014 parallel with T009. +- **Within Phase 3**: T016–T020 parallel; T023–T027 parallel; T021 / T022 parallel. +- **Across user stories**: With multiple developers, US1 / US3 can run fully in parallel after Phase 2. +- **Within Phase 7**: T040 / T041 / T042 parallel. 
+ +--- + +## Parallel Examples + +### Phase 2 foundational parallelism (after T006–T008 land args.rs) + +```bash +# Launch in parallel: +Task: "T009 Implement src/io/source_ref.rs::detect_provenance" +Task: "T010 Wire stateless source resolution into src/cli/plan.rs" +Task: "T011 Wire stateless source resolution into src/cli/apply.rs" +Task: "T012 Wire stateless source resolution into src/cli/explain.rs" +Task: "T014 Unit tests for src/io/source_ref.rs" +``` + +### US1 example authoring (after Phase 2 checkpoint) + +```bash +# Launch in parallel: +Task: "T023 Author examples/01-caddy-whoami/" +Task: "T024 Author examples/02-nextcloud/" +Task: "T025 Author examples/03-immich/" +Task: "T026 Author examples/04-traefik-authelia/" +Task: "T027 Author examples/05-observability/" +``` + +### US1 per-example integration tests (parallel with authoring) + +```bash +# Launch in parallel; each test passes only once its example's authoring lands: +Task: "T016 Per-example test for examples/01-caddy-whoami" +Task: "T017 Per-example test for examples/02-nextcloud" +Task: "T018 Per-example test for examples/03-immich" +Task: "T019 Per-example test for examples/04-traefik-authelia" +Task: "T020 Per-example test for examples/05-observability" +``` + +--- + +## Implementation Strategy + +### MVP First (US1 only — runnable real examples) + +1. Phase 1: Setup — clear spec/016 examples and their consumer. +2. Phase 2: Foundational — land `--source-repo` across plan/apply/explain plus the source_ref helper. +3. Phase 3: US1 — author the five examples and their integration tests; ship root README section. +4. **STOP and VALIDATE**: Run `core-ops plan --source-repo examples/01-caddy-whoami --host example` end-to-end; confirm all five examples parse and run. SC-001, SC-003, SC-006, SC-007, SC-008 measurable. +5. (Optional) Demo: a reviewer who has never seen CoreOps clones the repo and runs an example in under 5 minutes. + +### Incremental Delivery + +1.
MVP (Setup + Foundational + US1) → first valuable increment, addresses the v2.0.0 broken-examples regression. +2. Add US3 → stateless apply with provenance preservation. Critical for the recovery and CI workflows in spec.md. +3. Add US2 → non-git authoring scaffolds and stateless-to-init'd transition test. Mostly tests, no new implementation. +4. Add US4 → synthesis table population. Closes the validation iteration's evidence loop. +5. Polish → stale-doc cleanup, release governance, privacy gate, quickstart validation. + +### Parallel Team Strategy + +- Developer A: Phase 2 foundational (T006–T015), unblocks everyone. +- Developer B: After T015 lands, takes US1 example authoring (T023–T027 in parallel). +- Developer C: After T015 lands, takes US3 stateless apply tests (T033–T036). +- Developer D: After all US1 README authoring lands, takes US4 synthesis review (T037–T039). +- Polish phase shared at the end. + +--- + +## Notes + +- **Tests are mandatory** per FR-006, FR-016, and the spec's Constitution Alignment. The VM-backed-scenario exemption is recorded in spec.md and plan.md per Principle 10. +- **Tick off `- [X]` per task as it ships** per `feedback_speckit_tasks_checklist.md` — not batched at session end. Keep `tasks.md` in sync with git log. +- **Conventional commit messages** per `feedback_commit_style.md`: `feat(scope): subject` for code, `docs(scope): subject` for docs, `test(scope): subject` for tests. No `[Spec Kit]` prefix. +- **`cargo clippy --all-targets -- -D warnings` after every Rust-touching task** per `feedback_clippy.md`, before commit. +- **`--source-repo` flag naming is locked** per spec.md and contracts/cli-flag.md. Do not bikeshed. +- **No real-world ulthar values in `examples/`** per FR-009. Privacy gate at T049. +- **License hygiene**: write own Quadlet equivalents inspired by upstream sources. Cite upstream in each example README under a `## Sources` heading. Do NOT copy YAML blocks verbatim (research.md D5). 
+- **Spec/016 example removal happens BEFORE spec/017 examples are authored** (T001 in Phase 1) so `tests/integration/source_repo_support.rs:20` doesn't reference deleted dirs while implementation is in flight. +- **Self-escalation row in synthesis table** (T038) is the only `A`-classified-with-absorption row allowed; any other in-scope absorption is scope creep. +- **Avoid**: vague tasks, same-file conflicts, cross-story implementation dependencies that break independence. diff --git a/src/cli/apply.rs b/src/cli/apply.rs index d056997..bd6f3e9 100644 --- a/src/cli/apply.rs +++ b/src/cli/apply.rs @@ -827,6 +827,191 @@ pub fn execute_rollback_with_report( }) } +/// Stateless apply entry point (spec/017): converges host state from a +/// filesystem-resident source-repo without consulting or mutating the +/// persisted controller state at `/var/lib/core-ops/status.json`. +/// +/// Per FR-013 and SC-009, stateless apply MUST NOT mutate the +/// init'd-mode persisted `desired_state.repository` / +/// `desired_state.requested_ref`. This function achieves that by +/// performing zero state-file I/O — no `persist_in_progress_state`, +/// no `persist_finished_state`, no deterministic-state writes. The +/// audit chain (emitted by the caller) is the persisted record. +/// +/// `source` carries the canonical path plus path-based provenance +/// strings produced by [`crate::io::source_ref::detect_provenance`]. 
+pub fn apply_with_report_stateless( + source: &crate::io::source_ref::StatelessSource, + quadlet_dir: &Path, + reload_systemd: bool, +) -> Result { + let repo_path = source.repo_path.clone(); + let requested_repository = source.requested_repository.clone(); + let requested_ref = source.requested_ref.clone(); + let deps = ReconcileDependencies { + load_desired: &|| { + crate::io::repo::load_desired_state_from_path( + &repo_path, + &requested_repository, + &requested_ref, + ) + // Per `contracts/cli-flag.md` Error semantics: a path that + // exists as a directory but is not a spec/016-conformant + // source-repo (missing services/, legacy artifacts, etc.) + // exits 65 (`EX_DATAERR`). Thread the documented exit code + // through `CoreError.exit_code` so automation can classify + // by status alone. + .map_err(|err| { + CoreError::with_exit_code(FailureClass::Plan, err.to_string(), 65) + }) + }, + read_observed: &|desired| { + read_observed_state(quadlet_dir, Some(desired), None).map_err(map_plan_error) + }, + apply_plan: &|plan, desired| { + apply_plan_with_desired(plan, desired, quadlet_dir, reload_systemd) + .map(|_| ()) + .map_err(map_apply_error) + }, + }; + + let plan_result = reconcile_plan(&deps)?; + let observed_before = (deps.read_observed)(&plan_result.desired)?; + let scope_id = scope_id_for_observed(&observed_before); + let desired_snapshot = build_desired_snapshot_from_state(&plan_result.desired, &scope_id); + let observed_snapshot = + build_observed_snapshot(&observed_before, Some(&plan_result.desired), &scope_id); + let verification_results_before = normalize_verification_results_for_desired( + &plan_result.desired, + verify_state(&plan_result.desired, &observed_before), + ); + // Stateless mode has no last_applied baseline (init'd state is + // intentionally not consulted). Treat as FirstRun semantically. 
+ let mut deterministic = reconcile_deterministic_plan_with_runtime( + &desired_snapshot, + None, + &observed_snapshot, + &verification_results_before, + )? + .plan; + deterministic.requested_repository = plan_result.desired.requested_repository.clone(); + deterministic.requested_ref = plan_result.desired.requested_ref.clone(); + + let result = reconcile_apply_with_retry(&deps, DEFAULT_RETRY_BUDGET)?; + if result + .desired + .mount_declarations + .iter() + .any(|mount| mount.automount) + { + deterministic.scope_id = scope_id.clone(); + } + let run_display_state = if observed_snapshot.objects.is_empty() { + ApplyRunDisplayState::FirstRun + } else { + ApplyRunDisplayState::Recovery + }; + let human_report = format_apply_output_report( + &deterministic, + &result.verification_results, + result.convergence.as_ref(), + ApplyHumanMode::Default, + run_display_state, + ); + let verbose_report = format_apply_output_report( + &deterministic, + &result.verification_results, + result.convergence.as_ref(), + ApplyHumanMode::Verbose, + run_display_state, + ); + let machine_report = format_apply_output_json( + &deterministic, + &result.verification_results, + result.convergence.as_ref(), + ); + let result_view = build_result_output( + &deterministic, + &result.verification_results, + result.convergence.as_ref(), + ); + let result_report = format_result_output_report(&result_view); + let result_machine_report = format_result_output_json(&result_view); + + Ok(ApplyReportBundle { + result, + human_report, + verbose_report, + machine_report, + result_report, + result_machine_report, + plan: plan_result.plan, + }) +} + +/// Build a synthetic `PersistedProvenanceState` for stateless apply +/// so that the audit event surfaces path-based provenance plus the +/// actual run outcome (success / failure) without consulting any +/// persisted `/var/lib/core-ops/status.json`. 
+/// +/// `revision_id` is the resolved desired-state revision (a SHA, a +/// `(stateless)` / `(stateless+dirty)` sentinel, or a synthetic id +/// from `load_desired_state_from_path`); `run_status` is the apply +/// outcome. Together they populate the audit event's +/// `reconciliation_status`, `attempted_revision`, and +/// `applied_revision` fields with values that reflect what actually +/// happened — fixing the prior bug where every stateless apply +/// emitted `reconciliation_status = "never_run"`. +pub fn synthetic_stateless_provenance( + requested_repository: &str, + requested_ref: &str, + revision_id: &str, + run_status: RunStatus, +) -> crate::core::types::PersistedProvenanceState { + use crate::core::types::{ + ControllerProvenance, DesiredStateProvenance, PersistedProvenanceState, + ReconciliationProvenance, ReconciliationStatus, TreeState, + PERSISTED_PROVENANCE_SCHEMA_VERSION, + }; + let reconciliation_status = match run_status { + RunStatus::Success => ReconciliationStatus::Success, + RunStatus::Failure => ReconciliationStatus::Failed, + }; + let last_applied_revision = match run_status { + RunStatus::Success => Some(revision_id.to_string()), + RunStatus::Failure => None, + }; + PersistedProvenanceState { + schema_version: PERSISTED_PROVENANCE_SCHEMA_VERSION, + controller: ControllerProvenance { + version: None, + revision: None, + build_time: None, + tree_state: TreeState::Unknown, + }, + desired_state: DesiredStateProvenance { + repository: requested_repository.to_string(), + requested_ref: requested_ref.to_string(), + last_observed_revision: Some(revision_id.to_string()), + last_observed_at: None, + layout_version: Some("1".to_string()), + }, + reconciliation: ReconciliationProvenance { + // Stateless mode has no prior generation context — this + // is a single ad-hoc run, semantically generation 1. 
+ generation: 1, + status: reconciliation_status, + running: false, + last_attempted_revision: Some(revision_id.to_string()), + last_applied_revision, + last_started_at: None, + last_finished_at: None, + attempted_observed_divergence: None, + }, + detached: false, + } +} + fn classify_apply_run_display_state( last_applied_revision: Option<&str>, observed_snapshot: &crate::core::types::NormalizedSnapshot, @@ -929,15 +1114,9 @@ fn default_host_scope_id() -> Option { } fn map_plan_error(err: E) -> CoreError { - CoreError { - class: FailureClass::Plan, - message: err.to_string(), - } + CoreError::new(FailureClass::Plan, err.to_string()) } fn map_apply_error(err: E) -> CoreError { - CoreError { - class: FailureClass::Apply, - message: err.to_string(), - } + CoreError::new(FailureClass::Apply, err.to_string()) } diff --git a/src/cli/args.rs b/src/cli/args.rs index 59a3067..8dcb683 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -15,9 +15,18 @@ Use --force to overwrite existing configuration or recover from a corrupt state const PLAN_AFTER_HELP: &str = "Examples: core-ops plan core-ops plan --host edge-01 + core-ops plan --source-repo ./my-repo --host edge-01 -Requires prior initialization via 'core-ops init'. Repository and ref are -sourced exclusively from persisted controller configuration. +Init'd mode (default, no --source-repo): requires prior initialization +via 'core-ops init'. Repository and ref are sourced exclusively from +persisted controller configuration. + +Stateless mode (--source-repo ): sources desired state from the +filesystem directory at , bypassing the persisted controller +configuration written by 'core-ops init'. Requires --host. Writes +nothing to /var/lib/core-ops/. Honors --audit-dir when explicitly set. +For long-lived tracking, run 'core-ops init ' once and omit +--source-repo on subsequent invocations. 
Human-readable plan headers keep the immutable target revision primary and render a meaningful requested ref secondarily, for example: @@ -29,9 +38,19 @@ const APPLY_AFTER_HELP: &str = "Examples: core-ops apply --verbose core-ops apply --rollback-to rev-1 core-ops apply --rollback-to rev-1 --rollback-plan-only + core-ops apply --source-repo ./my-repo --host edge-01 + +Init'd mode (default, no --source-repo): requires prior initialization +via 'core-ops init'. Repository and ref are sourced exclusively from +persisted controller configuration. -Requires prior initialization via 'core-ops init'. Repository and ref are -sourced exclusively from persisted controller configuration. +Stateless mode (--source-repo ): converges host state from the +filesystem directory at , bypassing the persisted controller +configuration written by 'core-ops init'. Requires --host. Writes audit +records but does not mutate persisted controller state — the +init'd configuration's desired_state.* fields are preserved byte-identical. +For long-lived tracking, run 'core-ops init ' once and omit +--source-repo on subsequent invocations. Deterministic reconciliation uses desired, last_applied, and actual state. Automatic retry is bounded; repeated failure or oscillation is surfaced in the @@ -52,12 +71,19 @@ and audit flows."; const EXPLAIN_AFTER_HELP: &str = "Examples: core-ops explain container/frontend.container core-ops explain mount/var-lib-demo.mount --json + core-ops explain --source-repo ./my-repo --host edge-01 caddy.container Explain output inspects a single known managed object using the authoritative plan/result model and renders full dependency and metadata context. -Requires prior initialization via 'core-ops init'. Repository and ref are -sourced exclusively from persisted controller configuration."; +Init'd mode (default, no --source-repo): requires prior initialization +via 'core-ops init'. Repository and ref are sourced exclusively from +persisted controller configuration. 
+ +Stateless mode (--source-repo ): inspects the directory at +without consulting persisted state. Requires --host. Pure-read; writes +nothing anywhere. For long-lived tracking, run 'core-ops init ' +once and omit --source-repo on subsequent invocations."; const GLOBAL_AFTER_HELP: &str = "License: GNU Affero General Public License version 3 or later (AGPLv3+)"; @@ -117,6 +143,13 @@ pub struct PlanArgs { /// Host identity override for selecting hosts/, including host-specific mount overrides. #[arg(long)] pub host: Option, + /// Use a filesystem path as the source of desired state, bypassing the + /// persisted controller configuration written by 'core-ops init'. + /// Requires --host. The init'd mode (no flag) sources from persisted + /// state set by 'core-ops init '. Writes nothing under + /// /var/lib/core-ops/. Honors --audit-dir when explicitly set. + #[arg(long, value_name = "PATH", requires = "host")] + pub source_repo: Option, /// System-level Quadlet directory. #[arg(long, default_value = "/etc/containers/systemd")] pub quadlet_dir: PathBuf, @@ -139,6 +172,15 @@ pub struct ApplyArgs { /// Host identity override for selecting hosts/, including host-specific mount overrides. #[arg(long)] pub host: Option, + /// Use a filesystem path as the source of desired state, bypassing the + /// persisted controller configuration written by 'core-ops init'. + /// Requires --host. The init'd configuration's desired_state.* fields + /// are preserved byte-identical. Audit records are written; the canonical + /// /var/lib/core-ops/status.json is never mutated by stateless apply. + /// For long-lived tracking, run 'core-ops init ' once and + /// omit --source-repo on subsequent invocations. + #[arg(long, value_name = "PATH", requires = "host")] + pub source_repo: Option, /// System-level Quadlet directory. 
#[arg(long, default_value = "/etc/containers/systemd")] pub quadlet_dir: PathBuf, @@ -248,6 +290,13 @@ pub struct ExplainArgs { /// Host identity override for selecting hosts/. #[arg(long)] pub host: Option, + /// Use a filesystem path as the source of desired state, bypassing the + /// persisted controller configuration written by 'core-ops init'. + /// Requires --host. Pure-read; writes nothing anywhere. For long-lived + /// tracking, run 'core-ops init ' once and omit + /// --source-repo on subsequent invocations. + #[arg(long, value_name = "PATH", requires = "host")] + pub source_repo: Option, /// System-level Quadlet directory. #[arg(long, default_value = "/etc/containers/systemd")] pub quadlet_dir: PathBuf, @@ -261,9 +310,9 @@ pub struct ExplainArgs { #[cfg(test)] mod tests { - use super::{Cli, GLOBAL_AFTER_HELP}; + use super::{Cli, Commands, GLOBAL_AFTER_HELP}; use crate::build_info::{cli_license_notice, long_version_text}; - use clap::CommandFactory; + use clap::{CommandFactory, Parser}; #[test] fn long_version_includes_package_version() { @@ -286,4 +335,217 @@ mod tests { assert!(help.contains(cli_license_notice())); assert!(help.contains(GLOBAL_AFTER_HELP)); } + + // ---- spec/017: --source-repo flag parsing (FR-010..FR-016) ---- + + #[test] + fn plan_accepts_source_repo_with_host() { + let cli = Cli::try_parse_from([ + "core-ops", + "plan", + "--source-repo", + "/tmp/example", + "--host", + "edge-01", + ]) + .expect("plan should accept --source-repo with --host"); + match cli.command { + Commands::Plan(args) => { + assert_eq!( + args.source_repo.as_deref(), + Some(std::path::Path::new("/tmp/example")) + ); + assert_eq!(args.host.as_deref(), Some("edge-01")); + } + _ => panic!("expected Plan subcommand"), + } + } + + #[test] + fn apply_accepts_source_repo_with_host() { + let cli = Cli::try_parse_from([ + "core-ops", + "apply", + "--source-repo", + "/tmp/example", + "--host", + "edge-01", + ]) + .expect("apply should accept --source-repo with --host"); + 
match cli.command { + Commands::Apply(args) => { + assert_eq!( + args.source_repo.as_deref(), + Some(std::path::Path::new("/tmp/example")) + ); + assert_eq!(args.host.as_deref(), Some("edge-01")); + } + _ => panic!("expected Apply subcommand"), + } + } + + #[test] + fn explain_accepts_source_repo_with_host() { + let cli = Cli::try_parse_from([ + "core-ops", + "explain", + "--source-repo", + "/tmp/example", + "--host", + "edge-01", + "caddy.container", + ]) + .expect("explain should accept --source-repo with --host"); + match cli.command { + Commands::Explain(args) => { + assert_eq!( + args.source_repo.as_deref(), + Some(std::path::Path::new("/tmp/example")) + ); + assert_eq!(args.host.as_deref(), Some("edge-01")); + assert_eq!(args.object, "caddy.container"); + } + _ => panic!("expected Explain subcommand"), + } + } + + #[test] + fn plan_source_repo_without_host_errors() { + let err = Cli::try_parse_from([ + "core-ops", + "plan", + "--source-repo", + "/tmp/example", + ]) + .expect_err("plan --source-repo without --host should error"); + let msg = err.to_string(); + assert!( + msg.contains("--host") || msg.contains("host"), + "error should mention --host requirement: {msg}" + ); + } + + #[test] + fn apply_source_repo_without_host_errors() { + let err = Cli::try_parse_from([ + "core-ops", + "apply", + "--source-repo", + "/tmp/example", + ]) + .expect_err("apply --source-repo without --host should error"); + let msg = err.to_string(); + assert!( + msg.contains("--host") || msg.contains("host"), + "error should mention --host requirement: {msg}" + ); + } + + #[test] + fn explain_source_repo_without_host_errors() { + let err = Cli::try_parse_from([ + "core-ops", + "explain", + "--source-repo", + "/tmp/example", + "caddy.container", + ]) + .expect_err("explain --source-repo without --host should error"); + let msg = err.to_string(); + assert!( + msg.contains("--host") || msg.contains("host"), + "error should mention --host requirement: {msg}" + ); + } + + #[test] + fn 
init_rejects_source_repo() { + let err = Cli::try_parse_from([ + "core-ops", + "init", + "--source-repo", + "/tmp/example", + "/repo", + "main", + ]) + .expect_err("init must reject --source-repo"); + assert!( + err.to_string().contains("--source-repo") + || err.to_string().contains("unexpected"), + "expected unexpected-argument error: {err}" + ); + } + + #[test] + fn agent_rejects_source_repo() { + let err = Cli::try_parse_from([ + "core-ops", + "agent", + "--source-repo", + "/tmp/example", + ]) + .expect_err("agent must reject --source-repo"); + assert!( + err.to_string().contains("--source-repo") + || err.to_string().contains("unexpected"), + "expected unexpected-argument error: {err}" + ); + } + + #[test] + fn status_rejects_source_repo() { + let err = Cli::try_parse_from([ + "core-ops", + "status", + "--source-repo", + "/tmp/example", + ]) + .expect_err("status must reject --source-repo"); + assert!( + err.to_string().contains("--source-repo") + || err.to_string().contains("unexpected"), + "expected unexpected-argument error: {err}" + ); + } + + #[test] + fn plan_help_documents_source_repo_contract() { + let mut command = Cli::command(); + let plan_command = command.find_subcommand_mut("plan").expect("plan subcommand"); + let help = plan_command.render_long_help().to_string(); + assert!(help.contains("--source-repo"), "plan --help missing --source-repo"); + assert!(help.contains("--host"), "plan --help missing --host requirement"); + assert!( + help.contains("init"), + "plan --help missing init pointer per FR-016 contract" + ); + } + + #[test] + fn apply_help_documents_source_repo_contract() { + let mut command = Cli::command(); + let apply_command = command.find_subcommand_mut("apply").expect("apply subcommand"); + let help = apply_command.render_long_help().to_string(); + assert!(help.contains("--source-repo"), "apply --help missing --source-repo"); + assert!(help.contains("--host"), "apply --help missing --host requirement"); + assert!( + 
help.contains("init"), + "apply --help missing init pointer per FR-016 contract" + ); + } + + #[test] + fn explain_help_documents_source_repo_contract() { + let mut command = Cli::command(); + let explain_command = command + .find_subcommand_mut("explain") + .expect("explain subcommand"); + let help = explain_command.render_long_help().to_string(); + assert!(help.contains("--source-repo"), "explain --help missing --source-repo"); + assert!(help.contains("--host"), "explain --help missing --host requirement"); + assert!( + help.contains("init"), + "explain --help missing init pointer per FR-016 contract" + ); + } } diff --git a/src/cli/explain.rs b/src/cli/explain.rs index 7b3d47f..8c0900e 100644 --- a/src/cli/explain.rs +++ b/src/cli/explain.rs @@ -118,6 +118,53 @@ pub fn explain( }) } +/// Stateless `core-ops explain --source-repo` entry point (spec/017). +/// +/// Mirrors [`explain`] but does NOT consult persisted controller +/// state — no `last_applied_revision_from_state()` call, no +/// `last_applied_snapshot_for_scope()` deterministic-state read. +/// This honors the FR-011a / clarification Q5 contract: stateless +/// explain is pure-read and writes nothing anywhere; equally, it +/// reads nothing under `/var/lib/core-ops/`. Output is identical +/// regardless of whether the host has any prior init'd state. 
+pub fn explain_stateless( + deps: &ReconcileDependencies<'_>, + object_selector: &str, +) -> Result { + let result = reconcile_plan(deps)?; + let observed = (deps.read_observed)(&result.desired)?; + let scope_id = scope_id_for_observed(&observed); + let desired_snapshot = build_desired_snapshot_from_state(&result.desired, &scope_id); + let observed_snapshot = build_observed_snapshot(&observed, Some(&result.desired), &scope_id); + let verification_results = normalize_verification_results_for_desired( + &result.desired, + verify_state(&result.desired, &observed), + ); + let mut deterministic = reconcile_deterministic_plan_with_runtime( + &desired_snapshot, + // Stateless mode has no last_applied baseline by design + // (init'd state is never read or mutated, FR-013 / SC-009). + None, + &observed_snapshot, + &verification_results, + )? + .plan; + deterministic.requested_repository = result.desired.requested_repository.clone(); + deterministic.requested_ref = result.desired.requested_ref.clone(); + let explain = + build_explain_output(&deterministic, &verification_results, None, object_selector) + .ok_or_else(|| { + CoreError::new( + crate::core::types::FailureClass::Plan, + format!("managed object not found: {object_selector}"), + ) + })?; + Ok(ExplainCommandOutput { + human: format_explain_output_report(&explain), + machine: format_explain_output_json(&explain), + }) +} + fn scope_id_for_observed(observed: &crate::core::types::ObservedState) -> String { observed .host_info diff --git a/src/cli/plan.rs b/src/cli/plan.rs index f1a94a5..a5f5f4a 100644 --- a/src/cli/plan.rs +++ b/src/cli/plan.rs @@ -100,6 +100,73 @@ pub fn plan(deps: &ReconcileDependencies<'_>, verbose: bool) -> Result, + verbose: bool, +) -> Result { + let result = reconcile_plan(deps)?; + let observed = (deps.read_observed)(&result.desired)?; + let scope_id = scope_id_for_observed(&observed); + let desired_snapshot = build_desired_snapshot_from_state(&result.desired, &scope_id); + let 
observed_snapshot = build_observed_snapshot(&observed, Some(&result.desired), &scope_id); + let verification_results = normalize_verification_results_for_desired( + &result.desired, + verify_state(&result.desired, &observed), + ); + // Stateless mode has no last_applied baseline by design (init'd + // state is never read, FR-013 / SC-009). Treat as FirstRun for + // header-rendering purposes — the source path is always the + // primary identifier in the rendered header. + let run_display_state = if observed_snapshot.objects.is_empty() { + ApplyRunDisplayState::FirstRun + } else { + ApplyRunDisplayState::Recovery + }; + let mut deterministic = reconcile_deterministic_plan_with_runtime( + &desired_snapshot, + None, + &observed_snapshot, + &verification_results, + )?; + deterministic.plan.requested_repository = result.desired.requested_repository.clone(); + deterministic.plan.requested_ref = result.desired.requested_ref.clone(); + let diffs = result.diffs; + let mut audit = build_audit_record(&result.run.run_id, diffs.clone(), &result.plan, Vec::new()); + audit + .operator_messages + .push(summarize_evaluation(&result.desired)); + if !result.desired.mount_declarations.is_empty() { + audit.operator_messages.push(format!( + "mounts: native-artifacts={}, dependencies={}", + result.desired.mount_declarations.len(), + result.desired.mount_dependencies.len() + )); + } + let event = build_audit_event(&result.run, Some(&result.plan), &[], None); + let summary = format_deterministic_plan_report_with_options_and_state( + &deterministic.plan, + verbose, + run_display_state, + ); + Ok(PlanOutput { + summary, + machine: format_deterministic_plan_json(&deterministic.plan), + audit_record: audit, + audit_event: event, + }) +} + pub fn render_deterministic_plan( plan: &crate::core::types::DeterministicReconciliationPlan, ) -> DeterministicPlanOutput { diff --git a/src/cli/status.rs b/src/cli/status.rs index 04ba6d4..2cb29a8 100644 --- a/src/cli/status.rs +++ b/src/cli/status.rs @@ 
-299,6 +299,13 @@ fn meaningful_requested_ref<'a>(target: &str, requested_ref: Option<&'a str>) -> } fn short_revision(revision: &str) -> &str { + // spec/017 stateless-mode sentinels (`(stateless)`, `(stateless+dirty)`) + // begin with `(`, which is invalid in git ref names per + // `git check-ref-format`. Preserve them verbatim instead of + // truncating to 8 chars (which would cut to `(statele`). + if revision.starts_with('(') { + return revision; + } &revision[..revision.len().min(8)] } diff --git a/src/core/errors.rs b/src/core/errors.rs index 45c149c..80cdb52 100644 --- a/src/core/errors.rs +++ b/src/core/errors.rs @@ -9,6 +9,12 @@ use thiserror::Error; pub struct CoreError { pub class: FailureClass, pub message: String, + /// Optional process exit code override. `None` falls back to `1` + /// in `main`. Set explicitly by error sites that have a documented + /// exit-code contract — e.g., `--source-repo` path-shape errors + /// (`contracts/cli-flag.md` Error semantics: 64 / 65 / 66). + #[doc(hidden)] + pub exit_code: Option, } impl CoreError { @@ -16,6 +22,15 @@ impl CoreError { Self { class, message: message.into(), + exit_code: None, + } + } + + pub fn with_exit_code(class: FailureClass, message: impl Into, exit_code: i32) -> Self { + Self { + class, + message: message.into(), + exit_code: Some(exit_code), } } diff --git a/src/io/mod.rs b/src/io/mod.rs index b1d6922..5dff2d5 100644 --- a/src/io/mod.rs +++ b/src/io/mod.rs @@ -7,6 +7,7 @@ pub mod observed; pub mod quadlet; pub mod release_governance; pub mod repo; +pub mod source_ref; pub mod state; pub mod systemd; pub mod verification_artifacts; diff --git a/src/io/repo.rs b/src/io/repo.rs index 6df4c11..36c057a 100644 --- a/src/io/repo.rs +++ b/src/io/repo.rs @@ -188,6 +188,51 @@ pub fn load_desired_state(repo_source: &str, revision_id: &str) -> Result`. The init'd-mode +/// loader retains its existing clone-then-checkout semantics. 
+/// +/// Unlike init'd mode, this loader does not assume `repo_path` is a +/// git working tree — `revision_id` is derived from `requested_ref` +/// directly (it carries the SHA when the source is a clean git +/// checkout, or a sentinel otherwise) so non-git directories load +/// without a `git rev-parse HEAD` call (FR-015). +pub fn load_desired_state_from_path( + repo_path: &Path, + requested_repository: &str, + requested_ref: &str, +) -> Result { + if !repo_path.exists() { + return Err(RepoError::InvalidRepoSource( + repo_path.display().to_string(), + )); + } + if !repo_path.is_dir() { + return Err(RepoError::InvalidRepoSource( + repo_path.display().to_string(), + )); + } + validate_no_legacy_root_artifacts(repo_path)?; + let services_dir = repo_path.join("services"); + if !services_dir.exists() { + return Err(RepoError::MissingServicesDir(services_dir)); + } + load_layered_desired_state_with_revision( + repo_path, + requested_repository, + requested_ref, + Some(requested_ref.to_string()), + ) +} + pub fn load_layered_repo(repo_source: &str, revision_id: &str) -> Result { let temp = TempDir::new().map_err(|err| RepoError::GitCloneFailed(err.to_string()))?; if looks_like_url(repo_source) { @@ -249,6 +294,20 @@ fn load_layered_desired_state( repo_path: &Path, requested_repository: &str, requested_ref: &str, +) -> Result { + load_layered_desired_state_with_revision( + repo_path, + requested_repository, + requested_ref, + None, + ) +} + +fn load_layered_desired_state_with_revision( + repo_path: &Path, + requested_repository: &str, + requested_ref: &str, + revision_override: Option, ) -> Result { let services_dir = repo_path.join("services"); let hosts_dir = repo_path.join("hosts"); @@ -295,7 +354,10 @@ fn load_layered_desired_state( ) .map_err(|err| RepoError::ValidationFailed(err.to_string()))?; let workloads = workloads_from_evaluation(&output); - let resolved_revision = resolved_head_revision(repo_path)?; + let resolved_revision = match revision_override { + 
Some(value) => value, + None => resolved_head_revision(repo_path)?, + }; Ok(desired_state_from_workloads( repo_path, DesiredStateInputs { diff --git a/src/io/source_ref.rs b/src/io/source_ref.rs new file mode 100644 index 0000000..1a9b513 --- /dev/null +++ b/src/io/source_ref.rs @@ -0,0 +1,445 @@ +//! Stateless `--source-repo` provenance detection (spec/017). +//! +//! Used by `core-ops plan/apply/explain --source-repo ` to bypass +//! the persisted controller configuration written by `core-ops init` and +//! source desired state directly from a filesystem directory. +//! +//! Records path-based provenance per FR-013 + 2026-05-05 clarification Q3: +//! +//! | Source path state | `requested_ref` | +//! |------------------------------------------------|-------------------------| +//! | Non-git directory | `(stateless)` | +//! | Git working tree, dirty (`status --porcelain`) | `(stateless+dirty)` | +//! | Git working tree, clean at HEAD | `` | +//! +//! Sentinels begin with `(`, which is invalid in a git ref name per +//! `git check-ref-format`, so they cannot collide with real refs. +//! +//! Implementation shells out to the `git` binary via `std::process::Command`, +//! mirroring the established pattern at `src/cli/init.rs` and +//! `src/io/repo.rs`. No new runtime dependency on `git2` or similar. + +use std::path::{Path, PathBuf}; +use std::process::Command; + +/// Path-based source-of-truth identifier carrying provenance for +/// stateless mode. Constructed by [`detect_provenance`]. +#[derive(Clone, Debug)] +pub struct StatelessSource { + /// Canonicalized, symlink-resolved absolute path to the source-repo. + pub repo_path: PathBuf, + /// Stringified `repo_path`, recorded as `desired_state.repository` + /// in audit + provenance. Always begins with `/` so it is + /// unambiguously distinguishable from a git URL (which contains + /// `:` for `https://` or `user@host:`). 
+ pub requested_repository: String, + /// Either a full 40-char SHA hex (clean git checkout at HEAD), + /// `(stateless+dirty)` (dirty git working tree), or `(stateless)` + /// (non-git directory). Recorded as `desired_state.requested_ref` + /// in audit + provenance. Sentinels disambiguated by the leading + /// `(` character. + pub requested_ref: String, +} + +/// Errors surfaced from path-shaped validation in stateless mode. +/// Layout/parser errors continue to bubble up via `RepoError` and +/// surface with their existing exit-code mapping. +#[derive(Debug)] +pub enum SourceRefError { + /// `--source-repo ` does not exist on the filesystem + /// (`std::io::ErrorKind::NotFound` from `fs::metadata`). Mapped + /// to exit code 64 (`EX_USAGE`) per `contracts/cli-flag.md`. + PathMissing(PathBuf), + /// `--source-repo ` exists but is not a directory. + /// Mapped to exit code 64 (`EX_USAGE`). + PathNotDirectory(PathBuf), + /// Path metadata inspection failed for a non-`NotFound` reason + /// (typically `PermissionDenied` or other I/O error). Distinct + /// from `PathMissing` so automation can tell "directory does + /// not exist" from "directory exists but the controller cannot + /// inspect it". Mapped to exit code 66. + PathInaccessible { path: PathBuf, source: std::io::Error }, + /// Path canonicalization failed (e.g., symlink loop, insufficient + /// permissions on an intermediate component). Mapped to exit + /// code 66. + Canonicalize { path: PathBuf, source: std::io::Error }, +} + +impl SourceRefError { + /// Process exit code per `contracts/cli-flag.md` Error semantics + /// table. 64 = `EX_USAGE`, 65 = `EX_DATAERR`, 66 = path-shape. + pub fn exit_code(&self) -> i32 { + match self { + SourceRefError::PathMissing(_) | SourceRefError::PathNotDirectory(_) => 64, + SourceRefError::PathInaccessible { .. } | SourceRefError::Canonicalize { .. 
} => 66, + } + } +} + +impl std::fmt::Display for SourceRefError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SourceRefError::PathMissing(path) => { + write!(f, "--source-repo path does not exist: {}", path.display()) + } + SourceRefError::PathNotDirectory(path) => { + write!(f, "--source-repo path is not a directory: {}", path.display()) + } + SourceRefError::PathInaccessible { path, source } => write!( + f, + "--source-repo path could not be accessed: {}: {}", + path.display(), + source + ), + SourceRefError::Canonicalize { path, source } => write!( + f, + "--source-repo path could not be canonicalized: {}: {}", + path.display(), + source + ), + } + } +} + +impl std::error::Error for SourceRefError {} + +/// Detect path-based provenance for the stateless `--source-repo` flag. +/// +/// Validates `path` is an existing directory, canonicalizes it, then +/// classifies its git state. Returns a [`StatelessSource`] carrying +/// the canonical path and the resolved `requested_ref` value. +/// +/// Uses `fs::metadata` rather than `Path::exists()` / `Path::is_dir()` +/// so I/O errors (most commonly `PermissionDenied`) surface as +/// `PathInaccessible` instead of being collapsed to `PathMissing`. +/// Automation that distinguishes "does not exist" from "exists but +/// inaccessible" reads the documented exit code (64 vs 66). +/// +/// See module-level docs for the git-state classification table. 
+pub fn detect_provenance(path: &Path) -> Result { + let metadata = match std::fs::metadata(path) { + Ok(meta) => meta, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => { + return Err(SourceRefError::PathMissing(path.to_path_buf())); + } + Err(err) => { + return Err(SourceRefError::PathInaccessible { + path: path.to_path_buf(), + source: err, + }); + } + }; + if !metadata.is_dir() { + return Err(SourceRefError::PathNotDirectory(path.to_path_buf())); + } + let canonical = std::fs::canonicalize(path).map_err(|err| SourceRefError::Canonicalize { + path: path.to_path_buf(), + source: err, + })?; + let requested_repository = canonical.to_string_lossy().into_owned(); + let requested_ref = match classify_git_state(&canonical) { + GitClassification::NotGit => "(stateless)".to_string(), + GitClassification::Dirty => "(stateless+dirty)".to_string(), + GitClassification::Clean(sha) => sha, + GitClassification::ProbeFailed(reason) => { + // Per `contracts/cli-flag.md` Error semantics: probe + // failures (`git` missing, subprocess error, unexpected + // non-zero exit downstream of a positive + // `is-inside-work-tree`) fall back to `(stateless)` + // BUT emit a stderr warning so operators don't + // silently lose the distinction between an actually + // non-git source and a degraded probe. + eprintln!( + "warning: git ref detection failed for {}: {}; recording as non-git source", + canonical.display(), + reason + ); + "(stateless)".to_string() + } + }; + Ok(StatelessSource { + repo_path: canonical, + requested_repository, + requested_ref, + }) +} + +/// Outcome of probing the git state at a stateless `--source-repo` +/// path. Used by [`detect_provenance`] to map to the documented +/// `requested_ref` value (`(stateless)` / `(stateless+dirty)` / +/// 40-char SHA) plus emit a stderr warning when the probe degraded. +enum GitClassification { + /// `git` ran successfully and confirmed the path is not inside a + /// work tree. 
No warning emitted — this is the canonical + /// non-git stateless source. + NotGit, + /// `git` ran successfully and the working tree is clean at the + /// returned 40-char HEAD SHA. + Clean(String), + /// `git` ran successfully and the working tree has uncommitted + /// changes (modified / added / deleted / untracked). + Dirty, + /// A git subprocess failed in a way that prevents classification + /// (binary missing, unexpected non-zero exit downstream of a + /// positive `is-inside-work-tree`, malformed output). Carries + /// a short reason for the operator-facing warning. Per + /// `contracts/cli-flag.md` the caller falls back to `(stateless)`. + ProbeFailed(String), +} + +fn classify_git_state(path: &Path) -> GitClassification { + match is_inside_work_tree(path) { + Ok(false) => GitClassification::NotGit, + Err(reason) => GitClassification::ProbeFailed(reason), + Ok(true) => match working_tree_clean(path) { + Ok(false) => GitClassification::Dirty, + Err(reason) => GitClassification::ProbeFailed(reason), + Ok(true) => match head_sha(path) { + Ok(sha) => GitClassification::Clean(sha), + Err(reason) => GitClassification::ProbeFailed(reason), + }, + }, + } +} + +/// Probe whether `path` is inside a git work tree. +/// +/// `Ok(true)` — `git rev-parse --is-inside-work-tree` exited 0 +/// with stdout `true`. +/// `Ok(false)` — `git` ran successfully and definitively reported +/// "not a git repository" (the canonical non-git +/// case), or exit 0 with stdout `false`. +/// `Err(reason)` — the probe failed in a way that does NOT prove +/// `path` is not a git repo: the `git` binary failed +/// to spawn (missing from `$PATH`, fork error), or +/// git ran but exited non-zero for an unrecognized +/// reason (corrupt `.git/HEAD`, permission error +/// reading `.git/`, locked index, etc.). The caller +/// emits a stderr warning before falling back to +/// `(stateless)`. 
+/// +/// The stderr content is inspected to keep the canonical +/// "not a git repository" path warning-free while surfacing the +/// damaged-repo case (which would otherwise masquerade as a clean +/// non-git directory). +fn is_inside_work_tree(path: &Path) -> Result { + let output = Command::new("git") + .arg("-C") + .arg(path) + .args(["rev-parse", "--is-inside-work-tree"]) + .output() + .map_err(|err| format!("`git rev-parse --is-inside-work-tree` could not be spawned: {err}"))?; + if output.status.success() { + return Ok(String::from_utf8_lossy(&output.stdout).trim() == "true"); + } + let stderr = String::from_utf8_lossy(&output.stderr); + // The canonical non-git case prints "fatal: not a git repository + // (or any parent up to ...)". Treat that as a definitive `Ok(false)` + // — no probe-failure warning. Anything else (corrupt HEAD, locked + // index, permission error inside `.git/`, etc.) is a probe failure. + if stderr.contains("not a git repository") { + return Ok(false); + } + Err(format!( + "`git rev-parse --is-inside-work-tree` exited non-zero: {}", + stderr.trim() + )) +} + +/// Probe whether the working tree at `path` is clean. +/// +/// `Ok(true)` — `git status --porcelain` succeeded with empty output. +/// `Ok(false)` — `git status --porcelain` succeeded with non-empty output. +/// `Err(reason)` — subprocess failed unexpectedly (binary missing, +/// non-zero exit, etc.). Surfaced as a warning by +/// the caller; per `research.md` D1 step 5 we still +/// fall back to `(stateless)` so probe failure is +/// not conflated with an actually-dirty tree. +/// +/// Passes `--untracked-files=normal` explicitly so the probe is +/// independent of `status.showUntrackedFiles` set anywhere in the +/// user's gitconfig levels. 
Without this override, a repo (or user) +/// with `status.showUntrackedFiles=no` would silently classify an +/// uncommitted authoring edit as clean and emit the parent commit's +/// SHA instead of `(stateless+dirty)` — exactly the operator state +/// stateless mode is meant to flag. +fn working_tree_clean(path: &Path) -> Result { + let output = Command::new("git") + .arg("-C") + .arg(path) + .args([ + "status", + "--porcelain", + "--untracked-files=normal", + "--", + ".", + ]) + .output() + .map_err(|err| format!("`git status --porcelain` could not be spawned: {err}"))?; + if !output.status.success() { + return Err(format!( + "`git status --porcelain` exited non-zero: {}", + String::from_utf8_lossy(&output.stderr).trim() + )); + } + Ok(output.stdout.is_empty()) +} + +fn head_sha(path: &Path) -> Result { + let output = Command::new("git") + .arg("-C") + .arg(path) + .args(["rev-parse", "HEAD"]) + .output() + .map_err(|err| format!("`git rev-parse HEAD` could not be spawned: {err}"))?; + if !output.status.success() { + return Err(format!( + "`git rev-parse HEAD` exited non-zero: {}", + String::from_utf8_lossy(&output.stderr).trim() + )); + } + let sha = String::from_utf8_lossy(&output.stdout).trim().to_string(); + if sha.len() == 40 && sha.chars().all(|c| c.is_ascii_hexdigit()) { + Ok(sha) + } else { + Err(format!( + "`git rev-parse HEAD` returned unexpected output: {sha:?}" + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::process::Command as ProcessCommand; + use tempfile::TempDir; + + fn run_git(repo: &Path, args: &[&str]) { + let status = ProcessCommand::new("git") + .arg("-C") + .arg(repo) + .args(args) + .env("GIT_AUTHOR_NAME", "fixture") + .env("GIT_AUTHOR_EMAIL", "fixture@example.com") + .env("GIT_COMMITTER_NAME", "fixture") + .env("GIT_COMMITTER_EMAIL", "fixture@example.com") + .status() + .expect("git invocation"); + assert!(status.success(), "git {:?} failed", args); + } + + #[test] + fn non_git_directory_records_stateless_sentinel() { + 
let tmp = TempDir::new().expect("tempdir"); + let result = detect_provenance(tmp.path()).expect("detect"); + assert_eq!(result.requested_ref, "(stateless)"); + assert_eq!( + result.repo_path, + std::fs::canonicalize(tmp.path()).unwrap() + ); + assert!(result.requested_repository.starts_with('/')); + } + + #[test] + fn clean_git_checkout_records_full_sha() { + let tmp = TempDir::new().expect("tempdir"); + run_git(tmp.path(), &["init", "-q"]); + std::fs::write(tmp.path().join("README"), "fixture\n").expect("write"); + run_git(tmp.path(), &["add", "."]); + run_git(tmp.path(), &["commit", "-q", "-m", "fixture"]); + + let result = detect_provenance(tmp.path()).expect("detect"); + assert_eq!(result.requested_ref.len(), 40); + assert!(result.requested_ref.chars().all(|c| c.is_ascii_hexdigit())); + } + + #[test] + fn dirty_working_tree_records_stateless_dirty_sentinel() { + let tmp = TempDir::new().expect("tempdir"); + run_git(tmp.path(), &["init", "-q"]); + std::fs::write(tmp.path().join("README"), "fixture\n").expect("write"); + run_git(tmp.path(), &["add", "."]); + run_git(tmp.path(), &["commit", "-q", "-m", "fixture"]); + // Untracked file → status --porcelain is non-empty. + std::fs::write(tmp.path().join("scratch.txt"), "wip\n").expect("write"); + + let result = detect_provenance(tmp.path()).expect("detect"); + assert_eq!(result.requested_ref, "(stateless+dirty)"); + } + + #[test] + fn dirty_detection_overrides_repo_show_untracked_files_no() { + // Repo (or user) config can set `status.showUntrackedFiles=no`, + // which would normally make `git status --porcelain` skip + // untracked files. The probe MUST override that with + // `--untracked-files=normal` so authoring edits in a + // stateless source-repo are still classified dirty. 
+ let tmp = TempDir::new().expect("tempdir"); + run_git(tmp.path(), &["init", "-q"]); + std::fs::write(tmp.path().join("README"), "fixture\n").expect("write"); + run_git(tmp.path(), &["add", "."]); + run_git(tmp.path(), &["commit", "-q", "-m", "fixture"]); + // Pin the regression: locally configure the repo to hide + // untracked files. Without the `--untracked-files=normal` + // override, the next probe would falsely report clean. + run_git( + tmp.path(), + &["config", "--local", "status.showUntrackedFiles", "no"], + ); + std::fs::write(tmp.path().join("scratch.txt"), "wip\n").expect("write"); + + let result = detect_provenance(tmp.path()).expect("detect"); + assert_eq!( + result.requested_ref, "(stateless+dirty)", + "untracked files MUST be detected even when the repo \ + configures status.showUntrackedFiles=no" + ); + } + + #[test] + fn missing_path_yields_path_missing_error() { + let tmp = TempDir::new().expect("tempdir"); + let nonexistent = tmp.path().join("does-not-exist"); + let err = detect_provenance(&nonexistent).expect_err("missing path"); + match err { + SourceRefError::PathMissing(_) => {} + other => panic!("expected PathMissing, got {other:?}"), + } + assert_eq!( + detect_provenance(&nonexistent).unwrap_err().exit_code(), + 64 + ); + } + + #[test] + fn file_path_yields_path_not_directory_error() { + let tmp = TempDir::new().expect("tempdir"); + let file_path = tmp.path().join("a-file"); + std::fs::write(&file_path, "x").expect("write"); + let err = detect_provenance(&file_path).expect_err("not directory"); + match err { + SourceRefError::PathNotDirectory(_) => {} + other => panic!("expected PathNotDirectory, got {other:?}"), + } + assert_eq!(detect_provenance(&file_path).unwrap_err().exit_code(), 64); + } + + #[test] + fn sentinels_cannot_collide_with_real_git_refs() { + // `(` is reserved per `git check-ref-format`. 
+ for sentinel in ["(stateless)", "(stateless+dirty)"] { + let status = ProcessCommand::new("git") + .args(["check-ref-format", "--", sentinel]) + .status(); + // Either git rejects it or the binary is missing; both are + // acceptable — the invariant is that sentinels are not + // valid refs that consumers might mistake for SHAs. + if let Ok(status) = status { + assert!( + !status.success(), + "sentinel {sentinel} unexpectedly accepted as a git ref" + ); + } + } + } +} diff --git a/src/main.rs b/src/main.rs index 35d046e..2138550 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,6 +12,7 @@ use core_ops::io::state::{ read_persisted_state, resolve_state_file, CONTROLLER_BUILD_TIME_ENV, CONTROLLER_REVISION_ENV, CONTROLLER_TREE_STATE_ENV, CONTROLLER_VERSION_ENV, }; +use core_ops::io::source_ref::{detect_provenance, SourceRefError}; use core_ops::io::systemd::SYSTEMD_UNIT_DIR_ENV; use core_ops::io::{audit as audit_io, observed, repo}; use log::LevelFilter; @@ -29,9 +30,10 @@ fn main() { init_logging(); let cli = Cli::parse(); if let Err(err) = run(cli) { + let exit_code = err.exit_code.unwrap_or(1); let report = cli_common::report_error(err); eprintln!("{:?}", report); - std::process::exit(1); + std::process::exit(exit_code); } } @@ -43,7 +45,6 @@ fn run(cli: Cli) -> Result<(), CoreError> { Ok(()) } Commands::Plan(args) => { - let (repo_source, rev) = resolve_repo_from_state(None)?; let quadlet_dir = args.quadlet_dir; let audit_dir = args.audit_dir; let json = args.json; @@ -51,6 +52,51 @@ fn run(cli: Cli) -> Result<(), CoreError> { set_systemd_unit_dir(&args.systemd_unit_dir); set_host_override(&args.host); + // Stateless mode (--source-repo): bypass init'd state lookup + // entirely. Per FR-012 + the dedicated `plan_stateless` + // engine, writes nothing to /var/lib/core-ops/ AND reads + // no persisted controller state — corrupt/unreadable + // /var/lib/core-ops/* cannot derail a stateless run. + // Honors --audit-dir when explicitly set (clarification Q4). 
+ if let Some(source_repo) = args.source_repo { + let source = detect_provenance(&source_repo).map_err(map_source_ref_error)?; + let repo_path = source.repo_path.clone(); + let requested_repository = source.requested_repository.clone(); + let requested_ref = source.requested_ref.clone(); + let deps = ReconcileDependencies { + load_desired: &|| { + repo::load_desired_state_from_path( + &repo_path, + &requested_repository, + &requested_ref, + ) + .map_err(map_stateless_layout_error) + }, + read_observed: &|desired| { + observed::read_observed_state(&quadlet_dir, Some(desired), None) + .map_err(map_plan_error) + }, + apply_plan: &|_, _| Ok(()), + }; + let output = plan_cmd::plan_stateless(&deps, verbose)?; + audit_io::emit_journal_event(&output.audit_event).map_err(map_plan_error)?; + if let Some(dir) = audit_dir { + let audit_path = audit_io::write_audit_record(&dir, &output.audit_record) + .map_err(map_plan_error)?; + if !json { + println!("audit {}", audit_path); + } + } + if json { + println!("{}", output.machine); + } else { + println!("{}", output.summary); + } + return Ok(()); + } + + let (repo_source, rev) = resolve_repo_from_state(None)?; + let deps = ReconcileDependencies { load_desired: &|| { repo::load_desired_state(&repo_source, &rev).map_err(map_plan_error) @@ -86,14 +132,85 @@ fn run(cli: Cli) -> Result<(), CoreError> { let audit_dir = args.audit_dir; let json = args.json; let verbose = args.verbose; + let no_reload = args.no_reload; + set_systemd_unit_dir(&args.systemd_unit_dir); + set_host_override(&args.host); + + // Stateless mode (--source-repo): bypass init'd state, never + // mutate /var/lib/core-ops/status.json (FR-013, SC-009). + // Audit records are written; the persisted controller state + // is left byte-identical pre/post. 
+ if let Some(source_repo) = args.source_repo { + if rollback_to.is_some() { + return Err(CoreError::new( + FailureClass::Apply, + "--rollback-to is incompatible with stateless --source-repo \ + (rollback requires the persisted retention chain set by 'core-ops init')" + .to_string(), + )); + } + let source = detect_provenance(&source_repo).map_err(map_source_ref_error)?; + let output = apply_cmd::apply_with_report_stateless( + &source, + &quadlet_dir, + !no_reload, + )?; + let run = output.result.run.clone(); + let synthetic = apply_cmd::synthetic_stateless_provenance( + output + .result + .desired + .requested_repository + .as_deref() + .unwrap_or(""), + output + .result + .desired + .requested_ref + .as_deref() + .unwrap_or(""), + &output.result.desired.revision_id, + run.status.clone(), + ); + let event = core_ops::core::audit::build_audit_event( + &run, + Some(&output.plan), + &output.result.verification_results, + Some(&synthetic), + ); + audit_io::emit_journal_event(&event).map_err(map_apply_error)?; + if let Some(dir) = audit_dir { + let mut record = core_ops::core::audit::build_audit_record( + &run.run_id, + Vec::new(), + &output.plan, + output.result.verification_results.clone(), + ); + record + .operator_messages + .push(core_ops::core::audit::summarize_evaluation( + &output.result.desired, + )); + let _ = audit_io::write_audit_record(&dir, &record).map_err(map_apply_error)?; + } + if json { + println!("{}", output.machine_report); + } else if verbose { + println!("{}", output.verbose_report); + } else { + println!("{}", output.human_report); + } + if run.status == RunStatus::Failure { + std::process::exit(1); + } + return Ok(()); + } + let state_file = if args.force_no_state { None } else { Some(resolve_state_file(args.state_file)) }; - let no_reload = args.no_reload; - set_systemd_unit_dir(&args.systemd_unit_dir); - set_host_override(&args.host); let mut streamed_human_output = false; let output = if let Some(target_revision_id) = rollback_to.as_deref() 
{ @@ -256,6 +373,40 @@ fn run(cli: Cli) -> Result<(), CoreError> { Commands::Explain(args) => { set_systemd_unit_dir(&args.systemd_unit_dir); set_host_override(&args.host); + + // Stateless mode (--source-repo): pure-read; writes nothing + // anywhere AND reads no persisted controller state — uses + // the dedicated `explain_stateless` engine that bypasses + // last_applied / deterministic-state lookups (FR-011a). + if let Some(source_repo) = args.source_repo { + let source = detect_provenance(&source_repo).map_err(map_source_ref_error)?; + let repo_path = source.repo_path.clone(); + let requested_repository = source.requested_repository.clone(); + let requested_ref = source.requested_ref.clone(); + let deps = ReconcileDependencies { + load_desired: &|| { + repo::load_desired_state_from_path( + &repo_path, + &requested_repository, + &requested_ref, + ) + .map_err(map_stateless_layout_error) + }, + read_observed: &|desired| { + observed::read_observed_state(&args.quadlet_dir, Some(desired), None) + .map_err(map_plan_error) + }, + apply_plan: &|_, _| Ok(()), + }; + let output = explain_cmd::explain_stateless(&deps, &args.object)?; + if args.json { + println!("{}", output.machine); + } else { + println!("{}", output.human); + } + return Ok(()); + } + let (repo_source, revision) = explain_cmd::resolve_explain_target()?; let deps = ReconcileDependencies { @@ -388,6 +539,32 @@ fn map_apply_error(err: E) -> CoreError { CoreError::new(core_ops::core::types::FailureClass::Apply, err.to_string()) } +/// Map `--source-repo` validation errors to `CoreError`, threading +/// the documented process exit code (`contracts/cli-flag.md` Error +/// semantics: 64 = `EX_USAGE` for missing/non-directory paths, 66 = +/// path-shape for canonicalize failures). The exit code is set on +/// `CoreError.exit_code` and consumed by `main()` so automation can +/// distinguish usage errors from unrelated apply failures via exit +/// status alone, without parsing stderr. 
+fn map_source_ref_error(err: SourceRefError) -> CoreError { + let exit_code = err.exit_code(); + CoreError::with_exit_code(FailureClass::Plan, err.to_string(), exit_code) +} + +/// Map a `RepoError` (parser/layout failure) surfaced from +/// `load_desired_state_from_path` in stateless mode to `CoreError` +/// with exit code 65 (`EX_DATAERR`) per `contracts/cli-flag.md` +/// Error semantics: " is a directory but layout is invalid → +/// 65". A directory that exists but is not a spec/016-conformant +/// source-repo (e.g., missing `services/`, legacy artifacts at the +/// root, malformed `service.yaml`) hits this path. Distinct from +/// usage errors (64) and path-shape errors (66) so automation can +/// classify by exit status alone. +fn map_stateless_layout_error(err: E) -> CoreError { + CoreError::with_exit_code(FailureClass::Plan, err.to_string(), 65) +} + + /// Read `(repository, requested_ref)` from state, allowing Detached state. /// Used only for the rollback path where Detached is a valid entry point. fn resolve_repo_from_state( diff --git a/tests/fixtures/provenance_state/valid-success.json b/tests/fixtures/provenance_state/valid-success.json index 4a1f59f..ef397df 100644 --- a/tests/fixtures/provenance_state/valid-success.json +++ b/tests/fixtures/provenance_state/valid-success.json @@ -1,7 +1,7 @@ { "schema_version": 1, "controller": { - "version": "2.1.1", + "version": "2.2.0", "revision": "8f3c2ab", "build_time": "2026-03-23T10:00:00Z", "tree_state": "clean" diff --git a/tests/integration/mod.rs b/tests/integration/mod.rs index eb68b61..3831006 100644 --- a/tests/integration/mod.rs +++ b/tests/integration/mod.rs @@ -27,6 +27,16 @@ mod test_service_selection; mod test_skill_install; mod test_socket_dropins; mod test_source_repo_layout; +// spec/017 — stateless --source-repo integration coverage and per-example tests. 
+mod test_examples_01_caddy_whoami; +mod test_examples_02_nextcloud; +mod test_examples_03_immich; +mod test_examples_04_traefik_authelia; +mod test_examples_05_observability; +mod test_stateless_apply; +mod test_stateless_authoring; +mod test_stateless_explain; +mod test_stateless_plan; mod test_systemd_units; mod test_unit_lifecycle; mod test_validation_fail; diff --git a/tests/integration/source_repo_support.rs b/tests/integration/source_repo_support.rs index 2da1560..a4f81a4 100644 --- a/tests/integration/source_repo_support.rs +++ b/tests/integration/source_repo_support.rs @@ -17,8 +17,6 @@ use core_ops::io::repo::{load_desired_state, RepoError, HOST_OVERRIDE_ENV}; use crate::integration::env_lock::path_lock; -const EXAMPLES_DIR: &str = "specs/016-source-repository-layout/examples"; - pub struct HostGuard(Option); impl HostGuard { @@ -37,10 +35,6 @@ impl Drop for HostGuard { } } -pub fn examples_root() -> PathBuf { - Path::new(env!("CARGO_MANIFEST_DIR")).join(EXAMPLES_DIR) -} - pub fn copy_dir_recursive(src: &Path, dst: &Path) -> std::io::Result<()> { std::fs::create_dir_all(dst)?; for entry in std::fs::read_dir(src)? { @@ -92,13 +86,6 @@ pub fn git_init_commit(repo: &Path) -> String { String::from_utf8_lossy(&head.stdout).trim().to_string() } -pub fn materialize_example(name: &str) -> (TempDir, String) { - let tmp = TempDir::new().expect("tempdir"); - copy_dir_recursive(&examples_root().join(name), tmp.path()).expect("copy example"); - let rev = git_init_commit(tmp.path()); - (tmp, rev) -} - pub fn materialize_skeleton() -> (TempDir, PathBuf, PathBuf) { let tmp = TempDir::new().expect("tempdir"); let services = tmp.path().join("services"); diff --git a/tests/integration/test_examples_01_caddy_whoami.rs b/tests/integration/test_examples_01_caddy_whoami.rs new file mode 100644 index 0000000..b0f7703 --- /dev/null +++ b/tests/integration/test_examples_01_caddy_whoami.rs @@ -0,0 +1,75 @@ +//! Per-example integration test for `examples/01-caddy-whoami/` (T016). 
+//! +//! Asserts: +//! - (a) parser load via `load_desired_state_from_path` succeeds. +//! - (b) resolved service catalog contains the expected unit names. +//! - (c) example root carries `README.md`. +//! - (d) `core-ops plan --source-repo examples/01-caddy-whoami --host example` +//! exits 0 (US1 AC1, SC-001/SC-003). + +use std::path::Path; +use std::process::Command; + +use core_ops::io::repo::{load_desired_state_from_path, HOST_OVERRIDE_ENV}; + +use crate::integration::env_lock::path_lock; +use crate::integration::source_repo_support::HostGuard; + +#[test] +fn example_01_caddy_whoami_parses_and_plans() { + let example_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("examples/01-caddy-whoami"); + + // (c) README at example root. + assert!( + example_dir.join("README.md").exists(), + "example root must carry README.md (FR-002)" + ); + + // (a) parser load succeeds + (b) catalog contains expected services. + { + let _lock = path_lock().lock().unwrap_or_else(|err| err.into_inner()); + let _host_guard = HostGuard::capture(); + std::env::set_var(HOST_OVERRIDE_ENV, "example"); + let desired = load_desired_state_from_path( + &example_dir, + example_dir.to_str().expect("utf-8 path"), + "(stateless)", + ) + .expect("parser load succeeds for 01-caddy-whoami"); + let unit_names: Vec = desired + .workloads + .iter() + .map(|w| w.systemd_unit_name.clone()) + .collect(); + for expected in ["caddy.container", "whoami.container"] { + assert!( + unit_names.iter().any(|n| n == expected), + "expected {expected} in {unit_names:?}" + ); + } + } + + // (d) `core-ops plan --source-repo --host example` exits 0. 
+ let quadlet_dir = tempfile::TempDir::new().expect("tempdir"); + let output = Command::new(env!("CARGO_BIN_EXE_core-ops")) + .arg("plan") + .arg("--source-repo") + .arg(&example_dir) + .arg("--host") + .arg("example") + .arg("--quadlet-dir") + .arg(quadlet_dir.path()) + .output() + .expect("invoke core-ops binary"); + assert!( + output.status.success(), + "`core-ops plan --source-repo {} --host example` exited non-zero.\nstdout:\n{}\nstderr:\n{}", + example_dir.display(), + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr), + ); + assert!( + !output.stdout.is_empty(), + "plan output should be non-empty against a fresh quadlet dir" + ); +} diff --git a/tests/integration/test_examples_02_nextcloud.rs b/tests/integration/test_examples_02_nextcloud.rs new file mode 100644 index 0000000..e0e7999 --- /dev/null +++ b/tests/integration/test_examples_02_nextcloud.rs @@ -0,0 +1,74 @@ +//! Per-example integration test for `examples/02-nextcloud/` (T017). + +use std::path::Path; +use std::process::Command; + +use core_ops::io::repo::{load_desired_state_from_path, HOST_OVERRIDE_ENV}; + +use crate::integration::env_lock::path_lock; +use crate::integration::source_repo_support::HostGuard; + +#[test] +fn example_02_nextcloud_parses_and_plans() { + let example_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("examples/02-nextcloud"); + + assert!( + example_dir.join("README.md").exists(), + "example root must carry README.md (FR-002)" + ); + + { + let _lock = path_lock().lock().unwrap_or_else(|err| err.into_inner()); + let _host_guard = HostGuard::capture(); + std::env::set_var(HOST_OVERRIDE_ENV, "example"); + let desired = load_desired_state_from_path( + &example_dir, + example_dir.to_str().expect("utf-8 path"), + "(stateless)", + ) + .expect("parser load succeeds for 02-nextcloud"); + let unit_names: Vec = desired + .workloads + .iter() + .map(|w| w.systemd_unit_name.clone()) + .collect(); + for expected in [ + "nextcloud.container", + 
"nextcloud-db.container", + "nextcloud-redis.container", + "traefik-edge.container", + ] { + assert!( + unit_names.iter().any(|n| n == expected), + "expected {expected} in {unit_names:?}" + ); + } + // Config-root divergence: traefik-edge service ships traefik.yaml + // under /etc/traefik/ (config-root: traefik in service.yaml). + assert!( + desired + .managed_config_paths + .contains(&"/etc/traefik/traefik.yaml".to_string()), + "expected /etc/traefik/traefik.yaml in {:?}", + desired.managed_config_paths + ); + } + + let quadlet_dir = tempfile::TempDir::new().expect("tempdir"); + let output = Command::new(env!("CARGO_BIN_EXE_core-ops")) + .arg("plan") + .arg("--source-repo") + .arg(&example_dir) + .arg("--host") + .arg("example") + .arg("--quadlet-dir") + .arg(quadlet_dir.path()) + .output() + .expect("invoke core-ops binary"); + assert!( + output.status.success(), + "`core-ops plan --source-repo {} --host example` exited non-zero.\nstderr:\n{}", + example_dir.display(), + String::from_utf8_lossy(&output.stderr), + ); +} diff --git a/tests/integration/test_examples_03_immich.rs b/tests/integration/test_examples_03_immich.rs new file mode 100644 index 0000000..50fc2e8 --- /dev/null +++ b/tests/integration/test_examples_03_immich.rs @@ -0,0 +1,66 @@ +//! Per-example integration test for `examples/03-immich/` (T018). 
+ +use std::path::Path; +use std::process::Command; + +use core_ops::io::repo::{load_desired_state_from_path, HOST_OVERRIDE_ENV}; + +use crate::integration::env_lock::path_lock; +use crate::integration::source_repo_support::HostGuard; + +#[test] +fn example_03_immich_parses_and_plans() { + let example_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("examples/03-immich"); + + assert!( + example_dir.join("README.md").exists(), + "example root must carry README.md (FR-002)" + ); + + { + let _lock = path_lock().lock().unwrap_or_else(|err| err.into_inner()); + let _host_guard = HostGuard::capture(); + std::env::set_var(HOST_OVERRIDE_ENV, "example"); + let desired = load_desired_state_from_path( + &example_dir, + example_dir.to_str().expect("utf-8 path"), + "(stateless)", + ) + .expect("parser load succeeds for 03-immich"); + let unit_names: Vec = desired + .workloads + .iter() + .map(|w| w.systemd_unit_name.clone()) + .collect(); + for expected in [ + "immich-server.container", + "immich-database.container", + "immich-redis.container", + "immich-ml.container", + "traefik-edge.container", + ] { + assert!( + unit_names.iter().any(|n| n == expected), + "expected {expected} in {unit_names:?}" + ); + } + } + + let quadlet_dir = tempfile::TempDir::new().expect("tempdir"); + let output = Command::new(env!("CARGO_BIN_EXE_core-ops")) + .arg("plan") + .arg("--source-repo") + .arg(&example_dir) + .arg("--host") + .arg("example") + .arg("--quadlet-dir") + .arg(quadlet_dir.path()) + .output() + .expect("invoke core-ops binary"); + assert!( + output.status.success(), + "`core-ops plan --source-repo {} --host example` exited non-zero.\nstderr:\n{}", + example_dir.display(), + String::from_utf8_lossy(&output.stderr), + ); +} diff --git a/tests/integration/test_examples_04_traefik_authelia.rs b/tests/integration/test_examples_04_traefik_authelia.rs new file mode 100644 index 0000000..c31fcf7 --- /dev/null +++ b/tests/integration/test_examples_04_traefik_authelia.rs @@ -0,0 +1,64 @@ 
+//! Per-example integration test for `examples/04-traefik-authelia/` (T019). + +use std::path::Path; +use std::process::Command; + +use core_ops::io::repo::{load_desired_state_from_path, HOST_OVERRIDE_ENV}; + +use crate::integration::env_lock::path_lock; +use crate::integration::source_repo_support::HostGuard; + +#[test] +fn example_04_traefik_authelia_parses_and_plans() { + let example_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("examples/04-traefik-authelia"); + + assert!( + example_dir.join("README.md").exists(), + "example root must carry README.md (FR-002)" + ); + + { + let _lock = path_lock().lock().unwrap_or_else(|err| err.into_inner()); + let _host_guard = HostGuard::capture(); + std::env::set_var(HOST_OVERRIDE_ENV, "example"); + let desired = load_desired_state_from_path( + &example_dir, + example_dir.to_str().expect("utf-8 path"), + "(stateless)", + ) + .expect("parser load succeeds for 04-traefik-authelia"); + let unit_names: Vec = desired + .workloads + .iter() + .map(|w| w.systemd_unit_name.clone()) + .collect(); + for expected in [ + "traefik.container", + "authelia.container", + "whoami.container", + ] { + assert!( + unit_names.iter().any(|n| n == expected), + "expected {expected} in {unit_names:?}" + ); + } + } + + let quadlet_dir = tempfile::TempDir::new().expect("tempdir"); + let output = Command::new(env!("CARGO_BIN_EXE_core-ops")) + .arg("plan") + .arg("--source-repo") + .arg(&example_dir) + .arg("--host") + .arg("example") + .arg("--quadlet-dir") + .arg(quadlet_dir.path()) + .output() + .expect("invoke core-ops binary"); + assert!( + output.status.success(), + "`core-ops plan --source-repo {} --host example` exited non-zero.\nstderr:\n{}", + example_dir.display(), + String::from_utf8_lossy(&output.stderr), + ); +} diff --git a/tests/integration/test_examples_05_observability.rs b/tests/integration/test_examples_05_observability.rs new file mode 100644 index 0000000..77be9dc --- /dev/null +++ 
b/tests/integration/test_examples_05_observability.rs @@ -0,0 +1,65 @@ +//! Per-example integration test for `examples/05-observability/` (T020). + +use std::path::Path; +use std::process::Command; + +use core_ops::io::repo::{load_desired_state_from_path, HOST_OVERRIDE_ENV}; + +use crate::integration::env_lock::path_lock; +use crate::integration::source_repo_support::HostGuard; + +#[test] +fn example_05_observability_parses_and_plans() { + let example_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("examples/05-observability"); + + assert!( + example_dir.join("README.md").exists(), + "example root must carry README.md (FR-002)" + ); + + { + let _lock = path_lock().lock().unwrap_or_else(|err| err.into_inner()); + let _host_guard = HostGuard::capture(); + std::env::set_var(HOST_OVERRIDE_ENV, "example"); + let desired = load_desired_state_from_path( + &example_dir, + example_dir.to_str().expect("utf-8 path"), + "(stateless)", + ) + .expect("parser load succeeds for 05-observability"); + let unit_names: Vec = desired + .workloads + .iter() + .map(|w| w.systemd_unit_name.clone()) + .collect(); + for expected in [ + "prometheus.container", + "grafana.container", + "node-exporter.container", + "cadvisor.container", + ] { + assert!( + unit_names.iter().any(|n| n == expected), + "expected {expected} in {unit_names:?}" + ); + } + } + + let quadlet_dir = tempfile::TempDir::new().expect("tempdir"); + let output = Command::new(env!("CARGO_BIN_EXE_core-ops")) + .arg("plan") + .arg("--source-repo") + .arg(&example_dir) + .arg("--host") + .arg("example") + .arg("--quadlet-dir") + .arg(quadlet_dir.path()) + .output() + .expect("invoke core-ops binary"); + assert!( + output.status.success(), + "`core-ops plan --source-repo {} --host example` exited non-zero.\nstderr:\n{}", + example_dir.display(), + String::from_utf8_lossy(&output.stderr), + ); +} diff --git a/tests/integration/test_idempotence.rs b/tests/integration/test_idempotence.rs index 3273516..9ff0a8d 100644 --- 
a/tests/integration/test_idempotence.rs +++ b/tests/integration/test_idempotence.rs @@ -93,10 +93,7 @@ fn repeated_runs_remain_converged() { } fn map_io_error(err: E) -> CoreError { - CoreError { - class: core_ops::core::types::FailureClass::Apply, - message: err.to_string(), - } + CoreError::new(core_ops::core::types::FailureClass::Apply, err.to_string()) } struct PathGuard { diff --git a/tests/integration/test_ordering.rs b/tests/integration/test_ordering.rs index 8b2979e..4724ce3 100644 --- a/tests/integration/test_ordering.rs +++ b/tests/integration/test_ordering.rs @@ -91,8 +91,5 @@ fn plan_orders_volume_before_container_before_socket() { } fn map_io_error(err: E) -> CoreError { - CoreError { - class: core_ops::core::types::FailureClass::Plan, - message: err.to_string(), - } + CoreError::new(core_ops::core::types::FailureClass::Plan, err.to_string()) } diff --git a/tests/integration/test_performance.rs b/tests/integration/test_performance.rs index 21745d5..7c3ce6c 100644 --- a/tests/integration/test_performance.rs +++ b/tests/integration/test_performance.rs @@ -155,10 +155,7 @@ fn plan_and_result_rendering_complete_within_interactive_budget() { } fn map_io_error(err: E) -> CoreError { - CoreError { - class: core_ops::core::types::FailureClass::Apply, - message: err.to_string(), - } + CoreError::new(core_ops::core::types::FailureClass::Apply, err.to_string()) } struct PathGuard { diff --git a/tests/integration/test_plan.rs b/tests/integration/test_plan.rs index 219bc80..2abbd40 100644 --- a/tests/integration/test_plan.rs +++ b/tests/integration/test_plan.rs @@ -504,10 +504,7 @@ impl Drop for EnvGuard { } fn map_io_error(err: E) -> CoreError { - CoreError { - class: core_ops::core::types::FailureClass::Plan, - message: err.to_string(), - } + CoreError::new(core_ops::core::types::FailureClass::Plan, err.to_string()) } fn object( diff --git a/tests/integration/test_quadlet_artifacts.rs b/tests/integration/test_quadlet_artifacts.rs index 363a3fd..fa4c7f6 100644 --- 
a/tests/integration/test_quadlet_artifacts.rs +++ b/tests/integration/test_quadlet_artifacts.rs @@ -125,10 +125,7 @@ fn reconcile_apply_supports_socket_and_volume_quadlets() { } fn map_io_error(err: E) -> CoreError { - CoreError { - class: core_ops::core::types::FailureClass::Apply, - message: err.to_string(), - } + CoreError::new(core_ops::core::types::FailureClass::Apply, err.to_string()) } struct PathGuard { diff --git a/tests/integration/test_reboot_recovery.rs b/tests/integration/test_reboot_recovery.rs index b5b1d1f..5b3fc2b 100644 --- a/tests/integration/test_reboot_recovery.rs +++ b/tests/integration/test_reboot_recovery.rs @@ -147,10 +147,7 @@ fn apply_persists_status_snapshot_across_repeat_runs() { } fn map_io_error(err: E) -> CoreError { - CoreError { - class: core_ops::core::types::FailureClass::Apply, - message: err.to_string(), - } + CoreError::new(core_ops::core::types::FailureClass::Apply, err.to_string()) } struct PathGuard { diff --git a/tests/integration/test_reconcile_apply.rs b/tests/integration/test_reconcile_apply.rs index b79a59a..75e0cfc 100644 --- a/tests/integration/test_reconcile_apply.rs +++ b/tests/integration/test_reconcile_apply.rs @@ -218,10 +218,7 @@ fn reconcile_apply_starts_inactive_unit_when_runtime_recovery_is_required() { } fn map_io_error(err: E) -> CoreError { - CoreError { - class: core_ops::core::types::FailureClass::Apply, - message: err.to_string(), - } + CoreError::new(core_ops::core::types::FailureClass::Apply, err.to_string()) } #[test] diff --git a/tests/integration/test_source_repo_layout.rs b/tests/integration/test_source_repo_layout.rs index a2b640b..551913d 100644 --- a/tests/integration/test_source_repo_layout.rs +++ b/tests/integration/test_source_repo_layout.rs @@ -2,7 +2,7 @@ use core_ops::core::types::DesiredState; use core_ops::io::repo::RepoError; use crate::integration::source_repo_support::{ - git_init_commit, load_with_host, materialize_example, materialize_skeleton, write_host_yaml, + git_init_commit, 
load_with_host, materialize_skeleton, write_host_yaml, }; fn unit_names(state: &DesiredState) -> Vec { @@ -13,85 +13,13 @@ fn unit_names(state: &DesiredState) -> Vec { .collect() } -// ---- Example load tests (FR-001..FR-008, happy path) ---- - -#[test] -fn example_01_minimal_single_service_loads() { - let (repo, rev) = materialize_example("01-minimal-single-service"); - let state = load_with_host(repo.path(), &rev, "example-host").expect("load"); - let names = unit_names(&state); - assert!( - names.iter().any(|n| n == "whoami.container"), - "missing whoami.container: {names:?}" - ); - assert!( - state - .managed_config_paths - .contains(&"/etc/whoami/whoami.toml".to_string()), - "missing /etc/whoami/whoami.toml: {:?}", - state.managed_config_paths - ); - assert_eq!( - state.managed_config_roots, - vec!["/etc/whoami".to_string()] - ); -} - -#[test] -fn example_02_variant_config_root_loads() { - let (repo, rev) = materialize_example("02-variant-config-root"); - let state = load_with_host(repo.path(), &rev, "example-host").expect("load"); - let names = unit_names(&state); - assert!( - names.iter().any(|n| n == "traefik-dnschallenge.container"), - "missing container: {names:?}" - ); - // Service id is `traefik-dnschallenge` but config-root is `traefik`. 
- assert!( - state - .managed_config_paths - .contains(&"/etc/traefik/traefik.yaml".to_string()), - "config not rooted under /etc/traefik/: {:?}", - state.managed_config_paths - ); - assert_eq!(state.managed_config_roots, vec!["/etc/traefik".to_string()]); -} - -#[test] -fn example_03_multi_unit_with_dropins_loads() { - let (repo, rev) = materialize_example("03-multi-unit-with-dropins"); - let state = load_with_host(repo.path(), &rev, "example-host").expect("load"); - let names = unit_names(&state); - assert!( - names.iter().any(|n| n == "webhook-receiver.container"), - "missing container unit: {names:?}" - ); - assert!( - names.iter().any(|n| n == "webhook-receiver.socket"), - "missing socket unit: {names:?}" - ); -} - -#[test] -fn example_04_host_overlay_loads() { - let (repo, rev) = materialize_example("04-host-overlay"); - let state = load_with_host(repo.path(), &rev, "host-a").expect("load"); - let names = unit_names(&state); - assert!( - names.iter().any(|n| n == "node-exporter.container"), - "missing container unit: {names:?}" - ); - // The host's config/ provides a whole-file replacement; the resolved - // destination is still `/etc/node-exporter/node-exporter.env` (FR-010 - // invariant: rooted under /etc//). - assert!( - state - .managed_config_paths - .contains(&"/etc/node-exporter/node-exporter.env".to_string()), - "missing config target: {:?}", - state.managed_config_paths - ); -} +// Spec/017 supersession: the four `example_0X_*_loads` tests against the +// in-tree `specs/016-source-repository-layout/examples/` fixtures are +// removed alongside the fixtures themselves. Their layout-shape role is +// now covered by per-example integration tests under +// `tests/integration/test_examples__.rs`, which exercise the +// real-world examples published under `examples//` via the new +// stateless `--source-repo` CLI surface. 
// ---- Full systemd unit extension set (.timer / .target / .path) ---- // @@ -704,11 +632,33 @@ fn orphan_dropin_rejected() { #[test] fn repeated_load_yields_identical_workloads() { - let (repo, rev) = materialize_example("03-multi-unit-with-dropins"); - let first = - load_with_host(repo.path(), &rev, "example-host").expect("load first"); - let second = - load_with_host(repo.path(), &rev, "example-host").expect("load second"); + let (tmp, services, hosts) = materialize_skeleton(); + let svc = services.join("alpha"); + let svc_quadlet = svc.join("quadlet"); + let svc_systemd = svc.join("systemd"); + std::fs::create_dir_all(&svc_quadlet).unwrap(); + std::fs::create_dir_all(&svc_systemd).unwrap(); + std::fs::write( + svc_quadlet.join("alpha.container"), + "[Container]\nImage=alpine\n", + ) + .unwrap(); + std::fs::create_dir_all(svc_quadlet.join("alpha.container.d")).unwrap(); + std::fs::write( + svc_quadlet.join("alpha.container.d/10-resources.conf"), + "[Service]\nMemoryMax=128M\n", + ) + .unwrap(); + std::fs::write( + svc_systemd.join("alpha.socket"), + "[Socket]\nListenStream=8080\n[Install]\nWantedBy=sockets.target\n", + ) + .unwrap(); + write_host_yaml(&hosts, "example-host", &["alpha"]); + let rev = git_init_commit(tmp.path()); + + let first = load_with_host(tmp.path(), &rev, "example-host").expect("load first"); + let second = load_with_host(tmp.path(), &rev, "example-host").expect("load second"); // `repository_ref` and the internal `_repo_temp` paths differ between // calls (each call clones into a fresh TempDir). Compare every other diff --git a/tests/integration/test_stateless_apply.rs b/tests/integration/test_stateless_apply.rs new file mode 100644 index 0000000..a4c87f6 --- /dev/null +++ b/tests/integration/test_stateless_apply.rs @@ -0,0 +1,357 @@ +//! Stateless `core-ops apply --source-repo` integration tests +//! (T033-T035) for spec/017 US3. +//! +//! Asserts the audit chain carries path-based provenance across all +//! 
three working-tree shapes (clean SHA / `(stateless+dirty)` / +//! `(stateless)`) and that stateless apply does NOT mutate any prior +//! init'd controller state (FR-013, SC-009). + +use std::fs; +use std::path::{Path, PathBuf}; +use std::process::Command as ProcessCommand; +use std::time::{SystemTime, UNIX_EPOCH}; + +use core_ops::cli::apply::{apply_with_report_stateless, synthetic_stateless_provenance}; +use core_ops::core::types::{ReconciliationStatus, RunStatus}; +use core_ops::io::repo::HOST_OVERRIDE_ENV; +use core_ops::io::source_ref::detect_provenance; +use core_ops::io::state::{persist_success_state, read_persisted_state}; + +use crate::integration::env_lock::path_lock; +use crate::integration::source_repo_support::{git_init_commit, HostGuard}; + +fn temp_dir(prefix: &str) -> PathBuf { + let mut path = std::env::temp_dir(); + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("time") + .as_nanos(); + path.push(format!("{prefix}_{nanos}")); + path +} + +fn write_systemctl_stub(dir: &Path) -> PathBuf { + let bin_path = dir.join("systemctl"); + fs::write(&bin_path, "#!/bin/sh\nexit 0\n").expect("write systemctl stub"); + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let mut perms = fs::metadata(&bin_path).expect("metadata").permissions(); + perms.set_mode(0o755); + fs::set_permissions(&bin_path, perms).expect("chmod"); + } + bin_path +} + +struct PathGuard { + previous: String, +} + +impl Drop for PathGuard { + fn drop(&mut self) { + std::env::set_var("PATH", &self.previous); + } +} + +fn write_alpha_layout(repo: &Path) { + let services = repo.join("services/alpha/quadlet"); + fs::create_dir_all(&services).expect("services dir"); + fs::write( + services.join("alpha.container"), + "[Container]\nImage=alpine\n", + ) + .expect("alpha.container"); + let hosts = repo.join("hosts/example-host"); + fs::create_dir_all(&hosts).expect("hosts dir"); + fs::write( + hosts.join("host.yaml"), + "host: example-host\nservices:\n - alpha\n", + ) 
+ .expect("host.yaml"); +} + +fn install_systemctl_stub() -> (PathBuf, PathGuard) { + let stub_dir = temp_dir("core_ops_systemctl_stateless_apply"); + fs::create_dir_all(&stub_dir).expect("stub dir"); + write_systemctl_stub(&stub_dir); + let old_path = std::env::var("PATH").unwrap_or_default(); + let new_path = format!("{}:{}", stub_dir.display(), old_path); + std::env::set_var("PATH", new_path); + (stub_dir, PathGuard { previous: old_path }) +} + +#[test] +fn stateless_apply_records_path_based_provenance_in_audit_event() { + // T033: stateless apply against a synthetic source repo; + // (a) exit 0, (b) audit chain produced, (c) audit event carries + // path-based provenance (`desired_repository` = canonical path, + // `desired_requested_ref` = `(stateless+dirty)` for an + // uncommitted layout). + let _lock = path_lock().lock().unwrap_or_else(|err| err.into_inner()); + let _host_guard = HostGuard::capture(); + std::env::set_var(HOST_OVERRIDE_ENV, "example-host"); + + let source = temp_dir("core_ops_stateless_apply_src"); + fs::create_dir_all(&source).expect("source dir"); + write_alpha_layout(&source); + git_init_commit(&source); + // Introduce an uncommitted file → working tree dirty. + fs::write(source.join("scratch.txt"), "wip\n").expect("scratch"); + + let (_stub_dir, _path_guard) = install_systemctl_stub(); + let host_quadlets = temp_dir("core_ops_stateless_apply_qdir"); + fs::create_dir_all(&host_quadlets).expect("quadlet dir"); + + let stateless = detect_provenance(&source).expect("detect provenance"); + assert_eq!(stateless.requested_ref, "(stateless+dirty)"); + + let bundle = apply_with_report_stateless(&stateless, &host_quadlets, false) + .expect("stateless apply"); + + // (a)+(b): apply produced a populated bundle. 
+ assert!(!bundle.human_report.is_empty(), "human report empty"); + assert!( + bundle.result.desired.requested_repository.is_some(), + "desired_state.requested_repository must be populated" + ); + + // (c): provenance fields surface the path-based source. + let repo = bundle + .result + .desired + .requested_repository + .as_deref() + .expect("requested_repository populated"); + assert_eq!( + Path::new(repo), + stateless.repo_path.as_path(), + "requested_repository must match canonical source path" + ); + let r#ref = bundle + .result + .desired + .requested_ref + .as_deref() + .expect("requested_ref populated"); + assert_eq!(r#ref, "(stateless+dirty)"); +} + +#[test] +fn stateless_apply_preserves_initd_persisted_state() { + // T034a / SC-009: pre-write init'd state to a tempfile, run + // stateless apply against a different source path with the same + // tempfile selected via CORE_OPS_STATE_FILE, assert + // `desired_state.repository` and `desired_state.requested_ref` + // are byte-identical pre/post. + let _lock = path_lock().lock().unwrap_or_else(|err| err.into_inner()); + let _host_guard = HostGuard::capture(); + std::env::set_var(HOST_OVERRIDE_ENV, "example-host"); + + // (1) Seed the init'd state file. + let state_file = temp_dir("core_ops_stateless_apply_state.json"); + persist_success_state( + &state_file, + "file:///var/lib/core-ops/init-source", + "init-rev-v1", + "deadbeefcafefeed1234567890abcdef12345678", + ) + .expect("persist init'd state"); + let pre = read_persisted_state(&state_file) + .expect("read pre-state") + .expect("pre-state present"); + + // (2) Stateless apply against an unrelated source path. 
+ let source = temp_dir("core_ops_stateless_apply_other_src"); + fs::create_dir_all(&source).expect("source dir"); + write_alpha_layout(&source); + + let (_stub_dir, _path_guard) = install_systemctl_stub(); + let host_quadlets = temp_dir("core_ops_stateless_apply_other_qdir"); + fs::create_dir_all(&host_quadlets).expect("quadlet dir"); + let stateless = detect_provenance(&source).expect("detect provenance"); + let _bundle = apply_with_report_stateless(&stateless, &host_quadlets, false) + .expect("stateless apply"); + + // (3) Re-read the init'd state file and assert byte-identical + // desired_state fields. + let post = read_persisted_state(&state_file) + .expect("read post-state") + .expect("post-state present"); + assert_eq!( + pre.desired_state.repository, post.desired_state.repository, + "stateless apply MUST NOT mutate init'd desired_state.repository" + ); + assert_eq!( + pre.desired_state.requested_ref, post.desired_state.requested_ref, + "stateless apply MUST NOT mutate init'd desired_state.requested_ref" + ); +} + +#[test] +fn stateless_apply_provenance_shapes_match_working_tree_state() { + // T035: provenance-shape coverage — three sub-cases asserting + // `(stateless)` / `(stateless+dirty)` / SHA in the audit-bundle + // provenance under the three working-tree conditions. + let _lock = path_lock().lock().unwrap_or_else(|err| err.into_inner()); + let _host_guard = HostGuard::capture(); + std::env::set_var(HOST_OVERRIDE_ENV, "example-host"); + let (_stub_dir, _path_guard) = install_systemctl_stub(); + + // (a) Non-git → `(stateless)`. 
+ { + let source = temp_dir("core_ops_stateless_apply_nongit"); + fs::create_dir_all(&source).expect("source dir"); + write_alpha_layout(&source); + let qdir = temp_dir("core_ops_stateless_apply_nongit_q"); + fs::create_dir_all(&qdir).expect("qdir"); + let stateless = detect_provenance(&source).expect("detect provenance"); + assert_eq!(stateless.requested_ref, "(stateless)"); + let bundle = apply_with_report_stateless(&stateless, &qdir, false) + .expect("apply non-git"); + assert_eq!( + bundle.result.desired.requested_ref.as_deref(), + Some("(stateless)") + ); + } + + // (b) Clean git checkout → 40-char SHA. + { + let source = temp_dir("core_ops_stateless_apply_clean"); + fs::create_dir_all(&source).expect("source dir"); + write_alpha_layout(&source); + let sha = git_init_commit(&source); + assert_eq!(sha.len(), 40); + let qdir = temp_dir("core_ops_stateless_apply_clean_q"); + fs::create_dir_all(&qdir).expect("qdir"); + let stateless = detect_provenance(&source).expect("detect provenance"); + assert_eq!(stateless.requested_ref, sha); + let bundle = apply_with_report_stateless(&stateless, &qdir, false) + .expect("apply clean"); + assert_eq!(bundle.result.desired.requested_ref.as_deref(), Some(sha.as_str())); + } + + // (c) Dirty git working tree → `(stateless+dirty)`. + { + let source = temp_dir("core_ops_stateless_apply_dirty"); + fs::create_dir_all(&source).expect("source dir"); + write_alpha_layout(&source); + let _ = git_init_commit(&source); + // Untracked file makes the tree dirty. 
+ fs::write(source.join("scratch.txt"), "wip\n").expect("scratch"); + let qdir = temp_dir("core_ops_stateless_apply_dirty_q"); + fs::create_dir_all(&qdir).expect("qdir"); + let stateless = detect_provenance(&source).expect("detect provenance"); + assert_eq!(stateless.requested_ref, "(stateless+dirty)"); + let bundle = apply_with_report_stateless(&stateless, &qdir, false) + .expect("apply dirty"); + assert_eq!( + bundle.result.desired.requested_ref.as_deref(), + Some("(stateless+dirty)") + ); + } +} + +#[test] +fn synthetic_stateless_provenance_reflects_run_outcome() { + // The audit event built from the synthetic provenance must + // mirror the actual run outcome — emitting `reconciliation_status = + // "never_run"` (the original bug) misleads downstream audit + // consumers that key on reconciliation provenance. + let success = synthetic_stateless_provenance( + "/canonical/path", + "feedfacefeedfacefeedfacefeedfacefeedface", + "feedfacefeedfacefeedfacefeedfacefeedface", + RunStatus::Success, + ); + assert_eq!( + success.reconciliation.status, + ReconciliationStatus::Success + ); + assert_eq!( + success.reconciliation.last_attempted_revision.as_deref(), + Some("feedfacefeedfacefeedfacefeedfacefeedface") + ); + assert_eq!( + success.reconciliation.last_applied_revision.as_deref(), + Some("feedfacefeedfacefeedfacefeedfacefeedface"), + "Success runs MUST report the applied revision" + ); + assert_eq!(success.reconciliation.generation, 1); + + let failure = synthetic_stateless_provenance( + "/canonical/path", + "(stateless+dirty)", + "(stateless+dirty)", + RunStatus::Failure, + ); + assert_eq!( + failure.reconciliation.status, + ReconciliationStatus::Failed + ); + assert_eq!( + failure.reconciliation.last_attempted_revision.as_deref(), + Some("(stateless+dirty)"), + "Failed runs MUST still report the attempted revision" + ); + assert!( + failure.reconciliation.last_applied_revision.is_none(), + "Failed runs MUST NOT report an applied revision" + ); +} + +#[test] +fn 
stateless_apply_then_initd_plan_against_same_tree_does_not_surface_detached_state() { + // US3 AC3: after stateless apply lands on a host, a subsequent + // `core-ops init main --force` followed by + // `core-ops plan` (no flag) produces a normal init'd-mode plan + // with no detached-state header surfacing from the prior + // stateless apply. We exercise this at the parser level: the + // stateless apply path doesn't touch the persisted state file, + // so the subsequent init+plan flow sees a "fresh" host with no + // residual stateless artifacts. + let _lock = path_lock().lock().unwrap_or_else(|err| err.into_inner()); + let _host_guard = HostGuard::capture(); + std::env::set_var(HOST_OVERRIDE_ENV, "example-host"); + let (_stub_dir, _path_guard) = install_systemctl_stub(); + + let source = temp_dir("core_ops_stateless_to_initd_src"); + fs::create_dir_all(&source).expect("source dir"); + write_alpha_layout(&source); + + let qdir = temp_dir("core_ops_stateless_to_initd_qdir"); + fs::create_dir_all(&qdir).expect("qdir"); + + // Stateless apply phase. + let stateless = detect_provenance(&source).expect("detect provenance"); + apply_with_report_stateless(&stateless, &qdir, false).expect("stateless apply"); + + // Sanity check: after stateless apply, no canonical state file at + // `state_file` was created (we explicitly never wrote one). + let state_file = temp_dir("core_ops_stateless_to_initd_state.json"); + assert!( + !state_file.exists(), + "stateless apply must not have written {state_file:?}" + ); + + // Init'd-mode equivalence: `load_desired_state` (the init'd loader) + // against the same tree commits the stateless tempdir into a real + // git repo and resolves HEAD. We assert it loads cleanly — this + // is the parser-level analogue of `core-ops init && core-ops plan`. 
+ git_init_commit(&source); + let head = ProcessCommand::new("git") + .arg("-C") + .arg(&source) + .args(["rev-parse", "HEAD"]) + .output() + .expect("git rev-parse"); + let head_sha = String::from_utf8_lossy(&head.stdout).trim().to_string(); + let initd = core_ops::io::repo::load_desired_state( + source.to_str().expect("utf-8 path"), + &head_sha, + ) + .expect("init'd load"); + // Init'd plan against the same tree resolves a non-empty workload + // catalog — i.e., the prior stateless apply did not leave residual + // state that would derail the init'd-mode flow. + assert!(!initd.workloads.is_empty(), "init'd workloads empty after transition"); +} diff --git a/tests/integration/test_stateless_authoring.rs b/tests/integration/test_stateless_authoring.rs new file mode 100644 index 0000000..9003293 --- /dev/null +++ b/tests/integration/test_stateless_authoring.rs @@ -0,0 +1,122 @@ +//! Stateless `--source-repo` authoring + iteration tests for US2 +//! (T030, T031). Validates that an operator can copy an example to a +//! scratch directory, rename hosts, edit configs, and re-run plan +//! without ever running `core-ops init` or `git init`. Then validates +//! that the stateless-to-init'd transition produces an equivalent +//! plan against the same source tree (US2 AC3). + +use std::path::Path; +use std::process::Command; + +use core_ops::io::repo::HOST_OVERRIDE_ENV; + +use crate::integration::env_lock::path_lock; +use crate::integration::source_repo_support::{copy_dir_recursive, git_init_commit, HostGuard}; + +fn coreops() -> Command { + Command::new(env!("CARGO_BIN_EXE_core-ops")) +} + +#[test] +fn copy_example_rename_host_and_iterate_succeeds() { + // T030 / US2 AC1: copy `examples/02-nextcloud/` to a scratch + // tempdir (no git init), rename `hosts/example/` to `hosts/myhost/`, + // edit `host.yaml`, run `core-ops plan --source-repo + // --host myhost` → exit 0. 
+ let scratch = tempfile::TempDir::new().expect("scratch tempdir"); + let example_src = Path::new(env!("CARGO_MANIFEST_DIR")).join("examples/02-nextcloud"); + copy_dir_recursive(&example_src, scratch.path()).expect("copy example"); + + // Rename hosts/example/ → hosts/myhost/. + let old_host = scratch.path().join("hosts/example"); + let new_host = scratch.path().join("hosts/myhost"); + std::fs::rename(&old_host, &new_host).expect("rename host dir"); + // Update host.yaml's `host:` field to match the directory name + // (deny_unknown_fields hardening rejects mismatches). + let host_yaml = new_host.join("host.yaml"); + let body = std::fs::read_to_string(&host_yaml).expect("read host.yaml"); + let rewritten = body.replace("host: example", "host: myhost"); + std::fs::write(&host_yaml, rewritten).expect("write host.yaml"); + + let qdir = tempfile::TempDir::new().expect("quadlet tempdir"); + let output = coreops() + .arg("plan") + .arg("--source-repo") + .arg(scratch.path()) + .arg("--host") + .arg("myhost") + .arg("--quadlet-dir") + .arg(qdir.path()) + .output() + .expect("invoke core-ops"); + assert!( + output.status.success(), + "scratch-dir plan after host-rename should exit 0\nstderr:\n{}\nstdout:\n{}", + String::from_utf8_lossy(&output.stderr), + String::from_utf8_lossy(&output.stdout), + ); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("(stateless)"), + "non-git scratch dir must surface (stateless) sentinel: {stdout}" + ); +} + +#[test] +fn stateless_and_initd_plans_against_same_tree_are_equivalent() { + // T031 / US2 AC3: stateless plan against a scratch dir, then + // `git init && core-ops init && core-ops plan` (no --source-repo), + // assert the two plans produce equivalent action sets via the + // workload list comparison. We use the library API for the + // comparison so both modes share an identical assertion surface. 
+ + use core_ops::io::repo::{load_desired_state, load_desired_state_from_path}; + + let scratch = tempfile::TempDir::new().expect("scratch tempdir"); + let example_src = Path::new(env!("CARGO_MANIFEST_DIR")).join("examples/01-caddy-whoami"); + copy_dir_recursive(&example_src, scratch.path()).expect("copy example"); + + // Run both loads under the path_lock + same CORE_OPS_HOST so the + // host resolver sees `example` for both. + let _lock = path_lock().lock().unwrap_or_else(|err| err.into_inner()); + let _host_guard = HostGuard::capture(); + std::env::set_var(HOST_OVERRIDE_ENV, "example"); + + // Stateless load: scratch dir as-is, no git, sentinel ref. + let stateless = load_desired_state_from_path( + scratch.path(), + scratch.path().to_str().expect("utf-8 path"), + "(stateless)", + ) + .expect("stateless load"); + + // Init'd-mode load: git_init_commit creates a real revision and + // `load_desired_state` clones into a tempdir + checks out HEAD. + let rev = git_init_commit(scratch.path()); + let initd = load_desired_state( + scratch.path().to_str().expect("utf-8 path"), + &rev, + ) + .expect("init'd load"); + + // Equivalence: same workload set (by systemd_unit_name), same + // managed_config_paths, same managed_config_roots. Provenance + // fields legitimately differ between modes — that's the point of + // the value-level conventions in data-model.md E1. 
+ let stateless_units: Vec<String> = stateless + .workloads + .iter() + .map(|w| w.systemd_unit_name.clone()) + .collect(); + let initd_units: Vec<String> = initd + .workloads + .iter() + .map(|w| w.systemd_unit_name.clone()) + .collect(); + assert_eq!( + stateless_units, initd_units, + "stateless vs init'd workload sets diverged: {stateless_units:?} != {initd_units:?}" + ); + assert_eq!(stateless.managed_config_paths, initd.managed_config_paths); + assert_eq!(stateless.managed_config_roots, initd.managed_config_roots); +} diff --git a/tests/integration/test_stateless_explain.rs b/tests/integration/test_stateless_explain.rs new file mode 100644 index 0000000..44f75f2 --- /dev/null +++ b/tests/integration/test_stateless_explain.rs @@ -0,0 +1,160 @@ +//! Stateless `core-ops explain --source-repo` integration test (T022). +//! +//! Per FR-011a: explain accepts --source-repo, is read-only, requires +//! --host, writes nothing. Per spec.md SC-011 / T022 contract: this +//! test exercises **all five published examples** (one sub-test per +//! example) so SC-011's "any of the five published examples" coverage +//! is grounded in evidence rather than spot-checked. 
+ +use std::path::Path; +use std::process::Command; + +fn coreops() -> Command { + Command::new(env!("CARGO_BIN_EXE_core-ops")) +} + +fn run_explain(example_slug: &str, object: &str) -> std::process::Output { + let example_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("examples") + .join(example_slug); + let quadlet_dir = tempfile::TempDir::new().expect("tempdir").keep(); + coreops() + .arg("explain") + .arg("--source-repo") + .arg(&example_dir) + .arg("--host") + .arg("example") + .arg("--quadlet-dir") + .arg(&quadlet_dir) + .arg(object) + .output() + .expect("invoke core-ops") +} + +#[test] +fn stateless_explain_against_01_caddy_whoami_succeeds() { + let output = run_explain("01-caddy-whoami", "container/caddy.container"); + assert!( + output.status.success(), + "stderr: {}\nstdout: {}", + String::from_utf8_lossy(&output.stderr), + String::from_utf8_lossy(&output.stdout) + ); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("caddy"), + "explain output should mention the inspected object: {stdout}" + ); +} + +#[test] +fn stateless_explain_against_02_nextcloud_succeeds() { + let output = run_explain("02-nextcloud", "container/nextcloud.container"); + assert!( + output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn stateless_explain_against_03_immich_succeeds() { + let output = run_explain("03-immich", "container/immich-server.container"); + assert!( + output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn stateless_explain_against_04_traefik_authelia_succeeds() { + let output = run_explain("04-traefik-authelia", "container/traefik.container"); + assert!( + output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn stateless_explain_against_05_observability_succeeds() { + let output = run_explain("05-observability", "container/prometheus.container"); + assert!( + 
output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn stateless_explain_output_is_unaffected_by_prior_initd_state() { + // FR-011a + clarification Q5: stateless explain must not consult + // persisted controller state. Asserts structurally: identical + // stateless explain invocations produce byte-identical output + // whether `CORE_OPS_STATE_FILE` points at a populated init'd state + // file (from an unrelated repository) or at an empty path. + + use core_ops::io::state::persist_success_state; + + let example_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("examples/01-caddy-whoami"); + let quadlet_dir = tempfile::TempDir::new().expect("quadlet tempdir"); + + // (1) Run stateless explain with `CORE_OPS_STATE_FILE` pointing at + // a populated init'd state file from an unrelated repository. + let with_state = tempfile::TempDir::new().expect("with-state tempdir"); + let state_file = with_state.path().join("status.json"); + persist_success_state( + &state_file, + "file:///var/lib/core-ops/some-other-repo", + "unrelated-rev-v9", + "feedfacefeedfacefeedfacefeedfacefeedface", + ) + .expect("persist init'd state"); + let with_state_output = coreops() + .arg("explain") + .arg("--source-repo") + .arg(&example_dir) + .arg("--host") + .arg("example") + .arg("--quadlet-dir") + .arg(quadlet_dir.path()) + .arg("container/caddy.container") + .env("CORE_OPS_STATE_FILE", &state_file) + .output() + .expect("invoke core-ops with init'd state"); + assert!( + with_state_output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&with_state_output.stderr) + ); + + // (2) Run the same stateless explain with `CORE_OPS_STATE_FILE` + // pointing at a non-existent path (no init'd state). 
+ let without_state = tempfile::TempDir::new().expect("without-state tempdir"); + let absent_state_file = without_state.path().join("nonexistent.json"); + let without_state_output = coreops() + .arg("explain") + .arg("--source-repo") + .arg(&example_dir) + .arg("--host") + .arg("example") + .arg("--quadlet-dir") + .arg(quadlet_dir.path()) + .arg("container/caddy.container") + .env("CORE_OPS_STATE_FILE", &absent_state_file) + .output() + .expect("invoke core-ops without init'd state"); + assert!( + without_state_output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&without_state_output.stderr) + ); + + // (3) Outputs must be byte-identical — stateless explain leaks no + // init'd-state context. + assert_eq!( + with_state_output.stdout, without_state_output.stdout, + "stateless explain output diverged based on persisted state contents" + ); +} diff --git a/tests/integration/test_stateless_plan.rs b/tests/integration/test_stateless_plan.rs new file mode 100644 index 0000000..b69d70c --- /dev/null +++ b/tests/integration/test_stateless_plan.rs @@ -0,0 +1,315 @@ +//! Stateless `core-ops plan --source-repo` integration test (T021). +//! +//! Covers the FR-010..FR-016 contract surface end-to-end via the +//! cargo-built binary: +//! - (a) non-git tempdir → exit 0, `(stateless)` provenance. +//! - (b) clean git checkout → exit 0, 40-char SHA provenance. +//! - (c) dirty working tree → exit 0, `(stateless+dirty)` provenance. +//! - (d) missing `--host` → clap exit 2. +//! - (e) non-directory path → exit non-zero with helpful diagnostic. +//! - (f) `--audit-dir` honored when explicitly set (clarification Q4). 
+ +use std::path::{Path, PathBuf}; +use std::process::Command; + +use crate::integration::source_repo_support::git_init_commit; + +fn write_minimal_layout(root: &Path) { + let services = root.join("services/alpha/quadlet"); + std::fs::create_dir_all(&services).expect("services dir"); + std::fs::write( + services.join("alpha.container"), + "[Container]\nImage=alpine\n", + ) + .expect("alpha.container"); + let hosts = root.join("hosts/example"); + std::fs::create_dir_all(&hosts).expect("hosts dir"); + std::fs::write( + hosts.join("host.yaml"), + "host: example\nservices:\n - alpha\n", + ) + .expect("host.yaml"); +} + +fn coreops() -> Command { + Command::new(env!("CARGO_BIN_EXE_core-ops")) +} + +fn run_plan(source: &Path, host: &str, quadlet_dir: &Path) -> std::process::Output { + coreops() + .arg("plan") + .arg("--source-repo") + .arg(source) + .arg("--host") + .arg(host) + .arg("--quadlet-dir") + .arg(quadlet_dir) + .output() + .expect("invoke core-ops") +} + +fn quadlet_dir() -> PathBuf { + tempfile::TempDir::new().expect("tempdir").keep() +} + +#[test] +fn stateless_plan_against_non_git_directory_succeeds_with_stateless_sentinel() { + let tmp = tempfile::TempDir::new().expect("tempdir"); + write_minimal_layout(tmp.path()); + let qdir = quadlet_dir(); + let output = run_plan(tmp.path(), "example", &qdir); + assert!( + output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("(stateless)"), + "expected `(stateless)` provenance in plan header, got:\n{stdout}" + ); +} + +#[test] +fn stateless_plan_against_clean_git_checkout_records_full_sha() { + let tmp = tempfile::TempDir::new().expect("tempdir"); + write_minimal_layout(tmp.path()); + let sha = git_init_commit(tmp.path()); + assert_eq!(sha.len(), 40, "git_init_commit should return a full SHA"); + let qdir = quadlet_dir(); + let output = run_plan(tmp.path(), "example", &qdir); + assert!( + 
output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + let stdout = String::from_utf8_lossy(&output.stdout); + let short_sha = &sha[..8]; + assert!( + stdout.contains(short_sha), + "expected SHA prefix {short_sha} in plan header, got:\n{stdout}" + ); + assert!( + !stdout.contains("(stateless)"), + "clean checkout must not surface (stateless) sentinel, got:\n{stdout}" + ); +} + +#[test] +fn stateless_plan_against_dirty_working_tree_records_dirty_sentinel() { + let tmp = tempfile::TempDir::new().expect("tempdir"); + write_minimal_layout(tmp.path()); + let _sha = git_init_commit(tmp.path()); + // Introduce uncommitted change (untracked file). + std::fs::write(tmp.path().join("scratch.txt"), "wip\n").expect("scratch"); + let qdir = quadlet_dir(); + let output = run_plan(tmp.path(), "example", &qdir); + assert!( + output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("(stateless+dirty)"), + "expected `(stateless+dirty)` provenance in plan header, got:\n{stdout}" + ); +} + +#[test] +fn stateless_plan_without_host_errors_with_clap_diagnostic() { + let tmp = tempfile::TempDir::new().expect("tempdir"); + write_minimal_layout(tmp.path()); + let output = coreops() + .arg("plan") + .arg("--source-repo") + .arg(tmp.path()) + .output() + .expect("invoke core-ops"); + assert!( + !output.status.success(), + "plan without --host must error; stdout:\n{}\nstderr:\n{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("--host") || stderr.contains("host"), + "stderr must mention --host requirement: {stderr}" + ); +} + +#[test] +fn stateless_plan_against_non_directory_path_errors_with_exit_code_64() { + // Per `contracts/cli-flag.md` Error semantics: is not a + // directory → exit 64 (`EX_USAGE`). 
Asserts both the diagnostic + // and the documented exit status so automation can rely on it. + let tmp = tempfile::TempDir::new().expect("tempdir"); + let file_path = tmp.path().join("a-file"); + std::fs::write(&file_path, "x").expect("write"); + let qdir = quadlet_dir(); + let output = run_plan(&file_path, "example", &qdir); + assert!( + !output.status.success(), + "non-directory --source-repo must error" + ); + assert_eq!( + output.status.code(), + Some(64), + "non-directory --source-repo must exit 64 (EX_USAGE) per contracts/cli-flag.md" + ); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("not a directory") || stderr.contains("does not exist"), + "stderr must reference path-shape error: {stderr}" + ); +} + +#[test] +fn stateless_plan_against_missing_path_errors_with_exit_code_64() { + // Per `contracts/cli-flag.md` Error semantics: does not + // exist → exit 64 (`EX_USAGE`). + let tmp = tempfile::TempDir::new().expect("tempdir"); + let missing = tmp.path().join("nope"); + let qdir = quadlet_dir(); + let output = run_plan(&missing, "example", &qdir); + assert!(!output.status.success(), "missing --source-repo must error"); + assert_eq!( + output.status.code(), + Some(64), + "missing --source-repo must exit 64 (EX_USAGE) per contracts/cli-flag.md" + ); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("does not exist") || stderr.contains("missing"), + "stderr must mention missing path: {stderr}" + ); +} + +#[test] +fn stateless_plan_output_is_unaffected_by_prior_initd_state() { + // FR-012 + clarification Q2: stateless plan must not consult + // persisted controller state. Asserts structurally that + // identical stateless plan invocations produce byte-identical + // output whether `CORE_OPS_STATE_FILE` points at a populated + // init'd state file from an unrelated repository or at a + // non-existent path. 
+ + use core_ops::io::state::persist_success_state; + + let tmp = tempfile::TempDir::new().expect("tempdir"); + write_minimal_layout(tmp.path()); + let qdir = quadlet_dir(); + + let with_state_dir = tempfile::TempDir::new().expect("with-state tempdir"); + let state_file = with_state_dir.path().join("status.json"); + persist_success_state( + &state_file, + "file:///var/lib/core-ops/some-other-repo", + "unrelated-rev-v9", + "feedfacefeedfacefeedfacefeedfacefeedface", + ) + .expect("persist init'd state"); + let with_state = coreops() + .arg("plan") + .arg("--source-repo") + .arg(tmp.path()) + .arg("--host") + .arg("example") + .arg("--quadlet-dir") + .arg(&qdir) + .env("CORE_OPS_STATE_FILE", &state_file) + .output() + .expect("invoke core-ops with init'd state"); + assert!( + with_state.status.success(), + "stderr: {}", + String::from_utf8_lossy(&with_state.stderr) + ); + + let without_state_dir = tempfile::TempDir::new().expect("without-state tempdir"); + let absent = without_state_dir.path().join("nonexistent.json"); + let without_state = coreops() + .arg("plan") + .arg("--source-repo") + .arg(tmp.path()) + .arg("--host") + .arg("example") + .arg("--quadlet-dir") + .arg(&qdir) + .env("CORE_OPS_STATE_FILE", &absent) + .output() + .expect("invoke core-ops without init'd state"); + assert!( + without_state.status.success(), + "stderr: {}", + String::from_utf8_lossy(&without_state.stderr) + ); + + assert_eq!( + with_state.stdout, without_state.stdout, + "stateless plan output diverged based on persisted state contents" + ); + let stdout = String::from_utf8_lossy(&with_state.stdout); + assert!( + !stdout.contains("[DETACHED]"), + "stateless plan must not surface a [DETACHED] header from unrelated init'd state: {stdout}" + ); +} + +#[test] +fn stateless_plan_against_invalid_layout_errors_with_exit_code_65() { + // Per `contracts/cli-flag.md` Error semantics: is a + // directory but layout is invalid → exit 65 (`EX_DATAERR`). 
+ // Distinct from path-shape errors (64) so automation can + // classify malformed inputs from generic runtime failures. + let tmp = tempfile::TempDir::new().expect("tempdir"); + // Empty directory: passes the path-existence + is-directory + // checks, then fails at the parser layer with MissingServicesDir. + let qdir = quadlet_dir(); + let output = run_plan(tmp.path(), "example", &qdir); + assert!( + !output.status.success(), + "invalid layout must error; stdout:\n{}\nstderr:\n{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + assert_eq!( + output.status.code(), + Some(65), + "invalid layout must exit 65 (EX_DATAERR) per contracts/cli-flag.md" + ); +} + +#[test] +fn stateless_plan_honors_explicit_audit_dir() { + let tmp = tempfile::TempDir::new().expect("tempdir"); + write_minimal_layout(tmp.path()); + let qdir = quadlet_dir(); + let audit_dir = tempfile::TempDir::new().expect("audit tempdir"); + let output = coreops() + .arg("plan") + .arg("--source-repo") + .arg(tmp.path()) + .arg("--host") + .arg("example") + .arg("--quadlet-dir") + .arg(&qdir) + .arg("--audit-dir") + .arg(audit_dir.path()) + .output() + .expect("invoke core-ops"); + assert!( + output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + let entries: Vec<_> = std::fs::read_dir(audit_dir.path()) + .expect("read audit dir") + .collect::<Result<Vec<_>, _>>() + .expect("collect entries"); + assert!( + !entries.is_empty(), + "stateless plan with --audit-dir must write at least one audit record" + ); +} diff --git a/tests/integration/test_status_state.rs b/tests/integration/test_status_state.rs index cff5aa7..59766d2 100644 --- a/tests/integration/test_status_state.rs +++ b/tests/integration/test_status_state.rs @@ -90,10 +90,7 @@ esac } fn map_io_error(err: E) -> CoreError { - CoreError { - class: core_ops::core::types::FailureClass::Plan, - message: err.to_string(), - } + CoreError::new(core_ops::core::types::FailureClass::Plan, 
err.to_string()) } struct EnvGuard { diff --git a/tests/integration/test_verification.rs b/tests/integration/test_verification.rs index af0db83..b2a2966 100644 --- a/tests/integration/test_verification.rs +++ b/tests/integration/test_verification.rs @@ -94,10 +94,7 @@ fn reconcile_apply_reports_verification_failure() { } fn map_io_error(err: E) -> CoreError { - CoreError { - class: core_ops::core::types::FailureClass::Apply, - message: err.to_string(), - } + CoreError::new(core_ops::core::types::FailureClass::Apply, err.to_string()) } struct PathGuard { diff --git a/tests/integration/test_verification_rules.rs b/tests/integration/test_verification_rules.rs index 934ba9d..6c0ce60 100644 --- a/tests/integration/test_verification_rules.rs +++ b/tests/integration/test_verification_rules.rs @@ -132,10 +132,7 @@ fn verification_rules_accept_volume_inactive() { } fn map_io_error(err: E) -> CoreError { - CoreError { - class: core_ops::core::types::FailureClass::Apply, - message: err.to_string(), - } + CoreError::new(core_ops::core::types::FailureClass::Apply, err.to_string()) } struct PathGuard {