Skip to content

Commit b373985

Browse files
committed
Implement guarded local-agent quarantine
1 parent 3e8f458 commit b373985

1 file changed

Lines changed: 184 additions & 6 deletions

File tree

sourceosctl/commands/local_agent.py

Lines changed: 184 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
"""SourceOS local-agent runtime helpers.
22
33
This module is intentionally conservative: status, preflight, doctor, and logs are
4-
read-only. Mutating verbs require both --execute and --policy-ok and currently
5-
emit guarded plans unless explicitly implemented. The goal is to prevent the
6-
class of failure where a Nix-generated local service silently becomes opaque,
7-
unbounded persistence.
4+
read-only. Mutating verbs require both --execute and --policy-ok. The goal is to
5+
prevent the class of failure where a Nix-generated local service silently becomes
6+
opaque, unbounded persistence.
87
"""
98

109
from __future__ import annotations
1110

1211
import argparse
12+
import datetime as _dt
1313
import json
1414
import os
1515
import pathlib
@@ -18,7 +18,7 @@ class of failure where a Nix-generated local service silently becomes opaque,
1818
import shutil
1919
import subprocess
2020
import sys
21-
from dataclasses import dataclass
21+
from dataclasses import asdict, dataclass
2222
from typing import Iterable, Optional
2323

2424

@@ -75,6 +75,14 @@ class Check:
7575
remediation: Optional[str] = None
7676

7777

78+
@dataclass
class ActionResult:
    """Outcome of one quarantine step, as printed to the operator and stored in the manifest."""

    # Short step identifier, e.g. "write", "launchd-print", "move-user-plist".
    action: str
    # Outcome marker; the visible call sites use "ok", "warn", and "skip".
    status: str
    # Human-readable explanation of what happened.
    detail: str
    # Filesystem path the step relates to, when there is one.
    path: Optional[str] = None
84+
85+
7886
def _expand(path: str) -> pathlib.Path:
    """Return *path* as a ``Path`` with ``~`` and environment variables expanded.

    The user-home prefix is expanded first, then ``$VAR`` / ``${VAR}``
    references are substituted.
    """
    with_home = os.path.expanduser(path)
    with_vars = os.path.expandvars(with_home)
    return pathlib.Path(with_vars)
8088

@@ -113,6 +121,14 @@ def _systemctl_binary() -> Optional[str]:
113121
return shutil.which("systemctl")
114122

115123

124+
def _timestamp() -> str:
    """Return the current local time as a compact ``YYYYMMDD-HHMMSS`` string.

    Resolution is one second; the value is used to name quarantine folders.
    """
    local_now = _dt.datetime.now(tz=_dt.timezone.utc).astimezone()
    return f"{local_now:%Y%m%d-%H%M%S}"
126+
127+
128+
def _iso_now() -> str:
    """Return the current local time as a timezone-aware ISO-8601 string (seconds precision)."""
    utc_now = _dt.datetime.now(tz=_dt.timezone.utc)
    local_now = utc_now.astimezone()
    return local_now.isoformat(timespec="seconds")
130+
131+
116132
def _authfile_is_empty_auth(path: pathlib.Path) -> tuple[bool, str]:
117133
if not path.exists():
118134
return False, "authfile missing"
@@ -360,6 +376,148 @@ def _print_checks(checks: Iterable[Check]) -> int:
360376
return worst
361377

362378

379+
def _write_text(path: pathlib.Path, content: str) -> ActionResult:
    """Write *content* to *path*, creating missing parent directories.

    The file is always encoded as UTF-8 so that evidence files do not depend
    on the locale of the process that produced them.

    Returns:
        An ``ActionResult`` with action ``"write"``: status ``"ok"`` on
        success, or ``"warn"`` with the error text if the directory or file
        could not be written. Failures are reported, not raised.
    """
    try:
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(content, encoding="utf-8")
    except Exception as exc:  # noqa: BLE001 - best-effort: one failed artifact must not abort the run
        return ActionResult("write", "warn", f"could not write {path}: {exc}", str(path))
    return ActionResult("write", "ok", f"wrote {path}", str(path))
386+
387+
388+
def _copy_if_exists(src: pathlib.Path, dst: pathlib.Path, label: str) -> ActionResult:
    """Copy *src* to *dst* (metadata included) when *src* exists.

    Returns an ``ActionResult`` labelled *label*: ``"skip"`` if *src* is
    absent, ``"ok"`` after a successful copy (path is *dst*), or ``"warn"``
    with the error text if the copy failed (path is *src*).
    """
    if src.exists():
        try:
            dst.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(src, dst)
        except Exception as exc:  # noqa: BLE001
            status, detail, where = "warn", f"could not copy {src}: {exc}", src
        else:
            status, detail, where = "ok", f"copied {src} -> {dst}", dst
    else:
        status, detail, where = "skip", f"missing: {src}", src
    return ActionResult(label, status, detail, str(where))
397+
398+
399+
def _move_if_exists(src: pathlib.Path, dst: pathlib.Path, label: str) -> ActionResult:
    """Move *src* to *dst* when *src* is present on disk.

    A dangling symlink counts as present: ``Path.exists()`` follows symlinks
    and returns ``False`` for a link whose target is gone, which would
    otherwise leave such a service definition in place while reporting it
    as "missing".

    Returns:
        An ``ActionResult`` labelled *label*: ``"skip"`` if nothing is at
        *src*, ``"ok"`` after a successful move (path is *dst*), or
        ``"warn"`` with the error text if the move failed (path is *src*),
        e.g. because the file requires elevated privileges.
    """
    # is_symlink() does not follow the link, so it also detects broken links.
    if not (src.is_symlink() or src.exists()):
        return ActionResult(label, "skip", f"missing: {src}", str(src))
    try:
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(src), str(dst))
    except Exception as exc:  # noqa: BLE001 - best-effort: report and let the operator follow up
        return ActionResult(label, "warn", f"could not move {src}: {exc}", str(src))
    return ActionResult(label, "ok", f"moved {src} -> {dst}", str(dst))
408+
409+
410+
def _run_capture(cmd: list[str], out_path: pathlib.Path, action: str, timeout: int = 12) -> ActionResult:
    """Run *cmd* and persist its outcome as a JSON evidence file at *out_path*.

    The JSON payload records the command, return code, stdout, stderr and a
    capture timestamp.

    Args:
        cmd: Argument vector to execute via ``_run``.
        out_path: Destination of the JSON evidence file.
        action: Label used for the returned ``ActionResult``.
        timeout: Timeout forwarded to ``_run``.

    Returns:
        An ``ActionResult`` labelled *action*. Status is ``"warn"`` when the
        command returned non-zero or the evidence file could not be written;
        in the latter case the write error is appended to the detail so the
        operator is not told output was captured when nothing reached disk.
    """
    rc, out, err = _run(cmd, timeout=timeout)
    payload = {
        "command": cmd,
        "returncode": rc,
        "stdout": out,
        "stderr": err,
        "capturedAt": _iso_now(),
    }
    written = _write_text(out_path, json.dumps(payload, indent=2, sort_keys=True))
    rendered = " ".join(cmd)
    if rc == 0:
        status, detail = written.status, f"captured {rendered}"
    else:
        status, detail = "warn", f"command returned {rc}: {rendered}"
    if written.status != "ok":
        # Surface the persistence failure instead of silently dropping it.
        detail = f"{detail}; {written.detail}"
    return ActionResult(action, status, detail, str(out_path))
423+
424+
425+
def _capture_launchd(agent: LocalAgent, qdir: pathlib.Path) -> list[ActionResult]:
    """Record launchd state for *agent* in *qdir*, then boot out and disable it.

    On a non-Darwin host a single ``"skip"`` result is returned; if
    ``launchctl`` cannot be located a single ``"warn"`` result is returned.
    Otherwise four commands run in order, each captured to its own JSON
    file: ``print``, ``print-disabled``, ``bootout``, ``disable``. Note that
    the last two mutate launchd state.
    """
    if platform.system() != "Darwin":
        return [ActionResult("launchd", "skip", "not a Darwin host")]
    launchctl = _launchctl_binary()
    if not launchctl:
        return [ActionResult("launchd", "warn", "launchctl not found")]

    gui_domain = f"gui/{os.getuid()}"
    service_target = f"{gui_domain}/{agent.label}"
    plist_path = str(_expand(agent.user_plist))

    # (action label, evidence file name, launchctl arguments) in execution order:
    # read-only captures first, state-changing commands last.
    steps = (
        ("launchd-print", "launchd-print.json", ["print", service_target]),
        ("launchd-disabled", "launchd-disabled.json", ["print-disabled", gui_domain]),
        ("launchd-bootout", "launchd-bootout.json", ["bootout", gui_domain, plist_path]),
        ("launchd-disable", "launchd-disable.json", ["disable", service_target]),
    )
    return [
        _run_capture([launchctl, *argv], qdir / filename, action)
        for action, filename, argv in steps
    ]
439+
440+
441+
def _capture_podman(agent: LocalAgent, qdir: pathlib.Path) -> list[ActionResult]:
    """Capture Podman state relevant to *agent* as JSON evidence files in *qdir*.

    Returns a single ``"skip"`` result when no podman binary is found.
    Otherwise runs six podman commands in order (connection list, machine
    list, then info / ps / image inspect / container inspect against the
    agent's connection) and returns one result per command.
    """
    podman = _podman_binary()
    if not podman:
        return [ActionResult("podman", "skip", "podman not found")]

    # Commands addressed to the agent's own Podman connection share this prefix.
    scoped = [podman, "--connection", agent.podman_connection]
    plan = [
        ("podman-connections", "podman-connections.json", [podman, "system", "connection", "list"]),
        ("podman-machines", "podman-machines.json", [podman, "machine", "list"]),
        ("podman-info", "podman-info.json", [*scoped, "info"]),
        ("podman-ps", "podman-ps.json", [*scoped, "ps", "-a", "--filter", f"name={agent.container_name}"]),
        ("image-inspect", "image-inspect.json", [*scoped, "image", "inspect", agent.runtime_image]),
        ("container-inspect", "container-inspect.json", [*scoped, "container", "inspect", agent.container_name]),
    ]
    return [_run_capture(argv, qdir / filename, action) for action, filename, argv in plan]
453+
454+
455+
def _capture_redacted_auth(agent: LocalAgent, qdir: pathlib.Path) -> list[ActionResult]:
    """Write the ``_redacted_json`` rendering of each known auth file into *qdir*.

    Covers the agent's runtime authfile, the Docker config and the
    containers auth file. Returns one write result per target, in that order.
    """
    sources = (
        ("runtime-authfile-redacted.json", _expand(agent.authfile)),
        ("docker-config-redacted.json", _expand("~/.docker/config.json")),
        ("containers-auth-redacted.json", _expand("~/.config/containers/auth.json")),
    )
    return [_write_text(qdir / filename, _redacted_json(source)) for filename, source in sources]
465+
466+
467+
def _quarantine_agent(agent: LocalAgent, output_dir: pathlib.Path) -> tuple[pathlib.Path, list[ActionResult]]:
    """Quarantine *agent* into a fresh timestamped folder under *output_dir*.

    Steps, in order: write the current checks, capture launchd / Podman /
    redacted-auth evidence, copy logs, move the service plists into the
    folder with a ``.disabled`` suffix, then write ``manifest.json`` and
    ``remediation.md``.

    Returns:
        ``(quarantine_dir, results)`` where *results* holds one
        ``ActionResult`` per step performed.

    Raises:
        FileExistsError: if the timestamped folder already exists
            (``mkdir`` is called with ``exist_ok=False``).
    """
    qdir = output_dir / f"{agent.name}-{_timestamp()}"
    qdir.mkdir(parents=True, exist_ok=False)

    checks_json = json.dumps(
        [asdict(check) for check in collect_checks(agent)],
        indent=2,
        sort_keys=True,
    )
    results: list[ActionResult] = [_write_text(qdir / "checks.json", checks_json)]
    results += _capture_launchd(agent, qdir)
    results += _capture_podman(agent, qdir)
    results += _capture_redacted_auth(agent, qdir)

    user_plist = _expand(agent.user_plist)
    legacy_plist = pathlib.Path(agent.legacy_system_plist)
    app_log = _expand(agent.app_log)
    log_dir = _expand(agent.log_dir)

    results.append(_copy_if_exists(app_log, qdir / app_log.name, "copy-app-log"))
    if log_dir.exists():
        results.extend(
            _copy_if_exists(log_file, qdir / log_file.name, "copy-related-log")
            for log_file in log_dir.glob(f"{agent.name}*.log")
        )

    # Move writable service definitions after evidence capture. System-wide legacy
    # plists may require sudo; in that case we report the warning and leave the
    # operator with a clear follow-up rather than silently failing.
    for plist, step in ((user_plist, "move-user-plist"), (legacy_plist, "move-legacy-system-plist")):
        results.append(_move_if_exists(plist, qdir / f"{plist.name}.disabled", step))

    # The manifest snapshots every result gathered so far; the two writes
    # below are appended to the returned list afterwards.
    manifest = {
        "agent": asdict(agent),
        "createdAt": _iso_now(),
        "platform": platform.platform(),
        "quarantineDir": str(qdir),
        "results": [asdict(entry) for entry in results],
        "operatorNotes": [
            "Review warning results; system-wide files may require sudo removal.",
            "Reinstall only through the SourceOS local-agent runtime contract.",
        ],
    }
    manifest_json = json.dumps(manifest, indent=2, sort_keys=True)
    results.append(_write_text(qdir / "manifest.json", manifest_json))

    remediation_lines = [
        f"# Quarantine remediation for {agent.name}",
        "",
        "1. Review `manifest.json` and warning results.",
        "2. Confirm no legacy system-wide plist remains.",
        "3. Confirm Podman container state is not stuck in `Stopping` or `Removing`.",
        "4. Reinstall only after `sourceos-agent preflight` passes.",
        "",
        "Useful commands:",
        "",
        f"```bash\nsourceos-agent preflight {agent.name}\nsourceos-agent status {agent.name}\n```",
    ]
    remediation_text = "\n".join(remediation_lines) + "\n"
    results.append(_write_text(qdir / "remediation.md", remediation_text))
    return qdir, results
519+
520+
363521
def list_agents(_args: argparse.Namespace) -> int:
364522
for agent in DEFAULT_AGENTS.values():
365523
print(f"{agent.name}\t{agent.scope}\t{agent.runtime}\t{agent.runtime_image}")
@@ -437,7 +595,21 @@ def restart(args: argparse.Namespace) -> int:
437595

438596

439597
def quarantine(args: argparse.Namespace) -> int:
    """CLI handler for the ``quarantine`` verb.

    Without both ``--execute`` and ``--policy-ok`` this only prints the
    planned action and returns 0. With both flags it runs
    ``_quarantine_agent`` and prints one line per step.

    Args:
        args: Parsed arguments; reads ``agent``, ``execute``, ``policy_ok``
            and ``output_dir``.

    Returns:
        0 when every step succeeded or was skipped (or in plan-only mode),
        1 when at least one step reported ``"warn"``, 2 when the quarantine
        could not be carried out because of a filesystem error.
    """
    agent = _agent_or_error(args.agent)
    output = _expand(args.output_dir)
    if not (args.execute and args.policy_ok):
        print(f"planned quarantine: {agent.name}")
        print(f"would capture evidence and move writable service definitions under: {output}")
        print("mutation not executed; pass --execute --policy-ok to allow guarded local changes")
        return 0

    try:
        qdir, results = _quarantine_agent(agent, output)
    except OSError as exc:
        # E.g. the timestamped folder already exists (two runs within the same
        # second) or the output directory is not writable. Report it cleanly
        # instead of crashing with a traceback; 2 keeps this distinguishable
        # from the warn-level exit code 1 below.
        print(f"quarantine failed for {agent.name}: {exc}", file=sys.stderr)
        return 2

    print(f"quarantined {agent.name}: {qdir}")
    for result in results:
        print(f"[{result.status}] {result.action}: {result.detail}")
    return 1 if any(result.status == "warn" for result in results) else 0
441613

442614

443615
def uninstall(args: argparse.Namespace) -> int:
@@ -480,6 +652,12 @@ def build_parser(prog: str = "sourceos-agent") -> argparse.ArgumentParser:
480652
if name in {"install", "stage", "start", "stop", "restart", "quarantine", "uninstall"}:
481653
p.add_argument("--execute", action="store_true", default=False, help="Execute guarded local mutation")
482654
p.add_argument("--policy-ok", action="store_true", default=False, help="Confirm policy approval")
655+
if name == "quarantine":
656+
p.add_argument(
657+
"--output-dir",
658+
default="~/Desktop/sourceos-quarantine",
659+
help="Directory where quarantine evidence folders are created",
660+
)
483661
p.set_defaults(func=func)
484662
return parser
485663

0 commit comments

Comments
 (0)