Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions platform/docs/BACKUP_AND_RECOVERY.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,16 @@ archive traversal.
OpenClaw CLI path (`openclaw-cli`) or the local tar fallback path
(`tar-fallback`) so automation can tell which recovery mode actually ran.

Backup creation accepts a policy profile, a dry-run switch, and an explicit fallback gate:

- `--profile` (default `control-plane`; supported: `control-plane`, `devflow-checkpoint`, `hypermemory-fast`, `full-data-plane`)
- `--dry-run` (render deterministic include/exclude/retention plan, do not write archives)
- `--allow-fallback` (permit tar fallback if OpenClaw backup create fails)

Default mode is fail-closed for OpenClaw backup create failures: if OpenClaw is
available but `openclaw backup create` fails, the command fails unless
`--allow-fallback` is set.

## Scheduled maintenance

StrongClaw host service activation now installs independent daily jobs:
Expand All @@ -47,6 +57,8 @@ launchd agents:
Commands:

- `clawops recovery --home-dir <home> backup-create`
- `clawops recovery --home-dir <home> backup-create --profile control-plane --dry-run`
- `clawops recovery --home-dir <home> backup-create --allow-fallback`
- `clawops recovery --home-dir <home> backup-verify latest`
- `clawops recovery --home-dir <home> prune-retention`

Expand Down
12 changes: 12 additions & 0 deletions src/clawops/assets/platform/docs/BACKUP_AND_RECOVERY.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,16 @@ archive traversal.
OpenClaw CLI path (`openclaw-cli`) or the local tar fallback path
(`tar-fallback`) so automation can tell which recovery mode actually ran.

Backup creation accepts a policy profile, a dry-run switch, and an explicit fallback gate:

- `--profile` (default `control-plane`; supported: `control-plane`, `devflow-checkpoint`, `hypermemory-fast`, `full-data-plane`)
- `--dry-run` (render deterministic include/exclude/retention plan, do not write archives)
- `--allow-fallback` (permit tar fallback if OpenClaw backup create fails)

Default mode is fail-closed for OpenClaw backup create failures: if OpenClaw is
available but `openclaw backup create` fails, the command fails unless
`--allow-fallback` is set.

## Scheduled maintenance

StrongClaw host service activation now installs independent daily jobs:
Expand All @@ -47,6 +57,8 @@ launchd agents:
Commands:

- `clawops recovery --home-dir <home> backup-create`
- `clawops recovery --home-dir <home> backup-create --profile control-plane --dry-run`
- `clawops recovery --home-dir <home> backup-create --allow-fallback`
- `clawops recovery --home-dir <home> backup-verify latest`
- `clawops recovery --home-dir <home> prune-retention`

Expand Down
19 changes: 19 additions & 0 deletions src/clawops/recovery/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""Policy-driven recovery orchestration helpers."""

from clawops.recovery.models import BackupCreateExecution, BackupPlan, RecoveryProfile
from clawops.recovery.orchestrator import create_backup_execution
from clawops.recovery.policy import (
DEFAULT_RECOVERY_PROFILE,
RECOVERY_PROFILES,
ensure_recovery_profile,
)

# Public names re-exported by the clawops.recovery package (all imported above).
__all__ = [
"BackupCreateExecution",
"BackupPlan",
"DEFAULT_RECOVERY_PROFILE",
"RECOVERY_PROFILES",
"RecoveryProfile",
"create_backup_execution",
"ensure_recovery_profile",
]
65 changes: 65 additions & 0 deletions src/clawops/recovery/backends.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""Recovery backup backend strategies."""

from __future__ import annotations

import pathlib
from collections.abc import Callable
from typing import Protocol

from clawops.strongclaw_runtime import ExecResult

# Callable aliases for the collaborators injected into the backends below.
# WhichFunc: executable lookup; a ``None`` result is treated as "not available".
type WhichFunc = Callable[..., str | bytes | None]
# RunCommandFunc: command runner whose result is an ExecResult.
type RunCommandFunc = Callable[..., ExecResult]
# TarWriter: receives the path of the archive to write and returns nothing.
type TarWriter = Callable[
[pathlib.Path],
None,
]


class BackupBackend(Protocol):
    """Structural contract shared by the archive-producing backends."""

    # Short identifier of the backend.
    name: str

    def create(self, archive_tmp_path: pathlib.Path) -> tuple[bool, str | None]:
        """Produce a single archive at *archive_tmp_path*.

        Returns a ``(succeeded, failure_reason)`` pair; the reason is optional.
        """
        ...


class OpenClawBackupBackend:
    """Backup backend that shells out to the ``openclaw`` CLI."""

    name = "openclaw-cli"

    def __init__(self, *, which: WhichFunc, run_command: RunCommandFunc) -> None:
        # Both collaborators are injected so the backend never touches the
        # process environment directly.
        self._run_command = run_command
        self._which = which

    def is_available(self) -> bool:
        """Report whether the lookup callable can find ``openclaw``."""
        located = self._which("openclaw")
        return located is not None

    def create(self, archive_tmp_path: pathlib.Path) -> tuple[bool, str | None]:
        """Run ``openclaw backup create`` targeting *archive_tmp_path*.

        Returns ``(True, None)`` when the command result is ok. Otherwise
        returns ``(False, detail)`` where *detail* is the stripped stderr,
        else the stripped stdout, else a generic message.
        """
        command = ["openclaw", "backup", "create", str(archive_tmp_path)]
        outcome = self._run_command(command, timeout_seconds=600)
        if not outcome.ok:
            # Prefer stderr; only consult stdout when stderr is blank.
            message = outcome.stderr.strip()
            if not message:
                message = outcome.stdout.strip()
            if not message:
                message = "openclaw backup create failed"
            return False, message
        return True, None


class TarBackupBackend:
    """Fallback backend that delegates archive creation to a tar writer."""

    name = "tar-fallback"

    def __init__(self, *, writer: TarWriter) -> None:
        self._writer = writer

    def create(self, archive_tmp_path: pathlib.Path) -> tuple[bool, str | None]:
        """Hand *archive_tmp_path* to the injected writer.

        Always returns ``(True, None)``; any exception raised by the writer
        propagates to the caller.
        """
        write_archive = self._writer
        write_archive(archive_tmp_path)
        return (True, None)
14 changes: 14 additions & 0 deletions src/clawops/recovery/checkpoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""Checkpoint metadata contracts for recovery surfaces."""

from __future__ import annotations

import dataclasses


@dataclasses.dataclass(frozen=True, slots=True)
class CheckpointRecord:
"""Minimal checkpoint metadata record."""

checkpoint_id: str
scope: str
created_at_ms: int
51 changes: 51 additions & 0 deletions src/clawops/recovery/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""Data models for the recovery subsystem."""

from __future__ import annotations

import dataclasses
import pathlib
from typing import Literal

# Closed set of recovery profile names, expressed as a string Literal so type
# checkers reject any other value.
type RecoveryProfile = Literal[
"control-plane",
"devflow-checkpoint",
"hypermemory-fast",
"full-data-plane",
]


@dataclasses.dataclass(frozen=True, slots=True)
class BackupPlan:
"""Deterministic backup plan payload."""

profile: RecoveryProfile
include_roots: tuple[pathlib.Path, ...]
exclude_roots: tuple[pathlib.Path, ...]
backend_candidates: tuple[str, ...]
estimated_bytes: int
estimated_file_count: int
retention: dict[str, object]

def to_payload(self) -> dict[str, object]:
"""Render a JSON-safe payload."""
return {
"profile": self.profile,
"include_roots": [path.as_posix() for path in self.include_roots],
"exclude_roots": [path.as_posix() for path in self.exclude_roots],
"backend_candidates": list(self.backend_candidates),
"estimated_bytes": self.estimated_bytes,
"estimated_file_count": self.estimated_file_count,
"retention": dict(self.retention),
}


@dataclasses.dataclass(frozen=True, slots=True)
class BackupCreateExecution:
"""Result of backup orchestration."""

plan: BackupPlan
dry_run: bool
archive_path: pathlib.Path | None = None
mode: str | None = None
fallback_used: bool = False
fallback_reason: str | None = None
82 changes: 82 additions & 0 deletions src/clawops/recovery/orchestrator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
"""Recovery create/plan orchestration."""

from __future__ import annotations

import pathlib
import tarfile
import time
from collections.abc import Callable

from clawops.recovery.backends import OpenClawBackupBackend, TarBackupBackend, WhichFunc
from clawops.recovery.models import BackupCreateExecution, RecoveryProfile
from clawops.recovery.planner import build_backup_plan
from clawops.strongclaw_runtime import CommandError, ExecResult

# Callable aliases for the collaborators injected into create_backup_execution.
# NOTE(review): RunCommandFunc and TarWriter repeat the aliases declared in
# clawops.recovery.backends (WhichFunc is already imported from there) — consider
# importing them as well so the two definitions cannot drift apart.
type RunCommandFunc = Callable[..., ExecResult]
type TarWriter = Callable[[pathlib.Path], None]
# SafeUnlink: removes the given path. It is invoked on paths that may not exist
# yet, so it is presumably tolerant of missing files — confirm against the caller.
type SafeUnlink = Callable[[pathlib.Path], None]


def create_backup_execution(
    *,
    home_dir: pathlib.Path,
    openclaw_state_root: pathlib.Path,
    backup_root: pathlib.Path,
    legacy_backup_root: pathlib.Path,
    profile: RecoveryProfile,
    allow_fallback: bool,
    dry_run: bool,
    tar_writer: TarWriter,
    safe_unlink: SafeUnlink,
    which: WhichFunc,
    run_command: RunCommandFunc,
) -> BackupCreateExecution:
    """Plan or create one recovery backup archive.

    The plan is always built first. With *dry_run* set, only the plan is
    returned and nothing is written. Otherwise the OpenClaw CLI backend is
    tried when its executable is available, and the tar writer is used when
    OpenClaw is unavailable or when it failed and *allow_fallback* is set.
    Archives are written to a hidden temporary name inside *backup_root* and
    renamed into place only after the backend reports success.

    Args:
        home_dir: Accepted for interface compatibility; not read here.
        openclaw_state_root: Root directory to include in the backup plan.
        backup_root: Directory receiving the archive (created if missing);
            also excluded from the plan.
        legacy_backup_root: Additional directory excluded from the plan.
        profile: Recovery profile the plan is built for.
        allow_fallback: Permit the tar writer after an OpenClaw failure.
        dry_run: Return the plan without writing an archive.
        tar_writer: Callable that writes the fallback archive to a path.
        safe_unlink: Callable that removes a temporary or partial file.
        which: Executable lookup used to detect the OpenClaw CLI.
        run_command: Command runner used to invoke the OpenClaw CLI.

    Returns:
        The execution record: plan only for a dry run, otherwise the final
        archive path, the backend mode, and any recorded fallback reason.

    Raises:
        CommandError: If OpenClaw fails and fallback is not allowed, if the
            finished OpenClaw archive cannot be moved into place, or if the
            tar fallback fails with ``OSError`` / ``tarfile.TarError``.
    """
    plan = build_backup_plan(
        profile=profile,
        include_root=openclaw_state_root,
        backup_root=backup_root,
        legacy_backup_root=legacy_backup_root,
    )
    if dry_run:
        return BackupCreateExecution(plan=plan, dry_run=True)

    backup_root.mkdir(parents=True, exist_ok=True)
    # Second-resolution local-time stamp: two runs within the same second
    # target the same archive name.
    stamp = time.strftime("%Y%m%d-%H%M%S", time.localtime())
    archive_path = backup_root / f"openclaw-{stamp}.tar.gz"
    archive_tmp_path = backup_root / f".{archive_path.name}.tmp"

    openclaw_backend = OpenClawBackupBackend(which=which, run_command=run_command)
    fallback_reason: str | None = None
    if openclaw_backend.is_available():
        # Clear any stale temp file left by an earlier interrupted run.
        safe_unlink(archive_tmp_path)
        backend_ok, backend_error = openclaw_backend.create(archive_tmp_path)
        if backend_ok:
            try:
                archive_tmp_path.replace(archive_path)
            except OSError as exc:
                # The CLI reported success but the archive could not be
                # promoted (e.g. it never wrote the file). Report it the same
                # way the fallback path does instead of leaking a raw OSError.
                safe_unlink(archive_tmp_path)
                raise CommandError(f"backup creation failed: {exc}") from exc
            return BackupCreateExecution(
                plan=plan,
                dry_run=False,
                archive_path=archive_path,
                mode=openclaw_backend.name,
            )
        safe_unlink(archive_tmp_path)
        if not allow_fallback:
            # Fail closed: an available-but-failing OpenClaw is an error
            # unless the caller explicitly opted into the tar fallback.
            raise CommandError(backend_error or "openclaw backup create failed")
        fallback_reason = backend_error

    safe_unlink(archive_tmp_path)
    fallback_backend = TarBackupBackend(writer=tar_writer)
    try:
        fallback_backend.create(archive_tmp_path)
        archive_tmp_path.replace(archive_path)
    except (OSError, tarfile.TarError) as exc:
        safe_unlink(archive_tmp_path)
        safe_unlink(archive_path)
        raise CommandError(f"backup creation failed: {exc}") from exc
    return BackupCreateExecution(
        plan=plan,
        dry_run=False,
        archive_path=archive_path,
        mode=fallback_backend.name,
        # Only set after an OpenClaw failure; a tar run because OpenClaw is
        # not installed is not recorded as a fallback.
        fallback_used=fallback_reason is not None,
        fallback_reason=fallback_reason,
    )
58 changes: 58 additions & 0 deletions src/clawops/recovery/planner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""Deterministic recovery backup planning."""

from __future__ import annotations

import pathlib

from clawops.recovery.models import BackupPlan, RecoveryProfile
from clawops.recovery.policy import retention_for_profile


def _is_path_within(path: pathlib.Path, root: pathlib.Path) -> bool:
    """Return whether *path* equals *root* or lies beneath it.

    The comparison is purely lexical (no filesystem access), so callers are
    expected to pass already-resolved paths.
    """
    return path.is_relative_to(root)


def _estimate_plan(
    include_root: pathlib.Path, exclude_roots: tuple[pathlib.Path, ...]
) -> tuple[int, int]:
    """Estimate how many regular files, and how many bytes, a plan covers.

    Walks *include_root* recursively and skips symlinks, non-regular entries,
    and anything whose resolved path falls inside one of *exclude_roots*.

    Args:
        include_root: Directory to scan.
        exclude_roots: Resolved directories whose contents are not counted.

    Returns:
        ``(file_count, total_bytes)`` — the count comes first.
    """
    file_count = 0
    total_bytes = 0
    # The totals do not depend on visit order, so stream the walk instead of
    # materialising and sorting every path up front.
    for path in include_root.rglob("*"):
        if path.is_symlink() or not path.is_file():
            continue
        resolved = path.resolve()
        if any(_is_path_within(resolved, root) for root in exclude_roots):
            continue
        try:
            size = path.stat().st_size
        except OSError:
            # The tree may be in use: a file can vanish or become unreadable
            # between the is_file() check and stat(). This is only an
            # estimate, so skip the entry rather than abort planning.
            continue
        file_count += 1
        total_bytes += size
    return file_count, total_bytes


def build_backup_plan(
    *,
    profile: RecoveryProfile,
    include_root: pathlib.Path,
    backup_root: pathlib.Path,
    legacy_backup_root: pathlib.Path,
) -> BackupPlan:
    """Assemble the backup plan for *profile*.

    All three roots are resolved first. The resolved include root is the only
    included root, and both backup roots are excluded. File and byte estimates
    are computed over that selection, and the retention metadata comes from
    the profile.
    """
    source_root = include_root.resolve()
    excluded = tuple(root.resolve() for root in (backup_root, legacy_backup_root))
    # The estimator reports the file count first, then the byte total.
    file_count, byte_count = _estimate_plan(source_root, excluded)
    retention = retention_for_profile(profile)
    return BackupPlan(
        profile=profile,
        include_roots=(source_root,),
        exclude_roots=excluded,
        backend_candidates=("openclaw-cli", "tar-fallback"),
        estimated_bytes=byte_count,
        estimated_file_count=file_count,
        retention=retention,
    )
41 changes: 41 additions & 0 deletions src/clawops/recovery/policy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""Recovery policy defaults and profile validation."""

from __future__ import annotations

from typing import TypeGuard

from clawops.recovery.models import RecoveryProfile

# Every supported recovery profile name, in the order used when listing the
# valid choices in validation errors.
RECOVERY_PROFILES: tuple[RecoveryProfile, ...] = (
"control-plane",
"devflow-checkpoint",
"hypermemory-fast",
"full-data-plane",
)
# Default profile name; matches the documented default of `--profile`.
DEFAULT_RECOVERY_PROFILE: RecoveryProfile = "control-plane"


def ensure_recovery_profile(raw_profile: str) -> RecoveryProfile:
    """Check that *raw_profile* is a declared profile and return it unchanged.

    Raises:
        ValueError: If the value is not one of the declared recovery profiles;
            the message lists the accepted choices.
    """
    if _is_recovery_profile(raw_profile):
        return raw_profile
    choices = ", ".join(RECOVERY_PROFILES)
    message = f"unsupported recovery profile {raw_profile!r}; expected one of: {choices}"
    raise ValueError(message)


def _is_recovery_profile(raw_profile: str) -> TypeGuard[RecoveryProfile]:
    """Type guard: true when *raw_profile* is a member of RECOVERY_PROFILES."""
    declared_profiles = RECOVERY_PROFILES
    return raw_profile in declared_profiles


def retention_for_profile(profile: RecoveryProfile) -> dict[str, object]:
"""Return profile-specific retention policy metadata."""
if profile == "control-plane":
return {"daily": 7, "weekly": 2}
if profile == "devflow-checkpoint":
return {"active_run_latest": 3, "completed_runs": 5, "completed_retention_days": 7}
if profile == "hypermemory-fast":
return {"checkpoints": 5}
return {"weekly": 2}
Loading
Loading