"""Office command helpers.

This module implements the first read-only / dry-run slice of the SourceOS
Office Plane. It does not create, convert, or modify files. It renders
structured plans that can later be executed under policy by LibreOffice,
Collabora, ONLYOFFICE, Microsoft Graph, Google Workspace, or SourceOS-native
backends.
"""
| 9 | + |
| 10 | +from __future__ import annotations |
| 11 | + |
| 12 | +import hashlib |
| 13 | +import json |
| 14 | +import mimetypes |
| 15 | +import os |
| 16 | +import platform |
| 17 | +import shutil |
| 18 | +import sys |
| 19 | +from pathlib import Path |
| 20 | +from typing import Any, Dict, Optional |
| 21 | + |
| 22 | + |
# Workroom identifier used when the caller does not supply one.
DEFAULT_WORKROOM_ID = "workroom-local-default"

# Default filesystem roots. They are stored with a leading "~" and are
# expanded / redacted by the path helpers at the point of use.
DEFAULT_OUTPUT_ROOT = "~/Documents/SourceOS/agent-output"
DEFAULT_DOWNLOADS_ROOT = "~/Downloads/SourceOS/agent-downloads"
DEFAULT_TEMPLATE_ROOT = "~/dev"

# Schema URLs echoed in the "contracts" section of emitted payloads.
OFFICE_ARTIFACT_SCHEMA = "https://socioprophet.io/schemas/workspace/office-artifact.schema.json"
PROFESSIONAL_WORKROOM_SCHEMA = "https://socioprophet.io/schemas/workspace/professional-workroom.schema.json"

# Artifact kinds known to the Office Plane.
SUPPORTED_ARTIFACT_TYPES = [
    "document", "spreadsheet", "slide-deck",
    "pdf", "mail-draft", "calendar-item",
    "task-list", "note", "media-asset",
]

# File extensions (without the leading dot) reported as supported.
SUPPORTED_FORMATS = [
    "docx", "odt", "md", "pdf",
    "xlsx", "ods", "csv",
    "pptx", "odp",
    "eml", "ics", "json", "txt",
    "png", "jpg", "wav", "m4a",
]

# Backend engine associated with each execution mode.
DEFAULT_BACKEND_BY_MODE = dict(
    [
        ("local-headless", "libreoffice"),
        ("browser-collab", "collabora"),
        ("remote-api", "microsoft-graph"),
        ("native", "sourceos-native"),
        ("manual-upload", "manual"),
    ]
)
| 70 | + |
| 71 | + |
def _expand(path: str) -> str:
    """Return *path* as an absolute path, expanding a leading ``~`` first."""
    with_home = os.path.expanduser(path)
    return os.path.abspath(with_home)
| 74 | + |
| 75 | + |
def _redact_home(path: str) -> str:
    """Return the absolute form of *path* with the home directory masked.

    The path is expanded to an absolute path first. If it is the current
    user's home directory, or lies beneath it, that leading portion is
    replaced by the literal text ``$HOME``; any other path is returned in
    its expanded form unchanged.
    """
    home_dir = str(Path.home())
    absolute = _expand(path)
    if absolute == home_dir:
        return "$HOME"
    # Require a separator after the home prefix so that a sibling directory
    # such as "/home/alice2" is not mistaken for something under "/home/alice".
    if not absolute.startswith(home_dir + os.sep):
        return absolute
    remainder = absolute[len(home_dir):]
    return "$HOME" + remainder
| 84 | + |
| 85 | + |
def _print_json(payload: Dict[str, Any]) -> int:
    """Print *payload* to stdout as JSON and return exit code ``0``.

    The output is indented by two spaces with keys sorted.
    """
    rendered = json.dumps(payload, sort_keys=True, indent=2)
    print(rendered)
    return 0
| 89 | + |
| 90 | + |
def _which_any(candidates: list[str]) -> Optional[str]:
    """Return the path of the first candidate that ``shutil.which`` resolves.

    Candidates are tried in order; ``None`` is returned when none resolve
    (including when *candidates* is empty).
    """
    # The generator is lazy, so the lookup stops at the first hit.
    resolved = (shutil.which(name) for name in candidates)
    return next((hit for hit in resolved if hit), None)
| 97 | + |
| 98 | + |
def _libreoffice_path() -> Optional[str]:
    """Return the path of a LibreOffice launcher, or ``None`` if none is found.

    The names ``soffice``, ``libreoffice`` and ``lowriter`` are tried in that
    order.
    """
    # Homebrew/macOS may expose `soffice` or `libreoffice`; Linux distributions vary.
    launcher_names = ["soffice", "libreoffice", "lowriter"]
    return _which_any(launcher_names)
| 102 | + |
| 103 | + |
def _sha256(path: Path) -> Optional[str]:
    """Return the SHA-256 digest of the file at *path*, prefixed with ``sha256:``.

    Returns ``None`` when *path* does not exist or is not a regular file.
    The file is read in 1 MiB pieces rather than loaded in one call.
    """
    if not (path.exists() and path.is_file()):
        return None

    piece_size = 1024 * 1024
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            piece = stream.read(piece_size)
            if not piece:
                break
            hasher.update(piece)
    return f"sha256:{hasher.hexdigest()}"
| 112 | + |
| 113 | + |
def _artifact_plan(args, operation: str) -> Dict[str, Any]:
    """Build the dry-run ``OfficeArtifactPlan`` payload for *operation*.

    Args:
        args: Namespace-like object. The optional attributes
            ``artifact_type``, ``format``, ``title``, ``workroom_id``,
            ``output_root``, ``downloads_root``, ``template_root``,
            ``backend`` and ``mode`` are read; an attribute that is missing
            or falsy (for example ``None``) falls back to its default.
        operation: Operation name recorded in the plan and its labels.
            ``createsFiles`` is true only for ``"generate"`` and
            ``"convert"``.

    Returns:
        A JSON-serialisable dict. Filesystem roots are reported with the
        home directory redacted; nothing is created or modified.
    """

    def option(name: str, default: str) -> str:
        # Treat "attribute absent" and "attribute present but None/empty"
        # the same way, so an unset option can never reach the path helpers
        # as None (os.path.expanduser(None) raises TypeError).
        return getattr(args, name, None) or default

    artifact_type = option("artifact_type", "document")
    fmt = option("format", "docx")
    title = option("title", "Untitled Office Artifact")
    workroom_id = option("workroom_id", DEFAULT_WORKROOM_ID)
    output_root = option("output_root", DEFAULT_OUTPUT_ROOT)
    downloads_root = option("downloads_root", DEFAULT_DOWNLOADS_ROOT)
    template_root = option("template_root", DEFAULT_TEMPLATE_ROOT)
    mode = option("mode", "local-headless")
    # When no backend is named, pick the one associated with the mode; an
    # unknown mode keeps the historical "libreoffice" fallback.
    backend = option("backend", DEFAULT_BACKEND_BY_MODE.get(mode, "libreoffice"))

    # Derive an identifier-safe slug: lower-case, spaces become hyphens, and
    # everything except alphanumerics, "-" and "_" is dropped (max 80 chars).
    slug = title.lower().strip().replace(" ", "-") or "office-artifact"
    safe_slug = "".join(ch for ch in slug if ch.isalnum() or ch in "-_")[:80] or "office-artifact"
    storage_ref = f"sourceos-office://{workroom_id}/output/{safe_slug}.{fmt}"

    return {
        "type": "OfficeArtifactPlan",
        "specVersion": "0.1.0",
        "operation": operation,
        "dryRun": True,
        "contracts": {
            "officeArtifactSchema": OFFICE_ARTIFACT_SCHEMA,
            "professionalWorkroomSchema": PROFESSIONAL_WORKROOM_SCHEMA,
        },
        "officeArtifact": {
            "schemaVersion": "v0.1",
            "artifactId": f"office-artifact-{safe_slug}",
            "workroomId": workroom_id,
            "artifactType": artifact_type,
            "title": title,
            "status": "draft",
            "format": fmt,
            "storageRef": storage_ref,
            "backend": {
                "engine": backend,
                "mode": mode,
                "versionRef": f"urn:srcos:office-backend:{backend}-{mode}",
            },
            "agentRunRefs": [],
            "policyRefs": [],
            "evidenceRefs": [],
            "labels": {
                "sourceos.surface": "office-plane",
                "sourceos.operation": operation,
            },
        },
        "paths": {
            "outputRoot": _redact_home(output_root),
            "downloadsRoot": _redact_home(downloads_root),
            "templateRoot": _redact_home(template_root),
        },
        "sideEffectPolicy": {
            "createsFiles": operation in {"generate", "convert"},
            "currentlyExecuted": False,
            "requiresHumanReviewBeforeExternalSend": True,
            "mailSendDeniedByDefault": True,
        },
    }
| 170 | + |
| 171 | + |
def doctor(args) -> int:
    """Print an ``OfficeDoctor`` report of backend availability.

    A LibreOffice launcher is looked up on the local machine; the Collabora
    and ONLYOFFICE entries are always reported as unavailable. *args* is not
    read. Returns the value returned by ``_print_json``.
    """
    soffice = _libreoffice_path()

    host_info = {
        "system": platform.system(),
        "machine": platform.machine(),
    }
    libreoffice_entry = {
        "available": soffice is not None,
        "path": soffice,
        "mode": "local-headless",
        "role": "default SourceOS local render/generate/convert backend",
    }
    collabora_entry = {
        "available": False,
        "mode": "browser-collab",
        "role": "future WOPI/browser collaboration backend",
    }
    onlyoffice_entry = {
        "available": False,
        "mode": "browser-collab or document-builder",
        "role": "future optional DOCX/XLSX/PPTX builder/editor backend",
    }
    contracts = {
        "officeArtifactSchema": OFFICE_ARTIFACT_SCHEMA,
        "professionalWorkroomSchema": PROFESSIONAL_WORKROOM_SCHEMA,
    }

    report = {
        "type": "OfficeDoctor",
        "specVersion": "0.1.0",
        "host": host_info,
        "backends": {
            "libreoffice": libreoffice_entry,
            "collabora": collabora_entry,
            "onlyoffice": onlyoffice_entry,
        },
        "contracts": contracts,
    }
    return _print_json(report)
| 206 | + |
| 207 | + |
def plan(args) -> int:
    """Print the artifact plan for the ``"plan"`` operation as JSON.

    Returns the value returned by ``_print_json``.
    """
    plan_payload = _artifact_plan(args, "plan")
    return _print_json(plan_payload)
| 211 | + |
| 212 | + |
def generate(args) -> int:
    """Print a generation plan; only dry-run invocations are accepted.

    ``args.dry_run`` defaults to true when absent. When it is falsy an error
    is written to stderr and ``1`` is returned. Otherwise the plan is
    extended with ``templateRef`` and ``generationInputs`` (taken from the
    optional ``template``, ``prompt_ref`` and ``data_ref`` attributes) and
    printed as JSON.
    """
    dry_run = getattr(args, "dry_run", True)
    if dry_run:
        plan_payload = _artifact_plan(args, "generate")
        plan_payload.update(
            templateRef=getattr(args, "template", None),
            generationInputs={
                "promptRef": getattr(args, "prompt_ref", None),
                "dataRef": getattr(args, "data_ref", None),
            },
        )
        return _print_json(plan_payload)

    print("error: office generate is dry-run only in this release", file=sys.stderr)
    return 1
| 225 | + |
| 226 | + |
def convert(args) -> int:
    """Print a conversion plan; only dry-run invocations are accepted.

    Args:
        args: Namespace-like object. ``input`` (source file path) and ``to``
            (target format) must be present. ``dry_run`` defaults to true
            when absent. ``output_root`` falls back to
            ``DEFAULT_OUTPUT_ROOT`` when missing or falsy. The remaining
            optional attributes are consumed by ``_artifact_plan``.

    Returns:
        ``1`` after writing an error to stderr when ``dry_run`` is falsy;
        otherwise the value returned by ``_print_json``. No conversion is
        executed.
    """
    if not getattr(args, "dry_run", True):
        print("error: office convert is dry-run only in this release", file=sys.stderr)
        return 1

    # Mirror _artifact_plan: an attribute that exists but is None must fall
    # back to the default rather than reach os.path.expanduser(None).
    output_root = getattr(args, "output_root", None) or DEFAULT_OUTPUT_ROOT

    payload = _artifact_plan(args, "convert")
    payload["conversion"] = {
        "input": _redact_home(args.input),
        "inputExists": Path(_expand(args.input)).exists(),
        "toFormat": args.to,
        "outputRoot": _redact_home(output_root),
        # Template of the command a later, executing release would run;
        # the placeholders are intentionally left unsubstituted.
        "backendCommand": "soffice --headless --convert-to <format> --outdir <outputRoot> <input>",
        "willExecute": False,
    }
    return _print_json(payload)
| 242 | + |
| 243 | + |
def inspect(args) -> int:
    """Print metadata for the file at ``args.path`` without modifying it.

    When the path does not exist or is not a regular file, an error is
    written to stderr and ``1`` is returned. Otherwise an
    ``OfficeArtifactInspection`` payload (redacted path, size, extension,
    whether the extension is in ``SUPPORTED_FORMATS``, guessed MIME type and
    SHA-256 digest) is printed as JSON.
    """
    target = Path(_expand(args.path))

    if not target.exists():
        failure = f"error: office artifact not found: {args.path}"
    elif not target.is_file():
        failure = f"error: office artifact is not a file: {args.path}"
    else:
        failure = None
    if failure is not None:
        print(failure, file=sys.stderr)
        return 1

    # Extension without the dot, lower-cased; empty when the file has none.
    extension = target.suffix.lower().lstrip(".")
    guessed_mime = mimetypes.guess_type(str(target))[0]

    report = {
        "type": "OfficeArtifactInspection",
        "specVersion": "0.1.0",
        "path": _redact_home(str(target)),
        "exists": True,
        "sizeBytes": target.stat().st_size,
        "format": extension if extension else None,
        "supportedFormat": extension in SUPPORTED_FORMATS,
        "mimeType": guessed_mime,
        "sha256": _sha256(target),
        "contracts": {
            "officeArtifactSchema": OFFICE_ARTIFACT_SCHEMA,
        },
    }
    return _print_json(report)
| 271 | + |
| 272 | + |
def evidence_inspect(args) -> int:
    """Print a summary of an Office Plane evidence JSON file.

    Args:
        args: Namespace-like object whose ``path`` attribute names the JSON
            file to read.

    Returns:
        ``1`` after writing an error to stderr when the file is missing,
        cannot be read or decoded, or is not valid JSON; otherwise the value
        returned by ``_print_json``.

    Each summary field is taken from the nested ``officeArtifact`` object
    when that object is a dict containing the key, and from the top level of
    the document otherwise. A document that is not a JSON object yields
    ``None`` for every field and an empty ``evidenceRefs`` list.
    """
    path = Path(args.path)
    if not path.exists():
        print(f"error: evidence file not found: {path}", file=sys.stderr)
        return 1
    try:
        # Pass bytes so json.loads detects the UTF-8/16/32 encoding itself
        # instead of depending on the locale's default text encoding.
        payload = json.loads(path.read_bytes())
    except json.JSONDecodeError as exc:
        print(f"error: invalid JSON: {exc}", file=sys.stderr)
        return 1
    except (OSError, UnicodeDecodeError) as exc:
        # Covers a directory, a permission problem, or undecodable bytes.
        print(f"error: cannot read evidence file: {exc}", file=sys.stderr)
        return 1

    document = payload if isinstance(payload, dict) else {}
    nested = document.get("officeArtifact")
    if not isinstance(nested, dict):
        nested = {}

    def field(key: str, default: Any = None) -> Any:
        # Prefer the nested officeArtifact value; fall back to the top level
        # so flat documents (no officeArtifact wrapper) are summarised too.
        if key in nested:
            return nested[key]
        return document.get(key, default)

    summary = {
        "path": str(path),
        "type": document.get("type"),
        "artifactId": field("artifactId"),
        "workroomId": field("workroomId"),
        "artifactType": field("artifactType"),
        "format": field("format"),
        "evidenceRefs": field("evidenceRefs", []),
    }
    return _print_json(summary)
0 commit comments