diff --git a/README.md b/README.md index c20302a..1f4998c 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,7 @@ sourceosctl [--version] [] [options] | `sourceosctl office plan` | Render an OfficeArtifact-compatible workroom artifact plan | | `sourceosctl office generate --dry-run` | Render an Office generation plan without writing files | | `sourceosctl office generate --execute --policy-ok --format md|txt|json` | Write a guarded text/Markdown/JSON artifact and emit OfficeArtifactEvidence | +| `sourceosctl office generate --execute --policy-ok --format docx|xlsx|pptx` | Write a guarded minimal OOXML artifact and emit OfficeArtifactEvidence | | `sourceosctl office convert --to --dry-run` | Render a LibreOffice-style conversion plan without writing files | | `sourceosctl office convert --to --execute --policy-ok` | Run guarded local LibreOffice conversion and emit OfficeArtifactEvidence | | `sourceosctl office inspect ` | Inspect a local office artifact file and hash it | @@ -96,6 +97,9 @@ python3 bin/sourceosctl office doctor python3 bin/sourceosctl office plan --artifact-type slide-deck --format pptx --title "Demo Deck" python3 bin/sourceosctl office generate --dry-run --artifact-type document --format docx --title "Demo Report" python3 bin/sourceosctl office generate --execute --policy-ok --artifact-type document --format md --title "Demo Report" --evidence-out ./office-evidence.json +python3 bin/sourceosctl office generate --execute --policy-ok --artifact-type document --format docx --title "Demo Report" --evidence-out ./office-docx-evidence.json +python3 bin/sourceosctl office generate --execute --policy-ok --artifact-type spreadsheet --format xlsx --title "Demo Workbook" --evidence-out ./office-xlsx-evidence.json +python3 bin/sourceosctl office generate --execute --policy-ok --artifact-type slide-deck --format pptx --title "Demo Deck" --evidence-out ./office-pptx-evidence.json python3 bin/sourceosctl office convert ./example.docx --to pdf --dry-run python3 bin/sourceosctl office convert ./example.docx --to pdf --execute --policy-ok --evidence-out ./office-convert-evidence.json ``` @@ -145,10 +149,11 @@ Backends are modeled as an abstraction: - Microsoft Graph / Office 365 and Google Workspace: compatibility adapters, not core authority. - SourceOS-native: future native document surfaces. -Guarded Office execution is intentionally narrow: +Guarded Office execution is intentionally bounded: -- `office generate --execute --policy-ok` currently writes only `txt`, `md`, or `json` artifacts. -- Office binary generation (`docx`, `xlsx`, `pptx`, `odt`, `ods`, `odp`) remains disabled until template/render backends are hardened. +- `office generate --execute --policy-ok` writes `txt`, `md`, `json`, `docx`, `xlsx`, or `pptx` artifacts. +- DOCX/XLSX/PPTX generation uses a minimal dependency-light OOXML bootstrap builder, not a full template or collaboration engine. +- ODT/ODS/ODP and other binary formats remain conversion/backend territory until LibreOffice/Collabora/ONLYOFFICE template backends are hardened. - `office convert --execute --policy-ok` uses local LibreOffice/`soffice` when available. - All guarded Office execution emits or writes `OfficeArtifactEvidence`. - Email sending, external publishing, and calendar modification remain policy-gated side effects and are not enabled here. diff --git a/sourceosctl/commands/office.py b/sourceosctl/commands/office.py index 33af0af..c4ed946 100644 --- a/sourceosctl/commands/office.py +++ b/sourceosctl/commands/office.py @@ -1,9 +1,9 @@ """office command helpers. -This module implements SourceOS Office Plane planning plus the first guarded -local execution slice. Dry-run remains the default. File-writing behavior is -available only behind --execute --policy-ok, writes only to explicit output -roots, and emits OfficeArtifactEvidence-compatible JSON. +This module implements SourceOS Office Plane planning plus guarded local +execution. Dry-run remains the default. File-writing behavior is available +only behind --execute --policy-ok, writes only to explicit output roots, and +emits OfficeArtifactEvidence-compatible JSON. """ from __future__ import annotations @@ -20,6 +20,8 @@ from pathlib import Path from typing import Any, Dict, Optional +from sourceosctl.commands.ooxml import OOXML_GENERATION_FORMATS, write_ooxml_artifact + DEFAULT_WORKROOM_ID = "workroom-local-default" DEFAULT_OUTPUT_ROOT = "~/Documents/SourceOS/agent-output" @@ -63,6 +65,7 @@ ] TEXT_GENERATION_FORMATS = {"txt", "md", "json"} +GUARDED_GENERATION_FORMATS = TEXT_GENERATION_FORMATS | OOXML_GENERATION_FORMATS DEFAULT_BACKEND_BY_MODE = { "local-headless": "libreoffice", @@ -344,7 +347,7 @@ def plan(args) -> int: def generate(args) -> int: - """Render or execute a guarded text/json/markdown generation plan.""" + """Render or execute guarded text/json/OOXML generation.""" execute = bool(getattr(args, "execute", False)) payload = _artifact_plan(args, "generate") payload["templateRef"] = getattr(args, "template", None) @@ -361,9 +364,9 @@ def generate(args) -> int: return 1 fmt = payload["officeArtifact"]["format"] - if fmt not in TEXT_GENERATION_FORMATS: + if fmt not in GUARDED_GENERATION_FORMATS: print( - "error: guarded generation currently supports only txt, md, or json; use convert for Office binary formats", + "error: guarded generation currently supports txt, md, json, docx, xlsx, and pptx; use convert or backend adapters for other formats", file=sys.stderr, ) return 1 @@ -372,7 +375,8 @@ def generate(args) -> int: output_path.parent.mkdir(parents=True, exist_ok=True) if fmt == "json": output_path.write_text(json.dumps(payload, indent=2, sort_keys=True), encoding="utf-8") - else: + notes = "sourceosctl guarded JSON Office Plane artifact generation" + elif fmt in TEXT_GENERATION_FORMATS: output_path.write_text( f"# {payload['officeArtifact']['title']}\n\n" "Generated by sourceosctl Office Plane guarded execution.\n\n" @@ -380,8 +384,24 @@ def generate(args) -> int: f"Artifact: {payload['officeArtifact']['artifactId']}\n", encoding="utf-8", ) + notes = "sourceosctl guarded text/Markdown Office Plane artifact generation" + else: + write_ooxml_artifact( + fmt=fmt, + path=output_path, + title=payload["officeArtifact"]["title"], + workroom_id=payload["officeArtifact"]["workroomId"], + artifact_id=payload["officeArtifact"]["artifactId"], + ) + notes = "sourceosctl guarded minimal OOXML Office Plane artifact generation" - evidence = _build_evidence(plan=payload, operation="generate", status="requires-review", output_path=output_path) + evidence = _build_evidence( + plan=payload, + operation="generate", + status="requires-review", + output_path=output_path, + notes=notes, + ) evidence_out = getattr(args, "evidence_out", None) if evidence_out: _write_json(evidence_out, evidence) diff --git a/sourceosctl/commands/ooxml.py b/sourceosctl/commands/ooxml.py new file mode 100644 index 0000000..04a28dc --- /dev/null +++ b/sourceosctl/commands/ooxml.py @@ -0,0 +1,192 @@ +"""Minimal OOXML artifact builders for SourceOS Office Plane. + +These helpers intentionally use only Python's standard library. They create +small, deterministic-enough DOCX/XLSX/PPTX ZIP packages for guarded local +artifact generation. They are not a replacement for LibreOffice, Collabora, +ONLYOFFICE, or a full template engine; they are the safe local bootstrap path +for simple agent-authored workroom artifacts. +""" + +from __future__ import annotations + +from html import escape +from pathlib import Path +from zipfile import ZIP_DEFLATED, ZipFile + + +OOXML_GENERATION_FORMATS = {"docx", "xlsx", "pptx"} + + +def _xml(text: str) -> str: + return escape(text, quote=True) + + +def _write_zip(path: Path, files: dict[str, str]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + with ZipFile(path, "w", ZIP_DEFLATED) as zf: + for name in sorted(files): + zf.writestr(name, files[name]) + + +def write_ooxml_artifact( + *, + fmt: str, + path: Path, + title: str, + workroom_id: str, + artifact_id: str, +) -> None: + """Write a minimal OOXML artifact to path. + + Args: + fmt: one of docx, xlsx, pptx. + path: output path. + title: human-readable artifact title. + workroom_id: Professional Workroom id. + artifact_id: OfficeArtifact id. + """ + if fmt == "docx": + _write_docx(path=path, title=title, workroom_id=workroom_id, artifact_id=artifact_id) + return + if fmt == "xlsx": + _write_xlsx(path=path, title=title, workroom_id=workroom_id, artifact_id=artifact_id) + return + if fmt == "pptx": + _write_pptx(path=path, title=title, workroom_id=workroom_id, artifact_id=artifact_id) + return + raise ValueError(f"unsupported OOXML generation format: {fmt}") + + +def _write_docx(*, path: Path, title: str, workroom_id: str, artifact_id: str) -> None: + document = f''' + + + {_xml(title)} + Generated by SourceOS Office Plane guarded OOXML generation. + Workroom: {_xml(workroom_id)} + Artifact: {_xml(artifact_id)} + + + +''' + files = { + "[Content_Types].xml": ''' + + + + + +''', + "_rels/.rels": ''' + + + +''', + "word/document.xml": document, + } + _write_zip(path, files) + + +def _write_xlsx(*, path: Path, title: str, workroom_id: str, artifact_id: str) -> None: + rows = [ + ("Title", title), + ("Generated By", "SourceOS Office Plane guarded OOXML generation"), + ("Workroom", workroom_id), + ("Artifact", artifact_id), + ] + row_xml = [] + for idx, (key, value) in enumerate(rows, start=1): + row_xml.append( + f'''{_xml(key)}{_xml(value)}''' + ) + sheet = f''' + + + {''.join(row_xml)} + + +''' + files = { + "[Content_Types].xml": ''' + + + + + + +''', + "_rels/.rels": ''' + + + +''', + "xl/workbook.xml": ''' + + + +''', + "xl/_rels/workbook.xml.rels": ''' + + + +''', + "xl/worksheets/sheet1.xml": sheet, + } + _write_zip(path, files) + + +def _write_pptx(*, path: Path, title: str, workroom_id: str, artifact_id: str) -> None: + slide = f''' + + + + + + + + + {_xml(title)} + + + + + + Generated by SourceOS Office Plane guarded OOXML generation. + Workroom: {_xml(workroom_id)} + Artifact: {_xml(artifact_id)} + + + + + + +''' + files = { + "[Content_Types].xml": ''' + + + + + + +''', + "_rels/.rels": ''' + + + +''', + "ppt/presentation.xml": ''' + + + + + +''', + "ppt/_rels/presentation.xml.rels": ''' + + + +''', + "ppt/slides/slide1.xml": slide, + } + _write_zip(path, files) diff --git a/tests/test_office_cli.py b/tests/test_office_cli.py index 4ce7de5..7e67545 100644 --- a/tests/test_office_cli.py +++ b/tests/test_office_cli.py @@ -6,6 +6,7 @@ import sys import tempfile import unittest +import zipfile _REPO_ROOT = pathlib.Path(__file__).parent.parent sys.path.insert(0, str(_REPO_ROOT)) @@ -39,6 +40,13 @@ def _office_args(**overrides): return _Args(**values) +def _assert_zip_contains(path, members): + with zipfile.ZipFile(path, "r") as archive: + names = set(archive.namelist()) + for member in members: + assert member in names + + class TestOfficeCommands(unittest.TestCase): def test_office_doctor_direct(self): result = office.doctor(_Args()) @@ -91,12 +99,12 @@ def test_office_generate_execute_requires_policy_ok(self): ) self.assertEqual(office.generate(args), 1) - def test_office_generate_execute_rejects_binary_formats(self): + def test_office_generate_execute_rejects_unsupported_binary_formats(self): with tempfile.TemporaryDirectory() as tmpdir: args = _office_args( execute=True, policy_ok=True, - format="docx", + format="odt", output_root=tmpdir, template=None, prompt_ref=None, @@ -130,6 +138,83 @@ def test_office_generate_execute_writes_markdown_and_evidence(self): self.assertEqual(evidence["kind"], "OfficeArtifactEvidence") self.assertEqual(evidence["operation"], "generate") self.assertEqual(evidence["status"], "requires-review") + self.assertTrue(evidence["artifactHashes"][0]["sha256"].startswith("sha256:")) + + def test_office_generate_execute_writes_docx_package_and_evidence(self): + with tempfile.TemporaryDirectory() as tmpdir: + evidence_path = os.path.join(tmpdir, "evidence", "docx.json") + rc = main([ + "office", + "generate", + "--execute", + "--policy-ok", + "--artifact-type", + "document", + "--format", + "docx", + "--title", + "Safe Doc", + "--output-root", + tmpdir, + "--evidence-out", + evidence_path, + ]) + self.assertEqual(rc, 0) + output_path = os.path.join(tmpdir, "safe-doc.docx") + self.assertTrue(os.path.exists(output_path)) + _assert_zip_contains(output_path, ["[Content_Types].xml", "_rels/.rels", "word/document.xml"]) + with open(evidence_path, "r", encoding="utf-8") as handle: + evidence = json.load(handle) + self.assertEqual(evidence["format"], "docx") + self.assertEqual(evidence["artifactHashes"][0]["mimeType"], "application/vnd.openxmlformats-officedocument.wordprocessingml.document") + + def test_office_generate_execute_writes_xlsx_package(self): + with tempfile.TemporaryDirectory() as tmpdir: + rc = main([ + "office", + "generate", + "--execute", + "--policy-ok", + "--artifact-type", + "spreadsheet", + "--format", + "xlsx", + "--title", + "Safe Sheet", + "--output-root", + tmpdir, + ]) + self.assertEqual(rc, 0) + output_path = os.path.join(tmpdir, "safe-sheet.xlsx") + self.assertTrue(os.path.exists(output_path)) + _assert_zip_contains( + output_path, + ["[Content_Types].xml", "_rels/.rels", "xl/workbook.xml", "xl/worksheets/sheet1.xml"], + ) + + def test_office_generate_execute_writes_pptx_package(self): + with tempfile.TemporaryDirectory() as tmpdir: + rc = main([ + "office", + "generate", + "--execute", + "--policy-ok", + "--artifact-type", + "slide-deck", + "--format", + "pptx", + "--title", + "Safe Deck", + "--output-root", + tmpdir, + ]) + self.assertEqual(rc, 0) + output_path = os.path.join(tmpdir, "safe-deck.pptx") + self.assertTrue(os.path.exists(output_path)) + _assert_zip_contains( + output_path, + ["[Content_Types].xml", "_rels/.rels", "ppt/presentation.xml", "ppt/slides/slide1.xml"], + ) def test_office_generate_execute_rejects_whole_home_output_root(self): args = _office_args(