From 54e3eb019dffc7b2b13281d626c14cfbd8144e25 Mon Sep 17 00:00:00 2001 From: cyber-ayi <259769279+cyber-ayi@users.noreply.github.com> Date: Wed, 3 Jun 2026 20:30:24 -0700 Subject: [PATCH] =?UTF-8?q?feat(logbook):=20T3=20pipeline=20=E2=80=94=20wr?= =?UTF-8?q?iter/reader,=20tier=20routing,=20salient=20push?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The record's capture-layer mechanism (not its judgment): validate entries against logbook/schema/entry.schema.yaml, route by visibility tier, enforce the redaction gate, and push salient entries transport-not-brain. Closes the T3 acceptance in TASKS.md / issue #26. Language: Python — the pipeline touches the private record so it runs local at runtime (AGENTS.md rule 6) and co-locates with the Python L3 (ADR-0006). The TS harness stays on the world-facing side of the bridge seam. - schema.py: parse the entry.schema.yaml DSL into a typed SchemaSpec, so the validator binds to the file rather than a hand-copied field list. - entry.py / reader.py: Entry model + YAML-frontmatter parse/serialize; the synthetic sample round-trips (semantic + idempotent; body verbatim). - validate.py: structural validation, schema defaults applied (visibility -> private, redaction_checked -> false); tz-aware datetime required. - store.py: visibility tier -> on-disk path. Defaults OUTSIDE the repo (~/.commonplace/logbook), env-overridable (Obsidian vault); a stray entry can never be committed (AGENTS.md rule 2 / ADR-0005). - writer.py: validate -> redaction gate -> tier-routed write. Refuses to emit shareable/narrative without redaction_checked: true (RedactionRequiredError), even on dry-run. Content-addressed filenames -> idempotent re-emit. - salient.py: transport-not-brain push sinks (AstrBot / Discord-webhook fallback / Null default). No salience logic, LLM, or filtering here — that is the drive layer (T4/T5). Channel per exploration/gateway-selection.md. - cli.py: validate / route / emit. 
- logbook-ci.yml: a signal (not a hard gate, per adr-0001), 100% pipeline surface. A test asserts the repo tree holds NO real *.entry.md. Verified: pytest 89 passed, 100% line+branch coverage; editable install + console script smoke green; sample round-trips; no entries created in the repo. Session-Id: 019e909f-788f-7c38-bce3-26c9decd24ec Agent: cc-rc-bot Co-authored-by: cyber-ayi <259769279+cyber-ayi@users.noreply.github.com> Co-authored-by: Claude Opus 4.8 (1M context) --- .github/workflows/logbook-ci.yml | 42 ++++++ .gitignore | 1 + logbook/pipeline/README.md | 83 +++++++++++ logbook/pipeline/pyproject.toml | 45 ++++++ .../src/commonplace_logbook/__init__.py | 66 +++++++++ .../pipeline/src/commonplace_logbook/cli.py | 62 ++++++++ .../pipeline/src/commonplace_logbook/entry.py | 125 ++++++++++++++++ .../src/commonplace_logbook/errors.py | 36 +++++ .../src/commonplace_logbook/reader.py | 27 ++++ .../src/commonplace_logbook/salient.py | 140 ++++++++++++++++++ .../src/commonplace_logbook/schema.py | 129 ++++++++++++++++ .../pipeline/src/commonplace_logbook/store.py | 66 +++++++++ .../src/commonplace_logbook/validate.py | 117 +++++++++++++++ .../src/commonplace_logbook/writer.py | 61 ++++++++ logbook/pipeline/tests/conftest.py | 40 +++++ logbook/pipeline/tests/test_cli.py | 82 ++++++++++ logbook/pipeline/tests/test_entry.py | 60 ++++++++ .../pipeline/tests/test_no_real_entries.py | 37 +++++ logbook/pipeline/tests/test_roundtrip.py | 46 ++++++ logbook/pipeline/tests/test_salient.py | 113 ++++++++++++++ logbook/pipeline/tests/test_schema.py | 110 ++++++++++++++ logbook/pipeline/tests/test_store.py | 70 +++++++++ logbook/pipeline/tests/test_validate.py | 138 +++++++++++++++++ logbook/pipeline/tests/test_writer.py | 67 +++++++++ 24 files changed, 1763 insertions(+) create mode 100644 .github/workflows/logbook-ci.yml create mode 100644 logbook/pipeline/README.md create mode 100644 logbook/pipeline/pyproject.toml create mode 100644 
logbook/pipeline/src/commonplace_logbook/__init__.py create mode 100644 logbook/pipeline/src/commonplace_logbook/cli.py create mode 100644 logbook/pipeline/src/commonplace_logbook/entry.py create mode 100644 logbook/pipeline/src/commonplace_logbook/errors.py create mode 100644 logbook/pipeline/src/commonplace_logbook/reader.py create mode 100644 logbook/pipeline/src/commonplace_logbook/salient.py create mode 100644 logbook/pipeline/src/commonplace_logbook/schema.py create mode 100644 logbook/pipeline/src/commonplace_logbook/store.py create mode 100644 logbook/pipeline/src/commonplace_logbook/validate.py create mode 100644 logbook/pipeline/src/commonplace_logbook/writer.py create mode 100644 logbook/pipeline/tests/conftest.py create mode 100644 logbook/pipeline/tests/test_cli.py create mode 100644 logbook/pipeline/tests/test_entry.py create mode 100644 logbook/pipeline/tests/test_no_real_entries.py create mode 100644 logbook/pipeline/tests/test_roundtrip.py create mode 100644 logbook/pipeline/tests/test_salient.py create mode 100644 logbook/pipeline/tests/test_schema.py create mode 100644 logbook/pipeline/tests/test_store.py create mode 100644 logbook/pipeline/tests/test_validate.py create mode 100644 logbook/pipeline/tests/test_writer.py diff --git a/.github/workflows/logbook-ci.yml b/.github/workflows/logbook-ci.yml new file mode 100644 index 0000000..bb479b5 --- /dev/null +++ b/.github/workflows/logbook-ci.yml @@ -0,0 +1,42 @@ +name: logbook-ci + +# Automated acceptance for the Python logbook pipeline (the record layer's +# writer/reader): schema validation, tier routing, the redaction gate, and the +# transport-not-brain salient push. Round-trips the synthetic sample and asserts +# the pipeline creates NO real entries in the repo (AGENTS.md rule 2 / ADR-0005). +# +# Runs on EVERY pull request as a signal (no paths filter). 
Like harness-ci it is +# NOT a hard ruleset-required check — cc waits for green before self-merging, and +# CI checks are signals not gates here (adr-0001, matching homelab-ops). pip is +# cached; the only dependency is PyYAML. + +on: + pull_request: + push: + branches: [main] + +permissions: + contents: read + +concurrency: + group: logbook-ci-${{ github.ref }} + cancel-in-progress: true + +jobs: + logbook-ci: + name: logbook-ci + runs-on: ubuntu-latest + defaults: + run: + working-directory: logbook/pipeline + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: pip + cache-dependency-path: logbook/pipeline/pyproject.toml + - run: python -m pip install --upgrade pip + - run: pip install -e ".[dev]" + - name: test + coverage gate (100% pipeline surface) + run: pytest --cov --cov-report=term-missing diff --git a/.gitignore b/.gitignore index f216d1f..ee54e91 100644 --- a/.gitignore +++ b/.gitignore @@ -47,6 +47,7 @@ __pycache__/ *.pyo dist/ build/ +*.egg-info/ .venv/ venv/ target/ diff --git a/logbook/pipeline/README.md b/logbook/pipeline/README.md new file mode 100644 index 0000000..05a3d4a --- /dev/null +++ b/logbook/pipeline/README.md @@ -0,0 +1,83 @@ +# logbook pipeline (T3) + +Writer/reader for logbook entries — the **mechanism** of the record's capture +layer. It validates against [`../schema/entry.schema.yaml`](../schema/entry.schema.yaml), +routes by visibility tier, enforces the redaction gate, and offers a +transport-not-brain salient-push hook. + +It provides the mechanism only. *What* to write and *what is salient* are the +drive layer's judgment (T4/T5), never this package's. + +## Language + +Python. The pipeline touches the **private record**, so it runs on a local model +host at runtime (AGENTS.md rule 6) and co-locates with the Python L3 cognition +layer (ADR-0006). The TypeScript harness (L2) stays on the world-facing side of +the JSON bridge seam; this is the record-facing side. 
+ +## Discipline this enforces + +- **Visibility tiers** (AGENTS.md rule 3): every entry defaults to `private`. + The writer **refuses** to emit a `shareable`/`narrative` entry without + `redaction_checked: true` — a publication gate, raised as + `RedactionRequiredError`. Validation and the gate are separate: a `shareable` + entry with the flag unset is *well formed* but *not emittable*. +- **The record never enters this repo** (AGENTS.md rule 2 / ADR-0005). The store + roots **outside** the working tree by default (`~/.commonplace/logbook`), so a + stray entry can never be committed. The test suite asserts the repo tree holds + no `*.entry.md`. +- **Transport, not brain** (`exploration/gateway-selection.md`): the salient-push + sinks carry an already-chosen entry to the operator's channel; they contain no + salience logic, LLM call, or filtering. + +## Layout + +``` +src/commonplace_logbook/ + schema.py parse entry.schema.yaml (a small DSL) into a typed SchemaSpec + entry.py Entry model + markdown frontmatter parse/serialize + validate.py structural validation; applies schema defaults + store.py visibility-tier -> on-disk path (env-overridable, out-of-repo) + writer.py validate -> redaction gate -> tier-routed write + reader.py read/loads an entry, validating against the schema + salient.py transport-not-brain push sinks (AstrBot / Discord / Null) + cli.py validate / route / emit +``` + +## Usage + +```python +from commonplace_logbook import read_entry, write_entry, Entry, salient_push + +entry = read_entry("note.entry.md") # parse + validate +result = write_entry(entry) # validate, gate, route by tier +salient_push(entry, note="felt worth saying") # only if the agent judged it salient +``` + +CLI: + +```sh +commonplace-logbook validate note.entry.md # schema check +commonplace-logbook route note.entry.md # show tier + target path (no write) +commonplace-logbook emit note.entry.md # validate, gate, write to the store +``` + +## Configuration (env) + +| Variable | 
Purpose | Default | +|---|---|---| +| `COMMONPLACE_LOGBOOK_HOME` | store root for all tiers | `~/.commonplace/logbook` | +| `COMMONPLACE_OBSIDIAN_VAULT` | if set, `private` entries route here | unset | +| `COMMONPLACE_LOGBOOK_SCHEMA` | override the schema path | in-tree `../schema` | +| `COMMONPLACE_ASTRBOT_ENDPOINT` / `_TARGET` / `_TOKEN` | AstrBot push channel | unset | +| `COMMONPLACE_DISCORD_WEBHOOK` | thin Discord-webhook fallback channel | unset | + +With no channel configured, the push hook resolves to a `NullSink` (no network), +so it is safe to call unconditionally. + +## Develop + +```sh +pip install -e ".[dev]" +pytest --cov --cov-report=term-missing # 100% pipeline surface, mirrors logbook-ci +``` diff --git a/logbook/pipeline/pyproject.toml b/logbook/pipeline/pyproject.toml new file mode 100644 index 0000000..2eb1ea3 --- /dev/null +++ b/logbook/pipeline/pyproject.toml @@ -0,0 +1,45 @@ +[build-system] +requires = ["setuptools>=68"] +build-backend = "setuptools.build_meta" + +[project] +name = "commonplace-logbook" +version = "0.1.0" +description = "Writer/reader for commonplace logbook entries — schema validation, visibility-tier routing, transport-not-brain salient push." 
+readme = "README.md" +requires-python = ">=3.9" +license = { text = "AGPL-3.0-only" } +authors = [{ name = "commonplace dyad" }] +dependencies = [ + "PyYAML>=6.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=8", + "pytest-cov>=5", +] + +[project.scripts] +commonplace-logbook = "commonplace_logbook.cli:main" + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.pytest.ini_options] +pythonpath = ["src"] +testpaths = ["tests"] +addopts = "-ra" + +[tool.coverage.run] +branch = true +source = ["commonplace_logbook"] + +[tool.coverage.report] +show_missing = true +fail_under = 100 +exclude_lines = [ + "pragma: no cover", + "if TYPE_CHECKING:", + "raise NotImplementedError", +] diff --git a/logbook/pipeline/src/commonplace_logbook/__init__.py b/logbook/pipeline/src/commonplace_logbook/__init__.py new file mode 100644 index 0000000..79b80d7 --- /dev/null +++ b/logbook/pipeline/src/commonplace_logbook/__init__.py @@ -0,0 +1,66 @@ +"""commonplace logbook pipeline — writer/reader, tier routing, salient push. + +Validates against ``logbook/schema/entry.schema.yaml``; defaults every entry to +the ``private`` tier and refuses to emit ``shareable``/``narrative`` without +``redaction_checked: true`` (AGENTS.md rule 3). It provides the *mechanism* for +writing and pushing entries — never the judgment of what to write or what is +salient (that is the drive layer, T4/T5). 
+""" + +from __future__ import annotations + +from .entry import Entry, parse_markdown, to_markdown +from .errors import ( + EntryFormatError, + LogbookError, + RedactionRequiredError, + SchemaError, + ValidationError, +) +from .reader import loads, read_entry +from .salient import ( + AstrBotSink, + DiscordWebhookSink, + NullSink, + SalientSink, + format_message, + salient_push, + sink_from_env, +) +from .schema import SchemaSpec, load_schema +from .store import NARRATIVE, PRIVATE, SHAREABLE, StoreConfig, entry_filename +from .validate import validate_entry, validate_payload +from .writer import WriteResult, write_entry + +__all__ = [ + "Entry", + "parse_markdown", + "to_markdown", + "LogbookError", + "SchemaError", + "ValidationError", + "RedactionRequiredError", + "EntryFormatError", + "loads", + "read_entry", + "load_schema", + "SchemaSpec", + "validate_entry", + "validate_payload", + "write_entry", + "WriteResult", + "StoreConfig", + "entry_filename", + "PRIVATE", + "SHAREABLE", + "NARRATIVE", + "salient_push", + "sink_from_env", + "format_message", + "SalientSink", + "NullSink", + "AstrBotSink", + "DiscordWebhookSink", +] + +__version__ = "0.1.0" diff --git a/logbook/pipeline/src/commonplace_logbook/cli.py b/logbook/pipeline/src/commonplace_logbook/cli.py new file mode 100644 index 0000000..20fc167 --- /dev/null +++ b/logbook/pipeline/src/commonplace_logbook/cli.py @@ -0,0 +1,62 @@ +"""Thin CLI over the pipeline: validate / route / emit. + +Reads an entry's markdown from a path or stdin (``-``). Intended for local use; +``emit`` routes to the (out-of-repo) private store by default. 
+""" + +from __future__ import annotations + +import argparse +import sys +from pathlib import Path +from typing import List, Optional + +from .errors import LogbookError +from .reader import loads +from .writer import write_entry + + +def _read_source(src: str) -> str: + if src == "-": + return sys.stdin.read() + return Path(src).read_text(encoding="utf-8") + + +def main(argv: Optional[List[str]] = None) -> int: + parser = argparse.ArgumentParser(prog="commonplace-logbook", description=__doc__) + sub = parser.add_subparsers(dest="cmd", required=True) + + p_val = sub.add_parser("validate", help="validate an entry against the schema") + p_val.add_argument("source", help="path to an entry .md, or '-' for stdin") + + p_route = sub.add_parser("route", help="show the tier + target path (no write)") + p_route.add_argument("source", help="path to an entry .md, or '-' for stdin") + + p_emit = sub.add_parser("emit", help="validate, gate, and write to the routed store") + p_emit.add_argument("source", help="path to an entry .md, or '-' for stdin") + p_emit.add_argument("--dry-run", action="store_true", help="route but do not write") + + args = parser.parse_args(argv) + + try: + entry = loads(_read_source(args.source)) + if args.cmd == "validate": + print(f"ok: valid {entry.visibility} {entry.type} entry") + return 0 + if args.cmd == "route": + result = write_entry(entry, dry_run=True) + print(f"{result.tier}\t{result.path}") + return 0 + if args.cmd == "emit": + result = write_entry(entry, dry_run=args.dry_run) + verb = "would write" if args.dry_run else "wrote" + print(f"{verb} [{result.tier}] {result.path}") + return 0 + except LogbookError as exc: + print(f"error: {exc}", file=sys.stderr) + return 1 + return 2 # pragma: no cover - argparse enforces a subcommand + + +if __name__ == "__main__": # pragma: no cover + raise SystemExit(main()) diff --git a/logbook/pipeline/src/commonplace_logbook/entry.py b/logbook/pipeline/src/commonplace_logbook/entry.py new file mode 100644 
index 0000000..17f8647 --- /dev/null +++ b/logbook/pipeline/src/commonplace_logbook/entry.py @@ -0,0 +1,125 @@ +"""The :class:`Entry` model and the on-disk markdown serialization. + +An entry is YAML frontmatter (the typed fields) plus a markdown ``body``, +matching ``logbook/schema/sample.redacted.md``:: + + --- + visibility: private + ts: 2026-06-03T14:22:00-02:30 + type: reflection + actor: agent + --- + + body markdown here +""" + +from __future__ import annotations + +import datetime as _dt +from dataclasses import dataclass, field +from typing import Any, Dict + +import yaml + +from .errors import EntryFormatError + +_DELIM = "---" + + +@dataclass +class Entry: + """A logbook entry: typed frontmatter ``data`` + markdown ``body``.""" + + data: Dict[str, Any] = field(default_factory=dict) + body: str = "" + + @property + def visibility(self) -> Any: + return self.data.get("visibility") + + @property + def ts(self) -> Any: + return self.data.get("ts") + + @property + def type(self) -> Any: + return self.data.get("type") + + @property + def actor(self) -> Any: + return self.data.get("actor") + + @property + def redaction_checked(self) -> bool: + return bool(self.data.get("redaction_checked", False)) + + def as_validation_payload(self) -> Dict[str, Any]: + """Flatten to ``{**frontmatter, body}`` for schema validation. + + ``body`` is a schema field that lives in the markdown section rather + than the frontmatter, so validation needs it folded back in. + """ + payload = dict(self.data) + payload["body"] = self.body + return payload + + def to_markdown(self) -> str: + return to_markdown(self) + + +def _yamlify(value: Any) -> Any: + """Coerce values into stable YAML scalars. + + Datetimes are emitted as ISO-8601 strings (with the ``T`` separator and the + original UTC offset) so the serialized form is deterministic and re-reads to + an equal datetime, rather than PyYAML's space-separated default. 
+ """ + if isinstance(value, _dt.datetime): + return value.isoformat() + if isinstance(value, dict): + return {k: _yamlify(v) for k, v in value.items()} + if isinstance(value, (list, tuple)): + return [_yamlify(v) for v in value] + return value + + +def to_markdown(entry: Entry) -> str: + front = {k: _yamlify(v) for k, v in entry.data.items()} + fm = yaml.safe_dump( + front, + sort_keys=False, + allow_unicode=True, + default_flow_style=False, + ) + if not fm.endswith("\n"): # pragma: no cover - safe_dump always ends with a newline + fm += "\n" + body = entry.body.strip("\n") + return f"{_DELIM}\n{fm}{_DELIM}\n\n{body}\n" + + +def parse_markdown(text: str) -> Entry: + lines = text.split("\n") + if not lines or lines[0].strip() != _DELIM: + raise EntryFormatError("entry must start with a '---' frontmatter delimiter") + + end = None + for i in range(1, len(lines)): + if lines[i].strip() == _DELIM: + end = i + break + if end is None: + raise EntryFormatError("unterminated frontmatter: missing closing '---'") + + fm_text = "\n".join(lines[1:end]) + body = "\n".join(lines[end + 1 :]).strip("\n") + + try: + data = yaml.safe_load(fm_text) if fm_text.strip() else {} + except yaml.YAMLError as exc: + raise EntryFormatError(f"frontmatter is not valid YAML: {exc}") from exc + if data is None: + data = {} + if not isinstance(data, dict): + raise EntryFormatError("frontmatter must be a YAML mapping") + + return Entry(data=data, body=body) diff --git a/logbook/pipeline/src/commonplace_logbook/errors.py b/logbook/pipeline/src/commonplace_logbook/errors.py new file mode 100644 index 0000000..1c05874 --- /dev/null +++ b/logbook/pipeline/src/commonplace_logbook/errors.py @@ -0,0 +1,36 @@ +"""Exceptions raised by the logbook pipeline.""" + +from __future__ import annotations + + +class LogbookError(Exception): + """Base class for all pipeline errors.""" + + +class SchemaError(LogbookError): + """The schema file itself is malformed or unsupported.""" + + +class 
ValidationError(LogbookError): + """An entry does not conform to ``entry.schema.yaml``. + + ``problems`` is the list of human-readable field-level failures. + """ + + def __init__(self, problems: list[str]): + self.problems = list(problems) + super().__init__("; ".join(self.problems) if self.problems else "invalid entry") + + +class RedactionRequiredError(LogbookError): + """Refused to emit a ``shareable``/``narrative`` entry without ``redaction_checked: true``. + + AGENTS.md rule 3: tooling defaults to ``private`` and must refuse to emit a + non-private tier until a redaction pass is explicitly recorded on the entry. + This is a publication gate, not a validation failure — the entry is well + formed; it simply may not leave the private tier yet. + """ + + +class EntryFormatError(LogbookError): + """A serialized entry could not be parsed (missing/garbled frontmatter).""" diff --git a/logbook/pipeline/src/commonplace_logbook/reader.py b/logbook/pipeline/src/commonplace_logbook/reader.py new file mode 100644 index 0000000..fa3b131 --- /dev/null +++ b/logbook/pipeline/src/commonplace_logbook/reader.py @@ -0,0 +1,27 @@ +"""Read entries back from disk (or a string), validating against the schema.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Optional, Union + +from .entry import Entry, parse_markdown +from .schema import SchemaSpec, load_schema +from .validate import validate_entry + + +def loads(text: str, *, schema: Optional[SchemaSpec] = None, validate: bool = True) -> Entry: + entry = parse_markdown(text) + if validate: + entry = validate_entry(entry, schema if schema is not None else load_schema()) + return entry + + +def read_entry( + path: Union[str, Path], + *, + schema: Optional[SchemaSpec] = None, + validate: bool = True, +) -> Entry: + text = Path(path).read_text(encoding="utf-8") + return loads(text, schema=schema, validate=validate) diff --git a/logbook/pipeline/src/commonplace_logbook/salient.py 
b/logbook/pipeline/src/commonplace_logbook/salient.py new file mode 100644 index 0000000..a36e0bd --- /dev/null +++ b/logbook/pipeline/src/commonplace_logbook/salient.py @@ -0,0 +1,140 @@ +"""Salient-push hook — **transport, not brain**. + +The agent's cognition (L3 / the drive layer) decides *whether* an entry is +salient; this module only *carries* an already-chosen entry to the operator's +async channel. There is deliberately no salience scoring, LLM call, or filtering +here — adding any would duplicate the brain in the transport, the exact failure +``exploration/gateway-selection.md`` warns against. + +Channel (converged in that note): **AstrBot over Discord, two-way**. A direct +Discord webhook is the documented thin fallback. The sink is pluggable; the +default :class:`NullSink` is a no-op so importing the pipeline never reaches the +network. + +Note on visibility: the push targets the operator's *own* dyad channel, not a +public dataset, so the redaction gate (a publication control) does not apply +here. A private entry may be surfaced to the operator; it is never published. +""" + +from __future__ import annotations + +import json +import os +import urllib.request +from dataclasses import dataclass, field +from typing import Any, Callable, Dict, List, Optional + +from .entry import Entry + +# transport(url, payload_bytes, headers) -> status_code +Transport = Callable[[str, bytes, Dict[str, str]], int] + +_BODY_EXCERPT = 280 + + +def format_message(entry: Entry, note: Optional[str] = None) -> str: + """Render a compact operator-facing notification. 
Formatting only.""" + head = f"[{entry.visibility}] {entry.type} · {entry.ts}" + body = (entry.body or "").strip() + if len(body) > _BODY_EXCERPT: + body = body[: _BODY_EXCERPT - 1].rstrip() + "…" + parts: List[str] = [head] + if note: + parts.append(note) + curiosity = entry.data.get("curiosity") or {} + question = curiosity.get("question") if isinstance(curiosity, dict) else None + if question: + parts.append(f"Q: {question}") + if body: + parts.append(body) + return "\n\n".join(parts) + + +def _urllib_post(url: str, payload: bytes, headers: Dict[str, str]) -> int: # pragma: no cover - network + req = urllib.request.Request(url, data=payload, headers=headers, method="POST") + with urllib.request.urlopen(req, timeout=10) as resp: + return resp.getcode() + + +class SalientSink: + """Interface: carry an entry to the operator. Implementations transport only.""" + + def push(self, entry: Entry, note: Optional[str] = None) -> None: + raise NotImplementedError + + +@dataclass +class NullSink(SalientSink): + """Default no-op sink. Records pushes in-memory for observability/tests.""" + + pushed: List[Dict[str, Any]] = field(default_factory=list) + + def push(self, entry: Entry, note: Optional[str] = None) -> None: + self.pushed.append({"entry": entry, "note": note, "message": format_message(entry, note)}) + + +@dataclass +class AstrBotSink(SalientSink): + """POST a message to an AstrBot inbound endpoint (plugin bridge / webhook). + + AstrBot is pure I/O here: its own LLM/persona pipeline is bypassed. The + payload is just the rendered message plus a routing target. 
+ """ + + endpoint: str + target: Optional[str] = None + token: Optional[str] = None + transport: Transport = _urllib_post + + def push(self, entry: Entry, note: Optional[str] = None) -> None: + payload = {"message": format_message(entry, note), "visibility": entry.visibility} + if self.target: + payload["target"] = self.target + headers = {"Content-Type": "application/json"} + if self.token: + headers["Authorization"] = f"Bearer {self.token}" + self.transport(self.endpoint, json.dumps(payload).encode("utf-8"), headers) + + +@dataclass +class DiscordWebhookSink(SalientSink): + """Documented thin fallback: a plain Discord webhook (no discord.py needed).""" + + webhook_url: str + transport: Transport = _urllib_post + + def push(self, entry: Entry, note: Optional[str] = None) -> None: + payload = {"content": format_message(entry, note)} + headers = {"Content-Type": "application/json"} + self.transport(self.webhook_url, json.dumps(payload).encode("utf-8"), headers) + + +def sink_from_env(environ: Optional[dict] = None) -> SalientSink: + """Pick a sink from env config; :class:`NullSink` if no channel is configured.""" + environ = environ if environ is not None else os.environ + endpoint = environ.get("COMMONPLACE_ASTRBOT_ENDPOINT") + if endpoint: + return AstrBotSink( + endpoint=endpoint, + target=environ.get("COMMONPLACE_ASTRBOT_TARGET"), + token=environ.get("COMMONPLACE_ASTRBOT_TOKEN"), + ) + webhook = environ.get("COMMONPLACE_DISCORD_WEBHOOK") + if webhook: + return DiscordWebhookSink(webhook_url=webhook) + return NullSink() + + +def salient_push( + entry: Entry, + *, + sink: Optional[SalientSink] = None, + note: Optional[str] = None, +) -> None: + """The hook the agent calls when **it** has judged an entry salient. + + Pure transport: no salience decision is made here. ``sink`` defaults to the + env-configured channel (NullSink if none), so this is safe to call + unconditionally in environments without a channel. 
+ """ + (sink if sink is not None else sink_from_env()).push(entry, note) diff --git a/logbook/pipeline/src/commonplace_logbook/schema.py b/logbook/pipeline/src/commonplace_logbook/schema.py new file mode 100644 index 0000000..de3ded2 --- /dev/null +++ b/logbook/pipeline/src/commonplace_logbook/schema.py @@ -0,0 +1,129 @@ +"""Load and model ``logbook/schema/entry.schema.yaml``. + +The schema file is a small hand-rolled DSL (not JSON Schema): a ``required`` and +an ``optional`` block, each mapping a field name to a spec with a ``type`` and, +depending on the type, ``values`` (enum), ``default``, or nested ``fields`` +(object). This module parses that DSL into :class:`SchemaSpec` so the validator +binds to the *file*, never to hand-copied field lists that could drift from it. +""" + +from __future__ import annotations + +import os +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Dict, List, Optional + +import yaml + +from .errors import SchemaError + +SCALAR_TYPES = frozenset({"enum", "datetime", "markdown", "bool", "string"}) +SUPPORTED_VERSION = "commonplace/logbook-entry/v0.1" + +# The field that lives in the markdown body, not the frontmatter. +BODY_FIELD = "body" + +_UNSET = object() + + +@dataclass(frozen=True) +class FieldSpec: + name: str + type: str + required: bool + values: Optional[List[str]] = None + default: Any = _UNSET + fields: Dict[str, "FieldSpec"] = field(default_factory=dict) + + @property + def has_default(self) -> bool: + return self.default is not _UNSET + + +@dataclass(frozen=True) +class SchemaSpec: + version: str + fields: Dict[str, FieldSpec] + + @property + def frontmatter_fields(self) -> Dict[str, FieldSpec]: + return {n: f for n, f in self.fields.items() if n != BODY_FIELD} + + +def default_schema_path() -> Path: + """Resolve the canonical schema path. + + Honors ``COMMONPLACE_LOGBOOK_SCHEMA`` if set, else the in-tree + ``logbook/schema/entry.schema.yaml`` relative to this package. 
+ """ + override = os.environ.get("COMMONPLACE_LOGBOOK_SCHEMA") + if override: + return Path(override).expanduser() + # src/commonplace_logbook/schema.py -> logbook/schema/entry.schema.yaml + return Path(__file__).resolve().parents[3] / "schema" / "entry.schema.yaml" + + +def _parse_field(name: str, raw: Any, *, required: bool) -> FieldSpec: + if not isinstance(raw, dict): + raise SchemaError(f"field {name!r}: expected a mapping, got {type(raw).__name__}") + ftype = raw.get("type") + if ftype not in SCALAR_TYPES and ftype != "object": + raise SchemaError(f"field {name!r}: unsupported type {ftype!r}") + + values = None + if ftype == "enum": + values = raw.get("values") + if not isinstance(values, list) or not values: + raise SchemaError(f"enum field {name!r}: requires a non-empty 'values' list") + values = [str(v) for v in values] + + subfields: Dict[str, FieldSpec] = {} + if ftype == "object": + raw_fields = raw.get("fields", {}) + if not isinstance(raw_fields, dict): + raise SchemaError(f"object field {name!r}: 'fields' must be a mapping") + # Object subfields are themselves optional unless re-declared required. 
+ subfields = { + sub: _parse_field(f"{name}.{sub}", spec, required=False) + for sub, spec in raw_fields.items() + } + + default = raw["default"] if "default" in raw else _UNSET + return FieldSpec( + name=name, + type=ftype, + required=required, + values=values, + default=default, + fields=subfields, + ) + + +def load_schema(path: Optional[Path] = None) -> SchemaSpec: + schema_path = Path(path) if path is not None else default_schema_path() + if not schema_path.is_file(): + raise SchemaError(f"schema not found: {schema_path}") + raw = yaml.safe_load(schema_path.read_text(encoding="utf-8")) + if not isinstance(raw, dict): + raise SchemaError("schema root must be a mapping") + + version = raw.get("$schema") + if version != SUPPORTED_VERSION: + raise SchemaError( + f"unsupported schema version {version!r} (expected {SUPPORTED_VERSION!r})" + ) + + fields: Dict[str, FieldSpec] = {} + for block, is_required in (("required", True), ("optional", False)): + section = raw.get(block, {}) or {} + if not isinstance(section, dict): + raise SchemaError(f"'{block}' block must be a mapping") + for name, spec in section.items(): + if name in fields: + raise SchemaError(f"field {name!r} declared twice") + fields[name] = _parse_field(name, spec, required=is_required) + + if not fields: + raise SchemaError("schema declares no fields") + return SchemaSpec(version=version, fields=fields) diff --git a/logbook/pipeline/src/commonplace_logbook/store.py b/logbook/pipeline/src/commonplace_logbook/store.py new file mode 100644 index 0000000..55db50d --- /dev/null +++ b/logbook/pipeline/src/commonplace_logbook/store.py @@ -0,0 +1,66 @@ +"""Visibility-tier routing to on-disk locations. + +The private record never lives in this repo (AGENTS.md rule 2 / ADR-0005). So +the default store roots **outside** the repo — ``~/.commonplace/logbook`` — and +every path is env-overridable for the operator's own setup (e.g. an Obsidian +vault). 
Nothing here writes into the working tree by default; a stray entry can +therefore never be committed. +""" + +from __future__ import annotations + +import datetime as _dt +import hashlib +import os +from dataclasses import dataclass +from pathlib import Path +from typing import Optional + +from .entry import Entry, to_markdown + +PRIVATE = "private" +SHAREABLE = "shareable" +NARRATIVE = "narrative" + +DEFAULT_HOME = Path("~/.commonplace/logbook") + + +@dataclass(frozen=True) +class StoreConfig: + home: Path + obsidian_vault: Optional[Path] = None + + @classmethod + def from_env(cls, environ: Optional[dict] = None) -> "StoreConfig": + environ = environ if environ is not None else os.environ + home = Path(environ.get("COMMONPLACE_LOGBOOK_HOME", str(DEFAULT_HOME))).expanduser() + vault_raw = environ.get("COMMONPLACE_OBSIDIAN_VAULT") + vault = Path(vault_raw).expanduser() if vault_raw else None + return cls(home=home, obsidian_vault=vault) + + def dir_for(self, tier: str) -> Path: + if tier == PRIVATE: + return self.obsidian_vault if self.obsidian_vault is not None else self.home / "entries" + if tier == SHAREABLE: + return self.home / "shareable" + if tier == NARRATIVE: + return self.home / "narrative" + raise ValueError(f"unknown visibility tier: {tier!r}") + + +def _ts_prefix(ts: object) -> str: + if isinstance(ts, _dt.datetime): + return ts.astimezone(_dt.timezone.utc).strftime("%Y%m%dT%H%M%SZ") + return "undated" + + +def entry_filename(entry: Entry) -> str: + """Deterministic, sortable, collision-resistant filename for an entry. + + ``{ts}-{type}-{digest}.entry.md`` — content-addressed so + re-emitting the same entry overwrites in place (idempotent) rather than + accumulating duplicates. 
+ """ + digest = hashlib.sha1(to_markdown(entry).encode("utf-8")).hexdigest()[:8] + etype = entry.type or "entry" + return f"{_ts_prefix(entry.ts)}-{etype}-{digest}.entry.md" diff --git a/logbook/pipeline/src/commonplace_logbook/validate.py b/logbook/pipeline/src/commonplace_logbook/validate.py new file mode 100644 index 0000000..d21bbb3 --- /dev/null +++ b/logbook/pipeline/src/commonplace_logbook/validate.py @@ -0,0 +1,117 @@ +"""Validate an :class:`Entry` against the parsed schema. + +Validation is *structural*: required fields present, enum membership, datetime +parses with a timezone offset, object subfields well typed, no unknown keys. +Defaults declared in the schema (``visibility: private``, ``redaction_checked: +false``) are applied here. The redaction *gate* is deliberately NOT enforced +here — a ``shareable`` entry with ``redaction_checked: false`` is still well +formed; refusing to *emit* it is the writer's job (see :mod:`writer`). +""" + +from __future__ import annotations + +import datetime as _dt +from typing import Any, Dict, List, Tuple + +from .entry import Entry +from .errors import ValidationError +from .schema import FieldSpec, SchemaSpec, load_schema + + +def _parse_datetime(value: Any) -> Tuple[Any, List[str]]: + if isinstance(value, _dt.datetime): + dt = value + elif isinstance(value, str): + raw = value.strip() + if raw.endswith("Z"): + raw = raw[:-1] + "+00:00" + try: + dt = _dt.datetime.fromisoformat(raw) + except ValueError: + return None, [f"not a valid ISO-8601 datetime: {value!r}"] + else: + return None, [f"expected an ISO-8601 datetime string, got {type(value).__name__}"] + + if dt.tzinfo is None or dt.utcoffset() is None: + return None, ["datetime must carry a timezone offset (ISO-8601 with offset)"] + return dt, [] + + +def _check_field(spec: FieldSpec, value: Any) -> Tuple[Any, List[str]]: + prefix = spec.name + + if spec.type == "enum": + sval = value if isinstance(value, str) else str(value) + if sval not in (spec.values or []): 
+ return None, [f"{prefix}: {value!r} is not one of {spec.values}"] + return sval, [] + + if spec.type == "datetime": + dt, errs = _parse_datetime(value) + return (dt, [f"{prefix}: {e}" for e in errs]) if errs else (dt, []) + + if spec.type in ("markdown", "string"): + if not isinstance(value, str): + return None, [f"{prefix}: expected a string, got {type(value).__name__}"] + return value, [] + + if spec.type == "bool": + if not isinstance(value, bool): + return None, [f"{prefix}: expected a bool, got {type(value).__name__}"] + return value, [] + + if spec.type == "object": + if not isinstance(value, dict): + return None, [f"{prefix}: expected a mapping, got {type(value).__name__}"] + normalized: Dict[str, Any] = {} + problems: List[str] = [] + for key in value: + if key not in spec.fields: + problems.append(f"{prefix}: unknown field {key!r}") + for sub_name, sub_spec in spec.fields.items(): + if sub_name not in value: + continue + norm, errs = _check_field(sub_spec, value[sub_name]) + problems.extend(errs) + if not errs: + normalized[sub_name] = norm + return (normalized, problems) if problems else (normalized, []) + + # Unreachable: schema loader rejects unknown types. 
+ return None, [f"{prefix}: unsupported field type {spec.type!r}"] # pragma: no cover + + +def validate_payload(payload: Dict[str, Any], schema: SchemaSpec) -> Dict[str, Any]: + problems: List[str] = [] + normalized: Dict[str, Any] = {} + + for key in payload: + if key not in schema.fields: + problems.append(f"unknown field {key!r}") + + for name, spec in schema.fields.items(): + if name in payload: + norm, errs = _check_field(spec, payload[name]) + problems.extend(errs) + if not errs: + normalized[name] = norm + elif spec.has_default: + normalized[name] = spec.default + elif spec.required: + problems.append(f"missing required field {name!r}") + # else: optional, no default, absent -> simply omitted + + if problems: + raise ValidationError(sorted(problems)) + return normalized + + +def validate_entry(entry: Entry, schema: SchemaSpec = None) -> Entry: + """Return a normalized copy of ``entry`` (defaults applied, ts as datetime). + + Raises :class:`ValidationError` listing every structural problem found. + """ + schema = schema if schema is not None else load_schema() + normalized = validate_payload(entry.as_validation_payload(), schema) + body = normalized.pop("body", entry.body) + return Entry(data=normalized, body=body) diff --git a/logbook/pipeline/src/commonplace_logbook/writer.py b/logbook/pipeline/src/commonplace_logbook/writer.py new file mode 100644 index 0000000..c429e0a --- /dev/null +++ b/logbook/pipeline/src/commonplace_logbook/writer.py @@ -0,0 +1,61 @@ +"""Write an entry: validate, enforce the redaction gate, route by tier. + +The redaction gate (AGENTS.md rule 3) lives here: a ``shareable``/``narrative`` +entry is refused unless it carries ``redaction_checked: true``. ``private`` is +the default tier and is always emittable to the private store. 
+""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import Optional + +from .entry import Entry, to_markdown +from .errors import RedactionRequiredError +from .schema import SchemaSpec, load_schema +from .store import PRIVATE, StoreConfig, entry_filename +from .validate import validate_entry + +GATED_TIERS = ("shareable", "narrative") + + +@dataclass(frozen=True) +class WriteResult: + path: Path + tier: str + markdown: str + written: bool + + +def _assert_emittable(entry: Entry) -> None: + tier = entry.visibility or PRIVATE + if tier in GATED_TIERS and not entry.redaction_checked: + raise RedactionRequiredError( + f"refusing to emit a {tier!r} entry without redaction_checked: true " + "(AGENTS.md rule 3 — run the redaction pass and set the flag first)" + ) + + +def write_entry( + entry: Entry, + *, + schema: Optional[SchemaSpec] = None, + config: Optional[StoreConfig] = None, + dry_run: bool = False, +) -> WriteResult: + schema = schema if schema is not None else load_schema() + entry = validate_entry(entry, schema) + _assert_emittable(entry) + + config = config if config is not None else StoreConfig.from_env() + tier = entry.visibility or PRIVATE + target_dir = config.dir_for(tier) + path = target_dir / entry_filename(entry) + markdown = to_markdown(entry) + + if not dry_run: + target_dir.mkdir(parents=True, exist_ok=True) + path.write_text(markdown, encoding="utf-8") + + return WriteResult(path=path, tier=tier, markdown=markdown, written=not dry_run) diff --git a/logbook/pipeline/tests/conftest.py b/logbook/pipeline/tests/conftest.py new file mode 100644 index 0000000..12153d3 --- /dev/null +++ b/logbook/pipeline/tests/conftest.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +import datetime as _dt +from pathlib import Path + +import pytest + +from commonplace_logbook import Entry, StoreConfig, load_schema + +REPO_LOGBOOK = Path(__file__).resolve().parents[2] +SAMPLE = REPO_LOGBOOK / 
"schema" / "sample.redacted.md" + + +@pytest.fixture(scope="session") +def schema(): + return load_schema() + + +@pytest.fixture +def sample_path() -> Path: + return SAMPLE + + +@pytest.fixture +def store(tmp_path) -> StoreConfig: + """A store rooted entirely in a tmp dir — never the repo.""" + return StoreConfig(home=tmp_path / "logbook") + + +def make_entry(visibility="private", redaction_checked=None, **extra) -> Entry: + data = { + "visibility": visibility, + "ts": _dt.datetime(2026, 6, 3, 14, 22, tzinfo=_dt.timezone.utc), + "type": "reflection", + "actor": "agent", + } + if redaction_checked is not None: + data["redaction_checked"] = redaction_checked + data.update(extra) + return Entry(data=data, body="a small note about going north.") diff --git a/logbook/pipeline/tests/test_cli.py b/logbook/pipeline/tests/test_cli.py new file mode 100644 index 0000000..e14c036 --- /dev/null +++ b/logbook/pipeline/tests/test_cli.py @@ -0,0 +1,82 @@ +from __future__ import annotations + +import pytest + +from commonplace_logbook.cli import main + + +def _write_entry_file(tmp_path, visibility="private", redaction=False): + text = ( + "---\n" + f"visibility: {visibility}\n" + "ts: 2026-06-03T14:22:00Z\n" + "type: reflection\n" + "actor: agent\n" + f"redaction_checked: {str(redaction).lower()}\n" + "---\n\nbody\n" + ) + p = tmp_path / "e.md" + p.write_text(text, encoding="utf-8") + return p + + +def test_validate_ok(tmp_path, capsys): + p = _write_entry_file(tmp_path) + assert main(["validate", str(p)]) == 0 + assert "ok: valid private" in capsys.readouterr().out + + +def test_validate_reports_error(tmp_path, capsys): + p = tmp_path / "bad.md" + p.write_text("---\nvisibility: public\n---\nbody\n", encoding="utf-8") + assert main(["validate", str(p)]) == 1 + assert "error:" in capsys.readouterr().err + + +def test_route_shows_tier_and_path(tmp_path, monkeypatch, capsys): + monkeypatch.setenv("COMMONPLACE_LOGBOOK_HOME", str(tmp_path / "store")) + p = _write_entry_file(tmp_path) + 
assert main(["route", str(p)]) == 0 + out = capsys.readouterr().out + assert out.startswith("private\t") + assert "entries" in out + + +def test_emit_dry_run_writes_nothing(tmp_path, monkeypatch, capsys): + store = tmp_path / "store" + monkeypatch.setenv("COMMONPLACE_LOGBOOK_HOME", str(store)) + p = _write_entry_file(tmp_path) + assert main(["emit", str(p), "--dry-run"]) == 0 + assert "would write" in capsys.readouterr().out + assert not store.exists() + + +def test_emit_writes_to_store(tmp_path, monkeypatch, capsys): + store = tmp_path / "store" + monkeypatch.setenv("COMMONPLACE_LOGBOOK_HOME", str(store)) + p = _write_entry_file(tmp_path) + assert main(["emit", str(p)]) == 0 + assert "wrote [private]" in capsys.readouterr().out + assert list((store / "entries").glob("*.entry.md")) + + +def test_emit_gate_blocks_shareable(tmp_path, monkeypatch, capsys): + monkeypatch.setenv("COMMONPLACE_LOGBOOK_HOME", str(tmp_path / "store")) + p = _write_entry_file(tmp_path, visibility="shareable", redaction=False) + assert main(["emit", str(p)]) == 1 + assert "redaction_checked" in capsys.readouterr().err + + +def test_stdin_source(monkeypatch, capsys): + text = "---\nvisibility: private\nts: 2026-06-03T14:22:00Z\ntype: action\nactor: dyad\n---\n\nb\n" + monkeypatch.setattr("sys.stdin", _FakeStdin(text)) + assert main(["validate", "-"]) == 0 + assert "ok: valid private action" in capsys.readouterr().out + + +class _FakeStdin: + def __init__(self, text): + self._text = text + + def read(self): + return self._text diff --git a/logbook/pipeline/tests/test_entry.py b/logbook/pipeline/tests/test_entry.py new file mode 100644 index 0000000..9346184 --- /dev/null +++ b/logbook/pipeline/tests/test_entry.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +import pytest + +from commonplace_logbook import EntryFormatError, parse_markdown, to_markdown +from commonplace_logbook.entry import Entry + + +def test_no_leading_delimiter(): + with pytest.raises(EntryFormatError, match="must 
start with"): + parse_markdown("visibility: private\nbody") + + +def test_unterminated_frontmatter(): + with pytest.raises(EntryFormatError, match="unterminated"): + parse_markdown("---\nvisibility: private\nbody text") + + +def test_frontmatter_not_yaml(): + with pytest.raises(EntryFormatError, match="not valid YAML"): + parse_markdown("---\nfoo: [unclosed\n---\nbody") + + +def test_frontmatter_not_a_mapping(): + with pytest.raises(EntryFormatError, match="must be a YAML mapping"): + parse_markdown("---\n- a\n- b\n---\nbody") + + +def test_empty_frontmatter_yields_empty_data(): + entry = parse_markdown("---\n---\n\nbody here") + assert entry.data == {} + assert entry.body == "body here" + + +def test_blank_frontmatter_yields_empty_data(): + entry = parse_markdown("---\n\n---\n\nbody") + assert entry.data == {} + + +def test_to_markdown_method_matches_function(): + e = Entry(data={"visibility": "private"}, body="x") + assert e.to_markdown() == to_markdown(e) + + +def test_body_internal_blank_lines_preserved(): + e = Entry(data={"visibility": "private"}, body="para one\n\npara two") + assert parse_markdown(to_markdown(e)).body == "para one\n\npara two" + + +def test_yamlify_handles_lists(): + e = Entry(data={"tags": ["a", "b"], "visibility": "private"}, body="x") + md = to_markdown(e) + assert "- a" in md and "- b" in md + + +def test_null_frontmatter_yields_empty_data(): + # Non-blank frontmatter that parses to None (`null`) -> empty data. + entry = parse_markdown("---\nnull\n---\n\nbody") + assert entry.data == {} + assert entry.body == "body" diff --git a/logbook/pipeline/tests/test_no_real_entries.py b/logbook/pipeline/tests/test_no_real_entries.py new file mode 100644 index 0000000..5880d46 --- /dev/null +++ b/logbook/pipeline/tests/test_no_real_entries.py @@ -0,0 +1,37 @@ +"""Guard: the pipeline must never create a real entry inside this repo. + +AGENTS.md rule 2 / ADR-0005 — the record lives in a separate, gitignored repo. 
+The default store roots outside the working tree; this test asserts that and +that the test run left no ``*.entry.md`` anywhere under the repo. +""" + +from __future__ import annotations + +from pathlib import Path + +from commonplace_logbook import StoreConfig, write_entry + +from conftest import make_entry + +REPO_ROOT = Path(__file__).resolve().parents[3] + + +def test_default_store_root_is_outside_repo(): + home = StoreConfig.from_env(environ={}).home + assert REPO_ROOT not in home.parents + assert home != REPO_ROOT + + +def test_writing_uses_only_the_given_out_of_repo_config(tmp_path): + cfg = StoreConfig(home=tmp_path / "store") + result = write_entry(make_entry(), config=cfg) + assert tmp_path in result.path.parents + + +def test_repo_tree_contains_no_real_entries(): + offenders = [ + p + for p in REPO_ROOT.rglob("*.entry.md") + if ".git" not in p.parts + ] + assert offenders == [], f"real entries leaked into the repo: {offenders}" diff --git a/logbook/pipeline/tests/test_roundtrip.py b/logbook/pipeline/tests/test_roundtrip.py new file mode 100644 index 0000000..396c22d --- /dev/null +++ b/logbook/pipeline/tests/test_roundtrip.py @@ -0,0 +1,46 @@ +"""Acceptance: the synthetic sample round-trips with full fidelity.""" + +from __future__ import annotations + +import datetime as _dt + +from commonplace_logbook import loads, read_entry, to_markdown + + +def test_sample_reads_and_validates(sample_path): + entry = read_entry(sample_path) + assert entry.visibility == "shareable" + assert entry.type == "reflection" + assert entry.actor == "agent" + assert entry.redaction_checked is True + assert isinstance(entry.ts, _dt.datetime) + assert entry.ts.utcoffset() == _dt.timedelta(hours=-2, minutes=-30) + assert entry.data["curiosity"]["detour"] is True + assert "second clearing" in entry.body + + +def test_roundtrip_is_semantically_stable(sample_path): + original = read_entry(sample_path) + reparsed = loads(to_markdown(original)) + assert reparsed.data == original.data 
+ assert reparsed.body == original.body + + +def test_roundtrip_serialization_is_idempotent(sample_path): + once = to_markdown(read_entry(sample_path)) + twice = to_markdown(loads(once)) + assert once == twice + + +def test_read_without_validation_skips_normalization(sample_path): + # validate=False -> raw parse, ts stays whatever YAML produced, no defaults. + raw = loads(sample_path.read_text(encoding="utf-8"), validate=False) + assert raw.visibility == "shareable" + assert "redaction_checked" in raw.data # present in this sample + + +def test_body_is_preserved_verbatim(sample_path): + raw = sample_path.read_text(encoding="utf-8") + # The body is everything after the closing frontmatter delimiter. + body_in_file = raw.split("---", 2)[2].strip("\n") + assert read_entry(sample_path).body == body_in_file diff --git a/logbook/pipeline/tests/test_salient.py b/logbook/pipeline/tests/test_salient.py new file mode 100644 index 0000000..0433dc8 --- /dev/null +++ b/logbook/pipeline/tests/test_salient.py @@ -0,0 +1,113 @@ +"""The salient-push hook is transport-only: it carries, it does not decide.""" + +from __future__ import annotations + +import json + +import pytest + +from commonplace_logbook import ( + AstrBotSink, + DiscordWebhookSink, + NullSink, + format_message, + salient_push, + sink_from_env, +) +from commonplace_logbook.salient import SalientSink + +from conftest import make_entry + + +class FakeTransport: + def __init__(self): + self.calls = [] + + def __call__(self, url, payload, headers): + self.calls.append((url, json.loads(payload.decode("utf-8")), headers)) + return 204 + + +def test_format_message_includes_tier_type_and_curiosity(): + e = make_entry(curiosity={"question": "what is past the ridge?"}) + msg = format_message(e, note="felt worth saying") + assert "[private]" in msg + assert "reflection" in msg + assert "felt worth saying" in msg + assert "Q: what is past the ridge?" 
in msg + + +def test_format_message_truncates_long_body(): + e = make_entry() + e.body = "x" * 1000 + msg = format_message(e) + assert msg.endswith("…") + assert len(msg) < 1000 + + +def test_format_message_omits_empty_body(): + e = make_entry() + e.body = "" + msg = format_message(e) + assert "[private]" in msg + assert msg.endswith("reflection · " + str(e.ts)) # no trailing body block + + +def test_null_sink_records_without_network(): + sink = NullSink() + salient_push(make_entry(), sink=sink, note="hi") + assert len(sink.pushed) == 1 + assert sink.pushed[0]["note"] == "hi" + + +def test_astrbot_sink_posts_payload(): + fake = FakeTransport() + sink = AstrBotSink(endpoint="http://astr/inbound", target="dyad", token="t", transport=fake) + salient_push(make_entry(visibility="private"), sink=sink) + url, body, headers = fake.calls[0] + assert url == "http://astr/inbound" + assert body["target"] == "dyad" + assert body["visibility"] == "private" + assert headers["Authorization"] == "Bearer t" + + +def test_astrbot_sink_without_target_or_token(): + fake = FakeTransport() + AstrBotSink(endpoint="http://astr", transport=fake).push(make_entry()) + _, body, headers = fake.calls[0] + assert "target" not in body + assert "Authorization" not in headers + + +def test_discord_webhook_sink_posts_content(): + fake = FakeTransport() + DiscordWebhookSink(webhook_url="http://discord/hook", transport=fake).push(make_entry()) + url, body, _ = fake.calls[0] + assert url == "http://discord/hook" + assert "content" in body + + +def test_sink_from_env_prefers_astrbot(): + sink = sink_from_env({"COMMONPLACE_ASTRBOT_ENDPOINT": "http://a", "COMMONPLACE_DISCORD_WEBHOOK": "http://d"}) + assert isinstance(sink, AstrBotSink) + + +def test_sink_from_env_falls_back_to_discord(): + sink = sink_from_env({"COMMONPLACE_DISCORD_WEBHOOK": "http://d"}) + assert isinstance(sink, DiscordWebhookSink) + + +def test_sink_from_env_defaults_to_null(): + assert isinstance(sink_from_env({}), NullSink) + + +def 
test_salient_push_resolves_sink_from_env(monkeypatch): + monkeypatch.delenv("COMMONPLACE_ASTRBOT_ENDPOINT", raising=False) + monkeypatch.delenv("COMMONPLACE_DISCORD_WEBHOOK", raising=False) + # No sink + no env channel -> NullSink, no network, no error. + salient_push(make_entry()) + + +def test_base_sink_is_abstract(): + with pytest.raises(NotImplementedError): + SalientSink().push(make_entry()) diff --git a/logbook/pipeline/tests/test_schema.py b/logbook/pipeline/tests/test_schema.py new file mode 100644 index 0000000..171fd3f --- /dev/null +++ b/logbook/pipeline/tests/test_schema.py @@ -0,0 +1,110 @@ +from __future__ import annotations + +import pytest + +from commonplace_logbook import SchemaError, load_schema +from commonplace_logbook.schema import BODY_FIELD, SUPPORTED_VERSION, default_schema_path + + +def test_loads_canonical_schema(schema): + assert schema.version == SUPPORTED_VERSION + assert schema.fields["visibility"].required is True + assert schema.fields["visibility"].default == "private" + assert schema.fields["visibility"].values == ["private", "shareable", "narrative"] + assert schema.fields["redaction_checked"].required is False + assert schema.fields["redaction_checked"].default is False + # object subfields parsed + assert set(schema.fields["curiosity"].fields) == {"question", "detour", "surprise"} + assert schema.fields["relation"].fields["kind"].type == "enum" + + +def test_body_is_not_a_frontmatter_field(schema): + assert BODY_FIELD in schema.fields + assert BODY_FIELD not in schema.frontmatter_fields + + +def test_default_path_env_override(monkeypatch, tmp_path): + target = tmp_path / "x.yaml" + monkeypatch.setenv("COMMONPLACE_LOGBOOK_SCHEMA", str(target)) + assert default_schema_path() == target + + +def test_missing_file_raises(tmp_path): + with pytest.raises(SchemaError, match="not found"): + load_schema(tmp_path / "nope.yaml") + + +def test_rejects_wrong_version(tmp_path): + p = tmp_path / "s.yaml" + p.write_text('$schema: 
"wrong/v9"\nrequired: {}\n', encoding="utf-8") + with pytest.raises(SchemaError, match="unsupported schema version"): + load_schema(p) + + +def test_rejects_non_mapping_root(tmp_path): + p = tmp_path / "s.yaml" + p.write_text("- a\n- b\n", encoding="utf-8") + with pytest.raises(SchemaError, match="root must be a mapping"): + load_schema(p) + + +def test_rejects_unknown_field_type(tmp_path): + p = tmp_path / "s.yaml" + p.write_text( + f'$schema: "{SUPPORTED_VERSION}"\nrequired:\n x: {{type: blob}}\n', encoding="utf-8" + ) + with pytest.raises(SchemaError, match="unsupported type"): + load_schema(p) + + +def test_rejects_enum_without_values(tmp_path): + p = tmp_path / "s.yaml" + p.write_text( + f'$schema: "{SUPPORTED_VERSION}"\nrequired:\n x: {{type: enum}}\n', encoding="utf-8" + ) + with pytest.raises(SchemaError, match="non-empty 'values'"): + load_schema(p) + + +def test_rejects_empty_schema(tmp_path): + p = tmp_path / "s.yaml" + p.write_text(f'$schema: "{SUPPORTED_VERSION}"\n', encoding="utf-8") + with pytest.raises(SchemaError, match="no fields"): + load_schema(p) + + +def test_rejects_non_mapping_field_spec(tmp_path): + p = tmp_path / "s.yaml" + p.write_text(f'$schema: "{SUPPORTED_VERSION}"\nrequired:\n x: "scalar"\n', encoding="utf-8") + with pytest.raises(SchemaError, match="expected a mapping"): + load_schema(p) + + +def test_rejects_object_fields_non_mapping(tmp_path): + p = tmp_path / "s.yaml" + p.write_text( + f'$schema: "{SUPPORTED_VERSION}"\n' + "required:\n x: {type: object, fields: [a, b]}\n", + encoding="utf-8", + ) + with pytest.raises(SchemaError, match="'fields' must be a mapping"): + load_schema(p) + + +def test_rejects_non_mapping_block(tmp_path): + p = tmp_path / "s.yaml" + p.write_text(f'$schema: "{SUPPORTED_VERSION}"\nrequired: 3\n', encoding="utf-8") + with pytest.raises(SchemaError, match="block must be a mapping"): + load_schema(p) + + +def test_rejects_duplicate_field(tmp_path): + p = tmp_path / "s.yaml" + p.write_text( + f'$schema: 
"{SUPPORTED_VERSION}"\n' + "required:\n x: {type: string}\n" + "optional:\n x: {type: bool}\n", + encoding="utf-8", + ) + with pytest.raises(SchemaError, match="declared twice"): + load_schema(p) diff --git a/logbook/pipeline/tests/test_store.py b/logbook/pipeline/tests/test_store.py new file mode 100644 index 0000000..653bcbc --- /dev/null +++ b/logbook/pipeline/tests/test_store.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +from pathlib import Path + +import pytest + +from commonplace_logbook import NARRATIVE, PRIVATE, SHAREABLE, StoreConfig, entry_filename +from commonplace_logbook.store import DEFAULT_HOME + +from conftest import make_entry + + +def test_default_store_is_outside_the_repo(): + cfg = StoreConfig.from_env(environ={}) + home = cfg.home + assert home == DEFAULT_HOME.expanduser() + repo_root = Path(__file__).resolve().parents[3] + assert repo_root not in home.parents and home != repo_root + + +def test_env_overrides(tmp_path): + cfg = StoreConfig.from_env( + environ={ + "COMMONPLACE_LOGBOOK_HOME": str(tmp_path / "store"), + "COMMONPLACE_OBSIDIAN_VAULT": str(tmp_path / "vault"), + } + ) + assert cfg.home == tmp_path / "store" + assert cfg.obsidian_vault == tmp_path / "vault" + + +def test_private_routes_to_entries(tmp_path): + cfg = StoreConfig(home=tmp_path) + assert cfg.dir_for(PRIVATE) == tmp_path / "entries" + + +def test_private_routes_to_obsidian_when_set(tmp_path): + cfg = StoreConfig(home=tmp_path, obsidian_vault=tmp_path / "vault") + assert cfg.dir_for(PRIVATE) == tmp_path / "vault" + + +def test_shareable_and_narrative_dirs(tmp_path): + cfg = StoreConfig(home=tmp_path) + assert cfg.dir_for(SHAREABLE) == tmp_path / "shareable" + assert cfg.dir_for(NARRATIVE) == tmp_path / "narrative" + + +def test_unknown_tier_raises(tmp_path): + with pytest.raises(ValueError, match="unknown visibility tier"): + StoreConfig(home=tmp_path).dir_for("public") + + +def test_filename_is_deterministic_and_sortable(): + e = make_entry() + name = 
entry_filename(e) + assert name == entry_filename(e) # content-addressed -> stable + assert name.startswith("20260603T142200Z-reflection-") + assert name.endswith(".entry.md") + + +def test_filename_changes_with_content(): + a = entry_filename(make_entry(curiosity={"question": "a?"})) + b = entry_filename(make_entry(curiosity={"question": "b?"})) + assert a != b + + +def test_filename_undated_when_ts_not_datetime(): + e = make_entry() + e.data["ts"] = "raw-string" # not yet validated to a datetime + assert entry_filename(e).startswith("undated-reflection-") diff --git a/logbook/pipeline/tests/test_validate.py b/logbook/pipeline/tests/test_validate.py new file mode 100644 index 0000000..8cb028c --- /dev/null +++ b/logbook/pipeline/tests/test_validate.py @@ -0,0 +1,138 @@ +from __future__ import annotations + +import datetime as _dt + +import pytest + +from commonplace_logbook import Entry, ValidationError, validate_entry +from commonplace_logbook.entry import parse_markdown + +from conftest import make_entry + + +def test_defaults_applied(schema): + entry = Entry( + data={ + "ts": _dt.datetime(2026, 6, 3, tzinfo=_dt.timezone.utc), + "type": "observation", + "actor": "agent", + }, + body="hi", + ) + out = validate_entry(entry, schema) + assert out.visibility == "private" # default + assert out.redaction_checked is False # default + assert out.body == "hi" + + +def test_missing_required_collects_all(schema): + with pytest.raises(ValidationError) as ei: + validate_entry(Entry(data={}, body="x"), schema) + problems = ei.value.problems + assert any("ts" in p for p in problems) + assert any("type" in p for p in problems) + assert any("actor" in p for p in problems) + + +def test_empty_body_is_missing_required(schema): + # body comes from the markdown section; here it is present but empty -> still a str + out = validate_entry(make_entry(), schema) + assert isinstance(out.body, str) + + +def test_bad_enum_rejected(schema): + with pytest.raises(ValidationError, 
match="visibility"): + validate_entry(make_entry(visibility="public"), schema) + + +def test_naive_datetime_rejected(schema): + e = make_entry() + e.data["ts"] = _dt.datetime(2026, 6, 3, 12, 0) # no tzinfo + with pytest.raises(ValidationError, match="timezone offset"): + validate_entry(e, schema) + + +def test_string_datetime_parsed(schema): + e = make_entry() + e.data["ts"] = "2026-06-03T14:22:00-02:30" + out = validate_entry(e, schema) + assert out.ts.utcoffset() == _dt.timedelta(hours=-2, minutes=-30) + + +def test_z_suffix_datetime_parsed(schema): + e = make_entry() + e.data["ts"] = "2026-06-03T14:22:00Z" + out = validate_entry(e, schema) + assert out.ts.utcoffset() == _dt.timedelta(0) + + +def test_unparseable_datetime_rejected(schema): + e = make_entry() + e.data["ts"] = "not-a-date" + with pytest.raises(ValidationError, match="valid ISO-8601"): + validate_entry(e, schema) + + +def test_non_string_datetime_rejected(schema): + e = make_entry() + e.data["ts"] = 12345 + with pytest.raises(ValidationError, match="datetime string"): + validate_entry(e, schema) + + +def test_unknown_field_rejected(schema): + with pytest.raises(ValidationError, match="unknown field 'mood'"): + validate_entry(make_entry(mood="curious"), schema) + + +def test_bool_type_enforced(schema): + with pytest.raises(ValidationError, match="redaction_checked"): + validate_entry(make_entry(redaction_checked="yes"), schema) + + +def test_markdown_must_be_string(schema): + e = make_entry() + e.body = 42 # type: ignore[assignment] + with pytest.raises(ValidationError, match="body"): + validate_entry(e, schema) + + +def test_object_subfields_validated(schema): + e = make_entry(curiosity={"question": "why?", "detour": True, "surprise": "none"}) + out = validate_entry(e, schema) + assert out.data["curiosity"]["detour"] is True + + +def test_object_must_be_mapping(schema): + with pytest.raises(ValidationError, match="curiosity: expected a mapping"): + validate_entry(make_entry(curiosity="lots"), 
schema) + + +def test_object_unknown_subfield_rejected(schema): + with pytest.raises(ValidationError, match="unknown field 'mood'"): + validate_entry(make_entry(curiosity={"mood": "x"}), schema) + + +def test_object_subfield_type_enforced(schema): + with pytest.raises(ValidationError, match="detour"): + validate_entry(make_entry(curiosity={"detour": "maybe"}), schema) + + +def test_relation_enum_subfield(schema): + out = validate_entry( + make_entry(type="relation-event", relation={"kind": "repair", "detail": "we talked"}), + schema, + ) + assert out.data["relation"]["kind"] == "repair" + + +def test_relation_bad_enum(schema): + with pytest.raises(ValidationError, match="kind"): + validate_entry(make_entry(relation={"kind": "nope"}), schema) + + +def test_validate_entry_loads_default_schema(sample_path): + # No schema passed -> loads the canonical one. + entry = parse_markdown(sample_path.read_text(encoding="utf-8")) + out = validate_entry(entry) + assert out.visibility == "shareable" diff --git a/logbook/pipeline/tests/test_writer.py b/logbook/pipeline/tests/test_writer.py new file mode 100644 index 0000000..72f551d --- /dev/null +++ b/logbook/pipeline/tests/test_writer.py @@ -0,0 +1,67 @@ +"""The redaction gate and tier-routed writing — the heart of the acceptance.""" + +from __future__ import annotations + +import pytest + +from commonplace_logbook import RedactionRequiredError, read_entry, write_entry + +from conftest import make_entry + + +def test_private_writes_to_entries(store): + result = write_entry(make_entry(visibility="private"), config=store) + assert result.tier == "private" + assert result.path.parent == store.home / "entries" + assert result.path.exists() + # round-trips off disk + assert read_entry(result.path).body == "a small note about going north." 
+ + +@pytest.mark.parametrize("tier", ["shareable", "narrative"]) +def test_gated_tiers_refused_without_redaction(tier, store): + with pytest.raises(RedactionRequiredError, match=tier): + write_entry(make_entry(visibility=tier, redaction_checked=False), config=store) + + +@pytest.mark.parametrize("tier", ["shareable", "narrative"]) +def test_gated_tiers_refused_when_flag_absent(tier, store): + # redaction_checked defaults to False -> still refused + with pytest.raises(RedactionRequiredError): + write_entry(make_entry(visibility=tier), config=store) + + +@pytest.mark.parametrize("tier", ["shareable", "narrative"]) +def test_gated_tiers_allowed_with_redaction(tier, store): + result = write_entry( + make_entry(visibility=tier, redaction_checked=True), config=store + ) + assert result.tier == tier + assert result.path.parent == store.home / tier + assert result.path.exists() + + +def test_gate_fires_even_on_dry_run(store): + with pytest.raises(RedactionRequiredError): + write_entry(make_entry(visibility="shareable"), config=store, dry_run=True) + + +def test_dry_run_does_not_touch_disk(store): + result = write_entry(make_entry(visibility="private"), config=store, dry_run=True) + assert result.written is False + assert not result.path.exists() + assert not store.home.exists() + + +def test_write_is_idempotent(store): + e = make_entry(visibility="private") + first = write_entry(e, config=store) + second = write_entry(e, config=store) + assert first.path == second.path # content-addressed + assert list((store.home / "entries").glob("*.entry.md")) == [first.path] + + +def test_invalid_entry_rejected_before_write(store): + with pytest.raises(Exception): + write_entry(make_entry(visibility="public"), config=store) + assert not store.home.exists()