diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 595c915..163ff1e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,20 +2,33 @@ name: CI on: pull_request: - push: - branches: [main] workflow_dispatch: permissions: contents: read jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install ruff + run: python -m pip install "ruff>=0.8,<1" + + - name: Run ruff + run: python -m ruff check . + test: runs-on: ubuntu-latest strategy: fail-fast: false matrix: - python-version: ["3.10", "3.11", "3.12"] + python-version: ["3.10", "3.12", "3.14"] steps: - uses: actions/checkout@v4 @@ -24,19 +37,32 @@ jobs: python-version: ${{ matrix.python-version }} - name: Compile Python - run: python -m py_compile serve.py link.py scripts/check_release_hygiene.py scripts/prepare_release.py mcp_package/link_mcp/server.py mcp_package/link_mcp/__main__.py mcp_package/link_mcp/__init__.py + run: python -m py_compile serve.py link.py scripts/check_release_hygiene.py scripts/check_runtime_duplication.py scripts/check_tool_contract.py scripts/prepare_release.py scripts/smoke_first_use.py scripts/smoke_http_viewer.py scripts/smoke_large_wiki.py scripts/smoke_mcp_stdio.py mcp_package/link_core/*.py mcp_package/link_mcp/server.py mcp_package/link_mcp/__main__.py mcp_package/link_mcp/__init__.py + + - name: Install MCP package for verification + run: python -m pip install ./mcp_package - name: Run tests run: python -m unittest discover -s tests + - name: Large wiki smoke test + if: matrix.python-version == '3.12' + run: python scripts/smoke_large_wiki.py --pages 1000 + - name: Demo health smoke test run: | + python scripts/smoke_first_use.py python link.py demo /tmp/link-demo-ci --force python /tmp/link-demo-ci/link.py doctor /tmp/link-demo-ci python /tmp/link-demo-ci/link.py rebuild-backlinks /tmp/link-demo-ci + python /tmp/link-demo-ci/link.py 
validate /tmp/link-demo-ci python /tmp/link-demo-ci/link.py doctor /tmp/link-demo-ci PYTHONPATH=mcp_package python link.py verify-mcp /tmp/link-demo-ci --python python + - name: HTTP viewer smoke test + if: matrix.python-version == '3.12' + run: python scripts/smoke_http_viewer.py + installer-syntax: runs-on: ubuntu-latest steps: @@ -57,6 +83,12 @@ jobs: - name: Check tracked files for secret-looking data run: python scripts/check_release_hygiene.py + - name: Check runtime duplication + run: python scripts/check_runtime_duplication.py + + - name: Check tool contract + run: python scripts/check_tool_contract.py + package: runs-on: ubuntu-latest steps: @@ -76,3 +108,12 @@ jobs: - name: Check package metadata working-directory: mcp_package run: python -m twine check dist/* + + - name: Install built wheel + working-directory: mcp_package + run: python -m pip install dist/*.whl + + - name: MCP stdio smoke test + run: | + python link.py demo /tmp/link-mcp-smoke --force + python scripts/smoke_mcp_stdio.py /tmp/link-mcp-smoke/wiki diff --git a/.gitignore b/.gitignore index 98c74bb..f2df51e 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,7 @@ wiki/explorations/* # Keep index and log templates but ignore personal content # (we ship empty versions in the repo) +wiki/_link_schema.json # OS junk .DS_Store @@ -39,6 +40,8 @@ autoresearch/ # Local demo output link-demo/ +.link-backups/ +.link-cache/ # Integration artifacts (generated by install scripts) CLAUDE.md diff --git a/CHANGELOG.md b/CHANGELOG.md index c2ace5c..995552a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,240 @@ Release sections use `MAJOR.MINOR.PATCH` versions that match `link-mcp` on PyPI ## [Unreleased] +## [1.1.0] - 2026-05-08 + +### Highlights + +- Reframes Link as local personal memory for agents, with the Markdown wiki as the inspectable storage layer. +- Adds the first-use path around `link init`, `link serve`, the managed `link` command, demo proof prompts, and readiness checks. 
+- Adds the memory lifecycle: remember, recall, propose, capture, approve, review, archive, restore, forget, explain, profile, and audit. +- Adds smart query packets so MCP agents can retrieve budgeted memory, ranked wiki context, graph neighborhoods, and follow-up actions without scanning the whole wiki. +- Adds guided ingest/proposal UI, Memory Dashboard, larger graph controls, dark/light/system themes, and clearer local web navigation. +- Adds schema migration, validation gates, release hygiene, MCP contract checks, runtime duplication guardrails, and broader first-use/large-wiki smoke tests. + +### Added + +- Added Memory Mode foundation with `wiki/memories/`, `link.py remember`, `link.py recall`, and MCP `remember_memory`/`recall_memory` tools. +- Added a first-run demo memory page so Link presents as local agent memory, not only a wiki. +- Added Memory Profile views through `link.py profile`, MCP `memory_profile`, `/profile`, and `/api/memory-profile`. +- Added reversible memory lifecycle controls with `archive-memory`/`restore-memory` and MCP `archive_memory`/`restore_memory`; archived memories are hidden from recall by default. +- Added confirmed permanent memory deletion with `forget-memory` and MCP `forget_memory` for user-requested local forgetting. +- Added low-priority forget actions to memory review/explanation payloads so permanent deletion is discoverable but never the default next step. +- Added memory action commands to web inbox and explanation pages, including review, update, archive, restore, and low-priority forget actions. +- Added Memory Review Inbox with `memory-inbox`, `review-memory`, MCP `memory_inbox`/`review_memory`, `/inbox`, and `/api/memory-inbox`. +- Added Explain Memory views with `explain-memory`, MCP `explain_memory`, `/explain-memory`, and `/api/explain-memory` for provenance, review state, lifecycle, graph links, and recall readiness. 
+- Added `/propose`, a read-only local UI for turning pasted source/session notes into memory proposals without writing pages. +- Added guarded web approval actions on `/propose` with local-only `remember-memory` and `update-memory` APIs for explicitly saving selected proposals. +- Added a visible review gate to `/propose`, including manual-review states for duplicate/conflict proposals before durable memory writes. +- Kept web approval APIs on the safe path by ignoring duplicate/conflict override flags; use CLI or MCP only after explicit human review. +- Fixed duplicate proposal CLI commands so project-scoped updates preserve the normalized project key. +- Added top-level project reporting to accepted capture payloads so CLI and MCP agents can keep project-scoped memories straight. +- Added raw capture read-warning reporting so unreadable saved captures appear in CLI, MCP, local web inbox, brief, and audit diagnostics instead of disappearing silently. +- Hardened `link.py doctor` secret-content checks so unreadable scannable files fail closed instead of being skipped. +- Hardened local backups so archive failures remove partial `.tar.gz` files and return controlled CLI/MCP errors. +- Added backup-list warnings for unreadable local backup archives instead of failing the whole list operation. +- Hardened wiki validation so unreadable pages become structured `unreadable_page` errors instead of crashing validation. +- Hardened backlink rebuild commands so unreadable pages return controlled CLI, MCP, and local web errors. +- Hardened index rebuild commands so unreadable pages return controlled CLI, MCP, and local web errors. +- Hardened MCP and local web status calls so cache issues produce readiness warnings instead of crashing. +- Made the shared wiki cache skip unreadable pages with `cache_read_warnings` so search/query/graph can continue over readable pages. 
+- Added shared atomic write helpers and migrated Link state writes for schema markers, memory pages, indexes, backlinks, captures, raw source creation, logs, and demo files. +- Added a root `pyproject.toml` with conservative Ruff correctness checks and a CI lint job for pull requests. +- Optimized ingest status source matching with a reverse raw-path index instead of a nested scan of every raw file against every source page. +- Removed a redundant memory index reread from direct memory resolution paths. +- Reused cached forward-link data during context retrieval to avoid an extra primary-page disk read. +- Folded validation backlink comparison into the validation page scan so Markdown pages are read once per validation pass. +- Added MCP `link_status` and `/api/status` for a compact readiness summary with version, wiki path, page/memory counts, optional validation, and safe next actions. +- Added search backend reporting to Link status payloads so agents and users can see whether local search is using SQLite FTS or the token fallback. +- Added `link.py status` so the same readiness summary is available before MCP or the local web server is connected. +- Added `link.py status --validate` to installer next-step output so new users have one readiness command after setup. +- Added `content_page_count` to Link status and first-run guidance for structurally ready but empty wikis. +- Added status warnings so cache or memory-read degradation is visible in CLI, HTTP, and MCP readiness payloads. +- Added shared Markdown renderer coverage under `link_core.markdown` so the local web UI's sanitized Markdown behavior is tested outside the HTTP monolith. +- Added an HTTP viewer smoke test that starts a generated demo server over localhost and verifies pages, JSON APIs, security headers, and local mutation guards. +- Added a GitHub Pages-ready product site under `docs/` with local-agent-memory positioning, demo visuals, quick start, MCP links, and security links.
+- Added a `Why Link?` product page that explains where Link fits versus human-first notes apps, hosted memory APIs, stateful-agent runtimes, temporal graph memory systems, and plain file search. +- Clarified the local web viewer safety boundary in README/docs and startup output: the server binds to `127.0.0.1`, has no authentication, and should not be exposed without an added auth layer. +- Moved bundled demo wiki content into `link_core.demo` so the CLI module no longer carries the full demo payload inline. +- Moved the local web UI CSS/JavaScript assets into `link_core.web_assets` so `serve.py` stays focused on routing and rendering. +- Moved memory and raw-capture card rendering into `link_core.web_memory` so memory UI escaping and actions are covered outside the HTTP server. +- Moved the shared local web layout shell into `link_core.web_layout` so header/nav/theme/search behavior is tested outside the HTTP server. +- Moved local HTTP guard parsing and Host validation into `link_core.web_http` with isolated tests. +- Moved local viewer security header policy into `link_core.web_http` so browser hardening stays core-tested outside the HTTP server. +- Moved graph payload, category, and legend helpers into `link_core.web_graph` so graph-scale behavior is tested outside the HTTP monolith. +- Added a managed `~/.local/bin/link` command for global installs so users can run `link status --validate`, `link query`, and `link brief` without remembering wiki paths. +- Added a shared Link runtime version and `link --version`; CLI and local HTTP status now report the same release version as the package. +- Switched MCP status version reporting to the shared Link runtime version so source checkouts and installed packages cannot drift. +- Added `link init` to create or repair a normal Link wiki without loading demo content. +- Added `link serve` to start the local web viewer without remembering `serve.py` paths. 
+- Made `link verify-mcp` require the installed `link-mcp` version to match the local Link runtime before reporting ready. +- Made `link verify-mcp` print shell-quoted install and upgrade commands using the exact Python executable being verified. +- Made `link verify-mcp` import-check the MCP SDK dependency so broken partial installs no longer report ready. +- Made `link verify-mcp --json` return structured issue codes and repair actions for agent/tooling consumers. +- Improved local server startup errors with bounded port validation in both `link serve` and `serve.py`, plus clear next-port guidance when a port is already in use. +- Added `link benchmark` to measure local cache, search, smart query, and graph timings on a user's current wiki. +- Extended `link benchmark` and large-wiki smoke to prove bounded agent payload timings for graph summaries and page lists. +- Extended `link benchmark` and large-wiki smoke to prove the graph page's initial browser payload stays bounded on huge wikis. +- Added an ignored `.link-cache/` persistent page-record cache so unchanged large wikis can warm search/context indexes without rereading every Markdown page. +- Extended the first-use smoke to run `link graph-summary` and `link benchmark` so the demo value loop is release-gated. +- Made the local graph viewer start with a bounded overview for very large wikis, with an explicit full-graph load control. +- Hardened local write APIs by rejecting browser `Origin`/`Referer` headers that do not point at the local Link viewer. +- Added in-memory rate limiting for local write APIs so runaway local clients get structured JSON `429` responses with `Retry-After`. +- Added explicit local JSON `405` responses for browser preflight requests without granting CORS access. +- Added Content Security Policy headers to the local viewer and a stricter SVG asset policy. +- Added browser isolation and permissions-policy headers, and marked local JSON API responses `Cache-Control: no-store`. 
+- Marked local HTML pages and served static/raw files `Cache-Control: no-store` so private memory pages and source media are not browser-cached. +- Added shared legacy `Pragma`/`Expires` no-cache headers for local personal-memory responses. +- Returned hardened JSON `405` responses for unsupported local HTTP methods, including `TRACE` and `CONNECT`, instead of default server HTML. +- Hardened `HEAD` handling so local health/static checks return headers without bodies and always reset response state. +- Bounded local HTTP query, search, project, graph-summary, memory lookup, and proposal metadata parameters with the same text normalization used by CLI/MCP inputs. +- Bounded `/propose` page seed query values before rendering source/project form defaults. +- Added an interactive-readiness verdict and threshold warnings to `link benchmark` so larger local wikis are easier to evaluate. +- Added shared benchmark health checks to the large-wiki smoke so user-facing and CI scale verdicts stay aligned. +- Tightened ownership of generated search caches in CLI query and index rebuild paths so in-memory SQLite indexes are closed when short-lived operations finish. +- Hardened smart query budget normalization so unexpected or oversized adapter values safely fall back to `medium`. +- Added an explicit local HTTP API version header and status field for future integration compatibility. +- Added wiki schema markers with safe `link migrate`/MCP `migrate_wiki` migrations for future local format changes. +- Added first-run agent prompts to installer output so new users can immediately try brief, remember, and query workflows. +- Added `link prompts` to print the first-run natural agent prompts and local readiness checks on demand. +- Added `/prompts` and `/api/prompts` so browser-first users get the same starter prompt guidance as the CLI. +- Added MCP `starter_prompts` so MCP-only agents can retrieve the same first-run prompt guidance. 
+- Updated installed agent instructions and release hygiene so `starter_prompts` remains part of the public agent contract. +- Added guided `link ingest-status` output with structured JSON guidance, exact agent prompts, and follow-up validation commands. +- Added visible post-ingest checks to the CLI and local ingest UI so users see the rebuild/validate/status loop before relying on generated pages. +- Added `/ingest` and `/api/ingest-status` so the local UI shows pending raw files, graph health, and the next agent prompt. +- Added a local `/ingest` Add Raw Source form and `POST /api/raw-source` so browser-first users can paste a source, save it under `raw/`, block secret-looking values, and copy the next ingest prompt without remembering filesystem paths. +- Added ingest completion cards that show which raw files are represented, link to their source pages, and provide copyable memory/query prompts for post-ingest validation. +- Added the same represented-source completion summary to `link ingest-status` for terminal-first users. +- Added MCP `ingest_status` so MCP-only agents can inspect pending raw files and validation guidance. +- Added `link rebuild-index`, MCP `rebuild_index`, and `POST /api/rebuild-index` to regenerate the human-readable wiki catalog from current pages. +- Improved `doctor --fix` so it repairs index drift and rebuilds backlinks afterward. +- Added clearer product framing in the README and local home page for the distinction between source-backed wiki knowledge and explicit agent memory. +- Added a local raw-source picker to `/propose` with secret-aware loading for proposal-only memory workflows. +- Added shared proposal action hints so memory proposals include the safest approval prompt, local command, MCP tool, and arguments. +- Added a wider graph page layout with fullscreen mode so larger wikis can be explored without being squeezed into the reading column. 
+- Added large-graph controls for node search, type filtering, and selected-node neighborhood depth. +- Added a capped default graph overview for huge wikis so the canvas draws the most connected nodes first while search and selected neighborhoods still pull relevant nodes into view. +- Added bounded graph summaries through CLI, HTTP, and MCP so agents can inspect large graph structure without loading every node and edge into context. +- Made graph edge construction cache-backed so large graph rendering/export avoids rereading every Markdown page after cache warmup. +- Added bounded page-list payloads for MCP and HTTP so agents can inspect page metadata without dumping very large wikis into context. +- Added bounded backlink/page-link payloads for MCP and HTTP so hub pages do not flood agent context. +- Added a short local-server cache poll interval so hot navigation reuses the warmed wiki cache instead of rescanning every page for each request. +- Added duplicate protection for `remember`/`remember_memory`; strong duplicate memories are refused unless explicitly allowed. +- Added memory merge/update workflow with `update-memory` and MCP `update_memory`, including update counts, audit logs, backlink rebuilds, and review reset. +- Added proposal-only memory extraction with `propose-memories` and MCP `propose_memories` for chat/session notes. +- Added agent memory briefs with `link.py brief` and MCP `memory_brief` so agents can prime themselves with relevant local memory before a task. +- Added smart Link query packets with `link.py query`, MCP `query_link`, and `/api/query-link` so agents can retrieve budgeted memory, ranked wiki results, and graph context without reading the whole wiki. +- Added smart query budget reports and follow-up tool actions so agents know when context was truncated and how to continue without scanning the whole wiki. +- Added estimated character/token counts to smart query budget reports so agents can reason about context cost. 
+- Bounded agent-facing CLI query strings for `query`, `brief`, `graph-summary`, and `benchmark` to match the MCP server's safer input posture. +- Added provenance metadata to smart query memory and wiki packets so agents can explain why Link knows something without loading full pages. +- Added precomputed search word indexes so repeated wiki search and smart query calls avoid rebuilding per-page word sets on larger wikis. +- Added optional in-memory SQLite FTS search acceleration with token-index fallback so large local wikis stay fast without adding a server dependency. +- Improved smart query follow-ups so a truncated large-budget packet does not ask the agent to rerun the same large budget again. +- Added `link.py validate` as an ingest gate for agent-generated wiki pages, covering required frontmatter, type/directory alignment, required sections, dead links, and stale backlinks. +- Added MCP `validate_wiki` and `/api/validate` so agents can run the same ingest gate without shell access. +- Added a runtime duplication guard in CI to block new large copied helper bodies across CLI, web, and MCP runtimes. +- Added a tool contract guard in CI to keep public CLI commands, MCP tools, and README references from drifting. +- Tightened memory mutation adapters so CLI and MCP memory writes share more core behavior with fewer runtime-side exceptions. +- Extracted shared memory audit risk-factor logic into core so CLI, web, and MCP report the same health semantics. +- Extracted shared memory brief capture guidance into core and removed the last allowed large duplicate runtime helper. +- Added raw capture status to CLI and MCP memory briefs so session priming surfaces saved captures and secret-warning captures. +- Added `/brief` and `/api/memory-brief` so the local web UI and HTTP clients can get startup memory context, review warnings, and raw capture status. 
+- Added `memory-audit` and MCP `memory_audit` for a read-only health report covering memory backlog, raw captures, risk factors, and next actions. +- Added `/audit` and `/api/memory-audit` so the local web UI exposes the same read-only memory audit report. +- Added memory review and raw capture backlog checks to `link.py doctor`, while excluding proposal-only raw captures from ingest-status pending source counts. +- Added conflict detection for memory writes, updates, and proposals; contradictory active memories are surfaced before saving unless explicitly allowed. +- Added shared memory review action plans so inbox and explanation payloads tell agents whether to review, update, archive, restore, or edit metadata next. +- Added project-aware memory boundaries so project-scoped memories can carry a project key and recall/profile/brief keep other explicit projects out of context. +- Improved memory recall ranking so project-matched and reviewed memories win ties while archived/stale memories rank lower when explicitly included. +- Added `link.py capture-session` to save long session notes under `raw/memory-captures/` and return proposal-only memory candidates for human approval. +- Added MCP `capture_session` so agents can preserve long session notes locally before asking which memory proposals to write. +- Added secret-looking content warnings to CLI and MCP session capture results so pasted tokens can be redacted from local raw notes. +- Added `link.py accept-capture` to turn an approved raw-capture proposal into a durable memory through duplicate/conflict-safe writes. +- Added MCP `accept_capture` for approving saved capture proposals through the same duplicate/conflict-safe workflow. +- Added `link.py redact-capture` to replace secret-looking values in saved raw captures while logging only warning labels and counts. +- Added MCP `redact_capture` so agents can redact saved raw captures after user approval. 
+- Added `link.py delete-capture` with explicit confirmation for removing saved raw captures without logging capture contents. +- Added MCP `delete_capture` with explicit confirmation for removing saved raw captures. +- Added `link.py capture-inbox` to list saved raw captures, secret warnings, and accept/redact/delete commands. +- Added MCP `capture_inbox` to review saved raw captures with redacted snippets before accepting, redacting, or deleting them. +- Added raw capture visibility to `/memory` and `/api/memory-dashboard`, including accept/redact/delete commands and secret-warning counts. +- Added `/captures` and `/api/capture-inbox` for a dedicated local web/API raw capture inbox. +- Added project filtering to `/memory`, `/profile`, `/api/memory-dashboard`, `/api/memory-profile`, and `/api/memory-inbox`. +- Added project filtering to CLI and MCP memory inbox workflows. +- Added read-only web Memory Dashboard at `/memory` and `/api/memory-dashboard` for active memories, review queue, recent updates, archived memories, and next-action commands. +- Added recall readiness metadata to recalled memories so CLI, MCP, and brief payloads expose whether memory is ready, provisional, unsafe, or disabled. +- Added local web review/archive/restore memory actions backed by guarded HTTP POST endpoints; permanent forget remains command/tool-only. +- Added secure proposal-only HTTP endpoint `POST /api/propose-memories`; HTTP memory mutations are limited to local review/archive/restore actions. +- Added a graph node inspector so moving nodes no longer accidentally opens pages; double-click or Open page still navigates. +- Added an explicit `system`/`dark`/`light` theme toggle for the local web UI; dark mode now uses a black page background. +- Added a real MCP stdio smoke test for the built `link-mcp` wheel in CI. +- Added MCP `starter_prompts` coverage to the real stdio smoke test. 
+- Reused the shared starter prompt payload on the home page so UI, CLI, API, and MCP prompt guidance cannot drift. +- Normalized explicit starter prompt project names so CLI, HTTP, and MCP return consistent project slugs. +- Blocked normal ingest guidance for raw files with secret-looking values so users redact them before any agent reads them into wiki memory. +- Blocked normal ingest guidance for raw files Link cannot read and safety-scan, with explicit CLI, HTTP, and MCP payload diagnostics. +- Blocked normal ingest guidance when source pages cannot be read, because represented/pending raw counts may be incomplete. +- Switched raw-source secret detection to streaming file scans so large source folders do not get loaded into memory during ingest status checks. +- Added an explicit ingest `safety` summary across CLI, HTTP, and MCP payloads so agents do not need to infer whether raw sources are clear, warning-only, or blocked. +- Added copy buttons for guided ingest prompts and post-ingest checks in the local web UI. +- Made proposal source discovery stream secret scans, read only bounded previews, and return explicit source actions for load, redact, or split. +- Made proposal source discovery return explicit fix-access actions for raw files that cannot be read. +- Hardened direct proposal-source loading to reject oversized path inputs and hidden raw files, matching the source picker. +- Added benchmark health summaries and recommendations so `link benchmark` produces clearer proof-of-readiness output. +- Improved benchmark recommendations so slow search, cache, page-list, and graph paths get targeted repair guidance. +- Added README trust-gate guidance for ingest safety, proposal review, validation, benchmark readiness, duplicate/conflict checks, and first-run benchmarking. +- Clarified README guidance for `link verify-mcp`, including version parity, MCP SDK dependency, wiki path, and config checks. 
+- Clarified README and PyPI docs that status reports content/page/memory counts, not just structural page totals. +- Added a first-use smoke test for init, demo, status, query, brief, remember, capture, ingest-status, and validation workflows. +- Added `link prompts` coverage to the first-use smoke so CI validates the first-run agent prompt path. +- Added `doctor --fix` coverage for schema marker creation so one-command repairs stay release-gated. +- Added large-wiki smoke coverage for smart query budgets and graph generation across hundreds of pages. +- Added timing thresholds to large-wiki smoke coverage so major search/query/graph performance regressions fail early. +- Added release hygiene checks that protect the public agent instruction contract for `query_link`, `validate_wiki`, and `memory_brief`. +- Expanded release hygiene checks so public agent instructions must retain `ingest_status`, `get_graph_summary`, and `backup_wiki` guidance. +- Routed web memory/search limit parsing through the shared bounded-integer helper so local API endpoints handle invalid limits consistently. +- Hardened release hygiene so `server.json` cannot silently lose the `link-mcp` package version entry. +- Added release hygiene checks that block accidental outbound HTTP client code in tracked Python and shell runtimes. +- Expanded outbound-network release hygiene to catch stdlib `http.client` and `urllib` request aliases. +- Expanded outbound-network release hygiene to catch direct stdlib `socket` client imports while allowing the local `socketserver` viewer. +- Updated agent contract checks and installed instructions to include `link_status` for setup/readiness checks. +- Changed CI to run on pull requests and manual dispatch only, preserving GitHub minutes for the develop-branch workflow. +- Added CLI validation to the CI demo health smoke path so PRs catch broken generated wiki templates. 
+- Updated the PyPI package README with the current MCP tool contract, validation workflow, capture inbox, and permanent-forget guidance. +- Added PyPI package README trust notes for local-first privacy, bounded agent context, SQLite FTS, and graph-summary-first usage. +- Updated package classifiers and PR CI coverage for modern Python, including Python 3.14. +- Added Memory Dashboard next actions so the web UI and API surface the most important memory maintenance step. +- Extracted shared memory proposal logic into `link_core` so CLI, HTTP, and MCP proposal behavior stays aligned. +- Extracted shared raw capture path resolution and notes parsing into `link_core` so CLI and MCP capture operations use the same root-escape guard. +- Extracted shared frontmatter parsing and typed update helpers into `link_core` for safer memory metadata writes. +- Extracted shared memory record loading, review inbox, profile, and recall helpers into `link_core`. +- Extracted shared memory resolution, log lookup, and recall-state helpers into `link_core`. +- Extracted shared memory lifecycle mutations for archive, restore, review, and update workflows into `link_core`. +- Extracted shared memory creation for `remember` and `remember_memory` into `link_core`. +- Extracted shared wiki indexing, search, context, graph, and backlink helpers into `link_core`. +- Extracted shared search ranking and optional SQLite FTS helpers into `link_core.search` so wiki indexing stays separate from search execution. +- Extracted shared memory explanation/audit payloads into `link_core`. + +### Fixed + +- Tightened README onboarding and release examples around Link's local memory product value. +- Simplified onboarding docs and installed instructions around natural agent prompts and the short `link` command instead of path-heavy maintenance commands. +- Moved the local UI theme control into a compact header utility above search so it no longer wraps awkwardly in the navigation row. 
+- Reworked the local UI header into a clean brand/tools row with navigation tabs below it. +- Fixed installer MCP setup reporting so failed upgrades no longer masquerade as success by reusing an unrelated older global `link-mcp`. +- Fixed project-mode installer output so MCP wiki paths are absolute and next-step hints point at the project wiki instead of `~/link`. +- Fixed search/context matching for natural queries against hyphenated page slugs, e.g. `local first software` now finds `local-first-software`. +- Fixed missing HTTP context topics to return a controlled 400 JSON error. +- Hardened backlink rebuild over HTTP so local web rebuilds require JSON POST instead of a mutating GET. +- Hardened HTTP rebuild actions so local web index/backlink mutations require the explicit local-action header. +- Hardened local web startup so unsupported host/bind flags fail instead of implying public serving is supported. +- Hardened `/raw/` static serving so the local web viewer only serves supported media/PDF source assets. +- Tightened raw asset path resolution so `/raw/` URLs cannot route through non-raw static allowlists, including encoded parent-directory paths. +- Hardened HTTP memory mutation endpoints with an explicit `X-Link-Local-Action: true` header required by non-UI clients. +- Refreshed the checked-in demo backlink index so `link.py doctor .` reports a healthy graph. + ## [1.0.7] - 2026-05-04 ### Fixed @@ -28,6 +262,7 @@ Release sections use `MAJOR.MINOR.PATCH` versions that match `link-mcp` on PyPI ### Changed +- Moved raw capture inbox parsing, project filtering, snippet redaction, and command generation into shared `link_core.capture` helpers. - Polished the graph view with reset, label, and motion controls, keyboard focus, empty-state handling, cursor-centered zoom, and sticky dragged node placement. - Restructured README.md into a product-doc flow: promise, quick start, first 10 minutes, install paths, then reference and release details. 
- Switched release guidance to `release/*` branches and made changelog updates part of the release checklist. @@ -39,6 +274,7 @@ Release sections use `MAJOR.MINOR.PATCH` versions that match `link-mcp` on PyPI - Hardened installers to avoid silently using `--break-system-packages`; they now fall back to `~/.link-mcp-venv` and register MCP with the resolved Python. - Hardened the local viewer against unsafe graph JSON embedding, path-like wikilink targets, malformed static paths, and local path leakage from static file errors. +- Hardened the local viewer to reject unexpected `Host` headers in addition to binding to `127.0.0.1`. - Hardened `link-mcp` tool inputs for empty queries/topics and invalid search limits. - Expanded `doctor` and release hygiene checks for common credential filenames, private keys, and token patterns. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..8c53354 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,41 @@ +# Contributing To Link + +Thanks for helping improve Link. The goal is simple: make local agent memory +easier to trust, inspect, and use. + +Please open pull requests against `main` unless the maintainer asks for a +different target. The `develop` branch is a maintainer integration branch for +staging release work before it is proposed to `main`. + +## Before Opening A PR + +Run the local gate: + +```bash +python3 -m pip install "ruff>=0.8,<1" +python3 -m ruff check . +python3 -m unittest discover -s tests +python3 scripts/check_release_hygiene.py +python3 scripts/check_runtime_duplication.py +python3 scripts/check_tool_contract.py +git diff --check +``` + +For UI changes, include a screenshot or GIF. For installer, MCP, memory-write, +HTTP API, or automation changes, call that out explicitly in the PR description. + +## PR Description + +Include: + +- What changed. +- How you tested it. +- Whether it touches memory writes, installers, MCP behavior, HTTP endpoints, or + automation. 
+- Screenshots or GIFs for UI changes. + +Do not include personal wiki data, raw sources, registry tokens, `.env` files, or +local MCP credentials in a PR. + +Full contributor guide: +https://gowtham0992.github.io/link/contributing.html diff --git a/LINK.md b/LINK.md index e4f11ee..e2f6106 100644 --- a/LINK.md +++ b/LINK.md @@ -1,6 +1,6 @@ -# Link — Personal Knowledge Wiki +# Link — Local Agent Memory -You are the maintainer of a personal knowledge wiki called **Link**. Your job is to read raw sources, compile them into structured Wikipedia-style articles, maintain cross-references, and keep the wiki healthy over time. The human curates sources and asks questions. You do everything else. +You are the maintainer of local agent memory called **Link**. Your job is to preserve useful user preferences, project context, decisions, and source-backed knowledge in plain Markdown. The wiki is the storage format; durable local memory is the product. ## Architecture @@ -15,6 +15,7 @@ link/ │ ├── sources/ ← one summary page per ingested source │ ├── concepts/ ← concept/topic articles │ ├── entities/ ← people, orgs, projects, tools +│ ├── memories/ ← user preferences, decisions, project facts │ ├── comparisons/ ← side-by-side analyses │ └── explorations/ ← filed-back query results ├── serve.py ← local Wikipedia-style web viewer @@ -159,6 +160,45 @@ Who or what is this entity? What are they known for? Why do they matter in this - [[source-page-1]] ``` +### Memory Page (`wiki/memories/`) + +Use memory pages for durable user preferences, project decisions, stable facts about the user's work, and context agents should recall across sessions. These are directly captured memories, not neutral encyclopedia articles. 
+ +```markdown +--- +type: memory +title: "Short Memory Title" +memory_type: preference | decision | project | fact | note +scope: user | project | global +project: "optional-project-slug" +status: active | stale | archived +date_captured: "2026-04-09T14:30:00Z" +updated_at: "" +update_count: 0 +source: "manual | conversation | mcp | raw/source.md" +last_update_source: "" +review_status: pending | reviewed | needs_update +tags: [memory, relevant-tag] +--- + +# Short Memory Title + +> **TLDR:** One sentence explaining what future agents should remember. + +## Memory + +The durable memory, written clearly enough for a future agent to use without rereading the original chat. + +## Use This When + +- Situation where this memory should affect future work. +- Another situation where this memory is relevant. + +## Source + +Where the memory came from and why it is trustworthy. +``` + ### Comparison Page (`wiki/comparisons/`) ```markdown @@ -234,11 +274,45 @@ How the answer was derived. Which pages were consulted. What connections were ma ## Operations -### 1. Ingest +### 1. Remember + +When the human says to remember something, capture it as local memory. Prefer the built-in command when `link.py` is available: + +```bash +python3 link.py remember "User prefers release/* branches for Link work." . --type preference --scope project --tags git,release +``` + +Rules: +- Only save memories the human explicitly asks to remember or confirms should be remembered. +- Keep memories specific and actionable. "User likes quality" is too vague; "User prefers release/* branches over codex/* branches" is useful. +- Use `memory_type: preference` for user preferences, `decision` for choices made, `project` for project context, `fact` for stable facts, and `note` for everything else. +- Use `scope: user` for broad personal preferences, `project` for the current project, and `global` for agent-wide principles. +- For `scope: project`, include a project key when you know it. 
`link.py` infers this from repo-local installs; otherwise pass `--project <project>` or MCP `project`. +- At the start of a session or substantial task, run `python3 link.py brief "<task>" .` or MCP `memory_brief` when available. Treat this as the default way to prime yourself with local memory, review warnings, and saved raw capture status. +- For long chat/session notes, prefer `python3 link.py capture-session "<notes>" .` or MCP `capture_session`; it stores the raw note locally and returns proposal-only memory candidates. If you do not need to keep the raw note, run `python3 link.py propose-memories "<notes>" .` or MCP `propose_memories` instead. Do not write proposals until the human confirms. +- Use `python3 link.py capture-inbox .` or MCP `capture_inbox` to review saved raw captures, secret warnings, and the exact accept/redact/delete commands before changing capture state. +- When the human approves a captured proposal, run `python3 link.py accept-capture "<capture>" . --index <n>` or MCP `accept_capture`. If it reports a duplicate or conflict, stop and ask whether to update/archive the existing memory instead. +- If capture results report `secret_warnings`, ask the human whether to redact the raw capture. Use `python3 link.py redact-capture "<capture>" .` or MCP `redact_capture`; it replaces secret-looking values and logs labels/counts only. +- If the human asks to remove a raw capture, run `python3 link.py delete-capture "<capture>" . --confirm` or MCP `delete_capture` with `confirm: true`. Never delete captures without explicit confirmation. +- Run `python3 link.py recall "<query>" .` before answering questions that might depend on remembered preferences or project decisions. +- Run `python3 link.py memory-audit .` or MCP `memory_audit` when the human asks what needs attention in Link memory. +- Run `python3 link.py profile .` when the human asks what Link knows or when you need a quick overview of remembered preferences, decisions, and project context.
+- Run `python3 link.py memory-inbox .` or MCP `memory_inbox` to find pending, stale, invalid, or underspecified memories and follow each item's primary action. Pass `--project <project>` or MCP `project` when reviewing a specific project. +- If `remember` reports a duplicate candidate, inspect it with `python3 link.py explain-memory "<memory>" .` and merge new information with `python3 link.py update-memory "<memory>" "new detail" .` instead of creating another one. Use `--allow-duplicate` only when the human confirms it should be separate. +- If `remember`, `update-memory`, or `propose-memories` reports conflict candidates, stop and ask the human whether the older memory should be updated, archived, or allowed to coexist. Use `--allow-conflict` only when the human confirms both memories are true in different contexts. +- After updating a memory, review it again with the human because `update-memory` resets `review_status` to `pending`. +- After the human confirms a memory is accurate, run `python3 link.py review-memory "<memory>" .`. +- Run `python3 link.py explain-memory "<memory>" .` when the human asks why an agent knows something or whether a memory is safe to use. +- If a memory is stale or wrong, archive it with `python3 link.py archive-memory "<memory>" . --reason "why"`. Do not delete memory pages unless the human explicitly asks for permanent removal. +- Before broad repair work or risky local wiki edits, create a local backup with `python3 link.py backup .` or MCP `backup_wiki`. Do not include `raw/` unless the human explicitly asks because raw sources and captures can contain sensitive material. +- If the human explicitly asks Link to permanently forget a memory, use `python3 link.py forget-memory "<memory>" . --confirm` or MCP `forget_memory` with `confirm: true`. Prefer archive when reversible cleanup is enough, and do not create a backup that preserves the memory unless the human explicitly asks for one. +- Restore an archived memory with `python3 link.py restore-memory "<memory>" .`. + +### 2.
Ingest When the human adds a new source to `raw/` and asks you to process it: -0. Run `python3 link.py ingest-status .` when `link.py` is available to see pending raw files and current graph state +0. Run `python3 link.py ingest-status .` when `link.py` is available to see pending raw files, current graph state, and the suggested ingest workflow. If it reports `blocked_secrets` or secret warnings, stop and ask the human to redact the flagged raw file before reading or ingesting it. 1. Read the source completely 2. Discuss key takeaways with the human (brief, 3-5 bullet points) 3. Create a source page in `wiki/sources/` following the template @@ -257,32 +331,40 @@ When the human adds a new source to `raw/` and asks you to process it: - After updating a page, re-read it as a whole. If it no longer reads as a coherent article, restructure it before moving on. - Watch for page bloat: if a sub-topic is growing past 2-3 paragraphs within an article, it likely deserves its own page. Split proactively. - Conversely, a new page must have enough substance to stand alone. If you cannot write at least a meaningful TLDR + Overview, fold the information into an existing page instead. -- After ingest completes, rebuild `wiki/_backlinks.json` by scanning all `[[wikilinks]]` across the wiki. +- After ingest completes, rebuild `wiki/index.md` and `wiki/_backlinks.json` so both the human catalog and graph index match the pages. +- After rebuilding index/backlinks, run MCP `validate_wiki`, `python3 link.py validate .`, or `GET /api/validate` when available. Treat validation errors as blockers before reporting ingest complete. **Image ingest rules:** - Images in `raw/` (png, jpg, webp, gif, svg) are valid sources. Use vision to understand what the image IS. - Create a source page for the image just like any other source. Describe what you see. 
- Embed the image in the source page using: `![description](/raw/filename.png)` -- The web viewer serves `raw/` files directly, so image paths just work. +- The web viewer serves supported `raw/` image/PDF assets directly, so image paths just work without exposing every raw file type. - For screenshots: describe the UI, layout, key elements, purpose. - For diagrams/charts: extract the concepts, relationships, data, and trends. - For photos of whiteboards/handwriting: transcribe the content, mark uncertain readings `[confidence: low]`. - For tweets/posts as images: extract the text, author, and key claims. - Link extracted concepts to existing wiki pages, same as text sources. -### 2. Query +### 3. Query When the human asks a question: -1. **If `serve.py` is running:** call `GET /api/context?topic=` — returns the best matching page plus all related pages via graph traversal in one call. This is faster and uses fewer tokens than reading index.md manually. -2. **If server is not running:** read `wiki/index.md` to find relevant pages (check `also:` aliases for matches), then check `wiki/_backlinks.json` for pages that reference the topic. -3. Read the relevant pages and synthesize an answer. -4. Cite your sources with [[wiki-links]]. -5. Ask the human: "Want me to file this?" Answers that are comparisons should file as comparison pages, not explorations. Match the result to the right page type. -6. If yes, create a page in the appropriate directory following its template. -7. Append to `wiki/log.md`. - -### 3. Lint +1. If you are connecting to Link for the first time or troubleshooting setup, call MCP `link_status`, run `python3 link.py status . --validate`, or call `GET /api/status?validate=true`. +2. If the human asks what to try after installing Link, call MCP `starter_prompts`, run `python3 link.py prompts .`, or call `GET /api/prompts`. +3. 
If status reports a missing or old schema marker, run MCP `migrate_wiki` when available or `python3 link.py migrate .` before other writes. +4. If the user asks to ingest or says they dropped files into `raw/`, use MCP `ingest_status`, `python3 link.py ingest-status .`, or `GET /api/ingest-status` to get pending files, the guided ingest plan, and the next prompt/checks. If the state is `blocked_secrets`, do not read or ingest flagged raw files until the human redacts them. +5. Start with the smart query path when available: MCP `query_link`, `python3 link.py query "<question>" .`, or `GET /api/query-link?q=<question>`. This returns a compact context packet with relevant memory, ranked wiki results, graph context, provenance, selection reasons, budget reports, and follow-up tool actions. Use provenance fields to explain why Link knows something. Do not read the whole wiki unless the packet is insufficient; if it is budget-limited, use the returned `follow_up` action first. +6. If the question only needs session priming or personal/project preferences, use `python3 link.py brief "<task>" .` or MCP `memory_brief`. Use `profile`/`memory_profile` and `recall`/`recall_memory` afterward only when you need deeper detail. +7. **If you need full source-backed context for one topic:** call `GET /api/context?topic=<topic>` or MCP `get_context` — returns the best matching page plus related pages via graph traversal. +8. **If you need graph orientation on a large wiki:** use `python3 link.py graph-summary "<topic>" .`, `GET /api/graph-summary?topic=<topic>`, or MCP `get_graph_summary` before requesting the full graph. +9. **If server/MCP is not available:** read `wiki/index.md` to find relevant pages (check `also:` aliases for matches), then check `wiki/_backlinks.json` for pages that reference the topic. +10. Read only the relevant pages or packet items and synthesize an answer. +11. Cite your sources with [[wiki-links]]. +12. Ask the human: "Want me to file this?"
Answers that are comparisons should be filed as comparison pages, not explorations. Match the result to the right page type. +13. If yes, create a page in the appropriate directory following its template. +14. Append to `wiki/log.md`. + +### 4. Lint When the human asks you to health-check the wiki (or periodically on your own): @@ -294,7 +376,9 @@ python3 link.py doctor . Use `python3 link.py doctor . --fix` only for safe mechanical repairs: creating missing Link directories/files and rebuilding `_backlinks.json`. Do not use it as a substitute for content review. -Treat doctor errors as blockers. Doctor warnings are quality issues to triage with the human. It checks required structure, dead links, stale backlinks, index drift, TLDR/query summaries, Sources sections, `source_count` consistency, isolated graph pages, raw-source coverage, and secret-looking filenames or file contents. +Run `python3 link.py validate .` after ingest or large page edits. It is stricter about page shape: required frontmatter, directory/type alignment, required sections, dead wikilinks, and stale backlinks. + +Treat doctor errors as blockers. Doctor warnings are quality issues to triage with the human. It checks required structure, dead links, stale backlinks, index drift, TLDR/query summaries, Sources sections, `source_count` consistency, isolated graph pages, raw-source coverage, memory review state, raw capture backlog, and secret-looking filenames or file contents. Run these checks and report findings: @@ -313,10 +397,10 @@ Run these checks and report findings: For each finding, suggest a specific action. Then ask the human which ones to execute. -Rebuild `wiki/_backlinks.json` after executing fixes. Prefer `python3 link.py rebuild-backlinks .` when `link.py` is available; otherwise call `GET /api/rebuild-backlinks` on the local server or rebuild manually. Append lint results to `wiki/log.md`. +Create a local backup before broad repairs with `python3 link.py backup .` or MCP `backup_wiki`.
Rebuild `wiki/index.md` and `wiki/_backlinks.json` after executing fixes. Prefer `python3 link.py rebuild-index .` and `python3 link.py rebuild-backlinks .` when `link.py` is available; otherwise call `POST /api/rebuild-index` and `POST /api/rebuild-backlinks` with JSON `{}` on the local server or rebuild manually. Append lint results to `wiki/log.md`. -### 4. Research +### 5. Research When the human wants to find or capture new source material for the wiki. Research has three modes based on where the material comes from. @@ -378,6 +462,9 @@ When the human wants to find or capture new source material for the wiki. Resear ### Category B - [[example-person]] — One-line description. growing · 4 sources +### memories +- [[example-preference]] — One-line memory summary. preference · user + ### Category C - [[example-project]] — One-line description. seed · 1 source @@ -479,20 +566,34 @@ Structure (current format): Used during query to find related pages, and during lint to detect orphans and backlink imbalances. -**Rebuilding:** Run `python3 link.py rebuild-backlinks .` when `link.py` is available. Otherwise call `GET /api/rebuild-backlinks` on the local server (if running), or scan all `[[wikilinks]]` manually and write the file. Always rebuild after ingest and lint. +**Rebuilding:** Run `python3 link.py rebuild-index .` and `python3 link.py rebuild-backlinks .` when `link.py` is available. Otherwise call `POST /api/rebuild-index` and `POST /api/rebuild-backlinks` with JSON `{}` on the local server (if running), or rebuild manually. Always rebuild both after ingest and lint. 
## Local Server API -`serve.py` exposes a local HTTP API at `http://localhost:3000`: +`serve.py` exposes a local HTTP API at `http://127.0.0.1:3000`: | Endpoint | Description | |----------|-------------| -| `GET /api/pages` | All pages with title, type, tags, aliases, maturity, tldr | +| `GET /api/page-list?limit=100&offset=0` | Bounded page metadata list for agents and large wikis, with follow-up pagination actions | +| `GET /api/pages` | Full page metadata list for local UI/export use | +| `GET /api/status?validate=true` | Readiness summary with page/memory counts, optional validation summary, and safe next actions | +| `GET /api/ingest-status` | Raw ingest state with pending files, represented-source completion cards, safety summary, graph health, and next prompts/checks | +| `GET /api/memory-brief?q=&project=` | Startup memory context: relevant memories, review warnings, capture status, and safe rules | +| `POST /api/raw-source` | Header `X-Link-Local-Action: true`; save pasted source text under `raw/`, reject secret-looking values, and return the next ingest prompt | +| `POST /api/propose-memories` | Propose memories from JSON `{ "text": "..." }` without writing pages | +| `POST /api/review-memory` | Header `X-Link-Local-Action: true`; JSON `{ "memory": "name", "note": "optional" }`; mark a memory reviewed | +| `POST /api/archive-memory` | Header `X-Link-Local-Action: true`; JSON `{ "memory": "name", "reason": "optional" }`; archive a memory from default recall | +| `POST /api/restore-memory` | Header `X-Link-Local-Action: true`; JSON `{ "memory": "name" }`; restore archived memory to active recall | +| `GET /api/capture-inbox?project=` | Saved raw captures with redacted snippets, warnings, and commands | | `GET /api/search?q=` | Ranked search — title, alias, tag, fulltext. 
Returns scores + snippets | | `GET /api/context?topic=` | Best matching page + inbound/forward links in one call | -| `GET /api/graph` | All nodes + edges for graph visualization | +| `GET /api/graph-summary?topic=&limit=40&depth=1` | Bounded graph overview or topic neighborhood for agents and large wikis | +| `GET /api/graph` | All nodes + edges for graph visualization/export | +| `GET /api/page-links?page=&limit=100&offset=0` | Bounded inbound/forward links for one page, with follow-up pagination actions | | `GET /api/backlinks` | Reverse link index | -| `GET /api/rebuild-backlinks` | Rebuild `_backlinks.json` by scanning all wikilinks | +| `POST /api/rebuild-index` | JSON `{}`; regenerate `wiki/index.md` from current pages | +| `POST /api/rebuild-backlinks` | JSON `{}`; rebuild `_backlinks.json` by scanning all wikilinks | +| `GET /api/validate?strict=true` | Validate generated wiki pages; failed gates return HTTP 422 with structured findings | During query operations, prefer `/api/context?topic=X` over reading files manually — it returns the primary page plus all related pages via graph traversal in one call. diff --git a/README.md b/README.md index 33b5fe5..edb313f 100644 --- a/README.md +++ b/README.md @@ -1,62 +1,117 @@

- Link + Link

# Link -Local personal memory for LLM agents. +**Local, source-backed memory for LLM agents.** -Link turns raw sources into a local Markdown wiki that agents can search, cite, traverse, and maintain over time. It implements the [LLM Wiki pattern](https://gist.github.com/karpathy/442a6bf555914893e9891c11519de94f): keep knowledge outside the chat window, make every claim inspectable, and let the memory compound. +Link gives Codex, Claude, Cursor, Kiro, VS Code, Copilot, and other MCP clients +the same durable memory about you and your work. It stays on your machine as +plain Markdown, with sources, backlinks, graph context, review state, and an +audit trail you can inspect. + +It follows Andrej Karpathy's +[LLM Wiki pattern](https://gist.github.com/karpathy/442a6bf555914893e9891c11519de94f): +keep knowledge outside the chat window, make claims inspectable, and let context +compound over time. [![GitHub](https://img.shields.io/github/stars/gowtham0992/link?style=flat)](https://github.com/gowtham0992/link) [![CI](https://github.com/gowtham0992/link/actions/workflows/ci.yml/badge.svg)](https://github.com/gowtham0992/link/actions/workflows/ci.yml) [![MCP Registry](https://img.shields.io/badge/MCP_Registry-io.github.gowtham0992%2Flink-blue)](https://registry.modelcontextprotocol.io/?q=io.github.gowtham0992%2Flink) [![PyPI](https://img.shields.io/pypi/v/link-mcp)](https://pypi.org/project/link-mcp/) -Release notes: [CHANGELOG.md](CHANGELOG.md) +[Product site](https://gowtham0992.github.io/link/) · +[First 10 minutes](https://gowtham0992.github.io/link/getting-started.html) · +[Why Link?](https://gowtham0992.github.io/link/why-link.html) · +[Web UI](https://gowtham0992.github.io/link/ui.html) · +[MCP setup](https://gowtham0992.github.io/link/mcp.html) · +[CLI](https://gowtham0992.github.io/link/cli.html) · +[Security](SECURITY.md) · +[Changelog](CHANGELOG.md) + +## Why It Exists + +Most agent sessions start from zero. 
You re-explain preferences, repo decisions, +project constraints, and why something matters. Link turns that repeated context +into local memory agents can query. + +| Pain | Link's answer | +|------|---------------| +| Agents forget you between sessions. | Save reviewed preferences, decisions, facts, and project context. | +| Notes are private or messy. | Keep raw sources local, then turn them into source-backed Markdown. | +| Context windows are expensive. | Return compact query packets with provenance and follow-up actions. | +| Memory needs trust. | Every page and memory can be inspected, reviewed, archived, or forgotten. | ## Quick Start -Try a finished, pre-ingested wiki before touching your own files: +Run the demo first. It creates a complete local wiki with raw sources, wiki +pages, one starter memory, graph data, and query packets ready to inspect. ```bash git clone https://github.com/gowtham0992/link.git cd link python3 link.py demo -cd link-demo -python3 serve.py +python3 link.py serve link-demo ``` Open: -- `http://localhost:3000` -- `http://localhost:3000/graph` - -The demo shows the full loop: raw notes, source pages, concept pages, backlinks, graph context, search, and MCP-ready retrieval. - -Check the demo: - -```bash -python3 link.py ingest-status . -python3 link.py doctor . +```text +http://127.0.0.1:3000 +http://127.0.0.1:3000/graph ``` -## First 10 Minutes - -Use this path to turn one real note into local agent memory. +The web viewer is for local use only. It binds to `127.0.0.1`, has no user +accounts or authentication, and should not be exposed to the internet unless you +add your own auth layer. -### 1. Create your wiki - -From the cloned `link/` checkout, not `link-demo/`: +Try the value loop: ```bash -bash integrations/kiro/install.sh +python3 link.py query "why does Link help agents?" 
link-demo --budget small +python3 link.py brief "working on agent memory" link-demo +python3 link.py benchmark "agent memory" link-demo +python3 link.py status --validate link-demo ``` -Use the installer for your agent if it is not Kiro: +The generated demo is the public proof wiki. The repo's root `wiki/` directory +is only a scaffold for local development and personal testing. Generated content +inside `wiki/`, `raw/`, and `link-demo/` is ignored by git so personal memory is +not published by accident. + +## Three Ways To Use Link + +Pick the surface that matches how you work. They all read and write the same +local Markdown wiki. + + + + + + + +
+ Web UI
+ Review memory, ingest, graph, audits, captures, and explanations in a local viewer.

+ Link web UI walkthrough +
+ CLI
+ Script readiness, query packets, briefs, validation, backup, benchmark, and repair.

+ Link CLI walkthrough +
+ MCP
+ Let Codex, Claude, Cursor, Kiro, VS Code, Copilot, and other agents recall memory.

+ Link MCP agent walkthrough +
+ +## Install For Your Agent + +Run one installer from the cloned checkout: ```bash bash integrations/codex/install.sh +bash integrations/kiro/install.sh bash integrations/claude-code/install.sh bash integrations/cursor/install.sh bash integrations/copilot/install.sh @@ -64,92 +119,23 @@ bash integrations/vscode/install.sh bash integrations/antigravity/install.sh ``` -This creates `~/link/`, installs or upgrades `link-mcp`, and writes lightweight agent instructions. Your wiki data is left alone on reinstall. +Installers create or update `~/link`, install or upgrade `link-mcp`, write +lightweight agent instructions, and preserve existing wiki data on reinstall. +Use `--project` when a repo needs separate project memory. -### 2. Add one source - -```bash -cat > ~/link/raw/first-memory.md <<'EOF' ---- -title: "First Link memory" -source_type: note -date_captured: 2026-05-04 ---- - -# First Link memory - -I am testing Link as local personal memory for agents. -Raw notes stay local. The agent turns them into source-cited wiki pages. -EOF -``` - -Check what is pending: - -```bash -python3 ~/link/link.py ingest-status ~/link -``` - -### 3. Ask your agent to ingest it - -In your agent chat: +Then ask your agent: ```text -ingest raw/first-memory.md into Link -``` - -The agent reads `~/link/LINK.md`, creates a source page under `wiki/sources/`, creates or updates concept/entity pages, updates `wiki/index.md`, appends `wiki/log.md`, and rebuilds backlinks. - -### 4. Verify the loop - -```bash -python3 ~/link/link.py doctor ~/link --fix -python3 ~/link/link.py ingest-status ~/link -python3 ~/link/link.py verify-mcp ~/link -``` - -Then ask your MCP-enabled agent: - -```text -query Link for first Link memory -``` - -If the agent answers from Link, the local memory loop is working. 
- -## Choose Your Path - -### I want to try Link - -Use the demo: - -```bash -python3 link.py demo -cd link-demo -python3 serve.py -``` - -### I want my agent to use Link - -Run the installer for your agent: - -```bash -bash integrations/kiro/install.sh # Kiro -bash integrations/claude-code/install.sh # Claude Code -bash integrations/codex/install.sh # Codex -bash integrations/cursor/install.sh # Cursor -bash integrations/copilot/install.sh # Copilot -bash integrations/vscode/install.sh # VS Code -bash integrations/antigravity/install.sh # Google Antigravity +is Link ready? +brief me from Link before we continue +ingest raw/notes.md into Link +remember that I prefer short release notes +query Link for the release process +what does Link remember about local personal memory? ``` -For project-specific memory instead of global `~/link`, add `--project`. - -To update after `git pull`, rerun the same installer. It refreshes code and instructions without replacing your wiki pages. - -The installers try the current `python3` first. If that Python is externally managed, they install `link-mcp` into `~/.link-mcp-venv` and register MCP with that venv Python. - -### I want MCP only - -Install `link-mcp` and point it at a wiki: +
+MCP-only install ```bash python3 -m pip install --upgrade link-mcp @@ -166,127 +152,66 @@ python3 -m pip install --upgrade link-mcp } ``` -On macOS/Homebrew Python, if pip reports `externally-managed-environment`, use a dedicated venv: +On macOS/Homebrew Python, if pip reports `externally-managed-environment`, use a +dedicated venv: ```bash python3 -m venv ~/.link-mcp-venv ~/.link-mcp-venv/bin/python -m pip install --upgrade pip link-mcp ``` -Then use that Python in your MCP config: +Full setup: [MCP guide](https://gowtham0992.github.io/link/mcp.html). +
-```json -{ - "mcpServers": { - "link": { - "command": "/Users/YOU/.link-mcp-venv/bin/python", - "args": ["-m", "link_mcp", "--wiki", "/Users/YOU/link/wiki"] - } - } -} -``` - -### I want to develop or release Link - -```bash -python3 -m unittest discover -s tests -python3 scripts/check_release_hygiene.py -python3 scripts/prepare_release.py 1.0.6 --dry-run -``` - -Release flow details are lower in this document. +## How Link Works -## Core Concepts - -| Concept | Meaning | -|---------|---------| -| `raw/` | Immutable sources: notes, papers, articles, transcripts, images, PDFs. | -| `wiki/` | Agent-maintained Markdown memory compiled from sources. | -| Source pages | One page per ingested source, stored under `wiki/sources/`. | -| Concept/entity pages | Synthesized knowledge pages with source citations and confidence tags. | -| `_backlinks.json` | Reverse and forward link index used by search, graph, HTTP API, and MCP. | -| `log.md` | Append-only audit trail of ingest, query, lint, and maintenance operations. | - -You curate sources and ask questions. The LLM writes and maintains the wiki. - -## Daily Workflow - -Add source material: - -```bash -cp notes.md ~/link/raw/ -python3 ~/link/link.py ingest-status ~/link -``` - -Ask your agent: +Link has one simple rule: ```text -ingest raw/notes.md into Link -``` - -Maintain the wiki: - -```bash -python3 ~/link/link.py doctor ~/link --fix -python3 ~/link/link.py rebuild-backlinks ~/link -python3 ~/link/link.py verify-mcp ~/link +Sources become wiki knowledge. +Explicit "remember" becomes agent memory. +Queries use both. ``` -View the wiki: - -```bash -cd ~/link -python3 serve.py -``` - -Open `http://localhost:3000`. - -Obsidian also works: open the `wiki/` folder as a vault. - -## Local Commands +The storage model is plain and inspectable: -| Command | What it does | -|---------|-------------| -| `python3 link.py demo` | Create `./link-demo` with a pre-ingested sample wiki. 
| -| `python3 link.py ingest-status ` | Show pending raw files and graph index status. | -| `python3 link.py doctor ` | Check structure, graph health, source hygiene, and secret-looking content. | -| `python3 link.py doctor --fix` | Create missing structure and repair backlinks safely. | -| `python3 link.py rebuild-backlinks ` | Regenerate `wiki/_backlinks.json`. | -| `python3 link.py verify-mcp ` | Verify `link-mcp` import and print MCP config. | +| Layer | What lives there | +|-------|------------------| +| `raw/` | Original notes, transcripts, articles, PDFs, screenshots, and project files. | +| `wiki/` | Source-backed pages, concepts, entities, explorations, comparisons, and memories. | +| MCP tools | Compact packets agents can use without dumping the whole wiki into context. | -## MCP Server +If a raw file was already ingested and later edited, `link ingest-status` marks it +as stale and tells your agent to refresh the existing source page instead of +creating a duplicate. -Link is listed on the [official MCP Registry](https://registry.modelcontextprotocol.io/?q=io.github.gowtham0992%2Flink) as `io.github.gowtham0992/link`. +## What Agents Get -Available tools: +- `query_link`: an answer-ready packet with relevant memories, pages, graph + neighborhood, reasons for selection, budget limits, and follow-up actions. +- `memory_brief`: a compact pre-work brief with user/project preferences, + active context, review warnings, and safe memory-use rules. +- `ingest_status`: exact next steps for raw files, including source safety, + stale ingest detection, validation, and memory proposal guidance. +- `remember_memory`: durable local memory with duplicate/conflict checks, + review state, provenance, and audit logging. +- `explain_memory`: why a memory exists, what it links to, whether it is ready + for recall, and what needs review. -| Tool | Description | -|------|-------------| -| `search_wiki` | Ranked search by title, alias, tag, and full text. 
Returns scores and snippets. | -| `get_context` | Primary tool. Returns the best page plus inbound and forward graph neighbors. | -| `get_pages` | Lists pages with metadata. Filter by category, type, or maturity. | -| `get_backlinks` | Returns inbound and forward links for one page. | -| `get_graph` | Returns all nodes and edges for graph reasoning. | -| `rebuild_backlinks` | Rebuilds `_backlinks.json` after ingest or maintenance. | +## Agent Contract -Use `get_context` for answering questions. It gives the agent the primary page plus its graph neighborhood in one call. +Agents should use Link in this order: -## HTTP API +1. `link_status` to check readiness and safe next actions. +2. `starter_prompts` when the user asks what to try first. +3. `ingest_status` before touching raw sources. +4. `query_link` for compact answer-ready context. +5. `memory_brief` before longer work. +6. `get_graph_summary` when graph context is useful but the full graph would be noisy. +7. `backup_wiki` before broad repair or migration work. +8. `validate_wiki` after ingest or broad wiki edits. -`serve.py` exposes the same local memory over HTTP while the web viewer is running. - -Local use only: `serve.py` binds to `127.0.0.1` and has no authentication. Do not expose it to the internet without adding auth. - -| Endpoint | Description | -|----------|-------------| -| `GET /api/pages` | All pages with title, type, tags, aliases, maturity, and TLDR. | -| `GET /api/search?q=` | Ranked search by title, alias, tag, TLDR, and full text. | -| `GET /api/context?topic=` | Best matching page plus inbound and forward graph links. | -| `GET /api/graph` | Nodes and edges for graph visualization. | -| `GET /api/backlinks` | Reverse and forward link index. | -| `GET /api/rebuild-backlinks` | Rebuild `_backlinks.json` by scanning wikilinks. | - -Search uses an in-memory token index. `/api/context` is the main endpoint for agents that need a topic and its surrounding graph. 
+Full MCP tool list: [MCP setup](https://gowtham0992.github.io/link/mcp.html). ## Privacy And Safety @@ -296,84 +221,57 @@ Link is local-first: - No hosted backend. - No external API calls from `serve.py` or `link-mcp`. - Raw sources and generated wiki pages are ignored by git by default. -- Registry token files and common secret-looking files are ignored and checked by release hygiene. +- `link backup` excludes `raw/` unless you explicitly pass `--include-raw`. +- Secret-looking values are detected in raw sources, captures, and release + hygiene checks. +- The local web server binds to `127.0.0.1` and is not meant to be exposed to + the internet without additional auth. Before sharing a repo, demo, or wiki: ```bash -python3 link.py doctor . +python3 link.py doctor +python3 link.py validate python3 scripts/check_release_hygiene.py ``` -Treat `doctor` errors as blockers. Warnings usually mean quality work: missing summaries, missing source sections, stale source counts, isolated pages, or raw files not represented in source pages. +More detail: [Security guide](https://gowtham0992.github.io/link/security.html). -Use `git push`, `git archive`, or clean build artifacts for public sharing. Do not zip a whole working directory; ignored local files, `.git/`, caches, raw sources, and build outputs can be included by accident. 
+## Documentation -## Develop And Release +| Need | Go here | +|------|---------| +| Run Link for the first time | [First 10 minutes](https://gowtham0992.github.io/link/getting-started.html) | +| Decide whether Link fits | [Why Link?](https://gowtham0992.github.io/link/why-link.html) | +| Use the local viewer | [Web UI](https://gowtham0992.github.io/link/ui.html) | +| Understand raw/wiki/memory | [Concepts](https://gowtham0992.github.io/link/concepts.html) | +| Configure MCP | [MCP setup](https://gowtham0992.github.io/link/mcp.html) | +| Find a command | [CLI reference](https://gowtham0992.github.io/link/cli.html) | +| Use local HTTP endpoints | [HTTP API](https://gowtham0992.github.io/link/api.html) | +| Review security boundaries | [Security model](https://gowtham0992.github.io/link/security.html) | +| Fix setup issues | [Troubleshooting](https://gowtham0992.github.io/link/troubleshooting.html) | -Run the local gate: +## Contributing + +Contributions should come through pull requests targeting `main`. The `develop` +branch is a maintainer integration branch for larger release work before it is +proposed to `main`. + +Before opening a PR: ```bash +python3 -m ruff check . python3 -m unittest discover -s tests -python3 -m py_compile link.py serve.py scripts/check_release_hygiene.py scripts/prepare_release.py mcp_package/link_mcp/server.py python3 scripts/check_release_hygiene.py -bash -n integrations/*/install.sh integrations/*/uninstall.sh integrations/_shared/*.sh +python3 scripts/check_runtime_duplication.py +python3 scripts/check_tool_contract.py git diff --check ``` -Prepare release files: - -```bash -python3 scripts/prepare_release.py 1.0.6 -``` - -This bumps the MCP version files and moves `CHANGELOG.md` `Unreleased` notes into a dated version section. 
- -After the release PR merges and CI passes: - -```bash -git switch main -git pull --ff-only -git tag -a v1.0.6 -m "v1.0.6" -git push origin v1.0.6 -cd mcp_package -python3 -c "from pathlib import Path; import shutil; shutil.rmtree('dist', ignore_errors=True); [shutil.rmtree(p, ignore_errors=True) for p in Path('.').glob('*.egg-info')]" -python3 -m build -python3 -m twine check dist/* -TWINE_USERNAME=__token__ python3 -m twine upload dist/* -mcp-publisher publish -``` - -Never reuse a published PyPI version or move a public release tag. If a release needs another fix, bump to the next version. - -## Project Structure - -```text -link/ -├── LINK.md # schema and instructions for agents -├── raw/ # source documents, ignored by git -├── wiki/ # compiled knowledge, ignored by git except scaffolding -│ ├── index.md # master catalog -│ ├── _backlinks.json # reverse and forward link index -│ ├── log.md # append-only operation history -│ ├── sources/ # one page per ingested source -│ ├── concepts/ # topic articles -│ ├── entities/ # people, orgs, projects -│ ├── comparisons/ # side-by-side analyses -│ └── explorations/ # filed query results -├── integrations/ # one-step setup per AI tool -├── mcp_package/ # PyPI package for link-mcp -├── scripts/ # release and hygiene tooling -├── serve.py # local web viewer and HTTP API -└── link.py # local utility CLI -``` +Full contributor guide: [Contributing](https://gowtham0992.github.io/link/contributing.html). -## Design Principles +Do not include personal wiki data, raw sources, registry tokens, `.env` files, or +local MCP credentials in a PR. -- Every claim links to a source. -- Confidence tags make uncertainty visible. -- `log.md` records wiki operations. -- Pages mature from seed to established. -- Agents should use `/api/context` or MCP `get_context` before reading files manually. -- The local web viewer has no runtime dependencies beyond Python stdlib. 
-- The wiki is plain Markdown, so it works with git, Obsidian, and normal editors. +If Link helps your agents remember better, [star it on GitHub](https://github.com/gowtham0992/link) +so more people can find it. diff --git a/SECURITY.md b/SECURITY.md index a35e0c2..1ed73fa 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -3,13 +3,35 @@ ## Local-first threat model Link is designed for local personal knowledge management. `serve.py` binds to -`127.0.0.1` and has no authentication, so it should not be exposed directly to -the public internet. +`127.0.0.1`, rejects host/bind flags, and rejects unexpected `Host` headers +outside `localhost`/`127.0.0.1`. It has no authentication, so it should not be +exposed directly to the public internet. + +Local write APIs also require the `X-Link-Local-Action` header. When a browser +supplies `Origin` or `Referer`, Link accepts local mutations only from +`localhost` or `127.0.0.1`. Local mutation endpoints are also rate-limited in +memory so a runaway local client receives JSON `429` responses instead of +unbounded writes. Link does not grant browser CORS access; preflight requests +receive local JSON `405` responses without `Access-Control-Allow-Origin`. +The local viewer sends a Content Security Policy that limits scripts, +connections, images, and framing to local-safe sources. It also sends browser +isolation and permissions-policy headers. HTML pages, JSON API responses, and +served local static/raw files use `Cache-Control: no-store` plus legacy +`Pragma`/`Expires` cache guards because they can contain personal memory +snippets or source media. The server and MCP package do not call external APIs, send telemetry, or require secrets. Raw sources and generated wiki pages are user data and are ignored by git by default. +Release hygiene fails if tracked Python or shell runtime code adds common +outbound HTTP clients or direct `curl`/`wget` calls. This keeps Link's +local-first promise testable instead of only documented. 
+ +`link ingest-status` and MCP `ingest_status` scan raw source files locally for +secret-looking values. If a pending raw file is flagged, Link withholds the +normal ingest prompt until the file is redacted. + ## Sensitive files Do not commit: diff --git a/docs/.nojekyll b/docs/.nojekyll new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/docs/.nojekyll @@ -0,0 +1 @@ + diff --git a/docs/api.html b/docs/api.html new file mode 100644 index 0000000..c8d6e69 --- /dev/null +++ b/docs/api.html @@ -0,0 +1,100 @@ + + + + + + + Link - HTTP API + + + + + +
+
+ Local HTTP +

HTTP endpoints for the local viewer.

+

serve.py exposes Link locally while the web viewer is running. It is a loopback-only API for personal tooling, not a hosted service.

+
+
+ +
+
+ +
+

Boundary

+

serve.py binds to 127.0.0.1, rejects unsafe host/bind flags and unexpected Host headers, and has no authentication. Do not expose it to the internet without adding auth.

+

Write actions require the X-Link-Local-Action: true request header. Responses include an X-Link-API-Version header.

+ +

Read Endpoints

+
GET /api/status?validate=true
+GET /api/prompts?project=slug
+GET /api/ingest-status
+GET /api/page-list?limit=100&offset=0
+GET /api/pages
+GET /api/memory-dashboard?project=slug
+GET /api/memory-brief?q=task&project=slug
+GET /api/memory-audit?project=slug
+GET /api/memory-profile?project=slug
+GET /api/memory-inbox?project=slug
+GET /api/capture-inbox?project=slug
+GET /api/explain-memory?memory=name
+GET /api/query-link?q=query&budget=small|medium|large
+GET /api/validate?strict=true
+GET /api/proposal-sources
+GET /api/proposal-source?path=raw/file.md
+GET /api/search?q=query
+GET /api/context?topic=topic
+GET /api/graph-summary?topic=topic&limit=40&depth=1
+GET /api/graph
+GET /api/page-links?page=name&limit=100&offset=0
+ +

Write Endpoints

+
POST /api/raw-source
+POST /api/propose-memories
+POST /api/remember-memory
+POST /api/update-memory
+POST /api/review-memory
+POST /api/archive-memory
+POST /api/restore-memory
+POST /api/rebuild-backlinks
+POST /api/rebuild-index
+

Web memory approval APIs intentionally do not honor duplicate/conflict override flags. If Link reports a duplicate or conflict, review the existing memory and use the CLI or MCP tool explicitly after deciding what should coexist.

+ +

Large Wiki Endpoints

+

Agents and integrations should prefer bounded endpoints over full dumps:

+
    +
  • /api/query-link for answer-ready context.
  • +
  • /api/page-list instead of /api/pages when paging matters.
  • +
  • /api/graph-summary instead of /api/graph for agent context.
  • +
  • /api/page-links for paginated inbound and forward links.
  • +
+
+
+
+ + + + diff --git a/docs/assets/link-brief-dark.png b/docs/assets/link-brief-dark.png new file mode 100644 index 0000000..542c908 Binary files /dev/null and b/docs/assets/link-brief-dark.png differ diff --git a/docs/assets/link-cli-tour.gif b/docs/assets/link-cli-tour.gif new file mode 100644 index 0000000..10628a1 Binary files /dev/null and b/docs/assets/link-cli-tour.gif differ diff --git a/docs/assets/link-explain-memory-dark.png b/docs/assets/link-explain-memory-dark.png new file mode 100644 index 0000000..43daa95 Binary files /dev/null and b/docs/assets/link-explain-memory-dark.png differ diff --git a/docs/assets/link-graph-dark.png b/docs/assets/link-graph-dark.png new file mode 100644 index 0000000..e9b2034 Binary files /dev/null and b/docs/assets/link-graph-dark.png differ diff --git a/docs/assets/link-home-dark.png b/docs/assets/link-home-dark.png new file mode 100644 index 0000000..e3260f3 Binary files /dev/null and b/docs/assets/link-home-dark.png differ diff --git a/docs/assets/link-ingest-dark.png b/docs/assets/link-ingest-dark.png new file mode 100644 index 0000000..2d0d2b9 Binary files /dev/null and b/docs/assets/link-ingest-dark.png differ diff --git a/docs/assets/link-mcp-agent-chat.gif b/docs/assets/link-mcp-agent-chat.gif new file mode 100644 index 0000000..25d6080 Binary files /dev/null and b/docs/assets/link-mcp-agent-chat.gif differ diff --git a/docs/assets/link-memory-dashboard-dark.png b/docs/assets/link-memory-dashboard-dark.png new file mode 100644 index 0000000..8e119f4 Binary files /dev/null and b/docs/assets/link-memory-dashboard-dark.png differ diff --git a/docs/assets/link-product-tour-dark.gif b/docs/assets/link-product-tour-dark.gif new file mode 100644 index 0000000..bc47936 Binary files /dev/null and b/docs/assets/link-product-tour-dark.gif differ diff --git a/docs/assets/link-ui-tour.gif b/docs/assets/link-ui-tour.gif new file mode 100644 index 0000000..bc47936 Binary files /dev/null and b/docs/assets/link-ui-tour.gif differ 
diff --git a/docs/assets/logo.svg b/docs/assets/logo.svg new file mode 100644 index 0000000..b86a888 --- /dev/null +++ b/docs/assets/logo.svg @@ -0,0 +1,71 @@ + + Link logo + A local memory graph mark formed from connected nodes in the shape of an L. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/assets/site.css b/docs/assets/site.css new file mode 100644 index 0000000..5518152 --- /dev/null +++ b/docs/assets/site.css @@ -0,0 +1,582 @@ +:root { + color-scheme: light; + --paper: #fff4b8; + --paper-2: #fffdf1; + --ink: #17120c; + --muted: #5f5549; + --border: #17120c; + --green: #54c79d; + --blue: #5b8cff; + --red: #ff6d5f; + --purple: #a87cff; + --yellow: #ffd342; + --black: #0b0b0b; + --shadow: 6px 6px 0 var(--border); + font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; +} + +* { box-sizing: border-box; } +html { scroll-behavior: smooth; } +body { + margin: 0; + color: var(--ink); + background: + linear-gradient(rgba(23, 18, 12, 0.035) 1px, transparent 1px), + linear-gradient(90deg, rgba(23, 18, 12, 0.035) 1px, transparent 1px), + var(--paper); + background-size: 22px 22px; + line-height: 1.55; +} + +a { color: inherit; } + +.site-nav { + position: sticky; + z-index: 10; + top: 0; + display: flex; + align-items: center; + justify-content: space-between; + gap: 24px; + padding: 14px clamp(18px, 4vw, 54px); + background: var(--paper-2); + border-bottom: 3px solid var(--border); +} + +.brand { + display: flex; + align-items: center; + gap: 10px; + font-family: Georgia, serif; + font-size: 28px; + font-weight: 800; + letter-spacing: 0; + text-decoration: none; + white-space: nowrap; +} + +.brand img { + width: 34px; + height: 34px; + border: 2px solid var(--border); + background: var(--black); +} + +.nav-links { + display: flex; + align-items: center; + flex-wrap: wrap; + gap: 8px; + color: var(--ink); + font-size: 14px; + 
font-weight: 800; +} + +.nav-links a { + padding: 6px 10px; + border: 2px solid transparent; + text-decoration: none; +} + +.nav-links a:hover, +.nav-links a[aria-current="page"] { + background: var(--yellow); + border-color: var(--border); +} + +.hero { + position: relative; + overflow: hidden; + min-height: 88vh; + display: grid; + align-items: end; + padding: 74px clamp(20px, 5vw, 72px) 46px; + background: + linear-gradient(rgba(23, 18, 12, 0.055) 1px, transparent 1px), + linear-gradient(90deg, rgba(23, 18, 12, 0.055) 1px, transparent 1px), + linear-gradient(135deg, #fff9d1 0%, #fff4b8 58%, #ffd342 100%); + background-size: 28px 28px, 28px 28px, auto; + border-bottom: 5px solid var(--border); +} + +.hero-copy { + position: relative; + z-index: 1; + max-width: 960px; +} + +.page-hero { + padding: 70px clamp(20px, 5vw, 72px) 52px; + border-bottom: 5px solid var(--border); + background: + linear-gradient(rgba(23, 18, 12, 0.055) 1px, transparent 1px), + linear-gradient(90deg, rgba(23, 18, 12, 0.055) 1px, transparent 1px), + linear-gradient(135deg, #fffdf1 0%, #fff4b8 76%, #ffd342 100%); + background-size: 28px 28px, 28px 28px, auto; +} + +.hero-inner, +.section-inner { + max-width: 1160px; + margin: 0 auto; +} + +.eyebrow, +.section-kicker { + display: inline-flex; + align-items: center; + gap: 8px; + margin: 0 0 16px; + padding: 7px 10px; + background: var(--green); + border: 2px solid var(--border); + color: var(--ink); + font-size: 13px; + font-weight: 900; + text-transform: uppercase; + letter-spacing: 0.08em; + box-shadow: 3px 3px 0 var(--border); +} + +.section-kicker { + background: var(--black); + color: var(--paper-2); + font-family: "SFMono-Regular", Consolas, monospace; + font-size: 12px; +} + +h1, +h2 { + margin: 0; + font-family: Georgia, "Times New Roman", serif; + letter-spacing: 0; +} + +h1 { + max-width: 900px; + font-size: clamp(46px, 6.2vw, 84px); + line-height: 0.94; +} + +h2 { + max-width: 820px; + margin-bottom: 18px; + font-size: clamp(32px, 
5vw, 58px); + line-height: 1.03; +} + +h3 { + margin: 0 0 10px; + font-size: 19px; +} + +.hero p, +.page-hero p { + max-width: 760px; + margin: 24px 0 0; + color: var(--ink); + font-size: clamp(18px, 2vw, 24px); + font-weight: 650; +} + +.lead { + max-width: 780px; + margin: 0; + color: var(--muted); + font-size: 19px; + font-weight: 560; +} + +.hero-notes, +.actions { + display: flex; + flex-wrap: wrap; + gap: 8px; + margin: 22px 0 0; + padding: 0; + list-style: none; +} + +.hero-notes li, +.chip { + padding: 7px 10px; + background: var(--paper-2); + border: 2px solid var(--border); + font-family: "SFMono-Regular", Consolas, monospace; + font-size: 13px; + font-weight: 800; +} + +.actions { + gap: 12px; + margin-top: 28px; +} + +.button { + display: inline-flex; + align-items: center; + justify-content: center; + min-height: 46px; + padding: 0 18px; + border: 3px solid var(--border); + background: var(--paper-2); + color: var(--ink); + text-decoration: none; + font-weight: 900; + box-shadow: 4px 4px 0 var(--border); + transition: transform 120ms ease, box-shadow 120ms ease; +} + +.button:hover { + transform: translate(2px, 2px); + box-shadow: 2px 2px 0 var(--border); +} + +.button.primary { + background: var(--yellow); + border-color: var(--border); + color: var(--ink); +} + +main { background: transparent; } + +section, +.doc-section { + scroll-margin-top: 82px; + padding: 74px clamp(20px, 5vw, 72px); + border-bottom: 4px solid var(--border); +} + +section:nth-child(2n), +.doc-section.alt { background: var(--paper-2); } + +.grid { + display: grid; + grid-template-columns: repeat(3, minmax(0, 1fr)); + gap: 16px; + margin-top: 34px; +} + +.grid.two-up { + grid-template-columns: repeat(2, minmax(0, 1fr)); +} + +.feature, +.panel { + min-height: 198px; + padding: 20px; + border: 3px solid var(--border); + background: var(--paper-2); + box-shadow: var(--shadow); +} + +.feature:nth-child(3n + 1) { background: #dff8ef; } +.feature:nth-child(3n + 2) { background: 
#e8edff; } +.feature:nth-child(3n + 3) { background: #ffe1dc; } + +.feature p, +.panel p, +.panel li { + color: var(--ink); + font-size: 15px; +} + +.flow { + display: grid; + grid-template-columns: 0.9fr 1.1fr; + gap: 30px; + align-items: center; + margin-top: 34px; +} + +.screenshot-row { + display: grid; + grid-template-columns: repeat(2, minmax(0, 1fr)); + gap: 22px; + margin-top: 34px; +} + +.media-grid { + display: grid; + grid-template-columns: repeat(3, minmax(0, 1fr)); + gap: 20px; + margin-top: 34px; +} + +.media-card { + padding: 12px; + border: 3px solid var(--border); + background: var(--paper-2); + box-shadow: var(--shadow); +} + +.media-card img { + width: 100%; + display: block; + border: 3px solid var(--border); + background: var(--black); +} + +.media-card h3 { + margin: 16px 8px 8px; +} + +.media-card p { + margin: 0 8px 10px; + color: var(--ink); + font-size: 15px; +} + +.flow img, +.screenshot-row img { + width: 100%; + display: block; + border: 4px solid var(--border); + background: var(--black); + box-shadow: var(--shadow); +} + +.screenshot-card { + margin: 0; + padding: 12px; + border: 3px solid var(--border); + background: var(--paper-2); + box-shadow: var(--shadow); + overflow: hidden; +} + +.screenshot-card img { + width: 100%; + display: block; + border-width: 3px; + box-shadow: none; +} + +.media-wide { + max-width: 900px; + margin: 0 0 28px; +} + +.doc-content > .screenshot-card { + max-width: 900px; +} + +figcaption { + margin-top: 10px; + font-family: "SFMono-Regular", Consolas, monospace; + font-size: 12px; + font-weight: 800; + color: var(--muted); +} + +pre { + overflow-x: auto; + margin: 0; + padding: 20px; + background: var(--black); + border: 4px solid var(--border); + color: #f7f0d8; + font-size: 14px; + line-height: 1.6; + box-shadow: var(--shadow); +} + +code { font-family: "SFMono-Regular", Consolas, monospace; } + +.compare, +.doc-table, +.choice-table { + display: grid; + grid-template-columns: repeat(4, minmax(0, 1fr)); 
+ gap: 0; + margin-top: 34px; + border: 3px solid var(--border); + background: var(--border); + box-shadow: var(--shadow); +} + +.compare div, +.doc-table div, +.choice-table div { + background: var(--paper-2); + padding: 18px; + border-right: 2px solid var(--border); +} + +.compare div:last-child, +.doc-table div:last-child { border-right: 0; } + +.compare strong, +.doc-table strong, +.choice-table strong { + display: block; + margin-bottom: 8px; + color: var(--red); + font-family: "SFMono-Regular", Consolas, monospace; + text-transform: uppercase; + font-size: 12px; +} + +.choice-table { + grid-template-columns: 1fr; +} + +.choice-table div { + display: grid; + grid-template-columns: 0.75fr 1fr 1fr; + gap: 0; + padding: 0; + border-right: 0; + border-bottom: 2px solid var(--border); +} + +.choice-table div:last-child { + border-bottom: 0; +} + +.choice-table span, +.choice-table strong { + margin: 0; + padding: 16px; + border-right: 2px solid var(--border); +} + +.choice-table span:last-child, +.choice-table strong:last-child { + border-right: 0; +} + +.choice-table span:first-child { + font-weight: 900; +} + +.doc-grid { + display: grid; + grid-template-columns: minmax(0, 260px) minmax(0, 1fr); + gap: 34px; + align-items: start; +} + +.toc { + position: sticky; + top: 92px; + padding: 18px; + border: 3px solid var(--border); + background: #e8edff; + box-shadow: var(--shadow); +} + +.toc strong { + display: block; + margin-bottom: 10px; + font-family: "SFMono-Regular", Consolas, monospace; + font-size: 12px; + text-transform: uppercase; +} + +.toc a { + display: block; + margin: 8px 0; + font-weight: 800; +} + +.doc-content { + min-width: 0; +} + +.doc-content h2 { + margin-top: 44px; +} + +.doc-content h2:first-child { + margin-top: 0; +} + +.doc-content p, +.doc-content li { + max-width: 820px; +} + +.callout { + margin: 24px 0; + padding: 18px; + border: 3px solid var(--border); + background: #dff8ef; + box-shadow: var(--shadow); +} + +.callout strong { + 
display: block; + margin-bottom: 6px; +} + +footer { + padding: 42px clamp(20px, 5vw, 72px); + color: var(--paper-2); + background: var(--black); +} + +footer a { + color: var(--paper-2); +} + +@media (max-width: 920px) { + .doc-grid, + .grid, + .grid.two-up, + .flow, + .screenshot-row, + .media-grid, + .compare, + .doc-table, + .choice-table, + .choice-table div { + grid-template-columns: 1fr; + } + + .toc { + position: static; + } + + .compare div, + .doc-table div { + border-right: 0; + border-bottom: 2px solid var(--border); + } + + .compare div:last-child, + .doc-table div:last-child { + border-bottom: 0; + } + + .choice-table span, + .choice-table strong { + border-right: 0; + border-bottom: 2px solid var(--border); + } + + .choice-table span:last-child, + .choice-table strong:last-child { + border-bottom: 0; + } +} + +@media (max-width: 820px) { + .site-nav { + position: static; + align-items: flex-start; + flex-direction: column; + } + + .nav-links { gap: 6px; } + + .hero { + min-height: 78vh; + padding-top: 48px; + background: + linear-gradient(rgba(23, 18, 12, 0.055) 1px, transparent 1px), + linear-gradient(90deg, rgba(23, 18, 12, 0.055) 1px, transparent 1px), + linear-gradient(135deg, #fff9d1 0%, #fff4b8 68%, #ffd342 100%); + background-size: 26px 26px, 26px 26px, auto; + } +} diff --git a/docs/cli.html b/docs/cli.html new file mode 100644 index 0000000..6518945 --- /dev/null +++ b/docs/cli.html @@ -0,0 +1,138 @@ + + + + + + + Link - CLI Reference + + + + + +
+
+ Command reference +

Local commands for daily memory work.

+

After running the global installer, use link <command>. From a source checkout, use python3 link.py <command> instead.

+
+
+ +
+
+ +
+

CLI Tour

+
+ Animated Link CLI walkthrough +
The status, query, brief, and benchmark commands form the core terminal loop.
+
+ +

Daily Loop

+
link serve
+link ingest-status
+link remember "User prefers feature branches for Link work." --type preference --scope project --project link
+link brief "working on Link release" --project link
+link query "what should I know before changing the MCP tools?" --budget small --project link
+link validate
+ +

Memory Commands

+
+

link remember

Save a local agent memory. Strong duplicates and likely conflicts are refused unless explicitly allowed.

+

link recall

Search local memories with recall readiness and project-aware filtering.

+

link explain-memory

Show provenance, lifecycle, graph links, review issues, and recall readiness.

+

link update-memory

Merge new text into an existing memory and reset review state.

+

link archive-memory

Reversibly hide a stale or wrong memory from default recall.

+

link forget-memory

Permanently delete a memory after explicit confirmation.

+
+ +

Capture Workflow

+

Use captures for longer chat notes or session summaries that should be reviewed before becoming durable memory.

+
link capture-session session-notes.md --project link
+link capture-inbox --project link
+link accept-capture raw/memory-captures/<capture>.md --index 1
+link redact-capture raw/memory-captures/<capture>.md
+link delete-capture raw/memory-captures/<capture>.md --confirm
+ +

Maintenance

+
link backup
+link doctor --fix
+link status --validate
+link memory-audit
+link benchmark "agent memory"
+link rebuild-index
+link rebuild-backlinks
+link validate
+link verify-mcp
+

Use link backup before broad repair work. Use link benchmark when a wiki starts to feel slow.

+ +

All Commands

+
link --version
+link init [dir]
+link serve [dir] [--port 3000]
+link prompts [dir] [--project slug]
+link status [--validate]
+link backup [--label name] [--include-raw]
+link ingest-status
+link remember "text" [--project slug]
+link propose-memories <file-or-text> [--project slug]
+link capture-session <file-or-text> [--project slug]
+link capture-inbox [--project slug]
+link accept-capture <capture> [--index N]
+link redact-capture <capture>
+link delete-capture <capture> --confirm
+link query "task" [--budget small|medium|large] [--project slug]
+link graph-summary ["topic"] [--limit 40] [--depth 1]
+link benchmark ["query"] [--budget small|medium|large] [--project slug]
+link brief "task" [--project slug]
+link memory-audit [--project slug]
+link recall "query" [--project slug]
+link profile [--project slug]
+link memory-inbox [--project slug]
+link review-memory <name>
+link explain-memory <name>
+link update-memory <name> "text" [--project slug]
+link archive-memory <name>
+link restore-memory <name>
+link forget-memory <name> --confirm
+link doctor
+link doctor --fix
+link migrate
+link validate [--strict]
+link rebuild-index
+link rebuild-backlinks
+link verify-mcp [--json]
+python3 link.py demo
+python3 link.py query-link "task" [dir]
+

query-link is kept as an internal, backward-compatible alias for link query. Prefer link query in user-facing docs.

+
+
+
+ + + + diff --git a/docs/concepts.html b/docs/concepts.html new file mode 100644 index 0000000..73eb243 --- /dev/null +++ b/docs/concepts.html @@ -0,0 +1,109 @@ + + + + + + + Link - Concepts + + + + + +
+
+ Mental model +

The wiki is the storage. Memory is the product.

+

Link keeps knowledge outside the chat window, makes claims inspectable, and gives every local agent the same source-backed context.

+
+
+ +
+
+ +
+

Storage Layers

+
raw sources -> agent ingest -> Markdown wiki -> backlinks/graph -> MCP recall
+                         \
+                          -> direct memories -> review/update/archive
+
+ Public repo versus local memory + Link deliberately keeps generated raw/, wiki/, and link-demo/ content out of git. The tracked root wiki is scaffolding; python3 link.py demo creates the product-story demo wiki. +
+
+
raw/ — Immutable notes, papers, articles, transcripts, images, and PDFs.
+
wiki/sources/ — One source page per ingested raw file.
+
wiki/memories/ — Preferences, decisions, project facts, and user context.
+
indexes — Backlinks, page index, local cache, token index, and optional SQLite FTS.
+
+

Markdown remains the source of truth. Derived indexes can be rebuilt. Agents maintain the files, but the files stay inspectable in git, Obsidian, or any editor.

+ +

Three User Moves

+

Link deliberately separates knowledge from memory:

+
ingest raw/file.md into Link
+remember that I prefer short release notes
+query Link for the release process
+

Raw files do not silently personalize future agents. Ingest creates source-backed wiki knowledge. Explicit remember creates durable user or project memory.

+ +

Memory Lifecycle

+

A memory is a Markdown page with status, scope, source, review state, graph links, and local log entries. It can be proposed, remembered, reviewed, updated, archived, restored, explained, or forgotten.

+
+

Propose

Generate candidate memories from chat notes or raw captures without writing durable memory.

+

Approve

Save only the memories the user explicitly wants agents to carry forward.

+

Explain

Show why a memory exists, whether it is recall-ready, and what graph links support it.

+
+ +

Smart Query Packets

+

query_link is designed for agents. It returns a compact packet with relevant memory, ranked wiki pages, graph context, provenance, budget reports, estimated size, and follow-up actions.

+

Budget tiers keep context predictable:

+
    +
  • small: fast, compact context for most questions.
  • +
  • medium: more memory and page context for active tasks.
  • +
  • large: broader context when the agent needs to plan or compare.
  • +
+ +

Graph Context

+

Link builds forward and reverse wikilinks so agents can inspect what links to a page and what that page references. Large graphs open as bounded overviews first, with explicit controls for type filters, search, neighborhood depth, and full-graph loading.

+
+ Link graph view +
Graph context stays bounded by default so it remains useful for humans and agents.
+
+ +

Scale Model

+

Link uses local caching, token indexes, bounded page APIs, bounded graph summaries, and optional in-memory SQLite FTS5 for fast search. If SQLite is unavailable, Link falls back to the token index.

+
link benchmark "agent memory"
+link graph-summary "local memory" --limit 40 --depth 1
+link validate
+link rebuild-backlinks
+

Use link benchmark when a wiki starts to feel slow. It reports cache, search, query, graph payload, backend, and readiness recommendations.

+
+
+
+ + + + diff --git a/docs/contributing.html b/docs/contributing.html new file mode 100644 index 0000000..c150f0e --- /dev/null +++ b/docs/contributing.html @@ -0,0 +1,105 @@ + + + + + + + Link - Contributing + + + + + +
+
+ Contributing +

Keep Link local, inspectable, and reliable.

+

Contributions should make agent memory easier to trust: clearer onboarding, safer writes, faster retrieval, better validation, and tighter docs.

+
+
+ +
+
+ +
+

Branches

+

Contributions should come through pull requests. Please target main unless the maintainer asks for a different branch. The develop branch is a maintainer integration branch for staging larger release work before it is proposed to main.

+ +

Before Opening A PR

+
python3 -m pip install "ruff>=0.8,<1"
+python3 -m ruff check .
+python3 -m unittest discover -s tests
+python3 -m py_compile link.py serve.py scripts/check_release_hygiene.py scripts/check_runtime_duplication.py scripts/check_tool_contract.py scripts/prepare_release.py scripts/smoke_first_use.py scripts/smoke_http_viewer.py scripts/smoke_large_wiki.py scripts/smoke_mcp_stdio.py mcp_package/link_core/*.py mcp_package/link_mcp/server.py mcp_package/link_mcp/__main__.py mcp_package/link_mcp/__init__.py
+python3 scripts/smoke_first_use.py
+python3 scripts/smoke_http_viewer.py
+python3 scripts/smoke_large_wiki.py --pages 1000
+python3 scripts/check_release_hygiene.py
+python3 scripts/check_runtime_duplication.py
+python3 scripts/check_tool_contract.py
+bash -n integrations/*/install.sh integrations/*/uninstall.sh integrations/_shared/*.sh
+python3 link.py demo /tmp/link-mcp-smoke --force
+PYTHONPATH=mcp_package python3 scripts/smoke_mcp_stdio.py /tmp/link-mcp-smoke/wiki
+git diff --check
+ +

PR Description

+
    +
  • What changed.
  • +
  • How you tested it.
  • +
  • Whether it touches memory writes, installers, MCP behavior, HTTP endpoints, or automation.
  • +
  • Screenshots or GIFs for UI changes.
  • +
+

Do not include personal wiki data, raw sources, registry tokens, .env files, or local MCP credentials in a PR.

+ +

Project Structure

+
link/
+|-- LINK.md              # schema and instructions for agents
+|-- raw/                 # source documents, ignored by git
+|-- .link-cache/         # local derived cache, ignored by git
+|-- wiki/                # compiled knowledge, ignored by git except scaffolding
+|-- docs/                # GitHub Pages product and docs site
+|-- integrations/        # one-step setup per AI tool
+|-- mcp_package/         # PyPI package for link-mcp and shared link_core
+|-- scripts/             # release and hygiene tooling
+|-- serve.py             # local web viewer and HTTP API
+`-- link.py              # local utility CLI
+ +

Design Principles

+
    +
  • Every claim links to a source.
  • +
  • Confidence tags make uncertainty visible.
  • +
  • log.md records wiki operations.
  • +
  • Pages mature from seed to established.
  • +
  • Agents should use query_link first, then follow up with graph/context tools only when the compact packet is insufficient.
  • +
  • The local web viewer has no runtime dependencies beyond Python stdlib.
  • +
  • The wiki is plain Markdown, so it works with git, Obsidian, and normal editors.
  • +
+
+
+
+ + + + diff --git a/docs/getting-started.html b/docs/getting-started.html new file mode 100644 index 0000000..faacb83 --- /dev/null +++ b/docs/getting-started.html @@ -0,0 +1,137 @@ + + + + + + + Link - First 10 Minutes + + + + + +
+
+ First 10 minutes +

Turn one local note into agent memory.

+

Start with the demo, add one real source, save one explicit memory, then ask an MCP-enabled agent to query Link.

+
+
+ +
+
+ +
+

1. Run The Demo

+

The demo is the fastest proof of value. It already has raw sources, wiki pages, memories, backlinks, and graph data.

+
git clone https://github.com/gowtham0992/link.git
+cd link
+python3 link.py demo
+python3 link.py serve link-demo
+
+ Judge the generated demo + The repo's root wiki/ is only a scaffold for local development and personal testing. Generated content in wiki/, raw/, and link-demo/ is ignored by git so private memory is not published by accident. +
+

The demo intentionally includes one pending memory so that the review inbox and explain-memory workflow are visible. Run python3 link.py review-memory prefer-local-personal-memory link-demo if you want the memory audit to come back fully clear.

+

Open http://127.0.0.1:3000, then inspect /brief, /memory, /audit, /captures, /propose, and /graph. Link accepts localhost too, but the numeric loopback address avoids slow IPv6 fallback in some Safari setups.

+
python3 link.py query "why does Link help agents?" link-demo --budget small
+python3 link.py brief "working on agent memory" link-demo
+python3 link.py benchmark "agent memory" link-demo
+python3 link.py status --validate link-demo
+ +

2. Install Link For Your Agent

+

From the cloned checkout, run the installer for the agent you use. Re-running the same installer updates code and instructions without replacing existing wiki data.

+
bash integrations/codex/install.sh
+bash integrations/kiro/install.sh
+bash integrations/claude-code/install.sh
+bash integrations/cursor/install.sh
+bash integrations/copilot/install.sh
+bash integrations/vscode/install.sh
+bash integrations/antigravity/install.sh
+

Use --project for a repo-local Link install. Project-scoped memory then stays separate from other projects' memory, while broad user memory can still be recalled.

+ +

3. Add One Source

+

Open the local viewer and use ingest -> Add Raw Source, or write a first note directly:

+
mkdir -p ~/link/raw
+cat > ~/link/raw/first-memory.md <<'EOF'
+---
+title: "First Link memory"
+source_type: note
+date_captured: 2026-05-04
+---
+
+# First Link memory
+
+I am testing Link as local personal memory for agents.
+Raw notes stay local. The agent turns them into source-cited wiki pages.
+EOF
+

Check pending work:

+
link ingest-status
+
+ Safety gate + Link blocks normal ingest guidance when raw files contain secret-looking values or cannot be read safely. Redact or fix those local files first. +
+
+ Existing Link data + If you already have files in ~/link/raw/, link ingest-status may point to a different pending file first. If first-memory.md was already ingested and you overwrite it, Link marks that raw file as stale and asks the agent to refresh the existing source page. +
+ +

4. Save One Direct Memory

+

Use natural language with an agent:

+
remember that I am testing Link as local personal memory for agents
+brief me from Link before we continue
+what does Link remember about local personal memory?
+

Or use the CLI:

+
link remember "I am testing Link as local personal memory for agents." --type preference --scope user --tags onboarding
+link brief "local personal memory"
+link recall "local personal memory"
+link profile
+link memory-audit
+ +

5. Ask The Agent To Ingest

+

In your agent chat, ask:

+
ingest raw/first-memory.md into Link
+

The agent reads ~/link/LINK.md, creates a source page, updates concepts/entities when useful, updates the index and log, rebuilds backlinks, and validates generated pages.

+

Return to /ingest after the agent finishes. Link shows which raw files are represented and gives follow-up prompts for proposals or retrieval checks.

+ +

6. Verify The Loop

+
link doctor --fix
+link status --validate
+link ingest-status
+link validate
+link memory-audit
+link verify-mcp
+

link verify-mcp should report Result: ready. Then ask your MCP-enabled agent:

+
query Link for first Link memory
+

If the answer comes from Link, local agent memory is working.

+
+
+
+ + + + diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 0000000..3359db7 --- /dev/null +++ b/docs/index.html @@ -0,0 +1,175 @@ + + + + + + + Link - Local Agent Memory + + + + + +
+
+ Local agent memory +

Link gives every agent the same memory.

+

Source-backed Markdown memory for Codex, Claude, Cursor, Kiro, VS Code, Copilot, and any MCP client. Local files. Inspectable sources. Budgeted context.

+
    +
  • raw/
  • +
  • wiki/
  • +
  • memories/
  • +
  • graph
  • +
  • MCP
  • +
+ +
+
+ +
+
+
+

The product

+

Not another notes app. A local memory layer agents can actually use.

+

Link turns raw notes, transcripts, project context, and explicit memories into a source-backed wiki. Agents query a compact packet instead of reading your entire folder.

+
+

Personal memory

Preferences, decisions, facts, and project context stay durable across agent sessions.

+

Source-backed wiki

Raw sources compile into Markdown pages with citations, backlinks, and reviewable provenance.

+

MCP-native recall

One local memory works across MCP-capable tools through the same query, brief, graph, and memory lifecycle tools.

+

Budgeted context

Smart query packets return the right memory, pages, graph neighborhood, and follow-up actions without flooding tokens.

+

Private by default

No hosted backend, no telemetry, no cloud lock-in. Your memory stays on disk as plain Markdown.

+

Auditable lifecycle

Capture, propose, approve, review, archive, restore, forget, and explain what Link remembers.

+
+
+
+ +
+
+
+

First 2 minutes

+

Run a finished memory wiki locally.

+

The demo includes raw sources, wiki pages, one starter memory, backlinks, graph context, and a compact query packet.

+ +
+
git clone https://github.com/gowtham0992/link.git
+cd link
+python3 link.py demo
+python3 link.py serve link-demo
+
+# then try
+python3 link.py query "why does Link help agents?" link-demo --budget small
+python3 link.py brief "working on agent memory" link-demo
+python3 link.py benchmark "agent memory" link-demo
+
+
+ +
+
+

Three surfaces

+

Review it, script it, or let an agent call it.

+

The web UI, CLI, and MCP server all operate on the same local Markdown wiki. Pick the surface for the job instead of learning three separate systems.

+
+
+ Animated Link web UI walkthrough +

Web UI

+

Inspect memory, ingest sources, review captures, browse pages, explain recall, and explore the graph.

+
+
+ Animated Link CLI walkthrough +

CLI

+

Run status checks, query packets, briefs, validation, backup, benchmark, and repair from a terminal.

+
+
+ Animated Link MCP agent walkthrough +

MCP

+

Give Codex, Claude, Cursor, Kiro, VS Code, Copilot, and other agents the same local memory.

+
+
+
+
+ +
+
+

Visible trust

+

Memory is inspectable, reviewable, and explainable.

+

A memory is not a hidden vector. It is a Markdown page with status, scope, source, review state, graph links, and an audit trail.

+
+
+ Link memory dashboard +
Memory dashboard: profile, inbox, captures, and review states.
+
+
+ Link explain memory view +
Explain memory: why Link knows something and whether it is ready to recall.
+
+
+
+
+ +
+
+
+ Link graph view +
Graph view: bounded by default, expandable when you need the whole neighborhood.
+
+
+

Agent contract

+

Agents get a small set of reliable moves.

+

Check readiness, brief before work, ingest raw files, remember explicit facts, query smart context, validate after writes, and explain why a memory exists.

+
+
Before — Repeated context in every chat.
+
After — Shared local memory across agents.
+
Storage — Plain Markdown and JSON indexes.
+
Scale — SQLite FTS, bounded graph payloads, local cache.
+
+
+
+
+ +
+
+

Read next

+

Start small, then make it your agent memory.

+

The docs are arranged by user path: try the demo, understand the model, wire MCP, then use the CLI and maintenance tools when you need them.

+
+

First 10 minutes

Run the demo, add one source, save one direct memory, and verify the loop.

+

Why Link?

Understand where Link fits versus notes apps, hosted memory APIs, agent runtimes, and graph memory systems.

+

Web UI

Use the local viewer for ingest, memory review, graph exploration, and human-readable audits.

+

Concepts

Understand raw sources, wiki pages, memories, graph indexes, and budgeted query packets.

+

MCP setup

Install the MCP server and teach local agents how to use Link reliably.

+

CLI reference

Every local command, grouped by daily workflow and maintenance jobs.

+

HTTP API

Local endpoints for status, query, memory, graph, validation, and web UI actions.

+

Security model

Local-first constraints, secret scanning, backup behavior, and HTTP safety boundaries.

+

Contributing

PR expectations, test gates, branch policy, and what not to include in public changes.

+

Troubleshooting

Fix MCP setup, blocked ingest, stale graph indexes, slow wikis, and Python packaging issues.

+
+
+
+
+ +
+
MIT licensed. Built for local-first agent memory. GitHub · If Link helps your agents remember better, star it on GitHub.
+
+ + diff --git a/docs/mcp.html b/docs/mcp.html new file mode 100644 index 0000000..c2f8702 --- /dev/null +++ b/docs/mcp.html @@ -0,0 +1,157 @@ + + + + + + + Link - MCP Setup + + + + + +
+
+ Agent integration +

One local memory, many agents.

+

Link is listed as io.github.gowtham0992/link on the MCP Registry and also ships as the link-mcp PyPI package.

+
+
+ +
+
+ +
+

MCP Tour

+
+ Animated Link MCP agent walkthrough +
Agents use natural prompts, then call Link tools for readiness, briefs, query packets, and reviewed memory writes.
+
+ +

Agent Installers

+

Use the installer for the agent you use most. Installers create or update ~/link, install link-mcp, write short agent instructions, and preserve existing wiki data.

+
bash integrations/codex/install.sh
+bash integrations/kiro/install.sh
+bash integrations/claude-code/install.sh
+bash integrations/cursor/install.sh
+bash integrations/copilot/install.sh
+bash integrations/vscode/install.sh
+bash integrations/antigravity/install.sh
+

Use --project from a repo when memory should be project-scoped.

+ +

MCP Only

+
python3 -m pip install --upgrade link-mcp
+
{
+  "mcpServers": {
+    "link": {
+      "command": "python3",
+      "args": ["-m", "link_mcp", "--wiki", "~/link/wiki"]
+    }
+  }
+}
+

On macOS/Homebrew Python, if pip reports externally-managed-environment, use a dedicated venv:

+
python3 -m venv ~/.link-mcp-venv
+~/.link-mcp-venv/bin/python -m pip install --upgrade pip link-mcp
+
{
+  "mcpServers": {
+    "link": {
+      "command": "/Users/YOU/.link-mcp-venv/bin/python",
+      "args": ["-m", "link_mcp", "--wiki", "/Users/YOU/link/wiki"]
+    }
+  }
+}
+ +

Predictable Agent Workflow

+

Agents should use Link in this order:

+
    +
  1. Call link_status when connecting or troubleshooting.
  2. Call starter_prompts when the user asks what to try next.
  3. Call ingest_status before ingesting raw files.
  4. Call query_link first for most answers.
  5. Call memory_brief before longer work.
  6. Call get_graph_summary when graph context is needed.
  7. Call backup_wiki before broad repair work.
  8. Call validate_wiki after ingest or substantial page edits.
+ +

MCP Tools

+

Full tool set:

+
link_status
+starter_prompts
+migrate_wiki
+ingest_status
+query_link
+validate_wiki
+backup_wiki
+memory_brief
+memory_audit
+memory_profile
+memory_inbox
+review_memory
+explain_memory
+search_wiki
+recall_memory
+remember_memory
+propose_memories
+capture_session
+capture_inbox
+accept_capture
+redact_capture
+delete_capture
+update_memory
+archive_memory
+restore_memory
+forget_memory
+get_context
+get_pages
+get_backlinks
+get_graph_summary
+get_graph
+rebuild_index
+rebuild_backlinks
+

Memory write tools return duplicate_candidates or conflict_candidates when review, update, or archive is safer than creating another memory page.

+

Project-aware tools accept an optional project argument. When set, Link returns broad user/global memory plus memories for that project, while excluding memories from other explicit projects.

+ +

Verify Setup

+
link verify-mcp
+link status --validate
+link prompts
+

link verify-mcp --json is useful when an agent or script should read structured issues and next actions.

+
+ Natural prompts to try +

is Link ready?

+

brief me from Link before we continue

+

query Link for my current project context

+

remember that I prefer short release notes

+
+
+
+
+ + + + diff --git a/docs/security.html b/docs/security.html new file mode 100644 index 0000000..ae0e150 --- /dev/null +++ b/docs/security.html @@ -0,0 +1,92 @@ + + + + + + + Link - Security + + + + + +
+
+ Local-first +

Your agent memory should belong to you.

+

Link has no hosted backend, no telemetry, no external API calls, and no cloud account requirement. The safety model is local files, explicit writes, and inspectable provenance.

+
+
+ +
+
+ +
+

Privacy Model

+
    +
  • No telemetry.
  • +
  • No hosted backend.
  • +
  • No external API calls from serve.py or link-mcp.
  • +
  • Raw sources and generated wiki pages are ignored by git by default.
  • +
  • SQLite search, when available, is an in-memory derived index. Markdown remains the source of truth.
  • +
+ +

Secret Handling

+

Link scans raw sources, captures, release files, and public artifacts for secret-looking values. It detects common API key and token formats, warns without logging secret values, and refuses normal ingest guidance when raw safety cannot be established.

+
link ingest-status
+link capture-inbox
+link redact-capture raw/memory-captures/<capture>.md
+python3 scripts/check_release_hygiene.py
+
+ Rule + Redact or remove secret-looking values locally before asking an agent to ingest a file. Link is not a secret manager. +
+ +

HTTP Boundary

+

The local viewer binds to 127.0.0.1, rejects unexpected host/bind flags and unexpected Host headers, and has no authentication. Do not expose it to the internet without adding an auth layer.

+

HTTP write actions require X-Link-Local-Action: true. Responses include X-Link-API-Version. Proposal analysis does not write pages.

+ +

Backups

+

link backup and MCP backup_wiki write local .link-backups/ archives. Raw sources are excluded unless explicitly requested.

+
link backup
+link backup --include-raw
+link doctor --fix
+

Run a backup before broad repair work or large generated changes.

+ +

Before Sharing A Repo Or Wiki

+
python3 link.py doctor
+python3 link.py validate
+python3 scripts/check_release_hygiene.py
+git diff --check
+

Use git push, git archive, or clean build artifacts for public sharing. Do not zip a whole working directory; ignored local files, .git/, caches, raw sources, and build outputs can be included by accident.

+

See SECURITY.md for vulnerability reporting.

+
+
+
+ + + + diff --git a/docs/troubleshooting.html b/docs/troubleshooting.html new file mode 100644 index 0000000..95fce32 --- /dev/null +++ b/docs/troubleshooting.html @@ -0,0 +1,94 @@ + + + + + + + Link - Troubleshooting + + + + + +
+
+ Troubleshooting +

Start with status, then repair deliberately.

+

Most Link issues are local path, stale graph, MCP Python, raw safety, or validation problems. The commands below are safe first checks.

+
+
+ +
+
+ +
+

Is Link Ready?

+
link status --validate
+link doctor
+link prompts
+

If link is not on your PATH, run from the source checkout with python3 link.py, or add ~/.local/bin to your shell PATH.

+ +

MCP Is Not Visible

+
link verify-mcp
+python3 -m pip index versions link-mcp
+

Restart the MCP client after changing its config. If your installer printed a venv Python path, use that exact path in the MCP config.

+ +

Ingest Is Blocked

+
link ingest-status
+

Blocked ingest usually means that a raw source has secret-looking values or cannot be read safely, or that source representation counts may be incomplete. Redact or fix the local file, then ask the agent to ingest again.

+ +

Graph Is Stale

+
link rebuild-index
+link rebuild-backlinks
+link validate
+

Run these after manual Obsidian edits, hand-written wikilinks, or a failed ingest.

+ +

Demo Looks Stale

+

link-demo/ is generated local output and is ignored by git. If it was created with an older Link version, regenerate it:

+
python3 link.py demo link-demo --force
+python3 link.py status link-demo --validate
+python3 link.py query "why does Link help agents?" link-demo --budget small
+

The current generated demo should include three raw sources, source-backed wiki pages, one starter memory, one exploration, current backlinks, and schema v1.

+ +

The Wiki Feels Slow

+
link benchmark "agent memory"
+link graph-summary "agent memory" --limit 40 --depth 1
+

Large graph views intentionally open as bounded overviews first. Use type filters, node search, and depth controls to narrow the view, and load the full graph explicitly only when you need everything.

+ +

pip Is Blocked By Homebrew Python

+

If pip reports externally-managed-environment, avoid forcing packages into Homebrew Python. Use a dedicated venv:

+
python3 -m venv ~/.link-mcp-venv
+~/.link-mcp-venv/bin/python -m pip install --upgrade pip link-mcp
+

The current installers do this automatically when needed.

+
+
+
+ + + + diff --git a/docs/ui.html b/docs/ui.html new file mode 100644 index 0000000..a1a9d1d --- /dev/null +++ b/docs/ui.html @@ -0,0 +1,80 @@ + + + + + + + Link - Web UI + + + + + +
+
+ Local viewer +

The web UI is where humans trust the memory.

+

Use it to see what Link knows, inspect source-backed pages, review proposed memories, browse captures, validate ingest, and explore graph neighborhoods.

+
+
+ +
+
+ +
+

UI Tour

+
+ Animated Link web UI walkthrough +
Prompts, ingest, brief, memory review, and graph exploration in the local viewer.
+
+ +

Daily Loop

+
+

Start from prompts

The home page gives first-run prompts an agent can act on without you memorizing file paths or command syntax.

+

Check ingest

The ingest view shows pending raw files, represented sources, stale ingests, safety warnings, and post-ingest checks.

+

Review memory

Use the memory dashboard, inbox, profile, audit, and explain views to keep recall useful instead of mysterious.

+

Validate after writes

Run validation from the CLI or MCP after ingest and broad repairs so dead links, missing sections, and stale graph indexes do not linger.

+
+ +

Graph At Scale

+

The graph opens as a bounded overview on large wikis. Search, filters, and focused neighborhoods can pull from the full graph data without forcing the browser to render every node at once.

+
+ Link graph view +
Use search, type filters, neighborhood focus, fullscreen, labels, and motion controls for large graphs.
+
+ +

Start It

+
link serve
+
+# from a source checkout
+python3 link.py serve link-demo
+

The server binds to 127.0.0.1 and is intended for local use. Do not expose it to the internet without adding your own authentication layer.

+
+
+
+ + + + diff --git a/docs/why-link.html b/docs/why-link.html new file mode 100644 index 0000000..8ee2bc6 --- /dev/null +++ b/docs/why-link.html @@ -0,0 +1,85 @@ + + + + + + + Link - Why Link? + + + + + +
+
+ Positioning +

Link is not a notes app. It is local memory for agents.

+

The wiki is the inspectable storage layer. The product is shared, source-backed memory that local agents can query without starting every session from zero.

+
+
+ +
+
+ +
+

Best Fit

+

Use Link when you want one local memory layer that multiple agents can share. It is strongest for developer and power-user workflows where privacy, provenance, and inspectable files matter.

+
+

Personal agent memory

Preferences, decisions, project conventions, and active context that should survive between Codex, Kiro, Claude, Cursor, and other MCP clients.

+

Source-backed knowledge

Raw notes, transcripts, release notes, articles, and project files that should become cited Markdown pages instead of hidden context.

+

Inspectable retrieval

Smart query packets with reasons, sources, graph links, budgets, and follow-up actions instead of full-folder dumps.

+

Local ownership

Plain Markdown and JSON indexes on your machine. No hosted backend, telemetry, or required cloud account.

+
+ +

How To Choose

+
+
If you need... | Use Link when... | Use another category when...
+
Human-first notes | You want agents to query and maintain the notes as memory. | You mainly want a polished human writing app, mobile sync, plugins, and manual note-taking.
+
Hosted app memory | You want local files, no cloud account, and MCP access from many desktop agents. | You are building a hosted product and need managed APIs, user accounts, dashboards, and cloud-scale operations.
+
Stateful agent runtime | You already use agents and need shared memory outside any one runtime. | You want an entire agent platform with its own execution loop, hosted state, and deployment model.
+
Temporal business graph | You need personal/project memory with explicit review and source-backed Markdown. | You need automatic entity extraction, temporal fact invalidation, multi-user business data, and enterprise integrations.
+
Plain file search | You want search plus memory lifecycle, graph context, MCP tools, validation, and provenance. | You only need grep, ripgrep, or a folder of notes with no agent-memory workflow.
+
+ +

Trust Model

+

Link deliberately avoids hidden memory. A durable memory is a Markdown page with scope, status, review state, source/provenance, graph links, and a local audit trail. Agents can recall it, but humans can inspect, edit, archive, restore, or forget it.

+
raw/file.md -> wiki/sources/file.md -> linked concepts
+remember "preference" -> wiki/memories/preference.md
+query_link -> compact context + why_selected + follow_up actions
+ +

Boundaries

+
    +
  • Link is local-first personal software, not a hosted SaaS backend.
  • +
  • The local web viewer has no authentication and must stay on loopback unless you add your own auth layer.
  • +
  • Ingest quality depends on the agent that reads and writes pages; Link provides schema, validation, safety gates, and review workflows.
  • +
  • Generated memory is not automatically trusted. Use proposal, review, explain, archive, and forget workflows.
  • +
+
+
+
+ + + + diff --git a/integrations/README.md b/integrations/README.md index 2134b98..b86beea 100644 --- a/integrations/README.md +++ b/integrations/README.md @@ -1,15 +1,22 @@ -# Integrations +# Link Integrations -One-step setup for your AI tool. Default is global — one central wiki at `~/link/` that works across all projects. +One-step setup for local agents. The default mode creates one central Link wiki +at `~/link/` and teaches your agent how to use it as local personal memory. ## Quick start ```bash git clone https://github.com/gowtham0992/link.git ~/link-repo -bash ~/link-repo/integrations/kiro/install.sh +bash ~/link-repo/integrations/codex/install.sh ``` -That's it. Kiro now knows about Link in every project, and your wiki lives at `~/link/`. +Pick the installer that matches your agent. After install, try: + +```text +is Link ready? +brief me from Link before we continue +query Link for what you know about this project +``` ## All integrations @@ -31,11 +38,17 @@ That's it. Kiro now knows about Link in every project, and your wiki lives at `~ ## What the install does -1. Writes a small instruction file for your tool (so it knows Link exists) -2. Scaffolds wiki structure at `~/link/` (or current dir with `--project`) -3. Installs or upgrades `link-mcp` using normal pip first, then `~/.link-mcp-venv` if system Python is externally managed - -The instruction file is minimal — it just tells the agent that Link exists and to read `LINK.md` when you say "ingest", "query", "lint", or "research". It doesn't interfere with normal coding work. +1. Upserts a small Link instruction block without overwriting your existing instructions. +2. Scaffolds wiki structure at `~/link/` or the current directory with `--project`. +3. Installs or upgrades `link-mcp`, using `~/.link-mcp-venv` when system Python is externally managed. +4. Writes `.link-mcp-python` so clients can use the Python that actually has `link-mcp`. +5. 
Adds `~/.local/bin/link` for global installs, so checks are short: `link status --validate`. +6. Prints next prompts and verification commands for your install mode. + +The instruction file is intentionally small. It tells the agent to check +`link_status`, use `query_link` for compact context, use `memory_brief` before +personalized/project work, validate after ingest, and read `LINK.md` only when it +needs the full local protocol. ## Uninstall diff --git a/integrations/_shared/instructions.sh b/integrations/_shared/instructions.sh index 2c4d10e..c6001cf 100644 --- a/integrations/_shared/instructions.sh +++ b/integrations/_shared/instructions.sh @@ -14,13 +14,14 @@ from pathlib import Path target = Path(os.environ["LINK_TARGET"]).expanduser() source = Path(os.environ["LINK_SOURCE"]).read_text(encoding="utf-8").rstrip() -header = "## Link — Personal Knowledge Wiki" +headers = ["## Link — Local Agent Memory", "## Link — Personal Knowledge Wiki"] existing = "" if target.exists(): existing = target.read_text(encoding="utf-8", errors="replace") -pattern = re.compile(rf"(^|\n){re.escape(header)}\n.*?(?=\n## |\Z)", re.DOTALL) +header_pattern = "|".join(re.escape(header) for header in headers) +pattern = re.compile(rf"(^|\n)(?:{header_pattern})\n.*?(?=\n## |\Z)", re.DOTALL) match = pattern.search(existing) if match: prefix = "\n" if match.group(1) else "" @@ -33,3 +34,31 @@ target.write_text(updated, encoding="utf-8") PYEOF echo "$label → $target" } + +link_print_next_steps() { + local mode="${1:---global}" + + echo "" + echo "Done." + if [ "$mode" = "--project" ]; then + echo " Drop sources into raw/." + echo " View wiki: python3 link.py serve" + echo " Print starter prompts: python3 link.py prompts" + echo " Try in your agent:" + echo " is Link ready?" 
+ echo " brief me from Link before we continue" + echo " remember that this project uses Link for local agent memory" + echo " query Link for what this project remembers" + echo " ingest raw/ into Link" + else + echo " Drop sources into ~/link/raw/." + echo " View wiki: link serve" + echo " Print starter prompts: link prompts" + echo " Try in your agent:" + echo " is Link ready?" + echo " brief me from Link before we continue" + echo " remember that I prefer local-first agent memory" + echo " query Link for what you know about me" + echo " ingest raw/ into Link" + fi +} diff --git a/integrations/_shared/link-instructions-project.md b/integrations/_shared/link-instructions-project.md index 194cb96..8b82b43 100644 --- a/integrations/_shared/link-instructions-project.md +++ b/integrations/_shared/link-instructions-project.md @@ -1,7 +1,31 @@ -## Link — Personal Knowledge Wiki +## Link — Local Agent Memory -This project has a Link wiki. Raw sources in `raw/`, compiled wiki in `wiki/`. +This project has a Link wiki. Raw sources live in `raw/`, compiled wiki pages in `wiki/`, and direct memories in `wiki/memories/`. -When the user says **"ingest"**, **"query"**, **"lint"**, or **"research"**, read `LINK.md` for instructions and follow the protocol. +If you are unsure whether this project Link wiki is ready, use MCP `link_status` when available, or run `python3 link.py status --validate`. + +If the user asks what to try after installing Link, use MCP `starter_prompts` when available, or run `python3 link.py prompts`. + +If status reports a missing or old schema marker, use MCP `migrate_wiki` when available, or run `python3 link.py migrate`, before other writes. + +When the user asks to ingest or drops files into `raw/`, use MCP `ingest_status` when available, or run `python3 link.py ingest-status`, then follow its guided plan before deciding what to process. 
If it reports `blocked_secrets` or secret warnings, do not read or ingest flagged raw files until the user redacts them. + +When answering a substantive project question, start with MCP `query_link` when available, or run `python3 link.py query ""`. + +When you only need graph orientation, especially for a large wiki, prefer MCP `get_graph_summary` or `python3 link.py graph-summary ""` before requesting the full graph. + +When starting project-specific work, prime yourself with Link first: use MCP `memory_brief` when available, or run `python3 link.py brief ""`. Project installs infer the current repo as the memory project key, so project-scoped memories stay separate from other repos while broad user memories still apply. + +Before broad repairs or risky local wiki edits, create a local backup with MCP `backup_wiki` when available, or run `python3 link.py backup`. Do not include `raw/` unless the user explicitly asks. + +For long session notes, use `python3 link.py capture-session ""` to store a local raw capture and produce memory proposals without writing durable memories. +Use MCP `capture_inbox` when available, or `python3 link.py capture-inbox`, to review saved captures, warnings, and next-step commands. +When the human approves a proposal from a capture, use `python3 link.py accept-capture "" --index `. +If a capture reports secret warnings, ask before running `python3 link.py redact-capture ""`. +Only delete a raw capture after explicit confirmation: `python3 link.py delete-capture "" --confirm`. + +After ingesting raw sources or making substantial wiki edits, use MCP `rebuild_index`, `rebuild_backlinks`, and `validate_wiki` when available, or run `python3 link.py rebuild-index`, `python3 link.py rebuild-backlinks`, and `python3 link.py validate`, before saying the wiki is updated. + +When the user says **"remember"**, **"recall"**, **"ingest"**, **"query"**, **"lint"**, or **"research"**, read `LINK.md` for instructions and follow the protocol. 
Otherwise, don't interfere — just be a normal assistant. diff --git a/integrations/_shared/link-instructions.md b/integrations/_shared/link-instructions.md index 32d3ef3..031460c 100644 --- a/integrations/_shared/link-instructions.md +++ b/integrations/_shared/link-instructions.md @@ -1,7 +1,25 @@ -## Link — Personal Knowledge Wiki +## Link — Local Agent Memory -A personal knowledge wiki lives at `~/link/`. It has raw sources in `~/link/raw/` and compiled wiki pages in `~/link/wiki/`. +Local agent memory lives at `~/link/`. It has raw sources in `~/link/raw/`, compiled wiki pages in `~/link/wiki/`, and direct memories in `~/link/wiki/memories/`. -When the user says **"ingest"**, **"query"**, **"lint"**, or **"research"**, read `~/link/LINK.md` for instructions and follow the protocol. Use terminal commands to access `~/link/` since it's outside the workspace. +If you are unsure whether Link is ready, use MCP `link_status` when available, or run `link status --validate`. + +If the user asks what to try after installing Link, use MCP `starter_prompts` when available, or run `link prompts`. + +If status reports a missing or old schema marker, use MCP `migrate_wiki` when available, or run `link migrate`, before other writes. + +When the user asks to ingest or drops files into `raw/`, use MCP `ingest_status` when available, or run `link ingest-status`, then follow its guided plan before deciding what to process. If it reports `blocked_secrets` or secret warnings, do not read or ingest flagged raw files until the user redacts them. + +When answering a substantive question that may need local memory or wiki context, start with MCP `query_link` when available, or run `link query ""`. + +When you only need graph orientation, especially for a large wiki, prefer MCP `get_graph_summary` or `link graph-summary ""` before requesting the full graph. 
+ +When starting personalized or project-specific work, prime yourself with Link first: use MCP `memory_brief` when available, or run `link brief ""`. + +Before broad repairs or risky local wiki edits, create a local backup with MCP `backup_wiki` when available, or run `link backup`. Do not include `raw/` unless the user explicitly asks. + +After ingesting raw sources or making substantial wiki edits, use MCP `rebuild_index`, `rebuild_backlinks`, and `validate_wiki` when available, or run `link rebuild-index`, `link rebuild-backlinks`, and `link validate`, before saying the wiki is updated. + +When the user says **"remember"**, **"recall"**, **"ingest"**, **"query"**, **"lint"**, or **"research"**, read `~/link/LINK.md` for instructions and follow the protocol. Use terminal commands to access `~/link/` since it's outside the workspace. Otherwise, don't interfere — just be a normal assistant. diff --git a/integrations/_shared/scaffold.sh b/integrations/_shared/scaffold.sh index f7f0f2e..8d3a49a 100755 --- a/integrations/_shared/scaffold.sh +++ b/integrations/_shared/scaffold.sh @@ -15,12 +15,51 @@ LINK_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" MODE="${1:---global}" if [ "$MODE" = "--project" ]; then - TARGET_DIR="." + TARGET_DIR="$(pwd)" else TARGET_DIR="$HOME/link" mkdir -p "$TARGET_DIR" fi +shell_quote() { + printf "'%s'" "$(printf "%s" "$1" | sed "s/'/'\\\\''/g")" +} + +install_link_cli_wrapper() { + if [ "$MODE" = "--project" ] || [ ! -f "$TARGET_DIR/link.py" ]; then + return + fi + + LINK_CLI_DIR="${LINK_CLI_DIR:-$HOME/.local/bin}" + LINK_CLI_BIN="$LINK_CLI_DIR/link" + LINK_CLI_MARKER="# Link command wrapper" + + mkdir -p "$LINK_CLI_DIR" + + if [ -e "$LINK_CLI_BIN" ] && ! grep -q "$LINK_CLI_MARKER" "$LINK_CLI_BIN" 2>/dev/null; then + echo " · $LINK_CLI_BIN already exists and is not a Link wrapper; not overwriting." 
+ echo " Fallback: cd \"$TARGET_DIR\" && python3 link.py status --validate" + return + fi + + TARGET_DIR_Q="$(shell_quote "$TARGET_DIR")" + LINK_PY_Q="$(shell_quote "$TARGET_DIR/link.py")" + cat > "$LINK_CLI_BIN" < "$TARGET_DIR/wiki/_backlinks.json" + printf '{\n "backlinks": {},\n "forward": {}\n}\n' > "$TARGET_DIR/wiki/_backlinks.json" echo " Created wiki/_backlinks.json" fi @@ -83,13 +128,15 @@ if [ "$IS_UPDATE" = false ]; then echo " Wiki structure created at $TARGET_DIR" else # On update: ensure directory structure exists (in case new dirs were added) - for dir in raw wiki/sources wiki/concepts wiki/entities wiki/comparisons wiki/explorations; do + for dir in raw wiki/sources wiki/concepts wiki/entities wiki/memories wiki/comparisons wiki/explorations; do mkdir -p "$TARGET_DIR/$dir" done fi echo " Wiki ready at $TARGET_DIR" +install_link_cli_wrapper + # ── MCP server: install link-mcp package ───────────────────────────── echo "" echo " Setting up MCP server..." @@ -105,21 +152,46 @@ fi LINK_MCP_PYTHON="python3" LINK_MCP_VENV="${LINK_MCP_VENV:-$HOME/.link-mcp-venv}" LINK_MCP_VENV_PYTHON="$LINK_MCP_VENV/bin/python" +LINK_MCP_MARKER="$TARGET_DIR/.link-mcp-python" +LINK_MCP_INSTALLED=false +LINK_MCP_REUSED=false if python3 -m pip install --upgrade "$LINK_MCP_PACKAGE" -q 2>/dev/null; then LINK_MCP_PYTHON="python3" + LINK_MCP_INSTALLED=true elif python3 -m venv "$LINK_MCP_VENV" 2>/dev/null \ && "$LINK_MCP_VENV_PYTHON" -m pip install --upgrade pip -q 2>/dev/null \ && "$LINK_MCP_VENV_PYTHON" -m pip install --upgrade "$LINK_MCP_PACKAGE" -q 2>/dev/null; then LINK_MCP_PYTHON="$LINK_MCP_VENV_PYTHON" + LINK_MCP_INSTALLED=true fi -if "$LINK_MCP_PYTHON" -c "import link_mcp" 2>/dev/null; then - printf '%s\n' "$LINK_MCP_PYTHON" > "$TARGET_DIR/.link-mcp-python" - echo " ✓ link-mcp installed" +if [ "$LINK_MCP_INSTALLED" = false ] && [ -f "$LINK_MCP_MARKER" ]; then + LINK_MCP_MARKER_PYTHON="$(cat "$LINK_MCP_MARKER")" + if [ -n "$LINK_MCP_MARKER_PYTHON" ] && 
"$LINK_MCP_MARKER_PYTHON" -c "import link_mcp" 2>/dev/null; then + LINK_MCP_PYTHON="$LINK_MCP_MARKER_PYTHON" + LINK_MCP_INSTALLED=true + LINK_MCP_REUSED=true + fi +elif [ "$LINK_MCP_INSTALLED" = false ] && [ -x "$LINK_MCP_VENV_PYTHON" ] && "$LINK_MCP_VENV_PYTHON" -c "import link_mcp" 2>/dev/null; then + LINK_MCP_PYTHON="$LINK_MCP_VENV_PYTHON" + LINK_MCP_INSTALLED=true + LINK_MCP_REUSED=true +fi + +if [ "$LINK_MCP_INSTALLED" = true ] && "$LINK_MCP_PYTHON" -c "import link_mcp" 2>/dev/null; then + printf '%s\n' "$LINK_MCP_PYTHON" > "$LINK_MCP_MARKER" + if [ "$LINK_MCP_REUSED" = true ]; then + echo " ✓ existing link-mcp available" + else + echo " ✓ link-mcp installed" + fi if [ "$LINK_MCP_PYTHON" != "python3" ]; then echo " ✓ MCP Python: $LINK_MCP_PYTHON" fi + if [ "$LINK_MCP_REUSED" = true ]; then + echo " · Automatic upgrade did not complete; run verify-mcp to confirm the installed version." + fi echo "" echo " Add to your MCP client config:" echo ' {' @@ -141,10 +213,35 @@ fi if [ -f "$TARGET_DIR/link.py" ]; then echo "" - echo " Check wiki health:" - echo " python3 \"$TARGET_DIR/link.py\" doctor \"$TARGET_DIR\"" - echo " Verify MCP setup:" - echo " python3 \"$TARGET_DIR/link.py\" verify-mcp \"$TARGET_DIR\"" - echo " Repair stale graph index:" - echo " python3 \"$TARGET_DIR/link.py\" rebuild-backlinks \"$TARGET_DIR\"" + if [ "$MODE" = "--project" ]; then + echo " Check Link readiness:" + echo " python3 link.py status --validate" + echo " Print starter prompts:" + echo " python3 link.py prompts" + echo " Check wiki health:" + echo " python3 link.py doctor" + echo " Create a local backup:" + echo " python3 link.py backup" + echo " Validate ingest output:" + echo " python3 link.py validate" + echo " Verify MCP setup:" + echo " python3 link.py verify-mcp" + echo " Repair stale graph index:" + echo " python3 link.py rebuild-backlinks" + else + echo " Check Link readiness:" + echo " link status --validate" + echo " Print starter prompts:" + echo " link prompts" + echo " 
Check wiki health:" + echo " link doctor" + echo " Create a local backup:" + echo " link backup" + echo " Validate ingest output:" + echo " link validate" + echo " Verify MCP setup:" + echo " link verify-mcp" + echo " Repair stale graph index:" + echo " link rebuild-backlinks" + fi fi diff --git a/integrations/antigravity/install.sh b/integrations/antigravity/install.sh index 676a8e6..f2495ac 100755 --- a/integrations/antigravity/install.sh +++ b/integrations/antigravity/install.sh @@ -38,10 +38,8 @@ if [ -f "$MCP_MARKER" ]; then MCP_PYTHON="$(cat "$MCP_MARKER")" fi -echo "" -echo "Done." -echo " Drop sources into ~/link/raw/ and say 'ingest' to process them." -echo " View wiki: python ~/link/serve.py" echo "" echo " MCP: add to ~/.gemini/settings.json:" echo " { \"mcpServers\": { \"link\": { \"command\": \"$MCP_PYTHON\", \"args\": [\"-m\", \"link_mcp\", \"--wiki\", \"$WIKI_PATH\"] } } }" + +link_print_next_steps "$MODE" diff --git a/integrations/antigravity/uninstall.sh b/integrations/antigravity/uninstall.sh index e570dca..a867655 100755 --- a/integrations/antigravity/uninstall.sh +++ b/integrations/antigravity/uninstall.sh @@ -15,7 +15,7 @@ if [ ! 
-f "$TARGET" ]; then echo "No $TARGET found"; exit 0; fi python3 -c " import re, os text = open('$TARGET').read() -cleaned = re.sub(r'\n*## Link — Personal Knowledge Wiki\n.*?(?=\n## |\Z)', '', text, flags=re.DOTALL).rstrip() +cleaned = re.sub(r'\n*## Link — (?:Local Agent Memory|Personal Knowledge Wiki)\n.*?(?=\n## |\Z)', '', text, flags=re.DOTALL).rstrip() if cleaned: open('$TARGET', 'w').write(cleaned + '\n') print('Link section removed from $TARGET') diff --git a/integrations/claude-code/install.sh b/integrations/claude-code/install.sh index 23eabe8..9aafed6 100755 --- a/integrations/claude-code/install.sh +++ b/integrations/claude-code/install.sh @@ -68,7 +68,4 @@ else echo " { \"mcpServers\": { \"link\": { \"command\": \"$MCP_PYTHON\", \"args\": [\"-m\", \"link_mcp\", \"--wiki\", \"$WIKI_PATH\"] } } }" fi -echo "" -echo "Done." -echo " Drop sources into ~/link/raw/ and say 'ingest' to process them." -echo " View wiki: python ~/link/serve.py" +link_print_next_steps "$MODE" diff --git a/integrations/claude-code/uninstall.sh b/integrations/claude-code/uninstall.sh index 4ea0438..8d62ed1 100755 --- a/integrations/claude-code/uninstall.sh +++ b/integrations/claude-code/uninstall.sh @@ -15,7 +15,7 @@ if [ ! -f "$TARGET" ]; then echo "No $TARGET found"; exit 0; fi python3 -c " import re, os text = open('$TARGET').read() -cleaned = re.sub(r'\n*## Link — Personal Knowledge Wiki\n.*?(?=\n## |\Z)', '', text, flags=re.DOTALL).rstrip() +cleaned = re.sub(r'\n*## Link — (?:Local Agent Memory|Personal Knowledge Wiki)\n.*?(?=\n## |\Z)', '', text, flags=re.DOTALL).rstrip() if cleaned: open('$TARGET', 'w').write(cleaned + '\n') print('Link section removed from $TARGET') diff --git a/integrations/codex/install.sh b/integrations/codex/install.sh index 658b78b..22645c5 100755 --- a/integrations/codex/install.sh +++ b/integrations/codex/install.sh @@ -40,12 +40,6 @@ if [ -f "$MCP_MARKER" ]; then MCP_PYTHON="$(cat "$MCP_MARKER")" fi -echo "" -echo "Done." 
-echo " Drop sources into ~/link/raw/ and say 'ingest' to process them." -echo " View wiki: python ~/link/serve.py" -echo "" - # Auto-register MCP in ~/.codex/config.toml CODEX_CONFIG="$HOME/.codex/config.toml" if [ -f "$CODEX_CONFIG" ]; then @@ -78,3 +72,5 @@ elif [ ! -f "$CODEX_CONFIG" ]; then echo " command = \"$MCP_PYTHON\"" echo " args = [\"-m\", \"link_mcp\", \"--wiki\", \"$WIKI_PATH\"]" fi + +link_print_next_steps "$MODE" diff --git a/integrations/codex/uninstall.sh b/integrations/codex/uninstall.sh index 0d0b8a5..7b51baf 100755 --- a/integrations/codex/uninstall.sh +++ b/integrations/codex/uninstall.sh @@ -15,7 +15,7 @@ if [ ! -f "$TARGET" ]; then echo "No $TARGET found"; exit 0; fi python3 -c " import re, os text = open('$TARGET').read() -cleaned = re.sub(r'\n*## Link — Personal Knowledge Wiki\n.*?(?=\n## |\Z)', '', text, flags=re.DOTALL).rstrip() +cleaned = re.sub(r'\n*## Link — (?:Local Agent Memory|Personal Knowledge Wiki)\n.*?(?=\n## |\Z)', '', text, flags=re.DOTALL).rstrip() if cleaned: open('$TARGET', 'w').write(cleaned + '\n') print('Link section removed from $TARGET') diff --git a/integrations/copilot/install.sh b/integrations/copilot/install.sh index 4a6a847..076788f 100755 --- a/integrations/copilot/install.sh +++ b/integrations/copilot/install.sh @@ -35,10 +35,8 @@ if [ -f "$MCP_MARKER" ]; then MCP_PYTHON="$(cat "$MCP_MARKER")" fi -echo "" -echo "Done." -echo " Drop sources into raw/ and say 'ingest' to process them." -echo " View wiki: python ~/link/serve.py" echo "" echo " MCP: add to your Copilot MCP config:" echo " { \"mcpServers\": { \"link\": { \"command\": \"$MCP_PYTHON\", \"args\": [\"-m\", \"link_mcp\", \"--wiki\", \"$WIKI_PATH\"] } } }" + +link_print_next_steps "$MODE" diff --git a/integrations/copilot/uninstall.sh b/integrations/copilot/uninstall.sh index 9fd5815..2f7bbad 100755 --- a/integrations/copilot/uninstall.sh +++ b/integrations/copilot/uninstall.sh @@ -8,7 +8,7 @@ if [ ! 
-f "$TARGET" ]; then echo "No $TARGET found"; exit 0; fi python3 -c " import re, os text = open('$TARGET').read() -cleaned = re.sub(r'\n*## Link — Personal Knowledge Wiki\n.*?(?=\n## |\Z)', '', text, flags=re.DOTALL).rstrip() +cleaned = re.sub(r'\n*## Link — (?:Local Agent Memory|Personal Knowledge Wiki)\n.*?(?=\n## |\Z)', '', text, flags=re.DOTALL).rstrip() if cleaned: open('$TARGET', 'w').write(cleaned + '\n') print('Link section removed from $TARGET') diff --git a/integrations/cursor/install.sh b/integrations/cursor/install.sh index 057c504..b9772c7 100755 --- a/integrations/cursor/install.sh +++ b/integrations/cursor/install.sh @@ -9,6 +9,7 @@ set -e SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" MODE="${1:---global}" +. "$SCRIPT_DIR/../_shared/instructions.sh" if [ "$MODE" = "--global" ]; then INSTRUCTIONS=$(cat "$SCRIPT_DIR/../_shared/link-instructions.md") @@ -76,7 +77,4 @@ elif [ ! -f "$MCP_CONFIG" ]; then echo " { \"mcpServers\": { \"link\": { \"command\": \"$MCP_PYTHON\", \"args\": [\"-m\", \"link_mcp\", \"--wiki\", \"$WIKI_PATH\"] } } }" fi -echo "" -echo "Done." -echo " Drop sources into ~/link/raw/ and say 'ingest' to process them." -echo " View wiki: python ~/link/serve.py" +link_print_next_steps "$MODE" diff --git a/integrations/kiro/install.sh b/integrations/kiro/install.sh index d641f78..fe09536 100755 --- a/integrations/kiro/install.sh +++ b/integrations/kiro/install.sh @@ -11,6 +11,7 @@ set -e SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" MODE="${1:---global}" +. "$SCRIPT_DIR/../_shared/instructions.sh" if [ "$MODE" = "--global" ]; then INSTRUCTIONS=$(cat "$SCRIPT_DIR/../_shared/link-instructions.md") @@ -54,10 +55,7 @@ except Exception as e: PYEOF fi - echo "" - echo "Done." - echo " Drop sources into ~/link/raw/ and say 'ingest' to process them." 
- echo " View wiki: python ~/link/serve.py" + link_print_next_steps "$MODE" elif [ "$MODE" = "--project" ]; then INSTRUCTIONS=$(cat "$SCRIPT_DIR/../_shared/link-instructions-project.md") @@ -68,8 +66,7 @@ elif [ "$MODE" = "--project" ]; then echo "Link steering → $TARGET" bash "$SCRIPT_DIR/../_shared/scaffold.sh" --project - echo "" - echo "Done. Drop sources into raw/ and say 'ingest' to process them." + link_print_next_steps "$MODE" else echo "Usage: bash install.sh [--project]" exit 1 diff --git a/integrations/vscode/install.sh b/integrations/vscode/install.sh index 11590f7..0823504 100755 --- a/integrations/vscode/install.sh +++ b/integrations/vscode/install.sh @@ -9,6 +9,7 @@ set -e SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" MODE="${1:---global}" +. "$SCRIPT_DIR/../_shared/instructions.sh" TARGET=".vscode/settings.json" mkdir -p .vscode @@ -38,7 +39,8 @@ if not isinstance(instructions, list): instructions = [] instructions = [ i for i in instructions - if '## Link — Personal Knowledge Wiki' not in i.get('text', '') + if '## Link — Local Agent Memory' not in i.get('text', '') + and '## Link — Personal Knowledge Wiki' not in i.get('text', '') and 'Link, an LLM-maintained knowledge wiki' not in i.get('text', '') ] instructions.append({'text': instructions_text}) @@ -60,10 +62,8 @@ if [ -f "$MCP_MARKER" ]; then MCP_PYTHON="$(cat "$MCP_MARKER")" fi -echo "" -echo "Done." -echo " Drop sources into raw/ and say 'ingest' to process them." 
-echo " View wiki: python ~/link/serve.py" echo "" echo " MCP: add to .vscode/mcp.json:" echo " { \"servers\": { \"link\": { \"type\": \"stdio\", \"command\": \"$MCP_PYTHON\", \"args\": [\"-m\", \"link_mcp\", \"--wiki\", \"$WIKI_PATH\"] } } }" + +link_print_next_steps "$MODE" diff --git a/integrations/vscode/uninstall.sh b/integrations/vscode/uninstall.sh index 56ff332..9f4f708 100755 --- a/integrations/vscode/uninstall.sh +++ b/integrations/vscode/uninstall.sh @@ -11,7 +11,8 @@ settings = json.load(open('$TARGET')) instructions = settings.get('github.copilot.chat.codeGeneration.instructions', []) filtered = [ i for i in instructions - if '## Link — Personal Knowledge Wiki' not in i.get('text', '') + if '## Link — Local Agent Memory' not in i.get('text', '') + and '## Link — Personal Knowledge Wiki' not in i.get('text', '') and 'Link, an LLM-maintained knowledge wiki' not in i.get('text', '') ] if len(filtered) < len(instructions): diff --git a/link.py b/link.py index 300ef5c..0c02d6e 100644 --- a/link.py +++ b/link.py @@ -2,9 +2,34 @@ """Small Link command runner. 
Usage: + python link.py init [target] + python link.py serve [target] python link.py demo [target] + python link.py prompts [target] + python link.py status [target] + python link.py backup [target] python link.py doctor [target] + python link.py migrate [target] + python link.py validate [target] python link.py ingest-status [target] + python link.py remember "memory text" [target] + python link.py propose-memories [target] + python link.py capture-inbox [target] + python link.py update-memory "new memory text" [target] + python link.py query "task or question" [target] + python link.py graph-summary ["topic"] [target] + python link.py benchmark ["query"] [target] + python link.py brief ["task or question"] [target] + python link.py recall "query" [target] + python link.py profile [target] + python link.py memory-audit [target] + python link.py archive-memory [target] + python link.py restore-memory [target] + python link.py forget-memory [target] --confirm + python link.py memory-inbox [target] + python link.py review-memory [target] + python link.py explain-memory [target] + python link.py rebuild-index [target] python link.py rebuild-backlinks [target] python link.py verify-mcp [target] """ @@ -13,17 +38,19 @@ import argparse import fnmatch import json +import os import re +import shlex import shutil import subprocess import sys +import time from pathlib import Path -from typing import Callable +from typing import Callable, Mapping ROOT = Path(__file__).resolve().parent DEFAULT_DEMO_DIR = "link-demo" -DEMO_MARKER = ".link-demo" WIKILINK_RE = re.compile(r"\[\[([^\]|]+)(?:\|[^\]]*)?\]\]") SECRET_NAME_PATTERNS = ( ".env", @@ -44,19 +71,9 @@ "id_ed25519", "service-account*.json", ) -SECRET_VALUE_PATTERNS = ( - ("Anthropic API key", re.compile(r"\bsk-ant-[A-Za-z0-9_-]{20,}\b")), - ("OpenAI API key", re.compile(r"\bsk-[A-Za-z0-9_-]{20,}\b")), - ("GitHub token", re.compile(r"\bgh[pousr]_[A-Za-z0-9_]{20,}\b")), - ("AWS access key", 
re.compile(r"\bA[SK]IA[0-9A-Z]{16}\b")), - ("PyPI token", re.compile(r"\bpypi-[A-Za-z0-9_-]{20,}\b")), - ("Google API key", re.compile(r"\bAIza[0-9A-Za-z_-]{35}\b")), - ("Slack token", re.compile(r"\bxox[baprs]-[A-Za-z0-9-]{20,}\b")), - ("Stripe live secret key", re.compile(r"\bsk_live_[A-Za-z0-9]{20,}\b")), - ("Private key block", re.compile(r"-----BEGIN [A-Z ]*PRIVATE KEY-----")), -) SKIP_SCAN_DIRS = { ".git", + ".link-backups", "__pycache__", ".pytest_cache", ".ruff_cache", @@ -81,598 +98,437 @@ ".whl", ".zip", } +MEMORY_TYPES = ("preference", "decision", "project", "fact", "note") +MEMORY_SCOPES = ("user", "project", "global") + +_BUNDLED_CORE = ROOT / "mcp_package" +if (_BUNDLED_CORE / "link_core").exists(): + sys.path.insert(0, str(_BUNDLED_CORE)) + +from link_core.memory import ( + add_capture_review_to_brief as _core_add_capture_review_to_brief, + count_values as _core_count_values, + default_project_for_target as _core_default_project_for_target, + forget_memory_page as _core_forget_memory_page, + mark_memory_reviewed as _core_mark_memory_reviewed, + memory_brief as _core_memory_brief, + memory_explanation as _core_memory_explanation, + memory_inbox as _core_memory_inbox, + memory_profile as _core_memory_profile, + memory_audit_report as _core_memory_audit_report, + memory_records as _core_memory_records, + memory_review_issues as _core_memory_review_issues, + propose_memories_from_text as _core_propose_memories_from_text, + recall_memories as _core_recall_memories, + recent_memories as _core_recent_memories, + resolve_memory_page as _core_resolve_memory_page, + set_memory_status as _core_set_memory_status, + top_tags as _core_top_tags, + update_memory_page as _core_update_memory_page, + write_memory_page as _core_write_memory_page, +) +from link_core.backup import ( + BackupError as _CoreBackupError, + create_backup as _core_create_backup, + list_backups as _core_list_backups, +) +from link_core.benchmark import ( + benchmark_health as 
_core_benchmark_health, +) +from link_core.demo import ( + DEMO_FILES, + DEMO_MARKER, +) +from link_core.capture import ( + capture_filename as _core_capture_filename, + capture_inbox as _core_capture_inbox, + capture_notes_from_markdown as _core_capture_notes_from_markdown, + capture_records as _core_capture_records, + capture_review_summary as _core_capture_review_summary, + capture_title as _core_capture_title, + cli_capture_commands as _core_cli_capture_commands, + resolve_capture_file as _core_resolve_capture_file, +) +from link_core.files import ( + atomic_write_json as _core_atomic_write_json, + atomic_write_text as _core_atomic_write_text, +) +from link_core.frontmatter import ( + frontmatter_string as _frontmatter_string, + parse_frontmatter as _parse_frontmatter, +) +from link_core.ingest import ( + collect_ingest_status as _core_collect_ingest_status, + normalize_link_index as _core_normalize_link_index, +) +from link_core.log import ( + append_log as _core_append_log, + utc_timestamp as _core_utc_timestamp, + write_default_log as _core_write_default_log, +) +from link_core.schema import ( + migrate_wiki as _core_migrate_wiki, + schema_status as _core_schema_status, +) +from link_core.security import ( + clean_text_input as _clean_text_input, + redact_secret_values as _redact_secret_values, + secret_value_warnings as _secret_value_warnings, +) +from link_core.query import ( + query_link as _core_query_link, +) +from link_core.prompts import ( + starter_prompt_payload as _core_starter_prompt_payload, +) +from link_core.web_graph import ( + GRAPH_INITIAL_SUMMARY_EDGE_LIMIT as _core_graph_initial_summary_edge_limit, + GRAPH_INITIAL_SUMMARY_NODE_LIMIT as _core_graph_initial_summary_node_limit, + graph_initial_payload as _core_graph_initial_payload, + graph_needs_bounded_overview as _core_graph_needs_bounded_overview, +) +from link_core.validation import ( + validate_wiki as _core_validate_wiki, +) +from link_core.version import ( + LINK_VERSION, +) +from 
link_core.status import ( + link_status as _core_link_status, +) +from link_core.wiki import ( + build_backlinks as _core_build_backlinks, + build_wiki_cache as _core_build_wiki_cache, + close_wiki_cache as _core_close_wiki_cache, + graph_data as _core_graph_data, + graph_summary as _core_graph_summary, + list_pages as _core_list_pages, + rebuild_index as _core_rebuild_index, + search_pages as _core_search_pages, +) +del _BUNDLED_CORE -DEMO_FILES: dict[str, str] = { - "raw/agent-memory-session.md": """--- -title: "Agent memory session" -source_type: demo-note -date_captured: 2026-05-02 -author: Link demo -tags: [agents, memory, local-first] ---- - -# Agent memory session - -An AI coding agent keeps losing project context between sessions. The team wants durable memory that is local, inspectable, and easy to cite. - -Key decisions: - -- Keep raw source notes immutable. -- Compile sources into durable wiki pages. -- Use [[agent-memory]] as the interface between past work and future agents. -- Prefer [[local-first-software]] so the knowledge base stays under user control. -- Expose context through MCP so agents can retrieve graph neighborhoods instead of reading every file. -""", - "raw/transformer-reading-notes.md": """--- -title: "Transformer reading notes" -source_type: demo-note -date_captured: 2026-05-02 -author: Link demo -tags: [ai, transformers, retrieval] ---- - -# Transformer reading notes - -Transformers made long-context sequence modeling practical by replacing recurrence with attention. Modern LLM systems often pair transformer models with external retrieval. - -Connections: - -- [[transformers]] provide the model architecture. -- [[retrieval-augmented-generation]] provides fresh or private context. -- [[agent-memory]] gives agents persistent project knowledge outside a single chat. 
-""", - "raw/local-release-notes.md": """--- -title: "Local release notes" -source_type: demo-note -date_captured: 2026-05-02 -author: Link demo -tags: [release, graph, mcp] ---- - -# Local release notes - -The product team ships a local wiki viewer, MCP server, and graph view. The release focuses on making agent memory visible and auditable. - -Notable changes: - -- [[link]] exposes search, context, backlinks, and graph tools. -- [[knowledge-graph]] shows concepts, sources, and entities as connected pages. -- [[local-first-software]] keeps the source material on disk. -""", - "wiki/sources/agent-memory-session.md": """--- -type: source -title: "Agent memory session" -author: "Link demo" -date_published: "2026-05-02" -date_ingested: "2026-05-02" -source_url: "local demo note" -tags: [agents, memory, local-first] -confidence: high -aliases: ["memory demo note"] ---- - -# Agent memory session - -> **TLDR:** A demo note about turning local project sources into durable context for future agents. - -## Summary - -The source describes an AI coding workflow where an agent repeatedly loses project context between sessions. It proposes raw source notes, compiled wiki pages, and MCP retrieval as a durable memory layer. *Source: [[agent-memory-session]]* `[confidence: high]` - -The note emphasizes local control and inspectability. Raw sources stay immutable, while generated wiki pages become the maintained knowledge layer. *Source: [[agent-memory-session]]* `[confidence: high]` - -## Key Claims - -- **Agent memory should be durable** so future sessions can recover project context. `[confidence: high]` -- **Raw notes should remain immutable** while wiki pages evolve. `[confidence: high]` -- **MCP makes memory agent-readable** through structured tools instead of ad hoc file scans. `[confidence: high]` - -## Connections - -- Defines a need for [[agent-memory]]. -- Supports [[local-first-software]] as the storage model. -- Connects [[link]] to agent workflows through MCP. 
- -## Raw Source - -`raw/agent-memory-session.md` -""", - "wiki/sources/transformer-reading-notes.md": """--- -type: source -title: "Transformer reading notes" -author: "Link demo" -date_published: "2026-05-02" -date_ingested: "2026-05-02" -source_url: "local demo note" -tags: [ai, transformers, retrieval] -confidence: high -aliases: ["transformer demo note"] ---- - -# Transformer reading notes - -> **TLDR:** A demo note linking transformers, retrieval, and persistent agent memory. - -## Summary - -The source frames [[transformers]] as the architecture behind modern LLM systems and connects them to external retrieval. It treats retrieval and memory as practical complements to model context. *Source: [[transformer-reading-notes]]* `[confidence: high]` - -The note links [[retrieval-augmented-generation]] to [[agent-memory]] because both bring outside context into model workflows. *Source: [[transformer-reading-notes]]* `[confidence: high]` - -## Key Claims - -- **Transformers replaced recurrence with attention** for sequence modeling. `[confidence: high]` -- **External retrieval complements LLM context** when information is fresh, private, or project-specific. `[confidence: high]` -- **Persistent agent memory stores knowledge outside one chat session.** `[confidence: high]` - -## Connections - -- Explains why [[transformers]] matter to LLM systems. -- Connects [[retrieval-augmented-generation]] to persistent context. -- Supports [[agent-memory]] as a local retrieval layer. - -## Raw Source - -`raw/transformer-reading-notes.md` -""", - "wiki/sources/local-release-notes.md": """--- -type: source -title: "Local release notes" -author: "Link demo" -date_published: "2026-05-02" -date_ingested: "2026-05-02" -source_url: "local demo note" -tags: [release, graph, mcp] -confidence: high -aliases: ["demo release note"] ---- - -# Local release notes - -> **TLDR:** A demo release note showing Link as a local wiki viewer, graph, and MCP memory server. 
- -## Summary - -The source describes a release centered on making agent memory visible and auditable. It identifies a local wiki viewer, MCP server, and graph view as the main product surfaces. *Source: [[local-release-notes]]* `[confidence: high]` - -The note connects [[link]] with [[knowledge-graph]] and [[local-first-software]], showing how local markdown can become both a human-readable wiki and agent-readable memory. *Source: [[local-release-notes]]* `[confidence: high]` - -## Key Claims - -- **Link exposes search, context, backlinks, and graph tools.** `[confidence: high]` -- **Graph views make relationships inspectable.** `[confidence: high]` -- **Local-first storage keeps source material under user control.** `[confidence: high]` - -## Connections - -- Describes [[link]] product surfaces. -- Connects [[knowledge-graph]] to visible agent memory. -- Supports [[local-first-software]] as the privacy model. - -## Raw Source - -`raw/local-release-notes.md` -""", - "wiki/concepts/agent-memory.md": """--- -type: concept -title: "Agent memory" -aliases: ["AI memory", "agent context", "durable context"] -date_created: "2026-05-02" -date_updated: "2026-05-02" -source_count: 2 -tags: [agents, memory, mcp] -maturity: growing ---- - -# Agent memory - -> **TLDR:** Agent memory is durable, inspectable context that lets AI agents recover prior project knowledge across sessions. - -## Overview - -Agent memory addresses a common failure mode in AI workflows: each new session starts without the full project history. In Link, memory is stored as markdown wiki pages compiled from immutable raw sources. *Source: [[agent-memory-session]]* `[confidence: high]` - -This memory is useful because agents can query a focused topic and receive the primary page plus related graph context. That is more efficient than reading every source file. *Source: [[transformer-reading-notes]]* `[confidence: high]` - -## How It Works - -1. A user drops source material into `raw/`. -2. 
An agent compiles durable pages into `wiki/`. -3. Link builds search indexes and backlinks. -4. MCP tools return focused graph context to future agents. - -## Key Facts - -- **Agent memory should be durable** so future sessions can recover project context. *Source: [[agent-memory-session]]* `[confidence: high]` -- **MCP makes memory agent-readable** through structured tools. *Source: [[agent-memory-session]]* `[confidence: high]` -- **Persistent memory complements LLM context windows** by storing knowledge outside a single chat. *Source: [[transformer-reading-notes]]* `[confidence: high]` - -## Open Questions - -- Which memories should be promoted from raw notes into stable wiki pages? -- How should agents detect stale project decisions? - -## Related - -- [[link]] - provides the local wiki and MCP layer. -- [[retrieval-augmented-generation]] - retrieves external context for model workflows. -- [[local-first-software]] - keeps memory under user control. - -## Sources - -- [[agent-memory-session]] -- [[transformer-reading-notes]] -""", - "wiki/concepts/retrieval-augmented-generation.md": """--- -type: concept -title: "Retrieval-augmented generation" -aliases: ["RAG", "retrieval augmented generation"] -date_created: "2026-05-02" -date_updated: "2026-05-02" -source_count: 1 -tags: [ai, retrieval, context] -maturity: seed ---- - -# Retrieval-augmented generation - -> **TLDR:** Retrieval-augmented generation brings external context into model workflows before generation. - -## Overview - -Retrieval-augmented generation pairs a model with a retrieval layer. Instead of relying only on model weights or the current chat, a system fetches relevant external context first. *Source: [[transformer-reading-notes]]* `[confidence: high]` - -In Link, the retrieval layer is a local markdown wiki exposed through search, context, backlinks, and graph tools. This makes [[agent-memory]] inspectable instead of hidden in a proprietary store. 
*Source: [[transformer-reading-notes]]* `[confidence: high]` - -## Key Facts - -- **External retrieval complements LLM context** when information is fresh, private, or project-specific. *Source: [[transformer-reading-notes]]* `[confidence: high]` -- **Persistent memory can be modeled as retrieval** over durable local pages. *Source: [[transformer-reading-notes]]* `[confidence: high]` - -## Related - -- [[agent-memory]] - a local memory use case for retrieval. -- [[transformers]] - the model architecture that often consumes retrieved context. -- [[link]] - provides the local retrieval surface. - -## Sources - -- [[transformer-reading-notes]] -""", - "wiki/concepts/transformers.md": """--- -type: concept -title: "Transformers" -aliases: ["transformer architecture", "LLM architecture"] -date_created: "2026-05-02" -date_updated: "2026-05-02" -source_count: 1 -tags: [ai, models, attention] -maturity: seed ---- - -# Transformers - -> **TLDR:** Transformers are neural architectures that use attention to model relationships across sequences. - -## Overview - -Transformers are presented in the demo source as the architecture behind many modern LLM systems. They made long-context sequence modeling practical by replacing recurrence with attention. *Source: [[transformer-reading-notes]]* `[confidence: high]` - -The source connects transformers to [[retrieval-augmented-generation]] because modern LLM workflows often combine model context with retrieved project or domain knowledge. *Source: [[transformer-reading-notes]]* `[confidence: high]` - -## Key Facts - -- **Transformers use attention for sequence modeling.** *Source: [[transformer-reading-notes]]* `[confidence: high]` -- **Transformer systems often benefit from retrieved context.** *Source: [[transformer-reading-notes]]* `[confidence: high]` - -## Related - -- [[retrieval-augmented-generation]] - supplies outside context to model workflows. -- [[agent-memory]] - stores project context for future sessions. 
- -## Sources - -- [[transformer-reading-notes]] -""", - "wiki/concepts/local-first-software.md": """--- -type: concept -title: "Local-first software" -aliases: ["local first", "local-first"] -date_created: "2026-05-02" -date_updated: "2026-05-02" -source_count: 2 -tags: [privacy, storage, software] -maturity: growing ---- - -# Local-first software - -> **TLDR:** Local-first software keeps user data on disk in formats the user can inspect, back up, and move. - -## Overview -Local-first software is a product design choice where the user's data remains directly accessible on their machine. In the demo sources, this matters because [[agent-memory]] can contain project decisions and source notes. *Source: [[agent-memory-session]]* `[confidence: high]` +def _build_backlinks(wiki_dir: Path) -> dict[str, dict[str, list[str]]]: + return _core_build_backlinks(wiki_dir, body_only=False) -Link follows this model by storing raw sources and wiki pages as markdown files. The graph and MCP server read those files rather than sending them to a hosted backend. *Source: [[local-release-notes]]* `[confidence: high]` -## Key Facts +def _wiki_page_records(wiki_dir: Path) -> list[dict[str, object]]: + records: list[dict[str, object]] = [] + for md in _wiki_pages(wiki_dir): + text = md.read_text(encoding="utf-8", errors="replace") + meta, body = _parse_frontmatter(text) + records.append({ + "path": md, + "rel": str(md.relative_to(wiki_dir)), + "stem": md.stem.lower(), + "meta": meta, + "body": body, + }) + return records -- **Raw notes stay immutable** while generated wiki pages evolve. *Source: [[agent-memory-session]]* `[confidence: high]` -- **Local markdown keeps memory inspectable.** *Source: [[local-release-notes]]* `[confidence: high]` -## Related +def _wiki_pages(wiki_dir: Path) -> list[Path]: + return sorted( + md for md in wiki_dir.rglob("*.md") + if not md.name.startswith(".") + ) -- [[link]] - implements local-first agent memory. 
-- [[agent-memory]] - benefits from local, inspectable storage. -- [[knowledge-graph]] - visualizes local wiki relationships. -## Sources +def _page_stems(wiki_dir: Path) -> set[str]: + return {md.stem.lower() for md in _wiki_pages(wiki_dir)} -- [[agent-memory-session]] -- [[local-release-notes]] -""", - "wiki/concepts/knowledge-graph.md": """--- -type: concept -title: "Knowledge graph" -aliases: ["graph view", "wiki graph"] -date_created: "2026-05-02" -date_updated: "2026-05-02" -source_count: 1 -tags: [graph, wiki, visualization] -maturity: seed ---- -# Knowledge graph +def _load_backlinks(path: Path) -> tuple[dict[str, dict[str, list[str]]] | None, str | None]: + if not path.exists(): + return None, "missing wiki/_backlinks.json" + try: + raw = json.loads(path.read_text(encoding="utf-8")) + except Exception as exc: + return None, f"invalid wiki/_backlinks.json: {exc}" + if "backlinks" in raw or "forward" in raw: + backlinks = raw.get("backlinks", {}) + forward = raw.get("forward", {}) + else: + backlinks = raw + forward = {} + if not isinstance(backlinks, dict) or not isinstance(forward, dict): + return None, "wiki/_backlinks.json must contain object maps" + return {"backlinks": backlinks, "forward": forward}, None -> **TLDR:** A knowledge graph shows wiki pages as nodes and wikilinks as relationships. -## Overview +def _resolve_wiki_dir(target: Path) -> Path: + target = target.expanduser().resolve() + if target.name == "wiki" and (target / "index.md").exists(): + return target + return target / "wiki" -In Link, the knowledge graph makes relationships between sources, concepts, and entities visible. This helps users inspect what an agent has connected and where a claim came from. *Source: [[local-release-notes]]* `[confidence: high]` -The graph supports the same mental model as MCP context retrieval: a topic is not isolated, it lives in a neighborhood of related pages. 
*Source: [[local-release-notes]]* `[confidence: high]` +def _resolve_link_root(target: Path) -> Path: + target = target.expanduser().resolve() + if target.name == "wiki" and (target / "index.md").exists(): + return target.parent + return target -## Key Facts -- **Graph views make relationships inspectable.** *Source: [[local-release-notes]]* `[confidence: high]` -- **Wikilinks provide the graph edges.** *Source: [[local-release-notes]]* `[confidence: high]` +def _default_project(target: Path) -> str: + return _core_default_project_for_target(target) -## Related -- [[link]] - renders the graph. -- [[agent-memory]] - uses graph context to recover related knowledge. -- [[local-first-software]] - keeps graph data in markdown files. +def _utc_timestamp() -> str: + return _core_utc_timestamp() -## Sources -- [[local-release-notes]] -""", - "wiki/entities/link.md": """--- -type: entity -title: "Link" -entity_type: project -aliases: ["Link wiki", "Link MCP"] -date_created: "2026-05-02" -date_updated: "2026-05-02" -tags: [wiki, mcp, agents, local-first] -source_count: 2 -maturity: growing ---- +def _memory_records(wiki_dir: Path) -> list[dict[str, object]]: + return _core_memory_records(wiki_dir) -# Link -> **TLDR:** Link is a local-first wiki and MCP server that turns source notes into durable memory for AI agents. +def _memory_review_issues(record: dict[str, object]) -> list[dict[str, str]]: + return _core_memory_review_issues(record, review_command="review-memory") -## Overview -Link stores source material in `raw/` and compiled wiki pages in `wiki/`. The web viewer makes the wiki readable by humans, while MCP tools make the same knowledge readable by agents. 
*Source: [[local-release-notes]]* `[confidence: high]` +def _memory_inbox( + wiki_dir: Path, + limit: int = 20, + include_archived: bool = False, + project: str | None = None, +) -> dict[str, object]: + return _core_memory_inbox( + _memory_records(wiki_dir), + limit=limit, + include_archived=include_archived, + review_command="review-memory", + project=project, + ) -The demo positions Link as a local [[agent-memory]] layer. It keeps knowledge inspectable through markdown and navigable through a [[knowledge-graph]]. *Source: [[agent-memory-session]]* `[confidence: high]` -## Key Contributions +def _memory_explanation(wiki_dir: Path, identifier: str) -> dict[str, object]: + return _core_memory_explanation( + wiki_dir, + identifier, + records=_memory_records(wiki_dir), + review_command="review-memory", + backlinks_body_only=False, + ) -- Provides search, context, backlinks, and graph tools. *Source: [[local-release-notes]]* `[confidence: high]` -- Keeps source material local and inspectable. *Source: [[local-release-notes]]* `[confidence: high]` -- Gives future agents durable project context. *Source: [[agent-memory-session]]* `[confidence: high]` -## Connections +def _count_values(records: list[dict[str, object]], field: str) -> dict[str, int]: + return _core_count_values(records, field) -- Implements [[agent-memory]]. -- Uses [[local-first-software]] as the storage model. -- Exposes a [[knowledge-graph]] for human inspection. -- Supports [[retrieval-augmented-generation]] workflows through MCP. -## Sources +def _top_tags(records: list[dict[str, object]], limit: int = 12) -> list[dict[str, object]]: + return _core_top_tags(records, limit=limit) -- [[agent-memory-session]] -- [[local-release-notes]] -""", - "wiki/explorations/why-link-helps-agents.md": """--- -type: exploration -title: "Why Link helps agents" -date_created: "2026-05-02" -query: "Why does Link help AI agents?" 
-aliases: ["agent memory demo answer"] -tags: [agents, memory, demo] ---- -# Why Link helps agents +def _recent_memories(records: list[dict[str, object]]) -> list[dict[str, object]]: + return _core_recent_memories(records) -> **Query:** Why does Link help AI agents? -## Answer +def _memory_profile(wiki_dir: Path, limit: int = 10, project: str | None = None) -> dict[str, object]: + return _core_memory_profile( + _memory_records(wiki_dir), + limit=limit, + review_command="review-memory", + project=project, + ) -Link helps agents because it turns past project material into durable, queryable context. Instead of starting each session from a blank chat, an agent can ask for [[agent-memory]] and receive the main page plus related concepts, sources, and entities. -The important part is inspectability. The memory is just markdown, the relationships are just wikilinks, and the graph shows what the agent can retrieve. This fits [[local-first-software]] and makes the memory easier to audit. +def _memory_brief(wiki_dir: Path, query: str = "", limit: int = 6, project: str | None = None) -> dict[str, object]: + return _core_memory_brief( + _memory_records(wiki_dir), + query=query, + limit=limit, + review_command="review-memory", + project=project, + ) -## Reasoning -The answer combines [[agent-memory-session]], [[transformer-reading-notes]], and [[local-release-notes]]. Together they show Link as a local retrieval layer for AI workflows: sources become pages, pages form a [[knowledge-graph]], and MCP exposes that graph to agents. 
+def _query_link(wiki_dir: Path, query: str, budget: str = "medium", project: str | None = None) -> dict[str, object]: + cache = _core_build_wiki_cache(wiki_dir) + try: + return _core_query_link( + wiki_dir, + query, + cache, + _memory_records(wiki_dir), + budget=budget, + project=project, + review_command="review-memory", + ) + finally: + _core_close_wiki_cache(cache) + + +def _recall_memories( + wiki_dir: Path, + query: str, + limit: int = 10, + include_archived: bool = False, + project: str | None = None, +) -> list[dict[str, object]]: + return _core_recall_memories( + _memory_records(wiki_dir), + query, + limit=limit, + include_archived=include_archived, + project=project, + ) -## Sources Consulted -- [[agent-memory]] -- [[link]] -- [[knowledge-graph]] -- [[retrieval-augmented-generation]] -""", - "wiki/index.md": """# Link Demo Wiki Index +def _propose_memories_from_text( + wiki_dir: Path, + text: str, + source: str = "inline", + limit: int = 10, + project: str | None = None, +) -> dict[str, object]: + return _core_propose_memories_from_text( + text, + _memory_records(wiki_dir), + source=source, + limit=limit, + writes_memory=False, + project=project, + ) -> Last updated: 2026-05-02 | 10 pages | 3 sources -## Categories +def _append_log(wiki_dir: Path, timestamp: str, operation: str, description: str, lines: list[str]) -> None: + _core_append_log(wiki_dir, timestamp, operation, description, lines) -### concepts -- [[agent-memory]] - Durable, inspectable context for AI agents. growing - 2 sources - also: AI memory, agent context -- [[retrieval-augmented-generation]] - Retrieves external context before generation. seed - 1 source - also: RAG -- [[transformers]] - Attention-based model architecture behind modern LLM systems. seed - 1 source -- [[local-first-software]] - Keeps user data on disk in inspectable formats. growing - 2 sources -- [[knowledge-graph]] - Shows pages as nodes and wikilinks as edges. 
seed - 1 source -### entities -- [[link]] - Local-first wiki and MCP memory server for agents. growing - 2 sources - also: Link MCP +def _resolve_memory_page(wiki_dir: Path, identifier: str) -> tuple[Path | None, dict[str, object] | None, str | None]: + return _core_resolve_memory_page(wiki_dir, identifier, records=_memory_records(wiki_dir)) -### sources -- [[agent-memory-session]] - Demo note on durable project context. high -- [[transformer-reading-notes]] - Demo note connecting transformers, retrieval, and memory. high -- [[local-release-notes]] - Demo note on Link surfaces and graph visibility. high -### explorations -- [[why-link-helps-agents]] - Filed answer explaining Link as durable agent memory. +def _memory_runtime(target: Path) -> tuple[Path, list[dict[str, object]]]: + target = target.expanduser().resolve() + wiki_dir = _resolve_wiki_dir(target) + if not wiki_dir.exists(): + raise FileNotFoundError(f"missing wiki directory: {wiki_dir}") + return wiki_dir, _memory_records(wiki_dir) -## Recent -| Date | Operation | Pages Touched | -|------|-----------|---------------| -| 2026-05-02 | demo: create first-run sample wiki | 10 pages | -""", - "wiki/log.md": """# Link Demo Wiki Log - -*Append-only record of demo wiki operations.* - ---- - -## [2026-05-02T00:00:00Z] demo | create first-run sample wiki - -- Source: raw/agent-memory-session.md -- Source: raw/transformer-reading-notes.md -- Source: raw/local-release-notes.md -- Created: sources/agent-memory-session.md -- Created: sources/transformer-reading-notes.md -- Created: sources/local-release-notes.md -- Created: concepts/agent-memory.md -- Created: concepts/retrieval-augmented-generation.md -- Created: concepts/transformers.md -- Created: concepts/local-first-software.md -- Created: concepts/knowledge-graph.md -- Created: entities/link.md -- Created: explorations/why-link-helps-agents.md -- Rebuilt: wiki/_backlinks.json -- Pages touched: 10 - ---- -""", -} +def _log_writer_for(wiki_dir: Path) -> 
Callable[[str, str, str, list[str]], None]: + return lambda ts, operation, description, lines: _append_log( + wiki_dir, + ts, + operation, + description, + lines, + ) -def _build_backlinks(wiki_dir: Path) -> dict[str, dict[str, list[str]]]: - backlinks: dict[str, list[str]] = {} - forward: dict[str, list[str]] = {} - for md in sorted(wiki_dir.rglob("*.md")): - if md.name.startswith("."): - continue - source = md.stem.lower() - text = md.read_text(encoding="utf-8", errors="replace") - for match in WIKILINK_RE.finditer(text): - target = match.group(1).strip().lower() - if not target or target == source: - continue - backlinks.setdefault(target, []) - if source not in backlinks[target]: - backlinks[target].append(source) - forward.setdefault(source, []) - if target not in forward[source]: - forward[source].append(target) - return {"backlinks": backlinks, "forward": forward} - - -def _parse_frontmatter(text: str) -> tuple[dict[str, str], str]: - if not text.startswith("---\n"): - return {}, text - end = text.find("\n---", 4) - if end == -1: - return {}, text - frontmatter = text[4:end] - body = text[end + 4:].lstrip("\n") - meta: dict[str, str] = {} - for line in frontmatter.splitlines(): - if ":" not in line or line.lstrip().startswith("#"): - continue - key, value = line.split(":", 1) - meta[key.strip()] = value.strip().strip("\"'") - return meta, body +def _rebuild_memory_backlinks(wiki_dir: Path) -> bool: + try: + backlinks = _build_backlinks(wiki_dir) + except OSError as exc: + print(f"Could not rebuild backlinks: {exc}", file=sys.stderr) + return False + _core_atomic_write_json(wiki_dir / "_backlinks.json", backlinks) + return True + + +def _memory_mutation_options( + wiki_dir: Path, + records: list[dict[str, object]], + timestamp: str | None, + project: str | None = None, +) -> dict[str, object]: + return { + "timestamp": timestamp or _utc_timestamp(), + "records": records, + "project": project, + "log_writer": _log_writer_for(wiki_dir), + "rebuild_backlinks": 
lambda: _rebuild_memory_backlinks(wiki_dir), + } -def _wiki_page_records(wiki_dir: Path) -> list[dict[str, object]]: - records: list[dict[str, object]] = [] - for md in _wiki_pages(wiki_dir): - text = md.read_text(encoding="utf-8", errors="replace") - meta, body = _parse_frontmatter(text) - records.append({ - "path": md, - "rel": str(md.relative_to(wiki_dir)), - "stem": md.stem.lower(), - "meta": meta, - "body": body, - }) - return records +def _required_memory_text(text: str, message: str) -> str: + clean_text = text.strip() + if not clean_text: + raise ValueError(message) + return clean_text -def _wiki_pages(wiki_dir: Path) -> list[Path]: - return sorted( - md for md in wiki_dir.rglob("*.md") - if not md.name.startswith(".") +def _set_memory_status( + target: Path, + identifier: str, + status: str, + reason: str | None = None, + timestamp: str | None = None, +) -> dict[str, object]: + wiki_dir, records = _memory_runtime(target) + return _core_set_memory_status( + wiki_dir, + identifier, + status, + reason=reason, + timestamp=timestamp or _utc_timestamp(), + records=records, + log_writer=_log_writer_for(wiki_dir), ) -def _page_stems(wiki_dir: Path) -> set[str]: - return {md.stem.lower() for md in _wiki_pages(wiki_dir)} +def _mark_memory_reviewed( + target: Path, + identifier: str, + note: str | None = None, + timestamp: str | None = None, +) -> dict[str, object]: + wiki_dir, records = _memory_runtime(target) + return _core_mark_memory_reviewed( + wiki_dir, + identifier, + note=note, + timestamp=timestamp or _utc_timestamp(), + records=records, + review_command="review-memory", + log_writer=_log_writer_for(wiki_dir), + ) -def _load_backlinks(path: Path) -> tuple[dict[str, dict[str, list[str]]] | None, str | None]: - if not path.exists(): - return None, "missing wiki/_backlinks.json" - try: - raw = json.loads(path.read_text(encoding="utf-8")) - except Exception as exc: - return None, f"invalid wiki/_backlinks.json: {exc}" - if "backlinks" in raw or "forward" in raw: 
- backlinks = raw.get("backlinks", {}) - forward = raw.get("forward", {}) - else: - backlinks = raw - forward = {} - if not isinstance(backlinks, dict) or not isinstance(forward, dict): - return None, "wiki/_backlinks.json must contain object maps" - return {"backlinks": backlinks, "forward": forward}, None +def _update_memory_page( + target: Path, + identifier: str, + text: str, + source: str = "manual", + timestamp: str | None = None, + allow_conflict: bool = False, + project: str | None = None, +) -> dict[str, object]: + wiki_dir, records = _memory_runtime(target) + clean_text = _required_memory_text(text, "memory update text required") + options = _memory_mutation_options(wiki_dir, records, timestamp, project) + + return _core_update_memory_page( + wiki_dir, identifier, clean_text, source=source, + review_command="review-memory", allow_conflict=allow_conflict, + **options, + ) -def _resolve_wiki_dir(target: Path) -> Path: - target = target.expanduser().resolve() - if target.name == "wiki" and (target / "index.md").exists(): - return target - return target / "wiki" +def _write_memory_page( + target: Path, text: str, title: str | None = None, + memory_type: str = "note", scope: str = "user", + tags: str | None = None, source: str = "manual", + timestamp: str | None = None, allow_duplicate: bool = False, + allow_conflict: bool = False, project: str | None = None, +) -> dict[str, object]: + wiki_dir, records = _memory_runtime(target) + clean_text = _required_memory_text(text, "memory text required") + options = _memory_mutation_options(wiki_dir, records, timestamp, project) + + return _core_write_memory_page( + wiki_dir, clean_text, title=title, memory_type=memory_type, + scope=scope, tags=tags, source=source, + allow_duplicate=allow_duplicate, allow_conflict=allow_conflict, + **options, + ) def _normalize_link_index(data: dict[str, dict[str, list[str]]]) -> dict[str, dict[str, list[str]]]: - normalized: dict[str, dict[str, list[str]]] = {"backlinks": {}, 
"forward": {}} - for section in ("backlinks", "forward"): - for key, values in data.get(section, {}).items(): - if isinstance(values, list): - normalized[section][key.lower()] = sorted({str(v).lower() for v in values}) - return normalized + return _core_normalize_link_index(data) def _find_dead_links(wiki_dir: Path) -> list[str]: @@ -698,104 +554,44 @@ def _find_unindexed_pages(wiki_dir: Path) -> list[str]: return sorted(stem for stem in _page_stems(wiki_dir) if stem not in indexed and stem not in roots) -def _find_uningested_raw(target: Path) -> list[str]: +def _raw_ingest_findings(target: Path) -> dict[str, list[str]]: target = target.expanduser().resolve() status = _collect_ingest_status(target) - return [item["raw"].removeprefix("raw/") for item in status["pending_raw"]] - - -def _raw_source_files(raw_dir: Path) -> list[Path]: - if not raw_dir.exists(): - return [] - files: list[Path] = [] - for path in sorted(raw_dir.rglob("*")): - if not path.is_file() or path.name.startswith("."): + pending = status.get("pending_raw") if isinstance(status.get("pending_raw"), list) else [] + findings = { + "new": [], + "stale": [], + "blocked": [], + } + for item in pending: + if not isinstance(item, dict): continue - if any(part in SKIP_SCAN_DIRS for part in path.relative_to(raw_dir).parts): + raw_rel = str(item.get("raw") or "") + if not raw_rel: continue - files.append(path) - return files + if item.get("scan_error") or item.get("secret_warnings"): + findings["blocked"].append(raw_rel) + elif item.get("stale"): + findings["stale"].append(raw_rel) + else: + findings["new"].append(raw_rel) + return {key: sorted(values) for key, values in findings.items()} -def _source_page_texts(wiki_dir: Path) -> dict[str, str]: - sources_dir = wiki_dir / "sources" - if not sources_dir.exists(): - return {} - texts: dict[str, str] = {} - for page in sorted(sources_dir.rglob("*.md")): - if page.name.startswith("."): - continue - texts[page.stem.lower()] = page.read_text(encoding="utf-8", 
errors="replace") - return texts +def _collect_ingest_status(target: Path) -> dict[str, object]: + return _core_collect_ingest_status(target, skip_dirs=SKIP_SCAN_DIRS) -def _backlinks_health(wiki_dir: Path) -> tuple[str, str]: - current, load_error = _load_backlinks(wiki_dir / "_backlinks.json") - if load_error: - return "missing" if "missing" in load_error else "invalid", load_error - expected = _build_backlinks(wiki_dir) - if current is not None and _normalize_link_index(current) == _normalize_link_index(expected): - return "current", "wiki/_backlinks.json is current" - return "stale", "wiki/_backlinks.json is stale" - - -def _collect_ingest_status(target: Path) -> dict[str, object]: - target = target.expanduser().resolve() - raw_dir = target / "raw" - wiki_dir = target / "wiki" - raw_files = _raw_source_files(raw_dir) - source_texts = _source_page_texts(wiki_dir) - - represented_raw: list[dict[str, object]] = [] - pending_raw: list[dict[str, object]] = [] - for raw_path in raw_files: - rel = raw_path.relative_to(target).as_posix() - matches = [ - source_name - for source_name, source_text in source_texts.items() - if rel in source_text - ] - item = { - "raw": rel, - "size_bytes": raw_path.stat().st_size, - "source_pages": matches, - } - if matches: - represented_raw.append(item) - else: - pending_raw.append(item) - - backlinks_status, backlinks_message = ( - _backlinks_health(wiki_dir) - if wiki_dir.exists() - else ("missing", "missing wiki directory") - ) - - return { - "target": str(target), - "raw_count": len(raw_files), - "source_page_count": len(source_texts), - "represented_count": len(represented_raw), - "pending_count": len(pending_raw), - "represented_raw": represented_raw, - "pending_raw": pending_raw, - "backlinks_status": backlinks_status, - "backlinks_message": backlinks_message, - "has_raw_dir": raw_dir.exists(), - "has_wiki_dir": wiki_dir.exists(), - } - - -def _find_pages_missing_summaries(wiki_dir: Path) -> list[str]: - missing: list[str] = [] - 
for record in _wiki_page_records(wiki_dir): - stem = str(record["stem"]) - if stem in {"index", "log"}: - continue - body = str(record["body"]) - if "> **TLDR:**" not in body and "> **Query:**" not in body: - missing.append(str(record["rel"])) - return sorted(missing) +def _find_pages_missing_summaries(wiki_dir: Path) -> list[str]: + missing: list[str] = [] + for record in _wiki_page_records(wiki_dir): + stem = str(record["stem"]) + if stem in {"index", "log"}: + continue + body = str(record["body"]) + if "> **TLDR:**" not in body and "> **Query:**" not in body: + missing.append(str(record["rel"])) + return sorted(missing) def _find_pages_missing_source_sections(wiki_dir: Path) -> list[str]: @@ -839,6 +635,92 @@ def _find_source_count_mismatches(wiki_dir: Path) -> list[str]: return sorted(mismatches) +def _raw_source_refs(text: str) -> list[str]: + refs: list[str] = [] + for pattern in (r"`(raw/[^`\n]+)`", r"(?()]+)"): + for match in re.finditer(pattern, text): + value = match.group(1).strip().rstrip(".,;:]") + if value and value not in refs: + refs.append(value) + return refs + + +def _body_with_tldr(body: str, title: str) -> str: + if re.search(r">\s*\*\*(?:TLDR|Query):\*\*", body, flags=re.IGNORECASE): + return body + summary = f"> **TLDR:** {title} source notes.\n\n" + heading = re.search(r"^#\s+.+\n", body, flags=re.MULTILINE) + if heading: + return body[: heading.end()] + "\n" + summary + body[heading.end():].lstrip("\n") + return summary + body.lstrip("\n") + + +def _append_section(body: str, title: str, content: str) -> str: + return body.rstrip() + f"\n\n## {title}\n\n{content.strip()}\n" + + +def _repair_source_page_validation_shape(page: Path, findings: list[dict[str, str]]) -> bool: + text = page.read_text(encoding="utf-8", errors="replace") + frontmatter_match = re.match(r"\A---\n.*?\n---\n?", text, flags=re.DOTALL) + if not frontmatter_match: + return False + prefix = frontmatter_match.group(0).rstrip("\n") + "\n\n" + body = 
text[frontmatter_match.end():].lstrip("\n") + meta, parsed_body = _parse_frontmatter(text) + if not isinstance(meta, dict) or str(meta.get("type") or "").strip() != "source": + return False + body = parsed_body + title = str(meta.get("title") or page.stem).strip() or page.stem + messages = [str(finding.get("message") or "") for finding in findings] + codes = {str(finding.get("code") or "") for finding in findings} + changed = False + + if "missing_summary" in codes: + updated = _body_with_tldr(body, title) + changed = changed or updated != body + body = updated + + if any("## Summary" in message for message in messages): + body = _append_section(body, "Summary", f"{title} source notes.") + changed = True + + if any("## Raw Source" in message for message in messages): + refs = _raw_source_refs(text) + if refs: + body = _append_section(body, "Raw Source", f"`{refs[0]}`") + changed = True + + if changed: + _core_atomic_write_text(page, prefix + body.rstrip() + "\n") + return changed + + +def _repair_validation_findings(wiki_dir: Path) -> list[str]: + payload = _core_validate_wiki(wiki_dir) + findings_by_path: dict[str, list[dict[str, str]]] = {} + for finding in payload.get("findings", []): + if not isinstance(finding, dict): + continue + path = str(finding.get("path") or "") + code = str(finding.get("code") or "") + if not path.startswith("sources/"): + continue + if code != "missing_summary" and code != "missing_required_section": + continue + findings_by_path.setdefault(path, []).append(finding) + + fixes: list[str] = [] + for rel, findings in sorted(findings_by_path.items()): + page = wiki_dir / rel + try: + repaired = _repair_source_page_validation_shape(page, findings) + except OSError: + repaired = False + if repaired: + fixes.append(f"repaired validation shape for wiki/{rel}") + return fixes + + def _find_isolated_pages(wiki_dir: Path) -> list[str]: stems = _page_stems(wiki_dir) records = _wiki_page_records(wiki_dir) @@ -889,18 +771,19 @@ def 
_iter_scannable_files(target: Path) -> list[Path]: return sorted(files) -def _find_sensitive_values(target: Path) -> list[str]: +def _find_sensitive_values(target: Path) -> tuple[list[str], list[str]]: matches: list[str] = [] + read_errors: list[str] = [] for path in _iter_scannable_files(target): try: text = path.read_text(encoding="utf-8", errors="replace") - except OSError: + except OSError as exc: + read_errors.append(f"{path.relative_to(target)} ({exc})") continue - for label, pattern in SECRET_VALUE_PATTERNS: - if pattern.search(text): - matches.append(f"{path.relative_to(target)} ({label})") - break - return sorted(matches) + warnings = _secret_value_warnings(text) + if warnings: + matches.append(f"{path.relative_to(target)} ({warnings[0]})") + return sorted(matches), sorted(read_errors) def _required_paths(target: Path) -> list[Path]: @@ -915,25 +798,14 @@ def _required_paths(target: Path) -> list[Path]: wiki_dir / "sources", wiki_dir / "concepts", wiki_dir / "entities", + wiki_dir / "memories", wiki_dir / "comparisons", wiki_dir / "explorations", ] -def _write_default_index(path: Path) -> None: - path.write_text( - "# Link Wiki Index\n\n" - "> Last updated: not yet ingested | 0 pages | 0 sources\n\n" - "## Categories\n\n" - "## Recent\n\n" - "| Date | Operation | Pages Touched |\n" - "|------|-----------|---------------|\n", - encoding="utf-8", - ) - - def _write_default_log(path: Path) -> None: - path.write_text("# Link Wiki Log\n\n*Append-only record of wiki operations.*\n", encoding="utf-8") + _core_write_default_log(path) def _apply_doctor_fixes(target: Path) -> list[str]: @@ -948,24 +820,41 @@ def _apply_doctor_fixes(target: Path) -> list[str]: path.mkdir(parents=True, exist_ok=True) fixes.append(f"created {path.relative_to(target)}") - index_path = wiki_dir / "index.md" - if not index_path.exists(): - _write_default_index(index_path) - fixes.append("created wiki/index.md") - log_path = wiki_dir / "log.md" if not log_path.exists(): 
_write_default_log(log_path) fixes.append("created wiki/log.md") if wiki_dir.exists(): + index_path = wiki_dir / "index.md" + index_missing = not index_path.exists() + unindexed = [] if index_missing else _find_unindexed_pages(wiki_dir) + if index_missing or unindexed: + _core_rebuild_index(wiki_dir) + fixes.append("created wiki/index.md" if index_missing else "rebuilt wiki/index.md") + backlinks_path = wiki_dir / "_backlinks.json" current, load_error = _load_backlinks(backlinks_path) expected = _build_backlinks(wiki_dir) if load_error or current is None or _normalize_link_index(current) != _normalize_link_index(expected): - backlinks_path.write_text(json.dumps(expected, indent=2) + "\n", encoding="utf-8") + _core_atomic_write_json(backlinks_path, expected) fixes.append("rebuilt wiki/_backlinks.json") + migration = _core_migrate_wiki(wiki_dir) + if not migration["ok"]: + fixes.append(f"schema migration skipped: {migration['error']}") + else: + fixes.extend(f"schema: {item}" for item in migration["changes"]) + + validation_repairs = _repair_validation_findings(wiki_dir) + fixes.extend(validation_repairs) + if validation_repairs: + current, load_error = _load_backlinks(backlinks_path) + expected = _build_backlinks(wiki_dir) + if load_error or current is None or _normalize_link_index(current) != _normalize_link_index(expected): + _core_atomic_write_json(backlinks_path, expected) + fixes.append("rebuilt wiki/_backlinks.json") + return fixes @@ -1012,149 +901,1905 @@ def doctor(target: Path, fix: bool = False) -> int: else: print("OK index lists wiki pages") - current, load_error = _load_backlinks(wiki_dir / "_backlinks.json") - if load_error: - errors.append(load_error) - elif current is not None: - expected = _build_backlinks(wiki_dir) - if _normalize_link_index(current) != _normalize_link_index(expected): - errors.append("wiki/_backlinks.json is stale; run: python3 link.py rebuild-backlinks .") - else: - print("OK backlinks are current") + current, load_error = 
_load_backlinks(wiki_dir / "_backlinks.json") + if load_error: + errors.append(load_error) + elif current is not None: + expected = _build_backlinks(wiki_dir) + if _normalize_link_index(current) != _normalize_link_index(expected): + errors.append("wiki/_backlinks.json is stale; run: python3 link.py rebuild-backlinks .") + else: + print("OK backlinks are current") + + schema = _core_schema_status(wiki_dir) + if schema["status"] == "current": + print(f"OK wiki schema v{schema['version']}") + elif schema["status"] in {"missing", "old"}: + warnings.append("wiki schema marker needs migration; run: link migrate") + elif schema["status"] == "newer": + errors.append(str(schema["error"])) + else: + errors.append(str(schema["error"] or "invalid wiki schema marker")) + + missing_summaries = _find_pages_missing_summaries(wiki_dir) + if missing_summaries: + warnings.append("pages missing TLDR/query summary: " + ", ".join(missing_summaries[:8])) + else: + print("OK wiki pages have summaries") + + missing_sources = _find_pages_missing_source_sections(wiki_dir) + if missing_sources: + warnings.append("source-backed pages missing Sources section: " + ", ".join(missing_sources[:8])) + else: + print("OK source-backed pages cite sources") + + source_count_mismatches = _find_source_count_mismatches(wiki_dir) + if source_count_mismatches: + warnings.append("source_count metadata mismatch: " + ", ".join(source_count_mismatches[:8])) + else: + print("OK source_count metadata matches Sources sections") + + validation = _core_validate_wiki(wiki_dir) + doctor_validation_codes = { + "invalid_directory", + "missing_frontmatter", + "missing_frontmatter_field", + "missing_required_section", + "type_directory_mismatch", + "unreadable_page", + } + validation_errors = [ + finding + for finding in validation.get("findings", []) + if isinstance(finding, dict) + and finding.get("severity") == "error" + and str(finding.get("code") or "") in doctor_validation_codes + ] + if validation_errors: + details 
= [ + f"{finding.get('path')} [{finding.get('code')}] {finding.get('message')}" + for finding in validation_errors[:8] + ] + errors.append("validation errors: " + "; ".join(details)) + else: + print("OK ingest validation gate") + + isolated = _find_isolated_pages(wiki_dir) + if isolated: + warnings.append("isolated wiki pages: " + ", ".join(isolated[:8])) + else: + print("OK graph has no isolated wiki pages") + + memory_review = _memory_inbox(wiki_dir, limit=8, include_archived=True) + if memory_review["review_count"]: + names = ", ".join(item["name"] for item in memory_review["items"][:8]) + warnings.append(f"memories need review: {names}") + else: + print("OK memories are reviewed") + + captures = _capture_records(target, limit=50) + capture_warning_count = sum(1 for capture in captures if capture["warning_count"]) + if captures: + warnings.append(f"raw memory captures pending review: {len(captures)}") + else: + print("OK no raw memory captures pending review") + if capture_warning_count: + warnings.append(f"raw memory captures with secret warnings: {capture_warning_count}") + + raw_ingest_findings = _raw_ingest_findings(target) + if raw_ingest_findings["blocked"]: + warnings.append("raw files blocked before ingest: " + ", ".join(raw_ingest_findings["blocked"][:8])) + if raw_ingest_findings["stale"]: + warnings.append("raw files need source refresh: " + ", ".join(raw_ingest_findings["stale"][:8])) + if raw_ingest_findings["new"]: + warnings.append("raw files not referenced by wiki source pages: " + ", ".join(raw_ingest_findings["new"][:8])) + if not any(raw_ingest_findings.values()) and raw_dir.exists(): + print("OK raw files are represented in wiki sources") + + sensitive_names = _find_sensitive_filenames(target) + if sensitive_names: + errors.append("sensitive-looking filenames present: " + ", ".join(sensitive_names[:8])) + else: + print("OK no sensitive-looking filenames") + + sensitive_values, sensitive_read_errors = _find_sensitive_values(target) + if 
sensitive_values: + errors.append("sensitive-looking file contents present: " + ", ".join(sensitive_values[:8])) + else: + print("OK no sensitive-looking file contents") + if sensitive_read_errors: + errors.append("could not scan file contents for secrets: " + ", ".join(sensitive_read_errors[:8])) + + if warnings: + print("") + print("Warnings:") + for warning in warnings: + print(f"- {warning}") + + if errors: + print("") + print("Errors:") + for error in errors: + print(f"- {error}") + print("") + print("Result: needs attention") + return 1 + + print("") + print("Result: healthy") + return 0 + + +def validate(target: Path, strict: bool = False, json_output: bool = False) -> int: + target = target.expanduser().resolve() + wiki_dir = _resolve_wiki_dir(target) + payload = _core_validate_wiki(wiki_dir, strict=strict) + if json_output: + print(json.dumps(payload, indent=2)) + return 0 if payload["passed"] else 1 + + print(f"Link validate: {wiki_dir}") + print("") + if payload["findings"]: + for finding in payload["findings"]: + label = str(finding["severity"]).upper() + print(f"{label} {finding['path']} [{finding['code']}] {finding['message']}") + else: + print("OK wiki pages satisfy the ingest validation gate") + print("") + print( + f"Result: {'passed' if payload['passed'] else 'failed'} " + f"({payload['error_count']} errors, {payload['warning_count']} warnings)" + ) + return 0 if payload["passed"] else 1 + + +def migrate(target: Path, json_output: bool = False) -> int: + target = target.expanduser().resolve() + wiki_dir = _resolve_wiki_dir(target) + payload = _core_migrate_wiki(wiki_dir) + if json_output: + print(json.dumps(payload, indent=2)) + return 0 if payload["ok"] else 1 + + print(f"Link migrate: {wiki_dir}") + print("") + previous = payload["previous"] + schema = payload["schema"] + print(f"Previous schema: {previous['status']}") + print(f"Current schema: {schema['status']} v{schema.get('version')}") + changes = payload["changes"] + if changes: + print("") 
+ print("Changes:") + for item in changes: + print(f"- {item}") + else: + print("") + print("Changes: none") + if payload["ok"]: + print("") + print("Result: current") + return 0 + print("") + print(f"Result: failed ({payload['error']})") + return 1 + + +def status(target: Path, include_validation: bool = False, json_output: bool = False) -> int: + target = target.expanduser().resolve() + wiki_dir = _resolve_wiki_dir(target) + payload = _core_link_status(wiki_dir, version=LINK_VERSION, include_validation=include_validation) + if json_output: + print(json.dumps(payload, indent=2)) + return 0 if payload["ready"] else 1 + + print(f"Link status: {wiki_dir}") + print("") + print(f"Version: {payload.get('version') or LINK_VERSION}") + print(f"Ready: {'yes' if payload['ready'] else 'no'}") + print(f"Pages: {payload['page_count']}") + print(f"Content pages: {payload.get('content_page_count', payload['page_count'])}") + print( + f"Memories: {payload['memory_count']} total · " + f"{payload['active_memory_count']} active · " + f"{payload['needs_review_count']} need review" + ) + print(f"Search backend: {payload.get('search_backend', 'unknown')}") + schema = payload.get("schema") or {} + if isinstance(schema, dict): + schema_status = schema.get("status", "unknown") + schema_version = schema.get("version") + if schema_status == "current": + print(f"Schema: current v{schema_version}") + else: + print(f"Schema: {schema_status}") + if payload["missing"]: + print("Missing: " + ", ".join(str(item) for item in payload["missing"])) + validation = payload["validation"] + if validation.get("checked"): + print( + "Validation: " + f"{'passed' if validation.get('passed') else 'failed'} " + f"({validation.get('error_count', 0)} errors, {validation.get('warning_count', 0)} warnings)" + ) + else: + print("Validation: not checked (use --validate)") + warnings = payload.get("warnings") or [] + if warnings: + print("Warnings:") + for warning in warnings: + detail = f" ({warning.get('detail')})" 
if warning.get("detail") else "" + print(f"- {warning.get('code')}: {warning.get('message')}{detail}") + print("") + print("Next:") + for action in payload["next_actions"]: + args = action.get("arguments") or {} + suffix = f" {json.dumps(args, ensure_ascii=False)}" if args else "" + print(f"- {action['tool']}: {action['label']}{suffix}") + return 0 if payload["ready"] else 1 + + +def backup( + target: Path, + *, + label: str = "manual", + include_raw: bool = False, + list_only: bool = False, + json_output: bool = False, +) -> int: + target = _resolve_link_root(target) + if list_only: + payload = _core_list_backups(target) + if json_output: + print(json.dumps(payload, indent=2)) + return 0 + print(f"Link backups: {payload['backup_dir']}") + print("") + if not payload["backups"]: + print("No backups found.") + for warning in payload.get("warnings") or []: + print(f"Warning: could not read backup {warning.get('backup')}: {warning.get('error')}") + if not payload["backups"]: + return 0 + for item in payload["backups"]: + print(f"- {item['name']} ({item['bytes']} bytes)") + return 0 + + try: + payload = _core_create_backup(target, label=label, include_raw=include_raw) + except (FileNotFoundError, _CoreBackupError) as exc: + if json_output: + print(json.dumps({"created": False, "error": str(exc)}, indent=2)) + else: + print(str(exc), file=sys.stderr) + return 1 + + if json_output: + print(json.dumps(payload, indent=2)) + return 0 + + print(f"Link backup created: {payload['path']}") + print(f"Included: {', '.join(payload['included'])}") + print(f"Files: {payload['file_count']}") + print(f"Size: {payload['bytes']} bytes") + if not include_raw: + print("Note: raw/ was excluded by default because it may contain sensitive source material.") + if payload["pruned"]: + print("Pruned old backups: " + ", ".join(payload["pruned"])) + return 0 + + +def ingest_status(target: Path, json_output: bool = False) -> int: + target = target.expanduser().resolve() + status = 
_collect_ingest_status(target) + + if json_output: + print(json.dumps(status, indent=2)) + return 0 if status["has_raw_dir"] and status["has_wiki_dir"] else 1 + + print(f"Link ingest status: {target}") + print("") + if not status["has_raw_dir"]: + print("Missing raw/ directory") + if not status["has_wiki_dir"]: + print("Missing wiki/ directory") + if not status["has_raw_dir"] or not status["has_wiki_dir"]: + print("") + print("Next:") + print(" Run an installer or initialize this directory: link init") + return 1 + + print(f"Raw files: {status['raw_count']}") + print(f"Source pages: {status['source_page_count']}") + if int(status.get("source_read_warning_count") or 0): + print(f"Source page read warnings: {status['source_read_warning_count']}") + print(f"Represented in wiki/sources: {status['represented_count']}") + print(f"Pending ingest: {status['pending_count']}") + if int(status.get("stale_count") or 0): + print(f"Stale represented raw: {status['stale_count']}") + print(f"Backlinks: {status['backlinks_status']} ({status['backlinks_message']})") + safety = status.get("safety") if isinstance(status.get("safety"), dict) else {} + if safety: + print(f"Safety: {safety.get('status')} ({safety.get('summary')})") + guidance = status["guidance"] + if isinstance(guidance, dict): + print(f"Guidance: {guidance['summary']}") + + pending_raw = status["pending_raw"] + if pending_raw: + print("") + print("Pending raw files:") + for item in pending_raw[:20]: + warnings = item.get("secret_warnings") if isinstance(item.get("secret_warnings"), list) else [] + scan_error = str(item.get("scan_error") or "") + if scan_error: + print(f"- {item['raw']} [fix access before ingest: {scan_error}]") + elif warnings: + labels = ", ".join(str(label) for label in warnings) + print(f"- {item['raw']} [redact before ingest: {labels}]") + elif item.get("stale"): + reason = str(item.get("stale_reason") or "raw changed after wiki source page") + print(f"- {item['raw']} [refresh source page: 
{reason}]") + else: + print(f"- {item['raw']}") + if len(pending_raw) > 20: + print(f"- ... {len(pending_raw) - 20} more") + source_warnings = status.get("source_read_warnings") if isinstance(status.get("source_read_warnings"), list) else [] + if source_warnings: + print("") + print("Source page warnings:") + for item in source_warnings[:20]: + if isinstance(item, dict): + print(f"- {item.get('page')} [fix access: {item.get('error')}]") + + print("") + print("Next:") + if isinstance(guidance, dict): + agent_prompt = guidance.get("agent_prompt") + if agent_prompt: + print(f" Ask your agent: {agent_prompt}") + for command in guidance.get("commands", []): + print(f" Run: {command}") + notes = guidance.get("notes") or [] + for note in notes[:2]: + print(f" Note: {note}") + + plan = status.get("plan") if isinstance(status.get("plan"), dict) else {} + steps = plan.get("steps") if isinstance(plan.get("steps"), list) else [] + batch = plan.get("batch") if isinstance(plan.get("batch"), list) else [] + post_checks = plan.get("post_checks") if isinstance(plan.get("post_checks"), list) else [] + if plan: + print("") + print(f"Suggested workflow: {plan.get('title')}") + summary = plan.get("summary") + if summary: + print(f" {summary}") + memory_prompt = plan.get("memory_prompt") + if memory_prompt: + print(f" Memory review: {memory_prompt}") + for index, step in enumerate(steps[:6], start=1): + print(f" {index}. 
{step}") + if batch: + print(" Batch:") + for item in batch[:5]: + subject = item.get("raw") or item.get("page") or "" + target_page = item.get("target_source_page") or item.get("suggested_source_page") or item.get("error") or "" + print(f" - {subject} -> {target_page}") + if post_checks: + print(" Post-ingest checks:") + for check in post_checks[:6]: + print(f" - {check}") + + completion = status.get("completion") if isinstance(status.get("completion"), dict) else {} + completion_items = completion.get("items") if isinstance(completion.get("items"), list) else [] + if completion_items: + print("") + print(f"Ingest completion: {completion.get('summary')}") + for item in completion_items[:8]: + pages = item.get("source_pages") if isinstance(item.get("source_pages"), list) else [] + page_labels = [] + for page in pages: + if isinstance(page, dict): + label = page.get("path") or page.get("name") + if label: + page_labels.append(str(label)) + target_pages = ", ".join(page_labels) if page_labels else "source page missing" + print(f" - {item.get('raw')} -> {target_pages}") + memory_prompt = item.get("memory_prompt") + if memory_prompt: + print(f" Memory review: {memory_prompt}") + query_prompt = item.get("query_prompt") + if query_prompt: + print(f" Retrieval check: {query_prompt}") + if completion.get("has_more"): + represented_count = int(completion.get("represented_count") or 0) + shown_count = int(completion.get("shown_count") or 0) + print(f" ... 
{represented_count - shown_count} more represented raw source(s)") + next_prompt = completion.get("next_prompt") + if next_prompt: + print(f" Next check: {next_prompt}") + + return 0 + + +def rebuild_backlinks(target: Path) -> int: + wiki_dir = _resolve_wiki_dir(target) + if not wiki_dir.exists(): + print(f"Missing wiki directory: {wiki_dir}", file=sys.stderr) + return 1 + try: + backlinks = _build_backlinks(wiki_dir) + except OSError as exc: + print(f"Could not rebuild backlinks: {exc}", file=sys.stderr) + return 1 + out_path = wiki_dir / "_backlinks.json" + _core_atomic_write_json(out_path, backlinks) + page_count = len(_wiki_pages(wiki_dir)) + edge_count = sum(len(targets) for targets in backlinks["forward"].values()) + print(f"Rebuilt {out_path}") + print(f"Pages: {page_count}") + print(f"Edges: {edge_count}") + return 0 + + +def rebuild_index(target: Path) -> int: + wiki_dir = _resolve_wiki_dir(target) + if not wiki_dir.exists(): + print(f"Missing wiki directory: {wiki_dir}", file=sys.stderr) + return 1 + try: + result = _core_rebuild_index(wiki_dir) + except OSError as exc: + print(f"Could not rebuild index: {exc}", file=sys.stderr) + return 1 + print(f"Rebuilt {wiki_dir / 'index.md'}") + print(f"Pages: {result['page_count']}") + print(f"Sources: {result['source_count']}") + print(f"Memories: {result['memory_count']}") + print("Next: run python3 link.py rebuild-backlinks before validation") + return 0 + + +def remember( + target: Path, + text: str, + title: str | None = None, + memory_type: str = "note", + scope: str = "user", + tags: str | None = None, + source: str = "manual", + allow_duplicate: bool = False, + allow_conflict: bool = False, + project: str | None = None, + json_output: bool = False, +) -> int: + if not text or not text.strip(): + print("Memory text is required", file=sys.stderr) + return 1 + try: + result = _write_memory_page( + target, + text, + title=title, + memory_type=memory_type, + scope=scope, + tags=tags, + source=source, + 
allow_duplicate=allow_duplicate, + allow_conflict=allow_conflict, + project=project or _default_project(target), + ) + except (FileNotFoundError, ValueError) as exc: + print(f"Could not remember: {exc}", file=sys.stderr) + return 1 + + if json_output: + print(json.dumps(result, indent=2)) + return 0 + + if not result.get("created"): + if result.get("conflict"): + print("Possible conflicting memory found") + print(f"Title requested: {result['title']}") + print(f"Type: {result['memory_type']}") + print(f"Scope: {result['scope']}") + print("") + print("Conflict candidates:") + for candidate in result.get("conflict_candidates", []): + reasons = ", ".join(candidate.get("conflict_reasons", [])) + print(f"- {candidate['title']} ({candidate['path']})") + if reasons: + print(f" Reasons: {reasons}") + print("") + print("Next:") + first = next(iter(result.get("conflict_candidates", [])), None) + if first: + print(f" python3 link.py explain-memory \"{first['name']}\" .") + print(" Update/archive the old memory, or use --allow-conflict only if both should coexist.") + return 0 + print("Similar memory already exists") + print(f"Title requested: {result['title']}") + print(f"Type: {result['memory_type']}") + print(f"Scope: {result['scope']}") + print("") + print("Existing candidates:") + for candidate in result.get("candidates", []): + print(f"- {candidate['title']} ({candidate['path']})") + print("") + print("Next:") + first = next(iter(result.get("candidates", [])), None) + if first: + print(f" python3 link.py explain-memory \"{first['name']}\" .") + print(" Use --allow-duplicate only if this should be a separate memory.") + return 0 + + print("Memory saved") + print(f"Title: {result['title']}") + print(f"Path: {result['path']}") + print(f"Type: {result['memory_type']}") + print(f"Scope: {result['scope']}") + if result.get("project"): + print(f"Project: {result['project']}") + print("") + print("Next:") + print(f" python3 link.py recall \"{result['title']}\" .") + return 0 + + 
+def _read_proposal_input(target: Path, value: str) -> tuple[str, str]: + raw = value.strip() + candidates = [Path(raw).expanduser()] + target_path = target.expanduser() + if not Path(raw).is_absolute(): + candidates.append((target_path / raw).expanduser()) + for candidate in candidates: + try: + is_file = candidate.exists() and candidate.is_file() + except OSError: + is_file = False + if is_file: + return candidate.read_text(encoding="utf-8", errors="replace"), str(candidate) + return value, "inline" + + +def propose_memories( + target: Path, + source_input: str, + limit: int = 10, + project: str | None = None, + json_output: bool = False, +) -> int: + target = target.expanduser().resolve() + wiki_dir = _resolve_wiki_dir(target) + if not wiki_dir.exists(): + print(f"Missing wiki directory: {wiki_dir}", file=sys.stderr) + return 1 + text, source = _read_proposal_input(target, source_input) + if not text.strip(): + print("Memory proposal input is required", file=sys.stderr) + return 1 + result = _propose_memories_from_text( + wiki_dir, + text, + source=source, + limit=max(1, min(limit, 20)), + project=project or _default_project(target), + ) + + if json_output: + print(json.dumps(result, indent=2)) + return 0 + + print("Memory proposals") + print(f"Source: {result['source']}") + if result.get("project"): + print(f"Project: {result['project']}") + print(f"Count: {result['count']}") + if not result["proposals"]: + print("No durable memory candidates found.") + return 0 + for index, proposal in enumerate(result["proposals"], start=1): + print("") + print(f"{index}. 
{proposal['title']} [{proposal['confidence']}]") + print(f" Type: {proposal['memory_type']} | Scope: {proposal['scope']}") + if proposal.get("project"): + print(f" Project: {proposal['project']}") + print(f" Action: {proposal['suggested_action']}") + print(f" Memory: {proposal['memory']}") + primary_action = proposal.get("primary_action") if isinstance(proposal.get("primary_action"), dict) else {} + if primary_action.get("command"): + print(f" Command: {primary_action['command']}") + if proposal["duplicate_candidates"]: + first = proposal["duplicate_candidates"][0] + print(f" Duplicate candidate: {first['title']} ({first['path']})") + print("") + print("Next:") + print(" Use remember for new memories, or update-memory for duplicate candidates.") + return 0 + + +def capture_session( + target: Path, + source_input: str, + title: str | None = None, + limit: int = 10, + project: str | None = None, + json_output: bool = False, +) -> int: + target = target.expanduser().resolve() + root = _resolve_link_root(target) + wiki_dir = _resolve_wiki_dir(target) + if not wiki_dir.exists(): + print(f"Missing wiki directory: {wiki_dir}", file=sys.stderr) + return 1 + + text, source = _read_proposal_input(root, source_input) + if not text.strip(): + print("Session capture input is required", file=sys.stderr) + return 1 + + timestamp = _utc_timestamp() + project_name = project or _default_project(root) + capture_title = _core_capture_title(text, source, title, default_source="inline", path_source=True) + secret_warnings = _secret_value_warnings(text) + capture_dir = root / "raw" / "memory-captures" + capture_dir.mkdir(parents=True, exist_ok=True) + capture_path = _core_capture_filename(timestamp, capture_title, capture_dir) + project_line = f'project: "{_frontmatter_string(project_name)}"\n' if project_name else "" + _core_atomic_write_text( + capture_path, + f"""--- +title: "{_frontmatter_string(capture_title)}" +source_type: conversation +date_captured: "{timestamp}" 
+{project_line}--- + +# {capture_title} + +Captured locally for Link memory review. This raw note is proposal-only until the user approves durable memories. + +## Source Input + +{source} + +## Notes + +{text.strip()} +""", + ) + rel_path = capture_path.relative_to(root).as_posix() + result = _propose_memories_from_text( + wiki_dir, + text, + source=rel_path, + limit=max(1, min(limit, 20)), + project=project_name, + ) + payload = { + "captured": True, + "path": rel_path, + "source_input": source, + "title": capture_title, + "project": project_name, + "secret_warnings": secret_warnings, + "proposals": result, + } + _append_log( + wiki_dir, + timestamp, + "capture-session", + f"Captured proposal-only session notes at {rel_path}", + [ + f"Source input: {source}", + f"Project: {project_name or 'none'}", + f"Secret warnings: {', '.join(secret_warnings) if secret_warnings else 'none'}", + f"Proposals: {result['count']}", + ], + ) + + if json_output: + print(json.dumps(payload, indent=2)) + return 0 + + print("Session captured") + print(f"Path: {rel_path}") + if project_name: + print(f"Project: {project_name}") + if secret_warnings: + print("Secret-looking content: " + ", ".join(secret_warnings)) + print(f"Proposals: {result['count']}") + if not result["proposals"]: + print("No durable memory candidates found.") + return 0 + for index, proposal in enumerate(result["proposals"], start=1): + print("") + print(f"{index}. 
{proposal['title']} [{proposal['confidence']}]") + print(f" Type: {proposal['memory_type']} | Scope: {proposal['scope']}") + if proposal.get("project"): + print(f" Project: {proposal['project']}") + print(f" Action: {proposal['suggested_action']}") + print(f" Memory: {proposal['memory']}") + print("") + print("Next:") + print(" Ask the user which proposals to remember, update, or discard.") + return 0 + + +def _resolve_capture_file(root: Path, capture: str) -> Path | None: + return _core_resolve_capture_file(root, capture) + + +def _capture_records(target: Path, limit: int = 20, project: str | None = None) -> list[dict[str, object]]: + root = _resolve_link_root(target) + return _core_capture_records( + root, + limit=limit, + project=project, + commands_for=_core_cli_capture_commands, + ) + + +def capture_inbox( + target: Path, + limit: int = 20, + project: str | None = None, + json_output: bool = False, +) -> int: + target = target.expanduser().resolve() + root = _resolve_link_root(target) + wiki_dir = _resolve_wiki_dir(target) + if not wiki_dir.exists(): + print(f"Missing wiki directory: {wiki_dir}", file=sys.stderr) + return 1 + payload = _core_capture_inbox( + root, + limit=limit, + project=project, + commands_for=_core_cli_capture_commands, + ) + project_name = str(payload["project"]) + captures = payload["captures"] + warning_count = int(payload["warning_count"]) + read_warning_count = int(payload.get("read_warning_count") or 0) + read_warnings = payload.get("read_warnings") if isinstance(payload.get("read_warnings"), list) else [] + if json_output: + print(json.dumps(payload, indent=2)) + return 0 + + print("Raw capture inbox") + if project_name: + print(f"Project: {project_name}") + print( + f"{len(captures)} readable capture{'s' if len(captures) != 1 else ''} · " + f"{warning_count} with secret-looking warnings · {read_warning_count} read warnings" + ) + if read_warnings: + print("") + print("Capture read warnings:") + for warning in read_warnings[:20]: + 
print(f" {warning.get('capture')}: {warning.get('error')}") + if not captures: + print("") + print("No readable saved raw captures.") + return 0 + for index, capture in enumerate(captures, start=1): + print("") + print(f"{index}. {capture['title']}") + print(f" Path: {capture['path']}") + if capture["project"]: + print(f" Project: {capture['project']}") + if capture["secret_warnings"]: + print(" Secret-looking values: " + ", ".join(capture["secret_warnings"])) + print(f" Accept: {capture['commands']['accept']}") + if capture["secret_warnings"]: + print(f" Redact: {capture['commands']['redact']}") + print(f" Delete: {capture['commands']['delete']}") + return 0 + + +def _capture_review_summary(target: Path, project: str | None = None, limit: int = 3) -> dict[str, object]: + root = _resolve_link_root(target) + summary = _core_capture_review_summary( + root, + limit=limit, + project=project, + commands_for=_core_cli_capture_commands, + ) + summary["next_action"] = f'python3 link.py capture-inbox "{root}"' + if summary["project"]: + summary["next_action"] = f'python3 link.py capture-inbox "{root}" --project "{summary["project"]}"' + return summary + + +def accept_capture( + target: Path, + capture: str, + index: int = 1, + title: str | None = None, + memory_type: str | None = None, + scope: str | None = None, + tags: str | None = None, + project: str | None = None, + allow_duplicate: bool = False, + allow_conflict: bool = False, + json_output: bool = False, +) -> int: + target = target.expanduser().resolve() + root = _resolve_link_root(target) + wiki_dir = _resolve_wiki_dir(target) + if not wiki_dir.exists(): + print(f"Missing wiki directory: {wiki_dir}", file=sys.stderr) + return 1 + capture_path = _resolve_capture_file(root, capture) + if capture_path is None: + print(f"Capture not found under {root}: {capture}", file=sys.stderr) + return 1 + if index < 1: + print("Proposal index must be 1 or greater", file=sys.stderr) + return 1 + + raw_text = 
capture_path.read_text(encoding="utf-8", errors="replace") + meta, notes = _core_capture_notes_from_markdown(raw_text) + if not notes: + print(f"Capture has no notes: {capture_path}", file=sys.stderr) + return 1 + + rel_path = capture_path.relative_to(root).as_posix() + project_name = project or str(meta.get("project") or "") or _default_project(root) + proposals = _propose_memories_from_text( + wiki_dir, + notes, + source=rel_path, + limit=max(1, min(max(index, 10), 50)), + project=project_name, + ) + if index > len(proposals["proposals"]): + print(f"Capture has {len(proposals['proposals'])} proposal(s); index {index} is unavailable", file=sys.stderr) + return 1 + proposal = proposals["proposals"][index - 1] + chosen_scope = scope or str(proposal["scope"]) + chosen_project = project_name if chosen_scope == "project" else "" + result = _write_memory_page( + target, + str(proposal["memory"]), + title=title or str(proposal["title"]), + memory_type=memory_type or str(proposal["memory_type"]), + scope=chosen_scope, + tags=tags, + source=rel_path, + allow_duplicate=allow_duplicate, + allow_conflict=allow_conflict, + project=chosen_project, + ) + payload = { + "accepted": bool(result.get("created")), + "capture": rel_path, + "proposal_index": index, + "project": str(result.get("project") or proposal.get("project") or ""), + "proposal": proposal, + "result": result, + } + if result.get("created"): + _append_log( + wiki_dir, + _utc_timestamp(), + "accept-capture", + f"Accepted proposal {index} from {rel_path}", + [ + f"Memory: {result['path']}", + f"Project: {result.get('project') or 'none'}", + ], + ) + + if json_output: + print(json.dumps(payload, indent=2)) + return 0 if payload["accepted"] else 1 + + if not payload["accepted"]: + duplicate_candidates = result.get("duplicate_candidates") or result.get("candidates") + if duplicate_candidates: + first = duplicate_candidates[0] + print(f"Duplicate candidate: {first['title']} ({first['path']})") + elif 
result.get("conflict_candidates"): + first = result["conflict_candidates"][0] + print(f"Conflict candidate: {first['title']} ({first['path']})") + else: + print("Capture proposal was not accepted.") + return 1 + + print("Capture proposal accepted") + print(f"Capture: {rel_path}") + print(f"Proposal: {index}") + print(f"Memory: {result['path']}") + if result.get("project"): + print(f"Project: {result['project']}") + print("") + print("Next:") + print(f" python3 link.py review-memory \"{result['name']}\" .") + return 0 + + +def redact_capture( + target: Path, + capture: str, + replacement: str = "[redacted-secret]", + json_output: bool = False, +) -> int: + target = target.expanduser().resolve() + root = _resolve_link_root(target) + wiki_dir = _resolve_wiki_dir(target) + if not wiki_dir.exists(): + print(f"Missing wiki directory: {wiki_dir}", file=sys.stderr) + return 1 + capture_path = _resolve_capture_file(root, capture) + if capture_path is None: + print(f"Capture not found under {root}: {capture}", file=sys.stderr) + return 1 + + original = capture_path.read_text(encoding="utf-8", errors="replace") + redacted, labels, replacement_count = _redact_secret_values(original, replacement=replacement) + rel_path = capture_path.relative_to(root).as_posix() + if replacement_count: + _core_atomic_write_text(capture_path, redacted) + _append_log( + wiki_dir, + _utc_timestamp(), + "redact-capture", + f"Redacted secret-looking values from {rel_path}", + [ + f"Labels: {', '.join(labels)}", + f"Replacement count: {replacement_count}", + ], + ) + payload = { + "redacted": bool(replacement_count), + "path": rel_path, + "labels": labels, + "replacement_count": replacement_count, + } + if json_output: + print(json.dumps(payload, indent=2)) + return 0 + + if replacement_count: + print("Capture redacted") + print(f"Path: {rel_path}") + print("Labels: " + ", ".join(labels)) + print(f"Replacement count: {replacement_count}") + else: + print("No secret-looking values found.") + 
print(f"Path: {rel_path}") + return 0 + + +def delete_capture( + target: Path, + capture: str, + confirm: bool = False, + json_output: bool = False, +) -> int: + target = target.expanduser().resolve() + root = _resolve_link_root(target) + wiki_dir = _resolve_wiki_dir(target) + if not wiki_dir.exists(): + print(f"Missing wiki directory: {wiki_dir}", file=sys.stderr) + return 1 + capture_path = _resolve_capture_file(root, capture) + if capture_path is None: + print(f"Capture not found under {root}: {capture}", file=sys.stderr) + return 1 + rel_path = capture_path.relative_to(root).as_posix() + payload = { + "deleted": False, + "path": rel_path, + "confirmation_required": not confirm, + } + if not confirm: + if json_output: + print(json.dumps(payload, indent=2)) + else: + print("Confirmation required.") + print(f"Run: python3 link.py delete-capture \"{rel_path}\" . --confirm") + return 1 + + capture_path.unlink() + _append_log( + wiki_dir, + _utc_timestamp(), + "delete-capture", + f"Deleted raw capture {rel_path}", + ["Deleted file only; capture contents were not logged."], + ) + payload["deleted"] = True + payload["confirmation_required"] = False + if json_output: + print(json.dumps(payload, indent=2)) + return 0 + print("Capture deleted") + print(f"Path: {rel_path}") + return 0 + + +def update_memory( + target: Path, + identifier: str, + text: str, + source: str = "manual", + allow_conflict: bool = False, + project: str | None = None, + json_output: bool = False, +) -> int: + if not text or not text.strip(): + print("Memory update text is required", file=sys.stderr) + return 1 + try: + result = _update_memory_page( + target, + identifier, + text, + source=source, + allow_conflict=allow_conflict, + project=project or _default_project(target), + ) + except (FileNotFoundError, ValueError) as exc: + print(f"Could not update memory: {exc}", file=sys.stderr) + return 1 + + if json_output: + print(json.dumps(result, indent=2)) + return 0 + + if not result.get("updated") 
and result.get("conflict"): + print("Possible conflicting memory found") + print(f"Memory being updated: {result['title']} ({result['path']})") + print("") + print("Conflict candidates:") + for candidate in result.get("conflict_candidates", []): + reasons = ", ".join(candidate.get("conflict_reasons", [])) + print(f"- {candidate['title']} ({candidate['path']})") + if reasons: + print(f" Reasons: {reasons}") + print("") + print("Next:") + first = next(iter(result.get("conflict_candidates", [])), None) + if first: + print(f" python3 link.py explain-memory \"{first['name']}\" .") + print(" Update/archive the conflicting memory, or use --allow-conflict only if both should coexist.") + return 0 + + print("Memory updated") + print(f"Title: {result['title']}") + print(f"Path: {result['path']}") + print(f"Update count: {result['update_count']}") + print(f"Review: {result['previous_review_status']} -> {result['review_status']}") + print("") + print("Next:") + print(f" python3 link.py explain-memory \"{result['name']}\" .") + print(f" python3 link.py review-memory \"{result['name']}\" .") + return 0 + + +def recall( + target: Path, + query: str, + limit: int = 10, + json_output: bool = False, + include_archived: bool = False, + project: str | None = None, +) -> int: + target = target.expanduser().resolve() + wiki_dir = _resolve_wiki_dir(target) + if not wiki_dir.exists(): + print(f"Missing wiki directory: {wiki_dir}", file=sys.stderr) + return 1 + project_name = project or _default_project(target) + results = _recall_memories( + wiki_dir, + query, + limit=limit, + include_archived=include_archived, + project=project_name, + ) + + if json_output: + print(json.dumps({ + "query": query, + "count": len(results), + "include_archived": include_archived, + "project": project_name, + "memories": results, + }, indent=2)) + return 0 + + print(f"Link memory recall: {query}") + if project_name: + print(f"Project: {project_name}") + if include_archived: + print("Including archived/stale 
memories") + print("") + if not results: + print("No matching memories found.") + print("") + print("Next:") + print(" Add one: python3 link.py remember \"Memory to keep\" .") + return 0 + + print(f"{len(results)} memor{'y' if len(results) == 1 else 'ies'}") + for record in results: + print(f"- {record['title']} ({record['memory_type']} · {record['scope']})") + print(f" {record['path']}") + recall = record.get("recall") if isinstance(record.get("recall"), dict) else {} + if recall.get("state"): + print(f" Recall: {recall['state']}") + summary = record.get("tldr") or record.get("snippet") + if summary: + print(f" {summary}") + return 0 + + +def archive_memory(target: Path, identifier: str, reason: str | None = None, json_output: bool = False) -> int: + try: + result = _set_memory_status(target, identifier, "archived", reason=reason) + except (FileNotFoundError, ValueError) as exc: + print(f"Could not archive memory: {exc}", file=sys.stderr) + return 1 + + if json_output: + print(json.dumps(result, indent=2)) + return 0 + + if result["updated"]: + print("Memory archived") + else: + print("Memory already archived") + print(f"Title: {result['title']}") + print(f"Path: {result['path']}") + print(f"Previous status: {result['previous_status']}") + print(f"Status: {result['status']}") + print("") + print("Next:") + print(f" Restore: python3 link.py restore-memory \"{result['name']}\" .") + return 0 + + +def restore_memory(target: Path, identifier: str, json_output: bool = False) -> int: + try: + result = _set_memory_status(target, identifier, "active") + except (FileNotFoundError, ValueError) as exc: + print(f"Could not restore memory: {exc}", file=sys.stderr) + return 1 + + if json_output: + print(json.dumps(result, indent=2)) + return 0 + + if result["updated"]: + print("Memory restored") + else: + print("Memory already active") + print(f"Title: {result['title']}") + print(f"Path: {result['path']}") + print(f"Previous status: {result['previous_status']}") + 
print(f"Status: {result['status']}") + return 0 + + +def forget_memory(target: Path, identifier: str, confirm: bool = False, json_output: bool = False) -> int: + target = target.expanduser().resolve() + wiki_dir = _resolve_wiki_dir(target) + if not wiki_dir.exists(): + print(f"Missing wiki directory: {wiki_dir}", file=sys.stderr) + return 1 + + def rebuild_memory_backlinks() -> bool: + backlinks = _build_backlinks(wiki_dir) + _core_atomic_write_json(wiki_dir / "_backlinks.json", backlinks) + return True + + result = _core_forget_memory_page( + wiki_dir, + identifier, + confirm=confirm, + records=_memory_records(wiki_dir), + timestamp=_utc_timestamp(), + log_writer=lambda ts, operation, description, lines: _append_log( + wiki_dir, + ts, + operation, + description, + lines, + ), + rebuild_backlinks=rebuild_memory_backlinks, + ) + if json_output: + print(json.dumps(result, indent=2)) + return 0 if result.get("forgotten") else 1 + + if not result.get("found"): + print(f"Memory not found: {identifier}", file=sys.stderr) + return 1 + if result.get("confirmation_required"): + print("Confirmation required.") + print(f"Run: python3 link.py forget-memory \"{result['name']}\" . 
--confirm") + return 1 + + print("Memory forgotten") + print(f"Title: {result['title']}") + print(f"Deleted: {result['path']}") + print(f"Backlinks rebuilt: {'yes' if result.get('backlinks_rebuilt') else 'no'}") + return 0 + + +def memory_inbox( + target: Path, + limit: int = 20, + include_archived: bool = False, + project: str | None = None, + json_output: bool = False, +) -> int: + target = target.expanduser().resolve() + wiki_dir = _resolve_wiki_dir(target) + if not wiki_dir.exists(): + print(f"Missing wiki directory: {wiki_dir}", file=sys.stderr) + return 1 + inbox = _memory_inbox(wiki_dir, limit=limit, include_archived=include_archived, project=project) + + if json_output: + print(json.dumps(inbox, indent=2)) + return 0 + + print(f"Link memory inbox: {target}") + if inbox.get("project"): + print(f"Project: {inbox['project']}") + if include_archived: + print("Including archived memories") + print("") + review_count = inbox["review_count"] + print(f"{review_count} memor{'y' if review_count == 1 else 'ies'} need review") + if inbox["counts_by_severity"]: + print(f"Severity: {_format_counts(inbox['counts_by_severity'])}") + print("") + if not inbox["items"]: + print("Inbox is clear.") + return 0 + + for item in inbox["items"]: + print(f"- {item['title']} ({item['memory_type']} · {item['scope']} · {item['status']})") + print(f" {item['path']}") + for issue in item["issues"]: + print(f" [{issue['severity']}] {issue['code']}: {issue['message']}") + primary = item.get("primary_action") or {} + if primary: + print(f" Next: {primary['label']} - {primary['description']}") + print(f" Command: {primary['command']}") + actions = [ + action + for action in item.get("actions", []) + if action.get("kind") != primary.get("kind") + ][:3] + if actions: + labels = ", ".join(str(action.get("label") or "") for action in actions) + print(f" Other actions: {labels}") + return 0 + + +def review_memory(target: Path, identifier: str, note: str | None = None, json_output: bool = False) -> 
int: + try: + result = _mark_memory_reviewed(target, identifier, note=note) + except (FileNotFoundError, ValueError) as exc: + print(f"Could not review memory: {exc}", file=sys.stderr) + return 1 + + if json_output: + print(json.dumps(result, indent=2)) + return 0 + + if result["updated"]: + print("Memory reviewed") + else: + print("Memory was already reviewed") + print(f"Title: {result['title']}") + print(f"Path: {result['path']}") + print(f"Previous review status: {result['previous_review_status']}") + print(f"Review status: {result['review_status']}") + if result["remaining_issue_count"]: + print("") + print(f"{result['remaining_issue_count']} issue{'s' if result['remaining_issue_count'] != 1 else ''} still need attention:") + for issue in result["remaining_issues"]: + print(f"- [{issue['severity']}] {issue['code']}: {issue['message']}") + return 0 + + +def explain_memory(target: Path, identifier: str, json_output: bool = False) -> int: + target = target.expanduser().resolve() + wiki_dir = _resolve_wiki_dir(target) + if not wiki_dir.exists(): + print(f"Missing wiki directory: {wiki_dir}", file=sys.stderr) + return 1 + try: + explanation = _memory_explanation(wiki_dir, identifier) + except ValueError as exc: + print(f"Could not explain memory: {exc}", file=sys.stderr) + return 1 + + if json_output: + print(json.dumps(explanation, indent=2)) + return 0 + + memory = explanation["memory"] + recall_info = explanation["recall"] + review = explanation["review"] + provenance = explanation["provenance"] + lifecycle = explanation["lifecycle"] + graph = explanation["graph"] + + print(f"Link memory explanation: {memory['title']}") + print("") + print(f"Path: {memory['path']}") + print(f"Type: {memory['memory_type']} · Scope: {memory['scope']} · Status: {lifecycle['status']}") + print(f"Source: {provenance['source'] or 'missing'}") + print(f"Captured: {provenance['date_captured'] or 'missing'}") + print(f"Review: {review['status']} · Issues: {review['issue_count']}") + 
print(f"Recall: {recall_info['state']} ({'enabled' if recall_info['default_enabled'] else 'disabled'} by default)") + print(f"Reason: {recall_info['reason']}") + summary = memory.get("tldr") or memory.get("snippet") + if summary: + print("") + print(f"Summary: {summary}") + if review["issues"]: + print("") + print("Review issues:") + for issue in review["issues"]: + print(f"- [{issue['severity']}] {issue['code']}: {issue['message']}") + print(f" Action: {issue['suggested_action']}") + print("") + print("Graph:") + print(f"- Forward links: {', '.join(graph['forward']) if graph['forward'] else 'none'}") + print(f"- Inbound links: {', '.join(graph['inbound']) if graph['inbound'] else 'none'}") + if explanation["log_entries"]: + print("") + print("Recent lifecycle log:") + for entry in explanation["log_entries"][-3:]: + first_line = next((line for line in entry.splitlines() if line.strip().startswith("## ")), "") + print(f"- {first_line[3:] if first_line.startswith('## ') else first_line or 'log entry'}") + return 0 + + +def _format_counts(counts: dict[str, int]) -> str: + if not counts: + return "none" + return ", ".join(f"{name}: {count}" for name, count in counts.items()) + + +def _print_memory_list(title: str, records: list[dict[str, object]], empty: str = "none") -> None: + print(title) + if not records: + print(f"- {empty}") + return + for record in records: + print(f"- {record['title']} ({record['memory_type']} · {record['scope']})") + print(f" {record['path']}") + summary = record.get("tldr") or record.get("snippet") + if summary: + print(f" {summary}") + + +def query( + target: Path, + query_text: str, + budget: str = "medium", + project: str | None = None, + json_output: bool = False, +) -> int: + target = target.expanduser().resolve() + wiki_dir = _resolve_wiki_dir(target) + if not wiki_dir.exists(): + print(f"Missing wiki directory: {wiki_dir}", file=sys.stderr) + return 1 + query_text = _clean_text_input(query_text, max_len=500) + project_name = project or 
_default_project(target) + payload = _query_link(wiki_dir, query_text, budget=budget, project=project_name) + if json_output: + print(json.dumps(payload, indent=2)) + return 0 + if not payload.get("found"): + print(f"No Link context found for: {query_text}") + if payload.get("error"): + print(f"Error: {payload['error']}") + return 1 + return 0 + + print(f"Link context packet: {payload['query']}") + if payload.get("project"): + print(f"Project: {payload['project']}") + strategy = payload["strategy"] + print(f"Budget: {payload['budget']} · Mode: {strategy['mode']}") + print("") + + memory = payload["memory"] + print(f"Memory ({memory['count']})") + for item in memory["items"]: + print(f"- {item['title']} ({item.get('memory_type', 'memory')} · {item.get('scope', '')})") + print(f" {item.get('summary', '')}") + recall_info = item.get("recall", {}) + if isinstance(recall_info, dict) and recall_info.get("state"): + print(f" Recall: {recall_info['state']} · {item['why_selected']}") + if not memory["items"]: + print("- none") + + wiki = payload["wiki"] + print("") + print(f"Wiki ({len(wiki['pages'])} pages · primary: {wiki['primary'] or 'none'})") + for item in wiki["pages"]: + print(f"- [{item['relationship']}] {item['title']} ({item.get('type', '')})") + content = " ".join(str(item.get("content", "")).split()) + if content: + print(f" {content[:240]}{'...' 
if len(content) > 240 else ''}") + print(f" Why: {item['why_selected']}") + if not wiki["pages"]: + print("- none") + + print("") + print("Agent guidance") + for item in payload["agent_guidance"]: + print(f"- {item}") + return 0 + + +def graph_summary( + target: Path, + topic: str = "", + limit: int = 40, + depth: int = 1, + max_edges: int = 120, + json_output: bool = False, +) -> int: + target = target.expanduser().resolve() + wiki_dir = _resolve_wiki_dir(target) + if not wiki_dir.exists(): + print(f"Missing wiki directory: {wiki_dir}", file=sys.stderr) + return 1 + topic = _clean_text_input(topic, max_len=500) + cache = _core_build_wiki_cache(wiki_dir) + payload = _core_graph_summary( + cache, + topic=topic, + limit=limit, + depth=depth, + max_edges=max_edges, + ) + _core_close_wiki_cache(cache) + if json_output: + print(json.dumps(payload, indent=2)) + return 0 + + title = "Link graph summary" + if topic: + title += f": {topic}" + print(title) + print(f"Mode: {payload['mode']} · Search backend: {payload['search_backend']}") + print( + "Scale: " + f"{payload['node_count']} nodes · {payload['edge_count']} edges · " + f"returned {payload['returned_nodes']} nodes/{payload['returned_edges']} edges" + ) + if payload.get("truncated"): + print("Scope: bounded for agent context; use follow-up actions only if needed.") + print("") + print("Nodes") + for node in payload["nodes"]: + print(f"- {node['title']} ({node['id']} · degree {node['degree']})") + if node.get("summary"): + print(f" {node['summary']}") + print(f" Why: {node['why_selected']}") + if not payload["nodes"]: + print("- none") + print("") + print("Follow-up") + for action in payload["follow_up"]: + tool = action.get("tool", "") + args = action.get("arguments", {}) + when = action.get("when", "") + suffix = f" — {when}" if when else "" + print(f"- {tool} {json.dumps(args, ensure_ascii=False) if args else ''}{suffix}".rstrip()) + return 0 - missing_summaries = _find_pages_missing_summaries(wiki_dir) - if 
missing_summaries: - warnings.append("pages missing TLDR/query summary: " + ", ".join(missing_summaries[:8])) - else: - print("OK wiki pages have summaries") - missing_sources = _find_pages_missing_source_sections(wiki_dir) - if missing_sources: - warnings.append("source-backed pages missing Sources section: " + ", ".join(missing_sources[:8])) - else: - print("OK source-backed pages cite sources") +def _timed(label: str, fn: Callable[[], object]) -> tuple[str, object, float]: + start = time.perf_counter() + value = fn() + return label, value, time.perf_counter() - start - source_count_mismatches = _find_source_count_mismatches(wiki_dir) - if source_count_mismatches: - warnings.append("source_count metadata mismatch: " + ", ".join(source_count_mismatches[:8])) - else: - print("OK source_count metadata matches Sources sections") - isolated = _find_isolated_pages(wiki_dir) - if isolated: - warnings.append("isolated wiki pages: " + ", ".join(isolated[:8])) - else: - print("OK graph has no isolated wiki pages") +def _benchmark_graph_initial_payload(cache: dict[str, object], full_graph: object) -> dict[str, object]: + if not isinstance(full_graph, Mapping): + return _core_graph_initial_payload({"nodes": [], "edges": []}) + summary_graph = None + if _core_graph_needs_bounded_overview(full_graph): + summary = _core_graph_summary( + cache, + limit=_core_graph_initial_summary_node_limit, + depth=1, + max_edges=_core_graph_initial_summary_edge_limit, + ) + summary_graph = { + "nodes": summary.get("nodes", []), + "edges": summary.get("edges", []), + } + return _core_graph_initial_payload(full_graph, summary_graph=summary_graph) - uningested = _find_uningested_raw(target) - if uningested: - warnings.append("raw files not referenced by wiki pages: " + ", ".join(uningested[:8])) - elif raw_dir.exists(): - print("OK raw files are represented in wiki sources") - sensitive_names = _find_sensitive_filenames(target) - if sensitive_names: - errors.append("sensitive-looking filenames 
present: " + ", ".join(sensitive_names[:8])) - else: - print("OK no sensitive-looking filenames") +def benchmark( + target: Path, + query_text: str = "agent memory", + budget: str = "small", + project: str | None = None, + json_output: bool = False, +) -> int: + target = target.expanduser().resolve() + wiki_dir = _resolve_wiki_dir(target) + if not wiki_dir.exists(): + print(f"Missing wiki directory: {wiki_dir}", file=sys.stderr) + return 1 + query_text = _clean_text_input(query_text, max_len=500) + project_name = project or _default_project(target) + timings: dict[str, float] = {} + + label, cache, elapsed = _timed("cache", lambda: _core_build_wiki_cache(wiki_dir)) + timings[label] = elapsed + label, results, elapsed = _timed("search", lambda: _core_search_pages(query_text, cache, limit=20)) + timings[label] = elapsed + label, packet, elapsed = _timed( + "query", + lambda: _core_query_link( + wiki_dir, + query_text, + cache, + _memory_records(wiki_dir), + budget=budget, + project=project_name, + review_command="review-memory", + ), + ) + timings[label] = elapsed + label, graph_summary_payload, elapsed = _timed( + "graph_summary", + lambda: _core_graph_summary(cache, topic=query_text, limit=40, depth=1, max_edges=120), + ) + timings[label] = elapsed + label, page_list_payload, elapsed = _timed( + "page_list", + lambda: _core_list_pages(cache, limit=100), + ) + timings[label] = elapsed + label, graph, elapsed = _timed("graph", lambda: _core_graph_data(cache)) + timings[label] = elapsed + label, graph_initial_payload, elapsed = _timed( + "graph_initial", + lambda: _benchmark_graph_initial_payload(cache, graph), + ) + timings[label] = elapsed - sensitive_values = _find_sensitive_values(target) - if sensitive_values: - errors.append("sensitive-looking file contents present: " + ", ".join(sensitive_values[:8])) - else: - print("OK no sensitive-looking file contents") + budget_report = packet.get("budget_report", {}) if isinstance(packet, dict) else {} + 
graph_summary_info = graph_summary_payload if isinstance(graph_summary_payload, Mapping) else {} + page_list_info = page_list_payload if isinstance(page_list_payload, Mapping) else {} + graph_initial_info = graph_initial_payload if isinstance(graph_initial_payload, Mapping) else {} + payload = { + "target": str(target), + "wiki": str(wiki_dir), + "query": query_text, + "budget": budget, + "project": project_name, + "pages": len(cache.get("pages", [])), + "memories": len(_memory_records(wiki_dir)), + "edges": len(graph.get("edges", [])) if isinstance(graph, dict) else 0, + "graph_summary": { + "returned_nodes": graph_summary_info.get("returned_nodes", 0), + "returned_edges": graph_summary_info.get("returned_edges", 0), + "truncated": bool(graph_summary_info.get("truncated")), + }, + "page_list": { + "returned_count": page_list_info.get("returned_count", 0), + "truncated": bool(page_list_info.get("truncated")), + }, + "graph_initial": { + "mode": graph_initial_info.get("graph_mode", "unknown"), + "nodes": graph_initial_info.get("node_count", 0), + "edges": graph_initial_info.get("edge_count", 0), + "total_nodes": graph_initial_info.get("total_node_count", 0), + "total_edges": graph_initial_info.get("total_edge_count", 0), + }, + "search_backend": str(cache.get("search_backend") or "token-index"), + "search_results": len(results) if isinstance(results, list) else 0, + "context_items": len(packet.get("context_packet", [])) if isinstance(packet, dict) else 0, + "found": bool(packet.get("found")) if isinstance(packet, dict) else False, + "timings": {key: round(value, 4) for key, value in timings.items()}, + "budget_report": budget_report, + } + payload["health"] = _core_benchmark_health(payload) + _core_close_wiki_cache(cache) + if json_output: + print(json.dumps(payload, indent=2)) + return 0 - if warnings: + print(f"Link benchmark: {target}") + print(f"Query: {query_text}") + if project_name: + print(f"Project: {project_name}") + print("") + print(f"Scale: 
{payload['pages']} pages · {payload['memories']} memories · {payload['edges']} edges") + print(f"Search backend: {payload['search_backend']}") + graph_summary_info = payload["graph_summary"] + page_list_info = payload["page_list"] + graph_initial_info = payload["graph_initial"] + print(f"Results: {payload['search_results']} search results · {payload['context_items']} context items") + if isinstance(graph_summary_info, Mapping) and isinstance(page_list_info, Mapping): + print( + "Agent-safe payloads: " + f"graph summary {graph_summary_info.get('returned_nodes', 0)} nodes/" + f"{graph_summary_info.get('returned_edges', 0)} edges · " + f"page list {page_list_info.get('returned_count', 0)} pages" + ) + if isinstance(graph_initial_info, Mapping): + print( + "Graph page initial load: " + f"{graph_initial_info.get('mode', 'unknown')} · " + f"{graph_initial_info.get('nodes', 0)}/{graph_initial_info.get('total_nodes', 0)} nodes" + ) + health = payload["health"] + if isinstance(health, Mapping): + print(f"Verdict: {health.get('label', 'unknown')}") + if health.get("summary"): + print(f"Health: {health.get('summary')}") + print("") + print("Timings") + for key in ("cache", "search", "query", "graph_summary", "page_list", "graph_initial", "graph"): + print(f"- {key}: {payload['timings'][key]:.4f}s") + if isinstance(health, Mapping) and health.get("warnings"): print("") - print("Warnings:") - for warning in warnings: + print("Warnings") + for warning in health["warnings"]: print(f"- {warning}") + recommendations = health.get("recommendations") + if isinstance(recommendations, list) and recommendations: + print("") + print("Recommendations") + for recommendation in recommendations: + print(f"- {recommendation}") + if isinstance(budget_report, dict): + packet_report = budget_report.get("context_packet") + if isinstance(packet_report, dict): + print("") + print( + "Packet: " + f"{packet_report.get('estimated_chars', 0)} chars · " + f"{packet_report.get('estimated_tokens', 0)} 
tokens · " + f"has_more={packet_report.get('has_more', False)}" + ) + print("") + print(f"Result: {'found' if payload['found'] else 'no matching context'}") + return 0 - if errors: - print("") - print("Errors:") - for error in errors: - print(f"- {error}") - print("") - print("Result: needs attention") + +def brief( + target: Path, + query: str = "", + limit: int = 6, + project: str | None = None, + json_output: bool = False, +) -> int: + target = target.expanduser().resolve() + wiki_dir = _resolve_wiki_dir(target) + if not wiki_dir.exists(): + print(f"Missing wiki directory: {wiki_dir}", file=sys.stderr) return 1 + query = _clean_text_input(query, max_len=500) + project_name = project or _default_project(target) + payload = _memory_brief(wiki_dir, query=query, limit=limit, project=project_name) + payload = _core_add_capture_review_to_brief( + payload, + _capture_review_summary(target, project=project_name), + ) + + if json_output: + print(json.dumps(payload, indent=2)) + return 0 + title = "Link memory brief" + if query: + title += f": {query}" + print(title) + if project_name: + print(f"Project: {project_name}") print("") - print("Result: healthy") + profile_data = payload["profile"] + print( + f"{profile_data['active_count']} active memories · " + f"{payload['relevant_count']} relevant · " + f"{payload['review']['count']} need review" + ) + print(f"Types: {_format_counts(profile_data['by_type'])}") + print(f"Scopes: {_format_counts(profile_data['by_scope'])}") + print("") + + _print_memory_list("Relevant memories", payload["relevant_memories"]) + if payload["review"]["items"]: + print("") + print("Review queue") + for item in payload["review"]["items"][:3]: + print(f"- {item['title']} ({item['memory_type']} · {item['scope']})") + first_issue = item["issues"][0] + print(f" [{first_issue['severity']}] {first_issue['code']}: {first_issue['message']}") + if payload["captures"]["items"]: + print("") + print("Raw captures") + print(f"{payload['captures']['count']} 
saved · {payload['captures']['warning_count']} with secret-looking warnings") + for capture in payload["captures"]["items"]: + print(f"- {capture['title']} ({capture['path']})") + if capture["secret_warnings"]: + print(" Warnings: " + ", ".join(capture["secret_warnings"])) + print(f" Next: {payload['captures']['next_action']}") + print("") + print("Agent guidance") + for item in payload["agent_guidance"]: + print(f"- {item}") return 0 -def ingest_status(target: Path, json_output: bool = False) -> int: +def profile(target: Path, limit: int = 10, project: str | None = None, json_output: bool = False) -> int: target = target.expanduser().resolve() - status = _collect_ingest_status(target) + wiki_dir = _resolve_wiki_dir(target) + if not wiki_dir.exists(): + print(f"Missing wiki directory: {wiki_dir}", file=sys.stderr) + return 1 + project_name = project or _default_project(target) + profile_data = _memory_profile(wiki_dir, limit=limit, project=project_name) if json_output: - print(json.dumps(status, indent=2)) - return 0 if status["has_raw_dir"] and status["has_wiki_dir"] else 1 + print(json.dumps(profile_data, indent=2)) + return 0 - print(f"Link ingest status: {target}") + print(f"Link memory profile: {target}") + if project_name: + print(f"Project: {project_name}") print("") - if not status["has_raw_dir"]: - print("Missing raw/ directory") - if not status["has_wiki_dir"]: - print("Missing wiki/ directory") - if not status["has_raw_dir"] or not status["has_wiki_dir"]: + memory_count = profile_data["memory_count"] + active_count = profile_data["active_count"] + review_count = profile_data["review_count"] + print(f"{memory_count} memor{'y' if memory_count == 1 else 'ies'} · {active_count} active · {review_count} need review") + print(f"Types: {_format_counts(profile_data['by_type'])}") + print(f"Scopes: {_format_counts(profile_data['by_scope'])}") + if profile_data["by_project"]: + print(f"Projects: {_format_counts(profile_data['by_project'])}") + print(f"Status: 
{_format_counts(profile_data['by_status'])}") + tags = ", ".join( + f"{item['tag']} ({item['count']})" + for item in profile_data["top_tags"] + ) + if tags: + print(f"Tags: {tags}") + print("") + + if memory_count == 0: + print("No memories found.") print("") print("Next:") - print(" Run an installer or create a demo: python3 link.py demo") - return 1 - - print(f"Raw files: {status['raw_count']}") - print(f"Source pages: {status['source_page_count']}") - print(f"Represented in wiki/sources: {status['represented_count']}") - print(f"Pending ingest: {status['pending_count']}") - print(f"Backlinks: {status['backlinks_status']} ({status['backlinks_message']})") + print(" Add one: python3 link.py remember \"Memory to keep\" .") + return 0 - pending_raw = status["pending_raw"] - if pending_raw: + _print_memory_list("Recent memories", profile_data["recent"]) + print("") + _print_memory_list("Preferences", profile_data["preferences"]) + print("") + _print_memory_list("Decisions", profile_data["decisions"]) + print("") + _print_memory_list("Project context", profile_data["projects"]) + if profile_data["archived"]: print("") - print("Pending raw files:") - for item in pending_raw[:20]: - print(f"- {item['raw']}") - if len(pending_raw) > 20: - print(f"- ... {len(pending_raw) - 20} more") + _print_memory_list("Archived memories", profile_data["archived"]) + return 0 - print("") - print("Next:") - if pending_raw: - first_pending = pending_raw[0]["raw"] - print(f" Ask your agent: ingest {first_pending}") - print(" After ingest: python3 link.py rebuild-backlinks .") - print(" Then check: python3 link.py doctor .") - elif status["backlinks_status"] != "current": - print(" Repair graph index: python3 link.py rebuild-backlinks .") - print(" Then check: python3 link.py doctor .") - else: - print(" No pending raw files. 
Run: python3 link.py doctor .") - return 0 +def _cli_memory_audit_actions( + target: Path, + inbox: dict[str, object], + captures: dict[str, object], + risk_factors: list[dict[str, object]], + project_name: str, +) -> list[dict[str, object]]: + root = _resolve_link_root(target) + project_arg = f' --project "{project_name}"' if project_name else "" + return [ + { + "label": "Review memory inbox", + "command": f'python3 link.py memory-inbox "{root}"{project_arg}', + "recommended": bool(inbox["review_count"]), + }, + { + "label": "Review raw captures", + "command": f'python3 link.py capture-inbox "{root}"{project_arg}', + "recommended": bool(captures["count"] or captures.get("read_warning_count")), + }, + { + "label": "Run doctor", + "command": f'python3 link.py doctor "{root}"', + "recommended": not risk_factors, + }, + ] -def rebuild_backlinks(target: Path) -> int: +def _memory_audit_payload(target: Path, wiki_dir: Path, limit: int = 10, project: str | None = None) -> dict[str, object]: + project_name = project or _default_project(target) + profile_data = _memory_profile(wiki_dir, limit=limit, project=project_name) + inbox = _memory_inbox(wiki_dir, limit=limit, include_archived=True, project=project_name) + captures = _capture_review_summary(target, project=project_name, limit=min(limit, 10)) + payload = _core_memory_audit_report(profile_data, inbox, captures, [], project=project_name) + payload["next_actions"] = _cli_memory_audit_actions( + target, + inbox, + captures, + payload["risk_factors"], + str(payload["project"]), + ) + return payload + + +def memory_audit(target: Path, limit: int = 10, project: str | None = None, json_output: bool = False) -> int: + target = target.expanduser().resolve() wiki_dir = _resolve_wiki_dir(target) if not wiki_dir.exists(): print(f"Missing wiki directory: {wiki_dir}", file=sys.stderr) return 1 - backlinks = _build_backlinks(wiki_dir) - out_path = wiki_dir / "_backlinks.json" - out_path.write_text(json.dumps(backlinks, indent=2) + 
"\n", encoding="utf-8") - page_count = len(_wiki_pages(wiki_dir)) - edge_count = sum(len(targets) for targets in backlinks["forward"].values()) - print(f"Rebuilt {out_path}") - print(f"Pages: {page_count}") - print(f"Edges: {edge_count}") + payload = _memory_audit_payload(target, wiki_dir, limit=limit, project=project) + + if json_output: + print(json.dumps(payload, indent=2)) + return 0 + + print(f"Link memory audit: {target}") + if payload["project"]: + print(f"Project: {payload['project']}") + print(f"Status: {payload['status']}") + print("") + profile_data = payload["profile"] + print( + f"Memories: {profile_data['memory_count']} total · " + f"{profile_data['active_count']} active · " + f"{profile_data['review_count']} need review" + ) + print( + f"Raw captures: {payload['captures']['count']} saved · " + f"{payload['captures']['warning_count']} with secret-looking warnings · " + f"{payload['captures'].get('read_warning_count', 0)} read warnings" + ) + if payload["risk_factors"]: + print("") + print("Needs attention") + for factor in payload["risk_factors"]: + print(f"- {factor['code']}: {factor['message']}") + print("") + print("Next actions") + for action in payload["next_actions"]: + marker = "recommended" if action["recommended"] else "optional" + print(f"- {action['label']} ({marker})") + print(f" {action['command']}") return 0 def _check_link_mcp_import(python_cmd: str) -> dict[str, object]: code = ( - "import json, link_mcp; " - "print(json.dumps({'installed': True, 'version': getattr(link_mcp, '__version__', 'unknown')}))" + "import json\n" + "status = {'installed': False, 'version': None, 'mcp_sdk': False, 'error': None}\n" + "try:\n" + " import link_mcp\n" + " status['installed'] = True\n" + " status['version'] = getattr(link_mcp, '__version__', 'unknown')\n" + " from mcp.server.fastmcp import FastMCP\n" + " status['mcp_sdk'] = True\n" + "except Exception as exc:\n" + " status['error'] = str(exc)\n" + "print(json.dumps(status))\n" ) try: result = 
subprocess.run( @@ -1176,7 +2821,8 @@ def _check_link_mcp_import(python_cmd: str) -> dict[str, object]: return { "installed": bool(data.get("installed")), "version": data.get("version") or "unknown", - "error": None, + "mcp_sdk": bool(data.get("mcp_sdk")), + "error": data.get("error"), } @@ -1191,6 +2837,84 @@ def _mcp_config(python_cmd: str, wiki_dir: Path) -> dict[str, object]: } +def _display_command(parts: list[str]) -> str: + if os.name == "nt": + return subprocess.list2cmdline(parts) + return shlex.join(parts) + + +def _mcp_verify_action(tool: str, label: str, command: list[str]) -> dict[str, object]: + return { + "tool": tool, + "label": label, + "command": command, + "command_text": _display_command(command), + } + + +def _mcp_verify_guidance( + *, + target: Path, + python_cmd: str, + import_status: Mapping[str, object], + mcp_sdk_ready: bool, + version_matches: bool, + wiki_exists: bool, +) -> tuple[list[dict[str, str]], list[dict[str, object]]]: + installed = bool(import_status.get("installed")) + issues: list[dict[str, str]] = [] + next_actions: list[dict[str, object]] = [] + + if not installed: + issues.append({ + "code": "link_mcp_missing", + "message": "link-mcp is not importable from the configured Python.", + }) + next_actions.append( + _mcp_verify_action( + "install_link_mcp", + "Install link-mcp in the configured Python environment", + [python_cmd, "-m", "pip", "install", "--upgrade", "link-mcp"], + ) + ) + else: + if not mcp_sdk_ready: + issues.append({ + "code": "mcp_sdk_missing", + "message": "link-mcp is installed, but the MCP SDK dependency is missing.", + }) + next_actions.append( + _mcp_verify_action( + "reinstall_link_mcp", + f"Reinstall link-mcp dependencies for Link {LINK_VERSION}", + [python_cmd, "-m", "pip", "install", "--upgrade", f"link-mcp=={LINK_VERSION}"], + ) + ) + if not version_matches: + issues.append({"code": "version_mismatch", "message": f"link-mcp must match Link {LINK_VERSION}."}) + next_actions.append( + 
_mcp_verify_action( + "upgrade_link_mcp", + f"Upgrade link-mcp to Link {LINK_VERSION}", + [python_cmd, "-m", "pip", "install", "--upgrade", f"link-mcp=={LINK_VERSION}"], + ) + ) + if not wiki_exists: + issues.append({ + "code": "wiki_missing", + "message": "The configured Link wiki directory does not exist.", + }) + next_actions.append( + _mcp_verify_action( + "init_wiki", + "Create or repair the local Link wiki", + [sys.executable, str(ROOT / "link.py"), "init", str(target)], + ) + ) + + return issues, next_actions + + def _resolve_mcp_python(target: Path, wiki_dir: Path, python_cmd: str | None) -> str: if python_cmd: return str(Path(python_cmd).expanduser()) @@ -1217,16 +2941,34 @@ def verify_mcp( import_status = import_check(python_cmd) wiki_exists = wiki_dir.exists() and wiki_dir.is_dir() config = _mcp_config(python_cmd, wiki_dir) - ready = bool(import_status.get("installed")) and wiki_exists + installed_version = str(import_status.get("version") or "") + mcp_sdk_ready = bool(import_status.get("mcp_sdk", import_status.get("installed"))) + version_matches = bool(import_status.get("installed")) and installed_version == LINK_VERSION + ready = bool(import_status.get("installed")) and mcp_sdk_ready and wiki_exists and version_matches + normalized_import_status = dict(import_status) + normalized_import_status.setdefault("mcp_sdk", mcp_sdk_ready) + normalized_import_status.setdefault("error", None) + issues, next_actions = _mcp_verify_guidance( + target=target, + python_cmd=python_cmd, + import_status=normalized_import_status, + mcp_sdk_ready=mcp_sdk_ready, + version_matches=version_matches, + wiki_exists=wiki_exists, + ) status = { "ready": ready, "python": python_cmd, - "link_mcp": import_status, + "expected_version": LINK_VERSION, + "version_matches": version_matches, + "link_mcp": normalized_import_status, "wiki": { "path": str(wiki_dir), "exists": wiki_exists, }, "config": config, + "issues": issues, + "next_actions": next_actions, } if json_output: @@ -1238,6 
+2980,13 @@ def verify_mcp( print(f"Python: {python_cmd}") if import_status.get("installed"): print(f"link-mcp: installed ({import_status.get('version')})") + if not mcp_sdk_ready: + print("MCP SDK: missing") + error = import_status.get("error") + if error: + print(f"Import error: {error}") + if not version_matches: + print(f"Expected version: {LINK_VERSION}") else: print("link-mcp: missing") error = import_status.get("error") @@ -1256,46 +3005,133 @@ def verify_mcp( print("") print("Next:") + python_pip = [python_cmd, "-m", "pip", "install", "--upgrade"] if not import_status.get("installed"): - print(" Install: python3 -m pip install --upgrade link-mcp") + print(f" Install: {_display_command([*python_pip, 'link-mcp'])}") print(" macOS/Homebrew fallback:") print(" python3 -m venv ~/.link-mcp-venv") print(" ~/.link-mcp-venv/bin/python -m pip install --upgrade pip link-mcp") print(" Then rerun with: python3 link.py verify-mcp . --python ~/.link-mcp-venv/bin/python") + elif not mcp_sdk_ready: + print(f" Reinstall link-mcp dependencies for Link {LINK_VERSION}:") + print(f" {_display_command([*python_pip, f'link-mcp=={LINK_VERSION}'])}") + elif not version_matches: + print(f" Upgrade link-mcp to match Link {LINK_VERSION}:") + print(f" {_display_command([*python_pip, f'link-mcp=={LINK_VERSION}'])}") if not wiki_exists: - print(" Create a wiki with an installer, or try: python3 link.py demo") + print(" Create a wiki with an installer, or try: python3 link.py init") print("") print("Result: needs attention") return 1 def _copy_runtime_files(target: Path) -> None: + target.mkdir(parents=True, exist_ok=True) for name in ("serve.py", "link.py", "LINK.md", ".linkignore"): src = ROOT / name - if src.exists(): - shutil.copy2(src, target / name) + dst = target / name + if src.exists() and src.resolve() != dst.resolve(): + shutil.copy2(src, dst) + core_src = ROOT / "mcp_package" / "link_core" + if not core_src.exists(): + core_src = ROOT / "link_core" + if core_src.exists(): + 
core_target = target / "link_core" + core_target.mkdir(exist_ok=True) + for src in core_src.glob("*.py"): + dst = core_target / src.name + if src.resolve() != dst.resolve(): + shutil.copy2(src, dst) for name in ("logo.png", "logo.svg"): src = ROOT / name - if src.exists(): - shutil.copy2(src, target / name) + dst = target / name + if src.exists() and src.resolve() != dst.resolve(): + shutil.copy2(src, dst) + + +def init_wiki(target: Path) -> int: + target = target.expanduser().resolve() + target.mkdir(parents=True, exist_ok=True) + _copy_runtime_files(target) + fixes = _apply_doctor_fixes(target) + + print(f"Link wiki ready at {target}") + if fixes: + print("") + print("Initialized:") + for item in fixes: + print(f" - {item}") + print("") + print("Next:") + print(" link status --validate") + print(" link serve") + print(" Drop sources into raw/ and ask your agent: ingest raw/ into Link") + return 0 + + +def starter_prompts(target: Path, project: str | None = None, json_output: bool = False) -> int: + payload = _core_starter_prompt_payload(target, project=project) + if json_output: + print(json.dumps(payload, indent=2)) + return 0 + + print(f"Link starter prompts: {payload['target']}") + if payload["project"]: + print(f"Project: {payload['project']}") + print("") + print("Ask your agent") + for item in payload["prompts"]: + print(f"- {item['prompt']}") + print(f" When: {item['when']}") + print("") + print("Local checks") + for command in payload["commands"]: + print(f"- {command}") + return 0 + + +def serve_wiki(target: Path, port: int = 3000) -> int: + target = target.expanduser().resolve() + if port < 1 or port > 65535: + print("--port must be between 1 and 65535") + return 1 + serve_path = ROOT / "serve.py" + if not serve_path.exists(): + serve_path = target / "serve.py" + if not serve_path.exists(): + print(f"Link viewer missing: {serve_path}") + print("") + print("Next:") + print(f" {_display_command(['link', 'init', str(target)])}") + return 1 + if not (target 
/ "wiki").exists(): + print(f"Link wiki missing: {target / 'wiki'}") + print("") + print("Next:") + print(f" {_display_command(['link', 'init', str(target)])}") + return 1 + try: + return subprocess.run( + [sys.executable, str(serve_path), "--root", str(target), "--port", str(port)] + ).returncode + except KeyboardInterrupt: + return 130 -def create_demo(target: Path, force: bool = False) -> None: +def create_demo(target: Path, force: bool = False) -> int: target = target.expanduser().resolve() if target.exists() and any(target.iterdir()): marker = target / DEMO_MARKER if not force: - raise SystemExit( - f"{target} already exists. Re-run with --force to replace a Link demo directory." - ) + print(f"{target} already exists. Re-run with --force to replace a Link demo directory.", file=sys.stderr) + return 1 if not marker.exists(): - raise SystemExit( - f"{target} does not look like a Link demo directory; refusing to overwrite it." - ) + print(f"{target} does not look like a Link demo directory; refusing to overwrite it.", file=sys.stderr) + return 1 shutil.rmtree(target) target.mkdir(parents=True, exist_ok=True) - (target / DEMO_MARKER).write_text("Link demo directory\n", encoding="utf-8") + _core_atomic_write_text(target / DEMO_MARKER, "Link demo directory\n") _copy_runtime_files(target) for directory in ( @@ -1303,6 +3139,7 @@ def create_demo(target: Path, force: bool = False) -> None: "wiki/sources", "wiki/concepts", "wiki/entities", + "wiki/memories", "wiki/comparisons", "wiki/explorations", ): @@ -1313,40 +3150,236 @@ def create_demo(target: Path, force: bool = False) -> None: for rel, content in DEMO_FILES.items(): path = target / rel path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(content.strip() + "\n", encoding="utf-8") + _core_atomic_write_text(path, content.strip() + "\n") backlinks = _build_backlinks(target / "wiki") - (target / "wiki/_backlinks.json").write_text( - json.dumps(backlinks, indent=2), encoding="utf-8" - ) + 
_core_atomic_write_json(target / "wiki/_backlinks.json", backlinks) + _core_migrate_wiki(target / "wiki") print(f"Link demo created at {target}") print("") print("View it:") - print(f" cd {target}") - print(" python3 serve.py") + print(f" {_display_command(['python3', 'link.py', 'serve', str(target)])}") + print("") + print("Try the value loop:") + print(f" {_display_command(['python3', 'link.py', 'query', 'why does Link help agents?', str(target), '--budget', 'small'])}") + print(f" {_display_command(['python3', 'link.py', 'brief', 'working on agent memory', str(target)])}") + print(f" {_display_command(['python3', 'link.py', 'memory-audit', str(target)])}") + print("") + print("Guide:") + print(f" {target / 'START_HERE.md'}") print("") print("Then open:") - print(" http://localhost:3000") - print(" http://localhost:3000/graph") + print(" http://127.0.0.1:3000") + print(" http://127.0.0.1:3000/graph") + return 0 def main(argv: list[str] | None = None) -> int: parser = argparse.ArgumentParser(prog="link.py", description="Link command runner") + parser.add_argument("--version", action="version", version=f"Link {LINK_VERSION}") sub = parser.add_subparsers(dest="command", required=True) + init_cmd = sub.add_parser("init", help="create or repair a normal Link wiki") + init_cmd.add_argument("target", nargs="?", default=".") + + serve_cmd = sub.add_parser("serve", help="start the local Link web viewer") + serve_cmd.add_argument("target", nargs="?", default=".") + serve_cmd.add_argument("--port", type=int, default=3000) + demo = sub.add_parser("demo", help="create a pre-ingested sample Link wiki") demo.add_argument("target", nargs="?", default=DEFAULT_DEMO_DIR) demo.add_argument("--force", action="store_true", help="replace an existing Link demo directory") + prompts_cmd = sub.add_parser("prompts", help="print first-run agent prompts and local checks") + prompts_cmd.add_argument("target", nargs="?", default=".") + prompts_cmd.add_argument("--project", default=None, 
help="project slug for project-scoped prompt examples") + prompts_cmd.add_argument("--json", action="store_true", help="print machine-readable prompt data") + + status_cmd = sub.add_parser("status", help="show Link readiness, counts, and next actions") + status_cmd.add_argument("target", nargs="?", default=".") + status_cmd.add_argument("--validate", action="store_true", help="include the ingest validation gate summary") + status_cmd.add_argument("--json", action="store_true", help="print machine-readable status") + + backup_cmd = sub.add_parser("backup", help="create or list local wiki backup archives") + backup_cmd.add_argument("target", nargs="?", default=".") + backup_cmd.add_argument("--label", default="manual", help="short label for the backup filename") + backup_cmd.add_argument("--include-raw", action="store_true", help="also include raw/ sources and captures") + backup_cmd.add_argument("--list", action="store_true", dest="list_only", help="list recent backups instead of creating one") + backup_cmd.add_argument("--json", action="store_true", help="print machine-readable backup status") + doctor_cmd = sub.add_parser("doctor", help="check a Link wiki for common health issues") doctor_cmd.add_argument("target", nargs="?", default=".") doctor_cmd.add_argument("--fix", action="store_true", help="repair safe structural and backlink issues") + migrate_cmd = sub.add_parser("migrate", help="apply safe Link wiki schema migrations") + migrate_cmd.add_argument("target", nargs="?", default=".") + migrate_cmd.add_argument("--json", action="store_true", help="print machine-readable migration status") + + validate_cmd = sub.add_parser("validate", help="validate wiki pages before accepting ingest output") + validate_cmd.add_argument("target", nargs="?", default=".") + validate_cmd.add_argument("--strict", action="store_true", help="fail on warnings as well as errors") + validate_cmd.add_argument("--json", action="store_true", help="print machine-readable validation 
findings") + ingest_status_cmd = sub.add_parser("ingest-status", help="show raw files pending wiki ingestion") ingest_status_cmd.add_argument("target", nargs="?", default=".") ingest_status_cmd.add_argument("--json", action="store_true", help="print machine-readable status") + remember_cmd = sub.add_parser("remember", help="save a local agent memory") + remember_cmd.add_argument("text", help="memory text to save") + remember_cmd.add_argument("target", nargs="?", default=".") + remember_cmd.add_argument("--title", default=None, help="memory page title") + remember_cmd.add_argument("--type", choices=MEMORY_TYPES, default="note", dest="memory_type") + remember_cmd.add_argument("--scope", choices=MEMORY_SCOPES, default="user") + remember_cmd.add_argument("--tags", default=None, help="comma-separated tags") + remember_cmd.add_argument("--source", default="manual", help="where this memory came from") + remember_cmd.add_argument("--project", default=None, help="project key for project-scoped memories") + remember_cmd.add_argument("--allow-duplicate", action="store_true", help="create a new memory even if a strong duplicate exists") + remember_cmd.add_argument("--allow-conflict", action="store_true", help="create a memory even if it may conflict with an active memory") + remember_cmd.add_argument("--json", action="store_true", help="print machine-readable status") + + propose_cmd = sub.add_parser("propose-memories", help="propose durable memories from chat or session notes without writing them") + propose_cmd.add_argument("source_input", help="text or path to a note/session file") + propose_cmd.add_argument("target", nargs="?", default=".") + propose_cmd.add_argument("--limit", type=int, default=10) + propose_cmd.add_argument("--project", default=None, help="project key for duplicate/conflict checks") + propose_cmd.add_argument("--json", action="store_true", help="print machine-readable proposals") + + capture_cmd = sub.add_parser("capture-session", help="save session 
notes to raw/ and propose memories") + capture_cmd.add_argument("source_input", help="text or path to a chat/session note") + capture_cmd.add_argument("target", nargs="?", default=".") + capture_cmd.add_argument("--title", default=None, help="title for the raw capture note") + capture_cmd.add_argument("--limit", type=int, default=10) + capture_cmd.add_argument("--project", default=None, help="project key for proposal checks") + capture_cmd.add_argument("--json", action="store_true", help="print machine-readable capture details") + + capture_inbox_cmd = sub.add_parser("capture-inbox", help="list saved raw session captures") + capture_inbox_cmd.add_argument("target", nargs="?", default=".") + capture_inbox_cmd.add_argument("--limit", type=int, default=20) + capture_inbox_cmd.add_argument("--project", default=None, help="include global captures plus this project") + capture_inbox_cmd.add_argument("--json", action="store_true", help="print machine-readable capture inbox") + + accept_capture_cmd = sub.add_parser("accept-capture", help="accept one proposal from a raw session capture") + accept_capture_cmd.add_argument("capture", help="raw capture path or filename") + accept_capture_cmd.add_argument("target", nargs="?", default=".") + accept_capture_cmd.add_argument("--index", type=int, default=1, help="1-based proposal index to accept") + accept_capture_cmd.add_argument("--title", default=None, help="override accepted memory title") + accept_capture_cmd.add_argument("--type", dest="memory_type", choices=MEMORY_TYPES, default=None) + accept_capture_cmd.add_argument("--scope", choices=MEMORY_SCOPES, default=None) + accept_capture_cmd.add_argument("--tags", default=None, help="comma-separated tags") + accept_capture_cmd.add_argument("--project", default=None, help="project key for accepted project memory") + accept_capture_cmd.add_argument("--allow-duplicate", action="store_true", help="create a new memory even if a strong duplicate exists") + 
accept_capture_cmd.add_argument("--allow-conflict", action="store_true", help="create a memory even if it may conflict with an active memory") + accept_capture_cmd.add_argument("--json", action="store_true", help="print machine-readable acceptance details") + + redact_capture_cmd = sub.add_parser("redact-capture", help="redact secret-looking values from a raw session capture") + redact_capture_cmd.add_argument("capture", help="raw capture path or filename") + redact_capture_cmd.add_argument("target", nargs="?", default=".") + redact_capture_cmd.add_argument("--replacement", default="[redacted-secret]", help="replacement text") + redact_capture_cmd.add_argument("--json", action="store_true", help="print machine-readable redaction details") + + delete_capture_cmd = sub.add_parser("delete-capture", help="delete a raw session capture after explicit confirmation") + delete_capture_cmd.add_argument("capture", help="raw capture path or filename") + delete_capture_cmd.add_argument("target", nargs="?", default=".") + delete_capture_cmd.add_argument("--confirm", action="store_true", help="required to delete the capture") + delete_capture_cmd.add_argument("--json", action="store_true", help="print machine-readable deletion details") + + update_memory_cmd = sub.add_parser("update-memory", help="merge new text into an existing memory") + update_memory_cmd.add_argument("identifier", help="memory page name, title, or path") + update_memory_cmd.add_argument("text", help="new memory text to merge") + update_memory_cmd.add_argument("target", nargs="?", default=".") + update_memory_cmd.add_argument("--source", default="manual", help="where this update came from") + update_memory_cmd.add_argument("--project", default=None, help="project key for conflict checks") + update_memory_cmd.add_argument("--allow-conflict", action="store_true", help="update even if the text may conflict with another active memory") + update_memory_cmd.add_argument("--json", action="store_true", help="print 
machine-readable status") + + recall_cmd = sub.add_parser("recall", help="search local agent memories") + recall_cmd.add_argument("query", help="memory query") + recall_cmd.add_argument("target", nargs="?", default=".") + recall_cmd.add_argument("--limit", type=int, default=10) + recall_cmd.add_argument("--include-archived", action="store_true", help="include archived and stale memories") + recall_cmd.add_argument("--project", default=None, help="include user/global memories plus this project's memories") + recall_cmd.add_argument("--json", action="store_true", help="print machine-readable results") + + query_cmd = sub.add_parser("query", aliases=["query-link"], help="build a compact answer-ready Link context packet") + query_cmd.add_argument("query", help="task or question to retrieve memory and wiki context for") + query_cmd.add_argument("target", nargs="?", default=".") + query_cmd.add_argument("--budget", choices=("small", "medium", "large"), default="medium") + query_cmd.add_argument("--project", default=None, help="include user/global memories plus this project's memories") + query_cmd.add_argument("--json", action="store_true", help="print machine-readable context packet") + + graph_summary_cmd = sub.add_parser("graph-summary", help="show a bounded graph summary for agent context budgets") + graph_summary_cmd.add_argument("topic", nargs="?", default="", help="optional topic/query for a bounded neighborhood") + graph_summary_cmd.add_argument("target", nargs="?", default=".") + graph_summary_cmd.add_argument("--limit", type=int, default=40, help="maximum returned nodes") + graph_summary_cmd.add_argument("--depth", type=int, default=1, help="neighborhood depth for topic mode") + graph_summary_cmd.add_argument("--max-edges", type=int, default=120, help="maximum returned edges") + graph_summary_cmd.add_argument("--json", action="store_true", help="print machine-readable graph summary") + + benchmark_cmd = sub.add_parser("benchmark", help="measure local search, 
query, and graph performance") + benchmark_cmd.add_argument("query", nargs="?", default="agent memory", help="query to benchmark") + benchmark_cmd.add_argument("target", nargs="?", default=".") + benchmark_cmd.add_argument("--budget", choices=("small", "medium", "large"), default="small") + benchmark_cmd.add_argument("--project", default=None, help="include user/global memories plus this project's memories") + benchmark_cmd.add_argument("--json", action="store_true", help="print machine-readable benchmark data") + + brief_cmd = sub.add_parser("brief", help="prime an agent with relevant local memory") + brief_cmd.add_argument("query", nargs="?", default="", help="optional task or question to retrieve memory for") + brief_cmd.add_argument("target", nargs="?", default=".") + brief_cmd.add_argument("--limit", type=int, default=6) + brief_cmd.add_argument("--project", default=None, help="include user/global memories plus this project's memories") + brief_cmd.add_argument("--json", action="store_true", help="print machine-readable memory brief") + + profile_cmd = sub.add_parser("profile", help="show what Link remembers") + profile_cmd.add_argument("target", nargs="?", default=".") + profile_cmd.add_argument("--limit", type=int, default=10) + profile_cmd.add_argument("--project", default=None, help="include user/global memories plus this project's memories") + profile_cmd.add_argument("--json", action="store_true", help="print machine-readable profile") + + audit_cmd = sub.add_parser("memory-audit", help="audit memory health, review backlog, and raw captures") + audit_cmd.add_argument("target", nargs="?", default=".") + audit_cmd.add_argument("--limit", type=int, default=10) + audit_cmd.add_argument("--project", default=None, help="include user/global memories plus this project's memories") + audit_cmd.add_argument("--json", action="store_true", help="print machine-readable audit") + + archive_cmd = sub.add_parser("archive-memory", help="archive a stale or unwanted 
memory") + archive_cmd.add_argument("identifier", help="memory page name, title, or path") + archive_cmd.add_argument("target", nargs="?", default=".") + archive_cmd.add_argument("--reason", default=None, help="why this memory is being archived") + archive_cmd.add_argument("--json", action="store_true", help="print machine-readable status") + + restore_cmd = sub.add_parser("restore-memory", help="restore an archived memory to active status") + restore_cmd.add_argument("identifier", help="memory page name, title, or path") + restore_cmd.add_argument("target", nargs="?", default=".") + restore_cmd.add_argument("--json", action="store_true", help="print machine-readable status") + + forget_cmd = sub.add_parser("forget-memory", help="permanently delete a memory after explicit confirmation") + forget_cmd.add_argument("identifier", help="memory page name, title, or path") + forget_cmd.add_argument("target", nargs="?", default=".") + forget_cmd.add_argument("--confirm", action="store_true", help="required to delete the memory") + forget_cmd.add_argument("--json", action="store_true", help="print machine-readable status") + + inbox_cmd = sub.add_parser("memory-inbox", help="show memories that need review") + inbox_cmd.add_argument("target", nargs="?", default=".") + inbox_cmd.add_argument("--limit", type=int, default=20) + inbox_cmd.add_argument("--include-archived", action="store_true", help="include archived memories") + inbox_cmd.add_argument("--project", default=None, help="include user/global memories plus this project's memories") + inbox_cmd.add_argument("--json", action="store_true", help="print machine-readable inbox") + + review_cmd = sub.add_parser("review-memory", help="mark a memory as reviewed") + review_cmd.add_argument("identifier", help="memory page name, title, or path") + review_cmd.add_argument("target", nargs="?", default=".") + review_cmd.add_argument("--note", default=None, help="optional review note") + review_cmd.add_argument("--json", 
action="store_true", help="print machine-readable status") + + explain_cmd = sub.add_parser("explain-memory", help="explain why a memory exists and whether it is recall-ready") + explain_cmd.add_argument("identifier", help="memory page name, title, or path") + explain_cmd.add_argument("target", nargs="?", default=".") + explain_cmd.add_argument("--json", action="store_true", help="print machine-readable explanation") + + rebuild_index_cmd = sub.add_parser("rebuild-index", help="regenerate wiki/index.md from current pages") + rebuild_index_cmd.add_argument("target", nargs="?", default=".") + rebuild_cmd = sub.add_parser("rebuild-backlinks", help="rebuild wiki/_backlinks.json") rebuild_cmd.add_argument("target", nargs="?", default=".") @@ -1356,13 +3389,168 @@ def main(argv: list[str] | None = None) -> int: verify_mcp_cmd.add_argument("--python", default=None, help="Python executable to verify") args = parser.parse_args(argv) + if args.command == "init": + return init_wiki(Path(args.target)) + if args.command == "serve": + return serve_wiki(Path(args.target), port=args.port) if args.command == "demo": - create_demo(Path(args.target), force=args.force) - return 0 + return create_demo(Path(args.target), force=args.force) + if args.command == "prompts": + return starter_prompts(Path(args.target), project=args.project, json_output=args.json) + if args.command == "status": + return status(Path(args.target), include_validation=args.validate, json_output=args.json) + if args.command == "backup": + return backup( + Path(args.target), + label=args.label, + include_raw=args.include_raw, + list_only=args.list_only, + json_output=args.json, + ) if args.command == "doctor": return doctor(Path(args.target), fix=args.fix) + if args.command == "migrate": + return migrate(Path(args.target), json_output=args.json) + if args.command == "validate": + return validate(Path(args.target), strict=args.strict, json_output=args.json) if args.command == "ingest-status": return 
ingest_status(Path(args.target), json_output=args.json) + if args.command == "remember": + return remember( + Path(args.target), + args.text, + title=args.title, + memory_type=args.memory_type, + scope=args.scope, + tags=args.tags, + source=args.source, + project=args.project, + allow_duplicate=args.allow_duplicate, + allow_conflict=args.allow_conflict, + json_output=args.json, + ) + if args.command == "propose-memories": + return propose_memories( + Path(args.target), + args.source_input, + limit=args.limit, + project=args.project, + json_output=args.json, + ) + if args.command == "capture-session": + return capture_session( + Path(args.target), + args.source_input, + title=args.title, + limit=args.limit, + project=args.project, + json_output=args.json, + ) + if args.command == "capture-inbox": + return capture_inbox( + Path(args.target), + limit=args.limit, + project=args.project, + json_output=args.json, + ) + if args.command == "accept-capture": + return accept_capture( + Path(args.target), + args.capture, + index=args.index, + title=args.title, + memory_type=args.memory_type, + scope=args.scope, + tags=args.tags, + project=args.project, + allow_duplicate=args.allow_duplicate, + allow_conflict=args.allow_conflict, + json_output=args.json, + ) + if args.command == "redact-capture": + return redact_capture( + Path(args.target), + args.capture, + replacement=args.replacement, + json_output=args.json, + ) + if args.command == "delete-capture": + return delete_capture( + Path(args.target), + args.capture, + confirm=args.confirm, + json_output=args.json, + ) + if args.command == "update-memory": + return update_memory( + Path(args.target), + args.identifier, + args.text, + source=args.source, + allow_conflict=args.allow_conflict, + project=args.project, + json_output=args.json, + ) + if args.command == "recall": + return recall( + Path(args.target), + args.query, + limit=args.limit, + json_output=args.json, + include_archived=args.include_archived, + 
project=args.project, + ) + if args.command in {"query", "query-link"}: + return query( + Path(args.target), + args.query, + budget=args.budget, + project=args.project, + json_output=args.json, + ) + if args.command == "graph-summary": + return graph_summary( + Path(args.target), + topic=args.topic, + limit=args.limit, + depth=args.depth, + max_edges=args.max_edges, + json_output=args.json, + ) + if args.command == "benchmark": + return benchmark( + Path(args.target), + query_text=args.query, + budget=args.budget, + project=args.project, + json_output=args.json, + ) + if args.command == "brief": + return brief(Path(args.target), query=args.query, limit=args.limit, project=args.project, json_output=args.json) + if args.command == "profile": + return profile(Path(args.target), limit=args.limit, project=args.project, json_output=args.json) + if args.command == "memory-audit": + return memory_audit(Path(args.target), limit=args.limit, project=args.project, json_output=args.json) + if args.command == "archive-memory": + return archive_memory(Path(args.target), args.identifier, reason=args.reason, json_output=args.json) + if args.command == "restore-memory": + return restore_memory(Path(args.target), args.identifier, json_output=args.json) + if args.command == "forget-memory": + return forget_memory(Path(args.target), args.identifier, confirm=args.confirm, json_output=args.json) + if args.command == "memory-inbox": + return memory_inbox( + Path(args.target), + limit=args.limit, + include_archived=args.include_archived, + project=args.project, + json_output=args.json, + ) + if args.command == "review-memory": + return review_memory(Path(args.target), args.identifier, note=args.note, json_output=args.json) + if args.command == "explain-memory": + return explain_memory(Path(args.target), args.identifier, json_output=args.json) + if args.command == "rebuild-index": + return rebuild_index(Path(args.target)) if args.command == "rebuild-backlinks": return 
rebuild_backlinks(Path(args.target)) if args.command == "verify-mcp": diff --git a/mcp_package/README.md b/mcp_package/README.md index 0df0a79..78a87c7 100644 --- a/mcp_package/README.md +++ b/mcp_package/README.md @@ -2,93 +2,152 @@ -MCP server for the [Link](https://github.com/gowtham0992/link) personal knowledge wiki. Exposes your wiki as MCP tools — search, query context, and traverse the knowledge graph without reading files directly. +MCP server for [Link](https://github.com/gowtham0992/link), local personal memory for agents. Exposes memories and wiki context as MCP tools so agents can recall preferences, decisions, project context, sources, and graph neighborhoods without reading files directly. Listed on the [official MCP Registry](https://registry.modelcontextprotocol.io) as `io.github.gowtham0992/link`. Release notes: [CHANGELOG.md](https://github.com/gowtham0992/link/blob/main/CHANGELOG.md) -## Install +## What You Need -```bash -python3 -m pip install --upgrade link-mcp -``` +`link-mcp` is the MCP server. It needs a Link wiki to read from. The normal +wiki location is `~/link/wiki`, created by the main Link installers. -If macOS/Homebrew Python reports `externally-managed-environment`, install into a dedicated venv: +Recommended setup: ```bash -python3 -m venv ~/.link-mcp-venv -~/.link-mcp-venv/bin/python -m pip install --upgrade pip link-mcp +git clone https://github.com/gowtham0992/link.git +bash link/integrations/codex/install.sh # or claude-code, cursor, kiro, vscode ``` -Then use the venv Python in your MCP config: +The installer scaffolds `~/link/`, installs or upgrades `link-mcp`, writes agent +instructions, and prints the exact MCP config for your machine. -```json -{ - "mcpServers": { - "link": { - "command": "/Users/YOU/.link-mcp-venv/bin/python", - "args": ["-m", "link_mcp", "--wiki", "/Users/YOU/link/wiki"] - } - } -} +After install, ask your agent: + +```text +is Link ready? 
+brief me from Link before we continue +query Link for what you know about this project ``` -Replace `/Users/YOU` with your absolute home path. +## MCP-Only Install -## Quick setup (Kiro) +Use this when you already have a Link wiki and only need the MCP package. ```bash -git clone https://github.com/gowtham0992/link.git -bash link/integrations/kiro/install.sh +python3 -m pip install --upgrade link-mcp ``` -This installs `link-mcp`, scaffolds `~/link/`, and registers the MCP server in `~/.kiro/settings/mcp.json` automatically. +If macOS/Homebrew Python reports `externally-managed-environment`, use a +dedicated venv: -## Manual setup (any MCP client) - -1. Scaffold your wiki: ```bash -git clone https://github.com/gowtham0992/link.git -bash link/integrations/kiro/install.sh # or claude-code, cursor, codex +python3 -m venv ~/.link-mcp-venv +~/.link-mcp-venv/bin/python -m pip install --upgrade pip link-mcp ``` -2. Add to your MCP client config: +Then add the server to your MCP client config. Use an absolute wiki path: + ```json { "mcpServers": { "link": { "command": "python3", - "args": ["-m", "link_mcp"] + "args": ["-m", "link_mcp", "--wiki", "/Users/YOU/link/wiki"] } } } ``` -Custom wiki path: +If you installed into the venv, use the venv Python: + ```json { "mcpServers": { "link": { - "command": "python3", - "args": ["-m", "link_mcp", "--wiki", "~/my-wiki/wiki"] + "command": "/Users/YOU/.link-mcp-venv/bin/python", + "args": ["-m", "link_mcp", "--wiki", "/Users/YOU/link/wiki"] } } } ``` +Replace `/Users/YOU` with your absolute home path. The default wiki is +`~/link/wiki/`; override with `--wiki /path/to/wiki`. + +## Agent Workflow + +Most agents should call: + +1. `link_status(include_validation=true)` when connecting or troubleshooting. +2. `starter_prompts()` when the user asks what to try after install. +3. `memory_brief(query="")` before personalized or project work. +4. `query_link(query="", budget="small")` for compact answer-ready context. +5. 
`ingest_status()` when the user drops files into `raw/`. +6. `validate_wiki(strict=true)` after ingest or large edits. + +Use `remember_memory` only when the user explicitly approves saving durable +memory. Use `propose_memories` or `capture_session` for proposal-only review. +For local CLI setup checks, `link verify-mcp --json` returns structured +`issues` and `next_actions` that agents and scripts can consume without parsing +terminal text. +In the local web proposal picker, unreadable raw files are surfaced as +`Fix access` instead of being loaded as empty proposal text. + +## Privacy and Scale + +- Local-first: `link-mcp` reads the wiki path you configure and does not call + external APIs, send telemetry, or require API keys. +- Bounded by default: `query_link`, `get_pages`, `get_backlinks`, and + `get_graph_summary` are designed for agent context budgets so large wikis do + not have to be dumped into a chat. +- Large-wiki search uses in-memory SQLite FTS when Python provides it, with a + token-index fallback when FTS is unavailable. +- Use `get_graph_summary` before `get_graph` unless the user explicitly needs a + full graph export. + ## Tools | Tool | Description | |------|-------------| +| `link_status(include_validation?)` | Readiness summary with package version, wiki path, content/page/memory counts, optional validation summary, warnings, and safe next actions. | +| `starter_prompts(project?)` | First-run natural agent prompts plus local readiness/check commands. | +| `migrate_wiki()` | Apply safe, idempotent wiki schema migrations when `link_status` reports a missing or old schema marker. | +| `ingest_status()` | Raw source ingest state with pending files, graph health, raw safety/access diagnostics, the next agent prompt, guided plan, and follow-up checks. 
| +| `query_link(query, budget?, project?)` | Build a compact answer-ready packet from local memory, ranked wiki search, graph-neighborhood context, provenance, budget reports with estimated packet size, and follow-up actions. | +| `validate_wiki(strict?)` | Validate agent-generated wiki pages after ingest or large edits: frontmatter, type/directory alignment, required sections, dead links, and backlink freshness. | +| `backup_wiki(label?, include_raw?, list_only?)` | Create or list local `.link-backups/` archives before broad repairs or risky wiki edits; raw sources are excluded by default. | +| `memory_brief(query?, limit?, project?)` | Prime the agent before answering or coding with profile counts, relevant memories, review warnings, and safe memory rules. | +| `memory_audit(limit?, project?)` | Read-only health report for memory review backlog, saved raw captures, risk factors, and next actions. | +| `memory_profile(limit?, project?)` | Summarize what Link remembers by type, scope, status, recency, preferences, decisions, and project context. | +| `memory_inbox(limit?, include_archived?)` | List memories that need user review, cleanup, or stronger metadata with primary actions and tool-call hints. | +| `review_memory(identifier, note?)` | Mark a confirmed memory as reviewed. | +| `explain_memory(identifier)` | Explain provenance, lifecycle, graph links, review issues, and recall readiness for one memory. | +| `recall_memory(query, limit?, include_archived?, project?)` | Search durable local memories for preferences, decisions, and project context. | +| `remember_memory(memory, title?, memory_type?, scope?, tags?, source?, allow_duplicate?, allow_conflict?, project?)` | Save an explicit user-approved local memory under `wiki/memories/`; strong duplicates and likely conflicts require explicit override. | +| `propose_memories(text, source?, limit?, project?)` | Propose durable memories from chat/session notes without writing them. 
| +| `capture_session(text, title?, source?, limit?, project?)` | Save long chat/session notes under `raw/memory-captures/` and return proposal-only memory candidates plus secret-looking content warnings. | +| `capture_inbox(limit?, project?)` | Review saved raw captures with redacted snippets, secret-warning labels, and accept/redact/delete commands. | +| `accept_capture(capture, index?, title?, memory_type?, scope?, tags?, project?, allow_duplicate?, allow_conflict?)` | Accept one proposal from a saved raw capture using duplicate/conflict-safe memory writes. | +| `redact_capture(capture, replacement?)` | Redact secret-looking values from a saved raw capture after user approval. | +| `delete_capture(capture, confirm?)` | Delete a saved raw capture after explicit confirmation. | +| `update_memory(identifier, memory, source?, allow_conflict?, project?)` | Merge new information into an existing memory, blocking likely conflicts with other active memories by default. | +| `archive_memory(identifier, reason?)` | Archive stale or wrong memory without deleting the Markdown page. | +| `restore_memory(identifier)` | Restore archived memory to active status. | +| `forget_memory(identifier, confirm?)` | Permanently delete a memory only after explicit user confirmation; prefer archive for reversible cleanup. | | `search_wiki(query, limit?)` | Ranked search — title (20pts), alias (8pts), tag (5pts), fulltext (2pts). Returns scores + snippets. | | `get_context(topic)` | **Primary tool.** Best matching page (full content) + inbound/forward graph links in one call. | -| `get_pages(category?, type?, maturity?)` | All pages with metadata. Filter by category, type, or maturity. | -| `get_backlinks(page_name)` | Inbound + forward links for a page. | -| `get_graph()` | All nodes + edges for graph reasoning. 
| +| `get_pages(category?, type?, maturity?, limit?, offset?, include_all?)` | Bounded page metadata list with filters and follow-up pagination actions; set `include_all=true` only for explicit full metadata export. | +| `get_backlinks(page_name, limit?, offset?, include_all?)` | Bounded inbound + forward links for a page, with total counts and follow-up pagination actions. | +| `get_graph_summary(topic?, limit?, depth?, max_edges?)` | Bounded graph overview or topic neighborhood for large wikis and agent context budgets. | +| `get_graph()` | Full graph export with all nodes + edges; prefer `get_graph_summary` first on large wikis. | +| `rebuild_index()` | Regenerate `wiki/index.md` from current pages so the human-readable catalog stays complete. | | `rebuild_backlinks()` | Rebuild `_backlinks.json` after ingest or lint. | -**Use `get_context` for answering questions** — one call returns the primary page plus all related pages via graph traversal. Eliminates the token waste of reading index.md every session. +Use `link_status` when connecting to Link or troubleshooting setup; if the user asks what to try after install, call `starter_prompts`. If status reports a missing or old schema marker, call `migrate_wiki` before other writes. Use `ingest_status` when the user drops files into `raw/` or asks what still needs ingest; if it returns `blocked_secrets`, `blocked_raw_access`, `blocked_source_access`, scan warnings, or secret warnings, do not read or ingest flagged raw files until the user redacts them, fixes local file access, or repairs unreadable source pages. Start with `query_link` for substantive questions that may need both local memory and wiki context. Use each item's provenance to explain why Link knows something; if `budget_report` says context was truncated, use the returned `follow_up` action before scanning files manually. Use `memory_brief`, passing the user's task as `query` when available, at session start or before personalized/project work.
Pass `project` for repo-specific work so Link returns broad user/global memory plus that project's memory, while keeping other explicit projects out of recall and duplicate/conflict checks. After ingesting sources or substantially editing wiki pages, call `rebuild_index`, `rebuild_backlinks`, then `validate_wiki`, before saying the wiki is updated. Use `backup_wiki` before broad repairs or risky local wiki edits; raw sources are excluded unless the user explicitly asks to include them. Use `memory_profile` to inspect the user/project memory shape, `memory_audit` to see review/capture risks, `memory_inbox` to find memories needing human review and the primary action for each item, `explain_memory` to audit why a memory exists, then `recall_memory` for focused preferences, decisions, and project context. Use `capture_session` for long chat/session notes that should be preserved locally before approval; use `propose_memories` when no raw capture is needed. Both return candidates only. Use `capture_inbox` to review saved captures before accepting, redacting, or deleting them. If `capture_session` reports secret warnings, ask before calling `redact_capture`. Use `accept_capture` only after the user approves one captured proposal. Use `delete_capture` only after explicit user confirmation. If `remember_memory` or `accept_capture` returns duplicate candidates, use `update_memory` on the existing memory unless the user confirms a separate memory. If it returns conflict candidates, ask the user whether to update or archive the older memory before forcing a conflict. Use `archive_memory`, not deletion, when a memory is stale or wrong. Use `forget_memory` only when the user explicitly asks for permanent deletion. Use `get_context` when you need the full primary source page after `query_link` shows it is relevant. Use `get_graph_summary` before `get_graph` when the wiki may be large or the agent only needs graph orientation. 
+Web approval APIs keep the safe path only: duplicate/conflict overrides should +go through CLI or MCP after explicit human review. ## Wiki location diff --git a/mcp_package/link_core/__init__.py b/mcp_package/link_core/__init__.py new file mode 100644 index 0000000..8c6c27a --- /dev/null +++ b/mcp_package/link_core/__init__.py @@ -0,0 +1 @@ +"""Shared Link runtime helpers.""" diff --git a/mcp_package/link_core/backup.py b/mcp_package/link_core/backup.py new file mode 100644 index 0000000..dbc924b --- /dev/null +++ b/mcp_package/link_core/backup.py @@ -0,0 +1,168 @@ +"""Local backup helpers for Link wiki data.""" +from __future__ import annotations + +import re +import tarfile +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + + +BACKUP_DIR_NAME = ".link-backups" +DEFAULT_BACKUP_LIMIT = 20 + + +class BackupError(RuntimeError): + """Raised when a backup archive cannot be completed safely.""" + + +def _utc_timestamp() -> str: + return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z") + + +def _filename_timestamp() -> str: + return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") + + +def _safe_label(label: str) -> str: + slug = re.sub(r"[^a-z0-9]+", "-", str(label).lower()).strip("-") + return slug or "manual" + + +def _iter_files(root: Path) -> list[Path]: + if not root.exists(): + return [] + return sorted( + path + for path in root.rglob("*") + if path.is_file() and not path.is_symlink() + ) + + +def _unique_backup_path(backup_dir: Path, label: str) -> Path: + stem = f"{_filename_timestamp()}-{_safe_label(label)}" + candidate = backup_dir / f"{stem}.tar.gz" + suffix = 2 + while candidate.exists(): + candidate = backup_dir / f"{stem}-{suffix}.tar.gz" + suffix += 1 + return candidate + + +def _prune_backups(backup_dir: Path, limit: int) -> list[str]: + if limit <= 0: + return [] + backups = sorted( + backup_dir.glob("*.tar.gz"), + key=lambda path: (path.stat().st_mtime, path.name), + 
reverse=True, + ) + pruned: list[str] = [] + for path in backups[limit:]: + try: + path.unlink() + pruned.append(path.name) + except OSError: + continue + return pruned + + +def create_backup( + link_root: Path, + *, + label: str = "manual", + include_raw: bool = False, + max_backups: int = DEFAULT_BACKUP_LIMIT, +) -> dict[str, Any]: + """Create a timestamped local backup archive for a Link root. + + The default archive includes only ``wiki/`` because ``raw/`` can contain + pasted source files or session captures with sensitive material. Callers + must opt in with ``include_raw=True`` when they intentionally want raw + sources copied into the local backup. + """ + root = link_root.expanduser().resolve() + wiki_dir = root / "wiki" + if not wiki_dir.exists() or not wiki_dir.is_dir(): + raise FileNotFoundError(f"Link wiki not found at {wiki_dir}") + + backup_dir = root / BACKUP_DIR_NAME + backup_dir.mkdir(parents=True, exist_ok=True) + backup_path = _unique_backup_path(backup_dir, label) + + included_roots: list[tuple[str, Path]] = [("wiki", wiki_dir)] + raw_dir = root / "raw" + if include_raw and raw_dir.exists() and raw_dir.is_dir(): + included_roots.append(("raw", raw_dir)) + + file_count = 0 + current_arcname = "" + try: + with tarfile.open(backup_path, "w:gz") as tar: + for prefix, source_root in included_roots: + for path in _iter_files(source_root): + rel = path.relative_to(source_root) + current_arcname = (Path(prefix) / rel).as_posix() + tar.add(path, arcname=current_arcname, recursive=False) + file_count += 1 + except (OSError, tarfile.TarError) as exc: + try: + backup_path.unlink(missing_ok=True) + except OSError: + pass + detail = f" while adding {current_arcname}" if current_arcname else "" + raise BackupError(f"backup failed{detail}: {exc}") from exc + + pruned = _prune_backups(backup_dir, int(max_backups)) + return { + "created": True, + "path": str(backup_path), + "name": backup_path.name, + "created_at": _utc_timestamp(), + "included": [name for name, 
_ in included_roots], + "include_raw": include_raw, + "file_count": file_count, + "bytes": backup_path.stat().st_size, + "retention_limit": int(max_backups), + "pruned": pruned, + "privacy_note": "raw/ is excluded by default because it may contain sensitive source material", + } + + +def list_backups(link_root: Path, *, limit: int = 20) -> dict[str, Any]: + """Return recent local backups for a Link root.""" + root = link_root.expanduser().resolve() + backup_dir = root / BACKUP_DIR_NAME + backups: list[dict[str, Any]] = [] + warnings: list[dict[str, str]] = [] + if backup_dir.exists(): + archive_stats: list[tuple[Path, Any]] = [] + for path in backup_dir.glob("*.tar.gz"): + try: + archive_stats.append((path, path.stat())) + except OSError as exc: + warnings.append({"backup": path.name, "error": str(exc) or exc.__class__.__name__}) + for path, stat in sorted( + archive_stats, + key=lambda item: (item[1].st_mtime, item[0].name), + reverse=True, + )[: max(int(limit), 0)]: + created_at = ( + datetime.fromtimestamp(stat.st_mtime, timezone.utc) + .replace(microsecond=0) + .isoformat() + .replace("+00:00", "Z") + ) + backups.append({ + "name": path.name, + "path": str(path), + "bytes": stat.st_size, + "created_at": created_at, + }) + return { + "backup_dir": str(backup_dir), + "count": len(backups), + "warning_count": len(warnings), + "warnings": warnings, + "backups": backups, + } diff --git a/mcp_package/link_core/benchmark.py b/mcp_package/link_core/benchmark.py new file mode 100644 index 0000000..755cbc2 --- /dev/null +++ b/mcp_package/link_core/benchmark.py @@ -0,0 +1,58 @@ +"""Shared benchmark health helpers for Link.""" +from __future__ import annotations + +from typing import Mapping + + +BENCHMARK_THRESHOLDS_SECONDS = { + "cache": 5.0, + "search": 1.0, + "query": 3.0, + "graph_summary": 1.0, + "page_list": 0.5, + "graph_initial": 1.0, + "graph": 2.0, +} + + +def benchmark_health(payload: Mapping[str, object]) -> dict[str, object]: + """Return a compact 
interactive-readiness verdict for benchmark output.""" + timings = payload.get("timings") + if not isinstance(timings, Mapping): + timings = {} + warnings: list[str] = [] + slow_paths: list[str] = [] + for label, ceiling in BENCHMARK_THRESHOLDS_SECONDS.items(): + elapsed = timings.get(label) + if isinstance(elapsed, (int, float)) and elapsed > ceiling: + warnings.append(f"{label} took {elapsed:.4f}s, above the {ceiling:.1f}s interactive target") + slow_paths.append(label) + large_token_fallback = int(payload.get("pages") or 0) >= 1000 and payload.get("search_backend") != "sqlite-fts" + if large_token_fallback: + warnings.append("large wiki is using token-index fallback; SQLite FTS would improve search headroom") + if warnings: + summary = "Review recommended before relying on this wiki for interactive agent work." + recommendations = [ + "Run link doctor --fix and link benchmark again after repairing wiki/index state.", + ] + if large_token_fallback or "search" in slow_paths or "query" in slow_paths: + recommendations.append("Use a Python build with sqlite3/FTS5 enabled for large local wikis.") + if "cache" in slow_paths: + recommendations.append("Inspect unusually large pages or raw-source references; cache time is dominated by local file reads.") + if "graph_initial" in slow_paths or "graph" in slow_paths: + recommendations.append("Use graph-summary, search, and focused neighborhoods instead of loading the full graph first.") + if "page_list" in slow_paths: + recommendations.append("Use bounded page-list pagination instead of asking an agent to enumerate every page.") + if not any(path in slow_paths for path in ("cache", "search", "query", "graph_summary", "page_list", "graph_initial", "graph")): + recommendations.append("Inspect unusually large pages or raw-source references if interaction still feels slow.") + else: + summary = "Ready for interactive local agent memory." 
+ recommendations = [] + return { + "status": "warn" if warnings else "pass", + "label": "review" if warnings else "interactive", + "summary": summary, + "thresholds_seconds": BENCHMARK_THRESHOLDS_SECONDS, + "warnings": warnings, + "recommendations": recommendations, + } diff --git a/mcp_package/link_core/capture.py b/mcp_package/link_core/capture.py new file mode 100644 index 0000000..e21f9c6 --- /dev/null +++ b/mcp_package/link_core/capture.py @@ -0,0 +1,209 @@ +"""Shared raw capture helpers for Link CLI and MCP runtimes.""" +from __future__ import annotations + +import re +from pathlib import Path +from typing import Callable + +from .frontmatter import parse_frontmatter +from .memory import normalize_project, slugify +from .security import redact_secret_values, secret_value_warnings + + +CaptureCommands = Callable[[str], dict[str, str]] + + +def capture_title( + text: str, + source: str = "", + title: str | None = None, + *, + default_source: str = "inline", + path_source: bool = False, + max_source_len: int = 120, +) -> str: + """Build a stable human-readable title for saved raw memory captures.""" + if title and title.strip(): + return " ".join(title.split()) + + source_value = " ".join(str(source or "").split()) + if source_value and source_value != default_source: + if path_source: + stem = Path(source_value).stem.replace("-", " ").replace("_", " ").strip() + if stem: + return f"Memory capture: {stem.title()}" + else: + return f"Memory capture: {source_value[:max_source_len]}" + + first_line = next((line.strip() for line in text.splitlines() if line.strip()), "Session notes") + short = " ".join(first_line.split()[:10]).strip(" .") + return f"Memory capture: {short or 'Session notes'}" + + +def capture_filename(timestamp: str, title: str, raw_dir: Path) -> Path: + """Return a unique capture path under raw_dir for the given timestamp/title.""" + safe_stamp = str(timestamp).replace("-", "").replace(":", "") + title_slug = slugify(title.replace("Memory 
capture:", ""), fallback="session-notes") + base = f"{safe_stamp}-{title_slug}" + candidate = raw_dir / f"{base}.md" + counter = 2 + while candidate.exists(): + candidate = raw_dir / f"{base}-{counter}.md" + counter += 1 + return candidate + + +def resolve_capture_file(root: Path, capture: str, *, max_len: int | None = None) -> Path | None: + """Resolve a user-provided raw capture path without escaping the Link root.""" + raw = str(capture or "").strip() + if max_len is not None: + raw = raw[:max_len] + if not raw: + return None + + root = root.expanduser().resolve() + raw_path = Path(raw).expanduser() + candidates = [raw_path] + if not raw_path.is_absolute(): + candidates.extend([ + root / raw, + root / "raw" / "memory-captures" / raw, + root / "raw" / "memory-captures" / f"{raw}.md", + ]) + + for candidate in candidates: + try: + resolved = candidate.resolve() + except OSError: + continue + if not resolved.is_file(): + continue + try: + resolved.relative_to(root) + except ValueError: + continue + return resolved + return None + + +def capture_notes_from_markdown(text: str) -> tuple[dict[str, object], str]: + """Return capture frontmatter and the `## Notes` body when present.""" + meta, body = parse_frontmatter(text) + match = re.search(r"^## Notes\s*(.*?)(?=^## |\Z)", body, flags=re.MULTILINE | re.DOTALL) + notes = match.group(1).strip() if match else body.strip() + return meta, notes + + +def cli_capture_commands(rel_path: str) -> dict[str, str]: + return { + "accept": f'python3 link.py accept-capture "{rel_path}" . --index 1', + "redact": f'python3 link.py redact-capture "{rel_path}" .', + "delete": f'python3 link.py delete-capture "{rel_path}" . 
--confirm', + } + + +def mcp_capture_commands(rel_path: str) -> dict[str, str]: + return { + "accept": f'accept_capture(capture="{rel_path}", index=1)', + "redact": f'redact_capture(capture="{rel_path}")', + "delete": f'delete_capture(capture="{rel_path}", confirm=true)', + } + + +def capture_records( + root: Path, + limit: int = 20, + project: str | None = None, + commands_for: CaptureCommands | None = None, + *, + read_warnings: list[dict[str, str]] | None = None, +) -> list[dict[str, object]]: + root = root.expanduser().resolve() + capture_dir = root / "raw" / "memory-captures" + if not capture_dir.exists(): + return [] + project_name = normalize_project(project) + command_builder = commands_for or cli_capture_commands + records: list[dict[str, object]] = [] + for path in sorted(capture_dir.rglob("*.md")): + if path.name.startswith("."): + continue + rel = path.relative_to(root).as_posix() + try: + text = path.read_text(encoding="utf-8", errors="replace") + stat = path.stat() + except OSError as exc: + if read_warnings is not None: + read_warnings.append({ + "capture": rel, + "error": str(exc) or exc.__class__.__name__, + }) + continue + meta, notes = capture_notes_from_markdown(text) + capture_project = normalize_project(str(meta.get("project") or "")) + if project_name and capture_project and capture_project != project_name: + continue + warnings = secret_value_warnings(text) + safe_notes, _, _ = redact_secret_values(notes) + records.append({ + "path": rel, + "title": str(meta.get("title") or path.stem), + "project": capture_project, + "date_captured": str(meta.get("date_captured") or ""), + "size_bytes": stat.st_size, + "secret_warnings": warnings, + "warning_count": len(warnings), + "snippet": re.sub(r"\s+", " ", safe_notes).strip()[:180], + "commands": command_builder(rel), + }) + records.sort(key=lambda item: (str(item["date_captured"]), str(item["path"])), reverse=True) + return records[:max(1, min(limit, 50))] + + +def capture_inbox( + root: Path, + 
limit: int = 20, + project: str | None = None, + commands_for: CaptureCommands | None = None, +) -> dict[str, object]: + project_name = normalize_project(project) + read_warnings: list[dict[str, str]] = [] + captures = capture_records( + root, + limit=limit, + project=project_name, + commands_for=commands_for, + read_warnings=read_warnings, + ) + return { + "count": len(captures), + "warning_count": sum(1 for capture in captures if capture["warning_count"]), + "read_warning_count": len(read_warnings), + "read_warnings": read_warnings, + "project": project_name, + "captures": captures, + } + + +def capture_review_summary( + root: Path, + limit: int = 3, + project: str | None = None, + commands_for: CaptureCommands | None = None, +) -> dict[str, object]: + """Return compact capture backlog context for briefs, audits, and dashboards.""" + payload = capture_inbox( + root, + limit=50, + project=project, + commands_for=commands_for, + ) + captures = payload["captures"] if isinstance(payload.get("captures"), list) else [] + return { + "count": len(captures), + "warning_count": int(payload.get("warning_count") or 0), + "read_warning_count": int(payload.get("read_warning_count") or 0), + "read_warnings": payload.get("read_warnings") if isinstance(payload.get("read_warnings"), list) else [], + "project": str(payload.get("project") or ""), + "items": captures[:max(1, min(limit, 10))], + } diff --git a/mcp_package/link_core/demo.py b/mcp_package/link_core/demo.py new file mode 100644 index 0000000..2669a19 --- /dev/null +++ b/mcp_package/link_core/demo.py @@ -0,0 +1,572 @@ +"""Bundled first-run demo wiki content for Link.""" +from __future__ import annotations + + +DEMO_MARKER = ".link-demo" +DEMO_FILES: dict[str, str] = { + "START_HERE.md": """# Link Demo: Start Here + +This demo is already ingested. It shows the full loop: source notes, wiki pages, +agent memory, backlinks, graph context, and a compact query packet. + +## Try These Agent Prompts + +```text +is Link ready? 
+query Link for why Link helps agents +brief me from Link before we continue +what does Link remember about local personal memory? +explain why Link remembers local personal memory +``` + +## Try These CLI Checks + +```bash +python3 link.py query "why does Link help agents?" . --budget small +python3 link.py brief "working on agent memory" . +python3 link.py memory-audit . +python3 link.py status --validate . +``` + +## What To Look For + +- The query packet includes both memory and source-backed wiki context. +- The packet is budget-limited, so agents do not need to read the whole wiki. +- The memory entry is inspectable under `wiki/memories/`. +- The graph view shows how sources, concepts, memories, and explorations connect. + +Open the local viewer: + +```bash +python3 link.py serve . +``` + +Then visit `http://127.0.0.1:3000`, `http://127.0.0.1:3000/brief`, and +`http://127.0.0.1:3000/graph`. +""", + "raw/agent-memory-session.md": """--- +title: "Agent memory session" +source_type: demo-note +date_captured: 2026-05-02 +author: Link demo +tags: [agents, memory, local-first] +--- + +# Agent memory session + +An AI coding agent keeps losing project context between sessions. The team wants durable memory that is local, inspectable, and easy to cite. + +Key decisions: + +- Keep raw source notes immutable. +- Compile sources into durable wiki pages. +- Use [[agent-memory]] as the interface between past work and future agents. +- Prefer [[local-first-software]] so the knowledge base stays under user control. +- Expose context through MCP so agents can retrieve graph neighborhoods instead of reading every file. +""", + "raw/transformer-reading-notes.md": """--- +title: "Transformer reading notes" +source_type: demo-note +date_captured: 2026-05-02 +author: Link demo +tags: [ai, transformers, retrieval] +--- + +# Transformer reading notes + +Transformers made long-context sequence modeling practical by replacing recurrence with attention. 
Modern LLM systems often pair transformer models with external retrieval. + +Connections: + +- [[transformers]] provide the model architecture. +- [[retrieval-augmented-generation]] provides fresh or private context. +- [[agent-memory]] gives agents persistent project knowledge outside a single chat. +""", + "raw/local-release-notes.md": """--- +title: "Local release notes" +source_type: demo-note +date_captured: 2026-05-02 +author: Link demo +tags: [release, graph, mcp] +--- + +# Local release notes + +The product team ships a local wiki viewer, MCP server, and graph view. The release focuses on making agent memory visible and auditable. + +Notable changes: + +- [[link]] exposes search, context, backlinks, and graph tools. +- [[knowledge-graph]] shows concepts, sources, and entities as connected pages. +- [[local-first-software]] keeps the source material on disk. +""", + "wiki/sources/agent-memory-session.md": """--- +type: source +title: "Agent memory session" +author: "Link demo" +date_published: "2026-05-02" +date_ingested: "2026-05-02" +source_url: "local demo note" +tags: [agents, memory, local-first] +confidence: high +aliases: ["memory demo note"] +--- + +# Agent memory session + +> **TLDR:** A demo note about turning local project sources into durable context for future agents. + +## Summary + +The source describes an AI coding workflow where an agent repeatedly loses project context between sessions. It proposes raw source notes, compiled wiki pages, and MCP retrieval as a durable memory layer. *Source: [[agent-memory-session]]* `[confidence: high]` + +The note emphasizes local control and inspectability. Raw sources stay immutable, while generated wiki pages become the maintained knowledge layer. *Source: [[agent-memory-session]]* `[confidence: high]` + +## Key Claims + +- **Agent memory should be durable** so future sessions can recover project context. `[confidence: high]` +- **Raw notes should remain immutable** while wiki pages evolve. 
`[confidence: high]` +- **MCP makes memory agent-readable** through structured tools instead of ad hoc file scans. `[confidence: high]` + +## Connections + +- Defines a need for [[agent-memory]]. +- Supports [[local-first-software]] as the storage model. +- Connects [[link]] to agent workflows through MCP. + +## Raw Source + +`raw/agent-memory-session.md` +""", + "wiki/sources/transformer-reading-notes.md": """--- +type: source +title: "Transformer reading notes" +author: "Link demo" +date_published: "2026-05-02" +date_ingested: "2026-05-02" +source_url: "local demo note" +tags: [ai, transformers, retrieval] +confidence: high +aliases: ["transformer demo note"] +--- + +# Transformer reading notes + +> **TLDR:** A demo note linking transformers, retrieval, and persistent agent memory. + +## Summary + +The source frames [[transformers]] as the architecture behind modern LLM systems and connects them to external retrieval. It treats retrieval and memory as practical complements to model context. *Source: [[transformer-reading-notes]]* `[confidence: high]` + +The note links [[retrieval-augmented-generation]] to [[agent-memory]] because both bring outside context into model workflows. *Source: [[transformer-reading-notes]]* `[confidence: high]` + +## Key Claims + +- **Transformers replaced recurrence with attention** for sequence modeling. `[confidence: high]` +- **External retrieval complements LLM context** when information is fresh, private, or project-specific. `[confidence: high]` +- **Persistent agent memory stores knowledge outside one chat session.** `[confidence: high]` + +## Connections + +- Explains why [[transformers]] matter to LLM systems. +- Connects [[retrieval-augmented-generation]] to persistent context. +- Supports [[agent-memory]] as a local retrieval layer. 
+ +## Raw Source + +`raw/transformer-reading-notes.md` +""", + "wiki/sources/local-release-notes.md": """--- +type: source +title: "Local release notes" +author: "Link demo" +date_published: "2026-05-02" +date_ingested: "2026-05-02" +source_url: "local demo note" +tags: [release, graph, mcp] +confidence: high +aliases: ["demo release note"] +--- + +# Local release notes + +> **TLDR:** A demo release note showing Link as a local wiki viewer, graph, and MCP memory server. + +## Summary + +The source describes a release centered on making agent memory visible and auditable. It identifies a local wiki viewer, MCP server, and graph view as the main product surfaces. *Source: [[local-release-notes]]* `[confidence: high]` + +The note connects [[link]] with [[knowledge-graph]] and [[local-first-software]], showing how local markdown can become both a human-readable wiki and agent-readable memory. *Source: [[local-release-notes]]* `[confidence: high]` + +## Key Claims + +- **Link exposes search, context, backlinks, and graph tools.** `[confidence: high]` +- **Graph views make relationships inspectable.** `[confidence: high]` +- **Local-first storage keeps source material under user control.** `[confidence: high]` + +## Connections + +- Describes [[link]] product surfaces. +- Connects [[knowledge-graph]] to visible agent memory. +- Supports [[local-first-software]] as the privacy model. + +## Raw Source + +`raw/local-release-notes.md` +""", + "wiki/concepts/agent-memory.md": """--- +type: concept +title: "Agent memory" +aliases: ["AI memory", "agent context", "durable context"] +date_created: "2026-05-02" +date_updated: "2026-05-02" +source_count: 2 +tags: [agents, memory, mcp] +maturity: growing +--- + +# Agent memory + +> **TLDR:** Agent memory is durable, inspectable context that lets AI agents recover prior project knowledge across sessions. 
+ +## Overview + +Agent memory addresses a common failure mode in AI workflows: each new session starts without the full project history. In Link, memory is stored as markdown wiki pages compiled from immutable raw sources. *Source: [[agent-memory-session]]* `[confidence: high]` + +This memory is useful because agents can query a focused topic and receive the primary page plus related graph context. That is more efficient than reading every source file. *Source: [[transformer-reading-notes]]* `[confidence: high]` + +## How It Works + +1. A user drops source material into `raw/`. +2. An agent compiles durable pages into `wiki/`. +3. Link builds search indexes and backlinks. +4. MCP tools return focused graph context to future agents. + +## Key Facts + +- **Agent memory should be durable** so future sessions can recover project context. *Source: [[agent-memory-session]]* `[confidence: high]` +- **MCP makes memory agent-readable** through structured tools. *Source: [[agent-memory-session]]* `[confidence: high]` +- **Persistent memory complements LLM context windows** by storing knowledge outside a single chat. *Source: [[transformer-reading-notes]]* `[confidence: high]` + +## Open Questions + +- Which memories should be promoted from raw notes into stable wiki pages? +- How should agents detect stale project decisions? + +## Related + +- [[link]] - provides the local wiki and MCP layer. +- [[retrieval-augmented-generation]] - retrieves external context for model workflows. +- [[local-first-software]] - keeps memory under user control. 
+ +## Sources + +- [[agent-memory-session]] +- [[transformer-reading-notes]] +""", + "wiki/concepts/retrieval-augmented-generation.md": """--- +type: concept +title: "Retrieval-augmented generation" +aliases: ["RAG", "retrieval augmented generation"] +date_created: "2026-05-02" +date_updated: "2026-05-02" +source_count: 1 +tags: [ai, retrieval, context] +maturity: seed +--- + +# Retrieval-augmented generation + +> **TLDR:** Retrieval-augmented generation brings external context into model workflows before generation. + +## Overview + +Retrieval-augmented generation pairs a model with a retrieval layer. Instead of relying only on model weights or the current chat, a system fetches relevant external context first. *Source: [[transformer-reading-notes]]* `[confidence: high]` + +In Link, the retrieval layer is a local markdown wiki exposed through search, context, backlinks, and graph tools. This makes [[agent-memory]] inspectable instead of hidden in a proprietary store. *Source: [[transformer-reading-notes]]* `[confidence: high]` + +## Key Facts + +- **External retrieval complements LLM context** when information is fresh, private, or project-specific. *Source: [[transformer-reading-notes]]* `[confidence: high]` +- **Persistent memory can be modeled as retrieval** over durable local pages. *Source: [[transformer-reading-notes]]* `[confidence: high]` + +## Related + +- [[agent-memory]] - a local memory use case for retrieval. +- [[transformers]] - the model architecture that often consumes retrieved context. +- [[link]] - provides the local retrieval surface. 
+ +## Sources + +- [[transformer-reading-notes]] +""", + "wiki/concepts/transformers.md": """--- +type: concept +title: "Transformers" +aliases: ["transformer architecture", "LLM architecture"] +date_created: "2026-05-02" +date_updated: "2026-05-02" +source_count: 1 +tags: [ai, models, attention] +maturity: seed +--- + +# Transformers + +> **TLDR:** Transformers are neural architectures that use attention to model relationships across sequences. + +## Overview + +Transformers are presented in the demo source as the architecture behind many modern LLM systems. They made long-context sequence modeling practical by replacing recurrence with attention. *Source: [[transformer-reading-notes]]* `[confidence: high]` + +The source connects transformers to [[retrieval-augmented-generation]] because modern LLM workflows often combine model context with retrieved project or domain knowledge. *Source: [[transformer-reading-notes]]* `[confidence: high]` + +## Key Facts + +- **Transformers use attention for sequence modeling.** *Source: [[transformer-reading-notes]]* `[confidence: high]` +- **Transformer systems often benefit from retrieved context.** *Source: [[transformer-reading-notes]]* `[confidence: high]` + +## Related + +- [[retrieval-augmented-generation]] - supplies outside context to model workflows. +- [[agent-memory]] - stores project context for future sessions. + +## Sources + +- [[transformer-reading-notes]] +""", + "wiki/concepts/local-first-software.md": """--- +type: concept +title: "Local-first software" +aliases: ["local first", "local-first"] +date_created: "2026-05-02" +date_updated: "2026-05-02" +source_count: 2 +tags: [privacy, storage, software] +maturity: growing +--- + +# Local-first software + +> **TLDR:** Local-first software keeps user data on disk in formats the user can inspect, back up, and move. + +## Overview + +Local-first software is a product design choice where the user's data remains directly accessible on their machine. 
In the demo sources, this matters because [[agent-memory]] can contain project decisions and source notes. *Source: [[agent-memory-session]]* `[confidence: high]` + +Link follows this model by storing raw sources and wiki pages as markdown files. The graph and MCP server read those files rather than sending them to a hosted backend. *Source: [[local-release-notes]]* `[confidence: high]` + +## Key Facts + +- **Raw notes stay immutable** while generated wiki pages evolve. *Source: [[agent-memory-session]]* `[confidence: high]` +- **Local markdown keeps memory inspectable.** *Source: [[local-release-notes]]* `[confidence: high]` + +## Related + +- [[link]] - implements local-first agent memory. +- [[agent-memory]] - benefits from local, inspectable storage. +- [[knowledge-graph]] - visualizes local wiki relationships. + +## Sources + +- [[agent-memory-session]] +- [[local-release-notes]] +""", + "wiki/concepts/knowledge-graph.md": """--- +type: concept +title: "Knowledge graph" +aliases: ["graph view", "wiki graph"] +date_created: "2026-05-02" +date_updated: "2026-05-02" +source_count: 1 +tags: [graph, wiki, visualization] +maturity: seed +--- + +# Knowledge graph + +> **TLDR:** A knowledge graph shows wiki pages as nodes and wikilinks as relationships. + +## Overview + +In Link, the knowledge graph makes relationships between sources, concepts, and entities visible. This helps users inspect what an agent has connected and where a claim came from. *Source: [[local-release-notes]]* `[confidence: high]` + +The graph supports the same mental model as MCP context retrieval: a topic is not isolated, it lives in a neighborhood of related pages. *Source: [[local-release-notes]]* `[confidence: high]` + +## Key Facts + +- **Graph views make relationships inspectable.** *Source: [[local-release-notes]]* `[confidence: high]` +- **Wikilinks provide the graph edges.** *Source: [[local-release-notes]]* `[confidence: high]` + +## Related + +- [[link]] - renders the graph. 
+- [[agent-memory]] - uses graph context to recover related knowledge. +- [[local-first-software]] - keeps graph data in markdown files. + +## Sources + +- [[local-release-notes]] +""", + "wiki/entities/link.md": """--- +type: entity +title: "Link" +entity_type: project +aliases: ["Link wiki", "Link MCP"] +date_created: "2026-05-02" +date_updated: "2026-05-02" +tags: [wiki, mcp, agents, local-first] +source_count: 2 +maturity: growing +--- + +# Link + +> **TLDR:** Link is a local-first wiki and MCP server that turns source notes into durable memory for AI agents. + +## Overview + +Link stores source material in `raw/` and compiled wiki pages in `wiki/`. The web viewer makes the wiki readable by humans, while MCP tools make the same knowledge readable by agents. *Source: [[local-release-notes]]* `[confidence: high]` + +The demo positions Link as a local [[agent-memory]] layer. It keeps knowledge inspectable through markdown and navigable through a [[knowledge-graph]]. *Source: [[agent-memory-session]]* `[confidence: high]` + +## Key Contributions + +- Provides search, context, backlinks, and graph tools. *Source: [[local-release-notes]]* `[confidence: high]` +- Keeps source material local and inspectable. *Source: [[local-release-notes]]* `[confidence: high]` +- Gives future agents durable project context. *Source: [[agent-memory-session]]* `[confidence: high]` + +## Connections + +- Implements [[agent-memory]]. +- Uses [[local-first-software]] as the storage model. +- Exposes a [[knowledge-graph]] for human inspection. +- Supports [[retrieval-augmented-generation]] workflows through MCP. 
+ +## Sources + +- [[agent-memory-session]] +- [[local-release-notes]] +""", + "wiki/memories/prefer-local-personal-memory.md": """--- +type: memory +title: "Prefer local personal memory" +memory_type: preference +scope: user +status: active +date_captured: "2026-05-04T00:00:00Z" +source: "demo" +review_status: pending +tags: [memory, agents, local-first] +aliases: ["local personal memory", "agent personal memory"] +--- + +# Prefer local personal memory + +> **TLDR:** The user wants Link to be local personal memory for agents, with the wiki as the inspectable storage format. + +## Memory + +The user wants [[link]] to feel like local personal memory for agents rather than only a wiki. Agents should remember user preferences, project context, decisions, and why those memories exist. + +## Use This When + +- Positioning Link in product copy or onboarding. +- Deciding whether a feature should prioritize [[agent-memory]] workflows over generic note management. +- Explaining why [[local-first-software]] matters for personal agent memory. + +## Source + +Captured as demo product intent for the first-run wiki. +""", + "wiki/explorations/why-link-helps-agents.md": """--- +type: exploration +title: "Why Link helps agents" +date_created: "2026-05-02" +query: "Why does Link help AI agents?" +aliases: ["agent memory demo answer"] +tags: [agents, memory, demo] +--- + +# Why Link helps agents + +> **Query:** Why does Link help AI agents? + +## Answer + +Link helps agents because it turns past project material into durable, queryable context. Instead of starting each session from a blank chat, an agent can ask for [[agent-memory]] and receive the main page plus related concepts, sources, and entities. + +The important part is inspectability. The memory is just markdown, the relationships are just wikilinks, and the graph shows what the agent can retrieve. This fits [[local-first-software]] and makes the memory easier to audit. 
+ +## Reasoning + +The answer combines [[agent-memory-session]], [[transformer-reading-notes]], and [[local-release-notes]]. Together they show Link as a local retrieval layer for AI workflows: sources become pages, pages form a [[knowledge-graph]], and MCP exposes that graph to agents. + +## Sources Consulted + +- [[agent-memory]] +- [[link]] +- [[knowledge-graph]] +- [[retrieval-augmented-generation]] +""", + "wiki/index.md": """# Link Demo Wiki Index + +> Last updated: 2026-05-02 | 11 pages | 3 sources + +## Categories + +### concepts +- [[agent-memory]] - Durable, inspectable context for AI agents. growing - 2 sources - also: AI memory, agent context +- [[retrieval-augmented-generation]] - Retrieves external context before generation. seed - 1 source - also: RAG +- [[transformers]] - Attention-based model architecture behind modern LLM systems. seed - 1 source +- [[local-first-software]] - Keeps user data on disk in inspectable formats. growing - 2 sources +- [[knowledge-graph]] - Shows pages as nodes and wikilinks as edges. seed - 1 source + +### entities +- [[link]] - Local-first wiki and MCP memory server for agents. growing - 2 sources - also: Link MCP + +### memories +- [[prefer-local-personal-memory]] - User preference that Link should behave as local personal memory for agents. preference · user + +### sources +- [[agent-memory-session]] - Demo note on durable project context. high +- [[transformer-reading-notes]] - Demo note connecting transformers, retrieval, and memory. high +- [[local-release-notes]] - Demo note on Link surfaces and graph visibility. high + +### explorations +- [[why-link-helps-agents]] - Filed answer explaining Link as durable agent memory. 
def _fsync_directory(path: Path) -> None:
    """Best-effort directory fsync so an atomic rename survives local crashes.

    Returns immediately when ``os.name`` is ``"nt"``. Failing to open or sync
    the directory is swallowed on purpose: this is a durability nicety, not a
    requirement for the write to succeed.
    """
    if os.name == "nt":
        return
    try:
        fd = os.open(str(path), os.O_RDONLY)
    except OSError:
        return
    try:
        os.fsync(fd)
    except OSError:
        pass
    finally:
        os.close(fd)


@contextmanager
def _file_lock(path: Path, *, timeout: float = 10.0, stale_after: float = 120.0):
    """Serialize local writes to one target file across Link runtimes.

    The lock is a sibling file named ``.<target name>.lock`` created with
    ``O_CREAT | O_EXCL``; the holder's PID is written into it.

    Args:
        path: The file being protected (``~`` is expanded). Its parent
            directory is created if needed.
        timeout: Seconds to keep retrying before giving up.
        stale_after: A lock file whose mtime is at least this many seconds old
            is deleted and acquisition is retried immediately.

    Raises:
        TimeoutError: The lock could not be acquired within ``timeout``.
    """
    target = path.expanduser()
    target.parent.mkdir(parents=True, exist_ok=True)
    lock_path = target.with_name(f".{target.name}.lock")
    start = time.monotonic()
    fd: int | None = None
    while fd is None:
        try:
            fd = os.open(str(lock_path), os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        except FileExistsError:
            try:
                if time.time() - lock_path.stat().st_mtime >= stale_after:
                    os.unlink(lock_path)
                    continue
            except OSError:
                # The lock vanished or could not be inspected; fall through to
                # the normal timeout / retry path.
                pass
            if time.monotonic() - start >= timeout:
                raise TimeoutError(f"timed out waiting for write lock: {lock_path}")
            time.sleep(0.025)
    try:
        # Writing the PID happens inside the try so that a failed write still
        # closes the descriptor and removes the lock file we just created.
        os.write(fd, str(os.getpid()).encode("utf-8"))
        yield
    finally:
        os.close(fd)
        try:
            os.unlink(lock_path)
        except OSError:
            pass


def _atomic_write_bytes_unlocked(path: Path, data: bytes) -> None:
    """Write bytes via temp file + ``os.replace`` to avoid partial target files.

    The temp file is created next to the target, flushed and fsynced, then
    renamed over the target. If anything fails before the rename completes
    (including the write itself), the temp file is removed and the error is
    re-raised, so failed writes do not leave ``.tmp`` files behind.
    """
    target = path.expanduser()
    target.parent.mkdir(parents=True, exist_ok=True)
    tmp_name = ""
    try:
        with tempfile.NamedTemporaryFile(
            mode="wb",
            dir=target.parent,
            prefix=f".{target.name}.",
            suffix=".tmp",
            delete=False,
        ) as handle:
            tmp_name = handle.name
            handle.write(data)
            handle.flush()
            os.fsync(handle.fileno())
        os.replace(tmp_name, target)
    except BaseException:
        # delete=False means nobody else will clean this up for us.
        if tmp_name:
            try:
                os.unlink(tmp_name)
            except OSError:
                pass
        raise
    _fsync_directory(target.parent)


def atomic_write_bytes(path: Path, data: bytes) -> None:
    """Write bytes with a temp-file replace and a per-target local lock."""
    with _file_lock(path):
        _atomic_write_bytes_unlocked(path, data)


def atomic_write_text(path: Path, text: str, *, encoding: str = "utf-8") -> None:
    """Encode ``text`` with ``encoding`` and write it via ``atomic_write_bytes``."""
    atomic_write_bytes(path, text.encode(encoding))


def atomic_write_json(path: Path, payload: Any, *, indent: int = 2, trailing_newline: bool = True) -> None:
    """Serialize ``payload`` with ``json.dumps`` and write it atomically.

    Args:
        path: Destination file.
        payload: Any value ``json.dumps`` accepts.
        indent: Indent passed to ``json.dumps``.
        trailing_newline: Append a final ``"\\n"`` when true.
    """
    text = json.dumps(payload, indent=indent)
    if trailing_newline:
        text += "\n"
    atomic_write_text(path, text)


def append_text(path: Path, text: str, *, encoding: str = "utf-8", initial_text: str = "") -> None:
    """Append one complete text block under the same local lock as replacements.

    Args:
        path: File to append to (``~`` is expanded); parents are created.
        text: Block appended on every call.
        encoding: Text encoding used to open the file.
        initial_text: Written before ``text`` only when the file is empty at
            the time of the call (for example a header for a new log).
    """
    target = path.expanduser()
    target.parent.mkdir(parents=True, exist_ok=True)
    with _file_lock(target):
        with target.open("a", encoding=encoding) as handle:
            if initial_text and target.stat().st_size == 0:
                handle.write(initial_text)
            handle.write(text)
            handle.flush()
            os.fsync(handle.fileno())
# Matches a leading frontmatter fence: "---", the block contents (group 1), and
# a closing "---" followed by a newline or end of text. CRLF is tolerated.
FRONTMATTER_RE = re.compile(r"\A---[ \t]*\r?\n(.*?)\r?\n---[ \t]*(?:\r?\n|\Z)", re.DOTALL)


def _unquote(value: str) -> str:
    """Strip whitespace and one pair of matching surrounding quotes, if any."""
    value = value.strip()
    if len(value) >= 2 and value[0] == value[-1] and value[0] in {"'", '"'}:
        return value[1:-1]
    return value


def _csv_list(value: str) -> list[str]:
    """Split the inside of an inline ``[a, "b, c"]`` list into clean items.

    Uses ``csv.reader`` so quoted commas survive; falls back to a plain comma
    split if the csv module rejects the input. Empty items are dropped.
    """
    try:
        row = next(csv.reader([value], skipinitialspace=True))
    except csv.Error:
        row = value.split(",")
    # Unquote each item once, then discard the ones that end up empty.
    cleaned = (_unquote(item).strip() for item in row)
    return [item for item in cleaned if item]


def parse_frontmatter_value(value: str) -> object:
    """Parse one raw value: ``[...]`` becomes a list of strings, else a string."""
    raw = value.strip()
    if raw.startswith("[") and raw.endswith("]"):
        return _csv_list(raw[1:-1])
    return _unquote(raw)


def parse_frontmatter(text: str) -> tuple[dict[str, object], str]:
    """Split a page into ``(metadata, body)``.

    Only flat ``key: value`` lines are understood; lines without a colon and
    lines starting with ``#`` are ignored. When no frontmatter fence is found
    the result is ``({}, text)``.
    """
    match = FRONTMATTER_RE.match(text)
    if not match:
        return {}, text
    meta: dict[str, object] = {}
    for line in match.group(1).splitlines():
        if ":" not in line or line.lstrip().startswith("#"):
            continue
        key, value = line.split(":", 1)
        meta[key.strip()] = parse_frontmatter_value(value)
    return meta, text[match.end():]


def frontmatter_string(value: object) -> str:
    """Escape backslashes and double quotes for use inside a quoted value."""
    return str(value).replace("\\", "\\\\").replace('"', '\\"')


def csv_values(raw: str | None) -> list[str]:
    """Split a comma-separated string into stripped, non-empty items."""
    if not raw:
        return []
    return [item.strip() for item in raw.split(",") if item.strip()]


def meta_tags(value: object) -> list[str]:
    """Normalize a ``tags``-style metadata value to a list of strings.

    Lists are stringified and stripped item by item. Any other value is
    treated as text: surrounding brackets are removed, it is split on commas,
    and quotes around each item are stripped. ``None`` (for example a missing
    metadata key) yields an empty list rather than the literal ``"None"``.
    """
    if value is None:
        return []
    if isinstance(value, list):
        return [str(item).strip() for item in value if str(item).strip()]
    return [item.strip().strip("\"'") for item in csv_values(str(value).strip("[]"))]


def _needs_quotes(value: str) -> bool:
    """Return True for empty strings and strings containing list-unsafe characters."""
    return not value or any(char in value for char in ",[]{}:#\"'\n\r\t")


def _format_list_item(value: object) -> str:
    """Render one inline-list item, double-quoting and escaping when required."""
    text = str(value).strip()
    if not _needs_quotes(text):
        return text
    return '"' + frontmatter_string(text) + '"'


def yaml_list(values: Iterable[object]) -> str:
    """Render values as an inline ``[a, b, c]`` list."""
    return "[" + ", ".join(_format_list_item(value) for value in values) + "]"


def format_frontmatter_value(value: object) -> str:
    """Format a Python value for a ``key: value`` frontmatter line.

    Lists and tuples become inline lists in their given order. Sets are sorted
    by their string form first so the written file does not depend on set
    iteration order. Booleans become ``true``/``false``; everything else is
    ``str(value)``.
    """
    if isinstance(value, set):
        return yaml_list(sorted(value, key=str))
    if isinstance(value, (list, tuple)):
        return yaml_list(value)
    if isinstance(value, bool):
        return "true" if value else "false"
    return str(value)


def update_frontmatter_fields(
    text: str,
    updates: Mapping[str, object],
    remove: set[str] | None = None,
) -> str:
    """Return ``text`` with frontmatter keys replaced, added, or removed.

    Args:
        text: Full page text, with or without a frontmatter block.
        updates: Keys to set; values go through ``format_frontmatter_value``.
            Existing keys are rewritten in place, new keys are appended.
        remove: Keys whose lines are dropped from an existing block.

    Returns:
        The rewritten page. When ``text`` has no frontmatter, a new block
        holding only ``updates`` is prepended. Comment lines and lines without
        a colon inside an existing block are kept as they are.
    """
    remove = remove or set()
    formatted = {key: format_frontmatter_value(value) for key, value in updates.items()}
    match = FRONTMATTER_RE.match(text)
    if not match:
        frontmatter = [f"{key}: {value}" for key, value in formatted.items()]
        return "---\n" + "\n".join(frontmatter) + "\n---\n\n" + text.lstrip("\n")

    seen: set[str] = set()
    lines: list[str] = []
    for line in match.group(1).splitlines():
        if ":" not in line or line.lstrip().startswith("#"):
            lines.append(line)
            continue
        key = line.split(":", 1)[0].strip()
        if key in remove:
            continue
        if key in formatted:
            lines.append(f"{key}: {formatted[key]}")
            seen.add(key)
        else:
            lines.append(line)
    # Keys that were not already present are appended in update order.
    for key, value in formatted.items():
        if key not in seen:
            lines.append(f"{key}: {value}")
    return "---\n" + "\n".join(lines) + "\n---\n" + text[match.end():].lstrip("\n")


def frontmatter_int(value: object) -> int:
    """Coerce a metadata value to ``int``; falsy or non-integer text yields 0."""
    try:
        return int(str(value or "0").strip())
    except ValueError:
        return 0
# Directory names that are never treated as raw source material by default.
DEFAULT_SKIP_DIRS = {
    ".git",
    "__pycache__",
    ".pytest_cache",
    ".ruff_cache",
    ".tox",
    "dist",
    "build",
    ".venv",
    "venv",
    "node_modules",
}

# Number of raw paths combined into one regex alternation when matching.
SOURCE_RAW_MATCH_CHUNK_SIZE = 256


def raw_source_files(raw_dir: Path, skip_dirs: set[str] | None = None) -> list[Path]:
    """List candidate raw source files under ``raw_dir`` in sorted order.

    Skipped: non-files, names starting with ``.``, anything under a top-level
    ``memory-captures`` directory, and any path with a component in the skip
    set.

    Args:
        raw_dir: Root of the raw sources; a missing directory yields ``[]``.
        skip_dirs: Path components to exclude. ``None`` means
            ``DEFAULT_SKIP_DIRS``; an explicit empty set disables skipping.
    """
    if not raw_dir.exists():
        return []
    # Compare against None so a caller can pass set() to skip nothing.
    skipped = DEFAULT_SKIP_DIRS if skip_dirs is None else skip_dirs
    files: list[Path] = []
    for path in sorted(raw_dir.rglob("*")):
        if not path.is_file() or path.name.startswith("."):
            continue
        rel_parts = path.relative_to(raw_dir).parts
        if rel_parts and rel_parts[0] == "memory-captures":
            continue
        if any(part in skipped for part in rel_parts):
            continue
        files.append(path)
    return files


def source_page_texts(wiki_dir: Path) -> dict[str, str]:
    """Map lowercased source page name to its full text."""
    return {name: str(record["text"]) for name, record in source_page_index(wiki_dir).items()}


def _heading_title(body: str) -> str:
    """Return the text of the first ``# `` heading in ``body``, or ``""``."""
    match = re.search(r"^#\s+(.+)", body, re.MULTILINE)
    return match.group(1).strip() if match else ""


def source_page_index(
    wiki_dir: Path,
    read_warnings: list[dict[str, str]] | None = None,
) -> dict[str, dict[str, object]]:
    """Index the Markdown pages under ``wiki_dir / "sources"``.

    Args:
        wiki_dir: Wiki root directory.
        read_warnings: Optional list that receives one ``{"page", "error"}``
            entry per page that could not be read; such pages are left out of
            the result.

    Returns:
        A dict keyed by lowercased file stem. Each record has ``name``,
        ``path`` (``wiki/``-relative posix path), ``title`` (frontmatter
        title, else first heading, else stem), ``text`` and ``mtime`` (0.0
        when ``stat`` fails). Empty when the sources directory is missing.
    """
    sources_dir = wiki_dir / "sources"
    if not sources_dir.exists():
        return {}
    records: dict[str, dict[str, object]] = {}
    for page in sorted(sources_dir.rglob("*.md")):
        if page.name.startswith("."):
            continue
        try:
            text = page.read_text(encoding="utf-8", errors="replace")
        except OSError as exc:
            if read_warnings is not None:
                read_warnings.append({
                    "page": f"wiki/{page.relative_to(wiki_dir).as_posix()}",
                    "error": str(exc),
                })
            continue
        meta, body = parse_frontmatter(text)
        try:
            page_mtime = page.stat().st_mtime
        except OSError:
            page_mtime = 0.0
        name = page.stem.lower()
        records[name] = {
            "name": page.stem,
            "path": f"wiki/{page.relative_to(wiki_dir).as_posix()}",
            "title": str(meta.get("title") or _heading_title(body) or page.stem),
            "text": text,
            "mtime": page_mtime,
        }
    return records


def source_matches_by_raw(
    source_records: dict[str, dict[str, object]],
    raw_rels: list[str],
    *,
    chunk_size: int = SOURCE_RAW_MATCH_CHUNK_SIZE,
) -> dict[str, list[str]]:
    """Build raw path -> source page matches without an O(raw * source) scan.

    Raw paths are escaped and combined into regex alternations of at most
    ``chunk_size`` entries (minimum 1), and each source page's text is scanned
    once per alternation.

    Returns:
        A dict with one key per entry of ``raw_rels``; each value lists the
        source record keys whose text contains that path, at most once per
        source.
    """
    matches: dict[str, list[str]] = {raw_rel: [] for raw_rel in raw_rels}
    if not source_records or not raw_rels:
        return matches

    # Longest paths first so that, inside one alternation, a path wins over
    # any shorter path that is its prefix.
    unique_raw_rels = sorted(set(raw_rels), key=lambda value: (-len(value), value))
    safe_chunk_size = max(1, chunk_size)
    patterns = [
        re.compile("|".join(re.escape(raw_rel) for raw_rel in unique_raw_rels[index : index + safe_chunk_size]))
        for index in range(0, len(unique_raw_rels), safe_chunk_size)
    ]
    for source_name, source_record in source_records.items():
        text = str(source_record.get("text") or "")
        if not text:
            continue
        seen_in_source: set[str] = set()
        for pattern in patterns:
            for match in pattern.finditer(text):
                raw_rel = match.group(0)
                if raw_rel in seen_in_source:
                    continue
                seen_in_source.add(raw_rel)
                matches.setdefault(raw_rel, []).append(source_name)
    return matches


def normalize_link_index(data: dict[str, dict[str, list[str]]]) -> dict[str, dict[str, list[str]]]:
    """Return a comparable copy of a backlinks index.

    Only the ``backlinks`` and ``forward`` sections are kept. Keys and values
    are lowercased, values are de-duplicated and sorted. Sections that are not
    mappings and entries whose value is not a list are ignored.
    """
    normalized: dict[str, dict[str, list[str]]] = {"backlinks": {}, "forward": {}}
    for section in ("backlinks", "forward"):
        entries = data.get(section, {})
        if not isinstance(entries, dict):
            # Hand-edited or partially written index; nothing usable here.
            continue
        for key, values in entries.items():
            if isinstance(values, list):
                normalized[section][key.lower()] = sorted({str(value).lower() for value in values})
    return normalized
def backlinks_health(wiki_dir: Path) -> tuple[str, str]:
    """Report whether ``_backlinks.json`` matches a fresh rebuild.

    Returns a ``(state, message)`` pair where ``state`` is one of
    ``"missing"``, ``"invalid"``, ``"stale"`` or ``"current"``.
    """
    stored, problem = load_backlinks_index(
        wiki_dir / "_backlinks.json",
        missing_error="missing wiki/_backlinks.json",
    )
    if problem:
        state = "missing" if "missing" in problem else "invalid"
        return state, problem

    try:
        rebuilt = build_backlinks(wiki_dir)
    except OSError as exc:
        return "invalid", f"could not inspect wiki pages for backlinks: {exc}"

    # Compare normalized forms so case and ordering differences do not count.
    up_to_date = stored is not None and normalize_link_index(stored) == normalize_link_index(rebuilt)
    if not up_to_date:
        return "stale", "wiki/_backlinks.json is stale"
    return "current", "wiki/_backlinks.json is current"


def _source_page_suggestion(raw_rel: str) -> str:
    """Suggest a ``wiki/sources/<slug>.md`` path from a raw file's stem."""
    lowered_stem = Path(raw_rel).stem.lower()
    slug = re.sub(r"[^a-z0-9]+", "-", lowered_stem).strip("-")
    if not slug:
        slug = "source"
    return f"wiki/sources/{slug}.md"


def _secret_blocked_items(items: list[dict[str, object]]) -> list[dict[str, object]]:
    """Keep the items that carry a truthy ``secret_warnings`` value."""
    return list(filter(lambda entry: entry.get("secret_warnings"), items))


def _access_blocked_items(items: list[dict[str, object]]) -> list[dict[str, object]]:
    """Keep the items that carry a truthy ``scan_error`` value."""
    return list(filter(lambda entry: entry.get("scan_error"), items))


def build_ingest_safety(
    pending_raw: list[dict[str, object]],
    represented_raw: list[dict[str, object]],
) -> dict[str, object]:
    """Summarize raw-source secret warning state for agents and UI.

    Precedence of the reported status: unreadable pending files, then pending
    files with secret warnings (both ``"blocked"``), then warnings on already
    represented files (``"warning"``), otherwise ``"clear"``.
    """
    with_secrets = _secret_blocked_items(pending_raw)
    unreadable = _access_blocked_items(pending_raw)

    # Secret-flagged items first, then unreadable ones not already listed.
    blocked = list(with_secrets)
    for entry in unreadable:
        if entry not in with_secrets:
            blocked.append(entry)

    represented_flagged = _secret_blocked_items(represented_raw)
    flagged = blocked + represented_flagged

    distinct_labels: set[str] = set()
    total_warnings = 0
    for entry in flagged:
        entry_labels = entry.get("secret_warnings")
        if isinstance(entry_labels, list):
            distinct_labels.update(str(label) for label in entry_labels)
        total_warnings += int(entry.get("secret_warning_count") or 0)

    if unreadable:
        status = "blocked"
        if len(unreadable) == 1:
            summary = f"{len(unreadable)} pending raw file could not be inspected before ingest."
        else:
            summary = f"{len(unreadable)} pending raw files could not be inspected before ingest."
    elif with_secrets:
        status = "blocked"
        if len(with_secrets) == 1:
            summary = f"{len(with_secrets)} pending raw file needs redaction before ingest."
        else:
            summary = f"{len(with_secrets)} pending raw files need redaction before ingest."
    elif represented_flagged:
        status = "warning"
        summary = "Raw source warnings exist in already represented files."
    else:
        status = "clear"
        summary = "No secret-looking values detected in raw sources."

    return {
        "status": status,
        "summary": summary,
        "blocked_count": len(blocked),
        "access_blocked_count": len(unreadable),
        "warning_count": total_warnings,
        "labels": sorted(distinct_labels),
        "blocked_raw": [str(entry.get("raw") or "") for entry in blocked],
    }
def build_ingest_plan(status: dict[str, object], limit: int = 5) -> dict[str, object]:
    """Build a short, actionable ingest workflow for agents and humans.

    Args:
        status: Ingest status payload. Reads ``guidance`` (a dict providing
            ``state`` and ``agent_prompt``), ``pending_raw`` (a list of
            per-file dicts) and ``source_read_warnings`` (a list of dicts).
            Values of any other type are treated as missing, and non-dict
            entries inside ``pending_raw`` are ignored.
        limit: Maximum number of entries placed in ``batch``; values below 1
            are clamped to 1.

    Returns:
        A dict with ``state``, ``title``, ``summary``, ``batch``, ``steps``,
        ``agent_prompt`` and ``post_checks``. The pending, stale and blocked
        states additionally carry ``memory_prompt``.
    """
    guidance_value = status.get("guidance")
    guidance = guidance_value if isinstance(guidance_value, dict) else {}
    state = str(guidance.get("state") or "unknown")
    pending_value = status.get("pending_raw")
    # Drop malformed entries up front so the sort key and the batch loop can
    # call .get() safely; previously a non-dict entry raised AttributeError.
    pending_raw = (
        [item for item in pending_value if isinstance(item, dict)]
        if isinstance(pending_value, list)
        else []
    )
    batch_size = max(limit, 1)
    # Stale entries sort first; ties are broken alphabetically by raw path.
    ordered_pending_raw = sorted(
        pending_raw,
        key=lambda item: (0 if item.get("stale") else 1, str(item.get("raw") or "")),
    )
    batch: list[dict[str, object]] = []
    for item in ordered_pending_raw[:batch_size]:
        raw_rel = str(item.get("raw") or "")
        if not raw_rel:
            continue
        batch_item: dict[str, object] = {
            "raw": raw_rel,
            "size_bytes": int(item.get("size_bytes") or 0),
            "suggested_source_page": _source_page_suggestion(raw_rel),
        }
        if item.get("stale"):
            source_page_paths = list(item.get("source_page_paths") or [])
            batch_item["stale"] = True
            batch_item["stale_reason"] = str(item.get("stale_reason") or "")
            batch_item["source_page_paths"] = source_page_paths
            if source_page_paths:
                # The first linked page is the one the agent should refresh.
                batch_item["target_source_page"] = source_page_paths[0]
        batch.append(batch_item)

    if state == "pending_raw" and batch:
        first = batch[0]
        batch_count = len(batch)
        file_label = "file" if batch_count == 1 else "files"
        return {
            "state": state,
            "title": "Ingest pending raw sources",
            "summary": f"Start with {first['raw']} and process at most {batch_count} {file_label} in this pass.",
            "batch": batch,
            "steps": [
                "Read each raw file completely before writing wiki pages.",
                "Create or update one source page per raw file and include the exact raw path.",
                "Update existing concept/entity/memory pages before creating new thin pages.",
                "Keep durable memories proposal-only until the human approves them.",
                "Rebuild index and backlinks, then validate before reporting ingest complete.",
            ],
            "agent_prompt": guidance.get("agent_prompt"),
            "memory_prompt": f"propose memories from {first['raw']}",
            "post_checks": [
                "link rebuild-index",
                "link rebuild-backlinks",
                "link validate",
                "link status --validate",
            ],
        }

    if state == "stale_raw" and batch:
        first = batch[0]
        batch_count = len(batch)
        file_label = "file" if batch_count == 1 else "files"
        return {
            "state": state,
            "title": "Refresh stale source pages",
            "summary": f"Start with {first['raw']} and refresh at most {batch_count} stale raw {file_label} in this pass.",
            "batch": batch,
            "steps": [
                "Read each changed raw file completely before editing wiki pages.",
                "Update the existing source page rather than creating a duplicate page.",
                "Update affected concept/entity pages only where the source materially changed.",
                "Keep durable memories proposal-only until the human approves them.",
                "Rebuild index and backlinks, then validate before reporting ingest complete.",
            ],
            "agent_prompt": guidance.get("agent_prompt"),
            "memory_prompt": f"propose memories from {first['raw']}",
            "post_checks": [
                "link rebuild-index",
                "link rebuild-backlinks",
                "link validate",
                "link status --validate",
            ],
        }

    if state == "blocked_secrets":
        # NOTE(review): assumes the helper returns a list of dict items -- confirm.
        blocked = _secret_blocked_items(pending_raw)
        first = blocked[0] if blocked else {"raw": "raw/"}
        return {
            "state": state,
            "title": "Redact raw sources before ingest",
            "summary": f"Start with {first.get('raw', 'raw/')}; Link will not suggest ingesting secret-looking raw content.",
            "batch": [
                {
                    "raw": str(item.get("raw") or ""),
                    "size_bytes": int(item.get("size_bytes") or 0),
                    "secret_warnings": list(item.get("secret_warnings") or []),
                    "suggested_source_page": _source_page_suggestion(str(item.get("raw") or "")),
                }
                for item in blocked[:batch_size]
            ],
            "steps": [
                "Open each flagged raw file locally.",
                "Remove or redact the secret-looking values before asking any agent to ingest it.",
                "Refresh ingest status after redaction.",
                "Only then ask the agent to create source-backed wiki pages.",
            ],
            "agent_prompt": None,
            "memory_prompt": None,
            "post_checks": ["link ingest-status", "link status --validate"],
        }

    if state == "blocked_raw_access":
        # NOTE(review): assumes the helper returns a list of dict items -- confirm.
        blocked = _access_blocked_items(pending_raw)
        first = blocked[0] if blocked else {"raw": "raw/"}
        return {
            "state": state,
            "title": "Inspect raw source access",
            "summary": f"Start with {first.get('raw', 'raw/')}; Link could not read it to run safety checks.",
            "batch": [
                {
                    "raw": str(item.get("raw") or ""),
                    "size_bytes": int(item.get("size_bytes") or 0),
                    "scan_error": str(item.get("scan_error") or ""),
                    "suggested_source_page": _source_page_suggestion(str(item.get("raw") or "")),
                }
                for item in blocked[:batch_size]
            ],
            "steps": [
                "Check the file still exists and is readable by the local user.",
                "Fix permissions or move the source to a readable raw/ file.",
                "Refresh ingest status before asking an agent to ingest it.",
                "Only ingest after Link can inspect the raw source for secret-looking values.",
            ],
            "agent_prompt": None,
            "memory_prompt": None,
            "post_checks": ["link ingest-status", "link status --validate"],
        }

    if state == "blocked_source_access":
        warnings_value = status.get("source_read_warnings")
        warnings = warnings_value if isinstance(warnings_value, list) else []
        placeholder = {"page": "wiki/sources/.md"}
        first = warnings[0] if warnings and isinstance(warnings[0], dict) else placeholder
        return {
            "state": state,
            "title": "Inspect source page access",
            "summary": f"Start with {first.get('page', placeholder['page'])}; Link could not read one or more source pages.",
            "batch": [
                {
                    "page": str(item.get("page") or ""),
                    "error": str(item.get("error") or ""),
                }
                for item in warnings[:batch_size]
                if isinstance(item, dict)
            ],
            "steps": [
                "Check that the source page still exists and is readable by the local user.",
                "Fix permissions or repair the page before relying on represented/pending raw counts.",
                "Refresh ingest status after the page is readable.",
                "Run validation before reporting ingest complete.",
            ],
            "agent_prompt": None,
            "memory_prompt": None,
            "post_checks": ["link ingest-status", "link validate", "link status --validate"],
        }

    if state == "stale_graph":
        return {
            "state": state,
            "title": "Repair graph index",
            "summary": "Raw sources are represented, but the graph index is stale.",
            "batch": [],
            "steps": [
                "Run the graph repair before relying on search, context, or graph views.",
                "Validate the wiki after rebuilding backlinks.",
            ],
            "agent_prompt": guidance.get("agent_prompt"),
            "post_checks": ["link rebuild-backlinks", "link validate", "link status --validate"],
        }

    if state == "empty":
        return {
            "state": state,
            "title": "Add first sources",
            "summary": "Drop notes, articles, transcripts, screenshots, or project files into raw/.",
            "batch": [],
            "steps": [
                "Add one or more source files to raw/.",
                "Ask your agent to ingest the specific raw file.",
                "Review generated pages before relying on them as memory.",
            ],
            "agent_prompt": None,
            "post_checks": ["link ingest-status", "link status --validate"],
        }

    if state == "ready":
        return {
            "state": state,
            "title": "Ready for new sources",
            "summary": "All current raw sources are represented and the graph index is current.",
            "batch": [],
            "steps": [
                "Use query or brief for retrieval.",
                "Add new files to raw/ when Link should learn new source-backed context.",
            ],
            "agent_prompt": None,
            "post_checks": ["link doctor", "link status --validate"],
        }

    # Any other state (including "unknown" and a pending/stale state with an
    # empty batch) falls back to the initialization plan.
    return {
        "state": state,
        "title": "Initialize Link",
        "summary": "Link needs its raw/ and wiki/ structure before ingest can start.",
        "batch": [],
        "steps": [
            "Run link init or rerun an installer.",
            "Check readiness before adding sources.",
        ],
        "agent_prompt": None,
        "post_checks": ["link init", "link status --validate"],
    }
def build_ingest_guidance(status: dict[str, object]) -> dict[str, object]:
    """Pick the single next-step guidance state for an ingest status payload.

    Checks run in priority order and the first match wins: missing
    structure, unreadable source pages, unreadable raw files, secret-looking
    raw files, stale raw files, pending raw files, stale graph index, empty
    raw directory, and finally ``ready``.

    Args:
        status: Ingest status payload. Reads ``has_raw_dir``, ``has_wiki_dir``,
            ``pending_raw``, ``pending_count``, ``raw_count``,
            ``backlinks_status`` and ``source_read_warning_count``.

    Returns:
        A dict with ``state``, ``summary``, ``agent_prompt`` (``None`` when no
        agent action is suggested), ``commands`` and ``notes``.
    """
    pending_raw = status.get("pending_raw")
    # Ignore malformed (non-dict) entries so later .get() calls cannot fail.
    pending_items = (
        [item for item in pending_raw if isinstance(item, dict)]
        if isinstance(pending_raw, list)
        else []
    )
    # Fall back to the number of listed items when the payload has no count.
    pending_count = int(status.get("pending_count") or 0) or len(pending_items)
    raw_count = int(status.get("raw_count") or 0)
    backlinks_status = str(status.get("backlinks_status") or "unknown")
    source_read_warning_count = int(status.get("source_read_warning_count") or 0)

    if not (status.get("has_raw_dir") and status.get("has_wiki_dir")):
        return {
            "state": "missing_structure",
            "summary": "Link is not initialized here yet.",
            "agent_prompt": None,
            "commands": ["link init", "link status --validate"],
            "notes": ["Run the installer or initialize this directory before ingesting sources."],
        }

    if source_read_warning_count:
        # Pluralise like every other count-based summary in this function.
        page_label = "source page" if source_read_warning_count == 1 else "source pages"
        return {
            "state": "blocked_source_access",
            "summary": (
                f"{source_read_warning_count} {page_label} could not be inspected."
                " Fix source page access before ingest."
            ),
            "agent_prompt": None,
            "commands": ["link ingest-status", "link validate", "link status --validate"],
            "notes": [
                "Represented and pending raw counts may be incomplete while source pages cannot be read.",
                "Fix permissions or repair the page, then refresh ingest status.",
            ],
        }

    # The blocked-item filters are only evaluated once the earlier states are
    # ruled out. NOTE(review): presumably pure filters over the list -- confirm.
    access_items = _access_blocked_items(pending_items)
    if access_items:
        first = str(access_items[0].get("raw", "raw/"))
        count = len(access_items)
        summary = f"{count} pending raw file could not be inspected."
        if count != 1:
            summary = f"{count} pending raw files could not be inspected."
        return {
            "state": "blocked_raw_access",
            "summary": summary + f" Fix access for {first} before ingest.",
            "agent_prompt": None,
            "commands": ["link ingest-status", "link status --validate"],
            "notes": [
                "Do not ask an agent to ingest raw files that Link cannot read and scan for secret-looking values.",
                "Fix permissions or replace the file, then refresh ingest status.",
            ],
        }

    secret_items = _secret_blocked_items(pending_items)
    if secret_items:
        first = str(secret_items[0].get("raw", "raw/"))
        count = len(secret_items)
        summary = f"{count} pending raw file contains secret-looking values."
        if count != 1:
            summary = f"{count} pending raw files contain secret-looking values."
        return {
            "state": "blocked_secrets",
            "summary": summary + f" Redact {first} before ingest.",
            "agent_prompt": None,
            "commands": ["link ingest-status", "link status --validate"],
            "notes": [
                "Do not ask an agent to ingest flagged raw files until the secret-looking values are removed or redacted.",
                "After redaction, refresh ingest status and continue with the normal ingest prompt.",
            ],
        }

    stale_items = [item for item in pending_items if item.get("stale")]
    if stale_items:
        first = str(stale_items[0].get("raw", "raw/"))
        count = len(stale_items)
        summary = f"{count} represented raw file changed after its source page was written."
        if count != 1:
            summary = f"{count} represented raw files changed after their source pages were written."
        return {
            "state": "stale_raw",
            "summary": summary,
            "agent_prompt": f"re-ingest {first} into Link",
            "commands": ["link rebuild-index", "link rebuild-backlinks", "link validate", "link status --validate"],
            "notes": [
                "The raw file is represented, but it is newer than the linked source page.",
                "Ask the agent to refresh the existing source page before relying on retrieval.",
            ],
        }

    if pending_items:
        first = str(pending_items[0].get("raw", "raw/"))
        more = pending_count - 1
        summary = f"{pending_count} raw file needs ingest."
        if pending_count != 1:
            summary = f"{pending_count} raw files need ingest."
        if more > 0:
            summary += f" Start with {first}; {more} more remain."
        return {
            "state": "pending_raw",
            "summary": summary,
            "agent_prompt": f"ingest {first} into Link",
            "commands": ["link rebuild-index", "link rebuild-backlinks", "link validate", "link status --validate"],
            "notes": [
                "If the source contains user preferences, decisions, or project context, ask for memory proposals before saving durable memories.",
                "After ingest, rebuild index/backlinks if your agent did not already do it.",
            ],
        }

    if backlinks_status != "current":
        return {
            "state": "stale_graph",
            "summary": "Raw files are represented, but the graph index needs repair.",
            "agent_prompt": "rebuild Link backlinks and validate the wiki",
            "commands": ["link rebuild-backlinks", "link validate", "link doctor"],
            "notes": ["Run the graph repair before relying on context or graph views."],
        }

    if raw_count == 0:
        return {
            "state": "empty",
            "summary": "Link is ready, but raw/ has no source files yet.",
            "agent_prompt": None,
            "commands": ["link status --validate", "link serve"],
            "notes": ["Drop notes, articles, transcripts, or project files into raw/, then ask your agent to ingest them into Link."],
        }

    return {
        "state": "ready",
        "summary": "All raw files are represented in wiki/sources and the graph index is current.",
        "agent_prompt": None,
        "commands": ["link doctor", "link status --validate"],
        "notes": ["Add new files to raw/ when you want Link to learn new source-backed knowledge."],
    }
+ + represented_raw: list[dict[str, object]] = [] + pending_raw: list[dict[str, object]] = [] + raw_secret_warning_count = 0 + raw_scan_warnings: list[dict[str, str]] = [] + stale_raw: list[dict[str, object]] = [] + for raw_path, rel in zip(raw_files, raw_rels): + matches = source_matches.get(rel, []) + match_records = [source_records[source_name] for source_name in matches] + scan = secret_file_scan(raw_path) + warnings = list(scan.get("labels") or []) + scan_error = str(scan.get("error") or "") + raw_secret_warning_count += len(warnings) + raw_mtime = 0.0 + try: + raw_stat = raw_path.stat() + size_bytes = raw_stat.st_size + raw_mtime = raw_stat.st_mtime + except OSError as exc: + size_bytes = 0 + if not scan_error: + scan_error = str(exc) + source_mtimes = [ + float(record.get("mtime") or 0) + for record in match_records + if record.get("mtime") is not None + ] + latest_source_mtime = max(source_mtimes) if source_mtimes else 0.0 + is_stale = bool(matches and raw_mtime and latest_source_mtime and raw_mtime > latest_source_mtime + 0.001) + if scan_error: + raw_scan_warnings.append({"raw": rel, "error": scan_error}) + item = { + "raw": rel, + "size_bytes": size_bytes, + "source_pages": matches, + "source_page_paths": [str(record.get("path") or "") for record in match_records], + "source_page_titles": [str(record.get("title") or record.get("name") or "") for record in match_records], + "secret_warnings": warnings, + "secret_warning_count": len(warnings), + "readable": not bool(scan_error), + "scan_error": scan_error, + "stale": is_stale, + "stale_reason": "raw changed after wiki source page" if is_stale else "", + "raw_mtime": raw_mtime, + "latest_source_mtime": latest_source_mtime, + } + if is_stale: + stale_raw.append(item) + pending_raw.append(item) + elif matches: + represented_raw.append(item) + else: + pending_raw.append(item) + + backlinks_status, backlinks_message = ( + backlinks_health(wiki_dir) + if wiki_dir.exists() + else ("missing", "missing wiki 
# Seed content written to a fresh wiki log before the first entry.
DEFAULT_LOG_TEXT = "# Link Wiki Log\n\n*Append-only record of wiki operations.*\n"


def utc_timestamp() -> str:
    """Return the current UTC time to whole seconds, e.g. ``2026-05-08T12:00:00Z``."""
    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


def write_default_log(path: Path) -> None:
    """Write the default log header to ``path`` via ``atomic_write_text``."""
    atomic_write_text(path, DEFAULT_LOG_TEXT)


def append_log(wiki_dir: Path, timestamp: str, operation: str, description: str, lines: list[str]) -> None:
    """Append one entry to ``<wiki_dir>/log.md``.

    The entry is a ``## [timestamp] operation | description`` heading, a blank
    line, one ``- `` bullet per item in ``lines``, then a blank line and a
    ``---`` rule. ``DEFAULT_LOG_TEXT`` is handed to ``append_text`` as the
    ``initial_text`` argument.
    """
    heading = f"## [{timestamp}] {operation} | {description}"
    bullets = [f"- {line}" for line in lines]
    entry_text = "\n".join([heading, "", *bullets, "", "---", ""])
    append_text(wiki_dir / "log.md", entry_text, initial_text=DEFAULT_LOG_TEXT)
def markdown_to_html(markdown: str, page_href: Callable[[str], str] = default_page_href) -> str:
    """Render Link's intentionally small wiki Markdown subset to HTML.

    Handles fenced code blocks, blockquotes, horizontal rules, ATX headings,
    pipe tables, unordered/ordered lists and paragraphs, line by line. Inline
    content is delegated to ``inline_markdown``; code-block lines are escaped
    with ``html.escape``.

    NOTE(review): the HTML tag literals were missing from the text under
    review (everything between ``<`` and ``>`` had been stripped). The tags
    below were restored from the surrounding control flow; confirm the exact
    markup (for example any ``thead``/class attributes) against the original
    module before merging.

    Args:
        markdown: Markdown source; coerced with ``str()``.
        page_href: Maps a wikilink target to an href; forwarded to
            ``inline_markdown``.

    Returns:
        The rendered fragments joined with newlines.
    """
    out: list[str] = []
    in_code = False
    in_table = False
    in_list = False
    list_type: str | None = None
    blockquote_lines: list[str] = []

    def _flush_blockquote() -> None:
        # Consecutive ">" lines are emitted as one blockquote.
        if blockquote_lines:
            out.append(f"<blockquote>{'<br>'.join(blockquote_lines)}</blockquote>")
            blockquote_lines.clear()

    def _close_list() -> None:
        nonlocal in_list
        if in_list:
            out.append(f"</{list_type}>")
            in_list = False

    def _close_table() -> None:
        nonlocal in_table
        if in_table:
            out.append("</table>")
            in_table = False

    for line in str(markdown).split("\n"):
        stripped = line.strip()
        if stripped.startswith("```"):
            _flush_blockquote()
            if in_code:
                out.append("</code></pre>")
                in_code = False
            else:
                code_lang = stripped[3:].strip()
                lang_attr = f' class="language-{html.escape(code_lang)}"' if code_lang else ""
                out.append(f"<pre><code{lang_attr}>")
                in_code = True
            continue
        if in_code:
            # Inside a fence nothing is interpreted; only escape the text.
            out.append(html.escape(line))
            continue
        if not stripped.startswith("|"):
            _close_table()
        # A blank line does not end a list; any other non-list line does.
        if in_list and not re.match(r"^\s*[-*]\s|^\s*\d+\.\s", line) and stripped:
            _close_list()
        if stripped.startswith(">"):
            _close_list()
            _close_table()
            blockquote_lines.append(inline_markdown(stripped[1:].strip(), page_href))
            continue
        _flush_blockquote()
        if stripped in ("---", "***", "___") and not in_table:
            out.append("<hr>")
            continue
        heading = re.match(r"^(#{1,6})\s+(.*)", line)
        if heading:
            level = len(heading.group(1))
            out.append(f"<h{level}>{inline_markdown(heading.group(2), page_href)}</h{level}>")
            continue
        if stripped.startswith("|"):
            cells = [cell.strip() for cell in stripped.strip("|").split("|")]
            # Skip the |---|:--| separator row under the header.
            if all(re.match(r"^[-:]+$", cell) for cell in cells):
                continue
            if not in_table:
                # The first row of a table is rendered as the header row.
                out.append(
                    "<table><tr>"
                    + "".join(f"<th>{inline_markdown(cell, page_href)}</th>" for cell in cells)
                    + "</tr>"
                )
                in_table = True
            else:
                out.append(
                    "<tr>"
                    + "".join(f"<td>{inline_markdown(cell, page_href)}</td>" for cell in cells)
                    + "</tr>"
                )
            continue
        unordered = re.match(r"^\s*[-*]\s+(.*)", line)
        if unordered:
            if not in_list or list_type != "ul":
                _close_list()
                out.append("<ul>")
                in_list, list_type = True, "ul"
            out.append(f"<li>{inline_markdown(unordered.group(1), page_href)}</li>")
            continue
        ordered = re.match(r"^\s*\d+\.\s+(.*)", line)
        if ordered:
            if not in_list or list_type != "ol":
                _close_list()
                out.append("<ol>")
                in_list, list_type = True, "ol"
            out.append(f"<li>{inline_markdown(ordered.group(1), page_href)}</li>")
            continue
        if not stripped:
            out.append("")
            continue
        out.append(f"<p>{inline_markdown(stripped, page_href)}</p>")

    # Close whatever is still open at end of input (e.g. an unterminated fence).
    if in_code:
        out.append("</code></pre>")
    _close_table()
    _close_list()
    _flush_blockquote()
    return "\n".join(out)
# Context words that must accompany an option from the same-named group in
# CONFLICT_OPTION_GROUPS.
CONFLICT_GROUP_CONTEXT = {
    "branch_policy": {"branch", "branches", "commit", "commits", "git", "merge", "pr", "pull", "push"},
    "storage_policy": {"agent", "agents", "backend", "data", "memory", "storage", "sync", "wiki"},
    "theme": {"background", "mode", "theme", "ui"},
    "install_method": {"install", "installer", "mcp", "package", "pip", "python", "setup"},
    "release_channel": {"package", "publish", "registry", "release", "version"},
}
MemoryLogWriter = Callable[[str, str, str, list[str]], None]
BacklinkRebuilder = Callable[[], bool]


def slugify(value: str, fallback: str = "memory") -> str:
    """Lower-case ``value`` and join its ``[a-z0-9]`` runs with hyphens.

    Returns ``fallback`` when no such run exists.
    """
    pieces = re.findall(r"[a-z0-9]+", value.lower())
    return "-".join(pieces) or fallback


def normalize_project(value: str | None) -> str:
    """Slugify a project name; ``None`` and empty input become ``""``."""
    return slugify(value if value else "", fallback="")


def default_project_for_target(target: Path) -> str:
    """Derive a project slug from ``target`` when it is a git checkout.

    A directory named ``wiki`` that contains ``index.md`` is swapped for its
    parent first. The slug of the directory name is returned only if a
    ``.git`` entry exists there; otherwise ``""``.
    """
    root = target.expanduser().resolve()
    looks_like_wiki = root.name == "wiki" and (root / "index.md").exists()
    if looks_like_wiki:
        root = root.parent
    if not (root / ".git").exists():
        return ""
    return normalize_project(root.name)


def memory_title(text: str, explicit_title: str | None = None) -> str:
    """Choose a title for a memory.

    A non-blank ``explicit_title`` wins (stripped). Otherwise the first
    sentence of the first non-blank line of ``text`` is used: trailing periods
    removed when it is at most 70 characters, else cut to 67 characters plus
    ``...``. Blank text yields ``"Memory"``.
    """
    if explicit_title:
        cleaned = explicit_title.strip()
        if cleaned:
            return cleaned
    first_line = "Memory"
    for candidate in text.splitlines():
        candidate = candidate.strip()
        if candidate:
            first_line = candidate
            break
    sentence = re.split(r"(?<=[.!?])\s+", first_line, maxsplit=1)[0].strip()
    if len(sentence) > 70:
        return sentence[:67].rstrip() + "..."
    return sentence.rstrip(".")
+ + +def memory_tokens(value: str) -> set[str]: + return { + token + for token in re.split(r"[^a-z0-9]+", value.lower()) + if len(token) >= 3 + } + + +def compact_memory_text(value: str) -> str: + return " ".join( + token + for token in re.split(r"[^a-z0-9]+", value.lower()) + if token + ) + + +def significant_memory_tokens(value: str) -> set[str]: + return { + token + for token in memory_tokens(value) + if token not in MEMORY_STOPWORDS + } + + +def has_negation(value: str) -> bool: + compact = compact_memory_text(value) + tokens = set(compact.split()) + if tokens & NEGATION_TERMS: + return True + return bool(re.search(r"\b(?:do not|does not|did not|should not|don't|can't|cannot)\b", value, re.IGNORECASE)) + + +def _extract_option_groups(value: str) -> dict[str, set[str]]: + tokens = memory_tokens(value) + groups: dict[str, set[str]] = {} + for group, options in CONFLICT_OPTION_GROUPS.items(): + matches = tokens & options + if matches: + groups[group] = matches + return groups + + +def _extract_preference_pairs(value: str) -> list[tuple[set[str], set[str]]]: + pairs: list[tuple[set[str], set[str]]] = [] + patterns = ( + r"\bprefer(?:s|red)?\s+(?P.+?)\s+over\s+(?P.+?)(?:[.;]|$)", + r"\buse\s+(?P.+?)\s+instead\s+of\s+(?P.+?)(?:[.;]|$)", + ) + for pattern in patterns: + for match in re.finditer(pattern, value, flags=re.IGNORECASE): + preferred = significant_memory_tokens(match.group("preferred")) + rejected = significant_memory_tokens(match.group("rejected")) + if preferred and rejected: + pairs.append((preferred, rejected)) + return pairs + + +def slim_memory(record: Mapping[str, object]) -> dict[str, object]: + return {key: value for key, value in record.items() if key != "body"} + + +def is_active_memory(record: Mapping[str, object]) -> bool: + return str(record.get("status") or "active").lower() not in {"archived", "stale"} + + +def memory_visible_for_project(record: Mapping[str, object], project: str | None = None) -> bool: + project_name = 
normalize_project(project) + if not project_name: + return True + if str(record.get("scope") or "").lower() != "project": + return True + record_project = normalize_project(str(record.get("project") or "")) + return not record_project or record_project == project_name + + +def extract_tldr(body: str) -> str: + match = re.search(r">\s*\*\*TLDR:\*\*\s*(.+)", body) + return match.group(1).strip() if match else "" + + +def first_body_snippet(body: str) -> str: + for line in body.splitlines(): + stripped = line.strip() + if stripped and not stripped.startswith("#") and not stripped.startswith(">"): + return stripped[:200] + return "" + + +def _heading_title(body: str) -> str: + match = re.search(r"^#\s+(.+)", body, re.MULTILINE) + return match.group(1).strip() if match else "" + + +def memory_record_from_page(wiki_dir: Path, path: Path, include_body: bool = True) -> dict[str, object]: + wiki_root = wiki_dir.expanduser().resolve() + path = path.expanduser().resolve() + text = path.read_text(encoding="utf-8", errors="replace") + meta, body = parse_frontmatter(text) + title = meta.get("title") or _heading_title(body) or memory_title(body) or path.stem + record: dict[str, object] = { + "name": path.stem, + "path": f"wiki/{path.relative_to(wiki_root).as_posix()}", + "title": title, + "memory_type": meta.get("memory_type") or "note", + "scope": meta.get("scope") or "user", + "project": normalize_project(str(meta.get("project", ""))), + "status": meta.get("status") or "active", + "date_captured": meta.get("date_captured", ""), + "updated_at": meta.get("updated_at", ""), + "update_count": meta.get("update_count", "0"), + "last_update_source": meta.get("last_update_source", ""), + "archived_at": meta.get("archived_at", ""), + "archive_reason": meta.get("archive_reason", ""), + "restored_at": meta.get("restored_at", ""), + "source": meta.get("source", ""), + "review_status": meta.get("review_status") or "pending", + "reviewed_at": meta.get("reviewed_at", ""), + "review_note": 
meta.get("review_note", ""), + "tags": meta_tags(meta.get("tags", "")), + "tldr": extract_tldr(body), + "snippet": first_body_snippet(body), + } + if include_body: + record["body"] = body + return record + + +def memory_records(wiki_dir: Path, include_body: bool = True) -> list[dict[str, object]]: + memories_dir = wiki_dir / "memories" + if not memories_dir.exists(): + return [] + records: list[dict[str, object]] = [] + for path in sorted(memories_dir.rglob("*.md")): + if path.name.startswith("."): + continue + records.append(memory_record_from_page(wiki_dir, path, include_body=include_body)) + return records + + +def memory_review_issues( + record: Mapping[str, object], + review_command: str = "review-memory", +) -> list[dict[str, str]]: + issues: list[dict[str, str]] = [] + status = str(record.get("status") or "active").lower() + review_status = str(record.get("review_status") or "pending").lower() + memory_type = str(record.get("memory_type") or "") + scope = str(record.get("scope") or "") + + if review_status in {"pending", "needs_review"}: + issues.append({ + "code": "pending_review", + "severity": "medium", + "message": "Memory has not been reviewed by the user.", + "suggested_action": f"Confirm it is still accurate, then run {review_command}.", + }) + elif review_status == "needs_update": + issues.append({ + "code": "needs_update", + "severity": "high", + "message": "Memory is marked as needing an update.", + "suggested_action": "Edit the memory page or archive it if it is no longer useful.", + }) + elif review_status not in MEMORY_REVIEW_STATUSES: + issues.append({ + "code": "invalid_review_status", + "severity": "high", + "message": f"Unknown review_status: {review_status}.", + "suggested_action": "Use pending, reviewed, or needs_update.", + }) + + if status == "stale": + issues.append({ + "code": "stale_status", + "severity": "high", + "message": "Memory is marked stale and is excluded from default recall.", + "suggested_action": "Archive it, restore it, 
or update the memory text.", + }) + if memory_type not in MEMORY_TYPES: + issues.append({ + "code": "invalid_memory_type", + "severity": "high", + "message": f"Unknown memory_type: {memory_type or 'missing'}.", + "suggested_action": f"Use one of: {', '.join(MEMORY_TYPES)}.", + }) + if scope not in MEMORY_SCOPES: + issues.append({ + "code": "invalid_scope", + "severity": "high", + "message": f"Unknown scope: {scope or 'missing'}.", + "suggested_action": f"Use one of: {', '.join(MEMORY_SCOPES)}.", + }) + if not str(record.get("source") or "").strip(): + issues.append({ + "code": "missing_source", + "severity": "medium", + "message": "Memory has no source metadata.", + "suggested_action": "Add source metadata so future agents know why this memory exists.", + }) + if not str(record.get("date_captured") or "").strip(): + issues.append({ + "code": "missing_date_captured", + "severity": "medium", + "message": "Memory has no date_captured metadata.", + "suggested_action": "Add the capture timestamp or recreate the memory.", + }) + if not (str(record.get("tldr") or "").strip() or str(record.get("snippet") or "").strip()): + issues.append({ + "code": "missing_summary", + "severity": "medium", + "message": "Memory has no usable summary.", + "suggested_action": "Add a TLDR line or a clear first paragraph.", + }) + return issues + + +def _tool_name(command: str) -> str: + return command.replace("-", "_") + + +def _cli_command(command: str) -> str: + return command.replace("_", "-") + + +def _memory_action( + *, + kind: str, + label: str, + description: str, + command: str, + tool: str, + arguments: Mapping[str, object], + priority: str, +) -> dict[str, object]: + return { + "kind": kind, + "label": label, + "description": description, + "command": command, + "tool": tool, + "arguments": dict(arguments), + "priority": priority, + } + + +def memory_action_hints( + record: Mapping[str, object], + issues: Iterable[Mapping[str, str]] | None = None, + review_command: str = 
"review-memory", +) -> list[dict[str, object]]: + """Return ordered actions for resolving or auditing one memory.""" + name = str(record.get("name") or "") + path = str(record.get("path") or f"wiki/memories/{name}.md") + status = str(record.get("status") or "active").lower() + issue_list = [dict(issue) for issue in issues] if issues is not None else memory_review_issues(record, review_command) + issue_codes = {str(issue.get("code") or "") for issue in issue_list} + review_cli = _cli_command(review_command) + review_tool = _tool_name(review_command) + actions: list[dict[str, object]] = [] + seen: set[str] = set() + + def add(action: dict[str, object]) -> None: + kind = str(action["kind"]) + if kind in seen: + return + actions.append(action) + seen.add(kind) + + if status == "archived": + add(_memory_action( + kind="restore", + label="Restore", + description="Restore this archived memory to active recall if it is valid again.", + command=f'python3 link.py restore-memory "{name}" .', + tool="restore_memory", + arguments={"identifier": name}, + priority="high", + )) + add(_memory_action( + kind="explain", + label="Explain", + description="Inspect why this memory exists before restoring it.", + command=f'python3 link.py explain-memory "{name}" .', + tool="explain_memory", + arguments={"identifier": name}, + priority="medium", + )) + add(_memory_action( + kind="forget", + label="Forget", + description="Permanently delete only after explicit user confirmation.", + command=f'python3 link.py forget-memory "{name}" . 
--confirm', + tool="forget_memory", + arguments={"identifier": name, "confirm": True}, + priority="low", + )) + return actions + + if issue_codes & {"invalid_review_status", "invalid_memory_type", "invalid_scope", "missing_source", "missing_date_captured"}: + add(_memory_action( + kind="edit_metadata", + label="Edit metadata", + description="Fix the Markdown frontmatter, then run review again.", + command=f'$EDITOR "{path}"', + tool="edit_memory_file", + arguments={"path": path}, + priority="high", + )) + if issue_codes & {"needs_update", "missing_summary"}: + add(_memory_action( + kind="update", + label="Update", + description="Merge corrected memory text and reset review to pending.", + command=f'python3 link.py update-memory "{name}" "new detail" .', + tool="update_memory", + arguments={"identifier": name, "memory": "new detail"}, + priority="high", + )) + if "stale_status" in issue_codes: + add(_memory_action( + kind="archive", + label="Archive", + description="Archive this stale memory so default recall ignores it.", + command=f'python3 link.py archive-memory "{name}" . 
--reason "stale"', + tool="archive_memory", + arguments={"identifier": name, "reason": "stale"}, + priority="high", + )) + if "pending_review" in issue_codes and not any( + issue.get("severity") == "high" for issue in issue_list + ): + add(_memory_action( + kind="review", + label="Review", + description="Mark this memory reviewed after the user confirms it is accurate.", + command=f'python3 link.py {review_cli} "{name}" .', + tool=review_tool, + arguments={"identifier": name}, + priority="high", + )) + + add(_memory_action( + kind="explain", + label="Explain", + description="Audit provenance, graph links, lifecycle, and review state.", + command=f'python3 link.py explain-memory "{name}" .', + tool="explain_memory", + arguments={"identifier": name}, + priority="medium", + )) + if "update" not in seen: + add(_memory_action( + kind="update", + label="Update", + description="Merge a corrected detail into this memory.", + command=f'python3 link.py update-memory "{name}" "new detail" .', + tool="update_memory", + arguments={"identifier": name, "memory": "new detail"}, + priority="medium", + )) + if "archive" not in seen: + add(_memory_action( + kind="archive", + label="Archive", + description="Hide this memory from default recall without deleting the Markdown file.", + command=f'python3 link.py archive-memory "{name}" . --reason "why"', + tool="archive_memory", + arguments={"identifier": name, "reason": "why"}, + priority="medium", + )) + add(_memory_action( + kind="forget", + label="Forget", + description="Permanently delete only after explicit user confirmation.", + command=f'python3 link.py forget-memory "{name}" . 
def memory_log_entries(
    wiki_dir: Path,
    record: Mapping[str, object],
    limit: int = 8,
) -> list[str]:
    """Return the latest ``log.md`` blocks that mention the given memory.

    The log is split on lines consisting of ``---``. A block matches when it
    contains, case-insensitively, the record's name, its title, or the path
    fragment ``memories/<name>.md``. Only the last ``limit`` matches are
    returned, in log order.

    Args:
        wiki_dir: Wiki root that holds ``log.md``.
        record: Memory record; only ``name`` and ``title`` are read.
        limit: Maximum number of blocks, clamped to 1..50. A value that
            cannot be converted to ``int`` falls back to 8.

    Returns:
        Matching log blocks (stripped), or ``[]`` when there is no log file.
    """
    try:
        requested = int(limit)
    except (TypeError, ValueError):
        requested = 8
    keep = min(max(requested, 1), 50)

    log_file = wiki_dir / "log.md"
    if not log_file.exists():
        return []
    contents = log_file.read_text(encoding="utf-8", errors="replace")

    name = str(record.get("name") or "")
    terms = [name, str(record.get("title") or "")]
    if name:
        terms.append(f"memories/{name}.md")
    wanted = {term.lower() for term in terms if term}

    found: list[str] = []
    for chunk in re.split(r"\n---\n", contents):
        entry = chunk.strip()
        if not entry:
            continue
        haystack = entry.lower()
        if any(term in haystack for term in wanted):
            found.append(entry)
    return found[-keep:]
+ elif issues: + state = "needs_review" + reason = "Memory is active but still needs review or stronger metadata." + else: + state = "ready" + reason = "Memory is active, reviewed, and has no detected quality issues." + return { + "default_enabled": default_enabled, + "state": state, + "reason": reason, + } + + +def memory_explanation( + wiki_dir: Path, + identifier: str, + records: Iterable[Mapping[str, object]] | None = None, + review_command: str = "review-memory", + backlinks_body_only: bool = True, +) -> dict[str, object]: + record_list = [dict(record) for record in records] if records is not None else memory_records(wiki_dir) + page_path, resolved_record, error = resolve_memory_page(wiki_dir, identifier, records=record_list) + if error: + raise ValueError(error) + assert page_path is not None and resolved_record is not None + + record = next( + ( + item for item in record_list + if str(item.get("name") or "") == str(resolved_record.get("name") or "") + ), + dict(resolved_record), + ) + text = page_path.read_text(encoding="utf-8", errors="replace") + _, body = parse_frontmatter(text) + issues = memory_review_issues(record, review_command=review_command) + actions = memory_action_hints(record, issues=issues, review_command=review_command) + backlinks, backlinks_error = load_backlinks_index(wiki_dir / "_backlinks.json") + if backlinks_error: + backlinks = build_backlinks(wiki_dir, body_only=backlinks_body_only) + name = str(record["name"]) + graph = { + "forward": sorted(backlinks.get("forward", {}).get(name, [])), + "inbound": sorted(backlinks.get("backlinks", {}).get(name, [])), + "wikilinks": extract_wikilinks(body), + } + return { + "found": True, + "memory": slim_memory(record), + "recall": recall_state(record, issues), + "review": { + "status": record.get("review_status", "pending"), + "reviewed_at": record.get("reviewed_at", ""), + "review_note": record.get("review_note", ""), + "issues": issues, + "issue_count": len(issues), + "actions": actions, + 
"primary_action": primary_memory_action(actions), + }, + "provenance": { + "source": record.get("source", ""), + "date_captured": record.get("date_captured", ""), + "path": record.get("path", ""), + }, + "lifecycle": { + "status": record.get("status", "active"), + "archived_at": record.get("archived_at", ""), + "archive_reason": record.get("archive_reason", ""), + "restored_at": record.get("restored_at", ""), + }, + "graph": graph, + "log_entries": memory_log_entries(wiki_dir, record), + "body": body, + } + + +def resolve_memory_page( + wiki_dir: Path, + identifier: str, + records: Iterable[Mapping[str, object]] | None = None, + max_identifier_len: int | None = None, +) -> tuple[Path | None, dict[str, object] | None, str | None]: + needle = str(identifier or "").strip() + if max_identifier_len is not None: + needle = needle[:max_identifier_len] + if not needle: + return None, None, "memory name or title is required" + + memories_dir = wiki_dir / "memories" + direct_candidates: list[Path] = [] + raw_path = Path(needle) + if raw_path.suffix == ".md" or "/" in needle: + rel = Path(needle.removeprefix("wiki/")) + direct_candidates.append((wiki_dir / rel).resolve()) + direct_candidates.append((memories_dir / raw_path.name).resolve()) + else: + direct_candidates.append((memories_dir / f"{needle}.md").resolve()) + direct_candidates.append((memories_dir / f"{slugify(needle)}.md").resolve()) + + record_list = [dict(record) for record in records] if records is not None else None + memories_root = memories_dir.resolve() + for candidate in direct_candidates: + try: + candidate.relative_to(memories_root) + except ValueError: + continue + if candidate.exists() and candidate.is_file(): + if record_list is None: + return candidate, memory_record_from_page(wiki_dir, candidate), None + record = next( + (record for record in record_list if str(record.get("name") or "") == candidate.stem), + None, + ) + return candidate, dict(record) if record else None, None + + lowered = 
needle.lower() + slug = slugify(needle) + if record_list is None: + record_list = memory_records(wiki_dir) + matches = [ + dict(record) for record in record_list + if lowered in {str(record.get("name") or "").lower(), str(record.get("title") or "").lower()} + or slug == str(record.get("name") or "").lower() + ] + if len(matches) > 1: + names = ", ".join(str(record.get("name") or "") for record in matches[:5]) + return None, None, f"memory identifier is ambiguous: {names}" + if not matches: + return None, None, f"memory not found: {identifier}" + record = matches[0] + return wiki_dir / str(record["path"]).removeprefix("wiki/"), record, None + + +def unique_page_path(directory: Path, slug: str) -> Path: + candidate = directory / f"{slug}.md" + index = 2 + while candidate.exists(): + candidate = directory / f"{slug}-{index}.md" + index += 1 + return candidate + + +def write_default_index(index_path: Path) -> None: + atomic_write_text( + index_path, + "# Link Wiki Index\n\n" + "> Last updated: not yet ingested | 0 pages | 0 sources\n\n" + "## Categories\n\n" + "## Recent\n\n" + "| Date | Operation | Pages Touched |\n" + "|------|-----------|---------------|\n", + ) + + +def update_memory_index( + index_path: Path, + page_name: str, + title: str, + summary: str, + memory_type: str, + scope: str, +) -> None: + if not index_path.exists(): + write_default_index(index_path) + text = index_path.read_text(encoding="utf-8", errors="replace") + if f"[[{page_name}]]" in text: + return + entry = f"- [[{page_name}]] - {summary} {memory_type} · {scope}\n" + if "### memories" in text: + pattern = re.compile(r"(### memories\n)(.*?)(?=\n### |\n## Recent|\Z)", flags=re.DOTALL) + text = pattern.sub(lambda m: m.group(1) + m.group(2).rstrip() + "\n" + entry, text, count=1) + elif "\n## Recent" in text: + text = text.replace("\n## Recent", f"\n### memories\n{entry}\n## Recent", 1) + else: + text = text.rstrip() + f"\n\n### memories\n{entry}" + atomic_write_text(index_path, text) + + +def 
def replace_markdown_body(text: str, body: str) -> str:
    """Return ``text`` with everything after its frontmatter replaced by ``body``.

    When ``text`` starts with a ``---`` line and a closing ``---`` delimiter
    is found, the frontmatter (through that delimiter) is kept and followed
    by a blank line and the stripped ``body``. Otherwise only the stripped
    ``body`` is returned. The result always ends with a single newline.

    Args:
        text: Existing page text, with or without frontmatter.
        body: Replacement body; surrounding whitespace is stripped.

    Returns:
        The rebuilt page text.
    """
    if text.startswith("---\n"):
        # Search from index 3 so the newline that ends the opening delimiter
        # can also start the closing one. That makes an empty frontmatter
        # block ("---\n---") recognisable; searching from index 4 skipped it
        # and could latch onto a later "---" rule inside the old body.
        end = text.find("\n---", 3)
        if end != -1:
            return text[:end + 4] + "\n\n" + body.strip() + "\n"
    return body.strip() + "\n"
def forget_memory_page(
    wiki_dir: Path,
    identifier: str,
    confirm: bool = False,
    records: Iterable[Mapping[str, object]] | None = None,
    log_writer: MemoryLogWriter | None = None,
    timestamp: str = "",
    rebuild_backlinks: Callable[[], bool] | None = None,
) -> dict[str, object]:
    """Delete a memory page, but only when ``confirm`` is true.

    Without confirmation the memory is resolved and described, and the
    payload reports ``confirmation_required``. With confirmation the page
    file is removed, its entry is dropped from ``index.md``, the optional
    ``rebuild_backlinks`` callback runs, and the optional ``log_writer`` is
    called with the title only (the memory body is not logged).

    Args:
        wiki_dir: Root directory of the wiki.
        identifier: Name, title, or path passed to ``resolve_memory_page``.
        confirm: Must be true for anything to be deleted.
        records: Optional pre-loaded records used for resolution.
        log_writer: Optional callback invoked after a deletion.
        timestamp: Timestamp forwarded to ``log_writer``.
        rebuild_backlinks: Optional callback invoked after a deletion.

    Returns:
        A payload with ``forgotten`` and ``found`` flags. Resolution failures
        are reported through an ``error`` key instead of being raised.
    """
    target, found, problem = resolve_memory_page(wiki_dir, identifier, records=records)
    if problem:
        return dict(
            forgotten=False,
            found=False,
            error=problem,
            confirmation_required=False,
        )
    assert target is not None and found is not None

    result: dict[str, object] = {
        "forgotten": False,
        "found": True,
        "name": found["name"],
        "path": found["path"],
        "title": found["title"],
        "confirmation_required": not confirm,
    }
    if confirm:
        target.unlink()
        index_changed = remove_memory_from_index(wiki_dir / "index.md", target.stem)
        rebuilt = rebuild_backlinks() if rebuild_backlinks else False
        result["forgotten"] = True
        result["confirmation_required"] = False
        result["index_updated"] = index_changed
        result["backlinks_rebuilt"] = bool(rebuilt)
        if log_writer:
            details = [
                f"Title: {result['title']}",
                "Deleted memory page only; memory body was not logged.",
            ]
            log_writer(timestamp, "forget-memory", f"Forgot memory {result['path']}", details)
    return result
def update_memory_page(
    wiki_dir: Path,
    identifier: str,
    text: str,
    source: str,
    timestamp: str,
    records: Iterable[Mapping[str, object]] | None = None,
    review_command: str = "review-memory",
    allow_conflict: bool = False,
    project: str | None = None,
    log_writer: MemoryLogWriter | None = None,
    rebuild_backlinks: BacklinkRebuilder | None = None,
) -> dict[str, object]:
    """Append an update to an active memory and reset its review to pending.

    The update text is merged into the page body via ``append_memory_update``.
    The frontmatter gains ``updated_at``, an incremented ``update_count`` and
    ``last_update_source``; ``review_status`` becomes ``pending`` and any
    ``reviewed_at`` / ``review_note`` fields are removed.

    Args:
        wiki_dir: Root directory of the wiki.
        identifier: Name, title, or path passed to ``resolve_memory_page``.
        text: Update text; must not be blank.
        source: Where the update came from. Empty or whitespace-only values
            are recorded as ``"manual"``.
        timestamp: Timestamp written to the page and the log.
        records: Optional pre-loaded records (loaded from disk when omitted).
        review_command: Command name used in the returned review issues.
        allow_conflict: Write the update even when conflict candidates exist.
        project: Project used for conflict detection; defaults to the
            record's own project.
        log_writer: Optional callback invoked after the page is written.
        rebuild_backlinks: Optional callback invoked after the page is written.

    Returns:
        A payload with ``updated`` true and the refreshed review state, or a
        payload with ``updated`` false and ``conflict`` true when conflict
        candidates exist and ``allow_conflict`` is false.

    Raises:
        ValueError: If the text is blank, the memory cannot be resolved, or
            the memory is not active.
    """
    clean_text = text.strip()
    if not clean_text:
        raise ValueError("memory update text required")
    # Treat a whitespace-only source like a missing one. Otherwise the
    # frontmatter and log would record an empty source while
    # append_memory_update labels the very same update "manual".
    clean_source = (source or "").strip() or "manual"
    record_list = [dict(item) for item in records] if records is not None else memory_records(wiki_dir)
    page_path, record, error = resolve_memory_page(wiki_dir, identifier, records=record_list)
    if error:
        raise ValueError(error)
    assert page_path is not None and record is not None
    if not is_active_memory(record):
        raise ValueError("cannot update archived or stale memory; restore it first")
    conflict_candidates = memory_conflict_candidates(
        record_list,
        clean_text,
        str(record.get("title") or ""),
        str(record.get("memory_type") or "note"),
        str(record.get("scope") or "user"),
        project=project or str(record.get("project") or ""),
        # Never report the memory being updated as a conflict with itself.
        exclude_names=[str(record.get("name") or "")],
    )
    if conflict_candidates and not allow_conflict:
        return {
            "updated": False,
            "conflict": True,
            "message": "This update may conflict with another active memory. Explain, update, or archive the conflicting memory first, or pass allow_conflict if both should coexist.",
            "name": record["name"],
            "path": record["path"],
            "title": record["title"],
            "project": record.get("project", ""),
            "conflict_candidates": conflict_candidates,
        }

    previous_review_status = str(record.get("review_status") or "pending")
    previous_update_count = frontmatter_int(record.get("update_count"))
    next_update_count = previous_update_count + 1
    original = page_path.read_text(encoding="utf-8", errors="replace")
    _, body = parse_frontmatter(original)
    updated_body = append_memory_update(body, clean_text, timestamp, clean_source)
    updates = {
        "updated_at": f'"{timestamp}"',
        "update_count": str(next_update_count),
        "last_update_source": f'"{frontmatter_string(clean_source)}"',
        "review_status": "pending",
    }
    updated_text = update_frontmatter_fields(original, updates, remove={"reviewed_at", "review_note"})
    atomic_write_text(page_path, replace_markdown_body(updated_text, updated_body))
    if log_writer:
        log_writer(
            timestamp,
            "update-memory",
            str(record["title"]),
            [
                f"Updated: memories/{page_path.name}",
                f"Previous review status: {previous_review_status}",
                "New review status: pending",
                f"Update count: {next_update_count}",
                f"Source: {clean_source}",
            ],
        )
    backlinks_rebuilt = rebuild_backlinks() if rebuild_backlinks else False

    # Re-read the page so the returned review issues reflect what was written.
    _, updated_record, _ = resolve_memory_page(wiki_dir, str(record["name"]))
    updated_record = updated_record or record
    issues = memory_review_issues(updated_record, review_command=review_command)
    return {
        "updated": True,
        "name": updated_record["name"],
        "path": updated_record["path"],
        "title": updated_record["title"],
        "project": updated_record.get("project", ""),
        "previous_review_status": previous_review_status,
        "review_status": updated_record.get("review_status", "pending"),
        "updated_at": timestamp,
        "update_count": next_update_count,
        "source": clean_source,
        "remaining_issue_count": len(issues),
        "remaining_issues": issues,
        "backlinks_rebuilt": bool(backlinks_rebuilt),
        "conflict_override": bool(conflict_candidates and allow_conflict),
        "conflict_candidates": conflict_candidates,
    }
Review or update the existing memory, or pass allow_duplicate if this is intentional.", + "title": memory_title_value, + "memory_type": memory_type, + "scope": scope, + "project": clean_project, + "candidates": duplicate_candidates, + } + conflict_candidates = memory_conflict_candidates( + record_list, + clean_text, + title, + memory_type, + scope, + project=clean_project, + ) + if conflict_candidates and not allow_conflict: + return { + "created": False, + "conflict": True, + "message": "This memory may conflict with an active memory. Review or update the existing memory, archive stale memory, or pass allow_conflict if both should coexist.", + "title": memory_title_value, + "memory_type": memory_type, + "scope": scope, + "project": clean_project, + "conflict_candidates": conflict_candidates, + } + + memories_dir = wiki_dir / "memories" + memories_dir.mkdir(parents=True, exist_ok=True) + page_path = unique_page_path(memories_dir, slugify(memory_title_value)) + page_name = page_path.stem + tag_values = ["memory", memory_type] + for tag in csv_values(tags): + slug_tag = slugify(tag, fallback="") + if slug_tag and slug_tag not in tag_values: + tag_values.append(slug_tag) + project_line = f'project: "{frontmatter_string(clean_project)}"\n' if clean_project else "" + + page = f"""--- +type: memory +title: "{frontmatter_string(memory_title_value)}" +memory_type: {memory_type} +scope: {scope} +{project_line}status: active +date_captured: "{timestamp}" +source: "{frontmatter_string(clean_source)}" +review_status: pending +tags: {yaml_list(tag_values)} +--- + +# {memory_title_value} + +> **TLDR:** {summary} + +## Memory + +{clean_text} + +## Use This When + +- An agent needs relevant {scope} context for future work. +- A future answer depends on this {memory_type}. 
+ +## Source + +{clean_source} +""" + atomic_write_text(page_path, page) + update_memory_index(wiki_dir / "index.md", page_name, memory_title_value, summary, memory_type, scope) + if log_writer: + log_writer( + timestamp, + "remember", + memory_title_value, + [ + f"Created: memories/{page_path.name}", + f"Type: {memory_type}", + f"Scope: {scope}", + ], + ) + backlinks_rebuilt = rebuild_backlinks() if rebuild_backlinks else False + return { + "created": True, + "name": page_name, + "path": f"wiki/memories/{page_path.name}", + "title": memory_title_value, + "memory_type": memory_type, + "scope": scope, + "project": clean_project, + "backlinks_rebuilt": bool(backlinks_rebuilt), + "duplicate_override": bool(duplicate_candidates and allow_duplicate), + "duplicate_candidates": duplicate_candidates, + "conflict_override": bool(conflict_candidates and allow_conflict), + "conflict_candidates": conflict_candidates, + } + + +def memory_inbox( + records: Iterable[Mapping[str, object]], + limit: int = 20, + include_archived: bool = False, + review_command: str = "review-memory", + project: str | None = None, +) -> dict[str, object]: + limit = max(1, min(limit, 50)) + project_name = normalize_project(project) + severity_rank = {"high": 0, "medium": 1, "low": 2} + items: list[dict[str, object]] = [] + for record in records: + if not memory_visible_for_project(record, project_name): + continue + if not include_archived and str(record.get("status") or "").lower() == "archived": + continue + issues = memory_review_issues(record, review_command=review_command) + if not issues: + continue + item = slim_memory(record) + item["issues"] = issues + item["issue_count"] = len(issues) + item["actions"] = memory_action_hints(record, issues=issues, review_command=review_command) + item["primary_action"] = primary_memory_action(item["actions"]) + item["highest_severity"] = min( + (issue["severity"] for issue in issues), + key=lambda severity: severity_rank.get(severity, 9), + ) + 
def count_values(records: Iterable[Mapping[str, object]], field: str) -> dict[str, int]:
    """Count how often each value of ``field`` occurs across ``records``.

    Values are converted to strings; a missing or falsy value is counted
    under ``"unknown"``. The returned dictionary is ordered by descending
    count, with ties broken alphabetically by value.
    """
    tally: dict[str, int] = {}
    for record in records:
        label = str(record.get(field) or "unknown")
        tally.setdefault(label, 0)
        tally[label] += 1
    ordered = sorted(tally, key=lambda label: (-tally[label], label))
    return {label: tally[label] for label in ordered}
+ project_name = normalize_project(project) + record_list = [ + dict(record) + for record in records + if memory_visible_for_project(record, project_name) + ] + active_records = [record for record in record_list if is_active_memory(record)] + archived_records = [ + record for record in record_list + if str(record.get("status") or "").lower() == "archived" + ] + recent = [slim_memory(record) for record in recent_memories(active_records)] + + def typed(memory_type: str) -> list[dict[str, object]]: + return [ + slim_memory(record) + for record in recent_memories(active_records) + if str(record.get("memory_type") or "") == memory_type + ][:limit] + + return { + "memory_count": len(record_list), + "active_count": len(active_records), + "review_count": memory_inbox(record_list, limit=limit, review_command=review_command)["review_count"], + "project": project_name, + "by_type": count_values(record_list, "memory_type"), + "by_scope": count_values(record_list, "scope"), + "by_project": count_values( + [ + record + for record in record_list + if str(record.get("scope") or "") == "project" + and normalize_project(str(record.get("project") or "")) + ], + "project", + ), + "by_status": count_values(record_list, "status"), + "top_tags": top_tags(record_list), + "recent": recent[:limit], + "preferences": typed("preference"), + "decisions": typed("decision"), + "projects": typed("project"), + "archived": [slim_memory(record) for record in recent_memories(archived_records)][:limit], + } + + +def memory_audit_report( + profile: Mapping[str, object], + inbox: Mapping[str, object], + captures: Mapping[str, object], + next_actions: Iterable[Mapping[str, object]], + project: str | None = None, +) -> dict[str, object]: + """Build the shared memory/capture risk report for CLI, HTTP, and MCP.""" + project_name = normalize_project(project) + review_count = int(inbox.get("review_count") or 0) + capture_count = int(captures.get("count") or 0) + capture_warning_count = 
def add_capture_review_to_brief(
    payload: Mapping[str, object],
    captures: Mapping[str, object],
) -> dict[str, object]:
    """Attach raw-capture review state and guidance to a memory brief.

    Args:
        payload: The memory brief to extend. It is shallow-copied, never mutated.
        captures: Raw-capture summary. The keys ``count``, ``warning_count`` and
            ``read_warning_count`` are read; missing or falsy values count as 0.

    Returns:
        A new dict containing everything from ``payload`` plus a ``captures``
        entry (a copy of ``captures``) and an ``agent_guidance`` list extended
        with one sentence per non-zero capture counter.
    """
    result = dict(payload)
    capture_payload = dict(captures)

    # ``agent_guidance`` may be absent, explicitly null (e.g. decoded from JSON),
    # or a single string. Iterating None raises TypeError and iterating a string
    # yields single characters, so normalise both to a list first.
    existing = result.get("agent_guidance")
    if existing is None:
        existing = []
    elif isinstance(existing, str):
        existing = [existing]
    guidance = [str(item) for item in existing]

    result["captures"] = capture_payload
    capture_count = int(capture_payload.get("count") or 0)
    warning_count = int(capture_payload.get("warning_count") or 0)
    read_warning_count = int(capture_payload.get("read_warning_count") or 0)
    if capture_count:
        plural = "s" if capture_count != 1 else ""
        guidance.append(
            f"Review {capture_count} saved raw capture{plural} before accepting or deleting capture state."
        )
    if warning_count:
        guidance.append("Redact raw captures with secret warnings before sharing snippets or using their contents.")
    if read_warning_count:
        guidance.append("Fix unreadable raw captures before deciding whether capture memory should be accepted or deleted.")
    result["agent_guidance"] = guidance
    return result
def score_memory(record: Mapping[str, object], query: str) -> int:
    """Return a keyword-match score for ``record`` against ``query``.

    The lower-cased, stripped query is matched as a whole phrase and as
    individual tokens (split on non-word characters, tokens shorter than three
    characters ignored). Weights per field:

    ========  ======  =====
    field     phrase  token
    ========  ======  =====
    title       20      6
    tldr        12      4
    tags         8      3
    body         4      1
    ========  ======  =====

    Args:
        record: Memory record; ``title``, ``tldr``, ``body`` and ``tags`` are read.
        query: Free-text query.

    Returns:
        The summed score; 0 when nothing matches or the query is blank.
    """
    q = query.lower().strip()
    if not q:
        # A blank query has no phrase and no tokens, so nothing can score.
        return 0
    tokens = [token for token in re.split(r"\W+", q) if len(token) >= 3]

    def field_text(name: str) -> str:
        # A missing or null field must read as empty text; str(None) would make
        # the literal word "none" match every record with a null field.
        value = record.get(name)
        return "" if value is None else str(value).lower()

    raw_tags = record.get("tags")
    if raw_tags is None:
        raw_tags = []
    elif isinstance(raw_tags, str):
        # A bare string is one tag, not a sequence of characters.
        raw_tags = [raw_tags]
    tags = " ".join(str(tag).lower() for tag in raw_tags)

    weighted_fields = (
        (field_text("title"), 20, 6),
        (field_text("tldr"), 12, 4),
        (tags, 8, 3),
        (field_text("body"), 4, 1),
    )
    score = 0
    for text, phrase_weight, token_weight in weighted_fields:
        if q in text:
            score += phrase_weight
        score += token_weight * sum(1 for token in tokens if token in text)
    return score
str(record.get("review_status") or "").lower() == "needs_update": + rank_score -= 3 + if not is_active_memory(record): + rank_score -= 10 + return max(1, rank_score) + + +def recall_memories( + records: Iterable[Mapping[str, object]], + query: str, + limit: int = 10, + include_archived: bool = False, + project: str | None = None, +) -> list[dict[str, object]]: + q = query.strip() + if not q: + return [] + project_name = normalize_project(project) + scored: list[tuple[int, int, str, dict[str, object]]] = [] + severity_rank = {"high": 0, "medium": 1, "low": 2} + for record in records: + if not memory_visible_for_project(record, project_name): + continue + if not include_archived and not is_active_memory(record): + continue + score = score_memory(record, q) + if score > 0: + rank_score = memory_rank_score(record, score, project=project_name) + issues = memory_review_issues(record) + slim = slim_memory(record) + slim["score"] = score + slim["rank_score"] = rank_score + slim["recall"] = recall_state(record, issues) + slim["review_issue_count"] = len(issues) + slim["highest_review_severity"] = ( + "none" if not issues else + min( + (str(issue.get("severity") or "low") for issue in issues), + key=lambda severity: severity_rank.get(severity, 9), + ) + ) + recency = str(record.get("updated_at") or record.get("date_captured") or "") + scored.append((rank_score, score, recency, slim)) + scored.sort(key=lambda item: str(item[3]["title"]).lower()) + scored.sort(key=lambda item: item[2], reverse=True) + scored.sort(key=lambda item: (item[0], item[1]), reverse=True) + return [record for _, _, _, record in scored[:limit]] + + +def memory_duplicate_candidates( + records: Iterable[Mapping[str, object]], + text: str, + title: str | None, + memory_type: str, + scope: str, + project: str | None = None, + limit: int = 3, +) -> list[dict[str, object]]: + title_value = memory_title(text, title) + new_slug = slugify(title_value) + new_title = compact_memory_text(title_value) + new_body = 
compact_memory_text(text) + new_tokens = memory_tokens(f"{title_value} {text}") + project_name = normalize_project(project) + candidates: list[tuple[int, dict[str, object]]] = [] + + for record in records: + if not is_active_memory(record): + continue + if scope == "project" and not memory_visible_for_project(record, project_name): + continue + reasons: list[str] = [] + score = 0 + record_title = compact_memory_text(str(record.get("title") or "")) + record_text = compact_memory_text( + " ".join( + str(record.get(field) or "") + for field in ("title", "tldr", "snippet", "body") + ) + ) + record_tokens = memory_tokens(record_text) + + if str(record.get("name") or "") == new_slug: + score = max(score, 100) + reasons.append("same_slug") + if new_title and record_title == new_title: + score = max(score, 96) + reasons.append("same_title") + if len(new_body) >= 40 and new_body in record_text: + score = max(score, 94) + reasons.append("same_memory_text") + + overlap = sorted(new_tokens & record_tokens) + union = new_tokens | record_tokens + overlap_ratio = (len(overlap) / len(union)) if union else 0.0 + same_kind = ( + str(record.get("memory_type") or "") == memory_type + and str(record.get("scope") or "") == scope + ) + if same_kind and len(overlap) >= 5 and overlap_ratio >= 0.72: + score = max(score, min(92, int(70 + overlap_ratio * 25))) + reasons.append("high_token_overlap") + + if score < 85: + continue + candidate = slim_memory(record) + candidate["duplicate_score"] = min(score, 100) + candidate["duplicate_reasons"] = reasons + candidate["matching_terms"] = overlap[:12] + candidates.append((int(candidate["duplicate_score"]), candidate)) + + candidates.sort(key=lambda item: (-item[0], str(item[1]["title"]).lower())) + return [candidate for _, candidate in candidates[:limit]] + + +def memory_conflict_candidates( + records: Iterable[Mapping[str, object]], + text: str, + title: str | None, + memory_type: str, + scope: str, + project: str | None = None, + limit: int = 3, 
+ exclude_names: Iterable[str] | None = None, +) -> list[dict[str, object]]: + """Find active memories that may contradict the proposed memory.""" + if memory_type not in MEMORY_CONFLICT_TYPES: + return [] + + title_value = memory_title(text, title) + new_text = f"{title_value} {text}" + new_all_tokens = memory_tokens(new_text) + new_tokens = significant_memory_tokens(new_text) + new_negated = has_negation(new_text) + new_groups = _extract_option_groups(new_text) + new_pairs = _extract_preference_pairs(new_text) + project_name = normalize_project(project) + excluded = {name for name in (exclude_names or []) if name} + candidates: list[tuple[int, dict[str, object]]] = [] + + for record in records: + name = str(record.get("name") or "") + if name in excluded or not is_active_memory(record): + continue + if scope == "project" and not memory_visible_for_project(record, project_name): + continue + record_type = str(record.get("memory_type") or "") + record_scope = str(record.get("scope") or "") + if record_type != memory_type: + continue + if scope != record_scope and "global" not in {scope, record_scope}: + continue + + record_text = " ".join( + str(record.get(field) or "") + for field in ("title", "tldr", "snippet", "body") + ) + record_all_tokens = memory_tokens(record_text) + record_tokens = significant_memory_tokens(record_text) + overlap = sorted(new_tokens & record_tokens) + union = new_tokens | record_tokens + overlap_ratio = (len(overlap) / len(union)) if union else 0.0 + reasons: list[str] = [] + score = 0 + + if new_negated != has_negation(record_text) and len(overlap) >= 1 and overlap_ratio >= 0.45: + score = max(score, 92) + reasons.append("opposite_negation") + + record_groups = _extract_option_groups(record_text) + for group, new_options in new_groups.items(): + record_options = record_groups.get(group) + if not record_options: + continue + if new_options == record_options: + continue + # Ambiguous memories that mention multiple options without a clear + 
def memory_proposal_segments(text: str) -> list[str]:
    """Split free-form text into sentence-sized memory candidates.

    Fenced code blocks are dropped, blank lines and lines starting with ``#``
    are skipped, a leading list marker and a leading speaker label are removed,
    and each remaining line is split after ``.``, ``!`` or ``?``. Only
    sentences of 18 to 500 characters are kept, in document order.
    """
    without_fences = re.sub(r"```.*?```", " ", text, flags=re.DOTALL)
    normalized = without_fences.replace("\r\n", "\n").replace("\r", "\n")

    found: list[str] = []
    for raw_line in normalized.splitlines():
        candidate = raw_line.strip()
        if candidate == "" or candidate[0] == "#":
            continue
        # Strip one bullet / numbered-list marker, then one speaker prefix.
        candidate = re.sub(r"^\s*(?:[-*+]\s+|\d+[.)]\s+)", "", candidate).strip()
        candidate = re.sub(
            r"^(?:user|human|me|assistant|codex|agent)\s*:\s*",
            "",
            candidate,
            flags=re.IGNORECASE,
        )
        if candidate == "":
            continue
        pieces = (part.strip() for part in re.split(r"(?<=[.!?])\s+", candidate))
        found.extend(piece for piece in pieces if 18 <= len(piece) <= 500)
    return found
def proposal_title(memory: str, memory_type: str) -> str:
    """Derive a short display title from a normalized memory sentence.

    The trailing period and a leading ``User``/``Project``/``Team`` subject are
    removed, leading verbs are rewritten (``prefers`` -> ``Prefer``,
    ``decided to`` -> ``Decision:`` and so on), ``project`` memories get a
    ``Project`` prefix, and titles over 70 characters are cut to 67 plus "...".
    """
    rewrites = (
        (r"^(?:User|Project|Team)\s+", ""),
        (r"^prefers\b", "Prefer"),
        (r"^wants\b", "Want"),
        (r"^needs\b", "Need"),
        (r"^decided(?: to)?\b", "Decision:"),
        (r"^agreed(?: to)?\b", "Decision:"),
        (r"^chose\b", "Decision:"),
    )
    title = memory.strip().rstrip(".")
    for pattern, replacement in rewrites:
        title = re.sub(pattern, replacement, title, flags=re.IGNORECASE)

    needs_project_prefix = memory_type == "project" and not title.lower().startswith("project")
    if needs_project_prefix:
        if title:
            title = f"Project {title[0].lower()}{title[1:]}"
        else:
            title = "Project memory"

    if len(title) > 70:
        return title[:67].rstrip() + "..."
    return title or "Memory proposal"
+ + +def _shell_words(*parts: object) -> str: + words = [str(part) for part in parts if str(part) != ""] + if not words: + return "" + if os.name == "nt": + return subprocess.list2cmdline(words) + return shlex.join(words) + + +def memory_proposal_action(proposal: Mapping[str, object]) -> dict[str, object]: + """Return the safest next action for a memory proposal.""" + memory = str(proposal.get("memory") or "") + title = str(proposal.get("title") or proposal_title(memory, str(proposal.get("memory_type") or "note"))) + memory_type = str(proposal.get("memory_type") or "note") + scope = str(proposal.get("scope") or "user") + source = str(proposal.get("source") or "proposal") + project = str(proposal.get("project") or "") + duplicate_candidates = proposal.get("duplicate_candidates") + conflict_candidates = proposal.get("conflict_candidates") + duplicate_list = duplicate_candidates if isinstance(duplicate_candidates, list) else [] + conflict_list = conflict_candidates if isinstance(conflict_candidates, list) else [] + + if duplicate_list: + first = duplicate_list[0] if isinstance(duplicate_list[0], Mapping) else {} + identifier = str(first.get("name") or first.get("title") or "") + command_parts: list[object] = ["python3", "link.py", "update-memory", identifier, memory, ".", "--source", source] + if project: + command_parts.extend(["--project", project]) + command = _shell_words(*command_parts) + args: dict[str, object] = {"identifier": identifier, "memory": memory, "source": source} + if project: + args["project"] = project + action = _memory_action( + kind="update", + label="Update existing memory", + description="A strong duplicate exists; update it instead of creating another memory.", + command=command, + tool="update_memory", + arguments=args, + priority="high", + ) + action["prompt"] = f'Approve by asking: update memory {identifier} with "{memory}"' + return action + + if conflict_list: + first = conflict_list[0] if isinstance(conflict_list[0], Mapping) else {} + 
identifier = str(first.get("name") or first.get("title") or "") + action = _memory_action( + kind="review_conflict", + label="Review conflict", + description="A likely conflicting memory exists; inspect it before saving or archiving anything.", + command=_shell_words("python3", "link.py", "explain-memory", identifier, "."), + tool="explain_memory", + arguments={"identifier": identifier}, + priority="high", + ) + action["prompt"] = f"Review possible conflict with {identifier} before saving this proposal." + return action + + command_parts: list[object] = [ + "python3", + "link.py", + "remember", + memory, + ".", + "--title", + title, + "--type", + memory_type, + "--scope", + scope, + "--source", + source, + ] + args: dict[str, object] = { + "memory": memory, + "title": title, + "memory_type": memory_type, + "scope": scope, + "source": source, + } + if project: + command_parts.extend(["--project", project]) + args["project"] = project + action = _memory_action( + kind="remember", + label="Remember", + description="Create a new durable memory after the user approves this proposal.", + command=_shell_words(*command_parts), + tool="remember_memory", + arguments=args, + priority="high", + ) + action["prompt"] = f"Approve by asking: remember that {memory}" + return action + + +def classify_memory_segment(segment: str) -> dict[str, object] | None: + text = segment.strip() + lower = text.lower() + if any(cue in lower for cue in ("maybe", "might", "not sure", "wondering", "considering", "could later")): + return None + + checks: list[tuple[str, str, int, str, tuple[str, ...]]] = [ + ( + "preference", + "user", + 90, + "Matched an explicit user preference cue.", + ( + r"\b(?:i|user|human)\s+(?:prefer|prefers|like|likes|want|wants|need|needs)\b", + r"\b(?:please\s+)?(?:always|never|avoid|do not|don't)\b", + r"\bagents?\s+should\s+(?:always|never|prefer|avoid|use)\b", + ), + ), + ( + "decision", + "project", + 88, + "Matched an explicit decision cue.", + ( + 
r"\b(?:we|project|team|user)\s+(?:decided|agreed|chose|settled)\b", + r"\bdecision\s*:", + ), + ), + ( + "project", + "project", + 76, + "Matched a project context cue.", + ( + r"\b(?:project|repo|repository|link)\s+(?:uses|requires|runs|stores|keeps|ships|releases)\b", + r"\b(?:this project|this repo)\s+(?:uses|requires|keeps|stores)\b", + ), + ), + ( + "fact", + "user", + 74, + "Matched a stable user fact cue.", + ( + r"\b(?:i am|i work|user is|user works|user has|my role|my timezone)\b", + ), + ), + ] + + for memory_type, scope, score, reason, patterns in checks: + if any(re.search(pattern, lower) for pattern in patterns): + memory = normalize_proposed_memory(text, memory_type) + return { + "memory": memory, + "memory_type": memory_type, + "scope": scope, + "confidence_score": score, + "reason": reason, + } + return None + + +def confidence_label(score: int) -> str: + if score >= 85: + return "high" + if score >= 70: + return "medium" + return "low" + + +def propose_memories_from_text( + text: str, + records: Iterable[Mapping[str, object]], + source: str = "inline", + limit: int = 10, + writes_memory: bool = False, + project: str | None = None, +) -> dict[str, object]: + record_list = [dict(record) for record in records] + project_name = normalize_project(project) + proposals: list[dict[str, object]] = [] + seen: set[str] = set() + skipped = 0 + for segment in memory_proposal_segments(text): + classified = classify_memory_segment(segment) + if not classified: + skipped += 1 + continue + score = int(classified["confidence_score"]) + if score < MEMORY_PROPOSAL_MIN_SCORE: + skipped += 1 + continue + memory = str(classified["memory"]) + dedupe_key = compact_memory_text(memory) + if dedupe_key in seen: + skipped += 1 + continue + seen.add(dedupe_key) + memory_type = str(classified["memory_type"]) + scope = str(classified["scope"]) + title = proposal_title(memory, memory_type) + duplicate_candidates = memory_duplicate_candidates( + record_list, + memory, + title, + 
def starter_prompt_payload(target: Path, project: str | None = None) -> dict[str, object]:
    """Build the first-run prompt and command suggestions for a Link target.

    ``target`` is expanded and resolved. The project name comes from
    ``normalize_project(project)`` when ``project`` is given, otherwise from
    ``default_project_for_target``. The "remember" and "query" prompts are
    project-flavoured when a project name is available and user-flavoured
    otherwise.

    Returns a dict with ``target`` (string path), ``project``, ``prompts``
    (list of label/prompt/when dicts) and ``commands`` (list of CLI strings).
    """
    resolved = target.expanduser().resolve()
    if project is not None:
        project_name = normalize_project(project)
    else:
        project_name = default_project_for_target(resolved)

    if project_name:
        remember_prompt = "remember that this project uses Link for local agent memory"
        query_prompt = "query Link for what this project remembers"
    else:
        remember_prompt = "remember that I prefer local-first agent memory"
        query_prompt = "query Link for what you know about me"

    # (label, prompt, when) rows, in the order they are presented.
    rows = (
        ("Check readiness", "is Link ready?", "right after install or before troubleshooting"),
        ("Prime memory", "brief me from Link before we continue", "at the start of a session or task"),
        (
            "Save explicit memory",
            remember_prompt,
            "when you want future agents to remember a preference, decision, or project fact",
        ),
        (
            "Ask with context",
            query_prompt,
            "when you want a compact answer-ready packet from memory and wiki context",
        ),
        ("Ingest a source", "ingest raw/ into Link", "after dropping a source file into raw/"),
        (
            "Review memory proposals",
            "propose memories from raw/",
            "when a source may contain preferences, decisions, or project context",
        ),
    )
    prompts = [{"label": label, "prompt": prompt, "when": when} for label, prompt, when in rows]

    commands = [
        "link status --validate",
        "link ingest-status",
        "link memory-inbox",
        'link benchmark "agent memory"',
    ]
    return {
        "target": str(resolved),
        "project": project_name,
        "prompts": prompts,
        "commands": commands,
    }
+""" +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any, Iterable, Mapping + +from .memory import ( + memory_brief, + normalize_project, + recall_memories, +) +from .wiki import context_for_topic, search_pages + + +BUDGETS: dict[str, dict[str, int]] = { + "small": { + "memories": 3, + "search_results": 4, + "context_pages": 3, + "primary_chars": 1200, + "neighbor_chars": 450, + }, + "medium": { + "memories": 6, + "search_results": 6, + "context_pages": 5, + "primary_chars": 2400, + "neighbor_chars": 700, + }, + "large": { + "memories": 10, + "search_results": 10, + "context_pages": 8, + "primary_chars": 5000, + "neighbor_chars": 1200, + }, +} + + +def normalize_budget(value: object | None) -> str: + budget = str(value if value is not None else "medium").strip().lower()[:20] + return budget if budget in BUDGETS else "medium" + + +def _trim_text(value: object, max_chars: int) -> str: + text = str(value or "").strip() + if len(text) <= max_chars: + return text + return text[: max(0, max_chars - 3)].rstrip() + "..." 
+ + +def _memory_reason(memory: Mapping[str, object]) -> str: + parts = ["matched the query"] + recall = memory.get("recall") + if isinstance(recall, Mapping): + state = str(recall.get("state") or "") + if state and state != "ready": + parts.append(f"recall state: {state}") + elif state == "ready": + parts.append("recall-ready") + if str(memory.get("review_status") or "").lower() == "reviewed": + parts.append("reviewed") + if memory.get("project"): + parts.append(f"project: {memory['project']}") + return "; ".join(parts) + + +def _drop_empty(data: dict[str, object]) -> dict[str, object]: + return {key: value for key, value in data.items() if value not in ("", [], {})} + + +def _memory_provenance(memory: Mapping[str, object]) -> dict[str, object]: + return _drop_empty({ + "path": memory.get("path", ""), + "source": memory.get("source", ""), + "date_captured": memory.get("date_captured", ""), + "updated_at": memory.get("updated_at", ""), + "last_update_source": memory.get("last_update_source", ""), + "review_status": memory.get("review_status", ""), + "reviewed_at": memory.get("reviewed_at", ""), + "status": memory.get("status", ""), + }) + + +def _page_reason(page: Mapping[str, object]) -> str: + relationship = str(page.get("relationship") or "") + if relationship == "primary": + return "best matching wiki page" + if relationship == "inbound": + return "links to the primary page" + if relationship == "forward": + return "linked from the primary page" + return "related wiki page" + + +def _page_provenance(page: Mapping[str, object]) -> dict[str, object]: + return _drop_empty({ + "path": page.get("path", ""), + "relationship": page.get("relationship", ""), + "type": page.get("type", ""), + "category": page.get("category", ""), + "source_count": page.get("source_count", ""), + "date_updated": page.get("date_updated", ""), + "date_published": page.get("date_published", ""), + }) + + +def _compact_memory(memory: Mapping[str, object]) -> dict[str, object]: + item = { + 
"kind": "memory", + "name": memory.get("name", ""), + "title": memory.get("title", ""), + "memory_type": memory.get("memory_type", ""), + "scope": memory.get("scope", ""), + "project": memory.get("project", ""), + "status": memory.get("status", ""), + "review_status": memory.get("review_status", ""), + "summary": memory.get("tldr") or memory.get("snippet") or "", + "score": memory.get("score", 0), + "rank_score": memory.get("rank_score", 0), + "recall": memory.get("recall", {}), + "review_issue_count": memory.get("review_issue_count", 0), + "highest_review_severity": memory.get("highest_review_severity", "none"), + "provenance": _memory_provenance(memory), + "why_selected": _memory_reason(memory), + } + return _drop_empty(item) + + +def _compact_page(page: Mapping[str, object], primary_chars: int, neighbor_chars: int) -> dict[str, object]: + relationship = str(page.get("relationship") or "") + max_chars = primary_chars if relationship == "primary" else neighbor_chars + return { + "kind": "page", + "name": page.get("name", ""), + "title": page.get("title", ""), + "type": page.get("type", ""), + "relationship": relationship, + "is_primary": bool(page.get("is_primary")), + "content": _trim_text(page.get("content", ""), max_chars), + "provenance": _page_provenance(page), + "why_selected": _page_reason(page), + } + + +def _compact_search_result(page: Mapping[str, object]) -> dict[str, object]: + return { + "name": page.get("name", ""), + "title": page.get("title", ""), + "type": page.get("type", ""), + "category": page.get("category", ""), + "score": page.get("score", 0), + "snippet": page.get("snippet", ""), + "provenance": _page_provenance(page), + } + + +def _compact_review(review: object, limit: int) -> dict[str, object]: + if not isinstance(review, Mapping): + return {"count": 0, "counts_by_severity": {}, "items": []} + items = [] + for item in list(review.get("items", []))[:limit]: + if not isinstance(item, Mapping): + continue + primary_action = 
item.get("primary_action") + action_kind = "" + if isinstance(primary_action, Mapping): + action_kind = str(primary_action.get("kind") or "") + items.append({ + "name": item.get("name", ""), + "title": item.get("title", ""), + "memory_type": item.get("memory_type", ""), + "scope": item.get("scope", ""), + "issue_count": item.get("issue_count", 0), + "highest_severity": item.get("highest_severity", "none"), + "primary_action": action_kind, + }) + return { + "count": review.get("count", 0), + "counts_by_severity": review.get("counts_by_severity", {}), + "items": items, + } + + +def _next_budget(current: str) -> str: + order = ["small", "medium", "large"] + try: + index = order.index(current) + except ValueError: + return "medium" + return order[min(index + 1, len(order) - 1)] + + +def _budget_item(selected: int, limit: int, has_more: bool) -> dict[str, object]: + return { + "selected": selected, + "limit": limit, + "has_more": has_more, + } + + +def _estimated_json_chars(value: object) -> int: + return len(json.dumps(value, ensure_ascii=False, sort_keys=True)) + + +def _estimated_tokens(chars: int) -> int: + # Practical rough count for agent budgeting; exact tokenizers vary by model. 
+ return max(1, (chars + 3) // 4) if chars else 0 + + +def _context_packet_budget_item(packet: list[dict[str, object]], limit: int) -> dict[str, object]: + chars = _estimated_json_chars(packet) + item = _budget_item(len(packet), limit, False) + item["estimated_chars"] = chars + item["estimated_tokens"] = _estimated_tokens(chars) + return item + + +def _follow_up_actions( + query: str, + budget_name: str, + project: str, + primary: object, + budget_report: Mapping[str, Mapping[str, object]], +) -> list[dict[str, object]]: + actions: list[dict[str, object]] = [] + if any(bool(section.get("has_more")) for section in budget_report.values()): + next_budget = _next_budget(budget_name) + if next_budget != budget_name: + args: dict[str, object] = {"query": query, "budget": next_budget} + if project: + args["project"] = project + actions.append({ + "when": "packet is relevant but too thin", + "tool": "query_link", + "arguments": args, + }) + if primary: + actions.append({ + "when": "need the full source-backed topic neighborhood", + "tool": "get_context", + "arguments": {"topic": primary}, + }) + actions.append({ + "when": "need a different angle or exact page candidates", + "tool": "search_wiki", + "arguments": {"query": query, "limit": 10}, + }) + return actions + + +def query_link( + wiki_dir: Path, + query: str, + cache: dict[str, Any], + records: Iterable[Mapping[str, object]], + *, + budget: str = "medium", + project: str | None = None, + review_command: str = "review-memory", +) -> dict[str, object]: + """Return a compact context packet for an agent query. + + The packet combines relevant local memory, ranked wiki search results, and + graph-neighborhood context without forcing the agent to read the whole wiki. 
+ """ + q = str(query or "").strip() + budget_name = normalize_budget(budget) + limits = BUDGETS[budget_name] + project_name = normalize_project(project) + record_list = list(records) + + if not q: + return { + "query": "", + "project": project_name, + "budget": budget_name, + "found": False, + "error": "query required", + "context_packet": [], + } + + raw_memories = recall_memories( + record_list, + q, + limit=limits["memories"] + 1, + project=project_name, + ) + memory_has_more = len(raw_memories) > limits["memories"] + memories = [_compact_memory(memory) for memory in raw_memories[: limits["memories"]]] + brief = memory_brief( + record_list, + query=q, + limit=limits["memories"], + review_command=review_command, + project=project_name, + ) + raw_search_results = search_pages(q, cache, limit=limits["search_results"] + 1) + search_has_more = len(raw_search_results) > limits["search_results"] + search_results = raw_search_results[: limits["search_results"]] + context = context_for_topic( + wiki_dir, + q, + cache, + limit=limits["context_pages"] + 1, + ) + raw_context_pages = [page for page in context.get("pages", []) if isinstance(page, Mapping)] + context_has_more = len(raw_context_pages) > limits["context_pages"] + pages = [ + _compact_page(page, limits["primary_chars"], limits["neighbor_chars"]) + for page in raw_context_pages[: limits["context_pages"]] + ] + packet = [*memories, *pages] + mode_parts = [] + if memories: + mode_parts.append("memory") + if pages: + mode_parts.append("wiki") + mode = "+".join(mode_parts) if mode_parts else "none" + + guidance = [ + "Use this packet before answering; do not read the whole wiki unless this packet is insufficient.", + "Prefer recall-ready reviewed memories for personalization and source-backed wiki pages for factual claims.", + "Use provenance.path/source/date fields to explain why Link knows something.", + "If important context appears missing, rerun query_link with a larger budget or call get_context on the primary 
page.", + "Do not create or update memory from this packet unless the user explicitly asks.", + ] + review = _compact_review(brief.get("review", {}), limit=limits["memories"]) + if review.get("count"): + guidance.insert(2, "Some memories need review; treat provisional memories carefully.") + budget_report = { + "memories": _budget_item(len(memories), limits["memories"], memory_has_more), + "wiki_search": _budget_item(len(search_results), limits["search_results"], search_has_more), + "graph_context": _budget_item(len(pages), limits["context_pages"], context_has_more), + "context_packet": _context_packet_budget_item(packet, limits["memories"] + limits["context_pages"]), + } + if any(bool(section.get("has_more")) for section in budget_report.values()): + guidance.insert(1, "This packet is budget-limited; use follow_up instead of scanning files manually.") + + return { + "query": q, + "project": project_name, + "budget": budget_name, + "found": bool(packet or search_results), + "strategy": { + "mode": mode, + "selection": "budgeted memory + ranked wiki + graph neighborhood", + "limits": limits, + }, + "budget_report": budget_report, + "follow_up": _follow_up_actions( + q, + budget_name, + project_name, + context.get("primary", ""), + budget_report, + ), + "memory": { + "count": len(memories), + "review": review, + "items": memories, + }, + "wiki": { + "found": bool(context.get("found")), + "primary": context.get("primary", ""), + "inbound_count": context.get("inbound_count", 0), + "forward_count": context.get("forward_count", 0), + "search_count": len(search_results), + "search_results": [_compact_search_result(page) for page in search_results], + "pages": pages, + }, + "context_packet": packet, + "agent_guidance": guidance, + } diff --git a/mcp_package/link_core/raw.py b/mcp_package/link_core/raw.py new file mode 100644 index 0000000..d557a21 --- /dev/null +++ b/mcp_package/link_core/raw.py @@ -0,0 +1,125 @@ +"""Shared raw-source creation helpers for Link.""" +from 
__future__ import annotations + +import re +from pathlib import Path + +from .files import atomic_write_text +from .security import clean_text_input, secret_value_warnings + + +ALLOWED_RAW_SOURCE_SUFFIXES = { + ".md", + ".markdown", + ".txt", + ".text", +} +DEFAULT_MAX_RAW_SOURCE_BYTES = 60 * 1024 + + +class RawSourceError(ValueError): + """User-correctable raw-source creation error.""" + + def __init__(self, message: str, *, status: int = 400, labels: list[str] | None = None) -> None: + super().__init__(message) + self.status = status + self.labels = labels or [] + + +def _slugify(value: str, fallback: str = "source") -> str: + slug = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-") + return slug or fallback + + +def _title_from_text(text: str) -> str: + for line in text.splitlines(): + stripped = line.strip() + if stripped.startswith("# "): + return clean_text_input(stripped[2:], max_len=120) + if stripped and not stripped.startswith("---"): + return clean_text_input(stripped, max_len=120) + return "" + + +def raw_source_filename(filename: object = "", title: object = "") -> str: + """Return a safe root-level raw filename, rejecting folders and unsupported suffixes.""" + requested = clean_text_input(filename, max_len=140) + title_text = clean_text_input(title, max_len=120) + if requested: + normalized = requested.replace("\\", "/").strip() + if "/" in normalized.strip("/"): + raise RawSourceError("filename must not include folders") + path = Path(normalized) + suffix = path.suffix.lower() or ".md" + if suffix not in ALLOWED_RAW_SOURCE_SUFFIXES: + allowed = ", ".join(sorted(ALLOWED_RAW_SOURCE_SUFFIXES)) + raise RawSourceError(f"filename must end with one of: {allowed}") + stem = path.stem + else: + suffix = ".md" + stem = title_text or "source" + return f"{_slugify(stem)}{suffix}" + + +def _unique_raw_path(raw_dir: Path, filename: str) -> Path: + path = raw_dir / filename + if not path.exists(): + return path + suffix = path.suffix + stem = path.stem + for 
index in range(2, 1000): + candidate = raw_dir / f"{stem}-{index}{suffix}" + if not candidate.exists(): + return candidate + raise RawSourceError("too many raw source files with the same name", status=409) + + +def create_raw_source( + root: Path, + *, + title: object = "", + text: object = "", + filename: object = "", + max_bytes: int = DEFAULT_MAX_RAW_SOURCE_BYTES, +) -> dict[str, object]: + """Create one local raw source file and return the next ingest prompt.""" + root = root.expanduser().resolve() + raw_dir = root / "raw" + source_text = str(text or "").strip() + if not source_text: + raise RawSourceError("text required") + size_bytes = len(source_text.encode("utf-8")) + if size_bytes > max_bytes: + raise RawSourceError(f"raw source too large; max {max_bytes} bytes", status=413) + + labels = secret_value_warnings(source_text) + if labels: + raise RawSourceError( + "source contains secret-looking values; redact before saving to raw/", + status=422, + labels=labels, + ) + + clean_title = clean_text_input(title, max_len=120) or _title_from_text(source_text) or "Source" + safe_filename = raw_source_filename(filename, clean_title) + raw_dir.mkdir(parents=True, exist_ok=True) + path = _unique_raw_path(raw_dir, safe_filename) + + if source_text.lstrip().startswith(("# ", "---")): + content = source_text + else: + content = f"# {clean_title}\n\n{source_text}" + if not content.endswith("\n"): + content += "\n" + atomic_write_text(path, content) + + rel = path.relative_to(root).as_posix() + return { + "created": True, + "path": rel, + "title": clean_title, + "size_bytes": path.stat().st_size, + "next_prompt": f"ingest {rel} into Link", + "proposal_prompt": f"propose memories from {rel}", + "secret_warnings": [], + } diff --git a/mcp_package/link_core/schema.py b/mcp_package/link_core/schema.py new file mode 100644 index 0000000..0d1b5c2 --- /dev/null +++ b/mcp_package/link_core/schema.py @@ -0,0 +1,161 @@ +"""Shared Link wiki schema helpers.""" +from __future__ import 
annotations + +import json +from pathlib import Path +from typing import Any + +from .files import atomic_write_json +from .log import utc_timestamp + + +CURRENT_SCHEMA_VERSION = 1 +SCHEMA_NAME = "link-wiki" +SCHEMA_FILE = "_link_schema.json" +REQUIRED_WIKI_DIRS = ( + "sources", + "concepts", + "entities", + "memories", + "comparisons", + "explorations", +) + + +def schema_path(wiki_dir: Path) -> Path: + return wiki_dir / SCHEMA_FILE + + +def _base_status(wiki_dir: Path) -> dict[str, object]: + path = schema_path(wiki_dir) + return { + "path": str(path), + "schema": SCHEMA_NAME, + "current_version": CURRENT_SCHEMA_VERSION, + "version": None, + "status": "missing", + "needs_migration": True, + "error": "", + } + + +def schema_status(wiki_dir: Path) -> dict[str, object]: + """Return schema marker status without mutating the wiki.""" + wiki_dir = wiki_dir.expanduser().resolve() + path = schema_path(wiki_dir) + status = _base_status(wiki_dir) + + if not path.exists(): + return status + + try: + data = json.loads(path.read_text(encoding="utf-8")) + except Exception as exc: + status.update({ + "status": "invalid", + "needs_migration": False, + "error": f"invalid schema marker: {exc}", + }) + return status + + if not isinstance(data, dict): + status.update({ + "status": "invalid", + "needs_migration": False, + "error": "invalid schema marker: root must be an object", + }) + return status + + marker_schema = data.get("schema", SCHEMA_NAME) + if marker_schema != SCHEMA_NAME: + status.update({ + "status": "invalid", + "needs_migration": False, + "error": f"invalid schema marker: schema must be {SCHEMA_NAME!r}", + }) + return status + + raw_version = data.get("version") + try: + version = int(raw_version) + except (TypeError, ValueError): + status.update({ + "status": "invalid", + "needs_migration": False, + "error": "invalid schema marker: version must be an integer", + }) + return status + + status["version"] = version + if version < CURRENT_SCHEMA_VERSION: + 
status.update({"status": "old", "needs_migration": True}) + elif version > CURRENT_SCHEMA_VERSION: + status.update({ + "status": "newer", + "needs_migration": False, + "error": ( + f"wiki schema {version} is newer than this runtime " + f"supports ({CURRENT_SCHEMA_VERSION})" + ), + }) + else: + status.update({"status": "current", "needs_migration": False}) + return status + + +def write_schema(wiki_dir: Path, version: int = CURRENT_SCHEMA_VERSION) -> dict[str, object]: + """Write the current schema marker and return the serialized payload.""" + wiki_dir = wiki_dir.expanduser().resolve() + wiki_dir.mkdir(parents=True, exist_ok=True) + payload: dict[str, Any] = { + "schema": SCHEMA_NAME, + "version": int(version), + "updated_at": utc_timestamp(), + } + atomic_write_json(schema_path(wiki_dir), payload) + return payload + + +def migrate_wiki(wiki_dir: Path) -> dict[str, object]: + """Apply safe, idempotent wiki structure migrations. + + Version 1 only writes the schema marker and ensures canonical wiki category + directories exist. It does not rewrite user pages. 
+ """ + wiki_dir = wiki_dir.expanduser().resolve() + before = schema_status(wiki_dir) + changes: list[str] = [] + + if before["status"] in {"invalid", "newer"}: + return { + "ok": False, + "migrated": False, + "previous": before, + "schema": before, + "changes": changes, + "error": before.get("error") or "schema migration refused", + } + + if not wiki_dir.exists(): + wiki_dir.mkdir(parents=True, exist_ok=True) + changes.append("created wiki directory") + + for dirname in REQUIRED_WIKI_DIRS: + path = wiki_dir / dirname + if not path.exists(): + path.mkdir(parents=True, exist_ok=True) + changes.append(f"created {dirname}/") + + if before["needs_migration"]: + write_schema(wiki_dir) + changes.append(f"wrote {SCHEMA_FILE}") + + after = schema_status(wiki_dir) + return { + "ok": after["status"] == "current", + "migrated": bool(changes), + "previous": before, + "schema": after, + "changes": changes, + "error": after.get("error") or "", + } diff --git a/mcp_package/link_core/search.py b/mcp_package/link_core/search.py new file mode 100644 index 0000000..28090dd --- /dev/null +++ b/mcp_package/link_core/search.py @@ -0,0 +1,244 @@ +"""Shared search indexing and ranking helpers for Link.""" +from __future__ import annotations + +import re +try: + import sqlite3 +except Exception: # pragma: no cover - depends on the host Python build + sqlite3 = None # type: ignore[assignment] +from typing import Any + + +def normalized_search_text(value: object) -> str: + """Normalize punctuation differences so natural queries match page slugs.""" + text = str(value).lower() + text = re.sub(r"[^a-z0-9]+", " ", text) + return re.sub(r"\s+", " ", text).strip() + + +def search_words(value: object) -> set[str]: + return {word for word in re.split(r"\W+", normalized_search_text(value)) if len(word) >= 3} + + +def _search_terms(value: object) -> list[str]: + seen: set[str] = set() + terms: list[str] = [] + for word in re.split(r"\W+", normalized_search_text(value)): + if len(word) < 3 or word in 
seen: + continue + seen.add(word) + terms.append(word) + return terms + + +def build_fts_index(pages: list[dict[str, Any]], fulltext: dict[str, str]) -> Any | None: + """Build an optional in-memory SQLite FTS index. + + Markdown remains the source of truth. This index is derived, local, and + rebuilt with the normal wiki cache; hosts without sqlite/FTS fall back to + the token index. + """ + if sqlite3 is None or not pages: + return None + conn = None + try: + conn = sqlite3.connect(":memory:") + conn.execute("CREATE VIRTUAL TABLE page_fts USING fts5(name UNINDEXED, title, metadata, body)") + rows = [] + for page in pages: + stem = str(page["name"]).lower() + metadata = " ".join([ + stem, + str(page.get("type") or ""), + str(page.get("category") or ""), + str(page.get("tldr") or ""), + " ".join(str(alias) for alias in page.get("aliases", [])), + " ".join(str(tag) for tag in page.get("tags", [])), + ]) + rows.append((stem, str(page.get("title") or ""), metadata, fulltext.get(stem, ""))) + conn.executemany("INSERT INTO page_fts(name, title, metadata, body) VALUES (?, ?, ?, ?)", rows) + return _FtsIndex(conn) + except Exception: + if conn is not None: + conn.close() + return None + + +def _fts_expr(terms: list[str], operator: str) -> str: + return f" {operator} ".join(f'"{term}"' for term in terms) + + +class _FtsIndex: + def __init__(self, conn: Any) -> None: + self._conn = conn + + def search(self, query: str, limit: int) -> list[str]: + terms = _search_terms(query) + if not terms: + return [] + expressions = [_fts_expr(terms, "AND")] + if len(terms) > 1: + expressions.append(_fts_expr(terms, "OR")) + for expression in expressions: + try: + rows = self._conn.execute( + "SELECT name FROM page_fts WHERE page_fts MATCH ? 
ORDER BY bm25(page_fts) LIMIT ?", + (expression, max(1, limit)), + ).fetchall() + except Exception: + continue + names = [str(row[0]) for row in rows] + if names: + return names + return [] + + def close(self) -> None: + conn = self._conn + if conn is None: + return + self._conn = None + conn.close() + + def __del__(self) -> None: + try: + self.close() + except Exception: + pass + + +def _fts_candidates(cache: dict[str, Any], query: str, limit: int) -> list[str]: + index = cache.get("fts_index") + if not isinstance(index, _FtsIndex): + return [] + return index.search(query, limit) + + +def _exact_search_candidates(q_lower: str, q_normalized: str, pages: list[dict[str, Any]]) -> set[str]: + candidates: set[str] = set() + for page in pages: + stem = str(page.get("name") or "").lower() + if not stem: + continue + title = str(page.get("title") or "") + if q_lower == stem or (q_normalized and q_normalized == normalized_search_text(stem)): + candidates.add(stem) + continue + if q_lower in title.lower() or (q_normalized and q_normalized in normalized_search_text(title)): + candidates.add(stem) + continue + aliases = page.get("aliases", []) + tags = page.get("tags", []) + tldr = str(page.get("tldr") or "") + if any(q_lower in str(alias).lower() or (q_normalized and q_normalized in normalized_search_text(alias)) for alias in aliases): + candidates.add(stem) + continue + if any(q_lower in str(tag).lower() or (q_normalized and q_normalized in normalized_search_text(tag)) for tag in tags): + candidates.add(stem) + continue + if q_lower in tldr.lower() or (q_normalized and q_normalized in normalized_search_text(tldr)): + candidates.add(stem) + return candidates + + +def close_wiki_cache(cache: dict[str, Any]) -> None: + index = cache.get("fts_index") if isinstance(cache, dict) else None + close = getattr(index, "close", None) + if callable(close): + close() + if isinstance(cache, dict): + cache["fts_index"] = None + + +def search_pages(query: str, cache: dict[str, Any], limit: 
int = 20) -> list[dict[str, Any]]: + q = query.strip() + if not q: + return [] + q_lower = q.lower() + q_normalized = normalized_search_text(q) + query_tokens = [token for token in re.split(r"\W+", q_lower) if len(token) >= 3] + pages = cache["pages"] + page_map = cache["page_map"] + token_index = cache["token_index"] + meta_token_index = cache["meta_token_index"] + fulltext = cache["fulltext"] + normalized_fulltext = cache.get("normalized_fulltext", {}) + text_words_index = cache.get("text_words_index", {}) + meta_words_index = cache.get("meta_words_index", {}) + snippet_index = cache["snippet_index"] + + is_single_token = bool(re.match(r"^\w+$", q_lower)) + if is_single_token and q_lower in token_index: + candidates = token_index[q_lower] | meta_token_index.get(q_lower, set()) + elif query_tokens: + token_sets = [ + token_index.get(token, set()) | meta_token_index.get(token, set()) + for token in query_tokens + if token in token_index or token in meta_token_index + ] + if token_sets: + intersection = set.intersection(*token_sets) + candidates = intersection if intersection else set.union(*token_sets) + else: + candidates = {page["name"].lower() for page in pages} + else: + candidates = {page["name"].lower() for page in pages} + + candidate_cap = max(limit * 25, 200) + fts_candidates = _fts_candidates(cache, q, limit=candidate_cap) + if fts_candidates: + fts_set = set(fts_candidates) + if len(candidates) > candidate_cap: + candidates = fts_set | _exact_search_candidates(q_lower, q_normalized, pages) + else: + candidates = candidates | fts_set + + scored: list[tuple[int, dict[str, Any]]] = [] + for stem in candidates: + page = page_map.get(stem) + if not page: + continue + score = 0 + title_normalized = normalized_search_text(page["title"]) + stem_normalized = normalized_search_text(stem) + aliases = page.get("aliases", []) + tags = page.get("tags", []) + tldr = page.get("tldr", "") + text_lower = fulltext.get(stem, "") + meta_words = meta_words_index.get(stem) + 
if meta_words is None: + meta_words = search_words(" ".join([ + str(page["title"]), + stem, + str(tldr), + " ".join(str(alias) for alias in aliases), + " ".join(str(tag) for tag in tags), + ])) + + if q_lower in str(page["title"]).lower() or (q_normalized and q_normalized in title_normalized): + score += 10 + if q_lower == stem or (q_normalized and q_normalized == stem_normalized): + score += 20 + if any(q_lower in alias or (q_normalized and q_normalized in normalized_search_text(alias)) for alias in aliases): + score += 8 + if any(q_lower in str(tag).lower() or (q_normalized and q_normalized in normalized_search_text(tag)) for tag in tags): + score += 5 + if q_lower in str(tldr).lower() or (q_normalized and q_normalized in normalized_search_text(tldr)): + score += 3 + text_normalized = normalized_fulltext.get(stem, "") + if text_lower and (q_lower in text_lower or (q_normalized and q_normalized in text_normalized)): + score += 2 + if query_tokens and all(token in meta_words for token in query_tokens): + score += 6 + elif query_tokens and any(token in meta_words for token in query_tokens): + score += 1 + if query_tokens and text_normalized: + text_words = text_words_index.get(stem) + if text_words is None: + text_words = search_words(text_normalized) + if all(token in text_words for token in query_tokens): + score += 2 + if score > 0: + scored.append((score, {**page, "score": score, "snippet": snippet_index.get(stem, "")})) + + scored.sort(key=lambda item: (-item[0], str(item[1]["title"]).lower())) + return [record for _, record in scored[:limit]] diff --git a/mcp_package/link_core/security.py b/mcp_package/link_core/security.py new file mode 100644 index 0000000..ded8a7e --- /dev/null +++ b/mcp_package/link_core/security.py @@ -0,0 +1,82 @@ +"""Local security hygiene helpers for Link.""" +from __future__ import annotations + +import re +from pathlib import Path + + +SECRET_VALUE_PATTERNS = ( + ("Anthropic API key", re.compile(r"\bsk-ant-[A-Za-z0-9_-]{20,}\b")), + 
("OpenAI API key", re.compile(r"\bsk-[A-Za-z0-9_-]{20,}\b")), + ("GitHub token", re.compile(r"\bgh[pousr]_[A-Za-z0-9_]{20,}\b")), + ("AWS access key", re.compile(r"\bA[SK]IA[0-9A-Z]{16}\b")), + ("PyPI token", re.compile(r"\bpypi-[A-Za-z0-9_-]{20,}\b")), + ("Google API key", re.compile(r"\bAIza[0-9A-Za-z_-]{35}\b")), + ("Slack token", re.compile(r"\bxox[baprs]-[A-Za-z0-9-]{20,}\b")), + ("Stripe live secret key", re.compile(r"\bsk_live_[A-Za-z0-9]{20,}\b")), + ("Private key block", re.compile(r"-----BEGIN [A-Z ]*PRIVATE KEY-----")), +) + + +def clean_text_input(value: object, max_len: int = 500) -> str: + """Normalize optional user/tool text input to a stripped, bounded string.""" + if value is None: + return "" + return str(value).strip()[:max_len] + + +def secret_value_warnings(text: str) -> list[str]: + """Return labels for secret-looking values found in text.""" + warnings: list[str] = [] + for label, pattern in SECRET_VALUE_PATTERNS: + if pattern.search(text): + warnings.append(label) + return warnings + + +def secret_file_warnings(path: Path, chunk_size: int = 65536, tail_size: int = 512) -> list[str]: + """Return secret-looking labels from a file without loading it all at once.""" + return list(secret_file_scan(path, chunk_size=chunk_size, tail_size=tail_size)["labels"]) + + +def secret_file_scan(path: Path, chunk_size: int = 65536, tail_size: int = 512) -> dict[str, object]: + """Scan a file for secret-looking values and report read failures explicitly.""" + found: set[str] = set() + read_size = max(1, chunk_size) + tail_len = max(0, tail_size) + tail = "" + try: + with path.open("r", encoding="utf-8", errors="replace") as handle: + while True: + chunk = handle.read(read_size) + if not chunk: + break + text = tail + chunk + found.update(secret_value_warnings(text)) + if len(found) == len(SECRET_VALUE_PATTERNS): + break + tail = text[-tail_len:] if tail_len else "" + except OSError as exc: + return { + "labels": [], + "readable": False, + "error": str(exc), + 
} + return { + "labels": [label for label, _pattern in SECRET_VALUE_PATTERNS if label in found], + "readable": True, + "error": "", + } + + +def redact_secret_values(text: str, replacement: str = "[redacted-secret]") -> tuple[str, list[str], int]: + """Replace secret-looking values and return redacted text, labels, and count.""" + labels: list[str] = [] + total = 0 + redacted = text + for label, pattern in SECRET_VALUE_PATTERNS: + redacted, count = pattern.subn(replacement, redacted) + if count: + labels.append(label) + total += count + return redacted, labels, total diff --git a/mcp_package/link_core/status.py b/mcp_package/link_core/status.py new file mode 100644 index 0000000..ecfb465 --- /dev/null +++ b/mcp_package/link_core/status.py @@ -0,0 +1,175 @@ +"""Shared Link runtime status helpers.""" +from __future__ import annotations + +from pathlib import Path +from typing import Any, Iterable, Mapping + +from .memory import memory_records +from .schema import schema_status +from .validation import validate_wiki +from .wiki import build_wiki_cache, close_wiki_cache + + +def _action(label: str, tool: str, arguments: dict[str, object] | None = None) -> dict[str, object]: + return { + "label": label, + "tool": tool, + "arguments": arguments or {}, + } + + +def _warning(code: str, message: str, exc: Exception) -> dict[str, str]: + detail = str(exc).strip() + payload = { + "code": code, + "message": message, + } + if detail: + payload["detail"] = detail[:200] + return payload + + +def link_status( + wiki_dir: Path, + *, + version: str = "", + cache: Mapping[str, Any] | None = None, + records: Iterable[Mapping[str, object]] | None = None, + include_validation: bool = False, +) -> dict[str, object]: + """Return a compact readiness summary for agents and local clients.""" + wiki_dir = wiki_dir.expanduser().resolve() + required_paths = { + "wiki": wiki_dir, + "index": wiki_dir / "index.md", + "log": wiki_dir / "log.md", + "backlinks": wiki_dir / "_backlinks.json", + 
"memories": wiki_dir / "memories", + } + missing = [name for name, path in required_paths.items() if not path.exists()] + pages: list[Mapping[str, object]] = [] + record_list: list[Mapping[str, object]] = [] + warnings: list[dict[str, str]] = [] + search_backend = "unavailable" + if wiki_dir.exists(): + wiki_cache: Mapping[str, Any] | None = None + owns_cache = False + try: + if cache is None: + wiki_cache = build_wiki_cache(wiki_dir) + owns_cache = True + else: + wiki_cache = cache + pages = list(wiki_cache.get("pages", [])) + search_backend = str(wiki_cache.get("search_backend") or "token-index") + read_warning_count = int(wiki_cache.get("read_warning_count") or 0) + if read_warning_count: + warnings.append({ + "code": "cache_read_warnings", + "message": f"{read_warning_count} wiki page(s) could not be read; search and page counts may be incomplete.", + "detail": str((wiki_cache.get("read_warnings") or [])[:5]), + }) + except Exception as exc: + pages = [] + warnings.append(_warning( + "cache_unavailable", + "Could not build the wiki page cache; page counts and search backend may be incomplete.", + exc, + )) + finally: + if owns_cache and isinstance(wiki_cache, dict): + close_wiki_cache(wiki_cache) + try: + record_list = list(records if records is not None else memory_records(wiki_dir)) + except Exception as exc: + record_list = [] + warnings.append(_warning( + "memory_records_unavailable", + "Could not read memory records; memory counts may be incomplete.", + exc, + )) + + validation_summary: dict[str, object] = {"checked": False} + validation_findings: list[Mapping[str, str]] = [] + if include_validation and wiki_dir.exists(): + validation = validate_wiki(wiki_dir) + validation_findings = list(validation.get("findings") or []) + validation_error_codes = sorted({ + str(finding.get("code") or "") + for finding in validation_findings + if str(finding.get("severity") or "") == "error" + }) + validation_warning_codes = sorted({ + str(finding.get("code") or "") + for 
finding in validation_findings + if str(finding.get("severity") or "") == "warning" + }) + validation_summary = { + "checked": True, + "passed": validation["passed"], + "error_count": validation["error_count"], + "warning_count": validation["warning_count"], + "finding_count": validation["finding_count"], + "error_codes": validation_error_codes, + "warning_codes": validation_warning_codes, + } + + active_memory_count = sum(1 for record in record_list if str(record.get("status") or "active").lower() == "active") + needs_review_count = sum( + 1 + for record in record_list + if str(record.get("review_status") or "pending").lower() != "reviewed" + and str(record.get("status") or "active").lower() == "active" + ) + content_page_count = sum( + 1 + for page in pages + if str(page.get("path") or "") not in {"wiki/index.md", "wiki/log.md"} + ) + cache_degraded = any(warning.get("code") in {"cache_unavailable", "cache_read_warnings"} for warning in warnings) + ready = not missing and bool(pages) and not cache_degraded and ( + not include_validation or bool(validation_summary.get("passed")) + ) + schema = schema_status(wiki_dir) + + next_actions: list[dict[str, object]] = [] + if missing: + next_actions.append(_action("repair or scaffold Link structure", "doctor", {"fix": True})) + if schema.get("status") in {"missing", "old"}: + next_actions.append(_action("write current Link wiki schema marker", "migrate_wiki")) + elif schema.get("status") == "invalid": + next_actions.append(_action("inspect invalid Link wiki schema marker", "doctor")) + elif schema.get("status") == "newer": + next_actions.append(_action("upgrade Link before writing this wiki", "upgrade_link")) + if include_validation and validation_summary.get("checked") and not validation_summary.get("passed"): + error_codes = set(validation_summary.get("error_codes") or []) + if error_codes & {"stale_backlinks", "invalid_backlinks"}: + next_actions.append(_action("rebuild graph index", "rebuild_backlinks")) + if 
error_codes - {"stale_backlinks", "invalid_backlinks"}: + next_actions.append(_action("repair validation findings", "doctor", {"fix": True})) + next_actions.append(_action("rerun validation gate", "validate_wiki")) + if ready and content_page_count: + next_actions.append(_action("answer with compact local context", "query_link", {"query": ""})) + next_actions.append(_action("prime agent memory before work", "memory_brief", {"query": ""})) + elif ready: + next_actions.append(_action("add raw sources or inspect ingest readiness", "ingest_status")) + next_actions.append(_action("show first-run prompts", "starter_prompts")) + elif not missing: + next_actions.append(_action("inspect wiki health", "validate_wiki")) + + return { + "ready": ready, + "version": version, + "wiki": str(wiki_dir), + "missing": missing, + "page_count": len(pages), + "content_page_count": content_page_count, + "memory_count": len(record_list), + "active_memory_count": active_memory_count, + "needs_review_count": needs_review_count, + "search_backend": search_backend, + "schema": schema, + "validation": validation_summary, + "warnings": warnings, + "next_actions": next_actions, + } diff --git a/mcp_package/link_core/validation.py b/mcp_package/link_core/validation.py new file mode 100644 index 0000000..bcf9b04 --- /dev/null +++ b/mcp_package/link_core/validation.py @@ -0,0 +1,200 @@ +"""Wiki validation helpers for Link ingest gates.""" +from __future__ import annotations + +import re +from pathlib import Path +from typing import Any + +from .frontmatter import parse_frontmatter +from .wiki import WIKILINK_RE, load_backlinks_index + + +TYPE_DIRECTORIES = { + "sources": "source", + "concepts": "concept", + "entities": "entity", + "memories": "memory", + "comparisons": "comparison", + "explorations": "exploration", +} + +REQUIRED_FIELDS = { + "source": ("type", "title"), + "concept": ("type", "title"), + "entity": ("type", "title"), + "memory": ("type", "title", "memory_type", "scope", "status", 
"source", "review_status"), + "comparison": ("type", "title"), + "exploration": ("type", "title"), +} + +REQUIRED_SECTIONS = { + "source": ("Summary", "Raw Source"), + "concept": ("Overview", "Sources"), + "entity": ("Overview", "Sources"), + "memory": ("Memory", "Source"), + "comparison": ("Sources",), + "exploration": ("Answer", "Sources"), +} + +SUMMARY_RE = re.compile(r">\s*\*\*(?:TLDR|Query):\*\*", re.IGNORECASE) + + +def _finding(severity: str, code: str, path: str, message: str) -> dict[str, str]: + return { + "severity": severity, + "code": code, + "path": path, + "message": message, + } + + +def _section_names(body: str) -> set[str]: + return { + match.group(1).strip().lower() + for match in re.finditer(r"^##\s+(.+?)\s*$", body, flags=re.MULTILINE) + } + + +def _has_section(body: str, required: str) -> bool: + names = _section_names(body) + required_lower = required.lower() + if required_lower in names: + return True + if required_lower == "sources": + return "sources consulted" in names + return False + + +def _markdown_pages(wiki_dir: Path) -> list[Path]: + if not wiki_dir.exists(): + return [] + return sorted(path for path in wiki_dir.rglob("*.md") if not path.name.startswith(".")) + + +def _normalize_links(index: dict[str, dict[str, list[str]]]) -> dict[str, dict[str, list[str]]]: + return { + "backlinks": { + str(key): sorted(str(item) for item in value) + for key, value in index.get("backlinks", {}).items() + }, + "forward": { + str(key): sorted(str(item) for item in value) + for key, value in index.get("forward", {}).items() + }, + } + + +def _add_links_to_index( + source: str, + text: str, + backlinks: dict[str, list[str]], + forward_links: dict[str, list[str]], +) -> None: + for match in WIKILINK_RE.finditer(text): + target = match.group(1).strip().lower() + if not target or target == source: + continue + backlinks.setdefault(target, []) + if source not in backlinks[target]: + backlinks[target].append(source) + forward_links.setdefault(source, []) 
+ if target not in forward_links[source]: + forward_links[source].append(target) + + +def validate_wiki(wiki_dir: Path, *, strict: bool = False) -> dict[str, Any]: + """Validate Link wiki structure after agent writes or ingest.""" + wiki_dir = wiki_dir.expanduser().resolve() + findings: list[dict[str, str]] = [] + required_paths = [ + wiki_dir, + wiki_dir / "index.md", + wiki_dir / "log.md", + wiki_dir / "_backlinks.json", + *(wiki_dir / dirname for dirname in TYPE_DIRECTORIES), + ] + for path in required_paths: + if not path.exists(): + rel = path.name if path == wiki_dir else path.relative_to(wiki_dir).as_posix() + findings.append(_finding("error", "missing_required_path", rel, f"Missing required path: {rel}")) + + pages = _markdown_pages(wiki_dir) + stems = {path.stem.lower() for path in pages} + unreadable_pages: set[str] = set() + expected_backlinks: dict[str, dict[str, list[str]]] = {"backlinks": {}, "forward": {}} + for page in pages: + rel = page.relative_to(wiki_dir).as_posix() + try: + text = page.read_text(encoding="utf-8", errors="replace") + except OSError as exc: + unreadable_pages.add(rel) + findings.append(_finding("error", "unreadable_page", rel, f"Could not read wiki page: {exc}")) + continue + _add_links_to_index( + page.stem.lower(), + text, + expected_backlinks["backlinks"], + expected_backlinks["forward"], + ) + if rel in {"index.md", "log.md"}: + continue + meta, body = parse_frontmatter(text) + top_dir = rel.split("/", 1)[0] + expected_type = TYPE_DIRECTORIES.get(top_dir) + page_type = str(meta.get("type") or "").strip() + + if not text.startswith("---\n"): + findings.append(_finding("error", "missing_frontmatter", rel, "Page must start with YAML-style frontmatter.")) + if expected_type is None: + findings.append(_finding("error", "invalid_directory", rel, "Wiki page is outside a known Link category directory.")) + continue + if page_type != expected_type: + findings.append( + _finding( + "error", + "type_directory_mismatch", + rel, + f"Page 
in {top_dir}/ must declare type: {expected_type}.", + ) + ) + + required_fields = REQUIRED_FIELDS.get(expected_type, ("type", "title")) + for field in required_fields: + if not str(meta.get(field) or "").strip(): + findings.append(_finding("error", "missing_frontmatter_field", rel, f"Missing required frontmatter field: {field}")) + + if not SUMMARY_RE.search(body): + findings.append(_finding("warning", "missing_summary", rel, "Page should include a TLDR or Query summary.")) + + for section in REQUIRED_SECTIONS.get(expected_type, ()): + if not _has_section(body, section): + findings.append(_finding("error", "missing_required_section", rel, f"Missing required section: ## {section}")) + + for match in WIKILINK_RE.finditer(body): + target = match.group(1).strip().lower() + if target and target not in stems: + findings.append(_finding("error", "dead_wikilink", rel, f"Dead wikilink: [[{target}]]")) + + backlinks, backlink_error = load_backlinks_index( + wiki_dir / "_backlinks.json", + missing_error="missing wiki/_backlinks.json", + invalid_prefix="invalid wiki/_backlinks.json", + ) + if backlink_error: + findings.append(_finding("error", "invalid_backlinks", "_backlinks.json", backlink_error)) + elif not unreadable_pages: + if _normalize_links(backlinks) != _normalize_links(expected_backlinks): + findings.append(_finding("error", "stale_backlinks", "_backlinks.json", "Backlink index is stale; run rebuild-backlinks.")) + + error_count = sum(1 for finding in findings if finding["severity"] == "error") + warning_count = sum(1 for finding in findings if finding["severity"] == "warning") + passed = error_count == 0 and (warning_count == 0 if strict else True) + return { + "wiki": str(wiki_dir), + "strict": strict, + "passed": passed, + "error_count": error_count, + "warning_count": warning_count, + "finding_count": len(findings), + "findings": findings, + } diff --git a/mcp_package/link_core/version.py b/mcp_package/link_core/version.py new file mode 100644 index 
0000000..181227b --- /dev/null +++ b/mcp_package/link_core/version.py @@ -0,0 +1,4 @@ +"""Shared Link release version.""" +from __future__ import annotations + +LINK_VERSION = "1.1.0" diff --git a/mcp_package/link_core/web_assets.py b/mcp_package/link_core/web_assets.py new file mode 100644 index 0000000..0b293bf --- /dev/null +++ b/mcp_package/link_core/web_assets.py @@ -0,0 +1,912 @@ +"""Static CSS and JavaScript assets for the local Link web UI.""" +from __future__ import annotations + +__all__ = [ + "CSS", + "THEME_INIT_JS", + "THEME_CONTROL_JS", + "MEMORY_ACTION_JS", + "COPY_BUTTON_JS", + "RAW_SOURCE_JS", + "PROPOSAL_UI_JS", +] + +CSS = """ +* { box-sizing: border-box; margin: 0; padding: 0; } +:root { + color-scheme: light; + --bg: #ffffff; + --text: #222222; + --text-strong: #222222; + --muted: #666666; + --subtle: #888888; + --faint: #aaaaaa; + --link: #0645ad; + --border: #d0d7de; + --border-soft: #eeeeee; + --surface: #ffffff; + --surface-muted: #f6f8fa; + --surface-code: #f6f6f6; + --surface-code-inline: #f0f0f0; + --surface-table: #f8f8f8; + --surface-graph: #101418; + --surface-empty: #fafafa; + --mark-bg: #fff3cd; + --button-bg: #ffffff; + --button-hover: #f6f8fa; + --button-text: #24292f; + --button-disabled: #8c959f; + --accent: #0969da; + --accent-soft: #6ea8fe; + --quote-border: #cccccc; + --quote-text: #555555; + --shadow: rgba(0,0,0,0.15); +} +@media (prefers-color-scheme: dark) { + :root:not([data-theme="light"]) { + color-scheme: dark; + --bg: #000000; + --text: #e7e7e7; + --text-strong: #f2f2f2; + --muted: #b7b7b7; + --subtle: #8e8e8e; + --faint: #777777; + --link: #7db7ff; + --border: #2a2a2a; + --border-soft: #1f1f1f; + --surface: #080808; + --surface-muted: #101010; + --surface-code: #0d0d0d; + --surface-code-inline: #151515; + --surface-table: #0d0d0d; + --surface-graph: #05090d; + --surface-empty: #080808; + --mark-bg: #3b2f00; + --button-bg: #0f0f0f; + --button-hover: #171717; + --button-text: #e7e7e7; + --button-disabled: #777777; + 
--accent: #4ea1ff; + --accent-soft: #7db7ff; + --quote-border: #333333; + --quote-text: #c7c7c7; + --shadow: rgba(0,0,0,0.55); + } +} +:root[data-theme="dark"] { + color-scheme: dark; + --bg: #000000; + --text: #e7e7e7; + --text-strong: #f2f2f2; + --muted: #b7b7b7; + --subtle: #8e8e8e; + --faint: #777777; + --link: #7db7ff; + --border: #2a2a2a; + --border-soft: #1f1f1f; + --surface: #080808; + --surface-muted: #101010; + --surface-code: #0d0d0d; + --surface-code-inline: #151515; + --surface-table: #0d0d0d; + --surface-graph: #05090d; + --surface-empty: #080808; + --mark-bg: #3b2f00; + --button-bg: #0f0f0f; + --button-hover: #171717; + --button-text: #e7e7e7; + --button-disabled: #777777; + --accent: #4ea1ff; + --accent-soft: #7db7ff; + --quote-border: #333333; + --quote-text: #c7c7c7; + --shadow: rgba(0,0,0,0.55); +} +:root[data-theme="light"] { color-scheme: light; } +html { overflow-x: hidden; background: var(--bg); } +body { font-family: Georgia, "Times New Roman", serif; background: var(--bg); color: var(--text); + width: 100%; max-width: 760px; margin: 0 auto; padding: 20px; + overflow-x: hidden; overflow-wrap: anywhere; } +body.graph-page { max-width: min(1440px, 100%); } +a { color: var(--link); } +a, p, li, code { overflow-wrap: anywhere; } +a:hover { text-decoration: underline; } + +header { border-bottom: 1px solid var(--border); padding-bottom: 12px; margin-bottom: 24px; } +header .header-top { display: flex; align-items: flex-start; justify-content: space-between; gap: 16px; margin-bottom: 12px; } +header .logo { font-size: 24px; font-weight: bold; letter-spacing: 0; white-space: nowrap; flex: 0 0 auto; } +header .logo a { color: var(--text-strong); text-decoration: none; display: inline-flex; align-items: center; gap: 8px; } +header .logo img { width: 28px; height: 28px; border-radius: 7px; flex: none; } +header .logo small { font-weight: normal; font-size: 13px; color: var(--subtle); margin-left: 8px; } +header nav { display: flex; gap: 10px 16px; 
font-size: 14px; font-family: sans-serif; flex-wrap: wrap; min-width: 0; align-items: center; } +header .header-tools { display: grid; justify-items: end; gap: 7px; flex: 0 0 220px; min-width: 170px; max-width: 42vw; } +header form { display: block; width: 100%; } +header input { padding: 4px 8px; border: 1px solid var(--border); border-radius: 4px; font-size: 13px; width: 100%; background: var(--surface); color: var(--text); } +header .theme-toggle { border: 1px solid var(--border); background: var(--button-bg); color: var(--button-text); + border-radius: 999px; padding: 3px 8px; font: 12px -apple-system, BlinkMacSystemFont, sans-serif; + cursor: pointer; display: inline-flex; align-items: center; gap: 6px; max-width: 100%; } +header .theme-toggle:hover { background: var(--button-hover); } +header .theme-icon { width: 14px; height: 14px; border-radius: 50%; border: 1px solid currentColor; + background: linear-gradient(90deg, currentColor 0 50%, transparent 50% 100%); flex: none; } +header .theme-text { white-space: nowrap; } + +.breadcrumb { font-size: 13px; color: var(--subtle); margin-bottom: 12px; font-family: sans-serif; } +.breadcrumb a { color: var(--link); } + +.meta { font-size: 13px; color: var(--muted); margin-bottom: 16px; font-family: sans-serif; } +.meta .badge { background: var(--surface-muted); padding: 1px 8px; border-radius: 3px; font-size: 12px; } + +h1 { font-size: 26px; margin-bottom: 4px; line-height: 1.3; } +h2 { font-size: 20px; margin-top: 28px; margin-bottom: 10px; border-bottom: 1px solid var(--border-soft); padding-bottom: 4px; } +h3 { font-size: 17px; margin-top: 20px; margin-bottom: 8px; } +p { line-height: 1.7; margin-bottom: 12px; } +ul, ol { margin: 8px 0 12px 28px; line-height: 1.7; } +li { margin-bottom: 3px; } +blockquote { border-left: 3px solid var(--quote-border); padding: 6px 16px; margin: 12px 0; color: var(--quote-text); } +pre { background: var(--surface-code); padding: 14px; border-radius: 4px; overflow-x: auto; margin: 
12px 0; + font-size: 13px; font-family: Menlo, monospace; } +code { font-family: Menlo, monospace; font-size: 0.9em; } +p code { background: var(--surface-code-inline); padding: 1px 5px; border-radius: 3px; } +table { border-collapse: collapse; width: 100%; margin: 12px 0; font-size: 15px; } +th, td { border: 1px solid var(--border); padding: 7px 12px; text-align: left; } +th { background: var(--surface-table); } +hr { border: none; border-top: 1px solid var(--border); margin: 24px 0; } + +.home-stats { display: flex; gap: 24px; margin: 20px 0; font-family: sans-serif; font-size: 14px; } +.home-stats .stat { text-align: center; } +.home-stats .stat .num { font-size: 28px; font-weight: bold; color: var(--accent-soft); display: block; } +.home-stats .stat .label { color: var(--subtle); font-size: 12px; } +.product-lanes { display: grid; grid-template-columns: repeat(3, minmax(0, 1fr)); gap: 12px; margin: 18px 0 22px; } +.product-lane { border: 1px solid var(--border-soft); border-radius: 6px; background: var(--surface); padding: 12px; font-family: sans-serif; } +.product-lane h2 { border: 0; margin: 0 0 8px; padding: 0; font-size: 15px; font-family: sans-serif; } +.product-lane p { margin: 0; color: var(--muted); line-height: 1.45; font-size: 13px; } +.product-lane code { white-space: normal; overflow-wrap: anywhere; } + +.page-list { list-style: none; padding: 0; margin: 12px 0; } +.page-list li { padding: 6px 0; border-bottom: 1px solid var(--border-soft); } +.page-list li:last-child { border-bottom: none; } +.page-list .type { font-size: 11px; color: var(--subtle); font-family: sans-serif; margin-left: 6px; } +.pager { display: flex; align-items: center; flex-wrap: wrap; gap: 8px; margin: 12px 0; font-family: sans-serif; color: var(--muted); font-size: 13px; } +.pager .button-link { border: 1px solid var(--border); border-radius: 4px; padding: 4px 9px; color: var(--button-text); background: var(--button-bg); text-decoration: none; } +.pager .button-link:hover { 
background: var(--button-hover); } +.pager .button-link.disabled { color: var(--button-disabled); background: transparent; cursor: default; } +.section-heading { display: flex; justify-content: space-between; align-items: baseline; gap: 12px; + margin-top: 28px; border-bottom: 1px solid var(--border-soft); } +.section-heading h2 { margin: 0; border: 0; padding-bottom: 4px; } +.section-heading a { font-size: 13px; font-family: sans-serif; font-weight: normal; } +.memory-profile { margin: 18px 0; } +.memory-profile .summary { color: var(--muted); font-family: sans-serif; margin-bottom: 16px; } +.memory-profile .memory-meta { color: var(--subtle); font-size: 12px; font-family: sans-serif; } +.brief-form { display: flex; gap: 8px; flex-wrap: wrap; margin: 14px 0; font-family: sans-serif; } +.brief-form input { flex: 1 1 220px; min-width: 0; padding: 6px 8px; border: 1px solid var(--border); + border-radius: 4px; background: var(--surface); color: var(--text); } +.brief-form button { border: 1px solid var(--border); background: var(--button-bg); color: var(--button-text); + border-radius: 4px; padding: 6px 10px; cursor: pointer; } +.brief-form button:hover { background: var(--button-hover); } +.raw-source-form { display: grid; gap: 10px; margin: 16px 0; padding: 12px; + border: 1px solid var(--border-soft); border-radius: 6px; background: var(--surface); font-family: sans-serif; } +.raw-source-form label { display: grid; gap: 4px; color: var(--muted); font-size: 12px; } +.raw-source-form input, +.raw-source-form textarea { width: 100%; min-width: 0; padding: 8px 9px; border: 1px solid var(--border); + border-radius: 4px; background: var(--bg); color: var(--text); font: inherit; } +.raw-source-form textarea { min-height: 150px; resize: vertical; line-height: 1.45; } +.raw-source-controls { display: grid; grid-template-columns: minmax(0, 1fr) minmax(0, 1fr); gap: 8px; } +.raw-source-actions { display: flex; flex-wrap: wrap; gap: 8px; align-items: center; } 
+.raw-source-actions button, +.raw-source-status button { border: 1px solid var(--border); background: var(--button-bg); color: var(--button-text); + border-radius: 4px; padding: 7px 10px; cursor: pointer; font: inherit; } +.raw-source-actions button:hover, +.raw-source-status button:hover { background: var(--button-hover); } +.raw-source-status { min-height: 1.4em; color: var(--muted); font-family: sans-serif; font-size: 13px; line-height: 1.45; } +.raw-source-status code { display: inline-block; margin: 4px 6px 4px 0; padding: 4px 6px; background: var(--surface-code); border-radius: 4px; } +.proposal-form { display: grid; gap: 10px; margin: 16px 0; font-family: sans-serif; } +.proposal-form textarea, +.proposal-form input { width: 100%; min-width: 0; padding: 8px 9px; border: 1px solid var(--border); + border-radius: 4px; background: var(--surface); color: var(--text); font: inherit; } +.proposal-form textarea { min-height: 190px; resize: vertical; line-height: 1.45; } +.proposal-controls { display: grid; grid-template-columns: minmax(0, 1.4fr) minmax(0, 1fr) 92px auto; gap: 8px; align-items: end; } +.proposal-form label { display: grid; gap: 4px; color: var(--muted); font-size: 12px; } +.proposal-form button { border: 1px solid var(--border); background: var(--button-bg); color: var(--button-text); + border-radius: 4px; padding: 8px 10px; cursor: pointer; font: inherit; } +.proposal-form button:hover { background: var(--button-hover); } +.proposal-source-list { display: grid; gap: 10px; margin: 16px 0; } +.proposal-source-card { border: 1px solid var(--border-soft); border-radius: 6px; padding: 10px; + background: var(--surface); min-width: 0; display: grid; gap: 6px; } +.proposal-source-card strong { overflow-wrap: anywhere; } +.proposal-source-card button { justify-self: start; border: 1px solid var(--border); background: var(--button-bg); + color: var(--button-text); border-radius: 4px; padding: 6px 9px; cursor: pointer; } +.proposal-source-card 
button:disabled { color: var(--button-disabled); cursor: default; } +.proposal-status { min-height: 1.4em; color: var(--muted); font-family: sans-serif; } +.proposal-results { display: grid; gap: 12px; margin-top: 14px; } +.proposal-card { border: 1px solid var(--border-soft); border-radius: 6px; padding: 12px; background: var(--surface); min-width: 0; } +.proposal-card h3 { margin-top: 0; font-size: 16px; } +.proposal-checklist { display: grid; gap: 5px; margin: 10px 0; padding: 9px 10px; + border: 1px solid var(--border-soft); border-radius: 6px; background: var(--surface-soft); + color: var(--muted); font-family: sans-serif; font-size: 13px; line-height: 1.4; } +.proposal-checklist strong { color: var(--text); } +.proposal-warning { color: #8a6d3b; font-family: sans-serif; font-size: 13px; line-height: 1.45; } +.proposal-command { display: block; margin-top: 10px; padding: 8px; background: var(--surface-code); + border-radius: 4px; white-space: normal; overflow-wrap: anywhere; } +.proposal-actions { display: flex; flex-wrap: wrap; gap: 8px; margin-top: 10px; font-family: sans-serif; } +.proposal-actions button { border: 1px solid var(--border); background: var(--button-bg); color: var(--button-text); + border-radius: 4px; padding: 5px 8px; cursor: pointer; font: inherit; } +.proposal-actions button:hover { background: var(--button-hover); } +.proposal-actions button:disabled { color: var(--button-disabled); cursor: default; } +.memory-issues { margin-top: 6px; } +.memory-issues li { border: none; padding: 0; color: var(--muted); font-size: 13px; } +.memory-issues .severity { font-family: sans-serif; font-size: 11px; text-transform: uppercase; color: #8a6d3b; } +.memory-dashboard { margin: 18px 0; } +.memory-dashboard .section-heading { display: flex; justify-content: space-between; align-items: baseline; gap: 12px; } +.memory-dashboard .section-heading a { font-size: 13px; font-family: sans-serif; font-weight: normal; } +.memory-next { border-left: 3px solid 
var(--accent); padding: 10px 12px; margin: 12px 0 16px; background: var(--surface-muted); font-family: sans-serif; min-width: 0; } +.memory-next ul { margin: 8px 0 0; padding-left: 18px; } +.memory-next li { margin: 4px 0; } +.memory-next code { white-space: normal; overflow-wrap: anywhere; } +.memory-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(260px, 1fr)); gap: 12px; margin: 12px 0; } +.memory-card { border: 1px solid var(--border-soft); border-radius: 6px; padding: 12px; min-width: 0; background: var(--surface); } +.memory-card h3 { margin-top: 0; font-size: 16px; } +.memory-card .summary { color: var(--muted); font-family: sans-serif; font-size: 13px; line-height: 1.5; margin: 8px 0; } +.memory-card .memory-meta { color: var(--subtle); font-size: 12px; font-family: sans-serif; } +.memory-actions { margin-top: 10px; display: grid; gap: 6px; } +.memory-actions div { font-size: 12px; font-family: sans-serif; } +.memory-actions code { display: block; margin-top: 2px; white-space: normal; overflow-wrap: anywhere; } +.memory-action-row { display: grid; gap: 4px; } +.memory-action-head { display: flex; align-items: center; gap: 8px; flex-wrap: wrap; } +.memory-actions button { border: 1px solid var(--border); background: var(--button-bg); color: var(--button-text); + border-radius: 4px; padding: 4px 8px; cursor: pointer; font: inherit; } +.memory-actions button:hover { background: var(--button-hover); } +.memory-actions button:disabled { color: var(--button-disabled); cursor: default; } +.memory-action-result { color: var(--muted); min-height: 1em; } +.copy-button { border: 1px solid var(--border); background: var(--button-bg); color: var(--button-text); + border-radius: 4px; padding: 4px 8px; cursor: pointer; font: 12px -apple-system, BlinkMacSystemFont, sans-serif; + margin-left: 8px; vertical-align: middle; } +.copy-button:hover { background: var(--button-hover); } +.copy-button:disabled { color: var(--button-disabled); cursor: default; } 
+.ingest-path { display: grid; grid-template-columns: repeat(auto-fit, minmax(190px, 1fr)); gap: 10px; margin: 14px 0 18px; } +.ingest-step { border: 1px solid var(--border-soft); border-radius: 4px; background: var(--surface); padding: 12px; font-family: sans-serif; min-width: 0; } +.ingest-step .step-num { display: inline-flex; align-items: center; justify-content: center; width: 22px; height: 22px; border-radius: 50%; background: var(--accent); color: #fff; font-size: 12px; font-weight: 700; } +.ingest-step h3 { margin: 8px 0 5px; font-size: 15px; } +.ingest-step p { margin: 0 0 8px; color: var(--muted); line-height: 1.4; } +.ingest-step code { white-space: normal; overflow-wrap: anywhere; } +.ingest-completion-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(260px, 1fr)); gap: 12px; margin: 14px 0; } +.ingest-completion-card { border: 1px solid var(--border-soft); border-radius: 6px; background: var(--surface); padding: 12px; min-width: 0; font-family: sans-serif; } +.ingest-completion-card h3 { margin: 0 0 8px; font-size: 16px; overflow-wrap: anywhere; } +.ingest-completion-card p { margin: 6px 0; color: var(--muted); line-height: 1.45; font-size: 13px; } +.ingest-completion-card code { white-space: normal; overflow-wrap: anywhere; } +.ingest-completion-pages { display: flex; gap: 6px; flex-wrap: wrap; margin: 8px 0; } +.ingest-completion-pages a { border: 1px solid var(--border-soft); border-radius: 999px; padding: 3px 8px; background: var(--surface-soft); font-size: 12px; } +.ingest-completion-actions { display: flex; gap: 8px; flex-wrap: wrap; align-items: center; margin-top: 10px; } +.ingest-completion-actions a { font-size: 13px; } +.trust-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 12px; margin: 16px 0; } +.trust-grid div { border: 1px solid var(--border-soft); border-radius: 4px; padding: 10px; font-family: sans-serif; background: var(--surface); } +.trust-grid strong { display: block; 
font-size: 12px; color: var(--subtle); margin-bottom: 4px; } +.prompt-strip { margin: 16px 0; padding: 12px; border: 1px solid var(--border-soft); border-radius: 4px; background: var(--surface-muted); } +.prompt-strip h2 { margin-top: 0; font-size: 17px; } +.prompt-strip p { color: var(--muted); margin-bottom: 10px; } +.prompt-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); gap: 8px; } +.prompt-grid code { display: block; padding: 8px; background: var(--surface-code); border-radius: 4px; white-space: normal; } +.log-entry { white-space: pre-wrap; font-size: 12px; } + +mark { background: var(--mark-bg); color: inherit; border-radius: 2px; padding: 0 1px; } + +#graph-canvas { width: 100%; height: min(74vh, 860px); min-height: 560px; + border: 1px solid var(--border); border-radius: 4px; background: var(--surface-graph); + cursor: grab; display: block; margin: 0; } +#graph-canvas:active { cursor: grabbing; } +#graph-canvas:focus { outline: 2px solid var(--accent-soft); outline-offset: 2px; } +.graph-frame { margin: 12px 0; } +.graph-frame.is-fullscreen { position: fixed; inset: 0; z-index: 200; background: var(--bg); padding: 18px; + display: flex; flex-direction: column; overflow: hidden; } +.graph-frame.is-fullscreen .graph-shell { flex: 1; min-height: 0; } +.graph-frame.is-fullscreen #graph-canvas { height: 100%; min-height: 0; } +.graph-frame.is-fullscreen .graph-inspector { max-height: 100%; overflow: auto; } +.graph-shell { display: grid; grid-template-columns: minmax(0, 1fr) 320px; gap: 12px; align-items: stretch; margin: 12px 0; } +.graph-toolbar { display: flex; flex-wrap: wrap; align-items: center; gap: 8px; + margin: 12px 0 8px; font: 13px -apple-system, BlinkMacSystemFont, sans-serif; } +.graph-toolbar button { border: 1px solid var(--border); background: var(--button-bg); color: var(--button-text); + border-radius: 4px; padding: 5px 9px; cursor: pointer; } +.graph-toolbar button:hover { background: var(--button-hover); } 
+.graph-toolbar button[aria-pressed="true"] { background: var(--accent); border-color: var(--accent); color: #fff; } +.graph-control { display: grid; gap: 3px; color: var(--muted); font-size: 11px; } +.graph-control input, +.graph-control select { border: 1px solid var(--border); background: var(--surface); color: var(--text); + border-radius: 4px; padding: 5px 8px; font: 13px -apple-system, BlinkMacSystemFont, sans-serif; } +.graph-control input { width: 180px; } +.graph-control select:disabled { color: var(--button-disabled); cursor: not-allowed; opacity: 0.65; } +.graph-status { color: var(--muted); margin-left: auto; } +.graph-inspector { border: 1px solid var(--border-soft); border-radius: 4px; padding: 12px; font: 13px -apple-system, BlinkMacSystemFont, sans-serif; color: var(--muted); background: var(--surface); } +.graph-inspector strong { display: block; color: var(--text-strong); font-size: 15px; margin-bottom: 6px; overflow-wrap: anywhere; } +.graph-inspector p { margin: 0 0 10px; line-height: 1.4; } +.graph-inspector-links { display: grid; gap: 5px; margin: 10px 0; max-height: 180px; overflow: auto; } +.graph-inspector-links a { overflow-wrap: anywhere; } +.graph-inspector button { border: 1px solid var(--border); background: var(--button-bg); color: var(--button-text); border-radius: 4px; padding: 6px 9px; cursor: pointer; } +.graph-inspector button:disabled { color: var(--button-disabled); cursor: default; } +.graph-tooltip { position: fixed; background: var(--surface); border: 1px solid var(--border); border-radius: 4px; + padding: 6px 10px; font-size: 13px; pointer-events: none; display: none; + box-shadow: 0 2px 8px var(--shadow); z-index: 100; } +.graph-legend { font-size: 12px; color: var(--subtle); font-family: sans-serif; margin-top: 8px; } +.graph-legend span { display: inline-block; width: 10px; height: 10px; border-radius: 50%; + margin-right: 4px; vertical-align: middle; } +.graph-empty { border: 1px solid var(--border-soft); border-radius: 
4px; padding: 28px; background: var(--surface-empty); + color: var(--muted); font-family: sans-serif; margin: 12px 0; } + +footer { margin-top: 40px; padding-top: 12px; border-top: 1px solid var(--border-soft); + font-size: 12px; color: var(--faint); font-family: sans-serif; } +@media (max-width: 760px) { + body { padding: 20px; } + header .header-top { align-items: flex-start; } + header nav { gap: 10px 14px; } + header .header-tools { justify-items: end; } + header .theme-toggle { justify-self: end; } + .home-stats { flex-wrap: wrap; gap: 14px 22px; } + .product-lanes { grid-template-columns: minmax(0, 1fr); } + .memory-grid { grid-template-columns: minmax(0, 1fr); } + .proposal-controls { grid-template-columns: minmax(0, 1fr); } + .raw-source-controls { grid-template-columns: minmax(0, 1fr); } + .section-heading, + .memory-dashboard .section-heading { flex-wrap: wrap; } + .memory-actions code, .memory-next code { word-break: break-word; } + .graph-shell { grid-template-columns: 1fr; } + #graph-canvas { min-height: 460px; } + .graph-frame.is-fullscreen { padding: 12px; } +} +@media (max-width: 560px) { + header .header-top { flex-wrap: wrap; } + header .header-tools { flex-basis: 100%; max-width: none; justify-items: stretch; } + header .theme-toggle { justify-self: end; } +} +""" + +THEME_INIT_JS = """ +(function() { + try { + var theme = localStorage.getItem('link-theme') || 'system'; + if (theme === 'dark' || theme === 'light') { + document.documentElement.dataset.theme = theme; + } + } catch (err) {} +})(); +""" + +THEME_CONTROL_JS = """ +(function() { + var modes = ['system', 'dark', 'light']; + var button = document.querySelector('[data-theme-toggle]'); + var media = window.matchMedia ? window.matchMedia('(prefers-color-scheme: dark)') : null; + + function systemTheme() { + return media && media.matches ? 
'dark' : 'light'; + } + + function storedTheme() { + try { + return localStorage.getItem('link-theme') || 'system'; + } catch (err) { + return 'system'; + } + } + + function saveTheme(theme) { + try { + localStorage.setItem('link-theme', theme); + } catch (err) {} + } + + function applyTheme(theme) { + if (theme === 'dark' || theme === 'light') { + document.documentElement.dataset.theme = theme; + } else { + delete document.documentElement.dataset.theme; + } + if (!button) return; + var active = theme === 'system' ? systemTheme() : theme; + var text = button.querySelector('[data-theme-text]'); + if (text) { + text.textContent = theme; + } else { + button.textContent = theme; + } + button.title = 'Theme: ' + theme + ' (' + active + ')'; + button.setAttribute('aria-label', 'Theme: ' + theme + ' (' + active + '). Click to switch.'); + } + + applyTheme(storedTheme()); + + if (button) { + button.addEventListener('click', function() { + var current = storedTheme(); + var next = modes[(modes.indexOf(current) + 1) % modes.length] || 'system'; + saveTheme(next); + applyTheme(next); + }); + } + + if (media && media.addEventListener) { + media.addEventListener('change', function() { + if (storedTheme() === 'system') applyTheme('system'); + }); + } +})(); +""" + +MEMORY_ACTION_JS = """ +(function() { + var endpoints = { + review: '/api/review-memory', + archive: '/api/archive-memory', + restore: '/api/restore-memory' + }; + var buttons = Array.from(document.querySelectorAll('[data-memory-action]')); + if (!buttons.length) return; + + function resultFor(button) { + var row = button.closest('.memory-action-row') || button.parentElement; + var result = row ? 
row.querySelector('.memory-action-result') : null; + if (!result && row) { + result = document.createElement('span'); + result.className = 'memory-action-result'; + row.appendChild(result); + } + return result; + } + + buttons.forEach(function(button) { + button.addEventListener('click', async function() { + var action = button.getAttribute('data-memory-action') || ''; + var endpoint = endpoints[action]; + var memory = button.getAttribute('data-memory') || ''; + var result = resultFor(button); + if (!endpoint || !memory) return; + + var payload = {memory: memory}; + if (action === 'review' && !window.confirm('Mark this memory as reviewed?')) return; + if (action === 'archive') { + var reason = window.prompt('Archive reason', 'stale'); + if (reason === null) return; + payload.reason = reason; + } + if (action === 'restore' && !window.confirm('Restore this memory to active recall?')) return; + + button.disabled = true; + if (result) result.textContent = 'Updating...'; + try { + var response = await fetch(endpoint, { + method: 'POST', + headers: {'Content-Type': 'application/json', 'X-Link-Local-Action': 'true'}, + body: JSON.stringify(payload) + }); + var data = await response.json(); + if (!response.ok) { + throw new Error(data.error || 'memory action failed'); + } + if (result) result.textContent = 'Updated. 
Refreshing...'; + window.setTimeout(function() { window.location.reload(); }, 450); + } catch (err) { + if (result) result.textContent = err.message || 'memory action failed'; + button.disabled = false; + } + }); + }); +})(); +""" + +COPY_BUTTON_JS = """ +(function() { + var buttons = Array.from(document.querySelectorAll('[data-copy-text]')); + if (!buttons.length) return; + + async function copyText(text) { + if (navigator.clipboard && navigator.clipboard.writeText) { + await navigator.clipboard.writeText(text); + return; + } + var textarea = document.createElement('textarea'); + textarea.value = text; + textarea.setAttribute('readonly', 'true'); + textarea.style.position = 'fixed'; + textarea.style.left = '-9999px'; + document.body.appendChild(textarea); + textarea.select(); + document.execCommand('copy'); + document.body.removeChild(textarea); + } + + buttons.forEach(function(button) { + button.addEventListener('click', async function() { + var label = button.textContent || 'Copy'; + var text = button.getAttribute('data-copy-text') || ''; + if (!text) return; + button.disabled = true; + try { + await copyText(text); + button.textContent = 'Copied'; + } catch (err) { + button.textContent = 'Copy failed'; + } + window.setTimeout(function() { + button.textContent = label; + button.disabled = false; + }, 1200); + }); + }); +})(); +""" + +RAW_SOURCE_JS = """ +(function() { + var form = document.querySelector('[data-raw-source-form]'); + if (!form) return; + var statusEl = document.querySelector('[data-raw-source-status]'); + + function setStatus(text, tone) { + if (!statusEl) return; + statusEl.textContent = text || ''; + statusEl.dataset.tone = tone || ''; + } + + async function copyText(text) { + if (navigator.clipboard && navigator.clipboard.writeText) { + await navigator.clipboard.writeText(text); + return; + } + var textarea = document.createElement('textarea'); + textarea.value = text; + textarea.setAttribute('readonly', 'true'); + textarea.style.position = 
'fixed'; + textarea.style.left = '-9999px'; + document.body.appendChild(textarea); + textarea.select(); + document.execCommand('copy'); + document.body.removeChild(textarea); + } + + function renderSaved(data) { + if (!statusEl) return; + statusEl.textContent = ''; + var saved = document.createElement('span'); + saved.textContent = 'Saved ' + (data.path || 'raw source') + '. Next: '; + statusEl.appendChild(saved); + var code = document.createElement('code'); + code.textContent = data.next_prompt || ''; + statusEl.appendChild(code); + var copy = document.createElement('button'); + copy.type = 'button'; + copy.textContent = 'Copy ingest prompt'; + copy.addEventListener('click', async function() { + var label = copy.textContent; + copy.disabled = true; + try { + await copyText(data.next_prompt || ''); + copy.textContent = 'Copied'; + } catch (err) { + copy.textContent = 'Copy failed'; + } + window.setTimeout(function() { + copy.textContent = label; + copy.disabled = false; + }, 1200); + }); + statusEl.appendChild(copy); + var refresh = document.createElement('a'); + refresh.href = '/ingest'; + refresh.textContent = ' refresh ingest status'; + statusEl.appendChild(refresh); + } + + form.addEventListener('submit', async function(event) { + event.preventDefault(); + var button = form.querySelector('button[type=\"submit\"]'); + if (button) button.disabled = true; + setStatus('Saving source locally...'); + try { + var payload = { + title: form.elements.title.value || '', + filename: form.elements.filename.value || '', + text: form.elements.text.value || '' + }; + var response = await fetch('/api/raw-source', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'X-Link-Local-Action': 'true' + }, + body: JSON.stringify(payload) + }); + var data = await response.json(); + if (!response.ok) throw new Error(data.error || 'source save failed'); + form.reset(); + renderSaved(data); + } catch (error) { + setStatus(error.message || 'source save failed', 
'error'); + } finally { + if (button) button.disabled = false; + } + }); +})(); +""" + +PROPOSAL_UI_JS = """ +(function() { + var form = document.querySelector('[data-proposal-form]'); + if (!form) return; + var statusEl = document.querySelector('[data-proposal-status]'); + var resultsEl = document.querySelector('[data-proposal-results]'); + var sourceListEl = document.querySelector('[data-proposal-sources]'); + + function setStatus(text) { + if (statusEl) statusEl.textContent = text || ''; + } + + function addText(parent, tag, className, text) { + var node = document.createElement(tag); + if (className) node.className = className; + node.textContent = text || ''; + parent.appendChild(node); + return node; + } + + function candidateNames(items) { + return (items || []).map(function(item) { + return item.name || item.title || ''; + }).filter(Boolean).join(', '); + } + + function renderSources(data) { + if (!sourceListEl) return; + sourceListEl.textContent = ''; + if (!data || !data.sources || !data.sources.length) { + addText(sourceListEl, 'p', 'summary', 'No local raw text sources found yet.'); + return; + } + data.sources.forEach(function(source) { + var card = document.createElement('article'); + card.className = 'proposal-source-card'; + addText(card, 'strong', '', source.title || source.path || 'raw source'); + addText(card, 'div', 'memory-meta', [ + source.path || '', + source.size ? source.size + ' bytes' : '', + source.warning_count ? source.warning_count + ' warning' + (source.warning_count === 1 ? 
'' : 's') : '' + ].filter(Boolean).join(' · ')); + if (source.snippet) addText(card, 'p', 'summary', source.snippet); + if (source.secret_warnings && source.secret_warnings.length) { + addText(card, 'p', 'proposal-warning', 'Secret-looking values: ' + source.secret_warnings.join(', ')); + } + if (source.error) { + addText(card, 'p', 'proposal-warning', 'Cannot load source: ' + source.error); + } + if (source.truncated) { + addText(card, 'p', 'proposal-warning', 'Large source: split or summarize it before loading into the proposal form.'); + } + var button = document.createElement('button'); + button.type = 'button'; + button.textContent = source.action_label || (source.loadable ? 'Use in form' : (source.warning_count ? 'Redact first' : 'Too large')); + button.disabled = !source.loadable; + button.setAttribute('data-proposal-source', source.path || ''); + card.appendChild(button); + sourceListEl.appendChild(card); + }); + } + + async function loadSource(path) { + setStatus('Loading ' + path + '...'); + try { + var response = await fetch('/api/proposal-source?path=' + encodeURIComponent(path)); + var data = await response.json(); + if (!response.ok) throw new Error(data.error || 'source load failed'); + form.elements.text.value = data.text || ''; + form.elements.source.value = data.source || path; + setStatus('Loaded ' + (data.path || path) + '. 
Nothing was written.'); + } catch (error) { + setStatus(error.message || 'source load failed'); + } + } + + function approvalPrompt(proposal) { + if (proposal.primary_action && proposal.primary_action.prompt) { + return proposal.primary_action.prompt; + } + var memory = proposal.memory || ''; + if (proposal.suggested_action === 'update-memory' && proposal.duplicate_candidates && proposal.duplicate_candidates.length) { + var target = proposal.duplicate_candidates[0].name || proposal.duplicate_candidates[0].title || ''; + return 'Approve by asking: update memory ' + target + ' with "' + memory + '"'; + } + return 'Approve by asking: remember that ' + memory; + } + + function addCopyButton(parent, label, text) { + if (!text) return; + var button = document.createElement('button'); + button.type = 'button'; + button.textContent = label; + button.addEventListener('click', async function() { + try { + await navigator.clipboard.writeText(text); + button.textContent = 'Copied'; + window.setTimeout(function() { button.textContent = label; }, 1200); + } catch (error) { + button.textContent = 'Select text above'; + window.setTimeout(function() { button.textContent = label; }, 1600); + } + }); + parent.appendChild(button); + } + + function firstCandidateName(items) { + if (!items || !items.length) return ''; + return items[0].name || items[0].title || ''; + } + + function approvalEndpoint(proposal) { + var action = proposal.primary_action || {}; + if (action.kind === 'remember' && !(proposal.conflict_candidates && proposal.conflict_candidates.length)) { + return '/api/remember-memory'; + } + if (action.kind === 'update' && firstCandidateName(proposal.duplicate_candidates)) { + return '/api/update-memory'; + } + return ''; + } + + function approvalPayload(proposal) { + var endpoint = approvalEndpoint(proposal); + if (endpoint === '/api/update-memory') { + return { + memory: firstCandidateName(proposal.duplicate_candidates), + text: proposal.memory || '', + source: 
proposal.source || 'web approval', + project: proposal.project || '' + }; + } + return { + memory: proposal.memory || '', + title: proposal.title || '', + memory_type: proposal.memory_type || 'note', + scope: proposal.scope || 'user', + source: proposal.source || 'web approval', + project: proposal.project || '' + }; + } + + function addApproveButton(parent, proposal) { + var endpoint = approvalEndpoint(proposal); + if (!endpoint) { + var blocked = document.createElement('button'); + blocked.type = 'button'; + blocked.disabled = true; + blocked.textContent = 'Manual review required'; + blocked.title = 'Copy the approval prompt and resolve duplicates or conflicts with your agent.'; + parent.appendChild(blocked); + return; + } + var button = document.createElement('button'); + button.type = 'button'; + button.textContent = endpoint === '/api/update-memory' ? 'Approve update' : 'Approve and save'; + button.title = 'Writes durable local memory only after this explicit approval.'; + button.addEventListener('click', async function() { + var message = endpoint === '/api/update-memory' + ? 'Update the existing memory with this proposal?' + : 'Save this proposal as durable local memory?'; + if (!window.confirm(message)) return; + button.disabled = true; + button.textContent = 'Saving...'; + try { + var response = await fetch(endpoint, { + method: 'POST', + headers: {'Content-Type': 'application/json', 'X-Link-Local-Action': 'true'}, + body: JSON.stringify(approvalPayload(proposal)) + }); + var data = await response.json(); + if (!response.ok) throw new Error(data.error || data.message || 'memory save failed'); + button.textContent = 'Saved'; + setStatus('Saved ' + (data.title || data.name || 'memory') + '. Review it in the memory inbox.'); + } catch (error) { + button.disabled = false; + button.textContent = endpoint === '/api/update-memory' ? 
'Approve update' : 'Approve and save'; + setStatus(error.message || 'memory save failed'); + } + }); + parent.appendChild(button); + } + + function renderProposals(data) { + if (!resultsEl) return; + resultsEl.textContent = ''; + if (!data || data.error) { + addText(resultsEl, 'p', 'summary', data && data.error ? data.error : 'No response.'); + return; + } + if (!data.proposals || !data.proposals.length) { + addText(resultsEl, 'p', 'summary', 'No durable memory candidates found. Keep this as source-backed wiki knowledge unless there is a clear preference, decision, or project fact.'); + return; + } + data.proposals.forEach(function(proposal) { + var card = document.createElement('article'); + card.className = 'proposal-card'; + addText(card, 'h3', '', proposal.title || 'Memory proposal'); + addText(card, 'div', 'memory-meta', [ + proposal.memory_type || 'note', + proposal.scope || 'user', + proposal.confidence || 'unknown confidence', + proposal.suggested_action || 'remember' + ].filter(Boolean).join(' · ')); + addText(card, 'p', 'summary', proposal.memory || ''); + if (proposal.reason) addText(card, 'p', 'summary', proposal.reason); + var duplicates = candidateNames(proposal.duplicate_candidates); + if (duplicates) addText(card, 'p', 'proposal-warning', 'Possible duplicate: ' + duplicates); + var conflicts = candidateNames(proposal.conflict_candidates); + if (conflicts) addText(card, 'p', 'proposal-warning', 'Possible conflict: ' + conflicts); + var action = proposal.primary_action || {}; + if (action.label) addText(card, 'p', 'summary', action.label + ': ' + (action.description || '')); + addText(card, 'p', 'proposal-warning', 'Proposal-only: no durable memory has been written yet.'); + var checklist = document.createElement('div'); + checklist.className = 'proposal-checklist'; + addText(checklist, 'strong', '', 'Review gate'); + addText(checklist, 'span', '', 'Save only if this is a durable preference, decision, fact, or project context.'); + addText(checklist, 
'span', '', 'Check scope, project, source label, duplicates, and conflicts before approval.'); + addText(checklist, 'span', '', conflicts ? 'Conflict found: use the approval prompt instead of direct save.' : 'Direct save still requires explicit approval.'); + card.appendChild(checklist); + var promptText = approvalPrompt(proposal); + var prompt = addText(card, 'code', 'proposal-command', promptText); + prompt.setAttribute('title', 'Copy this into your agent chat if you approve the memory.'); + if (action.command) { + var command = addText(card, 'code', 'proposal-command', action.command); + command.setAttribute('title', 'Equivalent local command.'); + } + var actions = document.createElement('div'); + actions.className = 'proposal-actions'; + addApproveButton(actions, proposal); + addCopyButton(actions, 'Copy approval prompt', promptText); + addCopyButton(actions, 'Copy CLI command', action.command || ''); + card.appendChild(actions); + resultsEl.appendChild(card); + }); + } + + form.addEventListener('submit', async function(event) { + event.preventDefault(); + var text = form.elements.text.value || ''; + if (!text.trim()) { + setStatus('Paste source or session notes first.'); + return; + } + setStatus('Proposing memories...'); + if (resultsEl) resultsEl.textContent = ''; + try { + var response = await fetch('/api/propose-memories', { + method: 'POST', + headers: {'Content-Type': 'application/json'}, + body: JSON.stringify({ + text: text, + source: form.elements.source.value || 'web proposal', + project: form.elements.project.value || '', + limit: form.elements.limit.value || '10' + }) + }); + var data = await response.json(); + if (!response.ok) throw new Error(data.error || 'proposal failed'); + setStatus(data.count + ' proposal' + (data.count === 1 ? '' : 's') + ' found. 
Nothing was written.'); + renderProposals(data); + } catch (error) { + setStatus(error.message || 'proposal failed'); + } + }); + + if (sourceListEl) { + sourceListEl.addEventListener('click', function(event) { + var button = event.target.closest('[data-proposal-source]'); + if (!button || button.disabled) return; + loadSource(button.getAttribute('data-proposal-source') || ''); + }); + fetch('/api/proposal-sources') + .then(function(response) { return response.json(); }) + .then(renderSources) + .catch(function() { + renderSources({sources: []}); + }); + } + var initialSource = form.getAttribute('data-initial-source') || ''; + if (initialSource) { + loadSource(initialSource); + } +})(); +""" diff --git a/mcp_package/link_core/web_graph.py b/mcp_package/link_core/web_graph.py new file mode 100644 index 0000000..b3b38f9 --- /dev/null +++ b/mcp_package/link_core/web_graph.py @@ -0,0 +1,103 @@ +"""Shared web graph rendering helpers.""" +from __future__ import annotations + +import html +from typing import Any, Mapping + + +GRAPH_INITIAL_FULL_NODE_LIMIT = 900 +GRAPH_INITIAL_SUMMARY_NODE_LIMIT = 250 +GRAPH_INITIAL_SUMMARY_EDGE_LIMIT = 1000 + +GRAPH_CATEGORY_COLORS = { + "concepts": "#4e79a7", + "entities": "#f28e2b", + "memories": "#edc948", + "sources": "#59a14f", + "comparisons": "#e15759", + "explorations": "#76b7b2", + "root": "#bab0ac", +} + + +def _visible_graph_parts(graph: Mapping[str, Any]) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: + nodes = [ + dict(node) + for node in graph.get("nodes", []) + if str(node.get("category") or "") != "root" + ] + ids = {str(node.get("id") or "") for node in nodes} + edges = [ + {"source": str(edge.get("source") or ""), "target": str(edge.get("target") or "")} + for edge in graph.get("edges", []) + if str(edge.get("source") or "") in ids and str(edge.get("target") or "") in ids + ] + return nodes, edges + + +def graph_needs_bounded_overview( + full_graph: Mapping[str, Any], + full_node_limit: int = 
GRAPH_INITIAL_FULL_NODE_LIMIT, +) -> bool: + """Return whether the graph page should start with a bounded overview.""" + nodes, _ = _visible_graph_parts(full_graph) + return len(nodes) > full_node_limit + + +def graph_initial_payload( + full_graph: Mapping[str, Any], + summary_graph: Mapping[str, Any] | None = None, + full_node_limit: int = GRAPH_INITIAL_FULL_NODE_LIMIT, +) -> dict[str, Any]: + """Build the initial browser graph payload and total counts. + + The full graph remains available through the HTTP API. This helper decides + whether the page should embed the full graph immediately or a bounded + high-signal overview first. + """ + full_nodes, full_edges = _visible_graph_parts(full_graph) + total_node_count = len(full_nodes) + total_edge_count = len(full_edges) + graph_mode = "full" + graph_note = "" + visible_nodes = full_nodes + visible_edges = full_edges + + if total_node_count > full_node_limit and summary_graph is not None: + visible_nodes, visible_edges = _visible_graph_parts(summary_graph) + graph_mode = "summary" + graph_note = ( + f" Showing a fast overview of {len(visible_nodes)} high-signal nodes first; " + f"load graph data when you need to search or filter across every page." 
+ ) + + return { + "nodes": visible_nodes, + "edges": visible_edges, + "node_count": len(visible_nodes), + "edge_count": len(visible_edges), + "total_node_count": total_node_count, + "total_edge_count": total_edge_count, + "graph_mode": graph_mode, + "graph_note": graph_note, + } + + +def graph_category_options(nodes: list[Mapping[str, Any]]) -> str: + categories = sorted({ + str(node.get("category") or "") + for node in nodes + if str(node.get("category") or "") and str(node.get("category") or "") != "root" + }) + return '' + "".join( + f'' + for category in categories + ) + + +def graph_legend_items(colors: Mapping[str, str] = GRAPH_CATEGORY_COLORS) -> str: + return "".join( + f'{html.escape(str(category))} ' + for category, color in colors.items() + if category != "root" + ) diff --git a/mcp_package/link_core/web_http.py b/mcp_package/link_core/web_http.py new file mode 100644 index 0000000..e293048 --- /dev/null +++ b/mcp_package/link_core/web_http.py @@ -0,0 +1,238 @@ +"""Shared local HTTP guard helpers for Link's web viewer.""" +from __future__ import annotations + +import time +from pathlib import Path +from typing import Callable, Iterable, Mapping +from urllib.parse import unquote, urlsplit + + +ALLOWED_LOCAL_HOSTS = frozenset({"127.0.0.1", "localhost"}) +HOST_HEADER_REQUIRED = "Host header required" +HOST_HEADER_LOCAL_ONLY = "Host header must be localhost or 127.0.0.1" +BROWSER_SOURCE_LOCAL_ONLY = "Origin/Referer must match local Link viewer" +CONTENT_SECURITY_POLICY = ( + "default-src 'self'; " + "img-src 'self' data:; " + "style-src 'self' 'unsafe-inline'; " + "script-src 'self' 'unsafe-inline'; " + "connect-src 'self'; " + "object-src 'none'; " + "base-uri 'none'; " + "frame-ancestors 'none'" +) +PERMISSIONS_POLICY = ( + "camera=(), microphone=(), geolocation=(), payment=(), usb=(), " + "serial=(), bluetooth=(), accelerometer=(), gyroscope=(), magnetometer=()" +) +SVG_CONTENT_SECURITY_POLICY = ( + "default-src 'none'; " + "img-src 'self' data:; " + 
"style-src 'unsafe-inline'; " + "script-src 'none'; " + "object-src 'none'; " + "sandbox" +) + + +def parse_bounded_int( + raw: object, + label: str, + default: int, + min_value: int, + max_value: int, +) -> tuple[int | None, str | None]: + """Parse a bounded integer query parameter.""" + if raw == "" or raw is None: + return default, None + try: + value = int(raw) # type: ignore[arg-type] + except (TypeError, ValueError): + return None, f"{label} must be an integer" + if value < min_value: + return None, f"{label} must be at least {min_value}" + return min(value, max_value), None + + +class LocalRateLimiter: + """Small in-memory sliding-window limiter for local HTTP mutation APIs.""" + + def __init__( + self, + max_events: int, + window_seconds: float, + clock: Callable[[], float] | None = None, + ) -> None: + self.max_events = max(1, int(max_events)) + self.window_seconds = max(0.1, float(window_seconds)) + self._clock = clock or time.monotonic + self._events: dict[str, list[float]] = {} + + def check(self, key: object) -> tuple[bool, int]: + """Return (allowed, retry_after_seconds).""" + now = self._clock() + key_text = str(key or "local") + cutoff = now - self.window_seconds + events = [ + timestamp + for timestamp in self._events.get(key_text, []) + if timestamp > cutoff + ] + if len(events) >= self.max_events: + retry_after = max(1, int(round(events[0] + self.window_seconds - now))) + self._events[key_text] = events + return False, retry_after + events.append(now) + self._events[key_text] = events + return True, 0 + + +def local_security_headers( + api_version: str, + content_security_policy: str = CONTENT_SECURITY_POLICY, +) -> tuple[tuple[str, str], ...]: + """Return baseline local-viewer security headers.""" + return ( + ("X-Link-API-Version", str(api_version)), + ("X-Content-Type-Options", "nosniff"), + ("Referrer-Policy", "no-referrer"), + ("Cross-Origin-Resource-Policy", "same-origin"), + ("Cross-Origin-Opener-Policy", "same-origin"), + 
("Permissions-Policy", PERMISSIONS_POLICY), + ("Content-Security-Policy", content_security_policy), + ) + + +def local_no_store_headers() -> tuple[tuple[str, str], ...]: + """Return cache-prevention headers for personal local memory responses.""" + return ( + ("Cache-Control", "no-store"), + ("Pragma", "no-cache"), + ("Expires", "0"), + ) + + +def _host_without_port(host: str) -> str | None: + if any(char.isspace() for char in host): + return None + if host.startswith("["): + closing = host.find("]") + if closing < 0: + return None + host_name = host[1:closing] + remainder = host[closing + 1:] + if remainder: + if not remainder.startswith(":"): + return None + port = remainder[1:] + if port and not port.isdigit(): + return None + return host_name + if host.count(":") == 1: + host_name, port = host.rsplit(":", 1) + if port and not port.isdigit(): + return None + return host_name + if ":" in host: + return None + return host + + +def validate_local_host_header( + host_header: object, + allowed_hosts: Iterable[str] = ALLOWED_LOCAL_HOSTS, +) -> tuple[bool, str | None]: + """Validate a local-only Host header for the unauthenticated viewer.""" + host = str(host_header or "").strip().lower() + if not host: + return False, HOST_HEADER_REQUIRED + host_name = _host_without_port(host) + if host_name in set(allowed_hosts): + return True, None + return False, HOST_HEADER_LOCAL_ONLY + + +def _browser_source_host(header_value: object) -> str | None: + value = str(header_value or "").strip().lower() + if not value: + return None + parsed = urlsplit(value) + if parsed.scheme not in {"http", "https"} or not parsed.netloc: + return "" + return _host_without_port(parsed.netloc) or "" + + +def validate_local_browser_source_headers( + origin_header: object, + referer_header: object, + allowed_hosts: Iterable[str] = ALLOWED_LOCAL_HOSTS, +) -> tuple[bool, str | None]: + """Allow browser-supplied Origin/Referer only from the local viewer.""" + allowed = set(allowed_hosts) + for 
header_value in (origin_header, referer_header): + host = _browser_source_host(header_value) + if host is None: + continue + if host not in allowed: + return False, BROWSER_SOURCE_LOCAL_ONLY + return True, None + + +def safe_resolve(path: Path) -> Path | None: + """Resolve a path, returning None for malformed filesystem inputs.""" + try: + return path.resolve() + except (OSError, ValueError): + return None + + +def is_relative_to(path: Path, root: Path) -> bool: + """Return whether path stays under root after both paths are resolved.""" + resolved_path = safe_resolve(path) + resolved_root = safe_resolve(root) + if not resolved_path or not resolved_root: + return False + try: + resolved_path.relative_to(resolved_root) + return True + except ValueError: + return False + + +def is_allowed_static_file( + path: Path, + raw_dir: Path, + root_files: Iterable[Path], + raw_static_types: Mapping[str, str], +) -> bool: + """Check whether a static file is an allowed root asset or raw media file.""" + resolved_path = safe_resolve(path) + resolved_raw_dir = safe_resolve(raw_dir) + if not resolved_path or not resolved_raw_dir: + return False + allowed_root_files = { + resolved + for root_file in root_files + if (resolved := safe_resolve(root_file)) is not None + } + return resolved_path in allowed_root_files or ( + is_relative_to(resolved_path, resolved_raw_dir) + and resolved_path.suffix.lower() in raw_static_types + ) + + +def resolve_raw_static_path( + raw_dir: Path, + url_fragment: object, + raw_static_types: Mapping[str, str], +) -> tuple[Path | None, str | None]: + """Resolve a /raw/ URL fragment to an allowed local file and MIME type.""" + decoded = unquote(str(url_fragment or "")).lstrip("/") + resolved_raw_dir = safe_resolve(raw_dir) + resolved = safe_resolve(raw_dir / decoded) + if not resolved_raw_dir or not resolved or not is_relative_to(resolved, resolved_raw_dir): + return None, None + content_type = raw_static_types.get(resolved.suffix.lower()) + if not 
content_type: + return None, None + return resolved, content_type diff --git a/mcp_package/link_core/web_layout.py b/mcp_package/link_core/web_layout.py new file mode 100644 index 0000000..2b2fcde --- /dev/null +++ b/mcp_package/link_core/web_layout.py @@ -0,0 +1,113 @@ +"""Shared HTML shell for the local Link web UI.""" +from __future__ import annotations + +import html + +from .web_assets import ( + COPY_BUTTON_JS, + CSS, + MEMORY_ACTION_JS, + PROPOSAL_UI_JS, + RAW_SOURCE_JS, + THEME_CONTROL_JS, + THEME_INIT_JS, +) + + +KEYBOARD_NAV_JS = """ +// Keyboard navigation +document.addEventListener('keydown', function(e) { + var tag = document.activeElement.tagName; + var inInput = tag === 'INPUT' || tag === 'TEXTAREA'; + // / -> focus search + if (e.key === '/' && !inInput) { + e.preventDefault(); + var inp = document.getElementById('search-input'); + if (inp) { inp.focus(); inp.select(); } + } + // Escape -> blur search + if (e.key === 'Escape' && inInput) { + document.activeElement.blur(); + } + if (e.key === 'Enter' && document.activeElement.id === 'search-input') { + var q = document.activeElement.value.trim(); + if (q) { + e.preventDefault(); + window.location.href = '/search?q=' + encodeURIComponent(q); + } + } + // j/k -> navigate focusable links in page-list + if ((e.key === 'j' || e.key === 'k') && !inInput) { + var links = Array.from(document.querySelectorAll('.page-list a, .search-results a')); + if (!links.length) return; + var cur = document.activeElement; + var idx = links.indexOf(cur); + if (e.key === 'j') idx = idx < links.length - 1 ? idx + 1 : 0; + else idx = idx > 0 ? idx - 1 : links.length - 1; + links[idx].focus(); + e.preventDefault(); + } +}); +""" + + +def render_header_html() -> str: + return """
+
+ +
+ +
+ +
+
+
+ +
""" + + +def render_footer_html() -> str: + return '
Link — local agent memory · github
' + + +def render_layout(title: str, body: str, page_class: str = "") -> str: + body_class = f' class="{html.escape(page_class, quote=True)}"' if page_class else "" + return f""" + + + + +{html.escape(title)} — Link + + + + + +{render_header_html()} +
+{body} +{render_footer_html()} + + + + + + + +""" diff --git a/mcp_package/link_core/web_memory.py b/mcp_package/link_core/web_memory.py new file mode 100644 index 0000000..5f8ef76 --- /dev/null +++ b/mcp_package/link_core/web_memory.py @@ -0,0 +1,175 @@ +"""HTML helpers for Link's local memory web views.""" +from __future__ import annotations + +import html +from collections.abc import Callable, Sequence + + +MemoryActionHints = Callable[[dict[str, object]], list[dict[str, object]]] +PageHref = Callable[[str], str] + + +def render_memory_action_button(action: dict[str, object]) -> str: + kind = str(action.get("kind") or "") + if kind not in {"review", "archive", "restore"}: + return "" + arguments = action.get("arguments") if isinstance(action.get("arguments"), dict) else {} + identifier = str(arguments.get("identifier") or "") + if not identifier: + return "" + labels = { + "review": "Mark reviewed", + "archive": "Archive", + "restore": "Restore", + } + return ( + f'' + ) + + +def render_memory_action_commands(actions: Sequence[dict[str, object]]) -> str: + if not actions: + return "" + rows = "" + for action in actions: + label = html.escape(str(action.get("label") or "")) + if action.get("href"): + label_html = f'{label}' + else: + label_html = label + priority = str(action.get("priority") or "") + priority_html = f'{html.escape(priority)}' if priority else "" + button_html = render_memory_action_button(action) + rows += ( + f'
{label_html}' + f'{priority_html}{button_html}' + f'{html.escape(str(action.get("command") or ""))}
' + ) + return f'
{rows}
' + + +def render_memory_card( + record: dict[str, object], + *, + page_href: PageHref, + action_hints: MemoryActionHints | None = None, + include_issues: bool = False, +) -> str: + name = str(record.get("name") or "") + title = str(record.get("title") or name) + summary = str(record.get("tldr") or record.get("snippet") or "") + meta_parts = [ + str(record.get("memory_type") or "note"), + str(record.get("scope") or "user"), + str(record.get("status") or "active"), + ] + if record.get("updated_at"): + meta_parts.append(f'updated {record["updated_at"]}') + elif record.get("date_captured"): + meta_parts.append(f'captured {record["date_captured"]}') + meta = " · ".join(part for part in meta_parts if part) + issues_html = "" + if include_issues and record.get("issues"): + issues_html = "
    " + "".join( + f'
  • {html.escape(str(issue["severity"]))} ' + f'{html.escape(str(issue["code"]))}: {html.escape(str(issue["message"]))}
  • ' + for issue in record["issues"] + if isinstance(issue, dict) + ) + "
" + actions = render_memory_action_commands(record.get("actions") or (action_hints(record) if action_hints else [])) + summary_html = f'

{html.escape(summary)}

' if summary else "" + return ( + '
' + f'

{html.escape(title)}

' + f'
{html.escape(meta)}
' + f'{summary_html}' + f'{issues_html}' + f'{actions}' + '
' + ) + + +def render_memory_section( + title: str, + records: list[dict[str, object]], + empty: str, + *, + page_href: PageHref, + action_hints: MemoryActionHints | None = None, + href: str = "", + include_issues: bool = False, +) -> str: + heading_link = f'view all' if href else "" + heading = f'

{html.escape(title)}

{heading_link}
' + if not records: + return heading + f"

{html.escape(empty)}

" + cards = "".join( + render_memory_card(record, page_href=page_href, action_hints=action_hints, include_issues=include_issues) + for record in records + ) + return heading + f'
{cards}
' + + +def render_capture_card(capture: dict[str, object]) -> str: + title = html.escape(str(capture.get("title") or capture.get("path") or "Raw capture")) + path = html.escape(str(capture.get("path") or "")) + meta_parts = ["raw capture"] + if capture.get("project"): + meta_parts.append(f'project {capture["project"]}') + if capture.get("date_captured"): + meta_parts.append(f'captured {capture["date_captured"]}') + warnings = [str(label) for label in capture.get("secret_warnings") or []] + if warnings: + meta_parts.append("secret warnings") + meta = " · ".join(meta_parts) + warning_html = "" + if warnings: + warning_html = ( + '

Secret-looking values: ' + + html.escape(", ".join(warnings)) + + "

" + ) + commands = capture.get("commands") or {} + actions = "".join( + f'
{html.escape(label)}{html.escape(str(command))}
' + for label, command in ( + ("Accept proposal", commands.get("accept", "")), + ("Redact", commands.get("redact", "")), + ("Delete", commands.get("delete", "")), + ) + if command + ) + return ( + '
' + f'

{title}

' + f'
{html.escape(meta)}
' + f'

{path}

' + f'{warning_html}' + f'
{actions}
' + '
' + ) + + +def render_capture_section(captures: list[dict[str, object]]) -> str: + heading = '

Raw captures

' + if not captures: + return heading + "

No saved raw captures.

" + cards = "".join(render_capture_card(capture) for capture in captures) + return heading + f'
{cards}
' + + +def render_memory_next_actions(actions: list[dict[str, str]]) -> str: + items = "" + for action in actions: + label = html.escape(action["label"]) + if action.get("href"): + label_html = f'{label}' + else: + label_html = label + items += ( + f'
  • {label_html}: {html.escape(action["detail"])}' + f'
    {html.escape(action["command"])}
  • ' + ) + return f'
    Next actions
      {items}
    ' diff --git a/mcp_package/link_core/wiki.py b/mcp_package/link_core/wiki.py new file mode 100644 index 0000000..bbeb922 --- /dev/null +++ b/mcp_package/link_core/wiki.py @@ -0,0 +1,1022 @@ +"""Shared wiki indexing, search, context, and graph helpers for Link.""" +from __future__ import annotations + +import json +import re +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +from .files import atomic_write_json, atomic_write_text +from .frontmatter import parse_frontmatter +from .search import ( + build_fts_index, + close_wiki_cache, + normalized_search_text, + search_pages, + search_words, +) + + +WIKILINK_RE = re.compile(r"\[\[([^\]|]+)(?:\|[^\]]*)?\]\]") +PERSISTENT_CACHE_SCHEMA_VERSION = 1 +INDEX_CATEGORY_ORDER = ( + "memories", + "concepts", + "entities", + "sources", + "comparisons", + "explorations", + "root", +) +INDEX_CATEGORY_TITLES = { + "memories": "Memories", + "concepts": "Concepts", + "entities": "Entities", + "sources": "Sources", + "comparisons": "Comparisons", + "explorations": "Explorations", + "root": "Other Pages", +} + + +def wiki_mtime(wiki_dir: Path) -> float: + """Return an mtime signal for files that affect wiki indexes.""" + try: + timestamp = wiki_dir.stat().st_mtime + for path in wiki_dir.rglob("*"): + try: + if path.is_dir() or path.suffix == ".md" or path.name == "_backlinks.json": + timestamp = max(timestamp, path.stat().st_mtime) + except OSError: + continue + return timestamp + except Exception: + return 0.0 + + +def _heading_title(body: str) -> str: + match = re.search(r"^#\s+(.+)", body, re.MULTILINE) + return match.group(1).strip() if match else "" + + +def _tldr(body: str) -> str: + match = re.search(r">\s*\*\*TLDR:\*\*\s*(.+)", body) + return match.group(1).strip() if match else "" + + +def _list_value(value: Any) -> list[Any]: + if isinstance(value, list): + return value + if isinstance(value, str): + return [item.strip() for item in value.split(",") if item.strip()] + return [] + 
+ +def _body_snippet(body: str) -> str: + body_lines = [ + line.strip() + for line in body.split("\n") + if line.strip() and not line.startswith("#") and not line.startswith(">") + ] + return body_lines[0][:200] if body_lines else "" + + +def _markdown_page_paths(wiki_dir: Path) -> list[Path]: + return sorted(path for path in wiki_dir.rglob("*.md") if not path.name.startswith(".")) + + +def _persistent_cache_path(wiki_dir: Path) -> Path: + return wiki_dir.parent / ".link-cache" / f"wiki-cache-v{PERSISTENT_CACHE_SCHEMA_VERSION}.json" + + +def _page_signatures(wiki_dir: Path, page_paths: list[Path]) -> list[dict[str, Any]]: + signatures: list[dict[str, Any]] = [] + for path in page_paths: + try: + stat = path.stat() + except OSError: + continue + signatures.append({ + "path": path.relative_to(wiki_dir).as_posix(), + "size": stat.st_size, + "mtime_ns": stat.st_mtime_ns, + "mode": stat.st_mode, + }) + return signatures + + +def _load_persistent_records( + cache_path: Path, + signatures: list[dict[str, Any]], +) -> list[dict[str, str]] | None: + try: + payload = json.loads(cache_path.read_text(encoding="utf-8")) + except Exception: + return None + if not isinstance(payload, dict): + return None + if payload.get("schema_version") != PERSISTENT_CACHE_SCHEMA_VERSION: + return None + if payload.get("signatures") != signatures: + return None + records = payload.get("records") + if not isinstance(records, list): + return None + loaded: list[dict[str, str]] = [] + for item in records: + if not isinstance(item, dict): + return None + rel = item.get("path") + text = item.get("text") + if not isinstance(rel, str) or not isinstance(text, str): + return None + loaded.append({"path": rel, "text": text}) + return loaded + + +def _write_persistent_records( + cache_path: Path, + signatures: list[dict[str, Any]], + records: list[dict[str, str]], +) -> bool: + try: + atomic_write_json( + cache_path, + { + "schema_version": PERSISTENT_CACHE_SCHEMA_VERSION, + "signatures": signatures, + 
"records": records, + }, + ) + except OSError: + return False + return True + + +def build_wiki_cache(wiki_dir: Path, *, use_persistent_cache: bool = True) -> dict[str, Any]: + use_persistent_cache = use_persistent_cache and wiki_dir.exists() + page_paths = _markdown_page_paths(wiki_dir) + signatures = _page_signatures(wiki_dir, page_paths) + persistent_cache_path = _persistent_cache_path(wiki_dir) + cache_records = ( + _load_persistent_records(persistent_cache_path, signatures) + if use_persistent_cache + else None + ) + cache_hit = cache_records is not None + + records: list[dict[str, str]] = [] + read_warnings: list[dict[str, str]] = [] + if cache_records is not None: + records = cache_records + else: + for md in page_paths: + rel = md.relative_to(wiki_dir) + try: + text = md.read_text(encoding="utf-8", errors="replace") + except OSError as exc: + read_warnings.append({ + "page": f"wiki/{rel.as_posix()}", + "error": str(exc) or exc.__class__.__name__, + }) + continue + records.append({"path": rel.as_posix(), "text": text}) + cache_written = False + if use_persistent_cache and not cache_hit and not read_warnings: + cache_written = _write_persistent_records(persistent_cache_path, signatures, records) + + pages: list[dict[str, Any]] = [] + page_index: dict[str, Path] = {} + fulltext: dict[str, str] = {} + body_index: dict[str, str] = {} + meta_index: dict[str, dict[str, Any]] = {} + normalized_fulltext: dict[str, str] = {} + text_words_index: dict[str, set[str]] = {} + meta_words_index: dict[str, set[str]] = {} + snippet_index: dict[str, str] = {} + token_index: dict[str, set[str]] = {} + meta_token_index: dict[str, set[str]] = {} + raw_forward_links: dict[str, list[str]] = {} + + for record in records: + rel = Path(record["path"]) + md = wiki_dir / rel + text = record["text"] + meta, body = parse_frontmatter(text) + + title = str(meta.get("title") or _heading_title(body) or md.stem) + tldr = _tldr(body) + aliases_raw = _list_value(meta.get("aliases", [])) + 
aliases = [str(alias).lower() for alias in aliases_raw] + tags_raw = _list_value(meta.get("tags", [])) + category = rel.parts[0] if len(rel.parts) > 1 else "root" + stem = md.stem.lower() + + page = { + "name": md.stem, + "path": f"wiki/{rel.as_posix()}", + "title": title, + "category": category, + "type": meta.get("type", ""), + "tags": tags_raw, + "aliases": aliases, + "maturity": meta.get("maturity", ""), + "source_count": meta.get("source_count", ""), + "tldr": tldr, + "date_updated": meta.get("date_updated", ""), + "date_published": meta.get("date_published", ""), + } + pages.append(page) + page_index[stem] = md + raw_forward_links[stem] = [ + match.group(1).strip().lower() + for match in WIKILINK_RE.finditer(body) + if match.group(1).strip() + ] + for alias in aliases: + if alias not in page_index: + page_index[alias] = md + + text_lower = text.lower() + fulltext[stem] = text_lower + body_index[stem] = body + meta_index[stem] = dict(meta) + text_normalized = normalized_search_text(text_lower) + normalized_fulltext[stem] = text_normalized + text_words_index[stem] = search_words(text_normalized) + snippet_index[stem] = _body_snippet(body) + + for token in re.split(r"\W+", text_lower): + if len(token) >= 3: + token_index.setdefault(token, set()).add(stem) + + meta_tokens: set[str] = set() + for word in re.split(r"\W+", title.lower()): + if len(word) >= 3: + meta_tokens.add(word) + for alias in aliases: + for word in re.split(r"\W+", alias): + if len(word) >= 3: + meta_tokens.add(word) + for tag in tags_raw: + for word in re.split(r"\W+", str(tag).lower()): + if len(word) >= 3: + meta_tokens.add(word) + if tldr: + for word in re.split(r"\W+", tldr.lower()): + if len(word) >= 3: + meta_tokens.add(word) + for token in meta_tokens: + meta_token_index.setdefault(token, set()).add(stem) + meta_words_index[stem] = search_words(" ".join([ + title, + stem, + tldr, + " ".join(str(alias) for alias in aliases), + " ".join(str(tag) for tag in tags_raw), + ])) + + page_ids = 
{page["name"].lower(): page["name"] for page in pages} + forward_links_index: dict[str, list[str]] = {} + for source, raw_targets in raw_forward_links.items(): + source_name = page_ids.get(source, source) + seen_targets: set[str] = set() + for target_key in raw_targets: + target = page_ids.get(target_key) + if not target or target_key == source: + continue + if target in seen_targets: + continue + seen_targets.add(target) + forward_links_index.setdefault(source_name, []).append(target) + + fts_index = build_fts_index(pages, fulltext) + return { + "mtime": wiki_mtime(wiki_dir), + "pages": pages, + "page_index": page_index, + "fulltext": fulltext, + "body_index": body_index, + "meta_index": meta_index, + "normalized_fulltext": normalized_fulltext, + "text_words_index": text_words_index, + "meta_words_index": meta_words_index, + "snippet_index": snippet_index, + "token_index": token_index, + "meta_token_index": meta_token_index, + "page_map": {page["name"].lower(): page for page in pages}, + "forward_links_index": forward_links_index, + "fts_index": fts_index, + "search_backend": "sqlite-fts" if fts_index is not None else "token-index", + "read_warning_count": len(read_warnings), + "read_warnings": read_warnings, + "persistent_cache": { + "enabled": use_persistent_cache, + "hit": cache_hit, + "written": cache_written, + "path": str(persistent_cache_path), + "schema_version": PERSISTENT_CACHE_SCHEMA_VERSION, + }, + } + + +def load_backlinks_index( + backlinks_path: Path, + missing_error: str | None = None, + invalid_prefix: str = "invalid backlinks index", +) -> tuple[dict[str, dict[str, list[str]]], str | None]: + empty: dict[str, dict[str, list[str]]] = {"backlinks": {}, "forward": {}} + if not backlinks_path.exists(): + return empty, missing_error + try: + raw = json.loads(backlinks_path.read_text(encoding="utf-8")) + except Exception as exc: + return empty, f"{invalid_prefix}: {exc}" + if not isinstance(raw, dict): + return empty, f"{invalid_prefix}: root must be 
an object" + if "backlinks" not in raw: + return {"backlinks": raw, "forward": {}}, None + backlinks = raw.get("backlinks", {}) + forward = raw.get("forward", {}) + if not isinstance(backlinks, dict) or not isinstance(forward, dict): + return empty, f"{invalid_prefix}: backlinks and forward must be objects" + return {"backlinks": backlinks, "forward": forward}, None + + +def build_backlinks(wiki_dir: Path, body_only: bool = True) -> dict[str, dict[str, list[str]]]: + backlinks: dict[str, list[str]] = {} + forward_links: dict[str, list[str]] = {} + for md in sorted(wiki_dir.rglob("*.md")): + if md.name.startswith("."): + continue + text = md.read_text(encoding="utf-8", errors="replace") + if body_only: + _, text = parse_frontmatter(text) + source = md.stem.lower() + for match in WIKILINK_RE.finditer(text): + target = match.group(1).strip().lower() + if not target or target == source: + continue + backlinks.setdefault(target, []) + if source not in backlinks[target]: + backlinks[target].append(source) + forward_links.setdefault(source, []) + if target not in forward_links[source]: + forward_links[source].append(target) + return {"backlinks": backlinks, "forward": forward_links} + + +def context_for_topic( + wiki_dir: Path, + topic: str, + cache: dict[str, Any], + limit: int = 10, + empty_error: str | None = None, +) -> dict[str, Any]: + q = topic.strip() + if not q: + result: dict[str, Any] = {"topic": "", "found": False, "pages": []} + if empty_error: + result["error"] = empty_error + return result + + matches = search_pages(q, cache, limit=5) + if not matches: + return {"topic": topic, "found": False, "pages": []} + + primary = matches[0] + primary_name = primary["name"].lower() + backlinks_data, _ = load_backlinks_index(wiki_dir / "_backlinks.json") + inbound = backlinks_data.get("backlinks", {}).get(primary_name, []) + + forward: list[str] = [] + forward_seen: set[str] = set() + page_set = {page["name"].lower() for page in cache["pages"]} + forward_links_index = 
cache.get("forward_links_index") + if isinstance(forward_links_index, dict): + cached_forward = ( + forward_links_index.get(str(primary.get("name") or "")) + or forward_links_index.get(primary_name) + or [] + ) + for target_name in cached_forward: + target = str(target_name).lower() + if target in page_set and target != primary_name and target not in forward_seen: + forward_seen.add(target) + forward.append(target) + else: + path = cache["page_index"].get(primary_name) + if path and path.exists(): + text = path.read_text(encoding="utf-8", errors="replace") + _, body = parse_frontmatter(text) + for match in WIKILINK_RE.finditer(body): + target = match.group(1).strip().lower() + if target in page_set and target != primary_name and target not in forward_seen: + forward_seen.add(target) + forward.append(target) + + seen = {primary_name} + context_names = [primary_name] + for name in inbound + forward: + if name not in seen: + seen.add(name) + context_names.append(name) + + context_pages = [] + body_index = cache.get("body_index") if isinstance(cache.get("body_index"), dict) else {} + meta_index = cache.get("meta_index") if isinstance(cache.get("meta_index"), dict) else {} + for name in context_names[:limit]: + page_path = cache["page_index"].get(name) + if not page_path or not page_path.exists(): + continue + cached_page = cache.get("page_map", {}).get(name, {}) + body = str(body_index.get(name) or "") + meta = dict(meta_index.get(name) or {}) + if not body and not meta: + text = page_path.read_text(encoding="utf-8", errors="replace") + meta, body = parse_frontmatter(text) + is_primary = name == primary_name + if is_primary: + content = body + else: + summary_lines = [] + for line in body.split("\n")[:20]: + summary_lines.append(line) + if line.startswith("## ") and len(summary_lines) > 3: + break + content = "\n".join(summary_lines) + context_pages.append({ + "name": name, + "path": cached_page.get("path") or f"wiki/{page_path.relative_to(wiki_dir).as_posix()}", + 
"title": meta.get("title", name), + "category": cached_page.get("category", ""), + "type": meta.get("type", ""), + "source_count": cached_page.get("source_count", ""), + "tldr": cached_page.get("tldr", ""), + "date_updated": cached_page.get("date_updated", ""), + "date_published": cached_page.get("date_published", ""), + "is_primary": is_primary, + "relationship": "primary" if is_primary else ("inbound" if name in inbound else "forward"), + "content": content, + }) + + return { + "topic": topic, + "found": True, + "primary": primary["name"], + "inbound_count": len(inbound), + "forward_count": len(forward), + "pages": context_pages, + } + + +def graph_data(cache: dict[str, Any]) -> dict[str, list[dict[str, Any]]]: + pages = cache["pages"] + page_ids = {page["name"].lower(): page["name"] for page in pages} + valid_ids = set(page_ids.values()) + nodes = [ + {"id": page["name"], "title": page["title"], "category": page["category"], "type": page["type"]} + for page in pages + ] + edges: list[dict[str, str]] = [] + seen_edges: set[tuple[str, str]] = set() + forward_links = cache.get("forward_links_index") + if isinstance(forward_links, dict): + for source, targets in forward_links.items(): + source_id = page_ids.get(str(source).lower(), str(source)) + if source_id not in valid_ids: + continue + if not isinstance(targets, list): + continue + for target_raw in targets: + target_key = str(target_raw).lower() + target = page_ids.get(target_key, str(target_raw)) + if target not in valid_ids or target == source_id: + continue + edge_key = (source_id, target) + if edge_key in seen_edges: + continue + seen_edges.add(edge_key) + edges.append({"source": source_id, "target": target}) + return {"nodes": nodes, "edges": edges} + + for page in pages: + source = page["name"] + path = cache["page_index"].get(source.lower()) + if not path or not path.exists(): + continue + text = path.read_text(encoding="utf-8", errors="replace") + _, body = parse_frontmatter(text) + for match in 
WIKILINK_RE.finditer(body): + target_key = match.group(1).strip().lower() + target = page_ids.get(target_key) + if not target or target_key == source.lower(): + continue + edge_key = (source, target) + if edge_key in seen_edges: + continue + seen_edges.add(edge_key) + edges.append({"source": source, "target": target}) + return {"nodes": nodes, "edges": edges} + + +def _bounded_int(value: object, default: int, lower: int, upper: int) -> int: + try: + parsed = int(value) # type: ignore[arg-type] + except (TypeError, ValueError): + return default + return min(max(parsed, lower), upper) + + +def _count_by(nodes: list[dict[str, Any]], key: str) -> dict[str, int]: + counts: dict[str, int] = {} + for node in nodes: + value = str(node.get(key) or "unknown") + counts[value] = counts.get(value, 0) + 1 + return dict(sorted(counts.items(), key=lambda item: (-item[1], item[0]))) + + +def _trim_summary(value: object, max_chars: int = 180) -> str: + text = str(value or "").strip() + if len(text) <= max_chars: + return text + return text[: max(0, max_chars - 3)].rstrip() + "..." + + +def graph_summary( + cache: dict[str, Any], + topic: str = "", + limit: int = 40, + depth: int = 1, + max_edges: int = 120, +) -> dict[str, Any]: + """Return a token-safe graph packet for agents and large local wikis. + + ``graph_data`` intentionally returns the full graph for visualization and + exports. This summary keeps the same source graph but selects a bounded set + of high-signal nodes so MCP clients do not accidentally pull a 1000+ page + graph into model context. 
+ """ + limit = _bounded_int(limit, 40, 1, 250) + depth = _bounded_int(depth, 1, 0, 3) + max_edges = _bounded_int(max_edges, 120, 0, 1000) + topic = str(topic or "").strip() + + graph = graph_data(cache) + all_nodes = list(graph.get("nodes", [])) + all_edges = list(graph.get("edges", [])) + node_by_id = {str(node.get("id") or ""): node for node in all_nodes} + generated_node_ids = {"index", "log"} + selectable_ids = { + node_id + for node_id, node in node_by_id.items() + if node_id.lower() not in generated_node_ids and str(node.get("category") or "") != "root" + } + page_map = cache.get("page_map", {}) + snippet_index = cache.get("snippet_index", {}) + + in_degree = {node_id: 0 for node_id in node_by_id} + out_degree = {node_id: 0 for node_id in node_by_id} + adjacency: dict[str, set[str]] = {node_id: set() for node_id in node_by_id} + for edge in all_edges: + source = str(edge.get("source") or "") + target = str(edge.get("target") or "") + if source not in node_by_id or target not in node_by_id: + continue + out_degree[source] = out_degree.get(source, 0) + 1 + in_degree[target] = in_degree.get(target, 0) + 1 + adjacency.setdefault(source, set()).add(target) + adjacency.setdefault(target, set()).add(source) + + degree = {node_id: in_degree.get(node_id, 0) + out_degree.get(node_id, 0) for node_id in node_by_id} + + def node_rank(node_id: str) -> tuple[int, str, str]: + node = node_by_id[node_id] + return (-degree.get(node_id, 0), str(node.get("title") or "").lower(), node_id) + + top_hubs = [ + { + "id": node_id, + "title": node_by_id[node_id].get("title", ""), + "category": node_by_id[node_id].get("category", ""), + "type": node_by_id[node_id].get("type", ""), + "degree": degree.get(node_id, 0), + } + for node_id in sorted(selectable_ids, key=node_rank)[:10] + ] + + selected_ids: list[str] = [] + selection_reasons: dict[str, str] = {} + distances: dict[str, int] = {} + found = False + mode = "overview" + + if topic: + search_results = search_pages(topic, cache, 
limit=min(max(limit, 10), 50)) + seeds = [ + str(result.get("name") or "") + for result in search_results + if str(result.get("name") or "") in selectable_ids + ] + if seeds: + found = True + mode = "topic-neighborhood" + frontier = list(dict.fromkeys(seeds)) + for seed in frontier: + distances[seed] = 0 + selection_reasons[seed] = "matched topic" + for current_depth in range(1, depth + 1): + candidates: list[str] = [] + for node_id in frontier: + candidates.extend( + neighbor for neighbor in adjacency.get(node_id, set()) + if neighbor in selectable_ids + ) + next_frontier = [] + for candidate in sorted(set(candidates), key=node_rank): + if candidate in distances: + continue + distances[candidate] = current_depth + selection_reasons[candidate] = f"within {current_depth} hop{'s' if current_depth != 1 else ''} of a topic match" + next_frontier.append(candidate) + frontier = next_frontier + selected_ids = sorted(distances, key=lambda node_id: (distances[node_id],) + node_rank(node_id))[:limit] + + if not selected_ids: + selected_ids = sorted(selectable_ids, key=node_rank)[:limit] + selection_reasons = {node_id: "high-degree overview node" for node_id in selected_ids} + + selected = set(selected_ids) + selected_edges = [ + {"source": str(edge.get("source") or ""), "target": str(edge.get("target") or "")} + for edge in all_edges + if str(edge.get("source") or "") in selected and str(edge.get("target") or "") in selected + ] + selected_edges.sort(key=lambda edge: ( + -degree.get(edge["source"], 0) - degree.get(edge["target"], 0), + edge["source"], + edge["target"], + )) + edge_truncated = len(selected_edges) > max_edges + selected_edges = selected_edges[:max_edges] + + nodes_payload: list[dict[str, Any]] = [] + for node_id in selected_ids: + node = node_by_id[node_id] + page = page_map.get(node_id.lower(), {}) if isinstance(page_map, dict) else {} + summary = "" + if isinstance(page, dict): + summary = str(page.get("tldr") or "") + if not summary and 
isinstance(snippet_index, dict): + summary = str(snippet_index.get(node_id.lower(), "")) + item = { + "id": node_id, + "title": node.get("title", ""), + "category": node.get("category", ""), + "type": node.get("type", ""), + "degree": degree.get(node_id, 0), + "in_degree": in_degree.get(node_id, 0), + "out_degree": out_degree.get(node_id, 0), + "summary": _trim_summary(summary), + "why_selected": selection_reasons.get(node_id, "selected for graph summary"), + } + if node_id in distances: + item["distance"] = distances[node_id] + nodes_payload.append(item) + + if mode == "topic-neighborhood": + agent_guidance = [ + "Use this bounded graph summary for orientation before requesting full pages.", + "Call get_context on the best matching node when you need source-backed page content.", + "Do not call get_graph unless the user explicitly asks for a full graph export.", + ] + else: + agent_guidance = [ + "This is a high-degree overview, not the full graph.", + "Pass a topic to get_graph_summary to inspect a bounded neighborhood.", + "Use query_link or get_context for answer-ready source-backed context.", + ] + + follow_up: list[dict[str, Any]] = [] + if topic and found and selected_ids: + follow_up.append({"tool": "get_context", "arguments": {"topic": selected_ids[0]}}) + follow_up.append({"tool": "get_backlinks", "arguments": {"page_name": selected_ids[0]}}) + elif topic and not found: + follow_up.append({"tool": "search_wiki", "arguments": {"query": topic, "limit": 10}}) + else: + follow_up.append({"tool": "get_graph_summary", "arguments": {"topic": "", "limit": limit, "depth": depth}}) + + considered_nodes = len(distances) if distances else len(selectable_ids) + if len(nodes_payload) < considered_nodes or edge_truncated: + follow_up.append({ + "tool": "get_graph_summary", + "arguments": {"topic": topic, "limit": min(limit * 2, 250), "depth": depth, "max_edges": min(max_edges * 2, 1000)}, + "when": "Use only if the bounded graph is insufficient.", + }) + 
follow_up.append({"tool": "get_graph", "when": "Only for an explicit full graph export or offline analysis."}) + + return { + "topic": topic, + "mode": mode, + "found": found if topic else True, + "node_count": len(all_nodes), + "edge_count": len(all_edges), + "returned_nodes": len(nodes_payload), + "returned_edges": len(selected_edges), + "considered_nodes": considered_nodes, + "generated_nodes_excluded": len(node_by_id) - len(selectable_ids), + "limit": limit, + "depth": depth, + "max_edges": max_edges, + "truncated": len(nodes_payload) < considered_nodes or edge_truncated, + "edge_truncated": edge_truncated, + "search_backend": str(cache.get("search_backend") or "token-index"), + "category_counts": _count_by(all_nodes, "category"), + "type_counts": _count_by(all_nodes, "type"), + "top_hubs": top_hubs, + "nodes": nodes_payload, + "edges": selected_edges, + "agent_guidance": agent_guidance, + "follow_up": follow_up, + } + + +def list_pages( + cache: dict[str, Any], + category: str = "", + page_type: str = "", + maturity: str = "", + limit: int = 100, + offset: int = 0, + include_all: bool = False, +) -> dict[str, Any]: + """Return filtered page metadata, bounded by default for agent context.""" + pages = list(cache.get("pages", [])) + category = str(category or "").strip().lower() + page_type = str(page_type or "").strip().lower() + maturity = str(maturity or "").strip().lower() + if category: + pages = [page for page in pages if str(page.get("category") or "").lower() == category] + if page_type: + pages = [page for page in pages if str(page.get("type") or "").lower() == page_type] + if maturity: + pages = [page for page in pages if str(page.get("maturity") or "").lower() == maturity] + + total = len(pages) + offset = _bounded_int(offset, 0, 0, max(total, 0)) + limit = _bounded_int(limit, 100, 1, 1000) + if include_all: + returned_pages = pages[offset:] + effective_limit: int | None = None + else: + returned_pages = pages[offset: offset + limit] + effective_limit 
= limit + next_offset = offset + len(returned_pages) + truncated = next_offset < total + + follow_up: list[dict[str, Any]] = [] + if truncated: + follow_up.append({ + "tool": "get_pages", + "arguments": { + "category": category, + "page_type": page_type, + "maturity": maturity, + "limit": limit, + "offset": next_offset, + }, + }) + follow_up.append({"tool": "search_wiki", "when": "Use when you know what topic or text you need."}) + follow_up.append({"tool": "query_link", "when": "Use for answer-ready memory plus wiki context."}) + + return { + "count": total, + "total": total, + "returned_count": len(returned_pages), + "offset": offset, + "limit": effective_limit, + "truncated": truncated, + "filters": { + "category": category, + "page_type": page_type, + "maturity": maturity, + }, + "pages": returned_pages, + "agent_guidance": [ + "This page list is metadata only and may be paginated for context safety.", + "Use search_wiki, query_link, or get_context instead of paging through the whole wiki when answering a question.", + ], + "follow_up": follow_up, + } + + +def page_link_summary( + backlinks_data: dict[str, dict[str, list[str]]], + page_name: str, + limit: int = 100, + offset: int = 0, + include_all: bool = False, +) -> dict[str, Any]: + """Return bounded inbound/forward links for one page.""" + display_name = str(page_name or "").strip() + name = display_name.lower().replace(" ", "-") + limit = _bounded_int(limit, 100, 1, 1000) + inbound_all = list(backlinks_data.get("backlinks", {}).get(name, [])) + forward_all = list(backlinks_data.get("forward", {}).get(name, [])) + max_count = max(len(inbound_all), len(forward_all)) + offset = _bounded_int(offset, 0, 0, max(max_count, 0)) + + if include_all: + inbound = inbound_all[offset:] + forward = forward_all[offset:] + effective_limit: int | None = None + else: + inbound = inbound_all[offset: offset + limit] + forward = forward_all[offset: offset + limit] + effective_limit = limit + + next_offset = offset + 
max(len(inbound), len(forward)) + truncated = next_offset < max_count + follow_up: list[dict[str, Any]] = [] + if truncated: + follow_up.append({ + "tool": "get_backlinks", + "arguments": { + "page_name": display_name, + "limit": limit, + "offset": next_offset, + }, + }) + follow_up.append({"tool": "get_context", "arguments": {"topic": display_name}}) + follow_up.append({"tool": "get_graph_summary", "arguments": {"topic": display_name}}) + + return { + "page": display_name, + "key": name, + "inbound_count": len(inbound_all), + "forward_count": len(forward_all), + "returned_inbound": len(inbound), + "returned_forward": len(forward), + "offset": offset, + "limit": effective_limit, + "truncated": truncated, + "inbound": inbound, + "forward": forward, + "agent_guidance": [ + "This page link list may be paginated for context safety.", + "Use get_context or query_link when you need source-backed content, not only graph links.", + ], + "follow_up": follow_up, + } + + +def _index_pages(cache: dict[str, Any]) -> list[dict[str, Any]]: + return [ + page for page in cache["pages"] + if str(page.get("name") or "").lower() not in {"index", "log"} + ] + + +def _category_sort_key(category: str) -> tuple[int, str]: + try: + index = INDEX_CATEGORY_ORDER.index(category) + except ValueError: + index = len(INDEX_CATEGORY_ORDER) + return index, category + + +def _page_sort_key(page: dict[str, Any]) -> tuple[tuple[int, str], str]: + return _category_sort_key(str(page.get("category") or "root")), str(page.get("title") or "").lower() + + +def _page_summary(page: dict[str, Any], cache: dict[str, Any]) -> str: + name = str(page.get("name") or "").lower() + tldr = str(page.get("tldr") or "").strip() + snippet = str(cache.get("snippet_index", {}).get(name, "")).strip() + title = str(page.get("title") or page.get("name") or "").strip() + return tldr or snippet or title + + +def _index_entry(page: dict[str, Any], cache: dict[str, Any]) -> str: + name = str(page.get("name") or "") + title = 
str(page.get("title") or name) + summary = _page_summary(page, cache) + metadata = [ + value for value in ( + str(page.get("type") or "").strip(), + str(page.get("maturity") or "").strip(), + ) + if value + ] + meta = f" ({', '.join(metadata)})" if metadata else "" + if summary and summary != title: + return f"- [[{name}]] - {summary}{meta}" + return f"- [[{name}]]{meta}" + + +def build_index_markdown( + wiki_dir: Path, + cache: dict[str, Any] | None = None, + generated_at: str | None = None, +) -> str: + """Build a deterministic, human-readable catalog for a Link wiki.""" + owns_cache = cache is None + cache = cache or build_wiki_cache(wiki_dir) + try: + pages = sorted(_index_pages(cache), key=_page_sort_key) + generated_at = generated_at or datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z") + source_count = sum( + 1 for page in pages + if str(page.get("category") or "") == "sources" or str(page.get("type") or "") == "source" + ) + memory_count = sum( + 1 for page in pages + if str(page.get("category") or "") == "memories" or str(page.get("type") or "") == "memory" + ) + + categories: dict[str, list[dict[str, Any]]] = {} + for page in pages: + categories.setdefault(str(page.get("category") or "root"), []).append(page) + + lines = [ + "# Link Wiki Index", + "", + f"> Last updated: {generated_at} | {len(pages)} pages | {source_count} sources | {memory_count} memories", + "", + "## Categories", + "", + ] + for category in sorted(categories, key=_category_sort_key): + title = INDEX_CATEGORY_TITLES.get(category, category.replace("-", " ").title()) + lines.append(f"- {title}: {len(categories[category])}") + if not categories: + lines.append("- No pages yet") + + for category in sorted(categories, key=_category_sort_key): + title = INDEX_CATEGORY_TITLES.get(category, category.replace("-", " ").title()) + lines.extend(["", f"### {category}", ""]) + for page in categories[category]: + lines.append(_index_entry(page, cache)) + + 
lines.extend([ + "", + "## Recent", + "", + "See [[log]] for the append-only local audit trail.", + "", + ]) + return "\n".join(lines) + finally: + if owns_cache: + close_wiki_cache(cache) + + +def rebuild_index( + wiki_dir: Path, + cache: dict[str, Any] | None = None, + generated_at: str | None = None, +) -> dict[str, Any]: + """Regenerate wiki/index.md from the current Markdown pages.""" + owns_cache = cache is None + cache = cache or build_wiki_cache(wiki_dir) + try: + read_warning_count = int(cache.get("read_warning_count") or 0) + if read_warning_count: + read_warnings = cache.get("read_warnings") or [] + first_warning = read_warnings[0] if isinstance(read_warnings, list) and read_warnings else {} + page = first_warning.get("page") if isinstance(first_warning, dict) else "" + detail = f" starting at {page}" if page else "" + raise OSError(f"could not read {read_warning_count} wiki page(s){detail}") + markdown = build_index_markdown(wiki_dir, cache=cache, generated_at=generated_at) + index_path = wiki_dir / "index.md" + atomic_write_text(index_path, markdown) + pages = _index_pages(cache) + category_counts: dict[str, int] = {} + for page in pages: + category = str(page.get("category") or "root") + category_counts[category] = category_counts.get(category, 0) + 1 + return { + "rebuilt": True, + "path": "wiki/index.md", + "page_count": len(pages), + "source_count": sum( + 1 for page in pages + if str(page.get("category") or "") == "sources" or str(page.get("type") or "") == "source" + ), + "memory_count": sum( + 1 for page in pages + if str(page.get("category") or "") == "memories" or str(page.get("type") or "") == "memory" + ), + "category_counts": dict(sorted(category_counts.items(), key=lambda item: _category_sort_key(item[0]))), + "next_actions": [ + { + "tool": "rebuild_backlinks", + "command": "link rebuild-backlinks", + "reason": "Regenerated index links change graph edges; rebuild backlinks before validation.", + } + ], + } + finally: + if owns_cache: + 
close_wiki_cache(cache) diff --git a/mcp_package/link_mcp/__init__.py b/mcp_package/link_mcp/__init__.py index 2cb6d74..12ad961 100644 --- a/mcp_package/link_mcp/__init__.py +++ b/mcp_package/link_mcp/__init__.py @@ -1,2 +1,2 @@ """Link MCP Server — personal knowledge wiki as MCP tools.""" -__version__ = "1.0.7" +__version__ = "1.1.0" diff --git a/mcp_package/link_mcp/server.py b/mcp_package/link_mcp/server.py index 313858d..d15b4dc 100644 --- a/mcp_package/link_mcp/server.py +++ b/mcp_package/link_mcp/server.py @@ -24,7 +24,9 @@ } """ from __future__ import annotations -import argparse, json, re, sys +import argparse +import json +import sys from pathlib import Path # ── Resolve wiki directory ──────────────────────────────────────────── @@ -51,11 +53,36 @@ mcp = FastMCP( "link", instructions=( - "Link is a personal knowledge wiki. Use search_wiki to find pages, " - "get_context to retrieve a topic with its full graph neighborhood, " - "and get_pages to browse all pages. Always prefer get_context over " - "reading files directly — it returns the primary page plus related " - "pages via graph traversal in one call." + "Link is local personal memory for agents. Use link_status when " + "connecting to Link or troubleshooting setup/readiness. Start with " + "migrate_wiki if link_status reports a missing or old schema marker. " + "Use starter_prompts when the user asks what to try after install. " + "Use ingest_status to check pending raw files, the guided ingest plan, and the next ingest prompt. " + "query_link when the user asks a substantive question that may need " + "both memory and wiki context. Use memory_brief at " + "session start or before personalized/project work; pass the user's " + "task as the query when available. Use recall_memory for focused user " + "preferences, decisions, and project context, memory_profile to inspect " + "what Link remembers, memory_inbox to find memories needing review, and " + "explain_memory to audit why a memory exists. 
Use capture_session for " + "long chat or session notes that should be stored locally before memory " + "approval, and capture_inbox to review saved captures before accepting, " + "redacting, or deleting them; use propose_memories when no raw capture is needed. Use search_wiki to find " + "specific pages and get_pages for bounded metadata lists; use get_context to retrieve a topic with its full graph " + "neighborhood. Use get_graph_summary for bounded graph orientation on " + "large wikis; use get_graph only for explicit full graph exports. After " + "ingesting sources or substantially editing wiki " + "pages, call rebuild_index, rebuild_backlinks, then validate_wiki " + "before saying the " + "wiki is updated. Use backup_wiki before broad repairs or risky local " + "wiki edits; raw/ is excluded unless explicitly requested. Only call " + "remember_memory when the user explicitly asks " + "you to remember something; if it returns duplicate candidates, use " + "update_memory on the existing memory instead of forcing a duplicate. " + "If it returns conflict candidates, ask the user whether to update or " + "archive the older memory before forcing a conflict. " + "Use archive_memory instead of deleting stale or wrong memories; use " + "forget_memory only when the user explicitly asks for permanent deletion." 
), ) @@ -63,13 +90,104 @@ _cache: dict = {} _cache_mtime: float = 0.0 MAX_TEXT_INPUT = 200 +MAX_CAPTURE_INPUT = 12000 + +from link_core.memory import ( + add_capture_review_to_brief as _core_add_capture_review_to_brief, + count_values as _core_count_values, + default_project_for_target as _core_default_project_for_target, + forget_memory_page as _core_forget_memory_page, + mark_memory_reviewed as _core_mark_memory_reviewed, + memory_brief as _core_memory_brief, + memory_explanation as _core_memory_explanation, + memory_inbox as _core_memory_inbox, + memory_profile as _core_memory_profile, + memory_audit_report as _core_memory_audit_report, + memory_records as _core_memory_records, + normalize_project as _core_normalize_project, + memory_review_issues as _core_memory_review_issues, + propose_memories_from_text as _core_propose_memories_from_text, + recall_memories as _core_recall_memories, + recent_memories as _core_recent_memories, + resolve_memory_page as _core_resolve_memory_page, + set_memory_status as _core_set_memory_status, + slim_memory as _core_slim_memory, + slugify as _core_slugify, + top_tags as _core_top_tags, + update_memory_page as _core_update_memory_page, + write_memory_page as _core_write_memory_page, +) +from link_core.backup import ( + BackupError as _CoreBackupError, + create_backup as _core_create_backup, + list_backups as _core_list_backups, +) +from link_core.capture import ( + capture_filename as _core_capture_filename, + capture_inbox as _core_capture_inbox, + capture_notes_from_markdown as _core_capture_notes_from_markdown, + capture_records as _core_capture_records, + capture_review_summary as _core_capture_review_summary, + capture_title as _core_capture_title, + mcp_capture_commands as _core_mcp_capture_commands, + resolve_capture_file as _core_resolve_capture_file, +) +from link_core.files import ( + atomic_write_json as _core_atomic_write_json, + atomic_write_text as _core_atomic_write_text, +) +from link_core.frontmatter import ( + 
frontmatter_string as _frontmatter_string, +) +from link_core.ingest import ( + collect_ingest_status as _core_collect_ingest_status, +) +from link_core.log import ( + append_log as _core_append_log, + utc_timestamp as _core_utc_timestamp, +) +from link_core.security import ( + clean_text_input as _clean_text_input, + redact_secret_values as _redact_secret_values, + secret_value_warnings as _secret_value_warnings, +) +from link_core.query import ( + query_link as _core_query_link, +) +from link_core.prompts import ( + starter_prompt_payload as _core_starter_prompt_payload, +) +from link_core.validation import ( + validate_wiki as _core_validate_wiki, +) +from link_core.version import LINK_VERSION +from link_core.status import ( + link_status as _core_link_status, +) +from link_core.schema import ( + migrate_wiki as _core_migrate_wiki, +) +from link_core.wiki import ( + build_backlinks as _core_build_backlinks, + build_wiki_cache as _core_build_wiki_cache, + close_wiki_cache as _core_close_wiki_cache, + context_for_topic as _core_context_for_topic, + graph_data as _core_graph_data, + graph_summary as _core_graph_summary, + list_pages as _core_list_pages, + load_backlinks_index as _core_load_backlinks_index, + page_link_summary as _core_page_link_summary, + rebuild_index as _core_rebuild_index, + search_pages as _core_search_pages, + wiki_mtime as _core_wiki_mtime, +) -def _clean_text_input(value, max_len: int = MAX_TEXT_INPUT) -> str: - if value is None: - return "" - text = str(value).strip() - return text[:max_len] +def _required_text_input(value, message: str, max_len: int = MAX_TEXT_INPUT) -> str: + text = _clean_text_input(value, max_len=max_len) + if not text: + raise ValueError(message) + return text def _parse_limit(value, default: int = 20, max_limit: int = 50) -> int: @@ -80,35 +198,19 @@ def _parse_limit(value, default: int = 20, max_limit: int = 50) -> int: return min(max(limit, 1), max_limit) +def _default_project() -> str: + return 
_core_default_project_for_target(WIKI_DIR) + + def _wiki_mtime() -> float: - try: - t = WIKI_DIR.stat().st_mtime - for path in WIKI_DIR.rglob("*"): - try: - if path.is_dir() or path.suffix == ".md" or path.name == "_backlinks.json": - t = max(t, path.stat().st_mtime) - except OSError: - continue - return t - except Exception: - return 0.0 - - -def _parse_frontmatter(text: str) -> tuple[dict, str]: - if not text.startswith("---"): - return {}, text - end = text.find("---", 3) - if end == -1: - return {}, text - meta: dict = {} - for line in text[3:end].strip().splitlines(): - if ":" in line: - k, v = line.split(":", 1) - v = v.strip().strip('"').strip("'") - if v.startswith("[") and v.endswith("]"): - v = [x.strip().strip('"').strip("'") for x in v[1:-1].split(",")] - meta[k.strip()] = v - return meta, text[end + 3:].strip() + return _core_wiki_mtime(WIKI_DIR) + + +def _clear_cache() -> None: + global _cache, _cache_mtime + _core_close_wiki_cache(_cache) + _cache = {} + _cache_mtime = 0.0 def _build_cache() -> dict: @@ -117,100 +219,8 @@ def _build_cache() -> dict: if _cache and mtime == _cache_mtime: return _cache - pages = [] - page_index: dict[str, Path] = {} - fulltext: dict[str, str] = {} - snippet_index: dict[str, str] = {} - token_index: dict[str, set] = {} - meta_token_index: dict[str, set] = {} - - for md in sorted(WIKI_DIR.rglob("*.md")): - if md.name.startswith("."): - continue - rel = md.relative_to(WIKI_DIR) - text = md.read_text(encoding="utf-8", errors="replace") - meta, body = _parse_frontmatter(text) - - title = meta.get("title", "") - if not title: - m = re.search(r"^#\s+(.+)", body, re.MULTILINE) - title = m.group(1) if m else md.stem - - tldr = "" - tldr_m = re.search(r">\s*\*\*TLDR:\*\*\s*(.+)", body) - if tldr_m: - tldr = tldr_m.group(1).strip() - - aliases_raw = meta.get("aliases", []) - if isinstance(aliases_raw, str): - aliases_raw = [a.strip() for a in aliases_raw.split(",") if a.strip()] - aliases = [a.lower() for a in aliases_raw] - - 
tags_raw = meta.get("tags", []) - if isinstance(tags_raw, str): - tags_raw = [t.strip() for t in tags_raw.split(",") if t.strip()] - - cat = rel.parts[0] if len(rel.parts) > 1 else "root" - stem = md.stem.lower() - - page = { - "name": md.stem, - "title": title, - "category": cat, - "type": meta.get("type", ""), - "tags": tags_raw, - "aliases": aliases, - "maturity": meta.get("maturity", ""), - "source_count": meta.get("source_count", ""), - "tldr": tldr, - "date_updated": meta.get("date_updated", ""), - "date_published": meta.get("date_published", ""), - } - pages.append(page) - page_index[stem] = md - for alias in aliases: - if alias not in page_index: - page_index[alias] = md - - text_lower = text.lower() - fulltext[stem] = text_lower - body_lines = [l.strip() for l in body.split("\n") if l.strip() and not l.startswith("#") and not l.startswith(">")] - snippet_index[stem] = body_lines[0][:200] if body_lines else "" - - for token in re.split(r"\W+", text_lower): - if len(token) >= 3: - token_index.setdefault(token, set()).add(stem) - - meta_tokens: set = set() - for word in re.split(r"\W+", title.lower()): - if len(word) >= 3: - meta_tokens.add(word) - for alias in aliases: - for word in re.split(r"\W+", alias): - if len(word) >= 3: - meta_tokens.add(word) - for tag in tags_raw: - for word in re.split(r"\W+", str(tag).lower()): - if len(word) >= 3: - meta_tokens.add(word) - if tldr: - for word in re.split(r"\W+", tldr.lower()): - if len(word) >= 3: - meta_tokens.add(word) - for token in meta_tokens: - meta_token_index.setdefault(token, set()).add(stem) - - page_map = {p["name"].lower(): p for p in pages} - - _cache = { - "pages": pages, - "page_index": page_index, - "fulltext": fulltext, - "snippet_index": snippet_index, - "token_index": token_index, - "meta_token_index": meta_token_index, - "page_map": page_map, - } + _core_close_wiki_cache(_cache) + _cache = _core_build_wiki_cache(WIKI_DIR) _cache_mtime = mtime return _cache @@ -220,131 +230,673 @@ def 
_search(q: str, limit: int = 20) -> list[dict]: limit = _parse_limit(limit) if not q: return [] - q_lower = q.lower() - c = _build_cache() - pages = c["pages"] - page_map = c["page_map"] - token_index = c["token_index"] - meta_token_index = c["meta_token_index"] - fulltext = c["fulltext"] - snippet_index = c["snippet_index"] - - is_single = bool(re.match(r"^\w+$", q_lower)) - if is_single and q_lower in token_index: - candidates = token_index[q_lower] | meta_token_index.get(q_lower, set()) - else: - candidates = {p["name"].lower() for p in pages} - - scored = [] - for stem in candidates: - p = page_map.get(stem) - if not p: - continue - score = 0 - if q_lower in p["title"].lower(): - score += 10 - if q_lower == stem: - score += 20 - if any(q_lower in a for a in p.get("aliases", [])): - score += 8 - if any(q_lower in str(t).lower() for t in p.get("tags", [])): - score += 5 - if q_lower in p.get("tldr", "").lower(): - score += 3 - if fulltext.get(stem, "") and q_lower in fulltext[stem]: - score += 2 - if score > 0: - scored.append((score, {**p, "score": score, "snippet": snippet_index.get(stem, "")})) - - scored.sort(key=lambda x: (-x[0], x[1]["title"].lower())) - return [r for _, r in scored[:limit]] + return _core_search_pages(q, _build_cache(), limit=limit) def _get_context(topic: str) -> dict: topic = _clean_text_input(topic) - if not topic: - return {"topic": "", "found": False, "error": "topic required", "pages": []} + return _core_context_for_topic(WIKI_DIR, topic, _build_cache(), empty_error="topic required") - c = _build_cache() - matches = _search(topic, limit=5) - if not matches: - return {"topic": topic, "found": False, "pages": []} - primary = matches[0] - primary_name = primary["name"].lower() +def _utc_timestamp() -> str: + return _core_utc_timestamp() + + +def _memory_records() -> list[dict[str, object]]: + return _core_memory_records(WIKI_DIR) + + +def _slim_memory(record: dict[str, object]) -> dict[str, object]: + return _core_slim_memory(record) + 
+ +def _memory_review_issues(record: dict[str, object]) -> list[dict[str, str]]: + return _core_memory_review_issues(record, review_command="review_memory") + + +def _memory_inbox(limit: int = 20, include_archived: bool = False, project: str = "") -> dict[str, object]: + return _core_memory_inbox( + _memory_records(), + limit=limit, + include_archived=include_archived, + review_command="review_memory", + project=project, + ) + + +def _memory_explanation(identifier: str) -> dict[str, object]: + return _core_memory_explanation( + WIKI_DIR, + identifier, + records=_memory_records(), + review_command="review_memory", + ) + + +def _count_values(records: list[dict[str, object]], field: str) -> dict[str, int]: + return _core_count_values(records, field) + + +def _top_tags(records: list[dict[str, object]], limit: int = 12) -> list[dict[str, object]]: + return _core_top_tags(records, limit=limit) + + +def _recent_memories(records: list[dict[str, object]]) -> list[dict[str, object]]: + return _core_recent_memories(records) + + +def _resolve_project(project: str = "") -> str: + return _clean_text_input(project) or _default_project() + + +def _memory_profile(limit: int = 10, project: str = "") -> dict[str, object]: + return _core_memory_profile( + _memory_records(), + limit=limit, + review_command="review_memory", + project=_resolve_project(project), + ) + + +def _memory_brief(query: str = "", limit: int = 6, project: str = "") -> dict[str, object]: + project_name = _resolve_project(project) + payload = _core_memory_brief( + _memory_records(), query=_clean_text_input(query, max_len=500), + limit=limit, review_command="review_memory", project=project_name, + ) + return _core_add_capture_review_to_brief(payload, _capture_review_summary(project=project_name)) + + +def _query_link(query: str, budget: str = "medium", project: str = "") -> dict[str, object]: + project_name = _resolve_project(project) + return _core_query_link( + WIKI_DIR, + _clean_text_input(query, max_len=500), + 
_build_cache(), + _memory_records(), + budget=budget, + project=project_name, + review_command="review_memory", + ) + + +def _validate_wiki(strict: bool = False) -> dict[str, object]: + return _core_validate_wiki(WIKI_DIR, strict=bool(strict)) + + +def _package_version() -> str: + return LINK_VERSION + + +def _link_status(include_validation: bool = False) -> dict[str, object]: + return _core_link_status( + WIKI_DIR, + version=_package_version(), + include_validation=include_validation, + ) + + +def _starter_prompts(project: str = "") -> dict[str, object]: + return _core_starter_prompt_payload(WIKI_DIR.parent, project=project or None) + + +def _migrate_wiki() -> dict[str, object]: + payload = _core_migrate_wiki(WIKI_DIR) + _clear_cache() + return payload + + +def _ingest_status() -> dict[str, object]: + return _core_collect_ingest_status(WIKI_DIR.parent) + + +def _mcp_memory_audit_actions( + inbox: dict[str, object], + captures: dict[str, object], + project_name: str, +) -> list[dict[str, object]]: + project_arg = f', project="{project_name}"' if project_name else "" + return [ + { + "label": "Review memory inbox", + "tool": "memory_inbox", + "command": f"memory_inbox(include_archived=true{project_arg})", + "recommended": bool(inbox["review_count"]), + }, + { + "label": "Review raw captures", + "tool": "capture_inbox", + "command": f"capture_inbox({project_arg.lstrip(', ')})" if project_arg else "capture_inbox()", + "recommended": bool(captures["count"] or captures.get("read_warning_count")), + }, + { + "label": "Explain a memory", + "tool": "explain_memory", + "command": 'explain_memory(identifier="")', + "recommended": False, + }, + ] + + +def _memory_audit(limit: int = 10, project: str = "") -> dict[str, object]: + parsed_limit = _parse_limit(limit, default=10, max_limit=50) + project_name = _resolve_project(project) + profile = _memory_profile(limit=parsed_limit, project=project_name) + inbox = _memory_inbox(limit=parsed_limit, include_archived=True, 
project=project_name) + captures = _capture_review_summary(project=project_name, limit=min(parsed_limit, 10)) + return _core_memory_audit_report( + profile, + inbox, + captures, + _mcp_memory_audit_actions(inbox, captures, project_name), + project=project_name, + ) + + +def _recall_memories( + query: str, + limit: int = 10, + include_archived: bool = False, + project: str = "", +) -> list[dict[str, object]]: + query = _clean_text_input(query) + return _core_recall_memories( + _memory_records(), + query, + limit=limit, + include_archived=include_archived, + project=_resolve_project(project), + ) + + +def _propose_memories_from_text( + text: str, + source: str = "mcp", + limit: int = 10, + project: str = "", +) -> dict[str, object]: + return _core_propose_memories_from_text( + text, + _memory_records(), + source=source, + limit=limit, + writes_memory=False, + project=_resolve_project(project), + ) + + +def _capture_session( + text: str, + title: str = "", + source: str = "mcp", + limit: int = 10, + project: str = "", +) -> dict[str, object]: + clean_text = _clean_text_input(text, max_len=MAX_CAPTURE_INPUT) + if not clean_text: + raise ValueError("session text required") + clean_source = _clean_text_input(source, max_len=500) or "mcp" + project_name = _resolve_project(project) + timestamp = _utc_timestamp() + capture_title = _core_capture_title( + clean_text, + clean_source, + _clean_text_input(title, max_len=200), + default_source="mcp", + ) + secret_warnings = _secret_value_warnings(clean_text) + root = WIKI_DIR.parent + capture_dir = root / "raw" / "memory-captures" + capture_dir.mkdir(parents=True, exist_ok=True) + capture_path = _core_capture_filename(timestamp, capture_title, capture_dir) + project_line = f'project: "{_frontmatter_string(project_name)}"\n' if project_name else "" + _core_atomic_write_text( + capture_path, + f"""--- +title: "{_frontmatter_string(capture_title)}" +source_type: conversation +date_captured: "{timestamp}" +{project_line}--- + +# 
{capture_title} + +Captured locally for Link memory review. This raw note is proposal-only until the user approves durable memories. + +## Source Input + +{clean_source} + +## Notes + +{clean_text} +""", + ) + rel_path = capture_path.relative_to(root).as_posix() + proposals = _propose_memories_from_text( + clean_text, + source=rel_path, + limit=limit, + project=project_name, + ) + _append_log( + timestamp, + "capture-session", + f"Captured proposal-only session notes at {rel_path}", + [ + f"Source input: {clean_source}", + f"Project: {project_name or 'none'}", + f"Secret warnings: {', '.join(secret_warnings) if secret_warnings else 'none'}", + f"Proposals: {proposals['count']}", + ], + ) + _clear_cache() + return { + "captured": True, + "path": rel_path, + "source": clean_source, + "title": capture_title, + "project": project_name, + "secret_warnings": secret_warnings, + "proposals": proposals, + } - bl_path = WIKI_DIR / "_backlinks.json" - backlinks_data: dict = {} - if bl_path.exists(): - try: - raw = json.loads(bl_path.read_text(encoding="utf-8")) - backlinks_data = raw.get("backlinks", raw) - except Exception: - pass - - inbound = backlinks_data.get(primary_name, []) - - forward: list[str] = [] - forward_seen: set[str] = set() - path = c["page_index"].get(primary_name) - if path and path.exists(): - text = path.read_text(encoding="utf-8", errors="replace") - _, body = _parse_frontmatter(text) - page_set = {p["name"].lower() for p in c["pages"]} - for m in re.finditer(r"\[\[([^\]|]+)(?:\|[^\]]*)?\]\]", body): - target = m.group(1).strip().lower() - if target in page_set and target != primary_name and target not in forward_seen: - forward_seen.add(target) - forward.append(target) - - seen = {primary_name} - context_names = [primary_name] - for name in inbound + forward: - if name not in seen: - seen.add(name) - context_names.append(name) - - context_pages = [] - for name in context_names[:10]: - p_path = c["page_index"].get(name) - if not p_path or not 
p_path.exists(): - continue - text = p_path.read_text(encoding="utf-8", errors="replace") - meta, body = _parse_frontmatter(text) - is_primary = name == primary_name - if is_primary: - content = body - else: - lines = body.split("\n") - summary = [] - for line in lines[:20]: - summary.append(line) - if line.startswith("## ") and len(summary) > 3: - break - content = "\n".join(summary) - - page_meta = c["page_map"].get(name, {}) - context_pages.append({ - "name": name, - "title": meta.get("title", name), - "type": meta.get("type", ""), - "is_primary": is_primary, - "relationship": "primary" if is_primary else ("inbound" if name in inbound else "forward"), - "content": content, - }) +def _resolve_capture_file(capture: str) -> Path | None: + return _core_resolve_capture_file(WIKI_DIR.parent, capture, max_len=500) + + +def _capture_records(limit: int = 20, project: str = "") -> list[dict[str, object]]: + root = WIKI_DIR.parent + return _core_capture_records( + root, + limit=limit, + project=project, + commands_for=_core_mcp_capture_commands, + ) + + +def _capture_inbox(limit: int = 20, project: str = "") -> dict[str, object]: + return _core_capture_inbox( + WIKI_DIR.parent, + limit=limit, + project=project, + commands_for=_core_mcp_capture_commands, + ) + + +def _capture_review_summary(project: str = "", limit: int = 3) -> dict[str, object]: + project_name = _core_normalize_project(project) + summary = _core_capture_review_summary( + WIKI_DIR.parent, + limit=limit, + project=project_name, + commands_for=_core_mcp_capture_commands, + ) + next_action = "capture_inbox()" + if project_name: + next_action = f'capture_inbox(project="{project_name}")' + summary["next_action"] = next_action + return summary + + +def _accept_capture( + capture: str, + index: int = 1, + title: str = "", + memory_type: str = "", + scope: str = "", + tags: str = "", + project: str = "", + allow_duplicate: bool = False, + allow_conflict: bool = False, +) -> dict[str, object]: + try: + 
proposal_index = int(index) + except (TypeError, ValueError): + raise ValueError("proposal index must be an integer") + if proposal_index < 1: + raise ValueError("proposal index must be 1 or greater") + + root = WIKI_DIR.parent + capture_path = _resolve_capture_file(capture) + if capture_path is None: + raise ValueError(f"capture not found: {_clean_text_input(capture, max_len=500)}") + raw_text = capture_path.read_text(encoding="utf-8", errors="replace") + meta, notes = _core_capture_notes_from_markdown(raw_text) + if not notes: + raise ValueError("capture has no notes") + + rel_path = capture_path.relative_to(root).as_posix() + project_name = _core_slugify( + _clean_text_input(project) or str(meta.get("project") or "") or _default_project(), + fallback="", + ) + proposals = _propose_memories_from_text( + notes, + source=rel_path, + limit=max(1, min(max(proposal_index, 10), 50)), + project=project_name, + ) + if proposal_index > len(proposals["proposals"]): + raise ValueError(f"capture has {len(proposals['proposals'])} proposal(s); index {proposal_index} is unavailable") + proposal = proposals["proposals"][proposal_index - 1] + chosen_scope = _clean_text_input(scope).lower() or str(proposal["scope"]) + chosen_project = project_name if chosen_scope == "project" else "" + result = _write_memory_page( + str(proposal["memory"]), + title=_clean_text_input(title) or str(proposal["title"]), + memory_type=_clean_text_input(memory_type).lower() or str(proposal["memory_type"]), + scope=chosen_scope, + tags=tags, + source=rel_path, + allow_duplicate=allow_duplicate, + allow_conflict=allow_conflict, + project=chosen_project, + ) + payload = { + "accepted": bool(result.get("created")), + "capture": rel_path, + "proposal_index": proposal_index, + "project": str(result.get("project") or proposal.get("project") or ""), + "proposal": proposal, + "result": result, + } + if result.get("created"): + _append_log( + _utc_timestamp(), + "accept-capture", + f"Accepted proposal 
{proposal_index} from {rel_path}", + [ + f"Memory: {result['path']}", + f"Project: {result.get('project') or 'none'}", + ], + ) + return payload + + +def _redact_capture(capture: str, replacement: str = "[redacted-secret]") -> dict[str, object]: + root = WIKI_DIR.parent + capture_path = _resolve_capture_file(capture) + if capture_path is None: + raise ValueError(f"capture not found: {_clean_text_input(capture, max_len=500)}") + original = capture_path.read_text(encoding="utf-8", errors="replace") + redacted, labels, replacement_count = _redact_secret_values( + original, + replacement=_clean_text_input(replacement, max_len=100) or "[redacted-secret]", + ) + rel_path = capture_path.relative_to(root).as_posix() + if replacement_count: + _core_atomic_write_text(capture_path, redacted) + _append_log( + _utc_timestamp(), + "redact-capture", + f"Redacted secret-looking values from {rel_path}", + [ + f"Labels: {', '.join(labels)}", + f"Replacement count: {replacement_count}", + ], + ) return { - "topic": topic, - "found": True, - "primary": primary["name"], - "inbound_count": len(inbound), - "forward_count": len(forward), - "pages": context_pages, + "redacted": bool(replacement_count), + "path": rel_path, + "labels": labels, + "replacement_count": replacement_count, + } + + +def _delete_capture(capture: str, confirm: bool = False) -> dict[str, object]: + root = WIKI_DIR.parent + capture_path = _resolve_capture_file(capture) + if capture_path is None: + raise ValueError(f"capture not found: {_clean_text_input(capture, max_len=500)}") + rel_path = capture_path.relative_to(root).as_posix() + payload = { + "deleted": False, + "path": rel_path, + "confirmation_required": not confirm, } + if not confirm: + return payload + capture_path.unlink() + _append_log( + _utc_timestamp(), + "delete-capture", + f"Deleted raw capture {rel_path}", + ["Deleted file only; capture contents were not logged."], + ) + payload["deleted"] = True + payload["confirmation_required"] = False + return 
payload + + +def _append_log(timestamp: str, operation: str, description: str, lines: list[str]) -> None: + _core_append_log(WIKI_DIR, timestamp, operation, description, lines) + + +def _resolve_memory_page(identifier: str) -> tuple[Path | None, dict[str, object] | None, str | None]: + return _core_resolve_memory_page( + WIKI_DIR, + identifier, + records=_memory_records(), + max_identifier_len=300, + ) + + +def _rebuild_memory_backlinks() -> bool: + rebuilt = json.loads(rebuild_backlinks()) + return bool(rebuilt.get("rebuilt")) + + +def _memory_mutation_options(project: str = "") -> dict[str, object]: + return { + "timestamp": _utc_timestamp(), + "records": _memory_records(), + "project": _resolve_project(project), + "log_writer": _append_log, + "rebuild_backlinks": _rebuild_memory_backlinks, + } + + +def _memory_type_scope(memory_type: str, scope: str) -> tuple[str, str]: + return ( + _clean_text_input(memory_type).lower() or "note", + _clean_text_input(scope).lower() or "user", + ) + + +def _set_memory_status(identifier: str, status: str, reason: str = "") -> dict[str, object]: + result = _core_set_memory_status( + WIKI_DIR, + _clean_text_input(identifier, max_len=300), + status, + reason=_clean_text_input(reason, max_len=500), + timestamp=_utc_timestamp(), + records=_memory_records(), + log_writer=_append_log, + ) + if result["updated"]: + _clear_cache() + return result + + +def _forget_memory(identifier: str, confirm: bool = False) -> dict[str, object]: + result = _core_forget_memory_page( + WIKI_DIR, + _clean_text_input(identifier, max_len=300), + confirm=confirm, + records=_memory_records(), + timestamp=_utc_timestamp(), + log_writer=_append_log, + rebuild_backlinks=_rebuild_memory_backlinks, + ) + if result.get("forgotten"): + _clear_cache() + return result + + +def _mark_memory_reviewed(identifier: str, note: str = "") -> dict[str, object]: + result = _core_mark_memory_reviewed( + WIKI_DIR, + _clean_text_input(identifier, max_len=300), + 
note=_clean_text_input(note, max_len=500), + timestamp=_utc_timestamp(), + records=_memory_records(), + review_command="review_memory", + log_writer=_append_log, + ) + if result["updated"]: + _clear_cache() + return result + + +def _update_memory_page( + identifier: str, + text: str, + source: str = "mcp", + allow_conflict: bool = False, + project: str = "", +) -> dict[str, object]: + clean_text = _required_text_input(text, "memory update text required", max_len=4000) + clean_source = _clean_text_input(source, max_len=500) or "mcp" + options = _memory_mutation_options(project) + + result = _core_update_memory_page( + WIKI_DIR, _clean_text_input(identifier, max_len=300), clean_text, + source=clean_source, review_command="review_memory", + allow_conflict=allow_conflict, + **options, + ) + _clear_cache() + return result + + +def _write_memory_page( + text: str, title: str = "", memory_type: str = "note", + scope: str = "user", tags: str = "", source: str = "mcp", + allow_duplicate: bool = False, allow_conflict: bool = False, project: str = "", +) -> dict[str, object]: + clean_text = _required_text_input(text, "memory text required", max_len=4000) + memory_type, scope = _memory_type_scope(memory_type, scope) + options = _memory_mutation_options(project) + + result = _core_write_memory_page( + WIKI_DIR, clean_text, title=_clean_text_input(title), + memory_type=memory_type, scope=scope, + tags=_clean_text_input(tags, max_len=500), source=_clean_text_input(source, max_len=500), + allow_duplicate=allow_duplicate, allow_conflict=allow_conflict, + **options, + ) + if result.get("created"): + _clear_cache() + return result # ── MCP Tools ───────────────────────────────────────────────────────── +@mcp.tool() +def query_link(query: str, budget: str = "medium", project: str = "") -> str: + """Build a compact answer-ready Link context packet. + + Use this before answering substantive questions that may need local memory, + wiki knowledge, or both. 
It returns budgeted memories, ranked wiki results, + graph-neighborhood context, and why each item was selected so the agent does + not waste context by reading the whole wiki. + budget: small, medium, or large. + """ + return json.dumps(_query_link(query=query, budget=budget, project=project), ensure_ascii=False) + + +@mcp.tool() +def link_status(include_validation: bool = False) -> str: + """Return a compact Link readiness summary. + + Use this when connecting to Link or troubleshooting setup. It reports the + wiki path, package version, page/memory counts, missing required paths, + optional validation summary, and safe next actions. + """ + return json.dumps(_link_status(include_validation=include_validation), ensure_ascii=False) + + +@mcp.tool() +def starter_prompts(project: str = "") -> str: + """Return first-run Link prompts and local checks. + + Use this when a user asks what to try after installing Link, or when an + agent needs concise natural-language prompts for readiness, brief, remember, + query, ingest, and proposal workflows. + """ + return json.dumps(_starter_prompts(project=project), ensure_ascii=False) + + +@mcp.tool() +def backup_wiki(label: str = "mcp", include_raw: bool = False, list_only: bool = False) -> str: + """Create or list local backup archives for this Link wiki. + + Use before broad repairs or risky local wiki edits. Backups stay under + .link-backups/ next to the wiki. raw/ is excluded by default because it may + contain sensitive source material; include_raw should only be true after + explicit user approval. 
+ """ + link_root = WIKI_DIR.parent + if list_only: + return json.dumps(_core_list_backups(link_root), ensure_ascii=False) + try: + result = _core_create_backup( + link_root, + label=_clean_text_input(label, max_len=80) or "mcp", + include_raw=include_raw, + ) + except (FileNotFoundError, _CoreBackupError) as exc: + return json.dumps({"created": False, "error": str(exc)}, ensure_ascii=False) + return json.dumps(result, ensure_ascii=False) + + +@mcp.tool() +def memory_brief(query: str = "", limit: int = 6, project: str = "") -> str: + """Prime the agent with local memory before answering or coding. + + Call this at the start of a session or before a user task that may depend + on preferences, project decisions, or personal context. It returns profile + counts, relevant memories for the query, review warnings, and rules for + safe memory use. + """ + limit = _parse_limit(limit, default=6, max_limit=20) + return json.dumps(_memory_brief(query=query, limit=limit, project=project), ensure_ascii=False) + + +@mcp.tool() +def validate_wiki(strict: bool = False) -> str: + """Validate agent-generated wiki pages after ingest or large edits. + + Call rebuild_backlinks first, then validate_wiki before reporting ingest + complete. The response checks required frontmatter, directory/type + alignment, required sections, dead wikilinks, and backlink freshness. + strict=true also fails on warnings such as missing TLDR/Query summaries. + """ + return json.dumps(_validate_wiki(strict=strict), ensure_ascii=False) + + +@mcp.tool() +def migrate_wiki() -> str: + """Apply safe Link wiki schema migrations. + + Use this when link_status reports a missing or old schema marker. The + operation is idempotent and only creates missing canonical wiki directories + plus the local schema marker; it does not rewrite user pages. + """ + return json.dumps(_migrate_wiki(), ensure_ascii=False) + + +@mcp.tool() +def ingest_status() -> str: + """Return raw source ingest state and the next safe action. 
+ + Use this when the user asks to ingest, after they drop files into raw/, or + when you need the exact next agent prompt and validation commands. + """ + return json.dumps(_ingest_status(), ensure_ascii=False) + + @mcp.tool() def search_wiki(query: str, limit: int = 20) -> str: """Search the Link wiki by title, alias, tag, and full-text content. @@ -372,6 +924,301 @@ def search_wiki(query: str, limit: int = 20) -> str: return json.dumps({"query": query, "count": len(slim), "results": slim}, ensure_ascii=False) +@mcp.tool() +def recall_memory(query: str, limit: int = 10, include_archived: bool = False, project: str = "") -> str: + """Search local agent memory pages first. + + Use this when the user asks about preferences, decisions, project context, + or anything the agent should remember across sessions. Returns only pages + under wiki/memories/. Archived and stale memories are excluded unless + include_archived is true. + """ + query = _clean_text_input(query) + limit = _parse_limit(limit, default=10) + if not query: + return json.dumps({"error": "query required", "query": "", "count": 0, "memories": []}) + project_name = _resolve_project(project) + memories = _recall_memories(query, limit=limit, include_archived=include_archived, project=project_name) + return json.dumps({ + "query": query, + "count": len(memories), + "include_archived": include_archived, + "project": project_name, + "memories": memories, + }, ensure_ascii=False) + + +@mcp.tool() +def propose_memories(text: str, source: str = "mcp", limit: int = 10, project: str = "") -> str: + """Propose durable memories from chat or session notes without writing them. + + Returns conservative memory proposals with type, scope, confidence, reason, + duplicate candidates, and a suggested follow-up action. Use remember_memory + or update_memory after the user confirms a proposal. 
+ """ + clean_text = _clean_text_input(text, max_len=12000) + if not clean_text: + return json.dumps({"proposed": False, "error": "text required", "count": 0, "proposals": []}) + source = _clean_text_input(source, max_len=500) or "mcp" + limit = _parse_limit(limit, default=10, max_limit=20) + return json.dumps(_propose_memories_from_text(clean_text, source=source, limit=limit, project=project), ensure_ascii=False) + + +@mcp.tool() +def capture_session(text: str, title: str = "", source: str = "mcp", limit: int = 10, project: str = "") -> str: + """Save long chat/session notes locally and return memory proposals only. + + Writes a raw note under raw/memory-captures/ and logs the capture, but does + not create durable memory pages. Use this when the user wants the session + preserved for review before approving remember_memory or update_memory. + """ + limit = _parse_limit(limit, default=10, max_limit=20) + try: + result = _capture_session(text, title=title, source=source, limit=limit, project=project) + except ValueError as exc: + return json.dumps({ + "captured": False, + "error": str(exc), + "proposals": {"proposed": False, "count": 0, "proposals": []}, + }) + return json.dumps(result, ensure_ascii=False) + + +@mcp.tool() +def capture_inbox(limit: int = 20, project: str = "") -> str: + """List saved raw session captures without changing them. + + Returns saved captures, secret-warning labels, redacted snippets, and the + next MCP tool calls for accepting, redacting, or deleting a capture. + """ + limit = _parse_limit(limit, default=20, max_limit=50) + return json.dumps(_capture_inbox(limit=limit, project=project), ensure_ascii=False) + + +@mcp.tool() +def accept_capture( + capture: str, + index: int = 1, + title: str = "", + memory_type: str = "", + scope: str = "", + tags: str = "", + project: str = "", + allow_duplicate: bool = False, + allow_conflict: bool = False, +) -> str: + """Accept one proposal from a saved raw session capture. 
+ + Recomputes proposals from raw/memory-captures, selects the 1-based index, + and writes the chosen memory through duplicate/conflict-safe creation. + """ + try: + result = _accept_capture( + capture, + index=index, + title=title, + memory_type=memory_type, + scope=scope, + tags=tags, + project=project, + allow_duplicate=allow_duplicate, + allow_conflict=allow_conflict, + ) + except ValueError as exc: + return json.dumps({"accepted": False, "error": str(exc)}) + return json.dumps(result, ensure_ascii=False) + + +@mcp.tool() +def redact_capture(capture: str, replacement: str = "[redacted-secret]") -> str: + """Redact secret-looking values from a saved raw session capture. + + Use after capture_session returns secret_warnings and the user approves + redaction. Logs warning labels and counts only, never secret values. + """ + try: + result = _redact_capture(capture, replacement=replacement) + except ValueError as exc: + return json.dumps({"redacted": False, "error": str(exc)}) + return json.dumps(result, ensure_ascii=False) + + +@mcp.tool() +def delete_capture(capture: str, confirm: bool = False) -> str: + """Delete a saved raw session capture after explicit user confirmation. + + The tool refuses to delete unless confirm is true. It logs the capture path + and deletion operation only, never the capture contents. + """ + try: + result = _delete_capture(capture, confirm=confirm) + except ValueError as exc: + return json.dumps({"deleted": False, "error": str(exc)}) + return json.dumps(result, ensure_ascii=False) + + +@mcp.tool() +def memory_profile(limit: int = 10, project: str = "") -> str: + """Summarize what Link currently remembers. + + Use this to inspect the local memory profile before doing personalized work. + Returns counts by type/scope/status, top tags, recent memories, and focused + lists for preferences, decisions, and project context. 
+ """ + limit = _parse_limit(limit, default=10) + return json.dumps(_memory_profile(limit=limit, project=project), ensure_ascii=False) + + +@mcp.tool() +def memory_audit(limit: int = 10, project: str = "") -> str: + """Audit local memory health, review backlog, and raw capture state. + + Use this when the user asks what Link knows, what needs attention, or + whether local agent memory is ready for use. + """ + return json.dumps(_memory_audit(limit=limit, project=project), ensure_ascii=False) + + +@mcp.tool() +def memory_inbox(limit: int = 20, include_archived: bool = False, project: str = "") -> str: + """List memories that need user review. + + Use this to surface pending, stale, invalid, or underspecified memories for + human confirmation. Archived memories are excluded unless include_archived + is true. Pass project to include broad user/global memory plus that + project's scoped memories while excluding other explicit projects. + """ + limit = _parse_limit(limit, default=20) + return json.dumps(_memory_inbox(limit=limit, include_archived=include_archived, project=project), ensure_ascii=False) + + +@mcp.tool() +def review_memory(identifier: str, note: str = "") -> str: + """Mark a memory as reviewed after user confirmation.""" + try: + result = _mark_memory_reviewed(identifier, note=note) + except ValueError as exc: + return json.dumps({"updated": False, "error": str(exc)}) + return json.dumps(result, ensure_ascii=False) + + +@mcp.tool() +def explain_memory(identifier: str) -> str: + """Explain why a memory exists and whether it is ready for recall. + + Returns provenance, review state, lifecycle state, graph links, recent log + entries, and detected quality issues for one memory. 
+ """ + try: + result = _memory_explanation(identifier) + except ValueError as exc: + return json.dumps({"found": False, "error": str(exc)}) + return json.dumps(result, ensure_ascii=False) + + +@mcp.tool() +def update_memory( + identifier: str, + memory: str, + source: str = "mcp", + allow_conflict: bool = False, + project: str = "", +) -> str: + """Merge new information into an existing active memory. + + Use this when remember_memory returns a duplicate candidate or when the user + asks to update something Link already remembers. The update is appended to + the memory body, logged, and marked pending review. + """ + try: + result = _update_memory_page( + identifier, + memory, + source=source, + allow_conflict=allow_conflict, + project=project, + ) + except ValueError as exc: + return json.dumps({"updated": False, "error": str(exc)}) + return json.dumps(result, ensure_ascii=False) + + +@mcp.tool() +def archive_memory(identifier: str, reason: str = "") -> str: + """Archive a memory without deleting its Markdown page. + + Use this when the user says a memory is stale, wrong, or no longer useful. + The page remains local and inspectable, recall_memory hides it by default, + and the operation is appended to wiki/log.md. + """ + try: + result = _set_memory_status(identifier, "archived", reason=reason) + except ValueError as exc: + return json.dumps({"updated": False, "error": str(exc)}) + return json.dumps(result, ensure_ascii=False) + + +@mcp.tool() +def restore_memory(identifier: str) -> str: + """Restore an archived memory to active status.""" + try: + result = _set_memory_status(identifier, "active") + except ValueError as exc: + return json.dumps({"updated": False, "error": str(exc)}) + return json.dumps(result, ensure_ascii=False) + + +@mcp.tool() +def forget_memory(identifier: str, confirm: bool = False) -> str: + """Permanently delete a memory after explicit user confirmation. + + Prefer archive_memory for reversible cleanup. 
Use forget_memory only when + the user asks Link to permanently forget a memory; the tool refuses to + delete unless confirm is true and never logs the memory body. + """ + return json.dumps(_forget_memory(identifier, confirm=confirm), ensure_ascii=False) + + +@mcp.tool() +def remember_memory( + memory: str, + title: str = "", + memory_type: str = "note", + scope: str = "user", + tags: str = "", + source: str = "mcp", + allow_duplicate: bool = False, + allow_conflict: bool = False, + project: str = "", +) -> str: + """Save a local agent memory as a Markdown page. + + Use only when the user explicitly asks you to remember something. The memory + is written under wiki/memories/, indexed, logged, and kept local. Strong + duplicates are refused unless allow_duplicate is true. + Potential conflicts are refused unless allow_conflict is true. + memory_type: preference, decision, project, fact, or note. + scope: user, project, or global. + project: optional project key for project-scoped memories. + tags: optional comma-separated tags. + """ + try: + result = _write_memory_page( + memory, + title=title, + memory_type=memory_type, + scope=scope, + tags=tags, + source=source, + allow_duplicate=allow_duplicate, + allow_conflict=allow_conflict, + project=project, + ) + except ValueError as exc: + return json.dumps({"created": False, "error": str(exc)}) + return json.dumps(result, ensure_ascii=False) + + @mcp.tool() def get_context(topic: str) -> str: """Get full context for a topic from the Link wiki. @@ -392,61 +1239,76 @@ def get_context(topic: str) -> str: @mcp.tool() -def get_pages(category: str = "", page_type: str = "", maturity: str = "") -> str: - """List all pages in the Link wiki with metadata. +def get_pages( + category: str = "", + page_type: str = "", + maturity: str = "", + limit: int = 100, + offset: int = 0, + include_all: bool = False, +) -> str: + """List Link wiki pages with metadata, bounded by default. 
Optional filters: - - category: "concepts", "entities", "sources", "comparisons", "explorations" - - page_type: "concept", "entity", "source", "comparison", "exploration" + - category: "memories", "concepts", "entities", "sources", "comparisons", "explorations" + - page_type: "memory", "concept", "entity", "source", "comparison", "exploration" - maturity: "seed", "growing", "mature", "established" + - limit: max returned pages, clamped to 1..1000; default 100 + - offset: pagination offset + - include_all: true only when the user explicitly needs a full metadata export Returns pages with: name, title, category, type, tags, aliases, maturity, source_count, tldr, date_updated. Does not include full page content. + Use search_wiki, query_link, or get_context instead of paging through the + whole wiki when answering a question. """ - c = _build_cache() - pages = c["pages"] - category = _clean_text_input(category).lower() - page_type = _clean_text_input(page_type).lower() - maturity = _clean_text_input(maturity).lower() - if category: - pages = [p for p in pages if p["category"] == category] - if page_type: - pages = [p for p in pages if p["type"] == page_type] - if maturity: - pages = [p for p in pages if p["maturity"] == maturity] - return json.dumps({"count": len(pages), "pages": pages}, ensure_ascii=False) + return json.dumps( + _core_list_pages( + _build_cache(), + category=_clean_text_input(category).lower(), + page_type=_clean_text_input(page_type).lower(), + maturity=_clean_text_input(maturity).lower(), + limit=limit, + offset=offset, + include_all=include_all, + ), + ensure_ascii=False, + ) @mcp.tool() -def get_backlinks(page_name: str) -> str: - """Get all pages that link to or from a given wiki page. +def get_backlinks(page_name: str, limit: int = 100, offset: int = 0, include_all: bool = False) -> str: + """Get pages that link to or from a given wiki page, bounded by default. 
Returns: - inbound: pages that link TO this page (who references it) - forward: pages this page links TO (what it references) + - inbound_count / forward_count: total available link counts + - returned_inbound / returned_forward: returned link counts + - follow_up: pagination and context actions when truncated Useful for understanding a page's position in the knowledge graph. + Set include_all=true only when the user explicitly asks for a full link + export. """ - bl_path = WIKI_DIR / "_backlinks.json" - if not bl_path.exists(): - return json.dumps({"error": "backlinks not built — run rebuild_backlinks first"}) - try: - raw = json.loads(bl_path.read_text(encoding="utf-8")) - except Exception as e: - return json.dumps({"error": str(e)}) + backlinks, error = _core_load_backlinks_index(WIKI_DIR / "_backlinks.json", missing_error="backlinks not built — run rebuild_backlinks first") + if error: + return json.dumps({"error": error}) page_name = _clean_text_input(page_name) if not page_name: return json.dumps({"error": "page_name required", "inbound": [], "forward": []}) - name = page_name.lower().replace(" ", "-") - backlinks = raw.get("backlinks", raw) - forward = raw.get("forward", {}) - return json.dumps({ - "page": page_name, - "inbound": backlinks.get(name, []), - "forward": forward.get(name, []), - }, ensure_ascii=False) + return json.dumps( + _core_page_link_summary( + backlinks, + page_name, + limit=limit, + offset=offset, + include_all=include_all, + ), + ensure_ascii=False, + ) @mcp.tool() @@ -459,34 +1321,54 @@ def get_graph() -> str: Useful for understanding the overall structure of the wiki, finding highly-connected pages, or detecting isolated clusters. + + For large wikis, prefer get_graph_summary first. Use get_graph only when + the user explicitly needs the full graph export. 
""" - c = _build_cache() - pages = c["pages"] - page_ids = {p["name"].lower(): p["name"] for p in pages} - nodes = [{"id": p["name"], "title": p["title"], "category": p["category"], "type": p["type"]} for p in pages] - - edges = [] - seen_edges: set[tuple[str, str]] = set() - wl_re = re.compile(r"\[\[([^\]|]+)(?:\|[^\]]*)?\]\]") - for p in pages: - source = p["name"] - path = c["page_index"].get(source.lower()) - if not path or not path.exists(): - continue - text = path.read_text(encoding="utf-8", errors="replace") - _, body = _parse_frontmatter(text) - for m in wl_re.finditer(body): - target_key = m.group(1).strip().lower() - target = page_ids.get(target_key) - if not target or target_key == source.lower(): - continue - edge_key = (source, target) - if edge_key in seen_edges: - continue - seen_edges.add(edge_key) - edges.append({"source": source, "target": target}) - - return json.dumps({"nodes": nodes, "edges": edges}, ensure_ascii=False) + return json.dumps(_core_graph_data(_build_cache()), ensure_ascii=False) + + +@mcp.tool() +def get_graph_summary(topic: str = "", limit: int = 40, depth: int = 1, max_edges: int = 120) -> str: + """Get a bounded graph summary for large wikis and agent context budgets. + + Args: + - topic: optional topic/query. When provided, Link returns a bounded + neighborhood around matching pages. When omitted, Link returns a + high-degree overview. + - limit: maximum nodes to return, clamped to 1..250. + - depth: graph neighborhood depth for topic mode, clamped to 0..3. + - max_edges: maximum returned edges among selected nodes, clamped to 0..1000. + + Use this before get_graph when the wiki may contain hundreds or thousands + of pages. The response includes total graph size, returned node/edge counts, + why each node was selected, top hubs, and follow-up tool actions. 
+ """ + return json.dumps( + _core_graph_summary( + _build_cache(), + topic=_clean_text_input(topic, max_len=MAX_TEXT_INPUT), + limit=limit, + depth=depth, + max_edges=max_edges, + ), + ensure_ascii=False, + ) + + +@mcp.tool() +def rebuild_index() -> str: + """Regenerate wiki/index.md from current Markdown pages. + + Run this after ingesting sources or making large page edits so the + human-readable wiki catalog reflects all pages grouped by category. + """ + try: + result = _core_rebuild_index(WIKI_DIR, cache=_build_cache()) + except OSError as exc: + return json.dumps({"rebuilt": False, "error": f"Could not rebuild index: {exc}"}, ensure_ascii=False) + _clear_cache() + return json.dumps(result, ensure_ascii=False) @mcp.tool() @@ -497,36 +1379,16 @@ def rebuild_backlinks() -> str: the graph index is up to date. Updates wiki/_backlinks.json with both reverse links (backlinks) and forward links. """ - backlinks: dict[str, list] = {} - forward_links: dict[str, list] = {} - wl_re = re.compile(r"\[\[([^\]|]+)(?:\|[^\]]*)?\]\]") - - for md in WIKI_DIR.rglob("*.md"): - if md.name.startswith("."): - continue - text = md.read_text(encoding="utf-8", errors="replace") - _, body = _parse_frontmatter(text) - source = md.stem.lower() - for m in wl_re.finditer(body): - target = m.group(1).strip().lower() - if target != source: - backlinks.setdefault(target, []) - if source not in backlinks[target]: - backlinks[target].append(source) - forward_links.setdefault(source, []) - if target not in forward_links[source]: - forward_links[source].append(target) - - result = {"backlinks": backlinks, "forward": forward_links} + try: + result = _core_build_backlinks(WIKI_DIR) + except OSError as exc: + return json.dumps({"rebuilt": False, "error": f"Could not rebuild backlinks: {exc}"}, ensure_ascii=False) bl_path = WIKI_DIR / "_backlinks.json" - bl_path.write_text(json.dumps(result, indent=2), encoding="utf-8") + _core_atomic_write_json(bl_path, result) - # Invalidate cache - global _cache, 
_cache_mtime - _cache = {} - _cache_mtime = 0.0 + _clear_cache() - return json.dumps({"rebuilt": True, "pages_indexed": len(backlinks)}) + return json.dumps({"rebuilt": True, "pages_indexed": len(result["backlinks"])}) # ── Entry point ─────────────────────────────────────────────────────── diff --git a/mcp_package/pyproject.toml b/mcp_package/pyproject.toml index 78cdd9d..bd8bc4b 100644 --- a/mcp_package/pyproject.toml +++ b/mcp_package/pyproject.toml @@ -4,13 +4,13 @@ build-backend = "hatchling.build" [project] name = "link-mcp" -version = "1.0.7" -description = "MCP server for the Link personal knowledge wiki — search, context, and graph traversal" +version = "1.1.0" +description = "MCP server for Link local agent memory — remember, recall, search, context, and graph traversal" readme = "README.md" license = { text = "MIT" } requires-python = ">=3.10" dependencies = ["mcp>=1.0.0"] -keywords = ["mcp", "knowledge-base", "wiki", "llm", "ai"] +keywords = ["mcp", "memory", "knowledge-base", "wiki", "llm", "ai"] classifiers = [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", @@ -19,6 +19,8 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Topic :: Scientific/Engineering :: Artificial Intelligence", ] @@ -30,7 +32,7 @@ Repository = "https://github.com/gowtham0992/link" link-mcp = "link_mcp.server:main" [tool.hatch.build.targets.wheel] -packages = ["link_mcp"] +packages = ["link_mcp", "link_core"] [tool.hatch.build.targets.sdist] exclude = [ diff --git a/mcp_package/server.json b/mcp_package/server.json index 802bb34..4665ea6 100644 --- a/mcp_package/server.json +++ b/mcp_package/server.json @@ -1,17 +1,17 @@ { "$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json", "name": "io.github.gowtham0992/link", - "description": "Personal 
knowledge wiki as MCP tools \u2014 search, context, graph traversal.", + "description": "Local personal memory for agents as MCP tools \u2014 remember, recall, search, context, graph traversal.", "repository": { "url": "https://github.com/gowtham0992/link", "source": "github" }, - "version": "1.0.7", + "version": "1.1.0", "packages": [ { "registryType": "pypi", "identifier": "link-mcp", - "version": "1.0.7", + "version": "1.1.0", "transport": { "type": "stdio" } diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..7b1c5da --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,30 @@ +[tool.ruff] +target-version = "py310" +line-length = 120 +src = ["mcp_package", "scripts", "tests"] +extend-exclude = [ + ".git", + ".link-backups", + ".mypy_cache", + ".pytest_cache", + ".ruff_cache", + ".venv", + "build", + "dist", + "raw", + "wiki", +] + +[tool.ruff.lint] +select = [ + "E4", + "E7", + "E9", + "F", +] + +[tool.ruff.lint.per-file-ignores] +"link.py" = ["E402"] +"serve.py" = ["E402", "E701", "F541"] +"mcp_package/link_mcp/server.py" = ["E402"] +"tests/*.py" = ["E402"] diff --git a/scripts/check_release_hygiene.py b/scripts/check_release_hygiene.py index ba0087d..e795133 100644 --- a/scripts/check_release_hygiene.py +++ b/scripts/check_release_hygiene.py @@ -6,7 +6,6 @@ import json import re import subprocess -import sys from pathlib import Path @@ -30,6 +29,15 @@ "service-account*.json", ) +BUILD_ARTIFACT_PATTERNS = ( + "dist/*", + "*/dist/*", + "*.whl", + "*.tar.gz", + "*.egg-info", + "*.egg-info/*", +) + SECRET_VALUE_PATTERNS = ( ("Anthropic API key", re.compile(r"\bsk-ant-[A-Za-z0-9_-]{20,}\b")), ("OpenAI API key", re.compile(r"\bsk-[A-Za-z0-9_-]{20,}\b")), @@ -42,6 +50,25 @@ ("Private key block", re.compile(r"-----BEGIN [A-Z ]*PRIVATE KEY-----")), ) +OUTBOUND_NETWORK_CODE_SUFFIXES = {".py", ".sh"} +OUTBOUND_NETWORK_ALLOWLIST = { + Path("scripts/smoke_http_viewer.py"), +} +OUTBOUND_NETWORK_PATTERNS = ( + ("requests import", 
re.compile(r"^\s*(?:import\s+requests\b|from\s+requests\b)", re.MULTILINE)), + ("httpx import", re.compile(r"^\s*(?:import\s+httpx\b|from\s+httpx\b)", re.MULTILINE)), + ("http.client import", re.compile(r"^\s*(?:import\s+http\.client\b|from\s+http\.client\b)", re.MULTILINE)), + ("urllib.request import", re.compile(r"^\s*(?:import\s+urllib\.request\b|from\s+urllib\.request\b)", re.MULTILINE)), + ("urllib request import", re.compile(r"^\s*from\s+urllib\s+import\s+request\b", re.MULTILINE)), + ("socket import", re.compile(r"^\s*(?:import\s+socket\b|from\s+socket\b)", re.MULTILINE)), + ("urlopen call", re.compile(r"\burlopen\s*\(")), + ("http.client connection", re.compile(r"\b(?:http\.client\.)?HTTPS?Connection\s*\(")), + ("requests call", re.compile(r"\brequests\.(?:get|post|put|patch|delete|request)\s*\(")), + ("httpx call", re.compile(r"\bhttpx\.(?:get|post|put|patch|delete|request)\s*\(")), + ("curl command", re.compile(r"(^|[;&|]\s*)curl\s+(?:-[^\s]+\s+)*https?://", re.MULTILINE)), + ("wget command", re.compile(r"(^|[;&|]\s*)wget\s+(?:-[^\s]+\s+)*https?://", re.MULTILINE)), +) + BINARY_SUFFIXES = { ".gif", ".gz", @@ -59,6 +86,59 @@ CHANGELOG_VERSION_RE = re.compile(r"^## \[([^\]]+)\](?: - \d{4}-\d{2}-\d{2})?\s*$", re.MULTILINE) +AGENT_CONTRACT_REQUIREMENTS = { + Path("LINK.md"): ( + "link_status", + "starter_prompts", + "ingest_status", + "query_link", + "memory_brief", + "get_graph_summary", + "backup_wiki", + "validate_wiki", + ), + Path("README.md"): ( + "link_status", + "starter_prompts", + "ingest_status", + "query_link", + "memory_brief", + "get_graph_summary", + "backup_wiki", + "validate_wiki", + ), + Path("mcp_package/README.md"): ( + "link_status", + "starter_prompts", + "ingest_status", + "query_link", + "memory_brief", + "get_graph_summary", + "backup_wiki", + "validate_wiki", + ), + Path("integrations/_shared/link-instructions.md"): ( + "link_status", + "starter_prompts", + "ingest_status", + "query_link", + "memory_brief", + "get_graph_summary", + 
"backup_wiki", + "validate_wiki", + ), + Path("integrations/_shared/link-instructions-project.md"): ( + "link_status", + "starter_prompts", + "ingest_status", + "query_link", + "memory_brief", + "get_graph_summary", + "backup_wiki", + "validate_wiki", + ), +} + def tracked_files() -> list[Path]: result = subprocess.run( @@ -80,9 +160,28 @@ def read_init_version(path: Path) -> str | None: return match.group(1) if match else None +def read_core_version(path: Path) -> str | None: + match = re.search(r'^LINK_VERSION\s*=\s*"([^"]+)"', path.read_text(encoding="utf-8"), flags=re.MULTILINE) + return match.group(1) if match else None + + +def check_version_values( + findings: list[str], + versions: dict[str, str | None], + package_versions: set[str | None], +) -> None: + if not package_versions: + findings.append("version mismatch: server.json has no link-mcp package version") + if len(set(versions.values()) | package_versions) != 1: + for label, version in versions.items(): + findings.append(f"version mismatch: {label} is {version!r}") + findings.append(f"version mismatch: server.json package versions are {sorted(package_versions)!r}") + + def check_version_consistency(findings: list[str]) -> str | None: pyproject_version = read_pyproject_version(Path("mcp_package/pyproject.toml")) init_version = read_init_version(Path("mcp_package/link_mcp/__init__.py")) + core_version = read_core_version(Path("mcp_package/link_core/version.py")) server = json.loads(Path("mcp_package/server.json").read_text(encoding="utf-8")) server_version = server.get("version") package_versions = { @@ -93,12 +192,10 @@ def check_version_consistency(findings: list[str]) -> str | None: versions = { "mcp_package/pyproject.toml": pyproject_version, "mcp_package/link_mcp/__init__.py": init_version, + "mcp_package/link_core/version.py": core_version, "mcp_package/server.json": server_version, } - if len(set(versions.values()) | package_versions) != 1: - for label, version in versions.items(): - 
findings.append(f"version mismatch: {label} is {version!r}") - findings.append(f"version mismatch: server.json package versions are {sorted(package_versions)!r}") + check_version_values(findings, versions, package_versions) return pyproject_version @@ -122,15 +219,55 @@ def check_changelog(findings: list[str], current_version: str | None, path: Path findings.append(f"CHANGELOG.md missing current package version: {current_version}") +def check_agent_contract( + findings: list[str], + requirements: dict[Path, tuple[str, ...]] = AGENT_CONTRACT_REQUIREMENTS, +) -> None: + for path, required_terms in requirements.items(): + if not path.exists(): + findings.append(f"agent contract file missing: {path}") + continue + text = path.read_text(encoding="utf-8", errors="replace") + for term in required_terms: + if term not in text: + findings.append(f"agent contract missing {term!r} in {path}") + + +def check_tracked_path_hygiene(findings: list[str], path: Path) -> bool: + """Check release-blocking tracked path patterns. 
Return true when caller should skip content scan.""" + rel = path.as_posix() + if any(fnmatch.fnmatch(rel, pattern) for pattern in BUILD_ARTIFACT_PATTERNS): + findings.append(f"build artifact should not be tracked: {path}") + return True + + name = path.name + if any(fnmatch.fnmatch(name, pattern) for pattern in SECRET_NAME_PATTERNS): + findings.append(f"sensitive-looking tracked filename: {path}") + return True + + return False + + +def check_outbound_network_hygiene(findings: list[str], path: Path, text: str) -> None: + """Block accidental outbound network code in Link's local-first runtime.""" + if path.suffix.lower() not in OUTBOUND_NETWORK_CODE_SUFFIXES: + return + if path in OUTBOUND_NETWORK_ALLOWLIST: + return + for label, pattern in OUTBOUND_NETWORK_PATTERNS: + if pattern.search(text): + findings.append(f"outbound network code in {path}: {label}") + return + + def main() -> int: findings: list[str] = [] current_version = check_version_consistency(findings) check_changelog(findings, current_version) + check_agent_contract(findings) for path in tracked_files(): - name = path.name - if any(fnmatch.fnmatch(name, pattern) for pattern in SECRET_NAME_PATTERNS): - findings.append(f"sensitive-looking tracked filename: {path}") + if check_tracked_path_hygiene(findings, path): continue if path.suffix.lower() in BINARY_SUFFIXES: @@ -146,6 +283,7 @@ def main() -> int: if pattern.search(text): findings.append(f"sensitive-looking content in {path}: {label}") break + check_outbound_network_hygiene(findings, path, text) if findings: print("Release hygiene check failed:") diff --git a/scripts/check_runtime_duplication.py b/scripts/check_runtime_duplication.py new file mode 100644 index 0000000..06a9035 --- /dev/null +++ b/scripts/check_runtime_duplication.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python3 +"""Guard against large copied helper bodies across Link runtimes.""" +from __future__ import annotations + +import argparse +import ast +from dataclasses import dataclass +from 
pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +RUNTIME_FILES = ( + ROOT / "link.py", + ROOT / "serve.py", + ROOT / "mcp_package/link_mcp/server.py", +) +EXACT_DUPLICATE_LINE_THRESHOLD = 12 +LARGE_DUPLICATE_LINE_THRESHOLD = 20 + +# New large duplicate runtime helpers should be extracted instead of added here. +ALLOWED_LARGE_DUPLICATE_NAMES: set[str] = set() + + +@dataclass(frozen=True) +class FunctionInfo: + path: Path + name: str + lineno: int + end_lineno: int + body_dump: str + + @property + def line_count(self) -> int: + return self.end_lineno - self.lineno + 1 + + @property + def location(self) -> str: + try: + display_path = self.path.relative_to(ROOT) + except ValueError: + display_path = self.path + return f"{display_path}:{self.lineno}" + + +def runtime_functions(paths: tuple[Path, ...] = RUNTIME_FILES) -> list[FunctionInfo]: + functions: list[FunctionInfo] = [] + for path in paths: + tree = ast.parse(path.read_text(encoding="utf-8"), filename=str(path)) + for node in ast.walk(tree): + if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + continue + functions.append( + FunctionInfo( + path=path, + name=node.name, + lineno=node.lineno, + end_lineno=node.end_lineno or node.lineno, + body_dump=ast.dump( + ast.Module(body=node.body, type_ignores=[]), + include_attributes=False, + ), + ) + ) + return functions + + +def check_exact_duplicate_bodies(functions: list[FunctionInfo]) -> list[str]: + by_body: dict[str, list[FunctionInfo]] = {} + for info in functions: + if info.line_count >= EXACT_DUPLICATE_LINE_THRESHOLD: + by_body.setdefault(info.body_dump, []).append(info) + + findings: list[str] = [] + for group in by_body.values(): + paths = {info.path for info in group} + if len(paths) < 2: + continue + locations = ", ".join(info.location for info in sorted(group, key=lambda item: item.location)) + findings.append(f"exact duplicate runtime function body: {locations}") + return findings + + +def 
check_large_duplicate_private_names(functions: list[FunctionInfo]) -> list[str]: + findings: list[str] = [] + for name, group in duplicate_private_name_groups(functions): + if max(info.line_count for info in group) < LARGE_DUPLICATE_LINE_THRESHOLD: + continue + if name in ALLOWED_LARGE_DUPLICATE_NAMES: + continue + locations = ", ".join(info.location for info in sorted(group, key=lambda item: item.location)) + findings.append(f"large duplicate private helper '{name}': {locations}") + return findings + + +def duplicate_private_name_groups(functions: list[FunctionInfo]) -> list[tuple[str, list[FunctionInfo]]]: + by_name: dict[str, list[FunctionInfo]] = {} + for info in functions: + if info.name.startswith("_"): + by_name.setdefault(info.name, []).append(info) + + groups: list[tuple[str, list[FunctionInfo]]] = [] + for name, group in sorted(by_name.items()): + paths = {info.path for info in group} + if len(paths) >= 2: + groups.append((name, group)) + return groups + + +def format_private_name_report(functions: list[FunctionInfo]) -> str: + groups = duplicate_private_name_groups(functions) + if not groups: + return "Duplicate private runtime helper names: 0" + + report_rows = [] + for name, group in groups: + max_lines = max(info.line_count for info in group) + total_lines = sum(info.line_count for info in group) + guarded = max_lines >= LARGE_DUPLICATE_LINE_THRESHOLD + locations = ", ".join(info.location for info in sorted(group, key=lambda item: item.location)) + report_rows.append((guarded, max_lines, total_lines, name, locations)) + + report_rows.sort(key=lambda row: (not row[0], -row[1], row[3])) + guarded_count = sum(1 for guarded, *_ in report_rows if guarded) + lines = [ + "Duplicate private runtime helper names: " + f"{len(report_rows)} ({guarded_count} at or above {LARGE_DUPLICATE_LINE_THRESHOLD} lines)" + ] + for guarded, max_lines, total_lines, name, locations in report_rows: + status = "guarded" if guarded else "thin" + lines.append(f"- {name}: {status}; 
max {max_lines} lines, total {total_lines}; {locations}") + return "\n".join(lines) + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--report", + action="store_true", + help="print a non-failing audit of duplicate private helper names before running the guard", + ) + args = parser.parse_args(argv) + + functions = runtime_functions() + if args.report: + print(format_private_name_report(functions)) + + findings = [ + *check_exact_duplicate_bodies(functions), + *check_large_duplicate_private_names(functions), + ] + if findings: + if args.report: + print("") + print("Runtime duplication guard failed:") + for finding in findings: + print(f"- {finding}") + print("") + print("Move shared logic into mcp_package/link_core/ and keep runtimes as thin adapters.") + return 1 + print("Runtime duplication guard passed.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/check_tool_contract.py b/scripts/check_tool_contract.py new file mode 100644 index 0000000..09bc7ea --- /dev/null +++ b/scripts/check_tool_contract.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python3 +"""Check that Link's public CLI and MCP tool contracts do not drift.""" +from __future__ import annotations + +import ast +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] + +EXPECTED_CLI_COMMANDS = { + "accept-capture", + "archive-memory", + "backup", + "benchmark", + "brief", + "capture-inbox", + "capture-session", + "delete-capture", + "demo", + "doctor", + "explain-memory", + "forget-memory", + "graph-summary", + "ingest-status", + "init", + "memory-audit", + "memory-inbox", + "migrate", + "profile", + "prompts", + "propose-memories", + "query", + "query-link", + "rebuild-index", + "rebuild-backlinks", + "recall", + "redact-capture", + "remember", + "restore-memory", + "review-memory", + "serve", + "status", + "update-memory", + "validate", + "verify-mcp", +} + 
+EXPECTED_MCP_TOOLS = { + "accept_capture", + "archive_memory", + "backup_wiki", + "capture_inbox", + "capture_session", + "delete_capture", + "explain_memory", + "forget_memory", + "get_backlinks", + "get_context", + "get_graph", + "get_graph_summary", + "get_pages", + "ingest_status", + "link_status", + "memory_audit", + "memory_brief", + "memory_inbox", + "memory_profile", + "migrate_wiki", + "propose_memories", + "query_link", + "rebuild_index", + "rebuild_backlinks", + "recall_memory", + "redact_capture", + "remember_memory", + "restore_memory", + "review_memory", + "search_wiki", + "starter_prompts", + "update_memory", + "validate_wiki", +} + +DOCS_CLI_COMMANDS = EXPECTED_CLI_COMMANDS - {"query-link"} +CLI_DOC_PATH = Path("docs/cli.html") +MCP_DOC_PATHS = ( + Path("docs/mcp.html"), + Path("mcp_package/README.md"), +) + + +def _literal_string(node: ast.AST) -> str | None: + if isinstance(node, ast.Constant) and isinstance(node.value, str): + return node.value + return None + + +def _literal_string_list(node: ast.AST) -> list[str]: + if not isinstance(node, (ast.List, ast.Tuple)): + return [] + values: list[str] = [] + for item in node.elts: + value = _literal_string(item) + if value is not None: + values.append(value) + return values + + +def cli_commands(path: Path = ROOT / "link.py") -> set[str]: + """Return argparse subcommands and aliases declared by link.py.""" + tree = ast.parse(path.read_text(encoding="utf-8"), filename=str(path)) + commands: set[str] = set() + for node in ast.walk(tree): + if not isinstance(node, ast.Call): + continue + if not isinstance(node.func, ast.Attribute) or node.func.attr != "add_parser": + continue + if not node.args: + continue + command = _literal_string(node.args[0]) + if command: + commands.add(command) + for keyword in node.keywords: + if keyword.arg == "aliases": + commands.update(_literal_string_list(keyword.value)) + return commands + + +def _is_mcp_tool_decorator(node: ast.AST) -> bool: + target = node.func if 
isinstance(node, ast.Call) else node + return ( + isinstance(target, ast.Attribute) + and target.attr == "tool" + and isinstance(target.value, ast.Name) + and target.value.id == "mcp" + ) + + +def mcp_tools(path: Path = ROOT / "mcp_package/link_mcp/server.py") -> set[str]: + """Return functions exported through @mcp.tool().""" + tree = ast.parse(path.read_text(encoding="utf-8"), filename=str(path)) + tools: set[str] = set() + for node in ast.walk(tree): + if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + continue + if any(_is_mcp_tool_decorator(decorator) for decorator in node.decorator_list): + tools.add(node.name) + return tools + + +def _missing_terms(path: Path, terms: set[str]) -> list[str]: + text = path.read_text(encoding="utf-8", errors="replace") + return sorted(term for term in terms if term not in text) + + +def _missing_cli_reference(path: Path = ROOT / CLI_DOC_PATH) -> list[str]: + text = path.read_text(encoding="utf-8", errors="replace") + missing: list[str] = [] + for command in sorted(DOCS_CLI_COMMANDS): + command_tokens = ( + f"`link {command}", + f"`python3 link.py {command}", + f"link {command}", + f"python3 link.py {command}", + ) + if not any(token in text for token in command_tokens): + missing.append(command) + return missing + + +def check_tool_contract(root: Path = ROOT) -> list[str]: + findings: list[str] = [] + + actual_cli = cli_commands(root / "link.py") + missing_cli = sorted(EXPECTED_CLI_COMMANDS - actual_cli) + extra_cli = sorted(actual_cli - EXPECTED_CLI_COMMANDS) + if missing_cli: + findings.append(f"link.py is missing CLI commands: {', '.join(missing_cli)}") + if extra_cli: + findings.append(f"link.py has undocumented CLI commands: {', '.join(extra_cli)}") + + actual_mcp = mcp_tools(root / "mcp_package/link_mcp/server.py") + missing_mcp = sorted(EXPECTED_MCP_TOOLS - actual_mcp) + extra_mcp = sorted(actual_mcp - EXPECTED_MCP_TOOLS) + if missing_mcp: + findings.append(f"link_mcp.server is missing MCP tools: {', 
'.join(missing_mcp)}") + if extra_mcp: + findings.append(f"link_mcp.server has undocumented MCP tools: {', '.join(extra_mcp)}") + + missing_cli_docs = _missing_cli_reference(root / CLI_DOC_PATH) + if missing_cli_docs: + findings.append(f"{CLI_DOC_PATH} command reference is missing: {', '.join(missing_cli_docs)}") + + for relative_path in MCP_DOC_PATHS: + path = root / relative_path + missing = _missing_terms(path, EXPECTED_MCP_TOOLS) + if missing: + findings.append(f"{relative_path} is missing MCP tools: {', '.join(missing)}") + + return findings + + +def main() -> int: + findings = check_tool_contract() + if findings: + print("Tool contract check failed:") + for finding in findings: + print(f"- {finding}") + return 1 + + print("Tool contract check passed.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/generate_docs_media.py b/scripts/generate_docs_media.py new file mode 100644 index 0000000..bd5d595 --- /dev/null +++ b/scripts/generate_docs_media.py @@ -0,0 +1,326 @@ +#!/usr/bin/env python3 +"""Generate small, reproducible GIF assets for the public docs.""" + +from __future__ import annotations + +from pathlib import Path +from textwrap import wrap + +from PIL import Image, ImageDraw, ImageFont + + +ROOT = Path(__file__).resolve().parents[1] +ASSETS = ROOT / "docs" / "assets" +SIZE = (860, 484) + + +COLORS = { + "bg": "#050607", + "panel": "#0e1116", + "panel_2": "#17120c", + "paper": "#fff4b8", + "paper_2": "#fffdf1", + "ink": "#f7f0d8", + "muted": "#9ca3af", + "blue": "#5b8cff", + "green": "#54c79d", + "yellow": "#ffd342", + "red": "#ff6d5f", + "border": "#17120c", +} + + +def _font(size: int, bold: bool = False, mono: bool = False) -> ImageFont.FreeTypeFont | ImageFont.ImageFont: + candidates = [] + if mono: + candidates.extend( + [ + "/System/Library/Fonts/Menlo.ttc", + "/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf", + ] + ) + if bold: + candidates.extend( + [ + "/System/Library/Fonts/Supplemental/Arial 
Bold.ttf", + "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", + ] + ) + candidates.extend( + [ + "/System/Library/Fonts/Supplemental/Arial.ttf", + "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", + ] + ) + for candidate in candidates: + try: + return ImageFont.truetype(candidate, size) + except OSError: + continue + return ImageFont.load_default() + + +FONT_TITLE = _font(24, bold=True) +FONT_SUBTITLE = _font(15) +FONT_MONO = _font(16, mono=True) +FONT_MONO_SMALL = _font(13, mono=True) +FONT_MONO_BOLD = _font(16, bold=True, mono=True) + + +def _fit_text(draw: ImageDraw.ImageDraw, text: str, font: ImageFont.ImageFont, width: int) -> list[str]: + words = text.split() + lines: list[str] = [] + current: list[str] = [] + for word in words: + candidate = " ".join([*current, word]) + if draw.textbbox((0, 0), candidate, font=font)[2] <= width: + current.append(word) + else: + if current: + lines.append(" ".join(current)) + current = [word] + if current: + lines.append(" ".join(current)) + return lines + + +def _draw_text_block( + draw: ImageDraw.ImageDraw, + xy: tuple[int, int], + text: str, + font: ImageFont.ImageFont, + fill: str, + max_width: int, + line_gap: int = 6, +) -> int: + x, y = xy + for line in _fit_text(draw, text, font, max_width): + draw.text((x, y), line, font=font, fill=fill) + y += draw.textbbox((0, 0), line, font=font)[3] + line_gap + return y + + +def _save_gif(frames: list[Image.Image], path: Path, duration: int = 1350) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + palette_frames = [frame.convert("P", palette=Image.Palette.ADAPTIVE) for frame in frames] + palette_frames[0].save( + path, + save_all=True, + append_images=palette_frames[1:], + duration=duration, + loop=0, + optimize=True, + disposal=2, + ) + + +def _window_frame(title: str, subtitle: str = "") -> tuple[Image.Image, ImageDraw.ImageDraw]: + image = Image.new("RGB", SIZE, COLORS["paper"]) + draw = ImageDraw.Draw(image) + draw.rectangle((0, 0, SIZE[0] - 1, SIZE[1] - 1), 
fill=COLORS["paper"], outline=COLORS["border"], width=4) + draw.rectangle((4, 4, SIZE[0] - 5, 54), fill=COLORS["paper_2"], outline=COLORS["border"], width=2) + for x, color in [(24, COLORS["red"]), (48, COLORS["yellow"]), (72, COLORS["green"])]: + draw.ellipse((x, 21, x + 13, 34), fill=color, outline=COLORS["border"], width=2) + draw.text((102, 18), title, font=FONT_TITLE, fill=COLORS["border"]) + if subtitle: + draw.text((102, 56), subtitle, font=FONT_SUBTITLE, fill=COLORS["muted"]) + return image, draw + + +def _make_ui_tour() -> None: + shots = [ + ("Start with prompts", "The local viewer gives a human-readable front door.", "link-home-dark.png"), + ("Ingest safely", "Raw files are scanned, represented, and validated.", "link-ingest-dark.png"), + ("Brief before work", "Agents get compact, source-backed context.", "link-brief-dark.png"), + ("Review memory", "Memories are inspectable, explainable, and reversible.", "link-memory-dashboard-dark.png"), + ("Explore the graph", "Large graphs open bounded first, then expand on demand.", "link-graph-dark.png"), + ] + frames: list[Image.Image] = [] + for title, caption, filename in shots: + screenshot = Image.open(ASSETS / filename).convert("RGB").resize(SIZE) + overlay = Image.new("RGBA", SIZE, (0, 0, 0, 0)) + draw = ImageDraw.Draw(overlay) + draw.rectangle((0, 0, SIZE[0], 76), fill=(0, 0, 0, 220)) + draw.text((28, 15), title, font=FONT_TITLE, fill=COLORS["ink"]) + draw.text((28, 45), caption, font=FONT_SUBTITLE, fill="#d1d5db") + frames.append(Image.alpha_composite(screenshot.convert("RGBA"), overlay).convert("RGB")) + _save_gif(frames, ASSETS / "link-ui-tour.gif") + _save_gif(frames, ASSETS / "link-product-tour-dark.gif") + + +def _terminal_frame(title: str, lines: list[tuple[str, str]]) -> Image.Image: + image, draw = _window_frame(title, "CLI commands stay local and scriptable.") + draw.rectangle((34, 96, SIZE[0] - 34, SIZE[1] - 34), fill=COLORS["bg"], outline=COLORS["border"], width=3) + y = 122 + for kind, line in 
lines: + color = { + "prompt": COLORS["green"], + "cmd": "#93c5fd", + "ok": COLORS["ink"], + "muted": "#9ca3af", + "warn": COLORS["yellow"], + }.get(kind, COLORS["ink"]) + prefix = "$ " if kind == "cmd" else " " + for wrapped in wrap(line, width=78) or [""]: + draw.text((58, y), prefix + wrapped if wrapped == line else " " + wrapped, font=FONT_MONO, fill=color) + y += 25 + y += 4 + return image + + +def _make_cli_tour() -> None: + frames = [ + _terminal_frame( + "1. Check readiness", + [ + ("cmd", "link status --validate"), + ("ok", "Ready: yes"), + ("ok", "Pages: 25 · Memories: 1 active · Search: sqlite-fts"), + ("muted", "Next: query, brief, ingest, or serve the local viewer."), + ], + ), + _terminal_frame( + "2. Ask for compact context", + [ + ("cmd", 'link query "why does Link help agents?" --budget small'), + ("ok", "Answer-ready packet: 3 memories, 5 pages, graph neighborhood."), + ("muted", "has_more: true · follow_up: widen budget or open context."), + ], + ), + _terminal_frame( + "3. Prime an agent", + [ + ("cmd", 'link brief "working on Link release" --project link'), + ("ok", "Relevant decisions, preferences, open review items, and project context."), + ("muted", "Local Markdown. No hosted memory service."), + ], + ), + _terminal_frame( + "4. 
def _chat_bubble(
    draw: ImageDraw.ImageDraw,
    xy: tuple[int, int],
    width: int,
    label: str,
    body: str,
    *,
    align: str,
) -> int:
    """Draw one labelled chat bubble and return the y coordinate of its bottom edge.

    ``align`` only selects the colour set ("right" versus anything else); the
    horizontal position always comes from ``xy``.
    """
    left, top = xy
    if align == "right":
        bubble_fill, bubble_edge, label_fill = "#2563eb", "#60a5fa", "#bfdbfe"
    else:
        bubble_fill, bubble_edge, label_fill = "#1f2937", "#374151", "#86efac"

    wrapped = _fit_text(draw, body, FONT_SUBTITLE, width - 34)
    # Fixed 50px allowance plus 23px for every wrapped body line.
    bottom = top + 50 + 23 * len(wrapped)

    draw.rounded_rectangle(
        (left, top, left + width, bottom),
        radius=16,
        fill=bubble_fill,
        outline=bubble_edge,
        width=2,
    )
    draw.text((left + 16, top + 12), label, font=FONT_MONO_SMALL, fill=label_fill)
    for row, text in enumerate(wrapped):
        draw.text((left + 16, top + 34 + 23 * row), text, font=FONT_SUBTITLE, fill="#ffffff")
    return bottom


def _tool_card(
    draw: ImageDraw.ImageDraw,
    xy: tuple[int, int],
    tool: str,
    args: str,
    result: str,
    *,
    active: bool = True,
) -> int:
    """Draw the fixed-size "MCP tool" card and return the y coordinate of its bottom edge.

    ``active`` switches the card fill and outline colours; the text rows are
    drawn identically either way.
    """
    left, top = xy
    card_width, card_height, header_height = 738, 128, 38
    right, bottom = left + card_width, top + card_height
    if active:
        card_fill, card_edge = "#101827", COLORS["green"]
    else:
        card_fill, card_edge = "#111111", "#374151"

    draw.rounded_rectangle((left, top, right, bottom), radius=14, fill=card_fill, outline=card_edge, width=3)
    draw.rectangle((left, top, right, top + header_height), fill="#172033", outline=card_edge, width=0)

    # Header row: caption, tool name, status badge.
    draw.text((left + 18, top + 10), "MCP tool", font=FONT_MONO_SMALL, fill="#9ca3af")
    draw.text((left + 108, top + 9), f"link / {tool}", font=FONT_MONO_BOLD, fill="#86efac")
    draw.text((right - 88, top + 9), "ready", font=FONT_MONO_SMALL, fill=COLORS["yellow"])

    # Body rows: opening brace, the arguments line, then the result line.
    draw.text((left + 18, top + 54), "{", font=FONT_MONO_SMALL, fill="#9ca3af")
    draw.text((left + 34, top + 76), f'"arguments": {args}', font=FONT_MONO_SMALL, fill="#d1d5db")
    draw.text((left + 18, top + 100), f"→ {result}", font=FONT_MONO_SMALL, fill="#bfdbfe")
    return bottom


def _mcp_chat_frame(title: str, user_prompt: str, tool: str, args: str, result: str, answer: str) -> Image.Image:
    """Render one frame of the agent-chat GIF: window chrome, three chat bubbles and a tool card.

    Element positions are fixed; the bottom coordinates returned by the bubble
    and card helpers are not used for layout.
    """
    frame_w, frame_h = SIZE[0], SIZE[1]
    paper, border = COLORS["paper"], COLORS["border"]

    canvas = Image.new("RGB", SIZE, paper)
    pen = ImageDraw.Draw(canvas)
    pen.rectangle((0, 0, frame_w - 1, frame_h - 1), fill=paper, outline=border, width=4)
    pen.rectangle((20, 20, frame_w - 20, frame_h - 20), fill="#080b10", outline=border, width=4)
    pen.rectangle((24, 24, frame_w - 24, 70), fill="#151923")

    # Three title-bar dots, 22px apart.
    dot_colors = [COLORS[name] for name in ("red", "yellow", "green")]
    for dot_left, dot_color in zip((44, 66, 88), dot_colors):
        pen.ellipse((dot_left, 40, dot_left + 11, 51), fill=dot_color)
    pen.text((116, 38), "Agent chat", font=FONT_MONO_BOLD, fill=COLORS["ink"])
    pen.text((frame_w - 280, 38), title, font=FONT_MONO_SMALL, fill="#9ca3af")

    _chat_bubble(pen, (500, 92), 298, "User", user_prompt, align="right")
    _chat_bubble(
        pen,
        (58, 166),
        408,
        "Agent",
        "I'll ask Link first so I do not guess from chat history.",
        align="left",
    )
    _tool_card(pen, (58, 246), tool, args, result)
    _chat_bubble(pen, (58, 392), 560, "Agent", answer, align="left")

    return canvas
def read_core_version(path: Path) -> str:
    """Return the string assigned to ``LINK_VERSION`` in the file at ``path``.

    Only an assignment that starts at the beginning of a line and uses double
    quotes is recognised.

    Raises:
        ValueError: if no such assignment is found.
    """
    source = path.read_text(encoding="utf-8")
    found = re.search(r'^LINK_VERSION\s*=\s*"([^"]+)"', source, flags=re.MULTILINE)
    if found is not None:
        return found.group(1)
    raise ValueError(f"could not read LINK_VERSION from {path}")
def update_core_version(text: str, version: str) -> str:
    """Return ``text`` with its line-initial ``LINK_VERSION = "..."`` assignment set to ``version``.

    The substitution itself is delegated to ``replace_one`` (defined elsewhere
    in this module), which receives "LINK_VERSION" as its label argument.
    """
    pattern = r'^LINK_VERSION\s*=\s*"[^"]+"'
    replacement = f'LINK_VERSION = "{version}"'
    return replace_one(pattern, replacement, text, "LINK_VERSION")
ROOT = Path(__file__).resolve().parents[1]


class SmokeFailure(RuntimeError):
    """Raised when a step of the first-use smoke run does not behave as expected."""


def _command_label(python: str, args: tuple[str, ...]) -> str:
    """Render an invocation for error messages, showing ``link.py`` without its directory."""
    return " ".join([python, "link.py", *args])


def _prompt_text(payload: dict[str, Any], index: int) -> str:
    """Return the ``prompt`` text of the ``index``-th entry in ``payload["prompts"]``.

    Yields "" when the list is missing, too short, or the entry is not a JSON
    object, so callers can report a ``SmokeFailure`` instead of crashing with
    ``IndexError``/``AttributeError``.
    """
    prompts = payload.get("prompts")
    if not isinstance(prompts, list) or not 0 <= index < len(prompts):
        return ""
    entry = prompts[index]
    if not isinstance(entry, dict):
        return ""
    return str(entry.get("prompt") or "")


def run_link(*args: str, python: str = sys.executable) -> subprocess.CompletedProcess[str]:
    """Run ``ROOT/link.py`` with ``args`` under ``python`` and return the completed process.

    The child runs with ``ROOT`` as its working directory and captured text output.

    Raises:
        SmokeFailure: if the command exits non-zero; the message carries the
            exit code, stdout and stderr.
    """
    result = subprocess.run(
        [python, str(ROOT / "link.py"), *args],
        cwd=ROOT,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=False,
    )
    if result.returncode != 0:
        raise SmokeFailure(
            f"command failed ({result.returncode}): {_command_label(python, args)}\n"
            f"stdout:\n{result.stdout}\n"
            f"stderr:\n{result.stderr}"
        )
    return result


def run_json(*args: str, python: str = sys.executable) -> dict[str, Any]:
    """Run a ``link.py`` command via ``run_link`` and return its stdout parsed as a JSON object.

    Raises:
        SmokeFailure: if the command fails, stdout is not valid JSON, or the
            decoded value is not a JSON object.
    """
    result = run_link(*args, python=python)
    try:
        payload = json.loads(result.stdout)
    except json.JSONDecodeError as exc:
        raise SmokeFailure(
            f"command returned invalid JSON: {_command_label(python, args)}\n{result.stdout}"
        ) from exc
    if not isinstance(payload, dict):
        raise SmokeFailure(
            f"command returned {type(payload).__name__}, expected JSON object: {_command_label(python, args)}"
        )
    return payload


def require(condition: bool, message: str) -> None:
    """Raise ``SmokeFailure(message)`` unless ``condition`` is truthy."""
    if not condition:
        raise SmokeFailure(message)


def run_smoke(work_dir: Path, python: str = sys.executable) -> None:
    """Drive the first-use CLI workflow inside ``work_dir`` and check each step.

    Creates ``new-user-link`` (via ``init``) and ``demo-link`` (via ``demo``)
    under ``work_dir``, then exercises status, prompts, backup, query,
    graph-summary, benchmark, brief, remember/recall, capture, ingest-status,
    the rebuild commands and strict validation.

    Raises:
        SmokeFailure: on the first command failure or unmet expectation.
    """
    work_dir.mkdir(parents=True, exist_ok=True)
    init_target = work_dir / "new-user-link"
    demo_target = work_dir / "demo-link"

    run_link("init", str(init_target), python=python)
    require((init_target / "link.py").exists(), "init did not copy link.py")
    require((init_target / "serve.py").exists(), "init did not copy serve.py")
    require((init_target / "wiki/_link_schema.json").exists(), "init did not write schema marker")

    init_status = run_json("status", str(init_target), "--validate", "--json", python=python)
    require(init_status.get("ready") is True, "initialized wiki did not report ready")
    require(init_status.get("schema", {}).get("status") == "current", "initialized wiki schema is not current")

    init_prompts = run_json("prompts", str(init_target), "--json", python=python)
    require(len(init_prompts.get("prompts", [])) >= 6, "prompts did not return the first-run prompt set")
    require(
        _prompt_text(init_prompts, 0) == "is Link ready?",
        "prompts did not start with readiness guidance",
    )
    require(
        "link status --validate" in init_prompts.get("commands", []),
        "prompts did not include readiness command",
    )

    demo_result = run_link("demo", str(demo_target), "--force", python=python)
    require("Try the value loop:" in demo_result.stdout, "demo output did not show the value loop")
    require(
        "query 'why does Link help agents?'" in demo_result.stdout
        or 'query "why does Link help agents?"' in demo_result.stdout,
        "demo output did not show the query proof command",
    )
    require("START_HERE.md" in demo_result.stdout, "demo output did not point to START_HERE.md")
    require((demo_target / "START_HERE.md").exists(), "demo did not create START_HERE.md")
    start_here = (demo_target / "START_HERE.md").read_text(encoding="utf-8")
    require("query Link for why Link helps agents" in start_here, "START_HERE.md did not include agent prompt")
    require("python3 link.py query" in start_here, "START_HERE.md did not include CLI proof command")

    demo_status = run_json("status", str(demo_target), "--validate", "--json", python=python)
    require(demo_status.get("ready") is True, "demo wiki did not report ready")
    require(demo_status.get("validation", {}).get("passed") is True, "demo validation did not pass")
    require(int(demo_status.get("memory_count") or 0) >= 1, "demo did not include a starter memory")

    project_prompts = run_json("prompts", str(demo_target), "--project", "demo", "--json", python=python)
    # The third prompt is expected to carry the project-memory guidance; a
    # shorter list must fail the check rather than raise IndexError.
    require(
        "this project uses Link" in _prompt_text(project_prompts, 2),
        "project prompts did not include project memory guidance",
    )

    backup = run_json("backup", str(demo_target), "--label", "first-use-smoke", "--json", python=python)
    require(backup.get("created") is True, "backup did not create an archive")
    require(backup.get("included") == ["wiki"], "backup did not default to wiki-only")
    # An absent name must not resolve to the backups directory itself, which would exist.
    backup_name = str(backup.get("name") or "")
    require(
        bool(backup_name) and (demo_target / ".link-backups" / backup_name).exists(),
        "backup archive is missing",
    )

    query = run_json(
        "query", "what is Link agent memory?", str(demo_target), "--budget", "small", "--json", python=python
    )
    require(query.get("found") is True, "query did not find demo context")
    require(bool(query.get("context_packet")), "query returned an empty context packet")
    require(
        query.get("budget_report", {}).get("context_packet", {}).get("returned", 0) <= 6,
        "small query budget was not enforced",
    )

    graph_summary = run_json(
        "graph-summary", "agent memory", str(demo_target), "--limit", "10", "--json", python=python
    )
    require(graph_summary.get("returned_nodes", 0) >= 1, "graph-summary did not return demo graph context")
    require(graph_summary.get("returned_nodes", 0) <= 10, "graph-summary did not enforce first-use node limit")

    benchmark = run_json(
        "benchmark", "agent memory", str(demo_target), "--budget", "small", "--json", python=python
    )
    require(benchmark.get("health", {}).get("status") == "pass", "benchmark health did not pass on demo wiki")
    require(
        benchmark.get("graph_initial", {}).get("mode") in {"full", "summary"},
        "benchmark did not report graph initial-load mode",
    )

    brief = run_json("brief", "testing Link as local personal memory", str(demo_target), "--json", python=python)
    require(brief.get("profile", {}).get("memory_count", 0) >= 1, "brief did not include memory profile")
    require("agent_guidance" in brief, "brief did not include agent guidance")

    remembered = run_json(
        "remember",
        "User is testing Link first-use smoke as local personal memory for agents.",
        str(demo_target),
        "--title",
        "First-use smoke memory",
        "--type",
        "note",
        "--source",
        "first-use-smoke",
        "--json",
        python=python,
    )
    require(remembered.get("created") is True, "remember did not create a first-use memory")
    require(
        (demo_target / "wiki/memories/first-use-smoke-memory.md").exists(),
        "remembered memory page is missing",
    )

    recalled = run_json("recall", "first-use smoke", str(demo_target), "--json", python=python)
    require(recalled.get("count", 0) >= 1, "recall did not find the remembered first-use memory")

    capture_note = work_dir / "session-note.md"
    capture_note.write_text(
        "Remember that first-use smoke keeps memory approval local and explicit.",
        encoding="utf-8",
    )
    captured = run_json("capture-session", str(capture_note), str(demo_target), "--json", python=python)
    require(captured.get("captured") is True, "capture-session did not save the session note")
    require(
        str(captured.get("path", "")).startswith("raw/memory-captures/"),
        "capture path was not under raw/memory-captures",
    )

    inbox = run_json("capture-inbox", str(demo_target), "--json", python=python)
    require(inbox.get("count", 0) >= 1, "capture-inbox did not show the saved capture")

    raw_source = demo_target / "raw/new-user-source.md"
    raw_source.write_text("# New user source\n\nA pending raw source for first-use smoke.\n", encoding="utf-8")
    ingest = run_json("ingest-status", str(demo_target), "--json", python=python)
    require(ingest.get("pending_count") == 1, "ingest-status did not report the pending raw source")
    require(ingest.get("guidance", {}).get("agent_prompt"), "ingest-status did not include the next agent prompt")
    require(ingest.get("plan", {}).get("batch"), "ingest-status did not include a guided ingest batch")

    run_link("rebuild-index", str(demo_target), python=python)
    require(
        "[[first-use-smoke-memory]]" in (demo_target / "wiki/index.md").read_text(encoding="utf-8"),
        "rebuild-index did not catalog the remembered memory",
    )
    run_link("rebuild-backlinks", str(demo_target), python=python)

    validation = run_json("validate", str(demo_target), "--strict", "--json", python=python)
    require(validation.get("passed") is True, "validate --strict did not pass after first-use actions")


def main() -> int:
    """CLI entry point: run the smoke in ``--work-dir`` (or a fresh temp dir).

    Returns 0 on success and 1 after printing the failure to stderr. The work
    directory is left in place and its path is printed on success.
    """
    parser = argparse.ArgumentParser(description="Smoke test Link's first-use local workflow.")
    parser.add_argument("--work-dir", default="", help="directory for temporary smoke artifacts")
    parser.add_argument("--python", default=sys.executable, help="Python executable used to run link.py")
    args = parser.parse_args()

    if args.work_dir:
        work_dir = Path(args.work_dir).expanduser().resolve()
    else:
        work_dir = Path(tempfile.mkdtemp(prefix="link-first-use-"))
    try:
        run_smoke(work_dir, python=args.python)
    except SmokeFailure as exc:
        print(f"First-use smoke failed: {exc}", file=sys.stderr)
        return 1

    print(f"First-use smoke passed in {work_dir}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
def request(
    base_url: str,
    path: str,
    *,
    method: str = "GET",
    payload: dict[str, Any] | None = None,
    headers: dict[str, str] | None = None,
) -> tuple[int, dict[str, str], bytes]:
    """Send one HTTP request and return ``(status, headers, body)``.

    A ``payload`` is sent as UTF-8 JSON, with ``Content-Type: application/json``
    added unless the caller already supplied that header. HTTP error statuses
    are returned like any other response instead of raising; other failures
    (for example a refused connection) propagate.
    """
    body = None
    request_headers = dict(headers or {})
    if payload is not None:
        body = json.dumps(payload).encode("utf-8")
        request_headers.setdefault("Content-Type", "application/json")
    req = urllib.request.Request(
        f"{base_url}{path}",
        data=body,
        headers=request_headers,
        method=method,
    )
    try:
        with urllib.request.urlopen(req, timeout=5) as response:
            return int(response.status), dict(response.headers.items()), response.read()
    except urllib.error.HTTPError as exc:
        # HTTPError doubles as the response object; close it once the body has
        # been read so the underlying connection is not left open.
        try:
            return int(exc.code), dict(exc.headers.items()), exc.read()
        finally:
            exc.close()


def request_json(base_url: str, path: str, **kwargs: Any) -> tuple[int, dict[str, str], dict[str, Any]]:
    """Call ``request`` and decode the body as a JSON object.

    Raises:
        SmokeFailure: if the body is not valid UTF-8, not valid JSON, or the
            decoded value is not a JSON object.
    """
    status, headers, body = request(base_url, path, **kwargs)
    try:
        payload = json.loads(body.decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError) as exc:
        # A non-UTF-8 body is as much "invalid JSON" as a syntax error and must
        # surface as a smoke failure, not an uncaught UnicodeDecodeError.
        raise SmokeFailure(f"{path} returned invalid JSON: {body[:200]!r}") from exc
    if not isinstance(payload, dict):
        raise SmokeFailure(f"{path} returned {type(payload).__name__}, expected object")
    return status, headers, payload


def wait_until_ready(base_url: str, process: subprocess.Popen[str], timeout: float = 10.0) -> None:
    """Poll ``/api/status`` every 0.1s until it answers 200 or ``timeout`` seconds elapse.

    Raises:
        SmokeFailure: if ``process`` exits before becoming ready (its captured
            stdout/stderr are included), or if the deadline passes; in the
            latter case the most recent polling error is appended when there
            was one.
    """
    deadline = time.monotonic() + timeout
    last_error: Exception | None = None
    while time.monotonic() < deadline:
        if process.poll() is not None:
            stdout, stderr = process.communicate(timeout=1)
            raise SmokeFailure(
                f"server exited early with {process.returncode}\n"
                f"stdout:\n{stdout}\n"
                f"stderr:\n{stderr}"
            )
        try:
            status, _, _ = request_json(base_url, "/api/status")
            if status == 200:
                return
            last_error = SmokeFailure(f"/api/status returned HTTP {status}")
        except Exception as exc:
            # Failures are expected while the server is still starting; keep
            # the latest one so a timeout can say what was going wrong.
            last_error = exc
        time.sleep(0.1)
    detail = f" (last error: {last_error})" if last_error is not None else ""
    raise SmokeFailure(f"server did not become ready within {timeout:g} seconds{detail}")
def main() -> int:
    """CLI entry point for the HTTP viewer smoke test.

    Runs ``run_smoke`` in ``--work-dir`` (or a fresh temporary directory) and
    returns 0 on success, or 1 after printing the ``SmokeFailure`` to stderr.
    """
    cli = argparse.ArgumentParser(description="Smoke test Link's generated local HTTP viewer.")
    cli.add_argument("--work-dir", default="", help="directory for temporary smoke artifacts")
    cli.add_argument("--python", default=sys.executable, help="Python executable used to run Link")
    options = cli.parse_args()

    if options.work_dir:
        target_dir = Path(options.work_dir).expanduser().resolve()
    else:
        target_dir = Path(tempfile.mkdtemp(prefix="link-http-viewer-"))

    try:
        run_smoke(target_dir, python=options.python)
    except SmokeFailure as failure:
        print(f"HTTP viewer smoke failed: {failure}", file=sys.stderr)
        return 1
    else:
        print(f"HTTP viewer smoke passed in {target_dir}")
        return 0


if __name__ == "__main__":
    raise SystemExit(main())
class SmokeFailure(RuntimeError):
    """Raised when the large-wiki smoke run violates one of its expectations."""


def require(condition: bool, message: str) -> None:
    """Raise ``SmokeFailure(message)`` unless ``condition`` is truthy."""
    if condition:
        return
    raise SmokeFailure(message)


def write_page(wiki: Path, rel: str, text: str) -> None:
    """Write ``text`` as UTF-8 to ``wiki/rel``, creating parent directories as needed."""
    destination = wiki.joinpath(rel)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(text, encoding="utf-8")
def timed(label: str, fn):
    """Call ``fn()`` once and return ``(label, result, elapsed_seconds)``.

    The elapsed time is the difference between two ``time.perf_counter()`` readings.
    """
    began = time.perf_counter()
    outcome = fn()
    return label, outcome, time.perf_counter() - began
def initial_graph_payload_for_smoke(cache: dict[str, object], graph: dict[str, object]) -> dict[str, object]:
    """Build the initial graph payload, passing a bounded summary when the graph calls for one.

    When ``graph_needs_bounded_overview(graph)`` is true, a depth-1
    ``graph_summary`` capped by the ``GRAPH_INITIAL_SUMMARY_*`` limits is
    reduced to its ``nodes``/``edges`` and handed to ``graph_initial_payload``;
    otherwise ``summary_graph`` is ``None``.
    """
    summary_graph = None
    if graph_needs_bounded_overview(graph):
        summary = graph_summary(
            cache,
            limit=GRAPH_INITIAL_SUMMARY_NODE_LIMIT,
            depth=1,
            max_edges=GRAPH_INITIAL_SUMMARY_EDGE_LIMIT,
        )
        summary_graph = {
            "nodes": summary.get("nodes", []),
            "edges": summary.get("edges", []),
        }
    return graph_initial_payload(graph, summary_graph=summary_graph)


def run_smoke(work_dir: Path, page_count: int, max_seconds: dict[str, float] | None = None) -> dict[str, object]:
    """Generate a synthetic wiki, time the main read paths, and verify their bounds.

    Args:
        work_dir: directory in which the synthetic wiki is generated.
        page_count: number of synthetic concept pages.
        max_seconds: per-path timing ceilings; ``DEFAULT_MAX_SECONDS`` when
            omitted or empty.

    Returns:
        A report dict with counts, timings, the ceilings used and the
        ``benchmark_health`` verdict under ``"health"``.

    Raises:
        SmokeFailure: when any expectation or timing ceiling is violated. The
            wiki cache is closed on every exit path.
    """
    wiki = build_large_wiki(work_dir, page_count)
    timings: dict[str, float] = {}

    def measure(label: str, fn):
        """Run ``fn`` through ``timed`` and record its elapsed time under the returned label."""
        name, value, elapsed = timed(label, fn)
        timings[name] = elapsed
        return value

    cache = measure("cache", lambda: build_wiki_cache(wiki))
    try:
        results = measure("search", lambda: search_pages("agent memory", cache, limit=20))
        packet = measure(
            "query",
            lambda: query_link(
                wiki,
                "agent memory",
                cache,
                memory_records(wiki),
                budget="small",
                project="large-wiki",
            ),
        )
        graph_packet = measure(
            "graph_summary",
            lambda: graph_summary(cache, topic="agent memory", limit=40, depth=1, max_edges=120),
        )
        page_list = measure("page_list", lambda: list_pages(cache, limit=100))
        graph = measure("graph", lambda: graph_data(cache))
        initial_graph = measure(
            "graph_initial",
            lambda: initial_graph_payload_for_smoke(cache, graph),
        )

        # concept pages + source pages + memory pages + index.md and log.md
        expected_pages = page_count + max(12, min(40, page_count // 20)) + max(16, min(40, page_count // 25)) + 2
        require(len(cache["pages"]) == expected_pages, f"expected {expected_pages} cached pages, got {len(cache['pages'])}")
        require(len(results) == 20, f"expected capped search results, got {len(results)}")
        require(packet.get("found") is True, "query_link did not find large-wiki context")
        require(len(packet.get("context_packet", [])) <= 6, "small query budget was not enforced")
        require(
            packet.get("budget_report", {}).get("wiki_search", {}).get("has_more") is True,
            "query did not report additional matches",
        )
        # An empty or malformed follow-up list must fail the check, not raise IndexError.
        follow_ups = packet.get("follow_up")
        first_follow_up = follow_ups[0] if isinstance(follow_ups, list) and follow_ups else {}
        follow_up_tool = first_follow_up.get("tool") if isinstance(first_follow_up, dict) else None
        require(follow_up_tool == "query_link", "query did not return follow-up guidance")
        require(graph_packet.get("returned_nodes", 0) <= 40, "graph_summary did not enforce node limit")
        require(graph_packet.get("truncated") is True, "graph_summary did not report truncation for large wiki")
        require(page_list.get("returned_count") == 100, "page list did not enforce default agent-safe limit")
        require(page_list.get("truncated") is True, "page list did not report truncation for large wiki")
        require(len(graph["nodes"]) == expected_pages, f"expected {expected_pages} graph nodes, got {len(graph['nodes'])}")
        require(len(graph["edges"]) >= page_count * 2, "graph edge count is unexpectedly low")
        visible_graph_nodes = [
            node for node in graph.get("nodes", [])
            if str(node.get("category") or "") != "root"
        ]
        if graph_needs_bounded_overview(graph):
            require(initial_graph.get("graph_mode") == "summary", "large graph did not start with bounded overview")
            require(
                initial_graph.get("node_count", 0) <= GRAPH_INITIAL_SUMMARY_NODE_LIMIT,
                "initial graph payload exceeded node cap",
            )
        else:
            require(initial_graph.get("graph_mode") == "full", "small graph should start with full payload")
        require(
            initial_graph.get("total_node_count") == len(visible_graph_nodes),
            "initial graph total node count is incorrect",
        )
        max_seconds = max_seconds or DEFAULT_MAX_SECONDS
        check_timing_thresholds(timings, max_seconds)

        payload = {
            "wiki": str(wiki),
            "pages": len(cache["pages"]),
            "edges": len(graph["edges"]),
            "search_backend": str(cache.get("search_backend") or "token-index"),
            "context_items": len(packet.get("context_packet", [])),
            "search_results": len(results),
            "graph_summary": {
                "returned_nodes": graph_packet.get("returned_nodes", 0),
                "returned_edges": graph_packet.get("returned_edges", 0),
                "truncated": graph_packet.get("truncated", False),
            },
            "page_list": {
                "returned_count": page_list.get("returned_count", 0),
                "truncated": page_list.get("truncated", False),
            },
            "graph_initial": {
                "mode": initial_graph.get("graph_mode", "unknown"),
                "nodes": initial_graph.get("node_count", 0),
                "edges": initial_graph.get("edge_count", 0),
                "total_nodes": initial_graph.get("total_node_count", 0),
                "total_edges": initial_graph.get("total_edge_count", 0),
            },
            "timings": {key: round(value, 4) for key, value in timings.items()},
            "max_seconds": max_seconds,
        }
        payload["health"] = benchmark_health(payload)
        require(payload["health"]["status"] == "pass", "large-wiki benchmark health did not pass")
        return payload
    finally:
        # Release the cache even when a check above raised.
        close_wiki_cache(cache)
parser.add_argument("--max-graph-seconds", type=float, default=DEFAULT_MAX_SECONDS["graph"]) + args = parser.parse_args() + + if args.pages < 1: + print("Large-wiki smoke failed: --pages must be at least 1", file=sys.stderr) + return 2 + + work_dir = Path(args.work_dir).expanduser().resolve() if args.work_dir else Path(tempfile.mkdtemp(prefix="link-large-wiki-")) + max_seconds = { + "cache": args.max_cache_seconds, + "search": args.max_search_seconds, + "query": args.max_query_seconds, + "graph_summary": args.max_graph_summary_seconds, + "page_list": args.max_page_list_seconds, + "graph_initial": args.max_graph_initial_seconds, + "graph": args.max_graph_seconds, + } + try: + payload = run_smoke(work_dir, args.pages, max_seconds=max_seconds) + except SmokeFailure as exc: + print(f"Large-wiki smoke failed: {exc}", file=sys.stderr) + return 1 + + print(json.dumps(payload, indent=2)) + print(f"Large-wiki smoke passed for {payload['pages']} pages") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/smoke_mcp_stdio.py b/scripts/smoke_mcp_stdio.py new file mode 100644 index 0000000..0cb6449 --- /dev/null +++ b/scripts/smoke_mcp_stdio.py @@ -0,0 +1,220 @@ +#!/usr/bin/env python3 +"""Smoke test a real Link MCP stdio server with the MCP client SDK.""" +from __future__ import annotations + +import argparse +import json +import os +import sys +from datetime import timedelta +from pathlib import Path +from typing import Any + + +EXPECTED_TOOLS = { + "link_status", + "starter_prompts", + "migrate_wiki", + "ingest_status", + "query_link", + "validate_wiki", + "backup_wiki", + "search_wiki", + "get_context", + "get_graph", + "get_graph_summary", + "recall_memory", + "memory_profile", + "rebuild_index", + "explain_memory", +} + + +def _json_text(result: Any, tool_name: str) -> dict[str, Any]: + is_error = getattr(result, "isError", getattr(result, "is_error", False)) + if is_error: + raise RuntimeError(f"{tool_name} returned an MCP error 
result") + if not result.content: + raise RuntimeError(f"{tool_name} returned no content") + text = getattr(result.content[0], "text", "") + if not text: + raise RuntimeError(f"{tool_name} returned non-text content") + try: + payload = json.loads(text) + except json.JSONDecodeError as exc: + raise RuntimeError(f"{tool_name} returned invalid JSON: {exc}") from exc + if not isinstance(payload, dict): + raise RuntimeError(f"{tool_name} returned JSON {type(payload).__name__}, expected object") + return payload + + +async def _run_smoke(wiki_dir: Path, python: str) -> None: + from mcp import ClientSession + from mcp.client.stdio import StdioServerParameters, stdio_client + + server = StdioServerParameters( + command=python, + args=["-m", "link_mcp", "--wiki", str(wiki_dir)], + env=os.environ.copy(), + ) + async with stdio_client(server) as (read_stream, write_stream): + async with ClientSession(read_stream, write_stream) as session: + await session.initialize() + + listed = await session.list_tools() + tool_names = {tool.name for tool in listed.tools} + missing = sorted(EXPECTED_TOOLS - tool_names) + if missing: + raise RuntimeError(f"missing MCP tools: {', '.join(missing)}") + + status = _json_text( + await session.call_tool( + "link_status", + {"include_validation": True}, + read_timeout_seconds=timedelta(seconds=10), + ), + "link_status", + ) + if not status.get("ready") or status.get("validation", {}).get("passed") is not True: + raise RuntimeError("link_status did not report the demo wiki as ready") + + prompts = _json_text( + await session.call_tool( + "starter_prompts", + {"project": "mcp-smoke"}, + read_timeout_seconds=timedelta(seconds=10), + ), + "starter_prompts", + ) + if prompts.get("project") != "mcp-smoke" or prompts.get("prompts", [{}])[0].get("prompt") != "is Link ready?": + raise RuntimeError("starter_prompts did not return the expected first-run guidance") + + search = _json_text( + await session.call_tool( + "search_wiki", + {"query": "agent memory", 
"limit": 3}, + read_timeout_seconds=timedelta(seconds=10), + ), + "search_wiki", + ) + if search.get("count", 0) < 1 or search["results"][0]["name"] != "agent-memory": + raise RuntimeError("search_wiki did not return the expected demo result") + + packet = _json_text( + await session.call_tool( + "query_link", + {"query": "agent memory", "budget": "small"}, + read_timeout_seconds=timedelta(seconds=10), + ), + "query_link", + ) + if not packet.get("found") or packet.get("wiki", {}).get("primary") != "agent-memory": + raise RuntimeError("query_link did not return the expected demo packet") + if not packet.get("context_packet"): + raise RuntimeError("query_link returned an empty context packet") + + validation = _json_text( + await session.call_tool( + "validate_wiki", + {}, + read_timeout_seconds=timedelta(seconds=10), + ), + "validate_wiki", + ) + if not validation.get("passed") or validation.get("error_count") != 0: + raise RuntimeError("validate_wiki did not accept the demo wiki") + + backup = _json_text( + await session.call_tool( + "backup_wiki", + {"label": "mcp-smoke"}, + read_timeout_seconds=timedelta(seconds=10), + ), + "backup_wiki", + ) + if not backup.get("created") or "wiki" not in backup.get("included", []): + raise RuntimeError("backup_wiki did not create a wiki backup") + + context = _json_text( + await session.call_tool( + "get_context", + {"topic": "agent memory"}, + read_timeout_seconds=timedelta(seconds=10), + ), + "get_context", + ) + if not context.get("found") or context.get("primary") != "agent-memory": + raise RuntimeError("get_context did not return the expected primary page") + + graph_summary = _json_text( + await session.call_tool( + "get_graph_summary", + {"topic": "agent memory", "limit": 5, "depth": 1}, + read_timeout_seconds=timedelta(seconds=10), + ), + "get_graph_summary", + ) + if graph_summary.get("returned_nodes", 0) > 5: + raise RuntimeError("get_graph_summary ignored the node limit") + if not graph_summary.get("nodes"): + raise 
RuntimeError("get_graph_summary did not return any nodes") + + profile = _json_text( + await session.call_tool( + "memory_profile", + {}, + read_timeout_seconds=timedelta(seconds=10), + ), + "memory_profile", + ) + if profile.get("memory_count", 0) < 1: + raise RuntimeError("memory_profile did not see the demo memory") + + rebuilt_index = _json_text( + await session.call_tool( + "rebuild_index", + {}, + read_timeout_seconds=timedelta(seconds=10), + ), + "rebuild_index", + ) + if not rebuilt_index.get("rebuilt") or rebuilt_index.get("page_count", 0) < 1: + raise RuntimeError("rebuild_index did not rebuild the demo catalog") + + rebuilt_backlinks = _json_text( + await session.call_tool( + "rebuild_backlinks", + {}, + read_timeout_seconds=timedelta(seconds=10), + ), + "rebuild_backlinks", + ) + if not rebuilt_backlinks.get("rebuilt"): + raise RuntimeError("rebuild_backlinks did not repair the demo graph") + + +def main() -> int: + parser = argparse.ArgumentParser(description="Smoke test Link MCP over stdio.") + parser.add_argument("wiki", help="path to a Link wiki directory") + parser.add_argument("--python", default=sys.executable, help="Python executable used to run -m link_mcp") + args = parser.parse_args() + + wiki_dir = Path(args.wiki).expanduser().resolve() + if not (wiki_dir / "index.md").exists(): + print(f"MCP smoke failed: {wiki_dir} does not look like a Link wiki", file=sys.stderr) + return 1 + + try: + import anyio + + anyio.run(_run_smoke, wiki_dir, args.python) + except Exception as exc: + print(f"MCP smoke failed: {exc}", file=sys.stderr) + return 1 + + print(f"MCP stdio smoke passed for {wiki_dir}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/serve.py b/serve.py index 92f0b2d..f964203 100644 --- a/serve.py +++ b/serve.py @@ -1,139 +1,276 @@ #!/usr/bin/env python3 -"""Link — local wiki viewer. python serve.py → http://localhost:3000""" +"""Link — local wiki viewer. 
python serve.py → http://127.0.0.1:3000""" from __future__ import annotations -import html, http.server, json, re, socketserver, sys, urllib.parse + +import errno +import html +import http.server +import json +import re +import socketserver +import sys +import time +import urllib.parse from pathlib import Path -WIKI_DIR = Path(__file__).parent / "wiki" -RAW_DIR = Path(__file__).parent / "raw" +ROOT = Path(__file__).resolve().parent +_BUNDLED_CORE = ROOT / "mcp_package" +if (_BUNDLED_CORE / "link_core").exists(): + sys.path.insert(0, str(_BUNDLED_CORE)) + +from link_core.memory import ( + add_capture_review_to_brief as _core_add_capture_review_to_brief, + count_values as _core_count_values, + is_active_memory as _core_is_active_memory, + memory_action_hints as _core_memory_action_hints, + memory_brief as _core_memory_brief, + memory_explanation as _core_memory_explanation, + memory_inbox as _core_memory_inbox, + memory_profile as _core_memory_profile, + memory_audit_report as _core_memory_audit_report, + memory_records as _core_memory_records, + memory_review_issues as _core_memory_review_issues, + memory_duplicate_candidates as _core_memory_duplicate_candidates, + memory_visible_for_project as _core_memory_visible_for_project, + mark_memory_reviewed as _core_mark_memory_reviewed, + normalize_project as _core_normalize_project, + propose_memories_from_text as _core_propose_memories_from_text, + set_memory_status as _core_set_memory_status, + update_memory_page as _core_update_memory_page, + write_memory_page as _core_write_memory_page, +) +from link_core.frontmatter import ( + parse_frontmatter as _parse_frontmatter, +) +from link_core.ingest import ( + collect_ingest_status as _core_collect_ingest_status, +) +from link_core.log import ( + append_log as _core_append_log, + utc_timestamp as _core_utc_timestamp, +) +from link_core.markdown import ( + markdown_to_html as _core_markdown_to_html, +) +from link_core.raw import ( + RawSourceError as _RawSourceError, + 
create_raw_source as _core_create_raw_source, +) +from link_core.security import ( + clean_text_input as _clean_text_input, + redact_secret_values as _redact_secret_values, + secret_file_scan as _secret_file_scan, +) +from link_core.query import ( + query_link as _core_query_link, +) +from link_core.prompts import ( + starter_prompt_payload as _core_starter_prompt_payload, +) +from link_core.validation import ( + validate_wiki as _core_validate_wiki, +) +from link_core.version import ( + LINK_VERSION, +) +from link_core.web_assets import CSS # noqa: F401 - kept as serve.CSS for tests and compatibility +from link_core.web_memory import ( + render_capture_card as _core_render_capture_card, + render_capture_section as _core_render_capture_section, + render_memory_action_button as _core_render_memory_action_button, + render_memory_action_commands as _core_render_memory_action_commands, + render_memory_card as _core_render_memory_card, + render_memory_next_actions as _core_render_memory_next_actions, + render_memory_section as _core_render_memory_section, +) +from link_core.web_layout import ( + render_footer_html as _core_render_footer_html, + render_header_html as _core_render_header_html, + render_layout as _core_render_layout, +) +from link_core.web_graph import ( + GRAPH_CATEGORY_COLORS as _core_graph_category_colors, + GRAPH_INITIAL_SUMMARY_EDGE_LIMIT as _core_graph_initial_summary_edge_limit, + GRAPH_INITIAL_SUMMARY_NODE_LIMIT as _core_graph_initial_summary_node_limit, + graph_category_options as _core_graph_category_options, + graph_initial_payload as _core_graph_initial_payload, + graph_legend_items as _core_graph_legend_items, + graph_needs_bounded_overview as _core_graph_needs_bounded_overview, +) +from link_core.web_http import ( + CONTENT_SECURITY_POLICY as _core_content_security_policy, + is_allowed_static_file as _core_is_allowed_static_file, + is_relative_to as _core_is_relative_to, + LocalRateLimiter as _CoreLocalRateLimiter, + local_no_store_headers as 
_core_local_no_store_headers, + local_security_headers as _core_local_security_headers, + parse_bounded_int as _core_parse_bounded_int, + PERMISSIONS_POLICY as _core_permissions_policy, + resolve_raw_static_path as _core_resolve_raw_static_path, + safe_resolve as _core_safe_resolve, + SVG_CONTENT_SECURITY_POLICY as _core_svg_content_security_policy, + validate_local_browser_source_headers as _core_validate_local_browser_source_headers, + validate_local_host_header as _core_validate_local_host_header, +) +from link_core.status import ( + link_status as _core_link_status, +) +from link_core.capture import ( + capture_inbox as _core_capture_inbox, + capture_records as _core_capture_records, + capture_review_summary as _core_capture_review_summary, + cli_capture_commands as _core_cli_capture_commands, +) +from link_core.files import ( + atomic_write_json as _core_atomic_write_json, +) +from link_core.wiki import ( + build_backlinks as _core_build_backlinks, + build_wiki_cache as _core_build_wiki_cache, + close_wiki_cache as _core_close_wiki_cache, + context_for_topic as _core_context_for_topic, + graph_data as _core_graph_data, + graph_summary as _core_graph_summary, + list_pages as _core_list_pages, + load_backlinks_index as _core_load_backlinks_index, + page_link_summary as _core_page_link_summary, + rebuild_index as _core_rebuild_index, + search_pages as _core_search_pages, + wiki_mtime as _core_wiki_mtime, +) +del _BUNDLED_CORE + +WIKI_DIR = ROOT / "wiki" +RAW_DIR = ROOT / "raw" PORT = 3000 +API_VERSION = "1" +MAX_POST_BYTES = 64 * 1024 +MAX_QUERY_TEXT = 500 +MAX_PROPOSAL_SOURCE_BYTES = 64 * 1024 +MAX_RAW_SOURCE_BYTES = 60 * 1024 +LOCAL_ACTION_HEADER = "X-Link-Local-Action" +LOCAL_ACTION_VALUES = {"1", "true", "yes"} +MUTATION_RATE_LIMIT = 180 +MUTATION_RATE_WINDOW_SECONDS = 60 +CONTENT_SECURITY_POLICY = _core_content_security_policy +PERMISSIONS_POLICY = _core_permissions_policy +SVG_CONTENT_SECURITY_POLICY = _core_svg_content_security_policy 
+PROPOSAL_SOURCE_SUFFIXES = { + ".md", + ".markdown", + ".txt", + ".text", + ".rst", + ".adoc", + ".json", + ".yaml", + ".yml", + ".csv", +} +RAW_STATIC_TYPES = { + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".gif": "image/gif", + ".webp": "image/webp", + ".svg": "image/svg+xml", + ".pdf": "application/pdf", +} # --------------------------------------------------------------------------- # In-memory caches — invalidated on each request by mtime check # --------------------------------------------------------------------------- +CACHE_MTIME_CHECK_INTERVAL_SECONDS = 0.5 _pages_cache: list | None = None _pages_cache_mtime: float = 0.0 +_pages_cache_checked_at: float = 0.0 _page_index: dict[str, Path] = {} # stem.lower() → path _fulltext_index: dict[str, str] = {} # stem.lower() → full text (for search) +_normalized_fulltext_index: dict[str, str] = {} # punctuation-normalized full text +_text_words_index: dict[str, set[str]] = {} # stem.lower() → normalized fulltext words +_meta_words_index: dict[str, set[str]] = {} # stem.lower() → normalized metadata words _snippet_index: dict[str, str] = {} # stem.lower() → pre-extracted first snippet _token_index: dict[str, set[str]] = {} # token → set of page stems that contain it _page_map: dict[str, dict] = {} # stem.lower() → page dict (for O(1) lookup in search) _meta_token_index: dict[str, set[str]] = {} # token → stems with that token in title/alias/tag/tldr +_forward_links_index: dict[str, list[str]] = {} # page name → canonical outbound wikilinks +_fts_index = None +_search_backend = "token-index" +_cache_read_warnings: list[dict[str, str]] = [] +_mutation_rate_limiter = _CoreLocalRateLimiter( + max_events=MUTATION_RATE_LIMIT, + window_seconds=MUTATION_RATE_WINDOW_SECONDS, +) + +def _invalidate_pages_cache() -> None: + global _pages_cache, _pages_cache_mtime, _pages_cache_checked_at, _forward_links_index, _fts_index, _search_backend, _cache_read_warnings + _core_close_wiki_cache({"fts_index": 
_fts_index}) + _pages_cache = None + _pages_cache_mtime = 0.0 + _pages_cache_checked_at = 0.0 + _forward_links_index = {} + _fts_index = None + _search_backend = "token-index" + _cache_read_warnings = [] def _wiki_mtime() -> float: - """Return an mtime signal for all files that affect wiki indexes. - Directory mtimes catch added/removed files; file mtimes catch normal edits - made from Obsidian, an editor, or an agent without touching index.md/log.md. - """ - try: - t = WIKI_DIR.stat().st_mtime - for path in WIKI_DIR.rglob("*"): - try: - if path.is_dir() or path.suffix == ".md" or path.name == "_backlinks.json": - t = max(t, path.stat().st_mtime) - except OSError: - continue - return t - except Exception: - return 0.0 - - -def _get_all_pages() -> list: - global _pages_cache, _pages_cache_mtime, _page_index, _fulltext_index, _snippet_index, _token_index, _page_map, _meta_token_index + return _core_wiki_mtime(WIKI_DIR) + + +def _get_all_pages(force_check: bool = False) -> list: + global _pages_cache, _pages_cache_mtime, _pages_cache_checked_at, _page_index, _fulltext_index, _normalized_fulltext_index, _text_words_index, _meta_words_index, _snippet_index, _token_index, _page_map, _meta_token_index, _forward_links_index, _fts_index, _search_backend, _cache_read_warnings + now = time.monotonic() + if ( + _pages_cache is not None + and not force_check + and CACHE_MTIME_CHECK_INTERVAL_SECONDS > 0 + and now - _pages_cache_checked_at < CACHE_MTIME_CHECK_INTERVAL_SECONDS + ): + return _pages_cache mtime = _wiki_mtime() + _pages_cache_checked_at = now if _pages_cache is not None and mtime == _pages_cache_mtime: return _pages_cache - pages = [] - index: dict[str, Path] = {} - fulltext: dict[str, str] = {} - snippets: dict[str, str] = {} - token_idx: dict[str, set[str]] = {} - meta_idx: dict[str, set[str]] = {} - for md in sorted(WIKI_DIR.rglob("*.md")): - if md.name.startswith("."): continue - rel = md.relative_to(WIKI_DIR) - text = md.read_text(encoding="utf-8", 
errors="replace") - meta, body = _parse_frontmatter(text) - title = meta.get("title", "") - if not title: - m = re.search(r"^#\s+(.+)", body, re.MULTILINE) - title = m.group(1) if m else md.stem - cat = rel.parts[0] if len(rel.parts) > 1 else "root" - - # Extract TLDR for quick summaries - tldr = "" - tldr_m = re.search(r">\s*\*\*TLDR:\*\*\s*(.+)", body) - if tldr_m: - tldr = tldr_m.group(1).strip() - - # Normalize aliases to list - aliases_raw = meta.get("aliases", []) - if isinstance(aliases_raw, str): - aliases_raw = [a.strip() for a in aliases_raw.split(",") if a.strip()] - aliases = [a.lower() for a in aliases_raw] - - # Normalize tags to list - tags_raw = meta.get("tags", []) - if isinstance(tags_raw, str): - tags_raw = [t.strip() for t in tags_raw.split(",") if t.strip()] - - stem = md.stem.lower() - pages.append({ - "name": md.stem, - "title": title, - "category": cat, - "type": meta.get("type", ""), - "tags": tags_raw, - "aliases": aliases, - "maturity": meta.get("maturity", ""), - "source_count": meta.get("source_count", ""), - "tldr": tldr, - "date_updated": meta.get("date_updated", ""), - }) - index[stem] = md - # Store full text in memory for zero-IO search - text_lower = text.lower() - fulltext[stem] = text_lower - # Pre-extract snippet: first non-empty body line after frontmatter - body_lines = [l.strip() for l in body.split("\n") if l.strip() and not l.startswith("#") and not l.startswith(">")] - snippets[stem] = body_lines[0][:200] if body_lines else "" - # Build inverted token index: token → set of page stems - for token in re.split(r"\W+", text_lower): - if len(token) >= 3: - if token not in token_idx: - token_idx[token] = set() - token_idx[token].add(stem) - # Build meta token index: tokens from title/aliases/tags/tldr → stems - meta_tokens = set() - for word in re.split(r"\W+", title.lower()): - if len(word) >= 3: meta_tokens.add(word) - for alias in aliases: - for word in re.split(r"\W+", alias): - if len(word) >= 3: meta_tokens.add(word) - 
for tag in tags_raw: - for word in re.split(r"\W+", str(tag).lower()): - if len(word) >= 3: meta_tokens.add(word) - if tldr: - for word in re.split(r"\W+", tldr.lower()): - if len(word) >= 3: meta_tokens.add(word) - for token in meta_tokens: - if token not in meta_idx: - meta_idx[token] = set() - meta_idx[token].add(stem) - # Also index by alias so _find_page works with alternate names - for alias in aliases: - if alias not in index: - index[alias] = md - _pages_cache = pages + _core_close_wiki_cache({"fts_index": _fts_index}) + cache = _core_build_wiki_cache(WIKI_DIR) + _pages_cache = cache["pages"] _pages_cache_mtime = mtime - _page_index = index - _fulltext_index = fulltext - _snippet_index = snippets - _token_index = token_idx - _meta_token_index = meta_idx - _page_map = {p["name"].lower(): p for p in pages} - return pages + _page_index = cache["page_index"] + _fulltext_index = cache["fulltext"] + _normalized_fulltext_index = cache["normalized_fulltext"] + _text_words_index = cache["text_words_index"] + _meta_words_index = cache["meta_words_index"] + _snippet_index = cache["snippet_index"] + _token_index = cache["token_index"] + _meta_token_index = cache["meta_token_index"] + _page_map = cache["page_map"] + _forward_links_index = cache.get("forward_links_index", {}) + _fts_index = cache.get("fts_index") + _search_backend = str(cache.get("search_backend") or "token-index") + _cache_read_warnings = cache.get("read_warnings") if isinstance(cache.get("read_warnings"), list) else [] + return _pages_cache + + +def _current_wiki_cache() -> dict[str, object]: + _get_all_pages() + return { + "pages": _pages_cache or [], + "page_index": _page_index, + "fulltext": _fulltext_index, + "normalized_fulltext": _normalized_fulltext_index, + "text_words_index": _text_words_index, + "meta_words_index": _meta_words_index, + "snippet_index": _snippet_index, + "token_index": _token_index, + "meta_token_index": _meta_token_index, + "page_map": _page_map, + "forward_links_index": 
_forward_links_index, + "fts_index": _fts_index, + "search_backend": _search_backend, + "read_warning_count": len(_cache_read_warnings), + "read_warnings": _cache_read_warnings, + } def _find_page(name: str) -> Path | None: @@ -147,34 +284,69 @@ def _all_pages() -> list: return _get_all_pages() +def _page_list_payload( + category: str = "", + page_type: str = "", + maturity: str = "", + limit: int = 100, + offset: int = 0, + include_all: bool = False, +) -> dict: + return _core_list_pages( + _current_wiki_cache(), + category=category, + page_type=page_type, + maturity=maturity, + limit=limit, + offset=offset, + include_all=include_all, + ) + + def _load_backlinks_index() -> tuple[dict, str | None]: - bl_path = WIKI_DIR / "_backlinks.json" - empty = {"backlinks": {}, "forward": {}} - if not bl_path.exists(): - return empty, None - try: - data = json.loads(bl_path.read_text(encoding="utf-8")) - except Exception as e: - return empty, f"invalid backlinks index: {e}" - if not isinstance(data, dict): - return empty, "invalid backlinks index: root must be an object" - if "backlinks" not in data: - return {"backlinks": data, "forward": {}}, None - backlinks = data.get("backlinks", {}) - forward = data.get("forward", {}) - if not isinstance(backlinks, dict) or not isinstance(forward, dict): - return empty, "invalid backlinks index: backlinks and forward must be objects" - return {"backlinks": backlinks, "forward": forward}, None - - -def _parse_search_limit(raw: str) -> tuple[int | None, str | None]: - try: - limit = int(raw) - except ValueError: - return None, "limit must be an integer" - if limit < 1: - return None, "limit must be at least 1" - return min(limit, 50), None + return _core_load_backlinks_index(WIKI_DIR / "_backlinks.json") + + +def _page_links_payload( + page_name: str, + limit: int = 100, + offset: int = 0, + include_all: bool = False, +) -> tuple[dict, int]: + backlinks, error = _load_backlinks_index() + if error: + return {"error": error}, 500 + if not 
page_name.strip(): + return {"error": "page parameter required", "inbound": [], "forward": []}, 400 + return _core_page_link_summary( + backlinks, + page_name, + limit=limit, + offset=offset, + include_all=include_all, + ), 200 + + +def _parse_search_limit(raw: object) -> tuple[int | None, str | None]: + return _core_parse_bounded_int(raw, "limit", 20, 1, 50) + + +def _query_text(query: dict[str, list[str]], *names: str, max_len: int = MAX_QUERY_TEXT) -> str: + for name in names: + values = query.get(name) + if values: + text = _clean_text_input(values[0], max_len=max_len) + if text: + return text + return "" + + +def _utc_timestamp() -> str: + return _core_utc_timestamp() + + +def _append_log(timestamp: str, operation: str, description: str, lines: list[str]) -> None: + _core_append_log(WIKI_DIR, timestamp, operation, description, lines) def _page_href(name: str) -> str: @@ -182,12 +354,435 @@ def _page_href(name: str) -> str: def _plural_type_label(page_type: str) -> str: - irregular = {"entity": "entities"} + irregular = {"entity": "entities", "memory": "memories"} if page_type in irregular: return irregular[page_type] return page_type if page_type.endswith("s") else page_type + "s" +def _memory_records() -> list[dict[str, object]]: + return _core_memory_records(WIKI_DIR, include_body=False) + + +def _count_values(records: list[dict[str, object]], field: str) -> dict[str, int]: + return _core_count_values(records, field) + + +def _is_active_memory(record: dict[str, object]) -> bool: + return _core_is_active_memory(record) + + +def _memory_review_issues(record: dict[str, object]) -> list[dict[str, str]]: + return _core_memory_review_issues(record, review_command="review-memory") + + +def _project_visible_records(project: str | None = None) -> list[dict[str, object]]: + project_name = _core_normalize_project(project) + return [ + record + for record in _memory_records() + if _core_memory_visible_for_project(record, project_name) + ] + + +def _ingest_status() -> 
dict[str, object]: + return _core_collect_ingest_status(WIKI_DIR.parent) + + +def _memory_inbox(limit: int = 20, include_archived: bool = False, project: str | None = None) -> dict[str, object]: + return _core_memory_inbox( + _project_visible_records(project), + limit=limit, + include_archived=include_archived, + review_command="review-memory", + project=project, + ) + + +def _slugify(value: str, fallback: str = "memory") -> str: + slug = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-") + return slug or fallback + + +def _memory_title(text: str, explicit_title: str | None = None) -> str: + if explicit_title and explicit_title.strip(): + return explicit_title.strip() + first_line = next((line.strip() for line in text.splitlines() if line.strip()), "Memory") + first_sentence = re.split(r"(?<=[.!?])\s+", first_line, maxsplit=1)[0].strip() + if len(first_sentence) <= 70: + return first_sentence.rstrip(".") + return first_sentence[:67].rstrip() + "..." + + +def _memory_duplicate_candidates( + text: str, + title: str | None, + memory_type: str, + scope: str, + limit: int = 3, +) -> list[dict[str, object]]: + return _core_memory_duplicate_candidates( + _memory_records(), + text, + title, + memory_type, + scope, + limit=limit, + ) + + +def _propose_memories_from_text( + text: str, + source: str = "http", + limit: int = 10, + project: str | None = None, +) -> dict[str, object]: + return _core_propose_memories_from_text( + text, + _memory_records(), + source=source, + limit=limit, + writes_memory=False, + project=project, + ) + + +def _memory_explanation(identifier: str) -> dict[str, object]: + return _core_memory_explanation( + WIKI_DIR, + identifier, + records=_memory_records(), + review_command="review-memory", + ) + + +def _memory_profile(limit: int = 10, project: str | None = None) -> dict[str, object]: + return _core_memory_profile(_memory_records(), limit=limit, review_command="review-memory", project=project) + + +def _mark_memory_reviewed(identifier: str, note: 
str = "") -> dict[str, object]: + result = _core_mark_memory_reviewed( + WIKI_DIR, + _clean_text_input(identifier, max_len=300), + note=_clean_text_input(note, max_len=500), + timestamp=_utc_timestamp(), + records=_memory_records(), + review_command="review-memory", + log_writer=_append_log, + ) + if result["updated"]: + _invalidate_pages_cache() + return result + + +def _set_memory_status(identifier: str, status: str, reason: str = "") -> dict[str, object]: + result = _core_set_memory_status( + WIKI_DIR, + _clean_text_input(identifier, max_len=300), + status, + reason=_clean_text_input(reason, max_len=500), + timestamp=_utc_timestamp(), + records=_memory_records(), + log_writer=_append_log, + ) + if result["updated"]: + _invalidate_pages_cache() + return result + + +def _remember_memory_from_web(payload: dict[str, object]) -> dict[str, object]: + result = _core_write_memory_page( + WIKI_DIR, + _clean_text_input(payload.get("memory") or payload.get("text"), max_len=MAX_POST_BYTES), + _clean_text_input(payload.get("title"), max_len=160) or None, + _clean_text_input(payload.get("memory_type") or payload.get("type") or "note", max_len=30), + _clean_text_input(payload.get("scope") or "user", max_len=30), + _clean_text_input(payload.get("tags"), max_len=500) or None, + _clean_text_input(payload.get("source") or "web approval", max_len=500), + _utc_timestamp(), + project=_clean_text_input(payload.get("project"), max_len=80) or None, + records=_memory_records(), + allow_duplicate=False, + allow_conflict=False, + log_writer=_append_log, + rebuild_backlinks=lambda: bool(_rebuild_backlinks_payload().get("rebuilt")), + ) + if result.get("created"): + _invalidate_pages_cache() + return result + + +def _update_memory_from_web(payload: dict[str, object]) -> dict[str, object]: + result = _core_update_memory_page( + WIKI_DIR, + _clean_text_input(payload.get("memory") or payload.get("identifier"), max_len=300), + _clean_text_input(payload.get("text"), max_len=MAX_POST_BYTES), + 
_clean_text_input(payload.get("source") or "web approval", max_len=500), + _utc_timestamp(), + records=_memory_records(), + review_command="review-memory", + allow_conflict=False, + project=_clean_text_input(payload.get("project"), max_len=80) or None, + log_writer=_append_log, + rebuild_backlinks=lambda: bool(_rebuild_backlinks_payload().get("rebuilt")), + ) + if result.get("updated"): + _invalidate_pages_cache() + return result + + +def _memory_activity_key(record: dict[str, object]) -> tuple[str, str, str]: + return ( + str(record.get("updated_at") or record.get("date_captured") or ""), + str(record.get("date_captured") or ""), + str(record.get("title") or "").lower(), + ) + + +def _memory_action_hints(record: dict[str, object]) -> list[dict[str, object]]: + hints: list[dict[str, object]] = [] + for action in _core_memory_action_hints(record, review_command="review-memory"): + item = { + "kind": str(action.get("kind") or ""), + "label": str(action.get("label") or ""), + "href": "", + "command": str(action.get("command") or ""), + "description": str(action.get("description") or ""), + "priority": str(action.get("priority") or ""), + "arguments": action.get("arguments") if isinstance(action.get("arguments"), dict) else {}, + } + if action.get("kind") == "explain": + name = str(record.get("name") or "") + item["href"] = f"/explain-memory?memory={urllib.parse.quote(name, safe='')}" + hints.append(item) + return hints + + +def _memory_with_actions(record: dict[str, object]) -> dict[str, object]: + item = dict(record) + item["actions"] = _memory_action_hints(record) + return item + + +def _memory_dashboard_next_actions( + memory_count: int, + review_count: int, + updated_count: int, + archived_count: int, + capture_count: int = 0, + capture_warning_count: int = 0, +) -> list[dict[str, str]]: + actions: list[dict[str, str]] = [] + if capture_warning_count: + actions.append({ + "label": "Redact capture warnings", + "detail": f"{capture_warning_count} raw capture{'s' if 
capture_warning_count != 1 else ''} contain secret-looking values.", + "href": "/captures", + "command": "python3 link.py redact-capture raw/memory-captures/.md .", + "priority": "high", + }) + if review_count: + memory_label = "memory" if review_count == 1 else "memories" + verb = "needs" if review_count == 1 else "need" + actions.append({ + "label": "Review pending memories", + "detail": f"{review_count} {memory_label} {verb} confirmation or metadata cleanup.", + "href": "/inbox", + "command": "python3 link.py memory-inbox .", + "priority": "high", + }) + if updated_count: + actions.append({ + "label": "Audit recent memory updates", + "detail": f"{updated_count} memory update{'s' if updated_count != 1 else ''} should be checked for accuracy.", + "href": "/memory", + "command": "python3 link.py profile .", + "priority": "medium", + }) + if archived_count: + actions.append({ + "label": "Inspect archived memory", + "detail": f"{archived_count} archived memory page{'s' if archived_count != 1 else ''} remain inspectable but hidden from default recall.", + "href": "/profile", + "command": "python3 link.py profile .", + "priority": "low", + }) + if capture_count and not capture_warning_count: + actions.append({ + "label": "Review raw captures", + "detail": f"{capture_count} saved raw capture{'s' if capture_count != 1 else ''} can be accepted, redacted, or deleted.", + "href": "/captures", + "command": "python3 link.py accept-capture raw/memory-captures/.md . --index 1", + "priority": "medium", + }) + if not memory_count: + actions.append({ + "label": "Create the first memory", + "detail": "Save an explicit preference, decision, project fact, or note for local agents.", + "href": "", + "command": 'python3 link.py remember "User prefers ..." . 
--type preference --scope user', + "priority": "high", + }) + if not actions: + actions.append({ + "label": "Memory is recall-ready", + "detail": "No pending review items or recent updates need attention.", + "href": "/profile", + "command": "python3 link.py profile .", + "priority": "info", + }) + return actions[:3] + + +def _capture_records(limit: int = 12, project: str | None = None) -> list[dict[str, object]]: + root = WIKI_DIR.parent + return _core_capture_records( + root, + limit=limit, + project=project, + commands_for=_core_cli_capture_commands, + ) + + +def _capture_inbox(limit: int = 20, project: str | None = None) -> dict[str, object]: + return _core_capture_inbox( + WIKI_DIR.parent, + limit=limit, + project=project, + commands_for=_core_cli_capture_commands, + ) + + +def _capture_review_summary(project: str | None = None, limit: int = 3) -> dict[str, object]: + project_name = _core_normalize_project(project) + summary = _core_capture_review_summary( + WIKI_DIR.parent, + limit=limit, + project=project_name, + commands_for=_core_cli_capture_commands, + ) + project_query = f"?project={urllib.parse.quote(project_name, safe='')}" if project_name else "" + project_arg = f' --project "{project_name}"' if project_name else "" + summary["href"] = f"/captures{project_query}" + summary["command"] = f"python3 link.py capture-inbox .{project_arg}" + return summary + + +def _memory_brief(query: str = "", limit: int = 6, project: str | None = None) -> dict[str, object]: + limit = max(1, min(limit, 20)) + project_name = _core_normalize_project(project) + payload = _core_memory_brief( + _memory_records(), query=query, limit=limit, + review_command="review-memory", project=project_name, + ) + return _core_add_capture_review_to_brief( + payload, + _capture_review_summary(project=project_name, limit=min(limit, 10)), + ) + + +def _memory_dashboard(limit: int = 12, project: str | None = None) -> dict[str, object]: + limit = max(1, min(limit, 50)) + project_name = 
_core_normalize_project(project) + records = _project_visible_records(project_name) + active_records = [record for record in records if _is_active_memory(record)] + archived_records = [ + record for record in records + if str(record.get("status") or "").lower() == "archived" + ] + recent_active = sorted(active_records, key=_memory_activity_key, reverse=True) + recent_updates = sorted( + [record for record in records if str(record.get("updated_at") or "").strip()], + key=lambda record: ( + str(record.get("updated_at") or ""), + str(record.get("title") or "").lower(), + ), + reverse=True, + ) + archived = sorted(archived_records, key=_memory_activity_key, reverse=True) + inbox = _memory_inbox(limit=limit, project=project_name) + review_count = inbox["review_count"] + updated_count = len(recent_updates) + archived_count = len(archived_records) + captures = _capture_records(limit=limit, project=project_name) + capture_warning_count = sum(1 for capture in captures if capture["warning_count"]) + return { + "memory_count": len(records), + "active_count": len(active_records), + "review_count": review_count, + "archived_count": archived_count, + "updated_count": updated_count, + "capture_count": len(captures), + "capture_warning_count": capture_warning_count, + "project": project_name, + "by_type": _count_values(records, "memory_type"), + "by_scope": _count_values(records, "scope"), + "counts_by_severity": inbox["counts_by_severity"], + "next_actions": _memory_dashboard_next_actions( + memory_count=len(records), + review_count=review_count, + updated_count=updated_count, + archived_count=archived_count, + capture_count=len(captures), + capture_warning_count=capture_warning_count, + ), + "active": [_memory_with_actions(record) for record in recent_active[:limit]], + "review": [_memory_with_actions(record) for record in inbox["items"][:limit]], + "recent_updates": [_memory_with_actions(record) for record in recent_updates[:limit]], + "archived": [_memory_with_actions(record) 
for record in archived[:limit]], + "captures": captures, + } + + +def _web_memory_audit_actions( + inbox: dict[str, object], + captures: dict[str, object], + risk_factors: list[dict[str, object]], + project_name: str, +) -> list[dict[str, object]]: + project_query = f"?project={urllib.parse.quote(project_name, safe='')}" if project_name else "" + project_arg = f' --project "{project_name}"' if project_name else "" + return [ + { + "label": "Review memory inbox", + "detail": "Review pending, stale, invalid, or underspecified memories.", + "href": f"/inbox{project_query}", + "command": f"python3 link.py memory-inbox .{project_arg}", + "recommended": bool(inbox["review_count"]), + }, + { + "label": "Review raw captures", + "detail": "Accept, redact, or delete saved proposal-only raw captures.", + "href": f"/captures{project_query}", + "command": f"python3 link.py capture-inbox .{project_arg}", + "recommended": bool(captures["count"] or captures.get("read_warning_count")), + }, + { + "label": "Run doctor", + "detail": "Check graph, source, memory, raw capture, and secret hygiene.", + "href": "", + "command": "python3 link.py doctor .", + "recommended": not risk_factors, + }, + ] + + +def _memory_audit(limit: int = 10, project: str | None = None) -> dict[str, object]: + limit = max(1, min(limit, 50)) + project_name = _core_normalize_project(project) + profile = _memory_profile(limit=limit, project=project_name) + inbox = _memory_inbox(limit=limit, include_archived=True, project=project_name) + captures = _capture_review_summary(project=project_name, limit=min(limit, 10)) + payload = _core_memory_audit_report(profile, inbox, captures, [], project=project_name) + payload["next_actions"] = _web_memory_audit_actions( + inbox, + captures, + payload["risk_factors"], + str(payload["project"]), + ) + return payload + + def _json_for_script(data) -> str: """Serialize JSON for direct embedding inside a - -""" + return _core_render_footer_html() + + +def _layout(title, body, 
page_class: str = ""): + return _core_render_layout(title, body, page_class=page_class) # --------------------------------------------------------------------------- @@ -537,7 +1032,7 @@ def _render_home(): counts[t] = counts.get(t, 0) + 1 stats_items = f'
    {len(pages)}pages
    ' - for t in ["source", "concept", "entity", "comparison", "exploration"]: + for t in ["memory", "source", "concept", "entity", "comparison", "exploration"]: if counts.get(t, 0) > 0: label = _plural_type_label(t) stats_items += f'
    {counts[t]}{label}
    ' @@ -560,7 +1055,67 @@ def _render_home(): if not cats: sections = "

    Wiki is empty. Drop sources into raw/ and tell your agent to ingest them.

    " - return _layout("Link", f"

    Link

    Personal knowledge wiki. Knowledge compounds here.

    {stats}{sections}") + lanes = ( + '
    ' + '

    1. Sources become wiki knowledge

    ' + '

    Drop files into raw/ and say ingest raw/file.md into Link. ' + 'Link creates source-backed pages, concepts, backlinks, index entries, and logs.

    ' + '

    2. Remember saves agent memory

    ' + '

    Say remember that ... when a preference, decision, or project fact should affect future agents. ' + 'Ingest alone does not silently personalize recall.

    ' + '

    3. Query uses both safely

    ' + '

    Ask query Link for ... or open a memory brief. Link combines reviewed memory, wiki pages, and graph context.

    ' + '
    ' + ) + prompt_codes = "" + for item in _starter_prompts_payload().get("prompts", []): + if isinstance(item, dict): + prompt_codes += f'{html.escape(str(item.get("prompt") or ""))}' + prompts = ( + '
    ' + '

    Try These Prompts

    ' + '

    Ask from Codex, Claude, Cursor, Kiro, or any agent with Link installed. Open starter prompts.

    ' + '
    ' + f'{prompt_codes}
    ' + ) + + return _layout("Link", f"

    Link

    Local agent memory. Knowledge compounds here.

    {lanes}{prompts}{stats}{sections}") + + +def _starter_prompts_payload(project: str | None = None) -> dict[str, object]: + return _core_starter_prompt_payload(WIKI_DIR, project=project) + + +def _render_prompts(project: str | None = None): + payload = _starter_prompts_payload(project=project) + prompt_rows = "" + for item in payload.get("prompts", []): + if not isinstance(item, dict): + continue + prompt_rows += ( + f'
    ' + f'

    {html.escape(str(item.get("label") or "Prompt"))}

    ' + f'{html.escape(str(item.get("prompt") or ""))}' + f'

    {html.escape(str(item.get("when") or ""))}

    ' + f'
    ' + ) + command_rows = "".join( + f'
  • {html.escape(str(command))}
  • ' + for command in payload.get("commands", []) + ) + project_line = ( + f'

    Project examples are scoped to {html.escape(str(payload["project"]))}.

    ' + if payload.get("project") + else '

    These prompts work for a personal Link wiki. Add ?project=slug for project wording.

    ' + ) + body = ( + f'' + f'

    Starter Prompts

    ' + f'{project_line}' + f'

    Ask Your Agent

    {prompt_rows}
    ' + f'

    Local Checks

      {command_rows}
    ' + ) + return _layout("Starter Prompts", body) def _render_page(page_path): @@ -595,29 +1150,828 @@ def _render_page(page_path): return _layout(title, crumb + meta_line + body_html) -def _render_all(): +def _render_all(query: dict[str, list[str]] | None = None): + query = query or {} pages = _get_all_pages() + total = len(pages) + limit_raw = query.get("limit", ["250"])[0] + offset_raw = query.get("offset", ["0"])[0] + limit, limit_error = _core_parse_bounded_int(limit_raw, "limit", 250, 1, 500) + offset, offset_error = _core_parse_bounded_int(offset_raw, "offset", 0, 0, 1000000) + error = limit_error or offset_error + if error: + limit = 250 + offset = 0 + assert limit is not None + assert offset is not None + sorted_pages = sorted(pages, key=lambda x: x["title"]) + window = sorted_pages[offset:offset + limit] items = "".join( f'
  • {html.escape(p["title"])}' f'{p["type"] or p["category"]}
  • ' - for p in sorted(pages, key=lambda x: x["title"]) + for p in window ) + next_offset = offset + limit + prev_offset = max(0, offset - limit) + controls = "" + if total > limit or offset: + start = 0 if total == 0 else offset + 1 + end = min(offset + limit, total) + prev_href = html.escape(f"/all?limit={limit}&offset={prev_offset}", quote=True) + next_href = html.escape(f"/all?limit={limit}&offset={next_offset}", quote=True) + prev_link = ( + f'Previous' + if offset > 0 + else 'Previous' + ) + next_link = ( + f'Next' + if next_offset < total + else 'Next' + ) + controls = ( + f'
    Showing {start}-{end} of {total}' + f'{prev_link}{next_link}
    ' + ) + warning = f'

    {html.escape(error)}

    ' if error else "" return _layout("All Pages", f'' - f"

    All Pages ({len(pages)})

      {items}
    ") + f"

    All Pages ({total})

    {warning}{controls}
      {items}
    {controls}") + + +def _render_memory_card(record: dict[str, object], include_issues: bool = False) -> str: + return _core_render_memory_card( + record, + page_href=_page_href, + action_hints=_memory_action_hints, + include_issues=include_issues, + ) + + +def _render_memory_action_commands(actions: list[dict[str, object]] | tuple[dict[str, object], ...]) -> str: + return _core_render_memory_action_commands(actions) + + +def _render_memory_action_button(action: dict[str, object]) -> str: + return _core_render_memory_action_button(action) + + +def _render_memory_section(title: str, records: list[dict[str, object]], empty: str, href: str = "", include_issues: bool = False) -> str: + return _core_render_memory_section( + title, + records, + empty, + page_href=_page_href, + action_hints=_memory_action_hints, + href=href, + include_issues=include_issues, + ) + + +def _render_capture_card(capture: dict[str, object]) -> str: + return _core_render_capture_card(capture) + + +def _render_capture_section(captures: list[dict[str, object]]) -> str: + return _core_render_capture_section(captures) + + +def _render_memory_next_actions(actions: list[dict[str, str]]) -> str: + return _core_render_memory_next_actions(actions) + + +def _render_brief(query: str = "", project: str | None = None): + brief = _memory_brief(query=query, limit=8, project=project) + profile = brief["profile"] + captures = brief["captures"] + stats = ( + f'
    ' + f'
    {profile["active_count"]}active
    ' + f'
    {brief["relevant_count"]}relevant
    ' + f'
    {brief["review"]["count"]}review
    ' + f'
    {captures["count"]}captures
    ' + f'
    ' + ) + guidance = "".join( + f"
  • {html.escape(str(item))}
  • " + for item in brief["agent_guidance"] + ) + query_value = html.escape(str(query), quote=True) + project_field = ( + f'' + if brief["project"] else "" + ) + body = ( + f'' + f'

    Memory Brief

    ' + f'
    ' + f'

    Startup context for local agents before answering, coding, or planning.

    ' + f'
    ' + f'' + f'{project_field}
    ' + f'{"

    Project: " + html.escape(str(brief["project"])) + "

    " if brief["project"] else ""}' + f'{stats}' + f'

    Agent Guidance

      {guidance}
    ' + f'{_render_memory_section("Relevant memories", brief["relevant_memories"], "No relevant memories yet.")}' + f'{_render_memory_section("Review queue", brief["review"]["items"], "No memory review items.", href="/inbox", include_issues=True)}' + f'{_render_capture_section(captures["items"])}' + f'
    ' + ) + return _layout("Memory Brief", body) + + +def _render_memory_dashboard(project: str | None = None): + dashboard = _memory_dashboard(limit=8, project=project) + stats = ( + f'
    ' + f'
    {dashboard["memory_count"]}memories
    ' + f'
    {dashboard["active_count"]}active
    ' + f'
    {dashboard["review_count"]}review
    ' + f'
    {dashboard["updated_count"]}updated
    ' + f'
    {dashboard["capture_count"]}captures
    ' + f'
    {dashboard["archived_count"]}archived
    ' + f'
    ' + ) + counts = "" + if dashboard["by_type"]: + counts += "

    Types: " + ", ".join( + f"{html.escape(name)}: {count}" for name, count in dashboard["by_type"].items() + ) + "

    " + if dashboard["by_scope"]: + counts += "

    Scopes: " + ", ".join( + f"{html.escape(name)}: {count}" for name, count in dashboard["by_scope"].items() + ) + "

    " + body = ( + f'' + f'

    Memory Dashboard

    ' + f'
    ' + f'

    Read-only command center for what local agents can remember, what needs review, and what changed recently.

    ' + f'{"

    Project: " + html.escape(str(dashboard["project"])) + "

    " if dashboard["project"] else ""}' + f'{stats}' + f'{_render_memory_next_actions(dashboard["next_actions"])}' + f'{counts}' + f'{_render_memory_section("Review needed", dashboard["review"], "No memories need review.", href="/inbox", include_issues=True)}' + f'{_render_capture_section(dashboard["captures"])}' + f'{_render_memory_section("Recent updates", dashboard["recent_updates"], "No memory updates yet.")}' + f'{_render_memory_section("Active memories", dashboard["active"], "No active memories yet.", href="/profile")}' + f'{_render_memory_section("Archived memories", dashboard["archived"], "No archived memories.")}' + f'
    ' + ) + return _layout("Memory Dashboard", body) + + +def _render_profile(project: str | None = None): + profile = _memory_profile(limit=12, project=project) + memory_count = profile["memory_count"] + active_count = profile["active_count"] + stats = ( + f'
    ' + f'
    {memory_count}memories
    ' + f'
    {active_count}active
    ' + f'
    {profile["review_count"]}review
    ' + f'
    ' + ) + + def counts_line(title: str, counts: dict[str, int]) -> str: + if not counts: + return "" + parts = ", ".join(f"{html.escape(name)}: {count}" for name, count in counts.items()) + return f"

    {html.escape(title)}: {parts}

    " + + def section(title: str, records: list[dict[str, object]], empty: str = "none") -> str: + if not records: + return f"

    {html.escape(title)}

    {html.escape(empty)}

    " + items = "" + for record in records: + summary = record.get("tldr") or record.get("snippet") or "" + meta = f'{record.get("memory_type", "")} · {record.get("scope", "")}' + items += ( + f'
  • {html.escape(str(record["title"]))}' + f'
    {html.escape(meta)}
    ' + f'' + f'{f"{html.escape(str(summary))}" if summary else ""}
  • ' + ) + return f"

    {html.escape(title)}

      {items}
    " + + body = ( + f'' + f'

    Memory Profile

    ' + f'
    ' + f'

    What Link currently remembers about the user, projects, decisions, and preferences.

    ' + f'{"

    Project: " + html.escape(str(profile["project"])) + "

    " if profile["project"] else ""}' + f'{stats}' + f'{counts_line("Types", profile["by_type"])}' + f'{counts_line("Scopes", profile["by_scope"])}' + f'{counts_line("Status", profile["by_status"])}' + f'{section("Recent memories", profile["recent"])}' + f'{section("Preferences", profile["preferences"])}' + f'{section("Decisions", profile["decisions"])}' + f'{section("Project context", profile["projects"])}' + f'{section("Archived memories", profile["archived"]) if profile["archived"] else ""}' + f'
    ' + ) + return _layout("Memory Profile", body) + + +def _render_memory_audit(project: str | None = None): + audit = _memory_audit(limit=10, project=project) + profile = audit["profile"] + captures = audit["captures"] + stats = ( + f'
    ' + f'
    {profile["memory_count"]}memories
    ' + f'
    {profile["active_count"]}active
    ' + f'
    {profile["review_count"]}review
    ' + f'
    {captures["count"]}captures
    ' + f'
    {captures["warning_count"]}warnings
    ' + f'
    {captures.get("read_warning_count", 0)}read warnings
    ' + f'
    ' + ) + risk_html = "" + if audit["risk_factors"]: + risk_html = "

    Needs attention

      " + "".join( + f'
    • review {html.escape(str(item["code"]))}: ' + f'{html.escape(str(item["message"]))}
    • ' + for item in audit["risk_factors"] + ) + "
    " + else: + risk_html = "

    Needs attention

    No memory audit risks detected.

    " + body = ( + f'' + f'

    Memory Audit

    ' + f'
    ' + f'

    Read-only health report for local agent memory, review backlog, raw captures, and safe next actions.

    ' + f'{"

    Project: " + html.escape(str(audit["project"])) + "

    " if audit["project"] else ""}' + f'

    Status: {html.escape(str(audit["status"]))}

    ' + f'{stats}' + f'{risk_html}' + f'{_render_memory_next_actions(audit["next_actions"])}' + f'{_render_memory_section("Memory inbox sample", audit["inbox"]["items"], "No memory review items.", href="/inbox", include_issues=True)}' + f'{_render_capture_section(captures["items"])}' + f'
    ' + ) + return _layout("Memory Audit", body) + + +def _render_captures(project: str | None = None): + inbox = _capture_inbox(limit=50, project=project) + stats = ( + f'
    ' + f'
    {inbox["count"]}captures
    ' + f'
    {inbox["warning_count"]}warnings
    ' + f'
    {inbox.get("read_warning_count", 0)}read warnings
    ' + f'
    ' + ) + warning_html = "" + if inbox["warning_count"]: + warning_html = ( + f'
    Needs redaction' + f'

    {inbox["warning_count"]} raw capture' + f'{"s contain" if inbox["warning_count"] != 1 else " contains"} secret-looking values.

    ' + f'python3 link.py redact-capture raw/memory-captures/<capture>.md .
    ' + ) + read_warning_html = "" + read_warnings = inbox.get("read_warnings") if isinstance(inbox.get("read_warnings"), list) else [] + if read_warnings: + rows = "".join( + f'
  • {html.escape(str(item.get("capture") or ""))} ' + f'{html.escape(str(item.get("error") or "unreadable"))}
  • ' + for item in read_warnings[:50] + ) + read_warning_html = ( + '
    Fix capture access' + '

    Some raw captures could not be read and are not listed for approval.

    ' + f'
      {rows}
    ' + ) + body = ( + f'' + f'

    Raw Capture Inbox

    ' + f'
    ' + f'

    Saved proposal-only session notes waiting for human review before they become durable memory.

    ' + f'{"

    Project: " + html.escape(str(inbox["project"])) + "

    " if inbox["project"] else ""}' + f'{stats}' + f'{warning_html}' + f'{read_warning_html}' + f'{_render_capture_section(inbox["captures"])}' + f'
    ' + ) + return _layout("Raw Capture Inbox", body) + + +def _render_propose(project: str | None = None, source: str | None = None): + project_value = html.escape(_clean_text_input(project, max_len=80), quote=True) + source_value = html.escape(_clean_text_input(source, max_len=500), quote=True) + proposal_path = ( + f'
    ' + f'
    1' + f'

    Load source

    Paste notes or load a safe local raw file. The source stays local.

    ' + f'raw/file.md
    ' + f'
    2' + f'

    Propose

    Link returns candidates only. This step never writes durable memory.

    ' + f'Propose
    ' + f'
    3' + f'

    Approve explicitly

    Copy the approval prompt into your agent chat only for memories you want kept.

    ' + f'remember that ...
    ' + f'
    4' + f'

    Review later

    Use the inbox and explain views to review, archive, update, or forget memories.

    ' + f'link memory-inbox
    ' + f'
    ' + ) + body = ( + f'' + f'

    Propose Memories

    ' + f'

    Paste source notes, session notes, or a raw excerpt. Link returns memory candidates without writing anything.

    ' + f'
    Trust rule' + f'

    Source-backed wiki knowledge and durable agent memory are separate. Save only preferences, decisions, or project facts you approve.

    ' + f'

    Review Gate

    ' + f'Before saving memory' + f'Keep ordinary facts in wiki pages; save only durable preferences, decisions, project context, or user facts.' + f'Check source label, scope, project, duplicate candidates, and conflict warnings.' + f'Use direct approval only when the proposal is clean; otherwise copy the approval prompt into your agent chat.' + f'
    ' + f'{proposal_path}' + f'

    Local Raw Sources

    captures
    ' + f'
    ' + f'
    ' + f'' + f'
    ' + f'' + f'' + f'' + f'' + f'
    ' + f'
    ' + f'
    ' + f'
    ' + ) + return _layout("Propose Memories", body) + + +def _copy_button(text: object, label: str = "Copy") -> str: + value = str(text or "") + if not value: + return "" + return ( + f'' + ) + + +def _render_ingest(): + status = _ingest_status() + guidance = status.get("guidance") if isinstance(status.get("guidance"), dict) else {} + agent_prompt = str(guidance.get("agent_prompt") or "") + commands = guidance.get("commands") if isinstance(guidance.get("commands"), list) else [] + notes = guidance.get("notes") if isinstance(guidance.get("notes"), list) else [] + plan = status.get("plan") if isinstance(status.get("plan"), dict) else {} + pending = status.get("pending_raw") if isinstance(status.get("pending_raw"), list) else [] + represented = status.get("represented_raw") if isinstance(status.get("represented_raw"), list) else [] + safety = status.get("safety") if isinstance(status.get("safety"), dict) else {} + completion = status.get("completion") if isinstance(status.get("completion"), dict) else {} + plan_batch = plan.get("batch") if isinstance(plan.get("batch"), list) else [] + plan_first = plan_batch[0] if plan_batch and isinstance(plan_batch[0], dict) else {} + first_raw = str(plan_first.get("raw") or "") + if not first_raw: + first_raw = str(pending[0].get("raw") or "raw/") if pending else "raw/" + ingest_prompt = agent_prompt or f"ingest {first_raw} into Link" + memory_prompt = str(plan.get("memory_prompt") or f"propose memories from {first_raw}") + propose_href = "/propose?source=" + urllib.parse.quote(first_raw) if pending else "/propose" + state = str(guidance.get("state") or plan.get("state") or "unknown") + + stats = ( + f'
    ' + f'
    {int(status.get("raw_count") or 0)}raw
    ' + f'
    {int(status.get("represented_count") or 0)}represented
    ' + f'
    {int(status.get("pending_count") or 0)}pending
    ' + f'
    {int(status.get("stale_count") or 0)}stale
    ' + f'
    {html.escape(str(status.get("backlinks_status") or "unknown"))}graph
    ' + f'
    {html.escape(str(safety.get("status") or "unknown"))}safety
    ' + f'
    ' + ) + safety_html = "" + if safety: + labels = safety.get("labels") if isinstance(safety.get("labels"), list) else [] + labels_text = ", ".join(html.escape(str(label)) for label in labels) + labels_html = f"

    Warnings: {labels_text}

    " if labels_text else "" + safety_html = ( + f'
    Raw safety: {html.escape(str(safety.get("status") or "unknown"))}' + f'

    {html.escape(str(safety.get("summary") or ""))}

    {labels_html}
    ' + ) + action_rows = "" + if agent_prompt: + action_rows += ( + f'
    Ask your agent' + f'{_copy_button(agent_prompt, "Copy prompt")}{html.escape(agent_prompt)}
    ' + ) + for command in commands: + action_rows += ( + f'
    Run' + f'{_copy_button(command, "Copy command")}{html.escape(str(command))}
    ' + ) + actions = f'
    {action_rows}
    ' if action_rows else "" + if agent_prompt: + next_detail = "Copy this into your agent chat. The agent should ingest the raw source, rebuild indexes, and validate before reporting done." + next_code = agent_prompt + next_extra = ( + f'

    If the source contains preferences, decisions, or project facts, ' + f'open memory proposals first.

    ' + ) + elif state == "blocked_secrets": + next_detail = "Redact secret-looking values in the flagged raw source before asking any agent to ingest it." + next_code = f"edit {first_raw}" + next_extra = "" + elif state == "blocked_raw_access": + next_detail = "Fix raw file access before asking any agent to ingest it. Link could not inspect the source for safety." + next_code = f"inspect {first_raw}" + next_extra = "" + elif state == "blocked_source_access": + next_detail = "Fix source page access before relying on ingest state. Link could not inspect represented source pages." + next_code = "link ingest-status" + next_extra = "" + elif state == "stale_graph": + next_detail = "Repair the graph index before relying on search, context, or the graph view." + next_code = "link rebuild-backlinks && link validate" + next_extra = "" + elif state == "empty": + next_detail = "Add a note, article, transcript, or project file to raw/, then refresh this page." + next_code = "cp notes.md raw/ && link ingest-status" + next_extra = "" + elif state == "ready": + next_detail = "No ingest is pending. Ask Link for context, or add another source when there is new material." + next_code = 'link brief "current task"' + next_extra = "" + else: + next_detail = "Initialize or repair the Link folder before ingesting sources." + next_code = "link init && link status --validate" + next_extra = "" + if state == "blocked_secrets": + ingest_prompt = f"redact secret-looking values in {first_raw} before ingest" + optional_memory_html = 'redact before memory proposals' + elif state == "blocked_raw_access": + ingest_prompt = f"fix raw source access for {first_raw} before ingest" + optional_memory_html = 'fix access before memory proposals' + elif state == "blocked_source_access": + ingest_prompt = "fix source page access before ingest" + optional_memory_html = 'fix source access first' + else: + optional_memory_html = ( + f'{html.escape(memory_prompt)}' + ) + next_html = ( + f'
    Next step' + f'

    {html.escape(next_detail)}

    ' + f'{html.escape(next_code)}' + f'{_copy_button(next_code, "Copy next step")}' + f'{next_extra}
    ' + ) + guide_html = ( + f'
    ' + f'
    1' + f'

    Add source

    Put notes, articles, transcripts, or project files in raw/.

    ' + f'{html.escape(first_raw)}
    ' + f'
    2' + f'

    Ask agent

    Have your agent convert the source into source-backed wiki pages.

    ' + f'{html.escape(ingest_prompt)}
    ' + f'
    3' + f'

    Validate

    Check page shape, links, and graph freshness before relying on the result.

    ' + f'link validate
    ' + f'
    4' + f'

    Optional memory

    Only save preferences, decisions, or project facts after approval.

    ' + f'{optional_memory_html}
    ' + f'
    ' + ) + + pending_html = "" + if pending: + rows = "" + for item in pending[:50]: + raw_path = str(item.get("raw") or "") + propose_href = "/propose?source=" + urllib.parse.quote(raw_path) + secret_warnings = item.get("secret_warnings") if isinstance(item.get("secret_warnings"), list) else [] + if secret_warnings: + meta = ( + f'{int(item.get("size_bytes") or 0)} bytes · secret warning: ' + f'{", ".join(html.escape(str(label)) for label in secret_warnings)} · redact before ingest' + ) + elif item.get("scan_error"): + meta = ( + f'{int(item.get("size_bytes") or 0)} bytes · ' + f'could not inspect: {html.escape(str(item.get("scan_error") or ""))} · fix access before ingest' + ) + elif item.get("stale"): + target_pages = item.get("source_page_paths") if isinstance(item.get("source_page_paths"), list) else [] + target_label = ", ".join(html.escape(str(page)) for page in target_pages if page) + target_text = f" · refresh {target_label}" if target_label else " · refresh existing source page" + meta = ( + f'{int(item.get("size_bytes") or 0)} bytes · ' + f'{html.escape(str(item.get("stale_reason") or "raw changed after wiki source page"))}' + f'{target_text}' + ) + else: + meta = ( + f'{int(item.get("size_bytes") or 0)} bytes · ' + f'propose memories' + ) + rows += f'
  • {html.escape(raw_path)}{meta}
  • ' + if len(pending) > 50: + rows += f'
  • ... {len(pending) - 50} more
  • ' + pending_html = f'

    Pending Raw Files

    propose memories
      {rows}
    ' + elif not represented: + pending_html = '

    No raw source files found yet.

    ' + + notes_html = "" + if notes: + notes_html = "
      " + "".join(f"
    • {html.escape(str(note))}
    • " for note in notes) + "
    " + + source_warning_html = "" + source_warnings = status.get("source_read_warnings") if isinstance(status.get("source_read_warnings"), list) else [] + if source_warnings: + rows = "" + for item in source_warnings[:50]: + if not isinstance(item, dict): + continue + rows += ( + f'
  • {html.escape(str(item.get("page") or ""))}' + f'could not inspect: {html.escape(str(item.get("error") or ""))}
  • ' + ) + source_warning_html = f'

    Source Page Warnings

      {rows}
    ' + + plan_html = "" + if plan: + steps = plan.get("steps") if isinstance(plan.get("steps"), list) else [] + batch = plan.get("batch") if isinstance(plan.get("batch"), list) else [] + post_checks = plan.get("post_checks") if isinstance(plan.get("post_checks"), list) else [] + step_html = "".join(f"
  • {html.escape(str(step))}
  • " for step in steps[:6]) + batch_html = "" + if batch: + rows = "" + for item in batch[:5]: + rows += ( + f'
  • {html.escape(str(item.get("raw") or ""))}' + f'{html.escape(str(item.get("target_source_page") or item.get("suggested_source_page") or ""))}
  • ' + ) + batch_html = f'

    Batch

      {rows}
    ' + checks_html = "" + if post_checks: + rows = "".join( + f'
  • {html.escape(str(check))}' + f'run before reporting done {_copy_button(check)}
  • ' + for check in post_checks[:6] + ) + checks_html = f'

    Post-ingest checks

      {rows}
    ' + plan_html = ( + f'

    {html.escape(str(plan.get("title") or "Suggested Workflow"))}

    ' + f'

    {html.escape(str(plan.get("summary") or ""))}

    ' + f'
      {step_html}
    {batch_html}{checks_html}
    ' + ) + + completion_html = "" + completion_items = completion.get("items") if isinstance(completion.get("items"), list) else [] + if completion_items: + cards = "" + for item in completion_items: + raw_path = str(item.get("raw") or "") + pages = item.get("source_pages") if isinstance(item.get("source_pages"), list) else [] + page_links = "" + for page in pages: + if not isinstance(page, dict): + continue + page_name = str(page.get("name") or "") + page_title = str(page.get("title") or page_name) + if not page_name: + continue + page_links += ( + f'{html.escape(page_title)}' + ) + if not page_links: + page_links = 'source page not found' + memory_prompt_value = str(item.get("memory_prompt") or "") + query_prompt_value = str(item.get("query_prompt") or "") + propose_link = "/propose?source=" + urllib.parse.quote(raw_path) if raw_path else "/propose" + warnings = item.get("secret_warnings") if isinstance(item.get("secret_warnings"), list) else [] + warning_html = "" + if warnings: + warning_html = ( + f'

    Raw warnings: ' + f'{", ".join(html.escape(str(label)) for label in warnings)}

    ' + ) + cards += ( + f'
    ' + f'

    {html.escape(raw_path)}

    ' + f'

    {int(item.get("size_bytes") or 0)} bytes represented by:

    ' + f'
    {page_links}
    ' + f'{warning_html}' + f'
    ' + f'propose memories' + f'{_copy_button(memory_prompt_value, "Copy memory prompt")}' + f'{_copy_button(query_prompt_value, "Copy query prompt")}' + f'
    ' + f'
    ' + ) + more_html = "" + if completion.get("has_more"): + more_html = f'

    Showing {int(completion.get("shown_count") or 0)} of {int(completion.get("represented_count") or 0)} represented sources.

    ' + next_prompt = str(completion.get("next_prompt") or "") + next_html_for_completion = "" + if next_prompt: + next_html_for_completion = ( + f'
    After ingest' + f'

    Use this to confirm the new context is retrievable before moving on.

    ' + f'{html.escape(next_prompt)}{_copy_button(next_prompt, "Copy prompt")}
    ' + ) + completion_html = ( + f'

    {html.escape(str(completion.get("title") or "Ingest Completion"))}

    ' + f'all pages
    ' + f'

    {html.escape(str(completion.get("summary") or ""))}

    ' + f'
    {cards}
    {more_html}{next_html_for_completion}
    ' + ) + + raw_form = ( + f'

    Add Raw Source

    memory proposals
    ' + f'

    Paste a note, article excerpt, transcript, or project context. Link saves it under ' + f'raw/ locally, blocks secret-looking values, and gives you the exact ingest prompt.

    ' + f'
    ' + f'
    ' + f'' + f'' + f'
    ' + f'' + f'
    ' + f'Nothing becomes durable memory until you approve memory proposals.
    ' + f'
    ' + f'
    ' + ) + + body = ( + f'' + f'

    Ingest

    ' + f'

    {html.escape(str(guidance.get("summary") or "Check raw source ingest state."))}

    ' + f'{raw_form}' + f'{stats}' + f'{safety_html}' + f'{source_warning_html}' + f'{next_html}' + f'{guide_html}' + f'{actions}' + f'{plan_html}' + f'{completion_html}' + f'{pending_html}' + f'{notes_html}' + ) + return _layout("Ingest", body) + + +def _render_inbox(project: str | None = None): + inbox = _memory_inbox(limit=50, project=project) + review_count = inbox["review_count"] + stats = ( + f'
    ' + f'
    {review_count}review
    ' + f'
    ' + ) + if inbox["counts_by_severity"]: + severity = ", ".join( + f"{html.escape(name)}: {count}" + for name, count in inbox["counts_by_severity"].items() + ) + severity_html = f"

    Severity: {severity}

    " + else: + severity_html = "" + + if not inbox["items"]: + content = "

    Inbox is clear.

    " + else: + items = "" + for item in inbox["items"]: + summary = item.get("tldr") or item.get("snippet") or "" + meta = f'{item.get("memory_type", "")} · {item.get("scope", "")} · {item.get("status", "")}' + issues = "".join( + f'
  • {html.escape(str(issue["severity"]))} ' + f'{html.escape(str(issue["code"]))}: {html.escape(str(issue["message"]))}
  • ' + for issue in item["issues"] + ) + primary = item.get("primary_action") or {} + primary_html = "" + if primary: + primary_html = ( + f'

    Next: {html.escape(str(primary.get("label") or ""))} ' + f'- {html.escape(str(primary.get("description") or ""))}

    ' + ) + actions_html = _render_memory_action_commands(item.get("actions") or []) + items += ( + f'
  • {html.escape(str(item["title"]))}' + f'
    {html.escape(meta)}
    ' + f'' + f'{f"{html.escape(str(summary))}" if summary else ""}' + f'
      {issues}
    ' + f'{primary_html}' + f'{actions_html}
  • ' + ) + content = f"
      {items}
    " + + body = ( + f'' + f'

    Memory Review Inbox

    ' + f'
    ' + f'

    Memories that need confirmation, stronger metadata, or cleanup.

    ' + f'{"

    Project: " + html.escape(str(inbox["project"])) + "

    " if inbox["project"] else ""}' + f'{stats}' + f'{severity_html}' + f'{content}' + f'
    ' + ) + return _layout("Memory Review Inbox", body) + + +def _render_explain_memory(identifier: str): + try: + explanation = _memory_explanation(identifier) + except ValueError as exc: + return _layout("Memory Explanation", f'

    Memory not found

    {html.escape(str(exc))}

    ') + + memory = explanation["memory"] + recall_info = explanation["recall"] + review = explanation["review"] + provenance = explanation["provenance"] + lifecycle = explanation["lifecycle"] + graph = explanation["graph"] + summary = memory.get("tldr") or memory.get("snippet") or "" + issues = "".join( + f'
  • {html.escape(str(issue["severity"]))} ' + f'{html.escape(str(issue["code"]))}: {html.escape(str(issue["message"]))}
  • ' + for issue in review["issues"] + ) + issue_html = ( + f'

    Review Issues

      {issues}
    ' + if issues else "

    Review Issues

    No detected issues.

    " + ) + primary = review.get("primary_action") or {} + primary_html = "" + if primary: + primary_html = ( + f'

    Next: {html.escape(str(primary.get("label") or ""))} ' + f'- {html.escape(str(primary.get("description") or ""))}

    ' + ) + action_html = f'

    Actions

    {primary_html}{_render_memory_action_commands(review.get("actions") or [])}' + graph_html = ( + f'

    Graph

    ' + f'

    Forward: {html.escape(", ".join(graph["forward"]) or "none")}

    ' + f'

    Inbound: {html.escape(", ".join(graph["inbound"]) or "none")}

    ' + f'

    Wikilinks: {html.escape(", ".join(graph["wikilinks"]) or "none")}

    ' + ) + logs = "".join( + f'
    {html.escape(entry)}
    ' + for entry in explanation["log_entries"][-5:] + ) + log_html = f"

    Log Entries

    {logs}" if logs else "

    Log Entries

    No matching log entries.

    " + body_html = _md_to_html(str(explanation.get("body") or "")) + body = ( + f'' + f'

    {html.escape(str(memory["title"]))}

    ' + f'

    {html.escape(str(summary))}

    ' + f'
    ' + f'
    Recall{html.escape(str(recall_info["state"]))}
    {html.escape(str(recall_info["reason"]))}
    ' + f'
    Review{html.escape(str(review["status"]))} · {review["issue_count"]} issues
    ' + f'
    Status{html.escape(str(lifecycle["status"]))}
    ' + f'
    Source{html.escape(str(provenance["source"] or "missing"))}
    ' + f'
    Captured{html.escape(str(provenance["date_captured"] or "missing"))}
    ' + f'
    Path{html.escape(str(provenance["path"]))}
    ' + f'
    ' + f'{issue_html}' + f'{action_html}' + f'{graph_html}' + f'{log_html}' + f'

    Memory Body

    {body_html}' + ) + return _layout(f"Explain: {memory['title']}", body) def _render_graph(): - graph = _get_graph_data() - visible_nodes = [n for n in graph["nodes"] if n["category"] != "root"] - visible_ids = {n["id"] for n in visible_nodes} - visible_edges = [ - e for e in graph["edges"] - if e["source"] in visible_ids and e["target"] in visible_ids - ] + full_graph = _get_graph_data() + summary_graph = None + if _core_graph_needs_bounded_overview(full_graph): + summary = _get_graph_summary( + limit=_core_graph_initial_summary_node_limit, + depth=1, + max_edges=_core_graph_initial_summary_edge_limit, + ) + summary_graph = { + "nodes": summary.get("nodes", []), + "edges": summary.get("edges", []), + } + graph_view = _core_graph_initial_payload(full_graph, summary_graph=summary_graph) + visible_nodes = graph_view["nodes"] + visible_edges = graph_view["edges"] + node_count = int(graph_view["node_count"]) + edge_count = int(graph_view["edge_count"]) + total_node_count = int(graph_view["total_node_count"]) + total_edge_count = int(graph_view["total_edge_count"]) + graph_mode = str(graph_view["graph_mode"]) + graph_note = str(graph_view["graph_note"]) nodes_json = _json_for_script(visible_nodes) edges_json = _json_for_script(visible_edges) - node_count = len(visible_nodes) - edge_count = len(visible_edges) if node_count == 0: body = ( @@ -630,9 +1984,8 @@ def _render_graph(): ) return _layout("Knowledge Graph", body) - # Category → color mapping - cat_colors = {"concepts": "#4e79a7", "entities": "#f28e2b", "sources": "#59a14f", - "comparisons": "#e15759", "explorations": "#76b7b2", "root": "#bab0ac"} + cat_colors = _core_graph_category_colors + category_options = _core_graph_category_options(visible_nodes) graph_js = f""" """ - legend_items = "".join( - f'{cat} ' - for cat, c in cat_colors.items() if cat != "root" - ) + legend_items = _core_graph_legend_items(cat_colors) + load_full_button = "" + if graph_mode != "full": + load_full_button = ( + f'' + ) body = ( f'' 
f'

    Knowledge Graph

    ' + f'

    For large wikis, use fullscreen, zoom, pan, and sparse labels. ' + f'The graph is for exploring neighborhoods, not reading every label at once.' + f'{html.escape(graph_note)}

    ' + f'
    ' f'
    ' f'' f'' f'' + f'' + f'{load_full_button}' + f'' + f'' + f'' f'' - f'{node_count} nodes · {edge_count} edges' + f'{node_count}/{total_node_count} nodes · {edge_count}/{total_edge_count} edges' f'
    ' + f'
    ' f'' + f'' + f'
    ' f'
    {legend_items}
    ' + f'
    ' f'{graph_js}' ) - return _layout("Knowledge Graph", body) + return _layout("Knowledge Graph", body, page_class="graph-page") def _render_search(query): @@ -1094,58 +2988,19 @@ def _search_pages(q: str, limit: int = 20) -> list: """Search pages by title, alias, tag, and full-text body. Uses token index to pre-filter candidates, snippet index for zero file I/O. """ - q_lower = q.lower() - pages = _get_all_pages() - # Use pre-built page_map — no dict comprehension per call - scored: list[tuple[int, dict]] = [] - - # Build candidate set: pages that could possibly match - # For single-word queries: use token index (O(1)) to get exact candidate set - # For multi-word/substring: fall back to all pages - is_single_token = bool(re.match(r"^\w+$", q_lower)) - if is_single_token and q_lower in _token_index: - # Fast path: union of fulltext candidates + meta candidates — both O(1) - token_candidates = _token_index[q_lower] - meta_candidates = _meta_token_index.get(q_lower, set()) - candidates = token_candidates | meta_candidates - else: - # Substring query — must check all pages - candidates = {p["name"].lower() for p in pages} - - for stem in candidates: - p = _page_map.get(stem) - if not p: - continue - score = 0 - - # Title match - if q_lower in p["title"].lower(): - score += 10 - # Exact name match - if q_lower == stem: - score += 20 - # Alias match - if any(q_lower in a for a in p.get("aliases", [])): - score += 8 - # Tag match - if any(q_lower in str(t).lower() for t in p.get("tags", [])): - score += 5 - # TLDR match - if q_lower in p.get("tldr", "").lower(): - score += 3 - # Fulltext match - text_lower = _fulltext_index.get(stem, "") - if text_lower and q_lower in text_lower: - score += 2 - - if score > 0: - # Use pre-extracted snippet — zero file I/O - snippet = _snippet_index.get(stem, "") - result = {**p, "score": score, "snippet": snippet} - scored.append((score, result)) - - scored.sort(key=lambda x: (-x[0], x[1]["title"].lower())) - return [r for _, r in 
scored[:limit]] + return _core_search_pages(q, _current_wiki_cache(), limit=limit) + + +def _query_link(query: str, budget: str = "medium", project: str | None = None) -> dict[str, object]: + return _core_query_link( + WIKI_DIR, + query, + _current_wiki_cache(), + _memory_records(), + budget=budget, + project=project, + review_command="review-memory", + ) def _get_context(topic: str) -> dict: @@ -1156,158 +3011,71 @@ def _get_context(topic: str) -> dict: - Its forward links (pages it references) - Related pages (shared tags or backlink overlap) """ - q = topic.lower().strip() - pages = _get_all_pages() - - # Find best matching page - matches = _search_pages(q, limit=5) - if not matches: - return {"topic": topic, "found": False, "pages": []} - - primary = matches[0] - primary_name = primary["name"].lower() - - # Load backlinks - bl_path = WIKI_DIR / "_backlinks.json" - backlinks_data: dict = {} - if bl_path.exists(): - try: - raw = json.loads(bl_path.read_text(encoding="utf-8")) - backlinks_data = raw.get("backlinks", raw) - except Exception: - pass - - inbound = backlinks_data.get(primary_name, []) - - # Load forward links (pages this page links to) - forward: list[str] = [] - forward_seen: set[str] = set() - path = _page_index.get(primary_name) - if path and path.exists(): - text = path.read_text(encoding="utf-8", errors="replace") - _, body = _parse_frontmatter(text) - wl_re = re.compile(r"\[\[([^\]|]+)(?:\|[^\]]*)?\]\]") - page_set = {p["name"].lower() for p in pages} - for m in wl_re.finditer(body): - target = m.group(1).strip().lower() - if target in page_set and target != primary_name and target not in forward_seen: - forward_seen.add(target) - forward.append(target) - - # Build context pages list: primary + inbound + forward (deduplicated) - seen = {primary_name} - context_names = [primary_name] - for name in (inbound + forward): - if name not in seen: - seen.add(name) - context_names.append(name) - - # Load page summaries for context - context_pages = [] - 
for name in context_names[:10]: # cap at 10 to keep context lean - p_path = _page_index.get(name) - if not p_path or not p_path.exists(): - continue - text = p_path.read_text(encoding="utf-8", errors="replace") - meta, body = _parse_frontmatter(text) - # Include full content for primary, TLDR+summary for related - is_primary = name == primary_name - if is_primary: - content = body - else: - # Extract just TLDR + first paragraph - lines = body.split("\n") - summary_lines = [] - for line in lines[:20]: - summary_lines.append(line) - if line.startswith("## ") and len(summary_lines) > 3: - break - content = "\n".join(summary_lines) - - page_meta = next((p for p in pages if p["name"].lower() == name), {}) - context_pages.append({ - "name": name, - "title": meta.get("title", name), - "type": meta.get("type", ""), - "is_primary": is_primary, - "relationship": "primary" if is_primary else ("inbound" if name in inbound else "forward"), - "content": content, - }) - - return { - "topic": topic, - "found": True, - "primary": primary["name"], - "inbound_count": len(inbound), - "forward_count": len(forward), - "pages": context_pages, - } + return _core_context_for_topic(WIKI_DIR, topic, _current_wiki_cache()) # --------------------------------------------------------------------------- # Graph helpers # --------------------------------------------------------------------------- -def _build_backlinks() -> dict[str, list[str]]: - """Scan all wiki pages for [[wikilinks]] and build a reverse index. - Returns {target_stem: [source_stem, ...]} mapping. +def _build_backlinks() -> dict[str, dict[str, list[str]]]: + """Scan all wiki pages for [[wikilinks]] and build graph indexes. + Returns {"backlinks": {target: [sources]}, "forward": {source: [targets]}}. 
""" - backlinks: dict[str, list[str]] = {} - forward_links: dict[str, list[str]] = {} - wikilink_re = re.compile(r"\[\[([^\]|]+)(?:\|[^\]]*)?\]\]") - for md in WIKI_DIR.rglob("*.md"): - if md.name.startswith("."): continue - text = md.read_text(encoding="utf-8", errors="replace") - _, body = _parse_frontmatter(text) - source = md.stem.lower() - for m in wikilink_re.finditer(body): - target = m.group(1).strip().lower() - if target != source: - # Reverse index - backlinks.setdefault(target, []) - if source not in backlinks[target]: - backlinks[target].append(source) - # Forward index - forward_links.setdefault(source, []) - if target not in forward_links[source]: - forward_links[source].append(target) - return {"backlinks": backlinks, "forward": forward_links} + return _core_build_backlinks(WIKI_DIR) def _get_graph_data() -> dict: """Return graph nodes and edges for visualization. Uses in-memory fulltext index — no separate rglob scan. """ - pages = _get_all_pages() - page_ids = {p["name"].lower(): p["name"] for p in pages} - nodes = [{"id": p["name"], "title": p["title"], "category": p["category"], "type": p["type"]} for p in pages] + return _core_graph_data(_current_wiki_cache()) + + +def _get_graph_summary(topic: str = "", limit: int = 40, depth: int = 1, max_edges: int = 120) -> dict: + """Return bounded graph context for agents and large local wikis.""" + return _core_graph_summary( + _current_wiki_cache(), + topic=topic, + limit=limit, + depth=depth, + max_edges=max_edges, + ) + + +def _rebuild_backlinks_payload() -> dict[str, object]: + try: + result = _build_backlinks() + except OSError as exc: + return {"rebuilt": False, "error": f"Could not rebuild backlinks: {exc}"} + bl_path = WIKI_DIR / "_backlinks.json" + _core_atomic_write_json(bl_path, result) + # Invalidate pages cache so next request picks up the new backlinks mtime. 
+ _invalidate_pages_cache() + return {"rebuilt": True, "pages": len(result.get("backlinks", {}))} + + +def _rebuild_index_payload() -> dict[str, object]: + try: + result = _core_rebuild_index(WIKI_DIR, cache=_current_wiki_cache()) + except OSError as exc: + return {"rebuilt": False, "error": f"Could not rebuild index: {exc}"} + _invalidate_pages_cache() + return result + + +def _validate_wiki_payload(strict: bool = False) -> dict[str, object]: + return _core_validate_wiki(WIKI_DIR, strict=strict) - edges = [] - seen_edges: set[tuple[str, str]] = set() - wikilink_re = re.compile(r"\[\[([^\]|]+)(?:\|[^\]]*)?\]\]") - for p in pages: - source = p["name"] - text_lower = _fulltext_index.get(source.lower(), "") - if not text_lower: - continue - # Use original text for wikilink extraction (case-sensitive targets) - path = _page_index.get(source.lower()) - if not path or not path.exists(): - continue - orig = path.read_text(encoding="utf-8", errors="replace") - _, body = _parse_frontmatter(orig) - for m in wikilink_re.finditer(body): - target_key = m.group(1).strip().lower() - target = page_ids.get(target_key) - if not target or target_key == source.lower(): - continue - edge_key = (source, target) - if edge_key in seen_edges: - continue - seen_edges.add(edge_key) - edges.append({"source": source, "target": target}) - return {"nodes": nodes, "edges": edges} +def _link_status_payload(include_validation: bool = False) -> dict[str, object]: + payload = _core_link_status( + WIKI_DIR, + version=LINK_VERSION, + include_validation=include_validation, + ) + payload["api_version"] = API_VERSION + return payload # --------------------------------------------------------------------------- @@ -1318,11 +3086,155 @@ class Handler(http.server.BaseHTTPRequestHandler): def do_HEAD(self): """HEAD requests: send headers only, no body.""" self._head_only = True - self.do_GET() + try: + self.do_GET() + finally: + self._head_only = False + + def do_OPTIONS(self): + self._head_only = False + if 
not self._require_allowed_host(): + return + self._json( + {"error": "CORS preflight is not supported; Link is localhost-only"}, + status=405, + headers={"Allow": "GET, HEAD, POST"}, + ) + + def do_PUT(self): + self._method_not_allowed() + + def do_PATCH(self): + self._method_not_allowed() + + def do_DELETE(self): + self._method_not_allowed() + + def do_TRACE(self): + self._method_not_allowed() + + def do_CONNECT(self): + self._method_not_allowed() + + def do_POST(self): self._head_only = False + if not self._require_allowed_host(): + return + if not self._require_mutation_rate_limit(): + return + parsed = urllib.parse.urlparse(self.path) + path = parsed.path + if path == "/api/rebuild-index": + if not self._require_local_action_header({"rebuilt": False}): + return + payload, error, status = self._read_json_body() + if error: + self._json({"rebuilt": False, "error": error}, status=status) + return + assert payload is not None + self._json(_rebuild_index_payload()) + return + if path == "/api/rebuild-backlinks": + if not self._require_local_action_header({"rebuilt": False}): + return + payload, error, status = self._read_json_body() + if error: + self._json({"rebuilt": False, "error": error}, status=status) + return + assert payload is not None + self._json(_rebuild_backlinks_payload()) + return + if path == "/api/raw-source": + if not self._require_local_action_header({"created": False}): + return + payload, error, status = self._read_json_body() + if error: + self._json({"created": False, "error": error}, status=status) + return + assert payload is not None + result, http_status = _create_raw_source_payload(payload) + self._json(result, status=http_status) + return + if path == "/api/propose-memories": + payload, error, status = self._read_json_body() + if error: + self._json({"proposed": False, "error": error, "count": 0, "proposals": []}, status=status) + return + assert payload is not None + text = _clean_text_input(payload.get("text"), max_len=MAX_POST_BYTES) 
+ if not text.strip(): + self._json({"proposed": False, "error": "text required", "count": 0, "proposals": []}, status=400) + return + source = _clean_text_input(payload.get("source") or "http", max_len=500) or "http" + limit, limit_error = _parse_search_limit(str(payload.get("limit", "10"))) + if limit_error: + self._json({"proposed": False, "error": limit_error, "count": 0, "proposals": []}, status=400) + return + result = _propose_memories_from_text( + text, + source=source, + limit=min(limit, 20), + project=_clean_text_input(payload.get("project"), max_len=80), + ) + self._json(result) + return + if path in {"/api/remember-memory", "/api/update-memory"}: + if not self._require_local_action_header({"saved": False}): + return + payload, error, status = self._read_json_body() + if error: + self._json({"saved": False, "error": error}, status=status) + return + assert payload is not None + try: + if path == "/api/remember-memory": + result = _remember_memory_from_web(payload) + http_status = 200 if result.get("created") else 409 + self._json({"saved": bool(result.get("created")), **result}, status=http_status) + else: + result = _update_memory_from_web(payload) + http_status = 200 if result.get("updated") else 409 + self._json({"saved": bool(result.get("updated")), **result}, status=http_status) + except ValueError as exc: + self._json({"saved": False, "error": str(exc)}, status=400) + return + if path in {"/api/review-memory", "/api/archive-memory", "/api/restore-memory"}: + if not self._require_local_action_header(): + return + payload, error, status = self._read_json_body() + if error: + self._json({"updated": False, "error": error}, status=status) + return + assert payload is not None + identifier = _clean_text_input(payload.get("memory") or payload.get("identifier"), max_len=300) + if not identifier: + self._json({"updated": False, "error": "memory required"}, status=400) + return + try: + if path == "/api/review-memory": + result = _mark_memory_reviewed( + 
identifier, + note=_clean_text_input(payload.get("note"), max_len=500), + ) + elif path == "/api/archive-memory": + result = _set_memory_status( + identifier, + "archived", + reason=_clean_text_input(payload.get("reason"), max_len=500), + ) + else: + result = _set_memory_status(identifier, "active") + except ValueError as exc: + self._json({"updated": False, "error": str(exc)}, status=404) + return + self._json(result) + return + self._json({"error": "POST endpoint not found"}, status=404) def do_GET(self): self._head_only = getattr(self, '_head_only', False) + if not self._require_allowed_host(): + return parsed = urllib.parse.urlparse(self.path) path, query = parsed.path, urllib.parse.parse_qs(parsed.query) if path == "/logo.svg": @@ -1330,45 +3242,210 @@ def do_GET(self): elif path == "/logo.png": self._file(Path(__file__).parent / "logo.png", "image/png") elif path.startswith("/raw/"): - raw_path = Path(__file__).parent / "raw" / urllib.parse.unquote(path[5:]) - ext = raw_path.suffix.lower() - ctypes = {".png": "image/png", ".jpg": "image/jpeg", ".jpeg": "image/jpeg", - ".gif": "image/gif", ".webp": "image/webp", ".svg": "image/svg+xml", - ".pdf": "application/pdf"} - self._file(raw_path, ctypes.get(ext, "application/octet-stream")) + raw_path, content_type = _resolve_raw_static_path(path[5:]) + if raw_path and content_type: + self._file(raw_path, content_type) + else: + self._err("file") elif path in ("/", ""): self._ok(_render_home()) + elif path == "/ingest": + self._ok(_render_ingest()) + elif path == "/brief": + self._ok(_render_brief( + query=_query_text(query, "q", "query"), + project=_query_text(query, "project", max_len=80), + )) + elif path == "/propose": + self._ok(_render_propose( + project=_query_text(query, "project", max_len=80), + source=_query_text(query, "source", max_len=500), + )) + elif path == "/prompts": + self._ok(_render_prompts(project=_query_text(query, "project", max_len=80))) + elif path == "/memory": + 
self._ok(_render_memory_dashboard(project=_query_text(query, "project", max_len=80))) + elif path == "/audit": + self._ok(_render_memory_audit(project=_query_text(query, "project", max_len=80))) + elif path == "/inbox": + self._ok(_render_inbox(project=_query_text(query, "project", max_len=80))) + elif path == "/captures": + self._ok(_render_captures(project=_query_text(query, "project", max_len=80))) + elif path == "/explain-memory": + identifier = _query_text(query, "memory", "name", max_len=300) + self._ok(_render_explain_memory(identifier)) + elif path == "/profile": + self._ok(_render_profile(project=_query_text(query, "project", max_len=80))) elif path == "/all": - self._ok(_render_all()) + self._ok(_render_all(query)) elif path == "/graph": self._ok(_render_graph()) elif path == "/search": - self._ok(_render_search(query.get("q", [""])[0])) + self._ok(_render_search(_query_text(query, "q"))) elif path.startswith("/page/"): page = _find_page(urllib.parse.unquote(path[6:])) if page: self._ok(_render_page(page)) else: self._err(urllib.parse.unquote(path[6:])) elif path == "/api/pages": self._json(_all_pages()) + elif path == "/api/page-list": + limit, limit_error = _core_parse_bounded_int(query.get("limit", ["100"])[0], "limit", 100, 1, 1000) + offset, offset_error = _core_parse_bounded_int(query.get("offset", ["0"])[0], "offset", 0, 0, 1000000) + error = limit_error or offset_error + if error: + self._json({"error": error}, status=400) + else: + assert limit is not None + assert offset is not None + self._json(_page_list_payload( + category=query.get("category", [""])[0], + page_type=query.get("type", [""])[0] or query.get("page_type", [""])[0], + maturity=query.get("maturity", [""])[0], + limit=limit, + offset=offset, + include_all=query.get("all", ["false"])[0].lower() in {"1", "true", "yes"}, + )) + elif path == "/api/status": + include_validation = query.get("validate", ["false"])[0].lower() in {"1", "true", "yes"} + 
self._json(_link_status_payload(include_validation=include_validation)) + elif path == "/api/prompts": + self._json(_starter_prompts_payload(project=_query_text(query, "project", max_len=80))) + elif path == "/api/ingest-status": + self._json(_ingest_status()) elif path == "/api/backlinks": data, error = _load_backlinks_index() if error: self._json({"error": error}, status=500) else: self._json(data) + elif path == "/api/page-links": + limit, limit_error = _core_parse_bounded_int(query.get("limit", ["100"])[0], "limit", 100, 1, 1000) + offset, offset_error = _core_parse_bounded_int(query.get("offset", ["0"])[0], "offset", 0, 0, 1000000) + error = limit_error or offset_error + if error: + self._json({"error": error}, status=400) + else: + assert limit is not None + assert offset is not None + payload, status = _page_links_payload( + query.get("page", [""])[0] or query.get("page_name", [""])[0], + limit=limit, + offset=offset, + include_all=query.get("all", ["false"])[0].lower() in {"1", "true", "yes"}, + ) + self._json(payload, status=status) elif path == "/api/rebuild-backlinks": - result = _build_backlinks() - bl_path = WIKI_DIR / "_backlinks.json" - bl_path.write_text(json.dumps(result, indent=2), encoding="utf-8") - # Invalidate pages cache so next request picks up the new backlinks mtime - global _pages_cache, _pages_cache_mtime - _pages_cache = None - _pages_cache_mtime = 0.0 - self._json({"rebuilt": True, "pages": len(result.get("backlinks", {}))}) + self._json({"error": "use POST with JSON body: {}"}, status=405) + elif path == "/api/rebuild-index": + self._json({"error": "use POST with JSON body: {}"}, status=405) + elif path == "/api/validate": + strict = query.get("strict", ["false"])[0].lower() in {"1", "true", "yes"} + payload = _validate_wiki_payload(strict=strict) + self._json(payload, status=200 if payload.get("passed") else 422) elif path == "/api/graph": self._json(_get_graph_data()) + elif path == "/api/graph-summary": + limit, limit_error = 
_core_parse_bounded_int(query.get("limit", ["40"])[0], "limit", 40, 1, 250) + depth, depth_error = _core_parse_bounded_int(query.get("depth", ["1"])[0], "depth", 1, 0, 3) + max_edges, edge_error = _core_parse_bounded_int(query.get("max_edges", ["120"])[0], "max_edges", 120, 0, 1000) + error = limit_error or depth_error or edge_error + if error: + self._json({"error": error}, status=400) + else: + assert limit is not None + assert depth is not None + assert max_edges is not None + self._json(_get_graph_summary( + topic=_query_text(query, "topic", "q"), + limit=limit, + depth=depth, + max_edges=max_edges, + )) + elif path == "/api/memory-profile": + limit, error = _parse_search_limit(query.get("limit", ["10"])[0]) + if error: + self._json({"error": error}, status=400) + else: + self._json(_memory_profile(limit=limit, project=_query_text(query, "project", max_len=80))) + elif path == "/api/memory-dashboard": + limit, error = _parse_search_limit(query.get("limit", ["12"])[0]) + if error: + self._json({"error": error}, status=400) + else: + self._json(_memory_dashboard(limit=limit, project=_query_text(query, "project", max_len=80))) + elif path == "/api/memory-brief": + limit, error = _parse_search_limit(query.get("limit", ["6"])[0]) + if error: + self._json({"error": error}, status=400) + else: + self._json(_memory_brief( + query=_query_text(query, "q", "query"), + limit=limit, + project=_query_text(query, "project", max_len=80), + )) + elif path == "/api/query-link": + query_text = _query_text(query, "q", "query") + if not query_text.strip(): + self._json({"found": False, "error": "query parameter required", "context_packet": []}, status=400) + else: + self._json(_query_link( + query=query_text, + budget=query.get("budget", ["medium"])[0], + project=_query_text(query, "project", max_len=80), + )) + elif path == "/api/memory-audit": + limit, error = _parse_search_limit(query.get("limit", ["10"])[0]) + if error: + self._json({"error": error}, status=400) + else: + 
self._json(_memory_audit(limit=limit, project=_query_text(query, "project", max_len=80))) + elif path == "/api/memory-inbox": + limit, error = _parse_search_limit(query.get("limit", ["20"])[0]) + if error: + self._json({"error": error}, status=400) + else: + include_archived = query.get("include_archived", ["false"])[0].lower() in {"1", "true", "yes"} + self._json(_memory_inbox( + limit=limit, + include_archived=include_archived, + project=_query_text(query, "project", max_len=80), + )) + elif path == "/api/capture-inbox": + limit, error = _parse_search_limit(query.get("limit", ["20"])[0]) + if error: + self._json({"error": error}, status=400) + else: + self._json(_capture_inbox( + limit=limit, + project=_query_text(query, "project", max_len=80), + )) + elif path == "/api/proposal-sources": + limit, error = _parse_search_limit(query.get("limit", ["50"])[0]) + if error: + self._json({"error": error, "sources": []}, status=400) + else: + self._json(_proposal_sources(limit=min(limit, 100))) + elif path == "/api/proposal-source": + source_path = query.get("path", [""])[0] + payload, status = _proposal_source_payload(source_path) + self._json(payload, status=status) + elif path == "/api/raw-source": + self._json({"error": "use POST with JSON body: {\"text\": \"...\"}"}, status=405) + elif path == "/api/propose-memories": + self._json({"error": "use POST with JSON body: {\"text\": \"...\"}"}, status=405) + elif path in {"/api/review-memory", "/api/archive-memory", "/api/restore-memory"}: + self._json({"error": "use POST with JSON body: {\"memory\": \"...\"}"}, status=405) + elif path == "/api/explain-memory": + identifier = _query_text(query, "memory", "name", max_len=300) + if not identifier: + self._json({"found": False, "error": "memory parameter required"}, status=400) + else: + try: + self._json(_memory_explanation(identifier)) + except ValueError as exc: + self._json({"found": False, "error": str(exc)}, status=404) elif path == "/api/search": - q = query.get("q", 
[""])[0].strip() + q = _query_text(query, "q") limit, error = _parse_search_limit(query.get("limit", ["20"])[0]) if error: self._json({"error": error, "results": []}, status=400) @@ -1379,9 +3456,9 @@ def do_GET(self): results = _search_pages(q, limit=limit) self._json({"query": q, "count": len(results), "results": results}) elif path == "/api/context": - topic = query.get("topic", [""])[0].strip() or query.get("q", [""])[0].strip() + topic = _query_text(query, "topic", "q") if not topic: - self._json({"error": "topic parameter required"}) + self._json({"error": "topic parameter required"}, status=400) else: self._json(_get_context(topic)) else: @@ -1393,7 +3470,7 @@ def _ok(self, body: str): self.send_header("Content-Type", "text/html; charset=utf-8") self._security_headers() self.send_header("Content-Length", str(len(encoded))) - self.send_header("Cache-Control", "no-cache") + self._no_store_headers() self.end_headers() if not getattr(self, '_head_only', False): self.wfile.write(encoded) @@ -1404,25 +3481,108 @@ def _err(self, name: str): self.send_header("Content-Type", "text/html; charset=utf-8") self._security_headers() self.send_header("Content-Length", str(len(encoded))) - self.send_header("Cache-Control", "no-cache") + self._no_store_headers() self.end_headers() if not getattr(self, '_head_only', False): self.wfile.write(encoded) - def _json(self, data, status: int = 200): + def _json(self, data, status: int = 200, headers=None): encoded = json.dumps(data).encode() self.send_response(status) self.send_header("Content-Type", "application/json") self._security_headers() + self._no_store_headers() + for key, value in (headers or {}).items(): + self.send_header(str(key), str(value)) self.send_header("Content-Length", str(len(encoded))) self.end_headers() if not getattr(self, '_head_only', False): self.wfile.write(encoded) - def _security_headers(self): - self.send_header("X-Content-Type-Options", "nosniff") - self.send_header("Referrer-Policy", "no-referrer") - 
self.send_header("Cross-Origin-Resource-Policy", "same-origin") + def _require_allowed_host(self) -> bool: + allowed, error = _core_validate_local_host_header(self.headers.get("Host", "")) + if allowed: + return True + self._json({"error": error}, status=403) + return False + + def _require_local_action_header(self, error_payload: dict[str, object] | None = None) -> bool: + value = self.headers.get(LOCAL_ACTION_HEADER, "").strip().lower() + if value in LOCAL_ACTION_VALUES: + allowed, error = _core_validate_local_browser_source_headers( + self.headers.get("Origin", ""), + self.headers.get("Referer", ""), + ) + if allowed: + return True + payload = dict(error_payload or {"updated": False}) + payload["error"] = error + self._json(payload, status=403) + return False + payload = dict(error_payload or {"updated": False}) + payload["error"] = f"{LOCAL_ACTION_HEADER} header required for local mutations" + self._json({ + **payload, + }, status=403) + return False + + def _require_mutation_rate_limit(self) -> bool: + client_host = self.client_address[0] if self.client_address else "local" + allowed, retry_after = _mutation_rate_limiter.check(client_host) + if allowed: + return True + self._json( + { + "error": "local mutation rate limit exceeded", + "retry_after_seconds": retry_after, + }, + status=429, + headers={"Retry-After": str(retry_after)}, + ) + return False + + def _method_not_allowed(self) -> None: + self._head_only = False + if not self._require_allowed_host(): + return + self._json( + {"error": "method not allowed; Link supports GET, HEAD, and POST"}, + status=405, + headers={"Allow": "GET, HEAD, POST"}, + ) + + def _read_json_body(self) -> tuple[dict | None, str | None, int]: + content_type = self.headers.get("Content-Type", "") + media_type = content_type.split(";", 1)[0].strip().lower() + if media_type != "application/json": + return None, "Content-Type must be application/json", 415 + raw_length = self.headers.get("Content-Length") + if raw_length is None: + 
return None, "Content-Length required", 411 + try: + length = int(raw_length) + except ValueError: + return None, "invalid Content-Length", 400 + if length < 0: + return None, "invalid Content-Length", 400 + if length > MAX_POST_BYTES: + return None, f"request body too large; max {MAX_POST_BYTES} bytes", 413 + raw = self.rfile.read(length) + try: + payload = json.loads(raw.decode("utf-8") or "{}") + except (UnicodeDecodeError, json.JSONDecodeError): + return None, "invalid JSON body", 400 + if not isinstance(payload, dict): + return None, "JSON body must be an object", 400 + return payload, None, 200 + + def _security_headers(self, content_security_policy: str = CONTENT_SECURITY_POLICY): + for key, value in _core_local_security_headers(API_VERSION, content_security_policy): + self.send_header(key, value) + + def _no_store_headers(self): + for key, value in _core_local_no_store_headers(): + self.send_header(key, value) def _file(self, fpath, content_type): fpath = _safe_resolve(fpath) @@ -1433,9 +3593,11 @@ def _file(self, fpath, content_type): data = fpath.read_bytes() self.send_response(200) self.send_header("Content-Type", content_type) - self._security_headers() if content_type == "image/svg+xml": - self.send_header("Content-Security-Policy", "default-src 'none'; img-src 'self' data:; style-src 'unsafe-inline'; script-src 'none'; object-src 'none'; sandbox") + self._security_headers(content_security_policy=SVG_CONTENT_SECURITY_POLICY) + else: + self._security_headers() + self._no_store_headers() self.send_header("Content-Length", str(len(data))) self.end_headers() if not getattr(self, '_head_only', False): @@ -1446,15 +3608,66 @@ def _file(self, fpath, content_type): def log_message(self, *a): pass +def _parse_serve_args(argv: list[str], default_port: int = PORT, default_root: Path = ROOT) -> tuple[int, Path]: + port = default_port + root = default_root + for index, arg in enumerate(argv): + if arg in {"--host", "--bind"} or arg.startswith("--host=") or 
arg.startswith("--bind="): + raise SystemExit("Link serve is local-only; host/bind options are not supported.") + if arg == "--port": + if index + 1 >= len(argv): + raise SystemExit("--port requires a value") + try: + port = int(argv[index + 1]) + except ValueError as exc: + raise SystemExit("--port must be an integer") from exc + elif arg.startswith("--port="): + try: + port = int(arg.split("=", 1)[1]) + except ValueError as exc: + raise SystemExit("--port must be an integer") from exc + elif arg == "--root": + if index + 1 >= len(argv): + raise SystemExit("--root requires a value") + root = Path(argv[index + 1]).expanduser().resolve() + elif arg.startswith("--root="): + root = Path(arg.split("=", 1)[1]).expanduser().resolve() + if port < 1 or port > 65535: + raise SystemExit("--port must be between 1 and 65535") + return port, root + + +def _parse_serve_port(argv: list[str], default: int = PORT) -> int: + port, _ = _parse_serve_args(argv, default_port=default, default_root=ROOT) + return port + + +def _serve_bind_error_message(exc: OSError, port: int) -> str: + if exc.errno in {errno.EADDRINUSE, 48, 98}: + next_port = port + 1 if port < 65535 else 3000 + return ( + f"Link could not start because 127.0.0.1:{port} is already in use.\n" + f"Try another port, for example: python serve.py --port {next_port}" + ) + return f"Link could not start local server on 127.0.0.1:{port}: {exc}" + + def main(): - global PORT - for i, a in enumerate(sys.argv[1:]): - if a == "--port" and i + 1 < len(sys.argv) - 1: PORT = int(sys.argv[i+2]) + global PORT, WIKI_DIR, RAW_DIR + PORT, root = _parse_serve_args(sys.argv[1:], default_port=PORT, default_root=ROOT) + WIKI_DIR = root / "wiki" + RAW_DIR = root / "raw" socketserver.TCPServer.allow_reuse_address = True - with socketserver.TCPServer(("127.0.0.1", PORT), Handler) as s: - print(f" Link → http://localhost:{PORT}") - try: s.serve_forever() - except KeyboardInterrupt: print("\n stopped.") + try: + with 
socketserver.TCPServer(("127.0.0.1", PORT), Handler) as s: + print(f" Link → http://127.0.0.1:{PORT}") + print(" Local-only: bound to 127.0.0.1; no public host mode.") + print(" No auth: do not expose this server without your own authentication layer.") + try: s.serve_forever() + except KeyboardInterrupt: print("\n stopped.") + except OSError as exc: + print(_serve_bind_error_message(exc, PORT), file=sys.stderr) + raise SystemExit(1) from exc if __name__ == "__main__": diff --git a/tests/test_backup_core.py b/tests/test_backup_core.py new file mode 100644 index 0000000..976fb41 --- /dev/null +++ b/tests/test_backup_core.py @@ -0,0 +1,108 @@ +import os +import sys +import tarfile +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "mcp_package")) + +from link_core.backup import BackupError, create_backup, list_backups + + +class BackupCoreTests(unittest.TestCase): + def make_root(self) -> Path: + root = Path(tempfile.mkdtemp(prefix="link-backup-core-")) + (root / "wiki/concepts").mkdir(parents=True) + (root / "raw").mkdir() + (root / "wiki/index.md").write_text("# Index\n", encoding="utf-8") + (root / "wiki/concepts/agent-memory.md").write_text("# Agent Memory\n", encoding="utf-8") + (root / "raw/secret-session.md").write_text("api key: test-secret\n", encoding="utf-8") + return root + + def test_backup_includes_wiki_and_excludes_raw_by_default(self): + root = self.make_root() + + result = create_backup(root, label="unit test") + + self.assertTrue(result["created"]) + self.assertEqual(result["included"], ["wiki"]) + self.assertFalse(result["include_raw"]) + archive = Path(result["path"]) + self.assertTrue(archive.exists()) + with tarfile.open(archive, "r:gz") as tar: + names = set(tar.getnames()) + self.assertIn("wiki/index.md", names) + self.assertIn("wiki/concepts/agent-memory.md", names) + self.assertNotIn("raw/secret-session.md", names) + + def 
test_backup_can_include_raw_when_requested_and_lists_archives(self): + root = self.make_root() + + result = create_backup(root, label="with raw", include_raw=True) + listing = list_backups(root) + + with tarfile.open(result["path"], "r:gz") as tar: + names = set(tar.getnames()) + self.assertIn("raw/secret-session.md", names) + self.assertEqual(listing["count"], 1) + self.assertEqual(listing["backups"][0]["name"], result["name"]) + + def test_list_backups_reports_unreadable_archive_metadata(self): + root = self.make_root() + result = create_backup(root, label="unit test") + archive = Path(result["path"]) + original_stat = Path.stat + + def flaky_stat(path: Path, *args, **kwargs): + if path.name == archive.name: + raise OSError("permission denied") + return original_stat(path, *args, **kwargs) + + with patch.object(Path, "stat", flaky_stat): + listing = list_backups(root) + + self.assertEqual(listing["count"], 0) + self.assertEqual(listing["warning_count"], 1) + self.assertEqual(listing["warnings"][0]["backup"], archive.name) + + def test_backup_requires_wiki(self): + root = Path(tempfile.mkdtemp(prefix="link-backup-core-")) + + with self.assertRaises(FileNotFoundError): + create_backup(root) + + def test_backup_failure_removes_partial_archive(self): + root = self.make_root() + original_add = tarfile.TarFile.add + + def flaky_add(tar, name, *args, **kwargs): + if Path(name).name == "agent-memory.md": + raise OSError("permission denied") + return original_add(tar, name, *args, **kwargs) + + with patch.object(tarfile.TarFile, "add", flaky_add): + with self.assertRaisesRegex(BackupError, "wiki/concepts/agent-memory.md"): + create_backup(root, label="partial") + + self.assertEqual(list((root / ".link-backups").glob("*.tar.gz")), []) + + @unittest.skipUnless(hasattr(os, "symlink"), "symlinks are not available") + def test_backup_skips_symlinks(self): + root = self.make_root() + outside = root.parent / "outside-secret.txt" + outside.write_text("outside", encoding="utf-8") 
+ os.symlink(outside, root / "wiki/concepts/outside-link.md") + + result = create_backup(root) + + with tarfile.open(result["path"], "r:gz") as tar: + names = set(tar.getnames()) + self.assertNotIn("wiki/concepts/outside-link.md", names) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_benchmark_core.py b/tests/test_benchmark_core.py new file mode 100644 index 0000000..09158d3 --- /dev/null +++ b/tests/test_benchmark_core.py @@ -0,0 +1,105 @@ +import sys +import unittest +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "mcp_package")) + +from link_core.benchmark import benchmark_health # noqa: E402 + + +class BenchmarkCoreTests(unittest.TestCase): + def test_benchmark_health_passes_fast_sqlite_search(self): + payload = { + "pages": 1200, + "search_backend": "sqlite-fts", + "timings": { + "cache": 0.2, + "search": 0.01, + "query": 0.03, + "graph_summary": 0.01, + "page_list": 0.01, + "graph_initial": 0.01, + "graph": 0.04, + }, + } + + health = benchmark_health(payload) + + self.assertEqual(health["status"], "pass") + self.assertEqual(health["label"], "interactive") + self.assertEqual(health["summary"], "Ready for interactive local agent memory.") + self.assertEqual(health["warnings"], []) + self.assertEqual(health["recommendations"], []) + + def test_benchmark_health_warns_on_slow_paths(self): + payload = { + "pages": 20, + "search_backend": "sqlite-fts", + "timings": { + "cache": 0.2, + "search": 1.5, + "query": 0.03, + "graph_summary": 0.01, + "page_list": 0.01, + "graph_initial": 0.01, + "graph": 0.04, + }, + } + + health = benchmark_health(payload) + + self.assertEqual(health["status"], "warn") + self.assertEqual(health["label"], "review") + self.assertIn("search took 1.5000s", health["warnings"][0]) + self.assertIn("Review recommended", health["summary"]) + self.assertIn("Run link doctor --fix", health["recommendations"][0]) + self.assertIn("sqlite3/FTS5", 
health["recommendations"][1]) + + def test_benchmark_health_warns_on_large_token_fallback(self): + payload = { + "pages": 1000, + "search_backend": "token-index", + "timings": { + "cache": 0.2, + "search": 0.01, + "query": 0.03, + "graph_summary": 0.01, + "page_list": 0.01, + "graph_initial": 0.01, + "graph": 0.04, + }, + } + + health = benchmark_health(payload) + + self.assertEqual(health["status"], "warn") + self.assertIn("SQLite FTS", health["warnings"][0]) + self.assertIn("sqlite3/FTS5", health["recommendations"][1]) + + def test_benchmark_health_gives_graph_specific_recommendations(self): + payload = { + "pages": 2000, + "search_backend": "sqlite-fts", + "timings": { + "cache": 0.2, + "search": 0.01, + "query": 0.03, + "graph_summary": 0.01, + "page_list": 0.01, + "graph_initial": 1.4, + "graph": 2.4, + }, + } + + health = benchmark_health(payload) + + self.assertEqual(health["status"], "warn") + self.assertTrue(any("graph_initial took" in warning for warning in health["warnings"])) + self.assertTrue(any("graph took" in warning for warning in health["warnings"])) + self.assertIn("focused neighborhoods", " ".join(health["recommendations"])) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_capture_core.py b/tests/test_capture_core.py new file mode 100644 index 0000000..94e925f --- /dev/null +++ b/tests/test_capture_core.py @@ -0,0 +1,179 @@ +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +from mcp_package.link_core.capture import ( + capture_filename, + capture_inbox, + capture_notes_from_markdown, + capture_records, + capture_review_summary, + capture_title, + mcp_capture_commands, + resolve_capture_file, +) + + +class CaptureCoreTests(unittest.TestCase): + def test_capture_title_uses_explicit_title_first(self): + self.assertEqual( + capture_title("ignored", "inline", " Sprint planning notes "), + "Sprint planning notes", + ) + + def test_capture_title_supports_cli_path_sources(self): + 
self.assertEqual( + capture_title("", "raw/first-memory.md", path_source=True), + "Memory capture: First Memory", + ) + + def test_capture_title_supports_mcp_source_labels(self): + self.assertEqual( + capture_title("", "daily standup", default_source="mcp"), + "Memory capture: daily standup", + ) + + def test_capture_title_falls_back_to_first_note_line(self): + self.assertEqual( + capture_title("\n\nRemember that Link is local agent memory.\nMore detail."), + "Memory capture: Remember that Link is local agent memory", + ) + + def test_capture_filename_is_unique_and_slugged(self): + root = Path(tempfile.mkdtemp(prefix="link-capture-filename-")) + first = capture_filename("2026-05-06T01:02:03Z", "Memory capture: First Memory", root) + first.write_text("# first\n", encoding="utf-8") + second = capture_filename("2026-05-06T01:02:03Z", "Memory capture: First Memory", root) + + self.assertEqual(first.name, "20260506T010203Z-first-memory.md") + self.assertEqual(second.name, "20260506T010203Z-first-memory-2.md") + + def test_resolve_capture_file_accepts_supported_root_relative_forms(self): + root = Path(tempfile.mkdtemp(prefix="link-capture-core-")) + capture_dir = root / "raw" / "memory-captures" + capture_dir.mkdir(parents=True) + capture = capture_dir / "session.md" + capture.write_text("# Session\n", encoding="utf-8") + + self.assertEqual(resolve_capture_file(root, "raw/memory-captures/session.md"), capture.resolve()) + self.assertEqual(resolve_capture_file(root, "session.md"), capture.resolve()) + self.assertEqual(resolve_capture_file(root, "session"), capture.resolve()) + + def test_resolve_capture_file_rejects_paths_outside_root(self): + root = Path(tempfile.mkdtemp(prefix="link-capture-core-")) + outside = Path(tempfile.mkdtemp(prefix="link-capture-outside-")) / "session.md" + outside.write_text("# Outside\n", encoding="utf-8") + capture_dir = root / "raw" / "memory-captures" + capture_dir.mkdir(parents=True) + symlink = capture_dir / "outside.md" + try: + 
symlink.symlink_to(outside) + except OSError: + symlink = None + + self.assertIsNone(resolve_capture_file(root, str(outside))) + self.assertIsNone(resolve_capture_file(root, "../session.md")) + if symlink is not None: + self.assertIsNone(resolve_capture_file(root, "outside.md")) + + def test_capture_notes_from_markdown_extracts_notes_section(self): + meta, notes = capture_notes_from_markdown( + "---\ntitle: Session\nproject: link\n---\n\n" + "# Session\n\n" + "Intro should not be used.\n\n" + "## Notes\n\n" + "Important memory candidate.\n\n" + "## Proposals\n\n" + "- Ignore generated proposals.\n" + ) + + self.assertEqual(meta["title"], "Session") + self.assertEqual(meta["project"], "link") + self.assertEqual(notes, "Important memory candidate.") + + def test_capture_records_redact_snippets_and_filter_project(self): + root = Path(tempfile.mkdtemp(prefix="link-capture-core-")) + capture_dir = root / "raw" / "memory-captures" + capture_dir.mkdir(parents=True) + fake_key = "sk-" + "a" * 48 + (capture_dir / "alpha.md").write_text( + "---\n" + "title: Alpha\n" + "project: alpha\n" + "date_captured: 2026-05-05T00:00:00Z\n" + "---\n\n" + "# Alpha\n\n" + "## Notes\n\n" + f"Remember alpha. 
Secret {fake_key}\n", + encoding="utf-8", + ) + (capture_dir / "beta.md").write_text( + "---\n" + "title: Beta\n" + "project: beta\n" + "date_captured: 2026-05-04T00:00:00Z\n" + "---\n\n" + "# Beta\n\n" + "## Notes\n\n" + "Remember beta.\n", + encoding="utf-8", + ) + + records = capture_records(root, project="alpha", commands_for=mcp_capture_commands) + inbox = capture_inbox(root, project="alpha", commands_for=mcp_capture_commands) + + self.assertEqual([record["title"] for record in records], ["Alpha"]) + self.assertEqual(records[0]["secret_warnings"], ["OpenAI API key"]) + self.assertIn("[redacted-secret]", records[0]["snippet"]) + self.assertNotIn(fake_key, records[0]["snippet"]) + self.assertIn("accept_capture", records[0]["commands"]["accept"]) + self.assertEqual(inbox["count"], 1) + self.assertEqual(inbox["warning_count"], 1) + self.assertEqual(inbox["project"], "alpha") + + def test_capture_inbox_reports_unreadable_captures(self): + root = Path(tempfile.mkdtemp(prefix="link-capture-core-")) + capture_dir = root / "raw" / "memory-captures" + capture_dir.mkdir(parents=True) + (capture_dir / "good.md").write_text( + "---\n" + "title: Good\n" + "date_captured: 2026-05-05T00:00:00Z\n" + "---\n\n" + "## Notes\n\n" + "Remember the readable capture.\n", + encoding="utf-8", + ) + (capture_dir / "locked.md").write_text( + "---\n" + "title: Locked\n" + "---\n\n" + "## Notes\n\n" + "This should report a read warning.\n", + encoding="utf-8", + ) + + original_read_text = Path.read_text + + def flaky_read_text(path: Path, *args, **kwargs): + if path.name == "locked.md": + raise OSError("permission denied") + return original_read_text(path, *args, **kwargs) + + with patch.object(Path, "read_text", flaky_read_text): + inbox = capture_inbox(root) + summary = capture_review_summary(root) + + self.assertEqual(inbox["count"], 1) + self.assertEqual(inbox["read_warning_count"], 1) + self.assertEqual( + inbox["read_warnings"], + [{"capture": "raw/memory-captures/locked.md", "error": 
"permission denied"}], + ) + self.assertEqual(summary["count"], 1) + self.assertEqual(summary["read_warning_count"], 1) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_demo_snapshot.py b/tests/test_demo_snapshot.py index 37965c9..74ab305 100644 --- a/tests/test_demo_snapshot.py +++ b/tests/test_demo_snapshot.py @@ -1,4 +1,5 @@ import importlib.util +import re import tempfile import unittest from contextlib import redirect_stdout @@ -30,6 +31,7 @@ "local-first-software", "local-release-notes", "log", + "prefer-local-personal-memory", "retrieval-augmented-generation", "transformer-reading-notes", "transformers", @@ -44,6 +46,8 @@ ("knowledge-graph", "agent-memory"), ("link", "knowledge-graph"), ("link", "retrieval-augmented-generation"), + ("prefer-local-personal-memory", "agent-memory"), + ("prefer-local-personal-memory", "link"), ("retrieval-augmented-generation", "transformers"), ("why-link-helps-agents", "agent-memory"), } @@ -55,16 +59,24 @@ def create_demo_quiet(target: Path) -> None: def reset_serve_wiki(wiki_dir: Path) -> None: + close = getattr(getattr(serve, "_fts_index", None), "close", None) + if callable(close): + close() serve.WIKI_DIR = wiki_dir serve.RAW_DIR = wiki_dir.parent / "raw" serve._pages_cache = None serve._pages_cache_mtime = 0.0 serve._page_index = {} serve._fulltext_index = {} + serve._normalized_fulltext_index = {} + serve._text_words_index = {} + serve._meta_words_index = {} serve._snippet_index = {} serve._token_index = {} serve._page_map = {} serve._meta_token_index = {} + serve._fts_index = None + serve._search_backend = "token-index" class DemoSnapshotTests(unittest.TestCase): @@ -83,6 +95,25 @@ def test_demo_file_snapshot_and_health(self): self.assertEqual(raw_files, EXPECTED_RAW_FILES) self.assertEqual(wiki_pages, EXPECTED_WIKI_PAGES) self.assertTrue((target / "logo.svg").exists()) + self.assertTrue((target / "wiki/memories/prefer-local-personal-memory.md").exists()) + self.assertTrue((target / 
"wiki/explorations/why-link-helps-agents.md").exists()) + + memory = (target / "wiki/memories/prefer-local-personal-memory.md").read_text(encoding="utf-8") + self.assertIn("review_status: pending", memory) + self.assertIn("## Source", memory) + self.assertIn("[[link]]", memory) + + log_text = (target / "wiki/log.md").read_text(encoding="utf-8") + self.assertIn("Created: memories/prefer-local-personal-memory.md", log_text) + self.assertIn("Created: explorations/why-link-helps-agents.md", log_text) + + raw_refs = set() + for source_page in (target / "wiki/sources").glob("*.md"): + source_text = source_page.read_text(encoding="utf-8") + page_refs = set(re.findall(r"`(raw/[^`]+)`", source_text)) + self.assertTrue(page_refs, source_page) + raw_refs.update(page_refs) + self.assertEqual(raw_refs, {f"raw/{name}" for name in EXPECTED_RAW_FILES}) status = link_cli._collect_ingest_status(target) self.assertEqual(status["raw_count"], 3) @@ -102,11 +133,136 @@ def test_demo_graph_snapshot(self): edges = {(edge["source"], edge["target"]) for edge in graph["edges"]} self.assertEqual(node_ids, EXPECTED_WIKI_PAGES) - self.assertEqual(len(graph["nodes"]), 12) - self.assertEqual(len(graph["edges"]), 54) + self.assertEqual(len(graph["nodes"]), 13) + self.assertEqual(len(graph["edges"]), 58) self.assertEqual(len(edges), len(graph["edges"])) self.assertTrue(EXPECTED_KEY_EDGES.issubset(edges)) + def test_demo_home_shows_memories(self): + target = self.make_demo() + reset_serve_wiki(target / "wiki") + + html = serve._render_home() + + self.assertIn('memories', html) + self.assertIn("Prefer local personal memory", html) + + def test_demo_search_matches_hyphenated_pages_with_natural_query(self): + target = self.make_demo() + reset_serve_wiki(target / "wiki") + + results = serve._search_pages("local first software") + context = serve._get_context("local first software") + + self.assertEqual(results[0]["name"], "local-first-software") + self.assertTrue(context["found"]) + 
self.assertEqual(context["primary"], "local-first-software") + + def test_demo_profile_snapshot(self): + target = self.make_demo() + reset_serve_wiki(target / "wiki") + + profile = serve._memory_profile() + html = serve._render_profile() + + self.assertEqual(profile["memory_count"], 1) + self.assertEqual(profile["active_count"], 1) + self.assertEqual(profile["review_count"], 1) + self.assertEqual(profile["by_type"]["preference"], 1) + self.assertEqual(profile["recent"][0]["name"], "prefer-local-personal-memory") + self.assertIn("Memory Profile", html) + self.assertIn("Prefer local personal memory", html) + + def test_demo_inbox_snapshot(self): + target = self.make_demo() + reset_serve_wiki(target / "wiki") + + inbox = serve._memory_inbox() + html = serve._render_inbox() + + self.assertEqual(inbox["review_count"], 1) + self.assertEqual(inbox["counts_by_severity"]["medium"], 1) + self.assertEqual(inbox["items"][0]["name"], "prefer-local-personal-memory") + self.assertEqual(inbox["items"][0]["issues"][0]["code"], "pending_review") + self.assertIn("Memory Review Inbox", html) + self.assertIn("pending_review", html) + + def test_demo_memory_dashboard_snapshot(self): + target = self.make_demo() + reset_serve_wiki(target / "wiki") + + dashboard = serve._memory_dashboard() + html = serve._render_memory_dashboard() + + self.assertEqual(dashboard["memory_count"], 1) + self.assertEqual(dashboard["active_count"], 1) + self.assertEqual(dashboard["review_count"], 1) + self.assertEqual(dashboard["next_actions"][0]["label"], "Review pending memories") + self.assertEqual(dashboard["review"][0]["name"], "prefer-local-personal-memory") + self.assertEqual(dashboard["review"][0]["actions"][0]["label"], "Review") + self.assertIn("Memory Dashboard", html) + self.assertIn("Next actions", html) + self.assertIn("Review needed", html) + self.assertIn("Prefer local personal memory", html) + self.assertIn("python3 link.py review-memory", html) + self.assertIn("python3 link.py update-memory", 
html) + self.assertIn("python3 link.py archive-memory", html) + + def test_demo_memory_dashboard_shows_recent_updates(self): + target = self.make_demo() + with redirect_stdout(StringIO()): + link_cli.update_memory( + target, + "prefer-local-personal-memory", + "Also prefer checking the web memory dashboard for review status.", + source="snapshot test", + ) + reset_serve_wiki(target / "wiki") + + dashboard = serve._memory_dashboard() + html = serve._render_memory_dashboard() + + self.assertEqual(dashboard["updated_count"], 1) + self.assertEqual(dashboard["recent_updates"][0]["name"], "prefer-local-personal-memory") + self.assertEqual(dashboard["recent_updates"][0]["update_count"], "1") + self.assertEqual(dashboard["next_actions"][0]["label"], "Review pending memories") + self.assertEqual(dashboard["next_actions"][1]["label"], "Audit recent memory updates") + self.assertIn("Recent updates", html) + self.assertIn("snapshot test", dashboard["recent_updates"][0]["last_update_source"]) + + def test_demo_explain_memory_snapshot(self): + target = self.make_demo() + reset_serve_wiki(target / "wiki") + + explanation = serve._memory_explanation("prefer-local-personal-memory") + html = serve._render_explain_memory("prefer-local-personal-memory") + + self.assertTrue(explanation["found"]) + self.assertEqual(explanation["memory"]["name"], "prefer-local-personal-memory") + self.assertEqual(explanation["provenance"]["source"], "demo") + self.assertEqual(explanation["recall"]["state"], "needs_review") + self.assertIn("agent-memory", explanation["graph"]["forward"]) + self.assertIn("Explain: Prefer local personal memory", html) + self.assertIn("pending_review", html) + + def test_demo_profile_separates_archived_memories(self): + target = self.make_demo() + with redirect_stdout(StringIO()): + link_cli.archive_memory(target, "prefer-local-personal-memory", reason="snapshot test") + reset_serve_wiki(target / "wiki") + + profile = serve._memory_profile() + html = serve._render_profile() 
+ + self.assertEqual(profile["memory_count"], 1) + self.assertEqual(profile["active_count"], 0) + self.assertEqual(profile["review_count"], 0) + self.assertEqual(profile["by_status"]["archived"], 1) + self.assertEqual(profile["recent"], []) + self.assertEqual(profile["archived"][0]["name"], "prefer-local-personal-memory") + self.assertIn("Archived memories", html) + self.assertIn("Prefer local personal memory", html) + def test_demo_context_snapshot(self): target = self.make_demo() reset_serve_wiki(target / "wiki") @@ -117,7 +273,7 @@ def test_demo_context_snapshot(self): self.assertTrue(ctx["found"]) self.assertEqual(ctx["primary"], "agent-memory") - self.assertEqual(ctx["inbound_count"], 9) + self.assertEqual(ctx["inbound_count"], 10) self.assertEqual(ctx["forward_count"], 5) self.assertEqual(page_names[0], "agent-memory") self.assertIn("link", page_names) diff --git a/tests/test_docs_site.py b/tests/test_docs_site.py new file mode 100644 index 0000000..434f46e --- /dev/null +++ b/tests/test_docs_site.py @@ -0,0 +1,42 @@ +import re +import unittest +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] + + +class DocsSiteTests(unittest.TestCase): + def docs_pages(self): + return sorted((ROOT / "docs").glob("*.html")) + + def test_github_pages_site_references_existing_local_assets(self): + pages = self.docs_pages() + self.assertGreaterEqual(len(pages), 6) + self.assertTrue((ROOT / "docs/.nojekyll").exists()) + + all_refs = [] + for page in pages: + html = page.read_text(encoding="utf-8") + all_refs.extend(re.findall(r'(?:src|href)="(assets/[^"]+)"', html)) + for local_page in re.findall(r'href="([^":#]+\.html)"', html): + self.assertTrue((ROOT / "docs" / local_page).exists(), f"{page.name} -> {local_page}") + + index_html = (ROOT / "docs/index.html").read_text(encoding="utf-8") + self.assertIn("Link gives every agent the same memory.", index_html) + self.assertIn("MCP Registry", index_html) + self.assertGreaterEqual(len(all_refs), 10) + for ref 
in all_refs: + self.assertTrue((ROOT / "docs" / ref).exists(), ref) + + def test_github_pages_site_has_no_external_runtime_dependencies(self): + for page in self.docs_pages(): + html = page.read_text(encoding="utf-8") + + self.assertNotIn(" None: + append_text(path, f"entry-{index}\n", initial_text="# Log\n\n") + + with ThreadPoolExecutor(max_workers=8) as executor: + list(executor.map(append, range(25))) + + text = path.read_text(encoding="utf-8") + self.assertEqual(text.count("# Log"), 1) + for index in range(25): + self.assertIn(f"entry-{index}\n", text) + self.assertEqual(list(path.parent.glob(".*.lock")), []) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_frontmatter_core.py b/tests/test_frontmatter_core.py new file mode 100644 index 0000000..29d2782 --- /dev/null +++ b/tests/test_frontmatter_core.py @@ -0,0 +1,48 @@ +import sys +import unittest +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "mcp_package")) + +from link_core.frontmatter import parse_frontmatter, update_frontmatter_fields # noqa: E402 + + +class FrontmatterCoreTests(unittest.TestCase): + def test_parse_frontmatter_preserves_colons_and_lists(self): + meta, body = parse_frontmatter( + "---\n" + "title: \"My: Project\"\n" + "tags: [memory, \"release:notes\", local-first]\n" + "---\n\n" + "# Body\n" + ) + + self.assertEqual(meta["title"], "My: Project") + self.assertEqual(meta["tags"], ["memory", "release:notes", "local-first"]) + self.assertEqual(body, "\n# Body\n") + + def test_update_frontmatter_formats_lists_and_removes_fields(self): + updated = update_frontmatter_fields( + "---\n" + "title: Old\n" + "tags: [old]\n" + "reviewed_at: \"2026-05-05T00:00:00Z\"\n" + "---\n\n" + "Body\n", + { + "tags": ["memory", "release:notes"], + "review_status": "pending", + }, + remove={"reviewed_at"}, + ) + + self.assertIn("tags: [memory, \"release:notes\"]", updated) + self.assertIn("review_status: pending", updated) + 
self.assertNotIn("reviewed_at:", updated) + self.assertTrue(updated.endswith("\nBody\n")) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_ingest_core.py b/tests/test_ingest_core.py new file mode 100644 index 0000000..23449e4 --- /dev/null +++ b/tests/test_ingest_core.py @@ -0,0 +1,281 @@ +import json +import re +import sys +import tempfile +import time +import unittest +from pathlib import Path +from unittest.mock import patch + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "mcp_package")) + +from link_core.ingest import collect_ingest_status, source_matches_by_raw # noqa: E402 +from link_core.wiki import build_backlinks # noqa: E402 + + +def write_page(wiki: Path, rel: str, text: str) -> None: + path = wiki / rel + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(text, encoding="utf-8") + + +class IngestCoreTests(unittest.TestCase): + def test_collect_ingest_status_reports_missing_structure(self): + root = Path(tempfile.mkdtemp(prefix="link-ingest-core-")) + + payload = collect_ingest_status(root) + + self.assertFalse(payload["has_raw_dir"]) + self.assertFalse(payload["has_wiki_dir"]) + self.assertEqual(payload["guidance"]["state"], "missing_structure") + + def test_collect_ingest_status_reports_pending_raw(self): + root = Path(tempfile.mkdtemp(prefix="link-ingest-core-")) + raw = root / "raw" + wiki = root / "wiki" + raw.mkdir() + (raw / "new-note.md").write_text("# New note\n", encoding="utf-8") + write_page(wiki, "index.md", "# Index\n") + write_page(wiki, "log.md", "# Log\n") + (wiki / "sources").mkdir(parents=True, exist_ok=True) + (wiki / "_backlinks.json").write_text(json.dumps(build_backlinks(wiki)), encoding="utf-8") + + payload = collect_ingest_status(root) + + self.assertEqual(payload["pending_count"], 1) + self.assertEqual(payload["pending_raw"][0]["raw"], "raw/new-note.md") + self.assertEqual(payload["guidance"]["state"], "pending_raw") + 
self.assertEqual(payload["safety"]["status"], "clear") + self.assertEqual(payload["guidance"]["agent_prompt"], "ingest raw/new-note.md into Link") + self.assertEqual(payload["plan"]["title"], "Ingest pending raw sources") + self.assertEqual(payload["plan"]["batch"][0]["suggested_source_page"], "wiki/sources/new-note.md") + self.assertEqual(payload["plan"]["memory_prompt"], "propose memories from raw/new-note.md") + self.assertIn("link rebuild-index", payload["plan"]["post_checks"]) + + def test_collect_ingest_status_blocks_secret_looking_raw(self): + root = Path(tempfile.mkdtemp(prefix="link-ingest-core-")) + raw = root / "raw" + wiki = root / "wiki" + raw.mkdir() + (raw / "secret-note.md").write_text( + "# Secret note\n\nDo not ingest sk-" + ("a" * 25) + "\n", + encoding="utf-8", + ) + write_page(wiki, "index.md", "# Index\n") + write_page(wiki, "log.md", "# Log\n") + (wiki / "sources").mkdir(parents=True, exist_ok=True) + (wiki / "_backlinks.json").write_text(json.dumps(build_backlinks(wiki)), encoding="utf-8") + + payload = collect_ingest_status(root) + + self.assertEqual(payload["pending_count"], 1) + self.assertEqual(payload["raw_secret_warning_count"], 1) + self.assertEqual(payload["safety"]["status"], "blocked") + self.assertEqual(payload["safety"]["blocked_count"], 1) + self.assertEqual(payload["safety"]["access_blocked_count"], 0) + self.assertEqual(payload["safety"]["labels"], ["OpenAI API key"]) + self.assertEqual(payload["safety"]["blocked_raw"], ["raw/secret-note.md"]) + self.assertEqual(payload["pending_raw"][0]["secret_warnings"], ["OpenAI API key"]) + self.assertEqual(payload["guidance"]["state"], "blocked_secrets") + self.assertIsNone(payload["guidance"]["agent_prompt"]) + self.assertEqual(payload["plan"]["title"], "Redact raw sources before ingest") + self.assertEqual(payload["plan"]["batch"][0]["secret_warnings"], ["OpenAI API key"]) + self.assertIn("Do not ask an agent to ingest", payload["guidance"]["notes"][0]) + + def 
test_collect_ingest_status_blocks_unreadable_raw(self): + root = Path(tempfile.mkdtemp(prefix="link-ingest-core-")) + raw = root / "raw" + wiki = root / "wiki" + raw.mkdir() + (raw / "locked-note.md").write_text("# Locked note\n", encoding="utf-8") + write_page(wiki, "index.md", "# Index\n") + write_page(wiki, "log.md", "# Log\n") + (wiki / "sources").mkdir(parents=True, exist_ok=True) + (wiki / "_backlinks.json").write_text(json.dumps(build_backlinks(wiki)), encoding="utf-8") + + with patch( + "link_core.ingest.secret_file_scan", + return_value={"labels": [], "readable": False, "error": "permission denied"}, + ): + payload = collect_ingest_status(root) + + self.assertEqual(payload["pending_count"], 1) + self.assertEqual(payload["raw_scan_warning_count"], 1) + self.assertEqual(payload["raw_scan_warnings"], [{"raw": "raw/locked-note.md", "error": "permission denied"}]) + self.assertEqual(payload["pending_raw"][0]["scan_error"], "permission denied") + self.assertEqual(payload["safety"]["status"], "blocked") + self.assertEqual(payload["safety"]["blocked_count"], 1) + self.assertEqual(payload["safety"]["access_blocked_count"], 1) + self.assertEqual(payload["guidance"]["state"], "blocked_raw_access") + self.assertIsNone(payload["guidance"]["agent_prompt"]) + self.assertEqual(payload["plan"]["title"], "Inspect raw source access") + self.assertEqual(payload["plan"]["batch"][0]["scan_error"], "permission denied") + self.assertIn("cannot read and scan", payload["guidance"]["notes"][0]) + + def test_collect_ingest_status_blocks_unreadable_source_pages(self): + root = Path(tempfile.mkdtemp(prefix="link-ingest-core-")) + raw = root / "raw" + wiki = root / "wiki" + raw.mkdir() + (raw / "broken-source.md").write_text("# Broken source\n", encoding="utf-8") + write_page(wiki, "index.md", "# Index\n") + write_page(wiki, "log.md", "# Log\n") + write_page( + wiki, + "sources/broken.md", + "---\ntype: source\ntitle: Broken\n---\n\n`raw/broken-source.md`\n", + ) + (wiki / 
"_backlinks.json").write_text(json.dumps(build_backlinks(wiki)), encoding="utf-8") + + original_read_text = Path.read_text + + def read_text(path: Path, *args: object, **kwargs: object) -> str: + if path.name == "broken.md": + raise OSError("permission denied") + return original_read_text(path, *args, **kwargs) + + with patch.object(Path, "read_text", read_text): + payload = collect_ingest_status(root) + + self.assertEqual(payload["source_read_warning_count"], 1) + self.assertEqual(payload["source_read_warnings"], [{"page": "wiki/sources/broken.md", "error": "permission denied"}]) + self.assertEqual(payload["guidance"]["state"], "blocked_source_access") + self.assertIsNone(payload["guidance"]["agent_prompt"]) + self.assertEqual(payload["plan"]["title"], "Inspect source page access") + self.assertEqual(payload["plan"]["batch"][0]["page"], "wiki/sources/broken.md") + self.assertIn("Represented and pending raw counts may be incomplete", payload["guidance"]["notes"][0]) + + def test_collect_ingest_status_reports_represented_raw(self): + root = Path(tempfile.mkdtemp(prefix="link-ingest-core-")) + raw = root / "raw" + wiki = root / "wiki" + raw.mkdir() + (raw / "source.md").write_text("# Source\n", encoding="utf-8") + write_page(wiki, "index.md", "# Index\n") + write_page(wiki, "log.md", "# Log\n") + write_page( + wiki, + "sources/source.md", + "---\ntype: source\ntitle: Source\n---\n\n" + "# Source\n\n" + "## Raw Source\n\n`raw/source.md`\n", + ) + (wiki / "_backlinks.json").write_text(json.dumps(build_backlinks(wiki)), encoding="utf-8") + + payload = collect_ingest_status(root) + + self.assertEqual(payload["pending_count"], 0) + self.assertEqual(payload["represented_count"], 1) + self.assertEqual(payload["represented_raw"][0]["source_pages"], ["source"]) + self.assertEqual(payload["represented_raw"][0]["source_page_paths"], ["wiki/sources/source.md"]) + self.assertEqual(payload["represented_raw"][0]["source_page_titles"], ["Source"]) + 
self.assertEqual(payload["safety"]["status"], "clear") + self.assertEqual(payload["guidance"]["state"], "ready") + self.assertEqual(payload["plan"]["title"], "Ready for new sources") + self.assertEqual(payload["completion"]["represented_count"], 1) + self.assertEqual(payload["completion"]["pending_count"], 0) + self.assertEqual(payload["completion"]["items"][0]["raw"], "raw/source.md") + self.assertEqual(payload["completion"]["items"][0]["source_pages"][0]["path"], "wiki/sources/source.md") + self.assertEqual(payload["completion"]["items"][0]["memory_prompt"], "propose memories from raw/source.md") + self.assertEqual(payload["completion"]["next_prompt"], "brief me from Link before we continue") + + def test_collect_ingest_status_reports_stale_represented_raw(self): + root = Path(tempfile.mkdtemp(prefix="link-ingest-core-")) + raw = root / "raw" + wiki = root / "wiki" + raw.mkdir() + raw_page = raw / "source.md" + raw_page.write_text("# Source\n\nOriginal note.\n", encoding="utf-8") + write_page(wiki, "index.md", "# Index\n") + write_page(wiki, "log.md", "# Log\n") + write_page( + wiki, + "sources/custom-source.md", + "---\ntype: source\ntitle: Custom Source\n---\n\n" + "# Custom Source\n\n" + "## Raw Source\n\n`raw/source.md`\n", + ) + (wiki / "_backlinks.json").write_text(json.dumps(build_backlinks(wiki)), encoding="utf-8") + time.sleep(0.02) + raw_page.write_text("# Source\n\nUpdated note.\n", encoding="utf-8") + + payload = collect_ingest_status(root) + + self.assertEqual(payload["pending_count"], 1) + self.assertEqual(payload["represented_count"], 0) + self.assertEqual(payload["stale_count"], 1) + self.assertEqual(payload["stale_raw"][0]["raw"], "raw/source.md") + self.assertTrue(payload["pending_raw"][0]["stale"]) + self.assertEqual(payload["pending_raw"][0]["stale_reason"], "raw changed after wiki source page") + self.assertEqual(payload["pending_raw"][0]["source_page_paths"], ["wiki/sources/custom-source.md"]) + self.assertEqual(payload["guidance"]["state"], 
"stale_raw") + self.assertEqual(payload["guidance"]["agent_prompt"], "re-ingest raw/source.md into Link") + self.assertEqual(payload["plan"]["title"], "Refresh stale source pages") + self.assertEqual(payload["plan"]["batch"][0]["target_source_page"], "wiki/sources/custom-source.md") + self.assertEqual(payload["completion"]["represented_count"], 0) + self.assertEqual(payload["completion"]["pending_count"], 1) + + def test_source_matches_by_raw_handles_special_characters_and_prefixes(self): + source_records = { + "alpha": { + "text": "`raw/source (v1)+.md`\n`raw/source (v1)+.md`\n`raw/source.md.backup`\n", + }, + "beta": { + "text": "No raw references here.", + }, + } + + matches = source_matches_by_raw( + source_records, + ["raw/source (v1)+.md", "raw/source.md", "raw/source.md.backup"], + ) + + self.assertEqual(matches["raw/source (v1)+.md"], ["alpha"]) + self.assertEqual(matches["raw/source.md"], []) + self.assertEqual(matches["raw/source.md.backup"], ["alpha"]) + + def test_source_matches_by_raw_compiles_one_pattern_per_chunk(self): + source_records = {"alpha": {"text": "`raw/a.md` `raw/d.md`"}} + raw_rels = ["raw/a.md", "raw/b.md", "raw/c.md", "raw/d.md"] + + with patch("link_core.ingest.re.compile", wraps=re.compile) as mocked_compile: + matches = source_matches_by_raw(source_records, raw_rels, chunk_size=1) + + self.assertEqual(mocked_compile.call_count, len(raw_rels)) + self.assertEqual(matches["raw/a.md"], ["alpha"]) + self.assertEqual(matches["raw/d.md"], ["alpha"]) + + def test_collect_ingest_status_warns_on_represented_secret_raw(self): + root = Path(tempfile.mkdtemp(prefix="link-ingest-core-")) + raw = root / "raw" + wiki = root / "wiki" + raw.mkdir() + (raw / "source.md").write_text( + "# Source\n\nHistorical token sk-" + ("a" * 25) + "\n", + encoding="utf-8", + ) + write_page(wiki, "index.md", "# Index\n") + write_page(wiki, "log.md", "# Log\n") + write_page( + wiki, + "sources/source.md", + "---\ntype: source\ntitle: Source\n---\n\n" + "# Source\n\n" 
+ "## Raw Source\n\n`raw/source.md`\n", + ) + (wiki / "_backlinks.json").write_text(json.dumps(build_backlinks(wiki)), encoding="utf-8") + + payload = collect_ingest_status(root) + + self.assertEqual(payload["pending_count"], 0) + self.assertEqual(payload["represented_count"], 1) + self.assertEqual(payload["safety"]["status"], "warning") + self.assertEqual(payload["safety"]["blocked_count"], 0) + self.assertEqual(payload["safety"]["labels"], ["OpenAI API key"]) + self.assertEqual(payload["guidance"]["state"], "ready") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_installers.py b/tests/test_installers.py index 7fbe212..ac2032c 100644 --- a/tests/test_installers.py +++ b/tests/test_installers.py @@ -23,6 +23,23 @@ def test_scaffold_does_not_use_break_system_packages(self): self.assertNotIn("--break-system-packages", scaffold) self.assertIn(".link-mcp-venv", scaffold) self.assertIn(".link-mcp-python", scaffold) + self.assertIn("LINK_MCP_INSTALLED=false", scaffold) + self.assertIn('[ "$LINK_MCP_INSTALLED" = true ]', scaffold) + + def test_scaffold_installs_short_global_link_command(self): + scaffold = (ROOT / "integrations/_shared/scaffold.sh").read_text(encoding="utf-8") + + self.assertIn('LINK_CLI_BIN="$LINK_CLI_DIR/link"', scaffold) + self.assertIn("Link command wrapper", scaffold) + self.assertIn("not overwriting", scaffold) + self.assertIn("link status --validate", scaffold) + self.assertIn('if [ "$MODE" = "--project" ]', scaffold) + + def test_scaffold_project_mode_uses_absolute_target(self): + scaffold = (ROOT / "integrations/_shared/scaffold.sh").read_text(encoding="utf-8") + + self.assertIn('TARGET_DIR="$(pwd)"', scaffold) + self.assertNotIn('TARGET_DIR="."', scaffold) def test_installers_read_resolved_mcp_python_marker(self): for installer in INSTALLERS: @@ -31,6 +48,26 @@ def test_installers_read_resolved_mcp_python_marker(self): self.assertIn("MCP_PYTHON", text) self.assertIn(".link-mcp-python", text) + def 
test_installers_print_mode_specific_next_steps(self): + instructions = (ROOT / "integrations/_shared/instructions.sh").read_text(encoding="utf-8") + + self.assertIn("link_print_next_steps()", instructions) + self.assertIn('if [ "$mode" = "--project" ]; then', instructions) + self.assertIn("View wiki: python3 link.py serve", instructions) + self.assertIn("View wiki: link serve", instructions) + self.assertIn("Try in your agent:", instructions) + self.assertIn("is Link ready?", instructions) + self.assertIn("brief me from Link before we continue", instructions) + self.assertIn("ingest raw/ into Link", instructions) + self.assertIn("query Link for what you know about me", instructions) + self.assertIn("query Link for what this project remembers", instructions) + + for installer in INSTALLERS: + with self.subTest(installer=installer.name): + text = installer.read_text(encoding="utf-8") + self.assertIn('. "$SCRIPT_DIR/../_shared/instructions.sh"', text) + self.assertIn('link_print_next_steps "$MODE"', text) + def test_codex_and_kiro_update_existing_mcp_registration(self): codex = (ROOT / "integrations/codex/install.sh").read_text(encoding="utf-8") kiro = (ROOT / "integrations/kiro/install.sh").read_text(encoding="utf-8") diff --git a/tests/test_large_wiki_smoke.py b/tests/test_large_wiki_smoke.py new file mode 100644 index 0000000..2e6a881 --- /dev/null +++ b/tests/test_large_wiki_smoke.py @@ -0,0 +1,144 @@ +import json +import importlib.util +import sys +import tempfile +import unittest +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "mcp_package")) + +from link_core.memory import memory_records # noqa: E402 +from link_core.query import query_link # noqa: E402 +from link_core.wiki import build_backlinks, build_wiki_cache, close_wiki_cache, graph_data # noqa: E402 + +SPEC = importlib.util.spec_from_file_location( + "smoke_large_wiki", ROOT / "scripts/smoke_large_wiki.py" +) +smoke_large_wiki = 
importlib.util.module_from_spec(SPEC) +assert SPEC.loader is not None +sys.modules[SPEC.name] = smoke_large_wiki +SPEC.loader.exec_module(smoke_large_wiki) + + +def write_page(wiki: Path, rel: str, text: str) -> None: + path = wiki / rel + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(text, encoding="utf-8") + + +class LargeWikiSmokeTests(unittest.TestCase): + def test_smart_query_and_graph_handle_hundreds_of_pages(self): + root = Path(tempfile.mkdtemp(prefix="link-large-wiki-")) + wiki = root / "wiki" + wiki.mkdir() + write_page(wiki, "index.md", "# Index\n") + write_page(wiki, "log.md", "# Log\n") + + for index in range(12): + write_page( + wiki, + f"sources/source-{index}.md", + "---\n" + "type: source\n" + f"title: Source {index}\n" + "---\n\n" + f"# Source {index}\n\n" + f"> **TLDR:** Source {index} covers local agent memory topic {index}.\n\n" + "## Summary\n\n" + "Synthetic source for large-wiki smoke coverage.\n\n" + f"## Raw Source\n\n`raw/source-{index}.md`\n", + ) + + page_count = 260 + for index in range(page_count): + next_index = (index + 1) % page_count + source_index = index % 12 + write_page( + wiki, + f"concepts/topic-{index}.md", + "---\n" + "type: concept\n" + f"title: Topic {index} Agent Memory\n" + "tags: [agent-memory, large-wiki]\n" + "---\n\n" + f"# Topic {index} Agent Memory\n\n" + f"> **TLDR:** Topic {index} describes local agent memory behavior.\n\n" + "## Overview\n\n" + f"Topic {index} links to [[topic-{next_index}]] and [[source-{source_index}]]. 
" + "The repeated phrase keeps search realistic without requiring a full scan.\n\n" + "## Sources\n\n" + f"- [[source-{source_index}]]\n", + ) + + for index in range(16): + topic = 42 if index == 0 else index + write_page( + wiki, + f"memories/prefer-topic-{topic}.md", + "---\n" + "type: memory\n" + f"title: Prefer topic {topic}\n" + "memory_type: preference\n" + "scope: project\n" + "project: large-wiki\n" + "status: active\n" + "date_captured: \"2026-05-06T00:00:00Z\"\n" + "source: large-wiki-smoke\n" + "review_status: reviewed\n" + "---\n\n" + f"# Prefer topic {topic}\n\n" + f"> **TLDR:** User prefers topic {topic} local agent memory notes.\n\n" + f"## Memory\n\nUser prefers topic {topic} local agent memory notes.\n", + ) + + (wiki / "_backlinks.json").write_text(json.dumps(build_backlinks(wiki)), encoding="utf-8") + cache = build_wiki_cache(wiki) + + packet = query_link( + wiki, + "agent memory", + cache, + memory_records(wiki), + budget="small", + project="large-wiki", + ) + graph = graph_data(cache) + + self.assertEqual(len(cache["pages"]), page_count + 30) + self.assertTrue(packet["found"]) + self.assertLessEqual(len(packet["context_packet"]), 6) + self.assertTrue(packet["budget_report"]["wiki_search"]["has_more"]) + self.assertLess(packet["budget_report"]["context_packet"]["estimated_tokens"], 3000) + self.assertLessEqual(packet["memory"]["count"], 3) + self.assertEqual(packet["follow_up"][0]["tool"], "query_link") + self.assertEqual(len(graph["nodes"]), page_count + 30) + self.assertGreaterEqual(len(graph["edges"]), page_count) + close_wiki_cache(cache) + + def test_large_wiki_smoke_enforces_timing_thresholds(self): + smoke_large_wiki.check_timing_thresholds({"query": 0.01}, {"query": 0.02}) + + with self.assertRaisesRegex(smoke_large_wiki.SmokeFailure, "above 0.0200s threshold"): + smoke_large_wiki.check_timing_thresholds({"query": 0.03}, {"query": 0.02}) + + def test_large_wiki_smoke_reports_benchmark_health(self): + root = 
Path(tempfile.mkdtemp(prefix="link-large-wiki-health-")) + + payload = smoke_large_wiki.run_smoke(root, 80) + + self.assertEqual(payload["health"]["status"], "pass") + self.assertEqual(payload["health"]["label"], "interactive") + self.assertIn("thresholds_seconds", payload["health"]) + self.assertIn("graph_summary", payload["timings"]) + self.assertIn("page_list", payload["timings"]) + self.assertIn("graph_initial", payload["timings"]) + self.assertLessEqual(payload["graph_summary"]["returned_nodes"], 40) + self.assertEqual(payload["page_list"]["returned_count"], 100) + self.assertEqual(payload["graph_initial"]["mode"], "full") + self.assertEqual(payload["graph_initial"]["nodes"], payload["graph_initial"]["total_nodes"]) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_link_cli.py b/tests/test_link_cli.py index 3088fd4..9a2c10a 100644 --- a/tests/test_link_cli.py +++ b/tests/test_link_cli.py @@ -1,10 +1,15 @@ import importlib.util import json +import subprocess +import sys +import tarfile import tempfile +import time import unittest -from contextlib import redirect_stdout +from contextlib import redirect_stderr, redirect_stdout from io import StringIO from pathlib import Path +from unittest.mock import patch ROOT = Path(__file__).resolve().parents[1] @@ -16,127 +21,1736 @@ def create_demo_quiet(target: Path, force: bool = False) -> None: with redirect_stdout(StringIO()): - link_cli.create_demo(target, force=force) + code = link_cli.create_demo(target, force=force) + if code != 0: + raise AssertionError(f"create_demo failed with exit code {code}") class LinkCliTests(unittest.TestCase): + def test_init_creates_empty_wiki(self): + tmp = Path(tempfile.mkdtemp(prefix="link-init-test-")) + target = tmp / "my-link" + + out = StringIO() + with redirect_stdout(out): + code = link_cli.init_wiki(target) + + self.assertEqual(code, 0) + self.assertTrue((target / "serve.py").exists()) + self.assertTrue((target / "link.py").exists()) + 
self.assertTrue((target / "LINK.md").exists()) + self.assertTrue((target / "link_core/frontmatter.py").exists()) + self.assertTrue((target / "raw").is_dir()) + self.assertTrue((target / "wiki/index.md").exists()) + self.assertTrue((target / "wiki/log.md").exists()) + self.assertTrue((target / "wiki/_backlinks.json").exists()) + self.assertTrue((target / "wiki/_link_schema.json").exists()) + self.assertTrue((target / "wiki/sources").is_dir()) + self.assertTrue((target / "wiki/memories").is_dir()) + + backlinks = json.loads((target / "wiki/_backlinks.json").read_text(encoding="utf-8")) + self.assertIn("backlinks", backlinks) + self.assertIn("forward", backlinks) + self.assertIn("link status --validate", out.getvalue()) + self.assertIn("link serve", out.getvalue()) + + def test_init_preserves_existing_pages(self): + tmp = Path(tempfile.mkdtemp(prefix="link-init-test-")) + target = tmp / "my-link" + page = target / "wiki/concepts/custom.md" + page.parent.mkdir(parents=True) + page.write_text("# Custom\n", encoding="utf-8") + + with redirect_stdout(StringIO()): + code = link_cli.init_wiki(target) + + self.assertEqual(code, 0) + self.assertEqual(page.read_text(encoding="utf-8"), "# Custom\n") + + def test_init_copies_core_from_installed_runtime_layout(self): + tmp = Path(tempfile.mkdtemp(prefix="link-init-test-")) + runtime = tmp / "runtime" + runtime.mkdir() + for name in ("serve.py", "link.py", "LINK.md", ".linkignore"): + (runtime / name).write_text(f"# {name}\n", encoding="utf-8") + (runtime / "link_core").mkdir() + (runtime / "link_core/frontmatter.py").write_text("# core\n", encoding="utf-8") + target = tmp / "my-link" + + with patch.object(link_cli, "ROOT", runtime), redirect_stdout(StringIO()): + code = link_cli.init_wiki(target) + + self.assertEqual(code, 0) + self.assertTrue((target / "link_core/frontmatter.py").exists()) + + def test_prompts_prints_first_run_agent_prompts(self): + tmp = Path(tempfile.mkdtemp(prefix="link-prompts-test-")) + target = tmp / 
"my-link" + + out = StringIO() + with redirect_stdout(out): + code = link_cli.starter_prompts(target) + + self.assertEqual(code, 0) + self.assertIn("Link starter prompts:", out.getvalue()) + self.assertIn("is Link ready?", out.getvalue()) + self.assertIn("brief me from Link before we continue", out.getvalue()) + self.assertIn("remember that I prefer local-first agent memory", out.getvalue()) + self.assertIn("query Link for what you know about me", out.getvalue()) + self.assertIn("propose memories from raw/", out.getvalue()) + self.assertIn("link status --validate", out.getvalue()) + + def test_prompts_json_supports_project_examples(self): + tmp = Path(tempfile.mkdtemp(prefix="link-prompts-test-")) + target = tmp / "my-link" + + out = StringIO() + with redirect_stdout(out): + code = link_cli.starter_prompts(target, project="Client Launch", json_output=True) + payload = json.loads(out.getvalue()) + + self.assertEqual(code, 0) + self.assertEqual(payload["project"], "client-launch") + self.assertIn("this project uses Link", payload["prompts"][2]["prompt"]) + self.assertIn("what this project remembers", payload["prompts"][3]["prompt"]) + + def test_serve_runs_target_viewer(self): + tmp = Path(tempfile.mkdtemp(prefix="link-serve-test-")) + target = tmp / "demo" + create_demo_quiet(target) + + with patch.object(link_cli.subprocess, "run") as run: + run.return_value.returncode = 0 + code = link_cli.serve_wiki(target, port=3010) + + self.assertEqual(code, 0) + run.assert_called_once_with([ + sys.executable, + str(link_cli.ROOT / "serve.py"), + "--root", + str(target.resolve()), + "--port", + "3010", + ]) + + def test_serve_reports_missing_wiki(self): + tmp = Path(tempfile.mkdtemp(prefix="link-serve-test-")) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.serve_wiki(tmp / "missing") + + self.assertEqual(code, 1) + self.assertIn("Link wiki missing", out.getvalue()) + self.assertIn("link init", out.getvalue()) + + def 
test_serve_validates_port_before_spawning_viewer(self): + tmp = Path(tempfile.mkdtemp(prefix="link-serve-test-")) + target = tmp / "demo" + create_demo_quiet(target) + + out = StringIO() + with patch.object(link_cli.subprocess, "run") as run, redirect_stdout(out): + code = link_cli.serve_wiki(target, port=70000) + + self.assertEqual(code, 1) + run.assert_not_called() + self.assertIn("--port must be between 1 and 65535", out.getvalue()) + + def test_serve_handles_ctrl_c_without_traceback(self): + tmp = Path(tempfile.mkdtemp(prefix="link-serve-test-")) + target = tmp / "demo" + create_demo_quiet(target) + + with patch.object(link_cli.subprocess, "run", side_effect=KeyboardInterrupt): + code = link_cli.serve_wiki(target, port=3010) + + self.assertEqual(code, 130) + def test_demo_creates_preingested_wiki(self): tmp = Path(tempfile.mkdtemp(prefix="link-demo-test-")) target = tmp / "demo" create_demo_quiet(target) - self.assertTrue((target / ".link-demo").exists()) - self.assertTrue((target / "serve.py").exists()) - self.assertTrue((target / "link.py").exists()) - self.assertTrue((target / "LINK.md").exists()) - self.assertTrue((target / "raw/agent-memory-session.md").exists()) - self.assertTrue((target / "wiki/concepts/agent-memory.md").exists()) - self.assertTrue((target / "wiki/entities/link.md").exists()) + self.assertTrue((target / ".link-demo").exists()) + self.assertTrue((target / "serve.py").exists()) + self.assertTrue((target / "link.py").exists()) + self.assertTrue((target / "link_core/frontmatter.py").exists()) + self.assertTrue((target / "link_core/memory.py").exists()) + self.assertTrue((target / "LINK.md").exists()) + self.assertTrue((target / "START_HERE.md").exists()) + self.assertTrue((target / "raw/agent-memory-session.md").exists()) + self.assertTrue((target / "wiki/concepts/agent-memory.md").exists()) + self.assertTrue((target / "wiki/entities/link.md").exists()) + self.assertTrue((target / "wiki/_link_schema.json").exists()) + guide = (target / 
"START_HERE.md").read_text(encoding="utf-8") + self.assertIn("query Link for why Link helps agents", guide) + self.assertIn('python3 link.py query "why does Link help agents?" . --budget small', guide) + + backlinks = json.loads((target / "wiki/_backlinks.json").read_text(encoding="utf-8")) + self.assertIn("backlinks", backlinks) + self.assertIn("forward", backlinks) + self.assertIn("agent-memory", backlinks["backlinks"]) + self.assertIn("link", backlinks["backlinks"]) + self.assertIn("agent-memory", backlinks["forward"]["link"]) + + def test_demo_refuses_to_overwrite_non_demo_directory(self): + tmp = Path(tempfile.mkdtemp(prefix="link-demo-test-")) + target = tmp / "not-demo" + target.mkdir() + (target / "keep.txt").write_text("do not replace", encoding="utf-8") + + err = StringIO() + with redirect_stderr(err): + code = link_cli.create_demo(target, force=True) + + self.assertEqual(code, 1) + self.assertIn("refusing to overwrite", err.getvalue()) + self.assertEqual((target / "keep.txt").read_text(encoding="utf-8"), "do not replace") + + def test_demo_force_replaces_demo_directory(self): + tmp = Path(tempfile.mkdtemp(prefix="link-demo-test-")) + target = tmp / "demo" + + create_demo_quiet(target) + (target / "extra.txt").write_text("old", encoding="utf-8") + create_demo_quiet(target, force=True) + + self.assertFalse((target / "extra.txt").exists()) + self.assertTrue((target / "wiki/index.md").exists()) + + def test_doctor_accepts_demo_wiki(self): + tmp = Path(tempfile.mkdtemp(prefix="link-doctor-test-")) + target = tmp / "demo" + create_demo_quiet(target) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.doctor(target) + + self.assertEqual(code, 0) + self.assertIn("Result: healthy", out.getvalue()) + self.assertIn("OK wiki pages have summaries", out.getvalue()) + self.assertIn("OK source-backed pages cite sources", out.getvalue()) + self.assertIn("OK no sensitive-looking file contents", out.getvalue()) + self.assertIn("memories need review", 
out.getvalue()) + + def test_ingest_status_accepts_demo_wiki(self): + tmp = Path(tempfile.mkdtemp(prefix="link-ingest-test-")) + target = tmp / "demo" + create_demo_quiet(target) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.ingest_status(target) + + self.assertEqual(code, 0) + self.assertIn("Raw files: 3", out.getvalue()) + self.assertIn("Pending ingest: 0", out.getvalue()) + self.assertIn("Backlinks: current", out.getvalue()) + + def test_capture_session_is_not_pending_source_ingest(self): + tmp = Path(tempfile.mkdtemp(prefix="link-ingest-test-")) + target = tmp / "demo" + create_demo_quiet(target) + with redirect_stdout(StringIO()): + link_cli.capture_session( + target, + "Remember that capture notes are proposal-only memory backlog.", + title="Capture backlog", + json_output=True, + ) + + ingest_out = StringIO() + with redirect_stdout(ingest_out): + ingest_code = link_cli.ingest_status(target, json_output=True) + ingest = json.loads(ingest_out.getvalue()) + + doctor_out = StringIO() + with redirect_stdout(doctor_out): + doctor_code = link_cli.doctor(target) + + self.assertEqual(ingest_code, 0) + self.assertEqual(ingest["raw_count"], 3) + self.assertEqual(ingest["pending_count"], 0) + self.assertEqual(doctor_code, 0) + self.assertIn("raw memory captures pending review: 1", doctor_out.getvalue()) + self.assertNotIn("raw files not referenced by wiki pages", doctor_out.getvalue()) + + def test_ingest_status_reports_pending_raw_file(self): + tmp = Path(tempfile.mkdtemp(prefix="link-ingest-test-")) + target = tmp / "demo" + create_demo_quiet(target) + (target / "raw/new-source.md").write_text("# New source\n", encoding="utf-8") + + out = StringIO() + with redirect_stdout(out): + code = link_cli.ingest_status(target) + + self.assertEqual(code, 0) + self.assertIn("Pending ingest: 1", out.getvalue()) + self.assertIn("Safety: clear (No secret-looking values detected in raw sources.)", out.getvalue()) + self.assertIn("raw/new-source.md", 
out.getvalue()) + self.assertIn("Guidance: 1 raw file needs ingest.", out.getvalue()) + self.assertIn("Ask your agent: ingest raw/new-source.md into Link", out.getvalue()) + self.assertIn("Run: link validate", out.getvalue()) + self.assertIn("Suggested workflow: Ingest pending raw sources", out.getvalue()) + self.assertIn("Memory review: propose memories from raw/new-source.md", out.getvalue()) + self.assertIn("raw/new-source.md -> wiki/sources/new-source.md", out.getvalue()) + self.assertIn("Post-ingest checks:", out.getvalue()) + self.assertIn("link status --validate", out.getvalue()) + + def test_ingest_status_reports_represented_completion(self): + tmp = Path(tempfile.mkdtemp(prefix="link-ingest-test-")) + target = tmp / "demo" + create_demo_quiet(target) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.ingest_status(target) + + text = out.getvalue() + self.assertEqual(code, 0) + self.assertIn("Pending ingest: 0", text) + self.assertIn("Ingest completion: All 3 raw source(s) are represented", text) + self.assertIn("raw/agent-memory-session.md -> wiki/sources/agent-memory-session.md", text) + self.assertIn("Memory review: propose memories from raw/agent-memory-session.md", text) + self.assertIn("Retrieval check: query Link for agent memory session", text) + self.assertIn("Next check: brief me from Link before we continue", text) + + def test_ingest_status_reports_stale_represented_raw_file(self): + tmp = Path(tempfile.mkdtemp(prefix="link-ingest-test-")) + target = tmp / "demo" + create_demo_quiet(target) + raw_page = target / "raw/agent-memory-session.md" + time.sleep(0.02) + raw_page.write_text("# Agent memory session\n\nUpdated after ingest.\n", encoding="utf-8") + + out = StringIO() + with redirect_stdout(out): + code = link_cli.ingest_status(target) + + text = out.getvalue() + self.assertEqual(code, 0) + self.assertIn("Represented in wiki/sources: 2", text) + self.assertIn("Pending ingest: 1", text) + self.assertIn("Stale represented raw: 
1", text) + self.assertIn( + "raw/agent-memory-session.md [refresh source page: raw changed after wiki source page]", + text, + ) + self.assertIn("Guidance: 1 represented raw file changed after its source page was written.", text) + self.assertIn("Ask your agent: re-ingest raw/agent-memory-session.md into Link", text) + self.assertIn("Suggested workflow: Refresh stale source pages", text) + self.assertIn("raw/agent-memory-session.md -> wiki/sources/agent-memory-session.md", text) + + def test_ingest_status_warns_before_secret_raw_ingest(self): + tmp = Path(tempfile.mkdtemp(prefix="link-ingest-test-")) + target = tmp / "demo" + create_demo_quiet(target) + (target / "raw/secret-note.md").write_text( + "# Secret note\n\nDo not ingest sk-" + ("a" * 25) + "\n", + encoding="utf-8", + ) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.ingest_status(target) + + self.assertEqual(code, 0) + self.assertIn("Pending ingest: 1", out.getvalue()) + self.assertIn("Safety: blocked (1 pending raw file needs redaction before ingest.)", out.getvalue()) + self.assertIn("raw/secret-note.md [redact before ingest: OpenAI API key]", out.getvalue()) + self.assertIn("Guidance: 1 pending raw file contains secret-looking values.", out.getvalue()) + self.assertIn("Suggested workflow: Redact raw sources before ingest", out.getvalue()) + self.assertNotIn("Ask your agent: ingest raw/secret-note.md into Link", out.getvalue()) + + def test_ingest_status_blocks_unreadable_raw_ingest(self): + tmp = Path(tempfile.mkdtemp(prefix="link-ingest-test-")) + target = tmp / "demo" + create_demo_quiet(target) + (target / "raw/locked-note.md").write_text("# Locked note\n", encoding="utf-8") + + out = StringIO() + with ( + patch( + "link_core.ingest.secret_file_scan", + return_value={"labels": [], "readable": False, "error": "permission denied"}, + ), + redirect_stdout(out), + ): + code = link_cli.ingest_status(target) + + text = out.getvalue() + self.assertEqual(code, 0) + 
self.assertIn("Pending ingest: 1", text) + self.assertIn("Safety: blocked (1 pending raw file could not be inspected before ingest.)", text) + self.assertIn("raw/locked-note.md [fix access before ingest: permission denied]", text) + self.assertIn("Guidance: 1 pending raw file could not be inspected.", text) + self.assertIn("Suggested workflow: Inspect raw source access", text) + self.assertNotIn("Ask your agent: ingest raw/locked-note.md into Link", text) + + def test_ingest_status_blocks_unreadable_source_pages(self): + tmp = Path(tempfile.mkdtemp(prefix="link-ingest-test-")) + target = tmp / "demo" + create_demo_quiet(target) + (target / "wiki/sources/broken.md").write_text( + "---\ntype: source\ntitle: Broken\n---\n\n`raw/agent-memory-session.md`\n", + encoding="utf-8", + ) + original_read_text = Path.read_text + + def read_text(path: Path, *args: object, **kwargs: object) -> str: + if path.name == "broken.md": + raise OSError("permission denied") + return original_read_text(path, *args, **kwargs) + + out = StringIO() + with patch.object(Path, "read_text", read_text), redirect_stdout(out): + code = link_cli.ingest_status(target) + + text = out.getvalue() + self.assertEqual(code, 0) + self.assertIn("Source page read warnings: 1", text) + self.assertIn("wiki/sources/broken.md [fix access: permission denied]", text) + self.assertIn("Guidance: 1 source page could not be inspected.", text) + self.assertIn("Suggested workflow: Inspect source page access", text) + self.assertNotIn("Ask your agent:", text) + + def test_ingest_status_json(self): + tmp = Path(tempfile.mkdtemp(prefix="link-ingest-test-")) + target = tmp / "demo" + create_demo_quiet(target) + (target / "raw/new-source.md").write_text("# New source\n", encoding="utf-8") + + out = StringIO() + with redirect_stdout(out): + code = link_cli.ingest_status(target, json_output=True) + + data = json.loads(out.getvalue()) + self.assertEqual(code, 0) + self.assertEqual(data["raw_count"], 4) + 
self.assertEqual(data["pending_count"], 1) + self.assertEqual(data["pending_raw"][0]["raw"], "raw/new-source.md") + self.assertEqual(data["guidance"]["state"], "pending_raw") + self.assertEqual(data["guidance"]["agent_prompt"], "ingest raw/new-source.md into Link") + self.assertEqual(data["plan"]["batch"][0]["suggested_source_page"], "wiki/sources/new-source.md") + + def test_ingest_status_reports_stale_backlinks(self): + tmp = Path(tempfile.mkdtemp(prefix="link-ingest-test-")) + target = tmp / "demo" + create_demo_quiet(target) + backlinks_path = target / "wiki/_backlinks.json" + backlinks_path.write_text(json.dumps({"backlinks": {}, "forward": {}}), encoding="utf-8") + + out = StringIO() + with redirect_stdout(out): + code = link_cli.ingest_status(target) + + self.assertEqual(code, 0) + self.assertIn("Backlinks: stale", out.getvalue()) + self.assertIn("Guidance: Raw files are represented, but the graph index needs repair.", out.getvalue()) + self.assertIn("Run: link rebuild-backlinks", out.getvalue()) + + def test_status_reports_demo_readiness(self): + tmp = Path(tempfile.mkdtemp(prefix="link-status-test-")) + target = tmp / "demo" + create_demo_quiet(target) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.status(target, include_validation=True) + + self.assertEqual(code, 0) + self.assertIn(f"Version: {link_cli.LINK_VERSION}", out.getvalue()) + self.assertIn("Ready: yes", out.getvalue()) + self.assertIn("Content pages:", out.getvalue()) + self.assertIn("Schema: current", out.getvalue()) + self.assertIn("Search backend:", out.getvalue()) + self.assertIn("Validation: passed", out.getvalue()) + self.assertIn("query_link", out.getvalue()) + + json_out = StringIO() + with redirect_stdout(json_out): + json_code = link_cli.status(target, include_validation=True, json_output=True) + self.assertEqual(json_code, 0) + status_payload = json.loads(json_out.getvalue()) + self.assertEqual(status_payload["version"], link_cli.LINK_VERSION) + 
self.assertGreater(status_payload["content_page_count"], 0) + + def test_status_guides_empty_initialized_wiki_to_ingest(self): + tmp = Path(tempfile.mkdtemp(prefix="link-status-test-")) + target = tmp / "my-link" + with redirect_stdout(StringIO()): + self.assertEqual(link_cli.init_wiki(target), 0) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.status(target) + + self.assertEqual(code, 0) + text = out.getvalue() + self.assertIn("Ready: yes", text) + self.assertIn("Content pages: 0", text) + self.assertIn("ingest_status", text) + self.assertIn("starter_prompts", text) + + json_out = StringIO() + with redirect_stdout(json_out): + json_code = link_cli.status(target, json_output=True) + payload = json.loads(json_out.getvalue()) + self.assertEqual(json_code, 0) + self.assertEqual(payload["content_page_count"], 0) + self.assertEqual(payload["next_actions"][0]["tool"], "ingest_status") + self.assertEqual(payload["next_actions"][1]["tool"], "starter_prompts") + + def test_status_prints_readiness_warnings(self): + tmp = Path(tempfile.mkdtemp(prefix="link-status-test-")) + target = tmp / "my-link" + payload = { + "ready": False, + "version": link_cli.LINK_VERSION, + "wiki": str(target / "wiki"), + "missing": [], + "page_count": 0, + "content_page_count": 0, + "memory_count": 0, + "active_memory_count": 0, + "needs_review_count": 0, + "search_backend": "unavailable", + "schema": {"status": "current", "version": 1}, + "validation": {"checked": False}, + "warnings": [{ + "code": "cache_unavailable", + "message": "Could not build the wiki page cache.", + "detail": "cache failed", + }], + "next_actions": [{"tool": "validate_wiki", "label": "inspect wiki health", "arguments": {}}], + } + + out = StringIO() + with patch.object(link_cli, "_core_link_status", return_value=payload), redirect_stdout(out): + code = link_cli.status(target) + + self.assertEqual(code, 1) + self.assertIn("Warnings:", out.getvalue()) + self.assertIn("cache_unavailable", out.getvalue()) + 
self.assertIn("cache failed", out.getvalue()) + + def test_main_prints_version(self): + out = StringIO() + + with redirect_stdout(out), self.assertRaises(SystemExit) as cm: + link_cli.main(["--version"]) + + self.assertEqual(cm.exception.code, 0) + self.assertIn(f"Link {link_cli.LINK_VERSION}", out.getvalue()) + + def test_backup_creates_local_archive_without_raw_by_default(self): + tmp = Path(tempfile.mkdtemp(prefix="link-backup-test-")) + target = tmp / "demo" + create_demo_quiet(target) + (target / "raw/private-note.md").write_text("secret source", encoding="utf-8") + + out = StringIO() + with redirect_stdout(out): + code = link_cli.backup(target, label="cli test") + + self.assertEqual(code, 0) + self.assertIn("Link backup created:", out.getvalue()) + archives = list((target / ".link-backups").glob("*.tar.gz")) + self.assertEqual(len(archives), 1) + with tarfile.open(archives[0], "r:gz") as tar: + names = set(tar.getnames()) + self.assertIn("wiki/index.md", names) + self.assertNotIn("raw/private-note.md", names) + + def test_backup_json_can_include_raw_and_list_archives(self): + tmp = Path(tempfile.mkdtemp(prefix="link-backup-test-")) + target = tmp / "demo" + create_demo_quiet(target) + (target / "raw/private-note.md").write_text("source", encoding="utf-8") + + out = StringIO() + with redirect_stdout(out): + code = link_cli.backup(target, label="with raw", include_raw=True, json_output=True) + payload = json.loads(out.getvalue()) + + list_out = StringIO() + with redirect_stdout(list_out): + list_code = link_cli.backup(target, list_only=True, json_output=True) + listing = json.loads(list_out.getvalue()) + + self.assertEqual(code, 0) + self.assertEqual(list_code, 0) + self.assertIn("raw", payload["included"]) + self.assertEqual(listing["count"], 1) + self.assertEqual(listing["backups"][0]["name"], payload["name"]) + + def test_backup_reports_controlled_error_on_archive_failure(self): + tmp = Path(tempfile.mkdtemp(prefix="link-backup-test-")) + target = tmp / 
"demo" + create_demo_quiet(target) + original_add = tarfile.TarFile.add + + def flaky_add(tar, name, *args, **kwargs): + if Path(name).name == "agent-memory.md": + raise OSError("permission denied") + return original_add(tar, name, *args, **kwargs) + + out = StringIO() + with patch.object(tarfile.TarFile, "add", flaky_add): + with redirect_stdout(out): + code = link_cli.backup(target, label="partial", json_output=True) + payload = json.loads(out.getvalue()) + + self.assertEqual(code, 1) + self.assertFalse(payload["created"]) + self.assertIn("backup failed", payload["error"]) + self.assertEqual(list((target / ".link-backups").glob("*.tar.gz")), []) + + def test_backup_list_reports_unreadable_archive_warning(self): + tmp = Path(tempfile.mkdtemp(prefix="link-backup-test-")) + target = tmp / "demo" + create_demo_quiet(target) + with redirect_stdout(StringIO()): + link_cli.backup(target, label="warning source") + archive = next((target / ".link-backups").glob("*.tar.gz")) + original_stat = Path.stat + + def flaky_stat(path: Path, *args, **kwargs): + if path.name == archive.name: + raise OSError("permission denied") + return original_stat(path, *args, **kwargs) + + json_out = StringIO() + text_out = StringIO() + with patch.object(Path, "stat", flaky_stat): + with redirect_stdout(json_out): + json_code = link_cli.backup(target, list_only=True, json_output=True) + with redirect_stdout(text_out): + text_code = link_cli.backup(target, list_only=True) + payload = json.loads(json_out.getvalue()) + + self.assertEqual(json_code, 0) + self.assertEqual(text_code, 0) + self.assertEqual(payload["warning_count"], 1) + self.assertIn("could not read backup", text_out.getvalue()) + + def test_migrate_repairs_schema_marker(self): + tmp = Path(tempfile.mkdtemp(prefix="link-migrate-test-")) + target = tmp / "demo" + create_demo_quiet(target) + (target / "wiki/_link_schema.json").unlink() + + out = StringIO() + with redirect_stdout(out): + code = link_cli.migrate(target) + + 
self.assertEqual(code, 0) + self.assertTrue((target / "wiki/_link_schema.json").exists()) + self.assertIn("Previous schema: missing", out.getvalue()) + self.assertIn("Result: current", out.getvalue()) + + def test_migrate_json_reports_current_schema(self): + tmp = Path(tempfile.mkdtemp(prefix="link-migrate-test-")) + target = tmp / "demo" + create_demo_quiet(target) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.migrate(target, json_output=True) + payload = json.loads(out.getvalue()) + + self.assertEqual(code, 0) + self.assertEqual(payload["schema"]["status"], "current") + self.assertFalse(payload["migrated"]) + + def test_status_json_reports_missing_structure(self): + tmp = Path(tempfile.mkdtemp(prefix="link-status-test-")) + target = tmp / "empty" + + out = StringIO() + with redirect_stdout(out): + code = link_cli.status(target, include_validation=True, json_output=True) + payload = json.loads(out.getvalue()) + + self.assertEqual(code, 1) + self.assertFalse(payload["ready"]) + self.assertIn("wiki", payload["missing"]) + self.assertEqual(payload["search_backend"], "unavailable") + + def test_validate_accepts_demo_wiki(self): + tmp = Path(tempfile.mkdtemp(prefix="link-validate-test-")) + target = tmp / "demo" + create_demo_quiet(target) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.validate(target) + + self.assertEqual(code, 0) + self.assertIn("OK wiki pages satisfy the ingest validation gate", out.getvalue()) + self.assertIn("Result: passed", out.getvalue()) + + def test_validate_reports_agent_format_errors_as_json(self): + tmp = Path(tempfile.mkdtemp(prefix="link-validate-test-")) + target = tmp / "demo" + create_demo_quiet(target) + page = target / "wiki/concepts/agent-memory.md" + page.write_text( + page.read_text(encoding="utf-8") + .replace("type: concept", "type: source", 1) + .replace("## Sources", "## References", 1), + encoding="utf-8", + ) + with redirect_stdout(StringIO()): + link_cli.rebuild_backlinks(target) 
+ + out = StringIO() + with redirect_stdout(out): + code = link_cli.validate(target, json_output=True) + payload = json.loads(out.getvalue()) + codes = {finding["code"] for finding in payload["findings"]} + + self.assertEqual(code, 1) + self.assertFalse(payload["passed"]) + self.assertIn("type_directory_mismatch", codes) + self.assertIn("missing_required_section", codes) + self.assertNotIn("stale_backlinks", codes) + + def test_validate_strict_fails_on_warnings(self): + tmp = Path(tempfile.mkdtemp(prefix="link-validate-test-")) + target = tmp / "demo" + create_demo_quiet(target) + page = target / "wiki/concepts/agent-memory.md" + page.write_text( + page.read_text(encoding="utf-8").replace("> **TLDR:**", "> **Summary:**", 1), + encoding="utf-8", + ) + + normal_out = StringIO() + with redirect_stdout(normal_out): + normal_code = link_cli.validate(target) + + strict_out = StringIO() + with redirect_stdout(strict_out): + strict_code = link_cli.validate(target, strict=True) + + self.assertEqual(normal_code, 0) + self.assertEqual(strict_code, 1) + self.assertIn("WARNING", strict_out.getvalue()) + self.assertIn("Result: failed (0 errors, 1 warnings)", strict_out.getvalue()) + + def test_remember_creates_memory_page_and_updates_backlinks(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" + create_demo_quiet(target) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.remember( + target, + "User prefers release branches for Link work.", + title="Prefer release branches", + memory_type="preference", + scope="project", + tags="git, release", + source="unit test", + ) + + memory_path = target / "wiki/memories/prefer-release-branches.md" + backlinks = json.loads((target / "wiki/_backlinks.json").read_text(encoding="utf-8")) + index_text = (target / "wiki/index.md").read_text(encoding="utf-8") + log_text = (target / "wiki/log.md").read_text(encoding="utf-8") + + self.assertEqual(code, 0) + 
self.assertTrue(memory_path.exists()) + self.assertIn("memory_type: preference", memory_path.read_text(encoding="utf-8")) + self.assertIn("[[prefer-release-branches]]", index_text) + self.assertIn("Created: memories/prefer-release-branches.md", log_text) + self.assertIn("prefer-release-branches", backlinks["backlinks"]) + self.assertIn("Memory saved", out.getvalue()) + + def test_remember_blocks_strong_duplicate_by_default(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" + create_demo_quiet(target) + with redirect_stdout(StringIO()): + link_cli.remember( + target, + "User prefers release branches for Link work.", + title="Prefer release branches", + memory_type="preference", + scope="project", + ) + + duplicate_out = StringIO() + with redirect_stdout(duplicate_out): + duplicate_code = link_cli.remember( + target, + "User prefers release branches for Link work.", + title="Prefer release branches", + memory_type="preference", + scope="project", + json_output=True, + ) + duplicate = json.loads(duplicate_out.getvalue()) + + override_out = StringIO() + with redirect_stdout(override_out): + override_code = link_cli.remember( + target, + "User prefers release branches for Link work.", + title="Prefer release branches", + memory_type="preference", + scope="project", + allow_duplicate=True, + json_output=True, + ) + override = json.loads(override_out.getvalue()) + + self.assertEqual(duplicate_code, 0) + self.assertFalse(duplicate["created"]) + self.assertTrue(duplicate["duplicate"]) + self.assertEqual(duplicate["candidates"][0]["name"], "prefer-release-branches") + self.assertEqual(override_code, 0) + self.assertTrue(override["created"]) + self.assertTrue(override["duplicate_override"]) + self.assertEqual(override["name"], "prefer-release-branches-2") + + def test_remember_blocks_conflict_by_default(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" + create_demo_quiet(target) + with 
redirect_stdout(StringIO()): + link_cli.remember( + target, + "User prefers release branches for Link work.", + title="Prefer release branches", + memory_type="preference", + scope="project", + ) + + conflict_out = StringIO() + with redirect_stdout(conflict_out): + conflict_code = link_cli.remember( + target, + "User prefers develop branches for Link work.", + title="Prefer develop branches", + memory_type="preference", + scope="project", + ) + + self.assertEqual(conflict_code, 0) + self.assertIn("Possible conflicting memory found", conflict_out.getvalue()) + self.assertIn("Prefer release branches", conflict_out.getvalue()) + self.assertFalse((target / "wiki/memories/prefer-develop-branches.md").exists()) + + def test_update_memory_merges_text_and_resets_review(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" + create_demo_quiet(target) + with redirect_stdout(StringIO()): + link_cli.review_memory(target, "prefer-local-personal-memory", note="confirmed") + + out = StringIO() + with redirect_stdout(out): + code = link_cli.update_memory( + target, + "prefer-local-personal-memory", + "Also prefer updating existing memories instead of creating duplicates.", + source="unit test", + json_output=True, + ) + payload = json.loads(out.getvalue()) + memory_text = (target / "wiki/memories/prefer-local-personal-memory.md").read_text(encoding="utf-8") + log_text = (target / "wiki/log.md").read_text(encoding="utf-8") + backlinks = json.loads((target / "wiki/_backlinks.json").read_text(encoding="utf-8")) + + self.assertEqual(code, 0) + self.assertTrue(payload["updated"]) + self.assertEqual(payload["previous_review_status"], "reviewed") + self.assertEqual(payload["review_status"], "pending") + self.assertEqual(payload["update_count"], 1) + self.assertIn("updated_at:", memory_text) + self.assertIn("update_count: 1", memory_text) + self.assertIn('last_update_source: "unit test"', memory_text) + self.assertIn("review_status: pending", 
memory_text) + self.assertNotIn("reviewed_at:", memory_text) + self.assertIn("Update (", memory_text) + self.assertIn("instead of creating duplicates", memory_text) + self.assertIn("update-memory", log_text) + self.assertIn("prefer-local-personal-memory", backlinks["backlinks"]["link"]) + + def test_propose_memories_from_session_note_without_writing(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" + create_demo_quiet(target) + with redirect_stdout(StringIO()): + link_cli.remember( + target, + "User prefers release branches for Link work.", + title="Prefer release branches", + memory_type="preference", + scope="project", + ) + session_note = tmp / "session.md" + session_note.write_text( + "\n".join([ + "- I prefer release branches for Link work.", + "- We decided to keep Memory Mode local and source-backed.", + "- Maybe we could add cloud sync later.", + ]), + encoding="utf-8", + ) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.propose_memories(target, str(session_note), json_output=True) + payload = json.loads(out.getvalue()) + + self.assertEqual(code, 0) + self.assertEqual(payload["count"], 2) + self.assertGreaterEqual(payload["skipped_count"], 1) + self.assertEqual(payload["proposals"][0]["memory_type"], "preference") + self.assertEqual(payload["proposals"][0]["suggested_action"], "update-memory") + self.assertEqual(payload["proposals"][0]["primary_action"]["kind"], "update") + self.assertEqual(payload["proposals"][0]["duplicate_candidates"][0]["name"], "prefer-release-branches") + self.assertEqual(payload["proposals"][1]["memory_type"], "decision") + self.assertEqual(payload["proposals"][1]["scope"], "project") + self.assertEqual(payload["proposals"][1]["primary_action"]["kind"], "remember") + self.assertFalse((target / "wiki/memories/decision-keep-memory-mode-local.md").exists()) + + def test_recall_finds_memory_pages(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / 
"demo" + create_demo_quiet(target) + with redirect_stdout(StringIO()): + link_cli.remember( + target, + "User prefers release branches for Link work.", + title="Prefer release branches", + memory_type="preference", + scope="project", + ) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.recall(target, "release branches") + + self.assertEqual(code, 0) + self.assertIn("Prefer release branches", out.getvalue()) + self.assertIn("wiki/memories/prefer-release-branches.md", out.getvalue()) + self.assertIn("Recall: needs_review", out.getvalue()) + + def test_recall_json(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" + create_demo_quiet(target) + with redirect_stdout(StringIO()): + link_cli.remember(target, "User likes local first memory.", title="Local memory preference") + + out = StringIO() + with redirect_stdout(out): + code = link_cli.recall(target, "local memory", json_output=True) + + payload = json.loads(out.getvalue()) + self.assertEqual(code, 0) + self.assertEqual(payload["count"], 2) + self.assertEqual(payload["memories"][0]["name"], "local-memory-preference") + self.assertEqual(payload["memories"][0]["recall"]["state"], "needs_review") + self.assertEqual(payload["memories"][0]["review_issue_count"], 1) + + def test_recall_json_filters_project(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" + create_demo_quiet(target) + with redirect_stdout(StringIO()): + link_cli.remember( + target, + "Project uses alpha API for imports.", + title="Alpha API imports", + memory_type="project", + scope="project", + project="alpha", + ) + link_cli.remember( + target, + "Project uses beta API for imports.", + title="Beta API imports", + memory_type="project", + scope="project", + project="beta", + ) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.recall(target, "API imports", project="alpha", json_output=True) + + payload = json.loads(out.getvalue()) + 
self.assertEqual(code, 0) + self.assertEqual(payload["project"], "alpha") + self.assertEqual([item["name"] for item in payload["memories"]], ["alpha-api-imports"]) + + def test_profile_summarizes_memories(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" + create_demo_quiet(target) + with redirect_stdout(StringIO()): + link_cli.remember( + target, + "User decided to keep Memory Mode local.", + title="Keep Memory Mode local", + memory_type="decision", + scope="project", + tags="product", + ) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.profile(target) + + self.assertEqual(code, 0) + self.assertIn("Link memory profile", out.getvalue()) + self.assertIn("2 memories", out.getvalue()) + self.assertIn("preference: 1", out.getvalue()) + self.assertIn("decision: 1", out.getvalue()) + self.assertIn("Keep Memory Mode local", out.getvalue()) + + def test_profile_json(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" + create_demo_quiet(target) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.profile(target, json_output=True) + + payload = json.loads(out.getvalue()) + self.assertEqual(code, 0) + self.assertEqual(payload["memory_count"], 1) + self.assertEqual(payload["by_type"]["preference"], 1) + self.assertEqual(payload["preferences"][0]["name"], "prefer-local-personal-memory") + self.assertEqual(payload["review_count"], 1) + + def test_brief_primes_agent_memory(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" + create_demo_quiet(target) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.brief(target, "local personal memory") + + self.assertEqual(code, 0) + self.assertIn("Link memory brief: local personal memory", out.getvalue()) + self.assertIn("Prefer local personal memory", out.getvalue()) + self.assertIn("Agent guidance", out.getvalue()) + + def test_brief_json(self): + tmp = 
Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" + create_demo_quiet(target) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.brief(target, "local personal memory", json_output=True) + + payload = json.loads(out.getvalue()) + self.assertEqual(code, 0) + self.assertEqual(payload["selection"], "query") + self.assertEqual(payload["profile"]["memory_count"], 1) + self.assertEqual(payload["captures"]["count"], 0) + self.assertEqual(payload["relevant_memories"][0]["name"], "prefer-local-personal-memory") + self.assertNotIn("body", payload["relevant_memories"][0]) + + def test_query_builds_context_packet(self): + tmp = Path(tempfile.mkdtemp(prefix="link-query-test-")) + target = tmp / "demo" + create_demo_quiet(target) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.query(target, "agent memory", budget="small", json_output=True) + + payload = json.loads(out.getvalue()) + self.assertEqual(code, 0) + self.assertTrue(payload["found"]) + self.assertEqual(payload["budget"], "small") + self.assertIn("memory", payload["strategy"]["mode"]) + self.assertEqual(payload["wiki"]["primary"], "agent-memory") + self.assertEqual(payload["memory"]["items"][0]["name"], "prefer-local-personal-memory") + self.assertIn("context_packet", payload) + + def test_agent_facing_cli_queries_are_bounded(self): + tmp = Path(tempfile.mkdtemp(prefix="link-query-test-")) + target = tmp / "demo" + create_demo_quiet(target) + long_query = "agent memory " + ("memory " * 200) + + query_out = StringIO() + with redirect_stdout(query_out): + query_code = link_cli.query(target, long_query, budget="small", json_output=True) + graph_out = StringIO() + with redirect_stdout(graph_out): + graph_code = link_cli.graph_summary(target, long_query, json_output=True) + brief_out = StringIO() + with redirect_stdout(brief_out): + brief_code = link_cli.brief(target, long_query, json_output=True) + benchmark_out = StringIO() + with redirect_stdout(benchmark_out): + 
benchmark_code = link_cli.benchmark(target, long_query, json_output=True) + + self.assertEqual(query_code, 0) + self.assertEqual(graph_code, 0) + self.assertEqual(brief_code, 0) + self.assertEqual(benchmark_code, 0) + self.assertLessEqual(len(json.loads(query_out.getvalue())["query"]), 500) + self.assertLessEqual(len(json.loads(graph_out.getvalue())["topic"]), 500) + self.assertLessEqual(len(json.loads(brief_out.getvalue())["query"]), 500) + self.assertLessEqual(len(json.loads(benchmark_out.getvalue())["query"]), 500) + + def test_graph_summary_reports_bounded_context(self): + tmp = Path(tempfile.mkdtemp(prefix="link-graph-summary-test-")) + target = tmp / "demo" + create_demo_quiet(target) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.graph_summary(target, "agent memory", limit=5, depth=1, max_edges=10, json_output=True) + + payload = json.loads(out.getvalue()) + self.assertEqual(code, 0) + self.assertEqual(payload["mode"], "topic-neighborhood") + self.assertLessEqual(payload["returned_nodes"], 5) + self.assertIn("agent-memory", {node["id"] for node in payload["nodes"]}) + + def test_benchmark_reports_local_query_timings(self): + tmp = Path(tempfile.mkdtemp(prefix="link-benchmark-test-")) + target = tmp / "demo" + create_demo_quiet(target) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.benchmark(target, "agent memory", budget="small", json_output=True) + + payload = json.loads(out.getvalue()) + self.assertEqual(code, 0) + self.assertEqual(payload["query"], "agent memory") + self.assertTrue(payload["found"]) + self.assertGreaterEqual(payload["pages"], 1) + self.assertGreaterEqual(payload["memories"], 1) + self.assertGreaterEqual(payload["edges"], 1) + self.assertIn(payload["search_backend"], {"sqlite-fts", "token-index"}) + self.assertEqual(payload["budget"], "small") + self.assertIn("cache", payload["timings"]) + self.assertIn("search", payload["timings"]) + self.assertIn("query", payload["timings"]) + 
self.assertIn("graph_summary", payload["timings"]) + self.assertIn("page_list", payload["timings"]) + self.assertIn("graph_initial", payload["timings"]) + self.assertIn("graph", payload["timings"]) + self.assertGreaterEqual(payload["graph_summary"]["returned_nodes"], 1) + self.assertGreaterEqual(payload["page_list"]["returned_count"], 1) + self.assertEqual(payload["graph_initial"]["mode"], "full") + self.assertGreaterEqual(payload["graph_initial"]["nodes"], 1) + self.assertGreater(payload["budget_report"]["context_packet"]["estimated_chars"], 0) + self.assertEqual(payload["health"]["status"], "pass") + self.assertEqual(payload["health"]["label"], "interactive") + self.assertIn("interactive local agent memory", payload["health"]["summary"]) + self.assertIn("search", payload["health"]["thresholds_seconds"]) + self.assertIn("graph_summary", payload["health"]["thresholds_seconds"]) + self.assertIn("graph_initial", payload["health"]["thresholds_seconds"]) + + text_out = StringIO() + with redirect_stdout(text_out): + text_code = link_cli.benchmark(target, "agent memory", budget="small") + + self.assertEqual(text_code, 0) + self.assertIn("Verdict: interactive", text_out.getvalue()) + self.assertIn("Agent-safe payloads:", text_out.getvalue()) + self.assertIn("Graph page initial load:", text_out.getvalue()) + self.assertIn("Health: Ready for interactive local agent memory.", text_out.getvalue()) + + def test_brief_surfaces_saved_captures_without_secret_values(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" + create_demo_quiet(target) + fake_key = "sk-" + ("F" * 24) + with redirect_stdout(StringIO()): + link_cli.capture_session( + target, + f"Remember that brief should surface capture review. 
Test key {fake_key}", + title="Brief capture", + project="alpha", + json_output=True, + ) + + json_out = StringIO() + with redirect_stdout(json_out): + json_code = link_cli.brief(target, "capture review", project="alpha", json_output=True) + payload = json.loads(json_out.getvalue()) + + text_out = StringIO() + with redirect_stdout(text_out): + text_code = link_cli.brief(target, "capture review", project="alpha") + + self.assertEqual(json_code, 0) + self.assertEqual(text_code, 0) + self.assertEqual(payload["captures"]["project"], "alpha") + self.assertEqual(payload["captures"]["count"], 1) + self.assertEqual(payload["captures"]["warning_count"], 1) + self.assertIn("[redacted-secret]", payload["captures"]["items"][0]["snippet"]) + self.assertIn("capture-inbox", payload["captures"]["next_action"]) + self.assertIn("Redact raw captures", "\n".join(payload["agent_guidance"])) + self.assertNotIn(fake_key, json_out.getvalue()) + self.assertIn("Raw captures", text_out.getvalue()) + self.assertNotIn(fake_key, text_out.getvalue()) + + def test_memory_audit_reports_backlog_without_secret_values(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" + create_demo_quiet(target) + fake_key = "sk-" + ("G" * 24) + with redirect_stdout(StringIO()): + link_cli.capture_session( + target, + f"Remember that memory audit should show capture risk. 
Test key {fake_key}", + title="Audit capture", + project="alpha", + json_output=True, + ) + + json_out = StringIO() + with redirect_stdout(json_out): + json_code = link_cli.memory_audit(target, project="alpha", json_output=True) + payload = json.loads(json_out.getvalue()) + + text_out = StringIO() + with redirect_stdout(text_out): + text_code = link_cli.memory_audit(target, project="alpha") + + self.assertEqual(json_code, 0) + self.assertEqual(text_code, 0) + self.assertEqual(payload["status"], "needs_attention") + self.assertEqual(payload["project"], "alpha") + self.assertEqual(payload["captures"]["warning_count"], 1) + self.assertIn("capture_secret_warnings", [factor["code"] for factor in payload["risk_factors"]]) + self.assertIn("memory-inbox", payload["next_actions"][0]["command"]) + self.assertIn("capture-inbox", payload["next_actions"][1]["command"]) + self.assertNotIn(fake_key, json_out.getvalue()) + self.assertIn("Link memory audit", text_out.getvalue()) + self.assertIn("needs_attention", text_out.getvalue()) + self.assertNotIn(fake_key, text_out.getvalue()) + + def test_capture_session_writes_raw_note_and_proposes_only(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" + create_demo_quiet(target) + before_memories = list((target / "wiki/memories").glob("*.md")) + + out = StringIO() + fake_key = "sk-" + ("A" * 24) + with redirect_stdout(out): + code = link_cli.capture_session( + target, + f"Remember that the user prefers release branches for Link work. 
Test key {fake_key}", + title="Release workflow session", + project="link", + json_output=True, + ) + + payload = json.loads(out.getvalue()) + capture_path = target / payload["path"] + after_memories = list((target / "wiki/memories").glob("*.md")) + capture_text = capture_path.read_text(encoding="utf-8") + log_text = (target / "wiki/log.md").read_text(encoding="utf-8") + + self.assertEqual(code, 0) + self.assertTrue(payload["captured"]) + self.assertEqual(payload["project"], "link") + self.assertTrue(payload["path"].startswith("raw/memory-captures/")) + self.assertIn('project: "link"', capture_text) + self.assertIn("proposal-only", capture_text) + self.assertEqual(payload["secret_warnings"], ["OpenAI API key"]) + self.assertGreaterEqual(payload["proposals"]["count"], 1) + self.assertEqual(len(after_memories), len(before_memories)) + self.assertIn("capture-session", log_text) + + def test_capture_inbox_lists_captures_without_secret_values(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" + create_demo_quiet(target) + fake_key = "sk-" + ("E" * 24) + + alpha_out = StringIO() + with redirect_stdout(alpha_out): + alpha_code = link_cli.capture_session( + target, + f"Remember that Alpha project captures need review. 
Test key {fake_key}", + title="Alpha capture", + project="alpha", + json_output=True, + ) + beta_out = StringIO() + with redirect_stdout(beta_out): + beta_code = link_cli.capture_session( + target, + "Remember that Beta project captures stay separate.", + title="Beta capture", + project="beta", + json_output=True, + ) + + inbox_out = StringIO() + with redirect_stdout(inbox_out): + inbox_code = link_cli.capture_inbox(target, project="alpha", json_output=True) + inbox = json.loads(inbox_out.getvalue()) + + text_out = StringIO() + with redirect_stdout(text_out): + text_code = link_cli.capture_inbox(target, project="alpha") + text = text_out.getvalue() + + self.assertEqual(alpha_code, 0) + self.assertEqual(beta_code, 0) + self.assertEqual(inbox_code, 0) + self.assertEqual(text_code, 0) + self.assertEqual(inbox["project"], "alpha") + self.assertEqual(inbox["count"], 1) + self.assertEqual(inbox["warning_count"], 1) + self.assertEqual(inbox["captures"][0]["project"], "alpha") + self.assertEqual(inbox["captures"][0]["secret_warnings"], ["OpenAI API key"]) + self.assertIn("[redacted-secret]", inbox["captures"][0]["snippet"]) + self.assertNotIn(fake_key, inbox_out.getvalue()) + self.assertIn("accept-capture", inbox["captures"][0]["commands"]["accept"]) + self.assertIn("redact-capture", text) + self.assertIn("delete-capture", text) + self.assertNotIn("Beta capture", inbox_out.getvalue()) + self.assertNotIn(fake_key, text) + + def test_capture_inbox_reports_unreadable_captures(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" + create_demo_quiet(target) + capture_dir = target / "raw" / "memory-captures" + capture_dir.mkdir(parents=True, exist_ok=True) + (capture_dir / "locked.md").write_text( + "---\n" + "title: Locked capture\n" + "---\n\n" + "## Notes\n\n" + "This capture should surface as unreadable.\n", + encoding="utf-8", + ) + + original_read_text = Path.read_text + + def flaky_read_text(path: Path, *args, **kwargs): + if path.name 
== "locked.md": + raise OSError("permission denied") + return original_read_text(path, *args, **kwargs) + + inbox_out = StringIO() + text_out = StringIO() + with patch.object(Path, "read_text", flaky_read_text): + with redirect_stdout(inbox_out): + inbox_code = link_cli.capture_inbox(target, json_output=True) + with redirect_stdout(text_out): + text_code = link_cli.capture_inbox(target) + audit_out = StringIO() + with redirect_stdout(audit_out): + audit_code = link_cli.memory_audit(target, json_output=True) + inbox = json.loads(inbox_out.getvalue()) + audit = json.loads(audit_out.getvalue()) + text = text_out.getvalue() + + self.assertEqual(inbox_code, 0) + self.assertEqual(text_code, 0) + self.assertEqual(audit_code, 0) + self.assertEqual(inbox["read_warning_count"], 1) + self.assertEqual(inbox["read_warnings"][0]["capture"], "raw/memory-captures/locked.md") + self.assertIn("capture_read_warnings", [factor["code"] for factor in audit["risk_factors"]]) + self.assertTrue(audit["next_actions"][1]["recommended"]) + self.assertIn("Capture read warnings", text) + self.assertIn("locked.md", text) + + def test_accept_capture_writes_approved_proposal(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" + create_demo_quiet(target) - backlinks = json.loads((target / "wiki/_backlinks.json").read_text(encoding="utf-8")) - self.assertIn("backlinks", backlinks) - self.assertIn("forward", backlinks) - self.assertIn("agent-memory", backlinks["backlinks"]) - self.assertIn("link", backlinks["backlinks"]) - self.assertIn("agent-memory", backlinks["forward"]["link"]) + capture_out = StringIO() + with redirect_stdout(capture_out): + capture_code = link_cli.capture_session( + target, + "We decided to keep session capture approval local and explicit.", + title="Capture approval session", + project="link", + json_output=True, + ) + capture = json.loads(capture_out.getvalue()) - def test_demo_refuses_to_overwrite_non_demo_directory(self): - tmp = 
Path(tempfile.mkdtemp(prefix="link-demo-test-")) - target = tmp / "not-demo" - target.mkdir() - (target / "keep.txt").write_text("do not replace", encoding="utf-8") + accept_out = StringIO() + with redirect_stdout(accept_out): + accept_code = link_cli.accept_capture( + target, + capture["path"], + index=1, + json_output=True, + ) + accepted = json.loads(accept_out.getvalue()) + memory_path = target / accepted["result"]["path"] + memory_text = memory_path.read_text(encoding="utf-8") + log_text = (target / "wiki/log.md").read_text(encoding="utf-8") + + self.assertEqual(capture_code, 0) + self.assertEqual(accept_code, 0) + self.assertTrue(accepted["accepted"]) + self.assertEqual(accepted["capture"], capture["path"]) + self.assertEqual(accepted["project"], "link") + self.assertTrue(accepted["result"]["created"]) + self.assertEqual(accepted["result"]["project"], "link") + self.assertIn(f'source: "{capture["path"]}"', memory_text) + self.assertIn('project: "link"', memory_text) + self.assertIn("session capture approval", memory_text) + self.assertIn("accept-capture", log_text) + + recall_out = StringIO() + with redirect_stdout(recall_out): + recall_code = link_cli.recall(target, "session capture approval", project="link", json_output=True) + recall = json.loads(recall_out.getvalue()) + self.assertEqual(recall_code, 0) + self.assertEqual(recall["memories"][0]["project"], "link") + + def test_redact_capture_replaces_secret_like_values(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" + create_demo_quiet(target) + fake_key = "sk-" + ("B" * 24) - with self.assertRaises(SystemExit): - link_cli.create_demo(target, force=True) + capture_out = StringIO() + with redirect_stdout(capture_out): + link_cli.capture_session( + target, + f"Remember that capture redaction stays local. 
Test key {fake_key}", + title="Capture redaction session", + json_output=True, + ) + capture = json.loads(capture_out.getvalue()) - self.assertEqual((target / "keep.txt").read_text(encoding="utf-8"), "do not replace") + redact_out = StringIO() + with redirect_stdout(redact_out): + code = link_cli.redact_capture(target, capture["path"], json_output=True) + redacted = json.loads(redact_out.getvalue()) + capture_text = (target / capture["path"]).read_text(encoding="utf-8") + log_text = (target / "wiki/log.md").read_text(encoding="utf-8") - def test_demo_force_replaces_demo_directory(self): - tmp = Path(tempfile.mkdtemp(prefix="link-demo-test-")) + self.assertEqual(code, 0) + self.assertTrue(redacted["redacted"]) + self.assertEqual(redacted["labels"], ["OpenAI API key"]) + self.assertNotIn(fake_key, capture_text) + self.assertIn("[redacted-secret]", capture_text) + self.assertIn("redact-capture", log_text) + self.assertNotIn(fake_key, log_text) + + def test_delete_capture_requires_confirmation_and_removes_file(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) target = tmp / "demo" + create_demo_quiet(target) + capture_out = StringIO() + with redirect_stdout(capture_out): + link_cli.capture_session( + target, + "Remember that raw capture deletion requires confirmation.", + title="Capture deletion session", + json_output=True, + ) + capture = json.loads(capture_out.getvalue()) + capture_path = target / capture["path"] + + denied_out = StringIO() + with redirect_stdout(denied_out): + denied_code = link_cli.delete_capture(target, capture["path"], json_output=True) + denied = json.loads(denied_out.getvalue()) + self.assertTrue(capture_path.exists()) + + delete_out = StringIO() + with redirect_stdout(delete_out): + delete_code = link_cli.delete_capture(target, capture["path"], confirm=True, json_output=True) + deleted = json.loads(delete_out.getvalue()) + log_text = (target / "wiki/log.md").read_text(encoding="utf-8") + + self.assertEqual(denied_code, 1) + 
self.assertFalse(denied["deleted"]) + self.assertEqual(delete_code, 0) + self.assertTrue(deleted["deleted"]) + self.assertFalse(capture_path.exists()) + self.assertIn("delete-capture", log_text) + self.assertNotIn("raw capture deletion requires confirmation", log_text) + + def test_memory_inbox_and_review_memory(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" create_demo_quiet(target) - (target / "extra.txt").write_text("old", encoding="utf-8") - create_demo_quiet(target, force=True) - self.assertFalse((target / "extra.txt").exists()) - self.assertTrue((target / "wiki/index.md").exists()) + inbox_out = StringIO() + with redirect_stdout(inbox_out): + inbox_code = link_cli.memory_inbox(target, json_output=True) + inbox = json.loads(inbox_out.getvalue()) + + self.assertEqual(inbox_code, 0) + self.assertEqual(inbox["review_count"], 1) + self.assertEqual(inbox["items"][0]["name"], "prefer-local-personal-memory") + self.assertEqual(inbox["items"][0]["issues"][0]["code"], "pending_review") + self.assertEqual(inbox["items"][0]["primary_action"]["kind"], "review") + + text_out = StringIO() + with redirect_stdout(text_out): + text_code = link_cli.memory_inbox(target) + self.assertEqual(text_code, 0) + self.assertIn("Next: Review", text_out.getvalue()) + self.assertIn("Other actions:", text_out.getvalue()) + + review_out = StringIO() + with redirect_stdout(review_out): + review_code = link_cli.review_memory( + target, + "prefer-local-personal-memory", + note="confirmed in unit test", + json_output=True, + ) + review = json.loads(review_out.getvalue()) + memory_text = (target / "wiki/memories/prefer-local-personal-memory.md").read_text(encoding="utf-8") + log_text = (target / "wiki/log.md").read_text(encoding="utf-8") + + self.assertEqual(review_code, 0) + self.assertTrue(review["updated"]) + self.assertEqual(review["review_status"], "reviewed") + self.assertEqual(review["remaining_issue_count"], 0) + self.assertIn("review_status: 
reviewed", memory_text) + self.assertIn("reviewed_at:", memory_text) + self.assertIn('review_note: "confirmed in unit test"', memory_text) + self.assertIn("review-memory", log_text) + + clear_out = StringIO() + with redirect_stdout(clear_out): + clear_code = link_cli.memory_inbox(target, json_output=True) + clear = json.loads(clear_out.getvalue()) + self.assertEqual(clear_code, 0) + self.assertEqual(clear["review_count"], 0) + + def test_memory_inbox_filters_by_project(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" + create_demo_quiet(target) + with redirect_stdout(StringIO()): + link_cli.review_memory(target, "prefer-local-personal-memory", json_output=True) + alpha_code = link_cli.remember( + target, + "Alpha project stores deployment context in Link.", + title="Alpha deployment context", + memory_type="project", + scope="project", + project="alpha", + json_output=True, + ) + beta_code = link_cli.remember( + target, + "Beta project stores design context in Link.", + title="Beta design context", + memory_type="project", + scope="project", + project="beta", + json_output=True, + ) - def test_doctor_accepts_demo_wiki(self): - tmp = Path(tempfile.mkdtemp(prefix="link-doctor-test-")) + inbox_out = StringIO() + with redirect_stdout(inbox_out): + inbox_code = link_cli.memory_inbox(target, project="alpha", json_output=True) + inbox = json.loads(inbox_out.getvalue()) + + text_out = StringIO() + with redirect_stdout(text_out): + text_code = link_cli.memory_inbox(target, project="alpha") + + self.assertEqual(alpha_code, 0) + self.assertEqual(beta_code, 0) + self.assertEqual(inbox_code, 0) + self.assertEqual(text_code, 0) + self.assertEqual(inbox["project"], "alpha") + self.assertEqual([item["project"] for item in inbox["items"]], ["alpha"]) + self.assertIn("Project: alpha", text_out.getvalue()) + self.assertNotIn("Beta design context", inbox_out.getvalue()) + + def test_explain_memory_reports_trust_state_and_graph(self): + tmp = 
Path(tempfile.mkdtemp(prefix="link-memory-test-")) target = tmp / "demo" create_demo_quiet(target) out = StringIO() with redirect_stdout(out): - code = link_cli.doctor(target) + code = link_cli.explain_memory(target, "prefer-local-personal-memory", json_output=True) + payload = json.loads(out.getvalue()) self.assertEqual(code, 0) - self.assertIn("Result: healthy", out.getvalue()) - self.assertIn("OK wiki pages have summaries", out.getvalue()) - self.assertIn("OK source-backed pages cite sources", out.getvalue()) - self.assertIn("OK no sensitive-looking file contents", out.getvalue()) - - def test_ingest_status_accepts_demo_wiki(self): - tmp = Path(tempfile.mkdtemp(prefix="link-ingest-test-")) + self.assertTrue(payload["found"]) + self.assertEqual(payload["memory"]["name"], "prefer-local-personal-memory") + self.assertEqual(payload["provenance"]["source"], "demo") + self.assertEqual(payload["recall"]["state"], "needs_review") + self.assertTrue(payload["recall"]["default_enabled"]) + self.assertEqual(payload["review"]["issues"][0]["code"], "pending_review") + self.assertIn("agent-memory", payload["graph"]["forward"]) + self.assertIn("link", payload["graph"]["forward"]) + self.assertIn("Prefer local personal memory", payload["body"]) + + def test_explain_memory_ready_after_review_and_disabled_after_archive(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) target = tmp / "demo" create_demo_quiet(target) - - out = StringIO() - with redirect_stdout(out): - code = link_cli.ingest_status(target) + with redirect_stdout(StringIO()): + link_cli.review_memory(target, "prefer-local-personal-memory") + + reviewed_out = StringIO() + with redirect_stdout(reviewed_out): + link_cli.explain_memory(target, "prefer-local-personal-memory", json_output=True) + reviewed = json.loads(reviewed_out.getvalue()) + + with redirect_stdout(StringIO()): + link_cli.archive_memory(target, "prefer-local-personal-memory", reason="unit test") + archived_out = StringIO() + with 
redirect_stdout(archived_out): + link_cli.explain_memory(target, "prefer-local-personal-memory", json_output=True) + archived = json.loads(archived_out.getvalue()) + + self.assertEqual(reviewed["recall"]["state"], "ready") + self.assertEqual(reviewed["review"]["issue_count"], 0) + self.assertEqual(archived["recall"]["state"], "disabled") + self.assertFalse(archived["recall"]["default_enabled"]) + self.assertEqual(archived["lifecycle"]["status"], "archived") + + def test_reviewed_memory_with_quality_issue_stays_in_inbox(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" + create_demo_quiet(target) + memory_path = target / "wiki/memories/prefer-local-personal-memory.md" + text = memory_path.read_text(encoding="utf-8") + text = text.replace('source: "demo"\n', "") + memory_path.write_text(text, encoding="utf-8") + + with redirect_stdout(StringIO()): + code = link_cli.review_memory(target, "prefer-local-personal-memory") + inbox_out = StringIO() + with redirect_stdout(inbox_out): + link_cli.memory_inbox(target, json_output=True) + inbox = json.loads(inbox_out.getvalue()) self.assertEqual(code, 0) - self.assertIn("Raw files: 3", out.getvalue()) - self.assertIn("Pending ingest: 0", out.getvalue()) - self.assertIn("Backlinks: current", out.getvalue()) + self.assertEqual(inbox["review_count"], 1) + self.assertEqual(inbox["items"][0]["issues"][0]["code"], "missing_source") - def test_ingest_status_reports_pending_raw_file(self): - tmp = Path(tempfile.mkdtemp(prefix="link-ingest-test-")) + def test_archive_memory_hides_from_default_recall_and_restore_reenables(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) target = tmp / "demo" create_demo_quiet(target) - (target / "raw/new-source.md").write_text("# New source\n", encoding="utf-8") - out = StringIO() - with redirect_stdout(out): - code = link_cli.ingest_status(target) + with redirect_stdout(StringIO()): + archive_code = link_cli.archive_memory( + target, + 
"prefer-local-personal-memory", + reason="unit test stale memory", + ) - self.assertEqual(code, 0) - self.assertIn("Pending ingest: 1", out.getvalue()) - self.assertIn("raw/new-source.md", out.getvalue()) - self.assertIn("Ask your agent: ingest raw/new-source.md", out.getvalue()) + memory_path = target / "wiki/memories/prefer-local-personal-memory.md" + archived_text = memory_path.read_text(encoding="utf-8") + log_text = (target / "wiki/log.md").read_text(encoding="utf-8") - def test_ingest_status_json(self): - tmp = Path(tempfile.mkdtemp(prefix="link-ingest-test-")) - target = tmp / "demo" - create_demo_quiet(target) - (target / "raw/new-source.md").write_text("# New source\n", encoding="utf-8") + self.assertEqual(archive_code, 0) + self.assertIn("status: archived", archived_text) + self.assertIn("archived_at:", archived_text) + self.assertIn('archive_reason: "unit test stale memory"', archived_text) + self.assertIn("archive-memory", log_text) + + profile_out = StringIO() + with redirect_stdout(profile_out): + link_cli.profile(target, json_output=True) + profile_payload = json.loads(profile_out.getvalue()) + self.assertEqual(profile_payload["active_count"], 0) + self.assertEqual(profile_payload["by_status"]["archived"], 1) + self.assertEqual(profile_payload["archived"][0]["name"], "prefer-local-personal-memory") out = StringIO() with redirect_stdout(out): - code = link_cli.ingest_status(target, json_output=True) + recall_code = link_cli.recall(target, "local personal memory") + self.assertEqual(recall_code, 0) + self.assertIn("No matching memories found.", out.getvalue()) + + out_json = StringIO() + with redirect_stdout(out_json): + include_code = link_cli.recall(target, "local personal memory", include_archived=True, json_output=True) + include_payload = json.loads(out_json.getvalue()) + self.assertEqual(include_code, 0) + self.assertTrue(include_payload["include_archived"]) + self.assertEqual(include_payload["memories"][0]["status"], "archived") + + with 
redirect_stdout(StringIO()): + restore_code = link_cli.restore_memory(target, "Prefer local personal memory") + restored_text = memory_path.read_text(encoding="utf-8") + self.assertEqual(restore_code, 0) + self.assertIn("status: active", restored_text) + self.assertIn("restored_at:", restored_text) + self.assertNotIn("archived_at:", restored_text) + self.assertNotIn("archive_reason:", restored_text) - data = json.loads(out.getvalue()) - self.assertEqual(code, 0) - self.assertEqual(data["raw_count"], 4) - self.assertEqual(data["pending_count"], 1) - self.assertEqual(data["pending_raw"][0]["raw"], "raw/new-source.md") + out = StringIO() + with redirect_stdout(out): + link_cli.recall(target, "local personal memory") + self.assertIn("Prefer local personal memory", out.getvalue()) - def test_ingest_status_reports_stale_backlinks(self): - tmp = Path(tempfile.mkdtemp(prefix="link-ingest-test-")) + def test_forget_memory_requires_confirmation_and_deletes_page(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) + target = tmp / "demo" + create_demo_quiet(target) + memory_path = target / "wiki/memories/prefer-local-personal-memory.md" + + denied_out = StringIO() + with redirect_stdout(denied_out): + denied_code = link_cli.forget_memory(target, "prefer-local-personal-memory", json_output=True) + denied = json.loads(denied_out.getvalue()) + self.assertEqual(denied_code, 1) + self.assertFalse(denied["forgotten"]) + self.assertTrue(denied["confirmation_required"]) + self.assertTrue(memory_path.exists()) + + forget_out = StringIO() + with redirect_stdout(forget_out): + forget_code = link_cli.forget_memory(target, "prefer-local-personal-memory", confirm=True, json_output=True) + forgotten = json.loads(forget_out.getvalue()) + log_text = (target / "wiki/log.md").read_text(encoding="utf-8") + index_text = (target / "wiki/index.md").read_text(encoding="utf-8") + + self.assertEqual(forget_code, 0) + self.assertTrue(forgotten["forgotten"]) + 
self.assertTrue(forgotten["backlinks_rebuilt"]) + self.assertFalse(memory_path.exists()) + self.assertNotIn("[[prefer-local-personal-memory]]", index_text) + self.assertIn("forget-memory", log_text) + self.assertNotIn("local personal memory for agents", log_text) + + def test_archive_memory_json_not_found(self): + tmp = Path(tempfile.mkdtemp(prefix="link-memory-test-")) target = tmp / "demo" create_demo_quiet(target) - backlinks_path = target / "wiki/_backlinks.json" - backlinks_path.write_text(json.dumps({"backlinks": {}, "forward": {}}), encoding="utf-8") - out = StringIO() - with redirect_stdout(out): - code = link_cli.ingest_status(target) + err = StringIO() + with redirect_stdout(StringIO()), redirect_stderr(err): + code = link_cli.archive_memory(target, "missing-memory", json_output=True) - self.assertEqual(code, 0) - self.assertIn("Backlinks: stale", out.getvalue()) - self.assertIn("Repair graph index", out.getvalue()) + self.assertEqual(code, 1) + self.assertIn("memory not found", err.getvalue()) def test_verify_mcp_ready(self): tmp = Path(tempfile.mkdtemp(prefix="link-verify-test-")) @@ -148,11 +1762,11 @@ def test_verify_mcp_ready(self): code = link_cli.verify_mcp( target, python_cmd="/tmp/python", - import_check=lambda _: {"installed": True, "version": "9.9.9", "error": None}, + import_check=lambda _: {"installed": True, "version": link_cli.LINK_VERSION, "error": None}, ) self.assertEqual(code, 0) - self.assertIn("link-mcp: installed (9.9.9)", out.getvalue()) + self.assertIn(f"link-mcp: installed ({link_cli.LINK_VERSION})", out.getvalue()) self.assertIn('"command": "/tmp/python"', out.getvalue()) self.assertIn("Result: ready", out.getvalue()) @@ -167,7 +1781,7 @@ def test_verify_mcp_uses_installer_python_marker(self): with redirect_stdout(out): code = link_cli.verify_mcp( target, - import_check=lambda cmd: checked.append(cmd) or {"installed": True, "version": "9.9.9", "error": None}, + import_check=lambda cmd: checked.append(cmd) or {"installed": True, 
"version": link_cli.LINK_VERSION, "error": None}, ) self.assertEqual(code, 0) @@ -187,7 +1801,7 @@ def test_verify_mcp_explicit_python_overrides_marker(self): target, python_cmd="/tmp/explicit-python", json_output=True, - import_check=lambda cmd: checked.append(cmd) or {"installed": True, "version": "9.9.9", "error": None}, + import_check=lambda cmd: checked.append(cmd) or {"installed": True, "version": link_cli.LINK_VERSION, "error": None}, ) self.assertEqual(code, 0) @@ -204,15 +1818,152 @@ def test_verify_mcp_json(self): target, json_output=True, python_cmd="/tmp/python", - import_check=lambda _: {"installed": True, "version": "9.9.9", "error": None}, + import_check=lambda _: {"installed": True, "version": link_cli.LINK_VERSION, "error": None}, ) data = json.loads(out.getvalue()) self.assertEqual(code, 0) self.assertTrue(data["ready"]) - self.assertEqual(data["link_mcp"]["version"], "9.9.9") + self.assertEqual(data["expected_version"], link_cli.LINK_VERSION) + self.assertTrue(data["version_matches"]) + self.assertEqual(data["issues"], []) + self.assertEqual(data["next_actions"], []) + self.assertTrue(data["link_mcp"]["mcp_sdk"]) + self.assertEqual(data["link_mcp"]["version"], link_cli.LINK_VERSION) self.assertEqual(data["config"]["mcpServers"]["link"]["command"], "/tmp/python") + def test_verify_mcp_json_reports_repair_actions(self): + tmp = Path(tempfile.mkdtemp(prefix="link-verify-test-")) + target = tmp / "demo" + create_demo_quiet(target) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.verify_mcp( + target, + json_output=True, + python_cmd="/tmp/Link Python/bin/python", + import_check=lambda _: { + "installed": True, + "version": "0.9.0", + "mcp_sdk": False, + "error": "No module named mcp", + }, + ) + + data = json.loads(out.getvalue()) + self.assertEqual(code, 1) + self.assertFalse(data["ready"]) + self.assertFalse(data["version_matches"]) + self.assertFalse(data["link_mcp"]["mcp_sdk"]) + self.assertEqual([issue["code"] for issue in 
data["issues"]], ["mcp_sdk_missing", "version_mismatch"]) + self.assertEqual( + [action["tool"] for action in data["next_actions"]], + ["reinstall_link_mcp", "upgrade_link_mcp"], + ) + self.assertEqual( + data["next_actions"][0]["command"], + [ + "/tmp/Link Python/bin/python", + "-m", + "pip", + "install", + "--upgrade", + f"link-mcp=={link_cli.LINK_VERSION}", + ], + ) + self.assertIn("'/tmp/Link Python/bin/python'", data["next_actions"][0]["command_text"]) + + def test_verify_mcp_json_reports_missing_wiki_action(self): + tmp = Path(tempfile.mkdtemp(prefix="link-verify-test-")) + target = tmp / "empty" + target.mkdir() + + out = StringIO() + with redirect_stdout(out): + code = link_cli.verify_mcp( + target, + json_output=True, + python_cmd="/tmp/python", + import_check=lambda _: { + "installed": True, + "version": link_cli.LINK_VERSION, + "mcp_sdk": True, + "error": None, + }, + ) + + data = json.loads(out.getvalue()) + self.assertEqual(code, 1) + self.assertFalse(data["wiki"]["exists"]) + self.assertEqual([issue["code"] for issue in data["issues"]], ["wiki_missing"]) + self.assertEqual(data["next_actions"][0]["tool"], "init_wiki") + self.assertEqual(data["next_actions"][0]["command"][-2:], ["init", str(target.resolve())]) + + def test_check_link_mcp_import_requires_mcp_sdk(self): + stdout = json.dumps({ + "installed": True, + "version": link_cli.LINK_VERSION, + "mcp_sdk": False, + "error": "No module named mcp", + }) + with patch.object( + link_cli.subprocess, + "run", + return_value=subprocess.CompletedProcess(["/tmp/python"], 0, stdout=stdout, stderr=""), + ) as run: + payload = link_cli._check_link_mcp_import("/tmp/python") + + self.assertTrue(payload["installed"]) + self.assertEqual(payload["version"], link_cli.LINK_VERSION) + self.assertFalse(payload["mcp_sdk"]) + self.assertEqual(payload["error"], "No module named mcp") + self.assertIn("mcp.server.fastmcp", run.call_args.args[0][2]) + + def test_verify_mcp_reports_version_mismatch(self): + tmp = 
Path(tempfile.mkdtemp(prefix="link-verify-test-")) + target = tmp / "demo" + create_demo_quiet(target) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.verify_mcp( + target, + python_cmd="/tmp/Link Python/bin/python", + import_check=lambda _: {"installed": True, "version": "0.9.0", "error": None}, + ) + + self.assertEqual(code, 1) + text = out.getvalue() + self.assertIn("link-mcp: installed (0.9.0)", text) + self.assertIn(f"Expected version: {link_cli.LINK_VERSION}", text) + self.assertIn(f"'/tmp/Link Python/bin/python' -m pip install --upgrade link-mcp=={link_cli.LINK_VERSION}", text) + + def test_verify_mcp_reports_missing_mcp_sdk_dependency(self): + tmp = Path(tempfile.mkdtemp(prefix="link-verify-test-")) + target = tmp / "demo" + create_demo_quiet(target) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.verify_mcp( + target, + python_cmd="/tmp/python", + import_check=lambda _: { + "installed": True, + "version": link_cli.LINK_VERSION, + "mcp_sdk": False, + "error": "No module named mcp", + }, + ) + + self.assertEqual(code, 1) + text = out.getvalue() + self.assertIn(f"link-mcp: installed ({link_cli.LINK_VERSION})", text) + self.assertIn("MCP SDK: missing", text) + self.assertIn("Import error: No module named mcp", text) + self.assertIn(f"/tmp/python -m pip install --upgrade link-mcp=={link_cli.LINK_VERSION}", text) + def test_verify_mcp_reports_missing_package(self): tmp = Path(tempfile.mkdtemp(prefix="link-verify-test-")) target = tmp / "demo" @@ -228,7 +1979,7 @@ def test_verify_mcp_reports_missing_package(self): self.assertEqual(code, 1) self.assertIn("link-mcp: missing", out.getvalue()) - self.assertIn("python3 -m pip install --upgrade link-mcp", out.getvalue()) + self.assertIn("/tmp/python -m pip install --upgrade link-mcp", out.getvalue()) def test_verify_mcp_reports_missing_wiki(self): tmp = Path(tempfile.mkdtemp(prefix="link-verify-test-")) @@ -240,12 +1991,12 @@ def test_verify_mcp_reports_missing_wiki(self): code = 
link_cli.verify_mcp( target, python_cmd="/tmp/python", - import_check=lambda _: {"installed": True, "version": "9.9.9", "error": None}, + import_check=lambda _: {"installed": True, "version": link_cli.LINK_VERSION, "error": None}, ) self.assertEqual(code, 1) self.assertIn("Wiki: missing", out.getvalue()) - self.assertIn("python3 link.py demo", out.getvalue()) + self.assertIn("python3 link.py init", out.getvalue()) def test_doctor_reports_dead_links(self): tmp = Path(tempfile.mkdtemp(prefix="link-doctor-test-")) @@ -293,6 +2044,70 @@ def test_rebuild_backlinks_repairs_stale_index(self): self.assertIn("Rebuilt", out.getvalue()) self.assertIn("agent-memory", rebuilt["backlinks"]) + def test_rebuild_backlinks_reports_unreadable_pages(self): + tmp = Path(tempfile.mkdtemp(prefix="link-doctor-test-")) + target = tmp / "demo" + create_demo_quiet(target) + locked = target / "wiki/concepts/locked-page.md" + locked.write_text("---\ntype: concept\ntitle: Locked\n---\n\n[[link]]\n", encoding="utf-8") + original_read_text = Path.read_text + + def flaky_read_text(path: Path, *args, **kwargs): + if path.name == "locked-page.md": + raise OSError("permission denied") + return original_read_text(path, *args, **kwargs) + + err = StringIO() + with patch.object(Path, "read_text", flaky_read_text): + with redirect_stderr(err): + code = link_cli.rebuild_backlinks(target) + + self.assertEqual(code, 1) + self.assertIn("Could not rebuild backlinks", err.getvalue()) + + def test_rebuild_index_repairs_missing_catalog_entries(self): + tmp = Path(tempfile.mkdtemp(prefix="link-index-test-")) + target = tmp / "demo" + create_demo_quiet(target) + index_path = target / "wiki/index.md" + index_path.write_text("# Broken Index\n", encoding="utf-8") + + out = StringIO() + with redirect_stdout(out): + rebuild_code = link_cli.rebuild_index(target) + backlinks_code = link_cli.rebuild_backlinks(target) + doctor_code = link_cli.doctor(target) + + index_text = index_path.read_text(encoding="utf-8") + 
self.assertEqual(rebuild_code, 0) + self.assertEqual(backlinks_code, 0) + self.assertEqual(doctor_code, 0) + self.assertIn("Rebuilt", out.getvalue()) + self.assertIn("rebuild-backlinks before validation", out.getvalue()) + self.assertIn("[[agent-memory]]", index_text) + self.assertIn("[[prefer-local-personal-memory]]", index_text) + + def test_rebuild_index_reports_unreadable_pages(self): + tmp = Path(tempfile.mkdtemp(prefix="link-index-test-")) + target = tmp / "demo" + create_demo_quiet(target) + locked = target / "wiki/concepts/locked-page.md" + locked.write_text("---\ntype: concept\ntitle: Locked\n---\n\n# Locked\n", encoding="utf-8") + original_read_text = Path.read_text + + def flaky_read_text(path: Path, *args, **kwargs): + if path.name == "locked-page.md": + raise OSError("permission denied") + return original_read_text(path, *args, **kwargs) + + err = StringIO() + with patch.object(Path, "read_text", flaky_read_text): + with redirect_stderr(err): + code = link_cli.rebuild_index(target) + + self.assertEqual(code, 1) + self.assertIn("Could not rebuild index", err.getvalue()) + def test_doctor_fix_repairs_stale_backlinks(self): tmp = Path(tempfile.mkdtemp(prefix="link-doctor-test-")) target = tmp / "demo" @@ -310,6 +2125,25 @@ def test_doctor_fix_repairs_stale_backlinks(self): self.assertIn("Result: healthy", out.getvalue()) self.assertIn("agent-memory", rebuilt["backlinks"]) + def test_doctor_fix_repairs_index_drift(self): + tmp = Path(tempfile.mkdtemp(prefix="link-doctor-test-")) + target = tmp / "demo" + create_demo_quiet(target) + index_path = target / "wiki/index.md" + index_path.write_text("# Broken Index\n", encoding="utf-8") + + out = StringIO() + with redirect_stdout(out): + code = link_cli.doctor(target, fix=True) + + index_text = index_path.read_text(encoding="utf-8") + backlinks = json.loads((target / "wiki/_backlinks.json").read_text(encoding="utf-8")) + self.assertEqual(code, 0) + self.assertIn("rebuilt wiki/index.md", out.getvalue()) + 
self.assertIn("rebuilt wiki/_backlinks.json", out.getvalue()) + self.assertIn("[[agent-memory]]", index_text) + self.assertIn("agent-memory", backlinks["backlinks"]) + def test_doctor_fix_creates_missing_structure(self): tmp = Path(tempfile.mkdtemp(prefix="link-doctor-test-")) target = tmp / "empty" @@ -323,9 +2157,12 @@ def test_doctor_fix_creates_missing_structure(self): self.assertTrue((target / "raw").is_dir()) self.assertTrue((target / "wiki/sources").is_dir()) self.assertTrue((target / "wiki/concepts").is_dir()) + self.assertTrue((target / "wiki/memories").is_dir()) self.assertTrue((target / "wiki/_backlinks.json").exists()) + self.assertTrue((target / "wiki/_link_schema.json").exists()) self.assertIn("created raw", out.getvalue()) self.assertIn("created wiki/index.md", out.getvalue()) + self.assertIn("schema: wrote _link_schema.json", out.getvalue()) self.assertIn("Result: healthy", out.getvalue()) def test_doctor_fix_does_not_hide_content_errors(self): @@ -342,6 +2179,72 @@ def test_doctor_fix_does_not_hide_content_errors(self): self.assertEqual(code, 1) self.assertIn("dead wikilinks", out.getvalue()) + def test_doctor_reports_validation_errors(self): + tmp = Path(tempfile.mkdtemp(prefix="link-doctor-test-")) + target = tmp / "demo" + create_demo_quiet(target) + page = target / "wiki/sources/agent-memory-session.md" + page.write_text( + "---\ntype: source\ntitle: Agent Memory Session\n---\n\n" + "# Agent Memory Session\n\n" + "Captured from raw/agent-memory-session.md.\n", + encoding="utf-8", + ) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.doctor(target) + + self.assertEqual(code, 1) + self.assertIn("validation errors:", out.getvalue()) + self.assertIn("missing_required_section", out.getvalue()) + + def test_doctor_fix_repairs_source_page_validation_shape(self): + tmp = Path(tempfile.mkdtemp(prefix="link-doctor-test-")) + target = tmp / "demo" + create_demo_quiet(target) + page = target / "wiki/sources/agent-memory-session.md" + 
page.write_text( + "---\ntype: source\ntitle: Agent Memory Session\n---\n\n" + "# Agent Memory Session\n\n" + "Captured from raw/agent-memory-session.md.\n", + encoding="utf-8", + ) + + out = StringIO() + with redirect_stdout(out): + code = link_cli.doctor(target, fix=True) + + self.assertEqual(code, 0) + text = out.getvalue() + self.assertIn("repaired validation shape for wiki/sources/agent-memory-session.md", text) + self.assertIn("OK ingest validation gate", text) + validation = link_cli._core_validate_wiki(target / "wiki") + self.assertTrue(validation["passed"]) + repaired_text = page.read_text(encoding="utf-8") + self.assertIn("> **TLDR:** Agent Memory Session source notes.", repaired_text) + self.assertIn("## Summary", repaired_text) + self.assertIn("## Raw Source", repaired_text) + self.assertIn("`raw/agent-memory-session.md`", repaired_text) + + def test_doctor_labels_stale_raw_as_source_refresh(self): + tmp = Path(tempfile.mkdtemp(prefix="link-doctor-test-")) + target = tmp / "demo" + create_demo_quiet(target) + raw_page = target / "raw/agent-memory-session.md" + time.sleep(0.02) + raw_page.write_text("# Agent memory session\n\nUpdated after ingest.\n", encoding="utf-8") + + out = StringIO() + with redirect_stdout(out): + code = link_cli.doctor(target) + + self.assertEqual(code, 0) + text = out.getvalue() + self.assertIn("raw files need source refresh: raw/agent-memory-session.md", text) + self.assertNotIn("raw files not referenced by wiki pages", text) + self.assertNotIn("raw files not referenced by wiki source pages", text) + def test_doctor_warns_on_missing_summary(self): tmp = Path(tempfile.mkdtemp(prefix="link-doctor-test-")) target = tmp / "demo" @@ -373,6 +2276,28 @@ def test_doctor_fails_on_secret_like_content(self): self.assertEqual(code, 1) self.assertIn("sensitive-looking file contents", out.getvalue()) + def test_doctor_fails_when_secret_scan_cannot_read_file(self): + tmp = Path(tempfile.mkdtemp(prefix="link-doctor-test-")) + target = tmp / 
"demo" + create_demo_quiet(target) + locked = target / "raw/locked.md" + locked.write_text("could contain secrets\n", encoding="utf-8") + original_read_text = Path.read_text + + def flaky_read_text(path: Path, *args, **kwargs): + if path.name == "locked.md": + raise OSError("permission denied") + return original_read_text(path, *args, **kwargs) + + out = StringIO() + with patch.object(Path, "read_text", flaky_read_text): + with redirect_stdout(out): + code = link_cli.doctor(target) + + self.assertEqual(code, 1) + self.assertIn("could not scan file contents for secrets", out.getvalue()) + self.assertIn("raw/locked.md", out.getvalue()) + def test_doctor_fails_on_google_api_key_content(self): tmp = Path(tempfile.mkdtemp(prefix="link-doctor-test-")) target = tmp / "demo" diff --git a/tests/test_markdown_core.py b/tests/test_markdown_core.py new file mode 100644 index 0000000..2bdbada --- /dev/null +++ b/tests/test_markdown_core.py @@ -0,0 +1,61 @@ +import sys +import unittest +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "mcp_package")) + +from link_core.markdown import markdown_to_html # noqa: E402 + + +class MarkdownCoreTests(unittest.TestCase): + def test_inline_markdown_sanitizes_html_and_links(self): + rendered = markdown_to_html( + "Hello " + "and [bad](javascript:alert%281%29) " + "and [ok](https://example.com?a=1&b=2) " + "and [[target|label]] " + "and ``" + ) + + self.assertIn("<script>alert(1)</script>", rendered) + self.assertIn('bad', rendered) + self.assertIn('ok', rendered) + self.assertIn('<b>label</b>', rendered) + self.assertIn("<tag>", rendered) + self.assertNotIn(" " @@ -130,17 +551,549 @@ def test_static_file_allowlist_rejects_raw_traversal(self): raw_dir.mkdir() reset_wiki(wiki) - allowed = serve._safe_resolve(raw_dir / "note.txt") + allowed = serve._safe_resolve(raw_dir / "image.png") + unsupported = serve._safe_resolve(raw_dir / "note.txt") denied = serve._safe_resolve(raw_dir / 
"../serve.py") self.assertIsNotNone(allowed) + self.assertIsNotNone(unsupported) self.assertIsNotNone(denied) self.assertTrue(serve._is_allowed_static_file(allowed)) + self.assertFalse(serve._is_allowed_static_file(unsupported)) self.assertFalse(serve._is_allowed_static_file(denied)) def test_static_file_resolve_handles_malformed_paths(self): self.assertIsNone(serve._safe_resolve(Path("bad\0path"))) + def test_memory_dashboard_next_actions_empty_and_ready_states(self): + empty_actions = serve._memory_dashboard_next_actions( + memory_count=0, + review_count=0, + updated_count=0, + archived_count=0, + ) + ready_actions = serve._memory_dashboard_next_actions( + memory_count=2, + review_count=0, + updated_count=0, + archived_count=0, + ) + + self.assertEqual(empty_actions[0]["label"], "Create the first memory") + self.assertIn("remember", empty_actions[0]["command"]) + self.assertEqual(ready_actions[0]["label"], "Memory is recall-ready") + self.assertEqual(ready_actions[0]["href"], "/profile") + + def test_memory_dashboard_next_actions_uses_singular_memory_label(self): + actions = serve._memory_dashboard_next_actions( + memory_count=1, + review_count=1, + updated_count=0, + archived_count=0, + ) + + self.assertIn("1 memory needs confirmation", actions[0]["detail"]) + self.assertNotIn("memoryy", actions[0]["detail"]) + + def test_memory_dashboard_surfaces_raw_captures_and_secret_warnings(self): + wiki = self.make_wiki() + capture_dir = wiki.parent / "raw" / "memory-captures" + capture_dir.mkdir(parents=True) + fake_key = "sk-" + ("D" * 24) + (capture_dir / "session.md").write_text( + "---\n" + "title: \"Session capture\"\n" + "source_type: conversation\n" + "date_captured: \"2026-05-05T00:00:00Z\"\n" + "project: \"link\"\n" + "---\n\n" + "# Session capture\n\n" + "## Notes\n\n" + f"Remember that dashboard capture review is visible. 
Test key {fake_key}\n", + encoding="utf-8", + ) + + dashboard = serve._memory_dashboard(limit=8) + html = serve._render_memory_dashboard() + + self.assertEqual(dashboard["capture_count"], 1) + self.assertEqual(dashboard["capture_warning_count"], 1) + self.assertEqual(dashboard["captures"][0]["secret_warnings"], ["OpenAI API key"]) + self.assertIn("[redacted-secret]", dashboard["captures"][0]["snippet"]) + self.assertNotIn(fake_key, dashboard["captures"][0]["snippet"]) + self.assertIn("Redact capture warnings", dashboard["next_actions"][0]["label"]) + self.assertIn("accept-capture", dashboard["captures"][0]["commands"]["accept"]) + self.assertIn("Raw captures", html) + self.assertIn("redact-capture", html) + self.assertNotIn(fake_key, html) + + def test_capture_inbox_page_and_api_redact_secret_values(self): + wiki = self.make_wiki() + capture_dir = wiki.parent / "raw" / "memory-captures" + capture_dir.mkdir(parents=True) + fake_key = "sk-" + ("K" * 24) + (capture_dir / "alpha.md").write_text( + "---\n" + "title: \"Alpha capture\"\n" + "source_type: conversation\n" + "date_captured: \"2026-05-05T00:00:00Z\"\n" + "project: \"alpha\"\n" + "---\n\n" + "# Alpha capture\n\n" + "## Notes\n\n" + f"Remember that capture inbox is first class. 
Test key {fake_key}\n", + encoding="utf-8", + ) + (capture_dir / "beta.md").write_text( + "---\n" + "title: \"Beta capture\"\n" + "source_type: conversation\n" + "date_captured: \"2026-05-05T00:00:00Z\"\n" + "project: \"beta\"\n" + "---\n\n" + "# Beta capture\n\n" + "## Notes\n\n" + "Remember that beta capture stays separate.\n", + encoding="utf-8", + ) + + status, payload = run_handler("GET", "/api/capture-inbox?project=alpha") + html = serve._render_captures(project="alpha") + + self.assertEqual(status, 200) + self.assertEqual(payload["project"], "alpha") + self.assertEqual(payload["count"], 1) + self.assertEqual(payload["warning_count"], 1) + self.assertEqual(payload["captures"][0]["secret_warnings"], ["OpenAI API key"]) + self.assertIn("[redacted-secret]", payload["captures"][0]["snippet"]) + self.assertNotIn(fake_key, json.dumps(payload)) + self.assertIn("Raw Capture Inbox", html) + self.assertIn("Alpha capture", html) + self.assertNotIn("Beta capture", html) + self.assertIn("redact-capture", html) + self.assertNotIn(fake_key, html) + + def test_capture_inbox_page_and_api_report_read_warnings(self): + wiki = self.make_wiki() + capture_dir = wiki.parent / "raw" / "memory-captures" + capture_dir.mkdir(parents=True) + (capture_dir / "locked.md").write_text( + "---\n" + "title: \"Locked capture\"\n" + "---\n\n" + "## Notes\n\n" + "This capture should surface as unreadable.\n", + encoding="utf-8", + ) + original_read_text = Path.read_text + + def flaky_read_text(path: Path, *args, **kwargs): + if path.name == "locked.md": + raise OSError("permission denied") + return original_read_text(path, *args, **kwargs) + + with patch.object(Path, "read_text", flaky_read_text): + status, payload = run_handler("GET", "/api/capture-inbox") + html = serve._render_captures() + audit = serve._memory_audit() + + self.assertEqual(status, 200) + self.assertEqual(payload["read_warning_count"], 1) + self.assertEqual(payload["read_warnings"][0]["capture"], 
"raw/memory-captures/locked.md") + self.assertIn("capture_read_warnings", [item["code"] for item in audit["risk_factors"]]) + self.assertTrue(audit["next_actions"][1]["recommended"]) + self.assertIn("Fix capture access", html) + self.assertIn("locked.md", html) + + def test_memory_brief_page_and_api_include_capture_status(self): + wiki = self.make_wiki() + write_page( + wiki, + "memories/alpha-brief.md", + ( + "---\n" + "type: memory\n" + "title: \"Alpha brief\"\n" + "memory_type: project\n" + "scope: project\n" + "project: \"alpha\"\n" + "status: active\n" + "date_captured: \"2026-05-05T00:00:00Z\"\n" + "source: \"unit test\"\n" + "review_status: pending\n" + "---\n\n" + "# Alpha brief\n\n" + "> **TLDR:** Alpha project uses memory brief before work.\n\n" + "## Memory\n\nAlpha project uses memory brief before work.\n" + ), + ) + capture_dir = wiki.parent / "raw" / "memory-captures" + capture_dir.mkdir(parents=True) + fake_key = "sk-" + ("L" * 24) + (capture_dir / "alpha.md").write_text( + "---\n" + "title: \"Alpha brief capture\"\n" + "source_type: conversation\n" + "date_captured: \"2026-05-05T00:00:00Z\"\n" + "project: \"alpha\"\n" + "---\n\n" + "# Alpha brief capture\n\n" + "## Notes\n\n" + f"Remember that brief surfaces capture status. 
Test key {fake_key}\n", + encoding="utf-8", + ) + + status, payload = run_handler("GET", "/api/memory-brief?q=brief&project=alpha") + html = serve._render_brief(query="brief", project="alpha") + + self.assertEqual(status, 200) + self.assertEqual(payload["query"], "brief") + self.assertEqual(payload["project"], "alpha") + self.assertEqual(payload["relevant_count"], 1) + self.assertEqual(payload["captures"]["count"], 1) + self.assertEqual(payload["captures"]["warning_count"], 1) + self.assertIn("Redact raw captures", "\n".join(payload["agent_guidance"])) + self.assertNotIn(fake_key, json.dumps(payload)) + self.assertIn("Memory Brief", html) + self.assertIn("Agent Guidance", html) + self.assertIn("Alpha brief", html) + self.assertIn("Alpha brief capture", html) + self.assertNotIn(fake_key, html) + + def test_query_link_api_returns_context_packet(self): + wiki = self.make_wiki() + write_page( + wiki, + "concepts/agent-memory.md", + "---\ntype: concept\ntitle: Agent memory\ntags: [memory]\n---\n\n" + "# Agent memory\n\n" + "> **TLDR:** Agents use durable local memory.\n\n" + "## Overview\n\nAgent memory connects to [[retrieval]].\n", + ) + write_page( + wiki, + "concepts/retrieval.md", + "---\ntype: concept\ntitle: Retrieval\n---\n\n" + "# Retrieval\n\n> **TLDR:** Retrieval selects context.\n", + ) + write_page( + wiki, + "memories/prefer-local-memory.md", + "---\n" + "type: memory\n" + "title: Prefer local memory\n" + "memory_type: preference\n" + "scope: user\n" + "status: active\n" + "date_captured: \"2026-05-05T00:00:00Z\"\n" + "source: unit-test\n" + "review_status: reviewed\n" + "tags: [memory]\n" + "---\n\n" + "# Prefer local memory\n\n" + "> **TLDR:** User prefers local agent memory.\n\n" + "## Memory\n\nUser prefers local agent memory.\n", + ) + (wiki / "_backlinks.json").write_text(json.dumps(serve._build_backlinks()), encoding="utf-8") + reset_wiki(wiki) + + status, payload = run_handler("GET", "/api/query-link?q=agent%20memory&budget=small") + + 
self.assertEqual(status, 200) + self.assertTrue(payload["found"]) + self.assertEqual(payload["budget"], "small") + self.assertEqual(payload["wiki"]["primary"], "agent-memory") + self.assertEqual(payload["memory"]["items"][0]["name"], "prefer-local-memory") + self.assertIn("context_packet", payload) + self.assertIn("budget_report", payload) + self.assertIn("follow_up", payload) + + def test_status_api_returns_readiness_summary(self): + wiki = self.make_wiki() + for dirname in ("sources", "concepts", "entities", "memories", "comparisons", "explorations"): + (wiki / dirname).mkdir(exist_ok=True) + write_page( + wiki, + "memories/prefer-local-memory.md", + "---\n" + "type: memory\n" + "title: Prefer local memory\n" + "memory_type: preference\n" + "scope: user\n" + "status: active\n" + "date_captured: \"2026-05-05T00:00:00Z\"\n" + "source: unit-test\n" + "review_status: reviewed\n" + "---\n\n" + "# Prefer local memory\n\n" + "> **TLDR:** User prefers local memory.\n\n" + "## Memory\n\nUser prefers local memory.\n\n" + "## Source\n\nunit-test\n", + ) + (wiki / "_backlinks.json").write_text(json.dumps(serve._build_backlinks()), encoding="utf-8") + reset_wiki(wiki) + + status, payload = run_handler("GET", "/api/status?validate=true") + + self.assertEqual(status, 200) + self.assertEqual(payload["api_version"], serve.API_VERSION) + self.assertEqual(payload["version"], serve.LINK_VERSION) + self.assertTrue(payload["ready"]) + self.assertEqual(payload["page_count"], 3) + self.assertEqual(payload["content_page_count"], 1) + self.assertEqual(payload["memory_count"], 1) + self.assertIn(payload["search_backend"], {"sqlite-fts", "token-index"}) + self.assertTrue(payload["validation"]["passed"]) + self.assertEqual(payload["warnings"], []) + self.assertEqual(payload["next_actions"][0]["tool"], "query_link") + + def test_status_api_reports_cache_warnings(self): + wiki = self.make_wiki() + write_page( + wiki, + "concepts/locked-page.md", + "---\ntype: concept\ntitle: Locked\n---\n# 
Locked\n", + ) + reset_wiki(wiki) + original_read_text = Path.read_text + + def flaky_read_text(path: Path, *args, **kwargs): + if path.name == "locked-page.md": + raise OSError("permission denied") + return original_read_text(path, *args, **kwargs) + + with patch.object(Path, "read_text", flaky_read_text): + status, payload = run_handler("GET", "/api/status") + + self.assertEqual(status, 200) + self.assertFalse(payload["ready"]) + self.assertEqual(payload["page_count"], 2) + self.assertEqual(payload["warnings"][0]["code"], "cache_read_warnings") + + def test_memory_inbox_and_explain_render_action_commands(self): + wiki = self.make_wiki() + write_page( + wiki, + "memories/prefer-reviewable-memory.md", + ( + "---\n" + "type: memory\n" + "title: \"Prefer reviewable memory\"\n" + "memory_type: preference\n" + "scope: user\n" + "status: active\n" + "date_captured: \"2026-05-05T00:00:00Z\"\n" + "source: \"unit test\"\n" + "review_status: pending\n" + "---\n\n" + "# Prefer reviewable memory\n\n" + "> **TLDR:** User prefers visible memory actions.\n\n" + "## Memory\n\nUser prefers visible memory actions.\n" + ), + ) + + inbox_html = serve._render_inbox() + explain_html = serve._render_explain_memory("prefer-reviewable-memory") + + self.assertIn("Next: Review", inbox_html) + self.assertIn("review-memory", inbox_html) + self.assertIn('data-memory-action="review"', inbox_html) + self.assertIn('data-memory="prefer-reviewable-memory"', inbox_html) + self.assertIn("archive-memory", inbox_html) + self.assertIn('data-memory-action="archive"', inbox_html) + self.assertIn("forget-memory", inbox_html) + self.assertIn("

    Actions

    ", explain_html) + self.assertIn("Next: Review", explain_html) + self.assertIn("forget-memory", explain_html) + + def test_memory_action_post_endpoints_update_pages(self): + wiki = self.make_wiki() + page = write_page( + wiki, + "memories/prefer-web-review.md", + ( + "---\n" + "type: memory\n" + "title: \"Prefer web review\"\n" + "memory_type: preference\n" + "scope: user\n" + "status: active\n" + "date_captured: \"2026-05-05T00:00:00Z\"\n" + "source: \"unit test\"\n" + "review_status: pending\n" + "---\n\n" + "# Prefer web review\n\n" + "> **TLDR:** User prefers safe web memory review.\n\n" + "## Memory\n\nUser prefers safe web memory review.\n" + ), + ) + + review_status, review_payload = post_json( + "/api/review-memory", + {"memory": "prefer-web-review", "note": "confirmed from web"}, + ) + archive_status, archive_payload = post_json( + "/api/archive-memory", + {"memory": "prefer-web-review", "reason": "validated archive"}, + ) + restore_status, restore_payload = post_json( + "/api/restore-memory", + {"memory": "Prefer web review"}, + ) + text = page.read_text(encoding="utf-8") + log_text = (wiki / "log.md").read_text(encoding="utf-8") + + self.assertEqual(review_status, 200) + self.assertTrue(review_payload["updated"]) + self.assertEqual(review_payload["review_status"], "reviewed") + self.assertEqual(archive_status, 200) + self.assertEqual(archive_payload["status"], "archived") + self.assertEqual(restore_status, 200) + self.assertEqual(restore_payload["status"], "active") + self.assertIn("review_status: reviewed", text) + self.assertIn('review_note: "confirmed from web"', text) + self.assertIn("status: active", text) + self.assertIn("review-memory", log_text) + self.assertIn("archive-memory", log_text) + self.assertIn("restore-memory", log_text) + + def test_memory_action_post_requires_memory_identifier(self): + self.make_wiki() + status, payload = post_json("/api/review-memory", {}) + + self.assertEqual(status, 400) + self.assertFalse(payload["updated"]) 
+ self.assertEqual(payload["error"], "memory required") + + def test_memory_action_post_requires_local_action_header(self): + self.make_wiki() + status, payload = post_json( + "/api/review-memory", + {"memory": "prefer-web-review"}, + local_action=False, + ) + + self.assertEqual(status, 403) + self.assertFalse(payload["updated"]) + self.assertIn("X-Link-Local-Action", payload["error"]) + + def test_memory_audit_page_and_api_report_backlog(self): + wiki = self.make_wiki() + write_page( + wiki, + "memories/alpha-review.md", + ( + "---\n" + "type: memory\n" + "title: \"Alpha review\"\n" + "memory_type: project\n" + "scope: project\n" + "project: \"alpha\"\n" + "status: active\n" + "date_captured: \"2026-05-05T00:00:00Z\"\n" + "source: \"unit test\"\n" + "review_status: pending\n" + "---\n\n" + "# Alpha review\n\n" + "> **TLDR:** Alpha memory needs review.\n" + ), + ) + capture_dir = wiki.parent / "raw" / "memory-captures" + capture_dir.mkdir(parents=True) + fake_key = "sk-" + ("H" * 24) + (capture_dir / "alpha.md").write_text( + "---\n" + "title: \"Alpha capture\"\n" + "source_type: conversation\n" + "date_captured: \"2026-05-05T00:00:00Z\"\n" + "project: \"alpha\"\n" + "---\n\n" + "# Alpha capture\n\n" + "## Notes\n\n" + f"Remember that web audit reports capture risks. 
Test key {fake_key}\n", + encoding="utf-8", + ) + + audit = serve._memory_audit(project="alpha") + status, payload = run_handler("GET", "/api/memory-audit?project=alpha") + html = serve._render_memory_audit(project="alpha") + + self.assertEqual(status, 200) + self.assertEqual(audit["status"], "needs_attention") + self.assertEqual(payload["project"], "alpha") + self.assertEqual(payload["captures"]["warning_count"], 1) + self.assertIn("capture_secret_warnings", [item["code"] for item in payload["risk_factors"]]) + self.assertIn("Memory Audit", html) + self.assertIn("memory-inbox", html) + self.assertIn("capture-inbox", html) + self.assertNotIn(fake_key, html) + + def test_memory_dashboard_filters_project_memory_and_captures(self): + wiki = self.make_wiki() + write_page( + wiki, + "memories/global-style.md", + ( + "---\n" + "type: memory\n" + "title: \"Global style\"\n" + "memory_type: preference\n" + "scope: user\n" + "status: active\n" + "date_captured: \"2026-05-05T00:00:00Z\"\n" + "source: \"unit test\"\n" + "review_status: reviewed\n" + "---\n\n" + "# Global style\n\n" + "> **TLDR:** User prefers concise updates.\n" + ), + ) + for project in ("alpha", "beta"): + write_page( + wiki, + f"memories/{project}-imports.md", + ( + "---\n" + "type: memory\n" + f"title: \"{project.title()} imports\"\n" + "memory_type: project\n" + "scope: project\n" + f"project: \"{project}\"\n" + "status: active\n" + "date_captured: \"2026-05-05T00:00:00Z\"\n" + "source: \"unit test\"\n" + "review_status: reviewed\n" + "---\n\n" + f"# {project.title()} imports\n\n" + f"> **TLDR:** {project.title()} has project-specific imports.\n" + ), + ) + capture_dir = wiki.parent / "raw" / "memory-captures" + capture_dir.mkdir(parents=True) + for project in ("alpha", "beta"): + (capture_dir / f"{project}.md").write_text( + "---\n" + f"title: \"{project.title()} capture\"\n" + "source_type: conversation\n" + "date_captured: \"2026-05-05T00:00:00Z\"\n" + f"project: \"{project}\"\n" + "---\n\n" + "# 
Capture\n\n## Notes\n\nMemory capture.\n", + encoding="utf-8", + ) + + dashboard = serve._memory_dashboard(limit=8, project="alpha") + status, payload = run_handler("GET", "/api/memory-dashboard?project=alpha") + html = serve._render_memory_dashboard(project="alpha") + + self.assertEqual(status, 200) + self.assertEqual(dashboard["project"], "alpha") + self.assertEqual(payload["project"], "alpha") + self.assertEqual({record["name"] for record in dashboard["active"]}, {"global-style", "alpha-imports"}) + self.assertEqual([capture["project"] for capture in dashboard["captures"]], ["alpha"]) + self.assertIn("Project: alpha", html) + self.assertNotIn("Beta imports", html) + def test_cache_invalidation_sees_existing_page_edits(self): wiki = self.make_wiki() page = write_page( @@ -158,6 +1111,24 @@ def test_cache_invalidation_sees_existing_page_edits(self): self.assertEqual(next(p["title"] for p in before if p["name"] == "a"), "A") self.assertEqual(next(p["title"] for p in after if p["name"] == "a"), "A2") + def test_cache_mtime_check_is_throttled_for_hot_navigation(self): + wiki = self.make_wiki() + write_page( + wiki, + "concepts/a.md", + "---\ntype: concept\ntitle: A\n---\n# A\n", + ) + serve.CACHE_MTIME_CHECK_INTERVAL_SECONDS = 60.0 + + with patch("serve._wiki_mtime", wraps=serve._wiki_mtime) as mtime: + first = serve._get_all_pages() + second = serve._get_all_pages() + forced = serve._get_all_pages(force_check=True) + + self.assertIs(first, second) + self.assertIs(first, forced) + self.assertEqual(mtime.call_count, 2) + def test_backlinks_loader_returns_documented_shape(self): wiki = self.make_wiki() (wiki / "_backlinks.json").write_text( @@ -206,6 +1177,72 @@ def test_graph_data_uses_canonical_node_ids(self): 1, ) + def test_graph_summary_is_bounded_for_api_agents(self): + wiki = self.make_wiki() + for index in range(8): + links = " ".join(f"[[node-{target}]]" for target in range(8) if target != index) + write_page( + wiki, + f"concepts/node-{index}.md", + 
f"---\ntype: concept\ntitle: Node {index}\n---\n# Node {index}\n\n{links}\n", + ) + + summary = serve._get_graph_summary(limit=4, max_edges=3) + + self.assertEqual(summary["returned_nodes"], 4) + self.assertEqual(summary["returned_edges"], 3) + self.assertTrue(summary["truncated"]) + self.assertIn("get_graph", {item["tool"] for item in summary["follow_up"]}) + + def test_graph_data_uses_served_cache_forward_links_without_rereading_pages(self): + wiki = self.make_wiki() + write_page( + wiki, + "concepts/agent-memory.md", + "---\ntype: concept\ntitle: Agent Memory\n---\n# Agent Memory\n\n[[link]]\n", + ) + write_page(wiki, "entities/link.md", "---\ntype: entity\ntitle: Link\n---\n# Link\n") + serve._get_all_pages() + + with patch.object(Path, "read_text", side_effect=AssertionError("serve graph should use cache")): + graph = serve._get_graph_data() + + self.assertIn({"source": "agent-memory", "target": "link"}, graph["edges"]) + + def test_page_list_payload_is_bounded_for_api_agents(self): + wiki = self.make_wiki() + for index in range(5): + write_page( + wiki, + f"concepts/page-{index}.md", + f"---\ntype: concept\ntitle: Page {index}\n---\n# Page {index}\n", + ) + + payload = serve._page_list_payload(category="concepts", limit=2) + + self.assertEqual(payload["count"], 5) + self.assertEqual(payload["returned_count"], 2) + self.assertTrue(payload["truncated"]) + self.assertEqual(payload["follow_up"][0]["tool"], "get_pages") + + def test_page_links_payload_is_bounded_for_api_agents(self): + wiki = self.make_wiki() + (wiki / "_backlinks.json").write_text( + json.dumps({ + "backlinks": {"hub": ["a", "b", "c", "d"]}, + "forward": {"hub": ["e", "f", "g"]}, + }), + encoding="utf-8", + ) + + payload, status = serve._page_links_payload("hub", limit=2) + + self.assertEqual(status, 200) + self.assertEqual(payload["inbound_count"], 4) + self.assertEqual(payload["returned_inbound"], 2) + self.assertTrue(payload["truncated"]) + self.assertEqual(payload["follow_up"][0]["tool"], 
"get_backlinks") + def test_graph_tooltip_exists_before_graph_script(self): wiki = self.make_wiki() write_page( @@ -218,6 +1255,844 @@ def test_graph_tooltip_exists_before_graph_script(self): self.assertLess(html.index('id="graph-tooltip"'), html.index("var tooltip =")) + def test_propose_memories_post_is_write_free(self): + wiki = self.make_wiki() + write_page( + wiki, + "memories/prefer-release-branches.md", + ( + "---\n" + "type: memory\n" + "title: \"Prefer release branches\"\n" + "memory_type: preference\n" + "scope: project\n" + "status: active\n" + "date_captured: \"2026-05-05T00:00:00Z\"\n" + "source: \"unit test\"\n" + "review_status: pending\n" + "tags: [memory, preference]\n" + "---\n\n" + "# Prefer release branches\n\n" + "> **TLDR:** User prefers release branches for Link work.\n\n" + "## Memory\n\nUser prefers release branches for Link work.\n" + ), + ) + before_files = sorted(path.relative_to(wiki).as_posix() for path in wiki.rglob("*") if path.is_file()) + + request_body = json.dumps({ + "text": "\n".join([ + "I prefer release branches for Link work.", + "We decided to keep Memory Mode local and source-backed.", + "Maybe we could add cloud sync later.", + ]), + "source": "unit test session", + }).encode("utf-8") + status, payload = run_handler( + "POST", + "/api/propose-memories", + body=request_body, + headers={ + "Content-Type": "application/json", + "Content-Length": str(len(request_body)), + }, + ) + get_status, get_payload = run_handler("GET", "/api/propose-memories") + bad_type_status, bad_type_payload = run_handler( + "POST", + "/api/propose-memories", + body=request_body, + headers={ + "Content-Type": "text/plain", + "Content-Length": str(len(request_body)), + }, + ) + + after_files = sorted(path.relative_to(wiki).as_posix() for path in wiki.rglob("*") if path.is_file()) + + self.assertEqual(status, 200) + self.assertTrue(payload["proposed"]) + self.assertFalse(payload["writes_memory"]) + self.assertEqual(payload["count"], 2) + 
self.assertEqual(payload["proposals"][0]["suggested_action"], "update-memory") + self.assertEqual(payload["proposals"][0]["duplicate_candidates"][0]["name"], "prefer-release-branches") + self.assertEqual(payload["proposals"][0]["primary_action"]["tool"], "update_memory") + self.assertIn("update-memory", payload["proposals"][0]["primary_action"]["command"]) + self.assertEqual(payload["proposals"][1]["suggested_action"], "remember") + self.assertEqual(payload["proposals"][1]["primary_action"]["tool"], "remember_memory") + self.assertEqual(before_files, after_files) + self.assertEqual(get_status, 405) + self.assertIn("use POST", get_payload["error"]) + self.assertEqual(bad_type_status, 415) + self.assertIn("application/json", bad_type_payload["error"]) + + def test_propose_memories_post_bounds_source_and_project(self): + self.make_wiki() + request_body = json.dumps({ + "text": " Remember that bounded proposal inputs matter. ", + "source": "s" * 600, + "project": "p" * 100, + "limit": 50, + }).encode("utf-8") + + with patch.object( + serve, + "_propose_memories_from_text", + return_value={"proposed": True, "count": 0, "proposals": []}, + ) as propose: + status, payload = run_handler( + "POST", + "/api/propose-memories", + body=request_body, + headers={ + "Content-Type": "application/json", + "Content-Length": str(len(request_body)), + }, + ) + + self.assertEqual(status, 200) + self.assertTrue(payload["proposed"]) + args, kwargs = propose.call_args + self.assertEqual(args[0], "Remember that bounded proposal inputs matter.") + self.assertEqual(kwargs["source"], "s" * 500) + self.assertEqual(kwargs["project"], "p" * 80) + self.assertEqual(kwargs["limit"], 20) + + def test_propose_page_renders_read_only_workflow(self): + self.make_wiki() + + html = serve._render_propose(project="link", source="raw/first-memory.md") + + self.assertIn('propose', html) + self.assertIn('data-proposal-sources', html) + self.assertIn('data-proposal-form', html) + 
self.assertIn('data-initial-source="raw/first-memory.md"', html) + self.assertIn('data-proposal-results', html) + self.assertIn('value="link"', html) + self.assertIn("without writing anything", html) + self.assertIn("Save only preferences", html) + self.assertIn("Review Gate", html) + self.assertIn("Before saving memory", html) + self.assertIn("ordinary facts in wiki pages", html) + self.assertIn("Memory proposal path", html) + self.assertIn("Approve explicitly", html) + self.assertIn("This step never writes durable memory", html) + self.assertIn("Proposal-only: no durable memory has been written yet.", html) + self.assertIn("Manual review required", html) + self.assertIn("Conflict found: use the approval prompt", html) + self.assertIn("Writes durable local memory only after this explicit approval.", html) + self.assertIn("Approve and save", html) + self.assertIn("/api/remember-memory", html) + self.assertIn("/api/update-memory", html) + self.assertIn("Copy approval prompt", html) + self.assertIn("navigator.clipboard.writeText", html) + self.assertIn("var initialSource = form.getAttribute('data-initial-source')", html) + + def test_propose_page_bounds_query_seed_values(self): + self.make_wiki() + + html = serve._render_propose(project="p" * 100, source="s" * 600) + + self.assertIn(f'value="{"p" * 80}"', html) + self.assertNotIn("p" * 81, html) + self.assertIn(f'data-initial-source="{"s" * 500}"', html) + self.assertNotIn("s" * 501, html) + + def test_memory_approval_api_requires_header_and_writes_memory(self): + wiki = self.make_wiki() + payload = { + "memory": "User wants Link memory approvals to stay explicit.", + "title": "Explicit approvals", + "memory_type": "preference", + "scope": "user", + "source": "web proposal", + } + + denied_status, denied_payload = post_json("/api/remember-memory", payload, local_action=False) + create_status, created = post_json("/api/remember-memory", payload) + duplicate_status, duplicate = post_json("/api/remember-memory", 
payload) + update_status, updated = post_json( + "/api/update-memory", + { + "memory": created["name"], + "text": "User also wants the web proposal flow to preserve review.", + "source": "web proposal", + }, + ) + page_text = (wiki / "memories" / f"{created['name']}.md").read_text(encoding="utf-8") + + self.assertEqual(denied_status, 403) + self.assertIn("X-Link-Local-Action", denied_payload["error"]) + self.assertEqual(create_status, 200) + self.assertTrue(created["saved"]) + self.assertTrue(created["created"]) + self.assertEqual(created["path"], f"wiki/memories/{created['name']}.md") + self.assertEqual(duplicate_status, 409) + self.assertFalse(duplicate["saved"]) + self.assertTrue(duplicate["duplicate"]) + self.assertEqual(update_status, 200) + self.assertTrue(updated["saved"]) + self.assertTrue(updated["updated"]) + self.assertEqual(updated["review_status"], "pending") + self.assertIn("User also wants the web proposal flow", page_text) + + def test_memory_approval_api_ignores_duplicate_override_flags(self): + wiki = self.make_wiki() + payload = { + "memory": "User prefers Link web approvals to be reviewable.", + "title": "Reviewable web approvals", + "memory_type": "preference", + "scope": "user", + "source": "web proposal", + } + + create_status, created = post_json("/api/remember-memory", payload) + duplicate_status, duplicate = post_json( + "/api/remember-memory", + {**payload, "allow_duplicate": True, "allow_conflict": True}, + ) + + memory_pages = sorted((wiki / "memories").glob("reviewable-web-approvals*.md")) + self.assertEqual(create_status, 200) + self.assertTrue(created["saved"]) + self.assertEqual(duplicate_status, 409) + self.assertFalse(duplicate["saved"]) + self.assertTrue(duplicate["duplicate"]) + self.assertEqual(len(memory_pages), 1) + + def test_memory_update_api_ignores_conflict_override_flags(self): + wiki = self.make_wiki() + first_status, first = post_json( + "/api/remember-memory", + { + "memory": "User prefers release branches for Link 
work.", + "title": "Prefer release branches", + "memory_type": "preference", + "scope": "project", + "project": "link", + "source": "web proposal", + }, + ) + second = serve._core_write_memory_page( + wiki, + "User prefers dark mode for Link work.", + "Prefer dark mode", + "preference", + "project", + None, + "test setup", + serve._utc_timestamp(), + project="link", + records=serve._memory_records(), + allow_conflict=True, + ) + update_status, update = post_json( + "/api/update-memory", + { + "memory": second["name"], + "text": "User prefers develop branches for Link work.", + "source": "web proposal", + "project": "link", + "allow_conflict": True, + }, + ) + + self.assertEqual(first_status, 200) + self.assertTrue(second["created"]) + self.assertEqual(update_status, 409) + self.assertFalse(update["saved"]) + self.assertTrue(update["conflict"]) + + def test_proposal_sources_api_lists_safe_raw_files(self): + wiki = self.make_wiki() + raw = wiki.parent / "raw" + raw.mkdir() + (raw / "first-memory.md").write_text( + "# First Memory\n\nI prefer local-first agent memory.", + encoding="utf-8", + ) + fake_secret = "sk-" + ("a" * 24) + (raw / "secret-note.md").write_text( + f"# Secret Note\n\nToken {fake_secret} should not be loaded.", + encoding="utf-8", + ) + (raw / "big-note.md").write_text( + "# Big Note\n\n" + ("large source text\n" * 5000), + encoding="utf-8", + ) + (raw / ".hidden-note.md").write_text( + "# Hidden Note\n\nThis should not be listed or loaded directly.", + encoding="utf-8", + ) + (raw / "image.png").write_bytes(b"not listed") + reset_wiki(wiki) + + list_status, list_payload = run_handler("GET", "/api/proposal-sources") + load_status, load_payload = run_handler("GET", "/api/proposal-source?path=raw/first-memory.md") + secret_status, secret_payload = run_handler("GET", "/api/proposal-source?path=raw/secret-note.md") + big_status, big_payload = run_handler("GET", "/api/proposal-source?path=raw/big-note.md") + traversal_status, traversal_payload = 
run_handler("GET", "/api/proposal-source?path=../serve.py") + hidden_status, hidden_payload = run_handler("GET", "/api/proposal-source?path=raw/.hidden-note.md") + long_status, long_payload = run_handler("GET", f"/api/proposal-source?path={'x' * 1001}.md") + + self.assertEqual(list_status, 200) + self.assertEqual(list_payload["count"], 3) + sources = {item["path"]: item for item in list_payload["sources"]} + self.assertTrue(sources["raw/first-memory.md"]["loadable"]) + self.assertEqual(sources["raw/first-memory.md"]["action"], "load") + self.assertEqual(sources["raw/first-memory.md"]["action_label"], "Use in form") + self.assertFalse(sources["raw/secret-note.md"]["loadable"]) + self.assertEqual(sources["raw/secret-note.md"]["action"], "redact") + self.assertEqual(sources["raw/secret-note.md"]["action_label"], "Redact first") + self.assertEqual(sources["raw/secret-note.md"]["secret_warnings"], ["OpenAI API key"]) + self.assertNotIn(fake_secret, sources["raw/secret-note.md"]["snippet"]) + self.assertFalse(sources["raw/big-note.md"]["loadable"]) + self.assertTrue(sources["raw/big-note.md"]["truncated"]) + self.assertEqual(sources["raw/big-note.md"]["action"], "split") + self.assertEqual(sources["raw/big-note.md"]["action_label"], "Split file") + self.assertNotIn("raw/.hidden-note.md", sources) + self.assertEqual(load_status, 200) + self.assertIn("local-first agent memory", load_payload["text"]) + self.assertEqual(load_payload["source"], "raw/first-memory.md") + self.assertEqual(secret_status, 409) + self.assertIn("redact", secret_payload["error"]) + self.assertNotIn("text", secret_payload) + self.assertEqual(big_status, 413) + self.assertIn("too large", big_payload["error"]) + self.assertNotIn("text", big_payload) + self.assertEqual(traversal_status, 404) + self.assertFalse(traversal_payload["found"]) + self.assertEqual(hidden_status, 404) + self.assertFalse(hidden_payload["found"]) + self.assertEqual(long_status, 404) + self.assertFalse(long_payload["found"]) + + def 
test_proposal_sources_api_blocks_unreadable_raw_files(self): + wiki = self.make_wiki() + raw = wiki.parent / "raw" + raw.mkdir() + (raw / "locked-note.md").write_text("# Locked note\n", encoding="utf-8") + reset_wiki(wiki) + original_open = Path.open + + def open_path(path: Path, *args: object, **kwargs: object): + if path.name == "locked-note.md": + raise OSError("permission denied") + return original_open(path, *args, **kwargs) + + with patch.object(Path, "open", open_path): + list_status, list_payload = run_handler("GET", "/api/proposal-sources") + load_status, load_payload = run_handler("GET", "/api/proposal-source?path=raw/locked-note.md") + + self.assertEqual(list_status, 200) + self.assertEqual(list_payload["count"], 1) + source = list_payload["sources"][0] + self.assertEqual(source["path"], "raw/locked-note.md") + self.assertFalse(source["loadable"]) + self.assertEqual(source["action"], "unavailable") + self.assertEqual(source["action_label"], "Fix access") + self.assertEqual(source["error"], "permission denied") + self.assertEqual(load_status, 423) + self.assertEqual(load_payload["action"], "unavailable") + self.assertIn("permission denied", load_payload["error"]) + self.assertNotIn("text", load_payload) + + def test_raw_source_api_creates_local_source_for_ingest(self): + wiki = self.make_wiki() + + status, payload = post_json( + "/api/raw-source", + { + "title": "Project Notes", + "filename": "Project Notes.md", + "text": "User wants a web path for adding Link sources.", + }, + ) + duplicate_status, duplicate_payload = post_json( + "/api/raw-source", + { + "title": "Project Notes", + "filename": "Project Notes.md", + "text": "# Project Notes\n\nSecond source.", + }, + ) + missing_header_status, missing_header = post_json( + "/api/raw-source", + {"title": "No Header", "text": "Should not save."}, + local_action=False, + ) + + self.assertEqual(status, 201) + self.assertTrue(payload["created"]) + self.assertEqual(payload["path"], "raw/project-notes.md") + 
self.assertEqual(payload["next_prompt"], "ingest raw/project-notes.md into Link") + self.assertTrue((wiki.parent / payload["path"]).exists()) + self.assertIn("# Project Notes", (wiki.parent / payload["path"]).read_text(encoding="utf-8")) + self.assertIn("add-raw-source", (wiki / "log.md").read_text(encoding="utf-8")) + self.assertEqual(duplicate_status, 201) + self.assertEqual(duplicate_payload["path"], "raw/project-notes-2.md") + self.assertEqual(missing_header_status, 403) + self.assertFalse(missing_header["created"]) + + def test_raw_source_api_blocks_secret_and_unsafe_names(self): + wiki = self.make_wiki() + + secret_status, secret_payload = post_json( + "/api/raw-source", + { + "title": "Secret", + "filename": "secret.md", + "text": "Do not save sk-" + ("a" * 25), + }, + ) + unsafe_status, unsafe_payload = post_json( + "/api/raw-source", + { + "title": "Unsafe", + "filename": "../unsafe.md", + "text": "Safe text.", + }, + ) + get_status, get_payload = run_handler("GET", "/api/raw-source") + + self.assertEqual(secret_status, 422) + self.assertFalse(secret_payload["created"]) + self.assertEqual(secret_payload["secret_warnings"], ["OpenAI API key"]) + self.assertFalse((wiki.parent / "raw" / "secret.md").exists()) + self.assertEqual(unsafe_status, 400) + self.assertIn("filename", unsafe_payload["error"]) + self.assertEqual(get_status, 405) + self.assertIn("POST", get_payload["error"]) + + def test_ingest_page_and_api_show_pending_raw(self): + wiki = self.make_wiki() + raw = wiki.parent / "raw" + raw.mkdir() + (raw / "new-source.md").write_text("# New source\n", encoding="utf-8") + reset_wiki(wiki) + + api_status, payload = run_handler("GET", "/api/ingest-status") + html = serve._render_ingest() + + self.assertEqual(api_status, 200) + self.assertEqual(payload["pending_count"], 1) + self.assertEqual(payload["guidance"]["state"], "pending_raw") + self.assertEqual(payload["safety"]["status"], "clear") + 
self.assertEqual(payload["plan"]["batch"][0]["suggested_source_page"], "wiki/sources/new-source.md") + self.assertIn("Add Raw Source", html) + self.assertIn('data-raw-source-form', html) + self.assertIn('data-raw-source-status', html) + self.assertIn("Save to raw/", html) + self.assertIn("blocks secret-looking values", html) + self.assertIn("Next step", html) + self.assertIn("Raw safety: clear", html) + self.assertIn("No secret-looking values detected in raw sources.", html) + self.assertIn("Copy this into your agent chat", html) + self.assertIn('data-copy-text="ingest raw/new-source.md into Link"', html) + self.assertIn("Copy prompt", html) + self.assertIn("Copy command", html) + self.assertIn('data-copy-text="link validate"', html) + self.assertIn("ingest raw/new-source.md into Link", html) + self.assertIn("open memory proposals first", html) + self.assertIn("Ingest path", html) + self.assertIn("Optional memory", html) + self.assertIn("propose memories from raw/new-source.md", html) + self.assertIn("Post-ingest checks", html) + self.assertIn("run before reporting done", html) + self.assertIn("Ingest pending raw sources", html) + self.assertIn("wiki/sources/new-source.md", html) + self.assertIn('/propose?source=raw/new-source.md', html) + self.assertIn("Pending Raw Files", html) + + def test_ingest_page_shows_completion_for_represented_raw(self): + wiki = self.make_wiki() + raw = wiki.parent / "raw" + raw.mkdir() + (raw / "represented-source.md").write_text("# Represented source\n", encoding="utf-8") + (wiki / "sources").mkdir(parents=True, exist_ok=True) + (wiki / "sources" / "represented-source.md").write_text( + "---\ntype: source\ntitle: Represented Source\n---\n\n" + "# Represented Source\n\n" + "## Raw Source\n\n`raw/represented-source.md`\n", + encoding="utf-8", + ) + reset_wiki(wiki) + + api_status, payload = run_handler("GET", "/api/ingest-status") + html = serve._render_ingest() + + self.assertEqual(api_status, 200) + 
self.assertEqual(payload["guidance"]["state"], "ready") + self.assertEqual(payload["completion"]["items"][0]["source_pages"][0]["title"], "Represented Source") + self.assertIn("Ingest completion", html) + self.assertIn("All 1 raw source(s) are represented", html) + self.assertIn("raw/represented-source.md", html) + self.assertIn('/page/represented-source', html) + self.assertIn("Represented Source", html) + self.assertIn('/propose?source=raw/represented-source.md', html) + self.assertIn('data-copy-text="propose memories from raw/represented-source.md"', html) + self.assertIn('data-copy-text="query Link for represented source"', html) + self.assertIn("brief me from Link before we continue", html) + + def test_ingest_page_marks_stale_represented_raw(self): + wiki = self.make_wiki() + raw = wiki.parent / "raw" + raw.mkdir() + raw_page = raw / "represented-source.md" + raw_page.write_text("# Represented source\n\nOriginal note.\n", encoding="utf-8") + (wiki / "sources").mkdir(parents=True, exist_ok=True) + (wiki / "sources" / "represented-source.md").write_text( + "---\ntype: source\ntitle: Represented Source\n---\n\n" + "# Represented Source\n\n" + "## Raw Source\n\n`raw/represented-source.md`\n", + encoding="utf-8", + ) + time.sleep(0.02) + raw_page.write_text("# Represented source\n\nUpdated note.\n", encoding="utf-8") + reset_wiki(wiki) + + api_status, payload = run_handler("GET", "/api/ingest-status") + html = serve._render_ingest() + + self.assertEqual(api_status, 200) + self.assertEqual(payload["guidance"]["state"], "stale_raw") + self.assertEqual(payload["stale_count"], 1) + self.assertIn("stale", html) + self.assertIn("raw changed after wiki source page", html) + self.assertIn("Refresh stale source pages", html) + self.assertIn("wiki/sources/represented-source.md", html) + self.assertIn('data-copy-text="re-ingest raw/represented-source.md into Link"', html) + + def test_ingest_page_blocks_secret_looking_raw(self): + wiki = self.make_wiki() + raw = wiki.parent 
/ "raw" + raw.mkdir() + (raw / "a-safe-note.md").write_text( + "# Safe note\n\nThis should stay available for memory proposals.\n", + encoding="utf-8", + ) + (raw / "secret-note.md").write_text( + "# Secret note\n\nDo not ingest sk-" + ("a" * 25) + "\n", + encoding="utf-8", + ) + reset_wiki(wiki) + + api_status, payload = run_handler("GET", "/api/ingest-status") + html = serve._render_ingest() + + self.assertEqual(api_status, 200) + self.assertEqual(payload["guidance"]["state"], "blocked_secrets") + self.assertIsNone(payload["guidance"]["agent_prompt"]) + self.assertEqual(payload["safety"]["status"], "blocked") + self.assertEqual(payload["safety"]["blocked_raw"], ["raw/secret-note.md"]) + self.assertIn("Raw safety: blocked", html) + self.assertIn('data-copy-text="edit raw/secret-note.md"', html) + self.assertIn("Copy next step", html) + self.assertIn("Redact raw sources before ingest", html) + self.assertIn("redact secret-looking values in raw/secret-note.md before ingest", html) + self.assertIn("secret warning: OpenAI API key", html) + self.assertIn("redact before ingest", html) + self.assertIn('/propose?source=raw/a-safe-note.md', html) + self.assertNotIn('/propose?source=raw/secret-note.md', html) + + def test_ingest_page_blocks_unreadable_raw(self): + wiki = self.make_wiki() + raw = wiki.parent / "raw" + raw.mkdir() + (raw / "locked-note.md").write_text("# Locked note\n", encoding="utf-8") + reset_wiki(wiki) + + with patch( + "link_core.ingest.secret_file_scan", + return_value={"labels": [], "readable": False, "error": "permission denied"}, + ): + api_status, payload = run_handler("GET", "/api/ingest-status") + html = serve._render_ingest() + + self.assertEqual(api_status, 200) + self.assertEqual(payload["guidance"]["state"], "blocked_raw_access") + self.assertIsNone(payload["guidance"]["agent_prompt"]) + self.assertEqual(payload["safety"]["status"], "blocked") + self.assertEqual(payload["raw_scan_warning_count"], 1) + self.assertIn("Raw safety: blocked", html) 
+ self.assertIn('data-copy-text="inspect raw/locked-note.md"', html) + self.assertIn("Inspect raw source access", html) + self.assertIn("fix raw source access for raw/locked-note.md before ingest", html) + self.assertIn("could not inspect: permission denied", html) + self.assertIn("fix access before ingest", html) + self.assertNotIn('/propose?source=raw/locked-note.md', html) + + def test_ingest_page_blocks_unreadable_source_pages(self): + wiki = self.make_wiki() + raw = wiki.parent / "raw" + raw.mkdir() + (raw / "broken-source.md").write_text("# Broken source\n", encoding="utf-8") + write_page( + wiki, + "sources/broken.md", + "---\ntype: source\ntitle: Broken\n---\n\n`raw/broken-source.md`\n", + ) + reset_wiki(wiki) + original_read_text = Path.read_text + + def read_text(path: Path, *args: object, **kwargs: object) -> str: + if path.name == "broken.md": + raise OSError("permission denied") + return original_read_text(path, *args, **kwargs) + + with patch.object(Path, "read_text", read_text): + api_status, payload = run_handler("GET", "/api/ingest-status") + html = serve._render_ingest() + + self.assertEqual(api_status, 200) + self.assertEqual(payload["guidance"]["state"], "blocked_source_access") + self.assertIsNone(payload["guidance"]["agent_prompt"]) + self.assertEqual(payload["source_read_warning_count"], 1) + self.assertIn("Source Page Warnings", html) + self.assertIn("wiki/sources/broken.md", html) + self.assertIn("could not inspect: permission denied", html) + self.assertIn("Inspect source page access", html) + self.assertIn("fix source page access before ingest", html) + + def test_rebuild_backlinks_requires_json_post(self): + wiki = self.make_wiki() + write_page( + wiki, + "concepts/a.md", + "---\ntype: concept\ntitle: A\n---\n# A\n\n[[b]]\n", + ) + write_page( + wiki, + "concepts/b.md", + "---\ntype: concept\ntitle: B\n---\n# B\n", + ) + backlinks_path = wiki / "_backlinks.json" + backlinks_path.write_text(json.dumps({"backlinks": {}, "forward": {}}), 
encoding="utf-8") + + get_status, get_payload = run_handler("GET", "/api/rebuild-backlinks") + bad_post_status, bad_post_payload = run_handler("POST", "/api/rebuild-backlinks") + missing_header_status, missing_header_payload = run_handler( + "POST", + "/api/rebuild-backlinks", + body=b"{}", + headers={"Content-Type": "application/json", "Content-Length": "2"}, + ) + post_status, post_payload = run_handler( + "POST", + "/api/rebuild-backlinks", + body=b"{}", + headers={ + "Content-Type": "application/json", + "Content-Length": "2", + "X-Link-Local-Action": "true", + }, + ) + bad_origin_status, bad_origin_payload = run_handler( + "POST", + "/api/rebuild-backlinks", + body=b"{}", + headers={ + "Content-Type": "application/json", + "Content-Length": "2", + "X-Link-Local-Action": "true", + "Origin": "https://attacker.example", + }, + ) + rebuilt = json.loads(backlinks_path.read_text(encoding="utf-8")) + + self.assertEqual(get_status, 405) + self.assertIn("use POST", get_payload["error"]) + self.assertEqual(bad_post_status, 403) + self.assertFalse(bad_post_payload["rebuilt"]) + self.assertIn("X-Link-Local-Action", bad_post_payload["error"]) + self.assertEqual(missing_header_status, 403) + self.assertFalse(missing_header_payload["rebuilt"]) + self.assertIn("X-Link-Local-Action", missing_header_payload["error"]) + self.assertEqual(post_status, 200) + self.assertTrue(post_payload["rebuilt"]) + self.assertEqual(bad_origin_status, 403) + self.assertFalse(bad_origin_payload["rebuilt"]) + self.assertIn("Origin/Referer", bad_origin_payload["error"]) + self.assertEqual(rebuilt["backlinks"], {"b": ["a"]}) + self.assertEqual(rebuilt["forward"], {"a": ["b"]}) + + def test_rebuild_backlinks_reports_read_errors(self): + wiki = self.make_wiki() + write_page( + wiki, + "concepts/locked-page.md", + "---\ntype: concept\ntitle: Locked\n---\n# Locked\n\n[[link]]\n", + ) + original_read_text = Path.read_text + + def flaky_read_text(path: Path, *args, **kwargs): + if path.name == 
"locked-page.md": + raise OSError("permission denied") + return original_read_text(path, *args, **kwargs) + + with patch.object(Path, "read_text", flaky_read_text): + status, payload = run_handler( + "POST", + "/api/rebuild-backlinks", + body=b"{}", + headers={ + "Content-Type": "application/json", + "Content-Length": "2", + "X-Link-Local-Action": "true", + }, + ) + + self.assertEqual(status, 200) + self.assertFalse(payload["rebuilt"]) + self.assertIn("Could not rebuild backlinks", payload["error"]) + + def test_rebuild_backlinks_rejects_bad_json_after_local_header(self): + self.make_wiki() + + bad_post_status, bad_post_payload = run_handler( + "POST", + "/api/rebuild-backlinks", + headers={"X-Link-Local-Action": "true"}, + ) + + self.assertEqual(bad_post_status, 415) + self.assertFalse(bad_post_payload["rebuilt"]) + + def test_rebuild_index_requires_json_post(self): + wiki = self.make_wiki() + write_page( + wiki, + "concepts/a.md", + "---\ntype: concept\ntitle: A\n---\n# A\n\n> **TLDR:** A page.\n", + ) + index_path = wiki / "index.md" + index_path.write_text("# Broken Index\n", encoding="utf-8") + + get_status, get_payload = run_handler("GET", "/api/rebuild-index") + bad_post_status, bad_post_payload = run_handler("POST", "/api/rebuild-index") + missing_header_status, missing_header_payload = run_handler( + "POST", + "/api/rebuild-index", + body=b"{}", + headers={"Content-Type": "application/json", "Content-Length": "2"}, + ) + post_status, post_payload = run_handler( + "POST", + "/api/rebuild-index", + body=b"{}", + headers={ + "Content-Type": "application/json", + "Content-Length": "2", + "X-Link-Local-Action": "true", + }, + ) + index_text = index_path.read_text(encoding="utf-8") + + self.assertEqual(get_status, 405) + self.assertIn("use POST", get_payload["error"]) + self.assertEqual(bad_post_status, 403) + self.assertFalse(bad_post_payload["rebuilt"]) + self.assertIn("X-Link-Local-Action", bad_post_payload["error"]) + self.assertEqual(missing_header_status, 
403) + self.assertFalse(missing_header_payload["rebuilt"]) + self.assertIn("X-Link-Local-Action", missing_header_payload["error"]) + self.assertEqual(post_status, 200) + self.assertTrue(post_payload["rebuilt"]) + self.assertIn("[[a]]", index_text) + self.assertEqual(post_payload["category_counts"]["concepts"], 1) + + def test_rebuild_index_reports_read_errors(self): + wiki = self.make_wiki() + write_page( + wiki, + "concepts/locked-page.md", + "---\ntype: concept\ntitle: Locked\n---\n# Locked\n", + ) + original_read_text = Path.read_text + + def flaky_read_text(path: Path, *args, **kwargs): + if path.name == "locked-page.md": + raise OSError("permission denied") + return original_read_text(path, *args, **kwargs) + + with patch.object(Path, "read_text", flaky_read_text): + status, payload = run_handler( + "POST", + "/api/rebuild-index", + body=b"{}", + headers={ + "Content-Type": "application/json", + "Content-Length": "2", + "X-Link-Local-Action": "true", + }, + ) + + self.assertEqual(status, 200) + self.assertFalse(payload["rebuilt"]) + self.assertIn("Could not rebuild index", payload["error"]) + + def test_rebuild_index_rejects_bad_json_after_local_header(self): + self.make_wiki() + + bad_post_status, bad_post_payload = run_handler( + "POST", + "/api/rebuild-index", + headers={"X-Link-Local-Action": "true"}, + ) + + self.assertEqual(bad_post_status, 415) + self.assertFalse(bad_post_payload["rebuilt"]) + + def test_validate_api_reports_wiki_gate_status(self): + wiki = self.make_wiki() + for dirname in ("sources", "concepts", "entities", "memories", "comparisons", "explorations"): + (wiki / dirname).mkdir(exist_ok=True) + write_page( + wiki, + "sources/example-source.md", + "---\ntype: source\ntitle: Example Source\n---\n\n" + "# Example Source\n\n" + "> **TLDR:** A valid source page.\n\n" + "## Summary\n\nUseful source.\n\n" + "## Raw Source\n\n`raw/example.md`\n", + ) + write_page( + wiki, + "concepts/example-concept.md", + "---\ntype: concept\ntitle: Example 
Concept\n---\n\n" + "# Example Concept\n\n" + "> **TLDR:** A valid concept page.\n\n" + "## Overview\n\nConcept cites [[example-source]].\n\n" + "## Sources\n\n- [[example-source]]\n", + ) + (wiki / "_backlinks.json").write_text(json.dumps(serve._build_backlinks()), encoding="utf-8") + + status, payload = run_handler("GET", "/api/validate") + + self.assertEqual(status, 200) + self.assertTrue(payload["passed"]) + self.assertEqual(payload["error_count"], 0) + + def test_validate_api_uses_422_for_failed_gate(self): + wiki = self.make_wiki() + for dirname in ("sources", "concepts", "entities", "memories", "comparisons", "explorations"): + (wiki / dirname).mkdir(exist_ok=True) + write_page( + wiki, + "concepts/bad-page.md", + "---\ntype: source\n---\n\n" + "# Bad Page\n\n" + "Mentions [[missing-page]].\n", + ) + (wiki / "_backlinks.json").write_text(json.dumps(serve._build_backlinks()), encoding="utf-8") + + status, payload = run_handler("GET", "/api/validate?strict=true") + codes = {finding["code"] for finding in payload["findings"]} + + self.assertEqual(status, 422) + self.assertFalse(payload["passed"]) + self.assertIn("type_directory_mismatch", codes) + self.assertIn("dead_wikilink", codes) + def test_graph_controls_exist_before_graph_script(self): wiki = self.make_wiki() write_page( @@ -231,12 +2106,34 @@ def test_graph_controls_exist_before_graph_script(self): self.assertLess(html.index('id="graph-reset"'), html.index("var resetButton =")) self.assertLess(html.index('id="graph-labels"'), html.index("var labelsButton =")) self.assertLess(html.index('id="graph-motion"'), html.index("var motionButton =")) + self.assertLess(html.index('id="graph-search"'), html.index("var searchInput =")) + self.assertLess(html.index('id="graph-category"'), html.index("var categoryFilter =")) + self.assertLess(html.index('id="graph-depth"'), html.index("var depthFilter =")) + self.assertLess(html.index('id="graph-inspector"'), html.index("var inspector =")) + 
self.assertLess(html.index('id="graph-focus"'), html.index("var inspectorFocus =")) self.assertIn('id="graph-status"', html) + self.assertIn("Focus neighborhood", html) + self.assertIn('id="graph-open"', html) self.assertIn('tabindex="0"', html) self.assertIn('role="img"', html) + self.assertIn('', html) + self.assertIn("function visibleNodes()", html) + self.assertIn("function visibleEdges()", html) + self.assertIn("function syncDepthControl()", html) + self.assertIn("depthValue = '1'", html) + self.assertIn("depthFilter.disabled = !selectedNode;", html) + self.assertIn("Select a node before filtering by neighborhood.", html) + self.assertIn("var LARGE_GRAPH_LIMIT = 350;", html) + self.assertIn("var LARGE_LABEL_LIMIT = 160;", html) + self.assertIn("var FAST_RENDER_NODE_LIMIT = 450;", html) + self.assertIn("var FAST_RENDER_EDGE_LIMIT = 1200;", html) + self.assertIn("function syncLabelsButton()", html) + self.assertIn("function graphNeedsFastRender(currentNodes, currentEdges)", html) + self.assertIn("function graphTooLargeForMotion()", html) + self.assertIn("searchInput.addEventListener('input'", html) def test_graph_empty_state_when_no_visible_pages(self): - wiki = self.make_wiki() + self.make_wiki() html = serve._render_graph() @@ -255,8 +2152,118 @@ def test_graph_drag_and_zoom_interactions_are_guarded(self): self.assertIn("return dx * dx + dy * dy > 9;", html) self.assertIn("pinned[dragging.id] = didDrag;", html) + self.assertIn("if (hit) selectNode(hit);", html) + self.assertIn("canvas.addEventListener('dblclick'", html) + self.assertIn("if (hit) openNode(hit);", html) self.assertIn("panX += after.x - before.x;", html) + def test_graph_motion_is_capped_for_large_visible_sets(self): + wiki = self.make_wiki() + write_page( + wiki, + "concepts/a.md", + "---\ntype: concept\ntitle: A\n---\n# A\n", + ) + + html = serve._render_graph() + + self.assertIn("var simNodes = visibleNodes();", html) + self.assertIn("if (simNodes.length > LARGE_GRAPH_LIMIT) return;", html) + 
self.assertIn("if (graphTooLargeForMotion()) parts.push('motion capped');", html) + self.assertIn("motionButton.textContent = graphTooLargeForMotion() ? 'Motion capped'", html) + self.assertIn("var renderQueued = false;", html) + self.assertIn("function shouldRunContinuously()", html) + self.assertIn("function drawSoon()", html) + self.assertIn("var animateFlow = !motionPaused && !graphTooLargeForMotion();", html) + self.assertIn("if (activeEdge && animateFlow)", html) + self.assertIn("if (shouldRunContinuously()) startLoop();", html) + + def test_graph_uses_fast_canvas_rendering_for_large_visible_sets(self): + wiki = self.make_wiki() + write_page( + wiki, + "concepts/a.md", + "---\ntype: concept\ntitle: A\n---\n# A\n", + ) + + html = serve._render_graph() + + self.assertIn("if (graphNeedsFastRender(currentNodes, currentEdges)) parts.push('fast render');", html) + self.assertIn("function strokeEdgeBatch(edgeList, strokeStyle, lineWidth)", html) + self.assertIn("if (fastRender) {", html) + self.assertIn("strokeEdgeBatch(currentEdges, 'rgba(88,166,255,0.07)', 0.45);", html) + self.assertIn("Radial glow stays off in large overview mode except for focused nodes.", html) + self.assertIn("ctx.fillStyle = fastRender ? 
color + '28' : color + '40';", html) + + def test_graph_caps_default_overview_for_huge_visible_sets(self): + wiki = self.make_wiki() + for index in range(700): + write_page( + wiki, + f"concepts/topic-{index}.md", + "---\ntype: concept\ntitle: Topic\n---\n" + f"# Topic {index}\n\n[[topic-{(index + 1) % 700}]]\n", + ) + reset_wiki(wiki) + + html = serve._render_graph() + + self.assertIn("var OVERVIEW_NODE_LIMIT = 650;", html) + self.assertIn("function capEligibleNodes(eligible)", html) + self.assertIn("lockedOverviewIds[n.id] = true;", html) + self.assertIn("fullGraphLoaded && lockedOverviewIds && !searchTerm", html) + self.assertIn("function markKeep(n)", html) + self.assertIn("var highSignalLimit = Math.floor(OVERVIEW_NODE_LIMIT * 0.65);", html) + self.assertIn(".slice(0, highSignalLimit)", html) + self.assertIn("var sampled = eligible[Math.floor((i + 0.5) * eligible.length / Math.max(sampleLimit, 1))];", html) + self.assertIn("while (keepCount < OVERVIEW_NODE_LIMIT && fillIndex < eligible.length)", html) + self.assertIn("function reseedVisiblePositions()", html) + self.assertIn("if (searchMatches(n)) markKeep(n);", html) + self.assertIn("invalidateFilters();\n if (searchTerm && !fullGraphLoaded) loadFullGraph();", html) + self.assertIn("cachedSearchMatches = nodes.filter(searchMatches).length;", html) + self.assertIn("matches > SEARCH_LABEL_LIMIT", html) + self.assertIn("parts.push('data loaded');", html) + self.assertIn("parts.push('overview capped');", html) + + def test_graph_uses_bounded_initial_payload_for_large_wikis(self): + wiki = self.make_wiki() + for index in range(920): + write_page( + wiki, + f"concepts/topic-{index}.md", + "---\ntype: concept\ntitle: Topic\n---\n" + f"# Topic {index}\n\n[[topic-{(index + 1) % 920}]]\n", + ) + reset_wiki(wiki) + + html = serve._render_graph() + + self.assertIn('var initialGraphMode = "summary";', html) + self.assertIn("var totalNodeCount = 920;", html) + self.assertIn("250/920 nodes", html) + self.assertIn("fast 
overview", html) + self.assertIn("Load graph data (920 nodes)", html) + self.assertIn("var loadFullButton = document.getElementById('graph-load-full');", html) + self.assertIn("function loadFullGraph()", html) + self.assertIn("fetch('/api/graph')", html) + self.assertIn("Graph data loaded", html) + + def test_graph_labels_are_sparse_for_large_visible_sets(self): + wiki = self.make_wiki() + write_page( + wiki, + "concepts/a.md", + "---\ntype: concept\ntitle: A\n---\n# A\n", + ) + + html = serve._render_graph() + + self.assertIn("function graphTooLargeForDefaultLabels()", html) + self.assertIn("if (graphTooLargeForDefaultLabels() && !showAllLabels) parts.push('labels sparse');", html) + self.assertIn("labelsButton.textContent = showAllLabels ? 'Hide labels'", html) + self.assertIn("var largeLabelSet = currentNodes.length > LARGE_LABEL_LIMIT;", html) + self.assertIn("var defaultSparseLabel = !largeLabelSet", html) + def test_graph_script_embeds_titles_safely(self): wiki = self.make_wiki() write_page( @@ -273,9 +2280,25 @@ def test_graph_script_embeds_titles_safely(self): def test_search_limit_validation(self): self.assertEqual(serve._parse_search_limit("3"), (3, None)) self.assertEqual(serve._parse_search_limit("500"), (50, None)) + self.assertEqual(serve._parse_search_limit(""), (20, None)) self.assertEqual(serve._parse_search_limit("bad"), (None, "limit must be an integer")) self.assertEqual(serve._parse_search_limit("0"), (None, "limit must be at least 1")) + def test_query_text_bounds_and_falls_back_across_names(self): + self.assertEqual(serve._query_text({"q": [" agent memory "]}, "q"), "agent memory") + self.assertEqual(serve._query_text({"q": [""], "query": ["fallback"]}, "q", "query"), "fallback") + self.assertEqual(serve._query_text({"q": ["x" * 600]}, "q"), "x" * serve.MAX_QUERY_TEXT) + self.assertEqual(serve._query_text({"project": ["x" * 100]}, "project", max_len=80), "x" * 80) + + def test_search_api_bounds_query_text(self): + self.make_wiki() + 
long_query = "x" * 600 + + status, payload = run_handler("GET", f"/api/search?q={long_query}") + + self.assertEqual(status, 200) + self.assertEqual(payload["query"], "x" * serve.MAX_QUERY_TEXT) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_status_core.py b/tests/test_status_core.py new file mode 100644 index 0000000..9ec65f9 --- /dev/null +++ b/tests/test_status_core.py @@ -0,0 +1,172 @@ +import json +import sys +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "mcp_package")) + +from link_core import status as status_core # noqa: E402 +from link_core.status import link_status # noqa: E402 +from link_core.schema import write_schema # noqa: E402 +from link_core.wiki import build_backlinks, build_wiki_cache # noqa: E402 + + +def write_page(wiki: Path, rel: str, text: str) -> None: + path = wiki / rel + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(text, encoding="utf-8") + + +class StatusCoreTests(unittest.TestCase): + def make_wiki(self) -> Path: + root = Path(tempfile.mkdtemp(prefix="link-status-core-")) + wiki = root / "wiki" + for dirname in ("sources", "concepts", "entities", "memories", "comparisons", "explorations"): + (wiki / dirname).mkdir(parents=True, exist_ok=True) + write_page(wiki, "index.md", "# Index\n") + write_page(wiki, "log.md", "# Log\n") + write_page( + wiki, + "memories/prefer-local-memory.md", + "---\n" + "type: memory\n" + "title: Prefer local memory\n" + "memory_type: preference\n" + "scope: user\n" + "status: active\n" + "date_captured: \"2026-05-05T00:00:00Z\"\n" + "source: unit-test\n" + "review_status: reviewed\n" + "---\n\n" + "# Prefer local memory\n\n" + "> **TLDR:** User prefers local memory.\n\n" + "## Memory\n\nUser prefers local memory.\n\n" + "## Source\n\nunit-test\n", + ) + (wiki / "_backlinks.json").write_text(json.dumps(build_backlinks(wiki, body_only=False)), 
encoding="utf-8") + write_schema(wiki) + return wiki + + def test_link_status_reports_ready_wiki(self): + wiki = self.make_wiki() + + payload = link_status(wiki, version="9.9.9", cache=build_wiki_cache(wiki), include_validation=True) + + self.assertTrue(payload["ready"]) + self.assertEqual(payload["version"], "9.9.9") + self.assertEqual(payload["page_count"], 3) + self.assertEqual(payload["content_page_count"], 1) + self.assertEqual(payload["memory_count"], 1) + self.assertEqual(payload["active_memory_count"], 1) + self.assertIn(payload["search_backend"], {"sqlite-fts", "token-index"}) + self.assertEqual(payload["schema"]["status"], "current") + self.assertTrue(payload["validation"]["passed"]) + self.assertEqual(payload["next_actions"][0]["tool"], "query_link") + + def test_link_status_reports_missing_structure(self): + wiki = Path(tempfile.mkdtemp(prefix="link-status-core-")) / "wiki" + + payload = link_status(wiki, include_validation=True) + + self.assertFalse(payload["ready"]) + self.assertIn("wiki", payload["missing"]) + self.assertEqual(payload["schema"]["status"], "missing") + self.assertEqual(payload["page_count"], 0) + self.assertEqual(payload["search_backend"], "unavailable") + self.assertEqual(payload["next_actions"][0]["tool"], "doctor") + + def test_link_status_guides_empty_initialized_wiki_to_ingest(self): + root = Path(tempfile.mkdtemp(prefix="link-status-core-")) + wiki = root / "wiki" + for dirname in ("sources", "concepts", "entities", "memories", "comparisons", "explorations"): + (wiki / dirname).mkdir(parents=True, exist_ok=True) + write_page(wiki, "index.md", "# Index\n") + write_page(wiki, "log.md", "# Log\n") + (wiki / "_backlinks.json").write_text(json.dumps(build_backlinks(wiki, body_only=False)), encoding="utf-8") + write_schema(wiki) + + payload = link_status(wiki) + + self.assertTrue(payload["ready"]) + self.assertEqual(payload["page_count"], 2) + self.assertEqual(payload["content_page_count"], 0) + 
self.assertEqual(payload["next_actions"][0]["tool"], "ingest_status") + self.assertEqual(payload["next_actions"][1]["tool"], "starter_prompts") + + def test_link_status_surfaces_cache_and_memory_warnings(self): + wiki = self.make_wiki() + + with ( + patch.object(status_core, "build_wiki_cache", side_effect=RuntimeError("cache failed")), + patch.object(status_core, "memory_records", side_effect=RuntimeError("memory failed")), + ): + payload = link_status(wiki) + + self.assertFalse(payload["ready"]) + self.assertEqual(payload["page_count"], 0) + self.assertEqual(payload["memory_count"], 0) + self.assertEqual( + [warning["code"] for warning in payload["warnings"]], + ["cache_unavailable", "memory_records_unavailable"], + ) + self.assertEqual(payload["warnings"][0]["detail"], "cache failed") + self.assertEqual(payload["warnings"][1]["detail"], "memory failed") + + def test_link_status_surfaces_cache_read_warnings(self): + wiki = self.make_wiki() + cache = build_wiki_cache(wiki) + cache["read_warning_count"] = 1 + cache["read_warnings"] = [{"page": "wiki/concepts/locked.md", "error": "permission denied"}] + + payload = link_status(wiki, cache=cache) + + self.assertFalse(payload["ready"]) + self.assertEqual(payload["warnings"][0]["code"], "cache_read_warnings") + + def test_link_status_points_validation_shape_errors_to_doctor_fix(self): + wiki = self.make_wiki() + write_page( + wiki, + "sources/bad-source.md", + "---\ntype: source\ntitle: Bad Source\n---\n\n" + "# Bad Source\n\n" + "Captured from raw/bad-source.md.\n", + ) + (wiki / "_backlinks.json").write_text(json.dumps(build_backlinks(wiki, body_only=False)), encoding="utf-8") + + payload = link_status(wiki, include_validation=True) + + self.assertFalse(payload["ready"]) + self.assertIn("missing_required_section", payload["validation"]["error_codes"]) + self.assertEqual(payload["next_actions"][0]["tool"], "doctor") + self.assertEqual(payload["next_actions"][0]["arguments"], {"fix": True}) + 
self.assertEqual(payload["next_actions"][1]["tool"], "validate_wiki") + self.assertNotIn("rebuild_backlinks", [action["tool"] for action in payload["next_actions"]]) + + def test_link_status_points_stale_backlinks_to_rebuild(self): + wiki = self.make_wiki() + write_page( + wiki, + "concepts/linking.md", + "---\ntype: concept\ntitle: Linking\n---\n\n" + "# Linking\n\n" + "> **TLDR:** Valid linked concept.\n\n" + "## Overview\n\nLinks to [[prefer-local-memory]].\n\n" + "## Sources\n\n- [[prefer-local-memory]]\n", + ) + (wiki / "_backlinks.json").write_text(json.dumps({"backlinks": {}, "forward": {}}), encoding="utf-8") + + payload = link_status(wiki, include_validation=True) + + self.assertFalse(payload["ready"]) + self.assertIn("stale_backlinks", payload["validation"]["error_codes"]) + self.assertEqual(payload["next_actions"][0]["tool"], "rebuild_backlinks") + self.assertEqual(payload["next_actions"][1]["tool"], "validate_wiki") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_tool_contract.py b/tests/test_tool_contract.py new file mode 100644 index 0000000..12cada8 --- /dev/null +++ b/tests/test_tool_contract.py @@ -0,0 +1,50 @@ +import importlib.util +import shutil +import tempfile +import unittest +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +SPEC = importlib.util.spec_from_file_location("check_tool_contract", ROOT / "scripts/check_tool_contract.py") +contract = importlib.util.module_from_spec(SPEC) +assert SPEC.loader is not None +SPEC.loader.exec_module(contract) + + +class ToolContractTests(unittest.TestCase): + def test_cli_contract_matches_expected_commands(self): + self.assertEqual(contract.cli_commands(), contract.EXPECTED_CLI_COMMANDS) + + def test_mcp_contract_matches_expected_tools(self): + self.assertEqual(contract.mcp_tools(), contract.EXPECTED_MCP_TOOLS) + + def test_repo_tool_contract_passes(self): + self.assertEqual(contract.check_tool_contract(), []) + + def 
test_contract_reports_missing_mcp_docs(self): + tmp = Path(tempfile.mkdtemp(prefix="link-tool-contract-")) + try: + (tmp / "mcp_package/link_mcp").mkdir(parents=True) + (tmp / "mcp_package").mkdir(exist_ok=True) + shutil.copy2(ROOT / "link.py", tmp / "link.py") + shutil.copy2(ROOT / "mcp_package/link_mcp/server.py", tmp / "mcp_package/link_mcp/server.py") + + (tmp / "docs").mkdir() + cli_reference = "\n".join(f"`link {command}`" for command in sorted(contract.DOCS_CLI_COMMANDS)) + mcp_reference = "\n".join( + tool for tool in sorted(contract.EXPECTED_MCP_TOOLS) if tool != "query_link" + ) + (tmp / "docs/cli.html").write_text(cli_reference, encoding="utf-8") + (tmp / "docs/mcp.html").write_text(mcp_reference, encoding="utf-8") + (tmp / "mcp_package/README.md").write_text(mcp_reference, encoding="utf-8") + + findings = contract.check_tool_contract(tmp) + finally: + shutil.rmtree(tmp) + + self.assertTrue(any("query_link" in finding for finding in findings)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_validation_core.py b/tests/test_validation_core.py new file mode 100644 index 0000000..bae8dad --- /dev/null +++ b/tests/test_validation_core.py @@ -0,0 +1,128 @@ +import json +import sys +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "mcp_package")) + +from link_core.validation import validate_wiki # noqa: E402 +from link_core.wiki import build_backlinks # noqa: E402 + + +def write_page(wiki: Path, rel: str, text: str) -> None: + path = wiki / rel + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(text, encoding="utf-8") + + +class ValidationCoreTests(unittest.TestCase): + def make_wiki(self) -> Path: + root = Path(tempfile.mkdtemp(prefix="link-validation-core-")) + wiki = root / "wiki" + for dirname in ("sources", "concepts", "entities", "memories", "comparisons", "explorations"): + (wiki / 
dirname).mkdir(parents=True, exist_ok=True) + write_page(wiki, "index.md", "# Index\n") + write_page(wiki, "log.md", "# Log\n") + return wiki + + def test_validate_wiki_accepts_well_formed_pages(self): + wiki = self.make_wiki() + write_page( + wiki, + "sources/example-source.md", + "---\ntype: source\ntitle: Example Source\n---\n\n" + "# Example Source\n\n" + "> **TLDR:** A valid source page.\n\n" + "## Summary\n\nUseful source.\n\n" + "## Raw Source\n\n`raw/example.md`\n", + ) + write_page( + wiki, + "concepts/example-concept.md", + "---\ntype: concept\ntitle: Example Concept\n---\n\n" + "# Example Concept\n\n" + "> **TLDR:** A valid concept page.\n\n" + "## Overview\n\nConcept overview cites [[example-source]].\n\n" + "## Sources\n\n- [[example-source]]\n", + ) + (wiki / "_backlinks.json").write_text(json.dumps(build_backlinks(wiki, body_only=False)), encoding="utf-8") + + read_counts: dict[str, int] = {} + original_read_text = Path.read_text + resolved_wiki = wiki.resolve() + + def counting_read_text(path: Path, *args, **kwargs): + if path.suffix == ".md": + rel = path.relative_to(resolved_wiki).as_posix() + read_counts[rel] = read_counts.get(rel, 0) + 1 + return original_read_text(path, *args, **kwargs) + + with patch.object(Path, "read_text", counting_read_text): + payload = validate_wiki(wiki) + + self.assertTrue(payload["passed"]) + self.assertEqual(payload["error_count"], 0) + self.assertEqual( + read_counts, + { + "concepts/example-concept.md": 1, + "index.md": 1, + "log.md": 1, + "sources/example-source.md": 1, + }, + ) + + def test_validate_wiki_rejects_malformed_agent_pages(self): + wiki = self.make_wiki() + write_page( + wiki, + "concepts/bad-page.md", + "---\ntype: source\n---\n\n" + "# Bad Page\n\n" + "Mentions [[missing-page]].\n", + ) + (wiki / "_backlinks.json").write_text("{}", encoding="utf-8") + + payload = validate_wiki(wiki) + codes = {finding["code"] for finding in payload["findings"]} + + self.assertFalse(payload["passed"]) + 
self.assertIn("type_directory_mismatch", codes) + self.assertIn("missing_frontmatter_field", codes) + self.assertIn("missing_required_section", codes) + self.assertIn("dead_wikilink", codes) + self.assertIn("stale_backlinks", codes) + + def test_validate_wiki_reports_unreadable_pages(self): + wiki = self.make_wiki() + write_page( + wiki, + "concepts/locked-page.md", + "---\ntype: concept\ntitle: Locked Page\n---\n\n" + "# Locked Page\n\n" + "> **TLDR:** A locked page.\n\n" + "## Overview\n\nCannot read this.\n\n" + "## Sources\n\n- [[locked-page]]\n", + ) + (wiki / "_backlinks.json").write_text(json.dumps(build_backlinks(wiki, body_only=False)), encoding="utf-8") + original_read_text = Path.read_text + + def flaky_read_text(path: Path, *args, **kwargs): + if path.name == "locked-page.md": + raise OSError("permission denied") + return original_read_text(path, *args, **kwargs) + + with patch.object(Path, "read_text", flaky_read_text): + payload = validate_wiki(wiki) + + self.assertFalse(payload["passed"]) + self.assertIn("unreadable_page", {finding["code"] for finding in payload["findings"]}) + self.assertNotIn("stale_backlinks", {finding["code"] for finding in payload["findings"]}) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_web_graph_core.py b/tests/test_web_graph_core.py new file mode 100644 index 0000000..9e9ac86 --- /dev/null +++ b/tests/test_web_graph_core.py @@ -0,0 +1,77 @@ +import sys +import unittest +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "mcp_package")) + +from link_core.web_graph import ( # noqa: E402 + GRAPH_CATEGORY_COLORS, + graph_category_options, + graph_initial_payload, + graph_legend_items, + graph_needs_bounded_overview, +) + + +class WebGraphCoreTests(unittest.TestCase): + def test_graph_initial_payload_uses_full_graph_under_limit(self): + graph = { + "nodes": [ + {"id": "root", "category": "root"}, + {"id": "a", "title": "A", "category": "concepts"}, 
+ {"id": "b", "title": "B", "category": "sources"}, + ], + "edges": [ + {"source": "a", "target": "b"}, + {"source": "root", "target": "a"}, + ], + } + + payload = graph_initial_payload(graph, full_node_limit=10) + + self.assertEqual(payload["graph_mode"], "full") + self.assertEqual(payload["node_count"], 2) + self.assertEqual(payload["edge_count"], 1) + self.assertEqual(payload["total_node_count"], 2) + self.assertEqual(payload["total_edge_count"], 1) + + def test_graph_initial_payload_uses_summary_for_large_graph(self): + full_graph = { + "nodes": [{"id": f"n-{index}", "category": "concepts"} for index in range(5)], + "edges": [{"source": "n-0", "target": "n-1"}], + } + summary_graph = { + "nodes": [{"id": "n-0", "category": "concepts"}, {"id": "n-1", "category": "concepts"}], + "edges": [{"source": "n-0", "target": "n-1"}], + } + + payload = graph_initial_payload(full_graph, summary_graph=summary_graph, full_node_limit=2) + + self.assertEqual(payload["graph_mode"], "summary") + self.assertEqual(payload["node_count"], 2) + self.assertEqual(payload["edge_count"], 1) + self.assertEqual(payload["total_node_count"], 5) + self.assertIn("fast overview", payload["graph_note"]) + self.assertTrue(graph_needs_bounded_overview(full_graph, full_node_limit=2)) + + def test_graph_category_options_and_legend_escape_content(self): + nodes = [ + {"id": "a", "category": "concepts"}, + {"id": "b", "category": 'weird"type'}, + {"id": "root", "category": "root"}, + ] + + options = graph_category_options(nodes) + legend = graph_legend_items({**GRAPH_CATEGORY_COLORS, "": '"red"'}) + + self.assertIn('', options) + self.assertIn('', options) + self.assertNotIn(">root<", options) + self.assertIn("<bad>", legend) + self.assertIn(""red"", legend) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_web_http_core.py b/tests/test_web_http_core.py new file mode 100644 index 0000000..b36a57d --- /dev/null +++ b/tests/test_web_http_core.py @@ -0,0 +1,143 @@ +import sys 
+import tempfile +import unittest +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "mcp_package")) + +from link_core.web_http import ( # noqa: E402 + BROWSER_SOURCE_LOCAL_ONLY, + CONTENT_SECURITY_POLICY, + HOST_HEADER_LOCAL_ONLY, + HOST_HEADER_REQUIRED, + LocalRateLimiter, + PERMISSIONS_POLICY, + SVG_CONTENT_SECURITY_POLICY, + is_allowed_static_file, + local_no_store_headers, + local_security_headers, + parse_bounded_int, + resolve_raw_static_path, + safe_resolve, + validate_local_browser_source_headers, + validate_local_host_header, +) + + +class WebHttpCoreTests(unittest.TestCase): + def test_local_security_headers_include_browser_isolation(self): + headers = dict(local_security_headers("1")) + + self.assertEqual(headers["X-Link-API-Version"], "1") + self.assertEqual(headers["X-Content-Type-Options"], "nosniff") + self.assertEqual(headers["Cross-Origin-Opener-Policy"], "same-origin") + self.assertEqual(headers["Permissions-Policy"], PERMISSIONS_POLICY) + self.assertEqual(headers["Content-Security-Policy"], CONTENT_SECURITY_POLICY) + self.assertIn("frame-ancestors 'none'", CONTENT_SECURITY_POLICY) + self.assertIn("camera=()", PERMISSIONS_POLICY) + self.assertNotIn("fullscreen=()", PERMISSIONS_POLICY) + + def test_local_security_headers_can_use_strict_svg_policy(self): + headers = dict(local_security_headers("2", SVG_CONTENT_SECURITY_POLICY)) + + self.assertEqual(headers["X-Link-API-Version"], "2") + self.assertEqual(headers["Content-Security-Policy"], SVG_CONTENT_SECURITY_POLICY) + self.assertIn("script-src 'none'", SVG_CONTENT_SECURITY_POLICY) + + def test_local_no_store_headers_include_legacy_cache_guards(self): + headers = dict(local_no_store_headers()) + + self.assertEqual(headers["Cache-Control"], "no-store") + self.assertEqual(headers["Pragma"], "no-cache") + self.assertEqual(headers["Expires"], "0") + + def test_local_rate_limiter_reports_retry_after_window(self): + now = 100.0 + + def clock() -> 
float: + return now + + limiter = LocalRateLimiter(max_events=2, window_seconds=10, clock=clock) + + self.assertEqual(limiter.check("127.0.0.1"), (True, 0)) + self.assertEqual(limiter.check("127.0.0.1"), (True, 0)) + self.assertEqual(limiter.check("127.0.0.1"), (False, 10)) + now = 111.0 + self.assertEqual(limiter.check("127.0.0.1"), (True, 0)) + + def test_parse_bounded_int_clamps_and_reports_errors(self): + self.assertEqual(parse_bounded_int("", "limit", 40, 1, 100), (40, None)) + self.assertEqual(parse_bounded_int("250", "limit", 40, 1, 100), (100, None)) + self.assertEqual(parse_bounded_int("0", "limit", 40, 1, 100), (None, "limit must be at least 1")) + self.assertEqual(parse_bounded_int("bad", "limit", 40, 1, 100), (None, "limit must be an integer")) + + def test_validate_local_host_header_accepts_local_hosts_with_ports(self): + for host in ("127.0.0.1", "127.0.0.1:3000", "localhost", "localhost:3000"): + self.assertEqual(validate_local_host_header(host), (True, None)) + + def test_validate_local_host_header_rejects_missing_or_remote_hosts(self): + self.assertEqual(validate_local_host_header(""), (False, HOST_HEADER_REQUIRED)) + self.assertEqual(validate_local_host_header("attacker.example"), (False, HOST_HEADER_LOCAL_ONLY)) + self.assertEqual(validate_local_host_header("localhost.evil.test"), (False, HOST_HEADER_LOCAL_ONLY)) + self.assertEqual(validate_local_host_header("localhost:bad"), (False, HOST_HEADER_LOCAL_ONLY)) + self.assertEqual(validate_local_host_header("localhost attacker"), (False, HOST_HEADER_LOCAL_ONLY)) + + def test_validate_local_browser_source_headers_accepts_local_or_missing_sources(self): + self.assertEqual(validate_local_browser_source_headers("", ""), (True, None)) + self.assertEqual(validate_local_browser_source_headers("http://localhost:3000", ""), (True, None)) + self.assertEqual(validate_local_browser_source_headers("", "http://127.0.0.1:3000/graph"), (True, None)) + + def 
test_validate_local_browser_source_headers_rejects_remote_sources(self): + self.assertEqual( + validate_local_browser_source_headers("https://attacker.example", ""), + (False, BROWSER_SOURCE_LOCAL_ONLY), + ) + self.assertEqual( + validate_local_browser_source_headers("", "http://localhost.evil.test/page"), + (False, BROWSER_SOURCE_LOCAL_ONLY), + ) + self.assertEqual(validate_local_browser_source_headers("null", ""), (False, BROWSER_SOURCE_LOCAL_ONLY)) + + def test_raw_static_resolver_stays_under_raw_directory(self): + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + raw = root / "raw" + raw.mkdir() + asset = raw / "asset.png" + asset.write_bytes(b"png") + allowed_types = {".png": "image/png"} + + self.assertEqual( + resolve_raw_static_path(raw, "asset.png", allowed_types), + (asset.resolve(), "image/png"), + ) + self.assertEqual(resolve_raw_static_path(raw, "../logo.png", allowed_types), (None, None)) + self.assertEqual(resolve_raw_static_path(raw, "%2e%2e/logo.png", allowed_types), (None, None)) + self.assertEqual(resolve_raw_static_path(raw, "asset.txt", allowed_types), (None, None)) + + def test_static_file_allowlist_allows_root_assets_and_raw_media_only(self): + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + raw = root / "raw" + raw.mkdir() + logo = root / "logo.svg" + image = raw / "image.png" + note = raw / "note.txt" + private = root / "serve.py" + for path in (logo, image, note, private): + path.write_text("x", encoding="utf-8") + + allowed_types = {".png": "image/png"} + self.assertTrue(is_allowed_static_file(logo, raw, [logo], allowed_types)) + self.assertTrue(is_allowed_static_file(image, raw, [logo], allowed_types)) + self.assertFalse(is_allowed_static_file(note, raw, [logo], allowed_types)) + self.assertFalse(is_allowed_static_file(private, raw, [logo], allowed_types)) + + def test_safe_resolve_handles_malformed_paths(self): + self.assertIsNone(safe_resolve(Path("bad\0path"))) + + +if __name__ == "__main__": + 
unittest.main() diff --git a/tests/test_web_layout_core.py b/tests/test_web_layout_core.py new file mode 100644 index 0000000..da741fc --- /dev/null +++ b/tests/test_web_layout_core.py @@ -0,0 +1,43 @@ +import sys +import unittest +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "mcp_package")) + +from link_core.web_layout import render_footer_html, render_header_html, render_layout # noqa: E402 + + +class WebLayoutCoreTests(unittest.TestCase): + def test_header_has_primary_navigation_and_search(self): + html = render_header_html() + + self.assertIn('ingest', html) + self.assertIn('brief', html) + self.assertIn('propose', html) + self.assertIn('graph', html) + self.assertIn('id="search-input"', html) + self.assertIn("data-theme-toggle", html) + + def test_footer_points_to_github(self): + html = render_footer_html() + + self.assertIn("local agent memory", html) + self.assertIn("https://github.com/gowtham0992/link", html) + + def test_layout_escapes_title_and_page_class(self): + html = render_layout('', "<main>Body</main>", page_class='graph" onclick="bad') + + self.assertIn("<title><Title> — Link", html) + self.assertIn('class="graph" onclick="bad"', html) + self.assertIn("
    Body
    ", html) + self.assertIn("document.activeElement.id === 'search-input'", html) + self.assertIn("window.location.href = '/search?q=' + encodeURIComponent(q);", html) + self.assertIn("localStorage.getItem('link-theme')", html) + self.assertIn("navigator.clipboard.writeText", html) + self.assertIn("/api/raw-source", html) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_web_memory_core.py b/tests/test_web_memory_core.py new file mode 100644 index 0000000..7ec265d --- /dev/null +++ b/tests/test_web_memory_core.py @@ -0,0 +1,105 @@ +import sys +import unittest +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "mcp_package")) + +from link_core.web_memory import ( # noqa: E402 + render_capture_card, + render_memory_action_button, + render_memory_card, + render_memory_next_actions, + render_memory_section, +) + + +def page_href(name: str) -> str: + return f"/page/{name}" + + +class WebMemoryCoreTests(unittest.TestCase): + def test_memory_card_escapes_content_and_renders_actions(self): + record = { + "name": "local-memory", + "title": "", + "tldr": "Use memory.", + "memory_type": "preference", + "scope": "user", + "status": "active", + "actions": [{ + "label": "Review", + "kind": "review", + "command": "link review-memory local-memory", + "arguments": {"identifier": "local-memory"}, + }], + } + + html = render_memory_card(record, page_href=page_href) + + self.assertIn('<Local Memory>', html) + self.assertIn("Use <local> memory.", html) + self.assertIn('data-memory-action="review"', html) + self.assertNotIn("", html) + + def test_memory_section_uses_action_hints_when_record_has_no_actions(self): + record = {"name": "agent-memory", "title": "Agent Memory"} + + html = render_memory_section( + "Memories", + [record], + "No memories.", + page_href=page_href, + action_hints=lambda _record: [{ + "label": "Archive", + "kind": "archive", + "command": "link archive-memory agent-memory", + "arguments": 
{"identifier": "agent-memory"}, + }], + href="/inbox", + ) + + self.assertIn('view all', html) + self.assertIn('data-memory-action="archive"', html) + self.assertIn("link archive-memory agent-memory", html) + + def test_capture_card_escapes_warnings_and_commands(self): + html = render_capture_card({ + "title": "Raw ", + "path": "raw/memory-captures/session.md", + "secret_warnings": ["OpenAI "], + "commands": { + "accept": "accept-capture", + "redact": "redact-capture", + }, + }) + + self.assertIn("Raw <Capture>", html) + self.assertIn("OpenAI <key>", html) + self.assertIn("accept-capture", html) + self.assertNotIn("Raw ", html) + + def test_memory_action_button_requires_supported_kind_and_identifier(self): + self.assertIn("Mark reviewed", render_memory_action_button({ + "kind": "review", + "arguments": {"identifier": "one"}, + })) + self.assertEqual("", render_memory_action_button({"kind": "forget", "arguments": {"identifier": "one"}})) + self.assertEqual("", render_memory_action_button({"kind": "review", "arguments": {}})) + + def test_next_actions_render_commands(self): + html = render_memory_next_actions([{ + "label": "Review", + "detail": "Open inbox.", + "command": "link memory-inbox", + "href": "/inbox", + }]) + + self.assertIn('Review', html) + self.assertIn("Open inbox.", html) + self.assertIn("link memory-inbox", html) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_wiki_core.py b/tests/test_wiki_core.py new file mode 100644 index 0000000..04c26de --- /dev/null +++ b/tests/test_wiki_core.py @@ -0,0 +1,472 @@ +import json +import os +import sys +import tempfile +import time +import unittest +from pathlib import Path +from unittest.mock import patch + + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "mcp_package")) + +from link_core.wiki import ( # noqa: E402 + build_index_markdown, + build_backlinks, + build_wiki_cache, + context_for_topic, + graph_data, + graph_summary, + list_pages, + 
load_backlinks_index, + page_link_summary, + rebuild_index, + search_pages, + wiki_mtime, +) + + +def write_page(wiki: Path, rel: str, text: str) -> Path: + path = wiki / rel + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(text, encoding="utf-8") + return path + + +class WikiCoreTests(unittest.TestCase): + def make_wiki(self) -> Path: + root = Path(tempfile.mkdtemp(prefix="link-wiki-core-")) + wiki = root / "wiki" + wiki.mkdir() + write_page(wiki, "index.md", "# Index\n") + write_page(wiki, "log.md", "# Log\n") + return wiki + + def test_cache_search_context_and_graph(self): + wiki = self.make_wiki() + write_page( + wiki, + "concepts/agent-memory.md", + ( + "---\n" + "type: concept\n" + "title: Agent Memory\n" + "aliases: [durable context]\n" + "tags: [agents, memory]\n" + "maturity: growing\n" + "---\n" + "# Agent Memory\n\n" + "> **TLDR:** Durable memory for agents.\n\n" + "Links to [[link]] and [[retrieval]].\n" + ), + ) + write_page( + wiki, + "entities/link.md", + "---\ntype: entity\ntitle: Link\n---\n# Link\n\nLink references [[agent-memory]].\n", + ) + write_page( + wiki, + "concepts/retrieval.md", + "---\ntype: concept\ntitle: Retrieval\n---\n# Retrieval\n", + ) + (wiki / "_backlinks.json").write_text( + json.dumps({"backlinks": {"agent-memory": ["link"]}, "forward": {"link": ["agent-memory"]}}), + encoding="utf-8", + ) + + cache = build_wiki_cache(wiki) + search = search_pages("durable", cache) + context_read_counts: dict[str, int] = {} + original_read_text = Path.read_text + + def counting_read_text(path: Path, *args, **kwargs): + if path.suffix == ".md": + context_read_counts[path.stem] = context_read_counts.get(path.stem, 0) + 1 + return original_read_text(path, *args, **kwargs) + + with patch.object(Path, "read_text", counting_read_text): + context = context_for_topic(wiki, "agent memory", cache) + graph = graph_data(cache) + + self.assertEqual(search[0]["name"], "agent-memory") + self.assertIn("date_published", search[0]) + 
self.assertIn(cache["search_backend"], {"sqlite-fts", "token-index"}) + if cache["search_backend"] == "sqlite-fts": + self.assertIsNotNone(cache["fts_index"]) + self.assertIn("durable", cache["meta_words_index"]["agent-memory"]) + self.assertIn("references", cache["text_words_index"]["link"]) + self.assertIn("Links to [[link]]", cache["body_index"]["agent-memory"]) + self.assertEqual(cache["meta_index"]["agent-memory"]["title"], "Agent Memory") + self.assertEqual(context["primary"], "agent-memory") + self.assertEqual(context["inbound_count"], 1) + self.assertEqual(context["forward_count"], 2) + self.assertEqual([page["name"] for page in context["pages"]], ["agent-memory", "link", "retrieval"]) + self.assertEqual(context_read_counts, {}) + self.assertEqual(cache["forward_links_index"]["agent-memory"], ["link", "retrieval"]) + self.assertIn({"source": "agent-memory", "target": "link"}, graph["edges"]) + self.assertIn({"source": "agent-memory", "target": "retrieval"}, graph["edges"]) + + def test_build_wiki_cache_reports_read_warnings(self): + wiki = self.make_wiki() + write_page(wiki, "concepts/readable.md", "---\ntype: concept\ntitle: Readable\n---\n# Readable\n") + write_page(wiki, "concepts/locked.md", "---\ntype: concept\ntitle: Locked\n---\n# Locked\n") + original_read_text = Path.read_text + + def flaky_read_text(path: Path, *args, **kwargs): + if path.name == "locked.md": + raise OSError("permission denied") + return original_read_text(path, *args, **kwargs) + + with patch.object(Path, "read_text", flaky_read_text): + cache = build_wiki_cache(wiki) + + self.assertEqual(cache["read_warning_count"], 1) + self.assertEqual(cache["read_warnings"][0]["page"], "wiki/concepts/locked.md") + self.assertIn("readable", cache["page_map"]) + self.assertNotIn("locked", cache["page_map"]) + self.assertFalse((wiki.parent / ".link-cache/wiki-cache-v1.json").exists()) + + def test_build_wiki_cache_uses_persistent_page_records_when_unchanged(self): + wiki = self.make_wiki() + 
write_page( + wiki, + "concepts/agent-memory.md", + "---\ntype: concept\ntitle: Agent Memory\n---\n# Agent Memory\n\n> **TLDR:** Durable context.\n", + ) + + first = build_wiki_cache(wiki) + self.assertFalse(first["persistent_cache"]["hit"]) + self.assertTrue(first["persistent_cache"]["written"]) + self.assertTrue((wiki.parent / ".link-cache/wiki-cache-v1.json").exists()) + + original_read_text = Path.read_text + + def no_markdown_reads(path: Path, *args, **kwargs): + if path.suffix == ".md": + raise AssertionError("unchanged persistent cache should avoid markdown reads") + return original_read_text(path, *args, **kwargs) + + with patch.object(Path, "read_text", no_markdown_reads): + second = build_wiki_cache(wiki) + + self.assertTrue(second["persistent_cache"]["hit"]) + self.assertIn("agent-memory", second["page_map"]) + self.assertIn("durable", second["fulltext"]["agent-memory"]) + + def test_build_wiki_cache_invalidates_persistent_records_after_page_edit(self): + wiki = self.make_wiki() + page = write_page( + wiki, + "concepts/agent-memory.md", + "---\ntype: concept\ntitle: Agent Memory\n---\n# Agent Memory\n\n> **TLDR:** Old context.\n", + ) + build_wiki_cache(wiki) + time.sleep(0.01) + page.write_text( + "---\ntype: concept\ntitle: Agent Memory\n---\n# Agent Memory\n\n> **TLDR:** New context.\n", + encoding="utf-8", + ) + + cache = build_wiki_cache(wiki) + + self.assertFalse(cache["persistent_cache"]["hit"]) + self.assertIn("new context", cache["fulltext"]["agent-memory"]) + + def test_build_wiki_cache_does_not_create_persistent_cache_for_missing_wiki(self): + root = Path(tempfile.mkdtemp(prefix="link-wiki-core-")) + wiki = root / "missing-wiki" + + cache = build_wiki_cache(wiki) + + self.assertEqual(cache["pages"], []) + self.assertFalse(cache["persistent_cache"]["enabled"]) + self.assertFalse((root / ".link-cache").exists()) + + def test_graph_data_uses_cached_forward_links_without_rereading_pages(self): + wiki = self.make_wiki() + write_page( + wiki, + 
"concepts/agent-memory.md", + "---\ntype: concept\ntitle: Agent Memory\n---\n# Agent Memory\n\n[[link]]\n", + ) + write_page(wiki, "entities/link.md", "---\ntype: entity\ntitle: Link\n---\n# Link\n") + cache = build_wiki_cache(wiki) + + with patch.object(Path, "read_text", side_effect=AssertionError("graph_data should use cache")): + graph = graph_data(cache) + + self.assertIn({"source": "agent-memory", "target": "link"}, graph["edges"]) + + def test_list_pages_is_bounded_and_paginated_by_default(self): + wiki = self.make_wiki() + for index in range(5): + write_page( + wiki, + f"concepts/page-{index}.md", + f"---\ntype: concept\ntitle: Page {index}\nmaturity: growing\n---\n# Page {index}\n", + ) + cache = build_wiki_cache(wiki) + + first = list_pages(cache, category="concepts", limit=2) + second = list_pages(cache, category="concepts", limit=2, offset=2) + full = list_pages(cache, category="concepts", limit=2, include_all=True) + + self.assertEqual(first["count"], 5) + self.assertEqual(first["returned_count"], 2) + self.assertTrue(first["truncated"]) + self.assertEqual(first["follow_up"][0]["arguments"]["offset"], 2) + self.assertEqual(second["returned_count"], 2) + self.assertEqual(full["returned_count"], 5) + self.assertFalse(full["truncated"]) + + def test_graph_summary_caps_overview_for_agent_context(self): + wiki = self.make_wiki() + for index in range(6): + links = " ".join(f"[[page-{target}]]" for target in range(6) if target != index) + write_page( + wiki, + f"concepts/page-{index}.md", + f"---\ntype: concept\ntitle: Page {index}\n---\n# Page {index}\n\n{links}\n", + ) + cache = build_wiki_cache(wiki) + + summary = graph_summary(cache, limit=3, max_edges=2) + + self.assertEqual(summary["mode"], "overview") + self.assertEqual(summary["node_count"], 8) + self.assertEqual(summary["returned_nodes"], 3) + self.assertEqual(summary["returned_edges"], 2) + self.assertTrue(summary["truncated"]) + self.assertTrue(summary["edge_truncated"]) + 
self.assertEqual(summary["follow_up"][-1]["tool"], "get_graph") + + def test_graph_summary_topic_returns_bounded_neighborhood(self): + wiki = self.make_wiki() + write_page( + wiki, + "concepts/agent-memory.md", + "---\ntype: concept\ntitle: Agent Memory\n---\n# Agent Memory\n\n[[link]] [[retrieval]]\n", + ) + write_page(wiki, "entities/link.md", "---\ntype: entity\ntitle: Link\n---\n# Link\n\n[[agent-memory]]\n") + write_page(wiki, "concepts/retrieval.md", "---\ntype: concept\ntitle: Retrieval\n---\n# Retrieval\n") + write_page(wiki, "concepts/isolated.md", "---\ntype: concept\ntitle: Isolated\n---\n# Isolated\n") + cache = build_wiki_cache(wiki) + + summary = graph_summary(cache, topic="agent memory", limit=10, depth=1) + node_ids = {node["id"] for node in summary["nodes"]} + + self.assertEqual(summary["mode"], "topic-neighborhood") + self.assertTrue(summary["found"]) + self.assertIn("agent-memory", node_ids) + self.assertIn("link", node_ids) + self.assertIn("retrieval", node_ids) + self.assertNotIn("isolated", node_ids) + self.assertEqual(summary["nodes"][0]["why_selected"], "matched topic") + self.assertEqual(summary["follow_up"][0]["tool"], "get_context") + + def test_multi_token_search_uses_token_relevance_without_exact_phrase(self): + wiki = self.make_wiki() + write_page( + wiki, + "concepts/local-memory.md", + "---\ntype: concept\ntitle: Local Recall\n---\n\n" + "# Local Recall\n\n" + "Agent workflows keep durable project notes as private memory.\n", + ) + write_page( + wiki, + "concepts/agent-only.md", + "---\ntype: concept\ntitle: Agent Runtime\n---\n\n" + "# Agent Runtime\n\n" + "Agent execution details without user preference storage.\n", + ) + write_page( + wiki, + "concepts/memory-only.md", + "---\ntype: concept\ntitle: Memory Archive\n---\n\n" + "# Memory Archive\n\n" + "Memory storage details for archival notes.\n", + ) + + results = search_pages("agent memory", build_wiki_cache(wiki), limit=5) + + self.assertEqual(results[0]["name"], "local-memory") 
+ self.assertNotIn("agent-only", {result["name"] for result in results}) + self.assertNotIn("memory-only", {result["name"] for result in results}) + + def test_search_falls_back_without_optional_fts_index(self): + wiki = self.make_wiki() + write_page( + wiki, + "concepts/agent-memory.md", + "---\ntype: concept\ntitle: Agent Memory\n---\n\n" + "# Agent Memory\n\n" + "Source-backed local memory for agents.\n", + ) + cache = build_wiki_cache(wiki) + cache["fts_index"] = None + cache["search_backend"] = "token-index" + + results = search_pages("local memory", cache, limit=5) + + self.assertEqual(results[0]["name"], "agent-memory") + + def test_backlinks_loader_and_builder_shapes(self): + wiki = self.make_wiki() + write_page( + wiki, + "concepts/a.md", + "---\ntype: concept\ntitle: A\nrelated: [[frontmatter-only]]\n---\n# A\n\n[[b]] [[b]]\n", + ) + write_page(wiki, "concepts/b.md", "---\ntype: concept\ntitle: B\n---\n# B\n") + backlinks_path = wiki / "_backlinks.json" + backlinks_path.write_text(json.dumps({"a": ["b"]}), encoding="utf-8") + + loaded, error = load_backlinks_index(backlinks_path) + body_only = build_backlinks(wiki) + full_text = build_backlinks(wiki, body_only=False) + + self.assertIsNone(error) + self.assertEqual(loaded, {"backlinks": {"a": ["b"]}, "forward": {}}) + self.assertEqual(body_only["backlinks"], {"b": ["a"]}) + self.assertEqual(body_only["forward"], {"a": ["b"]}) + self.assertIn("frontmatter-only", full_text["backlinks"]) + + def test_page_link_summary_is_bounded_and_paginated(self): + backlinks = { + "backlinks": {"hub": ["a", "b", "c", "d"]}, + "forward": {"hub": ["e", "f", "g"]}, + } + + first = page_link_summary(backlinks, "hub", limit=2) + second = page_link_summary(backlinks, "hub", limit=2, offset=2) + full = page_link_summary(backlinks, "hub", limit=2, include_all=True) + + self.assertEqual(first["inbound_count"], 4) + self.assertEqual(first["returned_inbound"], 2) + self.assertEqual(first["returned_forward"], 2) + 
self.assertTrue(first["truncated"]) + self.assertEqual(first["follow_up"][0]["arguments"]["offset"], 2) + self.assertEqual(second["inbound"], ["c", "d"]) + self.assertEqual(second["forward"], ["g"]) + self.assertEqual(full["returned_inbound"], 4) + self.assertFalse(full["truncated"]) + + def test_wiki_mtime_sees_existing_page_edits(self): + wiki = self.make_wiki() + page = write_page(wiki, "concepts/a.md", "# A\n") + before = wiki_mtime(wiki) + future = time.time() + 2 + page.write_text("# A2\n", encoding="utf-8") + os.utime(page, (future, future)) + + self.assertGreater(wiki_mtime(wiki), before) + + def test_empty_context_can_report_tool_error(self): + wiki = self.make_wiki() + cache = build_wiki_cache(wiki) + + result = context_for_topic(wiki, " ", cache, empty_error="topic required") + + self.assertFalse(result["found"]) + self.assertEqual(result["error"], "topic required") + + def test_rebuild_index_generates_category_catalog(self): + wiki = self.make_wiki() + write_page( + wiki, + "concepts/agent-memory.md", + "---\ntype: concept\ntitle: Agent Memory\n---\n\n" + "# Agent Memory\n\n> **TLDR:** Durable memory for agents.\n", + ) + write_page( + wiki, + "sources/session.md", + "---\ntype: source\ntitle: Session Notes\n---\n\n" + "# Session Notes\n\n> **TLDR:** Source notes for Link.\n", + ) + write_page( + wiki, + "memories/prefer-local.md", + "---\ntype: memory\ntitle: Prefer Local\n---\n\n" + "# Prefer Local\n\n> **TLDR:** User prefers local memory.\n", + ) + + markdown = build_index_markdown(wiki, generated_at="2026-05-06T00:00:00Z") + result = rebuild_index(wiki, generated_at="2026-05-06T00:00:00Z") + index_text = (wiki / "index.md").read_text(encoding="utf-8") + + self.assertIn("3 pages | 1 sources | 1 memories", markdown) + self.assertIn("### concepts", index_text) + self.assertIn("- [[agent-memory]] - Durable memory for agents. (concept)", index_text) + self.assertIn("- [[session]] - Source notes for Link. 
(source)", index_text) + self.assertIn("- [[prefer-local]] - User prefers local memory. (memory)", index_text) + self.assertEqual(result["page_count"], 3) + self.assertEqual(result["category_counts"]["concepts"], 1) + self.assertEqual(result["next_actions"][0]["tool"], "rebuild_backlinks") + + def test_index_build_closes_owned_cache(self): + wiki = self.make_wiki() + + class FakeIndex: + closed = False + + def close(self): + self.closed = True + + fake = FakeIndex() + cache = { + "pages": [ + { + "name": "agent-memory", + "title": "Agent Memory", + "category": "concepts", + "type": "concept", + "tldr": "Durable memory.", + } + ], + "snippet_index": {}, + "fts_index": fake, + } + + with patch("link_core.wiki.build_wiki_cache", return_value=cache): + markdown = build_index_markdown(wiki) + + self.assertIn("[[agent-memory]]", markdown) + self.assertTrue(fake.closed) + + def test_rebuild_index_closes_owned_cache(self): + wiki = self.make_wiki() + + class FakeIndex: + closed = False + + def close(self): + self.closed = True + + fake = FakeIndex() + cache = { + "pages": [ + { + "name": "agent-memory", + "title": "Agent Memory", + "category": "concepts", + "type": "concept", + "tldr": "Durable memory.", + } + ], + "snippet_index": {}, + "fts_index": fake, + } + + with patch("link_core.wiki.build_wiki_cache", return_value=cache): + result = rebuild_index(wiki) + + self.assertEqual(result["page_count"], 1) + self.assertTrue(fake.closed) + + +if __name__ == "__main__": + unittest.main() diff --git a/wiki/_backlinks.json b/wiki/_backlinks.json index 0967ef4..f239ff3 100644 --- a/wiki/_backlinks.json +++ b/wiki/_backlinks.json @@ -1 +1,134 @@ -{} +{ + "backlinks": { + "neural-networks": [ + "attention-mechanism", + "machine-learning", + "transformers", + "gpt", + "index", + "deep-learning-overview", + "intro-to-ml" + ], + "transformers": [ + "attention-mechanism", + "machine-learning", + "neural-networks", + "gpt", + "index", + "attention-is-all-you-need", + 
"deep-learning-overview", + "intro-to-ml" + ], + "gpt": [ + "attention-mechanism", + "transformers", + "index", + "attention-is-all-you-need", + "deep-learning-overview", + "intro-to-ml" + ], + "attention-is-all-you-need": [ + "attention-mechanism", + "transformers", + "index" + ], + "deep-learning-overview": [ + "attention-mechanism", + "machine-learning", + "neural-networks", + "transformers", + "gpt", + "index" + ], + "intro-to-ml": [ + "machine-learning", + "neural-networks", + "transformers", + "gpt", + "index" + ], + "attention-mechanism": [ + "machine-learning", + "neural-networks", + "transformers", + "gpt", + "index", + "attention-is-all-you-need" + ], + "machine-learning": [ + "neural-networks", + "transformers", + "gpt", + "index", + "deep-learning-overview", + "intro-to-ml" + ] + }, + "forward": { + "attention-mechanism": [ + "neural-networks", + "transformers", + "gpt", + "attention-is-all-you-need", + "deep-learning-overview" + ], + "machine-learning": [ + "neural-networks", + "intro-to-ml", + "transformers", + "attention-mechanism", + "deep-learning-overview" + ], + "neural-networks": [ + "machine-learning", + "transformers", + "attention-mechanism", + "deep-learning-overview", + "intro-to-ml" + ], + "transformers": [ + "attention-mechanism", + "machine-learning", + "gpt", + "attention-is-all-you-need", + "neural-networks", + "deep-learning-overview", + "intro-to-ml" + ], + "gpt": [ + "transformers", + "attention-mechanism", + "deep-learning-overview", + "machine-learning", + "intro-to-ml", + "neural-networks" + ], + "index": [ + "attention-is-all-you-need", + "deep-learning-overview", + "intro-to-ml", + "attention-mechanism", + "machine-learning", + "neural-networks", + "transformers", + "gpt" + ], + "attention-is-all-you-need": [ + "transformers", + "attention-mechanism", + "gpt" + ], + "deep-learning-overview": [ + "neural-networks", + "machine-learning", + "transformers", + "gpt" + ], + "intro-to-ml": [ + "machine-learning", + "neural-networks", 
+ "gpt", + "transformers" + ] + } +} diff --git a/wiki/index.md b/wiki/index.md index 95b259b..f0668a0 100644 --- a/wiki/index.md +++ b/wiki/index.md @@ -1,13 +1,28 @@ # Link Wiki Index -> Last updated: 2026-04-15 | 0 pages | 0 sources +> Last updated: 2026-05-05 | 8 pages | 3 sources ## Categories -*No pages yet. Drop a source into `raw/` and ask your agent to ingest it.* +### Sources + +- [[attention-is-all-you-need]] — transformer architecture and self-attention. +- [[deep-learning-overview]] — deep learning foundations. +- [[intro-to-ml]] — machine learning fundamentals. + +### Concepts + +- [[attention-mechanism]] — learned weighting over input positions. +- [[machine-learning]] — systems that learn patterns from data. +- [[neural-networks]] — layered computational graphs for learning. +- [[transformers]] — attention-based sequence architecture. + +### Entities + +- [[gpt]] — decoder-only transformer language model family. ## Recent | Date | Operation | Pages Touched | |------|-----------|---------------| -| — | — | — | +| 2026-04-16 | ingest: sample AI/ML wiki | 8 pages | diff --git a/wiki/memories/.gitkeep b/wiki/memories/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/wiki/memories/.gitkeep @@ -0,0 +1 @@ +