From ea12f58c58ba6f827021f53209ae5d369dace7ec Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Mon, 20 Oct 2025 10:28:14 +0200 Subject: [PATCH 01/48] add configuration options for namespaces --- entangled/config/__init__.py | 16 ++++++++++++++++ entangled/document.py | 7 ++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/entangled/config/__init__.py b/entangled/config/__init__.py index 7cae419..4835642 100644 --- a/entangled/config/__init__.py +++ b/entangled/config/__init__.py @@ -44,6 +44,19 @@ class AnnotationMethod(StrEnum): SUPPLEMENTED = "supplemented" +class NamespaceDefault(StrEnum): + """Default namespace behaviour. + + - `GLOBAL` is the default. Identifiers are all collected into the + global namespace. + - `PRIVATE` means that identifiers are only accessible within the + same file. + """ + + GLOBAL = "global" + PRIVATE = "private" + + class Markers(Struct): """Markers can be used to configure the Markdown dialect. Currently not used.""" @@ -86,6 +99,9 @@ class Config(Struct, dict=True): annotation_format: str | None = None annotation: AnnotationMethod = AnnotationMethod.STANDARD use_line_directives: bool = False + + namespace_default: NamespaceDefault = NamespaceDefault.GLOBAL + hooks: list[str] = field(default_factory=lambda: ["shebang"]) hook: dict[str, Any] = field(default_factory=dict) # pyright: ignore[reportExplicitAny] brei: Program = field(default_factory=Program) diff --git a/entangled/document.py b/entangled/document.py index af60674..58b9218 100644 --- a/entangled/document.py +++ b/entangled/document.py @@ -19,12 +19,13 @@ def length[T](iter: Iterable[T]) -> int: @dataclass class ReferenceId: name: str + namespace: list[str] file: PurePath ref_count: int @override def __hash__(self) -> int: - return hash((self.name, self.file, self.ref_count)) + return hash((self.name, self.namespace, self.file, self.ref_count)) @dataclass @@ -84,9 +85,9 @@ def by_name(self, n: str) -> Iterable[CodeBlock]: return (self.map[r] for r in 
self.index[n]) - def new_id(self, filename: PurePath, name: str) -> ReferenceId: + def new_id(self, filename: PurePath, namespace: list[str], name: str) -> ReferenceId: c = length(filter(lambda r: r.file == filename, self.index[name])) - return ReferenceId(name, filename, c) + return ReferenceId(name, namespace, filename, c) def __setitem__(self, key: ReferenceId, value: CodeBlock): if key in self.map: From f956254b40e17134ae10a5e0baa2cdf122d5dad5 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Tue, 28 Oct 2025 19:20:33 +0100 Subject: [PATCH 02/48] work namespaces into the data model --- entangled/code_reader.py | 11 +++++++---- entangled/document.py | 6 ++++-- entangled/hooks/quarto_attributes.py | 17 +++++++++-------- entangled/markdown_reader.py | 8 +++++--- pyproject.toml | 1 + uv.lock | 27 +++++++++++++++++++++++++++ 6 files changed, 53 insertions(+), 17 deletions(-) diff --git a/entangled/code_reader.py b/entangled/code_reader.py index 5da2116..679faf5 100644 --- a/entangled/code_reader.py +++ b/entangled/code_reader.py @@ -21,7 +21,7 @@ class CodeReader(mawk.RuleSet): def __init__(self, path: PurePath, refs: ReferenceMap): self.location: TextLocation = TextLocation(path, 0) - self.stack: list[Frame] = [Frame(ReferenceId("#root#", PurePath("-"), -1), "")] + self.stack: list[Frame] = [Frame(ReferenceId("#root#", (), PurePath("-"), -1), "")] self.refs: ReferenceMap = refs @property @@ -36,8 +36,10 @@ def increase_line_number(self, _): r"^(?P\s*).* ~/~ begin <<(?P[^#<>]+)#(?P[^#<>]+)>>\[(?Pinit|\d+)\]" ) def on_block_begin(self, m: re.Match[str]) -> list[str]: - ref_name = m["ref_name"] - + full_ref_name = m["ref_name"] + ref_name = full_ref_name.split("::")[-1] + namespace = tuple(full_ref_name.split("::")[:-1]) + # When there are lines above the first ref, say a shebang, swap # them into the first block. 
if len(self.stack) == 1 and len(self.stack[0].content) > 0: @@ -57,7 +59,8 @@ def on_block_begin(self, m: re.Match[str]) -> list[str]: self.stack.append( Frame( - ReferenceId(m["ref_name"], PurePath(m["source"]), ref_count), m["indent"], content + ReferenceId(m["ref_name"], namespace, PurePath(m["source"]), ref_count), + m["indent"], content ) ) return [] diff --git a/entangled/document.py b/entangled/document.py index 58b9218..8ea32be 100644 --- a/entangled/document.py +++ b/entangled/document.py @@ -19,7 +19,7 @@ def length[T](iter: Iterable[T]) -> int: @dataclass class ReferenceId: name: str - namespace: list[str] + namespace: tuple[str,...] file: PurePath ref_count: int @@ -85,7 +85,7 @@ def by_name(self, n: str) -> Iterable[CodeBlock]: return (self.map[r] for r in self.index[n]) - def new_id(self, filename: PurePath, namespace: list[str], name: str) -> ReferenceId: + def new_id(self, filename: PurePath, namespace: tuple[str,...], name: str) -> ReferenceId: c = length(filter(lambda r: r.file == filename, self.index[name])) return ReferenceId(name, namespace, filename, c) @@ -131,6 +131,8 @@ def content_to_text(r: ReferenceMap, c: Content) -> str: case ReferenceId(): return r.get_codeblock(c).indented_text + raise ValueError("impossible code path") + def document_to_text(r: ReferenceMap, cs: Iterable[Content]) -> str: """ diff --git a/entangled/hooks/quarto_attributes.py b/entangled/hooks/quarto_attributes.py index 7108f9e..4961614 100644 --- a/entangled/hooks/quarto_attributes.py +++ b/entangled/hooks/quarto_attributes.py @@ -1,7 +1,6 @@ from __future__ import annotations import re -import textwrap -from typing import Any, final, override +from typing import final, override, cast import yaml from entangled.config.language import Language @@ -51,20 +50,22 @@ def amend_code_properties(code_block: CodeBlock): log.warning(f"tried to parse:\n{header}") return - if "id" in attrs.keys(): - if not isinstance(attrs["id"], str): + attrs = cast(dict[str, object], attrs) + 
code_id = attrs.get("id", None) + if code_id is not None: + if not isinstance(code_id, str): log.warning(f"{code_block.origin}: Quarto id does not evaluate to string; skipping") log.warning(f"tried to parse:\n{header}") return - props.append(Id(attrs["id"])) + props.append(Id(code_id)) - if "classes" in attrs.keys(): - classes = attrs["classes"] + classes = attrs.get("classes", None) + if isinstance(classes, list): if not all(isinstance(c, str) for c in classes): log.warning(f"{code_block.origin}: Quarto classes do not evaluate to strings; skipping") log.warning(f"tried to parse:\n{header}") return - props.extend(Class(c) for c in classes) + props.extend(Class(cast(str, c)) for c in classes) props.extend(Attribute(str(k), v) for k, v in attrs.items() if k not in ("id", "classes")) diff --git a/entangled/markdown_reader.py b/entangled/markdown_reader.py index 27c6b4a..a75f5aa 100644 --- a/entangled/markdown_reader.py +++ b/entangled/markdown_reader.py @@ -201,7 +201,8 @@ def read_markdown_string( def process(r: RawContent) -> Content: match r: case CodeBlock(): - for h in hooks: h.on_read(r) + for h in hooks: + h.on_read(r) block_id = get_id(r.properties) target_file = get_attribute(r.properties, "file") @@ -211,7 +212,7 @@ def process(r: RawContent) -> Content: ref_name = block_id or target_file if ref_name is None: ref_name = f"unnamed-{r.origin}" - ref = refs.new_id(r.origin.filename, ref_name) + ref = refs.new_id(r.origin.filename, (), ref_name) refs[ref] = r if target_file is not None: @@ -221,7 +222,8 @@ def process(r: RawContent) -> Content: return ref - case PlainText(): return r + case PlainText(): + return r content = list(map(process, md.raw_content)) logging.debug("found ids: %s", list(refs.map.keys())) diff --git a/pyproject.toml b/pyproject.toml index 74082f3..4f4064d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ dev = [ "types-pexpect>=4.9.0.20250916", "types-PyYAML>=6.0.1,<7", "pytest-timeout>=2.4.0", + "ty>=0.0.1a24", ] 
[tool.hatch.build.targets.sdist] diff --git a/uv.lock b/uv.lock index 1c1f19b..93b61eb 100644 --- a/uv.lock +++ b/uv.lock @@ -321,6 +321,7 @@ dev = [ { name = "pytest-timeout" }, { name = "python-lsp-server" }, { name = "ruff" }, + { name = "ty" }, { name = "types-colorama" }, { name = "types-pexpect" }, { name = "types-pygments" }, @@ -358,6 +359,7 @@ dev = [ { name = "pytest-timeout", specifier = ">=2.4.0" }, { name = "python-lsp-server", specifier = ">=1.13.1" }, { name = "ruff", specifier = ">=0.14" }, + { name = "ty", specifier = ">=0.0.1a24" }, { name = "types-colorama", specifier = ">=0.4.15.20240311,<0.5" }, { name = "types-pexpect", specifier = ">=4.9.0.20250916" }, { name = "types-pygments", specifier = ">=2.18.0.20240506,<3" }, @@ -1269,6 +1271,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/73/6d/b5406752c4e4ba86692b22fab0afed8b48f16bdde8f92e1d852976b61dc6/tomlkit-0.12.5-py3-none-any.whl", hash = "sha256:af914f5a9c59ed9d0762c7b64d3b5d5df007448eb9cd2edc8a46b1eafead172f", size = 37685, upload-time = "2024-05-08T13:50:17.343Z" }, ] +[[package]] +name = "ty" +version = "0.0.1a24" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/71/a1db0d604be8d0067342e7aad74ab0c7fec6bea20eb33b6a6324baabf45f/ty-0.0.1a24.tar.gz", hash = "sha256:3273c514df5b9954c9928ee93b6a0872d12310ea8de42249a6c197720853e096", size = 4386721, upload-time = "2025-10-23T13:33:29.729Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ab/89/21fb275cb676d3480b67fbbf6eb162aec200b4dcb10c7885bffc754dc73f/ty-0.0.1a24-py3-none-linux_armv6l.whl", hash = "sha256:d478cd02278b988d5767df5821a0f03b99ef848f6fc29e8c77f30e859b89c779", size = 8833903, upload-time = "2025-10-23T13:32:53.552Z" }, + { url = "https://files.pythonhosted.org/packages/a2/22/beb127bce67fc2a1f3704b6b39505d77a7078a61becfbe10c5ee7ed9f5d8/ty-0.0.1a24-py3-none-macosx_10_12_x86_64.whl", hash = 
"sha256:de758790f05f0a3bb396da4c75f770c85ab3a46095ec188b830c916bd5a5bc10", size = 8691210, upload-time = "2025-10-23T13:32:55.706Z" }, + { url = "https://files.pythonhosted.org/packages/39/bd/190f5e934339669191179fa01c60f5a140822dc465f0d4d312985903d109/ty-0.0.1a24-py3-none-macosx_11_0_arm64.whl", hash = "sha256:68f325ddc8cfb7a7883501e5e22f01284c5d5912aaa901d21e477f38edf4e625", size = 8138421, upload-time = "2025-10-23T13:32:58.718Z" }, + { url = "https://files.pythonhosted.org/packages/40/84/f08020dabad1e660957bb641b2ba42fe1e1e87192c234b1fc1fd6fb42cf2/ty-0.0.1a24-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:49a52bbb1f8b0b29ad717d3fd70bd2afe752e991072fd13ff2fc14f03945c849", size = 8419861, upload-time = "2025-10-23T13:33:00.068Z" }, + { url = "https://files.pythonhosted.org/packages/e5/cc/e3812f7c1c2a0dcfb1bf8a5d6a7e5aa807a483a632c0d5734ea50a60a9ae/ty-0.0.1a24-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:12945fe358fb0f73acf0b72a29efcc80da73f8d95cfe7f11a81e4d8d730e7b18", size = 8641443, upload-time = "2025-10-23T13:33:01.887Z" }, + { url = "https://files.pythonhosted.org/packages/e3/8b/3fc047d04afbba4780aba031dc80e06f6e95d888bbddb8fd6da502975cfb/ty-0.0.1a24-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6231e190989798b0860d15a8f225e3a06a6ce442a7083d743eb84f5b4b83b980", size = 8997853, upload-time = "2025-10-23T13:33:03.951Z" }, + { url = "https://files.pythonhosted.org/packages/e0/d9/ae1475d9200ecf6b196a59357ea3e4f4aa00e1d38c9237ca3f267a4a3ef7/ty-0.0.1a24-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:7c6401f4a7532eab63dd7fe015c875792a701ca4b1a44fc0c490df32594e071f", size = 9676864, upload-time = "2025-10-23T13:33:05.744Z" }, + { url = "https://files.pythonhosted.org/packages/cc/d9/abd6849f0601b24d5d5098e47b00dfbdfe44a4f6776f2e54a21005739bdf/ty-0.0.1a24-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:83c69759bfa2a00278aa94210eded35aea599215d16460445cbbf5b36f77c454", size = 9351386, upload-time = "2025-10-23T13:33:07.807Z" }, + { url = "https://files.pythonhosted.org/packages/63/5c/639e0fe3b489c65b12b38385fe5032024756bc07f96cd994d7df3ab579ef/ty-0.0.1a24-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:71146713cb8f804aad2b2e87a8efa7e7df0a5a25aed551af34498bcc2721ae03", size = 9517674, upload-time = "2025-10-23T13:33:09.641Z" }, + { url = "https://files.pythonhosted.org/packages/78/ae/323f373fcf54a883e39ea3fb6f83ed6d1eda6dfd8246462d0cfd81dac781/ty-0.0.1a24-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4836854411059de592f0ecc62193f2b24fc3acbfe6ce6ce0bf2c6d1a5ea9de7", size = 9000468, upload-time = "2025-10-23T13:33:11.51Z" }, + { url = "https://files.pythonhosted.org/packages/14/26/1a4be005aa4326264f0e7ce554844d5ef8afc4c5600b9a38b05671e9ed18/ty-0.0.1a24-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a7f0b8546d27605e09cd0fe08dc28c1d177bf7498316dd11c3bb8ef9440bf2e1", size = 8377164, upload-time = "2025-10-23T13:33:13.504Z" }, + { url = "https://files.pythonhosted.org/packages/73/2f/dcd6b449084e53a2beb536d8721a2517143a2353413b5b323d6eb9a31705/ty-0.0.1a24-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:4e2fbf7dce2311127748824e03d9de2279e96ab5713029c3fa58acbaf19b2f51", size = 8672709, upload-time = "2025-10-23T13:33:15.213Z" }, + { url = "https://files.pythonhosted.org/packages/dc/2e/8b3b45d46085a79547e6db5295f42c6b798a0240d34454181e2ca947183c/ty-0.0.1a24-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f35b7f0a65f7e34e59f34173164946c89a4c4b1d1c18cabe662356a35f33efcd", size = 8788732, upload-time = "2025-10-23T13:33:17.347Z" }, + { url = "https://files.pythonhosted.org/packages/cf/c5/7675ff8693ad13044d86d8d4c824caf6bbb00340df05ad93d0e9d1e0338b/ty-0.0.1a24-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:120fe95eaf2a200f531f949e3dd0a9d95ab38915ce388412873eae28c499c0b9", size = 9095693, upload-time = 
"2025-10-23T13:33:19.836Z" }, + { url = "https://files.pythonhosted.org/packages/62/0b/bdba5d31aa3f0298900675fd355eec63a9c682aa46ef743dbac8f28b4608/ty-0.0.1a24-py3-none-win32.whl", hash = "sha256:d8d8379264a8c14e1f4ca9e117e72df3bf0a0b0ca64c5fd18affbb6142d8662a", size = 8361302, upload-time = "2025-10-23T13:33:21.572Z" }, + { url = "https://files.pythonhosted.org/packages/b4/48/127a45e16c49563df82829542ca64b0bc387591a777df450972bc85957e6/ty-0.0.1a24-py3-none-win_amd64.whl", hash = "sha256:2e826d75bddd958643128c309f6c47673ed6cef2ea5f2b3cd1a1159a1392971a", size = 9039221, upload-time = "2025-10-23T13:33:23.055Z" }, + { url = "https://files.pythonhosted.org/packages/31/67/9161fbb8c1a2005938bdb5ccd4e4c98ee4bea2d262afb777a4b69aa15eb5/ty-0.0.1a24-py3-none-win_arm64.whl", hash = "sha256:2efbfcdc94d306f0d25f3efe2a90c0f953132ca41a1a47d0bae679d11cdb15aa", size = 8514044, upload-time = "2025-10-23T13:33:27.816Z" }, +] + [[package]] name = "types-colorama" version = "0.4.15.20250801" From dd940ad72c62b0a98609f30dbd0e8ef06b5d6417 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Wed, 29 Oct 2025 02:05:44 +0100 Subject: [PATCH 03/48] halfway implementing new markdown reader --- entangled/code_reader.py | 4 +- entangled/document.py | 2 +- entangled/errors/user.py | 2 +- entangled/markdown_reader.py | 2 +- entangled/readers/builder.py | 12 +++++ entangled/readers/lines.py | 22 ++++++++++ entangled/readers/markdown.py | 29 +++++++++++++ entangled/readers/peekable.py | 43 ++++++++++++++++++ entangled/readers/text_location.py | 21 +++++++++ entangled/readers/types.py | 11 +++++ entangled/readers/yaml_header.py | 37 ++++++++++++++++ entangled/tangle.py | 2 +- entangled/text_location.py | 12 ----- test/readers/test_builder.py | 15 +++++++ test/readers/test_lines.py | 17 ++++++++ test/readers/test_peakable.py | 24 ++++++++++ test/readers/test_text_location.py | 12 +++++ test/readers/test_yaml_header.py | 70 ++++++++++++++++++++++++++++++ 18 files changed, 319 insertions(+), 18 
deletions(-) create mode 100644 entangled/readers/builder.py create mode 100644 entangled/readers/lines.py create mode 100644 entangled/readers/markdown.py create mode 100644 entangled/readers/peekable.py create mode 100644 entangled/readers/text_location.py create mode 100644 entangled/readers/types.py create mode 100644 entangled/readers/yaml_header.py delete mode 100644 entangled/text_location.py create mode 100644 test/readers/test_builder.py create mode 100644 test/readers/test_lines.py create mode 100644 test/readers/test_peakable.py create mode 100644 test/readers/test_text_location.py create mode 100644 test/readers/test_yaml_header.py diff --git a/entangled/code_reader.py b/entangled/code_reader.py index 679faf5..a0ed021 100644 --- a/entangled/code_reader.py +++ b/entangled/code_reader.py @@ -4,7 +4,7 @@ import mawk import re -from .text_location import TextLocation +from .readers.text_location import TextLocation from .document import ReferenceId, ReferenceMap from .errors.user import IndentationError @@ -39,7 +39,7 @@ def on_block_begin(self, m: re.Match[str]) -> list[str]: full_ref_name = m["ref_name"] ref_name = full_ref_name.split("::")[-1] namespace = tuple(full_ref_name.split("::")[:-1]) - + # When there are lines above the first ref, say a shebang, swap # them into the first block. 
if len(self.stack) == 1 and len(self.stack[0].content) > 0: diff --git a/entangled/document.py b/entangled/document.py index 8ea32be..0894dd4 100644 --- a/entangled/document.py +++ b/entangled/document.py @@ -9,7 +9,7 @@ from .config.language import Language from .properties import Property from .errors.internal import InternalError -from .text_location import TextLocation +from .readers.text_location import TextLocation def length[T](iter: Iterable[T]) -> int: diff --git a/entangled/errors/user.py b/entangled/errors/user.py index 78b5ea6..48208d6 100644 --- a/entangled/errors/user.py +++ b/entangled/errors/user.py @@ -2,7 +2,7 @@ from typing import Any, Callable from pathlib import Path -from ..text_location import TextLocation +from ..readers.text_location import TextLocation class UserError(Exception): diff --git a/entangled/markdown_reader.py b/entangled/markdown_reader.py index a75f5aa..c3f55c4 100644 --- a/entangled/markdown_reader.py +++ b/entangled/markdown_reader.py @@ -11,7 +11,7 @@ from .config import config from .utility import first -from .text_location import TextLocation +from .readers.text_location import TextLocation from .document import CodeBlock, ReferenceMap, Content, PlainText, RawContent from .properties import Property, read_properties, get_attribute, get_classes, get_id from .hooks.base import HookBase diff --git a/entangled/readers/builder.py b/entangled/readers/builder.py new file mode 100644 index 0000000..9ad0731 --- /dev/null +++ b/entangled/readers/builder.py @@ -0,0 +1,12 @@ +from dataclasses import dataclass +from collections.abc import Generator + + +@dataclass +class Builder[T, U]: + parent: Generator[T, None, U] + result: U | None = None + + def __iter__(self) -> Generator[T, None, U]: + self.result = yield from self.parent + return self.result diff --git a/entangled/readers/lines.py b/entangled/readers/lines.py new file mode 100644 index 0000000..fc95fda --- /dev/null +++ b/entangled/readers/lines.py @@ -0,0 +1,22 @@ +from 
collections.abc import Generator +from pathlib import PurePath + +from .types import InputToken +from .text_location import TextLocation +from .peekable import peekable + + +@peekable +def lines(filename: PurePath, text: str) -> Generator[InputToken]: + """ + Iterate the lines in a file. Doesn't strip newlines. Works with both + Windows and Unix line endings. + """ + location = TextLocation(filename) + pos = 0 + while (next_pos := text.find("\n", pos)) != -1: + yield (location, text[pos:next_pos + 1]) + pos = next_pos + 1 + location.increment() + + yield (location, text[pos:]) diff --git a/entangled/readers/markdown.py b/entangled/readers/markdown.py new file mode 100644 index 0000000..fc96f79 --- /dev/null +++ b/entangled/readers/markdown.py @@ -0,0 +1,29 @@ +from typing import cast +from .types import InputStream, MarkdownStream +from .yaml_header import read_yaml_header +from ..document import PlainText, CodeBlock, ReferenceMap +from ..config import Config, config + +import msgspec + +def get_path(d: object, path: str) -> object: + for element in path.split("."): + if not isinstance(d, dict): + raise ValueError("expected object") + d = cast(dict[str, object], d).get(element, None) + if d is None: + return None + return d + + +def read_markdown(input: InputStream, refs: ReferenceMap | None = None) -> MarkdownStream[ReferenceMap]: + refs = refs or ReferenceMap() + header = yield from read_yaml_header(input) + if isinstance(header, dict): + header = cast(dict[str, object], header) + extra_config = msgspec.convert(header.get("entangled", None), Config) + elif header is None: + extra_config = None + + + return refs diff --git a/entangled/readers/peekable.py b/entangled/readers/peekable.py new file mode 100644 index 0000000..6ee8577 --- /dev/null +++ b/entangled/readers/peekable.py @@ -0,0 +1,43 @@ +from dataclasses import dataclass, field +from collections.abc import Iterator, Callable +import functools + + +@dataclass +class Peekable[T]: + """ + An iterator that 
allows peeking one element into the future. + """ + iterator: Iterator[T] + head: list[T] = field(default_factory=list) + + def peek(self) -> T: + if not self.head: + try: + self.head.append(next(self.iterator)) + except StopIteration: + raise + return self.head[0] + + def __bool__(self): + if not self.head: + try: + _ = self.peek() + except StopIteration: + return False + return True + + def __next__(self): + if self.head: + return self.head.pop() + return next(self.iterator) + + def __iter__(self): + return self + + +def peekable[**P, T](f: Callable[P, Iterator[T]]) -> Callable[P, Peekable[T]]: + @functools.wraps(f) + def peekabled(*args: P.args, **kwargs: P.kwargs) -> Peekable[T]: + return Peekable(f(*args, **kwargs)) + return peekabled diff --git a/entangled/readers/text_location.py b/entangled/readers/text_location.py new file mode 100644 index 0000000..531aa90 --- /dev/null +++ b/entangled/readers/text_location.py @@ -0,0 +1,21 @@ +from dataclasses import dataclass +from pathlib import PurePath +from typing import override + + +@dataclass +class TextLocation: + """ + A dataclass to indicate the origin of a line. Because this is only used for + interfacing with humans, the filename is stored as a string, and line numbers + start at one. 
+ """ + filename: PurePath + line_number: int = 1 + + def increment(self): + self.line_number += 1 + + @override + def __str__(self): + return f"{self.filename}:{self.line_number}" diff --git a/entangled/readers/types.py b/entangled/readers/types.py new file mode 100644 index 0000000..757ecd4 --- /dev/null +++ b/entangled/readers/types.py @@ -0,0 +1,11 @@ +from collections.abc import Callable, Generator + +from .text_location import TextLocation +from .peekable import Peekable +from ..document import Content + + +type InputToken = tuple[TextLocation, str] +type InputStream = Peekable[InputToken] +type Reader[OutputToken, Result] = Callable[[InputStream], Generator[OutputToken, None, Result]] +type MarkdownStream[Result] = Generator[Content, None, Result] diff --git a/entangled/readers/yaml_header.py b/entangled/readers/yaml_header.py new file mode 100644 index 0000000..3d637fa --- /dev/null +++ b/entangled/readers/yaml_header.py @@ -0,0 +1,37 @@ +from ..document import PlainText +from ..errors.user import ParseError +from .types import InputStream, MarkdownStream + +import yaml + + +def read_yaml_header(input: InputStream) -> MarkdownStream[object]: + """ + Reads the YAML header that can be found at the top of a Markdown document. 
+ """ + if not input: + return None + + pos, line = input.peek() + if line.rstrip() == "---": + _ = next(input) + yield PlainText(line) + else: + return None + + raw_header = "" + for pos, line in input: + if line.rstrip() == "---": + try: + header = yaml.safe_load(raw_header) # pyright: ignore[reportAny] + except yaml.YAMLError as e: + raise ParseError(pos, str(e)) + + yield PlainText(raw_header) + yield PlainText(line) + return header # pyright: ignore[reportAny] + + else: + raw_header += line + + raise ParseError(pos, "unterminated YAML header") diff --git a/entangled/tangle.py b/entangled/tangle.py index 9c04c37..6c24cf9 100644 --- a/entangled/tangle.py +++ b/entangled/tangle.py @@ -9,7 +9,7 @@ import mawk from .config import AnnotationMethod -from .text_location import TextLocation +from .readers.text_location import TextLocation from .document import ( ReferenceMap, ReferenceId, diff --git a/entangled/text_location.py b/entangled/text_location.py deleted file mode 100644 index 36f96ff..0000000 --- a/entangled/text_location.py +++ /dev/null @@ -1,12 +0,0 @@ -from dataclasses import dataclass -from pathlib import PurePath -from typing import override - -@dataclass -class TextLocation: - filename: PurePath - line_number: int = 0 - - @override - def __str__(self) -> str: - return f"{self.filename}:{self.line_number}" diff --git a/test/readers/test_builder.py b/test/readers/test_builder.py new file mode 100644 index 0000000..9e5d6ce --- /dev/null +++ b/test/readers/test_builder.py @@ -0,0 +1,15 @@ +from entangled.readers.builder import Builder + + +def make_sum(x): + s = 0 + for y in x: + s += y + yield y + return s + + +def test_builer(): + b = Builder(make_sum(range(10))) + assert list(b) == list(range(10)) + assert b.result == sum(range(10)) diff --git a/test/readers/test_lines.py b/test/readers/test_lines.py new file mode 100644 index 0000000..60e7305 --- /dev/null +++ b/test/readers/test_lines.py @@ -0,0 +1,17 @@ +from pathlib import PurePath +from 
entangled.readers.lines import lines +from entangled.readers.peekable import Peekable +from entangled.readers.types import InputStream + + +def test_lines(): + def collect(lst: InputStream) -> list[str]: + return list(map(lambda x: x[1], lst)) + + assert isinstance(lines(PurePath("-"), ""), Peekable) + + assert collect(lines(PurePath("-"), "")) == [""] + assert collect(lines(PurePath("-"), "\n")) == ["\n", ""] + assert collect(lines(PurePath("-"), "a\nb")) == ["a\n", "b"] + assert collect(lines(PurePath("-"), "a\nb\n")) == ["a\n", "b\n", ""] + assert collect(lines(PurePath("-"), "a\r\nb\r\n")) == ["a\r\n", "b\r\n", ""] diff --git a/test/readers/test_peakable.py b/test/readers/test_peakable.py new file mode 100644 index 0000000..f9897ad --- /dev/null +++ b/test/readers/test_peakable.py @@ -0,0 +1,24 @@ +from collections.abc import Generator +import pytest +from entangled.readers.peekable import Peekable, peekable + + +def test_peekable_class(): + p = Peekable(iter(range(5))) + assert p + assert p.peek() == 0 + assert next(p) == 0 + assert p.peek() == 1 + assert list(p) == [1, 2, 3, 4] + assert not p + + with pytest.raises(StopIteration): + _ = p.peek() + + +def test_peekable_decorator(): + @peekable + def counter() -> Generator[int]: + yield from range(10) + + assert isinstance(counter(), Peekable) diff --git a/test/readers/test_text_location.py b/test/readers/test_text_location.py new file mode 100644 index 0000000..81ad6b9 --- /dev/null +++ b/test/readers/test_text_location.py @@ -0,0 +1,12 @@ +from pathlib import PurePath +from entangled.readers.text_location import TextLocation + + +def test_text_location(): + pos = TextLocation(PurePath("foo"), 4) + assert str(pos) == "foo:4" + pos.increment() + assert str(pos) == "foo:5" + pos.increment() + assert pos.filename == PurePath("foo") + assert pos.line_number == 6 diff --git a/test/readers/test_yaml_header.py b/test/readers/test_yaml_header.py new file mode 100644 index 0000000..d1f8d90 --- /dev/null +++ 
b/test/readers/test_yaml_header.py @@ -0,0 +1,70 @@ +from pathlib import PurePath +import logging +import pytest + +from entangled.readers.yaml_header import read_yaml_header +from entangled.readers.lines import lines +from entangled.errors.user import ParseError +from entangled.readers.types import MarkdownStream + + +input_correct = """--- +title: hello +--- + +more content +""" + +input_non_terminating = """--- +title: hello + +there's no end to this header +""" + +input_no_header = """ +Nothing to see here. +""" + +input_not_on_top = """ + +--- +title: hello +--- + +should this header be parsed or not? +""" + + +input_invalid_yaml = """--- +} +--- +""" + + +def get_yaml_header(input: str) -> object: + path = PurePath("-") + result = None + + def reader() -> MarkdownStream[object]: + nonlocal result + result = yield from read_yaml_header(lines(path, input)) + return + + def run(): + for _ in reader(): + pass + + run() + return result + + +def test_read_yaml_header(): + assert get_yaml_header(input_correct) == { "title": "hello" } + assert get_yaml_header(input_no_header) is None + assert get_yaml_header(input_not_on_top) is None + + with pytest.raises(ParseError): + _ = get_yaml_header(input_invalid_yaml) + + with pytest.raises(ParseError): + _ = get_yaml_header(input_non_terminating) From a5e450c29f451a804d7454e7b0de5ec75f7acfcd Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Wed, 29 Oct 2025 23:30:28 +0100 Subject: [PATCH 04/48] generic function for delimited block reading --- entangled/readers/delimiters.py | 76 ++++++++++++++++++++++++++++++++ entangled/readers/markdown.py | 5 --- entangled/readers/types.py | 8 ++++ entangled/readers/yaml_header.py | 60 ++++++++++++++++--------- test/test_daemon.py | 3 -- 5 files changed, 122 insertions(+), 30 deletions(-) create mode 100644 entangled/readers/delimiters.py diff --git a/entangled/readers/delimiters.py b/entangled/readers/delimiters.py new file mode 100644 index 0000000..59d8b58 --- /dev/null +++ 
b/entangled/readers/delimiters.py @@ -0,0 +1,76 @@ +from dataclasses import dataclass +from collections.abc import Callable + +import re + +from .text_location import TextLocation +from .types import InputStream +from ..errors.user import ParseError + + +@dataclass +class DelimitedToken: + origin: TextLocation + open_line: str + open_match: re.Match[str] + content: str + close_line: str + close_match: re.Match[str] + + @property + def string(self) -> str: + """ + Reconstructs the original input string. + """ + return self.open_line + self.content + self.close_line + + +def delimited_token_getter(open: str, close: str) -> Callable[[InputStream], DelimitedToken | None]: + """ + Creates a function that reads a given `DelimitedToken` from + the input stream or returns `None` if the stream does not start + with a match for the opening pattern. + + Args: + open: a regex on which the token is triggered + close: a regex on which the enclosed content is closed + + Returns: + A `DelimitedToken` object containing the text location of + the token, the `re.Match` objects for both opening and closing + patterns, and the contained text. + + The first line in the input stream is peeked. If it does not match + the `open` expression, `None` is returned. Otherwise the lines + upto and including the closing expression are consumed. + """ + open_pattern = re.compile(open) + close_pattern = re.compile(close) + + def get(input: InputStream) -> DelimitedToken | None: + if not input: + return None + + origin, open_line = input.peek() + open_match = open_pattern.match(open_line.rstrip()) + if not open_match: + return None + + _ = next(input) + content = "" + + # We consumed the single buffered token, so we can + # iterate directly from the inner iterator. 
+ for _, line in input.iterator: + close_match = close_pattern.match(line.rstrip()) + if not close_match: + content += line + else: + close_line = line + return DelimitedToken( + origin, open_line, open_match, + content, close_line, close_match) + + raise ParseError(origin, "unexpected end of file") + + return get diff --git a/entangled/readers/markdown.py b/entangled/readers/markdown.py index fc96f79..510ebfa 100644 --- a/entangled/readers/markdown.py +++ b/entangled/readers/markdown.py @@ -19,11 +19,6 @@ def get_path(d: object, path: str) -> object: def read_markdown(input: InputStream, refs: ReferenceMap | None = None) -> MarkdownStream[ReferenceMap]: refs = refs or ReferenceMap() header = yield from read_yaml_header(input) - if isinstance(header, dict): - header = cast(dict[str, object], header) - extra_config = msgspec.convert(header.get("entangled", None), Config) - elif header is None: - extra_config = None return refs diff --git a/entangled/readers/types.py b/entangled/readers/types.py index 757ecd4..06e84cb 100644 --- a/entangled/readers/types.py +++ b/entangled/readers/types.py @@ -1,4 +1,5 @@ from collections.abc import Callable, Generator +import functools from .text_location import TextLocation from .peekable import Peekable @@ -9,3 +10,10 @@ type InputStream = Peekable[InputToken] type Reader[OutputToken, Result] = Callable[[InputStream], Generator[OutputToken, None, Result]] type MarkdownStream[Result] = Generator[Content, None, Result] + + +def map_reader[Out, T, U](f: Callable[[T], U], reader: Reader[Out, T]) -> Reader[Out, U]: + def mapped(input: InputStream) -> Generator[Out, None, U]: + x = yield from reader(input) + return f(x) + return mapped diff --git a/entangled/readers/yaml_header.py b/entangled/readers/yaml_header.py index 3d637fa..99ecf96 100644 --- a/entangled/readers/yaml_header.py +++ b/entangled/readers/yaml_header.py @@ -1,37 +1,53 @@ +from typing import cast + +import logging +import yaml +import msgspec + +from ..config import 
Config from ..document import PlainText from ..errors.user import ParseError from .types import InputStream, MarkdownStream +from .delimiters import delimited_token_getter -import yaml + +get_yaml_header_token = delimited_token_getter("---", "---") def read_yaml_header(input: InputStream) -> MarkdownStream[object]: """ Reads the YAML header that can be found at the top of a Markdown document. """ - if not input: - return None - - pos, line = input.peek() - if line.rstrip() == "---": - _ = next(input) - yield PlainText(line) - else: + delimited_token = get_yaml_header_token(input) + if delimited_token is None: return None - raw_header = "" - for pos, line in input: - if line.rstrip() == "---": - try: - header = yaml.safe_load(raw_header) # pyright: ignore[reportAny] - except yaml.YAMLError as e: - raise ParseError(pos, str(e)) + yield PlainText(delimited_token.string) + try: + return yaml.safe_load(delimited_token.content) + except yaml.YAMLError as e: + raise ParseError(delimited_token.origin, str(e)) - yield PlainText(raw_header) - yield PlainText(line) - return header # pyright: ignore[reportAny] - else: - raw_header += line +def get_config(header: object) -> Config | None: + """ + Get the `entangled` component from the unstructured header data, + and convert it to a `Config` object. - raise ParseError(pos, "unterminated YAML header") + If there was no YAML header or it contained no `entangled` member, + this will return `None`. If the given YAML header evaluated to something + other than an object/dictionary or the conversion to `Config` failed, + a `TypeError` is raised. 
+ """ + if isinstance(header, dict): + header = cast(dict[str, object], header) + try: + return msgspec.convert(header.get("entangled", None), Config) + except msgspec.ValidationError as e: + logging.error(e) + raise TypeError() + + elif header is None: + return None + else: + raise TypeError() diff --git a/test/test_daemon.py b/test/test_daemon.py index 97d47fa..82283b4 100644 --- a/test/test_daemon.py +++ b/test/test_daemon.py @@ -36,9 +36,6 @@ def wait_for_stat_diff(md_stat, filename, timeout=5): return False -# @pytest.mark.skipif( -# sys.platform=="win32" and sys.version.startswith("3.13"), -# reason="threading.Event seems to be broken") @pytest.mark.timeout(30) def test_daemon(tmp_path: Path): config.read(force=True) From 387610f6a1fa04528043efac05428d97f6da50e2 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Thu, 30 Oct 2025 08:15:09 +0100 Subject: [PATCH 05/48] ... --- entangled/readers/code_block.py | 15 +++++++++++++++ entangled/readers/markdown.py | 15 ++------------- entangled/readers/yaml_header.py | 3 ++- 3 files changed, 19 insertions(+), 14 deletions(-) create mode 100644 entangled/readers/code_block.py diff --git a/entangled/readers/code_block.py b/entangled/readers/code_block.py new file mode 100644 index 0000000..4c2fe09 --- /dev/null +++ b/entangled/readers/code_block.py @@ -0,0 +1,15 @@ +from ..document import CodeBlock, Content +from ..config import Config +from .types import InputStream, Reader, MarkdownStream +from .delimiters import delimited_token_getter + + +def code_block(config: Config) -> Reader[Content, None, CodeBlock | None]: + get_raw_token = delimited_token_getter(config.markers.open, config.markers.close) + + def code_block_reader(input: InputStream) -> MarkdownStream[CodeBlock | None]: + block = get_raw_token(input) + if block is None: + return None + + return code_block_reader diff --git a/entangled/readers/markdown.py b/entangled/readers/markdown.py index 510ebfa..e536b1f 100644 --- a/entangled/readers/markdown.py +++ 
b/entangled/readers/markdown.py @@ -1,24 +1,13 @@ from typing import cast from .types import InputStream, MarkdownStream -from .yaml_header import read_yaml_header +from .yaml_header import get_config, read_yaml_header from ..document import PlainText, CodeBlock, ReferenceMap from ..config import Config, config -import msgspec - -def get_path(d: object, path: str) -> object: - for element in path.split("."): - if not isinstance(d, dict): - raise ValueError("expected object") - d = cast(dict[str, object], d).get(element, None) - if d is None: - return None - return d - def read_markdown(input: InputStream, refs: ReferenceMap | None = None) -> MarkdownStream[ReferenceMap]: refs = refs or ReferenceMap() header = yield from read_yaml_header(input) - + config = get_config(header) return refs diff --git a/entangled/readers/yaml_header.py b/entangled/readers/yaml_header.py index 99ecf96..9e10670 100644 --- a/entangled/readers/yaml_header.py +++ b/entangled/readers/yaml_header.py @@ -23,8 +23,9 @@ def read_yaml_header(input: InputStream) -> MarkdownStream[object]: return None yield PlainText(delimited_token.string) + try: - return yaml.safe_load(delimited_token.content) + return yaml.safe_load(delimited_token.content) # pyright: ignore[reportAny] except yaml.YAMLError as e: raise ParseError(delimited_token.origin, str(e)) From aca86d1c5c4cecd9fa78296b0572a0bf7190b4b2 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Sat, 1 Nov 2025 23:07:49 +0100 Subject: [PATCH 06/48] improve config code --- entangled/config/__init__.py | 127 +++----------------------- entangled/config/annotation_method.py | 17 ++++ entangled/config/config_data.py | 100 ++++++++++++++++++++ entangled/config/config_update.py | 59 ++++++++++++ entangled/config/document_style.py | 18 ++++ entangled/config/markers.py | 21 +++++ entangled/config/namespace_default.py | 14 +++ entangled/config/version.py | 3 + entangled/hooks/__init__.py | 2 +- entangled/hooks/base.py | 4 + entangled/hooks/quarto_attributes.py 
| 5 + entangled/readers/code_block.py | 51 ++++++++++- entangled/readers/delimiters.py | 25 ++++- entangled/readers/lines.py | 19 ++-- test/readers/test_lines.py | 23 +++-- 15 files changed, 347 insertions(+), 141 deletions(-) create mode 100644 entangled/config/annotation_method.py create mode 100644 entangled/config/config_data.py create mode 100644 entangled/config/config_update.py create mode 100644 entangled/config/document_style.py create mode 100644 entangled/config/markers.py create mode 100644 entangled/config/namespace_default.py diff --git a/entangled/config/__init__.py b/entangled/config/__init__.py index 4835642..aba2d8f 100644 --- a/entangled/config/__init__.py +++ b/entangled/config/__init__.py @@ -4,127 +4,25 @@ from __future__ import annotations -from functools import cached_property import threading from contextlib import contextmanager -from copy import copy, deepcopy -from enum import StrEnum +from copy import deepcopy from pathlib import Path from typing import Any from itertools import chain import msgspec -from msgspec import Struct, field import tomllib -from brei import Program - -from entangled import from_str -from .language import Language, languages -from .version import Version +from .language import Language +from .config_data import Config +from .config_update import ConfigUpdate from ..logging import logger log = logger() -class AnnotationMethod(StrEnum): - """Annotation methods. - - - `STANDARD` is the default. Comments tell where a piece of code - came from in enough detail to reconstruct the markdown if some - of the code is changed. - - `NAKED` adds no comments to the tangled files. Stitching is not - possible with this setting. - - `SUPPLEMENTED` adds extra information to the comment lines. - """ - - STANDARD = "standard" - NAKED = "naked" - SUPPLEMENTED = "supplemented" - - -class NamespaceDefault(StrEnum): - """Default namespace behaviour. - - - `GLOBAL` is the default. 
Identifiers are all collected into the - global namespace. - - `PRIVATE` means that identifiers are only accessible within the - same file. - """ - - GLOBAL = "global" - PRIVATE = "private" - - -class Markers(Struct): - """Markers can be used to configure the Markdown dialect. Currently not used.""" - - open: str - close: str - begin_ignore: str = r"^\s*\~\~\~markdown\s*$" - end_ignore: str = r"^\s*\~\~\~\s*$" - - -markers = Markers( - r"^(?P\s*)```\s*{(?P[^{}]*)}\s*$", r"^(?P\s*)```\s*$" -) - - -class Config(Struct, dict=True): - """Main config class. - - Attributes: - version: Version of Entangled for which this config was created. - Entangled should read all versions lower than its own. - languages: List of programming languages and their comment styles. - markers: Regexes for detecting open and close of code blocks. - watch_list: List of glob-expressions indicating files to include - for tangling. - annotation: Style of annotation. - annotation_format: Extra annotation. - use_line_directives: Wether to print pragmas in source code for - indicating markdown source locations. - hooks: List of enabled hooks. - hook: Sub-config of hooks. - loom: Sub-config of loom. 
- - This class is made thread-local to make it possible to test in parallel.""" - - _version: str = field(name = "version") - languages: list[Language] = field(default_factory=list) - markers: Markers = field(default_factory=lambda: copy(markers)) - watch_list: list[str] = field(default_factory=lambda: ["**/*.md"]) - ignore_list: list[str] = field(default_factory=list) - annotation_format: str | None = None - annotation: AnnotationMethod = AnnotationMethod.STANDARD - use_line_directives: bool = False - - namespace_default: NamespaceDefault = NamespaceDefault.GLOBAL - - hooks: list[str] = field(default_factory=lambda: ["shebang"]) - hook: dict[str, Any] = field(default_factory=dict) # pyright: ignore[reportExplicitAny] - brei: Program = field(default_factory=Program) - - language_index: dict[str, Language] = field(default_factory=dict) - - @cached_property - def version(self) -> Version: - return Version.from_str(self._version) - - def __post_init__(self): - self.languages = languages + self.languages - self.make_language_index() - - def make_language_index(self): - for l in self.languages: - for i in l.identifiers: - self.language_index[i] = l - - -default = Config("2.0") # Version.from_str("2.0")) - - def read_config_from_toml( path: Path, section: str | None = None ) -> Config | None: @@ -147,7 +45,8 @@ def read_config_from_toml( if section is not None: for s in section.split("."): json = json[s] # pyright: ignore[reportAny] - return msgspec.convert(json, type=Config, dec_hook=from_str.dec_hook) + update = msgspec.convert(json, type=ConfigUpdate) + return Config() | update except ValueError as e: log.error("Could not read config: %s", e) @@ -160,12 +59,12 @@ def read_config_from_toml( def read_config(): if Path("./entangled.toml").exists(): - return read_config_from_toml(Path("./entangled.toml")) or default + return read_config_from_toml(Path("./entangled.toml")) or Config() if Path("./pyproject.toml").exists(): return ( - 
read_config_from_toml(Path("./pyproject.toml"), "tool.entangled") or default + read_config_from_toml(Path("./pyproject.toml"), "tool.entangled") or Config() ) - return default + return Config() class ConfigWrapper(threading.local): @@ -185,10 +84,8 @@ def get(self) -> Config: @contextmanager def __call__(self, **kwargs): backup = self.config - new_config = deepcopy(self.config) - for k, v in kwargs.items(): - setattr(new_config, k, v) - self.config = new_config + self.config = (self.config if self.config is not None else Config()) \ + | ConfigUpdate(**kwargs) yield @@ -197,7 +94,7 @@ def __call__(self, **kwargs): def get_language(self, lang_name: str) -> Language | None: if self.config is None: raise ValueError("No config loaded.") - return self.config.language_index.get(lang_name, None) + return self.config.languages.get(lang_name, None) config = ConfigWrapper() diff --git a/entangled/config/annotation_method.py b/entangled/config/annotation_method.py new file mode 100644 index 0000000..ccb96ff --- /dev/null +++ b/entangled/config/annotation_method.py @@ -0,0 +1,17 @@ +from enum import StrEnum + + +class AnnotationMethod(StrEnum): + """Annotation methods. + + - `STANDARD` is the default. Comments tell where a piece of code + came from in enough detail to reconstruct the markdown if some + of the code is changed. + - `NAKED` adds no comments to the tangled files. Stitching is not + possible with this setting. + - `SUPPLEMENTED` adds extra information to the comment lines. 
+ """ + + STANDARD = "standard" + NAKED = "naked" + SUPPLEMENTED = "supplemented" diff --git a/entangled/config/config_data.py b/entangled/config/config_data.py new file mode 100644 index 0000000..011ecf8 --- /dev/null +++ b/entangled/config/config_data.py @@ -0,0 +1,100 @@ +from __future__ import annotations + +from copy import copy +from dataclasses import dataclass, field +from .version import Version +from .language import Language, languages +from .markers import Markers, default_markers +from .annotation_method import AnnotationMethod +from .namespace_default import NamespaceDefault +from .config_update import ConfigUpdate, prefab_config + +from brei import Program + + +@dataclass(frozen=True) +class Config: + """Main config class. + + Attributes: + version: Version of Entangled for which this config was created. + Entangled should read all versions lower than its own. + languages: Dict of programming languages and their comment styles. + markers: Regexes for detecting open and close of code blocks. + + watch_list: List of glob-expressions indicating files to include + for tangling. + ignore_list: List of glob-expressions black-listing files, overrides + anything in the watch_list. + + annotation: Style of annotation. + annotation_format: Extra annotation. + + use_line_directives: Wether to print pragmas in source code for + indicating markdown source locations. + hooks: List of enabled hooks. + hook: Sub-config of hooks. + loom: Sub-config of loom. 
+ """ + version: Version = Version((2, 0)) + languages: dict[str, Language] = field(default_factory=lambda: { + i: l for l in languages for i in l.identifiers + }) + markers: Markers = field(default_factory=lambda: default_markers) + + watch_list: list[str] = field(default_factory=lambda: ["**/*.md"]) + ignore_list: list[str] = field(default_factory=list) + + annotation_format: str | None = None + annotation: AnnotationMethod = AnnotationMethod.STANDARD + use_line_directives: bool = False + + namespace_default: NamespaceDefault = NamespaceDefault.GLOBAL + namespace: tuple[str, ...] | None = None + + hooks: set[str] = field(default_factory=lambda: { "shebang" }) + hook: dict[str, object] = field(default_factory=dict) + brei: Program = field(default_factory=Program) + + def __or__(self, update: ConfigUpdate) -> Config: + if update.style is not None: + x = self | prefab_config[update.style] + else: + x = self + + version = max(x.version, Version.from_str(update.version)) + languages = copy(x.languages) + for lang in update.languages: + for id in lang.identifiers: + languages[id] = lang + + markers = x.markers if update.markers is None else update.markers + watch_list = x.watch_list if update.watch_list is None else update.watch_list + ignore_list = x.ignore_list if update.ignore_list is None else update.ignore_list + annotation_format = x.annotation_format if update.annotation_format is None \ + else update.annotation_format + annotation = x.annotation if update.annotation is None else update.annotation + use_line_directives = x.use_line_directives if update.use_line_directives is None \ + else update.use_line_directives + + namespace_default = x.namespace_default if update.namespace_default is None \ + else update.namespace_default + namespace = x.namespace if update.namespace is None \ + else tuple(update.namespace.split("::")) + + hook = x.hook if update.hook is None else x.hook | update.hook + brei = x.brei if update.brei is None else update.brei + + hooks = 
copy(x.hooks) + for uh in update.hooks: + if uh.startswith("~"): + h = uh.removeprefix("~") + if h in hooks: + hooks.remove(h) + else: + hooks.add(uh) + + return Config( + version, languages, markers, watch_list, ignore_list, + annotation_format, annotation, use_line_directives, + namespace_default, namespace, hooks, hook, brei) diff --git a/entangled/config/config_update.py b/entangled/config/config_update.py new file mode 100644 index 0000000..1b9113a --- /dev/null +++ b/entangled/config/config_update.py @@ -0,0 +1,59 @@ +from msgspec import Struct, field + +from .document_style import DocumentStyle +from .language import Language +from .markers import Markers, default_markers, basic_markers +from .annotation_method import AnnotationMethod +from .namespace_default import NamespaceDefault + +from brei import Program + + +class ConfigUpdate(Struct): + """An update to existing config. This actually sets the API for all + config input. + + Attributes: + version: updates to the maximum. + style: fills in a preset if given. + languages: additive list of languages, identical identifiers will + overrule existing ones. + markers: overrides. + watch_list: overrides. + ignore_list: overrides. + annotation_format: overrides. + annotation: overrides. + use_line_directives: overrides. + hooks: additive, prepend a `~` character to disable a hook). + hook: merged with `|` operator (overrides one deep). + brei: overrides (TODO: implement merge, requires updating Brei). 
+ """ + version: str + style: DocumentStyle | None = None + languages: list[Language] = field(default_factory=list) + markers: Markers | None = None + watch_list: list[str] | None = None + ignore_list: list[str] | None = None + annotation_format: str | None = None + annotation: AnnotationMethod | None = None + use_line_directives: bool | None = None + + namespace_default: NamespaceDefault | None = None + namespace: str | None = None + + hooks: list[str] = field(default_factory=list) + hook: dict[str, object] | None = None + brei: Program | None = None + + +prefab_config: dict[DocumentStyle, ConfigUpdate] = { + DocumentStyle.DEFAULT: ConfigUpdate( + version = "2.0", + markers = default_markers + ), + DocumentStyle.BASIC: ConfigUpdate( + version = "2.4", + markers = basic_markers, + hooks = ["quarto_attributes", "spdx_license", "shebang", "repl", "brei"] + ) +} diff --git a/entangled/config/document_style.py b/entangled/config/document_style.py new file mode 100644 index 0000000..a3c8e68 --- /dev/null +++ b/entangled/config/document_style.py @@ -0,0 +1,18 @@ +from enum import StrEnum + + +class DocumentStyle(StrEnum): + """Document style is a short hand for markdown style and hook settings. + + - `DEFAULT` is the default. We have fenced code blocks with a set of + attributes attached in curly braces. This setting offers the most + consistent syntax but is sometimes lacking in support from third-party + tools. + + - `BASIC` sets a simpler syntax where only the language is passed on the + same line as the code block fences. All other metadata is to be passed + using Quatro style attributes. + """ + + DEFAULT = "default" + BASIC = "basic" diff --git a/entangled/config/markers.py b/entangled/config/markers.py new file mode 100644 index 0000000..23658d8 --- /dev/null +++ b/entangled/config/markers.py @@ -0,0 +1,21 @@ +from msgspec import Struct + + +class Markers(Struct, frozen=True): + """Markers can be used to configure the Markdown dialect. 
Currently not used.""" + + open: str + close: str + begin_ignore: str = r"^\s*\~\~\~markdown\s*$" + end_ignore: str = r"^\s*\~\~\~\s*$" + + +default_markers = Markers( + r"^(?P\s*)```\s*{(?P[^{}]*)}\s*$", + r"^(?P\s*)```\s*$" +) + +basic_markers = Markers( + r"^(?P\s*)```(?P.*)$", + r"^(?P\s*)```\s*$" +) diff --git a/entangled/config/namespace_default.py b/entangled/config/namespace_default.py new file mode 100644 index 0000000..288685a --- /dev/null +++ b/entangled/config/namespace_default.py @@ -0,0 +1,14 @@ +from enum import StrEnum + + +class NamespaceDefault(StrEnum): + """Default namespace behaviour. + + - `GLOBAL` is the default. Identifiers are all collected into the global + namespace. + + - `PRIVATE` means that identifiers are only accessible within the same file. + """ + + GLOBAL = "global" + PRIVATE = "private" diff --git a/entangled/config/version.py b/entangled/config/version.py index 31ad6b4..aad1006 100644 --- a/entangled/config/version.py +++ b/entangled/config/version.py @@ -16,3 +16,6 @@ def to_str(self) -> str: @classmethod def from_str(cls, s: str) -> Version: return Version(tuple(int(sv) for sv in s.split("."))) + + def __lt__(self, other: Version) -> bool: + return self.numbers < other.numbers diff --git a/entangled/hooks/__init__.py b/entangled/hooks/__init__.py index ec92d8e..2f7388b 100644 --- a/entangled/hooks/__init__.py +++ b/entangled/hooks/__init__.py @@ -28,7 +28,7 @@ def get_hooks() -> list[HookBase]: active_hooks: list[HookBase] = [] - for h in config.get.hooks: + for h in sorted(config.get.hooks, key=lambda h: hooks[h].priority()): if h in hooks | external_hooks: try: hook_cfg = msgspec.convert(config.get.hook.get(h, {}), type=hooks[h].Config) diff --git a/entangled/hooks/base.py b/entangled/hooks/base.py index 975ff27..f2fe44d 100644 --- a/entangled/hooks/base.py +++ b/entangled/hooks/base.py @@ -22,6 +22,10 @@ class Config(Struct): def __init__(self, config: Config): pass + @staticmethod + def priority() -> int: + return 50 + 
def check_prerequisites(self): """When prerequisites aren't met, raise PrerequisitesFailed.""" pass diff --git a/entangled/hooks/quarto_attributes.py b/entangled/hooks/quarto_attributes.py index 4961614..3ae243e 100644 --- a/entangled/hooks/quarto_attributes.py +++ b/entangled/hooks/quarto_attributes.py @@ -81,6 +81,11 @@ def __init__(self, config: Hook.Config): super().__init__(config) self.config = config + @override + @staticmethod + def priority(): + return 10 + @override def on_read(self, code: CodeBlock): log.debug("quarto filter: %s", code) diff --git a/entangled/readers/code_block.py b/entangled/readers/code_block.py index 4c2fe09..32a2a83 100644 --- a/entangled/readers/code_block.py +++ b/entangled/readers/code_block.py @@ -1,15 +1,60 @@ from ..document import CodeBlock, Content from ..config import Config +from ..errors.user import IndentationError +from ..properties import read_properties, get_classes +from ..utility import first + from .types import InputStream, Reader, MarkdownStream +from .lines import lines from .delimiters import delimited_token_getter +from .text_location import TextLocation + +import re +import logging + + +def code_block_guard(origin: TextLocation, open_match: re.Match[str], close_match: re.Match[str]) -> bool: + open_indent = open_match["indent"] + close_indent = close_match["indent"] + if len(close_indent) < len(open_indent): + raise IndentationError(origin) + if open_indent != close_indent: + return False + return True + +def dedent(source: str, indent: str) -> str: + return "".join(line.removeprefix(indent) for line in lines(source)) -def code_block(config: Config) -> Reader[Content, None, CodeBlock | None]: - get_raw_token = delimited_token_getter(config.markers.open, config.markers.close) - def code_block_reader(input: InputStream) -> MarkdownStream[CodeBlock | None]: +def code_block(config: Config) -> Reader[Content, None, CodeBlock]: + get_raw_token = delimited_token_getter( + config.markers.open, config.markers.close, 
code_block_guard) + + def code_block_reader(input: InputStream) -> MarkdownStream[CodeBlock]: block = get_raw_token(input) if block is None: return None + indent = block.open_match["indent"] + properties = read_properties(block.open_match["properties"]) + language_class = first(get_classes(properties)) + language = config.get_language(language_class) if language_class else None + if language_class and not language: + logging.warning(f"`{block.origin}`: language `{language_class}` unknown.") + source = dedent(block.content, indent) + + return CodeBlock( + properties, + indent, + block.open_line, + block.close_line, + source, + block.origin, + language + ) + + yield None # pyright: ignore[reportUnreachable] + + return code_block_reader diff --git a/entangled/readers/delimiters.py b/entangled/readers/delimiters.py index 59d8b58..04c2e07 100644 --- a/entangled/readers/delimiters.py +++ b/entangled/readers/delimiters.py @@ -23,9 +23,22 @@ def string(self) -> str: Reconstructs the original input string. 
""" return self.open_line + self.content + self.close_line - -def delimited_token_getter(open: str, close: str) -> Callable[[InputStream], DelimitedToken | None]: + +def const[T, **Args](value: T) -> Callable[Args, T]: + def const_fn(*_1: Args.args, **_2: Args.kwargs) -> T: + return value + return const_fn + + +type DelimiterGuard = Callable[[TextLocation, re.Match[str], re.Match[str]], bool] + + +def delimited_token_getter( + open: str, + close: str, + guard: DelimiterGuard | None = None + ) -> Callable[[InputStream], DelimitedToken | None]: """ Creates a function that reads a given `DelimitedToken` from the input stream or returns `None` if the stream does not start @@ -34,6 +47,7 @@ def delimited_token_getter(open: str, close: str) -> Callable[[InputStream], Del Args: open: a regex on which the token is triggered close: a regex on which the enclosed content is closed + guard: an optional predicate for additional check on close pattern Returns: A `DelimitedToken` object containing the text location of @@ -46,11 +60,12 @@ def delimited_token_getter(open: str, close: str) -> Callable[[InputStream], Del """ open_pattern = re.compile(open) close_pattern = re.compile(close) + guard_fn: DelimiterGuard = guard or const(True) def get(input: InputStream) -> DelimitedToken | None: if not input: return None - + origin, open_line = input.peek() open_match = open_pattern.match(open_line.rstrip()) if not open_match: @@ -63,7 +78,7 @@ def get(input: InputStream) -> DelimitedToken | None: # iterate directly from the inner iterator. 
for _, line in input.iterator: close_match = close_pattern.match(line.rstrip()) - if not close_match: + if not close_match or not guard_fn(origin, open_match, close_match): content += line else: close_line = line @@ -72,5 +87,5 @@ def get(input: InputStream) -> DelimitedToken | None: content, close_line, close_match) raise ParseError(origin, "unexpected end of file") - + return get diff --git a/entangled/readers/lines.py b/entangled/readers/lines.py index fc95fda..154b4a6 100644 --- a/entangled/readers/lines.py +++ b/entangled/readers/lines.py @@ -6,17 +6,22 @@ from .peekable import peekable +def lines(text: str) -> Generator[str]: + pos = 0 + while (next_pos := text.find("\n", pos)) != -1: + yield text[pos:next_pos + 1] + pos = next_pos + 1 + + yield text[pos:] + + @peekable -def lines(filename: PurePath, text: str) -> Generator[InputToken]: +def numbered_lines(filename: PurePath, text: str) -> Generator[InputToken]: """ Iterate the lines in a file. Doesn't strip newlines. Works with both Windows and Unix line endings. 
""" location = TextLocation(filename) - pos = 0 - while (next_pos := text.find("\n", pos)) != -1: - yield (location, text[pos:next_pos + 1]) - pos = next_pos + 1 + for line in lines(text): + yield (location, line) location.increment() - - yield (location, text[pos:]) diff --git a/test/readers/test_lines.py b/test/readers/test_lines.py index 60e7305..973ed02 100644 --- a/test/readers/test_lines.py +++ b/test/readers/test_lines.py @@ -1,17 +1,20 @@ from pathlib import PurePath -from entangled.readers.lines import lines +from entangled.readers.lines import lines, numbered_lines from entangled.readers.peekable import Peekable -from entangled.readers.types import InputStream +from entangled.readers.text_location import TextLocation def test_lines(): - def collect(lst: InputStream) -> list[str]: - return list(map(lambda x: x[1], lst)) + assert lines("") == [""] + assert lines("\n") == ["\n", ""] + assert lines("a\nb") == ["a\n", "b"] + assert lines("a\nb\n") == ["a\n", "b\n", ""] + assert lines("a\r\nb\r\n") == ["a\r\n", "b\r\n", ""] - assert isinstance(lines(PurePath("-"), ""), Peekable) - assert collect(lines(PurePath("-"), "")) == [""] - assert collect(lines(PurePath("-"), "\n")) == ["\n", ""] - assert collect(lines(PurePath("-"), "a\nb")) == ["a\n", "b"] - assert collect(lines(PurePath("-"), "a\nb\n")) == ["a\n", "b\n", ""] - assert collect(lines(PurePath("-"), "a\r\nb\r\n")) == ["a\r\n", "b\r\n", ""] +def test_numbered_lines(): + assert isinstance(numbered_lines(PurePath("-"), ""), Peekable) + assert list(numbered_lines(PurePath("-"), "a\nb\n")) == [ + (TextLocation(PurePath("-"), 1), "a\n"), + (TextLocation(PurePath("-"), 2), "b\n") + ] From 486817a893aa66e79ae7b22a010ccf55377d5c15 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Sat, 1 Nov 2025 23:37:59 +0100 Subject: [PATCH 07/48] make tests pass again --- entangled/config/__init__.py | 10 +++++++--- entangled/config/version.py | 2 +- entangled/readers/lines.py | 8 +++----- test/readers/test_lines.py | 16 
++++++++++------ test/readers/test_yaml_header.py | 4 ++-- test/test_config.py | 7 ++++--- 6 files changed, 27 insertions(+), 20 deletions(-) diff --git a/entangled/config/__init__.py b/entangled/config/__init__.py index aba2d8f..8e29971 100644 --- a/entangled/config/__init__.py +++ b/entangled/config/__init__.py @@ -6,7 +6,6 @@ import threading from contextlib import contextmanager -from copy import deepcopy from pathlib import Path from typing import Any from itertools import chain @@ -14,11 +13,13 @@ import msgspec import tomllib +from .annotation_method import AnnotationMethod from .language import Language +from .markers import Markers from .config_data import Config from .config_update import ConfigUpdate from ..logging import logger - +from ..version import __version__ log = logger() @@ -85,7 +86,7 @@ def get(self) -> Config: def __call__(self, **kwargs): backup = self.config self.config = (self.config if self.config is not None else Config()) \ - | ConfigUpdate(**kwargs) + | ConfigUpdate(version=__version__, **kwargs) yield @@ -109,3 +110,6 @@ def get_input_files() -> list[Path]: if not any(path.match(pat) for pat in config.get.ignore_list) ] return sorted(input_file_list) + + +__all__ = ["config", "AnnotationMethod", "Markers"] diff --git a/entangled/config/version.py b/entangled/config/version.py index aad1006..99009ba 100644 --- a/entangled/config/version.py +++ b/entangled/config/version.py @@ -4,7 +4,7 @@ from typing import override -@dataclass +@dataclass(frozen=True) class Version(FromStr): numbers: tuple[int, ...] 
diff --git a/entangled/readers/lines.py b/entangled/readers/lines.py index 154b4a6..56c63b0 100644 --- a/entangled/readers/lines.py +++ b/entangled/readers/lines.py @@ -13,7 +13,7 @@ def lines(text: str) -> Generator[str]: pos = next_pos + 1 yield text[pos:] - + @peekable def numbered_lines(filename: PurePath, text: str) -> Generator[InputToken]: @@ -21,7 +21,5 @@ def numbered_lines(filename: PurePath, text: str) -> Generator[InputToken]: Iterate the lines in a file. Doesn't strip newlines. Works with both Windows and Unix line endings. """ - location = TextLocation(filename) - for line in lines(text): - yield (location, line) - location.increment() + for n, line in enumerate(lines(text)): + yield (TextLocation(filename, n+1), line) diff --git a/test/readers/test_lines.py b/test/readers/test_lines.py index 973ed02..cc85479 100644 --- a/test/readers/test_lines.py +++ b/test/readers/test_lines.py @@ -5,16 +5,20 @@ def test_lines(): - assert lines("") == [""] - assert lines("\n") == ["\n", ""] - assert lines("a\nb") == ["a\n", "b"] - assert lines("a\nb\n") == ["a\n", "b\n", ""] - assert lines("a\r\nb\r\n") == ["a\r\n", "b\r\n", ""] + def ll(inp: str): + return list(lines(inp)) + + assert ll("") == [""] + assert ll("\n") == ["\n", ""] + assert ll("a\nb") == ["a\n", "b"] + assert ll("a\nb\n") == ["a\n", "b\n", ""] + assert ll("a\r\nb\r\n") == ["a\r\n", "b\r\n", ""] def test_numbered_lines(): assert isinstance(numbered_lines(PurePath("-"), ""), Peekable) assert list(numbered_lines(PurePath("-"), "a\nb\n")) == [ (TextLocation(PurePath("-"), 1), "a\n"), - (TextLocation(PurePath("-"), 2), "b\n") + (TextLocation(PurePath("-"), 2), "b\n"), + (TextLocation(PurePath("-"), 3), "") ] diff --git a/test/readers/test_yaml_header.py b/test/readers/test_yaml_header.py index d1f8d90..10d64c6 100644 --- a/test/readers/test_yaml_header.py +++ b/test/readers/test_yaml_header.py @@ -3,7 +3,7 @@ import pytest from entangled.readers.yaml_header import read_yaml_header -from 
entangled.readers.lines import lines +from entangled.readers.lines import numbered_lines from entangled.errors.user import ParseError from entangled.readers.types import MarkdownStream @@ -47,7 +47,7 @@ def get_yaml_header(input: str) -> object: def reader() -> MarkdownStream[object]: nonlocal result - result = yield from read_yaml_header(lines(path, input)) + result = yield from read_yaml_header(numbered_lines(path, input)) return def run(): diff --git a/test/test_config.py b/test/test_config.py index 369172a..61b4b4f 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -1,9 +1,10 @@ from typing import Any import msgspec +from entangled.config.config_update import ConfigUpdate from entangled.config.version import Version from entangled.config.language import Language, Comment -from entangled.config import config, Config, AnnotationMethod, default +from entangled.config import config, Config, AnnotationMethod from entangled.commands import tangle from contextlib import chdir @@ -20,7 +21,7 @@ def test_config_constructable(): Language, {"name": "French", "identifiers": ["fr"], "comment": {"open": "excusez moi"}}, ) == Language("French", ["fr"], Comment("excusez moi")) - cfg1 = construct(Config, {"version": "2.0", "annotation": "naked"}) + cfg1 = Config() | construct(ConfigUpdate, {"version": "2.0", "annotation": "naked"}) assert cfg1.version == Version(numbers=(2, 0)) assert cfg1.annotation == AnnotationMethod.NAKED @@ -102,7 +103,7 @@ def test_pyproject_toml(tmp_path): sleep(0.1) config.read(force=True) - assert config.get == default + assert config.get == Config() Path("pyproject.toml").write_text(config_in_pyproject) sleep(0.1) From 46321c30643410a0764fc4330d3917cc82019e92 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Sun, 2 Nov 2025 10:49:11 +0100 Subject: [PATCH 08/48] implement markdown reader --- entangled/config/__init__.py | 2 +- entangled/config/config_data.py | 4 +- entangled/document.py | 1 + entangled/readers/code_block.py | 60 
-------------------- entangled/readers/markdown.py | 96 +++++++++++++++++++++++++++++--- entangled/readers/types.py | 3 +- entangled/readers/yaml_header.py | 12 ++-- 7 files changed, 101 insertions(+), 77 deletions(-) delete mode 100644 entangled/readers/code_block.py diff --git a/entangled/config/__init__.py b/entangled/config/__init__.py index 8e29971..4e47bd5 100644 --- a/entangled/config/__init__.py +++ b/entangled/config/__init__.py @@ -112,4 +112,4 @@ def get_input_files() -> list[Path]: return sorted(input_file_list) -__all__ = ["config", "AnnotationMethod", "Markers"] +__all__ = ["Config", "ConfigUpdate", "config", "AnnotationMethod", "Markers"] diff --git a/entangled/config/config_data.py b/entangled/config/config_data.py index 011ecf8..68b85ce 100644 --- a/entangled/config/config_data.py +++ b/entangled/config/config_data.py @@ -34,7 +34,6 @@ class Config: indicating markdown source locations. hooks: List of enabled hooks. hook: Sub-config of hooks. - loom: Sub-config of loom. """ version: Version = Version((2, 0)) languages: dict[str, Language] = field(default_factory=lambda: { @@ -56,6 +55,9 @@ class Config: hook: dict[str, object] = field(default_factory=dict) brei: Program = field(default_factory=Program) + def get_language(self, lang_id: str) -> Language | None: + return self.languages.get(lang_id, None) + def __or__(self, update: ConfigUpdate) -> Config: if update.style is not None: x = self | prefab_config[update.style] diff --git a/entangled/document.py b/entangled/document.py index 0894dd4..6ff3f4d 100644 --- a/entangled/document.py +++ b/entangled/document.py @@ -44,6 +44,7 @@ class CodeBlock: language: Language | None = None header: str | None = None mode: int | None = None + namespace: tuple[str, ...] 
| None = None @property def text(self) -> str: diff --git a/entangled/readers/code_block.py b/entangled/readers/code_block.py deleted file mode 100644 index 32a2a83..0000000 --- a/entangled/readers/code_block.py +++ /dev/null @@ -1,60 +0,0 @@ -from ..document import CodeBlock, Content -from ..config import Config -from ..errors.user import IndentationError -from ..properties import read_properties, get_classes -from ..utility import first - -from .types import InputStream, Reader, MarkdownStream -from .lines import lines -from .delimiters import delimited_token_getter -from .text_location import TextLocation - -import re -import logging - - -def code_block_guard(origin: TextLocation, open_match: re.Match[str], close_match: re.Match[str]) -> bool: - open_indent = open_match["indent"] - close_indent = close_match["indent"] - if len(close_indent) < len(open_indent): - raise IndentationError(origin) - if open_indent != close_indent: - return False - return True - - -def dedent(source: str, indent: str) -> str: - return "".join(line.removeprefix(indent) for line in lines(source)) - - -def code_block(config: Config) -> Reader[Content, None, CodeBlock]: - get_raw_token = delimited_token_getter( - config.markers.open, config.markers.close, code_block_guard) - - def code_block_reader(input: InputStream) -> MarkdownStream[CodeBlock]: - block = get_raw_token(input) - if block is None: - return None - - indent = block.open_match["indent"] - properties = read_properties(block.open_match["properties"]) - language_class = first(get_classes(properties)) - language = config.get_language(language_class) if language_class else None - if language_class and not language: - logging.warning(f"`{block.origin}`: language `{language_class}` unknown.") - source = dedent(block.content, indent) - - return CodeBlock( - properties, - indent, - block.open_line, - block.close_line, - source, - block.origin, - language - ) - - yield None # pyright: ignore[reportUnreachable] - - - return 
code_block_reader diff --git a/entangled/readers/markdown.py b/entangled/readers/markdown.py index e536b1f..a997b12 100644 --- a/entangled/readers/markdown.py +++ b/entangled/readers/markdown.py @@ -1,13 +1,93 @@ -from typing import cast -from .types import InputStream, MarkdownStream -from .yaml_header import get_config, read_yaml_header -from ..document import PlainText, CodeBlock, ReferenceMap -from ..config import Config, config +from ..document import CodeBlock, RawContent, PlainText +from ..config import Config +from ..errors.user import IndentationError +from ..properties import read_properties, get_classes +from ..utility import first +from .types import InputStream, Reader, RawMarkdownStream +from .lines import lines +from .delimiters import delimited_token_getter +from .text_location import TextLocation +from .yaml_header import read_yaml_header, get_config -def read_markdown(input: InputStream, refs: ReferenceMap | None = None) -> MarkdownStream[ReferenceMap]: - refs = refs or ReferenceMap() +import re +import logging + + +def ignore_block(config: Config) -> Reader[RawContent, bool]: + get_raw_token = delimited_token_getter( + config.markers.begin_ignore, config.markers.end_ignore + ) + + def ignore_block_reader(input: InputStream) -> RawMarkdownStream[bool]: + block = get_raw_token(input) + if block is None: + return False + yield PlainText(block.string) + return True + + return ignore_block_reader + + +def code_block_guard(origin: TextLocation, open_match: re.Match[str], close_match: re.Match[str]) -> bool: + open_indent = open_match["indent"] + close_indent = close_match["indent"] + if len(close_indent) < len(open_indent): + raise IndentationError(origin) + if open_indent != close_indent: + return False + return True + + +def dedent(source: str, indent: str) -> str: + return "".join(line.removeprefix(indent) for line in lines(source)) + + +def code_block(config: Config) -> Reader[RawContent, bool]: + get_raw_token = delimited_token_getter( + 
config.markers.open, config.markers.close, code_block_guard) + namespace = config.namespace + + def code_block_reader(input: InputStream) -> RawMarkdownStream[bool]: + block = get_raw_token(input) + if block is None: + return False + + indent = block.open_match["indent"] + properties = read_properties(block.open_match["properties"]) + language_class = first(get_classes(properties)) + language = config.get_language(language_class) if language_class else None + if language_class and not language: + logging.warning(f"`{block.origin}`: language `{language_class}` unknown.") + source = dedent(block.content, indent) + + yield CodeBlock( + properties, + indent, + block.open_line, + block.close_line, + source, + block.origin, + language + ) + + return True + + return code_block_reader + + +def raw_markdown(input: InputStream) -> RawMarkdownStream[None]: header = yield from read_yaml_header(input) config = get_config(header) - return refs + ignore_block_reader = ignore_block(config) + code_block_reader = code_block(config) + + while input: + if (yield from ignore_block_reader(input)): + continue + if (yield from code_block_reader(input)): + continue + + _, line = next(input) + yield PlainText(line) diff --git a/entangled/readers/types.py b/entangled/readers/types.py index 06e84cb..9bad907 100644 --- a/entangled/readers/types.py +++ b/entangled/readers/types.py @@ -3,12 +3,13 @@ from .text_location import TextLocation from .peekable import Peekable -from ..document import Content +from ..document import Content, RawContent type InputToken = tuple[TextLocation, str] type InputStream = Peekable[InputToken] type Reader[OutputToken, Result] = Callable[[InputStream], Generator[OutputToken, None, Result]] +type RawMarkdownStream[Result] = Generator[RawContent, None, Result] type MarkdownStream[Result] = Generator[Content, None, Result] diff --git a/entangled/readers/yaml_header.py b/entangled/readers/yaml_header.py index 9e10670..a28193e 100644 --- 
a/entangled/readers/yaml_header.py +++ b/entangled/readers/yaml_header.py @@ -4,17 +4,17 @@ import yaml import msgspec -from ..config import Config +from ..config import Config, ConfigUpdate, config from ..document import PlainText from ..errors.user import ParseError -from .types import InputStream, MarkdownStream +from .types import InputStream, RawMarkdownStream from .delimiters import delimited_token_getter get_yaml_header_token = delimited_token_getter("---", "---") -def read_yaml_header(input: InputStream) -> MarkdownStream[object]: +def read_yaml_header(input: InputStream) -> RawMarkdownStream[object]: """ Reads the YAML header that can be found at the top of a Markdown document. """ @@ -30,7 +30,7 @@ def read_yaml_header(input: InputStream) -> MarkdownStream[object]: raise ParseError(delimited_token.origin, str(e)) -def get_config(header: object) -> Config | None: +def get_config(header: object) -> Config: """ Get the `entangled` component from the unstructured header data, and convert it to a `Config` object. 
@@ -43,12 +43,12 @@ def get_config(header: object) -> Config | None: if isinstance(header, dict): header = cast(dict[str, object], header) try: - return msgspec.convert(header.get("entangled", None), Config) + return config.get | msgspec.convert(header.get("entangled", None), ConfigUpdate) except msgspec.ValidationError as e: logging.error(e) raise TypeError() elif header is None: - return None + return config.get else: raise TypeError() From affa80e925a48d60c4ab8a0ecd5ea9fab3b4ca18 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Sun, 2 Nov 2025 11:43:43 +0100 Subject: [PATCH 09/48] implement file processing back into new markdown reader --- entangled/document.py | 2 +- entangled/errors/user.py | 12 ++++- entangled/hooks/__init__.py | 9 ++-- entangled/properties.py | 9 ++++ entangled/readers/markdown.py | 90 ++++++++++++++++++++++++++++---- entangled/readers/yaml_header.py | 4 +- 6 files changed, 108 insertions(+), 18 deletions(-) diff --git a/entangled/document.py b/entangled/document.py index 6ff3f4d..a91168f 100644 --- a/entangled/document.py +++ b/entangled/document.py @@ -44,7 +44,7 @@ class CodeBlock: language: Language | None = None header: str | None = None mode: int | None = None - namespace: tuple[str, ...] | None = None + namespace: tuple[str, ...] 
= () @property def text(self) -> str: diff --git a/entangled/errors/user.py b/entangled/errors/user.py index 48208d6..4ac1601 100644 --- a/entangled/errors/user.py +++ b/entangled/errors/user.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Any, Callable +from typing import Any, Callable, override from pathlib import Path from ..readers.text_location import TextLocation @@ -74,3 +74,13 @@ class MissingReference(UserError): def __str__(self): return f"Missing reference `{self.ref_name}` at `{self.location}`" + + +@dataclass +class CodeAttributeError(UserError): + origin: TextLocation + msg: str + + @override + def __str__(self) -> str: + return f"{self.origin}: Attribute error: {self.msg}" diff --git a/entangled/hooks/__init__.py b/entangled/hooks/__init__.py index 2f7388b..3ef7be2 100644 --- a/entangled/hooks/__init__.py +++ b/entangled/hooks/__init__.py @@ -3,7 +3,7 @@ from .base import HookBase, PrerequisitesFailed from . import build, task, quarto_attributes, shebang, spdx_license, repl -from ..config import config +from ..config import Config, config from typing import TypeVar import msgspec @@ -26,9 +26,10 @@ } -def get_hooks() -> list[HookBase]: +def get_hooks(cfg: Config | None = None) -> list[HookBase]: + cfg = cfg or config.get active_hooks: list[HookBase] = [] - for h in sorted(config.get.hooks, key=lambda h: hooks[h].priority()): + for h in sorted(cfg.hooks, key=lambda h: hooks[h].priority()): if h in hooks | external_hooks: try: hook_cfg = msgspec.convert(config.get.hook.get(h, {}), type=hooks[h].Config) @@ -45,4 +46,4 @@ def get_hooks() -> list[HookBase]: return active_hooks -__all__ = ["hooks", "PrerequisitesFailed", "get_hooks"] +__all__ = ["hooks", "PrerequisitesFailed", "get_hooks", "HookBase"] diff --git a/entangled/properties.py b/entangled/properties.py index 2d160e0..c3ca6e9 100644 --- a/entangled/properties.py +++ b/entangled/properties.py @@ -90,3 +90,12 @@ def get_attribute(props: list[Property], key: str) -> Any: # 
pyright: ignore[re return next(p.value for p in props if isinstance(p, Attribute) and p.key == key) # pyright: ignore[reportAny] except StopIteration: return None + + +def get_attribute_string(props: list[Property], key: str) -> str | None: + x = get_attribute(props, key) + if x is None: + return None + if isinstance(x, str): + return x + raise TypeError() diff --git a/entangled/readers/markdown.py b/entangled/readers/markdown.py index a997b12..f1c2578 100644 --- a/entangled/readers/markdown.py +++ b/entangled/readers/markdown.py @@ -1,8 +1,15 @@ -from ..document import CodeBlock, RawContent, PlainText +from collections.abc import Generator, Iterator +from pathlib import PurePath + +from ..config.namespace_default import NamespaceDefault + +from ..document import CodeBlock, Content, RawContent, PlainText, ReferenceId, ReferenceMap from ..config import Config -from ..errors.user import IndentationError -from ..properties import read_properties, get_classes +from ..errors.user import CodeAttributeError, IndentationError +from ..properties import get_attribute_string, read_properties, get_classes from ..utility import first +from ..hooks import get_hooks, HookBase +from ..properties import get_id from .types import InputStream, Reader, RawMarkdownStream from .lines import lines @@ -43,10 +50,17 @@ def dedent(source: str, indent: str) -> str: return "".join(line.removeprefix(indent) for line in lines(source)) -def code_block(config: Config) -> Reader[RawContent, bool]: +def code_block(config: Config, filename: PurePath) -> Reader[RawContent, bool]: get_raw_token = delimited_token_getter( config.markers.open, config.markers.close, code_block_guard) - namespace = config.namespace + if config.namespace is None: + match config.namespace_default: + case NamespaceDefault.GLOBAL: + namespace = () + case NamespaceDefault.PRIVATE: + namespace = (filename.as_posix(),) + else: + namespace = config.namespace def code_block_reader(input: InputStream) -> RawMarkdownStream[bool]: 
block = get_raw_token(input) @@ -68,7 +82,8 @@ def code_block_reader(input: InputStream) -> RawMarkdownStream[bool]: block.close_line, source, block.origin, - language + language, + namespace = namespace ) return True @@ -76,12 +91,13 @@ def code_block_reader(input: InputStream) -> RawMarkdownStream[bool]: return code_block_reader -def raw_markdown(input: InputStream) -> RawMarkdownStream[None]: - header = yield from read_yaml_header(input) - config = get_config(header) +def raw_markdown(config: Config, input: InputStream) -> RawMarkdownStream[None]: + if not input: + return + filename = input.peek()[0].filename ignore_block_reader = ignore_block(config) - code_block_reader = code_block(config) + code_block_reader = code_block(config, filename) while input: if (yield from ignore_block_reader(input)): @@ -91,3 +107,57 @@ def raw_markdown(input: InputStream) -> RawMarkdownStream[None]: _, line = next(input) yield PlainText(line) + + +def process(hooks: list[HookBase], refs: ReferenceMap, code_block: CodeBlock) -> ReferenceId: + for h in hooks: + h.on_read(code_block) + + block_id = get_id(code_block.properties) + + try: + target_file = get_attribute_string(code_block.properties, "file") + except TypeError: + raise CodeAttributeError(code_block.origin, "`file` attribute should have string type") + + try: + if mode := get_attribute_string(code_block.properties, "mode"): + code_block.mode = int(mode, 8) + except TypeError: + raise CodeAttributeError(code_block.origin, "`mode` attribute should have string type") + + ref_name = block_id or target_file + if ref_name is None: + ref_name = f"unnamed-{code_block.origin}" + ref = refs.new_id(code_block.origin.filename, code_block.namespace, ref_name) + + refs[ref] = code_block + if target_file is not None: + refs.targets.add(target_file) + if target_file is not None and block_id is not None: + refs.alias[target_file] = block_id + + return ref + + +def refine(hooks: list[HookBase], refs: ReferenceMap, raw: Iterator[RawContent]) 
-> Generator[Content, None, ReferenceMap]: + plain_content: list[str] = [] + for token in raw: + match token: + case PlainText(t): + plain_content.append(t) + case CodeBlock(): + if plain_content: + yield PlainText("".join(plain_content)) + plain_content = [] + yield process(hooks, refs, token) + + return refs + + +def markdown(refs: ReferenceMap, input: InputStream) -> Generator[Content, None, ReferenceMap]: + header = yield from read_yaml_header(input) + config = get_config(header) + hooks = get_hooks(config) + refs = yield from refine(hooks, refs, raw_markdown(config, input)) + return refs diff --git a/entangled/readers/yaml_header.py b/entangled/readers/yaml_header.py index a28193e..1ee40c3 100644 --- a/entangled/readers/yaml_header.py +++ b/entangled/readers/yaml_header.py @@ -7,14 +7,14 @@ from ..config import Config, ConfigUpdate, config from ..document import PlainText from ..errors.user import ParseError -from .types import InputStream, RawMarkdownStream +from .types import InputStream, MarkdownStream from .delimiters import delimited_token_getter get_yaml_header_token = delimited_token_getter("---", "---") -def read_yaml_header(input: InputStream) -> RawMarkdownStream[object]: +def read_yaml_header(input: InputStream) -> MarkdownStream[object]: """ Reads the YAML header that can be found at the top of a Markdown document. 
""" From 789b6ae288bde1b4fd463d377984a1229ac2238d Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Mon, 3 Nov 2025 01:13:01 +0100 Subject: [PATCH 10/48] add test for ignore block --- entangled/config/config_data.py | 2 +- entangled/readers/markdown.py | 36 +++++++++++++++++++++------------ entangled/readers/types.py | 14 ++++++++++++- test/readers/test_markdown.py | 24 ++++++++++++++++++++++ 4 files changed, 61 insertions(+), 15 deletions(-) create mode 100644 test/readers/test_markdown.py diff --git a/entangled/config/config_data.py b/entangled/config/config_data.py index 68b85ce..f7ed0a0 100644 --- a/entangled/config/config_data.py +++ b/entangled/config/config_data.py @@ -37,7 +37,7 @@ class Config: """ version: Version = Version((2, 0)) languages: dict[str, Language] = field(default_factory=lambda: { - i: l for l in languages for i in l.identifiers + i: lang for lang in languages for i in lang.identifiers }) markers: Markers = field(default_factory=lambda: default_markers) diff --git a/entangled/readers/markdown.py b/entangled/readers/markdown.py index f1c2578..d479f61 100644 --- a/entangled/readers/markdown.py +++ b/entangled/readers/markdown.py @@ -1,15 +1,15 @@ from collections.abc import Generator, Iterator from pathlib import PurePath +from functools import partial from ..config.namespace_default import NamespaceDefault from ..document import CodeBlock, Content, RawContent, PlainText, ReferenceId, ReferenceMap from ..config import Config from ..errors.user import CodeAttributeError, IndentationError -from ..properties import get_attribute_string, read_properties, get_classes +from ..properties import get_attribute_string, read_properties, get_classes, get_id from ..utility import first from ..hooks import get_hooks, HookBase -from ..properties import get_id from .types import InputStream, Reader, RawMarkdownStream from .lines import lines @@ -78,8 +78,8 @@ def code_block_reader(input: InputStream) -> RawMarkdownStream[bool]: yield CodeBlock( properties, 
indent, - block.open_line, - block.close_line, + block.open_line.removeprefix(indent), + block.close_line.removeprefix(indent), source, block.origin, language, @@ -109,7 +109,7 @@ def raw_markdown(config: Config, input: InputStream) -> RawMarkdownStream[None]: yield PlainText(line) -def process(hooks: list[HookBase], refs: ReferenceMap, code_block: CodeBlock) -> ReferenceId: +def process_code_block(hooks: list[HookBase], refs: ReferenceMap, code_block: CodeBlock) -> ReferenceId: for h in hooks: h.on_read(code_block) @@ -140,24 +140,34 @@ def process(hooks: list[HookBase], refs: ReferenceMap, code_block: CodeBlock) -> return ref -def refine(hooks: list[HookBase], refs: ReferenceMap, raw: Iterator[RawContent]) -> Generator[Content, None, ReferenceMap]: +def process_token(hooks: list[HookBase], refs: ReferenceMap, token: RawContent) -> Content: + match token: + case CodeBlock(): + return process_code_block(hooks, refs, token) + case _: + return token + + +def collect_plain_text[T](inp: Iterator[PlainText | T]) -> Generator[PlainText | T, None, None]: plain_content: list[str] = [] - for token in raw: + for token in inp: match token: case PlainText(t): plain_content.append(t) - case CodeBlock(): + case _: if plain_content: - yield PlainText("".join(plain_content)) + yield(PlainText("".join(plain_content))) plain_content = [] - yield process(hooks, refs, token) - - return refs + yield token def markdown(refs: ReferenceMap, input: InputStream) -> Generator[Content, None, ReferenceMap]: header = yield from read_yaml_header(input) config = get_config(header) hooks = get_hooks(config) - refs = yield from refine(hooks, refs, raw_markdown(config, input)) + + yield from map( + partial(process_token, hooks, refs), + collect_plain_text(raw_markdown(config, input))) + return refs diff --git a/entangled/readers/types.py b/entangled/readers/types.py index 9bad907..17522c6 100644 --- a/entangled/readers/types.py +++ b/entangled/readers/types.py @@ -1,5 +1,5 @@ from collections.abc 
import Callable, Generator -import functools +from typing import cast from .text_location import TextLocation from .peekable import Peekable @@ -18,3 +18,15 @@ def mapped(input: InputStream) -> Generator[Out, None, U]: x = yield from reader(input) return f(x) return mapped + + +def run_generator[O, R](g: Generator[O, None, R]) -> tuple[list[O], R]: + result: R | None = None + + def h() -> Generator[O]: + nonlocal result + result = yield from g + + out = list(h()) + + return out, cast(R, result) diff --git a/test/readers/test_markdown.py b/test/readers/test_markdown.py new file mode 100644 index 0000000..c851cfc --- /dev/null +++ b/test/readers/test_markdown.py @@ -0,0 +1,24 @@ +from pathlib import PurePath +from entangled.document import PlainText +from entangled.readers.markdown import ignore_block +from entangled.readers.lines import numbered_lines +from entangled.config import Config +from entangled.readers.types import run_generator + + +test0 = """ +abcdefg +""" + +test1 = """ +~~~markdown +``` {.python} +# this code block is ignored +``` +~~~ +""".strip() + +def test_ignore_block(): + rv, ol = run_generator(ignore_block(Config())(numbered_lines(PurePath("-"), test1))) + assert ol + assert rv == [PlainText(test1)] From 2d5bdb17c7b8c62147544445c1a584a59ee3e190 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Mon, 3 Nov 2025 12:51:13 +0100 Subject: [PATCH 11/48] add tests --- entangled/document.py | 51 ++++++++++-- entangled/readers/__init__.py | 0 entangled/readers/markdown.py | 38 +++++---- test/readers/test_markdown.py | 152 ++++++++++++++++++++++++++++++++-- 4 files changed, 211 insertions(+), 30 deletions(-) create mode 100644 entangled/readers/__init__.py diff --git a/entangled/document.py b/entangled/document.py index a91168f..4a344e7 100644 --- a/entangled/document.py +++ b/entangled/document.py @@ -1,5 +1,7 @@ +from __future__ import annotations + from textwrap import indent -from collections.abc import Iterable +from collections.abc import Generator, 
Iterable from typing import override from dataclasses import dataclass, field from collections import defaultdict @@ -59,6 +61,39 @@ def indented_text(self) -> str: RawContent = PlainText | CodeBlock +@dataclass +class Namespace: + subspace: defaultdict[str, Namespace] = field( + default_factory=lambda: defaultdict(Namespace) + ) + index: defaultdict[str, list[ReferenceId]] = field( + default_factory=lambda: defaultdict(list) + ) + aliases: dict[str, str] = field(default_factory=dict) + + def get(self, namespace: tuple[str, ...], name: str) -> list[ReferenceId]: + dir = self + for i, s in enumerate(namespace): + if s not in self.subspace: + raise KeyError(f"no subspace `{s}` found in namespace `{"::".join(namespace[:i])}`") + dir = dir.subspace[s] + + if name in dir.index: + return dir.index[name] + + if name in dir.aliases: + return dir.get((), dir.aliases[name]) + + raise KeyError(f"no reference `{name}` found in namespace `{"::".join(namespace)}`") + + def __getitem__(self, key: str | tuple[str, ...]) -> list[ReferenceId]: + match key: + case str(): + return self.get((), key) + case tuple(): + return self.get(key[:-1], key[-1]) + + @dataclass class ReferenceMap: """ @@ -69,16 +104,16 @@ class ReferenceMap: """ map: dict[ReferenceId, CodeBlock] = field(default_factory=dict) - index: defaultdict[str, list[ReferenceId]] = field( - default_factory=lambda: defaultdict(list) - ) + root: Namespace = field(default_factory=Namespace) targets: set[str] = field(default_factory=set) - alias: dict[str, str] = field(default_factory=dict) - def names(self) -> Iterable[str]: - return self.index.keys() + def by_name(self, n: str, namespace: tuple[str, ...] 
= ()) -> Generator[CodeBlock]: + name_path = n.split("::") + if len(name_path) == 1: + return (self.map[r] for r in self.root.get(namespace, name_path[0])) + else: + return (self.map[r] for r in self.root.get(name_path[:-1], name_path[-1])) - def by_name(self, n: str) -> Iterable[CodeBlock]: if n not in self.index and n not in self.alias: raise AttributeError(name=n, obj=self) if n not in self.index: diff --git a/entangled/readers/__init__.py b/entangled/readers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/entangled/readers/markdown.py b/entangled/readers/markdown.py index d479f61..a99fefd 100644 --- a/entangled/readers/markdown.py +++ b/entangled/readers/markdown.py @@ -50,23 +50,27 @@ def dedent(source: str, indent: str) -> str: return "".join(line.removeprefix(indent) for line in lines(source)) -def code_block(config: Config, filename: PurePath) -> Reader[RawContent, bool]: +def code_block(config: Config) -> Reader[RawContent, bool]: get_raw_token = delimited_token_getter( config.markers.open, config.markers.close, code_block_guard) - if config.namespace is None: - match config.namespace_default: - case NamespaceDefault.GLOBAL: - namespace = () - case NamespaceDefault.PRIVATE: - namespace = (filename.as_posix(),) - else: - namespace = config.namespace def code_block_reader(input: InputStream) -> RawMarkdownStream[bool]: + if not input: + return False + block = get_raw_token(input) if block is None: return False + if config.namespace is None: + match config.namespace_default: + case NamespaceDefault.GLOBAL: + namespace = () + case NamespaceDefault.PRIVATE: + namespace = (block.origin.filename.as_posix(),) + else: + namespace = config.namespace + indent = block.open_match["indent"] properties = read_properties(block.open_match["properties"]) language_class = first(get_classes(properties)) @@ -95,9 +99,8 @@ def raw_markdown(config: Config, input: InputStream) -> RawMarkdownStream[None]: if not input: return - filename = 
input.peek()[0].filename ignore_block_reader = ignore_block(config) - code_block_reader = code_block(config, filename) + code_block_reader = code_block(config) while input: if (yield from ignore_block_reader(input)): @@ -150,16 +153,23 @@ def process_token(hooks: list[HookBase], refs: ReferenceMap, token: RawContent) def collect_plain_text[T](inp: Iterator[PlainText | T]) -> Generator[PlainText | T, None, None]: plain_content: list[str] = [] + + def flush(): + nonlocal plain_content + if plain_content: + yield(PlainText("".join(plain_content))) + plain_content = [] + for token in inp: match token: case PlainText(t): plain_content.append(t) case _: - if plain_content: - yield(PlainText("".join(plain_content))) - plain_content = [] + yield from flush() yield token + yield from flush() + def markdown(refs: ReferenceMap, input: InputStream) -> Generator[Content, None, ReferenceMap]: header = yield from read_yaml_header(input) diff --git a/test/readers/test_markdown.py b/test/readers/test_markdown.py index c851cfc..a65d5e2 100644 --- a/test/readers/test_markdown.py +++ b/test/readers/test_markdown.py @@ -1,14 +1,19 @@ +from functools import partial from pathlib import PurePath -from entangled.document import PlainText -from entangled.readers.markdown import ignore_block + +from entangled.config.namespace_default import NamespaceDefault +from entangled.document import PlainText, CodeBlock, ReferenceId, ReferenceMap +from entangled.readers.markdown import code_block, collect_plain_text, ignore_block, markdown, raw_markdown from entangled.readers.lines import numbered_lines -from entangled.config import Config -from entangled.readers.types import run_generator +from entangled.config import AnnotationMethod, Config, config +from entangled.readers.types import run_generator, Reader +from entangled.tangle import tangle_ref test0 = """ abcdefg -""" +""".strip() + test1 = """ ~~~markdown @@ -18,7 +23,138 @@ ~~~ """.strip() + +def run_reader[O, T](reader: Reader[O, T], inp: str, 
filename: str = "-") -> tuple[list[O], T]: + return run_generator(reader(numbered_lines(PurePath(filename), inp))) + + def test_ignore_block(): - rv, ol = run_generator(ignore_block(Config())(numbered_lines(PurePath("-"), test1))) - assert ol - assert rv == [PlainText(test1)] + ol, rv = run_reader(ignore_block(Config()), test0) + assert not rv and not ol + + ol, rv = run_reader(ignore_block(Config()), test1) + assert rv + assert ol == [PlainText(test1)] + + +test2 = """ +``` {.python} +# this code block should be read +``` +""".strip() + + +test3 = """ +First we have some other input + +``` {.python #test} +# so this should not be read directly by the `code_block` function +# once we read with `raw_markdown` that should change +``` +""".strip() + + +def test_code_block(): + ol, rv = run_reader(code_block(Config()), test0) + assert not rv and not ol + + ol, rv = run_reader(code_block(Config()), test2) + assert rv + + assert len(ol) == 1 + assert isinstance(ol[0], CodeBlock) + assert ol[0].source.strip() == "# this code block should be read" + + ol, rv = run_reader(code_block(Config()), test3) + assert not rv and not ol + + +def test_raw_markdown(): + ol, _ = run_reader(partial(raw_markdown, Config()), test0) + assert len(ol) == 1 + assert ol[0] == PlainText("abcdefg") + + ol, _ = run_reader(partial(raw_markdown, Config()), test2) + assert len(ol) == 1 + assert isinstance(ol[0], CodeBlock) + assert ol[0].source.strip() == "# this code block should be read" + + ol, _ = run_reader(partial(raw_markdown, Config()), test3) + assert isinstance(ol[0], PlainText) + assert ol[0].content.strip() == "First we have some other input" + assert isinstance(ol[-1], CodeBlock) + + +def test_collect_plain_text(): + assert list(collect_plain_text(iter([]))) == [] + assert list(collect_plain_text(iter([PlainText("a"), PlainText("b"), 4, PlainText("c")]))) == \ + [PlainText("ab"), 4, PlainText("c")] + + +def test_markdown(): + refs = ReferenceMap() + ol, refs = 
run_reader(partial(markdown, refs), test0) + assert len(ol) == 1 + assert ol[0] == PlainText("abcdefg") + assert not refs.map + + ol, refs = run_reader(partial(markdown, refs), test3) + assert isinstance(ol[-1], ReferenceId) + assert ol[-1].name == "test" + assert "test" in refs + + +test_ns_a = """ +First input: + +``` {.python #a} +# part a +``` + +``` {.python #refers-to-a} +<> +``` +""".strip() + + +test_ns_b = """ +Second input + +``` {.python #a} +# part b +``` +""".strip() + + +def test_global_namespace(): + refs = ReferenceMap() + + with config(namespace_default=NamespaceDefault.GLOBAL): + _, refs = run_reader(partial(markdown, refs), test_ns_a, "a.md") + _, refs = run_reader(partial(markdown, refs), test_ns_b, "b.md") + + cb = list(refs.by_name("a")) + assert len(cb) == 2 + assert cb[0].source.strip() == "# part a" + assert cb[1].source.strip() == "# part b" + + source, _ = tangle_ref(refs, "refers-to-a", AnnotationMethod.NAKED) + assert source.splitlines() == ["# part a", "# part b"] + + +def test_private_namespace(): + refs = ReferenceMap() + + with config(namespace_default=NamespaceDefault.PRIVATE): + _, refs = run_reader(partial(markdown, refs), test_ns_a, "a.md") + _, refs = run_reader(partial(markdown, refs), test_ns_b, "b.md") + + print(refs.map) + + cb = list(refs.by_name("a")) + assert len(cb) == 2 + assert cb[0].source.strip() == "# part a" + assert cb[1].source.strip() == "# part b" + + source, _ = tangle_ref(refs, "a.md::refers-to-a", AnnotationMethod.NAKED) + assert source.splitlines() == ["# part a"] From 476ceea5cb61ceecb01d98034c65d44bf13d165c Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Mon, 3 Nov 2025 19:52:43 +0100 Subject: [PATCH 12/48] implement namespaces object --- entangled/document.py | 60 ++++----------- entangled/model/__init__.py | 0 entangled/model/namespaces.py | 53 ++++++++++++++ entangled/readers/markdown.py | 2 +- entangled/tangle.py | 3 - pyproject.toml | 3 + uv.lock | 133 +++++++++++++++------------------- 7 files 
changed, 129 insertions(+), 125 deletions(-) create mode 100644 entangled/model/__init__.py create mode 100644 entangled/model/namespaces.py diff --git a/entangled/document.py b/entangled/document.py index 4a344e7..6a65fe3 100644 --- a/entangled/document.py +++ b/entangled/document.py @@ -4,10 +4,10 @@ from collections.abc import Generator, Iterable from typing import override from dataclasses import dataclass, field -from collections import defaultdict from functools import singledispatchmethod from pathlib import PurePath +from .model.namespaces import Namespace from .config.language import Language from .properties import Property from .errors.internal import InternalError @@ -61,37 +61,12 @@ def indented_text(self) -> str: RawContent = PlainText | CodeBlock -@dataclass -class Namespace: - subspace: defaultdict[str, Namespace] = field( - default_factory=lambda: defaultdict(Namespace) - ) - index: defaultdict[str, list[ReferenceId]] = field( - default_factory=lambda: defaultdict(list) - ) - aliases: dict[str, str] = field(default_factory=dict) - - def get(self, namespace: tuple[str, ...], name: str) -> list[ReferenceId]: - dir = self - for i, s in enumerate(namespace): - if s not in self.subspace: - raise KeyError(f"no subspace `{s}` found in namespace `{"::".join(namespace[:i])}`") - dir = dir.subspace[s] - - if name in dir.index: - return dir.index[name] - - if name in dir.aliases: - return dir.get((), dir.aliases[name]) - - raise KeyError(f"no reference `{name}` found in namespace `{"::".join(namespace)}`") - - def __getitem__(self, key: str | tuple[str, ...]) -> list[ReferenceId]: - match key: - case str(): - return self.get((), key) - case tuple(): - return self.get(key[:-1], key[-1]) +class ReferenceNamespace(Namespace[list[ReferenceId]]): + def add(self, ref: ReferenceId): + dir = self.make_sub(ref.namespace) + if ref.name not in dir.index: + dir.index[ref.name] = [] + dir.index[ref.name].append(ref) @dataclass @@ -104,35 +79,28 @@ class ReferenceMap: """ 
map: dict[ReferenceId, CodeBlock] = field(default_factory=dict) - root: Namespace = field(default_factory=Namespace) - targets: set[str] = field(default_factory=set) + root: ReferenceNamespace = field(default_factory=ReferenceNamespace) + targets: dict[str, str] = field(default_factory=dict) def by_name(self, n: str, namespace: tuple[str, ...] = ()) -> Generator[CodeBlock]: name_path = n.split("::") if len(name_path) == 1: return (self.map[r] for r in self.root.get(namespace, name_path[0])) else: - return (self.map[r] for r in self.root.get(name_path[:-1], name_path[-1])) - - if n not in self.index and n not in self.alias: - raise AttributeError(name=n, obj=self) - if n not in self.index: - return self.by_name(self.alias[n]) - - return (self.map[r] for r in self.index[n]) + return (self.map[r] for r in self.root.get((*name_path[:-1],), name_path[-1])) def new_id(self, filename: PurePath, namespace: tuple[str,...], name: str) -> ReferenceId: - c = length(filter(lambda r: r.file == filename, self.index[name])) + c = length(filter(lambda r: r.file == filename, self.root.get(namespace, name))) return ReferenceId(name, namespace, filename, c) def __setitem__(self, key: ReferenceId, value: CodeBlock): if key in self.map: raise InternalError("Duplicate key in ReferenceMap", [key]) self.map[key] = value - self.index[key.name].append(key) + self.root.add(key) def __contains__(self, key: str) -> bool: - return key in self.index + return key in self.root or key in self.alias def get_codeblock(self, key: ReferenceId) -> CodeBlock: return self.map[key] @@ -146,7 +114,7 @@ def _(self, key: ReferenceId) -> CodeBlock: return self.map[key] @__getitem__.register - def _(self, key: str) -> Iterable[CodeBlock]: + def _(self, key: str) -> Generator[CodeBlock]: return self.by_name(key) diff --git a/entangled/model/__init__.py b/entangled/model/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/entangled/model/namespaces.py b/entangled/model/namespaces.py new file mode 
100644 index 0000000..00d4862 --- /dev/null +++ b/entangled/model/namespaces.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from collections import defaultdict + + +@dataclass +class Namespace[T]: + sep: str = "::" + subspace: defaultdict[str, Namespace[T]] = field( + default_factory=lambda: defaultdict(Namespace) + ) + index: dict[str, T] = field(default_factory=dict) + + def sub(self, namespace: tuple[str, ...]) -> Namespace[T]: + dir = self + for i, s in enumerate(namespace): + if s not in self.subspace: + raise KeyError(f"no subspace `{s}` found in namespace `{self.sep.join(namespace[:i])}`") + dir = dir.subspace[s] + return dir + + def make_sub(self, namespace: tuple[str, ...]) -> Namespace[T]: + dir = self + for s in namespace: + dir = dir.subspace[s] + return dir + + def get(self, namespace: tuple[str, ...], name: str) -> T: + dir = self.sub(namespace) + + if name in dir.index: + return dir.index[name] + + raise KeyError(f"no reference `{name}` found in namespace `{self.sep.join(namespace)}`") + + def __getitem__(self, key: str | tuple[str, ...]) -> T: + match key: + case str(): + path = key.split(self.sep) + return self.get((*path[:-1],), path[-1]) + case tuple(): + return self.get(key[:-1], key[-1]) + + def __setitem__(self, key: str, value: T): + path = key.split(self.sep) + dir = self.make_sub((*path[:-1],)) + dir.index[key] = value + + def __contains__(self, key: str) -> bool: + path = key.split(self.sep) + dir = self.sub((*path[:-1],)) + return path[-1] in dir.index diff --git a/entangled/readers/markdown.py b/entangled/readers/markdown.py index a99fefd..fcbf191 100644 --- a/entangled/readers/markdown.py +++ b/entangled/readers/markdown.py @@ -138,7 +138,7 @@ def process_code_block(hooks: list[HookBase], refs: ReferenceMap, code_block: Co if target_file is not None: refs.targets.add(target_file) if target_file is not None and block_id is not None: - refs.alias[target_file] = block_id + 
refs.root.make_sub(ref.namespace).alias[target_file] = block_id return ref diff --git a/entangled/tangle.py b/entangled/tangle.py index 6c24cf9..0945732 100644 --- a/entangled/tangle.py +++ b/entangled/tangle.py @@ -126,9 +126,6 @@ def tangle_ref( if annotation is None: annotation = config.get.annotation - if ref_name in refs.alias: - ref_name = refs.alias[ref_name] - if ref_name not in refs: raise KeyError(ref_name) v = _visited or Visitor() diff --git a/pyproject.toml b/pyproject.toml index 1f2cabd..c2a051a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,3 +66,6 @@ testpaths = ["test"] [tool.mypy] packages = ["entangled"] + +[tool.uv.sources] +msgspec = { git = "https://github.com/jcrist/msgspec" } diff --git a/uv.lock b/uv.lock index 39cf24f..06bcb23 100644 --- a/uv.lock +++ b/uv.lock @@ -335,7 +335,7 @@ requires-dist = [ { name = "copier", specifier = ">=9,<10" }, { name = "filelock", specifier = ">=3.12.0,<4" }, { name = "mawk", specifier = ">=0.1.7,<0.2" }, - { name = "msgspec", specifier = ">=0.19.0" }, + { name = "msgspec", git = "https://github.com/jcrist/msgspec" }, { name = "pexpect", specifier = ">=4.9.0,<5" }, { name = "pyyaml", specifier = ">=6.0.1,<7" }, { name = "repl-session", specifier = ">=0.2.0" }, @@ -641,7 +641,7 @@ wheels = [ [[package]] name = "mkdocs-material" -version = "9.6.22" +version = "9.6.23" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "babel" }, @@ -656,9 +656,9 @@ dependencies = [ { name = "pymdown-extensions" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5f/5d/317e37b6c43325cb376a1d6439df9cc743b8ee41c84603c2faf7286afc82/mkdocs_material-9.6.22.tar.gz", hash = "sha256:87c158b0642e1ada6da0cbd798a3389b0bc5516b90e5ece4a0fb939f00bacd1c", size = 4044968, upload-time = "2025-10-15T09:21:15.409Z" } +sdist = { url = "https://files.pythonhosted.org/packages/57/de/cc1d5139c2782b1a49e1ed1845b3298ed6076b9ba1c740ad7c952d8ffcf9/mkdocs_material-9.6.23.tar.gz", hash = 
"sha256:62ebc9cdbe90e1ae4f4e9b16a6aa5c69b93474c7b9e79ebc0b11b87f9f055e00", size = 4048130, upload-time = "2025-11-01T16:33:11.782Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cc/82/6fdb9a7a04fb222f4849ffec1006f891a0280825a20314d11f3ccdee14eb/mkdocs_material-9.6.22-py3-none-any.whl", hash = "sha256:14ac5f72d38898b2f98ac75a5531aaca9366eaa427b0f49fc2ecf04d99b7ad84", size = 9206252, upload-time = "2025-10-15T09:21:12.175Z" }, + { url = "https://files.pythonhosted.org/packages/f5/df/bc583e857174b0dc6df67d555123533f09e7e1ac0f3fae7693fb6840c0a3/mkdocs_material-9.6.23-py3-none-any.whl", hash = "sha256:3bf3f1d82d269f3a14ed6897bfc3a844cc05e1dc38045386691b91d7e6945332", size = 9210689, upload-time = "2025-11-01T16:33:08.196Z" }, ] [[package]] @@ -708,25 +708,8 @@ wheels = [ [[package]] name = "msgspec" -version = "0.19.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/cf/9b/95d8ce458462b8b71b8a70fa94563b2498b89933689f3a7b8911edfae3d7/msgspec-0.19.0.tar.gz", hash = "sha256:604037e7cd475345848116e89c553aa9a233259733ab51986ac924ab1b976f8e", size = 216934, upload-time = "2024-12-27T17:40:28.597Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b2/5f/a70c24f075e3e7af2fae5414c7048b0e11389685b7f717bb55ba282a34a7/msgspec-0.19.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f98bd8962ad549c27d63845b50af3f53ec468b6318400c9f1adfe8b092d7b62f", size = 190485, upload-time = "2024-12-27T17:39:44.974Z" }, - { url = "https://files.pythonhosted.org/packages/89/b0/1b9763938cfae12acf14b682fcf05c92855974d921a5a985ecc197d1c672/msgspec-0.19.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:43bbb237feab761b815ed9df43b266114203f53596f9b6e6f00ebd79d178cdf2", size = 183910, upload-time = "2024-12-27T17:39:46.401Z" }, - { url = 
"https://files.pythonhosted.org/packages/87/81/0c8c93f0b92c97e326b279795f9c5b956c5a97af28ca0fbb9fd86c83737a/msgspec-0.19.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4cfc033c02c3e0aec52b71710d7f84cb3ca5eb407ab2ad23d75631153fdb1f12", size = 210633, upload-time = "2024-12-27T17:39:49.099Z" }, - { url = "https://files.pythonhosted.org/packages/d0/ef/c5422ce8af73928d194a6606f8ae36e93a52fd5e8df5abd366903a5ca8da/msgspec-0.19.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d911c442571605e17658ca2b416fd8579c5050ac9adc5e00c2cb3126c97f73bc", size = 213594, upload-time = "2024-12-27T17:39:51.204Z" }, - { url = "https://files.pythonhosted.org/packages/19/2b/4137bc2ed45660444842d042be2cf5b18aa06efd2cda107cff18253b9653/msgspec-0.19.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:757b501fa57e24896cf40a831442b19a864f56d253679f34f260dcb002524a6c", size = 214053, upload-time = "2024-12-27T17:39:52.866Z" }, - { url = "https://files.pythonhosted.org/packages/9d/e6/8ad51bdc806aac1dc501e8fe43f759f9ed7284043d722b53323ea421c360/msgspec-0.19.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5f0f65f29b45e2816d8bded36e6b837a4bf5fb60ec4bc3c625fa2c6da4124537", size = 219081, upload-time = "2024-12-27T17:39:55.142Z" }, - { url = "https://files.pythonhosted.org/packages/b1/ef/27dd35a7049c9a4f4211c6cd6a8c9db0a50647546f003a5867827ec45391/msgspec-0.19.0-cp312-cp312-win_amd64.whl", hash = "sha256:067f0de1c33cfa0b6a8206562efdf6be5985b988b53dd244a8e06f993f27c8c0", size = 187467, upload-time = "2024-12-27T17:39:56.531Z" }, - { url = "https://files.pythonhosted.org/packages/3c/cb/2842c312bbe618d8fefc8b9cedce37f773cdc8fa453306546dba2c21fd98/msgspec-0.19.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f12d30dd6266557aaaf0aa0f9580a9a8fbeadfa83699c487713e355ec5f0bd86", size = 190498, upload-time = "2024-12-27T17:40:00.427Z" }, - { url = 
"https://files.pythonhosted.org/packages/58/95/c40b01b93465e1a5f3b6c7d91b10fb574818163740cc3acbe722d1e0e7e4/msgspec-0.19.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82b2c42c1b9ebc89e822e7e13bbe9d17ede0c23c187469fdd9505afd5a481314", size = 183950, upload-time = "2024-12-27T17:40:04.219Z" }, - { url = "https://files.pythonhosted.org/packages/e8/f0/5b764e066ce9aba4b70d1db8b087ea66098c7c27d59b9dd8a3532774d48f/msgspec-0.19.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19746b50be214a54239aab822964f2ac81e38b0055cca94808359d779338c10e", size = 210647, upload-time = "2024-12-27T17:40:05.606Z" }, - { url = "https://files.pythonhosted.org/packages/9d/87/bc14f49bc95c4cb0dd0a8c56028a67c014ee7e6818ccdce74a4862af259b/msgspec-0.19.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60ef4bdb0ec8e4ad62e5a1f95230c08efb1f64f32e6e8dd2ced685bcc73858b5", size = 213563, upload-time = "2024-12-27T17:40:10.516Z" }, - { url = "https://files.pythonhosted.org/packages/53/2f/2b1c2b056894fbaa975f68f81e3014bb447516a8b010f1bed3fb0e016ed7/msgspec-0.19.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac7f7c377c122b649f7545810c6cd1b47586e3aa3059126ce3516ac7ccc6a6a9", size = 213996, upload-time = "2024-12-27T17:40:12.244Z" }, - { url = "https://files.pythonhosted.org/packages/aa/5a/4cd408d90d1417e8d2ce6a22b98a6853c1b4d7cb7669153e4424d60087f6/msgspec-0.19.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a5bc1472223a643f5ffb5bf46ccdede7f9795078194f14edd69e3aab7020d327", size = 219087, upload-time = "2024-12-27T17:40:14.881Z" }, - { url = "https://files.pythonhosted.org/packages/23/d8/f15b40611c2d5753d1abb0ca0da0c75348daf1252220e5dda2867bd81062/msgspec-0.19.0-cp313-cp313-win_amd64.whl", hash = "sha256:317050bc0f7739cb30d257ff09152ca309bf5a369854bbf1e57dffc310c1f20f", size = 187432, upload-time = "2024-12-27T17:40:16.256Z" }, -] +version = "0.19.1.dev24+gee7c6baac.d20251103" +source = { git = 
"https://github.com/jcrist/msgspec#ee7c6baac6c07c6b6064672278e2f784305b4b43" } [[package]] name = "mypy" @@ -846,14 +829,14 @@ wheels = [ [[package]] name = "plumbum" -version = "1.9.0" +version = "1.10.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pywin32", marker = "platform_python_implementation != 'PyPy' and sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f0/5d/49ba324ad4ae5b1a4caefafbce7a1648540129344481f2ed4ef6bb68d451/plumbum-1.9.0.tar.gz", hash = "sha256:e640062b72642c3873bd5bdc3effed75ba4d3c70ef6b6a7b907357a84d909219", size = 319083, upload-time = "2024-10-05T05:59:27.059Z" } +sdist = { url = "https://files.pythonhosted.org/packages/dc/c8/11a5f792704b70f071a3dbc329105a98e9cc8d25daaf09f733c44eb0ef8e/plumbum-1.10.0.tar.gz", hash = "sha256:f8cbf0ecec0b73ff4e349398b65112a9e3f9300e7dc019001217dcc148d5c97c", size = 320039, upload-time = "2025-10-31T05:02:48.697Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4f/9d/d03542c93bb3d448406731b80f39c3d5601282f778328c22c77d270f4ed4/plumbum-1.9.0-py3-none-any.whl", hash = "sha256:9fd0d3b0e8d86e4b581af36edf3f3bbe9d1ae15b45b8caab28de1bcb27aaa7f5", size = 127970, upload-time = "2024-10-05T05:59:25.102Z" }, + { url = "https://files.pythonhosted.org/packages/79/ad/45312df6b63ba64ea35b8d8f5f0c577aac16e6b416eafe8e1cb34e03f9a7/plumbum-1.10.0-py3-none-any.whl", hash = "sha256:9583d737ac901c474d99d030e4d5eec4c4e6d2d7417b1cf49728cf3be34f6dc8", size = 127383, upload-time = "2025-10-31T05:02:47.002Z" }, ] [[package]] @@ -983,15 +966,15 @@ wheels = [ [[package]] name = "pyright" -version = "1.1.406" +version = "1.1.407" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "nodeenv" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f7/16/6b4fbdd1fef59a0292cbb99f790b44983e390321eccbc5921b4d161da5d1/pyright-1.1.406.tar.gz", hash = 
"sha256:c4872bc58c9643dac09e8a2e74d472c62036910b3bd37a32813989ef7576ea2c", size = 4113151, upload-time = "2025-10-02T01:04:45.488Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a6/1b/0aa08ee42948b61745ac5b5b5ccaec4669e8884b53d31c8ec20b2fcd6b6f/pyright-1.1.407.tar.gz", hash = "sha256:099674dba5c10489832d4a4b2d302636152a9a42d317986c38474c76fe562262", size = 4122872, upload-time = "2025-10-24T23:17:15.145Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f6/a2/e309afbb459f50507103793aaef85ca4348b66814c86bc73908bdeb66d12/pyright-1.1.406-py3-none-any.whl", hash = "sha256:1d81fb43c2407bf566e97e57abb01c811973fdb21b2df8df59f870f688bdca71", size = 5980982, upload-time = "2025-10-02T01:04:43.137Z" }, + { url = "https://files.pythonhosted.org/packages/dc/93/b69052907d032b00c40cb656d21438ec00b3a471733de137a3f65a49a0a0/pyright-1.1.407-py3-none-any.whl", hash = "sha256:6dd419f54fcc13f03b52285796d65e639786373f433e243f8b94cf93a7444d21", size = 5997008, upload-time = "2025-10-24T23:17:13.159Z" }, ] [[package]] @@ -1217,40 +1200,40 @@ wheels = [ [[package]] name = "rich-argparse" -version = "1.7.1" +version = "1.7.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "rich" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/71/a6/34460d81e5534f6d2fc8e8d91ff99a5835fdca53578eac89e4f37b3a7c6d/rich_argparse-1.7.1.tar.gz", hash = "sha256:d7a493cde94043e41ea68fb43a74405fa178de981bf7b800f7a3bd02ac5c27be", size = 38094, upload-time = "2025-05-25T20:20:35.335Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4c/f7/1c65e0245d4c7009a87ac92908294a66e7e7635eccf76a68550f40c6df80/rich_argparse-1.7.2.tar.gz", hash = "sha256:64fd2e948fc96e8a1a06e0e72c111c2ce7f3af74126d75c0f5f63926e7289cd1", size = 38500, upload-time = "2025-11-01T10:35:44.232Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/31/f6/5fc0574af5379606ffd57a4b68ed88f9b415eb222047fe023aefcc00a648/rich_argparse-1.7.1-py3-none-any.whl", hash = 
"sha256:a8650b42e4a4ff72127837632fba6b7da40784842f08d7395eb67a9cbd7b4bf9", size = 25357, upload-time = "2025-05-25T20:20:33.793Z" }, + { url = "https://files.pythonhosted.org/packages/04/80/97b6f357ac458d9ad9872cc3183ca09ef7439ac89e030ea43053ba1294b6/rich_argparse-1.7.2-py3-none-any.whl", hash = "sha256:0559b1f47a19bbeb82bf15f95a057f99bcbbc98385532f57937f9fc57acc501a", size = 25476, upload-time = "2025-11-01T10:35:42.681Z" }, ] [[package]] name = "ruff" -version = "0.14.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9e/58/6ca66896635352812de66f71cdf9ff86b3a4f79071ca5730088c0cd0fc8d/ruff-0.14.1.tar.gz", hash = "sha256:1dd86253060c4772867c61791588627320abcb6ed1577a90ef432ee319729b69", size = 5513429, upload-time = "2025-10-16T18:05:41.766Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8d/39/9cc5ab181478d7a18adc1c1e051a84ee02bec94eb9bdfd35643d7c74ca31/ruff-0.14.1-py3-none-linux_armv6l.whl", hash = "sha256:083bfc1f30f4a391ae09c6f4f99d83074416b471775b59288956f5bc18e82f8b", size = 12445415, upload-time = "2025-10-16T18:04:48.227Z" }, - { url = "https://files.pythonhosted.org/packages/ef/2e/1226961855ccd697255988f5a2474890ac7c5863b080b15bd038df820818/ruff-0.14.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:f6fa757cd717f791009f7669fefb09121cc5f7d9bd0ef211371fad68c2b8b224", size = 12784267, upload-time = "2025-10-16T18:04:52.515Z" }, - { url = "https://files.pythonhosted.org/packages/c1/ea/fd9e95863124ed159cd0667ec98449ae461de94acda7101f1acb6066da00/ruff-0.14.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d6191903d39ac156921398e9c86b7354d15e3c93772e7dbf26c9fcae59ceccd5", size = 11781872, upload-time = "2025-10-16T18:04:55.396Z" }, - { url = "https://files.pythonhosted.org/packages/1e/5a/e890f7338ff537dba4589a5e02c51baa63020acfb7c8cbbaea4831562c96/ruff-0.14.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:ed04f0e04f7a4587244e5c9d7df50e6b5bf2705d75059f409a6421c593a35896", size = 12226558, upload-time = "2025-10-16T18:04:58.166Z" }, - { url = "https://files.pythonhosted.org/packages/a6/7a/8ab5c3377f5bf31e167b73651841217542bcc7aa1c19e83030835cc25204/ruff-0.14.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5c9e6cf6cd4acae0febbce29497accd3632fe2025c0c583c8b87e8dbdeae5f61", size = 12187898, upload-time = "2025-10-16T18:05:01.455Z" }, - { url = "https://files.pythonhosted.org/packages/48/8d/ba7c33aa55406955fc124e62c8259791c3d42e3075a71710fdff9375134f/ruff-0.14.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a6fa2458527794ecdfbe45f654e42c61f2503a230545a91af839653a0a93dbc6", size = 12939168, upload-time = "2025-10-16T18:05:04.397Z" }, - { url = "https://files.pythonhosted.org/packages/b4/c2/70783f612b50f66d083380e68cbd1696739d88e9b4f6164230375532c637/ruff-0.14.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:39f1c392244e338b21d42ab29b8a6392a722c5090032eb49bb4d6defcdb34345", size = 14386942, upload-time = "2025-10-16T18:05:07.102Z" }, - { url = "https://files.pythonhosted.org/packages/48/44/cd7abb9c776b66d332119d67f96acf15830d120f5b884598a36d9d3f4d83/ruff-0.14.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7382fa12a26cce1f95070ce450946bec357727aaa428983036362579eadcc5cf", size = 13990622, upload-time = "2025-10-16T18:05:09.882Z" }, - { url = "https://files.pythonhosted.org/packages/eb/56/4259b696db12ac152fe472764b4f78bbdd9b477afd9bc3a6d53c01300b37/ruff-0.14.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd0bf2be3ae8521e1093a487c4aa3b455882f139787770698530d28ed3fbb37c", size = 13431143, upload-time = "2025-10-16T18:05:13.46Z" }, - { url = "https://files.pythonhosted.org/packages/e0/35/266a80d0eb97bd224b3265b9437bd89dde0dcf4faf299db1212e81824e7e/ruff-0.14.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:cabcaa9ccf8089fb4fdb78d17cc0e28241520f50f4c2e88cb6261ed083d85151", size = 13132844, upload-time = "2025-10-16T18:05:16.1Z" }, - { url = "https://files.pythonhosted.org/packages/65/6e/d31ce218acc11a8d91ef208e002a31acf315061a85132f94f3df7a252b18/ruff-0.14.1-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:747d583400f6125ec11a4c14d1c8474bf75d8b419ad22a111a537ec1a952d192", size = 13401241, upload-time = "2025-10-16T18:05:19.395Z" }, - { url = "https://files.pythonhosted.org/packages/9f/b5/dbc4221bf0b03774b3b2f0d47f39e848d30664157c15b965a14d890637d2/ruff-0.14.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:5a6e74c0efd78515a1d13acbfe6c90f0f5bd822aa56b4a6d43a9ffb2ae6e56cd", size = 12132476, upload-time = "2025-10-16T18:05:22.163Z" }, - { url = "https://files.pythonhosted.org/packages/98/4b/ac99194e790ccd092d6a8b5f341f34b6e597d698e3077c032c502d75ea84/ruff-0.14.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:0ea6a864d2fb41a4b6d5b456ed164302a0d96f4daac630aeba829abfb059d020", size = 12139749, upload-time = "2025-10-16T18:05:25.162Z" }, - { url = "https://files.pythonhosted.org/packages/47/26/7df917462c3bb5004e6fdfcc505a49e90bcd8a34c54a051953118c00b53a/ruff-0.14.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:0826b8764f94229604fa255918d1cc45e583e38c21c203248b0bfc9a0e930be5", size = 12544758, upload-time = "2025-10-16T18:05:28.018Z" }, - { url = "https://files.pythonhosted.org/packages/64/d0/81e7f0648e9764ad9b51dd4be5e5dac3fcfff9602428ccbae288a39c2c22/ruff-0.14.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:cbc52160465913a1a3f424c81c62ac8096b6a491468e7d872cb9444a860bc33d", size = 13221811, upload-time = "2025-10-16T18:05:30.707Z" }, - { url = "https://files.pythonhosted.org/packages/c3/07/3c45562c67933cc35f6d5df4ca77dabbcd88fddaca0d6b8371693d29fd56/ruff-0.14.1-py3-none-win32.whl", hash = "sha256:e037ea374aaaff4103240ae79168c0945ae3d5ae8db190603de3b4012bd1def6", size = 12319467, upload-time = "2025-10-16T18:05:33.261Z" }, - { url = 
"https://files.pythonhosted.org/packages/02/88/0ee4ca507d4aa05f67e292d2e5eb0b3e358fbcfe527554a2eda9ac422d6b/ruff-0.14.1-py3-none-win_amd64.whl", hash = "sha256:59d599cdff9c7f925a017f6f2c256c908b094e55967f93f2821b1439928746a1", size = 13401123, upload-time = "2025-10-16T18:05:35.984Z" }, - { url = "https://files.pythonhosted.org/packages/b8/81/4b6387be7014858d924b843530e1b2a8e531846807516e9bea2ee0936bf7/ruff-0.14.1-py3-none-win_arm64.whl", hash = "sha256:e3b443c4c9f16ae850906b8d0a707b2a4c16f8d2f0a7fe65c475c5886665ce44", size = 12436636, upload-time = "2025-10-16T18:05:38.995Z" }, +version = "0.14.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/75/62/50b7727004dfe361104dfbf898c45a9a2fdfad8c72c04ae62900224d6ecf/ruff-0.14.3.tar.gz", hash = "sha256:4ff876d2ab2b161b6de0aa1f5bd714e8e9b4033dc122ee006925fbacc4f62153", size = 5558687, upload-time = "2025-10-31T00:26:26.878Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/8e/0c10ff1ea5d4360ab8bfca4cb2c9d979101a391f3e79d2616c9bf348cd26/ruff-0.14.3-py3-none-linux_armv6l.whl", hash = "sha256:876b21e6c824f519446715c1342b8e60f97f93264012de9d8d10314f8a79c371", size = 12535613, upload-time = "2025-10-31T00:25:44.302Z" }, + { url = "https://files.pythonhosted.org/packages/d3/c8/6724f4634c1daf52409fbf13fefda64aa9c8f81e44727a378b7b73dc590b/ruff-0.14.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b6fd8c79b457bedd2abf2702b9b472147cd860ed7855c73a5247fa55c9117654", size = 12855812, upload-time = "2025-10-31T00:25:47.793Z" }, + { url = "https://files.pythonhosted.org/packages/de/03/db1bce591d55fd5f8a08bb02517fa0b5097b2ccabd4ea1ee29aa72b67d96/ruff-0.14.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:71ff6edca490c308f083156938c0c1a66907151263c4abdcb588602c6e696a14", size = 11944026, upload-time = "2025-10-31T00:25:49.657Z" }, + { url = 
"https://files.pythonhosted.org/packages/0b/75/4f8dbd48e03272715d12c87dc4fcaaf21b913f0affa5f12a4e9c6f8a0582/ruff-0.14.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:786ee3ce6139772ff9272aaf43296d975c0217ee1b97538a98171bf0d21f87ed", size = 12356818, upload-time = "2025-10-31T00:25:51.949Z" }, + { url = "https://files.pythonhosted.org/packages/ec/9b/506ec5b140c11d44a9a4f284ea7c14ebf6f8b01e6e8917734a3325bff787/ruff-0.14.3-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cd6291d0061811c52b8e392f946889916757610d45d004e41140d81fb6cd5ddc", size = 12336745, upload-time = "2025-10-31T00:25:54.248Z" }, + { url = "https://files.pythonhosted.org/packages/c7/e1/c560d254048c147f35e7f8131d30bc1f63a008ac61595cf3078a3e93533d/ruff-0.14.3-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a497ec0c3d2c88561b6d90f9c29f5ae68221ac00d471f306fa21fa4264ce5fcd", size = 13101684, upload-time = "2025-10-31T00:25:56.253Z" }, + { url = "https://files.pythonhosted.org/packages/a5/32/e310133f8af5cd11f8cc30f52522a3ebccc5ea5bff4b492f94faceaca7a8/ruff-0.14.3-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:e231e1be58fc568950a04fbe6887c8e4b85310e7889727e2b81db205c45059eb", size = 14535000, upload-time = "2025-10-31T00:25:58.397Z" }, + { url = "https://files.pythonhosted.org/packages/a2/a1/7b0470a22158c6d8501eabc5e9b6043c99bede40fa1994cadf6b5c2a61c7/ruff-0.14.3-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:469e35872a09c0e45fecf48dd960bfbce056b5db2d5e6b50eca329b4f853ae20", size = 14156450, upload-time = "2025-10-31T00:26:00.889Z" }, + { url = "https://files.pythonhosted.org/packages/0a/96/24bfd9d1a7f532b560dcee1a87096332e461354d3882124219bcaff65c09/ruff-0.14.3-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d6bc90307c469cb9d28b7cfad90aaa600b10d67c6e22026869f585e1e8a2db0", size = 13568414, upload-time = "2025-10-31T00:26:03.291Z" }, + { url = 
"https://files.pythonhosted.org/packages/a7/e7/138b883f0dfe4ad5b76b58bf4ae675f4d2176ac2b24bdd81b4d966b28c61/ruff-0.14.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2f8a0bbcffcfd895df39c9a4ecd59bb80dca03dc43f7fb63e647ed176b741e", size = 13315293, upload-time = "2025-10-31T00:26:05.708Z" }, + { url = "https://files.pythonhosted.org/packages/33/f4/c09bb898be97b2eb18476b7c950df8815ef14cf956074177e9fbd40b7719/ruff-0.14.3-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:678fdd7c7d2d94851597c23ee6336d25f9930b460b55f8598e011b57c74fd8c5", size = 13539444, upload-time = "2025-10-31T00:26:08.09Z" }, + { url = "https://files.pythonhosted.org/packages/9c/aa/b30a1db25fc6128b1dd6ff0741fa4abf969ded161599d07ca7edd0739cc0/ruff-0.14.3-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:1ec1ac071e7e37e0221d2f2dbaf90897a988c531a8592a6a5959f0603a1ecf5e", size = 12252581, upload-time = "2025-10-31T00:26:10.297Z" }, + { url = "https://files.pythonhosted.org/packages/da/13/21096308f384d796ffe3f2960b17054110a9c3828d223ca540c2b7cc670b/ruff-0.14.3-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:afcdc4b5335ef440d19e7df9e8ae2ad9f749352190e96d481dc501b753f0733e", size = 12307503, upload-time = "2025-10-31T00:26:12.646Z" }, + { url = "https://files.pythonhosted.org/packages/cb/cc/a350bac23f03b7dbcde3c81b154706e80c6f16b06ff1ce28ed07dc7b07b0/ruff-0.14.3-py3-none-musllinux_1_2_i686.whl", hash = "sha256:7bfc42f81862749a7136267a343990f865e71fe2f99cf8d2958f684d23ce3dfa", size = 12675457, upload-time = "2025-10-31T00:26:15.044Z" }, + { url = "https://files.pythonhosted.org/packages/cb/76/46346029fa2f2078826bc88ef7167e8c198e58fe3126636e52f77488cbba/ruff-0.14.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:a65e448cfd7e9c59fae8cf37f9221585d3354febaad9a07f29158af1528e165f", size = 13403980, upload-time = "2025-10-31T00:26:17.81Z" }, + { url = 
"https://files.pythonhosted.org/packages/9f/a4/35f1ef68c4e7b236d4a5204e3669efdeefaef21f0ff6a456792b3d8be438/ruff-0.14.3-py3-none-win32.whl", hash = "sha256:f3d91857d023ba93e14ed2d462ab62c3428f9bbf2b4fbac50a03ca66d31991f7", size = 12500045, upload-time = "2025-10-31T00:26:20.503Z" }, + { url = "https://files.pythonhosted.org/packages/03/15/51960ae340823c9859fb60c63301d977308735403e2134e17d1d2858c7fb/ruff-0.14.3-py3-none-win_amd64.whl", hash = "sha256:d7b7006ac0756306db212fd37116cce2bd307e1e109375e1c6c106002df0ae5f", size = 13594005, upload-time = "2025-10-31T00:26:22.533Z" }, + { url = "https://files.pythonhosted.org/packages/b7/73/4de6579bac8e979fca0a77e54dec1f1e011a0d268165eb8a9bc0982a6564/ruff-0.14.3-py3-none-win_arm64.whl", hash = "sha256:26eb477ede6d399d898791d01961e16b86f02bc2486d0d1a7a9bb2379d055dc1", size = 12590017, upload-time = "2025-10-31T00:26:24.52Z" }, ] [[package]] @@ -1273,27 +1256,27 @@ wheels = [ [[package]] name = "ty" -version = "0.0.1a24" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fc/71/a1db0d604be8d0067342e7aad74ab0c7fec6bea20eb33b6a6324baabf45f/ty-0.0.1a24.tar.gz", hash = "sha256:3273c514df5b9954c9928ee93b6a0872d12310ea8de42249a6c197720853e096", size = 4386721, upload-time = "2025-10-23T13:33:29.729Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ab/89/21fb275cb676d3480b67fbbf6eb162aec200b4dcb10c7885bffc754dc73f/ty-0.0.1a24-py3-none-linux_armv6l.whl", hash = "sha256:d478cd02278b988d5767df5821a0f03b99ef848f6fc29e8c77f30e859b89c779", size = 8833903, upload-time = "2025-10-23T13:32:53.552Z" }, - { url = "https://files.pythonhosted.org/packages/a2/22/beb127bce67fc2a1f3704b6b39505d77a7078a61becfbe10c5ee7ed9f5d8/ty-0.0.1a24-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:de758790f05f0a3bb396da4c75f770c85ab3a46095ec188b830c916bd5a5bc10", size = 8691210, upload-time = "2025-10-23T13:32:55.706Z" }, - { url = 
"https://files.pythonhosted.org/packages/39/bd/190f5e934339669191179fa01c60f5a140822dc465f0d4d312985903d109/ty-0.0.1a24-py3-none-macosx_11_0_arm64.whl", hash = "sha256:68f325ddc8cfb7a7883501e5e22f01284c5d5912aaa901d21e477f38edf4e625", size = 8138421, upload-time = "2025-10-23T13:32:58.718Z" }, - { url = "https://files.pythonhosted.org/packages/40/84/f08020dabad1e660957bb641b2ba42fe1e1e87192c234b1fc1fd6fb42cf2/ty-0.0.1a24-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:49a52bbb1f8b0b29ad717d3fd70bd2afe752e991072fd13ff2fc14f03945c849", size = 8419861, upload-time = "2025-10-23T13:33:00.068Z" }, - { url = "https://files.pythonhosted.org/packages/e5/cc/e3812f7c1c2a0dcfb1bf8a5d6a7e5aa807a483a632c0d5734ea50a60a9ae/ty-0.0.1a24-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:12945fe358fb0f73acf0b72a29efcc80da73f8d95cfe7f11a81e4d8d730e7b18", size = 8641443, upload-time = "2025-10-23T13:33:01.887Z" }, - { url = "https://files.pythonhosted.org/packages/e3/8b/3fc047d04afbba4780aba031dc80e06f6e95d888bbddb8fd6da502975cfb/ty-0.0.1a24-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6231e190989798b0860d15a8f225e3a06a6ce442a7083d743eb84f5b4b83b980", size = 8997853, upload-time = "2025-10-23T13:33:03.951Z" }, - { url = "https://files.pythonhosted.org/packages/e0/d9/ae1475d9200ecf6b196a59357ea3e4f4aa00e1d38c9237ca3f267a4a3ef7/ty-0.0.1a24-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:7c6401f4a7532eab63dd7fe015c875792a701ca4b1a44fc0c490df32594e071f", size = 9676864, upload-time = "2025-10-23T13:33:05.744Z" }, - { url = "https://files.pythonhosted.org/packages/cc/d9/abd6849f0601b24d5d5098e47b00dfbdfe44a4f6776f2e54a21005739bdf/ty-0.0.1a24-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:83c69759bfa2a00278aa94210eded35aea599215d16460445cbbf5b36f77c454", size = 9351386, upload-time = "2025-10-23T13:33:07.807Z" }, - { url = 
"https://files.pythonhosted.org/packages/63/5c/639e0fe3b489c65b12b38385fe5032024756bc07f96cd994d7df3ab579ef/ty-0.0.1a24-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:71146713cb8f804aad2b2e87a8efa7e7df0a5a25aed551af34498bcc2721ae03", size = 9517674, upload-time = "2025-10-23T13:33:09.641Z" }, - { url = "https://files.pythonhosted.org/packages/78/ae/323f373fcf54a883e39ea3fb6f83ed6d1eda6dfd8246462d0cfd81dac781/ty-0.0.1a24-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4836854411059de592f0ecc62193f2b24fc3acbfe6ce6ce0bf2c6d1a5ea9de7", size = 9000468, upload-time = "2025-10-23T13:33:11.51Z" }, - { url = "https://files.pythonhosted.org/packages/14/26/1a4be005aa4326264f0e7ce554844d5ef8afc4c5600b9a38b05671e9ed18/ty-0.0.1a24-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a7f0b8546d27605e09cd0fe08dc28c1d177bf7498316dd11c3bb8ef9440bf2e1", size = 8377164, upload-time = "2025-10-23T13:33:13.504Z" }, - { url = "https://files.pythonhosted.org/packages/73/2f/dcd6b449084e53a2beb536d8721a2517143a2353413b5b323d6eb9a31705/ty-0.0.1a24-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:4e2fbf7dce2311127748824e03d9de2279e96ab5713029c3fa58acbaf19b2f51", size = 8672709, upload-time = "2025-10-23T13:33:15.213Z" }, - { url = "https://files.pythonhosted.org/packages/dc/2e/8b3b45d46085a79547e6db5295f42c6b798a0240d34454181e2ca947183c/ty-0.0.1a24-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f35b7f0a65f7e34e59f34173164946c89a4c4b1d1c18cabe662356a35f33efcd", size = 8788732, upload-time = "2025-10-23T13:33:17.347Z" }, - { url = "https://files.pythonhosted.org/packages/cf/c5/7675ff8693ad13044d86d8d4c824caf6bbb00340df05ad93d0e9d1e0338b/ty-0.0.1a24-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:120fe95eaf2a200f531f949e3dd0a9d95ab38915ce388412873eae28c499c0b9", size = 9095693, upload-time = "2025-10-23T13:33:19.836Z" }, - { url = 
"https://files.pythonhosted.org/packages/62/0b/bdba5d31aa3f0298900675fd355eec63a9c682aa46ef743dbac8f28b4608/ty-0.0.1a24-py3-none-win32.whl", hash = "sha256:d8d8379264a8c14e1f4ca9e117e72df3bf0a0b0ca64c5fd18affbb6142d8662a", size = 8361302, upload-time = "2025-10-23T13:33:21.572Z" }, - { url = "https://files.pythonhosted.org/packages/b4/48/127a45e16c49563df82829542ca64b0bc387591a777df450972bc85957e6/ty-0.0.1a24-py3-none-win_amd64.whl", hash = "sha256:2e826d75bddd958643128c309f6c47673ed6cef2ea5f2b3cd1a1159a1392971a", size = 9039221, upload-time = "2025-10-23T13:33:23.055Z" }, - { url = "https://files.pythonhosted.org/packages/31/67/9161fbb8c1a2005938bdb5ccd4e4c98ee4bea2d262afb777a4b69aa15eb5/ty-0.0.1a24-py3-none-win_arm64.whl", hash = "sha256:2efbfcdc94d306f0d25f3efe2a90c0f953132ca41a1a47d0bae679d11cdb15aa", size = 8514044, upload-time = "2025-10-23T13:33:27.816Z" }, +version = "0.0.1a25" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f6/6b/e73bc3c1039ea72936158a08313155a49e5aa5e7db5205a149fe516a4660/ty-0.0.1a25.tar.gz", hash = "sha256:5550b24b9dd0e0f8b4b2c1f0fcc608a55d0421dd67b6c364bc7bf25762334511", size = 4403670, upload-time = "2025-10-29T19:40:23.647Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/3b/4457231238a2eeb04cba4ba7cc33d735be68ee46ca40a98ae30e187de864/ty-0.0.1a25-py3-none-linux_armv6l.whl", hash = "sha256:d35b2c1f94a014a22875d2745aa0432761d2a9a8eb7212630d5caf547daeef6d", size = 8878803, upload-time = "2025-10-29T19:39:42.243Z" }, + { url = "https://files.pythonhosted.org/packages/8a/fa/a328713dd310018fc7a381693d8588185baa2fdae913e01a6839187215df/ty-0.0.1a25-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:192edac94675a468bac7f6e04687a77a64698e4e1fe01f6a048bf9b6dde5b703", size = 8695667, upload-time = "2025-10-29T19:39:45.179Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/e8/5707939118992ced2bf5385adc3ede7723c1b717b07ad14c495eea1e47b4/ty-0.0.1a25-py3-none-macosx_11_0_arm64.whl", hash = "sha256:949523621f336e01bc7d687b7bd08fe838edadbdb6563c2c057ed1d264e820cf", size = 8159012, upload-time = "2025-10-29T19:39:47.011Z" }, + { url = "https://files.pythonhosted.org/packages/eb/fb/ff313aa71602225cd78f1bce3017713d6d1b1c1e0fa8101ead4594a60d95/ty-0.0.1a25-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94f78f621458c05e59e890061021198197f29a7b51a33eda82bbb036e7ed73d7", size = 8433675, upload-time = "2025-10-29T19:39:48.443Z" }, + { url = "https://files.pythonhosted.org/packages/c0/8d/cc7e7fb57215a15b575a43ed042bdd92971871e0decec1b26d2e7d969465/ty-0.0.1a25-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d9656fca8062a2c6709c30d76d662c96d2e7dbfee8f70e55ec6b6afd67b5d447", size = 8668456, upload-time = "2025-10-29T19:39:50.412Z" }, + { url = "https://files.pythonhosted.org/packages/b8/6d/d7bf5909ed2dcdcbc1e2ca7eea80929893e2d188d9c36b3fcb2b36532ff6/ty-0.0.1a25-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9f3bbf523b49935bbd76e230408d858dce0d614f44f5807bbbd0954f64e0f01", size = 9023543, upload-time = "2025-10-29T19:39:52.292Z" }, + { url = "https://files.pythonhosted.org/packages/b4/b8/72bcefb4be32e5a84f0b21de2552f16cdb4cae3eb271ac891c8199c26b1a/ty-0.0.1a25-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:f13ea9815f4a54a0a303ca7bf411b0650e3c2a24fc6c7889ffba2c94f5e97a6a", size = 9700013, upload-time = "2025-10-29T19:39:57.283Z" }, + { url = "https://files.pythonhosted.org/packages/90/0d/cf7e794b840cf6b0bbecb022e593c543f85abad27a582241cf2095048cb1/ty-0.0.1a25-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eab6e33ebe202a71a50c3d5a5580e3bc1a85cda3ffcdc48cec3f1c693b7a873b", size = 9372574, upload-time = "2025-10-29T19:40:04.532Z" }, + { url = 
"https://files.pythonhosted.org/packages/1e/71/2d35e7d51b48eabd330e2f7b7e0bce541cbd95950c4d2f780e85f3366af1/ty-0.0.1a25-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f6b9a31da43424cdab483703a54a561b93aabba84630788505329fc5294a9c62", size = 9535726, upload-time = "2025-10-29T19:40:06.548Z" }, + { url = "https://files.pythonhosted.org/packages/57/d3/01ecc23bbd8f3e0dfbcf9172d06d84e88155c5f416f1491137e8066fd859/ty-0.0.1a25-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0a90d897a7c1a5ae9b41a4c7b0a42262a06361476ad88d783dbedd7913edadbc", size = 9003380, upload-time = "2025-10-29T19:40:08.683Z" }, + { url = "https://files.pythonhosted.org/packages/de/f9/cde9380d8a1a6ca61baeb9aecb12cbec90d489aa929be55cd78ad5c2ccd9/ty-0.0.1a25-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:93c7e7ab2859af0f866d34d27f4ae70dd4fb95b847387f082de1197f9f34e068", size = 8401833, upload-time = "2025-10-29T19:40:10.627Z" }, + { url = "https://files.pythonhosted.org/packages/0b/39/0acf3625b0c495011795a391016b572f97a812aca1d67f7a76621fdb9ebf/ty-0.0.1a25-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:4a247061bd32bae3865a236d7f8b6c9916c80995db30ae1600999010f90623a9", size = 8706761, upload-time = "2025-10-29T19:40:12.575Z" }, + { url = "https://files.pythonhosted.org/packages/25/73/7de1648f3563dd9d416d36ab5f1649bfd7b47a179135027f31d44b89a246/ty-0.0.1a25-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1711dd587eccf04fd50c494dc39babe38f4cb345bc3901bf1d8149cac570e979", size = 8792426, upload-time = "2025-10-29T19:40:14.553Z" }, + { url = "https://files.pythonhosted.org/packages/7d/8a/b6e761a65eac7acd10b2e452f49b2d8ae0ea163ca36bb6b18b2dadae251b/ty-0.0.1a25-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5f4c9b0cf7995e2e3de9bab4d066063dea92019f2f62673b7574e3612643dd35", size = 9103991, upload-time = "2025-10-29T19:40:16.332Z" }, + { url = 
"https://files.pythonhosted.org/packages/e4/25/9324ae947fcc4322470326cf8276a3fc2f08dc82adec1de79d963fdf7af5/ty-0.0.1a25-py3-none-win32.whl", hash = "sha256:168fc8aee396d617451acc44cd28baffa47359777342836060c27aa6f37e2445", size = 8387095, upload-time = "2025-10-29T19:40:18.368Z" }, + { url = "https://files.pythonhosted.org/packages/3b/2b/cb12cbc7db1ba310aa7b1de9b4e018576f653105993736c086ee67d2ec02/ty-0.0.1a25-py3-none-win_amd64.whl", hash = "sha256:a2fad3d8e92bb4d57a8872a6f56b1aef54539d36f23ebb01abe88ac4338efafb", size = 9059225, upload-time = "2025-10-29T19:40:20.278Z" }, + { url = "https://files.pythonhosted.org/packages/2f/c1/f6be8cdd0bf387c1d8ee9d14bb299b7b5d2c0532f550a6693216a32ec0c5/ty-0.0.1a25-py3-none-win_arm64.whl", hash = "sha256:dde2962d448ed87c48736e9a4bb13715a4cced705525e732b1c0dac1d4c66e3d", size = 8536832, upload-time = "2025-10-29T19:40:22.014Z" }, ] [[package]] From 0e00e5fe79ef06e4714f7df2432a8ea7eb5532a1 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Wed, 5 Nov 2025 09:02:13 +0100 Subject: [PATCH 13/48] ... 
--- entangled/document.py | 2 +- entangled/readers/markdown.py | 4 +--- entangled/tangle.py | 18 +++++++++--------- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/entangled/document.py b/entangled/document.py index 6a65fe3..70749c9 100644 --- a/entangled/document.py +++ b/entangled/document.py @@ -100,7 +100,7 @@ def __setitem__(self, key: ReferenceId, value: CodeBlock): self.root.add(key) def __contains__(self, key: str) -> bool: - return key in self.root or key in self.alias + return key in self.root def get_codeblock(self, key: ReferenceId) -> CodeBlock: return self.map[key] diff --git a/entangled/readers/markdown.py b/entangled/readers/markdown.py index fcbf191..c9d280c 100644 --- a/entangled/readers/markdown.py +++ b/entangled/readers/markdown.py @@ -136,9 +136,7 @@ def process_code_block(hooks: list[HookBase], refs: ReferenceMap, code_block: Co refs[ref] = code_block if target_file is not None: - refs.targets.add(target_file) - if target_file is not None and block_id is not None: - refs.root.make_sub(ref.namespace).alias[target_file] = block_id + refs.targets[target_file] = "::".join(list(code_block.namespace) + [ref_name]) return ref diff --git a/entangled/tangle.py b/entangled/tangle.py index 0945732..2dbf176 100644 --- a/entangled/tangle.py +++ b/entangled/tangle.py @@ -64,10 +64,10 @@ def on_begin(self) -> list[str]: else: return [] - @mawk.on_match(r"^(?P\s*)<<(?P[\w-]+)>>\s*$") + @mawk.on_match(r"^(?P\s*)<<(?P[\w:-]+)>>\s*$") def on_noweb(self, m: re.Match[str]) -> list[str]: try: - result, deps = tangle_ref(self.refs, m["refname"], type(self), self.visited) + result, deps = tangle_ref(self.refs, self.ref.namespace, m["refname"], type(self), self.visited) except KeyError: raise MissingReference(m["refname"], self.location) @@ -119,6 +119,7 @@ def on_eof(self): def tangle_ref( refs: ReferenceMap, + namespace: tuple[str, ...], ref_name: str, annotation: type[Tangler] | AnnotationMethod | None = None, _visited: Visitor[str] | None = None, 
@@ -126,25 +127,24 @@ def tangle_ref( if annotation is None: annotation = config.get.annotation - if ref_name not in refs: + if ref_name not in refs.root.sub(namespace): raise KeyError(ref_name) v = _visited or Visitor() if isinstance(annotation, AnnotationMethod): tangler = tanglers[annotation] - elif annotation is not None: - tangler = annotation else: - raise ValueError("impossible code path") + tangler = annotation - with v.visit(ref_name): + qual_name = "::".join(list(namespace) + [ref_name]) + with v.visit(qual_name): init = True result: list[str] = [] deps: set[PurePath] = set() - for ref in refs.index[ref_name]: + for ref in refs.root[ref_name]: t = tangler(refs, ref, init, v) result.append(t.tangle()) deps.update(t.deps) init = False - return "\n".join(result), deps + return "".join(result), deps From 95f8c4ddb431e7a67e859973192a59e62f65b993 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Thu, 6 Nov 2025 01:43:13 +0100 Subject: [PATCH 14/48] new readers seem to be working somewhat; namespaces work as advertised --- entangled/commands/reset.py | 4 +- entangled/config/__init__.py | 2 +- entangled/document.py | 169 -------------------- entangled/errors/user.py | 12 +- entangled/hooks/__init__.py | 2 +- entangled/hooks/base.py | 2 +- entangled/hooks/build.py | 17 +- entangled/hooks/quarto_attributes.py | 4 +- entangled/hooks/repl.py | 6 +- entangled/hooks/shebang.py | 3 +- entangled/hooks/spdx_license.py | 3 +- entangled/hooks/task.py | 22 +-- entangled/markdown_reader.py | 231 --------------------------- entangled/model/__init__.py | 18 +++ entangled/model/code_block.py | 61 +++++++ entangled/model/document.py | 49 ++++++ entangled/model/namespaces.py | 41 ++--- entangled/{ => model}/properties.py | 2 +- entangled/model/reference_id.py | 26 +++ entangled/model/reference_map.py | 99 ++++++++++++ entangled/model/reference_name.py | 29 ++++ entangled/model/tangle.py | 116 ++++++++++++++ entangled/readers/builder.py | 12 -- entangled/readers/lines.py | 4 +- 
entangled/readers/markdown.py | 16 +- entangled/readers/types.py | 2 +- entangled/readers/yaml_header.py | 10 +- entangled/tangle.py | 150 ----------------- test/readers/test_builder.py | 15 -- test/readers/test_markdown.py | 118 +++++++++++--- 30 files changed, 573 insertions(+), 672 deletions(-) delete mode 100644 entangled/document.py delete mode 100644 entangled/markdown_reader.py create mode 100644 entangled/model/code_block.py create mode 100644 entangled/model/document.py rename entangled/{ => model}/properties.py (99%) create mode 100644 entangled/model/reference_id.py create mode 100644 entangled/model/reference_map.py create mode 100644 entangled/model/reference_name.py create mode 100644 entangled/model/tangle.py delete mode 100644 entangled/readers/builder.py delete mode 100644 entangled/tangle.py delete mode 100644 test/readers/test_builder.py diff --git a/entangled/commands/reset.py b/entangled/commands/reset.py index da262e5..39374cf 100644 --- a/entangled/commands/reset.py +++ b/entangled/commands/reset.py @@ -45,8 +45,8 @@ def reset(): for h in hooks: h.pre_tangle(refs) - for tgt in refs.targets: - result, deps = tangle_ref(refs, tgt, annotation_method) + for (tgt, ref_name) in refs.targets.items(): + result, deps = tangle_ref(refs, ref_name, annotation_method) mask = next(iter(refs.by_name(tgt))).mode t.write(Path(tgt), result, list(map(Path, deps)), mask) diff --git a/entangled/config/__init__.py b/entangled/config/__init__.py index 4e47bd5..d21cbca 100644 --- a/entangled/config/__init__.py +++ b/entangled/config/__init__.py @@ -88,7 +88,7 @@ def __call__(self, **kwargs): self.config = (self.config if self.config is not None else Config()) \ | ConfigUpdate(version=__version__, **kwargs) - yield + yield self.config self.config = backup diff --git a/entangled/document.py b/entangled/document.py deleted file mode 100644 index 70749c9..0000000 --- a/entangled/document.py +++ /dev/null @@ -1,169 +0,0 @@ -from __future__ import annotations - -from 
textwrap import indent -from collections.abc import Generator, Iterable -from typing import override -from dataclasses import dataclass, field -from functools import singledispatchmethod -from pathlib import PurePath - -from .model.namespaces import Namespace -from .config.language import Language -from .properties import Property -from .errors.internal import InternalError -from .readers.text_location import TextLocation - - -def length[T](iter: Iterable[T]) -> int: - return sum(1 for _ in iter) - - -@dataclass -class ReferenceId: - name: str - namespace: tuple[str,...] - file: PurePath - ref_count: int - - @override - def __hash__(self) -> int: - return hash((self.name, self.namespace, self.file, self.ref_count)) - - -@dataclass -class PlainText: - content: str - - -@dataclass -class CodeBlock: - properties: list[Property] - indent: str - open_line: str - close_line: str - source: str - origin: TextLocation - language: Language | None = None - header: str | None = None - mode: int | None = None - namespace: tuple[str, ...] 
= () - - @property - def text(self) -> str: - return self.open_line + "\n" + self.source + "\n" + self.close_line - - @property - def indented_text(self) -> str: - return indent(self.text, self.indent) - - -Content = PlainText | ReferenceId -RawContent = PlainText | CodeBlock - - -class ReferenceNamespace(Namespace[list[ReferenceId]]): - def add(self, ref: ReferenceId): - dir = self.make_sub(ref.namespace) - if ref.name not in dir.index: - dir.index[ref.name] = [] - dir.index[ref.name].append(ref) - - -@dataclass -class ReferenceMap: - """ - Members: - `map`: maps references to actual code block content - `index`: maps names to list of references - `targets`: lists filenames; a target should be in `index` - """ - - map: dict[ReferenceId, CodeBlock] = field(default_factory=dict) - root: ReferenceNamespace = field(default_factory=ReferenceNamespace) - targets: dict[str, str] = field(default_factory=dict) - - def by_name(self, n: str, namespace: tuple[str, ...] = ()) -> Generator[CodeBlock]: - name_path = n.split("::") - if len(name_path) == 1: - return (self.map[r] for r in self.root.get(namespace, name_path[0])) - else: - return (self.map[r] for r in self.root.get((*name_path[:-1],), name_path[-1])) - - def new_id(self, filename: PurePath, namespace: tuple[str,...], name: str) -> ReferenceId: - c = length(filter(lambda r: r.file == filename, self.root.get(namespace, name))) - return ReferenceId(name, namespace, filename, c) - - def __setitem__(self, key: ReferenceId, value: CodeBlock): - if key in self.map: - raise InternalError("Duplicate key in ReferenceMap", [key]) - self.map[key] = value - self.root.add(key) - - def __contains__(self, key: str) -> bool: - return key in self.root - - def get_codeblock(self, key: ReferenceId) -> CodeBlock: - return self.map[key] - - @singledispatchmethod - def __getitem__(self, key: ReferenceId | str) -> CodeBlock | Iterable[CodeBlock]: - raise NotImplementedError(f"Invalid key: {type(key)}") - - @__getitem__.register - def 
_(self, key: ReferenceId) -> CodeBlock: - return self.map[key] - - @__getitem__.register - def _(self, key: str) -> Generator[CodeBlock]: - return self.by_name(key) - - -def content_to_text(r: ReferenceMap, c: Content) -> str: - """ - Reconstruct original plain text from a piece of content. - - Args: - r: the reference map. - c: the content. - - Returns: - A string, usually not terminated by a newline. - """ - match c: - case PlainText(s): - return s - case ReferenceId(): - return r.get_codeblock(c).indented_text - - raise ValueError("impossible code path") - - -def document_to_text(r: ReferenceMap, cs: Iterable[Content]) -> str: - """ - Reconstruct original plain text content from a reference map and - list of content. - - Args: - r: the reference map. - cs: a list of content. - - Returns: - A string, including a final newline. - - Usually this is the reconstructed content of a Markdown file. Most - editors have a convention to end a file with a newline, but this - newline is usually stripped when we read a file. - - Context: - In Python `"foo".splitlines()` gives the same as `"foo\n".splitlines()`, - with the exception of `"\n".splitlines()` giving `['']`, while - `"".splitlines()` returns `[]`. - - As an alternative, we could keep line endings by splitting with `keepends=True`, - and joining with `"".join(...)`. 
- """ - text = "\n".join(content_to_text(r, c) for c in cs) - if text[-1] != "\n": - return text + "\n" - else: - return text diff --git a/entangled/errors/user.py b/entangled/errors/user.py index 4ac1601..eda1960 100644 --- a/entangled/errors/user.py +++ b/entangled/errors/user.py @@ -3,6 +3,7 @@ from pathlib import Path from ..readers.text_location import TextLocation +from ..model.reference_name import ReferenceName class UserError(Exception): @@ -32,6 +33,15 @@ def __str__(self): return f"error: {self.msg}" +@dataclass +class MissingLanguageError(UserError): + origin: TextLocation + + @override + def __str__(self): + return f"{self.origin}: Missing language for code block." + + @dataclass class FileError(UserError): filename: Path @@ -69,7 +79,7 @@ def __str__(self): @dataclass class MissingReference(UserError): - ref_name: str + ref_name: ReferenceName location: TextLocation def __str__(self): diff --git a/entangled/hooks/__init__.py b/entangled/hooks/__init__.py index 3ef7be2..7c71a7d 100644 --- a/entangled/hooks/__init__.py +++ b/entangled/hooks/__init__.py @@ -13,7 +13,7 @@ external_hooks = { - name: discovered_hooks[name].load().Hook for name in discovered_hooks.names + name: discovered_hooks[name].load().Hook for name in discovered_hooks.names # pyright: ignore[reportAny] } hooks: dict[str, type[HookBase]] = { diff --git a/entangled/hooks/base.py b/entangled/hooks/base.py index f2fe44d..d36efe6 100644 --- a/entangled/hooks/base.py +++ b/entangled/hooks/base.py @@ -2,7 +2,7 @@ from typing import override from msgspec import Struct -from ..document import ReferenceMap, CodeBlock +from ..model import ReferenceMap, CodeBlock from ..io import Transaction diff --git a/entangled/hooks/build.py b/entangled/hooks/build.py index 925a2e2..5663f4e 100644 --- a/entangled/hooks/build.py +++ b/entangled/hooks/build.py @@ -8,15 +8,15 @@ from __future__ import annotations from dataclasses import dataclass from msgspec import field -from pathlib import Path +from pathlib 
import Path, PurePath from subprocess import run, SubprocessError, DEVNULL import logging from typing import final, override from entangled.config.language import Language -from ..properties import Property, get_attribute, get_classes -from ..document import ReferenceId, ReferenceMap, CodeBlock +from ..model.properties import Property, get_attribute, get_attribute_string, get_classes +from ..model import ReferenceId, ReferenceMap, CodeBlock from .base import HookBase, PrerequisitesFailed @@ -80,22 +80,21 @@ def check_prerequisites(self): @override def pre_tangle(self, refs: ReferenceMap): """Add a CodeBlock's target attribute to the list of targets.""" - for (ref, cb) in refs.map.items(): + for (ref, cb) in refs.items(): logging.debug("build hook: passing: %s", ref) if "build" not in get_classes(cb.properties): continue - target = get_attribute(cb.properties, "target") + target = get_attribute_string(cb.properties, "target") if target is None: continue if cb.language is None: continue logging.debug("build hook: target: %s", target) - script_file_name = get_attribute(cb.properties, "file") + script_file_name = get_attribute_string(cb.properties, "file") if script_file_name is None: - script_file_name = f".entangled/build/{ref.name}".replace(":", "_") - refs.index[script_file_name].append(ref) - refs.targets.add(script_file_name) + script_file_name = f".entangled/build/{ref.name.name}".replace(":", "_") + refs.register_target(PurePath(script_file_name), ref.name) deps = [str(s) for s in (get_attribute(cb.properties, "deps") or "").split()] self.recipes.append(Hook.Recipe(target, deps, cb.language, script_file_name)) diff --git a/entangled/hooks/quarto_attributes.py b/entangled/hooks/quarto_attributes.py index 3ae243e..c0312a0 100644 --- a/entangled/hooks/quarto_attributes.py +++ b/entangled/hooks/quarto_attributes.py @@ -5,8 +5,8 @@ from entangled.config.language import Language -from ..properties import Attribute, Class, Id, Property -from ..document import 
CodeBlock +from ..model.properties import Attribute, Class, Id, Property +from ..model import CodeBlock from .base import HookBase from ..logging import logger diff --git a/entangled/hooks/repl.py b/entangled/hooks/repl.py index 2333a36..bb8ce20 100644 --- a/entangled/hooks/repl.py +++ b/entangled/hooks/repl.py @@ -11,8 +11,8 @@ from ..logging import logger from ..io import Transaction -from ..document import CodeBlock, ReferenceMap -from ..properties import Class, get_attribute, get_id +from ..model import CodeBlock, ReferenceMap +from ..model.properties import Class, get_attribute, get_attribute_string, get_id log = logger() @@ -63,7 +63,7 @@ def on_read(self, code: CodeBlock): return if session_name not in self.sessions.keys(): - filename = get_attribute(code.properties, "session") + filename = get_attribute_string(code.properties, "session") if filename is None: log.error(f"{code.origin}: REPL hook session opened without session attribute.") return diff --git a/entangled/hooks/shebang.py b/entangled/hooks/shebang.py index 568c8ab..20247e3 100644 --- a/entangled/hooks/shebang.py +++ b/entangled/hooks/shebang.py @@ -1,7 +1,8 @@ from typing import final, override -from ..document import CodeBlock +from ..model import CodeBlock from .base import HookBase + @final class Hook(HookBase): @override diff --git a/entangled/hooks/spdx_license.py b/entangled/hooks/spdx_license.py index 4d8a776..3368009 100644 --- a/entangled/hooks/spdx_license.py +++ b/entangled/hooks/spdx_license.py @@ -1,7 +1,8 @@ from typing import final, override -from ..document import CodeBlock +from ..model import CodeBlock from .base import HookBase + @final class Hook(HookBase): @override diff --git a/entangled/hooks/task.py b/entangled/hooks/task.py index b2708e2..8544ad2 100644 --- a/entangled/hooks/task.py +++ b/entangled/hooks/task.py @@ -3,21 +3,20 @@ from dataclasses import dataclass, fields import json from pathlib import Path -from typing import Any, final, override +from typing import 
final, override, cast from ..config import AnnotationMethod from ..io import Transaction -from ..document import CodeBlock, ReferenceId, ReferenceMap -from ..properties import Class, Property, get_attribute, get_classes +from ..model import CodeBlock, ReferenceId, ReferenceMap, tangle_ref +from ..model.properties import Class, Property, get_attribute, get_attribute_string, get_classes from .base import HookBase from ..logging import logger -from ..tangle import tangle_ref log = logger() -def ensure_list(strs: str | list[str]) -> list[str]: +def ensure_list(strs: str | list[str] | object) -> list[str]: """Some options may be given either as a list or as a single string, where the string is supposed to have a whitespace separated list. This function converts from either to a list of strings. @@ -25,9 +24,10 @@ def ensure_list(strs: str | list[str]) -> list[str]: if isinstance(strs, str): return strs.split() elif isinstance(strs, list): - return strs + assert all(isinstance(s, str) for s in strs) # pyright: ignore[reportUnknownVariableType] + return cast(list[str], strs) else: - raise ValueError(f"Expected `str` or `list[str]`, got: {strs}") + raise ValueError(f"Expected string or list, got: {strs}") @final @@ -44,8 +44,8 @@ class Recipe: ref: ReferenceId def to_brei_task(self, refs: ReferenceMap): - cb = refs.get_codeblock(self.ref) - if (path := get_attribute(cb.properties, "file")) is None: + cb = refs[self.ref] + if (path := get_attribute_string(cb.properties, "file")) is None: script, _ = tangle_ref(refs, self.ref.name, AnnotationMethod.NAKED) else: script = None @@ -70,7 +70,7 @@ def __init__(self, config: Hook.Config): @override def pre_tangle(self, refs: ReferenceMap): - for ref, cb in refs.map.items(): + for ref, cb in refs.items(): if "task" not in get_classes(cb.properties): continue @@ -84,7 +84,7 @@ def pre_tangle(self, refs: ReferenceMap): case _: continue - record: dict[str, Any] = { + record: dict[str, object] = { f.name: get_attribute(cb.properties, 
f.name) for f in fields(Hook.Recipe) } diff --git a/entangled/markdown_reader.py b/entangled/markdown_reader.py deleted file mode 100644 index 28f1b55..0000000 --- a/entangled/markdown_reader.py +++ /dev/null @@ -1,231 +0,0 @@ -from copy import copy -from dataclasses import dataclass, field -from pathlib import Path - -import re -from typing import override -import mawk -import logging - -from entangled.config.language import Language -from entangled.io.transaction import Transaction -from entangled.io.virtual import FileCache - -from .config import config -from .utility import first -from .readers.text_location import TextLocation -from .document import CodeBlock, ReferenceMap, Content, PlainText, RawContent -from .properties import Property, read_properties, get_attribute, get_classes, get_id -from .hooks.base import HookBase -from .errors.user import IndentationError -from . import parsing - - -@dataclass -class PartialCodeBlock: - indent: str - open_line: str - origin: TextLocation - properties: list[Property] = field(default_factory=list) - close_line: str | None = None - source: str | None = None - language: Language | None = None - header: str | None = None - mode: int | None = None - - def finalize(self) -> CodeBlock: - assert self.close_line is not None - assert self.source is not None - - return CodeBlock( - properties = self.properties, - indent = self.indent, - open_line = self.open_line, - close_line = self.close_line, - source = self.source, - origin = self.origin, - language = self.language, - header = self.header, - mode = self.mode - ) - - -class MarkdownLexer(mawk.RuleSet): - """Reads a Markdown file, and splits it up into code blocks and other - content.""" - def __init__( - self, - filename: Path - ): - self.location: TextLocation = TextLocation(filename) - self.raw_content: list[RawContent] = [] - self.inside_codeblock: bool = False - self.current_content: list[str] = [] - self.current_codeblock: PartialCodeBlock | None = None - self.ignore: 
bool = False - - def flush_plain_text(self): - if self.current_content: - self.raw_content.append(PlainText("\n".join(self.current_content))) - self.current_content = [] - - @mawk.always - def on_next_line(self, _): - self.location.line_number += 1 - - @mawk.on_match(config.get.markers.begin_ignore) - def on_begin_ignore(self, _): - self.ignore = True - logging.debug("ignoring markdown block %s", self.location) - - @mawk.on_match(config.get.markers.end_ignore) - def on_end_ignore(self, _): - self.ignore = False - logging.debug("end of ignore") - - @mawk.on_match(config.get.markers.open) - def on_open_codeblock(self, m: re.Match[str]) -> list[str] | None: - if self.ignore: - return None - if self.inside_codeblock: - return None - logging.debug("triggered on codeblock: %s", m.group(0)) - self.current_codeblock = PartialCodeBlock( - indent = m["indent"], - origin = copy(self.location), - open_line = m[0].removeprefix(m["indent"]) - ) - try: - self.current_codeblock.properties.extend(read_properties(m["properties"])) - logging.debug("properties: %s", self.current_codeblock.properties) - self.flush_plain_text() - self.inside_codeblock = True - except parsing.Failure as f: - logging.error("%s: Parsing error: %s", self.location, f) - logging.error("Continuing parsing rest of document.") - return [] - - @mawk.on_match(config.get.markers.close) - def on_close_codeblock(self, m: re.Match[str]) -> list[str] | None: - if self.ignore: - return None - if not self.inside_codeblock or self.current_codeblock is None: - return None - - if len(m["indent"]) < len(self.current_codeblock.indent): - raise IndentationError(self.location) - - if m["indent"] != self.current_codeblock.indent: - return None # treat this as code-block content - - language_class = first(get_classes(self.current_codeblock.properties)) - language = config.get_language(language_class) if language_class else None - if language_class and not language: - logging.warning(f"`{self.location}`: language 
`{language_class}` unknown.") - self.current_codeblock.language = language - - self.current_codeblock.source = "\n".join( - line.removeprefix(self.current_codeblock.indent) - for line in self.current_content - ) - - self.current_codeblock.close_line = m[0].removeprefix(self.current_codeblock.indent) - - self.raw_content.append(self.current_codeblock.finalize()) - self.current_content = [] - - self.current_codeblock = None - self.inside_codeblock = False - return [] - - @mawk.always - def add_line(self, line: str) -> list[str]: - self.current_content.append(line) - return [] - - @override - def on_eof(self) -> list[str]: - self.flush_plain_text() - return [] - -def read_markdown_file( - t: Transaction, - path: Path, - refs: ReferenceMap | None = None, - hooks: list[HookBase] | None = None) \ - -> tuple[ReferenceMap, list[Content]]: - """ - Read a Markdown file. - - Args: - path: Path to the file. - refs: If given, references are added to this existing reference map. - hooks: List of hooks to be processed. - - Returns: - A reference map and a list of content. - - This splits the Markdown file into code blocks and other text. The resulting - content is a list of `PlainText | ReferenceId`. Each `ReferenceId` can be - looked up in the reference map. - """ - rel_path = path.resolve().relative_to(Path.cwd()) - return read_markdown_string(t.read(rel_path), rel_path, refs, hooks) - - -def read_markdown_string( - text: str, - path_str: Path | None = None, - refs: ReferenceMap | None = None, - hooks: list[HookBase] | None = None) \ - -> tuple[ReferenceMap, list[Content]]: - """ - Parse Markdown from a string. - - Args: - text: Input string. - path: Path to the file from which the string was read, used for printing - error messages. - refs: If given, references are added to this existing reference map. - hooks: List of hooks to be processed. - - Returns: - A reference map and a list of content. 
- """ - path_str = path_str or Path("-") - md = MarkdownLexer(path_str) - _ = md.run(text) - - hooks = hooks if hooks is not None else [] - refs = refs if refs is not None else ReferenceMap() - - def process(r: RawContent) -> Content: - match r: - case CodeBlock(): - for h in hooks: - h.on_read(r) - block_id = get_id(r.properties) - target_file = get_attribute(r.properties, "file") - - if mode := get_attribute(r.properties, "mode"): - r.mode = int(mode, 8) - - ref_name = block_id or target_file - if ref_name is None: - ref_name = f"unnamed-{r.origin}" - ref = refs.new_id(r.origin.filename, (), ref_name) - - refs[ref] = r - if target_file is not None: - refs.targets.add(target_file) - if target_file is not None and block_id is not None: - refs.alias[target_file] = block_id - - return ref - - case PlainText(): - return r - - content = list(map(process, md.raw_content)) - logging.debug("found ids: %s", list(refs.map.keys())) - return refs, content diff --git a/entangled/model/__init__.py b/entangled/model/__init__.py index e69de29..58324ac 100644 --- a/entangled/model/__init__.py +++ b/entangled/model/__init__.py @@ -0,0 +1,18 @@ +from .code_block import CodeBlock +from .reference_id import ReferenceId +from .reference_map import ReferenceMap +from .reference_name import ReferenceName +from .document import Document, PlainText, Content, RawContent +from .tangle import tangle_ref + +__all__ = [ + "Content", + "RawContent", + "PlainText", + "CodeBlock", + "ReferenceId", + "ReferenceName", + "ReferenceMap", + "Document", + "tangle_ref", +] diff --git a/entangled/model/code_block.py b/entangled/model/code_block.py new file mode 100644 index 0000000..5fcf746 --- /dev/null +++ b/entangled/model/code_block.py @@ -0,0 +1,61 @@ +from dataclasses import dataclass + +from ..readers.text_location import TextLocation +from ..readers.lines import lines +from ..config.language import Language +from .properties import Property + + +def indent(prefix: str, text: str) -> str: + return 
"".join(prefix + line for line in lines(text)) + + +@dataclass +class CodeBlock: + """ + Contains all distilled information on a codeblock. + + Attributes: + properties: Id, classes and attributes. + indent: The indentation prefix. + open_line: One or more lines preceding the source. When `quatro_attributes` + are enabled, these attribute lines are appended onto `open_line`. + close_line: One or more lines after the source content of the code block. + source: Source code in this code block. + origin: Original location in markup source. + language: Detected programming language. + header: (assumes `file` attribute) Content at the top of the code block + that should appear before the first comment line at the top of a + file (shebang or spdx license). + mode: (assumes `file` attribute) The access mode of the file being + written as a string in octal numbers. Example: "0755" to make a + file executable. + namespace: The namespace of the markup file from which the code block + was read. + """ + properties: list[Property] + indent: str + open_line: str + close_line: str + source: str + origin: TextLocation + language: Language | None = None + header: str | None = None + mode: int | None = None + namespace: tuple[str, ...] = () + + @property + def text(self) -> str: + """ + The unindented text that should be identical to the text from which the + code block was extracted. + """ + return self.open_line + (self.header or "") + self.source + self.close_line + + @property + def indented_text(self) -> str: + """ + The text that should be identical to the text from which the code block was + extracted, including the indentation. 
+ """ + return indent(self.indent, self.text) diff --git a/entangled/model/document.py b/entangled/model/document.py new file mode 100644 index 0000000..e06c91f --- /dev/null +++ b/entangled/model/document.py @@ -0,0 +1,49 @@ +from dataclasses import dataclass, field +from pathlib import PurePath + +from ..config.config_data import Config +from .reference_id import ReferenceId +from .reference_map import ReferenceMap +from .code_block import CodeBlock +from .tangle import tangle_ref + + +@dataclass +class PlainText: + content: str + + +type RawContent = PlainText | CodeBlock +type Content = PlainText | ReferenceId + + +def content_to_text(r: ReferenceMap, c: Content) -> str: + """ + Reconstruct original plain text from a piece of content. + + Args: + r: the reference map. + c: the content. + + Returns: + A string, usually not terminated by a newline. + """ + match c: + case PlainText(s): + return s + case ReferenceId(): + return r[c].indented_text + + +@dataclass +class Document: + config: Config = Config() + reference_map: ReferenceMap = field(default_factory=ReferenceMap) + content: dict[PurePath, list[Content]] = field(default_factory=dict) + + def source_text(self, path: PurePath) -> str: + return "".join(content_to_text(self.reference_map, c) for c in self.content[path]) + + def target_text(self, path: PurePath) -> tuple[str, set[PurePath]]: + ref_name = self.reference_map.select_by_target(path) + return tangle_ref(self.reference_map, ref_name, self.config.annotation) diff --git a/entangled/model/namespaces.py b/entangled/model/namespaces.py index 00d4862..47e039f 100644 --- a/entangled/model/namespaces.py +++ b/entangled/model/namespaces.py @@ -3,9 +3,14 @@ from dataclasses import dataclass, field from collections import defaultdict +from .reference_name import ReferenceName + @dataclass class Namespace[T]: + """ + A structure of nested namespaces containing objects of type `T`. 
+ """ sep: str = "::" subspace: defaultdict[str, Namespace[T]] = field( default_factory=lambda: defaultdict(Namespace) @@ -26,28 +31,26 @@ def make_sub(self, namespace: tuple[str, ...]) -> Namespace[T]: dir = dir.subspace[s] return dir - def get(self, namespace: tuple[str, ...], name: str) -> T: - dir = self.sub(namespace) + def get(self, name: ReferenceName) -> T: + dir = self.sub(name.namespace) if name in dir.index: - return dir.index[name] + return dir.index[name.name] - raise KeyError(f"no reference `{name}` found in namespace `{self.sep.join(namespace)}`") + raise KeyError(f"no reference `{name.name}` found in namespace `{self.sep.join(name.namespace)}`") - def __getitem__(self, key: str | tuple[str, ...]) -> T: + def __getitem__(self, key: str | ReferenceName) -> T: match key: + case ReferenceName(): + return self.get(key) + case str(): - path = key.split(self.sep) - return self.get((*path[:-1],), path[-1]) - case tuple(): - return self.get(key[:-1], key[-1]) - - def __setitem__(self, key: str, value: T): - path = key.split(self.sep) - dir = self.make_sub((*path[:-1],)) - dir.index[key] = value - - def __contains__(self, key: str) -> bool: - path = key.split(self.sep) - dir = self.sub((*path[:-1],)) - return path[-1] in dir.index + return self.get(ReferenceName.from_str(key)) + + def __setitem__(self, key: ReferenceName, value: T): + dir = self.make_sub(key.namespace) + dir.index[key.name] = value + + def __contains__(self, key: ReferenceName) -> bool: + dir = self.sub(key.namespace) + return key.name in dir.index diff --git a/entangled/properties.py b/entangled/model/properties.py similarity index 99% rename from entangled/properties.py rename to entangled/model/properties.py index c3ca6e9..ad81eaf 100644 --- a/entangled/properties.py +++ b/entangled/model/properties.py @@ -8,7 +8,7 @@ from dataclasses import dataclass -from .parsing import ( +from ..parsing import ( Parser, many, tokenize, diff --git a/entangled/model/reference_id.py 
b/entangled/model/reference_id.py new file mode 100644 index 0000000..7675868 --- /dev/null +++ b/entangled/model/reference_id.py @@ -0,0 +1,26 @@ +from dataclasses import dataclass +from pathlib import PurePath +from typing import override + +from .reference_name import ReferenceName + + +@dataclass(frozen=True) +class ReferenceId: + """ + The `ReferenceId` is the main key type into the `ReferenceMap`, and + uniquely identifies a single code block. + + Attributes: + name: Full qualified name of the reference. + file: The markup source file of this reference. + ref_count: The index within the source file, in case of repeated entries. + This index starts at 0 for each input file using the same name. + """ + name: ReferenceName + file: PurePath + ref_count: int + + @override + def __hash__(self) -> int: + return hash((self.name, self.file, self.ref_count)) diff --git a/entangled/model/reference_map.py b/entangled/model/reference_map.py new file mode 100644 index 0000000..a30ef1a --- /dev/null +++ b/entangled/model/reference_map.py @@ -0,0 +1,99 @@ +from collections import defaultdict +from collections.abc import Iterable, Iterator, Mapping, MutableMapping +from dataclasses import dataclass, field +from pathlib import PurePath +from typing import override + + +from ..errors.internal import InternalError + +from .code_block import CodeBlock +from .properties import get_attribute_string +from .reference_id import ReferenceId +from .reference_name import ReferenceName + + +def length[T](seq: Iterable[T]) -> int: + """Compute the length of an iterable.""" + i = 0 + for _ in seq: + i += 1 + return i + + +@dataclass +class ReferenceMap(MutableMapping[ReferenceId, CodeBlock]): + """ + Members: + `map`: maps references to actual code block content + `root`: namespace root + `targets`: lists filenames; a target should be in `index` + + The `ReferenceMap` implements `Mapping[ReferenceId, CodeBlock]`. In + addition to that, we keep an index on `ReferenceName`. 
+ """ + + _map: dict[ReferenceId, CodeBlock] = field(default_factory=dict) + _index: defaultdict[ReferenceName, list[ReferenceId]] \ + = field(default_factory=lambda: defaultdict(list)) + _targets: dict[PurePath, ReferenceName] = field(default_factory=dict) + + def select_by_name(self, name: ReferenceName) -> list[ReferenceId]: + """Return a list of references with the same name.""" + return self._index[name] + + def has_name(self, key: ReferenceName) -> bool: + """Check that a name is present.""" + return key in self._index + + def new_id(self, filename: PurePath, name: ReferenceName) -> ReferenceId: + """Create a new `ReferenceId` with a `ref_count` succeeding the last one + by the same name.""" + c = length(filter(lambda r: r.file == filename, self._index[name])) + return ReferenceId(name, filename, c) + + def select_by_target(self, target: PurePath) -> ReferenceName: + return self._targets[target] + + def register_target(self, target: PurePath, ref_name: ReferenceName): + self._targets[target] = ref_name + + @override + def __contains__(self, key: object) -> bool: + return key in self._map + + @override + def __setitem__(self, key: ReferenceId, value: CodeBlock): + if key in self._map: + raise InternalError("Duplicate key in ReferenceMap", [key]) + self._map[key] = value + self._index[key.name].append(key) + + if filename := get_attribute_string(value.properties, "file"): + self._targets[PurePath(filename)] = key.name + + @override + def __getitem__(self, key: ReferenceId) -> CodeBlock: + return self._map[key] + + @override + def __delitem__(self, key: ReferenceId): + if key not in self: + return + + value = self._map[key] + if filename := get_attribute_string(value.properties, "file"): + del self._targets[PurePath(filename)] + self._index[key.name].remove(key) + del self._map[key] + + @override + def __len__(self) -> int: + return len(self._map) + + @override + def __iter__(self) -> Iterator[ReferenceId]: + return iter(self._map) + + def __bool__(self) -> 
bool: + return bool(self._map) diff --git a/entangled/model/reference_name.py b/entangled/model/reference_name.py new file mode 100644 index 0000000..62557b6 --- /dev/null +++ b/entangled/model/reference_name.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import override + + +@dataclass(frozen=True) +class ReferenceName: + """ + Collects the concepts of a namespace and name into one object. + """ + namespace: tuple[str, ...] + name: str + + @override + def __hash__(self) -> int: + return hash((*self.namespace, self.name)) + + @override + def __str__(self): + return "::".join(self.namespace) + "::" + self.name + + @staticmethod + def from_str(name: str, namespace: tuple[str, ...] = ()) -> ReferenceName: + path = name.split("::") + if len(path) == 1: + return ReferenceName(namespace, name) + else: + return ReferenceName(tuple(path[:-1]), path[-1]) diff --git a/entangled/model/tangle.py b/entangled/model/tangle.py new file mode 100644 index 0000000..ee7a268 --- /dev/null +++ b/entangled/model/tangle.py @@ -0,0 +1,116 @@ +from collections.abc import Callable, Generator, Iterator +from dataclasses import dataclass, field +from contextlib import contextmanager +from pathlib import PurePath + +import re +import os + + +from ..config import AnnotationMethod +from ..readers.lines import lines +from ..errors.user import CyclicReference, MissingLanguageError, MissingReference + +from .reference_map import ReferenceMap +from .reference_id import ReferenceId +from .reference_name import ReferenceName + + +@dataclass +class Visitor[T]: + _visited: dict[T, int] = field(default_factory=dict) + + def in_order(self) -> list[T]: + return [k for k, _ in sorted(self._visited.items(), key=lambda kv: kv[1])] + + @contextmanager + def visit(self, x: T): + if x in self._visited: + raise CyclicReference(str(x), list(map(str, self.in_order()))) + self._visited[x] = len(self._visited) + yield + del self._visited[x] + + +type Deps = 
set[PurePath] +type Tangler = Callable[[Tangler, Deps, ReferenceId, bool], Iterator[str]] + + +def indent(prefix: str, g: Iterator[str]) -> Iterator[str]: + return map(lambda line: prefix + line, g) + + +def naked_tangler(refs: ReferenceMap) -> Tangler: + visitor: Visitor[ReferenceId] = Visitor() + + def tangler( + recur: Tangler, deps: set[PurePath], ref: ReferenceId, skip_header: bool + ) -> Generator[str]: + code_block = refs[ref] + deps.add(code_block.origin.filename) + + if code_block.header and not skip_header: + yield code_block.header + + for line in lines(code_block.source): + if m := re.match(r"^(?P\s*)<<(?P[\w:-]+)>>\s*$", line.rstrip()): + ref_name = ReferenceName.from_str(m["refname"], code_block.namespace) + if not refs.has_name(ref_name): + raise MissingReference(ref_name, code_block.origin) + for ref in refs.select_by_name(ref_name): + with visitor.visit(ref): + yield from indent(m["indent"], recur(recur, deps, ref, False)) + else: + yield line + + return tangler + + +def annotated_tangler(refs: ReferenceMap) -> Tangler: + naked = naked_tangler(refs) + + def tangler( + recur: Tangler, deps: set[PurePath], ref: ReferenceId, skip_header: bool + ) -> Generator[str]: + code_block = refs[ref] + if code_block.language is None: + raise MissingLanguageError(code_block.origin) + + language = code_block.language + open_comment = language.comment.open + close_comment = ( + "" if language.comment.close is None else f" {language.comment.close}" + ) + + if code_block.header and not skip_header: + yield code_block.header + yield f"{open_comment} ~/~ begin <<{ref.file.as_posix()}#{ref.name}>>[{ref.ref_count}]{close_comment}{os.linesep}" + yield from naked(recur, deps, ref, True) + yield f"{open_comment} ~/~ end{close_comment}{os.linesep}" + + return tangler + + +tanglers = { + AnnotationMethod.NAKED: naked_tangler, + AnnotationMethod.STANDARD: annotated_tangler, + AnnotationMethod.SUPPLEMENTED: annotated_tangler, +} + + +def tangle_ref( + refs: ReferenceMap, + 
name: ReferenceName, + annotation: AnnotationMethod = AnnotationMethod.STANDARD, +) -> tuple[str, set[PurePath]]: + if not refs.has_name(name): + raise KeyError(name) + tangler = tanglers[annotation](refs) + deps: set[PurePath] = set() + out = "" + + for ref in refs.select_by_name(name): + for line in tangler(tangler, deps, ref, False): + out += line + + return out, deps diff --git a/entangled/readers/builder.py b/entangled/readers/builder.py deleted file mode 100644 index 9ad0731..0000000 --- a/entangled/readers/builder.py +++ /dev/null @@ -1,12 +0,0 @@ -from dataclasses import dataclass -from collections.abc import Generator - - -@dataclass -class Builder[T, U]: - parent: Generator[T, None, U] - result: U | None = None - - def __iter__(self) -> Generator[T, None, U]: - self.result = yield from self.parent - return self.result diff --git a/entangled/readers/lines.py b/entangled/readers/lines.py index 56c63b0..5969d72 100644 --- a/entangled/readers/lines.py +++ b/entangled/readers/lines.py @@ -1,11 +1,13 @@ from collections.abc import Generator from pathlib import PurePath -from .types import InputToken from .text_location import TextLocation from .peekable import peekable +type InputToken = tuple[TextLocation, str] + + def lines(text: str) -> Generator[str]: pos = 0 while (next_pos := text.find("\n", pos)) != -1: diff --git a/entangled/readers/markdown.py b/entangled/readers/markdown.py index c9d280c..ac8b8da 100644 --- a/entangled/readers/markdown.py +++ b/entangled/readers/markdown.py @@ -1,13 +1,12 @@ from collections.abc import Generator, Iterator -from pathlib import PurePath from functools import partial from ..config.namespace_default import NamespaceDefault -from ..document import CodeBlock, Content, RawContent, PlainText, ReferenceId, ReferenceMap +from ..model import CodeBlock, Content, RawContent, PlainText, ReferenceId, ReferenceMap, ReferenceName from ..config import Config from ..errors.user import CodeAttributeError, IndentationError -from 
..properties import get_attribute_string, read_properties, get_classes, get_id +from ..model.properties import get_attribute_string, read_properties, get_classes, get_id from ..utility import first from ..hooks import get_hooks, HookBase @@ -132,11 +131,8 @@ def process_code_block(hooks: list[HookBase], refs: ReferenceMap, code_block: Co ref_name = block_id or target_file if ref_name is None: ref_name = f"unnamed-{code_block.origin}" - ref = refs.new_id(code_block.origin.filename, code_block.namespace, ref_name) - + ref = refs.new_id(code_block.origin.filename, ReferenceName(code_block.namespace, ref_name)) refs[ref] = code_block - if target_file is not None: - refs.targets[target_file] = "::".join(list(code_block.namespace) + [ref_name]) return ref @@ -169,13 +165,13 @@ def flush(): yield from flush() -def markdown(refs: ReferenceMap, input: InputStream) -> Generator[Content, None, ReferenceMap]: +def markdown(config: Config, refs: ReferenceMap, input: InputStream) -> Generator[Content, None, Config]: header = yield from read_yaml_header(input) - config = get_config(header) + config = get_config(header, config) hooks = get_hooks(config) yield from map( partial(process_token, hooks, refs), collect_plain_text(raw_markdown(config, input))) - return refs + return config diff --git a/entangled/readers/types.py b/entangled/readers/types.py index 17522c6..f893311 100644 --- a/entangled/readers/types.py +++ b/entangled/readers/types.py @@ -3,7 +3,7 @@ from .text_location import TextLocation from .peekable import Peekable -from ..document import Content, RawContent +from ..model import Content, RawContent type InputToken = tuple[TextLocation, str] diff --git a/entangled/readers/yaml_header.py b/entangled/readers/yaml_header.py index 1ee40c3..2eb7887 100644 --- a/entangled/readers/yaml_header.py +++ b/entangled/readers/yaml_header.py @@ -5,7 +5,7 @@ import msgspec from ..config import Config, ConfigUpdate, config -from ..document import PlainText +from ..model import 
PlainText from ..errors.user import ParseError from .types import InputStream, MarkdownStream from .delimiters import delimited_token_getter @@ -30,7 +30,7 @@ def read_yaml_header(input: InputStream) -> MarkdownStream[object]: raise ParseError(delimited_token.origin, str(e)) -def get_config(header: object) -> Config: +def get_config(header: object, base_config: Config | None = None) -> Config: """ Get the `entangled` component from the unstructured header data, and convert it to a `Config` object. @@ -40,15 +40,17 @@ def get_config(header: object) -> Config: other than an object/dictionary or the conversion to `Config` failed, a `TypeError` is raised. """ + base_config = base_config or config.get + if isinstance(header, dict): header = cast(dict[str, object], header) try: - return config.get | msgspec.convert(header.get("entangled", None), ConfigUpdate) + return base_config | msgspec.convert(header.get("entangled", None), ConfigUpdate) except msgspec.ValidationError as e: logging.error(e) raise TypeError() elif header is None: - return config.get + return base_config else: raise TypeError() diff --git a/entangled/tangle.py b/entangled/tangle.py deleted file mode 100644 index 2dbf176..0000000 --- a/entangled/tangle.py +++ /dev/null @@ -1,150 +0,0 @@ -from typing import TypeVar, Generic, override -from dataclasses import dataclass, field -from textwrap import indent -from contextlib import contextmanager -from copy import copy -from pathlib import PurePath - -import re -import mawk - -from .config import AnnotationMethod -from .readers.text_location import TextLocation -from .document import ( - ReferenceMap, - ReferenceId, - CodeBlock, -) -from .errors.user import CyclicReference, MissingReference -from .config import config - - -T = TypeVar("T") - - -@dataclass -class Visitor(Generic[T]): - _visited: dict[T, int] = field(default_factory=dict) - - def in_order(self) -> list[T]: - return [k for k, _ in sorted(self._visited.items(), key=lambda kv: kv[1])] - - 
@contextmanager - def visit(self, x: T): - if x in self._visited: - raise CyclicReference(str(x), list(map(str, self.in_order()))) - self._visited[x] = len(self._visited) - yield - del self._visited[x] - - -@dataclass -class Tangler(mawk.RuleSet): - refs: ReferenceMap - ref: ReferenceId - init: bool - visited: Visitor[str] - deps: set[PurePath] = field(init=False) - cb: CodeBlock = field(init=False) - location: TextLocation = field(init=False) - - def __post_init__(self): - self.cb = self.refs.get_codeblock(self.ref) - self.location = copy(self.cb.origin) - self.deps = { self.cb.origin.filename } - - @mawk.always - def lineno(self, _): - self.location.line_number += 1 - - @override - def on_begin(self) -> list[str]: - if self.cb.header is not None: - return [self.cb.header] - else: - return [] - - @mawk.on_match(r"^(?P\s*)<<(?P[\w:-]+)>>\s*$") - def on_noweb(self, m: re.Match[str]) -> list[str]: - try: - result, deps = tangle_ref(self.refs, self.ref.namespace, m["refname"], type(self), self.visited) - - except KeyError: - raise MissingReference(m["refname"], self.location) - - self.deps.update(deps) - return [indent(result, m["indent"])] - - def tangle(self): - return super().run(self.cb.source) - - -@dataclass -class AnnotatedTangler(Tangler): - close_comment: str = field(init=False) - - def __post_init__(self): - super().__post_init__() - assert self.cb.language - self.close_comment = ( - "" - if self.cb.language.comment.close is None - else f" {self.cb.language.comment.close}" - ) - - @override - def on_begin(self) -> list[str]: - assert self.cb.language - count = "init" if self.init else str(self.ref.ref_count) - result: list[str] = [] - if self.cb.header is not None: - result.append(self.cb.header) - result.append( - f"{self.cb.language.comment.open} ~/~ begin <<{self.ref.file.as_posix()}#{self.ref.name}>>[{count}]{self.close_comment}" - ) - return result - - @override - def on_eof(self): - assert self.cb.language - return [f"{self.cb.language.comment.open} 
~/~ end{self.close_comment}"] - - -tanglers = { - AnnotationMethod.NAKED: Tangler, - AnnotationMethod.STANDARD: AnnotatedTangler, - AnnotationMethod.SUPPLEMENTED: AnnotatedTangler, -} - - -def tangle_ref( - refs: ReferenceMap, - namespace: tuple[str, ...], - ref_name: str, - annotation: type[Tangler] | AnnotationMethod | None = None, - _visited: Visitor[str] | None = None, -) -> tuple[str, set[PurePath]]: - if annotation is None: - annotation = config.get.annotation - - if ref_name not in refs.root.sub(namespace): - raise KeyError(ref_name) - v = _visited or Visitor() - - if isinstance(annotation, AnnotationMethod): - tangler = tanglers[annotation] - else: - tangler = annotation - - qual_name = "::".join(list(namespace) + [ref_name]) - with v.visit(qual_name): - init = True - result: list[str] = [] - deps: set[PurePath] = set() - for ref in refs.root[ref_name]: - t = tangler(refs, ref, init, v) - result.append(t.tangle()) - deps.update(t.deps) - init = False - - return "".join(result), deps diff --git a/test/readers/test_builder.py b/test/readers/test_builder.py deleted file mode 100644 index 9e5d6ce..0000000 --- a/test/readers/test_builder.py +++ /dev/null @@ -1,15 +0,0 @@ -from entangled.readers.builder import Builder - - -def make_sum(x): - s = 0 - for y in x: - s += y - yield y - return s - - -def test_builer(): - b = Builder(make_sum(range(10))) - assert list(b) == list(range(10)) - assert b.result == sum(range(10)) diff --git a/test/readers/test_markdown.py b/test/readers/test_markdown.py index a65d5e2..f46e651 100644 --- a/test/readers/test_markdown.py +++ b/test/readers/test_markdown.py @@ -2,12 +2,12 @@ from pathlib import PurePath from entangled.config.namespace_default import NamespaceDefault -from entangled.document import PlainText, CodeBlock, ReferenceId, ReferenceMap +from entangled.config.version import Version +from entangled.model import Document, PlainText, CodeBlock, ReferenceId, ReferenceMap, ReferenceName, tangle_ref from 
entangled.readers.markdown import code_block, collect_plain_text, ignore_block, markdown, raw_markdown from entangled.readers.lines import numbered_lines -from entangled.config import AnnotationMethod, Config, config +from entangled.config import AnnotationMethod, Config, ConfigUpdate from entangled.readers.types import run_generator, Reader -from entangled.tangle import tangle_ref test0 = """ @@ -93,15 +93,17 @@ def test_collect_plain_text(): def test_markdown(): refs = ReferenceMap() - ol, refs = run_reader(partial(markdown, refs), test0) + ol, _ = run_reader(partial(markdown, Config(), refs), test0) assert len(ol) == 1 assert ol[0] == PlainText("abcdefg") - assert not refs.map + assert not refs - ol, refs = run_reader(partial(markdown, refs), test3) + ol, _ = run_reader(partial(markdown, Config(), refs), test3) assert isinstance(ol[-1], ReferenceId) - assert ol[-1].name == "test" - assert "test" in refs + assert ol[-1].name.name == "test" + assert refs + assert refs.has_name(ReferenceName.from_str("test")) + assert ol[-1] in refs test_ns_a = """ @@ -111,7 +113,7 @@ def test_markdown(): # part a ``` -``` {.python #refers-to-a} +``` {.python file=a.py} <> ``` """.strip() @@ -123,38 +125,102 @@ def test_markdown(): ``` {.python #a} # part b ``` + +``` {.python file=b.py} +<> +``` """.strip() def test_global_namespace(): refs = ReferenceMap() - - with config(namespace_default=NamespaceDefault.GLOBAL): - _, refs = run_reader(partial(markdown, refs), test_ns_a, "a.md") - _, refs = run_reader(partial(markdown, refs), test_ns_b, "b.md") - - cb = list(refs.by_name("a")) + config = Config() | ConfigUpdate( + version="2.4", + namespace_default=NamespaceDefault.GLOBAL, + annotation=AnnotationMethod.NAKED) + doca, _ = run_reader(partial(markdown, config, refs), test_ns_a, "a.md") + docb, _ = run_reader(partial(markdown, config, refs), test_ns_b, "b.md") + doc = Document(config, refs, { PurePath("a.md"): doca, PurePath("b.md"): docb }) + + cb = [refs[r] for r in 
refs.select_by_name(ReferenceName.from_str("a"))] assert len(cb) == 2 assert cb[0].source.strip() == "# part a" assert cb[1].source.strip() == "# part b" - source, _ = tangle_ref(refs, "refers-to-a", AnnotationMethod.NAKED) + source, _ = doc.target_text(PurePath("a.py")) assert source.splitlines() == ["# part a", "# part b"] def test_private_namespace(): refs = ReferenceMap() + config = Config() | ConfigUpdate( + version="2.4", + namespace_default=NamespaceDefault.PRIVATE, + annotation=AnnotationMethod.NAKED) + doca, _ = run_reader(partial(markdown, config, refs), test_ns_a, "a.md") + docb, _ = run_reader(partial(markdown, config, refs), test_ns_b, "b.md") + doc = Document(config, refs, { PurePath("a.md"): doca, PurePath("b.md"): docb }) + + cb = [refs[r] for r in refs.select_by_name(ReferenceName.from_str("a.md::a"))] + assert len(cb) == 1 + assert cb[0].source.strip() == "# part a" - with config(namespace_default=NamespaceDefault.PRIVATE): - _, refs = run_reader(partial(markdown, refs), test_ns_a, "a.md") - _, refs = run_reader(partial(markdown, refs), test_ns_b, "b.md") + cb = [refs[r] for r in refs.select_by_name(ReferenceName.from_str("b.md::a"))] + assert len(cb) == 1 + assert cb[0].source.strip() == "# part b" - print(refs.map) + source, _ = doc.target_text(PurePath("a.py")) + assert source.splitlines() == ["# part a"] - cb = list(refs.by_name("a")) - assert len(cb) == 2 - assert cb[0].source.strip() == "# part a" - assert cb[1].source.strip() == "# part b" + source, _ = doc.target_text(PurePath("b.py")) + assert source.splitlines() == ["# part b"] - source, _ = tangle_ref(refs, "a.md::refers-to-a", AnnotationMethod.NAKED) - assert source.splitlines() == ["# part a"] + +test_ns_yaml1 = """ +--- +entangled: + version: "2.4" + namespace: q +--- + +``` {.python #hello} +print("hello") +``` +""".strip() + +test_ns_yaml2 = """ +--- +entangled: + version: "2.4" + namespace: p +--- + +``` {.python #hello} +print("world") +``` + +``` {.python #combined} +<> +<> +``` 
+""".strip() + +def test_yaml_namespace(): + refs = ReferenceMap() + doca, config = run_reader(partial(markdown, Config(), refs), test_ns_yaml1, "a.md") + assert config.namespace == ("q",) + docb, config = run_reader(partial(markdown, Config(), refs), test_ns_yaml2, "b.md") + assert config.namespace == ("p",) + + refq = ReferenceName.from_str("q::hello") + assert refs.has_name(refq) + cb = [refs[r] for r in refs.select_by_name(refq)] + assert cb[0].source == "print(\"hello\")\n" + + refp = ReferenceName.from_str("p::hello") + assert refs.has_name(refp) + cb = [refs[r] for r in refs.select_by_name(refp)] + assert cb[0].source == "print(\"world\")\n" + + src, _ = tangle_ref(refs, ReferenceName.from_str("p::combined"), annotation=AnnotationMethod.NAKED) + assert src == "print(\"hello\")\nprint(\"world\")\n" From 174fadc4285eb2631408e72361b0b64535bf6d03 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Thu, 6 Nov 2025 12:38:57 +0100 Subject: [PATCH 15/48] get test coverage for readers up to 100% --- entangled/errors/user.py | 37 +---- entangled/hooks/quarto_attributes.py | 2 +- entangled/hooks/task.py | 3 +- entangled/model/__init__.py | 12 +- entangled/model/code_block.py | 14 +- entangled/model/namespaces.py | 56 ------- entangled/model/reference_map.py | 2 +- entangled/model/tangle.py | 36 ++++- entangled/readers/delimiters.py | 2 +- entangled/readers/lines.py | 2 +- entangled/readers/markdown.py | 26 ++-- entangled/readers/types.py | 9 +- entangled/readers/yaml_header.py | 6 +- entangled/{readers => }/text_location.py | 0 test/model/test_code_block.py | 19 +++ test/{ => model}/test_properties.py | 2 +- test/readers/test_lines.py | 2 +- test/readers/test_markdown.py | 178 ++++++++++++++++++++++- test/readers/test_text_location.py | 2 +- test/readers/test_yaml_header.py | 24 ++- 20 files changed, 298 insertions(+), 136 deletions(-) delete mode 100644 entangled/model/namespaces.py rename entangled/{readers => }/text_location.py (100%) create mode 100644 
test/model/test_code_block.py rename test/{ => model}/test_properties.py (88%) diff --git a/entangled/errors/user.py b/entangled/errors/user.py index eda1960..86f0c91 100644 --- a/entangled/errors/user.py +++ b/entangled/errors/user.py @@ -2,8 +2,7 @@ from typing import Any, Callable, override from pathlib import Path -from ..readers.text_location import TextLocation -from ..model.reference_name import ReferenceName +from ..text_location import TextLocation class UserError(Exception): @@ -22,26 +21,13 @@ def __str__(self): @dataclass class HelpfulUserError(UserError): - """Raise a user error and supply an optional function `func` for context. - - Make sure to also execute e.func() in your error handling.""" - + """Raise a user error with a message.""" msg: str - func: Callable[[], Any] = lambda: None def __str__(self): return f"error: {self.msg}" -@dataclass -class MissingLanguageError(UserError): - origin: TextLocation - - @override - def __str__(self): - return f"{self.origin}: Missing language for code block." 
- - @dataclass class FileError(UserError): filename: Path @@ -67,25 +53,6 @@ def __str__(self): return f"parse error at {self.location}: {self.msg}" -@dataclass -class CyclicReference(UserError): - ref_name: str - cycle: list[str] - - def __str__(self): - cycle_str = " -> ".join(self.cycle) - return f"Cyclic reference in <<{self.ref_name}>>: {cycle_str}" - - -@dataclass -class MissingReference(UserError): - ref_name: ReferenceName - location: TextLocation - - def __str__(self): - return f"Missing reference `{self.ref_name}` at `{self.location}`" - - @dataclass class CodeAttributeError(UserError): origin: TextLocation diff --git a/entangled/hooks/quarto_attributes.py b/entangled/hooks/quarto_attributes.py index c0312a0..817d629 100644 --- a/entangled/hooks/quarto_attributes.py +++ b/entangled/hooks/quarto_attributes.py @@ -16,7 +16,7 @@ def split_yaml_header(language: Language, source: str) -> tuple[str, str, object]: """Split source into YAML header and body.""" - trigger = re.escape(language.comment.open) + r"\s*\|(.*)" + trigger: str = re.escape(language.comment.open) + r"\s*\|(.*)" lines = source.splitlines() header_lines: list[str] = [] body_start: int = 0 diff --git a/entangled/hooks/task.py b/entangled/hooks/task.py index 8544ad2..5672911 100644 --- a/entangled/hooks/task.py +++ b/entangled/hooks/task.py @@ -8,8 +8,9 @@ from ..config import AnnotationMethod from ..io import Transaction -from ..model import CodeBlock, ReferenceId, ReferenceMap, tangle_ref +from ..model import CodeBlock, ReferenceId, ReferenceMap from ..model.properties import Class, Property, get_attribute, get_attribute_string, get_classes +from ..model.tangle import tangle_ref from .base import HookBase from ..logging import logger diff --git a/entangled/model/__init__.py b/entangled/model/__init__.py index 58324ac..0c52814 100644 --- a/entangled/model/__init__.py +++ b/entangled/model/__init__.py @@ -1,18 +1,16 @@ +from .document import Document, PlainText, Content, RawContent from 
.code_block import CodeBlock +from .reference_name import ReferenceName from .reference_id import ReferenceId from .reference_map import ReferenceMap -from .reference_name import ReferenceName -from .document import Document, PlainText, Content, RawContent -from .tangle import tangle_ref __all__ = [ + "Document", + "PlainText", "Content", "RawContent", - "PlainText", "CodeBlock", - "ReferenceId", "ReferenceName", + "ReferenceId", "ReferenceMap", - "Document", - "tangle_ref", ] diff --git a/entangled/model/code_block.py b/entangled/model/code_block.py index 5fcf746..ed375bd 100644 --- a/entangled/model/code_block.py +++ b/entangled/model/code_block.py @@ -1,13 +1,23 @@ from dataclasses import dataclass -from ..readers.text_location import TextLocation +import os + +from ..text_location import TextLocation from ..readers.lines import lines from ..config.language import Language from .properties import Property def indent(prefix: str, text: str) -> str: - return "".join(prefix + line for line in lines(text)) + def indent_line(line: str): + if line.strip() == "" and line.endswith(os.linesep): + return os.linesep + if line.strip() == "": + return "" + + return prefix + line + + return "".join(map(indent_line, lines(text))) @dataclass diff --git a/entangled/model/namespaces.py b/entangled/model/namespaces.py deleted file mode 100644 index 47e039f..0000000 --- a/entangled/model/namespaces.py +++ /dev/null @@ -1,56 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass, field -from collections import defaultdict - -from .reference_name import ReferenceName - - -@dataclass -class Namespace[T]: - """ - A structure of nested namespaces containing objects of type `T`. 
- """ - sep: str = "::" - subspace: defaultdict[str, Namespace[T]] = field( - default_factory=lambda: defaultdict(Namespace) - ) - index: dict[str, T] = field(default_factory=dict) - - def sub(self, namespace: tuple[str, ...]) -> Namespace[T]: - dir = self - for i, s in enumerate(namespace): - if s not in self.subspace: - raise KeyError(f"no subspace `{s}` found in namespace `{self.sep.join(namespace[:i])}`") - dir = dir.subspace[s] - return dir - - def make_sub(self, namespace: tuple[str, ...]) -> Namespace[T]: - dir = self - for s in namespace: - dir = dir.subspace[s] - return dir - - def get(self, name: ReferenceName) -> T: - dir = self.sub(name.namespace) - - if name in dir.index: - return dir.index[name.name] - - raise KeyError(f"no reference `{name.name}` found in namespace `{self.sep.join(name.namespace)}`") - - def __getitem__(self, key: str | ReferenceName) -> T: - match key: - case ReferenceName(): - return self.get(key) - - case str(): - return self.get(ReferenceName.from_str(key)) - - def __setitem__(self, key: ReferenceName, value: T): - dir = self.make_sub(key.namespace) - dir.index[key.name] = value - - def __contains__(self, key: ReferenceName) -> bool: - dir = self.sub(key.namespace) - return key.name in dir.index diff --git a/entangled/model/reference_map.py b/entangled/model/reference_map.py index a30ef1a..fde6df3 100644 --- a/entangled/model/reference_map.py +++ b/entangled/model/reference_map.py @@ -1,5 +1,5 @@ from collections import defaultdict -from collections.abc import Iterable, Iterator, Mapping, MutableMapping +from collections.abc import Iterable, Iterator, MutableMapping from dataclasses import dataclass, field from pathlib import PurePath from typing import override diff --git a/entangled/model/tangle.py b/entangled/model/tangle.py index ee7a268..254e709 100644 --- a/entangled/model/tangle.py +++ b/entangled/model/tangle.py @@ -5,17 +5,49 @@ import re import os +from typing import override from ..config import AnnotationMethod from 
..readers.lines import lines -from ..errors.user import CyclicReference, MissingLanguageError, MissingReference +from ..errors.user import UserError +from ..text_location import TextLocation from .reference_map import ReferenceMap from .reference_id import ReferenceId from .reference_name import ReferenceName +@dataclass +class CyclicReference(UserError): + ref_name: str + cycle: list[str] + + @override + def __str__(self): + cycle_str = " -> ".join(self.cycle) + return f"Cyclic reference in <<{self.ref_name}>>: {cycle_str}" + + +@dataclass +class MissingReference(UserError): + origin: TextLocation + ref_name: ReferenceName + + @override + def __str__(self): + return f"{self.origin}: Missing reference `{self.ref_name}`" + + +@dataclass +class MissingLanguageError(UserError): + origin: TextLocation + + @override + def __str__(self): + return f"{self.origin}: Missing language for code block." + + @dataclass class Visitor[T]: _visited: dict[T, int] = field(default_factory=dict) @@ -56,7 +88,7 @@ def tangler( if m := re.match(r"^(?P\s*)<<(?P[\w:-]+)>>\s*$", line.rstrip()): ref_name = ReferenceName.from_str(m["refname"], code_block.namespace) if not refs.has_name(ref_name): - raise MissingReference(ref_name, code_block.origin) + raise MissingReference(code_block.origin, ref_name) for ref in refs.select_by_name(ref_name): with visitor.visit(ref): yield from indent(m["indent"], recur(recur, deps, ref, False)) diff --git a/entangled/readers/delimiters.py b/entangled/readers/delimiters.py index 04c2e07..8fcef95 100644 --- a/entangled/readers/delimiters.py +++ b/entangled/readers/delimiters.py @@ -3,7 +3,7 @@ import re -from .text_location import TextLocation +from ..text_location import TextLocation from .types import InputStream from ..errors.user import ParseError diff --git a/entangled/readers/lines.py b/entangled/readers/lines.py index 5969d72..9cfa4d3 100644 --- a/entangled/readers/lines.py +++ b/entangled/readers/lines.py @@ -1,7 +1,7 @@ from collections.abc import 
Generator from pathlib import PurePath -from .text_location import TextLocation +from ..text_location import TextLocation from .peekable import peekable diff --git a/entangled/readers/markdown.py b/entangled/readers/markdown.py index ac8b8da..c2d55b6 100644 --- a/entangled/readers/markdown.py +++ b/entangled/readers/markdown.py @@ -6,14 +6,14 @@ from ..model import CodeBlock, Content, RawContent, PlainText, ReferenceId, ReferenceMap, ReferenceName from ..config import Config from ..errors.user import CodeAttributeError, IndentationError -from ..model.properties import get_attribute_string, read_properties, get_classes, get_id +from ..model.properties import get_attribute_string, get_attribute, read_properties, get_classes, get_id from ..utility import first from ..hooks import get_hooks, HookBase from .types import InputStream, Reader, RawMarkdownStream from .lines import lines from .delimiters import delimited_token_getter -from .text_location import TextLocation +from ..text_location import TextLocation from .yaml_header import read_yaml_header, get_config import re @@ -45,8 +45,14 @@ def code_block_guard(origin: TextLocation, open_match: re.Match[str], close_matc return True -def dedent(source: str, indent: str) -> str: - return "".join(line.removeprefix(indent) for line in lines(source)) +def dedent_line(location: TextLocation, indent: str, line: str): + if line.startswith(indent) or line.strip() == "": + return line.removeprefix(indent) + raise IndentationError(location) + + +def dedent(location: TextLocation, source: str, indent: str) -> str: + return "".join(dedent_line(location, indent, line) for line in lines(source)) def code_block(config: Config) -> Reader[RawContent, bool]: @@ -76,7 +82,7 @@ def code_block_reader(input: InputStream) -> RawMarkdownStream[bool]: language = config.get_language(language_class) if language_class else None if language_class and not language: logging.warning(f"`{block.origin}`: language `{language_class}` unknown.") - source = 
dedent(block.content, indent) + source = dedent(block.origin, block.content, indent) yield CodeBlock( properties, @@ -122,11 +128,13 @@ def process_code_block(hooks: list[HookBase], refs: ReferenceMap, code_block: Co except TypeError: raise CodeAttributeError(code_block.origin, "`file` attribute should have string type") - try: - if mode := get_attribute_string(code_block.properties, "mode"): + if mode := get_attribute(code_block.properties, "mode"): + if type(mode) is int: # bool is a subtype of int, and we really want an int + code_block.mode = mode + elif isinstance(mode, str): code_block.mode = int(mode, 8) - except TypeError: - raise CodeAttributeError(code_block.origin, "`mode` attribute should have string type") + else: + raise CodeAttributeError(code_block.origin, "`mode` attribute should have string or integer type") ref_name = block_id or target_file if ref_name is None: diff --git a/entangled/readers/types.py b/entangled/readers/types.py index f893311..9bdc451 100644 --- a/entangled/readers/types.py +++ b/entangled/readers/types.py @@ -1,7 +1,7 @@ from collections.abc import Callable, Generator from typing import cast -from .text_location import TextLocation +from ..text_location import TextLocation from .peekable import Peekable from ..model import Content, RawContent @@ -13,13 +13,6 @@ type MarkdownStream[Result] = Generator[Content, None, Result] -def map_reader[Out, T, U](f: Callable[[T], U], reader: Reader[Out, T]) -> Reader[Out, U]: - def mapped(input: InputStream) -> Generator[Out, None, U]: - x = yield from reader(input) - return f(x) - return mapped - - def run_generator[O, R](g: Generator[O, None, R]) -> tuple[list[O], R]: result: R | None = None diff --git a/entangled/readers/yaml_header.py b/entangled/readers/yaml_header.py index 2eb7887..4b6841a 100644 --- a/entangled/readers/yaml_header.py +++ b/entangled/readers/yaml_header.py @@ -6,7 +6,7 @@ from ..config import Config, ConfigUpdate, config from ..model import PlainText -from 
..errors.user import ParseError +from ..errors.user import ParseError, HelpfulUserError from .types import InputStream, MarkdownStream from .delimiters import delimited_token_getter @@ -48,9 +48,9 @@ def get_config(header: object, base_config: Config | None = None) -> Config: return base_config | msgspec.convert(header.get("entangled", None), ConfigUpdate) except msgspec.ValidationError as e: logging.error(e) - raise TypeError() + raise HelpfulUserError("unable to read config") elif header is None: return base_config else: - raise TypeError() + raise HelpfulUserError(f"expected an object for config, got {type(header)}: {header}") diff --git a/entangled/readers/text_location.py b/entangled/text_location.py similarity index 100% rename from entangled/readers/text_location.py rename to entangled/text_location.py diff --git a/test/model/test_code_block.py b/test/model/test_code_block.py new file mode 100644 index 0000000..0b5a625 --- /dev/null +++ b/test/model/test_code_block.py @@ -0,0 +1,19 @@ +from pathlib import PurePath +from textwrap import indent +from entangled.model.code_block import CodeBlock +from entangled.text_location import TextLocation + + +def test_code_block(): + cb = CodeBlock( + properties=[], + open_line="```\n", + close_line="```\n", + source="hello\n", + indent="> ", + origin=TextLocation(PurePath("-"), 1) + ) + + expected = """```\nhello\n```\n""" + assert cb.text == expected + assert cb.indented_text == indent(expected.rstrip(), "> ") + "\n" diff --git a/test/test_properties.py b/test/model/test_properties.py similarity index 88% rename from test/test_properties.py rename to test/model/test_properties.py index 8247800..6e3b12c 100644 --- a/test/test_properties.py +++ b/test/model/test_properties.py @@ -1,4 +1,4 @@ -from entangled.properties import read_properties, Id, Class, Attribute +from entangled.model.properties import read_properties, Id, Class, Attribute def test_id(): diff --git a/test/readers/test_lines.py b/test/readers/test_lines.py 
index cc85479..e74bc13 100644 --- a/test/readers/test_lines.py +++ b/test/readers/test_lines.py @@ -1,7 +1,7 @@ from pathlib import PurePath from entangled.readers.lines import lines, numbered_lines from entangled.readers.peekable import Peekable -from entangled.readers.text_location import TextLocation +from entangled.text_location import TextLocation def test_lines(): diff --git a/test/readers/test_markdown.py b/test/readers/test_markdown.py index f46e651..301afad 100644 --- a/test/readers/test_markdown.py +++ b/test/readers/test_markdown.py @@ -1,15 +1,23 @@ from functools import partial from pathlib import PurePath +import pytest +import logging + from entangled.config.namespace_default import NamespaceDefault -from entangled.config.version import Version -from entangled.model import Document, PlainText, CodeBlock, ReferenceId, ReferenceMap, ReferenceName, tangle_ref +from entangled.errors.user import ParseError, IndentationError, CodeAttributeError +from entangled.model import Document, PlainText, CodeBlock, ReferenceId, ReferenceMap, ReferenceName +from entangled.model.tangle import tangle_ref from entangled.readers.markdown import code_block, collect_plain_text, ignore_block, markdown, raw_markdown from entangled.readers.lines import numbered_lines +from entangled.readers.peekable import Peekable from entangled.config import AnnotationMethod, Config, ConfigUpdate from entangled.readers.types import run_generator, Reader +empty_stream = Peekable(iter([])) + + test0 = """ abcdefg """.strip() @@ -29,6 +37,9 @@ def run_reader[O, T](reader: Reader[O, T], inp: str, filename: str = "-") -> tup def test_ignore_block(): + ol, _ = run_generator(ignore_block(Config())(empty_stream)) + assert not ol + ol, rv = run_reader(ignore_block(Config()), test0) assert not rv and not ol @@ -55,6 +66,9 @@ def test_ignore_block(): def test_code_block(): + ol, _ = run_generator(code_block(Config())(empty_stream)) + assert not ol + ol, rv = run_reader(code_block(Config()), test0) 
assert not rv and not ol @@ -70,10 +84,16 @@ def test_code_block(): def test_raw_markdown(): + ol, _ = run_generator(raw_markdown(Config(), empty_stream)) + assert not ol + ol, _ = run_reader(partial(raw_markdown, Config()), test0) assert len(ol) == 1 assert ol[0] == PlainText("abcdefg") + ol, _ = run_reader(partial(raw_markdown, Config()), test1) + assert not any(isinstance(x, CodeBlock) for x in ol) + ol, _ = run_reader(partial(raw_markdown, Config()), test2) assert len(ol) == 1 assert isinstance(ol[0], CodeBlock) @@ -106,6 +126,66 @@ def test_markdown(): assert ol[-1] in refs +test_indent1 = """ +This code block is indented: + + ``` {.python} + hello + + goodbye + ``` + +Note the lack of indentation due to a blank line. +""".strip() + + +test_indent_error1 = """ +This code is indented, but contains a line that is not correctly indented: + + ``` {.python} +hello + ``` +""".strip() + +test_indent_error2 = """ +This code is indented, but the closing fence indent doesn't match the opening: + + ``` {.python} + hello + ``` + +This should raise an `unexpected end of file`. +""".strip() + +test_indent_error3 = """ +This code is indented, but the closing fence indent doesn't match the opening: + + ``` {.python} + hello + ``` + +This should raise an `indentation error`. 
+""" + +def test_indentation(): + refs = ReferenceMap() + ol, _ = run_reader(partial(markdown, Config(), refs), test_indent1) + doc = Document(Config(), refs, {PurePath("a.md"): ol}) + + assert isinstance(ol[1], ReferenceId) + assert refs[ol[1]].indent == " " + assert doc.source_text(PurePath("a.md")) == test_indent1 + + with pytest.raises(IndentationError): + _ = run_reader(partial(markdown, Config(), refs), test_indent_error1) + + with pytest.raises(ParseError): + _ = run_reader(partial(markdown, Config(), refs), test_indent_error2) + + with pytest.raises(IndentationError): + _ = run_reader(partial(markdown, Config(), refs), test_indent_error3) + + test_ns_a = """ First input: @@ -207,9 +287,9 @@ def test_private_namespace(): def test_yaml_namespace(): refs = ReferenceMap() - doca, config = run_reader(partial(markdown, Config(), refs), test_ns_yaml1, "a.md") + _, config = run_reader(partial(markdown, Config(), refs), test_ns_yaml1, "a.md") assert config.namespace == ("q",) - docb, config = run_reader(partial(markdown, Config(), refs), test_ns_yaml2, "b.md") + _, config = run_reader(partial(markdown, Config(), refs), test_ns_yaml2, "b.md") assert config.namespace == ("p",) refq = ReferenceName.from_str("q::hello") @@ -224,3 +304,93 @@ def test_yaml_namespace(): src, _ = tangle_ref(refs, ReferenceName.from_str("p::combined"), annotation=AnnotationMethod.NAKED) assert src == "print(\"hello\")\nprint(\"world\")\n" + + +wrongly_typed_attribute1 = """ +--- +entangled: + version: "2.4" + style: basic +--- + +```python +#| file: 3 +``` +""".strip() + + +octal_mode_attribute2 = """ +--- +entangled: + version: "2.4" + style: basic +--- + +Note, the mode here is given in octal, and the YAML reader understands this, so this is +supported. 
+ +```python +#| file: hello.py +#| mode: 0755 +#!/usr/bin/env python +print("Hello, World!") +``` +""".strip() + + +octal_mode_attribute1 = """ +``` {.python file=hello.py mode=0755} +print("Hello, World!") +``` +""".strip() + +wrongly_typed_mode_attribute = """ +--- +entangled: + version: "2.4" + style: basic +--- + +```python +#| file: hello.py +#| mode: true +print("Hello, World!") +``` +""".strip() + + +def test_file_attribute_type(): + refs = ReferenceMap() + + with pytest.raises(CodeAttributeError): + _ = run_reader(partial(markdown, Config(), refs), wrongly_typed_attribute1, "a.md") + + with pytest.raises(CodeAttributeError): + _ = run_reader(partial(markdown, Config(), refs), wrongly_typed_mode_attribute, "a.md") + ref = ReferenceId(ReferenceName((), "hello.py"), PurePath("a.md"), 0) + print(refs[ref]) + + for md in [octal_mode_attribute1, octal_mode_attribute2]: + _ = run_reader(partial(markdown, Config(), refs), md, "hello.md") + ref = ReferenceId(ReferenceName((), "hello.py"), PurePath("hello.md"), 0) + assert ref in refs + assert refs[ref].mode == 0o755 + + +unknown_language = """ +``` {.brainfuck #hello-world} +>++++++++[<+++++++++>-]<.>++++[<+++++++>-]<+.+++++++..+++.>>++++++[<+++++++>-]<+ ++.------------.>++++++[<+++++++++>-]<+.<.+++.------.--------.>>>++++[<++++++++>- +]<+. 
+``` +""".strip() + + +def test_unknown_language(caplog): + refs = ReferenceMap() + + with caplog.at_level(logging.WARNING): + _ = run_reader(partial(markdown, Config(), refs), unknown_language, "a.md") + assert "language `brainfuck` unknown" in caplog.text + + diff --git a/test/readers/test_text_location.py b/test/readers/test_text_location.py index 81ad6b9..65d7a01 100644 --- a/test/readers/test_text_location.py +++ b/test/readers/test_text_location.py @@ -1,5 +1,5 @@ from pathlib import PurePath -from entangled.readers.text_location import TextLocation +from entangled.text_location import TextLocation def test_text_location(): diff --git a/test/readers/test_yaml_header.py b/test/readers/test_yaml_header.py index 10d64c6..1c0cca5 100644 --- a/test/readers/test_yaml_header.py +++ b/test/readers/test_yaml_header.py @@ -2,9 +2,9 @@ import logging import pytest -from entangled.readers.yaml_header import read_yaml_header +from entangled.readers.yaml_header import read_yaml_header, get_config from entangled.readers.lines import numbered_lines -from entangled.errors.user import ParseError +from entangled.errors.user import ParseError, UserError from entangled.readers.types import MarkdownStream @@ -40,6 +40,18 @@ --- """ +input_not_an_object = """ +--- +[1, 2, 3] +--- +""".strip() + +input_invalid = """ +--- +entangled: + no_version_given: 0 +--- +""".strip() def get_yaml_header(input: str) -> object: path = PurePath("-") @@ -58,6 +70,7 @@ def run(): return result + def test_read_yaml_header(): assert get_yaml_header(input_correct) == { "title": "hello" } assert get_yaml_header(input_no_header) is None @@ -68,3 +81,10 @@ def test_read_yaml_header(): with pytest.raises(ParseError): _ = get_yaml_header(input_non_terminating) + + with pytest.raises(UserError): + _ = get_config(get_yaml_header(input_not_an_object)) + + with pytest.raises(UserError): + _ = get_config(get_yaml_header(input_invalid)) + From 832a6f71bdafa533b7b22d8fc4efe98c71c858ce Mon Sep 17 00:00:00 2001 
From: Johan Hidding Date: Thu, 6 Nov 2025 16:35:20 +0100 Subject: [PATCH 16/48] create architecture.md; increase test coverage --- Makefile | 13 ++ architecture.md | 151 ++++++++++++++++++ entangled/errors/user.py | 2 +- entangled/hooks/quarto_attributes.py | 6 +- entangled/iterators/__init__.py | 6 + entangled/{readers => iterators}/lines.py | 1 + entangled/{readers => iterators}/peekable.py | 0 entangled/iterators/run_generator.py | 14 ++ entangled/model/code_block.py | 15 +- entangled/model/reference_name.py | 5 +- entangled/model/tangle.py | 22 +-- entangled/readers/__init__.py | 13 ++ entangled/readers/markdown.py | 6 +- entangled/readers/types.py | 13 +- test/{readers => iterators}/test_lines.py | 3 +- .../test_peekable.py} | 2 +- test/iterators/test_run_generator.py | 18 +++ test/model/test_code_block.py | 40 ++++- test/model/test_cycles.py | 65 ++++++++ test/model/test_document.py | 76 +++++++++ test/readers/test_markdown.py | 9 +- test/readers/test_yaml_header.py | 3 +- test/test_cycles.py | 63 -------- 23 files changed, 432 insertions(+), 114 deletions(-) create mode 100644 architecture.md create mode 100644 entangled/iterators/__init__.py rename entangled/{readers => iterators}/lines.py (99%) rename entangled/{readers => iterators}/peekable.py (100%) create mode 100644 entangled/iterators/run_generator.py rename test/{readers => iterators}/test_lines.py (86%) rename test/{readers/test_peakable.py => iterators/test_peekable.py} (88%) create mode 100644 test/iterators/test_run_generator.py create mode 100644 test/model/test_cycles.py create mode 100644 test/model/test_document.py delete mode 100644 test/test_cycles.py diff --git a/Makefile b/Makefile index 32bec2c..13367b1 100644 --- a/Makefile +++ b/Makefile @@ -8,3 +8,16 @@ test: docs: uv run mkdocs build + +define test_template = +.PHONY: test-$(1) + +test-$(1): + uv run pytest test/$(1) --cov=entangled/$(1) + uv run coverage xml +endef + +modules = readers io iterators model + +$(foreach 
mod,$(modules),$(eval $(call test_template,$(mod)))) + diff --git a/architecture.md b/architecture.md new file mode 100644 index 0000000..7b19b84 --- /dev/null +++ b/architecture.md @@ -0,0 +1,151 @@ +Entangled Architecture +====================== + +Entangled is organised into several sub-modules with clearly defined responsibilities: + +- `commands`, all sub-commands for the command line. +- `config`, all data types related to configuring Entangled. +- `hooks`, the hook subsystem. +- `io`, manages file I/O. +- `iterators`, support functions for iterators. +- `model`, the data model for Entangled, also contains the tangler. +- `readers`, reading data into the model. + +Imports in Python need to be acyclical, as follows: + +```mermaid +graph TD; + iterators --> model; + config --> hooks; + config --> commands; + config --> model; + config --> readers; + hooks --> readers; + iterators --> readers; + model --> readers; + readers --> commands; + io --> commands; + hooks --> commands; +``` + +Commands +-------- + +We use `click` to make the command line interface, and `rich` and `rich-click` to make it pretty. Every command is encapsulated in a transaction: + +```python +with transaction() as t: + ... +``` + +This transaction is the front-end for all I/O based operations. Communication with the user is all handled through the `logging` system. + +Note: in the past we used `argh` to parse arguments, but this package doesn't have the same level of support from the community. + +Config +------ + +Config is read from `entangled.toml` using the `msgspec` package. The config is separated into an in-memory representation `Config`, and a loadable structure `ConfigUpdate`. We load an update from `entangled.toml` or from a YAML header at the top of a Markdown file. This `ConfigUpdate` is merged with an existing `Config` using the `|` operator. This way we can stack different layers of configuration on top of each other. 
We can even have different Markdown dialects between files working together. + +Hooks +----- + +A hook is a class derived from `HookBase`, where you can override the following. A nested `Config` class that can be loaded by `msgspec`: + +```python +class Config(msgspec.Struct): + pass +``` + +An `__init__` method: + +```python +def __init__(self, config: Config): + super().__init__(config) +``` + +The `check_prerequisites` method checks that prerequisites are met. For instance, the build hook can use this to see that GNU Make is available. + +```python +def check_prerequisites(self): + pass +``` + +The `on_read` method is called right after a code block is being read. Example: `quarto_attributes` uses this method to translate the YAML mini header into code block attributes. + +```python +def on_read(self, code: CodeBlock): + pass +``` + +The `pre_tangle` method is run after all the Markdown is read, but before any output is written. Here you can define any additional output targets or modify the reference map in place. + +```python +def pre_tangle(self, refs: ReferenceMap): + pass +``` + +The `on_tangle` method lets you add actions to the I/O transaction. + +```python +def on_tangle(self, t: Transaction, refs: ReferenceMap): + pass +``` + +Lastly, `post_tangle` lets you do clean-up after tangle is complete. I've never used this. + +```python +def post_tangle(self, refs: ReferenceMap): + pass +``` + +Hooks can be used to implement many things that feel to the user like features. + +I/O +--- + +Offers a virtualization layer on top of all file IO. All IO in Entangled is organized into transactions. When conflicts are found that could endanger your data integrity, Entangled will fail to run the entire transaction. 
For instance, if you have a markdown file called `model.md` which generates a file called `model.py`, and you have edits in both of them, either `entangled tangle` or `entangled stitch` will see that and refuse to overwrite changes, unless you run with `-f/--force`. + A file database is kept containing MD5 hashes of all input files, to check that content hasn't changed without Entangled knowing about it. All input (and their hashes) are cached in `entangled.virtual.FileCache`. + +Iterators +--------- + +Internally, Entangled makes heavy use of generators to read files and process text line-by-line. Because both the `model` and `readers` modules use these operations, they need to be in a separate module. Crucially, this module contains the `Peekable` iterator, which allows us to peek into the future of an iterator by caching a single element. + +Model +----- + +The `model` contains some of the core functionality of Entangled. It defines the in-memory representation of a Markdown document, as well as the graph representing the code blocks and their references. + +- `ReferenceName` contains a `namespace: tuple[str, ...]` and `name: str`, representing a named code entity that may consist of multiple linked code blocks by the same name. +- `ReferenceId` is a unique identifier for every code block. This stores the reference `name`, but also its Markdown source `file` and a `ref_count` for when there are multiple code blocks of the same name. +- `Content` is either `PlainText` which is ignored by Entangled unless stitching, or a `ReferenceId`. +- `CodeBlock` contains all information on a code block including enclosing lines (i.e. the lines containing the three back-tics), its attributes, indentation and the origin of the content. +- `ReferenceMap` fundamentally acts as a `Mapping[ReferenceId, CodeBlock]`, but also contains an index for searching by `ReferenceName` or target file. 
+- `Document` collects configuration, a dictionary of content and the reference map for ease of use. + +Readers +------- + +Readers are implemented as `Callable[[InputStream], Generator[RawContent, None, T]]`. Here, `RawContent` is a form of `Content` where we're still dealing with `CodeBlock`s directly instead of `ReferenceId`. The third type-argument to `Generator` is kept abstract here. We can use it to pass values from one generator to the other. For instance (a simplified version): + +```python +def read_yaml_header(inp: InputStream) -> Generator[RawContent, None, ConfigUpdate]: + ... + yield plain_text + return config_update + +def read_markdown(inp: InputStream, refs: ReferenceMap) -> Generator[RawContent, None, Config]: + config_update = yield from read_yaml_header(inp) + config = Config() | config_update + yield from rest_of_markdown(config, inp, refs) + return config +``` + +Here we have a `read_yaml_header` reader that emits `PlainText`, but also parses the YAML header into a `ConfigUpdate`. We subsequently use that configuration to determine how to further read the rest of the Markdown file. This way we can completely process a Markdown file in a single pass, buffering only a single line at a time. + +Test Coverage +============= + +Unit tests for each module should cover 100% of that module. The `Makefile` contains test targets for every module that measure only the coverage on that module. 
+ diff --git a/entangled/errors/user.py b/entangled/errors/user.py index 86f0c91..2380a3b 100644 --- a/entangled/errors/user.py +++ b/entangled/errors/user.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Any, Callable, override +from typing import Any, override from pathlib import Path from ..text_location import TextLocation diff --git a/entangled/hooks/quarto_attributes.py b/entangled/hooks/quarto_attributes.py index 817d629..9443727 100644 --- a/entangled/hooks/quarto_attributes.py +++ b/entangled/hooks/quarto_attributes.py @@ -17,7 +17,7 @@ def split_yaml_header(language: Language, source: str) -> tuple[str, str, object]: """Split source into YAML header and body.""" trigger: str = re.escape(language.comment.open) + r"\s*\|(.*)" - lines = source.splitlines() + lines = source.splitlines(keepends=True) header_lines: list[str] = [] body_start: int = 0 @@ -29,7 +29,7 @@ def split_yaml_header(language: Language, source: str) -> tuple[str, str, object body_start = i break - return "\n".join(lines[:body_start]), "\n".join(lines[body_start:]), yaml.safe_load("\n".join(header_lines)) + return "".join(lines[:body_start]), "".join(lines[body_start:]), yaml.safe_load("".join(header_lines)) def amend_code_properties(code_block: CodeBlock): @@ -71,7 +71,7 @@ def amend_code_properties(code_block: CodeBlock): if k not in ("id", "classes")) code_block.source = body - code_block.open_line += "\n" + header + code_block.open_line += header code_block.properties.extend(props) diff --git a/entangled/iterators/__init__.py b/entangled/iterators/__init__.py new file mode 100644 index 0000000..08d9226 --- /dev/null +++ b/entangled/iterators/__init__.py @@ -0,0 +1,6 @@ +from .lines import lines, numbered_lines +from .peekable import Peekable +from .run_generator import run_generator + +__all__ = ["Peekable", "lines", "numbered_lines", "run_generator"] + diff --git a/entangled/readers/lines.py b/entangled/iterators/lines.py similarity index 99% rename from 
entangled/readers/lines.py rename to entangled/iterators/lines.py index 9cfa4d3..d86e7ce 100644 --- a/entangled/readers/lines.py +++ b/entangled/iterators/lines.py @@ -2,6 +2,7 @@ from pathlib import PurePath from ..text_location import TextLocation + from .peekable import peekable diff --git a/entangled/readers/peekable.py b/entangled/iterators/peekable.py similarity index 100% rename from entangled/readers/peekable.py rename to entangled/iterators/peekable.py diff --git a/entangled/iterators/run_generator.py b/entangled/iterators/run_generator.py new file mode 100644 index 0000000..1eaf97b --- /dev/null +++ b/entangled/iterators/run_generator.py @@ -0,0 +1,14 @@ +from collections.abc import Generator +from typing import cast + + +def run_generator[O, R](g: Generator[O, None, R]) -> tuple[list[O], R]: + result: R | None = None + + def h() -> Generator[O]: + nonlocal result + result = yield from g + + out = list(h()) + + return out, cast(R, result) diff --git a/entangled/model/code_block.py b/entangled/model/code_block.py index ed375bd..e73619f 100644 --- a/entangled/model/code_block.py +++ b/entangled/model/code_block.py @@ -3,17 +3,22 @@ import os from ..text_location import TextLocation -from ..readers.lines import lines +from ..iterators.lines import lines from ..config.language import Language from .properties import Property def indent(prefix: str, text: str) -> str: def indent_line(line: str): - if line.strip() == "" and line.endswith(os.linesep): - return os.linesep - if line.strip() == "": - return "" + if line.endswith(os.linesep): + eol = os.linesep + else: + eol = "" + if line.strip() == "": + return "" + + if (prefix + line).strip() == "": + return eol return prefix + line diff --git a/entangled/model/reference_name.py b/entangled/model/reference_name.py index 62557b6..4f270cf 100644 --- a/entangled/model/reference_name.py +++ b/entangled/model/reference_name.py @@ -18,7 +18,10 @@ def __hash__(self) -> int: @override def __str__(self): - return 
"::".join(self.namespace) + "::" + self.name + if self.namespace: + return "::".join(self.namespace) + "::" + self.name + else: + return self.name @staticmethod def from_str(name: str, namespace: tuple[str, ...] = ()) -> ReferenceName: diff --git a/entangled/model/tangle.py b/entangled/model/tangle.py index 254e709..efa0d80 100644 --- a/entangled/model/tangle.py +++ b/entangled/model/tangle.py @@ -9,7 +9,7 @@ from ..config import AnnotationMethod -from ..readers.lines import lines +from ..iterators.lines import lines from ..errors.user import UserError from ..text_location import TextLocation @@ -58,7 +58,7 @@ def in_order(self) -> list[T]: @contextmanager def visit(self, x: T): if x in self._visited: - raise CyclicReference(str(x), list(map(str, self.in_order()))) + raise CyclicReference(str(x), [str(r.name) for r in self.in_order()]) self._visited[x] = len(self._visited) yield del self._visited[x] @@ -84,16 +84,16 @@ def tangler( if code_block.header and not skip_header: yield code_block.header - for line in lines(code_block.source): - if m := re.match(r"^(?P\s*)<<(?P[\w:-]+)>>\s*$", line.rstrip()): - ref_name = ReferenceName.from_str(m["refname"], code_block.namespace) - if not refs.has_name(ref_name): - raise MissingReference(code_block.origin, ref_name) - for ref in refs.select_by_name(ref_name): - with visitor.visit(ref): + with visitor.visit(ref): + for line in lines(code_block.source): + if m := re.match(r"^(?P\s*)<<(?P[\w:-]+)>>\s*$", line.rstrip()): + ref_name = ReferenceName.from_str(m["refname"], code_block.namespace) + if not refs.has_name(ref_name): + raise MissingReference(code_block.origin, ref_name) + for ref in refs.select_by_name(ref_name): yield from indent(m["indent"], recur(recur, deps, ref, False)) - else: - yield line + else: + yield line return tangler diff --git a/entangled/readers/__init__.py b/entangled/readers/__init__.py index e69de29..1aada28 100644 --- a/entangled/readers/__init__.py +++ b/entangled/readers/__init__.py @@ -0,0 +1,13 
@@ +from pathlib import PurePath + +from .markdown import markdown +from ..iterators import numbered_lines, run_generator +from .types import Reader + + +def run_reader[O, T](reader: Reader[O, T], inp: str, filename: str = "-") -> tuple[list[O], T]: + return run_generator(reader(numbered_lines(PurePath(filename), inp))) + + +__all__ = ["markdown", "run_reader"] + diff --git a/entangled/readers/markdown.py b/entangled/readers/markdown.py index c2d55b6..ac98eb7 100644 --- a/entangled/readers/markdown.py +++ b/entangled/readers/markdown.py @@ -4,16 +4,16 @@ from ..config.namespace_default import NamespaceDefault from ..model import CodeBlock, Content, RawContent, PlainText, ReferenceId, ReferenceMap, ReferenceName +from ..model.properties import get_attribute_string, get_attribute, read_properties, get_classes, get_id from ..config import Config from ..errors.user import CodeAttributeError, IndentationError -from ..model.properties import get_attribute_string, get_attribute, read_properties, get_classes, get_id from ..utility import first from ..hooks import get_hooks, HookBase +from ..iterators.lines import lines +from ..text_location import TextLocation from .types import InputStream, Reader, RawMarkdownStream -from .lines import lines from .delimiters import delimited_token_getter -from ..text_location import TextLocation from .yaml_header import read_yaml_header, get_config import re diff --git a/entangled/readers/types.py b/entangled/readers/types.py index 9bdc451..4d4e76a 100644 --- a/entangled/readers/types.py +++ b/entangled/readers/types.py @@ -1,8 +1,7 @@ from collections.abc import Callable, Generator -from typing import cast from ..text_location import TextLocation -from .peekable import Peekable +from ..iterators.peekable import Peekable from ..model import Content, RawContent @@ -13,13 +12,3 @@ type MarkdownStream[Result] = Generator[Content, None, Result] -def run_generator[O, R](g: Generator[O, None, R]) -> tuple[list[O], R]: - result: R | None = None 
- - def h() -> Generator[O]: - nonlocal result - result = yield from g - - out = list(h()) - - return out, cast(R, result) diff --git a/test/readers/test_lines.py b/test/iterators/test_lines.py similarity index 86% rename from test/readers/test_lines.py rename to test/iterators/test_lines.py index e74bc13..8f98f67 100644 --- a/test/readers/test_lines.py +++ b/test/iterators/test_lines.py @@ -1,6 +1,5 @@ from pathlib import PurePath -from entangled.readers.lines import lines, numbered_lines -from entangled.readers.peekable import Peekable +from entangled.iterators import lines, numbered_lines, Peekable from entangled.text_location import TextLocation diff --git a/test/readers/test_peakable.py b/test/iterators/test_peekable.py similarity index 88% rename from test/readers/test_peakable.py rename to test/iterators/test_peekable.py index f9897ad..beb5c77 100644 --- a/test/readers/test_peakable.py +++ b/test/iterators/test_peekable.py @@ -1,6 +1,6 @@ from collections.abc import Generator import pytest -from entangled.readers.peekable import Peekable, peekable +from entangled.iterators.peekable import Peekable, peekable def test_peekable_class(): diff --git a/test/iterators/test_run_generator.py b/test/iterators/test_run_generator.py new file mode 100644 index 0000000..c1ef4fc --- /dev/null +++ b/test/iterators/test_run_generator.py @@ -0,0 +1,18 @@ +from entangled.iterators import run_generator + + +def g(): + yield 1 + yield 2 + return 3 + + +def h(): + return 1 + yield 0 + + +def test_run_generator(): + assert run_generator(g()) == ([1, 2], 3) + assert run_generator(h()) == ([], 1) + diff --git a/test/model/test_code_block.py b/test/model/test_code_block.py index 0b5a625..78223ee 100644 --- a/test/model/test_code_block.py +++ b/test/model/test_code_block.py @@ -3,13 +3,32 @@ from entangled.model.code_block import CodeBlock from entangled.text_location import TextLocation +from os import linesep as eol + + +expected_2 = """ ++``` ++hello ++ ++goodbye ++``` 
+""".lstrip() + + +expected_3 = """ ``` + hello + + goodbye + ``` +""" + def test_code_block(): cb = CodeBlock( properties=[], - open_line="```\n", - close_line="```\n", - source="hello\n", + open_line=f"```{eol}", + close_line=f"```{eol}", + source=f"hello{eol}", indent="> ", origin=TextLocation(PurePath("-"), 1) ) @@ -17,3 +36,18 @@ def test_code_block(): expected = """```\nhello\n```\n""" assert cb.text == expected assert cb.indented_text == indent(expected.rstrip(), "> ") + "\n" + + cb = CodeBlock( + properties=[], + open_line=f"```{eol}", + close_line=f"```{eol}", + source=f"hello{eol}{eol}goodbye{eol}", + indent="+", + origin=TextLocation(PurePath("-"), 1) + ) + assert cb.indented_text == expected_2 + + cb.indent = " " + assert cb.indented_text == expected_3 + + diff --git a/test/model/test_cycles.py b/test/model/test_cycles.py new file mode 100644 index 0000000..06a9779 --- /dev/null +++ b/test/model/test_cycles.py @@ -0,0 +1,65 @@ +from functools import partial +import pytest + +from entangled.model import ReferenceMap, ReferenceName +from entangled.model.tangle import tangle_ref, CyclicReference +from entangled.readers import markdown, run_reader +from entangled.config import Config, AnnotationMethod + + +md_source = """ +This should raise a `CyclicReference` error. 
+ +``` {.python #hello} +<> +``` + +So should this: + +``` {.python #phobos} +<> +``` + +``` {.python #deimos} +<> +``` + +also when tangling from something else: + +``` {.python #mars} +<> +``` + +What should not throw an error is doubling a reference: + +``` {.python #helium} +<> +<> +``` + +``` {.python #electron} +negative charge +``` +""" + + + +@pytest.mark.timeout(5) +def test_cycles(): + refs = ReferenceMap() + _ = run_reader(partial(markdown, Config(), refs), md_source) + + with pytest.raises(CyclicReference): + _ = tangle_ref(refs, ReferenceName((), "hello"), AnnotationMethod.NAKED) + + with pytest.raises(CyclicReference): + _ = tangle_ref(refs, ReferenceName((), "phobos"), AnnotationMethod.NAKED) + + try: + _ = tangle_ref(refs, ReferenceName((), "mars"), AnnotationMethod.NAKED) + except CyclicReference as e: + assert e.cycle == ["mars", "phobos", "deimos"] + + result, _ = tangle_ref(refs, ReferenceName((), "helium"), AnnotationMethod.NAKED) + assert result == "negative charge\nnegative charge\n" + diff --git a/test/model/test_document.py b/test/model/test_document.py new file mode 100644 index 0000000..f94e4d3 --- /dev/null +++ b/test/model/test_document.py @@ -0,0 +1,76 @@ +from functools import partial +from pathlib import PurePath + +from entangled.config import Config, ConfigUpdate, AnnotationMethod +from entangled.model import Document, ReferenceMap +from entangled.readers import markdown, run_reader + + +md_source = """ +--- +entangled: + version: "2.4" + style: basic + annotation: naked +--- + +This is a basic example of an Entangled document. We'll compute Fibonacci numbers +in Haskell! + +```haskell +-- | id: fibonacci +fibonacci :: Int -> Int -> [Int] +fibonacci a b = a : fibonacci b (a + b) +``` + +The ability to write lazy expressions like these is unparalelled in other languages. + +```haskell +-- | file: fib.hs +<> + +main :: IO () +main = putStrLn $ show $ take 20 $ fibonacci 1 1 +``` + +Enjoy! 
+""".lstrip() + + +hs_tgt = """ +fibonacci :: Int -> Int -> [Int] +fibonacci a b = a : fibonacci b (a + b) + +main :: IO () +main = putStrLn $ show $ take 20 $ fibonacci 1 1 +""".lstrip() + + +hs_tgt_annotated = """ +-- ~/~ begin <>[0] +-- ~/~ begin <>[0] +fibonacci :: Int -> Int -> [Int] +fibonacci a b = a : fibonacci b (a + b) +-- ~/~ end + +main :: IO () +main = putStrLn $ show $ take 20 $ fibonacci 1 1 +-- ~/~ end +""".lstrip() + + +def test_document(): + refs = ReferenceMap() + path = PurePath("fib.md") + content, config = run_reader(partial(markdown, Config(), refs), md_source, path.as_posix()) + + doc = Document(config, refs, { path: content }) + assert doc.source_text(path) == md_source + + fib_hs, _ = doc.target_text(PurePath("fib.hs")) + assert fib_hs == hs_tgt + + doc.config |= ConfigUpdate(version="2.4", annotation=AnnotationMethod.STANDARD) + fib_hs, _ = doc.target_text(PurePath("fib.hs")) + assert fib_hs == hs_tgt_annotated + diff --git a/test/readers/test_markdown.py b/test/readers/test_markdown.py index 301afad..08e8b47 100644 --- a/test/readers/test_markdown.py +++ b/test/readers/test_markdown.py @@ -9,10 +9,9 @@ from entangled.model import Document, PlainText, CodeBlock, ReferenceId, ReferenceMap, ReferenceName from entangled.model.tangle import tangle_ref from entangled.readers.markdown import code_block, collect_plain_text, ignore_block, markdown, raw_markdown -from entangled.readers.lines import numbered_lines -from entangled.readers.peekable import Peekable +from entangled.iterators import Peekable, run_generator from entangled.config import AnnotationMethod, Config, ConfigUpdate -from entangled.readers.types import run_generator, Reader +from entangled.readers import run_reader empty_stream = Peekable(iter([])) @@ -32,10 +31,6 @@ """.strip() -def run_reader[O, T](reader: Reader[O, T], inp: str, filename: str = "-") -> tuple[list[O], T]: - return run_generator(reader(numbered_lines(PurePath(filename), inp))) - - def test_ignore_block(): ol, _ 
= run_generator(ignore_block(Config())(empty_stream)) assert not ol diff --git a/test/readers/test_yaml_header.py b/test/readers/test_yaml_header.py index 1c0cca5..7b6e694 100644 --- a/test/readers/test_yaml_header.py +++ b/test/readers/test_yaml_header.py @@ -1,9 +1,8 @@ from pathlib import PurePath -import logging import pytest from entangled.readers.yaml_header import read_yaml_header, get_config -from entangled.readers.lines import numbered_lines +from entangled.iterators import numbered_lines from entangled.errors.user import ParseError, UserError from entangled.readers.types import MarkdownStream diff --git a/test/test_cycles.py b/test/test_cycles.py deleted file mode 100644 index c686bd4..0000000 --- a/test/test_cycles.py +++ /dev/null @@ -1,63 +0,0 @@ -from pathlib import Path -import pytest - -from entangled.tangle import tangle_ref -from entangled.markdown_reader import read_markdown_string -from entangled.errors.user import CyclicReference -from entangled.config import AnnotationMethod - - -md_source = """ -This should raise a `CyclicReference` error. 
- -``` {.python #hello} -<> -``` - -So should this: - -``` {.python #phobos} -<> -``` - -``` {.python #deimos} -<> -``` - -also when tangling from something else: - -``` {.python #mars} -<> -``` - -What should not throw an error is doubling a reference: - -``` {.python #helium} -<> -<> -``` - -``` {.python #electron} -negative charge -``` -""" - - -@pytest.mark.timeout(5) -def test_cycles(): - refs, _ = read_markdown_string(md_source) - - with pytest.raises(CyclicReference): - tangle_ref(refs, "hello") - - with pytest.raises(CyclicReference): - result, _ = tangle_ref(refs, "phobos") - print(result) - - try: - tangle_ref(refs, "mars") - except CyclicReference as e: - assert e.cycle == ["mars", "phobos", "deimos"] - - result, _ = tangle_ref(refs, "helium", AnnotationMethod.NAKED) - assert result == "negative charge\nnegative charge" From 206a9f62bcafcec15a912cdce85e55b6c18391d1 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Fri, 7 Nov 2025 09:53:18 +0100 Subject: [PATCH 17/48] move document to interface module; start work on code reader --- Makefile | 7 +++- architecture.md | 2 +- entangled/hooks/quarto_attributes.py | 5 ++- entangled/interface/__init__.py | 3 ++ entangled/{model => interface}/document.py | 14 +------ entangled/model/__init__.py | 5 ++- entangled/model/content.py | 14 +++++++ entangled/readers/code.py | 46 ++++++++++++++++++++++ test/{model => interface}/test_document.py | 3 +- test/readers/test_markdown.py | 4 +- 10 files changed, 82 insertions(+), 21 deletions(-) create mode 100644 entangled/interface/__init__.py rename entangled/{model => interface}/document.py (79%) create mode 100644 entangled/model/content.py create mode 100644 entangled/readers/code.py rename test/{model => interface}/test_document.py (95%) diff --git a/Makefile b/Makefile index 13367b1..91ba164 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,12 @@ test-$(1): uv run coverage xml endef -modules = readers io iterators model +modules = readers io iterators model interface 
$(foreach mod,$(modules),$(eval $(call test_template,$(mod)))) +.PHONY: test-modules + +test-modules: + uv run pytest $(modules:%=test/%) --cov=entangled -x + diff --git a/architecture.md b/architecture.md index 7b19b84..7e3100e 100644 --- a/architecture.md +++ b/architecture.md @@ -147,5 +147,5 @@ Here we have a `read_yaml_header` reader that emits `PlainText`, but also parses Test Coverage ============= -Unit tests for each module should cover 100% of that module. The `Makefile` contains test targets for every module that measure only the coverage on that module. +Unit tests for each module should cover most of that module. The `Makefile` contains test targets for every module that measure only the coverage on that module. diff --git a/entangled/hooks/quarto_attributes.py b/entangled/hooks/quarto_attributes.py index 9443727..fa897df 100644 --- a/entangled/hooks/quarto_attributes.py +++ b/entangled/hooks/quarto_attributes.py @@ -2,6 +2,7 @@ import re from typing import final, override, cast import yaml +from os import linesep as eol from entangled.config.language import Language @@ -29,7 +30,9 @@ def split_yaml_header(language: Language, source: str) -> tuple[str, str, object body_start = i break - return "".join(lines[:body_start]), "".join(lines[body_start:]), yaml.safe_load("".join(header_lines)) + return "".join(lines[:body_start]), \ + "".join(lines[body_start:]), \ + yaml.safe_load(eol.join(header_lines)) def amend_code_properties(code_block: CodeBlock): diff --git a/entangled/interface/__init__.py b/entangled/interface/__init__.py new file mode 100644 index 0000000..fd289e2 --- /dev/null +++ b/entangled/interface/__init__.py @@ -0,0 +1,3 @@ +from .document import Document + +__all__ = ["Document"] diff --git a/entangled/model/document.py b/entangled/interface/document.py similarity index 79% rename from entangled/model/document.py rename to entangled/interface/document.py index e06c91f..c0f8c02 100644 --- a/entangled/model/document.py +++ 
b/entangled/interface/document.py @@ -2,19 +2,7 @@ from pathlib import PurePath from ..config.config_data import Config -from .reference_id import ReferenceId -from .reference_map import ReferenceMap -from .code_block import CodeBlock -from .tangle import tangle_ref - - -@dataclass -class PlainText: - content: str - - -type RawContent = PlainText | CodeBlock -type Content = PlainText | ReferenceId +from ..model import ReferenceMap, ReferenceId, tangle_ref, PlainText, Content def content_to_text(r: ReferenceMap, c: Content) -> str: diff --git a/entangled/model/__init__.py b/entangled/model/__init__.py index 0c52814..938a521 100644 --- a/entangled/model/__init__.py +++ b/entangled/model/__init__.py @@ -1,11 +1,11 @@ -from .document import Document, PlainText, Content, RawContent +from .content import PlainText, Content, RawContent from .code_block import CodeBlock from .reference_name import ReferenceName from .reference_id import ReferenceId from .reference_map import ReferenceMap +from .tangle import tangle_ref __all__ = [ - "Document", "PlainText", "Content", "RawContent", @@ -13,4 +13,5 @@ "ReferenceName", "ReferenceId", "ReferenceMap", + "tangle_ref" ] diff --git a/entangled/model/content.py b/entangled/model/content.py new file mode 100644 index 0000000..ba98c77 --- /dev/null +++ b/entangled/model/content.py @@ -0,0 +1,14 @@ +from dataclasses import dataclass +from .code_block import CodeBlock +from .reference_id import ReferenceId + + +@dataclass +class PlainText: + content: str + + +type RawContent = PlainText | CodeBlock +type Content = PlainText | ReferenceId + + diff --git a/entangled/readers/code.py b/entangled/readers/code.py new file mode 100644 index 0000000..654c75c --- /dev/null +++ b/entangled/readers/code.py @@ -0,0 +1,46 @@ +from collections.abc import Generator, Mapping +from dataclasses import dataclass +from pathlib import PurePath +from os import linesep as eol + +import re + +from .types import InputStream +from ..model import ReferenceId, 
ReferenceName + + +@dataclass +class Block: + reference_id: ReferenceId + indent: str + content: str + + +OPEN_BLOCK_EXPR = r"^(?P\s*).* ~/~ begin <<(?P[^#<>]+)#(?P[^#<>]+)>>\[(?P\d+)\]" +CLOSE_BLOCK_EXPR = r"^(?P\s*).* ~/~ end" + + + +def read_content(namespace_map: Mapping[PurePath, tuple[str, ...]], block: Block, input: InputStream) -> Generator[Block]: + + for _, line in input: + if m := re.match(CLOSE_BLOCK_EXPR, line): + assert m["indent"] == block.indent + yield block + return + + elif m := re.match(OPEN_BLOCK_EXPR, line): + assert m["indent"] <= block.indent + ref_name = ReferenceName.from_str(m["ref_name"]) + md_source = PurePath(m["source"]) + ref_count = int(m["ref_count"]) + if ref_name != block.ref_name: + namespace = namespace_map[md_source] + ref_str = ref_name.name if ref_name.namespace == namespace else str(ref_name) + block.content += m["indent"].removeprefix(block.indent) + "<<" + ref_str + ">>" + eol + new_block = Block(ReferenceId(ref_name, md_source, ref_count), m["indent"], "") + yield from read_content(namespace_map, new_block, input) + + else: + block.content += line.removeprefix(block.indent) + diff --git a/test/model/test_document.py b/test/interface/test_document.py similarity index 95% rename from test/model/test_document.py rename to test/interface/test_document.py index f94e4d3..34b901d 100644 --- a/test/model/test_document.py +++ b/test/interface/test_document.py @@ -2,8 +2,9 @@ from pathlib import PurePath from entangled.config import Config, ConfigUpdate, AnnotationMethod -from entangled.model import Document, ReferenceMap +from entangled.model import ReferenceMap from entangled.readers import markdown, run_reader +from entangled.interface import Document md_source = """ diff --git a/test/readers/test_markdown.py b/test/readers/test_markdown.py index 08e8b47..0627494 100644 --- a/test/readers/test_markdown.py +++ b/test/readers/test_markdown.py @@ -6,8 +6,8 @@ from entangled.config.namespace_default import NamespaceDefault from 
entangled.errors.user import ParseError, IndentationError, CodeAttributeError -from entangled.model import Document, PlainText, CodeBlock, ReferenceId, ReferenceMap, ReferenceName -from entangled.model.tangle import tangle_ref +from entangled.interface import Document +from entangled.model import PlainText, CodeBlock, ReferenceId, ReferenceMap, ReferenceName, tangle_ref from entangled.readers.markdown import code_block, collect_plain_text, ignore_block, markdown, raw_markdown from entangled.iterators import Peekable, run_generator from entangled.config import AnnotationMethod, Config, ConfigUpdate From 91f844e90d64f09e6b0e75ace71cd201dbb8b9f9 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Sun, 9 Nov 2025 16:13:54 +0100 Subject: [PATCH 18/48] reimplement code reader and tests --- entangled/model/reference_id.py | 5 ++ entangled/model/tangle.py | 26 +++--- entangled/readers/code.py | 96 ++++++++++++++++----- pyproject.toml | 2 + test/interface/test_document.py | 4 +- test/model/test_cycles.py | 2 +- test/readers/test_code.py | 144 ++++++++++++++++++++++++++++++++ uv.lock | 20 ++++- 8 files changed, 265 insertions(+), 34 deletions(-) create mode 100644 test/readers/test_code.py diff --git a/entangled/model/reference_id.py b/entangled/model/reference_id.py index 7675868..397c7cd 100644 --- a/entangled/model/reference_id.py +++ b/entangled/model/reference_id.py @@ -24,3 +24,8 @@ class ReferenceId: @override def __hash__(self) -> int: return hash((self.name, self.file, self.ref_count)) + + @override + def __str__(self) -> str: + return f"{self.name}[{self.ref_count}]" + diff --git a/entangled/model/tangle.py b/entangled/model/tangle.py index efa0d80..a84669a 100644 --- a/entangled/model/tangle.py +++ b/entangled/model/tangle.py @@ -58,14 +58,14 @@ def in_order(self) -> list[T]: @contextmanager def visit(self, x: T): if x in self._visited: - raise CyclicReference(str(x), [str(r.name) for r in self.in_order()]) + raise CyclicReference(str(x), [str(r) for r in 
self.in_order()]) self._visited[x] = len(self._visited) yield del self._visited[x] type Deps = set[PurePath] -type Tangler = Callable[[Tangler, Deps, ReferenceId, bool], Iterator[str]] +type Tangler = Callable[[Tangler, Deps, ReferenceId, bool, bool], Iterator[str]] def indent(prefix: str, g: Iterator[str]) -> Iterator[str]: @@ -76,7 +76,7 @@ def naked_tangler(refs: ReferenceMap) -> Tangler: visitor: Visitor[ReferenceId] = Visitor() def tangler( - recur: Tangler, deps: set[PurePath], ref: ReferenceId, skip_header: bool + recur: Tangler, deps: set[PurePath], ref: ReferenceId, skip_header: bool, _: bool ) -> Generator[str]: code_block = refs[ref] deps.add(code_block.origin.filename) @@ -90,8 +90,10 @@ def tangler( ref_name = ReferenceName.from_str(m["refname"], code_block.namespace) if not refs.has_name(ref_name): raise MissingReference(code_block.origin, ref_name) - for ref in refs.select_by_name(ref_name): - yield from indent(m["indent"], recur(recur, deps, ref, False)) + ref_lst = refs.select_by_name(ref_name) + yield from indent(m["indent"], recur(recur, deps, ref_lst[0], False, True)) + for ref in ref_lst[1:]: + yield from indent(m["indent"], recur(recur, deps, ref, False, False)) else: yield line @@ -102,7 +104,7 @@ def annotated_tangler(refs: ReferenceMap) -> Tangler: naked = naked_tangler(refs) def tangler( - recur: Tangler, deps: set[PurePath], ref: ReferenceId, skip_header: bool + recur: Tangler, deps: set[PurePath], ref: ReferenceId, skip_header: bool, first: bool ) -> Generator[str]: code_block = refs[ref] if code_block.language is None: @@ -116,8 +118,9 @@ def tangler( if code_block.header and not skip_header: yield code_block.header - yield f"{open_comment} ~/~ begin <<{ref.file.as_posix()}#{ref.name}>>[{ref.ref_count}]{close_comment}{os.linesep}" - yield from naked(recur, deps, ref, True) + ref_count_str = "init" if first else str(ref.ref_count) + yield f"{open_comment} ~/~ begin 
<<{ref.file.as_posix()}#{ref.name}>>[{ref_count_str}]{close_comment}{os.linesep}" + yield from naked(recur, deps, ref, True, first) yield f"{open_comment} ~/~ end{close_comment}{os.linesep}" return tangler @@ -141,8 +144,11 @@ def tangle_ref( deps: set[PurePath] = set() out = "" - for ref in refs.select_by_name(name): - for line in tangler(tangler, deps, ref, False): + ref_lst = refs.select_by_name(name) + for line in tangler(tangler, deps, ref_lst[0], False, True): + out += line + for ref in ref_lst[1:]: + for line in tangler(tangler, deps, ref, False, False): out += line return out, deps diff --git a/entangled/readers/code.py b/entangled/readers/code.py index 654c75c..f8c3b9e 100644 --- a/entangled/readers/code.py +++ b/entangled/readers/code.py @@ -1,4 +1,4 @@ -from collections.abc import Generator, Mapping +from collections.abc import Generator from dataclasses import dataclass from pathlib import PurePath from os import linesep as eol @@ -7,6 +7,7 @@ from .types import InputStream from ..model import ReferenceId, ReferenceName +from ..errors.user import ParseError, IndentationError @dataclass @@ -16,31 +17,86 @@ class Block: content: str -OPEN_BLOCK_EXPR = r"^(?P\s*).* ~/~ begin <<(?P[^#<>]+)#(?P[^#<>]+)>>\[(?P\d+)\]" +OPEN_BLOCK_EXPR = r"^(?P\s*).* ~/~ begin <<(?P[^#<>]+)#(?P[^#<>]+)>>\[(?P\d+|init)\]" + + +@dataclass +class OpenBlockData: + ref: ReferenceId + is_init: bool + indent: str + + +def open_block(line: str) -> OpenBlockData | None: + if not (m := re.match(OPEN_BLOCK_EXPR, line)): + return None + + ref_name = ReferenceName.from_str(m["ref_name"]) + md_source = PurePath(m["source"]) + is_init = m["ref_count"] == "init" + ref_count = 0 if is_init else int(m["ref_count"]) + return OpenBlockData(ReferenceId(ref_name, md_source, ref_count), is_init, m["indent"]) + + CLOSE_BLOCK_EXPR = r"^(?P\s*).* ~/~ end" +@dataclass +class CloseBlockData: + indent: str + + +def close_block(line: str) -> CloseBlockData | None: + if not (m := re.match(CLOSE_BLOCK_EXPR, 
line)): + return None + return CloseBlockData(m["indent"]) + + +def read_top_level(input: InputStream) -> Generator[Block]: + if not input: + return -def read_content(namespace_map: Mapping[PurePath, tuple[str, ...]], block: Block, input: InputStream) -> Generator[Block]: + while input: + r = yield from read_block((), "", input) + if r is None: + _ = next(input) - for _, line in input: - if m := re.match(CLOSE_BLOCK_EXPR, line): - assert m["indent"] == block.indent - yield block - return - elif m := re.match(OPEN_BLOCK_EXPR, line): - assert m["indent"] <= block.indent - ref_name = ReferenceName.from_str(m["ref_name"]) - md_source = PurePath(m["source"]) - ref_count = int(m["ref_count"]) - if ref_name != block.ref_name: - namespace = namespace_map[md_source] - ref_str = ref_name.name if ref_name.namespace == namespace else str(ref_name) - block.content += m["indent"].removeprefix(block.indent) + "<<" + ref_str + ">>" + eol - new_block = Block(ReferenceId(ref_name, md_source, ref_count), m["indent"], "") - yield from read_content(namespace_map, new_block, input) +def read_block(namespace: tuple[str, ...], indent: str, input: InputStream) -> Generator[Block, None, str | None]: + if not input: + return None + pos, line = input.peek() + if (block_data := open_block(line)) is None: + return None + _ = next(input) + if block_data.indent < indent: + raise IndentationError(pos) + + content = "" + while input: + line = yield from read_block(block_data.ref.name.namespace, block_data.indent, input) + if line is not None: + content += line + continue + + pos, line = next(input) + if (close_block_data := close_block(line)) is None: + if not line.startswith(block_data.indent): + raise IndentationError(pos) + content += line.removeprefix(block_data.indent) else: - block.content += line.removeprefix(block.indent) + if close_block_data.indent != block_data.indent: + raise IndentationError(pos) + yield Block(block_data.ref, block_data.indent, content) + + if block_data.is_init: + 
extra_indent = block_data.indent.removeprefix(indent) + ref = block_data.ref + ref_str = ref.name if ref.name.namespace == namespace else str(ref.name) + return f"{extra_indent}<<{ref_str}>>{eol}" + else: + return "" + + raise ParseError(pos, "unexpected end of file") diff --git a/pyproject.toml b/pyproject.toml index c2a051a..1324272 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,8 @@ dependencies = [ "repl-session>=0.2.0", "msgspec>=0.19.0", "watchdog>=6.0.0", + "click>=8.3.0", + "rich-click>=1.9.4", ] [project.urls] diff --git a/test/interface/test_document.py b/test/interface/test_document.py index 34b901d..5f805a0 100644 --- a/test/interface/test_document.py +++ b/test/interface/test_document.py @@ -48,8 +48,8 @@ hs_tgt_annotated = """ --- ~/~ begin <>[0] --- ~/~ begin <>[0] +-- ~/~ begin <>[init] +-- ~/~ begin <>[init] fibonacci :: Int -> Int -> [Int] fibonacci a b = a : fibonacci b (a + b) -- ~/~ end diff --git a/test/model/test_cycles.py b/test/model/test_cycles.py index 06a9779..cb46c8a 100644 --- a/test/model/test_cycles.py +++ b/test/model/test_cycles.py @@ -58,7 +58,7 @@ def test_cycles(): try: _ = tangle_ref(refs, ReferenceName((), "mars"), AnnotationMethod.NAKED) except CyclicReference as e: - assert e.cycle == ["mars", "phobos", "deimos"] + assert e.cycle == ["mars[0]", "phobos[0]", "deimos[0]"] result, _ = tangle_ref(refs, ReferenceName((), "helium"), AnnotationMethod.NAKED) assert result == "negative charge\nnegative charge\n" diff --git a/test/readers/test_code.py b/test/readers/test_code.py new file mode 100644 index 0000000..a01aac6 --- /dev/null +++ b/test/readers/test_code.py @@ -0,0 +1,144 @@ +from pathlib import PurePath + +from entangled.readers.code import read_top_level, read_block +from entangled.readers import run_reader +from entangled.model import ReferenceId, ReferenceName +from entangled.errors.user import IndentationError, ParseError +from entangled.iterators import run_generator, Peekable + +import pytest + + 
+hs_tgt_annotated = """ +-- ~/~ begin <>[init] +-- ~/~ begin <>[init] +fibonacci :: Int -> Int -> [Int] +fibonacci a b = a : fibonacci b (a + b) +-- ~/~ end + +main :: IO () +main = putStrLn $ show $ take 20 $ fibonacci 1 1 +-- ~/~ end +""".lstrip() + + +block0 = """ +fibonacci :: Int -> Int -> [Int] +fibonacci a b = a : fibonacci b (a + b) +""".lstrip() + + +block1 = """ +<> + +main :: IO () +main = putStrLn $ show $ take 20 $ fibonacci 1 1 +""".lstrip() + + +def test_code_reader(): + blocks, _ = run_reader(read_top_level, hs_tgt_annotated) + assert blocks[0].reference_id == ReferenceId(ReferenceName((), "fibonacci"), PurePath("fib.md"), 0) + assert blocks[0].indent == "" + assert blocks[0].content == block0 + assert blocks[1].reference_id == ReferenceId(ReferenceName((), "fib.hs"), PurePath("fib.md"), 0) + assert blocks[1].content == block1 + + +code2 = """ +# ~/~ begin <>[init] +only this: + # ~/~ begin <>[init] + a1 + # ~/~ end + # ~/~ begin <>[1] + a2 + # ~/~ end + # ~/~ begin <>[init] + a1 + # ~/~ end + # ~/~ begin <>[1] + a2 + # ~/~ end +# ~/~ end +""".lstrip() + + +code2_apy = """ +only this: + <> + <> +""".lstrip() + + +def test_ref_twice(): + blocks, _ = run_reader(read_top_level, code2) + assert blocks[0].reference_id == ReferenceId(ReferenceName((), "a"), PurePath("a.md"), 0) + assert blocks[0].indent == " " + assert blocks[4].content == code2_apy + + +code3 = """ +#!shebang! 
+# ~/~ begin <>[init] +and this +# ~/~ end +""".lstrip() + + +hasshebang = """ +and this +""".lstrip() + + +def test_header(): + blocks, _ = run_reader(read_top_level, code3) + assert blocks[0].content == hasshebang + + +indent_error1 = """ + # ~/~ begin <>[init] +This is an error + # ~/~ end +""" + +indent_error2 = """ +# ~/~ begin <>[init] +This is an error + # ~/~ end +""" + +indent_error3 = """ + # ~/~ begin <>[init] + This is an error +# ~/~ end +""" + +indent_error4 = """ + # ~/~ begin <>[init] +# ~/~ begin <>[init] +This is an error +# ~/~ end + # ~/~ end +""" + + +def test_indent_errors(): + for inp in [indent_error1, indent_error2, indent_error3, indent_error4]: + with pytest.raises(IndentationError): + _ = run_reader(read_top_level, inp) + + +def test_empty(): + assert run_generator(read_top_level(Peekable(iter([])))) == ([], None) + assert run_generator(read_block((), "", Peekable(iter([])))) == ([], None) + + +eof_error = """ +# ~/~ begin <>[init] +This is an error +""" + +def test_eof(): + with pytest.raises(ParseError): + _ = run_reader(read_top_level, eof_error) diff --git a/uv.lock b/uv.lock index 06bcb23..abfaab4 100644 --- a/uv.lock +++ b/uv.lock @@ -294,6 +294,7 @@ source = { editable = "." 
} dependencies = [ { name = "argh" }, { name = "brei" }, + { name = "click" }, { name = "copier" }, { name = "filelock" }, { name = "mawk" }, @@ -303,6 +304,7 @@ dependencies = [ { name = "repl-session" }, { name = "rich" }, { name = "rich-argparse" }, + { name = "rich-click" }, { name = "tomlkit" }, { name = "watchdog" }, ] @@ -332,6 +334,7 @@ dev = [ requires-dist = [ { name = "argh", specifier = ">=0.30,<0.31" }, { name = "brei", specifier = ">=0.2.3,<0.3" }, + { name = "click", specifier = ">=8.3.0" }, { name = "copier", specifier = ">=9,<10" }, { name = "filelock", specifier = ">=3.12.0,<4" }, { name = "mawk", specifier = ">=0.1.7,<0.2" }, @@ -341,6 +344,7 @@ requires-dist = [ { name = "repl-session", specifier = ">=0.2.0" }, { name = "rich", specifier = ">=13.3.5,<14" }, { name = "rich-argparse", specifier = ">=1.4.0,<2" }, + { name = "rich-click", specifier = ">=1.9.4" }, { name = "tomlkit", specifier = ">=0.12.1,<0.13" }, { name = "watchdog", specifier = ">=6.0.0" }, ] @@ -708,7 +712,7 @@ wheels = [ [[package]] name = "msgspec" -version = "0.19.1.dev24+gee7c6baac.d20251103" +version = "0.19.1.dev24+gee7c6baac.d20251107" source = { git = "https://github.com/jcrist/msgspec#ee7c6baac6c07c6b6064672278e2f784305b4b43" } [[package]] @@ -1210,6 +1214,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/80/97b6f357ac458d9ad9872cc3183ca09ef7439ac89e030ea43053ba1294b6/rich_argparse-1.7.2-py3-none-any.whl", hash = "sha256:0559b1f47a19bbeb82bf15f95a057f99bcbbc98385532f57937f9fc57acc501a", size = 25476, upload-time = "2025-11-01T10:35:42.681Z" }, ] +[[package]] +name = "rich-click" +version = "1.9.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "rich" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bf/d8/f2c1b7e9a645ba40f756d7a5b195fc104729bc6b19061ba3ab385f342931/rich_click-1.9.4.tar.gz", hash = 
"sha256:af73dc68e85f3bebb80ce302a642b9fe3b65f3df0ceb42eb9a27c467c1b678c8", size = 73632, upload-time = "2025-10-25T01:08:49.142Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5b/6a/1f03adcb3cc7beb6f63aecc21565e9d515ccee653187fc4619cd0b42713b/rich_click-1.9.4-py3-none-any.whl", hash = "sha256:d70f39938bcecaf5543e8750828cbea94ef51853f7d0e174cda1e10543767389", size = 70245, upload-time = "2025-10-25T01:08:47.939Z" }, +] + [[package]] name = "ruff" version = "0.14.3" From d534ee597085861eeb845676aeda8677a685d9d9 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Mon, 10 Nov 2025 23:25:29 +0100 Subject: [PATCH 19/48] reworking commands to use click... (in progress) --- entangled/code_reader.py | 84 ------------------------------- entangled/commands/reset.py | 2 +- entangled/commands/stitch.py | 57 ++++----------------- entangled/commands/sync.py | 57 ++++++++++++--------- entangled/commands/tangle.py | 64 +++++++---------------- entangled/config/__init__.py | 7 +-- entangled/config/version.py | 6 +-- entangled/errors/user.py | 14 +++++- entangled/from_str.py | 27 ---------- entangled/interface/document.py | 76 +++++++++++++++++++--------- entangled/io/transaction.py | 7 +-- entangled/model/__init__.py | 5 +- entangled/model/content.py | 18 +++++++ entangled/model/reference_map.py | 3 ++ entangled/readers/__init__.py | 3 +- entangled/readers/code.py | 3 +- test/model/test_content.py | 15 ++++++ test/model/test_reference_map.py | 58 +++++++++++++++++++++ test/model/test_reference_name.py | 16 ++++++ test/readers/test_code.py | 2 - 20 files changed, 254 insertions(+), 270 deletions(-) delete mode 100644 entangled/code_reader.py delete mode 100644 entangled/from_str.py create mode 100644 test/model/test_content.py create mode 100644 test/model/test_reference_map.py create mode 100644 test/model/test_reference_name.py diff --git a/entangled/code_reader.py b/entangled/code_reader.py deleted file mode 100644 index a0ed021..0000000 --- 
a/entangled/code_reader.py +++ /dev/null @@ -1,84 +0,0 @@ -from dataclasses import dataclass, field -from pathlib import PurePath - -import mawk -import re - -from .readers.text_location import TextLocation -from .document import ReferenceId, ReferenceMap -from .errors.user import IndentationError - - -@dataclass -class Frame: - ref: ReferenceId - indent: str - content: list[str] = field(default_factory=list) - - -class CodeReader(mawk.RuleSet): - """Reads an annotated code file.""" - - def __init__(self, path: PurePath, refs: ReferenceMap): - self.location: TextLocation = TextLocation(path, 0) - self.stack: list[Frame] = [Frame(ReferenceId("#root#", (), PurePath("-"), -1), "")] - self.refs: ReferenceMap = refs - - @property - def current(self) -> Frame: - return self.stack[-1] - - @mawk.always - def increase_line_number(self, _): - self.location.line_number += 1 - - @mawk.on_match( - r"^(?P\s*).* ~/~ begin <<(?P[^#<>]+)#(?P[^#<>]+)>>\[(?Pinit|\d+)\]" - ) - def on_block_begin(self, m: re.Match[str]) -> list[str]: - full_ref_name = m["ref_name"] - ref_name = full_ref_name.split("::")[-1] - namespace = tuple(full_ref_name.split("::")[:-1]) - - # When there are lines above the first ref, say a shebang, swap - # them into the first block. 
- if len(self.stack) == 1 and len(self.stack[0].content) > 0: - content = self.stack[0].content - self.stack[0].content = [] - else: - content = [] - - if m["ref_count"] == "init": - ref_count = 0 - if not m["indent"].startswith(self.current.indent): - raise IndentationError(self.location) - indent = m["indent"].removeprefix(self.current.indent) - self.current.content.append(f"{indent}<<{ref_name}>>") - else: - ref_count = int(m["ref_count"]) - - self.stack.append( - Frame( - ReferenceId(m["ref_name"], namespace, PurePath(m["source"]), ref_count), - m["indent"], content - ) - ) - return [] - - @mawk.on_match(r"^(?P\s*).* ~/~ end") - def on_block_end(self, m: re.Match[str]) -> list[str]: - if m["indent"] != self.current.indent: - raise IndentationError(self.location) - self.refs.get_codeblock(self.current.ref).source = "\n".join(self.current.content) - _ = self.stack.pop() - return [] - - @mawk.always - def otherwise(self, line: str) -> list[str]: - if line.strip() == "": - self.current.content.append("") - return [] - if not line.startswith(self.current.indent): - raise IndentationError(self.location) - self.current.content.append(line.removeprefix(self.current.indent)) - return [] diff --git a/entangled/commands/reset.py b/entangled/commands/reset.py index 39374cf..4276bb2 100644 --- a/entangled/commands/reset.py +++ b/entangled/commands/reset.py @@ -9,7 +9,7 @@ from ..io import TransactionMode, transaction from ..config import config, get_input_files from ..hooks import get_hooks -from ..document import ReferenceMap +from ..model import ReferenceMap from ..errors.user import UserError import logging diff --git a/entangled/commands/stitch.py b/entangled/commands/stitch.py index 112a85b..fd8843b 100644 --- a/entangled/commands/stitch.py +++ b/entangled/commands/stitch.py @@ -1,41 +1,15 @@ -from itertools import chain -from pathlib import Path +import click -import logging -import argh # type: ignore - -from ..config import config -from ..document import 
ReferenceMap, Content, PlainText, ReferenceId +from ..interface import Document from ..io import transaction, TransactionMode from ..errors.user import UserError -from ..config import get_input_files - - -def stitch_markdown(reference_map: ReferenceMap, content: list[Content]) -> str: - def get(item: Content): - match item: - case PlainText(s): - return s - case ReferenceId(): - return reference_map.get_codeblock(item).indented_text - return "\n".join(get(i) for i in content) + "\n" - -@argh.arg("--force", help="force overwrite on conflict") -@argh.arg("-s", "--show", help="only show, don't act") +@click.command() +@click.option("-f", "--force", help="force overwrite on conflict") +@click.option("-s", "--show", help="only show, don't act") def stitch(*, force: bool = False, show: bool = False): """Stitch code changes back into the Markdown""" - config.read() - - # these imports depend on config being read - from ..markdown_reader import read_markdown_file - from ..code_reader import CodeReader - from ..hooks import get_hooks - - input_file_list = get_input_files() - hooks = get_hooks() - if show: mode = TransactionMode.SHOW elif force: @@ -43,22 +17,13 @@ def stitch(*, force: bool = False, show: bool = False): else: mode = TransactionMode.FAIL - refs = ReferenceMap() - content: dict[Path, list[Content]] = {} try: - with transaction(mode) as t: - for path in input_file_list: - logging.debug("reading `%s`", path) - _, c = read_markdown_file(t, path, refs=refs, hooks=hooks) - content[path] = c + doc = Document() - for path in t.db.managed_files: - logging.debug("reading `%s`", path) - t.update(path) - _ = CodeReader(path, refs).run(t.read(path)) - - for path in input_file_list: - t.write(path, stitch_markdown(refs, content[path]), []) + with transaction(mode) as t: + doc.load(t) + doc.load_all_code(t) + doc.stitch(t) except UserError as e: - logging.error(str(e)) + e.handle() diff --git a/entangled/commands/sync.py b/entangled/commands/sync.py index df1a86d..dcb8c2f 
100644 --- a/entangled/commands/sync.py +++ b/entangled/commands/sync.py @@ -1,51 +1,62 @@ -from typing import Optional, Callable -from itertools import chain -from pathlib import Path - -import logging +from enum import Enum from ..io import filedb, FileCache -from ..config import config -from .stitch import stitch, get_input_files +from ..interface import Document +from ..errors.user import UserError from .tangle import tangle +from .stitch import stitch +import logging +import click -def _stitch_then_tangle(): - stitch() - tangle() +class Action(Enum): + NOTHING = 0 + TANGLE = 1 + STITCH = 2 -def sync_action() -> Callable[[], None] | None: - input_file_list = get_input_files() + +def sync_action(doc: Document) -> Action: + input_file_list = doc.input_files() fs = FileCache() with filedb(readonly=True) as db: changed = set(db.changed_files(fs)) if not all(f in db for f in input_file_list): - return tangle + return Action.TANGLE if not changed: - return None + return Action.NOTHING if changed.isdisjoint(db.managed_files): - logging.info("Tangling") - return tangle + return Action.TANGLE if changed.issubset(db.managed_files): - logging.info("Stitching") - return _stitch_then_tangle + return Action.STITCH logging.error("changed: %s", [str(p) for p in changed]) logging.error( "Both markdown and code seem to have changed, don't know what to do now." 
) - return None + return Action.NOTHING +@click.command() def sync(): """Be smart wether to tangle or stich""" - config.read() - action = sync_action() - if action is not None: - action() + try: + doc = Document() + match sync_action(doc): + case Action.TANGLE: + logging.info("Tangling.") + tangle() + case Action.STITCH: + logging.info("Stitching.") + stitch() + tangle() + case Action.NOTHING: + pass + except UserError as e: + e.handle() + diff --git a/entangled/commands/tangle.py b/entangled/commands/tangle.py index cb0b5ac..82c59d4 100644 --- a/entangled/commands/tangle.py +++ b/entangled/commands/tangle.py @@ -1,74 +1,44 @@ -from pathlib import Path +import click -import argh # type: ignore -import logging - -from ..document import ReferenceMap -from ..config import config, AnnotationMethod, get_input_files +from ..config import AnnotationMethod from ..io import transaction, TransactionMode from ..hooks import get_hooks from ..errors.user import UserError +from ..interface import Document -@argh.arg( - "-a", - "--annotate", - choices=[m.name.lower() for m in AnnotationMethod], - help="annotation method", -) -@argh.arg("--force", help="force overwrite on conflict") -@argh.arg("-s", "--show", help="only show, don't act") -@argh.arg("-r", "--reset-db", help="resets database") -def tangle(*, annotate: str | None = None, force: bool = False, show: bool = False, reset_db: bool = False): +@click.command +@click.option("-a", "--annotate", help="annotation method") +@click.option("-f", "--force", help="force overwriting existing files") +@click.option("-s", "--show", help="only show what would happen") +def tangle(*, annotate: AnnotationMethod | None = None, force: bool = False, show: bool = False): """Tangle codes from Markdown""" - config.read() - - # these imports depend on config being read - from ..markdown_reader import read_markdown_file - from ..tangle import tangle_ref - - if annotate is None: - annotation_method = config.get.annotation - else: - 
annotation_method = AnnotationMethod[annotate.upper()] - - input_file_list = get_input_files() - - refs = ReferenceMap() - hooks = get_hooks() - logging.debug("tangling with hooks: %s", [h.__module__ for h in hooks]) - if show: mode = TransactionMode.SHOW - elif reset_db: - mode = TransactionMode.RESETDB elif force: mode = TransactionMode.FORCE else: mode = TransactionMode.FAIL try: + doc = Document() + with transaction(mode) as t: - for path in input_file_list: - logging.debug("reading `%s`", path) - t.update(path) - _, _ = read_markdown_file(t, path, refs=refs, hooks=hooks) + doc.load(t) + hooks = get_hooks(doc.config) for h in hooks: - h.pre_tangle(refs) + h.pre_tangle(doc.reference_map) - for tgt in refs.targets: - result, deps = tangle_ref(refs, tgt, annotation_method) - mask = next(iter(refs.by_name(tgt))).mode - t.write(Path(tgt), result, list(map(Path, deps)), mask) + doc.tangle(t, annotate) for h in hooks: - h.on_tangle(t, refs) + h.on_tangle(t, doc.reference_map) t.clear_orphans() for h in hooks: - h.post_tangle(refs) + h.post_tangle(doc.reference_map) except UserError as e: - logging.error(str(e)) + e.handle() diff --git a/entangled/config/__init__.py b/entangled/config/__init__.py index d21cbca..c9e6666 100644 --- a/entangled/config/__init__.py +++ b/entangled/config/__init__.py @@ -103,11 +103,12 @@ def get_language(self, lang_name: str) -> Language | None: Config is read from `entangled.toml` file.""" -def get_input_files() -> list[Path]: - include_file_list = chain.from_iterable(map(Path(".").glob, config.get.watch_list)) +def get_input_files(cfg: Config | None = None) -> list[Path]: + cfg = cfg or config.get + include_file_list = chain.from_iterable(map(Path(".").glob, cfg.watch_list)) input_file_list = [ path for path in include_file_list - if not any(path.match(pat) for pat in config.get.ignore_list) + if not any(path.match(pat) for pat in cfg.ignore_list) ] return sorted(input_file_list) diff --git a/entangled/config/version.py 
b/entangled/config/version.py index 99009ba..115a463 100644 --- a/entangled/config/version.py +++ b/entangled/config/version.py @@ -1,18 +1,14 @@ from __future__ import annotations from dataclasses import dataclass -from ..from_str import FromStr -from typing import override @dataclass(frozen=True) -class Version(FromStr): +class Version: numbers: tuple[int, ...] - @override def to_str(self) -> str: return ".".join(str(i) for i in self.numbers) - @override @classmethod def from_str(cls, s: str) -> Version: return Version(tuple(int(sv) for sv in s.split("."))) diff --git a/entangled/errors/user.py b/entangled/errors/user.py index 2380a3b..f41b2e5 100644 --- a/entangled/errors/user.py +++ b/entangled/errors/user.py @@ -1,7 +1,10 @@ from dataclasses import dataclass -from typing import Any, override +from typing import Any, override, Callable from pathlib import Path +import logging +import sys + from ..text_location import TextLocation @@ -9,6 +12,8 @@ class UserError(Exception): def __str__(self) -> str: return "Unknown user error." + def handle(self): + pass @dataclass class ConfigError(UserError): @@ -23,10 +28,17 @@ def __str__(self): class HelpfulUserError(UserError): """Raise a user error with a message.""" msg: str + action: Callable[[], None] = lambda: None def __str__(self): return f"error: {self.msg}" + @override + def handle(self): + self.action() + logging.error(str(self)) + sys.exit(-1) + @dataclass class FileError(UserError): diff --git a/entangled/from_str.py b/entangled/from_str.py deleted file mode 100644 index dae2437..0000000 --- a/entangled/from_str.py +++ /dev/null @@ -1,27 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Self, Any - - -class FromStr(ABC): - @abstractmethod - def to_str(self) -> str: - ... - - @classmethod - @abstractmethod - def from_str(cls, s: str) -> Self: - ... 
- - -def enc_hook(obj: Any) -> str: - if isinstance(obj, FromStr): - return obj.to_str() - else: - raise NotImplementedError # (f"Object should implement `FromStr`, got: `{type(obj)}`.") - - -def dec_hook(cls: type, obj: Any) -> Any: - if issubclass(cls, FromStr) and isinstance(obj, str): - return cls.from_str(obj) - else: - raise NotImplementedError(f"Unsupported type, couldn't decode `{obj}` to `{cls}`.") diff --git a/entangled/interface/document.py b/entangled/interface/document.py index c0f8c02..580997e 100644 --- a/entangled/interface/document.py +++ b/entangled/interface/document.py @@ -1,37 +1,67 @@ from dataclasses import dataclass, field -from pathlib import PurePath +from pathlib import PurePath, Path -from ..config.config_data import Config -from ..model import ReferenceMap, ReferenceId, tangle_ref, PlainText, Content - - -def content_to_text(r: ReferenceMap, c: Content) -> str: - """ - Reconstruct original plain text from a piece of content. - - Args: - r: the reference map. - c: the content. - - Returns: - A string, usually not terminated by a newline. 
- """ - match c: - case PlainText(s): - return s - case ReferenceId(): - return r[c].indented_text +from ..config import Config, get_input_files, read_config, AnnotationMethod +from ..model import ReferenceMap, tangle_ref, Content, content_to_text +from ..io import Transaction +from ..readers import markdown, code +from ..iterators import numbered_lines, run_generator @dataclass class Document: config: Config = Config() reference_map: ReferenceMap = field(default_factory=ReferenceMap) - content: dict[PurePath, list[Content]] = field(default_factory=dict) + content: dict[Path, list[Content]] = field(default_factory=dict) - def source_text(self, path: PurePath) -> str: + def __post_init__(self): + self.load_config() + + def load_config(self): + self.config = read_config() + + def input_files(self): + return get_input_files(self.config) + + def source_text(self, path: Path) -> str: return "".join(content_to_text(self.reference_map, c) for c in self.content[path]) def target_text(self, path: PurePath) -> tuple[str, set[PurePath]]: ref_name = self.reference_map.select_by_target(path) return tangle_ref(self.reference_map, ref_name, self.config.annotation) + + def write_target(self, t: Transaction, path: Path, annotation: AnnotationMethod | None = None): + ref_name = self.reference_map.select_by_target(path) + refs = self.reference_map.select_by_name(ref_name) + main_block = self.reference_map[refs[0]] + annotation = self.config.annotation if annotation is None else annotation + text, deps = tangle_ref(self.reference_map, ref_name, annotation) + t.write(path, text, map(Path, deps), main_block.mode) + + def load_source(self, t: Transaction, path: Path): + reader = markdown(self.config, self.reference_map, numbered_lines(path, t.read(path))) + content, _ = run_generator(reader) + self.content[path] = content + + def load_code(self, t: Transaction, path: Path): + reader = code(numbered_lines(path, t.read(path))) + for block in reader: + 
self.reference_map[block.reference_id].source = block.content + + def load_all_code(self, t: Transaction): + for tgt in self.reference_map.targets(): + if Path(tgt) in t.fs: + self.load_code(t, Path(tgt)) + + def load(self, t: Transaction): + for p in get_input_files(self.config): + self.load_source(t, p) + + def tangle(self, t: Transaction, annotation: AnnotationMethod | None = None): + for tgt in self.reference_map.targets(): + self.write_target(t, Path(tgt), annotation) + + def stitch(self, t: Transaction): + for path in self.content: + t.write(path, self.source_text(path), []) + diff --git a/entangled/io/transaction.py b/entangled/io/transaction.py index b9b99dc..45168c8 100644 --- a/entangled/io/transaction.py +++ b/entangled/io/transaction.py @@ -1,4 +1,5 @@ from abc import ABCMeta, abstractmethod +from collections.abc import Iterable from dataclasses import dataclass, field from functools import cached_property from pathlib import Path @@ -145,16 +146,16 @@ class Transaction: def update(self, path: Path): self.updates.append(path) - def write(self, path: Path, content: str, sources: list[Path], mode: int | None = None): + def write(self, path: Path, content: str, sources: Iterable[Path], mode: int | None = None): if path in self.passed: raise InternalError("Path is being written to twice", [path]) self.passed.add(path) if path not in self.db: logging.debug("creating target `%s`", path) - self.actions.append(Create(path, content, mode, sources)) + self.actions.append(Create(path, content, mode, list(sources))) elif not self.db.check(path, content): logging.debug("target `%s` changed", path) - self.actions.append(Write(path, content, mode, sources)) + self.actions.append(Write(path, content, mode, list(sources))) else: logging.debug("target `%s` unchanged", path) diff --git a/entangled/model/__init__.py b/entangled/model/__init__.py index 938a521..d74f20e 100644 --- a/entangled/model/__init__.py +++ b/entangled/model/__init__.py @@ -1,4 +1,4 @@ -from .content 
import PlainText, Content, RawContent +from .content import PlainText, Content, RawContent, content_to_text from .code_block import CodeBlock from .reference_name import ReferenceName from .reference_id import ReferenceId @@ -13,5 +13,6 @@ "ReferenceName", "ReferenceId", "ReferenceMap", - "tangle_ref" + "tangle_ref", + "content_to_text" ] diff --git a/entangled/model/content.py b/entangled/model/content.py index ba98c77..859b382 100644 --- a/entangled/model/content.py +++ b/entangled/model/content.py @@ -1,6 +1,7 @@ from dataclasses import dataclass from .code_block import CodeBlock from .reference_id import ReferenceId +from .reference_map import ReferenceMap @dataclass @@ -12,3 +13,20 @@ class PlainText: type Content = PlainText | ReferenceId +def content_to_text(r: ReferenceMap, c: Content) -> str: + """ + Reconstruct original plain text from a piece of content. + + Args: + r: the reference map. + c: the content. + + Returns: + A string, usually not terminated by a newline. + """ + match c: + case PlainText(s): + return s + case ReferenceId(): + return r[c].indented_text + diff --git a/entangled/model/reference_map.py b/entangled/model/reference_map.py index fde6df3..00f5b8f 100644 --- a/entangled/model/reference_map.py +++ b/entangled/model/reference_map.py @@ -58,6 +58,9 @@ def select_by_target(self, target: PurePath) -> ReferenceName: def register_target(self, target: PurePath, ref_name: ReferenceName): self._targets[target] = ref_name + def targets(self) -> Iterable[PurePath]: + return self._targets.keys() + @override def __contains__(self, key: object) -> bool: return key in self._map diff --git a/entangled/readers/__init__.py b/entangled/readers/__init__.py index 1aada28..fd7316e 100644 --- a/entangled/readers/__init__.py +++ b/entangled/readers/__init__.py @@ -1,6 +1,7 @@ from pathlib import PurePath from .markdown import markdown +from .code import read_top_level as code from ..iterators import numbered_lines, run_generator from .types import Reader @@ 
-9,5 +10,5 @@ def run_reader[O, T](reader: Reader[O, T], inp: str, filename: str = "-") -> tup return run_generator(reader(numbered_lines(PurePath(filename), inp))) -__all__ = ["markdown", "run_reader"] +__all__ = ["markdown", "code", "run_reader"] diff --git a/entangled/readers/code.py b/entangled/readers/code.py index f8c3b9e..322d05b 100644 --- a/entangled/readers/code.py +++ b/entangled/readers/code.py @@ -13,7 +13,6 @@ @dataclass class Block: reference_id: ReferenceId - indent: str content: str @@ -88,7 +87,7 @@ def read_block(namespace: tuple[str, ...], indent: str, input: InputStream) -> G else: if close_block_data.indent != block_data.indent: raise IndentationError(pos) - yield Block(block_data.ref, block_data.indent, content) + yield Block(block_data.ref, content) if block_data.is_init: extra_indent = block_data.indent.removeprefix(indent) diff --git a/test/model/test_content.py b/test/model/test_content.py new file mode 100644 index 0000000..908f881 --- /dev/null +++ b/test/model/test_content.py @@ -0,0 +1,15 @@ +from entangled.text_location import TextLocation +from entangled.model import ReferenceMap, ReferenceId, ReferenceName, CodeBlock +from entangled.model.content import PlainText, content_to_text + +from pathlib import PurePath + +def test_content_to_text(): + refs = ReferenceMap() + ref = ReferenceId(ReferenceName((), "a"), PurePath("a.md"), 0) + code_block = CodeBlock(properties=[], indent="", open_line="", source="x", close_line="", origin=TextLocation(PurePath(""), 0)) + refs[ref] = code_block + + assert content_to_text(refs, ref) == "x" + assert content_to_text(refs, PlainText("y")) == "y" + diff --git a/test/model/test_reference_map.py b/test/model/test_reference_map.py new file mode 100644 index 0000000..856e42f --- /dev/null +++ b/test/model/test_reference_map.py @@ -0,0 +1,58 @@ +from entangled.model import CodeBlock, ReferenceName, ReferenceId, ReferenceMap +from entangled.model.reference_map import ReferenceMap +from 
entangled.text_location import TextLocation +from entangled.errors.internal import InternalError +from entangled.model.properties import Attribute + +from pathlib import PurePath + +import pytest + +def mock_code_block() -> CodeBlock: + return CodeBlock(properties=[], indent="", open_line="", close_line="", source="", origin=TextLocation(PurePath("-"), 0)) + +def ref(name: str) -> ReferenceName: + return ReferenceName((), name) + +def test_reference_map(): + refs = ReferenceMap() + assert bool(refs) == False + r1 = refs.new_id(PurePath("x.md"), ref("a")) + cb1 = mock_code_block() + assert r1 not in refs + refs[r1] = cb1 + assert r1 in refs + assert refs[r1] is cb1 + + with pytest.raises(InternalError): + refs[r1] = mock_code_block() + + r2 = refs.new_id(PurePath("x.md"), ref("a")) + refs[r2] = mock_code_block() + assert r2.ref_count == 1 + r3 = refs.new_id(PurePath("y.md"), ref("a")) + refs[r3] = mock_code_block() + assert r3.ref_count == 0 + assert refs.select_by_name(ref("a")) == [r1, r2, r3] + + refs.register_target(PurePath("a.py"), ref("a")) + assert refs.select_by_target(PurePath("a.py")) == ref("a") + + cb2 = mock_code_block() + cb2.properties.append(Attribute("file", "b.py")) + r4 = refs.new_id(PurePath("x.md"), ref("b")) + refs[r4] = cb2 + assert refs.select_by_target(PurePath("b.py")) == ref("b") + + assert len(refs) == 4 + assert set(refs) == { r1, r2, r3, r4 } + + r5 = refs.new_id(PurePath("x.md"), ref("b")) + del refs[r5] + assert len(refs) == 4 + del refs[r4] + assert len(refs) == 3 + assert r4 not in refs + + + diff --git a/test/model/test_reference_name.py b/test/model/test_reference_name.py new file mode 100644 index 0000000..0432745 --- /dev/null +++ b/test/model/test_reference_name.py @@ -0,0 +1,16 @@ +from entangled.model.reference_name import ReferenceName + + +def test_reference_name(): + n1 = ReferenceName.from_str("a") + assert n1.namespace == () + assert n1.name == "a" + assert str(n1) == "a" + + n2 = ReferenceName.from_str("a::b::c") + 
assert n2.namespace == ("a", "b") + assert n2.name == "c" + assert str(n2) == "a::b::c" + + assert hash(n1) != hash(n2) + diff --git a/test/readers/test_code.py b/test/readers/test_code.py index a01aac6..a21ee82 100644 --- a/test/readers/test_code.py +++ b/test/readers/test_code.py @@ -39,7 +39,6 @@ def test_code_reader(): blocks, _ = run_reader(read_top_level, hs_tgt_annotated) assert blocks[0].reference_id == ReferenceId(ReferenceName((), "fibonacci"), PurePath("fib.md"), 0) - assert blocks[0].indent == "" assert blocks[0].content == block0 assert blocks[1].reference_id == ReferenceId(ReferenceName((), "fib.hs"), PurePath("fib.md"), 0) assert blocks[1].content == block1 @@ -74,7 +73,6 @@ def test_code_reader(): def test_ref_twice(): blocks, _ = run_reader(read_top_level, code2) assert blocks[0].reference_id == ReferenceId(ReferenceName((), "a"), PurePath("a.md"), 0) - assert blocks[0].indent == " " assert blocks[4].content == code2_apy From 566a894d4f17feb62790774cab5cdb7beff19895 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Tue, 11 Nov 2025 10:19:05 +0100 Subject: [PATCH 20/48] create new rich click main interface --- entangled/commands/__init__.py | 16 ++-------------- entangled/commands/main.py | 19 +++++++++++++++++++ entangled/commands/stitch.py | 6 ++++-- entangled/commands/tangle.py | 13 ++++++++----- entangled/hooks/__init__.py | 2 +- entangled/interface/document.py | 2 ++ entangled/main.py | 31 ++++--------------------------- examples/.gitignore | 2 ++ examples/standard/.editorconfig | 4 ++++ examples/standard/docs/index.md | 13 +++++++++++++ examples/standard/prime_sieve.cpp | 23 +++++++++++++++++++++++ 11 files changed, 82 insertions(+), 49 deletions(-) create mode 100644 entangled/commands/main.py create mode 100644 examples/.gitignore create mode 100644 examples/standard/.editorconfig create mode 100644 examples/standard/docs/index.md create mode 100644 examples/standard/prime_sieve.cpp diff --git a/entangled/commands/__init__.py 
b/entangled/commands/__init__.py index c6f631c..634040a 100644 --- a/entangled/commands/__init__.py +++ b/entangled/commands/__init__.py @@ -1,19 +1,7 @@ -from .new import new -from .status import status +from .main import main from .stitch import stitch -from .sync import sync from .tangle import tangle -from .reset import reset -from .watch import watch -from .brei import brei __all__ = [ - "new", - "brei", - "reset", - "status", - "stitch", - "sync", - "tangle", - "watch", + "main", "stitch", "tangle" ] diff --git a/entangled/commands/main.py b/entangled/commands/main.py new file mode 100644 index 0000000..0431459 --- /dev/null +++ b/entangled/commands/main.py @@ -0,0 +1,19 @@ +from ..logging import configure, logger +from ..version import __version__ + +import sys +import rich_click as click + + +@click.group(invoke_without_command=True) +@click.rich_config({"commands_before_options": True, "theme": "nord-modern"}) +@click.option("-v", "--version", is_flag=True, help="Show version.") +@click.option("-d", "--debug", is_flag=True, help="Enable debugging.") +def main(version: bool = False, debug: bool = False): + if version: + print(f"Entangled {__version__}") + sys.exit(0) + + configure(debug) + logger().debug(f"Welcome to Entangled v{__version__}!") + diff --git a/entangled/commands/stitch.py b/entangled/commands/stitch.py index fd8843b..ccc3c98 100644 --- a/entangled/commands/stitch.py +++ b/entangled/commands/stitch.py @@ -1,11 +1,13 @@ -import click +import rich_click as click + +from .main import main from ..interface import Document from ..io import transaction, TransactionMode from ..errors.user import UserError -@click.command() +@main.command() @click.option("-f", "--force", help="force overwrite on conflict") @click.option("-s", "--show", help="only show, don't act") def stitch(*, force: bool = False, show: bool = False): diff --git a/entangled/commands/tangle.py b/entangled/commands/tangle.py index 82c59d4..efd93fd 100644 --- 
a/entangled/commands/tangle.py +++ b/entangled/commands/tangle.py @@ -1,4 +1,6 @@ -import click +import rich_click as click + +from .main import main from ..config import AnnotationMethod from ..io import transaction, TransactionMode @@ -7,10 +9,11 @@ from ..interface import Document -@click.command -@click.option("-a", "--annotate", help="annotation method") -@click.option("-f", "--force", help="force overwriting existing files") -@click.option("-s", "--show", help="only show what would happen") +@main.command() +@click.option("-a", "--annotate", type=click.Choice(AnnotationMethod, case_sensitive=False), + help="annotation method") +@click.option("-f", "--force", is_flag=True, help="force overwriting existing files") +@click.option("-s", "--show", is_flag=True, help="only show what would happen") def tangle(*, annotate: AnnotationMethod | None = None, force: bool = False, show: bool = False): """Tangle codes from Markdown""" if show: diff --git a/entangled/hooks/__init__.py b/entangled/hooks/__init__.py index 7c71a7d..309c4d3 100644 --- a/entangled/hooks/__init__.py +++ b/entangled/hooks/__init__.py @@ -32,7 +32,7 @@ def get_hooks(cfg: Config | None = None) -> list[HookBase]: for h in sorted(cfg.hooks, key=lambda h: hooks[h].priority()): if h in hooks | external_hooks: try: - hook_cfg = msgspec.convert(config.get.hook.get(h, {}), type=hooks[h].Config) + hook_cfg = msgspec.convert(cfg.hook.get(h, {}), type=hooks[h].Config) hook_instance = hooks[h](hook_cfg) hook_instance.check_prerequisites() active_hooks.append(hook_instance) diff --git a/entangled/interface/document.py b/entangled/interface/document.py index 580997e..a4ea49f 100644 --- a/entangled/interface/document.py +++ b/entangled/interface/document.py @@ -40,6 +40,7 @@ def write_target(self, t: Transaction, path: Path, annotation: AnnotationMethod def load_source(self, t: Transaction, path: Path): reader = markdown(self.config, self.reference_map, numbered_lines(path, t.read(path))) + t.update(path) content, 
_ = run_generator(reader) self.content[path] = content @@ -63,5 +64,6 @@ def tangle(self, t: Transaction, annotation: AnnotationMethod | None = None): def stitch(self, t: Transaction): for path in self.content: + t.update(path) t.write(path, self.source_text(path), []) diff --git a/entangled/main.py b/entangled/main.py index 3a9a211..ce290b7 100644 --- a/entangled/main.py +++ b/entangled/main.py @@ -1,45 +1,22 @@ -from .logging import configure, logger +from .logging import logger -import argh # type: ignore import sys import traceback -from rich_argparse import RichHelpFormatter -from .commands import new, status, stitch, sync, tangle, watch, brei, reset +from .commands import main from .errors.internal import bug_contact from .errors.user import HelpfulUserError, UserError from .version import __version__ def cli(): - import argparse - try: - parser = argparse.ArgumentParser(formatter_class=RichHelpFormatter) - _ = parser.add_argument( - "-d", "--debug", action="store_true", help="enable debug messages" - ) - _ = parser.add_argument( - "-v", "--version", action="store_true", help="show version number" - ) - _ = argh.add_commands(parser, [new, brei, reset, status, stitch, sync, tangle, watch], - func_kwargs={"formatter_class": RichHelpFormatter}) - args = parser.parse_args() - - if args.version: - print(f"Entangled {__version__}") - sys.exit(0) - - configure(args.debug) - _ = argh.dispatch(parser) - + main() except KeyboardInterrupt: logger().info("Goodbye") sys.exit(0) except HelpfulUserError as e: - logger().error(e, exc_info=False) - e.func() - sys.exit(0) + e.handle() except UserError as e: logger().error(e, exc_info=False) sys.exit(0) diff --git a/examples/.gitignore b/examples/.gitignore new file mode 100644 index 0000000..efb682c --- /dev/null +++ b/examples/.gitignore @@ -0,0 +1,2 @@ +**/.entangled/* + diff --git a/examples/standard/.editorconfig b/examples/standard/.editorconfig new file mode 100644 index 0000000..46940b2 --- /dev/null +++ 
b/examples/standard/.editorconfig @@ -0,0 +1,4 @@ +[*] +indent_style = space +indent_size = 4 + diff --git a/examples/standard/docs/index.md b/examples/standard/docs/index.md new file mode 100644 index 0000000..5b20a2b --- /dev/null +++ b/examples/standard/docs/index.md @@ -0,0 +1,13 @@ +Computing Primes +================ + +``` {.cpp file=src/prime_sieve.cpp} +#include +#include +#include + +int main() { + return EXIT_SUCCESS; +} +``` + diff --git a/examples/standard/prime_sieve.cpp b/examples/standard/prime_sieve.cpp new file mode 100644 index 0000000..3c07b25 --- /dev/null +++ b/examples/standard/prime_sieve.cpp @@ -0,0 +1,23 @@ +/* ~/~ begin <>[init] */ +#include +#include +#include + +int main() { + std::vector sieve(100, true); + sieve[0] = false; + sieve[1] = false; + for (size_t i = 0; i < 100; ++i) { + if (!sieve[i]) { + continue; + } + + std::cout << i << std::endl; + + for (size_t j = i*2; j < 100; j += i) { + sieve[j] = false; + } + } + return EXIT_SUCCESS; +} +/* ~/~ end */ From d2ffc6d53ca07a3ff055789e07a8c957f80c3a25 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Thu, 13 Nov 2025 15:26:27 +0100 Subject: [PATCH 21/48] update db upon reading files --- entangled/interface/document.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/entangled/interface/document.py b/entangled/interface/document.py index a4ea49f..32440aa 100644 --- a/entangled/interface/document.py +++ b/entangled/interface/document.py @@ -43,11 +43,13 @@ def load_source(self, t: Transaction, path: Path): t.update(path) content, _ = run_generator(reader) self.content[path] = content + t.update(path) def load_code(self, t: Transaction, path: Path): reader = code(numbered_lines(path, t.read(path))) for block in reader: self.reference_map[block.reference_id].source = block.content + t.update(path) def load_all_code(self, t: Transaction): for tgt in self.reference_map.targets(): From c232b7ef4d681280ac3ba0261b0c3af9b60ec744 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Thu, 13 Nov 
2025 16:20:16 +0100 Subject: [PATCH 22/48] track dependencies when stitching --- entangled/interface/document.py | 15 +- entangled/model/content.py | 8 +- examples/cards-game/README.md | 444 ++++++++++++++++++ examples/cards-game/src/cards_game/card.py | 45 ++ examples/cards-game/src/cards_game/deck.py | 16 + examples/cards-game/src/cards_game/exact.py | 71 +++ .../cards-game/src/cards_game/forty_two.py | 45 ++ examples/standard/docs/index.md | 16 +- examples/standard/{ => src}/prime_sieve.cpp | 0 9 files changed, 652 insertions(+), 8 deletions(-) create mode 100644 examples/cards-game/README.md create mode 100644 examples/cards-game/src/cards_game/card.py create mode 100644 examples/cards-game/src/cards_game/deck.py create mode 100644 examples/cards-game/src/cards_game/exact.py create mode 100644 examples/cards-game/src/cards_game/forty_two.py rename examples/standard/{ => src}/prime_sieve.cpp (100%) diff --git a/entangled/interface/document.py b/entangled/interface/document.py index 32440aa..e22b973 100644 --- a/entangled/interface/document.py +++ b/entangled/interface/document.py @@ -23,8 +23,15 @@ def load_config(self): def input_files(self): return get_input_files(self.config) - def source_text(self, path: Path) -> str: - return "".join(content_to_text(self.reference_map, c) for c in self.content[path]) + def source_text(self, path: Path) -> tuple[str, set[PurePath]]: + deps = set() + text = "" + for content in self.content[path]: + t, d = content_to_text(self.reference_map, content) + if d is not None: + deps.add(d) + text += t + return text, deps def target_text(self, path: PurePath) -> tuple[str, set[PurePath]]: ref_name = self.reference_map.select_by_target(path) @@ -66,6 +73,6 @@ def tangle(self, t: Transaction, annotation: AnnotationMethod | None = None): def stitch(self, t: Transaction): for path in self.content: - t.update(path) - t.write(path, self.source_text(path), []) + text, deps = self.source_text(path) + t.write(path, text, map(Path, deps)) diff 
--git a/entangled/model/content.py b/entangled/model/content.py index 859b382..a9b32ea 100644 --- a/entangled/model/content.py +++ b/entangled/model/content.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from pathlib import PurePath from .code_block import CodeBlock from .reference_id import ReferenceId from .reference_map import ReferenceMap @@ -13,7 +14,7 @@ class PlainText: type Content = PlainText | ReferenceId -def content_to_text(r: ReferenceMap, c: Content) -> str: +def content_to_text(r: ReferenceMap, c: Content) -> tuple[str, PurePath | None]: """ Reconstruct original plain text from a piece of content. @@ -26,7 +27,8 @@ def content_to_text(r: ReferenceMap, c: Content) -> str: """ match c: case PlainText(s): - return s + return s, None case ReferenceId(): - return r[c].indented_text + code_block = r[c] + return code_block.indented_text, code_block.origin.filename diff --git a/examples/cards-game/README.md b/examples/cards-game/README.md new file mode 100644 index 0000000..15d5bfe --- /dev/null +++ b/examples/cards-game/README.md @@ -0,0 +1,444 @@ +--- +entangled: + version: "2.4" + style: basic +--- + +A Silly Card Game +================= + +Implements a silly game, counting how many cards we need to draw before reaching a Black Jack score of 42, counting aces as one point. This example uses principles from functional programming to solve this problem. Often introductions to functional programming emphasise the concept of immutability but not that of expressibility. + +This code demonstrates several principles (not all isolated to functional programming!): + +- **No illegal data.** Choose a type to represent your data that makes it + illegal to express illegal states. In this case we go the extra mile to + implement a `Card` data type composed of a `Suit` and `Kind` enum. +- **Use dataclasses.** we define types to combine data meaningfully. + Preferably, a dataclass should be `frozen` to enforce immutability. 
+- **Small functions that are (mostly) pure.** Every function has a single + responsibility. +- **Use iterators/generators to compute sequences lazily.** This is part of a + general tendency in functional programming to blur the boundary between + data and code. Generators are data masquerading as code. Python's + generator syntax makes this sort of programming particularly pretty. +- **Encapsulated mutability.** In the `shuffled_deck` function, we use in-place + mutability of a list of cards, but this mutability is encapsulated to + that function. +- **Use generic elemental functions that combine into more complex behaviour.** + Specifically, we use `accumulate`, `zip`, `map` and `enumerate` to + manipulate iterators. + +The game +-------- + +The problem is as follows: we have a stack of playing cards. Numbered cards have the value of their number (one through ten), and picture cards (Jack, Queen and King) have a value of ten. We keep drawing cards from the deck until we reach a value of 42 or higher. How many times do you need to draw a card? + +The following Python code sets up a Monte-Carlo experiment: + +```python +import random + +def run_game() -> int: + deck = list(range(52)) + random.shuffle(deck) + total = 0 + count = 0 + + for card in deck: + count += 1 + + if card % 13 >= 10: + value = 10 + else: + value = card % 13 + 1 + + total += value + + if total >= 42: + return count +``` + +We can run the `run_game` function multiple times to build statistics. This code has several issues. For one, it comes straight out of the Fortran programmer's handbook! Jokes aside, what are the problems here? + +- The code is hard to read. What do we mean by that? Some would claim that this is some of the most readable code out there! Well, the control flow and logic are very clear, but our intent is not. +- The code is hard to test. 
If there is an issue with this code somewhere, it is not so easy to spot, and since the function is monolithic, we can't test different parts for their logic. + +We might modularize the code and use better variable names to improve things a bit. Also, we can use some of the idioms in Python. + +```python +def shuffled_deck() -> list[int]: + deck = range(52) + random.shuffle(deck) + return deck + +def card_value(card: int) -> int: + if card % 13 >= 10: + return 10 + else: + return card % 13 + 1 + +def run_game_2() -> int: + deck = shuffled_deck() + + total = 0 + for i, card in enumerate(deck, start=1): + total += card_value(card) + if total >= 42: + return i +``` + +Here we've used the `enumerate` function to create an enumerated for-loop, instead of hand-counting the number of cards, and moved some parts to separate functions. Things have improved now, but this is still nowhere near what a functional programmer would do. What follows now will feel silly for this problem, but it isn't there to solve the problem; it is there to teach you ways to think that you might not have seen before. + +Data representation +------------------- + +In the previous example we represented our cards as integers. This is efficient, but it doesn't communicate what the logic in our program is about. You should prefer data structures that make it obvious what the data means and that make it impossible to represent states that should not exist. Enter the `enum` and the `dataclass`. 
+ +```python +#| file: src/cards_game/card.py +from enum import Enum, StrEnum +from dataclasses import dataclass + + +class Suit(StrEnum): + SPADES = "spades" + CLOVES = "cloves" + HEARTS = "hearts" + DIAMONDS = "diamonds" + + def __repr__(self) -> str: + return self.name[0] + + +class Kind(Enum): + ACE = 1 + TWO = 2 + THREE = 3 + FOUR = 4 + FIVE = 5 + SIX = 6 + SEVEN = 7 + EIGHT = 8 + NINE = 9 + TEN = 10 + JACK = 11 + QUEEN = 12 + KING = 13 + + def __repr__(self) -> str: + if self.value <= 10: + return str(self.value) + else: + return self.name[0] + + +@dataclass(frozen = True) +class Card: + suit: Suit + kind: Kind + + def __repr__(self) -> str: + return repr(self.suit) + repr(self.kind) +``` + +Yes, this is extremely verbose, but hey, now you can reuse the `Card` type when you feel like programming other card games! This demonstrates two types of enum in Python: the classic numeric enum, and the `StrEnum` that is backed by internal string values. Python has a way of optimizing short strings that are used as identifiers, so don't worry about efficiency here, worry about what makes sense! + +We could've implemented a method on the `Card` type to give us a card's value in the game of Black Jack, but this value is more a property of the game of Black Jack than that of the card itself. We keep the class empty and clean. + +### Note on using `__repr__` + +The `Suit`, `Kind`, and `Card` classes implement the `__repr__` method to get a nice representation in a Python REPL. In production code it is desirable that the `__repr__` method returns a representation of an +object that would evaluate back to the original object, so: + +```python +assert eval(repr(obj)) == obj +``` + +should always hold. Here that principle is violated for convenience. + +Encapsulating mutable state +--------------------------- + +Next we need to generate a deck of cards and shuffle it. 
+ +```python +def sorted_deck() -> Generator[Card]: + for suit in Suit: + for kind in Kind: + yield Card(suit, kind) +``` + +The `sorted_deck` function is a little **gem** of the Python language. In as few words as it would take to describe what it does, we have an implementation that is both readable and efficient. Seeing and writing functions like this should release a small dose of dopamine in your brain. If we would write this function in completely functional style, we get a function that is short, powerful, but arguably less "Pythonic". + +```python +#| id: deck +#| file: src/cards_game/deck.py +from collections.abc import Iterator +from itertools import starmap, product +import random + +from .card import Suit, Kind, Card + +def sorted_deck() -> Iterator[Card]: + return starmap(Card, product(Suit, Kind)) +``` + +To shuffle the deck we need to store it in a list, then call `random.shuffle`, which shuffles items in a list in-place, and return it. + +```python +#| id: deck +def shuffled_deck() -> list[Card]: + deck = list(sorted_deck()) + random.shuffle(deck) + return deck +``` + +Here we used a mutating procedure, but it is encapsulated in a small function, so it won't affect the logic beyond the confines of this function. + +Higher order functions +---------------------- + +The `map`, `reduce` (better known as *fold*), `filter`, and similar functions are collectively known as *higher order functions*. This means that they take smaller functions as their input to produce more complex behaviour. + +The advantages of this approach are not as self-evident as some of the others in this tutorial. Often, writing code in terms of simple for-loops can be more readable. In a sense this is a good thing. The designers of Python found these patterns so important that they made syntax like list-comprehensions and generators, just to make this easier. What is important is that, as a programmer, you learn to think in terms of these abstract operations on sequences. 
Then, when you need to handle data that is a bit larger, you can abstract over these operations. + +Advantages of `itertools` functions over hand-rolling a loop: + +- **robustness**. You have a smaller chance of introducing subtle bugs. +- **scalability**. If you need to scale up, you can consider using libraries like Dask to handle the computation. +- **composability**. These routines form a kind of mini-language (also known as a domain-specific language or DSL) that allows components to be reused and combined in new ways. + +Disadvantages or possible traps are: + +- **readability**. This can go either way. The danger is that when you start using `itertools` and relatives in earnest, you'll be tempted to go all the way, resulting in code that is very hard to read, especially for peers that don't use `itertools` every day. +- **mental overhead**. For many programmers, remembering what each and every iterator adaptor is doing can be a burden. In this tutorial we use only a few functions, but check out the [`more-itertools` package](https://more-itertools.readthedocs.io/en/stable/). +- **off-by-one**. Many of these functions are variations on a theme. For instance, we'll use the `takewhile` function from `itertools`, but you might also want a version of that function that also includes the first item that fails the test. There is a `takewhile_inclusive` variant in `more-itertools`, but you don't need that if you roll your own generator. + +With that discussion out of the way, we'll now implement the rest of the game with maximum utilisation of the `itertools` library. 
+ +One-liners +---------- + +```python +#| id: forty-two +#| file: src/cards_game/forty_two.py +from .card import Card +from collections.abc import Generator, Iterable +from functools import reduce +from dataclasses import dataclass +from enum import Enum, StrEnum +from itertools import accumulate, takewhile, repeat, starmap, product +from collections import defaultdict +``` + +The functional programmer prefers all their functions to be one liners. This is also why they detest any control-flow like `for` loops and `if` statements. Sometimes though, it can't be helped: the `if` statement is a necessary part of any Turing complete system. Luckily, we have the ternary expression (no matter how ugly it is in Python). + +```python +#| id: forty-two +def black_jack_min_value(card: Card) -> int: + return card.kind.value if card.kind.value < 10 else 10 +``` + +We'll define a little accumulator function to keep the score: + +```python +#| id: forty-two +def keep_score(score: int, card: Card) -> int: + return score + black_jack_min_value(card) +``` + +And a predicate (a function of one argument returning a `bool`) to indicate the end of the game: + +```python +#| id: forty-two +def game_continues(score: int) -> bool: + return score < 42 +``` + +Now, to run the game, we have another one-liner: + +```python +#| id: forty-two +def run_game(deck: Iterable[Card]) -> Iterable[int]: + return takewhile(game_continues, accumulate(deck, keep_score, initial=0)) +``` + +Now, that almost reads as plain English! + +Folding +------- + +The `run_game` function is a one-liner. This is only possible because we carefully designed the `keep_score` function to fit in the *foldable* design pattern. 
The `keep_score` function takes a state and an update as input, and returns a new state: + +```python +def update(s: State, ds: Delta) -> State: + pass +``` + +When we can model the progress of a game in such a function, we gain access to a plethora of *reducing* or *folding* functions: + +```python +def fold[S, D](update: Callable[[S, D], S], iter: Iterable[D], state: S) -> S: + for item in iter: + state = update(state, item) + return state +``` + +In this case, we can use `itertools.accumulate` that yields intermediate results as well. + +Getting answers +--------------- + +To get a usable answer, we need two more one-liners. + +```python +#| id: forty-two +def length[T](iter: Iterable[T]) -> int: + return sum(1 for _ in iter) + +def trace_game(deck: Iterable[Card]) -> list[tuple[Card, int]]: + return list(zip(deck, run_game(deck))) + +def trial() -> int: + return length(run_game(shuffled_deck())) + +def experiment(n: int) -> Iterable[int]: + return starmap(trial, repeat((), n)) + +def histogram[T](iter: Iterable[T]) -> defaultdict[T, int]: + def tally[T](hist: defaultdict[T, int], item: T) -> defaultdict[T, int]: + hist[item] += 1 + return hist + + return reduce(tally, iter, defaultdict(lambda: 0)) +``` + +The exact answer +---------------- + +As a little extra: what are the complete statistics of this game? We have 52 cards. At a minimum we need to draw five of them to get to 42 points, and at most 17 cards ($4 \times (1 + 2 + 3 + 4) + 5 = 45$), so that leaves between 311875200 and 7805769880904240998072320000 combinations to compute! + +Problems like these lend themselves really well to a technique called *dynamic programming*. That sounds fancy, but what it means is that we cache computations (also known as *memoization*) to speed things up. The `functools` library has a `cache` decorator. + +As a first example, we look at the factorial function. 
We put a print-statement in there to see when the function is being evaluated: + +```python +from functools import cache + +@cache +def factorial(n): + print(f"computing {n}!") + if n == 0: + return 1 + return n * factorial(n - 1) +``` + +> [!TIP] +> ### Exercise: compute the binomial coefficient +> The binomial coefficient can be defined by the recursion, +> +> $$\binom{n}{k} = \binom{n-1}{k-1} + \binom{n-1}{k},$$ +> +> with the base cases, +> +> $$\binom{n}{0} = 1\quad\textrm{and}\quad\binom{n}{n} = 1.$$ +> +> Implement this recursion with a cached function. Tip: the binomial coefficient has a symmetry that can make your implementation even more efficient, +> +> $$\binom{n}{k} = \binom{n}{n - k}.$$ +> +>
<details><summary>Solution</summary> +> +> ```python +> @cache +> def binomial_coefficient(a: int, b: int) -> int: +> if b < 0 or b > a: +> return 0 +> if b == 0: +> return 1 +> if b > (a // 2): +> return binomial_coefficient(a, a - b) +> return binomial_coefficient(a-1, b-1) + binomial_coefficient(a-1, b) +> ``` +> +> </details>
+ +```python +#| file: src/cards_game/exact.py +from __future__ import annotations + +from collections.abc import Generator, Iterable +from dataclasses import dataclass, field +from functools import cache +from copy import copy +import numpy as np +import numpy.typing as npt + + +type UIntArray = npt.NDArray[np.uint64] +type FloatArray = npt.NDArray[np.float64] + + +MAX_HAND: UIntArray = np.array([4] * 9 + [16], dtype=np.uint64) +HIST_SIZE: int = 13 +HIST_OFFSET: int = 5 + + +def histogram_zero() -> FloatArray: + return np.zeros(HIST_SIZE, dtype=np.float64) + + +def histogram_singleton(n: int) -> FloatArray: + h = histogram_zero() + h[n - HIST_OFFSET] = 1 + return h + + +@dataclass +class Hand: + n_cards: int = 0 + score: int = 0 + counts: UIntArray = field(default_factory=lambda: np.zeros(10, dtype=np.uint64)) + + def __hash__(self) -> int: + h = 0 + for (i, n) in enumerate(self.counts): + h |= int(n) << (3 * i) + return h + + def __eq__(self, other) -> bool: + return (self.counts == other.counts).all() + + def draw(self) -> Generator[tuple[int, Hand]]: + """Generates new hands with multiplicity.""" + for i in range(len(self.counts)): + n = self.counts[i] + m = MAX_HAND[i] + if n < m: + c = copy(self.counts) + c[i] += 1 + yield (m - n, Hand(self.n_cards + 1, self.score + i + 1, c)) + + +def compute_dist(): + cache: dict[int, FloatArray] = dict() + + def run(h: Hand) -> FloatArray: + x = hash(h) + if x not in cache: + if h.score >= 42: + cache[x] = histogram_singleton(h.n_cards) + else: + cache[x] = (1.0 / (52 - h.n_cards)) * sum((m * run(h2) \ + for (m, h2) in h.draw()), histogram_zero()) + return cache[x] + + return run(Hand()) +``` + +Conclusion +---------- + +Python is a very flexible language, and it allows us to write some beautiful functional code. However, the language was not designed to be functional to this extent. Overuse of these techniques will negatively affect readability.
The conclusion should be: there are times when to use and times when not to use this, and there is no golden rule that can tell you when that is. Go, and scroll up towards the crappy quasi-fortran discombobulation that we started with, and compare with the exquisite beauty we have crafted from it. Somehow, I'm still not sure which is better or worse. + diff --git a/examples/cards-game/src/cards_game/card.py b/examples/cards-game/src/cards_game/card.py new file mode 100644 index 0000000..66df84c --- /dev/null +++ b/examples/cards-game/src/cards_game/card.py @@ -0,0 +1,45 @@ +# ~/~ begin <>[init] +from enum import Enum, StrEnum +from dataclasses import dataclass + + +class Suit(StrEnum): + SPADES = "spades" + CLOVES = "cloves" + HEARTS = "hearts" + DIAMONDS = "diamonds" + + def __repr__(self) -> str: + return self.name[0] + + +class Kind(Enum): + ACE = 1 + TWO = 2 + THREE = 3 + FOUR = 4 + FIVE = 5 + SIX = 6 + SEVEN = 7 + EIGHT = 8 + NINE = 9 + TEN = 10 + JACK = 11 + QUEEN = 12 + KING = 13 + + def __repr__(self) -> str: + if self.value <= 10: + return str(self.value) + else: + return self.name[0] + + +@dataclass(frozen = True) +class Card: + suit: Suit + kind: Kind + + def __repr__(self) -> str: + return repr(self.suit) + repr(self.kind) +# ~/~ end diff --git a/examples/cards-game/src/cards_game/deck.py b/examples/cards-game/src/cards_game/deck.py new file mode 100644 index 0000000..3a3fdf2 --- /dev/null +++ b/examples/cards-game/src/cards_game/deck.py @@ -0,0 +1,16 @@ +# ~/~ begin <>[init] +from collections.abc import Iterator +from itertools import starmap, product +import random + +from .card import Suit, Kind, Card + +def sorted_deck() -> Iterator[Card]: + return starmap(Card, product(Suit, Kind)) +# ~/~ end +# ~/~ begin <>[1] +def shuffled_deck() -> list[Card]: + deck = list(sorted_deck()) + random.shuffle(deck) + return deck +# ~/~ end diff --git a/examples/cards-game/src/cards_game/exact.py b/examples/cards-game/src/cards_game/exact.py new file mode 100644 
index 0000000..d7b8d0d --- /dev/null +++ b/examples/cards-game/src/cards_game/exact.py @@ -0,0 +1,71 @@ +# ~/~ begin <>[init] +from __future__ import annotations + +from collections.abc import Generator, Iterable +from dataclasses import dataclass, field +from functools import cache +from copy import copy +import numpy as np +import numpy.typing as npt + + +type UIntArray = npt.NDArray[np.uint64] +type FloatArray = npt.NDArray[np.float64] + + +MAX_HAND: UIntArray = np.array([4] * 9 + [16], dtype=np.uint64) +HIST_SIZE: int = 13 +HIST_OFFSET: int = 5 + + +def histogram_zero() -> FloatArray: + return np.zeros(HIST_SIZE, dtype=np.float64) + + +def histogram_singleton(n: int) -> FloatArray: + h = histogram_zero() + h[n - HIST_OFFSET] = 1 + return h + + +@dataclass +class Hand: + n_cards: int = 0 + score: int = 0 + counts: UIntArray = field(default_factory=lambda: np.zeros(10, dtype=np.uint64)) + + def __hash__(self) -> int: + h = 0 + for (i, n) in enumerate(self.counts): + h |= int(n) << (3 * i) + return h + + def __eq__(self, other) -> bool: + return (self.counts == other.counts).all() + + def draw(self) -> Generator[tuple[int, Hand]]: + """Generates new hands with multiplicity.""" + for i in range(len(self.counts)): + n = self.counts[i] + m = MAX_HAND[i] + if n < m: + c = copy(self.counts) + c[i] += 1 + yield (m - n, Hand(self.n_cards + 1, self.score + i + 1, c)) + + +def compute_dist(): + cache: dict[int, FloatArray] = dict() + + def run(h: Hand) -> FloatArray: + x = hash(h) + if x not in cache: + if h.score >= 42: + cache[x] = histogram_singleton(h.n_cards) + else: + cache[x] = (1.0 / (52 - h.n_cards)) * sum((m * run(h2) \ + for (m, h2) in h.draw()), histogram_zero()) + return cache[x] + + return run(Hand()) +# ~/~ end diff --git a/examples/cards-game/src/cards_game/forty_two.py b/examples/cards-game/src/cards_game/forty_two.py new file mode 100644 index 0000000..4e73ceb --- /dev/null +++ b/examples/cards-game/src/cards_game/forty_two.py @@ -0,0 +1,45 @@
+# ~/~ begin <>[init] +from .card import Card +from collections.abc import Generator, Iterable +from functools import reduce +from dataclasses import dataclass +from enum import Enum, StrEnum +from itertools import accumulate, takewhile, repeat, starmap, product +from collections import defaultdict +# ~/~ end +# ~/~ begin <>[1] +def black_jack_min_value(card: Card) -> int: + return card.kind.value if card.kind.value < 10 else 10 +# ~/~ end +# ~/~ begin <>[2] +def keep_score(score: int, card: Card) -> int: + return score + black_jack_min_value(card) +# ~/~ end +# ~/~ begin <>[3] +def game_continues(score: int) -> bool: + return score < 42 +# ~/~ end +# ~/~ begin <>[4] +def run_game(deck: Iterable[Card]) -> Iterable[int]: + return takewhile(game_continues, accumulate(deck, keep_score, initial=0)) +# ~/~ end +# ~/~ begin <>[5] +def length[T](iter: Iterable[T]) -> int: + return sum(1 for _ in iter) + +def trace_game(deck: Iterable[Card]) -> list[tuple[Card, int]]: + return list(zip(deck, run_game(deck))) + +def trial() -> int: + return length(run_game(shuffled_deck())) + +def experiment(n: int) -> Iterable[int]: + return starmap(trial, repeat((), n)) + +def histogram[T](iter: Iterable[T]) -> defaultdict[T, int]: + def tally[T](hist: defaultdict[T, int], item: T) -> defaultdict[T, int]: + hist[item] += 1 + return hist + + return reduce(tally, iter, defaultdict(lambda: 0)) +# ~/~ end diff --git a/examples/standard/docs/index.md b/examples/standard/docs/index.md index 5b20a2b..55fe509 100644 --- a/examples/standard/docs/index.md +++ b/examples/standard/docs/index.md @@ -4,9 +4,23 @@ Computing Primes ``` {.cpp file=src/prime_sieve.cpp} #include #include -#include +#include int main() { + std::vector sieve(100, true); + sieve[0] = false; + sieve[1] = false; + for (size_t i = 0; i < 100; ++i) { + if (!sieve[i]) { + continue; + } + + std::cout << i << std::endl; + + for (size_t j = i*2; j < 100; j += i) { + sieve[j] = false; + } + } return EXIT_SUCCESS; } ``` diff --git 
a/examples/standard/prime_sieve.cpp b/examples/standard/src/prime_sieve.cpp similarity index 100% rename from examples/standard/prime_sieve.cpp rename to examples/standard/src/prime_sieve.cpp From 2c1a1d849e2455900d5adddf36c395beb30b48d5 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Fri, 14 Nov 2025 16:38:58 +0100 Subject: [PATCH 23/48] port the new command to click --- entangled/commands/__init__.py | 3 +- entangled/commands/new.py | 76 ++++++++++++++++------------------ 2 files changed, 37 insertions(+), 42 deletions(-) diff --git a/entangled/commands/__init__.py b/entangled/commands/__init__.py index 634040a..19abaaf 100644 --- a/entangled/commands/__init__.py +++ b/entangled/commands/__init__.py @@ -1,7 +1,8 @@ from .main import main from .stitch import stitch from .tangle import tangle +from .new import new __all__ = [ - "main", "stitch", "tangle" + "main", "stitch", "tangle", "new" ] diff --git a/entangled/commands/new.py b/entangled/commands/new.py index 80e7e97..ab0c61f 100644 --- a/entangled/commands/new.py +++ b/entangled/commands/new.py @@ -1,13 +1,11 @@ -import argh # type: ignore -from argh.utils import get_subparsers -import argparse +import click from pathlib import Path -from rich_argparse import RichHelpFormatter from rich.console import Console from rich.table import Table from ..errors.user import HelpfulUserError +from .main import main description = """Create a new entangled project from a template. @@ -42,61 +40,52 @@ def print_help() -> None: For this we first have to create a parser and attach this `new` function to it. Then we have a parser object that we can `print_help()` from. 
""" - parser = argparse.ArgumentParser(formatter_class=RichHelpFormatter) - argh.add_commands(parser, [new], func_kwargs={"formatter_class": RichHelpFormatter}) - get_subparsers(parser).choices["new"].print_help() + ctx = click.get_current_context() + click.echo(ctx.get_help()) + ctx.exit() -@argh.arg( - "-a", - "--answers-file", +@main.command( + epilog="This command, and the options provided are a front-end for `copier`. " + + "See https://copier.readthedocs.io/en/stable/ for more information." +) +@click.option( + "-a", "--answers-file", help="Update using this path (relative to [project_path]) to find the answers file for `copier`", ) -@argh.arg( - "-d", - "--data", +@click.option( + "-d", "--data", help='"VARIABLE1=VALUE1;VARIABLE1=VALUE2" Make VARIABLEs available as VALUEs when rendering the template; make sure to use quotation marks for multiple variables/values', ) -@argh.arg( - "-D", - "--defaults", +@click.option( + "-D", "--defaults", is_flag=True, help="Use default answers to questions, which might be null if not specified; overwrites when combined with -d!", ) -@argh.arg( - "-p", - "--pretend", +@click.option( + "-p", "--pretend", is_flag=True, help="Run but do not make any changes", ) -@argh.arg( - "-o", - "--overwrite", +@click.option( + "-o", "--overwrite", is_flag=True, help="Overwrite files that already exist, without asking.", ) -@argh.arg( - "-l", - "--list-templates", +@click.option( + "-l", "--list-templates", is_flag=True, help="List all official templates and exit", ) -@argh.arg( - "-t", - "--trust", +@click.option( + "-t", "--trust", is_flag=True, help='Allow templates with unsafe features (Jinja extensions, migrations, tasks); "True" for officially supported templates', ) -@argh.arg( - "template", - # default=None, - nargs="?", - help="Template handle or URL; initialize a new project from this template", +@click.argument( + "template" ) -@argh.arg( - "project-path", - # default=None, - nargs="?", - help="Initialize a new project at this 
path", +@click.argument( + "project-path" ) def new( - template: str | None, - project_path: Path | None, *, + template: str, + project_path: Path, *, answers_file: str | None = None, data: str = "", defaults: bool = False, @@ -105,7 +94,12 @@ def new( list_templates: bool = False, trust: bool = False, ) -> None: - """Create a new entangled project from a template.""" + """Create a new entangled project from a template. + + TEMPLATE Template handle or URL; initialize a new project from this template. + + PROJECT_PATH Initialize a new project at this path. + """ from ..config.templates import templates as AVAILABLE_TEMPLATES from ..config.templates import Template From 02be917c82ae3188e6801404ca9c3c43e2c4c8de Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Fri, 14 Nov 2025 16:47:46 +0100 Subject: [PATCH 24/48] port reset command to click and new architecture --- entangled/commands/__init__.py | 3 ++- entangled/commands/reset.py | 44 ++++++++++++---------------------- 2 files changed, 17 insertions(+), 30 deletions(-) diff --git a/entangled/commands/__init__.py b/entangled/commands/__init__.py index 19abaaf..ed1fd74 100644 --- a/entangled/commands/__init__.py +++ b/entangled/commands/__init__.py @@ -2,7 +2,8 @@ from .stitch import stitch from .tangle import tangle from .new import new +from .reset import reset __all__ = [ - "main", "stitch", "tangle", "new" + "main", "stitch", "tangle", "new", "reset" ] diff --git a/entangled/commands/reset.py b/entangled/commands/reset.py index 4276bb2..5f726bc 100644 --- a/entangled/commands/reset.py +++ b/entangled/commands/reset.py @@ -7,56 +7,42 @@ """ from ..io import TransactionMode, transaction -from ..config import config, get_input_files from ..hooks import get_hooks -from ..model import ReferenceMap from ..errors.user import UserError +from ..interface import Document +from .main import main import logging -from pathlib import Path +@main.command() def reset(): """ Resets the database. 
This performs a tangle without actually writing output to the files, but updating the database as if we were. """ - config.read() - - # these imports depend on config being read - from ..markdown_reader import read_markdown_file - from ..tangle import tangle_ref - - input_file_list = get_input_files() - - refs = ReferenceMap() - hooks = get_hooks() - logging.debug("tangling with hooks: %s", [h.__module__ for h in hooks]) - mode = TransactionMode.RESETDB - annotation_method = config.get.annotation - + try: + doc = Document() + mode = TransactionMode.RESETDB + with transaction(mode) as t: - for path in input_file_list: - logging.debug("reading `%s`", path) - t.update(path) - _, _ = read_markdown_file(t, path, refs=refs, hooks=hooks) + doc.load(t) + annotation_method = doc.config.annotation + hooks = get_hooks(doc.config) for h in hooks: - h.pre_tangle(refs) - - for (tgt, ref_name) in refs.targets.items(): - result, deps = tangle_ref(refs, ref_name, annotation_method) - mask = next(iter(refs.by_name(tgt))).mode - t.write(Path(tgt), result, list(map(Path, deps)), mask) + h.pre_tangle(doc.reference_map) + + doc.tangle(t, annotation_method) for h in hooks: - h.on_tangle(t, refs) + h.on_tangle(t, doc.reference_map) t.clear_orphans() for h in hooks: - h.post_tangle(refs) + h.post_tangle(doc.reference_map) except UserError as e: logging.error(str(e)) From e82907a22241fae2ce14793d3870a4221d362f38 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Sat, 15 Nov 2025 11:39:59 +0100 Subject: [PATCH 25/48] bring rest of commands back --- entangled/commands/__init__.py | 6 +++++- entangled/commands/brei.py | 20 +++++++++++++------- entangled/commands/main.py | 7 ++++++- entangled/commands/reset.py | 6 +++--- entangled/commands/status.py | 4 ++++ entangled/commands/stitch.py | 2 +- entangled/commands/sync.py | 5 +++-- entangled/commands/tangle.py | 2 +- entangled/commands/watch.py | 3 +++ 9 files changed, 39 insertions(+), 16 deletions(-) diff --git a/entangled/commands/__init__.py 
b/entangled/commands/__init__.py index ed1fd74..74b391f 100644 --- a/entangled/commands/__init__.py +++ b/entangled/commands/__init__.py @@ -3,7 +3,11 @@ from .tangle import tangle from .new import new from .reset import reset +from .sync import sync +from .watch import watch +from .status import status +from .brei import brei __all__ = [ - "main", "stitch", "tangle", "new", "reset" + "main", "stitch", "tangle", "new", "reset", "sync", "watch", "status", "brei" ] diff --git a/entangled/commands/brei.py b/entangled/commands/brei.py index fcd8107..5adedbf 100644 --- a/entangled/commands/brei.py +++ b/entangled/commands/brei.py @@ -1,18 +1,20 @@ from pathlib import Path from collections.abc import Awaitable from typing import Any -import argh # type: ignore import asyncio import textwrap from ..config import config from brei import resolve_tasks, Phony from ..logging import logger +from .main import main + +import click log = logger() -async def main(target_strs: list[str], force_run: bool, throttle: int | None): +async def brei_main(target_strs: list[str], force_run: bool, throttle: int | None): if not Path(".entangled").exists(): Path(".entangled").mkdir() @@ -33,10 +35,14 @@ async def main(target_strs: list[str], force_run: bool, throttle: int | None): log.error(msg) -@argh.arg("targets", nargs="+", help="name of target to run") -@argh.arg("-B", "--force-run", help="rebuild all dependencies") -@argh.arg("-j", "--throttle", help="limit number of concurrent jobs") +@main.command() +@click.argument("targets", nargs=-1) +@click.option("-B", "--force-run", is_flag=True, help="rebuild all dependencies") +@click.option("-j", "--throttle", type=int, help="limit number of concurrent jobs") def brei(targets: list[str], *, force_run: bool = False, throttle: int | None = None): - """Build one of the configured targets.""" + """Build one of the configured targets. + + TARGETS Names of the targets to run.
+ """ config.read() - asyncio.run(main(targets, force_run, throttle)) + asyncio.run(brei_main(targets, force_run, throttle)) diff --git a/entangled/commands/main.py b/entangled/commands/main.py index 0431459..2ad2660 100644 --- a/entangled/commands/main.py +++ b/entangled/commands/main.py @@ -5,11 +5,16 @@ import rich_click as click -@click.group(invoke_without_command=True) +@click.group( + invoke_without_command=True, + epilog="See https://entangled.github.io/ for more help and tutorials." +) @click.rich_config({"commands_before_options": True, "theme": "nord-modern"}) @click.option("-v", "--version", is_flag=True, help="Show version.") @click.option("-d", "--debug", is_flag=True, help="Enable debugging.") def main(version: bool = False, debug: bool = False): + """Your literate programming toolkit. + """ if version: print(f"Entangled {__version__}") sys.exit(0) diff --git a/entangled/commands/reset.py b/entangled/commands/reset.py index 5f726bc..203d695 100644 --- a/entangled/commands/reset.py +++ b/entangled/commands/reset.py @@ -15,11 +15,11 @@ import logging -@main.command() +@main.command(short_help="Reset the file database.") def reset(): """ - Resets the database. This performs a tangle without actually writing - output to the files, but updating the database as if we were. + Resets the file database. This performs a tangle without actually + writing output to the files, but updating the database as if we were. 
""" try: diff --git a/entangled/commands/status.py b/entangled/commands/status.py index b9a5f58..a38a70a 100644 --- a/entangled/commands/status.py +++ b/entangled/commands/status.py @@ -10,6 +10,8 @@ from rich.panel import Panel from rich.tree import Tree +from .main import main + def tree_from_files(files: Iterable[Path]): tree = Tree(label=".") @@ -53,6 +55,8 @@ def rich_status(): console.print(group) +@main.command() def status(): + """Print a status overview.""" config.read() rich_status() diff --git a/entangled/commands/stitch.py b/entangled/commands/stitch.py index ccc3c98..5de5b39 100644 --- a/entangled/commands/stitch.py +++ b/entangled/commands/stitch.py @@ -11,7 +11,7 @@ @click.option("-f", "--force", help="force overwrite on conflict") @click.option("-s", "--show", help="only show, don't act") def stitch(*, force: bool = False, show: bool = False): - """Stitch code changes back into the Markdown""" + """Stitch code changes back into the documentation.""" if show: mode = TransactionMode.SHOW elif force: diff --git a/entangled/commands/sync.py b/entangled/commands/sync.py index dcb8c2f..479477c 100644 --- a/entangled/commands/sync.py +++ b/entangled/commands/sync.py @@ -3,11 +3,12 @@ from ..io import filedb, FileCache from ..interface import Document from ..errors.user import UserError + from .tangle import tangle from .stitch import stitch +from .main import main import logging -import click class Action(Enum): @@ -42,7 +43,7 @@ def sync_action(doc: Document) -> Action: return Action.NOTHING -@click.command() +@main.command() def sync(): """Be smart wether to tangle or stich""" try: diff --git a/entangled/commands/tangle.py b/entangled/commands/tangle.py index efd93fd..2635212 100644 --- a/entangled/commands/tangle.py +++ b/entangled/commands/tangle.py @@ -15,7 +15,7 @@ @click.option("-f", "--force", is_flag=True, help="force overwriting existing files") @click.option("-s", "--show", is_flag=True, help="only show what would happen") def tangle(*, 
annotate: AnnotationMethod | None = None, force: bool = False, show: bool = False): - """Tangle codes from Markdown""" + """Tangle codes from the documentation.""" if show: mode = TransactionMode.SHOW elif force: diff --git a/entangled/commands/watch.py b/entangled/commands/watch.py index 8997d06..9e64647 100644 --- a/entangled/commands/watch.py +++ b/entangled/commands/watch.py @@ -8,6 +8,8 @@ from ..config import config from ..status import find_watch_dirs +from .main import main + class EventHandler(FileSystemEventHandler): def __init__(self): @@ -60,6 +62,7 @@ def stop() -> bool: observer.join() +@main.command() def watch(): """Keep a loop running, watching for changes.""" config.read() From 91506382e8c2ccc3629097ac1dc9808220d61e74 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Sat, 15 Nov 2025 16:53:32 +0100 Subject: [PATCH 26/48] add manual --- docs/man-pages/english.md | 491 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 491 insertions(+) create mode 100644 docs/man-pages/english.md diff --git a/docs/man-pages/english.md b/docs/man-pages/english.md new file mode 100644 index 0000000..96cc215 --- /dev/null +++ b/docs/man-pages/english.md @@ -0,0 +1,491 @@ +% entangled(1) version {__version__} | User Commands + +NAME +==== + +Entangled - Your literate programming toolkit. + +SYNOPSIS +======== + +`entangled [options] [command] [arguments]...` + +DESCRIPTION +=========== + +Entangled helps you write Literate Programs in Markdown. You put all your code inside Markdown code blocks. Entangled automatically extracts the code and writes it to more traditional source files. You can then edit these generated files, and the changes are being fed back to the Markdown. + +OPTIONS +======= + +`--help` + +: Show the help message and exit. + +`-v, --version` + +: Show version and exit. + +`-d, --debug` + +: Print debug messages. + +COMMANDS +======== + +Each command has its own arguments and flags that can be inspected using `entangled --help`. 
+ +`brei <targets>` + +: Build one of the configured targets. Entangled has an integrated build system called `brei` (https://entangled.github.io/brei) that can be used to produce figures or other artifacts. Targets and their dependencies can be specified in code blocks using the `brei` hook. + +`new