From 8644329629a1192f0d4f6a105cfd1204b9375e3a Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Mon, 25 May 2026 20:37:33 -0400 Subject: [PATCH 01/51] Add Python/uv tool to generate Translator component dependency diagrams Sets up a click-based CLI (generate_diagram.py) in translator-components-diagram/ that reads a components CSV, validates id references, and uses Graphviz to produce a dependency diagram. Nodes are clustered by owner team; solid arrows show hard dependencies, dashed arrows show optional "uses" relationships. Components outside the active filter appear as grayed ghost nodes. Also adds a root .gitignore that excludes all data/ directories (generated outputs and input CSV live there and are not checked in). Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 6 + .../generate_diagram.py | 301 ++++++++++++++++++ translator-components-diagram/pyproject.toml | 19 ++ translator-components-diagram/uv.lock | 48 +++ 4 files changed, 374 insertions(+) create mode 100644 .gitignore create mode 100644 translator-components-diagram/generate_diagram.py create mode 100644 translator-components-diagram/pyproject.toml create mode 100644 translator-components-diagram/uv.lock diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..762a6a1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +# Generated output and input data files live in data/ directories +data/ +**/data/ + +# IDE files +.idea/ diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py new file mode 100644 index 0000000..134cc79 --- /dev/null +++ b/translator-components-diagram/generate_diagram.py @@ -0,0 +1,301 @@ +"""Generate dependency diagrams for Translator platform components.""" + +import csv +import json +import os +import sys +from pathlib import Path + +import click +import graphviz + +# Refactor status values that indicate active components +DEFAULT_STATUSES = ["Continues into Refactor", "New in Refactor"] + +# Owner → 
pastel fill color mapping (soft colors work well on white backgrounds) +OWNER_COLORS = { + "DOGSURF": "#AED6F1", + "DINGO": "#A9DFBF", + "Core Components WG": "#F9E79F", + "NCATS": "#F1948A", + "Retriever": "#D7BDE2", + "Shepherd": "#FAD7A0", + "DOGSLED": "#A8D8EA", + "CATRAX": "#C8E6C9", + "UI": "#FFDDC1", + "None": "#E8E8E8", +} +FALLBACK_COLORS = [ + "#B0BEC5", "#BCAAA4", "#CE93D8", "#80CBC4", + "#EF9A9A", "#FFCC80", "#C5E1A5", "#80DEEA", +] +_color_index = 0 + + +def get_owner_color(owner: str, color_map: dict) -> str: + global _color_index + if owner not in color_map: + color_map[owner] = FALLBACK_COLORS[_color_index % len(FALLBACK_COLORS)] + _color_index += 1 + return color_map[owner] + + +def parse_id_list(field: str) -> list[str]: + """Split a comma-separated field into a list of stripped, non-empty strings.""" + if not field or not field.strip(): + return [] + return [part.strip() for part in field.split(",") if part.strip()] + + +def load_components(csv_path: Path) -> list[dict]: + with csv_path.open(newline="", encoding="utf-8") as f: + reader = csv.DictReader(f) + rows = [] + for row in reader: + row["_depends_on"] = parse_id_list(row.get("Depends on", "")) + row["_uses"] = parse_id_list(row.get("Uses", "")) + rows.append(row) + return rows + + +def validate(components: list[dict]) -> bool: + """Print warnings for any reference issues. 
Returns False if any warnings.""" + id_lower_map = {c["id"].lower(): c["id"] for c in components} + ok = True + for comp in components: + comp_id = comp["id"] + for ref in comp["_depends_on"] + comp["_uses"]: + ref_lower = ref.lower() + if ref_lower not in id_lower_map: + click.echo( + f"WARNING: '{comp_id}' references unknown id '{ref}' " + f"in Depends on/Uses", + err=True, + ) + ok = False + elif id_lower_map[ref_lower] != ref: + click.echo( + f"WARNING: '{comp_id}' references '{ref}' but the actual id " + f"is '{id_lower_map[ref_lower]}' (case mismatch)", + err=True, + ) + return ok + + +def write_json(components: list[dict], out_path: Path) -> None: + exportable = [] + for comp in components: + row = {k: v for k, v in comp.items() if not k.startswith("_")} + row["depends_on"] = comp["_depends_on"] + row["uses"] = comp["_uses"] + exportable.append(row) + with out_path.open("w", encoding="utf-8") as f: + json.dump(exportable, f, indent=2, ensure_ascii=False) + click.echo(f"Wrote {out_path}") + + +def build_graph( + components: list[dict], + active_statuses: set[str] | None, + direction: str, + color_map: dict, +) -> graphviz.Digraph: + id_lower_map = {c["id"].lower(): c for c in components} + + if active_statuses is None: + active_set = {c["id"] for c in components} + else: + active_set = {c["id"] for c in components if c["Refactor status"] in active_statuses} + + # Collect ghost ids: referenced by active components but not in active_set + ghost_ids: set[str] = set() + for comp in components: + if comp["id"] not in active_set: + continue + for ref in comp["_depends_on"] + comp["_uses"]: + canonical = id_lower_map.get(ref.lower(), {}).get("id", ref) + if canonical not in active_set: + ghost_ids.add(canonical) + + dot = graphviz.Digraph( + name="translator_components", + graph_attr={ + "rankdir": direction, + "fontname": "Helvetica", + "fontsize": "12", + "splines": "ortho", + "nodesep": "0.5", + "ranksep": "1.0", + }, + node_attr={ + "fontname": "Helvetica", + 
"fontsize": "11", + "style": "filled,rounded", + "shape": "box", + }, + edge_attr={"fontname": "Helvetica", "fontsize": "9"}, + ) + + # Group active nodes by owner, emit as clusters + owners: dict[str, list[dict]] = {} + for comp in components: + if comp["id"] not in active_set: + continue + owner = comp.get("Owner", "None") or "None" + owners.setdefault(owner, []).append(comp) + + for owner, members in sorted(owners.items()): + fill = get_owner_color(owner, color_map) + with dot.subgraph(name=f"cluster_{owner}") as sub: + sub.attr( + label=owner, + style="rounded", + color="#888888", + fontname="Helvetica", + fontsize="12", + ) + for comp in members: + is_new = comp["Refactor status"] == "New in Refactor" + label = f"{comp['Apps']}\n{comp['id']}" + sub.node( + comp["id"], + label=label, + fillcolor=fill, + penwidth="2.0" if is_new else "1.0", + ) + + # Ghost nodes (outside clusters, muted style) + for ghost_id in sorted(ghost_ids): + comp = id_lower_map.get(ghost_id.lower()) + apps_name = comp["Apps"] if comp else ghost_id + label = f"{apps_name}\n{ghost_id}\n(excluded)" + dot.node( + ghost_id, + label=label, + fillcolor="#D3D3D3", + style="filled,rounded,dashed", + fontcolor="#666666", + color="#999999", + ) + + # Edges — resolve ids case-insensitively + for comp in components: + if comp["id"] not in active_set: + continue + for ref in comp["_depends_on"]: + target = id_lower_map.get(ref.lower(), {}).get("id", ref) + if target in active_set or target in ghost_ids: + dot.edge(target, comp["id"]) # B → A: B must run for A + for ref in comp["_uses"]: + target = id_lower_map.get(ref.lower(), {}).get("id", ref) + if target in active_set or target in ghost_ids: + dot.edge(target, comp["id"], style="dashed") # B → A: A uses B + + return dot + + +@click.command() +@click.option( + "--input", "input_path", + default="data/components.csv", + show_default=True, + type=click.Path(exists=True, dir_okay=False, path_type=Path), + help="CSV input file.", +) +@click.option( + 
"--output-dir", "output_dir", + default="data", + show_default=True, + type=click.Path(file_okay=False, path_type=Path), + help="Directory for output files.", +) +@click.option( + "--output-name", "output_name", + default="diagram", + show_default=True, + help="Base filename for output files (without extension).", +) +@click.option( + "--refactor-status", "refactor_status", + default=",".join(DEFAULT_STATUSES), + show_default=True, + help="Comma-separated list of Refactor status values to include.", +) +@click.option( + "--all", "include_all", + is_flag=True, + default=False, + help="Include all components regardless of Refactor status.", +) +@click.option( + "--format", "extra_formats", + multiple=True, + type=click.Choice(["pdf", "svg", "png"]), + help="Additional output formats (PNG always produced). Can be repeated.", +) +@click.option( + "--direction", + default="LR", + show_default=True, + type=click.Choice(["LR", "TB"]), + help="Graph layout direction.", +) +def main( + input_path: Path, + output_dir: Path, + output_name: str, + refactor_status: str, + include_all: bool, + extra_formats: tuple[str, ...], + direction: str, +) -> None: + """Validate components CSV and generate a Graphviz dependency diagram.""" + output_dir.mkdir(parents=True, exist_ok=True) + + click.echo(f"Loading {input_path} ...") + components = load_components(input_path) + click.echo(f"Loaded {len(components)} components.") + + click.echo("Validating references ...") + validate(components) + + # Write JSON (all components, regardless of filter) + json_path = output_dir / "components.json" + write_json(components, json_path) + + # Determine active statuses + active_statuses: set[str] | None + if include_all: + active_statuses = None + click.echo("Including all components (no filter).") + else: + active_statuses = {s.strip() for s in refactor_status.split(",") if s.strip()} + active_count = sum(1 for c in components if c["Refactor status"] in active_statuses) + click.echo( + f"Filtering to 
{active_count} components with status: " + + ", ".join(sorted(active_statuses)) + ) + + color_map = dict(OWNER_COLORS) + dot = build_graph(components, active_statuses, direction, color_map) + + # Save .dot source + dot_path = output_dir / f"{output_name}.dot" + dot_path.write_text(dot.source, encoding="utf-8") + click.echo(f"Wrote {dot_path}") + + # Render PNG (always) + formats_to_render = {"png"} | set(extra_formats) + for fmt in sorted(formats_to_render): + rendered = dot.render( + filename=str(output_dir / output_name), + format=fmt, + cleanup=False, + ) + # graphviz appends format extension; rename away the extra copy + expected = output_dir / f"{output_name}.{fmt}" + click.echo(f"Wrote {expected}") + + +if __name__ == "__main__": + main() diff --git a/translator-components-diagram/pyproject.toml b/translator-components-diagram/pyproject.toml new file mode 100644 index 0000000..bf4749f --- /dev/null +++ b/translator-components-diagram/pyproject.toml @@ -0,0 +1,19 @@ +[project] +name = "translator-components-diagram" +version = "0.1.0" +description = "Generate dependency diagrams for Translator platform components" +requires-python = ">=3.11" +dependencies = [ + "click>=8.0", + "graphviz>=0.20", +] + +[project.scripts] +generate-diagram = "generate_diagram:main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["."] diff --git a/translator-components-diagram/uv.lock b/translator-components-diagram/uv.lock new file mode 100644 index 0000000..e05bbb3 --- /dev/null +++ b/translator-components-diagram/uv.lock @@ -0,0 +1,48 @@ +version = 1 +revision = 3 +requires-python = ">=3.11" + +[[package]] +name = "click" +version = "8.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/9b/98/518d8e5081007684232226f475082b30087d0f585e8457db087298259f49/click-8.4.1.tar.gz", hash = "sha256:918b5633eddf6b41c32d4f454bf0de810065c74e3f7dbf8ee5452f8be88d3e96", size = 353007, upload-time = "2026-05-22T04:08:37.769Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/0d/67e5b4109ea4a837e80daa87c2c696711955e40449a97e8926672534def2/click-8.4.1-py3-none-any.whl", hash = "sha256:482be17c6991b8c19c5429a1e995d9b0efdbb63172824c41f99965dc0ade8ec2", size = 116639, upload-time = "2026-05-22T04:08:35.26Z" }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "graphviz" +version = "0.21" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/b3/3ac91e9be6b761a4b30d66ff165e54439dcd48b83f4e20d644867215f6ca/graphviz-0.21.tar.gz", hash = "sha256:20743e7183be82aaaa8ad6c93f8893c923bd6658a04c32ee115edb3c8a835f78", size = 200434, upload-time = "2025-06-15T09:35:05.824Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/4c/e0ce1ef95d4000ebc1c11801f9b944fa5910ecc15b5e351865763d8657f8/graphviz-0.21-py3-none-any.whl", hash = "sha256:54f33de9f4f911d7e84e4191749cac8cc5653f815b06738c54db9a15ab8b1e42", size = 47300, upload-time = "2025-06-15T09:35:04.433Z" }, +] + +[[package]] +name = 
"translator-components-diagram" +version = "0.1.0" +source = { editable = "." } +dependencies = [ + { name = "click" }, + { name = "graphviz" }, +] + +[package.metadata] +requires-dist = [ + { name = "click", specifier = ">=8.0" }, + { name = "graphviz", specifier = ">=0.20" }, +] From c4096216da3f70bc940fedea259eaa7e7f1c9e97 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Mon, 25 May 2026 20:43:58 -0400 Subject: [PATCH 02/51] Add --google-sheet option to download CSV from a world-readable Google Sheet Reads GOOGLE_SHEET_ID from a gitignored .env file in the script directory and downloads the sheet's CSV export to data/components.csv before processing. Supports --sheet-gid for selecting a non-default tab. Also gitignores .env files and adds python-dotenv as a dependency. Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 4 ++ .../generate_diagram.py | 41 +++++++++++++++++-- translator-components-diagram/pyproject.toml | 1 + translator-components-diagram/uv.lock | 11 +++++ 4 files changed, 54 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 762a6a1..141e95c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,5 +2,9 @@ data/ **/data/ +# Local environment files (contain secrets like Google Sheet IDs) +.env +**/.env + # IDE files .idea/ diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 134cc79..c83e2af 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -3,11 +3,12 @@ import csv import json import os -import sys +import urllib.request from pathlib import Path import click import graphviz +from dotenv import load_dotenv # Refactor status values that indicate active components DEFAULT_STATUSES = ["Continues into Refactor", "New in Refactor"] @@ -199,8 +200,21 @@ def build_graph( "--input", "input_path", default="data/components.csv", show_default=True, - type=click.Path(exists=True, dir_okay=False, 
path_type=Path), - help="CSV input file.", + type=click.Path(dir_okay=False, path_type=Path), + help="CSV input file (used unless --google-sheet is set).", +) +@click.option( + "--google-sheet", "google_sheet", + is_flag=True, + default=False, + help="Download CSV from Google Sheet instead of reading a local file. " + "Reads GOOGLE_SHEET_ID from .env in the script directory.", +) +@click.option( + "--sheet-gid", "sheet_gid", + default=0, + show_default=True, + help="Google Sheet tab GID (0 = first tab).", ) @click.option( "--output-dir", "output_dir", @@ -242,6 +256,8 @@ def build_graph( ) def main( input_path: Path, + google_sheet: bool, + sheet_gid: int, output_dir: Path, output_name: str, refactor_status: str, @@ -252,6 +268,25 @@ def main( """Validate components CSV and generate a Graphviz dependency diagram.""" output_dir.mkdir(parents=True, exist_ok=True) + if google_sheet: + env_path = Path(__file__).parent / ".env" + load_dotenv(env_path) + sheet_id = os.environ.get("GOOGLE_SHEET_ID", "").strip() + if not sheet_id: + raise click.ClickException( + f"GOOGLE_SHEET_ID is not set. 
Fill it in at {env_path}" + ) + url = ( + f"https://docs.google.com/spreadsheets/d/{sheet_id}" + f"/export?format=csv&gid={sheet_gid}" + ) + download_path = output_dir / "components.csv" + click.echo(f"Downloading CSV from Google Sheet to {download_path} ...") + urllib.request.urlretrieve(url, download_path) + input_path = download_path + elif not input_path.exists(): + raise click.ClickException(f"Input file not found: {input_path}") + click.echo(f"Loading {input_path} ...") components = load_components(input_path) click.echo(f"Loaded {len(components)} components.") diff --git a/translator-components-diagram/pyproject.toml b/translator-components-diagram/pyproject.toml index bf4749f..cb8a9bd 100644 --- a/translator-components-diagram/pyproject.toml +++ b/translator-components-diagram/pyproject.toml @@ -6,6 +6,7 @@ requires-python = ">=3.11" dependencies = [ "click>=8.0", "graphviz>=0.20", + "python-dotenv>=1.0", ] [project.scripts] diff --git a/translator-components-diagram/uv.lock b/translator-components-diagram/uv.lock index e05bbb3..16a7cdd 100644 --- a/translator-components-diagram/uv.lock +++ b/translator-components-diagram/uv.lock @@ -32,6 +32,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/91/4c/e0ce1ef95d4000ebc1c11801f9b944fa5910ecc15b5e351865763d8657f8/graphviz-0.21-py3-none-any.whl", hash = "sha256:54f33de9f4f911d7e84e4191749cac8cc5653f815b06738c54db9a15ab8b1e42", size = 47300, upload-time = "2025-06-15T09:35:04.433Z" }, ] +[[package]] +name = "python-dotenv" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135, upload-time = "2026-03-01T16:00:26.196Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" }, +] + [[package]] name = "translator-components-diagram" version = "0.1.0" @@ -39,10 +48,12 @@ source = { editable = "." } dependencies = [ { name = "click" }, { name = "graphviz" }, + { name = "python-dotenv" }, ] [package.metadata] requires-dist = [ { name = "click", specifier = ">=8.0" }, { name = "graphviz", specifier = ">=0.20" }, + { name = "python-dotenv", specifier = ">=1.0" }, ] From 490bfc66fbdb104c2b8e521defa319cbe74fa33a Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Mon, 25 May 2026 20:45:10 -0400 Subject: [PATCH 03/51] Added an empty .env.default file. --- translator-components-diagram/.env.default | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 translator-components-diagram/.env.default diff --git a/translator-components-diagram/.env.default b/translator-components-diagram/.env.default new file mode 100644 index 0000000..1f21e16 --- /dev/null +++ b/translator-components-diagram/.env.default @@ -0,0 +1,2 @@ +# The Google Sheet ID to download the component information from +GOOGLE_SHEET_ID= From a7e38825fff4b3d9fa89747337cea07088046b36 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Mon, 25 May 2026 22:41:45 -0400 Subject: [PATCH 04/51] Update diagram: new column names, node labels, edge styles, and legend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Read 'Name' field (renamed from 'Apps') and 'Gets data from' (renamed from 'Depends on') from the updated Google Sheet column layout - Node labels now show Name / id / Owner on three lines - 'Gets data from' edges now run A→B (data flows toward the source) - 'Uses' edges are dotted bidirectional (A←··→B) - Add a legend cluster explaining both edge types 
Co-Authored-By: Claude Sonnet 4.6 --- .../generate_diagram.py | 34 +++++++++++++++---- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index c83e2af..7a52497 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -53,7 +53,7 @@ def load_components(csv_path: Path) -> list[dict]: reader = csv.DictReader(f) rows = [] for row in reader: - row["_depends_on"] = parse_id_list(row.get("Depends on", "")) + row["_depends_on"] = parse_id_list(row.get("Gets data from", "")) row["_uses"] = parse_id_list(row.get("Uses", "")) rows.append(row) return rows @@ -70,7 +70,7 @@ def validate(components: list[dict]) -> bool: if ref_lower not in id_lower_map: click.echo( f"WARNING: '{comp_id}' references unknown id '{ref}' " - f"in Depends on/Uses", + f"in Gets data from/Uses", err=True, ) ok = False @@ -157,7 +157,8 @@ def build_graph( ) for comp in members: is_new = comp["Refactor status"] == "New in Refactor" - label = f"{comp['Apps']}\n{comp['id']}" + owner = comp.get("Owner", "None") or "None" + label = f"{comp['Name']}\n{comp['id']}\n{owner}" sub.node( comp["id"], label=label, @@ -168,8 +169,9 @@ def build_graph( # Ghost nodes (outside clusters, muted style) for ghost_id in sorted(ghost_ids): comp = id_lower_map.get(ghost_id.lower()) - apps_name = comp["Apps"] if comp else ghost_id - label = f"{apps_name}\n{ghost_id}\n(excluded)" + name = comp["Name"] if comp else ghost_id + owner = (comp.get("Owner", "") or "") if comp else "" + label = f"{name}\n{ghost_id}\n{owner}\n(excluded)" if owner else f"{name}\n{ghost_id}\n(excluded)" dot.node( ghost_id, label=label, @@ -186,11 +188,29 @@ def build_graph( for ref in comp["_depends_on"]: target = id_lower_map.get(ref.lower(), {}).get("id", ref) if target in active_set or target in ghost_ids: - dot.edge(target, comp["id"]) # B → A: B must run for A + 
dot.edge(comp["id"], target) # A → B: A gets data from B for ref in comp["_uses"]: target = id_lower_map.get(ref.lower(), {}).get("id", ref) if target in active_set or target in ghost_ids: - dot.edge(target, comp["id"], style="dashed") # B → A: A uses B + dot.edge(comp["id"], target, style="dotted", dir="both") # A ←··→ B: A uses B + + # Legend + with dot.subgraph(name="cluster_legend") as leg: + leg.attr( + label="Legend", + style="filled,rounded", + fillcolor="#FAFAFA", + color="#AAAAAA", + fontname="Helvetica", + fontsize="11", + margin="12", + ) + leg.node("_leg_a1", label="Component A", fillcolor="white", penwidth="1.0") + leg.node("_leg_b1", label="Data Source B", fillcolor="white", penwidth="1.0") + leg.edge("_leg_a1", "_leg_b1", xlabel="Gets data from") + leg.node("_leg_a2", label="Component C", fillcolor="white", penwidth="1.0") + leg.node("_leg_b2", label="Component D", fillcolor="white", penwidth="1.0") + leg.edge("_leg_a2", "_leg_b2", xlabel="Uses", style="dotted", dir="both") return dot From d7035b6b31dde6fc6ac9553d816b9ab00bc444d0 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Mon, 25 May 2026 22:54:18 -0400 Subject: [PATCH 05/51] Update edge semantics: rename column, reverse arrow direction, horizontal layout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Read 'Gets results from' (renamed from 'Gets data from') and 'Calls' (renamed from 'Uses') columns - Solid arrows now run B→A for 'Gets results from' (provider → consumer), consistent with dotted 'Calls' arrows — both point from provider to consumer - Default layout direction changed to TB for a wide horizontal output - Legend updated to reflect new column names and corrected arrow directions Co-Authored-By: Claude Sonnet 4.6 --- .../generate_diagram.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 
7a52497..15e52f2 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -53,8 +53,8 @@ def load_components(csv_path: Path) -> list[dict]: reader = csv.DictReader(f) rows = [] for row in reader: - row["_depends_on"] = parse_id_list(row.get("Gets data from", "")) - row["_uses"] = parse_id_list(row.get("Uses", "")) + row["_depends_on"] = parse_id_list(row.get("Gets results from", "")) + row["_uses"] = parse_id_list(row.get("Calls", "")) rows.append(row) return rows @@ -70,7 +70,7 @@ def validate(components: list[dict]) -> bool: if ref_lower not in id_lower_map: click.echo( f"WARNING: '{comp_id}' references unknown id '{ref}' " - f"in Gets data from/Uses", + f"in Gets results from/Calls", err=True, ) ok = False @@ -188,11 +188,11 @@ def build_graph( for ref in comp["_depends_on"]: target = id_lower_map.get(ref.lower(), {}).get("id", ref) if target in active_set or target in ghost_ids: - dot.edge(comp["id"], target) # A → B: A gets data from B + dot.edge(target, comp["id"]) # B → A: B provides results to A for ref in comp["_uses"]: target = id_lower_map.get(ref.lower(), {}).get("id", ref) if target in active_set or target in ghost_ids: - dot.edge(comp["id"], target, style="dotted", dir="both") # A ←··→ B: A uses B + dot.edge(target, comp["id"], style="dotted") # B ··→ A: B provides results to A # Legend with dot.subgraph(name="cluster_legend") as leg: @@ -207,10 +207,10 @@ def build_graph( ) leg.node("_leg_a1", label="Component A", fillcolor="white", penwidth="1.0") leg.node("_leg_b1", label="Data Source B", fillcolor="white", penwidth="1.0") - leg.edge("_leg_a1", "_leg_b1", xlabel="Gets data from") + leg.edge("_leg_b1", "_leg_a1", xlabel="Gets results from") leg.node("_leg_a2", label="Component C", fillcolor="white", penwidth="1.0") leg.node("_leg_b2", label="Component D", fillcolor="white", penwidth="1.0") - leg.edge("_leg_a2", "_leg_b2", xlabel="Uses", style="dotted", dir="both") + leg.edge("_leg_b2", 
"_leg_a2", xlabel="Calls", style="dotted") return dot @@ -269,7 +269,7 @@ def build_graph( ) @click.option( "--direction", - default="LR", + default="TB", show_default=True, type=click.Choice(["LR", "TB"]), help="Graph layout direction.", From 2ebc831e5ea79505ca81cc4d75181c0e8841af96 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Mon, 25 May 2026 22:58:12 -0400 Subject: [PATCH 06/51] Remove owner cluster boxes from diagram Owner is already shown in each node label, so the cluster boxes were cluttering the data flow layout. Nodes now float freely and are arranged purely by their Gets results from / Calls relationships. Co-Authored-By: Claude Sonnet 4.6 --- .../generate_diagram.py | 32 ++++++------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 15e52f2..816785e 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -137,34 +137,20 @@ def build_graph( edge_attr={"fontname": "Helvetica", "fontsize": "9"}, ) - # Group active nodes by owner, emit as clusters - owners: dict[str, list[dict]] = {} + # Add active nodes (no owner clustering — owner is shown in the label) for comp in components: if comp["id"] not in active_set: continue owner = comp.get("Owner", "None") or "None" - owners.setdefault(owner, []).append(comp) - - for owner, members in sorted(owners.items()): fill = get_owner_color(owner, color_map) - with dot.subgraph(name=f"cluster_{owner}") as sub: - sub.attr( - label=owner, - style="rounded", - color="#888888", - fontname="Helvetica", - fontsize="12", - ) - for comp in members: - is_new = comp["Refactor status"] == "New in Refactor" - owner = comp.get("Owner", "None") or "None" - label = f"{comp['Name']}\n{comp['id']}\n{owner}" - sub.node( - comp["id"], - label=label, - fillcolor=fill, - penwidth="2.0" if is_new else "1.0", - ) + is_new = comp["Refactor status"] 
== "New in Refactor" + label = f"{comp['Name']}\n{comp['id']}\n{owner}" + dot.node( + comp["id"], + label=label, + fillcolor=fill, + penwidth="2.0" if is_new else "1.0", + ) # Ghost nodes (outside clusters, muted style) for ghost_id in sorted(ghost_ids): From 4635356d37e9f309d0fd3e293e6578cb8175df90 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Mon, 25 May 2026 23:03:34 -0400 Subject: [PATCH 07/51] Update color scheme to reflect team roles - NCATS (red) and UI (pink): vivid/prominent as the main consumers - DOGSLED (blue), DOGSURF (green), CATRAX (amber): distinct colors for the three main teams - Core Components WG (purple), DINGO (cyan), Shepherd (lime), Retriever (brown): distinct from the main teams for specialized cross-team groups Co-Authored-By: Claude Sonnet 4.6 --- .../generate_diagram.py | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 816785e..f6d1bb8 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -13,17 +13,20 @@ # Refactor status values that indicate active components DEFAULT_STATUSES = ["Continues into Refactor", "New in Refactor"] -# Owner → pastel fill color mapping (soft colors work well on white backgrounds) +# Owner → fill color mapping OWNER_COLORS = { - "DOGSURF": "#AED6F1", - "DINGO": "#A9DFBF", - "Core Components WG": "#F9E79F", - "NCATS": "#F1948A", - "Retriever": "#D7BDE2", - "Shepherd": "#FAD7A0", - "DOGSLED": "#A8D8EA", - "CATRAX": "#C8E6C9", - "UI": "#FFDDC1", + # Main customers: bright and prominent + "NCATS": "#EF5350", # vivid red + "UI": "#EC407A", # vivid pink + # Three main teams: distinct solid colors + "DOGSLED": "#42A5F5", # blue + "DOGSURF": "#66BB6A", # green + "CATRAX": "#FFA726", # amber + # Specialized cross-team groups: distinct from the teams above + "Core Components WG": "#AB47BC", # purple + 
"DINGO": "#26C6DA", # cyan + "Shepherd": "#D4E157", # lime + "Retriever": "#8D6E63", # brown "None": "#E8E8E8", } FALLBACK_COLORS = [ From 9a49bd6eabb5f166a56130384453b5b0f0af34fe Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Mon, 25 May 2026 23:14:03 -0400 Subject: [PATCH 08/51] Reverse Calls arrow direction and update legend labels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Dotted 'Calls' edges now run A→B (caller to callee) - Legend node labels: Producer/Consumer for 'Gets results from', Component/Service for 'Calls' - Legend edge labels: 'Results' and 'API call' Co-Authored-By: Claude Sonnet 4.6 --- translator-components-diagram/generate_diagram.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index f6d1bb8..c01689e 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -181,7 +181,7 @@ def build_graph( for ref in comp["_uses"]: target = id_lower_map.get(ref.lower(), {}).get("id", ref) if target in active_set or target in ghost_ids: - dot.edge(target, comp["id"], style="dotted") # B ··→ A: B provides results to A + dot.edge(comp["id"], target, style="dotted") # A ··→ B: A sends request to B # Legend with dot.subgraph(name="cluster_legend") as leg: @@ -193,13 +193,14 @@ def build_graph( fontname="Helvetica", fontsize="11", margin="12", + rank="min", ) - leg.node("_leg_a1", label="Component A", fillcolor="white", penwidth="1.0") - leg.node("_leg_b1", label="Data Source B", fillcolor="white", penwidth="1.0") - leg.edge("_leg_b1", "_leg_a1", xlabel="Gets results from") - leg.node("_leg_a2", label="Component C", fillcolor="white", penwidth="1.0") - leg.node("_leg_b2", label="Component D", fillcolor="white", penwidth="1.0") - leg.edge("_leg_b2", "_leg_a2", xlabel="Calls", style="dotted") + leg.node("_leg_a1", 
label="Producer", fillcolor="white", penwidth="1.0") + leg.node("_leg_b1", label="Consumer", fillcolor="white", penwidth="1.0") + leg.edge("_leg_a1", "_leg_b1", xlabel="Results") + leg.node("_leg_a2", label="Component", fillcolor="white", penwidth="1.0") + leg.node("_leg_b2", label="Service", fillcolor="white", penwidth="1.0") + leg.edge("_leg_a2", "_leg_b2", xlabel="API call", style="dotted") return dot From 9b0f130c3a126be33fe27b267241ee054c9bc58e Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Mon, 25 May 2026 23:20:23 -0400 Subject: [PATCH 09/51] Add entry and exit terminal nodes to diagram - 'External data sources' cylinder at the top feeds into kgx-storage-pipeline, marking where the solid-line data flow begins - 'User' double-border oval at the bottom receives from ui, marking where results ultimately go Co-Authored-By: Claude Sonnet 4.6 --- .../generate_diagram.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index c01689e..8114445 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -183,6 +183,29 @@ def build_graph( if target in active_set or target in ghost_ids: dot.edge(comp["id"], target, style="dotted") # A ··→ B: A sends request to B + # Entry node: External data sources (cylinder = data store) + terminal_attrs = dict( + style="filled", + fillcolor="#CFD8DC", + fontname="Helvetica", + fontsize="11", + penwidth="1.5", + ) + dot.node("_external_sources", label="External\ndata sources", + shape="cylinder", **terminal_attrs) + with dot.subgraph() as src_rank: + src_rank.attr(rank="min") + src_rank.node("_external_sources") + dot.edge("_external_sources", "kgx-storage-pipeline") + + # Exit node: User (double-border oval = terminal endpoint) + dot.node("_user", label="User", shape="oval", + peripheries="2", **terminal_attrs) + with dot.subgraph() as sink_rank: 
+ sink_rank.attr(rank="max") + sink_rank.node("_user") + dot.edge("ui", "_user") + # Legend with dot.subgraph(name="cluster_legend") as leg: leg.attr( From 138380c6670b95b5c23bd95cb0a16fd5c615ef06 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Mon, 25 May 2026 23:28:59 -0400 Subject: [PATCH 10/51] Add planned-but-not-implemented edge convention (~prefix) IDs prefixed with '~' in 'Gets results from' or 'Calls' columns are treated as planned connections. These render in gray: dashed for planned 'Gets results from', dotted for planned 'Calls'. Validation and JSON output also cover planned refs. Co-Authored-By: Claude Sonnet 4.6 --- .../generate_diagram.py | 51 +++++++++++++++---- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 8114445..e6de139 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -44,11 +44,21 @@ def get_owner_color(owner: str, color_map: dict) -> str: return color_map[owner] -def parse_id_list(field: str) -> list[str]: - """Split a comma-separated field into a list of stripped, non-empty strings.""" - if not field or not field.strip(): - return [] - return [part.strip() for part in field.split(",") if part.strip()] +def parse_id_list(field: str) -> tuple[list[str], list[str]]: + """Split a comma-separated field into (implemented_ids, planned_ids). + + IDs prefixed with '~' are planned-but-not-yet-implemented. 
+ """ + implemented, planned = [], [] + for part in field.split(","): + part = part.strip() + if not part: + continue + if part.startswith("~"): + planned.append(part[1:].strip()) + else: + implemented.append(part) + return implemented, planned def load_components(csv_path: Path) -> list[dict]: @@ -56,8 +66,12 @@ def load_components(csv_path: Path) -> list[dict]: reader = csv.DictReader(f) rows = [] for row in reader: - row["_depends_on"] = parse_id_list(row.get("Gets results from", "")) - row["_uses"] = parse_id_list(row.get("Calls", "")) + impl, planned = parse_id_list(row.get("Gets results from", "")) + row["_depends_on"] = impl + row["_depends_on_planned"] = planned + impl, planned = parse_id_list(row.get("Calls", "")) + row["_uses"] = impl + row["_uses_planned"] = planned rows.append(row) return rows @@ -68,7 +82,11 @@ def validate(components: list[dict]) -> bool: ok = True for comp in components: comp_id = comp["id"] - for ref in comp["_depends_on"] + comp["_uses"]: + all_refs = ( + comp["_depends_on"] + comp["_depends_on_planned"] + + comp["_uses"] + comp["_uses_planned"] + ) + for ref in all_refs: ref_lower = ref.lower() if ref_lower not in id_lower_map: click.echo( @@ -91,7 +109,9 @@ def write_json(components: list[dict], out_path: Path) -> None: for comp in components: row = {k: v for k, v in comp.items() if not k.startswith("_")} row["depends_on"] = comp["_depends_on"] + row["depends_on_planned"] = comp["_depends_on_planned"] row["uses"] = comp["_uses"] + row["uses_planned"] = comp["_uses_planned"] exportable.append(row) with out_path.open("w", encoding="utf-8") as f: json.dump(exportable, f, indent=2, ensure_ascii=False) @@ -116,7 +136,11 @@ def build_graph( for comp in components: if comp["id"] not in active_set: continue - for ref in comp["_depends_on"] + comp["_uses"]: + all_refs = ( + comp["_depends_on"] + comp["_depends_on_planned"] + + comp["_uses"] + comp["_uses_planned"] + ) + for ref in all_refs: canonical = id_lower_map.get(ref.lower(), 
{}).get("id", ref) if canonical not in active_set: ghost_ids.add(canonical) @@ -171,6 +195,7 @@ def build_graph( ) # Edges — resolve ids case-insensitively + PLANNED_COLOR = "#999999" for comp in components: if comp["id"] not in active_set: continue @@ -178,10 +203,18 @@ def build_graph( target = id_lower_map.get(ref.lower(), {}).get("id", ref) if target in active_set or target in ghost_ids: dot.edge(target, comp["id"]) # B → A: B provides results to A + for ref in comp["_depends_on_planned"]: + target = id_lower_map.get(ref.lower(), {}).get("id", ref) + if target in active_set or target in ghost_ids: + dot.edge(target, comp["id"], style="dashed", color=PLANNED_COLOR) for ref in comp["_uses"]: target = id_lower_map.get(ref.lower(), {}).get("id", ref) if target in active_set or target in ghost_ids: dot.edge(comp["id"], target, style="dotted") # A ··→ B: A sends request to B + for ref in comp["_uses_planned"]: + target = id_lower_map.get(ref.lower(), {}).get("id", ref) + if target in active_set or target in ghost_ids: + dot.edge(comp["id"], target, style="dotted", color=PLANNED_COLOR) # Entry node: External data sources (cylinder = data store) terminal_attrs = dict( From 2add57330240b8bb5e16a9458e9536accc8d8955 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Mon, 25 May 2026 23:32:03 -0400 Subject: [PATCH 11/51] Add README for translator-components-diagram tool Covers purpose, quick start, CSV format, all diagram conventions (node colours, edge types, ghost nodes, planned edges, terminal nodes), CLI options, repository layout, and a list of possible future improvements. 
Co-Authored-By: Claude Sonnet 4.6 --- translator-components-diagram/README.md | 195 ++++++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 translator-components-diagram/README.md diff --git a/translator-components-diagram/README.md b/translator-components-diagram/README.md new file mode 100644 index 0000000..382fc73 --- /dev/null +++ b/translator-components-diagram/README.md @@ -0,0 +1,195 @@ +# Translator Components Diagram + +A Python CLI tool that reads a spreadsheet of Translator platform components, +validates their dependency declarations, and produces Graphviz diagrams showing +how data flows through the system and which services call each other. + +## Purpose + +The Translator platform comprises many components maintained by different teams. +This tool makes the overall architecture visible by turning a human-maintained +Google Sheet into a shareable diagram. The default view filters to components +that are active in the current refactor ("Continues into Refactor" and +"New in Refactor"), so the diagram stays focused on what is currently relevant. + +## Quick start + +Requires Python ≥ 3.11 and [uv](https://docs.astral.sh/uv/). +The [Graphviz](https://graphviz.org/) system package must also be installed +(`brew install graphviz` on macOS). + +```bash +cd translator-components-diagram +uv sync # first-time setup; creates .venv/ + +# Download latest data from the Google Sheet and regenerate +uv run generate_diagram.py --google-sheet + +# Use a locally cached CSV instead +uv run generate_diagram.py + +# Include all components, not just the refactor-active ones +uv run generate_diagram.py --all + +# Also produce a PDF (useful for presentations) +uv run generate_diagram.py --google-sheet --format pdf +``` + +## Input data + +### Google Sheet + +The canonical source of truth is a world-readable Google Sheet. 
Its ID is +stored in `.env` (gitignored; never committed): + +``` +# translator-components-diagram/.env +GOOGLE_SHEET_ID= +``` + +Run with `--google-sheet` to download the latest CSV export into `data/` and +use it immediately. The downloaded file is also gitignored. + +### CSV format + +The sheet must have these columns (order does not matter): + +| Column | Description | +|---|---| +| `id` | Unique machine-readable identifier (kebab-case preferred) | +| `Name` | Human-readable display name shown in the diagram | +| `Owner` | Team that owns the component; controls node colour | +| `Component in ITRB` | ITRB category (informational only) | +| `Refactor status` | Lifecycle status — see filtering below | +| `Gets results from` | Comma-separated IDs this component receives data from | +| `Calls` | Comma-separated IDs this component makes optional API calls to | +| `Notes` | Free-text notes (not used by the tool) | + +#### Planned (not-yet-implemented) relationships + +Prefix any ID in `Gets results from` or `Calls` with `~` to mark it as +planned but not yet implemented: + +``` +Gets results from: nodenorm-es, ~new-service +Calls: ars, ~future-api +``` + +Planned edges render in gray; implemented edges render in black. + +## Output files + +All outputs go to `data/` (gitignored) by default. + +| File | Always? | Description | +|---|---|---| +| `data/diagram.png` | yes | Main shareable diagram | +| `data/diagram.dot` | yes | Graphviz source — useful for debugging or tweaking | +| `data/components.json` | yes | All components parsed (all statuses, not filtered) | +| `data/diagram.pdf` | `--format pdf` | Vector format for presentations | +| `data/diagram.svg` | `--format svg` | Vector format for web embedding | + +> The `.dot` and `.json` files are intended to eventually be committed to the +> repo so people can inspect the data without running the tool. 
+ +## Diagram conventions + +### Node colours (by Owner) + +| Owner | Colour | Rationale | +|---|---|---| +| NCATS | Red | Main customer | +| UI | Pink | Main customer | +| DOGSLED | Blue | Main team | +| DOGSURF | Green | Main team | +| CATRAX | Amber | Main team | +| Core Components WG | Purple | Specialized cross-team group | +| DINGO | Cyan | Specialized cross-team group | +| Shepherd | Lime | Specialized cross-team group | +| Retriever | Brown | Specialized cross-team group | + +New owners not listed above receive fallback colours automatically. + +### Node border weight + +- **Bold border** — component is "New in Refactor" +- **Normal border** — component "Continues into Refactor" + +### Edge types + +| Style | Meaning | +|---|---| +| Solid black arrow B → A | B provides results to A ("Gets results from") | +| Gray dashed arrow B → A | Same, but planned / not yet implemented | +| Dotted black arrow A → B | A makes an optional API call to B ("Calls") | +| Gray dotted arrow A → B | Same, but planned / not yet implemented | + +### Special nodes + +- **External data sources** (cylinder, gray) — entry point; represents all + upstream data stores that feed into `kgx-storage-pipeline` +- **User** (double-border oval, gray) — exit point; the human end-consumer + who receives results from the UI + +### Ghost nodes + +Components that are referenced by an active component but are themselves +outside the current filter (e.g. "Removed after Refactor") appear as gray +dashed boxes labelled `(excluded)`. This keeps cross-boundary edges visible +without cluttering the main diagram. 
+ +## All CLI options + +``` +uv run generate_diagram.py [OPTIONS] + + --input PATH Local CSV file [default: data/components.csv] + --google-sheet Download CSV from Google Sheet (reads GOOGLE_SHEET_ID + from .env) instead of using --input + --sheet-gid INTEGER Google Sheet tab GID (0 = first tab) [default: 0] + --output-dir PATH Directory for output files [default: data] + --output-name TEXT Base filename for outputs [default: diagram] + --refactor-status TEXT Comma-separated Refactor status values to include + [default: "Continues into Refactor,New in Refactor"] + --all Include all components regardless of Refactor status + --format [png|pdf|svg] Additional output format (PNG always produced; + can be repeated) + --direction [LR|TB] Graphviz layout direction [default: TB] + --help Show this message and exit. +``` + +## Repository layout + +``` +translator-components-diagram/ +├── generate_diagram.py # The tool +├── pyproject.toml # uv/hatchling project metadata and dependencies +├── uv.lock # Pinned dependency versions +├── .env # GOOGLE_SHEET_ID — gitignored, fill in locally +├── README.md # This file +└── data/ # Gitignored — all inputs and outputs go here + ├── components.csv # Downloaded from Google Sheet + ├── components.json # Parsed component data (all statuses) + ├── diagram.dot # Graphviz source + └── diagram.png # Rendered diagram +``` + +## Possible future improvements + +- **Compact HTML-table legend** — replace the current node-based legend with a + Graphviz HTML label table, which would be much smaller and allow adding the + planned-edge styles without growing the legend box. +- **Commit `.dot` and `.json` to Git** — move these outputs outside `data/` so + they are version-controlled and reviewable without running the tool. +- **Interactive SVG or HTML output** — embed tooltips (owner, notes, status) + using Graphviz's `tooltip` attribute or a post-processing step with a library + like `d3-graphviz`. 
+- **Grouping / filtering by ITRB category** — the `Component in ITRB` column + is loaded but not currently used; it could drive an alternative colour scheme + or a `--group-by itrb` flag. +- **Cycle detection** — the validator checks for unknown IDs but does not yet + detect dependency cycles, which would be a useful integrity check. +- **Multiple sheet tabs** — `--sheet-gid` already supports non-default tabs; + a `--all-tabs` mode could merge or overlay multiple views. +- **Diff mode** — compare two runs of the tool (e.g. before and after a sprint) + and highlight added, removed, or changed components and edges. From eeeb3e835b7d77cb0c3a3370fa5985ac0ea40509 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 26 May 2026 02:31:08 -0400 Subject: [PATCH 12/51] Reject inputs that would silently corrupt the diagram MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - validate() now treats unknown refs and duplicate ids as hard errors; main() raises ClickException so a broken sheet never silently renders. - build_graph resolves refs via a resolve() helper that returns None for unknowns instead of falling back to the raw ref string, so missing components no longer materialize as phantom ghost nodes. - Hardcoded entry/exit edges (External-sources → kgx-storage-pipeline and ui → User) are gated on the target id being in active_set or ghost_ids, so filtering or renaming those components no longer leaves default-styled phantom boxes in the diagram. - Google Sheet download switches from urlretrieve to urlopen with a Content-Type check, so HTML login pages from private/missing sheets raise an error instead of being saved as components.csv. - CSV is read with utf-8-sig so a UTF-8 BOM (e.g. from an Excel resave) no longer corrupts the first column header and KeyError on c['id']. 
Co-Authored-By: Claude Opus 4.7 --- .../generate_diagram.py | 134 +++++++++++++----- 1 file changed, 102 insertions(+), 32 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index e6de139..8e8e285 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -3,6 +3,7 @@ import csv import json import os +import urllib.error import urllib.request from pathlib import Path @@ -62,7 +63,9 @@ def parse_id_list(field: str) -> tuple[list[str], list[str]]: def load_components(csv_path: Path) -> list[dict]: - with csv_path.open(newline="", encoding="utf-8") as f: + # utf-8-sig strips a UTF-8 BOM if present (Excel-resaved or Windows-edited files), + # otherwise the first header would read as "id" and KeyError on c["id"]. + with csv_path.open(newline="", encoding="utf-8-sig") as f: reader = csv.DictReader(f) rows = [] for row in reader: @@ -77,9 +80,31 @@ def load_components(csv_path: Path) -> list[dict]: def validate(components: list[dict]) -> bool: - """Print warnings for any reference issues. Returns False if any warnings.""" - id_lower_map = {c["id"].lower(): c["id"] for c in components} + """Print messages for any reference issues. + + Returns False on hard errors (duplicate ids, unknown referenced ids). + Case-mismatch references are informational and do not flip the return + value, because the case-insensitive lookup in build_graph still resolves + them to the canonical component. + """ ok = True + + # Hard error: duplicate ids (case-insensitive). The id_lower_map below + # would silently keep only the last duplicate, so detect them up front. 
+ seen: dict[str, str] = {} + for comp in components: + key = comp["id"].lower() + if key in seen: + click.echo( + f"ERROR: duplicate id (case-insensitive): " + f"'{seen[key]}' and '{comp['id']}'", + err=True, + ) + ok = False + else: + seen[key] = comp["id"] + + id_lower_map = {c["id"].lower(): c["id"] for c in components} for comp in components: comp_id = comp["id"] all_refs = ( @@ -90,7 +115,7 @@ def validate(components: list[dict]) -> bool: ref_lower = ref.lower() if ref_lower not in id_lower_map: click.echo( - f"WARNING: '{comp_id}' references unknown id '{ref}' " + f"ERROR: '{comp_id}' references unknown id '{ref}' " f"in Gets results from/Calls", err=True, ) @@ -131,6 +156,16 @@ def build_graph( else: active_set = {c["id"] for c in components if c["Refactor status"] in active_statuses} + def resolve(ref: str) -> str | None: + """Resolve a ref string to its canonical component id, or None if unknown. + + Unknown refs return None rather than falling back to the raw ref so they + do not render as phantom ghost nodes; validate() is responsible for + surfacing them as errors before we reach here. 
+ """ + match = id_lower_map.get(ref.lower()) + return match["id"] if match else None + # Collect ghost ids: referenced by active components but not in active_set ghost_ids: set[str] = set() for comp in components: @@ -141,8 +176,8 @@ def build_graph( + comp["_uses"] + comp["_uses_planned"] ) for ref in all_refs: - canonical = id_lower_map.get(ref.lower(), {}).get("id", ref) - if canonical not in active_set: + canonical = resolve(ref) + if canonical is not None and canonical not in active_set: ghost_ids.add(canonical) dot = graphviz.Digraph( @@ -194,29 +229,40 @@ def build_graph( color="#999999", ) - # Edges — resolve ids case-insensitively + # Edges — resolve ids case-insensitively; unknown refs are skipped (validate() flagged them) PLANNED_COLOR = "#999999" + + def edge_target(ref: str) -> str | None: + target = resolve(ref) + if target is None: + return None + if target in active_set or target in ghost_ids: + return target + return None + for comp in components: if comp["id"] not in active_set: continue for ref in comp["_depends_on"]: - target = id_lower_map.get(ref.lower(), {}).get("id", ref) - if target in active_set or target in ghost_ids: + target = edge_target(ref) + if target is not None: dot.edge(target, comp["id"]) # B → A: B provides results to A for ref in comp["_depends_on_planned"]: - target = id_lower_map.get(ref.lower(), {}).get("id", ref) - if target in active_set or target in ghost_ids: + target = edge_target(ref) + if target is not None: dot.edge(target, comp["id"], style="dashed", color=PLANNED_COLOR) for ref in comp["_uses"]: - target = id_lower_map.get(ref.lower(), {}).get("id", ref) - if target in active_set or target in ghost_ids: + target = edge_target(ref) + if target is not None: dot.edge(comp["id"], target, style="dotted") # A ··→ B: A sends request to B for ref in comp["_uses_planned"]: - target = id_lower_map.get(ref.lower(), {}).get("id", ref) - if target in active_set or target in ghost_ids: + target = edge_target(ref) + if target 
is not None: dot.edge(comp["id"], target, style="dotted", color=PLANNED_COLOR) - # Entry node: External data sources (cylinder = data store) + # Entry/exit terminal nodes — only added when the components they connect + # to are present in the diagram. Without the gate, hardcoded edges to + # missing ids would silently create default-styled phantom nodes. terminal_attrs = dict( style="filled", fillcolor="#CFD8DC", @@ -224,20 +270,24 @@ def build_graph( fontsize="11", penwidth="1.5", ) - dot.node("_external_sources", label="External\ndata sources", - shape="cylinder", **terminal_attrs) - with dot.subgraph() as src_rank: - src_rank.attr(rank="min") - src_rank.node("_external_sources") - dot.edge("_external_sources", "kgx-storage-pipeline") - - # Exit node: User (double-border oval = terminal endpoint) - dot.node("_user", label="User", shape="oval", - peripheries="2", **terminal_attrs) - with dot.subgraph() as sink_rank: - sink_rank.attr(rank="max") - sink_rank.node("_user") - dot.edge("ui", "_user") + + ENTRY_TARGET = "kgx-storage-pipeline" + if ENTRY_TARGET in active_set or ENTRY_TARGET in ghost_ids: + dot.node("_external_sources", label="External\ndata sources", + shape="cylinder", **terminal_attrs) + with dot.subgraph() as src_rank: + src_rank.attr(rank="min") + src_rank.node("_external_sources") + dot.edge("_external_sources", ENTRY_TARGET) + + EXIT_SOURCE = "ui" + if EXIT_SOURCE in active_set or EXIT_SOURCE in ghost_ids: + dot.node("_user", label="User", shape="oval", + peripheries="2", **terminal_attrs) + with dot.subgraph() as sink_rank: + sink_rank.attr(rank="max") + sink_rank.node("_user") + dot.edge(EXIT_SOURCE, "_user") # Legend with dot.subgraph(name="cluster_legend") as leg: @@ -348,7 +398,24 @@ def main( ) download_path = output_dir / "components.csv" click.echo(f"Downloading CSV from Google Sheet to {download_path} ...") - urllib.request.urlretrieve(url, download_path) + # Use urlopen + content-type check rather than urlretrieve: a private + # or 
missing sheet redirects to a 200 HTML login page, which would + # otherwise be silently saved as components.csv. + try: + with urllib.request.urlopen(url) as response: + content_type = response.headers.get("Content-Type", "") + body = response.read() + except urllib.error.URLError as exc: + raise click.ClickException( + f"Failed to download Google Sheet ({url}): {exc}" + ) from exc + if "text/csv" not in content_type.lower(): + raise click.ClickException( + f"Google Sheet response was not CSV (Content-Type: " + f"{content_type or 'unset'}). The sheet may be private, " + f"the ID may be wrong, or the gid may not exist. URL: {url}" + ) + download_path.write_bytes(body) input_path = download_path elif not input_path.exists(): raise click.ClickException(f"Input file not found: {input_path}") @@ -358,7 +425,10 @@ def main( click.echo(f"Loaded {len(components)} components.") click.echo("Validating references ...") - validate(components) + if not validate(components): + raise click.ClickException( + "Validation failed; fix the errors above and re-run." + ) # Write JSON (all components, regardless of filter) json_path = output_dir / "components.json" From e8d36f48741e0fd25584dfd90db04d90fbed09dd Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 26 May 2026 02:33:59 -0400 Subject: [PATCH 13/51] Tidy code-review nits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Encapsulate fallback-color state in ColorAssigner so repeated main() invocations in one process don't drift via a module-level counter. - cleanup=True on dot.render so the extension-less duplicate of the dot source is removed (we already write {output_name}.dot explicitly). - Drop rank='min' from the legend cluster — graphviz ignores rank on cluster subgraphs, so the attribute was misleading. - Drop 'png' from --format choices and update the README: PNG is always produced, so listing it as a togglable option was confusing. 
Co-Authored-By: Claude Opus 4.7 --- translator-components-diagram/README.md | 4 +- .../generate_diagram.py | 51 +++++++++++-------- 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/translator-components-diagram/README.md b/translator-components-diagram/README.md index 382fc73..41b863c 100644 --- a/translator-components-diagram/README.md +++ b/translator-components-diagram/README.md @@ -152,8 +152,8 @@ uv run generate_diagram.py [OPTIONS] --refactor-status TEXT Comma-separated Refactor status values to include [default: "Continues into Refactor,New in Refactor"] --all Include all components regardless of Refactor status - --format [png|pdf|svg] Additional output format (PNG always produced; - can be repeated) + --format [pdf|svg] Additional output format beyond PNG (PNG is + always produced; can be repeated) --direction [LR|TB] Graphviz layout direction [default: TB] --help Show this message and exit. ``` diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 8e8e285..4d3e99d 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -34,15 +34,23 @@ "#B0BEC5", "#BCAAA4", "#CE93D8", "#80CBC4", "#EF9A9A", "#FFCC80", "#C5E1A5", "#80DEEA", ] -_color_index = 0 -def get_owner_color(owner: str, color_map: dict) -> str: - global _color_index - if owner not in color_map: - color_map[owner] = FALLBACK_COLORS[_color_index % len(FALLBACK_COLORS)] - _color_index += 1 - return color_map[owner] +class ColorAssigner: + """Assigns fill colors to owners, falling back to a rotating palette.""" + + def __init__(self, base_colors: dict[str, str], fallback_colors: list[str]): + self.color_map: dict[str, str] = dict(base_colors) + self.fallback_colors = fallback_colors + self.next_fallback = 0 + + def get(self, owner: str) -> str: + if owner not in self.color_map: + self.color_map[owner] = self.fallback_colors[ + self.next_fallback % 
len(self.fallback_colors) + ] + self.next_fallback += 1 + return self.color_map[owner] def parse_id_list(field: str) -> tuple[list[str], list[str]]: @@ -147,7 +155,7 @@ def build_graph( components: list[dict], active_statuses: set[str] | None, direction: str, - color_map: dict, + colors: ColorAssigner, ) -> graphviz.Digraph: id_lower_map = {c["id"].lower(): c for c in components} @@ -204,7 +212,7 @@ def resolve(ref: str) -> str | None: if comp["id"] not in active_set: continue owner = comp.get("Owner", "None") or "None" - fill = get_owner_color(owner, color_map) + fill = colors.get(owner) is_new = comp["Refactor status"] == "New in Refactor" label = f"{comp['Name']}\n{comp['id']}\n{owner}" dot.node( @@ -289,7 +297,8 @@ def edge_target(ref: str) -> str | None: sink_rank.node("_user") dot.edge(EXIT_SOURCE, "_user") - # Legend + # Legend — note: `rank` is not honored on cluster subgraphs in graphviz, + # so legend placement is left to the layout engine. with dot.subgraph(name="cluster_legend") as leg: leg.attr( label="Legend", @@ -299,7 +308,6 @@ def edge_target(ref: str) -> str | None: fontname="Helvetica", fontsize="11", margin="12", - rank="min", ) leg.node("_leg_a1", label="Producer", fillcolor="white", penwidth="1.0") leg.node("_leg_b1", label="Consumer", fillcolor="white", penwidth="1.0") @@ -360,8 +368,9 @@ def edge_target(ref: str) -> str | None: @click.option( "--format", "extra_formats", multiple=True, - type=click.Choice(["pdf", "svg", "png"]), - help="Additional output formats (PNG always produced). Can be repeated.", + type=click.Choice(["pdf", "svg"]), + help="Additional output formats beyond PNG (PNG is always produced). 
" + "Can be repeated.", ) @click.option( "--direction", @@ -447,25 +456,25 @@ def main( + ", ".join(sorted(active_statuses)) ) - color_map = dict(OWNER_COLORS) - dot = build_graph(components, active_statuses, direction, color_map) + colors = ColorAssigner(OWNER_COLORS, FALLBACK_COLORS) + dot = build_graph(components, active_statuses, direction, colors) # Save .dot source dot_path = output_dir / f"{output_name}.dot" dot_path.write_text(dot.source, encoding="utf-8") click.echo(f"Wrote {dot_path}") - # Render PNG (always) + # Render PNG (always) plus any extra formats. cleanup=True removes the + # intermediate extension-less dot source that render() writes alongside + # the rendered file — we already keep the canonical copy in {output_name}.dot. formats_to_render = {"png"} | set(extra_formats) for fmt in sorted(formats_to_render): - rendered = dot.render( + dot.render( filename=str(output_dir / output_name), format=fmt, - cleanup=False, + cleanup=True, ) - # graphviz appends format extension; rename away the extra copy - expected = output_dir / f"{output_name}.{fmt}" - click.echo(f"Wrote {expected}") + click.echo(f"Wrote {output_dir / f'{output_name}.{fmt}'}") if __name__ == "__main__": From 68adeed0f69ab9e1e9a8a926a28f63ce34e1f25d Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 26 May 2026 02:51:17 -0400 Subject: [PATCH 14/51] Refactor to dataclass and improve diagram readability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Code: - Replace stringly-typed dict rows with a Component dataclass so column typos KeyError at parse time, not midway through rendering. - Extract index_by_id() helper used by both validate and build_graph (previously each built its own parallel id_lower_map). - Sort components by id after load so .dot and .json diffs are stable across CSV row reorderings. 
- Split build_graph into _compute_active_set, _compute_ghost_ids, _add_active_nodes, _add_ghost_nodes, _add_edges, _add_terminal_nodes, and _add_legend. - Add text_color_for() — picks black/white text from background luminance so dark fills (NCATS red, Retriever brown, etc.) stay readable. - Component.display_name falls back to id when Name is empty. Diagram: - dpi=150 and splines=polyline for sharper, less-busy PNGs. - Drop owner from node labels — already encoded by fill color, and shown in a new HTML-table owner legend. - Recolor planned edges to soft indigo (#7986CB) so they no longer blur with ghost-node gray borders (#999999). - Expand the legend to cover all visual encodings the README documents: owner color swatches, Producer→Consumer / API-call / Planned edge styles, bold border = New in Refactor, (excluded) ghost node, and the cylinder / double-oval terminal shapes. Ergonomics: - load .env from cwd first (standard dotenv behavior, walks up the tree) then from the script directory as a fallback. Document the search order in --google-sheet help text. - Add __pycache__, *.pyc, .DS_Store to .gitignore. README updated for new planned-edge color and to drop the obsolete "compact HTML-table legend" future-improvement bullet (now implemented). Co-Authored-By: Claude Opus 4.7 --- .gitignore | 5 + translator-components-diagram/README.md | 10 +- .../generate_diagram.py | 485 ++++++++++++------ 3 files changed, 347 insertions(+), 153 deletions(-) diff --git a/.gitignore b/.gitignore index 141e95c..4ddcd7a 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,8 @@ data/ # IDE files .idea/ + +# Python caches and OS metadata +__pycache__/ +*.pyc +.DS_Store diff --git a/translator-components-diagram/README.md b/translator-components-diagram/README.md index 41b863c..1abd72b 100644 --- a/translator-components-diagram/README.md +++ b/translator-components-diagram/README.md @@ -120,9 +120,12 @@ New owners not listed above receive fallback colours automatically. 
| Style | Meaning | |---|---| | Solid black arrow B → A | B provides results to A ("Gets results from") | -| Gray dashed arrow B → A | Same, but planned / not yet implemented | +| Indigo dashed arrow B → A | Same, but planned / not yet implemented | | Dotted black arrow A → B | A makes an optional API call to B ("Calls") | -| Gray dotted arrow A → B | Same, but planned / not yet implemented | +| Indigo dotted arrow A → B | Same, but planned / not yet implemented | + +Planned-edge indigo is distinct from the gray used for ghost-node borders, +so the two encodings don't blur together visually. ### Special nodes @@ -176,9 +179,6 @@ translator-components-diagram/ ## Possible future improvements -- **Compact HTML-table legend** — replace the current node-based legend with a - Graphviz HTML label table, which would be much smaller and allow adding the - planned-edge styles without growing the legend box. - **Commit `.dot` and `.json` to Git** — move these outputs outside `data/` so they are version-controlled and reviewable without running the tool. - **Interactive SVG or HTML output** — embed tooltips (owner, notes, status) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 4d3e99d..b3d1742 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -1,10 +1,12 @@ """Generate dependency diagrams for Translator platform components.""" import csv +import html import json import os import urllib.error import urllib.request +from dataclasses import dataclass, field from pathlib import Path import click @@ -14,8 +16,8 @@ # Refactor status values that indicate active components DEFAULT_STATUSES = ["Continues into Refactor", "New in Refactor"] -# Owner → fill color mapping -OWNER_COLORS = { +# Owner → fill color mapping. Insertion order doubles as legend order. 
+OWNER_COLORS: dict[str, str] = { # Main customers: bright and prominent "NCATS": "#EF5350", # vivid red "UI": "#EC407A", # vivid pink @@ -34,6 +36,18 @@ "#B0BEC5", "#BCAAA4", "#CE93D8", "#80CBC4", "#EF9A9A", "#FFCC80", "#C5E1A5", "#80DEEA", ] +# Soft indigo for planned edges — distinct from the ghost-node gray +# (#999999) so planned edges don't visually blur with excluded-node borders. +PLANNED_EDGE_COLOR = "#7986CB" +GHOST_BORDER_COLOR = "#999999" +GHOST_FILL_COLOR = "#D3D3D3" +GHOST_FONT_COLOR = "#666666" +TERMINAL_FILL_COLOR = "#CFD8DC" + +# Hardcoded entry/exit anchors — the diagram has a single data-flow entry +# (External data sources → kgx-storage-pipeline) and a single exit (UI → User). +ENTRY_TARGET = "kgx-storage-pipeline" +EXIT_SOURCE = "ui" class ColorAssigner: @@ -53,13 +67,52 @@ def get(self, owner: str) -> str: return self.color_map[owner] -def parse_id_list(field: str) -> tuple[list[str], list[str]]: +def text_color_for(fill_hex: str) -> str: + """Return "black" or "white" for adequate contrast against a hex fill.""" + h = fill_hex.lstrip("#") + r, g, b = (int(h[i:i + 2], 16) / 255 for i in (0, 2, 4)) + # Rec. 709 perceptual luminance + luminance = 0.2126 * r + 0.7152 * g + 0.0722 * b + return "black" if luminance > 0.5 else "white" + + +@dataclass +class Component: + """A single row of the components CSV after parsing.""" + + id: str + name: str + owner: str + itrb: str + refactor_status: str + notes: str + depends_on: list[str] = field(default_factory=list) + depends_on_planned: list[str] = field(default_factory=list) + uses: list[str] = field(default_factory=list) + uses_planned: list[str] = field(default_factory=list) + + @property + def display_name(self) -> str: + # Fall back to id when Name is missing — otherwise the label + # starts with a blank line. 
+ return self.name or self.id + + def all_refs(self) -> list[str]: + return ( + self.depends_on + + self.depends_on_planned + + self.uses + + self.uses_planned + ) + + +def parse_id_list(field_value: str) -> tuple[list[str], list[str]]: """Split a comma-separated field into (implemented_ids, planned_ids). IDs prefixed with '~' are planned-but-not-yet-implemented. """ implemented, planned = [], [] - for part in field.split(","): + for part in field_value.split(","): part = part.strip() if not part: continue @@ -70,24 +123,44 @@ def parse_id_list(field: str) -> tuple[list[str], list[str]]: return implemented, planned -def load_components(csv_path: Path) -> list[dict]: - # utf-8-sig strips a UTF-8 BOM if present (Excel-resaved or Windows-edited files), - # otherwise the first header would read as "id" and KeyError on c["id"]. +def load_components(csv_path: Path) -> list[Component]: + """Parse the CSV into a sorted list of Components. + + Sorted by lowercase id for deterministic .dot / .json output across CSV + row reorderings. + """ + # utf-8-sig strips a UTF-8 BOM if present (Excel-resaved or Windows-edited + # files), otherwise the first header would read as "id" and KeyError. 
with csv_path.open(newline="", encoding="utf-8-sig") as f: reader = csv.DictReader(f) - rows = [] + rows: list[Component] = [] for row in reader: - impl, planned = parse_id_list(row.get("Gets results from", "")) - row["_depends_on"] = impl - row["_depends_on_planned"] = planned - impl, planned = parse_id_list(row.get("Calls", "")) - row["_uses"] = impl - row["_uses_planned"] = planned - rows.append(row) + depends_on, depends_on_planned = parse_id_list( + row.get("Gets results from", "") + ) + uses, uses_planned = parse_id_list(row.get("Calls", "")) + rows.append(Component( + id=row.get("id", "").strip(), + name=row.get("Name", "").strip(), + owner=(row.get("Owner") or "None").strip() or "None", + itrb=row.get("Component in ITRB", "").strip(), + refactor_status=row.get("Refactor status", "").strip(), + notes=row.get("Notes", "").strip(), + depends_on=depends_on, + depends_on_planned=depends_on_planned, + uses=uses, + uses_planned=uses_planned, + )) + rows.sort(key=lambda c: c.id.lower()) return rows -def validate(components: list[dict]) -> bool: +def index_by_id(components: list[Component]) -> dict[str, Component]: + """Case-insensitive lookup from lower(id) to Component.""" + return {c.id.lower(): c for c in components} + + +def validate(components: list[Component]) -> bool: """Print messages for any reference issues. Returns False on hard errors (duplicate ids, unknown referenced ids). @@ -97,189 +170,183 @@ def validate(components: list[dict]) -> bool: """ ok = True - # Hard error: duplicate ids (case-insensitive). The id_lower_map below - # would silently keep only the last duplicate, so detect them up front. + # Hard error: duplicate ids (case-insensitive). The index below would + # silently keep only the last duplicate, so detect them up front. 
seen: dict[str, str] = {} for comp in components: - key = comp["id"].lower() + key = comp.id.lower() if key in seen: click.echo( f"ERROR: duplicate id (case-insensitive): " - f"'{seen[key]}' and '{comp['id']}'", + f"'{seen[key]}' and '{comp.id}'", err=True, ) ok = False else: - seen[key] = comp["id"] + seen[key] = comp.id - id_lower_map = {c["id"].lower(): c["id"] for c in components} + index = index_by_id(components) for comp in components: - comp_id = comp["id"] - all_refs = ( - comp["_depends_on"] + comp["_depends_on_planned"] - + comp["_uses"] + comp["_uses_planned"] - ) - for ref in all_refs: - ref_lower = ref.lower() - if ref_lower not in id_lower_map: + for ref in comp.all_refs(): + match = index.get(ref.lower()) + if match is None: click.echo( - f"ERROR: '{comp_id}' references unknown id '{ref}' " + f"ERROR: '{comp.id}' references unknown id '{ref}' " f"in Gets results from/Calls", err=True, ) ok = False - elif id_lower_map[ref_lower] != ref: + elif match.id != ref: click.echo( - f"WARNING: '{comp_id}' references '{ref}' but the actual id " - f"is '{id_lower_map[ref_lower]}' (case mismatch)", + f"WARNING: '{comp.id}' references '{ref}' but the actual id " + f"is '{match.id}' (case mismatch)", err=True, ) return ok -def write_json(components: list[dict], out_path: Path) -> None: - exportable = [] - for comp in components: - row = {k: v for k, v in comp.items() if not k.startswith("_")} - row["depends_on"] = comp["_depends_on"] - row["depends_on_planned"] = comp["_depends_on_planned"] - row["uses"] = comp["_uses"] - row["uses_planned"] = comp["_uses_planned"] - exportable.append(row) +def write_json(components: list[Component], out_path: Path) -> None: + """Serialise components to JSON, preserving the original CSV column names.""" + exportable = [ + { + "id": c.id, + "Name": c.name, + "Owner": c.owner, + "Component in ITRB": c.itrb, + "Refactor status": c.refactor_status, + "Notes": c.notes, + "depends_on": c.depends_on, + "depends_on_planned": 
c.depends_on_planned, + "uses": c.uses, + "uses_planned": c.uses_planned, + } + for c in components + ] with out_path.open("w", encoding="utf-8") as f: json.dump(exportable, f, indent=2, ensure_ascii=False) click.echo(f"Wrote {out_path}") -def build_graph( - components: list[dict], - active_statuses: set[str] | None, - direction: str, - colors: ColorAssigner, -) -> graphviz.Digraph: - id_lower_map = {c["id"].lower(): c for c in components} +# --- Graph construction helpers -------------------------------------------- - if active_statuses is None: - active_set = {c["id"] for c in components} - else: - active_set = {c["id"] for c in components if c["Refactor status"] in active_statuses} - def resolve(ref: str) -> str | None: - """Resolve a ref string to its canonical component id, or None if unknown. +def _compute_active_set( + components: list[Component], + active_statuses: set[str] | None, +) -> set[str]: + if active_statuses is None: + return {c.id for c in components} + return {c.id for c in components if c.refactor_status in active_statuses} - Unknown refs return None rather than falling back to the raw ref so they - do not render as phantom ghost nodes; validate() is responsible for - surfacing them as errors before we reach here. 
- """ - match = id_lower_map.get(ref.lower()) - return match["id"] if match else None - # Collect ghost ids: referenced by active components but not in active_set - ghost_ids: set[str] = set() +def _compute_ghost_ids( + components: list[Component], + index: dict[str, Component], + active_set: set[str], +) -> set[str]: + ghost: set[str] = set() for comp in components: - if comp["id"] not in active_set: + if comp.id not in active_set: continue - all_refs = ( - comp["_depends_on"] + comp["_depends_on_planned"] - + comp["_uses"] + comp["_uses_planned"] - ) - for ref in all_refs: - canonical = resolve(ref) - if canonical is not None and canonical not in active_set: - ghost_ids.add(canonical) + for ref in comp.all_refs(): + match = index.get(ref.lower()) + if match is not None and match.id not in active_set: + ghost.add(match.id) + return ghost - dot = graphviz.Digraph( - name="translator_components", - graph_attr={ - "rankdir": direction, - "fontname": "Helvetica", - "fontsize": "12", - "splines": "ortho", - "nodesep": "0.5", - "ranksep": "1.0", - }, - node_attr={ - "fontname": "Helvetica", - "fontsize": "11", - "style": "filled,rounded", - "shape": "box", - }, - edge_attr={"fontname": "Helvetica", "fontsize": "9"}, - ) - # Add active nodes (no owner clustering — owner is shown in the label) +def _add_active_nodes( + dot: graphviz.Digraph, + components: list[Component], + active_set: set[str], + colors: ColorAssigner, +) -> None: for comp in components: - if comp["id"] not in active_set: + if comp.id not in active_set: continue - owner = comp.get("Owner", "None") or "None" - fill = colors.get(owner) - is_new = comp["Refactor status"] == "New in Refactor" - label = f"{comp['Name']}\n{comp['id']}\n{owner}" + fill = colors.get(comp.owner) + is_new = comp.refactor_status == "New in Refactor" + # Owner is encoded by node color and shown in the legend, not in the label. 
+ label = f"{comp.display_name}\n{comp.id}" dot.node( - comp["id"], + comp.id, label=label, fillcolor=fill, + fontcolor=text_color_for(fill), penwidth="2.0" if is_new else "1.0", ) - # Ghost nodes (outside clusters, muted style) + +def _add_ghost_nodes( + dot: graphviz.Digraph, + ghost_ids: set[str], + index: dict[str, Component], +) -> None: for ghost_id in sorted(ghost_ids): - comp = id_lower_map.get(ghost_id.lower()) - name = comp["Name"] if comp else ghost_id - owner = (comp.get("Owner", "") or "") if comp else "" - label = f"{name}\n{ghost_id}\n{owner}\n(excluded)" if owner else f"{name}\n{ghost_id}\n(excluded)" + comp = index.get(ghost_id.lower()) + name = comp.display_name if comp else ghost_id + label = f"{name}\n{ghost_id}\n(excluded)" dot.node( ghost_id, label=label, - fillcolor="#D3D3D3", + fillcolor=GHOST_FILL_COLOR, style="filled,rounded,dashed", - fontcolor="#666666", - color="#999999", + fontcolor=GHOST_FONT_COLOR, + color=GHOST_BORDER_COLOR, ) - # Edges — resolve ids case-insensitively; unknown refs are skipped (validate() flagged them) - PLANNED_COLOR = "#999999" +def _add_edges( + dot: graphviz.Digraph, + components: list[Component], + index: dict[str, Component], + active_set: set[str], + ghost_ids: set[str], +) -> None: def edge_target(ref: str) -> str | None: - target = resolve(ref) - if target is None: + match = index.get(ref.lower()) + if match is None: return None + target = match.id if target in active_set or target in ghost_ids: return target return None for comp in components: - if comp["id"] not in active_set: + if comp.id not in active_set: continue - for ref in comp["_depends_on"]: - target = edge_target(ref) - if target is not None: - dot.edge(target, comp["id"]) # B → A: B provides results to A - for ref in comp["_depends_on_planned"]: - target = edge_target(ref) - if target is not None: - dot.edge(target, comp["id"], style="dashed", color=PLANNED_COLOR) - for ref in comp["_uses"]: - target = edge_target(ref) - if target is not None: 
- dot.edge(comp["id"], target, style="dotted") # A ··→ B: A sends request to B - for ref in comp["_uses_planned"]: - target = edge_target(ref) - if target is not None: - dot.edge(comp["id"], target, style="dotted", color=PLANNED_COLOR) - - # Entry/exit terminal nodes — only added when the components they connect - # to are present in the diagram. Without the gate, hardcoded edges to - # missing ids would silently create default-styled phantom nodes. + for ref in comp.depends_on: + t = edge_target(ref) + if t is not None: + dot.edge(t, comp.id) # B → A: B provides results to A + for ref in comp.depends_on_planned: + t = edge_target(ref) + if t is not None: + dot.edge(t, comp.id, style="dashed", color=PLANNED_EDGE_COLOR) + for ref in comp.uses: + t = edge_target(ref) + if t is not None: + dot.edge(comp.id, t, style="dotted") # A ··→ B: API call + for ref in comp.uses_planned: + t = edge_target(ref) + if t is not None: + dot.edge(comp.id, t, style="dotted", color=PLANNED_EDGE_COLOR) + + +def _add_terminal_nodes( + dot: graphviz.Digraph, + active_set: set[str], + ghost_ids: set[str], +) -> None: + """Add entry/exit nodes, gated on the components they connect to.""" terminal_attrs = dict( style="filled", - fillcolor="#CFD8DC", + fillcolor=TERMINAL_FILL_COLOR, fontname="Helvetica", fontsize="11", penwidth="1.5", ) - ENTRY_TARGET = "kgx-storage-pipeline" if ENTRY_TARGET in active_set or ENTRY_TARGET in ghost_ids: dot.node("_external_sources", label="External\ndata sources", shape="cylinder", **terminal_attrs) @@ -288,7 +355,6 @@ def edge_target(ref: str) -> str | None: src_rank.node("_external_sources") dot.edge("_external_sources", ENTRY_TARGET) - EXIT_SOURCE = "ui" if EXIT_SOURCE in active_set or EXIT_SOURCE in ghost_ids: dot.node("_user", label="User", shape="oval", peripheries="2", **terminal_attrs) @@ -297,8 +363,34 @@ def edge_target(ref: str) -> str | None: sink_rank.node("_user") dot.edge(EXIT_SOURCE, "_user") - # Legend — note: `rank` is not honored on cluster 
subgraphs in graphviz, - # so legend placement is left to the layout engine. + +def _owner_legend_html(colors: ColorAssigner) -> str: + """Build an HTML-table label listing every owner and its fill color. + + Two-column layout: a colored swatch on the left, the owner name on a + neutral background on the right. This keeps text contrast uniform + regardless of how dark the swatch is. + """ + rows = ['Owner'] + for owner, fill in colors.color_map.items(): + rows.append( + f'' + f' ' + f'{html.escape(owner)}' + f'' + ) + table = ( + '' + + "".join(rows) + + '
' + ) + # Python graphviz treats labels starting with '<' as HTML-like — the + # outer angle brackets are the marker, inner is the table. + return f"<{table}>" + + +def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: + """Build a legend covering owner colors, edge styles, and node styles.""" with dot.subgraph(name="cluster_legend") as leg: leg.attr( label="Legend", @@ -309,12 +401,100 @@ def edge_target(ref: str) -> str | None: fontsize="11", margin="12", ) - leg.node("_leg_a1", label="Producer", fillcolor="white", penwidth="1.0") - leg.node("_leg_b1", label="Consumer", fillcolor="white", penwidth="1.0") - leg.edge("_leg_a1", "_leg_b1", xlabel="Results") - leg.node("_leg_a2", label="Component", fillcolor="white", penwidth="1.0") - leg.node("_leg_b2", label="Service", fillcolor="white", penwidth="1.0") - leg.edge("_leg_a2", "_leg_b2", xlabel="API call", style="dotted") + + # Owner-color key as a compact HTML table. + leg.node( + "_leg_owners", + label=_owner_legend_html(colors), + shape="plain", + ) + + # Edge style examples — provider→consumer / API call / planned. + leg.node("_leg_p", label="Producer", fillcolor="white", penwidth="1.0") + leg.node("_leg_c", label="Consumer", fillcolor="white", penwidth="1.0") + leg.edge("_leg_p", "_leg_c", xlabel="Results") + + leg.node("_leg_a", label="Component", fillcolor="white", penwidth="1.0") + leg.node("_leg_b", label="Service", fillcolor="white", penwidth="1.0") + leg.edge("_leg_a", "_leg_b", xlabel="API call", style="dotted") + + leg.node("_leg_pp", label="Producer*", fillcolor="white", penwidth="1.0") + leg.node("_leg_cc", label="Consumer*", fillcolor="white", penwidth="1.0") + leg.edge( + "_leg_pp", "_leg_cc", + xlabel="Planned", + style="dashed", + color=PLANNED_EDGE_COLOR, + ) + + # Node style examples — bold = new, dashed = ghost, terminal shapes. 
+ leg.node( + "_leg_new", + label="New in\nRefactor", + fillcolor="white", + penwidth="2.0", + ) + leg.node( + "_leg_ghost", + label="(excluded)", + fillcolor=GHOST_FILL_COLOR, + style="filled,rounded,dashed", + fontcolor=GHOST_FONT_COLOR, + color=GHOST_BORDER_COLOR, + ) + leg.node( + "_leg_entry", + label="Data\nsource", + shape="cylinder", + fillcolor=TERMINAL_FILL_COLOR, + penwidth="1.5", + ) + leg.node( + "_leg_exit", + label="User", + shape="oval", + peripheries="2", + fillcolor=TERMINAL_FILL_COLOR, + penwidth="1.5", + ) + + +def build_graph( + components: list[Component], + active_statuses: set[str] | None, + direction: str, + colors: ColorAssigner, +) -> graphviz.Digraph: + """Assemble the full graph from the parsed component list.""" + index = index_by_id(components) + active_set = _compute_active_set(components, active_statuses) + ghost_ids = _compute_ghost_ids(components, index, active_set) + + dot = graphviz.Digraph( + name="translator_components", + graph_attr={ + "rankdir": direction, + "fontname": "Helvetica", + "fontsize": "12", + "splines": "polyline", + "nodesep": "0.5", + "ranksep": "1.0", + "dpi": "150", + }, + node_attr={ + "fontname": "Helvetica", + "fontsize": "11", + "style": "filled,rounded", + "shape": "box", + }, + edge_attr={"fontname": "Helvetica", "fontsize": "9"}, + ) + + _add_active_nodes(dot, components, active_set, colors) + _add_ghost_nodes(dot, ghost_ids, index) + _add_edges(dot, components, index, active_set, ghost_ids) + _add_terminal_nodes(dot, active_set, ghost_ids) + _add_legend(dot, colors) return dot @@ -332,7 +512,7 @@ def edge_target(ref: str) -> str | None: is_flag=True, default=False, help="Download CSV from Google Sheet instead of reading a local file. 
" - "Reads GOOGLE_SHEET_ID from .env in the script directory.", + "Reads GOOGLE_SHEET_ID from .env (cwd, then the script directory).", ) @click.option( "--sheet-gid", "sheet_gid", @@ -394,12 +574,17 @@ def main( output_dir.mkdir(parents=True, exist_ok=True) if google_sheet: - env_path = Path(__file__).parent / ".env" - load_dotenv(env_path) + # Look for .env in cwd first (standard dotenv behavior, walks up the + # tree), then fall back to one next to the script for users who run + # the tool from a different directory. override=False keeps the cwd + # value winning when both files exist. + load_dotenv() + load_dotenv(Path(__file__).parent / ".env", override=False) sheet_id = os.environ.get("GOOGLE_SHEET_ID", "").strip() if not sheet_id: raise click.ClickException( - f"GOOGLE_SHEET_ID is not set. Fill it in at {env_path}" + "GOOGLE_SHEET_ID is not set. Add it to .env in the current " + f"directory or next to {Path(__file__).name}." ) url = ( f"https://docs.google.com/spreadsheets/d/{sheet_id}" @@ -449,8 +634,12 @@ def main( active_statuses = None click.echo("Including all components (no filter).") else: - active_statuses = {s.strip() for s in refactor_status.split(",") if s.strip()} - active_count = sum(1 for c in components if c["Refactor status"] in active_statuses) + active_statuses = { + s.strip() for s in refactor_status.split(",") if s.strip() + } + active_count = sum( + 1 for c in components if c.refactor_status in active_statuses + ) click.echo( f"Filtering to {active_count} components with status: " + ", ".join(sorted(active_statuses)) From d298253d29713f9feb02c7702f4b09f34c687f97 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 26 May 2026 02:51:38 -0400 Subject: [PATCH 15/51] Add pytest tests for pure functions 31 tests covering parse_id_list (tilde handling, whitespace, empty), ColorAssigner (known + unknown owners, palette rotation, state isolation across instances), text_color_for (black/white pick by luminance), index_by_id (case 
insensitivity), validate (clean, unknown ref, duplicate ids case-insensitive, case-mismatch as warning not error), Component (display_name fallback, all_refs), and load_components (sort order, UTF-8 BOM tolerance, empty Owner becomes "None"). Pytest is added as a dev-group dependency (PEP 735), discoverable via `uv sync --group dev && uv run pytest`. Co-Authored-By: Claude Opus 4.7 --- translator-components-diagram/pyproject.toml | 8 + .../tests/__init__.py | 0 .../tests/test_generate_diagram.py | 235 ++++++++++++++++++ translator-components-diagram/uv.lock | 60 +++++ 4 files changed, 303 insertions(+) create mode 100644 translator-components-diagram/tests/__init__.py create mode 100644 translator-components-diagram/tests/test_generate_diagram.py diff --git a/translator-components-diagram/pyproject.toml b/translator-components-diagram/pyproject.toml index cb8a9bd..599a235 100644 --- a/translator-components-diagram/pyproject.toml +++ b/translator-components-diagram/pyproject.toml @@ -12,9 +12,17 @@ dependencies = [ [project.scripts] generate-diagram = "generate_diagram:main" +[dependency-groups] +dev = [ + "pytest>=8.0", +] + [build-system] requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] packages = ["."] + +[tool.pytest.ini_options] +testpaths = ["tests"] diff --git a/translator-components-diagram/tests/__init__.py b/translator-components-diagram/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/translator-components-diagram/tests/test_generate_diagram.py b/translator-components-diagram/tests/test_generate_diagram.py new file mode 100644 index 0000000..e121089 --- /dev/null +++ b/translator-components-diagram/tests/test_generate_diagram.py @@ -0,0 +1,235 @@ +"""Tests for the pure functions in generate_diagram.""" + +import textwrap + +import pytest + +from generate_diagram import ( + Component, + ColorAssigner, + FALLBACK_COLORS, + OWNER_COLORS, + index_by_id, + load_components, + parse_id_list, + 
text_color_for, + validate, +) + + +# --- parse_id_list --------------------------------------------------------- + + +class TestParseIdList: + def test_empty(self): + assert parse_id_list("") == ([], []) + + def test_single_implemented(self): + assert parse_id_list("foo") == (["foo"], []) + + def test_single_planned(self): + assert parse_id_list("~foo") == ([], ["foo"]) + + def test_mixed(self): + assert parse_id_list("foo, ~bar, baz") == (["foo", "baz"], ["bar"]) + + def test_strips_whitespace(self): + assert parse_id_list(" foo , ~ bar ") == (["foo"], ["bar"]) + + def test_skips_empty_entries(self): + assert parse_id_list("foo,,bar,") == (["foo", "bar"], []) + + def test_tilde_followed_by_space(self): + assert parse_id_list("~ foo") == ([], ["foo"]) + + +# --- ColorAssigner --------------------------------------------------------- + + +class TestColorAssigner: + def test_known_owner_returns_base_color(self): + ca = ColorAssigner(OWNER_COLORS, FALLBACK_COLORS) + assert ca.get("NCATS") == OWNER_COLORS["NCATS"] + + def test_unknown_owner_gets_first_fallback(self): + ca = ColorAssigner({}, FALLBACK_COLORS) + assert ca.get("MysteryTeam") == FALLBACK_COLORS[0] + + def test_unknown_owners_rotate_through_fallback_palette(self): + ca = ColorAssigner({}, FALLBACK_COLORS) + assigned = [ca.get(f"team{i}") for i in range(len(FALLBACK_COLORS) + 2)] + # First N pick palette in order, then wrap around. + assert assigned[: len(FALLBACK_COLORS)] == FALLBACK_COLORS + assert assigned[len(FALLBACK_COLORS)] == FALLBACK_COLORS[0] + assert assigned[len(FALLBACK_COLORS) + 1] == FALLBACK_COLORS[1] + + def test_same_unknown_owner_keeps_same_color(self): + ca = ColorAssigner({}, FALLBACK_COLORS) + first = ca.get("teamA") + _ = ca.get("teamB") + assert ca.get("teamA") == first + + def test_state_does_not_leak_between_instances(self): + # Regression: a previous version used a module-level _color_index + # global, which leaked state across runs. 
+ ca1 = ColorAssigner({}, FALLBACK_COLORS) + _ = ca1.get("teamA") + _ = ca1.get("teamB") + ca2 = ColorAssigner({}, FALLBACK_COLORS) + assert ca2.get("teamC") == FALLBACK_COLORS[0] + + +# --- text_color_for -------------------------------------------------------- + + +class TestTextColorFor: + def test_pure_white_picks_black(self): + assert text_color_for("#FFFFFF") == "black" + + def test_pure_black_picks_white(self): + assert text_color_for("#000000") == "white" + + def test_light_yellow_picks_black(self): + # D4E157 (lime) is very light + assert text_color_for("#D4E157") == "black" + + def test_dark_brown_picks_white(self): + # 8D6E63 (brown) is moderately dark + assert text_color_for("#8D6E63") == "white" + + def test_accepts_hex_without_hash(self): + assert text_color_for("FFFFFF") == "black" + + +# --- index_by_id ----------------------------------------------------------- + + +def _comp(id_: str, **kwargs) -> Component: + """Build a Component with sensible defaults for the optional fields.""" + return Component( + id=id_, + name=kwargs.get("name", id_), + owner=kwargs.get("owner", "None"), + itrb=kwargs.get("itrb", ""), + refactor_status=kwargs.get("refactor_status", "Continues into Refactor"), + notes=kwargs.get("notes", ""), + depends_on=kwargs.get("depends_on", []), + depends_on_planned=kwargs.get("depends_on_planned", []), + uses=kwargs.get("uses", []), + uses_planned=kwargs.get("uses_planned", []), + ) + + +class TestIndexById: + def test_lookup_is_case_insensitive(self): + index = index_by_id([_comp("Foo"), _comp("bar")]) + assert index["foo"].id == "Foo" + assert index["bar"].id == "bar" + + def test_missing_returns_none(self): + index = index_by_id([_comp("foo")]) + assert index.get("nope") is None + + +# --- validate -------------------------------------------------------------- + + +class TestValidate: + def test_clean_input_returns_true(self): + components = [ + _comp("a", depends_on=["b"]), + _comp("b"), + ] + assert validate(components) is True + 
+ def test_unknown_ref_is_hard_error(self, capsys): + components = [_comp("a", depends_on=["ghost"])] + assert validate(components) is False + assert "unknown id 'ghost'" in capsys.readouterr().err + + def test_unknown_planned_ref_is_hard_error(self): + components = [_comp("a", depends_on_planned=["ghost"])] + assert validate(components) is False + + def test_duplicate_id_is_hard_error(self, capsys): + components = [_comp("foo"), _comp("foo")] + assert validate(components) is False + assert "duplicate id" in capsys.readouterr().err + + def test_case_insensitive_duplicate_is_hard_error(self): + components = [_comp("Foo"), _comp("foo")] + assert validate(components) is False + + def test_case_mismatch_is_warning_not_error(self, capsys): + # case-mismatch is informational only — build_graph resolves + # case-insensitively. The return must stay True. + components = [ + _comp("foo"), + _comp("a", depends_on=["FOO"]), + ] + assert validate(components) is True + assert "case mismatch" in capsys.readouterr().err + + +# --- Component ------------------------------------------------------------- + + +class TestComponent: + def test_display_name_falls_back_to_id_when_name_empty(self): + c = _comp("foo", name="") + assert c.display_name == "foo" + + def test_display_name_uses_name_when_present(self): + c = _comp("foo", name="Foo Service") + assert c.display_name == "Foo Service" + + def test_all_refs_concatenates_all_four_lists(self): + c = _comp( + "x", + depends_on=["a"], + depends_on_planned=["b"], + uses=["c"], + uses_planned=["d"], + ) + assert c.all_refs() == ["a", "b", "c", "d"] + + +# --- load_components ------------------------------------------------------- + + +CSV_FIXTURE = textwrap.dedent("""\ + id,Name,Owner,Component in ITRB,Refactor status,Gets results from,Calls,Notes + bbb,Beta,DOGSLED,cat,Continues into Refactor,aaa,~ccc, + aaa,Alpha,NCATS,cat,New in Refactor,,,first note +""") + + +class TestLoadComponents: + def test_parses_csv_and_sorts_by_id(self, 
tmp_path): + csv_path = tmp_path / "components.csv" + csv_path.write_text(CSV_FIXTURE, encoding="utf-8") + components = load_components(csv_path) + assert [c.id for c in components] == ["aaa", "bbb"] + assert components[0].name == "Alpha" + assert components[0].refactor_status == "New in Refactor" + assert components[1].depends_on == ["aaa"] + assert components[1].uses_planned == ["ccc"] + + def test_tolerates_utf8_bom(self, tmp_path): + # An Excel resave can prepend a UTF-8 BOM. With plain utf-8 the + # first header would become "id" and KeyError on c.id. + csv_path = tmp_path / "components.csv" + csv_path.write_bytes("".encode("utf-8") + CSV_FIXTURE.encode("utf-8")) + components = load_components(csv_path) + assert components[0].id == "aaa" + + def test_empty_owner_becomes_none(self, tmp_path): + csv_path = tmp_path / "components.csv" + csv_path.write_text( + "id,Name,Owner,Component in ITRB,Refactor status," + "Gets results from,Calls,Notes\n" + "x,Ex,,,New in Refactor,,,\n", + encoding="utf-8", + ) + components = load_components(csv_path) + assert components[0].owner == "None" diff --git a/translator-components-diagram/uv.lock b/translator-components-diagram/uv.lock index 16a7cdd..f904599 100644 --- a/translator-components-diagram/uv.lock +++ b/translator-components-diagram/uv.lock @@ -32,6 +32,58 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/91/4c/e0ce1ef95d4000ebc1c11801f9b944fa5910ecc15b5e351865763d8657f8/graphviz-0.21-py3-none-any.whl", hash = "sha256:54f33de9f4f911d7e84e4191749cac8cc5653f815b06738c54db9a15ab8b1e42", size = 47300, upload-time = "2025-06-15T09:35:04.433Z" }, ] +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = 
"2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + +[[package]] +name = "packaging" +version = "26.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/f1/e7a6dd94a8d4a5626c03e4e99c87f241ba9e350cd9e6d75123f992427270/packaging-26.2.tar.gz", hash = "sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661", size = 228134, upload-time = "2026-04-24T20:15:23.917Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/b2/87e62e8c3e2f4b32e5fe99e0b86d576da1312593b39f47d8ceef365e95ed/packaging-26.2-py3-none-any.whl", hash = "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e", size = 100195, upload-time = "2026-04-24T20:15:22.081Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + +[[package]] +name = "pygments" +version = "2.20.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = 
"sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991, upload-time = "2026-03-29T13:29:33.898Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" }, +] + +[[package]] +name = "pytest" +version = "9.0.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, +] + [[package]] name = "python-dotenv" version = "1.2.2" @@ -51,9 +103,17 @@ dependencies = [ { name = "python-dotenv" }, ] +[package.dev-dependencies] +dev = [ + { name = "pytest" }, +] + [package.metadata] requires-dist = [ { name = "click", specifier = ">=8.0" }, { name = "graphviz", specifier = ">=0.20" }, { name = "python-dotenv", specifier = ">=1.0" }, ] + +[package.metadata.requires-dev] +dev = [{ name = "pytest", specifier = ">=8.0" }] From c877ea7033120bbf310d55384b9eadb07ea38825 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 26 May 2026 03:06:03 -0400 Subject: [PATCH 16/51] Move owner-to-color mapping into owner-colors.csv The OWNER_COLORS dict in 
generate_diagram.py is replaced by a sibling owner-colors.csv (two columns: owner, color) loaded at runtime via load_owner_colors(). Row order in the CSV doubles as legend order, so reordering rows reorders the legend without any Python edit. Add tests for the loader (parse, row-order preservation, whitespace trim, missing-file and missing-column ClickExceptions, and a smoke-test that the shipped CSV always loads). README points maintainers at the CSV for colour changes. Co-Authored-By: Claude Opus 4.7 --- translator-components-diagram/README.md | 20 +++---- .../generate_diagram.py | 41 +++++++------ .../owner-colors.csv | 11 ++++ .../tests/test_generate_diagram.py | 58 ++++++++++++++++++- 4 files changed, 97 insertions(+), 33 deletions(-) create mode 100644 translator-components-diagram/owner-colors.csv diff --git a/translator-components-diagram/README.md b/translator-components-diagram/README.md index 1abd72b..376edce 100644 --- a/translator-components-diagram/README.md +++ b/translator-components-diagram/README.md @@ -96,19 +96,11 @@ All outputs go to `data/` (gitignored) by default. ### Node colours (by Owner) -| Owner | Colour | Rationale | -|---|---|---| -| NCATS | Red | Main customer | -| UI | Pink | Main customer | -| DOGSLED | Blue | Main team | -| DOGSURF | Green | Main team | -| CATRAX | Amber | Main team | -| Core Components WG | Purple | Specialized cross-team group | -| DINGO | Cyan | Specialized cross-team group | -| Shepherd | Lime | Specialized cross-team group | -| Retriever | Brown | Specialized cross-team group | - -New owners not listed above receive fallback colours automatically. +Owner-to-colour mappings live in [`owner-colors.csv`](owner-colors.csv) +(two columns: `owner`, `color`). Edit that file to add a new owner, +re-order the legend, or change a colour — no Python edit required. + +New owners not listed there receive fallback colours automatically. 
### Node border weight @@ -166,6 +158,8 @@ uv run generate_diagram.py [OPTIONS] ``` translator-components-diagram/ ├── generate_diagram.py # The tool +├── owner-colors.csv # Owner → fill colour mapping (edit me) +├── tests/ # pytest suite for the pure functions ├── pyproject.toml # uv/hatchling project metadata and dependencies ├── uv.lock # Pinned dependency versions ├── .env # GOOGLE_SHEET_ID — gitignored, fill in locally diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index b3d1742..3f42014 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -16,22 +16,11 @@ # Refactor status values that indicate active components DEFAULT_STATUSES = ["Continues into Refactor", "New in Refactor"] -# Owner → fill color mapping. Insertion order doubles as legend order. -OWNER_COLORS: dict[str, str] = { - # Main customers: bright and prominent - "NCATS": "#EF5350", # vivid red - "UI": "#EC407A", # vivid pink - # Three main teams: distinct solid colors - "DOGSLED": "#42A5F5", # blue - "DOGSURF": "#66BB6A", # green - "CATRAX": "#FFA726", # amber - # Specialized cross-team groups: distinct from the teams above - "Core Components WG": "#AB47BC", # purple - "DINGO": "#26C6DA", # cyan - "Shepherd": "#D4E157", # lime - "Retriever": "#8D6E63", # brown - "None": "#E8E8E8", -} +# Owner → fill color mapping lives in owner-colors.csv (alongside the script) +# so non-Python edits can change it without touching code. Row order in the +# CSV doubles as legend order in the diagram. 
+DEFAULT_OWNER_COLORS_PATH = Path(__file__).parent / "owner-colors.csv" + FALLBACK_COLORS = [ "#B0BEC5", "#BCAAA4", "#CE93D8", "#80CBC4", "#EF9A9A", "#FFCC80", "#C5E1A5", "#80DEEA", @@ -123,6 +112,24 @@ def parse_id_list(field_value: str) -> tuple[list[str], list[str]]: return implemented, planned +def load_owner_colors(path: Path = DEFAULT_OWNER_COLORS_PATH) -> dict[str, str]: + """Load the owner→color mapping from a CSV with columns owner,color. + + Order is preserved from the file; that order also determines legend order. + """ + if not path.exists(): + raise click.ClickException(f"Owner-colors file not found: {path}") + with path.open(newline="", encoding="utf-8-sig") as f: + reader = csv.DictReader(f) + missing_cols = {"owner", "color"} - set(reader.fieldnames or []) + if missing_cols: + raise click.ClickException( + f"{path} is missing required columns: " + + ", ".join(sorted(missing_cols)) + ) + return {row["owner"].strip(): row["color"].strip() for row in reader} + + def load_components(csv_path: Path) -> list[Component]: """Parse the CSV into a sorted list of Components. 
@@ -645,7 +652,7 @@ def main( + ", ".join(sorted(active_statuses)) ) - colors = ColorAssigner(OWNER_COLORS, FALLBACK_COLORS) + colors = ColorAssigner(load_owner_colors(), FALLBACK_COLORS) dot = build_graph(components, active_statuses, direction, colors) # Save .dot source diff --git a/translator-components-diagram/owner-colors.csv b/translator-components-diagram/owner-colors.csv new file mode 100644 index 0000000..ab45f7e --- /dev/null +++ b/translator-components-diagram/owner-colors.csv @@ -0,0 +1,11 @@ +owner,color +NCATS,#EF5350 +UI,#EC407A +DOGSLED,#42A5F5 +DOGSURF,#66BB6A +CATRAX,#FFA726 +Core Components WG,#AB47BC +DINGO,#26C6DA +Shepherd,#D4E157 +Retriever,#8D6E63 +None,#E8E8E8 diff --git a/translator-components-diagram/tests/test_generate_diagram.py b/translator-components-diagram/tests/test_generate_diagram.py index e121089..93ba0c9 100644 --- a/translator-components-diagram/tests/test_generate_diagram.py +++ b/translator-components-diagram/tests/test_generate_diagram.py @@ -2,15 +2,16 @@ import textwrap +import click import pytest from generate_diagram import ( Component, ColorAssigner, FALLBACK_COLORS, - OWNER_COLORS, index_by_id, load_components, + load_owner_colors, parse_id_list, text_color_for, validate, @@ -48,8 +49,10 @@ def test_tilde_followed_by_space(self): class TestColorAssigner: def test_known_owner_returns_base_color(self): - ca = ColorAssigner(OWNER_COLORS, FALLBACK_COLORS) - assert ca.get("NCATS") == OWNER_COLORS["NCATS"] + base = {"FooTeam": "#ABCDEF", "BarTeam": "#012345"} + ca = ColorAssigner(base, FALLBACK_COLORS) + assert ca.get("FooTeam") == "#ABCDEF" + assert ca.get("BarTeam") == "#012345" def test_unknown_owner_gets_first_fallback(self): ca = ColorAssigner({}, FALLBACK_COLORS) @@ -233,3 +236,52 @@ def test_empty_owner_becomes_none(self, tmp_path): ) components = load_components(csv_path) assert components[0].owner == "None" + + +# --- load_owner_colors ----------------------------------------------------- + + +class 
TestLoadOwnerColors: + def test_parses_owner_color_csv(self, tmp_path): + path = tmp_path / "owner-colors.csv" + path.write_text( + "owner,color\nNCATS,#EF5350\nUI,#EC407A\n", + encoding="utf-8", + ) + assert load_owner_colors(path) == { + "NCATS": "#EF5350", + "UI": "#EC407A", + } + + def test_preserves_row_order(self, tmp_path): + path = tmp_path / "owner-colors.csv" + path.write_text( + "owner,color\nzeta,#111\nalpha,#222\nbeta,#333\n", + encoding="utf-8", + ) + # Order matters for legend layout — must match CSV order, not sorted. + assert list(load_owner_colors(path)) == ["zeta", "alpha", "beta"] + + def test_strips_whitespace(self, tmp_path): + path = tmp_path / "owner-colors.csv" + path.write_text( + "owner,color\n NCATS , #EF5350 \n", + encoding="utf-8", + ) + assert load_owner_colors(path) == {"NCATS": "#EF5350"} + + def test_missing_file_raises_clickexception(self, tmp_path): + with pytest.raises(click.ClickException, match="not found"): + load_owner_colors(tmp_path / "missing.csv") + + def test_missing_column_raises_clickexception(self, tmp_path): + path = tmp_path / "owner-colors.csv" + path.write_text("owner,hue\nNCATS,red\n", encoding="utf-8") + with pytest.raises(click.ClickException, match="missing required columns"): + load_owner_colors(path) + + def test_default_file_loads(self): + # The repo-shipped owner-colors.csv must always be loadable. + result = load_owner_colors() + assert "NCATS" in result + assert result["NCATS"].startswith("#") From 30247425ec400f0002912673cca7c8130245b70c Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 26 May 2026 09:38:39 -0400 Subject: [PATCH 17/51] Render ubiquitous components as per-caller clones; tighten layout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cross-cutting infrastructure (Jaeger, etc.) that nearly every component calls produces long converging edges that obscure the actual data-flow structure. 
A new Ubiquitous boolean column in the sheet (TRUE/yes/1) flags such components to render as a small per-caller copy next to each caller rather than as a single central node. - Component.ubiquitous parsed from the new column with a tolerant _parse_bool helper (TRUE/yes/y/1, case-insensitive). - _compute_ghost_ids and _add_active_nodes skip ubiquitous components so no central or ghost copy is rendered. - _add_edges emits a per-caller clone node (idempotent) with a synthetic id "{caller}__{target}" the first time a caller references a ubiquitous target, then routes the edge to it. The clone reuses the full styling (fill colour, font colour, border weight) so it reads as the same component. - _emit_component_node factored out from _add_active_nodes for reuse by the clone path. - write_json includes Ubiquitous so the JSON export stays a complete round-trip of the CSV columns. - Legend gains an "Ubiquitous (cloned per caller)" entry. Cheap layout knobs applied alongside (concentrate=true to merge parallel edges, splines=true for free routing, ranksep 1.0→0.5, nodesep 0.5→0.3) — together with the Jaeger duplication this packs the diagram into roughly a third of its previous footprint. 16 new tests cover _parse_bool variants, the Ubiquitous column being parsed when present, gracefully defaulting to False when the column is missing (back-compat for older sheets), and the dataclass default. 
Co-Authored-By: Claude Opus 4.7 --- translator-components-diagram/README.md | 11 ++ .../generate_diagram.py | 106 +++++++++++++----- .../tests/test_generate_diagram.py | 52 +++++++++ 3 files changed, 142 insertions(+), 27 deletions(-) diff --git a/translator-components-diagram/README.md b/translator-components-diagram/README.md index 376edce..dceb43b 100644 --- a/translator-components-diagram/README.md +++ b/translator-components-diagram/README.md @@ -63,6 +63,7 @@ The sheet must have these columns (order does not matter): | `Refactor status` | Lifecycle status — see filtering below | | `Gets results from` | Comma-separated IDs this component receives data from | | `Calls` | Comma-separated IDs this component makes optional API calls to | +| `Ubiquitous` | `TRUE` to render this component as a per-caller clone (see below) | | `Notes` | Free-text notes (not used by the tool) | #### Planned (not-yet-implemented) relationships @@ -77,6 +78,16 @@ Calls: ars, ~future-api Planned edges render in gray; implemented edges render in black. +#### Ubiquitous components + +Cross-cutting infrastructure that nearly every component depends on +(telemetry, name resolution, logging…) creates long converging edges in +the diagram that obscure the real data-flow structure. Marking such a +component `TRUE` in the `Ubiquitous` column renders it as a small copy +next to each caller instead of as a single central node — the underlying +data stays normalised, only the visual layout duplicates. Jaeger (OTel) +is the canonical example. + ## Output files All outputs go to `data/` (gitignored) by default. 
diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 3f42014..e69bcc2 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -75,6 +75,7 @@ class Component: itrb: str refactor_status: str notes: str + ubiquitous: bool = False depends_on: list[str] = field(default_factory=list) depends_on_planned: list[str] = field(default_factory=list) uses: list[str] = field(default_factory=list) @@ -95,6 +96,11 @@ def all_refs(self) -> list[str]: ) +def _parse_bool(value: str) -> bool: + """Parse a CSV boolean cell — accepts TRUE/yes/1 (case-insensitive).""" + return value.strip().lower() in ("true", "yes", "y", "1") + + def parse_id_list(field_value: str) -> tuple[list[str], list[str]]: """Split a comma-separated field into (implemented_ids, planned_ids). @@ -153,6 +159,7 @@ def load_components(csv_path: Path) -> list[Component]: itrb=row.get("Component in ITRB", "").strip(), refactor_status=row.get("Refactor status", "").strip(), notes=row.get("Notes", "").strip(), + ubiquitous=_parse_bool(row.get("Ubiquitous", "")), depends_on=depends_on, depends_on_planned=depends_on_planned, uses=uses, @@ -222,6 +229,7 @@ def write_json(components: list[Component], out_path: Path) -> None: "Component in ITRB": c.itrb, "Refactor status": c.refactor_status, "Notes": c.notes, + "Ubiquitous": c.ubiquitous, "depends_on": c.depends_on, "depends_on_planned": c.depends_on_planned, "uses": c.uses, @@ -253,15 +261,42 @@ def _compute_ghost_ids( ) -> set[str]: ghost: set[str] = set() for comp in components: - if comp.id not in active_set: + if comp.id not in active_set or comp.ubiquitous: continue for ref in comp.all_refs(): match = index.get(ref.lower()) - if match is not None and match.id not in active_set: + if match is None or match.ubiquitous: + # Ubiquitous targets render as per-caller clones, never as ghosts. 
+ continue + if match.id not in active_set: ghost.add(match.id) return ghost +def _emit_component_node( + dot: graphviz.Digraph, + comp: Component, + node_id: str, + colors: ColorAssigner, +) -> None: + """Render a Component as a graphviz node at the given id. + + Used both for primary node placement and for per-caller ubiquitous clones + (which use a synthetic id like "{caller}__{target}"). + """ + fill = colors.get(comp.owner) + is_new = comp.refactor_status == "New in Refactor" + # Owner is encoded by node color and shown in the legend, not in the label. + label = f"{comp.display_name}\n{comp.id}" + dot.node( + node_id, + label=label, + fillcolor=fill, + fontcolor=text_color_for(fill), + penwidth="2.0" if is_new else "1.0", + ) + + def _add_active_nodes( dot: graphviz.Digraph, components: list[Component], @@ -269,19 +304,11 @@ def _add_active_nodes( colors: ColorAssigner, ) -> None: for comp in components: - if comp.id not in active_set: + if comp.id not in active_set or comp.ubiquitous: + # Ubiquitous components don't get a central node — they're emitted + # per-caller from _add_edges. continue - fill = colors.get(comp.owner) - is_new = comp.refactor_status == "New in Refactor" - # Owner is encoded by node color and shown in the legend, not in the label. - label = f"{comp.display_name}\n{comp.id}" - dot.node( - comp.id, - label=label, - fillcolor=fill, - fontcolor=text_color_for(fill), - penwidth="2.0" if is_new else "1.0", - ) + _emit_component_node(dot, comp, comp.id, colors) def _add_ghost_nodes( @@ -309,33 +336,47 @@ def _add_edges( index: dict[str, Component], active_set: set[str], ghost_ids: set[str], + colors: ColorAssigner, ) -> None: - def edge_target(ref: str) -> str | None: + emitted_clones: set[str] = set() + + def edge_target(caller_id: str, ref: str) -> str | None: + """Return the graphviz node id to draw an edge to, or None to skip. + + For ubiquitous targets, emit (idempotently) a per-caller clone node and + return its synthetic id. 
The clone uses the same visual style as the + original so callers can recognise it. + """ match = index.get(ref.lower()) if match is None: return None - target = match.id - if target in active_set or target in ghost_ids: - return target + if match.ubiquitous: + clone_id = f"{caller_id}__{match.id}" + if clone_id not in emitted_clones: + _emit_component_node(dot, match, clone_id, colors) + emitted_clones.add(clone_id) + return clone_id + if match.id in active_set or match.id in ghost_ids: + return match.id return None for comp in components: - if comp.id not in active_set: + if comp.id not in active_set or comp.ubiquitous: continue for ref in comp.depends_on: - t = edge_target(ref) + t = edge_target(comp.id, ref) if t is not None: dot.edge(t, comp.id) # B → A: B provides results to A for ref in comp.depends_on_planned: - t = edge_target(ref) + t = edge_target(comp.id, ref) if t is not None: dot.edge(t, comp.id, style="dashed", color=PLANNED_EDGE_COLOR) for ref in comp.uses: - t = edge_target(ref) + t = edge_target(comp.id, ref) if t is not None: dot.edge(comp.id, t, style="dotted") # A ··→ B: API call for ref in comp.uses_planned: - t = edge_target(ref) + t = edge_target(comp.id, ref) if t is not None: dot.edge(comp.id, t, style="dotted", color=PLANNED_EDGE_COLOR) @@ -449,6 +490,12 @@ def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: fontcolor=GHOST_FONT_COLOR, color=GHOST_BORDER_COLOR, ) + leg.node( + "_leg_ubiq", + label="Ubiquitous\n(cloned per caller)", + fillcolor="white", + penwidth="1.0", + ) leg.node( "_leg_entry", label="Data\nsource", @@ -483,9 +530,14 @@ def build_graph( "rankdir": direction, "fontname": "Helvetica", "fontsize": "12", - "splines": "polyline", - "nodesep": "0.5", - "ranksep": "1.0", + # splines=true gives graphviz freedom to route edges as smooth + # curves around nodes; combined with concentrate=true (merges + # parallel edges going to the same place) this packs the layout + # tighter at the cost of wigglier lines. 
+ "splines": "true", + "concentrate": "true", + "nodesep": "0.3", + "ranksep": "0.5", "dpi": "150", }, node_attr={ @@ -499,7 +551,7 @@ def build_graph( _add_active_nodes(dot, components, active_set, colors) _add_ghost_nodes(dot, ghost_ids, index) - _add_edges(dot, components, index, active_set, ghost_ids) + _add_edges(dot, components, index, active_set, ghost_ids, colors) _add_terminal_nodes(dot, active_set, ghost_ids) _add_legend(dot, colors) diff --git a/translator-components-diagram/tests/test_generate_diagram.py b/translator-components-diagram/tests/test_generate_diagram.py index 93ba0c9..19a076d 100644 --- a/translator-components-diagram/tests/test_generate_diagram.py +++ b/translator-components-diagram/tests/test_generate_diagram.py @@ -9,6 +9,7 @@ Component, ColorAssigner, FALLBACK_COLORS, + _parse_bool, index_by_id, load_components, load_owner_colors, @@ -116,6 +117,7 @@ def _comp(id_: str, **kwargs) -> Component: itrb=kwargs.get("itrb", ""), refactor_status=kwargs.get("refactor_status", "Continues into Refactor"), notes=kwargs.get("notes", ""), + ubiquitous=kwargs.get("ubiquitous", False), depends_on=kwargs.get("depends_on", []), depends_on_planned=kwargs.get("depends_on_planned", []), uses=kwargs.get("uses", []), @@ -285,3 +287,53 @@ def test_default_file_loads(self): result = load_owner_colors() assert "NCATS" in result assert result["NCATS"].startswith("#") + + +# --- _parse_bool ----------------------------------------------------------- + + +class TestParseBool: + @pytest.mark.parametrize("value", ["TRUE", "true", "True", "yes", "Y", "1"]) + def test_truthy_values(self, value): + assert _parse_bool(value) is True + + @pytest.mark.parametrize("value", ["FALSE", "false", "no", "", "0", " "]) + def test_falsy_values(self, value): + assert _parse_bool(value) is False + + def test_strips_whitespace(self): + assert _parse_bool(" TRUE ") is True + + +# --- Ubiquitous column in load_components ---------------------------------- + + +class 
TestUbiquitousColumn: + def test_ubiquitous_column_parsed(self, tmp_path): + csv_path = tmp_path / "components.csv" + csv_path.write_text( + "id,Name,Owner,Component in ITRB,Refactor status," + "Gets results from,Calls,Ubiquitous,Notes\n" + "jaeger,Jaeger,DOGSLED,obs,Continues into Refactor,,,TRUE,\n" + "ars,ARS,NCATS,svc,New in Refactor,,,,\n", + encoding="utf-8", + ) + components = load_components(csv_path) + by_id = {c.id: c for c in components} + assert by_id["jaeger"].ubiquitous is True + assert by_id["ars"].ubiquitous is False + + def test_missing_ubiquitous_column_defaults_false(self, tmp_path): + # Older sheets without the column should still parse cleanly. + csv_path = tmp_path / "components.csv" + csv_path.write_text( + "id,Name,Owner,Component in ITRB,Refactor status," + "Gets results from,Calls,Notes\n" + "foo,Foo,NCATS,svc,New in Refactor,,,\n", + encoding="utf-8", + ) + components = load_components(csv_path) + assert components[0].ubiquitous is False + + def test_dataclass_default_is_false(self): + assert _comp("foo").ubiquitous is False From 6df5ef78934498a9b7f057030beb1d5765d90548 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 26 May 2026 10:57:47 -0400 Subject: [PATCH 18/51] Improved name. 
--- translator-components-diagram/{.env.default => env.default} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename translator-components-diagram/{.env.default => env.default} (100%) diff --git a/translator-components-diagram/.env.default b/translator-components-diagram/env.default similarity index 100% rename from translator-components-diagram/.env.default rename to translator-components-diagram/env.default From 7c3ea2f419645aafd4bc8a77d8162283ab674cee Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 26 May 2026 11:04:30 -0400 Subject: [PATCH 19/51] Trim legend to owner colors and edge styles only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove Data source, User, Planned, New in Refactor, Excluded, and Ubiquitous entries — they're either self-evident or no longer needed. Co-Authored-By: Claude Sonnet 4.6 --- .../generate_diagram.py | 45 ------------------- 1 file changed, 45 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index e69bcc2..e104554 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -466,51 +466,6 @@ def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: leg.node("_leg_b", label="Service", fillcolor="white", penwidth="1.0") leg.edge("_leg_a", "_leg_b", xlabel="API call", style="dotted") - leg.node("_leg_pp", label="Producer*", fillcolor="white", penwidth="1.0") - leg.node("_leg_cc", label="Consumer*", fillcolor="white", penwidth="1.0") - leg.edge( - "_leg_pp", "_leg_cc", - xlabel="Planned", - style="dashed", - color=PLANNED_EDGE_COLOR, - ) - - # Node style examples — bold = new, dashed = ghost, terminal shapes. 
- leg.node( - "_leg_new", - label="New in\nRefactor", - fillcolor="white", - penwidth="2.0", - ) - leg.node( - "_leg_ghost", - label="(excluded)", - fillcolor=GHOST_FILL_COLOR, - style="filled,rounded,dashed", - fontcolor=GHOST_FONT_COLOR, - color=GHOST_BORDER_COLOR, - ) - leg.node( - "_leg_ubiq", - label="Ubiquitous\n(cloned per caller)", - fillcolor="white", - penwidth="1.0", - ) - leg.node( - "_leg_entry", - label="Data\nsource", - shape="cylinder", - fillcolor=TERMINAL_FILL_COLOR, - penwidth="1.5", - ) - leg.node( - "_leg_exit", - label="User", - shape="oval", - peripheries="2", - fillcolor=TERMINAL_FILL_COLOR, - penwidth="1.5", - ) def build_graph( From 287efd0d757c3b8c6b96b7368c443ea9756a7a8b Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 26 May 2026 12:57:10 -0400 Subject: [PATCH 20/51] Filter owner legend to only show owners present in the rendered diagram Co-Authored-By: Claude Sonnet 4.6 --- translator-components-diagram/generate_diagram.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index e104554..5654069 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -46,6 +46,7 @@ def __init__(self, base_colors: dict[str, str], fallback_colors: list[str]): self.color_map: dict[str, str] = dict(base_colors) self.fallback_colors = fallback_colors self.next_fallback = 0 + self._used: set[str] = set() def get(self, owner: str) -> str: if owner not in self.color_map: @@ -53,8 +54,14 @@ def get(self, owner: str) -> str: self.next_fallback % len(self.fallback_colors) ] self.next_fallback += 1 + self._used.add(owner) return self.color_map[owner] + @property + def used_colors(self) -> dict[str, str]: + """Color map restricted to owners actually rendered, in original order.""" + return {k: v for k, v in self.color_map.items() if k in self._used} + def 
text_color_for(fill_hex: str) -> str: """Return "black" or "white" for adequate contrast against a hex fill.""" @@ -420,7 +427,7 @@ def _owner_legend_html(colors: ColorAssigner) -> str: regardless of how dark the swatch is. """ rows = ['Owner'] - for owner, fill in colors.color_map.items(): + for owner, fill in colors.used_colors.items(): rows.append( f'' f' ' From 4d1fddba4f2ead45440f2d0bf475f2f01d673fd7 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 26 May 2026 18:25:15 -0400 Subject: [PATCH 21/51] Updated documentation. --- translator-components-diagram/README.md | 10 ++--- .../generate_diagram.py | 45 +++++++++++++------ 2 files changed, 36 insertions(+), 19 deletions(-) diff --git a/translator-components-diagram/README.md b/translator-components-diagram/README.md index dceb43b..466379a 100644 --- a/translator-components-diagram/README.md +++ b/translator-components-diagram/README.md @@ -23,16 +23,16 @@ cd translator-components-diagram uv sync # first-time setup; creates .venv/ # Download latest data from the Google Sheet and regenerate -uv run generate_diagram.py --google-sheet +uv run generate-diagram --google-sheet # Use a locally cached CSV instead -uv run generate_diagram.py +uv run generate-diagram # Include all components, not just the refactor-active ones -uv run generate_diagram.py --all +uv run generate-diagram --all # Also produce a PDF (useful for presentations) -uv run generate_diagram.py --google-sheet --format pdf +uv run generate-diagram --google-sheet --format pdf ``` ## Input data @@ -147,7 +147,7 @@ without cluttering the main diagram. 
## All CLI options ``` -uv run generate_diagram.py [OPTIONS] +uv run generate-diagram [OPTIONS] --input PATH Local CSV file [default: data/components.csv] --google-sheet Download CSV from Google Sheet (reads GOOGLE_SHEET_ID diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 5654069..f17ffa4 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -426,7 +426,7 @@ def _owner_legend_html(colors: ColorAssigner) -> str: neutral background on the right. This keeps text contrast uniform regardless of how dark the swatch is. """ - rows = ['Owner'] + rows = [] for owner, fill in colors.used_colors.items(): rows.append( f'' @@ -446,25 +446,28 @@ def _owner_legend_html(colors: ColorAssigner) -> str: def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: """Build a legend covering owner colors, edge styles, and node styles.""" - with dot.subgraph(name="cluster_legend") as leg: - leg.attr( - label="Legend", - style="filled,rounded", - fillcolor="#FAFAFA", - color="#AAAAAA", - fontname="Helvetica", - fontsize="11", - margin="12", - ) + _cluster_attrs = dict( + style="filled,rounded", + fillcolor="#FAFAFA", + color="#AAAAAA", + fontname="Helvetica", + fontsize="11", + margin="12", + ) - # Owner-color key as a compact HTML table. - leg.node( + # Owner-color key in its own cluster so it doesn't crowd the edge examples. + with dot.subgraph(name="cluster_legend_owners") as own: + own.attr(label="Owner", **_cluster_attrs) + own.node( "_leg_owners", label=_owner_legend_html(colors), shape="plain", ) - # Edge style examples — provider→consumer / API call / planned. + # Edge style examples — provider→consumer / API call. 
+ with dot.subgraph(name="cluster_legend") as leg: + leg.attr(label="Legend", **_cluster_attrs) + leg.node("_leg_p", label="Producer", fillcolor="white", penwidth="1.0") leg.node("_leg_c", label="Consumer", fillcolor="white", penwidth="1.0") leg.edge("_leg_p", "_leg_c", xlabel="Results") @@ -473,6 +476,17 @@ def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: leg.node("_leg_b", label="Service", fillcolor="white", penwidth="1.0") leg.edge("_leg_a", "_leg_b", xlabel="API call", style="dotted") + # Place both legend clusters at the same rank (side by side, not diagonal). + # newrank=true (set on the graph) lets rank=same work across cluster + # boundaries without displacing nodes from their clusters. + # constraint=false on the ordering edge keeps it from creating a + # rank dependency (which would cause diagonal placement). + with dot.subgraph() as s: + s.attr(rank="same") + s.node("_leg_owners") + s.node("_leg_p") + dot.edge("_leg_owners", "_leg_p", style="invis", constraint="false") + def build_graph( @@ -500,6 +514,9 @@ def build_graph( "concentrate": "true", "nodesep": "0.3", "ranksep": "0.5", + # Required for rank=same to work correctly across cluster + # boundaries (e.g. keeping both legend clusters level). + "newrank": "true", "dpi": "150", }, node_attr={ From ec04a47dd533e94d7662589bfd6b5806060dcb9f Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 29 May 2026 16:14:23 -0400 Subject: [PATCH 22/51] Support "Part of" column to group components into labeled layer clusters Nodes sharing the same "Part of" value are wrapped in a dotted-border cluster subgraph with the group name as its label. Ubiquitous components are excluded from grouping (they render as per-caller clones). The field is also exported to components.json. 
Co-Authored-By: Claude Sonnet 4.6 --- .../generate_diagram.py | 71 ++++++++++++++++++- 1 file changed, 69 insertions(+), 2 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index f17ffa4..566697f 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -83,6 +83,7 @@ class Component: refactor_status: str notes: str ubiquitous: bool = False + part_of: str = "" depends_on: list[str] = field(default_factory=list) depends_on_planned: list[str] = field(default_factory=list) uses: list[str] = field(default_factory=list) @@ -167,6 +168,7 @@ def load_components(csv_path: Path) -> list[Component]: refactor_status=row.get("Refactor status", "").strip(), notes=row.get("Notes", "").strip(), ubiquitous=_parse_bool(row.get("Ubiquitous", "")), + part_of=row.get("Part of", "").strip(), depends_on=depends_on, depends_on_planned=depends_on_planned, uses=uses, @@ -237,6 +239,7 @@ def write_json(components: list[Component], out_path: Path) -> None: "Refactor status": c.refactor_status, "Notes": c.notes, "Ubiquitous": c.ubiquitous, + "Part of": c.part_of, "depends_on": c.depends_on, "depends_on_planned": c.depends_on_planned, "uses": c.uses, @@ -304,17 +307,35 @@ def _emit_component_node( ) +def _compute_groups( + components: list[Component], + active_set: set[str], + ghost_ids: set[str], +) -> dict[str, list[str]]: + """Map Part-of label → node ids for active (non-ubiquitous) and ghost nodes.""" + groups: dict[str, list[str]] = {} + for comp in components: + if not comp.part_of or comp.ubiquitous: + continue + if comp.id in active_set or comp.id in ghost_ids: + groups.setdefault(comp.part_of, []).append(comp.id) + return groups + + def _add_active_nodes( dot: graphviz.Digraph, components: list[Component], active_set: set[str], colors: ColorAssigner, + skip_ids: set[str] | None = None, ) -> None: for comp in components: if comp.id not in 
active_set or comp.ubiquitous: # Ubiquitous components don't get a central node — they're emitted # per-caller from _add_edges. continue + if skip_ids and comp.id in skip_ids: + continue _emit_component_node(dot, comp, comp.id, colors) @@ -322,8 +343,11 @@ def _add_ghost_nodes( dot: graphviz.Digraph, ghost_ids: set[str], index: dict[str, Component], + skip_ids: set[str] | None = None, ) -> None: for ghost_id in sorted(ghost_ids): + if skip_ids and ghost_id in skip_ids: + continue comp = index.get(ghost_id.lower()) name = comp.display_name if comp else ghost_id label = f"{name}\n{ghost_id}\n(excluded)" @@ -337,6 +361,45 @@ def _add_ghost_nodes( ) +def _add_group_clusters( + dot: graphviz.Digraph, + groups: dict[str, list[str]], + components: list[Component], + active_set: set[str], + ghost_ids: set[str], + index: dict[str, Component], + colors: ColorAssigner, +) -> None: + """Wrap each Part-of group in a labeled dotted-border cluster subgraph.""" + for group_label, node_ids in sorted(groups.items()): + safe = group_label.lower().replace(" ", "_").replace("/", "_") + with dot.subgraph(name=f"cluster_group_{safe}") as sg: + sg.attr( + label=html.escape(group_label), + style="dashed", + color="#555555", + fontname="Helvetica", + fontsize="12", + penwidth="1.5", + bgcolor="transparent", + ) + for node_id in sorted(node_ids): + comp = index.get(node_id.lower()) + if comp and node_id in active_set: + _emit_component_node(sg, comp, node_id, colors) + elif node_id in ghost_ids: + name = comp.display_name if comp else node_id + label = f"{name}\n{node_id}\n(excluded)" + sg.node( + node_id, + label=label, + fillcolor=GHOST_FILL_COLOR, + style="filled,rounded,dashed", + fontcolor=GHOST_FONT_COLOR, + color=GHOST_BORDER_COLOR, + ) + + def _add_edges( dot: graphviz.Digraph, components: list[Component], @@ -528,8 +591,12 @@ def build_graph( edge_attr={"fontname": "Helvetica", "fontsize": "9"}, ) - _add_active_nodes(dot, components, active_set, colors) - _add_ghost_nodes(dot, 
ghost_ids, index) + groups = _compute_groups(components, active_set, ghost_ids) + grouped_ids = {nid for ids in groups.values() for nid in ids} + + _add_group_clusters(dot, groups, components, active_set, ghost_ids, index, colors) + _add_active_nodes(dot, components, active_set, colors, skip_ids=grouped_ids) + _add_ghost_nodes(dot, ghost_ids, index, skip_ids=grouped_ids) _add_edges(dot, components, index, active_set, ghost_ids, colors) _add_terminal_nodes(dot, active_set, ghost_ids) _add_legend(dot, colors) From 28b6db3dd4386445e0b6c0874face918eabc6fc3 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 29 May 2026 16:21:23 -0400 Subject: [PATCH 23/51] Replace hardcoded terminal nodes with CSV-driven Externals column MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each component can now declare external entities in a new "Externals" column using standard CSV syntax. A '<' prefix marks a data source (rendered as a cylinder at rank=min); '>' marks a sink (double-oval at rank=max). Multiple components can reference the same external name — one node is emitted and one edge per referencing component is drawn. Externals are styled in amber (#FFE082) with a bold border (penwidth=2.5) and larger font (13pt) so they stand out as the diagram's entry/exit tier. Removes the hardcoded ENTRY_TARGET / EXIT_SOURCE constants and _add_terminal_nodes in favour of the new _add_external_nodes_and_edges. 
Co-Authored-By: Claude Sonnet 4.6 --- .../generate_diagram.py | 122 ++++++++++++++---- 1 file changed, 94 insertions(+), 28 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 566697f..6857981 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -31,12 +31,9 @@ GHOST_BORDER_COLOR = "#999999" GHOST_FILL_COLOR = "#D3D3D3" GHOST_FONT_COLOR = "#666666" -TERMINAL_FILL_COLOR = "#CFD8DC" - -# Hardcoded entry/exit anchors — the diagram has a single data-flow entry -# (External data sources → kgx-storage-pipeline) and a single exit (UI → User). -ENTRY_TARGET = "kgx-storage-pipeline" -EXIT_SOURCE = "ui" +# Warm amber for external-entity nodes (sources and sinks) so they stand out +# clearly against the component fill colors. +EXTERNAL_FILL_COLOR = "#FFE082" class ColorAssigner: @@ -84,6 +81,7 @@ class Component: notes: str ubiquitous: bool = False part_of: str = "" + externals: list[tuple[str, str]] = field(default_factory=list) depends_on: list[str] = field(default_factory=list) depends_on_planned: list[str] = field(default_factory=list) uses: list[str] = field(default_factory=list) @@ -126,6 +124,35 @@ def parse_id_list(field_value: str) -> tuple[list[str], list[str]]: return implemented, planned +def parse_externals(field_value: str) -> list[tuple[str, str]]: + """Parse the Externals column into a list of (direction, name) pairs. + + Values are standard CSV (commas as separators, double-quotes for names + that contain commas). Each token must start with '<' (external source + that sends data *into* this component) or '>' (external sink that + receives data *from* this component). 
+ + Examples + -------- + ``User`` + ``"Researcher`` + """ + if not field_value.strip(): + return [] + result = [] + reader = csv.reader([field_value]) + for row in reader: + for token in row: + token = token.strip() + if not token: + continue + if token.startswith("<"): + result.append(("in", token[1:].strip())) + elif token.startswith(">"): + result.append(("out", token[1:].strip())) + return result + + def load_owner_colors(path: Path = DEFAULT_OWNER_COLORS_PATH) -> dict[str, str]: """Load the owner→color mapping from a CSV with columns owner,color. @@ -169,6 +196,7 @@ def load_components(csv_path: Path) -> list[Component]: notes=row.get("Notes", "").strip(), ubiquitous=_parse_bool(row.get("Ubiquitous", "")), part_of=row.get("Part of", "").strip(), + externals=parse_externals(row.get("Externals", "")), depends_on=depends_on, depends_on_planned=depends_on_planned, uses=uses, @@ -240,6 +268,7 @@ def write_json(components: list[Component], out_path: Path) -> None: "Notes": c.notes, "Ubiquitous": c.ubiquitous, "Part of": c.part_of, + "Externals": [{"direction": d, "name": n} for d, n in c.externals], "depends_on": c.depends_on, "depends_on_planned": c.depends_on_planned, "uses": c.uses, @@ -451,35 +480,72 @@ def edge_target(caller_id: str, ref: str) -> str | None: dot.edge(comp.id, t, style="dotted", color=PLANNED_EDGE_COLOR) -def _add_terminal_nodes( +def _ext_node_id(name: str) -> str: + """Stable graphviz node ID derived from an external-entity name.""" + safe = "".join(c if c.isalnum() else "_" for c in name.lower()) + return f"_ext_{safe}" + + +def _add_external_nodes_and_edges( dot: graphviz.Digraph, + components: list[Component], active_set: set[str], - ghost_ids: set[str], ) -> None: - """Add entry/exit nodes, gated on the components they connect to.""" - terminal_attrs = dict( + """Emit external-entity nodes and their edges from the Externals column. 
+ + Sources (direction "in") become cylinder nodes at rank=min; sinks + (direction "out") become double-oval nodes at rank=max. Multiple + components can reference the same external name — one node is emitted + and one edge per referencing component is drawn. + """ + in_nodes: dict[str, str] = {} # node_id → display name + out_nodes: dict[str, str] = {} # node_id → display name + in_edges: list[tuple[str, str]] = [] # (ext_id, comp_id) + out_edges: list[tuple[str, str]] = [] # (comp_id, ext_id) + + for comp in components: + if comp.id not in active_set or comp.ubiquitous: + continue + for direction, name in comp.externals: + nid = _ext_node_id(name) + if direction == "in": + in_nodes[nid] = name + in_edges.append((nid, comp.id)) + else: + out_nodes[nid] = name + out_edges.append((comp.id, nid)) + + if not in_nodes and not out_nodes: + return + + ext_attrs = dict( style="filled", - fillcolor=TERMINAL_FILL_COLOR, + fillcolor=EXTERNAL_FILL_COLOR, fontname="Helvetica", - fontsize="11", - penwidth="1.5", + fontsize="13", + penwidth="2.5", ) - if ENTRY_TARGET in active_set or ENTRY_TARGET in ghost_ids: - dot.node("_external_sources", label="External\ndata sources", - shape="cylinder", **terminal_attrs) - with dot.subgraph() as src_rank: - src_rank.attr(rank="min") - src_rank.node("_external_sources") - dot.edge("_external_sources", ENTRY_TARGET) + for nid, name in in_nodes.items(): + dot.node(nid, label=name, shape="cylinder", **ext_attrs) + for nid, name in out_nodes.items(): + dot.node(nid, label=name, shape="oval", peripheries="2", **ext_attrs) + + if in_nodes: + with dot.subgraph() as s: + s.attr(rank="min") + for nid in in_nodes: + s.node(nid) + if out_nodes: + with dot.subgraph() as s: + s.attr(rank="max") + for nid in out_nodes: + s.node(nid) - if EXIT_SOURCE in active_set or EXIT_SOURCE in ghost_ids: - dot.node("_user", label="User", shape="oval", - peripheries="2", **terminal_attrs) - with dot.subgraph() as sink_rank: - sink_rank.attr(rank="max") - 
sink_rank.node("_user") - dot.edge(EXIT_SOURCE, "_user") + for src, dst in in_edges: + dot.edge(src, dst) + for src, dst in out_edges: + dot.edge(src, dst) def _owner_legend_html(colors: ColorAssigner) -> str: @@ -598,7 +664,7 @@ def build_graph( _add_active_nodes(dot, components, active_set, colors, skip_ids=grouped_ids) _add_ghost_nodes(dot, ghost_ids, index, skip_ids=grouped_ids) _add_edges(dot, components, index, active_set, ghost_ids, colors) - _add_terminal_nodes(dot, active_set, ghost_ids) + _add_external_nodes_and_edges(dot, components, active_set) _add_legend(dot, colors) return dot From f8472b0b890c014358af33ec333530106136d04d Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 29 May 2026 16:24:46 -0400 Subject: [PATCH 24/51] Move group cluster label to bottom of the bounding box Co-Authored-By: Claude Sonnet 4.6 --- translator-components-diagram/generate_diagram.py | 1 + 1 file changed, 1 insertion(+) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 6857981..8f58558 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -405,6 +405,7 @@ def _add_group_clusters( with dot.subgraph(name=f"cluster_group_{safe}") as sg: sg.attr( label=html.escape(group_label), + labelloc="b", style="dashed", color="#555555", fontname="Helvetica", From 2933075b6f316869ea52e086eb7175d7841f0c26 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 29 May 2026 16:26:24 -0400 Subject: [PATCH 25/51] Style group cluster label as a dark tab with white bold text Renders the label as an HTML table cell with a #555555 background and white 13pt bold text, matching the cluster border color. This gives each group a clear header tab at the top of its bounding box. Graphviz cluster labels don't support rotation, so a left-edge label isn't achievable without complex workarounds. 
Co-Authored-By: Claude Sonnet 4.6 --- translator-components-diagram/generate_diagram.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 8f58558..5604575 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -403,13 +403,18 @@ def _add_group_clusters( for group_label, node_ids in sorted(groups.items()): safe = group_label.lower().replace(" ", "_").replace("/", "_") with dot.subgraph(name=f"cluster_group_{safe}") as sg: + tab_label = ( + f'<' + f'
' + f'{html.escape(group_label)}' + f'
>' + ) sg.attr( - label=html.escape(group_label), - labelloc="b", + label=tab_label, + labelloc="t", style="dashed", color="#555555", fontname="Helvetica", - fontsize="12", penwidth="1.5", bgcolor="transparent", ) From 89ff7b97d8911ba0303e1b3e93d5ee1abefe2652 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 29 May 2026 16:28:41 -0400 Subject: [PATCH 26/51] Shrink group label tab and switch cluster border to solid gray fill Co-Authored-By: Claude Sonnet 4.6 --- translator-components-diagram/generate_diagram.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 5604575..cbc1a3f 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -404,15 +404,16 @@ def _add_group_clusters( safe = group_label.lower().replace(" ", "_").replace("/", "_") with dot.subgraph(name=f"cluster_group_{safe}") as sg: tab_label = ( - f'<' + f'<
' f'
' - f'{html.escape(group_label)}' + f'{html.escape(group_label)}' f'
>' ) sg.attr( label=tab_label, labelloc="t", - style="dashed", + style="filled", + fillcolor="#DDDDDD", color="#555555", fontname="Helvetica", penwidth="1.5", From 42f483064c79b734f157d8bb325fc0e5658c110a Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 29 May 2026 16:36:31 -0400 Subject: [PATCH 27/51] Move owner legend to bottom rank (rank=max) Pins the owner-color legend to the bottom of the diagram alongside any sink-external nodes. The invisible side-by-side ordering edge to the edge-style legend cluster is removed so the owner legend can float to wherever the layout engine places it (typically the right side). Co-Authored-By: Claude Sonnet 4.6 --- translator-components-diagram/generate_diagram.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index cbc1a3f..0d1d03d 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -612,16 +612,12 @@ def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: leg.node("_leg_b", label="Service", fillcolor="white", penwidth="1.0") leg.edge("_leg_a", "_leg_b", xlabel="API call", style="dotted") - # Place both legend clusters at the same rank (side by side, not diagonal). - # newrank=true (set on the graph) lets rank=same work across cluster - # boundaries without displacing nodes from their clusters. - # constraint=false on the ordering edge keeps it from creating a - # rank dependency (which would cause diagonal placement). + # Pin the owner legend to the bottom rank so it sits below the main graph. + # Horizontal placement is left to the layout engine (typically ends up on + # the right when sink-external nodes occupy the left of the bottom tier). 
with dot.subgraph() as s: - s.attr(rank="same") + s.attr(rank="max") s.node("_leg_owners") - s.node("_leg_p") - dot.edge("_leg_owners", "_leg_p", style="invis", constraint="false") From 3bdf94a525c2c54d0c6b63425800bf4668fd51aa Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 29 May 2026 16:39:37 -0400 Subject: [PATCH 28/51] Pin externals to top/bottom rows; nudge owner legend to bottom-right Incoming externals (<) are already pinned to rank=min and outgoing (>) to rank=max. _add_external_nodes_and_edges now returns the sink node IDs so build_graph can add invisible constraint=false ordering edges from each sink external to _leg_owners, nudging the owner legend to the right of the sink nodes within the shared rank=max row. Co-Authored-By: Claude Sonnet 4.6 --- translator-components-diagram/generate_diagram.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 0d1d03d..47ea7c5 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -497,13 +497,16 @@ def _add_external_nodes_and_edges( dot: graphviz.Digraph, components: list[Component], active_set: set[str], -) -> None: +) -> set[str]: """Emit external-entity nodes and their edges from the Externals column. Sources (direction "in") become cylinder nodes at rank=min; sinks (direction "out") become double-oval nodes at rank=max. Multiple components can reference the same external name — one node is emitted and one edge per referencing component is drawn. + + Returns the set of sink (out) node IDs so callers can add ordering + edges to nudge co-rank nodes (e.g. the owner legend) to the right. 
""" in_nodes: dict[str, str] = {} # node_id → display name out_nodes: dict[str, str] = {} # node_id → display name @@ -523,7 +526,7 @@ def _add_external_nodes_and_edges( out_edges.append((comp.id, nid)) if not in_nodes and not out_nodes: - return + return set() ext_attrs = dict( style="filled", @@ -554,6 +557,8 @@ def _add_external_nodes_and_edges( for src, dst in out_edges: dot.edge(src, dst) + return set(out_nodes.keys()) + def _owner_legend_html(colors: ColorAssigner) -> str: """Build an HTML-table label listing every owner and its fill color. @@ -667,8 +672,12 @@ def build_graph( _add_active_nodes(dot, components, active_set, colors, skip_ids=grouped_ids) _add_ghost_nodes(dot, ghost_ids, index, skip_ids=grouped_ids) _add_edges(dot, components, index, active_set, ghost_ids, colors) - _add_external_nodes_and_edges(dot, components, active_set) + sink_ext_ids = _add_external_nodes_and_edges(dot, components, active_set) _add_legend(dot, colors) + # Invisible ordering edges from sink externals → owner legend nudge the + # legend rightward within the shared rank=max row. 
+ for nid in sorted(sink_ext_ids): + dot.edge(nid, "_leg_owners", style="invis", constraint="false") return dot From cd8d10c75d81e05e79b9705e8150770454c867a3 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 29 May 2026 17:10:24 -0400 Subject: [PATCH 29/51] Remove rank pinning from owner legend; let layout engine place it freely Co-Authored-By: Claude Sonnet 4.6 --- .../generate_diagram.py | 22 ++++--------------- 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 47ea7c5..1c04dad 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -497,16 +497,13 @@ def _add_external_nodes_and_edges( dot: graphviz.Digraph, components: list[Component], active_set: set[str], -) -> set[str]: +) -> None: """Emit external-entity nodes and their edges from the Externals column. Sources (direction "in") become cylinder nodes at rank=min; sinks (direction "out") become double-oval nodes at rank=max. Multiple components can reference the same external name — one node is emitted and one edge per referencing component is drawn. - - Returns the set of sink (out) node IDs so callers can add ordering - edges to nudge co-rank nodes (e.g. the owner legend) to the right. """ in_nodes: dict[str, str] = {} # node_id → display name out_nodes: dict[str, str] = {} # node_id → display name @@ -526,7 +523,7 @@ def _add_external_nodes_and_edges( out_edges.append((comp.id, nid)) if not in_nodes and not out_nodes: - return set() + return ext_attrs = dict( style="filled", @@ -557,8 +554,6 @@ def _add_external_nodes_and_edges( for src, dst in out_edges: dot.edge(src, dst) - return set(out_nodes.keys()) - def _owner_legend_html(colors: ColorAssigner) -> str: """Build an HTML-table label listing every owner and its fill color. 
@@ -617,12 +612,7 @@ def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: leg.node("_leg_b", label="Service", fillcolor="white", penwidth="1.0") leg.edge("_leg_a", "_leg_b", xlabel="API call", style="dotted") - # Pin the owner legend to the bottom rank so it sits below the main graph. - # Horizontal placement is left to the layout engine (typically ends up on - # the right when sink-external nodes occupy the left of the bottom tier). - with dot.subgraph() as s: - s.attr(rank="max") - s.node("_leg_owners") + pass # no rank pinning — let the layout engine place the owner legend freely @@ -672,12 +662,8 @@ def build_graph( _add_active_nodes(dot, components, active_set, colors, skip_ids=grouped_ids) _add_ghost_nodes(dot, ghost_ids, index, skip_ids=grouped_ids) _add_edges(dot, components, index, active_set, ghost_ids, colors) - sink_ext_ids = _add_external_nodes_and_edges(dot, components, active_set) + _add_external_nodes_and_edges(dot, components, active_set) _add_legend(dot, colors) - # Invisible ordering edges from sink externals → owner legend nudge the - # legend rightward within the shared rank=max row. - for nid in sorted(sink_ext_ids): - dot.edge(nid, "_leg_owners", style="invis", constraint="false") return dot From 1f35586c35aea2a5dd3e92bc6c97666d5bba0462 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 29 May 2026 17:12:11 -0400 Subject: [PATCH 30/51] Pin both legend clusters to rank=max (bottom of diagram) Declaration order in the DOT source doesn't affect dot layout; only rank constraints do. Pins _leg_owners (owner colors) and _leg_p (edge-style examples) to rank=max so both clusters stay at the bottom. 
Co-Authored-By: Claude Sonnet 4.6 --- translator-components-diagram/generate_diagram.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 1c04dad..c2d9132 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -612,7 +612,11 @@ def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: leg.node("_leg_b", label="Service", fillcolor="white", penwidth="1.0") leg.edge("_leg_a", "_leg_b", xlabel="API call", style="dotted") - pass # no rank pinning — let the layout engine place the owner legend freely + # Pin both legend clusters to the bottom of the diagram. + with dot.subgraph() as s: + s.attr(rank="max") + s.node("_leg_owners") + s.node("_leg_p") From 0b0812b67597a39dcc4c042f471b39af34191428 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 29 May 2026 17:13:10 -0400 Subject: [PATCH 31/51] Pin all edge-legend nodes to rank=max to prevent cluster stretching Pinning only _leg_p left _leg_c/_leg_a/_leg_b free, causing the legend cluster to span multiple ranks and stretch vertically. Pinning all four keeps them on the same rank row. Co-Authored-By: Claude Sonnet 4.6 --- translator-components-diagram/generate_diagram.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index c2d9132..3dd5e60 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -612,11 +612,16 @@ def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: leg.node("_leg_b", label="Service", fillcolor="white", penwidth="1.0") leg.edge("_leg_a", "_leg_b", xlabel="API call", style="dotted") - # Pin both legend clusters to the bottom of the diagram. 
+ # Pin both legend clusters to the bottom of the diagram. All four edge- + # example nodes must be included — pinning only _leg_p while leaving + # _leg_c/_leg_a/_leg_b free causes the cluster to stretch across ranks. with dot.subgraph() as s: s.attr(rank="max") s.node("_leg_owners") s.node("_leg_p") + s.node("_leg_c") + s.node("_leg_a") + s.node("_leg_b") From 9373700e0e22e91154154bb0544883e02f3e6c71 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 29 May 2026 17:16:04 -0400 Subject: [PATCH 32/51] Stack edge-legend pairs on separate rows; float legend cluster freely MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds rank=same subgraphs inside cluster_legend to lock each example pair (Producer→Consumer, Component→Service) onto its own horizontal row, with an invisible ordering edge keeping row 1 above row 2. Removes the four edge-legend nodes from the rank=max pin — they no longer need it since the internal rank constraints keep them compact. Only the owner legend stays pinned to rank=max. Co-Authored-By: Claude Sonnet 4.6 --- .../generate_diagram.py | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 3dd5e60..0355da4 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -600,7 +600,7 @@ def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: shape="plain", ) - # Edge style examples — provider→consumer / API call. + # Edge style examples — provider→consumer / API call, one pair per row. 
with dot.subgraph(name="cluster_legend") as leg: leg.attr(label="Legend", **_cluster_attrs) @@ -612,16 +612,23 @@ def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: leg.node("_leg_b", label="Service", fillcolor="white", penwidth="1.0") leg.edge("_leg_a", "_leg_b", xlabel="API call", style="dotted") - # Pin both legend clusters to the bottom of the diagram. All four edge- - # example nodes must be included — pinning only _leg_p while leaving - # _leg_c/_leg_a/_leg_b free causes the cluster to stretch across ranks. + # Lock each pair onto its own horizontal row. + with leg.subgraph() as row1: + row1.attr(rank="same") + row1.node("_leg_p") + row1.node("_leg_c") + with leg.subgraph() as row2: + row2.attr(rank="same") + row2.node("_leg_a") + row2.node("_leg_b") + # Invisible edge to keep row 1 above row 2. + leg.edge("_leg_p", "_leg_a", style="invis") + + # Pin the owner legend to the bottom; the edge-style legend floats freely + # (its internal rank="same" rows keep it compact wherever it lands). with dot.subgraph() as s: s.attr(rank="max") s.node("_leg_owners") - s.node("_leg_p") - s.node("_leg_c") - s.node("_leg_a") - s.node("_leg_b") From 968688a1f681e24759357fa9deb64e8c8bbce2a6 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 29 May 2026 17:22:21 -0400 Subject: [PATCH 33/51] Add external node shapes to the edge-style legend Adds a third row to cluster_legend showing the two external-entity shapes: a cylinder (data source, rank=min) and a double-oval (user/agent, rank=max), styled identically to the real external nodes (amber fill, penwidth 2.5). 
Co-Authored-By: Claude Sonnet 4.6 --- .../generate_diagram.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 0355da4..fce9050 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -612,7 +612,15 @@ def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: leg.node("_leg_b", label="Service", fillcolor="white", penwidth="1.0") leg.edge("_leg_a", "_leg_b", xlabel="API call", style="dotted") - # Lock each pair onto its own horizontal row. + _ext = dict( + fillcolor=EXTERNAL_FILL_COLOR, style="filled", + fontname="Helvetica", fontsize="13", penwidth="2.5", + ) + leg.node("_leg_src", label="Data source", shape="cylinder", **_ext) + leg.node("_leg_sink", label="User / agent", shape="oval", + peripheries="2", **_ext) + + # Lock each pair / row onto the same horizontal rank. with leg.subgraph() as row1: row1.attr(rank="same") row1.node("_leg_p") @@ -621,8 +629,13 @@ def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: row2.attr(rank="same") row2.node("_leg_a") row2.node("_leg_b") - # Invisible edge to keep row 1 above row 2. - leg.edge("_leg_p", "_leg_a", style="invis") + with leg.subgraph() as row3: + row3.attr(rank="same") + row3.node("_leg_src") + row3.node("_leg_sink") + # Invisible edges enforce row order: row1 → row2 → row3. + leg.edge("_leg_p", "_leg_a", style="invis") + leg.edge("_leg_a", "_leg_src", style="invis") # Pin the owner legend to the bottom; the edge-style legend floats freely # (its internal rank="same" rows keep it compact wherever it lands). From acb0ee6a28e0a17b81d59fca3515f365c43e2626 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 29 May 2026 17:23:18 -0400 Subject: [PATCH 34/51] Improved Legend labels. 
--- README.md | 1 + translator-components-diagram/generate_diagram.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d3b6602..6a19c45 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ # Core Components Working Group A working group of the [NCATS Biomedical Data Translator](https://ncats.nih.gov/research/research-activities/translator) project. +Can diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index fce9050..d4ed513 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -616,8 +616,8 @@ def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: fillcolor=EXTERNAL_FILL_COLOR, style="filled", fontname="Helvetica", fontsize="13", penwidth="2.5", ) - leg.node("_leg_src", label="Data source", shape="cylinder", **_ext) - leg.node("_leg_sink", label="User / agent", shape="oval", + leg.node("_leg_src", label="Database", shape="cylinder", **_ext) + leg.node("_leg_sink", label="User/agent", shape="oval", peripheries="2", **_ext) # Lock each pair / row onto the same horizontal rank. From 3c660218f7c118c1c656d8612768b7057ee6d29b Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 2 Jun 2026 18:25:30 -0400 Subject: [PATCH 35/51] Add Hide column support to suppress components from diagram output Components with Hide=TRUE are excluded from the active set, never rendered as ghost nodes, and have all edges to/from them dropped. 
Co-Authored-By: Claude Sonnet 4.6 --- translator-components-diagram/generate_diagram.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index d4ed513..a8b8f4a 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -80,6 +80,7 @@ class Component: refactor_status: str notes: str ubiquitous: bool = False + hide: bool = False part_of: str = "" externals: list[tuple[str, str]] = field(default_factory=list) depends_on: list[str] = field(default_factory=list) @@ -195,6 +196,7 @@ def load_components(csv_path: Path) -> list[Component]: refactor_status=row.get("Refactor status", "").strip(), notes=row.get("Notes", "").strip(), ubiquitous=_parse_bool(row.get("Ubiquitous", "")), + hide=_parse_bool(row.get("Hide", "")), part_of=row.get("Part of", "").strip(), externals=parse_externals(row.get("Externals", "")), depends_on=depends_on, @@ -267,6 +269,7 @@ def write_json(components: list[Component], out_path: Path) -> None: "Refactor status": c.refactor_status, "Notes": c.notes, "Ubiquitous": c.ubiquitous, + "Hide": c.hide, "Part of": c.part_of, "Externals": [{"direction": d, "name": n} for d, n in c.externals], "depends_on": c.depends_on, @@ -289,8 +292,8 @@ def _compute_active_set( active_statuses: set[str] | None, ) -> set[str]: if active_statuses is None: - return {c.id for c in components} - return {c.id for c in components if c.refactor_status in active_statuses} + return {c.id for c in components if not c.hide} + return {c.id for c in components if c.refactor_status in active_statuses and not c.hide} def _compute_ghost_ids( @@ -304,8 +307,9 @@ def _compute_ghost_ids( continue for ref in comp.all_refs(): match = index.get(ref.lower()) - if match is None or match.ubiquitous: + if match is None or match.ubiquitous or match.hide: # Ubiquitous targets render as per-caller clones, 
never as ghosts. + # Hidden components are suppressed entirely — not even as ghosts. continue if match.id not in active_set: ghost.add(match.id) @@ -456,6 +460,8 @@ def edge_target(caller_id: str, ref: str) -> str | None: match = index.get(ref.lower()) if match is None: return None + if match.hide: + return None if match.ubiquitous: clone_id = f"{caller_id}__{match.id}" if clone_id not in emitted_clones: From cc9f0374e561d08a5a25b6026b3ad2b42fee5784 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 2 Jun 2026 22:32:03 -0400 Subject: [PATCH 36/51] Widen Results edge in legend for better label visibility Co-Authored-By: Claude Sonnet 4.6 --- translator-components-diagram/generate_diagram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index a8b8f4a..4842bc9 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -612,7 +612,7 @@ def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: leg.node("_leg_p", label="Producer", fillcolor="white", penwidth="1.0") leg.node("_leg_c", label="Consumer", fillcolor="white", penwidth="1.0") - leg.edge("_leg_p", "_leg_c", xlabel="Results") + leg.edge("_leg_p", "_leg_c", xlabel="Results", minlen="2") leg.node("_leg_a", label="Component", fillcolor="white", penwidth="1.0") leg.node("_leg_b", label="Service", fillcolor="white", penwidth="1.0") From 8d123972474312346aa5d9e5bd3addf1a1d01391 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 2 Jun 2026 22:33:46 -0400 Subject: [PATCH 37/51] Suppress dotted/dashed edges when a solid edge already exists between the same nodes Fixes visual corruption where concentrate=true caused dotted edges to overwrite solid ones between the same node pair. 
Co-Authored-By: Claude Sonnet 4.6 --- translator-components-diagram/generate_diagram.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 4842bc9..a8b6111 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -449,6 +449,10 @@ def _add_edges( colors: ColorAssigner, ) -> None: emitted_clones: set[str] = set() + # Track (src, dst) pairs that already have a solid edge so that a + # dotted/dashed edge between the same two nodes — which concentrate=true + # would merge, losing the solid style — is suppressed in favour of solid. + solid_edges: set[tuple[str, str]] = set() def edge_target(caller_id: str, ref: str) -> str | None: """Return the graphviz node id to draw an edge to, or None to skip. @@ -479,17 +483,18 @@ def edge_target(caller_id: str, ref: str) -> str | None: t = edge_target(comp.id, ref) if t is not None: dot.edge(t, comp.id) # B → A: B provides results to A + solid_edges.add((t, comp.id)) for ref in comp.depends_on_planned: t = edge_target(comp.id, ref) - if t is not None: + if t is not None and (t, comp.id) not in solid_edges: dot.edge(t, comp.id, style="dashed", color=PLANNED_EDGE_COLOR) for ref in comp.uses: t = edge_target(comp.id, ref) - if t is not None: + if t is not None and (comp.id, t) not in solid_edges: dot.edge(comp.id, t, style="dotted") # A ··→ B: API call for ref in comp.uses_planned: t = edge_target(comp.id, ref) - if t is not None: + if t is not None and (comp.id, t) not in solid_edges: dot.edge(comp.id, t, style="dotted", color=PLANNED_EDGE_COLOR) From f25ce99ea1d60656f2ad340092e92aa446cffe46 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 2 Jun 2026 22:36:05 -0400 Subject: [PATCH 38/51] Increase legend edge minlen to 5 for clearer edge labels Co-Authored-By: Claude Sonnet 4.6 --- translator-components-diagram/generate_diagram.py 
| 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index a8b6111..dd80763 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -617,11 +617,11 @@ def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: leg.node("_leg_p", label="Producer", fillcolor="white", penwidth="1.0") leg.node("_leg_c", label="Consumer", fillcolor="white", penwidth="1.0") - leg.edge("_leg_p", "_leg_c", xlabel="Results", minlen="2") + leg.edge("_leg_p", "_leg_c", xlabel="Results", minlen="5") leg.node("_leg_a", label="Component", fillcolor="white", penwidth="1.0") leg.node("_leg_b", label="Service", fillcolor="white", penwidth="1.0") - leg.edge("_leg_a", "_leg_b", xlabel="API call", style="dotted") + leg.edge("_leg_a", "_leg_b", xlabel="API call", style="dotted", minlen="5") _ext = dict( fillcolor=EXTERNAL_FILL_COLOR, style="filled", From 039be30353c38870d87d6670fe01b4c68059f537 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 2 Jun 2026 22:56:18 -0400 Subject: [PATCH 39/51] Style planned edges in red so they stand out clearly Planned/in-development "Gets results from" edges are now solid red, and planned "Calls" edges are dotted red, replacing the previous dashed/dotted indigo that was hard to distinguish from normal edges. 
Co-Authored-By: Claude Sonnet 4.6 --- translator-components-diagram/generate_diagram.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index dd80763..5473bd5 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -487,7 +487,8 @@ def edge_target(caller_id: str, ref: str) -> str | None: for ref in comp.depends_on_planned: t = edge_target(comp.id, ref) if t is not None and (t, comp.id) not in solid_edges: - dot.edge(t, comp.id, style="dashed", color=PLANNED_EDGE_COLOR) + # Planned/in-development "Gets results from" — solid red to stand out + dot.edge(t, comp.id, style="solid", color="red") for ref in comp.uses: t = edge_target(comp.id, ref) if t is not None and (comp.id, t) not in solid_edges: @@ -495,7 +496,8 @@ def edge_target(caller_id: str, ref: str) -> str | None: for ref in comp.uses_planned: t = edge_target(comp.id, ref) if t is not None and (comp.id, t) not in solid_edges: - dot.edge(comp.id, t, style="dotted", color=PLANNED_EDGE_COLOR) + # Planned/in-development "Calls" — dotted red to stand out + dot.edge(comp.id, t, style="dotted", color="red") def _ext_node_id(name: str) -> str: From a243ef7f280ef1324c7f1c6b9ed8d933dbdd279c Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 2 Jun 2026 22:59:19 -0400 Subject: [PATCH 40/51] Change API call edges from dotted to dashed Dashed lines are easier to distinguish from solid "Gets results from" edges than the previous widely-spaced dots. Updates both the diagram edges (implemented and planned) and the legend example. 
Co-Authored-By: Claude Sonnet 4.6 --- translator-components-diagram/generate_diagram.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 5473bd5..205bdde 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -450,7 +450,7 @@ def _add_edges( ) -> None: emitted_clones: set[str] = set() # Track (src, dst) pairs that already have a solid edge so that a - # dotted/dashed edge between the same two nodes — which concentrate=true + # dashed edge between the same two nodes — which concentrate=true # would merge, losing the solid style — is suppressed in favour of solid. solid_edges: set[tuple[str, str]] = set() @@ -492,12 +492,12 @@ def edge_target(caller_id: str, ref: str) -> str | None: for ref in comp.uses: t = edge_target(comp.id, ref) if t is not None and (comp.id, t) not in solid_edges: - dot.edge(comp.id, t, style="dotted") # A ··→ B: API call + dot.edge(comp.id, t, style="dashed") # A --→ B: API call for ref in comp.uses_planned: t = edge_target(comp.id, ref) if t is not None and (comp.id, t) not in solid_edges: - # Planned/in-development "Calls" — dotted red to stand out - dot.edge(comp.id, t, style="dotted", color="red") + # Planned/in-development "Calls" — dashed red to stand out + dot.edge(comp.id, t, style="dashed", color="red") def _ext_node_id(name: str) -> str: @@ -623,7 +623,7 @@ def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: leg.node("_leg_a", label="Component", fillcolor="white", penwidth="1.0") leg.node("_leg_b", label="Service", fillcolor="white", penwidth="1.0") - leg.edge("_leg_a", "_leg_b", xlabel="API call", style="dotted", minlen="5") + leg.edge("_leg_a", "_leg_b", xlabel="API call", style="dashed", minlen="5") _ext = dict( fillcolor=EXTERNAL_FILL_COLOR, style="filled", From 39d671e31351510ea3f8fed52b536f8cdc7fad86 Mon Sep 
17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 2 Jun 2026 23:11:27 -0400 Subject: [PATCH 41/51] Add --no-concentrate flag to disable edge merging concentrate=true can cause mixed solid/dashed edges between nearby nodes to render incorrectly merged (e.g. solid edge visually branching off a dashed edge). --no-concentrate disables this behaviour; --concentrate (the default) preserves the existing layout. Co-Authored-By: Claude Sonnet 4.6 --- .../generate_diagram.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 205bdde..83f435b 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -663,6 +663,7 @@ def build_graph( active_statuses: set[str] | None, direction: str, colors: ColorAssigner, + concentrate: bool = True, ) -> graphviz.Digraph: """Assemble the full graph from the parsed component list.""" index = index_by_id(components) @@ -676,11 +677,11 @@ def build_graph( "fontname": "Helvetica", "fontsize": "12", # splines=true gives graphviz freedom to route edges as smooth - # curves around nodes; combined with concentrate=true (merges - # parallel edges going to the same place) this packs the layout - # tighter at the cost of wigglier lines. + # curves around nodes; concentrate merges partially-parallel edges + # to pack the layout tighter (disable if mixed solid/dashed edges + # render incorrectly merged). "splines": "true", - "concentrate": "true", + "concentrate": "true" if concentrate else "false", "nodesep": "0.3", "ranksep": "0.5", # Required for rank=same to work correctly across cluster @@ -770,6 +771,13 @@ def build_graph( type=click.Choice(["LR", "TB"]), help="Graph layout direction.", ) +@click.option( + "--concentrate/--no-concentrate", + default=True, + show_default=True, + help="Merge partially-parallel edges (concentrate=true). 
Disable if solid " + "and dashed edges between nearby nodes render incorrectly merged.", +) def main( input_path: Path, google_sheet: bool, @@ -780,6 +788,7 @@ def main( include_all: bool, extra_formats: tuple[str, ...], direction: str, + concentrate: bool, ) -> None: """Validate components CSV and generate a Graphviz dependency diagram.""" output_dir.mkdir(parents=True, exist_ok=True) @@ -857,7 +866,7 @@ def main( ) colors = ColorAssigner(load_owner_colors(), FALLBACK_COLORS) - dot = build_graph(components, active_statuses, direction, colors) + dot = build_graph(components, active_statuses, direction, colors, concentrate=concentrate) # Save .dot source dot_path = output_dir / f"{output_name}.dot" From ee7252c9d294d7ce0561185a27a2e638440aa83d Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 2 Jun 2026 23:12:37 -0400 Subject: [PATCH 42/51] Change --concentrate default to off (no-concentrate) Mixed solid/dashed edges render more correctly without concentrate. --concentrate can still be passed to enable it; --no-concentrate is retained as a no-op for forward compatibility. Co-Authored-By: Claude Sonnet 4.6 --- translator-components-diagram/generate_diagram.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 83f435b..f5eaa98 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -663,7 +663,7 @@ def build_graph( active_statuses: set[str] | None, direction: str, colors: ColorAssigner, - concentrate: bool = True, + concentrate: bool = False, ) -> graphviz.Digraph: """Assemble the full graph from the parsed component list.""" index = index_by_id(components) @@ -773,7 +773,7 @@ def build_graph( ) @click.option( "--concentrate/--no-concentrate", - default=True, + default=False, show_default=True, help="Merge partially-parallel edges (concentrate=true). 
Disable if solid " "and dashed edges between nearby nodes render incorrectly merged.", From 537f3131d0e129c38db727bbd18abd8bd7da0bdb Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 2 Jun 2026 23:19:56 -0400 Subject: [PATCH 43/51] Add CLAUDE.md for translator-components-diagram MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Documents script layout with line-number references, CSV column → Component field mapping, and "I want to change X" navigation patterns so quick edits don't require reading the full 891-line file. Co-Authored-By: Claude Sonnet 4.6 --- translator-components-diagram/CLAUDE.md | 113 ++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 translator-components-diagram/CLAUDE.md diff --git a/translator-components-diagram/CLAUDE.md b/translator-components-diagram/CLAUDE.md new file mode 100644 index 0000000..c687bce --- /dev/null +++ b/translator-components-diagram/CLAUDE.md @@ -0,0 +1,113 @@ +# translator-components-diagram + +Generates Graphviz dependency diagrams for Translator platform components from a Google Sheet CSV. 
+ +## Quick start + +```bash +# Download from Google Sheet and render (most common) +uv run generate-diagram --google-sheet + +# From a local CSV +uv run generate-diagram --input data/components.csv + +# Include all components (not just active refactor statuses) +uv run generate-diagram --google-sheet --all + +# Left-to-right layout, PDF output +uv run generate-diagram --google-sheet --direction LR --format pdf + +# Run tests +uv run pytest +``` + +## Script layout (`generate_diagram.py`) + +| Lines | What's there | +|-------|-------------| +| 17–36 | Global constants: `DEFAULT_STATUSES`, `FALLBACK_COLORS`, color constants for planned/ghost/external nodes | +| 39–61 | `ColorAssigner` — maps owners to fill colors, falls back to rotating palette | +| 63–69 | `text_color_for` — picks black/white text for contrast against a fill hex | +| 72–103 | `Component` dataclass — one CSV row after parsing | +| 106–154 | CSV parsing utilities: `_parse_bool`, `parse_id_list`, `parse_externals` | +| 157–213 | Data loading: `load_owner_colors`, `load_components`, `index_by_id` | +| 216–258 | `validate` — duplicate ID detection, unknown reference checking | +| 261–284 | `write_json` — serializes all components to `components.json` | +| 290–711 | Graph construction: `_compute_*`, `_add_*`, `build_graph` (see table below) | +| 714–891 | CLI: `@click.option` decorators + `main` | + +### Graph construction helpers (290–711) + +| Function | Lines | Purpose | +|----------|-------|---------| +| `_compute_active_set` | 290–296 | IDs to render based on refactor status filter | +| `_compute_ghost_ids` | 299–316 | IDs of excluded-but-referenced components (shown dimmed) | +| `_emit_component_node` | 319–341 | Renders one component node (used for primary nodes and ubiquitous clones) | +| `_compute_groups` | 343–355 | Groups nodes by `Part of` label | +| `_add_active_nodes` | 358–372 | Emits all non-grouped, non-ubiquitous active nodes | +| `_add_ghost_nodes` | 375–394 | Emits dimmed nodes for 
excluded-but-referenced components | +| `_add_group_clusters` | 397–440 | Wraps `Part of` groups in labeled dotted-border subgraphs | +| `_add_edges` | 443–500 | Emits all dependency edges (solid/dashed, implemented/planned) | +| `_ext_node_id` | 503–506 | Stable node ID from an external-entity name | +| `_add_external_nodes_and_edges` | 509–568 | Emits external source/sink nodes from the `Externals` column | +| `_owner_legend_html` | 571–593 | Builds HTML-table label for the owner-color legend | +| `_add_legend` | 596–658 | Assembles the full legend (owner swatches + edge style examples) | +| `build_graph` | 661–711 | Top-level assembler — calls all the above in order | + +## Data model + +CSV column → `Component` field: + +| CSV column | Field | Notes | +|-----------|-------|-------| +| `id` | `id` | Unique identifier; case-insensitive for references | +| `Name` | `name` | Display name; falls back to `id` if blank | +| `Owner` | `owner` | Defaults to `"None"` if blank | +| `Component in ITRB` | `itrb` | Informational only | +| `Refactor status` | `refactor_status` | Drives active-set filtering | +| `Gets results from` | `depends_on` / `depends_on_planned` | Comma-separated IDs; `~` prefix = planned | +| `Calls` | `uses` / `uses_planned` | Comma-separated IDs; `~` prefix = planned | +| `Notes` | `notes` | Informational only | +| `Ubiquitous` | `ubiquitous` | TRUE/yes/1 → render as per-caller clones | +| `Hide` | `hide` | TRUE/yes/1 → suppress entirely (not even as ghost) | +| `Part of` | `part_of` | Groups node into a named cluster subgraph | +| `Externals` | `externals` | `Sink` = data out | + +## Common change patterns + +**Change owner node colors** → edit `owner-colors.csv` (no code change). Row order = legend order. + +**Change ghost/external node colors** → constants `GHOST_FILL_COLOR`, `GHOST_BORDER_COLOR`, `GHOST_FONT_COLOR`, `EXTERNAL_FILL_COLOR` at lines 31–36. + +**Change planned-edge color** → the literal `color="red"` in the two planned-edge `dot.edge(...)` calls in `_add_edges` (lines 488–500); the `PLANNED_EDGE_COLOR` constant at line 30 is no longer used by those calls.
+ +**Change active refactor statuses** → `DEFAULT_STATUSES` list at line 17. + +**Change node shape or border style** → `_emit_component_node` (line 319) for active nodes; `_add_ghost_nodes` (line 375) for ghost nodes. The `is_new` bold border is set at line 339. + +**Change edge styles** (solid/dashed/color) → `_add_edges` (line 443). Each of the four dependency lists (`depends_on`, `depends_on_planned`, `uses`, `uses_planned`) has its own `dot.edge(...)` call (lines 483–500). + +**Change external node shapes** → `_add_external_nodes_and_edges` (line 509). Sources use `shape="cylinder"`, sinks use `shape="oval", peripheries="2"`. + +**Change graph layout settings** (dpi, ranksep, splines) → `build_graph` `graph_attr` dict at line 673. + +**Add a new CSV column** → three places: +1. `load_components` (line 175) — read from `row` +2. `Component` dataclass (line 72) — add the field +3. `write_json` (line 261) — add to the export dict + +**Add a new CLI flag** → add `@click.option` before `main` (line 714) and add the parameter to the `main` signature. + +**Change the legend** → `_add_legend` (line 596) for structure; `_owner_legend_html` (line 571) for the owner-color table HTML. + +## Special features + +**Ubiquitous components** (e.g. telemetry, logging): Set `Ubiquitous=TRUE` in the CSV. Instead of one central node, a per-caller clone is emitted inline next to each caller. No central node is created. Logic lives in `edge_target()` inside `_add_edges` (line 457). These components are excluded from `_add_active_nodes` and `_compute_ghost_ids`. + +**Ghost nodes**: When an active component references one that is filtered out (wrong refactor status), the excluded component appears dimmed with `(excluded)` in its label. Computed by `_compute_ghost_ids` (line 299). + +**Planned edges** (`~id` in `Gets results from` or `Calls`): Parsed as `depends_on_planned` / `uses_planned` by `parse_id_list` (line 111). Rendered in red in `_add_edges` (lines 488–500). 
Solid red for "Gets results from", dashed red for "Calls". + +**`--concentrate` flag**: Merges partially-parallel edges. Off by default because it can visually blend solid and dashed edges between nearby nodes. + +**Google Sheet download**: Checks `Content-Type: text/csv` to catch the case where a private/missing sheet returns an HTML login page instead of CSV (line 826). From a4b7dc57606cb16697e9f767e5ce10143b3d2d55 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 2 Jun 2026 23:32:07 -0400 Subject: [PATCH 44/51] Show non-ITRB hosting location on component node labels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a third line to component labels for components hosted outside ITRB (the default). RENCI shows as "Hosted at: RENCI 🌐", Local as "Hosted at: Local 💻", and Unknown as "Hosted at: Unknown ❓". Also notes in CLAUDE.md that the diagram script should be run by the user, not Claude. Co-Authored-By: Claude Sonnet 4.6 --- translator-components-diagram/CLAUDE.md | 2 ++ translator-components-diagram/generate_diagram.py | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/translator-components-diagram/CLAUDE.md b/translator-components-diagram/CLAUDE.md index c687bce..8ddb930 100644 --- a/translator-components-diagram/CLAUDE.md +++ b/translator-components-diagram/CLAUDE.md @@ -2,6 +2,8 @@ Generates Graphviz dependency diagrams for Translator platform components from a Google Sheet CSV. +> **Note for Claude:** After making code changes, do not run `uv run generate-diagram` yourself — the user will run it. Only run `uv run pytest` to check for test failures. 
+ ## Quick start ```bash diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index f5eaa98..7b1079d 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -34,6 +34,8 @@ # Warm amber for external-entity nodes (sources and sinks) so they stand out # clearly against the component fill colors. EXTERNAL_FILL_COLOR = "#FFE082" +# Emoji labels for non-default hosting locations (ITRB is the default and shown as nothing). +HOSTED_AT_EMOJI: dict[str, str] = {"RENCI": "🌐", "Local": "💻", "Unknown": "❓"} class ColorAssigner: @@ -82,6 +84,7 @@ class Component: ubiquitous: bool = False hide: bool = False part_of: str = "" + hosted_at: str = "" externals: list[tuple[str, str]] = field(default_factory=list) depends_on: list[str] = field(default_factory=list) depends_on_planned: list[str] = field(default_factory=list) @@ -198,6 +201,7 @@ def load_components(csv_path: Path) -> list[Component]: ubiquitous=_parse_bool(row.get("Ubiquitous", "")), hide=_parse_bool(row.get("Hide", "")), part_of=row.get("Part of", "").strip(), + hosted_at=row.get("Hosted at", "").strip(), externals=parse_externals(row.get("Externals", "")), depends_on=depends_on, depends_on_planned=depends_on_planned, @@ -271,6 +275,7 @@ def write_json(components: list[Component], out_path: Path) -> None: "Ubiquitous": c.ubiquitous, "Hide": c.hide, "Part of": c.part_of, + "Hosted at": c.hosted_at, "Externals": [{"direction": d, "name": n} for d, n in c.externals], "depends_on": c.depends_on, "depends_on_planned": c.depends_on_planned, @@ -331,6 +336,10 @@ def _emit_component_node( is_new = comp.refactor_status == "New in Refactor" # Owner is encoded by node color and shown in the legend, not in the label. 
label = f"{comp.display_name}\n{comp.id}" + if comp.hosted_at and comp.hosted_at != "ITRB": + emoji = HOSTED_AT_EMOJI.get(comp.hosted_at, "") + suffix = f" {emoji}" if emoji else "" + label += f"\nHosted at: {comp.hosted_at}{suffix}" dot.node( node_id, label=label, From 0cd7ca1f8e8afee3db1587d9692a7e148cbf1dcb Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 2 Jun 2026 23:36:59 -0400 Subject: [PATCH 45/51] Update CLAUDE.md files with hosted-at data model and workflow notes Adds Hosted at column to the data model table, a "Change node label format" common-change entry, and a note to not run the diagram script (the user runs it). Also creates a top-level CLAUDE.md describing the repo structure. Co-Authored-By: Claude Sonnet 4.6 --- CLAUDE.md | 14 ++++++++++++++ translator-components-diagram/CLAUDE.md | 3 +++ 2 files changed, 17 insertions(+) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..af84e55 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,14 @@ +# Core-Components-Working-Group + +This repository contains tooling for the Translator platform's Core Components Working Group. + +## Subdirectories + +| Directory | Purpose | +|-----------|---------| +| `translator-components-diagram/` | Generates Graphviz dependency diagrams from the Translator components Google Sheet. See its own `CLAUDE.md` for full details. | + +## Workflow notes for Claude + +- After making code changes in `translator-components-diagram/`, do **not** run `uv run generate-diagram` — the user will run the script themselves. Only run `uv run pytest` to check for test failures. +- The active branch for diagram work is `add-translator-components-diagrams-code`; PRs target `main`. 
diff --git a/translator-components-diagram/CLAUDE.md b/translator-components-diagram/CLAUDE.md index 8ddb930..a80a3f5 100644 --- a/translator-components-diagram/CLAUDE.md +++ b/translator-components-diagram/CLAUDE.md @@ -73,6 +73,7 @@ CSV column → `Component` field: | `Ubiquitous` | `ubiquitous` | TRUE/yes/1 → render as per-caller clones | | `Hide` | `hide` | TRUE/yes/1 → suppress entirely (not even as ghost) | | `Part of` | `part_of` | Groups node into a named cluster subgraph | +| `Hosted at` | `hosted_at` | Deployment location; `ITRB` is default (no label shown); others get a third label line, e.g. `Hosted at: RENCI 🌐` | | `Externals` | `externals` | `Sink` = data out | ## Common change patterns @@ -85,6 +86,8 @@ CSV column → `Component` field: **Change active refactor statuses** → `DEFAULT_STATUSES` list at line 17. +**Change node label format** → `_emit_component_node` (line 319). Active node labels are `display_name\nid` plus an optional third line for non-ITRB hosts. Emoji mapping lives in `HOSTED_AT_EMOJI` at line ~37. + **Change node shape or border style** → `_emit_component_node` (line 319) for active nodes; `_add_ghost_nodes` (line 375) for ghost nodes. The `is_new` bold border is set at line 339. **Change edge styles** (solid/dashed/color) → `_add_edges` (line 443). Each of the four dependency lists (`depends_on`, `depends_on_planned`, `uses`, `uses_planned`) has its own `dot.edge(...)` call (lines 483–500). From 0cbe3260b3083129d97f6312d954b85486786be5 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 9 Jun 2026 03:09:42 -0400 Subject: [PATCH 46/51] Add Layer field to Component data model Adds a `layer` field to the Component dataclass and a `layer_column` parameter to `load_components` so any CSV column can be read into it. Also exports the field in `write_json`. 
Co-Authored-By: Claude Sonnet 4.6 --- translator-components-diagram/generate_diagram.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 7b1079d..8258418 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -85,6 +85,7 @@ class Component: hide: bool = False part_of: str = "" hosted_at: str = "" + layer: str = "" externals: list[tuple[str, str]] = field(default_factory=list) depends_on: list[str] = field(default_factory=list) depends_on_planned: list[str] = field(default_factory=list) @@ -175,7 +176,7 @@ def load_owner_colors(path: Path = DEFAULT_OWNER_COLORS_PATH) -> dict[str, str]: return {row["owner"].strip(): row["color"].strip() for row in reader} -def load_components(csv_path: Path) -> list[Component]: +def load_components(csv_path: Path, layer_column: str = "") -> list[Component]: """Parse the CSV into a sorted list of Components. 
Sorted by lowercase id for deterministic .dot / .json output across CSV @@ -202,6 +203,7 @@ def load_components(csv_path: Path) -> list[Component]: hide=_parse_bool(row.get("Hide", "")), part_of=row.get("Part of", "").strip(), hosted_at=row.get("Hosted at", "").strip(), + layer=row.get(layer_column, "").strip() if layer_column else "", externals=parse_externals(row.get("Externals", "")), depends_on=depends_on, depends_on_planned=depends_on_planned, @@ -276,6 +278,7 @@ def write_json(components: list[Component], out_path: Path) -> None: "Hide": c.hide, "Part of": c.part_of, "Hosted at": c.hosted_at, + "Layer": c.layer, "Externals": [{"direction": d, "name": n} for d, n in c.externals], "depends_on": c.depends_on, "depends_on_planned": c.depends_on_planned, From 08ea3a38a457aa6bd2f6b167290ce26bc6660144 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 9 Jun 2026 03:10:08 -0400 Subject: [PATCH 47/51] Add --layer-column flag to generate per-layer sub-figures When --layer-column is passed, generates one legend-free PNG sub-figure per distinct layer value found in that CSV column. Each sub-figure shows the in-layer components at full owner colors with a bold border (penwidth 4.0) to mark them as the focus, and their direct neighbors (both upstream and downstream) in their normal owner colors with a thinner border. Part-of clusters, edge styles, and the active refactor-status filter all apply as in the main diagram. 
Helper additions: - _layer_filename(): sanitises a layer label to a safe filename stem - build_layer_subgraph(): builds the legend-free Digraph for one layer - penwidth param on _emit_component_node() for border-weight overrides Co-Authored-By: Claude Sonnet 4.6 --- .../generate_diagram.py | 217 +++++++++++++++++- 1 file changed, 215 insertions(+), 2 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 8258418..32d7afa 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -4,6 +4,7 @@ import html import json import os +import re import urllib.error import urllib.request from dataclasses import dataclass, field @@ -36,6 +37,8 @@ EXTERNAL_FILL_COLOR = "#FFE082" # Emoji labels for non-default hosting locations (ITRB is the default and shown as nothing). HOSTED_AT_EMOJI: dict[str, str] = {"RENCI": "🌐", "Local": "💻", "Unknown": "❓"} +# Bold border penwidth for in-layer nodes in per-layer sub-figures. +IN_LAYER_PENWIDTH = "4.0" class ColorAssigner: @@ -329,11 +332,13 @@ def _emit_component_node( comp: Component, node_id: str, colors: ColorAssigner, + penwidth: str | None = None, ) -> None: """Render a Component as a graphviz node at the given id. Used both for primary node placement and for per-caller ubiquitous clones (which use a synthetic id like "{caller}__{target}"). + penwidth overrides the default (2.0 for "New in Refactor", 1.0 otherwise). 
""" fill = colors.get(comp.owner) is_new = comp.refactor_status == "New in Refactor" @@ -348,7 +353,7 @@ def _emit_component_node( label=label, fillcolor=fill, fontcolor=text_color_for(fill), - penwidth="2.0" if is_new else "1.0", + penwidth=penwidth if penwidth is not None else ("2.0" if is_new else "1.0"), ) @@ -723,6 +728,162 @@ def build_graph( return dot +def _layer_filename(layer: str) -> str: + """Convert a layer label to a safe filename stem.""" + safe = re.sub(r"[^\w\s-]", "", layer.lower()) + safe = re.sub(r"[\s-]+", "_", safe).strip("_") + return safe or "layer" + + +def build_layer_subgraph( + components: list[Component], + layer_value: str, + active_set: set[str], + index: dict[str, Component], + direction: str, + colors: ColorAssigner, +) -> graphviz.Digraph: + """Build a legend-free sub-diagram showing one layer and its direct neighbors.""" + in_layer = { + c.id for c in components + if c.id in active_set and c.layer == layer_value and not c.ubiquitous and not c.hide + } + + # Direct neighbors (both directions) that are outside this layer + out_of_layer: set[str] = set() + for comp in components: + if comp.id not in in_layer: + continue + for ref in comp.all_refs(): + match = index.get(ref.lower()) + if ( + match + and match.id not in in_layer + and match.id in active_set + and not match.ubiquitous + and not match.hide + ): + out_of_layer.add(match.id) + for comp in components: + if comp.ubiquitous or comp.hide or comp.id in in_layer or comp.id not in active_set: + continue + for ref in comp.all_refs(): + match = index.get(ref.lower()) + if match and match.id in in_layer: + out_of_layer.add(comp.id) + break + + visible = in_layer | out_of_layer + + dot = graphviz.Digraph( + name=f"layer_{_layer_filename(layer_value)}", + graph_attr={ + "rankdir": direction, + "fontname": "Helvetica", + "fontsize": "12", + "splines": "true", + "concentrate": "false", + "nodesep": "0.3", + "ranksep": "0.5", + "newrank": "true", + "dpi": "150", + }, + node_attr={ + 
"fontname": "Helvetica", + "fontsize": "11", + "style": "filled,rounded", + "shape": "box", + }, + edge_attr={"fontname": "Helvetica", "fontsize": "9"}, + ) + + # Clusters for in-layer nodes that have a Part-of group + groups: dict[str, list[str]] = {} + for comp in components: + if not comp.part_of or comp.ubiquitous or comp.id not in in_layer: + continue + groups.setdefault(comp.part_of, []).append(comp.id) + grouped_in_layer = {nid for ids in groups.values() for nid in ids} + + for group_label, node_ids in sorted(groups.items()): + safe = group_label.lower().replace(" ", "_").replace("/", "_") + with dot.subgraph(name=f"cluster_group_{safe}") as sg: + tab_label = ( + f'<' + f"
" + f'{html.escape(group_label)}' + f"
>" + ) + sg.attr( + label=tab_label, + labelloc="t", + style="filled", + fillcolor="#DDDDDD", + color="#555555", + fontname="Helvetica", + penwidth="1.5", + bgcolor="transparent", + ) + for node_id in sorted(node_ids): + comp = index.get(node_id.lower()) + if comp: + _emit_component_node(sg, comp, node_id, colors, penwidth=IN_LAYER_PENWIDTH) + + # Ungrouped in-layer nodes + for comp in components: + if comp.id not in in_layer or comp.ubiquitous or comp.id in grouped_in_layer: + continue + _emit_component_node(dot, comp, comp.id, colors, penwidth=IN_LAYER_PENWIDTH) + + # Out-of-layer neighbors — full owner colors, default border weight + for ool_id in sorted(out_of_layer): + comp = index.get(ool_id.lower()) + if comp: + _emit_component_node(dot, comp, ool_id, colors) + + # Edges — only those with at least one in-layer endpoint + emitted_clones: set[str] = set() + solid_edges: set[tuple[str, str]] = set() + + def _sub_target(caller_id: str, ref: str) -> str | None: + match = index.get(ref.lower()) + if match is None or match.hide: + return None + if match.ubiquitous: + if caller_id in in_layer: + clone_id = f"{caller_id}__{match.id}" + if clone_id not in emitted_clones: + _emit_component_node(dot, match, clone_id, colors) + emitted_clones.add(clone_id) + return clone_id + return None + return match.id if match.id in visible else None + + for comp in components: + if comp.id not in visible or comp.ubiquitous: + continue + for ref in comp.depends_on: + t = _sub_target(comp.id, ref) + if t is not None and (t in in_layer or comp.id in in_layer): + dot.edge(t, comp.id) + solid_edges.add((t, comp.id)) + for ref in comp.depends_on_planned: + t = _sub_target(comp.id, ref) + if t is not None and (t in in_layer or comp.id in in_layer) and (t, comp.id) not in solid_edges: + dot.edge(t, comp.id, style="solid", color="red") + for ref in comp.uses: + t = _sub_target(comp.id, ref) + if t is not None and (comp.id in in_layer or t in in_layer) and (comp.id, t) not in solid_edges: + 
dot.edge(comp.id, t, style="dashed") + solid_edges.add((comp.id, t)) + for ref in comp.uses_planned: + t = _sub_target(comp.id, ref) + if t is not None and (comp.id in in_layer or t in in_layer) and (comp.id, t) not in solid_edges: + dot.edge(comp.id, t, style="dashed", color="red") + + return dot + + @click.command() @click.option( "--input", "input_path", @@ -790,6 +951,15 @@ def build_graph( help="Merge partially-parallel edges (concentrate=true). Disable if solid " "and dashed edges between nearby nodes render incorrectly merged.", ) +@click.option( + "--layer-column", "layer_column", + default="", + show_default=True, + help="CSV column name to use for layer-based sub-figures (e.g. 'Layer'). " + "When set, one PNG sub-figure is written per distinct value found in " + "that column, showing in-layer nodes at full color and direct " + "neighbors from other layers greyed out. Leave empty to skip.", +) def main( input_path: Path, google_sheet: bool, @@ -801,6 +971,7 @@ def main( extra_formats: tuple[str, ...], direction: str, concentrate: bool, + layer_column: str, ) -> None: """Validate components CSV and generate a Graphviz dependency diagram.""" output_dir.mkdir(parents=True, exist_ok=True) @@ -847,7 +1018,7 @@ def main( raise click.ClickException(f"Input file not found: {input_path}") click.echo(f"Loading {input_path} ...") - components = load_components(input_path) + components = load_components(input_path, layer_column=layer_column) click.echo(f"Loaded {len(components)} components.") click.echo("Validating references ...") @@ -897,6 +1068,48 @@ def main( ) click.echo(f"Wrote {output_dir / f'{output_name}.{fmt}'}") + # Per-layer sub-figures + if layer_column: + _index = index_by_id(components) + _active_set = _compute_active_set(components, active_statuses) + layers = sorted({c.layer for c in components if c.layer}) + if not layers: + click.echo( + f"Note: no values found in '{layer_column}' column; " + "no layer sub-figures written." 
+ ) + else: + click.echo( + f"Generating {len(layers)} layer sub-figure(s) " + f"from '{layer_column}' column ..." + ) + for layer_value in layers: + in_layer_count = sum( + 1 for c in components + if c.id in _active_set + and c.layer == layer_value + and not c.ubiquitous + and not c.hide + ) + if in_layer_count == 0: + click.echo( + f" Skipping '{layer_value}' " + "(no active non-ubiquitous components)." + ) + continue + layer_dot = build_layer_subgraph( + components, layer_value, _active_set, _index, direction, colors + ) + stem = f"{output_name}_{_layer_filename(layer_value)}" + layer_dot_path = output_dir / f"{stem}.dot" + layer_dot_path.write_text(layer_dot.source, encoding="utf-8") + layer_dot.render( + filename=str(output_dir / stem), + format="png", + cleanup=True, + ) + click.echo(f" Wrote {output_dir / f'{stem}.png'}") + if __name__ == "__main__": main() From deb970284042bd92bc119a335b3601a67a5bcf4a Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 9 Jun 2026 03:25:43 -0400 Subject: [PATCH 48/51] Split legends into separate PNGs by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds --split-legends/--no-split-legends (default: on). When active, the owner and edge-style legends are omitted from the main diagram and written as standalone PNGs instead: {output_name}_owners.png — owner-color key {output_name}_legend.png — edge-style examples Use --no-split-legends to restore the original embedded-legend behavior. Refactored _add_legend into _add_owner_cluster / _add_edge_cluster helpers so both the combined path and the standalone graph builders share the same cluster-building code. 
Co-Authored-By: Claude Sonnet 4.6 --- .../generate_diagram.py | 105 ++++++++++++++---- 1 file changed, 81 insertions(+), 24 deletions(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 32d7afa..179b5f0 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -585,6 +585,16 @@ def _add_external_nodes_and_edges( dot.edge(src, dst) +_LEGEND_CLUSTER_ATTRS = dict( + style="filled,rounded", + fillcolor="#FAFAFA", + color="#AAAAAA", + fontname="Helvetica", + fontsize="11", + margin="12", +) + + def _owner_legend_html(colors: ColorAssigner) -> str: """Build an HTML-table label listing every owner and its fill color. @@ -610,29 +620,17 @@ def _owner_legend_html(colors: ColorAssigner) -> str: return f"<{table}>" -def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: - """Build a legend covering owner colors, edge styles, and node styles.""" - _cluster_attrs = dict( - style="filled,rounded", - fillcolor="#FAFAFA", - color="#AAAAAA", - fontname="Helvetica", - fontsize="11", - margin="12", - ) - - # Owner-color key in its own cluster so it doesn't crowd the edge examples. +def _add_owner_cluster(dot: graphviz.Digraph, colors: ColorAssigner) -> None: + """Add the owner-color key cluster to dot.""" with dot.subgraph(name="cluster_legend_owners") as own: - own.attr(label="Owner", **_cluster_attrs) - own.node( - "_leg_owners", - label=_owner_legend_html(colors), - shape="plain", - ) + own.attr(label="Owner", **_LEGEND_CLUSTER_ATTRS) + own.node("_leg_owners", label=_owner_legend_html(colors), shape="plain") + - # Edge style examples — provider→consumer / API call, one pair per row. 
+def _add_edge_cluster(dot: graphviz.Digraph) -> None: + """Add the edge-style example cluster to dot.""" with dot.subgraph(name="cluster_legend") as leg: - leg.attr(label="Legend", **_cluster_attrs) + leg.attr(label="Legend", **_LEGEND_CLUSTER_ATTRS) leg.node("_leg_p", label="Producer", fillcolor="white", penwidth="1.0") leg.node("_leg_c", label="Consumer", fillcolor="white", penwidth="1.0") @@ -650,7 +648,6 @@ def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: leg.node("_leg_sink", label="User/agent", shape="oval", peripheries="2", **_ext) - # Lock each pair / row onto the same horizontal rank. with leg.subgraph() as row1: row1.attr(rank="same") row1.node("_leg_p") @@ -663,10 +660,15 @@ def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: row3.attr(rank="same") row3.node("_leg_src") row3.node("_leg_sink") - # Invisible edges enforce row order: row1 → row2 → row3. leg.edge("_leg_p", "_leg_a", style="invis") leg.edge("_leg_a", "_leg_src", style="invis") + +def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: + """Add both legend clusters to dot (for the combined main diagram).""" + _add_owner_cluster(dot, colors) + _add_edge_cluster(dot) + # Pin the owner legend to the bottom; the edge-style legend floats freely # (its internal rank="same" rows keep it compact wherever it lands). 
with dot.subgraph() as s: @@ -674,6 +676,34 @@ def _add_legend(dot: graphviz.Digraph, colors: ColorAssigner) -> None: s.node("_leg_owners") +def _build_owners_graph(colors: ColorAssigner) -> graphviz.Digraph: + """Standalone diagram containing only the owner-color legend.""" + dot = graphviz.Digraph( + name="owners_legend", + graph_attr={"fontname": "Helvetica", "fontsize": "11", "dpi": "150"}, + ) + _add_owner_cluster(dot, colors) + return dot + + +def _build_edge_legend_graph() -> graphviz.Digraph: + """Standalone diagram containing only the edge-style legend.""" + dot = graphviz.Digraph( + name="edge_legend", + graph_attr={ + "fontname": "Helvetica", "fontsize": "11", "dpi": "150", + "rankdir": "TB", "newrank": "true", + }, + node_attr={ + "fontname": "Helvetica", "fontsize": "11", + "style": "filled,rounded", "shape": "box", + }, + edge_attr={"fontname": "Helvetica", "fontsize": "9"}, + ) + _add_edge_cluster(dot) + return dot + + def build_graph( components: list[Component], @@ -681,6 +711,7 @@ def build_graph( direction: str, colors: ColorAssigner, concentrate: bool = False, + include_legend: bool = True, ) -> graphviz.Digraph: """Assemble the full graph from the parsed component list.""" index = index_by_id(components) @@ -723,7 +754,8 @@ def build_graph( _add_ghost_nodes(dot, ghost_ids, index, skip_ids=grouped_ids) _add_edges(dot, components, index, active_set, ghost_ids, colors) _add_external_nodes_and_edges(dot, components, active_set) - _add_legend(dot, colors) + if include_legend: + _add_legend(dot, colors) return dot @@ -951,6 +983,14 @@ def _sub_target(caller_id: str, ref: str) -> str | None: help="Merge partially-parallel edges (concentrate=true). 
Disable if solid " "and dashed edges between nearby nodes render incorrectly merged.", ) +@click.option( + "--split-legends/--no-split-legends", "split_legends", + default=True, + show_default=True, + help="Write owner and edge-style legends as separate PNGs " + "({output_name}_owners.png / {output_name}_legend.png) and omit " + "them from the main diagram. Use --no-split-legends to embed them.", +) @click.option( "--layer-column", "layer_column", default="", @@ -971,6 +1011,7 @@ def main( extra_formats: tuple[str, ...], direction: str, concentrate: bool, + split_legends: bool, layer_column: str, ) -> None: """Validate components CSV and generate a Graphviz dependency diagram.""" @@ -1049,7 +1090,10 @@ def main( ) colors = ColorAssigner(load_owner_colors(), FALLBACK_COLORS) - dot = build_graph(components, active_statuses, direction, colors, concentrate=concentrate) + dot = build_graph( + components, active_statuses, direction, colors, + concentrate=concentrate, include_legend=not split_legends, + ) # Save .dot source dot_path = output_dir / f"{output_name}.dot" @@ -1068,6 +1112,19 @@ def main( ) click.echo(f"Wrote {output_dir / f'{output_name}.{fmt}'}") + # Separate legend files + if split_legends: + for legend_stem, legend_dot in [ + (f"{output_name}_owners", _build_owners_graph(colors)), + (f"{output_name}_legend", _build_edge_legend_graph()), + ]: + legend_dot.render( + filename=str(output_dir / legend_stem), + format="png", + cleanup=True, + ) + click.echo(f"Wrote {output_dir / f'{legend_stem}.png'}") + # Per-layer sub-figures if layer_column: _index = index_by_id(components) From 1908cf4e54157f834f1fba649649e3aa31a28fc8 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 9 Jun 2026 03:32:23 -0400 Subject: [PATCH 49/51] Include external nodes in per-layer sub-figures Calls _add_external_nodes_and_edges with the in_layer set so that external sources and sinks appear in sub-figures for the components that declare them, matching the behaviour of the main 
diagram. Co-Authored-By: Claude Sonnet 4.6 --- translator-components-diagram/generate_diagram.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index 179b5f0..ace0604 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -913,6 +913,8 @@ def _sub_target(caller_id: str, ref: str) -> str | None: if t is not None and (comp.id in in_layer or t in in_layer) and (comp.id, t) not in solid_edges: dot.edge(comp.id, t, style="dashed", color="red") + _add_external_nodes_and_edges(dot, components, in_layer) + return dot From 2b94bd2a4dddee0cbeb8cb5f4b43ad5217099a98 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 9 Jun 2026 04:14:51 -0400 Subject: [PATCH 50/51] Removed spurious text. --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 6a19c45..d3b6602 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,3 @@ # Core Components Working Group A working group of the [NCATS Biomedical Data Translator](https://ncats.nih.gov/research/research-activities/translator) project. -Can From cd33e5784dc88afbb6a682f4c0448d503ea7760b Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Sun, 14 Jun 2026 18:12:07 -0400 Subject: [PATCH 51/51] Added Scripps logo as a hosting location. --- translator-components-diagram/generate_diagram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/translator-components-diagram/generate_diagram.py b/translator-components-diagram/generate_diagram.py index ace0604..929b4e6 100644 --- a/translator-components-diagram/generate_diagram.py +++ b/translator-components-diagram/generate_diagram.py @@ -36,7 +36,7 @@ # clearly against the component fill colors. EXTERNAL_FILL_COLOR = "#FFE082" # Emoji labels for non-default hosting locations (ITRB is the default and shown as nothing). 
-HOSTED_AT_EMOJI: dict[str, str] = {"RENCI": "🌐", "Local": "💻", "Unknown": "❓"} +HOSTED_AT_EMOJI: dict[str, str] = {"RENCI": "🌐", "Scripps": "🌐", "Local": "💻", "Unknown": "❓"} # Bold border penwidth for in-layer nodes in per-layer sub-figures. IN_LAYER_PENWIDTH = "4.0"