From 2187840f2a40a8b32bb8049c7cfec3704c2730fc Mon Sep 17 00:00:00 2001
From: carlosabadia <cutillascarlos@gmail.com>
Date: Mon, 4 May 2026 13:35:23 +0200
Subject: [PATCH 1/4] Add checks for broken docs urls

---
 .github/workflows/check_doc_links.yml |  39 ++++++++
 docs/app/scripts/check_doc_links.py   | 136 ++++++++++++++++++++++++++
 docs/app/tests/test_doc_links.py      | 128 ++++++++++++++++++++++++
 3 files changed, 303 insertions(+)
 create mode 100644 .github/workflows/check_doc_links.yml
 create mode 100644 docs/app/scripts/check_doc_links.py
 create mode 100644 docs/app/tests/test_doc_links.py

diff --git a/.github/workflows/check_doc_links.yml b/.github/workflows/check_doc_links.yml
new file mode 100644
index 00000000000..37c959b44b4
--- /dev/null
+++ b/.github/workflows/check_doc_links.yml
@@ -0,0 +1,39 @@
+name: check-doc-links
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.id || github.ref }}
+  cancel-in-progress: true
+
+on:
+  pull_request:
+    branches: ["main"]
+    paths:
+      - "docs/**/*.md"
+      - "docs/app/scripts/check_doc_links.py"
+      - ".github/workflows/check_doc_links.yml"
+  push:
+    branches: ["main"]
+    paths:
+      - "docs/**/*.md"
+      - "docs/app/scripts/check_doc_links.py"
+      - ".github/workflows/check_doc_links.yml"
+
+jobs:
+  check-doc-links:
+    timeout-minutes: 20
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: docs/app
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/setup_build_env
+        with:
+          python-version: 3.14
+          run-uv-sync: true
+      - name: Build frontend to generate sitemap.xml
+        run: uv run reflex export --frontend-only --no-zip
+      - name: Validate /docs links against sitemap.xml
+        run: uv run python scripts/check_doc_links.py
diff --git a/docs/app/scripts/check_doc_links.py b/docs/app/scripts/check_doc_links.py
new file mode 100644
index 00000000000..c4d744d1e52
--- /dev/null
+++ b/docs/app/scripts/check_doc_links.py
@@ -0,0 +1,136 @@
+"""Validate /docs/* markdown links against the generated sitemap.xml.
+
+For every .md file under the docs tree, find markdown links of the form
+`[text](/docs/...)` and verify:
+
+1. The URL path contains no underscores (URLs use hyphens).
+2. After stripping the `/docs` prefix, the path exists in sitemap.xml.
+
+Run after building the frontend so .web/public/sitemap.xml is present, e.g.:
+
+    cd docs/app
+    uv run reflex export --frontend-only --no-zip
+    uv run python scripts/check_doc_links.py
+"""
+
+from __future__ import annotations
+
+import argparse
+import re
+import sys
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from urllib.parse import urlparse
+
+LINK_RE = re.compile(r"\]\(\s*(/docs(?=[/)#?\s])[^)]*?)(?:\s+\"[^\"]*\")?\s*\)")
+SITEMAP_NS = {"sm": "https://www.sitemaps.org/schemas/sitemap/0.9"}
+SKIP_DIRS = {".web", "node_modules", "__pycache__", ".git", ".venv", "dist", "build"}
+
+
+def _normalize(path: str) -> str:
+    path = path.split("#", 1)[0].split("?", 1)[0]
+    if not path.startswith("/"):
+        path = "/" + path
+    return path.rstrip("/") or "/"
+
+
+def _strip_docs_prefix(path: str) -> str:
+    """Drop a leading `/docs` segment so both deployment styles compare equal."""
+    if path == "/docs":
+        return "/"
+    if path.startswith("/docs/"):
+        return path[len("/docs") :]
+    return path
+
+
+def load_sitemap_paths(sitemap_path: Path) -> set[str]:
+    """Return the set of normalized URL paths declared in sitemap.xml."""
+    tree = ET.parse(sitemap_path)
+    paths: set[str] = set()
+    for loc in tree.getroot().findall("sm:url/sm:loc", SITEMAP_NS):
+        if loc.text is None:
+            continue
+        path = urlparse(loc.text.strip()).path
+        paths.add(_strip_docs_prefix(_normalize(path)))
+    return paths
+
+
+def iter_md_files(md_root: Path):
+    """Yield .md files under md_root, skipping build/vendor directories."""
+    for path in md_root.rglob("*.md"):
+        if any(part in SKIP_DIRS for part in path.relative_to(md_root).parts):
+            continue
+        yield path
+
+
+def iter_md_links(md_root: Path):
+    """Yield (file, line_no, raw_url) for every /docs/* markdown link."""
+    for md_file in iter_md_files(md_root):
+        try:
+            text = md_file.read_text(encoding="utf-8")
+        except OSError:
+            continue
+        for line_no, line in enumerate(text.splitlines(), start=1):
+            for match in LINK_RE.finditer(line):
+                yield md_file, line_no, match.group(1)
+
+
+def check(md_root: Path, sitemap_path: Path) -> list[str]:
+    """Return a list of human-readable error strings."""
+    if not sitemap_path.is_file():
+        return [
+            f"sitemap.xml not found at {sitemap_path}. "
+            "Build the frontend first (e.g. `uv run reflex export --frontend-only --no-zip`)."
+        ]
+
+    valid_paths = load_sitemap_paths(sitemap_path)
+    errors: list[str] = []
+
+    for md_file, line_no, raw in iter_md_links(md_root):
+        location = f"{md_file}:{line_no}"
+
+        if "_" in raw:
+            errors.append(
+                f"{location}: link contains an underscore (use hyphens): {raw!r}"
+            )
+
+        # Compare in /docs-stripped form so the check works whether the
+        # sitemap entries include the /docs prefix or not.
+        sitemap_key = _strip_docs_prefix(_normalize(raw))
+        if sitemap_key not in valid_paths:
+            errors.append(
+                f"{location}: {raw!r} -> {sitemap_key!r} not found in sitemap"
+            )
+
+    return errors
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description=__doc__)
+    here = Path(__file__).resolve().parent
+    parser.add_argument(
+        "--md-root",
+        type=Path,
+        default=here.parent.parent,
+        help="Root directory containing .md docs (default: ../..).",
+    )
+    parser.add_argument(
+        "--sitemap",
+        type=Path,
+        default=here.parent / ".web" / "public" / "sitemap.xml",
+        help="Path to sitemap.xml (default: ../.web/public/sitemap.xml).",
+    )
+    args = parser.parse_args()
+
+    errors = check(args.md_root.resolve(), args.sitemap.resolve())
+    if errors:
+        print(f"Found {len(errors)} broken /docs link(s):", file=sys.stderr)
+        for err in errors:
+            print(f"  {err}", file=sys.stderr)
+        return 1
+    print("All /docs links resolve against sitemap.xml.")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/docs/app/tests/test_doc_links.py b/docs/app/tests/test_doc_links.py
new file mode 100644
index 00000000000..227bff03f65
--- /dev/null
+++ b/docs/app/tests/test_doc_links.py
@@ -0,0 +1,128 @@
+"""Unit tests for scripts/check_doc_links.py."""
+
+import sys
+from pathlib import Path
+
+import pytest
+
+sys.path.append(str(Path(__file__).resolve().parent.parent / "scripts"))
+
+from check_doc_links import LINK_RE, _normalize, check
+
+SITEMAP_XML = """<?xml version='1.0' encoding='utf-8'?>
+<urlset xmlns="https://www.sitemaps.org/schemas/sitemap/0.9">
+  <url><loc>http://localhost:3000/getting-started/basics/</loc></url>
+  <url><loc>http://localhost:3000/library/disclosure/</loc></url>
+</urlset>
+"""
+
+SITEMAP_XML_WITH_DOCS_PREFIX = """<?xml version='1.0' encoding='utf-8'?>
+<urlset xmlns="https://www.sitemaps.org/schemas/sitemap/0.9">
+  <url><loc>http://localhost:3000/docs/getting-started/basics/</loc></url>
+  <url><loc>http://localhost:3000/docs/library/disclosure/</loc></url>
+</urlset>
+"""
+
+
+@pytest.fixture
+def docs_tree(tmp_path: Path) -> tuple[Path, Path]:
+    """Create a tmp docs root + sitemap.xml and return their paths."""
+    sitemap = tmp_path / "sitemap.xml"
+    sitemap.write_text(SITEMAP_XML)
+    md_root = tmp_path / "docs"
+    md_root.mkdir()
+    return md_root, sitemap
+
+
+def test_normalize_strips_fragment_query_and_trailing_slash():
+    assert _normalize("/foo/bar/") == "/foo/bar"
+    assert _normalize("/foo/bar#section") == "/foo/bar"
+    assert _normalize("/foo/bar?x=1") == "/foo/bar"
+    assert _normalize("/") == "/"
+
+
+def test_link_re_matches_basic_link():
+    matches = LINK_RE.findall("see [basics](/docs/getting-started/basics) here")
+    assert matches == ["/docs/getting-started/basics"]
+
+
+def test_link_re_does_not_match_docs_prefix_without_separator():
+    """`/docsfoo` and `/docs-foo` must not be treated as /docs links."""
+    assert LINK_RE.findall("[x](/docsfoo/bar)") == []
+    assert LINK_RE.findall("[x](/docs-foo/bar)") == []
+
+
+def test_link_re_keeps_fragment_and_query():
+    assert LINK_RE.findall("[x](/docs/foo#anchor)") == ["/docs/foo#anchor"]
+    assert LINK_RE.findall("[x](/docs/foo?q=1)") == ["/docs/foo?q=1"]
+
+
+def test_check_passes_for_valid_link(docs_tree):
+    md_root, sitemap = docs_tree
+    (md_root / "page.md").write_text("[ok](/docs/getting-started/basics)\n")
+    assert check(md_root, sitemap) == []
+
+
+def test_check_flags_missing_link(docs_tree):
+    md_root, sitemap = docs_tree
+    (md_root / "page.md").write_text("[bad](/docs/no-such-page)\n")
+    errors = check(md_root, sitemap)
+    assert len(errors) == 1
+    assert "not found in sitemap" in errors[0]
+
+
+def test_check_flags_underscore_and_missing(docs_tree):
+    """Underscore link is reported twice: once for the underscore, once for missing."""
+    md_root, sitemap = docs_tree
+    (md_root / "page.md").write_text("[under](/docs/getting_started/basics)\n")
+    errors = check(md_root, sitemap)
+    assert len(errors) == 2
+    assert any("underscore" in e for e in errors)
+    assert any("not found in sitemap" in e for e in errors)
+
+
+def test_check_ignores_fragment_for_sitemap_lookup(docs_tree):
+    md_root, sitemap = docs_tree
+    (md_root / "page.md").write_text("[anchor](/docs/getting-started/basics#section)\n")
+    assert check(md_root, sitemap) == []
+
+
+def test_check_ignores_query_for_sitemap_lookup(docs_tree):
+    md_root, sitemap = docs_tree
+    (md_root / "page.md").write_text("[q](/docs/library/disclosure?x=1)\n")
+    assert check(md_root, sitemap) == []
+
+
+def test_check_ignores_docs_prefix_lookalikes(docs_tree):
+    """`/docsfoo` should not even be treated as a /docs link."""
+    md_root, sitemap = docs_tree
+    (md_root / "page.md").write_text("[x](/docsfoo/bar)\n")
+    assert check(md_root, sitemap) == []
+
+
+def test_check_skips_build_dirs(docs_tree):
+    md_root, sitemap = docs_tree
+    skipped = md_root / "node_modules" / "vendor"
+    skipped.mkdir(parents=True)
+    (skipped / "README.md").write_text("[bad](/docs/no-such-page)\n")
+    assert check(md_root, sitemap) == []
+
+
+def test_check_returns_helpful_message_when_sitemap_missing(tmp_path):
+    errors = check(tmp_path, tmp_path / "missing.xml")
+    assert len(errors) == 1
+    assert "sitemap.xml not found" in errors[0]
+
+
+def test_check_works_when_sitemap_has_docs_prefix(tmp_path: Path):
+    """Both deployment styles (with or without /docs prefix in sitemap) work."""
+    sitemap = tmp_path / "sitemap.xml"
+    sitemap.write_text(SITEMAP_XML_WITH_DOCS_PREFIX)
+    md_root = tmp_path / "docs"
+    md_root.mkdir()
+    (md_root / "page.md").write_text(
+        "[ok](/docs/getting-started/basics)\n[bad](/docs/no-such-page)\n"
+    )
+    errors = check(md_root, sitemap)
+    assert len(errors) == 1
+    assert "no-such-page" in errors[0]

From 873b5925d1de98e5e1411b422e97c03dae65f836 Mon Sep 17 00:00:00 2001
From: carlosabadia <cutillascarlos@gmail.com>
Date: Mon, 4 May 2026 14:24:44 +0200
Subject: [PATCH 2/4] Allow underscores in link fragments and query strings

---
 docs/app/scripts/check_doc_links.py | 3 ++-
 docs/app/tests/test_doc_links.py    | 7 +++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/docs/app/scripts/check_doc_links.py b/docs/app/scripts/check_doc_links.py
index c4d744d1e52..a94fc683893 100644
--- a/docs/app/scripts/check_doc_links.py
+++ b/docs/app/scripts/check_doc_links.py
@@ -88,8 +88,9 @@ def check(md_root: Path, sitemap_path: Path) -> list[str]:
 
     for md_file, line_no, raw in iter_md_links(md_root):
         location = f"{md_file}:{line_no}"
+        path_only = raw.split("#", 1)[0].split("?", 1)[0]
 
-        if "_" in raw:
+        if "_" in path_only:
             errors.append(
                 f"{location}: link contains an underscore (use hyphens): {raw!r}"
             )
diff --git a/docs/app/tests/test_doc_links.py b/docs/app/tests/test_doc_links.py
index 227bff03f65..2ecb3cfa24d 100644
--- a/docs/app/tests/test_doc_links.py
+++ b/docs/app/tests/test_doc_links.py
@@ -87,6 +87,13 @@ def test_check_ignores_fragment_for_sitemap_lookup(docs_tree):
     assert check(md_root, sitemap) == []
 
 
+def test_check_allows_underscores_in_fragment(docs_tree):
+    """Heading anchors like `#python_code` legitimately contain underscores."""
+    md_root, sitemap = docs_tree
+    (md_root / "page.md").write_text("[x](/docs/getting-started/basics#python_code)\n")
+    assert check(md_root, sitemap) == []
+
+
 def test_check_ignores_query_for_sitemap_lookup(docs_tree):
     md_root, sitemap = docs_tree
     (md_root / "page.md").write_text("[q](/docs/library/disclosure?x=1)\n")

From 5b405dd9bf7c4244af792c5f002a524f13961e51 Mon Sep 17 00:00:00 2001
From: carlosabadia <cutillascarlos@gmail.com>
Date: Tue, 5 May 2026 19:02:05 +0200
Subject: [PATCH 3/4] combine ci and be more verbose

---
 .github/workflows/check_doc_links.yml         | 39 -------------------
 .../.github/workflows/integration_tests.yml   |  3 ++
 docs/app/scripts/check_doc_links.py           | 18 ++++++---
 docs/app/tests/test_doc_links.py              |  9 +++++
 4 files changed, 25 insertions(+), 44 deletions(-)
 delete mode 100644 .github/workflows/check_doc_links.yml

diff --git a/.github/workflows/check_doc_links.yml b/.github/workflows/check_doc_links.yml
deleted file mode 100644
index 37c959b44b4..00000000000
--- a/.github/workflows/check_doc_links.yml
+++ /dev/null
@@ -1,39 +0,0 @@
-name: check-doc-links
-permissions:
-  contents: read
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.id || github.ref }}
-  cancel-in-progress: true
-
-on:
-  pull_request:
-    branches: ["main"]
-    paths:
-      - "docs/**/*.md"
-      - "docs/app/scripts/check_doc_links.py"
-      - ".github/workflows/check_doc_links.yml"
-  push:
-    branches: ["main"]
-    paths:
-      - "docs/**/*.md"
-      - "docs/app/scripts/check_doc_links.py"
-      - ".github/workflows/check_doc_links.yml"
-
-jobs:
-  check-doc-links:
-    timeout-minutes: 20
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        working-directory: docs/app
-    steps:
-      - uses: actions/checkout@v4
-      - uses: ./.github/actions/setup_build_env
-        with:
-          python-version: 3.14
-          run-uv-sync: true
-      - name: Build frontend to generate sitemap.xml
-        run: uv run reflex export --frontend-only --no-zip
-      - name: Validate /docs links against sitemap.xml
-        run: uv run python scripts/check_doc_links.py
diff --git a/docs/app/.github/workflows/integration_tests.yml b/docs/app/.github/workflows/integration_tests.yml
index 1a03be2b533..399c2207439 100644
--- a/docs/app/.github/workflows/integration_tests.yml
+++ b/docs/app/.github/workflows/integration_tests.yml
@@ -63,3 +63,6 @@ jobs:
 
       - name: Export the website
         run: reflex export
+
+      - name: Validate /docs links against generated sitemap
+        run: uv run python scripts/check_doc_links.py
diff --git a/docs/app/scripts/check_doc_links.py b/docs/app/scripts/check_doc_links.py
index a94fc683893..3a96cee4e0c 100644
--- a/docs/app/scripts/check_doc_links.py
+++ b/docs/app/scripts/check_doc_links.py
@@ -84,25 +84,33 @@ def check(md_root: Path, sitemap_path: Path) -> list[str]:
         ]
 
     valid_paths = load_sitemap_paths(sitemap_path)
-    errors: list[str] = []
+    print(f"Loaded {len(valid_paths)} URLs from sitemap {sitemap_path}")
+
+    md_files = list(iter_md_files(md_root))
+    if not md_files:
+        return [f"No .md files found under {md_root}. Check --md-root."]
+    print(f"Scanning {len(md_files)} markdown file(s) under {md_root}")
 
+    errors: list[str] = []
+    links_checked = 0
     for md_file, line_no, raw in iter_md_links(md_root):
+        links_checked += 1
         location = f"{md_file}:{line_no}"
         path_only = raw.split("#", 1)[0].split("?", 1)[0]
+        sitemap_key = _strip_docs_prefix(_normalize(raw))
+        ok = sitemap_key in valid_paths and "_" not in path_only
+        print(f"  [{'OK  ' if ok else 'FAIL'}] {location} -> {raw}")
 
         if "_" in path_only:
             errors.append(
                 f"{location}: link contains an underscore (use hyphens): {raw!r}"
             )
-
-        # Compare in /docs-stripped form so the check works whether the
-        # sitemap entries include the /docs prefix or not.
-        sitemap_key = _strip_docs_prefix(_normalize(raw))
         if sitemap_key not in valid_paths:
             errors.append(
                 f"{location}: {raw!r} -> {sitemap_key!r} not found in sitemap"
             )
 
+    print(f"Checked {links_checked} /docs link(s) across {len(md_files)} file(s).")
     return errors
 
 
diff --git a/docs/app/tests/test_doc_links.py b/docs/app/tests/test_doc_links.py
index 2ecb3cfa24d..546b237a68f 100644
--- a/docs/app/tests/test_doc_links.py
+++ b/docs/app/tests/test_doc_links.py
@@ -109,6 +109,7 @@ def test_check_ignores_docs_prefix_lookalikes(docs_tree):
 
 def test_check_skips_build_dirs(docs_tree):
     md_root, sitemap = docs_tree
+    (md_root / "page.md").write_text("[ok](/docs/getting-started/basics)\n")
     skipped = md_root / "node_modules" / "vendor"
     skipped.mkdir(parents=True)
     (skipped / "README.md").write_text("[bad](/docs/no-such-page)\n")
@@ -121,6 +122,14 @@ def test_check_returns_helpful_message_when_sitemap_missing(tmp_path):
     assert "sitemap.xml not found" in errors[0]
 
 
+def test_check_errors_when_md_root_has_no_markdown(docs_tree):
+    """If the docs tree is empty, fail loudly instead of silently passing."""
+    md_root, sitemap = docs_tree
+    errors = check(md_root, sitemap)
+    assert len(errors) == 1
+    assert "No .md files found" in errors[0]
+
+
 def test_check_works_when_sitemap_has_docs_prefix(tmp_path: Path):
     """Both deployment styles (with or without /docs prefix in sitemap) work."""
     sitemap = tmp_path / "sitemap.xml"

From c7ed33947134d94fda7b46eaa7473238d79dbe52 Mon Sep 17 00:00:00 2001
From: carlosabadia <cutillascarlos@gmail.com>
Date: Tue, 5 May 2026 19:21:36 +0200
Subject: [PATCH 4/4] Replace regex link matching with markdown AST parsing

---
 docs/app/scripts/check_doc_links.py | 142 +++++++++++++++++++++-------
 docs/app/tests/test_doc_links.py    |  66 +++++++++----
 2 files changed, 158 insertions(+), 50 deletions(-)

diff --git a/docs/app/scripts/check_doc_links.py b/docs/app/scripts/check_doc_links.py
index 3a96cee4e0c..238ec4bbdbd 100644
--- a/docs/app/scripts/check_doc_links.py
+++ b/docs/app/scripts/check_doc_links.py
@@ -1,12 +1,16 @@
 """Validate /docs/* markdown links against the generated sitemap.xml.
 
-For every .md file under the docs tree, find markdown links of the form
-`[text](/docs/...)` and verify:
+For every .md file under the docs tree, parse it with reflex-docgen's
+markdown parser and verify every `[text](/docs/...)` link:
 
 1. The URL path contains no underscores (URLs use hyphens).
 2. After stripping the `/docs` prefix, the path exists in sitemap.xml.
 
-Run after building the frontend so .web/public/sitemap.xml is present, e.g.:
+Using the real markdown AST means links inside fenced code blocks are
+correctly ignored, reference-style and multi-line links are caught, and
+escapes/edge cases are handled the same way the docs site renders them.
+
+Run after building the frontend so .web/public/sitemap.xml is present:
 
     cd docs/app
     uv run reflex export --frontend-only --no-zip
@@ -16,13 +20,29 @@
 from __future__ import annotations
 
 import argparse
-import re
 import sys
 import xml.etree.ElementTree as ET
+from collections.abc import Iterator
 from pathlib import Path
 from urllib.parse import urlparse
 
-LINK_RE = re.compile(r"\]\(\s*(/docs(?=[/)#?\s])[^)]*?)(?:\s+\"[^\"]*\")?\s*\)")
+from reflex_docgen.markdown import (
+    Block,
+    BoldSpan,
+    DirectiveBlock,
+    HeadingBlock,
+    ImageSpan,
+    ItalicSpan,
+    LinkSpan,
+    ListBlock,
+    QuoteBlock,
+    Span,
+    StrikethroughSpan,
+    TableBlock,
+    TextBlock,
+    parse_document,
+)
+
 SITEMAP_NS = {"sm": "https://www.sitemaps.org/schemas/sitemap/0.9"}
 SKIP_DIRS = {".web", "node_modules", "__pycache__", ".git", ".venv", "dist", "build"}
 
@@ -55,7 +75,7 @@ def load_sitemap_paths(sitemap_path: Path) -> set[str]:
     return paths
 
 
-def iter_md_files(md_root: Path):
+def iter_md_files(md_root: Path) -> Iterator[Path]:
     """Yield .md files under md_root, skipping build/vendor directories."""
     for path in md_root.rglob("*.md"):
         if any(part in SKIP_DIRS for part in path.relative_to(md_root).parts):
@@ -63,20 +83,56 @@ def iter_md_files(md_root: Path):
         yield path
 
 
-def iter_md_links(md_root: Path):
-    """Yield (file, line_no, raw_url) for every /docs/* markdown link."""
-    for md_file in iter_md_files(md_root):
-        try:
-            text = md_file.read_text(encoding="utf-8")
-        except OSError:
-            continue
-        for line_no, line in enumerate(text.splitlines(), start=1):
-            for match in LINK_RE.finditer(line):
-                yield md_file, line_no, match.group(1)
+def _walk_spans(spans: tuple[Span, ...]) -> Iterator[LinkSpan]:
+    """Yield each LinkSpan found anywhere in the given span tree, depth-first."""
+    containers = (LinkSpan, BoldSpan, ItalicSpan, StrikethroughSpan, ImageSpan)
+    for node in spans:
+        if isinstance(node, LinkSpan):
+            yield node
+        if isinstance(node, containers):
+            yield from _walk_spans(node.children)
+
+
+def _walk_blocks(blocks: tuple[Block, ...]) -> Iterator[LinkSpan]:
+    """Yield every LinkSpan in a block tree; unhandled block types are skipped."""
+    for block in blocks:
+        if isinstance(block, (HeadingBlock, TextBlock)):  # children are spans
+            yield from _walk_spans(block.children)
+        elif isinstance(block, ListBlock):
+            for item in block.items:  # each item holds nested blocks
+                yield from _walk_blocks(item.children)
+        elif isinstance(block, (QuoteBlock, DirectiveBlock)):  # nested blocks
+            yield from _walk_blocks(block.children)
+        elif isinstance(block, TableBlock):
+            for row in (block.header, *block.rows):  # header row is walked too
+                for cell in row.cells:
+                    yield from _walk_spans(cell.children)
+
+
+def _line_for(text: str, target: str, cursor: int) -> tuple[int, int]:
+    """Return ``(line_number, new_cursor)`` for the next link to ``target``.
+
+    Inline links are found by searching for `](target` from ``cursor``; the
+    new cursor sits just past the match. Reference-style links fall back to
+    the `]: target` definition; ``line_number`` is 0 if neither is found.
+    """
+    needle = "](" + target
+    pos = text.find(needle, cursor)
+    if pos != -1:
+        return text.count("\n", 0, pos) + 1, pos + len(needle)
+    # A `[label]: target` definition can sit anywhere in the file, so search
+    # from the top and keep the cursor where it was: moving it to the
+    # definition would hide every later inline link from the search above.
+    pos = text.find("]: " + target)
+    return (text.count("\n", 0, pos) + 1 if pos != -1 else 0), cursor
 
 
 def check(md_root: Path, sitemap_path: Path) -> list[str]:
-    """Return a list of human-readable error strings."""
+    """Return a list of human-readable error strings.
+
+    Prints a per-link trail and a summary so CI logs make it obvious which
+    files were scanned and which links were validated.
+    """
     if not sitemap_path.is_file():
         return [
             f"sitemap.xml not found at {sitemap_path}. "
@@ -93,22 +149,42 @@ def check(md_root: Path, sitemap_path: Path) -> list[str]:
 
     errors: list[str] = []
     links_checked = 0
-    for md_file, line_no, raw in iter_md_links(md_root):
-        links_checked += 1
-        location = f"{md_file}:{line_no}"
-        path_only = raw.split("#", 1)[0].split("?", 1)[0]
-        sitemap_key = _strip_docs_prefix(_normalize(raw))
-        ok = sitemap_key in valid_paths and "_" not in path_only
-        print(f"  [{'OK  ' if ok else 'FAIL'}] {location} -> {raw}")
-
-        if "_" in path_only:
-            errors.append(
-                f"{location}: link contains an underscore (use hyphens): {raw!r}"
-            )
-        if sitemap_key not in valid_paths:
-            errors.append(
-                f"{location}: {raw!r} -> {sitemap_key!r} not found in sitemap"
-            )
+    for md_file in md_files:
+        try:
+            text = md_file.read_text(encoding="utf-8")
+        except OSError:
+            continue
+        try:
+            doc = parse_document(text)
+        except Exception as exc:
+            errors.append(f"{md_file}: failed to parse markdown ({exc})")
+            continue
+
+        cursor = 0
+        for link in _walk_blocks(doc.blocks):
+            target = link.target
+            if not (target == "/docs" or target.startswith("/docs/")):
+                continue
+
+            line_no, cursor = _line_for(text, target, cursor)
+            location = f"{md_file}:{line_no}" if line_no else str(md_file)
+            links_checked += 1
+
+            path_only = _normalize(target)
+            sitemap_key = _strip_docs_prefix(path_only)
+            has_underscore = "_" in path_only
+            in_sitemap = sitemap_key in valid_paths
+            status = "OK" if (in_sitemap and not has_underscore) else "FAIL"
+            print(f"  [{status:<4}] {location} -> {target}")
+
+            if has_underscore:
+                errors.append(
+                    f"{location}: link contains an underscore (use hyphens): {target!r}"
+                )
+            if not in_sitemap:
+                errors.append(
+                    f"{location}: {target!r} -> {sitemap_key!r} not found in sitemap"
+                )
 
     print(f"Checked {links_checked} /docs link(s) across {len(md_files)} file(s).")
     return errors
diff --git a/docs/app/tests/test_doc_links.py b/docs/app/tests/test_doc_links.py
index 546b237a68f..d6ac011f6ab 100644
--- a/docs/app/tests/test_doc_links.py
+++ b/docs/app/tests/test_doc_links.py
@@ -7,7 +7,7 @@
 
 sys.path.append(str(Path(__file__).resolve().parent.parent / "scripts"))
 
-from check_doc_links import LINK_RE, _normalize, check
+from check_doc_links import _normalize, check
 
 SITEMAP_XML = """<?xml version='1.0' encoding='utf-8'?>
 <urlset xmlns="https://www.sitemaps.org/schemas/sitemap/0.9">
@@ -41,22 +41,6 @@ def test_normalize_strips_fragment_query_and_trailing_slash():
     assert _normalize("/") == "/"
 
 
-def test_link_re_matches_basic_link():
-    matches = LINK_RE.findall("see [basics](/docs/getting-started/basics) here")
-    assert matches == ["/docs/getting-started/basics"]
-
-
-def test_link_re_does_not_match_docs_prefix_without_separator():
-    """`/docsfoo` and `/docs-foo` must not be treated as /docs links."""
-    assert LINK_RE.findall("[x](/docsfoo/bar)") == []
-    assert LINK_RE.findall("[x](/docs-foo/bar)") == []
-
-
-def test_link_re_keeps_fragment_and_query():
-    assert LINK_RE.findall("[x](/docs/foo#anchor)") == ["/docs/foo#anchor"]
-    assert LINK_RE.findall("[x](/docs/foo?q=1)") == ["/docs/foo?q=1"]
-
-
 def test_check_passes_for_valid_link(docs_tree):
     md_root, sitemap = docs_tree
     (md_root / "page.md").write_text("[ok](/docs/getting-started/basics)\n")
@@ -130,6 +114,54 @@ def test_check_errors_when_md_root_has_no_markdown(docs_tree):
     assert "No .md files found" in errors[0]
 
 
+def test_check_ignores_links_in_fenced_code_blocks(docs_tree):
+    """A /docs link that appears only inside a ``` fence must not be checked."""
+    md_root, sitemap = docs_tree
+    fenced = "```python\n# See [doc](/docs/no-such-page) for details\n```\n"
+    page = md_root / "page.md"
+    page.write_text("Some text.\n\n" + fenced)
+    assert check(md_root, sitemap) == []
+
+
+def test_check_resolves_reference_style_links(docs_tree):
+    """A `[label][ref]` link is checked against its `[ref]: /docs/...` target."""
+    md_root, sitemap = docs_tree
+    usage = "See [the basics][b] for details.\n"
+    definition = "[b]: /docs/no-such-page\n"
+    (md_root / "page.md").write_text(usage + "\n" + definition)
+    problems = check(md_root, sitemap)
+    assert len(problems) == 1
+    assert "no-such-page" in problems[0]
+
+
+def test_check_reports_distinct_lines_for_repeated_target(docs_tree):
+    """Two links to one /docs target on different lines report distinct lines."""
+    md_root, sitemap = docs_tree
+    (md_root / "page.md").write_text(
+        "First [x](/docs/no-such-page) here.\n"
+        "Some other text.\n"
+        "Second [y](/docs/no-such-page) here.\n"
+    )
+    errors = check(md_root, sitemap)
+    assert len(errors) == 2
+    # rsplit from "<path>:<line>": a Windows drive colon must not shift the field.
+    assert {e.split(": ", 1)[0].rsplit(":", 1)[1] for e in errors} == {"1", "3"}
+
+
+def test_check_finds_links_inside_lists_and_tables(docs_tree):
+    """Broken links in a list item and in a table cell are both reported."""
+    md_root, sitemap = docs_tree
+    bullet = "- bullet [bad](/docs/no-such-list-page)\n"
+    table = "| col |\n|-----|\n| [bad](/docs/no-such-table-page) |\n"
+    page = md_root / "page.md"
+    page.write_text(bullet + "\n" + table)
+    problems = check(md_root, sitemap)
+    assert len(problems) == 2
+    report = "\n".join(problems)
+    assert "no-such-list-page" in report
+    assert "no-such-table-page" in report
+
+
 def test_check_works_when_sitemap_has_docs_prefix(tmp_path: Path):
     """Both deployment styles (with or without /docs prefix in sitemap) work."""
     sitemap = tmp_path / "sitemap.xml"