ds-ebooks · pull · May 4, 2026 · May 4, 2026 · May 4, 2026 · May 4, 2026
diff --git a/README.md b/README.md
diff --git a/website/build.py b/website/build.py
@@ -243,16 +243,14 @@ def write_sitemap_xml(path: Path, urls: Sequence[tuple[str, str]]) -> None:
 
 def top_level_heading_text(line: str) -> str | None:
     stripped = line.strip()
-    if not stripped.startswith("# "):
+    match = re.match(r"^(#{1,2})\s+(.+)$", stripped)  # one or two leading '#': accepts H1 and H2, rejects H3+
+    if match is None:
         return None
-    return stripped.removeprefix("#").strip().strip("#").strip().strip("*").strip()
-
-
-LLMS_CATEGORIES_PLACEHOLDER = "{{ categories_md }}"
+    return match.group(2).strip().strip("#").strip().strip("*").strip()  # drop closing '#' run and surrounding '*' emphasis markers
 
 
 def extract_categories_body(markdown: str) -> str:
-    """Return content under the `# Categories` heading, excluding the heading line itself."""
+    """Return content from `Categories` through `Projects`, excluding later sections."""
     lines = markdown.splitlines(keepends=True)
     start_idx = None
     end_idx = len(lines)
@@ -264,19 +262,40 @@ def extract_categories_body(markdown: str) -> str:
             start_idx = i + 1
             while start_idx < len(lines) and lines[start_idx].strip() == "":
                 start_idx += 1
-        elif start_idx is not None and i >= start_idx:
+        elif start_idx is not None and heading.lower() in ("resources", "contributing"):
             end_idx = i
             break
     if start_idx is None:
         return ""
     return "".join(lines[start_idx:end_idx]).rstrip() + "\n"
 
 
-def build_llms_txt(template_text: str, readme_text: str, stars_data: dict[str, dict]) -> str:
-    """Render the llms.txt template by injecting the README's Categories body, then annotate stars."""
-    body = extract_categories_body(readme_text).rstrip()
-    rendered = template_text.replace(LLMS_CATEGORIES_PLACEHOLDER, body)
-    return annotate_entries_with_stars(rendered, stars_data, format_stars=str)
+def build_llms_txt(
+    template_text: str,
+    *,
+    readme_text: str,
+    stars_data: dict[str, dict],
+    categories: Sequence[ParsedSection],
+    total_entries: int,
+) -> str:
+    """Render the llms.txt template with site links, entry/category counts, and the star-annotated README catalog."""
+    categories_md = annotate_entries_with_stars(
+        extract_categories_body(readme_text).rstrip(),
+        stars_data,
+        format_stars=lambda n: f"GitHub stars: {n}",  # same wording the llms.txt template tells readers to expect
+    )
+    text_env = Environment(autoescape=False, trim_blocks=True, lstrip_blocks=True)  # plain-text output, so no HTML escaping
+    rendered = text_env.from_string(template_text).render(
+        site_url=SITE_URL,
+        github_repo_url="https://github.com/vinta/awesome-python",
+        contributing_url="https://github.com/vinta/awesome-python/blob/master/CONTRIBUTING.md",
+        sponsorship_url=SPONSORSHIP_PUBLIC_URL,
+        sitemap_url=SITEMAP_URL,
+        categories_md=categories_md,
+        total_entries=total_entries,
+        total_categories=len(categories),  # `categories` is used here only for its length
+    )
+    return rendered.rstrip() + "\n"  # normalize to exactly one trailing newline
 
 
 def annotate_entries_with_stars(
@@ -588,11 +607,16 @@ def render_category(
     if static_src.exists():
         shutil.copytree(static_src, static_dst, dirs_exist_ok=True)
 
-    markdown_index = annotate_entries_with_stars(remove_sponsors_section(readme_text), stars_data)
     sponsorship_md = repo_root / "SPONSORSHIP.md"
     sponsorship_md_mtime = datetime.fromtimestamp(sponsorship_md.stat().st_mtime, tz=UTC).date().isoformat()
     llms_template = (website / "templates" / "llms.txt").read_text(encoding="utf-8")
-    llms_txt = build_llms_txt(llms_template, readme_text, stars_data)
+    llms_txt = build_llms_txt(
+        llms_template,
+        readme_text=readme_text,
+        stars_data=stars_data,
+        categories=categories,
+        total_entries=total_entries,
+    )
     (site_dir / "robots.txt").write_text(build_robots_txt(), encoding="utf-8")
     sitemap_date = build_date.date().isoformat()
     sitemap_urls = [(SITE_URL, sitemap_date)]
@@ -604,7 +628,6 @@ def render_category(
         sitemap_urls.append((subcategory_public_url(cat_slug, sub_slug), sitemap_date))
     sitemap_urls.append((SPONSORSHIP_PUBLIC_URL, sponsorship_md_mtime))
     write_sitemap_xml(site_dir / "sitemap.xml", sitemap_urls)
-    (site_dir / "index.md").write_text(markdown_index, encoding="utf-8")
     (site_dir / "llms.txt").write_text(llms_txt, encoding="utf-8")
 
     print(f"Built site with {len(parsed_groups)} groups, {len(categories)} categories")

diff --git a/website/readme_parser.py b/website/readme_parser.py
@@ -114,6 +114,13 @@ def _heading_text(node: SyntaxTreeNode) -> str:
     return ""
 
 
+def _heading_level(node: SyntaxTreeNode) -> int | None:
+    """Return the heading level parsed from the node's tag (e.g. 2 for "h2"), or None if the node is not a heading."""
+    if node.type != "heading" or not node.tag.startswith("h"):
+        return None
+    return int(node.tag[1:])  # digits after the leading "h"
+
+
 def _extract_description_children(nodes: list[SyntaxTreeNode]) -> list[SyntaxTreeNode]:
     """Extract description children from the first paragraph if it's a single <em> block.
 
@@ -303,7 +310,7 @@ def _parse_grouped_sections(
 ) -> list[ParsedGroup]:
     """Parse nodes into groups of categories using bold markers as group boundaries.
 
-    Bold-only paragraphs (**Group Name**) delimit groups. H2 headings under each
+    Bold-only paragraphs (**Group Name**) delimit groups. H2/H3 headings under each
     bold marker become categories within that group. Categories appearing before
     any bold marker go into an "Other" group.
     """
@@ -341,7 +348,7 @@ def flush_group() -> None:
             flush_group()
             current_group_name = bold_name
             current_cat_body = []
-        elif node.type == "heading" and node.tag == "h2":
+        elif node.type == "heading" and node.tag in ("h2", "h3"):
             flush_cat()
             current_cat_name = _heading_text(node)
             current_cat_body = []
@@ -383,7 +390,7 @@ def _parse_sponsor_item(inline: SyntaxTreeNode) -> ParsedSponsor | None:
 
 
 def parse_sponsors(text: str) -> list[ParsedSponsor]:
-    """Parse the `# Sponsors` section of README.md into a list of sponsors.
+    """Parse the `Sponsors` section of README.md into a list of sponsors.
 
     Expects bullets in the form `**[name](url)**: description`.
     Returns [] if no Sponsors section exists.
@@ -395,14 +402,18 @@ def parse_sponsors(text: str) -> list[ParsedSponsor]:
 
     start_idx = None
     end_idx = len(children)
+    start_level = None
     for i, node in enumerate(children):
-        if node.type == "heading" and node.tag == "h1":
-            title = _heading_text(node).strip().lower()
-            if start_idx is None and title == "sponsors":
-                start_idx = i + 1
-            elif start_idx is not None:
-                end_idx = i
-                break
+        level = _heading_level(node)
+        if level is None:
+            continue
+        title = _heading_text(node).strip().lower()
+        if start_idx is None and title == "sponsors":
+            start_idx = i + 1
+            start_level = level
+        elif start_idx is not None and start_level is not None and level <= start_level:
+            end_idx = i
+            break
     if start_idx is None:
         return []
 
@@ -426,26 +437,26 @@ def parse_readme(text: str) -> list[ParsedGroup]:
     """Parse README.md text into grouped categories.
 
     Returns a list of ParsedGroup dicts containing nested categories.
-    Content between the thematic break (---) and # Resources or # Contributing
-    is parsed as categories grouped by bold markers (**Group Name**).
+    Content between the Projects heading and Resources or Contributing is parsed
+    as categories grouped by bold markers (**Group Name**).
     """
     md = MarkdownIt("commonmark")
     tokens = md.parse(text)
     root = SyntaxTreeNode(tokens)
     children = root.children
 
-    # Find thematic break (---) and section boundaries in one pass
-    hr_idx = None
+    # Find Projects and section boundaries in one pass.
+    projects_idx = None
     cat_end_idx = None
     for i, node in enumerate(children):
-        if hr_idx is None and node.type == "hr":
-            hr_idx = i
-        elif node.type == "heading" and node.tag == "h1":
+        if _heading_level(node) in (1, 2):
             text_content = _heading_text(node)
-            if cat_end_idx is None and text_content in ("Resources", "Contributing"):
+            if projects_idx is None and text_content == "Projects":
+                projects_idx = i
+            elif cat_end_idx is None and text_content in ("Resources", "Contributing"):
                 cat_end_idx = i
-    if hr_idx is None:
+    if projects_idx is None:
         return []
 
-    cat_nodes = children[hr_idx + 1 : cat_end_idx or len(children)]
+    cat_nodes = children[projects_idx + 1 : cat_end_idx or len(children)]
     return _parse_grouped_sections(cat_nodes)
diff --git a/website/templates/base.html b/website/templates/base.html
@@ -14,7 +14,7 @@
     <meta name="description" content="{{ meta_description | trim }}" />
     <link rel="canonical" href="{{ canonical_url | trim }}" />
     {% block alternate_links %}
-    <link rel="alternate" type="text/markdown" href="/index.md" />
+    <link rel="alternate" type="text/plain" href="/llms.txt" title="LLMs text entry point" />
     {% endblock %}
     <meta property="og:type" content="website" />
     <meta property="og:title" content="{{ meta_title | trim }}" />

diff --git a/website/templates/llms.txt b/website/templates/llms.txt
@@ -1,9 +1,17 @@
 # Awesome Python
 
-An opinionated guide to the best Python frameworks, libraries, tools, and resources.
+Awesome Python is an opinionated catalog of {{ total_entries }} Python frameworks, libraries, tools, and resources across {{ total_categories }} {% if total_categories == 1 %}category{% else %}categories{% endif %}.
 
-Use this curated list when you need to find a high-quality Python library or tool for tasks such as web development, data science, machine learning, AI agents, automation, testing, or DevOps. The trailing number on each entry is its star count on GitHub.
+Scan the category index, then jump to the matching section for direct project links and short descriptions. GitHub entries with known star data end with a `GitHub stars: N` note in parentheses; treat it as popularity context, not a quality guarantee. Use the homepage for project context outside the catalog.
 
-# Categories
+## Primary Links
+
+- Homepage: {{ site_url }}
+- GitHub repository: {{ github_repo_url }}
+- Contributing guide: {{ contributing_url }}
+- Sponsorship: {{ sponsorship_url }}
+- Sitemap: {{ sitemap_url }}
+
+## Categories
 
 {{ categories_md }}