Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ __pycache__/
website/output/
website/data/

# claude code
# planning docs
docs/

# agents
.agents/
.claude/skills/
.gstack/
.playwright-cli/
.superpowers/
.playwright-cli/
skills-lock.json

# codex
.agents/
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Awesome Python

An opinionated list of Python frameworks, libraries, tools, and resources.
An opinionated guide to the best Python frameworks, libraries, tools, and resources.

# **Sponsors**

Expand Down
141 changes: 139 additions & 2 deletions website/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import json
import re
import shutil
import xml.etree.ElementTree as ET
from collections.abc import Sequence
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
Expand All @@ -12,6 +14,11 @@
from readme_parser import ParsedGroup, ParsedSection, parse_readme, parse_sponsors

# Matches an http(s) github.com repository URL (optional ".git" suffix and
# trailing slash allowed) and captures the "owner/repo" part.
GITHUB_REPO_URL_RE = re.compile(r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$")
# Matches an inline Markdown link "[text](target)" and captures the target,
# which may not contain whitespace or a closing parenthesis.
MARKDOWN_LINK_RE = re.compile(r"\[[^\]]+\]\(([^)\s]+)\)")
# Matches a line that starts (after optional whitespace) with "-" followed by whitespace.
BULLET_LINE_RE = re.compile(r"^\s*-\s")
# Site root; used as the single sitemap entry and as the prefix of SITEMAP_URL.
SITE_URL = "https://awesome-python.com/"
# Sitemap location advertised in robots.txt.
SITEMAP_URL = f"{SITE_URL}sitemap.xml"
# XML namespace applied to every element written to sitemap.xml.
SITEMAP_NS = "http://www.sitemaps.org/schemas/sitemap/0.9"

SOURCE_TYPE_DOMAINS = {
"docs.python.org": "Built-in",
Expand Down Expand Up @@ -67,6 +74,125 @@ def sort_key(entry: dict) -> tuple[int, int, int, str]:
return sorted(entries, key=sort_key)


def build_robots_txt() -> str:
    """Return the robots.txt contents.

    The text is one wildcard user-agent group (with a Content-Signal line and
    `Allow: /`), a blank line, and a `Sitemap:` line pointing at SITEMAP_URL.
    Every line, including the last, ends with a newline.
    """
    directives = (
        "User-agent: *\n",
        "Content-Signal: search=yes, ai-input=yes, ai-train=yes\n",
        "Allow: /\n",
        "\n",
        f"Sitemap: {SITEMAP_URL}\n",
    )
    return "".join(directives)


def write_sitemap_xml(path: Path, urls: Sequence[tuple[str, str]]) -> None:
    """Write a sitemap document for `urls` to `path`.

    Args:
        path: Destination file; it is written from scratch.
        urls: `(location, lastmod)` string pairs. Each pair becomes one `<url>`
            element holding a `<loc>` and a `<lastmod>` child, in input order.
    """

    def qualified(tag: str) -> str:
        # ElementTree's "{namespace}tag" notation for a namespaced element name.
        return f"{{{SITEMAP_NS}}}{tag}"

    # Map the sitemap namespace to the empty prefix before serialising.
    ET.register_namespace("", SITEMAP_NS)

    root = ET.Element(qualified("urlset"))
    for location, modified in urls:
        entry = ET.SubElement(root, qualified("url"))
        ET.SubElement(entry, qualified("loc")).text = location
        ET.SubElement(entry, qualified("lastmod")).text = modified

    document = ET.ElementTree(root)
    document.write(path, encoding="utf-8", xml_declaration=True)

    # Finish the file with a single newline byte after the serialised document.
    with path.open("ab") as handle:
        handle.write(b"\n")


def top_level_heading_text(line: str) -> str | None:
stripped = line.strip()
if not stripped.startswith("# "):
return None
return stripped.removeprefix("#").strip().strip("#").strip().strip("*").strip()


# Literal token in the llms.txt template text; build_llms_txt swaps it for the
# README's Categories body using plain string replacement.
LLMS_CATEGORIES_PLACEHOLDER = "{{ categories_md }}"


def extract_categories_body(markdown: str) -> str:
    """Return the text under the first `# Categories` heading.

    The heading line itself and any blank lines directly after it are left
    out. The body runs up to the next level-one heading, or to the end of the
    document when there is none.

    Returns:
        An empty string when no level-one heading named "categories"
        (case-insensitive) exists; otherwise the body with trailing whitespace
        trimmed and a single newline appended.
    """
    lines = markdown.splitlines(keepends=True)
    total = len(lines)

    # (line index, heading text) for every level-one heading, in document order.
    headings = [
        (idx, title)
        for idx, raw in enumerate(lines)
        if (title := top_level_heading_text(raw)) is not None
    ]

    # Position within `headings` of the first heading named "categories".
    found = next(
        (pos for pos, (_, name) in enumerate(headings) if name.lower() == "categories"),
        None,
    )
    if found is None:
        return ""

    # The body starts after the heading line, past any leading blank lines.
    body_start = headings[found][0] + 1
    while body_start < total and not lines[body_start].strip():
        body_start += 1

    # The body stops at the following level-one heading, if there is one.
    body_end = headings[found + 1][0] if found + 1 < len(headings) else total
    return "".join(lines[body_start:body_end]).rstrip() + "\n"


def build_llms_txt(template_text: str, readme_text: str, stars_data: dict[str, dict]) -> str:
    """Fill the llms.txt template with the README's Categories body and add star counts.

    Args:
        template_text: Template containing LLMS_CATEGORIES_PLACEHOLDER.
        readme_text: README Markdown from which the Categories body is taken.
        stars_data: Star data passed through to annotate_entries_with_stars.

    Returns:
        The filled template, annotated with bare star numbers (`format_stars=str`).
    """
    categories_md = extract_categories_body(readme_text).rstrip()
    filled_template = template_text.replace(LLMS_CATEGORIES_PLACEHOLDER, categories_md)
    return annotate_entries_with_stars(
        filled_template,
        stars_data,
        format_stars=str,
    )


def annotate_entries_with_stars(
    markdown: str,
    stars_data: dict[str, dict],
    *,
    format_stars=None,
) -> str:
    """Append a star count to bullet lines that link to a repository with known stars.

    Each bullet line is scanned left to right for Markdown links. The first
    link whose target resolves (via `extract_github_repo`) to a key present in
    `stars_data` with a "stars" value decides the annotation, which is added
    as " (<text>)" at the end of the line, before its line terminator.
    Non-bullet lines, and bullet lines with no such link, pass through unchanged.

    Args:
        markdown: Markdown text to annotate.
        stars_data: Mapping from repository key to a dict that may hold "stars".
            (Keys are presumably "owner/repo" as produced by
            `extract_github_repo` - confirm against that helper.)
        format_stars: Callable that turns the star value into the parenthesised
            text. Defaults to "{N} GitHub stars"; pass `str` for a bare number.

    Returns:
        The annotated Markdown, with every original line terminator preserved.
    """

    def default_format(count) -> str:
        return f"{count} GitHub stars"

    render = default_format if format_stars is None else format_stars

    out: list[str] = []
    for line in markdown.splitlines(keepends=True):
        if not BULLET_LINE_RE.match(line):
            out.append(line)
            continue

        # Separate the content from its terminator. Stripping "\r" as well as
        # "\n" keeps CRLF (and lone CR) endings intact after the annotation
        # instead of leaving a carriage return in the middle of the line.
        content = line.rstrip("\r\n")
        terminator = line[len(content):]

        annotated = line
        for match in MARKDOWN_LINK_RE.finditer(line):
            repo_key = extract_github_repo(match.group(1))
            if not repo_key:
                continue
            entry = stars_data.get(repo_key)
            if not entry or "stars" not in entry:
                continue
            annotated = f"{content} ({render(entry['stars'])}){terminator}"
            break  # only the first link with star data is used
        out.append(annotated)
    return "".join(out)


def remove_sponsors_section(markdown: str) -> str:
    """Return `markdown` without its level-one "Sponsors" section.

    The section starts at the first level-one heading whose text is
    "sponsors" (case-insensitive) and runs up to, but not including, the next
    level-one heading, or to the end of the document when none follows.
    When there is no such heading the input is returned unchanged.
    """
    lines = markdown.splitlines(keepends=True)

    start_idx = next(
        (
            i
            for i, line in enumerate(lines)
            if (top_level_heading_text(line) or "").lower() == "sponsors"
        ),
        None,
    )
    if start_idx is None:
        return markdown

    # Any level-one heading ends the section, including one whose text is
    # empty; hence the explicit `is not None` rather than a truthiness test.
    end_idx = next(
        (
            i
            for i in range(start_idx + 1, len(lines))
            if top_level_heading_text(lines[i]) is not None
        ),
        len(lines),
    )

    return "".join(lines[:start_idx] + lines[end_idx:])


def extract_entries(
categories: list[ParsedSection],
groups: list[ParsedGroup],
Expand Down Expand Up @@ -131,6 +257,7 @@ def build(repo_root: Path) -> None:
categories = [cat for g in parsed_groups for cat in g["categories"]]
total_entries = sum(c["entry_count"] for c in categories)
entries = extract_entries(categories, parsed_groups)
build_date = datetime.now(UTC)

stars_data = load_stars(website / "data" / "github_stars.json")

Expand All @@ -155,6 +282,8 @@ def build(repo_root: Path) -> None:
env = Environment(
loader=FileSystemLoader(website / "templates"),
autoescape=True,
trim_blocks=True,
lstrip_blocks=True,
)

site_dir = website / "output"
Expand All @@ -171,7 +300,7 @@ def build(repo_root: Path) -> None:
total_entries=total_entries,
total_categories=len(categories),
repo_stars=repo_stars,
build_date=datetime.now(UTC).strftime("%B %d, %Y"),
build_date=build_date.strftime("%B %d, %Y"),
sponsors=sponsors,
),
encoding="utf-8",
Expand All @@ -182,7 +311,15 @@ def build(repo_root: Path) -> None:
if static_src.exists():
shutil.copytree(static_src, static_dst, dirs_exist_ok=True)

(site_dir / "llms.txt").write_text(readme_text, encoding="utf-8")
markdown_index = annotate_entries_with_stars(
remove_sponsors_section(readme_text), stars_data
)
llms_template = (website / "templates" / "llms.txt").read_text(encoding="utf-8")
llms_txt = build_llms_txt(llms_template, readme_text, stars_data)
(site_dir / "robots.txt").write_text(build_robots_txt(), encoding="utf-8")
write_sitemap_xml(site_dir / "sitemap.xml", [(SITE_URL, build_date.date().isoformat())])
(site_dir / "index.md").write_text(markdown_index, encoding="utf-8")
(site_dir / "llms.txt").write_text(llms_txt, encoding="utf-8")

print(f"Built single page with {len(parsed_groups)} groups, {len(categories)} categories")
print(f"Total entries: {total_entries}")
Expand Down
35 changes: 18 additions & 17 deletions website/templates/base.html
Original file line number Diff line number Diff line change
@@ -1,26 +1,27 @@
<!doctype html>
<html lang="en">
<head>
{% set default_meta_title = "Awesome Python" %}
{% set default_meta_description = "An opinionated guide to the best Python frameworks, libraries, and tools. Explore " ~ (entries | length) ~ " curated projects across " ~ total_categories ~ " categories, from AI and agents to data science and web development." %}
{% set canonical_url = "https://awesome-python.com/" %}
{% set social_image_url = "https://awesome-python.com/static/og-image.png" %}
{% set meta_title %}{% block title %}{{ default_meta_title }}{% endblock %}{% endset %}
{% set meta_description %}{% block description %}{{ default_meta_description }}{% endblock %}{% endset %}
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>{% block title %}Awesome Python{% endblock %}</title>
<meta
name="description"
content="{% block description %}An opinionated list of Python frameworks, libraries, tools, and resources. {{ total_entries }} projects across {{ categories | length }} categories.{% endblock %}"
/>
<link rel="canonical" href="https://awesome-python.com/" />
<title>{{ meta_title | trim }}</title>
<meta name="description" content="{{ meta_description | trim }}" />
<link rel="canonical" href="{{ canonical_url }}" />
<link rel="alternate" type="text/markdown" href="/index.md" />
<meta property="og:type" content="website" />
<meta property="og:title" content="Awesome Python" />
<meta
property="og:description"
content="An opinionated list of Python frameworks, libraries, tools, and resources."
/>
<meta
property="og:image"
content="https://awesome-python.com/static/og-image.png"
/>
<meta property="og:url" content="https://awesome-python.com/" />
<meta name="twitter:card" content="summary" />
<meta property="og:title" content="{{ meta_title | trim }}" />
<meta property="og:description" content="{{ meta_description | trim }}" />
<meta property="og:image" content="{{ social_image_url }}" />
<meta property="og:url" content="{{ canonical_url }}" />
<meta name="twitter:card" content="summary_large_image" />
<meta name="twitter:title" content="{{ meta_title | trim }}" />
<meta name="twitter:description" content="{{ meta_description | trim }}" />
<meta name="twitter:image" content="{{ social_image_url }}" />
<meta name="theme-color" content="#1c1410" />
<link rel="icon" href="/static/favicon.svg" type="image/svg+xml" />
<link rel="preconnect" href="https://fonts.googleapis.com" />
Expand Down
9 changes: 9 additions & 0 deletions website/templates/llms.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Awesome Python

An opinionated guide to the best Python frameworks, libraries, tools, and resources.

Use this curated list when you need to find a high-quality Python library or tool for tasks such as web development, data science, machine learning, AI agents, automation, testing, or DevOps. The trailing number on each entry is its star count on GitHub.

# Categories

{{ categories_md }}
Loading