From f688bc64dee6d65b903e929de396fe7359e9ad01 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Apr 2026 03:42:15 +0000 Subject: [PATCH 1/4] feat: HN/YouTube sidebar layout, channel support, reduce max HN hits Agent-Logs-Url: https://github.com/HanClinto/SimpleGitBlog/sessions/067cd850-bbc4-42f2-aaef-3427ecee8660 Co-authored-by: HanClinto <796749+HanClinto@users.noreply.github.com> --- .gitignore | 1 + blog/generate.py | 56 ++++++++- blog/ingestors/hackernews.py | 2 +- blog/ingestors/youtube.py | 176 +++++++++++++++++++++++----- blog/static/style.css | 130 +++++++++++++++++++- blog/templates/config.html | 5 +- blog/templates/index.html | 108 ++++++++++++++++- config/youtube_channels.txt.example | 29 +++++ 8 files changed, 470 insertions(+), 37 deletions(-) create mode 100644 config/youtube_channels.txt.example diff --git a/.gitignore b/.gitignore index ecdd11e..c0aac48 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ _site/ # Personal ingestor configuration — keep these local, never commit real values. # Copy the corresponding .example file and fill in your own IDs/usernames. config/youtube_playlists.txt +config/youtube_channels.txt config/hackernews.txt diff --git a/blog/generate.py b/blog/generate.py index e337d23..843e66b 100644 --- a/blog/generate.py +++ b/blog/generate.py @@ -102,6 +102,7 @@ def generate_site( token: str | None, output_dir: Path, youtube_playlist_ids: str | None = None, + youtube_channel_ids: str | None = None, hn_usernames: list[str] | None = None, ) -> None: _start = time.monotonic() @@ -118,15 +119,16 @@ def generate_site( writing_posts = github_issues.ingest(repo, token, CONFIG_DIR) print(f" {len(writing_posts)} post(s) ingested from GitHub Issues.") - # --- YouTube playlists (My Watching) — uses free public RSS feeds, no API key --- + # --- YouTube playlists & channels (My Watching) — uses free public RSS feeds, no API key --- watching_posts: list[dict] = [] playlist_ids = youtube.load_playlist_ids(CONFIG_DIR, youtube_playlist_ids) - if playlist_ids: - print("Fetching YouTube playlists (My Watching)…") - watching_posts = youtube.ingest(CONFIG_DIR, youtube_playlist_ids) + channel_ids = youtube.load_channel_ids(CONFIG_DIR, youtube_channel_ids) + if playlist_ids or channel_ids: + print("Fetching YouTube content (My Watching)…") + watching_posts = youtube.ingest(CONFIG_DIR, youtube_playlist_ids, youtube_channel_ids) print(f" {len(watching_posts)} post(s) ingested from YouTube.") else: - print("YOUTUBE_PLAYLIST_IDS not configured — skipping YouTube ingestor.") + print("YOUTUBE_PLAYLIST_IDS / YOUTUBE_CHANNEL_IDS not configured — skipping YouTube ingestor.") # --- Hacker News (My Reading) — requires HN_USERNAME --- reading_posts: list[dict] = [] @@ -157,6 +159,45 @@ def generate_site( reverse=True, ) + # --- Sidebar data --- + # Split HN posts into stories vs. comments for separate sidebar panels + _SIDEBAR_LIMIT = 5 + hn_stories = [p for p in reading_posts if p.get("metadata", {}).get("hn_type") == "story"] + hn_comments = [p for p in reading_posts if p.get("metadata", {}).get("hn_type") == "comment"] + + # Build per-username HN profile links (use first username if multiple) + _hn_user = (hn_usernames or [None])[0] + hn_submitted_url = ( + f"https://news.ycombinator.com/submitted?id={_hn_user}" if _hn_user else None + ) + hn_threads_url = ( + f"https://news.ycombinator.com/threads?id={_hn_user}" if _hn_user else None + ) + hn_profile_url = ( + f"https://news.ycombinator.com/user?id={_hn_user}" if _hn_user else None + ) + + # Collect unique YouTube "view more" URLs (one per playlist/channel) + seen_view_more: set[str] = set() + youtube_view_more_urls: list[dict] = [] + for p in watching_posts: + vmu = p.get("metadata", {}).get("view_more_url") + stype = p.get("metadata", {}).get("source_type", "playlist") + if vmu and vmu not in seen_view_more: + seen_view_more.add(vmu) + youtube_view_more_urls.append({"url": vmu, "source_type": stype}) + + sidebar = { + "hn_stories": hn_stories[:_SIDEBAR_LIMIT], + "hn_comments": hn_comments[:_SIDEBAR_LIMIT], + "hn_submitted_url": hn_submitted_url, + "hn_threads_url": hn_threads_url, + "hn_profile_url": hn_profile_url, + "hn_username": _hn_user, + "watching": watching_posts[:_SIDEBAR_LIMIT], + "youtube_view_more_urls": youtube_view_more_urls, + } + # --- Jinja2 setup --- env = Environment( loader=FileSystemLoader(str(TEMPLATES_DIR)), @@ -204,7 +245,7 @@ def generate_site( # Render index page index_tmpl = env.get_template("index.html") - index_html = index_tmpl.render(sections=active_sections) + index_html = index_tmpl.render(sections=active_sections, sidebar=sidebar) (output_dir / "index.html").write_text(index_html, encoding="utf-8") print("Wrote index.html") @@ -235,6 +276,7 @@ def generate_site( config_ctx = { "hn_usernames": hn_usernames or [], "playlist_ids": playlist_ids, + "channel_ids": channel_ids, "hidden_labels": sorted(hidden_labels), "blocked_user_count": len(blocked_users), "writing_post_count": len(writing_posts), @@ -270,6 +312,7 @@ def main() -> None: output_dir = Path(os.environ.get("OUTPUT_DIR", "_site")).resolve() youtube_playlist_ids = os.environ.get("YOUTUBE_PLAYLIST_IDS") or None + youtube_channel_ids = os.environ.get("YOUTUBE_CHANNEL_IDS") or None # HN usernames: from HN_USERNAME env var and/or local config file (gitignored) hn_usernames = hackernews.load_usernames(CONFIG_DIR, os.environ.get("HN_USERNAME") or None) @@ -279,6 +322,7 @@ def main() -> None: token=token, output_dir=output_dir, youtube_playlist_ids=youtube_playlist_ids, + youtube_channel_ids=youtube_channel_ids, hn_usernames=hn_usernames or None, ) diff --git a/blog/ingestors/hackernews.py b/blog/ingestors/hackernews.py index edd2e6b..1823718 100644 --- a/blog/ingestors/hackernews.py +++ b/blog/ingestors/hackernews.py @@ -25,7 +25,7 @@ _HN_ALGOLIA_BASE = "https://hn.algolia.com/api/v1" _HN_ITEM_BASE = "https://news.ycombinator.com/item" _CONFIG_FILE = "hackernews.txt" -_MAX_HITS_PER_TYPE = 100 +_MAX_HITS_PER_TYPE = 20 # --------------------------------------------------------------------------- diff --git a/blog/ingestors/youtube.py b/blog/ingestors/youtube.py index 56875d2..366ffb7 100644 --- a/blog/ingestors/youtube.py +++ b/blog/ingestors/youtube.py @@ -1,26 +1,30 @@ """ -YouTube playlist ingestor for SimpleGitBlog. +YouTube ingestor for SimpleGitBlog. -Fetches videos from one or more YouTube playlists via YouTube's public Atom/RSS -feed — **no API key required**. +Fetches videos from one or more YouTube playlists AND/OR YouTube channels via +YouTube's public Atom/RSS feeds — **no API key required**. -Feed URL: - https://www.youtube.com/feeds/videos.xml?playlist_id={PLAYLIST_ID} +Feed URLs: + Playlist: https://www.youtube.com/feeds/videos.xml?playlist_id={PLAYLIST_ID} + Channel: https://www.youtube.com/feeds/videos.xml?channel_id={CHANNEL_ID} -Each playlist returns up to the 15 most-recently-added videos. For a personal -"My Watching" section this is normally plenty; add multiple playlists (e.g. one -per year) if you need more history. +Each feed returns up to the 15 most-recently-added videos. Section: "watching" (My Watching) Configuration (GitHub Actions repository settings — do NOT hardcode values): Variable: YOUTUBE_PLAYLIST_IDS Comma-separated playlist IDs - -For local development, you may also place playlist IDs in -``config/youtube_playlists.txt`` (one per line, # comments supported). -That file is gitignored so your personal IDs stay off of version control. + Variable: YOUTUBE_CHANNEL_IDS Comma-separated channel IDs (UCxxxxxx) + or channel handles (@username) — handles + are resolved automatically. + +For local development, you may also place IDs in: + ``config/youtube_playlists.txt`` (one playlist ID per line) + ``config/youtube_channels.txt`` (one channel ID or @handle per line) +Both files are gitignored so your personal IDs stay off of version control. """ +import re import xml.etree.ElementTree as ET from pathlib import Path @@ -28,7 +32,8 @@ from blog.utils import extract_excerpt, format_date, format_datetime, plain_text_to_html -_CONFIG_FILE = "youtube_playlists.txt" +_PLAYLIST_CONFIG_FILE = "youtube_playlists.txt" +_CHANNEL_CONFIG_FILE = "youtube_channels.txt" _RSS_BASE = "https://www.youtube.com/feeds/videos.xml" # XML namespace map for YouTube Atom feeds @@ -60,7 +65,34 @@ def load_playlist_ids(config_dir: Path, env_playlist_ids: str | None = None) -> if pid: ids.add(pid) - config_file = config_dir / _CONFIG_FILE + config_file = config_dir / _PLAYLIST_CONFIG_FILE + if config_file.exists(): + for line in config_file.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if line and not line.startswith("#"): + ids.add(line) + + return sorted(ids) + + +def load_channel_ids(config_dir: Path, env_channel_ids: str | None = None) -> list[str]: + """ + Return a deduplicated list of YouTube channel IDs (or @handles) from two sources: + + 1. ``env_channel_ids`` — the value of the YOUTUBE_CHANNEL_IDS env var + (comma-separated). Accepts ``UCxxxxxx`` channel IDs or ``@handle`` forms. + 2. ``config/youtube_channels.txt`` — optional local-dev override file + (gitignored; never committed with real IDs). + """ + ids: set[str] = set() + + if env_channel_ids: + for cid in env_channel_ids.split(","): + cid = cid.strip() + if cid: + ids.add(cid) + + config_file = config_dir / _CHANNEL_CONFIG_FILE if config_file.exists(): for line in config_file.read_text(encoding="utf-8").splitlines(): line = line.strip() @@ -70,32 +102,75 @@ def load_playlist_ids(config_dir: Path, env_playlist_ids: str | None = None) -> return sorted(ids) +def _resolve_channel_id(handle_or_id: str) -> str | None: + """ + Resolve a channel handle (``@username``) or channel ID (``UCxxxxxx``) to a + confirmed channel ID. + + If the argument looks like a channel ID already (starts with ``UC``), it is + returned as-is. Otherwise the channel page is fetched and the RSS feed link + (which contains the channel ID) is extracted from the HTML. + + Returns the channel ID string, or ``None`` if resolution fails. + """ + # Already looks like a channel ID + if re.match(r'^UC[A-Za-z0-9_-]{20,}$', handle_or_id): + return handle_or_id + + # Build the canonical channel URL + if handle_or_id.startswith("@"): + channel_url = f"https://www.youtube.com/{handle_or_id}" + else: + channel_url = f"https://www.youtube.com/@{handle_or_id}" + + try: + resp = requests.get( + channel_url, + headers={"User-Agent": "Mozilla/5.0 (compatible; SimpleGitBlog/1.0)"}, + timeout=15, + ) + resp.raise_for_status() + except requests.RequestException as exc: + print(f" Warning: could not fetch channel page for {handle_or_id}: {exc}") + return None + + # Look for the RSS feed link in the page HTML, which contains the channel_id + m = re.search( + r'https://www\.youtube\.com/feeds/videos\.xml\?channel_id=(UC[A-Za-z0-9_-]+)', + resp.text, + ) + if m: + return m.group(1) + + print(f" Warning: could not extract channel ID from {channel_url}") + return None + + # --------------------------------------------------------------------------- # RSS / Atom feed helpers # --------------------------------------------------------------------------- -def _fetch_playlist_feed(playlist_id: str) -> list[dict]: +def _fetch_feed(url: str, label: str) -> list[dict]: """ - Fetch videos from a YouTube playlist Atom feed. + Fetch videos from a YouTube Atom feed (playlist or channel). Returns a list of raw entry dicts extracted from the feed. No API key required — the feed is publicly accessible. """ - url = f"{_RSS_BASE}?playlist_id={playlist_id}" try: response = requests.get(url, timeout=30) response.raise_for_status() except requests.HTTPError as exc: - print(f" Warning: YouTube RSS error for playlist {playlist_id}: {exc}") + print(f" Warning: YouTube RSS error for {label}: {exc}") return [] except requests.RequestException as exc: - print(f" Warning: YouTube RSS request failed for playlist {playlist_id}: {exc}") + print(f" Warning: YouTube RSS request failed for {label}: {exc}") return [] try: root = ET.fromstring(response.content) except ET.ParseError as exc: - print(f" Warning: could not parse YouTube RSS for playlist {playlist_id}: {exc}") + print(f" Warning: could not parse YouTube RSS for {label}: {exc}") return [] entries = [] @@ -136,11 +211,23 @@ def _fetch_playlist_feed(playlist_id: str) -> list[dict]: return entries +def _fetch_playlist_feed(playlist_id: str) -> list[dict]: + """Fetch videos from a YouTube playlist Atom feed.""" + url = f"{_RSS_BASE}?playlist_id={playlist_id}" + return _fetch_feed(url, f"playlist {playlist_id}") + + +def _fetch_channel_feed(channel_id: str) -> list[dict]: + """Fetch latest videos from a YouTube channel Atom feed.""" + url = f"{_RSS_BASE}?channel_id={channel_id}" + return _fetch_feed(url, f"channel {channel_id}") + + # --------------------------------------------------------------------------- # Post processing # --------------------------------------------------------------------------- -def _process_entry(entry: dict, playlist_id: str) -> dict | None: +def _process_entry(entry: dict, source_type: str, source_id: str, view_more_url: str) -> dict | None: """Convert a raw feed entry dict into the common post schema.""" video_id = entry.get("video_id", "").strip() if not video_id: @@ -177,9 +264,13 @@ def _process_entry(entry: dict, playlist_id: str) -> dict | None: "comments": [], "metadata": { "video_id": video_id, - "playlist_id": playlist_id, + "source_type": source_type, # "playlist" or "channel" + "source_id": source_id, + "view_more_url": view_more_url, "channel_name": author_name, "thumbnail_url": thumbnail_url, + # Legacy aliases kept for template compatibility + "playlist_id": source_id if source_type == "playlist" else None, }, } @@ -191,17 +282,23 @@ def _process_entry(entry: dict, playlist_id: str) -> dict | None: def ingest( config_dir: Path, env_playlist_ids: str | None = None, + env_channel_ids: str | None = None, ) -> list[dict]: """ - Fetch YouTube playlist videos via public RSS feeds and return posts in - the common schema. No API key required. + Fetch YouTube playlist and channel videos via public RSS feeds and return + posts in the common schema. No API key required. Playlist IDs come from ``env_playlist_ids`` (YOUTUBE_PLAYLIST_IDS env var) and/or the local ``config/youtube_playlists.txt`` file. + + Channel IDs/handles come from ``env_channel_ids`` (YOUTUBE_CHANNEL_IDS env + var) and/or the local ``config/youtube_channels.txt`` file. """ playlist_ids = load_playlist_ids(config_dir, env_playlist_ids) - if not playlist_ids: - print(" No YouTube playlist IDs configured.") + channel_ids_raw = load_channel_ids(config_dir, env_channel_ids) + + if not playlist_ids and not channel_ids_raw: + print(" No YouTube playlist IDs or channel IDs configured.") return [] posts: list[dict] = [] @@ -211,8 +308,33 @@ def ingest( print(f" Fetching playlist RSS: {playlist_id}") entries = _fetch_playlist_feed(playlist_id) print(f" {len(entries)} video(s) found.") + view_more_url = f"https://www.youtube.com/playlist?list={playlist_id}" + for entry in entries: + post = _process_entry(entry, "playlist", playlist_id, view_more_url) + if post is None: + continue + vid = post["metadata"]["video_id"] + if vid not in seen_video_ids: + seen_video_ids.add(vid) + posts.append(post) + + for raw_id in channel_ids_raw: + print(f" Resolving YouTube channel: {raw_id}") + channel_id = _resolve_channel_id(raw_id) + if not channel_id: + print(f" Skipping — could not resolve channel ID for: {raw_id}") + continue + print(f" Fetching channel RSS: {channel_id}") + entries = _fetch_channel_feed(channel_id) + print(f" {len(entries)} video(s) found.") + # Build a human-friendly "view more" URL using the original handle if given + if raw_id.startswith("@") or not raw_id.startswith("UC"): + handle = raw_id if raw_id.startswith("@") else f"@{raw_id}" + view_more_url = f"https://www.youtube.com/{handle}/videos" + else: + view_more_url = f"https://www.youtube.com/channel/{channel_id}/videos" for entry in entries: - post = _process_entry(entry, playlist_id) + post = _process_entry(entry, "channel", channel_id, view_more_url) if post is None: continue vid = post["metadata"]["video_id"] diff --git a/blog/static/style.css b/blog/static/style.css index ac38a6e..823ef19 100644 --- a/blog/static/style.css +++ b/blog/static/style.css @@ -30,7 +30,7 @@ img { max-width: 100%; height: auto; } /* --- Layout ----------------------------------------------- */ .container { - max-width: 800px; + max-width: 1100px; margin: 0 auto; padding: 0 1.25rem; } @@ -509,6 +509,122 @@ main.container { padding-top: 2.5rem; padding-bottom: 2.5rem; } color: #334155; } +/* --- Two-column page layout ------------------------------- */ +.page-layout { + display: grid; + grid-template-columns: 1fr 320px; + gap: 2.5rem; + align-items: start; +} + +.main-column { min-width: 0; } + +/* --- Page sidebar ----------------------------------------- */ +.page-sidebar { + position: sticky; + top: 1.5rem; + display: flex; + flex-direction: column; + gap: 1.5rem; +} + +/* --- Sidebar sections ------------------------------------- */ +.sidebar-section { + background: #fff; + border: 1px solid #e2e8f0; + border-radius: 10px; + padding: 1rem 1.1rem; +} + +.sidebar-section__heading { + font-size: 0.95rem; + font-weight: 700; + color: #111; + margin: 0 0 0.75rem; + display: flex; + align-items: center; + gap: 0.35rem; + border-bottom: 1px solid #e2e8f0; + padding-bottom: 0.5rem; +} + +.sidebar-list { + list-style: none; + margin: 0 0 0.5rem; + padding: 0; + display: flex; + flex-direction: column; + gap: 0.75rem; +} + +.sidebar-item { font-size: 0.85rem; } + +.sidebar-item__title { + display: block; + font-weight: 600; + color: #1e293b; + text-decoration: none; + line-height: 1.35; +} +.sidebar-item__title:hover { color: #0969da; text-decoration: underline; } + +.sidebar-item__meta { + font-size: 0.78rem; + color: #94a3b8; + margin-top: 0.2rem; +} +.sidebar-item__meta a { color: #94a3b8; } +.sidebar-item__meta a:hover { color: #0969da; } + +.sidebar-item__excerpt { + margin: 0.25rem 0 0; + font-size: 0.78rem; + color: #64748b; + display: -webkit-box; + -webkit-line-clamp: 2; + -webkit-box-orient: vertical; + overflow: hidden; +} + +/* --- Video sidebar items ---------------------------------- */ +.sidebar-list--videos { gap: 0.8rem; } + +.sidebar-item--video { + display: flex; + gap: 0.65rem; + align-items: flex-start; +} + +.sidebar-item__thumb-link { flex-shrink: 0; } + +.sidebar-item__thumb { + width: 100px; + height: 56px; + object-fit: cover; + border-radius: 5px; + display: block; +} + +.sidebar-item__video-info { flex: 1; min-width: 0; } +.sidebar-item--video .sidebar-item__title { font-size: 0.82rem; } + +/* --- Sidebar "view all" link ------------------------------ */ +.sidebar-section__view-all { + display: block; + font-size: 0.78rem; + font-weight: 600; + color: #0969da; + text-decoration: none; + margin-top: 0.5rem; +} +.sidebar-section__view-all:hover { text-decoration: underline; } + +.sidebar-section__view-more-links { + display: flex; + flex-direction: column; + gap: 0.25rem; +} + /* --- Labels sidebar --------------------------------------- */ .labels-sidebar { margin-top: 3rem; @@ -658,6 +774,18 @@ a.config-detail:hover { color: #0969da; } } /* --- Responsive ------------------------------------------- */ +@media (max-width: 900px) { + .page-layout { + grid-template-columns: 1fr; + } + + .page-sidebar { + position: static; + border-top: 2px solid #e2e8f0; + padding-top: 2rem; + } +} + @media (max-width: 600px) { html { font-size: 16px; } diff --git a/blog/templates/config.html b/blog/templates/config.html index 580a68d..a1500ea 100644 --- a/blog/templates/config.html +++ b/blog/templates/config.html @@ -52,10 +52,13 @@

📡 Content Sources

{% if playlist_ids %}
{{ playlist_ids | length }} playlist{{ 's' if playlist_ids | length != 1 else '' }} {% endif %} + {% if channel_ids %} +
{{ channel_ids | length }} channel{{ 's' if channel_ids | length != 1 else '' }} + {% endif %} My Watching - {% if playlist_ids %} + {% if playlist_ids or channel_ids %} ✅ Configured {% else %} — Not configured diff --git a/blog/templates/index.html b/blog/templates/index.html index 00d409d..ad7839b 100644 --- a/blog/templates/index.html +++ b/blog/templates/index.html @@ -3,8 +3,13 @@ {% block title %}{{ repo_name }} — Blog{% endblock %} {% block content %} +
+ +{# ====== MAIN COLUMN — My Writing ====== #} +
{% if sections %} {% for section in sections %} + {% if section.key == 'writing' %}

@@ -44,11 +49,12 @@

{% endfor %}

+ {% endif %} {% endfor %} {% else %}

- + My Writing

@@ -74,4 +80,104 @@

Browse by label

{% endif %} +
{# /main-column #} + +{# ====== SIDEBAR — HN + YouTube ====== #} +{% if sidebar.hn_stories or sidebar.hn_comments or sidebar.watching %} +{# /page-sidebar #} +{% endif %} + +
{# /page-layout #} {% endblock %} diff --git a/config/youtube_channels.txt.example b/config/youtube_channels.txt.example new file mode 100644 index 0000000..1bb6c71 --- /dev/null +++ b/config/youtube_channels.txt.example @@ -0,0 +1,29 @@ +# YouTube channel IDs or handles for the "My Watching" section. +# +# HOW TO USE +# ---------- +# No API key is required! The ingestor uses YouTube's public Atom/RSS feeds. +# +# The recommended approach is to set YOUTUBE_CHANNEL_IDS as a GitHub Actions +# repository Variable (repo Settings → Secrets and variables → Variables). +# That way your channel IDs are never committed to source control, so forks +# of this blog start with a clean slate. +# +# For local development only, you may create the file +# config/youtube_channels.txt +# (without the .example extension) and add one entry per line. +# That filename is gitignored, so it will never be committed. +# +# ACCEPTED FORMATS +# ---------------- +# @YourChannelHandle — your YouTube handle (e.g. @HanClinto) +# UCxxxxxxxxxxxxxxxxxxxxxx — raw channel ID starting with UC +# +# Handles are resolved automatically at build time by fetching the channel page. +# +# Each channel returns up to the 15 most-recently-uploaded videos. +# +# EXAMPLE ENTRIES (remove the leading # to activate) +# ---------------------------------------------------- +# @HanClinto +# UCxxxxxxxxxxxxxxxxxxxxxx From 4383b2dbd7999bac5a1396cb49e8a235e622cd01 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Apr 2026 03:44:42 +0000 Subject: [PATCH 2/4] feat: auto-discover YouTube channel from GitHub profile social links Agent-Logs-Url: https://github.com/HanClinto/SimpleGitBlog/sessions/067cd850-bbc4-42f2-aaef-3427ecee8660 Co-authored-by: HanClinto <796749+HanClinto@users.noreply.github.com> --- blog/generate.py | 39 +++++++- blog/ingestors/github_profile.py | 166 +++++++++++++++++++++++++++++++ blog/static/style.css | 20 ++++ blog/templates/config.html | 74 +++++++++++++- 4 files changed, 295 insertions(+), 4 deletions(-) create mode 100644 blog/ingestors/github_profile.py diff --git a/blog/generate.py b/blog/generate.py index 843e66b..bf67b39 100644 --- a/blog/generate.py +++ b/blog/generate.py @@ -43,7 +43,7 @@ from jinja2 import Environment, FileSystemLoader # noqa: E402 import urllib.parse # noqa: E402 -from blog.ingestors import github_issues, hackernews, youtube # noqa: E402 +from blog.ingestors import github_issues, github_profile, hackernews, youtube # noqa: E402 # --------------------------------------------------------------------------- # Paths @@ -107,13 +107,28 @@ def generate_site( ) -> None: _start = time.monotonic() + repo_owner = repo.split("/")[0] repo_name = repo.split("/")[-1] repo_url = f"https://github.com/{repo}" + # Build GitHub API request headers + gh_headers: dict[str, str] = {"Accept": "application/vnd.github+json"} + if token: + gh_headers["Authorization"] = f"Bearer {token}" + # Load config files early so we can pass them to the config page hidden_labels = github_issues._load_hidden_labels(CONFIG_DIR) blocked_users = github_issues._load_blocked_users(CONFIG_DIR) + # --- GitHub owner profile & social links --- + print(f"Fetching GitHub profile for: {repo_owner}…") + owner_profile = github_profile.fetch_owner_profile(repo_owner, gh_headers) + if owner_profile: + print(f" Profile: {owner_profile.name or owner_profile.login}") + print(f" Social links: {len(owner_profile.social_links)} found.") + else: + print(" Could not fetch GitHub profile — social link auto-discovery disabled.") + # --- GitHub Issues (My Writing) — always runs --- print("Fetching GitHub Issues (My Writing)…") writing_posts = github_issues.ingest(repo, token, CONFIG_DIR) @@ -123,12 +138,28 @@ def generate_site( watching_posts: list[dict] = [] playlist_ids = youtube.load_playlist_ids(CONFIG_DIR, youtube_playlist_ids) channel_ids = youtube.load_channel_ids(CONFIG_DIR, youtube_channel_ids) + + # Auto-discover YouTube channel from GitHub social links when not explicitly configured + profile_youtube_handles: list[str] = [] + if owner_profile: + profile_youtube_handles = github_profile.extract_youtube_handles(owner_profile.social_links) + auto_discovered_channels = ( + profile_youtube_handles + if (not channel_ids and profile_youtube_handles) + else [] + ) + effective_channel_ids_str = youtube_channel_ids + if auto_discovered_channels and not channel_ids: + print(f" Auto-discovered YouTube channel(s) from GitHub profile: {auto_discovered_channels}") + effective_channel_ids_str = ",".join(auto_discovered_channels) + channel_ids = youtube.load_channel_ids(CONFIG_DIR, effective_channel_ids_str) + if playlist_ids or channel_ids: print("Fetching YouTube content (My Watching)…") - watching_posts = youtube.ingest(CONFIG_DIR, youtube_playlist_ids, youtube_channel_ids) + watching_posts = youtube.ingest(CONFIG_DIR, youtube_playlist_ids, effective_channel_ids_str) print(f" {len(watching_posts)} post(s) ingested from YouTube.") else: - print("YOUTUBE_PLAYLIST_IDS / YOUTUBE_CHANNEL_IDS not configured — skipping YouTube ingestor.") + print("YOUTUBE_PLAYLIST_IDS / YOUTUBE_CHANNEL_IDS not configured and none found in GitHub profile — skipping YouTube ingestor.") # --- Hacker News (My Reading) — requires HN_USERNAME --- reading_posts: list[dict] = [] @@ -277,6 +308,8 @@ def generate_site( "hn_usernames": hn_usernames or [], "playlist_ids": playlist_ids, "channel_ids": channel_ids, + "auto_discovered_channels": auto_discovered_channels, + "owner_profile": owner_profile, "hidden_labels": sorted(hidden_labels), "blocked_user_count": len(blocked_users), "writing_post_count": len(writing_posts), diff --git a/blog/ingestors/github_profile.py b/blog/ingestors/github_profile.py new file mode 100644 index 0000000..cde4043 --- /dev/null +++ b/blog/ingestors/github_profile.py @@ -0,0 +1,166 @@ +""" +GitHub profile helper for SimpleGitBlog. + +Fetches the repository owner's public GitHub profile and linked social accounts +via the GitHub REST API. Social links are used to auto-discover optional +integrations (e.g. YouTube channel) when explicit configuration is absent. + +The social-accounts endpoint is public — no authentication required — though +a token raises the rate limit from 60 to 5 000 req/hour. + +Endpoints used: + GET /users/{username} → name, bio, avatar, website, twitter_username + GET /users/{username}/social_accounts → [{provider, url}, …] + +Recognised social providers (non-exhaustive): + youtube → https://www.youtube.com/@handle or /channel/UCxx + twitter → https://twitter.com/handle / https://x.com/handle + linkedin → https://www.linkedin.com/in/slug + twitch → https://www.twitch.tv/handle + +Social links are exposed on the config/transparency page so visitors can see +exactly where content is pulled from. +""" + +from __future__ import annotations + +import re +from typing import NamedTuple + +import requests + +_GITHUB_API = "https://api.github.com" +_TIMEOUT = 15 + + +# --------------------------------------------------------------------------- +# Data structures +# --------------------------------------------------------------------------- + +class SocialLink(NamedTuple): + provider: str # e.g. "youtube", "twitter", "linkedin" + url: str # canonical URL as stored on GitHub + + +class OwnerProfile(NamedTuple): + login: str + name: str | None + bio: str | None + avatar_url: str | None + website: str | None # the "blog" field on the profile + twitter_username: str | None # dedicated twitter_username field + social_links: list[SocialLink] + + +# --------------------------------------------------------------------------- +# Fetching +# --------------------------------------------------------------------------- + +def fetch_owner_profile(owner: str, headers: dict) -> OwnerProfile | None: + """ + Fetch the public GitHub profile and social accounts for ``owner``. + + Returns ``None`` if either request fails (network error, rate limit, etc.). + Failures are non-fatal — the blog simply skips auto-discovery. + """ + # --- Basic profile --- + try: + resp = requests.get( + f"{_GITHUB_API}/users/{owner}", + headers=headers, + timeout=_TIMEOUT, + ) + resp.raise_for_status() + profile_data = resp.json() + except requests.RequestException as exc: + print(f" Warning: could not fetch GitHub profile for {owner}: {exc}") + return None + + # --- Social accounts --- + social_links: list[SocialLink] = [] + try: + resp2 = requests.get( + f"{_GITHUB_API}/users/{owner}/social_accounts", + headers=headers, + timeout=_TIMEOUT, + ) + resp2.raise_for_status() + for item in resp2.json(): + provider = (item.get("provider") or "").lower().strip() + url = (item.get("url") or "").strip() + if provider and url: + social_links.append(SocialLink(provider=provider, url=url)) + except requests.RequestException as exc: + print(f" Warning: could not fetch social accounts for {owner}: {exc}") + # Non-fatal — continue with an empty list + + return OwnerProfile( + login=profile_data.get("login") or owner, + name=profile_data.get("name") or None, + bio=profile_data.get("bio") or None, + avatar_url=profile_data.get("avatar_url") or None, + website=profile_data.get("blog") or None, + twitter_username=profile_data.get("twitter_username") or None, + social_links=social_links, + ) + + +# --------------------------------------------------------------------------- +# Social link extraction helpers +# --------------------------------------------------------------------------- + +def extract_youtube_handles(social_links: list[SocialLink]) -> list[str]: + """ + Return YouTube channel handles or IDs found in the owner's social links. + + Recognised URL forms: + https://www.youtube.com/@handle + https://youtube.com/@handle + https://www.youtube.com/channel/UCxxxxxxxx + https://www.youtube.com/c/custom-name (treated as @custom-name) + https://www.youtube.com/user/username (treated as @username) + + Returns a list of strings that can be passed directly to + ``youtube.load_channel_ids()`` or ``youtube._resolve_channel_id()``. + """ + handles: list[str] = [] + for link in social_links: + if link.provider != "youtube": + continue + url = link.url.rstrip("/") + + # @handle form + m = re.search(r'youtube\.com/(@[A-Za-z0-9_.-]+)', url, re.IGNORECASE) + if m: + handles.append(m.group(1)) + continue + + # /channel/UCxxxxxxxx + m = re.search(r'youtube\.com/channel/(UC[A-Za-z0-9_-]{20,})', url, re.IGNORECASE) + if m: + handles.append(m.group(1)) + continue + + # /c/slug or /user/slug — treat as @slug + m = re.search(r'youtube\.com/(?:c|user)/([A-Za-z0-9_.-]+)', url, re.IGNORECASE) + if m: + handles.append(f"@{m.group(1)}") + continue + + return handles + + +def extract_twitter_url(social_links: list[SocialLink]) -> str | None: + """Return the first Twitter / X social link URL, or None.""" + for link in social_links: + if link.provider in ("twitter", "x"): + return link.url + return None + + +def extract_linkedin_url(social_links: list[SocialLink]) -> str | None: + """Return the first LinkedIn social link URL, or None.""" + for link in social_links: + if link.provider == "linkedin": + return link.url + return None diff --git a/blog/static/style.css b/blog/static/style.css index 823ef19..e959a78 100644 --- a/blog/static/style.css +++ b/blog/static/style.css @@ -677,6 +677,26 @@ main.container { padding-top: 2.5rem; padding-bottom: 2.5rem; } } .post-nav a:hover { color: #0969da; text-decoration: underline; } +.config-section__subheading { + font-size: 1rem; + font-weight: 700; + color: #334155; + margin: 1.25rem 0 0.4rem; +} + +.config-profile { + display: flex; + align-items: flex-start; + gap: 0.85rem; + margin-bottom: 1rem; +} + +.config-profile__avatar { + border-radius: 50%; + flex-shrink: 0; + object-fit: cover; +} + /* --- Config page ------------------------------------------ */ .config-alert { padding: 0.9rem 1.25rem; diff --git a/blog/templates/config.html b/blog/templates/config.html index a1500ea..9c6de21 100644 --- a/blog/templates/config.html +++ b/blog/templates/config.html @@ -20,6 +20,73 @@

🔧 Blog Configuration

{% endif %} +{# ------------------------------------------------------------------ #} +{# GitHub Owner Profile & Social Links #} +{# ------------------------------------------------------------------ #} +{% if owner_profile %} +
+

👤 GitHub Profile

+
+ {% if owner_profile.avatar_url %} + {{ owner_profile.login }} + {% endif %} +
+ + + {{ owner_profile.name or owner_profile.login }} + + + {% if owner_profile.name and owner_profile.name != owner_profile.login %} + @{{ owner_profile.login }} + {% endif %} + {% if owner_profile.bio %} +

{{ owner_profile.bio }}

+ {% endif %} +
+
+ {% if owner_profile.social_links %} +

🔗 Linked Social Accounts

+ + + + + + {% for link in owner_profile.social_links %} + + + + + + {% endfor %} + +
PlatformURLUsed for
{{ link.provider | capitalize }} + + {{ link.url }} + + + {% if link.provider == 'youtube' %} + {% if auto_discovered_channels %} + ✅ Auto-discovered — My Watching + {% else %} + Overridden by YOUTUBE_CHANNEL_IDS + {% endif %} + {% elif link.provider in ('twitter', 'x') %} + — Displayed (future integration) + {% elif link.provider == 'linkedin' %} + — Displayed (future integration) + {% else %} + — Not yet integrated + {% endif %} +
+ {% if owner_profile.website %} +

Website: {{ owner_profile.website }}

+ {% endif %} + {% else %} +

No social accounts linked on this GitHub profile.

+ {% endif %} +
+{% endif %} + {# ------------------------------------------------------------------ #} {# Content Sources #} {# ------------------------------------------------------------------ #} @@ -53,7 +120,12 @@

📡 Content Sources


{{ playlist_ids | length }} playlist{{ 's' if playlist_ids | length != 1 else '' }} {% endif %} {% if channel_ids %} -
{{ channel_ids | length }} channel{{ 's' if channel_ids | length != 1 else '' }} +
+ {% if auto_discovered_channels %} + {{ channel_ids | length }} channel{{ 's' if channel_ids | length != 1 else '' }} (auto-discovered from GitHub profile) + {% else %} + {{ channel_ids | length }} channel{{ 's' if channel_ids | length != 1 else '' }} + {% endif %} {% endif %} My Watching From 8ea1ad20fa93557a330984c0db88e07d19c701c6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Apr 2026 03:47:18 +0000 Subject: [PATCH 3/4] fix: address code review issues (regex precision, view_more_url logic, emoji consistency) Agent-Logs-Url: https://github.com/HanClinto/SimpleGitBlog/sessions/067cd850-bbc4-42f2-aaef-3427ecee8660 Co-authored-by: HanClinto <796749+HanClinto@users.noreply.github.com> --- blog/generate.py | 3 +-- blog/ingestors/github_profile.py | 12 ++++++------ blog/ingestors/youtube.py | 7 +++---- blog/templates/index.html | 8 ++++---- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/blog/generate.py b/blog/generate.py index bf67b39..dbafb30 100644 --- a/blog/generate.py +++ b/blog/generate.py @@ -193,9 +193,8 @@ def generate_site( # --- Sidebar data --- # Split HN posts into stories vs. comments for separate sidebar panels _SIDEBAR_LIMIT = 5 - hn_stories = [p for p in reading_posts if p.get("metadata", {}).get("hn_type") == "story"] + hn_stories = [p for p in reading_posts if p.get("metadata", {}).get("hn_type") == "story"] hn_comments = [p for p in reading_posts if p.get("metadata", {}).get("hn_type") == "comment"] - # Build per-username HN profile links (use first username if multiple) _hn_user = (hn_usernames or [None])[0] hn_submitted_url = ( diff --git a/blog/ingestors/github_profile.py b/blog/ingestors/github_profile.py index cde4043..9dfd883 100644 --- a/blog/ingestors/github_profile.py +++ b/blog/ingestors/github_profile.py @@ -129,20 +129,20 @@ def extract_youtube_handles(social_links: list[SocialLink]) -> list[str]: continue url = link.url.rstrip("/") - # @handle form - m = re.search(r'youtube\.com/(@[A-Za-z0-9_.-]+)', url, re.IGNORECASE) + # @handle form — handles only allow letters, digits, underscores, hyphens + m = re.search(r'youtube\.com/(@[A-Za-z0-9_-]+)', url, re.IGNORECASE) if m: handles.append(m.group(1)) continue - # /channel/UCxxxxxxxx - m = re.search(r'youtube\.com/channel/(UC[A-Za-z0-9_-]{20,})', url, re.IGNORECASE) + # /channel/UCxxxxxxxx (UC + exactly 22 base64-ish chars) + m = re.search(r'youtube\.com/channel/(UC[A-Za-z0-9_-]{22})', url, re.IGNORECASE) if m: handles.append(m.group(1)) continue - # /c/slug or /user/slug — treat as @slug - m = re.search(r'youtube\.com/(?:c|user)/([A-Za-z0-9_.-]+)', url, re.IGNORECASE) + # /c/slug or /user/slug — treat as @slug (allow dots for legacy /user/ names) + m = re.search(r'youtube\.com/(?:c|user)/([A-Za-z0-9_-]+)', url, re.IGNORECASE) if m: handles.append(f"@{m.group(1)}") continue diff --git a/blog/ingestors/youtube.py b/blog/ingestors/youtube.py index 366ffb7..2dd2227 100644 --- a/blog/ingestors/youtube.py +++ b/blog/ingestors/youtube.py @@ -114,7 +114,7 @@ def _resolve_channel_id(handle_or_id: str) -> str | None: Returns the channel ID string, or ``None`` if resolution fails. """ # Already looks like a channel ID - if re.match(r'^UC[A-Za-z0-9_-]{20,}$', handle_or_id): + if re.match(r'^UC[A-Za-z0-9_-]{22}$', handle_or_id): return handle_or_id # Build the canonical channel URL @@ -328,9 +328,8 @@ def ingest( entries = _fetch_channel_feed(channel_id) print(f" {len(entries)} video(s) found.") # Build a human-friendly "view more" URL using the original handle if given - if raw_id.startswith("@") or not raw_id.startswith("UC"): - handle = raw_id if raw_id.startswith("@") else f"@{raw_id}" - view_more_url = f"https://www.youtube.com/{handle}/videos" + if raw_id.startswith("@"): + view_more_url = f"https://www.youtube.com/{raw_id}/videos" else: view_more_url = f"https://www.youtube.com/channel/{channel_id}/videos" for entry in entries: diff --git a/blog/templates/index.html b/blog/templates/index.html index ad7839b..92de65a 100644 --- a/blog/templates/index.html +++ b/blog/templates/index.html @@ -54,7 +54,7 @@

{% else %}

- + My Writing

@@ -90,7 +90,7 @@

Browse by label

{% if sidebar.hn_stories %}