From f688bc64dee6d65b903e929de396fe7359e9ad01 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 14 Apr 2026 03:42:15 +0000 Subject: [PATCH 1/4] feat: HN/YouTube sidebar layout, channel support, reduce max HN hits Agent-Logs-Url: https://github.com/HanClinto/SimpleGitBlog/sessions/067cd850-bbc4-42f2-aaef-3427ecee8660 Co-authored-by: HanClinto <796749+HanClinto@users.noreply.github.com> --- .gitignore | 1 + blog/generate.py | 56 ++++++++- blog/ingestors/hackernews.py | 2 +- blog/ingestors/youtube.py | 176 +++++++++++++++++++++++----- blog/static/style.css | 130 +++++++++++++++++++- blog/templates/config.html | 5 +- blog/templates/index.html | 108 ++++++++++++++++- config/youtube_channels.txt.example | 29 +++++ 8 files changed, 470 insertions(+), 37 deletions(-) create mode 100644 config/youtube_channels.txt.example diff --git a/.gitignore b/.gitignore index ecdd11e..c0aac48 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ _site/ # Personal ingestor configuration — keep these local, never commit real values. # Copy the corresponding .example file and fill in your own IDs/usernames. 
config/youtube_playlists.txt +config/youtube_channels.txt config/hackernews.txt diff --git a/blog/generate.py b/blog/generate.py index e337d23..843e66b 100644 --- a/blog/generate.py +++ b/blog/generate.py @@ -102,6 +102,7 @@ def generate_site( token: str | None, output_dir: Path, youtube_playlist_ids: str | None = None, + youtube_channel_ids: str | None = None, hn_usernames: list[str] | None = None, ) -> None: _start = time.monotonic() @@ -118,15 +119,16 @@ def generate_site( writing_posts = github_issues.ingest(repo, token, CONFIG_DIR) print(f" {len(writing_posts)} post(s) ingested from GitHub Issues.") - # --- YouTube playlists (My Watching) — uses free public RSS feeds, no API key --- + # --- YouTube playlists & channels (My Watching) — uses free public RSS feeds, no API key --- watching_posts: list[dict] = [] playlist_ids = youtube.load_playlist_ids(CONFIG_DIR, youtube_playlist_ids) - if playlist_ids: - print("Fetching YouTube playlists (My Watching)…") - watching_posts = youtube.ingest(CONFIG_DIR, youtube_playlist_ids) + channel_ids = youtube.load_channel_ids(CONFIG_DIR, youtube_channel_ids) + if playlist_ids or channel_ids: + print("Fetching YouTube content (My Watching)…") + watching_posts = youtube.ingest(CONFIG_DIR, youtube_playlist_ids, youtube_channel_ids) print(f" {len(watching_posts)} post(s) ingested from YouTube.") else: - print("YOUTUBE_PLAYLIST_IDS not configured — skipping YouTube ingestor.") + print("YOUTUBE_PLAYLIST_IDS / YOUTUBE_CHANNEL_IDS not configured — skipping YouTube ingestor.") # --- Hacker News (My Reading) — requires HN_USERNAME --- reading_posts: list[dict] = [] @@ -157,6 +159,45 @@ def generate_site( reverse=True, ) + # --- Sidebar data --- + # Split HN posts into stories vs. 
comments for separate sidebar panels + _SIDEBAR_LIMIT = 5 + hn_stories = [p for p in reading_posts if p.get("metadata", {}).get("hn_type") == "story"] + hn_comments = [p for p in reading_posts if p.get("metadata", {}).get("hn_type") == "comment"] + + # Build per-username HN profile links (use first username if multiple) + _hn_user = (hn_usernames or [None])[0] + hn_submitted_url = ( + f"https://news.ycombinator.com/submitted?id={_hn_user}" if _hn_user else None + ) + hn_threads_url = ( + f"https://news.ycombinator.com/threads?id={_hn_user}" if _hn_user else None + ) + hn_profile_url = ( + f"https://news.ycombinator.com/user?id={_hn_user}" if _hn_user else None + ) + + # Collect unique YouTube "view more" URLs (one per playlist/channel) + seen_view_more: set[str] = set() + youtube_view_more_urls: list[dict] = [] + for p in watching_posts: + vmu = p.get("metadata", {}).get("view_more_url") + stype = p.get("metadata", {}).get("source_type", "playlist") + if vmu and vmu not in seen_view_more: + seen_view_more.add(vmu) + youtube_view_more_urls.append({"url": vmu, "source_type": stype}) + + sidebar = { + "hn_stories": hn_stories[:_SIDEBAR_LIMIT], + "hn_comments": hn_comments[:_SIDEBAR_LIMIT], + "hn_submitted_url": hn_submitted_url, + "hn_threads_url": hn_threads_url, + "hn_profile_url": hn_profile_url, + "hn_username": _hn_user, + "watching": watching_posts[:_SIDEBAR_LIMIT], + "youtube_view_more_urls": youtube_view_more_urls, + } + # --- Jinja2 setup --- env = Environment( loader=FileSystemLoader(str(TEMPLATES_DIR)), @@ -204,7 +245,7 @@ def generate_site( # Render index page index_tmpl = env.get_template("index.html") - index_html = index_tmpl.render(sections=active_sections) + index_html = index_tmpl.render(sections=active_sections, sidebar=sidebar) (output_dir / "index.html").write_text(index_html, encoding="utf-8") print("Wrote index.html") @@ -235,6 +276,7 @@ def generate_site( config_ctx = { "hn_usernames": hn_usernames or [], "playlist_ids": playlist_ids, + 
"channel_ids": channel_ids, "hidden_labels": sorted(hidden_labels), "blocked_user_count": len(blocked_users), "writing_post_count": len(writing_posts), @@ -270,6 +312,7 @@ def main() -> None: output_dir = Path(os.environ.get("OUTPUT_DIR", "_site")).resolve() youtube_playlist_ids = os.environ.get("YOUTUBE_PLAYLIST_IDS") or None + youtube_channel_ids = os.environ.get("YOUTUBE_CHANNEL_IDS") or None # HN usernames: from HN_USERNAME env var and/or local config file (gitignored) hn_usernames = hackernews.load_usernames(CONFIG_DIR, os.environ.get("HN_USERNAME") or None) @@ -279,6 +322,7 @@ def main() -> None: token=token, output_dir=output_dir, youtube_playlist_ids=youtube_playlist_ids, + youtube_channel_ids=youtube_channel_ids, hn_usernames=hn_usernames or None, ) diff --git a/blog/ingestors/hackernews.py b/blog/ingestors/hackernews.py index edd2e6b..1823718 100644 --- a/blog/ingestors/hackernews.py +++ b/blog/ingestors/hackernews.py @@ -25,7 +25,7 @@ _HN_ALGOLIA_BASE = "https://hn.algolia.com/api/v1" _HN_ITEM_BASE = "https://news.ycombinator.com/item" _CONFIG_FILE = "hackernews.txt" -_MAX_HITS_PER_TYPE = 100 +_MAX_HITS_PER_TYPE = 20 # --------------------------------------------------------------------------- diff --git a/blog/ingestors/youtube.py b/blog/ingestors/youtube.py index 56875d2..366ffb7 100644 --- a/blog/ingestors/youtube.py +++ b/blog/ingestors/youtube.py @@ -1,26 +1,30 @@ """ -YouTube playlist ingestor for SimpleGitBlog. +YouTube ingestor for SimpleGitBlog. -Fetches videos from one or more YouTube playlists via YouTube's public Atom/RSS -feed — **no API key required**. +Fetches videos from one or more YouTube playlists AND/OR YouTube channels via +YouTube's public Atom/RSS feeds — **no API key required**. 
-Feed URL: - https://www.youtube.com/feeds/videos.xml?playlist_id={PLAYLIST_ID} +Feed URLs: + Playlist: https://www.youtube.com/feeds/videos.xml?playlist_id={PLAYLIST_ID} + Channel: https://www.youtube.com/feeds/videos.xml?channel_id={CHANNEL_ID} -Each playlist returns up to the 15 most-recently-added videos. For a personal -"My Watching" section this is normally plenty; add multiple playlists (e.g. one -per year) if you need more history. +Each feed returns up to the 15 most-recently-added videos. Section: "watching" (My Watching) Configuration (GitHub Actions repository settings — do NOT hardcode values): Variable: YOUTUBE_PLAYLIST_IDS Comma-separated playlist IDs - -For local development, you may also place playlist IDs in -``config/youtube_playlists.txt`` (one per line, # comments supported). -That file is gitignored so your personal IDs stay off of version control. + Variable: YOUTUBE_CHANNEL_IDS Comma-separated channel IDs (UCxxxxxx) + or channel handles (@username) — handles + are resolved automatically. + +For local development, you may also place IDs in: + ``config/youtube_playlists.txt`` (one playlist ID per line, # comments supported) + ``config/youtube_channels.txt`` (one channel ID or @handle per line, # comments supported) +Both files are gitignored so your personal IDs stay off of version control.
""" +import re import xml.etree.ElementTree as ET from pathlib import Path @@ -28,7 +32,8 @@ from blog.utils import extract_excerpt, format_date, format_datetime, plain_text_to_html -_CONFIG_FILE = "youtube_playlists.txt" +_PLAYLIST_CONFIG_FILE = "youtube_playlists.txt" +_CHANNEL_CONFIG_FILE = "youtube_channels.txt" _RSS_BASE = "https://www.youtube.com/feeds/videos.xml" # XML namespace map for YouTube Atom feeds @@ -60,7 +65,34 @@ def load_playlist_ids(config_dir: Path, env_playlist_ids: str | None = None) -> if pid: ids.add(pid) - config_file = config_dir / _CONFIG_FILE + config_file = config_dir / _PLAYLIST_CONFIG_FILE + if config_file.exists(): + for line in config_file.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if line and not line.startswith("#"): + ids.add(line) + + return sorted(ids) + + +def load_channel_ids(config_dir: Path, env_channel_ids: str | None = None) -> list[str]: + """ + Return a deduplicated list of YouTube channel IDs (or @handles) from two sources: + + 1. ``env_channel_ids`` — the value of the YOUTUBE_CHANNEL_IDS env var + (comma-separated). Accepts ``UCxxxxxx`` channel IDs or ``@handle`` forms. + 2. ``config/youtube_channels.txt`` — optional local-dev override file + (gitignored; never committed with real IDs). + """ + ids: set[str] = set() + + if env_channel_ids: + for cid in env_channel_ids.split(","): + cid = cid.strip() + if cid: + ids.add(cid) + + config_file = config_dir / _CHANNEL_CONFIG_FILE if config_file.exists(): for line in config_file.read_text(encoding="utf-8").splitlines(): line = line.strip() @@ -70,32 +102,75 @@ def load_playlist_ids(config_dir: Path, env_playlist_ids: str | None = None) -> return sorted(ids) +def _resolve_channel_id(handle_or_id: str) -> str | None: + """ + Resolve a channel handle (``@username``) or channel ID (``UCxxxxxx``) to a + confirmed channel ID. + + If the argument looks like a channel ID already (starts with ``UC``), it is + returned as-is. 
Otherwise the channel page is fetched and the RSS feed link + (which contains the channel ID) is extracted from the HTML. + + Returns the channel ID string, or ``None`` if resolution fails. + """ + # Already looks like a channel ID + if re.match(r'^UC[A-Za-z0-9_-]{20,}$', handle_or_id): + return handle_or_id + + # Build the canonical channel URL + if handle_or_id.startswith("@"): + channel_url = f"https://www.youtube.com/{handle_or_id}" + else: + channel_url = f"https://www.youtube.com/@{handle_or_id}" + + try: + resp = requests.get( + channel_url, + headers={"User-Agent": "Mozilla/5.0 (compatible; SimpleGitBlog/1.0)"}, + timeout=15, + ) + resp.raise_for_status() + except requests.RequestException as exc: + print(f" Warning: could not fetch channel page for {handle_or_id}: {exc}") + return None + + # Look for the RSS feed link in the page HTML, which contains the channel_id + m = re.search( + r'https://www\.youtube\.com/feeds/videos\.xml\?channel_id=(UC[A-Za-z0-9_-]+)', + resp.text, + ) + if m: + return m.group(1) + + print(f" Warning: could not extract channel ID from {channel_url}") + return None + + # --------------------------------------------------------------------------- # RSS / Atom feed helpers # --------------------------------------------------------------------------- -def _fetch_playlist_feed(playlist_id: str) -> list[dict]: +def _fetch_feed(url: str, label: str) -> list[dict]: """ - Fetch videos from a YouTube playlist Atom feed. + Fetch videos from a YouTube Atom feed (playlist or channel). Returns a list of raw entry dicts extracted from the feed. No API key required — the feed is publicly accessible. 
""" - url = f"{_RSS_BASE}?playlist_id={playlist_id}" try: response = requests.get(url, timeout=30) response.raise_for_status() except requests.HTTPError as exc: - print(f" Warning: YouTube RSS error for playlist {playlist_id}: {exc}") + print(f" Warning: YouTube RSS error for {label}: {exc}") return [] except requests.RequestException as exc: - print(f" Warning: YouTube RSS request failed for playlist {playlist_id}: {exc}") + print(f" Warning: YouTube RSS request failed for {label}: {exc}") return [] try: root = ET.fromstring(response.content) except ET.ParseError as exc: - print(f" Warning: could not parse YouTube RSS for playlist {playlist_id}: {exc}") + print(f" Warning: could not parse YouTube RSS for {label}: {exc}") return [] entries = [] @@ -136,11 +211,23 @@ def _fetch_playlist_feed(playlist_id: str) -> list[dict]: return entries +def _fetch_playlist_feed(playlist_id: str) -> list[dict]: + """Fetch videos from a YouTube playlist Atom feed.""" + url = f"{_RSS_BASE}?playlist_id={playlist_id}" + return _fetch_feed(url, f"playlist {playlist_id}") + + +def _fetch_channel_feed(channel_id: str) -> list[dict]: + """Fetch latest videos from a YouTube channel Atom feed.""" + url = f"{_RSS_BASE}?channel_id={channel_id}" + return _fetch_feed(url, f"channel {channel_id}") + + # --------------------------------------------------------------------------- # Post processing # --------------------------------------------------------------------------- -def _process_entry(entry: dict, playlist_id: str) -> dict | None: +def _process_entry(entry: dict, source_type: str, source_id: str, view_more_url: str) -> dict | None: """Convert a raw feed entry dict into the common post schema.""" video_id = entry.get("video_id", "").strip() if not video_id: @@ -177,9 +264,13 @@ def _process_entry(entry: dict, playlist_id: str) -> dict | None: "comments": [], "metadata": { "video_id": video_id, - "playlist_id": playlist_id, + "source_type": source_type, # "playlist" or "channel" + 
"source_id": source_id, + "view_more_url": view_more_url, "channel_name": author_name, "thumbnail_url": thumbnail_url, + # Legacy aliases kept for template compatibility + "playlist_id": source_id if source_type == "playlist" else None, }, } @@ -191,17 +282,23 @@ def _process_entry(entry: dict, playlist_id: str) -> dict | None: def ingest( config_dir: Path, env_playlist_ids: str | None = None, + env_channel_ids: str | None = None, ) -> list[dict]: """ - Fetch YouTube playlist videos via public RSS feeds and return posts in - the common schema. No API key required. + Fetch YouTube playlist and channel videos via public RSS feeds and return + posts in the common schema. No API key required. Playlist IDs come from ``env_playlist_ids`` (YOUTUBE_PLAYLIST_IDS env var) and/or the local ``config/youtube_playlists.txt`` file. + + Channel IDs/handles come from ``env_channel_ids`` (YOUTUBE_CHANNEL_IDS env + var) and/or the local ``config/youtube_channels.txt`` file. """ playlist_ids = load_playlist_ids(config_dir, env_playlist_ids) - if not playlist_ids: - print(" No YouTube playlist IDs configured.") + channel_ids_raw = load_channel_ids(config_dir, env_channel_ids) + + if not playlist_ids and not channel_ids_raw: + print(" No YouTube playlist IDs or channel IDs configured.") return [] posts: list[dict] = [] @@ -211,8 +308,33 @@ def ingest( print(f" Fetching playlist RSS: {playlist_id}") entries = _fetch_playlist_feed(playlist_id) print(f" {len(entries)} video(s) found.") + view_more_url = f"https://www.youtube.com/playlist?list={playlist_id}" + for entry in entries: + post = _process_entry(entry, "playlist", playlist_id, view_more_url) + if post is None: + continue + vid = post["metadata"]["video_id"] + if vid not in seen_video_ids: + seen_video_ids.add(vid) + posts.append(post) + + for raw_id in channel_ids_raw: + print(f" Resolving YouTube channel: {raw_id}") + channel_id = _resolve_channel_id(raw_id) + if not channel_id: + print(f" Skipping — could not resolve channel ID 
for: {raw_id}") + continue + print(f" Fetching channel RSS: {channel_id}") + entries = _fetch_channel_feed(channel_id) + print(f" {len(entries)} video(s) found.") + # Build a human-friendly "view more" URL using the original handle if given + if raw_id.startswith("@") or not raw_id.startswith("UC"): + handle = raw_id if raw_id.startswith("@") else f"@{raw_id}" + view_more_url = f"https://www.youtube.com/{handle}/videos" + else: + view_more_url = f"https://www.youtube.com/channel/{channel_id}/videos" for entry in entries: - post = _process_entry(entry, playlist_id) + post = _process_entry(entry, "channel", channel_id, view_more_url) if post is None: continue vid = post["metadata"]["video_id"] diff --git a/blog/static/style.css b/blog/static/style.css index ac38a6e..823ef19 100644 --- a/blog/static/style.css +++ b/blog/static/style.css @@ -30,7 +30,7 @@ img { max-width: 100%; height: auto; } /* --- Layout ----------------------------------------------- */ .container { - max-width: 800px; + max-width: 1100px; margin: 0 auto; padding: 0 1.25rem; } @@ -509,6 +509,122 @@ main.container { padding-top: 2.5rem; padding-bottom: 2.5rem; } color: #334155; } +/* --- Two-column page layout ------------------------------- */ +.page-layout { + display: grid; + grid-template-columns: 1fr 320px; + gap: 2.5rem; + align-items: start; +} + +.main-column { min-width: 0; } + +/* --- Page sidebar ----------------------------------------- */ +.page-sidebar { + position: sticky; + top: 1.5rem; + display: flex; + flex-direction: column; + gap: 1.5rem; +} + +/* --- Sidebar sections ------------------------------------- */ +.sidebar-section { + background: #fff; + border: 1px solid #e2e8f0; + border-radius: 10px; + padding: 1rem 1.1rem; +} + +.sidebar-section__heading { + font-size: 0.95rem; + font-weight: 700; + color: #111; + margin: 0 0 0.75rem; + display: flex; + align-items: center; + gap: 0.35rem; + border-bottom: 1px solid #e2e8f0; + padding-bottom: 0.5rem; +} + +.sidebar-list { + 
list-style: none; + margin: 0 0 0.5rem; + padding: 0; + display: flex; + flex-direction: column; + gap: 0.75rem; +} + +.sidebar-item { font-size: 0.85rem; } + +.sidebar-item__title { + display: block; + font-weight: 600; + color: #1e293b; + text-decoration: none; + line-height: 1.35; +} +.sidebar-item__title:hover { color: #0969da; text-decoration: underline; } + +.sidebar-item__meta { + font-size: 0.78rem; + color: #94a3b8; + margin-top: 0.2rem; +} +.sidebar-item__meta a { color: #94a3b8; } +.sidebar-item__meta a:hover { color: #0969da; } + +.sidebar-item__excerpt { + margin: 0.25rem 0 0; + font-size: 0.78rem; + color: #64748b; + display: -webkit-box; + -webkit-line-clamp: 2; + -webkit-box-orient: vertical; + overflow: hidden; +} + +/* --- Video sidebar items ---------------------------------- */ +.sidebar-list--videos { gap: 0.8rem; } + +.sidebar-item--video { + display: flex; + gap: 0.65rem; + align-items: flex-start; +} + +.sidebar-item__thumb-link { flex-shrink: 0; } + +.sidebar-item__thumb { + width: 100px; + height: 56px; + object-fit: cover; + border-radius: 5px; + display: block; +} + +.sidebar-item__video-info { flex: 1; min-width: 0; } +.sidebar-item--video .sidebar-item__title { font-size: 0.82rem; } + +/* --- Sidebar "view all" link ------------------------------ */ +.sidebar-section__view-all { + display: block; + font-size: 0.78rem; + font-weight: 600; + color: #0969da; + text-decoration: none; + margin-top: 0.5rem; +} +.sidebar-section__view-all:hover { text-decoration: underline; } + +.sidebar-section__view-more-links { + display: flex; + flex-direction: column; + gap: 0.25rem; +} + /* --- Labels sidebar --------------------------------------- */ .labels-sidebar { margin-top: 3rem; @@ -658,6 +774,18 @@ a.config-detail:hover { color: #0969da; } } /* --- Responsive ------------------------------------------- */ +@media (max-width: 900px) { + .page-layout { + grid-template-columns: 1fr; + } + + .page-sidebar { + position: static; + border-top: 2px 
solid #e2e8f0; + padding-top: 2rem; + } +} + @media (max-width: 600px) { html { font-size: 16px; } diff --git a/blog/templates/config.html b/blog/templates/config.html index 580a68d..a1500ea 100644 --- a/blog/templates/config.html +++ b/blog/templates/config.html @@ -52,10 +52,13 @@
{{ owner_profile.bio }}
+ {% endif %} +| Platform | URL | Used for |
|---|---|---|
| {{ link.provider | capitalize }} | ++ + {{ link.url }} + + | ++ {% if link.provider == 'youtube' %} + {% if auto_discovered_channels %} + ✅ Auto-discovered — My Watching + {% else %} + Overridden by YOUTUBE_CHANNEL_IDS + {% endif %} + {% elif link.provider in ('twitter', 'x') %} + — Displayed (future integration) + {% elif link.provider == 'linkedin' %} + — Displayed (future integration) + {% else %} + — Not yet integrated + {% endif %} + | +
Website: {{ owner_profile.website }}
+ {% endif %} + {% else %} +No social accounts linked on this GitHub profile.
+ {% endif %} +