From 1dd0cf6c5106dce0e8fa63caca88bffa179ccefa Mon Sep 17 00:00:00 2001 From: mac Date: Tue, 12 May 2026 22:25:13 +0800 Subject: [PATCH] resolve current issue --- README.md | 4 ++-- src/paperscout/models.py | 38 ++++++++++++++++++++++++++++++++++++++ src/paperscout/monitor.py | 17 +++-------------- src/paperscout/scout.py | 3 +-- src/paperscout/sources.py | 31 ++----------------------------- src/paperscout/storage.py | 6 ++---- tests/test_monitor.py | 5 ++--- tests/test_scout.py | 5 ++--- tests/test_scout_extra.py | 3 ++- tests/test_storage.py | 4 ++-- 10 files changed, 56 insertions(+), 60 deletions(-) diff --git a/README.md b/README.md index 7bdd0af..be3bbc0 100644 --- a/README.md +++ b/README.md @@ -316,9 +316,9 @@ paperscout/ src/paperscout/ __main__.py Entry point; wires together all components config.py All settings via pydantic-settings - models.py Paper dataclass, PaperPrefix/PaperType/FileExt enums + models.py Paper, Tier, ProbeHit, PerUserMatches; PaperPrefix/PaperType/FileExt enums sources.py WG21Index (PaperCache-backed), ISOProber, open-std.org scraper - monitor.py Scheduler, diff engine, PerUserMatches, PollResult + monitor.py Scheduler, diff engine, PollResult scout.py Slack Bolt app, MessageQueue, notify_channel, notify_users storage.py PaperCache, ProbeState, UserWatchlist (all PostgreSQL-backed) db.py ThreadedConnectionPool init and schema DDL diff --git a/src/paperscout/models.py b/src/paperscout/models.py index 395e946..8263c13 100644 --- a/src/paperscout/models.py +++ b/src/paperscout/models.py @@ -4,6 +4,7 @@ import re from dataclasses import dataclass, field +from datetime import datetime from enum import Enum @@ -105,3 +106,40 @@ def from_index_entry(key: str, entry: dict) -> Paper: github_url=entry.get("github_url", ""), issues=entry.get("issues", []) or [], ) + + +# ── ISO probe / watchlist match shapes (kept here to avoid storage↔monitor cycles) ─ + + +class Tier(str, Enum): + """Probe priority bucket for isocpp HEAD requests.""" + + WATCHLIST = "watchlist" + FRONTIER = "frontier" + RECENT = "recent" + COLD = "cold" + + +@dataclass(slots=True) +class ProbeHit: + """Successful HEAD to an unpublished draft URL plus optional excerpt text.""" + + url: str + prefix: str + number: int + revision: int + extension: str + tier: Tier + front_text: str = "" + last_modified: datetime | None = field(default=None) + # True when Last-Modified is within alert_modified_hours of now, + # or when the header is absent (first-ever discovery of a new file). + is_recent: bool = False + + +@dataclass +class PerUserMatches: + """One user's watchlist hits: ``(paper|hit, 'author'|'paper')`` tuples.""" + + papers: list[tuple[Paper, str]] = field(default_factory=list) + probe_hits: list[tuple[ProbeHit, str]] = field(default_factory=list) diff --git a/src/paperscout/monitor.py b/src/paperscout/monitor.py index f1b522b..af6d6b3 100644 --- a/src/paperscout/monitor.py +++ b/src/paperscout/monitor.py @@ -5,12 +5,12 @@ import asyncio import logging import time -from dataclasses import dataclass, field +from dataclasses import dataclass from datetime import datetime, timezone from .config import Settings, settings -from .models import Paper -from .sources import ISOProber, ProbeHit, WG21Index +from .models import Paper, PerUserMatches, ProbeHit +from .sources import ISOProber, WG21Index from .storage import ProbeState, UserWatchlist log = logging.getLogger(__name__) @@ -53,17 +53,6 @@ def diff_snapshots( return DiffResult(new_papers=new_papers, updated_papers=updated_papers) -# ── Per-User Matches ───────────────────────────────────────────────────────── - - -@dataclass -class PerUserMatches: - """One user's watchlist hits: ``(paper|hit, 'author'|'paper')`` tuples.""" - - papers: list[tuple[Paper, str]] = field(default_factory=list) - probe_hits: list[tuple[ProbeHit, str]] = field(default_factory=list) - - # ── Poll Result ────────────────────────────────────────────────────────────── diff --git a/src/paperscout/scout.py b/src/paperscout/scout.py index 1ccd272..aaf239f 100644 --- a/src/paperscout/scout.py +++ b/src/paperscout/scout.py @@ -12,9 +12,8 @@ from slack_sdk.errors import SlackApiError from .config import settings -from .models import Paper +from .models import Paper, Tier from .monitor import PollResult -from .sources import Tier from .storage import ProbeState, UserWatchlist log = logging.getLogger(__name__) diff --git a/src/paperscout/sources.py b/src/paperscout/sources.py index 2ce912e..ec598e4 100644 --- a/src/paperscout/sources.py +++ b/src/paperscout/sources.py @@ -7,15 +7,14 @@ import re import time from collections.abc import Iterable -from dataclasses import dataclass, field +from dataclasses import dataclass from datetime import date, datetime, timedelta, timezone from email.utils import parsedate_to_datetime -from enum import Enum import httpx from .config import Settings, settings -from .models import Paper +from .models import Paper, ProbeHit, Tier from .storage import PaperCache, ProbeState, UserWatchlist log = logging.getLogger(__name__) @@ -150,32 +149,6 @@ def known_p_numbers(self) -> set[int]: ISO_BASE = "https://isocpp.org/files/papers/" -class Tier(str, Enum): - """Probe priority bucket for isocpp HEAD requests.""" - - WATCHLIST = "watchlist" - FRONTIER = "frontier" - RECENT = "recent" - COLD = "cold" - - -@dataclass(slots=True) -class ProbeHit: - """Successful HEAD to an unpublished draft URL plus optional excerpt text.""" - - url: str - prefix: str - number: int - revision: int - extension: str - tier: Tier - front_text: str = "" - last_modified: datetime | None = field(default=None) - # True when Last-Modified is within alert_modified_hours of now, - # or when the header is absent (first-ever discovery of a new file). - is_recent: bool = False - - _TAG_RE = re.compile(r"<[^>]+>") _PDF_MAX_BYTES = 2 * 1024 * 1024 # 2 MB cap to avoid huge downloads diff --git a/src/paperscout/storage.py b/src/paperscout/storage.py index 5481752..37c4b3d 100644 --- a/src/paperscout/storage.py +++ b/src/paperscout/storage.py @@ -10,11 +10,11 @@ from contextlib import contextmanager from typing import TYPE_CHECKING +from .models import PerUserMatches + if TYPE_CHECKING: from psycopg2.pool import ThreadedConnectionPool - from .monitor import PerUserMatches - log = logging.getLogger(__name__) # isocpp.org draft URLs (same path shape as ISOProber) @@ -341,8 +341,6 @@ def matches_for_users( probe_hits: list, # list[ProbeHit] ) -> dict[str, PerUserMatches]: """Users with at least one author or paper-number match in this poll.""" - from .monitor import PerUserMatches # local import to avoid circular - all_entries = self._get_all_entries() if not all_entries: return {} diff --git a/tests/test_monitor.py b/tests/test_monitor.py index f2577fe..8c44720 100644 --- a/tests/test_monitor.py +++ b/tests/test_monitor.py @@ -8,15 +8,14 @@ import pytest -from paperscout.models import Paper +from paperscout.models import Paper, PerUserMatches, ProbeHit from paperscout.monitor import ( DiffResult, - PerUserMatches, PollResult, Scheduler, diff_snapshots, ) -from paperscout.sources import ISOProber, ProbeHit, WG21Index +from paperscout.sources import ISOProber, WG21Index from paperscout.storage import ProbeState, UserWatchlist from tests.conftest import make_test_settings diff --git a/tests/test_scout.py b/tests/test_scout.py index e00e34a..7693ccd 100644 --- a/tests/test_scout.py +++ b/tests/test_scout.py @@ -5,8 +5,8 @@ from datetime import datetime, timedelta, timezone from unittest.mock import MagicMock, patch -from paperscout.models import Paper -from paperscout.monitor import DiffResult, DPTransition, PerUserMatches, PollResult +from paperscout.models import Paper, PerUserMatches, ProbeHit +from paperscout.monitor import DiffResult, DPTransition, PollResult from paperscout.scout import ( _batch_lines, _fmt_lm, @@ -25,7 +25,6 @@ notify_users, register_handlers, ) -from paperscout.sources import ProbeHit from paperscout.storage import ProbeState, UserWatchlist # ── Helpers ─────────────────────────────────────────────────────────────────── diff --git a/tests/test_scout_extra.py b/tests/test_scout_extra.py index 8397928..a64bce2 100644 --- a/tests/test_scout_extra.py +++ b/tests/test_scout_extra.py @@ -4,7 +4,8 @@ from unittest.mock import MagicMock, patch -from paperscout.monitor import DiffResult, PerUserMatches, PollResult +from paperscout.models import PerUserMatches +from paperscout.monitor import DiffResult, PollResult from paperscout.scout import _batch_lines, notify_channel, notify_users from tests.test_scout import _make_result diff --git a/tests/test_storage.py b/tests/test_storage.py index 2effc1c..ded2160 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -330,7 +330,7 @@ def test_matches_for_users_empty_watchlist(self, fake_pool): assert wl.matches_for_users([paper], []) == {} def test_matches_for_users_probe_hit_author(self, fake_pool): - from paperscout.sources import ProbeHit + from paperscout.models import ProbeHit wl = UserWatchlist(fake_pool) wl.add("U1", "niebler") @@ -349,7 +349,7 @@ def test_matches_for_users_probe_hit_author(self, fake_pool): assert len(result["U1"].probe_hits) == 1 def test_matches_for_users_probe_hit_paper_number(self, fake_pool): - from paperscout.sources import ProbeHit + from paperscout.models import ProbeHit wl = UserWatchlist(fake_pool) wl.add("U1", "9999")