Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -316,9 +316,9 @@ paperscout/
src/paperscout/
__main__.py Entry point; wires together all components
config.py All settings via pydantic-settings
models.py Paper dataclass, PaperPrefix/PaperType/FileExt enums
models.py Paper, Tier, ProbeHit, PerUserMatches; PaperPrefix/PaperType/FileExt enums
sources.py WG21Index (PaperCache-backed), ISOProber, open-std.org scraper
monitor.py Scheduler, diff engine, PerUserMatches, PollResult
monitor.py Scheduler, diff engine, PollResult
scout.py Slack Bolt app, MessageQueue, notify_channel, notify_users
storage.py PaperCache, ProbeState, UserWatchlist (all PostgreSQL-backed)
db.py ThreadedConnectionPool init and schema DDL
Expand Down
38 changes: 38 additions & 0 deletions src/paperscout/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import re
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum


Expand Down Expand Up @@ -105,3 +106,40 @@ def from_index_entry(key: str, entry: dict) -> Paper:
github_url=entry.get("github_url", ""),
issues=entry.get("issues", []) or [],
)


# ── ISO probe / watchlist match shapes (kept here to avoid storage↔monitor cycles) ─


class Tier(str, Enum):
    """Probe priority bucket for isocpp HEAD requests.

    Members also subclass ``str``, so each one compares equal to its raw
    string value (``Tier.COLD == "cold"``) and can be rebuilt from that
    string with ``Tier("cold")``.
    """

    WATCHLIST = "watchlist"
    FRONTIER = "frontier"
    RECENT = "recent"
    COLD = "cold"


@dataclass(slots=True)
class ProbeHit:
"""Successful HEAD to an unpublished draft URL plus optional excerpt text."""

url: str
prefix: str
number: int
revision: int
extension: str
tier: Tier
front_text: str = ""
last_modified: datetime | None = field(default=None)
# True when Last-Modified is within alert_modified_hours of now,
# or when the header is absent (first-ever discovery of a new file).
is_recent: bool = False


@dataclass
class PerUserMatches:
    """One user's watchlist hits: ``(paper|hit, 'author'|'paper')`` tuples.

    Both lists default to a fresh empty list per instance, so an instance can
    be created empty and appended to without sharing state.
    """

    # (Paper, match kind) pairs; the kind is 'author' or 'paper'.
    papers: list[tuple[Paper, str]] = field(default_factory=list)
    # (ProbeHit, match kind) pairs; the kind is 'author' or 'paper'.
    probe_hits: list[tuple[ProbeHit, str]] = field(default_factory=list)
17 changes: 3 additions & 14 deletions src/paperscout/monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
import asyncio
import logging
import time
from dataclasses import dataclass, field
from dataclasses import dataclass
from datetime import datetime, timezone

from .config import Settings, settings
from .models import Paper
from .sources import ISOProber, ProbeHit, WG21Index
from .models import Paper, PerUserMatches, ProbeHit
from .sources import ISOProber, WG21Index
from .storage import ProbeState, UserWatchlist

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -53,17 +53,6 @@ def diff_snapshots(
return DiffResult(new_papers=new_papers, updated_papers=updated_papers)


# ── Per-User Matches ─────────────────────────────────────────────────────────


@dataclass
class PerUserMatches:
    """One user's watchlist hits: ``(paper|hit, 'author'|'paper')`` tuples.

    Both lists default to a fresh empty list per instance, so an instance can
    be created empty and appended to without sharing state.
    """

    # (Paper, match kind) pairs; the kind is 'author' or 'paper'.
    papers: list[tuple[Paper, str]] = field(default_factory=list)
    # (ProbeHit, match kind) pairs; the kind is 'author' or 'paper'.
    probe_hits: list[tuple[ProbeHit, str]] = field(default_factory=list)


# ── Poll Result ──────────────────────────────────────────────────────────────


Expand Down
3 changes: 1 addition & 2 deletions src/paperscout/scout.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,8 @@
from slack_sdk.errors import SlackApiError

from .config import settings
from .models import Paper
from .models import Paper, Tier
from .monitor import PollResult
from .sources import Tier
from .storage import ProbeState, UserWatchlist

log = logging.getLogger(__name__)
Expand Down
31 changes: 2 additions & 29 deletions src/paperscout/sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,14 @@
import re
import time
from collections.abc import Iterable
from dataclasses import dataclass, field
from dataclasses import dataclass
from datetime import date, datetime, timedelta, timezone
from email.utils import parsedate_to_datetime
from enum import Enum

import httpx

from .config import Settings, settings
from .models import Paper
from .models import Paper, ProbeHit, Tier
from .storage import PaperCache, ProbeState, UserWatchlist

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -150,32 +149,6 @@ def known_p_numbers(self) -> set[int]:
ISO_BASE = "https://isocpp.org/files/papers/"


class Tier(str, Enum):
    """Probe priority bucket for isocpp HEAD requests.

    Members also subclass ``str``, so each one compares equal to its raw
    string value (``Tier.COLD == "cold"``) and can be rebuilt from that
    string with ``Tier("cold")``.
    """

    WATCHLIST = "watchlist"
    FRONTIER = "frontier"
    RECENT = "recent"
    COLD = "cold"


@dataclass(slots=True)
class ProbeHit:
"""Successful HEAD to an unpublished draft URL plus optional excerpt text."""

url: str
prefix: str
number: int
revision: int
extension: str
tier: Tier
front_text: str = ""
last_modified: datetime | None = field(default=None)
# True when Last-Modified is within alert_modified_hours of now,
# or when the header is absent (first-ever discovery of a new file).
is_recent: bool = False


_TAG_RE = re.compile(r"<[^>]+>")
_PDF_MAX_BYTES = 2 * 1024 * 1024 # 2 MB cap to avoid huge downloads

Expand Down
6 changes: 2 additions & 4 deletions src/paperscout/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
from contextlib import contextmanager
from typing import TYPE_CHECKING

from .models import PerUserMatches

if TYPE_CHECKING:
from psycopg2.pool import ThreadedConnectionPool

from .monitor import PerUserMatches

log = logging.getLogger(__name__)

# isocpp.org draft URLs (same path shape as ISOProber)
Expand Down Expand Up @@ -341,8 +341,6 @@ def matches_for_users(
probe_hits: list, # list[ProbeHit]
) -> dict[str, PerUserMatches]:
"""Users with at least one author or paper-number match in this poll."""
from .monitor import PerUserMatches # local import to avoid circular

all_entries = self._get_all_entries()
if not all_entries:
return {}
Expand Down
5 changes: 2 additions & 3 deletions tests/test_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,14 @@

import pytest

from paperscout.models import Paper
from paperscout.models import Paper, PerUserMatches, ProbeHit
from paperscout.monitor import (
DiffResult,
PerUserMatches,
PollResult,
Scheduler,
diff_snapshots,
)
from paperscout.sources import ISOProber, ProbeHit, WG21Index
from paperscout.sources import ISOProber, WG21Index
from paperscout.storage import ProbeState, UserWatchlist
from tests.conftest import make_test_settings

Expand Down
5 changes: 2 additions & 3 deletions tests/test_scout.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from datetime import datetime, timedelta, timezone
from unittest.mock import MagicMock, patch

from paperscout.models import Paper
from paperscout.monitor import DiffResult, DPTransition, PerUserMatches, PollResult
from paperscout.models import Paper, PerUserMatches, ProbeHit
from paperscout.monitor import DiffResult, DPTransition, PollResult
from paperscout.scout import (
_batch_lines,
_fmt_lm,
Expand All @@ -25,7 +25,6 @@
notify_users,
register_handlers,
)
from paperscout.sources import ProbeHit
from paperscout.storage import ProbeState, UserWatchlist

# ── Helpers ───────────────────────────────────────────────────────────────────
Expand Down
3 changes: 2 additions & 1 deletion tests/test_scout_extra.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

from unittest.mock import MagicMock, patch

from paperscout.monitor import DiffResult, PerUserMatches, PollResult
from paperscout.models import PerUserMatches
from paperscout.monitor import DiffResult, PollResult
from paperscout.scout import _batch_lines, notify_channel, notify_users
from tests.test_scout import _make_result

Expand Down
4 changes: 2 additions & 2 deletions tests/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ def test_matches_for_users_empty_watchlist(self, fake_pool):
assert wl.matches_for_users([paper], []) == {}

def test_matches_for_users_probe_hit_author(self, fake_pool):
from paperscout.sources import ProbeHit
from paperscout.models import ProbeHit

wl = UserWatchlist(fake_pool)
wl.add("U1", "niebler")
Expand All @@ -349,7 +349,7 @@ def test_matches_for_users_probe_hit_author(self, fake_pool):
assert len(result["U1"].probe_hits) == 1

def test_matches_for_users_probe_hit_paper_number(self, fake_pool):
from paperscout.sources import ProbeHit
from paperscout.models import ProbeHit

wl = UserWatchlist(fake_pool)
wl.add("U1", "9999")
Expand Down