From 55150e70af7d8933b1368c4088e07158978c756b Mon Sep 17 00:00:00 2001
From: ghinks <ghinks@yahoo.com>
Date: Wed, 11 Mar 2026 06:12:06 -0400
Subject: [PATCH 1/2] test: overhaul integration tests and expand README
 testing docs

Rewrite test_integration.py with a proper fixture-based structure:
- session-scoped github_env and date_windows fixtures for shared setup
- module-scoped fetched_workspace fixture that runs fetch once and
  reuses the database across all classify tests
- date windows computed dynamically relative to today rather than
  hardcoded values
- four parametrised fetch variants (default, with-dates, reset-db, config)
- dedicated classify tests: table output, stricter threshold, JSON output,
  and --exclude-primary-merged

Expand the README Development section with instructions for running unit
tests only, integration tests only, and each individual integration test
by its pytest node ID.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 README.md                 |  64 ++++++++
 tests/test_integration.py | 335 +++++++++++++++++++++++++++++++++-----
 2 files changed, 361 insertions(+), 38 deletions(-)

diff --git a/README.md b/README.md
index f936595..794db2c 100644
--- a/README.md
+++ b/README.md
@@ -276,10 +276,74 @@ uv sync --group dev
 
 ### Running Tests
 
+Run the full test suite:
+
 ```bash
 uv run pytest
 ```
 
+Run **unit tests only** (excludes integration tests that call the real GitHub API):
+
+```bash
+uv run pytest -m "not integration"
+```
+
+Run **integration tests only** (requires the GitHub CLI `gh` to be installed and authenticated; the tests read the token via `gh auth token` and are skipped otherwise):
+
+```bash
+uv run pytest -m integration
+```
+
+#### Running individual integration tests
+
+Integration tests live in `tests/test_integration.py`. They are marked `@pytest.mark.integration` and target the `expressjs/express` repository as a real-world fixture.
+
+**`test_fetch_examples_integration`** — four parametrised variants of the `fetch` command. Run all four at once:
+
+```bash
+uv run pytest tests/test_integration.py::test_fetch_examples_integration
+```
+
+Or run a single variant by its explicit ID:
+
+```bash
+# Variant 1 — fetch with default date range (no explicit collate window)
+uv run pytest "tests/test_integration.py::test_fetch_examples_integration[fetch-default]"
+
+# Variant 2 — fetch with explicit --collate-start / --collate-end
+uv run pytest "tests/test_integration.py::test_fetch_examples_integration[fetch-with-dates]"
+
+# Variant 3 — fetch with --reset-db and explicit date range
+uv run pytest "tests/test_integration.py::test_fetch_examples_integration[fetch-reset-db]"
+
+# Variant 4 — fetch using a --config TOML file
+uv run pytest "tests/test_integration.py::test_fetch_examples_integration[fetch-config]"
+```
+
+**`test_classify_example_table_output`** — classify with default table output:
+
+```bash
+uv run pytest tests/test_integration.py::test_classify_example_table_output
+```
+
+**`test_classify_example_stricter_threshold`** — classify with `--threshold 3.0`:
+
+```bash
+uv run pytest tests/test_integration.py::test_classify_example_stricter_threshold
+```
+
+**`test_classify_example_json_output`** — classify with `--format json`, then validate the JSON payload:
+
+```bash
+uv run pytest tests/test_integration.py::test_classify_example_json_output
+```
+
+**`test_classify_example_exclude_primary_merged`** — classify with `--exclude-primary-merged`:
+
+```bash
+uv run pytest tests/test_integration.py::test_classify_example_exclude_primary_merged
+```
+
 ### Linting & Formatting
 
 ```bash
diff --git a/tests/test_integration.py b/tests/test_integration.py
index ed2e607..6ac1b74 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -1,59 +1,318 @@
+import json
 import os
 import shutil
 import subprocess
+from collections.abc import Callable, Iterator
+from dataclasses import dataclass
+from datetime import UTC, datetime, timedelta
+from pathlib import Path
 
 import pytest
 
+REPO_NAME = "expressjs/express"
 
-@pytest.mark.integration
-@pytest.mark.timeout(300)
-def test_review_classify_integration() -> None:
-    """
-    Integration test that runs the full CLI command against a real repo.
-    Required: 'gh' CLI tool must be authenticated.
-    """
-    # 1. Get GitHub Token
+
+@dataclass(frozen=True)
+class DateWindows:
+    fetch_start: str
+    fetch_end: str
+    classify_start: str
+    classify_end: str
+
+
+FetchArgsBuilder = Callable[[DateWindows, Path], list[str]]
+
+
+@dataclass(frozen=True)
+class CommandResult:
+    command: list[str]
+    result: subprocess.CompletedProcess[str]
+
+
+def _date_windows() -> DateWindows:
+    today = datetime.now(UTC).date()
+    fetch_end = today.strftime("%Y-%m-%d")
+    fetch_start = (today - timedelta(days=182)).strftime("%Y-%m-%d")
+    classify_end = (today - timedelta(days=30)).strftime("%Y-%m-%d")
+    return DateWindows(
+        fetch_start=fetch_start,
+        fetch_end=fetch_end,
+        classify_start=fetch_start,
+        classify_end=classify_end,
+    )
+
+
+def _github_env() -> dict[str, str]:
     if not shutil.which("gh"):
         pytest.skip("GitHub CLI (gh) not found")
 
     try:
-        # Capture token from gh cli
         token = subprocess.check_output(["gh", "auth", "token"], text=True).strip()
     except subprocess.CalledProcessError:
         pytest.skip("Could not get GITHUB_TOKEN from gh CLI. Is it authenticated?")
 
-    # 2. Prepare environment
     env = os.environ.copy()
     env["GITHUB_TOKEN"] = token
+    return env
+
 
-    # 3. Construct command
-    # "uv run review-classify fetch --repo expressjs/express \\
-    #      --collate-start 2024-12-01 --collate-end 2024-12-31"
-    cmd = [
-        "uv",
-        "run",
-        "review-classify",
-        "fetch",
-        "--repo",
-        "expressjs/express",
-        "--collate-start",
-        "2024-12-01",
-        "--collate-end",
-        "2024-12-31",
-    ]
-
-    # 4. Run command
-    print(f"Running command: {' '.join(cmd)}")
-    result = subprocess.run(cmd, env=env, capture_output=True, text=True)
-
-    # 5. Assertions
+def _run_cli(
+    args: list[str],
+    env: dict[str, str],
+    cwd: Path,
+) -> CommandResult:
+    command_env = env.copy()
+    command_env.setdefault("UV_CACHE_DIR", str(cwd / ".uv-cache"))
+    command = ["uv", "run", "review-classify", *args]
+    result = subprocess.run(
+        command,
+        cwd=cwd,
+        env=command_env,
+        capture_output=True,
+        text=True,
+    )
     if result.returncode != 0:
-        print("STDOUT:", result.stdout)
-        print("STDERR:", result.stderr)
+        pytest.fail(
+            "Command failed.\n"
+            f"Command: {' '.join(command)}\n"
+            f"STDOUT:\n{result.stdout}\n"
+            f"STDERR:\n{result.stderr}"
+        )
+    return CommandResult(command=command, result=result)
+
+
+@pytest.fixture(scope="session")
+def github_env() -> dict[str, str]:
+    return _github_env()
+
+
+@pytest.fixture(scope="session")
+def date_windows() -> DateWindows:
+    return _date_windows()
+
+
+@pytest.fixture
+def isolated_workspace(tmp_path: Path) -> Path:
+    return tmp_path
+
+
+@pytest.fixture(scope="module")
+def fetched_workspace(
+    tmp_path_factory: pytest.TempPathFactory,
+    github_env: dict[str, str],
+    date_windows: DateWindows,
+) -> Iterator[Path]:
+    workspace = tmp_path_factory.mktemp("integration-db")
+    fetch_result = _run_cli(
+        [
+            "fetch",
+            "--repo",
+            REPO_NAME,
+            "--collate-start",
+            date_windows.fetch_start,
+            "--collate-end",
+            date_windows.fetch_end,
+        ],
+        env=github_env,
+        cwd=workspace,
+    )
+
+    assert f"Fetching {REPO_NAME}..." in fetch_result.result.stdout
+    assert "Saving" in fetch_result.result.stdout
+    assert workspace.joinpath("review_classification.db").exists()
+    yield workspace
+
+
+@pytest.mark.integration
+@pytest.mark.timeout(1800)
+@pytest.mark.parametrize(
+    ("args_builder", "expected_stdout"),
+    [
+        (
+            lambda _dates, _workspace: ["fetch", "--repo", REPO_NAME],
+            "Successfully saved",
+        ),
+        (
+            lambda dates, _workspace: [
+                "fetch",
+                "--repo",
+                REPO_NAME,
+                "--collate-start",
+                dates.fetch_start,
+                "--collate-end",
+                dates.fetch_end,
+            ],
+            "Successfully saved",
+        ),
+        (
+            lambda dates, _workspace: [
+                "fetch",
+                "--repo",
+                REPO_NAME,
+                "--reset-db",
+                "--collate-start",
+                dates.fetch_start,
+                "--collate-end",
+                dates.fetch_end,
+            ],
+            "Database reset complete.",
+        ),
+        (
+            lambda dates, workspace: [
+                "fetch",
+                "--config",
+                str(_write_fetch_config(workspace, dates)),
+            ],
+            "Successfully saved",
+        ),
+    ],
+    ids=["fetch-default", "fetch-with-dates", "fetch-reset-db", "fetch-config"],
+)
+def test_fetch_examples_integration(
+    args_builder: FetchArgsBuilder,
+    expected_stdout: str,
+    github_env: dict[str, str],
+    date_windows: DateWindows,
+    isolated_workspace: Path,
+) -> None:
+    result = _run_cli(
+        args_builder(date_windows, isolated_workspace),
+        env=github_env,
+        cwd=isolated_workspace,
+    )
+
+    assert f"Fetching {REPO_NAME}..." in result.result.stdout
+    assert "Saving" in result.result.stdout
+    assert expected_stdout in result.result.stdout
+    assert isolated_workspace.joinpath("review_classification.db").exists()
+
+
+@pytest.mark.integration
+@pytest.mark.timeout(1800)
+def test_classify_example_table_output(
+    github_env: dict[str, str],
+    date_windows: DateWindows,
+    fetched_workspace: Path,
+) -> None:
+    result = _run_cli(
+        [
+            "classify",
+            "--repo",
+            REPO_NAME,
+            "--start",
+            date_windows.classify_start,
+            "--end",
+            date_windows.classify_end,
+        ],
+        env=github_env,
+        cwd=fetched_workspace,
+    )
+
+    assert (
+        "No outliers detected out of" in result.result.stdout
+        or f"Repository: {REPO_NAME}" in result.result.stdout
+    )
+
+
+@pytest.mark.integration
+@pytest.mark.timeout(1800)
+def test_classify_example_stricter_threshold(
+    github_env: dict[str, str],
+    date_windows: DateWindows,
+    fetched_workspace: Path,
+) -> None:
+    result = _run_cli(
+        [
+            "classify",
+            "--repo",
+            REPO_NAME,
+            "--start",
+            date_windows.classify_start,
+            "--end",
+            date_windows.classify_end,
+            "--threshold",
+            "3.0",
+        ],
+        env=github_env,
+        cwd=fetched_workspace,
+    )
+
+    assert result.result.stdout.strip() != ""
+
+
+@pytest.mark.integration
+@pytest.mark.timeout(1800)
+def test_classify_example_json_output(
+    github_env: dict[str, str],
+    date_windows: DateWindows,
+    fetched_workspace: Path,
+) -> None:
+    result = _run_cli(
+        [
+            "classify",
+            "--repo",
+            REPO_NAME,
+            "--start",
+            date_windows.classify_start,
+            "--end",
+            date_windows.classify_end,
+            "--format",
+            "json",
+        ],
+        env=github_env,
+        cwd=fetched_workspace,
+    )
+
+    payload = json.loads(result.result.stdout)
+    assert isinstance(payload, list)
+    if payload:
+        first_item = payload[0]
+        assert first_item["is_outlier"] is True
+        assert "pr_number" in first_item
+        assert "outlier_features" in first_item
+
+
+@pytest.mark.integration
+@pytest.mark.timeout(1800)
+def test_classify_example_exclude_primary_merged(
+    github_env: dict[str, str],
+    date_windows: DateWindows,
+    fetched_workspace: Path,
+) -> None:
+    result = _run_cli(
+        [
+            "classify",
+            "--repo",
+            REPO_NAME,
+            "--start",
+            date_windows.classify_start,
+            "--end",
+            date_windows.classify_end,
+            "--exclude-primary-merged",
+            "--min-samples",
+            "5",
+        ],
+        env=github_env,
+        cwd=fetched_workspace,
+    )
+
+    assert result.result.stdout.strip() != ""
+
 
-    assert result.returncode == 0, (
-        f"Command failed with return code {result.returncode}"
+def _write_fetch_config(workspace: Path, dates: DateWindows) -> Path:
+    config_path = workspace / "config.toml"
+    config_path.write_text(
+        "\n".join(
+            [
+                "[defaults]",
+                f'collate_start = "{dates.fetch_start}"',
+                f'collate_end = "{dates.fetch_end}"',
+                "",
+                "[[repositories]]",
+                f'name = "{REPO_NAME}"',
+                "",
+            ]
+        ),
+        encoding="utf-8",
     )
-    # Check for expected output strings
-    assert "Fetching PRs for expressjs/express" in result.stdout
-    assert "Saving" in result.stdout
+    return config_path

From 66651535bae279b5041c08509d150fd3ded6968f Mon Sep 17 00:00:00 2001
From: ghinks <ghinks@yahoo.com>
Date: Wed, 11 Mar 2026 06:44:19 -0400
Subject: [PATCH 2/2] test: reduce min_sample_size from 30 to 10 in integration
 tests

Standardise the minimum sample size at 10 across the integration tests
(down from 30, and up from 5 in the exclude-primary-merged test) so they
can pass against repositories with fewer than 30 merged PRs in the
collation window.

- tests/test_integration.py: add --min-samples 10 to all classify
  commands that did not already specify it; update the
  exclude-primary-merged test from 5 to 10 for consistency
- tests/test_outlier_detection_integration.py: change all explicit
  min_sample_size=30 calls to min_sample_size=10; update the
  InsufficientDataError match string accordingly

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 tests/test_integration.py                   |  8 +++++++-
 tests/test_outlier_detection_integration.py | 12 ++++++------
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/tests/test_integration.py b/tests/test_integration.py
index 6ac1b74..3fc7fc7 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -203,6 +203,8 @@ def test_classify_example_table_output(
             date_windows.classify_start,
             "--end",
             date_windows.classify_end,
+            "--min-samples",
+            "10",
         ],
         env=github_env,
         cwd=fetched_workspace,
@@ -232,6 +234,8 @@ def test_classify_example_stricter_threshold(
             date_windows.classify_end,
             "--threshold",
             "3.0",
+            "--min-samples",
+            "10",
         ],
         env=github_env,
         cwd=fetched_workspace,
@@ -258,6 +262,8 @@ def test_classify_example_json_output(
             date_windows.classify_end,
             "--format",
             "json",
+            "--min-samples",
+            "10",
         ],
         env=github_env,
         cwd=fetched_workspace,
@@ -290,7 +296,7 @@ def test_classify_example_exclude_primary_merged(
             date_windows.classify_end,
             "--exclude-primary-merged",
             "--min-samples",
-            "5",
+            "10",
         ],
         env=github_env,
         cwd=fetched_workspace,
diff --git a/tests/test_outlier_detection_integration.py b/tests/test_outlier_detection_integration.py
index dc60105..512f9b1 100644
--- a/tests/test_outlier_detection_integration.py
+++ b/tests/test_outlier_detection_integration.py
@@ -87,7 +87,7 @@ def test_outlier_detection_with_extreme_pr(test_session: Session) -> None:
 
     # Detect outliers
     results = detect_outliers_for_repository(
-        test_session, repo_name, min_sample_size=30
+        test_session, repo_name, min_sample_size=10
     )
 
     # Verify outlier detected
@@ -169,8 +169,8 @@ def test_insufficient_data_error_raised(test_session: Session) -> None:
     test_session.commit()
 
     # Should raise InsufficientDataError
-    with pytest.raises(InsufficientDataError, match="at least 30"):
-        detect_outliers_for_repository(test_session, repo_name, min_sample_size=30)
+    with pytest.raises(InsufficientDataError, match="at least 10"):
+        detect_outliers_for_repository(test_session, repo_name, min_sample_size=10)
 
 
 def test_outlier_detection_with_classify_date_range(test_session: Session) -> None:
@@ -251,7 +251,7 @@ def test_outlier_detection_with_classify_date_range(test_session: Session) -> No
     results = detect_outliers_for_repository(
         test_session,
         repo_name,
-        min_sample_size=30,
+        min_sample_size=10,
         classify_start=classify_start,
         classify_end=classify_end,
     )
@@ -342,7 +342,7 @@ def test_classify_window_is_baseline_for_stats(test_session: Session) -> None:
     results_with_window = detect_outliers_for_repository(
         test_session,
         repo_name,
-        min_sample_size=30,
+        min_sample_size=10,
         classify_start=classify_start,
         classify_end=classify_end,
     )
@@ -359,7 +359,7 @@ def test_classify_window_is_baseline_for_stats(test_session: Session) -> None:
     results_no_window = detect_outliers_for_repository(
         test_session,
         repo_name,
-        min_sample_size=30,
+        min_sample_size=10,
     )
     pr_no_window = next(r for r in results_no_window if r.pr_number == 100)