diff --git a/CLAUDE.md b/CLAUDE.md index 879573a5..b852dc40 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -227,9 +227,10 @@ ssh -i infrastructure/nat-instance-ssh-private-key.pem ec2-user@54.176.165.89 - NO TYPE: IGNORE: Do not use # type: ignore comments to suppress type errors. This silences the type checker without fixing the actual type mismatch. Fix the underlying type issues instead. - NO CAST: Do not use typing.cast() to suppress type errors. Cast doesn't validate or guarantee the type is correct - it just tells the type checker to trust you without verification. Fix the underlying type issues instead. **EXCEPTIONS**: (1) Use cast when external libraries (e.g., Supabase) return `Any` and you need proper type inference. Example: `return cast(str, result.data["token"])` when `result.data` is typed as `Any` by the library. (2) Use cast in test files when creating test fixtures that need to satisfy TypedDict requirements. - NO SUPPRESSION: Do not use # pyright: reportArgumentType=false or any other pyright suppression comments. Fix the underlying type issues instead. -- NO ANY: Do not use Any type. Fix the specific type issues instead. When you have to use `as any` in TypeScript/JavaScript, ALWAYS add a comment above explaining why it's needed. Example: +- NO ANY: Do not use Any type. Fix the specific type issues instead. `response.json()` returns `Any` - always assign it to a fully typed variable (e.g., `commits: list[dict[str, dict[str, str]]] = response.json()`). `dict` without value types makes `.get()` return `Any` - always specify value types so downstream `.get()` calls return concrete types, not `Any`. Similarly, chaining 2+ `.get()` calls propagates `Any` - break the chain into separate variables. +- NO `.get()` DEFAULTS: Do not use `.get("key", {})` or `.get("key", "")`. Use `.get("key")` and handle `None` explicitly. Defaults hide missing data and make bugs harder to find. When you have to use `as any` in TypeScript/JavaScript, ALWAYS add a comment above explaining why it's needed. Example: - No annotations: Don't use annotations like var: type = value. Fix the root cause of type issues instead. -- NO UNNECESSARY HELPER FUNCTIONS: Do not extract code into helper functions unless the code is used multiple times. Single-use helper functions add unnecessary indirection and make code harder to follow. Keep logic inline when it's only used once. +- SINGLE RESPONSIBILITY: One file, one function. Don't create 2+ functions in a file. If a helper is used multiple times within the file, keep it as a private function in that file. If it's used only once, inline it. If it's needed by other files, move it to its own file. - ALWAYS USE `@handle_exceptions` DECORATOR: Every function (sync or async) must use the `@handle_exceptions` decorator from `utils.error.handle_exceptions`. This ensures consistent error handling, Sentry reporting, and logging across the codebase. Use `@handle_exceptions(default_return_value=..., raise_on_error=False)` with an appropriate default return value. - NO `__init__.py`: Do not create `__init__.py` files. Python 3.3+ supports implicit namespace packages, so `__init__.py` is not required. This project uses Python 3.13. diff --git a/services/github/users/get_email_from_commits.py b/services/github/users/get_email_from_commits.py new file mode 100644 index 00000000..82ba5f21 --- /dev/null +++ b/services/github/users/get_email_from_commits.py @@ -0,0 +1,28 @@ +import requests + +from config import GITHUB_API_URL, TIMEOUT +from services.github.utils.create_headers import create_headers +from utils.error.handle_exceptions import handle_exceptions + +NOREPLY_SUFFIX = "@users.noreply.github.com" + + +@handle_exceptions(default_return_value=None, raise_on_error=False) +def get_email_from_commits(owner: str, repo: str, username: str, token: str): + """Fallback: get email from recent commits when the public profile has no email. + https://docs.github.com/en/rest/commits/commits#list-commits""" + response = requests.get( + url=f"{GITHUB_API_URL}/repos/{owner}/{repo}/commits", + headers=create_headers(token=token), + params={"author": username, "per_page": 5}, + timeout=TIMEOUT, + ) + response.raise_for_status() + commits: list[dict[str, dict[str, dict[str, str]]]] = response.json() + for commit in commits: + commit_obj = commit.get("commit", {}) + author = commit_obj.get("author", {}) + email = author.get("email", "") + if email and not email.endswith(NOREPLY_SUFFIX): + return email + return None diff --git a/services/github/users/get_user_public_email.py b/services/github/users/get_user_public_email.py index 1f6ab197..c96d0782 100644 --- a/services/github/users/get_user_public_email.py +++ b/services/github/users/get_user_public_email.py @@ -30,9 +30,11 @@ def get_user_public_info(username: str, token: str): ) response.raise_for_status() user_data: dict[str, str | None] = response.json() + email = user_data.get("email") + name: str = user_data.get("name") or "" # Title-case if all lowercase or all uppercase (e.g., "wes nishio" -> "Wes Nishio", "HIROSHI" -> "Hiroshi") # Cross-ref: website/app/api/auth/[...nextauth]/route.ts if name == name.lower() or name == name.upper(): name = name.title() - return UserPublicInfo(email=user_data.get("email"), display_name=name) + return UserPublicInfo(email=email, display_name=name) diff --git a/services/github/users/test_get_email_from_commits.py b/services/github/users/test_get_email_from_commits.py new file mode 100644 index 00000000..60940199 --- /dev/null +++ b/services/github/users/test_get_email_from_commits.py @@ -0,0 +1,55 @@ +from unittest.mock import MagicMock, patch + +from services.github.users.get_email_from_commits import get_email_from_commits + + +@patch("services.github.users.get_email_from_commits.requests.get") +def test_returns_first_non_noreply_email(mock_get: MagicMock): + mock_get.return_value.status_code = 200 + mock_get.return_value.raise_for_status = MagicMock() + mock_get.return_value.json.return_value = [ + {"commit": {"author": {"email": "wes@gitauto.ai"}}}, + ] + result = get_email_from_commits( + owner="gitautoai", repo="gitauto", username="wes", token="tok" + ) + assert result == "wes@gitauto.ai" + + +@patch("services.github.users.get_email_from_commits.requests.get") +def test_skips_noreply_email(mock_get: MagicMock): + mock_get.return_value.status_code = 200 + mock_get.return_value.raise_for_status = MagicMock() + mock_get.return_value.json.return_value = [ + {"commit": {"author": {"email": "123+user@users.noreply.github.com"}}}, + {"commit": {"author": {"email": "real@example.com"}}}, + ] + result = get_email_from_commits(owner="o", repo="r", username="u", token="t") + assert result == "real@example.com" + + +@patch("services.github.users.get_email_from_commits.requests.get") +def test_returns_none_when_all_noreply(mock_get: MagicMock): + mock_get.return_value.status_code = 200 + mock_get.return_value.raise_for_status = MagicMock() + mock_get.return_value.json.return_value = [ + {"commit": {"author": {"email": "123+user@users.noreply.github.com"}}}, + ] + result = get_email_from_commits(owner="o", repo="r", username="u", token="t") + assert result is None + + +@patch("services.github.users.get_email_from_commits.requests.get") +def test_returns_none_when_no_commits(mock_get: MagicMock): + mock_get.return_value.status_code = 200 + mock_get.return_value.raise_for_status = MagicMock() + mock_get.return_value.json.return_value = [] + result = get_email_from_commits(owner="o", repo="r", username="u", token="t") + assert result is None + + +@patch("services.github.users.get_email_from_commits.requests.get") +def test_returns_none_on_api_error(mock_get: MagicMock): + mock_get.side_effect = Exception("API error") + result = get_email_from_commits(owner="o", repo="r", username="u", token="t") + assert result is None diff --git a/services/github/users/test_get_user_public_email.py b/services/github/users/test_get_user_public_email.py index 08ff47e0..88636b43 100644 --- a/services/github/users/test_get_user_public_email.py +++ b/services/github/users/test_get_user_public_email.py @@ -47,7 +47,10 @@ def test_get_user_public_info_successful_request( "services.github.users.get_user_public_email.requests.get", return_value=mock_response, ): - result = get_user_public_info(username=sample_username, token=sample_token) + result = get_user_public_info( + username=sample_username, + token=sample_token, + ) assert result.email == "test@example.com" assert result.display_name == "Test User" @@ -56,7 +59,8 @@ def test_get_user_public_info_bot_user_skips_api_call(sample_token): """Test that bot usernames skip the API call (bots have name=null, email=null).""" with patch("services.github.users.get_user_public_email.requests.get") as mock_get: result = get_user_public_info( - username="github-actions[bot]", token=sample_token + username="github-actions[bot]", + token=sample_token, ) assert result == DEFAULT_RETURN mock_get.assert_not_called() @@ -71,7 +75,10 @@ def test_get_user_public_info_calls_correct_api_endpoint(sample_username, sample } mock_get.return_value.raise_for_status.return_value = None - get_user_public_info(username=sample_username, token=sample_token) + get_user_public_info( + username=sample_username, + token=sample_token, + ) expected_url = f"{GITHUB_API_URL}/users/{sample_username}" mock_get.assert_called_once() @@ -94,7 +101,10 @@ def test_get_user_public_info_uses_correct_headers(sample_username, sample_token } mock_get.return_value.raise_for_status.return_value = None - get_user_public_info(username=sample_username, token=sample_token) + get_user_public_info( + username=sample_username, + token=sample_token, + ) mock_create_headers.assert_called_once_with(token=sample_token) mock_get.assert_called_once() @@ -111,7 +121,10 @@ def test_get_user_public_info_uses_correct_timeout(sample_username, sample_token } mock_get.return_value.raise_for_status.return_value = None - get_user_public_info(username=sample_username, token=sample_token) + get_user_public_info( + username=sample_username, + token=sample_token, + ) mock_get.assert_called_once() _, kwargs = mock_get.call_args @@ -126,7 +139,10 @@ def test_get_user_public_info_calls_raise_for_status( "services.github.users.get_user_public_email.requests.get", return_value=mock_response, ): - get_user_public_info(username=sample_username, token=sample_token) + get_user_public_info( + username=sample_username, + token=sample_token, + ) mock_response.raise_for_status.assert_called_once() @@ -147,7 +163,10 @@ def test_get_user_public_info_extracts_email_and_name_from_response( "services.github.users.get_user_public_email.requests.get", return_value=mock_response, ): - result = get_user_public_info(username=sample_username, token=sample_token) + result = get_user_public_info( + username=sample_username, + token=sample_token, + ) assert result.email == expected_email assert result.display_name == "Some User" @@ -173,7 +192,10 @@ def test_get_user_public_info_with_different_usernames(): "services.github.users.get_user_public_email.requests.get", return_value=mock_response, ): - result = get_user_public_info(username=username, token="test-token") + result = get_user_public_info( + username=username, + token="test-token", + ) assert result.email == expected_email assert result.display_name == expected_name @@ -223,7 +245,10 @@ def test_get_user_public_info_http_error_returns_default(sample_username, sample "services.github.users.get_user_public_email.requests.get", return_value=mock_response, ): - result = get_user_public_info(username=sample_username, token=sample_token) + result = get_user_public_info( + username=sample_username, + token=sample_token, + ) assert result == DEFAULT_RETURN @@ -235,7 +260,10 @@ def test_get_user_public_info_request_exception_returns_default( "services.github.users.get_user_public_email.requests.get", side_effect=RequestException("Network error"), ): - result = get_user_public_info(username=sample_username, token=sample_token) + result = get_user_public_info( + username=sample_username, + token=sample_token, + ) assert result == DEFAULT_RETURN @@ -245,7 +273,10 @@ def test_get_user_public_info_timeout_returns_default(sample_username, sample_to "services.github.users.get_user_public_email.requests.get", side_effect=Timeout("Request timed out"), ): - result = get_user_public_info(username=sample_username, token=sample_token) + result = get_user_public_info( + username=sample_username, + token=sample_token, + ) assert result == DEFAULT_RETURN @@ -261,7 +292,10 @@ def test_get_user_public_info_json_decode_error_returns_default( "services.github.users.get_user_public_email.requests.get", return_value=mock_response, ): - result = get_user_public_info(username=sample_username, token=sample_token) + result = get_user_public_info( + username=sample_username, + token=sample_token, + ) assert result == DEFAULT_RETURN @@ -280,7 +314,10 @@ def test_get_user_public_info_missing_email_key_returns_none_email( "services.github.users.get_user_public_email.requests.get", return_value=mock_response, ): - result = get_user_public_info(username=sample_username, token=sample_token) + result = get_user_public_info( + username=sample_username, + token=sample_token, + ) assert result.email is None assert result.display_name == "Test User" @@ -301,7 +338,10 @@ def test_get_user_public_info_null_email_value_returns_none_email( "services.github.users.get_user_public_email.requests.get", return_value=mock_response, ): - result = get_user_public_info(username=sample_username, token=sample_token) + result = get_user_public_info( + username=sample_username, + token=sample_token, + ) assert result.email is None assert result.display_name == "Test User" @@ -321,7 +361,10 @@ def test_get_user_public_info_missing_name_key_returns_empty_display_name( "services.github.users.get_user_public_email.requests.get", return_value=mock_response, ): - result = get_user_public_info(username=sample_username, token=sample_token) + result = get_user_public_info( + username=sample_username, + token=sample_token, + ) assert result.email == "test@example.com" assert result.display_name == "" @@ -339,7 +382,10 @@ def test_get_user_public_info_title_cases_lowercase_name(sample_username, sample "services.github.users.get_user_public_email.requests.get", return_value=mock_response, ): - result = get_user_public_info(username=sample_username, token=sample_token) + result = get_user_public_info( + username=sample_username, + token=sample_token, + ) assert result.display_name == "Wes Nishio" @@ -356,7 +402,10 @@ def test_get_user_public_info_title_cases_uppercase_name(sample_username, sample "services.github.users.get_user_public_email.requests.get", return_value=mock_response, ): - result = get_user_public_info(username=sample_username, token=sample_token) + result = get_user_public_info( + username=sample_username, + token=sample_token, + ) assert result.display_name == "Hiroshi" @@ -373,7 +422,10 @@ def test_get_user_public_info_preserves_mixed_case_name(sample_username, sample_ "services.github.users.get_user_public_email.requests.get", return_value=mock_response, ): - result = get_user_public_info(username=sample_username, token=sample_token) + result = get_user_public_info( + username=sample_username, + token=sample_token, + ) assert result.display_name == "Wes Nishio" @@ -392,7 +444,10 @@ def test_get_user_public_info_empty_name_returns_empty_display_name( "services.github.users.get_user_public_email.requests.get", return_value=mock_response, ): - result = get_user_public_info(username=sample_username, token=sample_token) + result = get_user_public_info( + username=sample_username, + token=sample_token, + ) assert result.display_name == "" @@ -411,5 +466,8 @@ def test_get_user_public_info_null_name_returns_empty_display_name( "services.github.users.get_user_public_email.requests.get", return_value=mock_response, ): - result = get_user_public_info(username=sample_username, token=sample_token) + result = get_user_public_info( + username=sample_username, + token=sample_token, + ) assert result.display_name == "" diff --git a/services/github/utils/deconstruct_github_payload.py b/services/github/utils/deconstruct_github_payload.py index d7e2d90c..3267720a 100644 --- a/services/github/utils/deconstruct_github_payload.py +++ b/services/github/utils/deconstruct_github_payload.py @@ -6,6 +6,7 @@ from services.github.branches.check_branch_exists import check_branch_exists from services.github.types.github_types import BaseArgs, PrLabeledPayload from services.github.token.get_installation_token import get_installation_access_token +from services.github.users.get_email_from_commits import get_email_from_commits from services.github.users.get_user_public_email import get_user_public_info from services.supabase.repositories.get_repository import get_repository from utils.error.handle_exceptions import handle_exceptions @@ -77,6 +78,10 @@ def deconstruct_github_payload( github_urls, other_urls = extract_urls(text=pr_body) sender_info = get_user_public_info(username=sender_name, token=token) sender_email = sender_info.email + if not sender_email: + sender_email = get_email_from_commits( + owner=owner_name, repo=repo_name, username=sender_name, token=token + ) sender_display_name = sender_info.display_name base_args: BaseArgs = { diff --git a/services/webhook/check_suite_handler.py b/services/webhook/check_suite_handler.py index a7dc686c..967873b4 100644 --- a/services/webhook/check_suite_handler.py +++ b/services/webhook/check_suite_handler.py @@ -48,6 +48,7 @@ from services.github.workflow_runs.get_workflow_run_logs import get_workflow_run_logs from services.claude.tools.tools import TOOLS_FOR_PRS from services.slack.slack_notify import slack_notify +from services.github.users.get_email_from_commits import get_email_from_commits from services.github.users.get_user_public_email import get_user_public_info from services.supabase.check_suites.insert_check_suite import insert_check_suite from services.supabase.codecov_tokens.get_codecov_token import get_codecov_token @@ -173,6 +174,10 @@ async def handle_check_suite( sender_id = payload["sender"]["id"] sender_name = payload["sender"]["login"] sender_info = get_user_public_info(username=sender_name, token=token) + if not sender_info.email: + sender_info.email = get_email_from_commits( + owner=owner_name, repo=repo_name, username=sender_name, token=token + ) # Extract PR related variables and return if no PR is associated with this check suite pull_requests = check_suite["pull_requests"] diff --git a/services/webhook/handle_installation.py b/services/webhook/handle_installation.py index a1b5fd1c..c76d1099 100644 --- a/services/webhook/handle_installation.py +++ b/services/webhook/handle_installation.py @@ -1,5 +1,6 @@ from services.github.token.get_installation_token import get_installation_access_token from services.github.types.github_types import InstallationPayload +from services.github.users.get_email_from_commits import get_email_from_commits from services.github.users.get_user_public_email import get_user_public_info from services.supabase.credits.check_grant_exists import check_grant_exists from services.supabase.credits.insert_credit import insert_credit @@ -27,6 +28,14 @@ async def handle_installation_created(payload: InstallationPayload): sender_name = payload["sender"]["login"] token = get_installation_access_token(installation_id=installation_id) user_info = get_user_public_info(username=sender_name, token=token) + if not user_info.email and repositories: + for repo in repositories: + email = get_email_from_commits( + owner=owner_name, repo=repo["name"], username=sender_name, token=token + ) + if email: + user_info.email = email + break if not check_owner_exists(owner_id=owner_id): customer_id = create_stripe_customer( diff --git a/services/webhook/handle_installation_repos_added.py b/services/webhook/handle_installation_repos_added.py index a400b1b7..506f3d3d 100644 --- a/services/webhook/handle_installation_repos_added.py +++ b/services/webhook/handle_installation_repos_added.py @@ -1,6 +1,9 @@ from services.github.token.get_installation_token import get_installation_access_token from services.github.types.github_types import InstallationRepositoriesPayload +from services.github.users.get_email_from_commits import get_email_from_commits +from services.github.users.get_user_public_email import get_user_public_info from services.supabase.installations.is_installation_valid import is_installation_valid +from services.supabase.users.upsert_user import upsert_user from services.webhook.process_repositories import process_repositories from services.webhook.setup_handler import setup_handler from utils.error.handle_exceptions import handle_exceptions @@ -28,6 +31,23 @@ async def handle_installation_repos_added( # Process added repositories repositories = payload["repositories_added"] + + # Capture sender's email (this sender may differ from the original installer) + sender_info = get_user_public_info(username=sender_name, token=token) + if not sender_info.email and repositories: + for repo in repositories: + email = get_email_from_commits( + owner=owner_name, repo=repo["name"], username=sender_name, token=token + ) + if email: + sender_info.email = email + break + upsert_user( + user_id=sender_id, + user_name=sender_name, + email=sender_info.email, + display_name=sender_info.display_name, + ) await process_repositories( owner_id=owner_id, owner_name=owner_name, diff --git a/services/webhook/review_run_handler.py b/services/webhook/review_run_handler.py index 468ae59b..60ef7842 100644 --- a/services/webhook/review_run_handler.py +++ b/services/webhook/review_run_handler.py @@ -32,6 +32,7 @@ from services.github.pulls.get_pull_request_files import get_pull_request_files from services.github.pulls.get_review_thread_comments import get_review_thread_comments from services.github.token.get_installation_token import get_installation_access_token +from services.github.users.get_email_from_commits import get_email_from_commits from services.github.users.get_user_public_email import get_user_public_info from services.github.trees.get_local_file_tree import get_local_file_tree from services.github.types.github_types import ReviewBaseArgs @@ -107,6 +108,10 @@ async def handle_review_run( installation_id: int = payload["installation"]["id"] token = get_installation_access_token(installation_id=installation_id) sender_info = get_user_public_info(username=sender_name, token=token) + if not sender_info.email: + sender_info.email = get_email_from_commits( + owner=owner_name, repo=repo_name, username=sender_name, token=token + ) # Get all comments in the review thread thread_comments = get_review_thread_comments( diff --git a/services/webhook/test_handle_installation.py b/services/webhook/test_handle_installation.py index 05561dbc..ca4b74e1 100644 --- a/services/webhook/test_handle_installation.py +++ b/services/webhook/test_handle_installation.py @@ -63,6 +63,14 @@ def mock_get_user_public_info(): yield mock +@pytest.fixture +def mock_get_email_from_commits(): + """Mock get_email_from_commits function.""" + with patch("services.webhook.handle_installation.get_email_from_commits") as mock: + mock.return_value = None + yield mock + + @pytest.fixture def mock_check_owner_exists(): """Mock check_owner_exists function.""" @@ -126,6 +134,7 @@ def mock_process_repositories(): def all_mocks( mock_get_installation_access_token, mock_get_user_public_info, + mock_get_email_from_commits, mock_check_owner_exists, mock_create_stripe_customer, mock_insert_owner, @@ -139,6 +148,7 @@ def all_mocks( return { "get_installation_access_token": mock_get_installation_access_token, "get_user_public_info": mock_get_user_public_info, + "get_email_from_commits": mock_get_email_from_commits, "check_owner_exists": mock_check_owner_exists, "create_stripe_customer": mock_create_stripe_customer, "insert_owner": mock_insert_owner, diff --git a/services/webhook/test_handle_installation_repos_added.py b/services/webhook/test_handle_installation_repos_added.py index aec7723f..7728595b 100644 --- a/services/webhook/test_handle_installation_repos_added.py +++ b/services/webhook/test_handle_installation_repos_added.py @@ -11,6 +11,7 @@ # Local imports from services.github.types.github_types import InstallationRepositoriesPayload +from services.github.users.get_user_public_email import UserPublicInfo from services.webhook.handle_installation_repos_added import ( handle_installation_repos_added, ) @@ -65,6 +66,35 @@ def mock_get_installation_access_token(): yield mock +@pytest.fixture(autouse=True) +def mock_get_user_public_info(): + """Mock get_user_public_info function.""" + with patch( + "services.webhook.handle_installation_repos_added.get_user_public_info" + ) as mock: + mock.return_value = UserPublicInfo( + email="test@example.com", display_name="Test Sender" + ) + yield mock + + +@pytest.fixture(autouse=True) +def mock_get_email_from_commits(): + """Mock get_email_from_commits function.""" + with patch( + "services.webhook.handle_installation_repos_added.get_email_from_commits" + ) as mock: + mock.return_value = None + yield mock + + +@pytest.fixture(autouse=True) +def mock_upsert_user(): + """Mock upsert_user function.""" + with patch("services.webhook.handle_installation_repos_added.upsert_user") as mock: + yield mock + + @pytest.fixture def mock_process_repositories(): """Mock process_repositories function."""