From e5891fe39254c9ad8cb25a19d5a33a363766b3f3 Mon Sep 17 00:00:00 2001
From: Daniel Sogl <me@danielsogl.de>
Date: Thu, 19 Feb 2026 14:37:35 +0100
Subject: [PATCH 1/3] fix(client): add in-memory rate limit fallback when
 Valkey is unavailable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

set_global_rate_limit() is decorated with @handle_valkey_error which
silently swallows ValkeyError. If Valkey is down when Blizzard returns
HTTP 403, the rate limit flag is never stored, and subsequent requests
continue hitting Blizzard — potentially causing extended bans.

Add an in-memory timestamp (_rate_limited_until) as a fallback that
works even when Valkey is unavailable. Since BlizzardClient is a
singleton, this effectively rate-limits the entire process. The Valkey
check remains for cross-worker coordination; the in-memory check adds
defense in depth.
---
 app/adapters/blizzard/client.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/app/adapters/blizzard/client.py b/app/adapters/blizzard/client.py
index e0574e38..cebe0305 100644
--- a/app/adapters/blizzard/client.py
+++ b/app/adapters/blizzard/client.py
@@ -28,6 +28,7 @@ class BlizzardClient(metaclass=Singleton):
 
     def __init__(self):
         self.cache_manager = CacheManager()
+        self._rate_limited_until: float = 0
         self.client = httpx.AsyncClient(
             headers={
                 "User-Agent": (
@@ -129,11 +130,19 @@ async def _check_rate_limit(self) -> None:
 
         Returns HTTP 429 with Retry-After header if rate limited.
 
+        Checks both Valkey (shared across workers) and an in-memory timestamp
+        (fallback when Valkey is unavailable).
+
         Note: Nginx also performs this check on API cache miss for better performance,
         but this method remains necessary for:
         - Race conditions (concurrent requests when rate limit is first set)
         - Defense in depth (if nginx check fails or is bypassed)
         """
+        # Check in-memory fallback first (works even when Valkey is down)
+        remaining = self._rate_limited_until - time.monotonic()
+        if remaining > 0:
+            raise self._too_many_requests_response(retry_after=int(remaining) or 1)
+
         if await self.cache_manager.is_being_rate_limited():
             raise self._too_many_requests_response(
                 retry_after=await self.cache_manager.get_global_rate_limit_remaining_time()
@@ -164,7 +173,10 @@ async def _blizzard_forbidden_error(self) -> HTTPException:
         Also prevent further calls to Blizzard for a given amount of time.
         """
 
-        # We have to block future requests to Blizzard, cache the information on Valkey
+        # Block future requests: store in Valkey (shared) and in-memory (fallback)
+        self._rate_limited_until = (
+            time.monotonic() + settings.blizzard_rate_limit_retry_after
+        )
         await self.cache_manager.set_global_rate_limit()
 
         # Track rate limit event

From d6bf8b62cc66fdfc78d37d02f4a7774804adadff Mon Sep 17 00:00:00 2001
From: Daniel Sogl <me@danielsogl.de>
Date: Thu, 19 Feb 2026 14:40:30 +0100
Subject: [PATCH 2/3] fix: sync in-memory rate limit fallback with Valkey TTL

When Valkey reports an active rate limit, sync the in-memory
_rate_limited_until timestamp with Valkey's remaining TTL. This
ensures the in-memory fallback keeps blocking requests to Blizzard
if Valkey becomes unavailable in the middle of a rate-limit window.

Addresses review feedback from sourcery-ai.
---
 app/adapters/blizzard/client.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/app/adapters/blizzard/client.py b/app/adapters/blizzard/client.py
index cebe0305..0c9c7f89 100644
--- a/app/adapters/blizzard/client.py
+++ b/app/adapters/blizzard/client.py
@@ -144,8 +144,12 @@ async def _check_rate_limit(self) -> None:
             raise self._too_many_requests_response(retry_after=int(remaining) or 1)
 
         if await self.cache_manager.is_being_rate_limited():
+            remaining_ttl = await self.cache_manager.get_global_rate_limit_remaining_time()
+            # Sync in-memory fallback with Valkey TTL so it can protect
+            # if Valkey becomes unavailable mid-rate-limit window
+            self._rate_limited_until = time.monotonic() + float(remaining_ttl)
             raise self._too_many_requests_response(
-                retry_after=await self.cache_manager.get_global_rate_limit_remaining_time()
+                retry_after=int(remaining_ttl) or 1
             )
 
     def blizzard_response_error_from_response(

From e201df1ede95f5fae4608082615b0e9a56209032 Mon Sep 17 00:00:00 2001
From: Daniel Sogl <me@danielsogl.de>
Date: Sat, 21 Feb 2026 11:48:42 +0100
Subject: [PATCH 3/3] fix(tests): reset in-memory rate limit state between
 tests and use math.ceil

Two issues fixed:
- BlizzardClient singleton's _rate_limited_until persisted across tests,
  causing 429 responses in unrelated tests after any rate limit trigger
- int() truncation turned 4.99s into "4 seconds", mismatching expected
  "5 seconds" in assertions; math.ceil() rounds up, so the reported
  retry delay never understates the remaining time

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 app/adapters/blizzard/client.py | 5 +++--
 tests/conftest.py               | 4 ++++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/app/adapters/blizzard/client.py b/app/adapters/blizzard/client.py
index 0c9c7f89..41c3faa3 100644
--- a/app/adapters/blizzard/client.py
+++ b/app/adapters/blizzard/client.py
@@ -1,5 +1,6 @@
 """Blizzard HTTP client adapter implementing BlizzardClientPort"""
 
+import math
 import time
 
 import httpx
@@ -141,7 +142,7 @@ async def _check_rate_limit(self) -> None:
         # Check in-memory fallback first (works even when Valkey is down)
         remaining = self._rate_limited_until - time.monotonic()
         if remaining > 0:
-            raise self._too_many_requests_response(retry_after=int(remaining) or 1)
+            raise self._too_many_requests_response(retry_after=math.ceil(remaining))
 
         if await self.cache_manager.is_being_rate_limited():
             remaining_ttl = await self.cache_manager.get_global_rate_limit_remaining_time()
@@ -149,7 +150,7 @@ async def _check_rate_limit(self) -> None:
             # if Valkey becomes unavailable mid-rate-limit window
             self._rate_limited_until = time.monotonic() + float(remaining_ttl)
             raise self._too_many_requests_response(
-                retry_after=int(remaining_ttl) or 1
+                retry_after=math.ceil(float(remaining_ttl))
             )
 
     def blizzard_response_error_from_response(
diff --git a/tests/conftest.py b/tests/conftest.py
index 8fd25f18..d88a6ad6 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -9,6 +9,7 @@
 import pytest_asyncio
 from fastapi.testclient import TestClient
 
+from app.adapters.blizzard import BlizzardClient
 from app.adapters.storage import SQLiteStorage
 from app.main import app
 
@@ -45,6 +46,9 @@ async def _patch_before_every_test(
     await valkey_server.flushdb()
     await storage_db.clear_all_data()
 
+    # Reset in-memory rate limit state on the singleton
+    BlizzardClient()._rate_limited_until = 0
+
     with (
         patch("app.helpers.settings.discord_webhook_enabled", False),
         patch("app.helpers.settings.profiler", None),