From 4ab333257bd249bab75a1bbdd0539b27ae5e4f45 Mon Sep 17 00:00:00 2001
From: yeyitech <yeyitech@gmail.com>
Date: Wed, 17 Jun 2026 15:38:05 +0800
Subject: [PATCH 1/2] fix(server): refresh api-keys cache on miss + TTL so
 multi-instance deploys see new accounts (#2351)

In a multi-instance deployment behind a load balancer, accounts and
users created on Instance A via /api/v1/admin/accounts wrote to shared
AGFS but never propagated to Instance B's in-memory api-keys cache,
which loaded once at startup. Subsequent requests routed to Instance B
returned 401 even though the new account was persisted.

The cache now refreshes on cache-miss before declaring an unknown key
unauthorized, and entries expire after a 30-second TTL so changes
written elsewhere are picked up on the next request after the TTL.
Local writes (admin endpoints) invalidate immediately so the
originating instance always sees its own writes without waiting.

Concurrent misses for the same key dedupe through an asyncio.Lock so a
thundering-herd of 401s after a fresh account creation only triggers
one reload.

No new dependencies (no Redis / no pub-sub). Storage format unchanged.
TTL is a module constant; no new config field.

Closes #2351

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 openviking/server/api_keys/legacy.py          | 149 +++++++++-
 openviking/server/api_keys/new.py             |  39 +++
 openviking/server/auth.py                     |   2 +-
 .../test_api_keys_cache_invalidation.py       | 262 ++++++++++++++++++
 4 files changed, 450 insertions(+), 2 deletions(-)
 create mode 100644 tests/server/test_api_keys_cache_invalidation.py

diff --git a/openviking/server/api_keys/legacy.py b/openviking/server/api_keys/legacy.py
index 9b4694f3da..a2e4f69253 100644
--- a/openviking/server/api_keys/legacy.py
+++ b/openviking/server/api_keys/legacy.py
@@ -2,11 +2,13 @@
 # SPDX-License-Identifier: AGPL-3.0
 """Legacy API Key management (original implementation)."""
 
+import asyncio
 import fnmatch
 import hashlib
 import hmac
 import json
 import secrets
+import time
 from datetime import datetime, timezone
 from typing import Dict, Optional
 
@@ -44,6 +46,16 @@
 LEGACY_ARGON2_PARALLELISM = ARGON2_PARALLELISM
 LEGACY_ARGON2_HASH_LENGTH = ARGON2_HASH_LENGTH
 
+# In-memory cache TTL. Multi-instance deploys share AGFS state but each instance
+# keeps its own dict; without a TTL an account created on instance A would never
+# appear on instance B. Cache-miss reload (see resolve_with_refresh) handles the
+# urgent case; this constant bounds staleness for already-cached entries.
+ACCOUNTS_CACHE_TTL_SECONDS = 30.0
+
+# Bound how often a *known-bad* key can trigger a reload. Without this a stream
+# of bogus keys from a misconfigured client would reload AGFS on every request.
+UNKNOWN_KEY_RELOAD_COOLDOWN_SECONDS = ACCOUNTS_CACHE_TTL_SECONDS
+
 
 class LegacyAPIKeyManager:
     """Manages API keys for multi-tenant authentication (legacy implementation)."""
@@ -69,6 +81,18 @@ def __init__(
         self._accounts: Dict[str, AccountInfo] = {}
         # Prefix index: key_prefix -> list[UserKeyEntry]
         self._prefix_index: Dict[str, list[UserKeyEntry]] = {}
+        # Monotonic clock at last successful AGFS load. ``None`` means "never
+        # loaded" — resolve_with_refresh treats this as expired so the first
+        # cache-miss for an unknown key always triggers a reload.
+        self._loaded_at: Optional[float] = None
+        # Lazily created in resolve_with_refresh (constructor may run outside
+        # an event loop, e.g. test fixtures). Dedupes concurrent reloads so a
+        # burst of misses for a freshly-created key only hits AGFS once.
+        self._reload_lock: Optional[asyncio.Lock] = None
+        # Negative cache: monotonic timestamp of the most recent forced reload
+        # triggered by *this* unknown key. Bounds the reload rate when a client
+        # repeatedly presents a key that genuinely doesn't exist.
+        self._unknown_key_reload_at: Dict[str, float] = {}
 
     def _discard_account_state(self, account_id: str) -> None:
         """Remove an account and its key index entries from in-memory state."""
@@ -105,7 +129,16 @@ async def _rollback_create_account(self, account_id: str) -> None:
             logger.exception("Failed to persist rollback for account %s", account_id)
 
     async def load(self) -> None:
-        """Load accounts and user keys from VikingFS into memory."""
+        """Load accounts and user keys from VikingFS into memory.
+
+        Safe to call repeatedly — clears existing in-memory state first so a
+        reload picks up writes made by peer instances against shared AGFS.
+        """
+        load_start = time.monotonic()
+        # Reset in-memory state so peer-instance deletions are also reflected.
+        self._accounts.clear()
+        self._prefix_index.clear()
+
         accounts_data = await self._read_json(ACCOUNTS_PATH)
         if accounts_data is None:
             # First run: create default account
@@ -172,6 +205,16 @@ async def load(self) -> None:
             len(self._accounts),
             sum(len(info.users) for info in self._accounts.values()),
         )
+        elapsed = time.monotonic() - load_start
+        if elapsed > 1.0:
+            logger.warning(
+                "LegacyAPIKeyManager.load() took %.2fs (>1s) — consider scoping"
+                " reloads if storage layer adds per-key reads",
+                elapsed,
+            )
+        self._loaded_at = time.monotonic()
+        # A successful reload makes any prior negative-cache entries obsolete.
+        self._unknown_key_reload_at.clear()
 
     def resolve(self, api_key: str) -> ResolvedIdentity:
         """Resolve an API key to identity. Sequential matching: root key first, then user key index."""
@@ -205,6 +248,104 @@ def resolve(self, api_key: str) -> ResolvedIdentity:
 
         raise UnauthenticatedError("Invalid API Key")
 
+    def _cache_is_stale(self) -> bool:
+        """Whether the in-memory cache is older than the TTL."""
+        if self._loaded_at is None:
+            return True
+        return (time.monotonic() - self._loaded_at) > ACCOUNTS_CACHE_TTL_SECONDS
+
+    def _should_skip_unknown_key_reload(self, api_key: str) -> bool:
+        """Whether this unknown key has already triggered a recent reload."""
+        last = self._unknown_key_reload_at.get(api_key)
+        if last is None:
+            return False
+        return (time.monotonic() - last) < UNKNOWN_KEY_RELOAD_COOLDOWN_SECONDS
+
+    def invalidate_cache(self) -> None:
+        """Clear the unknown-key cooldown after a local write.
+
+        Local writes mutate ``self._accounts`` synchronously so the new state
+        is already visible to ``resolve``. The only stale piece is the
+        negative cache: a key looked up *before* it was minted would have
+        been recorded as unknown and would otherwise sit in cooldown until
+        the TTL elapsed. Clearing the cooldown lets the just-minted key
+        resolve on the very next request.
+        """
+        self._unknown_key_reload_at.clear()
+
+    async def _reload_locked(self, force: bool = False) -> None:
+        """Re-read AGFS state under the dedup lock.
+
+        ``force=True`` semantics: at least one reload after the call site
+        observed a stale cache. Concurrent forced reloads dedupe — once any
+        caller's reload completes inside the lock, later callers see a fresh
+        ``_loaded_at`` and skip.
+        """
+        if self._reload_lock is None:
+            self._reload_lock = asyncio.Lock()
+        # Snapshot the load timestamp *before* acquiring the lock so we can
+        # tell whether a peer caller already refreshed by the time we get in.
+        observed = self._loaded_at
+        async with self._reload_lock:
+            if force:
+                if self._loaded_at is not None and self._loaded_at != observed:
+                    # Another caller refreshed while we were queued. Done.
+                    return
+            else:
+                if not self._cache_is_stale():
+                    return
+            logger.info("api-key cache reload from AGFS (force=%s)", force)
+            await self.load()
+
+    async def resolve_with_refresh(self, api_key: str) -> ResolvedIdentity:
+        """Like ``resolve`` but reloads from AGFS on cache miss / TTL expiry.
+
+        Two-pronged freshness for multi-instance deploys:
+          1. Cache-miss reload — an unknown ``api_key`` triggers one forced
+             reload before declaring 401, so accounts created on a peer
+             instance become visible on the next request.
+          2. TTL refresh — entries older than ``ACCOUNTS_CACHE_TTL_SECONDS``
+             are refreshed before the lookup, bounding staleness for entries
+             we already think we know.
+
+        Concurrent callers dedupe through ``_reload_lock`` so a thundering
+        herd of 401s after a fresh account creation only hits AGFS once.
+        """
+        if not api_key:
+            raise UnauthenticatedError("Missing API Key")
+
+        # Periodic refresh — picks up rotations / removals on peer instances.
+        if self._cache_is_stale():
+            await self._reload_locked(force=False)
+
+        try:
+            return self.resolve(api_key)
+        except UnauthenticatedError:
+            pass
+
+        # Cache miss. Avoid hammering AGFS for keys we just confirmed missing
+        # *and for which a reload has already been performed*. If a peer
+        # caller is mid-reload, our _reload_locked call below will piggy-back
+        # on that reload rather than starting a new one.
+        if self._should_skip_unknown_key_reload(api_key):
+            raise UnauthenticatedError("Invalid API Key")
+
+        observed_loaded_at = self._loaded_at
+        await self._reload_locked(force=True)
+        # Only stamp the negative cache *after* the reload resolved the
+        # question (key truly absent). Stamping before would make peer
+        # callers in the same burst short-circuit their own reload attempt
+        # without ever seeing the post-reload state.
+        try:
+            return self.resolve(api_key)
+        except UnauthenticatedError:
+            # Only count this as a confirmed miss if our caller (or a peer)
+            # actually reloaded — otherwise a concurrent caller's stamp could
+            # cause us to mark a key as missing without re-checking AGFS.
+            if self._loaded_at != observed_loaded_at:
+                self._unknown_key_reload_at[api_key] = time.monotonic()
+            raise
+
     async def create_account(
         self,
         account_id: str,
@@ -269,6 +410,7 @@ async def create_account(
         except Exception:
             await self._rollback_create_account(account_id)
             raise
+        self.invalidate_cache()
         return key
 
     async def delete_account(self, account_id: str) -> None:
@@ -279,6 +421,7 @@ async def delete_account(self, account_id: str) -> None:
         self._discard_account_state(account_id)
 
         await self._save_accounts_json()
+        self.invalidate_cache()
 
     async def register_user(self, account_id: str, user_id: str, role: str = "user") -> str:
         """Register a new user in an account. Returns the user's API key (legacy format)."""
@@ -328,6 +471,7 @@ async def register_user(self, account_id: str, user_id: str, role: str = "user")
             self._prefix_index[key_prefix].append(entry)
 
         await self._save_users_json(account_id)
+        self.invalidate_cache()
         return key
 
     async def remove_user(self, account_id: str, user_id: str) -> None:
@@ -359,6 +503,7 @@ async def remove_user(self, account_id: str, user_id: str) -> None:
                     del self._prefix_index[key_prefix]
 
         await self._save_users_json(account_id)
+        self.invalidate_cache()
 
     async def regenerate_key(self, account_id: str, user_id: str) -> str:
         """Regenerate a user's API key. Old key is immediately invalidated."""
@@ -422,6 +567,7 @@ async def regenerate_key(self, account_id: str, user_id: str) -> str:
             self._prefix_index[new_key_prefix].append(entry)
 
         await self._save_users_json(account_id)
+        self.invalidate_cache()
         return new_key
 
     async def set_role(self, account_id: str, user_id: str, role: str) -> None:
@@ -450,6 +596,7 @@ async def set_role(self, account_id: str, user_id: str, role: str) -> None:
                         break
 
         await self._save_users_json(account_id)
+        self.invalidate_cache()
 
     def get_accounts(self) -> list:
         """List all accounts."""
diff --git a/openviking/server/api_keys/new.py b/openviking/server/api_keys/new.py
index 6f97d1dfa8..33e2592761 100644
--- a/openviking/server/api_keys/new.py
+++ b/openviking/server/api_keys/new.py
@@ -157,6 +157,42 @@ def resolve(self, api_key: str) -> ResolvedIdentity:
         # Fall back to legacy resolver for legacy keys
         return self._legacy.resolve(api_key)
 
+    async def resolve_with_refresh(self, api_key: str) -> ResolvedIdentity:
+        """Resolve an API key, reloading from AGFS on cache miss / TTL expiry.
+
+        See ``LegacyAPIKeyManager.resolve_with_refresh`` for the policy. The
+        new-format fast path goes through the same ``resolve`` body, so
+        cache-miss reload behaves identically for new and legacy keys.
+        """
+        import time as _time
+
+        if not api_key:
+            raise UnauthenticatedError("Missing API Key")
+
+        if self._legacy._cache_is_stale():
+            await self._legacy._reload_locked(force=False)
+
+        try:
+            return self.resolve(api_key)
+        except UnauthenticatedError:
+            pass
+
+        if self._legacy._should_skip_unknown_key_reload(api_key):
+            raise UnauthenticatedError("Invalid API Key")
+
+        observed_loaded_at = self._legacy._loaded_at
+        await self._legacy._reload_locked(force=True)
+        try:
+            return self.resolve(api_key)
+        except UnauthenticatedError:
+            if self._legacy._loaded_at != observed_loaded_at:
+                self._legacy._unknown_key_reload_at[api_key] = _time.monotonic()
+            raise
+
+    def invalidate_cache(self) -> None:
+        """Proxy to ``LegacyAPIKeyManager.invalidate_cache``."""
+        self._legacy.invalidate_cache()
+
     async def create_account(
         self,
         account_id: str,
@@ -233,6 +269,7 @@ async def create_account(
         except Exception:
             await self._legacy._rollback_create_account(account_id)
             raise
+        self._legacy.invalidate_cache()
         return key
 
     async def delete_account(self, account_id: str) -> None:
@@ -292,6 +329,7 @@ async def register_user(self, account_id: str, user_id: str, role: str = "user")
             self._legacy._prefix_index[key_prefix].append(entry)
 
         await self._legacy._save_users_json(account_id)
+        self._legacy.invalidate_cache()
         return key
 
     async def remove_user(self, account_id: str, user_id: str) -> None:
@@ -366,6 +404,7 @@ async def regenerate_key(self, account_id: str, user_id: str) -> str:
             self._legacy._prefix_index[new_key_prefix].append(entry)
 
         await self._legacy._save_users_json(account_id)
+        self._legacy.invalidate_cache()
         return new_key
 
     async def set_role(self, account_id: str, user_id: str, role: str) -> None:
diff --git a/openviking/server/auth.py b/openviking/server/auth.py
index 86316f0e83..ff21e4bf0e 100644
--- a/openviking/server/auth.py
+++ b/openviking/server/auth.py
@@ -332,7 +332,7 @@ async def resolve_identity(
     if oauth_identity is not None:
         return oauth_identity
 
-    identity = api_key_manager.resolve(api_key)
+    identity = await api_key_manager.resolve_with_refresh(api_key)
     identity.account_id = identity.account_id or "default"
     identity.user_id = identity.user_id or "default"
 
diff --git a/tests/server/test_api_keys_cache_invalidation.py b/tests/server/test_api_keys_cache_invalidation.py
new file mode 100644
index 0000000000..ec06f7589b
--- /dev/null
+++ b/tests/server/test_api_keys_cache_invalidation.py
@@ -0,0 +1,262 @@
+# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd.
+# SPDX-License-Identifier: AGPL-3.0
+
+"""Tests for the api-keys cache TTL + cache-miss reload behavior (issue #2351).
+
+These tests bypass real AGFS by overriding ``_read_json`` / ``_write_json`` on
+``LegacyAPIKeyManager`` with an in-memory dict, so they isolate the cache
+invalidation logic from the storage backend (which the existing
+``test_api_key_manager.py`` covers via a real OpenVikingService).
+"""
+
+import asyncio
+import json
+from typing import Optional
+from unittest.mock import MagicMock
+
+import pytest
+
+from openviking.server.api_keys import APIKeyManager
+from openviking.server.api_keys import legacy as legacy_module
+from openviking_cli.exceptions import UnauthenticatedError
+
+ROOT_KEY = "test-root-key-abcdef1234567890abcdef1234567890"
+
+
+class _FakeAGFS:
+    """In-memory AGFS replacement; only the methods the manager uses."""
+
+    def __init__(self, store: Optional[dict] = None):
+        self.store: dict = store if store is not None else {}
+        self.read_count = 0
+
+    async def _read_json(self, path: str):
+        self.read_count += 1
+        if path not in self.store:
+            return None
+        return json.loads(self.store[path])
+
+    async def _write_json(self, path: str, data: dict):
+        self.store[path] = json.dumps(data)
+
+    async def _ensure_parent_dirs_async(self, path: str):
+        return None
+
+
+def _make_manager(fake: _FakeAGFS) -> APIKeyManager:
+    """Construct an APIKeyManager whose AGFS calls are redirected to ``fake``."""
+    # Build a manager with a stubbed VikingFS (we override the I/O methods so
+    # the real AsyncAGFSClient is never called).
+    viking_fs = MagicMock()
+    viking_fs.agfs = MagicMock()
+    mgr = APIKeyManager(root_key=ROOT_KEY, viking_fs=viking_fs)
+    # Patch the legacy I/O. The manager exposes _legacy via a property.
+    mgr._legacy._read_json = fake._read_json  # type: ignore[assignment]
+    mgr._legacy._write_json = fake._write_json  # type: ignore[assignment]
+    mgr._legacy._ensure_parent_dirs_async = fake._ensure_parent_dirs_async  # type: ignore[assignment]
+    return mgr
+
+
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_unknown_key_triggers_one_reload():
+    """A cache-miss on a key that exists in AGFS should trigger one reload and succeed."""
+    fake = _FakeAGFS()
+
+    # Instance A: create an account+user. This populates AGFS.
+    mgr_a = _make_manager(fake)
+    await mgr_a.load()
+    user_key = await mgr_a.create_account("acme", "alice")
+
+    # Instance B: a *fresh* manager loads later (not yet aware of acme/alice).
+    # Simulate "loaded earlier than the create" by loading before the data was
+    # written — already done above for mgr_a; for mgr_b we just need to
+    # discard whatever AGFS state is there at load time.
+    mgr_b = _make_manager(fake)
+    # Snapshot the store, load with empty store, then restore.
+    saved = dict(fake.store)
+    fake.store.clear()
+    await mgr_b.load()
+    fake.store.update(saved)
+
+    # Pre-condition: instance B doesn't know the key yet.
+    with pytest.raises(UnauthenticatedError):
+        mgr_b.resolve(user_key)
+
+    reads_before = fake.read_count
+    # The async path with refresh should reload + succeed.
+    identity = await mgr_b.resolve_with_refresh(user_key)
+    assert identity.account_id == "acme"
+    assert identity.user_id == "alice"
+    # And the reload actually hit AGFS.
+    assert fake.read_count > reads_before
+
+
+@pytest.mark.asyncio
+async def test_ttl_expires_entry(monkeypatch):
+    """When TTL elapses, the next resolve_with_refresh reloads from AGFS."""
+    fake = _FakeAGFS()
+    mgr = _make_manager(fake)
+    await mgr.load()
+    user_key = await mgr.create_account("acme", "alice")
+
+    # Sanity: a known key resolves without further reload.
+    reads_before = fake.read_count
+    await mgr.resolve_with_refresh(user_key)
+    # No miss, no expiry → no reload.
+    assert fake.read_count == reads_before
+
+    # Advance the monotonic clock past the TTL.
+    real_monotonic = legacy_module.time.monotonic
+    offset = legacy_module.ACCOUNTS_CACHE_TTL_SECONDS + 5.0
+    monkeypatch.setattr(
+        legacy_module.time, "monotonic", lambda: real_monotonic() + offset
+    )
+
+    reads_before = fake.read_count
+    await mgr.resolve_with_refresh(user_key)
+    # TTL elapsed → reload happened (AGFS reads bumped).
+    assert fake.read_count > reads_before
+
+
+@pytest.mark.asyncio
+async def test_local_write_invalidates_immediately():
+    """A locally-created account is resolvable on the same instance with no TTL wait.
+
+    This also exercises the negative-cache invalidation: if a key was looked
+    up *before* the account existed, the failed lookup shouldn't poison the
+    next lookup post-creation.
+    """
+    fake = _FakeAGFS()
+    mgr = _make_manager(fake)
+    await mgr.load()
+
+    # We want to construct a key that will be created next, but we don't know
+    # what create_account will mint. So instead: try a bogus key first, then
+    # create, then try the real key. The bogus key seeds the negative cache;
+    # invalidate_cache should still let the real key resolve immediately.
+    fake_attempt = "deadbeef" * 8  # 64 hex chars, won't collide with real key
+    with pytest.raises(UnauthenticatedError):
+        await mgr.resolve_with_refresh(fake_attempt)
+
+    # Now actually create.
+    user_key = await mgr.create_account("acme", "alice")
+
+    # Same instance, same process: should resolve without going through TTL.
+    identity = await mgr.resolve_with_refresh(user_key)
+    assert identity.account_id == "acme"
+    assert identity.user_id == "alice"
+
+
+@pytest.mark.asyncio
+async def test_concurrent_misses_dedupe_reload():
+    """10 concurrent misses for the same unknown key should trigger only one reload."""
+    fake = _FakeAGFS()
+
+    # Set up AGFS to contain an account that mgr_b doesn't know about yet.
+    mgr_a = _make_manager(fake)
+    await mgr_a.load()
+    user_key = await mgr_a.create_account("acme", "alice")
+
+    mgr_b = _make_manager(fake)
+    saved = dict(fake.store)
+    fake.store.clear()
+    await mgr_b.load()
+    fake.store.update(saved)
+
+    # Spy on the storage layer.
+    real_load = mgr_b._legacy.load
+    call_count = {"n": 0}
+
+    async def counting_load():
+        call_count["n"] += 1
+        # Tiny await so concurrent callers can pile up at the lock.
+        await asyncio.sleep(0.01)
+        await real_load()
+
+    mgr_b._legacy.load = counting_load  # type: ignore[assignment]
+
+    results = await asyncio.gather(
+        *[mgr_b.resolve_with_refresh(user_key) for _ in range(10)],
+        return_exceptions=True,
+    )
+
+    # All ten succeed (they may resolve before or after the reload, but must
+    # not raise).
+    for r in results:
+        assert not isinstance(r, BaseException), r
+        assert r.account_id == "acme"
+        assert r.user_id == "alice"
+
+    # Exactly one forced reload despite 10 concurrent misses.
+    assert call_count["n"] == 1, f"expected 1 reload, got {call_count['n']}"
+
+
+@pytest.mark.asyncio
+async def test_known_invalid_key_does_not_reload_every_time():
+    """A key that *isn't* in AGFS shouldn't reload AGFS on every request."""
+    fake = _FakeAGFS()
+    mgr = _make_manager(fake)
+    await mgr.load()
+    # Don't create the key in AGFS — it stays unknown.
+    bad_key = "a" * 64
+
+    real_load = mgr._legacy.load
+    reload_count = {"n": 0}
+
+    async def counting_load():
+        reload_count["n"] += 1
+        await real_load()
+
+    mgr._legacy.load = counting_load  # type: ignore[assignment]
+
+    # Hammer with the same bad key 20 times.
+    for _ in range(20):
+        with pytest.raises(UnauthenticatedError):
+            await mgr.resolve_with_refresh(bad_key)
+
+    # First miss reloads; subsequent ones hit the negative cache and skip.
+    # Allow at most a small constant — anything not bounded is a regression.
+    assert reload_count["n"] <= 2, (
+        f"unbounded reloads on known-bad key: {reload_count['n']}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Sanity checks for the underlying primitives.
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_load_is_idempotent():
+    """Calling load() twice must not duplicate the prefix index."""
+    fake = _FakeAGFS()
+    mgr = _make_manager(fake)
+    await mgr.load()
+    user_key = await mgr.create_account("acme", "alice")
+
+    prefix_size_before = sum(len(v) for v in mgr._legacy._prefix_index.values())
+    await mgr._legacy.load()
+    prefix_size_after = sum(len(v) for v in mgr._legacy._prefix_index.values())
+
+    assert prefix_size_before == prefix_size_after
+    # And the key still resolves.
+    assert mgr.resolve(user_key).account_id == "acme"
+
+
+@pytest.mark.asyncio
+async def test_invalidate_cache_clears_negative_cache():
+    """invalidate_cache clears _unknown_key_reload_at so a freshly-created key resolves."""
+    fake = _FakeAGFS()
+    mgr = _make_manager(fake)
+    await mgr.load()
+
+    bad_key = "z" * 64
+    with pytest.raises(UnauthenticatedError):
+        await mgr.resolve_with_refresh(bad_key)
+    assert bad_key in mgr._legacy._unknown_key_reload_at
+
+    mgr._legacy.invalidate_cache()
+    assert mgr._legacy._unknown_key_reload_at == {}

From a661f2b324e822c60db9b638859423ab68ef6e63 Mon Sep 17 00:00:00 2001
From: yeyitech <yeyitech@gmail.com>
Date: Wed, 17 Jun 2026 18:03:16 +0800
Subject: [PATCH 2/2] style: ruff format

---
 .../parse/parsers/code/ast/code_tools.py      |  4 +--
 .../parse/parsers/code/ast/extractor.py       |  4 +--
 openviking/parse/parsers/markdown.py          | 36 +++++++------------
 tests/parse/test_code_tools.py                |  4 +--
 .../test_api_keys_cache_invalidation.py       |  8 ++---
 5 files changed, 19 insertions(+), 37 deletions(-)

diff --git a/openviking/parse/parsers/code/ast/code_tools.py b/openviking/parse/parsers/code/ast/code_tools.py
index 4873b86239..97004610a2 100644
--- a/openviking/parse/parsers/code/ast/code_tools.py
+++ b/openviking/parse/parsers/code/ast/code_tools.py
@@ -127,9 +127,7 @@ def search_symbols(query: str, files: List[Tuple[str, str]]) -> str:
     return "\n".join(out)
 
 
-def _resolve_symbol(
-    skeleton: CodeSkeleton, symbol: str
-) -> Optional[Tuple[str, int, int]]:
+def _resolve_symbol(skeleton: CodeSkeleton, symbol: str) -> Optional[Tuple[str, int, int]]:
     """Find a symbol by 'foo' (bare) or 'Foo.bar' (qualified). Case sensitive.
 
     Search priority for bare names (no dot):
diff --git a/openviking/parse/parsers/code/ast/extractor.py b/openviking/parse/parsers/code/ast/extractor.py
index fee68b9928..bd4b814830 100644
--- a/openviking/parse/parsers/code/ast/extractor.py
+++ b/openviking/parse/parsers/code/ast/extractor.py
@@ -107,9 +107,7 @@ def extract(self, file_name: str, content: str) -> Optional[CodeSkeleton]:
         try:
             return extractor.extract(file_name, content)
         except Exception as e:
-            logger.warning(
-                "AST extraction failed for '%s' (language: %s): %s", file_name, lang, e
-            )
+            logger.warning("AST extraction failed for '%s' (language: %s): %s", file_name, lang, e)
             return None
 
     def extract_skeleton(
diff --git a/openviking/parse/parsers/markdown.py b/openviking/parse/parsers/markdown.py
index c53d6cfad1..36532482fb 100644
--- a/openviking/parse/parsers/markdown.py
+++ b/openviking/parse/parsers/markdown.py
@@ -340,9 +340,7 @@ async def _compute_layout(
             content, frontmatter = self._extract_frontmatter(content)
             if frontmatter:
                 meta["frontmatter"] = frontmatter
-                logger.debug(
-                    f"[MarkdownParser] Extracted frontmatter: {list(frontmatter.keys())}"
-                )
+                logger.debug(f"[MarkdownParser] Extracted frontmatter: {list(frontmatter.keys())}")
 
         explicit_name = kwargs.get("resource_name")
         if not explicit_name and kwargs.get("source_name"):
@@ -593,7 +591,6 @@ async def _ingest_local_images(
             seen_paths = set()
 
             for path_str in image_refs:
-
                 # Skip remote URIs
                 if self._is_remote_uri(path_str):
                     continue
@@ -629,7 +626,9 @@ async def _ingest_local_images(
                     image_bytes = await asyncio.to_thread(resolved_path.read_bytes)
 
                     # Validate pixel size and file size; skip non-compliant images
-                    if not await asyncio.to_thread(self._is_valid_image, image_bytes, resolved_path):
+                    if not await asyncio.to_thread(
+                        self._is_valid_image, image_bytes, resolved_path
+                    ):
                         continue
 
                     # Get filename and deduplicate
@@ -649,7 +648,9 @@ async def _ingest_local_images(
                     logger.warning(f"[MarkdownParser] Failed to ingest image {resolved_path}: {e}")
 
             if file_mappings:
-                rel_md_path = md_uri[len(root_prefix) + 1 :] if md_uri.startswith(root_prefix) else md_uri
+                rel_md_path = (
+                    md_uri[len(root_prefix) + 1 :] if md_uri.startswith(root_prefix) else md_uri
+                )
                 mappings[rel_md_path] = file_mappings
 
         # Write a single mapping file at the root directory for rewrite_image_uris
@@ -688,9 +689,7 @@ def _resolve_image_path(
 
             # Reject absolute paths: they can point anywhere on the host
             if path.is_absolute():
-                logger.warning(
-                    f"[MarkdownParser] Rejected absolute image path: {path_str}"
-                )
+                logger.warning(f"[MarkdownParser] Rejected absolute image path: {path_str}")
                 return None
 
             # Build the list of allowed roots to confine resolution to.
@@ -760,9 +759,7 @@ def _is_valid_image(self, image_bytes: bytes, source_path: Path) -> bool:
         """
         # File size check (local file path limit: 10 MB)
         if len(image_bytes) > self.IMAGE_MAX_FILE_BYTES:
-            logger.warning(
-                f"[MarkdownParser] Image exceeds 10MB, skipping: {source_path}"
-            )
+            logger.warning(f"[MarkdownParser] Image exceeds 10MB, skipping: {source_path}")
             return False
 
         # Pixel size check
@@ -932,20 +929,15 @@ def _to_rel(new_disk: str, keep_suffix: bool) -> str:
                     rel
                     for rel, text in layout.items()
                     if any(
-                        _gh_slug(h) == anchor
-                        for h in re.findall(r"^#{1,6}\s+(.+)$", text, re.M)
+                        _gh_slug(h) == anchor for h in re.findall(r"^#{1,6}\s+(.+)$", text, re.M)
                     )
                 ]
                 if len(hits) == 1:
-                    return _to_rel(
-                        os.path.join(target_parent, hits[0]), keep_suffix=True
-                    )
+                    return _to_rel(os.path.join(target_parent, hits[0]), keep_suffix=True)
             # B) Single-file document (a bare file, or a directory holding exactly one
             #    file) → the anchor/query lives in that one file; keep the suffix.
             if len(layout) == 1:
-                return _to_rel(
-                    os.path.join(target_parent, next(iter(layout))), keep_suffix=True
-                )
+                return _to_rel(os.path.join(target_parent, next(iter(layout))), keep_suffix=True)
 
         # C) Single bare file with no suffix to place (e.g. a future small .md kept as
         #    a file) → point at the file itself (empty suffix ⇒ no trailing slash).
@@ -971,9 +963,7 @@ async def _ingest_will_handle_image(self, link: str) -> bool:
             if resolved is not None:
                 try:
                     image_bytes = await asyncio.to_thread(resolved.read_bytes)
-                    handled = await asyncio.to_thread(
-                        self._is_valid_image, image_bytes, resolved
-                    )
+                    handled = await asyncio.to_thread(self._is_valid_image, image_bytes, resolved)
                 except Exception:
                     handled = False
             cache[link] = handled
diff --git a/tests/parse/test_code_tools.py b/tests/parse/test_code_tools.py
index 55a2333b26..172f08dfde 100644
--- a/tests/parse/test_code_tools.py
+++ b/tests/parse/test_code_tools.py
@@ -281,14 +281,14 @@ def test_outline_unsupported_language(self):
 # ---------------------------------------------------------------------------
 
 
-SECOND_FILE = '''def greet():
+SECOND_FILE = """def greet():
     pass
 
 
 class Other:
     def helper(self):
         pass
-'''
+"""
 
 
 class TestSearchSymbols:
diff --git a/tests/server/test_api_keys_cache_invalidation.py b/tests/server/test_api_keys_cache_invalidation.py
index ec06f7589b..6d4b2d9bed 100644
--- a/tests/server/test_api_keys_cache_invalidation.py
+++ b/tests/server/test_api_keys_cache_invalidation.py
@@ -111,9 +111,7 @@ async def test_ttl_expires_entry(monkeypatch):
     # Advance the monotonic clock past the TTL.
     real_monotonic = legacy_module.time.monotonic
     offset = legacy_module.ACCOUNTS_CACHE_TTL_SECONDS + 5.0
-    monkeypatch.setattr(
-        legacy_module.time, "monotonic", lambda: real_monotonic() + offset
-    )
+    monkeypatch.setattr(legacy_module.time, "monotonic", lambda: real_monotonic() + offset)
 
     reads_before = fake.read_count
     await mgr.resolve_with_refresh(user_key)
@@ -219,9 +217,7 @@ async def counting_load():
 
     # First miss reloads; subsequent ones hit the negative cache and skip.
     # Allow at most a small constant — anything not bounded is a regression.
-    assert reload_count["n"] <= 2, (
-        f"unbounded reloads on known-bad key: {reload_count['n']}"
-    )
+    assert reload_count["n"] <= 2, f"unbounded reloads on known-bad key: {reload_count['n']}"
 
 
 # ---------------------------------------------------------------------------