Skip to content

Commit ff34160

Browse files
committed
fix(backend): improve OAuth token refresh resilience for transient failures
Tokens that expired during a transient provider error (e.g. an HTTP 500) were permanently excluded from future refresh attempts because the query filtered out already-expired tokens. This left installations stuck in the requires_reauth state forever.

- Allow refreshing tokens that expired up to 24 hours ago
- Add an hourly retry backoff for expired tokens via an updated_at check
- Update the token's updated_at on failure so the backoff works correctly
- Clear the requires_reauth status on successful token recovery
1 parent 46a897a commit ff34160

1 file changed

Lines changed: 69 additions & 9 deletions

File tree

services/backend/src/jobs/refresh-oauth-tokens.ts

Lines changed: 69 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import type { FastifyBaseLogger } from 'fastify';
22
import { getDb } from '../db';
33
import { mcpOauthTokens, mcpServerInstallations, mcpServers } from '../db/schema';
4-
import { and, eq, lt, gt, isNotNull } from 'drizzle-orm';
4+
import { and, eq, lt, gt, or, isNotNull } from 'drizzle-orm';
55
import { OAuthTokenService } from '../services/OAuthTokenService';
66
import { OAuthDiscoveryService } from '../services/OAuthDiscoveryService';
77
import { decrypt } from '../utils/encryption';
@@ -12,15 +12,17 @@ import { decrypt } from '../utils/encryption';
1212
* This background job runs every 5 minutes and refreshes tokens that:
1313
* - Have a refresh_token (NOT NULL)
1414
* - Have an expires_at timestamp (NOT NULL)
15-
* - Expire within the next 10 minutes
16-
* - Are not already expired
15+
* - Expire within the next 10 minutes, OR
16+
* - Already expired up to 24 hours ago (retry with hourly backoff)
1717
*
1818
* For each expiring token:
1919
* 1. Discovers OAuth endpoints from MCP server
2020
* 2. Decrypts the refresh token
2121
* 3. Calls OAuth token endpoint to refresh
2222
* 4. Encrypts and stores new access token
2323
* 5. Handles refresh token rotation if provider sends new refresh_token
24+
* 6. On successful recovery of expired token, clears requires_reauth status
25+
* 7. On failure, updates token's updated_at to enable hourly retry backoff
2426
*/
2527
export async function refreshExpiringOAuthTokens(logger: FastifyBaseLogger) {
2628
try {
@@ -29,6 +31,10 @@ export async function refreshExpiringOAuthTokens(logger: FastifyBaseLogger) {
2931
// Tokens expiring within next 10 minutes
3032
const expiryThreshold = new Date(Date.now() + 10 * 60 * 1000);
3133
const now = new Date();
34+
// Allow retrying tokens that expired up to 24 hours ago
35+
const expiredFloor = new Date(Date.now() - 24 * 60 * 60 * 1000);
36+
// For already-expired tokens, only retry if not attempted in the last hour
37+
const retryFloor = new Date(Date.now() - 60 * 60 * 1000);
3238

3339
logger.trace(
3440
{
@@ -54,14 +60,21 @@ export async function refreshExpiringOAuthTokens(logger: FastifyBaseLogger) {
5460
.innerJoin(mcpServers, eq(mcpServerInstallations.server_id, mcpServers.id))
5561
.where(
5662
and(
57-
// Must have refresh token
5863
isNotNull(mcpOauthTokens.refresh_token),
59-
// Must have expiry timestamp
6064
isNotNull(mcpOauthTokens.expires_at),
61-
// Expires within threshold
62-
lt(mcpOauthTokens.expires_at, expiryThreshold),
63-
// Not already expired
64-
gt(mcpOauthTokens.expires_at, now)
65+
or(
66+
// Case 1: Token expiring soon (within 10 min) — always refresh
67+
and(
68+
lt(mcpOauthTokens.expires_at, expiryThreshold),
69+
gt(mcpOauthTokens.expires_at, now)
70+
),
71+
// Case 2: Token already expired (up to 24h ago) — retry with hourly backoff
72+
and(
73+
lt(mcpOauthTokens.expires_at, now),
74+
gt(mcpOauthTokens.expires_at, expiredFloor),
75+
lt(mcpOauthTokens.updated_at, retryFloor)
76+
)
77+
)
6578
)
6679
);
6780

@@ -182,6 +195,35 @@ export async function refreshExpiringOAuthTokens(logger: FastifyBaseLogger) {
182195
// Update encrypted tokens in database
183196
await tokenService.updateRefreshedTokens(token.id, newTokens, db);
184197

198+
// If token was already expired and we successfully refreshed,
199+
// clear requires_reauth status so the user can reconnect
200+
if (token.expires_at && token.expires_at < now) {
201+
const { getSchema } = await import('../db');
202+
const { mcpServerInstances } = getSchema();
203+
await db
204+
.update(mcpServerInstances)
205+
.set({
206+
status: 'offline',
207+
status_message: 'OAuth token refreshed successfully. Reconnection needed.',
208+
status_updated_at: new Date(),
209+
})
210+
.where(
211+
and(
212+
eq(mcpServerInstances.installation_id, installation.id),
213+
eq(mcpServerInstances.status, 'requires_reauth')
214+
)
215+
);
216+
217+
logger.info(
218+
{
219+
operation: 'refresh_expiring_oauth_tokens',
220+
installationId: installation.id,
221+
tokenId: token.id,
222+
},
223+
'Cleared requires_reauth status after successful token recovery'
224+
);
225+
}
226+
185227
logger.info(
186228
{
187229
tokenId: token.id,
@@ -192,6 +234,7 @@ export async function refreshExpiringOAuthTokens(logger: FastifyBaseLogger) {
192234
oldExpiresAt: token.expires_at,
193235
newExpiresIn: newTokens.expires_in,
194236
clientId,
237+
wasExpired: token.expires_at ? token.expires_at < now : false,
195238
operation: 'refresh_expiring_oauth_tokens',
196239
},
197240
'Token refreshed successfully'
@@ -211,6 +254,23 @@ export async function refreshExpiringOAuthTokens(logger: FastifyBaseLogger) {
211254
'Failed to refresh token'
212255
);
213256

257+
// Update token's updated_at to track last refresh attempt (enables hourly retry backoff)
258+
try {
259+
await db
260+
.update(mcpOauthTokens)
261+
.set({ updated_at: new Date() })
262+
.where(eq(mcpOauthTokens.id, token.id));
263+
} catch (updateError) {
264+
logger.error(
265+
{
266+
error: updateError instanceof Error ? updateError.message : 'Unknown error',
267+
tokenId: token.id,
268+
operation: 'refresh_expiring_oauth_tokens',
269+
},
270+
'Failed to update token updated_at after refresh failure'
271+
);
272+
}
273+
214274
// Update ALL user instances status to requires_reauth
215275
try {
216276
const { getSchema } = await import('../db');

0 commit comments

Comments
 (0)