diff --git a/docs/AI_powered_rescheduling/sprint_4/story_5.md b/docs/AI_powered_rescheduling/sprint_4/story_5.md index 7f0209bf..e17e14ab 100644 --- a/docs/AI_powered_rescheduling/sprint_4/story_5.md +++ b/docs/AI_powered_rescheduling/sprint_4/story_5.md @@ -419,7 +419,11 @@ Le UI devono usare `retryable=true` per chiedere al planner se vuole ritentare ( ### 3) Logging & Audit * Logging strutturato: `event=ai_broker_attempt_failed`, `event=metrics_computation_failed`. -* Campi minimi: `correlation_id`, `error_code`, `stage`, `retry_attempt`. +* Per i retry broker mantenere sempre i campi strutturati: `provider`, `attempt`, `correlation_id`, `error_code`, `backoff_ms`. +* Distinguere chiaramente i due casi operativi: + * `event=ai_broker_retry_backoff` con `wait_reason=normal_retry_backoff` o `wait_reason=rate_limit_backoff` per retry standard. + * `event=ai_broker_rate_limit_fixed_wait` con `wait_reason=rate_limit_window_reset_429` quando si applica la finestra forzata di 60s su 429. +* In incident handling, usare `ai_broker_rate_limit_fixed_wait` per identificare rapidamente blocchi dovuti al reset finestra quota (non confonderli con i retry con backoff normale). * Nessun dato personale nel log (GDPR). *** diff --git a/src/main/java/org/cswteams/ms3/ai/broker/AgentBrokerImpl.java b/src/main/java/org/cswteams/ms3/ai/broker/AgentBrokerImpl.java index f77e9c26..9792eff1 100644 --- a/src/main/java/org/cswteams/ms3/ai/broker/AgentBrokerImpl.java +++ b/src/main/java/org/cswteams/ms3/ai/broker/AgentBrokerImpl.java @@ -167,13 +167,15 @@ private AiScheduleVariantsResponse executeWithRetry(AgentProviderAdapter adapter dto == null || dto.variants == null ? 0 : dto.variants.size()); return mapVariants(dto); } catch (AiProtocolException ex) { - logger.warn("event=ai_broker_attempt_failed attempt={} correlation_id={} error_code={}", + logger.warn("event=ai_broker_attempt_failed provider={} attempt={} correlation_id={} error_code={}", + provider, attempt, request.getCorrelationId(), ex.getCode()); lastException = ex; } catch (RuntimeException ex) { - logger.warn("event=ai_broker_attempt_failed attempt={} correlation_id={} error_code=TRANSPORT_FAILURE", + logger.warn("event=ai_broker_attempt_failed provider={} attempt={} correlation_id={} error_code=TRANSPORT_FAILURE", + provider, attempt, request.getCorrelationId()); lastException = AiProtocolException.transportFailure("AI provider call failed", ex); @@ -181,21 +183,41 @@ private AiScheduleVariantsResponse executeWithRetry(AgentProviderAdapter adapter if (attempt < maxRetries) { boolean rateLimited = isRateLimitedFailure(lastException); + String errorCode = getErrorCode(lastException); if (rateLimited && estimatedTotalTokens > RATE_LIMIT_RETRY_TOKEN_CUTOFF) { - logger.warn("event=ai_broker_rate_limit_retry_skipped attempt={} correlation_id={} estimated_total_tokens={} retry_token_cutoff={}", + logger.warn("event=ai_broker_rate_limit_retry_skipped provider={} attempt={} correlation_id={} error_code={} estimated_total_tokens={} retry_token_cutoff={}", + provider, attempt, request.getCorrelationId(), + errorCode, estimatedTotalTokens, RATE_LIMIT_RETRY_TOKEN_CUTOFF); break; } Duration retryDelay = computeRetryDelay(backoff, attempt, rateLimited, adapter.provider()); - if (rateLimited) { - logger.warn("event=ai_broker_rate_limit_backoff attempt={} correlation_id={} backoff_ms={} estimated_total_tokens={}", + if (rateLimited && provider == AgentProvider.GEMMA) { + logger.warn("event=ai_broker_rate_limit_fixed_wait provider={} attempt={} correlation_id={} error_code={} backoff_ms={} wait_reason=rate_limit_window_reset_429 estimated_total_tokens={}", + provider, attempt, request.getCorrelationId(), + errorCode, retryDelay.toMillis(), estimatedTotalTokens); + } else if (rateLimited) { + logger.warn("event=ai_broker_retry_backoff provider={} attempt={} correlation_id={} error_code={} backoff_ms={} wait_reason=rate_limit_backoff estimated_total_tokens={}", + provider, + attempt, + request.getCorrelationId(), + errorCode, + retryDelay.toMillis(), + estimatedTotalTokens); + } else { + logger.info("event=ai_broker_retry_backoff provider={} attempt={} correlation_id={} error_code={} backoff_ms={} wait_reason=normal_retry_backoff", + provider, + attempt, + request.getCorrelationId(), + errorCode, + retryDelay.toMillis()); } sleep(retryDelay); } @@ -312,6 +334,13 @@ private static boolean containsRateLimitMarker(String message) { return normalized.contains("429") || normalized.contains("too many requests") || normalized.contains("rate limit"); } + private static String getErrorCode(AiProtocolException exception) { + if (exception == null || exception.getCode() == null) { + return "UNKNOWN"; + } + return exception.getCode().name(); + } + void sleep(Duration backoff) { if (backoff == null || backoff.isZero() || backoff.isNegative()) { return;