Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion docs/AI_powered_rescheduling/sprint_4/story_5.md
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,11 @@ Le UI devono usare `retryable=true` per chiedere al planner se vuole ritentare (
### 3) Logging & Audit

* Logging strutturato: `event=ai_broker_attempt_failed`, `event=metrics_computation_failed`.
* Campi minimi: `correlation_id`, `error_code`, `stage`, `retry_attempt`.
* Per i retry broker mantenere sempre i campi strutturati: `provider`, `attempt`, `correlation_id`, `error_code`, `backoff_ms`.
* Distinguere chiaramente i due casi operativi:
    * `event=ai_broker_retry_backoff` con `wait_reason=normal_retry_backoff` o `wait_reason=rate_limit_backoff` per retry standard.
    * `event=ai_broker_rate_limit_fixed_wait` con `wait_reason=rate_limit_window_reset_429` quando si applica la finestra forzata di 60s su 429.
* In incident handling, usare `ai_broker_rate_limit_fixed_wait` per identificare rapidamente i blocchi dovuti al reset della finestra di quota (da non confondere con i retry con backoff normale).
* Nessun dato personale nel log (GDPR).

***
Expand Down
39 changes: 34 additions & 5 deletions src/main/java/org/cswteams/ms3/ai/broker/AgentBrokerImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -167,35 +167,57 @@ private AiScheduleVariantsResponse executeWithRetry(AgentProviderAdapter adapter
dto == null || dto.variants == null ? 0 : dto.variants.size());
return mapVariants(dto);
} catch (AiProtocolException ex) {
logger.warn("event=ai_broker_attempt_failed attempt={} correlation_id={} error_code={}",
logger.warn("event=ai_broker_attempt_failed provider={} attempt={} correlation_id={} error_code={}",
provider,
attempt,
request.getCorrelationId(),
ex.getCode());
lastException = ex;
} catch (RuntimeException ex) {
logger.warn("event=ai_broker_attempt_failed attempt={} correlation_id={} error_code=TRANSPORT_FAILURE",
logger.warn("event=ai_broker_attempt_failed provider={} attempt={} correlation_id={} error_code=TRANSPORT_FAILURE",
provider,
attempt,
request.getCorrelationId());
lastException = AiProtocolException.transportFailure("AI provider call failed", ex);
}

if (attempt < maxRetries) {
boolean rateLimited = isRateLimitedFailure(lastException);
String errorCode = getErrorCode(lastException);
if (rateLimited && estimatedTotalTokens > RATE_LIMIT_RETRY_TOKEN_CUTOFF) {
logger.warn("event=ai_broker_rate_limit_retry_skipped attempt={} correlation_id={} estimated_total_tokens={} retry_token_cutoff={}",
logger.warn("event=ai_broker_rate_limit_retry_skipped provider={} attempt={} correlation_id={} error_code={} estimated_total_tokens={} retry_token_cutoff={}",
provider,
attempt,
request.getCorrelationId(),
errorCode,
estimatedTotalTokens,
RATE_LIMIT_RETRY_TOKEN_CUTOFF);
break;
}
Duration retryDelay = computeRetryDelay(backoff, attempt, rateLimited, adapter.provider());
if (rateLimited) {
logger.warn("event=ai_broker_rate_limit_backoff attempt={} correlation_id={} backoff_ms={} estimated_total_tokens={}",
if (rateLimited && provider == AgentProvider.GEMMA) {
logger.warn("event=ai_broker_rate_limit_fixed_wait provider={} attempt={} correlation_id={} error_code={} backoff_ms={} wait_reason=rate_limit_window_reset_429 estimated_total_tokens={}",
provider,
attempt,
request.getCorrelationId(),
errorCode,
retryDelay.toMillis(),
estimatedTotalTokens);
} else if (rateLimited) {
logger.warn("event=ai_broker_retry_backoff provider={} attempt={} correlation_id={} error_code={} backoff_ms={} wait_reason=rate_limit_backoff estimated_total_tokens={}",
provider,
attempt,
request.getCorrelationId(),
errorCode,
retryDelay.toMillis(),
estimatedTotalTokens);
} else {
logger.info("event=ai_broker_retry_backoff provider={} attempt={} correlation_id={} error_code={} backoff_ms={} wait_reason=normal_retry_backoff",
provider,
attempt,
request.getCorrelationId(),
errorCode,
retryDelay.toMillis());
}
sleep(retryDelay);
}
Expand Down Expand Up @@ -312,6 +334,13 @@ private static boolean containsRateLimitMarker(String message) {
return normalized.contains("429") || normalized.contains("too many requests") || normalized.contains("rate limit");
}

/**
 * Resolves the error-code label used in the structured retry log lines.
 *
 * @param exception the failure to inspect; may be {@code null}
 * @return the result of {@code name()} on the exception's code, or the literal
 *         {@code "UNKNOWN"} when the exception or its code is {@code null}
 */
private static String getErrorCode(AiProtocolException exception) {
    // A code is usable only when both the exception and its code are present.
    return (exception != null && exception.getCode() != null)
            ? exception.getCode().name()
            : "UNKNOWN";
}

void sleep(Duration backoff) {
if (backoff == null || backoff.isZero() || backoff.isNegative()) {
return;
Expand Down