From dc54e738d44187e4c895dd598dc1f6f874154466 Mon Sep 17 00:00:00 2001 From: TurinTech Bot Date: Tue, 13 May 2025 23:58:30 +0000 Subject: [PATCH] Artemis Changes --- .../service/ratelimit/RateLimiterService.java | 41 ++++++++++++++++--- 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/src/main/java/com/llmproxy/service/ratelimit/RateLimiterService.java b/src/main/java/com/llmproxy/service/ratelimit/RateLimiterService.java index 7f5ed5c..f18a45d 100644 --- a/src/main/java/com/llmproxy/service/ratelimit/RateLimiterService.java +++ b/src/main/java/com/llmproxy/service/ratelimit/RateLimiterService.java @@ -13,26 +13,30 @@ @Slf4j public class RateLimiterService { private double tokens; - private Instant lastRefill; + private long lastRefillMs; // Using milliseconds instead of Instant for better performance private final double refillRate; private final double maxTokens; private final Map clientLimiters = new ConcurrentHashMap<>(); private Function allowClientFunc; + private static final int MAX_CLIENTS = 10000; + private long lastCleanupMs; + private static final long CLEANUP_INTERVAL_MS = 300000; // 5 minutes public RateLimiterService( @Value("${rate-limit.requests-per-minute:60}") int requestsPerMinute, @Value("${rate-limit.burst:10}") int burst) { this.tokens = burst; - this.lastRefill = Instant.now(); + this.lastRefillMs = System.currentTimeMillis(); + this.lastCleanupMs = this.lastRefillMs; this.refillRate = (double) requestsPerMinute / 60.0; // Convert to per-second this.maxTokens = burst; } public synchronized boolean allow() { - Instant now = Instant.now(); - double elapsed = (now.toEpochMilli() - lastRefill.toEpochMilli()) / 1000.0; + long nowMs = System.currentTimeMillis(); + double elapsed = (nowMs - lastRefillMs) / 1000.0; tokens = Math.min(maxTokens, tokens + elapsed * refillRate); - lastRefill = now; + lastRefillMs = nowMs; if (tokens >= 1.0) { tokens -= 1.0; @@ -46,6 +50,9 @@ public boolean allowClient(String clientId) { return allowClientFunc.apply(clientId); } + // Check if cleanup is needed + maybeCleanupClients(); + RateLimiterService clientLimiter = clientLimiters.computeIfAbsent(clientId, id -> new RateLimiterService((int) (refillRate * 60), (int) maxTokens)); @@ -55,4 +62,26 @@ public boolean allowClient(String clientId) { public void setAllowClientFunc(Function func) { this.allowClientFunc = func; } -} + + private void maybeCleanupClients() { + // Only check periodically to avoid overhead + long now = System.currentTimeMillis(); + if (clientLimiters.size() > MAX_CLIENTS || + (clientLimiters.size() > 100 && now - lastCleanupMs > CLEANUP_INTERVAL_MS)) { + + lastCleanupMs = now; + + // Simple approach: just clear half of the map when it gets too large + // In a more sophisticated implementation, we could track last access time + if (clientLimiters.size() > MAX_CLIENTS / 2) { + int toRemove = clientLimiters.size() / 2; + clientLimiters.keySet().stream() + .limit(toRemove) + .forEach(clientLimiters::remove); + + log.info("Cleaned up {} client rate limiters, {} remaining", + toRemove, clientLimiters.size()); + } + } + } +} \ No newline at end of file