Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 37 additions & 42 deletions src/main/java/com/llmproxy/service/cache/CacheService.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
package com.llmproxy.service.cache;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.llmproxy.model.QueryRequest;
Expand All @@ -13,89 +11,86 @@
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;

@Service
@Slf4j
public class CacheService {
private final Cache<String, QueryResponse> cache;
private final ObjectMapper objectMapper;
private final boolean enabled;

public CacheService(
@Value("${cache.enabled:true}") boolean enabled,
@Value("${cache.ttl.seconds:300}") int ttlSeconds,
@Value("${cache.max-items:1000}") int maxItems,
ObjectMapper objectMapper) {
com.fasterxml.jackson.databind.ObjectMapper objectMapper // Kept for bean compatibility, but not used
) {
this.enabled = enabled;
this.objectMapper = objectMapper;


this.cache = Caffeine.newBuilder()
.maximumSize(maxItems)
.expireAfterWrite(ttlSeconds, TimeUnit.SECONDS)
.build();

log.info("Cache initialized: enabled={}, ttl={}s, maxItems={}", enabled, ttlSeconds, maxItems);
}

public QueryResponse get(QueryRequest request) {
if (!enabled) {
return null;
}

String cacheKey = generateCacheKey(request);
QueryResponse cachedResponse = cache.getIfPresent(cacheKey);

if (cachedResponse != null) {
log.debug("Cache hit for key: {}", cacheKey);
return cachedResponse;
}

log.debug("Cache miss for key: {}", cacheKey);
return null;
}

public void set(QueryRequest request, QueryResponse response) {
if (!enabled) {
return;
}

String cacheKey = generateCacheKey(request);
cache.put(cacheKey, response);

log.debug("Added response to cache with key: {}, model: {}", cacheKey, response.getModel());
}

private String generateCacheKey(QueryRequest request) {
Map<String, String> data = new HashMap<>();
data.put("query", request.getQuery());
data.put("model", request.getModel() != null ? request.getModel().toString() : "");
data.put("task_type", request.getTaskType() != null ? request.getTaskType().toString() : "");

// Efficiently build the key string
String query = request.getQuery() != null ? request.getQuery() : "";
String model = request.getModel() != null ? request.getModel().toString() : "";
String taskType = request.getTaskType() != null ? request.getTaskType().toString() : "";

StringBuilder keyBuilder = new StringBuilder();
keyBuilder.append("query:").append(query).append(';');
keyBuilder.append("model:").append(model).append(';');
keyBuilder.append("task_type:").append(taskType);

String rawKey = keyBuilder.toString();
try {
String jsonData = objectMapper.writeValueAsString(data);
MessageDigest digest = MessageDigest.getInstance("SHA-256");
byte[] hash = digest.digest(jsonData.getBytes(StandardCharsets.UTF_8));

StringBuilder hexString = new StringBuilder();
for (byte b : hash) {
String hex = Integer.toHexString(0xff & b);
if (hex.length() == 1) {
hexString.append('0');
}
hexString.append(hex);
byte[] hash = digest.digest(rawKey.getBytes(StandardCharsets.UTF_8));
// Efficient hex conversion
char[] hexArray = "0123456789abcdef".toCharArray();
char[] hexChars = new char[hash.length * 2];
for (int j = 0; j < hash.length; j++) {
int v = hash[j] & 0xFF;
hexChars[j * 2] = hexArray[v >>> 4];
hexChars[j * 2 + 1] = hexArray[v & 0x0F];
}
return hexString.toString();

} catch (JsonProcessingException | NoSuchAlgorithmException e) {
return new String(hexChars);
} catch (NoSuchAlgorithmException e) {
log.error("Error generating cache key: {}", e.getMessage());
return String.format("%s:%s:%s",
request.getQuery(),
request.getModel(),
request.getTaskType());
// fallback: direct key (may potentially expose internal details if logged in error)
return rawKey;
}
}
}
}