feat(cache): add OAuth client cache with redis-aside support (#155)
* feat(cache): add OAuth client cache with redis-aside support
Add a new Cache[OAuthApplication] instance that caches client lookups
by client_id using the cache-aside pattern. store.GetClient() is called
20+ times across all OAuth flows (device code, authorization code,
token exchange, client credentials) — this was the hottest uncached
DB query path.
Key design decisions:
- GetClient() returns cached copy with ClientSecret stripped (defense-in-depth)
- GetClientWithSecret() bypasses cache for secret-verification flows
- Explicit invalidation on all mutations (create, update, delete,
approve, reject, secret regeneration)
- Inject ClientService into DeviceService, TokenService, and
AuthorizationService to replace direct store.GetClient() calls
Configuration: CLIENT_CACHE_TYPE, CLIENT_CACHE_TTL (5m default),
CLIENT_CACHE_CLIENT_TTL (30s), CLIENT_CACHE_SIZE_PER_CONN (32MB)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
* fix(cache): use closure vars in fetchFunc and add DB fallback on cache errors
- Use clientID/hash closure variables instead of key param in GetWithFetch
fetchFuncs to avoid using redis-aside prefixed keys for DB lookups
- Add cache-error fallback in GetClient to distinguish infrastructure
failures from genuine not-found, mirroring getAccessTokenByHash pattern
- Apply same prefixed-key fix to getAccessTokenByHash in TokenService
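The bug class fixed here can be shown with a small sketch (all names are illustrative; this only models a `GetWithFetch` that hands its callback a cache-prefixed key, as a redis-aside backend might):

```go
package main

import "fmt"

// asideCache models a backend whose GetWithFetch passes the *prefixed*
// cache key to the fetch callback rather than the raw identifier.
type asideCache struct {
	prefix string
	data   map[string]string
}

func (c *asideCache) GetWithFetch(key string, fetch func(prefixedKey string) (string, error)) (string, error) {
	pk := c.prefix + key
	if v, ok := c.data[pk]; ok {
		return v, nil
	}
	v, err := fetch(pk) // callback receives "oauth:client:<id>", not "<id>"
	if err != nil {
		return "", err
	}
	c.data[pk] = v
	return v, nil
}

func main() {
	db := map[string]string{"client-1": "My App"} // DB keyed by the raw client_id
	c := &asideCache{prefix: "oauth:client:", data: map[string]string{}}

	clientID := "client-1"
	name, err := c.GetWithFetch(clientID, func(key string) (string, error) {
		// Using `key` here would look up "oauth:client:client-1" in the DB and
		// miss; the clientID closure variable keeps the DB lookup unprefixed.
		if v, ok := db[clientID]; ok {
			return v, nil
		}
		return "", fmt.Errorf("client %s not found", clientID)
	})
	fmt.Println(name, err) // My App <nil>
}
```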
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* refactor(cache): move GetClient fallback rationale to doc comment
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* refactor(cache): remove unreachable DB fallback in GetClient
fetchThrough already calls the fetch function on any cache Get error,
so the explicit fallback path could never execute for cache backend
failures. When the DB itself fails, calling it twice is wasteful.
Remove the dead fallback and drop the now-unused gorm import.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* fix(cache): restore DB fallback in GetClient for redis-aside outages
- Restore gorm.ErrRecordNotFound check and DB fallback in GetClient
- RueidisAsideCache.GetWithFetch can return an error without calling
fetchFunc when Redis/RESP3 is unavailable, so the fallback is needed
to avoid treating infrastructure failures as "client not found"
- Add tests: secret stripping, cache hit (fetchFunc called once),
cache invalidation on UpdateClient and RegenerateSecret
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* style(services): wrap long function signature in client_test.go
- Break GetWithFetch method signature across multiple lines to satisfy golines formatter
* fix(services): distinguish store errors from cache-backend errors in GetClient
- Wrap fetchFunc store errors with clientFetchErr sentinel to prevent redundant DB retry
- Return the original store error instead of masking it as ErrClientNotFound
- Fix DB fallback to propagate non-ErrRecordNotFound store errors correctly
* refactor(services): remove unreachable ErrRecordNotFound check in GetClient
- fetchFunc always wraps store errors in clientFetchErr, so a raw
gorm.ErrRecordNotFound can never reach this branch
* style(services): remove redundant inline comment in GetClient fetchFunc
* fix(services): evict corrupted cache entry on ErrInvalidValue in GetClient
- On cache.ErrInvalidValue (unmarshal failure), delete the bad key before
falling back to DB so subsequent requests re-populate the cache correctly
instead of hot-looping through the DB fallback on every call
* fix(services): log Delete errors and fix ErrInvalidValue eviction in token cache
- Log cache Delete errors on ErrInvalidValue eviction in GetClient (was silently discarded)
- Apply same ErrInvalidValue + eviction pattern to TokenService.getAccessTokenByHash
to prevent corrupted token cache entries from hot-looping through the DB fallback
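The evict-then-fallback pattern looks roughly like this sketch (the `ErrInvalidValue` sentinel name is taken from the commit message; the cache type, `getToken` helper, and key masking are illustrative):

```go
package main

import (
	"errors"
	"fmt"
	"log"
)

var ErrInvalidValue = errors.New("cache: invalid value") // unmarshal failure

type cache struct{ data map[string][]byte }

func (c *cache) Get(key string, corrupt bool) ([]byte, error) {
	if corrupt {
		return nil, ErrInvalidValue // simulates a corrupted, un-unmarshalable entry
	}
	return c.data[key], nil
}

func (c *cache) Delete(key string) error { delete(c.data, key); return nil }

// getToken evicts the bad key on ErrInvalidValue before falling back to the
// DB, so the next request re-populates the cache instead of every request
// hot-looping through the DB fallback.
func getToken(c *cache, key string, corrupt bool) (string, error) {
	_, err := c.Get(key, corrupt)
	if errors.Is(err, ErrInvalidValue) {
		if delErr := c.Delete(key); delErr != nil {
			// log (don't silently discard) eviction failures; mask the hash
			log.Printf("evicting corrupted cache entry %s****: %v", key[:4], delErr)
		}
		return "token-from-db", nil // fall back to the DB once
	}
	return "token-from-cache", nil
}

func main() {
	c := &cache{data: map[string][]byte{"hash1234": []byte("garbage")}}
	v, _ := getToken(c, "hash1234", true)
	_, stillCached := c.data["hash1234"]
	fmt.Println(v, stillCached) // token-from-db false: bad entry was evicted
}
```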
* style(services): mask token hash in eviction log to match invalidateTokenCache pattern
* refactor(services): add ctx parameter to GetClient and GetClientWithSecret
- Propagate caller context through cache I/O and DB fallback so that
request timeouts/cancellation are respected and tracing can propagate
- Handlers pass c.Request.Context(); service callers pass their ctx;
methods without a context use context.Background() as a fallback
* refactor(services): propagate ctx through GetClientByUserCode, ValidateAuthorizationRequest, AuthenticateClient
- All three methods called from HTTP handlers but lacked ctx parameter;
context.Background() replaced with the actual request context so
cancellation/timeout from handlers flows through to cache and DB
* fix(services): propagate real DB errors from GetClientWithSecret
- Preserve non-404 store errors instead of masking them as ErrClientNotFound
- Remove unnecessary cache invalidation from CreateClient (new clients are never cached)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* fix(services): wrap token store errors to prevent double DB hit
Use tokenFetchErr sentinel (parallel to clientFetchErr) so transient DB
errors inside GetWithFetch fetchFunc are distinguished from cache-backend
failures and short-circuited instead of triggering a redundant DB fallback.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* refactor(services): merge clientFetchErr and tokenFetchErr into shared fetchErr
Both types were identical wrappers used to distinguish store errors from
cache-backend errors inside GetWithFetch callbacks. Extract once into
errors.go and remove the per-file duplicates.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* test(config): add CLIENT_CACHE_TYPE validation test coverage
Cover all validation branches: invalid type, redis/redis-aside without
REDIS_ADDR, zero CLIENT_CACHE_TTL, and redis-aside with zero CLIENT_CACHE_CLIENT_TTL.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* fix(services): deep-copy RedirectURIs slice when caching OAuthApplication
Prevent callers from accidentally corrupting cached backing arrays via
in-place slice mutations. The cached entry now has its own independent
StringArray so modifications to the returned value cannot affect the cache.
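The aliasing hazard this fixes is standard Go slice semantics, shown here in miniature (a plain `[]string` stands in for the gorm `StringArray`; `copyApp` is an illustrative name):

```go
package main

import "fmt"

type OAuthApplication struct {
	ClientID     string
	RedirectURIs []string
}

// copyApp returns an independent copy. Without cloning the slice, the cached
// value and the returned value share one backing array, so an in-place
// mutation by a caller would silently corrupt the cached entry.
func copyApp(app OAuthApplication) OAuthApplication {
	out := app
	out.RedirectURIs = append([]string(nil), app.RedirectURIs...) // deep-copy the slice
	return out
}

func main() {
	cached := OAuthApplication{ClientID: "abc", RedirectURIs: []string{"https://a.example/cb"}}
	returned := copyApp(cached)
	returned.RedirectURIs[0] = "https://evil.example/cb" // caller mutates in place
	fmt.Println(cached.RedirectURIs[0])                  // https://a.example/cb — cache unaffected
}
```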
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* docs: add Client Cache and Token Cache sections to CONFIGURATION.md
- Add ## Client Cache section covering backends, configuration vars, TTL
trade-offs, and multi-pod recommendations for CLIENT_CACHE_* settings
- Add ## Token Cache section covering the opt-in token verification cache
with TOKEN_CACHE_* settings, revocation invalidation, and RESP3 notes
- Add both sections to the table of contents
- Mention CLIENT_CACHE_TYPE and TOKEN_CACHE_TYPE in README Scalability section
---------
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
README.md (+1 −1):

```diff
@@ -549,7 +549,7 @@ docker run -d \
 - **SQLite**: Suitable for < 1000 concurrent devices, single-instance deployments
 - **PostgreSQL**: Recommended for production, supports horizontal scaling
-- **Multi-Pod**: Use PostgreSQL + Redis for rate limiting and user cache across pods (`RATE_LIMIT_STORE=redis`, `USER_CACHE_TYPE=redis` or `redis-aside`). Note: `redis-aside` requires Redis >= 7.0.
+- **Multi-Pod**: Use PostgreSQL + Redis for rate limiting, user cache, client cache, and token cache across pods (`RATE_LIMIT_STORE=redis`, `USER_CACHE_TYPE=redis` or `redis-aside`, `CLIENT_CACHE_TYPE=redis` or `redis-aside`, `TOKEN_CACHE_TYPE=redis` or `redis-aside`). Note: `redis-aside` requires Redis >= 7.0.
```
CONFIGURATION.md: new **Client Cache** and **Token Cache** sections, inserted between the User Cache and Rate Limiting sections (lines elided by the diff viewer are marked `[…]`):

## Client Cache

Every OAuth flow (device code, authorization code, token exchange, client credentials) queries the `OAuthApplication` record to validate the client. Caching these lookups reduces database pressure on busy deployments.

The cache is always enabled; no feature flag is required. Mutations (create, update, delete, secret regeneration, approve/reject) always invalidate the cache entry immediately.

### How It Works

The cache uses a **cache-aside pattern**:

1. On the first request for a client ID, the DB is queried and the result is stored in cache with a TTL
2. Client secrets are **stripped before caching** (defense-in-depth: secrets are never stored in the cache backend)
3. Cache entries are invalidated immediately on any write operation (create, update, delete, secret rotation)

[…]

```bash
# Cache backend: memory (default), redis, or redis-aside
CLIENT_CACHE_TYPE=memory

# How long a cached client record is valid (default: 5m); must be > 0.
# Mutations always invalidate immediately, so this is only a fallback TTL.
CLIENT_CACHE_TTL=5m

# Client-side TTL for redis-aside mode only (default: 30s); must be > 0
CLIENT_CACHE_CLIENT_TTL=30s

# Client-side cache size per connection in MB for redis-aside mode only (default: 32MB)
# Total memory per pod = cache_size × connections (~10 based on GOMAXPROCS) → default ~320MB
CLIENT_CACHE_SIZE_PER_CONN=32
```

Redis-based backends also require the shared Redis settings:

```bash
REDIS_ADDR=localhost:6379
REDIS_PASSWORD=
REDIS_DB=0
```

### Multi-Pod Recommendation

```bash
# 2–5 pods: Redis shared cache
CLIENT_CACHE_TYPE=redis
REDIS_ADDR=redis-service:6379

# 5+ pods or DDoS protection: redis-aside with client-side caching
CLIENT_CACHE_TYPE=redis-aside
REDIS_ADDR=redis-service:6379
CLIENT_CACHE_CLIENT_TTL=30s
CLIENT_CACHE_SIZE_PER_CONN=32  # Adjust based on available memory per pod
```

> **Note**: `redis-aside` uses RESP3 client-side caching for automatic invalidation across all pods and requires **Redis >= 7.0**. Memory usage per pod is `CLIENT_CACHE_SIZE_PER_CONN × ~10 connections` (default ~320MB).

---

## Token Cache

`/oauth/tokeninfo` and every request protected by token-based auth call `GetAccessTokenByHash`, which hits the database on every validation. The token cache absorbs these lookups, significantly reducing DB load on high-traffic deployments.

The token cache is **disabled by default** (`TOKEN_CACHE_ENABLED=false`). Enable it for production deployments with significant token validation traffic.

### How It Works

The cache uses a **cache-aside pattern**:

1. On the first validation of a token hash, the DB is queried and the result is stored in cache with a TTL
2. Subsequent validations within the TTL window are served from cache
3. Token revocation, rotation, and status changes always **explicitly invalidate** the cache entry; the TTL is a fallback only

[…]

```bash
# Or redis-aside for real-time invalidation across all pods (requires Redis >= 7.0)
TOKEN_CACHE_ENABLED=true
TOKEN_CACHE_TYPE=redis-aside
REDIS_ADDR=redis-service:6379
TOKEN_CACHE_CLIENT_TTL=1h
TOKEN_CACHE_SIZE_PER_CONN=32
```

> **Note**: `redis-aside` uses RESP3 client-side caching with **real-time invalidation**: when a token is revoked, all pods drop their client-side cache entry immediately via RESP3 push notifications. This requires **Redis >= 7.0**. Memory usage per pod is `TOKEN_CACHE_SIZE_PER_CONN × ~10 connections` (default ~320MB).

---

## Rate Limiting

AuthGate includes built-in rate limiting to protect against brute force attacks, credential stuffing, and API abuse. The rate limiting system is production-ready with support for both single-instance and distributed deployments.