LerianStudio · fredcamaral · Apr 1, 2026 · Mar 31, 2026 · Mar 31, 2026 · Mar 31, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -132,11 +132,12 @@ Build and shell:
 
 - `NewManager(certPath, keyPath string) (*Manager, error)` — loads PEM files at construction; if both paths are empty an unconfigured manager is returned (TLS optional). Returns `ErrIncompleteConfig` when exactly one path is provided.
 - Key parsing order: PKCS#8 → PKCS#1 (RSA) → EC (SEC 1). Key file must have mode `0600` or stricter; looser permissions return an error before reading.
-- Atomic hot-reload via `(*Manager).Rotate(cert *x509.Certificate, key crypto.Signer) error` — validates expiry and public-key match before swapping under a write lock.
+- Atomic hot-reload via `(*Manager).Rotate(cert *x509.Certificate, key crypto.Signer, intermediates ...[]byte) error` — validates expiry and public-key match before swapping under a write lock.
 - Sentinel errors: `ErrNilManager`, `ErrCertRequired`, `ErrKeyRequired`, `ErrExpired`, `ErrNoPEMBlock`, `ErrKeyParseFailure`, `ErrNotSigner`, `ErrKeyMismatch`, `ErrIncompleteConfig`.
 - Read accessors (all nil-safe, read-locked): `GetCertificate() *x509.Certificate`, `GetSigner() crypto.Signer`, `PublicKey() crypto.PublicKey`, `ExpiresAt() time.Time`, `DaysUntilExpiry() int`.
 - TLS integration: `TLSCertificate() tls.Certificate` (returns populated `tls.Certificate` struct); `GetCertificateFunc() func(*tls.ClientHelloInfo) (*tls.Certificate, error)` — suitable for assignment to `tls.Config.GetCertificate` for transparent hot-reload.
 - Package-level helper: `LoadFromFiles(certPath, keyPath string) (*x509.Certificate, crypto.Signer, error)` — validates without touching any manager state, useful for pre-flight checking before calling `Rotate`.
+- Package-level helper: `LoadFromFilesWithChain(certPath, keyPath string) (*x509.Certificate, crypto.Signer, [][]byte, error)` — returns the full DER chain for chain-preserving hot-reload.
 
 ### SSRF validation (`commons/security/ssrf`)
 
@@ -192,7 +193,7 @@ Build and shell:
 
 - `New(conn *libRedis.Client, keyPrefix string, maxRetries int, opts ...Option) *Handler` — returns nil when `conn` is nil; all Handler methods guard against a nil receiver with `ErrNilHandler`.
 - Functional options for Handler: `WithLogger`, `WithTracer`, `WithMetrics`, `WithModule`.
-- `DLQMetrics` interface: `RecordRetried(ctx, source)`, `RecordExhausted(ctx, source)` — nil-safe, skipped when not set.
+- `DLQMetrics` interface: `RecordRetried(ctx, source)`, `RecordExhausted(ctx, source)`, `RecordLost(ctx, source)` — a nop implementation is used when not set.
 - Key operations: `Enqueue(ctx, *FailedMessage) error` (RPush), `Dequeue(ctx, source string) (*FailedMessage, error)` (LPop, destructive), `QueueLength(ctx, source string) (int64, error)`.
 - Tenant-scoped Redis keys: `"<prefix><tenantID>:<source>"` (e.g. `"dlq:tenant-abc:outbound"`); falls back to `"<prefix><source>"` when no tenant is in context.
 - `ScanQueues(ctx, source string) ([]string, error)` — uses `SCAN` (non-blocking) to discover all tenant-scoped keys for a source; suitable for background consumers without tenant context.
@@ -211,7 +212,7 @@ Build and shell:
 - Functional options: `WithLogger`, `WithKeyPrefix` (default `"idempotency:"`), `WithKeyTTL` (default 7 days), `WithMaxKeyLength` (default 256), `WithRedisTimeout` (default 500ms), `WithRejectedHandler`, `WithMaxBodyCache` (default 1 MB).
 - `(*Middleware).Check() fiber.Handler` — registers the middleware on a Fiber route.
 - Only applies to mutating methods (POST, PUT, PATCH, DELETE); GET/HEAD/OPTIONS pass through unconditionally.
-- Idempotency key is read from the `Idempotency-Key` request header (`constants.IdempotencyKey`); missing key passes through.
+- Idempotency key is read from the `X-Idempotency` request header (`constants.IdempotencyKey`); missing key passes through.
 - Key too long → 400 JSON `VALIDATION_ERROR` (or custom `WithRejectedHandler`).
 - Redis SetNX atomically claims the key as `"processing"` for the TTL duration.
 - Duplicate request behavior:

diff --git a/commons/certificate/certificate.go b/commons/certificate/certificate.go
@@ -101,9 +101,11 @@ func (m *Manager) Rotate(cert *x509.Certificate, key crypto.Signer, intermediate
 		return ErrKeyRequired
 	}
 
-	// Guard against interface wrapping a nil concrete value.
-	pub := key.Public()
-	if pub == nil {
+	// Guard against interface wrapping a nil concrete value. Real crypto types
+	// (ecdsa, rsa, ed25519) panic on nil receiver in Public(), so we recover
+	// rather than relying on the return value check.
+	pub, panicked := safePublic(key)
+	if panicked || pub == nil {
 		return ErrKeyRequired
 	}
 
@@ -167,6 +169,10 @@ func (m *Manager) GetCertificate() *x509.Certificate {
 }
 
 // GetSigner returns the current private key as a crypto.Signer, or nil if none is loaded.
+// The returned signer shares the underlying private key material with the manager (it is
+// NOT a copy). Callers must not modify the concrete key via unsafe type assertions.
+// Ownership of the key is transferred to the manager at [Rotate] time — the caller
+// must not mutate the key after calling Rotate.
 func (m *Manager) GetSigner() crypto.Signer {
 	if m == nil {
 		return nil
@@ -264,7 +270,14 @@ func (m *Manager) TLSCertificate() tls.Certificate {
 // GetCertificateFunc returns a function suitable for use as [tls.Config.GetCertificate].
 // The returned function always serves the most recently loaded certificate, making
 // hot-reload transparent to the TLS layer.
+// Safe to call on a nil receiver (returns a function that always returns [ErrNilManager]).
 func (m *Manager) GetCertificateFunc() func(*tls.ClientHelloInfo) (*tls.Certificate, error) {
+	if m == nil {
+		return func(_ *tls.ClientHelloInfo) (*tls.Certificate, error) {
+			return nil, ErrNilManager
+		}
+	}
+
 	return func(_ *tls.ClientHelloInfo) (*tls.Certificate, error) {
 		cert := m.TLSCertificate()
 		if cert.Certificate == nil {
@@ -379,6 +392,19 @@ func loadFromFiles(certPath, keyPath string) (*x509.Certificate, crypto.Signer,
 	return cert, signer, certChain, nil
 }
 
+// safePublic calls key.Public() with panic recovery. Real crypto types (ecdsa,
+// rsa, ed25519) panic when called on a nil concrete value behind a non-nil
+// interface. This function converts that panic to a (nil, true) return.
+func safePublic(key crypto.Signer) (pub crypto.PublicKey, panicked bool) {
+	defer func() {
+		if r := recover(); r != nil {
+			panicked = true
+		}
+	}()
+
+	return key.Public(), false
+}
+
 // publicKeysMatch compares two public keys by their DER-encoded PKIX representation.
 func publicKeysMatch(certPublicKey, signerPublicKey any) bool {
 	certDER, err := x509.MarshalPKIXPublicKey(certPublicKey)

diff --git a/commons/certificate/certificate_test.go b/commons/certificate/certificate_test.go
@@ -830,6 +830,25 @@ func TestNewManager_InvalidCertPath_ErrorSpecificity(t *testing.T) {
 // 10. TestNilManager_SubTests
 // ---------------------------------------------------------------------------
 
+// ---------------------------------------------------------------------------
+// TestLoadFromFilesWithChain — verify chain is returned correctly
+// ---------------------------------------------------------------------------
+
+func TestLoadFromFilesWithChain(t *testing.T) {
+	t.Parallel()
+
+	certPath, keyPath := generateTestCert(t, time.Now().Add(365*24*time.Hour))
+
+	cert, signer, chain, err := LoadFromFilesWithChain(certPath, keyPath)
+	require.NoError(t, err)
+	assert.NotNil(t, cert, "certificate must not be nil")
+	assert.NotNil(t, signer, "signer must not be nil")
+	require.NotNil(t, chain, "chain must not be nil")
+	require.Len(t, chain, 1, "single self-signed cert should produce a 1-element chain")
+	assert.Equal(t, cert.Raw, chain[0],
+		"chain[0] must equal the leaf certificate's raw DER bytes")
+}
+
 func TestNilManager_SubTests(t *testing.T) {
 	t.Parallel()
 

diff --git a/commons/dlq/consumer.go b/commons/dlq/consumer.go
@@ -2,7 +2,6 @@ package dlq
 
 import (
 	"context"
-	"errors"
 	"fmt"
 	"slices"
 	"sync"
@@ -12,7 +11,6 @@ import (
 	libOtel "github.com/LerianStudio/lib-commons/v4/commons/opentelemetry"
 	libRuntime "github.com/LerianStudio/lib-commons/v4/commons/runtime"
 	tmcore "github.com/LerianStudio/lib-commons/v4/commons/tenant-manager/core"
-	"github.com/redis/go-redis/v9"
 	"go.opentelemetry.io/otel/trace"
 	"go.opentelemetry.io/otel/trace/noop"
 )
@@ -148,8 +146,13 @@ func NewConsumer(handler *Handler, retryFn RetryFunc, opts ...ConsumerOption) (*
 	}
 
 	// Inherit handler settings when not overridden via options.
+	// Default to nopDLQMetrics to eliminate nil checks at every call site.
 	if c.metrics == nil {
-		c.metrics = handler.metrics
+		if handler.metrics != nil {
+			c.metrics = handler.metrics
+		} else {
+			c.metrics = nopDLQMetrics{}
+		}
 	}
 
 	if c.module == "" {
@@ -281,7 +284,7 @@ func (c *Consumer) processSource(ctx context.Context, source string) {
 	// Build a context per discovered tenant. Include the bare (non-tenant) context
 	// only when no tenant keys were found — if tenant keys exist, draining the
 	// global (non-tenant) key too would double-process the same logical queue.
-	keyContexts := []context.Context{}
+	var keyContexts []context.Context
 
 	for _, key := range tenantKeys {
 		tenantID := c.handler.ExtractTenantFromKey(key, source)
@@ -404,9 +407,7 @@ func (c *Consumer) processMessage(ctx context.Context, msg *FailedMessage) bool
 			)
 
 			metricSource := c.sanitizeMetricSource(msg.Source)
-			if c.metrics != nil {
-				c.metrics.RecordLost(ctx, metricSource)
-			}
+			c.metrics.RecordLost(ctx, metricSource)
 
 			return true
 		}
@@ -419,9 +420,7 @@ func (c *Consumer) processMessage(ctx context.Context, msg *FailedMessage) bool
 		libOtel.HandleSpanError(span, "dlq message exhausted", ErrMessageExhausted)
 
 		metricSource := c.sanitizeMetricSource(msg.Source)
-		if c.metrics != nil {
-			c.metrics.RecordExhausted(ctx, metricSource)
-		}
+		c.metrics.RecordExhausted(ctx, metricSource)
 
 		c.logger.Log(ctx, libLog.LevelError, "dlq consumer: message permanently failed, discarding",
 			libLog.String("source", msg.Source),
@@ -451,9 +450,7 @@ func (c *Consumer) processMessage(ctx context.Context, msg *FailedMessage) bool
 			)
 
 			metricSource := c.sanitizeMetricSource(msg.Source)
-			if c.metrics != nil {
-				c.metrics.RecordLost(ctx, metricSource)
-			}
+			c.metrics.RecordLost(ctx, metricSource)
 
 			return true
 		}
@@ -469,9 +466,7 @@ func (c *Consumer) processMessage(ctx context.Context, msg *FailedMessage) bool
 
 	// Retry succeeded — record metric and discard.
 	metricSource := c.sanitizeMetricSource(msg.Source)
-	if c.metrics != nil {
-		c.metrics.RecordRetried(ctx, metricSource)
-	}
+	c.metrics.RecordRetried(ctx, metricSource)
 
 	c.logger.Log(ctx, libLog.LevelInfo, "dlq consumer: message retry succeeded",
 		libLog.String("source", msg.Source),
@@ -510,9 +505,3 @@ func (c *Consumer) sanitizeMetricSource(source string) string {
 
 	return "unknown"
 }
-
-// isRedisNilError reports whether err wraps redis.Nil. The Handler uses
-// fmt.Errorf("%w") so errors.Is unwraps correctly through the chain.
-func isRedisNilError(err error) bool {
-	return errors.Is(err, redis.Nil)
-}
diff --git a/commons/dlq/consumer_test.go b/commons/dlq/consumer_test.go
@@ -659,6 +659,55 @@ func TestNewConsumer_NilOptions(t *testing.T) {
 	assert.NotNil(t, c.tracer, "tracer must remain non-nil")
 }
 
+// TestProcessOnce_RetryFuncPanics verifies that if the caller-provided retryFn
+// panics, the consumer recovers gracefully: the test does not crash, and the
+// message is re-enqueued with an incremented retry count so it is not lost.
+func TestProcessOnce_RetryFuncPanics(t *testing.T) {
+	t.Parallel()
+
+	metrics := &mockMetrics{}
+
+	retryFn := func(_ context.Context, _ *FailedMessage) error {
+		panic("boom from retryFn")
+	}
+
+	c, h, mr := newTestConsumer(t, retryFn, metrics)
+
+	ctx := tmcore.ContextWithTenantID(context.Background(), "tenant-panic")
+
+	// Inject a retryable message with a past NextRetryAt so the consumer
+	// considers it immediately eligible for retry.
+	injectMessage(t, mr, "dlq:tenant-panic:outbound", &FailedMessage{
+		Source:       "outbound",
+		OriginalData: []byte(`{"id":"panic-msg"}`),
+		ErrorMessage: "original error",
+		RetryCount:   0,
+		MaxRetries:   3,
+		CreatedAt:    time.Now().UTC().Add(-5 * time.Minute),
+		NextRetryAt:  time.Now().UTC().Add(-1 * time.Minute),
+		TenantID:     "tenant-panic",
+	})
+
+	// ProcessOnce must not panic — the safeRetryFunc wrapper recovers.
+	require.NotPanics(t, func() {
+		c.ProcessOnce(ctx)
+	}, "ProcessOnce must not propagate a panic from retryFn")
+
+	// The message must be re-enqueued (not lost) with an incremented RetryCount.
+	length, err := h.QueueLength(ctx, "outbound")
+	require.NoError(t, err)
+	assert.Equal(t, int64(1), length,
+		"message must be re-enqueued after retryFn panic, not lost")
+
+	// Dequeue and verify the retry count was incremented.
+	msg, err := h.Dequeue(ctx, "outbound")
+	require.NoError(t, err)
+	assert.Equal(t, 1, msg.RetryCount,
+		"RetryCount must be incremented after panic-recovered retry failure")
+	assert.Contains(t, msg.ErrorMessage, "panicked",
+		"ErrorMessage must indicate the panic was recovered")
+}
+
 // TestIsRedisNilError covers the four cases for isRedisNilError:
 // direct redis.Nil sentinel, wrapped sentinel, arbitrary error, and nil.
 func TestIsRedisNilError(t *testing.T) {