Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions admin/api/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func makeSnapshot(t time.Time, totalReq, errReq, active, panics float64) metrics
func TestMetricsHandler_Fleet_ReturnsAggregated(t *testing.T) {
t.Parallel()
st := openTestStore(t)
c := metrics.NewCollector(10)
c := metrics.NewCollector(10, time.Hour)

ctx := context.Background()
inst, err := st.CreateInstance(ctx, "proxy-1", "10.0.0.1:9090")
Expand Down Expand Up @@ -78,7 +78,7 @@ func TestMetricsHandler_Fleet_ReturnsAggregated(t *testing.T) {
func TestMetricsHandler_Instance_ReturnsMetrics(t *testing.T) {
t.Parallel()
st := openTestStore(t)
c := metrics.NewCollector(10)
c := metrics.NewCollector(10, time.Hour)

ctx := context.Background()
inst, err := st.CreateInstance(ctx, "proxy-1", "10.0.0.1:9090")
Expand Down Expand Up @@ -117,7 +117,7 @@ func TestMetricsHandler_Instance_ReturnsMetrics(t *testing.T) {
func TestMetricsHandler_Instance_NoData_Returns404(t *testing.T) {
t.Parallel()
st := openTestStore(t)
c := metrics.NewCollector(10)
c := metrics.NewCollector(10, time.Hour)

h := NewMetricsHandler(st, c)
mux := http.NewServeMux()
Expand All @@ -135,7 +135,7 @@ func TestMetricsHandler_Instance_NoData_Returns404(t *testing.T) {
func TestMetricsHandler_Instance_InvalidID_Returns400(t *testing.T) {
t.Parallel()
st := openTestStore(t)
c := metrics.NewCollector(10)
c := metrics.NewCollector(10, time.Hour)

h := NewMetricsHandler(st, c)
mux := http.NewServeMux()
Expand All @@ -153,7 +153,7 @@ func TestMetricsHandler_Instance_InvalidID_Returns400(t *testing.T) {
func TestMetricsHandler_Fleet_EmptyFleet_ReturnsEmptyInstances(t *testing.T) {
t.Parallel()
st := openTestStore(t)
c := metrics.NewCollector(10)
c := metrics.NewCollector(10, time.Hour)

h := NewMetricsHandler(st, c)
mux := http.NewServeMux()
Expand Down
5 changes: 4 additions & 1 deletion admin/cmd/chaperone-admin/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,10 @@ func runServer(args []string) error {
}
defer st.Close()

collector := metrics.NewCollector(metrics.DefaultCapacity)
collector := metrics.NewCollector(
metrics.CapacityFor(cfg.Scraper.RetentionWindow.Unwrap(), cfg.Scraper.Interval.Unwrap()),
cfg.Scraper.RetentionWindow.Unwrap(),
)

srv, err := admin.NewServer(cfg, st, collector)
if err != nil {
Expand Down
5 changes: 3 additions & 2 deletions admin/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,9 @@ type DatabaseConfig struct {

// ScraperConfig configures the proxy metrics scraper.
type ScraperConfig struct {
Interval Duration `yaml:"interval"`
Timeout Duration `yaml:"timeout"`
Interval Duration `yaml:"interval"`
Timeout Duration `yaml:"timeout"`
RetentionWindow Duration `yaml:"retention_window"`
}

// SessionConfig configures session management.
Expand Down
4 changes: 4 additions & 0 deletions admin/config/loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ func applyDefaults(cfg *Config) {
if cfg.Scraper.Timeout == 0 {
cfg.Scraper.Timeout = Duration(5 * time.Second)
}
if cfg.Scraper.RetentionWindow == 0 {
cfg.Scraper.RetentionWindow = Duration(1 * time.Hour)
}
if cfg.Session.MaxAge == 0 {
cfg.Session.MaxAge = Duration(24 * time.Hour)
}
Expand Down Expand Up @@ -107,6 +110,7 @@ func applyEnvOverrides(cfg *Config) error {

parseDuration(&cfg.Scraper.Interval, "SCRAPER_INTERVAL", &errs)
parseDuration(&cfg.Scraper.Timeout, "SCRAPER_TIMEOUT", &errs)
parseDuration(&cfg.Scraper.RetentionWindow, "SCRAPER_RETENTION_WINDOW", &errs)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Docs in docs/guides/admin-portal.md are now out of sync:

  • L52–54: YAML example missing retention_window.
  • L81–87: env var table missing CHAPERONE_ADMIN_SCRAPER_RETENTION_WINDOW.
  • L175: states "The portal retains 360 scrape snapshots per instance ... at 10s intervals is exactly 1 hour of history" — exactly what this PR changes; now misleading.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

parseDuration(&cfg.Session.MaxAge, "SESSION_MAX_AGE", &errs)
parseDuration(&cfg.Session.IdleTimeout, "SESSION_IDLE_TIMEOUT", &errs)

Expand Down
14 changes: 14 additions & 0 deletions admin/config/loader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ func TestLoad_NoFile_AppliesDefaults(t *testing.T) {
if cfg.Scraper.Timeout.Unwrap() != 5*time.Second {
t.Errorf("Scraper.Timeout = %v, want %v", cfg.Scraper.Timeout.Unwrap(), 5*time.Second)
}
if cfg.Scraper.RetentionWindow.Unwrap() != 1*time.Hour {
t.Errorf("Scraper.RetentionWindow = %v, want %v", cfg.Scraper.RetentionWindow.Unwrap(), 1*time.Hour)
}
if cfg.Session.MaxAge.Unwrap() != 24*time.Hour {
t.Errorf("Session.MaxAge = %v, want %v", cfg.Session.MaxAge.Unwrap(), 24*time.Hour)
}
Expand Down Expand Up @@ -75,6 +78,7 @@ database:
scraper:
interval: "30s"
timeout: "10s"
retention_window: "2h"
session:
max_age: "12h"
idle_timeout: "1h"
Expand Down Expand Up @@ -107,6 +111,9 @@ log:
if cfg.Scraper.Timeout.Unwrap() != 10*time.Second {
t.Errorf("Scraper.Timeout = %v, want %v", cfg.Scraper.Timeout.Unwrap(), 10*time.Second)
}
if cfg.Scraper.RetentionWindow.Unwrap() != 2*time.Hour {
t.Errorf("Scraper.RetentionWindow = %v, want %v", cfg.Scraper.RetentionWindow.Unwrap(), 2*time.Hour)
}
if cfg.Session.MaxAge.Unwrap() != 12*time.Hour {
t.Errorf("Session.MaxAge = %v, want %v", cfg.Session.MaxAge.Unwrap(), 12*time.Hour)
}
Expand Down Expand Up @@ -171,6 +178,7 @@ func TestLoad_EnvOverrides_AllFields(t *testing.T) {
t.Setenv("CHAPERONE_ADMIN_DATABASE_PATH", "/tmp/test.db")
t.Setenv("CHAPERONE_ADMIN_SCRAPER_INTERVAL", "20s")
t.Setenv("CHAPERONE_ADMIN_SCRAPER_TIMEOUT", "8s")
t.Setenv("CHAPERONE_ADMIN_SCRAPER_RETENTION_WINDOW", "30m")
t.Setenv("CHAPERONE_ADMIN_SESSION_MAX_AGE", "48h")
t.Setenv("CHAPERONE_ADMIN_SESSION_IDLE_TIMEOUT", "4h")
t.Setenv("CHAPERONE_ADMIN_AUDIT_RETENTION_DAYS", "60")
Expand Down Expand Up @@ -199,6 +207,9 @@ func TestLoad_EnvOverrides_AllFields(t *testing.T) {
if cfg.Scraper.Timeout.Unwrap() != 8*time.Second {
t.Errorf("Scraper.Timeout = %v, want %v", cfg.Scraper.Timeout.Unwrap(), 8*time.Second)
}
if cfg.Scraper.RetentionWindow.Unwrap() != 30*time.Minute {
t.Errorf("Scraper.RetentionWindow = %v, want %v", cfg.Scraper.RetentionWindow.Unwrap(), 30*time.Minute)
}
if cfg.Session.MaxAge.Unwrap() != 48*time.Hour {
t.Errorf("Session.MaxAge = %v, want %v", cfg.Session.MaxAge.Unwrap(), 48*time.Hour)
}
Expand Down Expand Up @@ -269,6 +280,9 @@ func TestApplyDefaults_ZeroConfig_SetsAllDefaults(t *testing.T) {
if cfg.Scraper.Timeout.Unwrap() != 5*time.Second {
t.Errorf("Scraper.Timeout = %v, want 5s", cfg.Scraper.Timeout.Unwrap())
}
if cfg.Scraper.RetentionWindow.Unwrap() != 1*time.Hour {
t.Errorf("Scraper.RetentionWindow = %v, want 1h", cfg.Scraper.RetentionWindow.Unwrap())
}
if cfg.Session.MaxAge.Unwrap() != 24*time.Hour {
t.Errorf("Session.MaxAge = %v, want 24h", cfg.Session.MaxAge.Unwrap())
}
Expand Down
3 changes: 3 additions & 0 deletions admin/config/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ func (c *Config) Validate() error {
if c.Scraper.Timeout.Unwrap() >= c.Scraper.Interval.Unwrap() {
errs = append(errs, errors.New("scraper.timeout must be less than scraper.interval"))
}
if c.Scraper.RetentionWindow.Unwrap() < c.Scraper.Interval.Unwrap() {
errs = append(errs, errors.New("scraper.retention_window must be at least one scraper.interval"))
}

if c.Session.MaxAge.Unwrap() < 1*time.Minute {
errs = append(errs, errors.New("session.max_age must be at least 1m"))
Expand Down
42 changes: 40 additions & 2 deletions admin/config/validate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ func validConfig() *Config {
Server: ServerConfig{Addr: DefaultAddr},
Database: DatabaseConfig{Path: "./test.db"},
Scraper: ScraperConfig{
Interval: Duration(10 * time.Second),
Timeout: Duration(5 * time.Second),
Interval: Duration(10 * time.Second),
Timeout: Duration(5 * time.Second),
RetentionWindow: Duration(1 * time.Hour),
},
Session: SessionConfig{
MaxAge: Duration(24 * time.Hour),
Expand Down Expand Up @@ -124,6 +125,43 @@ func TestValidate_TimeoutGteInterval_ReturnsError(t *testing.T) {
}
}

// TestValidate_RetentionWindowLessThanInterval_ReturnsError checks that
// Validate rejects a scraper retention window shorter than the scrape
// interval, and that the returned error names the offending setting.
func TestValidate_RetentionWindowLessThanInterval_ReturnsError(t *testing.T) {
	t.Parallel()

	const wantSubstr = "retention_window"

	// A 5s window is shorter than a single 10s scrape interval.
	c := validConfig()
	c.Scraper.Interval = Duration(10 * time.Second)
	c.Scraper.RetentionWindow = Duration(5 * time.Second)

	got := c.Validate()
	if got == nil {
		t.Fatal("expected error, got nil")
	}
	if msg := got.Error(); !strings.Contains(msg, wantSubstr) {
		t.Errorf("error = %q, want to contain %q", msg, wantSubstr)
	}
}

func TestValidate_RetentionWindowEqualsInterval_NoError(t *testing.T) {
t.Parallel()

// Arrange — equal is allowed (degenerate but valid: capacity = 1)
cfg := validConfig()
cfg.Scraper.Interval = Duration(10 * time.Second)
cfg.Scraper.RetentionWindow = Duration(10 * time.Second)

// Act
err := cfg.Validate()

// Assert
if err != nil {
t.Errorf("unexpected error: %v", err)
}
}

func TestValidate_NegativeRetention_ReturnsError(t *testing.T) {
t.Parallel()

Expand Down
47 changes: 27 additions & 20 deletions admin/metrics/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,20 @@ import (
// Collector manages per-instance metric ring buffers and computes derived
// metrics (rates, percentiles) on demand.
type Collector struct {
mu sync.RWMutex
buffers map[int64]*Ring
capacity int
mu sync.RWMutex
buffers map[int64]*Ring
capacity int
trendWindow time.Duration
}

// NewCollector creates a Collector with the given ring buffer capacity.
func NewCollector(capacity int) *Collector {
// NewCollector creates a Collector with the given ring buffer capacity and
// trend lookback window. Trends compare current rates against rates from
// approximately trendWindow ago; a zero trendWindow disables trends.
func NewCollector(capacity int, trendWindow time.Duration) *Collector {
return &Collector{
buffers: make(map[int64]*Ring),
capacity: capacity,
buffers: make(map[int64]*Ring),
capacity: capacity,
trendWindow: trendWindow,
}
}

Expand Down Expand Up @@ -253,20 +257,22 @@ func (*Collector) fillVendorMetrics(im *InstanceMetrics, prev, curr Snapshot) {
})
}

// historicalPair returns the two snapshots forming a rate pair from ~1 hour
// ago in the ring buffer. If the buffer doesn't span at least 50 minutes,
// ok is false.
func historicalPair(buf *Ring) (prev, curr Snapshot, ok bool) {
if buf.Len() < 4 {
// historicalPair returns the two snapshots forming a rate pair from
// approximately c.trendWindow ago in the ring buffer. The buffer must span
// at least 5/6 of the trend window before a pair is returned, which keeps
// the comparison meaningful while the ring is still filling. If the trend
// window is zero or the buffer is too short, ok is false.
func (c *Collector) historicalPair(buf *Ring) (prev, curr Snapshot, ok bool) {
if c.trendWindow == 0 || buf.Len() < 4 {
return Snapshot{}, Snapshot{}, false
}
newest := buf.At(buf.Len() - 1)
oldest := buf.At(0)
if newest.Time.Sub(oldest.Time) < 50*time.Minute {
if newest.Time.Sub(oldest.Time) < (c.trendWindow*5)/6 {
return Snapshot{}, Snapshot{}, false
}

target := newest.Time.Add(-1 * time.Hour)
target := newest.Time.Add(-c.trendWindow)
idx := findNearest(buf, target)
start := idx
if start > 0 {
Expand All @@ -279,9 +285,9 @@ func historicalPair(buf *Ring) (prev, curr Snapshot, ok bool) {
}

// fillTrends computes trend values by comparing the current rate to the rate
// from approximately 1 hour ago.
func (*Collector) fillTrends(im *InstanceMetrics, buf *Ring) {
prev, curr, ok := historicalPair(buf)
// from approximately c.trendWindow ago.
func (c *Collector) fillTrends(im *InstanceMetrics, buf *Ring) {
prev, curr, ok := c.historicalPair(buf)
if !ok {
return
}
Expand All @@ -302,9 +308,10 @@ type historicalTrend struct {
errDelta float64
}

// trendSnapshot returns historical RPS and request/error deltas from ~1h ago.
func (*Collector) trendSnapshot(buf *Ring) (historicalTrend, bool) {
prev, curr, ok := historicalPair(buf)
// trendSnapshot returns historical RPS and request/error deltas from
// approximately c.trendWindow ago.
func (c *Collector) trendSnapshot(buf *Ring) (historicalTrend, bool) {
prev, curr, ok := c.historicalPair(buf)
if !ok {
return historicalTrend{}, false
}
Expand Down
Loading
Loading