-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathquantile.go
More file actions
203 lines (185 loc) · 5.96 KB
/
quantile.go
File metadata and controls
203 lines (185 loc) · 5.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
// Copyright 2026 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
package goodhistogram
import "math"
// ValueAtQuantile returns the estimated value at the given quantile q ∈ [0, 1]
// using trapezoidal interpolation.
//
// Unlike the standard linear (uniform-density) interpolation used by Prometheus,
// trapezoidal interpolation estimates the probability density at each bucket
// boundary by averaging the densities of adjacent buckets. Within each bucket
// the density is modeled as varying linearly from the left boundary density to
// the right boundary density — a trapezoid. The CDF within a bucket is then the
// integral of this linear density, which is a quadratic. The quantile value is
// found by solving this quadratic.
//
// This produces more accurate estimates when the true distribution is not
// uniform within buckets (i.e., nearly always), especially near bucket
// boundaries where the density changes.
func (s *Snapshot) ValueAtQuantile(q float64) float64 {
if s.TotalCount == 0 {
return 0
}
// Target rank (fractional, 0-based) across all observations,
// including underflow and overflow.
rank := q * float64(s.TotalCount)
if rank <= 0 {
// Return the lower bound of the first non-empty region.
if s.ZeroCount+s.Underflow > 0 {
return s.cfg.lo
}
for i, c := range s.Counts {
if c > 0 {
return s.cfg.boundaries[i]
}
}
return s.cfg.hi
}
if rank >= float64(s.TotalCount) {
// Return the upper bound of the last non-empty region.
if s.Overflow > 0 {
return s.cfg.hi
}
for i := len(s.Counts) - 1; i >= 0; i-- {
if s.Counts[i] > 0 {
return s.cfg.boundaries[i+1]
}
}
return s.cfg.lo
}
// Underflow and zero observations form an implicit region below lo.
// If the quantile falls within this region, clamp to lo.
belowLo := float64(s.ZeroCount + s.Underflow)
if rank <= belowLo {
return s.cfg.lo
}
// Adjust rank to be relative to the in-range buckets.
rank -= belowLo
// Count of in-range observations (buckets only, excluding
// underflow, overflow, and zeros).
var inRangeCount uint64
for _, c := range s.Counts {
inRangeCount += c
}
// If the quantile falls beyond all in-range observations, it's
// in the overflow region. Clamp to hi, matching Prometheus's
// behavior of clamping to the last explicit bucket boundary.
if rank > float64(inRangeCount) {
return s.cfg.hi
}
n := len(s.Counts)
// Step 1: Compute average density in each bucket.
// density[i] = count[i] / width[i]
avgDensity := make([]float64, n)
for i := range n {
w := s.cfg.boundaries[i+1] - s.cfg.boundaries[i]
if w > 0 && s.Counts[i] > 0 {
avgDensity[i] = float64(s.Counts[i]) / w
}
}
// Step 2: Estimate density at each boundary by averaging neighbors.
// boundaryDensity has length n+1 (one per boundary).
boundaryDensity := make([]float64, n+1)
for i := range n {
switch i {
case 0:
boundaryDensity[i] = avgDensity[0]
case n:
boundaryDensity[i] = avgDensity[n-1]
default:
boundaryDensity[i] = (avgDensity[i-1] + avgDensity[i]) / 2.0
}
}
// Step 3: Walk buckets to find which one contains the target rank,
// then solve the trapezoidal CDF within that bucket.
var cumCount float64
for i := range n {
fc := float64(s.Counts[i])
if cumCount+fc >= rank {
localRank := rank - cumCount
lo := s.cfg.boundaries[i]
hi := s.cfg.boundaries[i+1]
w := hi - lo
if w <= 0 || fc == 0 {
return lo
}
dL := boundaryDensity[i]
dR := boundaryDensity[i+1]
return trapezoidalSolve(lo, w, fc, dL, dR, localRank)
}
cumCount += fc
}
// Should not reach here, but clamp to upper bound.
return s.cfg.boundaries[n]
}
// Mean returns the mean of all recorded values. Returns NaN if no
// observations have been recorded, matching Prometheus histogram behavior.
func (s *Snapshot) Mean() float64 {
if s.TotalCount == 0 {
return math.NaN()
}
return float64(s.TotalSum) / float64(s.TotalCount)
}
// Total returns the total count and sum.
func (s *Snapshot) Total() (int64, float64) {
return int64(s.TotalCount), float64(s.TotalSum)
}
// trapezoidalSolve finds the value x ∈ [lo, lo+w] such that the area under a
// linear density function from lo to x equals localRank.
//
// The density is modeled as f(t) = dL + (dR - dL) * (t - lo) / w, where dL and
// dR are the densities at the left and right bucket boundaries. However, the
// raw boundary densities may not integrate to exactly the bucket count over
// [lo, lo+w], so we scale them: the area of the trapezoid w*(dL+dR)/2 should
// equal the bucket count fc.
//
// The CDF within the bucket is:
//
// F(x) = a*x + b*x^2 (with x measured from lo)
//
// where a = scaled dL, b = (scaled dR - scaled dL) / (2*w).
// We solve a*x + b*x^2 = localRank via the quadratic formula.
func trapezoidalSolve(lo, w, fc, dL, dR, localRank float64) float64 {
// Scale boundary densities so the trapezoid area matches the count.
rawArea := w * (dL + dR) / 2.0
if rawArea <= 0 {
// Both boundary densities are zero — fall back to linear interpolation.
return lo + w*(localRank/fc)
}
scale := fc / rawArea
sL := dL * scale // scaled left density
sR := dR * scale // scaled right density
// Degenerate case: uniform density (sL ≈ sR). Linear interpolation.
if math.Abs(sR-sL) < 1e-12*(sL+sR) {
return lo + w*(localRank/fc)
}
// Quadratic: sL*x + ((sR-sL)/(2*w))*x^2 = localRank
// Rewrite as: ((sR-sL)/(2*w))*x^2 + sL*x - localRank = 0
a := (sR - sL) / (2.0 * w)
b := sL
c := -localRank
disc := b*b - 4*a*c
if disc < 0 {
// Numerical issue; fall back to linear.
return lo + w*(localRank/fc)
}
// Use the numerically stable quadratic formula.
var x float64
if b >= 0 {
x = (2 * c) / (-b - math.Sqrt(disc))
} else {
x = (-b + math.Sqrt(disc)) / (2 * a)
}
// Clamp to bucket range.
if x < 0 {
x = 0
} else if x > w {
x = w
}
return lo + x
}