From 42895721d5ac72087d0f83190c29b045bc0aff68 Mon Sep 17 00:00:00 2001 From: George Hahn Date: Mon, 20 May 2024 17:13:07 -0600 Subject: [PATCH 1/8] Add large batched points allocation test --- .../one_thousand_batched_points_ddsketch.rs | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 lib/ddsketch-agent/tests/one_thousand_batched_points_ddsketch.rs diff --git a/lib/ddsketch-agent/tests/one_thousand_batched_points_ddsketch.rs b/lib/ddsketch-agent/tests/one_thousand_batched_points_ddsketch.rs new file mode 100644 index 0000000000..44de50869d --- /dev/null +++ b/lib/ddsketch-agent/tests/one_thousand_batched_points_ddsketch.rs @@ -0,0 +1,30 @@ +//! Allocation test for sketch insertion. +//! +//! Note: this is in an integration test so that it will run in its own process +//! and avoid interference from other tests. See notes at: +//! https://docs.rs/dhat/latest/dhat/#heap-usage-testing. + +use crate::common::{insert_many_and_serialize, make_points, MathableHeapStats}; + +mod common; + +#[global_allocator] +static ALLOC: dhat::Alloc = dhat::Alloc; + +#[test] +fn test_one_thousand_single_points_ddsketch() { + let _profiler = dhat::Profiler::builder().testing().build(); + let points = make_points(1000); + + let before: MathableHeapStats = dhat::HeapStats::get().into(); + insert_many_and_serialize(&points); + let after: MathableHeapStats = dhat::HeapStats::get().into(); + + let diff = after - before; + dhat::assert_eq!(diff.total_blocks, 22); + dhat::assert_eq!(diff.total_bytes, 8096); + dhat::assert_eq!(diff.max_blocks, 3); + dhat::assert_eq!(diff.max_bytes, 3072); + dhat::assert_eq!(diff.curr_blocks, 0); + dhat::assert_eq!(diff.curr_bytes, 0); +} From 1ba598e81b2854b123f805b535fe5ee3a7460859 Mon Sep 17 00:00:00 2001 From: George Hahn Date: Mon, 20 May 2024 19:09:05 -0600 Subject: [PATCH 2/8] Add insertion tests --- lib/ddsketch-agent/src/lib.rs | 121 ++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/lib/ddsketch-agent/src/lib.rs b/lib/ddsketch-agent/src/lib.rs index d3d287d1ef..c50dd735de 100644 --- a/lib/ddsketch-agent/src/lib.rs +++ b/lib/ddsketch-agent/src/lib.rs @@ -1099,6 +1099,7 @@ mod tests { fn parse_sketch_from_string_bins(layout: &str) -> DDSketch { layout .split(' ') + .filter(|v| !v.is_empty()) .map(|pair| pair.split(':').map(ToOwned::to_owned).collect::>()) .fold(DDSketch::default(), |mut sketch, mut kn| { let k = kn.remove(0).parse::().unwrap(); @@ -1124,6 +1125,126 @@ mod tests { assert_eq!(actual.bins(), expected.bins()); } + #[test] + fn test_sketch_insert_and_overflow() { + /// values to insert into a sketch + #[allow(dead_code)] + enum Value { + Float(f64), + Vec(Vec), + NFloats(u32, f64), + } + /// ways to insert values into a sketch + #[derive(Debug)] + enum InsertFn { + Insert, + InsertMany, + InsertN, + } + struct Case { + description: &'static str, + start: &'static str, + insert: Value, + expected: &'static str, + } + + let cases = &[ + Case { + description: "baseline: inserting into an empty sketch", + start: "", + insert: Value::Float(0.0), + expected: "0:1", + }, + Case { + description: "inserting a value into an existing bin", + start: "0:1", + insert: Value::Float(0.0), + expected: "0:2", + }, + Case { + description: "inserting a value into a new bin at the start", + start: "1338:1", + insert: Value::Float(0.0), + expected: "0:1 1338:1", + }, + Case { + description: "inserting a value into a new bin in the middle", + start: "0:1 1338:1", + insert: Value::Float(0.5), + expected: "0:1 1293:1 1338:1", + }, + Case { + description: "inserting a value into a new bin at the end", + start: "0:1", + insert: Value::Float(1.0), + expected: "0:1 1338:1", + }, + Case { + description: "inserting a value into a sketch and causing an overflow", + start: "0:65535", + insert: Value::Float(0.0), + expected: "0:1 0:65535", + }, + Case { + description: "inserting many values into a sketch and causing an overflow", + start: "0:100", + insert: Value::NFloats(65535, 0.0), + expected: "0:100 0:65535", + }, + Case { + description: "inserting a value into a new bin in the middle and causing an overflow", + start: "0:1 1338:1", + insert: Value::NFloats(65536, 0.5), + expected: "0:1 1293:1 1293:65535 1338:1", + }, + ]; + + for case in cases { + for insert_fn in &[InsertFn::Insert, InsertFn::InsertMany, InsertFn::InsertN] { + // Insert each value every way possible. + + let mut sketch = parse_sketch_from_string_bins(case.start); + + match insert_fn { + InsertFn::Insert => match &case.insert { + Value::Float(v) => sketch.insert(*v), + Value::Vec(vs) => { + for v in vs { + sketch.insert(*v); + } + } + Value::NFloats(n, v) => { + for _ in 0..*n { + sketch.insert(*v); + } + } + }, + InsertFn::InsertMany => match &case.insert { + Value::Float(v) => sketch.insert_many(&vec![*v]), + Value::Vec(vs) => sketch.insert_many(vs), + Value::NFloats(n, v) => { + for _ in 0..*n { + sketch.insert_many(&vec![*v]); + } + } + }, + InsertFn::InsertN => match &case.insert { + Value::Float(v) => sketch.insert_n(*v, 1), + Value::Vec(vs) => { + for v in vs { + sketch.insert_n(*v, 1); + } + } + Value::NFloats(n, v) => sketch.insert_n(*v, *n), + }, + } + + let expected = parse_sketch_from_string_bins(case.expected); + assert_eq!(expected.bins(), sketch.bins(), "{}", case.description); + } + } + } + #[test] fn test_histogram_interpolation_agent_similarity() { #[derive(Clone)] From c1e2909188909b938e5cb9e297c912f90130a473 Mon Sep 17 00:00:00 2001 From: George Hahn Date: Mon, 20 May 2024 17:21:50 -0600 Subject: [PATCH 3/8] Add a faster slow path for single point insert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` DDSketch/insert-single/1 time: [195.24 ns 196.81 ns 198.42 ns] thrpt: [5.0399 Melem/s 5.0811 Melem/s 5.1218 Melem/s] change: time: [-12.248% -11.332% -10.298%] (p = 0.00 < 0.05) thrpt: [+11.480% +12.780% +13.957%] Performance has improved. Found 4 outliers among 100 measurements (4.00%) 3 (3.00%) low mild 1 (1.00%) high mild DDSketch/insert-single/10 time: [926.41 ns 932.76 ns 938.95 ns] thrpt: [10.650 Melem/s 10.721 Melem/s 10.794 Melem/s] change: time: [-46.653% -46.185% -45.738%] (p = 0.00 < 0.05) thrpt: [+84.292% +85.823% +87.452%] Performance has improved. DDSketch/insert-single/100 time: [9.4161 µs 9.4404 µs 9.4644 µs] thrpt: [10.566 Melem/s 10.593 Melem/s 10.620 Melem/s] change: time: [-62.182% -61.795% -61.405%] (p = 0.00 < 0.05) thrpt: [+159.10% +161.75% +164.42%] Performance has improved. Found 1 outliers among 100 measurements (1.00%) 1 (1.00%) high mild DDSketch/insert-single/1000 time: [101.41 µs 101.60 µs 101.79 µs] thrpt: [9.8241 Melem/s 9.8427 Melem/s 9.8612 Melem/s] change: time: [-37.791% -37.406% -37.021%] (p = 0.00 < 0.05) thrpt: [+58.783% +59.759% +60.747%] Performance has improved. Found 2 outliers among 100 measurements (2.00%) 1 (1.00%) low mild 1 (1.00%) high mild DDSketch/insert-single/10000 time: [710.74 µs 714.54 µs 720.15 µs] thrpt: [13.886 Melem/s 13.995 Melem/s 14.070 Melem/s] change: time: [-13.108% -12.535% -11.676%] (p = 0.00 < 0.05) thrpt: [+13.220% +14.332% +15.086%] Performance has improved. Found 7 outliers among 100 measurements (7.00%) 4 (4.00%) high mild 3 (3.00%) high severe DDSketch/insert-many/1 time: [218.42 ns 220.55 ns 223.97 ns] thrpt: [4.4648 Melem/s 4.5340 Melem/s 4.5782 Melem/s] change: time: [-2.4500% -1.5756% -0.5105%] (p = 0.00 < 0.05) thrpt: [+0.5132% +1.6008% +2.5116%] Change within noise threshold. Found 2 outliers among 100 measurements (2.00%) 2 (2.00%) high severe DDSketch/insert-many/10 time: [718.25 ns 722.67 ns 727.43 ns] thrpt: [13.747 Melem/s 13.838 Melem/s 13.923 Melem/s] change: time: [-1.9143% -1.0697% -0.2484%] (p = 0.01 < 0.05) thrpt: [+0.2491% +1.0813% +1.9517%] Change within noise threshold. Found 5 outliers among 100 measurements (5.00%) 4 (4.00%) low mild 1 (1.00%) high mild DDSketch/insert-many/100 time: [2.6682 µs 2.6761 µs 2.6842 µs] thrpt: [37.255 Melem/s 37.367 Melem/s 37.479 Melem/s] change: time: [+0.5251% +1.0608% +1.5666%] (p = 0.00 < 0.05) thrpt: [-1.5424% -1.0496% -0.5223%] Change within noise threshold. Found 10 outliers among 100 measurements (10.00%) 3 (3.00%) low mild 5 (5.00%) high mild 2 (2.00%) high severe DDSketch/insert-many/1000 time: [15.579 µs 15.604 µs 15.628 µs] thrpt: [63.988 Melem/s 64.088 Melem/s 64.187 Melem/s] change: time: [+3.4126% +3.7884% +4.1394%] (p = 0.00 < 0.05) thrpt: [-3.9749% -3.6501% -3.3000%] Performance has regressed. Found 1 outliers among 100 measurements (1.00%) 1 (1.00%) high mild DDSketch/insert-many/10000 time: [135.18 µs 135.51 µs 135.86 µs] thrpt: [73.604 Melem/s 73.797 Melem/s 73.975 Melem/s] change: time: [+3.3188% +3.8789% +4.4002%] (p = 0.00 < 0.05) thrpt: [-4.2147% -3.7340% -3.2122%] Performance has regressed. ``` --- lib/ddsketch-agent/src/lib.rs | 43 ++++++++++++++++++- .../one_thousand_single_points_ddsketch.rs | 6 +-- .../tests/ten_single_points_ddsketch.rs | 4 +- 3 files changed, 46 insertions(+), 7 deletions(-) diff --git a/lib/ddsketch-agent/src/lib.rs b/lib/ddsketch-agent/src/lib.rs index c50dd735de..930a21fb0e 100644 --- a/lib/ddsketch-agent/src/lib.rs +++ b/lib/ddsketch-agent/src/lib.rs @@ -442,8 +442,47 @@ impl DDSketch { } } - // Slow path could be also optimized. - self.insert_keys(vec![key]); + // Slow path is essentially `insert_keys` with a single key. + + // Preallocate output bins. + let new_size = cmp::min(self.bins.len() + 1, self.config.bin_limit as usize); + let mut temp = Vec::with_capacity(new_size); + + let mut bins_idx = 0; + let mut added = false; + let bins_len = self.bins.len(); + + while bins_idx < bins_len && !added { + let bin = self.bins[bins_idx]; + + match bin.k.cmp(&key) { + Ordering::Less => { + temp.push(bin); + bins_idx += 1; + } + Ordering::Equal => { + generate_bins(&mut temp, bin.k, u32::from(bin.n) + 1); + bins_idx += 1; + added = true; + } + Ordering::Greater => { + generate_bins(&mut temp, key, 1); + added = true; + } + } + } + + temp.extend_from_slice(&self.bins[bins_idx..]); + + if !added { + generate_bins(&mut temp, key, 1); + } + + trim_left(&mut temp, self.config.bin_limit); + + // PERF TODO: This is where we might do a mem::swap instead so that we could shove the bin vector into an object + // pool but I'm not sure this actually matters at the moment. + self.bins = temp; } /// Inserts many values into the sketch. diff --git a/lib/ddsketch-agent/tests/one_thousand_single_points_ddsketch.rs b/lib/ddsketch-agent/tests/one_thousand_single_points_ddsketch.rs index 5254fd65ad..ec0f2cf100 100644 --- a/lib/ddsketch-agent/tests/one_thousand_single_points_ddsketch.rs +++ b/lib/ddsketch-agent/tests/one_thousand_single_points_ddsketch.rs @@ -21,10 +21,10 @@ fn test_one_thousand_single_points_ddsketch() { let after: MathableHeapStats = dhat::HeapStats::get().into(); let diff = after - before; - dhat::assert_eq!(diff.total_blocks, 1426); - dhat::assert_eq!(diff.total_bytes, 249802); + dhat::assert_eq!(diff.total_blocks, 229); + dhat::assert_eq!(diff.total_bytes, 96944); dhat::assert_eq!(diff.max_blocks, 3); - dhat::assert_eq!(diff.max_bytes, 3072); + dhat::assert_eq!(diff.max_bytes, 2908); dhat::assert_eq!(diff.curr_blocks, 0); dhat::assert_eq!(diff.curr_bytes, 0); } diff --git a/lib/ddsketch-agent/tests/ten_single_points_ddsketch.rs b/lib/ddsketch-agent/tests/ten_single_points_ddsketch.rs index c468865ed3..a0f0522545 100644 --- a/lib/ddsketch-agent/tests/ten_single_points_ddsketch.rs +++ b/lib/ddsketch-agent/tests/ten_single_points_ddsketch.rs @@ -21,8 +21,8 @@ fn test_ten_single_points_ddsketch() { let after: MathableHeapStats = dhat::HeapStats::get().into(); let diff = after - before; - dhat::assert_eq!(diff.total_blocks, 33); - dhat::assert_eq!(diff.total_bytes, 668); + dhat::assert_eq!(diff.total_blocks, 16); + dhat::assert_eq!(diff.total_bytes, 444); dhat::assert_eq!(diff.max_blocks, 3); dhat::assert_eq!(diff.max_bytes, 168); dhat::assert_eq!(diff.curr_blocks, 0); From fe75834eb41e0e70ab265429b64c585b67cebf51 Mon Sep 17 00:00:00 2001 From: George Hahn Date: Mon, 20 May 2024 19:10:02 -0600 Subject: [PATCH 4/8] Add an even faster slow path for single point insert --- lib/ddsketch-agent/src/lib.rs | 64 ++++++------------- .../one_thousand_single_points_ddsketch.rs | 6 +- .../tests/ten_single_points_ddsketch.rs | 6 +- 3 files changed, 26 insertions(+), 50 deletions(-) diff --git a/lib/ddsketch-agent/src/lib.rs b/lib/ddsketch-agent/src/lib.rs index 930a21fb0e..bb8931cfaf 100644 --- a/lib/ddsketch-agent/src/lib.rs +++ b/lib/ddsketch-agent/src/lib.rs @@ -434,55 +434,31 @@ impl DDSketch { let key = self.config.key(v); - // Fast path for adding to an existing bin. - for b in &mut self.bins { - if b.k == key && b.n < MAX_BIN_WIDTH { - b.n += 1; - return; - } - } - - // Slow path is essentially `insert_keys` with a single key. - - // Preallocate output bins. - let new_size = cmp::min(self.bins.len() + 1, self.config.bin_limit as usize); - let mut temp = Vec::with_capacity(new_size); - - let mut bins_idx = 0; - let mut added = false; - let bins_len = self.bins.len(); - - while bins_idx < bins_len && !added { - let bin = self.bins[bins_idx]; - - match bin.k.cmp(&key) { - Ordering::Less => { - temp.push(bin); - bins_idx += 1; - } - Ordering::Equal => { - generate_bins(&mut temp, bin.k, u32::from(bin.n) + 1); - bins_idx += 1; - added = true; - } - Ordering::Greater => { - generate_bins(&mut temp, key, 1); - added = true; + let mut insert_at = None; + + for (bin_idx, b) in self.bins.iter_mut().enumerate() { + if b.k == key { + if b.n < MAX_BIN_WIDTH { + // Fast path for adding to an existing bin. + b.n += 1; + return; + } else { + insert_at = Some(bin_idx); + break; } } + if b.k > key { + insert_at = Some(bin_idx); + break; + } } - temp.extend_from_slice(&self.bins[bins_idx..]); - - if !added { - generate_bins(&mut temp, key, 1); + if let Some(bin_idx) = insert_at { + self.bins.insert(bin_idx, Bin { k: key, n: 1 }); + } else { + self.bins.push(Bin { k: key, n: 1 }); } - - trim_left(&mut temp, self.config.bin_limit); - - // PERF TODO: This is where we might do a mem::swap instead so that we could shove the bin vector into an object - // pool but I'm not sure this actually matters at the moment. - self.bins = temp; + trim_left(&mut self.bins, self.config.bin_limit); } /// Inserts many values into the sketch. diff --git a/lib/ddsketch-agent/tests/one_thousand_single_points_ddsketch.rs b/lib/ddsketch-agent/tests/one_thousand_single_points_ddsketch.rs index ec0f2cf100..4f2a0432d6 100644 --- a/lib/ddsketch-agent/tests/one_thousand_single_points_ddsketch.rs +++ b/lib/ddsketch-agent/tests/one_thousand_single_points_ddsketch.rs @@ -21,10 +21,10 @@ fn test_one_thousand_single_points_ddsketch() { let after: MathableHeapStats = dhat::HeapStats::get().into(); let diff = after - before; - dhat::assert_eq!(diff.total_blocks, 229); - dhat::assert_eq!(diff.total_bytes, 96944); + dhat::assert_eq!(diff.total_blocks, 21); + dhat::assert_eq!(diff.total_bytes, 6096); dhat::assert_eq!(diff.max_blocks, 3); - dhat::assert_eq!(diff.max_bytes, 2908); + dhat::assert_eq!(diff.max_bytes, 3072); dhat::assert_eq!(diff.curr_blocks, 0); dhat::assert_eq!(diff.curr_bytes, 0); } diff --git a/lib/ddsketch-agent/tests/ten_single_points_ddsketch.rs b/lib/ddsketch-agent/tests/ten_single_points_ddsketch.rs index a0f0522545..094ab80583 100644 --- a/lib/ddsketch-agent/tests/ten_single_points_ddsketch.rs +++ b/lib/ddsketch-agent/tests/ten_single_points_ddsketch.rs @@ -21,10 +21,10 @@ fn test_ten_single_points_ddsketch() { let after: MathableHeapStats = dhat::HeapStats::get().into(); let diff = after - before; - dhat::assert_eq!(diff.total_blocks, 16); - dhat::assert_eq!(diff.total_bytes, 444); + dhat::assert_eq!(diff.total_blocks, 9); + dhat::assert_eq!(diff.total_bytes, 336); dhat::assert_eq!(diff.max_blocks, 3); - dhat::assert_eq!(diff.max_bytes, 168); + dhat::assert_eq!(diff.max_bytes, 192); dhat::assert_eq!(diff.curr_blocks, 0); dhat::assert_eq!(diff.curr_bytes, 0); } From 58af76c5807dc23cf047fd26ada3eb85916af7d7 Mon Sep 17 00:00:00 2001 From: George Hahn Date: Mon, 20 May 2024 19:17:12 -0600 Subject: [PATCH 5/8] clarify comment & add test case --- lib/ddsketch-agent/src/lib.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/ddsketch-agent/src/lib.rs b/lib/ddsketch-agent/src/lib.rs index bb8931cfaf..19d8dcd804 100644 --- a/lib/ddsketch-agent/src/lib.rs +++ b/lib/ddsketch-agent/src/lib.rs @@ -439,7 +439,7 @@ impl DDSketch { for (bin_idx, b) in self.bins.iter_mut().enumerate() { if b.k == key { if b.n < MAX_BIN_WIDTH { - // Fast path for adding to an existing bin. + // Fast path for adding to an existing bin without overflow. b.n += 1; return; } else { @@ -1195,7 +1195,13 @@ mod tests { expected: "0:1 1338:1", }, Case { - description: "inserting a value into a sketch and causing an overflow", + description: "inserting a value into an existing bin and filling it", + start: "0:65534", + insert: Value::Float(0.0), + expected: "0:65535", + }, + Case { + description: "inserting a value into an existing bin and causing an overflow", start: "0:65535", insert: Value::Float(0.0), expected: "0:1 0:65535", From c96e0af76ef2945388071ce6ffb3db04afe1eddc Mon Sep 17 00:00:00 2001 From: George Hahn Date: Mon, 20 May 2024 19:23:11 -0600 Subject: [PATCH 6/8] clippies in tests --- lib/ddsketch-agent/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/ddsketch-agent/src/lib.rs b/lib/ddsketch-agent/src/lib.rs index 19d8dcd804..781bd0a0d8 100644 --- a/lib/ddsketch-agent/src/lib.rs +++ b/lib/ddsketch-agent/src/lib.rs @@ -1241,11 +1241,11 @@ mod tests { } }, InsertFn::InsertMany => match &case.insert { - Value::Float(v) => sketch.insert_many(&vec![*v]), + Value::Float(v) => sketch.insert_many(&[*v]), Value::Vec(vs) => sketch.insert_many(vs), Value::NFloats(n, v) => { for _ in 0..*n { - sketch.insert_many(&vec![*v]); + sketch.insert_many(&[*v]); } } }, From dcb706a827fd28bed6b78e75dab79cb0e4646be4 Mon Sep 17 00:00:00 2001 From: George Hahn Date: Mon, 20 May 2024 19:54:07 -0600 Subject: [PATCH 7/8] Create a test showing the trimLeft overflow bug --- lib/ddsketch-agent/src/lib.rs | 144 +++++++++++++++++++++++++++++++--- 1 file changed, 131 insertions(+), 13 deletions(-) diff --git a/lib/ddsketch-agent/src/lib.rs b/lib/ddsketch-agent/src/lib.rs index 781bd0a0d8..5af4b88a25 100644 --- a/lib/ddsketch-agent/src/lib.rs +++ b/lib/ddsketch-agent/src/lib.rs @@ -215,6 +215,20 @@ pub struct DDSketch { } impl DDSketch { + fn with_config(config: Config) -> Self { + let initial_bins = cmp::min(INITIAL_BINS, config.bin_limit) as usize; + + Self { + config, + bins: Vec::with_capacity(initial_bins), + count: 0, + min: f64::MAX, + max: f64::MIN, + sum: 0.0, + avg: 0.0, + } + } + #[cfg(test)] fn bin_count(&self) -> usize { self.bins.len() @@ -724,17 +738,8 @@ impl PartialEq for DDSketch { impl Default for DDSketch { fn default() -> Self { let config = Config::default(); - let initial_bins = cmp::min(INITIAL_BINS, config.bin_limit) as usize; - Self { - config, - bins: Vec::with_capacity(initial_bins), - count: 0, - min: f64::MAX, - max: f64::MIN, - sum: 0.0, - avg: 0.0, - } + Self::with_config(config) } } @@ -836,6 +841,8 @@ mod tests { use rand::thread_rng; use rand_distr::{Distribution, Pareto}; + use crate::AGENT_DEFAULT_MIN_VALUE; + use super::{Bucket, Config, DDSketch, AGENT_DEFAULT_EPS, MAX_KEY}; const FLOATING_POINT_ACCEPTABLE_ERROR: f64 = 1.0e-10; @@ -1111,12 +1118,12 @@ mod tests { test_relative_accuracy(config, AGENT_DEFAULT_EPS, min_value, max_value) } - fn parse_sketch_from_string_bins(layout: &str) -> DDSketch { + fn parse_sketch_from_string_bins_with_custom_blank_ddsketch(blank_sketch: DDSketch, layout: &str) -> DDSketch { layout .split(' ') .filter(|v| !v.is_empty()) .map(|pair| pair.split(':').map(ToOwned::to_owned).collect::>()) - .fold(DDSketch::default(), |mut sketch, mut kn| { + .fold(blank_sketch, |mut sketch, mut kn| { let k = kn.remove(0).parse::().unwrap(); let n = kn.remove(0).parse::().unwrap(); @@ -1125,6 +1132,17 @@ mod tests { }) } + fn parse_sketch_from_string_bins(layout: &str) -> DDSketch { + parse_sketch_from_string_bins_with_custom_blank_ddsketch(DDSketch::default(), layout) + } + + fn parse_sketch_from_string_bins_with_bin_limit(layout: &str, bin_limit: u16) -> DDSketch { + let config = Config::new(AGENT_DEFAULT_EPS, AGENT_DEFAULT_MIN_VALUE, bin_limit); + let sketch = DDSketch::with_config(config); + + parse_sketch_from_string_bins_with_custom_blank_ddsketch(sketch, layout) + } + fn compare_sketches(actual: &DDSketch, expected: &DDSketch, allowed_err: f64) { let actual_sum = actual.sum().unwrap(); let expected_sum = expected.sum().unwrap(); @@ -1140,6 +1158,106 @@ mod tests { assert_eq!(actual.bins(), expected.bins()); } + #[test] + fn test_sketch_trimleft() { + /// values to insert into a sketch + #[allow(dead_code)] + enum Value { + Float(f64), + Vec(Vec), + NFloats(u32, f64), + } + /// ways to insert values into a sketch + #[derive(Debug)] + enum InsertFn { + Insert, + InsertMany, + InsertN, + } + struct Case { + description: &'static str, + start: &'static str, + insert: Value, + expected: &'static str, + max_bins: u16, + } + + let cases = &[ + Case { + description: "baseline: inserting from empty up to bin limit", + start: "", + insert: Value::Vec(vec![0.0, 0.5, 1.0, 1.5]), + expected: "0:1 1293:1 1338:1 1364:1", + max_bins: 4, + }, + Case { + description: "inserting from empty to over bin limit", + start: "", + insert: Value::Vec(vec![0.0, 0.5, 1.0, 1.5]), + expected: "1293:2 1338:1 1364:1", + max_bins: 3, + }, + Case { + description: "inserting from empty to well over bin limit", + start: "", + insert: Value::Vec(vec![0.0, 0.5, 1.0, 1.5, 0.0, 0.0]), + expected: "1293:4 1338:1 1364:1", + max_bins: 3, + }, + Case { + description: "inserting from empty to over bin limit with overflow", + start: "", + insert: Value::NFloats(65535 * 5, 0.0), + // longstanding trimLeft bug + expected: "0:65535 0:65535 0:65535 0:65535 0:65535", + max_bins: 3, + }, + ]; + + for case in cases { + for insert_fn in &[InsertFn::Insert, InsertFn::InsertMany, InsertFn::InsertN] { + let mut sketch = parse_sketch_from_string_bins_with_bin_limit(case.start, case.max_bins); + + match insert_fn { + InsertFn::Insert => match &case.insert { + Value::Float(v) => sketch.insert(*v), + Value::Vec(vs) => { + for v in vs { + sketch.insert(*v); + } + } + Value::NFloats(n, v) => { + for _ in 0..*n { + sketch.insert(*v); + } + } + }, + InsertFn::InsertMany => match &case.insert { + Value::Float(v) => sketch.insert_many(&[*v]), + Value::Vec(vs) => sketch.insert_many(vs), + Value::NFloats(n, v) => { + for _ in 0..*n { + sketch.insert_many(&[*v]); + } + } + }, + InsertFn::InsertN => match &case.insert { + Value::Float(v) => sketch.insert_n(*v, 1), + Value::Vec(vs) => { + for v in vs { + sketch.insert_n(*v, 1); + } + } + Value::NFloats(n, v) => sketch.insert_n(*v, *n), + }, + } + + let expected = parse_sketch_from_string_bins_with_bin_limit(case.expected, case.max_bins); + assert_eq!(expected.bins(), sketch.bins(), "{:?}: {}", insert_fn, case.description); + } + } + } + #[test] fn test_sketch_insert_and_overflow() { /// values to insert into a sketch @@ -1261,7 +1379,7 @@ mod tests { } let expected = parse_sketch_from_string_bins(case.expected); - assert_eq!(expected.bins(), sketch.bins(), "{}", case.description); + assert_eq!(expected.bins(), sketch.bins(), "{:?}: {}", insert_fn, case.description); } } } From 76fedac228fb1ade33511430e46075cf14a01e46 Mon Sep 17 00:00:00 2001 From: George Hahn Date: Mon, 20 May 2024 20:15:13 -0600 Subject: [PATCH 8/8] Add a few more test cases --- lib/ddsketch-agent/src/lib.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/lib/ddsketch-agent/src/lib.rs b/lib/ddsketch-agent/src/lib.rs index 5af4b88a25..16babe914e 100644 --- a/lib/ddsketch-agent/src/lib.rs +++ b/lib/ddsketch-agent/src/lib.rs @@ -1210,6 +1210,25 @@ mod tests { insert: Value::NFloats(65535 * 5, 0.0), // longstanding trimLeft bug expected: "0:65535 0:65535 0:65535 0:65535 0:65535", + // actual expected: "0:65535 0:65535 0:65535" + max_bins: 3, + }, + Case { + description: "inserting early bin over the bin limit", + start: "0:65535 0:65535 1338:65535", + insert: Value::Float(0.0), + // longstanding trimLeft bug + expected: "0:1 0:65535 0:65535 1338:65535", + // actual expected: "0:65535 0:65535 1338:65535" + max_bins: 3, + }, + Case { + description: "inserting last bin over the bin limit", + start: "0:65535 0:65535 1338:65535", + insert: Value::Float(1.0), + // This is a bug. I'm not sure what ought to happen here. Need to review the DDSketch paper. + expected: "0:65535 0:65535 1338:1 1338:65535", + // actual expected: something like "1338:65535 1338:65535 1338:65535" ? max_bins: 3, }, ];