diff --git a/src/array_impl.rs b/src/array_impl.rs new file mode 100644 index 000000000..90b327c1f --- /dev/null +++ b/src/array_impl.rs @@ -0,0 +1,230 @@ +use crate::Itertools; +use std::iter::Fuse; + +/// An iterator over all contiguous windows of the input iterator, +/// producing arrays of a specific size. +/// +/// See [`.array_windows()`](crate::Itertools::array_windows) for more +/// information. +#[derive(Debug, Clone)] +pub struct ArrayWindows +where + I: Iterator + Sized, + I::Item: Clone, +{ + iter: Fuse, + inner: Option>, +} + +#[derive(Debug, Clone)] +struct ArrayWindowsInner { + // `window` stores the `N` items delivered in the most + // recent output window. It is stored in the form of a ring + // buffer, with `window_start` identifying the element + // that logically comes first. + window: [T; N], + window_start: usize, +} + +impl ArrayWindowsInner { + /// Replace the least recent item in `window` with a new + /// item. + fn add_to_buffer(&mut self, item: T) { + if N > 0 { + self.window[self.window_start] = item; + self.window_start = (self.window_start + 1) % N; + } + } + + /// Construct an array window to return. 
+ fn make_window(&self) -> [T; N] { + std::array::from_fn(|i| self.window[(i + self.window_start) % N].clone()) + } +} + +impl Iterator for ArrayWindows +where + I: Iterator + Sized, + I::Item: Clone, +{ + type Item = [I::Item; N]; + + fn next(&mut self) -> Option<[I::Item; N]> { + match &mut self.inner { + // Initialisation code, when next() is called for the first time + None => match self.iter.next_array() { + None => { + // The input iterator was completely empty + None + } + Some(buf) => { + let inner = ArrayWindowsInner { + window: buf.clone(), + window_start: 0, + }; + let window = inner.make_window(); + self.inner = Some(inner); + Some(window) + } + }, + Some(inner) => match self.iter.next() { + Some(item) => { + inner.add_to_buffer(item); + Some(inner.make_window()) + } + None => None, + }, + } + } +} + +pub fn array_windows(iter: I) -> ArrayWindows +where + I: Iterator + Sized, + I::Item: Clone, +{ + ArrayWindows { + iter: iter.fuse(), + inner: None, + } +} + +/// An iterator over all windows, wrapping back to the first elements when the +/// window would otherwise exceed the length of the iterator, producing arrays +/// of a specific size. +/// +/// See [`.circular_array_windows()`](crate::Itertools::circular_array_windows) +/// for more information. +#[derive(Debug, Clone)] +pub struct CircularArrayWindows +where + I: Iterator + Sized, + I::Item: Clone, +{ + iter: Fuse, + inner: Option>, +} + +#[derive(Debug, Clone)] +struct CircularArrayWindowsInner { + // `prefix` stores the first `N` items output from this iterator. + // If the input contained fewer than `N` items, then it is filled + // with clones of the previous items in a cycle. + // + // `prefix_pos` tracks the number of items that have been _used_ + // from `prefix`. It begins counting up from 0 once the input runs + // out. (So in the case where the input iterator is shorter than + // `N`, it will begin counting up before `prefix` has even been + // populated during setup.) 
+ prefix: [T; N], + prefix_pos: usize, + + // For delivering the output arrays, we reuse `ArrayWindowsInner` + // unchanged. + arraywin: ArrayWindowsInner, +} + +impl Iterator for CircularArrayWindows +where + I: Iterator + Sized, + I::Item: Clone, +{ + type Item = [I::Item; N]; + + fn next(&mut self) -> Option<[I::Item; N]> { + match &mut self.inner { + // Initialisation code, when next() is called for the first time + None => match self.iter.next() { + None => { + // The input iterator was completely empty + None + } + Some(first) => { + // We have at least one item, so we can definitely + // populate `prefix` (even if we have to make N + // copies of this element). + + // Construct [Option; N] and convert to [T; N] + // once it's full. TODO: can this be improved? + let mut items = std::array::from_fn(|_| None); + let mut prefix_pos = 0; + if N > 0 { + // The first item stored is the one passed to + // us from our caller. + items[0] = Some(first); + } + for i in 1..N { + // Populate the remaining slots in `items` + // from the input iterator. + items[i] = self.iter.next(); + if items[i].is_none() { + // If the input iterator runs out early, + // populate the rest of `items` by + // recycling from the beginning, and set + // `prefix_pos` to indicate that we have + // already consumed those items. + for j in i..N { + items[j] = items[j - i].clone(); + } + prefix_pos = N - i; + break; + } + } + let items = items.map(Option::unwrap); + + let inner = CircularArrayWindowsInner { + prefix: items.clone(), + prefix_pos, + arraywin: ArrayWindowsInner { + window: items, + window_start: 0, + }, + }; + + let window = inner.arraywin.make_window(); + self.inner = Some(inner); + Some(window) + } + }, + Some(inner) => { + // Normal case. Read the next item in the logical + // input sequence (consisting of the contents of the + // input iterator followed by N-1 items recycling from + // the beginning), and add it to the ring buffer. 
+ let item = if let Some(item) = self.iter.next() { + // Read from the input iterator. + item + } else if N == 0 { + return None; + } else { + assert!(N == 0 || inner.prefix_pos < N); + if inner.prefix_pos + 1 == N { + // The input iterator has run out, and we've + // emitted as many windows as we read items, + // so we've finished. + return None; + } + let item = inner.prefix[inner.prefix_pos].clone(); + inner.prefix_pos += 1; + item + }; + + if N > 0 { + inner.arraywin.add_to_buffer(item); + } + Some(inner.arraywin.make_window()) + } + } + } +} + +pub fn circular_array_windows(iter: I) -> CircularArrayWindows +where + I: Iterator + Sized, + I::Item: Clone, +{ + CircularArrayWindows { + iter: iter.fuse(), + inner: None, + } +} diff --git a/src/lib.rs b/src/lib.rs index 36ddef6cc..22d3928d3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -97,6 +97,7 @@ pub mod structs { FilterOk, Interleave, InterleaveShortest, MapInto, MapOk, Positions, Product, PutBack, TakeWhileRef, TupleCombinations, Update, WhileSome, }; + pub use crate::array_impl::{ArrayWindows, CircularArrayWindows}; #[cfg(feature = "use_alloc")] pub use crate::combinations::{ArrayCombinations, Combinations}; #[cfg(feature = "use_alloc")] @@ -174,6 +175,7 @@ pub use crate::unziptuple::{multiunzip, MultiUnzip}; pub use crate::with_position::Position; pub use crate::ziptuple::multizip; mod adaptors; +mod array_impl; mod either_or_both; pub use crate::either_or_both::EitherOrBoth; #[doc(hidden)] @@ -900,6 +902,121 @@ pub trait Itertools: Iterator { tuple_impl::tuples(self) } + /// Return an iterator over all contiguous windows, producing + /// arrays of size `N`. + /// + /// `array_windows` clones the iterator elements so that they can be + /// part of successive windows. This makes it most suited for iterators + /// of references and other values that are cheap to copy. + /// + /// If the input iterator contains fewer than `N` items, no + /// windows are returned. 
Otherwise, if the input iterator + /// contains `k` items, exactly `k-N+1` windows are returned. + /// + /// ``` + /// use itertools::Itertools; + /// + /// // Three-element windows from the items [1, 2, 3, 4, 5]. + /// itertools::assert_equal( + /// (1..6).array_windows::<3>(), + /// vec![[1, 2, 3], [2, 3, 4], [3, 4, 5]] + /// ); + /// + /// // When the input list is shorter than the window size, no windows + /// // are returned at all. + /// let mut windows = (1..6).array_windows::<10>(); + /// assert_eq!(None, windows.next()); + /// + /// // In some cases you don't have to specify the window size + /// // explicitly with a type hint, because Rust can infer it. + /// for [a, b, c] in (1..6).array_windows() { + /// println!("{a} {b} {c}"); + /// } + /// + /// // You can also specify the complete type. + /// use itertools::ArrayWindows; + /// use std::ops::Range; + /// + /// let it: ArrayWindows, 3> = (1..6).array_windows(); + /// itertools::assert_equal(it, vec![[1, 2, 3], [2, 3, 4], [3, 4, 5]]); + /// ``` + fn array_windows(self) -> ArrayWindows + where + Self: Sized, + Self::Item: Clone, + { + array_impl::array_windows(self) + } + + /// Return an iterator over all windows, wrapping back to the first + /// elements when the window would otherwise exceed the length of the + /// iterator, producing arrays of size `N`. + /// + /// `circular_array_windows` clones the iterator elements so that + /// they can be part of successive windows. This makes it most + /// suited for iterators of references and other values that are + /// cheap to copy. + /// + /// One window is returned per element of the input iterator. This + /// is true even if the input contains fewer elements than the + /// window size. In that situation, input elements are repeated + /// within each window. The results are as if the input had been + /// treated as a cyclic list, and a window of `N` items had been + /// returned for every starting point in the cycle.
+ /// + /// ``` + /// use itertools::Itertools; + /// + /// // Three-element windows from [1, 2, 3, 4, 5], with two of + /// // them wrapping round from 5 to 1. + /// itertools::assert_equal( + /// (1..6).circular_array_windows::<3>(), + /// vec![[1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 1], [5, 1, 2]] + /// ); + /// + /// // If the input is shorter than the window size, input + /// // items are repeated even within the same window. + /// itertools::assert_equal( + /// (1..3).circular_array_windows::<5>(), + /// vec![[1, 2, 1, 2, 1], [2, 1, 2, 1, 2]] + /// ); + /// + /// // If the input contains only one item, the returned window + /// // repeats it N times. + /// let once = std::iter::once(1); + /// itertools::assert_equal( + /// once.circular_array_windows::<3>(), + /// vec![[1, 1, 1]] + /// ); + /// + /// // If the input is empty, no windows are returned at all. + /// let empty = std::iter::empty::(); + /// let mut windows = empty.circular_array_windows::<5>(); + /// assert_eq!(None, windows.next()); + /// + /// // In some cases you don't have to specify the window size + /// // explicitly with a type hint, because Rust can infer it. + /// for [a, b, c] in (1..10).circular_array_windows() { + /// println!("{a} {b} {c}"); + /// } + /// + /// // You can also specify the complete type. + /// use itertools::CircularArrayWindows; + /// use std::ops::Range; + /// + /// let it: CircularArrayWindows, 2> = + /// (1..6).circular_array_windows(); + /// itertools::assert_equal( + /// it, vec![[1, 2], [2, 3], [3, 4], [4, 5], [5, 1]]); + /// ``` + fn circular_array_windows(self) -> CircularArrayWindows + where + Self: Sized, + Self::Item: Clone, + { + array_impl::circular_array_windows(self) + } + /// Split into an iterator pair that both yield all elements from /// the original iterator. 
/// diff --git a/tests/arrays.rs b/tests/arrays.rs new file mode 100644 index 000000000..6c6de9ca7 --- /dev/null +++ b/tests/arrays.rs @@ -0,0 +1,448 @@ +use itertools::Itertools; + +#[test] +fn array_windows() { + let [vec0, vec1, vec2, vec4, vec10] = [ + vec![], + vec![1], + vec![1, 2], + vec![1, 2, 3, 4], + vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + ]; + + assert_eq!( + vec10 + .iter() + .copied() + .array_windows::<2>() + .collect::>(), + vec![ + [1, 2], + [2, 3], + [3, 4], + [4, 5], + [5, 6], + [6, 7], + [7, 8], + [8, 9], + [9, 10], + ] + ); + + assert_eq!( + vec4.iter() + .copied() + .array_windows::<2>() + .collect::>(), + vec![[1, 2], [2, 3], [3, 4]] + ); + + assert_eq!( + vec2.iter() + .copied() + .array_windows::<2>() + .collect::>(), + vec![[1, 2]] + ); + + assert_eq!( + vec1.iter() + .copied() + .array_windows::<2>() + .collect::>(), + Vec::<[i32; 2]>::new() + ); + + assert_eq!( + vec0.iter() + .copied() + .array_windows::<2>() + .collect::>(), + Vec::<[i32; 2]>::new() + ); + + assert_eq!( + vec10 + .iter() + .copied() + .array_windows::<4>() + .collect::>(), + vec![ + [1, 2, 3, 4], + [2, 3, 4, 5], + [3, 4, 5, 6], + [4, 5, 6, 7], + [5, 6, 7, 8], + [6, 7, 8, 9], + [7, 8, 9, 10], + ] + ); + + // For zero-length output windows, the equation + // + // output length = input length - N + 1 + // + // implies that we return one _more_ zero-length window than there + // are input items, as if we were returning a zero-length window + // for each position between elements of the input list, including + // the positions at the start and end.
+ assert_eq!( + vec0.iter() + .copied() + .array_windows::<0>() + .collect::>(), + vec![[]] + ); + + assert_eq!( + vec1.iter() + .copied() + .array_windows::<0>() + .collect::>(), + vec![[], []] + ); + + assert_eq!( + vec2.iter() + .copied() + .array_windows::<0>() + .collect::>(), + vec![[], [], []] + ); + + assert_eq!( + vec0.iter() + .copied() + .array_windows::<1>() + .collect::>(), + Vec::<[i32; 1]>::new() + ); + + assert_eq!( + vec1.iter() + .copied() + .array_windows::<1>() + .collect::>(), + vec![[1]] + ); + + assert_eq!( + vec2.iter() + .copied() + .array_windows::<1>() + .collect::>(), + vec![[1], [2]] + ); + + assert_eq!( + vec1.iter() + .copied() + .array_windows::<7>() + .collect::>(), + Vec::<[i32; 7]>::new() + ); + + assert_eq!( + vec2.iter() + .copied() + .array_windows::<7>() + .collect::>(), + Vec::<[i32; 7]>::new() + ); + + assert_eq!( + vec4.iter() + .copied() + .array_windows::<7>() + .collect::>(), + Vec::<[i32; 7]>::new() + ); + + // Check that array_windows agrees with tuple_windows + assert_eq!( + vec4.iter().copied().array_windows().collect::>(), + vec4.iter() + .copied() + .tuple_windows() + .map(|(a,)| [a]) + .collect::>(), + ); + + assert_eq!( + vec4.iter().copied().array_windows().collect::>(), + vec4.iter() + .copied() + .tuple_windows() + .map(|(a, b)| [a, b]) + .collect::>(), + ); + + assert_eq!( + vec4.iter().copied().array_windows().collect::>(), + vec4.iter() + .copied() + .tuple_windows() + .map(|(a, b, c, d, e, f, g)| [a, b, c, d, e, f, g]) + .collect::>(), + ); + + assert_eq!( + vec2.iter().copied().array_windows().collect::>(), + vec2.iter() + .copied() + .tuple_windows() + .map(|(a, b, c, d, e, f, g)| [a, b, c, d, e, f, g]) + .collect::>(), + ); + + assert_eq!( + vec1.iter().copied().array_windows().collect::>(), + vec1.iter() + .copied() + .tuple_windows() + .map(|(a, b, c, d, e, f, g)| [a, b, c, d, e, f, g]) + .collect::>(), + ); + + assert_eq!( + vec0.iter().copied().array_windows().collect::>(), + vec0.iter() + .copied() 
+ .tuple_windows() + .map(|(a, b, c, d, e, f, g)| [a, b, c, d, e, f, g]) + .collect::>(), + ); +} + +#[test] +fn circular_array_windows() { + let [vec0, vec1, vec2, vec4, vec10] = [ + vec![], + vec![1], + vec![1, 2], + vec![1, 2, 3, 4], + vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + ]; + + assert_eq!( + vec10 + .iter() + .copied() + .circular_array_windows::<2>() + .collect::>(), + vec![ + [1, 2], + [2, 3], + [3, 4], + [4, 5], + [5, 6], + [6, 7], + [7, 8], + [8, 9], + [9, 10], + [10, 1] + ] + ); + + assert_eq!( + vec4.iter() + .copied() + .circular_array_windows::<2>() + .collect::>(), + vec![[1, 2], [2, 3], [3, 4], [4, 1]] + ); + + assert_eq!( + vec2.iter() + .copied() + .circular_array_windows::<2>() + .collect::>(), + vec![[1, 2], [2, 1]] + ); + + assert_eq!( + vec1.iter() + .copied() + .circular_array_windows::<2>() + .collect::>(), + vec![[1, 1]] + ); + + assert_eq!( + vec0.iter() + .copied() + .circular_array_windows::<2>() + .collect::>(), + Vec::<[i32; 2]>::new() + ); + + assert_eq!( + vec10 + .iter() + .copied() + .circular_array_windows::<4>() + .collect::>(), + vec![ + [1, 2, 3, 4], + [2, 3, 4, 5], + [3, 4, 5, 6], + [4, 5, 6, 7], + [5, 6, 7, 8], + [6, 7, 8, 9], + [7, 8, 9, 10], + [8, 9, 10, 1], + [9, 10, 1, 2], + [10, 1, 2, 3], + ] + ); + + assert_eq!( + vec0.iter() + .copied() + .circular_array_windows::<0>() + .collect::>(), + Vec::<[i32; 0]>::new() + ); + + assert_eq!( + vec1.iter() + .copied() + .circular_array_windows::<0>() + .collect::>(), + vec![[]] + ); + + assert_eq!( + vec2.iter() + .copied() + .circular_array_windows::<0>() + .collect::>(), + vec![[], []] + ); + + assert_eq!( + vec0.iter() + .copied() + .circular_array_windows::<1>() + .collect::>(), + Vec::<[i32; 1]>::new() + ); + + assert_eq!( + vec1.iter() + .copied() + .circular_array_windows::<1>() + .collect::>(), + vec![[1]] + ); + + assert_eq!( + vec2.iter() + .copied() + .circular_array_windows::<1>() + .collect::>(), + vec![[1], [2]] + ); + + assert_eq!( + vec1.iter() + .copied() + 
.circular_array_windows::<7>() + .collect::>(), + vec![[1, 1, 1, 1, 1, 1, 1]] + ); + + assert_eq!( + vec2.iter() + .copied() + .circular_array_windows::<7>() + .collect::>(), + vec![[1, 2, 1, 2, 1, 2, 1], [2, 1, 2, 1, 2, 1, 2]] + ); + + assert_eq!( + vec4.iter() + .copied() + .circular_array_windows::<7>() + .collect::>(), + vec![ + [1, 2, 3, 4, 1, 2, 3], + [2, 3, 4, 1, 2, 3, 4], + [3, 4, 1, 2, 3, 4, 1], + [4, 1, 2, 3, 4, 1, 2], + ] + ); + + // Check that circular_array_windows agrees with circular_tuple_windows + assert_eq!( + vec4.iter() + .copied() + .circular_array_windows() + .collect::>(), + vec4.iter() + .copied() + .circular_tuple_windows() + .map(|(a,)| [a]) + .collect::>(), + ); + + assert_eq!( + vec4.iter() + .copied() + .circular_array_windows() + .collect::>(), + vec4.iter() + .copied() + .circular_tuple_windows() + .map(|(a, b)| [a, b]) + .collect::>(), + ); + + assert_eq!( + vec4.iter() + .copied() + .circular_array_windows() + .collect::>(), + vec4.iter() + .copied() + .circular_tuple_windows() + .map(|(a, b, c, d, e, f, g)| [a, b, c, d, e, f, g]) + .collect::>(), + ); + + assert_eq!( + vec2.iter() + .copied() + .circular_array_windows() + .collect::>(), + vec2.iter() + .copied() + .circular_tuple_windows() + .map(|(a, b, c, d, e, f, g)| [a, b, c, d, e, f, g]) + .collect::>(), + ); + + assert_eq!( + vec1.iter() + .copied() + .circular_array_windows() + .collect::>(), + vec1.iter() + .copied() + .circular_tuple_windows() + .map(|(a, b, c, d, e, f, g)| [a, b, c, d, e, f, g]) + .collect::>(), + ); + + assert_eq!( + vec0.iter() + .copied() + .circular_array_windows() + .collect::>(), + vec0.iter() + .copied() + .circular_tuple_windows() + .map(|(a, b, c, d, e, f, g)| [a, b, c, d, e, f, g]) + .collect::>(), + ); +} diff --git a/tests/laziness.rs b/tests/laziness.rs index dfeee68f8..16801b99e 100644 --- a/tests/laziness.rs +++ b/tests/laziness.rs @@ -104,6 +104,12 @@ must_use_tests! 
{ let _ = Panicking.circular_tuple_windows::<(_, _)>(); let _ = Panicking.circular_tuple_windows::<(_, _, _)>(); } + circular_array_windows { + let _ = Panicking.circular_array_windows::<0>(); + let _ = Panicking.circular_array_windows::<1>(); + let _ = Panicking.circular_array_windows::<2>(); + let _ = Panicking.circular_array_windows::<3>(); + } tuples { let _ = Panicking.tuples::<(_,)>(); let _ = Panicking.tuples::<(_, _)>(); diff --git a/tests/quick.rs b/tests/quick.rs index faae8d698..538346bb9 100644 --- a/tests/quick.rs +++ b/tests/quick.rs @@ -1300,6 +1300,79 @@ quickcheck! { } } +// array iterators +quickcheck! { + fn equal_array_windows_0(a: Vec) -> bool { + let x = (0..=a.len()).map(|_| [&0u8; 0] ); + let y = a.iter().array_windows::<0>(); + itertools::assert_equal(x,y); + true + } + + fn equal_array_windows_1(a: Vec) -> bool { + let x = a.iter().map(|e| [e] ); + let y = a.iter().array_windows::<1>(); + itertools::assert_equal(x,y); + true + } + + fn equal_array_windows_2(a: Vec) -> bool { + let x = (0..a.len().saturating_sub(1)).map(|start_idx| [ + &a[start_idx], + &a[start_idx + 1], + ]); + let y = a.iter().array_windows::<2>(); + itertools::assert_equal(x,y); + true + } + + fn equal_array_windows_3(a: Vec) -> bool { + let x = (0..a.len().saturating_sub(2)).map(|start_idx| [ + &a[start_idx], + &a[start_idx + 1], + &a[start_idx + 2], + ]); + let y = a.iter().array_windows::<3>(); + itertools::assert_equal(x,y); + true + } + + fn equal_circular_array_windows_0(a: Vec) -> bool { + let x = a.iter().map(|_| [&0u8; 0] ); + let y = a.iter().circular_array_windows::<0>(); + itertools::assert_equal(x,y); + true + } + + fn equal_circular_array_windows_1(a: Vec) -> bool { + let x = a.iter().map(|e| [e] ); + let y = a.iter().circular_array_windows::<1>(); + itertools::assert_equal(x,y); + true + } + + fn equal_circular_array_windows_2(a: Vec) -> bool { + let x = (0..a.len()).map(|start_idx| [ + &a[start_idx], + &a[(start_idx + 1) % a.len()], + ]); + let y = 
a.iter().circular_array_windows::<2>(); + itertools::assert_equal(x,y); + true + } + + fn equal_circular_array_windows_3(a: Vec) -> bool { + let x = (0..a.len()).map(|start_idx| [ + &a[start_idx], + &a[(start_idx + 1) % a.len()], + &a[(start_idx + 2) % a.len()], + ]); + let y = a.iter().circular_array_windows::<3>(); + itertools::assert_equal(x,y); + true + } +} + // with_position quickcheck! { fn with_position_exact_size_1(a: Vec) -> bool {