Skip to content

Commit 8dc5c0b

Browse files
TimelordUK and claude committed
feat: Add isolated row expansion system with UNNEST implementation
Create a reusable, registry-based row expansion framework for operations that multiply rows. Designed as an isolated module following the same pattern as functions and generators for future extensibility.

New Module: src/data/row_expanders/
- RowExpander trait: Generic interface for row multiplication
  - expand() method takes value + args, returns array of values
  - Supports any operation that turns 1 row into N rows
- RowExpanderRegistry: Registry pattern for expanders
  - Case-insensitive lookup
  - Default registration of built-in expanders
- UnnestExpander: First implementation
  - Splits delimited strings into arrays
  - Handles NULL values (returns single NULL)
  - Empty delimiter splits into characters
  - Filters out empty parts

Architecture Benefits:
- Isolated and reusable for future expansions
- Clean separation from evaluator/executor
- Future use cases:
  * EXPLODE(json_array) - Expand JSON arrays
  * GENERATE_SERIES(start, end) - Range expansion
  * Custom domain-specific row generators

Tests:
- test_unnest_basic: Split "A|B|C" with "|"
- test_unnest_null: NULL handling
- test_registry: Registry lookup

All tests passing (403 passed, +3 new)

Next: Integrate with query executor for actual row multiplication

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent cba8a51 commit 8dc5c0b

2 files changed

Lines changed: 203 additions & 0 deletions

File tree

src/data/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ pub mod hash_join;
4040
pub mod query_engine;
4141
pub mod query_executor;
4242
pub mod recursive_where_evaluator;
43+
pub mod row_expanders; // Row expansion system (UNNEST, etc.)
4344
pub mod simple_where;
4445
pub mod subquery_executor;
4546
pub mod unit_converter;

src/data/row_expanders/mod.rs

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
//! Row Expansion System
2+
//!
3+
//! This module provides a generic framework for functions that multiply rows.
4+
//! Unlike regular functions (value → value) or generators (args → table),
5+
//! row expanders take a single input row and produce multiple output rows.
6+
//!
7+
//! ## Use Cases
8+
//! - UNNEST: Split delimited strings into separate rows
9+
//! - EXPLODE: Expand JSON arrays
10+
//! - GENERATE_SERIES: Create multiple rows from a range
11+
//! - Custom expansions for domain-specific data
12+
13+
use crate::data::datatable::DataValue;
14+
use anyhow::Result;
15+
16+
/// Result of row expansion: a vector of values for this column across N rows
17+
#[derive(Debug, Clone)]
18+
pub struct ExpansionResult {
19+
/// The expanded values (one per output row)
20+
pub values: Vec<DataValue>,
21+
}
22+
23+
impl ExpansionResult {
24+
/// Create a new expansion result
25+
pub fn new(values: Vec<DataValue>) -> Self {
26+
Self { values }
27+
}
28+
29+
/// Number of rows this expansion will create
30+
pub fn row_count(&self) -> usize {
31+
self.values.len()
32+
}
33+
}
34+
35+
/// Trait for row expander implementations
36+
///
37+
/// Row expanders are special expressions that cause a single input row
38+
/// to be multiplied into multiple output rows.
39+
pub trait RowExpander: Send + Sync {
40+
/// Name of the expander (e.g., "UNNEST")
41+
fn name(&self) -> &str;
42+
43+
/// Description of what this expander does
44+
fn description(&self) -> &str;
45+
46+
/// Expand a single value into multiple values
47+
///
48+
/// # Arguments
49+
/// * `value` - The input value to expand (e.g., delimited string)
50+
/// * `args` - Additional arguments (e.g., delimiter)
51+
///
52+
/// # Returns
53+
/// An ExpansionResult containing the array of values for output rows
54+
fn expand(&self, value: &DataValue, args: &[DataValue]) -> Result<ExpansionResult>;
55+
}
56+
57+
/// Registry of available row expanders
58+
pub struct RowExpanderRegistry {
59+
expanders: std::collections::HashMap<String, Box<dyn RowExpander>>,
60+
}
61+
62+
impl RowExpanderRegistry {
63+
/// Create a new registry with default expanders
64+
pub fn new() -> Self {
65+
let mut registry = Self {
66+
expanders: std::collections::HashMap::new(),
67+
};
68+
69+
// Register built-in expanders
70+
registry.register(Box::new(unnest::UnnestExpander));
71+
72+
registry
73+
}
74+
75+
/// Register a new expander
76+
pub fn register(&mut self, expander: Box<dyn RowExpander>) {
77+
self.expanders
78+
.insert(expander.name().to_uppercase(), expander);
79+
}
80+
81+
/// Get an expander by name
82+
pub fn get(&self, name: &str) -> Option<&dyn RowExpander> {
83+
self.expanders.get(&name.to_uppercase()).map(|e| e.as_ref())
84+
}
85+
86+
/// Check if an expander exists
87+
pub fn contains(&self, name: &str) -> bool {
88+
self.expanders.contains_key(&name.to_uppercase())
89+
}
90+
91+
/// List all registered expanders
92+
pub fn list(&self) -> Vec<&str> {
93+
self.expanders.keys().map(|s| s.as_str()).collect()
94+
}
95+
}
96+
97+
impl Default for RowExpanderRegistry {
98+
fn default() -> Self {
99+
Self::new()
100+
}
101+
}
102+
103+
/// UNNEST expander implementation
104+
pub mod unnest {
105+
use super::*;
106+
107+
pub struct UnnestExpander;
108+
109+
impl RowExpander for UnnestExpander {
110+
fn name(&self) -> &str {
111+
"UNNEST"
112+
}
113+
114+
fn description(&self) -> &str {
115+
"Split a delimited string into multiple rows"
116+
}
117+
118+
fn expand(&self, value: &DataValue, args: &[DataValue]) -> Result<ExpansionResult> {
119+
// Get the delimiter (first argument)
120+
let delimiter = match args.first() {
121+
Some(DataValue::String(s)) => s.as_str(),
122+
Some(_) => {
123+
return Err(anyhow::anyhow!(
124+
"UNNEST delimiter must be a string, got {:?}",
125+
args.first()
126+
))
127+
}
128+
None => return Err(anyhow::anyhow!("UNNEST requires a delimiter argument")),
129+
};
130+
131+
// Convert value to string
132+
let text = match value {
133+
DataValue::String(s) => s.clone(),
134+
DataValue::Null => {
135+
// NULL expands to a single NULL
136+
return Ok(ExpansionResult::new(vec![DataValue::Null]));
137+
}
138+
other => other.to_string(),
139+
};
140+
141+
// Split the string
142+
let parts: Vec<DataValue> = if delimiter.is_empty() {
143+
// Empty delimiter: split into characters
144+
text.chars()
145+
.map(|ch| DataValue::String(ch.to_string()))
146+
.collect()
147+
} else {
148+
// Split by delimiter, filter out empty parts
149+
text.split(delimiter)
150+
.filter(|s| !s.is_empty())
151+
.map(|s| DataValue::String(s.to_string()))
152+
.collect()
153+
};
154+
155+
// If no parts, return single NULL to maintain row presence
156+
if parts.is_empty() {
157+
Ok(ExpansionResult::new(vec![DataValue::Null]))
158+
} else {
159+
Ok(ExpansionResult::new(parts))
160+
}
161+
}
162+
}
163+
}
164+
165+
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a string-valued `DataValue`
    fn text(s: &str) -> DataValue {
        DataValue::String(s.to_string())
    }

    /// "A|B|C" split on "|" yields three rows: A, B, C
    #[test]
    fn test_unnest_basic() {
        let result = unnest::UnnestExpander
            .expand(&text("A|B|C"), &[text("|")])
            .unwrap();

        assert_eq!(result.row_count(), 3);
        assert_eq!(result.values[0], text("A"));
        assert_eq!(result.values[1], text("B"));
        assert_eq!(result.values[2], text("C"));
    }

    /// A NULL input stays a single NULL row
    #[test]
    fn test_unnest_null() {
        let result = unnest::UnnestExpander
            .expand(&DataValue::Null, &[text("|")])
            .unwrap();

        assert_eq!(result.row_count(), 1);
        assert_eq!(result.values[0], DataValue::Null);
    }

    /// The default registry knows UNNEST under any letter case
    #[test]
    fn test_registry() {
        let registry = RowExpanderRegistry::new();
        for spelling in ["UNNEST", "unnest"] {
            assert!(registry.contains(spelling));
        }

        let found = registry.get("UNNEST").unwrap();
        assert_eq!(found.name(), "UNNEST");
    }
}

0 commit comments

Comments
 (0)