-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvalidate_examples.py
More file actions
150 lines (118 loc) · 5.54 KB
/
validate_examples.py
File metadata and controls
150 lines (118 loc) · 5.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
"""
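Validation script for example outputs.

Each check builds a small data set inline (the example scripts themselves are
not executed), runs it through BatchGuard, and verifies the enforced data and
the report summary. When run as a script, it exits with status 0 if every
check passes and 1 otherwise.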
"""
import pandas as pd
import sys
from pathlib import Path
# Add the directory containing this script to the import path so that `batchguard` can be imported
sys.path.insert(0, str(Path(__file__).parent))
from batchguard import BatchGuard, min_max, total_range
def validate_synthetic_test_data():
    """Validate the synthetic_test_data.py example.

    Builds eight transactions across three accounts, enforces a
    ``min_max('account_id', 'amount', min_val=50, max_val=2000)`` constraint
    through BatchGuard, then checks the enforced amounts and the report
    summary.

    Returns:
        True when every check passes.

    Raises:
        AssertionError: If an enforced amount lies outside [50, 2000] or a
            report summary entry differs from its expected value.
    """
    print("Validating synthetic_test_data.py example...")

    transactions = pd.DataFrame({
        'account_id': ['A001', 'A001', 'A001', 'A002', 'A002', 'A002', 'A003', 'A003'],
        'transaction_type': ['withdrawal', 'withdrawal', 'withdrawal',
                             'deposit', 'deposit', 'deposit', 'withdrawal', 'withdrawal'],
        'amount': [5000, 6000, 7000,  # Account A001: only large withdrawals
                   100, 150, 200,     # Account A002: normal deposits
                   8000, 9000],       # Account A003: only large withdrawals
    })

    guard = BatchGuard(transactions)
    guard.add_constraint(
        min_max('account_id', 'amount', min_val=50, max_val=2000)
    )
    fixed_transactions, report = guard.enforce()

    # Checks raise explicitly instead of using `assert`, because assert
    # statements are removed under `python -O` and the validation would then
    # pass without checking anything.
    if not all(50 <= val <= 2000 for val in fixed_transactions['amount'].values):
        raise AssertionError("All amounts should be between 50 and 2000")

    # Check that violations were resolved. Five of the eight input amounts
    # (all of A001 and A003) exceed max_val=2000, which matches the expected
    # count of 5 below.
    expected_summary = (
        ('satisfied', 1, "Constraint should be satisfied"),
        ('rows_modified', 5, "5 rows should be modified"),
        ('values_changed', 5, "5 values should be changed"),
    )
    for key, expected, message in expected_summary:
        if not (report.summary[key] == expected):
            raise AssertionError(message)

    print("✓ synthetic_test_data.py validation passed")
    return True
def validate_scenario_simulation():
    """Validate the scenario_simulation.py example.

    Builds eight values across three scenarios, enforces a
    ``total_range('scenario_id', 'value', min_total=400000,
    max_total=1000000)`` constraint through BatchGuard, then checks the
    per-scenario totals of the enforced data and the report summary.

    Returns:
        True when every check passes.

    Raises:
        AssertionError: If a scenario total lies outside [400000, 1000000]
            or a report summary entry differs from its expected value.
    """
    print("Validating scenario_simulation.py example...")

    scenarios = pd.DataFrame({
        'scenario_id': ['S1', 'S1', 'S1', 'S2', 'S2', 'S2', 'S3', 'S3'],
        'category': ['revenue', 'costs', 'profit', 'revenue', 'costs', 'profit',
                     'revenue', 'costs'],
        'value': [500000, -300000, 200000,  # Scenario S1: total = 400K (OK)
                  600000, -400000, 200000,  # Scenario S2: total = 400K (OK)
                  800000, -500000],         # Scenario S3: total = 300K (below min)
    })

    guard = BatchGuard(scenarios)
    guard.add_constraint(
        total_range('scenario_id', 'value', min_total=400000, max_total=1000000)
    )
    fixed_scenarios, report = guard.enforce()

    # Checks raise explicitly instead of using `assert`, because assert
    # statements are removed under `python -O` and the validation would then
    # pass without checking anything.
    totals = fixed_scenarios.groupby('scenario_id')['value'].sum()
    if not all(400000 <= total <= 1000000 for total in totals.values):
        raise AssertionError("All scenario totals should be between 400K and 1M")

    # Check that violations were resolved.
    # NOTE(review): the expected values_changed (4) is larger than
    # rows_modified (2); confirm against BatchGuard's counting semantics.
    expected_summary = (
        ('satisfied', 1, "Constraint should be satisfied"),
        ('rows_modified', 2, "2 rows should be modified"),
        ('values_changed', 4, "4 values should be changed"),
    )
    for key, expected, message in expected_summary:
        if not (report.summary[key] == expected):
            raise AssertionError(message)

    print("✓ scenario_simulation.py validation passed")
    return True
def validate_demo_datasets():
    """Validate the demo_datasets.py example.

    Builds nine products across three categories and enforces two
    constraints through BatchGuard:
    ``min_max('category', 'price', min_val=25, max_val=1500)`` and
    ``total_range('category', 'stock', min_total=50, max_total=1000)``.
    It then checks the enforced prices, the per-category stock totals, and
    the report summary.

    Returns:
        True when every check passes.

    Raises:
        AssertionError: If a price lies outside [25, 1500], a category stock
            total lies outside [50, 1000], or a report summary entry differs
            from its expected value.
    """
    print("Validating demo_datasets.py example...")

    products = pd.DataFrame({
        'category': ['Electronics', 'Electronics', 'Electronics', 'Electronics',
                     'Clothing', 'Clothing', 'Clothing',
                     'Books', 'Books'],
        'product_name': ['Laptop', 'Phone', 'Tablet', 'Headphones',
                         'Shirt', 'Pants', 'Shoes',
                         'Novel', 'Textbook'],
        'price': [1200, 800, 600, 150,  # Electronics: all within [25, 1500]
                  20, 30, 40,           # Clothing: 20 is below the 25 minimum
                  15, 200],             # Books: 15 is below the 25 minimum
        # Stock totals per category are 190, 430 and 505, all within [50, 1000].
        'stock': [10, 50, 30, 100,
                  200, 150, 80,
                  500, 5],
    })

    guard = BatchGuard(products)
    guard.add_constraint(
        min_max('category', 'price', min_val=25, max_val=1500)
    )
    guard.add_constraint(
        total_range('category', 'stock', min_total=50, max_total=1000)
    )
    fixed_products, report = guard.enforce()

    # Checks raise explicitly instead of using `assert`, because assert
    # statements are removed under `python -O` and the validation would then
    # pass without checking anything.

    # Validate price constraints, category by category, so that the failure
    # message can name the offending category.
    category_prices = fixed_products.groupby('category')['price']
    for category, prices in category_prices:
        if not all(25 <= price <= 1500 for price in prices.values):
            raise AssertionError(
                f"All prices in {category} should be between 25 and 1500"
            )

    # Validate stock constraints
    category_stocks = fixed_products.groupby('category')['stock'].sum()
    if not all(50 <= stock <= 1000 for stock in category_stocks.values):
        raise AssertionError(
            "All category stock totals should be between 50 and 1000"
        )

    # Check that constraints were satisfied. Two input prices (20 and 15)
    # fall below min_val=25, which matches the expected count of 2 below.
    expected_summary = (
        ('satisfied', 2, "Both constraints should be satisfied"),
        ('rows_modified', 2, "2 rows should be modified"),
        ('values_changed', 2, "2 values should be changed"),
    )
    for key, expected, message in expected_summary:
        if not (report.summary[key] == expected):
            raise AssertionError(message)

    print("✓ demo_datasets.py validation passed")
    return True
if __name__ == '__main__':
    # Script entry point: run each validation in order and exit with status 0
    # when all of them pass, or 1 as soon as one raises.
    banner = "=" * 60
    print(banner)
    print("Validating Example Scripts")
    print(banner)

    try:
        for run_validation in (
            validate_synthetic_test_data,
            validate_scenario_simulation,
            validate_demo_datasets,
        ):
            run_validation()

        print("\n" + banner)
        print("All validations passed! ✓")
        print(banner)
        # SystemExit is not an Exception subclass, so the handler below does
        # not intercept this successful exit.
        sys.exit(0)
    except Exception as exc:
        print(f"\n✗ Validation failed: {exc}")
        # Imported here because the traceback is only needed on failure.
        import traceback
        traceback.print_exc()
        sys.exit(1)