-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhr_analytics.yaml
More file actions
56 lines (50 loc) · 1.33 KB
/
hr_analytics.yaml
File metadata and controls
56 lines (50 loc) · 1.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# HR Analytics Pipeline
# Load in Streamlit UI → YAML Editor, or trigger via AI Agent
#
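# Uses the bundled demo dataset (hr_sample.csv). The extract step reads
# /app/data/hr_demo/data.csv (presumably where `make demo-data` places the
# demo file; confirm against the Makefile).
# Run `make demo-data` first to load the demo data into the containers.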
pipeline:
  name: hr_analytics
  description: >
    HR People Analytics pipeline — extract the demo HR dataset,
    run quality checks, remove low-variance columns, detect salary
    outliers, clean missing values, and save the result.

  # Steps are chained linearly through depends_on:
  # extract -> quality -> drop_cols -> outliers -> clean -> save
  steps:
    # Read the demo HR dataset from a CSV file.
    - id: extract
      service: extract_csv
      params:
        file_path: /app/data/hr_demo/data.csv

    # Run quality checks on the extracted data.
    - id: quality
      service: data_quality
      params:
        rules:
          min_rows: 10
          check_null_ratio: true
          # Presumably the maximum tolerated null ratio per column;
          # confirm against the data_quality service.
          threshold_null_ratio: 0.5
          check_duplicates: true
          check_completeness: true
      depends_on: [extract]

    # Remove the low-variance columns listed below.
    - id: drop_cols
      service: delete_columns
      params:
        columns:
          - EmployeeCount
          - Over18
          - StandardHours
      depends_on: [quality]

    # Detect salary outliers on the MonthlyIncome column.
    - id: outliers
      service: outlier_detection
      params:
        column: MonthlyIncome
        # Presumably a z-score cutoff; confirm against the
        # outlier_detection service.
        z_threshold: 3.0
      depends_on: [drop_cols]

    # Clean missing values.
    - id: clean
      service: clean_nan
      params:
        # Presumably drops entries containing missing values rather than
        # imputing them; confirm against the clean_nan service.
        strategy: drop
      depends_on: [outliers]

    # Save the result as CSV.
    # NOTE(review): no output path is given here; the destination is
    # presumably chosen by the load_data service. Confirm.
    - id: save
      service: load_data
      params:
        format: csv
      depends_on: [clean]