-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMakefile
More file actions
119 lines (96 loc) · 4.53 KB
/
Makefile
File metadata and controls
119 lines (96 loc) · 4.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
.PHONY: help up down build logs test lint benchmark clean quickstart demo-data
help: ## Show this help
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | \
awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
# ── Quick Start ──
quickstart: ## One-command setup: env → build → start → load demo data
@echo "══════════════════════════════════════════════════════"
@echo " ETL Microservices — Quick Start"
@echo "══════════════════════════════════════════════════════"
@test -f .env || (cp .env.example .env && echo "✓ Created .env from .env.example")
@test -f .env && echo "✓ .env already exists"
@mkdir -p hf_models && echo "✓ hf_models/ directory ready (HuggingFace model cache)"
docker compose build
docker compose up -d
@echo "⏳ Waiting for services to be healthy..."
@sleep 10
@echo "📦 Loading demo datasets into shared volume..."
@$(MAKE) demo-data
@echo ""
@echo "══════════════════════════════════════════════════════"
@echo " ✓ All services running!"
@echo ""
@echo " Streamlit UI: http://localhost:8501"
@echo " Airflow: http://localhost:8080"
@echo " Prometheus: http://localhost:9090"
@echo " Grafana: http://localhost:3000"
@echo ""
@echo " Demo datasets loaded at /app/data/ in containers."
@echo " Try the 'ecommerce_pipeline' or"
@echo " 'hr_analytics_pipeline' DAG in Airflow!"
@echo "══════════════════════════════════════════════════════"
demo-data: ## Copy demo datasets into the shared Docker volume
@mkdir -p data/demo
@echo "Copying demo datasets to running containers..."
@for svc in extract-csv-service clean-nan-service delete-columns-service \
data-quality-service outlier-detection-service load-data-service; do \
if docker ps --format '{{.Names}}' | grep -q "^$$svc$$"; then \
docker cp data/demo/hr_sample.csv $$svc:/app/data/hr_demo/data.csv && \
docker cp data/demo/ecommerce_orders.csv $$svc:/app/data/ecommerce_demo/data.csv && \
echo " ✓ $$svc"; \
else \
echo " ✗ $$svc is not running — start services first with 'make up'"; \
exit 1; \
fi; \
done
@echo "✓ Demo data loaded: hr_demo/data.csv, ecommerce_demo/data.csv"
# ── Docker ──
up: ## Start all services (detached)
docker compose up -d
down: ## Stop all services
docker compose down
build: ## Build all Docker images
docker compose build
rebuild: ## Rebuild all images (no cache)
docker compose build --no-cache
logs: ## Tail logs for all services
docker compose logs -f
logs-service: ## Tail logs for a specific service (usage: make logs-service SVC=clean-nan-service)
docker compose logs -f $(SVC)
ps: ## Show running containers
docker compose ps
# ── Testing ──
test: ## Run all tests
python -m pytest tests/ -v
test-unit: ## Run unit tests only
python -m pytest tests/unit/ -v
test-integration: ## Run integration tests only
python -m pytest tests/integration/ -v
test-coverage: ## Run tests with coverage report
python -m pytest tests/ --cov=services --cov-report=html --cov-report=term
# ── Code Quality ──
lint: ## Run ruff linter
python -m ruff check .
lint-fix: ## Auto-fix lint issues
python -m ruff check . --fix
format: ## Format code with ruff
python -m ruff format .
# ── Benchmark ──
benchmark-data: ## Generate benchmark datasets (all scales)
python benchmark/generate_hr_dataset.py --all-scales
benchmark-mono: ## Run monolith benchmark
python benchmark/run_benchmark.py --mode monolith --plot
benchmark-micro: ## Run microservices benchmark (services must be running)
python benchmark/run_benchmark.py --mode microservices --plot
benchmark-all: ## Run full comparison benchmark
python benchmark/run_benchmark.py --mode both --plot
# ── Streamlit ──
streamlit: ## Run Streamlit app locally
streamlit run streamlit_app/app.py
# ── Cleanup ──
clean: ## Remove generated files
find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
find . -type f -name "*.pyc" -delete 2>/dev/null || true
rm -rf benchmark/results/*.png benchmark/results/*.html benchmark/results/*.json
rm -rf benchmark/data/*.csv
rm -rf htmlcov/ .coverage .pytest_cache/