Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# CI workflow: installs the project with uv, then runs the test suite,
# ruff lint/format checks, mypy and bandit in a single job.
name: Tests

# Triggered by pushes to main and pull requests targeting main, but only when
# at least one changed file matches the path filters below.
on:
push:
branches: [ main ]
paths:
- 'src/**'
- 'dash_app/**'
- 'tests/**'
- 'scripts/**'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/**'
pull_request:
branches: [ main ]
# Same path filter as the push trigger above; keep the two lists in sync.
paths:
- 'src/**'
- 'dash_app/**'
- 'tests/**'
- 'scripts/**'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/**'

# NOTE(review): presumably intended to make uv use the interpreter installed by
# setup-python below — confirm that `uv sync` / `uv run` actually honor it.
env:
UV_SYSTEM_PYTHON: 1

jobs:
test:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4

- name: Set up Python 3.11
uses: actions/setup-python@v5
with:
python-version: "3.11"

# The uv version is not pinned ("latest"), so it can change between runs.
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "latest"

# Installs the project together with all of its optional extras.
- name: Install dependencies
run: |
uv sync --all-extras

- name: Run tests
run: |
uv run pytest tests/ -v --tb=short

# Lint the whole repository and verify formatting without rewriting files.
- name: Check code style with ruff
run: |
uv run ruff check .
uv run ruff format --check .

# Advisory only: continue-on-error keeps the job green when mypy reports errors.
- name: Type check with mypy
run: |
uv run mypy src/ --ignore-missing-imports
continue-on-error: true

# Advisory only: continue-on-error keeps the job green when bandit reports findings.
# The report is emitted as JSON on the step's standard output.
- name: Security check with bandit
run: |
uv run bandit -r src/ -f json
continue-on-error: true
18 changes: 18 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,21 @@ best_params/
catboost_info/
submissions/*.csv
six_stack_personality_predictions*.csv

# MLflow
mlruns/
mlartifacts/
.mlflow/
*.db
# MLflow logs and tracking files
mlflow.db
mlflow-runs.db
mlflow.log
# MLflow artifact storage
artifacts/
# MLflow temporary files
.mlruns/
mlflow_tracking_uri.txt

# Large trained model files (can be regenerated with train_and_save_models.py)
# Exclude large stack models but keep ensemble model.
# NOTE: gitignore has no inline comments — a trailing "# ..." would become part
# of the pattern and the rule would match nothing, so the pattern stays on its own line.
models/stack_*.pkl
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ repos:
- id: check-toml
- id: check-json
- id: check-added-large-files
args: ["--maxkb=10000"] # 10MB limit
args: ["--maxkb=150000"] # 150MB limit for ML model files. NOTE(review): GitHub rejects pushed files over 100 MB unless Git LFS is used — confirm this limit is intended.
- id: check-case-conflict
- id: check-merge-conflict
- id: debug-statements
Expand Down
244 changes: 27 additions & 217 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,249 +1,59 @@
# Makefile for Six-Stack Personality Classification Pipeline
# Author: AI Assistant
# Date: 2025-07-11
# Date: 2025-07-14

.PHONY: help install install-dev format lint check test clean run-pipeline run-quick setup-env pre-commit-install pre-commit-run pre-commit-all
.PHONY: help install format lint test run train-models dash stop-dash

# Default target
help:
@echo "Six-Stack Personality Classification Pipeline"
@echo "============================================="
@echo ""
@echo "Available targets:"
@echo " install - Install dependencies using uv"
@echo " install-dev - Install development dependencies"
@echo " format - Format code with ruff"
@echo " lint - Lint code with ruff"
@echo " check - Run both linting and formatting checks"
@echo " fix - Auto-fix linting and formatting issues"
@echo " pre-commit-install - Install pre-commit hooks"
@echo " pre-commit-run - Run pre-commit on staged files"
@echo " pre-commit-all - Run pre-commit on all files"
@echo " test - Run tests (if any)"
@echo " clean - Clean cache and temporary files"
@echo " run-pipeline - Run the full modular pipeline"
@echo " run-quick - Run quick pipeline test (limited data)"
@echo " setup-env - Setup complete development environment"
@echo " jupyter - Start Jupyter Lab"
@echo " data-check - Verify data files exist"
@echo " lock - Update uv.lock file"
@echo " sync - Sync dependencies with uv.lock"
@echo " tree - Show dependency tree"
@echo " add - Add new dependency (make add PACKAGE=name)"
@echo " add-dev - Add development dependency (make add-dev PACKAGE=name)"
@echo " remove - Remove dependency (make remove PACKAGE=name)"
@echo " outdated - Check for outdated dependencies"
@echo " install - Install dependencies using uv"
@echo " format - Format code with ruff"
@echo " lint - Lint code with ruff"
@echo " test - Run tests"
@echo " run - Run the modular pipeline"
@echo " train-models - Train and save ML models"
@echo " dash - Run Dash application"
@echo " stop-dash - Stop Dash application"
@echo ""

# Environment setup
setup-env: install-dev pre-commit-install
@echo "🔧 Setting up development environment..."
@echo "✅ Development environment ready!"

# Dependency management
install:
@echo "📦 Installing dependencies with uv..."
uv sync

install-dev: install
@echo "📦 Installing development dependencies..."
uv sync --dev
uv sync --all-extras

# Code quality with Ruff
format:
@echo "🎨 Formatting code with ruff..."
uv run ruff format src/ --diff
uv run ruff format src/
uv run ruff format src/ dash_app/ tests/ scripts/

lint:
@echo "🔍 Linting code with ruff..."
uv run ruff check src/ --output-format=github

check: lint
@echo "✅ Running code quality checks..."
uv run ruff check src/ --output-format=concise
uv run ruff format src/ --check --diff

fix:
@echo "🔧 Auto-fixing code issues..."
uv run ruff check src/ --fix
uv run ruff format src/

# Pre-commit hooks
pre-commit-install:
@echo "🪝 Installing pre-commit hooks..."
uv run pre-commit install
@echo "✅ Pre-commit hooks installed"

pre-commit-run:
@echo "🔍 Running pre-commit on staged files..."
uv run pre-commit run

pre-commit-all:
@echo "🔍 Running pre-commit on all files..."
uv run pre-commit run --all-files
uv run ruff check src/ dash_app/ tests/ scripts/ --output-format=github

# Testing
test:
@echo "🧪 Running tests..."
@if [ -d "tests" ]; then \
uv run pytest tests/ -v; \
else \
echo "⚠️ No tests directory found. Creating basic test structure..."; \
mkdir -p tests; \
echo "# Test files go here" > tests/__init__.py; \
echo "def test_placeholder():\n assert True" > tests/test_placeholder.py; \
fi
uv run pytest tests/ -v

# Pipeline execution
run-pipeline:
@echo "🚀 Running full modular pipeline..."
run:
@echo "🚀 Running modular pipeline..."
uv run python src/main_modular.py

run-quick:
@echo "⚡ Running quick pipeline test..."
@echo "This will run with limited data for testing purposes"
uv run python src/main_modular.py

# Data verification
data-check:
@echo "📊 Checking data files..."
@echo "Verifying required data files exist:"
@test -f data/train.csv && echo "✅ train.csv found" || echo "❌ train.csv missing"
@test -f data/test.csv && echo "✅ test.csv found" || echo "❌ test.csv missing"
@test -f data/sample_submission.csv && echo "✅ sample_submission.csv found" || echo "❌ sample_submission.csv missing"
@test -f data/personality_dataset.csv && echo "✅ personality_dataset.csv found" || echo "❌ personality_dataset.csv missing"

# Development tools
jupyter:
@echo "📓 Starting Jupyter Lab..."
uv run jupyter lab --ip=0.0.0.0 --port=8888 --allow-root

# Dependency management with uv
lock:
@echo "� Updating uv.lock file..."
uv lock
@echo "✅ uv.lock updated"

sync:
@echo "🔄 Syncing dependencies with uv.lock..."
uv sync
@echo "✅ Dependencies synchronized"

tree:
@echo "🌳 Showing dependency tree..."
uv tree

add:
@echo "📦 Adding new dependency..."
@echo "Usage: make add PACKAGE=package-name"
@if [ -z "$(PACKAGE)" ]; then \
echo "❌ Please specify PACKAGE=package-name"; \
exit 1; \
fi
uv add $(PACKAGE)

add-dev:
@echo "📦 Adding new development dependency..."
@echo "Usage: make add-dev PACKAGE=package-name"
@if [ -z "$(PACKAGE)" ]; then \
echo "❌ Please specify PACKAGE=package-name"; \
exit 1; \
fi
uv add --dev $(PACKAGE)

remove:
@echo "🗑️ Removing dependency..."
@echo "Usage: make remove PACKAGE=package-name"
@if [ -z "$(PACKAGE)" ]; then \
echo "❌ Please specify PACKAGE=package-name"; \
exit 1; \
fi
uv remove $(PACKAGE)

outdated:
@echo "📊 Checking for outdated dependencies..."
uv tree --outdated

# Cleaning
clean:
@echo "🧹 Cleaning up..."
find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
find . -type d -name "*.egg-info" -exec rm -rf {} + 2>/dev/null || true
find . -type f -name "*.pyc" -delete 2>/dev/null || true
find . -type f -name "*.pyo" -delete 2>/dev/null || true
find . -type d -name ".pytest_cache" -exec rm -rf {} + 2>/dev/null || true
find . -type d -name ".ruff_cache" -exec rm -rf {} + 2>/dev/null || true
rm -rf .coverage htmlcov/ 2>/dev/null || true
@echo "✅ Cleanup complete"

# Advanced targets
validate-config:
@echo "🔧 Validating configuration..."
uv run python -c "from src.modules.config import *; print('✅ Configuration valid')"

profile-pipeline:
@echo "📊 Profiling pipeline performance..."
uv run python -m cProfile -o profile_output.prof src/main_modular.py
@echo "Profile saved to profile_output.prof"

benchmark:
@echo "⚡ Running performance benchmark..."
@echo "This will measure pipeline execution time"
time make run-quick

# Git hooks setup
setup-hooks:
@echo "🪝 Setting up git pre-commit hooks..."
@if command -v pre-commit >/dev/null 2>&1; then \
pre-commit install; \
echo "✅ Pre-commit hooks installed"; \
else \
echo "⚠️ pre-commit not found. Install with: pip install pre-commit"; \
fi

# Docker support (if needed)
docker-build:
@echo "🐳 Building Docker image..."
docker build -t personality-classifier .

docker-run:
@echo "🐳 Running in Docker container..."
docker run -v $(PWD)/data:/app/data -v $(PWD)/submissions:/app/submissions personality-classifier

# Documentation
docs:
@echo "📚 Generating documentation..."
@if command -v sphinx-build >/dev/null 2>&1; then \
sphinx-build -b html docs/ docs/_build/html; \
echo "✅ Documentation generated in docs/_build/html"; \
else \
echo "⚠️ Sphinx not found. Install with: uv add --dev sphinx"; \
fi

# CI/CD simulation
ci: check test data-check
@echo "🤖 CI pipeline simulation complete"
@echo "✅ All checks passed!"

# Show project status
status:
@echo "📊 Project Status"
@echo "=================="
@echo "Python version: $(shell python --version 2>&1)"
@echo "UV version: $(shell uv --version 2>&1)"
@echo "Project root: $(PWD)"
@echo ""
@echo "Directory structure:"
@find . -maxdepth 2 -type d | grep -E "(src|data|best_params|submissions)" | sort
@echo ""
@echo "Recent submissions:"
@ls -la submissions/ 2>/dev/null | head -5 || echo "No submissions found"
# Model training
train-models:
@echo "🤖 Training and saving ML models..."
uv run python scripts/train_and_save_models.py

# Quick development workflow
dev: install-dev format lint test
@echo "🚀 Development workflow complete!"
# Dash application
dash:
@echo "📊 Starting Dash application..."
uv run python dash_app/main.py --model-name ensemble

# Production workflow
prod: install check test run-pipeline
@echo "🎯 Production workflow complete!"
# Stop the Dash server listening on TCP port 8050.
# - Only LISTEN sockets are selected, so clients that merely have a connection
#   to port 8050 (e.g. a browser tab) are never killed.
# - PIDs are collected first, so "No process found" is printed only when lsof
#   really returns nothing; a failing kill reports its own error and fails the
#   target instead of being masked by the fallback message.
# - No dependence on how xargs treats empty input (GNU vs BSD differ).
stop-dash:
	@echo "🛑 Stopping Dash application..."
	@pids="$$(lsof -t -i tcp:8050 -s tcp:listen || true)"; \
	if [ -n "$$pids" ]; then \
		kill -9 $$pids; \
	else \
		echo "No process found on port 8050"; \
	fi
Loading