diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4153c2a..923b3e6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,6 +5,9 @@ on: branches: [main] pull_request: +permissions: + contents: read + concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }} cancel-in-progress: true diff --git a/.github/workflows/integration-nightly.yml b/.github/workflows/integration-nightly.yml index de448dd..a5ce34d 100644 --- a/.github/workflows/integration-nightly.yml +++ b/.github/workflows/integration-nightly.yml @@ -9,6 +9,9 @@ on: description: "Specific model ID to test (blank = all non-gated models that fit in memory)" required: false +permissions: + contents: read + concurrency: group: nightly-integration cancel-in-progress: true diff --git a/.github/workflows/integration-prerelease.yml b/.github/workflows/integration-prerelease.yml index e5b73c2..eb4e079 100644 --- a/.github/workflows/integration-prerelease.yml +++ b/.github/workflows/integration-prerelease.yml @@ -5,6 +5,9 @@ on: types: [created] workflow_dispatch: +permissions: + contents: read + concurrency: group: prerelease-integration cancel-in-progress: true diff --git a/README.md b/README.md index b28c7a6..bcd5a72 100644 --- a/README.md +++ b/README.md @@ -14,8 +14,7 @@ Most local LLM tools serve **one model at a time** and leave you to figure out w ```bash uv tool install mlx-stack -mlx-stack init --accept-defaults # detects hardware, picks models, generates configs -mlx-stack up # 3 model servers + API gateway, one command +mlx-stack setup # detects hardware, picks models, pulls, starts — one command # → OpenAI-compatible API at http://localhost:4000/v1 ``` @@ -141,6 +140,33 @@ uvx mlx-stack profile ## Quick Start +The fastest way to get running is the interactive setup command: + +```bash +mlx-stack setup +``` + +This walks you through hardware detection, model selection, downloading, and starting all services in one guided flow. 
For CI or scripting, pass `--accept-defaults` to skip all prompts: + +```bash +mlx-stack setup --accept-defaults +``` + +The OpenAI-compatible API is now available at `http://localhost:4000/v1`. + +```bash +# Check service health +mlx-stack status + +# Stop everything when done +mlx-stack down +``` + +<details>
+<summary>Manual step-by-step setup</summary> + +If you prefer full control over each step: + ```bash # 1. Detect your hardware mlx-stack profile @@ -158,17 +184,20 @@ mlx-stack up mlx-stack status ``` -The OpenAI-compatible API is now available at `http://localhost:4000/v1`. - -```bash -# Stop everything when done -mlx-stack down -``` +</details>
## CLI Reference ### Setup & Configuration +**`mlx-stack setup`** — Interactive guided setup: detects hardware, selects models, pulls weights, and starts the stack in one command. + +| Option | Description | +|--------|-------------| +| `--accept-defaults` | Skip all prompts and use recommended defaults | +| `--intent <intent>` | Use case intent (prompted if not provided) | +| `--budget-pct <10-90>` | Memory budget as percentage of unified memory (default: 40) | + | Command | Description | |---------|-------------| | `mlx-stack profile` | Detect Apple Silicon hardware and save profile to `~/.mlx-stack/profile.json` | @@ -294,7 +323,7 @@ mlx-stack is designed to run unattended on always-on hardware like a Mac Mini. ### Quick setup ```bash -mlx-stack init --accept-defaults +mlx-stack setup --accept-defaults mlx-stack install ``` @@ -407,14 +436,12 @@ See [DEVELOPING.md](DEVELOPING.md) for the full developer guide, including proje # Install dev dependencies uv sync -# Run tests -uv run pytest - -# Type checking -uv run python -m pyright +# Run all checks (lint + typecheck + tests) — same as CI +make check -# Linting -uv run ruff check src/ tests/ +# Or individually +make lint # ruff + pyright +make test # pytest with coverage ``` ## Contributing diff --git a/tests/unit/test_ops_cross_area.py b/tests/unit/test_ops_cross_area.py index 2593c62..3d0e03b 100644 --- a/tests/unit/test_ops_cross_area.py +++ b/tests/unit/test_ops_cross_area.py @@ -410,6 +410,15 @@ def follow_thread() -> None: output_callback=lambda text: captured.append(text), ) + def wait_for_content(marker: str, timeout: float = 5.0) -> bool: + """Wait until marker appears in captured output.""" + end = time.monotonic() + timeout + while time.monotonic() < end: + if any(marker in c for c in captured): + return True + time.sleep(0.05) + return False + thread = threading.Thread(target=follow_thread, daemon=True) thread.start() @@ -423,7 +432,9 @@ def follow_thread() -> None: with open(log, "a") as f: 
f.write("after-truncation\n") - time.sleep(1.0) + assert wait_for_content("after-truncation"), ( + f"Timed out waiting for 'after-truncation' in captured output: {captured}" + ) import ctypes @@ -435,9 +446,6 @@ def follow_thread() -> None: ) thread.join(timeout=3) - combined = "\n".join(captured) - assert "after-truncation" in combined - def test_follow_continues_after_multiple_rotations( self, mlx_stack_home: Path, logs_dir: Path ) -> None: