diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4153c2a..923b3e6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -5,6 +5,9 @@ on:
branches: [main]
pull_request:
+permissions:
+ contents: read
+
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }}
cancel-in-progress: true
diff --git a/.github/workflows/integration-nightly.yml b/.github/workflows/integration-nightly.yml
index de448dd..a5ce34d 100644
--- a/.github/workflows/integration-nightly.yml
+++ b/.github/workflows/integration-nightly.yml
@@ -9,6 +9,9 @@ on:
description: "Specific model ID to test (blank = all non-gated models that fit in memory)"
required: false
+permissions:
+ contents: read
+
concurrency:
group: nightly-integration
cancel-in-progress: true
diff --git a/.github/workflows/integration-prerelease.yml b/.github/workflows/integration-prerelease.yml
index e5b73c2..eb4e079 100644
--- a/.github/workflows/integration-prerelease.yml
+++ b/.github/workflows/integration-prerelease.yml
@@ -5,6 +5,9 @@ on:
types: [created]
workflow_dispatch:
+permissions:
+ contents: read
+
concurrency:
group: prerelease-integration
cancel-in-progress: true
diff --git a/README.md b/README.md
index b28c7a6..bcd5a72 100644
--- a/README.md
+++ b/README.md
@@ -14,8 +14,7 @@ Most local LLM tools serve **one model at a time** and leave you to figure out w
```bash
uv tool install mlx-stack
-mlx-stack init --accept-defaults # detects hardware, picks models, generates configs
-mlx-stack up # 3 model servers + API gateway, one command
+mlx-stack setup # detects hardware, picks models, pulls, starts — one command
# → OpenAI-compatible API at http://localhost:4000/v1
```
@@ -141,6 +140,33 @@ uvx mlx-stack profile
## Quick Start
+The fastest way to get running is the interactive setup command:
+
+```bash
+mlx-stack setup
+```
+
+This walks you through hardware detection, model selection, downloading, and starting all services in one guided flow. For CI or scripting, pass `--accept-defaults` to skip all prompts:
+
+```bash
+mlx-stack setup --accept-defaults
+```
+
+The OpenAI-compatible API is now available at `http://localhost:4000/v1`.
+
+```bash
+# Check service health
+mlx-stack status
+
+# Stop everything when done
+mlx-stack down
+```
+
+
+### Manual step-by-step setup
+
+If you prefer full control over each step:
+
```bash
# 1. Detect your hardware
mlx-stack profile
@@ -158,17 +184,20 @@ mlx-stack up
mlx-stack status
```
-The OpenAI-compatible API is now available at `http://localhost:4000/v1`.
-
-```bash
-# Stop everything when done
-mlx-stack down
-```
+
## CLI Reference
### Setup & Configuration
+**`mlx-stack setup`** — Interactive guided setup: detects hardware, selects models, pulls weights, and starts the stack in one command.
+
+| Option | Description |
+|--------|-------------|
+| `--accept-defaults` | Skip all prompts and use recommended defaults |
+| `--intent <intent>` | Use case intent (prompted if not provided) |
+| `--budget-pct <10-90>` | Memory budget as percentage of unified memory (default: 40) |
+
| Command | Description |
|---------|-------------|
| `mlx-stack profile` | Detect Apple Silicon hardware and save profile to `~/.mlx-stack/profile.json` |
@@ -294,7 +323,7 @@ mlx-stack is designed to run unattended on always-on hardware like a Mac Mini.
### Quick setup
```bash
-mlx-stack init --accept-defaults
+mlx-stack setup --accept-defaults
mlx-stack install
```
@@ -407,14 +436,12 @@ See [DEVELOPING.md](DEVELOPING.md) for the full developer guide, including proje
# Install dev dependencies
uv sync
-# Run tests
-uv run pytest
-
-# Type checking
-uv run python -m pyright
+# Run all checks (lint + typecheck + tests) — same as CI
+make check
-# Linting
-uv run ruff check src/ tests/
+# Or individually
+make lint # ruff + pyright
+make test # pytest with coverage
```
## Contributing
diff --git a/tests/unit/test_ops_cross_area.py b/tests/unit/test_ops_cross_area.py
index 2593c62..3d0e03b 100644
--- a/tests/unit/test_ops_cross_area.py
+++ b/tests/unit/test_ops_cross_area.py
@@ -410,6 +410,15 @@ def follow_thread() -> None:
output_callback=lambda text: captured.append(text),
)
+ def wait_for_content(marker: str, timeout: float = 5.0) -> bool:
+ """Wait until marker appears in captured output."""
+ end = time.monotonic() + timeout
+ while time.monotonic() < end:
+ if any(marker in c for c in captured):
+ return True
+ time.sleep(0.05)
+ return False
+
thread = threading.Thread(target=follow_thread, daemon=True)
thread.start()
@@ -423,7 +432,9 @@ def follow_thread() -> None:
with open(log, "a") as f:
f.write("after-truncation\n")
- time.sleep(1.0)
+ assert wait_for_content("after-truncation"), (
+ f"Timed out waiting for 'after-truncation' in captured output: {captured}"
+ )
import ctypes
@@ -435,9 +446,6 @@ def follow_thread() -> None:
)
thread.join(timeout=3)
- combined = "\n".join(captured)
- assert "after-truncation" in combined
-
def test_follow_continues_after_multiple_rotations(
self, mlx_stack_home: Path, logs_dir: Path
) -> None: