diff --git a/README.md b/README.md index 2f819f8..9d0c1ad 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,12 @@ http://127.0.0.1:8765/ pip install torch pip install keep-gpu ``` +- **Mac M series (M1/M2/M3/M4)** + ```bash + pip install torch + pip install "keep-gpu[macm]" + ``` + Uses Metal Performance Shaders (MPS) backend on Apple Silicon. Flags that matter: @@ -132,6 +138,9 @@ with GlobalGPUController(gpu_ids=[0, 1], vram_to_keep="750MB", interval=90, busy ``` - Methods: `start_keep`, `stop_keep` (optional `job_id`, default stops all), `status` (optional `job_id`), `list_gpus` (basic info). - Dashboard: `http://127.0.0.1:8765/` +- **Mac M series limitations:** + - GPU utilization monitoring is not available on macOS. + - `busy_threshold` parameter is accepted for API compatibility but has no effect. - Minimal client config (stdio MCP): ```yaml servers: diff --git a/docs/concepts/architecture.md b/docs/concepts/architecture.md index 3576ea7..7567797 100644 --- a/docs/concepts/architecture.md +++ b/docs/concepts/architecture.md @@ -7,12 +7,13 @@ schedulers that the GPU is still busy, without burning a full training workload. ## Components 1. **CLI (Typer/Rich)** – Parses options, validates GPU IDs, and configures the logger. -2. **`GlobalGPUController`** – Detects the current platform (CUDA today) and - instantiates one single-GPU controller per selected device. -3. **`CudaGPUController`** – Owns the background thread, VRAM allocation, and small - matmul loops that tick every `interval` seconds. +2. **`GlobalGPUController`** – Detects the current platform (CUDA, ROCm, + or Mac M series) and instantiates one single-GPU controller per selected device. +3. **`CudaGPUController`** / **`RocmGPUController`** / **`MacMGPUController`** – + Platform-specific implementations for per-GPU keep-alive loops. 4. **GPU monitor (NVML/ROCm)** – Wraps `nvidia-ml-py` (the `pynvml` module) for CUDA telemetry and optionally `rocm-smi` when installed by way of the `rocm` extra. 
+ Mac M series does not support direct GPU utilization monitoring. 5. **Utilities** – `parse_size` turns strings like `1GiB` into bytes, while `setup_logger` wires both console and file logging with optional colors. @@ -53,6 +54,5 @@ Matrix multiplies: ## Platform detection -`get_platform()` inspects the system and currently only enables the CUDA path. -If you plan to contribute ROCm or CPU fallbacks, use this hook to branch into -new controller implementations without changing the CLI. +`get_platform()` inspects the system and enables the CUDA, ROCm, or Mac M series +(MPS) path. Detection order: CUDA → ROCm → Mac M → CPU fallback. diff --git a/docs/getting-started.md b/docs/getting-started.md index 78af504..08ede36 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -11,8 +11,9 @@ understand the minimum knobs you need to keep a GPU occupied. !!! info "Platforms" CUDA is the primary path; ROCm is supported by way of the `rocm` extra - (requires a ROCm-enabled PyTorch build). CPU-only environments can import - the package but controllers will not start. + (requires a ROCm-enabled PyTorch build); Mac M series (M1/M2/M3/M4) is + supported by way of the `macm` extra using Metal Performance Shaders (MPS). + CPU-only environments can import the package but controllers will not start. ## Install @@ -47,6 +48,15 @@ understand the minimum knobs you need to keep a GPU occupied. pip install -e .[dev] ``` +=== "Mac M series (M1/M2/M3/M4)" + ```bash + pip install torch + pip install "keep-gpu[macm]" + ``` + The MPS (Metal Performance Shaders) backend is used automatically on + Apple Silicon Macs. Note: GPU utilization monitoring is not available + on macOS. + ## Pick your interface - **CLI** – fastest way to reserve GPUs from a shell; see [CLI Playbook](guides/cli.md). @@ -60,6 +70,13 @@ understand the minimum knobs you need to keep a GPU occupied. python -c "import torch; print(torch.cuda.device_count())" ``` A non-zero integer indicates CUDA is available. 
+ + On Mac M series: + ```bash + python -c "import torch; print(torch.backends.mps.is_available())" + ``` + Should print `True` on Apple Silicon Macs. + 2. Run the CLI in dry form (press `Ctrl+C` after a few seconds): ```bash keep-gpu --interval 30 --vram 512MB diff --git a/pyproject.toml b/pyproject.toml index 3bd76d0..56886fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,9 @@ dev = [ rocm = [ "rocm-smi", ] +macm = [ + "psutil", # For system memory query on Apple Silicon +] [project.urls] bugs = "https://github.com/Wangmerlyn/KeepGPU/issues" @@ -144,5 +147,6 @@ force-single-line = false [tool.pytest.ini_options] markers = [ "rocm: tests that require ROCm stack", + "macm: tests that require Apple Silicon with MPS", "large_memory: tests that use large VRAM", ] diff --git a/skills/gpu-keepalive-with-keepgpu/SKILL.md b/skills/gpu-keepalive-with-keepgpu/SKILL.md index a7a8e6c..12dbb0f 100644 --- a/skills/gpu-keepalive-with-keepgpu/SKILL.md +++ b/skills/gpu-keepalive-with-keepgpu/SKILL.md @@ -73,6 +73,23 @@ For ROCm users from local checkout: pip install -e ".[rocm]" ``` +### Option D: Mac M series install + +```bash +# Install PyTorch with MPS support +pip install torch + +# Install KeepGPU with Mac M extras +pip install "keep_gpu[macm] @ git+https://github.com/Wangmerlyn/KeepGPU.git" +``` + +**Note for Mac M users:** +- Uses Metal Performance Shaders (MPS) backend automatically on Apple Silicon +- GPU utilization monitoring is **not available** on macOS (the system doesn't provide this API) +- The `--busy-threshold` parameter is accepted for API compatibility but has no effect +- Only device 0 is supported (MPS limitation) +- Memory allocation uses the unified memory architecture (shared with system RAM) + Verify installation: ```bash @@ -179,6 +196,8 @@ echo $! > keepgpu.pid - Allocation failure / OOM: reduce `--vram` or free memory first. - No utilization telemetry: ensure `nvidia-ml-py` works and `nvidia-smi` is available. 
- No GPUs detected: verify drivers, CUDA/ROCm runtime, and `torch.cuda.device_count()`. +- Mac M series issues: ensure you're on macOS 12.3+ and PyTorch is built with MPS support (`torch.backends.mps.is_available()` should return `True`). +- Memory allocation failures on Mac: the unified memory architecture may behave differently from discrete GPUs; try reducing `--vram` values. ## Example diff --git a/src/keep_gpu/global_gpu_controller/global_gpu_controller.py b/src/keep_gpu/global_gpu_controller/global_gpu_controller.py index 38d65bc..d9e5f29 100644 --- a/src/keep_gpu/global_gpu_controller/global_gpu_controller.py +++ b/src/keep_gpu/global_gpu_controller/global_gpu_controller.py @@ -37,12 +37,27 @@ def __init__( ) controller_cls = RocmGPUController + elif self.computing_platform == ComputingPlatform.MACM: + from keep_gpu.single_gpu_controller.macm_gpu_controller import ( + MacMGPUController, + ) + + controller_cls = MacMGPUController else: raise NotImplementedError( f"GlobalGPUController not implemented for platform {self.computing_platform}" ) - if gpu_ids is None: + if self.computing_platform == ComputingPlatform.MACM: + if gpu_ids is None: + self.gpu_ids = [0] + elif gpu_ids == [0]: + self.gpu_ids = gpu_ids + else: + raise ValueError( + f"MACM platform only supports gpu_ids=[0] or None, got {gpu_ids}" + ) + elif gpu_ids is None: self.gpu_ids = list(range(torch.cuda.device_count())) else: self.gpu_ids = gpu_ids diff --git a/src/keep_gpu/single_gpu_controller/__init__.py b/src/keep_gpu/single_gpu_controller/__init__.py index e69de29..df09e0a 100644 --- a/src/keep_gpu/single_gpu_controller/__init__.py +++ b/src/keep_gpu/single_gpu_controller/__init__.py @@ -0,0 +1,3 @@ +from keep_gpu.single_gpu_controller.macm_gpu_controller import MacMGPUController + +__all__ = ["MacMGPUController"] diff --git a/src/keep_gpu/single_gpu_controller/macm_gpu_controller.py b/src/keep_gpu/single_gpu_controller/macm_gpu_controller.py new file mode 100644 index 0000000..91f7806 --- 
/dev/null +++ b/src/keep_gpu/single_gpu_controller/macm_gpu_controller.py @@ -0,0 +1,165 @@ +import gc +import threading +import time +from typing import Optional + +import torch + +from keep_gpu.single_gpu_controller.base_gpu_controller import BaseGPUController +from keep_gpu.utilities.logger import setup_logger +from keep_gpu.utilities.platform_manager import ComputingPlatform + +logger = setup_logger(__name__) + + +class MacMGPUController(BaseGPUController): + def __init__( + self, + *, + rank: int = 0, + interval: float = 1.0, + vram_to_keep: str | int = "1000 MB", + busy_threshold: int = 10, + iterations: int = 5000, + ): + super().__init__(vram_to_keep=vram_to_keep, interval=interval) + if rank != 0: + raise ValueError("MPS only supports device 0; rank must be 0") + if iterations <= 0: + raise ValueError("iterations must be positive") + if not torch.backends.mps.is_available(): + raise RuntimeError("PyTorch MPS backend is not available") + + self.rank = rank + self.device = torch.device("mps") + self.busy_threshold = busy_threshold + self.iterations = iterations + self.platform = ComputingPlatform.MACM + + self._stop_evt: Optional[threading.Event] = None + self._thread: Optional[threading.Thread] = None + self._num_elements: Optional[int] = None + + logger.debug( + "rank %s: busy_threshold=%s ignored on macOS MPS (API compatibility)", + self.rank, + self.busy_threshold, + ) + + def keep(self) -> None: + if self._thread and self._thread.is_alive(): + logger.warning("rank %s: keep thread already running", self.rank) + return + + self._num_elements = int(self.vram_to_keep) // 4 # vram_to_keep is bytes; float32 elements are 4 bytes each + if self._num_elements <= 0: + raise ValueError("vram_to_keep must be positive") + + self._stop_evt = threading.Event() + self._thread = threading.Thread( + target=self._keep_loop, + name=f"gpu-keeper-macm-{self.rank}", + daemon=True, + ) + self._thread.start() + logger.info("rank %s: MPS keep thread started", self.rank) + + def release(self) -> None: + if not (self._thread and 
self._thread.is_alive()): + logger.warning("rank %s: keep thread not running", self.rank) + return + + stop_evt = self._stop_evt + if stop_evt is None: + logger.warning("rank %s: stop event missing; skipping release", self.rank) + return + + stop_evt.set() + join_timeout = max(2.0, min(float(self.interval) + 2.0, 30.0)) + self._thread.join(timeout=join_timeout) + if self._thread.is_alive(): + logger.warning( + "rank %s: MPS keep thread did not stop within %.1fs", + self.rank, + join_timeout, + ) + return + + torch.mps.empty_cache() + gc.collect() + logger.info("rank %s: keep thread stopped & cache cleared", self.rank) + + def __enter__(self): + self.keep() + return self + + def __exit__(self, exc_type, exc, tb): + self.release() + + def _keep_loop(self) -> None: + stop_evt = self._stop_evt + if stop_evt is None: + logger.error("rank %s: stop event not initialized", self.rank) + return + + num_elements = self._num_elements if self._num_elements is not None else 0 + if num_elements <= 0: + logger.error( + "rank %s: invalid vram_to_keep=%s", self.rank, self.vram_to_keep + ) + return + + tensor = None + while not stop_evt.is_set(): + try: + tensor = torch.rand( + num_elements, + device=self.device, + dtype=torch.float32, + requires_grad=False, + ) + break + except RuntimeError as exc: + logger.error("rank %s: failed to allocate tensor: %s", self.rank, exc) + torch.mps.empty_cache() + gc.collect() + if stop_evt.wait(self.interval): + return + + if tensor is None: + logger.error("rank %s: failed to allocate tensor, exiting loop", self.rank) + return + + while not stop_evt.is_set(): + try: + self._run_batch(tensor) + if stop_evt.wait(self.interval): + break + except RuntimeError as exc: + logger.error("rank %s: runtime error: %s", self.rank, exc) + if "out of memory" in str(exc).lower(): + torch.mps.empty_cache() + if stop_evt.wait(self.interval): + break + except Exception: + logger.exception("rank %s: unexpected error", self.rank) + if stop_evt.wait(self.interval): + break + + @torch.no_grad() + def 
_run_batch(self, tensor: torch.Tensor) -> None: + stop_evt = self._stop_evt + + tic = time.time() + for _ in range(self.iterations): + torch.relu_(tensor) + if stop_evt is not None and stop_evt.is_set(): + break + torch.mps.synchronize() + toc = time.time() + + logger.debug( + "rank %s: elementwise batch done - avg %.2f ms", + self.rank, + (toc - tic) * 1000 / max(1, self.iterations), + ) diff --git a/src/keep_gpu/utilities/platform_manager.py b/src/keep_gpu/utilities/platform_manager.py index 0063b79..923c412 100644 --- a/src/keep_gpu/utilities/platform_manager.py +++ b/src/keep_gpu/utilities/platform_manager.py @@ -1,4 +1,6 @@ import os +import platform +import sys from enum import Enum from typing import Callable, List, Tuple @@ -13,6 +15,7 @@ class ComputingPlatform(Enum): CPU = "cpu" CUDA = "cuda" ROCM = "rocm" + MACM = "macm" def _check_cuda(): @@ -60,9 +63,27 @@ def _check_cpu(): return True +def _check_macm(): + """Return True if running on Apple Silicon Mac (Mac M) with MPS support.""" + try: + # macOS (darwin) on Apple Silicon (arm64) with PyTorch MPS backend available + if sys.platform != "darwin": + return False + if platform.machine() != "arm64": + return False + # PyTorch MPS availability + if torch.backends.mps.is_available(): + return True + return False + except Exception as exc: # pragma: no cover - defensive + logger.debug("MACM detection failed: %s", exc) + return False + + _PLATFORM_CHECKS: List[Tuple[ComputingPlatform, Callable[[], bool]]] = [ (ComputingPlatform.CUDA, _check_cuda), (ComputingPlatform.ROCM, _check_rocm), + (ComputingPlatform.MACM, _check_macm), (ComputingPlatform.CPU, _check_cpu), ] diff --git a/tests/conftest.py b/tests/conftest.py index 004d2ba..47de7cb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,16 +8,26 @@ def pytest_addoption(parser): default=False, help="run tests marked as rocm (require 
ROCm stack)", ) + parser.addoption( + "--run-macm", + action="store_true", + default=False, + help="run tests marked as macm (require Apple Silicon MPS)", + ) def pytest_collection_modifyitems(config, items): - if config.getoption("--run-rocm"): - return + if not config.getoption("--run-rocm"): + skip_rocm = pytest.mark.skip(reason="need --run-rocm option to run") + for item in items: + if "rocm" in item.keywords: + item.add_marker(skip_rocm) - skip_rocm = pytest.mark.skip(reason="need --run-rocm option to run") - for item in items: - if "rocm" in item.keywords: - item.add_marker(skip_rocm) + if not config.getoption("--run-macm"): + skip_macm = pytest.mark.skip(reason="need --run-macm option to run") + for item in items: + if "macm" in item.keywords: + item.add_marker(skip_macm) @pytest.fixture @@ -30,3 +40,14 @@ def rocm_available(): return bool(torch.cuda.is_available() and getattr(torch.version, "hip", None)) except Exception: return False + + +@pytest.fixture +def macm_available(): + try: + import sys + import torch + + return bool(sys.platform == "darwin" and torch.backends.mps.is_available()) + except Exception: + return False diff --git a/tests/macm_controller/__init__.py b/tests/macm_controller/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/macm_controller/test_macm_basic.py b/tests/macm_controller/test_macm_basic.py new file mode 100644 index 0000000..5aa68d3 --- /dev/null +++ b/tests/macm_controller/test_macm_basic.py @@ -0,0 +1,73 @@ +import sys +import time + +import pytest +import torch + +from keep_gpu.single_gpu_controller.macm_gpu_controller import MacMGPUController +from keep_gpu.utilities.platform_manager import ComputingPlatform + + +pytestmark = [ + pytest.mark.skipif( + not (sys.platform == "darwin" and torch.backends.mps.is_available()), + reason="Only run MacM tests on Apple Silicon with MPS", + ), + pytest.mark.macm, +] + + +def test_macm_controller_basic(): + controller = MacMGPUController( + rank=0, + 
interval=0.05, + vram_to_keep="8MB", + iterations=64, + ) + + controller.keep() + time.sleep(0.2) + assert controller._thread and controller._thread.is_alive() + + controller.release() + assert not (controller._thread and controller._thread.is_alive()) + + controller.keep() + time.sleep(0.2) + assert controller._thread and controller._thread.is_alive() + + controller.release() + assert not (controller._thread and controller._thread.is_alive()) + + +def test_macm_controller_context_manager(): + with MacMGPUController( + rank=0, + interval=0.05, + vram_to_keep="8MB", + iterations=64, + ) as controller: + time.sleep(0.2) + assert controller._thread and controller._thread.is_alive() + + assert not (controller._thread and controller._thread.is_alive()) + + +def test_macm_controller_invalid_rank(): + with pytest.raises(ValueError, match="MPS only supports device 0"): + MacMGPUController( + rank=1, + interval=0.05, + vram_to_keep="8MB", + iterations=64, + ) + + +def test_macm_controller_platform(): + controller = MacMGPUController( + rank=0, + interval=0.05, + vram_to_keep="8MB", + iterations=64, + ) + assert controller.platform == ComputingPlatform.MACM
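Review note: the two bits of logic this patch leans on most — the darwin/arm64/MPS detection gate in `_check_macm` and the bytes-to-elements conversion behind `vram_to_keep` — can be sanity-checked in isolation, without Apple Silicon hardware. This is a minimal sketch; `looks_like_macm` and `float32_elements` are hypothetical helper names for illustration only, not identifiers from this diff:

```python
def looks_like_macm(plat: str, machine: str, mps_available: bool) -> bool:
    """Mirror the _check_macm gate: macOS (darwin) + Apple Silicon (arm64) + MPS."""
    return plat == "darwin" and machine == "arm64" and mps_available


def float32_elements(vram_bytes: int) -> int:
    """A float32 tensor stores 4 bytes per element, so a VRAM budget in bytes
    maps to vram_bytes // 4 elements for a torch.rand(..., dtype=float32) call."""
    if vram_bytes <= 0:
        raise ValueError("vram_bytes must be positive")
    return vram_bytes // 4


# Detection order is CUDA -> ROCm -> MacM -> CPU fallback, so an Apple
# Silicon machine without CUDA/ROCm lands on the MacM path.
print(looks_like_macm("darwin", "arm64", True))   # True
print(looks_like_macm("linux", "x86_64", False))  # False
print(float32_elements(8 * 1024 * 1024))          # 2097152 elements for 8 MiB
```

Keeping the element count at one quarter of the byte budget is what makes the allocated tensor actually match the requested `vram_to_keep` footprint.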