diff --git a/.github/scripts/common.py b/.github/scripts/ci_common.py
similarity index 79%
rename from .github/scripts/common.py
rename to .github/scripts/ci_common.py
index 4b55f83..be473c8 100644
--- a/.github/scripts/common.py
+++ b/.github/scripts/ci_common.py
@@ -1,14 +1,16 @@
+from __future__ import annotations
+
+import json
 import os
 import re
 import subprocess
 import time
-import urllib.parse
 import urllib.error
+import urllib.parse
 import urllib.request
-import json
 from pathlib import Path
 
-
+# Test files with this marker are intentionally scheduled on CPU-only runners.
 GPU_DISABLED_MARKER = re.compile(r"^# GPU=-1\s*$", re.MULTILINE)
 
 
@@ -16,31 +18,6 @@ def now_ms() -> int:
     return time.time_ns() // 1_000_000
 
 
-def fetch_text(url: str, *, timeout: float, suppress_error: bool = False) -> str:
-    try:
-        with urllib.request.urlopen(url, timeout=timeout) as response:
-            return response.read().decode("utf-8", errors="replace")
-    except (urllib.error.URLError, TimeoutError, OSError) as exc:
-        if suppress_error:
-            print(f"Request failed for {url}: {exc}")
-            return ""
-        raise
-
-
-def fetch_with_retry(url: str, *, timeout: float, retries: int, retry_delay: float) -> str:
-    last_error: Exception | None = None
-    for attempt in range(retries + 1):
-        try:
-            return fetch_text(url, timeout=timeout)
-        except (urllib.error.URLError, TimeoutError, OSError) as exc:
-            last_error = exc
-            if attempt < retries:
-                time.sleep(retry_delay)
-    if last_error is not None:
-        print(f"Request failed after retries: {last_error}")
-    return ""
-
-
 def normalize_base_url(base_url: str) -> str:
     return base_url.rstrip("/")
 
@@ -89,14 +66,14 @@ def request_json_with_retry(
 
 
 def append_github_env(name: str, value: str) -> None:
-    _append_github_file(os.environ.get("GITHUB_ENV"), name, value)
+    append_github_file(os.environ.get("GITHUB_ENV"), name, value)
 
 
 def append_github_output(name: str, value: str) -> None:
-    _append_github_file(os.environ.get("GITHUB_OUTPUT"), name, value)
+    append_github_file(os.environ.get("GITHUB_OUTPUT"), name, value)
 
 
-def _append_github_file(target: str | None, name: str, value: str) -> None:
+def append_github_file(target: str | None, name: str, value: str) -> None:
     if not target:
         return
     with open(target, "a", encoding="utf-8") as fh:
@@ -126,18 +103,12 @@ def test_requires_gpu(test_file: str) -> bool:
     return GPU_DISABLED_MARKER.search(contents) is None
 
 
-def quote_url_value(value: str) -> str:
-    return urllib.parse.quote(value, safe="")
-
-
 def build_server_info() -> dict[str, str]:
     from device_smi import Device
+
     os_info = Device("os")
     cpu_model = Device("cpu").model
-    platform_name = (
-            os.environ.get("GPU_PLATFORM")
-            or cpu_model
-    )
+    platform_name = os.environ.get("GPU_PLATFORM") or cpu_model
     return {
         "platform": platform_name,
         "arch": os_info.arch,
diff --git a/.github/scripts/allocate_gpu.py b/.github/scripts/ci_gpu.py
similarity index 57%
rename from .github/scripts/allocate_gpu.py
rename to .github/scripts/ci_gpu.py
index 0ea1929..7d85049 100644
--- a/.github/scripts/allocate_gpu.py
+++ b/.github/scripts/ci_gpu.py
@@ -1,11 +1,15 @@
+from __future__ import annotations
+
 import argparse
 import subprocess
 import sys
 import time
+import urllib.error
 
-from common import (
+from ci_common import (
     append_github_env,
     build_get_request,
+    build_job_request,
     build_server_info,
     extract_gpu_ids,
     format_info_url,
@@ -43,21 +47,7 @@ def print_status(base_url: str, runner_name: str) -> None:
         print(status)
 
 
-def main() -> int:
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--base-url", required=True)
-    parser.add_argument("--run-id", required=True)
-    parser.add_argument("--test", required=True)
-    parser.add_argument("--runner", required=True)
-    parser.add_argument("--count", required=True)
-    parser.add_argument("--sleep-sec", type=float, default=5)
-    parser.add_argument("--timeout-sec", type=int, default=18000)
-    parser.add_argument("--request-timeout", type=float, default=10)
-    parser.add_argument("--retries", type=int, default=3)
-    parser.add_argument("--retry-delay", type=float, default=1)
-    parser.add_argument("--require-single", action="store_true")
-    args = parser.parse_args()
-
+def allocate_gpu(args: argparse.Namespace) -> int:
     start_s = time.time()
     endpoint = f"{normalize_base_url(args.base_url)}/get"
 
@@ -121,5 +111,61 @@ def main() -> int:
         return 0
 
 
+def release_gpu(args: argparse.Namespace) -> int:  # note: args.timestamp is parsed by the CLI but not read here
+    request_body = build_job_request(
+        runner_name=args.runner,
+        run_id=args.run_id,
+        test_name=args.test,
+    )
+    url = f"{normalize_base_url(args.base_url)}/release"
+    print(url)
+
+    try:
+        response = request_json(url, method="POST", body=request_body, timeout=args.timeout)
+    except (urllib.error.URLError, TimeoutError, OSError, ValueError) as exc:
+        print(f"Failed to release GPU: {exc}")
+        return 0  # a failed release is only logged; the command still exits 0
+
+    resp = extract_gpu_ids(response)
+    print(f"response: {resp}")
+    if args.gpu_id and resp not in {args.gpu_id, "-1"}:  # "-1" is tolerated too; presumably "nothing held" - TODO confirm
+        print(f"Error: response ({resp}) != expected ({args.gpu_id})")
+    return 0  # a mismatch is reported above but never changes the exit code
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser()
+    subparsers = parser.add_subparsers(dest="command", required=True)
+
+    allocate_parser = subparsers.add_parser("allocate")
+    allocate_parser.add_argument("--base-url", required=True)
+    allocate_parser.add_argument("--run-id", required=True)
+    allocate_parser.add_argument("--test", required=True)
+    allocate_parser.add_argument("--runner", required=True)
+    allocate_parser.add_argument("--count", required=True)
+    allocate_parser.add_argument("--sleep-sec", type=float, default=5)
+    allocate_parser.add_argument("--timeout-sec", type=int, default=18000)
+    allocate_parser.add_argument("--request-timeout", type=float, default=10)
+    allocate_parser.add_argument("--retries", type=int, default=3)
+    allocate_parser.add_argument("--retry-delay", type=float, default=1)
+    allocate_parser.add_argument("--require-single", action="store_true")
+
+    release_parser = subparsers.add_parser("release")
+    release_parser.add_argument("--base-url", required=True)
+    release_parser.add_argument("--run-id", required=True)
+    release_parser.add_argument("--gpu-id", default="")
+    release_parser.add_argument("--timestamp")
+    release_parser.add_argument("--test", required=True)
+    release_parser.add_argument("--runner", required=True)
+    release_parser.add_argument("--timeout", type=float, default=10)
+
+    args = parser.parse_args()
+    if args.command == "allocate":
+        return allocate_gpu(args)
+    if args.command == "release":
+        return release_gpu(args)
+    raise AssertionError(f"Unhandled command: {args.command}")
+
+
 if __name__ == "__main__":
     sys.exit(main())
diff --git a/.github/scripts/ci_loop_versions.py b/.github/scripts/ci_loop_versions.py
deleted file mode 100644
index c65b14d..0000000
--- a/.github/scripts/ci_loop_versions.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import argparse
-import json
-
-import requests
-from packaging.specifiers import SpecifierSet
-from packaging.version import Version
-
-
-def get_versions(package: str, version_spec: str) -> list[str]:
-    specifier = SpecifierSet(version_spec)
-
-    url = f"https://pypi.org/pypi/{package}/json"
-    resp = requests.get(url, timeout=30)
-    resp.raise_for_status()
-    data = resp.json()
-
-    all_versions = data["releases"].keys()
-
-    matched = sorted(
-        (Version(v) for v in all_versions if Version(v) in specifier),
-        reverse=True,
-    )
-    return [str(v) for v in matched]
-
-
-def main():
-    parser = argparse.ArgumentParser(description="List matching PyPI versions as JSON")
-    parser.add_argument("package", help="package name, e.g. setuptools")
-    parser.add_argument("version", help='version spec, e.g. ">=77.0.1,<83"')
-    args = parser.parse_args()
-
-    versions = get_versions(args.package, args.version)
-    print(json.dumps(versions))
-
-
-if __name__ == "__main__":
-    main()
diff --git a/.github/scripts/ci_prepare_checkout.sh b/.github/scripts/ci_prepare_checkout.sh
new file mode 100644
index 0000000..2062f99
--- /dev/null
+++ b/.github/scripts/ci_prepare_checkout.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+pr_number="${1:-0}"
+
+git config --global --add safe.directory "$(pwd)"
+
+if [[ -z "$pr_number" || "$pr_number" == "0" ]]; then
+  exit 0
+fi
+
+echo "pr number $pr_number"
+git fetch origin "pull/${pr_number}/head:pr-${pr_number}"
+git checkout "pr-${pr_number}"
diff --git a/.github/scripts/ci_restore_uv_cache.sh b/.github/scripts/ci_restore_uv_cache.sh
new file mode 100644
index 0000000..3e12d3e
--- /dev/null
+++ b/.github/scripts/ci_restore_uv_cache.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+tar_file="${1:-/opt/dist/uv.tar.xz}"
+cache_dir="${2:-/opt/uv/cache}"
+tmp_dir="${cache_dir}/tmp"
+last_file="${cache_dir}/lastmodified"
+
+if [[ ! -f "$tar_file" ]]; then
+  echo "uv cache archive not found: $tar_file"
+  exit 0
+fi
+
+tar_mtime="$(stat -c %Y "$tar_file")"
+last_mtime="0"
+if [[ -f "$last_file" ]]; then
+  last_mtime="$(<"$last_file")"
+fi
+
+if [[ "$tar_mtime" == "$last_mtime" ]]; then
+  echo "uv cache archive unchanged, skip decompress"
+  exit 0
+fi
+
+echo "decompressing $tar_file into $cache_dir..."
+rm -rf "${tmp_dir:?}"  # drop the whole staging dir so dotfiles left by a failed run go too
+mkdir -p "$tmp_dir"
+tar -xJf "$tar_file" -C "$tmp_dir"
+[[ -d "$tmp_dir/uv" ]] && rm -rf "$cache_dir/uv"  # keep the old cache when the archive has no uv/ dir
+mv "$tmp_dir/uv" "$cache_dir/uv"
+printf '%s\n' "$tar_mtime" > "$last_file"  # record the mtime only after a successful swap
+
+ls -ahl "$cache_dir"
+echo "=========="
+ls -ahl "$cache_dir/uv"
diff --git a/.github/scripts/run_tests.py b/.github/scripts/ci_tests.py
similarity index 50%
rename from .github/scripts/run_tests.py
rename to .github/scripts/ci_tests.py
index 19c89bf..e3c6be9 100644
--- a/.github/scripts/run_tests.py
+++ b/.github/scripts/ci_tests.py
@@ -1,4 +1,7 @@
+from __future__ import annotations
+
 import argparse
+import json
 import os
 import shutil
 import signal
@@ -7,17 +10,129 @@
 import threading
 import time
 import urllib.error
+from dataclasses import asdict, dataclass
 from pathlib import Path
-from common import (
+
+from ci_common import (
     append_github_env,
+    append_github_output,
     build_job_request,
     extract_gpu_ids,
     normalize_base_url,
     normalize_test_file,
     request_json,
+    test_requires_gpu,
     to_safe_name,
 )
 
+# GPTQModel engine tests need a pinned CPU wheel because upstream CPU artifacts are version-sensitive.
+TORCHAO_CPU_WHEEL = (
+    "https://download.pytorch.org/whl/cpu/"
+    "torchao-0.17.0%2Bcpu-py3-none-any.whl"
+    "#sha256=6c0ce8b506c72be4efb1f0c6fd1679cb58145efebb20d51ac1adf7a7b3ebb872"
+)
+# CI uses a non-zero exit rewrite so GitHub summaries clearly distinguish test process failures.
+ERROR_EXIT_CODE = 22
+
+
+# Unit test metadata is computed once per matrix entry and exported to GitHub env/output files.
+@dataclass(frozen=True)
+class UnitTestConfig:
+    """Describe the Python/runtime settings needed by one test file."""
+
+    test_file: str
+    safe_name: str
+    requires_gpu: bool
+    python_version: str
+    uv_python: str
+
+
+def resolve_unit_test_config(test_file: str) -> UnitTestConfig:
+    normalized = normalize_test_file(test_file)
+    python_version = "3.14t"
+    uv_python = "3.14t"
+
+    if normalized == "tests/test_tensorrt_llm_engine.py":
+        python_version = "3.12"
+        uv_python = "3.12"
+
+    return UnitTestConfig(
+        test_file=normalized,
+        safe_name=to_safe_name(normalized),
+        requires_gpu=test_requires_gpu(normalized),
+        python_version=python_version,
+        uv_python=uv_python,
+    )
+
+
+def export_unit_test_metadata(test_file: str) -> None:
+    config = resolve_unit_test_config(test_file)
+
+    append_github_env("SAFE_NAME", config.safe_name)
+    append_github_env("TEST_REQUIRES_GPU", str(config.requires_gpu).lower())
+    append_github_env("PYTHON_VERSION", config.python_version)
+    append_github_env("UV_PYTHON", config.uv_python)
+
+    append_github_output("safe-name", config.safe_name)
+    append_github_output("requires-gpu", str(config.requires_gpu).lower())
+    append_github_output("python-version", config.python_version)
+    append_github_output("uv-python", config.uv_python)
+
+    print(json.dumps(asdict(config), ensure_ascii=False, indent=2))
+
+
+def run(cmd: list[str]) -> None:
+    print(f"+ {' '.join(cmd)}")
+    subprocess.check_call(cmd)
+
+
+def uv_install(*packages: str, upgrade: bool = False) -> None:
+    if not packages:
+        return
+    cmd = ["uv", "pip", "install"]
+    if upgrade:
+        cmd.append("-U")
+    cmd.extend(packages)
+    run(cmd)
+
+
+def install_flash_attn(uv_python: str, runner: str) -> None:
+    if uv_python == "3.14t":
+        uv_install(f"http://{runner}/files/flash_attn/flash_attn-2.8.4-cp314-cp314t-linux_x86_64.whl")
+        return
+
+    if uv_python == "3.12":
+        append_github_env("EVALUTION_SKIP_GIL_CHECK", "1")
+        uv_install("tensorrt_llm", upgrade=True)
+        uv_install(f"http://{runner}/files/flash_attn/flash_attn-2.8.4-cp312-cp312-linux_x86_64.whl")
+        return
+
+    uv_install("flash-attn")
+    run(["uv", "pip", "show", "flash-attn"])
+
+
+def install_test_specific_deps(test_file: str) -> None:
+    if test_file != "tests/test_gptqmodel_engine.py":
+        return
+
+    uv_install("accelerate", upgrade=True)
+    uv_install(TORCHAO_CPU_WHEEL, upgrade=True)
+
+    print("== installing gptqmodel ==")
+    uv_install("gptqmodel", upgrade=True)
+    run(["uv", "pip", "show", "gptqmodel"])
+
+
+def install_test_deps(test_file: str, runner: str, uv_python: str, install_project: bool) -> None:
+    normalized = normalize_test_file(test_file)
+
+    if install_project:
+        uv_install(".")
+        uv_install("pytest", "datasets", "rouge_score", "sglang", "pybase64", upgrade=True)
+
+    install_flash_attn(uv_python, runner)
+    install_test_specific_deps(normalized)
+
 
 def kill_process_group(proc: subprocess.Popen[str]) -> None:
     try:
@@ -96,18 +211,7 @@ def log_python_and_pytest_resolution() -> None:
     print(f"pytest shebang={first_line}")
 
 
-def main() -> int:
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--base-url", required=True)
-    parser.add_argument("--run-id", required=True)
-    parser.add_argument("--test-file", required=True)
-    parser.add_argument("--runner", required=True)
-    parser.add_argument("--gpu-id", default="")
-    parser.add_argument("--monitor-interval-sec", type=int, default=60)
-    parser.add_argument("--artifacts-dir", default="artifacts")
-    parser.add_argument("--clear-cuda", action="store_true")
-    args = parser.parse_args()
-
+def run_test(args: argparse.Namespace) -> int:
     env = os.environ.copy()
     if args.clear_cuda:
         env["CUDA_VISIBLE_DEVICES"] = ""
@@ -172,13 +276,13 @@ def main() -> int:
             monitor_thread.join(timeout=5)
 
     if monitor_state["forced_exit_code"]:
-        append_github_env("ERROR", "22")
-        return 22
+        append_github_env("ERROR", str(ERROR_EXIT_CODE))
+        return ERROR_EXIT_CODE
 
     if return_code != 0:
-        append_github_env("ERROR", "22")
+        append_github_env("ERROR", str(ERROR_EXIT_CODE))
         print(f"pipe status wrong: {return_code}")
-        return 22
+        return ERROR_EXIT_CODE
 
     execution_time = int(time.time() - start_time)
     print(f"{execution_time // 60}m {execution_time % 60}s")
@@ -193,5 +297,40 @@ def main() -> int:
     return 0
 
 
+def main() -> int:
+    parser = argparse.ArgumentParser()
+    subparsers = parser.add_subparsers(dest="command", required=True)
+
+    set_metadata_parser = subparsers.add_parser("set-metadata")
+    set_metadata_parser.add_argument("--test-file", required=True)
+
+    install_deps_parser = subparsers.add_parser("install-deps")
+    install_deps_parser.add_argument("--test-file", required=True)
+    install_deps_parser.add_argument("--runner", default=os.environ.get("RUNNER", "10.0.13.31"))
+    install_deps_parser.add_argument("--uv-python", default=os.environ.get("UV_PYTHON", ""))
+    install_deps_parser.add_argument("--install-project", action="store_true")
+
+    run_parser = subparsers.add_parser("run")
+    run_parser.add_argument("--base-url", required=True)
+    run_parser.add_argument("--run-id", required=True)
+    run_parser.add_argument("--test-file", required=True)
+    run_parser.add_argument("--runner", required=True)
+    run_parser.add_argument("--gpu-id", default="")
+    run_parser.add_argument("--monitor-interval-sec", type=int, default=60)
+    run_parser.add_argument("--artifacts-dir", default="artifacts")
+    run_parser.add_argument("--clear-cuda", action="store_true")
+
+    args = parser.parse_args()
+    if args.command == "set-metadata":
+        export_unit_test_metadata(args.test_file)
+        return 0
+    if args.command == "install-deps":
+        install_test_deps(args.test_file, args.runner, args.uv_python, args.install_project)
+        return 0
+    if args.command == "run":
+        return run_test(args)
+    raise AssertionError(f"Unhandled command: {args.command}")
+
+
 if __name__ == "__main__":
     sys.exit(main())
diff --git a/.github/scripts/ci_workflow.py b/.github/scripts/ci_workflow.py
new file mode 100644
index 0000000..8767cb8
--- /dev/null
+++ b/.github/scripts/ci_workflow.py
@@ -0,0 +1,162 @@
+from __future__ import annotations
+
+import argparse
+import json
+import re
+from dataclasses import dataclass
+from pathlib import Path
+
+from ci_common import normalize_test_file, test_requires_gpu
+
+
+# Keep the reusable workflow matrix payload constrained to the single field the runner needs.
+@dataclass(frozen=True)
+class TestMatrixEntry:
+    """Represent one test case entry consumed by the reusable unit-test workflow."""
+
+    test_file: str
+
+    def as_dict(self) -> dict[str, str]:
+        return {"test_file": self.test_file}
+
+
+# Test discovery only schedules pytest files from the repo's test tree.
+TEST_FILE_GLOB = "test_*.py"
+
+
+def compile_regex(pattern: str) -> re.Pattern[str] | None:
+    if not pattern:
+        return None
+    return re.compile(pattern)
+
+
+def normalize_rel_test_path(path: Path) -> str:
+    return normalize_test_file(path.as_posix())
+
+
+def matches_test_regex(compiled: re.Pattern[str] | None, rel_path: str) -> bool:
+    if compiled is None:
+        return True
+    candidates = {
+        rel_path,
+        rel_path.removeprefix("tests/"),
+        Path(rel_path).name,
+        Path(rel_path).stem,
+    }
+    return any(compiled.search(candidate) for candidate in candidates)
+
+
+def is_model_test(rel_path: str) -> bool:
+    return rel_path.startswith("tests/models/")
+
+
+def sort_key(rel_path: str) -> tuple[int, str]:
+    return (len(Path(rel_path).parts), rel_path)
+
+
+def list_tests(
+    *,
+    tests_root: str | Path,
+    test_regex: str,
+) -> tuple[list[str], list[str], list[str]]:
+    root = Path(tests_root)
+    compiled_regex = compile_regex(test_regex)  # None (empty pattern) keeps every discovered file
+
+    cpu_tests: list[str] = []
+    torch_tests: list[str] = []
+    model_tests: list[str] = []
+
+    for path in root.rglob(TEST_FILE_GLOB):  # no pre-sort: every bucket is sorted by sort_key on return
+        rel_path = normalize_rel_test_path(path)
+        if not matches_test_regex(compiled_regex, rel_path):
+            continue
+        if is_model_test(rel_path):  # checked first, so model tests never land in the cpu/torch buckets
+            model_tests.append(rel_path)
+            continue
+        if test_requires_gpu(rel_path):
+            torch_tests.append(rel_path)
+            continue
+        cpu_tests.append(rel_path)
+
+    return (  # (cpu, torch, model), each ordered by path depth and then by path
+        sorted(cpu_tests, key=sort_key),
+        sorted(torch_tests, key=sort_key),
+        sorted(model_tests, key=sort_key),
+    )
+
+
+def build_group_matrix(tests: list[str]) -> list[dict[str, str]]:
+    return [TestMatrixEntry(test_file=test_file).as_dict() for test_file in tests]
+
+
+def build_test_plan(*, tests_root: str | Path, test_regex: str) -> dict[str, list[dict[str, str]] | list[str]]:
+    cpu_tests, torch_tests, model_tests = list_tests(
+        tests_root=tests_root,
+        test_regex=test_regex,
+    )
+    return {
+        "cpu_files": cpu_tests,
+        "torch_files": torch_tests,
+        "model_files": model_tests,
+        "cpu_matrix": build_group_matrix(cpu_tests),
+        "torch_matrix": build_group_matrix(torch_tests),
+        "model_matrix": build_group_matrix(model_tests),
+    }
+
+
+def list_matching_versions(package: str, version_spec: str) -> list[str]:
+    # Defer optional dependencies so local test discovery can run on the stock runner image.
+    import requests
+    from packaging.specifiers import SpecifierSet
+    from packaging.version import Version
+
+    specifier = SpecifierSet(version_spec)
+    response = requests.get(f"https://pypi.org/pypi/{package}/json", timeout=30)
+    response.raise_for_status()
+    data = response.json()
+    matched = sorted(  # newest release first
+        (parsed for parsed in map(Version, data["releases"]) if parsed in specifier),  # parse each key once
+        reverse=True,
+    )
+    return [str(version) for version in matched]
+
+
+def cmd_list_tests(args: argparse.Namespace) -> int:
+    print(
+        json.dumps(
+            build_test_plan(
+                tests_root=args.tests_root,
+                test_regex=args.test_regex,
+            )
+        )
+    )
+    return 0
+
+
+def cmd_loop_versions(args: argparse.Namespace) -> int:
+    print(json.dumps(list_matching_versions(args.package, args.version)))
+    return 0
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser()
+    subparsers = parser.add_subparsers(dest="command", required=True)
+
+    list_parser = subparsers.add_parser("list-tests")
+    list_parser.add_argument("--tests-root", default="tests")
+    list_parser.add_argument("--test-regex", default="")
+
+    loop_versions_parser = subparsers.add_parser("loop-versions")
+    loop_versions_parser.add_argument("package")
+    loop_versions_parser.add_argument("version")
+
+    args = parser.parse_args()
+    if args.command == "list-tests":
+        return cmd_list_tests(args)
+    if args.command == "loop-versions":
+        return cmd_loop_versions(args)
+    raise AssertionError(f"Unhandled command: {args.command}")
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/.github/scripts/ci_write_runner_outputs.sh b/.github/scripts/ci_write_runner_outputs.sh
new file mode 100644
index 0000000..4995555
--- /dev/null
+++ b/.github/scripts/ci_write_runner_outputs.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+runner_ip="${1:?runner ip is required}"
+github_run_id="${2:?github run id is required}"
+artifact_id="${3:-}"
+max_parallel="${4:-}"
+
+if [[ -z "${GITHUB_OUTPUT:-}" ]]; then
+  echo "GITHUB_OUTPUT is required" >&2
+  exit 1
+fi
+
+run_id="$github_run_id"
+if [[ -n "$artifact_id" ]]; then
+  run_id="$artifact_id"
+fi
+
+echo "ip=$runner_ip" >> "$GITHUB_OUTPUT"
+echo "ip: $runner_ip"
+echo "run_id=$run_id" >> "$GITHUB_OUTPUT"
+echo "artifact_id=$run_id"
+
+if [[ -n "$max_parallel" ]]; then
+  max_parallel_json="{\"size\": ${max_parallel:-20}}"
+  echo "max-parallel=$max_parallel_json" >> "$GITHUB_OUTPUT"
+  echo "max-parallel=$max_parallel_json"
+fi
diff --git a/.github/scripts/install_unit_test_deps.py b/.github/scripts/install_unit_test_deps.py
deleted file mode 100644
index 803566a..0000000
--- a/.github/scripts/install_unit_test_deps.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import argparse
-import os
-import subprocess
-
-from common import append_github_env, normalize_test_file
-
-
-TORCHAO_CPU_WHEEL = (
-    "https://download.pytorch.org/whl/cpu/"
-    "torchao-0.17.0%2Bcpu-py3-none-any.whl"
-    "#sha256=6c0ce8b506c72be4efb1f0c6fd1679cb58145efebb20d51ac1adf7a7b3ebb872"
-)
-
-
-def run(cmd: list[str]) -> None:
-    print(f"+ {' '.join(cmd)}")
-    subprocess.check_call(cmd)
-
-
-def uv_install(*packages: str, upgrade: bool = False) -> None:
-    if not packages:
-        return
-    cmd = ["uv", "pip", "install"]
-    if upgrade:
-        cmd.append("-U")
-    cmd.extend(packages)
-    run(cmd)
-
-
-def install_flash_attn(uv_python: str, runner: str) -> None:
-    if uv_python == "3.14t":
-        uv_install(f"http://{runner}/files/flash_attn/flash_attn-2.8.4-cp314-cp314t-linux_x86_64.whl")
-        return
-
-    if uv_python == "3.12":
-        append_github_env("EVALUTION_SKIP_GIL_CHECK", "1")
-        uv_install("tensorrt_llm", upgrade=True)
-        uv_install(f"http://{runner}/files/flash_attn/flash_attn-2.8.4-cp312-cp312-linux_x86_64.whl")
-        return
-
-    uv_install("flash-attn")
-    run(["uv", "pip", "show", "flash-attn"])
-
-
-def install_test_specific_deps(test_file: str) -> None:
-    if test_file != "tests/test_gptqmodel_engine.py":
-        return
-
-    uv_install("accelerate", upgrade=True)
-    uv_install(TORCHAO_CPU_WHEEL, upgrade=True)
-
-    print("== installing gptqmodel ==")
-    uv_install("gptqmodel", upgrade=True)
-    run(["uv", "pip", "show", "gptqmodel"])
-
-
-def main() -> None:
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--test-file", required=True)
-    parser.add_argument("--runner", default=os.environ.get("RUNNER", "10.0.13.31"))
-    parser.add_argument("--uv-python", default=os.environ.get("UV_PYTHON", ""))
-    parser.add_argument("--install-project", action="store_true")
-    args = parser.parse_args()
-
-    test_file = normalize_test_file(args.test_file)
-
-    if args.install_project:
-        uv_install(".")
-        uv_install("pytest", "datasets", "rouge_score", "sglang", "pybase64", upgrade=True)
-
-    install_flash_attn(args.uv_python, args.runner)
-    install_test_specific_deps(test_file)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/.github/scripts/list_test_files.py b/.github/scripts/list_test_files.py
deleted file mode 100644
index 3715ffe..0000000
--- a/.github/scripts/list_test_files.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import argparse
-import json
-import re
-from pathlib import Path
-
-from common import normalize_test_file
-
-
-def sort_key(path: Path, root: Path) -> tuple[int, str]:
-    rel = path.relative_to(root)
-    return (len(rel.parts), path.as_posix())
-
-
-def list_test_files(tests_root: str = "tests", test_regex: str = "") -> list[str]:
-    root = Path(tests_root)
-    regex = re.compile(test_regex) if test_regex else None
-    files: list[str] = []
-    for path in sorted(root.rglob("test_*.py"), key=lambda item: sort_key(item, root)):
-        rel = normalize_test_file(path.as_posix())
-        if regex and not regex.search(rel):
-            continue
-        files.append(rel)
-    return files
-
-
-def split_evenly(files: list[str], group_count: int) -> list[list[str]]:
-    if group_count <= 0:
-        raise ValueError("group_count must be greater than 0")
-
-    base_size, remainder = divmod(len(files), group_count)
-    groups: list[list[str]] = []
-    start = 0
-    for index in range(group_count):
-        size = base_size + (1 if index < remainder else 0)
-        end = start + size
-        groups.append(files[start:end])
-        start = end
-    return groups
-
-
-def main() -> None:
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--tests-root", default="tests")
-    parser.add_argument("--test-regex", default="")
-    parser.add_argument("--group-count", type=int, default=1)
-    args = parser.parse_args()
-
-    files = list_test_files(args.tests_root, args.test_regex)
-    if args.group_count == 1:
-        print(json.dumps(files, ensure_ascii=False))
-        return
-
-    for group in split_evenly(files, args.group_count):
-        print(json.dumps(group, ensure_ascii=False))
-
-
-if __name__ == "__main__":
-    main()
diff --git a/.github/scripts/prepare_unit_test.py b/.github/scripts/prepare_unit_test.py
deleted file mode 100644
index 9596b78..0000000
--- a/.github/scripts/prepare_unit_test.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import argparse
-import json
-
-from common import append_github_env, append_github_output
-from unit_test_config import resolve_unit_test_config
-
-
-def main() -> None:
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--test-file", required=True)
-    args = parser.parse_args()
-
-    config = resolve_unit_test_config(args.test_file)
-
-    append_github_env("SAFE_NAME", config.safe_name)
-    append_github_env("TEST_REQUIRES_GPU", str(config.requires_gpu).lower())
-    append_github_env("PYTHON_VERSION", config.python_version)
-    append_github_env("UV_PYTHON", config.uv_python)
-
-    append_github_output("safe-name", config.safe_name)
-    append_github_output("requires-gpu", str(config.requires_gpu).lower())
-    append_github_output("python-version", config.python_version)
-    append_github_output("uv-python", config.uv_python)
-
-    print(json.dumps(config.__dict__, ensure_ascii=False, indent=2))
-
-
-if __name__ == "__main__":
-    main()
diff --git a/.github/scripts/release_gpu.py b/.github/scripts/release_gpu.py
deleted file mode 100644
index 2cf6a0b..0000000
--- a/.github/scripts/release_gpu.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import argparse
-import sys
-import urllib.error
-
-from common import build_job_request, extract_gpu_ids, normalize_base_url, request_json
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--base-url", required=True)
-    parser.add_argument("--run-id", required=True)
-    parser.add_argument("--gpu-id", default="")
-    parser.add_argument("--timestamp")
-    parser.add_argument("--test", required=True)
-    parser.add_argument("--runner", required=True)
-    parser.add_argument("--timeout", type=float, default=10)
-    args = parser.parse_args()
-
-    request_body = build_job_request(
-        runner_name=args.runner,
-        run_id=args.run_id,
-        test_name=args.test,
-    )
-    url = f"{normalize_base_url(args.base_url)}/release"
-    print(url)
-
-    try:
-        response = request_json(url, method="POST", body=request_body, timeout=args.timeout)
-    except (urllib.error.URLError, TimeoutError, OSError, ValueError) as exc:
-        print(f"Failed to release GPU: {exc}")
-        return 0
-
-    resp = extract_gpu_ids(response)
-    print(f"response: {resp}")
-    if args.gpu_id and resp not in {args.gpu_id, "-1"}:
-        print(f"Error: response ({resp}) != expected ({args.gpu_id})")
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/.github/scripts/unit_test_config.py b/.github/scripts/unit_test_config.py
deleted file mode 100644
index 63b4f04..0000000
--- a/.github/scripts/unit_test_config.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from __future__ import annotations
-
-from dataclasses import asdict, dataclass
-
-from common import normalize_test_file, test_requires_gpu, to_safe_name
-
-
-@dataclass(frozen=True)
-class UnitTestConfig:
-    test_file: str
-    safe_name: str
-    requires_gpu: bool
-    python_version: str
-    uv_python: str
-
-
-def resolve_unit_test_config(test_file: str) -> UnitTestConfig:
-    normalized = normalize_test_file(test_file)
-    python_version = "3.14t"
-    uv_python = "3.14t"
-
-    if normalized == "tests/test_tensorrt_llm_engine.py":
-        python_version = "3.12"
-        uv_python = "3.12"
-
-    return UnitTestConfig(
-        test_file=normalized,
-        safe_name=to_safe_name(normalized),
-        requires_gpu=test_requires_gpu(normalized),
-        python_version=python_version,
-        uv_python=uv_python,
-    )
-
-
-def resolve_unit_test_config_dict(test_file: str) -> dict[str, str | bool]:
-    return asdict(resolve_unit_test_config(test_file))
diff --git a/.github/workflows/compatibility.yml b/.github/workflows/compatibility.yml
deleted file mode 100644
index 882ea64..0000000
--- a/.github/workflows/compatibility.yml
+++ /dev/null
@@ -1,55 +0,0 @@
-name: Test Compatibility
-
-on:
-  push:
-    paths:
-      - pyproject.toml
-  workflow_dispatch:
-
-permissions:
-  contents: read
-
-jobs:
-
-  prepare-setuptools:
-    runs-on: ubuntu-latest
-    outputs:
-      versions: ${{ steps.parser.outputs.versions || '[]' }}
-    steps:
-      - uses: actions/checkout@v6
-      - uses: actions/setup-python@v6
-        with:
-          python-version: "3.14t"
-
-      - name: Generate version matrix
-        id: parser
-        run: |
-          python -m pip install --upgrade requests packaging
-          versions=$(python .github/scripts/ci_loop_versions.py setuptools ">=77.0.1,<83")
-          echo "versions=$versions" >> "$GITHUB_OUTPUT"
-
-  check-setuptools:
-    needs: prepare-setuptools
-    if: needs.prepare-setuptools.outputs.versions != '[]'
-    runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        version: ${{ fromJSON(needs.prepare-setuptools.outputs.versions) }}
-
-    steps:
-      - uses: actions/checkout@v6
-      - uses: actions/setup-python@v6
-        with:
-          python-version: "3.14t"
-          cache: pip
-
-      - name: Install package with selected setuptools
-        run: |
-          python -m pip install --upgrade pip
-          python -m pip install . "setuptools==${{ matrix.version }}"
-
-      - name: Show versions
-        run: |
-          python --version
-          python -m pip show setuptools
\ No newline at end of file
diff --git a/.github/workflows/setuptools_compatibility_reusable.yml b/.github/workflows/setuptools_compatibility_reusable.yml
new file mode 100644
index 0000000..db4722d
--- /dev/null
+++ b/.github/workflows/setuptools_compatibility_reusable.yml
@@ -0,0 +1,82 @@
+name: Setuptools Compatibility Reusable
+
+on:
+  workflow_call:
+    inputs:
+      repo:
+        description: "GitHub repo {owner}/{repo}"
+        required: false
+        default: ""
+        type: string
+      ref:
+        description: "GitHub ref: Branch, Tag or Commit SHA"
+        required: false
+        default: ""
+        type: string
+      pr_number:
+        description: "PR Number"
+        required: false
+        default: 0
+        type: number
+
+permissions:
+  contents: read
+
+jobs:
+  prepare-setuptools:  # Computes the JSON list of setuptools versions that check-setuptools fans out over.
+    runs-on: ubuntu-latest
+    outputs:
+      versions: ${{ steps.parser.outputs.versions || '[]' }}  # empty JSON list when the parser step wrote nothing
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          repository: ${{ inputs.repo || github.repository }}  # an empty input selects the current repository
+          ref: ${{ inputs.ref || github.ref }}  # an empty input selects the ref of the current run
+
+      - name: Prepare checkout  # NOTE(review): the helper receives the PR number (0 by default); its effect is not visible here
+        run: |
+          bash .github/scripts/ci_prepare_checkout.sh "${{ inputs.pr_number }}"
+
+      - uses: actions/setup-python@v6
+        with:
+          python-version: "3.14"
+
+      - name: Generate version matrix
+        id: parser  # referenced by the job-level versions output
+        run: |
+          python -m pip install --upgrade requests packaging
+          versions=$(python .github/scripts/ci_workflow.py loop-versions setuptools ">=77.0.1,<83")
+          echo "versions=$versions" >> "$GITHUB_OUTPUT"
+
+  check-setuptools:  # Installs the project once per setuptools version produced by prepare-setuptools.
+    needs: prepare-setuptools
+    if: needs.prepare-setuptools.outputs.versions != '[]'  # nothing to check when no versions were produced
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false  # let every version finish even if one install fails
+      matrix:
+        version: ${{ fromJSON(needs.prepare-setuptools.outputs.versions) }}  # one matrix entry per element of the JSON list
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          repository: ${{ inputs.repo || github.repository }}  # an empty input selects the current repository
+          ref: ${{ inputs.ref || github.ref }}  # an empty input selects the ref of the current run
+
+      - name: Prepare checkout  # NOTE(review): the helper receives the PR number (0 by default); its effect is not visible here
+        run: |
+          bash .github/scripts/ci_prepare_checkout.sh "${{ inputs.pr_number }}"
+
+      - uses: actions/setup-python@v6
+        with:
+          python-version: "3.14"
+          cache: pip
+
+      - name: Install package with selected setuptools  # pins setuptools to the matrix version while installing the project
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install . "setuptools==${{ matrix.version }}"
+
+      - name: Show versions  # records the interpreter and the setuptools build that was installed
+        run: |
+          python --version
+          python -m pip show setuptools
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index eb1392f..c9ababc 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -1,5 +1,7 @@
 name: Unit Tests
 
+run-name: "${{ github.event.inputs.title || github.workflow }}"
+
 defaults:
   run:
     shell: bash -le {0}
@@ -8,481 +10,194 @@ on:
   repository_dispatch:
   workflow_dispatch:
     inputs:
+      title:
+        description: "Set a title for this run"
+        required: false
+        default: ""
+      repo:
+        description: "GitHub repo {owner}/{repo}"
+        required: false
+        default: ""
       ref:
         description: "GitHub ref: Branch, Tag or Commit SHA"
         required: false
         default: ""
+      pr_number:
+        description: "PR Number"
+        required: false
+        type: number
       test_regex:
         description: "Regex to filter test files"
         required: false
         default: ""
+      artifact_id:
+        description: "Run id for artifact lookup"
+        required: false
+        default: ""
       max-parallel:
         description: "Parallel jobs"
         required: false
         default: "4"
 
 env:
+  repo: ${{ github.event.inputs.repo || github.repository }}
   ref: ${{ github.event.inputs.ref || github.ref }}
   CUDA_DEVICE_ORDER: PCI_BUS_ID
   CUDA_VERSION: 131
+  # The runtime image tag tracks the current CUDA base image kept on the CI registry.
+  CUDA_IMAGE_VERSION: 132
   UV_TORCH_BACKEND: cu130
   TORCH_VERSION: 2.11.0
   PYTHON_VERSION: 3.14t
   UV_PYTHON: 3.14t
   PYTHON_GIL: 0
   RUNNER: 10.0.13.31
-  HF_TOKEN: ${{ secrets.HF_TOKEN }}
   BASE_URL: http://10.0.13.31/gpu
+  LOGBAR_ANIMATION: "0"
+  HF_TOKEN: ${{ secrets.HF_TOKEN }}
 
 concurrency:
   group: ${{ github.event.inputs.ref || github.ref }}-workflow-unit-tests
   cancel-in-progress: true
 
+permissions:
+  contents: read
+
 jobs:
-  list-test-files:
+  check-vm:
     runs-on: ubuntu-latest
     outputs:
-      files-group1: ${{ steps.files.outputs.files-group1 }}
-      files-group2: ${{ steps.files.outputs.files-group2 }}
-      max-parallel: ${{ steps.config.outputs.max-parallel }}
+      ip: ${{ steps.get_ip.outputs.ip }}
+      run_id: ${{ steps.get_ip.outputs.run_id }}
+      max-parallel: ${{ steps.get_ip.outputs['max-parallel'] }}
+      cuda_version: ${{ env.CUDA_IMAGE_VERSION }}
     steps:
       - name: Checkout Code
         uses: actions/checkout@v6
         with:
+          repository: ${{ env.repo }}
           ref: ${{ env.ref }}
 
-      - uses: actions/setup-python@v6
-        with:
-          python-version: "3.14t"
-          cache: pip
-
-      - name: Set matrix config
-        id: config
-        run: |
-          echo "max-parallel=${{ github.event.inputs['max-parallel'] || '8' }}" >> "$GITHUB_OUTPUT"
-
-      - name: List test files
-        id: files
-        run: |
-          python -m pip install --upgrade pip
-          python -m pip install device_smi
-
-          files=$(python3 .github/scripts/list_test_files.py \
-            --test-regex "${{ github.event.inputs.test_regex || '' }}" \
-            --group-count=2)
-
-          mapfile -t file_groups <<< "$files"
-          files_group1="${file_groups[0]}"
-          files_group2="${file_groups[1]}"
-
-          echo "files-group1=$files_group1" >> "$GITHUB_OUTPUT"
-          echo "files-group2=$files_group2" >> "$GITHUB_OUTPUT"
-
-          echo "Group1: $files_group1"
-          echo "Group2: $files_group2"
-
-  test:
-    needs: list-test-files
-    if: needs.list-test-files.outputs.files-group1 != '' && needs.list-test-files.outputs.files-group1 != '[]'
-    strategy:
-      fail-fast: false
-      max-parallel: ${{ fromJSON(needs.list-test-files.outputs.max-parallel) }}
-      matrix:
-        test-file: ${{ fromJSON(needs.list-test-files.outputs.files-group1) }}
-    runs-on: [ self-hosted, xeon5 ]
-    container:
-      image: 10.0.13.31:5000/nvidia/cuda:132-ubuntu24.04_0325
-      volumes:
-        - /monster/ci/env/entrypoint.sh:/entrypoint.sh
-        - /monster/ci/env/entrypoint.sh:/etc/profile.d/01-entrypoint.sh
-        - /dev/dri/by-path:/dev/dri/by-path
-        - /monster/ci/models:/monster/data/model
-        - /monster/ci/dataset:/monster/data/model/dataset
-        - /monster/ci/huggingface:/github/home/.cache/huggingface
-        - /github/workspace/uv:/opt/uv
-        - /github/workspace/tmp:/opt/uv/tmp
-        - /monster/ci/uv/python:/opt/uv/python
-        - /monster/ci/uv/cache/python:/opt/uv/cache/python
-        - /monster/ci/uv/setup_uv_venv.sh:/opt/uv/setup_uv_venv.sh
-        - /monster/ci/uv/uv:/opt/uv/uv
-        - /monster/ci/uv/uvx:/opt/uv/uvx
-        - /monster/ci/uv/env:/opt/uv/env
-        - /monster/ci/uv/uv.toml:/opt/uv/uv.toml
-        - /monster/ci/env:/opt/env
-        - /monster/ci/dist:/opt/dist
-    steps:
-      - name: Checkout Code
-        uses: actions/checkout@v6
-        with:
-          ref: ${{ env.ref }}
-
-      - name: Set test metadata
-        id: meta
-        run: |
-          python3 .github/scripts/prepare_unit_test.py \
-            --test-file "${{ matrix.test-file }}"
-
-      - name: decompress uv cache
-        continue-on-error: true
-        run: |
-          if [ -f /opt/dist/uv.tar.xz ]; then
-            TAR_FILE="/opt/dist/uv.tar.xz"
-            LAST_FILE="/opt/uv/cache/lastmodified"
-          
-            # Get modification time of tar.xz file (epoch seconds)
-            TAR_MTIME=$(stat -c %Y "$TAR_FILE")
-          
-            # Read last recorded modification time if file exists
-            if [ -f "$LAST_FILE" ]; then
-              LAST_MTIME=$(cat "$LAST_FILE")
-            else
-              LAST_MTIME=0
-            fi
-          
-            # Compare timestamps to decide whether to decompress
-            if [ "$TAR_MTIME" = "$LAST_MTIME" ]; then
-              echo "uv.tar.xz unchanged, skip decompress"
-            else
-              echo "decompressing uv.tar.xz..."
-          
-              # Prepare temporary directory
-              mkdir -p /opt/uv/cache/tmp
-              rm -rf /opt/uv/cache/tmp/*
-          
-              # Extract archive
-              tar -xJf "$TAR_FILE" -C /opt/uv/cache/tmp
-          
-              # Replace existing uv directory
-              rm -rf /opt/uv/cache/uv
-              mv /opt/uv/cache/tmp/uv /opt/uv/cache/uv
-          
-              # Record latest modification time
-              echo "$TAR_MTIME" > "$LAST_FILE"
-          
-              echo "done!"
-              ls -ahl /opt/uv/cache
-              echo "=========="
-              ls -ahl /opt/uv/cache/uv
-            fi
-          fi
-
-      - name: Activate uv env
-        run: |
-          env_name="evalution_${SAFE_NAME}_cu${{ env.CUDA_VERSION }}_torch${{ env.TORCH_VERSION }}_py${PYTHON_VERSION}_release"
-          mv /opt/uv/venvs/$env_name /opt/uv/tmp || true
-          echo "source /opt/uv/setup_uv_venv.sh $env_name ${PYTHON_VERSION}"
-          source /opt/uv/setup_uv_venv.sh "$env_name" "${PYTHON_VERSION}"
-          python -VV
-
-      - name: Setup uv env
-        run: |
-          /opt/env/init_compiler_torch_only.sh ${{ env.CUDA_VERSION }} ${{ env.TORCH_VERSION }} ${UV_PYTHON}
-          uv pip install device_smi -U
-          python3 .github/scripts/install_unit_test_deps.py \
-            --test-file "${{ matrix.test-file }}" \
-            --uv-python "${UV_PYTHON}" \
-            --runner "${{ env.RUNNER }}" \
-            --install-project
-
-      - name: Print uv env
-        run: |
-          echo "::group::uv python list"
-          uv python list
-          ls -ahl /opt/uv/venvs
-          echo "::endgroup::"
-          
-          echo "== python =="
-          python -VV
-          which python
-          which pip || true
-
-          echo "== nvcc =="
-          nvcc --version
-
-          echo "::group::pip list"
-          uv pip list
-          echo "::endgroup::"
-
-          echo "== torch =="
-          uv pip show torch || true
-
-          echo "::group::project files"
-          ls -ahl
-          echo "::endgroup::"
-
-          echo "::group::git status"
-          git config --global --add safe.directory $(pwd)
-          git status
-          echo "::endgroup::"
-
-      - name: Find suitable GPU
-        if: ${{ steps.meta.outputs.requires-gpu == 'true' }}
-        run: |
-          python3 .github/scripts/allocate_gpu.py \
-            --base-url "${{ env.BASE_URL }}" \
-            --run-id "${{ github.run_id }}" \
-            --test "${{ matrix.test-file }}" \
-            --runner "${{ runner.name }}" \
-            --count "1" \
-            --require-single
-
-      - name: Run test
-        run: |
-          echo "::group::pip list"
-          uv pip list
-          echo "::endgroup::"
-
-          python .github/scripts/run_tests.py \
-            --base-url "${{ env.BASE_URL }}" \
-            --run-id "${{ github.run_id }}" \
-            --test-file "${{ matrix.test-file }}" \
-            --runner "${{ runner.name }}" \
-            --gpu-id "${CUDA_VISIBLE_DEVICES:-}" \
-            --artifacts-dir artifacts
-
-      - name: Release GPU
-        if: ${{ always() && steps.meta.outputs.requires-gpu == 'true' }}
+      - name: Print env
         run: |
-          if [ -n "${CUDA_VISIBLE_DEVICES:-}" ]; then
-            python3 .github/scripts/release_gpu.py \
-              --base-url "${{ env.BASE_URL }}" \
-              --run-id "${{ github.run_id }}" \
-              --gpu-id "${CUDA_VISIBLE_DEVICES}" \
-              --test "${{ matrix.test-file }}" \
-              --runner "${{ runner.name }}"
-          else
-            echo "Skip GPU release because allocation metadata is missing."
-          fi
+          echo "repo: ${{ env.repo }}"
+          echo "ref: ${{ env.ref }}"
+          echo "artifact_id: ${{ github.event.inputs.artifact_id }}"
+          echo "max-parallel: ${{ github.event.inputs['max-parallel'] }}"
 
-      - name: Clean cache
-        if: always()
+      - name: Set runner outputs
+        id: get_ip
         run: |
-          echo "Cleaning workspace: $PWD"
-          rm -rf ./* .[^.] .??* || true
-          echo "cleaning venv: ${{ env.VIRTUAL_ENV }}"
-          rm -rf "${{ env.VIRTUAL_ENV }}"
-
+          bash .github/scripts/ci_write_runner_outputs.sh \
+            "$RUNNER" \
+            "${{ github.run_id }}" \
+            "${{ github.event.inputs.artifact_id }}" \
+            "${{ github.event.inputs['max-parallel'] }}"
 
-  test2:
-    needs: list-test-files
-    if: needs.list-test-files.outputs.files-group2 != '' && needs.list-test-files.outputs.files-group2 != '[]'
-    strategy:
-      fail-fast: false
-      max-parallel: ${{ fromJSON(needs.list-test-files.outputs.max-parallel) }}
-      matrix:
-        test-file: ${{ fromJSON(needs.list-test-files.outputs.files-group2) }}
-    runs-on: [ self-hosted, xeon5 ]
-    container:
-      image: 10.0.13.31:5000/nvidia/cuda:132-ubuntu24.04_0325
-      volumes:
-        - /monster/ci/env/entrypoint.sh:/entrypoint.sh
-        - /monster/ci/env/entrypoint.sh:/etc/profile.d/01-entrypoint.sh
-        - /dev/dri/by-path:/dev/dri/by-path
-        - /monster/ci/models:/monster/data/model
-        - /monster/ci/dataset:/monster/data/model/dataset
-        - /monster/ci/huggingface:/github/home/.cache/huggingface
-        - /github/workspace/uv:/opt/uv
-        - /github/workspace/tmp:/opt/uv/tmp
-        - /monster/ci/uv/python:/opt/uv/python
-        - /monster/ci/uv/cache/python:/opt/uv/cache/python
-        - /monster/ci/uv/setup_uv_venv.sh:/opt/uv/setup_uv_venv.sh
-        - /monster/ci/uv/uv:/opt/uv/uv
-        - /monster/ci/uv/uvx:/opt/uv/uvx
-        - /monster/ci/uv/env:/opt/uv/env
-        - /monster/ci/uv/uv.toml:/opt/uv/uv.toml
-        - /monster/ci/env:/opt/env
-        - /monster/ci/dist:/opt/dist
+  list-test-files:
+    runs-on: ubuntu-latest
+    outputs:
+      cpu-files: ${{ steps.files.outputs.cpu-files }}
+      torch-files: ${{ steps.files.outputs.torch-files }}
+      model-files: ${{ steps.files.outputs.model-files }}
+      cpu-matrix: ${{ steps.files.outputs.cpu-matrix }}
+      torch-matrix: ${{ steps.files.outputs.torch-matrix }}
+      model-matrix: ${{ steps.files.outputs.model-matrix }}
     steps:
       - name: Checkout Code
         uses: actions/checkout@v6
         with:
+          repository: ${{ env.repo }}
           ref: ${{ env.ref }}
 
-      - name: Set test metadata
-        id: meta
-        run: |
-          python3 .github/scripts/prepare_unit_test.py \
-            --test-file "${{ matrix.test-file }}"
-
-      - name: decompress uv cache
-        continue-on-error: true
-        run: |
-          if [ -f /opt/dist/uv.tar.xz ]; then
-            TAR_FILE="/opt/dist/uv.tar.xz"
-            LAST_FILE="/opt/uv/cache/lastmodified"
-          
-            # Get modification time of tar.xz file (epoch seconds)
-            TAR_MTIME=$(stat -c %Y "$TAR_FILE")
-          
-            # Read last recorded modification time if file exists
-            if [ -f "$LAST_FILE" ]; then
-              LAST_MTIME=$(cat "$LAST_FILE")
-            else
-              LAST_MTIME=0
-            fi
-          
-            # Compare timestamps to decide whether to decompress
-            if [ "$TAR_MTIME" = "$LAST_MTIME" ]; then
-              echo "uv.tar.xz unchanged, skip decompress"
-            else
-              echo "decompressing uv.tar.xz..."
-          
-              # Prepare temporary directory
-              mkdir -p /opt/uv/cache/tmp
-              rm -rf /opt/uv/cache/tmp/*
-          
-              # Extract archive
-              tar -xJf "$TAR_FILE" -C /opt/uv/cache/tmp
-          
-              # Replace existing uv directory
-              rm -rf /opt/uv/cache/uv
-              mv /opt/uv/cache/tmp/uv /opt/uv/cache/uv
-          
-              # Record latest modification time
-              echo "$TAR_MTIME" > "$LAST_FILE"
-          
-              echo "done!"
-              ls -ahl /opt/uv/cache
-              echo "=========="
-              ls -ahl /opt/uv/cache/uv
-            fi
-          fi
-
-      - name: Activate uv env
-        run: |
-          env_name="evalution_${SAFE_NAME}_cu${{ env.CUDA_VERSION }}_torch${{ env.TORCH_VERSION }}_py${PYTHON_VERSION}_release"
-          mv /opt/uv/venvs/$env_name /opt/uv/tmp || true
-          echo "source /opt/uv/setup_uv_venv.sh $env_name ${PYTHON_VERSION}"
-          source /opt/uv/setup_uv_venv.sh "$env_name" "${PYTHON_VERSION}"
-          python -VV
-
-      - name: Setup uv env
-        run: |
-          /opt/env/init_compiler_torch_only.sh ${{ env.CUDA_VERSION }} ${{ env.TORCH_VERSION }} ${UV_PYTHON}
-          uv pip install device_smi -U
-          python3 .github/scripts/install_unit_test_deps.py \
-            --test-file "${{ matrix.test-file }}" \
-            --uv-python "${UV_PYTHON}" \
-            --runner "${{ env.RUNNER }}" \
-            --install-project
-
-      - name: Print uv env
-        run: |
-          echo "::group::uv python list"
-          uv python list
-          ls -ahl /opt/uv/venvs
-          echo "::endgroup::"
-          
-          echo "== python =="
-          python -VV
-          which python
-          which pip || true
-
-          echo "== nvcc =="
-          nvcc --version
-
-          echo "::group::pip list"
-          uv pip list
-          echo "::endgroup::"
-
-          echo "== torch =="
-          uv pip show torch || true
-
-          echo "::group::project files"
-          ls -ahl
-          echo "::endgroup::"
-
-          echo "::group::git status"
-          git config --global --add safe.directory $(pwd)
-          git status
-          echo "::endgroup::"
-
-      - name: Find suitable GPU
-        if: ${{ steps.meta.outputs.requires-gpu == 'true' }}
+      - name: Prepare checkout
         run: |
-          python3 .github/scripts/allocate_gpu.py \
-            --base-url "${{ env.BASE_URL }}" \
-            --run-id "${{ github.run_id }}" \
-            --test "${{ matrix.test-file }}" \
-            --runner "${{ runner.name }}" \
-            --count "1" \
-            --require-single
+          bash .github/scripts/ci_prepare_checkout.sh "${{ github.event.inputs.pr_number }}"
 
-      - name: Run test
-        run: |
-          echo "::group::pip list"
-          uv pip list
-          echo "::endgroup::"
-
-          python .github/scripts/run_tests.py \
-            --base-url "${{ env.BASE_URL }}" \
-            --run-id "${{ github.run_id }}" \
-            --test-file "${{ matrix.test-file }}" \
-            --runner "${{ runner.name }}" \
-            --gpu-id "${CUDA_VISIBLE_DEVICES:-}" \
-            --artifacts-dir artifacts
-
-      - name: Release GPU
-        if: ${{ always() && steps.meta.outputs.requires-gpu == 'true' }}
-        run: |
-          if [ -n "${CUDA_VISIBLE_DEVICES:-}" ]; then
-            python3 .github/scripts/release_gpu.py \
-              --base-url "${{ env.BASE_URL }}" \
-              --run-id "${{ github.run_id }}" \
-              --gpu-id "${CUDA_VISIBLE_DEVICES}" \
-              --test "${{ matrix.test-file }}" \
-              --runner "${{ runner.name }}"
-          else
-            echo "Skip GPU release because allocation metadata is missing."
-          fi
-
-      - name: Clean cache
-        if: always()
-        run: |
-          echo "Cleaning workspace: $PWD"
-          rm -rf ./* .[^.] .??* || true
-          echo "cleaning venv: ${{ env.VIRTUAL_ENV }}"
-          rm -rf "${{ env.VIRTUAL_ENV }}"
-
-  prepare-setuptools:
-    runs-on: ubuntu-latest
-    outputs:
-      versions: ${{ steps.parser.outputs.versions || '[]' }}
-    steps:
-      - uses: actions/checkout@v6
-      - uses: actions/setup-python@v6
-        with:
-          python-version: "3.14t"
-
-      - name: Generate version matrix
-        id: parser
-        run: |
-          python -m pip install --upgrade requests packaging
-          versions=$(python .github/scripts/ci_loop_versions.py setuptools ">=77.0.1,<83")
-          echo "versions=$versions" >> "$GITHUB_OUTPUT"
-
-  check-setuptools:
-    needs: prepare-setuptools
-    if: needs.prepare-setuptools.outputs.versions != '[]'
-    runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        version: ${{ fromJSON(needs.prepare-setuptools.outputs.versions) }}
-
-    steps:
-      - uses: actions/checkout@v6
-      - uses: actions/setup-python@v6
-        with:
-          python-version: "3.14t"
-          cache: pip
-
-      - name: Install package with selected setuptools
-        run: |
-          python -m pip install --upgrade pip
-          python -m pip install . "setuptools==${{ matrix.version }}"
-
-      - name: Show versions
+      - name: List files
+        id: files
         run: |
-          python --version
-          python -m pip show setuptools
+          test_plan=$(python3 .github/scripts/ci_workflow.py list-tests \
+            --test-regex "${{ github.event.inputs.test_regex }}")
+          echo "Test plan: $test_plan"
+
+          TEST_PLAN="$test_plan" python3 - <<'PY' >> "$GITHUB_OUTPUT"
+          import json
+          import os
+
+          plan = json.loads(os.environ["TEST_PLAN"])
+          print(f"cpu-files={json.dumps(plan['cpu_files'])}")
+          print(f"torch-files={json.dumps(plan['torch_files'])}")
+          print(f"model-files={json.dumps(plan['model_files'])}")
+          print(f"cpu-matrix={json.dumps(plan['cpu_matrix'])}")
+          print(f"torch-matrix={json.dumps(plan['torch_matrix'])}")
+          print(f"model-matrix={json.dumps(plan['model_matrix'])}")
+          PY
+
+          TEST_PLAN="$test_plan" python3 - <<'PY'
+          import json
+          import os
+
+          plan = json.loads(os.environ["TEST_PLAN"])
+          print(f"CPU test files: {json.dumps(plan['cpu_files'])}")
+          print(f"Torch test files: {json.dumps(plan['torch_files'])}")
+          print(f"Model test files: {json.dumps(plan['model_files'])}")
+          print(f"CPU matrix: {json.dumps(plan['cpu_matrix'])}")
+          print(f"Torch matrix: {json.dumps(plan['torch_matrix'])}")
+          print(f"Model matrix: {json.dumps(plan['model_matrix'])}")
+          PY
+
+  cpu:  # Runs the cpu-matrix entries through the reusable unit-test workflow.
+    needs:
+      - list-test-files
+      - check-vm
+    if: success() && needs.list-test-files.outputs.cpu-matrix != '[]'  # success(): never start with empty check-vm outputs after a failed prerequisite
+    uses: ./.github/workflows/unit_tests_reusable.yml
+    secrets: inherit  # the reusable workflow reads secrets.HF_TOKEN
+    with:
+      repo: ${{ github.event.inputs.repo || github.repository }}
+      ref: ${{ github.event.inputs.ref || github.ref }}
+      pr_number: ${{ fromJSON(github.event.inputs.pr_number || '0') }}  # converts the dispatch input to the number the callee declares; 0 when absent
+      check_vm_ip: ${{ needs.check-vm.outputs.ip }}
+      check_vm_max_parallel: ${{ needs.check-vm.outputs['max-parallel'] }}
+      check_vm_cuda_version: ${{ needs.check-vm.outputs.cuda_version }}
+      matrix_json: ${{ needs.list-test-files.outputs.cpu-matrix || '[]' }}  # '[]' makes the callee skip its test job
+
+  torch:  # Runs the torch-matrix entries through the reusable unit-test workflow.
+    needs:
+      - list-test-files
+      - check-vm
+    if: success() && needs.list-test-files.outputs.torch-matrix != '[]'  # success(): never start with empty check-vm outputs after a failed prerequisite
+    uses: ./.github/workflows/unit_tests_reusable.yml
+    secrets: inherit  # the reusable workflow reads secrets.HF_TOKEN
+    with:
+      repo: ${{ github.event.inputs.repo || github.repository }}
+      ref: ${{ github.event.inputs.ref || github.ref }}
+      pr_number: ${{ fromJSON(github.event.inputs.pr_number || '0') }}  # converts the dispatch input to the number the callee declares; 0 when absent
+      check_vm_ip: ${{ needs.check-vm.outputs.ip }}
+      check_vm_max_parallel: ${{ needs.check-vm.outputs['max-parallel'] }}
+      check_vm_cuda_version: ${{ needs.check-vm.outputs.cuda_version }}
+      matrix_json: ${{ needs.list-test-files.outputs.torch-matrix || '[]' }}  # '[]' makes the callee skip its test job
+
+  models:  # Runs the model-matrix entries through the reusable unit-test workflow.
+    needs:
+      - list-test-files
+      - check-vm
+    if: success() && needs.list-test-files.outputs.model-matrix != '[]'  # success(): never start with empty check-vm outputs after a failed prerequisite
+    uses: ./.github/workflows/unit_tests_reusable.yml
+    secrets: inherit  # the reusable workflow reads secrets.HF_TOKEN
+    with:
+      repo: ${{ github.event.inputs.repo || github.repository }}
+      ref: ${{ github.event.inputs.ref || github.ref }}
+      pr_number: ${{ fromJSON(github.event.inputs.pr_number || '0') }}  # converts the dispatch input to the number the callee declares; 0 when absent
+      check_vm_ip: ${{ needs.check-vm.outputs.ip }}
+      check_vm_max_parallel: ${{ needs.check-vm.outputs['max-parallel'] }}
+      check_vm_cuda_version: ${{ needs.check-vm.outputs.cuda_version }}
+      matrix_json: ${{ needs.list-test-files.outputs.model-matrix || '[]' }}  # '[]' makes the callee skip its test job
+
+  setuptools-compatibility:  # Declares no needs, so it does not wait for check-vm or list-test-files.
+    uses: ./.github/workflows/setuptools_compatibility_reusable.yml
+    with:
+      repo: ${{ github.event.inputs.repo || github.repository }}
+      ref: ${{ github.event.inputs.ref || github.ref }}
+      pr_number: ${{ fromJSON(github.event.inputs.pr_number || '0') }}  # converts the dispatch input to the number the callee declares; 0 when absent
diff --git a/.github/workflows/unit_tests_reusable.yml b/.github/workflows/unit_tests_reusable.yml
new file mode 100644
index 0000000..f9e0a6b
--- /dev/null
+++ b/.github/workflows/unit_tests_reusable.yml
@@ -0,0 +1,215 @@
+name: Unit Tests Reusable
+
+defaults:
+  run:
+    shell: bash -le {0}
+
+on:
+  workflow_call:
+    inputs:
+      repo:
+        description: "GitHub repo {owner}/{repo}"
+        required: false
+        default: ""
+        type: string
+      ref:
+        description: "GitHub ref: Branch, Tag or Commit SHA"
+        required: false
+        default: ""
+        type: string
+      pr_number:
+        description: "PR Number"
+        required: false
+        default: 0
+        type: number
+      check_vm_ip:
+        description: "Selected CI runner IP"
+        required: true
+        type: string
+      check_vm_max_parallel:
+        description: "Serialized max parallel payload"
+        required: true
+        type: string
+      check_vm_cuda_version:
+        description: "Selected CI container CUDA image version"
+        required: true
+        type: string
+      matrix_json:
+        description: "Serialized unit test matrix for this reusable job"
+        required: true
+        type: string
+
+permissions:
+  contents: read
+
+env:
+  repo: ${{ inputs.repo || github.repository }}
+  ref: ${{ inputs.ref || github.ref }}
+  CUDA_DEVICE_ORDER: PCI_BUS_ID
+  CUDA_VERSION: 131
+  UV_TORCH_BACKEND: cu130
+  TORCH_VERSION: 2.11.0
+  PYTHON_VERSION: 3.14t
+  UV_PYTHON: 3.14t
+  PYTHON_GIL: 0
+  RUNNER: 10.0.13.31
+  BASE_URL: http://10.0.13.31/gpu
+  LOGBAR_ANIMATION: "0"
+  HF_TOKEN: ${{ secrets.HF_TOKEN }}
+
+jobs:
+  test:  # Runs one containerised job per entry of inputs.matrix_json on the self-hosted runner.
+    runs-on: [self-hosted, xeon5]
+    if: ${{ !cancelled() && inputs.matrix_json != '[]' }}  # nothing to run when the caller passes an empty matrix
+    container:
+      image: ${{ inputs.check_vm_ip }}:5000/nvidia/cuda:${{ inputs.check_vm_cuda_version }}-ubuntu24.04_0325  # registry host and CUDA image tag are supplied by the caller
+      options: --device /dev/dri --ipc=host --runtime=nvidia --gpus all
+      volumes:
+        - /monster/ci/env/entrypoint.sh:/entrypoint.sh
+        - /monster/ci/env/entrypoint.sh:/etc/profile.d/01-entrypoint.sh
+        - /dev/dri/by-path:/dev/dri/by-path
+        - /monster/ci/models:/monster/data/model
+        - /monster/ci/dataset:/monster/data/model/dataset
+        - /monster/ci/huggingface:/github/home/.cache/huggingface
+        - /github/workspace/uv:/opt/uv
+        - /github/workspace/tmp:/opt/uv/tmp
+        - /monster/ci/uv/python:/opt/uv/python
+        - /monster/ci/uv/cache/python:/opt/uv/cache/python
+        - /monster/ci/uv/setup_uv_venv.sh:/opt/uv/setup_uv_venv.sh
+        - /monster/ci/uv/uv:/opt/uv/uv
+        - /monster/ci/uv/uvx:/opt/uv/uvx
+        - /monster/ci/uv/env:/opt/uv/env
+        - /monster/ci/uv/uv.toml:/opt/uv/uv.toml
+        - /monster/ci/env:/opt/env
+        - /monster/ci/dist:/opt/dist
+    strategy:
+      fail-fast: false  # one failing test file must not cancel the rest of the matrix
+      max-parallel: ${{ fromJSON(inputs.check_vm_max_parallel).size || 20 }}  # reads the size field of the JSON payload; 20 when it is missing or falsy
+      matrix:
+        include: ${{ fromJSON(inputs.matrix_json || '[]') }}  # the steps read matrix.test_file from each entry
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v6
+        with:
+          repository: ${{ env.repo }}  # inputs.repo, or the current repository when that input is empty
+          ref: ${{ env.ref }}  # inputs.ref, or the ref of the current run when that input is empty
+
+      - name: Prepare checkout  # NOTE(review): the helper receives the PR number (0 by default); its effect is not visible here
+        run: |
+          bash .github/scripts/ci_prepare_checkout.sh "${{ inputs.pr_number }}"
+
+      - name: Restore uv cache
+        continue-on-error: true  # a failed cache restore must not fail the test job
+        run: |
+          bash .github/scripts/ci_restore_uv_cache.sh
+
+      - name: Set test metadata
+        id: meta  # the GPU allocate/release steps branch on steps.meta.outputs.requires-gpu
+        run: |
+          python3 .github/scripts/ci_tests.py set-metadata \
+            --test-file "${{ matrix.test_file }}"
+
+      - name: Activate uv env  # Exports a per-run, per-attempt, per-test torch extensions dir, then sources the uv venv setup. NOTE(review): SAFE_NAME is not set in this file; presumably exported by the metadata step - confirm.
+        run: |
+          export EVALUTION_TORCH_EXTENSIONS_DIR="/tmp/evalution/torch_extensions/${{ github.run_id }}/${{ github.run_attempt }}/${SAFE_NAME}"
+          export TORCH_EXTENSIONS_DIR="$EVALUTION_TORCH_EXTENSIONS_DIR"
+          mkdir -p "$EVALUTION_TORCH_EXTENSIONS_DIR"
+          echo "EVALUTION_TORCH_EXTENSIONS_DIR=$EVALUTION_TORCH_EXTENSIONS_DIR" >> "$GITHUB_ENV"
+          echo "TORCH_EXTENSIONS_DIR=$TORCH_EXTENSIONS_DIR" >> "$GITHUB_ENV"
+
+          uv cache prune --ci
+
+          env_name="evalution_${SAFE_NAME}_cu${{ env.CUDA_VERSION }}_torch${{ env.TORCH_VERSION }}_py${PYTHON_VERSION}_release"
+          mv "/opt/uv/venvs/$env_name" /opt/uv/tmp || true
+          source /opt/uv/setup_uv_venv.sh "$env_name" "${PYTHON_VERSION}"
+          python -VV
+
+      - name: Setup uv env  # Runs the compiler/torch init script, then installs device_smi, the project and the per-test dependencies.
+        run: |
+          /opt/env/init_compiler_torch_only.sh ${{ env.CUDA_VERSION }} ${{ env.TORCH_VERSION }} "${UV_PYTHON}"
+          uv pip install device_smi -U
+          python3 .github/scripts/ci_tests.py install-deps \
+            --test-file "${{ matrix.test_file }}" \
+            --uv-python "${UV_PYTHON}" \
+            --runner "${{ env.RUNNER }}" \
+            --install-project
+
+      - name: Print uv env  # Logs interpreter, nvcc, installed packages and checkout state; also marks the workspace as a git safe.directory.
+        run: |
+          echo "::group::uv python list"
+          uv python list
+          ls -ahl /opt/uv/venvs
+          echo "::endgroup::"
+
+          echo "== python =="
+          python -VV
+          which python
+          which pip || true
+
+          echo "== nvcc =="
+          nvcc --version
+
+          echo "::group::pip list"
+          uv pip list
+          echo "::endgroup::"
+
+          echo "== torch =="
+          uv pip show torch || true
+
+          echo "::group::project files"
+          ls -ahl
+          echo "::endgroup::"
+
+          echo "::group::git status"
+          git config --global --add safe.directory "$(pwd)"
+          git status
+          echo "::endgroup::"
+
+      - name: Find suitable GPU
+        if: ${{ steps.meta.outputs.requires-gpu == 'true' }}  # allocation is skipped unless the metadata step reported a GPU requirement
+        run: |
+          python3 .github/scripts/ci_gpu.py allocate \
+            --base-url "${{ env.BASE_URL }}" \
+            --run-id "${{ github.run_id }}" \
+            --test "${{ matrix.test_file }}" \
+            --runner "${{ runner.name }}" \
+            --count "1" \
+            --require-single
+
+      - name: Run test  # --gpu-id falls back to an empty string when CUDA_VISIBLE_DEVICES is unset
+        run: |
+          echo "::group::pip list"
+          uv pip list
+          echo "::endgroup::"
+
+          python .github/scripts/ci_tests.py run \
+            --base-url "${{ env.BASE_URL }}" \
+            --run-id "${{ github.run_id }}" \
+            --test-file "${{ matrix.test_file }}" \
+            --runner "${{ runner.name }}" \
+            --gpu-id "${CUDA_VISIBLE_DEVICES:-}" \
+            --artifacts-dir artifacts
+
+      - name: Release GPU
+        if: ${{ always() && steps.meta.outputs.requires-gpu == 'true' }}  # always(): still runs after a failed or cancelled test step
+        run: |
+          if [ -n "${CUDA_VISIBLE_DEVICES:-}" ]; then
+            python3 .github/scripts/ci_gpu.py release \
+              --base-url "${{ env.BASE_URL }}" \
+              --run-id "${{ github.run_id }}" \
+              --gpu-id "${CUDA_VISIBLE_DEVICES}" \
+              --test "${{ matrix.test_file }}" \
+              --runner "${{ runner.name }}"
+          else
+            echo "Skip GPU release because allocation metadata is missing."
+          fi
+
+      - name: Clean cache  # Empties the workspace, removes the venv named by env.VIRTUAL_ENV and prunes the uv cache.
+        if: always()  # cleanup runs regardless of how the earlier steps ended
+        run: |
+          echo "Cleaning workspace: $PWD"
+          rm -rf ./* .[^.] .??* || true
+          echo "cleaning venv: ${{ env.VIRTUAL_ENV }}"
+          rm -rf "${{ env.VIRTUAL_ENV }}"
+          echo "Cleaning uv cache"
+          uv cache prune --ci
diff --git a/scripts/arch.md b/scripts/arch.md
index 8a9cb2d..a9c3681 100644
--- a/scripts/arch.md
+++ b/scripts/arch.md
@@ -3,29 +3,22 @@
 ## Naming
 
 - Workflow entrypoints follow the same `ci_*.py` convention as GPTQModel.
-- `ci_common.py` and `ci_gpu.py` are kept structurally aligned with GPTQModel so allocator and GitHub env handling stay consistent across repos.
+- `ci_common.py`, `ci_gpu.py`, `ci_tests.py`, and `ci_workflow.py` are the only Python CI entrypoints that workflows should call directly.
 
 ## Evalution unit test flow
 
 1. `list-test-files`
-- `ci_workflow.py set-matrix-config` writes the matrix parallelism output.
-- `ci_workflow.py list-tests` enumerates `tests/test_*.py`, applies the optional regex filter, emits the test matrix, and reports whether any `unit_test_common` cases are present.
+- `ci_workflow.py list-tests` enumerates `tests/test_*.py`, applies the optional regex filter, and emits separate CPU, torch, and model matrices.
 
-2. `prepare-common-env`
-- When any common tests are scheduled, the workflow activates the branch-scoped `unit_test_common` uv env, runs `ci_workflow.py setup-uv-env`, and runs `ci_workflow.py prepare-test-run` once before the test matrix starts.
+2. `test` job setup
+- `ci_tests.py set-metadata` derives `SAFE_NAME` and whether the test requires a GPU by reading the file marker and test-specific Python runtime rules.
+- `ci_restore_uv_cache.sh` restores the shared uv cache before each reusable job matrix starts.
+- The reusable workflow activates the uv env and calls `ci_tests.py install-deps` to install test-specific Python dependencies.
 
-3. `test` job setup
-- `ci_workflow.py set-test-metadata` derives `SAFE_NAME`, `ENV_FAMILY`, and whether the test requires a GPU by reading the file marker.
-- `ci_workflow.py activate-uv-env` resolves the test runtime signature, writes `PYTHON_VERSION`, `UV_PYTHON`, `ENV_NAME`, and `UV_CACHE_DIR`, then activates the shared uv env for that signature.
-- `ci_workflow.py setup-uv-env` initializes compiler and torch state, then installs the Python-version-specific runtime dependencies once per shared env under a filesystem lock.
-- `ci_workflow.py print-uv-env` prints the same diagnostic state the old shell step emitted.
-
-4. execution
+3. execution
 - `ci_gpu.py allocate` reserves a GPU only when the test requires one.
-- Common-env matrix jobs skip `ci_workflow.py setup-uv-env` and `ci_workflow.py prepare-test-run` because `prepare-common-env` has already installed the shared runtime and project package.
-- `unit_test_tensorrt_llm` keeps the original flow and prepares its env lazily when that test runs.
 - `ci_tests.py run` executes pytest, writes artifacts, and optionally keeps the GPU lease alive while the test is running.
-- `ci_workflow.py release-gpu-if-present` releases the GPU only when allocation metadata exists.
+- `ci_gpu.py release` is invoked only when allocation metadata exists; otherwise the workflow logs that the release was skipped.
 
 ## Maintenance rule