Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ To build for a GPU backend, forward its flag, e.g. Apple Metal:
cmake -B build -DPARAKEET_GGML_METAL=ON && cmake --build build -j
```

The CLI auto-selects the first GPU device the ggml registry reports, so no runtime flag is needed (set `PARAKEET_DEVICE=cpu` to force CPU). Ops the chosen backend has no kernel for run on the CPU automatically, so a model always runs even when one op lacks a GPU kernel. On an Apple M4, Metal is up to about 5x faster than CPU on the larger models; see [Apple Metal](benchmarks/BENCHMARK.md#apple-metal-m4).
The CLI auto-selects the first GPU device the ggml registry reports (including integrated GPUs such as Ryzen APUs), so no runtime flag is needed. Use `PARAKEET_DEVICE` to override: set it to `cpu` to force CPU, or to a specific device name like `CUDA0` or `Vulkan1` (case-insensitive) to pick that device. Ops for which the chosen backend has no kernel run on the CPU automatically, so a model always runs even when one op lacks a GPU kernel. On an Apple M4, Metal is up to about 5x faster than CPU on the larger models; see [Apple Metal](benchmarks/BENCHMARK.md#apple-metal-m4).

---

Expand Down
61 changes: 47 additions & 14 deletions src/backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "ggml-cpu.h"

#include <cassert>
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <string>
Expand Down Expand Up @@ -68,27 +69,59 @@ struct Backend::Impl {
static thread_local Backend* t_active = nullptr;

Backend::Backend(int n_threads) : impl_(new Impl()) {
// Optional override: PARAKEET_DEVICE=cpu forces the CPU backend (used to take
// a CPU baseline on a GPU box without rebuilding).
// Optional override via PARAKEET_DEVICE:
// - "cpu" forces the CPU backend (CPU baseline on a GPU box).
// - a device name selects that specific registry device by name, e.g.
// "CUDA0", "Vulkan1", "Metal" (case-insensitive).
// - unset: auto-pick the first GPU / integrated-GPU device.
const char* force = std::getenv("PARAKEET_DEVICE");
const bool force_cpu = force && std::string(force) == "cpu";
const std::string want = force ? force : "";
const bool force_cpu = want == "cpu" || want == "CPU";

// Case-insensitive equality, used to match PARAKEET_DEVICE against the
// registry's device names (whose casing varies, e.g. "CUDA0"/"Vulkan0").
auto iequals = [](const std::string& a, const std::string& b) {
if (a.size() != b.size()) return false;
for (size_t i = 0; i < a.size(); ++i)
if (std::tolower((unsigned char)a[i]) != std::tolower((unsigned char)b[i]))
return false;
return true;
};

if (!force_cpu) {
// Pick the first GPU device the registry reports. Whatever backend was
// compiled in (CUDA/Metal/Vulkan/HIP/SYCL) registers itself here, so this
// single path covers them all with no backend-specific includes.
// Walk the registry. Whatever backend was compiled in
// (CUDA/Metal/Vulkan/HIP/SYCL) registers itself here, so this single path
// covers them all with no backend-specific includes. Integrated GPUs
// (e.g. Ryzen APUs) report GGML_BACKEND_DEVICE_TYPE_IGPU and are eligible
// too. When PARAKEET_DEVICE names a device, match by name; otherwise pick
// the first GPU/IGPU device.
for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
ggml_backend_dev_t dev = ggml_backend_dev_get(i);
if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) {
impl_->backend = ggml_backend_dev_init(dev, nullptr);
if (impl_->backend) {
device_name_ = ggml_backend_dev_name(dev);
impl_->use_sched = true; // GPU device: route compute through ggml_backend_sched
PK_LOG("pk::Backend using GPU device: %s", device_name_.c_str());
break;
}
const auto type = ggml_backend_dev_type(dev);
const char* name = ggml_backend_dev_name(dev);

bool selected;
if (!want.empty()) {
selected = name && iequals(want, name); // explicit name match
} else {
selected = type == GGML_BACKEND_DEVICE_TYPE_GPU ||
type == GGML_BACKEND_DEVICE_TYPE_IGPU;
}
if (!selected) continue;

impl_->backend = ggml_backend_dev_init(dev, nullptr);
if (impl_->backend) {
device_name_ = name ? name : "";
// Route compute through ggml_backend_sched for any non-CPU device
// so unsupported ops can fall back to CPU.
impl_->use_sched = type != GGML_BACKEND_DEVICE_TYPE_CPU;
PK_LOG("pk::Backend using device: %s", device_name_.c_str());
break;
}
}
if (!want.empty() && !impl_->backend)
PK_LOG("pk::Backend: PARAKEET_DEVICE=%s not found; falling back to CPU",
want.c_str());
}
if (!impl_->backend) { // CPU fallback (or CPU-only build)
impl_->backend = ggml_backend_cpu_init();
Expand Down
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ function(pk_add_test name)
endfunction()

pk_add_test(test_smoke)
pk_add_test(test_backend_device)
pk_add_test(test_audio_io)
pk_add_test(test_model_loader)
pk_add_test(test_fft)
Expand Down
53 changes: 53 additions & 0 deletions tests/test_backend_device.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Device selection via PARAKEET_DEVICE (issue #17).
//
// Runs on any build, including the CPU-only one used in CI: it exercises the
// env-var parsing and fallback paths that don't need a GPU to be present.
// - PARAKEET_DEVICE=cpu or CPU -> CPU backend.
// - PARAKEET_DEVICE=<unknown> -> no such device, falls back to CPU.
// The unset (auto-pick) case is not asserted here: it yields CPU on a CPU-only
// build, but on a GPU build it may select a GPU/integrated-GPU device, so
// there is no single device name to expect.
#include "backend.hpp"

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>

// Number of failed expectations recorded so far; main() turns it into the
// process exit code.
static int failures = 0;

// Checks that a backend reported the CPU device.
//   label: tag printed alongside the result so the case can be identified.
//   name:  device name to check; it must be exactly "cpu".
// Prints an "ok" line to stdout on a match; otherwise prints a FAIL line to
// stderr and increments the failure counter.
static void expect_cpu(const char* label, const std::string& name) {
    const bool is_cpu = (name == "cpu");
    if (is_cpu) {
        std::printf("ok [%s]: device = %s\n", label, name.c_str());
        return;
    }
    std::fprintf(stderr, "FAIL [%s]: expected device 'cpu', got '%s'\n",
                 label, name.c_str());
    ++failures;
}

// Sets PARAKEET_DEVICE for this process when `value` is non-null, or removes
// it when `value` is null. Returns true on success.
// setenv/unsetenv are POSIX-only, so on Windows the CRT's _putenv_s is used
// instead (passing an empty value removes the variable there).
static bool set_device_env(const char* value) {
#ifdef _WIN32
    return _putenv_s("PARAKEET_DEVICE", value ? value : "") == 0;
#else
    return value ? setenv("PARAKEET_DEVICE", value, 1) == 0
                 : unsetenv("PARAKEET_DEVICE") == 0;
#endif
}

// Runs each PARAKEET_DEVICE scenario in turn, constructing a fresh backend per
// scenario and checking that it reports the CPU device. Returns 0 when every
// expectation held, 1 otherwise.
int main() {
    struct Case {
        const char* value;  // what PARAKEET_DEVICE is set to
        const char* label;  // tag printed with the result
    };
    const Case cases[] = {
        // Forcing CPU must always yield the CPU backend.
        {"cpu", "PARAKEET_DEVICE=cpu"},
        // An unknown device name must not crash; it falls back to CPU.
        {"definitely-not-a-real-device-9000", "PARAKEET_DEVICE=<unknown>"},
        // Upper-case "CPU" is also honored as a CPU force.
        {"CPU", "PARAKEET_DEVICE=CPU"},
    };

    for (const Case& c : cases) {
        // If the variable could not be set, the backend would be built against
        // a stale environment and the check would be meaningless; report it.
        if (!set_device_env(c.value)) {
            std::fprintf(stderr, "FAIL [%s]: could not set PARAKEET_DEVICE\n",
                         c.label);
            ++failures;
            continue;
        }
        pk::Backend b(1);
        expect_cpu(c.label, b.device_name());
    }

    // Leave the environment as we found it (best effort).
    set_device_env(nullptr);
    return failures == 0 ? 0 : 1;
}
Loading