Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
/ggml/src/ggml-rpc/ @rgerganov
/ggml/src/ggml-threading.* @ggerganov
/ggml/src/ggml-vulkan/ @0cc4m
/ggml/src/ggml-virtgpu/ @kpouget
/ggml/src/ggml-webgpu/ @reeselevine
/ggml/src/ggml-zdnn/ @taronaeo @Andreas-Krebbel @AlekseiNikiforovIBM
/ggml/src/ggml.c @ggerganov
Expand Down
61 changes: 54 additions & 7 deletions common/jinja/value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,18 @@ static T slice(const T & array, int64_t start, int64_t stop, int64_t step = 1) {
return result;
}

// Builtin stub used where a filter applied to an undefined value should yield
// the "empty"/zero instance of its usual return type, instead of raising.
// The func_args are intentionally ignored.
template<typename T>
static value empty_value_fn(const func_args &) {
    if constexpr (std::is_same_v<T, value_int>) {
        return mk_val<T>(0);
    } else if constexpr (std::is_same_v<T, value_float>) {
        return mk_val<T>(0.0);
    } else if constexpr (std::is_same_v<T, value_bool>) {
        return mk_val<T>(false);
    } else {
        // value_string / value_array / value_undefined / ...: default-construct
        // (empty string, empty array, undefined)
        return mk_val<T>();
    }
}
template<typename T>
static value test_type_fn(const func_args & args) {
args.ensure_count(1);
Expand All @@ -128,6 +140,13 @@ static value test_type_fn(const func_args & args) {
JJ_DEBUG("test_type_fn: type=%s or %s result=%d", typeid(T).name(), typeid(U).name(), is_type ? 1 : 0);
return mk_val<value_bool>(is_type);
}
// Jinja "is <type>" test that accepts any of three alternative value types.
// Takes exactly one positional argument; returns a boolean value.
template<typename T, typename U, typename V>
static value test_type_fn(const func_args & args) {
    args.ensure_count(1);
    const auto & arg = args.get_pos(0);
    const bool matched = is_val<T>(arg) || is_val<U>(arg) || is_val<V>(arg);
    JJ_DEBUG("test_type_fn: type=%s, %s or %s result=%d", typeid(T).name(), typeid(U).name(), typeid(V).name(), matched ? 1 : 0);
    return mk_val<value_bool>(matched);
}
template<value_compare_op op>
static value test_compare_fn(const func_args & args) {
args.ensure_count(2, 2);
Expand Down Expand Up @@ -347,8 +366,8 @@ const func_builtins & global_builtins() {
{"test_is_integer", test_type_fn<value_int>},
{"test_is_float", test_type_fn<value_float>},
{"test_is_number", test_type_fn<value_int, value_float>},
{"test_is_iterable", test_type_fn<value_array, value_string>},
{"test_is_sequence", test_type_fn<value_array, value_string>},
{"test_is_iterable", test_type_fn<value_array, value_string, value_undefined>},
{"test_is_sequence", test_type_fn<value_array, value_string, value_undefined>},
{"test_is_mapping", test_type_fn<value_object>},
{"test_is_lower", [](const func_args & args) -> value {
args.ensure_vals<value_string>();
Expand Down Expand Up @@ -1003,7 +1022,12 @@ const func_builtins & value_none_t::get_builtins() const {
static const func_builtins builtins = {
{"default", default_value},
{"tojson", tojson},
{"string", [](const func_args &) -> value { return mk_val<value_string>("None"); }}
{"string", [](const func_args &) -> value {
return mk_val<value_string>("None");
}},
{"safe", [](const func_args &) -> value {
return mk_val<value_string>("None");
}},
};
return builtins;
}
Expand All @@ -1012,10 +1036,33 @@ const func_builtins & value_none_t::get_builtins() const {
// Builtins available on an undefined value. Most filters applied to undefined
// resolve to an "empty" result of the filter's usual return type via
// empty_value_fn<T>, rather than raising — mirroring Jinja2's lenient
// Undefined behavior.
const func_builtins & value_undefined_t::get_builtins() const {
    static const func_builtins builtins = {
        {"default", default_value},
        // undefined serializes to JSON null
        {"tojson", [](const func_args & args) -> value {
            args.ensure_vals<value_undefined>();
            return mk_val<value_string>("null");
        }},
        // string-producing filters -> empty string
        {"capitalize", empty_value_fn<value_string>},
        // element accessors -> undefined (no element to return)
        {"first", empty_value_fn<value_undefined>},
        // collection-producing filters -> empty array
        {"items", empty_value_fn<value_array>},
        {"join", empty_value_fn<value_string>},
        {"last", empty_value_fn<value_undefined>},
        // numeric filters -> 0
        {"length", empty_value_fn<value_int>},
        {"list", empty_value_fn<value_array>},
        {"lower", empty_value_fn<value_string>},
        {"map", empty_value_fn<value_array>},
        {"max", empty_value_fn<value_undefined>},
        {"min", empty_value_fn<value_undefined>},
        {"reject", empty_value_fn<value_array>},
        {"rejectattr", empty_value_fn<value_array>},
        {"replace", empty_value_fn<value_string>},
        {"reverse", empty_value_fn<value_array>},
        {"safe", empty_value_fn<value_string>},
        {"select", empty_value_fn<value_array>},
        {"selectattr", empty_value_fn<value_array>},
        {"sort", empty_value_fn<value_array>},
        {"string", empty_value_fn<value_string>},
        {"strip", empty_value_fn<value_string>},
        {"sum", empty_value_fn<value_int>},
        {"title", empty_value_fn<value_string>},
        {"truncate", empty_value_fn<value_string>},
        {"unique", empty_value_fn<value_array>},
        {"upper", empty_value_fn<value_string>},
        {"wordcount", empty_value_fn<value_int>},
    };
    return builtins;
}
Expand Down
31 changes: 31 additions & 0 deletions docs/build.md
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,37 @@ Finally, after finishing your build, you should be able to do something like thi
# ggml_vulkan: Using Intel(R) Graphics (ADL GT2) | uma: 1 | fp16: 1 | warp size: 32
```

### For Mac users:

Generally, follow LunarG's [Getting Started with the MacOS Vulkan SDK](https://vulkan.lunarg.com/doc/sdk/latest/mac/getting_started.html) guide for installation and setup of the Vulkan SDK. There are two Vulkan driver options on macOS, both of which implement translation layers that map Vulkan to Metal. They can be swapped at runtime by setting the `VK_ICD_FILENAMES` (and, for newer Vulkan loaders, `VK_DRIVER_FILES`) environment variable to point to the respective ICD JSON file.

Check the box for "KosmicKrisp" during the LunarG Vulkan SDK installation.

Set environment variable for the LunarG Vulkan SDK after installation (and optionally add to your shell profile for persistence):
```bash
source /path/to/vulkan-sdk/setup-env.sh
```

#### Using MoltenVK

MoltenVK is the default Vulkan driver installed with the LunarG Vulkan SDK on macOS, so you can use the above environment variable settings as is.

#### Using KosmicKrisp

Override the environment variable for KosmicKrisp:
```bash
export VK_ICD_FILENAMES=$VULKAN_SDK/share/vulkan/icd.d/libkosmickrisp_icd.json
export VK_DRIVER_FILES=$VULKAN_SDK/share/vulkan/icd.d/libkosmickrisp_icd.json
```

#### Build

This is the only step that differs from the [common steps](#common-steps) above.
```bash
cmake -B build -DGGML_VULKAN=1 -DGGML_METAL=OFF
cmake --build build --config Release
```

## CANN
This provides NPU acceleration using the AI cores of your Ascend NPU. And [CANN](https://www.hiascend.com/en/software/cann) is a hierarchical APIs to help you to quickly build AI applications and service based on Ascend NPU.

Expand Down
3 changes: 3 additions & 0 deletions ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,8 @@ option(GGML_WEBGPU_CPU_PROFILE "ggml: enable WebGPU profiling (CPU)
option(GGML_WEBGPU_GPU_PROFILE "ggml: enable WebGPU profiling (GPU)" OFF)
option(GGML_WEBGPU_JSPI "ggml: use JSPI for WebGPU" ON)
option(GGML_ZDNN "ggml: use zDNN" OFF)
option(GGML_VIRTGPU "ggml: use the VirtGPU/Virglrenderer API Remoting frontend" OFF)
option(GGML_VIRTGPU_BACKEND "ggml: build the VirtGPU/Virglrenderer API Remoting backend" OFF)
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF)
Expand Down Expand Up @@ -320,6 +322,7 @@ set(GGML_PUBLIC_HEADERS
include/ggml-opt.h
include/ggml-metal.h
include/ggml-rpc.h
include/ggml-virtgpu.h
include/ggml-sycl.h
include/ggml-vulkan.h
include/ggml-webgpu.h
Expand Down
16 changes: 16 additions & 0 deletions ggml/include/ggml-virtgpu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

#include "ggml.h"
#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif

#define GGML_REMOTING_FRONTEND_NAME "RemotingFrontend"

// Returns the backend registry entry for the VirtGPU/Virglrenderer
// API-remoting frontend.
// Declared with (void): inside extern "C", an empty parameter list is an
// unspecified-arguments declaration when this header is consumed by a C
// translation unit (pre-C23); (void) matches the other ggml backend headers.
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_virtgpu_reg(void);

#ifdef __cplusplus
}
#endif
1 change: 1 addition & 0 deletions ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,7 @@ ggml_add_backend(HIP)
ggml_add_backend(METAL)
ggml_add_backend(MUSA)
ggml_add_backend(RPC)
ggml_add_backend(VirtGPU)
ggml_add_backend(SYCL)
ggml_add_backend(Vulkan)
ggml_add_backend(WebGPU)
Expand Down
14 changes: 14 additions & 0 deletions ggml/src/ggml-backend-reg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@
#include "ggml-rpc.h"
#endif

#ifdef GGML_USE_VIRTGPU_FRONTEND
#include "ggml-virtgpu.h"
#endif

#ifdef GGML_USE_CANN
#include "ggml-cann.h"
#endif
Expand Down Expand Up @@ -180,14 +184,23 @@ struct ggml_backend_registry {
register_backend(ggml_backend_sycl_reg());
#endif
#ifdef GGML_USE_VULKAN
// Add runtime disable check
if (getenv("GGML_DISABLE_VULKAN") == nullptr) {
register_backend(ggml_backend_vk_reg());
} else {
GGML_LOG_DEBUG("Vulkan backend disabled by GGML_DISABLE_VULKAN environment variable\n");
}
#endif
#ifdef GGML_USE_WEBGPU
register_backend(ggml_backend_webgpu_reg());
#endif
#ifdef GGML_USE_ZDNN
register_backend(ggml_backend_zdnn_reg());
#endif
#ifdef GGML_USE_VIRTGPU_FRONTEND
register_backend(ggml_backend_virtgpu_reg());
#endif

#ifdef GGML_USE_OPENCL
register_backend(ggml_backend_opencl_reg());
#endif
Expand Down Expand Up @@ -604,6 +617,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
ggml_backend_load_best("rpc", silent, dir_path);
ggml_backend_load_best("sycl", silent, dir_path);
ggml_backend_load_best("vulkan", silent, dir_path);
ggml_backend_load_best("virtgpu", silent, dir_path);
ggml_backend_load_best("opencl", silent, dir_path);
ggml_backend_load_best("hexagon", silent, dir_path);
ggml_backend_load_best("musa", silent, dir_path);
Expand Down
70 changes: 70 additions & 0 deletions ggml/src/ggml-virtgpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
cmake_minimum_required(VERSION 3.19)
# CMP0114 NEW: ExternalProject step targets follow modern dependency semantics
cmake_policy(SET CMP0114 NEW)

include(ExternalProject)

message(STATUS "Including the VirtGPU/Virglrenderer API Remoting")

# Download venus_hw.h from virglrenderer repository
# NOTE(review): fetched at configure time into the *source* tree's include/
# directory — requires network access on a pristine checkout; confirm this is
# acceptable for offline/packaged builds.
ExternalProject_Add(
    venus_hw_header
    URL https://gitlab.freedesktop.org/virgl/virglrenderer/-/raw/virglrenderer-1.2.0/src/venus_hw.h
    DOWNLOAD_NO_EXTRACT YES
    DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include
    DOWNLOAD_NAME venus_hw.h
    CONFIGURE_COMMAND ""
    BUILD_COMMAND ""
    INSTALL_COMMAND ""
    LOG_DOWNLOAD ON
)

# GGML_VIRTGPU_BACKEND acts as a tri-state here: anything but "ONLY" builds the
# frontend library; anything but "OFF" additionally builds the backend subdir.
if (NOT GGML_VIRTGPU_BACKEND STREQUAL "ONLY")
    message(STATUS "Enable the VirtGPU/Virglrenderer API Remoting frontend library")

    find_package(PkgConfig REQUIRED)
    pkg_check_modules(DRM REQUIRED libdrm)
    if (NOT GGML_BACKEND_DL)
        # cannot simply use USE_VIRTGPU, as in the 'else()' case the
        # frontend isn't compiled
        target_compile_definitions(ggml PUBLIC "GGML_USE_VIRTGPU_FRONTEND")
    endif()

    ggml_add_backend_library(ggml-virtgpu
        ggml-backend-buffer.cpp
        ggml-backend.cpp
        ggml-backend-device.cpp
        ggml-backend-reg.cpp
        ggml-backend-buffer-type.cpp
        virtgpu-apir.h
        virtgpu-forward.gen.h
        virtgpu.cpp
        virtgpu-shm.cpp
        virtgpu-utils.cpp
        virtgpu-forward-device.cpp
        virtgpu-forward-buffer-type.cpp
        virtgpu-forward-buffer.cpp
        virtgpu-forward-backend.cpp
        virtgpu-forward-impl.h
        apir_cs_ggml-rpc-front.cpp
        ../../include/ggml-virtgpu.h)

    # NOTE(review): this hard-coded path presumably duplicates the
    # ${DRM_INCLUDE_DIRS} added below by pkg-config — confirm it is still
    # needed, as it breaks cross-compilation / non-FHS prefixes.
    target_include_directories(ggml-virtgpu PUBLIC /usr/include/libdrm/)

    target_link_libraries(ggml-virtgpu PUBLIC ${DRM_LIBRARIES})
    target_include_directories(ggml-virtgpu PUBLIC ${DRM_INCLUDE_DIRS})
    target_compile_options(ggml-virtgpu PUBLIC ${DRM_CFLAGS_OTHER})

    # ./include holds the downloaded venus_hw.h
    target_include_directories(ggml-virtgpu PUBLIC ./include)
    target_include_directories(ggml-virtgpu PRIVATE ${CMAKE_CURRENT_BINARY_DIR})

    # Ensure venus_hw.h is downloaded before building ggml-virtgpu
    add_dependencies(ggml-virtgpu venus_hw_header)

    # frontend sources require C++20
    target_compile_options(ggml-virtgpu PRIVATE -std=c++20)
else()
    message(STATUS "Not building the VirtGPU/Virglrenderer API Remoting frontend library")
endif()

if (NOT GGML_VIRTGPU_BACKEND STREQUAL "OFF")
    add_subdirectory("backend")
endif()
87 changes: 87 additions & 0 deletions ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#include "backend/shared/apir_cs_rpc.h"
#include "ggml-backend-impl.h"
#include "ggml-impl.h"
#include "ggml-remoting.h"

#include <cinttypes>
#include <unordered_map>
#include <unordered_set>
#include <vector>

// Flatten a ggml_tensor into the POD wire representation used by the
// API-remoting protocol. Host pointers (id, src[], view_src) are sent as
// opaque 64-bit handles; tensor->data is rebased to an offset from the
// owning buffer's base address.
apir_rpc_tensor apir_serialize_tensor(const ggml_tensor * tensor) {
    apir_rpc_tensor out;

    out.id     = reinterpret_cast<uint64_t>(tensor);
    out.type   = tensor->type;
    out.buffer = tensor->buffer ? BUFFER_TO_HOST_HANDLE(tensor->buffer) : 0;

    for (uint32_t d = 0; d < GGML_MAX_DIMS; d++) {
        out.ne[d] = tensor->ne[d];
        out.nb[d] = tensor->nb[d];
    }

    out.op = tensor->op;
    for (uint32_t p = 0; p < GGML_MAX_OP_PARAMS / sizeof(int32_t); p++) {
        out.op_params[p] = tensor->op_params[p];
    }
    out.flags = tensor->flags;

    for (uint32_t s = 0; s < GGML_MAX_SRC; s++) {
        out.src[s] = reinterpret_cast<uint64_t>(tensor->src[s]);
    }
    out.view_src  = reinterpret_cast<uint64_t>(tensor->view_src);
    out.view_offs = tensor->view_offs;

    out.data = reinterpret_cast<uint64_t>(tensor->data);
    if (tensor->data) {
        if (!tensor->buffer) {
            GGML_ABORT("tensor has data but not buffer");
        }
        // tensor->data is serialized as an offset to the buffer base address
        out.data -= reinterpret_cast<uint64_t>(BUFFER_TO_GGML_CONTEXT(tensor->buffer)->base);
    }
    snprintf(out.name, GGML_MAX_NAME, "%s", tensor->name);
    return out;
}

// Post-order walk of a tensor's dependency graph (sources, then view source,
// then the tensor itself), appending each tensor's serialized form exactly
// once to `tensors`. `visited` de-duplicates shared nodes and terminates on
// diamond-shaped graphs.
void apir_add_tensor(ggml_tensor * tensor,
                     std::vector<apir_rpc_tensor> & tensors,
                     std::unordered_set<ggml_tensor *> & visited) {
    if (tensor == nullptr) {
        return;
    }
    // insert() reports via .second whether the element was newly added;
    // this replaces a separate find() + insert() pair (one hash lookup, not two)
    if (!visited.insert(tensor).second) {
        return;
    }
    for (int i = 0; i < GGML_MAX_SRC; i++) {
        apir_add_tensor(tensor->src[i], tensors, visited);
    }
    apir_add_tensor(tensor->view_src, tensors, visited);
    tensors.push_back(apir_serialize_tensor(tensor));
}

// Serialize a compute graph into `output` for transmission to the host.
// serialization format:
// | n_nodes (4 bytes) | nodes (n_nodes * sizeof(uint64_t)) | n_tensors (4 bytes) | tensors (n_tensors * sizeof(apir_rpc_tensor)) |
// Node entries are the host-handle (pointer) values of each graph node; the
// tensor table is gathered by apir_add_tensor (deduplicated, dependency-first).
void apir_serialize_graph(const ggml_cgraph * cgraph, std::vector<uint8_t> & output) {
    const uint32_t n_nodes = cgraph->n_nodes;
    std::vector<apir_rpc_tensor> tensors;
    std::unordered_set<ggml_tensor *> visited;
    for (uint32_t i = 0; i < n_nodes; i++) {
        apir_add_tensor(cgraph->nodes[i], tensors, visited);
    }
    const uint32_t n_tensors = tensors.size();
    // size_t, not int: the total can exceed INT_MAX for large graphs
    // (n_tensors * sizeof(apir_rpc_tensor) grows fast)
    const size_t output_size =
        sizeof(uint32_t) + n_nodes * sizeof(uint64_t) + sizeof(uint32_t) + n_tensors * sizeof(apir_rpc_tensor);
    output.resize(output_size, 0);

    // write each field through memcpy at a running offset: avoids the
    // aliasing/alignment-sensitive pointer-cast store of the original
    size_t offset = 0;
    memcpy(output.data() + offset, &n_nodes, sizeof(n_nodes));
    offset += sizeof(n_nodes);
    for (uint32_t i = 0; i < n_nodes; i++) {
        memcpy(output.data() + offset, &cgraph->nodes[i], sizeof(uint64_t));
        offset += sizeof(uint64_t);
    }
    memcpy(output.data() + offset, &n_tensors, sizeof(n_tensors));
    offset += sizeof(n_tensors);
    if (n_tensors > 0) {
        // guard: memcpy from tensors.data() is UB when the vector is empty
        memcpy(output.data() + offset, tensors.data(), n_tensors * sizeof(apir_rpc_tensor));
    }
}
Loading
Loading