From da84c04d8fa43ff92b172feb8130c74d062f956a Mon Sep 17 00:00:00 2001 From: Xuan-Son Nguyen Date: Wed, 30 Apr 2025 10:44:07 +0200 Subject: [PATCH 1/8] docker : do not build tests (#13204) * docker : do not build tests * include "ggml-cpu.h" --- .devops/cpu.Dockerfile | 4 ++-- .devops/cuda.Dockerfile | 2 +- .devops/intel.Dockerfile | 2 +- .devops/llama-cli-cann.Dockerfile | 2 +- .devops/musa.Dockerfile | 2 +- .devops/rocm.Dockerfile | 2 +- .devops/vulkan.Dockerfile | 2 +- tests/test-quantize-stats.cpp | 1 + 8 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.devops/cpu.Dockerfile b/.devops/cpu.Dockerfile index aa2aa031201..9459f08c10c 100644 --- a/.devops/cpu.Dockerfile +++ b/.devops/cpu.Dockerfile @@ -14,9 +14,9 @@ WORKDIR /app COPY . . RUN if [ "$TARGETARCH" = "amd64" ]; then \ - cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \ + cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \ elif [ "$TARGETARCH" = "arm64" ]; then \ - cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \ + cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \ else \ echo "Unsupported architecture"; \ exit 1; \ diff --git a/.devops/cuda.Dockerfile b/.devops/cuda.Dockerfile index 8ae57d2e289..94f14339723 100644 --- a/.devops/cuda.Dockerfile +++ b/.devops/cuda.Dockerfile @@ -21,7 +21,7 @@ COPY . . RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \ export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \ fi && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ + cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ cmake --build build --config Release -j$(nproc) RUN mkdir -p /app/lib && \ diff --git a/.devops/intel.Dockerfile b/.devops/intel.Dockerfile index 091e1dc5d8b..c8839fe027c 100644 --- a/.devops/intel.Dockerfile +++ b/.devops/intel.Dockerfile @@ -17,7 +17,7 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \ && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \ fi && \ echo "Building with dynamic libs" && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${OPT_SYCL_F16} && \ + cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${OPT_SYCL_F16} && \ cmake --build build --config Release -j$(nproc) RUN mkdir -p /app/lib && \ diff --git a/.devops/llama-cli-cann.Dockerfile b/.devops/llama-cli-cann.Dockerfile index 0eb1af87cb8..ef43d78cd2a 100644 --- a/.devops/llama-cli-cann.Dockerfile +++ b/.devops/llama-cli-cann.Dockerfile @@ -22,7 +22,7 @@ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH RUN echo "Building with static libs" && \ source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \ + cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_TESTS=OFF && \ cmake --build build --config Release --target llama-cli # TODO: use image with NNRT diff --git a/.devops/musa.Dockerfile b/.devops/musa.Dockerfile index 261a2823a0e..e0f1ad9728b 100644 --- a/.devops/musa.Dockerfile +++ b/.devops/musa.Dockerfile @@ -35,7 +35,7 @@ COPY . . RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \ export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \ fi && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ + cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ cmake --build build --config Release -j$(nproc) RUN mkdir -p /app/lib && \ diff --git a/.devops/rocm.Dockerfile b/.devops/rocm.Dockerfile index a1b34723a46..1c00f1b9c2c 100644 --- a/.devops/rocm.Dockerfile +++ b/.devops/rocm.Dockerfile @@ -40,7 +40,7 @@ WORKDIR /app COPY . . RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \ - cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \ + cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \ && cmake --build build --config Release -j$(nproc) RUN mkdir -p /app/lib \ diff --git a/.devops/vulkan.Dockerfile b/.devops/vulkan.Dockerfile index f8f3072e957..fcd81ffa1e9 100644 --- a/.devops/vulkan.Dockerfile +++ b/.devops/vulkan.Dockerfile @@ -16,7 +16,7 @@ WORKDIR /app COPY . . -RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \ +RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \ cmake --build build --config Release -j$(nproc) RUN mkdir -p /app/lib && \ diff --git a/tests/test-quantize-stats.cpp b/tests/test-quantize-stats.cpp index db01059119e..a284a1f0c5e 100644 --- a/tests/test-quantize-stats.cpp +++ b/tests/test-quantize-stats.cpp @@ -1,4 +1,5 @@ #include "ggml.h" +#include "ggml-cpu.h" #include "llama.h" #include "common.h" From 5933e6fdc9c051eea6c83b5a7608de12f9f15670 Mon Sep 17 00:00:00 2001 From: Xuan-Son Nguyen Date: Wed, 30 Apr 2025 10:46:32 +0200 Subject: [PATCH 2/8] arg : allow using -hf offline (#13202) * arg : allow using -hf offline * add more comments in code [no ci] --- common/arg.cpp | 130 +++++++++++++++++++++++++++++++------------------ 1 file changed, 83 insertions(+), 47 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 75e8e0bd51a..6633c3a1db8 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -43,6 +43,25 @@ std::initializer_list mmproj_examples = { // TODO: add LLAMA_EXAMPLE_SERVER when it's ready }; +static std::string read_file(const std::string & fname) { + std::ifstream file(fname); + if (!file) { + throw std::runtime_error(string_format("error: failed to open file '%s'\n", fname.c_str())); + } + std::string content((std::istreambuf_iterator(file)), std::istreambuf_iterator()); + file.close(); + return content; +} + +static void write_file(const std::string & fname, const std::string & content) { + std::ofstream file(fname); + if (!file) { + throw std::runtime_error(string_format("error: failed to open file '%s'\n", fname.c_str())); + } + file << content; + file.close(); +} + common_arg & common_arg::set_examples(std::initializer_list examples) { this->examples = std::move(examples); return *this; @@ -200,9 +219,11 @@ struct curl_slist_ptr { static bool curl_perform_with_retry(const std::string & url, CURL * curl, int max_attempts, int retry_delay_seconds) { int remaining_attempts = max_attempts; + char * method = nullptr; + curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_METHOD, &method); while (remaining_attempts > 0) { - LOG_INF("%s: Trying to download from %s (attempt %d of %d)...\n", __func__ , url.c_str(), max_attempts - remaining_attempts + 1, max_attempts); + LOG_INF("%s: %s %s (attempt %d of %d)...\n", __func__ , method, url.c_str(), max_attempts - remaining_attempts + 1, max_attempts); CURLcode res = curl_easy_perform(curl); if (res == CURLE_OK) { @@ -213,6 +234,7 @@ static bool curl_perform_with_retry(const std::string & url, CURL * curl, int ma LOG_WRN("%s: curl_easy_perform() failed: %s, retrying after %d milliseconds...\n", __func__, curl_easy_strerror(res), exponential_backoff_delay); remaining_attempts--; + if (remaining_attempts == 0) break; std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay)); } @@ -231,8 +253,6 @@ static bool common_download_file_single(const std::string & url, const std::stri return false; } - bool force_download = false; - // Set the URL, allow to follow http redirection curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L); @@ -256,7 +276,7 @@ static bool common_download_file_single(const std::string & url, const std::stri // If the file exists, check its JSON metadata companion file. std::string metadata_path = path + ".json"; - nlohmann::json metadata; + nlohmann::json metadata; // TODO @ngxson : get rid of this json, use regex instead std::string etag; std::string last_modified; @@ -266,7 +286,7 @@ static bool common_download_file_single(const std::string & url, const std::stri if (metadata_in.good()) { try { metadata_in >> metadata; - LOG_INF("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str()); + LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str()); if (metadata.contains("url") && metadata.at("url").is_string()) { auto previous_url = metadata.at("url").get(); if (previous_url != url) { @@ -296,7 +316,10 @@ static bool common_download_file_single(const std::string & url, const std::stri }; common_load_model_from_url_headers headers; + bool head_request_ok = false; + bool should_download = !file_exists; // by default, we should download if the file does not exist + // get ETag to see if the remote file has changed { typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *); auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t { @@ -325,23 +348,28 @@ static bool common_download_file_single(const std::string & url, const std::stri curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast(header_callback)); curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers); - bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS); + // we only allow retrying once for HEAD requests + // this is for the use case of using running offline (no internet), retrying can be annoying + bool was_perform_successful = curl_perform_with_retry(url, curl.get(), 1, 0); if (!was_perform_successful) { - return false; + head_request_ok = false; } long http_code = 0; curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code); - if (http_code != 200) { - // HEAD not supported, we don't know if the file has changed - // force trigger downloading - force_download = true; - LOG_ERR("%s: HEAD invalid http status code received: %ld\n", __func__, http_code); + if (http_code == 200) { + head_request_ok = true; + } else { + LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code); + head_request_ok = false; } } - bool should_download = !file_exists || force_download; - if (!should_download) { + // if head_request_ok is false, we don't have the etag or last-modified headers + // we leave should_download as-is, which is true if the file does not exist + if (head_request_ok) { + // check if ETag or Last-Modified headers are different + // if it is, we need to download the file again if (!etag.empty() && etag != headers.etag) { LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str()); should_download = true; @@ -350,6 +378,7 @@ static bool common_download_file_single(const std::string & url, const std::stri should_download = true; } } + if (should_download) { std::string path_temporary = path + ".downloadInProgress"; if (file_exists) { @@ -424,13 +453,15 @@ static bool common_download_file_single(const std::string & url, const std::stri {"etag", headers.etag}, {"lastModified", headers.last_modified} }); - std::ofstream(metadata_path) << metadata.dump(4); - LOG_INF("%s: file metadata saved: %s\n", __func__, metadata_path.c_str()); + write_file(metadata_path, metadata.dump(4)); + LOG_DBG("%s: file metadata saved: %s\n", __func__, metadata_path.c_str()); if (rename(path_temporary.c_str(), path.c_str()) != 0) { LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); return false; } + } else { + LOG_INF("%s: using cached file: %s\n", __func__, path.c_str()); } return true; @@ -605,16 +636,37 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_ // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response // User-Agent header is already set in common_remote_get_content, no need to set it here + // we use "=" to avoid clashing with other component, while still being allowed on windows + std::string cached_response_fname = "manifest=" + hf_repo + "=" + tag + ".json"; + string_replace_all(cached_response_fname, "/", "_"); + std::string cached_response_path = fs_get_cache_file(cached_response_fname); + // make the request common_remote_params params; params.headers = headers; - auto res = common_remote_get_content(url, params); - long res_code = res.first; - std::string res_str(res.second.data(), res.second.size()); + long res_code = 0; + std::string res_str; + bool use_cache = false; + try { + auto res = common_remote_get_content(url, params); + res_code = res.first; + res_str = std::string(res.second.data(), res.second.size()); + } catch (const std::exception & e) { + LOG_WRN("error: failed to get manifest: %s\n", e.what()); + LOG_WRN("try reading from cache\n"); + // try to read from cache + try { + res_str = read_file(cached_response_path); + res_code = 200; + use_cache = true; + } catch (const std::exception & e) { + throw std::runtime_error("error: failed to get manifest (check your internet connection)"); + } + } std::string ggufFile; std::string mmprojFile; - if (res_code == 200) { + if (res_code == 200 || res_code == 304) { // extract ggufFile.rfilename in json, using regex { std::regex pattern("\"ggufFile\"[\\s\\S]*?\"rfilename\"\\s*:\\s*\"([^\"]+)\""); @@ -631,6 +683,10 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_ mmprojFile = match[1].str(); } } + if (!use_cache) { + // if not using cached response, update the cache file + write_file(cached_response_path, res_str); + } } else if (res_code == 401) { throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token"); } else { @@ -1142,6 +1198,9 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e fprintf(stderr, "%s\n", ex.what()); ctx_arg.params = params_org; return false; + } catch (std::exception & ex) { + fprintf(stderr, "%s\n", ex.what()); + exit(1); // for other exceptions, we exit with status code 1 } return true; @@ -1442,13 +1501,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex {"-f", "--file"}, "FNAME", "a file containing the prompt (default: none)", [](common_params & params, const std::string & value) { - std::ifstream file(value); - if (!file) { - throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); - } + params.prompt = read_file(value); // store the external file name in params params.prompt_file = value; - std::copy(std::istreambuf_iterator(file), std::istreambuf_iterator(), back_inserter(params.prompt)); if (!params.prompt.empty() && params.prompt.back() == '\n') { params.prompt.pop_back(); } @@ -1458,11 +1513,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex {"-sysf", "--system-prompt-file"}, "FNAME", "a file containing the system prompt (default: none)", [](common_params & params, const std::string & value) { - std::ifstream file(value); - if (!file) { - throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); - } - std::copy(std::istreambuf_iterator(file), std::istreambuf_iterator(), back_inserter(params.system_prompt)); + params.system_prompt = read_file(value); if (!params.system_prompt.empty() && params.system_prompt.back() == '\n') { params.system_prompt.pop_back(); } @@ -1887,15 +1938,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex {"--grammar-file"}, "FNAME", "file to read grammar from", [](common_params & params, const std::string & value) { - std::ifstream file(value); - if (!file) { - throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); - } - std::copy( - std::istreambuf_iterator(file), - std::istreambuf_iterator(), - std::back_inserter(params.sampling.grammar) - ); + params.sampling.grammar = read_file(value); } ).set_sparam()); add_opt(common_arg( @@ -2815,14 +2858,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex "list of built-in templates:\n%s", list_builtin_chat_templates().c_str() ), [](common_params & params, const std::string & value) { - std::ifstream file(value); - if (!file) { - throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); - } - std::copy( - std::istreambuf_iterator(file), - std::istreambuf_iterator(), - std::back_inserter(params.chat_template)); + params.chat_template = read_file(value); } ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE_FILE")); add_opt(common_arg( From 44cd8d91ff2c9e4a0f2e3151f8d6f04c928e2571 Mon Sep 17 00:00:00 2001 From: Aaron Teo <57927438+taronaeo@users.noreply.github.com> Date: Wed, 30 Apr 2025 17:47:35 +0800 Subject: [PATCH 3/8] feat(ggml-cpu): enable z17 compile (#13182) z17 compilation requires GCC 15.1.0 and onwards Signed-off-by: Aaron Teo --- ggml/src/ggml-cpu/CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt index 6a652738c10..9a3085befc4 100644 --- a/ggml/src/ggml-cpu/CMakeLists.txt +++ b/ggml/src/ggml-cpu/CMakeLists.txt @@ -352,10 +352,14 @@ function(ggml_add_cpu_backend_variant_impl tag_name) # TODO: Separation to determine activation of VX/VXE/VXE2 if (${S390X_M} MATCHES "8561|8562") message(STATUS "z15 target") - list(APPEND ARCH_FLAGS -march=z15 -mtune=z15) + list(APPEND ARCH_FLAGS -march=z15) elseif (${S390X_M} MATCHES "3931") message(STATUS "z16 target") - list(APPEND ARCH_FLAGS -march=z16 -mtune=z16) + list(APPEND ARCH_FLAGS -march=z16) + elseif (${S390X_M} MATCHES "9175|9176") + # NOTE: Only available from GCC 15.1.0 onwards. Any z17 machine with compile issues must first verify their GCC version. + message(STATUS "z17 target") + list(APPEND ARCH_FLAGS -march=z17) else() message(STATUS "Unknown target") message(WARNING "Unknown target. If you are compiling for z14 and earlier, you might have to add -DGGML_VXE=OFF.") From 07c2e2f76cce9a61c110b6995fbb90ccea2c3aaa Mon Sep 17 00:00:00 2001 From: Xuan-Son Nguyen Date: Wed, 30 Apr 2025 13:06:15 +0200 Subject: [PATCH 4/8] convert : correct typo image_mean --> image_std (#13208) --- convert_hf_to_gguf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index b9cea7e4699..d607af69559 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -1103,7 +1103,7 @@ def set_gguf_parameters(self): # preprocessor config self.gguf_writer.add_vision_image_mean(self.preprocessor_config["image_mean"]) - self.gguf_writer.add_vision_image_std(self.preprocessor_config["image_mean"]) + self.gguf_writer.add_vision_image_std(self.preprocessor_config["image_std"]) def write_vocab(self): raise ValueError("VisionModel does not support vocab writing") From 416313773b53585fddcafbcb914cbbfbaeb94b1f Mon Sep 17 00:00:00 2001 From: shalinib-ibm Date: Wed, 30 Apr 2025 16:47:08 +0530 Subject: [PATCH 5/8] ggml : fix ppc64le build (#13176) Build fails with compilation error on power pc. This patch fixes the same. Tested with unit tests run via --build && cd && make test Signed-off-by: Shalini Salomi Bodapati --- ggml/src/ggml-cpu/simd-mappings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-cpu/simd-mappings.h b/ggml/src/ggml-cpu/simd-mappings.h index 04d10cec266..45c31cf1faf 100644 --- a/ggml/src/ggml-cpu/simd-mappings.h +++ b/ggml/src/ggml-cpu/simd-mappings.h @@ -341,7 +341,7 @@ static inline void __avx_f32cx8_store(ggml_fp16_t *x, __m256 y) { #define GGML_F32_EPR 4 #define GGML_F32x4 vector float -#define GGML_F32x4_ZERO 0.0f +#define GGML_F32x4_ZERO {0.0f} #define GGML_F32x4_SET1 vec_splats #define GGML_F32x4_LOAD(p) vec_xl(0, p) #define GGML_F32x4_STORE(p, r) vec_xst(r, 0, p) From e5007a5edf2692ef7151a81a61ce2716b83374e5 Mon Sep 17 00:00:00 2001 From: Jeff Bolz Date: Wed, 30 Apr 2025 07:38:37 -0500 Subject: [PATCH 6/8] vulkan: use uint array index to avoid glslang bug (#13193) --- ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp index 962d2353f88..9cb7da2daab 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp @@ -482,7 +482,7 @@ float16_t dequantFuncIQ2_XXS(const in decodeBufIQ2_XXS bl, const in uint blockCo const uint ib8 = (idx & 0x18) >> 3; // 0..3 const uint iqs = 8 * ib32 + ib8; - const uint8_t qs = bl.block.qs[iqs]; + const uint qs = bl.block.qs[iqs]; const uint signscale = pack32(u16vec2(bl16.block.qs[4*ib32+2], bl16.block.qs[4*ib32+3])); const float dscale = float(bl.block.d) * 0.25 * (0.5 + float(signscale >> 28)); From 3b127c738535d95e06abd0d43da147bc13516ad0 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Wed, 30 Apr 2025 13:52:35 +0100 Subject: [PATCH 7/8] common : add -jf / --json-schema-file flag (#12011) --- common/arg.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/common/arg.cpp b/common/arg.cpp index 6633c3a1db8..b8c72ee6457 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1948,6 +1948,23 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.sampling.grammar = json_schema_to_grammar(json::parse(value)); } ).set_sparam()); + add_opt(common_arg( + {"-jf", "--json-schema-file"}, "FILE", + "File containing a JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object\nFor schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead", + [](common_params & params, const std::string & value) { + std::ifstream file(value); + if (!file) { + throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); + } + std::string schema; + std::copy( + std::istreambuf_iterator(file), + std::istreambuf_iterator(), + std::back_inserter(schema) + ); + params.sampling.grammar = json_schema_to_grammar(json::parse(schema)); + } + ).set_sparam()); add_opt(common_arg( {"--pooling"}, "{none,mean,cls,last,rank}", "pooling type for embeddings, use model default if unspecified", From ceda28ef8e310a8dee60bf275077a3eedae8e36c Mon Sep 17 00:00:00 2001 From: Tatsuya Tanaka Date: Wed, 30 Apr 2025 22:25:20 +0900 Subject: [PATCH 8/8] llava : remove duplicate include (#13207) --- examples/llava/clip-impl.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/llava/clip-impl.h b/examples/llava/clip-impl.h index 04bfcbb5e57..66cb21ef1a7 100644 --- a/examples/llava/clip-impl.h +++ b/examples/llava/clip-impl.h @@ -2,8 +2,6 @@ #include "gguf.h" #include "clip.h" -#include "clip.h" - #include #include #include