From 51003ed4f750c226006c14558149b914c804d162 Mon Sep 17 00:00:00 2001 From: Sergey Nikolaev Date: Sat, 23 May 2026 17:16:39 +0000 Subject: [PATCH 1/5] ci: enable ONNX runtime setup for embeddings builds --- .../workflows/embedding_build_template.yml | 25 +++- cmake/build_embeddings.cmake | 111 ++++++++++++++++-- embeddings/Cargo.toml | 4 + 3 files changed, 126 insertions(+), 14 deletions(-) diff --git a/.github/workflows/embedding_build_template.yml b/.github/workflows/embedding_build_template.yml index e3b42bea..7e7fe8be 100644 --- a/.github/workflows/embedding_build_template.yml +++ b/.github/workflows/embedding_build_template.yml @@ -234,13 +234,32 @@ jobs: run: | # Set Docker image based on architecture # Download glibc2_17-compatible ORT static lib (avoids __isoc23_strtoll from pyke.io builds) - ORT_VERSION="1.24.2" + read_ort_metadata() { + local key="$1" + awk -v key="$key" ' + /^\[package.metadata.manticore.ort\]/ { in_section=1; next } + /^\[/ { in_section=0 } + in_section && $1 == key { + gsub(/"/, "", $3) + print $3 + exit + } + ' embeddings/Cargo.toml + } + + ORT_VERSION="$(read_ort_metadata version)" + ORT_GLIBC="$(read_ort_metadata linux-glibc)" + if [[ -z "${ORT_VERSION}" || -z "${ORT_GLIBC}" ]]; then + echo "Failed to read ORT metadata from embeddings/Cargo.toml" >&2 + exit 1 + fi + if [[ "${{ inputs.arch }}" == "aarch64" ]]; then docker_image="ghcr.io/manticoresoftware/rust-min-libc:aarch64-rust1.94.1-glibc2.27-openssl1.1.1k" - ort_asset="onnxruntime-linux-aarch64-static_lib-${ORT_VERSION}-glibc2_17" + ort_asset="onnxruntime-linux-aarch64-static_lib-${ORT_VERSION}-glibc${ORT_GLIBC}" else docker_image="ghcr.io/manticoresoftware/rust-min-libc:amd64-rust1.94.1-glibc2.27-openssl1.1.1k" - ort_asset="onnxruntime-linux-x64-static_lib-${ORT_VERSION}-glibc2_17" + ort_asset="onnxruntime-linux-x64-static_lib-${ORT_VERSION}-glibc${ORT_GLIBC}" fi curl -sL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${ORT_VERSION}/${ort_asset}.zip" -o /tmp/ort.zip diff --git a/cmake/build_embeddings.cmake b/cmake/build_embeddings.cmake index dd035035..5f46213b 100644 --- a/cmake/build_embeddings.cmake +++ b/cmake/build_embeddings.cmake @@ -20,6 +20,80 @@ if (__build_embeddings_included) endif () set ( __build_embeddings_included YES ) +set ( EMBEDDINGS_ORT_VERSION "" CACHE STRING "ONNX Runtime version used for local Linux embeddings builds; defaults to embeddings/Cargo.toml metadata" ) +set ( EMBEDDINGS_ORT_GLIBC "" CACHE STRING "ONNX Runtime glibc baseline used for local Linux embeddings builds; defaults to embeddings/Cargo.toml metadata" ) + +function(read_embeddings_ort_metadata OUT_ORT_VERSION OUT_ORT_GLIBC) + set ( CARGO_TOML "${CMAKE_SOURCE_DIR}/embeddings/Cargo.toml" ) + if (NOT EXISTS "${CARGO_TOML}") + message ( FATAL_ERROR "embeddings Cargo.toml was not found: ${CARGO_TOML}" ) + endif() + + file ( READ "${CARGO_TOML}" CARGO_TOML_CONTENT ) + string ( REGEX MATCH "\\[package\\.metadata\\.manticore\\.ort\\][^\[]*" ORT_METADATA "${CARGO_TOML_CONTENT}" ) + if (NOT ORT_METADATA) + message ( FATAL_ERROR "Missing [package.metadata.manticore.ort] version/linux-glibc in ${CARGO_TOML}" ) + endif() + + if (NOT ORT_METADATA MATCHES "version[ \t]*=[ \t]*\"([^\"]+)\"") + message ( FATAL_ERROR "Missing [package.metadata.manticore.ort] version in ${CARGO_TOML}" ) + endif() + set ( ORT_VERSION "${CMAKE_MATCH_1}" ) + + if (NOT ORT_METADATA MATCHES "linux-glibc[ \t]*=[ \t]*\"([^\"]+)\"") + message ( FATAL_ERROR "Missing [package.metadata.manticore.ort] linux-glibc in ${CARGO_TOML}" ) + endif() + set ( ORT_GLIBC "${CMAKE_MATCH_1}" ) + + if (EMBEDDINGS_ORT_VERSION) + set ( ORT_VERSION "${EMBEDDINGS_ORT_VERSION}" ) + endif() + if (EMBEDDINGS_ORT_GLIBC) + set ( ORT_GLIBC "${EMBEDDINGS_ORT_GLIBC}" ) + endif() + + set ( ${OUT_ORT_VERSION} "${ORT_VERSION}" PARENT_SCOPE ) + set ( ${OUT_ORT_GLIBC} "${ORT_GLIBC}" PARENT_SCOPE ) +endfunction() + +function(prepare_embeddings_ort) + if (NOT UNIX OR APPLE) + return() + endif() + + read_embeddings_ort_metadata ( ORT_VERSION ORT_GLIBC ) + + if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)$") + set ( ORT_ARCH "aarch64" ) + else() + set ( ORT_ARCH "x64" ) + endif() + + set ( ORT_ASSET "onnxruntime-linux-${ORT_ARCH}-static_lib-${ORT_VERSION}-glibc${ORT_GLIBC}" ) + set ( ORT_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${ORT_VERSION}/${ORT_ASSET}.zip" ) + set ( ORT_ROOT "${CMAKE_CURRENT_BINARY_DIR}/embeddings/ort/${ORT_ASSET}" ) + set ( ORT_ZIP "${CMAKE_CURRENT_BINARY_DIR}/embeddings/ort/${ORT_ASSET}.zip" ) + + if (NOT EXISTS "${ORT_ROOT}/lib") + message ( STATUS "Downloading ONNX Runtime static library: ${ORT_ASSET}" ) + file ( MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/embeddings/ort" ) + file ( DOWNLOAD "${ORT_URL}" "${ORT_ZIP}" STATUS ORT_DOWNLOAD_STATUS SHOW_PROGRESS ) + list ( GET ORT_DOWNLOAD_STATUS 0 ORT_DOWNLOAD_CODE ) + if (NOT ORT_DOWNLOAD_CODE EQUAL 0) + list ( GET ORT_DOWNLOAD_STATUS 1 ORT_DOWNLOAD_ERROR ) + message ( FATAL_ERROR "Failed to download ${ORT_URL}: ${ORT_DOWNLOAD_ERROR}" ) + endif() + file ( ARCHIVE_EXTRACT INPUT "${ORT_ZIP}" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/embeddings/ort" ) + endif() + + if (NOT EXISTS "${ORT_ROOT}/lib") + message ( FATAL_ERROR "ONNX Runtime lib directory was not found: ${ORT_ROOT}/lib" ) + endif() + + set ( ENV{ORT_LIB_PATH} "${ORT_ROOT}/lib" ) + message ( STATUS "Using ONNX Runtime from ORT_LIB_PATH=$ENV{ORT_LIB_PATH}" ) +endfunction() + function(build_embeddings_lib) message ( STATUS "building embeddings locally..." ) @@ -49,21 +123,37 @@ function(build_embeddings_lib) # This matches the format used by other Manticore libraries for consistent version display set(ENV{GIT_COMMIT_ID} "${GIT_COMMIT_ID}") set(ENV{GIT_TIMESTAMP_ID} "${GIT_TIMESTAMP_ID}") + prepare_embeddings_ort() - # Enable platform-specific BLAS acceleration for candle when available - set(EMBEDDINGS_CARGO_FEATURES "") - if(APPLE) - set(EMBEDDINGS_CARGO_FEATURES "--features" "accelerate") - elseif(UNIX) - # MKL provides multi-threaded BLAS on Linux; skip if not available - execute_process(COMMAND pkg-config --exists mkl-dynamic-lp64-seq RESULT_VARIABLE MKL_FOUND OUTPUT_QUIET ERROR_QUIET) - if(MKL_FOUND EQUAL 0) - set(EMBEDDINGS_CARGO_FEATURES "--features" "mkl") + # Enable platform-specific BLAS acceleration for candle when available. + if (DEFINED EMBEDDINGS_CARGO_FEATURES) + set(EMBEDDINGS_FEATURES_CSV "${EMBEDDINGS_CARGO_FEATURES}") + else() + set(EMBEDDINGS_FEATURE_LIST) + if(APPLE) + list(APPEND EMBEDDINGS_FEATURE_LIST accelerate) + elseif(UNIX) + # MKL provides multi-threaded BLAS on Linux; skip if not available + execute_process(COMMAND pkg-config --exists mkl-dynamic-lp64-seq RESULT_VARIABLE MKL_FOUND OUTPUT_QUIET ERROR_QUIET) + if(MKL_FOUND EQUAL 0) + list(APPEND EMBEDDINGS_FEATURE_LIST mkl) + endif() endif() + list(JOIN EMBEDDINGS_FEATURE_LIST "," EMBEDDINGS_FEATURES_CSV) + endif() + + if (UNIX AND NOT APPLE AND DEFINED ENV{ORT_LIB_PATH} AND NOT "$ENV{ORT_LIB_PATH}" STREQUAL "" AND EMBEDDINGS_FEATURES_CSV) + string(REPLACE "," ";" EMBEDDINGS_FEATURE_LIST "${EMBEDDINGS_FEATURES_CSV}") + list(REMOVE_ITEM EMBEDDINGS_FEATURE_LIST download-ort) + list(JOIN EMBEDDINGS_FEATURE_LIST "," EMBEDDINGS_FEATURES_CSV) + endif() + + if (EMBEDDINGS_FEATURES_CSV) + set(EMBEDDINGS_CARGO_FEATURE_ARGS "--features" "${EMBEDDINGS_FEATURES_CSV}") endif() execute_process ( - COMMAND cargo build --manifest-path ${CMAKE_SOURCE_DIR}/embeddings/Cargo.toml --lib --release ${EMBEDDINGS_CARGO_FEATURES} --target-dir ${CMAKE_CURRENT_BINARY_DIR}/embeddings + COMMAND cargo build --manifest-path ${CMAKE_SOURCE_DIR}/embeddings/Cargo.toml --lib --release ${EMBEDDINGS_CARGO_FEATURE_ARGS} --target-dir ${CMAKE_CURRENT_BINARY_DIR}/embeddings RESULT_VARIABLE CMD_RESULT ) @@ -86,4 +176,3 @@ function(build_embeddings_lib) file(RENAME "${CMAKE_CURRENT_BINARY_DIR}/embeddings/release/${EMBEDDINGS_LIB_NAME}.pdb" "${CMAKE_CURRENT_BINARY_DIR}/embeddings/release/lib_${EMBEDDINGS_LIB_NAME}.pdb") endif() endfunction () - diff --git a/embeddings/Cargo.toml b/embeddings/Cargo.toml index b68f2c2b..9064926d 100644 --- a/embeddings/Cargo.toml +++ b/embeddings/Cargo.toml @@ -3,6 +3,10 @@ name = "manticore-knn-embeddings" version = "1.1.1" edition = "2021" +[package.metadata.manticore.ort] +version = "1.24.2" +linux-glibc = "2_17" + # Candle: git dep so CI works without a local candle clone. # For local dev with ../../candle, add a [patch] section to use path deps. [dependencies] From 065e5851a1f562c39a8512377e08a6ab417fa22d Mon Sep 17 00:00:00 2001 From: Don Hardman Date: Tue, 26 May 2026 12:43:32 +0300 Subject: [PATCH 2/5] build(embedding): automate ONNX Runtime and MKL setup - Implement GetONNXRuntime.cmake for automated ORT library downloads - Implement GetMKL.cmake for automated MKL binary acquisition - Support Linux, macOS, and Windows across x64 and ARM64 architectures - Add automatic MKL detection and conda-forge support for Linux - Integrate NuGet package retrieval for Intel OpenMP DLLs on Windows - Modularize dependency logic into separate CMake modules - Unify CI dependency fetching and align environment variables - Replace manual Cargo.toml metadata parsing with helper functions - Simplify platform and architecture selection logic --- .../workflows/embedding_build_template.yml | 109 ++++------- cmake/GetMKL.cmake | 175 ++++++++++++++++++ cmake/GetONNXRuntime.cmake | 106 +++++++++++ cmake/build_embeddings.cmake | 100 +++------- embeddings/Cargo.toml | 4 - 5 files changed, 346 insertions(+), 148 deletions(-) create mode 100644 cmake/GetMKL.cmake create mode 100644 cmake/GetONNXRuntime.cmake diff --git a/.github/workflows/embedding_build_template.yml b/.github/workflows/embedding_build_template.yml index 63904e38..fa4dfc5f 100644 --- a/.github/workflows/embedding_build_template.yml +++ b/.github/workflows/embedding_build_template.yml @@ -232,38 +232,19 @@ jobs: - name: Build for Linux if: ${{ inputs.distr == 'linux' }} run: | - # Set Docker image based on architecture - # Download glibc2_17-compatible ORT static lib (avoids __isoc23_strtoll from pyke.io builds) - read_ort_metadata() { - local key="$1" - awk -v key="$key" ' - /^\[package.metadata.manticore.ort\]/ { in_section=1; next } - /^\[/ { in_section=0 } - in_section && $1 == key { - gsub(/"/, "", $3) - print $3 - exit - } - ' embeddings/Cargo.toml - } - - ORT_VERSION="$(read_ort_metadata version)" - ORT_GLIBC="$(read_ort_metadata linux-glibc)" - if [[ -z "${ORT_VERSION}" || -z "${ORT_GLIBC}" ]]; then - echo "Failed to read ORT metadata from embeddings/Cargo.toml" >&2 - exit 1 - fi - + # Pick the rust-min-libc docker image (glibc baseline) for this arch. + # ORT static lib is fetched via cmake/embeddings_ort.cmake — the same + # script local host builds use, so there's no version/URL drift. if [[ "${{ inputs.arch }}" == "aarch64" ]]; then docker_image="ghcr.io/manticoresoftware/rust-min-libc:aarch64-rust1.95.0-glibc2.27-openssl1.1.1k" - ort_asset="onnxruntime-linux-aarch64-static_lib-${ORT_VERSION}-glibc2_17" + ort_arch="aarch64" else docker_image="ghcr.io/manticoresoftware/rust-min-libc:amd64-rust1.95.0-glibc2.27-openssl1.1.1k" - ort_asset="onnxruntime-linux-x64-static_lib-${ORT_VERSION}-glibc2_17" + ort_arch="x64" fi - curl -sL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${ORT_VERSION}/${ort_asset}.zip" -o /tmp/ort.zip - unzip -q /tmp/ort.zip -d /tmp/ort + cmake -DORT_PLATFORM=linux -DORT_ARCH=${ort_arch} -DORT_OUT_DIR="${RUNNER_TEMP}/ort" -P cmake/GetONNXRuntime.cmake + ort_lib="$(cat "${RUNNER_TEMP}/ort/lib_path.txt")" target="${{ steps.vars.outputs.target }}" # Each flavor gets its own --target-dir so cargo doesn't relink between @@ -275,11 +256,11 @@ jobs: docker run --rm \ -v $(pwd):/src \ -v $(pwd)/.cargo-cache:/cargo-cache \ - -v /tmp/ort/${ort_asset}/lib:/ort-lib \ + -v ${ort_lib}:/ort-lib \ -w /src \ -u root \ -e CARGO_HOME=/cargo-cache \ - -e ORT_LIB_PATH=/ort-lib \ + -e ORT_LIB_LOCATION=/ort-lib \ -e MKLROOT=/opt/intel/oneapi/mkl/latest \ -e GIT_COMMIT_ID="${{ steps.git_meta.outputs.commit }}" \ -e GIT_TIMESTAMP_ID="${{ steps.git_meta.outputs.timestamp }}" \ @@ -335,28 +316,21 @@ jobs: if: ${{ inputs.distr == 'macos' || inputs.distr == 'windows' }} shell: bash run: | - ort_version="1.24.2" - if [[ "${{ inputs.distr }}" == "macos" ]]; then - if [[ "${{ inputs.arch }}" == "aarch64" ]]; then - ort_name="onnxruntime-osx-arm64-static_lib-${ort_version}" - else - ort_name="onnxruntime-osx-x86_64-static_lib-${ort_version}" - fi - curl -sL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${ort_version}/${ort_name}.zip" -o /tmp/ort.zip - unzip -q /tmp/ort.zip -d /tmp/ort - echo "ORT_LIB_LOCATION=/tmp/ort/${ort_name}/lib" >> $GITHUB_ENV + # Single source of truth for version + asset naming lives in + # cmake/embeddings_ort.cmake (same script local host builds use). + case "${{ inputs.distr }}" in + macos) ort_platform=macos ;; + windows) ort_platform=windows ;; + esac + if [[ "${{ inputs.arch }}" == "aarch64" ]]; then + ort_arch=aarch64 else - if [[ "${{ inputs.arch }}" == "aarch64" ]]; then - ort_name="onnxruntime-win-arm64-static_lib-MD-Release-${ort_version}" - else - ort_name="onnxruntime-win-x64-static_lib-MD-Release-${ort_version}" - fi - curl -sL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${ort_version}/${ort_name}.tar.bz2" -o ort.tar.bz2 - mkdir -p ort-lib - tar xjf ort.tar.bz2 -C ort-lib - echo "ORT_LIB_LOCATION=${{ github.workspace }}/ort-lib/${ort_name}/lib" >> $GITHUB_ENV + ort_arch=x64 fi + cmake -DORT_PLATFORM=${ort_platform} -DORT_ARCH=${ort_arch} -DORT_OUT_DIR="${RUNNER_TEMP}/ort" -P cmake/GetONNXRuntime.cmake + echo "ORT_LIB_LOCATION=$(cat "${RUNNER_TEMP}/ort/lib_path.txt")" >> $GITHUB_ENV + - name: Build native if: ${{ inputs.distr == 'windows' || inputs.distr == 'macos' }} run: | @@ -442,32 +416,21 @@ jobs: if [[ "${{ inputs.distr }}" == "windows" && "${{ inputs.arch }}" != "aarch64" ]]; then echo "=== Package Windows runtime DLL dependencies ===" - openmp_redist_version="2025.3.3.31" - openmp_redist_dir=".openmp-redist" - for dll in libiomp5md.dll; do - # MKL flavor lives under target-mkl; also check it. - dep=$(find ./embeddings/target ./embeddings/target-mkl "$HOME/.cargo" -type f -iname "$dll" 2>/dev/null | head -1) - if [[ -z "$dep" ]]; then - echo "$dll was not found in the Cargo build tree; downloading Intel OpenMP redist ${openmp_redist_version}" - rm -rf "$openmp_redist_dir" - mkdir -p "$openmp_redist_dir/pkg" - curl -fsSL \ - "https://api.nuget.org/v3-flatcontainer/intelopenmp.redist.win/${openmp_redist_version}/intelopenmp.redist.win.${openmp_redist_version}.nupkg" \ - -o "$openmp_redist_dir/intelopenmp.redist.win.zip" - powershell -NoProfile -NonInteractive -Command \ - "Expand-Archive -LiteralPath '$(cygpath -w "$openmp_redist_dir/intelopenmp.redist.win.zip")' -DestinationPath '$(cygpath -w "$openmp_redist_dir/pkg")' -Force" - dep=$(find "$openmp_redist_dir/pkg" -type f -iname "$dll" | head -1) - fi - - if [[ -z "$dep" ]]; then - echo "Required Windows runtime dependency was not found after redist download: $dll" - find ./embeddings/target ./embeddings/target-mkl "$HOME/.cargo" "$openmp_redist_dir" -type f -iname "*.dll" 2>/dev/null | sort || true - exit 1 - fi - - cp "$dep" build/ - echo "Packaged $dll from $dep" - done + # Prefer the DLL already in the cargo build tree (intel-mkl-src may + # have fetched it); otherwise let cmake/GetMKL.cmake fetch the same + # intelopenmp.redist nuget the inline shell used to download. + dep=$(find ./embeddings/target ./embeddings/target-mkl "$HOME/.cargo" -type f -iname libiomp5md.dll 2>/dev/null | head -1) + if [[ -z "$dep" ]]; then + cmake -DMKL_PLATFORM=windows -DMKL_ARCH=x64 -DMKL_OUT_DIR="${RUNNER_TEMP}/mkl" -P cmake/GetMKL.cmake + dep=$(cat "${RUNNER_TEMP}/mkl/libiomp5md_dll_path.txt") + fi + if [[ -z "$dep" || ! -f "$dep" ]]; then + echo "libiomp5md.dll was not located after GetMKL.cmake fallback" + find ./embeddings/target ./embeddings/target-mkl "$HOME/.cargo" "${RUNNER_TEMP}/mkl" -type f -iname "*.dll" 2>/dev/null | sort || true + exit 1 + fi + cp "$dep" build/ + echo "Packaged libiomp5md.dll from $dep" elif [[ "${{ inputs.distr }}" == "windows" ]]; then echo "Skipping Intel OpenMP runtime packaging for Windows ${{ inputs.arch }}; Intel OpenMP redist currently ships win-x64 DLLs only" fi diff --git a/cmake/GetMKL.cmake b/cmake/GetMKL.cmake new file mode 100644 index 00000000..070c4a08 --- /dev/null +++ b/cmake/GetMKL.cmake @@ -0,0 +1,175 @@ +# Copyright (c) 2020-2025, Manticore Software LTD (https://manticoresearch.com) +# All rights reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Single source of truth for the Intel MKL / OpenMP dependency used by +# embeddings builds with the `mkl` Cargo feature. Mirrors the install +# performed inside ../rust-min-libc Dockerfile so local x86_64 hosts can +# build the MKL flavor without root + apt. Consumed in two ways: +# - included from cmake/build_embeddings.cmake for local host builds +# - run as a script (cmake -DMKL_PLATFORM=... -DMKL_ARCH=... -DMKL_OUT_DIR=... -P cmake/GetMKL.cmake) +# from CI workflows that need just the OpenMP runtime DLL for packaging. + +cmake_minimum_required ( VERSION 3.17 ) + +# MKL static archive (linux only). Conda-forge mirror — pins the exact same +# files Intel ships via apt as intel-oneapi-mkl-devel. Override either var +# to bump or substitute a private mirror. +set ( EMBEDDINGS_MKL_VERSION "2024.0.0" CACHE STRING "Intel oneMKL version for embeddings builds" ) +set ( EMBEDDINGS_MKL_LINUX_URL + "https://conda.anaconda.org/conda-forge/linux-64/mkl-static-${EMBEDDINGS_MKL_VERSION}-ha770c72_49657.tar.bz2" + CACHE STRING "Override URL for the MKL static archive (linux-x64)" ) +set ( EMBEDDINGS_OPENMP_LINUX_URL + "https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-17.0.6-h4dfa4b3_0.tar.bz2" + CACHE STRING "Override URL for the llvm-openmp static archive (linux-x64)" ) + +# Intel OpenMP runtime DLL (windows-x64 only). Used to package libiomp5md.dll +# alongside the MKL .dll artifact. Same source the CI workflow used before +# this logic moved into CMake. +set ( EMBEDDINGS_OPENMP_WIN_VERSION "2025.3.3.31" CACHE STRING "intelopenmp.redist.win nuget version" ) +set ( EMBEDDINGS_OPENMP_WIN_URL + "https://api.nuget.org/v3-flatcontainer/intelopenmp.redist.win/${EMBEDDINGS_OPENMP_WIN_VERSION}/intelopenmp.redist.win.${EMBEDDINGS_OPENMP_WIN_VERSION}.nupkg" + CACHE STRING "Override URL for the intelopenmp.redist.win nuget package" ) + +# Common helper: download IN_URL to IN_OUT_DIR/. Idempotent: re-uses +# an existing archive on disk. Returns the archive path in OUT_ARCHIVE_VAR. +function ( _embeddings_mkl_fetch OUT_ARCHIVE_VAR IN_URL IN_OUT_DIR IN_LABEL ) + get_filename_component ( _name "${IN_URL}" NAME ) + set ( _archive "${IN_OUT_DIR}/${_name}" ) + + if ( NOT EXISTS "${_archive}" ) + message ( STATUS "Downloading ${IN_LABEL}: ${_name}" ) + file ( MAKE_DIRECTORY "${IN_OUT_DIR}" ) + file ( DOWNLOAD "${IN_URL}" "${_archive}" STATUS _status SHOW_PROGRESS ) + list ( GET _status 0 _code ) + if ( NOT _code EQUAL 0 ) + list ( GET _status 1 _err ) + file ( REMOVE "${_archive}" ) + message ( FATAL_ERROR "Failed to download ${IN_URL}: ${_err}" ) + endif() + endif() + + set ( ${OUT_ARCHIVE_VAR} "${_archive}" PARENT_SCOPE ) +endfunction() + +# Detect an existing MKL installation. On hit, sets OUT_VAR to MKLROOT. +function ( _embeddings_mkl_detect_linux OUT_VAR ) + if ( DEFINED ENV{MKLROOT} AND EXISTS "$ENV{MKLROOT}/lib/intel64/libmkl_intel_lp64.a" ) + set ( ${OUT_VAR} "$ENV{MKLROOT}" PARENT_SCOPE ) + return() + endif() + if ( EXISTS "/opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_lp64.a" ) + set ( ${OUT_VAR} "/opt/intel/oneapi/mkl/latest" PARENT_SCOPE ) + return() + endif() + if ( EXISTS "/opt/intel/oneapi/mkl/latest/lib/libmkl_intel_lp64.a" ) + # Modern oneMKL layout (no intel64/ subdir). + set ( ${OUT_VAR} "/opt/intel/oneapi/mkl/latest" PARENT_SCOPE ) + return() + endif() + set ( ${OUT_VAR} "" PARENT_SCOPE ) +endfunction() + +# Linux x86_64: ensure MKL static archives + libiomp5.a are in scope. Returns +# the directory to use as MKLROOT (its lib/intel64/ has the archives). +function ( embeddings_mkl_prepare_linux OUT_MKLROOT_VAR IN_OUT_DIR ) + _embeddings_mkl_detect_linux ( _detected ) + if ( _detected ) + message ( STATUS "Detected MKL at ${_detected}" ) + set ( ${OUT_MKLROOT_VAR} "${_detected}" PARENT_SCOPE ) + return() + endif() + + # Conda-forge fallback. Same files Intel ships via oneAPI apt; we just + # replay the layout dance the rust-min-libc Dockerfile does on top. + set ( _root "${IN_OUT_DIR}/mkl-${EMBEDDINGS_MKL_VERSION}" ) + + if ( NOT EXISTS "${_root}/lib/libmkl_intel_lp64.a" ) + file ( MAKE_DIRECTORY "${_root}" ) + _embeddings_mkl_fetch ( _mkl_archive "${EMBEDDINGS_MKL_LINUX_URL}" "${IN_OUT_DIR}" "MKL static" ) + _embeddings_mkl_fetch ( _omp_archive "${EMBEDDINGS_OPENMP_LINUX_URL}" "${IN_OUT_DIR}" "llvm-openmp" ) + + file ( ARCHIVE_EXTRACT INPUT "${_mkl_archive}" DESTINATION "${_root}" ) + file ( ARCHIVE_EXTRACT INPUT "${_omp_archive}" DESTINATION "${_root}" ) + endif() + + # Mirror the rust-min-libc symlink dance: intel-mkl-src 0.8.1 expects + # lib/intel64/, while modern oneMKL ships flat in lib/. Symlink the old + # path so the build script finds archives there. libiomp5.a lives under + # the openmp package's lib/ — symlink into MKL's lib too so `-liomp5` + # resolves during static link. + if ( NOT EXISTS "${_root}/lib/intel64" ) + file ( CREATE_LINK "." "${_root}/lib/intel64" SYMBOLIC ) + endif() + + if ( NOT EXISTS "${_root}/lib/libiomp5.a" AND EXISTS "${_root}/lib/libomp.a" ) + # llvm-openmp ships libomp.a; intel-mkl-src expects libiomp5.a. + file ( CREATE_LINK "${_root}/lib/libomp.a" "${_root}/lib/libiomp5.a" SYMBOLIC ) + endif() + + foreach ( _required libmkl_intel_lp64.a libmkl_intel_thread.a libmkl_core.a libiomp5.a ) + if ( NOT EXISTS "${_root}/lib/${_required}" ) + message ( FATAL_ERROR "MKL prepare: missing ${_required} under ${_root}/lib — conda-forge archive layout may have changed; override -DEMBEDDINGS_MKL_LINUX_URL / -DEMBEDDINGS_OPENMP_LINUX_URL" ) + endif() + endforeach() + + set ( ${OUT_MKLROOT_VAR} "${_root}" PARENT_SCOPE ) +endfunction() + +# Windows x86_64: download intelopenmp.redist nuget for libiomp5md.dll. +# Returns absolute path to the .dll in OUT_DLL_VAR. Mirrors the block the +# CI workflow used to do inline. +function ( embeddings_mkl_prepare_windows OUT_DLL_VAR IN_OUT_DIR ) + set ( _root "${IN_OUT_DIR}/intelopenmp-${EMBEDDINGS_OPENMP_WIN_VERSION}" ) + set ( _dll "${_root}/runtimes/win-x64/native/libiomp5md.dll" ) + + if ( NOT EXISTS "${_dll}" ) + file ( MAKE_DIRECTORY "${_root}" ) + _embeddings_mkl_fetch ( _archive "${EMBEDDINGS_OPENMP_WIN_URL}" "${IN_OUT_DIR}" "Intel OpenMP redist" ) + # .nupkg is a zip; ARCHIVE_EXTRACT handles it. + file ( ARCHIVE_EXTRACT INPUT "${_archive}" DESTINATION "${_root}" ) + endif() + + if ( NOT EXISTS "${_dll}" ) + message ( FATAL_ERROR "intelopenmp redist did not contain libiomp5md.dll at expected path: ${_dll}" ) + endif() + + set ( ${OUT_DLL_VAR} "${_dll}" PARENT_SCOPE ) +endfunction() + +# Script-mode entry: cmake -DMKL_PLATFORM=... -DMKL_ARCH=... -DMKL_OUT_DIR=... -P cmake/GetMKL.cmake +# Writes the resolved path to ${MKL_OUT_DIR}/_path.txt so shell callers +# can read it back without recomputing layout. Keys: mklroot, libiomp5md_dll. +if ( CMAKE_SCRIPT_MODE_FILE STREQUAL CMAKE_CURRENT_LIST_FILE ) + if ( NOT DEFINED MKL_PLATFORM OR NOT DEFINED MKL_ARCH OR NOT DEFINED MKL_OUT_DIR ) + message ( FATAL_ERROR "usage: cmake -DMKL_PLATFORM= -DMKL_ARCH= -DMKL_OUT_DIR= -P GetMKL.cmake" ) + endif() + + if ( NOT MKL_ARCH STREQUAL "x64" AND NOT MKL_ARCH STREQUAL "x86_64" ) + message ( STATUS "GetMKL: arch '${MKL_ARCH}' has no MKL build; nothing to do" ) + return() + endif() + + if ( MKL_PLATFORM STREQUAL "linux" ) + embeddings_mkl_prepare_linux ( _mklroot "${MKL_OUT_DIR}" ) + file ( WRITE "${MKL_OUT_DIR}/mklroot_path.txt" "${_mklroot}" ) + message ( STATUS "MKLROOT: ${_mklroot}" ) + elseif ( MKL_PLATFORM STREQUAL "windows" ) + embeddings_mkl_prepare_windows ( _dll "${MKL_OUT_DIR}" ) + file ( WRITE "${MKL_OUT_DIR}/libiomp5md_dll_path.txt" "${_dll}" ) + message ( STATUS "libiomp5md.dll: ${_dll}" ) + else() + message ( STATUS "GetMKL: platform '${MKL_PLATFORM}' has no MKL build; nothing to do" ) + endif() +endif() diff --git a/cmake/GetONNXRuntime.cmake b/cmake/GetONNXRuntime.cmake new file mode 100644 index 00000000..06578419 --- /dev/null +++ b/cmake/GetONNXRuntime.cmake @@ -0,0 +1,106 @@ +# Copyright (c) 2020-2025, Manticore Software LTD (https://manticoresearch.com) +# All rights reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Single source of truth for the ONNX Runtime static library used by embeddings +# builds. Consumed in two ways: +# - included from cmake/build_embeddings.cmake for local host builds +# - run as a script (cmake -DORT_PLATFORM=... -DORT_ARCH=... -DORT_OUT_DIR=... -P cmake/GetONNXRuntime.cmake) +# from CI workflows, so neither place hardcodes the version, URL, or asset name. +# Bump versions here; both code paths pick up the new values. + +cmake_minimum_required ( VERSION 3.17 ) + +set ( EMBEDDINGS_ORT_VERSION "1.24.2" CACHE STRING "ONNX Runtime version for embeddings builds" ) +set ( EMBEDDINGS_ORT_GLIBC "2_17" CACHE STRING "ONNX Runtime glibc baseline (Linux only)" ) + +# Map (platform, arch) -> (asset stem, archive extension) per csukuangfj/onnxruntime-libs releases. +function ( _embeddings_ort_asset OUT_ASSET OUT_EXT IN_PLATFORM IN_ARCH ) + set ( _ver "${EMBEDDINGS_ORT_VERSION}" ) + set ( _glibc "${EMBEDDINGS_ORT_GLIBC}" ) + + if ( IN_PLATFORM STREQUAL "linux" ) + if ( IN_ARCH MATCHES "^(aarch64|arm64)$" ) + set ( _arch "aarch64" ) + else() + set ( _arch "x64" ) + endif() + set ( _asset "onnxruntime-linux-${_arch}-static_lib-${_ver}-glibc${_glibc}" ) + set ( _ext "zip" ) + elseif ( IN_PLATFORM STREQUAL "macos" ) + if ( IN_ARCH MATCHES "^(aarch64|arm64)$" ) + set ( _arch "arm64" ) + else() + set ( _arch "x86_64" ) + endif() + set ( _asset "onnxruntime-osx-${_arch}-static_lib-${_ver}" ) + set ( _ext "zip" ) + elseif ( IN_PLATFORM STREQUAL "windows" ) + if ( IN_ARCH MATCHES "^(aarch64|arm64)$" ) + set ( _arch "arm64" ) + else() + set ( _arch "x64" ) + endif() + set ( _asset "onnxruntime-win-${_arch}-static_lib-MD-Release-${_ver}" ) + set ( _ext "tar.bz2" ) + else() + message ( FATAL_ERROR "embeddings_ort: unsupported platform '${IN_PLATFORM}' (expected linux|macos|windows)" ) + endif() + + set ( ${OUT_ASSET} "${_asset}" PARENT_SCOPE ) + set ( ${OUT_EXT} "${_ext}" PARENT_SCOPE ) +endfunction() + +# Download + extract the ORT static lib for the given platform/arch into IN_OUT_DIR. +# Idempotent: skips network when the lib dir already exists. On success, sets +# OUT_LIB_DIR_VAR (in caller scope) to the absolute lib/ path. +function ( embeddings_ort_download OUT_LIB_DIR_VAR IN_PLATFORM IN_ARCH IN_OUT_DIR ) + _embeddings_ort_asset ( _asset _ext "${IN_PLATFORM}" "${IN_ARCH}" ) + + set ( _url "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${EMBEDDINGS_ORT_VERSION}/${_asset}.${_ext}" ) + set ( _archive "${IN_OUT_DIR}/${_asset}.${_ext}" ) + set ( _root "${IN_OUT_DIR}/${_asset}" ) + + if ( NOT EXISTS "${_root}/lib" ) + message ( STATUS "Downloading ${_asset}.${_ext}" ) + file ( MAKE_DIRECTORY "${IN_OUT_DIR}" ) + file ( DOWNLOAD "${_url}" "${_archive}" STATUS _status SHOW_PROGRESS ) + list ( GET _status 0 _code ) + if ( NOT _code EQUAL 0 ) + list ( GET _status 1 _err ) + message ( FATAL_ERROR "Failed to download ${_url}: ${_err}" ) + endif() + file ( ARCHIVE_EXTRACT INPUT "${_archive}" DESTINATION "${IN_OUT_DIR}" ) + endif() + + if ( NOT EXISTS "${_root}/lib" ) + message ( FATAL_ERROR "ORT lib dir not found after extract: ${_root}/lib" ) + endif() + + set ( ${OUT_LIB_DIR_VAR} "${_root}/lib" PARENT_SCOPE ) +endfunction() + +# Script-mode entry: cmake -DORT_PLATFORM=... -DORT_ARCH=... -DORT_OUT_DIR=... -P cmake/GetONNXRuntime.cmake +# Writes the resolved lib dir to ${ORT_OUT_DIR}/lib_path.txt so shell callers +# don't have to recompute the asset name to find it. +if ( CMAKE_SCRIPT_MODE_FILE STREQUAL CMAKE_CURRENT_LIST_FILE ) + if ( NOT DEFINED ORT_PLATFORM OR NOT DEFINED ORT_ARCH OR NOT DEFINED ORT_OUT_DIR ) + message ( FATAL_ERROR "usage: cmake -DORT_PLATFORM= -DORT_ARCH= -DORT_OUT_DIR= -P GetONNXRuntime.cmake" ) + endif() + + embeddings_ort_download ( _lib_dir "${ORT_PLATFORM}" "${ORT_ARCH}" "${ORT_OUT_DIR}" ) + + file ( WRITE "${ORT_OUT_DIR}/lib_path.txt" "${_lib_dir}" ) + message ( STATUS "ORT lib dir: ${_lib_dir}" ) +endif() diff --git a/cmake/build_embeddings.cmake b/cmake/build_embeddings.cmake index 5f46213b..dedd7148 100644 --- a/cmake/build_embeddings.cmake +++ b/cmake/build_embeddings.cmake @@ -20,78 +20,30 @@ if (__build_embeddings_included) endif () set ( __build_embeddings_included YES ) -set ( EMBEDDINGS_ORT_VERSION "" CACHE STRING "ONNX Runtime version used for local Linux embeddings builds; defaults to embeddings/Cargo.toml metadata" ) -set ( EMBEDDINGS_ORT_GLIBC "" CACHE STRING "ONNX Runtime glibc baseline used for local Linux embeddings builds; defaults to embeddings/Cargo.toml metadata" ) - -function(read_embeddings_ort_metadata OUT_ORT_VERSION OUT_ORT_GLIBC) - set ( CARGO_TOML "${CMAKE_SOURCE_DIR}/embeddings/Cargo.toml" ) - if (NOT EXISTS "${CARGO_TOML}") - message ( FATAL_ERROR "embeddings Cargo.toml was not found: ${CARGO_TOML}" ) - endif() - - file ( READ "${CARGO_TOML}" CARGO_TOML_CONTENT ) - string ( REGEX MATCH "\\[package\\.metadata\\.manticore\\.ort\\][^\[]*" ORT_METADATA "${CARGO_TOML_CONTENT}" ) - if (NOT ORT_METADATA) - message ( FATAL_ERROR "Missing [package.metadata.manticore.ort] version/linux-glibc in ${CARGO_TOML}" ) - endif() - - if (NOT ORT_METADATA MATCHES "version[ \t]*=[ \t]*\"([^\"]+)\"") - message ( FATAL_ERROR "Missing [package.metadata.manticore.ort] version in ${CARGO_TOML}" ) - endif() - set ( ORT_VERSION "${CMAKE_MATCH_1}" ) - - if (NOT ORT_METADATA MATCHES "linux-glibc[ \t]*=[ \t]*\"([^\"]+)\"") - message ( FATAL_ERROR "Missing [package.metadata.manticore.ort] linux-glibc in ${CARGO_TOML}" ) - endif() - set ( ORT_GLIBC "${CMAKE_MATCH_1}" ) - - if (EMBEDDINGS_ORT_VERSION) - set ( ORT_VERSION "${EMBEDDINGS_ORT_VERSION}" ) - endif() - if (EMBEDDINGS_ORT_GLIBC) - set ( ORT_GLIBC "${EMBEDDINGS_ORT_GLIBC}" ) - endif() - - set ( ${OUT_ORT_VERSION} "${ORT_VERSION}" PARENT_SCOPE ) - set ( ${OUT_ORT_GLIBC} "${ORT_GLIBC}" PARENT_SCOPE ) -endfunction() +include ( ${CMAKE_CURRENT_LIST_DIR}/GetONNXRuntime.cmake ) +include ( ${CMAKE_CURRENT_LIST_DIR}/GetMKL.cmake ) function(prepare_embeddings_ort) - if (NOT UNIX OR APPLE) - return() - endif() - - read_embeddings_ort_metadata ( ORT_VERSION ORT_GLIBC ) - - if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)$") - set ( ORT_ARCH "aarch64" ) + if ( APPLE ) + set ( _platform "macos" ) + elseif ( WIN32 ) + set ( _platform "windows" ) + elseif ( UNIX ) + set ( _platform "linux" ) else() - set ( ORT_ARCH "x64" ) + message ( FATAL_ERROR "prepare_embeddings_ort: unsupported host platform" ) endif() - set ( ORT_ASSET "onnxruntime-linux-${ORT_ARCH}-static_lib-${ORT_VERSION}-glibc${ORT_GLIBC}" ) - set ( ORT_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${ORT_VERSION}/${ORT_ASSET}.zip" ) - set ( ORT_ROOT "${CMAKE_CURRENT_BINARY_DIR}/embeddings/ort/${ORT_ASSET}" ) - set ( ORT_ZIP "${CMAKE_CURRENT_BINARY_DIR}/embeddings/ort/${ORT_ASSET}.zip" ) - - if (NOT EXISTS "${ORT_ROOT}/lib") - message ( STATUS "Downloading ONNX Runtime static library: ${ORT_ASSET}" ) - file ( MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/embeddings/ort" ) - file ( DOWNLOAD "${ORT_URL}" "${ORT_ZIP}" STATUS ORT_DOWNLOAD_STATUS SHOW_PROGRESS ) - list ( GET ORT_DOWNLOAD_STATUS 0 ORT_DOWNLOAD_CODE ) - if (NOT ORT_DOWNLOAD_CODE EQUAL 0) - list ( GET ORT_DOWNLOAD_STATUS 1 ORT_DOWNLOAD_ERROR ) - message ( FATAL_ERROR "Failed to download ${ORT_URL}: ${ORT_DOWNLOAD_ERROR}" ) - endif() - file ( ARCHIVE_EXTRACT INPUT "${ORT_ZIP}" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/embeddings/ort" ) + if ( CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)$" ) + set ( _arch "aarch64" ) + else() + set ( _arch "x64" ) endif() - if (NOT EXISTS "${ORT_ROOT}/lib") - message ( FATAL_ERROR "ONNX Runtime lib directory was not found: ${ORT_ROOT}/lib" ) - endif() + embeddings_ort_download ( _lib_dir "${_platform}" "${_arch}" "${CMAKE_CURRENT_BINARY_DIR}/embeddings/ort" ) - set ( ENV{ORT_LIB_PATH} "${ORT_ROOT}/lib" ) - message ( STATUS "Using ONNX Runtime from ORT_LIB_PATH=$ENV{ORT_LIB_PATH}" ) + set ( ENV{ORT_LIB_LOCATION} "${_lib_dir}" ) + message ( STATUS "Using ORT_LIB_LOCATION=${_lib_dir}" ) endfunction() function(build_embeddings_lib) @@ -132,17 +84,23 @@ function(build_embeddings_lib) set(EMBEDDINGS_FEATURE_LIST) if(APPLE) list(APPEND EMBEDDINGS_FEATURE_LIST accelerate) - elseif(UNIX) - # MKL provides multi-threaded BLAS on Linux; skip if not available - execute_process(COMMAND pkg-config --exists mkl-dynamic-lp64-seq RESULT_VARIABLE MKL_FOUND OUTPUT_QUIET ERROR_QUIET) - if(MKL_FOUND EQUAL 0) - list(APPEND EMBEDDINGS_FEATURE_LIST mkl) - endif() + elseif(UNIX AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|amd64)$") + # Linux x86_64: ensure MKL static archives are in scope (detect a + # system install, else download via cmake/GetMKL.cmake) and opt + # into the mkl feature. aarch64 has no MKL equivalent — skip. + embeddings_mkl_prepare_linux ( _mklroot "${CMAKE_CURRENT_BINARY_DIR}/embeddings/mkl" ) + set ( ENV{MKLROOT} "${_mklroot}" ) + message ( STATUS "Using MKLROOT=${_mklroot}" ) + list(APPEND EMBEDDINGS_FEATURE_LIST mkl) endif() list(JOIN EMBEDDINGS_FEATURE_LIST "," EMBEDDINGS_FEATURES_CSV) endif() - if (UNIX AND NOT APPLE AND DEFINED ENV{ORT_LIB_PATH} AND NOT "$ENV{ORT_LIB_PATH}" STREQUAL "" AND EMBEDDINGS_FEATURES_CSV) + # When the static lib is in scope, drop any caller-supplied download-ort + # feature so cargo doesn't fetch a duplicate dynamic lib on top of the + # static one. The auto-detected feature lists above never include it; this + # only matters when the caller passes EMBEDDINGS_CARGO_FEATURES explicitly. + if (DEFINED ENV{ORT_LIB_LOCATION} AND NOT "$ENV{ORT_LIB_LOCATION}" STREQUAL "" AND EMBEDDINGS_FEATURES_CSV) string(REPLACE "," ";" EMBEDDINGS_FEATURE_LIST "${EMBEDDINGS_FEATURES_CSV}") list(REMOVE_ITEM EMBEDDINGS_FEATURE_LIST download-ort) list(JOIN EMBEDDINGS_FEATURE_LIST "," EMBEDDINGS_FEATURES_CSV) diff --git a/embeddings/Cargo.toml b/embeddings/Cargo.toml index 9064926d..b68f2c2b 100644 --- a/embeddings/Cargo.toml +++ b/embeddings/Cargo.toml @@ -3,10 +3,6 @@ name = "manticore-knn-embeddings" version = "1.1.1" edition = "2021" -[package.metadata.manticore.ort] -version = "1.24.2" -linux-glibc = "2_17" - # Candle: git dep so CI works without a local candle clone. # For local dev with ../../candle, add a [patch] section to use path deps. [dependencies] From 885c1e13e4dc89d44019117be812333a43a2d47d Mon Sep 17 00:00:00 2001 From: Don Hardman Date: Wed, 27 May 2026 21:07:55 +0300 Subject: [PATCH 3/5] refactor(cmake): update MKL and OpenMP integration - Simplify Linux MKL flow to use detection via MKLROOT only - Remove automatic MKL and OpenMP downloads for Linux - Make MKL detection optional on Linux - Inline download logic for Windows OpenMP runtime - Update script-mode usage to reflect Linux detection-only policy --- cmake/GetMKL.cmake | 138 +++++++++-------------------------- cmake/build_embeddings.cmake | 20 +++-- 2 files changed, 47 insertions(+), 111 deletions(-) diff --git a/cmake/GetMKL.cmake b/cmake/GetMKL.cmake index 070c4a08..bc07f90a 100644 --- a/cmake/GetMKL.cmake +++ b/cmake/GetMKL.cmake @@ -13,58 +13,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Single source of truth for the Intel MKL / OpenMP dependency used by -# embeddings builds with the `mkl` Cargo feature. Mirrors the install -# performed inside ../rust-min-libc Dockerfile so local x86_64 hosts can -# build the MKL flavor without root + apt. Consumed in two ways: -# - included from cmake/build_embeddings.cmake for local host builds -# - run as a script (cmake -DMKL_PLATFORM=... -DMKL_ARCH=... -DMKL_OUT_DIR=... -P cmake/GetMKL.cmake) -# from CI workflows that need just the OpenMP runtime DLL for packaging. +# Intel MKL / OpenMP integration for embeddings builds with the `mkl` Cargo +# feature. Linux flow is detection-only: if MKL is on the host (either via +# the rust-min-libc Docker image which apt-installs it, or via a local +# install matching ../rust-min-libc/Dockerfile lines 87-105), we use it. +# Otherwise the caller silently skips the `mkl` feature — the build still +# produces a working baseline .so. Windows flow downloads only the runtime +# DLL (libiomp5md.dll), same nuget source the CI workflow used inline. cmake_minimum_required ( VERSION 3.17 ) -# MKL static archive (linux only). Conda-forge mirror — pins the exact same -# files Intel ships via apt as intel-oneapi-mkl-devel. Override either var -# to bump or substitute a private mirror. -set ( EMBEDDINGS_MKL_VERSION "2024.0.0" CACHE STRING "Intel oneMKL version for embeddings builds" ) -set ( EMBEDDINGS_MKL_LINUX_URL - "https://conda.anaconda.org/conda-forge/linux-64/mkl-static-${EMBEDDINGS_MKL_VERSION}-ha770c72_49657.tar.bz2" - CACHE STRING "Override URL for the MKL static archive (linux-x64)" ) -set ( EMBEDDINGS_OPENMP_LINUX_URL - "https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-17.0.6-h4dfa4b3_0.tar.bz2" - CACHE STRING "Override URL for the llvm-openmp static archive (linux-x64)" ) - -# Intel OpenMP runtime DLL (windows-x64 only). Used to package libiomp5md.dll -# alongside the MKL .dll artifact. Same source the CI workflow used before -# this logic moved into CMake. +# Intel OpenMP runtime DLL (windows-x64 only). Same nuget package the CI +# workflow used to download inline before this logic moved into CMake. set ( EMBEDDINGS_OPENMP_WIN_VERSION "2025.3.3.31" CACHE STRING "intelopenmp.redist.win nuget version" ) set ( EMBEDDINGS_OPENMP_WIN_URL "https://api.nuget.org/v3-flatcontainer/intelopenmp.redist.win/${EMBEDDINGS_OPENMP_WIN_VERSION}/intelopenmp.redist.win.${EMBEDDINGS_OPENMP_WIN_VERSION}.nupkg" CACHE STRING "Override URL for the intelopenmp.redist.win nuget package" ) -# Common helper: download IN_URL to IN_OUT_DIR/. Idempotent: re-uses -# an existing archive on disk. Returns the archive path in OUT_ARCHIVE_VAR. -function ( _embeddings_mkl_fetch OUT_ARCHIVE_VAR IN_URL IN_OUT_DIR IN_LABEL ) - get_filename_component ( _name "${IN_URL}" NAME ) - set ( _archive "${IN_OUT_DIR}/${_name}" ) - - if ( NOT EXISTS "${_archive}" ) - message ( STATUS "Downloading ${IN_LABEL}: ${_name}" ) - file ( MAKE_DIRECTORY "${IN_OUT_DIR}" ) - file ( DOWNLOAD "${IN_URL}" "${_archive}" STATUS _status SHOW_PROGRESS ) - list ( GET _status 0 _code ) - if ( NOT _code EQUAL 0 ) - list ( GET _status 1 _err ) - file ( REMOVE "${_archive}" ) - message ( FATAL_ERROR "Failed to download ${IN_URL}: ${_err}" ) - endif() - endif() - - set ( ${OUT_ARCHIVE_VAR} "${_archive}" PARENT_SCOPE ) -endfunction() - -# Detect an existing MKL installation. On hit, sets OUT_VAR to MKLROOT. -function ( _embeddings_mkl_detect_linux OUT_VAR ) +# Linux x86_64: detect an existing MKL install. Sets OUT_VAR to the resolved +# MKLROOT, or empty string when MKL isn't present. No download, no install. +function ( embeddings_mkl_detect_linux OUT_VAR ) if ( DEFINED ENV{MKLROOT} AND EXISTS "$ENV{MKLROOT}/lib/intel64/libmkl_intel_lp64.a" ) set ( ${OUT_VAR} "$ENV{MKLROOT}" PARENT_SCOPE ) return() @@ -81,63 +49,27 @@ function ( _embeddings_mkl_detect_linux OUT_VAR ) set ( ${OUT_VAR} "" PARENT_SCOPE ) endfunction() -# Linux x86_64: ensure MKL static archives + libiomp5.a are in scope. Returns -# the directory to use as MKLROOT (its lib/intel64/ has the archives). -function ( embeddings_mkl_prepare_linux OUT_MKLROOT_VAR IN_OUT_DIR ) - _embeddings_mkl_detect_linux ( _detected ) - if ( _detected ) - message ( STATUS "Detected MKL at ${_detected}" ) - set ( ${OUT_MKLROOT_VAR} "${_detected}" PARENT_SCOPE ) - return() - endif() - - # Conda-forge fallback. Same files Intel ships via oneAPI apt; we just - # replay the layout dance the rust-min-libc Dockerfile does on top. - set ( _root "${IN_OUT_DIR}/mkl-${EMBEDDINGS_MKL_VERSION}" ) - - if ( NOT EXISTS "${_root}/lib/libmkl_intel_lp64.a" ) - file ( MAKE_DIRECTORY "${_root}" ) - _embeddings_mkl_fetch ( _mkl_archive "${EMBEDDINGS_MKL_LINUX_URL}" "${IN_OUT_DIR}" "MKL static" ) - _embeddings_mkl_fetch ( _omp_archive "${EMBEDDINGS_OPENMP_LINUX_URL}" "${IN_OUT_DIR}" "llvm-openmp" ) - - file ( ARCHIVE_EXTRACT INPUT "${_mkl_archive}" DESTINATION "${_root}" ) - file ( ARCHIVE_EXTRACT INPUT "${_omp_archive}" DESTINATION "${_root}" ) - endif() - - # Mirror the rust-min-libc symlink dance: intel-mkl-src 0.8.1 expects - # lib/intel64/, while modern oneMKL ships flat in lib/. Symlink the old - # path so the build script finds archives there. libiomp5.a lives under - # the openmp package's lib/ — symlink into MKL's lib too so `-liomp5` - # resolves during static link. - if ( NOT EXISTS "${_root}/lib/intel64" ) - file ( CREATE_LINK "." "${_root}/lib/intel64" SYMBOLIC ) - endif() - - if ( NOT EXISTS "${_root}/lib/libiomp5.a" AND EXISTS "${_root}/lib/libomp.a" ) - # llvm-openmp ships libomp.a; intel-mkl-src expects libiomp5.a. - file ( CREATE_LINK "${_root}/lib/libomp.a" "${_root}/lib/libiomp5.a" SYMBOLIC ) - endif() - - foreach ( _required libmkl_intel_lp64.a libmkl_intel_thread.a libmkl_core.a libiomp5.a ) - if ( NOT EXISTS "${_root}/lib/${_required}" ) - message ( FATAL_ERROR "MKL prepare: missing ${_required} under ${_root}/lib — conda-forge archive layout may have changed; override -DEMBEDDINGS_MKL_LINUX_URL / -DEMBEDDINGS_OPENMP_LINUX_URL" ) - endif() - endforeach() - - set ( ${OUT_MKLROOT_VAR} "${_root}" PARENT_SCOPE ) -endfunction() - -# Windows x86_64: download intelopenmp.redist nuget for libiomp5md.dll. -# Returns absolute path to the .dll in OUT_DLL_VAR. Mirrors the block the -# CI workflow used to do inline. +# Windows x86_64: download intelopenmp.redist nuget for libiomp5md.dll. Returns +# the absolute .dll path in OUT_DLL_VAR. Same package + URL the CI workflow +# used to fetch inline. function ( embeddings_mkl_prepare_windows OUT_DLL_VAR IN_OUT_DIR ) set ( _root "${IN_OUT_DIR}/intelopenmp-${EMBEDDINGS_OPENMP_WIN_VERSION}" ) set ( _dll "${_root}/runtimes/win-x64/native/libiomp5md.dll" ) if ( NOT EXISTS "${_dll}" ) file ( MAKE_DIRECTORY "${_root}" ) - _embeddings_mkl_fetch ( _archive "${EMBEDDINGS_OPENMP_WIN_URL}" "${IN_OUT_DIR}" "Intel OpenMP redist" ) - # .nupkg is a zip; ARCHIVE_EXTRACT handles it. + get_filename_component ( _name "${EMBEDDINGS_OPENMP_WIN_URL}" NAME ) + set ( _archive "${IN_OUT_DIR}/${_name}" ) + if ( NOT EXISTS "${_archive}" ) + message ( STATUS "Downloading Intel OpenMP redist: ${_name}" ) + file ( DOWNLOAD "${EMBEDDINGS_OPENMP_WIN_URL}" "${_archive}" STATUS _status SHOW_PROGRESS ) + list ( GET _status 0 _code ) + if ( NOT _code EQUAL 0 ) + list ( GET _status 1 _err ) + file ( REMOVE "${_archive}" ) + message ( FATAL_ERROR "Failed to download ${EMBEDDINGS_OPENMP_WIN_URL}: ${_err}" ) + endif() + endif() file ( ARCHIVE_EXTRACT INPUT "${_archive}" DESTINATION "${_root}" ) endif() @@ -149,11 +81,11 @@ function ( embeddings_mkl_prepare_windows OUT_DLL_VAR IN_OUT_DIR ) endfunction() # Script-mode entry: cmake -DMKL_PLATFORM=... -DMKL_ARCH=... -DMKL_OUT_DIR=... -P cmake/GetMKL.cmake -# Writes the resolved path to ${MKL_OUT_DIR}/_path.txt so shell callers -# can read it back without recomputing layout. Keys: mklroot, libiomp5md_dll. +# Used by CI workflows that need the Windows OpenMP runtime DLL. Writes the +# resolved path to ${MKL_OUT_DIR}/libiomp5md_dll_path.txt. if ( CMAKE_SCRIPT_MODE_FILE STREQUAL CMAKE_CURRENT_LIST_FILE ) if ( NOT DEFINED MKL_PLATFORM OR NOT DEFINED MKL_ARCH OR NOT DEFINED MKL_OUT_DIR ) - message ( FATAL_ERROR "usage: cmake -DMKL_PLATFORM= -DMKL_ARCH= -DMKL_OUT_DIR= -P GetMKL.cmake" ) + message ( FATAL_ERROR "usage: cmake -DMKL_PLATFORM= -DMKL_ARCH= -DMKL_OUT_DIR= -P GetMKL.cmake" ) endif() if ( NOT MKL_ARCH STREQUAL "x64" AND NOT MKL_ARCH STREQUAL "x86_64" ) @@ -161,15 +93,13 @@ if ( CMAKE_SCRIPT_MODE_FILE STREQUAL CMAKE_CURRENT_LIST_FILE ) return() endif() - if ( MKL_PLATFORM STREQUAL "linux" ) - embeddings_mkl_prepare_linux ( _mklroot "${MKL_OUT_DIR}" ) - file ( WRITE "${MKL_OUT_DIR}/mklroot_path.txt" "${_mklroot}" ) - message ( STATUS "MKLROOT: ${_mklroot}" ) - elseif ( MKL_PLATFORM STREQUAL "windows" ) + file ( MAKE_DIRECTORY "${MKL_OUT_DIR}" ) + + if ( MKL_PLATFORM STREQUAL "windows" ) embeddings_mkl_prepare_windows ( _dll "${MKL_OUT_DIR}" ) file ( WRITE "${MKL_OUT_DIR}/libiomp5md_dll_path.txt" "${_dll}" ) message ( STATUS "libiomp5md.dll: ${_dll}" ) else() - message ( STATUS "GetMKL: platform '${MKL_PLATFORM}' has no MKL build; nothing to do" ) + message ( STATUS "GetMKL script-mode: platform '${MKL_PLATFORM}' has nothing to fetch (Linux MKL is detect-only at configure time)" ) endif() endif() diff --git a/cmake/build_embeddings.cmake b/cmake/build_embeddings.cmake index dedd7148..19e3c772 100644 --- a/cmake/build_embeddings.cmake +++ b/cmake/build_embeddings.cmake @@ -85,13 +85,19 @@ function(build_embeddings_lib) if(APPLE) list(APPEND EMBEDDINGS_FEATURE_LIST accelerate) elseif(UNIX AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|amd64)$") - # Linux x86_64: ensure MKL static archives are in scope (detect a - # system install, else download via cmake/GetMKL.cmake) and opt - # into the mkl feature. aarch64 has no MKL equivalent — skip. - embeddings_mkl_prepare_linux ( _mklroot "${CMAKE_CURRENT_BINARY_DIR}/embeddings/mkl" ) - set ( ENV{MKLROOT} "${_mklroot}" ) - message ( STATUS "Using MKLROOT=${_mklroot}" ) - list(APPEND EMBEDDINGS_FEATURE_LIST mkl) + # Linux x86_64: enable mkl feature only when MKL is already on the + # host (CI Docker image bakes it in; for a local dev box, install + # once per ../rust-min-libc/Dockerfile lines 87-105). Missing MKL + # is not an error — the build falls back to baseline candle gemm. + # aarch64 has no MKL equivalent — skip. + embeddings_mkl_detect_linux ( _mklroot ) + if ( _mklroot ) + set ( ENV{MKLROOT} "${_mklroot}" ) + message ( STATUS "Using MKLROOT=${_mklroot}" ) + list(APPEND EMBEDDINGS_FEATURE_LIST mkl) + else() + message ( STATUS "MKL not detected — building without mkl feature. See ../rust-min-libc/Dockerfile to install locally." ) + endif() endif() list(JOIN EMBEDDINGS_FEATURE_LIST "," EMBEDDINGS_FEATURES_CSV) endif() From 12cb6298564acc2eeaa17d0c28c20e504ae6944f Mon Sep 17 00:00:00 2001 From: Don Hardman Date: Wed, 27 May 2026 21:17:45 +0300 Subject: [PATCH 4/5] build(embeddings): remove verbose MKL detection logs --- cmake/build_embeddings.cmake | 8 -------- 1 file changed, 8 deletions(-) diff --git a/cmake/build_embeddings.cmake b/cmake/build_embeddings.cmake index 19e3c772..69064e7f 100644 --- a/cmake/build_embeddings.cmake +++ b/cmake/build_embeddings.cmake @@ -85,18 +85,10 @@ function(build_embeddings_lib) if(APPLE) list(APPEND EMBEDDINGS_FEATURE_LIST accelerate) elseif(UNIX AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|amd64)$") - # Linux x86_64: enable mkl feature only when MKL is already on the - # host (CI Docker image bakes it in; for a local dev box, install - # once per ../rust-min-libc/Dockerfile lines 87-105). Missing MKL - # is not an error — the build falls back to baseline candle gemm. - # aarch64 has no MKL equivalent — skip. embeddings_mkl_detect_linux ( _mklroot ) if ( _mklroot ) set ( ENV{MKLROOT} "${_mklroot}" ) - message ( STATUS "Using MKLROOT=${_mklroot}" ) list(APPEND EMBEDDINGS_FEATURE_LIST mkl) - else() - message ( STATUS "MKL not detected — building without mkl feature. See ../rust-min-libc/Dockerfile to install locally." ) endif() endif() list(JOIN EMBEDDINGS_FEATURE_LIST "," EMBEDDINGS_FEATURES_CSV) From 158522d02c439189e4b65a9f3a0b5d7e53aa369e Mon Sep 17 00:00:00 2001 From: Don Hardman Date: Fri, 29 May 2026 00:29:42 +0300 Subject: [PATCH 5/5] chore(deps): bump manticoresearch git tag --- manticore_src.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manticore_src.txt b/manticore_src.txt index 95420816..2af1394a 100644 --- a/manticore_src.txt +++ b/manticore_src.txt @@ -1 +1 @@ -GIT_REPOSITORY https://github.com/manticoresoftware/manticoresearch.git GIT_TAG a39c4bf5aaed7b473eb1affde9ee930d5294ab88 +GIT_REPOSITORY https://github.com/manticoresoftware/manticoresearch.git GIT_TAG 27b14d261555616c8e990f1daa8b4608681ee083