Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 36 additions & 54 deletions .github/workflows/embedding_build_template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -232,19 +232,19 @@ jobs:
- name: Build for Linux
if: ${{ inputs.distr == 'linux' }}
run: |
# Set Docker image based on architecture
# Download glibc2_17-compatible ORT static lib (avoids __isoc23_strtoll from pyke.io builds)
ORT_VERSION="1.24.2"
# Pick the rust-min-libc docker image (glibc baseline) for this arch.
# ORT static lib is fetched via cmake/GetONNXRuntime.cmake — the same
# script local host builds use, so there's no version/URL drift.
if [[ "${{ inputs.arch }}" == "aarch64" ]]; then
docker_image="ghcr.io/manticoresoftware/rust-min-libc:aarch64-rust1.95.0-glibc2.27-openssl1.1.1k"
ort_asset="onnxruntime-linux-aarch64-static_lib-${ORT_VERSION}-glibc2_17"
ort_arch="aarch64"
else
docker_image="ghcr.io/manticoresoftware/rust-min-libc:amd64-rust1.95.0-glibc2.27-openssl1.1.1k"
ort_asset="onnxruntime-linux-x64-static_lib-${ORT_VERSION}-glibc2_17"
ort_arch="x64"
fi

curl -sL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${ORT_VERSION}/${ort_asset}.zip" -o /tmp/ort.zip
unzip -q /tmp/ort.zip -d /tmp/ort
cmake -DORT_PLATFORM=linux -DORT_ARCH=${ort_arch} -DORT_OUT_DIR="${RUNNER_TEMP}/ort" -P cmake/GetONNXRuntime.cmake
ort_lib="$(cat "${RUNNER_TEMP}/ort/lib_path.txt")"

target="${{ steps.vars.outputs.target }}"
# Each flavor gets its own --target-dir so cargo doesn't relink between
Expand All @@ -256,11 +256,11 @@ jobs:
docker run --rm \
-v $(pwd):/src \
-v $(pwd)/.cargo-cache:/cargo-cache \
-v /tmp/ort/${ort_asset}/lib:/ort-lib \
-v ${ort_lib}:/ort-lib \
-w /src \
-u root \
-e CARGO_HOME=/cargo-cache \
-e ORT_LIB_PATH=/ort-lib \
-e ORT_LIB_LOCATION=/ort-lib \
-e MKLROOT=/opt/intel/oneapi/mkl/latest \
-e GIT_COMMIT_ID="${{ steps.git_meta.outputs.commit }}" \
-e GIT_TIMESTAMP_ID="${{ steps.git_meta.outputs.timestamp }}" \
Expand Down Expand Up @@ -316,28 +316,21 @@ jobs:
if: ${{ inputs.distr == 'macos' || inputs.distr == 'windows' }}
shell: bash
run: |
ort_version="1.24.2"
if [[ "${{ inputs.distr }}" == "macos" ]]; then
if [[ "${{ inputs.arch }}" == "aarch64" ]]; then
ort_name="onnxruntime-osx-arm64-static_lib-${ort_version}"
else
ort_name="onnxruntime-osx-x86_64-static_lib-${ort_version}"
fi
curl -sL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${ort_version}/${ort_name}.zip" -o /tmp/ort.zip
unzip -q /tmp/ort.zip -d /tmp/ort
echo "ORT_LIB_LOCATION=/tmp/ort/${ort_name}/lib" >> $GITHUB_ENV
# Single source of truth for version + asset naming lives in
# cmake/GetONNXRuntime.cmake (same script local host builds use).
case "${{ inputs.distr }}" in
macos) ort_platform=macos ;;
windows) ort_platform=windows ;;
esac
if [[ "${{ inputs.arch }}" == "aarch64" ]]; then
ort_arch=aarch64
else
if [[ "${{ inputs.arch }}" == "aarch64" ]]; then
ort_name="onnxruntime-win-arm64-static_lib-MD-Release-${ort_version}"
else
ort_name="onnxruntime-win-x64-static_lib-MD-Release-${ort_version}"
fi
curl -sL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${ort_version}/${ort_name}.tar.bz2" -o ort.tar.bz2
mkdir -p ort-lib
tar xjf ort.tar.bz2 -C ort-lib
echo "ORT_LIB_LOCATION=${{ github.workspace }}/ort-lib/${ort_name}/lib" >> $GITHUB_ENV
ort_arch=x64
fi

cmake -DORT_PLATFORM=${ort_platform} -DORT_ARCH=${ort_arch} -DORT_OUT_DIR="${RUNNER_TEMP}/ort" -P cmake/GetONNXRuntime.cmake
echo "ORT_LIB_LOCATION=$(cat "${RUNNER_TEMP}/ort/lib_path.txt")" >> $GITHUB_ENV

- name: Build native
if: ${{ inputs.distr == 'windows' || inputs.distr == 'macos' }}
run: |
Expand Down Expand Up @@ -423,32 +416,21 @@ jobs:

if [[ "${{ inputs.distr }}" == "windows" && "${{ inputs.arch }}" != "aarch64" ]]; then
echo "=== Package Windows runtime DLL dependencies ==="
openmp_redist_version="2025.3.3.31"
openmp_redist_dir=".openmp-redist"
for dll in libiomp5md.dll; do
# MKL flavor lives under target-mkl; also check it.
dep=$(find ./embeddings/target ./embeddings/target-mkl "$HOME/.cargo" -type f -iname "$dll" 2>/dev/null | head -1)
if [[ -z "$dep" ]]; then
echo "$dll was not found in the Cargo build tree; downloading Intel OpenMP redist ${openmp_redist_version}"
rm -rf "$openmp_redist_dir"
mkdir -p "$openmp_redist_dir/pkg"
curl -fsSL \
"https://api.nuget.org/v3-flatcontainer/intelopenmp.redist.win/${openmp_redist_version}/intelopenmp.redist.win.${openmp_redist_version}.nupkg" \
-o "$openmp_redist_dir/intelopenmp.redist.win.zip"
powershell -NoProfile -NonInteractive -Command \
"Expand-Archive -LiteralPath '$(cygpath -w "$openmp_redist_dir/intelopenmp.redist.win.zip")' -DestinationPath '$(cygpath -w "$openmp_redist_dir/pkg")' -Force"
dep=$(find "$openmp_redist_dir/pkg" -type f -iname "$dll" | head -1)
fi

if [[ -z "$dep" ]]; then
echo "Required Windows runtime dependency was not found after redist download: $dll"
find ./embeddings/target ./embeddings/target-mkl "$HOME/.cargo" "$openmp_redist_dir" -type f -iname "*.dll" 2>/dev/null | sort || true
exit 1
fi

cp "$dep" build/
echo "Packaged $dll from $dep"
done
# Prefer the DLL already in the cargo build tree (intel-mkl-src may
# have fetched it); otherwise let cmake/GetMKL.cmake fetch the same
# intelopenmp.redist nuget the inline shell used to download.
dep=$(find ./embeddings/target ./embeddings/target-mkl "$HOME/.cargo" -type f -iname libiomp5md.dll 2>/dev/null | head -1)
if [[ -z "$dep" ]]; then
cmake -DMKL_PLATFORM=windows -DMKL_ARCH=x64 -DMKL_OUT_DIR="${RUNNER_TEMP}/mkl" -P cmake/GetMKL.cmake
dep=$(cat "${RUNNER_TEMP}/mkl/libiomp5md_dll_path.txt")
fi
if [[ -z "$dep" || ! -f "$dep" ]]; then
echo "libiomp5md.dll was not located after GetMKL.cmake fallback"
find ./embeddings/target ./embeddings/target-mkl "$HOME/.cargo" "${RUNNER_TEMP}/mkl" -type f -iname "*.dll" 2>/dev/null | sort || true
exit 1
fi
cp "$dep" build/
echo "Packaged libiomp5md.dll from $dep"
elif [[ "${{ inputs.distr }}" == "windows" ]]; then
echo "Skipping Intel OpenMP runtime packaging for Windows ${{ inputs.arch }}; Intel OpenMP redist currently ships win-x64 DLLs only"
fi
Expand Down
105 changes: 105 additions & 0 deletions cmake/GetMKL.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Copyright (c) 2020-2025, Manticore Software LTD (https://manticoresearch.com)
# All rights reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Intel MKL / OpenMP integration for embeddings builds with the `mkl` Cargo
# feature. Linux flow is detection-only: if MKL is on the host (either via
# the rust-min-libc Docker image which apt-installs it, or via a local
# install matching ../rust-min-libc/Dockerfile lines 87-105), we use it.
# Otherwise the caller silently skips the `mkl` feature — the build still
# produces a working baseline .so. Windows flow downloads only the runtime
# DLL (libiomp5md.dll), same nuget source the CI workflow used inline.

# file ( ARCHIVE_EXTRACT ), which embeddings_mkl_prepare_windows relies on,
# was introduced in CMake 3.18 - that is the real floor for this file.
cmake_minimum_required ( VERSION 3.18 )

# Intel OpenMP runtime DLL (windows-x64 only). Same nuget package the CI
# workflow used to download inline before this logic moved into CMake.
# Both values are cache entries, so either can be overridden with -D.
# NOTE(review): the default URL is expanded from the version only when the URL
# cache entry is first created; with a persistent cache, changing just the
# version later presumably leaves the old URL in place - confirm, or override
# both together.
set ( EMBEDDINGS_OPENMP_WIN_VERSION "2025.3.3.31" CACHE STRING "intelopenmp.redist.win nuget version" )
set ( EMBEDDINGS_OPENMP_WIN_URL
	"https://api.nuget.org/v3-flatcontainer/intelopenmp.redist.win/${EMBEDDINGS_OPENMP_WIN_VERSION}/intelopenmp.redist.win.${EMBEDDINGS_OPENMP_WIN_VERSION}.nupkg"
	CACHE STRING "Override URL for the intelopenmp.redist.win nuget package" )

# Linux x86_64: detect an existing MKL install. No download, no install.
#
#   OUT_VAR - name of the caller-scope variable that receives the resolved
#             MKLROOT, or an empty string when MKL isn't present.
#
# Candidate roots are tried in priority order: $ENV{MKLROOT} (when set and
# non-empty), then the default oneAPI prefix. Under each root both library
# layouts are accepted: the classic lib/intel64/ one and the modern oneMKL one
# with the archives directly in lib/. A root counts as "found" when it
# contains libmkl_intel_lp64.a in either place.
function ( embeddings_mkl_detect_linux OUT_VAR )
	set ( _roots )
	if ( DEFINED ENV{MKLROOT} AND NOT "$ENV{MKLROOT}" STREQUAL "" )
		list ( APPEND _roots "$ENV{MKLROOT}" )
	endif()
	list ( APPEND _roots "/opt/intel/oneapi/mkl/latest" )

	foreach ( _root IN LISTS _roots )
		# Classic layout first, then the flat one (no intel64/ subdir).
		foreach ( _libdir "lib/intel64" "lib" )
			if ( EXISTS "${_root}/${_libdir}/libmkl_intel_lp64.a" )
				set ( ${OUT_VAR} "${_root}" PARENT_SCOPE )
				return()
			endif()
		endforeach()
	endforeach()

	# Nothing usable found: report "no MKL" rather than failing.
	set ( ${OUT_VAR} "" PARENT_SCOPE )
endfunction()

# Windows x86_64: make libiomp5md.dll available from the intelopenmp.redist
# nuget package (same package + URL the CI workflow used to fetch inline).
#
#   OUT_DLL_VAR - name of the caller-scope variable that receives the path
#                 <IN_OUT_DIR>/intelopenmp-<version>/runtimes/win-x64/native/libiomp5md.dll
#   IN_OUT_DIR  - directory holding the downloaded package and its extracted tree
#
# When the DLL is already in place nothing is downloaded or extracted. A
# previously downloaded package file is reused instead of being fetched again.
# Download failures and a package without the DLL at the expected location
# abort with FATAL_ERROR.
function ( embeddings_mkl_prepare_windows OUT_DLL_VAR IN_OUT_DIR )
	set ( _pkg_dir "${IN_OUT_DIR}/intelopenmp-${EMBEDDINGS_OPENMP_WIN_VERSION}" )
	set ( _runtime_dll "${_pkg_dir}/runtimes/win-x64/native/libiomp5md.dll" )

	# Fast path: an earlier run already extracted the DLL.
	if ( EXISTS "${_runtime_dll}" )
		set ( ${OUT_DLL_VAR} "${_runtime_dll}" PARENT_SCOPE )
		return()
	endif()

	file ( MAKE_DIRECTORY "${_pkg_dir}" )
	get_filename_component ( _pkg_file "${EMBEDDINGS_OPENMP_WIN_URL}" NAME )
	set ( _pkg_path "${IN_OUT_DIR}/${_pkg_file}" )

	if ( NOT EXISTS "${_pkg_path}" )
		message ( STATUS "Downloading Intel OpenMP redist: ${_pkg_file}" )
		file ( DOWNLOAD "${EMBEDDINGS_OPENMP_WIN_URL}" "${_pkg_path}" STATUS _dl_status SHOW_PROGRESS )
		list ( GET _dl_status 0 _dl_code )
		if ( NOT _dl_code EQUAL 0 )
			list ( GET _dl_status 1 _dl_msg )
			# Drop the partial file so the next run downloads from scratch.
			file ( REMOVE "${_pkg_path}" )
			message ( FATAL_ERROR "Failed to download ${EMBEDDINGS_OPENMP_WIN_URL}: ${_dl_msg}" )
		endif()
	endif()

	file ( ARCHIVE_EXTRACT INPUT "${_pkg_path}" DESTINATION "${_pkg_dir}" )

	if ( NOT EXISTS "${_runtime_dll}" )
		message ( FATAL_ERROR "intelopenmp redist did not contain libiomp5md.dll at expected path: ${_runtime_dll}" )
	endif()

	set ( ${OUT_DLL_VAR} "${_runtime_dll}" PARENT_SCOPE )
endfunction()

# Script-mode entry point, used by CI workflows that need the Windows OpenMP
# runtime DLL:
#   cmake -DMKL_PLATFORM=... -DMKL_ARCH=... -DMKL_OUT_DIR=... -P cmake/GetMKL.cmake
# Runs only when this file itself is the -P script. For platform "windows" on
# x64/x86_64 the resolved DLL path is written to
# ${MKL_OUT_DIR}/libiomp5md_dll_path.txt; any other arch or platform only
# prints a status message and writes no path file.
if ( CMAKE_SCRIPT_MODE_FILE STREQUAL CMAKE_CURRENT_LIST_FILE )
	# All three -D inputs are mandatory.
	foreach ( _required MKL_PLATFORM MKL_ARCH MKL_OUT_DIR )
		if ( NOT DEFINED ${_required} )
			message ( FATAL_ERROR "usage: cmake -DMKL_PLATFORM=<windows> -DMKL_ARCH=<x64> -DMKL_OUT_DIR=<dir> -P GetMKL.cmake" )
		endif()
	endforeach()

	if ( MKL_ARCH STREQUAL "x64" OR MKL_ARCH STREQUAL "x86_64" )
		file ( MAKE_DIRECTORY "${MKL_OUT_DIR}" )

		if ( MKL_PLATFORM STREQUAL "windows" )
			embeddings_mkl_prepare_windows ( _dll "${MKL_OUT_DIR}" )
			file ( WRITE "${MKL_OUT_DIR}/libiomp5md_dll_path.txt" "${_dll}" )
			message ( STATUS "libiomp5md.dll: ${_dll}" )
		else()
			message ( STATUS "GetMKL script-mode: platform '${MKL_PLATFORM}' has nothing to fetch (Linux MKL is detect-only at configure time)" )
		endif()
	else()
		# Non-x86_64 arch: nothing is created, not even the output directory.
		message ( STATUS "GetMKL: arch '${MKL_ARCH}' has no MKL build; nothing to do" )
	endif()
endif()
106 changes: 106 additions & 0 deletions cmake/GetONNXRuntime.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Copyright (c) 2020-2025, Manticore Software LTD (https://manticoresearch.com)
# All rights reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Single source of truth for the ONNX Runtime static library used by embeddings
# builds. Consumed in two ways:
# - included from cmake/build_embeddings.cmake for local host builds
# - run as a script (cmake -DORT_PLATFORM=... -DORT_ARCH=... -DORT_OUT_DIR=... -P cmake/GetONNXRuntime.cmake)
# from CI workflows, so neither place hardcodes the version, URL, or asset name.
# Bump versions here; both code paths pick up the new values.

# file ( ARCHIVE_EXTRACT ), which embeddings_ort_download relies on, was
# introduced in CMake 3.18 - that is the real floor for this file.
cmake_minimum_required ( VERSION 3.18 )

# Version knobs for the ORT static-lib asset; both are cache entries, so they
# can be overridden with -D.
set ( EMBEDDINGS_ORT_VERSION "1.24.2" CACHE STRING "ONNX Runtime version for embeddings builds" )
set ( EMBEDDINGS_ORT_GLIBC "2_17" CACHE STRING "ONNX Runtime glibc baseline (Linux only)" )

# Map (platform, arch) -> (asset stem, archive extension) per
# csukuangfj/onnxruntime-libs releases.
#
#   OUT_ASSET   - caller-scope variable receiving the asset name without extension
#   OUT_EXT     - caller-scope variable receiving the archive extension ("zip" or "tar.bz2")
#   IN_PLATFORM - "linux", "macos" or "windows"; anything else is a FATAL_ERROR
#   IN_ARCH     - "aarch64" / "arm64" in any letter case select the ARM asset
#                 (so the upper-case "ARM64" spelling is honoured too);
#                 every other value selects the x86_64 asset
#
# Reads EMBEDDINGS_ORT_VERSION and, for Linux, EMBEDDINGS_ORT_GLIBC.
function ( _embeddings_ort_asset OUT_ASSET OUT_EXT IN_PLATFORM IN_ARCH )
	set ( _ver "${EMBEDDINGS_ORT_VERSION}" )
	set ( _glibc "${EMBEDDINGS_ORT_GLIBC}" )

	# Normalise case once so "ARM64" / "AArch64" don't fall through to x64.
	string ( TOLOWER "${IN_ARCH}" _arch_lc )
	if ( _arch_lc MATCHES "^(aarch64|arm64)$" )
		set ( _is_arm TRUE )
	else()
		set ( _is_arm FALSE )
	endif()

	# Each platform spells the two architectures differently in asset names.
	if ( IN_PLATFORM STREQUAL "linux" )
		if ( _is_arm )
			set ( _arch "aarch64" )
		else()
			set ( _arch "x64" )
		endif()
		set ( _asset "onnxruntime-linux-${_arch}-static_lib-${_ver}-glibc${_glibc}" )
		set ( _ext "zip" )
	elseif ( IN_PLATFORM STREQUAL "macos" )
		if ( _is_arm )
			set ( _arch "arm64" )
		else()
			set ( _arch "x86_64" )
		endif()
		set ( _asset "onnxruntime-osx-${_arch}-static_lib-${_ver}" )
		set ( _ext "zip" )
	elseif ( IN_PLATFORM STREQUAL "windows" )
		if ( _is_arm )
			set ( _arch "arm64" )
		else()
			set ( _arch "x64" )
		endif()
		set ( _asset "onnxruntime-win-${_arch}-static_lib-MD-Release-${_ver}" )
		set ( _ext "tar.bz2" )
	else()
		message ( FATAL_ERROR "embeddings_ort: unsupported platform '${IN_PLATFORM}' (expected linux|macos|windows)" )
	endif()

	set ( ${OUT_ASSET} "${_asset}" PARENT_SCOPE )
	set ( ${OUT_EXT} "${_ext}" PARENT_SCOPE )
endfunction()

# Download + extract the ORT static lib for the given platform/arch into IN_OUT_DIR.
#
#   OUT_LIB_DIR_VAR - caller-scope variable receiving <IN_OUT_DIR>/<asset>/lib
#                     (absolute only if IN_OUT_DIR is absolute)
#   IN_PLATFORM     - forwarded to _embeddings_ort_asset
#   IN_ARCH         - forwarded to _embeddings_ort_asset
#   IN_OUT_DIR      - directory that receives the archive and the extracted tree
#
# Idempotent: skips network and extraction when <IN_OUT_DIR>/<asset>/lib already
# exists. A failed download removes the partial archive (matching
# embeddings_mkl_prepare_windows) and aborts with FATAL_ERROR, as does an
# archive that does not yield the expected lib/ directory.
function ( embeddings_ort_download OUT_LIB_DIR_VAR IN_PLATFORM IN_ARCH IN_OUT_DIR )
	_embeddings_ort_asset ( _asset _ext "${IN_PLATFORM}" "${IN_ARCH}" )

	set ( _url "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${EMBEDDINGS_ORT_VERSION}/${_asset}.${_ext}" )
	set ( _archive "${IN_OUT_DIR}/${_asset}.${_ext}" )
	# The release archives are expected to unpack into a top-level <asset>/ dir.
	set ( _root "${IN_OUT_DIR}/${_asset}" )

	if ( NOT EXISTS "${_root}/lib" )
		message ( STATUS "Downloading ${_asset}.${_ext}" )
		file ( MAKE_DIRECTORY "${IN_OUT_DIR}" )
		file ( DOWNLOAD "${_url}" "${_archive}" STATUS _status SHOW_PROGRESS )
		list ( GET _status 0 _code )
		if ( NOT _code EQUAL 0 )
			list ( GET _status 1 _err )
			# Don't leave a truncated archive behind in the output directory.
			file ( REMOVE "${_archive}" )
			message ( FATAL_ERROR "Failed to download ${_url}: ${_err}" )
		endif()
		file ( ARCHIVE_EXTRACT INPUT "${_archive}" DESTINATION "${IN_OUT_DIR}" )
	endif()

	if ( NOT EXISTS "${_root}/lib" )
		message ( FATAL_ERROR "ORT lib dir not found after extract: ${_root}/lib" )
	endif()

	set ( ${OUT_LIB_DIR_VAR} "${_root}/lib" PARENT_SCOPE )
endfunction()

# Script-mode entry point:
#   cmake -DORT_PLATFORM=... -DORT_ARCH=... -DORT_OUT_DIR=... -P cmake/GetONNXRuntime.cmake
# Runs only when this file itself is the -P script. Fetches the library via
# embeddings_ort_download and records the resulting lib dir in
# ${ORT_OUT_DIR}/lib_path.txt, so shell callers can read the path back instead
# of recomputing the asset name.
if ( CMAKE_SCRIPT_MODE_FILE STREQUAL CMAKE_CURRENT_LIST_FILE )
	# All three -D inputs are mandatory.
	foreach ( _required ORT_PLATFORM ORT_ARCH ORT_OUT_DIR )
		if ( NOT DEFINED ${_required} )
			message ( FATAL_ERROR "usage: cmake -DORT_PLATFORM=<linux|macos|windows> -DORT_ARCH=<x64|aarch64> -DORT_OUT_DIR=<dir> -P GetONNXRuntime.cmake" )
		endif()
	endforeach()

	embeddings_ort_download ( _resolved_lib_dir "${ORT_PLATFORM}" "${ORT_ARCH}" "${ORT_OUT_DIR}" )

	# Written without a trailing newline; callers read it back verbatim.
	file ( WRITE "${ORT_OUT_DIR}/lib_path.txt" "${_resolved_lib_dir}" )
	message ( STATUS "ORT lib dir: ${_resolved_lib_dir}" )
endif()
Loading
Loading