Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@ jobs:
matrix:
config:
# Multi-stage build
- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: false}
- { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
- { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: true}
- { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
- { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: false }
- { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
- { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true }
- { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
- { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
# Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
#- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: true }
#- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: true }
steps:
- name: Check out the repo
uses: actions/checkout@v4
Expand Down
20 changes: 16 additions & 4 deletions build-xcframework.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ COMMON_CMAKE_ARGS=(
-DGGML_OPENMP=${GGML_OPENMP}
)

XCODE_VERSION=$(xcodebuild -version 2>/dev/null | head -n1 | awk '{ print $2 }')
MAJOR_VERSION=$(echo $XCODE_VERSION | cut -d. -f1)
MINOR_VERSION=$(echo $XCODE_VERSION | cut -d. -f2)
echo "Detected Xcode version: $XCODE_VERSION"

check_required_tool() {
local tool=$1
local install_message=$2
Expand Down Expand Up @@ -325,21 +330,28 @@ combine_static_libraries() {

# Platform-specific post-processing for device builds
if [[ "$is_simulator" == "false" ]]; then
if command -v vtool &>/dev/null; then
if command -v xcrun vtool &>/dev/null; then
case "$platform" in
"ios")
echo "Marking binary as a framework binary for iOS..."
vtool -set-build-version ios ${IOS_MIN_OS_VERSION} ${IOS_MIN_OS_VERSION} -replace \
xcrun vtool -set-build-version ios ${IOS_MIN_OS_VERSION} ${IOS_MIN_OS_VERSION} -replace \
-output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
;;
"visionos")
echo "Marking binary as a framework binary for visionOS..."
vtool -set-build-version xros ${VISIONOS_MIN_OS_VERSION} ${VISIONOS_MIN_OS_VERSION} -replace \
if [[ "$MAJOR_VERSION" -gt 16 ]] || [[ "$MAJOR_VERSION" -eq 16 && "$MINOR_VERSION" -gt 2 ]]; then
echo "Xcode version greater than 16.2, using visionOS."
VISION_OS_BUILD_VERSION="visionos"
else
echo "Xcode version less than or equal to 16.2, using xros."
VISION_OS_BUILD_VERSION="xros"
fi
xcrun vtool -set-build-version ${VISION_OS_BUILD_VERSION} ${VISIONOS_MIN_OS_VERSION} ${VISIONOS_MIN_OS_VERSION} -replace \
-output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
;;
"tvos")
echo "Marking binary as a framework binary for tvOS..."
vtool -set-build-version tvos ${TVOS_MIN_OS_VERSION} ${TVOS_MIN_OS_VERSION} -replace \
xcrun vtool -set-build-version tvos ${TVOS_MIN_OS_VERSION} ${TVOS_MIN_OS_VERSION} -replace \
-output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
;;
esac
Expand Down
6 changes: 1 addition & 5 deletions convert_hf_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1750,7 +1750,7 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:

low_freq_wavelen = old_context_len / low_freq_factor
high_freq_wavelen = old_context_len / high_freq_factor
assert low_freq_wavelen != high_freq_wavelen
# assert low_freq_wavelen != high_freq_wavelen # Errors for Llama4

rope_factors = []
for freq in freqs:
Expand Down Expand Up @@ -1806,10 +1806,6 @@ def set_gguf_parameters(self):
self.gguf_writer.add_expert_feed_forward_length(self.hparams["intermediate_size_moe"])

def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
name = name.replace("language_model.", "")
name = name.replace("feed_forward.", "mlp.") # a bit hacky for now
name = name.replace(".router.weight", ".gate.weight") # a bit hacky for now

# split the gate_up into gate and up
if "gate_up_proj" in name:
name_up = name.replace("gate_up_proj", "up_proj.weight")
Expand Down
8 changes: 6 additions & 2 deletions ggml/src/ggml-cpu/simd-mappings.h
Original file line number Diff line number Diff line change
Expand Up @@ -855,13 +855,17 @@ static inline __vector float __lzs_f16cx4_load(const ggml_fp16_t * x) {
tmp[i] = GGML_FP16_TO_FP32(x[i]);
}

return vec_xl(0, tmp);
// note: keep type-cast here to prevent compiler bugs
// see: https://github.com/ggml-org/llama.cpp/issues/12846
return vec_xl(0, (const float *)(tmp));
}

static inline void __lzs_f16cx4_store(ggml_fp16_t * x, __vector float y) {
float arr[4];

vec_xst(y, 0, arr);
// note: keep type-cast here to prevent compiler bugs
// see: https://github.com/ggml-org/llama.cpp/issues/12846
vec_xst(y, 0, (float *)(arr));

for (int i = 0; i < 4; i++) {
x[i] = GGML_FP32_TO_FP16(arr[i]);
Expand Down
Loading
Loading