diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 986169a..b845e4c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,7 +40,7 @@ jobs: uses: actions/checkout@v4 - name: Setup Bazel Environment - uses: bazel-contrib/setup-bazel@v0.18.0 + uses: bazel-contrib/setup-bazel@0.18.0 - name: Mount Bazel Cache uses: actions/cache@v4 diff --git a/fibers/BUILD b/fibers/BUILD index a46ed79..0bb0b7e 100644 --- a/fibers/BUILD +++ b/fibers/BUILD @@ -62,7 +62,11 @@ cc_test( cc_test( name = "stackswitch_test", - srcs = ["stackswitch_test.cc"], + srcs = [ + "stackswitch_test.cc", + ] + select({ + "//platforms:linux_x86_64": ["stackswitch_linux_x86_64_test.S"], + }), deps = [ ":stackswitch", "@googletest//:gtest", diff --git a/fibers/stackswitch.h b/fibers/stackswitch.h index 170aec3..c26c3e0 100644 --- a/fibers/stackswitch.h +++ b/fibers/stackswitch.h @@ -33,6 +33,8 @@ #ifndef LOOM_FIBERS_FIBERS_STACKSWITCH_H_ #define LOOM_FIBERS_FIBERS_STACKSWITCH_H_ +#include + #include extern "C" { @@ -74,10 +76,23 @@ void* loom__configure_stack(void* stack_base, uintptr_t stack_size, // // Would not continue until other fiber switches back to this stack. void loom__switch_to_stack(void* destination_sp, void** save_sp); +// Discovers the exact SIMD buffer size necessary to save all SIMD state. +uintptr_t loom__discover_simd_buffer_size(void); + +// Saves the system-dependent SIMD state into the given buffer, which must be +// 64-byte aligned and the exact size loom__discover_simd_buffer_size() returns. +void loom__save_simd_state(void* state); + +// Restores the system-dependent SIMD state from the given buffer, which must be +// 64-byte aligned and the exact size loom__discover_simd_buffer_size() returns. +void loom__restore_simd_state(void* state); + } // extern "C" namespace loom { +inline const uintptr_t kSIMDBufferSize = loom__discover_simd_buffer_size(); + // Performs the exact same duties as loom__configure_stack, but is wrapped in // C++-familiar usage.
inline void* ConfigureStack(void* stack_base, uintptr_t stack_size, @@ -92,6 +107,51 @@ inline void SwitchStack(void* new_sp, void** old_sp) { loom__switch_to_stack(new_sp, old_sp); } +// loom::SIMDGuard +// +// Acts as an RAII interface for saving SIMD data (like x86_64's XMM registers) +// across stack switches. SIMD data is not saved by default because it is too +// expensive to do so for 99% of use cases. +// +// Example: +// +// void Foo() { +// // Do some work +// +// { +// loom::SIMDGuard guard; +// +// Yield(); +// } +// +// // Continue SIMD work +// } +class SIMDGuard { + public: + // Saves the current SIMD state + SIMDGuard() + : simd_buffer_(std::aligned_alloc( + 64, (kSIMDBufferSize + 63) & ~uintptr_t{63})) { + // std::aligned_alloc requires a size that is a multiple of the alignment, + // so round up. Saving into a null buffer would fault, so abort instead. + if (simd_buffer_ == nullptr) std::abort(); + loom__save_simd_state(simd_buffer_); + } + + // Restores the saved SIMD state + ~SIMDGuard() { + loom__restore_simd_state(simd_buffer_); + std::free(simd_buffer_); + } + + // Not copyable + SIMDGuard(const SIMDGuard& other) = delete; + SIMDGuard& operator=(const SIMDGuard& other) = delete; + + private: + void* simd_buffer_; +}; + } #endif // LOOM_FIBERS_FIBERS_STACKSWITCH_H_ diff --git a/fibers/stackswitch_linux_x86_64.S b/fibers/stackswitch_linux_x86_64.S index 78e0934..1dd9288 100644 --- a/fibers/stackswitch_linux_x86_64.S +++ b/fibers/stackswitch_linux_x86_64.S @@ -28,6 +28,12 @@ .type loom__configure_stack,@function .globl loom__switch_to_stack .type loom__switch_to_stack,@function +.globl loom__discover_simd_buffer_size +.type loom__discover_simd_buffer_size,@function +.globl loom__save_simd_state +.type loom__save_simd_state,@function +.globl loom__restore_simd_state +.type loom__restore_simd_state,@function .align 16 /* Used as a trampoline to prevent jumping to unspecified memory.
(SEE #3) */ @@ -123,3 +129,67 @@ loom__switch_to_stack: /* Drop into new fiber by popping rip (ret) */ ret +/* loom__discover_simd_buffer_size() - see stackswitch.h + */ +loom__discover_simd_buffer_size: + push %rbp + mov %rsp, %rbp + + /* RBX gets clobbered, so we have to save it */ + push %rbx + + xor %rax, %rax + + mov $0xD, %eax /* CPUID leaf 0xD is for Processor Extended State */ + mov $0, %ecx /* Sub-leaf 0 */ + cpuid + mov %ebx, %eax /* Result was in EBX/ECX */ + + pop %rbx + + pop %rbp + ret + +/* loom__save_simd_state() - see stackswitch.h + * + * %rdi - state + */ +loom__save_simd_state: + push %rbp + mov %rsp, %rbp + + /* XSAVE only rewrites the XSTATE_BV bits it was asked to save and never + * touches the rest of the 64-byte XSAVE header at offset 512, so garbage left + * in the buffer can make XRSTOR raise #GP. Clear the header first, using + * plain stores so the SIMD state being saved is not disturbed. */ + movq $0, 512(%rdi) + movq $0, 520(%rdi) + movq $0, 528(%rdi) + movq $0, 536(%rdi) + movq $0, 544(%rdi) + movq $0, 552(%rdi) + movq $0, 560(%rdi) + movq $0, 568(%rdi) + + mov $0xFFFFFFFF, %eax /* Enable all flags (we want to save all state) */ + mov $0xFFFFFFFF, %edx + xsave (%rdi) + + pop %rbp + ret + +/* loom__restore_simd_state() - see stackswitch.h + * + * %rdi - state + */ +loom__restore_simd_state: + push %rbp + mov %rsp, %rbp + + mov $0xFFFFFFFF, %eax /* Enable all flags (we want to restore all state) */ + mov $0xFFFFFFFF, %edx + xrstor (%rdi) + + pop %rbp + ret + diff --git a/fibers/stackswitch_linux_x86_64_test.S b/fibers/stackswitch_linux_x86_64_test.S new file mode 100644 index 0000000..879a171 --- /dev/null +++ b/fibers/stackswitch_linux_x86_64_test.S @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +/* Copyright 2026 Adrian Gjerstad. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* ----------------------------------------------------------------------------- + * loom/fibers/stackswitch_linux_x86_64_test.S + * ----------------------------------------------------------------------------- + * + * Assembly-specific stack switching testing helper functions. + */ + +.file "stackswitch_linux_x86_64_test.S" +.globl write_simd +.type write_simd,@function +.globl read_simd +.type read_simd,@function +.align 16 + +.text + +/* write_simd() - see stackswitch_test.cc + * + * %rdi - value + */ +write_simd: + /* The SIMD-specific register that we will use is XMM5. */ + movq %rdi, %xmm5 + + ret + +/* read_simd() - see stackswitch_test.cc + */ +read_simd: + /* Return the low 64 bits of SIMD-specific register XMM5. */ + movq %xmm5, %rax + + ret + diff --git a/fibers/stackswitch_test.cc b/fibers/stackswitch_test.cc index 352f48d..a6a1401 100644 --- a/fibers/stackswitch_test.cc +++ b/fibers/stackswitch_test.cc @@ -25,7 +25,8 @@ // B) Can properly preserve registers // C) Allows fibers to switch between each other // D) Works to migrate fibers from one thread to another -// E) Aborts when an entry point returns (see #3). +// E) Aborts when an entry point returns (see #3) +// F) Includes SIMD saving when necessary. // // This test statically allocates its stacks to make it separate from StackPool. // @@ -38,6 +39,15 @@ #include #include +extern "C" { + +// SIMD functions that are implemented in assembly allow us to test the +// SIMD-specific registers that aren't normally saved. +extern void write_simd(uintptr_t value); // Writes a value to a SIMD register. +extern uintptr_t read_simd(void); // Reads a value from a SIMD register. + +} + namespace loom { namespace { @@ -224,6 +234,39 @@ TEST_F(StackSwitchDeathTest, EntryPointReturns) { }, testing::KilledBySignal(SIGABRT), ".*"); } +TEST_F(StackSwitchTest, PreservesSIMDData) { + FiberContext ctx; + + auto SIMDEntry = [](void* arg) { + auto* ctx = static_cast(arg); + + // Write SIMD data and then yield.
SIMDGuard should protect the data. + write_simd(42); + + { + loom::SIMDGuard guard; + loom::SwitchStack(ctx->main_sp, &ctx->fiber_a_sp); + } + + // The SIMD data should be restored and can be read from the main stack. + loom::SwitchStack(ctx->main_sp, &ctx->fiber_a_sp); + }; + + ctx.fiber_a_sp = loom::ConfigureStack(fiber_a_stack, kStackSize, SIMDEntry, + &ctx); + + loom::SwitchStack(ctx.fiber_a_sp, &ctx.main_sp); + + ASSERT_EQ(read_simd(), 42u); + write_simd(143); // Clobber the old data + ASSERT_EQ(read_simd(), 143u); + + loom::SwitchStack(ctx.fiber_a_sp, &ctx.main_sp); + + // Verify that the data was restored. + ASSERT_EQ(read_simd(), 42u); +} + } }