From 06b2dbcdc15217e8c9f5858b51d01ce73a91070d Mon Sep 17 00:00:00 2001 From: Vadim Skipin Date: Thu, 18 Jun 2026 07:42:09 +0000 Subject: [PATCH] Make platform constants configurable --- include/silk/fibers/fiber.h | 4 +-- include/silk/util/bounded-queue.h | 8 ++--- include/silk/util/platform.h | 14 ++++++-- include/silk/util/queue.h | 4 +-- include/silk/util/sharded-stack.h | 2 +- src/fibers/fiber.cpp | 46 +++++++++++++------------- src/fibers/profiler.h | 4 +-- src/fibers/tests/fiber-test.cpp | 2 +- src/gdb/fiber.py | 2 +- src/util/benchmarks/platform-bench.cpp | 2 +- src/util/memory-pool.cpp | 2 +- 11 files changed, 49 insertions(+), 41 deletions(-) diff --git a/include/silk/fibers/fiber.h b/include/silk/fibers/fiber.h index 06e8f21..31e7fce 100644 --- a/include/silk/fibers/fiber.h +++ b/include/silk/fibers/fiber.h @@ -344,7 +344,7 @@ class FiberScheduler uint8_t category = 0; // Processor whose io_uring ring holds this SQE; cancelIo must submit // the cancel to the same ring to avoid a cross-ring -ENOENT failure. - uint16_t processorNumber = INVALID_PROCESSOR_NUMBER; + uint16_t processorNumber = kInvalidProcessorNumber; #if defined(__SANITIZE_MEMORY__) // Used to mark the kernel-written bytes as initialized for MSan. 
iovec * readIov = nullptr; @@ -521,7 +521,7 @@ class FiberScheduler StackEntry stackEntry; TreeEntry treeEntry; uint64_t deadlineCycles = 0; - uint16_t processorNumber = INVALID_PROCESSOR_NUMBER; + uint16_t processorNumber = kInvalidProcessorNumber; std::atomic state{}; }; diff --git a/include/silk/util/bounded-queue.h b/include/silk/util/bounded-queue.h index 39cfa3b..a9d8317 100644 --- a/include/silk/util/bounded-queue.h +++ b/include/silk/util/bounded-queue.h @@ -127,13 +127,13 @@ class BoundedQueue } private: - struct alignas(CACHELINE_SIZE) Slot + struct alignas(kCacheLineSize) Slot { std::atomic sequence; T value; }; - static_assert(sizeof(Slot) == CACHELINE_SIZE); + static_assert(sizeof(Slot) == kCacheLineSize); // Match offsets and stride used by src/gdb/fiber.py::_walk_bounded_queue static_assert(offsetof(Slot, sequence) == 0); @@ -149,8 +149,8 @@ class BoundedQueue // src/gdb/fiber.py::_walk_bounded_queue reads enqueuePos at offset 64 and // dequeuePos at offset 128 (mask=8 bytes, slots=8 bytes, then 2 x cacheline). // Reordering or inserting fields here requires updating that script. - alignas(CACHELINE_SIZE) std::atomic enqueuePos{}; - alignas(CACHELINE_SIZE) std::atomic dequeuePos{}; + alignas(kCacheLineSize) std::atomic enqueuePos{}; + alignas(kCacheLineSize) std::atomic dequeuePos{}; }; } // namespace silk diff --git a/include/silk/util/platform.h b/include/silk/util/platform.h index b440613..427ba9f 100644 --- a/include/silk/util/platform.h +++ b/include/silk/util/platform.h @@ -40,13 +40,21 @@ namespace silk { /** System page size in bytes. */ -static constexpr uint64_t PAGE_SIZE = 4096; +#if defined(PAGE_SIZE) +static constexpr uint64_t kPageSize = PAGE_SIZE; +#else +static constexpr uint64_t kPageSize = 4096; +#endif /** Cache line size in bytes. 
*/ -static constexpr uint64_t CACHELINE_SIZE = 64; +#if defined(CACHE_LINESIZE) +static constexpr uint64_t kCacheLineSize = CACHE_LINESIZE; +#else +static constexpr uint64_t kCacheLineSize = 64; +#endif /** Hard cap on CPU index (largest known socket: 384 cores). */ -static constexpr uint16_t INVALID_PROCESSOR_NUMBER = (1 << 10); +static constexpr uint16_t kInvalidProcessorNumber = (1 << 10); /** Round @p value up to the nearest multiple of @p align (must be a power of two). */ template diff --git a/include/silk/util/queue.h b/include/silk/util/queue.h index 6d6cfc1..9c642e4 100644 --- a/include/silk/util/queue.h +++ b/include/silk/util/queue.h @@ -20,7 +20,7 @@ class QueueBase /** * Queue node holding a pointer to the enqueued value. */ - struct alignas(CACHELINE_SIZE) QueueNode + struct alignas(kCacheLineSize) QueueNode { StackEntry stackEntry; std::atomic next; @@ -28,7 +28,7 @@ class QueueBase // TODO(vskipin): we can store small data in-place }; - static_assert(sizeof(QueueNode) == CACHELINE_SIZE); + static_assert(sizeof(QueueNode) == kCacheLineSize); // Match offsets used by src/gdb/fiber.py::_walk_queue static_assert(offsetof(QueueNode, next) == 8); diff --git a/include/silk/util/sharded-stack.h b/include/silk/util/sharded-stack.h index b85cd68..637ce01 100644 --- a/include/silk/util/sharded-stack.h +++ b/include/silk/util/sharded-stack.h @@ -80,7 +80,7 @@ class ShardedStackBase * objects or corruption. rseq guarantees the head update is atomic with * respect to preemption; count is updated after the rseq commit. */ - struct alignas(CACHELINE_SIZE) ProcessorState + struct alignas(kCacheLineSize) ProcessorState { std::atomic head{}; std::atomic count{}; diff --git a/src/fibers/fiber.cpp b/src/fibers/fiber.cpp index 4ccef5c..9a58fa6 100644 --- a/src/fibers/fiber.cpp +++ b/src/fibers/fiber.cpp @@ -133,7 +133,7 @@ class Fiber // dispatch and every suspension. 
runFiber's full read/write set lives on // this single line, so dispatch never pulls a second cache line on the // common path. - struct alignas(CACHELINE_SIZE) + struct alignas(kCacheLineSize) { // Intrusive node for pool free-list and WaitStack membership. StackEntry stackEntry; @@ -150,10 +150,10 @@ class Fiber bool inThreadMode = false; // CPU this fiber is assigned to. - uint16_t processorNumber = INVALID_PROCESSOR_NUMBER; + uint16_t processorNumber = kInvalidProcessorNumber; // Processor whose suspendedList this fiber is currently in. - uint16_t suspendedProcessorNumber = INVALID_PROCESSOR_NUMBER; + uint16_t suspendedProcessorNumber = kInvalidProcessorNumber; // Suspend callback set by suspend, invoked by runFiber after the // context switch back to the scheduler or thread worker. @@ -173,7 +173,7 @@ class Fiber // Cache line 1: context-switch state and profiler timestamps; touched on // every dispatch. Per-fiber-once fields (fiberMain, parametersDtor, // waitingFuture) piggyback for free. - struct alignas(CACHELINE_SIZE) + struct alignas(kCacheLineSize) { // mmap'd stack and fcontext handles for cooperative switching. 
void * stack = nullptr; @@ -266,7 +266,7 @@ Fiber::~Fiber() noexcept if (stack) { - int r = ::munmap(stack, FiberScheduler::getOptions().fiberStackSize + 2 * PAGE_SIZE); + int r = ::munmap(stack, FiberScheduler::getOptions().fiberStackSize + 2 * kPageSize); SILK_ASSERT(!r); } } @@ -277,8 +277,8 @@ bool Fiber::initialize( state.store(FiberState::SUSPENDED, std::memory_order_relaxed); inThreadMode = false; - processorNumber = INVALID_PROCESSOR_NUMBER; - suspendedProcessorNumber = INVALID_PROCESSOR_NUMBER; + processorNumber = kInvalidProcessorNumber; + suspendedProcessorNumber = kInvalidProcessorNumber; suspendCallback = nullptr; suspendContext = nullptr; fiberId = fiberId_; @@ -291,17 +291,17 @@ bool Fiber::initialize( if (!stack) { - stack = ::mmap(nullptr, fiberStackSize + 2 * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + stack = ::mmap(nullptr, fiberStackSize + 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (stack == MAP_FAILED) [[unlikely]] { stack = nullptr; return false; } - int r = ::mprotect(stack, PAGE_SIZE, PROT_NONE); + int r = ::mprotect(stack, kPageSize, PROT_NONE); SILK_ASSERT(!r); - r = ::mprotect(static_cast(stack) + PAGE_SIZE + fiberStackSize, PAGE_SIZE, PROT_NONE); + r = ::mprotect(static_cast(stack) + kPageSize + fiberStackSize, kPageSize, PROT_NONE); SILK_ASSERT(!r); } @@ -317,7 +317,7 @@ bool Fiber::initialize( fiberMain = fiberMain_; parametersDtor = parametersDtor_; - fiberContext = make_fcontext(static_cast(stack) + PAGE_SIZE + fiberStackSize, fiberStackSize, fiberContextMain); + fiberContext = make_fcontext(static_cast(stack) + kPageSize + fiberStackSize, fiberStackSize, fiberContextMain); return true; } @@ -342,7 +342,7 @@ void Fiber::switchToFiberContext() noexcept #if defined(__SANITIZE_ADDRESS__) void * schedulerFakeStack = nullptr; __sanitizer_start_switch_fiber( - &schedulerFakeStack, static_cast(stack) + PAGE_SIZE, FiberScheduler::getOptions().fiberStackSize); + 
&schedulerFakeStack, static_cast(stack) + kPageSize, FiberScheduler::getOptions().fiberStackSize); #endif #if defined(__SANITIZE_THREAD__) @@ -539,10 +539,10 @@ struct FiberScheduler::ProcessorState void removeSuspended(Fiber * fiber) noexcept; // Cache line 0: scheduling hot path. - struct alignas(CACHELINE_SIZE) + struct alignas(kCacheLineSize) { // CPU index this processor is pinned to. - uint16_t number = INVALID_PROCESSOR_NUMBER; + uint16_t number = kInvalidProcessorNumber; // Set to true by runScheduler after initialization completes. // The steal loop checks this before accessing the ring, @@ -636,7 +636,7 @@ struct FiberScheduler::ProcessorState void FiberScheduler::ProcessorState::initialize(uint16_t cpu) noexcept { - SILK_ASSERT(cpu < INVALID_PROCESSOR_NUMBER); + SILK_ASSERT(cpu < kInvalidProcessorNumber); number = cpu; readyQueue.initialize(options.readyQueueCapacity); @@ -1023,7 +1023,7 @@ void FiberScheduler::initialize(const Options * userOptions) noexcept options = *userOptions; } - SILK_ASSERT(options.fiberStackSize >= PAGE_SIZE && (options.fiberStackSize % PAGE_SIZE) == 0); + SILK_ASSERT(options.fiberStackSize >= kPageSize && (options.fiberStackSize % kPageSize) == 0); SILK_ASSERT(options.readyQueueCapacity >= 2 && (options.readyQueueCapacity & (options.readyQueueCapacity - 1)) == 0); SILK_ASSERT(options.ioUringQueueSize >= 2 && (options.ioUringQueueSize & (options.ioUringQueueSize - 1)) == 0); SILK_ASSERT(options.ioUringFlushThreshold >= 1 && options.ioUringFlushThreshold <= options.ioUringQueueSize); @@ -1096,7 +1096,7 @@ void FiberScheduler::buildStealCandidates() noexcept for (uint16_t cpu = 0; cpu < scheduler->processorCount; ++cpu) { ProcessorState * processor = &scheduler->processorState[cpu]; - if (processor->number == INVALID_PROCESSOR_NUMBER) + if (processor->number == kInvalidProcessorNumber) { continue; } @@ -1112,7 +1112,7 @@ void FiberScheduler::buildStealCandidates() noexcept continue; } uint64_t cost = UINT64_MAX; - if 
(scheduler->processorState[other].number != INVALID_PROCESSOR_NUMBER) + if (scheduler->processorState[other].number != kInvalidProcessorNumber) { cost = topologyCostCycles(topologies[cpu], topologies[other]); } @@ -1156,7 +1156,7 @@ void FiberScheduler::destroy() noexcept for (uint16_t cpu = 0; cpu < scheduler->processorCount; ++cpu) { ProcessorState * processor = &scheduler->processorState[cpu]; - if (processor->number != INVALID_PROCESSOR_NUMBER) + if (processor->number != kInvalidProcessorNumber) { processor->wakeThread(); } @@ -1273,7 +1273,7 @@ void FiberScheduler::enqueueReady(Fiber * fiber) noexcept if (!fiber->inThreadMode) { - if (fiber->processorNumber == INVALID_PROCESSOR_NUMBER) + if (fiber->processorNumber == kInvalidProcessorNumber) { fiber->processorNumber = getCurrentProcessor(); } @@ -1472,7 +1472,7 @@ void FiberScheduler::cancelIo(IoFuture * future) noexcept // it), io_uring returns -ENOENT and the original operation is never removed, // leaving the caller's IoFuture::wait() blocked forever. uint16_t processorNumber = future->processorNumber; - if (processorNumber == INVALID_PROCESSOR_NUMBER) + if (processorNumber == kInvalidProcessorNumber) { processorNumber = getCurrentProcessor(); } @@ -1992,11 +1992,11 @@ void FiberScheduler::runFiber(Fiber * fiber, CpuTimer * timer) noexcept // Maintain the per-CPU suspended list for GDB debuggability. // suspendedLock and suspendedList are co-located in ProcessorState cache line 0. // Benchmarking showed no net cost. 
- if (fiber->suspendedProcessorNumber != INVALID_PROCESSOR_NUMBER) + if (fiber->suspendedProcessorNumber != kInvalidProcessorNumber) { ProcessorState * processor = &scheduler->processorState[fiber->suspendedProcessorNumber]; processor->removeSuspended(fiber); - fiber->suspendedProcessorNumber = INVALID_PROCESSOR_NUMBER; + fiber->suspendedProcessorNumber = kInvalidProcessorNumber; } ProcessorState * processor = &scheduler->processorState[fiber->processorNumber]; diff --git a/src/fibers/profiler.h b/src/fibers/profiler.h index 484126d..908bb56 100644 --- a/src/fibers/profiler.h +++ b/src/fibers/profiler.h @@ -71,8 +71,8 @@ class Profiler // State. // - alignas(CACHELINE_SIZE) std::atomic writeIndex{}; - alignas(CACHELINE_SIZE) std::atomic readIndex{}; + alignas(kCacheLineSize) std::atomic writeIndex{}; + alignas(kCacheLineSize) std::atomic readIndex{}; std::atomic events[RING_CAPACITY]; Histogram histograms[NUM_KINDS][NUM_CATEGORIES]; }; diff --git a/src/fibers/tests/fiber-test.cpp b/src/fibers/tests/fiber-test.cpp index d56375a..f420251 100644 --- a/src/fibers/tests/fiber-test.cpp +++ b/src/fibers/tests/fiber-test.cpp @@ -885,7 +885,7 @@ TEST(Fiber, WorkStealing) std::atomic started{false}; std::atomic stop{false}; - std::atomic blockerCpuAtom{INVALID_PROCESSOR_NUMBER}; + std::atomic blockerCpuAtom{kInvalidProcessorNumber}; FiberFuture blocker; int r = FiberScheduler::run(BlockerParams::fiberMain, {&started, &stop, &blockerCpuAtom}, &blocker); diff --git a/src/gdb/fiber.py b/src/gdb/fiber.py index 74e14c2..c0ddda3 100644 --- a/src/gdb/fiber.py +++ b/src/gdb/fiber.py @@ -340,7 +340,7 @@ def _walk_bounded_queue(val): for i in range(num_items): pos = dequeue_pos + i - slot_addr = slots_base + (pos & mask) * 64 # stride = CACHELINE_SIZE + slot_addr = slots_base + (pos & mask) * 64 # stride = kCacheLineSize (hard-coded 64; keep in sync if CACHE_LINESIZE overrides it) seq = _u64(slot_addr) if seq == pos + 1: # slot has a valid value value = _ptr(slot_addr + 8) diff --git a/src/util/benchmarks/platform-bench.cpp
b/src/util/benchmarks/platform-bench.cpp index 0f1ec71..9787095 100644 --- a/src/util/benchmarks/platform-bench.cpp +++ b/src/util/benchmarks/platform-bench.cpp @@ -158,7 +158,7 @@ BENCHMARK_F(PlatformBench, EventFdRoundTrip)(benchmark::State & state) // pure MESI protocol traffic. BENCHMARK_F(PlatformBench, CacheLineRoundTrip)(benchmark::State & state) { - alignas(CACHELINE_SIZE) std::atomic shared{}; + alignas(kCacheLineSize) std::atomic shared{}; std::thread helper( [&] diff --git a/src/util/memory-pool.cpp b/src/util/memory-pool.cpp index f349be5..13a8db3 100644 --- a/src/util/memory-pool.cpp +++ b/src/util/memory-pool.cpp @@ -12,7 +12,7 @@ MemoryPoolBase::MemoryPoolBase( uint32_t objectSize, uint32_t alignment, uint32_t stackEntryOffset, InitFn * initialize, DestroyFn * destroy) noexcept : objectSize(alignUp(objectSize, alignment)) , alignment(alignment) - , chunkSize(alignUp(slotsOffset() + MIN_BATCH_SIZE * this->objectSize, PAGE_SIZE)) + , chunkSize(alignUp(slotsOffset() + MIN_BATCH_SIZE * this->objectSize, kPageSize)) , stackEntryOffset(stackEntryOffset) , initialize(initialize) , destroy(destroy)