From 69eafe687878211a59208667961d4a5aab07136c Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sat, 24 May 2025 23:55:17 +0800 Subject: [PATCH 1/3] lib/math/gcd: Use static key to select implementation at runtime On platforms like RISC-V, the compiler may generate hardware FFS instructions even if the underlying CPU does not actually support them. Currently, the GCD implementation is chosen at compile time based on CONFIG_CPU_NO_EFFICIENT_FFS, which can result in suboptimal behavior on such systems. Introduce a static key, efficient_ffs_key, to enable runtime selection between the binary GCD (using ffs) and the odd-even GCD implementation. This allows the kernel to default to the faster binary GCD when FFS is efficient, while retaining the ability to fall back when needed. Co-developed-by: Yu-Chun Lin Signed-off-by: Yu-Chun Lin Signed-off-by: Kuan-Wei Chiu Signed-off-by: Linux RISC-V bot --- lib/math/gcd.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/lib/math/gcd.c b/lib/math/gcd.c index e3b042214d1b7d..2898ee0e5addcd 100644 --- a/lib/math/gcd.c +++ b/lib/math/gcd.c @@ -2,6 +2,7 @@ #include #include #include +#include /* * This implements the binary GCD algorithm. (Often attributed to Stein, @@ -11,16 +12,13 @@ * has decent hardware division. */ +DEFINE_STATIC_KEY_TRUE(efficient_ffs_key); + #if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) /* If __ffs is available, the even/odd algorithm benchmarks slower. */ -/** - * gcd - calculate and return the greatest common divisor of 2 unsigned longs - * @a: first value - * @b: second value - */ -unsigned long gcd(unsigned long a, unsigned long b) +static unsigned long binary_gcd(unsigned long a, unsigned long b) { unsigned long r = a | b; @@ -44,9 +42,15 @@ unsigned long gcd(unsigned long a, unsigned long b) } } -#else +#endif /* If normalization is done by loops, the even/odd algorithm is a win. */ + +/** + * gcd - calculate and return the greatest common divisor of 2 unsigned longs + * @a: first value + * @b: second value + */ unsigned long gcd(unsigned long a, unsigned long b) { unsigned long r = a | b; @@ -54,6 +58,11 @@ unsigned long gcd(unsigned long a, unsigned long b) if (!a || !b) return r; +#if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) + if (static_branch_likely(&efficient_ffs_key)) + return binary_gcd(a, b); +#endif + /* Isolate lsbit of r */ r &= -r; @@ -80,6 +89,4 @@ unsigned long gcd(unsigned long a, unsigned long b) } } -#endif - EXPORT_SYMBOL_GPL(gcd); From cc1646e80b40b65f510f7a8cf47dde87799fb61f Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sat, 24 May 2025 23:55:18 +0800 Subject: [PATCH 2/3] riscv: Optimize gcd() code size when CONFIG_RISCV_ISA_ZBB is disabled The binary GCD implementation depends on efficient ffs(), which on RISC-V requires hardware support for the Zbb extension. When CONFIG_RISCV_ISA_ZBB is not enabled, the kernel will never use binary GCD, as runtime logic will always fall back to the odd-even implementation. To avoid compiling unused code and reduce code size, select CONFIG_CPU_NO_EFFICIENT_FFS when CONFIG_RISCV_ISA_ZBB is not set. $ ./scripts/bloat-o-meter ./lib/math/gcd.o.old ./lib/math/gcd.o.new add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-274 (-274) Function old new delta gcd 360 86 -274 Total: Before=384, After=110, chg -71.35% Co-developed-by: Yu-Chun Lin Signed-off-by: Yu-Chun Lin Signed-off-by: Kuan-Wei Chiu Signed-off-by: Linux RISC-V bot --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index bbec87b7930999..f085adc6f573de 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -95,6 +95,7 @@ config RISCV select CLINT_TIMER if RISCV_M_MODE select CLONE_BACKWARDS select COMMON_CLK + select CPU_NO_EFFICIENT_FFS if !RISCV_ISA_ZBB select CPU_PM if CPU_IDLE || HIBERNATION || SUSPEND select EDAC_SUPPORT select FRAME_POINTER if PERF_EVENTS || (FUNCTION_TRACER && !DYNAMIC_FTRACE) From 9abdbbdbae16e9abf808544edd180f4a97d6ef54 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sat, 24 May 2025 23:55:19 +0800 Subject: [PATCH 3/3] riscv: Optimize gcd() performance on RISC-V without Zbb extension The binary GCD implementation uses FFS (find first set), which benefits from hardware support for the ctz instruction, provided by the Zbb extension on RISC-V. Without Zbb, this results in slower software-emulated behavior. Previously, RISC-V always used the binary GCD, regardless of actual hardware support. This patch improves runtime efficiency by disabling the efficient_ffs_key static branch when Zbb is either not enabled in the kernel (config) or not supported on the executing CPU. This selects the odd-even GCD implementation, which is faster in the absence of efficient FFS. This change ensures the most suitable GCD algorithm is chosen dynamically based on actual hardware capabilities. Co-developed-by: Yu-Chun Lin Signed-off-by: Yu-Chun Lin Signed-off-by: Kuan-Wei Chiu Signed-off-by: Linux RISC-V bot --- arch/riscv/kernel/setup.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index f7c9a1caa83e62..f891eedc3644b6 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -51,6 +52,8 @@ atomic_t hart_lottery __section(".sdata") ; unsigned long boot_cpu_hartid; +DECLARE_STATIC_KEY_TRUE(efficient_ffs_key); + /* * Place kernel memory regions on the resource tree so that * kexec-tools can retrieve them from /proc/iomem. While there @@ -361,6 +364,9 @@ void __init setup_arch(char **cmdline_p) riscv_user_isa_enable(); riscv_spinlock_init(); + + if (!IS_ENABLED(CONFIG_RISCV_ISA_ZBB) || !riscv_isa_extension_available(NULL, ZBB)) + static_branch_disable(&efficient_ffs_key); } bool arch_cpu_is_hotpluggable(int cpu)