From 88a15b79dfcfee202381d9367ae618901e5b9baa Mon Sep 17 00:00:00 2001
From: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
Date: Fri, 17 Nov 2023 14:56:35 +0200
Subject: [PATCH 1/3] Math: Restore fast look-up table based sine function

This patch adds function sofm_lut_sin_fixed_16b(). It was
used earlier in SOF with name sin_fixed() but was removed
when the CORDIC trigonometric library was added. This sine
function can be used in hot code parts. Due to look-up table
usage it consumes more .bss RAM than the CORDIC version.

Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
---
 src/include/sof/math/lut_trig.h |  15 +++++
 src/math/CMakeLists.txt         |   2 +
 src/math/Kconfig                |  11 ++++
 src/math/lut_trig.c             | 108 ++++++++++++++++++++++++++++++++
 zephyr/CMakeLists.txt           |   5 ++
 5 files changed, 141 insertions(+)
 create mode 100644 src/include/sof/math/lut_trig.h
 create mode 100644 src/math/lut_trig.c

diff --git a/src/include/sof/math/lut_trig.h b/src/include/sof/math/lut_trig.h
new file mode 100644
index 000000000000..7cf8f48ab602
--- /dev/null
+++ b/src/include/sof/math/lut_trig.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright(c) 2016-2023 Intel Corporation. All rights reserved.
+ *
+ * Author: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
+ */
+
+#ifndef __SOF_MATH_LUT_TRIG_H__
+#define __SOF_MATH_LUT_TRIG_H__
+
+#include <stdint.h>
+
+int16_t sofm_lut_sin_fixed_16b(int32_t w); /* w is radians in Q4.28, returns sin(w) in Q1.15 */
+
+#endif /* __SOF_MATH_LUT_TRIG_H__ */
diff --git a/src/math/CMakeLists.txt b/src/math/CMakeLists.txt
index afbc4718fa59..203d32ef7c55 100644
--- a/src/math/CMakeLists.txt
+++ b/src/math/CMakeLists.txt
@@ -11,6 +11,8 @@ if(CONFIG_CORDIC_FIXED)
         add_local_sources(sof trig.c)
 endif()
 
+add_local_sources_ifdef(CONFIG_MATH_LUT_SINE_FIXED sof lut_trig.c)
+
 add_local_sources_ifdef(CONFIG_SQRT_FIXED sof sqrt_int16.c)
 
 add_local_sources_ifdef(CONFIG_MATH_EXP sof exp_fcn.c exp_fcn_hifi.c)
diff --git a/src/math/Kconfig b/src/math/Kconfig
index 78a67ac868d1..d911fd4befe5 100644
--- a/src/math/Kconfig
+++ b/src/math/Kconfig
@@ -9,6 +9,17 @@ config CORDIC_FIXED
 	  Select this to enable sin(), cos(), asin(), acos(),
 	  and cexp() functions as 16 bit and 32 bit versions.
 
+config MATH_LUT_SINE_FIXED
+	bool "Lookup table based sine function"
+	default n
+	help
+	  Select this to enable sofm_lut_sin_fixed_16b() function. The
+	  calculation is using 1/4 wave lookup and interpolation. Use
+	  this for fast sine calculation in hot code parts. Sine
+	  calculation in component initialization should use the cordic
+	  version. This option consumes 1026 bytes .bss RAM for the
+	  lookup table.
+
 config POWER_FIXED
        bool "Power function"
        default n
diff --git a/src/math/lut_trig.c b/src/math/lut_trig.c
new file mode 100644
index 000000000000..5c88b957d413
--- /dev/null
+++ b/src/math/lut_trig.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: BSD-3-Clause
+//
+// Copyright(c) 2016-2024 Intel Corporation. All rights reserved.
+//
+// Author: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
+
+#include <sof/audio/format.h>
+#include <sof/math/lut_trig.h>
+#include <stdint.h>
+
+#define SOFM_LUT_SINE_C_Q20	341782638	/* 2 * SOFM_LUT_SINE_NQUART / pi in Q12.20 */
+#define SOFM_LUT_SINE_NQUART	512		/* Table steps per 1/4 wave, must be 2^N */
+#define SOFM_LUT_SINE_SIZE	(SOFM_LUT_SINE_NQUART + 1)	/* Both 0 and pi/2 are included */
+
+/* Sine values 0 to pi/2 in unsigned Q0.16 format, calculated with Octave
+ *	w = linspace(0, pi/2, 513);
+ *	s = 2^16;
+ *	x = min(round(s * sin(w)), s - 1);
+ * The min() clamps values that round to 2^16, e.g. sin(pi/2), to 65535 to fit 16 bits.
+ */
+static const uint16_t sofm_lut_sine_table_s16[SOFM_LUT_SINE_SIZE] = {
+	     0,    201,    402,    603,    804,   1005,   1206,   1407,   1608,   1809,   2010,
+	  2211,   2412,   2613,   2814,   3015,   3216,   3417,   3617,   3818,   4019,   4219,
+	  4420,   4621,   4821,   5022,   5222,   5422,   5623,   5823,   6023,   6224,   6424,
+	  6624,   6824,   7024,   7224,   7423,   7623,   7823,   8022,   8222,   8421,   8621,
+	  8820,   9019,   9218,   9417,   9616,   9815,  10014,  10212,  10411,  10609,  10808,
+	 11006,  11204,  11402,  11600,  11798,  11996,  12193,  12391,  12588,  12785,  12983,
+	 13180,  13376,  13573,  13770,  13966,  14163,  14359,  14555,  14751,  14947,  15143,
+	 15338,  15534,  15729,  15924,  16119,  16314,  16508,  16703,  16897,  17091,  17285,
+	 17479,  17673,  17867,  18060,  18253,  18446,  18639,  18832,  19024,  19216,  19409,
+	 19600,  19792,  19984,  20175,  20366,  20557,  20748,  20939,  21129,  21320,  21510,
+	 21699,  21889,  22078,  22268,  22457,  22645,  22834,  23022,  23210,  23398,  23586,
+	 23774,  23961,  24148,  24335,  24521,  24708,  24894,  25080,  25265,  25451,  25636,
+	 25821,  26005,  26190,  26374,  26558,  26742,  26925,  27108,  27291,  27474,  27656,
+	 27838,  28020,  28202,  28383,  28564,  28745,  28926,  29106,  29286,  29466,  29645,
+	 29824,  30003,  30182,  30360,  30538,  30716,  30893,  31071,  31248,  31424,  31600,
+	 31776,  31952,  32127,  32303,  32477,  32652,  32826,  33000,  33173,  33347,  33520,
+	 33692,  33865,  34037,  34208,  34380,  34551,  34721,  34892,  35062,  35231,  35401,
+	 35570,  35738,  35907,  36075,  36243,  36410,  36577,  36744,  36910,  37076,  37241,
+	 37407,  37572,  37736,  37900,  38064,  38228,  38391,  38554,  38716,  38878,  39040,
+	 39201,  39362,  39523,  39683,  39843,  40002,  40161,  40320,  40478,  40636,  40794,
+	 40951,  41108,  41264,  41420,  41576,  41731,  41886,  42040,  42194,  42348,  42501,
+	 42654,  42806,  42958,  43110,  43261,  43412,  43562,  43713,  43862,  44011,  44160,
+	 44308,  44456,  44604,  44751,  44898,  45044,  45190,  45335,  45480,  45625,  45769,
+	 45912,  46056,  46199,  46341,  46483,  46624,  46765,  46906,  47046,  47186,  47325,
+	 47464,  47603,  47741,  47878,  48015,  48152,  48288,  48424,  48559,  48694,  48828,
+	 48962,  49095,  49228,  49361,  49493,  49624,  49756,  49886,  50016,  50146,  50275,
+	 50404,  50532,  50660,  50787,  50914,  51041,  51166,  51292,  51417,  51541,  51665,
+	 51789,  51911,  52034,  52156,  52277,  52398,  52519,  52639,  52759,  52878,  52996,
+	 53114,  53232,  53349,  53465,  53581,  53697,  53812,  53926,  54040,  54154,  54267,
+	 54379,  54491,  54603,  54714,  54824,  54934,  55043,  55152,  55260,  55368,  55476,
+	 55582,  55689,  55794,  55900,  56004,  56108,  56212,  56315,  56418,  56520,  56621,
+	 56722,  56823,  56923,  57022,  57121,  57219,  57317,  57414,  57511,  57607,  57703,
+	 57798,  57892,  57986,  58079,  58172,  58265,  58356,  58448,  58538,  58628,  58718,
+	 58807,  58896,  58983,  59071,  59158,  59244,  59330,  59415,  59499,  59583,  59667,
+	 59750,  59832,  59914,  59995,  60075,  60156,  60235,  60314,  60392,  60470,  60547,
+	 60624,  60700,  60776,  60851,  60925,  60999,  61072,  61145,  61217,  61288,  61359,
+	 61429,  61499,  61568,  61637,  61705,  61772,  61839,  61906,  61971,  62036,  62101,
+	 62165,  62228,  62291,  62353,  62415,  62476,  62536,  62596,  62655,  62714,  62772,
+	 62830,  62886,  62943,  62998,  63054,  63108,  63162,  63215,  63268,  63320,  63372,
+	 63423,  63473,  63523,  63572,  63621,  63668,  63716,  63763,  63809,  63854,  63899,
+	 63944,  63987,  64031,  64073,  64115,  64156,  64197,  64237,  64277,  64316,  64354,
+	 64392,  64429,  64465,  64501,  64536,  64571,  64605,  64639,  64672,  64704,  64735,
+	 64766,  64797,  64827,  64856,  64884,  64912,  64940,  64967,  64993,  65018,  65043,
+	 65067,  65091,  65114,  65137,  65159,  65180,  65200,  65220,  65240,  65259,  65277,
+	 65294,  65311,  65328,  65343,  65358,  65373,  65387,  65400,  65413,  65425,  65436,
+	 65447,  65457,  65467,  65476,  65484,  65492,  65499,  65505,  65511,  65516,  65521,
+	 65525,  65528,  65531,  65533,  65535,  65535,  65535
+};
+
+/* Sine lookup table read, returns sin(pi / 2 * idx / SOFM_LUT_SINE_NQUART) in Q1.16 */
+static inline int32_t sofm_sine_lookup_16b(int idx)
+{
+	/* Position within a half period; the mask also wraps a negative idx */
+	int i1 = idx & (2 * SOFM_LUT_SINE_NQUART - 1);
+	int32_t s;
+
+	if (i1 > SOFM_LUT_SINE_NQUART)
+		i1 = 2 * SOFM_LUT_SINE_NQUART - i1; /* Mirror the falling quarter */
+
+	s = sofm_lut_sine_table_s16[i1];
+	/* Odd half periods are negative. Testing the half period bit instead of
+	 * idx > 2 * NQUART keeps the sign correct for idx < 0 and idx >= 4 * NQUART.
+	 */
+	return (idx & (2 * SOFM_LUT_SINE_NQUART)) ? -s : s;
+}
+
+/* Compute fixed point sine with 1/4 wave table lookup and linear interpolation.
+ * Input w is radians in Q4.28, return value is sin(w) in Q1.15 via sat_int16().
+ */
+int16_t sofm_lut_sin_fixed_16b(int32_t w)
+{
+	/* Table position, Q4.28 x Q12.20 -> Q16.48 --> Q16.31 */
+	int64_t idx_tmp = ((int64_t)w * SOFM_LUT_SINE_C_Q20) >> 17;
+	int32_t idx = (int32_t)(idx_tmp >> 31); /* Integer part in Q0 */
+	/* Fraction Q0.31 by mask, idx_tmp - (idx << 31) is undefined for negative idx */
+	int32_t frac = (int32_t)(idx_tmp & INT32_MAX);
+	int32_t delta;
+	int32_t sine;
+	int32_t s0;
+	int32_t s1;
+
+	s0 = sofm_sine_lookup_16b(idx); /* Q1.16 */
+	s1 = sofm_sine_lookup_16b(idx + 1); /* Q1.16 */
+	delta = s1 - s0; /* Q1.16 */
+	sine = s0 + q_mults_32x32(frac, delta, Q_SHIFT_BITS_64(31, 16, 16)); /* Q1.16 */
+	return sat_int16((sine + 1) >> 1); /* Round to Q1.15 */
+}
diff --git a/zephyr/CMakeLists.txt b/zephyr/CMakeLists.txt
index badb88f3bb83..7234bbda2bfa 100644
--- a/zephyr/CMakeLists.txt
+++ b/zephyr/CMakeLists.txt
@@ -416,6 +416,11 @@ zephyr_library_sources(
 	lib.c
 )
 
+# Optional math utility
+zephyr_library_sources_ifdef(CONFIG_MATH_LUT_SINE_FIXED
+	${SOF_MATH_PATH}/lut_trig.c
+)
+
 # SOF module interface functions
 add_subdirectory(../src/module module_unused_install/)
 

From 4ed988b9ff7f0147d678c0d06abc33780bfa4159 Mon Sep 17 00:00:00 2001
From: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
Date: Tue, 9 Jan 2024 19:08:45 +0200
Subject: [PATCH 2/3] Test: Cmocka: Add test case for lookup table sine
 function

The test function is based on test function for the cordic
sine function. The error tolerance is adjusted to just pass.

Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
---
 test/cmocka/src/math/trig/CMakeLists.txt      |  4 ++
 test/cmocka/src/math/trig/lut_sin_16b_fixed.c | 54 +++++++++++++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 test/cmocka/src/math/trig/lut_sin_16b_fixed.c

diff --git a/test/cmocka/src/math/trig/CMakeLists.txt b/test/cmocka/src/math/trig/CMakeLists.txt
index a818df1cc709..2d39eecd27b0 100644
--- a/test/cmocka/src/math/trig/CMakeLists.txt
+++ b/test/cmocka/src/math/trig/CMakeLists.txt
@@ -40,3 +40,7 @@ cmocka_test(acos_16b_fixed
 	${PROJECT_SOURCE_DIR}/src/math/trig.c
 )
 
+cmocka_test(lut_sin_16b_fixed
+	lut_sin_16b_fixed.c
+	${PROJECT_SOURCE_DIR}/src/math/lut_trig.c
+)
diff --git a/test/cmocka/src/math/trig/lut_sin_16b_fixed.c b/test/cmocka/src/math/trig/lut_sin_16b_fixed.c
new file mode 100644
index 000000000000..b95390022903
--- /dev/null
+++ b/test/cmocka/src/math/trig/lut_sin_16b_fixed.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: BSD-3-Clause
+//
+// Copyright(c) 2024 Intel Corporation. All rights reserved.
+//
+// Author: Slawomir Blauciak <slawomir.blauciak@linux.intel.com>
+// Author: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <setjmp.h>
+#include <math.h>
+#include <cmocka.h>
+
+#include <sof/audio/format.h>
+#include <sof/math/lut_trig.h>
+
+#include "trig_tables.h"
+
+#define CMP_TOLERANCE	3.1e-5	/* Max abs error, just above one Q1.15 LSB (2^-15) */
+#define _M_PI		3.14159265358979323846	/* pi */
+
+/* Check sofm_lut_sin_fixed_16b() against sin_ref_table[] in one degree steps, 0 to 359 */
+static void test_math_trig_lut_sin_fixed(void **state)
+{
+	double rad;
+	float error;
+	float r;
+	int32_t w_q28;
+	int deg;
+
+	(void)state;
+	for (deg = 0; deg < 360; ++deg) {
+		rad = _M_PI / 180.0 * deg;
+		w_q28 = Q_CONVERT_FLOAT(rad, 28);
+		r = Q_CONVERT_QTOF(sofm_lut_sin_fixed_16b(w_q28), 15);
+		error = fabsf(sin_ref_table[deg] - r);
+		if (error > CMP_TOLERANCE)
+			printf("%s: diff for %d deg = %g\n", __func__, deg, error);
+		assert_true(error <= CMP_TOLERANCE);
+	}
+}
+
+int main(void)
+{
+	const struct CMUnitTest tests[] = {
+		cmocka_unit_test(test_math_trig_lut_sin_fixed)
+	};
+
+	cmocka_set_message_output(CM_OUTPUT_TAP); /* Select TAP as the message output */
+
+	return cmocka_run_group_tests(tests, NULL, NULL); /* NULL: no group setup/teardown */
+}

From 3d1d453b2bb23ed3f9a588903f22f650a78f63da Mon Sep 17 00:00:00 2001
From: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
Date: Fri, 17 Nov 2023 18:32:58 +0200
Subject: [PATCH 3/3] Audio: DRC: Change DRC to use lookup table based sine
 function

This change saves in TGL platform about 13 MCPS, from 83
to 70 MCPS. In MTL platform the saving is 12 MCPS, from 46
to 34 MCPS. The .bss RAM usage increases by 1 kB from
selecting CONFIG_MATH_LUT_SINE_FIXED.

Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
---
 src/audio/drc/Kconfig    | 1 +
 src/audio/drc/drc_math.h | 7 +++----
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/audio/drc/Kconfig b/src/audio/drc/Kconfig
index 985feede998a..dfc5aba4f4fe 100644
--- a/src/audio/drc/Kconfig
+++ b/src/audio/drc/Kconfig
@@ -3,6 +3,7 @@
 config COMP_DRC
 	bool "Dynamic Range Compressor component"
 	select CORDIC_FIXED
+	select MATH_LUT_SINE_FIXED
 	select NUMBERS_NORM
 	select MATH_EXP
 	select COMP_BLOB
diff --git a/src/audio/drc/drc_math.h b/src/audio/drc/drc_math.h
index 8748632e5812..0d89fd77a52c 100644
--- a/src/audio/drc/drc_math.h
+++ b/src/audio/drc/drc_math.h
@@ -11,6 +11,7 @@
 #include <stdint.h>
 #include <sof/audio/format.h>
 #include <sof/math/numbers.h>
+#include <sof/math/lut_trig.h>
 #include <sof/math/trig.h>
 
 #include "drc_plat_conf.h"
@@ -60,9 +61,8 @@ static inline int32_t drc_sin_fixed(int32_t x)
 {
 	const int32_t lshift = drc_get_lshift(30, 30, 28);
 	int32_t denorm_x = drc_mult_lshift(x, PI_OVER_TWO_Q30, lshift);
-	int32_t sin_val = sin_fixed_16b(denorm_x);
 
-	return sin_val << 16;
+	return sofm_lut_sin_fixed_16b(denorm_x) << 16;
 }
 
 #ifdef DRC_USE_CORDIC_ASIN
@@ -88,9 +88,8 @@ static inline int32_t drc_asin_fixed(int32_t x)
 static inline int32_t drc_sin_fixed(int32_t x)
 {
 	const int32_t PI_OVER_TWO = Q_CONVERT_FLOAT(1.57079632679489661923, 30);
-	int32_t sin_val = sin_fixed_16b(Q_MULTSR_32X32((int64_t)x, PI_OVER_TWO, 30, 30, 28));
 
-	return sin_val << 16;
+	return sofm_lut_sin_fixed_16b(Q_MULTSR_32X32((int64_t)x, PI_OVER_TWO, 30, 30, 28)) << 16;
 }
 
 #ifdef DRC_USE_CORDIC_ASIN