mirror of https://github.com/thesofproject/sof.git
Math: Library: Add the hifi4 exponential function
The 32-bit HiFi4 exponential library function has an accuracy of 1e-4, a unit in last place error of 5.60032793, and output ranges from 0.0067379470 to 148.4131591026 for inputs from -5 to +5 (Q4.28) (Q9.23). Signed-off-by: ShriramShastry <malladi.sastry@intel.com>
This commit is contained in:
parent
75e8f4b63c
commit
c069197b63
|
@ -10,6 +10,18 @@
|
|||
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__XCC__)
|
||||
/* HiFi */
|
||||
#include <xtensa/config/core-isa.h>
|
||||
#if XCHAL_HAVE_HIFI4 == 1
|
||||
#define SOFM_EXPONENTIAL_HIFI4 1
|
||||
#endif
|
||||
#else
|
||||
/* !XCC */
|
||||
#define EXPONENTIAL_GENERIC 1
|
||||
|
||||
#endif
|
||||
|
||||
int32_t sofm_exp_int32(int32_t x);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -14,8 +14,8 @@ if(CONFIG_SQRT_FIXED)
|
|||
add_local_sources(sof sqrt_int16.c)
|
||||
endif()
|
||||
|
||||
if(CONFIG_EXP_FIXED)
|
||||
add_local_sources(sof exp_fcn.c)
|
||||
if(CONFIG_MATH_EXP)
|
||||
add_local_sources(sof exp_fcn.c exp_fcn_hifi4.c)
|
||||
endif()
|
||||
|
||||
if(CONFIG_MATH_DECIBELS)
|
||||
|
|
|
@ -38,13 +38,13 @@ config SQRT_FIXED
|
|||
to calculate square root.square function having positive number
|
||||
y as input and return the positive number x multiplied by itself (squared)
|
||||
|
||||
config EXP_FIXED
|
||||
config MATH_EXP
|
||||
bool "Exponential functions"
|
||||
default n
|
||||
help
|
||||
By selecting this, the 32-bit sofm_exp_int32() function can be used to calculate
|
||||
exponential values. With a maximum ulp of 5, an exponential function with
|
||||
an input range of -5 to +5 y gives positive numbers between 0.00673794699908547 and
|
||||
an input range of -5 to +5 gives positive numbers between 0.00673794699908547 and
|
||||
148.413159102577. The precision of this function is 1e-4.
|
||||
|
||||
config NATURAL_LOGARITHM_FIXED
|
||||
|
|
|
@ -6,7 +6,6 @@
|
|||
*
|
||||
*/
|
||||
|
||||
/* Include Files */
|
||||
#include <sof/math/exp_fcn.h>
|
||||
#include <sof/math/numbers.h>
|
||||
#include <sof/common.h>
|
||||
|
@ -14,12 +13,14 @@
|
|||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define BIT_MASK_Q62P2 0x4000000000000000LL
|
||||
#define CONVERG_ERROR 28823037607936LL // error smaller than 1e-4,1/2 ^ -44.7122876200884
|
||||
#define BIT_MASK_LOW_Q27P5 0x8000000
|
||||
#define QUOTIENT_SCALE BIT(30)
|
||||
#define TERMS_Q23P9 8388608
|
||||
#define LSHIFT_BITS 8192
|
||||
#if defined(EXPONENTIAL_GENERIC)
|
||||
|
||||
#define SOFM_BIT_MASK_Q62P2 0x4000000000000000LL
|
||||
#define SOFM_CONVERG_ERROR 28823037607936LL // error smaller than 1e-4,1/2 ^ -44.7122876200884
|
||||
#define SOFM_BIT_MASK_LOW_Q27P5 0x8000000
|
||||
#define SOFM_QUOTIENT_SCALE BIT(30)
|
||||
#define SOFM_TERMS_Q23P9 8388608
|
||||
#define SOFM_LSHIFT_BITS 8192
|
||||
|
||||
/* inv multiplication lookup table */
|
||||
/* LUT = ceil(1/factorial(b_n) * 2 ^ 63) */
|
||||
|
@ -152,7 +153,7 @@ static inline int64_t lomul_s64_sr_sat_near(int64_t a, int64_t b)
|
|||
uint64_t u64_rlo;
|
||||
|
||||
mul_s64(a, b, &u64_rhi, &u64_rlo);
|
||||
const bool roundup = (u64_rlo & BIT_MASK_LOW_Q27P5) != 0;
|
||||
const bool roundup = (u64_rlo & SOFM_BIT_MASK_LOW_Q27P5) != 0;
|
||||
|
||||
u64_rlo = (u64_rhi << 36 | u64_rlo >> 28) + (roundup ? 1 : 0);
|
||||
return u64_rlo;
|
||||
|
@ -179,8 +180,8 @@ int32_t sofm_exp_int32(int32_t x)
|
|||
uint64_t ou0Lo;
|
||||
int64_t qt;
|
||||
int32_t b_n;
|
||||
int32_t ts = TERMS_Q23P9; /* Q23.9 */
|
||||
int64_t dividend = (x + LSHIFT_BITS) >> 14; /* x in Q50.14 */
|
||||
int32_t ts = SOFM_TERMS_Q23P9; /* Q23.9 */
|
||||
int64_t dividend = (x + SOFM_LSHIFT_BITS) >> 14; /* x in Q50.14 */
|
||||
static const int32_t i_emin = -1342177280; /* Q4.28 */
|
||||
static const int32_t o_emin = 56601; /* Q9.23 */
|
||||
static const int32_t i_emax = 1342177280; /* Q4.28 */
|
||||
|
@ -195,22 +196,23 @@ int32_t sofm_exp_int32(int32_t x)
|
|||
return o_emax; /* 148.4131494760513306 in Q9.23 */
|
||||
|
||||
/* pre-computation of 1st & 2nd terms */
|
||||
mul_s64(dividend, BIT_MASK_Q62P2, &ou0Hi, &ou0Lo);
|
||||
mul_s64(dividend, SOFM_BIT_MASK_Q62P2, &ou0Hi, &ou0Lo);
|
||||
qt = (ou0Hi << 46) | (ou0Lo >> 18);/* Q6.26 */
|
||||
ts += (int32_t)((qt >> 35) + ((qt & QUOTIENT_SCALE) >> 18));
|
||||
ts += (int32_t)((qt >> 35) + ((qt & SOFM_QUOTIENT_SCALE) >> 18));
|
||||
dividend = lomul_s64_sr_sat_near(dividend, x);
|
||||
for (b_n = 0; b_n < ARRAY_SIZE(exp_iv_ilookup); b_n++) {
|
||||
mul_s64(dividend, exp_iv_ilookup[b_n], &ou0Hi, &ou0Lo);
|
||||
qt = (ou0Hi << 45) | (ou0Lo >> 19);
|
||||
|
||||
/* sum of the remaining terms */
|
||||
ts += (int32_t)((qt >> 35) + ((qt & QUOTIENT_SCALE) ? 1 : 0));
|
||||
ts += (int32_t)((qt >> 35) + ((qt & SOFM_QUOTIENT_SCALE) ? 1 : 0));
|
||||
dividend = lomul_s64_sr_sat_near(dividend, x);
|
||||
|
||||
qt = ABS(qt);
|
||||
/* For inputs between -5 and 5, (qt < CONVERG_ERROR) is always true */
|
||||
if (qt < CONVERG_ERROR)
|
||||
/* For inputs between -5 and 5, (qt < SOFM_CONVERG_ERROR) is always true */
|
||||
if (qt < SOFM_CONVERG_ERROR)
|
||||
break;
|
||||
}
|
||||
return ts;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,226 @@
|
|||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
/*
|
||||
*Copyright(c) 2023 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Author: Shriram Shastry <malladi.sastry@linux.intel.com>
|
||||
*
|
||||
*/
|
||||
|
||||
#include <sof/common.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#if defined(SOFM_EXPONENTIAL_HIFI4)
|
||||
#include <xtensa/tie/xt_hifi4.h>
|
||||
|
||||
#define SOFM_BIT_MASK_LOW_Q27P5 0x0000000008000000
|
||||
#define SOFM_BIT_MASK_Q62P2 0x4000000000000000
|
||||
#define SOFM_CONVERG_ERROR 0x1A36E2EB4000LL /* error smaller than 1e-4,1/2 ^ -44.7122876209085 */
|
||||
#define SOFM_QUOTIENT_SCALE 0x400000000
|
||||
#define SOFM_TERMS_Q23P9 0x800000
|
||||
#define SOFM_LSHIFT_BITS BIT(13)
|
||||
|
||||
/*
|
||||
* Arguments : int64_t in_0
|
||||
* int64_t in_1
|
||||
* uint64_t *ptroutbitshi
|
||||
* uint64_t *ptroutbitslo
|
||||
* Return Type : void
|
||||
* Description:Perform element-wise multiplication on in_0 and in_1
|
||||
* while keeping the required product word length and fractional
|
||||
* length in mind. mul_s64 function divide the 64-bit quantities
|
||||
* into two 32-bit words,multiply the low words to produce the
|
||||
* lowest and second-lowest words in the result, then both pairs
|
||||
* of low and high words from different numbers to produce the
|
||||
* second and third lowest words in the result, and finally both
|
||||
* high words to produce the two highest words in the outcome.
|
||||
* Add them all up, taking carry into consideration.
|
||||
*
|
||||
* The 64 x 64 bit multiplication of operands in_0 and in_1 is
|
||||
* shown in the image below. The 64-bit operand in_0,in_1 is
|
||||
* represented by the notation in0_H, in1_H for the top 32 bits
|
||||
* and in0_L, in1_L for the bottom 32 bits.
|
||||
*
|
||||
* in0_H : in0_L
|
||||
* x in1_H : in1_L
|
||||
* ---------------------
|
||||
* P0 in0_L x in1_L
|
||||
* P1 in0_H x in1_L 64 bit inner multiplication
|
||||
* P2 in0_L x in1_H 64 bit inner multiplication
|
||||
* P3 in0_H x in1_H
|
||||
* --------------------
|
||||
* [64 x 64 bit multiplication] sum of inner products
|
||||
* All combinations are multiplied by one another and then added.
|
||||
* Each inner product is moved into its proper power location.given the names
|
||||
* of the inner products, redoing the addition where 000 represents 32 zero
|
||||
* bits.The inner products can be added together in 64 bit addition.The sum
|
||||
* of two 64-bit numbers yields a 65-bit output.
|
||||
* (P0H:P0L)
|
||||
* P1H(P1L:000)
|
||||
* P2H(P2L:000)
|
||||
* P3H:P3L(000:000)
|
||||
* .......(aaa:P0L)
|
||||
* By combining P0H:P0L and P1L:000. This can lead to a carry, denote as CRY0.
|
||||
* The partial result is then multiplied by P2L:000.
|
||||
* We call it CRY1 because it has the potential to carry again.
|
||||
* (CRY0 + CRY1)P0H:P0L
|
||||
* ( P1H)P1L:000
|
||||
* ( P2H)P2L:000
|
||||
* (P3H: P3L)000:000
|
||||
* --------------------
|
||||
* (ccc:bbb)aaa:P0L
|
||||
* P1H, P2H, and P3H:P3L are added to the carry CRY0 + CRY1.This increase will
|
||||
* not result in an overflow.
|
||||
*
|
||||
*/
|
||||
static void mul_s64(ae_int64 in_0, ae_int64 in_1, ae_int64 *ptroutbitshi, ae_int64 *ptroutbitslo)
|
||||
{
|
||||
ae_int64 producthihi, producthilo, productlolo;
|
||||
ae_int64 producthi, carry, product_hl_lh_h, product_hl_lh_l;
|
||||
|
||||
ae_int32x2 in0_32 = AE_MOVINT32X2_FROMINT64(in_0);
|
||||
ae_int32x2 in1_32 = AE_MOVINT32X2_FROMINT64(in_1);
|
||||
|
||||
ae_ep ep_lolo = AE_MOVEA(0);
|
||||
ae_ep ep_hilo = AE_MOVEA(0);
|
||||
ae_ep ep_HL_LH = AE_MOVEA(0);
|
||||
|
||||
producthihi = AE_MUL32_HH(in0_32, in1_32);
|
||||
|
||||
/* AE_MULZAAD32USEP.HL.LH - Unsigned lower parts and signed higher 32-bit parts dual */
|
||||
/* multiply and accumulate operation on two 32x2 operands with 72-bit output */
|
||||
/* Input-32x32-bit(in1_32xin0_32)into 72-bit multiplication operations */
|
||||
/* Output-lower 64 bits of the result are stored in producthilo */
|
||||
/* Output-upper eight bits are stored in ep_hilo */
|
||||
AE_MULZAAD32USEP_HL_LH(ep_hilo, producthilo, in1_32, in0_32);
|
||||
productlolo = AE_MUL32U_LL(in0_32, in1_32);
|
||||
|
||||
product_hl_lh_h = AE_SRAI72(ep_hilo, producthilo, 32);
|
||||
product_hl_lh_l = AE_SLAI64(producthilo, 32);
|
||||
|
||||
/* The AE_ADD72 procedure adds two 72-bit elements. The first 72-bit value is created */
|
||||
/* by concatenating the MSBs and LSBs of operands ep[7:0] and d[63:0]. Similarly, the */
|
||||
/* second value is created by concatenating bits from operands ep1[7:0] and d1[63:0]. */
|
||||
AE_ADD72(ep_lolo, productlolo, ep_HL_LH, product_hl_lh_l);
|
||||
|
||||
carry = AE_SRAI72(ep_lolo, productlolo, 32);
|
||||
|
||||
carry = AE_SRLI64(carry, 32);
|
||||
producthi = AE_ADD64(producthihi, carry);
|
||||
|
||||
producthi = AE_ADD64(producthi, product_hl_lh_h);
|
||||
|
||||
*ptroutbitslo = productlolo;
|
||||
*ptroutbitshi = producthi;
|
||||
}
|
||||
|
||||
/*
|
||||
* Arguments : int64_t a
|
||||
* int64_t b
|
||||
* Return Type : int64_t
|
||||
*/
|
||||
static int64_t mul_s64_sr_sat_near(int64_t a, int64_t b)
|
||||
{
|
||||
ae_int64 result;
|
||||
ae_int64 u64_chi;
|
||||
ae_int64 u64_clo;
|
||||
ae_int64 temp;
|
||||
|
||||
mul_s64(a, b, &u64_chi, &u64_clo);
|
||||
|
||||
ae_int64 roundup = AE_AND64(u64_clo, SOFM_SOFM_BIT_MASK_LOW_Q27P5);
|
||||
|
||||
roundup = AE_SRLI64(roundup, 27);
|
||||
temp = AE_OR64(AE_SLAI64(u64_chi, 36), AE_SRLI64(u64_clo, 28));
|
||||
|
||||
return AE_ADD64(temp, roundup);
|
||||
}
|
||||
|
||||
static ae_int64 onebyfact_Q63[19] = {
|
||||
4611686018427387904LL,
|
||||
1537228672809129301LL,
|
||||
384307168202282325LL,
|
||||
76861433640456465LL,
|
||||
12810238940076077LL,
|
||||
1830034134296582LL,
|
||||
228754266787072LL,
|
||||
25417140754119LL,
|
||||
2541714075411LL,
|
||||
231064915946LL,
|
||||
19255409662LL,
|
||||
1481185358LL,
|
||||
105798954LL,
|
||||
7053264LL,
|
||||
440829LL,
|
||||
25931LL,
|
||||
1441LL,
|
||||
76LL,
|
||||
4LL
|
||||
};
|
||||
|
||||
/* f(x) = a^x, x is variable and a is base
|
||||
*
|
||||
* Arguments : int32_t x(Q4.28)
|
||||
* input range : -5 to 5
|
||||
*
|
||||
* Return Type : int32_t ts(Q9.23)
|
||||
* output range 0.0067465305 to 148.4131488800
|
||||
*+------------------+-----------------+--------+--------+
|
||||
*| x | ts (returntype) | x | ts |
|
||||
*+----+-----+-------+----+----+-------+--------+--------+
|
||||
*|WLen| FLen|Signbit|WLen|FLen|Signbit| Qformat| Qformat|
|
||||
*+----+-----+-------+----+----+-------+--------+--------+
|
||||
*| 32 | 28 | 1 | 32 | 23 | 0 | 4.28 | 9.23 |
|
||||
*+------------------+-----------------+--------+--------+
|
||||
*/
|
||||
int32_t sofm_exp_int32(int32_t x)
|
||||
{
|
||||
ae_int64 outhi;
|
||||
ae_int64 outlo;
|
||||
ae_int64 qt;
|
||||
ae_int64 onebyfact;
|
||||
ae_int64 temp;
|
||||
|
||||
ae_int64 *ponebyfact_Q63 = &onebyfact_Q63[0];
|
||||
ae_int64 ts = SOFM_TERMS_Q23P9;
|
||||
ae_int64 mp = (x + SOFM_LSHIFT_BITS) >> 14; /* x in Q50.14 */;
|
||||
xtbool flag;
|
||||
|
||||
int64_t qt_temp;
|
||||
int64_t b_n;
|
||||
|
||||
mul_s64(mp, SOFM_BIT_MASK_Q62P2, &outhi, &outlo);
|
||||
qt = AE_OR64(AE_SLAI64(outhi, 46), AE_SRLI64(outlo, 18));
|
||||
|
||||
temp = AE_SRAI64(AE_ADD64(qt, SOFM_QUOTIENT_SCALE), 35);
|
||||
|
||||
ts = AE_ADD64(ts, temp);
|
||||
|
||||
mp = mul_s64_sr_sat_near(mp, (int64_t)x);
|
||||
|
||||
for (b_n = 0; b_n < 64;) {
|
||||
AE_L64_IP(onebyfact, ponebyfact_Q63, 8);
|
||||
|
||||
mul_s64(mp, onebyfact, &outhi, &outlo);
|
||||
qt = AE_OR64(AE_SLAI64(outhi, 45), AE_SRLI64(outlo, 19));
|
||||
|
||||
temp = AE_SRAI64(AE_ADD64(qt, SOFM_QUOTIENT_SCALE), 35);
|
||||
ts = AE_ADD64(ts, temp);
|
||||
|
||||
mp = mul_s64_sr_sat_near(mp, (int64_t)x);
|
||||
|
||||
const ae_int64 sign = AE_NEG64(qt);
|
||||
|
||||
flag = AE_LT64(qt, 0);
|
||||
AE_MOVT64(qt, sign, flag);
|
||||
|
||||
if (!(qt < (ae_int64)SOFM_CONVERG_ERROR))
|
||||
b_n++;
|
||||
else
|
||||
b_n = 64;
|
||||
}
|
||||
|
||||
return AE_MOVAD32_L(AE_MOVINT32X2_FROMINT64(ts));
|
||||
}
|
||||
#endif
|
|
@ -13,6 +13,7 @@ cmocka_test(base2_logarithm
|
|||
cmocka_test(exponential
|
||||
exponential.c
|
||||
${PROJECT_SOURCE_DIR}/src/math/exp_fcn.c
|
||||
${PROJECT_SOURCE_DIR}/src/math/exp_fcn_hifi4.c
|
||||
)
|
||||
cmocka_test(square_root
|
||||
square_root.c
|
||||
|
|
|
@ -700,8 +700,9 @@ zephyr_library_sources_ifdef(CONFIG_SQRT_FIXED
|
|||
${SOF_MATH_PATH}/sqrt_int16.c
|
||||
)
|
||||
|
||||
zephyr_library_sources_ifdef(CONFIG_EXP_FIXED
|
||||
zephyr_library_sources_ifdef(CONFIG_MATH_EXP
|
||||
${SOF_MATH_PATH}/exp_fcn.c
|
||||
${SOF_MATH_PATH}/exp_fcn_hifi4.c
|
||||
)
|
||||
|
||||
zephyr_library_sources_ifdef(CONFIG_COMP_UP_DOWN_MIXER
|
||||
|
|
Loading…
Reference in New Issue