EQ FIR: Small code optimization for gcc build

This patch slightly improves execution speed (by about 1.3%) by using
a simpler FIR filter core based on pointer arithmetic. The comments
about the 32-bit fractional format in use are updated to Q1.31; the
earlier ones reflected old assumptions for SOF.

Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
This commit is contained in:
Seppo Ingalsuo 2019-12-18 20:58:06 +02:00 committed by Liam Girdwood
parent 234c84abac
commit a5cc4df67b
1 changed files with 34 additions and 36 deletions

View File

@ -53,62 +53,60 @@ void eq_fir_s32(struct fir_state_32x16 *fir, struct comp_buffer *source,
/* The next functions are inlined to optimize execution speed */ /* The next functions are inlined to optimize execution speed */
/*
 * Accumulate a run of FIR taps into *y.
 *
 * y    - 64-bit accumulator, updated in place
 * taps - number of multiply-accumulate steps; nothing is done when <= 0
 * c    - 16-bit coefficients, read at index *ic
 * ic   - coefficient index, incremented once per tap
 * d    - 32-bit delay line samples, read at index *id
 * id   - data index, decremented once per tap (it may end below zero;
 *        the caller is responsible for un-wrapping it before reuse)
 *
 * Fixed-point note carried over from the original comment:
 * data Q8.24 times coef Q1.15 gives a Q9.39 product.
 */
static inline void fir_part_32x16(int64_t *y, int taps, const int16_t c[],
				  int *ic, int32_t d[], int *id)
{
	int remaining = taps;

	while (remaining > 0) {
		/* Widen both operands so the product is formed in 64 bits */
		const int64_t coef = c[*ic];
		const int64_t sample = d[*id];

		*y += coef * sample;

		/* Coefficients walk forward, delay line walks backward */
		*ic += 1;
		*id -= 1;
		remaining--;
	}
}
/*
 * fir_32x16() as rendered by a side-by-side diff view. Where a line holds
 * two copies of a statement, the first is the pre-patch text and the second
 * is the post-patch text; lines with a single statement belong to one side
 * only. The span is therefore not compilable as shown.
 *
 * Visible behaviour (both versions): returns x unchanged when fir->length
 * is zero. Otherwise x is stored at fir->delay[fir->rwi], fir->rwi is
 * advanced and reset to 0 when it reaches fir->length, and the products
 * (int64_t)coef * data are summed with coefficients walking forward and
 * delay data walking backward, restarting at fir->delay[fir->length - 1]
 * after the first n1 taps. The sum is shifted right by
 * (15 + fir->out_shift) and passed to sat_int32() (presumably saturating
 * to the int32_t range; its definition is not visible here).
 *
 * NOTE(review): n1 is the incremented rwi taken before the wrap check, so
 * if rwi always starts below length the "n1 > fir->length" branch looks
 * unreachable; confirm against the caller before relying on it.
 * NOTE(review): in the post-patch loops "data--" after the tap at
 * delay[0] appears to form a pointer one element before delay; verify
 * this is acceptable for the target toolchain.
 */
static inline int32_t fir_32x16(struct fir_state_32x16 *fir, int32_t x) static inline int32_t fir_32x16(struct fir_state_32x16 *fir, int32_t x)
{ {
int64_t y = 0; int64_t y = 0;
int32_t *data = &fir->delay[fir->rwi];
int16_t *coef = &fir->coef[0];
int n1; int n1;
int n2; int n2;
int i = 0; /* Start from 1st tap */ int n;
int tmp_ri;
/* Bypass is set with length set to zero. */ /* Bypass is set with length set to zero. */
if (!fir->length) if (!fir->length)
return x; return x;
/* Write sample to delay */ /* Write sample to delay */
fir->delay[fir->rwi] = x; *data = x;
/* Start FIR calculation. Calculate first number of taps possible to /* Advance write pointer and calculate into n1 max. number of taps
* calculate before circular wrap need. * to process before circular wrap.
*/ */
n1 = fir->rwi + 1; n1 = ++fir->rwi;
/* Point to newest sample and advance read index */
tmp_ri = (fir->rwi)++;
if (fir->rwi == fir->length) if (fir->rwi == fir->length)
fir->rwi = 0; fir->rwi = 0;
/* Check if no need to un-wrap FIR data. */
if (n1 > fir->length) { if (n1 > fir->length) {
/* No need to un-wrap fir read index, make sure ri /* Data is Q1.31, coef is Q1.15, product is Q2.46 */
* is >= 0 after FIR computation. for (n = 0; n < fir->length; n++) {
*/ y += (int64_t)(*coef) * (*data);
fir_part_32x16(&y, fir->length, fir->coef, &i, fir->delay, coef++;
&tmp_ri); data--;
} else { }
n2 = fir->length - n1;
/* Part 1, loop n1 times, fir_ri becomes -1 */
fir_part_32x16(&y, n1, fir->coef, &i, fir->delay, &tmp_ri);
/* Part 2, unwrap fir_ri, continue rest of filter */ /* Q2.46 -> Q2.31, saturate to Q1.31 */
tmp_ri = fir->length - 1; return sat_int32(y >> (15 + fir->out_shift));
fir_part_32x16(&y, n2, fir->coef, &i, fir->delay, &tmp_ri);
} }
/* Q9.39 -> Q9.24, saturate to Q8.24 */
y = sat_int32(y >> (15 + fir->out_shift));
return (int32_t)y; /* Part 1, loop n1 times */
for (n = 0; n < n1; n++) {
y += (int64_t)(*coef) * (*data);
coef++;
data--;
}
/* Part 2, un-wrap data, continue n2 times */
n2 = fir->length - n1;
data = &fir->delay[fir->length - 1];
for (n = 0; n < n2; n++) {
y += (int64_t)(*coef) * (*data);
coef++;
data--;
}
/* Q2.46 -> Q2.31, saturate to Q1.31 */
return sat_int32(y >> (15 + fir->out_shift));
} }
#endif #endif