diff --git a/src/include/sof/math/fir_generic.h b/src/include/sof/math/fir_generic.h
index 464ffd5d2..28944c7d1 100644
--- a/src/include/sof/math/fir_generic.h
+++ b/src/include/sof/math/fir_generic.h
@@ -25,7 +25,7 @@ struct sof_eq_fir_coef_data;
 struct fir_state_32x16 {
 	int rwi; /* Circular read and write index */
 	int taps; /* Number of FIR taps */
-	int length; /* Number of FIR taps */
+	int length; /* Circular delay length, taps + 2 for the dual sample input (even) */
 	int out_shift; /* Amount of right shifts at output */
 	int16_t *coef; /* Pointer to FIR coefficients */
 	int32_t *delay; /* Pointer to FIR delay line */
@@ -42,5 +42,7 @@ void fir_init_delay(struct fir_state_32x16 *fir, int32_t **data);
 
 int32_t fir_32x16(struct fir_state_32x16 *fir, int32_t x);
 
+void fir_32x16_2x(struct fir_state_32x16 *fir, int32_t x0, int32_t x1, int32_t *y0, int32_t *y1);
+
 #endif
 #endif /* __SOF_MATH_FIR_GENERIC_H__ */
diff --git a/src/math/fir_generic.c b/src/math/fir_generic.c
index e0a54df05..53f650a20 100644
--- a/src/math/fir_generic.c
+++ b/src/math/fir_generic.c
@@ -36,21 +36,26 @@ void fir_reset(struct fir_state_32x16 *fir)
 
 int fir_delay_size(struct sof_fir_coef_data *config)
 {
-	/* Check for sane FIR length. The generic version does not
-	 * have other constraints.
-	 */
-	if (config->length > SOF_FIR_MAX_LENGTH || config->length < 1)
+	/* Tap count must be within 4 to SOF_FIR_MAX_LENGTH for this implementation */
+	if (config->length > SOF_FIR_MAX_LENGTH || config->length < 4)
 		return -EINVAL;
 
-	return config->length * sizeof(int32_t);
+	/* The optimization requires the tap count to be a multiple of four */
+	if (config->length & 0x3)
+		return -EINVAL;
+
+	/* Dual sample processing uses taps + 2 delay entries, an even count to
+	 * preserve 64 bit align. NOTE(review): taps + 4 are reserved here, confirm.
+	 */
+	return (config->length + 4) * sizeof(int32_t);
 }
 
 int fir_init_coef(struct fir_state_32x16 *fir,
 		  struct sof_fir_coef_data *config)
 {
 	fir->rwi = 0;
-	fir->length = (int)config->length;
-	fir->taps = fir->length; /* The same for generic C version */
+	fir->taps = (int)config->length;
+	fir->length = (int)fir->taps + 2;
 	fir->out_shift = (int)config->out_shift;
 	fir->coef = ASSUME_ALIGNED(&config->coef[0], 4);
 	return 0;
@@ -109,4 +114,70 @@ int32_t fir_32x16(struct fir_state_32x16 *fir, int32_t x)
 	return sat_int32(y >> shift);
 }
 
+void fir_32x16_2x(struct fir_state_32x16 *fir, int32_t x0, int32_t x1, int32_t *y0, int32_t *y1)
+{
+	int64_t a0 = 0;
+	int64_t a1 = 0;
+	int32_t sample0;
+	int32_t sample1;
+	int16_t tap;
+	int32_t *data = &fir->delay[fir->rwi];
+	int16_t *coef = &fir->coef[0];
+	int n1;
+	int n2;
+	int i;
+	const int length = fir->length;
+	const int taps = fir->taps;
+	const int shift = 15 + fir->out_shift;
+
+	/* Bypass when taps is zero, the inputs are copied to the outputs as such */
+	if (!fir->taps) {
+		*y0 = x0;
+		*y1 = x1;
+		return;
+	}
+
+	/* Write both input samples to delay, x0 to index rwi and x1 to rwi + 1 */
+	*data = x0;
+	*(data + 1) = x1;
+
+	/* Set n1 to the number of delay entries from rwi down to index zero, the
+	 * max. taps to process before circular wrap. Advance rwi by two with wrap.
+	 */
+	n1 = fir->rwi + 1;
+	fir->rwi += 2;
+	if (fir->rwi >= length)
+		fir->rwi -= length;
+
+	/* Part 1, loop n1 times towards index zero, a1 is fed the entry one newer than a0 */
+	sample1 = x1;
+	n1 = MIN(n1, taps);
+	for (i = 0; i < n1; i++) {
+		tap = *coef;
+		coef++;
+		sample0 = *data;
+		data--;
+		a1 += (int64_t)tap * sample1;
+		a0 += (int64_t)tap * sample0;
+		sample1 = sample0;
+	}
+
+	/* Part 2, un-wrap data to the last delay entry, continue the remaining n2 taps */
+	n2 = taps - n1;
+	data = &fir->delay[length - 1];
+	for (i = 0; i < n2; i++) {
+		tap = *coef;
+		coef++;
+		sample0 = *data;
+		data--;
+		a1 += (int64_t)tap * sample1;
+		a0 += (int64_t)tap * sample0;
+		sample1 = sample0;
+	}
+
+	/* Q2.46 -> Q2.31, saturate to Q1.31 */
+	*y0 = sat_int32(a0 >> shift);
+	*y1 = sat_int32(a1 >> shift);
+}
+
 #endif