EQ FIR: Small code optimization for gcc build

This patch slightly improves the execution speed (by about 1.3%) by using a simpler FIR filter core based on pointer arithmetic. The comments about the 32-bit fractional format in use are updated to Q1.31; the earlier ones reflected old assumptions for SOF. Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
2019-12-18 20:58:06 +02:00 · 2019-12-18 20:58:06 +02:00 · a5cc4df67b
parent 234c84abac
commit a5cc4df67b
1 changed files with 34 additions and 36 deletions
--- a/src/include/sof/audio/eq_fir/fir.h
+++ b/src/include/sof/audio/eq_fir/fir.h
@ -53,62 +53,60 @@ void eq_fir_s32(struct fir_state_32x16 *fir, struct comp_buffer *source,
 /* The next functions are inlined to optimize execution speed */
 /* Accumulate one run of FIR taps into *y.
  *
  * For each of 'taps' iterations the product c[*ic] * d[*id] is widened to
  * 64 bits and added to *y. After every tap *ic is incremented and *id is
  * decremented, so on return both indices have moved past the last element
  * used and the caller can continue the filter from there.
  *
  * No bounds check or circular wrap is done here; the caller must choose
  * 'taps' so that both indices stay valid for every access.
  */
 static inline void fir_part_32x16(int64_t *y, int taps, const int16_t c[],
 				  int *ic, int32_t d[], int *id)
 {
 	int n;
 	/* Data is Q8.24, coef is Q1.15, product is Q9.39 */
 	/* NOTE(review): other comments in this change give the data format
 	 * as Q1.31 (product Q2.46) - confirm which one applies here.
 	 */
 	for (n = 0; n < taps; n++) {
 		/* Cast first so the multiply is done in 64 bits */
 		*y += (int64_t)c[*ic] * d[*id];
 		(*ic)++;
 		(*id)--;
 	}
 }
 static inline int32_t fir_32x16(struct fir_state_32x16 *fir, int32_t x)
 {
 	int64_t y = 0;
 	int32_t *data = &fir->delay[fir->rwi];
 	int16_t *coef = &fir->coef[0];
 	int n1;
 	int n2;
-	int i = 0; /* Start from 1st tap */
+	int n;
 	int tmp_ri;
 	/* Bypass is set with length set to zero. */
 	if (!fir->length)
 		return x;
 	/* Write sample to delay */
-	fir->delay[fir->rwi] = x;
+	*data = x;
-	/* Start FIR calculation. Calculate first number of taps possible to
+	/* Advance write pointer and calculate into n1 max. number of taps
-	 * calculate before circular wrap need.
+	 * to process before circular wrap.
 	 */
-	n1 = fir->rwi + 1;
+	n1 = ++fir->rwi;
 	/* Point to newest sample and advance read index */
 	tmp_ri = (fir->rwi)++;
 	if (fir->rwi == fir->length)
 		fir->rwi = 0;
 	/* Check if no need to un-wrap FIR data. */
 	if (n1 > fir->length) {
-		/* No need to un-wrap fir read index, make sure ri
+		/* Data is Q1.31, coef is Q1.15, product is Q2.46 */
-		 * is >= 0 after FIR computation.
+		for (n = 0; n < fir->length; n++) {
-		 */
+			y += (int64_t)(*coef) * (*data);
-		fir_part_32x16(&y, fir->length, fir->coef, &i, fir->delay,
+			coef++;
-			       &tmp_ri);
+			data--;
-	} else {
+		}
 		n2 = fir->length - n1;
 		/* Part 1, loop n1 times, fir_ri becomes -1 */
 		fir_part_32x16(&y, n1, fir->coef, &i, fir->delay, &tmp_ri);
-		/* Part 2, unwrap fir_ri, continue rest of filter */
+		/* Q2.46 -> Q2.31, saturate to Q1.31 */
-		tmp_ri = fir->length - 1;
+		return sat_int32(y >> (15 + fir->out_shift));
 		fir_part_32x16(&y, n2, fir->coef, &i, fir->delay, &tmp_ri);
 	}
 	/* Q9.39 -> Q9.24, saturate to Q8.24 */
 	y = sat_int32(y >> (15 + fir->out_shift));
-	return (int32_t)y;
+	/* Part 1, loop n1 times */
 	for (n = 0; n < n1; n++) {
 		y += (int64_t)(*coef) * (*data);
 		coef++;
 		data--;
 	}
 	/* Part 2, un-wrap data, continue n2 times */
 	n2 = fir->length - n1;
 	data = &fir->delay[fir->length - 1];
 	for (n = 0; n < n2; n++) {
 		y += (int64_t)(*coef) * (*data);
 		coef++;
 		data--;
 	}
 	/* Q2.46 -> Q2.31, saturate to Q1.31 */
 	return sat_int32(y >> (15 + fir->out_shift));
 }
 #endif