Audio: EQFIR: Optimize source and sink buffer use in the generic C version

This patch replaces the audio stream read/write frag based access to the source and sink with block processing based on the byte count returned by audio_stream_bytes_without_wrap(). In a test with generic C forced for the xtensa build, the processing load was 324 MCPS in the original and 309 MCPS in the optimized version, a saving of 15 MCPS. The load is nearly the same for all formats s16/s24/s32. The base load was very high in the test because a very long FIR filter was used. The MCPS saving should be the same for all stereo 48k streams. Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
2022-02-09 18:09:44 +02:00 · 2022-02-09 18:09:44 +02:00 · 2b6001e065
parent d0057a7def
commit 2b6001e065
2 changed files with 72 additions and 41 deletions
--- a/src/audio/eq_fir/eq_fir_generic.c
+++ b/src/audio/eq_fir/eq_fir_generic.c
@ -21,23 +21,32 @@ void eq_fir_s16(struct fir_state_32x16 fir[], const struct audio_stream *source,
 		struct audio_stream *sink, int frames, int nch)
 {
 	struct fir_state_32x16 *filter;
-	int16_t *x;
-	int16_t *y;
 	int32_t z;
-	int idx;
-	int ch;
-	int i;
+	int16_t *x0, *y0;
+	int16_t *x = source->r_ptr;
+	int16_t *y = sink->w_ptr;
+	int nmax, n, i, j;
+	int remaining_samples = frames * nch;

-	for (ch = 0; ch < nch; ch++) {
-		filter = &fir[ch];
-		idx = ch;
-		for (i = 0; i < frames; i++) {
-			x = audio_stream_read_frag_s16(source, idx);
-			y = audio_stream_write_frag_s16(sink, idx);
-			z = fir_32x16(filter, *x << 16);
-			*y = sat_int16(Q_SHIFT_RND(z, 31, 15));
-			idx += nch;
+	while (remaining_samples) {
+		nmax = EQ_FIR_BYTES_TO_S16_SAMPLES(audio_stream_bytes_without_wrap(source, x));
+		n = MIN(remaining_samples, nmax);
+		nmax = EQ_FIR_BYTES_TO_S16_SAMPLES(audio_stream_bytes_without_wrap(sink, y));
+		n = MIN(n, nmax);
+		for (j = 0; j < nch; j++) {
+			x0 = x + j;
+			y0 = y + j;
+			filter = &fir[j];
+			for (i = 0; i < n; i += nch) {
+				z = fir_32x16(filter, *x0 << 16);
+				*y0 = sat_int16(Q_SHIFT_RND(z, 31, 15));
+				x0 += nch;
+				y0 += nch;
+			}
 		}
+		remaining_samples -= n;
+		x = audio_stream_wrap(source, x + n);
+		y = audio_stream_wrap(sink, y + n);
 	}
 }
 #endif /* CONFIG_FORMAT_S16LE */
@ -47,23 +56,32 @@ void eq_fir_s24(struct fir_state_32x16 fir[], const struct audio_stream *source,
 		struct audio_stream *sink, int frames, int nch)
 {
 	struct fir_state_32x16 *filter;
-	int32_t *x;
-	int32_t *y;
 	int32_t z;
-	int idx;
-	int ch;
-	int i;
+	int32_t *x0, *y0;
+	int32_t *x = source->r_ptr;
+	int32_t *y = sink->w_ptr;
+	int nmax, n, i, j;
+	int remaining_samples = frames * nch;

-	for (ch = 0; ch < nch; ch++) {
-		filter = &fir[ch];
-		idx = ch;
-		for (i = 0; i < frames; i++) {
-			x = audio_stream_read_frag_s32(source, idx);
-			y = audio_stream_write_frag_s32(sink, idx);
-			z = fir_32x16(filter, *x << 8);
-			*y = sat_int24(Q_SHIFT_RND(z, 31, 23));
-			idx += nch;
+	while (remaining_samples) {
+		nmax = EQ_FIR_BYTES_TO_S32_SAMPLES(audio_stream_bytes_without_wrap(source, x));
+		n = MIN(remaining_samples, nmax);
+		nmax = EQ_FIR_BYTES_TO_S32_SAMPLES(audio_stream_bytes_without_wrap(sink, y));
+		n = MIN(n, nmax);
+		for (j = 0; j < nch; j++) {
+			x0 = x + j;
+			y0 = y + j;
+			filter = &fir[j];
+			for (i = 0; i < n; i += nch) {
+				z = fir_32x16(filter, *x0 << 8);
+				*y0 = sat_int24(Q_SHIFT_RND(z, 31, 23));
+				x0 += nch;
+				y0 += nch;
+			}
 		}
+		remaining_samples -= n;
+		x = audio_stream_wrap(source, x + n);
+		y = audio_stream_wrap(sink, y + n);
 	}
 }
 #endif /* CONFIG_FORMAT_S24LE */
@ -73,21 +91,30 @@ void eq_fir_s32(struct fir_state_32x16 fir[], const struct audio_stream *source,
 		struct audio_stream *sink, int frames, int nch)
 {
 	struct fir_state_32x16 *filter;
-	int32_t *x;
-	int32_t *y;
-	int idx;
-	int ch;
-	int i;
+	int32_t *x0, *y0;
+	int32_t *x = source->r_ptr;
+	int32_t *y = sink->w_ptr;
+	int nmax, n, i, j;
+	int remaining_samples = frames * nch;

-	for (ch = 0; ch < nch; ch++) {
-		filter = &fir[ch];
-		idx = ch;
-		for (i = 0; i < frames; i++) {
-			x = audio_stream_read_frag_s32(source, idx);
-			y = audio_stream_write_frag_s32(sink, idx);
-			*y = fir_32x16(filter, *x);
-			idx += nch;
+	while (remaining_samples) {
+		nmax = EQ_FIR_BYTES_TO_S32_SAMPLES(audio_stream_bytes_without_wrap(source, x));
+		n = MIN(remaining_samples, nmax);
+		nmax = EQ_FIR_BYTES_TO_S32_SAMPLES(audio_stream_bytes_without_wrap(sink, y));
+		n = MIN(n, nmax);
+		for (j = 0; j < nch; j++) {
+			x0 = x + j;
+			y0 = y + j;
+			filter = &fir[j];
+			for (i = 0; i < n; i += nch) {
+				*y0 = fir_32x16(filter, *x0);
+				x0 += nch;
+				y0 += nch;
+			}
 		}
+		remaining_samples -= n;
+		x = audio_stream_wrap(source, x + n);
+		y = audio_stream_wrap(sink, y + n);
 	}
 }
 #endif /* CONFIG_FORMAT_S32LE */
--- a/src/include/sof/audio/eq_fir/eq_fir.h
+++ b/src/include/sof/audio/eq_fir/eq_fir.h
@ -23,6 +23,10 @@
 #include <user/fir.h>
 #include <stdint.h>

+/** \brief Macros to convert a byte count to a sample count without division */
+#define EQ_FIR_BYTES_TO_S16_SAMPLES(b)	((b) >> 1)
+#define EQ_FIR_BYTES_TO_S32_SAMPLES(b)	((b) >> 2)
+
 #if CONFIG_FORMAT_S16LE
 void eq_fir_s16(struct fir_state_32x16 *fir, const struct audio_stream *source,
 		struct audio_stream *sink, int frames, int nch);