Audio: TDFB: Optimize pass-through MCPS for beamformer

This patch adds use of dedicated pass-through functions to save MCPS. The pass-through is used when ALSA beam control is to off. The earlier pass-through mode was made with a minimum length set channels filters for the FIR filter bank. In TGL platform with 4 to 2 channels beamformer the patch saves 18 MCPS, from 27 to 9 MCPS. With 2 to 2 channels beamformer the patch saves 14 MCPS, from 16 to 2 MCPS. Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
2024-05-20 20:00:53 +03:00 · 2024-05-20 20:00:53 +03:00 · 0b6cf0250e
parent e2dbe90358
commit 0b6cf0250e
1 changed files with 201 additions and 4 deletions
--- a/src/audio/tdfb/tdfb.c
+++ b/src/audio/tdfb/tdfb.c
@ -79,6 +79,187 @@ static inline int set_func(struct processing_module *mod, enum sof_ipc_frame fmt
 	return 0;
 }

+/*
+ * Pass-through processing functions
+ */
+
+static void tdfb_pass_same_format(struct tdfb_comp_data *cd, struct input_stream_buffer *bsource,
+				  struct output_stream_buffer *bsink, int frames)
+{
+	struct audio_stream *source = bsource->data;
+	struct audio_stream *sink = bsink->data;
+
+	audio_stream_copy(source, 0, sink, 0, frames * audio_stream_get_channels(source));
+}
+
+#if CONFIG_FORMAT_S16LE
+static void tdfb_pass_s16(struct tdfb_comp_data *cd, struct input_stream_buffer *bsource,
+			  struct output_stream_buffer *bsink, int frames)
+{
+	struct audio_stream *source = bsource->data;
+	struct audio_stream *sink = bsink->data;
+	int16_t *x = audio_stream_get_rptr(source);
+	int16_t *y = audio_stream_get_wptr(sink);
+	int16_t s0, s1;
+	int fmax;
+	int is, om;
+	int f, i, j, k;
+	const int in_nch = audio_stream_get_channels(source);
+	const int out_nch = audio_stream_get_channels(sink);
+	const int num_filters = cd->config->num_filters;
+	int remaining_frames = frames;
+
+	while (remaining_frames) {
+		fmax = audio_stream_frames_without_wrap(source, x);
+		f = MIN(remaining_frames, fmax);
+		fmax = audio_stream_frames_without_wrap(sink, y);
+		f = MIN(f, fmax);
+		for (j = 0; j < f; j += 2) {
+			for (i = 0; i < num_filters; i++) {
+				is = cd->input_channel_select[i];
+				om = cd->output_channel_mix[i];
+				s0 = x[is];
+				s1 = x[is + in_nch];
+				for (k = 0; k < out_nch; k++) {
+					if (om & 1) {
+						y[k] = s0;
+						y[k + out_nch] = s1;
+					}
+					om = om >> 1;
+				}
+			}
+			x += 2 * in_nch;
+			y += 2 * out_nch;
+		}
+		remaining_frames -= f;
+		x = audio_stream_wrap(source, x);
+		y = audio_stream_wrap(sink, y);
+	}
+}
+#endif
+
+#if CONFIG_FORMAT_S24LE
+static void tdfb_pass_s24(struct tdfb_comp_data *cd, struct input_stream_buffer *bsource,
+			  struct output_stream_buffer *bsink, int frames)
+{
+	struct audio_stream *source = bsource->data;
+	struct audio_stream *sink = bsink->data;
+	int32_t *x = audio_stream_get_rptr(source);
+	int32_t *y = audio_stream_get_wptr(sink);
+	int32_t s0, s1;
+	int fmax;
+	int is, om;
+	int f, i, j, k;
+	const int in_nch = audio_stream_get_channels(source);
+	const int out_nch = audio_stream_get_channels(sink);
+	const int num_filters = cd->config->num_filters;
+	int remaining_frames = frames;
+
+	while (remaining_frames) {
+		fmax = audio_stream_frames_without_wrap(source, x);
+		f = MIN(remaining_frames, fmax);
+		fmax = audio_stream_frames_without_wrap(sink, y);
+		f = MIN(f, fmax);
+		for (j = 0; j < f; j += 2) {
+			for (i = 0; i < num_filters; i++) {
+				is = cd->input_channel_select[i];
+				om = cd->output_channel_mix[i];
+				s0 = x[is];
+				s1 = x[is + in_nch];
+				for (k = 0; k < out_nch; k++) {
+					if (om & 1) {
+						y[k] = s0;
+						y[k + out_nch] = s1;
+					}
+					om = om >> 1;
+				}
+			}
+			x += 2 * in_nch;
+			y += 2 * out_nch;
+		}
+		remaining_frames -= f;
+		x = audio_stream_wrap(source, x);
+		y = audio_stream_wrap(sink, y);
+	}
+}
+#endif
+
+#if CONFIG_FORMAT_S32LE
+static void tdfb_pass_s32(struct tdfb_comp_data *cd, struct input_stream_buffer *bsource,
+			  struct output_stream_buffer *bsink, int frames)
+{
+	struct audio_stream *source = bsource->data;
+	struct audio_stream *sink = bsink->data;
+	int32_t *x = audio_stream_get_rptr(source);
+	int32_t *y = audio_stream_get_wptr(sink);
+	int32_t s0, s1;
+	int fmax;
+	int is, om;
+	int f, i, j, k;
+	const int in_nch = audio_stream_get_channels(source);
+	const int out_nch = audio_stream_get_channels(sink);
+	const int num_filters = cd->config->num_filters;
+	int remaining_frames = frames;
+
+	while (remaining_frames) {
+		fmax = audio_stream_frames_without_wrap(source, x);
+		f = MIN(remaining_frames, fmax);
+		fmax = audio_stream_frames_without_wrap(sink, y);
+		f = MIN(f, fmax);
+		for (j = 0; j < f; j += 2) {
+			for (i = 0; i < num_filters; i++) {
+				is = cd->input_channel_select[i];
+				om = cd->output_channel_mix[i];
+				s0 = x[is];
+				s1 = x[is + in_nch];
+				for (k = 0; k < out_nch; k++) {
+					if (om & 1) {
+						y[k] = s0;
+						y[k + out_nch] = s1;
+					}
+					om = om >> 1;
+				}
+			}
+			x += 2 * in_nch;
+			y += 2 * out_nch;
+		}
+		remaining_frames -= f;
+		x = audio_stream_wrap(source, x);
+		y = audio_stream_wrap(sink, y);
+	}
+}
+#endif
+
+static inline int set_pass_func(struct processing_module *mod, enum sof_ipc_frame fmt)
+{
+	struct tdfb_comp_data *cd = module_get_private_data(mod);
+
+	switch (fmt) {
+#if CONFIG_FORMAT_S16LE
+	case SOF_IPC_FRAME_S16_LE:
+		comp_dbg(mod->dev, "set_pass_func(), SOF_IPC_FRAME_S16_LE");
+		cd->tdfb_func = tdfb_pass_s16;
+		break;
+#endif /* CONFIG_FORMAT_S16LE */
+#if CONFIG_FORMAT_S24LE
+	case SOF_IPC_FRAME_S24_4LE:
+		comp_dbg(mod->dev, "set_pass_func(), SOF_IPC_FRAME_S24_4LE");
+		cd->tdfb_func = tdfb_pass_s24;
+		break;
+#endif /* CONFIG_FORMAT_S24LE */
+#if CONFIG_FORMAT_S32LE
+	case SOF_IPC_FRAME_S32_LE:
+		comp_dbg(mod->dev, "set_pass_func(), SOF_IPC_FRAME_S32_LE");
+		cd->tdfb_func = tdfb_pass_s32;
+		break;
+#endif /* CONFIG_FORMAT_S32LE */
+	default:
+		comp_err(mod->dev, "set_pass_func(), invalid frame_fmt");
+		return -EINVAL;
+	}
+	return 0;
+}
+
 /*
 * Control code functions next. The processing is in fir_ C modules.
 */
@ -301,11 +482,25 @@ static void tdfb_init_delay(struct tdfb_comp_data *cd)
 	}
 }

-static int tdfb_setup(struct processing_module *mod, int source_nch, int sink_nch)
+static int tdfb_setup(struct processing_module *mod, int source_nch, int sink_nch,
+		      enum sof_ipc_frame fmt)
 {
 	struct tdfb_comp_data *cd = module_get_private_data(mod);
 	int delay_size;

+	/* If beam on, restore processing function. If off, use for same source and
+	 * sink format the efficient 1:1 copy, otherwise faster pass-through processing
+	 * functions those copy selected source channels to selected sink channels.
+	 */
+	if (cd->beam_on) {
+		set_func(mod, fmt);
+	} else {
+		if (source_nch == sink_nch)
+			cd->tdfb_func = tdfb_pass_same_format;
+		else
+			set_pass_func(mod, fmt);
+	}
+
 	/* Set coefficients for each channel from coefficient blob */
 	delay_size = tdfb_init_coef(mod, source_nch, sink_nch);
 	if (delay_size < 0)
@ -468,7 +663,8 @@ static int tdfb_process(struct processing_module *mod,
 	if (comp_is_new_data_blob_available(cd->model_handler)) {
 		cd->config = comp_get_data_blob(cd->model_handler, NULL, NULL);
 		ret = tdfb_setup(mod, audio_stream_get_channels(source),
-				 audio_stream_get_channels(sink));
+				 audio_stream_get_channels(sink),
+				 audio_stream_get_frm_fmt(source));
 		if (ret < 0) {
 			comp_err(dev, "tdfb_process(), failed FIR setup");
 			return ret;
@ -479,7 +675,8 @@ static int tdfb_process(struct processing_module *mod,
 	if (cd->update) {
 		cd->update = false;
 		ret = tdfb_setup(mod, audio_stream_get_channels(source),
-				 audio_stream_get_channels(sink));
+				 audio_stream_get_channels(sink),
+				 audio_stream_get_frm_fmt(source));
 		if (ret < 0) {
 			comp_err(dev, "tdfb_process(), failed FIR setup");
 			return ret;
@ -560,7 +757,7 @@ static int tdfb_prepare(struct processing_module *mod,
 		goto out;
 	}

-	ret = tdfb_setup(mod, source_channels, sink_channels);
+	ret = tdfb_setup(mod, source_channels, sink_channels, frame_fmt);
 	if (ret < 0) {
 		comp_err(dev, "tdfb_prepare() error: tdfb_setup failed.");
 		goto out;