Audio: TDFB: Optimize pass-through MCPS for beamformer

This patch adds use of dedicated pass-through functions to save MCPS. The pass-through is used when the ALSA beam control is set to off. The earlier pass-through mode was implemented with a set of minimum-length channel filters in the FIR filter bank. On the TGL platform with a 4 to 2 channels beamformer the patch saves 18 MCPS, from 27 to 9 MCPS. With a 2 to 2 channels beamformer the patch saves 14 MCPS, from 16 to 2 MCPS. Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
2024-05-20 20:00:53 +03:00 · 2024-05-20 20:00:53 +03:00 · 0b6cf0250e
parent e2dbe90358
commit 0b6cf0250e
1 changed files with 201 additions and 4 deletions
--- a/src/audio/tdfb/tdfb.c
+++ b/src/audio/tdfb/tdfb.c
@ -79,6 +79,187 @@ static inline int set_func(struct processing_module *mod, enum sof_ipc_frame fmt
 	return 0;
 }
 /*
 * Pass-through processing functions
 */
 /*
  * Pass-through by plain copy: hands frames times the source channel count
  * samples from the source stream to the sink stream with audio_stream_copy(),
  * both offsets zero. The cd argument is not used here; it is part of the
  * signature shared with the other processing functions.
  */
 static void tdfb_pass_same_format(struct tdfb_comp_data *cd, struct input_stream_buffer *bsource,
 				  struct output_stream_buffer *bsink, int frames)
 {
 	struct audio_stream *in_stream = bsource->data;
 	struct audio_stream *out_stream = bsink->data;
 	/* Sample count is channels in one frame times the number of frames */
 	audio_stream_copy(in_stream, 0, out_stream, 0,
 			  audio_stream_get_channels(in_stream) * frames);
 }
 #if CONFIG_FORMAT_S16LE
 /*
  * Pass-through channel copy for int16_t samples. For every filter index i
  * below cd->config->num_filters, the sample of source channel
  * cd->input_channel_select[i] is written to each sink channel whose bit is
  * set in cd->output_channel_mix[i] (bit 0 is sink channel 0). Sink channels
  * with no bit set in any mix are not written by this function.
  *
  * The copy is done in runs limited by audio_stream_frames_without_wrap() of
  * both streams. Frames are handled two at a time; a run with an odd frame
  * count is finished with a single-frame copy so that the pair loop never
  * touches a frame beyond the run.
  */
 static void tdfb_pass_s16(struct tdfb_comp_data *cd, struct input_stream_buffer *bsource,
 			  struct output_stream_buffer *bsink, int frames)
 {
 	struct audio_stream *source = bsource->data;
 	struct audio_stream *sink = bsink->data;
 	int16_t *x = audio_stream_get_rptr(source);
 	int16_t *y = audio_stream_get_wptr(sink);
 	int16_t s0, s1;
 	int fmax;
 	int is, om;
 	int f, i, j, k;
 	const int in_nch = audio_stream_get_channels(source);
 	const int out_nch = audio_stream_get_channels(sink);
 	const int num_filters = cd->config->num_filters;
 	int remaining_frames = frames;
 	while (remaining_frames) {
 		/* Run length is bounded by the request and by both streams */
 		fmax = audio_stream_frames_without_wrap(source, x);
 		f = MIN(remaining_frames, fmax);
 		fmax = audio_stream_frames_without_wrap(sink, y);
 		f = MIN(f, fmax);
 		/* Complete frame pairs only, frames j and j + 1 are both inside the run */
 		for (j = 0; j + 1 < f; j += 2) {
 			for (i = 0; i < num_filters; i++) {
 				is = cd->input_channel_select[i];
 				om = cd->output_channel_mix[i];
 				s0 = x[is];
 				s1 = x[is + in_nch];
 				for (k = 0; k < out_nch; k++) {
 					if (om & 1) {
 						y[k] = s0;
 						y[k + out_nch] = s1;
 					}
 					om = om >> 1;
 				}
 			}
 			x += 2 * in_nch;
 			y += 2 * out_nch;
 		}
 		/* One frame is left when the run length is odd, copy it alone */
 		if (j < f) {
 			for (i = 0; i < num_filters; i++) {
 				is = cd->input_channel_select[i];
 				om = cd->output_channel_mix[i];
 				s0 = x[is];
 				for (k = 0; k < out_nch; k++) {
 					if (om & 1)
 						y[k] = s0;
 					om = om >> 1;
 				}
 			}
 			x += in_nch;
 			y += out_nch;
 		}
 		remaining_frames -= f;
 		x = audio_stream_wrap(source, x);
 		y = audio_stream_wrap(sink, y);
 	}
 }
 #endif
 #if CONFIG_FORMAT_S24LE
 /*
  * Pass-through channel copy for samples accessed as int32_t words. For every
  * filter index i below cd->config->num_filters, the sample of source channel
  * cd->input_channel_select[i] is written to each sink channel whose bit is
  * set in cd->output_channel_mix[i] (bit 0 is sink channel 0). Sink channels
  * with no bit set in any mix are not written by this function.
  *
  * The copy is done in runs limited by audio_stream_frames_without_wrap() of
  * both streams. Frames are handled two at a time; a run with an odd frame
  * count is finished with a single-frame copy so that the pair loop never
  * touches a frame beyond the run.
  */
 static void tdfb_pass_s24(struct tdfb_comp_data *cd, struct input_stream_buffer *bsource,
 			  struct output_stream_buffer *bsink, int frames)
 {
 	struct audio_stream *source = bsource->data;
 	struct audio_stream *sink = bsink->data;
 	int32_t *x = audio_stream_get_rptr(source);
 	int32_t *y = audio_stream_get_wptr(sink);
 	int32_t s0, s1;
 	int fmax;
 	int is, om;
 	int f, i, j, k;
 	const int in_nch = audio_stream_get_channels(source);
 	const int out_nch = audio_stream_get_channels(sink);
 	const int num_filters = cd->config->num_filters;
 	int remaining_frames = frames;
 	while (remaining_frames) {
 		/* Run length is bounded by the request and by both streams */
 		fmax = audio_stream_frames_without_wrap(source, x);
 		f = MIN(remaining_frames, fmax);
 		fmax = audio_stream_frames_without_wrap(sink, y);
 		f = MIN(f, fmax);
 		/* Complete frame pairs only, frames j and j + 1 are both inside the run */
 		for (j = 0; j + 1 < f; j += 2) {
 			for (i = 0; i < num_filters; i++) {
 				is = cd->input_channel_select[i];
 				om = cd->output_channel_mix[i];
 				s0 = x[is];
 				s1 = x[is + in_nch];
 				for (k = 0; k < out_nch; k++) {
 					if (om & 1) {
 						y[k] = s0;
 						y[k + out_nch] = s1;
 					}
 					om = om >> 1;
 				}
 			}
 			x += 2 * in_nch;
 			y += 2 * out_nch;
 		}
 		/* One frame is left when the run length is odd, copy it alone */
 		if (j < f) {
 			for (i = 0; i < num_filters; i++) {
 				is = cd->input_channel_select[i];
 				om = cd->output_channel_mix[i];
 				s0 = x[is];
 				for (k = 0; k < out_nch; k++) {
 					if (om & 1)
 						y[k] = s0;
 					om = om >> 1;
 				}
 			}
 			x += in_nch;
 			y += out_nch;
 		}
 		remaining_frames -= f;
 		x = audio_stream_wrap(source, x);
 		y = audio_stream_wrap(sink, y);
 	}
 }
 #endif
 #if CONFIG_FORMAT_S32LE
 /*
  * Pass-through channel copy for int32_t samples. For every filter index i
  * below cd->config->num_filters, the sample of source channel
  * cd->input_channel_select[i] is written to each sink channel whose bit is
  * set in cd->output_channel_mix[i] (bit 0 is sink channel 0). Sink channels
  * with no bit set in any mix are not written by this function.
  *
  * The copy is done in runs limited by audio_stream_frames_without_wrap() of
  * both streams. Frames are handled two at a time; a run with an odd frame
  * count is finished with a single-frame copy so that the pair loop never
  * touches a frame beyond the run.
  */
 static void tdfb_pass_s32(struct tdfb_comp_data *cd, struct input_stream_buffer *bsource,
 			  struct output_stream_buffer *bsink, int frames)
 {
 	struct audio_stream *source = bsource->data;
 	struct audio_stream *sink = bsink->data;
 	int32_t *x = audio_stream_get_rptr(source);
 	int32_t *y = audio_stream_get_wptr(sink);
 	int32_t s0, s1;
 	int fmax;
 	int is, om;
 	int f, i, j, k;
 	const int in_nch = audio_stream_get_channels(source);
 	const int out_nch = audio_stream_get_channels(sink);
 	const int num_filters = cd->config->num_filters;
 	int remaining_frames = frames;
 	while (remaining_frames) {
 		/* Run length is bounded by the request and by both streams */
 		fmax = audio_stream_frames_without_wrap(source, x);
 		f = MIN(remaining_frames, fmax);
 		fmax = audio_stream_frames_without_wrap(sink, y);
 		f = MIN(f, fmax);
 		/* Complete frame pairs only, frames j and j + 1 are both inside the run */
 		for (j = 0; j + 1 < f; j += 2) {
 			for (i = 0; i < num_filters; i++) {
 				is = cd->input_channel_select[i];
 				om = cd->output_channel_mix[i];
 				s0 = x[is];
 				s1 = x[is + in_nch];
 				for (k = 0; k < out_nch; k++) {
 					if (om & 1) {
 						y[k] = s0;
 						y[k + out_nch] = s1;
 					}
 					om = om >> 1;
 				}
 			}
 			x += 2 * in_nch;
 			y += 2 * out_nch;
 		}
 		/* One frame is left when the run length is odd, copy it alone */
 		if (j < f) {
 			for (i = 0; i < num_filters; i++) {
 				is = cd->input_channel_select[i];
 				om = cd->output_channel_mix[i];
 				s0 = x[is];
 				for (k = 0; k < out_nch; k++) {
 					if (om & 1)
 						y[k] = s0;
 					om = om >> 1;
 				}
 			}
 			x += in_nch;
 			y += out_nch;
 		}
 		remaining_frames -= f;
 		x = audio_stream_wrap(source, x);
 		y = audio_stream_wrap(sink, y);
 	}
 }
 #endif
 /*
  * Select the pass-through processing function that matches the frame format
  * and store it to tdfb_func of the module private data. Only the formats
  * enabled with CONFIG_FORMAT_* are available. Returns 0 when a function was
  * set, or -EINVAL after logging an error when the format has no match.
  */
 static inline int set_pass_func(struct processing_module *mod, enum sof_ipc_frame fmt)
 {
 	struct tdfb_comp_data *priv = module_get_private_data(mod);
 	switch (fmt) {
 #if CONFIG_FORMAT_S16LE
 	case SOF_IPC_FRAME_S16_LE:
 		comp_dbg(mod->dev, "set_pass_func(), SOF_IPC_FRAME_S16_LE");
 		priv->tdfb_func = tdfb_pass_s16;
 		return 0;
 #endif /* CONFIG_FORMAT_S16LE */
 #if CONFIG_FORMAT_S24LE
 	case SOF_IPC_FRAME_S24_4LE:
 		comp_dbg(mod->dev, "set_pass_func(), SOF_IPC_FRAME_S24_4LE");
 		priv->tdfb_func = tdfb_pass_s24;
 		return 0;
 #endif /* CONFIG_FORMAT_S24LE */
 #if CONFIG_FORMAT_S32LE
 	case SOF_IPC_FRAME_S32_LE:
 		comp_dbg(mod->dev, "set_pass_func(), SOF_IPC_FRAME_S32_LE");
 		priv->tdfb_func = tdfb_pass_s32;
 		return 0;
 #endif /* CONFIG_FORMAT_S32LE */
 	default:
 		/* No pass-through function for this format, report below */
 		break;
 	}
 	comp_err(mod->dev, "set_pass_func(), invalid frame_fmt");
 	return -EINVAL;
 }
 /*
 * Control code functions next. The processing is in fir_ C modules.
 */
@ -301,11 +482,25 @@ static void tdfb_init_delay(struct tdfb_comp_data *cd)
 	}
 }
-static int tdfb_setup(struct processing_module *mod, int source_nch, int sink_nch)
+static int tdfb_setup(struct processing_module *mod, int source_nch, int sink_nch,
 		      enum sof_ipc_frame fmt)
 {
 	struct tdfb_comp_data *cd = module_get_private_data(mod);
 	int delay_size;
 	/* If the beam is on, restore the processing function. If it is off, use the
 	 * efficient 1:1 copy when the source and sink channel counts are equal,
 	 * otherwise use the faster pass-through processing functions that copy the
 	 * selected source channels to the selected sink channels.
 	 */
 	if (cd->beam_on) {
 		set_func(mod, fmt);
 	} else {
 		if (source_nch == sink_nch)
 			cd->tdfb_func = tdfb_pass_same_format;
 		else
 			set_pass_func(mod, fmt);
 	}
 	/* Set coefficients for each channel from coefficient blob */
 	delay_size = tdfb_init_coef(mod, source_nch, sink_nch);
 	if (delay_size < 0)
@ -468,7 +663,8 @@ static int tdfb_process(struct processing_module *mod,
 	if (comp_is_new_data_blob_available(cd->model_handler)) {
 		cd->config = comp_get_data_blob(cd->model_handler, NULL, NULL);
 		ret = tdfb_setup(mod, audio_stream_get_channels(source),
-				 audio_stream_get_channels(sink));
+				 audio_stream_get_channels(sink),
 				 audio_stream_get_frm_fmt(source));
 		if (ret < 0) {
 			comp_err(dev, "tdfb_process(), failed FIR setup");
 			return ret;
@ -479,7 +675,8 @@ static int tdfb_process(struct processing_module *mod,
 	if (cd->update) {
 		cd->update = false;
 		ret = tdfb_setup(mod, audio_stream_get_channels(source),
-				 audio_stream_get_channels(sink));
+				 audio_stream_get_channels(sink),
 				 audio_stream_get_frm_fmt(source));
 		if (ret < 0) {
 			comp_err(dev, "tdfb_process(), failed FIR setup");
 			return ret;
@ -560,7 +757,7 @@ static int tdfb_prepare(struct processing_module *mod,
 		goto out;
 	}
-	ret = tdfb_setup(mod, source_channels, sink_channels);
+	ret = tdfb_setup(mod, source_channels, sink_channels, frame_fmt);
 	if (ret < 0) {
 		comp_err(dev, "tdfb_prepare() error: tdfb_setup failed.");
 		goto out;