From 802cffad0df810b25deebfa8595410b81f873eca Mon Sep 17 00:00:00 2001
From: Andrula Song
Date: Mon, 6 Mar 2023 20:10:23 +0800
Subject: [PATCH] Audio: Dcblock: Add HiFi4 implementation of dcblock

Add HiFi4 implementation of dcblock processing functions.
Compared with the generic C version, it saves about 54.8% of the
cycles for the 16 bit format, 53.1% for the 24 bit format and
49.1% for the 32 bit format.

Signed-off-by: Andrula Song
---
 src/audio/CMakeLists.txt                |   2 +-
 src/audio/dcblock/CMakeLists.txt        |   1 +
 src/audio/dcblock/dcblock_generic.c     |   3 +
 src/audio/dcblock/dcblock_hifi4.c       | 199 ++++++++++++++++++++++++
 src/include/sof/audio/dcblock/dcblock.h |  13 ++
 zephyr/CMakeLists.txt                   |   1 +
 6 files changed, 218 insertions(+), 1 deletion(-)
 create mode 100644 src/audio/dcblock/dcblock_hifi4.c

diff --git a/src/audio/CMakeLists.txt b/src/audio/CMakeLists.txt
index 59b1d7d16..414ee88a1 100644
--- a/src/audio/CMakeLists.txt
+++ b/src/audio/CMakeLists.txt
@@ -172,7 +172,7 @@ set(src_sources src/src.c src/src_generic.c)
 	set(asrc_sources asrc/asrc.c asrc/asrc_farrow.c asrc/asrc_farrow_generic.c)
 	set(eq-fir_sources module_adapter/module_adapter.c module_adapter/module/generic.c eq_fir/eq_fir.c eq_fir/eq_fir_generic.c)
 	set(eq-iir_sources module_adapter/module_adapter.c module_adapter/module/generic.c eq_iir/eq_iir.c)
-	set(dcblock_sources dcblock/dcblock.c dcblock/dcblock_generic.c)
+	set(dcblock_sources dcblock/dcblock.c dcblock/dcblock_generic.c dcblock/dcblock_hifi4.c)
 	set(crossover_sources crossover/crossover.c crossover/crossover_generic.c)
 	set(tdfb_sources tdfb/tdfb.c tdfb/tdfb_generic.c tdfb/tdfb_direction.c)
 	set(drc_sources drc/drc.c drc/drc_generic.c drc/drc_math_generic.c)
diff --git a/src/audio/dcblock/CMakeLists.txt b/src/audio/dcblock/CMakeLists.txt
index b3abaf0aa..5655330ab 100644
--- a/src/audio/dcblock/CMakeLists.txt
+++ b/src/audio/dcblock/CMakeLists.txt
@@ -1,2 +1,3 @@
 add_local_sources(sof dcblock.c)
 add_local_sources(sof dcblock_generic.c)
+add_local_sources(sof dcblock_hifi4.c)
diff --git a/src/audio/dcblock/dcblock_generic.c b/src/audio/dcblock/dcblock_generic.c
index 212f3459b..b821bb7d5 100644
--- a/src/audio/dcblock/dcblock_generic.c
+++ b/src/audio/dcblock/dcblock_generic.c
@@ -9,6 +9,8 @@
 #include
 #include
 
+#ifdef DCBLOCK_GENERIC
+
 LOG_MODULE_DECLARE(dcblock, CONFIG_SOF_LOG_LEVEL);
 
 /**
@@ -166,3 +168,4 @@ const struct dcblock_func_map dcblock_fnmap[] = {
 };
 
 const size_t dcblock_fncount = ARRAY_SIZE(dcblock_fnmap);
+#endif /* DCBLOCK_GENERIC */
diff --git a/src/audio/dcblock/dcblock_hifi4.c b/src/audio/dcblock/dcblock_hifi4.c
new file mode 100644
index 000000000..8ba7fb87c
--- /dev/null
+++ b/src/audio/dcblock/dcblock_hifi4.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: BSD-3-Clause
+//
+// Copyright(c) 2022 Intel Corporation. All rights reserved.
+//
+// Author: Andrula Song
+
+#include
+#include
+#include
+#include
+
+#ifdef DCBLOCK_HIFI4
+
+#include
+LOG_MODULE_DECLARE(dcblock, CONFIG_SOF_LOG_LEVEL);
+
+/*
+ * Compute one DC blocker output sample: y[n] = x[n] - x[n-1] + R * y[n-1].
+ *
+ * R is the Q2.30 coefficient, state_x the previous input x[n-1], state_y the
+ * previous output y[n-1] in Q1.31 and sample the current input x[n]. The 64 bit
+ * accumulator is rounded and returned as a 32 bit sample.
+ */
+static inline ae_int32x2 dcblock_cal(ae_int32x2 R, ae_int32x2 state_x, ae_int32x2 state_y,
+				     ae_int32x2 sample)
+{
+	ae_int64 out, temp;
+
+	/* R: Q2.30, y_prev: Q1.31, the result is Q2.62 */
+	temp = AE_MULF32S_LL(R, state_y);
+	out = AE_SUB64(AE_MOVAD32_L(sample), AE_MOVAD32_L(state_x));
+	/* shift out to 2.62 */
+	out = AE_ADD64S(AE_SLAI64S(out, 31), temp);
+	/* shift out to 1.63 */
+	return AE_ROUND32F64SSYM(AE_SLAI64S(out, 1));
+}
+
+/* Set up the circular buffers for the component source and sink */
+static inline void dcblock_set_circular(const struct audio_stream __sparse_cache *source,
+					const struct audio_stream __sparse_cache *sink)
+{
+	/* Set source as circular buffer 0 */
+	AE_SETCBEGIN0(source->addr);
+	AE_SETCEND0(source->end_addr);
+
+	/* Set sink as circular buffer 1 */
+	AE_SETCBEGIN1(sink->addr);
+	AE_SETCEND1(sink->end_addr);
+}
+
+#if CONFIG_FORMAT_S16LE
+/*
+ * DC blocking filter for S16_LE streams: filters "frames" frames of every
+ * channel from source to sink and saves the per channel filter state.
+ */
+static void dcblock_s16_default(const struct comp_dev *dev,
+				const struct audio_stream __sparse_cache *source,
+				const struct audio_stream __sparse_cache *sink,
+				uint32_t frames)
+{
+	struct comp_data *cd = comp_get_drvdata(dev);
+	ae_int16 *in;
+	ae_int16 *out;
+	ae_int32x2 R, state_x, state_y, sample;
+	ae_int16x4 in_sample, out_sample;
+	uint32_t i;
+	int ch;
+	int nch = source->channels;
+	const int inc = nch * sizeof(ae_int16);
+
+	dcblock_set_circular(source, sink);
+	for (ch = 0; ch < nch; ch++) {
+		in = (ae_int16 *)source->r_ptr + ch;
+		out = (ae_int16 *)sink->w_ptr + ch;
+		state_x = cd->state[ch].x_prev;
+		state_y = cd->state[ch].y_prev;
+		R = cd->R_coeffs[ch];
+		for (i = 0; i < frames; i++) {
+			/* Load a 16 bit sample */
+			AE_L16_XC(in_sample, in, inc);
+			/* Store the 16 bit sample to the high 16 bits of a 32 bit register */
+			sample = AE_CVT32X2F16_32(in_sample);
+			state_y = dcblock_cal(R, state_x, state_y, sample);
+			state_x = sample;
+			out_sample = AE_ROUND16X4F32SSYM(state_y, state_y);
+			AE_S16_0_XC1(out_sample, out, inc);
+		}
+		cd->state[ch].x_prev = state_x;
+		cd->state[ch].y_prev = state_y;
+	}
+}
+#endif /* CONFIG_FORMAT_S16LE */
+
+#if CONFIG_FORMAT_S24LE
+/*
+ * DC blocking filter for S24_4LE streams: filters "frames" frames of every
+ * channel from source to sink and saves the per channel filter state.
+ */
+static void dcblock_s24_default(const struct comp_dev *dev,
+				const struct audio_stream __sparse_cache *source,
+				const struct audio_stream __sparse_cache *sink,
+				uint32_t frames)
+{
+	struct comp_data *cd = comp_get_drvdata(dev);
+	ae_int32 *in;
+	ae_int32 *out;
+	ae_int32x2 R, state_x, state_y;
+	ae_int32x2 in_sample, out_sample;
+	uint32_t i;
+	int ch;
+	int nch = source->channels;
+	const int inc = nch * sizeof(ae_int32);
+
+	dcblock_set_circular(source, sink);
+	for (ch = 0; ch < nch; ch++) {
+		in = (ae_int32 *)source->r_ptr + ch;
+		out = (ae_int32 *)sink->w_ptr + ch;
+
+		state_x = cd->state[ch].x_prev;
+		state_y = cd->state[ch].y_prev;
+		R = cd->R_coeffs[ch];
+		for (i = 0; i < frames; i++) {
+			AE_L32_XC(in_sample, in, inc);
+			/* Scale the 24 bit sample up by 8 bits before filtering */
+			in_sample = AE_SLAI32(in_sample, 8);
+			state_y = dcblock_cal(R, state_x, state_y, in_sample);
+			state_x = in_sample;
+			/* Round the result back to 24 bits */
+			out_sample = AE_SRAI32R(state_y, 8);
+			/*
+			 * Saturate to the 24 bit range. The last shift must not round:
+			 * rounding would turn a saturated 0x7fffffff into 0x800000.
+			 */
+			out_sample = AE_SLAI32S(out_sample, 8);
+			out_sample = AE_SRAI32(out_sample, 8);
+			AE_S32_L_XC1(out_sample, out, inc);
+		}
+		cd->state[ch].x_prev = state_x;
+		cd->state[ch].y_prev = state_y;
+	}
+}
+#endif /* CONFIG_FORMAT_S24LE */
+
+#if CONFIG_FORMAT_S32LE
+/*
+ * DC blocking filter for S32_LE streams: filters "frames" frames of every
+ * channel from source to sink and saves the per channel filter state.
+ */
+static void dcblock_s32_default(const struct comp_dev *dev,
+				const struct audio_stream __sparse_cache *source,
+				const struct audio_stream __sparse_cache *sink,
+				uint32_t frames)
+{
+	struct comp_data *cd = comp_get_drvdata(dev);
+	ae_int32 *in;
+	ae_int32 *out;
+	ae_int32x2 R, state_x, state_y;
+	ae_int32x2 in_sample;
+	uint32_t i;
+	int ch;
+	int nch = source->channels;
+	const int inc = nch * sizeof(ae_int32);
+
+	dcblock_set_circular(source, sink);
+	for (ch = 0; ch < nch; ch++) {
+		in = (ae_int32 *)source->r_ptr + ch;
+		out = (ae_int32 *)sink->w_ptr + ch;
+
+		state_x = cd->state[ch].x_prev;
+		state_y = cd->state[ch].y_prev;
+		R = cd->R_coeffs[ch];
+		for (i = 0; i < frames; i++) {
+			AE_L32_XC(in_sample, in, inc);
+			state_y = dcblock_cal(R, state_x, state_y, in_sample);
+			state_x = in_sample;
+			AE_S32_L_XC1(state_y, out, inc);
+		}
+		cd->state[ch].x_prev = state_x;
+		cd->state[ch].y_prev = state_y;
+	}
+}
+#endif /* CONFIG_FORMAT_S32LE */
+
+/* Map of the enabled source formats to their processing functions */
+const struct dcblock_func_map dcblock_fnmap[] = {
+/* { SOURCE_FORMAT , PROCESSING FUNCTION } */
+#if CONFIG_FORMAT_S16LE
+	{ SOF_IPC_FRAME_S16_LE, dcblock_s16_default },
+#endif /* CONFIG_FORMAT_S16LE */
+#if CONFIG_FORMAT_S24LE
+	{ SOF_IPC_FRAME_S24_4LE, dcblock_s24_default },
+#endif /* CONFIG_FORMAT_S24LE */
+#if CONFIG_FORMAT_S32LE
+	{ SOF_IPC_FRAME_S32_LE, dcblock_s32_default },
+#endif /* CONFIG_FORMAT_S32LE */
+};
+
+const size_t dcblock_fncount = ARRAY_SIZE(dcblock_fnmap);
+#endif /* DCBLOCK_HIFI4 */
diff --git a/src/include/sof/audio/dcblock/dcblock.h b/src/include/sof/audio/dcblock/dcblock.h
index 6963284ff..74dc5b8b5 100644
--- a/src/include/sof/audio/dcblock/dcblock.h
+++ b/src/include/sof/audio/dcblock/dcblock.h
@@ -11,6 +11,19 @@
 #include
 #include
 #include
+#include
+
+/* __XCC__ is both for xt_xcc and xt_clang */
+#if defined(__XCC__)
+# include
+# if XCHAL_HAVE_HIFI4
+#  define DCBLOCK_HIFI4
+# else
+#  define DCBLOCK_GENERIC
+# endif
+#else
+# define DCBLOCK_GENERIC
+#endif
 
 struct audio_stream;
 struct comp_dev;
diff --git a/zephyr/CMakeLists.txt b/zephyr/CMakeLists.txt
index 8ee2b4915..904a1cefa 100644
--- a/zephyr/CMakeLists.txt
+++ b/zephyr/CMakeLists.txt
@@ -463,6 +463,7 @@ zephyr_library_sources_ifdef(CONFIG_COMP_ASRC
 zephyr_library_sources_ifdef(CONFIG_COMP_DCBLOCK
 	${SOF_AUDIO_PATH}/dcblock/dcblock_generic.c
 	${SOF_AUDIO_PATH}/dcblock/dcblock.c
+	${SOF_AUDIO_PATH}/dcblock/dcblock_hifi4.c
 )
 
 zephyr_library_sources_ifdef(CONFIG_COMP_SEL