Audio: Dcblock: Add HiFi4 implementation of dcblock

Add HiFi4 implementation of dcblock processing functions.
Compared with the generic C version, the HiFi4 implementation saves
about 54.8% of the cycles for the 16-bit format, 53.1% for the 24-bit
format and 49.1% for the 32-bit format.

Signed-off-by: Andrula Song <andrula.song@intel.com>
This commit is contained in:
Andrula Song 2023-03-06 20:10:23 +08:00 committed by Kai Vehmanen
parent 29f3ac656b
commit 802cffad0d
6 changed files with 189 additions and 1 deletions

View File

@ -172,7 +172,7 @@ set(src_sources src/src.c src/src_generic.c)
set(asrc_sources asrc/asrc.c asrc/asrc_farrow.c asrc/asrc_farrow_generic.c)
set(eq-fir_sources module_adapter/module_adapter.c module_adapter/module/generic.c eq_fir/eq_fir.c eq_fir/eq_fir_generic.c)
set(eq-iir_sources module_adapter/module_adapter.c module_adapter/module/generic.c eq_iir/eq_iir.c)
# NOTE(review): dcblock_sources is assigned twice in a row here; in CMake the
# later set() replaces the earlier value, so the list that takes effect is the
# one that also contains dcblock/dcblock_hifi4.c.
set(dcblock_sources dcblock/dcblock.c dcblock/dcblock_generic.c)
set(dcblock_sources dcblock/dcblock.c dcblock/dcblock_generic.c dcblock/dcblock_hifi4.c)
set(crossover_sources crossover/crossover.c crossover/crossover_generic.c)
set(tdfb_sources tdfb/tdfb.c tdfb/tdfb_generic.c tdfb/tdfb_direction.c)
set(drc_sources drc/drc.c drc/drc_generic.c drc/drc_math_generic.c)

View File

@ -1,2 +1,3 @@
# dcblock component sources. Both dcblock_generic.c and dcblock_hifi4.c are
# always listed; each file wraps its contents in #ifdef DCBLOCK_GENERIC /
# #ifdef DCBLOCK_HIFI4 respectively.
add_local_sources(sof dcblock.c)
add_local_sources(sof dcblock_generic.c)
add_local_sources(sof dcblock_hifi4.c)

View File

@ -9,6 +9,8 @@
#include <sof/audio/format.h>
#include <sof/audio/dcblock/dcblock.h>
#ifdef DCBLOCK_GENERIC
LOG_MODULE_DECLARE(dcblock, CONFIG_SOF_LOG_LEVEL);
/**
@ -166,3 +168,4 @@ const struct dcblock_func_map dcblock_fnmap[] = {
};
const size_t dcblock_fncount = ARRAY_SIZE(dcblock_fnmap);
#endif

View File

@ -0,0 +1,170 @@
// SPDX-License-Identifier: BSD-3-Clause
//
// Copyright(c) 2022 Intel Corporation. All rights reserved.
//
// Author: Andrula Song <andrula.song@intel.com>
#include <stdint.h>
#include <sof/audio/component.h>
#include <sof/audio/format.h>
#include <sof/audio/dcblock/dcblock.h>
#ifdef DCBLOCK_HIFI4
#include <xtensa/tie/xt_hifi4.h>
LOG_MODULE_DECLARE(dcblock, CONFIG_SOF_LOG_LEVEL);
/**
 * Compute one output sample of the DC blocking filter.
 *
 * Evaluates y[n] = x[n] - x[n-1] + R * y[n-1] in a 64-bit accumulator and
 * rounds the result back to 32 bits.
 *
 * @param R Filter coefficient, Q2.30.
 * @param state_x Previous input sample x[n-1].
 * @param state_y Previous output sample y[n-1], Q1.31.
 * @param sample Current input sample x[n].
 * @return The new output sample y[n].
 */
static inline ae_int32x2 dcblock_cal(ae_int32x2 R, ae_int32x2 state_x, ae_int32x2 state_y,
				     ae_int32x2 sample)
{
	ae_int64 feedback;
	ae_int64 acc;

	/* Feedback term R * y[n-1]: Q2.30 times Q1.31 gives Q2.62 */
	feedback = AE_MULF32S_LL(R, state_y);

	/* Input difference x[n] - x[n-1], taken on the 32-bit sample values */
	acc = AE_SUB64(AE_MOVAD32_L(sample), AE_MOVAD32_L(state_x));

	/* Align the difference to Q2.62 so it can be added to the feedback */
	acc = AE_SLAI64S(acc, 31);
	acc = AE_ADD64S(acc, feedback);

	/* Move to Q1.63, then round down to a 32-bit result */
	acc = AE_SLAI64S(acc, 1);
	return AE_ROUND32F64SSYM(acc);
}
/*
 * Program the two circular buffer register pairs from the stream bounds:
 * circular buffer 0 covers the source stream, circular buffer 1 covers the
 * sink stream.
 */
static inline void dcblock_set_circular(const struct audio_stream __sparse_cache *source,
					const struct audio_stream __sparse_cache *sink)
{
	/* Circular buffer 1 <- sink->addr .. sink->end_addr */
	AE_SETCBEGIN1(sink->addr);
	AE_SETCEND1(sink->end_addr);

	/* Circular buffer 0 <- source->addr .. source->end_addr */
	AE_SETCBEGIN0(source->addr);
	AE_SETCEND0(source->end_addr);
}
#if CONFIG_FORMAT_S16LE
/**
 * DC blocking filter for 16-bit samples, HiFi4 version.
 *
 * The interleaved stream is processed one channel at a time: for each channel
 * the filter state and coefficient are loaded from the component data, all
 * frames of that channel are filtered, and the state is stored back.
 * The stream read/write pointers themselves are not advanced here.
 *
 * @param dev Component device whose private data holds the per-channel
 *            filter state and R coefficients.
 * @param source Source stream; samples are read starting at r_ptr.
 * @param sink Sink stream; samples are written starting at w_ptr.
 * @param frames Number of frames to process.
 */
static void dcblock_s16_default(const struct comp_dev *dev,
				const struct audio_stream __sparse_cache *source,
				const struct audio_stream __sparse_cache *sink,
				uint32_t frames)
{
	struct comp_data *cd = comp_get_drvdata(dev);
	ae_int16 *in;
	ae_int16 *out;
	ae_int32x2 R, state_x, state_y, sample;
	ae_int16x4 in_sample, out_sample;
	/* Same type as frames: avoids a signed/unsigned comparison in the loop */
	uint32_t i;
	int ch;
	int nch = source->channels;
	/* Byte step from one sample of a channel to the next frame's sample */
	const int inc = nch * sizeof(ae_int16);

	dcblock_set_circular(source, sink);
	for (ch = 0; ch < nch; ch++) {
		in = (ae_int16 *)source->r_ptr + ch;
		out = (ae_int16 *)sink->w_ptr + ch;
		state_x = cd->state[ch].x_prev;
		state_y = cd->state[ch].y_prev;
		R = cd->R_coeffs[ch];
		for (i = 0; i < frames; i++) {
			/* Load a 16 bit sample and step to the next frame */
			AE_L16_XC(in_sample, in, inc);
			/* Place the 16 bit sample in the high 16 bits of a 32 bit value */
			sample = AE_CVT32X2F16_32(in_sample);
			state_y = dcblock_cal(R, state_x, state_y, sample);
			state_x = sample;
			/* Round the 32 bit result back to 16 bits and store it */
			out_sample = AE_ROUND16X4F32SSYM(state_y, state_y);
			AE_S16_0_XC1(out_sample, out, inc);
		}
		cd->state[ch].x_prev = state_x;
		cd->state[ch].y_prev = state_y;
	}
}
#endif /* CONFIG_FORMAT_S16LE */
#if CONFIG_FORMAT_S24LE
/**
 * DC blocking filter for 24-bit samples in 32-bit containers, HiFi4 version.
 *
 * The interleaved stream is processed one channel at a time: for each channel
 * the filter state and coefficient are loaded from the component data, all
 * frames of that channel are filtered, and the state is stored back.
 * Samples are shifted left by 8 before filtering and the result is scaled
 * back and clamped to the 24-bit range before it is stored.
 * The stream read/write pointers themselves are not advanced here.
 *
 * @param dev Component device whose private data holds the per-channel
 *            filter state and R coefficients.
 * @param source Source stream; samples are read starting at r_ptr.
 * @param sink Sink stream; samples are written starting at w_ptr.
 * @param frames Number of frames to process.
 */
static void dcblock_s24_default(const struct comp_dev *dev,
				const struct audio_stream __sparse_cache *source,
				const struct audio_stream __sparse_cache *sink,
				uint32_t frames)
{
	struct comp_data *cd = comp_get_drvdata(dev);
	ae_int32 *in;
	ae_int32 *out;
	ae_int32x2 R, state_x, state_y;
	ae_int32x2 in_sample, out_sample;
	/* Same type as frames: avoids a signed/unsigned comparison in the loop */
	uint32_t i;
	int ch;
	int nch = source->channels;
	/* Byte step from one sample of a channel to the next frame's sample */
	const int inc = nch * sizeof(ae_int32);

	dcblock_set_circular(source, sink);
	for (ch = 0; ch < nch; ch++) {
		in = (ae_int32 *)source->r_ptr + ch;
		out = (ae_int32 *)sink->w_ptr + ch;
		state_x = cd->state[ch].x_prev;
		state_y = cd->state[ch].y_prev;
		R = cd->R_coeffs[ch];
		for (i = 0; i < frames; i++) {
			AE_L32_XC(in_sample, in, inc);
			/* Move the sample up by 8 bits before filtering */
			in_sample = AE_SLAI32(in_sample, 8);
			state_y = dcblock_cal(R, state_x, state_y, in_sample);
			state_x = in_sample;
			/* Scale the result back down by 8 bits with rounding */
			out_sample = AE_SRAI32R(state_y, 8);
			/* Saturating shift up clamps a value that rounding pushed
			 * beyond the 24-bit range
			 */
			out_sample = AE_SLAI32S(out_sample, 8);
			/* Plain arithmetic shift down: the low 8 bits are zero unless
			 * the value was saturated, and rounding a saturated
			 * 0x7fffffff here would overshoot the 24-bit maximum again
			 */
			out_sample = AE_SRAI32(out_sample, 8);
			AE_S32_L_XC1(out_sample, out, inc);
		}
		cd->state[ch].x_prev = state_x;
		cd->state[ch].y_prev = state_y;
	}
}
#endif /* CONFIG_FORMAT_S24LE */
#if CONFIG_FORMAT_S32LE
/**
 * DC blocking filter for 32-bit samples, HiFi4 version.
 *
 * The interleaved stream is processed one channel at a time: for each channel
 * the filter state and coefficient are loaded from the component data, all
 * frames of that channel are filtered, and the state is stored back.
 * The stream read/write pointers themselves are not advanced here.
 *
 * @param dev Component device whose private data holds the per-channel
 *            filter state and R coefficients.
 * @param source Source stream; samples are read starting at r_ptr.
 * @param sink Sink stream; samples are written starting at w_ptr.
 * @param frames Number of frames to process.
 */
static void dcblock_s32_default(const struct comp_dev *dev,
				const struct audio_stream __sparse_cache *source,
				const struct audio_stream __sparse_cache *sink,
				uint32_t frames)
{
	struct comp_data *cd = comp_get_drvdata(dev);
	ae_int32 *in;
	ae_int32 *out;
	ae_int32x2 R, state_x, state_y;
	ae_int32x2 in_sample;
	/* Same type as frames: avoids a signed/unsigned comparison in the loop */
	uint32_t i;
	int ch;
	int nch = source->channels;
	/* Byte step from one sample of a channel to the next frame's sample */
	const int inc = nch * sizeof(ae_int32);

	dcblock_set_circular(source, sink);
	for (ch = 0; ch < nch; ch++) {
		in = (ae_int32 *)source->r_ptr + ch;
		out = (ae_int32 *)sink->w_ptr + ch;
		state_x = cd->state[ch].x_prev;
		state_y = cd->state[ch].y_prev;
		R = cd->R_coeffs[ch];
		for (i = 0; i < frames; i++) {
			AE_L32_XC(in_sample, in, inc);
			/* The filter output is stored directly as the 32 bit sample */
			state_y = dcblock_cal(R, state_x, state_y, in_sample);
			state_x = in_sample;
			AE_S32_L_XC1(state_y, out, inc);
		}
		cd->state[ch].x_prev = state_x;
		cd->state[ch].y_prev = state_y;
	}
}
#endif /* CONFIG_FORMAT_S32LE */
/*
 * Table pairing each source frame format with the HiFi4 processing function
 * defined in this file. An entry is compiled in only when the matching
 * CONFIG_FORMAT_* option is enabled.
 */
const struct dcblock_func_map dcblock_fnmap[] = {
/* { SOURCE_FORMAT , PROCESSING FUNCTION } */
#if CONFIG_FORMAT_S16LE
{ SOF_IPC_FRAME_S16_LE, dcblock_s16_default },
#endif /* CONFIG_FORMAT_S16LE */
#if CONFIG_FORMAT_S24LE
{ SOF_IPC_FRAME_S24_4LE, dcblock_s24_default },
#endif /* CONFIG_FORMAT_S24LE */
#if CONFIG_FORMAT_S32LE
{ SOF_IPC_FRAME_S32_LE, dcblock_s32_default },
#endif /* CONFIG_FORMAT_S32LE */
};
/* Number of entries in dcblock_fnmap */
const size_t dcblock_fncount = ARRAY_SIZE(dcblock_fnmap);
#endif

View File

@ -11,6 +11,19 @@
#include <stdint.h>
#include <sof/platform.h>
#include <ipc/stream.h>
#include <sof/compiler_info.h>
/*
 * Select the dcblock implementation at compile time. Exactly one of
 * DCBLOCK_HIFI4 or DCBLOCK_GENERIC ends up defined:
 *  - DCBLOCK_HIFI4 when __XCC__ is defined (it is set by both xt_xcc and
 *    xt_clang) and the core configuration reports XCHAL_HAVE_HIFI4,
 *  - DCBLOCK_GENERIC in every other case.
 */
#if defined(__XCC__)
# include <xtensa/config/core-isa.h>
# if XCHAL_HAVE_HIFI4
# define DCBLOCK_HIFI4
# else
# define DCBLOCK_GENERIC
# endif
#else
# define DCBLOCK_GENERIC
#endif
struct audio_stream;
struct comp_dev;

View File

@ -463,6 +463,7 @@ zephyr_library_sources_ifdef(CONFIG_COMP_ASRC
# dcblock component sources, added only when CONFIG_COMP_DCBLOCK is set.
# Both dcblock_generic.c and dcblock_hifi4.c are listed; each file wraps its
# contents in #ifdef DCBLOCK_GENERIC / #ifdef DCBLOCK_HIFI4 respectively.
zephyr_library_sources_ifdef(CONFIG_COMP_DCBLOCK
${SOF_AUDIO_PATH}/dcblock/dcblock_generic.c
${SOF_AUDIO_PATH}/dcblock/dcblock.c
${SOF_AUDIO_PATH}/dcblock/dcblock_hifi4.c
)
zephyr_library_sources_ifdef(CONFIG_COMP_SEL