Audio: Mixer: Add hifi version processing functions for mixer

Add hifi3 & hifi4 version implementation of mixer processing functions.
The HiFi versions of these functions use at least 47% fewer cycles than the generic C versions.

Signed-off-by: Andrula Song <xiaoyuan.song@intel.com>
This commit is contained in:
Andrula Song 2022-08-24 10:36:00 +08:00 committed by Liam Girdwood
parent 16e3a6018e
commit 100144a1d0
8 changed files with 454 additions and 167 deletions

View File

@ -1,7 +1,7 @@
# SPDX-License-Identifier: BSD-3-Clause
if(CONFIG_IPC_MAJOR_3)
set(mixer_src mixer.c)
set(mixer_src mixer/mixer.c mixer/mixer_generic.c mixer/mixer_hifi3.c)
elseif(CONFIG_IPC_MAJOR_4)
set(mixer_src mixin_mixout.c)
endif()

View File

@ -0,0 +1,2 @@
add_local_sources(sof mixer.c mixer_generic.c mixer_hifi3.c)

View File

@ -49,146 +49,6 @@ struct mixer_data {
uint32_t frames);
};
#if CONFIG_FORMAT_S16LE
/*
 * Mix n 16 bit PCM source streams to one sink stream.
 *
 * dev:         not referenced by this function
 * sink:        destination stream, written starting at sink->w_ptr
 * sources:     source streams, each read starting at its r_ptr
 * num_sources: number of entries in sources
 * frames:      frames to mix; frames * sink->channels samples are produced
 *
 * Work is split into chunks bounded by the byte counts that
 * audio_stream_bytes_without_wrap() reports for the sink and every source.
 * After each chunk all pointers are passed through audio_stream_wrap().
 *
 * NOTE(review): rd[] is sized by PLATFORM_MAX_CHANNELS but indexed by source
 * number - confirm num_sources can never exceed that constant.
 */
static void mix_n_s16(struct comp_dev *dev, struct audio_stream __sparse_cache *sink,
		      const struct audio_stream __sparse_cache **sources, uint32_t num_sources,
		      uint32_t frames)
{
	int16_t *rd[PLATFORM_MAX_CHANNELS];
	int16_t *wr = sink->w_ptr;
	int channels = sink->channels;
	int total = frames * channels;
	int done;
	int chunk, avail, remaining;
	int32_t acc;
	int s, k;

	for (s = 0; s < num_sources; s++)
		rd[s] = sources[s]->r_ptr;

	for (done = 0; done < total; done += chunk) {
		/* samples still to produce, then clamp to the sink's contiguous run */
		remaining = total - done;
		chunk = audio_stream_bytes_without_wrap(sink, wr) >> 1; /* bytes to samples */
		chunk = MIN(chunk, remaining);

		/* clamp further to the shortest contiguous run among the sources */
		for (s = 0; s < num_sources; s++) {
			avail = audio_stream_bytes_without_wrap(sources[s], rd[s]) >> 1;
			chunk = MIN(chunk, avail);
		}

		for (k = 0; k < chunk; k++) {
			/* 32 bit accumulator, saturated back to 16 bits on store */
			acc = 0;
			for (s = 0; s < num_sources; s++)
				acc += *rd[s]++;

			*wr++ = sat_int16(acc);
		}

		wr = audio_stream_wrap(sink, wr);
		for (s = 0; s < num_sources; s++)
			rd[s] = audio_stream_wrap(sources[s], rd[s]);
	}
}
#endif /* CONFIG_FORMAT_S16LE */
#if CONFIG_FORMAT_S24LE
/*
 * Mix n 24 bit PCM source streams to one sink stream.
 *
 * dev:         not referenced by this function
 * sink:        destination stream, written starting at sink->w_ptr
 * sources:     source streams, each read starting at its r_ptr
 * num_sources: number of entries in sources
 * frames:      frames to mix; frames * sink->channels samples are produced
 *
 * Samples are held in 32 bit words. Each input word is sign extended from
 * bit 23 before it is accumulated, and the sum is saturated with sat_int24().
 *
 * NOTE(review): src[] is sized by PLATFORM_MAX_CHANNELS but indexed by source
 * number - confirm num_sources can never exceed that constant.
 */
static void mix_n_s24(struct comp_dev *dev, struct audio_stream __sparse_cache *sink,
		      const struct audio_stream __sparse_cache **sources, uint32_t num_sources,
		      uint32_t frames)
{
	int32_t *src[PLATFORM_MAX_CHANNELS];
	int32_t *dest;
	int32_t val;
	uint32_t raw;
	int nmax;
	int i, j, n, ns;
	int processed = 0;
	int nch = sink->channels;
	int samples = frames * nch;

	dest = sink->w_ptr;
	for (j = 0; j < num_sources; j++)
		src[j] = sources[j]->r_ptr;

	while (processed < samples) {
		/* limit the chunk to what sink and all sources hold contiguously */
		nmax = samples - processed;
		n = audio_stream_bytes_without_wrap(sink, dest) >> 2; /* divide 4 */
		n = MIN(n, nmax);
		for (i = 0; i < num_sources; i++) {
			ns = audio_stream_bytes_without_wrap(sources[i], src[i]) >> 2;
			n = MIN(n, ns);
		}

		for (i = 0; i < n; i++) {
			val = 0;
			for (j = 0; j < num_sources; j++) {
				/*
				 * Sign extend from bit 23. The left shift is done on
				 * an unsigned value: shifting a negative int32_t, or
				 * shifting a set bit into the sign bit, is undefined.
				 */
				raw = (uint32_t)*src[j] << 8;
				val += (int32_t)raw >> 8;
				src[j]++;
			}

			/* Saturate to 24 bits */
			*dest = sat_int24(val);
			dest++;
		}

		processed += n;
		dest = audio_stream_wrap(sink, dest);
		for (i = 0; i < num_sources; i++)
			src[i] = audio_stream_wrap(sources[i], src[i]);
	}
}
#endif /* CONFIG_FORMAT_S24LE */
#if CONFIG_FORMAT_S32LE
/*
 * Mix n 32 bit PCM source streams to one sink stream.
 *
 * dev:         not referenced by this function
 * sink:        destination stream, written starting at sink->w_ptr
 * sources:     source streams, each read starting at its r_ptr
 * num_sources: number of entries in sources
 * frames:      frames to mix; frames * sink->channels samples are produced
 *
 * The per-sample sum is kept in 64 bits and saturated with sat_int32().
 *
 * NOTE(review): rd[] is sized by PLATFORM_MAX_CHANNELS but indexed by source
 * number - confirm num_sources can never exceed that constant.
 */
static void mix_n_s32(struct comp_dev *dev, struct audio_stream __sparse_cache *sink,
		      const struct audio_stream __sparse_cache **sources, uint32_t num_sources,
		      uint32_t frames)
{
	int32_t *rd[PLATFORM_MAX_CHANNELS];
	int32_t *wr = sink->w_ptr;
	int channels = sink->channels;
	int total = frames * channels;
	int done;
	int chunk, avail, remaining;
	int64_t acc;
	int s, k;

	for (s = 0; s < num_sources; s++)
		rd[s] = sources[s]->r_ptr;

	for (done = 0; done < total; done += chunk) {
		/* samples still to produce, then clamp to the sink's contiguous run */
		remaining = total - done;
		chunk = audio_stream_bytes_without_wrap(sink, wr) >> 2; /* bytes to samples */
		chunk = MIN(chunk, remaining);

		/* clamp further to the shortest contiguous run among the sources */
		for (s = 0; s < num_sources; s++) {
			avail = audio_stream_bytes_without_wrap(sources[s], rd[s]) >> 2;
			chunk = MIN(chunk, avail);
		}

		for (k = 0; k < chunk; k++) {
			acc = 0;
			for (s = 0; s < num_sources; s++)
				acc += *rd[s]++;

			*wr++ = sat_int32(acc);
		}

		wr = audio_stream_wrap(sink, wr);
		for (s = 0; s < num_sources; s++)
			rd[s] = audio_stream_wrap(sources[s], rd[s]);
	}
}
#endif /* CONFIG_FORMAT_S32LE */
static struct comp_dev *mixer_new(const struct comp_driver *drv,
struct comp_ipc_config *config,
void *spec)
@ -455,7 +315,6 @@ static int mixer_prepare_common(struct comp_dev *dev)
struct mixer_data *md = comp_get_drvdata(dev);
struct comp_buffer *sink;
struct comp_buffer __sparse_cache *sink_c;
enum sof_ipc_frame fmt;
int ret;
comp_dbg(dev, "mixer_prepare()");
@ -467,31 +326,9 @@ static int mixer_prepare_common(struct comp_dev *dev)
sink = list_first_item(&dev->bsink_list, struct comp_buffer,
source_list);
sink_c = buffer_acquire(sink);
fmt = sink_c->stream.frame_fmt;
md->mix_func = mixer_get_processing_function(dev, sink_c);
buffer_release(sink_c);
/* currently inactive so setup mixer */
switch (fmt) {
#if CONFIG_FORMAT_S16LE
case SOF_IPC_FRAME_S16_LE:
md->mix_func = mix_n_s16;
break;
#endif /* CONFIG_FORMAT_S16LE */
#if CONFIG_FORMAT_S24LE
case SOF_IPC_FRAME_S24_4LE:
md->mix_func = mix_n_s24;
break;
#endif /* CONFIG_FORMAT_S24LE */
#if CONFIG_FORMAT_S32LE
case SOF_IPC_FRAME_S32_LE:
md->mix_func = mix_n_s32;
break;
#endif /* CONFIG_FORMAT_S32LE */
default:
comp_err(dev, "unsupported data format");
return -EINVAL;
}
ret = comp_set_state(dev, COMP_TRIGGER_PREPARE);
if (ret < 0)
return ret;

View File

@ -0,0 +1,166 @@
// SPDX-License-Identifier: BSD-3-Clause
//
// Copyright(c) 2022 Intel Corporation. All rights reserved.
//
// Author: Andrula Song <xiaoyuan.song@intel.com>
#include <sof/audio/mixer.h>
#include <sof/common.h>
#ifdef MIXER_GENERIC
#if CONFIG_FORMAT_S16LE
/*
 * Mix n 16 bit PCM source streams to one sink stream (generic C version).
 *
 * dev:         not referenced by this function
 * sink:        destination stream, written starting at sink->w_ptr
 * sources:     source streams, each read starting at its r_ptr
 * num_sources: number of entries in sources
 * frames:      frames to mix; frames * sink->channels samples are produced
 *
 * Work is split into chunks bounded by the sample counts that
 * audio_stream_samples_without_wrap_s16() reports for the sink and every
 * source. After each chunk all pointers go through audio_stream_wrap().
 *
 * NOTE(review): rd[] is sized by PLATFORM_MAX_CHANNELS but indexed by source
 * number - confirm num_sources can never exceed that constant.
 */
static void mix_n_s16(struct comp_dev *dev, struct audio_stream __sparse_cache *sink,
		      const struct audio_stream __sparse_cache **sources, uint32_t num_sources,
		      uint32_t frames)
{
	int16_t *rd[PLATFORM_MAX_CHANNELS];
	int16_t *wr = sink->w_ptr;
	int channels = sink->channels;
	int total = frames * channels;
	int done;
	int chunk, avail, remaining;
	int32_t acc;
	int s, k;

	for (s = 0; s < num_sources; s++)
		rd[s] = sources[s]->r_ptr;

	for (done = 0; done < total; done += chunk) {
		/* samples still to produce, then clamp to the sink's contiguous run */
		remaining = total - done;
		chunk = audio_stream_samples_without_wrap_s16(sink, wr);
		chunk = MIN(chunk, remaining);

		/* clamp further to the shortest contiguous run among the sources */
		for (s = 0; s < num_sources; s++) {
			avail = audio_stream_samples_without_wrap_s16(sources[s], rd[s]);
			chunk = MIN(chunk, avail);
		}

		for (k = 0; k < chunk; k++) {
			/* 32 bit accumulator, saturated back to 16 bits on store */
			acc = 0;
			for (s = 0; s < num_sources; s++)
				acc += *rd[s]++;

			*wr++ = sat_int16(acc);
		}

		wr = audio_stream_wrap(sink, wr);
		for (s = 0; s < num_sources; s++)
			rd[s] = audio_stream_wrap(sources[s], rd[s]);
	}
}
#endif /* CONFIG_FORMAT_S16LE */
#if CONFIG_FORMAT_S24LE
/*
 * Mix n 24 bit PCM source streams to one sink stream (generic C version).
 *
 * dev:         not referenced by this function
 * sink:        destination stream, written starting at sink->w_ptr
 * sources:     source streams, each read starting at its r_ptr
 * num_sources: number of entries in sources
 * frames:      frames to mix; frames * sink->channels samples are produced
 *
 * Samples are held in 32 bit words. Each input word is sign extended from
 * bit 23 before it is accumulated, and the sum is saturated with sat_int24().
 *
 * NOTE(review): src[] is sized by PLATFORM_MAX_CHANNELS but indexed by source
 * number - confirm num_sources can never exceed that constant.
 */
static void mix_n_s24(struct comp_dev *dev, struct audio_stream __sparse_cache *sink,
		      const struct audio_stream __sparse_cache **sources, uint32_t num_sources,
		      uint32_t frames)
{
	int32_t *src[PLATFORM_MAX_CHANNELS];
	int32_t *dest;
	int32_t val;
	uint32_t raw;
	int nmax;
	int i, j, n, ns;
	int processed = 0;
	int nch = sink->channels;
	int samples = frames * nch;

	dest = sink->w_ptr;
	for (j = 0; j < num_sources; j++)
		src[j] = sources[j]->r_ptr;

	while (processed < samples) {
		/* limit the chunk to what sink and all sources hold contiguously */
		nmax = samples - processed;
		n = audio_stream_samples_without_wrap_s24(sink, dest);
		n = MIN(n, nmax);
		for (i = 0; i < num_sources; i++) {
			ns = audio_stream_samples_without_wrap_s24(sources[i], src[i]);
			n = MIN(n, ns);
		}

		for (i = 0; i < n; i++) {
			val = 0;
			for (j = 0; j < num_sources; j++) {
				/*
				 * Sign extend from bit 23. The left shift is done on
				 * an unsigned value: shifting a negative int32_t, or
				 * shifting a set bit into the sign bit, is undefined.
				 */
				raw = (uint32_t)*src[j] << 8;
				val += (int32_t)raw >> 8;
				src[j]++;
			}

			/* Saturate to 24 bits */
			*dest = sat_int24(val);
			dest++;
		}

		processed += n;
		dest = audio_stream_wrap(sink, dest);
		for (i = 0; i < num_sources; i++)
			src[i] = audio_stream_wrap(sources[i], src[i]);
	}
}
#endif /* CONFIG_FORMAT_S24LE */
#if CONFIG_FORMAT_S32LE
/*
 * Mix n 32 bit PCM source streams to one sink stream (generic C version).
 *
 * dev:         not referenced by this function
 * sink:        destination stream, written starting at sink->w_ptr
 * sources:     source streams, each read starting at its r_ptr
 * num_sources: number of entries in sources
 * frames:      frames to mix; frames * sink->channels samples are produced
 *
 * The per-sample sum is kept in 64 bits and saturated with sat_int32().
 *
 * NOTE(review): rd[] is sized by PLATFORM_MAX_CHANNELS but indexed by source
 * number - confirm num_sources can never exceed that constant.
 */
static void mix_n_s32(struct comp_dev *dev, struct audio_stream __sparse_cache *sink,
		      const struct audio_stream __sparse_cache **sources, uint32_t num_sources,
		      uint32_t frames)
{
	int32_t *rd[PLATFORM_MAX_CHANNELS];
	int32_t *wr = sink->w_ptr;
	int channels = sink->channels;
	int total = frames * channels;
	int done;
	int chunk, avail, remaining;
	int64_t acc;
	int s, k;

	for (s = 0; s < num_sources; s++)
		rd[s] = sources[s]->r_ptr;

	for (done = 0; done < total; done += chunk) {
		/* samples still to produce, then clamp to the sink's contiguous run */
		remaining = total - done;
		chunk = audio_stream_samples_without_wrap_s32(sink, wr);
		chunk = MIN(chunk, remaining);

		/* clamp further to the shortest contiguous run among the sources */
		for (s = 0; s < num_sources; s++) {
			avail = audio_stream_samples_without_wrap_s32(sources[s], rd[s]);
			chunk = MIN(chunk, avail);
		}

		for (k = 0; k < chunk; k++) {
			acc = 0;
			for (s = 0; s < num_sources; s++)
				acc += *rd[s]++;

			*wr++ = sat_int32(acc);
		}

		wr = audio_stream_wrap(sink, wr);
		for (s = 0; s < num_sources; s++)
			rd[s] = audio_stream_wrap(sources[s], rd[s]);
	}
}
#endif /* CONFIG_FORMAT_S32LE */
/*
 * Frame format to processing function table for the generic C mixer.
 * Only the formats enabled by the CONFIG_FORMAT_* options get an entry.
 */
const struct mixer_func_map mixer_func_map[] = {
#if CONFIG_FORMAT_S16LE
	{ .frame_fmt = SOF_IPC_FRAME_S16_LE, .func = mix_n_s16 },
#endif
#if CONFIG_FORMAT_S24LE
	{ .frame_fmt = SOF_IPC_FRAME_S24_4LE, .func = mix_n_s24 },
#endif
#if CONFIG_FORMAT_S32LE
	{ .frame_fmt = SOF_IPC_FRAME_S32_LE, .func = mix_n_s32 },
#endif
};

/* Number of entries in mixer_func_map */
const size_t mixer_func_count = ARRAY_SIZE(mixer_func_map);
#endif

View File

@ -0,0 +1,220 @@
// SPDX-License-Identifier: BSD-3-Clause
//
// Copyright(c) 2022 Intel Corporation. All rights reserved.
//
// Author: Andrula Song <xiaoyuan.song@intel.com>
#include <sof/audio/mixer.h>
#include <sof/common.h>
#if __XCC__ && (XCHAL_HAVE_HIFI3 || XCHAL_HAVE_HIFI4)
#include <xtensa/tie/xt_hifi3.h>
#if CONFIG_FORMAT_S16LE
/*
 * Mix n 16 bit PCM source streams to one sink stream using HiFi intrinsics.
 *
 * dev:         not referenced by this function
 * sink:        destination stream, written starting at sink->w_ptr
 * sources:     source streams, each read starting at its r_ptr
 * num_sources: number of entries in sources
 * frames:      frames to mix; frames * sink->channels samples are processed
 *
 * Each pass of the outer loop handles n samples, where n is the smallest of
 * the samples still to do and the counts audio_stream_samples_without_wrap_s16()
 * returns for the sink and for every source. Groups of four samples take the
 * 16x4 vector path; the remaining 0..3 samples are mixed one at a time.
 */
static void mix_n_s16(struct comp_dev *dev, struct audio_stream __sparse_cache *sink,
const struct audio_stream __sparse_cache **sources, uint32_t num_sources,
uint32_t frames)
{
/* NOTE(review): in[] and inu[] are sized by PLATFORM_MAX_CHANNELS but indexed
 * by source number - confirm num_sources can never exceed that constant.
 */
ae_int16x4 * in[PLATFORM_MAX_CHANNELS];
ae_int16x4 *out = sink->w_ptr;
ae_int16x4 sample = AE_ZERO16();
ae_int16x4 res = AE_ZERO16();
ae_int32x2 val1;
ae_int32x2 val2;
ae_int32x2 sample_1;
ae_int32x2 sample_2;
ae_valign inu[PLATFORM_MAX_CHANNELS];
ae_valign outu = AE_ZALIGN64();
unsigned int n, m, nmax, i, j, left, left_samples;
unsigned int samples = frames * sink->channels;
for (j = 0; j < num_sources; j++) {
in[j] = sources[j]->r_ptr;
inu[j] = AE_ZALIGN64();
}
for (left_samples = samples; left_samples; left_samples -= n) {
/* wrap the pointers first, then size this pass so that neither the
 * sink nor any source wraps inside it
 */
out = audio_stream_wrap(sink, out);
nmax = audio_stream_samples_without_wrap_s16(sink, out);
n = MIN(left_samples, nmax);
for (j = 0; j < num_sources; j++) {
in[j] = audio_stream_wrap(sources[j], in[j]);
nmax = audio_stream_samples_without_wrap_s16(sources[j], in[j]);
n = MIN(n, nmax);
/* presumably primes the alignment register for the aligning loads
 * below - confirm against the HiFi ISA reference
 */
inu[j] = AE_LA64_PP(in[j]);
}
/* m groups of four samples, left = 0..3 trailing samples */
m = n >> 2;
left = n & 0x03;
for (i = 0; i < m; i++) {
val1 = AE_ZERO32();
val2 = AE_ZERO32();
for (j = 0; j < num_sources; j++) {
/* load four 16 bit samples */
AE_LA16X4_IP(sample, inu[j], in[j]);
/* widen to 32 bits so the sums of all sources cannot overflow */
sample_1 = AE_SEXT32X2D16_32(sample);
sample_2 = AE_SEXT32X2D16_10(sample);
val1 = AE_ADD32S(val1, sample_1);
val2 = AE_ADD32S(val2, sample_2);
}
/* Saturate to 16 bits */
val1 = AE_SRAA32S(AE_SLAA32S(val1, 16), 16);
val2 = AE_SRAA32S(AE_SLAA32S(val2, 16), 16);
/* keep the low 16 bits of each of the four 32 bit sums */
res = AE_CVT16X4(val1, val2);
/* store four 16 bit samples */
AE_SA16X4_IP(res, outu, out);
}
/* presumably flushes data still held in outu by the aligning stores -
 * confirm against the HiFi ISA reference
 */
AE_SA64POS_FP(outu, out);
/* Process the remaining (fewer than four) samples one by one so that
 * no load or store goes past the end of this pass.
 */
for (i = 0; i < left ; i++) {
val1 = AE_ZERO32();
for (j = 0; j < num_sources; j++) {
AE_L16_IP(sample, (ae_int16 *)in[j], sizeof(ae_int16));
sample_1 = AE_SEXT32X2D16_32(sample);
val1 = AE_ADD32S(val1, sample_1);
}
/* Saturate to 16 bits */
val1 = AE_SRAA32S(AE_SLAA32S(val1, 16), 16);
/* keep the low 16 bits of the 32 bit sums */
res = AE_CVT16X4(val1, val1);
/* store one 16 bit sample */
AE_S16_0_IP(res, (ae_int16 *)out, sizeof(ae_int16));
}
}
}
#endif /* CONFIG_FORMAT_S16LE */
#if CONFIG_FORMAT_S24LE
/*
 * Mix n 24 bit PCM source streams to one sink stream using HiFi intrinsics.
 *
 * dev:         not referenced by this function
 * sink:        destination stream, written starting at sink->w_ptr
 * sources:     source streams, each read starting at its r_ptr
 * num_sources: number of entries in sources
 * frames:      frames to mix; frames * sink->channels samples are processed
 *
 * Each pass of the outer loop handles n samples, where n is the smallest of
 * the samples still to do and the counts audio_stream_samples_without_wrap_s24()
 * returns for the sink and for every source. Pairs of samples take the 32x2
 * vector path; an odd trailing sample is mixed on its own.
 *
 * NOTE(review): in[] and inu[] are sized by PLATFORM_MAX_CHANNELS but indexed
 * by source number - confirm num_sources can never exceed that constant.
 */
static void mix_n_s24(struct comp_dev *dev, struct audio_stream __sparse_cache *sink,
		      const struct audio_stream __sparse_cache **sources, uint32_t num_sources,
		      uint32_t frames)
{
	ae_int32x2 *in[PLATFORM_MAX_CHANNELS];
	ae_int32x2 *out = sink->w_ptr;
	ae_int32x2 val;
	ae_valign inu[PLATFORM_MAX_CHANNELS];
	ae_valign outu = AE_ZALIGN64();
	ae_int32x2 sample = AE_ZERO32();
	unsigned int n, m, nmax, i, j, left, left_samples;
	unsigned int samples = frames * sink->channels;

	for (j = 0; j < num_sources; j++) {
		in[j] = sources[j]->r_ptr;
		inu[j] = AE_ZALIGN64();
	}

	for (left_samples = samples; left_samples; left_samples -= n) {
		/* wrap the pointers first, then size this pass so that neither
		 * the sink nor any source wraps inside it
		 */
		out = audio_stream_wrap(sink, out);
		nmax = audio_stream_samples_without_wrap_s24(sink, out);
		n = MIN(left_samples, nmax);
		for (j = 0; j < num_sources; j++) {
			in[j] = audio_stream_wrap(sources[j], in[j]);
			nmax = audio_stream_samples_without_wrap_s24(sources[j], in[j]);
			n = MIN(n, nmax);
			inu[j] = AE_LA64_PP(in[j]);
		}

		/* m pairs of samples, left = 0 or 1 trailing sample */
		m = n >> 1;
		left = n & 0x01;

		for (i = 0; i < m; i++) {
			val = AE_ZERO32();
			for (j = 0; j < num_sources; j++) {
				/* load two 32 bit samples */
				AE_LA32X2_IP(sample, inu[j], in[j]);
				/* Sign extend */
				sample = AE_SRAA32RS(AE_SLAI32(sample, 8), 8);
				val = AE_ADD32S(val, sample);
			}
			/* Saturate to 24 bits */
			val = AE_SRAA32S(AE_SLAA32S(val, 8), 8);
			/* store two 32 bit samples */
			AE_SA32X2_IP(val, outu, out);
		}
		AE_SA64POS_FP(outu, out);

		/* process the trailing sample alone to avoid memory access overrun */
		if (left) {
			val = AE_ZERO32();
			for (j = 0; j < num_sources; j++) {
				AE_L32_IP(sample, (ae_int32 *)in[j], sizeof(ae_int32));
				/* Sign extend */
				sample = AE_SRAA32RS(AE_SLAI32(sample, 8), 8);
				val = AE_ADD32S(val, sample);
			}
			/*
			 * Saturate to 24 bits with the same sequence as the
			 * vector path. The low 8 bits are zero after the left
			 * shift, so no rounding is needed on the way back.
			 */
			val = AE_SRAA32S(AE_SLAA32S(val, 8), 8);
			AE_S32_L_IP(val, (ae_int32 *)out, sizeof(ae_int32));
		}
	}
}
#endif /* CONFIG_FORMAT_S24LE */
#if CONFIG_FORMAT_S32LE
/*
 * Mix n 32 bit PCM source streams to one sink stream using HiFi intrinsics.
 *
 * dev:         not referenced by this function
 * sink:        destination stream, written starting at sink->w_ptr
 * sources:     source streams, each read starting at its r_ptr
 * num_sources: number of entries in sources
 * frames:      frames to mix; frames * sink->channels samples are processed
 *
 * Samples are mixed one at a time with a 64 bit accumulator. Each pass of the
 * outer loop handles n samples, where n is the smallest of the samples still
 * to do and the counts audio_stream_samples_without_wrap_s32() returns for
 * the sink and for every source.
 */
static void mix_n_s32(struct comp_dev *dev, struct audio_stream __sparse_cache *sink,
const struct audio_stream __sparse_cache **sources, uint32_t num_sources,
uint32_t frames)
{
/* NOTE(review): in[] is sized by PLATFORM_MAX_CHANNELS but indexed by source
 * number - confirm num_sources can never exceed that constant.
 */
ae_q32s * in[PLATFORM_MAX_CHANNELS];
ae_int32 *out = sink->w_ptr;
ae_int64 sample;
ae_int64 val;
ae_int32x2 res;
unsigned int n, nmax, i, j, left_samples;
/* samples consumed by the previous pass; 0 before the first pass */
unsigned int m = 0;
unsigned int samples = frames * sink->channels;
for (j = 0; j < num_sources; j++)
in[j] = sources[j]->r_ptr;
for (left_samples = samples; left_samples; left_samples -= n) {
out = audio_stream_wrap(sink, out);
nmax = audio_stream_samples_without_wrap_s32(sink, out);
n = MIN(left_samples, nmax);
for (j = 0; j < num_sources; j++) {
/* the inner loop reads through an offset and leaves in[j] untouched,
 * so the read pointer is advanced here by the previous pass's count
 */
in[j] = audio_stream_wrap(sources[j], in[j] + m);
nmax = audio_stream_samples_without_wrap_s32(sources[j], in[j]);
n = MIN(n, nmax);
}
/* record the processed samples for the next pointer advance */
m = n;
for (i = 0; i < m; i++) {
val = AE_ZERO64();
for (j = 0; j < num_sources; j++) {
/* load one 32 bit sample */
sample = AE_L32M_X(in[j], i * sizeof(ae_q32s));
val = AE_ADD64S(val, sample);
}
/* Saturate to 32 bits */
res = AE_ROUND32X2F48SSYM(val, val);
/* store one 32 bit sample */
AE_S32_L_IP(res, out, sizeof(ae_int32));
}
}
}
#endif /* CONFIG_FORMAT_S32LE */
/*
 * Frame format to processing function table for the HiFi mixer.
 * Only the formats enabled by the CONFIG_FORMAT_* options get an entry.
 */
const struct mixer_func_map mixer_func_map[] = {
#if CONFIG_FORMAT_S16LE
	{ .frame_fmt = SOF_IPC_FRAME_S16_LE, .func = mix_n_s16 },
#endif
#if CONFIG_FORMAT_S24LE
	{ .frame_fmt = SOF_IPC_FRAME_S24_4LE, .func = mix_n_s24 },
#endif
#if CONFIG_FORMAT_S32LE
	{ .frame_fmt = SOF_IPC_FRAME_S32_LE, .func = mix_n_s32 },
#endif
};

/* Number of entries in mixer_func_map */
const size_t mixer_func_count = ARRAY_SIZE(mixer_func_map);
#endif

View File

@ -8,8 +8,66 @@
#ifndef __SOF_AUDIO_MIXER_H__
#define __SOF_AUDIO_MIXER_H__
#include <sof/audio/buffer.h>
#include <sof/audio/component.h>
#include <sof/audio/format.h>
#include <sof/platform.h>
#include <stddef.h>
#include <stdint.h>
#ifdef UNIT_TEST
void sys_comp_mixer_init(void);
#endif
#define MIXER_GENERIC
#if defined(__XCC__)
#include <xtensa/config/core-isa.h>
#if XCHAL_HAVE_HIFI3 || XCHAL_HAVE_HIFI4
#undef MIXER_GENERIC
#endif
#endif
/**
 * \brief Mixer processing function interface.
 * \param[in] dev Mixer base component device.
 * \param[in,out] sink Stream that receives the mixed samples.
 * \param[in] sources Array of source streams to mix.
 * \param[in] num_sources Number of entries in \p sources.
 * \param[in] frames Number of frames to mix.
 */
typedef void (*mixer_func)(struct comp_dev *dev, struct audio_stream __sparse_cache *sink,
const struct audio_stream __sparse_cache **sources, uint32_t num_sources,
uint32_t frames);
/** \brief Mixer processing functions map. */
struct mixer_func_map {
uint16_t frame_fmt; /**< frame format */
mixer_func func; /**< mixer processing function */
};
/** \brief Map of formats with dedicated processing functions. */
extern const struct mixer_func_map mixer_func_map[];
/** \brief Number of processing functions. */
extern const size_t mixer_func_count;
/**
 * \brief Retrieves the mixer processing function for a sink buffer.
 * \param[in] dev Mixer base component device (not referenced here).
 * \param[in] sinkb Sink buffer whose stream frame format is matched.
 * \return Processing function registered for the sink frame format, or NULL
 *	   when mixer_func_map has no entry for it. Callers must check for NULL.
 */
static inline mixer_func mixer_get_processing_function(struct comp_dev *dev,
						       struct comp_buffer __sparse_cache *sinkb)
{
	/* size_t to match mixer_func_count and avoid a signed/unsigned compare */
	size_t i;

	/* map the mixer function for the sink buffer format */
	for (i = 0; i < mixer_func_count; i++) {
		if (sinkb->stream.frame_fmt == mixer_func_map[i].frame_fmt)
			return mixer_func_map[i].func;
	}

	return NULL;
}
#endif /* __SOF_AUDIO_MIXER_H__ */

View File

@ -5,7 +5,9 @@ cmocka_test(mixer
comp_mock.c
${PROJECT_SOURCE_DIR}/test/cmocka/src/notifier_mocks.c
${PROJECT_SOURCE_DIR}/src/audio/buffer.c
${PROJECT_SOURCE_DIR}/src/audio/mixer.c
${PROJECT_SOURCE_DIR}/src/audio/mixer/mixer.c
${PROJECT_SOURCE_DIR}/src/audio/mixer/mixer_generic.c
${PROJECT_SOURCE_DIR}/src/audio/mixer/mixer_hifi3.c
${PROJECT_SOURCE_DIR}/src/ipc/ipc3/helper.c
${PROJECT_SOURCE_DIR}/src/ipc/ipc-common.c
${PROJECT_SOURCE_DIR}/src/ipc/ipc-helper.c

View File

@ -650,7 +650,9 @@ zephyr_library_sources_ifdef(CONFIG_COMP_SWITCH
if(CONFIG_IPC_MAJOR_3)
zephyr_library_sources_ifdef(CONFIG_COMP_MIXER
${SOF_AUDIO_PATH}/mixer.c
${SOF_AUDIO_PATH}/mixer/mixer.c
${SOF_AUDIO_PATH}/mixer/mixer_generic.c
${SOF_AUDIO_PATH}/mixer/mixer_hifi3.c
)
elseif(CONFIG_IPC_MAJOR_4)
zephyr_library_sources_ifdef(CONFIG_COMP_MIXER