Audio: MFCC: Add HiFi4 implementation of MFCC

Add HiFi4 implementation of MFCC.

Signed-off-by: Andrula Song <andrula.song@intel.com>
This commit is contained in:
Andrula Song 2023-07-31 15:50:52 +08:00 committed by Liam Girdwood
parent 9b9c683cc6
commit 5c92bddcf4
5 changed files with 305 additions and 2 deletions

View File

@ -174,7 +174,7 @@ set(crossover_sources crossover/crossover.c crossover/crossover_generic.c)
set(tdfb_sources tdfb/tdfb.c tdfb/tdfb_generic.c tdfb/tdfb_direction.c)
set(drc_sources drc/drc.c drc/drc_generic.c drc/drc_math_generic.c)
set(multiband_drc_sources multiband_drc/multiband_drc_generic.c crossover/crossover.c crossover/crossover_generic.c drc/drc.c drc/drc_generic.c drc/drc_math_generic.c multiband_drc/multiband_drc.c )
set(mfcc_sources mfcc/mfcc.c mfcc/mfcc_setup.c mfcc/mfcc_common.c mfcc/mfcc_generic.c)
set(mfcc_sources mfcc/mfcc.c mfcc/mfcc_setup.c mfcc/mfcc_common.c mfcc/mfcc_generic.c mfcc/mfcc_hifi4.c)
set(mux_sources mux/mux.c mux/mux_generic.c)
foreach(audio_module ${sof_audio_modules})

View File

@ -1,3 +1,3 @@
# SPDX-License-Identifier: BSD-3-Clause
add_local_sources(sof mfcc.c mfcc_setup.c mfcc_common.c mfcc_generic.c)
add_local_sources(sof mfcc.c mfcc_setup.c mfcc_common.c mfcc_generic.c mfcc_hifi4.c)

View File

@ -5,6 +5,7 @@
// Author: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
#include <sof/audio/mfcc/mfcc_comp.h>
#ifdef MFCC_GENERIC
#include <sof/audio/component.h>
#include <sof/audio/audio_stream.h>
@ -235,3 +236,4 @@ int16_t *mfcc_sink_copy_data_s16(const struct audio_stream *sink, int16_t *w_ptr
}
#endif /* CONFIG_FORMAT_S16LE */
#endif

287
src/audio/mfcc/mfcc_hifi4.c Normal file
View File

@ -0,0 +1,287 @@
// SPDX-License-Identifier: BSD-3-Clause
//
// Copyright(c) 2023 Intel Corporation. All rights reserved.
//
// Author: Andrula Song <andrula.song@intel.com>
#include <sof/audio/mfcc/mfcc_comp.h>
#ifdef MFCC_HIFI4
#include <sof/audio/component.h>
#include <sof/audio/audio_stream.h>
#include <sof/math/auditory.h>
#include <sof/math/matrix.h>
#include <sof/math/sqrt.h>
#include <sof/math/trig.h>
#include <sof/math/window.h>
#include <sof/trace/trace.h>
#include <user/mfcc.h>
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <xtensa/tie/xt_hifi4.h>
/* Configure circular buffer 0.
 *
 * Writes the begin and end addresses of circular addressing region 0 with
 * AE_SETCBEGIN0() and AE_SETCEND0(). Functions in this file call it before
 * issuing AE_*_XC / AE_*_IC accesses that must wrap inside the given buffer.
 */
static inline void set_circular_buf0(const void *start, const void *end)
{
	AE_SETCBEGIN0(start);
	AE_SETCEND0(end);
}
/* Configure circular buffer 1.
 *
 * Writes the begin and end addresses of circular addressing region 1 with
 * AE_SETCBEGIN1() and AE_SETCEND1().
 */
static inline void set_circular_buf1(const void *start, const void *end)
{
	AE_SETCBEGIN1(start);
	AE_SETCEND1(end);
}
/*
* MFCC algorithm code
*/
void mfcc_source_copy_s16(struct input_stream_buffer *bsource, struct mfcc_buffer *buf,
struct mfcc_pre_emph *emph, int frames, int source_channel)
{
struct audio_stream __sparse_cache *source = bsource->data;
int num_channels = audio_stream_get_channels(source);
ae_int16 *in = (ae_int16 *)source->r_ptr + source_channel;
ae_int16 *out = (ae_int16 *)buf->w_ptr;
ae_int16x4 sample;
ae_int32x2 temp;
ae_int16x4 coef;
ae_int16x4 delay;
const int in_inc = sizeof(ae_int16) * num_channels;
const int out_inc = sizeof(ae_int16);
int i;
set_circular_buf1(buf->addr, buf->end_addr);
set_circular_buf0(source->addr, source->end_addr);
/* Copy from source to pre-buffer for FFT.
* The pre-emphasis filter is done in this step.
*/
if (emph->enable) {
delay = emph->delay;
coef = emph->coef;
for (i = 0; i < frames; i++) {
AE_L16_XC(sample, in, in_inc);
/* Q1.15 -> Q1.31 */
temp = AE_CVT32X2F16_10(sample);
AE_MULAF16SS_00(temp, delay, coef);
delay = sample;
sample = AE_ROUND16X4F32SSYM(temp, temp);
AE_S16_0_XC1(sample, out, out_inc);
}
emph->delay = delay;
} else {
for (i = 0; i < frames; i++) {
AE_L16_XC(sample, in, in_inc);
AE_S16_0_XC1(sample, out, out_inc);
}
}
buf->s_avail += frames;
buf->s_free -= frames;
buf->w_ptr = out;
}
void mfcc_fill_prev_samples(struct mfcc_buffer *buf, int16_t *prev_data,
int prev_data_length)
{
/* Fill prev_data from input buffer */
ae_int32 *out = (ae_int32 *)prev_data;
ae_int32 *in = (ae_int32 *)buf->r_ptr;
ae_int32x2 in_sample;
ae_int16x4 sample;
const int inc = sizeof(ae_int32);
int n = prev_data_length >> 1;
int i;
/* Set buf as circular buffer 0 */
set_circular_buf0(buf->addr, buf->end_addr);
/* very strange this align load is unexpected
* so use load a 32bit to replace 16x4 align load.
*/
for (i = 0; i < n; i++) {
AE_L32_XC(in_sample, in, inc);
/* sizeof(ae_int32) = 4 */
AE_S32_L_IP(in_sample, out, 4);
}
if (prev_data_length & 0x01) {
AE_L16_XC(sample, (ae_int16 *)in, sizeof(ae_int16));
AE_S16_0_IP(sample, (ae_int16 *)out, sizeof(ae_int16));
}
buf->s_avail -= prev_data_length;
buf->s_free += prev_data_length;
buf->r_ptr = in;
}
/* Assemble one FFT input frame from the overlap data and new samples.
 *
 * Starting at fft_buf[fft_fill_start_idx], the real parts are filled with
 * state->prev_data_size samples from state->prev_data followed by
 * fft->fft_hop_size new samples read from the MFCC circular buffer. The
 * imaginary parts are not touched. The tail of the assembled frame is then
 * saved back to state->prev_data as the overlap for the next call, and the
 * hop-size samples are consumed from the circular buffer.
 */
void mfcc_fill_fft_buffer(struct mfcc_state *state)
{
	struct mfcc_buffer *buf = &state->buf;
	struct mfcc_fft *fft = &state->fft;
	int idx = fft->fft_fill_start_idx;
	ae_int16 *out = (ae_int16 *)&fft->fft_buf[idx].real;
	ae_int16 *in = (ae_int16 *)state->prev_data;
	ae_int16x4 sample;
	const int buf_inc = sizeof(ae_int16);
	const int fft_inc = sizeof(fft->fft_buf[0]);
	int j;

	/* Copy overlapped samples from state buffer. Imaginary part of input
	 * remains zero.
	 */
	for (j = 0; j < state->prev_data_size; j++) {
		AE_L16_XP(sample, in, buf_inc);
		AE_S16_0_XP(sample, out, fft_inc);
	}

	/* Copy hop size of new data from circular buffer */
	idx += state->prev_data_size;
	in = (ae_int16 *)buf->r_ptr;
	out = (ae_int16 *)&fft->fft_buf[idx].real;

	/* AE_L16_XC wraps against circular buffer 0, so that is the region
	 * that has to describe the MFCC buffer here. Programming circular
	 * buffer 1 instead would leave the read wrapping against whatever
	 * region 0 was last set to by another function.
	 */
	set_circular_buf0(buf->addr, buf->end_addr);
	for (j = 0; j < fft->fft_hop_size; j++) {
		AE_L16_XC(sample, in, buf_inc);
		AE_S16_0_XP(sample, out, fft_inc);
	}

	buf->s_avail -= fft->fft_hop_size;
	buf->s_free += fft->fft_hop_size;
	buf->r_ptr = (int16_t *)in;

	/* Copy for next time data back to overlap buffer */
	idx = fft->fft_fill_start_idx + fft->fft_hop_size;
	in = (ae_int16 *)&fft->fft_buf[idx].real;
	out = (ae_int16 *)state->prev_data;
	for (j = 0; j < state->prev_data_size; j++) {
		AE_L16_XP(sample, in, fft_inc);
		AE_S16_0_XP(sample, out, buf_inc);
	}
}
#ifdef MFCC_NORMALIZE_FFT
/* Find how many bits the FFT input frame can be shifted left.
 *
 * Scans the real parts of fft->fft_size entries beginning at
 * fft_buf[fft_fill_start_idx], tracks the largest absolute value and returns
 * the resulting shift, limited to the range 0..MFCC_NORMALIZE_MAX_SHIFT.
 *
 * NOTE(review): AE_L16M_XU looks like an update-before-load addressing form.
 * If so, the loop skips the element at fft_fill_start_idx and reads one
 * element past the last one - confirm against the HiFi4 ISA reference.
 */
int mfcc_normalize_fft_buffer(struct mfcc_state *state)
{
	struct mfcc_fft *fft = &state->fft;
	const int stride = sizeof(fft->fft_buf[0]);
	ae_p16s *p = (ae_p16s *)&fft->fft_buf[fft->fft_fill_start_idx].real;
	ae_int32x2 peak = AE_ZERO32();
	ae_int32x2 x;
	int headroom;
	int left;

	for (left = fft->fft_size; left > 0; left--) {
		/* The 16-bit value lands in the middle of the 32-bit container */
		AE_L16M_XU(x, p, stride);
		peak = AE_MAXABS32S(peak, x);
	}

	/* 16 bit data: discount the 8 bits the container adds above it */
	headroom = AE_NSAZ32_L(peak) - 8;
	headroom = MAX(headroom, 0);
	return MIN(headroom, MFCC_NORMALIZE_MAX_SHIFT);
}
#endif
/* Multiply the FFT input frame by the window function, in place.
 *
 * state:       MFCC state; state->window holds the 16-bit window
 *              coefficients and state->fft the FFT buffer
 * input_shift: shift applied with AE_SLAA32S() to each windowed sample
 *              before it is written back
 *
 * fft->fft_size real parts are processed starting from
 * fft_buf[fft_fill_start_idx]; imaginary parts are not touched.
 */
void mfcc_apply_window(struct mfcc_state *state, int input_shift)
{
	struct mfcc_fft *fft = &state->fft;
	const int fft_inc = sizeof(fft->fft_buf[0]);
	ae_int16 *win_in = (ae_int16 *)state->window;
	const int win_inc = sizeof(ae_int16);
	ae_int32x2 temp;
	ae_int16x4 win;
	int j;

#if MFCC_FFT_BITS == 16
	ae_int16 *fft_in = (ae_int16 *)&fft->fft_buf[fft->fft_fill_start_idx].real;
	ae_int16x4 sample;

	for (j = 0; j < fft->fft_size; j++) {
		/* Load without advancing; the store below steps to the next entry */
		AE_L16_IP(sample, fft_in, 0);
		AE_L16_XP(win, win_in, win_inc);
		temp = AE_MULF16SS_00(sample, win);
		temp = AE_SLAA32S(temp, input_shift);
		sample = AE_ROUND16X4F32SASYM(temp, temp);
		AE_S16_0_XP(sample, fft_in, fft_inc);
	}
#else
	ae_int32 *fft_in = (ae_int32 *)&fft->fft_buf[fft->fft_fill_start_idx].real;
	ae_int32x2 sample;

	for (j = 0; j < fft->fft_size; j++) {
		/* Load without advancing; the store below steps to the next entry */
		AE_L32_IP(sample, fft_in, 0);
		AE_L16_XP(win, win_in, win_inc);
		/* A single multiply is enough: an earlier _H product was
		 * overwritten before use and has been dropped.
		 */
		temp = AE_MULFP32X16X2RS_L(sample, win);
		temp = AE_SLAA32S(temp, input_shift);
		AE_S32_L_XP(temp, fft_in, fft_inc);
	}
#endif
}
#if CONFIG_FORMAT_S16LE
int16_t *mfcc_sink_copy_zero_s16(const struct audio_stream *sink,
int16_t *w_ptr, int samples)
{
int i;
int n = samples >> 2;
int m = samples & 0x03;
ae_int16x4 *out = (ae_int16x4 *)w_ptr;
const int inc = sizeof(ae_int16);
ae_valign outu = AE_ZALIGN64();
ae_int16x4 zero = AE_ZERO16();
set_circular_buf0(sink->addr, sink->end_addr);
for (i = 0; i < n; i++)
AE_SA16X4_IC(zero, outu, out);
AE_SA64POS_FP(outu, out);
/* process the left samples that less than 4
* one by one to avoid memory access overrun
*/
for (i = 0; i < m ; i++)
AE_S16_0_XC(zero, (ae_int16 *)out, inc);
return (int16_t *)out;
}
int16_t *mfcc_sink_copy_data_s16(const struct audio_stream *sink, int16_t *w_ptr,
int samples, int16_t *r_ptr)
{
int i;
int n = samples >> 2;
int m = samples & 0x03;
ae_int16x4 *out = (ae_int16x4 *)w_ptr;
ae_int16x4 *in = (ae_int16x4 *)r_ptr;
ae_valign outu = AE_ZALIGN64();
ae_valign inu = AE_ZALIGN64();
const int inc = sizeof(ae_int16);
ae_int16x4 in_sample;
set_circular_buf0(sink->addr, sink->end_addr);
inu = AE_LA64_PP(in);
for (i = 0; i < n; i++) {
AE_LA16X4_IP(in_sample, inu, in);
AE_SA16X4_IC(in_sample, outu, out);
}
AE_SA64POS_FP(outu, out);
/* process the left samples that less than 4
* one by one to avoid memory access overrun
*/
for (i = 0; i < m ; i++) {
AE_L16_XP(in_sample, (ae_int16 *)in, inc);
AE_S16_0_XC(in_sample, (ae_int16 *)out, inc);
}
return (int16_t *)out;
}
#endif /* CONFIG_FORMAT_S16LE */
#endif

View File

@ -15,6 +15,20 @@
#include <stddef.h>
#include <stdint.h>
/* Select the MFCC processing implementation at build time.
 *
 * __XCC__ is both for xt_xcc and xt_clang. With those compilers the core
 * ISA configuration decides: MFCC_HIFI4 when XCHAL_HAVE_HIFI4 is set,
 * MFCC_HIFI3 when XCHAL_HAVE_HIFI3 is set, MFCC_GENERIC otherwise. Any other
 * compiler gets MFCC_GENERIC. mfcc_generic.c and mfcc_hifi4.c wrap their
 * code in #ifdef MFCC_GENERIC / #ifdef MFCC_HIFI4.
 *
 * NOTE(review): no source guarded by MFCC_HIFI3 is visible in this change;
 * confirm a HiFi3 implementation is built, otherwise a HiFi3-only core
 * would select neither of the two implementations above.
 */
#if defined(__XCC__)
# include <xtensa/config/core-isa.h>
# if XCHAL_HAVE_HIFI4
# define MFCC_HIFI4
# elif XCHAL_HAVE_HIFI3
# define MFCC_HIFI3
# else
# define MFCC_GENERIC
# endif
#else
/* Not an Xtensa compiler: use the portable C implementation */
# define MFCC_GENERIC
#endif
#define MFCC_MAGIC 0x6d666363 /* ASCII for "mfcc" */
/* Set to 16 for lower RAM and MCPS with slightly lower quality. Set to 32 for best