mirror of https://github.com/thesofproject/sof.git
Audio: EQFIR: Optimize source and sink buffers use in generic C version
This patch replaces the audio stream read/write frag based access to source and sink with block processing based on the byte count returned by audio_stream_bytes_without_wrap(). In a test with generic C forced for an xtensa build, the processing load was 324 MCPS in the original and 309 MCPS in the optimized version, a saving of 15 MCPS. The load is nearly the same for all formats s16/s24/s32. The base load was very high in the test because a very long FIR filter was used. The MCPS saving should be the same for all stereo 48k streams. Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
This commit is contained in:
parent
d0057a7def
commit
2b6001e065
|
@ -21,23 +21,32 @@ void eq_fir_s16(struct fir_state_32x16 fir[], const struct audio_stream *source,
|
|||
struct audio_stream *sink, int frames, int nch)
|
||||
{
|
||||
struct fir_state_32x16 *filter;
|
||||
int16_t *x;
|
||||
int16_t *y;
|
||||
int32_t z;
|
||||
int idx;
|
||||
int ch;
|
||||
int i;
|
||||
int16_t *x0, *y0;
|
||||
int16_t *x = source->r_ptr;
|
||||
int16_t *y = sink->w_ptr;
|
||||
int nmax, n, i, j;
|
||||
int remaining_samples = frames * nch;
|
||||
|
||||
for (ch = 0; ch < nch; ch++) {
|
||||
filter = &fir[ch];
|
||||
idx = ch;
|
||||
for (i = 0; i < frames; i++) {
|
||||
x = audio_stream_read_frag_s16(source, idx);
|
||||
y = audio_stream_write_frag_s16(sink, idx);
|
||||
z = fir_32x16(filter, *x << 16);
|
||||
*y = sat_int16(Q_SHIFT_RND(z, 31, 15));
|
||||
idx += nch;
|
||||
while (remaining_samples) {
|
||||
nmax = EQ_FIR_BYTES_TO_S16_SAMPLES(audio_stream_bytes_without_wrap(source, x));
|
||||
n = MIN(remaining_samples, nmax);
|
||||
nmax = EQ_FIR_BYTES_TO_S16_SAMPLES(audio_stream_bytes_without_wrap(sink, y));
|
||||
n = MIN(n, nmax);
|
||||
for (j = 0; j < nch; j++) {
|
||||
x0 = x + j;
|
||||
y0 = y + j;
|
||||
filter = &fir[j];
|
||||
for (i = 0; i < n; i += nch) {
|
||||
z = fir_32x16(filter, *x0 << 16);
|
||||
*y0 = sat_int16(Q_SHIFT_RND(z, 31, 15));
|
||||
x0 += nch;
|
||||
y0 += nch;
|
||||
}
|
||||
}
|
||||
remaining_samples -= n;
|
||||
x = audio_stream_wrap(source, x + n);
|
||||
y = audio_stream_wrap(sink, y + n);
|
||||
}
|
||||
}
|
||||
#endif /* CONFIG_FORMAT_S16LE */
|
||||
|
@ -47,23 +56,32 @@ void eq_fir_s24(struct fir_state_32x16 fir[], const struct audio_stream *source,
|
|||
struct audio_stream *sink, int frames, int nch)
|
||||
{
|
||||
struct fir_state_32x16 *filter;
|
||||
int32_t *x;
|
||||
int32_t *y;
|
||||
int32_t z;
|
||||
int idx;
|
||||
int ch;
|
||||
int i;
|
||||
int32_t *x0, *y0;
|
||||
int32_t *x = source->r_ptr;
|
||||
int32_t *y = sink->w_ptr;
|
||||
int nmax, n, i, j;
|
||||
int remaining_samples = frames * nch;
|
||||
|
||||
for (ch = 0; ch < nch; ch++) {
|
||||
filter = &fir[ch];
|
||||
idx = ch;
|
||||
for (i = 0; i < frames; i++) {
|
||||
x = audio_stream_read_frag_s32(source, idx);
|
||||
y = audio_stream_write_frag_s32(sink, idx);
|
||||
z = fir_32x16(filter, *x << 8);
|
||||
*y = sat_int24(Q_SHIFT_RND(z, 31, 23));
|
||||
idx += nch;
|
||||
while (remaining_samples) {
|
||||
nmax = EQ_FIR_BYTES_TO_S32_SAMPLES(audio_stream_bytes_without_wrap(source, x));
|
||||
n = MIN(remaining_samples, nmax);
|
||||
nmax = EQ_FIR_BYTES_TO_S32_SAMPLES(audio_stream_bytes_without_wrap(sink, y));
|
||||
n = MIN(n, nmax);
|
||||
for (j = 0; j < nch; j++) {
|
||||
x0 = x + j;
|
||||
y0 = y + j;
|
||||
filter = &fir[j];
|
||||
for (i = 0; i < n; i += nch) {
|
||||
z = fir_32x16(filter, *x0 << 8);
|
||||
*y0 = sat_int24(Q_SHIFT_RND(z, 31, 23));
|
||||
x0 += nch;
|
||||
y0 += nch;
|
||||
}
|
||||
}
|
||||
remaining_samples -= n;
|
||||
x = audio_stream_wrap(source, x + n);
|
||||
y = audio_stream_wrap(sink, y + n);
|
||||
}
|
||||
}
|
||||
#endif /* CONFIG_FORMAT_S24LE */
|
||||
|
@ -73,21 +91,30 @@ void eq_fir_s32(struct fir_state_32x16 fir[], const struct audio_stream *source,
|
|||
struct audio_stream *sink, int frames, int nch)
|
||||
{
|
||||
struct fir_state_32x16 *filter;
|
||||
int32_t *x;
|
||||
int32_t *y;
|
||||
int idx;
|
||||
int ch;
|
||||
int i;
|
||||
int32_t *x0, *y0;
|
||||
int32_t *x = source->r_ptr;
|
||||
int32_t *y = sink->w_ptr;
|
||||
int nmax, n, i, j;
|
||||
int remaining_samples = frames * nch;
|
||||
|
||||
for (ch = 0; ch < nch; ch++) {
|
||||
filter = &fir[ch];
|
||||
idx = ch;
|
||||
for (i = 0; i < frames; i++) {
|
||||
x = audio_stream_read_frag_s32(source, idx);
|
||||
y = audio_stream_write_frag_s32(sink, idx);
|
||||
*y = fir_32x16(filter, *x);
|
||||
idx += nch;
|
||||
while (remaining_samples) {
|
||||
nmax = EQ_FIR_BYTES_TO_S32_SAMPLES(audio_stream_bytes_without_wrap(source, x));
|
||||
n = MIN(remaining_samples, nmax);
|
||||
nmax = EQ_FIR_BYTES_TO_S32_SAMPLES(audio_stream_bytes_without_wrap(sink, y));
|
||||
n = MIN(n, nmax);
|
||||
for (j = 0; j < nch; j++) {
|
||||
x0 = x + j;
|
||||
y0 = y + j;
|
||||
filter = &fir[j];
|
||||
for (i = 0; i < n; i += nch) {
|
||||
*y0 = fir_32x16(filter, *x0);
|
||||
x0 += nch;
|
||||
y0 += nch;
|
||||
}
|
||||
}
|
||||
remaining_samples -= n;
|
||||
x = audio_stream_wrap(source, x + n);
|
||||
y = audio_stream_wrap(sink, y + n);
|
||||
}
|
||||
}
|
||||
#endif /* CONFIG_FORMAT_S32LE */
|
||||
|
|
|
@ -23,6 +23,10 @@
|
|||
#include <user/fir.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/** \brief Macros to convert a byte count to a sample count without division */
|
||||
#define EQ_FIR_BYTES_TO_S16_SAMPLES(b) ((b) >> 1)
|
||||
#define EQ_FIR_BYTES_TO_S32_SAMPLES(b) ((b) >> 2)
|
||||
|
||||
#if CONFIG_FORMAT_S16LE
|
||||
void eq_fir_s16(struct fir_state_32x16 *fir, const struct audio_stream *source,
|
||||
struct audio_stream *sink, int frames, int nch);
|
||||
|
|
Loading…
Reference in New Issue