From e2b328d6c36b12ce6b11e7d3c416cb9314ba809a Mon Sep 17 00:00:00 2001 From: Clement Courbet Date: Tue, 16 Jun 2020 08:34:21 +0200 Subject: [PATCH] Vectorize audio downsampling. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Benchmarks: ``` name old time/op new time/op delta BM_Resample 36.9ms ± 1% 30.4ms ± 1% -17.82% (p=0.000 n=10+9) BM_Resample 24.7ms ± 0% 23.4ms ± 1% -5.27% (p=0.000 n=9+9) BM_Resample 6.27ms ± 0% 3.88ms ± 1% -38.10% (p=0.000 n=9+10) BM_Resample 124ms ± 1% 95ms ± 1% -22.90% (p=0.000 n=10+10) BM_Resample 84.8ms ± 1% 75.2ms ± 1% -11.26% (p=0.000 n=9+10) BM_Resample 22.1ms ± 1% 19.2ms ± 2% -13.31% (p=0.000 n=9+10) ``` --- src/filterkit.c | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/src/filterkit.c b/src/filterkit.c index d4360a4..953134b 100644 --- a/src/filterkit.c +++ b/src/filterkit.c @@ -20,6 +20,10 @@ #include "filterkit.h" +#ifdef __SSE4_1__ +#include +#endif + #include #include #include @@ -202,7 +206,42 @@ float lrsFilterUD(float Imp[], /* impulse response */ Ho += dhb; /* IR step */ Xp += Inc; /* Input signal step. NO CHECK ON BOUNDS */ } - else + else { +#ifdef __SSE4_1__ + __m128i vv = _mm_set1_ps(0.0f); + if (Inc == 1) { + while ((Hp = &Imp[(int)(Ho + 4*dhb)]) < End) { + /* Get IR sample */ + __m128i vt = _mm_set_ps(Imp[(int)(Ho + 3 * dhb)], + Imp[(int)(Ho + 2*dhb)], + Imp[(int)(Ho + dhb)], + Imp[(int)(Ho)]); + vt = _mm_mul_ps(vt, _mm_loadu_ps(Xp));/* Mult coeff by input + sample */ + vv = _mm_add_ps(vv, vt); /* The filter output */ + Ho += 4*dhb; /* IR step */ + Xp += 4; /* Input signal step. NO CHECK ON BOUNDS */ + } + } else { + while ((Hp = &Imp[(int)(Ho + 4*dhb)]) < End) { + /* Get IR sample */ + __m128i vt = _mm_set_ps(Imp[(int)(Ho)], + Imp[(int)(Ho + dhb)], + Imp[(int)(Ho + 2*dhb)], + Imp[(int)(Ho + 3*dhb)]); + vt = _mm_mul_ps(vt, _mm_loadu_ps(Xp - 3)); /* Mult coeff by input + sample */ + vv = _mm_add_ps(vv, vt); /* The filter output */ + Ho += 4*dhb; /* IR step */ + Xp -= 4; /* Input signal step. NO CHECK ON BOUNDS */ + } + } + float elems[4]; + memcpy(elems, &vv, sizeof(vv)); + v = elems[0] + elems[1] + elems[2] + elems[3]; + // Fallthrough intended: the rest of the loop is done in a non-vectorized + // way. +#endif while ((Hp = &Imp[(int)Ho]) < End) { t = *Hp; /* Get IR sample */ t *= *Xp; /* Mult coeff by input sample */ @@ -210,6 +249,6 @@ float lrsFilterUD(float Imp[], /* impulse response */ Ho += dhb; /* IR step */ Xp += Inc; /* Input signal step. NO CHECK ON BOUNDS */ } - + } return v; }