Skip to content

Commit bdd20f9

Browse files
committed
Math: Optimize sofm_exp_fixed() HiFi version
The unnecessary shift and multiply helper functions can be removed by using the normal C left shift and by using the Xtensa multiply, shift, and round intrinsics directly in the function. On the TGL HiFi3 platform this change saves 1.3 MCPS in DRC processing mode. Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
1 parent cf62b65 commit bdd20f9

File tree

1 file changed

+17
-55
lines changed

1 file changed

+17
-55
lines changed

src/math/exp_fcn_hifi.c

Lines changed: 17 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -280,52 +280,6 @@ int32_t sofm_exp_int32(int32_t x)
280280
return AE_MOVAD32_L(AE_MOVINT32X2_FROMINT64(ts));
281281
}
282282

283-
/* Fractional multiplication with shift and round
284-
* Note that the parameters a and b must be cast to (int64_t) if other type.
285-
*/
286-
static inline int exp_hifi_q_multsr_32x32(int a, int b, int c, int d, int e)
287-
{
288-
ae_int64 res;
289-
int xt_o;
290-
int shift;
291-
292-
res = AE_MUL32_LL(a, b);
293-
shift = XT_SUB(XT_ADD(c, d), XT_ADD(e, 1));
294-
res = AE_SRAA64(res, shift);
295-
res = AE_ADD64(res, 1);
296-
res = AE_SRAI64(res, 1);
297-
xt_o = AE_MOVINT32_FROMINT64(res);
298-
299-
return xt_o;
300-
}
301-
302-
/* Inline helper for Q-format shifts with rounding */
303-
static inline int exp_hifi_q_shift_rnd(int a, int b, int c)
304-
{
305-
ae_int32 res;
306-
int shift;
307-
308-
shift = XT_SUB(b, XT_ADD(c, 1));
309-
res = AE_SRAA32(a, shift);
310-
res = AE_ADD32(res, 1);
311-
res = AE_SRAI32(res, 1);
312-
313-
return res;
314-
}
315-
316-
/* Alternative version since compiler does not allow (x >> -1) */
317-
static inline int exp_hifi_q_shift_left(int a, int b, int c)
318-
{
319-
ae_int32 xt_o;
320-
int shift;
321-
322-
shift = XT_SUB(c, b);
323-
xt_o = AE_SLAA32(a, shift);
324-
325-
return xt_o;
326-
}
327-
328-
#define q_mult(a, b, qa, qb, qy) ((int32_t)exp_hifi_q_multsr_32x32((int64_t)(a), b, qa, qb, qy))
329283
/* Fixed point exponent function for approximate range -11.5 .. 7.6
330284
* that corresponds to decibels range -100 .. +66 dB.
331285
*
@@ -341,11 +295,12 @@ static inline int exp_hifi_q_shift_left(int a, int b, int c)
341295

342296
int32_t sofm_exp_fixed(int32_t x)
343297
{
298+
ae_f64 p;
299+
ae_int32 y0;
300+
ae_int32 y;
344301
int32_t xs;
345-
int32_t y;
346-
int32_t y0;
302+
int32_t n = 1;
347303
int i;
348-
int n = 0;
349304

350305
if (x < SOFM_EXP_FIXED_INPUT_MIN)
351306
return 0;
@@ -357,20 +312,27 @@ int32_t sofm_exp_fixed(int32_t x)
357312
xs = x;
358313
while (xs >= SOFM_EXP_TWO_Q27 || xs <= SOFM_EXP_MINUS_TWO_Q27) {
359314
xs >>= 1;
360-
n++;
315+
n <<= 1;
361316
}
362317

363318
/* sofm_exp_int32() input is Q4.28, while xs is Q5.27
364319
* sofm_exp_int32() output is Q9.23, while y0 is Q12.20
365320
*/
366-
y0 = exp_hifi_q_shift_rnd(sofm_exp_int32(exp_hifi_q_shift_left(xs, 27, 28)),
367-
23, 20);
321+
y0 = AE_SRAI32R(sofm_exp_int32(xs << 1), 3);
368322
y = SOFM_EXP_ONE_Q20;
369-
for (i = 0; i < (1 << n); i++)
370-
y = (int32_t)exp_hifi_q_multsr_32x32((int64_t)y, y0, 20, 20, 20);
371323

372-
return y;
324+
/* AE multiply returns Q41 from Q20 * Q20. To get Q20 it needs to be
325+
* shifted right by 21. Since the used round instruction is aligned
326+
* to the high 32 bits, it is instead shifted left by 32 - 21 = 11:
327+
*/
328+
for (i = 0; i < n; i++) {
329+
p = AE_SLAI64S(AE_MULF32S_LL(y, y0), 11);
330+
y = AE_ROUND32F64SASYM(p);
331+
}
332+
333+
return (int32_t)y;
373334
}
335+
374336
EXPORT_SYMBOL(sofm_exp_fixed);
375337

376338
#endif

0 commit comments

Comments
 (0)