Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion celt/_kiss_fft_guts.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@
#if defined(OPUS_ARM_INLINE_EDSP)
#include "arm/kiss_fft_armv5e.h"
#endif
#if defined(__mips_dsp) && __mips == 32
#if defined(__mips)
#include "mips/kiss_fft_mipsr1.h"
#endif

Expand Down
3 changes: 2 additions & 1 deletion celt/arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,9 +227,10 @@ typedef opus_val16 celt_coef;
#define ABS16(x) ((x) < 0 ? (-(x)) : (x))
#define ABS32(x) ((x) < 0 ? (-(x)) : (x))

static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
/** Clamp a 32-bit value into the signed 16-bit range [-32768, 32767].
 *  This is the portable implementation; SAT16 maps onto it by default. */
static OPUS_INLINE opus_int16 SAT16_default(opus_int32 x) {
   if (x > 32767)
      return 32767;
   if (x < -32768)
      return -32768;
   /* Already representable: the narrowing cast cannot lose information. */
   return (opus_int16)x;
}
#define SAT16(x) SAT16_default(x)

#ifdef FIXED_DEBUG
#include "fixed_debug.h"
Expand Down
2 changes: 1 addition & 1 deletion celt/celt.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
#define PACKAGE_VERSION "unknown"
#endif

#if defined(FIXED_POINT) && defined(__mips_dsp) && __mips == 32
#if defined(FIXED_POINT) && defined(__mips)
#include "mips/celt_mipsr1.h"
#endif

Expand Down
7 changes: 7 additions & 0 deletions celt/celt_lpc.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,15 @@ int p
for (i = 0; i < p; i++) {
/* Sum up this iteration's reflection coefficient */
opus_val32 rr = 0;
#if defined (FIXED_POINT) && OPUS_FAST_INT64
opus_int64 acc = 0;
for (j = 0; j < i; j++)
acc += (opus_int64)(lpc[j]) * (opus_int64)(ac[i - j]);
rr = (opus_val32)SHR(acc, 31);
#else
for (j = 0; j < i; j++)
rr += MULT32_32_Q31(lpc[j],ac[i - j]);
#endif
rr += SHR32(ac[i + 1],6);
r = -frac_div32(SHL32(rr,6), error);
/* Update LPC coefficients and total error */
Expand Down
11 changes: 6 additions & 5 deletions celt/fixed_generic.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,8 @@
#define SHR(a,shift) ((a) >> (shift))
#define SHL(a,shift) SHL32(a,shift)
#define PSHR(a,shift) (SHR((a)+((EXTEND32(1)<<((shift))>>1)),shift))
#define SATURATE(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
/* Clamp x to the symmetric range [-a, a]. Both arguments are expanded more
   than once, so they must be free of side effects. */
#define SATURATE_generic(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
/* Default SATURATE simply forwards to the generic clamp above. */
#define SATURATE(x,a) SATURATE_generic((x), (a))

#define SATURATE16(x) (EXTRACT16((x)>32767 ? 32767 : (x)<-32768 ? -32768 : (x)))

Expand Down Expand Up @@ -200,10 +201,6 @@
/** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */
#define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b)))

#if defined(__mips_dsp) && __mips == 32
#include "mips/fixed_generic_mipsr1.h"
#endif

static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x)
{
x = PSHR32(x, SIG_SHIFT);
Expand All @@ -213,4 +210,8 @@ static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x)
}
#define SIG2WORD16(x) (SIG2WORD16_generic(x))

#if defined(__mips)
#include "mips/fixed_generic_mipsr1.h"
#endif

#endif
2 changes: 2 additions & 0 deletions celt/kiss_fft.c
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,7 @@ void opus_fft_free(const kiss_fft_state *cfg, int arch)
#endif /* CUSTOM_MODES */

#ifdef FIXED_POINT
#ifndef OVERRIDE_fft_downshift
static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) {
int shift;
shift = IMIN(step, *total);
Expand All @@ -553,6 +554,7 @@ static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) {
}
}
}
#endif /* OVERRIDE_fft_downshift */
#else
#define fft_downshift(x, N, total, step)
#endif
Expand Down
2 changes: 1 addition & 1 deletion celt/mdct.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
#include "mathops.h"
#include "stack_alloc.h"

#if defined(FIXED_POINT) && defined(__mips_dsp) && __mips == 32
#if defined(FIXED_POINT) && defined(__mips) && __mips == 32
#include "mips/mdct_mipsr1.h"
#endif

Expand Down
72 changes: 59 additions & 13 deletions celt/mips/celt_mipsr1.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,16 @@

#define CELT_C

#if defined (__mips_dsp) && __mips == 32

#define OVERRIDE_COMB_FILTER_CONST
#define OVERRIDE_comb_filter
#elif defined(__mips_isa_rev) && __mips_isa_rev < 6

#define OVERRIDE_COMB_FILTER_CONST
#define OVERRIDE_comb_filter
#endif

#include "os_support.h"
#include "mdct.h"
#include <math.h>
Expand All @@ -53,8 +63,43 @@
#include "celt_lpc.h"
#include "vq.h"

#define OVERRIDE_COMB_FILTER_CONST
#define OVERRIDE_comb_filter
#if defined (__mips_dsp) && __mips == 32

/* DSP build (__mips_dsp, 32-bit): the 64-bit accumulator helpers are plain
   aliases for the compiler's MIPS DSP built-ins. */
#define MIPS_MULT __builtin_mips_mult
#define MIPS_MADD __builtin_mips_madd
#define MIPS_EXTR __builtin_mips_extr_w

#elif defined(__mips_isa_rev) && __mips_isa_rev < 6

/* Multiply two 32-bit integers with the "mult" instruction and return the
 * 64-bit result as a long long.
 * Used when the DSP built-ins are not selected (see the enclosing #elif). */
static inline long long MIPS_MULT(int a, int b) {
long long acc;

/* The "x" output constraint binds acc to the HI/LO register pair that
   "mult" writes (per GCC's MIPS machine constraints). */
asm volatile (
"mult %[a], %[b] \n"
: [acc] "=x"(acc)
: [a] "r"(a), [b] "r"(b)
:
);
return acc;
}

/* Multiply-accumulate: issues "madd" on a and b against the 64-bit
 * accumulator passed in acc and returns the updated accumulator. */
static inline long long MIPS_MADD(long long acc, int a, int b) {
/* "+x" makes acc both an input and an output held in the HI/LO pair,
   so the incoming value is loaded before "madd" executes. */
asm volatile (
"madd %[a], %[b] \n"
: [acc] "+x"(acc)
: [a] "r"(a), [b] "r"(b)
:
);
return acc;
}

static inline opus_val32 MIPS_EXTR(long long acc, int shift) {
return (opus_val32)(acc >> shift);
}

#endif

#if defined (OVERRIDE_comb_filter)
void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
const opus_val16 *window, int overlap, int arch)
Expand Down Expand Up @@ -101,13 +146,13 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
f = MULT16_16_Q15(window[i],window[i]);
x0= x[i-T1+2];

acc = __builtin_mips_mult((int)MULT16_16_Q15((Q15ONE-f),g00), (int)x[i-T0]);
acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15((Q15ONE-f),g01), (int)ADD32(x[i-T0-1],x[i-T0+1]));
acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15((Q15ONE-f),g02), (int)ADD32(x[i-T0-2],x[i-T0+2]));
acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15(f,g10), (int)x2);
acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15(f,g11), (int)ADD32(x3,x1));
acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15(f,g12), (int)ADD32(x4,x0));
res = __builtin_mips_extr_w(acc, 15);
acc = MIPS_MULT((int)MULT16_16_Q15((Q15ONE-f),g00), (int)x[i-T0]);
acc = MIPS_MADD(acc, (int)MULT16_16_Q15((Q15ONE-f),g01), (int)ADD32(x[i-T0-1],x[i-T0+1]));
acc = MIPS_MADD(acc, (int)MULT16_16_Q15((Q15ONE-f),g02), (int)ADD32(x[i-T0-2],x[i-T0+2]));
acc = MIPS_MADD(acc, (int)MULT16_16_Q15(f,g10), (int)x2);
acc = MIPS_MADD(acc, (int)MULT16_16_Q15(f,g11), (int)ADD32(x3,x1));
acc = MIPS_MADD(acc, (int)MULT16_16_Q15(f,g12), (int)ADD32(x4,x0));
res = MIPS_EXTR(acc, 15);

y[i] = x[i] + res;

Expand Down Expand Up @@ -136,10 +181,10 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
long long acc;
x0=x[i-T1+2];

acc = __builtin_mips_mult((int)g10, (int)x2);
acc = __builtin_mips_madd(acc, (int)g11, (int)ADD32(x3,x1));
acc = __builtin_mips_madd(acc, (int)g12, (int)ADD32(x4,x0));
res = __builtin_mips_extr_w(acc, 15);
acc = MIPS_MULT((int)g10, (int)x2);
acc = MIPS_MADD(acc, (int)g11, (int)ADD32(x3,x1));
acc = MIPS_MADD(acc, (int)g12, (int)ADD32(x4,x0));
res = MIPS_EXTR(acc, 15);

y[i] = x[i] + res;
x4=x3;
Expand All @@ -148,5 +193,6 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
x1=x0;
}
}
#endif /* OVERRIDE_comb_filter */

#endif /* CELT_MIPSR1_H__ */
130 changes: 103 additions & 27 deletions celt/mips/fixed_generic_mipsr1.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,34 +33,10 @@
#ifndef CELT_FIXED_GENERIC_MIPSR1_H
#define CELT_FIXED_GENERIC_MIPSR1_H

#undef MULT16_32_Q15_ADD
static inline int MULT16_32_Q15_ADD(int a, int b, int c, int d) {
long long acc = __builtin_mips_mult(a, b);
acc = __builtin_mips_madd(acc, c, d);
return __builtin_mips_extr_w(acc, 15);
}

#undef MULT16_32_Q15_SUB
static inline int MULT16_32_Q15_SUB(int a, int b, int c, int d) {
long long acc = __builtin_mips_mult(a, b);
acc = __builtin_mips_msub(acc, c, d);
return __builtin_mips_extr_w(acc, 15);
}

#undef MULT16_16_Q15_ADD
static inline int MULT16_16_Q15_ADD(int a, int b, int c, int d) {
long long acc = __builtin_mips_mult(a, b);
acc = __builtin_mips_madd(acc, c, d);
return __builtin_mips_extr_w(acc, 15);
}

#undef MULT16_16_Q15_SUB
static inline int MULT16_16_Q15_SUB(int a, int b, int c, int d) {
long long acc = __builtin_mips_mult(a, b);
acc = __builtin_mips_msub(acc, c, d);
return __builtin_mips_extr_w(acc, 15);
}
#if defined (__mips_dsp) && __mips == 32

typedef short v2i16 __attribute__((vector_size(4)));
typedef char v2i8 __attribute__((vector_size(4)));

#undef MULT16_32_Q16
static inline int MULT16_32_Q16(int a, int b)
Expand Down Expand Up @@ -103,4 +79,104 @@ static inline int MULT16_16_P15(int a, int b)
return __builtin_mips_shra_r_w(r, 15);
}

#define OVERRIDE_CELT_MAXABS16
/** MIPS DSP implementation of celt_maxabs16().
 *
 * Scans x[0..len-1] two samples at a time with the packed-halfword DSP
 * built-ins and returns (largest saturated absolute value) + 1.
 *
 * The packed absolute value saturates, so -32768 is seen as 32767 rather
 * than 32768. Callers use the result only to determine dynamic range in an
 * ilog2-like context, so 1 is added to restore the proper magnitude when
 * saturation occurred. Scalar head/tail samples are saturated the same way
 * and every path goes through the same "+ 1" return, so the result does not
 * depend on the alignment of x or on len modulo 4.
 *
 * @param x    Input samples (any 2-byte alignment).
 * @param len  Number of samples; may be 0, in which case 1 is returned.
 * @return     Saturated maximum absolute value plus one (1..32768).
 */
static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len)
{
   int i;
   v2i16 v2max = (v2i16){ 0, 0 };
   v2i16 x01, x23;
   const v2i16 *x2;
   opus_val16 maxlo, maxhi;
   opus_val32 tail = 0;
   opus_val32 maxval;
   int loops;

   /* Peel one sample when x is only 2-byte aligned so that the packed
      loads below are 4-byte aligned. */
   if (((long)x & 2) && len > 0) {
      opus_val32 head = ABS16(*x);
      /* ABS16(-32768) is 32768, which does not fit a 16-bit lane. */
      if (head > 32767)
         head = 32767;
      v2max = (v2i16){ 0, (opus_val16)head };
      x++;
      len--;
   }
   x2 = __builtin_assume_aligned(x, 4);
   loops = len / 4;

   /* Main loop: four samples (two packed pairs) per iteration. */
   for (i = 0; i < loops; i++)
   {
      x01 = *x2++;
      x23 = *x2++;
      x01 = __builtin_mips_absq_s_ph(x01);
      x23 = __builtin_mips_absq_s_ph(x23);
      /* cmp_lt sets the per-lane condition bits consumed by pick. */
      __builtin_mips_cmp_lt_ph(v2max, x01);
      v2max = __builtin_mips_pick_ph(x01, v2max);
      __builtin_mips_cmp_lt_ph(v2max, x23);
      v2max = __builtin_mips_pick_ph(x23, v2max);
   }

   /* Remainder: up to one packed pair plus one scalar sample. */
   switch (len & 3) {
   case 3:
      tail = ABS16(x[len - 1]);
      /* fall through */
   case 2:
      x01 = __builtin_mips_absq_s_ph(*x2);
      __builtin_mips_cmp_lt_ph(v2max, x01);
      v2max = __builtin_mips_pick_ph(x01, v2max);
      break;
   case 1:
      tail = ABS16(x[len - 1]);
      break;
   case 0:
      break;
   default:
      __builtin_unreachable();
   }

   /* Saturate the scalar tail exactly like the packed absolute value. */
   if (tail > 32767)
      tail = 32767;

   /* Reduce the two lanes and the scalar tail to a single maximum. */
   maxlo = EXTRACT16((opus_val32)v2max);
   maxhi = EXTRACT16((opus_val32)v2max >> 16);
   maxval = MAX16(maxlo, maxhi);
   if (tail > maxval)
      maxval = tail;

   /* C version might return 0x8000, this one can't because abs is
      saturated here; add 1 so the magnitude is right when saturated. */
   return maxval + 1;
}

#undef SATURATE
/* MIPS DSP override of SATURATE(x, a).
 * When `a` is a compile-time constant of the form 2^k - 1, the clamp is done
 * with a saturating left shift followed by an arithmetic right shift;
 * any other bound goes through SATURATE_generic.
 * NOTE(review): the shift path appears to clamp the low end to -(a + 1)
 * rather than -a as SATURATE_generic does -- confirm this is intended. */
static OPUS_INLINE int SATURATE(int x, int a)
{
   int lead;

   if (!__builtin_constant_p(a) || __builtin_popcount(a + 1) != 1)
      return SATURATE_generic(x, a);

   /* a + 1 is a single set bit: shifting by its leading-zero count moves
      the bound up to the 32-bit saturation limit. */
   lead = __builtin_clz(a + 1);
   return __builtin_mips_shll_s_w(x, lead) >> lead;
}

#undef SATURATE16
#define SATURATE16(x) EXTRACT16(SATURATE(x, 32767))

#undef SAT16
#define SAT16(x) EXTRACT16(SATURATE(x, 32767))

#undef SIG2WORD16
/* MIPS DSP override: round a celt_sig down by SIG_SHIFT bits, then
 * saturate the result to 16 bits via SATURATE16. */
static OPUS_INLINE opus_val16 SIG2WORD16(celt_sig x)
{
   const celt_sig rounded = PSHR32(x, SIG_SHIFT);
   return SATURATE16(rounded);
}

#elif __mips == 32

/* 32-bit MIPS without the DSP ASE: replace MULT16_32_Q16 with a form that
   pre-shifts a by 16 bits (SHL32, presumably a left shift -- defined
   elsewhere) so the 64-bit product can be reduced with a 32-bit right
   shift. */
#undef MULT16_32_Q16
#define MULT16_32_Q16(a,b) ((opus_val32)SHR((opus_int64)(SHL32((a), 16))*(b),32))

#endif

#endif /* CELT_FIXED_GENERIC_MIPSR1_H */
Loading
Loading