xiph · SiarheiVolkau · Aug 21, 2025 · Aug 22, 2025 · Aug 22, 2025 · Aug 22, 2025
diff --git a/celt/_kiss_fft_guts.h b/celt/_kiss_fft_guts.h
@@ -102,7 +102,7 @@
 #if defined(OPUS_ARM_INLINE_EDSP)
 #include "arm/kiss_fft_armv5e.h"
 #endif
-#if defined(__mips_dsp) && __mips == 32
+#if defined(__mips)
 #include "mips/kiss_fft_mipsr1.h"
 #endif
 

diff --git a/celt/arch.h b/celt/arch.h
@@ -227,9 +227,10 @@ typedef opus_val16 celt_coef;
 #define ABS16(x) ((x) < 0 ? (-(x)) : (x))
 #define ABS32(x) ((x) < 0 ? (-(x)) : (x))
 
-static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
+static OPUS_INLINE opus_int16 SAT16_default(opus_int32 x) {
    return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x;
 }
+#define SAT16(x) SAT16_default(x)
 
 #ifdef FIXED_DEBUG
 #include "fixed_debug.h"

diff --git a/celt/celt.c b/celt/celt.c
@@ -54,7 +54,7 @@
 #define PACKAGE_VERSION "unknown"
 #endif
 
-#if defined(FIXED_POINT) && defined(__mips_dsp) && __mips == 32
+#if defined(FIXED_POINT) && defined(__mips)
 #include "mips/celt_mipsr1.h"
 #endif
 

diff --git a/celt/celt_lpc.c b/celt/celt_lpc.c
@@ -59,8 +59,15 @@ int          p
       for (i = 0; i < p; i++) {
          /* Sum up this iteration's reflection coefficient */
          opus_val32 rr = 0;
+#if defined (FIXED_POINT) && OPUS_FAST_INT64
+         opus_int64 acc = 0;
+         for (j = 0; j < i; j++)
+            acc += (opus_int64)(lpc[j]) * (opus_int64)(ac[i - j]);
+         rr = (opus_val32)SHR(acc, 31);
+#else
          for (j = 0; j < i; j++)
             rr += MULT32_32_Q31(lpc[j],ac[i - j]);
+#endif
          rr += SHR32(ac[i + 1],6);
          r = -frac_div32(SHL32(rr,6), error);
          /*  Update LPC coefficients and total error */

diff --git a/celt/fixed_generic.h b/celt/fixed_generic.h
@@ -129,7 +129,8 @@
 #define SHR(a,shift) ((a) >> (shift))
 #define SHL(a,shift) SHL32(a,shift)
 #define PSHR(a,shift) (SHR((a)+((EXTEND32(1)<<((shift))>>1)),shift))
-#define SATURATE(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
+#define SATURATE_generic(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
+#define SATURATE(x,a) SATURATE_generic((x), (a))
 
 #define SATURATE16(x) (EXTRACT16((x)>32767 ? 32767 : (x)<-32768 ? -32768 : (x)))
 
@@ -200,10 +201,6 @@
 /** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */
 #define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b)))
 
-#if defined(__mips_dsp) && __mips == 32
-#include "mips/fixed_generic_mipsr1.h"
-#endif
-
 static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x)
 {
    x = PSHR32(x, SIG_SHIFT);
@@ -213,4 +210,8 @@ static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x)
 }
 #define SIG2WORD16(x) (SIG2WORD16_generic(x))
 
+#if defined(__mips)
+#include "mips/fixed_generic_mipsr1.h"
+#endif
+
 #endif
diff --git a/celt/kiss_fft.c b/celt/kiss_fft.c
@@ -535,6 +535,7 @@ void opus_fft_free(const kiss_fft_state *cfg, int arch)
 #endif /* CUSTOM_MODES */
 
 #ifdef FIXED_POINT
+#ifndef OVERRIDE_fft_downshift
 static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) {
    int shift;
    shift = IMIN(step, *total);
@@ -553,6 +554,7 @@ static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) {
       }
    }
 }
+#endif /* OVERRIDE_fft_downshift */
 #else
 #define fft_downshift(x, N, total, step)
 #endif

diff --git a/celt/mdct.c b/celt/mdct.c
@@ -53,7 +53,7 @@
 #include "mathops.h"
 #include "stack_alloc.h"
 
-#if defined(FIXED_POINT) && defined(__mips_dsp) && __mips == 32
+#if defined(FIXED_POINT) && defined(__mips) && __mips == 32
 #include "mips/mdct_mipsr1.h"
 #endif
 

diff --git a/celt/mips/celt_mipsr1.h b/celt/mips/celt_mipsr1.h
@@ -36,6 +36,16 @@
 
 #define CELT_C
 
+#if defined (__mips_dsp) && __mips == 32
+/* 32-bit MIPS with the DSP ASE: replace the generic comb_filter. */
+#define OVERRIDE_COMB_FILTER_CONST
+#define OVERRIDE_comb_filter
+#elif defined(__mips_isa_rev) && __mips_isa_rev < 6
+/* Otherwise, any pre-R6 ISA revision: request the same override. */
+#define OVERRIDE_COMB_FILTER_CONST
+#define OVERRIDE_comb_filter
+#endif
+
 #include "os_support.h"
 #include "mdct.h"
 #include <math.h>
@@ -53,8 +63,43 @@
 #include "celt_lpc.h"
 #include "vq.h"
 
-#define OVERRIDE_COMB_FILTER_CONST
-#define OVERRIDE_comb_filter
+#if defined (__mips_dsp) && __mips == 32
+
+#define MIPS_MULT __builtin_mips_mult
+#define MIPS_MADD __builtin_mips_madd
+#define MIPS_EXTR __builtin_mips_extr_w
+
+#elif defined(__mips_isa_rev) && __mips_isa_rev < 6
+
+/* Plain-ISA stand-in for __builtin_mips_mult: multiplies a by b and returns
+ * the accumulator value. NOTE(review): "x" presumably names the HI/LO pair
+ * as a 64-bit value on a 32-bit ABI; the guard is not 32-bit only -- confirm. */
+static inline long long MIPS_MULT(int a, int b) {
+    long long product;
+
+    asm volatile ("mult %[lhs], %[rhs]  \n"
+        : [hilo] "=x"(product)
+        : [lhs] "r"(a), [rhs] "r"(b));
+    return product;
+}
+
+/* Plain-ISA stand-in for __builtin_mips_madd: multiply-accumulates a * b
+ * into acc, which goes through the asm as a read-write "x" operand. */
+static inline long long MIPS_MADD(long long acc, int a, int b) {
+    long long sum = acc;
+    asm volatile ("madd %[lhs], %[rhs]  \n"
+        : [hilo] "+x"(sum)
+        : [lhs] "r"(a), [rhs] "r"(b));
+    return sum;
+}
+
+static inline opus_val32 MIPS_EXTR(long long acc, int shift) {
+    return (opus_val32)(acc >> shift); /* plain-C stand-in for __builtin_mips_extr_w: shift right, convert to opus_val32 */
+}
+
+#endif
+
+#if defined (OVERRIDE_comb_filter)
 void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
       opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
       const opus_val16 *window, int overlap, int arch)
@@ -101,13 +146,13 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
       f = MULT16_16_Q15(window[i],window[i]);
       x0= x[i-T1+2];
 
-      acc = __builtin_mips_mult((int)MULT16_16_Q15((Q15ONE-f),g00), (int)x[i-T0]);
-      acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15((Q15ONE-f),g01), (int)ADD32(x[i-T0-1],x[i-T0+1]));
-      acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15((Q15ONE-f),g02), (int)ADD32(x[i-T0-2],x[i-T0+2]));
-      acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15(f,g10), (int)x2);
-      acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15(f,g11), (int)ADD32(x3,x1));
-      acc = __builtin_mips_madd(acc, (int)MULT16_16_Q15(f,g12), (int)ADD32(x4,x0));
-      res = __builtin_mips_extr_w(acc, 15);
+      acc = MIPS_MULT((int)MULT16_16_Q15((Q15ONE-f),g00), (int)x[i-T0]);
+      acc = MIPS_MADD(acc, (int)MULT16_16_Q15((Q15ONE-f),g01), (int)ADD32(x[i-T0-1],x[i-T0+1]));
+      acc = MIPS_MADD(acc, (int)MULT16_16_Q15((Q15ONE-f),g02), (int)ADD32(x[i-T0-2],x[i-T0+2]));
+      acc = MIPS_MADD(acc, (int)MULT16_16_Q15(f,g10), (int)x2);
+      acc = MIPS_MADD(acc, (int)MULT16_16_Q15(f,g11), (int)ADD32(x3,x1));
+      acc = MIPS_MADD(acc, (int)MULT16_16_Q15(f,g12), (int)ADD32(x4,x0));
+      res = MIPS_EXTR(acc, 15);
 
       y[i] = x[i] + res;
 
@@ -136,10 +181,10 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
       long long acc;
       x0=x[i-T1+2];
 
-      acc = __builtin_mips_mult((int)g10, (int)x2);
-      acc = __builtin_mips_madd(acc, (int)g11, (int)ADD32(x3,x1));
-      acc = __builtin_mips_madd(acc, (int)g12, (int)ADD32(x4,x0));
-      res = __builtin_mips_extr_w(acc, 15);
+      acc = MIPS_MULT((int)g10, (int)x2);
+      acc = MIPS_MADD(acc, (int)g11, (int)ADD32(x3,x1));
+      acc = MIPS_MADD(acc, (int)g12, (int)ADD32(x4,x0));
+      res = MIPS_EXTR(acc, 15);
 
       y[i] = x[i] + res;
       x4=x3;
@@ -148,5 +193,6 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
       x1=x0;
    }
 }
+#endif /* OVERRIDE_comb_filter */
 
 #endif /* CELT_MIPSR1_H__ */
diff --git a/celt/mips/fixed_generic_mipsr1.h b/celt/mips/fixed_generic_mipsr1.h
@@ -33,34 +33,10 @@
 #ifndef CELT_FIXED_GENERIC_MIPSR1_H
 #define CELT_FIXED_GENERIC_MIPSR1_H
 
-#undef MULT16_32_Q15_ADD
-static inline int MULT16_32_Q15_ADD(int a, int b, int c, int d) {
-    long long acc = __builtin_mips_mult(a, b);
-    acc = __builtin_mips_madd(acc, c, d);
-    return __builtin_mips_extr_w(acc, 15);
-}
-
-#undef MULT16_32_Q15_SUB
-static inline int MULT16_32_Q15_SUB(int a, int b, int c, int d) {
-    long long acc = __builtin_mips_mult(a, b);
-    acc = __builtin_mips_msub(acc, c, d);
-    return __builtin_mips_extr_w(acc, 15);
-}
-
-#undef MULT16_16_Q15_ADD
-static inline int MULT16_16_Q15_ADD(int a, int b, int c, int d) {
-    long long acc = __builtin_mips_mult(a, b);
-    acc = __builtin_mips_madd(acc, c, d);
-    return __builtin_mips_extr_w(acc, 15);
-}
-
-#undef MULT16_16_Q15_SUB
-static inline int MULT16_16_Q15_SUB(int a, int b, int c, int d) {
-    long long acc = __builtin_mips_mult(a, b);
-    acc = __builtin_mips_msub(acc, c, d);
-    return __builtin_mips_extr_w(acc, 15);
-}
+#if defined (__mips_dsp) && __mips == 32
 
+typedef short v2i16 __attribute__((vector_size(4))); /* 4-byte vector of short lanes */
+typedef char  v2i8  __attribute__((vector_size(4))); /* NOTE(review): 4 bytes of char is four lanes, not two; GCC docs name this v4i8 */
 
 #undef MULT16_32_Q16
 static inline int MULT16_32_Q16(int a, int b)
@@ -103,4 +79,104 @@ static inline int MULT16_16_P15(int a, int b)
     return __builtin_mips_shra_r_w(r, 15);
 }
 
+#define OVERRIDE_CELT_MAXABS16
+/* Largest absolute value in x[0..len-1], using the DSP ASE paired-halfword
+ * ops. Every magnitude is saturated to 32767 (absq_s.ph for the packed
+ * lanes, an explicit clamp for the scalar head/tail samples), so a 16-bit
+ * lane or temporary can never wrap to -32768. All paths return the
+ * saturated maximum plus one, independent of alignment and of len % 4;
+ * see the note before the return. */
+static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len)
+{
+   int i;
+   v2i16 v2max = (v2i16){ 0, 0 };
+   v2i16 x01, x23;
+   const v2i16 *x2;
+   opus_val32 maxlo, maxhi;
+   opus_val32 tail = 0;
+   int loops;
+
+   /* Peel one sample so that x2 is 4-byte aligned. ABS16(-32768) is 32768,
+    * which would wrap to -32768 in a 16-bit lane and then lose every
+    * signed comparison below, so clamp it before packing. */
+   if ((long)x & 2 && len > 0) {
+      opus_val32 head = ABS16(*x);
+      v2max = (v2i16){ 0, head > 32767 ? 32767 : head };
+      x++;
+      len--;
+   }
+   x2 = __builtin_assume_aligned(x, 4);
+   loops = len / 4;
+
+   /* Four samples per iteration: saturating abs, then lane-wise maximum. */
+   for (i = 0; i < loops; i++)
+   {
+       x01 = *x2++;
+       x23 = *x2++;
+       x01 = __builtin_mips_absq_s_ph(x01);
+       x23 = __builtin_mips_absq_s_ph(x23);
+       __builtin_mips_cmp_lt_ph(v2max, x01);
+       v2max = __builtin_mips_pick_ph(x01, v2max);
+       __builtin_mips_cmp_lt_ph(v2max, x23);
+       v2max = __builtin_mips_pick_ph(x23, v2max);
+   }
+
+   /* 0..3 samples remain: one more packed pair if at least two are left. */
+   if (len & 2) {
+       x01 = __builtin_mips_absq_s_ph(*x2);
+       __builtin_mips_cmp_lt_ph(v2max, x01);
+       v2max = __builtin_mips_pick_ph(x01, v2max);
+   }
+   /* An odd count leaves one final scalar sample; keep it in 32 bits and
+    * clamp it like the lanes so that -32768 cannot turn negative. */
+   if (len & 1) {
+       tail = ABS16(x[len - 1]);
+       if (tail > 32767)
+           tail = 32767;
+   }
+
+   maxlo = EXTRACT16((opus_val32)v2max);
+   maxhi = EXTRACT16((opus_val32)v2max >> 16);
+   maxlo = MAX16(MAX16(maxlo, maxhi), tail);
+
+   /* The C version might return 0x8000; this one can't
+    * because abs is saturated here. Since the result
+    * is only used to determine dynamic range
+    * in an ilog2-like context, it's worth adding 1
+    * to get the proper magnitude when saturated.
+    */
+   return maxlo + 1;
+}
+
+#undef SATURATE
+/* DSP SATURATE: for a constant a with a + 1 a power of two, clamp x with a
+ * shll_s_w / >> pair. NOTE(review): that floor looks like -(a+1), not -a. */
+static OPUS_INLINE int SATURATE(int x, int a)
+{
+    if (!__builtin_constant_p(a) || __builtin_popcount(a + 1) != 1)
+        return SATURATE_generic(x, a);
+    /* a + 1 has one set bit, so clz(a + 1) is the shift used both ways. */
+    return __builtin_mips_shll_s_w(x, __builtin_clz(a + 1)) >> __builtin_clz(a + 1);
+}
+
+#undef SATURATE16
+#define SATURATE16(x) EXTRACT16(SATURATE(x, 32767))
+/* SAT16 gets the same expansion as SATURATE16 in this DSP branch. */
+#undef SAT16
+#define SAT16(x) EXTRACT16(SATURATE(x, 32767))
+
+#undef SIG2WORD16
+/* Apply PSHR32 by SIG_SHIFT to x, then narrow it with SATURATE16. */
+static OPUS_INLINE opus_val16 SIG2WORD16(celt_sig x)
+{
+   return SATURATE16(PSHR32(x, SIG_SHIFT));
+}
+
+#elif __mips == 32
+/* __mips == 32 fallback: a is pre-shifted left by 16, multiplied by b in 64 bits, then shifted right by 32. */
+#undef MULT16_32_Q16
+#define MULT16_32_Q16(a,b) ((opus_val32)SHR((opus_int64)(SHL32((a), 16))*(b),32))
+
+#endif
+
 #endif /* CELT_FIXED_GENERIC_MIPSR1_H */