93 files changed, 26058 insertions, 0 deletions
diff --git a/libopus/celt/_kiss_fft_guts.h b/libopus/celt/_kiss_fft_guts.h
new file mode 100644
index 0000000..17392b3
--- /dev/null
+++ b/libopus/celt/_kiss_fft_guts.h
@@ -0,0 +1,182 @@
+/*Copyright (c) 2003-2004, Mark Borgerding
+
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifndef KISS_FFT_GUTS_H
+#define KISS_FFT_GUTS_H
+
+#define MIN(a,b) ((a)<(b) ? (a):(b))
+#define MAX(a,b) ((a)>(b) ? (a):(b))
+
+/* kiss_fft.h
+   defines kiss_fft_scalar as either short or a float type
+   and defines
+   typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */
+#include "kiss_fft.h"
+
+/*
+  Explanation of macros dealing with complex math:
+
+   C_MUL(m,a,b)         : m = a*b
+   C_FIXDIV( c , div )  : if a fixed point impl., c /= div. noop otherwise
+   C_SUB( res, a,b)     : res = a - b
+   C_SUBFROM( res , a)  : res -= a
+   C_ADDTO( res , a)    : res += a
+ * */
+#ifdef FIXED_POINT
+#include "arch.h"
+
+
+#define SAMP_MAX 2147483647
+#define TWID_MAX 32767
+#define TRIG_UPSCALE 1
+
+#define SAMP_MIN -SAMP_MAX
+
+
+#   define S_MUL(a,b) MULT16_32_Q15(b, a)
+
+#   define C_MUL(m,a,b) \
+      do{ (m).r = SUB32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
+          (m).i = ADD32_ovflw(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
+
+#   define C_MULC(m,a,b) \
+      do{ (m).r = ADD32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
+          (m).i = SUB32_ovflw(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
+
+#   define C_MULBYSCALAR( c, s ) \
+      do{ (c).r =  S_MUL( (c).r , s ) ;\
+          (c).i =  S_MUL( (c).i , s ) ; }while(0)
+
+#   define DIVSCALAR(x,k) \
+        (x) = S_MUL(  x, (TWID_MAX-((k)>>1))/(k)+1 )
+
+#   define C_FIXDIV(c,div) \
+        do {    DIVSCALAR( (c).r , div);  \
+                DIVSCALAR( (c).i  , div); }while (0)
+
+#define  C_ADD( res, a,b)\
+    do {(res).r=ADD32_ovflw((a).r,(b).r);  (res).i=ADD32_ovflw((a).i,(b).i); \
+    }while(0)
+#define  C_SUB( res, a,b)\
+    do {(res).r=SUB32_ovflw((a).r,(b).r);  (res).i=SUB32_ovflw((a).i,(b).i); \
+    }while(0)
+#define C_ADDTO( res , a)\
+    do {(res).r = ADD32_ovflw((res).r, (a).r);  (res).i = ADD32_ovflw((res).i,(a).i);\
+    }while(0)
+
+#define C_SUBFROM( res , a)\
+    do {(res).r = ADD32_ovflw((res).r,(a).r);  (res).i = SUB32_ovflw((res).i,(a).i); \
+    }while(0)
+
+#if defined(OPUS_ARM_INLINE_ASM)
+#include "arm/kiss_fft_armv4.h"
+#endif
+
+#if defined(OPUS_ARM_INLINE_EDSP)
+#include "arm/kiss_fft_armv5e.h"
+#endif
+#if defined(MIPSr1_ASM)
+#include "mips/kiss_fft_mipsr1.h"
+#endif
+
+#else  /* not FIXED_POINT*/
+
+#   define S_MUL(a,b) ( (a)*(b) )
+#define C_MUL(m,a,b) \
+    do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
+        (m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
+#define C_MULC(m,a,b) \
+    do{ (m).r = (a).r*(b).r + (a).i*(b).i;\
+        (m).i = (a).i*(b).r - (a).r*(b).i; }while(0)
+
+#define C_MUL4(m,a,b) C_MUL(m,a,b)
+
+#   define C_FIXDIV(c,div) /* NOOP */
+#   define C_MULBYSCALAR( c, s ) \
+    do{ (c).r *= (s);\
+        (c).i *= (s); }while(0)
+#endif
+
+#ifndef CHECK_OVERFLOW_OP
+#  define CHECK_OVERFLOW_OP(a,op,b) /* noop */
+#endif
+
+#ifndef C_ADD
+#define  C_ADD( res, a,b)\
+    do { \
+            CHECK_OVERFLOW_OP((a).r,+,(b).r)\
+            CHECK_OVERFLOW_OP((a).i,+,(b).i)\
+            (res).r=(a).r+(b).r;  (res).i=(a).i+(b).i; \
+    }while(0)
+#define  C_SUB( res, a,b)\
+    do { \
+            CHECK_OVERFLOW_OP((a).r,-,(b).r)\
+            CHECK_OVERFLOW_OP((a).i,-,(b).i)\
+            (res).r=(a).r-(b).r;  (res).i=(a).i-(b).i; \
+    }while(0)
+#define C_ADDTO( res , a)\
+    do { \
+            CHECK_OVERFLOW_OP((res).r,+,(a).r)\
+            CHECK_OVERFLOW_OP((res).i,+,(a).i)\
+            (res).r += (a).r;  (res).i += (a).i;\
+    }while(0)
+
+#define C_SUBFROM( res , a)\
+    do {\
+            CHECK_OVERFLOW_OP((res).r,-,(a).r)\
+            CHECK_OVERFLOW_OP((res).i,-,(a).i)\
+            (res).r -= (a).r;  (res).i -= (a).i; \
+    }while(0)
+#endif /* C_ADD defined */
+
+#ifdef FIXED_POINT
+/*#  define KISS_FFT_COS(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase))))
+#  define KISS_FFT_SIN(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))*/
+#  define KISS_FFT_COS(phase)  floor(.5+TWID_MAX*cos (phase))
+#  define KISS_FFT_SIN(phase)  floor(.5+TWID_MAX*sin (phase))
+#  define HALF_OF(x) ((x)>>1)
+#elif defined(USE_SIMD)
+#  define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )
+#  define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) )
+#  define HALF_OF(x) ((x)*_mm_set1_ps(.5f))
+#else
+#  define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase)
+#  define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase)
+#  define HALF_OF(x) ((x)*.5f)
+#endif
+
+#define  kf_cexp(x,phase) \
+        do{ \
+                (x)->r = KISS_FFT_COS(phase);\
+                (x)->i = KISS_FFT_SIN(phase);\
+        }while(0)
+
+#define  kf_cexp2(x,phase) \
+   do{ \
+      (x)->r = TRIG_UPSCALE*celt_cos_norm((phase));\
+      (x)->i = TRIG_UPSCALE*celt_cos_norm((phase)-32768);\
+}while(0)
+
+#endif /* KISS_FFT_GUTS_H */
diff --git a/libopus/celt/arch.h b/libopus/celt/arch.h
new file mode 100644
index 0000000..08b07db
--- /dev/null
+++ b/libopus/celt/arch.h
@@ -0,0 +1,288 @@
+/* Copyright (c) 2003-2008 Jean-Marc Valin
+   Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file arch.h
+   @brief Various architecture definitions for CELT
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef ARCH_H
+#define ARCH_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+# if !defined(__GNUC_PREREQ)
+#  if defined(__GNUC__)&&defined(__GNUC_MINOR__)
+#   define __GNUC_PREREQ(_maj,_min) \
+ ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min))
+#  else
+#   define __GNUC_PREREQ(_maj,_min) 0
+#  endif
+# endif
+
+#if OPUS_GNUC_PREREQ(3, 0)
+#define opus_likely(x)       (__builtin_expect(!!(x), 1))
+#define opus_unlikely(x)     (__builtin_expect(!!(x), 0))
+#else
+#define opus_likely(x)       (!!(x))
+#define opus_unlikely(x)     (!!(x))
+#endif
+
+#define CELT_SIG_SCALE 32768.f
+
+#define CELT_FATAL(str) celt_fatal(str, __FILE__, __LINE__);
+
+#if defined(ENABLE_ASSERTIONS) || defined(ENABLE_HARDENING)
+#ifdef __GNUC__
+__attribute__((noreturn))
+#endif
+void celt_fatal(const char *str, const char *file, int line);
+
+#if defined(CELT_C) && !defined(OVERRIDE_celt_fatal)
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef __GNUC__
+__attribute__((noreturn))
+#endif
+void celt_fatal(const char *str, const char *file, int line)
+{
+   fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str);
+   abort();
+}
+#endif
+
+#define celt_assert(cond) {if (!(cond)) {CELT_FATAL("assertion failed: " #cond);}}
+#define celt_assert2(cond, message) {if (!(cond)) {CELT_FATAL("assertion failed: " #cond "\n" message);}}
+#define MUST_SUCCEED(call) celt_assert((call) == OPUS_OK)
+#else
+#define celt_assert(cond)
+#define celt_assert2(cond, message)
+#define MUST_SUCCEED(call) do {if((call) != OPUS_OK) {RESTORE_STACK; return OPUS_INTERNAL_ERROR;} } while (0)
+#endif
+
+#if defined(ENABLE_ASSERTIONS)
+#define celt_sig_assert(cond) {if (!(cond)) {CELT_FATAL("signal assertion failed: " #cond);}}
+#else
+#define celt_sig_assert(cond)
+#endif
+
+#define IMUL32(a,b) ((a)*(b))
+
+#define MIN16(a,b) ((a) < (b) ? (a) : (b))   /**< Minimum 16-bit value.   */
+#define MAX16(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum 16-bit value.   */
+#define MIN32(a,b) ((a) < (b) ? (a) : (b))   /**< Minimum 32-bit value.   */
+#define MAX32(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum 32-bit value.   */
+#define IMIN(a,b) ((a) < (b) ? (a) : (b))   /**< Minimum int value.   */
+#define IMAX(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum int value.   */
+#define UADD32(a,b) ((a)+(b))
+#define USUB32(a,b) ((a)-(b))
+
+/* Set this if opus_int64 is a native type of the CPU. */
+/* Assume that all LP64 architectures have fast 64-bit types; also x86_64
+   (which can be ILP32 for x32) and Win64 (which is LLP64). */
+#if defined(__x86_64__) || defined(__LP64__) || defined(_WIN64)
+#define OPUS_FAST_INT64 1
+#else
+#define OPUS_FAST_INT64 0
+#endif
+
+#define PRINT_MIPS(file)
+
+#ifdef FIXED_POINT
+
+typedef opus_int16 opus_val16;
+typedef opus_int32 opus_val32;
+typedef opus_int64 opus_val64;
+
+typedef opus_val32 celt_sig;
+typedef opus_val16 celt_norm;
+typedef opus_val32 celt_ener;
+
+#define celt_isnan(x) 0
+
+#define Q15ONE 32767
+
+#define SIG_SHIFT 12
+/* Safe saturation value for 32-bit signals. Should be less than
+   2^31*(1-0.85) to avoid blowing up on DC at deemphasis.*/
+#define SIG_SAT (300000000)
+
+#define NORM_SCALING 16384
+
+#define DB_SHIFT 10
+
+#define EPSILON 1
+#define VERY_SMALL 0
+#define VERY_LARGE16 ((opus_val16)32767)
+#define Q15_ONE ((opus_val16)32767)
+
+#define SCALEIN(a)      (a)
+#define SCALEOUT(a)     (a)
+
+#define ABS16(x) ((x) < 0 ? (-(x)) : (x))
+#define ABS32(x) ((x) < 0 ? (-(x)) : (x))
+
+static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
+   return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x;
+}
+
+#ifdef FIXED_DEBUG
+#include "fixed_debug.h"
+#else
+
+#include "fixed_generic.h"
+
+#ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR
+#include "arm/fixed_arm64.h"
+#elif defined (OPUS_ARM_INLINE_EDSP)
+#include "arm/fixed_armv5e.h"
+#elif defined (OPUS_ARM_INLINE_ASM)
+#include "arm/fixed_armv4.h"
+#elif defined (BFIN_ASM)
+#include "fixed_bfin.h"
+#elif defined (TI_C5X_ASM)
+#include "fixed_c5x.h"
+#elif defined (TI_C6X_ASM)
+#include "fixed_c6x.h"
+#endif
+
+#endif
+
+#else /* FIXED_POINT */
+
+typedef float opus_val16;
+typedef float opus_val32;
+typedef float opus_val64;
+
+typedef float celt_sig;
+typedef float celt_norm;
+typedef float celt_ener;
+
+#ifdef FLOAT_APPROX
+/* This code should reliably detect NaN/inf even when -ffast-math is used.
+   Assumes IEEE 754 format. */
+static OPUS_INLINE int celt_isnan(float x)
+{
+   union {float f; opus_uint32 i;} in;
+   in.f = x;
+   return ((in.i>>23)&0xFF)==0xFF && (in.i&0x007FFFFF)!=0;
+}
+#else
+#ifdef __FAST_MATH__
+#error Cannot build libopus with -ffast-math unless FLOAT_APPROX is defined. This could result in crashes on extreme (e.g. NaN) input
+#endif
+#define celt_isnan(x) ((x)!=(x))
+#endif
+
+#define Q15ONE 1.0f
+
+#define NORM_SCALING 1.f
+
+#define EPSILON 1e-15f
+#define VERY_SMALL 1e-30f
+#define VERY_LARGE16 1e15f
+#define Q15_ONE ((opus_val16)1.f)
+
+/* This appears to be the same speed as C99's fabsf() but it's more portable. */
+#define ABS16(x) ((float)fabs(x))
+#define ABS32(x) ((float)fabs(x))
+
+#define QCONST16(x,bits) (x)
+#define QCONST32(x,bits) (x)
+
+#define NEG16(x) (-(x))
+#define NEG32(x) (-(x))
+#define NEG32_ovflw(x) (-(x))
+#define EXTRACT16(x) (x)
+#define EXTEND32(x) (x)
+#define SHR16(a,shift) (a)
+#define SHL16(a,shift) (a)
+#define SHR32(a,shift) (a)
+#define SHL32(a,shift) (a)
+#define PSHR32(a,shift) (a)
+#define VSHR32(a,shift) (a)
+
+#define PSHR(a,shift)   (a)
+#define SHR(a,shift)    (a)
+#define SHL(a,shift)    (a)
+#define SATURATE(x,a)   (x)
+#define SATURATE16(x)   (x)
+
+#define ROUND16(a,shift)  (a)
+#define SROUND16(a,shift) (a)
+#define HALF16(x)       (.5f*(x))
+#define HALF32(x)       (.5f*(x))
+
+#define ADD16(a,b) ((a)+(b))
+#define SUB16(a,b) ((a)-(b))
+#define ADD32(a,b) ((a)+(b))
+#define SUB32(a,b) ((a)-(b))
+#define ADD32_ovflw(a,b) ((a)+(b))
+#define SUB32_ovflw(a,b) ((a)-(b))
+#define MULT16_16_16(a,b)     ((a)*(b))
+#define MULT16_16(a,b)     ((opus_val32)(a)*(opus_val32)(b))
+#define MAC16_16(c,a,b)     ((c)+(opus_val32)(a)*(opus_val32)(b))
+
+#define MULT16_32_Q15(a,b)     ((a)*(b))
+#define MULT16_32_Q16(a,b)     ((a)*(b))
+
+#define MULT32_32_Q31(a,b)     ((a)*(b))
+
+#define MAC16_32_Q15(c,a,b)     ((c)+(a)*(b))
+#define MAC16_32_Q16(c,a,b)     ((c)+(a)*(b))
+
+#define MULT16_16_Q11_32(a,b)     ((a)*(b))
+#define MULT16_16_Q11(a,b)     ((a)*(b))
+#define MULT16_16_Q13(a,b)     ((a)*(b))
+#define MULT16_16_Q14(a,b)     ((a)*(b))
+#define MULT16_16_Q15(a,b)     ((a)*(b))
+#define MULT16_16_P15(a,b)     ((a)*(b))
+#define MULT16_16_P13(a,b)     ((a)*(b))
+#define MULT16_16_P14(a,b)     ((a)*(b))
+#define MULT16_32_P16(a,b)     ((a)*(b))
+
+#define DIV32_16(a,b)     (((opus_val32)(a))/(opus_val16)(b))
+#define DIV32(a,b)     (((opus_val32)(a))/(opus_val32)(b))
+
+#define SCALEIN(a)      ((a)*CELT_SIG_SCALE)
+#define SCALEOUT(a)     ((a)*(1/CELT_SIG_SCALE))
+
+#define SIG2WORD16(x) (x)
+
+#endif /* !FIXED_POINT */
+
+#ifndef GLOBAL_STACK_SIZE
+#ifdef FIXED_POINT
+#define GLOBAL_STACK_SIZE 120000
+#else
+#define GLOBAL_STACK_SIZE 120000
+#endif
+#endif
+
+#endif /* ARCH_H */
diff --git a/libopus/celt/arm/arm2gnu.pl b/libopus/celt/arm/arm2gnu.pl
new file mode 100755
index 0000000..a2895f7
--- /dev/null
+++ b/libopus/celt/arm/arm2gnu.pl
@@ -0,0 +1,353 @@
+#!/usr/bin/perl
+# Copyright (C) 2002-2013 Xiph.org Foundation
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# - Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# - Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+my $bigend;  # little/big endian
+my $nxstack;
+my $apple = 0;
+my $symprefix = "";
+
+$nxstack = 0;
+
+eval 'exec /usr/local/bin/perl -S $0 ${1+"$@"}'
+    if $running_under_some_shell;
+
+while ($ARGV[0] =~ /^-/) {
+    $_ = shift;
+  last if /^--$/;
+    if (/^-n$/) {
+    $nflag++;
+    next;
+    }
+    if (/^--apple$/) {
+        $apple = 1;
+        $symprefix = "_";
+        next;
+    }
+    die "I don't recognize this switch: $_\\n";
+}
+$printit++ unless $nflag;
+
+$\ = "\n";      # automatically add newline on print
+$n=0;
+
+$thumb = 0;     # ARM mode by default, not Thumb.
+@proc_stack = ();
+
+printf ("    .syntax unified\n");
+
+LINE:
+while (<>) {
+
+    # For ADRLs we need to add a new line after the substituted one.
+    $addPadding = 0;
+
+    # First, we do not dare to touch *anything* inside double quotes, do we?
+    # Second, if you want a dollar character in the string,
+    # insert two of them -- that's how ARM C and assembler treat strings.
+    s/^([A-Za-z_]\w*)[ \t]+DCB[ \t]*\"/$1:   .ascii \"/   && do { s/\$\$/\$/g; next };
+    s/\bDCB\b[ \t]*\"/.ascii \"/                          && do { s/\$\$/\$/g; next };
+    s/^(\S+)\s+RN\s+(\S+)/$1 .req r$2/                    && do { s/\$\$/\$/g; next };
+    # If there's nothing on a line but a comment, don't try to apply any further
+    #  substitutions (this is a cheap hack to avoid mucking up the license header)
+    s/^([ \t]*);/$1@/                                     && do { s/\$\$/\$/g; next };
+    # If substituted -- leave immediately !
+
+    s/@/,:/;
+    s/;/@/;
+    while ( /@.*'/ ) {
+      s/(@.*)'/$1/g;
+    }
+    s/\{FALSE\}/0/g;
+    s/\{TRUE\}/1/g;
+    s/\{(\w\w\w\w+)\}/$1/g;
+    s/\bINCLUDE[ \t]*([^ \t\n]+)/.include \"$1\"/;
+    s/\bGET[ \t]*([^ \t\n]+)/.include \"${ my $x=$1; $x =~ s|\.s|-gnu.S|; \$x }\"/;
+    s/\bIMPORT\b/.extern/;
+    s/\bEXPORT\b\s*/.global $symprefix/;
+    s/^(\s+)\[/$1IF/;
+    s/^(\s+)\|/$1ELSE/;
+    s/^(\s+)\]/$1ENDIF/;
+    s/IF *:DEF:/ .ifdef/;
+    s/IF *:LNOT: *:DEF:/ .ifndef/;
+    s/ELSE/ .else/;
+    s/ENDIF/ .endif/;
+
+    if( /\bIF\b/ ) {
+      s/\bIF\b/ .if/;
+      s/=/==/;
+    }
+    if ( $n == 2) {
+        s/\$/\\/g;
+    }
+    if ($n == 1) {
+        s/\$//g;
+        s/label//g;
+    $n = 2;
+      }
+    if ( /MACRO/ ) {
+      s/MACRO *\n/.macro/;
+      $n=1;
+    }
+    if ( /\bMEND\b/ ) {
+      s/\bMEND\b/.endm/;
+      $n=0;
+    }
+
+    # ".rdata" doesn't work in 'as' version 2.13.2, as it is ".rodata" there.
+    #
+    if ( /\bAREA\b/ ) {
+        my $align;
+        $align = "2";
+        if ( /ALIGN=(\d+)/ ) {
+            $align = $1;
+        }
+        if ( /CODE/ ) {
+            $nxstack = 1;
+        }
+        s/^(.+)CODE(.+)READONLY(.*)/    .text/;
+        s/^(.+)DATA(.+)READONLY(.*)/    .section .rdata/;
+        s/^(.+)\|\|\.data\|\|(.+)/    .data/;
+        s/^(.+)\|\|\.bss\|\|(.+)/    .bss/;
+        s/$/;   .p2align $align/;
+        # Enable NEON instructions but don't produce a binary that requires
+        # ARMv7. RVCT does not have equivalent directives, so we just do this
+        # for all CODE areas.
+        if ( /.text/ ) {
+            # Separating .arch, .fpu, etc., by semicolons does not work (gas
+            # thinks the semicolon is part of the arch name, even when there's
+            # whitespace separating them). Sadly this means our line numbers
+            # won't match the original source file (we could use the .line
+            # directive, which is documented to be obsolete, but then gdb will
+            # show the wrong line in the translated source file).
+            s/$/;   .arch armv7-a\n   .fpu neon\n   .object_arch armv4t/ unless ($apple);
+        }
+    }
+
+    s/\|\|\.constdata\$(\d+)\|\|/.L_CONST$1/;       # ||.constdata$3||
+    s/\|\|\.bss\$(\d+)\|\|/.L_BSS$1/;               # ||.bss$2||
+    s/\|\|\.data\$(\d+)\|\|/.L_DATA$1/;             # ||.data$2||
+    s/\|\|([a-zA-Z0-9_]+)\@([a-zA-Z0-9_]+)\|\|/@ $&/;
+    s/^(\s+)\%(\s)/    .space $1/;
+
+    s/\|(.+)\.(\d+)\|/\.$1_$2/;                     # |L80.123| -> .L80_123
+    s/\bCODE32\b/.code 32/ && do {$thumb = 0};
+    s/\bCODE16\b/.code 16/ && do {$thumb = 1};
+    if (/\bPROC\b/)
+    {
+        my $prefix;
+        my $proc;
+        /^([A-Za-z_\.]\w+)\b/;
+        $proc = $1;
+        $prefix = "";
+        if ($proc)
+        {
+            $prefix = $prefix.sprintf("\t.type\t%s, %%function", $proc) unless ($apple);
+            # Make sure we $prefix isn't empty here (for the $apple case).
+            # We handle mangling the label here, make sure it doesn't match
+            # the label handling below (if $prefix would be empty).
+            $prefix = $prefix."; ";
+            push(@proc_stack, $proc);
+            s/^[A-Za-z_\.]\w+/$symprefix$&:/;
+        }
+        $prefix = $prefix."\t.thumb_func; " if ($thumb);
+        s/\bPROC\b/@ $&/;
+        $_ = $prefix.$_;
+    }
+    s/^(\s*)(S|Q|SH|U|UQ|UH)ASX\b/$1$2ADDSUBX/;
+    s/^(\s*)(S|Q|SH|U|UQ|UH)SAX\b/$1$2SUBADDX/;
+    if (/\bENDP\b/)
+    {
+        my $proc;
+        s/\bENDP\b/@ $&/;
+        $proc = pop(@proc_stack);
+        $_ = "\t.size $proc, .-$proc".$_ if ($proc && !$apple);
+    }
+    s/\bSUBT\b/@ $&/;
+    s/\bDATA\b/@ $&/;   # DATA directive is deprecated -- Asm guide, p.7-25
+    s/\bKEEP\b/@ $&/;
+    s/\bEXPORTAS\b/@ $&/;
+    s/\|\|(.)+\bEQU\b/@ $&/;
+    s/\|\|([\w\$]+)\|\|/$1/;
+    s/\bENTRY\b/@ $&/;
+    s/\bASSERT\b/@ $&/;
+    s/\bGBLL\b/@ $&/;
+    s/\bGBLA\b/@ $&/;
+    s/^\W+OPT\b/@ $&/;
+    s/:OR:/|/g;
+    s/:SHL:/<</g;
+    s/:SHR:/>>/g;
+    s/:AND:/&/g;
+    s/:LAND:/&&/g;
+    s/CPSR/cpsr/;
+    s/SPSR/spsr/;
+    s/ALIGN$/.balign 4/;
+    s/ALIGN\s+([0-9x]+)$/.balign $1/;
+    s/psr_cxsf/psr_all/;
+    s/LTORG/.ltorg/;
+    s/^([A-Za-z_]\w*)[ \t]+EQU/ .set $1,/;
+    s/^([A-Za-z_]\w*)[ \t]+SETL/ .set $1,/;
+    s/^([A-Za-z_]\w*)[ \t]+SETA/ .set $1,/;
+    s/^([A-Za-z_]\w*)[ \t]+\*/ .set $1,/;
+
+    #  {PC} + 0xdeadfeed  -->  . + 0xdeadfeed
+    s/\{PC\} \+/ \. +/;
+
+    # Single hex constant on the line !
+    #
+    # >>> NOTE <<<
+    #   Double-precision floats in gcc are always mixed-endian, which means
+    #   bytes in two words are little-endian, but words are big-endian.
+    #   So, 0x0000deadfeed0000 would be stored as 0x0000dead at low address
+    #   and 0xfeed0000 at high address.
+    #
+    s/\bDCFD\b[ \t]+0x([a-fA-F0-9]{8})([a-fA-F0-9]{8})/.long 0x$1, 0x$2/;
+    # Only decimal constants on the line, no hex !
+    s/\bDCFD\b[ \t]+([0-9\.\-]+)/.double $1/;
+
+    # Single hex constant on the line !
+#    s/\bDCFS\b[ \t]+0x([a-f0-9]{8})([a-f0-9]{8})/.long 0x$1, 0x$2/;
+    # Only decimal constants on the line, no hex !
+#    s/\bDCFS\b[ \t]+([0-9\.\-]+)/.double $1/;
+    s/\bDCFS[ \t]+0x/.word 0x/;
+    s/\bDCFS\b/.float/;
+
+    s/^([A-Za-z_]\w*)[ \t]+DCD/$1 .word/;
+    s/\bDCD\b/.word/;
+    s/^([A-Za-z_]\w*)[ \t]+DCW/$1 .short/;
+    s/\bDCW\b/.short/;
+    s/^([A-Za-z_]\w*)[ \t]+DCB/$1 .byte/;
+    s/\bDCB\b/.byte/;
+    s/^([A-Za-z_]\w*)[ \t]+\%/.comm $1,/;
+    s/^[A-Za-z_\.]\w+/$&:/;
+    s/^(\d+)/$1:/;
+    s/\%(\d+)/$1b_or_f/;
+    s/\%[Bb](\d+)/$1b/;
+    s/\%[Ff](\d+)/$1f/;
+    s/\%[Ff][Tt](\d+)/$1f/;
+    s/&([\dA-Fa-f]+)/0x$1/;
+    if ( /\b2_[01]+\b/ ) {
+      s/\b2_([01]+)\b/conv$1&&&&/g;
+      while ( /[01][01][01][01]&&&&/ ) {
+        s/0000&&&&/&&&&0/g;
+        s/0001&&&&/&&&&1/g;
+        s/0010&&&&/&&&&2/g;
+        s/0011&&&&/&&&&3/g;
+        s/0100&&&&/&&&&4/g;
+        s/0101&&&&/&&&&5/g;
+        s/0110&&&&/&&&&6/g;
+        s/0111&&&&/&&&&7/g;
+        s/1000&&&&/&&&&8/g;
+        s/1001&&&&/&&&&9/g;
+        s/1010&&&&/&&&&A/g;
+        s/1011&&&&/&&&&B/g;
+        s/1100&&&&/&&&&C/g;
+        s/1101&&&&/&&&&D/g;
+        s/1110&&&&/&&&&E/g;
+        s/1111&&&&/&&&&F/g;
+      }
+      s/000&&&&/&&&&0/g;
+      s/001&&&&/&&&&1/g;
+      s/010&&&&/&&&&2/g;
+      s/011&&&&/&&&&3/g;
+      s/100&&&&/&&&&4/g;
+      s/101&&&&/&&&&5/g;
+      s/110&&&&/&&&&6/g;
+      s/111&&&&/&&&&7/g;
+      s/00&&&&/&&&&0/g;
+      s/01&&&&/&&&&1/g;
+      s/10&&&&/&&&&2/g;
+      s/11&&&&/&&&&3/g;
+      s/0&&&&/&&&&0/g;
+      s/1&&&&/&&&&1/g;
+      s/conv&&&&/0x/g;
+    }
+
+    if ( /commandline/)
+    {
+        if( /-bigend/)
+        {
+            $bigend=1;
+        }
+    }
+
+    if ( /\bDCDU\b/ )
+    {
+        my $cmd=$_;
+        my $value;
+        my $prefix;
+        my $w1;
+        my $w2;
+        my $w3;
+        my $w4;
+
+        s/\s+DCDU\b/@ $&/;
+
+        $cmd =~ /\bDCDU\b\s+0x(\d+)/;
+        $value = $1;
+        $value =~ /(\w\w)(\w\w)(\w\w)(\w\w)/;
+        $w1 = $1;
+        $w2 = $2;
+        $w3 = $3;
+        $w4 = $4;
+
+        if( $bigend ne "")
+        {
+            # big endian
+            $prefix = "\t.byte\t0x".$w1.";".
+                      "\t.byte\t0x".$w2.";".
+                      "\t.byte\t0x".$w3.";".
+                      "\t.byte\t0x".$w4."; ";
+        }
+        else
+        {
+            # little endian
+            $prefix = "\t.byte\t0x".$w4.";".
+                      "\t.byte\t0x".$w3.";".
+                      "\t.byte\t0x".$w2.";".
+                      "\t.byte\t0x".$w1."; ";
+        }
+        $_=$prefix.$_;
+    }
+
+    if ( /\badrl\b/i )
+    {
+        s/\badrl\s+(\w+)\s*,\s*(\w+)/ldr $1,=$2/i;
+        $addPadding = 1;
+    }
+    s/\bEND\b/@ END/;
+} continue {
+    printf ("%s", $_) if $printit;
+    if ($addPadding != 0)
+    {
+        printf ("   mov r0,r0\n");
+        $addPadding = 0;
+    }
+}
+#If we had a code section, mark that this object doesn't need an executable
+# stack.
+if ($nxstack && !$apple) {
+    printf ("    .section\t.note.GNU-stack,\"\",\%\%progbits\n");
+}
diff --git a/libopus/celt/arm/arm_celt_map.c b/libopus/celt/arm/arm_celt_map.c
new file mode 100644
index 0000000..ca988b6
--- /dev/null
+++ b/libopus/celt/arm/arm_celt_map.c
@@ -0,0 +1,160 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pitch.h"
+#include "kiss_fft.h"
+#include "mdct.h"
+
+#if defined(OPUS_HAVE_RTCD)
+
+# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
+opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y, int N) = {
+  celt_inner_prod_c,   /* ARMv4 */
+  celt_inner_prod_c,   /* EDSP */
+  celt_inner_prod_c,   /* Media */
+  celt_inner_prod_neon /* NEON */
+};
+
+void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+      int N, opus_val32 *xy1, opus_val32 *xy2) = {
+  dual_inner_prod_c,   /* ARMv4 */
+  dual_inner_prod_c,   /* EDSP */
+  dual_inner_prod_c,   /* Media */
+  dual_inner_prod_neon /* NEON */
+};
+# endif
+
+# if defined(FIXED_POINT)
+#  if ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
+    (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
+    (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
+opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+    const opus_val16 *, opus_val32 *, int, int, int) = {
+  celt_pitch_xcorr_c,               /* ARMv4 */
+  MAY_HAVE_EDSP(celt_pitch_xcorr),  /* EDSP */
+  MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
+  MAY_HAVE_NEON(celt_pitch_xcorr)   /* NEON */
+};
+
+#  endif
+# else /* !FIXED_POINT */
+#  if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
+void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+    const opus_val16 *, opus_val32 *, int, int, int) = {
+  celt_pitch_xcorr_c,              /* ARMv4 */
+  celt_pitch_xcorr_c,              /* EDSP */
+  celt_pitch_xcorr_c,              /* Media */
+  celt_pitch_xcorr_float_neon      /* Neon */
+};
+#  endif
+# endif /* FIXED_POINT */
+
+#if defined(FIXED_POINT) && defined(OPUS_HAVE_RTCD) && \
+ defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
+
+void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
+         const opus_val16 *x,
+         const opus_val16 *y,
+         opus_val32       sum[4],
+         int              len
+) = {
+  xcorr_kernel_c,                /* ARMv4 */
+  xcorr_kernel_c,                /* EDSP */
+  xcorr_kernel_c,                /* Media */
+  xcorr_kernel_neon_fixed,       /* Neon */
+};
+
+#endif
+
+# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+#  if defined(HAVE_ARM_NE10)
+#   if defined(CUSTOM_MODES)
+int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
+   opus_fft_alloc_arch_c,        /* ARMv4 */
+   opus_fft_alloc_arch_c,        /* EDSP */
+   opus_fft_alloc_arch_c,        /* Media */
+   opus_fft_alloc_arm_neon       /* Neon with NE10 library support */
+};
+
+void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
+   opus_fft_free_arch_c,         /* ARMv4 */
+   opus_fft_free_arch_c,         /* EDSP */
+   opus_fft_free_arch_c,         /* Media */
+   opus_fft_free_arm_neon        /* Neon with NE10 */
+};
+#   endif /* CUSTOM_MODES */
+
+void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
+                                        const kiss_fft_cpx *fin,
+                                        kiss_fft_cpx *fout) = {
+   opus_fft_c,                   /* ARMv4 */
+   opus_fft_c,                   /* EDSP */
+   opus_fft_c,                   /* Media */
+   opus_fft_neon                 /* Neon with NE10 */
+};
+
+void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
+                                         const kiss_fft_cpx *fin,
+                                         kiss_fft_cpx *fout) = {
+   opus_ifft_c,                   /* ARMv4 */
+   opus_ifft_c,                   /* EDSP */
+   opus_ifft_c,                   /* Media */
+   opus_ifft_neon                 /* Neon with NE10 */
+};
+
+void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
+                                                     kiss_fft_scalar *in,
+                                                     kiss_fft_scalar * OPUS_RESTRICT out,
+                                                     const opus_val16 *window,
+                                                     int overlap, int shift,
+                                                     int stride, int arch) = {
+   clt_mdct_forward_c,           /* ARMv4 */
+   clt_mdct_forward_c,           /* EDSP */
+   clt_mdct_forward_c,           /* Media */
+   clt_mdct_forward_neon         /* Neon with NE10 */
+};
+
+void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
+                                                      kiss_fft_scalar *in,
+                                                      kiss_fft_scalar * OPUS_RESTRICT out,
+                                                      const opus_val16 *window,
+                                                      int overlap, int shift,
+                                                      int stride, int arch) = {
+   clt_mdct_backward_c,           /* ARMv4 */
+   clt_mdct_backward_c,           /* EDSP */
+   clt_mdct_backward_c,           /* Media */
+   clt_mdct_backward_neon         /* Neon with NE10 */
+};
+
+#  endif /* HAVE_ARM_NE10 */
+# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */
+
+#endif /* OPUS_HAVE_RTCD */
diff --git a/libopus/celt/arm/armcpu.c b/libopus/celt/arm/armcpu.c
new file mode 100644
index 0000000..694a63b
--- /dev/null
+++ b/libopus/celt/arm/armcpu.c
@@ -0,0 +1,185 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from libtheora modified to suit to Opus */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef OPUS_HAVE_RTCD
+
+#include "armcpu.h"
+#include "cpu_support.h"
+#include "os_support.h"
+#include "opus_types.h"
+#include "arch.h"
+
+#define OPUS_CPU_ARM_V4_FLAG    (1<<OPUS_ARCH_ARM_V4)
+#define OPUS_CPU_ARM_EDSP_FLAG  (1<<OPUS_ARCH_ARM_EDSP)
+#define OPUS_CPU_ARM_MEDIA_FLAG (1<<OPUS_ARCH_ARM_MEDIA)
+#define OPUS_CPU_ARM_NEON_FLAG  (1<<OPUS_ARCH_ARM_NEON)
+
+#if defined(_MSC_VER)
+/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
+# define WIN32_LEAN_AND_MEAN
+# define WIN32_EXTRA_LEAN
+# include <windows.h>
+
+static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){
+  opus_uint32 flags;
+  flags=0;
+  /* MSVC has no OPUS_INLINE __asm support for ARM, but it does let you __emit
+   * instructions via their assembled hex code.
+   * All of these instructions should be essentially nops. */
+# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \
+ || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+  __try{
+    /*PLD [r13]*/
+    __emit(0xF5DDF000);
+    flags|=OPUS_CPU_ARM_EDSP_FLAG;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+    /*Ignore exception.*/
+  }
+#  if defined(OPUS_ARM_MAY_HAVE_MEDIA) \
+ || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+  __try{
+    /*SHADD8 r3,r3,r3*/
+    __emit(0xE6333F93);
+    flags|=OPUS_CPU_ARM_MEDIA_FLAG;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+    /*Ignore exception.*/
+  }
+#   if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+  __try{
+    /*VORR q0,q0,q0*/
+    __emit(0xF2200150);
+    flags|=OPUS_CPU_ARM_NEON_FLAG;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+    /*Ignore exception.*/
+  }
+#   endif
+#  endif
+# endif
+  return flags;
+}
+
+#elif defined(__linux__)
+/* Linux based */
+opus_uint32 opus_cpu_capabilities(void)
+{
+  opus_uint32 flags = 0;
+  FILE *cpuinfo;
+
+  /* Reading /proc/self/auxv would be easier, but that doesn't work reliably on
+   * Android */
+  cpuinfo = fopen("/proc/cpuinfo", "r");
+
+  if(cpuinfo != NULL)
+  {
+    /* 512 should be enough for anybody (it's even enough for all the flags that
+     * x86 has accumulated... so far). */
+    char buf[512];
+
+    while(fgets(buf, 512, cpuinfo) != NULL)
+    {
+# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \
+ || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+      /* Search for edsp and neon flag */
+      if(memcmp(buf, "Features", 8) == 0)
+      {
+        char *p;
+        p = strstr(buf, " edsp");
+        if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
+          flags |= OPUS_CPU_ARM_EDSP_FLAG;
+
+#  if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+        p = strstr(buf, " neon");
+        if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
+          flags |= OPUS_CPU_ARM_NEON_FLAG;
+#  endif
+      }
+# endif
+
+# if defined(OPUS_ARM_MAY_HAVE_MEDIA) \
+ || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+      /* Search for media capabilities (>= ARMv6) */
+      if(memcmp(buf, "CPU architecture:", 17) == 0)
+      {
+        int version;
+        version = atoi(buf+17);
+
+        if(version >= 6)
+          flags |= OPUS_CPU_ARM_MEDIA_FLAG;
+      }
+# endif
+    }
+
+    fclose(cpuinfo);
+  }
+  return flags;
+}
+#else
+/* The feature registers which can tell us what the processor supports are
+ * accessible in priveleged modes only, so we can't have a general user-space
+ * detection method like on x86.*/
+# error "Configured to use ARM asm but no CPU detection method available for " \
+   "your platform.  Reconfigure with --disable-rtcd (or send patches)."
+#endif
+
+int opus_select_arch(void)
+{
+  opus_uint32 flags = opus_cpu_capabilities();
+  int arch = 0;
+
+  if(!(flags & OPUS_CPU_ARM_EDSP_FLAG)) {
+    /* Asserts ensure arch values are sequential */
+    celt_assert(arch == OPUS_ARCH_ARM_V4);
+    return arch;
+  }
+  arch++;
+
+  if(!(flags & OPUS_CPU_ARM_MEDIA_FLAG)) {
+    celt_assert(arch == OPUS_ARCH_ARM_EDSP);
+    return arch;
+  }
+  arch++;
+
+  if(!(flags & OPUS_CPU_ARM_NEON_FLAG)) {
+    celt_assert(arch == OPUS_ARCH_ARM_MEDIA);
+    return arch;
+  }
+  arch++;
+
+  celt_assert(arch == OPUS_ARCH_ARM_NEON);
+  return arch;
+}
+
+#endif
diff --git a/libopus/celt/arm/armcpu.h b/libopus/celt/arm/armcpu.h
new file mode 100644
index 0000000..820262f
--- /dev/null
+++ b/libopus/celt/arm/armcpu.h
@@ -0,0 +1,77 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(ARMCPU_H)
+# define ARMCPU_H
+
+# if defined(OPUS_ARM_MAY_HAVE_EDSP)
+#  define MAY_HAVE_EDSP(name) name ## _edsp
+# else
+#  define MAY_HAVE_EDSP(name) name ## _c
+# endif
+
+# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+#  define MAY_HAVE_MEDIA(name) name ## _media
+# else
+#  define MAY_HAVE_MEDIA(name) MAY_HAVE_EDSP(name)
+# endif
+
+# if defined(OPUS_ARM_MAY_HAVE_NEON)
+#  define MAY_HAVE_NEON(name) name ## _neon
+# else
+#  define MAY_HAVE_NEON(name) MAY_HAVE_MEDIA(name)
+# endif
+
+# if defined(OPUS_ARM_PRESUME_EDSP)
+#  define PRESUME_EDSP(name) name ## _edsp
+# else
+#  define PRESUME_EDSP(name) name ## _c
+# endif
+
+# if defined(OPUS_ARM_PRESUME_MEDIA)
+#  define PRESUME_MEDIA(name) name ## _media
+# else
+#  define PRESUME_MEDIA(name) PRESUME_EDSP(name)
+# endif
+
+# if defined(OPUS_ARM_PRESUME_NEON)
+#  define PRESUME_NEON(name) name ## _neon
+# else
+#  define PRESUME_NEON(name) PRESUME_MEDIA(name)
+# endif
+
+# if defined(OPUS_HAVE_RTCD)
+int opus_select_arch(void);
+
+#define OPUS_ARCH_ARM_V4    (0)
+#define OPUS_ARCH_ARM_EDSP  (1)
+#define OPUS_ARCH_ARM_MEDIA (2)
+#define OPUS_ARCH_ARM_NEON  (3)
+
+# endif
+
+#endif
diff --git a/libopus/celt/arm/armopts.s.in b/libopus/celt/arm/armopts.s.in
new file mode 100644
index 0000000..3d8aaf2
--- /dev/null
+++ b/libopus/celt/arm/armopts.s.in
@@ -0,0 +1,37 @@
+/* Copyright (C) 2013 Mozilla Corporation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+; Set the following to 1 if we have EDSP instructions
+;  (LDRD/STRD, etc., ARMv5E and later).
+OPUS_ARM_MAY_HAVE_EDSP  * @OPUS_ARM_MAY_HAVE_EDSP@
+
+; Set the following to 1 if we have ARMv6 media instructions.
+OPUS_ARM_MAY_HAVE_MEDIA * @OPUS_ARM_MAY_HAVE_MEDIA@
+
+; Set the following to 1 if we have NEON (some ARMv7)
+OPUS_ARM_MAY_HAVE_NEON  * @OPUS_ARM_MAY_HAVE_NEON@
+
+END
diff --git a/libopus/celt/arm/celt_fft_ne10.c b/libopus/celt/arm/celt_fft_ne10.c
new file mode 100644
index 0000000..ea5fd78
--- /dev/null
+++ b/libopus/celt/arm/celt_fft_ne10.c
@@ -0,0 +1,173 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+   Written by Viswanath Puttagunta */
+/**
+   @file celt_fft_ne10.c
+   @brief ARM Neon optimizations for fft using NE10 library
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef SKIP_CONFIG_H
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#endif
+
+#include <NE10_dsp.h>
+#include "os_support.h"
+#include "kiss_fft.h"
+#include "stack_alloc.h"
+
+#if !defined(FIXED_POINT)
+# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_float32_neon
+# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t
+# define NE10_FFT_STATE_TYPE_T ne10_fft_state_float32_t
+# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_float32
+# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_float32_t
+# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_float32_neon
+#else
+# define NE10_FFT_ALLOC_C2C_TYPE_NEON(nfft) ne10_fft_alloc_c2c_int32_neon(nfft)
+# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t
+# define NE10_FFT_STATE_TYPE_T ne10_fft_state_int32_t
+# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
+# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
+# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_int32_t
+# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_int32_neon
+#endif
+
+#if defined(CUSTOM_MODES)
+
+/* nfft lengths in NE10 that support scaled fft */
+# define NE10_FFTSCALED_SUPPORT_MAX 4
+static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = {
+   480, 240, 120, 60
+};
+
+int opus_fft_alloc_arm_neon(kiss_fft_state *st)
+{
+   int i;
+   size_t memneeded = sizeof(struct arch_fft_state);
+
+   st->arch_fft = (arch_fft_state *)opus_alloc(memneeded);
+   if (!st->arch_fft)
+      return -1;
+
+   for (i = 0; i < NE10_FFTSCALED_SUPPORT_MAX; i++) {
+      if(st->nfft == ne10_fft_scaled_support[i])
+         break;
+   }
+   if (i == NE10_FFTSCALED_SUPPORT_MAX) {
+      /* This nfft length (scaled fft) is not supported in NE10 */
+      st->arch_fft->is_supported = 0;
+      st->arch_fft->priv = NULL;
+   }
+   else {
+      st->arch_fft->is_supported = 1;
+      st->arch_fft->priv = (void *)NE10_FFT_ALLOC_C2C_TYPE_NEON(st->nfft);
+      if (st->arch_fft->priv == NULL) {
+         return -1;
+      }
+   }
+   return 0;
+}
+
+void opus_fft_free_arm_neon(kiss_fft_state *st)
+{
+   NE10_FFT_CFG_TYPE_T cfg;
+
+   if (!st->arch_fft)
+      return;
+
+   cfg = (NE10_FFT_CFG_TYPE_T)st->arch_fft->priv;
+   if (cfg)
+      NE10_FFT_DESTROY_C2C_TYPE(cfg);
+   opus_free(st->arch_fft);
+}
+#endif
+
+void opus_fft_neon(const kiss_fft_state *st,
+                   const kiss_fft_cpx *fin,
+                   kiss_fft_cpx *fout)
+{
+   NE10_FFT_STATE_TYPE_T state;
+   NE10_FFT_CFG_TYPE_T cfg = &state;
+   VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
+   SAVE_STACK;
+   ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
+
+   if (!st->arch_fft->is_supported) {
+      /* This nfft length (scaled fft) not supported in NE10 */
+      opus_fft_c(st, fin, fout);
+   }
+   else {
+      memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T));
+      state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0];
+#if !defined(FIXED_POINT)
+      state.is_forward_scaled = 1;
+
+      NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
+                                (NE10_FFT_CPX_TYPE_T *)fin,
+                                cfg, 0);
+#else
+      NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
+                                (NE10_FFT_CPX_TYPE_T *)fin,
+                                cfg, 0, 1);
+#endif
+   }
+   RESTORE_STACK;
+}
+
+void opus_ifft_neon(const kiss_fft_state *st,
+                    const kiss_fft_cpx *fin,
+                    kiss_fft_cpx *fout)
+{
+   NE10_FFT_STATE_TYPE_T state;
+   NE10_FFT_CFG_TYPE_T cfg = &state;
+   VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
+   SAVE_STACK;
+   ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
+
+   if (!st->arch_fft->is_supported) {
+      /* This nfft length (scaled fft) not supported in NE10 */
+      opus_ifft_c(st, fin, fout);
+   }
+   else {
+      memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T));
+      state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0];
+#if !defined(FIXED_POINT)
+      state.is_backward_scaled = 0;
+
+      NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
+                                (NE10_FFT_CPX_TYPE_T *)fin,
+                                cfg, 1);
+#else
+      NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
+                                (NE10_FFT_CPX_TYPE_T *)fin,
+                                cfg, 1, 0);
+#endif
+   }
+   RESTORE_STACK;
+}
diff --git a/libopus/celt/arm/celt_mdct_ne10.c b/libopus/celt/arm/celt_mdct_ne10.c
new file mode 100644
index 0000000..3531d02
--- /dev/null
+++ b/libopus/celt/arm/celt_mdct_ne10.c
@@ -0,0 +1,258 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+   Written by Viswanath Puttagunta */
+/**
+   @file celt_mdct_ne10.c
+   @brief ARM Neon optimizations for mdct using NE10 library
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef SKIP_CONFIG_H
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#endif
+
+#include "kiss_fft.h"
+#include "_kiss_fft_guts.h"
+#include "mdct.h"
+#include "stack_alloc.h"
+
+void clt_mdct_forward_neon(const mdct_lookup *l,
+                           kiss_fft_scalar *in,
+                           kiss_fft_scalar * OPUS_RESTRICT out,
+                           const opus_val16 *window,
+                           int overlap, int shift, int stride, int arch)
+{
+   int i;
+   int N, N2, N4;
+   VARDECL(kiss_fft_scalar, f);
+   VARDECL(kiss_fft_cpx, f2);
+   const kiss_fft_state *st = l->kfft[shift];
+   const kiss_twiddle_scalar *trig;
+
+   SAVE_STACK;
+
+   N = l->n;
+   trig = l->trig;
+   for (i=0;i<shift;i++)
+   {
+      N >>= 1;
+      trig += N;
+   }
+   N2 = N>>1;
+   N4 = N>>2;
+
+   ALLOC(f, N2, kiss_fft_scalar);
+   ALLOC(f2, N4, kiss_fft_cpx);
+
+   /* Consider the input to be composed of four blocks: [a, b, c, d] */
+   /* Window, shuffle, fold */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
+      const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
+      for(i=0;i<((overlap+3)>>2);i++)
+      {
+         /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
+         *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
+         *yp++ = MULT16_32_Q15(*wp1, *xp1)    - MULT16_32_Q15(*wp2, xp2[-N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+      wp1 = window;
+      wp2 = window+overlap-1;
+      for(;i<N4-((overlap+3)>>2);i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+         *yp++ = *xp2;
+         *yp++ = *xp1;
+         xp1+=2;
+         xp2-=2;
+      }
+      for(;i<N4;i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+         *yp++ =  -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
+         *yp++ = MULT16_32_Q15(*wp2, *xp1)     + MULT16_32_Q15(*wp1, xp2[N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+   }
+   /* Pre-rotation */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const kiss_twiddle_scalar *t = &trig[0];
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_cpx yc;
+         kiss_twiddle_scalar t0, t1;
+         kiss_fft_scalar re, im, yr, yi;
+         t0 = t[i];
+         t1 = t[N4+i];
+         re = *yp++;
+         im = *yp++;
+         yr = S_MUL(re,t0)  -  S_MUL(im,t1);
+         yi = S_MUL(im,t0)  +  S_MUL(re,t1);
+         yc.r = yr;
+         yc.i = yi;
+         f2[i] = yc;
+      }
+   }
+
+   opus_fft(st, f2, (kiss_fft_cpx *)f, arch);
+
+   /* Post-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_cpx * OPUS_RESTRICT fp = (kiss_fft_cpx *)f;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
+      const kiss_twiddle_scalar *t = &trig[0];
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar yr, yi;
+         yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
+         yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
+         *yp1 = yr;
+         *yp2 = yi;
+         fp++;
+         yp1 += 2*stride;
+         yp2 -= 2*stride;
+      }
+   }
+   RESTORE_STACK;
+}
+
+void clt_mdct_backward_neon(const mdct_lookup *l,
+                            kiss_fft_scalar *in,
+                            kiss_fft_scalar * OPUS_RESTRICT out,
+                            const opus_val16 * OPUS_RESTRICT window,
+                            int overlap, int shift, int stride, int arch)
+{
+   int i;
+   int N, N2, N4;
+   VARDECL(kiss_fft_scalar, f);
+   const kiss_twiddle_scalar *trig;
+   const kiss_fft_state *st = l->kfft[shift];
+
+   N = l->n;
+   trig = l->trig;
+   for (i=0;i<shift;i++)
+   {
+      N >>= 1;
+      trig += N;
+   }
+   N2 = N>>1;
+   N4 = N>>2;
+
+   ALLOC(f, N2, kiss_fft_scalar);
+
+   /* Pre-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar yr, yi;
+         yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]);
+         yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]);
+         yp[2*i] = yr;
+         yp[2*i+1] = yi;
+         xp1+=2*stride;
+         xp2-=2*stride;
+      }
+   }
+
+   opus_ifft(st, (kiss_fft_cpx *)f, (kiss_fft_cpx*)(out+(overlap>>1)), arch);
+
+   /* Post-rotate and de-shuffle from both ends of the buffer at once to make
+      it in-place. */
+   {
+      kiss_fft_scalar * yp0 = out+(overlap>>1);
+      kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2;
+      const kiss_twiddle_scalar *t = &trig[0];
+      /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
+         middle pair will be computed twice. */
+      for(i=0;i<(N4+1)>>1;i++)
+      {
+         kiss_fft_scalar re, im, yr, yi;
+         kiss_twiddle_scalar t0, t1;
+         re = yp0[0];
+         im = yp0[1];
+         t0 = t[i];
+         t1 = t[N4+i];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = S_MUL(re,t0) + S_MUL(im,t1);
+         yi = S_MUL(re,t1) - S_MUL(im,t0);
+         re = yp1[0];
+         im = yp1[1];
+         yp0[0] = yr;
+         yp1[1] = yi;
+
+         t0 = t[(N4-i-1)];
+         t1 = t[(N2-i-1)];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = S_MUL(re,t0) + S_MUL(im,t1);
+         yi = S_MUL(re,t1) - S_MUL(im,t0);
+         yp1[0] = yr;
+         yp0[1] = yi;
+         yp0 += 2;
+         yp1 -= 2;
+      }
+   }
+
+   /* Mirror on both sides for TDAC */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      const opus_val16 * OPUS_RESTRICT wp1 = window;
+      const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
+
+      for(i = 0; i < overlap/2; i++)
+      {
+         kiss_fft_scalar x1, x2;
+         x1 = *xp1;
+         x2 = *yp1;
+         *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
+         *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
+         wp1++;
+         wp2--;
+      }
+   }
+   RESTORE_STACK;
+}
diff --git a/libopus/celt/arm/celt_neon_intr.c b/libopus/celt/arm/celt_neon_intr.c
new file mode 100644
index 0000000..effda76
--- /dev/null
+++ b/libopus/celt/arm/celt_neon_intr.c
@@ -0,0 +1,211 @@
+/* Copyright (c) 2014-2015 Xiph.Org Foundation
+   Written by Viswanath Puttagunta */
+/**
+   @file celt_neon_intr.c
+   @brief ARM Neon Intrinsic optimizations for celt
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <arm_neon.h>
+#include "../pitch.h"
+
+#if defined(FIXED_POINT)
+void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
+{
+   int j;
+   int32x4_t a = vld1q_s32(sum);
+   /* Load y[0...3] */
+   /* This requires len>0 to always be valid (which we assert in the C code). */
+   int16x4_t y0 = vld1_s16(y);
+   y += 4;
+
+   for (j = 0; j + 8 <= len; j += 8)
+   {
+      /* Load x[0...7] */
+      int16x8_t xx = vld1q_s16(x);
+      int16x4_t x0 = vget_low_s16(xx);
+      int16x4_t x4 = vget_high_s16(xx);
+      /* Load y[4...11] */
+      int16x8_t yy = vld1q_s16(y);
+      int16x4_t y4 = vget_low_s16(yy);
+      int16x4_t y8 = vget_high_s16(yy);
+      int32x4_t a0 = vmlal_lane_s16(a, y0, x0, 0);
+      int32x4_t a1 = vmlal_lane_s16(a0, y4, x4, 0);
+
+      int16x4_t y1 = vext_s16(y0, y4, 1);
+      int16x4_t y5 = vext_s16(y4, y8, 1);
+      int32x4_t a2 = vmlal_lane_s16(a1, y1, x0, 1);
+      int32x4_t a3 = vmlal_lane_s16(a2, y5, x4, 1);
+
+      int16x4_t y2 = vext_s16(y0, y4, 2);
+      int16x4_t y6 = vext_s16(y4, y8, 2);
+      int32x4_t a4 = vmlal_lane_s16(a3, y2, x0, 2);
+      int32x4_t a5 = vmlal_lane_s16(a4, y6, x4, 2);
+
+      int16x4_t y3 = vext_s16(y0, y4, 3);
+      int16x4_t y7 = vext_s16(y4, y8, 3);
+      int32x4_t a6 = vmlal_lane_s16(a5, y3, x0, 3);
+      int32x4_t a7 = vmlal_lane_s16(a6, y7, x4, 3);
+
+      y0 = y8;
+      a = a7;
+      x += 8;
+      y += 8;
+   }
+
+   for (; j < len; j++)
+   {
+      int16x4_t x0 = vld1_dup_s16(x);  /* load next x */
+      int32x4_t a0 = vmlal_s16(a, y0, x0);
+
+      int16x4_t y4 = vld1_dup_s16(y);  /* load next y */
+      y0 = vext_s16(y0, y4, 1);
+      a = a0;
+      x++;
+      y++;
+   }
+
+   vst1q_s32(sum, a);
+}
+
+#else
+/*
+ * Function: xcorr_kernel_neon_float
+ * ---------------------------------
+ * Computes 4 correlation values and stores them in sum[4]
+ */
+static void xcorr_kernel_neon_float(const float32_t *x, const float32_t *y,
+      float32_t sum[4], int len) {
+   float32x4_t YY[3];
+   float32x4_t YEXT[3];
+   float32x4_t XX[2];
+   float32x2_t XX_2;
+   float32x4_t SUMM;
+   const float32_t *xi = x;
+   const float32_t *yi = y;
+
+   celt_assert(len>0);
+
+   YY[0] = vld1q_f32(yi);
+   SUMM = vdupq_n_f32(0);
+
+   /* Consume 8 elements in x vector and 12 elements in y
+    * vector. However, the 12'th element never really gets
+    * touched in this loop. So, if len == 8, then we only
+    * must access y[0] to y[10]. y[11] must not be accessed
+    * hence make sure len > 8 and not len >= 8
+    */
+   while (len > 8) {
+      yi += 4;
+      YY[1] = vld1q_f32(yi);
+      yi += 4;
+      YY[2] = vld1q_f32(yi);
+
+      XX[0] = vld1q_f32(xi);
+      xi += 4;
+      XX[1] = vld1q_f32(xi);
+      xi += 4;
+
+      SUMM = vmlaq_lane_f32(SUMM, YY[0], vget_low_f32(XX[0]), 0);
+      YEXT[0] = vextq_f32(YY[0], YY[1], 1);
+      SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[0]), 1);
+      YEXT[1] = vextq_f32(YY[0], YY[1], 2);
+      SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[0]), 0);
+      YEXT[2] = vextq_f32(YY[0], YY[1], 3);
+      SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[0]), 1);
+
+      SUMM = vmlaq_lane_f32(SUMM, YY[1], vget_low_f32(XX[1]), 0);
+      YEXT[0] = vextq_f32(YY[1], YY[2], 1);
+      SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[1]), 1);
+      YEXT[1] = vextq_f32(YY[1], YY[2], 2);
+      SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[1]), 0);
+      YEXT[2] = vextq_f32(YY[1], YY[2], 3);
+      SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[1]), 1);
+
+      YY[0] = YY[2];
+      len -= 8;
+   }
+
+   /* Consume 4 elements in x vector and 8 elements in y
+    * vector. However, the 8'th element in y never really gets
+    * touched in this loop. So, if len == 4, then we only
+    * must access y[0] to y[6]. y[7] must not be accessed
+    * hence make sure len>4 and not len>=4
+    */
+   if (len > 4) {
+      yi += 4;
+      YY[1] = vld1q_f32(yi);
+
+      XX[0] = vld1q_f32(xi);
+      xi += 4;
+
+      SUMM = vmlaq_lane_f32(SUMM, YY[0], vget_low_f32(XX[0]), 0);
+      YEXT[0] = vextq_f32(YY[0], YY[1], 1);
+      SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[0]), 1);
+      YEXT[1] = vextq_f32(YY[0], YY[1], 2);
+      SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[0]), 0);
+      YEXT[2] = vextq_f32(YY[0], YY[1], 3);
+      SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[0]), 1);
+
+      YY[0] = YY[1];
+      len -= 4;
+   }
+
+   while (--len > 0) {
+      XX_2 = vld1_dup_f32(xi++);
+      SUMM = vmlaq_lane_f32(SUMM, YY[0], XX_2, 0);
+      YY[0]= vld1q_f32(++yi);
+   }
+
+   XX_2 = vld1_dup_f32(xi);
+   SUMM = vmlaq_lane_f32(SUMM, YY[0], XX_2, 0);
+
+   vst1q_f32(sum, SUMM);
+}
+
+void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
+                        opus_val32 *xcorr, int len, int max_pitch, int arch) {
+   int i;
+   (void)arch;
+   celt_assert(max_pitch > 0);
+   celt_sig_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
+
+   for (i = 0; i < (max_pitch-3); i += 4) {
+      xcorr_kernel_neon_float((const float32_t *)_x, (const float32_t *)_y+i,
+            (float32_t *)xcorr+i, len);
+   }
+
+   /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
+   for (; i < max_pitch; i++) {
+      xcorr[i] = celt_inner_prod_neon(_x, _y+i, len);
+   }
+}
+#endif
diff --git a/libopus/celt/arm/celt_pitch_xcorr_arm-gnu.S b/libopus/celt/arm/celt_pitch_xcorr_arm-gnu.S
new file mode 100644
index 0000000..10668e5
--- /dev/null
+++ b/libopus/celt/arm/celt_pitch_xcorr_arm-gnu.S
@@ -0,0 +1,555 @@
+    .syntax unified
+@ Copyright (c) 2007-2008 CSIRO
+@ Copyright (c) 2007-2009 Xiph.Org Foundation
+@ Copyright (c) 2013      Parrot
+@ Written by Aurélien Zanelli
+@
+@ Redistribution and use in source and binary forms, with or without
+@ modification, are permitted provided that the following conditions
+@ are met:
+@
+@ - Redistributions of source code must retain the above copyright
+@ notice, this list of conditions and the following disclaimer.
+@
+@ - Redistributions in binary form must reproduce the above copyright
+@ notice, this list of conditions and the following disclaimer in the
+@ documentation and/or other materials provided with the distribution.
+@
+@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+@ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+@ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    .text;   .p2align 2;   .arch armv7-a
+   .fpu neon
+   .object_arch armv4t
+
+  .include "celt/arm/armopts-gnu.S"
+
+ .if OPUS_ARM_MAY_HAVE_EDSP
+  .global celt_pitch_xcorr_edsp
+ .endif
+
+ .if OPUS_ARM_MAY_HAVE_NEON
+  .global celt_pitch_xcorr_neon
+ .endif
+
+ .if OPUS_ARM_MAY_HAVE_NEON
+
+@ Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3
+	.type	xcorr_kernel_neon, %function; xcorr_kernel_neon: @ PROC
+xcorr_kernel_neon_start:
+  @ input:
+  @   r3     = int         len
+  @   r4     = opus_val16 *x
+  @   r5     = opus_val16 *y
+  @   q0     = opus_val32  sum[4]
+  @ output:
+  @   q0     = opus_val32  sum[4]
+  @ preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15
+  @ internal usage:
+  @   r12 = int j
+  @   d3  = y_3|y_2|y_1|y_0
+  @   q2  = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4
+  @   q3  = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0
+  @   q8  = scratch
+  @
+  @ Load y[0...3]
+  @ This requires len>0 to always be valid (which we assert in the C code).
+  VLD1.16      {d5}, [r5]!
+  SUBS         r12, r3, #8
+  BLE xcorr_kernel_neon_process4
+@ Process 8 samples at a time.
+@ This loop loads one y value more than we actually need. Therefore we have to
+@ stop as soon as there are 8 or fewer samples left (instead of 7), to avoid
+@ reading past the end of the array.
+xcorr_kernel_neon_process8:
+  @ This loop has 19 total instructions (10 cycles to issue, minimum), with
+  @ - 2 cycles of ARM insrtuctions,
+  @ - 10 cycles of load/store/byte permute instructions, and
+  @ - 9 cycles of data processing instructions.
+  @ On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the
+  @ latter two categories, meaning the whole loop should run in 10 cycles per
+  @ iteration, barring cache misses.
+  @
+  @ Load x[0...7]
+  VLD1.16      {d6, d7}, [r4]!
+  @ Unlike VMOV, VAND is a data processsing instruction (and doesn't get
+  @ assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1.
+  VAND         d3, d5, d5
+  SUBS         r12, r12, #8
+  @ Load y[4...11]
+  VLD1.16      {d4, d5}, [r5]!
+  VMLAL.S16    q0, d3, d6[0]
+  VEXT.16      d16, d3, d4, #1
+  VMLAL.S16    q0, d4, d7[0]
+  VEXT.16      d17, d4, d5, #1
+  VMLAL.S16    q0, d16, d6[1]
+  VEXT.16      d16, d3, d4, #2
+  VMLAL.S16    q0, d17, d7[1]
+  VEXT.16      d17, d4, d5, #2
+  VMLAL.S16    q0, d16, d6[2]
+  VEXT.16      d16, d3, d4, #3
+  VMLAL.S16    q0, d17, d7[2]
+  VEXT.16      d17, d4, d5, #3
+  VMLAL.S16    q0, d16, d6[3]
+  VMLAL.S16    q0, d17, d7[3]
+  BGT xcorr_kernel_neon_process8
+@ Process 4 samples here if we have > 4 left (still reading one extra y value).
+xcorr_kernel_neon_process4:
+  ADDS         r12, r12, #4
+  BLE xcorr_kernel_neon_process2
+  @ Load x[0...3]
+  VLD1.16      d6, [r4]!
+  @ Use VAND since it's a data processing instruction again.
+  VAND         d4, d5, d5
+  SUB          r12, r12, #4
+  @ Load y[4...7]
+  VLD1.16      d5, [r5]!
+  VMLAL.S16    q0, d4, d6[0]
+  VEXT.16      d16, d4, d5, #1
+  VMLAL.S16    q0, d16, d6[1]
+  VEXT.16      d16, d4, d5, #2
+  VMLAL.S16    q0, d16, d6[2]
+  VEXT.16      d16, d4, d5, #3
+  VMLAL.S16    q0, d16, d6[3]
+@ Process 2 samples here if we have > 2 left (still reading one extra y value).
+xcorr_kernel_neon_process2:
+  ADDS         r12, r12, #2
+  BLE xcorr_kernel_neon_process1
+  @ Load x[0...1]
+  VLD2.16      {d6[],d7[]}, [r4]!
+  @ Use VAND since it's a data processing instruction again.
+  VAND         d4, d5, d5
+  SUB          r12, r12, #2
+  @ Load y[4...5]
+  VLD1.32      {d5[]}, [r5]!
+  VMLAL.S16    q0, d4, d6
+  VEXT.16      d16, d4, d5, #1
+  @ Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI
+  @ instead of VEXT, since it's a data-processing instruction.
+  VSRI.64      d5, d4, #32
+  VMLAL.S16    q0, d16, d7
+@ Process 1 sample using the extra y value we loaded above.
+xcorr_kernel_neon_process1:
+  @ Load next *x
+  VLD1.16      {d6[]}, [r4]!
+  ADDS         r12, r12, #1
+  @ y[0...3] are left in d5 from prior iteration(s) (if any)
+  VMLAL.S16    q0, d5, d6
+  MOVLE        pc, lr
+@ Now process 1 last sample, not reading ahead.
+  @ Load last *y
+  VLD1.16      {d4[]}, [r5]!
+  VSRI.64      d4, d5, #16
+  @ Load last *x
+  VLD1.16      {d6[]}, [r4]!
+  VMLAL.S16    q0, d4, d6
+  MOV          pc, lr
+	.size xcorr_kernel_neon, .-xcorr_kernel_neon  @ ENDP
+
+@ opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y,
+@  opus_val32 *xcorr, int len, int max_pitch, int arch)
+	.type	celt_pitch_xcorr_neon, %function; celt_pitch_xcorr_neon: @ PROC
+  @ input:
+  @   r0  = opus_val16 *_x
+  @   r1  = opus_val16 *_y
+  @   r2  = opus_val32 *xcorr
+  @   r3  = int         len
+  @ output:
+  @   r0  = int         maxcorr
+  @ internal usage:
+  @   r4  = opus_val16 *x (for xcorr_kernel_neon())
+  @   r5  = opus_val16 *y (for xcorr_kernel_neon())
+  @   r6  = int         max_pitch
+  @   r12 = int         j
+  @   q15 = int         maxcorr[4] (q15 is not used by xcorr_kernel_neon())
+  @ ignored:
+  @         int         arch
+  STMFD        sp!, {r4-r6, lr}
+  LDR          r6, [sp, #16]
+  VMOV.S32     q15, #1
+  @ if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
+  SUBS         r6, r6, #4
+  BLT celt_pitch_xcorr_neon_process4_done
+celt_pitch_xcorr_neon_process4:
+  @ xcorr_kernel_neon parameters:
+  @ r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0}
+  MOV          r4, r0
+  MOV          r5, r1
+  VEOR         q0, q0, q0
+  @ xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3.
+  @ So we don't save/restore any other registers.
+  BL xcorr_kernel_neon_start
+  SUBS         r6, r6, #4
+  VST1.32      {q0}, [r2]!
+  @ _y += 4
+  ADD          r1, r1, #8
+  VMAX.S32     q15, q15, q0
+  @ if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
+  BGE celt_pitch_xcorr_neon_process4
+@ We have less than 4 sums left to compute.
+celt_pitch_xcorr_neon_process4_done:
+  ADDS         r6, r6, #4
+  @ Reduce maxcorr to a single value
+  VMAX.S32     d30, d30, d31
+  VPMAX.S32    d30, d30, d30
+  @ if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done
+  BLE celt_pitch_xcorr_neon_done
+@ Now compute each remaining sum one at a time.
+celt_pitch_xcorr_neon_process_remaining:
+  MOV          r4, r0
+  MOV          r5, r1
+  VMOV.I32     q0, #0
+  SUBS         r12, r3, #8
+  BLT celt_pitch_xcorr_neon_process_remaining4
+@ Sum terms 8 at a time.
+celt_pitch_xcorr_neon_process_remaining_loop8:
+  @ Load x[0...7]
+  VLD1.16      {q1}, [r4]!
+  @ Load y[0...7]
+  VLD1.16      {q2}, [r5]!
+  SUBS         r12, r12, #8
+  VMLAL.S16    q0, d4, d2
+  VMLAL.S16    q0, d5, d3
+  BGE celt_pitch_xcorr_neon_process_remaining_loop8
+@ Sum terms 4 at a time.
+celt_pitch_xcorr_neon_process_remaining4:
+  ADDS         r12, r12, #4
+  BLT celt_pitch_xcorr_neon_process_remaining4_done
+  @ Load x[0...3]
+  VLD1.16      {d2}, [r4]!
+  @ Load y[0...3]
+  VLD1.16      {d3}, [r5]!
+  SUB          r12, r12, #4
+  VMLAL.S16    q0, d3, d2
+celt_pitch_xcorr_neon_process_remaining4_done:
+  @ Reduce the sum to a single value.
+  VADD.S32     d0, d0, d1
+  VPADDL.S32   d0, d0
+  ADDS         r12, r12, #4
+  BLE celt_pitch_xcorr_neon_process_remaining_loop_done
+@ Sum terms 1 at a time.
+celt_pitch_xcorr_neon_process_remaining_loop1:
+  VLD1.16      {d2[]}, [r4]!
+  VLD1.16      {d3[]}, [r5]!
+  SUBS         r12, r12, #1
+  VMLAL.S16    q0, d2, d3
+  BGT celt_pitch_xcorr_neon_process_remaining_loop1
+celt_pitch_xcorr_neon_process_remaining_loop_done:
+  VST1.32      {d0[0]}, [r2]!
+  VMAX.S32     d30, d30, d0
+  SUBS         r6, r6, #1
+  @ _y++
+  ADD          r1, r1, #2
+  @ if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining
+  BGT celt_pitch_xcorr_neon_process_remaining
+celt_pitch_xcorr_neon_done:
+  VMOV.32      r0, d30[0]
+  LDMFD        sp!, {r4-r6, pc}
+	.size celt_pitch_xcorr_neon, .-celt_pitch_xcorr_neon  @ ENDP
+
+ .endif
+
+ .if OPUS_ARM_MAY_HAVE_EDSP
+
+@ This will get used on ARMv7 devices without NEON, so it has been optimized
+@ to take advantage of dual-issuing where possible.
+	.type	xcorr_kernel_edsp, %function; xcorr_kernel_edsp: @ PROC
+xcorr_kernel_edsp_start:
+  @ input:
+  @   r3      = int         len
+  @   r4      = opus_val16 *_x (must be 32-bit aligned)
+  @   r5      = opus_val16 *_y (must be 32-bit aligned)
+  @   r6...r9 = opus_val32  sum[4]
+  @ output:
+  @   r6...r9 = opus_val32  sum[4]
+  @ preserved: r0-r5
+  @ internal usage
+  @   r2      = int         j
+  @   r12,r14 = opus_val16  x[4]
+  @   r10,r11 = opus_val16  y[4]
+  STMFD        sp!, {r2,r4,r5,lr}
+  LDR          r10, [r5], #4      @ Load y[0...1]
+  SUBS         r2, r3, #4         @ j = len-4
+  LDR          r11, [r5], #4      @ Load y[2...3]
+  BLE xcorr_kernel_edsp_process4_done
+  LDR          r12, [r4], #4      @ Load x[0...1]
+  @ Stall
+xcorr_kernel_edsp_process4:
+  @ The multiplies must issue from pipeline 0, and can't dual-issue with each
+  @ other. Every other instruction here dual-issues with a multiply, and is
+  @ thus "free". There should be no stalls in the body of the loop.
+  SMLABB       r6, r12, r10, r6   @ sum[0] = MAC16_16(sum[0],x_0,y_0)
+  LDR          r14, [r4], #4      @ Load x[2...3]
+  SMLABT       r7, r12, r10, r7   @ sum[1] = MAC16_16(sum[1],x_0,y_1)
+  SUBS         r2, r2, #4         @ j-=4
+  SMLABB       r8, r12, r11, r8   @ sum[2] = MAC16_16(sum[2],x_0,y_2)
+  SMLABT       r9, r12, r11, r9   @ sum[3] = MAC16_16(sum[3],x_0,y_3)
+  SMLATT       r6, r12, r10, r6   @ sum[0] = MAC16_16(sum[0],x_1,y_1)
+  LDR          r10, [r5], #4      @ Load y[4...5]
+  SMLATB       r7, r12, r11, r7   @ sum[1] = MAC16_16(sum[1],x_1,y_2)
+  SMLATT       r8, r12, r11, r8   @ sum[2] = MAC16_16(sum[2],x_1,y_3)
+  SMLATB       r9, r12, r10, r9   @ sum[3] = MAC16_16(sum[3],x_1,y_4)
+  LDRGT        r12, [r4], #4      @ Load x[0...1]
+  SMLABB       r6, r14, r11, r6   @ sum[0] = MAC16_16(sum[0],x_2,y_2)
+  SMLABT       r7, r14, r11, r7   @ sum[1] = MAC16_16(sum[1],x_2,y_3)
+  SMLABB       r8, r14, r10, r8   @ sum[2] = MAC16_16(sum[2],x_2,y_4)
+  SMLABT       r9, r14, r10, r9   @ sum[3] = MAC16_16(sum[3],x_2,y_5)
+  SMLATT       r6, r14, r11, r6   @ sum[0] = MAC16_16(sum[0],x_3,y_3)
+  LDR          r11, [r5], #4      @ Load y[6...7]
+  SMLATB       r7, r14, r10, r7   @ sum[1] = MAC16_16(sum[1],x_3,y_4)
+  SMLATT       r8, r14, r10, r8   @ sum[2] = MAC16_16(sum[2],x_3,y_5)
+  SMLATB       r9, r14, r11, r9   @ sum[3] = MAC16_16(sum[3],x_3,y_6)
+  BGT xcorr_kernel_edsp_process4
+xcorr_kernel_edsp_process4_done:
+  ADDS         r2, r2, #4
+  BLE xcorr_kernel_edsp_done
+  LDRH         r12, [r4], #2      @ r12 = *x++
+  SUBS         r2, r2, #1         @ j--
+  @ Stall
+  SMLABB       r6, r12, r10, r6   @ sum[0] = MAC16_16(sum[0],x,y_0)
+  LDRHGT       r14, [r4], #2      @ r14 = *x++
+  SMLABT       r7, r12, r10, r7   @ sum[1] = MAC16_16(sum[1],x,y_1)
+  SMLABB       r8, r12, r11, r8   @ sum[2] = MAC16_16(sum[2],x,y_2)
+  SMLABT       r9, r12, r11, r9   @ sum[3] = MAC16_16(sum[3],x,y_3)
+  BLE xcorr_kernel_edsp_done
+  SMLABT       r6, r14, r10, r6   @ sum[0] = MAC16_16(sum[0],x,y_1)
+  SUBS         r2, r2, #1         @ j--
+  SMLABB       r7, r14, r11, r7   @ sum[1] = MAC16_16(sum[1],x,y_2)
+  LDRH         r10, [r5], #2      @ r10 = y_4 = *y++
+  SMLABT       r8, r14, r11, r8   @ sum[2] = MAC16_16(sum[2],x,y_3)
+  LDRHGT       r12, [r4], #2      @ r12 = *x++
+  SMLABB       r9, r14, r10, r9   @ sum[3] = MAC16_16(sum[3],x,y_4)
+  BLE xcorr_kernel_edsp_done
+  SMLABB       r6, r12, r11, r6   @ sum[0] = MAC16_16(sum[0],tmp,y_2)
+  CMP          r2, #1             @ j--
+  SMLABT       r7, r12, r11, r7   @ sum[1] = MAC16_16(sum[1],tmp,y_3)
+  LDRH         r2, [r5], #2       @ r2 = y_5 = *y++
+  SMLABB       r8, r12, r10, r8   @ sum[2] = MAC16_16(sum[2],tmp,y_4)
+  LDRHGT       r14, [r4]          @ r14 = *x
+  SMLABB       r9, r12, r2, r9    @ sum[3] = MAC16_16(sum[3],tmp,y_5)
+  BLE xcorr_kernel_edsp_done
+  SMLABT       r6, r14, r11, r6   @ sum[0] = MAC16_16(sum[0],tmp,y_3)
+  LDRH         r11, [r5]          @ r11 = y_6 = *y
+  SMLABB       r7, r14, r10, r7   @ sum[1] = MAC16_16(sum[1],tmp,y_4)
+  SMLABB       r8, r14, r2, r8    @ sum[2] = MAC16_16(sum[2],tmp,y_5)
+  SMLABB       r9, r14, r11, r9   @ sum[3] = MAC16_16(sum[3],tmp,y_6)
+xcorr_kernel_edsp_done:
+  LDMFD        sp!, {r2,r4,r5,pc}
+	.size xcorr_kernel_edsp, .-xcorr_kernel_edsp  @ ENDP
+
+	.type	celt_pitch_xcorr_edsp, %function; celt_pitch_xcorr_edsp: @ PROC
+  @ input:
+  @   r0  = opus_val16 *_x (must be 32-bit aligned)
+  @   r1  = opus_val16 *_y (only needs to be 16-bit aligned)
+  @   r2  = opus_val32 *xcorr
+  @   r3  = int         len
+  @ output:
+  @   r0  = maxcorr
+  @ internal usage
+  @   r4  = opus_val16 *x
+  @   r5  = opus_val16 *y
+  @   r6  = opus_val32  sum0
+  @   r7  = opus_val32  sum1
+  @   r8  = opus_val32  sum2
+  @   r9  = opus_val32  sum3
+  @   r1  = int         max_pitch
+  @   r12 = int         j
+  @ ignored:
+  @         int         arch
+  STMFD        sp!, {r4-r11, lr}
+  MOV          r5, r1
+  LDR          r1, [sp, #36]
+  MOV          r4, r0
+  TST          r5, #3
+  @ maxcorr = 1
+  MOV          r0, #1
+  BEQ          celt_pitch_xcorr_edsp_process1u_done
+@ Compute one sum at the start to make y 32-bit aligned.
+  SUBS         r12, r3, #4
+  @ r14 = sum = 0
+  MOV          r14, #0
+  LDRH         r8, [r5], #2
+  BLE celt_pitch_xcorr_edsp_process1u_loop4_done
+  LDR          r6, [r4], #4
+  MOV          r8, r8, LSL #16
+celt_pitch_xcorr_edsp_process1u_loop4:
+  LDR          r9, [r5], #4
+  SMLABT       r14, r6, r8, r14     @ sum = MAC16_16(sum, x_0, y_0)
+  LDR          r7, [r4], #4
+  SMLATB       r14, r6, r9, r14     @ sum = MAC16_16(sum, x_1, y_1)
+  LDR          r8, [r5], #4
+  SMLABT       r14, r7, r9, r14     @ sum = MAC16_16(sum, x_2, y_2)
+  SUBS         r12, r12, #4         @ j-=4
+  SMLATB       r14, r7, r8, r14     @ sum = MAC16_16(sum, x_3, y_3)
+  LDRGT        r6, [r4], #4
+  BGT celt_pitch_xcorr_edsp_process1u_loop4
+  MOV          r8, r8, LSR #16
+celt_pitch_xcorr_edsp_process1u_loop4_done:
+  ADDS         r12, r12, #4
+celt_pitch_xcorr_edsp_process1u_loop1:
+  LDRHGE       r6, [r4], #2
+  @ Stall
+  SMLABBGE     r14, r6, r8, r14    @ sum = MAC16_16(sum, *x, *y)
+  SUBSGE       r12, r12, #1
+  LDRHGT       r8, [r5], #2
+  BGT celt_pitch_xcorr_edsp_process1u_loop1
+  @ Restore _x
+  SUB          r4, r4, r3, LSL #1
+  @ Restore and advance _y
+  SUB          r5, r5, r3, LSL #1
+  @ maxcorr = max(maxcorr, sum)
+  CMP          r0, r14
+  ADD          r5, r5, #2
+  MOVLT        r0, r14
+  SUBS         r1, r1, #1
+  @ xcorr[i] = sum
+  STR          r14, [r2], #4
+  BLE celt_pitch_xcorr_edsp_done
+celt_pitch_xcorr_edsp_process1u_done:
+  @ if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2
+  SUBS         r1, r1, #4
+  BLT celt_pitch_xcorr_edsp_process2
+celt_pitch_xcorr_edsp_process4:
+  @ xcorr_kernel_edsp parameters:
+  @ r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0}
+  MOV          r6, #0
+  MOV          r7, #0
+  MOV          r8, #0
+  MOV          r9, #0
+  BL xcorr_kernel_edsp_start  @ xcorr_kernel_edsp(_x, _y+i, xcorr+i, len)
+  @ maxcorr = max(maxcorr, sum0, sum1, sum2, sum3)
+  CMP          r0, r6
+  @ _y+=4
+  ADD          r5, r5, #8
+  MOVLT        r0, r6
+  CMP          r0, r7
+  MOVLT        r0, r7
+  CMP          r0, r8
+  MOVLT        r0, r8
+  CMP          r0, r9
+  MOVLT        r0, r9
+  STMIA        r2!, {r6-r9}
+  SUBS         r1, r1, #4
+  BGE celt_pitch_xcorr_edsp_process4
+celt_pitch_xcorr_edsp_process2:
+  ADDS         r1, r1, #2
+  BLT celt_pitch_xcorr_edsp_process1a
+  SUBS         r12, r3, #4
+  @ {r10, r11} = {sum0, sum1} = {0, 0}
+  MOV          r10, #0
+  MOV          r11, #0
+  LDR          r8, [r5], #4
+  BLE celt_pitch_xcorr_edsp_process2_loop_done
+  LDR          r6, [r4], #4
+  LDR          r9, [r5], #4
+celt_pitch_xcorr_edsp_process2_loop4:
+  SMLABB       r10, r6, r8, r10     @ sum0 = MAC16_16(sum0, x_0, y_0)
+  LDR          r7, [r4], #4
+  SMLABT       r11, r6, r8, r11     @ sum1 = MAC16_16(sum1, x_0, y_1)
+  SUBS         r12, r12, #4         @ j-=4
+  SMLATT       r10, r6, r8, r10     @ sum0 = MAC16_16(sum0, x_1, y_1)
+  LDR          r8, [r5], #4
+  SMLATB       r11, r6, r9, r11     @ sum1 = MAC16_16(sum1, x_1, y_2)
+  LDRGT        r6, [r4], #4
+  SMLABB       r10, r7, r9, r10     @ sum0 = MAC16_16(sum0, x_2, y_2)
+  SMLABT       r11, r7, r9, r11     @ sum1 = MAC16_16(sum1, x_2, y_3)
+  SMLATT       r10, r7, r9, r10     @ sum0 = MAC16_16(sum0, x_3, y_3)
+  LDRGT        r9, [r5], #4
+  SMLATB       r11, r7, r8, r11     @ sum1 = MAC16_16(sum1, x_3, y_4)
+  BGT celt_pitch_xcorr_edsp_process2_loop4
+celt_pitch_xcorr_edsp_process2_loop_done:
+  ADDS         r12, r12, #2
+  BLE  celt_pitch_xcorr_edsp_process2_1
+  LDR          r6, [r4], #4
+  @ Stall
+  SMLABB       r10, r6, r8, r10     @ sum0 = MAC16_16(sum0, x_0, y_0)
+  LDR          r9, [r5], #4
+  SMLABT       r11, r6, r8, r11     @ sum1 = MAC16_16(sum1, x_0, y_1)
+  SUB          r12, r12, #2
+  SMLATT       r10, r6, r8, r10     @ sum0 = MAC16_16(sum0, x_1, y_1)
+  MOV          r8, r9
+  SMLATB       r11, r6, r9, r11     @ sum1 = MAC16_16(sum1, x_1, y_2)
+celt_pitch_xcorr_edsp_process2_1:
+  LDRH         r6, [r4], #2
+  ADDS         r12, r12, #1
+  @ Stall
+  SMLABB       r10, r6, r8, r10     @ sum0 = MAC16_16(sum0, x_0, y_0)
+  LDRHGT       r7, [r4], #2
+  SMLABT       r11, r6, r8, r11     @ sum1 = MAC16_16(sum1, x_0, y_1)
+  BLE celt_pitch_xcorr_edsp_process2_done
+  LDRH         r9, [r5], #2
+  SMLABT       r10, r7, r8, r10     @ sum0 = MAC16_16(sum0, x_0, y_1)
+  SMLABB       r11, r7, r9, r11     @ sum1 = MAC16_16(sum1, x_0, y_2)
+celt_pitch_xcorr_edsp_process2_done:
+  @ Restore _x
+  SUB          r4, r4, r3, LSL #1
+  @ Restore and advance _y
+  SUB          r5, r5, r3, LSL #1
+  @ maxcorr = max(maxcorr, sum0)
+  CMP          r0, r10
+  ADD          r5, r5, #2
+  MOVLT        r0, r10
+  SUB          r1, r1, #2
+  @ maxcorr = max(maxcorr, sum1)
+  CMP          r0, r11
+  @ xcorr[i] = sum
+  STR          r10, [r2], #4
+  MOVLT        r0, r11
+  STR          r11, [r2], #4
+celt_pitch_xcorr_edsp_process1a:
+  ADDS         r1, r1, #1
+  BLT celt_pitch_xcorr_edsp_done
+  SUBS         r12, r3, #4
+  @ r14 = sum = 0
+  MOV          r14, #0
+  BLT celt_pitch_xcorr_edsp_process1a_loop_done
+  LDR          r6, [r4], #4
+  LDR          r8, [r5], #4
+  LDR          r7, [r4], #4
+  LDR          r9, [r5], #4
+celt_pitch_xcorr_edsp_process1a_loop4:
+  SMLABB       r14, r6, r8, r14     @ sum = MAC16_16(sum, x_0, y_0)
+  SUBS         r12, r12, #4         @ j-=4
+  SMLATT       r14, r6, r8, r14     @ sum = MAC16_16(sum, x_1, y_1)
+  LDRGE        r6, [r4], #4
+  SMLABB       r14, r7, r9, r14     @ sum = MAC16_16(sum, x_2, y_2)
+  LDRGE        r8, [r5], #4
+  SMLATT       r14, r7, r9, r14     @ sum = MAC16_16(sum, x_3, y_3)
+  LDRGE        r7, [r4], #4
+  LDRGE        r9, [r5], #4
+  BGE celt_pitch_xcorr_edsp_process1a_loop4
+celt_pitch_xcorr_edsp_process1a_loop_done:
+  ADDS         r12, r12, #2
+  LDRGE        r6, [r4], #4
+  LDRGE        r8, [r5], #4
+  @ Stall
+  SMLABBGE     r14, r6, r8, r14     @ sum = MAC16_16(sum, x_0, y_0)
+  SUBGE        r12, r12, #2
+  SMLATTGE     r14, r6, r8, r14     @ sum = MAC16_16(sum, x_1, y_1)
+  ADDS         r12, r12, #1
+  LDRHGE       r6, [r4], #2
+  LDRHGE       r8, [r5], #2
+  @ Stall
+  SMLABBGE     r14, r6, r8, r14     @ sum = MAC16_16(sum, *x, *y)
+  @ maxcorr = max(maxcorr, sum)
+  CMP          r0, r14
+  @ xcorr[i] = sum
+  STR          r14, [r2], #4
+  MOVLT        r0, r14
+celt_pitch_xcorr_edsp_done:
+  LDMFD        sp!, {r4-r11, pc}
+	.size celt_pitch_xcorr_edsp, .-celt_pitch_xcorr_edsp  @ ENDP
+
+ .endif
+
+@ END:
+    .section	.note.GNU-stack,"",%progbits
diff --git a/libopus/celt/arm/celt_pitch_xcorr_arm.s b/libopus/celt/arm/celt_pitch_xcorr_arm.s
new file mode 100644
index 0000000..6e873af
--- /dev/null
+++ b/libopus/celt/arm/celt_pitch_xcorr_arm.s
@@ -0,0 +1,551 @@
+; Copyright (c) 2007-2008 CSIRO
+; Copyright (c) 2007-2009 Xiph.Org Foundation
+; Copyright (c) 2013      Parrot
+; Written by Aurélien Zanelli
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; - Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+;
+; - Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the distribution.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  AREA  |.text|, CODE, READONLY
+
+  GET    celt/arm/armopts.s
+
+IF OPUS_ARM_MAY_HAVE_EDSP
+  EXPORT celt_pitch_xcorr_edsp
+ENDIF
+
+IF OPUS_ARM_MAY_HAVE_NEON
+  EXPORT celt_pitch_xcorr_neon
+ENDIF
+
+IF OPUS_ARM_MAY_HAVE_NEON
+
+; Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3
+xcorr_kernel_neon PROC
+xcorr_kernel_neon_start
+  ; input:
+  ;   r3     = int         len
+  ;   r4     = opus_val16 *x
+  ;   r5     = opus_val16 *y
+  ;   q0     = opus_val32  sum[4]
+  ; output:
+  ;   q0     = opus_val32  sum[4]
+  ; preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15
+  ; internal usage:
+  ;   r12 = int j
+  ;   d3  = y_3|y_2|y_1|y_0
+  ;   q2  = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4
+  ;   q3  = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0
+  ;   q8  = scratch
+  ;
+  ; Load y[0...3]
+  ; This requires len>0 to always be valid (which we assert in the C code).
+  VLD1.16      {d5}, [r5]!
+  SUBS         r12, r3, #8
+  BLE xcorr_kernel_neon_process4
+; Process 8 samples at a time.
+; This loop loads one y value more than we actually need. Therefore we have to
+; stop as soon as there are 8 or fewer samples left (instead of 7), to avoid
+; reading past the end of the array.
+xcorr_kernel_neon_process8
+  ; This loop has 19 total instructions (10 cycles to issue, minimum), with
+  ; - 2 cycles of ARM insrtuctions,
+  ; - 10 cycles of load/store/byte permute instructions, and
+  ; - 9 cycles of data processing instructions.
+  ; On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the
+  ; latter two categories, meaning the whole loop should run in 10 cycles per
+  ; iteration, barring cache misses.
+  ;
+  ; Load x[0...7]
+  VLD1.16      {d6, d7}, [r4]!
+  ; Unlike VMOV, VAND is a data processsing instruction (and doesn't get
+  ; assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1.
+  VAND         d3, d5, d5
+  SUBS         r12, r12, #8
+  ; Load y[4...11]
+  VLD1.16      {d4, d5}, [r5]!
+  VMLAL.S16    q0, d3, d6[0]
+  VEXT.16      d16, d3, d4, #1
+  VMLAL.S16    q0, d4, d7[0]
+  VEXT.16      d17, d4, d5, #1
+  VMLAL.S16    q0, d16, d6[1]
+  VEXT.16      d16, d3, d4, #2
+  VMLAL.S16    q0, d17, d7[1]
+  VEXT.16      d17, d4, d5, #2
+  VMLAL.S16    q0, d16, d6[2]
+  VEXT.16      d16, d3, d4, #3
+  VMLAL.S16    q0, d17, d7[2]
+  VEXT.16      d17, d4, d5, #3
+  VMLAL.S16    q0, d16, d6[3]
+  VMLAL.S16    q0, d17, d7[3]
+  BGT xcorr_kernel_neon_process8
+; Process 4 samples here if we have > 4 left (still reading one extra y value).
+xcorr_kernel_neon_process4
+  ADDS         r12, r12, #4
+  BLE xcorr_kernel_neon_process2
+  ; Load x[0...3]
+  VLD1.16      d6, [r4]!
+  ; Use VAND since it's a data processing instruction again.
+  VAND         d4, d5, d5
+  SUB          r12, r12, #4
+  ; Load y[4...7]
+  VLD1.16      d5, [r5]!
+  VMLAL.S16    q0, d4, d6[0]
+  VEXT.16      d16, d4, d5, #1
+  VMLAL.S16    q0, d16, d6[1]
+  VEXT.16      d16, d4, d5, #2
+  VMLAL.S16    q0, d16, d6[2]
+  VEXT.16      d16, d4, d5, #3
+  VMLAL.S16    q0, d16, d6[3]
+; Process 2 samples here if we have > 2 left (still reading one extra y value).
+xcorr_kernel_neon_process2
+  ADDS         r12, r12, #2
+  BLE xcorr_kernel_neon_process1
+  ; Load x[0...1]
+  VLD2.16      {d6[],d7[]}, [r4]!
+  ; Use VAND since it's a data processing instruction again.
+  VAND         d4, d5, d5
+  SUB          r12, r12, #2
+  ; Load y[4...5]
+  VLD1.32      {d5[]}, [r5]!
+  VMLAL.S16    q0, d4, d6
+  VEXT.16      d16, d4, d5, #1
+  ; Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI
+  ; instead of VEXT, since it's a data-processing instruction.
+  VSRI.64      d5, d4, #32
+  VMLAL.S16    q0, d16, d7
+; Process 1 sample using the extra y value we loaded above.
+xcorr_kernel_neon_process1
+  ; Load next *x
+  VLD1.16      {d6[]}, [r4]!
+  ADDS         r12, r12, #1
+  ; y[0...3] are left in d5 from prior iteration(s) (if any)
+  VMLAL.S16    q0, d5, d6
+  MOVLE        pc, lr
+; Now process 1 last sample, not reading ahead.
+  ; Load last *y
+  VLD1.16      {d4[]}, [r5]!
+  VSRI.64      d4, d5, #16
+  ; Load last *x
+  VLD1.16      {d6[]}, [r4]!
+  VMLAL.S16    q0, d4, d6
+  MOV          pc, lr
+  ENDP
+
+; opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y,
+;  opus_val32 *xcorr, int len, int max_pitch, int arch)
+celt_pitch_xcorr_neon PROC
+  ; input:
+  ;   r0  = opus_val16 *_x
+  ;   r1  = opus_val16 *_y
+  ;   r2  = opus_val32 *xcorr
+  ;   r3  = int         len
+  ; output:
+  ;   r0  = int         maxcorr
+  ; internal usage:
+  ;   r4  = opus_val16 *x (for xcorr_kernel_neon())
+  ;   r5  = opus_val16 *y (for xcorr_kernel_neon())
+  ;   r6  = int         max_pitch
+  ;   r12 = int         j
+  ;   q15 = int         maxcorr[4] (q15 is not used by xcorr_kernel_neon())
+  ; ignored:
+  ;         int         arch
+  STMFD        sp!, {r4-r6, lr}
+  LDR          r6, [sp, #16]
+  VMOV.S32     q15, #1
+  ; if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
+  SUBS         r6, r6, #4
+  BLT celt_pitch_xcorr_neon_process4_done
+celt_pitch_xcorr_neon_process4
+  ; xcorr_kernel_neon parameters:
+  ; r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0}
+  MOV          r4, r0
+  MOV          r5, r1
+  VEOR         q0, q0, q0
+  ; xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3.
+  ; So we don't save/restore any other registers.
+  BL xcorr_kernel_neon_start
+  SUBS         r6, r6, #4
+  VST1.32      {q0}, [r2]!
+  ; _y += 4
+  ADD          r1, r1, #8
+  VMAX.S32     q15, q15, q0
+  ; if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
+  BGE celt_pitch_xcorr_neon_process4
+; We have less than 4 sums left to compute.
+celt_pitch_xcorr_neon_process4_done
+  ADDS         r6, r6, #4
+  ; Reduce maxcorr to a single value
+  VMAX.S32     d30, d30, d31
+  VPMAX.S32    d30, d30, d30
+  ; if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done
+  BLE celt_pitch_xcorr_neon_done
+; Now compute each remaining sum one at a time.
+celt_pitch_xcorr_neon_process_remaining
+  MOV          r4, r0
+  MOV          r5, r1
+  VMOV.I32     q0, #0
+  SUBS         r12, r3, #8
+  BLT celt_pitch_xcorr_neon_process_remaining4
+; Sum terms 8 at a time.
+celt_pitch_xcorr_neon_process_remaining_loop8
+  ; Load x[0...7]
+  VLD1.16      {q1}, [r4]!
+  ; Load y[0...7]
+  VLD1.16      {q2}, [r5]!
+  SUBS         r12, r12, #8
+  VMLAL.S16    q0, d4, d2
+  VMLAL.S16    q0, d5, d3
+  BGE celt_pitch_xcorr_neon_process_remaining_loop8
+; Sum terms 4 at a time.
+celt_pitch_xcorr_neon_process_remaining4
+  ADDS         r12, r12, #4
+  BLT celt_pitch_xcorr_neon_process_remaining4_done
+  ; Load x[0...3]
+  VLD1.16      {d2}, [r4]!
+  ; Load y[0...3]
+  VLD1.16      {d3}, [r5]!
+  SUB          r12, r12, #4
+  VMLAL.S16    q0, d3, d2
+celt_pitch_xcorr_neon_process_remaining4_done
+  ; Reduce the sum to a single value.
+  VADD.S32     d0, d0, d1
+  VPADDL.S32   d0, d0
+  ADDS         r12, r12, #4
+  BLE celt_pitch_xcorr_neon_process_remaining_loop_done
+; Sum terms 1 at a time.
+celt_pitch_xcorr_neon_process_remaining_loop1
+  VLD1.16      {d2[]}, [r4]!
+  VLD1.16      {d3[]}, [r5]!
+  SUBS         r12, r12, #1
+  VMLAL.S16    q0, d2, d3
+  BGT celt_pitch_xcorr_neon_process_remaining_loop1
+celt_pitch_xcorr_neon_process_remaining_loop_done
+  VST1.32      {d0[0]}, [r2]!
+  VMAX.S32     d30, d30, d0
+  SUBS         r6, r6, #1
+  ; _y++
+  ADD          r1, r1, #2
+  ; if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining
+  BGT celt_pitch_xcorr_neon_process_remaining
+celt_pitch_xcorr_neon_done
+  VMOV.32      r0, d30[0]
+  LDMFD        sp!, {r4-r6, pc}
+  ENDP
+
+ENDIF
+
+IF OPUS_ARM_MAY_HAVE_EDSP
+
+; This will get used on ARMv7 devices without NEON, so it has been optimized
+; to take advantage of dual-issuing where possible.
+xcorr_kernel_edsp PROC
+xcorr_kernel_edsp_start
+  ; input:
+  ;   r3      = int         len
+  ;   r4      = opus_val16 *_x (must be 32-bit aligned)
+  ;   r5      = opus_val16 *_y (must be 32-bit aligned)
+  ;   r6...r9 = opus_val32  sum[4]
+  ; output:
+  ;   r6...r9 = opus_val32  sum[4]
+  ; preserved: r0-r5
+  ; internal usage
+  ;   r2      = int         j
+  ;   r12,r14 = opus_val16  x[4]
+  ;   r10,r11 = opus_val16  y[4]
+  STMFD        sp!, {r2,r4,r5,lr}
+  LDR          r10, [r5], #4      ; Load y[0...1]
+  SUBS         r2, r3, #4         ; j = len-4
+  LDR          r11, [r5], #4      ; Load y[2...3]
+  BLE xcorr_kernel_edsp_process4_done
+  LDR          r12, [r4], #4      ; Load x[0...1]
+  ; Stall
+xcorr_kernel_edsp_process4
+  ; The multiplies must issue from pipeline 0, and can't dual-issue with each
+  ; other. Every other instruction here dual-issues with a multiply, and is
+  ; thus "free". There should be no stalls in the body of the loop.
+  SMLABB       r6, r12, r10, r6   ; sum[0] = MAC16_16(sum[0],x_0,y_0)
+  LDR          r14, [r4], #4      ; Load x[2...3]
+  SMLABT       r7, r12, r10, r7   ; sum[1] = MAC16_16(sum[1],x_0,y_1)
+  SUBS         r2, r2, #4         ; j-=4
+  SMLABB       r8, r12, r11, r8   ; sum[2] = MAC16_16(sum[2],x_0,y_2)
+  SMLABT       r9, r12, r11, r9   ; sum[3] = MAC16_16(sum[3],x_0,y_3)
+  SMLATT       r6, r12, r10, r6   ; sum[0] = MAC16_16(sum[0],x_1,y_1)
+  LDR          r10, [r5], #4      ; Load y[4...5]
+  SMLATB       r7, r12, r11, r7   ; sum[1] = MAC16_16(sum[1],x_1,y_2)
+  SMLATT       r8, r12, r11, r8   ; sum[2] = MAC16_16(sum[2],x_1,y_3)
+  SMLATB       r9, r12, r10, r9   ; sum[3] = MAC16_16(sum[3],x_1,y_4)
+  LDRGT        r12, [r4], #4      ; Load x[0...1]
+  SMLABB       r6, r14, r11, r6   ; sum[0] = MAC16_16(sum[0],x_2,y_2)
+  SMLABT       r7, r14, r11, r7   ; sum[1] = MAC16_16(sum[1],x_2,y_3)
+  SMLABB       r8, r14, r10, r8   ; sum[2] = MAC16_16(sum[2],x_2,y_4)
+  SMLABT       r9, r14, r10, r9   ; sum[3] = MAC16_16(sum[3],x_2,y_5)
+  SMLATT       r6, r14, r11, r6   ; sum[0] = MAC16_16(sum[0],x_3,y_3)
+  LDR          r11, [r5], #4      ; Load y[6...7]
+  SMLATB       r7, r14, r10, r7   ; sum[1] = MAC16_16(sum[1],x_3,y_4)
+  SMLATT       r8, r14, r10, r8   ; sum[2] = MAC16_16(sum[2],x_3,y_5)
+  SMLATB       r9, r14, r11, r9   ; sum[3] = MAC16_16(sum[3],x_3,y_6)
+  BGT xcorr_kernel_edsp_process4
+xcorr_kernel_edsp_process4_done
+  ADDS         r2, r2, #4
+  BLE xcorr_kernel_edsp_done
+  LDRH         r12, [r4], #2      ; r12 = *x++
+  SUBS         r2, r2, #1         ; j--
+  ; Stall
+  SMLABB       r6, r12, r10, r6   ; sum[0] = MAC16_16(sum[0],x,y_0)
+  LDRHGT       r14, [r4], #2      ; r14 = *x++
+  SMLABT       r7, r12, r10, r7   ; sum[1] = MAC16_16(sum[1],x,y_1)
+  SMLABB       r8, r12, r11, r8   ; sum[2] = MAC16_16(sum[2],x,y_2)
+  SMLABT       r9, r12, r11, r9   ; sum[3] = MAC16_16(sum[3],x,y_3)
+  BLE xcorr_kernel_edsp_done
+  SMLABT       r6, r14, r10, r6   ; sum[0] = MAC16_16(sum[0],x,y_1)
+  SUBS         r2, r2, #1         ; j--
+  SMLABB       r7, r14, r11, r7   ; sum[1] = MAC16_16(sum[1],x,y_2)
+  LDRH         r10, [r5], #2      ; r10 = y_4 = *y++
+  SMLABT       r8, r14, r11, r8   ; sum[2] = MAC16_16(sum[2],x,y_3)
+  LDRHGT       r12, [r4], #2      ; r12 = *x++
+  SMLABB       r9, r14, r10, r9   ; sum[3] = MAC16_16(sum[3],x,y_4)
+  BLE xcorr_kernel_edsp_done
+  SMLABB       r6, r12, r11, r6   ; sum[0] = MAC16_16(sum[0],tmp,y_2)
+  CMP          r2, #1             ; j--
+  SMLABT       r7, r12, r11, r7   ; sum[1] = MAC16_16(sum[1],tmp,y_3)
+  LDRH         r2, [r5], #2       ; r2 = y_5 = *y++
+  SMLABB       r8, r12, r10, r8   ; sum[2] = MAC16_16(sum[2],tmp,y_4)
+  LDRHGT       r14, [r4]          ; r14 = *x
+  SMLABB       r9, r12, r2, r9    ; sum[3] = MAC16_16(sum[3],tmp,y_5)
+  BLE xcorr_kernel_edsp_done
+  SMLABT       r6, r14, r11, r6   ; sum[0] = MAC16_16(sum[0],tmp,y_3)
+  LDRH         r11, [r5]          ; r11 = y_6 = *y
+  SMLABB       r7, r14, r10, r7   ; sum[1] = MAC16_16(sum[1],tmp,y_4)
+  SMLABB       r8, r14, r2, r8    ; sum[2] = MAC16_16(sum[2],tmp,y_5)
+  SMLABB       r9, r14, r11, r9   ; sum[3] = MAC16_16(sum[3],tmp,y_6)
+xcorr_kernel_edsp_done
+  LDMFD        sp!, {r2,r4,r5,pc}
+  ENDP
+
+celt_pitch_xcorr_edsp PROC
+  ; input:
+  ;   r0  = opus_val16 *_x (must be 32-bit aligned)
+  ;   r1  = opus_val16 *_y (only needs to be 16-bit aligned)
+  ;   r2  = opus_val32 *xcorr
+  ;   r3  = int         len
+  ; output:
+  ;   r0  = maxcorr
+  ; internal usage
+  ;   r4  = opus_val16 *x
+  ;   r5  = opus_val16 *y
+  ;   r6  = opus_val32  sum0
+  ;   r7  = opus_val32  sum1
+  ;   r8  = opus_val32  sum2
+  ;   r9  = opus_val32  sum3
+  ;   r1  = int         max_pitch
+  ;   r12 = int         j
+  ; ignored:
+  ;         int         arch
+  STMFD        sp!, {r4-r11, lr}
+  MOV          r5, r1
+  LDR          r1, [sp, #36]
+  MOV          r4, r0
+  TST          r5, #3
+  ; maxcorr = 1
+  MOV          r0, #1
+  BEQ          celt_pitch_xcorr_edsp_process1u_done
+; Compute one sum at the start to make y 32-bit aligned.
+  SUBS         r12, r3, #4
+  ; r14 = sum = 0
+  MOV          r14, #0
+  LDRH         r8, [r5], #2
+  BLE celt_pitch_xcorr_edsp_process1u_loop4_done
+  LDR          r6, [r4], #4
+  MOV          r8, r8, LSL #16
+celt_pitch_xcorr_edsp_process1u_loop4
+  LDR          r9, [r5], #4
+  SMLABT       r14, r6, r8, r14     ; sum = MAC16_16(sum, x_0, y_0)
+  LDR          r7, [r4], #4
+  SMLATB       r14, r6, r9, r14     ; sum = MAC16_16(sum, x_1, y_1)
+  LDR          r8, [r5], #4
+  SMLABT       r14, r7, r9, r14     ; sum = MAC16_16(sum, x_2, y_2)
+  SUBS         r12, r12, #4         ; j-=4
+  SMLATB       r14, r7, r8, r14     ; sum = MAC16_16(sum, x_3, y_3)
+  LDRGT        r6, [r4], #4
+  BGT celt_pitch_xcorr_edsp_process1u_loop4
+  MOV          r8, r8, LSR #16
+celt_pitch_xcorr_edsp_process1u_loop4_done
+  ADDS         r12, r12, #4
+celt_pitch_xcorr_edsp_process1u_loop1
+  LDRHGE       r6, [r4], #2
+  ; Stall
+  SMLABBGE     r14, r6, r8, r14    ; sum = MAC16_16(sum, *x, *y)
+  SUBSGE       r12, r12, #1
+  LDRHGT       r8, [r5], #2
+  BGT celt_pitch_xcorr_edsp_process1u_loop1
+  ; Restore _x
+  SUB          r4, r4, r3, LSL #1
+  ; Restore and advance _y
+  SUB          r5, r5, r3, LSL #1
+  ; maxcorr = max(maxcorr, sum)
+  CMP          r0, r14
+  ADD          r5, r5, #2
+  MOVLT        r0, r14
+  SUBS         r1, r1, #1
+  ; xcorr[i] = sum
+  STR          r14, [r2], #4
+  BLE celt_pitch_xcorr_edsp_done
+celt_pitch_xcorr_edsp_process1u_done
+  ; if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2
+  SUBS         r1, r1, #4
+  BLT celt_pitch_xcorr_edsp_process2
+celt_pitch_xcorr_edsp_process4
+  ; xcorr_kernel_edsp parameters:
+  ; r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0}
+  MOV          r6, #0
+  MOV          r7, #0
+  MOV          r8, #0
+  MOV          r9, #0
+  BL xcorr_kernel_edsp_start  ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len)
+  ; maxcorr = max(maxcorr, sum0, sum1, sum2, sum3)
+  CMP          r0, r6
+  ; _y+=4
+  ADD          r5, r5, #8
+  MOVLT        r0, r6
+  CMP          r0, r7
+  MOVLT        r0, r7
+  CMP          r0, r8
+  MOVLT        r0, r8
+  CMP          r0, r9
+  MOVLT        r0, r9
+  STMIA        r2!, {r6-r9}
+  SUBS         r1, r1, #4
+  BGE celt_pitch_xcorr_edsp_process4
+celt_pitch_xcorr_edsp_process2
+  ADDS         r1, r1, #2
+  BLT celt_pitch_xcorr_edsp_process1a
+  SUBS         r12, r3, #4
+  ; {r10, r11} = {sum0, sum1} = {0, 0}
+  MOV          r10, #0
+  MOV          r11, #0
+  LDR          r8, [r5], #4
+  BLE celt_pitch_xcorr_edsp_process2_loop_done
+  LDR          r6, [r4], #4
+  LDR          r9, [r5], #4
+celt_pitch_xcorr_edsp_process2_loop4
+  SMLABB       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_0, y_0)
+  LDR          r7, [r4], #4
+  SMLABT       r11, r6, r8, r11     ; sum1 = MAC16_16(sum1, x_0, y_1)
+  SUBS         r12, r12, #4         ; j-=4
+  SMLATT       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_1, y_1)
+  LDR          r8, [r5], #4
+  SMLATB       r11, r6, r9, r11     ; sum1 = MAC16_16(sum1, x_1, y_2)
+  LDRGT        r6, [r4], #4
+  SMLABB       r10, r7, r9, r10     ; sum0 = MAC16_16(sum0, x_2, y_2)
+  SMLABT       r11, r7, r9, r11     ; sum1 = MAC16_16(sum1, x_2, y_3)
+  SMLATT       r10, r7, r9, r10     ; sum0 = MAC16_16(sum0, x_3, y_3)
+  LDRGT        r9, [r5], #4
+  SMLATB       r11, r7, r8, r11     ; sum1 = MAC16_16(sum1, x_3, y_4)
+  BGT celt_pitch_xcorr_edsp_process2_loop4
+celt_pitch_xcorr_edsp_process2_loop_done
+  ADDS         r12, r12, #2
+  BLE  celt_pitch_xcorr_edsp_process2_1
+  LDR          r6, [r4], #4
+  ; Stall
+  SMLABB       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_0, y_0)
+  LDR          r9, [r5], #4
+  SMLABT       r11, r6, r8, r11     ; sum1 = MAC16_16(sum1, x_0, y_1)
+  SUB          r12, r12, #2
+  SMLATT       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_1, y_1)
+  MOV          r8, r9
+  SMLATB       r11, r6, r9, r11     ; sum1 = MAC16_16(sum1, x_1, y_2)
+celt_pitch_xcorr_edsp_process2_1
+  LDRH         r6, [r4], #2
+  ADDS         r12, r12, #1
+  ; Stall
+  SMLABB       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_0, y_0)
+  LDRHGT       r7, [r4], #2
+  SMLABT       r11, r6, r8, r11     ; sum1 = MAC16_16(sum1, x_0, y_1)
+  BLE celt_pitch_xcorr_edsp_process2_done
+  LDRH         r9, [r5], #2
+  SMLABT       r10, r7, r8, r10     ; sum0 = MAC16_16(sum0, x_0, y_1)
+  SMLABB       r11, r7, r9, r11     ; sum1 = MAC16_16(sum1, x_0, y_2)
+celt_pitch_xcorr_edsp_process2_done
+  ; Restore _x
+  SUB          r4, r4, r3, LSL #1
+  ; Restore and advance _y
+  SUB          r5, r5, r3, LSL #1
+  ; maxcorr = max(maxcorr, sum0)
+  CMP          r0, r10
+  ADD          r5, r5, #2
+  MOVLT        r0, r10
+  SUB          r1, r1, #2
+  ; maxcorr = max(maxcorr, sum1)
+  CMP          r0, r11
+  ; xcorr[i] = sum
+  STR          r10, [r2], #4
+  MOVLT        r0, r11
+  STR          r11, [r2], #4
+celt_pitch_xcorr_edsp_process1a
+  ADDS         r1, r1, #1
+  BLT celt_pitch_xcorr_edsp_done
+  SUBS         r12, r3, #4
+  ; r14 = sum = 0
+  MOV          r14, #0
+  BLT celt_pitch_xcorr_edsp_process1a_loop_done
+  LDR          r6, [r4], #4
+  LDR          r8, [r5], #4
+  LDR          r7, [r4], #4
+  LDR          r9, [r5], #4
+celt_pitch_xcorr_edsp_process1a_loop4
+  SMLABB       r14, r6, r8, r14     ; sum = MAC16_16(sum, x_0, y_0)
+  SUBS         r12, r12, #4         ; j-=4
+  SMLATT       r14, r6, r8, r14     ; sum = MAC16_16(sum, x_1, y_1)
+  LDRGE        r6, [r4], #4
+  SMLABB       r14, r7, r9, r14     ; sum = MAC16_16(sum, x_2, y_2)
+  LDRGE        r8, [r5], #4
+  SMLATT       r14, r7, r9, r14     ; sum = MAC16_16(sum, x_3, y_3)
+  LDRGE        r7, [r4], #4
+  LDRGE        r9, [r5], #4
+  BGE celt_pitch_xcorr_edsp_process1a_loop4
+celt_pitch_xcorr_edsp_process1a_loop_done
+  ADDS         r12, r12, #2
+  LDRGE        r6, [r4], #4
+  LDRGE        r8, [r5], #4
+  ; Stall
+  SMLABBGE     r14, r6, r8, r14     ; sum = MAC16_16(sum, x_0, y_0)
+  SUBGE        r12, r12, #2
+  SMLATTGE     r14, r6, r8, r14     ; sum = MAC16_16(sum, x_1, y_1)
+  ADDS         r12, r12, #1
+  LDRHGE       r6, [r4], #2
+  LDRHGE       r8, [r5], #2
+  ; Stall
+  SMLABBGE     r14, r6, r8, r14     ; sum = MAC16_16(sum, *x, *y)
+  ; maxcorr = max(maxcorr, sum)
+  CMP          r0, r14
+  ; xcorr[i] = sum
+  STR          r14, [r2], #4
+  MOVLT        r0, r14
+celt_pitch_xcorr_edsp_done
+  LDMFD        sp!, {r4-r11, pc}
+  ENDP
+
+ENDIF
+
+END
diff --git a/libopus/celt/arm/fft_arm.h b/libopus/celt/arm/fft_arm.h
new file mode 100644
index 0000000..0b78175
--- /dev/null
+++ b/libopus/celt/arm/fft_arm.h
@@ -0,0 +1,71 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+   Written by Viswanath Puttagunta */
+/**
+   @file fft_arm.h
+   @brief ARM Neon Intrinsic optimizations for fft using NE10 library
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#if !defined(FFT_ARM_H)
+#define FFT_ARM_H
+
+#include "kiss_fft.h"
+
+#if defined(HAVE_ARM_NE10)
+
+int opus_fft_alloc_arm_neon(kiss_fft_state *st);
+void opus_fft_free_arm_neon(kiss_fft_state *st);
+
+void opus_fft_neon(const kiss_fft_state *st,
+                   const kiss_fft_cpx *fin,
+                   kiss_fft_cpx *fout);
+
+void opus_ifft_neon(const kiss_fft_state *st,
+                    const kiss_fft_cpx *fin,
+                    kiss_fft_cpx *fout);
+
+#if !defined(OPUS_HAVE_RTCD)
+#define OVERRIDE_OPUS_FFT (1)
+
+#define opus_fft_alloc_arch(_st, arch) \
+   ((void)(arch), opus_fft_alloc_arm_neon(_st))
+
+#define opus_fft_free_arch(_st, arch) \
+   ((void)(arch), opus_fft_free_arm_neon(_st))
+
+#define opus_fft(_st, _fin, _fout, arch) \
+   ((void)(arch), opus_fft_neon(_st, _fin, _fout))
+
+#define opus_ifft(_st, _fin, _fout, arch) \
+   ((void)(arch), opus_ifft_neon(_st, _fin, _fout))
+
+#endif /* OPUS_HAVE_RTCD */
+
+#endif /* HAVE_ARM_NE10 */
+
+#endif
diff --git a/libopus/celt/arm/fixed_arm64.h b/libopus/celt/arm/fixed_arm64.h
new file mode 100644
index 0000000..c6fbd3d
--- /dev/null
+++ b/libopus/celt/arm/fixed_arm64.h
@@ -0,0 +1,35 @@
+/* Copyright (C) 2015 Vidyo */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_ARM64_H
+#define FIXED_ARM64_H
+
+#include <arm_neon.h>
+
+#undef SIG2WORD16
+#define SIG2WORD16(x) (vqmovns_s32(PSHR32((x), SIG_SHIFT)))
+
+#endif
diff --git a/libopus/celt/arm/fixed_armv4.h b/libopus/celt/arm/fixed_armv4.h
new file mode 100644
index 0000000..d84888a
--- /dev/null
+++ b/libopus/celt/arm/fixed_armv4.h
@@ -0,0 +1,80 @@
+/* Copyright (C) 2013 Xiph.Org Foundation and contributors */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_ARMv4_H
+#define FIXED_ARMv4_H
+
+/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
+#undef MULT16_32_Q16
+static OPUS_INLINE opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+      "#MULT16_32_Q16\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=&r"(rd_lo), "=&r"(rd_hi)
+      : "%r"(b),"r"(SHL32(a,16))
+  );
+  return rd_hi;
+}
+#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv4(a, b))
+
+
+/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
+#undef MULT16_32_Q15
+static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+      "#MULT16_32_Q15\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=&r"(rd_lo), "=&r"(rd_hi)
+      : "%r"(b), "r"(SHL32(a,16))
+  );
+  /*We intentionally don't OR in the high bit of rd_lo for speed.*/
+  return SHL32(rd_hi,1);
+}
+#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv4(a, b))
+
+
+/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
+    b must fit in 31 bits.
+    Result fits in 32 bits. */
+#undef MAC16_32_Q15
+#define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b))
+
+/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add.
+    Result fits in 32 bits. */
+#undef MAC16_32_Q16
+#define MAC16_32_Q16(c, a, b) ADD32(c, MULT16_32_Q16(a, b))
+
+/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
+#undef MULT32_32_Q31
+#define MULT32_32_Q31(a,b) (opus_val32)((((opus_int64)(a)) * ((opus_int64)(b)))>>31)
+
+#endif
diff --git a/libopus/celt/arm/fixed_armv5e.h b/libopus/celt/arm/fixed_armv5e.h
new file mode 100644
index 0000000..6bf73cb
--- /dev/null
+++ b/libopus/celt/arm/fixed_armv5e.h
@@ -0,0 +1,151 @@
+/* Copyright (C) 2007-2009 Xiph.Org Foundation
+   Copyright (C) 2003-2008 Jean-Marc Valin
+   Copyright (C) 2007-2008 CSIRO
+   Copyright (C) 2013      Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_ARMv5E_H
+#define FIXED_ARMv5E_H
+
+#include "fixed_armv4.h"
+
+/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
+#undef MULT16_32_Q16
+static OPUS_INLINE opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b)
+{
+  int res;
+  __asm__(
+      "#MULT16_32_Q16\n\t"
+      "smulwb %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(b),"r"(a)
+  );
+  return res;
+}
+#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv5e(a, b))
+
+
+/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
+#undef MULT16_32_Q15
+static OPUS_INLINE opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b)
+{
+  int res;
+  __asm__(
+      "#MULT16_32_Q15\n\t"
+      "smulwb %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(b), "r"(a)
+  );
+  return SHL32(res,1);
+}
+#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b))
+
+
+/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
+    b must fit in 31 bits.
+    Result fits in 32 bits. */
+#undef MAC16_32_Q15
+static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
+ opus_val32 b)
+{
+  int res;
+  __asm__(
+      "#MAC16_32_Q15\n\t"
+      "smlawb %0, %1, %2, %3;\n"
+      : "=r"(res)
+      : "r"(SHL32(b,1)), "r"(a), "r"(c)
+  );
+  return res;
+}
+#define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b))
+
+/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add.
+    Result fits in 32 bits. */
+#undef MAC16_32_Q16
+static OPUS_INLINE opus_val32 MAC16_32_Q16_armv5e(opus_val32 c, opus_val16 a,
+ opus_val32 b)
+{
+  int res;
+  __asm__(
+      "#MAC16_32_Q16\n\t"
+      "smlawb %0, %1, %2, %3;\n"
+      : "=r"(res)
+      : "r"(b), "r"(a), "r"(c)
+  );
+  return res;
+}
+#define MAC16_32_Q16(c, a, b) (MAC16_32_Q16_armv5e(c, a, b))
+
+/** 16x16 multiply-add where the result fits in 32 bits */
+#undef MAC16_16
+static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a,
+ opus_val16 b)
+{
+  int res;
+  __asm__(
+      "#MAC16_16\n\t"
+      "smlabb %0, %1, %2, %3;\n"
+      : "=r"(res)
+      : "r"(a), "r"(b), "r"(c)
+  );
+  return res;
+}
+#define MAC16_16(c, a, b) (MAC16_16_armv5e(c, a, b))
+
+/** 16x16 multiplication where the result fits in 32 bits */
+#undef MULT16_16
+static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b)
+{
+  int res;
+  __asm__(
+      "#MULT16_16\n\t"
+      "smulbb %0, %1, %2;\n"
+      : "=r"(res)
+      : "r"(a), "r"(b)
+  );
+  return res;
+}
+#define MULT16_16(a, b) (MULT16_16_armv5e(a, b))
+
+#ifdef OPUS_ARM_INLINE_MEDIA
+
+#undef SIG2WORD16
+static OPUS_INLINE opus_val16 SIG2WORD16_armv6(opus_val32 x)
+{
+   celt_sig res;
+   __asm__(
+       "#SIG2WORD16\n\t"
+       "ssat %0, #16, %1, ASR #12\n\t"
+       : "=r"(res)
+       : "r"(x+2048)
+   );
+   return EXTRACT16(res);
+}
+#define SIG2WORD16(x) (SIG2WORD16_armv6(x))
+
+#endif /* OPUS_ARM_INLINE_MEDIA */
+
+#endif
diff --git a/libopus/celt/arm/kiss_fft_armv4.h b/libopus/celt/arm/kiss_fft_armv4.h
new file mode 100644
index 0000000..e4faad6
--- /dev/null
+++ b/libopus/celt/arm/kiss_fft_armv4.h
@@ -0,0 +1,121 @@
+/*Copyright (c) 2013, Xiph.Org Foundation and contributors.
+
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifndef KISS_FFT_ARMv4_H
+#define KISS_FFT_ARMv4_H
+
+#if !defined(KISS_FFT_GUTS_H)
+#error "This file should only be included from _kiss_fft_guts.h"
+#endif
+
+#ifdef FIXED_POINT
+
+#undef C_MUL
+#define C_MUL(m,a,b) \
+    do{ \
+       int br__; \
+       int bi__; \
+       int tt__; \
+        __asm__ __volatile__( \
+            "#C_MUL\n\t" \
+            "ldrsh %[br], [%[bp], #0]\n\t" \
+            "ldm %[ap], {r0,r1}\n\t" \
+            "ldrsh %[bi], [%[bp], #2]\n\t" \
+            "smull %[tt], %[mi], r1, %[br]\n\t" \
+            "smlal %[tt], %[mi], r0, %[bi]\n\t" \
+            "rsb %[bi], %[bi], #0\n\t" \
+            "smull %[br], %[mr], r0, %[br]\n\t" \
+            "mov %[tt], %[tt], lsr #15\n\t" \
+            "smlal %[br], %[mr], r1, %[bi]\n\t" \
+            "orr %[mi], %[tt], %[mi], lsl #17\n\t" \
+            "mov %[br], %[br], lsr #15\n\t" \
+            "orr %[mr], %[br], %[mr], lsl #17\n\t" \
+            : [mr]"=r"((m).r), [mi]"=r"((m).i), \
+              [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
+            : [ap]"r"(&(a)), [bp]"r"(&(b)) \
+            : "r0", "r1" \
+        ); \
+    } \
+    while(0)
+
+#undef C_MUL4
+#define C_MUL4(m,a,b) \
+    do{ \
+       int br__; \
+       int bi__; \
+       int tt__; \
+        __asm__ __volatile__( \
+            "#C_MUL4\n\t" \
+            "ldrsh %[br], [%[bp], #0]\n\t" \
+            "ldm %[ap], {r0,r1}\n\t" \
+            "ldrsh %[bi], [%[bp], #2]\n\t" \
+            "smull %[tt], %[mi], r1, %[br]\n\t" \
+            "smlal %[tt], %[mi], r0, %[bi]\n\t" \
+            "rsb %[bi], %[bi], #0\n\t" \
+            "smull %[br], %[mr], r0, %[br]\n\t" \
+            "mov %[tt], %[tt], lsr #17\n\t" \
+            "smlal %[br], %[mr], r1, %[bi]\n\t" \
+            "orr %[mi], %[tt], %[mi], lsl #15\n\t" \
+            "mov %[br], %[br], lsr #17\n\t" \
+            "orr %[mr], %[br], %[mr], lsl #15\n\t" \
+            : [mr]"=r"((m).r), [mi]"=r"((m).i), \
+              [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
+            : [ap]"r"(&(a)), [bp]"r"(&(b)) \
+            : "r0", "r1" \
+        ); \
+    } \
+    while(0)
+
+#undef C_MULC
+#define C_MULC(m,a,b) \
+    do{ \
+       int br__; \
+       int bi__; \
+       int tt__; \
+        __asm__ __volatile__( \
+            "#C_MULC\n\t" \
+            "ldrsh %[br], [%[bp], #0]\n\t" \
+            "ldm %[ap], {r0,r1}\n\t" \
+            "ldrsh %[bi], [%[bp], #2]\n\t" \
+            "smull %[tt], %[mr], r0, %[br]\n\t" \
+            "smlal %[tt], %[mr], r1, %[bi]\n\t" \
+            "rsb %[bi], %[bi], #0\n\t" \
+            "smull %[br], %[mi], r1, %[br]\n\t" \
+            "mov %[tt], %[tt], lsr #15\n\t" \
+            "smlal %[br], %[mi], r0, %[bi]\n\t" \
+            "orr %[mr], %[tt], %[mr], lsl #17\n\t" \
+            "mov %[br], %[br], lsr #15\n\t" \
+            "orr %[mi], %[br], %[mi], lsl #17\n\t" \
+            : [mr]"=r"((m).r), [mi]"=r"((m).i), \
+              [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
+            : [ap]"r"(&(a)), [bp]"r"(&(b)) \
+            : "r0", "r1" \
+        ); \
+    } \
+    while(0)
+
+#endif /* FIXED_POINT */
+
+#endif /* KISS_FFT_ARMv4_H */
diff --git a/libopus/celt/arm/kiss_fft_armv5e.h b/libopus/celt/arm/kiss_fft_armv5e.h
new file mode 100644
index 0000000..9eca183
--- /dev/null
+++ b/libopus/celt/arm/kiss_fft_armv5e.h
@@ -0,0 +1,118 @@
+/*Copyright (c) 2013, Xiph.Org Foundation and contributors.
+
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifndef KISS_FFT_ARMv5E_H
+#define KISS_FFT_ARMv5E_H
+
+#if !defined(KISS_FFT_GUTS_H)
+#error "This file should only be included from _kiss_fft_guts.h"
+#endif
+
+#ifdef FIXED_POINT
+
+#if defined(__thumb__)||defined(__thumb2__)
+#define LDRD_CONS "Q"
+#else
+#define LDRD_CONS "Uq"
+#endif
+
+#undef C_MUL
+#define C_MUL(m,a,b) \
+    do{ \
+        int mr1__; \
+        int mr2__; \
+        int mi__; \
+        long long aval__; \
+        int bval__; \
+        __asm__( \
+            "#C_MUL\n\t" \
+            "ldrd %[aval], %H[aval], %[ap]\n\t" \
+            "ldr %[bval], %[bp]\n\t" \
+            "smulwb %[mi], %H[aval], %[bval]\n\t" \
+            "smulwb %[mr1], %[aval], %[bval]\n\t" \
+            "smulwt %[mr2], %H[aval], %[bval]\n\t" \
+            "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \
+            : [mr1]"=r"(mr1__), [mr2]"=r"(mr2__), [mi]"=r"(mi__), \
+              [aval]"=&r"(aval__), [bval]"=r"(bval__) \
+            : [ap]LDRD_CONS(a), [bp]"m"(b) \
+        ); \
+        (m).r = SHL32(SUB32(mr1__, mr2__), 1); \
+        (m).i = SHL32(mi__, 1); \
+    } \
+    while(0)
+
+#undef C_MUL4
+#define C_MUL4(m,a,b) \
+    do{ \
+        int mr1__; \
+        int mr2__; \
+        int mi__; \
+        long long aval__; \
+        int bval__; \
+        __asm__( \
+            "#C_MUL4\n\t" \
+            "ldrd %[aval], %H[aval], %[ap]\n\t" \
+            "ldr %[bval], %[bp]\n\t" \
+            "smulwb %[mi], %H[aval], %[bval]\n\t" \
+            "smulwb %[mr1], %[aval], %[bval]\n\t" \
+            "smulwt %[mr2], %H[aval], %[bval]\n\t" \
+            "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \
+            : [mr1]"=r"(mr1__), [mr2]"=r"(mr2__), [mi]"=r"(mi__), \
+              [aval]"=&r"(aval__), [bval]"=r"(bval__) \
+            : [ap]LDRD_CONS(a), [bp]"m"(b) \
+        ); \
+        (m).r = SHR32(SUB32(mr1__, mr2__), 1); \
+        (m).i = SHR32(mi__, 1); \
+    } \
+    while(0)
+
+#undef C_MULC
+#define C_MULC(m,a,b) \
+    do{ \
+        int mr__; \
+        int mi1__; \
+        int mi2__; \
+        long long aval__; \
+        int bval__; \
+        __asm__( \
+            "#C_MULC\n\t" \
+            "ldrd %[aval], %H[aval], %[ap]\n\t" \
+            "ldr %[bval], %[bp]\n\t" \
+            "smulwb %[mr], %[aval], %[bval]\n\t" \
+            "smulwb %[mi1], %H[aval], %[bval]\n\t" \
+            "smulwt %[mi2], %[aval], %[bval]\n\t" \
+            "smlawt %[mr], %H[aval], %[bval], %[mr]\n\t" \
+            : [mr]"=r"(mr__), [mi1]"=r"(mi1__), [mi2]"=r"(mi2__), \
+              [aval]"=&r"(aval__), [bval]"=r"(bval__) \
+            : [ap]LDRD_CONS(a), [bp]"m"(b) \
+        ); \
+        (m).r = SHL32(mr__, 1); \
+        (m).i = SHL32(SUB32(mi1__, mi2__), 1); \
+    } \
+    while(0)
+
+#endif /* FIXED_POINT */
+
+#endif /* KISS_FFT_GUTS_H */
diff --git a/libopus/celt/arm/mdct_arm.h b/libopus/celt/arm/mdct_arm.h
new file mode 100644
index 0000000..14200ba
--- /dev/null
+++ b/libopus/celt/arm/mdct_arm.h
@@ -0,0 +1,59 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+   Written by Viswanath Puttagunta */
+/**
+   @file arm_mdct.h
+   @brief ARM Neon Intrinsic optimizations for mdct using NE10 library
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(MDCT_ARM_H)
+#define MDCT_ARM_H
+
+#include "mdct.h"
+
+#if defined(HAVE_ARM_NE10)
+/** Compute a forward MDCT and scale by 4/N, trashes the input array */
+void clt_mdct_forward_neon(const mdct_lookup *l, kiss_fft_scalar *in,
+                           kiss_fft_scalar * OPUS_RESTRICT out,
+                           const opus_val16 *window, int overlap,
+                           int shift, int stride, int arch);
+
+void clt_mdct_backward_neon(const mdct_lookup *l, kiss_fft_scalar *in,
+                            kiss_fft_scalar * OPUS_RESTRICT out,
+                            const opus_val16 *window, int overlap,
+                            int shift, int stride, int arch);
+
+#if !defined(OPUS_HAVE_RTCD)
+#define OVERRIDE_OPUS_MDCT (1)
+#define clt_mdct_forward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \
+      clt_mdct_forward_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch)
+#define clt_mdct_backward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \
+      clt_mdct_backward_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch)
+#endif /* OPUS_HAVE_RTCD */
+#endif /* HAVE_ARM_NE10 */
+
+#endif
diff --git a/libopus/celt/arm/pitch_arm.h b/libopus/celt/arm/pitch_arm.h
new file mode 100644
index 0000000..bed8b04
--- /dev/null
+++ b/libopus/celt/arm/pitch_arm.h
@@ -0,0 +1,160 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(PITCH_ARM_H)
+# define PITCH_ARM_H
+
+# include "armcpu.h"
+
+# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+opus_val32 celt_inner_prod_neon(const opus_val16 *x, const opus_val16 *y, int N);
+void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01,
+        const opus_val16 *y02, int N, opus_val32 *xy1, opus_val32 *xy2);
+
+#  if !defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_PRESUME_NEON)
+#   define OVERRIDE_CELT_INNER_PROD (1)
+#   define OVERRIDE_DUAL_INNER_PROD (1)
+#   define celt_inner_prod(x, y, N, arch) ((void)(arch), PRESUME_NEON(celt_inner_prod)(x, y, N))
+#   define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) ((void)(arch), PRESUME_NEON(dual_inner_prod)(x, y01, y02, N, xy1, xy2))
+#  endif
+# endif
+
+# if !defined(OVERRIDE_CELT_INNER_PROD)
+#  if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
+extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y, int N);
+#   define OVERRIDE_CELT_INNER_PROD (1)
+#   define celt_inner_prod(x, y, N, arch) ((*CELT_INNER_PROD_IMPL[(arch)&OPUS_ARCHMASK])(x, y, N))
+#  elif defined(OPUS_ARM_PRESUME_NEON_INTR)
+#   define OVERRIDE_CELT_INNER_PROD (1)
+#   define celt_inner_prod(x, y, N, arch) ((void)(arch), celt_inner_prod_neon(x, y, N))
+#  endif
+# endif
+
+# if !defined(OVERRIDE_DUAL_INNER_PROD)
+#  if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
+extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x,
+        const opus_val16 *y01, const opus_val16 *y02, int N, opus_val32 *xy1, opus_val32 *xy2);
+#   define OVERRIDE_DUAL_INNER_PROD (1)
+#   define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) ((*DUAL_INNER_PROD_IMPL[(arch)&OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2))
+#  elif defined(OPUS_ARM_PRESUME_NEON_INTR)
+#   define OVERRIDE_DUAL_INNER_PROD (1)
+#   define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) ((void)(arch), dual_inner_prod_neon(x, y01, y02, N, xy1, xy2))
+#  endif
+# endif
+
+# if defined(FIXED_POINT)
+
+#  if defined(OPUS_ARM_MAY_HAVE_NEON)
+opus_val32 celt_pitch_xcorr_neon(const opus_val16 *_x, const opus_val16 *_y,
+    opus_val32 *xcorr, int len, int max_pitch, int arch);
+#  endif
+
+#  if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+#   define celt_pitch_xcorr_media MAY_HAVE_EDSP(celt_pitch_xcorr)
+#  endif
+
+#  if defined(OPUS_ARM_MAY_HAVE_EDSP)
+opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
+    opus_val32 *xcorr, int len, int max_pitch, int arch);
+#  endif
+
+#  if defined(OPUS_HAVE_RTCD) && \
+    ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
+     (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
+     (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
+extern opus_val32
+(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+      const opus_val16 *, opus_val32 *, int, int, int);
+#   define OVERRIDE_PITCH_XCORR (1)
+#   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
+        xcorr, len, max_pitch, arch))
+
+#  elif defined(OPUS_ARM_PRESUME_EDSP) || \
+    defined(OPUS_ARM_PRESUME_MEDIA) || \
+    defined(OPUS_ARM_PRESUME_NEON)
+#   define OVERRIDE_PITCH_XCORR (1)
+#   define celt_pitch_xcorr (PRESUME_NEON(celt_pitch_xcorr))
+
+#  endif
+
+#  if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+void xcorr_kernel_neon_fixed(
+                    const opus_val16 *x,
+                    const opus_val16 *y,
+                    opus_val32       sum[4],
+                    int              len);
+#  endif
+
+#  if defined(OPUS_HAVE_RTCD) && \
+    (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
+
+extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
+                    const opus_val16 *x,
+                    const opus_val16 *y,
+                    opus_val32       sum[4],
+                    int              len);
+
+#   define OVERRIDE_XCORR_KERNEL (1)
+#   define xcorr_kernel(x, y, sum, len, arch) \
+     ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))
+
+#  elif defined(OPUS_ARM_PRESUME_NEON_INTR)
+#   define OVERRIDE_XCORR_KERNEL (1)
+#   define xcorr_kernel(x, y, sum, len, arch) \
+      ((void)arch, xcorr_kernel_neon_fixed(x, y, sum, len))
+
+#  endif
+
+#else /* Start !FIXED_POINT */
+/* Float case */
+#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
+                                 opus_val32 *xcorr, int len, int max_pitch, int arch);
+#endif
+
+#  if defined(OPUS_HAVE_RTCD) && \
+    (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
+extern void
+(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+      const opus_val16 *, opus_val32 *, int, int, int);
+
+#  define OVERRIDE_PITCH_XCORR (1)
+#  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
+        xcorr, len, max_pitch, arch))
+
+#  elif defined(OPUS_ARM_PRESUME_NEON_INTR)
+
+#   define OVERRIDE_PITCH_XCORR (1)
+#   define celt_pitch_xcorr celt_pitch_xcorr_float_neon
+
+#  endif
+
+#endif /* end !FIXED_POINT */
+
+#endif
diff --git a/libopus/celt/arm/pitch_neon_intr.c b/libopus/celt/arm/pitch_neon_intr.c
new file mode 100644
index 0000000..1ac38c4
--- /dev/null
+++ b/libopus/celt/arm/pitch_neon_intr.c
@@ -0,0 +1,290 @@
+/***********************************************************************
+Copyright (c) 2017 Google Inc.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <arm_neon.h>
+#include "pitch.h"
+
+#ifdef FIXED_POINT
+
+opus_val32 celt_inner_prod_neon(const opus_val16 *x, const opus_val16 *y, int N)
+{
+    int i;
+    opus_val32 xy;
+    int16x8_t x_s16x8, y_s16x8;
+    int32x4_t xy_s32x4 = vdupq_n_s32(0);
+    int64x2_t xy_s64x2;
+    int64x1_t xy_s64x1;
+
+    for (i = 0; i < N - 7; i += 8) {
+        x_s16x8  = vld1q_s16(&x[i]);
+        y_s16x8  = vld1q_s16(&y[i]);
+        xy_s32x4 = vmlal_s16(xy_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y_s16x8));
+        xy_s32x4 = vmlal_s16(xy_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y_s16x8));
+    }
+
+    if (N - i >= 4) {
+        const int16x4_t x_s16x4 = vld1_s16(&x[i]);
+        const int16x4_t y_s16x4 = vld1_s16(&y[i]);
+        xy_s32x4 = vmlal_s16(xy_s32x4, x_s16x4, y_s16x4);
+        i += 4;
+    }
+
+    xy_s64x2 = vpaddlq_s32(xy_s32x4);
+    xy_s64x1 = vadd_s64(vget_low_s64(xy_s64x2), vget_high_s64(xy_s64x2));
+    xy       = vget_lane_s32(vreinterpret_s32_s64(xy_s64x1), 0);
+
+    for (; i < N; i++) {
+        xy = MAC16_16(xy, x[i], y[i]);
+    }
+
+#ifdef OPUS_CHECK_ASM
+    celt_assert(celt_inner_prod_c(x, y, N) == xy);
+#endif
+
+    return xy;
+}
+
+void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+        int N, opus_val32 *xy1, opus_val32 *xy2)
+{
+    int i;
+    opus_val32 xy01, xy02;
+    int16x8_t x_s16x8, y01_s16x8, y02_s16x8;
+    int32x4_t xy01_s32x4 = vdupq_n_s32(0);
+    int32x4_t xy02_s32x4 = vdupq_n_s32(0);
+    int64x2_t xy01_s64x2, xy02_s64x2;
+    int64x1_t xy01_s64x1, xy02_s64x1;
+
+    for (i = 0; i < N - 7; i += 8) {
+        x_s16x8    = vld1q_s16(&x[i]);
+        y01_s16x8  = vld1q_s16(&y01[i]);
+        y02_s16x8  = vld1q_s16(&y02[i]);
+        xy01_s32x4 = vmlal_s16(xy01_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y01_s16x8));
+        xy02_s32x4 = vmlal_s16(xy02_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y02_s16x8));
+        xy01_s32x4 = vmlal_s16(xy01_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y01_s16x8));
+        xy02_s32x4 = vmlal_s16(xy02_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y02_s16x8));
+    }
+
+    if (N - i >= 4) {
+        const int16x4_t x_s16x4   = vld1_s16(&x[i]);
+        const int16x4_t y01_s16x4 = vld1_s16(&y01[i]);
+        const int16x4_t y02_s16x4 = vld1_s16(&y02[i]);
+        xy01_s32x4 = vmlal_s16(xy01_s32x4, x_s16x4, y01_s16x4);
+        xy02_s32x4 = vmlal_s16(xy02_s32x4, x_s16x4, y02_s16x4);
+        i += 4;
+    }
+
+    xy01_s64x2 = vpaddlq_s32(xy01_s32x4);
+    xy02_s64x2 = vpaddlq_s32(xy02_s32x4);
+    xy01_s64x1 = vadd_s64(vget_low_s64(xy01_s64x2), vget_high_s64(xy01_s64x2));
+    xy02_s64x1 = vadd_s64(vget_low_s64(xy02_s64x2), vget_high_s64(xy02_s64x2));
+    xy01       = vget_lane_s32(vreinterpret_s32_s64(xy01_s64x1), 0);
+    xy02       = vget_lane_s32(vreinterpret_s32_s64(xy02_s64x1), 0);
+
+    for (; i < N; i++) {
+        xy01 = MAC16_16(xy01, x[i], y01[i]);
+        xy02 = MAC16_16(xy02, x[i], y02[i]);
+    }
+    *xy1 = xy01;
+    *xy2 = xy02;
+
+#ifdef OPUS_CHECK_ASM
+    {
+        opus_val32 xy1_c, xy2_c;
+        dual_inner_prod_c(x, y01, y02, N, &xy1_c, &xy2_c);
+        celt_assert(xy1_c == *xy1);
+        celt_assert(xy2_c == *xy2);
+    }
+#endif
+}
+
+#else /* !FIXED_POINT */
+
+/* ========================================================================== */
+
+#ifdef OPUS_CHECK_ASM
+
+/* This part of code simulates floating-point NEON operations. */
+
+/* celt_inner_prod_neon_float_c_simulation() simulates the floating-point   */
+/* operations of celt_inner_prod_neon(), and both functions should have bit */
+/* exact output.                                                            */
+static opus_val32 celt_inner_prod_neon_float_c_simulation(const opus_val16 *x, const opus_val16 *y, int N)
+{
+   int i;
+   opus_val32 xy, xy0 = 0, xy1 = 0, xy2 = 0, xy3 = 0;
+   for (i = 0; i < N - 3; i += 4) {
+      xy0 = MAC16_16(xy0, x[i + 0], y[i + 0]);
+      xy1 = MAC16_16(xy1, x[i + 1], y[i + 1]);
+      xy2 = MAC16_16(xy2, x[i + 2], y[i + 2]);
+      xy3 = MAC16_16(xy3, x[i + 3], y[i + 3]);
+   }
+   xy0 += xy2;
+   xy1 += xy3;
+   xy = xy0 + xy1;
+   for (; i < N; i++) {
+      xy = MAC16_16(xy, x[i], y[i]);
+   }
+   return xy;
+}
+
+/* dual_inner_prod_neon_float_c_simulation() simulates the floating-point   */
+/* operations of dual_inner_prod_neon(), and both functions should have bit */
+/* exact output.                                                            */
+static void dual_inner_prod_neon_float_c_simulation(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+      int N, opus_val32 *xy1, opus_val32 *xy2)
+{
+   int i;
+   opus_val32 xy01, xy02, xy01_0 = 0, xy01_1 = 0, xy01_2 = 0, xy01_3 = 0, xy02_0 = 0, xy02_1 = 0, xy02_2 = 0, xy02_3 = 0;
+   for (i = 0; i < N - 3; i += 4) {
+      xy01_0 = MAC16_16(xy01_0, x[i + 0], y01[i + 0]);
+      xy01_1 = MAC16_16(xy01_1, x[i + 1], y01[i + 1]);
+      xy01_2 = MAC16_16(xy01_2, x[i + 2], y01[i + 2]);
+      xy01_3 = MAC16_16(xy01_3, x[i + 3], y01[i + 3]);
+      xy02_0 = MAC16_16(xy02_0, x[i + 0], y02[i + 0]);
+      xy02_1 = MAC16_16(xy02_1, x[i + 1], y02[i + 1]);
+      xy02_2 = MAC16_16(xy02_2, x[i + 2], y02[i + 2]);
+      xy02_3 = MAC16_16(xy02_3, x[i + 3], y02[i + 3]);
+   }
+   xy01_0 += xy01_2;
+   xy02_0 += xy02_2;
+   xy01_1 += xy01_3;
+   xy02_1 += xy02_3;
+   xy01 = xy01_0 + xy01_1;
+   xy02 = xy02_0 + xy02_1;
+   for (; i < N; i++) {
+      xy01 = MAC16_16(xy01, x[i], y01[i]);
+      xy02 = MAC16_16(xy02, x[i], y02[i]);
+   }
+   *xy1 = xy01;
+   *xy2 = xy02;
+}
+
+#endif /* OPUS_CHECK_ASM */
+
+/* ========================================================================== */
+
+opus_val32 celt_inner_prod_neon(const opus_val16 *x, const opus_val16 *y, int N)
+{
+    int i;
+    opus_val32 xy;
+    float32x4_t xy_f32x4 = vdupq_n_f32(0);
+    float32x2_t xy_f32x2;
+
+    for (i = 0; i < N - 7; i += 8) {
+        float32x4_t x_f32x4, y_f32x4;
+        x_f32x4  = vld1q_f32(&x[i]);
+        y_f32x4  = vld1q_f32(&y[i]);
+        xy_f32x4 = vmlaq_f32(xy_f32x4, x_f32x4, y_f32x4);
+        x_f32x4  = vld1q_f32(&x[i + 4]);
+        y_f32x4  = vld1q_f32(&y[i + 4]);
+        xy_f32x4 = vmlaq_f32(xy_f32x4, x_f32x4, y_f32x4);
+    }
+
+    if (N - i >= 4) {
+        const float32x4_t x_f32x4 = vld1q_f32(&x[i]);
+        const float32x4_t y_f32x4 = vld1q_f32(&y[i]);
+        xy_f32x4 = vmlaq_f32(xy_f32x4, x_f32x4, y_f32x4);
+        i += 4;
+    }
+
+    xy_f32x2 = vadd_f32(vget_low_f32(xy_f32x4), vget_high_f32(xy_f32x4));
+    xy_f32x2 = vpadd_f32(xy_f32x2, xy_f32x2);
+    xy       = vget_lane_f32(xy_f32x2, 0);
+
+    for (; i < N; i++) {
+        xy = MAC16_16(xy, x[i], y[i]);
+    }
+
+#ifdef OPUS_CHECK_ASM
+    celt_assert(ABS32(celt_inner_prod_neon_float_c_simulation(x, y, N) - xy) <= VERY_SMALL);
+#endif
+
+    return xy;
+}
+
+void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+        int N, opus_val32 *xy1, opus_val32 *xy2)
+{
+    int i;
+    opus_val32 xy01, xy02;
+    float32x4_t xy01_f32x4 = vdupq_n_f32(0);
+    float32x4_t xy02_f32x4 = vdupq_n_f32(0);
+    float32x2_t xy01_f32x2, xy02_f32x2;
+
+    for (i = 0; i < N - 7; i += 8) {
+        float32x4_t x_f32x4, y01_f32x4, y02_f32x4;
+        x_f32x4    = vld1q_f32(&x[i]);
+        y01_f32x4  = vld1q_f32(&y01[i]);
+        y02_f32x4  = vld1q_f32(&y02[i]);
+        xy01_f32x4 = vmlaq_f32(xy01_f32x4, x_f32x4, y01_f32x4);
+        xy02_f32x4 = vmlaq_f32(xy02_f32x4, x_f32x4, y02_f32x4);
+        x_f32x4    = vld1q_f32(&x[i + 4]);
+        y01_f32x4  = vld1q_f32(&y01[i + 4]);
+        y02_f32x4  = vld1q_f32(&y02[i + 4]);
+        xy01_f32x4 = vmlaq_f32(xy01_f32x4, x_f32x4, y01_f32x4);
+        xy02_f32x4 = vmlaq_f32(xy02_f32x4, x_f32x4, y02_f32x4);
+    }
+
+    if (N - i >= 4) {
+        const float32x4_t x_f32x4   = vld1q_f32(&x[i]);
+        const float32x4_t y01_f32x4 = vld1q_f32(&y01[i]);
+        const float32x4_t y02_f32x4 = vld1q_f32(&y02[i]);
+        xy01_f32x4 = vmlaq_f32(xy01_f32x4, x_f32x4, y01_f32x4);
+        xy02_f32x4 = vmlaq_f32(xy02_f32x4, x_f32x4, y02_f32x4);
+        i += 4;
+    }
+
+    xy01_f32x2 = vadd_f32(vget_low_f32(xy01_f32x4), vget_high_f32(xy01_f32x4));
+    xy02_f32x2 = vadd_f32(vget_low_f32(xy02_f32x4), vget_high_f32(xy02_f32x4));
+    xy01_f32x2 = vpadd_f32(xy01_f32x2, xy01_f32x2);
+    xy02_f32x2 = vpadd_f32(xy02_f32x2, xy02_f32x2);
+    xy01       = vget_lane_f32(xy01_f32x2, 0);
+    xy02       = vget_lane_f32(xy02_f32x2, 0);
+
+    for (; i < N; i++) {
+        xy01 = MAC16_16(xy01, x[i], y01[i]);
+        xy02 = MAC16_16(xy02, x[i], y02[i]);
+    }
+    *xy1 = xy01;
+    *xy2 = xy02;
+
+#ifdef OPUS_CHECK_ASM
+    {
+        opus_val32 xy1_c, xy2_c;
+        dual_inner_prod_neon_float_c_simulation(x, y01, y02, N, &xy1_c, &xy2_c);
+        celt_assert(ABS32(xy1_c - *xy1) <= VERY_SMALL);
+        celt_assert(ABS32(xy2_c - *xy2) <= VERY_SMALL);
+    }
+#endif
+}
+
+#endif /* FIXED_POINT */
diff --git a/libopus/celt/bands.c b/libopus/celt/bands.c
new file mode 100644
index 0000000..a9aa78d
--- /dev/null
+++ b/libopus/celt/bands.c
@@ -0,0 +1,1673 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2008-2009 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+#include "bands.h"
+#include "modes.h"
+#include "vq.h"
+#include "cwrs.h"
+#include "stack_alloc.h"
+#include "os_support.h"
+#include "mathops.h"
+#include "rate.h"
+#include "quant_bands.h"
+#include "pitch.h"
+
+int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev)
+{
+   int i;
+   for (i=0;i<N;i++)
+   {
+      if (val < thresholds[i])
+         break;
+   }
+   if (i>prev && val < thresholds[prev]+hysteresis[prev])
+      i=prev;
+   if (i<prev && val > thresholds[prev-1]-hysteresis[prev-1])
+      i=prev;
+   return i;
+}
+
+opus_uint32 celt_lcg_rand(opus_uint32 seed)
+{
+   return 1664525 * seed + 1013904223;
+}
+
+/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
+   with this approximation is important because it has an impact on the bit allocation */
+opus_int16 bitexact_cos(opus_int16 x)
+{
+   opus_int32 tmp;
+   opus_int16 x2;
+   tmp = (4096+((opus_int32)(x)*(x)))>>13;
+   celt_sig_assert(tmp<=32767);
+   x2 = tmp;
+   x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));
+   celt_sig_assert(x2<=32766);
+   return 1+x2;
+}
+
+int bitexact_log2tan(int isin,int icos)
+{
+   int lc;
+   int ls;
+   lc=EC_ILOG(icos);
+   ls=EC_ILOG(isin);
+   icos<<=15-lc;
+   isin<<=15-ls;
+   return (ls-lc)*(1<<11)
+         +FRAC_MUL16(isin, FRAC_MUL16(isin, -2597) + 7932)
+         -FRAC_MUL16(icos, FRAC_MUL16(icos, -2597) + 7932);
+}
+
+#ifdef FIXED_POINT
+/* Compute the amplitude (sqrt energy) in each of the bands */
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch)
+{
+   int i, c, N;
+   const opus_int16 *eBands = m->eBands;
+   (void)arch;
+   N = m->shortMdctSize<<LM;
+   c=0; do {
+      for (i=0;i<end;i++)
+      {
+         int j;
+         opus_val32 maxval=0;
+         opus_val32 sum = 0;
+
+         maxval = celt_maxabs32(&X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
+         if (maxval > 0)
+         {
+            int shift = celt_ilog2(maxval) - 14 + (((m->logN[i]>>BITRES)+LM+1)>>1);
+            j=eBands[i]<<LM;
+            if (shift>0)
+            {
+               do {
+                  sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)),
+                        EXTRACT16(SHR32(X[j+c*N],shift)));
+               } while (++j<eBands[i+1]<<LM);
+            } else {
+               do {
+                  sum = MAC16_16(sum, EXTRACT16(SHL32(X[j+c*N],-shift)),
+                        EXTRACT16(SHL32(X[j+c*N],-shift)));
+               } while (++j<eBands[i+1]<<LM);
+            }
+            /* We're adding one here to ensure the normalized band isn't larger than unity norm */
+            bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift);
+         } else {
+            bandE[i+c*m->nbEBands] = EPSILON;
+         }
+         /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
+      }
+   } while (++c<C);
+   /*printf ("\n");*/
+}
+
+/* Normalise each band such that the energy is one. */
+void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
+{
+   int i, c, N;
+   const opus_int16 *eBands = m->eBands;
+   N = M*m->shortMdctSize;
+   c=0; do {
+      i=0; do {
+         opus_val16 g;
+         int j,shift;
+         opus_val16 E;
+         shift = celt_zlog2(bandE[i+c*m->nbEBands])-13;
+         E = VSHR32(bandE[i+c*m->nbEBands], shift);
+         g = EXTRACT16(celt_rcp(SHL32(E,3)));
+         j=M*eBands[i]; do {
+            X[j+c*N] = MULT16_16_Q15(VSHR32(freq[j+c*N],shift-1),g);
+         } while (++j<M*eBands[i+1]);
+      } while (++i<end);
+   } while (++c<C);
+}
+
+#else /* FIXED_POINT */
+/* Compute the amplitude (sqrt energy) in each of the bands */
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch)
+{
+   int i, c, N;
+   const opus_int16 *eBands = m->eBands;
+   N = m->shortMdctSize<<LM;
+   c=0; do {
+      for (i=0;i<end;i++)
+      {
+         opus_val32 sum;
+         sum = 1e-27f + celt_inner_prod(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM, arch);
+         bandE[i+c*m->nbEBands] = celt_sqrt(sum);
+         /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
+      }
+   } while (++c<C);
+   /*printf ("\n");*/
+}
+
+/* Normalise each band such that the energy is one. */
+void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
+{
+   int i, c, N;
+   const opus_int16 *eBands = m->eBands;
+   N = M*m->shortMdctSize;
+   c=0; do {
+      for (i=0;i<end;i++)
+      {
+         int j;
+         opus_val16 g = 1.f/(1e-27f+bandE[i+c*m->nbEBands]);
+         for (j=M*eBands[i];j<M*eBands[i+1];j++)
+            X[j+c*N] = freq[j+c*N]*g;
+      }
+   } while (++c<C);
+}
+
+#endif /* FIXED_POINT */
+
+/* De-normalise the energy to produce the synthesis from the unit-energy bands */
+void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
+      celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start,
+      int end, int M, int downsample, int silence)
+{
+   int i, N;
+   int bound;
+   celt_sig * OPUS_RESTRICT f;
+   const celt_norm * OPUS_RESTRICT x;
+   const opus_int16 *eBands = m->eBands;
+   N = M*m->shortMdctSize;
+   bound = M*eBands[end];
+   if (downsample!=1)
+      bound = IMIN(bound, N/downsample);
+   if (silence)
+   {
+      bound = 0;
+      start = end = 0;
+   }
+   f = freq;
+   x = X+M*eBands[start];
+   for (i=0;i<M*eBands[start];i++)
+      *f++ = 0;
+   for (i=start;i<end;i++)
+   {
+      int j, band_end;
+      opus_val16 g;
+      opus_val16 lg;
+#ifdef FIXED_POINT
+      int shift;
+#endif
+      j=M*eBands[i];
+      band_end = M*eBands[i+1];
+      lg = SATURATE16(ADD32(bandLogE[i], SHL32((opus_val32)eMeans[i],6)));
+#ifndef FIXED_POINT
+      g = celt_exp2(MIN32(32.f, lg));
+#else
+      /* Handle the integer part of the log energy */
+      shift = 16-(lg>>DB_SHIFT);
+      if (shift>31)
+      {
+         shift=0;
+         g=0;
+      } else {
+         /* Handle the fractional part. */
+         g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
+      }
+      /* Handle extreme gains with negative shift. */
+      if (shift<0)
+      {
+         /* For shift <= -2 and g > 16384 we'd be likely to overflow, so we're
+            capping the gain here, which is equivalent to a cap of 18 on lg.
+            This shouldn't trigger unless the bitstream is already corrupted. */
+         if (shift <= -2)
+         {
+            g = 16384;
+            shift = -2;
+         }
+         do {
+            *f++ = SHL32(MULT16_16(*x++, g), -shift);
+         } while (++j<band_end);
+      } else
+#endif
+         /* Be careful of the fixed-point "else" just above when changing this code */
+         do {
+            *f++ = SHR32(MULT16_16(*x++, g), shift);
+         } while (++j<band_end);
+   }
+   celt_assert(start <= end);
+   OPUS_CLEAR(&freq[bound], N-bound);
+}
+
+/* This prevents energy collapse for transients with multiple short MDCTs */
+void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
+      int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE,
+      const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed, int arch)
+{
+   int c, i, j, k;
+   for (i=start;i<end;i++)
+   {
+      int N0;
+      opus_val16 thresh, sqrt_1;
+      int depth;
+#ifdef FIXED_POINT
+      int shift;
+      opus_val32 thresh32;
+#endif
+
+      N0 = m->eBands[i+1]-m->eBands[i];
+      /* depth in 1/8 bits */
+      celt_sig_assert(pulses[i]>=0);
+      depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM;
+
+#ifdef FIXED_POINT
+      thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1);
+      thresh = MULT16_32_Q15(QCONST16(0.5f, 15), MIN32(32767,thresh32));
+      {
+         opus_val32 t;
+         t = N0<<LM;
+         shift = celt_ilog2(t)>>1;
+         t = SHL32(t, (7-shift)<<1);
+         sqrt_1 = celt_rsqrt_norm(t);
+      }
+#else
+      thresh = .5f*celt_exp2(-.125f*depth);
+      sqrt_1 = celt_rsqrt(N0<<LM);
+#endif
+
+      c=0; do
+      {
+         celt_norm *X;
+         opus_val16 prev1;
+         opus_val16 prev2;
+         opus_val32 Ediff;
+         opus_val16 r;
+         int renormalize=0;
+         prev1 = prev1logE[c*m->nbEBands+i];
+         prev2 = prev2logE[c*m->nbEBands+i];
+         if (C==1)
+         {
+            prev1 = MAX16(prev1,prev1logE[m->nbEBands+i]);
+            prev2 = MAX16(prev2,prev2logE[m->nbEBands+i]);
+         }
+         Ediff = EXTEND32(logE[c*m->nbEBands+i])-EXTEND32(MIN16(prev1,prev2));
+         Ediff = MAX32(0, Ediff);
+
+#ifdef FIXED_POINT
+         if (Ediff < 16384)
+         {
+            opus_val32 r32 = SHR32(celt_exp2(-EXTRACT16(Ediff)),1);
+            r = 2*MIN16(16383,r32);
+         } else {
+            r = 0;
+         }
+         if (LM==3)
+            r = MULT16_16_Q14(23170, MIN32(23169, r));
+         r = SHR16(MIN16(thresh, r),1);
+         r = SHR32(MULT16_16_Q15(sqrt_1, r),shift);
+#else
+         /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because
+            short blocks don't have the same energy as long */
+         r = 2.f*celt_exp2(-Ediff);
+         if (LM==3)
+            r *= 1.41421356f;
+         r = MIN16(thresh, r);
+         r = r*sqrt_1;
+#endif
+         X = X_+c*size+(m->eBands[i]<<LM);
+         for (k=0;k<1<<LM;k++)
+         {
+            /* Detect collapse */
+            if (!(collapse_masks[i*C+c]&1<<k))
+            {
+               /* Fill with noise */
+               for (j=0;j<N0;j++)
+               {
+                  seed = celt_lcg_rand(seed);
+                  X[(j<<LM)+k] = (seed&0x8000 ? r : -r);
+               }
+               renormalize = 1;
+            }
+         }
+         /* We just added some energy, so we need to renormalise */
+         if (renormalize)
+            renormalise_vector(X, N0<<LM, Q15ONE, arch);
+      } while (++c<C);
+   }
+}
+
+/* Compute the weights to use for optimizing normalized distortion across
+   channels. We use the amplitude to weight square distortion, which means
+   that we use the square root of the value we would have been using if we
+   wanted to minimize the MSE in the non-normalized domain. This roughly
+   corresponds to some quick-and-dirty perceptual experiments I ran to
+   measure inter-aural masking (there doesn't seem to be any published data
+   on the topic). */
+static void compute_channel_weights(celt_ener Ex, celt_ener Ey, opus_val16 w[2])
+{
+   celt_ener minE;
+#ifdef FIXED_POINT
+   int shift;
+#endif
+   minE = MIN32(Ex, Ey);
+   /* Adjustment to make the weights a bit more conservative. */
+   Ex = ADD32(Ex, minE/3);
+   Ey = ADD32(Ey, minE/3);
+#ifdef FIXED_POINT
+   shift = celt_ilog2(EPSILON+MAX32(Ex, Ey))-14;
+#endif
+   w[0] = VSHR32(Ex, shift);
+   w[1] = VSHR32(Ey, shift);
+}
+
+static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N)
+{
+   int i = bandID;
+   int j;
+   opus_val16 a1, a2;
+   opus_val16 left, right;
+   opus_val16 norm;
+#ifdef FIXED_POINT
+   int shift = celt_zlog2(MAX32(bandE[i], bandE[i+m->nbEBands]))-13;
+#endif
+   left = VSHR32(bandE[i],shift);
+   right = VSHR32(bandE[i+m->nbEBands],shift);
+   norm = EPSILON + celt_sqrt(EPSILON+MULT16_16(left,left)+MULT16_16(right,right));
+   a1 = DIV32_16(SHL32(EXTEND32(left),14),norm);
+   a2 = DIV32_16(SHL32(EXTEND32(right),14),norm);
+   for (j=0;j<N;j++)
+   {
+      celt_norm r, l;
+      l = X[j];
+      r = Y[j];
+      X[j] = EXTRACT16(SHR32(MAC16_16(MULT16_16(a1, l), a2, r), 14));
+      /* Side is not encoded, no need to calculate */
+   }
+}
+
+static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N)
+{
+   int j;
+   for (j=0;j<N;j++)
+   {
+      opus_val32 r, l;
+      l = MULT16_16(QCONST16(.70710678f, 15), X[j]);
+      r = MULT16_16(QCONST16(.70710678f, 15), Y[j]);
+      X[j] = EXTRACT16(SHR32(ADD32(l, r), 15));
+      Y[j] = EXTRACT16(SHR32(SUB32(r, l), 15));
+   }
+}
+
+static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val16 mid, int N, int arch)
+{
+   int j;
+   opus_val32 xp=0, side=0;
+   opus_val32 El, Er;
+   opus_val16 mid2;
+#ifdef FIXED_POINT
+   int kl, kr;
+#endif
+   opus_val32 t, lgain, rgain;
+
+   /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
+   dual_inner_prod(Y, X, Y, N, &xp, &side, arch);
+   /* Compensating for the mid normalization */
+   xp = MULT16_32_Q15(mid, xp);
+   /* mid and side are in Q15, not Q14 like X and Y */
+   mid2 = SHR16(mid, 1);
+   El = MULT16_16(mid2, mid2) + side - 2*xp;
+   Er = MULT16_16(mid2, mid2) + side + 2*xp;
+   if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
+   {
+      OPUS_COPY(Y, X, N);
+      return;
+   }
+
+#ifdef FIXED_POINT
+   kl = celt_ilog2(El)>>1;
+   kr = celt_ilog2(Er)>>1;
+#endif
+   t = VSHR32(El, (kl-7)<<1);
+   lgain = celt_rsqrt_norm(t);
+   t = VSHR32(Er, (kr-7)<<1);
+   rgain = celt_rsqrt_norm(t);
+
+#ifdef FIXED_POINT
+   if (kl < 7)
+      kl = 7;
+   if (kr < 7)
+      kr = 7;
+#endif
+
+   for (j=0;j<N;j++)
+   {
+      celt_norm r, l;
+      /* Apply mid scaling (side is already scaled) */
+      l = MULT16_16_P15(mid, X[j]);
+      r = Y[j];
+      X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1));
+      Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1));
+   }
+}
+
+/* Decide whether we should spread the pulses in the current frame */
+int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
+      int last_decision, int *hf_average, int *tapset_decision, int update_hf,
+      int end, int C, int M, const int *spread_weight)
+{
+   int i, c, N0;
+   int sum = 0, nbBands=0;
+   const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
+   int decision;
+   int hf_sum=0;
+
+   celt_assert(end>0);
+
+   N0 = M*m->shortMdctSize;
+
+   if (M*(eBands[end]-eBands[end-1]) <= 8)
+      return SPREAD_NONE;
+   c=0; do {
+      for (i=0;i<end;i++)
+      {
+         int j, N, tmp=0;
+         int tcount[3] = {0,0,0};
+         const celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
+         N = M*(eBands[i+1]-eBands[i]);
+         if (N<=8)
+            continue;
+         /* Compute rough CDF of |x[j]| */
+         for (j=0;j<N;j++)
+         {
+            opus_val32 x2N; /* Q13 */
+
+            x2N = MULT16_16(MULT16_16_Q15(x[j], x[j]), N);
+            if (x2N < QCONST16(0.25f,13))
+               tcount[0]++;
+            if (x2N < QCONST16(0.0625f,13))
+               tcount[1]++;
+            if (x2N < QCONST16(0.015625f,13))
+               tcount[2]++;
+         }
+
+         /* Only include four last bands (8 kHz and up) */
+         if (i>m->nbEBands-4)
+            hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N);
+         tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N);
+         sum += tmp*spread_weight[i];
+         nbBands+=spread_weight[i];
+      }
+   } while (++c<C);
+
+   if (update_hf)
+   {
+      if (hf_sum)
+         hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end));
+      *hf_average = (*hf_average+hf_sum)>>1;
+      hf_sum = *hf_average;
+      if (*tapset_decision==2)
+         hf_sum += 4;
+      else if (*tapset_decision==0)
+         hf_sum -= 4;
+      if (hf_sum > 22)
+         *tapset_decision=2;
+      else if (hf_sum > 18)
+         *tapset_decision=1;
+      else
+         *tapset_decision=0;
+   }
+   /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
+   celt_assert(nbBands>0); /* end has to be non-zero */
+   celt_assert(sum>=0);
+   sum = celt_udiv((opus_int32)sum<<8, nbBands);
+   /* Recursive averaging */
+   sum = (sum+*average)>>1;
+   *average = sum;
+   /* Hysteresis */
+   sum = (3*sum + (((3-last_decision)<<7) + 64) + 2)>>2;
+   if (sum < 80)
+   {
+      decision = SPREAD_AGGRESSIVE;
+   } else if (sum < 256)
+   {
+      decision = SPREAD_NORMAL;
+   } else if (sum < 384)
+   {
+      decision = SPREAD_LIGHT;
+   } else {
+      decision = SPREAD_NONE;
+   }
+#ifdef FUZZING
+   decision = rand()&0x3;
+   *tapset_decision=rand()%3;
+#endif
+   return decision;
+}
+
+/* Indexing table for converting from natural Hadamard to ordery Hadamard
+   This is essentially a bit-reversed Gray, on top of which we've added
+   an inversion of the order because we want the DC at the end rather than
+   the beginning. The lines are for N=2, 4, 8, 16 */
+static const int ordery_table[] = {
+       1,  0,
+       3,  0,  2,  1,
+       7,  0,  4,  3,  6,  1,  5,  2,
+      15,  0,  8,  7, 12,  3, 11,  4, 14,  1,  9,  6, 13,  2, 10,  5,
+};
+
+static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
+{
+   int i,j;
+   VARDECL(celt_norm, tmp);
+   int N;
+   SAVE_STACK;
+   N = N0*stride;
+   ALLOC(tmp, N, celt_norm);
+   celt_assert(stride>0);
+   if (hadamard)
+   {
+      const int *ordery = ordery_table+stride-2;
+      for (i=0;i<stride;i++)
+      {
+         for (j=0;j<N0;j++)
+            tmp[ordery[i]*N0+j] = X[j*stride+i];
+      }
+   } else {
+      for (i=0;i<stride;i++)
+         for (j=0;j<N0;j++)
+            tmp[i*N0+j] = X[j*stride+i];
+   }
+   OPUS_COPY(X, tmp, N);
+   RESTORE_STACK;
+}
+
+static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
+{
+   int i,j;
+   VARDECL(celt_norm, tmp);
+   int N;
+   SAVE_STACK;
+   N = N0*stride;
+   ALLOC(tmp, N, celt_norm);
+   if (hadamard)
+   {
+      const int *ordery = ordery_table+stride-2;
+      for (i=0;i<stride;i++)
+         for (j=0;j<N0;j++)
+            tmp[j*stride+i] = X[ordery[i]*N0+j];
+   } else {
+      for (i=0;i<stride;i++)
+         for (j=0;j<N0;j++)
+            tmp[j*stride+i] = X[i*N0+j];
+   }
+   OPUS_COPY(X, tmp, N);
+   RESTORE_STACK;
+}
+
+void haar1(celt_norm *X, int N0, int stride)
+{
+   int i, j;
+   N0 >>= 1;
+   for (i=0;i<stride;i++)
+      for (j=0;j<N0;j++)
+      {
+         opus_val32 tmp1, tmp2;
+         tmp1 = MULT16_16(QCONST16(.70710678f,15), X[stride*2*j+i]);
+         tmp2 = MULT16_16(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]);
+         X[stride*2*j+i] = EXTRACT16(PSHR32(ADD32(tmp1, tmp2), 15));
+         X[stride*(2*j+1)+i] = EXTRACT16(PSHR32(SUB32(tmp1, tmp2), 15));
+      }
+}
+
+static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
+{
+   static const opus_int16 exp2_table8[8] =
+      {16384, 17866, 19483, 21247, 23170, 25267, 27554, 30048};
+   int qn, qb;
+   int N2 = 2*N-1;
+   if (stereo && N==2)
+      N2--;
+   /* The upper limit ensures that in a stereo split with itheta==16384, we'll
+       always have enough bits left over to code at least one pulse in the
+       side; otherwise it would collapse, since it doesn't get folded. */
+   qb = celt_sudiv(b+N2*offset, N2);
+   qb = IMIN(b-pulse_cap-(4<<BITRES), qb);
+
+   qb = IMIN(8<<BITRES, qb);
+
+   if (qb<(1<<BITRES>>1)) {
+      qn = 1;
+   } else {
+      qn = exp2_table8[qb&0x7]>>(14-(qb>>BITRES));
+      qn = (qn+1)>>1<<1;
+   }
+   celt_assert(qn <= 256);
+   return qn;
+}
+
+struct band_ctx {
+   int encode;
+   int resynth;
+   const CELTMode *m;
+   int i;
+   int intensity;
+   int spread;
+   int tf_change;
+   ec_ctx *ec;
+   opus_int32 remaining_bits;
+   const celt_ener *bandE;
+   opus_uint32 seed;
+   int arch;
+   int theta_round;
+   int disable_inv;
+   int avoid_split_noise;
+};
+
+struct split_ctx {
+   int inv;
+   int imid;
+   int iside;
+   int delta;
+   int itheta;
+   int qalloc;
+};
+
+static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
+      celt_norm *X, celt_norm *Y, int N, int *b, int B, int B0,
+      int LM,
+      int stereo, int *fill)
+{
+   int qn;
+   int itheta=0;
+   int delta;
+   int imid, iside;
+   int qalloc;
+   int pulse_cap;
+   int offset;
+   opus_int32 tell;
+   int inv=0;
+   int encode;
+   const CELTMode *m;
+   int i;
+   int intensity;
+   ec_ctx *ec;
+   const celt_ener *bandE;
+
+   encode = ctx->encode;
+   m = ctx->m;
+   i = ctx->i;
+   intensity = ctx->intensity;
+   ec = ctx->ec;
+   bandE = ctx->bandE;
+
+   /* Decide on the resolution to give to the split parameter theta */
+   pulse_cap = m->logN[i]+LM*(1<<BITRES);
+   offset = (pulse_cap>>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET);
+   qn = compute_qn(N, *b, offset, pulse_cap, stereo);
+   if (stereo && i>=intensity)
+      qn = 1;
+   if (encode)
+   {
+      /* theta is the atan() of the ratio between the (normalized)
+         side and mid. With just that parameter, we can re-scale both
+         mid and side because we know that 1) they have unit norm and
+         2) they are orthogonal. */
+      itheta = stereo_itheta(X, Y, stereo, N, ctx->arch);
+   }
+   tell = ec_tell_frac(ec);
+   if (qn!=1)
+   {
+      if (encode)
+      {
+         if (!stereo || ctx->theta_round == 0)
+         {
+            itheta = (itheta*(opus_int32)qn+8192)>>14;
+            if (!stereo && ctx->avoid_split_noise && itheta > 0 && itheta < qn)
+            {
+               /* Check if the selected value of theta will cause the bit allocation
+                  to inject noise on one side. If so, make sure the energy of that side
+                  is zero. */
+               int unquantized = celt_udiv((opus_int32)itheta*16384, qn);
+               imid = bitexact_cos((opus_int16)unquantized);
+               iside = bitexact_cos((opus_int16)(16384-unquantized));
+               delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
+               if (delta > *b)
+                  itheta = qn;
+               else if (delta < -*b)
+                  itheta = 0;
+            }
+         } else {
+            int down;
+            /* Bias quantization towards itheta=0 and itheta=16384. */
+            int bias = itheta > 8192 ? 32767/qn : -32767/qn;
+            down = IMIN(qn-1, IMAX(0, (itheta*(opus_int32)qn + bias)>>14));
+            if (ctx->theta_round < 0)
+               itheta = down;
+            else
+               itheta = down+1;
+         }
+      }
+      /* Entropy coding of the angle. We use a uniform pdf for the
+         time split, a step for stereo, and a triangular one for the rest. */
+      if (stereo && N>2)
+      {
+         int p0 = 3;
+         int x = itheta;
+         int x0 = qn/2;
+         int ft = p0*(x0+1) + x0;
+         /* Use a probability of p0 up to itheta=8192 and then use 1 after */
+         if (encode)
+         {
+            ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
+         } else {
+            int fs;
+            fs=ec_decode(ec,ft);
+            if (fs<(x0+1)*p0)
+               x=fs/p0;
+            else
+               x=x0+1+(fs-(x0+1)*p0);
+            ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
+            itheta = x;
+         }
+      } else if (B0>1 || stereo) {
+         /* Uniform pdf */
+         if (encode)
+            ec_enc_uint(ec, itheta, qn+1);
+         else
+            itheta = ec_dec_uint(ec, qn+1);
+      } else {
+         int fs=1, ft;
+         ft = ((qn>>1)+1)*((qn>>1)+1);
+         if (encode)
+         {
+            int fl;
+
+            fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta;
+            fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 :
+             ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
+
+            ec_encode(ec, fl, fl+fs, ft);
+         } else {
+            /* Triangular pdf */
+            int fl=0;
+            int fm;
+            fm = ec_decode(ec, ft);
+
+            if (fm < ((qn>>1)*((qn>>1) + 1)>>1))
+            {
+               itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1;
+               fs = itheta + 1;
+               fl = itheta*(itheta + 1)>>1;
+            }
+            else
+            {
+               itheta = (2*(qn + 1)
+                - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1;
+               fs = qn + 1 - itheta;
+               fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
+            }
+
+            ec_dec_update(ec, fl, fl+fs, ft);
+         }
+      }
+      celt_assert(itheta>=0);
+      itheta = celt_udiv((opus_int32)itheta*16384, qn);
+      if (encode && stereo)
+      {
+         if (itheta==0)
+            intensity_stereo(m, X, Y, bandE, i, N);
+         else
+            stereo_split(X, Y, N);
+      }
+      /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
+               Let's do that at higher complexity */
+   } else if (stereo) {
+      if (encode)
+      {
+         inv = itheta > 8192 && !ctx->disable_inv;
+         if (inv)
+         {
+            int j;
+            for (j=0;j<N;j++)
+               Y[j] = -Y[j];
+         }
+         intensity_stereo(m, X, Y, bandE, i, N);
+      }
+      if (*b>2<<BITRES && ctx->remaining_bits > 2<<BITRES)
+      {
+         if (encode)
+            ec_enc_bit_logp(ec, inv, 2);
+         else
+            inv = ec_dec_bit_logp(ec, 2);
+      } else
+         inv = 0;
+      /* inv flag override to avoid problems with downmixing. */
+      if (ctx->disable_inv)
+         inv = 0;
+      itheta = 0;
+   }
+   qalloc = ec_tell_frac(ec) - tell;
+   *b -= qalloc;
+
+   if (itheta == 0)
+   {
+      imid = 32767;
+      iside = 0;
+      *fill &= (1<<B)-1;
+      delta = -16384;
+   } else if (itheta == 16384)
+   {
+      imid = 0;
+      iside = 32767;
+      *fill &= ((1<<B)-1)<<B;
+      delta = 16384;
+   } else {
+      imid = bitexact_cos((opus_int16)itheta);
+      iside = bitexact_cos((opus_int16)(16384-itheta));
+      /* This is the mid vs side allocation that minimizes squared error
+         in that band. */
+      delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
+   }
+
+   sctx->inv = inv;
+   sctx->imid = imid;
+   sctx->iside = iside;
+   sctx->delta = delta;
+   sctx->itheta = itheta;
+   sctx->qalloc = qalloc;
+}
+static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b,
+      celt_norm *lowband_out)
+{
+    (void)b;
+   int c;
+   int stereo;
+   celt_norm *x = X;
+   int encode;
+   ec_ctx *ec;
+
+   encode = ctx->encode;
+   ec = ctx->ec;
+
+   stereo = Y != NULL;
+   c=0; do {
+      int sign=0;
+      if (ctx->remaining_bits>=1<<BITRES)
+      {
+         if (encode)
+         {
+            sign = x[0]<0;
+            ec_enc_bits(ec, sign, 1);
+         } else {
+            sign = ec_dec_bits(ec, 1);
+         }
+         ctx->remaining_bits -= 1<<BITRES;
+         b-=1<<BITRES;
+      }
+      if (ctx->resynth)
+         x[0] = sign ? -NORM_SCALING : NORM_SCALING;
+      x = Y;
+   } while (++c<1+stereo);
+   if (lowband_out)
+      lowband_out[0] = SHR16(X[0],4);
+   return 1;
+}
+
+/* This function is responsible for encoding and decoding a mono partition.
+   It can split the band in two and transmit the energy difference with
+   the two half-bands. It can be called recursively so bands can end up being
+   split in 8 parts. */
+static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
+      int N, int b, int B, celt_norm *lowband,
+      int LM,
+      opus_val16 gain, int fill)
+{
+   const unsigned char *cache;
+   int q;
+   int curr_bits;
+   int imid=0, iside=0;
+   int B0=B;
+   opus_val16 mid=0, side=0;
+   unsigned cm=0;
+   celt_norm *Y=NULL;
+   int encode;
+   const CELTMode *m;
+   int i;
+   int spread;
+   ec_ctx *ec;
+
+   encode = ctx->encode;
+   m = ctx->m;
+   i = ctx->i;
+   spread = ctx->spread;
+   ec = ctx->ec;
+
+   /* If we need 1.5 more bit than we can produce, split the band in two. */
+   cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i];
+   if (LM != -1 && b > cache[cache[0]]+12 && N>2)
+   {
+      int mbits, sbits, delta;
+      int itheta;
+      int qalloc;
+      struct split_ctx sctx;
+      celt_norm *next_lowband2=NULL;
+      opus_int32 rebalance;
+
+      N >>= 1;
+      Y = X+N;
+      LM -= 1;
+      if (B==1)
+         fill = (fill&1)|(fill<<1);
+      B = (B+1)>>1;
+
+      compute_theta(ctx, &sctx, X, Y, N, &b, B, B0, LM, 0, &fill);
+      imid = sctx.imid;
+      iside = sctx.iside;
+      delta = sctx.delta;
+      itheta = sctx.itheta;
+      qalloc = sctx.qalloc;
+#ifdef FIXED_POINT
+      mid = imid;
+      side = iside;
+#else
+      mid = (1.f/32768)*imid;
+      side = (1.f/32768)*iside;
+#endif
+
+      /* Give more bits to low-energy MDCTs than they would otherwise deserve */
+      if (B0>1 && (itheta&0x3fff))
+      {
+         if (itheta > 8192)
+            /* Rough approximation for pre-echo masking */
+            delta -= delta>>(4-LM);
+         else
+            /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */
+            delta = IMIN(0, delta + (N<<BITRES>>(5-LM)));
+      }
+      mbits = IMAX(0, IMIN(b, (b-delta)/2));
+      sbits = b-mbits;
+      ctx->remaining_bits -= qalloc;
+
+      if (lowband)
+         next_lowband2 = lowband+N; /* >32-bit split case */
+
+      rebalance = ctx->remaining_bits;
+      if (mbits >= sbits)
+      {
+         cm = quant_partition(ctx, X, N, mbits, B, lowband, LM,
+               MULT16_16_P15(gain,mid), fill);
+         rebalance = mbits - (rebalance-ctx->remaining_bits);
+         if (rebalance > 3<<BITRES && itheta!=0)
+            sbits += rebalance - (3<<BITRES);
+         cm |= quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
+               MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
+      } else {
+         cm = quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
+               MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
+         rebalance = sbits - (rebalance-ctx->remaining_bits);
+         if (rebalance > 3<<BITRES && itheta!=16384)
+            mbits += rebalance - (3<<BITRES);
+         cm |= quant_partition(ctx, X, N, mbits, B, lowband, LM,
+               MULT16_16_P15(gain,mid), fill);
+      }
+   } else {
+      /* This is the basic no-split case */
+      q = bits2pulses(m, i, LM, b);
+      curr_bits = pulses2bits(m, i, LM, q);
+      ctx->remaining_bits -= curr_bits;
+
+      /* Ensures we can never bust the budget */
+      while (ctx->remaining_bits < 0 && q > 0)
+      {
+         ctx->remaining_bits += curr_bits;
+         q--;
+         curr_bits = pulses2bits(m, i, LM, q);
+         ctx->remaining_bits -= curr_bits;
+      }
+
+      if (q!=0)
+      {
+         int K = get_pulses(q);
+
+         /* Finally do the actual quantization */
+         if (encode)
+         {
+            cm = alg_quant(X, N, K, spread, B, ec, gain, ctx->resynth, ctx->arch);
+         } else {
+            cm = alg_unquant(X, N, K, spread, B, ec, gain);
+         }
+      } else {
+         /* If there's no pulse, fill the band anyway */
+         int j;
+         if (ctx->resynth)
+         {
+            unsigned cm_mask;
+            /* B can be as large as 16, so this shift might overflow an int on a
+               16-bit platform; use a long to get defined behavior.*/
+            cm_mask = (unsigned)(1UL<<B)-1;
+            fill &= cm_mask;
+            if (!fill)
+            {
+               OPUS_CLEAR(X, N);
+            } else {
+               if (lowband == NULL)
+               {
+                  /* Noise */
+                  for (j=0;j<N;j++)
+                  {
+                     ctx->seed = celt_lcg_rand(ctx->seed);
+                     X[j] = (celt_norm)((opus_int32)ctx->seed>>20);
+                  }
+                  cm = cm_mask;
+               } else {
+                  /* Folded spectrum */
+                  for (j=0;j<N;j++)
+                  {
+                     opus_val16 tmp;
+                     ctx->seed = celt_lcg_rand(ctx->seed);
+                     /* About 48 dB below the "normal" folding level */
+                     tmp = QCONST16(1.0f/256, 10);
+                     tmp = (ctx->seed)&0x8000 ? tmp : -tmp;
+                     X[j] = lowband[j]+tmp;
+                  }
+                  cm = fill;
+               }
+               renormalise_vector(X, N, gain, ctx->arch);
+            }
+         }
+      }
+   }
+
+   return cm;
+}
+
+
+/* This function is responsible for encoding and decoding a band for the mono case. */
+static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
+      int N, int b, int B, celt_norm *lowband,
+      int LM, celt_norm *lowband_out,
+      opus_val16 gain, celt_norm *lowband_scratch, int fill)
+{
+   int N0=N;
+   int N_B=N;
+   int N_B0;
+   int B0=B;
+   int time_divide=0;
+   int recombine=0;
+   int longBlocks;
+   unsigned cm=0;
+   int k;
+   int encode;
+   int tf_change;
+
+   encode = ctx->encode;
+   tf_change = ctx->tf_change;
+
+   longBlocks = B0==1;
+
+   N_B = celt_udiv(N_B, B);
+
+   /* Special case for one sample */
+   if (N==1)
+   {
+      return quant_band_n1(ctx, X, NULL, b, lowband_out);
+   }
+
+   if (tf_change>0)
+      recombine = tf_change;
+   /* Band recombining to increase frequency resolution */
+
+   if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
+   {
+      OPUS_COPY(lowband_scratch, lowband, N);
+      lowband = lowband_scratch;
+   }
+
+   for (k=0;k<recombine;k++)
+   {
+      static const unsigned char bit_interleave_table[16]={
+            0,1,1,1,2,3,3,3,2,3,3,3,2,3,3,3
+      };
+      if (encode)
+         haar1(X, N>>k, 1<<k);
+      if (lowband)
+         haar1(lowband, N>>k, 1<<k);
+      fill = bit_interleave_table[fill&0xF]|bit_interleave_table[fill>>4]<<2;
+   }
+   B>>=recombine;
+   N_B<<=recombine;
+
+   /* Increasing the time resolution */
+   while ((N_B&1) == 0 && tf_change<0)
+   {
+      if (encode)
+         haar1(X, N_B, B);
+      if (lowband)
+         haar1(lowband, N_B, B);
+      fill |= fill<<B;
+      B <<= 1;
+      N_B >>= 1;
+      time_divide++;
+      tf_change++;
+   }
+   B0=B;
+   N_B0 = N_B;
+
+   /* Reorganize the samples in time order instead of frequency order */
+   if (B0>1)
+   {
+      if (encode)
+         deinterleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
+      if (lowband)
+         deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks);
+   }
+
+   cm = quant_partition(ctx, X, N, b, B, lowband, LM, gain, fill);
+
+   /* This code is used by the decoder and by the resynthesis-enabled encoder */
+   if (ctx->resynth)
+   {
+      /* Undo the sample reorganization going from time order to frequency order */
+      if (B0>1)
+         interleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
+
+      /* Undo time-freq changes that we did earlier */
+      N_B = N_B0;
+      B = B0;
+      for (k=0;k<time_divide;k++)
+      {
+         B >>= 1;
+         N_B <<= 1;
+         cm |= cm>>B;
+         haar1(X, N_B, B);
+      }
+
+      for (k=0;k<recombine;k++)
+      {
+         static const unsigned char bit_deinterleave_table[16]={
+               0x00,0x03,0x0C,0x0F,0x30,0x33,0x3C,0x3F,
+               0xC0,0xC3,0xCC,0xCF,0xF0,0xF3,0xFC,0xFF
+         };
+         cm = bit_deinterleave_table[cm];
+         haar1(X, N0>>k, 1<<k);
+      }
+      B<<=recombine;
+
+      /* Scale output for later folding */
+      if (lowband_out)
+      {
+         int j;
+         opus_val16 n;
+         n = celt_sqrt(SHL32(EXTEND32(N0),22));
+         for (j=0;j<N0;j++)
+            lowband_out[j] = MULT16_16_Q15(n,X[j]);
+      }
+      cm &= (1<<B)-1;
+   }
+   return cm;
+}
+
+
+/* This function is responsible for encoding and decoding a band for the stereo case. */
+static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
+      int N, int b, int B, celt_norm *lowband,
+      int LM, celt_norm *lowband_out,
+      celt_norm *lowband_scratch, int fill)
+{
+   int imid=0, iside=0;
+   int inv = 0;
+   opus_val16 mid=0, side=0;
+   unsigned cm=0;
+   int mbits, sbits, delta;
+   int itheta;
+   int qalloc;
+   struct split_ctx sctx;
+   int orig_fill;
+   int encode;
+   ec_ctx *ec;
+
+   encode = ctx->encode;
+   ec = ctx->ec;
+
+   /* Special case for one sample */
+   if (N==1)
+   {
+      return quant_band_n1(ctx, X, Y, b, lowband_out);
+   }
+
+   orig_fill = fill;
+
+   compute_theta(ctx, &sctx, X, Y, N, &b, B, B, LM, 1, &fill);
+   inv = sctx.inv;
+   imid = sctx.imid;
+   iside = sctx.iside;
+   delta = sctx.delta;
+   itheta = sctx.itheta;
+   qalloc = sctx.qalloc;
+#ifdef FIXED_POINT
+   mid = imid;
+   side = iside;
+#else
+   mid = (1.f/32768)*imid;
+   side = (1.f/32768)*iside;
+#endif
+
+   /* This is a special case for N=2 that only works for stereo and takes
+      advantage of the fact that mid and side are orthogonal to encode
+      the side with just one bit. */
+   if (N==2)
+   {
+      int c;
+      int sign=0;
+      celt_norm *x2, *y2;
+      mbits = b;
+      sbits = 0;
+      /* Only need one bit for the side. */
+      if (itheta != 0 && itheta != 16384)
+         sbits = 1<<BITRES;
+      mbits -= sbits;
+      c = itheta > 8192;
+      ctx->remaining_bits -= qalloc+sbits;
+
+      x2 = c ? Y : X;
+      y2 = c ? X : Y;
+      if (sbits)
+      {
+         if (encode)
+         {
+            /* Here we only need to encode a sign for the side. */
+            sign = x2[0]*y2[1] - x2[1]*y2[0] < 0;
+            ec_enc_bits(ec, sign, 1);
+         } else {
+            sign = ec_dec_bits(ec, 1);
+         }
+      }
+      sign = 1-2*sign;
+      /* We use orig_fill here because we want to fold the side, but if
+         itheta==16384, we'll have cleared the low bits of fill. */
+      cm = quant_band(ctx, x2, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
+            lowband_scratch, orig_fill);
+      /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
+         and there's no need to worry about mixing with the other channel. */
+      y2[0] = -sign*x2[1];
+      y2[1] = sign*x2[0];
+      if (ctx->resynth)
+      {
+         celt_norm tmp;
+         X[0] = MULT16_16_Q15(mid, X[0]);
+         X[1] = MULT16_16_Q15(mid, X[1]);
+         Y[0] = MULT16_16_Q15(side, Y[0]);
+         Y[1] = MULT16_16_Q15(side, Y[1]);
+         tmp = X[0];
+         X[0] = SUB16(tmp,Y[0]);
+         Y[0] = ADD16(tmp,Y[0]);
+         tmp = X[1];
+         X[1] = SUB16(tmp,Y[1]);
+         Y[1] = ADD16(tmp,Y[1]);
+      }
+   } else {
+      /* "Normal" split code */
+      opus_int32 rebalance;
+
+      mbits = IMAX(0, IMIN(b, (b-delta)/2));
+      sbits = b-mbits;
+      ctx->remaining_bits -= qalloc;
+
+      rebalance = ctx->remaining_bits;
+      if (mbits >= sbits)
+      {
+         /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
+            mid for folding later. */
+         cm = quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
+               lowband_scratch, fill);
+         rebalance = mbits - (rebalance-ctx->remaining_bits);
+         if (rebalance > 3<<BITRES && itheta!=0)
+            sbits += rebalance - (3<<BITRES);
+
+         /* For a stereo split, the high bits of fill are always zero, so no
+            folding will be done to the side. */
+         cm |= quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B);
+      } else {
+         /* For a stereo split, the high bits of fill are always zero, so no
+            folding will be done to the side. */
+         cm = quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B);
+         rebalance = sbits - (rebalance-ctx->remaining_bits);
+         if (rebalance > 3<<BITRES && itheta!=16384)
+            mbits += rebalance - (3<<BITRES);
+         /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
+            mid for folding later. */
+         cm |= quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
+               lowband_scratch, fill);
+      }
+   }
+
+
+   /* This code is used by the decoder and by the resynthesis-enabled encoder */
+   if (ctx->resynth)
+   {
+      if (N!=2)
+         stereo_merge(X, Y, mid, N, ctx->arch);
+      if (inv)
+      {
+         int j;
+         for (j=0;j<N;j++)
+            Y[j] = -Y[j];
+      }
+   }
+   return cm;
+}
+
+static void special_hybrid_folding(const CELTMode *m, celt_norm *norm, celt_norm *norm2, int start, int M, int dual_stereo)
+{
+   int n1, n2;
+   const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
+   n1 = M*(eBands[start+1]-eBands[start]);
+   n2 = M*(eBands[start+2]-eBands[start+1]);
+   /* Duplicate enough of the first band folding data to be able to fold the second band.
+      Copies no data for CELT-only mode. */
+   OPUS_COPY(&norm[n1], &norm[2*n1 - n2], n2-n1);
+   if (dual_stereo)
+      OPUS_COPY(&norm2[n1], &norm2[2*n1 - n2], n2-n1);
+}
+
+void quant_all_bands(int encode, const CELTMode *m, int start, int end,
+      celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks,
+      const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
+      int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
+      opus_int32 balance, ec_ctx *ec, int LM, int codedBands,
+      opus_uint32 *seed, int complexity, int arch, int disable_inv)
+{
+   int i;
+   opus_int32 remaining_bits;
+   const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
+   celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2;
+   VARDECL(celt_norm, _norm);
+   VARDECL(celt_norm, _lowband_scratch);
+   VARDECL(celt_norm, X_save);
+   VARDECL(celt_norm, Y_save);
+   VARDECL(celt_norm, X_save2);
+   VARDECL(celt_norm, Y_save2);
+   VARDECL(celt_norm, norm_save2);
+   int resynth_alloc;
+   celt_norm *lowband_scratch;
+   int B;
+   int M;
+   int lowband_offset;
+   int update_lowband = 1;
+   int C = Y_ != NULL ? 2 : 1;
+   int norm_offset;
+   int theta_rdo = encode && Y_!=NULL && !dual_stereo && complexity>=8;
+#ifdef RESYNTH
+   int resynth = 1;
+#else
+   int resynth = !encode || theta_rdo;
+#endif
+   struct band_ctx ctx;
+   SAVE_STACK;
+
+   M = 1<<LM;
+   B = shortBlocks ? M : 1;
+   norm_offset = M*eBands[start];
+   /* No need to allocate norm for the last band because we don't need an
+      output in that band. */
+   ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm);
+   norm = _norm;
+   norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset;
+
+   /* For decoding, we can use the last band as scratch space because we don't need that
+      scratch space for the last band and we don't care about the data there until we're
+      decoding the last band. */
+   if (encode && resynth)
+      resynth_alloc = M*(eBands[m->nbEBands]-eBands[m->nbEBands-1]);
+   else
+      resynth_alloc = ALLOC_NONE;
+   ALLOC(_lowband_scratch, resynth_alloc, celt_norm);
+   if (encode && resynth)
+      lowband_scratch = _lowband_scratch;
+   else
+      lowband_scratch = X_+M*eBands[m->nbEBands-1];
+   ALLOC(X_save, resynth_alloc, celt_norm);
+   ALLOC(Y_save, resynth_alloc, celt_norm);
+   ALLOC(X_save2, resynth_alloc, celt_norm);
+   ALLOC(Y_save2, resynth_alloc, celt_norm);
+   ALLOC(norm_save2, resynth_alloc, celt_norm);
+
+   lowband_offset = 0;
+   ctx.bandE = bandE;
+   ctx.ec = ec;
+   ctx.encode = encode;
+   ctx.intensity = intensity;
+   ctx.m = m;
+   ctx.seed = *seed;
+   ctx.spread = spread;
+   ctx.arch = arch;
+   ctx.disable_inv = disable_inv;
+   ctx.resynth = resynth;
+   ctx.theta_round = 0;
+   /* Avoid injecting noise in the first band on transients. */
+   ctx.avoid_split_noise = B > 1;
+   for (i=start;i<end;i++)
+   {
+      opus_int32 tell;
+      int b;
+      int N;
+      opus_int32 curr_balance;
+      int effective_lowband=-1;
+      celt_norm * OPUS_RESTRICT X, * OPUS_RESTRICT Y;
+      int tf_change=0;
+      unsigned x_cm;
+      unsigned y_cm;
+      int last;
+
+      ctx.i = i;
+      last = (i==end-1);
+
+      X = X_+M*eBands[i];
+      if (Y_!=NULL)
+         Y = Y_+M*eBands[i];
+      else
+         Y = NULL;
+      N = M*eBands[i+1]-M*eBands[i];
+      celt_assert(N > 0);
+      tell = ec_tell_frac(ec);
+
+      /* Compute how many bits we want to allocate to this band */
+      if (i != start)
+         balance -= tell;
+      remaining_bits = total_bits-tell-1;
+      ctx.remaining_bits = remaining_bits;
+      if (i <= codedBands-1)
+      {
+         curr_balance = celt_sudiv(balance, IMIN(3, codedBands-i));
+         b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance)));
+      } else {
+         b = 0;
+      }
+
+#ifndef DISABLE_UPDATE_DRAFT
+      if (resynth && (M*eBands[i]-N >= M*eBands[start] || i==start+1) && (update_lowband || lowband_offset==0))
+            lowband_offset = i;
+      if (i == start+1)
+         special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
+#else
+      if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0))
+            lowband_offset = i;
+#endif
+
+      tf_change = tf_res[i];
+      ctx.tf_change = tf_change;
+      if (i>=m->effEBands)
+      {
+         X=norm;
+         if (Y_!=NULL)
+            Y = norm;
+         lowband_scratch = NULL;
+      }
+      if (last && !theta_rdo)
+         lowband_scratch = NULL;
+
+      /* Get a conservative estimate of the collapse_mask's for the bands we're
+         going to be folding from. */
+      if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1 || tf_change<0))
+      {
+         int fold_start;
+         int fold_end;
+         int fold_i;
+         /* This ensures we never repeat spectral content within one band */
+         effective_lowband = IMAX(0, M*eBands[lowband_offset]-norm_offset-N);
+         fold_start = lowband_offset;
+         while(M*eBands[--fold_start] > effective_lowband+norm_offset);
+         fold_end = lowband_offset-1;
+#ifndef DISABLE_UPDATE_DRAFT
+         while(++fold_end < i && M*eBands[fold_end] < effective_lowband+norm_offset+N);
+#else
+         while(M*eBands[++fold_end] < effective_lowband+norm_offset+N);
+#endif
+         x_cm = y_cm = 0;
+         fold_i = fold_start; do {
+           x_cm |= collapse_masks[fold_i*C+0];
+           y_cm |= collapse_masks[fold_i*C+C-1];
+         } while (++fold_i<fold_end);
+      }
+      /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost
+         always) be non-zero. */
+      else
+         x_cm = y_cm = (1<<B)-1;
+
+      if (dual_stereo && i==intensity)
+      {
+         int j;
+
+         /* Switch off dual stereo to do intensity. */
+         dual_stereo = 0;
+         if (resynth)
+            for (j=0;j<M*eBands[i]-norm_offset;j++)
+               norm[j] = HALF32(norm[j]+norm2[j]);
+      }
+      if (dual_stereo)
+      {
+         x_cm = quant_band(&ctx, X, N, b/2, B,
+               effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
+               last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm);
+         y_cm = quant_band(&ctx, Y, N, b/2, B,
+               effective_lowband != -1 ? norm2+effective_lowband : NULL, LM,
+               last?NULL:norm2+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, y_cm);
+      } else {
+         if (Y!=NULL)
+         {
+            if (theta_rdo && i < intensity)
+            {
+               ec_ctx ec_save, ec_save2;
+               struct band_ctx ctx_save, ctx_save2;
+               opus_val32 dist0, dist1;
+               unsigned cm, cm2;
+               int nstart_bytes, nend_bytes, save_bytes;
+               unsigned char *bytes_buf;
+               unsigned char bytes_save[1275];
+               opus_val16 w[2];
+               compute_channel_weights(bandE[i], bandE[i+m->nbEBands], w);
+               /* Make a copy. */
+               cm = x_cm|y_cm;
+               ec_save = *ec;
+               ctx_save = ctx;
+               OPUS_COPY(X_save, X, N);
+               OPUS_COPY(Y_save, Y, N);
+               /* Encode and round down. */
+               ctx.theta_round = -1;
+               x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
+                     effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
+                     last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
+               dist0 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
+
+               /* Save first result. */
+               cm2 = x_cm;
+               ec_save2 = *ec;
+               ctx_save2 = ctx;
+               OPUS_COPY(X_save2, X, N);
+               OPUS_COPY(Y_save2, Y, N);
+               if (!last)
+                  OPUS_COPY(norm_save2, norm+M*eBands[i]-norm_offset, N);
+               nstart_bytes = ec_save.offs;
+               nend_bytes = ec_save.storage;
+               bytes_buf = ec_save.buf+nstart_bytes;
+               save_bytes = nend_bytes-nstart_bytes;
+               OPUS_COPY(bytes_save, bytes_buf, save_bytes);
+
+               /* Restore */
+               *ec = ec_save;
+               ctx = ctx_save;
+               OPUS_COPY(X, X_save, N);
+               OPUS_COPY(Y, Y_save, N);
+#ifndef DISABLE_UPDATE_DRAFT
+               if (i == start+1)
+                  special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
+#endif
+               /* Encode and round up. */
+               ctx.theta_round = 1;
+               x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
+                     effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
+                     last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
+               dist1 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
+               if (dist0 >= dist1) {
+                  x_cm = cm2;
+                  *ec = ec_save2;
+                  ctx = ctx_save2;
+                  OPUS_COPY(X, X_save2, N);
+                  OPUS_COPY(Y, Y_save2, N);
+                  if (!last)
+                     OPUS_COPY(norm+M*eBands[i]-norm_offset, norm_save2, N);
+                  OPUS_COPY(bytes_buf, bytes_save, save_bytes);
+               }
+            } else {
+               ctx.theta_round = 0;
+               x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
+                     effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
+                     last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm);
+            }
+         } else {
+            x_cm = quant_band(&ctx, X, N, b, B,
+                  effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
+                  last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm);
+         }
+         y_cm = x_cm;
+      }
+      collapse_masks[i*C+0] = (unsigned char)x_cm;
+      collapse_masks[i*C+C-1] = (unsigned char)y_cm;
+      balance += pulses[i] + tell;
+
+      /* Update the folding position only as long as we have 1 bit/sample depth. */
+      update_lowband = b>(N<<BITRES);
+      /* We only need to avoid noise on a split for the first band. After that, we
+         have folding. */
+      ctx.avoid_split_noise = 0;
+   }
+   *seed = ctx.seed;
+
+   RESTORE_STACK;
+}
+
diff --git a/libopus/celt/bands.h b/libopus/celt/bands.h
new file mode 100644
index 0000000..422b32c
--- /dev/null
+++ b/libopus/celt/bands.h
@@ -0,0 +1,123 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2008-2009 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef BANDS_H
+#define BANDS_H
+
+#include "arch.h"
+#include "modes.h"
+#include "entenc.h"
+#include "entdec.h"
+#include "rate.h"
+
+opus_int16 bitexact_cos(opus_int16 x);
+int bitexact_log2tan(int isin,int icos);
+
+/** Compute the amplitude (sqrt energy) in each of the bands
+ * @param m Mode data
+ * @param X Spectrum
+ * @param bandE Square root of the energy for each band (returned)
+ */
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch);
+
+/*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/
+
+/** Normalise each band of X such that the energy in each band is
+    equal to 1
+ * @param m Mode data
+ * @param X Spectrum (returned normalised)
+ * @param bandE Square root of the energy for each band
+ */
+void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M);
+
+/** Denormalise each band of X to restore full amplitude
+ * @param m Mode data
+ * @param X Spectrum (returned de-normalised)
+ * @param bandE Square root of the energy for each band
+ */
+void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
+      celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start,
+      int end, int M, int downsample, int silence);
+
+#define SPREAD_NONE       (0)
+#define SPREAD_LIGHT      (1)
+#define SPREAD_NORMAL     (2)
+#define SPREAD_AGGRESSIVE (3)
+
+int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
+      int last_decision, int *hf_average, int *tapset_decision, int update_hf,
+      int end, int C, int M, const int *spread_weight);
+
+#ifdef MEASURE_NORM_MSE
+void measure_norm_mse(const CELTMode *m, float *X, float *X0, float *bandE, float *bandE0, int M, int N, int C);
+#endif
+
+void haar1(celt_norm *X, int N0, int stride);
+
+/** Quantisation/encoding of the residual spectrum
+ * @param encode flag that indicates whether we're encoding (1) or decoding (0)
+ * @param m Mode data
+ * @param start First band to process
+ * @param end Last band to process + 1
+ * @param X Residual (normalised)
+ * @param Y Residual (normalised) for second channel (or NULL for mono)
+ * @param collapse_masks Anti-collapse tracking mask
+ * @param bandE Square root of the energy for each band
+ * @param pulses Bit allocation (per band) for PVQ
+ * @param shortBlocks Zero for long blocks, non-zero for short blocks
+ * @param spread Amount of spreading to use
+ * @param dual_stereo Zero for MS stereo, non-zero for dual stereo
+ * @param intensity First band to use intensity stereo
+ * @param tf_res Time-frequency resolution change
+ * @param total_bits Total number of bits that can be used for the frame (including the ones already spent)
+ * @param balance Number of unallocated bits
+ * @param en Entropy coder state
+ * @param LM log2() of the number of 2.5 subframes in the frame
+ * @param codedBands Last band to receive bits + 1
+ * @param seed Random generator seed
+ * @param arch Run-time architecture (see opus_select_arch())
+ */
+void quant_all_bands(int encode, const CELTMode *m, int start, int end,
+      celt_norm * X, celt_norm * Y, unsigned char *collapse_masks,
+      const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
+      int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
+      opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed,
+      int complexity, int arch, int disable_inv);
+
+void anti_collapse(const CELTMode *m, celt_norm *X_,
+      unsigned char *collapse_masks, int LM, int C, int size, int start,
+      int end, const opus_val16 *logE, const opus_val16 *prev1logE,
+      const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed,
+      int arch);
+
+opus_uint32 celt_lcg_rand(opus_uint32 seed);
+
+int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev);
+
+#endif /* BANDS_H */
diff --git a/libopus/celt/celt.c b/libopus/celt/celt.c
new file mode 100644
index 0000000..9ce2346
--- /dev/null
+++ b/libopus/celt/celt.c
@@ -0,0 +1,316 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2010 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#define CELT_C
+
+#include "os_support.h"
+#include "mdct.h"
+#include <math.h>
+#include "celt.h"
+#include "pitch.h"
+#include "bands.h"
+#include "modes.h"
+#include "entcode.h"
+#include "quant_bands.h"
+#include "rate.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "float_cast.h"
+#include <stdarg.h>
+#include "celt_lpc.h"
+#include "vq.h"
+
+#ifndef PACKAGE_VERSION
+#define PACKAGE_VERSION "unknown"
+#endif
+
+#if defined(MIPSr1_ASM)
+#include "mips/celt_mipsr1.h"
+#endif
+
+
+int resampling_factor(opus_int32 rate)
+{
+   int ret;
+   switch (rate)
+   {
+   case 48000:
+      ret = 1;
+      break;
+   case 24000:
+      ret = 2;
+      break;
+   case 16000:
+      ret = 3;
+      break;
+   case 12000:
+      ret = 4;
+      break;
+   case 8000:
+      ret = 6;
+      break;
+   default:
+#ifndef CUSTOM_MODES
+      celt_assert(0);
+#endif
+      ret = 0;
+      break;
+   }
+   return ret;
+}
+
+#if !defined(OVERRIDE_COMB_FILTER_CONST) || defined(NON_STATIC_COMB_FILTER_CONST_C)
+/* This version should be faster on ARM */
+#ifdef OPUS_ARM_ASM
+#ifndef NON_STATIC_COMB_FILTER_CONST_C
+static
+#endif
+void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
+      opus_val16 g10, opus_val16 g11, opus_val16 g12)
+{
+   opus_val32 x0, x1, x2, x3, x4;
+   int i;
+   x4 = SHL32(x[-T-2], 1);
+   x3 = SHL32(x[-T-1], 1);
+   x2 = SHL32(x[-T], 1);
+   x1 = SHL32(x[-T+1], 1);
+   for (i=0;i<N-4;i+=5)
+   {
+      opus_val32 t;
+      x0=SHL32(x[i-T+2],1);
+      t = MAC16_32_Q16(x[i], g10, x2);
+      t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
+      t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
+      t = SATURATE(t, SIG_SAT);
+      y[i] = t;
+      x4=SHL32(x[i-T+3],1);
+      t = MAC16_32_Q16(x[i+1], g10, x1);
+      t = MAC16_32_Q16(t, g11, ADD32(x0,x2));
+      t = MAC16_32_Q16(t, g12, ADD32(x4,x3));
+      t = SATURATE(t, SIG_SAT);
+      y[i+1] = t;
+      x3=SHL32(x[i-T+4],1);
+      t = MAC16_32_Q16(x[i+2], g10, x0);
+      t = MAC16_32_Q16(t, g11, ADD32(x4,x1));
+      t = MAC16_32_Q16(t, g12, ADD32(x3,x2));
+      t = SATURATE(t, SIG_SAT);
+      y[i+2] = t;
+      x2=SHL32(x[i-T+5],1);
+      t = MAC16_32_Q16(x[i+3], g10, x4);
+      t = MAC16_32_Q16(t, g11, ADD32(x3,x0));
+      t = MAC16_32_Q16(t, g12, ADD32(x2,x1));
+      t = SATURATE(t, SIG_SAT);
+      y[i+3] = t;
+      x1=SHL32(x[i-T+6],1);
+      t = MAC16_32_Q16(x[i+4], g10, x3);
+      t = MAC16_32_Q16(t, g11, ADD32(x2,x4));
+      t = MAC16_32_Q16(t, g12, ADD32(x1,x0));
+      t = SATURATE(t, SIG_SAT);
+      y[i+4] = t;
+   }
+#ifdef CUSTOM_MODES
+   for (;i<N;i++)
+   {
+      opus_val32 t;
+      x0=SHL32(x[i-T+2],1);
+      t = MAC16_32_Q16(x[i], g10, x2);
+      t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
+      t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
+      t = SATURATE(t, SIG_SAT);
+      y[i] = t;
+      x4=x3;
+      x3=x2;
+      x2=x1;
+      x1=x0;
+   }
+#endif
+}
+#else
+#ifndef NON_STATIC_COMB_FILTER_CONST_C
+static
+#endif
+void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
+      opus_val16 g10, opus_val16 g11, opus_val16 g12)
+{
+   opus_val32 x0, x1, x2, x3, x4;
+   int i;
+   x4 = x[-T-2];
+   x3 = x[-T-1];
+   x2 = x[-T];
+   x1 = x[-T+1];
+   for (i=0;i<N;i++)
+   {
+      x0=x[i-T+2];
+      y[i] = x[i]
+               + MULT16_32_Q15(g10,x2)
+               + MULT16_32_Q15(g11,ADD32(x1,x3))
+               + MULT16_32_Q15(g12,ADD32(x0,x4));
+      y[i] = SATURATE(y[i], SIG_SAT);
+      x4=x3;
+      x3=x2;
+      x2=x1;
+      x1=x0;
+   }
+
+}
+#endif
+#endif
+
+#ifndef OVERRIDE_comb_filter
+void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
+      opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
+      const opus_val16 *window, int overlap, int arch)
+{
+   int i;
+   /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
+   opus_val16 g00, g01, g02, g10, g11, g12;
+   opus_val32 x0, x1, x2, x3, x4;
+   static const opus_val16 gains[3][3] = {
+         {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)},
+         {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)},
+         {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}};
+
+   if (g0==0 && g1==0)
+   {
+      /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
+      if (x!=y)
+         OPUS_MOVE(y, x, N);
+      return;
+   }
+   /* When the gain is zero, T0 and/or T1 is set to zero. We need
+      to have then be at least 2 to avoid processing garbage data. */
+   T0 = IMAX(T0, COMBFILTER_MINPERIOD);
+   T1 = IMAX(T1, COMBFILTER_MINPERIOD);
+   g00 = MULT16_16_P15(g0, gains[tapset0][0]);
+   g01 = MULT16_16_P15(g0, gains[tapset0][1]);
+   g02 = MULT16_16_P15(g0, gains[tapset0][2]);
+   g10 = MULT16_16_P15(g1, gains[tapset1][0]);
+   g11 = MULT16_16_P15(g1, gains[tapset1][1]);
+   g12 = MULT16_16_P15(g1, gains[tapset1][2]);
+   x1 = x[-T1+1];
+   x2 = x[-T1  ];
+   x3 = x[-T1-1];
+   x4 = x[-T1-2];
+   /* If the filter didn't change, we don't need the overlap */
+   if (g0==g1 && T0==T1 && tapset0==tapset1)
+      overlap=0;
+   for (i=0;i<overlap;i++)
+   {
+      opus_val16 f;
+      x0=x[i-T1+2];
+      f = MULT16_16_Q15(window[i],window[i]);
+      y[i] = x[i]
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g00),x[i-T0])
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),ADD32(x[i-T0+1],x[i-T0-1]))
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),ADD32(x[i-T0+2],x[i-T0-2]))
+               + MULT16_32_Q15(MULT16_16_Q15(f,g10),x2)
+               + MULT16_32_Q15(MULT16_16_Q15(f,g11),ADD32(x1,x3))
+               + MULT16_32_Q15(MULT16_16_Q15(f,g12),ADD32(x0,x4));
+      y[i] = SATURATE(y[i], SIG_SAT);
+      x4=x3;
+      x3=x2;
+      x2=x1;
+      x1=x0;
+
+   }
+   if (g1==0)
+   {
+      /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
+      if (x!=y)
+         OPUS_MOVE(y+overlap, x+overlap, N-overlap);
+      return;
+   }
+
+   /* Compute the part with the constant filter. */
+   comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12, arch);
+}
+#endif /* OVERRIDE_comb_filter */
+
+/* TF change table. Positive values mean better frequency resolution (longer
+   effective window), whereas negative values mean better time resolution
+   (shorter effective window). The second index is computed as:
+   4*isTransient + 2*tf_select + per_band_flag */
+const signed char tf_select_table[4][8] = {
+    /*isTransient=0     isTransient=1 */
+      {0, -1, 0, -1,    0,-1, 0,-1}, /* 2.5 ms */
+      {0, -1, 0, -2,    1, 0, 1,-1}, /* 5 ms */
+      {0, -2, 0, -3,    2, 0, 1,-1}, /* 10 ms */
+      {0, -2, 0, -3,    3, 0, 1,-1}, /* 20 ms */
+};
+
+
+void init_caps(const CELTMode *m,int *cap,int LM,int C)
+{
+   int i;
+   for (i=0;i<m->nbEBands;i++)
+   {
+      int N;
+      N=(m->eBands[i+1]-m->eBands[i])<<LM;
+      cap[i] = (m->cache.caps[m->nbEBands*(2*LM+C-1)+i]+64)*C*N>>2;
+   }
+}
+
+
+
+const char *opus_strerror(int error)
+{
+   static const char * const error_strings[8] = {
+      "success",
+      "invalid argument",
+      "buffer too small",
+      "internal error",
+      "corrupted stream",
+      "request not implemented",
+      "invalid state",
+      "memory allocation failed"
+   };
+   if (error > 0 || error < -7)
+      return "unknown error";
+   else
+      return error_strings[-error];
+}
+
+const char *opus_get_version_string(void)
+{
+    return "libopus " PACKAGE_VERSION
+    /* Applications may rely on the presence of this substring in the version
+       string to determine if they have a fixed-point or floating-point build
+       at runtime. */
+#ifdef FIXED_POINT
+          "-fixed"
+#endif
+#ifdef FUZZING
+          "-fuzzing"
+#endif
+          ;
+}
diff --git a/libopus/celt/celt.h b/libopus/celt/celt.h
new file mode 100644
index 0000000..24b6b2b
--- /dev/null
+++ b/libopus/celt/celt.h
@@ -0,0 +1,251 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/**
+  @file celt.h
+  @brief Contains all the functions for encoding and decoding audio
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CELT_H
+#define CELT_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+#include "opus_custom.h"
+#include "entenc.h"
+#include "entdec.h"
+#include "arch.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define CELTEncoder OpusCustomEncoder
+#define CELTDecoder OpusCustomDecoder
+#define CELTMode OpusCustomMode
+
+#define LEAK_BANDS 19
+
+typedef struct {
+   int valid;
+   float tonality;
+   float tonality_slope;
+   float noisiness;
+   float activity;
+   float music_prob;
+   float music_prob_min;
+   float music_prob_max;
+   int   bandwidth;
+   float activity_probability;
+   float max_pitch_ratio;
+   /* Store as Q6 char to save space. */
+   unsigned char leak_boost[LEAK_BANDS];
+} AnalysisInfo;
+
+typedef struct {
+   int signalType;
+   int offset;
+} SILKInfo;
+
+#define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr)))
+
+#define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr)))
+
+#define __celt_check_silkinfo_ptr(ptr) ((ptr) + ((ptr) - (const SILKInfo*)(ptr)))
+
+/* Encoder/decoder Requests */
+
+
+#define CELT_SET_PREDICTION_REQUEST    10002
+/** Controls the use of interframe prediction.
+    0=Independent frames
+    1=Short term interframe prediction allowed
+    2=Long term prediction allowed
+ */
+#define CELT_SET_PREDICTION(x) CELT_SET_PREDICTION_REQUEST, __opus_check_int(x)
+
+#define CELT_SET_INPUT_CLIPPING_REQUEST    10004
+#define CELT_SET_INPUT_CLIPPING(x) CELT_SET_INPUT_CLIPPING_REQUEST, __opus_check_int(x)
+
+#define CELT_GET_AND_CLEAR_ERROR_REQUEST   10007
+#define CELT_GET_AND_CLEAR_ERROR(x) CELT_GET_AND_CLEAR_ERROR_REQUEST, __opus_check_int_ptr(x)
+
+#define CELT_SET_CHANNELS_REQUEST    10008
+#define CELT_SET_CHANNELS(x) CELT_SET_CHANNELS_REQUEST, __opus_check_int(x)
+
+
+/* Internal */
+#define CELT_SET_START_BAND_REQUEST    10010
+#define CELT_SET_START_BAND(x) CELT_SET_START_BAND_REQUEST, __opus_check_int(x)
+
+#define CELT_SET_END_BAND_REQUEST    10012
+#define CELT_SET_END_BAND(x) CELT_SET_END_BAND_REQUEST, __opus_check_int(x)
+
+#define CELT_GET_MODE_REQUEST    10015
+/** Get the CELTMode used by an encoder or decoder */
+#define CELT_GET_MODE(x) CELT_GET_MODE_REQUEST, __celt_check_mode_ptr_ptr(x)
+
+#define CELT_SET_SIGNALLING_REQUEST    10016
+#define CELT_SET_SIGNALLING(x) CELT_SET_SIGNALLING_REQUEST, __opus_check_int(x)
+
+#define CELT_SET_TONALITY_REQUEST    10018
+#define CELT_SET_TONALITY(x) CELT_SET_TONALITY_REQUEST, __opus_check_int(x)
+#define CELT_SET_TONALITY_SLOPE_REQUEST    10020
+#define CELT_SET_TONALITY_SLOPE(x) CELT_SET_TONALITY_SLOPE_REQUEST, __opus_check_int(x)
+
+#define CELT_SET_ANALYSIS_REQUEST    10022
+#define CELT_SET_ANALYSIS(x) CELT_SET_ANALYSIS_REQUEST, __celt_check_analysis_ptr(x)
+
+#define OPUS_SET_LFE_REQUEST    10024
+#define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x)
+
+#define OPUS_SET_ENERGY_MASK_REQUEST    10026
+#define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x)
+
+#define CELT_SET_SILK_INFO_REQUEST    10028
+#define CELT_SET_SILK_INFO(x) CELT_SET_SILK_INFO_REQUEST, __celt_check_silkinfo_ptr(x)
+
+/* Encoder stuff */
+
+int celt_encoder_get_size(int channels);
+
+int celt_encode_with_ec(OpusCustomEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc);
+
+int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels,
+                      int arch);
+
+
+
+/* Decoder stuff */
+
+int celt_decoder_get_size(int channels);
+
+
+int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels);
+
+int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data,
+      int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum);
+
+#define celt_encoder_ctl opus_custom_encoder_ctl
+#define celt_decoder_ctl opus_custom_decoder_ctl
+
+
+#ifdef CUSTOM_MODES
+#define OPUS_CUSTOM_NOSTATIC
+#else
+#define OPUS_CUSTOM_NOSTATIC static OPUS_INLINE
+#endif
+
+static const unsigned char trim_icdf[11] = {126, 124, 119, 109, 87, 41, 19, 9, 4, 2, 0};
+/* Probs: NONE: 21.875%, LIGHT: 6.25%, NORMAL: 65.625%, AGGRESSIVE: 6.25% */
+static const unsigned char spread_icdf[4] = {25, 23, 2, 0};
+
+static const unsigned char tapset_icdf[3]={2,1,0};
+
+#ifdef CUSTOM_MODES
+static const unsigned char toOpusTable[20] = {
+      0xE0, 0xE8, 0xF0, 0xF8,
+      0xC0, 0xC8, 0xD0, 0xD8,
+      0xA0, 0xA8, 0xB0, 0xB8,
+      0x00, 0x00, 0x00, 0x00,
+      0x80, 0x88, 0x90, 0x98,
+};
+
+static const unsigned char fromOpusTable[16] = {
+      0x80, 0x88, 0x90, 0x98,
+      0x40, 0x48, 0x50, 0x58,
+      0x20, 0x28, 0x30, 0x38,
+      0x00, 0x08, 0x10, 0x18
+};
+
+static OPUS_INLINE int toOpus(unsigned char c)
+{
+   int ret=0;
+   if (c<0xA0)
+      ret = toOpusTable[c>>3];
+   if (ret == 0)
+      return -1;
+   else
+      return ret|(c&0x7);
+}
+
+static OPUS_INLINE int fromOpus(unsigned char c)
+{
+   if (c<0x80)
+      return -1;
+   else
+      return fromOpusTable[(c>>3)-16] | (c&0x7);
+}
+#endif /* CUSTOM_MODES */
+
+#define COMBFILTER_MAXPERIOD 1024
+#define COMBFILTER_MINPERIOD 15
+
+extern const signed char tf_select_table[4][8];
+
+#if defined(ENABLE_HARDENING) || defined(ENABLE_ASSERTIONS)
+void validate_celt_decoder(CELTDecoder *st);
+#define VALIDATE_CELT_DECODER(st) validate_celt_decoder(st)
+#else
+#define VALIDATE_CELT_DECODER(st)
+#endif
+
+int resampling_factor(opus_int32 rate);
+
+void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
+                        int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip);
+
+void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
+      opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
+      const opus_val16 *window, int overlap, int arch);
+
+#ifdef NON_STATIC_COMB_FILTER_CONST_C
+void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
+                         opus_val16 g10, opus_val16 g11, opus_val16 g12);
+#endif
+
+#ifndef OVERRIDE_COMB_FILTER_CONST
+# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
+    ((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12))
+#endif
+
+void init_caps(const CELTMode *m,int *cap,int LM,int C);
+
+#ifdef RESYNTH
+void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem);
+void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
+      opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient,
+      int LM, int downsample, int silence);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CELT_H */
diff --git a/libopus/celt/celt_decoder.c b/libopus/celt/celt_decoder.c
new file mode 100644
index 0000000..46a1f50
--- /dev/null
+++ b/libopus/celt/celt_decoder.c
@@ -0,0 +1,1372 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2010 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#define CELT_DECODER_C
+
+#include "cpu_support.h"
+#include "os_support.h"
+#include "mdct.h"
+#include <math.h>
+#include "celt.h"
+#include "pitch.h"
+#include "bands.h"
+#include "modes.h"
+#include "entcode.h"
+#include "quant_bands.h"
+#include "rate.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "float_cast.h"
+#include <stdarg.h>
+#include "celt_lpc.h"
+#include "vq.h"
+
+/* The maximum pitch lag to allow in the pitch-based PLC. It's possible to save
+   CPU time in the PLC pitch search by making this smaller than MAX_PERIOD. The
+   current value corresponds to a pitch of 66.67 Hz. */
+#define PLC_PITCH_LAG_MAX (720)
+/* The minimum pitch lag to allow in the pitch-based PLC. This corresponds to a
+   pitch of 480 Hz. */
+#define PLC_PITCH_LAG_MIN (100)
+
+#if defined(SMALL_FOOTPRINT) && defined(FIXED_POINT)
+#define NORM_ALIASING_HACK
+#endif
+/**********************************************************************/
+/*                                                                    */
+/*                             DECODER                                */
+/*                                                                    */
+/**********************************************************************/
+#define DECODE_BUFFER_SIZE 2048
+
+/** Decoder state
+ @brief Decoder state
+ */
+struct OpusCustomDecoder {
+   const OpusCustomMode *mode;
+   int overlap;
+   int channels;
+   int stream_channels;
+
+   int downsample;
+   int start, end;
+   int signalling;
+   int disable_inv;
+   int arch;
+
+   /* Everything beyond this point gets cleared on a reset */
+#define DECODER_RESET_START rng
+
+   opus_uint32 rng;
+   int error;
+   int last_pitch_index;
+   int loss_count;
+   int skip_plc;
+   int postfilter_period;
+   int postfilter_period_old;
+   opus_val16 postfilter_gain;
+   opus_val16 postfilter_gain_old;
+   int postfilter_tapset;
+   int postfilter_tapset_old;
+
+   celt_sig preemph_memD[2];
+
+   celt_sig _decode_mem[1]; /* Size = channels*(DECODE_BUFFER_SIZE+mode->overlap) */
+   /* opus_val16 lpc[],  Size = channels*LPC_ORDER */
+   /* opus_val16 oldEBands[], Size = 2*mode->nbEBands */
+   /* opus_val16 oldLogE[], Size = 2*mode->nbEBands */
+   /* opus_val16 oldLogE2[], Size = 2*mode->nbEBands */
+   /* opus_val16 backgroundLogE[], Size = 2*mode->nbEBands */
+};
+
+#if defined(ENABLE_HARDENING) || defined(ENABLE_ASSERTIONS)
+/* Make basic checks on the CELT state to ensure we don't end
+   up writing all over memory. */
+void validate_celt_decoder(CELTDecoder *st)
+{
+#ifndef CUSTOM_MODES
+   celt_assert(st->mode == opus_custom_mode_create(48000, 960, NULL));
+   celt_assert(st->overlap == 120);
+#endif
+   celt_assert(st->channels == 1 || st->channels == 2);
+   celt_assert(st->stream_channels == 1 || st->stream_channels == 2);
+   celt_assert(st->downsample > 0);
+   celt_assert(st->start == 0 || st->start == 17);
+   celt_assert(st->start < st->end);
+   celt_assert(st->end <= 21);
+#ifdef OPUS_ARCHMASK
+   celt_assert(st->arch >= 0);
+   celt_assert(st->arch <= OPUS_ARCHMASK);
+#endif
+   celt_assert(st->last_pitch_index <= PLC_PITCH_LAG_MAX);
+   celt_assert(st->last_pitch_index >= PLC_PITCH_LAG_MIN || st->last_pitch_index == 0);
+   celt_assert(st->postfilter_period < MAX_PERIOD);
+   celt_assert(st->postfilter_period >= COMBFILTER_MINPERIOD || st->postfilter_period == 0);
+   celt_assert(st->postfilter_period_old < MAX_PERIOD);
+   celt_assert(st->postfilter_period_old >= COMBFILTER_MINPERIOD || st->postfilter_period_old == 0);
+   celt_assert(st->postfilter_tapset <= 2);
+   celt_assert(st->postfilter_tapset >= 0);
+   celt_assert(st->postfilter_tapset_old <= 2);
+   celt_assert(st->postfilter_tapset_old >= 0);
+}
+#endif
+
+int celt_decoder_get_size(int channels)
+{
+   const CELTMode *mode = opus_custom_mode_create(48000, 960, NULL);
+   return opus_custom_decoder_get_size(mode, channels);
+}
+
+int opus_custom_decoder_get_size(const CELTMode *mode, int channels)
+{
+   int size = sizeof(struct CELTDecoder)
+            + (channels*(DECODE_BUFFER_SIZE+mode->overlap)-1)*sizeof(celt_sig)
+            + channels*LPC_ORDER*sizeof(opus_val16)
+            + 4*2*mode->nbEBands*sizeof(opus_val16);
+   return size;
+}
+
+#ifdef CUSTOM_MODES
+CELTDecoder *opus_custom_decoder_create(const CELTMode *mode, int channels, int *error)
+{
+   int ret;
+   CELTDecoder *st = (CELTDecoder *)opus_alloc(opus_custom_decoder_get_size(mode, channels));
+   ret = opus_custom_decoder_init(st, mode, channels);
+   if (ret != OPUS_OK)
+   {
+      opus_custom_decoder_destroy(st);
+      st = NULL;
+   }
+   if (error)
+      *error = ret;
+   return st;
+}
+#endif /* CUSTOM_MODES */
+
+int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels)
+{
+   int ret;
+   ret = opus_custom_decoder_init(st, opus_custom_mode_create(48000, 960, NULL), channels);
+   if (ret != OPUS_OK)
+      return ret;
+   st->downsample = resampling_factor(sampling_rate);
+   if (st->downsample==0)
+      return OPUS_BAD_ARG;
+   else
+      return OPUS_OK;
+}
+
+int opus_custom_decoder_init(CELTDecoder *st, const CELTMode *mode, int channels)
+{
+   if (channels < 0 || channels > 2)
+      return OPUS_BAD_ARG;
+
+   if (st==NULL)
+      return OPUS_ALLOC_FAIL;
+
+   OPUS_CLEAR((char*)st, opus_custom_decoder_get_size(mode, channels));
+
+   st->mode = mode;
+   st->overlap = mode->overlap;
+   st->stream_channels = st->channels = channels;
+
+   st->downsample = 1;
+   st->start = 0;
+   st->end = st->mode->effEBands;
+   st->signalling = 1;
+#ifndef DISABLE_UPDATE_DRAFT
+   st->disable_inv = channels == 1;
+#else
+   st->disable_inv = 0;
+#endif
+   st->arch = opus_select_arch();
+
+   opus_custom_decoder_ctl(st, OPUS_RESET_STATE);
+
+   return OPUS_OK;
+}
+
+#ifdef CUSTOM_MODES
+void opus_custom_decoder_destroy(CELTDecoder *st)
+{
+   opus_free(st);
+}
+#endif /* CUSTOM_MODES */
+
+#ifndef CUSTOM_MODES
+/* Special case for stereo with no downsampling and no accumulation. This is
+   quite common and we can make it faster by processing both channels in the
+   same loop, reducing overhead due to the dependency loop in the IIR filter. */
+static void deemphasis_stereo_simple(celt_sig *in[], opus_val16 *pcm, int N, const opus_val16 coef0,
+      celt_sig *mem)
+{
+   celt_sig * OPUS_RESTRICT x0;
+   celt_sig * OPUS_RESTRICT x1;
+   celt_sig m0, m1;
+   int j;
+   x0=in[0];
+   x1=in[1];
+   m0 = mem[0];
+   m1 = mem[1];
+   for (j=0;j<N;j++)
+   {
+      celt_sig tmp0, tmp1;
+      /* Add VERY_SMALL to x[] first to reduce dependency chain. */
+      tmp0 = x0[j] + VERY_SMALL + m0;
+      tmp1 = x1[j] + VERY_SMALL + m1;
+      m0 = MULT16_32_Q15(coef0, tmp0);
+      m1 = MULT16_32_Q15(coef0, tmp1);
+      pcm[2*j  ] = SCALEOUT(SIG2WORD16(tmp0));
+      pcm[2*j+1] = SCALEOUT(SIG2WORD16(tmp1));
+   }
+   mem[0] = m0;
+   mem[1] = m1;
+}
+#endif
+
+#ifndef RESYNTH
+static
+#endif
+void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef,
+      celt_sig *mem, int accum)
+{
+   int c;
+   int Nd;
+   int apply_downsampling=0;
+   opus_val16 coef0;
+   VARDECL(celt_sig, scratch);
+   SAVE_STACK;
+#ifndef CUSTOM_MODES
+   /* Short version for common case. */
+   if (downsample == 1 && C == 2 && !accum)
+   {
+      deemphasis_stereo_simple(in, pcm, N, coef[0], mem);
+      return;
+   }
+#endif
+#ifndef FIXED_POINT
+   (void)accum;
+   celt_assert(accum==0);
+#endif
+   ALLOC(scratch, N, celt_sig);
+   coef0 = coef[0];
+   Nd = N/downsample;
+   c=0; do {
+      int j;
+      celt_sig * OPUS_RESTRICT x;
+      opus_val16  * OPUS_RESTRICT y;
+      celt_sig m = mem[c];
+      x =in[c];
+      y = pcm+c;
+#ifdef CUSTOM_MODES
+      if (coef[1] != 0)
+      {
+         opus_val16 coef1 = coef[1];
+         opus_val16 coef3 = coef[3];
+         for (j=0;j<N;j++)
+         {
+            celt_sig tmp = x[j] + m + VERY_SMALL;
+            m = MULT16_32_Q15(coef0, tmp)
+                          - MULT16_32_Q15(coef1, x[j]);
+            tmp = SHL32(MULT16_32_Q15(coef3, tmp), 2);
+            scratch[j] = tmp;
+         }
+         apply_downsampling=1;
+      } else
+#endif
+      if (downsample>1)
+      {
+         /* Shortcut for the standard (non-custom modes) case */
+         for (j=0;j<N;j++)
+         {
+            celt_sig tmp = x[j] + VERY_SMALL + m;
+            m = MULT16_32_Q15(coef0, tmp);
+            scratch[j] = tmp;
+         }
+         apply_downsampling=1;
+      } else {
+         /* Shortcut for the standard (non-custom modes) case */
+#ifdef FIXED_POINT
+         if (accum)
+         {
+            for (j=0;j<N;j++)
+            {
+               celt_sig tmp = x[j] + m + VERY_SMALL;
+               m = MULT16_32_Q15(coef0, tmp);
+               y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(tmp))));
+            }
+         } else
+#endif
+         {
+            for (j=0;j<N;j++)
+            {
+               celt_sig tmp = x[j] + VERY_SMALL + m;
+               m = MULT16_32_Q15(coef0, tmp);
+               y[j*C] = SCALEOUT(SIG2WORD16(tmp));
+            }
+         }
+      }
+      mem[c] = m;
+
+      if (apply_downsampling)
+      {
+         /* Perform down-sampling */
+#ifdef FIXED_POINT
+         if (accum)
+         {
+            for (j=0;j<Nd;j++)
+               y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(scratch[j*downsample]))));
+         } else
+#endif
+         {
+            for (j=0;j<Nd;j++)
+               y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));
+         }
+      }
+   } while (++c<C);
+   RESTORE_STACK;
+}
+
+#ifndef RESYNTH
+static
+#endif
+void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
+                    opus_val16 *oldBandE, int start, int effEnd, int C, int CC,
+                    int isTransient, int LM, int downsample,
+                    int silence, int arch)
+{
+   int c, i;
+   int M;
+   int b;
+   int B;
+   int N, NB;
+   int shift;
+   int nbEBands;
+   int overlap;
+   VARDECL(celt_sig, freq);
+   SAVE_STACK;
+
+   overlap = mode->overlap;
+   nbEBands = mode->nbEBands;
+   N = mode->shortMdctSize<<LM;
+   ALLOC(freq, N, celt_sig); /**< Interleaved signal MDCTs */
+   M = 1<<LM;
+
+   if (isTransient)
+   {
+      B = M;
+      NB = mode->shortMdctSize;
+      shift = mode->maxLM;
+   } else {
+      B = 1;
+      NB = mode->shortMdctSize<<LM;
+      shift = mode->maxLM-LM;
+   }
+
+   if (CC==2&&C==1)
+   {
+      /* Copying a mono streams to two channels */
+      celt_sig *freq2;
+      denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M,
+            downsample, silence);
+      /* Store a temporary copy in the output buffer because the IMDCT destroys its input. */
+      freq2 = out_syn[1]+overlap/2;
+      OPUS_COPY(freq2, freq, N);
+      for (b=0;b<B;b++)
+         clt_mdct_backward(&mode->mdct, &freq2[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch);
+      for (b=0;b<B;b++)
+         clt_mdct_backward(&mode->mdct, &freq[b], out_syn[1]+NB*b, mode->window, overlap, shift, B, arch);
+   } else if (CC==1&&C==2)
+   {
+      /* Downmixing a stereo stream to mono */
+      celt_sig *freq2;
+      freq2 = out_syn[0]+overlap/2;
+      denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M,
+            downsample, silence);
+      /* Use the output buffer as temp array before downmixing. */
+      denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M,
+            downsample, silence);
+      for (i=0;i<N;i++)
+         freq[i] = ADD32(HALF32(freq[i]), HALF32(freq2[i]));
+      for (b=0;b<B;b++)
+         clt_mdct_backward(&mode->mdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch);
+   } else {
+      /* Normal case (mono or stereo) */
+      c=0; do {
+         denormalise_bands(mode, X+c*N, freq, oldBandE+c*nbEBands, start, effEnd, M,
+               downsample, silence);
+         for (b=0;b<B;b++)
+            clt_mdct_backward(&mode->mdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B, arch);
+      } while (++c<CC);
+   }
+   /* Saturate IMDCT output so that we can't overflow in the pitch postfilter
+      or in the */
+   c=0; do {
+      for (i=0;i<N;i++)
+         out_syn[c][i] = SATURATE(out_syn[c][i], SIG_SAT);
+   } while (++c<CC);
+   RESTORE_STACK;
+}
+
+static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec)
+{
+   int i, curr, tf_select;
+   int tf_select_rsv;
+   int tf_changed;
+   int logp;
+   opus_uint32 budget;
+   opus_uint32 tell;
+
+   budget = dec->storage*8;
+   tell = ec_tell(dec);
+   logp = isTransient ? 2 : 4;
+   tf_select_rsv = LM>0 && tell+logp+1<=budget;
+   budget -= tf_select_rsv;
+   tf_changed = curr = 0;
+   for (i=start;i<end;i++)
+   {
+      if (tell+logp<=budget)
+      {
+         curr ^= ec_dec_bit_logp(dec, logp);
+         tell = ec_tell(dec);
+         tf_changed |= curr;
+      }
+      tf_res[i] = curr;
+      logp = isTransient ? 4 : 5;
+   }
+   tf_select = 0;
+   if (tf_select_rsv &&
+     tf_select_table[LM][4*isTransient+0+tf_changed] !=
+     tf_select_table[LM][4*isTransient+2+tf_changed])
+   {
+      tf_select = ec_dec_bit_logp(dec, 1);
+   }
+   for (i=start;i<end;i++)
+   {
+      tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
+   }
+}
+
+static int celt_plc_pitch_search(celt_sig *decode_mem[2], int C, int arch)
+{
+   int pitch_index;
+   VARDECL( opus_val16, lp_pitch_buf );
+   SAVE_STACK;
+   ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
+   pitch_downsample(decode_mem, lp_pitch_buf,
+         DECODE_BUFFER_SIZE, C, arch);
+   pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
+         DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
+         PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, arch);
+   pitch_index = PLC_PITCH_LAG_MAX-pitch_index;
+   RESTORE_STACK;
+   return pitch_index;
+}
+
+static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
+{
+   int c;
+   int i;
+   const int C = st->channels;
+   celt_sig *decode_mem[2];
+   celt_sig *out_syn[2];
+   opus_val16 *lpc;
+   opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
+   const OpusCustomMode *mode;
+   int nbEBands;
+   int overlap;
+   int start;
+   int loss_count;
+   int noise_based;
+   const opus_int16 *eBands;
+   SAVE_STACK;
+
+   mode = st->mode;
+   nbEBands = mode->nbEBands;
+   overlap = mode->overlap;
+   eBands = mode->eBands;
+
+   c=0; do {
+      decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
+      out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
+   } while (++c<C);
+   lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*C);
+   oldBandE = lpc+C*LPC_ORDER;
+   oldLogE = oldBandE + 2*nbEBands;
+   oldLogE2 = oldLogE + 2*nbEBands;
+   backgroundLogE = oldLogE2  + 2*nbEBands;
+
+   loss_count = st->loss_count;
+   start = st->start;
+   noise_based = loss_count >= 5 || start != 0 || st->skip_plc;
+   if (noise_based)
+   {
+      /* Noise-based PLC/CNG */
+#ifdef NORM_ALIASING_HACK
+      celt_norm *X;
+#else
+      VARDECL(celt_norm, X);
+#endif
+      opus_uint32 seed;
+      int end;
+      int effEnd;
+      opus_val16 decay;
+      end = st->end;
+      effEnd = IMAX(start, IMIN(end, mode->effEBands));
+
+#ifdef NORM_ALIASING_HACK
+      /* This is an ugly hack that breaks aliasing rules and would be easily broken,
+         but it saves almost 4kB of stack. */
+      X = (celt_norm*)(out_syn[C-1]+overlap/2);
+#else
+      ALLOC(X, C*N, celt_norm);   /**< Interleaved normalised MDCTs */
+#endif
+
+      /* Energy decay */
+      decay = loss_count==0 ? QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT);
+      c=0; do
+      {
+         for (i=start;i<end;i++)
+            oldBandE[c*nbEBands+i] = MAX16(backgroundLogE[c*nbEBands+i], oldBandE[c*nbEBands+i] - decay);
+      } while (++c<C);
+      seed = st->rng;
+      for (c=0;c<C;c++)
+      {
+         for (i=start;i<effEnd;i++)
+         {
+            int j;
+            int boffs;
+            int blen;
+            boffs = N*c+(eBands[i]<<LM);
+            blen = (eBands[i+1]-eBands[i])<<LM;
+            for (j=0;j<blen;j++)
+            {
+               seed = celt_lcg_rand(seed);
+               X[boffs+j] = (celt_norm)((opus_int32)seed>>20);
+            }
+            renormalise_vector(X+boffs, blen, Q15ONE, st->arch);
+         }
+      }
+      st->rng = seed;
+
+      c=0; do {
+         OPUS_MOVE(decode_mem[c], decode_mem[c]+N,
+               DECODE_BUFFER_SIZE-N+(overlap>>1));
+      } while (++c<C);
+
+      celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, C, 0, LM, st->downsample, 0, st->arch);
+   } else {
+      int exc_length;
+      /* Pitch-based PLC */
+      const opus_val16 *window;
+      opus_val16 *exc;
+      opus_val16 fade = Q15ONE;
+      int pitch_index;
+      VARDECL(opus_val32, etmp);
+      VARDECL(opus_val16, _exc);
+      VARDECL(opus_val16, fir_tmp);
+
+      if (loss_count == 0)
+      {
+         st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch);
+      } else {
+         pitch_index = st->last_pitch_index;
+         fade = QCONST16(.8f,15);
+      }
+
+      /* We want the excitation for 2 pitch periods in order to look for a
+         decaying signal, but we can't get more than MAX_PERIOD. */
+      exc_length = IMIN(2*pitch_index, MAX_PERIOD);
+
+      ALLOC(etmp, overlap, opus_val32);
+      ALLOC(_exc, MAX_PERIOD+LPC_ORDER, opus_val16);
+      ALLOC(fir_tmp, exc_length, opus_val16);
+      exc = _exc+LPC_ORDER;
+      window = mode->window;
+      c=0; do {
+         opus_val16 decay;
+         opus_val16 attenuation;
+         opus_val32 S1=0;
+         celt_sig *buf;
+         int extrapolation_offset;
+         int extrapolation_len;
+         int j;
+
+         buf = decode_mem[c];
+         for (i=0;i<MAX_PERIOD+LPC_ORDER;i++)
+            exc[i-LPC_ORDER] = ROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD-LPC_ORDER+i], SIG_SHIFT);
+
+         if (loss_count == 0)
+         {
+            opus_val32 ac[LPC_ORDER+1];
+            /* Compute LPC coefficients for the last MAX_PERIOD samples before
+               the first loss so we can work in the excitation-filter domain. */
+            _celt_autocorr(exc, ac, window, overlap,
+                   LPC_ORDER, MAX_PERIOD, st->arch);
+            /* Add a noise floor of -40 dB. */
+#ifdef FIXED_POINT
+            ac[0] += SHR32(ac[0],13);
+#else
+            ac[0] *= 1.0001f;
+#endif
+            /* Use lag windowing to stabilize the Levinson-Durbin recursion. */
+            for (i=1;i<=LPC_ORDER;i++)
+            {
+               /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
+#ifdef FIXED_POINT
+               ac[i] -= MULT16_32_Q15(2*i*i, ac[i]);
+#else
+               ac[i] -= ac[i]*(0.008f*0.008f)*i*i;
+#endif
+            }
+            _celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER);
+#ifdef FIXED_POINT
+         /* For fixed-point, apply bandwidth expansion until we can guarantee that
+            no overflow can happen in the IIR filter. This means:
+            32768*sum(abs(filter)) < 2^31 */
+         while (1) {
+            opus_val16 tmp=Q15ONE;
+            opus_val32 sum=QCONST16(1., SIG_SHIFT);
+            for (i=0;i<LPC_ORDER;i++)
+               sum += ABS16(lpc[c*LPC_ORDER+i]);
+            if (sum < 65535) break;
+            for (i=0;i<LPC_ORDER;i++)
+            {
+               tmp = MULT16_16_Q15(QCONST16(.99f,15), tmp);
+               lpc[c*LPC_ORDER+i] = MULT16_16_Q15(lpc[c*LPC_ORDER+i], tmp);
+            }
+         }
+#endif
+         }
+         /* Initialize the LPC history with the samples just before the start
+            of the region for which we're computing the excitation. */
+         {
+            /* Compute the excitation for exc_length samples before the loss. We need the copy
+               because celt_fir() cannot filter in-place. */
+            celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
+                  fir_tmp, exc_length, LPC_ORDER, st->arch);
+            OPUS_COPY(exc+MAX_PERIOD-exc_length, fir_tmp, exc_length);
+         }
+
+         /* Check if the waveform is decaying, and if so how fast.
+            We do this to avoid adding energy when concealing in a segment
+            with decaying energy. */
+         {
+            opus_val32 E1=1, E2=1;
+            int decay_length;
+#ifdef FIXED_POINT
+            int shift = IMAX(0,2*celt_zlog2(celt_maxabs16(&exc[MAX_PERIOD-exc_length], exc_length))-20);
+#endif
+            decay_length = exc_length>>1;
+            for (i=0;i<decay_length;i++)
+            {
+               opus_val16 e;
+               e = exc[MAX_PERIOD-decay_length+i];
+               E1 += SHR32(MULT16_16(e, e), shift);
+               e = exc[MAX_PERIOD-2*decay_length+i];
+               E2 += SHR32(MULT16_16(e, e), shift);
+            }
+            E1 = MIN32(E1, E2);
+            decay = celt_sqrt(frac_div32(SHR32(E1, 1), E2));
+         }
+
+         /* Move the decoder memory one frame to the left to give us room to
+            add the data for the new frame. We ignore the overlap that extends
+            past the end of the buffer, because we aren't going to use it. */
+         OPUS_MOVE(buf, buf+N, DECODE_BUFFER_SIZE-N);
+
+         /* Extrapolate from the end of the excitation with a period of
+            "pitch_index", scaling down each period by an additional factor of
+            "decay". */
+         extrapolation_offset = MAX_PERIOD-pitch_index;
+         /* We need to extrapolate enough samples to cover a complete MDCT
+            window (including overlap/2 samples on both sides). */
+         extrapolation_len = N+overlap;
+         /* We also apply fading if this is not the first loss. */
+         attenuation = MULT16_16_Q15(fade, decay);
+         for (i=j=0;i<extrapolation_len;i++,j++)
+         {
+            opus_val16 tmp;
+            if (j >= pitch_index) {
+               j -= pitch_index;
+               attenuation = MULT16_16_Q15(attenuation, decay);
+            }
+            buf[DECODE_BUFFER_SIZE-N+i] =
+                  SHL32(EXTEND32(MULT16_16_Q15(attenuation,
+                        exc[extrapolation_offset+j])), SIG_SHIFT);
+            /* Compute the energy of the previously decoded signal whose
+               excitation we're copying. */
+            tmp = ROUND16(
+                  buf[DECODE_BUFFER_SIZE-MAX_PERIOD-N+extrapolation_offset+j],
+                  SIG_SHIFT);
+            S1 += SHR32(MULT16_16(tmp, tmp), 10);
+         }
+         {
+            opus_val16 lpc_mem[LPC_ORDER];
+            /* Copy the last decoded samples (prior to the overlap region) to
+               synthesis filter memory so we can have a continuous signal. */
+            for (i=0;i<LPC_ORDER;i++)
+               lpc_mem[i] = ROUND16(buf[DECODE_BUFFER_SIZE-N-1-i], SIG_SHIFT);
+            /* Apply the synthesis filter to convert the excitation back into
+               the signal domain. */
+            celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,
+                  buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER,
+                  lpc_mem, st->arch);
+#ifdef FIXED_POINT
+            for (i=0; i < extrapolation_len; i++)
+               buf[DECODE_BUFFER_SIZE-N+i] = SATURATE(buf[DECODE_BUFFER_SIZE-N+i], SIG_SAT);
+#endif
+         }
+
+         /* Check if the synthesis energy is higher than expected, which can
+            happen with the signal changes during our window. If so,
+            attenuate. */
+         {
+            opus_val32 S2=0;
+            for (i=0;i<extrapolation_len;i++)
+            {
+               opus_val16 tmp = ROUND16(buf[DECODE_BUFFER_SIZE-N+i], SIG_SHIFT);
+               S2 += SHR32(MULT16_16(tmp, tmp), 10);
+            }
+            /* This checks for an "explosion" in the synthesis. */
+#ifdef FIXED_POINT
+            if (!(S1 > SHR32(S2,2)))
+#else
+            /* The float test is written this way to catch NaNs in the output
+               of the IIR filter at the same time. */
+            if (!(S1 > 0.2f*S2))
+#endif
+            {
+               for (i=0;i<extrapolation_len;i++)
+                  buf[DECODE_BUFFER_SIZE-N+i] = 0;
+            } else if (S1 < S2)
+            {
+               opus_val16 ratio = celt_sqrt(frac_div32(SHR32(S1,1)+1,S2+1));
+               for (i=0;i<overlap;i++)
+               {
+                  opus_val16 tmp_g = Q15ONE
+                        - MULT16_16_Q15(window[i], Q15ONE-ratio);
+                  buf[DECODE_BUFFER_SIZE-N+i] =
+                        MULT16_32_Q15(tmp_g, buf[DECODE_BUFFER_SIZE-N+i]);
+               }
+               for (i=overlap;i<extrapolation_len;i++)
+               {
+                  buf[DECODE_BUFFER_SIZE-N+i] =
+                        MULT16_32_Q15(ratio, buf[DECODE_BUFFER_SIZE-N+i]);
+               }
+            }
+         }
+
+         /* Apply the pre-filter to the MDCT overlap for the next frame because
+            the post-filter will be re-applied in the decoder after the MDCT
+            overlap. */
+         comb_filter(etmp, buf+DECODE_BUFFER_SIZE,
+              st->postfilter_period, st->postfilter_period, overlap,
+              -st->postfilter_gain, -st->postfilter_gain,
+              st->postfilter_tapset, st->postfilter_tapset, NULL, 0, st->arch);
+
+         /* Simulate TDAC on the concealed audio so that it blends with the
+            MDCT of the next frame. */
+         for (i=0;i<overlap/2;i++)
+         {
+            buf[DECODE_BUFFER_SIZE+i] =
+               MULT16_32_Q15(window[i], etmp[overlap-1-i])
+               + MULT16_32_Q15(window[overlap-i-1], etmp[i]);
+         }
+      } while (++c<C);
+   }
+
+   st->loss_count = loss_count+1;
+
+   RESTORE_STACK;
+}
+
+int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
+      int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum)
+{
+   int c, i, N;
+   int spread_decision;
+   opus_int32 bits;
+   ec_dec _dec;
+#ifdef NORM_ALIASING_HACK
+   celt_norm *X;
+#else
+   VARDECL(celt_norm, X);
+#endif
+   VARDECL(int, fine_quant);
+   VARDECL(int, pulses);
+   VARDECL(int, cap);
+   VARDECL(int, offsets);
+   VARDECL(int, fine_priority);
+   VARDECL(int, tf_res);
+   VARDECL(unsigned char, collapse_masks);
+   celt_sig *decode_mem[2];
+   celt_sig *out_syn[2];
+   opus_val16 *lpc;
+   opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
+
+   int shortBlocks;
+   int isTransient;
+   int intra_ener;
+   const int CC = st->channels;
+   int LM, M;
+   int start;
+   int end;
+   int effEnd;
+   int codedBands;
+   int alloc_trim;
+   int postfilter_pitch;
+   opus_val16 postfilter_gain;
+   int intensity=0;
+   int dual_stereo=0;
+   opus_int32 total_bits;
+   opus_int32 balance;
+   opus_int32 tell;
+   int dynalloc_logp;
+   int postfilter_tapset;
+   int anti_collapse_rsv;
+   int anti_collapse_on=0;
+   int silence;
+   int C = st->stream_channels;
+   const OpusCustomMode *mode;
+   int nbEBands;
+   int overlap;
+   const opus_int16 *eBands;
+   ALLOC_STACK;
+
+   VALIDATE_CELT_DECODER(st);
+   mode = st->mode;
+   nbEBands = mode->nbEBands;
+   overlap = mode->overlap;
+   eBands = mode->eBands;
+   start = st->start;
+   end = st->end;
+   frame_size *= st->downsample;
+
+   lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC);
+   oldBandE = lpc+CC*LPC_ORDER;
+   oldLogE = oldBandE + 2*nbEBands;
+   oldLogE2 = oldLogE + 2*nbEBands;
+   backgroundLogE = oldLogE2  + 2*nbEBands;
+
+#ifdef CUSTOM_MODES
+   if (st->signalling && data!=NULL)
+   {
+      int data0=data[0];
+      /* Convert "standard mode" to Opus header */
+      if (mode->Fs==48000 && mode->shortMdctSize==120)
+      {
+         data0 = fromOpus(data0);
+         if (data0<0)
+            return OPUS_INVALID_PACKET;
+      }
+      st->end = end = IMAX(1, mode->effEBands-2*(data0>>5));
+      LM = (data0>>3)&0x3;
+      C = 1 + ((data0>>2)&0x1);
+      data++;
+      len--;
+      if (LM>mode->maxLM)
+         return OPUS_INVALID_PACKET;
+      if (frame_size < mode->shortMdctSize<<LM)
+         return OPUS_BUFFER_TOO_SMALL;
+      else
+         frame_size = mode->shortMdctSize<<LM;
+   } else {
+#else
+   {
+#endif
+      for (LM=0;LM<=mode->maxLM;LM++)
+         if (mode->shortMdctSize<<LM==frame_size)
+            break;
+      if (LM>mode->maxLM)
+         return OPUS_BAD_ARG;
+   }
+   M=1<<LM;
+
+   if (len<0 || len>1275 || pcm==NULL)
+      return OPUS_BAD_ARG;
+
+   N = M*mode->shortMdctSize;
+   c=0; do {
+      decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
+      out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
+   } while (++c<CC);
+
+   effEnd = end;
+   if (effEnd > mode->effEBands)
+      effEnd = mode->effEBands;
+
+   if (data == NULL || len<=1)
+   {
+      celt_decode_lost(st, N, LM);
+      deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
+      RESTORE_STACK;
+      return frame_size/st->downsample;
+   }
+
+   /* Check if there are at least two packets received consecutively before
+    * turning on the pitch-based PLC */
+   st->skip_plc = st->loss_count != 0;
+
+   if (dec == NULL)
+   {
+      ec_dec_init(&_dec,(unsigned char*)data,len);
+      dec = &_dec;
+   }
+
+   if (C==1)
+   {
+      for (i=0;i<nbEBands;i++)
+         oldBandE[i]=MAX16(oldBandE[i],oldBandE[nbEBands+i]);
+   }
+
+   total_bits = len*8;
+   tell = ec_tell(dec);
+
+   if (tell >= total_bits)
+      silence = 1;
+   else if (tell==1)
+      silence = ec_dec_bit_logp(dec, 15);
+   else
+      silence = 0;
+   if (silence)
+   {
+      /* Pretend we've read all the remaining bits */
+      tell = len*8;
+      dec->nbits_total+=tell-ec_tell(dec);
+   }
+
+   postfilter_gain = 0;
+   postfilter_pitch = 0;
+   postfilter_tapset = 0;
+   if (start==0 && tell+16 <= total_bits)
+   {
+      if(ec_dec_bit_logp(dec, 1))
+      {
+         int qg, octave;
+         octave = ec_dec_uint(dec, 6);
+         postfilter_pitch = (16<<octave)+ec_dec_bits(dec, 4+octave)-1;
+         qg = ec_dec_bits(dec, 3);
+         if (ec_tell(dec)+2<=total_bits)
+            postfilter_tapset = ec_dec_icdf(dec, tapset_icdf, 2);
+         postfilter_gain = QCONST16(.09375f,15)*(qg+1);
+      }
+      tell = ec_tell(dec);
+   }
+
+   if (LM > 0 && tell+3 <= total_bits)
+   {
+      isTransient = ec_dec_bit_logp(dec, 3);
+      tell = ec_tell(dec);
+   }
+   else
+      isTransient = 0;
+
+   if (isTransient)
+      shortBlocks = M;
+   else
+      shortBlocks = 0;
+
+   /* Decode the global flags (first symbols in the stream) */
+   intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0;
+   /* Get band energies */
+   unquant_coarse_energy(mode, start, end, oldBandE,
+         intra_ener, dec, C, LM);
+
+   ALLOC(tf_res, nbEBands, int);
+   tf_decode(start, end, isTransient, tf_res, LM, dec);
+
+   tell = ec_tell(dec);
+   spread_decision = SPREAD_NORMAL;
+   if (tell+4 <= total_bits)
+      spread_decision = ec_dec_icdf(dec, spread_icdf, 5);
+
+   ALLOC(cap, nbEBands, int);
+
+   init_caps(mode,cap,LM,C);
+
+   ALLOC(offsets, nbEBands, int);
+
+   dynalloc_logp = 6;
+   total_bits<<=BITRES;
+   tell = ec_tell_frac(dec);
+   for (i=start;i<end;i++)
+   {
+      int width, quanta;
+      int dynalloc_loop_logp;
+      int boost;
+      width = C*(eBands[i+1]-eBands[i])<<LM;
+      /* quanta is 6 bits, but no more than 1 bit/sample
+         and no less than 1/8 bit/sample */
+      quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
+      dynalloc_loop_logp = dynalloc_logp;
+      boost = 0;
+      while (tell+(dynalloc_loop_logp<<BITRES) < total_bits && boost < cap[i])
+      {
+         int flag;
+         flag = ec_dec_bit_logp(dec, dynalloc_loop_logp);
+         tell = ec_tell_frac(dec);
+         if (!flag)
+            break;
+         boost += quanta;
+         total_bits -= quanta;
+         dynalloc_loop_logp = 1;
+      }
+      offsets[i] = boost;
+      /* Making dynalloc more likely */
+      if (boost>0)
+         dynalloc_logp = IMAX(2, dynalloc_logp-1);
+   }
+
+   ALLOC(fine_quant, nbEBands, int);
+   alloc_trim = tell+(6<<BITRES) <= total_bits ?
+         ec_dec_icdf(dec, trim_icdf, 7) : 5;
+
+   bits = (((opus_int32)len*8)<<BITRES) - ec_tell_frac(dec) - 1;
+   anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
+   bits -= anti_collapse_rsv;
+
+   ALLOC(pulses, nbEBands, int);
+   ALLOC(fine_priority, nbEBands, int);
+
+   codedBands = clt_compute_allocation(mode, start, end, offsets, cap,
+         alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
+         fine_quant, fine_priority, C, LM, dec, 0, 0, 0);
+
+   unquant_fine_energy(mode, start, end, oldBandE, fine_quant, dec, C);
+
+   c=0; do {
+      OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
+   } while (++c<CC);
+
+   /* Decode fixed codebook */
+   ALLOC(collapse_masks, C*nbEBands, unsigned char);
+
+#ifdef NORM_ALIASING_HACK
+   /* This is an ugly hack that breaks aliasing rules and would be easily broken,
+      but it saves almost 4kB of stack. */
+   X = (celt_norm*)(out_syn[CC-1]+overlap/2);
+#else
+   ALLOC(X, C*N, celt_norm);   /**< Interleaved normalised MDCTs */
+#endif
+
+   quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks,
+         NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res,
+         len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng, 0,
+         st->arch, st->disable_inv);
+
+   if (anti_collapse_rsv > 0)
+   {
+      anti_collapse_on = ec_dec_bits(dec, 1);
+   }
+
+   unquant_energy_finalise(mode, start, end, oldBandE,
+         fine_quant, fine_priority, len*8-ec_tell(dec), dec, C);
+
+   if (anti_collapse_on)
+      anti_collapse(mode, X, collapse_masks, LM, C, N,
+            start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng, st->arch);
+
+   if (silence)
+   {
+      for (i=0;i<C*nbEBands;i++)
+         oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
+   }
+
+   celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd,
+                  C, CC, isTransient, LM, st->downsample, silence, st->arch);
+
+   c=0; do {
+      st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD);
+      st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD);
+      comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, mode->shortMdctSize,
+            st->postfilter_gain_old, st->postfilter_gain, st->postfilter_tapset_old, st->postfilter_tapset,
+            mode->window, overlap, st->arch);
+      if (LM!=0)
+         comb_filter(out_syn[c]+mode->shortMdctSize, out_syn[c]+mode->shortMdctSize, st->postfilter_period, postfilter_pitch, N-mode->shortMdctSize,
+               st->postfilter_gain, postfilter_gain, st->postfilter_tapset, postfilter_tapset,
+               mode->window, overlap, st->arch);
+
+   } while (++c<CC);
+   st->postfilter_period_old = st->postfilter_period;
+   st->postfilter_gain_old = st->postfilter_gain;
+   st->postfilter_tapset_old = st->postfilter_tapset;
+   st->postfilter_period = postfilter_pitch;
+   st->postfilter_gain = postfilter_gain;
+   st->postfilter_tapset = postfilter_tapset;
+   if (LM!=0)
+   {
+      st->postfilter_period_old = st->postfilter_period;
+      st->postfilter_gain_old = st->postfilter_gain;
+      st->postfilter_tapset_old = st->postfilter_tapset;
+   }
+
+   if (C==1)
+      OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands);
+
+   /* In case start or end were to change */
+   if (!isTransient)
+   {
+      opus_val16 max_background_increase;
+      OPUS_COPY(oldLogE2, oldLogE, 2*nbEBands);
+      OPUS_COPY(oldLogE, oldBandE, 2*nbEBands);
+      /* In normal circumstances, we only allow the noise floor to increase by
+         up to 2.4 dB/second, but when we're in DTX, we allow up to 6 dB
+         increase for each update.*/
+      if (st->loss_count < 10)
+         max_background_increase = M*QCONST16(0.001f,DB_SHIFT);
+      else
+         max_background_increase = QCONST16(1.f,DB_SHIFT);
+      for (i=0;i<2*nbEBands;i++)
+         backgroundLogE[i] = MIN16(backgroundLogE[i] + max_background_increase, oldBandE[i]);
+   } else {
+      for (i=0;i<2*nbEBands;i++)
+         oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
+   }
+   c=0; do
+   {
+      for (i=0;i<start;i++)
+      {
+         oldBandE[c*nbEBands+i]=0;
+         oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
+      }
+      for (i=end;i<nbEBands;i++)
+      {
+         oldBandE[c*nbEBands+i]=0;
+         oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
+      }
+   } while (++c<2);
+   st->rng = dec->rng;
+
+   deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
+   st->loss_count = 0;
+   RESTORE_STACK;
+   if (ec_tell(dec) > 8*len)
+      return OPUS_INTERNAL_ERROR;
+   if(ec_get_error(dec))
+      st->error = 1;
+   return frame_size/st->downsample;
+}
+
+
+#ifdef CUSTOM_MODES
+
+#ifdef FIXED_POINT
+int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
+{
+   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0);
+}
+
+#ifndef DISABLE_FLOAT_API
+int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
+{
+   int j, ret, C, N;
+   VARDECL(opus_int16, out);
+   ALLOC_STACK;
+
+   if (pcm==NULL)
+      return OPUS_BAD_ARG;
+
+   C = st->channels;
+   N = frame_size;
+
+   ALLOC(out, C*N, opus_int16);
+   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0);
+   if (ret>0)
+      for (j=0;j<C*ret;j++)
+         pcm[j]=out[j]*(1.f/32768.f);
+
+   RESTORE_STACK;
+   return ret;
+}
+#endif /* DISABLE_FLOAT_API */
+
+#else
+
+int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
+{
+   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0);
+}
+
+int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
+{
+   int j, ret, C, N;
+   VARDECL(celt_sig, out);
+   ALLOC_STACK;
+
+   if (pcm==NULL)
+      return OPUS_BAD_ARG;
+
+   C = st->channels;
+   N = frame_size;
+   ALLOC(out, C*N, celt_sig);
+
+   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0);
+
+   if (ret>0)
+      for (j=0;j<C*ret;j++)
+         pcm[j] = FLOAT2INT16 (out[j]);
+
+   RESTORE_STACK;
+   return ret;
+}
+
+#endif
+#endif /* CUSTOM_MODES */
+
+int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
+{
+   va_list ap;
+
+   va_start(ap, request);
+   switch (request)
+   {
+      case CELT_SET_START_BAND_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<0 || value>=st->mode->nbEBands)
+            goto bad_arg;
+         st->start = value;
+      }
+      break;
+      case CELT_SET_END_BAND_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<1 || value>st->mode->nbEBands)
+            goto bad_arg;
+         st->end = value;
+      }
+      break;
+      case CELT_SET_CHANNELS_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<1 || value>2)
+            goto bad_arg;
+         st->stream_channels = value;
+      }
+      break;
+      case CELT_GET_AND_CLEAR_ERROR_REQUEST:
+      {
+         opus_int32 *value = va_arg(ap, opus_int32*);
+         if (value==NULL)
+            goto bad_arg;
+         *value=st->error;
+         st->error = 0;
+      }
+      break;
+      case OPUS_GET_LOOKAHEAD_REQUEST:
+      {
+         opus_int32 *value = va_arg(ap, opus_int32*);
+         if (value==NULL)
+            goto bad_arg;
+         *value = st->overlap/st->downsample;
+      }
+      break;
+      case OPUS_RESET_STATE:
+      {
+         int i;
+         opus_val16 *lpc, *oldBandE, *oldLogE, *oldLogE2;
+         lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*st->channels);
+         oldBandE = lpc+st->channels*LPC_ORDER;
+         oldLogE = oldBandE + 2*st->mode->nbEBands;
+         oldLogE2 = oldLogE + 2*st->mode->nbEBands;
+         OPUS_CLEAR((char*)&st->DECODER_RESET_START,
+               opus_custom_decoder_get_size(st->mode, st->channels)-
+               ((char*)&st->DECODER_RESET_START - (char*)st));
+         for (i=0;i<2*st->mode->nbEBands;i++)
+            oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
+         st->skip_plc = 1;
+      }
+      break;
+      case OPUS_GET_PITCH_REQUEST:
+      {
+         opus_int32 *value = va_arg(ap, opus_int32*);
+         if (value==NULL)
+            goto bad_arg;
+         *value = st->postfilter_period;
+      }
+      break;
+      case CELT_GET_MODE_REQUEST:
+      {
+         const CELTMode ** value = va_arg(ap, const CELTMode**);
+         if (value==0)
+            goto bad_arg;
+         *value=st->mode;
+      }
+      break;
+      case CELT_SET_SIGNALLING_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         st->signalling = value;
+      }
+      break;
+      case OPUS_GET_FINAL_RANGE_REQUEST:
+      {
+         opus_uint32 * value = va_arg(ap, opus_uint32 *);
+         if (value==0)
+            goto bad_arg;
+         *value=st->rng;
+      }
+      break;
+      case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST:
+      {
+          opus_int32 value = va_arg(ap, opus_int32);
+          if(value<0 || value>1)
+          {
+             goto bad_arg;
+          }
+          st->disable_inv = value;
+      }
+      break;
+      case OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST:
+      {
+          opus_int32 *value = va_arg(ap, opus_int32*);
+          if (!value)
+          {
+             goto bad_arg;
+          }
+          *value = st->disable_inv;
+      }
+      break;
+      default:
+         goto bad_request;
+   }
+   va_end(ap);
+   return OPUS_OK;
+bad_arg:
+   va_end(ap);
+   return OPUS_BAD_ARG;
+bad_request:
+      va_end(ap);
+  return OPUS_UNIMPLEMENTED;
+}
diff --git a/libopus/celt/celt_encoder.c b/libopus/celt/celt_encoder.c
new file mode 100644
index 0000000..34e395c
--- /dev/null
+++ b/libopus/celt/celt_encoder.c
@@ -0,0 +1,2607 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2010 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#define CELT_ENCODER_C
+
+#include "cpu_support.h"
+#include "os_support.h"
+#include "mdct.h"
+#include <math.h>
+#include "celt.h"
+#include "pitch.h"
+#include "bands.h"
+#include "modes.h"
+#include "entcode.h"
+#include "quant_bands.h"
+#include "rate.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "float_cast.h"
+#include <stdarg.h>
+#include "celt_lpc.h"
+#include "vq.h"
+
+
+/** Encoder state
+ @brief Encoder state
+ */
+struct OpusCustomEncoder {
+   const OpusCustomMode *mode;     /**< Mode used by the encoder */
+   int channels;
+   int stream_channels;
+
+   int force_intra;
+   int clip;
+   int disable_pf;
+   int complexity;
+   int upsample;
+   int start, end;
+
+   opus_int32 bitrate;
+   int vbr;
+   int signalling;
+   int constrained_vbr;      /* If zero, VBR can do whatever it likes with the rate */
+   int loss_rate;
+   int lsb_depth;
+   int lfe;
+   int disable_inv;
+   int arch;
+
+   /* Everything beyond this point gets cleared on a reset */
+#define ENCODER_RESET_START rng
+
+   opus_uint32 rng;
+   int spread_decision;
+   opus_val32 delayedIntra;
+   int tonal_average;
+   int lastCodedBands;
+   int hf_average;
+   int tapset_decision;
+
+   int prefilter_period;
+   opus_val16 prefilter_gain;
+   int prefilter_tapset;
+#ifdef RESYNTH
+   int prefilter_period_old;
+   opus_val16 prefilter_gain_old;
+   int prefilter_tapset_old;
+#endif
+   int consec_transient;
+   AnalysisInfo analysis;
+   SILKInfo silk_info;
+
+   opus_val32 preemph_memE[2];
+   opus_val32 preemph_memD[2];
+
+   /* VBR-related parameters */
+   opus_int32 vbr_reservoir;
+   opus_int32 vbr_drift;
+   opus_int32 vbr_offset;
+   opus_int32 vbr_count;
+   opus_val32 overlap_max;
+   opus_val16 stereo_saving;
+   int intensity;
+   opus_val16 *energy_mask;
+   opus_val16 spec_avg;
+
+#ifdef RESYNTH
+   /* +MAX_PERIOD/2 to make space for overlap */
+   celt_sig syn_mem[2][2*MAX_PERIOD+MAX_PERIOD/2];
+#endif
+
+   celt_sig in_mem[1]; /* Size = channels*mode->overlap */
+   /* celt_sig prefilter_mem[],  Size = channels*COMBFILTER_MAXPERIOD */
+   /* opus_val16 oldBandE[],     Size = channels*mode->nbEBands */
+   /* opus_val16 oldLogE[],      Size = channels*mode->nbEBands */
+   /* opus_val16 oldLogE2[],     Size = channels*mode->nbEBands */
+   /* opus_val16 energyError[],  Size = channels*mode->nbEBands */
+};
+
+int celt_encoder_get_size(int channels)
+{
+   CELTMode *mode = opus_custom_mode_create(48000, 960, NULL);
+   return opus_custom_encoder_get_size(mode, channels);
+}
+
+int opus_custom_encoder_get_size(const CELTMode *mode, int channels)
+{
+   int size = sizeof(struct CELTEncoder)
+         + (channels*mode->overlap-1)*sizeof(celt_sig)    /* celt_sig in_mem[channels*mode->overlap]; */
+         + channels*COMBFILTER_MAXPERIOD*sizeof(celt_sig) /* celt_sig prefilter_mem[channels*COMBFILTER_MAXPERIOD]; */
+         + 4*channels*mode->nbEBands*sizeof(opus_val16);  /* opus_val16 oldBandE[channels*mode->nbEBands]; */
+                                                          /* opus_val16 oldLogE[channels*mode->nbEBands]; */
+                                                          /* opus_val16 oldLogE2[channels*mode->nbEBands]; */
+                                                          /* opus_val16 energyError[channels*mode->nbEBands]; */
+   return size;
+}
+
+#ifdef CUSTOM_MODES
+CELTEncoder *opus_custom_encoder_create(const CELTMode *mode, int channels, int *error)
+{
+   int ret;
+   CELTEncoder *st = (CELTEncoder *)opus_alloc(opus_custom_encoder_get_size(mode, channels));
+   /* init will handle the NULL case */
+   ret = opus_custom_encoder_init(st, mode, channels);
+   if (ret != OPUS_OK)
+   {
+      opus_custom_encoder_destroy(st);
+      st = NULL;
+   }
+   if (error)
+      *error = ret;
+   return st;
+}
+#endif /* CUSTOM_MODES */
+
+static int opus_custom_encoder_init_arch(CELTEncoder *st, const CELTMode *mode,
+                                         int channels, int arch)
+{
+   if (channels < 0 || channels > 2)
+      return OPUS_BAD_ARG;
+
+   if (st==NULL || mode==NULL)
+      return OPUS_ALLOC_FAIL;
+
+   OPUS_CLEAR((char*)st, opus_custom_encoder_get_size(mode, channels));
+
+   st->mode = mode;
+   st->stream_channels = st->channels = channels;
+
+   st->upsample = 1;
+   st->start = 0;
+   st->end = st->mode->effEBands;
+   st->signalling = 1;
+   st->arch = arch;
+
+   st->constrained_vbr = 1;
+   st->clip = 1;
+
+   st->bitrate = OPUS_BITRATE_MAX;
+   st->vbr = 0;
+   st->force_intra  = 0;
+   st->complexity = 5;
+   st->lsb_depth=24;
+
+   opus_custom_encoder_ctl(st, OPUS_RESET_STATE);
+
+   return OPUS_OK;
+}
+
+#ifdef CUSTOM_MODES
+int opus_custom_encoder_init(CELTEncoder *st, const CELTMode *mode, int channels)
+{
+   return opus_custom_encoder_init_arch(st, mode, channels, opus_select_arch());
+}
+#endif
+
+int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels,
+                      int arch)
+{
+   int ret;
+   ret = opus_custom_encoder_init_arch(st,
+           opus_custom_mode_create(48000, 960, NULL), channels, arch);
+   if (ret != OPUS_OK)
+      return ret;
+   st->upsample = resampling_factor(sampling_rate);
+   return OPUS_OK;
+}
+
+#ifdef CUSTOM_MODES
+void opus_custom_encoder_destroy(CELTEncoder *st)
+{
+   opus_free(st);
+}
+#endif /* CUSTOM_MODES */
+
+
+static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C,
+                              opus_val16 *tf_estimate, int *tf_chan, int allow_weak_transients,
+                              int *weak_transient)
+{
+   int i;
+   VARDECL(opus_val16, tmp);
+   opus_val32 mem0,mem1;
+   int is_transient = 0;
+   opus_int32 mask_metric = 0;
+   int c;
+   opus_val16 tf_max;
+   int len2;
+   /* Forward masking: 6.7 dB/ms. */
+#ifdef FIXED_POINT
+   int forward_shift = 4;
+#else
+   opus_val16 forward_decay = QCONST16(.0625f,15);
+#endif
+   /* Table of 6*64/x, trained on real data to minimize the average error */
+   static const unsigned char inv_table[128] = {
+         255,255,156,110, 86, 70, 59, 51, 45, 40, 37, 33, 31, 28, 26, 25,
+          23, 22, 21, 20, 19, 18, 17, 16, 16, 15, 15, 14, 13, 13, 12, 12,
+          12, 12, 11, 11, 11, 10, 10, 10,  9,  9,  9,  9,  9,  9,  8,  8,
+           8,  8,  8,  7,  7,  7,  7,  7,  7,  6,  6,  6,  6,  6,  6,  6,
+           6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,  5,
+           5,  5,  5,  5,  5,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+           4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  3,  3,
+           3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,
+   };
+   SAVE_STACK;
+   ALLOC(tmp, len, opus_val16);
+
+   *weak_transient = 0;
+   /* For lower bitrates, let's be more conservative and have a forward masking
+      decay of 3.3 dB/ms. This avoids having to code transients at very low
+      bitrate (mostly for hybrid), which can result in unstable energy and/or
+      partial collapse. */
+   if (allow_weak_transients)
+   {
+#ifdef FIXED_POINT
+      forward_shift = 5;
+#else
+      forward_decay = QCONST16(.03125f,15);
+#endif
+   }
+   len2=len/2;
+   for (c=0;c<C;c++)
+   {
+      opus_val32 mean;
+      opus_int32 unmask=0;
+      opus_val32 norm;
+      opus_val16 maxE;
+      mem0=0;
+      mem1=0;
+      /* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */
+      for (i=0;i<len;i++)
+      {
+         opus_val32 x,y;
+         x = SHR32(in[i+c*len],SIG_SHIFT);
+         y = ADD32(mem0, x);
+#ifdef FIXED_POINT
+         mem0 = mem1 + y - SHL32(x,1);
+         mem1 = x - SHR32(y,1);
+#else
+         mem0 = mem1 + y - 2*x;
+         mem1 = x - .5f*y;
+#endif
+         tmp[i] = SROUND16(y, 2);
+         /*printf("%f ", tmp[i]);*/
+      }
+      /*printf("\n");*/
+      /* First few samples are bad because we don't propagate the memory */
+      OPUS_CLEAR(tmp, 12);
+
+#ifdef FIXED_POINT
+      /* Normalize tmp to max range */
+      {
+         int shift=0;
+         shift = 14-celt_ilog2(MAX16(1, celt_maxabs16(tmp, len)));
+         if (shift!=0)
+         {
+            for (i=0;i<len;i++)
+               tmp[i] = SHL16(tmp[i], shift);
+         }
+      }
+#endif
+
+      mean=0;
+      mem0=0;
+      /* Grouping by two to reduce complexity */
+      /* Forward pass to compute the post-echo threshold*/
+      for (i=0;i<len2;i++)
+      {
+         opus_val16 x2 = PSHR32(MULT16_16(tmp[2*i],tmp[2*i]) + MULT16_16(tmp[2*i+1],tmp[2*i+1]),16);
+         mean += x2;
+#ifdef FIXED_POINT
+         /* FIXME: Use PSHR16() instead */
+         tmp[i] = mem0 + PSHR32(x2-mem0,forward_shift);
+#else
+         tmp[i] = mem0 + MULT16_16_P15(forward_decay,x2-mem0);
+#endif
+         mem0 = tmp[i];
+      }
+
+      mem0=0;
+      maxE=0;
+      /* Backward pass to compute the pre-echo threshold */
+      for (i=len2-1;i>=0;i--)
+      {
+         /* Backward masking: 13.9 dB/ms. */
+#ifdef FIXED_POINT
+         /* FIXME: Use PSHR16() instead */
+         tmp[i] = mem0 + PSHR32(tmp[i]-mem0,3);
+#else
+         tmp[i] = mem0 + MULT16_16_P15(QCONST16(0.125f,15),tmp[i]-mem0);
+#endif
+         mem0 = tmp[i];
+         maxE = MAX16(maxE, mem0);
+      }
+      /*for (i=0;i<len2;i++)printf("%f ", tmp[i]/mean);printf("\n");*/
+
+      /* Compute the ratio of the "frame energy" over the harmonic mean of the energy.
+         This essentially corresponds to a bitrate-normalized temporal noise-to-mask
+         ratio */
+
+      /* As a compromise with the old transient detector, frame energy is the
+         geometric mean of the energy and half the max */
+#ifdef FIXED_POINT
+      /* Costs two sqrt() to avoid overflows */
+      mean = MULT16_16(celt_sqrt(mean), celt_sqrt(MULT16_16(maxE,len2>>1)));
+#else
+      mean = celt_sqrt(mean * maxE*.5*len2);
+#endif
+      /* Inverse of the mean energy in Q15+6 */
+      norm = SHL32(EXTEND32(len2),6+14)/ADD32(EPSILON,SHR32(mean,1));
+      /* Compute harmonic mean discarding the unreliable boundaries
+         The data is smooth, so we only take 1/4th of the samples */
+      unmask=0;
+      /* We should never see NaNs here. If we find any, then something really bad happened and we better abort
+         before it does any damage later on. If these asserts are disabled (no hardening), then the table
+         lookup a few lines below (id = ...) is likely to crash dur to an out-of-bounds read. DO NOT FIX
+         that crash on NaN since it could result in a worse issue later on. */
+      celt_assert(!celt_isnan(tmp[0]));
+      celt_assert(!celt_isnan(norm));
+      for (i=12;i<len2-5;i+=4)
+      {
+         int id;
+#ifdef FIXED_POINT
+         id = MAX32(0,MIN32(127,MULT16_32_Q15(tmp[i]+EPSILON,norm))); /* Do not round to nearest */
+#else
+         id = (int)MAX32(0,MIN32(127,floor(64*norm*(tmp[i]+EPSILON)))); /* Do not round to nearest */
+#endif
+         unmask += inv_table[id];
+      }
+      /*printf("%d\n", unmask);*/
+      /* Normalize, compensate for the 1/4th of the sample and the factor of 6 in the inverse table */
+      unmask = 64*unmask*4/(6*(len2-17));
+      if (unmask>mask_metric)
+      {
+         *tf_chan = c;
+         mask_metric = unmask;
+      }
+   }
+   is_transient = mask_metric>200;
+   /* For low bitrates, define "weak transients" that need to be
+      handled differently to avoid partial collapse. */
+   if (allow_weak_transients && is_transient && mask_metric<600) {
+      is_transient = 0;
+      *weak_transient = 1;
+   }
+   /* Arbitrary metric for VBR boost */
+   tf_max = MAX16(0,celt_sqrt(27*mask_metric)-42);
+   /* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */
+   *tf_estimate = celt_sqrt(MAX32(0, SHL32(MULT16_16(QCONST16(0.0069,14),MIN16(163,tf_max)),14)-QCONST32(0.139,28)));
+   /*printf("%d %f\n", tf_max, mask_metric);*/
+   RESTORE_STACK;
+#ifdef FUZZING
+   is_transient = rand()&0x1;
+#endif
+   /*printf("%d %f %d\n", is_transient, (float)*tf_estimate, tf_max);*/
+   return is_transient;
+}
+
+/* Looks for sudden increases of energy to decide whether we need to patch
+   the transient decision */
+static int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands,
+      int start, int end, int C)
+{
+   int i, c;
+   opus_val32 mean_diff=0;
+   opus_val16 spread_old[26];
+   /* Apply an aggressive (-6 dB/Bark) spreading function to the old frame to
+      avoid false detection caused by irrelevant bands */
+   if (C==1)
+   {
+      spread_old[start] = oldE[start];
+      for (i=start+1;i<end;i++)
+         spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), oldE[i]);
+   } else {
+      spread_old[start] = MAX16(oldE[start],oldE[start+nbEBands]);
+      for (i=start+1;i<end;i++)
+         spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT),
+                               MAX16(oldE[i],oldE[i+nbEBands]));
+   }
+   for (i=end-2;i>=start;i--)
+      spread_old[i] = MAX16(spread_old[i], spread_old[i+1]-QCONST16(1.0f, DB_SHIFT));
+   /* Compute mean increase */
+   c=0; do {
+      for (i=IMAX(2,start);i<end-1;i++)
+      {
+         opus_val16 x1, x2;
+         x1 = MAX16(0, newE[i + c*nbEBands]);
+         x2 = MAX16(0, spread_old[i]);
+         mean_diff = ADD32(mean_diff, EXTEND32(MAX16(0, SUB16(x1, x2))));
+      }
+   } while (++c<C);
+   mean_diff = DIV32(mean_diff, C*(end-1-IMAX(2,start)));
+   /*printf("%f %f %d\n", mean_diff, max_diff, count);*/
+   return mean_diff > QCONST16(1.f, DB_SHIFT);
+}
+
+/** Apply window and compute the MDCT for all sub-frames and
+    all channels in a frame */
+static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in,
+                          celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample,
+                          int arch)
+{
+   const int overlap = mode->overlap;
+   int N;
+   int B;
+   int shift;
+   int i, b, c;
+   if (shortBlocks)
+   {
+      B = shortBlocks;
+      N = mode->shortMdctSize;
+      shift = mode->maxLM;
+   } else {
+      B = 1;
+      N = mode->shortMdctSize<<LM;
+      shift = mode->maxLM-LM;
+   }
+   c=0; do {
+      for (b=0;b<B;b++)
+      {
+         /* Interleaving the sub-frames while doing the MDCTs */
+         clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N,
+                          &out[b+c*N*B], mode->window, overlap, shift, B,
+                          arch);
+      }
+   } while (++c<CC);
+   if (CC==2&&C==1)
+   {
+      for (i=0;i<B*N;i++)
+         out[i] = ADD32(HALF32(out[i]), HALF32(out[B*N+i]));
+   }
+   if (upsample != 1)
+   {
+      c=0; do
+      {
+         int bound = B*N/upsample;
+         for (i=0;i<bound;i++)
+            out[c*B*N+i] *= upsample;
+         OPUS_CLEAR(&out[c*B*N+bound], B*N-bound);
+      } while (++c<C);
+   }
+}
+
+
+void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
+                        int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip)
+{
+   int i;
+   opus_val16 coef0;
+   celt_sig m;
+   int Nu;
+
+   coef0 = coef[0];
+   m = *mem;
+
+   /* Fast path for the normal 48kHz case and no clipping */
+   if (coef[1] == 0 && upsample == 1 && !clip)
+   {
+      for (i=0;i<N;i++)
+      {
+         opus_val16 x;
+         x = SCALEIN(pcmp[CC*i]);
+         /* Apply pre-emphasis */
+         inp[i] = SHL32(x, SIG_SHIFT) - m;
+         m = SHR32(MULT16_16(coef0, x), 15-SIG_SHIFT);
+      }
+      *mem = m;
+      return;
+   }
+
+   Nu = N/upsample;
+   if (upsample!=1)
+   {
+      OPUS_CLEAR(inp, N);
+   }
+   for (i=0;i<Nu;i++)
+      inp[i*upsample] = SCALEIN(pcmp[CC*i]);
+
+#ifndef FIXED_POINT
+   if (clip)
+   {
+      /* Clip input to avoid encoding non-portable files */
+      for (i=0;i<Nu;i++)
+         inp[i*upsample] = MAX32(-65536.f, MIN32(65536.f,inp[i*upsample]));
+   }
+#else
+   (void)clip; /* Avoids a warning about clip being unused. */
+#endif
+#ifdef CUSTOM_MODES
+   if (coef[1] != 0)
+   {
+      opus_val16 coef1 = coef[1];
+      opus_val16 coef2 = coef[2];
+      for (i=0;i<N;i++)
+      {
+         celt_sig x, tmp;
+         x = inp[i];
+         /* Apply pre-emphasis */
+         tmp = MULT16_16(coef2, x);
+         inp[i] = tmp + m;
+         m = MULT16_32_Q15(coef1, inp[i]) - MULT16_32_Q15(coef0, tmp);
+      }
+   } else
+#endif
+   {
+      for (i=0;i<N;i++)
+      {
+         opus_val16 x;
+         x = inp[i];
+         /* Apply pre-emphasis */
+         inp[i] = SHL32(x, SIG_SHIFT) - m;
+         m = SHR32(MULT16_16(coef0, x), 15-SIG_SHIFT);
+      }
+   }
+   *mem = m;
+}
+
+
+
+static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, opus_val16 bias)
+{
+   int i;
+   opus_val32 L1;
+   L1 = 0;
+   for (i=0;i<N;i++)
+      L1 += EXTEND32(ABS16(tmp[i]));
+   /* When in doubt, prefer good freq resolution */
+   L1 = MAC16_32_Q15(L1, LM*bias, L1);
+   return L1;
+
+}
+
+static int tf_analysis(const CELTMode *m, int len, int isTransient,
+      int *tf_res, int lambda, celt_norm *X, int N0, int LM,
+      opus_val16 tf_estimate, int tf_chan, int *importance)
+{
+   int i;
+   VARDECL(int, metric);
+   int cost0;
+   int cost1;
+   VARDECL(int, path0);
+   VARDECL(int, path1);
+   VARDECL(celt_norm, tmp);
+   VARDECL(celt_norm, tmp_1);
+   int sel;
+   int selcost[2];
+   int tf_select=0;
+   opus_val16 bias;
+
+   SAVE_STACK;
+   bias = MULT16_16_Q14(QCONST16(.04f,15), MAX16(-QCONST16(.25f,14), QCONST16(.5f,14)-tf_estimate));
+   /*printf("%f ", bias);*/
+
+   ALLOC(metric, len, int);
+   ALLOC(tmp, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm);
+   ALLOC(tmp_1, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm);
+   ALLOC(path0, len, int);
+   ALLOC(path1, len, int);
+
+   for (i=0;i<len;i++)
+   {
+      int k, N;
+      int narrow;
+      opus_val32 L1, best_L1;
+      int best_level=0;
+      N = (m->eBands[i+1]-m->eBands[i])<<LM;
+      /* band is too narrow to be split down to LM=-1 */
+      narrow = (m->eBands[i+1]-m->eBands[i])==1;
+      OPUS_COPY(tmp, &X[tf_chan*N0 + (m->eBands[i]<<LM)], N);
+      /* Just add the right channel if we're in stereo */
+      /*if (C==2)
+         for (j=0;j<N;j++)
+            tmp[j] = ADD16(SHR16(tmp[j], 1),SHR16(X[N0+j+(m->eBands[i]<<LM)], 1));*/
+      L1 = l1_metric(tmp, N, isTransient ? LM : 0, bias);
+      best_L1 = L1;
+      /* Check the -1 case for transients */
+      if (isTransient && !narrow)
+      {
+         OPUS_COPY(tmp_1, tmp, N);
+         haar1(tmp_1, N>>LM, 1<<LM);
+         L1 = l1_metric(tmp_1, N, LM+1, bias);
+         if (L1<best_L1)
+         {
+            best_L1 = L1;
+            best_level = -1;
+         }
+      }
+      /*printf ("%f ", L1);*/
+      for (k=0;k<LM+!(isTransient||narrow);k++)
+      {
+         int B;
+
+         if (isTransient)
+            B = (LM-k-1);
+         else
+            B = k+1;
+
+         haar1(tmp, N>>k, 1<<k);
+
+         L1 = l1_metric(tmp, N, B, bias);
+
+         if (L1 < best_L1)
+         {
+            best_L1 = L1;
+            best_level = k+1;
+         }
+      }
+      /*printf ("%d ", isTransient ? LM-best_level : best_level);*/
+      /* metric is in Q1 to be able to select the mid-point (-0.5) for narrower bands */
+      if (isTransient)
+         metric[i] = 2*best_level;
+      else
+         metric[i] = -2*best_level;
+      /* For bands that can't be split to -1, set the metric to the half-way point to avoid
+         biasing the decision */
+      if (narrow && (metric[i]==0 || metric[i]==-2*LM))
+         metric[i]-=1;
+      /*printf("%d ", metric[i]/2 + (!isTransient)*LM);*/
+   }
+   /*printf("\n");*/
+   /* Search for the optimal tf resolution, including tf_select */
+   tf_select = 0;
+   for (sel=0;sel<2;sel++)
+   {
+      cost0 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*sel+0]);
+      cost1 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*sel+1]) + (isTransient ? 0 : lambda);
+      for (i=1;i<len;i++)
+      {
+         int curr0, curr1;
+         curr0 = IMIN(cost0, cost1 + lambda);
+         curr1 = IMIN(cost0 + lambda, cost1);
+         cost0 = curr0 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+0]);
+         cost1 = curr1 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+1]);
+      }
+      cost0 = IMIN(cost0, cost1);
+      selcost[sel]=cost0;
+   }
+   /* For now, we're conservative and only allow tf_select=1 for transients.
+    * If tests confirm it's useful for non-transients, we could allow it. */
+   if (selcost[1]<selcost[0] && isTransient)
+      tf_select=1;
+   cost0 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]);
+   cost1 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]) + (isTransient ? 0 : lambda);
+   /* Viterbi forward pass */
+   for (i=1;i<len;i++)
+   {
+      int curr0, curr1;
+      int from0, from1;
+
+      from0 = cost0;
+      from1 = cost1 + lambda;
+      if (from0 < from1)
+      {
+         curr0 = from0;
+         path0[i]= 0;
+      } else {
+         curr0 = from1;
+         path0[i]= 1;
+      }
+
+      from0 = cost0 + lambda;
+      from1 = cost1;
+      if (from0 < from1)
+      {
+         curr1 = from0;
+         path1[i]= 0;
+      } else {
+         curr1 = from1;
+         path1[i]= 1;
+      }
+      cost0 = curr0 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]);
+      cost1 = curr1 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]);
+   }
+   tf_res[len-1] = cost0 < cost1 ? 0 : 1;
+   /* Viterbi backward pass to check the decisions */
+   for (i=len-2;i>=0;i--)
+   {
+      if (tf_res[i+1] == 1)
+         tf_res[i] = path1[i+1];
+      else
+         tf_res[i] = path0[i+1];
+   }
+   /*printf("%d %f\n", *tf_sum, tf_estimate);*/
+   RESTORE_STACK;
+#ifdef FUZZING
+   tf_select = rand()&0x1;
+   tf_res[0] = rand()&0x1;
+   for (i=1;i<len;i++)
+      tf_res[i] = tf_res[i-1] ^ ((rand()&0xF) == 0);
+#endif
+   return tf_select;
+}
+
+static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, int tf_select, ec_enc *enc)
+{
+   int curr, i;
+   int tf_select_rsv;
+   int tf_changed;
+   int logp;
+   opus_uint32 budget;
+   opus_uint32 tell;
+   budget = enc->storage*8;
+   tell = ec_tell(enc);
+   logp = isTransient ? 2 : 4;
+   /* Reserve space to code the tf_select decision. */
+   tf_select_rsv = LM>0 && tell+logp+1 <= budget;
+   budget -= tf_select_rsv;
+   curr = tf_changed = 0;
+   for (i=start;i<end;i++)
+   {
+      if (tell+logp<=budget)
+      {
+         ec_enc_bit_logp(enc, tf_res[i] ^ curr, logp);
+         tell = ec_tell(enc);
+         curr = tf_res[i];
+         tf_changed |= curr;
+      }
+      else
+         tf_res[i] = curr;
+      logp = isTransient ? 4 : 5;
+   }
+   /* Only code tf_select if it would actually make a difference. */
+   if (tf_select_rsv &&
+         tf_select_table[LM][4*isTransient+0+tf_changed]!=
+         tf_select_table[LM][4*isTransient+2+tf_changed])
+      ec_enc_bit_logp(enc, tf_select, 1);
+   else
+      tf_select = 0;
+   for (i=start;i<end;i++)
+      tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
+   /*for(i=0;i<end;i++)printf("%d ", isTransient ? tf_res[i] : LM+tf_res[i]);printf("\n");*/
+}
+
+
+static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
+      const opus_val16 *bandLogE, int end, int LM, int C, int N0,
+      AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate,
+      int intensity, opus_val16 surround_trim, opus_int32 equiv_rate, int arch)
+{
+   int i;
+   opus_val32 diff=0;
+   int c;
+   int trim_index;
+   opus_val16 trim = QCONST16(5.f, 8);
+   opus_val16 logXC, logXC2;
+   /* At low bitrate, reducing the trim seems to help. At higher bitrates, it's less
+      clear what's best, so we're keeping it as it was before, at least for now. */
+   if (equiv_rate < 64000) {
+      trim = QCONST16(4.f, 8);
+   } else if (equiv_rate < 80000) {
+      opus_int32 frac = (equiv_rate-64000) >> 10;
+      trim = QCONST16(4.f, 8) + QCONST16(1.f/16.f, 8)*frac;
+   }
+   if (C==2)
+   {
+      opus_val16 sum = 0; /* Q10 */
+      opus_val16 minXC; /* Q10 */
+      /* Compute inter-channel correlation for low frequencies */
+      for (i=0;i<8;i++)
+      {
+         opus_val32 partial;
+         partial = celt_inner_prod(&X[m->eBands[i]<<LM], &X[N0+(m->eBands[i]<<LM)],
+               (m->eBands[i+1]-m->eBands[i])<<LM, arch);
+         sum = ADD16(sum, EXTRACT16(SHR32(partial, 18)));
+      }
+      sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum);
+      sum = MIN16(QCONST16(1.f, 10), ABS16(sum));
+      minXC = sum;
+      for (i=8;i<intensity;i++)
+      {
+         opus_val32 partial;
+         partial = celt_inner_prod(&X[m->eBands[i]<<LM], &X[N0+(m->eBands[i]<<LM)],
+               (m->eBands[i+1]-m->eBands[i])<<LM, arch);
+         minXC = MIN16(minXC, ABS16(EXTRACT16(SHR32(partial, 18))));
+      }
+      minXC = MIN16(QCONST16(1.f, 10), ABS16(minXC));
+      /*printf ("%f\n", sum);*/
+      /* mid-side savings estimations based on the LF average*/
+      logXC = celt_log2(QCONST32(1.001f, 20)-MULT16_16(sum, sum));
+      /* mid-side savings estimations based on min correlation */
+      logXC2 = MAX16(HALF16(logXC), celt_log2(QCONST32(1.001f, 20)-MULT16_16(minXC, minXC)));
+#ifdef FIXED_POINT
+      /* Compensate for Q20 vs Q14 input and convert output to Q8 */
+      logXC = PSHR32(logXC-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8);
+      logXC2 = PSHR32(logXC2-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8);
+#endif
+
+      trim += MAX16(-QCONST16(4.f, 8), MULT16_16_Q15(QCONST16(.75f,15),logXC));
+      *stereo_saving = MIN16(*stereo_saving + QCONST16(0.25f, 8), -HALF16(logXC2));
+   }
+
+   /* Estimate spectral tilt */
+   c=0; do {
+      for (i=0;i<end-1;i++)
+      {
+         diff += bandLogE[i+c*m->nbEBands]*(opus_int32)(2+2*i-end);
+      }
+   } while (++c<C);
+   diff /= C*(end-1);
+   /*printf("%f\n", diff);*/
+   trim -= MAX32(-QCONST16(2.f, 8), MIN32(QCONST16(2.f, 8), SHR32(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 ));
+   trim -= SHR16(surround_trim, DB_SHIFT-8);
+   trim -= 2*SHR16(tf_estimate, 14-8);
+#ifndef DISABLE_FLOAT_API
+   if (analysis->valid)
+   {
+      trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8),
+            (opus_val16)(QCONST16(2.f, 8)*(analysis->tonality_slope+.05f))));
+   }
+#else
+   (void)analysis;
+#endif
+
+#ifdef FIXED_POINT
+   trim_index = PSHR32(trim, 8);
+#else
+   trim_index = (int)floor(.5f+trim);
+#endif
+   trim_index = IMAX(0, IMIN(10, trim_index));
+   /*printf("%d\n", trim_index);*/
+#ifdef FUZZING
+   trim_index = rand()%11;
+#endif
+   return trim_index;
+}
+
+static int stereo_analysis(const CELTMode *m, const celt_norm *X,
+      int LM, int N0)
+{
+   int i;
+   int thetas;
+   opus_val32 sumLR = EPSILON, sumMS = EPSILON;
+
+   /* Use the L1 norm to model the entropy of the L/R signal vs the M/S signal */
+   for (i=0;i<13;i++)
+   {
+      int j;
+      for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
+      {
+         opus_val32 L, R, M, S;
+         /* We cast to 32-bit first because of the -32768 case */
+         L = EXTEND32(X[j]);
+         R = EXTEND32(X[N0+j]);
+         M = ADD32(L, R);
+         S = SUB32(L, R);
+         sumLR = ADD32(sumLR, ADD32(ABS32(L), ABS32(R)));
+         sumMS = ADD32(sumMS, ADD32(ABS32(M), ABS32(S)));
+      }
+   }
+   sumMS = MULT16_32_Q15(QCONST16(0.707107f, 15), sumMS);
+   thetas = 13;
+   /* We don't need thetas for lower bands with LM<=1 */
+   if (LM<=1)
+      thetas -= 8;
+   return MULT16_32_Q15((m->eBands[13]<<(LM+1))+thetas, sumMS)
+         > MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR);
+}
+
+#define MSWAP(a,b) do {opus_val16 tmp = a;a=b;b=tmp;} while(0)
+static opus_val16 median_of_5(const opus_val16 *x)
+{
+   opus_val16 t0, t1, t2, t3, t4;
+   t2 = x[2];
+   if (x[0] > x[1])
+   {
+      t0 = x[1];
+      t1 = x[0];
+   } else {
+      t0 = x[0];
+      t1 = x[1];
+   }
+   if (x[3] > x[4])
+   {
+      t3 = x[4];
+      t4 = x[3];
+   } else {
+      t3 = x[3];
+      t4 = x[4];
+   }
+   if (t0 > t3)
+   {
+      MSWAP(t0, t3);
+      MSWAP(t1, t4);
+   }
+   if (t2 > t1)
+   {
+      if (t1 < t3)
+         return MIN16(t2, t3);
+      else
+         return MIN16(t4, t1);
+   } else {
+      if (t2 < t3)
+         return MIN16(t1, t3);
+      else
+         return MIN16(t2, t4);
+   }
+}
+
+static opus_val16 median_of_3(const opus_val16 *x)
+{
+   opus_val16 t0, t1, t2;
+   if (x[0] > x[1])
+   {
+      t0 = x[1];
+      t1 = x[0];
+   } else {
+      t0 = x[0];
+      t1 = x[1];
+   }
+   t2 = x[2];
+   if (t1 < t2)
+      return t1;
+   else if (t0 < t2)
+      return t2;
+   else
+      return t0;
+}
+
+static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2,
+      int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN,
+      int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM,
+      int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc,
+      AnalysisInfo *analysis, int *importance, int *spread_weight)
+{
+   int i, c;
+   opus_int32 tot_boost=0;
+   opus_val16 maxDepth;
+   VARDECL(opus_val16, follower);
+   VARDECL(opus_val16, noise_floor);
+   SAVE_STACK;
+   ALLOC(follower, C*nbEBands, opus_val16);
+   ALLOC(noise_floor, C*nbEBands, opus_val16);
+   OPUS_CLEAR(offsets, nbEBands);
+   /* Dynamic allocation code */
+   maxDepth=-QCONST16(31.9f, DB_SHIFT);
+   for (i=0;i<end;i++)
+   {
+      /* Noise floor must take into account eMeans, the depth, the width of the bands
+         and the preemphasis filter (approx. square of bark band ID) */
+      noise_floor[i] = MULT16_16(QCONST16(0.0625f, DB_SHIFT),logN[i])
+            +QCONST16(.5f,DB_SHIFT)+SHL16(9-lsb_depth,DB_SHIFT)-SHL16(eMeans[i],6)
+            +MULT16_16(QCONST16(.0062,DB_SHIFT),(i+5)*(i+5));
+   }
+   c=0;do
+   {
+      for (i=0;i<end;i++)
+         maxDepth = MAX16(maxDepth, bandLogE[c*nbEBands+i]-noise_floor[i]);
+   } while (++c<C);
+   {
+      /* Compute a really simple masking model to avoid taking into account completely masked
+         bands when computing the spreading decision. */
+      VARDECL(opus_val16, mask);
+      VARDECL(opus_val16, sig);
+      ALLOC(mask, nbEBands, opus_val16);
+      ALLOC(sig, nbEBands, opus_val16);
+      for (i=0;i<end;i++)
+         mask[i] = bandLogE[i]-noise_floor[i];
+      if (C==2)
+      {
+         for (i=0;i<end;i++)
+            mask[i] = MAX16(mask[i], bandLogE[nbEBands+i]-noise_floor[i]);
+      }
+      OPUS_COPY(sig, mask, end);
+      for (i=1;i<end;i++)
+         mask[i] = MAX16(mask[i], mask[i-1] - QCONST16(2.f, DB_SHIFT));
+      for (i=end-2;i>=0;i--)
+         mask[i] = MAX16(mask[i], mask[i+1] - QCONST16(3.f, DB_SHIFT));
+      for (i=0;i<end;i++)
+      {
+         /* Compute SMR: Mask is never more than 72 dB below the peak and never below the noise floor.*/
+         opus_val16 smr = sig[i]-MAX16(MAX16(0, maxDepth-QCONST16(12.f, DB_SHIFT)), mask[i]);
+         /* Clamp SMR to make sure we're not shifting by something negative or too large. */
+#ifdef FIXED_POINT
+         /* FIXME: Use PSHR16() instead */
+         int shift = -PSHR32(MAX16(-QCONST16(5.f, DB_SHIFT), MIN16(0, smr)), DB_SHIFT);
+#else
+         int shift = IMIN(5, IMAX(0, -(int)floor(.5f + smr)));
+#endif
+         spread_weight[i] = 32 >> shift;
+      }
+      /*for (i=0;i<end;i++)
+         printf("%d ", spread_weight[i]);
+      printf("\n");*/
+   }
+   /* Make sure that dynamic allocation can't make us bust the budget */
+   if (effectiveBytes > 50 && LM>=1 && !lfe)
+   {
+      int last=0;
+      c=0;do
+      {
+         opus_val16 offset;
+         opus_val16 tmp;
+         opus_val16 *f;
+         f = &follower[c*nbEBands];
+         f[0] = bandLogE2[c*nbEBands];
+         for (i=1;i<end;i++)
+         {
+            /* The last band to be at least 3 dB higher than the previous one
+               is the last we'll consider. Otherwise, we run into problems on
+               bandlimited signals. */
+            if (bandLogE2[c*nbEBands+i] > bandLogE2[c*nbEBands+i-1]+QCONST16(.5f,DB_SHIFT))
+               last=i;
+            f[i] = MIN16(f[i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*nbEBands+i]);
+         }
+         for (i=last-1;i>=0;i--)
+            f[i] = MIN16(f[i], MIN16(f[i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*nbEBands+i]));
+
+         /* Combine with a median filter to avoid dynalloc triggering unnecessarily.
+            The "offset" value controls how conservative we are -- a higher offset
+            reduces the impact of the median filter and makes dynalloc use more bits. */
+         offset = QCONST16(1.f, DB_SHIFT);
+         for (i=2;i<end-2;i++)
+            f[i] = MAX16(f[i], median_of_5(&bandLogE2[c*nbEBands+i-2])-offset);
+         tmp = median_of_3(&bandLogE2[c*nbEBands])-offset;
+         f[0] = MAX16(f[0], tmp);
+         f[1] = MAX16(f[1], tmp);
+         tmp = median_of_3(&bandLogE2[c*nbEBands+end-3])-offset;
+         f[end-2] = MAX16(f[end-2], tmp);
+         f[end-1] = MAX16(f[end-1], tmp);
+
+         for (i=0;i<end;i++)
+            f[i] = MAX16(f[i], noise_floor[i]);
+      } while (++c<C);
+      if (C==2)
+      {
+         for (i=start;i<end;i++)
+         {
+            /* Consider 24 dB "cross-talk" */
+            follower[nbEBands+i] = MAX16(follower[nbEBands+i], follower[         i]-QCONST16(4.f,DB_SHIFT));
+            follower[         i] = MAX16(follower[         i], follower[nbEBands+i]-QCONST16(4.f,DB_SHIFT));
+            follower[i] = HALF16(MAX16(0, bandLogE[i]-follower[i]) + MAX16(0, bandLogE[nbEBands+i]-follower[nbEBands+i]));
+         }
+      } else {
+         for (i=start;i<end;i++)
+         {
+            follower[i] = MAX16(0, bandLogE[i]-follower[i]);
+         }
+      }
+      for (i=start;i<end;i++)
+         follower[i] = MAX16(follower[i], surround_dynalloc[i]);
+      for (i=start;i<end;i++)
+      {
+#ifdef FIXED_POINT
+         importance[i] = PSHR32(13*celt_exp2(MIN16(follower[i], QCONST16(4.f, DB_SHIFT))), 16);
+#else
+         importance[i] = (int)floor(.5f+13*celt_exp2(MIN16(follower[i], QCONST16(4.f, DB_SHIFT))));
+#endif
+      }
+      /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */
+      if ((!vbr || constrained_vbr)&&!isTransient)
+      {
+         for (i=start;i<end;i++)
+            follower[i] = HALF16(follower[i]);
+      }
+      for (i=start;i<end;i++)
+      {
+         if (i<8)
+            follower[i] *= 2;
+         if (i>=12)
+            follower[i] = HALF16(follower[i]);
+      }
+#ifdef DISABLE_FLOAT_API
+      (void)analysis;
+#else
+      if (analysis->valid)
+      {
+         for (i=start;i<IMIN(LEAK_BANDS, end);i++)
+            follower[i] = follower[i] +  QCONST16(1.f/64.f, DB_SHIFT)*analysis->leak_boost[i];
+      }
+#endif
+      for (i=start;i<end;i++)
+      {
+         int width;
+         int boost;
+         int boost_bits;
+
+         follower[i] = MIN16(follower[i], QCONST16(4, DB_SHIFT));
+
+         width = C*(eBands[i+1]-eBands[i])<<LM;
+         if (width<6)
+         {
+            boost = (int)SHR32(EXTEND32(follower[i]),DB_SHIFT);
+            boost_bits = boost*width<<BITRES;
+         } else if (width > 48) {
+            boost = (int)SHR32(EXTEND32(follower[i])*8,DB_SHIFT);
+            boost_bits = (boost*width<<BITRES)/8;
+         } else {
+            boost = (int)SHR32(EXTEND32(follower[i])*width/6,DB_SHIFT);
+            boost_bits = boost*6<<BITRES;
+         }
+         /* For CBR and non-transient CVBR frames, limit dynalloc to 2/3 of the bits */
+         if ((!vbr || (constrained_vbr&&!isTransient))
+               && (tot_boost+boost_bits)>>BITRES>>3 > 2*effectiveBytes/3)
+         {
+            opus_int32 cap = ((2*effectiveBytes/3)<<BITRES<<3);
+            offsets[i] = cap-tot_boost;
+            tot_boost = cap;
+            break;
+         } else {
+            offsets[i] = boost;
+            tot_boost += boost_bits;
+         }
+      }
+   } else {
+      for (i=start;i<end;i++)
+         importance[i] = 13;
+   }
+   *tot_boost_ = tot_boost;
+   RESTORE_STACK;
+   return maxDepth;
+}
+
+
+static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, int CC, int N,
+      int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int nbAvailableBytes, AnalysisInfo *analysis)
+{
+   int c;
+   VARDECL(celt_sig, _pre);
+   celt_sig *pre[2];
+   const CELTMode *mode;
+   int pitch_index;
+   opus_val16 gain1;
+   opus_val16 pf_threshold;
+   int pf_on;
+   int qg;
+   int overlap;
+   SAVE_STACK;
+
+   mode = st->mode;
+   overlap = mode->overlap;
+   ALLOC(_pre, CC*(N+COMBFILTER_MAXPERIOD), celt_sig);
+
+   pre[0] = _pre;
+   pre[1] = _pre + (N+COMBFILTER_MAXPERIOD);
+
+
+   c=0; do {
+      OPUS_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD);
+      OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+overlap)+overlap, N);
+   } while (++c<CC);
+
+   if (enabled)
+   {
+      VARDECL(opus_val16, pitch_buf);
+      ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16);
+
+      pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC, st->arch);
+      /* Don't search for the fir last 1.5 octave of the range because
+         there's too many false-positives due to short-term correlation */
+      pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
+            COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index,
+            st->arch);
+      pitch_index = COMBFILTER_MAXPERIOD-pitch_index;
+
+      gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
+            N, &pitch_index, st->prefilter_period, st->prefilter_gain, st->arch);
+      if (pitch_index > COMBFILTER_MAXPERIOD-2)
+         pitch_index = COMBFILTER_MAXPERIOD-2;
+      gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1);
+      /*printf("%d %d %f %f\n", pitch_change, pitch_index, gain1, st->analysis.tonality);*/
+      if (st->loss_rate>2)
+         gain1 = HALF32(gain1);
+      if (st->loss_rate>4)
+         gain1 = HALF32(gain1);
+      if (st->loss_rate>8)
+         gain1 = 0;
+   } else {
+      gain1 = 0;
+      pitch_index = COMBFILTER_MINPERIOD;
+   }
+#ifndef DISABLE_FLOAT_API
+   if (analysis->valid)
+      gain1 = (opus_val16)(gain1 * analysis->max_pitch_ratio);
+#else
+   (void)analysis;
+#endif
+   /* Gain threshold for enabling the prefilter/postfilter */
+   pf_threshold = QCONST16(.2f,15);
+
+   /* Adjusting the threshold based on rate and continuity */
+   if (abs(pitch_index-st->prefilter_period)*10>pitch_index)
+      pf_threshold += QCONST16(.2f,15);
+   if (nbAvailableBytes<25)
+      pf_threshold += QCONST16(.1f,15);
+   if (nbAvailableBytes<35)
+      pf_threshold += QCONST16(.1f,15);
+   if (st->prefilter_gain > QCONST16(.4f,15))
+      pf_threshold -= QCONST16(.1f,15);
+   if (st->prefilter_gain > QCONST16(.55f,15))
+      pf_threshold -= QCONST16(.1f,15);
+
+   /* Hard threshold at 0.2 */
+   pf_threshold = MAX16(pf_threshold, QCONST16(.2f,15));
+   if (gain1<pf_threshold)
+   {
+      gain1 = 0;
+      pf_on = 0;
+      qg = 0;
+   } else {
+      /*This block is not gated by a total bits check only because
+        of the nbAvailableBytes check above.*/
+      if (ABS16(gain1-st->prefilter_gain)<QCONST16(.1f,15))
+         gain1=st->prefilter_gain;
+
+#ifdef FIXED_POINT
+      qg = ((gain1+1536)>>10)/3-1;
+#else
+      qg = (int)floor(.5f+gain1*32/3)-1;
+#endif
+      qg = IMAX(0, IMIN(7, qg));
+      gain1 = QCONST16(0.09375f,15)*(qg+1);
+      pf_on = 1;
+   }
+   /*printf("%d %f\n", pitch_index, gain1);*/
+
+   c=0; do {
+      int offset = mode->shortMdctSize-overlap;
+      st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
+      OPUS_COPY(in+c*(N+overlap), st->in_mem+c*(overlap), overlap);
+      if (offset)
+         comb_filter(in+c*(N+overlap)+overlap, pre[c]+COMBFILTER_MAXPERIOD,
+               st->prefilter_period, st->prefilter_period, offset, -st->prefilter_gain, -st->prefilter_gain,
+               st->prefilter_tapset, st->prefilter_tapset, NULL, 0, st->arch);
+
+      comb_filter(in+c*(N+overlap)+overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset,
+            st->prefilter_period, pitch_index, N-offset, -st->prefilter_gain, -gain1,
+            st->prefilter_tapset, prefilter_tapset, mode->window, overlap, st->arch);
+      OPUS_COPY(st->in_mem+c*(overlap), in+c*(N+overlap)+N, overlap);
+
+      if (N>COMBFILTER_MAXPERIOD)
+      {
+         OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD);
+      } else {
+         OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, prefilter_mem+c*COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD-N);
+         OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N);
+      }
+   } while (++c<CC);
+
+   RESTORE_STACK;
+   *gain = gain1;
+   *pitch = pitch_index;
+   *qgain = qg;
+   return pf_on;
+}
+
+static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 base_target,
+      int LM, opus_int32 bitrate, int lastCodedBands, int C, int intensity,
+      int constrained_vbr, opus_val16 stereo_saving, int tot_boost,
+      opus_val16 tf_estimate, int pitch_change, opus_val16 maxDepth,
+      int lfe, int has_surround_mask, opus_val16 surround_masking,
+      opus_val16 temporal_vbr)
+{
+   /* The target rate in 8th bits per frame */
+   opus_int32 target;
+   int coded_bins;
+   int coded_bands;
+   opus_val16 tf_calibration;
+   int nbEBands;
+   const opus_int16 *eBands;
+
+   nbEBands = mode->nbEBands;
+   eBands = mode->eBands;
+
+   coded_bands = lastCodedBands ? lastCodedBands : nbEBands;
+   coded_bins = eBands[coded_bands]<<LM;
+   if (C==2)
+      coded_bins += eBands[IMIN(intensity, coded_bands)]<<LM;
+
+   target = base_target;
+
+   /*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/
+#ifndef DISABLE_FLOAT_API
+   if (analysis->valid && analysis->activity<.4)
+      target -= (opus_int32)((coded_bins<<BITRES)*(.4f-analysis->activity));
+#endif
+   /* Stereo savings */
+   if (C==2)
+   {
+      int coded_stereo_bands;
+      int coded_stereo_dof;
+      opus_val16 max_frac;
+      coded_stereo_bands = IMIN(intensity, coded_bands);
+      coded_stereo_dof = (eBands[coded_stereo_bands]<<LM)-coded_stereo_bands;
+      /* Maximum fraction of the bits we can save if the signal is mono. */
+      max_frac = DIV32_16(MULT16_16(QCONST16(0.8f, 15), coded_stereo_dof), coded_bins);
+      stereo_saving = MIN16(stereo_saving, QCONST16(1.f, 8));
+      /*printf("%d %d %d ", coded_stereo_dof, coded_bins, tot_boost);*/
+      target -= (opus_int32)MIN32(MULT16_32_Q15(max_frac,target),
+                      SHR32(MULT16_16(stereo_saving-QCONST16(0.1f,8),(coded_stereo_dof<<BITRES)),8));
+   }
+   /* Boost the rate according to dynalloc (minus the dynalloc average for calibration). */
+   target += tot_boost-(19<<LM);
+   /* Apply transient boost, compensating for average boost. */
+   tf_calibration = QCONST16(0.044f,14);
+   target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1);
+
+#ifndef DISABLE_FLOAT_API
+   /* Apply tonality boost */
+   if (analysis->valid && !lfe)
+   {
+      opus_int32 tonal_target;
+      float tonal;
+
+      /* Tonality boost (compensating for the average). */
+      tonal = MAX16(0.f,analysis->tonality-.15f)-0.12f;
+      tonal_target = target + (opus_int32)((coded_bins<<BITRES)*1.2f*tonal);
+      if (pitch_change)
+         tonal_target +=  (opus_int32)((coded_bins<<BITRES)*.8f);
+      /*printf("%f %f ", analysis->tonality, tonal);*/
+      target = tonal_target;
+   }
+#else
+   (void)analysis;
+   (void)pitch_change;
+#endif
+
+   if (has_surround_mask&&!lfe)
+   {
+      opus_int32 surround_target = target + (opus_int32)SHR32(MULT16_16(surround_masking,coded_bins<<BITRES), DB_SHIFT);
+      /*printf("%f %d %d %d %d %d %d ", surround_masking, coded_bins, st->end, st->intensity, surround_target, target, st->bitrate);*/
+      target = IMAX(target/4, surround_target);
+   }
+
+   {
+      opus_int32 floor_depth;
+      int bins;
+      bins = eBands[nbEBands-2]<<LM;
+      /*floor_depth = SHR32(MULT16_16((C*bins<<BITRES),celt_log2(SHL32(MAX16(1,sample_max),13))), DB_SHIFT);*/
+      floor_depth = (opus_int32)SHR32(MULT16_16((C*bins<<BITRES),maxDepth), DB_SHIFT);
+      floor_depth = IMAX(floor_depth, target>>2);
+      target = IMIN(target, floor_depth);
+      /*printf("%f %d\n", maxDepth, floor_depth);*/
+   }
+
+   /* Make VBR less aggressive for constrained VBR because we can't keep a higher bitrate
+      for long. Needs tuning. */
+   if ((!has_surround_mask||lfe) && constrained_vbr)
+   {
+      target = base_target + (opus_int32)MULT16_32_Q15(QCONST16(0.67f, 15), target-base_target);
+   }
+
+   if (!has_surround_mask && tf_estimate < QCONST16(.2f, 14))
+   {
+      opus_val16 amount;
+      opus_val16 tvbr_factor;
+      amount = MULT16_16_Q15(QCONST16(.0000031f, 30), IMAX(0, IMIN(32000, 96000-bitrate)));
+      tvbr_factor = SHR32(MULT16_16(temporal_vbr, amount), DB_SHIFT);
+      target += (opus_int32)MULT16_32_Q15(tvbr_factor, target);
+   }
+
+   /* Don't allow more than doubling the rate */
+   target = IMIN(2*base_target, target);
+
+   return target;
+}
+
+int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc)
+{
+   int i, c, N;
+   opus_int32 bits;
+   ec_enc _enc;
+   VARDECL(celt_sig, in);
+   VARDECL(celt_sig, freq);
+   VARDECL(celt_norm, X);
+   VARDECL(celt_ener, bandE);
+   VARDECL(opus_val16, bandLogE);
+   VARDECL(opus_val16, bandLogE2);
+   VARDECL(int, fine_quant);
+   VARDECL(opus_val16, error);
+   VARDECL(int, pulses);
+   VARDECL(int, cap);
+   VARDECL(int, offsets);
+   VARDECL(int, importance);
+   VARDECL(int, spread_weight);
+   VARDECL(int, fine_priority);
+   VARDECL(int, tf_res);
+   VARDECL(unsigned char, collapse_masks);
+   celt_sig *prefilter_mem;
+   opus_val16 *oldBandE, *oldLogE, *oldLogE2, *energyError;
+   int shortBlocks=0;
+   int isTransient=0;
+   const int CC = st->channels;
+   const int C = st->stream_channels;
+   int LM, M;
+   int tf_select;
+   int nbFilledBytes, nbAvailableBytes;
+   int start;
+   int end;
+   int effEnd;
+   int codedBands;
+   int alloc_trim;
+   int pitch_index=COMBFILTER_MINPERIOD;
+   opus_val16 gain1 = 0;
+   int dual_stereo=0;
+   int effectiveBytes;
+   int dynalloc_logp;
+   opus_int32 vbr_rate;
+   opus_int32 total_bits;
+   opus_int32 total_boost;
+   opus_int32 balance;
+   opus_int32 tell;
+   opus_int32 tell0_frac;
+   int prefilter_tapset=0;
+   int pf_on;
+   int anti_collapse_rsv;
+   int anti_collapse_on=0;
+   int silence=0;
+   int tf_chan = 0;
+   opus_val16 tf_estimate;
+   int pitch_change=0;
+   opus_int32 tot_boost;
+   opus_val32 sample_max;
+   opus_val16 maxDepth;
+   const OpusCustomMode *mode;
+   int nbEBands;
+   int overlap;
+   const opus_int16 *eBands;
+   int secondMdct;
+   int signalBandwidth;
+   int transient_got_disabled=0;
+   opus_val16 surround_masking=0;
+   opus_val16 temporal_vbr=0;
+   opus_val16 surround_trim = 0;
+   opus_int32 equiv_rate;
+   int hybrid;
+   int weak_transient = 0;
+   int enable_tf_analysis;
+   VARDECL(opus_val16, surround_dynalloc);
+   ALLOC_STACK;
+
+   mode = st->mode;
+   nbEBands = mode->nbEBands;
+   overlap = mode->overlap;
+   eBands = mode->eBands;
+   start = st->start;
+   end = st->end;
+   hybrid = start != 0;
+   tf_estimate = 0;
+   if (nbCompressedBytes<2 || pcm==NULL)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+
+   frame_size *= st->upsample;
+   for (LM=0;LM<=mode->maxLM;LM++)
+      if (mode->shortMdctSize<<LM==frame_size)
+         break;
+   if (LM>mode->maxLM)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+   M=1<<LM;
+   N = M*mode->shortMdctSize;
+
+   prefilter_mem = st->in_mem+CC*(overlap);
+   oldBandE = (opus_val16*)(st->in_mem+CC*(overlap+COMBFILTER_MAXPERIOD));
+   oldLogE = oldBandE + CC*nbEBands;
+   oldLogE2 = oldLogE + CC*nbEBands;
+   energyError = oldLogE2 + CC*nbEBands;
+
+   if (enc==NULL)
+   {
+      tell0_frac=tell=1;
+      nbFilledBytes=0;
+   } else {
+      tell0_frac=ec_tell_frac(enc);
+      tell=ec_tell(enc);
+      nbFilledBytes=(tell+4)>>3;
+   }
+
+#ifdef CUSTOM_MODES
+   if (st->signalling && enc==NULL)
+   {
+      int tmp = (mode->effEBands-end)>>1;
+      end = st->end = IMAX(1, mode->effEBands-tmp);
+      compressed[0] = tmp<<5;
+      compressed[0] |= LM<<3;
+      compressed[0] |= (C==2)<<2;
+      /* Convert "standard mode" to Opus header */
+      if (mode->Fs==48000 && mode->shortMdctSize==120)
+      {
+         int c0 = toOpus(compressed[0]);
+         if (c0<0)
+         {
+            RESTORE_STACK;
+            return OPUS_BAD_ARG;
+         }
+         compressed[0] = c0;
+      }
+      compressed++;
+      nbCompressedBytes--;
+   }
+#else
+   celt_assert(st->signalling==0);
+#endif
+
+   /* Can't produce more than 1275 output bytes */
+   nbCompressedBytes = IMIN(nbCompressedBytes,1275);
+   nbAvailableBytes = nbCompressedBytes - nbFilledBytes;
+
+   if (st->vbr && st->bitrate!=OPUS_BITRATE_MAX)
+   {
+      opus_int32 den=mode->Fs>>BITRES;
+      vbr_rate=(st->bitrate*frame_size+(den>>1))/den;
+#ifdef CUSTOM_MODES
+      if (st->signalling)
+         vbr_rate -= 8<<BITRES;
+#endif
+      effectiveBytes = vbr_rate>>(3+BITRES);
+   } else {
+      opus_int32 tmp;
+      vbr_rate = 0;
+      tmp = st->bitrate*frame_size;
+      if (tell>1)
+         tmp += tell;
+      if (st->bitrate!=OPUS_BITRATE_MAX)
+         nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes,
+               (tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling));
+      effectiveBytes = nbCompressedBytes - nbFilledBytes;
+   }
+   equiv_rate = ((opus_int32)nbCompressedBytes*8*50 >> (3-LM)) - (40*C+20)*((400>>LM) - 50);
+   if (st->bitrate != OPUS_BITRATE_MAX)
+      equiv_rate = IMIN(equiv_rate, st->bitrate - (40*C+20)*((400>>LM) - 50));
+
+   if (enc==NULL)
+   {
+      ec_enc_init(&_enc, compressed, nbCompressedBytes);
+      enc = &_enc;
+   }
+
+   if (vbr_rate>0)
+   {
+      /* Computes the max bit-rate allowed in VBR mode to avoid violating the
+          target rate and buffering.
+         We must do this up front so that bust-prevention logic triggers
+          correctly if we don't have enough bits. */
+      if (st->constrained_vbr)
+      {
+         opus_int32 vbr_bound;
+         opus_int32 max_allowed;
+         /* We could use any multiple of vbr_rate as bound (depending on the
+             delay).
+            This is clamped to ensure we use at least two bytes if the encoder
+             was entirely empty, but to allow 0 in hybrid mode. */
+         vbr_bound = vbr_rate;
+         max_allowed = IMIN(IMAX(tell==1?2:0,
+               (vbr_rate+vbr_bound-st->vbr_reservoir)>>(BITRES+3)),
+               nbAvailableBytes);
+         if(max_allowed < nbAvailableBytes)
+         {
+            nbCompressedBytes = nbFilledBytes+max_allowed;
+            nbAvailableBytes = max_allowed;
+            ec_enc_shrink(enc, nbCompressedBytes);
+         }
+      }
+   }
+   total_bits = nbCompressedBytes*8;
+
+   effEnd = end;
+   if (effEnd > mode->effEBands)
+      effEnd = mode->effEBands;
+
+   ALLOC(in, CC*(N+overlap), celt_sig);
+
+   sample_max=MAX32(st->overlap_max, celt_maxabs16(pcm, C*(N-overlap)/st->upsample));
+   st->overlap_max=celt_maxabs16(pcm+C*(N-overlap)/st->upsample, C*overlap/st->upsample);
+   sample_max=MAX32(sample_max, st->overlap_max);
+#ifdef FIXED_POINT
+   silence = (sample_max==0);
+#else
+   silence = (sample_max <= (opus_val16)1/(1<<st->lsb_depth));
+#endif
+#ifdef FUZZING
+   if ((rand()&0x3F)==0)
+      silence = 1;
+#endif
+   if (tell==1)
+      ec_enc_bit_logp(enc, silence, 15);
+   else
+      silence=0;
+   if (silence)
+   {
+      /*In VBR mode there is no need to send more than the minimum. */
+      if (vbr_rate>0)
+      {
+         effectiveBytes=nbCompressedBytes=IMIN(nbCompressedBytes, nbFilledBytes+2);
+         total_bits=nbCompressedBytes*8;
+         nbAvailableBytes=2;
+         ec_enc_shrink(enc, nbCompressedBytes);
+      }
+      /* Pretend we've filled all the remaining bits with zeros
+            (that's what the initialiser did anyway) */
+      tell = nbCompressedBytes*8;
+      enc->nbits_total+=tell-ec_tell(enc);
+   }
+   c=0; do {
+      int need_clip=0;
+#ifndef FIXED_POINT
+      need_clip = st->clip && sample_max>65536.f;
+#endif
+      celt_preemphasis(pcm+c, in+c*(N+overlap)+overlap, N, CC, st->upsample,
+                  mode->preemph, st->preemph_memE+c, need_clip);
+   } while (++c<CC);
+
+
+
+   /* Find pitch period and gain */
+   {
+      int enabled;
+      int qg;
+      enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && !hybrid && !silence && !st->disable_pf
+            && st->complexity >= 5;
+
+      prefilter_tapset = st->tapset_decision;
+      pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes, &st->analysis);
+      if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3)
+            && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))
+         pitch_change = 1;
+      if (pf_on==0)
+      {
+         if(!hybrid && tell+16<=total_bits)
+            ec_enc_bit_logp(enc, 0, 1);
+      } else {
+         /*This block is not gated by a total bits check only because
+           of the nbAvailableBytes check above.*/
+         int octave;
+         ec_enc_bit_logp(enc, 1, 1);
+         pitch_index += 1;
+         octave = EC_ILOG(pitch_index)-5;
+         ec_enc_uint(enc, octave, 6);
+         ec_enc_bits(enc, pitch_index-(16<<octave), 4+octave);
+         pitch_index -= 1;
+         ec_enc_bits(enc, qg, 3);
+         ec_enc_icdf(enc, prefilter_tapset, tapset_icdf, 2);
+      }
+   }
+
+   isTransient = 0;
+   shortBlocks = 0;
+   if (st->complexity >= 1 && !st->lfe)
+   {
+      /* Reduces the likelihood of energy instability on fricatives at low bitrate
+         in hybrid mode. It seems like we still want to have real transients on vowels
+         though (small SILK quantization offset value). */
+      int allow_weak_transients = hybrid && effectiveBytes<15 && st->silk_info.signalType != 2;
+      isTransient = transient_analysis(in, N+overlap, CC,
+            &tf_estimate, &tf_chan, allow_weak_transients, &weak_transient);
+   }
+   if (LM>0 && ec_tell(enc)+3<=total_bits)
+   {
+      if (isTransient)
+         shortBlocks = M;
+   } else {
+      isTransient = 0;
+      transient_got_disabled=1;
+   }
+
+   ALLOC(freq, CC*N, celt_sig); /**< Interleaved signal MDCTs */
+   ALLOC(bandE,nbEBands*CC, celt_ener);
+   ALLOC(bandLogE,nbEBands*CC, opus_val16);
+
+   secondMdct = shortBlocks && st->complexity>=8;
+   ALLOC(bandLogE2, C*nbEBands, opus_val16);
+   if (secondMdct)
+   {
+      compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch);
+      compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch);
+      amp2Log2(mode, effEnd, end, bandE, bandLogE2, C);
+      for (i=0;i<C*nbEBands;i++)
+         bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
+   }
+
+   compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch);
+   /* This should catch any NaN in the CELT input. Since we're not supposed to see any (they're filtered
+      at the Opus layer), just abort. */
+   celt_assert(!celt_isnan(freq[0]) && (C==1 || !celt_isnan(freq[N])));
+   if (CC==2&&C==1)
+      tf_chan = 0;
+   compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch);
+
+   if (st->lfe)
+   {
+      for (i=2;i<end;i++)
+      {
+         bandE[i] = IMIN(bandE[i], MULT16_32_Q15(QCONST16(1e-4f,15),bandE[0]));
+         bandE[i] = MAX32(bandE[i], EPSILON);
+      }
+   }
+   amp2Log2(mode, effEnd, end, bandE, bandLogE, C);
+
+   ALLOC(surround_dynalloc, C*nbEBands, opus_val16);
+   OPUS_CLEAR(surround_dynalloc, end);
+   /* This computes how much masking takes place between surround channels */
+   if (!hybrid&&st->energy_mask&&!st->lfe)
+   {
+      int mask_end;
+      int midband;
+      int count_dynalloc;
+      opus_val32 mask_avg=0;
+      opus_val32 diff=0;
+      int count=0;
+      mask_end = IMAX(2,st->lastCodedBands);
+      for (c=0;c<C;c++)
+      {
+         for(i=0;i<mask_end;i++)
+         {
+            opus_val16 mask;
+            mask = MAX16(MIN16(st->energy_mask[nbEBands*c+i],
+                   QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT));
+            if (mask > 0)
+               mask = HALF16(mask);
+            mask_avg += MULT16_16(mask, eBands[i+1]-eBands[i]);
+            count += eBands[i+1]-eBands[i];
+            diff += MULT16_16(mask, 1+2*i-mask_end);
+         }
+      }
+      celt_assert(count>0);
+      mask_avg = DIV32_16(mask_avg,count);
+      mask_avg += QCONST16(.2f, DB_SHIFT);
+      diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end);
+      /* Again, being conservative */
+      diff = HALF32(diff);
+      diff = MAX32(MIN32(diff, QCONST32(.031f, DB_SHIFT)), -QCONST32(.031f, DB_SHIFT));
+      /* Find the band that's in the middle of the coded spectrum */
+      for (midband=0;eBands[midband+1] < eBands[mask_end]/2;midband++);
+      count_dynalloc=0;
+      for(i=0;i<mask_end;i++)
+      {
+         opus_val32 lin;
+         opus_val16 unmask;
+         lin = mask_avg + diff*(i-midband);
+         if (C==2)
+            unmask = MAX16(st->energy_mask[i], st->energy_mask[nbEBands+i]);
+         else
+            unmask = st->energy_mask[i];
+         unmask = MIN16(unmask, QCONST16(.0f, DB_SHIFT));
+         unmask -= lin;
+         if (unmask > QCONST16(.25f, DB_SHIFT))
+         {
+            surround_dynalloc[i] = unmask - QCONST16(.25f, DB_SHIFT);
+            count_dynalloc++;
+         }
+      }
+      if (count_dynalloc>=3)
+      {
+         /* If we need dynalloc in many bands, it's probably because our
+            initial masking rate was too low. */
+         mask_avg += QCONST16(.25f, DB_SHIFT);
+         if (mask_avg>0)
+         {
+            /* Something went really wrong in the original calculations,
+               disabling masking. */
+            mask_avg = 0;
+            diff = 0;
+            OPUS_CLEAR(surround_dynalloc, mask_end);
+         } else {
+            for(i=0;i<mask_end;i++)
+               surround_dynalloc[i] = MAX16(0, surround_dynalloc[i]-QCONST16(.25f, DB_SHIFT));
+         }
+      }
+      mask_avg += QCONST16(.2f, DB_SHIFT);
+      /* Convert to 1/64th units used for the trim */
+      surround_trim = 64*diff;
+      /*printf("%d %d ", mask_avg, surround_trim);*/
+      surround_masking = mask_avg;
+   }
+   /* Temporal VBR (but not for LFE) */
+   if (!st->lfe)
+   {
+      opus_val16 follow=-QCONST16(10.0f,DB_SHIFT);
+      opus_val32 frame_avg=0;
+      opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0;
+      for(i=start;i<end;i++)
+      {
+         follow = MAX16(follow-QCONST16(1.f, DB_SHIFT), bandLogE[i]-offset);
+         if (C==2)
+            follow = MAX16(follow, bandLogE[i+nbEBands]-offset);
+         frame_avg += follow;
+      }
+      frame_avg /= (end-start);
+      temporal_vbr = SUB16(frame_avg,st->spec_avg);
+      temporal_vbr = MIN16(QCONST16(3.f, DB_SHIFT), MAX16(-QCONST16(1.5f, DB_SHIFT), temporal_vbr));
+      st->spec_avg += MULT16_16_Q15(QCONST16(.02f, 15), temporal_vbr);
+   }
+   /*for (i=0;i<21;i++)
+      printf("%f ", bandLogE[i]);
+   printf("\n");*/
+
+   if (!secondMdct)
+   {
+      OPUS_COPY(bandLogE2, bandLogE, C*nbEBands);
+   }
+
+   /* Last chance to catch any transient we might have missed in the
+      time-domain analysis */
+   if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe && !hybrid)
+   {
+      if (patch_transient_decision(bandLogE, oldBandE, nbEBands, start, end, C))
+      {
+         isTransient = 1;
+         shortBlocks = M;
+         compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch);
+         compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch);
+         amp2Log2(mode, effEnd, end, bandE, bandLogE, C);
+         /* Compensate for the scaling of short vs long mdcts */
+         for (i=0;i<C*nbEBands;i++)
+            bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
+         tf_estimate = QCONST16(.2f,14);
+      }
+   }
+
+   if (LM>0 && ec_tell(enc)+3<=total_bits)
+      ec_enc_bit_logp(enc, isTransient, 3);
+
+   ALLOC(X, C*N, celt_norm);         /**< Interleaved normalised MDCTs */
+
+   /* Band normalisation */
+   normalise_bands(mode, freq, X, bandE, effEnd, C, M);
+
+   enable_tf_analysis = effectiveBytes>=15*C && !hybrid && st->complexity>=2 && !st->lfe;
+
+   ALLOC(offsets, nbEBands, int);
+   ALLOC(importance, nbEBands, int);
+   ALLOC(spread_weight, nbEBands, int);
+
+   maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, start, end, C, offsets,
+         st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr,
+         eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc, &st->analysis, importance, spread_weight);
+
+   ALLOC(tf_res, nbEBands, int);
+   /* Disable variable tf resolution for hybrid and at very low bitrate */
+   if (enable_tf_analysis)
+   {
+      int lambda;
+      lambda = IMAX(80, 20480/effectiveBytes + 2);
+      tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, tf_estimate, tf_chan, importance);
+      for (i=effEnd;i<end;i++)
+         tf_res[i] = tf_res[effEnd-1];
+   } else if (hybrid && weak_transient)
+   {
+      /* For weak transients, we rely on the fact that improving time resolution using
+         TF on a long window is imperfect and will not result in an energy collapse at
+         low bitrate. */
+      for (i=0;i<end;i++)
+         tf_res[i] = 1;
+      tf_select=0;
+   } else if (hybrid && effectiveBytes<15 && st->silk_info.signalType != 2)
+   {
+      /* For low bitrate hybrid, we force temporal resolution to 5 ms rather than 2.5 ms. */
+      for (i=0;i<end;i++)
+         tf_res[i] = 0;
+      tf_select=isTransient;
+   } else {
+      for (i=0;i<end;i++)
+         tf_res[i] = isTransient;
+      tf_select=0;
+   }
+
+   ALLOC(error, C*nbEBands, opus_val16);
+   c=0;
+   do {
+      for (i=start;i<end;i++)
+      {
+         /* When the energy is stable, slightly bias energy quantization towards
+            the previous error to make the gain more stable (a constant offset is
+            better than fluctuations). */
+         if (ABS32(SUB32(bandLogE[i+c*nbEBands], oldBandE[i+c*nbEBands])) < QCONST16(2.f, DB_SHIFT))
+         {
+            bandLogE[i+c*nbEBands] -= MULT16_16_Q15(energyError[i+c*nbEBands], QCONST16(0.25f, 15));
+         }
+      }
+   } while (++c < C);
+   quant_coarse_energy(mode, start, end, effEnd, bandLogE,
+         oldBandE, total_bits, error, enc,
+         C, LM, nbAvailableBytes, st->force_intra,
+         &st->delayedIntra, st->complexity >= 4, st->loss_rate, st->lfe);
+
+   tf_encode(start, end, isTransient, tf_res, LM, tf_select, enc);
+
+   if (ec_tell(enc)+4<=total_bits)
+   {
+      if (st->lfe)
+      {
+         st->tapset_decision = 0;
+         st->spread_decision = SPREAD_NORMAL;
+      } else if (hybrid)
+      {
+         if (st->complexity == 0)
+            st->spread_decision = SPREAD_NONE;
+         else if (isTransient)
+            st->spread_decision = SPREAD_NORMAL;
+         else
+            st->spread_decision = SPREAD_AGGRESSIVE;
+      } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C)
+      {
+         if (st->complexity == 0)
+            st->spread_decision = SPREAD_NONE;
+         else
+            st->spread_decision = SPREAD_NORMAL;
+      } else {
+         /* Disable new spreading+tapset estimator until we can show it works
+            better than the old one. So far it seems like spreading_decision()
+            works best. */
+#if 0
+         if (st->analysis.valid)
+         {
+            static const opus_val16 spread_thresholds[3] = {-QCONST16(.6f, 15), -QCONST16(.2f, 15), -QCONST16(.07f, 15)};
+            static const opus_val16 spread_histeresis[3] = {QCONST16(.15f, 15), QCONST16(.07f, 15), QCONST16(.02f, 15)};
+            static const opus_val16 tapset_thresholds[2] = {QCONST16(.0f, 15), QCONST16(.15f, 15)};
+            static const opus_val16 tapset_histeresis[2] = {QCONST16(.1f, 15), QCONST16(.05f, 15)};
+            st->spread_decision = hysteresis_decision(-st->analysis.tonality, spread_thresholds, spread_histeresis, 3, st->spread_decision);
+            st->tapset_decision = hysteresis_decision(st->analysis.tonality_slope, tapset_thresholds, tapset_histeresis, 2, st->tapset_decision);
+         } else
+#endif
+         {
+            st->spread_decision = spreading_decision(mode, X,
+                  &st->tonal_average, st->spread_decision, &st->hf_average,
+                  &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M, spread_weight);
+         }
+         /*printf("%d %d\n", st->tapset_decision, st->spread_decision);*/
+         /*printf("%f %d %f %d\n\n", st->analysis.tonality, st->spread_decision, st->analysis.tonality_slope, st->tapset_decision);*/
+      }
+      ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5);
+   }
+
+   /* For LFE, everything interesting is in the first band */
+   if (st->lfe)
+      offsets[0] = IMIN(8, effectiveBytes/3);
+   ALLOC(cap, nbEBands, int);
+   init_caps(mode,cap,LM,C);
+
+   dynalloc_logp = 6;
+   total_bits<<=BITRES;
+   total_boost = 0;
+   tell = ec_tell_frac(enc);
+   for (i=start;i<end;i++)
+   {
+      int width, quanta;
+      int dynalloc_loop_logp;
+      int boost;
+      int j;
+      width = C*(eBands[i+1]-eBands[i])<<LM;
+      /* quanta is 6 bits, but no more than 1 bit/sample
+         and no less than 1/8 bit/sample */
+      quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
+      dynalloc_loop_logp = dynalloc_logp;
+      boost = 0;
+      for (j = 0; tell+(dynalloc_loop_logp<<BITRES) < total_bits-total_boost
+            && boost < cap[i]; j++)
+      {
+         int flag;
+         flag = j<offsets[i];
+         ec_enc_bit_logp(enc, flag, dynalloc_loop_logp);
+         tell = ec_tell_frac(enc);
+         if (!flag)
+            break;
+         boost += quanta;
+         total_boost += quanta;
+         dynalloc_loop_logp = 1;
+      }
+      /* Making dynalloc more likely */
+      if (j)
+         dynalloc_logp = IMAX(2, dynalloc_logp-1);
+      offsets[i] = boost;
+   }
+
+   if (C==2)
+   {
+      static const opus_val16 intensity_thresholds[21]=
+      /* 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19  20  off*/
+        {  1, 2, 3, 4, 5, 6, 7, 8,16,24,36,44,50,56,62,67,72,79,88,106,134};
+      static const opus_val16 intensity_histeresis[21]=
+        {  1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 5, 6,  8, 8};
+
+      /* Always use MS for 2.5 ms frames until we can do a better analysis */
+      if (LM!=0)
+         dual_stereo = stereo_analysis(mode, X, LM, N);
+
+      st->intensity = hysteresis_decision((opus_val16)(equiv_rate/1000),
+            intensity_thresholds, intensity_histeresis, 21, st->intensity);
+      st->intensity = IMIN(end,IMAX(start, st->intensity));
+   }
+
+   alloc_trim = 5;
+   if (tell+(6<<BITRES) <= total_bits - total_boost)
+   {
+      if (start > 0 || st->lfe)
+      {
+         st->stereo_saving = 0;
+         alloc_trim = 5;
+      } else {
+         alloc_trim = alloc_trim_analysis(mode, X, bandLogE,
+            end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate,
+            st->intensity, surround_trim, equiv_rate, st->arch);
+      }
+      ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
+      tell = ec_tell_frac(enc);
+   }
+
+   /* Variable bitrate */
+   if (vbr_rate>0)
+   {
+     opus_val16 alpha;
+     opus_int32 delta;
+     /* The target rate in 8th bits per frame */
+     opus_int32 target, base_target;
+     opus_int32 min_allowed;
+     int lm_diff = mode->maxLM - LM;
+
+     /* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms.
+        The CELT allocator will just not be able to use more than that anyway. */
+     nbCompressedBytes = IMIN(nbCompressedBytes,1275>>(3-LM));
+     if (!hybrid)
+     {
+        base_target = vbr_rate - ((40*C+20)<<BITRES);
+     } else {
+        base_target = IMAX(0, vbr_rate - ((9*C+4)<<BITRES));
+     }
+
+     if (st->constrained_vbr)
+        base_target += (st->vbr_offset>>lm_diff);
+
+     if (!hybrid)
+     {
+        target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate,
+           st->lastCodedBands, C, st->intensity, st->constrained_vbr,
+           st->stereo_saving, tot_boost, tf_estimate, pitch_change, maxDepth,
+           st->lfe, st->energy_mask!=NULL, surround_masking,
+           temporal_vbr);
+     } else {
+        target = base_target;
+        /* Tonal frames (offset<100) need more bits than noisy (offset>100) ones. */
+        if (st->silk_info.offset < 100) target += 12 << BITRES >> (3-LM);
+        if (st->silk_info.offset > 100) target -= 18 << BITRES >> (3-LM);
+        /* Boosting bitrate on transients and vowels with significant temporal
+           spikes. */
+        target += (opus_int32)MULT16_16_Q14(tf_estimate-QCONST16(.25f,14), (50<<BITRES));
+        /* If we have a strong transient, let's make sure it has enough bits to code
+           the first two bands, so that it can use folding rather than noise. */
+        if (tf_estimate > QCONST16(.7f,14))
+           target = IMAX(target, 50<<BITRES);
+     }
+     /* The current offset is removed from the target and the space used
+        so far is added*/
+     target=target+tell;
+     /* In VBR mode the frame size must not be reduced so much that it would
+         result in the encoder running out of bits.
+        The margin of 2 bytes ensures that none of the bust-prevention logic
+         in the decoder will have triggered so far. */
+     min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2;
+     /* Take into account the 37 bits we need to have left in the packet to
+        signal a redundant frame in hybrid mode. Creating a shorter packet would
+        create an entropy coder desync. */
+     if (hybrid)
+        min_allowed = IMAX(min_allowed, (tell0_frac+(37<<BITRES)+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3));
+
+     nbAvailableBytes = (target+(1<<(BITRES+2)))>>(BITRES+3);
+     nbAvailableBytes = IMAX(min_allowed,nbAvailableBytes);
+     nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes);
+
+     /* By how much did we "miss" the target on that frame */
+     delta = target - vbr_rate;
+
+     target=nbAvailableBytes<<(BITRES+3);
+
+     /*If the frame is silent we don't adjust our drift, otherwise
+       the encoder will shoot to very high rates after hitting a
+       span of silence, but we do allow the bitres to refill.
+       This means that we'll undershoot our target in CVBR/VBR modes
+       on files with lots of silence. */
+     if(silence)
+     {
+       nbAvailableBytes = 2;
+       target = 2*8<<BITRES;
+       delta = 0;
+     }
+
+     if (st->vbr_count < 970)
+     {
+        st->vbr_count++;
+        alpha = celt_rcp(SHL32(EXTEND32(st->vbr_count+20),16));
+     } else
+        alpha = QCONST16(.001f,15);
+     /* How many bits have we used in excess of what we're allowed */
+     if (st->constrained_vbr)
+        st->vbr_reservoir += target - vbr_rate;
+     /*printf ("%d\n", st->vbr_reservoir);*/
+
+     /* Compute the offset we need to apply in order to reach the target */
+     if (st->constrained_vbr)
+     {
+        st->vbr_drift += (opus_int32)MULT16_32_Q15(alpha,(delta*(1<<lm_diff))-st->vbr_offset-st->vbr_drift);
+        st->vbr_offset = -st->vbr_drift;
+     }
+     /*printf ("%d\n", st->vbr_drift);*/
+
+     if (st->constrained_vbr && st->vbr_reservoir < 0)
+     {
+        /* We're under the min value -- increase rate */
+        int adjust = (-st->vbr_reservoir)/(8<<BITRES);
+        /* Unless we're just coding silence */
+        nbAvailableBytes += silence?0:adjust;
+        st->vbr_reservoir = 0;
+        /*printf ("+%d\n", adjust);*/
+     }
+     nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes);
+     /*printf("%d\n", nbCompressedBytes*50*8);*/
+     /* This moves the raw bits to take into account the new compressed size */
+     ec_enc_shrink(enc, nbCompressedBytes);
+   }
+
+   /* Bit allocation */
+   ALLOC(fine_quant, nbEBands, int);
+   ALLOC(pulses, nbEBands, int);
+   ALLOC(fine_priority, nbEBands, int);
+
+   /* bits =           packet size                    - where we are - safety*/
+   bits = (((opus_int32)nbCompressedBytes*8)<<BITRES) - ec_tell_frac(enc) - 1;
+   anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
+   bits -= anti_collapse_rsv;
+   signalBandwidth = end-1;
+#ifndef DISABLE_FLOAT_API
+   if (st->analysis.valid)
+   {
+      int min_bandwidth;
+      if (equiv_rate < (opus_int32)32000*C)
+         min_bandwidth = 13;
+      else if (equiv_rate < (opus_int32)48000*C)
+         min_bandwidth = 16;
+      else if (equiv_rate < (opus_int32)60000*C)
+         min_bandwidth = 18;
+      else  if (equiv_rate < (opus_int32)80000*C)
+         min_bandwidth = 19;
+      else
+         min_bandwidth = 20;
+      signalBandwidth = IMAX(st->analysis.bandwidth, min_bandwidth);
+   }
+#endif
+   if (st->lfe)
+      signalBandwidth = 1;
+   codedBands = clt_compute_allocation(mode, start, end, offsets, cap,
+         alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses,
+         fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth);
+   if (st->lastCodedBands)
+      st->lastCodedBands = IMIN(st->lastCodedBands+1,IMAX(st->lastCodedBands-1,codedBands));
+   else
+      st->lastCodedBands = codedBands;
+
+   quant_fine_energy(mode, start, end, oldBandE, error, fine_quant, enc, C);
+
+   /* Residual quantisation */
+   ALLOC(collapse_masks, C*nbEBands, unsigned char);
+   quant_all_bands(1, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks,
+         bandE, pulses, shortBlocks, st->spread_decision,
+         dual_stereo, st->intensity, tf_res, nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv,
+         balance, enc, LM, codedBands, &st->rng, st->complexity, st->arch, st->disable_inv);
+
+   if (anti_collapse_rsv > 0)
+   {
+      anti_collapse_on = st->consec_transient<2;
+#ifdef FUZZING
+      anti_collapse_on = rand()&0x1;
+#endif
+      ec_enc_bits(enc, anti_collapse_on, 1);
+   }
+   quant_energy_finalise(mode, start, end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C);
+   OPUS_CLEAR(energyError, nbEBands*CC);
+   c=0;
+   do {
+      for (i=start;i<end;i++)
+      {
+         energyError[i+c*nbEBands] = MAX16(-QCONST16(0.5f, 15), MIN16(QCONST16(0.5f, 15), error[i+c*nbEBands]));
+      }
+   } while (++c < C);
+
+   if (silence)
+   {
+      for (i=0;i<C*nbEBands;i++)
+         oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
+   }
+
+#ifdef RESYNTH
+   /* Re-synthesis of the coded audio if required */
+   {
+      celt_sig *out_mem[2];
+
+      if (anti_collapse_on)
+      {
+         anti_collapse(mode, X, collapse_masks, LM, C, N,
+               start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
+      }
+
+      c=0; do {
+         OPUS_MOVE(st->syn_mem[c], st->syn_mem[c]+N, 2*MAX_PERIOD-N+overlap/2);
+      } while (++c<CC);
+
+      c=0; do {
+         out_mem[c] = st->syn_mem[c]+2*MAX_PERIOD-N;
+      } while (++c<CC);
+
+      celt_synthesis(mode, X, out_mem, oldBandE, start, effEnd,
+                     C, CC, isTransient, LM, st->upsample, silence, st->arch);
+
+      c=0; do {
+         st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
+         st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD);
+         comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, mode->shortMdctSize,
+               st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset,
+               mode->window, overlap);
+         if (LM!=0)
+            comb_filter(out_mem[c]+mode->shortMdctSize, out_mem[c]+mode->shortMdctSize, st->prefilter_period, pitch_index, N-mode->shortMdctSize,
+                  st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset,
+                  mode->window, overlap);
+      } while (++c<CC);
+
+      /* We reuse freq[] as scratch space for the de-emphasis */
+      deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD);
+      st->prefilter_period_old = st->prefilter_period;
+      st->prefilter_gain_old = st->prefilter_gain;
+      st->prefilter_tapset_old = st->prefilter_tapset;
+   }
+#endif
+
+   st->prefilter_period = pitch_index;
+   st->prefilter_gain = gain1;
+   st->prefilter_tapset = prefilter_tapset;
+#ifdef RESYNTH
+   if (LM!=0)
+   {
+      st->prefilter_period_old = st->prefilter_period;
+      st->prefilter_gain_old = st->prefilter_gain;
+      st->prefilter_tapset_old = st->prefilter_tapset;
+   }
+#endif
+
+   if (CC==2&&C==1) {
+      OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands);
+   }
+
+   if (!isTransient)
+   {
+      OPUS_COPY(oldLogE2, oldLogE, CC*nbEBands);
+      OPUS_COPY(oldLogE, oldBandE, CC*nbEBands);
+   } else {
+      for (i=0;i<CC*nbEBands;i++)
+         oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
+   }
+   /* In case start or end were to change */
+   c=0; do
+   {
+      for (i=0;i<start;i++)
+      {
+         oldBandE[c*nbEBands+i]=0;
+         oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
+      }
+      for (i=end;i<nbEBands;i++)
+      {
+         oldBandE[c*nbEBands+i]=0;
+         oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
+      }
+   } while (++c<CC);
+
+   if (isTransient || transient_got_disabled)
+      st->consec_transient++;
+   else
+      st->consec_transient=0;
+   st->rng = enc->rng;
+
+   /* If there's any room left (can only happen for very high rates),
+      it's already filled with zeros */
+   ec_enc_done(enc);
+
+#ifdef CUSTOM_MODES
+   if (st->signalling)
+      nbCompressedBytes++;
+#endif
+
+   RESTORE_STACK;
+   if (ec_get_error(enc))
+      return OPUS_INTERNAL_ERROR;
+   else
+      return nbCompressedBytes;
+}
+
+
+#ifdef CUSTOM_MODES
+
+#ifdef FIXED_POINT
+int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
+{
+   return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL);
+}
+
+#ifndef DISABLE_FLOAT_API
+int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
+{
+   int j, ret, C, N;
+   VARDECL(opus_int16, in);
+   ALLOC_STACK;
+
+   if (pcm==NULL)
+      return OPUS_BAD_ARG;
+
+   C = st->channels;
+   N = frame_size;
+   ALLOC(in, C*N, opus_int16);
+
+   for (j=0;j<C*N;j++)
+     in[j] = FLOAT2INT16(pcm[j]);
+
+   ret=celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL);
+#ifdef RESYNTH
+   for (j=0;j<C*N;j++)
+      ((float*)pcm)[j]=in[j]*(1.f/32768.f);
+#endif
+   RESTORE_STACK;
+   return ret;
+}
+#endif /* DISABLE_FLOAT_API */
+#else
+
+int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
+{
+   int j, ret, C, N;
+   VARDECL(celt_sig, in);
+   ALLOC_STACK;
+
+   if (pcm==NULL)
+      return OPUS_BAD_ARG;
+
+   C=st->channels;
+   N=frame_size;
+   ALLOC(in, C*N, celt_sig);
+   for (j=0;j<C*N;j++) {
+     in[j] = SCALEOUT(pcm[j]);
+   }
+
+   ret = celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL);
+#ifdef RESYNTH
+   for (j=0;j<C*N;j++)
+      ((opus_int16*)pcm)[j] = FLOAT2INT16(in[j]);
+#endif
+   RESTORE_STACK;
+   return ret;
+}
+
+int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
+{
+   return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL);
+}
+
+#endif
+
+#endif /* CUSTOM_MODES */
+
+int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...)
+{
+   va_list ap;
+
+   va_start(ap, request);
+   switch (request)
+   {
+      case OPUS_SET_COMPLEXITY_REQUEST:
+      {
+         int value = va_arg(ap, opus_int32);
+         if (value<0 || value>10)
+            goto bad_arg;
+         st->complexity = value;
+      }
+      break;
+      case CELT_SET_START_BAND_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<0 || value>=st->mode->nbEBands)
+            goto bad_arg;
+         st->start = value;
+      }
+      break;
+      case CELT_SET_END_BAND_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<1 || value>st->mode->nbEBands)
+            goto bad_arg;
+         st->end = value;
+      }
+      break;
+      case CELT_SET_PREDICTION_REQUEST:
+      {
+         int value = va_arg(ap, opus_int32);
+         if (value<0 || value>2)
+            goto bad_arg;
+         st->disable_pf = value<=1;
+         st->force_intra = value==0;
+      }
+      break;
+      case OPUS_SET_PACKET_LOSS_PERC_REQUEST:
+      {
+         int value = va_arg(ap, opus_int32);
+         if (value<0 || value>100)
+            goto bad_arg;
+         st->loss_rate = value;
+      }
+      break;
+      case OPUS_SET_VBR_CONSTRAINT_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         st->constrained_vbr = value;
+      }
+      break;
+      case OPUS_SET_VBR_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         st->vbr = value;
+      }
+      break;
+      case OPUS_SET_BITRATE_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<=500 && value!=OPUS_BITRATE_MAX)
+            goto bad_arg;
+         value = IMIN(value, 260000*st->channels);
+         st->bitrate = value;
+      }
+      break;
+      case CELT_SET_CHANNELS_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<1 || value>2)
+            goto bad_arg;
+         st->stream_channels = value;
+      }
+      break;
+      case OPUS_SET_LSB_DEPTH_REQUEST:
+      {
+          opus_int32 value = va_arg(ap, opus_int32);
+          if (value<8 || value>24)
+             goto bad_arg;
+          st->lsb_depth=value;
+      }
+      break;
+      case OPUS_GET_LSB_DEPTH_REQUEST:
+      {
+          opus_int32 *value = va_arg(ap, opus_int32*);
+          *value=st->lsb_depth;
+      }
+      break;
+      case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST:
+      {
+          opus_int32 value = va_arg(ap, opus_int32);
+          if(value<0 || value>1)
+          {
+             goto bad_arg;
+          }
+          st->disable_inv = value;
+      }
+      break;
+      case OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST:
+      {
+          opus_int32 *value = va_arg(ap, opus_int32*);
+          if (!value)
+          {
+             goto bad_arg;
+          }
+          *value = st->disable_inv;
+      }
+      break;
+      case OPUS_RESET_STATE:
+      {
+         int i;
+         opus_val16 *oldBandE, *oldLogE, *oldLogE2;
+         oldBandE = (opus_val16*)(st->in_mem+st->channels*(st->mode->overlap+COMBFILTER_MAXPERIOD));
+         oldLogE = oldBandE + st->channels*st->mode->nbEBands;
+         oldLogE2 = oldLogE + st->channels*st->mode->nbEBands;
+         OPUS_CLEAR((char*)&st->ENCODER_RESET_START,
+               opus_custom_encoder_get_size(st->mode, st->channels)-
+               ((char*)&st->ENCODER_RESET_START - (char*)st));
+         for (i=0;i<st->channels*st->mode->nbEBands;i++)
+            oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
+         st->vbr_offset = 0;
+         st->delayedIntra = 1;
+         st->spread_decision = SPREAD_NORMAL;
+         st->tonal_average = 256;
+         st->hf_average = 0;
+         st->tapset_decision = 0;
+      }
+      break;
+#ifdef CUSTOM_MODES
+      case CELT_SET_INPUT_CLIPPING_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         st->clip = value;
+      }
+      break;
+#endif
+      case CELT_SET_SIGNALLING_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         st->signalling = value;
+      }
+      break;
+      case CELT_SET_ANALYSIS_REQUEST:
+      {
+         AnalysisInfo *info = va_arg(ap, AnalysisInfo *);
+         if (info)
+            OPUS_COPY(&st->analysis, info, 1);
+      }
+      break;
+      case CELT_SET_SILK_INFO_REQUEST:
+      {
+         SILKInfo *info = va_arg(ap, SILKInfo *);
+         if (info)
+            OPUS_COPY(&st->silk_info, info, 1);
+      }
+      break;
+      case CELT_GET_MODE_REQUEST:
+      {
+         const CELTMode ** value = va_arg(ap, const CELTMode**);
+         if (value==0)
+            goto bad_arg;
+         *value=st->mode;
+      }
+      break;
+      case OPUS_GET_FINAL_RANGE_REQUEST:
+      {
+         opus_uint32 * value = va_arg(ap, opus_uint32 *);
+         if (value==0)
+            goto bad_arg;
+         *value=st->rng;
+      }
+      break;
+      case OPUS_SET_LFE_REQUEST:
+      {
+          opus_int32 value = va_arg(ap, opus_int32);
+          st->lfe = value;
+      }
+      break;
+      case OPUS_SET_ENERGY_MASK_REQUEST:
+      {
+          opus_val16 *value = va_arg(ap, opus_val16*);
+          st->energy_mask = value;
+      }
+      break;
+      default:
+         goto bad_request;
+   }
+   va_end(ap);
+   return OPUS_OK;
+bad_arg:
+   va_end(ap);
+   return OPUS_BAD_ARG;
+bad_request:
+   va_end(ap);
+   return OPUS_UNIMPLEMENTED;
+}
diff --git a/libopus/celt/celt_lpc.c b/libopus/celt/celt_lpc.c
new file mode 100644
index 0000000..8ecb693
--- /dev/null
+++ b/libopus/celt/celt_lpc.c
@@ -0,0 +1,296 @@
+/* Copyright (c) 2009-2010 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "celt_lpc.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "pitch.h"
+
+void _celt_lpc(
+      opus_val16       *_lpc, /* out: [0...p-1] LPC coefficients      */
+const opus_val32 *ac,  /* in:  [0...p] autocorrelation values  */
+int          p
+)
+{
+   int i, j;
+   opus_val32 r;
+   opus_val32 error = ac[0];
+#ifdef FIXED_POINT
+   opus_val32 lpc[LPC_ORDER];
+#else
+   float *lpc = _lpc;
+#endif
+
+   OPUS_CLEAR(lpc, p);
+   if (ac[0] != 0)
+   {
+      for (i = 0; i < p; i++) {
+         /* Sum up this iteration's reflection coefficient */
+         opus_val32 rr = 0;
+         for (j = 0; j < i; j++)
+            rr += MULT32_32_Q31(lpc[j],ac[i - j]);
+         rr += SHR32(ac[i + 1],3);
+         r = -frac_div32(SHL32(rr,3), error);
+         /*  Update LPC coefficients and total error */
+         lpc[i] = SHR32(r,3);
+         for (j = 0; j < (i+1)>>1; j++)
+         {
+            opus_val32 tmp1, tmp2;
+            tmp1 = lpc[j];
+            tmp2 = lpc[i-1-j];
+            lpc[j]     = tmp1 + MULT32_32_Q31(r,tmp2);
+            lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
+         }
+
+         error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
+         /* Bail out once we get 30 dB gain */
+#ifdef FIXED_POINT
+         if (error<SHR32(ac[0],10))
+            break;
+#else
+         if (error<.001f*ac[0])
+            break;
+#endif
+      }
+   }
+#ifdef FIXED_POINT
+   for (i=0;i<p;i++)
+      _lpc[i] = ROUND16(lpc[i],16);
+#endif
+}
+
+
+void celt_fir_c(
+         const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16 *y,
+         int N,
+         int ord,
+         int arch)
+{
+   int i,j;
+   VARDECL(opus_val16, rnum);
+   SAVE_STACK;
+   celt_assert(x != y);
+   ALLOC(rnum, ord, opus_val16);
+   for(i=0;i<ord;i++)
+      rnum[i] = num[ord-i-1];
+   for (i=0;i<N-3;i+=4)
+   {
+      opus_val32 sum[4];
+      sum[0] = SHL32(EXTEND32(x[i  ]), SIG_SHIFT);
+      sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
+      sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
+      sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
+      xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
+      y[i  ] = ROUND16(sum[0], SIG_SHIFT);
+      y[i+1] = ROUND16(sum[1], SIG_SHIFT);
+      y[i+2] = ROUND16(sum[2], SIG_SHIFT);
+      y[i+3] = ROUND16(sum[3], SIG_SHIFT);
+   }
+   for (;i<N;i++)
+   {
+      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
+      for (j=0;j<ord;j++)
+         sum = MAC16_16(sum,rnum[j],x[i+j-ord]);
+      y[i] = ROUND16(sum, SIG_SHIFT);
+   }
+   RESTORE_STACK;
+}
+
+void celt_iir(const opus_val32 *_x,
+         const opus_val16 *den,
+         opus_val32 *_y,
+         int N,
+         int ord,
+         opus_val16 *mem,
+         int arch)
+{
+#ifdef SMALL_FOOTPRINT
+   int i,j;
+   (void)arch;
+   for (i=0;i<N;i++)
+   {
+      opus_val32 sum = _x[i];
+      for (j=0;j<ord;j++)
+      {
+         sum -= MULT16_16(den[j],mem[j]);
+      }
+      for (j=ord-1;j>=1;j--)
+      {
+         mem[j]=mem[j-1];
+      }
+      mem[0] = SROUND16(sum, SIG_SHIFT);
+      _y[i] = sum;
+   }
+#else
+   int i,j;
+   VARDECL(opus_val16, rden);
+   VARDECL(opus_val16, y);
+   SAVE_STACK;
+
+   celt_assert((ord&3)==0);
+   ALLOC(rden, ord, opus_val16);
+   ALLOC(y, N+ord, opus_val16);
+   for(i=0;i<ord;i++)
+      rden[i] = den[ord-i-1];
+   for(i=0;i<ord;i++)
+      y[i] = -mem[ord-i-1];
+   for(;i<N+ord;i++)
+      y[i]=0;
+   for (i=0;i<N-3;i+=4)
+   {
+      /* Unroll by 4 as if it were an FIR filter */
+      opus_val32 sum[4];
+      sum[0]=_x[i];
+      sum[1]=_x[i+1];
+      sum[2]=_x[i+2];
+      sum[3]=_x[i+3];
+      xcorr_kernel(rden, y+i, sum, ord, arch);
+
+      /* Patch up the result to compensate for the fact that this is an IIR */
+      y[i+ord  ] = -SROUND16(sum[0],SIG_SHIFT);
+      _y[i  ] = sum[0];
+      sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
+      y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
+      _y[i+1] = sum[1];
+      sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
+      sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
+      y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
+      _y[i+2] = sum[2];
+
+      sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
+      sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
+      sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
+      y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
+      _y[i+3] = sum[3];
+   }
+   for (;i<N;i++)
+   {
+      opus_val32 sum = _x[i];
+      for (j=0;j<ord;j++)
+         sum -= MULT16_16(rden[j],y[i+j]);
+      y[i+ord] = SROUND16(sum,SIG_SHIFT);
+      _y[i] = sum;
+   }
+   for(i=0;i<ord;i++)
+      mem[i] = _y[N-i-1];
+   RESTORE_STACK;
+#endif
+}
+
+int _celt_autocorr(
+                   const opus_val16 *x,   /*  in: [0...n-1] samples x   */
+                   opus_val32       *ac,  /* out: [0...lag-1] ac values */
+                   const opus_val16       *window,
+                   int          overlap,
+                   int          lag,
+                   int          n,
+                   int          arch
+                  )
+{
+   opus_val32 d;
+   int i, k;
+   int fastN=n-lag;
+   int shift;
+   const opus_val16 *xptr;
+   VARDECL(opus_val16, xx);
+   SAVE_STACK;
+   ALLOC(xx, n, opus_val16);
+   celt_assert(n>0);
+   celt_assert(overlap>=0);
+   if (overlap == 0)
+   {
+      xptr = x;
+   } else {
+      for (i=0;i<n;i++)
+         xx[i] = x[i];
+      for (i=0;i<overlap;i++)
+      {
+         xx[i] = MULT16_16_Q15(x[i],window[i]);
+         xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
+      }
+      xptr = xx;
+   }
+   shift=0;
+#ifdef FIXED_POINT
+   {
+      opus_val32 ac0;
+      ac0 = 1+(n<<7);
+      if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9);
+      for(i=(n&1);i<n;i+=2)
+      {
+         ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
+         ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
+      }
+
+      shift = celt_ilog2(ac0)-30+10;
+      shift = (shift)/2;
+      if (shift>0)
+      {
+         for(i=0;i<n;i++)
+            xx[i] = PSHR32(xptr[i], shift);
+         xptr = xx;
+      } else
+         shift = 0;
+   }
+#endif
+   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
+   for (k=0;k<=lag;k++)
+   {
+      for (i = k+fastN, d = 0; i < n; i++)
+         d = MAC16_16(d, xptr[i], xptr[i-k]);
+      ac[k] += d;
+   }
+#ifdef FIXED_POINT
+   shift = 2*shift;
+   if (shift<=0)
+      ac[0] += SHL32((opus_int32)1, -shift);
+   if (ac[0] < 268435456)
+   {
+      int shift2 = 29 - EC_ILOG(ac[0]);
+      for (i=0;i<=lag;i++)
+         ac[i] = SHL32(ac[i], shift2);
+      shift -= shift2;
+   } else if (ac[0] >= 536870912)
+   {
+      int shift2=1;
+      if (ac[0] >= 1073741824)
+         shift2++;
+      for (i=0;i<=lag;i++)
+         ac[i] = SHR32(ac[i], shift2);
+      shift += shift2;
+   }
+#endif
+
+   RESTORE_STACK;
+   return shift;
+}
diff --git a/libopus/celt/celt_lpc.h b/libopus/celt/celt_lpc.h
new file mode 100644
index 0000000..a4c5fd6
--- /dev/null
+++ b/libopus/celt/celt_lpc.h
@@ -0,0 +1,66 @@
+/* Copyright (c) 2009-2010 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef PLC_H
+#define PLC_H
+
+#include "arch.h"
+#include "cpu_support.h"
+
+#if defined(OPUS_X86_MAY_HAVE_SSE4_1)
+#include "x86/celt_lpc_sse.h"
+#endif
+
+#define LPC_ORDER 24
+
+void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p);
+
+void celt_fir_c(
+         const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16 *y,
+         int N,
+         int ord,
+         int arch);
+
+#if !defined(OVERRIDE_CELT_FIR)
+#define celt_fir(x, num, y, N, ord, arch) \
+    (celt_fir_c(x, num, y, N, ord, arch))
+#endif
+
+void celt_iir(const opus_val32 *x,
+         const opus_val16 *den,
+         opus_val32 *y,
+         int N,
+         int ord,
+         opus_val16 *mem,
+         int arch);
+
+int _celt_autocorr(const opus_val16 *x, opus_val32 *ac,
+         const opus_val16 *window, int overlap, int lag, int n, int arch);
+
+#endif /* PLC_H */
diff --git a/libopus/celt/cpu_support.h b/libopus/celt/cpu_support.h
new file mode 100644
index 0000000..68fc606
--- /dev/null
+++ b/libopus/celt/cpu_support.h
@@ -0,0 +1,70 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CPU_SUPPORT_H
+#define CPU_SUPPORT_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#if defined(OPUS_HAVE_RTCD) && \
+  (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
+#include "arm/armcpu.h"
+
+/* We currently support 4 ARM variants:
+ * arch[0] -> ARMv4
+ * arch[1] -> ARMv5E
+ * arch[2] -> ARMv6
+ * arch[3] -> NEON
+ */
+#define OPUS_ARCHMASK 3
+
+#elif (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
+  (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
+  (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
+  (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
+
+#include "x86/x86cpu.h"
+/* We currently support 5 x86 variants:
+ * arch[0] -> non-sse
+ * arch[1] -> sse
+ * arch[2] -> sse2
+ * arch[3] -> sse4.1
+ * arch[4] -> avx
+ */
+#define OPUS_ARCHMASK 7
+int opus_select_arch(void);
+
+#else
+#define OPUS_ARCHMASK 0
+
+static OPUS_INLINE int opus_select_arch(void)
+{
+  return 0;
+}
+#endif
+#endif
diff --git a/libopus/celt/cwrs.c b/libopus/celt/cwrs.c
new file mode 100644
index 0000000..a552e4f
--- /dev/null
+++ b/libopus/celt/cwrs.c
@@ -0,0 +1,715 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2007-2009 Timothy B. Terriberry
+   Written by Timothy B. Terriberry and Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "os_support.h"
+#include "cwrs.h"
+#include "mathops.h"
+#include "arch.h"
+
+#ifdef CUSTOM_MODES
+
+/*Guaranteed to return a conservatively large estimate of the binary logarithm
+   with frac bits of fractional precision.
+  Tested for all possible 32-bit inputs with frac=4, where the maximum
+   overestimation is 0.06254243 bits.*/
+int log2_frac(opus_uint32 val, int frac)
+{
+  int l;
+  l=EC_ILOG(val);
+  if(val&(val-1)){
+    /*This is (val>>l-16), but guaranteed to round up, even if adding a bias
+       before the shift would cause overflow (e.g., for 0xFFFFxxxx).
+       Doesn't work for val=0, but that case fails the test above.*/
+    if(l>16)val=((val-1)>>(l-16))+1;
+    else val<<=16-l;
+    l=(l-1)<<frac;
+    /*Note that we always need one iteration, since the rounding up above means
+       that we might need to adjust the integer part of the logarithm.*/
+    do{
+      int b;
+      b=(int)(val>>16);
+      l+=b<<frac;
+      val=(val+b)>>b;
+      val=(val*val+0x7FFF)>>15;
+    }
+    while(frac-->0);
+    /*If val is not exactly 0x8000, then we have to round up the remainder.*/
+    return l+(val>0x8000);
+  }
+  /*Exact powers of two require no rounding.*/
+  else return (l-1)<<frac;
+}
+#endif
+
+/*Although derived separately, the pulse vector coding scheme is equivalent to
+   a Pyramid Vector Quantizer \cite{Fis86}.
+  Some additional notes about an early version appear at
+   https://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
+   and the definitions of some terms have evolved since that was written.
+
+  The conversion from a pulse vector to an integer index (encoding) and back
+   (decoding) is governed by two related functions, V(N,K) and U(N,K).
+
+  V(N,K) = the number of combinations, with replacement, of N items, taken K
+   at a time, when a sign bit is added to each item taken at least once (i.e.,
+   the number of N-dimensional unit pulse vectors with K pulses).
+  One way to compute this is via
+    V(N,K) = K>0 ? sum(k=1...K,2**k*choose(N,k)*choose(K-1,k-1)) : 1,
+   where choose() is the binomial function.
+  A table of values for N<10 and K<10 looks like:
+  V[10][10] = {
+    {1,  0,   0,    0,    0,     0,     0,      0,      0,       0},
+    {1,  2,   2,    2,    2,     2,     2,      2,      2,       2},
+    {1,  4,   8,   12,   16,    20,    24,     28,     32,      36},
+    {1,  6,  18,   38,   66,   102,   146,    198,    258,     326},
+    {1,  8,  32,   88,  192,   360,   608,    952,   1408,    1992},
+    {1, 10,  50,  170,  450,  1002,  1970,   3530,   5890,    9290},
+    {1, 12,  72,  292,  912,  2364,  5336,  10836,  20256,   35436},
+    {1, 14,  98,  462, 1666,  4942, 12642,  28814,  59906,  115598},
+    {1, 16, 128,  688, 2816,  9424, 27008,  68464, 157184,  332688},
+    {1, 18, 162,  978, 4482, 16722, 53154, 148626, 374274,  864146}
+  };
+
+  U(N,K) = the number of such combinations wherein N-1 objects are taken at
+   most K-1 at a time.
+  This is given by
+    U(N,K) = sum(k=0...K-1,V(N-1,k))
+           = K>0 ? (V(N-1,K-1) + V(N,K-1))/2 : 0.
+  The latter expression also makes clear that U(N,K) is half the number of such
+   combinations wherein the first object is taken at least once.
+  Although it may not be clear from either of these definitions, U(N,K) is the
+   natural function to work with when enumerating the pulse vector codebooks,
+   not V(N,K).
+  U(N,K) is not well-defined for N=0, but with the extension
+    U(0,K) = K>0 ? 0 : 1,
+   the function becomes symmetric: U(N,K) = U(K,N), with a similar table:
+  U[10][10] = {
+    {1, 0,  0,   0,    0,    0,     0,     0,      0,      0},
+    {0, 1,  1,   1,    1,    1,     1,     1,      1,      1},
+    {0, 1,  3,   5,    7,    9,    11,    13,     15,     17},
+    {0, 1,  5,  13,   25,   41,    61,    85,    113,    145},
+    {0, 1,  7,  25,   63,  129,   231,   377,    575,    833},
+    {0, 1,  9,  41,  129,  321,   681,  1289,   2241,   3649},
+    {0, 1, 11,  61,  231,  681,  1683,  3653,   7183,  13073},
+    {0, 1, 13,  85,  377, 1289,  3653,  8989,  19825,  40081},
+    {0, 1, 15, 113,  575, 2241,  7183, 19825,  48639, 108545},
+    {0, 1, 17, 145,  833, 3649, 13073, 40081, 108545, 265729}
+  };
+
+  With this extension, V(N,K) may be written in terms of U(N,K):
+    V(N,K) = U(N,K) + U(N,K+1)
+   for all N>=0, K>=0.
+  Thus U(N,K+1) represents the number of combinations where the first element
+   is positive or zero, and U(N,K) represents the number of combinations where
+   it is negative.
+  With a large enough table of U(N,K) values, we could write O(N) encoding
+   and O(min(N*log(K),N+K)) decoding routines, but such a table would be
+   prohibitively large for small embedded devices (K may be as large as 32767
+   for small N, and N may be as large as 200).
+
+  Both functions obey the same recurrence relation:
+    V(N,K) = V(N-1,K) + V(N,K-1) + V(N-1,K-1),
+    U(N,K) = U(N-1,K) + U(N,K-1) + U(N-1,K-1),
+   for all N>0, K>0, with different initial conditions at N=0 or K=0.
+  This allows us to construct a row of one of the tables above given the
+   previous row or the next row.
+  Thus we can derive O(NK) encoding and decoding routines with O(K) memory
+   using only addition and subtraction.
+
+  When encoding, we build up from the U(2,K) row and work our way forwards.
+  When decoding, we need to start at the U(N,K) row and work our way backwards,
+   which requires a means of computing U(N,K).
+  U(N,K) may be computed from two previous values with the same N:
+    U(N,K) = ((2*N-1)*U(N,K-1) - U(N,K-2))/(K-1) + U(N,K-2)
+   for all N>1, and since U(N,K) is symmetric, a similar relation holds for two
+   previous values with the same K:
+    U(N,K>1) = ((2*K-1)*U(N-1,K) - U(N-2,K))/(N-1) + U(N-2,K)
+   for all K>1.
+  This allows us to construct an arbitrary row of the U(N,K) table by starting
+   with the first two values, which are constants.
+  This saves roughly 2/3 the work in our O(NK) decoding routine, but costs O(K)
+   multiplications.
+  Similar relations can be derived for V(N,K), but are not used here.
+
+  For N>0 and K>0, U(N,K) and V(N,K) take on the form of an (N-1)-degree
+   polynomial for fixed N.
+  The first few are
+    U(1,K) = 1,
+    U(2,K) = 2*K-1,
+    U(3,K) = (2*K-2)*K+1,
+    U(4,K) = (((4*K-6)*K+8)*K-3)/3,
+    U(5,K) = ((((2*K-4)*K+10)*K-8)*K+3)/3,
+   and
+    V(1,K) = 2,
+    V(2,K) = 4*K,
+    V(3,K) = 4*K*K+2,
+    V(4,K) = 8*(K*K+2)*K/3,
+    V(5,K) = ((4*K*K+20)*K*K+6)/3,
+   for all K>0.
+  This allows us to derive O(N) encoding and O(N*log(K)) decoding routines for
+   small N (and indeed decoding is also O(N) for N<3).
+
+  @ARTICLE{Fis86,
+    author="Thomas R. Fischer",
+    title="A Pyramid Vector Quantizer",
+    journal="IEEE Transactions on Information Theory",
+    volume="IT-32",
+    number=4,
+    pages="568--583",
+    month=Jul,
+    year=1986
+  }*/
+
+#if !defined(SMALL_FOOTPRINT)
+
+/*U(N,K) = U(K,N) := N>0?K>0?U(N-1,K)+U(N,K-1)+U(N-1,K-1):0:K>0?1:0*/
+# define CELT_PVQ_U(_n,_k) (CELT_PVQ_U_ROW[IMIN(_n,_k)][IMAX(_n,_k)])
+/*V(N,K) := U(N,K)+U(N,K+1) = the number of PVQ codewords for a band of size N
+   with K pulses allocated to it.*/
+# define CELT_PVQ_V(_n,_k) (CELT_PVQ_U(_n,_k)+CELT_PVQ_U(_n,(_k)+1))
+
+/*For each V(N,K) supported, we will access element U(min(N,K+1),max(N,K+1)).
+  Thus, the number of entries in row I is the larger of the maximum number of
+   pulses we will ever allocate for a given N=I (K=128, or however many fit in
+   32 bits, whichever is smaller), plus one, and the maximum N for which
+   K=I-1 pulses fit in 32 bits.
+  The largest band size in an Opus Custom mode is 208.
+  Otherwise, we can limit things to the set of N which can be achieved by
+   splitting a band from a standard Opus mode: 176, 144, 96, 88, 72, 64, 48,
+   44, 36, 32, 24, 22, 18, 16, 8, 4, 2).*/
+#if defined(CUSTOM_MODES)
+static const opus_uint32 CELT_PVQ_U_DATA[1488]={
+#else
+static const opus_uint32 CELT_PVQ_U_DATA[1272]={
+#endif
+  /*N=0, K=0...176:*/
+  1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0,
+#endif
+  /*N=1, K=1...176:*/
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1,
+#endif
+  /*N=2, K=2...176:*/
+  3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41,
+  43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79,
+  81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113,
+  115, 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139, 141, 143,
+  145, 147, 149, 151, 153, 155, 157, 159, 161, 163, 165, 167, 169, 171, 173,
+  175, 177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199, 201, 203,
+  205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233,
+  235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263,
+  265, 267, 269, 271, 273, 275, 277, 279, 281, 283, 285, 287, 289, 291, 293,
+  295, 297, 299, 301, 303, 305, 307, 309, 311, 313, 315, 317, 319, 321, 323,
+  325, 327, 329, 331, 333, 335, 337, 339, 341, 343, 345, 347, 349, 351,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  353, 355, 357, 359, 361, 363, 365, 367, 369, 371, 373, 375, 377, 379, 381,
+  383, 385, 387, 389, 391, 393, 395, 397, 399, 401, 403, 405, 407, 409, 411,
+  413, 415,
+#endif
+  /*N=3, K=3...176:*/
+  13, 25, 41, 61, 85, 113, 145, 181, 221, 265, 313, 365, 421, 481, 545, 613,
+  685, 761, 841, 925, 1013, 1105, 1201, 1301, 1405, 1513, 1625, 1741, 1861,
+  1985, 2113, 2245, 2381, 2521, 2665, 2813, 2965, 3121, 3281, 3445, 3613, 3785,
+  3961, 4141, 4325, 4513, 4705, 4901, 5101, 5305, 5513, 5725, 5941, 6161, 6385,
+  6613, 6845, 7081, 7321, 7565, 7813, 8065, 8321, 8581, 8845, 9113, 9385, 9661,
+  9941, 10225, 10513, 10805, 11101, 11401, 11705, 12013, 12325, 12641, 12961,
+  13285, 13613, 13945, 14281, 14621, 14965, 15313, 15665, 16021, 16381, 16745,
+  17113, 17485, 17861, 18241, 18625, 19013, 19405, 19801, 20201, 20605, 21013,
+  21425, 21841, 22261, 22685, 23113, 23545, 23981, 24421, 24865, 25313, 25765,
+  26221, 26681, 27145, 27613, 28085, 28561, 29041, 29525, 30013, 30505, 31001,
+  31501, 32005, 32513, 33025, 33541, 34061, 34585, 35113, 35645, 36181, 36721,
+  37265, 37813, 38365, 38921, 39481, 40045, 40613, 41185, 41761, 42341, 42925,
+  43513, 44105, 44701, 45301, 45905, 46513, 47125, 47741, 48361, 48985, 49613,
+  50245, 50881, 51521, 52165, 52813, 53465, 54121, 54781, 55445, 56113, 56785,
+  57461, 58141, 58825, 59513, 60205, 60901, 61601,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  62305, 63013, 63725, 64441, 65161, 65885, 66613, 67345, 68081, 68821, 69565,
+  70313, 71065, 71821, 72581, 73345, 74113, 74885, 75661, 76441, 77225, 78013,
+  78805, 79601, 80401, 81205, 82013, 82825, 83641, 84461, 85285, 86113,
+#endif
+  /*N=4, K=4...176:*/
+  63, 129, 231, 377, 575, 833, 1159, 1561, 2047, 2625, 3303, 4089, 4991, 6017,
+  7175, 8473, 9919, 11521, 13287, 15225, 17343, 19649, 22151, 24857, 27775,
+  30913, 34279, 37881, 41727, 45825, 50183, 54809, 59711, 64897, 70375, 76153,
+  82239, 88641, 95367, 102425, 109823, 117569, 125671, 134137, 142975, 152193,
+  161799, 171801, 182207, 193025, 204263, 215929, 228031, 240577, 253575,
+  267033, 280959, 295361, 310247, 325625, 341503, 357889, 374791, 392217,
+  410175, 428673, 447719, 467321, 487487, 508225, 529543, 551449, 573951,
+  597057, 620775, 645113, 670079, 695681, 721927, 748825, 776383, 804609,
+  833511, 863097, 893375, 924353, 956039, 988441, 1021567, 1055425, 1090023,
+  1125369, 1161471, 1198337, 1235975, 1274393, 1313599, 1353601, 1394407,
+  1436025, 1478463, 1521729, 1565831, 1610777, 1656575, 1703233, 1750759,
+  1799161, 1848447, 1898625, 1949703, 2001689, 2054591, 2108417, 2163175,
+  2218873, 2275519, 2333121, 2391687, 2451225, 2511743, 2573249, 2635751,
+  2699257, 2763775, 2829313, 2895879, 2963481, 3032127, 3101825, 3172583,
+  3244409, 3317311, 3391297, 3466375, 3542553, 3619839, 3698241, 3777767,
+  3858425, 3940223, 4023169, 4107271, 4192537, 4278975, 4366593, 4455399,
+  4545401, 4636607, 4729025, 4822663, 4917529, 5013631, 5110977, 5209575,
+  5309433, 5410559, 5512961, 5616647, 5721625, 5827903, 5935489, 6044391,
+  6154617, 6266175, 6379073, 6493319, 6608921, 6725887, 6844225, 6963943,
+  7085049, 7207551,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  7331457, 7456775, 7583513, 7711679, 7841281, 7972327, 8104825, 8238783,
+  8374209, 8511111, 8649497, 8789375, 8930753, 9073639, 9218041, 9363967,
+  9511425, 9660423, 9810969, 9963071, 10116737, 10271975, 10428793, 10587199,
+  10747201, 10908807, 11072025, 11236863, 11403329, 11571431, 11741177,
+  11912575,
+#endif
+  /*N=5, K=5...176:*/
+  321, 681, 1289, 2241, 3649, 5641, 8361, 11969, 16641, 22569, 29961, 39041,
+  50049, 63241, 78889, 97281, 118721, 143529, 172041, 204609, 241601, 283401,
+  330409, 383041, 441729, 506921, 579081, 658689, 746241, 842249, 947241,
+  1061761, 1186369, 1321641, 1468169, 1626561, 1797441, 1981449, 2179241,
+  2391489, 2618881, 2862121, 3121929, 3399041, 3694209, 4008201, 4341801,
+  4695809, 5071041, 5468329, 5888521, 6332481, 6801089, 7295241, 7815849,
+  8363841, 8940161, 9545769, 10181641, 10848769, 11548161, 12280841, 13047849,
+  13850241, 14689089, 15565481, 16480521, 17435329, 18431041, 19468809,
+  20549801, 21675201, 22846209, 24064041, 25329929, 26645121, 28010881,
+  29428489, 30899241, 32424449, 34005441, 35643561, 37340169, 39096641,
+  40914369, 42794761, 44739241, 46749249, 48826241, 50971689, 53187081,
+  55473921, 57833729, 60268041, 62778409, 65366401, 68033601, 70781609,
+  73612041, 76526529, 79526721, 82614281, 85790889, 89058241, 92418049,
+  95872041, 99421961, 103069569, 106816641, 110664969, 114616361, 118672641,
+  122835649, 127107241, 131489289, 135983681, 140592321, 145317129, 150160041,
+  155123009, 160208001, 165417001, 170752009, 176215041, 181808129, 187533321,
+  193392681, 199388289, 205522241, 211796649, 218213641, 224775361, 231483969,
+  238341641, 245350569, 252512961, 259831041, 267307049, 274943241, 282741889,
+  290705281, 298835721, 307135529, 315607041, 324252609, 333074601, 342075401,
+  351257409, 360623041, 370174729, 379914921, 389846081, 399970689, 410291241,
+  420810249, 431530241, 442453761, 453583369, 464921641, 476471169, 488234561,
+  500214441, 512413449, 524834241, 537479489, 550351881, 563454121, 576788929,
+  590359041, 604167209, 618216201, 632508801,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  647047809, 661836041, 676876329, 692171521, 707724481, 723538089, 739615241,
+  755958849, 772571841, 789457161, 806617769, 824056641, 841776769, 859781161,
+  878072841, 896654849, 915530241, 934702089, 954173481, 973947521, 994027329,
+  1014416041, 1035116809, 1056132801, 1077467201, 1099123209, 1121104041,
+  1143412929, 1166053121, 1189027881, 1212340489, 1235994241,
+#endif
+  /*N=6, K=6...96:*/
+  1683, 3653, 7183, 13073, 22363, 36365, 56695, 85305, 124515, 177045, 246047,
+  335137, 448427, 590557, 766727, 982729, 1244979, 1560549, 1937199, 2383409,
+  2908411, 3522221, 4235671, 5060441, 6009091, 7095093, 8332863, 9737793,
+  11326283, 13115773, 15124775, 17372905, 19880915, 22670725, 25765455,
+  29189457, 32968347, 37129037, 41699767, 46710137, 52191139, 58175189,
+  64696159, 71789409, 79491819, 87841821, 96879431, 106646281, 117185651,
+  128542501, 140763503, 153897073, 167993403, 183104493, 199284183, 216588185,
+  235074115, 254801525, 275831935, 298228865, 322057867, 347386557, 374284647,
+  402823977, 433078547, 465124549, 499040399, 534906769, 572806619, 612825229,
+  655050231, 699571641, 746481891, 795875861, 847850911, 902506913, 959946283,
+  1020274013, 1083597703, 1150027593, 1219676595, 1292660325, 1369097135,
+  1449108145, 1532817275, 1620351277, 1711839767, 1807415257, 1907213187,
+  2011371957, 2120032959,
+#if defined(CUSTOM_MODES)
+  /*...109:*/
+  2233340609U, 2351442379U, 2474488829U, 2602633639U, 2736033641U, 2874848851U,
+  3019242501U, 3169381071U, 3325434321U, 3487575323U, 3655980493U, 3830829623U,
+  4012305913U,
+#endif
+  /*N=7, K=7...54*/
+  8989, 19825, 40081, 75517, 134245, 227305, 369305, 579125, 880685, 1303777,
+  1884961, 2668525, 3707509, 5064793, 6814249, 9041957, 11847485, 15345233,
+  19665841, 24957661, 31388293, 39146185, 48442297, 59511829, 72616013,
+  88043969, 106114625, 127178701, 151620757, 179861305, 212358985, 249612805,
+  292164445, 340600625, 395555537, 457713341, 527810725, 606639529, 695049433,
+  793950709, 904317037, 1027188385, 1163673953, 1314955181, 1482288821,
+  1667010073, 1870535785, 2094367717,
+#if defined(CUSTOM_MODES)
+  /*...60:*/
+  2340095869U, 2609401873U, 2904062449U, 3225952925U, 3577050821U, 3959439497U,
+#endif
+  /*N=8, K=8...37*/
+  48639, 108545, 224143, 433905, 795455, 1392065, 2340495, 3800305, 5984767,
+  9173505, 13726991, 20103025, 28875327, 40754369, 56610575, 77500017,
+  104692735, 139703809, 184327311, 240673265, 311207743, 398796225, 506750351,
+  638878193, 799538175, 993696769, 1226990095, 1505789553, 1837271615,
+  2229491905U,
+#if defined(CUSTOM_MODES)
+  /*...40:*/
+  2691463695U, 3233240945U, 3866006015U,
+#endif
+  /*N=9, K=9...28:*/
+  265729, 598417, 1256465, 2485825, 4673345, 8405905, 14546705, 24331777,
+  39490049, 62390545, 96220561, 145198913, 214828609, 312193553, 446304145,
+  628496897, 872893441, 1196924561, 1621925137, 2173806145U,
+#if defined(CUSTOM_MODES)
+  /*...29:*/
+  2883810113U,
+#endif
+  /*N=10, K=10...24:*/
+  1462563, 3317445, 7059735, 14218905, 27298155, 50250765, 89129247, 152951073,
+  254831667, 413442773, 654862247, 1014889769, 1541911931, 2300409629U,
+  3375210671U,
+  /*N=11, K=11...19:*/
+  8097453, 18474633, 39753273, 81270333, 158819253, 298199265, 540279585,
+  948062325, 1616336765,
+#if defined(CUSTOM_MODES)
+  /*...20:*/
+  2684641785U,
+#endif
+  /*N=12, K=12...18:*/
+  45046719, 103274625, 224298231, 464387817, 921406335, 1759885185,
+  3248227095U,
+  /*N=13, K=13...16:*/
+  251595969, 579168825, 1267854873, 2653649025U,
+  /*N=14, K=14:*/
+  1409933619
+};
+
+#if defined(CUSTOM_MODES)
+static const opus_uint32 *const CELT_PVQ_U_ROW[15]={
+  CELT_PVQ_U_DATA+   0,CELT_PVQ_U_DATA+ 208,CELT_PVQ_U_DATA+ 415,
+  CELT_PVQ_U_DATA+ 621,CELT_PVQ_U_DATA+ 826,CELT_PVQ_U_DATA+1030,
+  CELT_PVQ_U_DATA+1233,CELT_PVQ_U_DATA+1336,CELT_PVQ_U_DATA+1389,
+  CELT_PVQ_U_DATA+1421,CELT_PVQ_U_DATA+1441,CELT_PVQ_U_DATA+1455,
+  CELT_PVQ_U_DATA+1464,CELT_PVQ_U_DATA+1470,CELT_PVQ_U_DATA+1473
+};
+#else
+static const opus_uint32 *const CELT_PVQ_U_ROW[15]={
+  CELT_PVQ_U_DATA+   0,CELT_PVQ_U_DATA+ 176,CELT_PVQ_U_DATA+ 351,
+  CELT_PVQ_U_DATA+ 525,CELT_PVQ_U_DATA+ 698,CELT_PVQ_U_DATA+ 870,
+  CELT_PVQ_U_DATA+1041,CELT_PVQ_U_DATA+1131,CELT_PVQ_U_DATA+1178,
+  CELT_PVQ_U_DATA+1207,CELT_PVQ_U_DATA+1226,CELT_PVQ_U_DATA+1240,
+  CELT_PVQ_U_DATA+1248,CELT_PVQ_U_DATA+1254,CELT_PVQ_U_DATA+1257
+};
+#endif
+
+#if defined(CUSTOM_MODES)
+void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){
+  int k;
+  /*_maxk==0 => there's nothing to do.*/
+  celt_assert(_maxk>0);
+  _bits[0]=0;
+  for(k=1;k<=_maxk;k++)_bits[k]=log2_frac(CELT_PVQ_V(_n,k),_frac);
+}
+#endif
+
+static opus_uint32 icwrs(int _n,const int *_y){
+  opus_uint32 i;
+  int         j;
+  int         k;
+  celt_assert(_n>=2);
+  j=_n-1;
+  i=_y[j]<0;
+  k=abs(_y[j]);
+  do{
+    j--;
+    i+=CELT_PVQ_U(_n-j,k);
+    k+=abs(_y[j]);
+    if(_y[j]<0)i+=CELT_PVQ_U(_n-j,k+1);
+  }
+  while(j>0);
+  return i;
+}
+
+void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
+  celt_assert(_k>0);
+  ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k));
+}
+
+static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
+  opus_uint32 p;
+  int         s;
+  int         k0;
+  opus_int16  val;
+  opus_val32  yy=0;
+  celt_assert(_k>0);
+  celt_assert(_n>1);
+  while(_n>2){
+    opus_uint32 q;
+    /*Lots of pulses case:*/
+    if(_k>=_n){
+      const opus_uint32 *row;
+      row=CELT_PVQ_U_ROW[_n];
+      /*Are the pulses in this dimension negative?*/
+      p=row[_k+1];
+      s=-(_i>=p);
+      _i-=p&s;
+      /*Count how many pulses were placed in this dimension.*/
+      k0=_k;
+      q=row[_n];
+      if(q>_i){
+        celt_sig_assert(p>q);
+        _k=_n;
+        do p=CELT_PVQ_U_ROW[--_k][_n];
+        while(p>_i);
+      }
+      else for(p=row[_k];p>_i;p=row[_k])_k--;
+      _i-=p;
+      val=(k0-_k+s)^s;
+      *_y++=val;
+      yy=MAC16_16(yy,val,val);
+    }
+    /*Lots of dimensions case:*/
+    else{
+      /*Are there any pulses in this dimension at all?*/
+      p=CELT_PVQ_U_ROW[_k][_n];
+      q=CELT_PVQ_U_ROW[_k+1][_n];
+      if(p<=_i&&_i<q){
+        _i-=p;
+        *_y++=0;
+      }
+      else{
+        /*Are the pulses in this dimension negative?*/
+        s=-(_i>=q);
+        _i-=q&s;
+        /*Count how many pulses were placed in this dimension.*/
+        k0=_k;
+        do p=CELT_PVQ_U_ROW[--_k][_n];
+        while(p>_i);
+        _i-=p;
+        val=(k0-_k+s)^s;
+        *_y++=val;
+        yy=MAC16_16(yy,val,val);
+      }
+    }
+    _n--;
+  }
+  /*_n==2*/
+  p=2*_k+1;
+  s=-(_i>=p);
+  _i-=p&s;
+  k0=_k;
+  _k=(_i+1)>>1;
+  if(_k)_i-=2*_k-1;
+  val=(k0-_k+s)^s;
+  *_y++=val;
+  yy=MAC16_16(yy,val,val);
+  /*_n==1*/
+  s=-(int)_i;
+  val=(_k+s)^s;
+  *_y=val;
+  yy=MAC16_16(yy,val,val);
+  return yy;
+}
+
+opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
+  return cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y);
+}
+
+#else /* SMALL_FOOTPRINT */
+
+/*Computes the next row/column of any recurrence that obeys the relation
+   u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1].
+  _ui0 is the base case for the new row/column.*/
+static OPUS_INLINE void unext(opus_uint32 *_ui,unsigned _len,opus_uint32 _ui0){
+  opus_uint32 ui1;
+  unsigned      j;
+  /*This do-while will overrun the array if we don't have storage for at least
+     2 values.*/
+  j=1; do {
+    ui1=UADD32(UADD32(_ui[j],_ui[j-1]),_ui0);
+    _ui[j-1]=_ui0;
+    _ui0=ui1;
+  } while (++j<_len);
+  _ui[j-1]=_ui0;
+}
+
+/*Computes the previous row/column of any recurrence that obeys the relation
+   u[i-1][j]=u[i][j]-u[i][j-1]-u[i-1][j-1].
+  _ui0 is the base case for the new row/column.*/
+static OPUS_INLINE void uprev(opus_uint32 *_ui,unsigned _n,opus_uint32 _ui0){
+  opus_uint32 ui1;
+  unsigned      j;
+  /*This do-while will overrun the array if we don't have storage for at least
+     2 values.*/
+  j=1; do {
+    ui1=USUB32(USUB32(_ui[j],_ui[j-1]),_ui0);
+    _ui[j-1]=_ui0;
+    _ui0=ui1;
+  } while (++j<_n);
+  _ui[j-1]=_ui0;
+}
+
+/*Compute V(_n,_k), as well as U(_n,0..._k+1).
+  _u: On exit, _u[i] contains U(_n,i) for i in [0..._k+1].*/
+static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){
+  opus_uint32 um2;
+  unsigned      len;
+  unsigned      k;
+  len=_k+2;
+  /*We require storage at least 3 values (e.g., _k>0).*/
+  celt_assert(len>=3);
+  _u[0]=0;
+  _u[1]=um2=1;
+  /*If _n==0, _u[0] should be 1 and the rest should be 0.*/
+  /*If _n==1, _u[i] should be 1 for i>1.*/
+  celt_assert(_n>=2);
+  /*If _k==0, the following do-while loop will overflow the buffer.*/
+  celt_assert(_k>0);
+  k=2;
+  do _u[k]=(k<<1)-1;
+  while(++k<len);
+  for(k=2;k<_n;k++)unext(_u+1,_k+1,1);
+  return _u[_k]+_u[_k+1];
+}
+
+/*Returns the _i'th combination of _k elements chosen from a set of size _n
+   with associated sign bits.
+  _y: Returns the vector of pulses.
+  _u: Must contain entries [0..._k+1] of row _n of U() on input.
+      Its contents will be destructively modified.*/
+static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
+  int j;
+  opus_int16 val;
+  opus_val32 yy=0;
+  celt_assert(_n>0);
+  j=0;
+  do{
+    opus_uint32 p;
+    int           s;
+    int           yj;
+    p=_u[_k+1];
+    s=-(_i>=p);
+    _i-=p&s;
+    yj=_k;
+    p=_u[_k];
+    while(p>_i)p=_u[--_k];
+    _i-=p;
+    yj-=_k;
+    val=(yj+s)^s;
+    _y[j]=val;
+    yy=MAC16_16(yy,val,val);
+    uprev(_u,_k+2,0);
+  }
+  while(++j<_n);
+  return yy;
+}
+
+/*Returns the index of the given combination of K elements chosen from a set
+   of size 1 with associated sign bits.
+  _y: The vector of pulses, whose sum of absolute values is K.
+  _k: Returns K.*/
+static OPUS_INLINE opus_uint32 icwrs1(const int *_y,int *_k){
+  *_k=abs(_y[0]);
+  return _y[0]<0;
+}
+
+/*Returns the index of the given combination of K elements chosen from a set
+   of size _n with associated sign bits.
+  _y:  The vector of pulses, whose sum of absolute values must be _k.
+  _nc: Returns V(_n,_k).*/
+static OPUS_INLINE opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y,
+ opus_uint32 *_u){
+  opus_uint32 i;
+  int         j;
+  int         k;
+  /*We can't unroll the first two iterations of the loop unless _n>=2.*/
+  celt_assert(_n>=2);
+  _u[0]=0;
+  for(k=1;k<=_k+1;k++)_u[k]=(k<<1)-1;
+  i=icwrs1(_y+_n-1,&k);
+  j=_n-2;
+  i+=_u[k];
+  k+=abs(_y[j]);
+  if(_y[j]<0)i+=_u[k+1];
+  while(j-->0){
+    unext(_u,_k+2,0);
+    i+=_u[k];
+    k+=abs(_y[j]);
+    if(_y[j]<0)i+=_u[k+1];
+  }
+  *_nc=_u[k]+_u[k+1];
+  return i;
+}
+
+#ifdef CUSTOM_MODES
+void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){
+  int k;
+  /*_maxk==0 => there's nothing to do.*/
+  celt_assert(_maxk>0);
+  _bits[0]=0;
+  if (_n==1)
+  {
+    for (k=1;k<=_maxk;k++)
+      _bits[k] = 1<<_frac;
+  }
+  else {
+    VARDECL(opus_uint32,u);
+    SAVE_STACK;
+    ALLOC(u,_maxk+2U,opus_uint32);
+    ncwrs_urow(_n,_maxk,u);
+    for(k=1;k<=_maxk;k++)
+      _bits[k]=log2_frac(u[k]+u[k+1],_frac);
+    RESTORE_STACK;
+  }
+}
+#endif /* CUSTOM_MODES */
+
+void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
+  opus_uint32 i;
+  VARDECL(opus_uint32,u);
+  opus_uint32 nc;
+  SAVE_STACK;
+  celt_assert(_k>0);
+  ALLOC(u,_k+2U,opus_uint32);
+  i=icwrs(_n,_k,&nc,_y,u);
+  ec_enc_uint(_enc,i,nc);
+  RESTORE_STACK;
+}
+
+opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
+  VARDECL(opus_uint32,u);
+  int ret;
+  SAVE_STACK;
+  celt_assert(_k>0);
+  ALLOC(u,_k+2U,opus_uint32);
+  ret = cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
+  RESTORE_STACK;
+  return ret;
+}
+
+#endif /* SMALL_FOOTPRINT */
diff --git a/libopus/celt/cwrs.h b/libopus/celt/cwrs.h
new file mode 100644
index 0000000..7cd4717
--- /dev/null
+++ b/libopus/celt/cwrs.h
@@ -0,0 +1,48 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2007-2009 Timothy B. Terriberry
+   Written by Timothy B. Terriberry and Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CWRS_H
+#define CWRS_H
+
+#include "arch.h"
+#include "stack_alloc.h"
+#include "entenc.h"
+#include "entdec.h"
+
+#ifdef CUSTOM_MODES
+int log2_frac(opus_uint32 val, int frac);
+#endif
+
+void get_required_bits(opus_int16 *bits, int N, int K, int frac);
+
+void encode_pulses(const int *_y, int N, int K, ec_enc *enc);
+
+opus_val32 decode_pulses(int *_y, int N, int K, ec_dec *dec);
+
+#endif /* CWRS_H */
diff --git a/libopus/celt/ecintrin.h b/libopus/celt/ecintrin.h
new file mode 100644
index 0000000..2263cff
--- /dev/null
+++ b/libopus/celt/ecintrin.h
@@ -0,0 +1,87 @@
+/* Copyright (c) 2003-2008 Timothy B. Terriberry
+   Copyright (c) 2008 Xiph.Org Foundation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*Some common macros for potential platform-specific optimization.*/
+#include "opus_types.h"
+#include <math.h>
+#include <limits.h>
+#include "arch.h"
+#if !defined(_ecintrin_H)
+# define _ecintrin_H (1)
+
+/*Some specific platforms may have optimized intrinsic or OPUS_INLINE assembly
+   versions of these functions which can substantially improve performance.
+  We define macros for them to allow easy incorporation of these non-ANSI
+   features.*/
+
+/*Modern gcc (4.x) can compile the naive versions of min and max with cmov if
+   given an appropriate architecture, but the branchless bit-twiddling versions
+   are just as fast, and do not require any special target architecture.
+  Earlier gcc versions (3.x) compiled both code to the same assembly
+   instructions, because of the way they represented ((_b)>(_a)) internally.*/
+# define EC_MINI(_a,_b)      ((_a)+(((_b)-(_a))&-((_b)<(_a))))
+
+/*Count leading zeros.
+  This macro should only be used for implementing ec_ilog(), if it is defined.
+  All other code should use EC_ILOG() instead.*/
+#if defined(_MSC_VER) && (_MSC_VER >= 1400)
+# include <intrin.h>
+/*In _DEBUG mode this is not an intrinsic by default.*/
+# pragma intrinsic(_BitScanReverse)
+
+static __inline int ec_bsr(unsigned long _x){
+  unsigned long ret;
+  _BitScanReverse(&ret,_x);
+  return (int)ret;
+}
+# define EC_CLZ0    (1)
+# define EC_CLZ(_x) (-ec_bsr(_x))
+#elif defined(ENABLE_TI_DSPLIB)
+# include "dsplib.h"
+# define EC_CLZ0    (31)
+# define EC_CLZ(_x) (_lnorm(_x))
+#elif __GNUC_PREREQ(3,4)
+# if INT_MAX>=2147483647
+#  define EC_CLZ0    ((int)sizeof(unsigned)*CHAR_BIT)
+#  define EC_CLZ(_x) (__builtin_clz(_x))
+# elif LONG_MAX>=2147483647L
+#  define EC_CLZ0    ((int)sizeof(unsigned long)*CHAR_BIT)
+#  define EC_CLZ(_x) (__builtin_clzl(_x))
+# endif
+#endif
+
+#if defined(EC_CLZ)
+/*Note that __builtin_clz is not defined when _x==0, according to the gcc
+   documentation (and that of the BSR instruction that implements it on x86).
+  The majority of the time we can never pass it zero.
+  When we need to, it can be special cased.*/
+# define EC_ILOG(_x) (EC_CLZ0-EC_CLZ(_x))
+#else
+int ec_ilog(opus_uint32 _v);
+# define EC_ILOG(_x) (ec_ilog(_x))
+#endif
+#endif
diff --git a/libopus/celt/entcode.c b/libopus/celt/entcode.c
new file mode 100644
index 0000000..70f3201
--- /dev/null
+++ b/libopus/celt/entcode.c
@@ -0,0 +1,153 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "entcode.h"
+#include "arch.h"
+
+#if !defined(EC_CLZ)
+/*This is a fallback for systems where we don't know how to access
+   a BSR or CLZ instruction (see ecintrin.h).
+  If you are optimizing Opus on a new platform and it has a native CLZ or
+   BZR (e.g. cell, MIPS, x86, etc) then making it available to Opus will be
+   an easy performance win.*/
+int ec_ilog(opus_uint32 _v){
+  /*On a Pentium M, this branchless version tested as the fastest on
+     1,000,000,000 random 32-bit integers, edging out a similar version with
+     branches, and a 256-entry LUT version.*/
+  int ret;
+  int m;
+  ret=!!_v;
+  m=!!(_v&0xFFFF0000)<<4;
+  _v>>=m;
+  ret|=m;
+  m=!!(_v&0xFF00)<<3;
+  _v>>=m;
+  ret|=m;
+  m=!!(_v&0xF0)<<2;
+  _v>>=m;
+  ret|=m;
+  m=!!(_v&0xC)<<1;
+  _v>>=m;
+  ret|=m;
+  ret+=!!(_v&0x2);
+  return ret;
+}
+#endif
+
+#if 1
+/* This is a faster version of ec_tell_frac() that takes advantage
+   of the low (1/8 bit) resolution to use just a linear function
+   followed by a lookup to determine the exact transition thresholds. */
+opus_uint32 ec_tell_frac(ec_ctx *_this){
+  static const unsigned correction[8] =
+    {35733, 38967, 42495, 46340,
+     50535, 55109, 60097, 65535};
+  opus_uint32 nbits;
+  opus_uint32 r;
+  int         l;
+  unsigned    b;
+  nbits=_this->nbits_total<<BITRES;
+  l=EC_ILOG(_this->rng);
+  r=_this->rng>>(l-16);
+  b = (r>>12)-8;
+  b += r>correction[b];
+  l = (l<<3)+b;
+  return nbits-l;
+}
+#else
+opus_uint32 ec_tell_frac(ec_ctx *_this){
+  opus_uint32 nbits;
+  opus_uint32 r;
+  int         l;
+  int         i;
+  /*To handle the non-integral number of bits still left in the encoder/decoder
+     state, we compute the worst-case number of bits of val that must be
+     encoded to ensure that the value is inside the range for any possible
+     subsequent bits.
+    The computation here is independent of val itself (the decoder does not
+     even track that value), even though the real number of bits used after
+     ec_enc_done() may be 1 smaller if rng is a power of two and the
+     corresponding trailing bits of val are all zeros.
+    If we did try to track that special case, then coding a value with a
+     probability of 1/(1<<n) might sometimes appear to use more than n bits.
+    This may help explain the surprising result that a newly initialized
+     encoder or decoder claims to have used 1 bit.*/
+  nbits=_this->nbits_total<<BITRES;
+  l=EC_ILOG(_this->rng);
+  r=_this->rng>>(l-16);
+  for(i=BITRES;i-->0;){
+    int b;
+    r=r*r>>15;
+    b=(int)(r>>16);
+    l=l<<1|b;
+    r>>=b;
+  }
+  return nbits-l;
+}
+#endif
+
+#ifdef USE_SMALL_DIV_TABLE
+/* Result of 2^32/(2*i+1), except for i=0. */
+const opus_uint32 SMALL_DIV_TABLE[129] = {
+   0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924,
+   0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111,
+   0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C,
+   0x0A3D70A3, 0x097B425E, 0x08D3DCB0, 0x08421084,
+   0x07C1F07C, 0x07507507, 0x06EB3E45, 0x06906906,
+   0x063E7063, 0x05F417D0, 0x05B05B05, 0x0572620A,
+   0x05397829, 0x05050505, 0x04D4873E, 0x04A7904A,
+   0x047DC11F, 0x0456C797, 0x04325C53, 0x04104104,
+   0x03F03F03, 0x03D22635, 0x03B5CC0E, 0x039B0AD1,
+   0x0381C0E0, 0x0369D036, 0x03531DEC, 0x033D91D2,
+   0x0329161F, 0x03159721, 0x03030303, 0x02F14990,
+   0x02E05C0B, 0x02D02D02, 0x02C0B02C, 0x02B1DA46,
+   0x02A3A0FD, 0x0295FAD4, 0x0288DF0C, 0x027C4597,
+   0x02702702, 0x02647C69, 0x02593F69, 0x024E6A17,
+   0x0243F6F0, 0x0239E0D5, 0x02302302, 0x0226B902,
+   0x021D9EAD, 0x0214D021, 0x020C49BA, 0x02040810,
+   0x01FC07F0, 0x01F44659, 0x01ECC07B, 0x01E573AC,
+   0x01DE5D6E, 0x01D77B65, 0x01D0CB58, 0x01CA4B30,
+   0x01C3F8F0, 0x01BDD2B8, 0x01B7D6C3, 0x01B20364,
+   0x01AC5701, 0x01A6D01A, 0x01A16D3F, 0x019C2D14,
+   0x01970E4F, 0x01920FB4, 0x018D3018, 0x01886E5F,
+   0x0183C977, 0x017F405F, 0x017AD220, 0x01767DCE,
+   0x01724287, 0x016E1F76, 0x016A13CD, 0x01661EC6,
+   0x01623FA7, 0x015E75BB, 0x015AC056, 0x01571ED3,
+   0x01539094, 0x01501501, 0x014CAB88, 0x0149539E,
+   0x01460CBC, 0x0142D662, 0x013FB013, 0x013C995A,
+   0x013991C2, 0x013698DF, 0x0133AE45, 0x0130D190,
+   0x012E025C, 0x012B404A, 0x01288B01, 0x0125E227,
+   0x01234567, 0x0120B470, 0x011E2EF3, 0x011BB4A4,
+   0x01194538, 0x0116E068, 0x011485F0, 0x0112358E,
+   0x010FEF01, 0x010DB20A, 0x010B7E6E, 0x010953F3,
+   0x01073260, 0x0105197F, 0x0103091B, 0x01010101
+};
+#endif
diff --git a/libopus/celt/entcode.h b/libopus/celt/entcode.h
new file mode 100644
index 0000000..3763e3f
--- /dev/null
+++ b/libopus/celt/entcode.h
@@ -0,0 +1,152 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+   Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#if !defined(_entcode_H)
+# define _entcode_H (1)
+# include <limits.h>
+# include <stddef.h>
+# include "ecintrin.h"
+
+extern const opus_uint32 SMALL_DIV_TABLE[129];
+
+#ifdef OPUS_ARM_ASM
+#define USE_SMALL_DIV_TABLE
+#endif
+
+/*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a
+   larger type, you can speed up the decoder by using it here.*/
+typedef opus_uint32           ec_window;
+typedef struct ec_ctx         ec_ctx;
+typedef struct ec_ctx         ec_enc;
+typedef struct ec_ctx         ec_dec;
+
+# define EC_WINDOW_SIZE ((int)sizeof(ec_window)*CHAR_BIT)
+
+/*The number of bits to use for the range-coded part of unsigned integers.*/
+# define EC_UINT_BITS   (8)
+
+/*The resolution of fractional-precision bit usage measurements, i.e.,
+   3 => 1/8th bits.*/
+# define BITRES 3
+
+/*The entropy encoder/decoder context.
+  We use the same structure for both, so that common functions like ec_tell()
+   can be used on either one.*/
+struct ec_ctx{
+   /*Buffered input/output.*/
+   unsigned char *buf;
+   /*The size of the buffer.*/
+   opus_uint32    storage;
+   /*The offset at which the last byte containing raw bits was read/written.*/
+   opus_uint32    end_offs;
+   /*Bits that will be read from/written at the end.*/
+   ec_window      end_window;
+   /*Number of valid bits in end_window.*/
+   int            nend_bits;
+   /*The total number of whole bits read/written.
+     This does not include partial bits currently in the range coder.*/
+   int            nbits_total;
+   /*The offset at which the next range coder byte will be read/written.*/
+   opus_uint32    offs;
+   /*The number of values in the current range.*/
+   opus_uint32    rng;
+   /*In the decoder: the difference between the top of the current range and
+      the input value, minus one.
+     In the encoder: the low end of the current range.*/
+   opus_uint32    val;
+   /*In the decoder: the saved normalization factor from ec_decode().
+     In the encoder: the number of oustanding carry propagating symbols.*/
+   opus_uint32    ext;
+   /*A buffered input/output symbol, awaiting carry propagation.*/
+   int            rem;
+   /*Nonzero if an error occurred.*/
+   int            error;
+};
+
+static OPUS_INLINE opus_uint32 ec_range_bytes(ec_ctx *_this){
+  return _this->offs;
+}
+
+static OPUS_INLINE unsigned char *ec_get_buffer(ec_ctx *_this){
+  return _this->buf;
+}
+
+static OPUS_INLINE int ec_get_error(ec_ctx *_this){
+  return _this->error;
+}
+
+/*Returns the number of bits "used" by the encoded or decoded symbols so far.
+  This same number can be computed in either the encoder or the decoder, and is
+   suitable for making coding decisions.
+  Return: The number of bits.
+          This will always be slightly larger than the exact value (e.g., all
+           rounding error is in the positive direction).*/
+static OPUS_INLINE int ec_tell(ec_ctx *_this){
+  return _this->nbits_total-EC_ILOG(_this->rng);
+}
+
+/*Returns the number of bits "used" by the encoded or decoded symbols so far.
+  This same number can be computed in either the encoder or the decoder, and is
+   suitable for making coding decisions.
+  Return: The number of bits scaled by 2**BITRES.
+          This will always be slightly larger than the exact value (e.g., all
+           rounding error is in the positive direction).*/
+opus_uint32 ec_tell_frac(ec_ctx *_this);
+
+/* Tested exhaustively for all n and for 1<=d<=256 */
+static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) {
+   celt_sig_assert(d>0);
+#ifdef USE_SMALL_DIV_TABLE
+   if (d>256)
+      return n/d;
+   else {
+      opus_uint32 t, q;
+      t = EC_ILOG(d&-d);
+      q = (opus_uint64)SMALL_DIV_TABLE[d>>t]*(n>>(t-1))>>32;
+      return q+(n-q*d >= d);
+   }
+#else
+   return n/d;
+#endif
+}
+
+static OPUS_INLINE opus_int32 celt_sudiv(opus_int32 n, opus_int32 d) {
+   celt_sig_assert(d>0);
+#ifdef USE_SMALL_DIV_TABLE
+   if (n<0)
+      return -(opus_int32)celt_udiv(-n, d);
+   else
+      return celt_udiv(n, d);
+#else
+   return n/d;
+#endif
+}
+
+#endif
diff --git a/libopus/celt/entdec.c b/libopus/celt/entdec.c
new file mode 100644
index 0000000..0b3433e
--- /dev/null
+++ b/libopus/celt/entdec.c
@@ -0,0 +1,245 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+   Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stddef.h>
+#include "os_support.h"
+#include "arch.h"
+#include "entdec.h"
+#include "mfrngcod.h"
+
+/*A range decoder.
+  This is an entropy decoder based upon \cite{Mar79}, which is itself a
+   rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}.
+  It is very similar to arithmetic encoding, except that encoding is done with
+   digits in any base, instead of with bits, and so it is faster when using
+   larger bases (i.e.: a byte).
+  The author claims an average waste of $\frac{1}{2}\log_b(2b)$ bits, where $b$
+   is the base, longer than the theoretical optimum, but to my knowledge there
+   is no published justification for this claim.
+  This only seems true when using near-infinite precision arithmetic so that
+   the process is carried out with no rounding errors.
+
+  An excellent description of implementation details is available at
+   http://www.arturocampos.com/ac_range.html
+  A recent work \cite{MNW98} which proposes several changes to arithmetic
+   encoding for efficiency actually re-discovers many of the principles
+   behind range encoding, and presents a good theoretical analysis of them.
+
+  End of stream is handled by writing out the smallest number of bits that
+   ensures that the stream will be correctly decoded regardless of the value of
+   any subsequent bits.
+  ec_tell() can be used to determine how many bits were needed to decode
+   all the symbols thus far; other data can be packed in the remaining bits of
+   the input buffer.
+  @PHDTHESIS{Pas76,
+    author="Richard Clark Pasco",
+    title="Source coding algorithms for fast data compression",
+    school="Dept. of Electrical Engineering, Stanford University",
+    address="Stanford, CA",
+    month=May,
+    year=1976
+  }
+  @INPROCEEDINGS{Mar79,
+   author="Martin, G.N.N.",
+   title="Range encoding: an algorithm for removing redundancy from a digitised
+    message",
+   booktitle="Video & Data Recording Conference",
+   year=1979,
+   address="Southampton",
+   month=Jul
+  }
+  @ARTICLE{MNW98,
+   author="Alistair Moffat and Radford Neal and Ian H. Witten",
+   title="Arithmetic Coding Revisited",
+   journal="{ACM} Transactions on Information Systems",
+   year=1998,
+   volume=16,
+   number=3,
+   pages="256--294",
+   month=Jul,
+   URL="http://www.stanford.edu/class/ee398a/handouts/papers/Moffat98ArithmCoding.pdf"
+  }*/
+
+static int ec_read_byte(ec_dec *_this){
+  return _this->offs<_this->storage?_this->buf[_this->offs++]:0;
+}
+
+static int ec_read_byte_from_end(ec_dec *_this){
+  return _this->end_offs<_this->storage?
+   _this->buf[_this->storage-++(_this->end_offs)]:0;
+}
+
+/*Normalizes the contents of val and rng so that rng lies entirely in the
+   high-order symbol.*/
+static void ec_dec_normalize(ec_dec *_this){
+  /*If the range is too small, rescale it and input some bits.*/
+  while(_this->rng<=EC_CODE_BOT){
+    int sym;
+    _this->nbits_total+=EC_SYM_BITS;
+    _this->rng<<=EC_SYM_BITS;
+    /*Use up the remaining bits from our last symbol.*/
+    sym=_this->rem;
+    /*Read the next value from the input.*/
+    _this->rem=ec_read_byte(_this);
+    /*Take the rest of the bits we need from this new symbol.*/
+    sym=(sym<<EC_SYM_BITS|_this->rem)>>(EC_SYM_BITS-EC_CODE_EXTRA);
+    /*And subtract them from val, capped to be less than EC_CODE_TOP.*/
+    _this->val=((_this->val<<EC_SYM_BITS)+(EC_SYM_MAX&~sym))&(EC_CODE_TOP-1);
+  }
+}
+
+void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage){
+  _this->buf=_buf;
+  _this->storage=_storage;
+  _this->end_offs=0;
+  _this->end_window=0;
+  _this->nend_bits=0;
+  /*This is the offset from which ec_tell() will subtract partial bits.
+    The final value after the ec_dec_normalize() call will be the same as in
+     the encoder, but we have to compensate for the bits that are added there.*/
+  _this->nbits_total=EC_CODE_BITS+1
+   -((EC_CODE_BITS-EC_CODE_EXTRA)/EC_SYM_BITS)*EC_SYM_BITS;
+  _this->offs=0;
+  _this->rng=1U<<EC_CODE_EXTRA;
+  _this->rem=ec_read_byte(_this);
+  _this->val=_this->rng-1-(_this->rem>>(EC_SYM_BITS-EC_CODE_EXTRA));
+  _this->error=0;
+  /*Normalize the interval.*/
+  ec_dec_normalize(_this);
+}
+
+unsigned ec_decode(ec_dec *_this,unsigned _ft){
+  unsigned s;
+  _this->ext=celt_udiv(_this->rng,_ft);
+  s=(unsigned)(_this->val/_this->ext);
+  return _ft-EC_MINI(s+1,_ft);
+}
+
+unsigned ec_decode_bin(ec_dec *_this,unsigned _bits){
+   unsigned s;
+   _this->ext=_this->rng>>_bits;
+   s=(unsigned)(_this->val/_this->ext);
+   return (1U<<_bits)-EC_MINI(s+1U,1U<<_bits);
+}
+
+void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft){
+  opus_uint32 s;
+  s=IMUL32(_this->ext,_ft-_fh);
+  _this->val-=s;
+  _this->rng=_fl>0?IMUL32(_this->ext,_fh-_fl):_this->rng-s;
+  ec_dec_normalize(_this);
+}
+
+/*The probability of having a "one" is 1/(1<<_logp).*/
+int ec_dec_bit_logp(ec_dec *_this,unsigned _logp){
+  opus_uint32 r;
+  opus_uint32 d;
+  opus_uint32 s;
+  int         ret;
+  r=_this->rng;
+  d=_this->val;
+  s=r>>_logp;
+  ret=d<s;
+  if(!ret)_this->val=d-s;
+  _this->rng=ret?s:r-s;
+  ec_dec_normalize(_this);
+  return ret;
+}
+
+int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb){
+  opus_uint32 r;
+  opus_uint32 d;
+  opus_uint32 s;
+  opus_uint32 t;
+  int         ret;
+  s=_this->rng;
+  d=_this->val;
+  r=s>>_ftb;
+  ret=-1;
+  do{
+    t=s;
+    s=IMUL32(r,_icdf[++ret]);
+  }
+  while(d<s);
+  _this->val=d-s;
+  _this->rng=t-s;
+  ec_dec_normalize(_this);
+  return ret;
+}
+
+opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft){
+  unsigned ft;
+  unsigned s;
+  int      ftb;
+  /*In order to optimize EC_ILOG(), it is undefined for the value 0.*/
+  celt_assert(_ft>1);
+  _ft--;
+  ftb=EC_ILOG(_ft);
+  if(ftb>EC_UINT_BITS){
+    opus_uint32 t;
+    ftb-=EC_UINT_BITS;
+    ft=(unsigned)(_ft>>ftb)+1;
+    s=ec_decode(_this,ft);
+    ec_dec_update(_this,s,s+1,ft);
+    t=(opus_uint32)s<<ftb|ec_dec_bits(_this,ftb);
+    if(t<=_ft)return t;
+    _this->error=1;
+    return _ft;
+  }
+  else{
+    _ft++;
+    s=ec_decode(_this,(unsigned)_ft);
+    ec_dec_update(_this,s,s+1,(unsigned)_ft);
+    return s;
+  }
+}
+
+opus_uint32 ec_dec_bits(ec_dec *_this,unsigned _bits){
+  ec_window   window;
+  int         available;
+  opus_uint32 ret;
+  window=_this->end_window;
+  available=_this->nend_bits;
+  if((unsigned)available<_bits){
+    do{
+      window|=(ec_window)ec_read_byte_from_end(_this)<<available;
+      available+=EC_SYM_BITS;
+    }
+    while(available<=EC_WINDOW_SIZE-EC_SYM_BITS);
+  }
+  ret=(opus_uint32)window&(((opus_uint32)1<<_bits)-1U);
+  window>>=_bits;
+  available-=_bits;
+  _this->end_window=window;
+  _this->nend_bits=available;
+  _this->nbits_total+=_bits;
+  return ret;
+}
diff --git a/libopus/celt/entdec.h b/libopus/celt/entdec.h
new file mode 100644
index 0000000..025fc18
--- /dev/null
+++ b/libopus/celt/entdec.h
@@ -0,0 +1,100 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+   Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(_entdec_H)
+# define _entdec_H (1)
+# include <limits.h>
+# include "entcode.h"
+
+/*Initializes the decoder.
+  _buf: The input buffer to use.
+  Return: 0 on success, or a negative value on error.*/
+void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage);
+
+/*Calculates the cumulative frequency for the next symbol.
+  This can then be fed into the probability model to determine what that
+   symbol is, and the additional frequency information required to advance to
+   the next symbol.
+  This function cannot be called more than once without a corresponding call to
+   ec_dec_update(), or decoding will not proceed correctly.
+  _ft: The total frequency of the symbols in the alphabet the next symbol was
+        encoded with.
+  Return: A cumulative frequency representing the encoded symbol.
+          If the cumulative frequency of all the symbols before the one that
+           was encoded was fl, and the cumulative frequency of all the symbols
+           up to and including the one encoded is fh, then the returned value
+           will fall in the range [fl,fh).*/
+unsigned ec_decode(ec_dec *_this,unsigned _ft);
+
+/*Equivalent to ec_decode() with _ft==1<<_bits.*/
+unsigned ec_decode_bin(ec_dec *_this,unsigned _bits);
+
+/*Advance the decoder past the next symbol using the frequency information the
+   symbol was encoded with.
+  Exactly one call to ec_decode() must have been made so that all necessary
+   intermediate calculations are performed.
+  _fl:  The cumulative frequency of all symbols that come before the symbol
+         decoded.
+  _fh:  The cumulative frequency of all symbols up to and including the symbol
+         decoded.
+        Together with _fl, this defines the range [_fl,_fh) in which the value
+         returned above must fall.
+  _ft:  The total frequency of the symbols in the alphabet the symbol decoded
+         was encoded in.
+        This must be the same as passed to the preceding call to ec_decode().*/
+void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft);
+
+/* Decode a bit that has a 1/(1<<_logp) probability of being a one */
+int ec_dec_bit_logp(ec_dec *_this,unsigned _logp);
+
+/*Decodes a symbol given an "inverse" CDF table.
+  No call to ec_dec_update() is necessary after this call.
+  _icdf: The "inverse" CDF, such that symbol s falls in the range
+          [s>0?ft-_icdf[s-1]:0,ft-_icdf[s]), where ft=1<<_ftb.
+         The values must be monotonically non-increasing, and the last value
+          must be 0.
+  _ftb: The number of bits of precision in the cumulative distribution.
+  Return: The decoded symbol s.*/
+int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb);
+
+/*Extracts a raw unsigned integer with a non-power-of-2 range from the stream.
+  The bits must have been encoded with ec_enc_uint().
+  No call to ec_dec_update() is necessary after this call.
+  _ft: The number of integers that can be decoded (one more than the max).
+       This must be at least 2, and no more than 2**32-1.
+  Return: The decoded bits.*/
+opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft);
+
+/*Extracts a sequence of raw bits from the stream.
+  The bits must have been encoded with ec_enc_bits().
+  No call to ec_dec_update() is necessary after this call.
+  _ftb: The number of bits to extract.
+        This must be between 0 and 25, inclusive.
+  Return: The decoded bits.*/
+opus_uint32 ec_dec_bits(ec_dec *_this,unsigned _ftb);
+
+#endif
diff --git a/libopus/celt/entenc.c b/libopus/celt/entenc.c
new file mode 100644
index 0000000..f1750d2
--- /dev/null
+++ b/libopus/celt/entenc.c
@@ -0,0 +1,294 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+   Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if defined(HAVE_CONFIG_H)
+# include "config.h"
+#endif
+#include "os_support.h"
+#include "arch.h"
+#include "entenc.h"
+#include "mfrngcod.h"
+
+/*A range encoder.
+  See entdec.c and the references for implementation details \cite{Mar79,MNW98}.
+
+  @INPROCEEDINGS{Mar79,
+   author="Martin, G.N.N.",
+   title="Range encoding: an algorithm for removing redundancy from a digitised
+    message",
+   booktitle="Video \& Data Recording Conference",
+   year=1979,
+   address="Southampton",
+   month=Jul
+  }
+  @ARTICLE{MNW98,
+   author="Alistair Moffat and Radford Neal and Ian H. Witten",
+   title="Arithmetic Coding Revisited",
+   journal="{ACM} Transactions on Information Systems",
+   year=1998,
+   volume=16,
+   number=3,
+   pages="256--294",
+   month=Jul,
+   URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf"
+  }*/
+
+static int ec_write_byte(ec_enc *_this,unsigned _value){
+  if(_this->offs+_this->end_offs>=_this->storage)return -1;
+  _this->buf[_this->offs++]=(unsigned char)_value;
+  return 0;
+}
+
+static int ec_write_byte_at_end(ec_enc *_this,unsigned _value){
+  if(_this->offs+_this->end_offs>=_this->storage)return -1;
+  _this->buf[_this->storage-++(_this->end_offs)]=(unsigned char)_value;
+  return 0;
+}
+
+/*Outputs a symbol, with a carry bit.
+  If there is a potential to propagate a carry over several symbols, they are
+   buffered until it can be determined whether or not an actual carry will
+   occur.
+  If the counter for the buffered symbols overflows, then the stream becomes
+   undecodable.
+  This gives a theoretical limit of a few billion symbols in a single packet on
+   32-bit systems.
+  The alternative is to truncate the range in order to force a carry, but
+   requires similar carry tracking in the decoder, needlessly slowing it down.*/
+static void ec_enc_carry_out(ec_enc *_this,int _c){
+  if(_c!=EC_SYM_MAX){
+    /*No further carry propagation possible, flush buffer.*/
+    int carry;
+    carry=_c>>EC_SYM_BITS;
+    /*Don't output a byte on the first write.
+      This compare should be taken care of by branch-prediction thereafter.*/
+    if(_this->rem>=0)_this->error|=ec_write_byte(_this,_this->rem+carry);
+    if(_this->ext>0){
+      unsigned sym;
+      sym=(EC_SYM_MAX+carry)&EC_SYM_MAX;
+      do _this->error|=ec_write_byte(_this,sym);
+      while(--(_this->ext)>0);
+    }
+    _this->rem=_c&EC_SYM_MAX;
+  }
+  else _this->ext++;
+}
+
+static OPUS_INLINE void ec_enc_normalize(ec_enc *_this){
+  /*If the range is too small, output some bits and rescale it.*/
+  while(_this->rng<=EC_CODE_BOT){
+    ec_enc_carry_out(_this,(int)(_this->val>>EC_CODE_SHIFT));
+    /*Move the next-to-high-order symbol into the high-order position.*/
+    _this->val=(_this->val<<EC_SYM_BITS)&(EC_CODE_TOP-1);
+    _this->rng<<=EC_SYM_BITS;
+    _this->nbits_total+=EC_SYM_BITS;
+  }
+}
+
+void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size){
+  _this->buf=_buf;
+  _this->end_offs=0;
+  _this->end_window=0;
+  _this->nend_bits=0;
+  /*This is the offset from which ec_tell() will subtract partial bits.*/
+  _this->nbits_total=EC_CODE_BITS+1;
+  _this->offs=0;
+  _this->rng=EC_CODE_TOP;
+  _this->rem=-1;
+  _this->val=0;
+  _this->ext=0;
+  _this->storage=_size;
+  _this->error=0;
+}
+
+void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){
+  opus_uint32 r;
+  r=celt_udiv(_this->rng,_ft);
+  if(_fl>0){
+    _this->val+=_this->rng-IMUL32(r,(_ft-_fl));
+    _this->rng=IMUL32(r,(_fh-_fl));
+  }
+  else _this->rng-=IMUL32(r,(_ft-_fh));
+  ec_enc_normalize(_this);
+}
+
+void ec_encode_bin(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _bits){
+  opus_uint32 r;
+  r=_this->rng>>_bits;
+  if(_fl>0){
+    _this->val+=_this->rng-IMUL32(r,((1U<<_bits)-_fl));
+    _this->rng=IMUL32(r,(_fh-_fl));
+  }
+  else _this->rng-=IMUL32(r,((1U<<_bits)-_fh));
+  ec_enc_normalize(_this);
+}
+
+/*The probability of having a "one" is 1/(1<<_logp).*/
+void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp){
+  opus_uint32 r;
+  opus_uint32 s;
+  opus_uint32 l;
+  r=_this->rng;
+  l=_this->val;
+  s=r>>_logp;
+  r-=s;
+  if(_val)_this->val=l+r;
+  _this->rng=_val?s:r;
+  ec_enc_normalize(_this);
+}
+
+void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb){
+  opus_uint32 r;
+  r=_this->rng>>_ftb;
+  if(_s>0){
+    _this->val+=_this->rng-IMUL32(r,_icdf[_s-1]);
+    _this->rng=IMUL32(r,_icdf[_s-1]-_icdf[_s]);
+  }
+  else _this->rng-=IMUL32(r,_icdf[_s]);
+  ec_enc_normalize(_this);
+}
+
+void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft){
+  unsigned  ft;
+  unsigned  fl;
+  int       ftb;
+  /*In order to optimize EC_ILOG(), it is undefined for the value 0.*/
+  celt_assert(_ft>1);
+  _ft--;
+  ftb=EC_ILOG(_ft);
+  if(ftb>EC_UINT_BITS){
+    ftb-=EC_UINT_BITS;
+    ft=(_ft>>ftb)+1;
+    fl=(unsigned)(_fl>>ftb);
+    ec_encode(_this,fl,fl+1,ft);
+    ec_enc_bits(_this,_fl&(((opus_uint32)1<<ftb)-1U),ftb);
+  }
+  else ec_encode(_this,_fl,_fl+1,_ft+1);
+}
+
+void ec_enc_bits(ec_enc *_this,opus_uint32 _fl,unsigned _bits){
+  ec_window window;
+  int       used;
+  window=_this->end_window;
+  used=_this->nend_bits;
+  celt_assert(_bits>0);
+  if(used+_bits>EC_WINDOW_SIZE){
+    do{
+      _this->error|=ec_write_byte_at_end(_this,(unsigned)window&EC_SYM_MAX);
+      window>>=EC_SYM_BITS;
+      used-=EC_SYM_BITS;
+    }
+    while(used>=EC_SYM_BITS);
+  }
+  window|=(ec_window)_fl<<used;
+  used+=_bits;
+  _this->end_window=window;
+  _this->nend_bits=used;
+  _this->nbits_total+=_bits;
+}
+
+void ec_enc_patch_initial_bits(ec_enc *_this,unsigned _val,unsigned _nbits){
+  int      shift;
+  unsigned mask;
+  celt_assert(_nbits<=EC_SYM_BITS);
+  shift=EC_SYM_BITS-_nbits;
+  mask=((1<<_nbits)-1)<<shift;
+  if(_this->offs>0){
+    /*The first byte has been finalized.*/
+    _this->buf[0]=(unsigned char)((_this->buf[0]&~mask)|_val<<shift);
+  }
+  else if(_this->rem>=0){
+    /*The first byte is still awaiting carry propagation.*/
+    _this->rem=(_this->rem&~mask)|_val<<shift;
+  }
+  else if(_this->rng<=(EC_CODE_TOP>>_nbits)){
+    /*The renormalization loop has never been run.*/
+    _this->val=(_this->val&~((opus_uint32)mask<<EC_CODE_SHIFT))|
+     (opus_uint32)_val<<(EC_CODE_SHIFT+shift);
+  }
+  /*The encoder hasn't even encoded _nbits of data yet.*/
+  else _this->error=-1;
+}
+
+void ec_enc_shrink(ec_enc *_this,opus_uint32 _size){
+  celt_assert(_this->offs+_this->end_offs<=_size);
+  OPUS_MOVE(_this->buf+_size-_this->end_offs,
+   _this->buf+_this->storage-_this->end_offs,_this->end_offs);
+  _this->storage=_size;
+}
+
+void ec_enc_done(ec_enc *_this){
+  ec_window   window;
+  int         used;
+  opus_uint32 msk;
+  opus_uint32 end;
+  int         l;
+  /*We output the minimum number of bits that ensures that the symbols encoded
+     thus far will be decoded correctly regardless of the bits that follow.*/
+  l=EC_CODE_BITS-EC_ILOG(_this->rng);
+  msk=(EC_CODE_TOP-1)>>l;
+  end=(_this->val+msk)&~msk;
+  if((end|msk)>=_this->val+_this->rng){
+    l++;
+    msk>>=1;
+    end=(_this->val+msk)&~msk;
+  }
+  while(l>0){
+    ec_enc_carry_out(_this,(int)(end>>EC_CODE_SHIFT));
+    end=(end<<EC_SYM_BITS)&(EC_CODE_TOP-1);
+    l-=EC_SYM_BITS;
+  }
+  /*If we have a buffered byte flush it into the output buffer.*/
+  if(_this->rem>=0||_this->ext>0)ec_enc_carry_out(_this,0);
+  /*If we have buffered extra bits, flush them as well.*/
+  window=_this->end_window;
+  used=_this->nend_bits;
+  while(used>=EC_SYM_BITS){
+    _this->error|=ec_write_byte_at_end(_this,(unsigned)window&EC_SYM_MAX);
+    window>>=EC_SYM_BITS;
+    used-=EC_SYM_BITS;
+  }
+  /*Clear any excess space and add any remaining extra bits to the last byte.*/
+  if(!_this->error){
+    OPUS_CLEAR(_this->buf+_this->offs,
+     _this->storage-_this->offs-_this->end_offs);
+    if(used>0){
+      /*If there's no range coder data at all, give up.*/
+      if(_this->end_offs>=_this->storage)_this->error=-1;
+      else{
+        l=-l;
+        /*If we've busted, don't add too many extra bits to the last byte; it
+           would corrupt the range coder data, and that's more important.*/
+        if(_this->offs+_this->end_offs>=_this->storage&&l<used){
+          window&=(1<<l)-1;
+          _this->error=-1;
+        }
+        _this->buf[_this->storage-_this->end_offs-1]|=(unsigned char)window;
+      }
+    }
+  }
+}
diff --git a/libopus/celt/entenc.h b/libopus/celt/entenc.h
new file mode 100644
index 0000000..f502eaf
--- /dev/null
+++ b/libopus/celt/entenc.h
@@ -0,0 +1,110 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+   Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(_entenc_H)
+# define _entenc_H (1)
+# include <stddef.h>
+# include "entcode.h"
+
+/*Initializes the encoder.
+  _buf:  The buffer to store output bytes in.
+  _size: The size of the buffer, in chars.*/
+void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size);
+/*Encodes a symbol given its frequency information.
+  The frequency information must be discernable by the decoder, assuming it
+   has read only the previous symbols from the stream.
+  It is allowable to change the frequency information, or even the entire
+   source alphabet, so long as the decoder can tell from the context of the
+   previously encoded information that it is supposed to do so as well.
+  _fl: The cumulative frequency of all symbols that come before the one to be
+        encoded.
+  _fh: The cumulative frequency of all symbols up to and including the one to
+        be encoded.
+       Together with _fl, this defines the range [_fl,_fh) in which the
+        decoded value will fall.
+  _ft: The sum of the frequencies of all the symbols*/
+void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft);
+
+/*Equivalent to ec_encode() with _ft==1<<_bits.*/
+void ec_encode_bin(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _bits);
+
+/* Encode a bit that has a 1/(1<<_logp) probability of being a one */
+void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp);
+
+/*Encodes a symbol given an "inverse" CDF table.
+  _s:    The index of the symbol to encode.
+  _icdf: The "inverse" CDF, such that symbol _s falls in the range
+          [_s>0?ft-_icdf[_s-1]:0,ft-_icdf[_s]), where ft=1<<_ftb.
+         The values must be monotonically non-increasing, and the last value
+          must be 0.
+  _ftb: The number of bits of precision in the cumulative distribution.*/
+void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb);
+
+/*Encodes a raw unsigned integer in the stream.
+  _fl: The integer to encode.
+  _ft: The number of integers that can be encoded (one more than the max).
+       This must be at least 2, and no more than 2**32-1.*/
+void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft);
+
+/*Encodes a sequence of raw bits in the stream.
+  _fl:  The bits to encode.
+  _ftb: The number of bits to encode.
+        This must be between 1 and 25, inclusive.*/
+void ec_enc_bits(ec_enc *_this,opus_uint32 _fl,unsigned _ftb);
+
+/*Overwrites a few bits at the very start of an existing stream, after they
+   have already been encoded.
+  This makes it possible to have a few flags up front, where it is easy for
+   decoders to access them without parsing the whole stream, even if their
+   values are not determined until late in the encoding process, without having
+   to buffer all the intermediate symbols in the encoder.
+  In order for this to work, at least _nbits bits must have already been
+   encoded using probabilities that are an exact power of two.
+  The encoder can verify the number of encoded bits is sufficient, but cannot
+   check this latter condition.
+  _val:   The bits to encode (in the least _nbits significant bits).
+          They will be decoded in order from most-significant to least.
+  _nbits: The number of bits to overwrite.
+          This must be no more than 8.*/
+void ec_enc_patch_initial_bits(ec_enc *_this,unsigned _val,unsigned _nbits);
+
+/*Compacts the data to fit in the target size.
+  This moves up the raw bits at the end of the current buffer so they are at
+   the end of the new buffer size.
+  The caller must ensure that the amount of data that's already been written
+   will fit in the new size.
+  _size: The number of bytes in the new buffer.
+         This must be large enough to contain the bits already written, and
+          must be no larger than the existing size.*/
+void ec_enc_shrink(ec_enc *_this,opus_uint32 _size);
+
+/*Indicates that there are no more symbols to encode.
+  All reamining output bytes are flushed to the output buffer.
+  ec_enc_init() must be called before the encoder can be used again.*/
+void ec_enc_done(ec_enc *_this);
+
+#endif
diff --git a/libopus/celt/fixed_debug.h b/libopus/celt/fixed_debug.h
new file mode 100644
index 0000000..f435295
--- /dev/null
+++ b/libopus/celt/fixed_debug.h
@@ -0,0 +1,791 @@
+/* Copyright (C) 2003-2008 Jean-Marc Valin
+   Copyright (C) 2007-2012 Xiph.Org Foundation */
+/**
+   @file fixed_debug.h
+   @brief Fixed-point operations with debugging
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_DEBUG_H
+#define FIXED_DEBUG_H
+
+#include <stdio.h>
+#include "opus_defines.h"
+
+#ifdef CELT_C
+OPUS_EXPORT opus_int64 celt_mips=0;
+#else
+extern opus_int64 celt_mips;
+#endif
+
+#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
+#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR32((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR32((a),16),((b)&0x0000ffff)),15)), SHR32(MULT16_16SU(SHR32((b),16),((a)&0x0000ffff)),15))
+
+/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
+#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR32((b),16)), SHR32(MULT16_16SU((a),((b)&0x0000ffff)),16))
+
+#define MULT16_32_P16(a,b) MULT16_32_PX(a,b,16)
+
+#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))
+#define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val32)1)<<(bits))))
+
+#define VERIFY_SHORT(x) ((x)<=32767&&(x)>=-32768)
+#define VERIFY_INT(x) ((x)<=2147483647LL&&(x)>=-2147483648LL)
+#define VERIFY_UINT(x) ((x)<=(2147483647LLU<<1))
+
+#define SHR(a,b) SHR32(a,b)
+#define PSHR(a,b) PSHR32(a,b)
+
+/** Add two 32-bit values, ignore any overflows */
+#define ADD32_ovflw(a,b) (celt_mips+=2,(opus_val32)((opus_uint32)(a)+(opus_uint32)(b)))
+/** Subtract two 32-bit values, ignore any overflows */
+#define SUB32_ovflw(a,b) (celt_mips+=2,(opus_val32)((opus_uint32)(a)-(opus_uint32)(b)))
+/* Avoid MSVC warning C4146: unary minus operator applied to unsigned type */
+/** Negate 32-bit value, ignore any overflows */
+#define NEG32_ovflw(a) (celt_mips+=2,(opus_val32)(0-(opus_uint32)(a)))
+
+static OPUS_INLINE short NEG16(int x)
+{
+   int res;
+   if (!VERIFY_SHORT(x))
+   {
+      fprintf (stderr, "NEG16: input is not short: %d\n", (int)x);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = -x;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "NEG16: output is not short: %d\n", (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips++;
+   return res;
+}
+static OPUS_INLINE int NEG32(opus_int64 x)
+{
+   opus_int64 res;
+   if (!VERIFY_INT(x))
+   {
+      fprintf (stderr, "NEG16: input is not int: %d\n", (int)x);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = -x;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "NEG16: output is not int: %d\n", (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+
+#define EXTRACT16(x) EXTRACT16_(x, __FILE__, __LINE__)
+static OPUS_INLINE short EXTRACT16_(int x, char *file, int line)
+{
+   int res;
+   if (!VERIFY_SHORT(x))
+   {
+      fprintf (stderr, "EXTRACT16: input is not short: %d in %s: line %d\n", x, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = x;
+   celt_mips++;
+   return res;
+}
+
+#define EXTEND32(x) EXTEND32_(x, __FILE__, __LINE__)
+static OPUS_INLINE int EXTEND32_(int x, char *file, int line)
+{
+   int res;
+   if (!VERIFY_SHORT(x))
+   {
+      fprintf (stderr, "EXTEND32: input is not short: %d in %s: line %d\n", x, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = x;
+   celt_mips++;
+   return res;
+}
+
+#define SHR16(a, shift) SHR16_(a, shift, __FILE__, __LINE__)
+static OPUS_INLINE short SHR16_(int a, int shift, char *file, int line)
+{
+   int res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift))
+   {
+      fprintf (stderr, "SHR16: inputs are not short: %d >> %d in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a>>shift;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "SHR16: output is not short: %d in %s: line %d\n", res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips++;
+   return res;
+}
+#define SHL16(a, shift) SHL16_(a, shift, __FILE__, __LINE__)
+static OPUS_INLINE short SHL16_(int a, int shift, char *file, int line)
+{
+   int res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift))
+   {
+      fprintf (stderr, "SHL16: inputs are not short: %d %d in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a<<shift;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "SHL16: output is not short: %d in %s: line %d\n", res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips++;
+   return res;
+}
+
+static OPUS_INLINE int SHR32(opus_int64 a, int shift)
+{
+   opus_int64  res;
+   if (!VERIFY_INT(a) || !VERIFY_SHORT(shift))
+   {
+      fprintf (stderr, "SHR32: inputs are not int: %d %d\n", (int)a, shift);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a>>shift;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "SHR32: output is not int: %d\n", (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+#define SHL32(a, shift) SHL32_(a, shift, __FILE__, __LINE__)
+static OPUS_INLINE int SHL32_(opus_int64 a, int shift, char *file, int line)
+{
+   opus_int64  res;
+   if (!VERIFY_INT(a) || !VERIFY_SHORT(shift))
+   {
+      fprintf (stderr, "SHL32: inputs are not int: %lld %d in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a<<shift;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "SHL32: output is not int: %lld<<%d = %lld in %s: line %d\n", a, shift, res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+
+#define PSHR32(a,shift) (celt_mips--,SHR32(ADD32((a),(((opus_val32)(1)<<((shift))>>1))),shift))
+#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift)))
+
+#define ROUND16(x,a) (celt_mips--,EXTRACT16(PSHR32((x),(a))))
+#define SROUND16(x,a) (celt_mips--,EXTRACT16(SATURATE(PSHR32(x,a), 32767)));
+
+#define HALF16(x)  (SHR16(x,1))
+#define HALF32(x)  (SHR32(x,1))
+
+#define ADD16(a, b) ADD16_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE short ADD16_(int a, int b, char *file, int line)
+{
+   int res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "ADD16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a+b;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "ADD16: output is not short: %d+%d=%d in %s: line %d\n", a,b,res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips++;
+   return res;
+}
+
+#define SUB16(a, b) SUB16_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE short SUB16_(int a, int b, char *file, int line)
+{
+   int res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "SUB16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a-b;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "SUB16: output is not short: %d in %s: line %d\n", res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips++;
+   return res;
+}
+
+#define ADD32(a, b) ADD32_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE int ADD32_(opus_int64 a, opus_int64 b, char *file, int line)
+{
+   opus_int64 res;
+   if (!VERIFY_INT(a) || !VERIFY_INT(b))
+   {
+      fprintf (stderr, "ADD32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a+b;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "ADD32: output is not int: %d in %s: line %d\n", (int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+
+#define SUB32(a, b) SUB32_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE int SUB32_(opus_int64 a, opus_int64 b, char *file, int line)
+{
+   opus_int64 res;
+   if (!VERIFY_INT(a) || !VERIFY_INT(b))
+   {
+      fprintf (stderr, "SUB32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a-b;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "SUB32: output is not int: %d in %s: line %d\n", (int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+
+#undef UADD32
+#define UADD32(a, b) UADD32_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE unsigned int UADD32_(opus_uint64 a, opus_uint64 b, char *file, int line)
+{
+   opus_uint64 res;
+   if (!VERIFY_UINT(a) || !VERIFY_UINT(b))
+   {
+      fprintf (stderr, "UADD32: inputs are not uint32: %llu %llu in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a+b;
+   if (!VERIFY_UINT(res))
+   {
+      fprintf (stderr, "UADD32: output is not uint32: %llu in %s: line %d\n", res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+
+#undef USUB32
+#define USUB32(a, b) USUB32_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE unsigned int USUB32_(opus_uint64 a, opus_uint64 b, char *file, int line)
+{
+   opus_uint64 res;
+   if (!VERIFY_UINT(a) || !VERIFY_UINT(b))
+   {
+      fprintf (stderr, "USUB32: inputs are not uint32: %llu %llu in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   if (a<b)
+   {
+      fprintf (stderr, "USUB32: inputs underflow: %llu < %llu in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a-b;
+   if (!VERIFY_UINT(res))
+   {
+      fprintf (stderr, "USUB32: output is not uint32: %llu - %llu = %llu in %s: line %d\n", a, b, res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+
+/* result fits in 16 bits */
+static OPUS_INLINE short MULT16_16_16(int a, int b)
+{
+   int res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_16: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a*b;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "MULT16_16_16: output is not short: %d\n", res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips++;
+   return res;
+}
+
+#define MULT16_16(a, b) MULT16_16_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE int MULT16_16_(int a, int b, char *file, int line)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "MULT16_16: output is not int: %d in %s: line %d\n", (int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips++;
+   return res;
+}
+
+#define MAC16_16(c,a,b)     (celt_mips-=2,ADD32((c),MULT16_16((a),(b))))
+
+#define MULT16_32_QX(a, b, Q) MULT16_32_QX_(a, b, Q, __FILE__, __LINE__)
+static OPUS_INLINE int MULT16_32_QX_(int a, opus_int64 b, int Q, char *file, int line)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_INT(b))
+   {
+      fprintf (stderr, "MULT16_32_Q%d: inputs are not short+int: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   if (ABS32(b)>=((opus_val32)(1)<<(15+Q)))
+   {
+      fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = (((opus_int64)a)*(opus_int64)b) >> Q;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "MULT16_32_Q%d: output is not int: %d*%d=%d in %s: line %d\n", Q, (int)a, (int)b,(int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   if (Q==15)
+      celt_mips+=3;
+   else
+      celt_mips+=4;
+   return res;
+}
+
+#define MULT16_32_PX(a, b, Q) MULT16_32_PX_(a, b, Q, __FILE__, __LINE__)
+static OPUS_INLINE int MULT16_32_PX_(int a, opus_int64 b, int Q, char *file, int line)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_INT(b))
+   {
+      fprintf (stderr, "MULT16_32_P%d: inputs are not short+int: %d %d in %s: line %d\n\n", Q, (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   if (ABS32(b)>=((opus_int64)(1)<<(15+Q)))
+   {
+      fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d in %s: line %d\n\n", Q, (int)a, (int)b,file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((((opus_int64)a)*(opus_int64)b) + (((opus_val32)(1)<<Q)>>1))>> Q;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "MULT16_32_P%d: output is not int: %d*%d=%d in %s: line %d\n\n", Q, (int)a, (int)b,(int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   if (Q==15)
+      celt_mips+=4;
+   else
+      celt_mips+=5;
+   return res;
+}
+
+#define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15)
+#define MAC16_32_Q15(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q15((a),(b))))
+#define MAC16_32_Q16(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q16((a),(b))))
+
+static OPUS_INLINE int SATURATE(int a, int b)
+{
+   if (a>b)
+      a=b;
+   if (a<-b)
+      a = -b;
+   celt_mips+=3;
+   return a;
+}
+
+static OPUS_INLINE opus_int16 SATURATE16(opus_int32 a)
+{
+   celt_mips+=3;
+   if (a>32767)
+      return 32767;
+   else if (a<-32768)
+      return -32768;
+   else return a;
+}
+
+static OPUS_INLINE int MULT16_16_Q11_32(int a, int b)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_Q11: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   res >>= 11;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "MULT16_16_Q11: output is not short: %d*%d=%d\n", (int)a, (int)b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=3;
+   return res;
+}
+static OPUS_INLINE short MULT16_16_Q13(int a, int b)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_Q13: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   res >>= 13;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "MULT16_16_Q13: output is not short: %d*%d=%d\n", a, b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=3;
+   return res;
+}
+static OPUS_INLINE short MULT16_16_Q14(int a, int b)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_Q14: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   res >>= 14;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "MULT16_16_Q14: output is not short: %d\n", (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=3;
+   return res;
+}
+
+#define MULT16_16_Q15(a, b) MULT16_16_Q15_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE short MULT16_16_Q15_(int a, int b, char *file, int line)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_Q15: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   res >>= 15;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "MULT16_16_Q15: output is not short: %d in %s: line %d\n", (int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=1;
+   return res;
+}
+
+static OPUS_INLINE short MULT16_16_P13(int a, int b)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_P13: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   res += 4096;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "MULT16_16_P13: overflow: %d*%d=%d\n", a, b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res >>= 13;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "MULT16_16_P13: output is not short: %d*%d=%d\n", a, b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=4;
+   return res;
+}
+static OPUS_INLINE short MULT16_16_P14(int a, int b)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_P14: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   res += 8192;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "MULT16_16_P14: overflow: %d*%d=%d\n", a, b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res >>= 14;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "MULT16_16_P14: output is not short: %d*%d=%d\n", a, b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=4;
+   return res;
+}
+static OPUS_INLINE short MULT16_16_P15(int a, int b)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_P15: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   res += 16384;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "MULT16_16_P15: overflow: %d*%d=%d\n", a, b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res >>= 15;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "MULT16_16_P15: output is not short: %d*%d=%d\n", a, b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+
+#define DIV32_16(a, b) DIV32_16_(a, b, __FILE__, __LINE__)
+
+static OPUS_INLINE int DIV32_16_(opus_int64 a, opus_int64 b, char *file, int line)
+{
+   opus_int64 res;
+   if (b==0)
+   {
+      fprintf(stderr, "DIV32_16: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+      return 0;
+   }
+   if (!VERIFY_INT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "DIV32_16: inputs are not int/short: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a/b;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "DIV32_16: output is not short: %d / %d = %d in %s: line %d\n", (int)a,(int)b,(int)res, file, line);
+      if (res>32767)
+         res = 32767;
+      if (res<-32768)
+         res = -32768;
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=35;
+   return res;
+}
+
+#define DIV32(a, b) DIV32_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE int DIV32_(opus_int64 a, opus_int64 b, char *file, int line)
+{
+   opus_int64 res;
+   if (b==0)
+   {
+      fprintf(stderr, "DIV32: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+      return 0;
+   }
+
+   if (!VERIFY_INT(a) || !VERIFY_INT(b))
+   {
+      fprintf (stderr, "DIV32: inputs are not int/short: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a/b;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "DIV32: output is not int: %d in %s: line %d\n", (int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=70;
+   return res;
+}
+
+static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x)
+{
+   x = PSHR32(x, SIG_SHIFT);
+   x = MAX32(x, -32768);
+   x = MIN32(x, 32767);
+   return EXTRACT16(x);
+}
+#define SIG2WORD16(x) (SIG2WORD16_generic(x))
+
+
+#undef PRINT_MIPS
+#define PRINT_MIPS(file) do {fprintf (file, "total complexity = %llu MIPS\n", celt_mips);} while (0);
+
+#endif
diff --git a/libopus/celt/fixed_generic.h b/libopus/celt/fixed_generic.h
new file mode 100644
index 0000000..5f4abda
--- /dev/null
+++ b/libopus/celt/fixed_generic.h
@@ -0,0 +1,178 @@
+/* Copyright (C) 2007-2009 Xiph.Org Foundation
+   Copyright (C) 2003-2008 Jean-Marc Valin
+   Copyright (C) 2007-2008 CSIRO */
+/**
+   @file fixed_generic.h
+   @brief Generic fixed-point operations
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_GENERIC_H
+#define FIXED_GENERIC_H
+
+/** Multiply a 16-bit signed value by a 16-bit unsigned value. The result is a 32-bit signed value */
+#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
+
+/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
+#if OPUS_FAST_INT64
+#define MULT16_32_Q16(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),16))
+#else
+#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
+#endif
+
+/** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */
+#if OPUS_FAST_INT64
+#define MULT16_32_P16(a,b) ((opus_val32)PSHR((opus_int64)((opus_val16)(a))*(b),16))
+#else
+#define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
+#endif
+
+/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
+#if OPUS_FAST_INT64
+#define MULT16_32_Q15(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),15))
+#else
+#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
+#endif
+
+/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
+#if OPUS_FAST_INT64
+#define MULT32_32_Q31(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b),31))
+#else
+#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
+#endif
+
+/** Compile-time conversion of float constant to 16-bit value */
+#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))
+
+/** Compile-time conversion of float constant to 32-bit value */
+#define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val32)1)<<(bits))))
+
+/** Negate a 16-bit value */
+#define NEG16(x) (-(x))
+/** Negate a 32-bit value */
+#define NEG32(x) (-(x))
+
+/** Change a 32-bit value into a 16-bit value. The value is assumed to fit in 16-bit, otherwise the result is undefined */
+#define EXTRACT16(x) ((opus_val16)(x))
+/** Change a 16-bit value into a 32-bit value */
+#define EXTEND32(x) ((opus_val32)(x))
+
+/** Arithmetic shift-right of a 16-bit value */
+#define SHR16(a,shift) ((a) >> (shift))
+/** Arithmetic shift-left of a 16-bit value */
+#define SHL16(a,shift) ((opus_int16)((opus_uint16)(a)<<(shift)))
+/** Arithmetic shift-right of a 32-bit value */
+#define SHR32(a,shift) ((a) >> (shift))
+/** Arithmetic shift-left of a 32-bit value */
+#define SHL32(a,shift) ((opus_int32)((opus_uint32)(a)<<(shift)))
+
+/** 32-bit arithmetic shift right with rounding-to-nearest instead of rounding down */
+#define PSHR32(a,shift) (SHR32((a)+((EXTEND32(1)<<((shift))>>1)),shift))
+/** 32-bit arithmetic shift right where the argument can be negative */
+#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift)))
+
+/** "RAW" macros, should not be used outside of this header file */
+#define SHR(a,shift) ((a) >> (shift))
+#define SHL(a,shift) SHL32(a,shift)
+#define PSHR(a,shift) (SHR((a)+((EXTEND32(1)<<((shift))>>1)),shift))
+#define SATURATE(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
+
+#define SATURATE16(x) (EXTRACT16((x)>32767 ? 32767 : (x)<-32768 ? -32768 : (x)))
+
+/** Shift by a and round-to-neareast 32-bit value. Result is a 16-bit value */
+#define ROUND16(x,a) (EXTRACT16(PSHR32((x),(a))))
+/** Shift by a and round-to-neareast 32-bit value. Result is a saturated 16-bit value */
+#define SROUND16(x,a) EXTRACT16(SATURATE(PSHR32(x,a), 32767));
+
+/** Divide by two */
+#define HALF16(x)  (SHR16(x,1))
+#define HALF32(x)  (SHR32(x,1))
+
+/** Add two 16-bit values */
+#define ADD16(a,b) ((opus_val16)((opus_val16)(a)+(opus_val16)(b)))
+/** Subtract two 16-bit values */
+#define SUB16(a,b) ((opus_val16)(a)-(opus_val16)(b))
+/** Add two 32-bit values */
+#define ADD32(a,b) ((opus_val32)(a)+(opus_val32)(b))
+/** Subtract two 32-bit values */
+#define SUB32(a,b) ((opus_val32)(a)-(opus_val32)(b))
+
+/** Add two 32-bit values, ignore any overflows */
+#define ADD32_ovflw(a,b) ((opus_val32)((opus_uint32)(a)+(opus_uint32)(b)))
+/** Subtract two 32-bit values, ignore any overflows */
+#define SUB32_ovflw(a,b) ((opus_val32)((opus_uint32)(a)-(opus_uint32)(b)))
+/* Avoid MSVC warning C4146: unary minus operator applied to unsigned type */
+/** Negate 32-bit value, ignore any overflows */
+#define NEG32_ovflw(a) ((opus_val32)(0-(opus_uint32)(a)))
+
+/** 16x16 multiplication where the result fits in 16 bits */
+#define MULT16_16_16(a,b)     ((((opus_val16)(a))*((opus_val16)(b))))
+
+/* (opus_val32)(opus_val16) gives TI compiler a hint that it's 16x16->32 multiply */
+/** 16x16 multiplication where the result fits in 32 bits */
+#define MULT16_16(a,b)     (((opus_val32)(opus_val16)(a))*((opus_val32)(opus_val16)(b)))
+
+/** 16x16 multiply-add where the result fits in 32 bits */
+#define MAC16_16(c,a,b) (ADD32((c),MULT16_16((a),(b))))
+/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
+    b must fit in 31 bits.
+    Result fits in 32 bits. */
+#define MAC16_32_Q15(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
+
+/** 16x32 multiplication, followed by a 16-bit shift right and 32-bit add.
+    Results fits in 32 bits */
+#define MAC16_32_Q16(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16)))
+
+#define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
+#define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11))
+#define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13))
+#define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14))
+#define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15))
+
+#define MULT16_16_P13(a,b) (SHR(ADD32(4096,MULT16_16((a),(b))),13))
+#define MULT16_16_P14(a,b) (SHR(ADD32(8192,MULT16_16((a),(b))),14))
+#define MULT16_16_P15(a,b) (SHR(ADD32(16384,MULT16_16((a),(b))),15))
+
+/** Divide a 32-bit value by a 16-bit value. Result fits in 16 bits */
+#define DIV32_16(a,b) ((opus_val16)(((opus_val32)(a))/((opus_val16)(b))))
+
+/** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */
+#define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b)))
+
+#if defined(MIPSr1_ASM)
+#include "mips/fixed_generic_mipsr1.h"
+#endif
+
+static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x)
+{
+   x = PSHR32(x, SIG_SHIFT);
+   x = MAX32(x, -32768);
+   x = MIN32(x, 32767);
+   return EXTRACT16(x);
+}
+#define SIG2WORD16(x) (SIG2WORD16_generic(x))
+
+#endif
diff --git a/libopus/celt/float_cast.h b/libopus/celt/float_cast.h
new file mode 100644
index 0000000..d33ed11
--- /dev/null
+++ b/libopus/celt/float_cast.h
@@ -0,0 +1,146 @@
+/* Copyright (C) 2001 Erik de Castro Lopo <erikd AT mega-nerd DOT com> */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Version 1.1 */
+
+#ifndef FLOAT_CAST_H
+#define FLOAT_CAST_H
+
+
+#include "arch.h"
+
+/*============================================================================
+**      On Intel Pentium processors (especially PIII and probably P4), converting
+**      from float to int is very slow. To meet the C specs, the code produced by
+**      most C compilers targeting Pentium needs to change the FPU rounding mode
+**      before the float to int conversion is performed.
+**
+**      Changing the FPU rounding mode causes the FPU pipeline to be flushed. It
+**      is this flushing of the pipeline which is so slow.
+**
+**      Fortunately the ISO C99 specifications define the functions lrint, lrintf,
+**      llrint and llrintf which fix this problem as a side effect.
+**
+**      On Unix-like systems, the configure process should have detected the
+**      presence of these functions. If they weren't found we have to replace them
+**      here with a standard C cast.
+*/
+
+/*
+**      The C99 prototypes for lrint and lrintf are as follows:
+**
+**              long int lrintf (float x) ;
+**              long int lrint  (double x) ;
+*/
+
+/*      The presence of the required functions are detected during the configure
+**      process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in
+**      the config.h file.
+*/
+
+/* With GCC, when SSE is available, the fastest conversion is cvtss2si. */
+#if defined(__GNUC__) && defined(__SSE__)
+
+#include <xmmintrin.h>
+static OPUS_INLINE opus_int32 float2int(float x) {return _mm_cvt_ss2si(_mm_set_ss(x));}
+
+#elif defined(HAVE_LRINTF)
+
+/*      These defines enable functionality introduced with the 1999 ISO C
+**      standard. They must be defined before the inclusion of math.h to
+**      engage them. If optimisation is enabled, these functions will be
+**      inlined. With optimisation switched off, you have to link in the
+**      maths library using -lm.
+*/
+
+#define _ISOC9X_SOURCE 1
+#define _ISOC99_SOURCE 1
+
+#define __USE_ISOC9X 1
+#define __USE_ISOC99 1
+
+#include <math.h>
+#define float2int(x) lrintf(x)
+
+#elif (defined(HAVE_LRINT))
+
+#define _ISOC9X_SOURCE 1
+#define _ISOC99_SOURCE 1
+
+#define __USE_ISOC9X 1
+#define __USE_ISOC99 1
+
+#include <math.h>
+#define float2int(x) lrint(x)
+
+#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1))
+        #include <xmmintrin.h>
+
+        static __inline long int float2int(float value)
+        {
+                return _mm_cvtss_si32(_mm_load_ss(&value));
+        }
+#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_IX86)
+        #include <math.h>
+
+        /*      Win32 doesn't seem to have these functions.
+        **      Therefore implement OPUS_INLINE versions of these functions here.
+        */
+
+        static __inline long int
+        float2int (float flt)
+        {       int intgr;
+
+                _asm
+                {       fld flt
+                        fistp intgr
+                } ;
+
+                return intgr ;
+        }
+
+#else
+
+#if (defined(__GNUC__) && defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L)
+        /* supported by gcc in C99 mode, but not by all other compilers */
+        //#warning "Don't have the functions lrint() and lrintf ()."
+        //#warning "Replacing these functions with a standard C cast."
+#endif /* __STDC_VERSION__ >= 199901L */
+        #include <math.h>
+        #define float2int(flt) ((int)(floor(.5+flt)))
+#endif
+
+#ifndef DISABLE_FLOAT_API
+static OPUS_INLINE opus_int16 FLOAT2INT16(float x)
+{
+   x = x*CELT_SIG_SCALE;
+   x = MAX32(x, -32768);
+   x = MIN32(x, 32767);
+   return (opus_int16)float2int(x);
+}
+#endif /* DISABLE_FLOAT_API */
+
+#endif /* FLOAT_CAST_H */
diff --git a/libopus/celt/kiss_fft.c b/libopus/celt/kiss_fft.c
new file mode 100644
index 0000000..8377516
--- /dev/null
+++ b/libopus/celt/kiss_fft.c
@@ -0,0 +1,604 @@
+/*Copyright (c) 2003-2004, Mark Borgerding
+  Lots of modifications by Jean-Marc Valin
+  Copyright (c) 2005-2007, Xiph.Org Foundation
+  Copyright (c) 2008,      Xiph.Org Foundation, CSIRO
+
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+
+/* This code is originally from Mark Borgerding's KISS-FFT but has been
+   heavily modified to better suit Opus */
+
+#ifndef SKIP_CONFIG_H
+#  ifdef HAVE_CONFIG_H
+#    include "config.h"
+#  endif
+#endif
+
+#include "_kiss_fft_guts.h"
+#include "arch.h"
+#include "os_support.h"
+#include "mathops.h"
+#include "stack_alloc.h"
+
+/* The guts header contains all the multiplication and addition macros that are defined for
+   complex numbers.  It also delares the kf_ internal functions.
+*/
+
+static void kf_bfly2(
+                     kiss_fft_cpx * Fout,
+                     int m,
+                     int N
+                    )
+{
+   kiss_fft_cpx * Fout2;
+   int i;
+   (void)m;
+#ifdef CUSTOM_MODES
+   if (m==1)
+   {
+      celt_assert(m==1);
+      for (i=0;i<N;i++)
+      {
+         kiss_fft_cpx t;
+         Fout2 = Fout + 1;
+         t = *Fout2;
+         C_SUB( *Fout2 ,  *Fout , t );
+         C_ADDTO( *Fout ,  t );
+         Fout += 2;
+      }
+   } else
+#endif
+   {
+      opus_val16 tw;
+      tw = QCONST16(0.7071067812f, 15);
+      /* We know that m==4 here because the radix-2 is just after a radix-4 */
+      celt_assert(m==4);
+      for (i=0;i<N;i++)
+      {
+         kiss_fft_cpx t;
+         Fout2 = Fout + 4;
+         t = Fout2[0];
+         C_SUB( Fout2[0] ,  Fout[0] , t );
+         C_ADDTO( Fout[0] ,  t );
+
+         t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
+         t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
+         C_SUB( Fout2[1] ,  Fout[1] , t );
+         C_ADDTO( Fout[1] ,  t );
+
+         t.r = Fout2[2].i;
+         t.i = -Fout2[2].r;
+         C_SUB( Fout2[2] ,  Fout[2] , t );
+         C_ADDTO( Fout[2] ,  t );
+
+         t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
+         t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
+         C_SUB( Fout2[3] ,  Fout[3] , t );
+         C_ADDTO( Fout[3] ,  t );
+         Fout += 8;
+      }
+   }
+}
+
+static void kf_bfly4(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   int i;
+
+   if (m==1)
+   {
+      /* Degenerate case where all the twiddles are 1. */
+      for (i=0;i<N;i++)
+      {
+         kiss_fft_cpx scratch0, scratch1;
+
+         C_SUB( scratch0 , *Fout, Fout[2] );
+         C_ADDTO(*Fout, Fout[2]);
+         C_ADD( scratch1 , Fout[1] , Fout[3] );
+         C_SUB( Fout[2], *Fout, scratch1 );
+         C_ADDTO( *Fout , scratch1 );
+         C_SUB( scratch1 , Fout[1] , Fout[3] );
+
+         Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
+         Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
+         Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
+         Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
+         Fout+=4;
+      }
+   } else {
+      int j;
+      kiss_fft_cpx scratch[6];
+      const kiss_twiddle_cpx *tw1,*tw2,*tw3;
+      const int m2=2*m;
+      const int m3=3*m;
+      kiss_fft_cpx * Fout_beg = Fout;
+      for (i=0;i<N;i++)
+      {
+         Fout = Fout_beg + i*mm;
+         tw3 = tw2 = tw1 = st->twiddles;
+         /* m is guaranteed to be a multiple of 4. */
+         for (j=0;j<m;j++)
+         {
+            C_MUL(scratch[0],Fout[m] , *tw1 );
+            C_MUL(scratch[1],Fout[m2] , *tw2 );
+            C_MUL(scratch[2],Fout[m3] , *tw3 );
+
+            C_SUB( scratch[5] , *Fout, scratch[1] );
+            C_ADDTO(*Fout, scratch[1]);
+            C_ADD( scratch[3] , scratch[0] , scratch[2] );
+            C_SUB( scratch[4] , scratch[0] , scratch[2] );
+            C_SUB( Fout[m2], *Fout, scratch[3] );
+            tw1 += fstride;
+            tw2 += fstride*2;
+            tw3 += fstride*3;
+            C_ADDTO( *Fout , scratch[3] );
+
+            Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
+            Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
+            Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
+            Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
+            ++Fout;
+         }
+      }
+   }
+}
+
+
+#ifndef RADIX_TWO_ONLY
+
+static void kf_bfly3(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   int i;
+   size_t k;
+   const size_t m2 = 2*m;
+   const kiss_twiddle_cpx *tw1,*tw2;
+   kiss_fft_cpx scratch[5];
+   kiss_twiddle_cpx epi3;
+
+   kiss_fft_cpx * Fout_beg = Fout;
+#ifdef FIXED_POINT
+   /*epi3.r = -16384;*/ /* Unused */
+   epi3.i = -28378;
+#else
+   epi3 = st->twiddles[fstride*m];
+#endif
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      tw1=tw2=st->twiddles;
+      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
+      k=m;
+      do {
+
+         C_MUL(scratch[1],Fout[m] , *tw1);
+         C_MUL(scratch[2],Fout[m2] , *tw2);
+
+         C_ADD(scratch[3],scratch[1],scratch[2]);
+         C_SUB(scratch[0],scratch[1],scratch[2]);
+         tw1 += fstride;
+         tw2 += fstride*2;
+
+         Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
+         Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
+
+         C_MULBYSCALAR( scratch[0] , epi3.i );
+
+         C_ADDTO(*Fout,scratch[3]);
+
+         Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
+         Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
+
+         Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
+         Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
+
+         ++Fout;
+      } while(--k);
+   }
+}
+
+
+#ifndef OVERRIDE_kf_bfly5
+static void kf_bfly5(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+   int i, u;
+   kiss_fft_cpx scratch[13];
+   const kiss_twiddle_cpx *tw;
+   kiss_twiddle_cpx ya,yb;
+   kiss_fft_cpx * Fout_beg = Fout;
+
+#ifdef FIXED_POINT
+   ya.r = 10126;
+   ya.i = -31164;
+   yb.r = -26510;
+   yb.i = -19261;
+#else
+   ya = st->twiddles[fstride*m];
+   yb = st->twiddles[fstride*2*m];
+#endif
+   tw=st->twiddles;
+
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      Fout0=Fout;
+      Fout1=Fout0+m;
+      Fout2=Fout0+2*m;
+      Fout3=Fout0+3*m;
+      Fout4=Fout0+4*m;
+
+      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
+      for ( u=0; u<m; ++u ) {
+         scratch[0] = *Fout0;
+
+         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
+         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
+         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
+         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
+
+         C_ADD( scratch[7],scratch[1],scratch[4]);
+         C_SUB( scratch[10],scratch[1],scratch[4]);
+         C_ADD( scratch[8],scratch[2],scratch[3]);
+         C_SUB( scratch[9],scratch[2],scratch[3]);
+
+         Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
+         Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
+
+         scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
+         scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
+
+         scratch[6].r =  ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
+         scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
+
+         C_SUB(*Fout1,scratch[5],scratch[6]);
+         C_ADD(*Fout4,scratch[5],scratch[6]);
+
+         scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
+         scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
+         scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
+         scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
+
+         C_ADD(*Fout2,scratch[11],scratch[12]);
+         C_SUB(*Fout3,scratch[11],scratch[12]);
+
+         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+      }
+   }
+}
+#endif /* OVERRIDE_kf_bfly5 */
+
+
+#endif
+
+
+#ifdef CUSTOM_MODES
+
+static
+void compute_bitrev_table(
+         int Fout,
+         opus_int16 *f,
+         const size_t fstride,
+         int in_stride,
+         opus_int16 * factors,
+         const kiss_fft_state *st
+            )
+{
+   const int p=*factors++; /* the radix  */
+   const int m=*factors++; /* stage's fft length/p */
+
+    /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
+   if (m==1)
+   {
+      int j;
+      for (j=0;j<p;j++)
+      {
+         *f = Fout+j;
+         f += fstride*in_stride;
+      }
+   } else {
+      int j;
+      for (j=0;j<p;j++)
+      {
+         compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
+         f += fstride*in_stride;
+         Fout += m;
+      }
+   }
+}
+
+/*  facbuf is populated by p1,m1,p2,m2, ...
+    where
+    p[i] * m[i] = m[i-1]
+    m0 = n                  */
+static
+int kf_factor(int n,opus_int16 * facbuf)
+{
+    int p=4;
+    int i;
+    int stages=0;
+    int nbak = n;
+
+    /*factor out powers of 4, powers of 2, then any remaining primes */
+    do {
+        while (n % p) {
+            switch (p) {
+                case 4: p = 2; break;
+                case 2: p = 3; break;
+                default: p += 2; break;
+            }
+            if (p>32000 || (opus_int32)p*(opus_int32)p > n)
+                p = n;          /* no more factors, skip to end */
+        }
+        n /= p;
+#ifdef RADIX_TWO_ONLY
+        if (p!=2 && p != 4)
+#else
+        if (p>5)
+#endif
+        {
+           return 0;
+        }
+        facbuf[2*stages] = p;
+        if (p==2 && stages > 1)
+        {
+           facbuf[2*stages] = 4;
+           facbuf[2] = 2;
+        }
+        stages++;
+    } while (n > 1);
+    n = nbak;
+    /* Reverse the order to get the radix 4 at the end, so we can use the
+       fast degenerate case. It turns out that reversing the order also
+       improves the noise behaviour. */
+    for (i=0;i<stages/2;i++)
+    {
+       int tmp;
+       tmp = facbuf[2*i];
+       facbuf[2*i] = facbuf[2*(stages-i-1)];
+       facbuf[2*(stages-i-1)] = tmp;
+    }
+    for (i=0;i<stages;i++)
+    {
+        n /= facbuf[2*i];
+        facbuf[2*i+1] = n;
+    }
+    return 1;
+}
+
+static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
+{
+   int i;
+#ifdef FIXED_POINT
+   for (i=0;i<nfft;++i) {
+      opus_val32 phase = -i;
+      kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
+   }
+#else
+   for (i=0;i<nfft;++i) {
+      const double pi=3.14159265358979323846264338327;
+      double phase = ( -2*pi /nfft ) * i;
+      kf_cexp(twiddles+i, phase );
+   }
+#endif
+}
+
+int opus_fft_alloc_arch_c(kiss_fft_state *st) {
+   (void)st;
+   return 0;
+}
+
+/*
+ *
+ * Allocates all necessary storage space for the fft and ifft.
+ * The return value is a contiguous block of memory.  As such,
+ * It can be freed with free().
+ * */
+kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
+                                        const kiss_fft_state *base, int arch)
+{
+    kiss_fft_state *st=NULL;
+    size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
+
+    if ( lenmem==NULL ) {
+        st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
+    }else{
+        if (mem != NULL && *lenmem >= memneeded)
+            st = (kiss_fft_state*)mem;
+        *lenmem = memneeded;
+    }
+    if (st) {
+        opus_int16 *bitrev;
+        kiss_twiddle_cpx *twiddles;
+
+        st->nfft=nfft;
+#ifdef FIXED_POINT
+        st->scale_shift = celt_ilog2(st->nfft);
+        if (st->nfft == 1<<st->scale_shift)
+           st->scale = Q15ONE;
+        else
+           st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
+#else
+        st->scale = 1.f/nfft;
+#endif
+        if (base != NULL)
+        {
+           st->twiddles = base->twiddles;
+           st->shift = 0;
+           while (st->shift < 32 && nfft<<st->shift != base->nfft)
+              st->shift++;
+           if (st->shift>=32)
+              goto fail;
+        } else {
+           st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
+           compute_twiddles(twiddles, nfft);
+           st->shift = -1;
+        }
+        if (!kf_factor(nfft,st->factors))
+        {
+           goto fail;
+        }
+
+        /* bitrev */
+        st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
+        if (st->bitrev==NULL)
+            goto fail;
+        compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
+
+        /* Initialize architecture specific fft parameters */
+        if (opus_fft_alloc_arch(st, arch))
+            goto fail;
+    }
+    return st;
+fail:
+    opus_fft_free(st, arch);
+    return NULL;
+}
+
+kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
+{
+   return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
+}
+
+void opus_fft_free_arch_c(kiss_fft_state *st) {
+   (void)st;
+}
+
+void opus_fft_free(const kiss_fft_state *cfg, int arch)
+{
+   if (cfg)
+   {
+      opus_fft_free_arch((kiss_fft_state *)cfg, arch);
+      opus_free((opus_int16*)cfg->bitrev);
+      if (cfg->shift < 0)
+         opus_free((kiss_twiddle_cpx*)cfg->twiddles);
+      opus_free((kiss_fft_state*)cfg);
+   }
+}
+
+#endif /* CUSTOM_MODES */
+
+void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
+{
+    int m2, m;
+    int p;
+    int L;
+    int fstride[MAXFACTORS];
+    int i;
+    int shift;
+
+    /* st->shift can be -1 */
+    shift = st->shift>0 ? st->shift : 0;
+
+    fstride[0] = 1;
+    L=0;
+    do {
+       p = st->factors[2*L];
+       m = st->factors[2*L+1];
+       fstride[L+1] = fstride[L]*p;
+       L++;
+    } while(m!=1);
+    m = st->factors[2*L-1];
+    for (i=L-1;i>=0;i--)
+    {
+       if (i!=0)
+          m2 = st->factors[2*i-1];
+       else
+          m2 = 1;
+       switch (st->factors[2*i])
+       {
+       case 2:
+          kf_bfly2(fout, m, fstride[i]);
+          break;
+       case 4:
+          kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+          break;
+ #ifndef RADIX_TWO_ONLY
+       case 3:
+          kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+          break;
+       case 5:
+          kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+          break;
+ #endif
+       }
+       m = m2;
+    }
+}
+
+void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{
+   int i;
+   opus_val16 scale;
+#ifdef FIXED_POINT
+   /* Allows us to scale with MULT16_32_Q16(), which is faster than
+      MULT16_32_Q15() on ARM. */
+   int scale_shift = st->scale_shift-1;
+#endif
+   scale = st->scale;
+
+   celt_assert2 (fin != fout, "In-place FFT not supported");
+   /* Bit-reverse the input */
+   for (i=0;i<st->nfft;i++)
+   {
+      kiss_fft_cpx x = fin[i];
+      fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
+      fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
+   }
+   opus_fft_impl(st, fout);
+}
+
+
+void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{
+   int i;
+   celt_assert2 (fin != fout, "In-place FFT not supported");
+   /* Bit-reverse the input */
+   for (i=0;i<st->nfft;i++)
+      fout[st->bitrev[i]] = fin[i];
+   for (i=0;i<st->nfft;i++)
+      fout[i].i = -fout[i].i;
+   opus_fft_impl(st, fout);
+   for (i=0;i<st->nfft;i++)
+      fout[i].i = -fout[i].i;
+}
diff --git a/libopus/celt/kiss_fft.h b/libopus/celt/kiss_fft.h
new file mode 100644
index 0000000..bffa2bf
--- /dev/null
+++ b/libopus/celt/kiss_fft.h
@@ -0,0 +1,200 @@
+/*Copyright (c) 2003-2004, Mark Borgerding
+  Lots of modifications by Jean-Marc Valin
+  Copyright (c) 2005-2007, Xiph.Org Foundation
+  Copyright (c) 2008,      Xiph.Org Foundation, CSIRO
+
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifndef KISS_FFT_H
+#define KISS_FFT_H
+
+#include <stdlib.h>
+#include <math.h>
+#include "arch.h"
+#include "cpu_support.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef USE_SIMD
+# include <xmmintrin.h>
+# define kiss_fft_scalar __m128
+#define KISS_FFT_MALLOC(nbytes) memalign(16,nbytes)
+#else
+#define KISS_FFT_MALLOC opus_alloc
+#endif
+
+#ifdef FIXED_POINT
+#include "arch.h"
+
+#  define kiss_fft_scalar opus_int32
+#  define kiss_twiddle_scalar opus_int16
+
+
+#else
+# ifndef kiss_fft_scalar
+/*  default is float */
+#   define kiss_fft_scalar float
+#   define kiss_twiddle_scalar float
+#   define KF_SUFFIX _celt_single
+# endif
+#endif
+
+typedef struct {
+    kiss_fft_scalar r;
+    kiss_fft_scalar i;
+}kiss_fft_cpx;
+
+typedef struct {
+   kiss_twiddle_scalar r;
+   kiss_twiddle_scalar i;
+}kiss_twiddle_cpx;
+
+#define MAXFACTORS 8
+/* e.g. an fft of length 128 has 4 factors
+ as far as kissfft is concerned
+ 4*4*4*2
+ */
+
+typedef struct arch_fft_state{
+   int is_supported;
+   void *priv;
+} arch_fft_state;
+
+typedef struct kiss_fft_state{
+    int nfft;
+    opus_val16 scale;
+#ifdef FIXED_POINT
+    int scale_shift;
+#endif
+    int shift;
+    opus_int16 factors[2*MAXFACTORS];
+    const opus_int16 *bitrev;
+    const kiss_twiddle_cpx *twiddles;
+    arch_fft_state *arch_fft;
+} kiss_fft_state;
+
+#if defined(HAVE_ARM_NE10)
+#include "arm/fft_arm.h"
+#endif
+
+/*typedef struct kiss_fft_state* kiss_fft_cfg;*/
+
+/**
+ *  opus_fft_alloc
+ *
+ *  Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
+ *
+ *  typical usage:      kiss_fft_cfg mycfg=opus_fft_alloc(1024,0,NULL,NULL);
+ *
+ *  The return value from fft_alloc is a cfg buffer used internally
+ *  by the fft routine or NULL.
+ *
+ *  If lenmem is NULL, then opus_fft_alloc will allocate a cfg buffer using malloc.
+ *  The returned value should be free()d when done to avoid memory leaks.
+ *
+ *  The state can be placed in a user supplied buffer 'mem':
+ *  If lenmem is not NULL and mem is not NULL and *lenmem is large enough,
+ *      then the function places the cfg in mem and the size used in *lenmem
+ *      and returns mem.
+ *
+ *  If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough),
+ *      then the function returns NULL and places the minimum cfg
+ *      buffer size in *lenmem.
+ * */
+
+kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch);
+
+kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch);
+
+/**
+ * opus_fft(cfg,in_out_buf)
+ *
+ * Perform an FFT on a complex input buffer.
+ * for a forward FFT,
+ * fin should be  f[0] , f[1] , ... ,f[nfft-1]
+ * fout will be   F[0] , F[1] , ... ,F[nfft-1]
+ * Note that each element is complex and can be accessed like
+    f[k].r and f[k].i
+ * */
+void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+void opus_ifft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+
+void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
+void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
+
+void opus_fft_free(const kiss_fft_state *cfg, int arch);
+
+
+void opus_fft_free_arch_c(kiss_fft_state *st);
+int opus_fft_alloc_arch_c(kiss_fft_state *st);
+
+#if !defined(OVERRIDE_OPUS_FFT)
+/* Is run-time CPU detection enabled on this platform? */
+#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10))
+
+extern int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(
+ kiss_fft_state *st);
+
+#define opus_fft_alloc_arch(_st, arch) \
+         ((*OPUS_FFT_ALLOC_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st))
+
+extern void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(
+ kiss_fft_state *st);
+#define opus_fft_free_arch(_st, arch) \
+         ((*OPUS_FFT_FREE_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st))
+
+extern void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
+ const kiss_fft_cpx *fin, kiss_fft_cpx *fout);
+#define opus_fft(_cfg, _fin, _fout, arch) \
+   ((*OPUS_FFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout))
+
+extern void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
+ const kiss_fft_cpx *fin, kiss_fft_cpx *fout);
+#define opus_ifft(_cfg, _fin, _fout, arch) \
+   ((*OPUS_IFFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout))
+
+#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
+
+#define opus_fft_alloc_arch(_st, arch) \
+         ((void)(arch), opus_fft_alloc_arch_c(_st))
+
+#define opus_fft_free_arch(_st, arch) \
+         ((void)(arch), opus_fft_free_arch_c(_st))
+
+#define opus_fft(_cfg, _fin, _fout, arch) \
+         ((void)(arch), opus_fft_c(_cfg, _fin, _fout))
+
+#define opus_ifft(_cfg, _fin, _fout, arch) \
+         ((void)(arch), opus_ifft_c(_cfg, _fin, _fout))
+
+#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
+#endif /* end if !defined(OVERRIDE_OPUS_FFT) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/libopus/celt/laplace.c b/libopus/celt/laplace.c
new file mode 100644
index 0000000..a7bca87
--- /dev/null
+++ b/libopus/celt/laplace.c
@@ -0,0 +1,134 @@
+/* Copyright (c) 2007 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "laplace.h"
+#include "mathops.h"
+
+/* The minimum probability of an energy delta (out of 32768). */
+#define LAPLACE_LOG_MINP (0)
+#define LAPLACE_MINP (1<<LAPLACE_LOG_MINP)
+/* The minimum number of guaranteed representable energy deltas (in one
+    direction). */
+#define LAPLACE_NMIN (16)
+
+/* When called, decay is positive and at most 11456. */
+static unsigned ec_laplace_get_freq1(unsigned fs0, int decay)
+{
+   unsigned ft;
+   ft = 32768 - LAPLACE_MINP*(2*LAPLACE_NMIN) - fs0;
+   return ft*(opus_int32)(16384-decay)>>15;
+}
+
+void ec_laplace_encode(ec_enc *enc, int *value, unsigned fs, int decay)
+{
+   unsigned fl;
+   int val = *value;
+   fl = 0;
+   if (val)
+   {
+      int s;
+      int i;
+      s = -(val<0);
+      val = (val+s)^s;
+      fl = fs;
+      fs = ec_laplace_get_freq1(fs, decay);
+      /* Search the decaying part of the PDF.*/
+      for (i=1; fs > 0 && i < val; i++)
+      {
+         fs *= 2;
+         fl += fs+2*LAPLACE_MINP;
+         fs = (fs*(opus_int32)decay)>>15;
+      }
+      /* Everything beyond that has probability LAPLACE_MINP. */
+      if (!fs)
+      {
+         int di;
+         int ndi_max;
+         ndi_max = (32768-fl+LAPLACE_MINP-1)>>LAPLACE_LOG_MINP;
+         ndi_max = (ndi_max-s)>>1;
+         di = IMIN(val - i, ndi_max - 1);
+         fl += (2*di+1+s)*LAPLACE_MINP;
+         fs = IMIN(LAPLACE_MINP, 32768-fl);
+         *value = (i+di+s)^s;
+      }
+      else
+      {
+         fs += LAPLACE_MINP;
+         fl += fs&~s;
+      }
+      celt_assert(fl+fs<=32768);
+      celt_assert(fs>0);
+   }
+   ec_encode_bin(enc, fl, fl+fs, 15);
+}
+
+int ec_laplace_decode(ec_dec *dec, unsigned fs, int decay)
+{
+   int val=0;
+   unsigned fl;
+   unsigned fm;
+   fm = ec_decode_bin(dec, 15);
+   fl = 0;
+   if (fm >= fs)
+   {
+      val++;
+      fl = fs;
+      fs = ec_laplace_get_freq1(fs, decay)+LAPLACE_MINP;
+      /* Search the decaying part of the PDF.*/
+      while(fs > LAPLACE_MINP && fm >= fl+2*fs)
+      {
+         fs *= 2;
+         fl += fs;
+         fs = ((fs-2*LAPLACE_MINP)*(opus_int32)decay)>>15;
+         fs += LAPLACE_MINP;
+         val++;
+      }
+      /* Everything beyond that has probability LAPLACE_MINP. */
+      if (fs <= LAPLACE_MINP)
+      {
+         int di;
+         di = (fm-fl)>>(LAPLACE_LOG_MINP+1);
+         val += di;
+         fl += 2*di*LAPLACE_MINP;
+      }
+      if (fm < fl+fs)
+         val = -val;
+      else
+         fl += fs;
+   }
+   celt_assert(fl<32768);
+   celt_assert(fs>0);
+   celt_assert(fl<=fm);
+   celt_assert(fm<IMIN(fl+fs,32768));
+   ec_dec_update(dec, fl, IMIN(fl+fs,32768), 32768);
+   return val;
+}
diff --git a/libopus/celt/laplace.h b/libopus/celt/laplace.h
new file mode 100644
index 0000000..46c14b5
--- /dev/null
+++ b/libopus/celt/laplace.h
@@ -0,0 +1,48 @@
+/* Copyright (c) 2007 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "entenc.h"
+#include "entdec.h"
+
+/** Encode a value that is assumed to be the realisation of a
+    Laplace-distributed random process
+ @param enc Entropy encoder state
+ @param value Value to encode
+ @param fs Probability of 0, multiplied by 32768
+ @param decay Probability of the value +/- 1, multiplied by 16384
+*/
+void ec_laplace_encode(ec_enc *enc, int *value, unsigned fs, int decay);
+
+/** Decode a value that is assumed to be the realisation of a
+    Laplace-distributed random process
+ @param dec Entropy decoder state
+ @param fs Probability of 0, multiplied by 32768
+ @param decay Probability of the value +/- 1, multiplied by 16384
+ @return Value decoded
+ */
+int ec_laplace_decode(ec_dec *dec, unsigned fs, int decay);
diff --git a/libopus/celt/mathops.c b/libopus/celt/mathops.c
new file mode 100644
index 0000000..6ee9b9e
--- /dev/null
+++ b/libopus/celt/mathops.c
@@ -0,0 +1,209 @@
+/* Copyright (c) 2002-2008 Jean-Marc Valin
+   Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file mathops.h
+   @brief Various math functions
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mathops.h"
+
+/*Compute floor(sqrt(_val)) with exact arithmetic.
+  _val must be greater than 0.
+  This has been tested on all possible 32-bit inputs greater than 0.*/
+unsigned isqrt32(opus_uint32 _val){
+  unsigned b;
+  unsigned g;
+  int      bshift;
+  /*Uses the second method from
+     http://www.azillionmonkeys.com/qed/sqroot.html
+    The main idea is to search for the largest binary digit b such that
+     (g+b)*(g+b) <= _val, and add it to the solution g.*/
+  g=0;
+  bshift=(EC_ILOG(_val)-1)>>1;
+  b=1U<<bshift;
+  do{
+    opus_uint32 t;
+    t=(((opus_uint32)g<<1)+b)<<bshift;
+    if(t<=_val){
+      g+=b;
+      _val-=t;
+    }
+    b>>=1;
+    bshift--;
+  }
+  while(bshift>=0);
+  return g;
+}
+
+#ifdef FIXED_POINT
+
+opus_val32 frac_div32(opus_val32 a, opus_val32 b)
+{
+   opus_val16 rcp;
+   opus_val32 result, rem;
+   int shift = celt_ilog2(b)-29;
+   a = VSHR32(a,shift);
+   b = VSHR32(b,shift);
+   /* 16-bit reciprocal */
+   rcp = ROUND16(celt_rcp(ROUND16(b,16)),3);
+   result = MULT16_32_Q15(rcp, a);
+   rem = PSHR32(a,2)-MULT32_32_Q31(result, b);
+   result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2));
+   if (result >= 536870912)       /*  2^29 */
+      return 2147483647;          /*  2^31 - 1 */
+   else if (result <= -536870912) /* -2^29 */
+      return -2147483647;         /* -2^31 */
+   else
+      return SHL32(result, 2);
+}
+
+/** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */
+opus_val16 celt_rsqrt_norm(opus_val32 x)
+{
+   opus_val16 n;
+   opus_val16 r;
+   opus_val16 r2;
+   opus_val16 y;
+   /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */
+   n = x-32768;
+   /* Get a rough initial guess for the root.
+      The optimal minimax quadratic approximation (using relative error) is
+       r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
+      Coefficients here, and the final result r, are Q14.*/
+   r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713))));
+   /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14.
+      We can compute the result from n and r using Q15 multiplies with some
+       adjustment, carefully done to avoid overflow.
+      Range of y is [-1564,1594]. */
+   r2 = MULT16_16_Q15(r, r);
+   y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);
+   /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).
+      This yields the Q14 reciprocal square root of the Q16 x, with a maximum
+       relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a
+       peak absolute error of 2.26591/16384. */
+   return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
+              SUB16(MULT16_16_Q15(y, 12288), 16384))));
+}
+
+/** Sqrt approximation (QX input, QX/2 output) */
+opus_val32 celt_sqrt(opus_val32 x)
+{
+   int k;
+   opus_val16 n;
+   opus_val32 rt;
+   static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664};
+   if (x==0)
+      return 0;
+   else if (x>=1073741824)
+      return 32767;
+   k = (celt_ilog2(x)>>1)-7;
+   x = VSHR32(x, 2*k);
+   n = x-32768;
+   rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
+              MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
+   rt = VSHR32(rt,7-k);
+   return rt;
+}
+
+#define L1 32767
+#define L2 -7651
+#define L3 8277
+#define L4 -626
+
+static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x)
+{
+   opus_val16 x2;
+
+   x2 = MULT16_16_P15(x,x);
+   return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2
+                                                                                ))))))));
+}
+
+#undef L1
+#undef L2
+#undef L3
+#undef L4
+
+opus_val16 celt_cos_norm(opus_val32 x)
+{
+   x = x&0x0001ffff;
+   if (x>SHL32(EXTEND32(1), 16))
+      x = SUB32(SHL32(EXTEND32(1), 17),x);
+   if (x&0x00007fff)
+   {
+      if (x<SHL32(EXTEND32(1), 15))
+      {
+         return _celt_cos_pi_2(EXTRACT16(x));
+      } else {
+         return NEG16(_celt_cos_pi_2(EXTRACT16(65536-x)));
+      }
+   } else {
+      if (x&0x0000ffff)
+         return 0;
+      else if (x&0x0001ffff)
+         return -32767;
+      else
+         return 32767;
+   }
+}
+
+/** Reciprocal approximation (Q15 input, Q16 output) */
+opus_val32 celt_rcp(opus_val32 x)
+{
+   int i;
+   opus_val16 n;
+   opus_val16 r;
+   celt_sig_assert(x>0);
+   i = celt_ilog2(x);
+   /* n is Q15 with range [0,1). */
+   n = VSHR32(x,i-15)-32768;
+   /* Start with a linear approximation:
+      r = 1.8823529411764706-0.9411764705882353*n.
+      The coefficients and the result are Q14 in the range [15420,30840].*/
+   r = ADD16(30840, MULT16_16_Q15(-15420, n));
+   /* Perform two Newton iterations:
+      r -= r*((r*n)-1.Q15)
+         = r*((r*n)+(r-1.Q15)). */
+   r = SUB16(r, MULT16_16_Q15(r,
+             ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))));
+   /* We subtract an extra 1 in the second iteration to avoid overflow; it also
+       neatly compensates for truncation error in the rest of the process. */
+   r = SUB16(r, ADD16(1, MULT16_16_Q15(r,
+             ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))));
+   /* r is now the Q15 solution to 2/(n+1), with a maximum relative error
+       of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
+       error of 1.24665/32768. */
+   return VSHR32(EXTEND32(r),i-16);
+}
+
+#endif
diff --git a/libopus/celt/mathops.h b/libopus/celt/mathops.h
new file mode 100644
index 0000000..5e86ff0
--- /dev/null
+++ b/libopus/celt/mathops.h
@@ -0,0 +1,290 @@
+/* Copyright (c) 2002-2008 Jean-Marc Valin
+   Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file mathops.h
+   @brief Various math functions
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef MATHOPS_H
+#define MATHOPS_H
+
+#include "arch.h"
+#include "entcode.h"
+#include "os_support.h"
+
+#define PI 3.141592653f
+
+/* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */
+#define FRAC_MUL16(a,b) ((16384+((opus_int32)(opus_int16)(a)*(opus_int16)(b)))>>15)
+
+unsigned isqrt32(opus_uint32 _val);
+
+/* CELT doesn't need it for fixed-point, by analysis.c does. */
+#if !defined(FIXED_POINT) || defined(ANALYSIS_C)
+#define cA 0.43157974f
+#define cB 0.67848403f
+#define cC 0.08595542f
+#define cE ((float)PI/2)
+static OPUS_INLINE float fast_atan2f(float y, float x) {
+   float x2, y2;
+   x2 = x*x;
+   y2 = y*y;
+   /* For very small values, we don't care about the answer, so
+      we can just return 0. */
+   if (x2 + y2 < 1e-18f)
+   {
+      return 0;
+   }
+   if(x2<y2){
+      float den = (y2 + cB*x2) * (y2 + cC*x2);
+      return -x*y*(y2 + cA*x2) / den + (y<0 ? -cE : cE);
+   }else{
+      float den = (x2 + cB*y2) * (x2 + cC*y2);
+      return  x*y*(x2 + cA*y2) / den + (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);
+   }
+}
+#undef cA
+#undef cB
+#undef cC
+#undef cE
+#endif
+
+
+#ifndef OVERRIDE_CELT_MAXABS16
+static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len)
+{
+   int i;
+   opus_val16 maxval = 0;
+   opus_val16 minval = 0;
+   for (i=0;i<len;i++)
+   {
+      maxval = MAX16(maxval, x[i]);
+      minval = MIN16(minval, x[i]);
+   }
+   return MAX32(EXTEND32(maxval),-EXTEND32(minval));
+}
+#endif
+
+#ifndef OVERRIDE_CELT_MAXABS32
+#ifdef FIXED_POINT
+static OPUS_INLINE opus_val32 celt_maxabs32(const opus_val32 *x, int len)
+{
+   int i;
+   opus_val32 maxval = 0;
+   opus_val32 minval = 0;
+   for (i=0;i<len;i++)
+   {
+      maxval = MAX32(maxval, x[i]);
+      minval = MIN32(minval, x[i]);
+   }
+   return MAX32(maxval, -minval);
+}
+#else
+#define celt_maxabs32(x,len) celt_maxabs16(x,len)
+#endif
+#endif
+
+
+#ifndef FIXED_POINT
+
+#define celt_sqrt(x) ((float)sqrt(x))
+#define celt_rsqrt(x) (1.f/celt_sqrt(x))
+#define celt_rsqrt_norm(x) (celt_rsqrt(x))
+#define celt_cos_norm(x) ((float)cos((.5f*PI)*(x)))
+#define celt_rcp(x) (1.f/(x))
+#define celt_div(a,b) ((a)/(b))
+#define frac_div32(a,b) ((float)(a)/(b))
+
+#ifdef FLOAT_APPROX
+
+/* Note: This assumes radix-2 floating point with the exponent at bits 23..30 and an offset of 127
+         denorm, +/- inf and NaN are *not* handled */
+
+/** Base-2 log approximation (log2(x)). */
+static OPUS_INLINE float celt_log2(float x)
+{
+   int integer;
+   float frac;
+   union {
+      float f;
+      opus_uint32 i;
+   } in;
+   in.f = x;
+   integer = (in.i>>23)-127;
+   in.i -= integer<<23;
+   frac = in.f - 1.5f;
+   frac = -0.41445418f + frac*(0.95909232f
+          + frac*(-0.33951290f + frac*0.16541097f));
+   return 1+integer+frac;
+}
+
+/** Base-2 exponential approximation (2^x). */
+static OPUS_INLINE float celt_exp2(float x)
+{
+   int integer;
+   float frac;
+   union {
+      float f;
+      opus_uint32 i;
+   } res;
+   integer = floor(x);
+   if (integer < -50)
+      return 0;
+   frac = x-integer;
+   /* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */
+   res.f = 0.99992522f + frac * (0.69583354f
+           + frac * (0.22606716f + 0.078024523f*frac));
+   res.i = (res.i + (integer<<23)) & 0x7fffffff;
+   return res.f;
+}
+
+#else
+#define celt_log2(x) ((float)(1.442695040888963387*log(x)))
+#define celt_exp2(x) ((float)exp(0.6931471805599453094*(x)))
+#endif
+
+#endif
+
+#ifdef FIXED_POINT
+
+#include "os_support.h"
+
+#ifndef OVERRIDE_CELT_ILOG2
+/** Integer log in base2. Undefined for zero and negative numbers */
+static OPUS_INLINE opus_int16 celt_ilog2(opus_int32 x)
+{
+   celt_sig_assert(x>0);
+   return EC_ILOG(x)-1;
+}
+#endif
+
+
+/** Integer log in base2. Defined for zero, but not for negative numbers */
+static OPUS_INLINE opus_int16 celt_zlog2(opus_val32 x)
+{
+   return x <= 0 ? 0 : celt_ilog2(x);
+}
+
+opus_val16 celt_rsqrt_norm(opus_val32 x);
+
+opus_val32 celt_sqrt(opus_val32 x);
+
+opus_val16 celt_cos_norm(opus_val32 x);
+
+/** Base-2 logarithm approximation (log2(x)). (Q14 input, Q10 output) */
+static OPUS_INLINE opus_val16 celt_log2(opus_val32 x)
+{
+   int i;
+   opus_val16 n, frac;
+   /* -0.41509302963303146, 0.9609890551383969, -0.31836011537636605,
+       0.15530808010959576, -0.08556153059057618 */
+   static const opus_val16 C[5] = {-6801+(1<<(13-DB_SHIFT)), 15746, -5217, 2545, -1401};
+   if (x==0)
+      return -32767;
+   i = celt_ilog2(x);
+   n = VSHR32(x,i-15)-32768-16384;
+   frac = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, C[4]))))))));
+   return SHL16(i-13,DB_SHIFT)+SHR16(frac,14-DB_SHIFT);
+}
+
+/*
+ K0 = 1
+ K1 = log(2)
+ K2 = 3-4*log(2)
+ K3 = 3*log(2) - 2
+*/
+#define D0 16383
+#define D1 22804
+#define D2 14819
+#define D3 10204
+
+static OPUS_INLINE opus_val32 celt_exp2_frac(opus_val16 x)
+{
+   opus_val16 frac;
+   frac = SHL16(x, 4);
+   return ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac))))));
+}
+/** Base-2 exponential approximation (2^x). (Q10 input, Q16 output) */
+static OPUS_INLINE opus_val32 celt_exp2(opus_val16 x)
+{
+   int integer;
+   opus_val16 frac;
+   integer = SHR16(x,10);
+   if (integer>14)
+      return 0x7f000000;
+   else if (integer < -15)
+      return 0;
+   frac = celt_exp2_frac(x-SHL16(integer,10));
+   return VSHR32(EXTEND32(frac), -integer-2);
+}
+
+opus_val32 celt_rcp(opus_val32 x);
+
+#define celt_div(a,b) MULT32_32_Q31((opus_val32)(a),celt_rcp(b))
+
+opus_val32 frac_div32(opus_val32 a, opus_val32 b);
+
+#define M1 32767
+#define M2 -21
+#define M3 -11943
+#define M4 4936
+
+/* Atan approximation using a 4th order polynomial. Input is in Q15 format
+   and normalized by pi/4. Output is in Q15 format */
+static OPUS_INLINE opus_val16 celt_atan01(opus_val16 x)
+{
+   return MULT16_16_P15(x, ADD32(M1, MULT16_16_P15(x, ADD32(M2, MULT16_16_P15(x, ADD32(M3, MULT16_16_P15(M4, x)))))));
+}
+
+#undef M1
+#undef M2
+#undef M3
+#undef M4
+
+/* atan2() approximation valid for positive input values */
+static OPUS_INLINE opus_val16 celt_atan2p(opus_val16 y, opus_val16 x)
+{
+   if (y < x)
+   {
+      opus_val32 arg;
+      arg = celt_div(SHL32(EXTEND32(y),15),x);
+      if (arg >= 32767)
+         arg = 32767;
+      return SHR16(celt_atan01(EXTRACT16(arg)),1);
+   } else {
+      opus_val32 arg;
+      arg = celt_div(SHL32(EXTEND32(x),15),y);
+      if (arg >= 32767)
+         arg = 32767;
+      return 25736-SHR16(celt_atan01(EXTRACT16(arg)),1);
+   }
+}
+
+#endif /* FIXED_POINT */
+#endif /* MATHOPS_H */
diff --git a/libopus/celt/mdct.c b/libopus/celt/mdct.c
new file mode 100644
index 0000000..5c6dab5
--- /dev/null
+++ b/libopus/celt/mdct.c
@@ -0,0 +1,343 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2008 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* This is a simple MDCT implementation that uses a N/4 complex FFT
+   to do most of the work. It should be relatively straightforward to
+   plug in pretty much and FFT here.
+
+   This replaces the Vorbis FFT (and uses the exact same API), which
+   was a bit too messy and that was ending up duplicating code
+   (might as well use the same FFT everywhere).
+
+   The algorithm is similar to (and inspired from) Fabrice Bellard's
+   MDCT implementation in FFMPEG, but has differences in signs, ordering
+   and scaling in many places.
+*/
+
+#ifndef SKIP_CONFIG_H
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#endif
+
+#include "mdct.h"
+#include "kiss_fft.h"
+#include "_kiss_fft_guts.h"
+#include <math.h>
+#include "os_support.h"
+#include "mathops.h"
+#include "stack_alloc.h"
+
+#if defined(MIPSr1_ASM)
+#include "mips/mdct_mipsr1.h"
+#endif
+
+
+#ifdef CUSTOM_MODES
+
+int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch)
+{
+   int i;
+   kiss_twiddle_scalar *trig;
+   int shift;
+   int N2=N>>1;
+   l->n = N;
+   l->maxshift = maxshift;
+   for (i=0;i<=maxshift;i++)
+   {
+      if (i==0)
+         l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0, arch);
+      else
+         l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0], arch);
+#ifndef ENABLE_TI_DSPLIB55
+      if (l->kfft[i]==NULL)
+         return 0;
+#endif
+   }
+   l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N-(N2>>maxshift))*sizeof(kiss_twiddle_scalar));
+   if (l->trig==NULL)
+     return 0;
+   for (shift=0;shift<=maxshift;shift++)
+   {
+      /* We have enough points that sine isn't necessary */
+#if defined(FIXED_POINT)
+#if 1
+      for (i=0;i<N2;i++)
+         trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2+16384),N));
+#else
+      for (i=0;i<N2;i++)
+         trig[i] = (kiss_twiddle_scalar)MAX32(-32767,MIN32(32767,floor(.5+32768*cos(2*M_PI*(i+.125)/N))));
+#endif
+#else
+      for (i=0;i<N2;i++)
+         trig[i] = (kiss_twiddle_scalar)cos(2*PI*(i+.125)/N);
+#endif
+      trig += N2;
+      N2 >>= 1;
+      N >>= 1;
+   }
+   return 1;
+}
+
+void clt_mdct_clear(mdct_lookup *l, int arch)
+{
+   int i;
+   for (i=0;i<=l->maxshift;i++)
+      opus_fft_free(l->kfft[i], arch);
+   opus_free((kiss_twiddle_scalar*)l->trig);
+}
+
+#endif /* CUSTOM_MODES */
+
+/* Forward MDCT trashes the input array */
+#ifndef OVERRIDE_clt_mdct_forward
+void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
+      const opus_val16 *window, int overlap, int shift, int stride, int arch)
+{
+   int i;
+   int N, N2, N4;
+   VARDECL(kiss_fft_scalar, f);
+   VARDECL(kiss_fft_cpx, f2);
+   const kiss_fft_state *st = l->kfft[shift];
+   const kiss_twiddle_scalar *trig;
+   opus_val16 scale;
+#ifdef FIXED_POINT
+   /* Allows us to scale with MULT16_32_Q16(), which is faster than
+      MULT16_32_Q15() on ARM. */
+   int scale_shift = st->scale_shift-1;
+#endif
+   SAVE_STACK;
+   (void)arch;
+   scale = st->scale;
+
+   N = l->n;
+   trig = l->trig;
+   for (i=0;i<shift;i++)
+   {
+      N >>= 1;
+      trig += N;
+   }
+   N2 = N>>1;
+   N4 = N>>2;
+
+   ALLOC(f, N2, kiss_fft_scalar);
+   ALLOC(f2, N4, kiss_fft_cpx);
+
+   /* Consider the input to be composed of four blocks: [a, b, c, d] */
+   /* Window, shuffle, fold */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
+      const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
+      for(i=0;i<((overlap+3)>>2);i++)
+      {
+         /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
+         *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
+         *yp++ = MULT16_32_Q15(*wp1, *xp1)    - MULT16_32_Q15(*wp2, xp2[-N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+      wp1 = window;
+      wp2 = window+overlap-1;
+      for(;i<N4-((overlap+3)>>2);i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+         *yp++ = *xp2;
+         *yp++ = *xp1;
+         xp1+=2;
+         xp2-=2;
+      }
+      for(;i<N4;i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+         *yp++ =  -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
+         *yp++ = MULT16_32_Q15(*wp2, *xp1)     + MULT16_32_Q15(*wp1, xp2[N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+   }
+   /* Pre-rotation */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const kiss_twiddle_scalar *t = &trig[0];
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_cpx yc;
+         kiss_twiddle_scalar t0, t1;
+         kiss_fft_scalar re, im, yr, yi;
+         t0 = t[i];
+         t1 = t[N4+i];
+         re = *yp++;
+         im = *yp++;
+         yr = S_MUL(re,t0)  -  S_MUL(im,t1);
+         yi = S_MUL(im,t0)  +  S_MUL(re,t1);
+         yc.r = yr;
+         yc.i = yi;
+         yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
+         yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
+         f2[st->bitrev[i]] = yc;
+      }
+   }
+
+   /* N/4 complex FFT, does not downscale anymore */
+   opus_fft_impl(st, f2);
+
+   /* Post-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_cpx * OPUS_RESTRICT fp = f2;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
+      const kiss_twiddle_scalar *t = &trig[0];
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar yr, yi;
+         yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
+         yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
+         *yp1 = yr;
+         *yp2 = yi;
+         fp++;
+         yp1 += 2*stride;
+         yp2 -= 2*stride;
+      }
+   }
+   RESTORE_STACK;
+}
+#endif /* OVERRIDE_clt_mdct_forward */
+
+#ifndef OVERRIDE_clt_mdct_backward
+void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
+      const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch)
+{
+   int i;
+   int N, N2, N4;
+   const kiss_twiddle_scalar *trig;
+   (void) arch;
+
+   N = l->n;
+   trig = l->trig;
+   for (i=0;i<shift;i++)
+   {
+      N >>= 1;
+      trig += N;
+   }
+   N2 = N>>1;
+   N4 = N>>2;
+
+   /* Pre-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1);
+      const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
+      const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev;
+      for(i=0;i<N4;i++)
+      {
+         int rev;
+         kiss_fft_scalar yr, yi;
+         rev = *bitrev++;
+         yr = ADD32_ovflw(S_MUL(*xp2, t[i]), S_MUL(*xp1, t[N4+i]));
+         yi = SUB32_ovflw(S_MUL(*xp1, t[i]), S_MUL(*xp2, t[N4+i]));
+         /* We swap real and imag because we use an FFT instead of an IFFT. */
+         yp[2*rev+1] = yr;
+         yp[2*rev] = yi;
+         /* Storing the pre-rotation directly in the bitrev order. */
+         xp1+=2*stride;
+         xp2-=2*stride;
+      }
+   }
+
+   opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));
+
+   /* Post-rotate and de-shuffle from both ends of the buffer at once to make
+      it in-place. */
+   {
+      kiss_fft_scalar * yp0 = out+(overlap>>1);
+      kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2;
+      const kiss_twiddle_scalar *t = &trig[0];
+      /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
+         middle pair will be computed twice. */
+      for(i=0;i<(N4+1)>>1;i++)
+      {
+         kiss_fft_scalar re, im, yr, yi;
+         kiss_twiddle_scalar t0, t1;
+         /* We swap real and imag because we're using an FFT instead of an IFFT. */
+         re = yp0[1];
+         im = yp0[0];
+         t0 = t[i];
+         t1 = t[N4+i];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1));
+         yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0));
+         /* We swap real and imag because we're using an FFT instead of an IFFT. */
+         re = yp1[1];
+         im = yp1[0];
+         yp0[0] = yr;
+         yp1[1] = yi;
+
+         t0 = t[(N4-i-1)];
+         t1 = t[(N2-i-1)];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1));
+         yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0));
+         yp1[0] = yr;
+         yp0[1] = yi;
+         yp0 += 2;
+         yp1 -= 2;
+      }
+   }
+
+   /* Mirror on both sides for TDAC */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      const opus_val16 * OPUS_RESTRICT wp1 = window;
+      const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
+
+      for(i = 0; i < overlap/2; i++)
+      {
+         kiss_fft_scalar x1, x2;
+         x1 = *xp1;
+         x2 = *yp1;
+         *yp1++ = SUB32_ovflw(MULT16_32_Q15(*wp2, x2), MULT16_32_Q15(*wp1, x1));
+         *xp1-- = ADD32_ovflw(MULT16_32_Q15(*wp1, x2), MULT16_32_Q15(*wp2, x1));
+         wp1++;
+         wp2--;
+      }
+   }
+}
+#endif /* OVERRIDE_clt_mdct_backward */
diff --git a/libopus/celt/mdct.h b/libopus/celt/mdct.h
new file mode 100644
index 0000000..160ae4e
--- /dev/null
+++ b/libopus/celt/mdct.h
@@ -0,0 +1,112 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2008 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* This is a simple MDCT implementation that uses a N/4 complex FFT
+   to do most of the work. It should be relatively straightforward to
+   plug in pretty much and FFT here.
+
+   This replaces the Vorbis FFT (and uses the exact same API), which
+   was a bit too messy and that was ending up duplicating code
+   (might as well use the same FFT everywhere).
+
+   The algorithm is similar to (and inspired from) Fabrice Bellard's
+   MDCT implementation in FFMPEG, but has differences in signs, ordering
+   and scaling in many places.
+*/
+
+#ifndef MDCT_H
+#define MDCT_H
+
+#include "opus_defines.h"
+#include "kiss_fft.h"
+#include "arch.h"
+
+typedef struct {
+   int n;
+   int maxshift;
+   const kiss_fft_state *kfft[4];
+   const kiss_twiddle_scalar * OPUS_RESTRICT trig;
+} mdct_lookup;
+
+#if defined(HAVE_ARM_NE10)
+#include "arm/mdct_arm.h"
+#endif
+
+
+int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch);
+void clt_mdct_clear(mdct_lookup *l, int arch);
+
+/** Compute a forward MDCT and scale by 4/N, trashes the input array */
+void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in,
+                        kiss_fft_scalar * OPUS_RESTRICT out,
+                        const opus_val16 *window, int overlap,
+                        int shift, int stride, int arch);
+
+/** Compute a backward MDCT (no scaling) and performs weighted overlap-add
+    (scales implicitly by 1/2) */
+void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in,
+      kiss_fft_scalar * OPUS_RESTRICT out,
+      const opus_val16 * OPUS_RESTRICT window,
+      int overlap, int shift, int stride, int arch);
+
+#if !defined(OVERRIDE_OPUS_MDCT)
+/* Is run-time CPU detection enabled on this platform? */
+#if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10)
+
+extern void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(
+      const mdct_lookup *l, kiss_fft_scalar *in,
+      kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window,
+      int overlap, int shift, int stride, int arch);
+
+#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
+   ((*CLT_MDCT_FORWARD_IMPL[(arch)&OPUS_ARCHMASK])(_l, _in, _out, \
+                                                   _window, _overlap, _shift, \
+                                                   _stride, _arch))
+
+extern void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(
+      const mdct_lookup *l, kiss_fft_scalar *in,
+      kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window,
+      int overlap, int shift, int stride, int arch);
+
+#define clt_mdct_backward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
+   (*CLT_MDCT_BACKWARD_IMPL[(arch)&OPUS_ARCHMASK])(_l, _in, _out, \
+                                                   _window, _overlap, _shift, \
+                                                   _stride, _arch)
+
+#else /* if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10) */
+
+#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
+   clt_mdct_forward_c(_l, _in, _out, _window, _overlap, _shift, _stride, _arch)
+
+#define clt_mdct_backward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
+   clt_mdct_backward_c(_l, _in, _out, _window, _overlap, _shift, _stride, _arch)
+
+#endif /* end if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10) && !defined(FIXED_POINT) */
+#endif /* end if !defined(OVERRIDE_OPUS_MDCT) */
+
+#endif
diff --git a/libopus/celt/mfrngcod.h b/libopus/celt/mfrngcod.h
new file mode 100644
index 0000000..809152a
--- /dev/null
+++ b/libopus/celt/mfrngcod.h
@@ -0,0 +1,48 @@
+/* Copyright (c) 2001-2008 Timothy B. Terriberry
+   Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(_mfrngcode_H)
+# define _mfrngcode_H (1)
+# include "entcode.h"
+
+/*Constants used by the entropy encoder/decoder.*/
+
+/*The number of bits to output at a time.*/
+# define EC_SYM_BITS   (8)
+/*The total number of bits in each of the state registers.*/
+# define EC_CODE_BITS  (32)
+/*The maximum symbol value.*/
+# define EC_SYM_MAX    ((1U<<EC_SYM_BITS)-1)
+/*Bits to shift by to move a symbol into the high-order position.*/
+# define EC_CODE_SHIFT (EC_CODE_BITS-EC_SYM_BITS-1)
+/*Carry bit of the high-order range symbol.*/
+# define EC_CODE_TOP   (((opus_uint32)1U)<<(EC_CODE_BITS-1))
+/*Low-order bit of the high-order range symbol.*/
+# define EC_CODE_BOT   (EC_CODE_TOP>>EC_SYM_BITS)
+/*The number of bits available for the last, partial symbol in the code field.*/
+# define EC_CODE_EXTRA ((EC_CODE_BITS-2)%EC_SYM_BITS+1)
+#endif
diff --git a/libopus/celt/mips/celt_mipsr1.h b/libopus/celt/mips/celt_mipsr1.h
new file mode 100644
index 0000000..c332fe0
--- /dev/null
+++ b/libopus/celt/mips/celt_mipsr1.h
@@ -0,0 +1,152 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2010 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef __CELT_MIPSR1_H__
+#define __CELT_MIPSR1_H__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#define CELT_C
+
+#include "os_support.h"
+#include "mdct.h"
+#include <math.h>
+#include "celt.h"
+#include "pitch.h"
+#include "bands.h"
+#include "modes.h"
+#include "entcode.h"
+#include "quant_bands.h"
+#include "rate.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "float_cast.h"
+#include <stdarg.h>
+#include "celt_lpc.h"
+#include "vq.h"
+
+#define OVERRIDE_COMB_FILTER_CONST
+#define OVERRIDE_comb_filter
+void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
+      opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
+      const opus_val16 *window, int overlap, int arch)
+{
+   int i;
+   opus_val32 x0, x1, x2, x3, x4;
+
+   (void)arch;
+
+   /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
+   opus_val16 g00, g01, g02, g10, g11, g12;
+   static const opus_val16 gains[3][3] = {
+         {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)},
+         {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)},
+         {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}};
+
+   if (g0==0 && g1==0)
+   {
+      /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
+      if (x!=y)
+         OPUS_MOVE(y, x, N);
+      return;
+   }
+
+   g00 = MULT16_16_P15(g0, gains[tapset0][0]);
+   g01 = MULT16_16_P15(g0, gains[tapset0][1]);
+   g02 = MULT16_16_P15(g0, gains[tapset0][2]);
+   g10 = MULT16_16_P15(g1, gains[tapset1][0]);
+   g11 = MULT16_16_P15(g1, gains[tapset1][1]);
+   g12 = MULT16_16_P15(g1, gains[tapset1][2]);
+   x1 = x[-T1+1];
+   x2 = x[-T1  ];
+   x3 = x[-T1-1];
+   x4 = x[-T1-2];
+   /* If the filter didn't change, we don't need the overlap */
+   if (g0==g1 && T0==T1 && tapset0==tapset1)
+      overlap=0;
+
+   for (i=0;i<overlap;i++)
+   {
+      opus_val16 f;
+      opus_val32 res;
+      f = MULT16_16_Q15(window[i],window[i]);
+      x0= x[i-T1+2];
+
+      asm volatile("MULT $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g00)), "r" ((int)x[i-T0]));
+
+      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g01)), "r" ((int)ADD32(x[i-T0-1],x[i-T0+1])));
+      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g02)), "r" ((int)ADD32(x[i-T0-2],x[i-T0+2])));
+      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g10)), "r" ((int)x2));
+      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g11)), "r" ((int)ADD32(x3,x1)));
+      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g12)), "r" ((int)ADD32(x4,x0)));
+
+      asm volatile("EXTR.W %0,$ac1, %1" : "=r" (res): "i" (15));
+
+      y[i] = x[i] + res;
+
+      x4=x3;
+      x3=x2;
+      x2=x1;
+      x1=x0;
+   }
+
+   x4 = x[i-T1-2];
+   x3 = x[i-T1-1];
+   x2 = x[i-T1];
+   x1 = x[i-T1+1];
+
+   if (g1==0)
+   {
+      /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
+      if (x!=y)
+         OPUS_MOVE(y+overlap, x+overlap, N-overlap);
+      return;
+   }
+
+   for (i=overlap;i<N;i++)
+   {
+      opus_val32 res;
+      x0=x[i-T1+2];
+
+      asm volatile("MULT $ac1, %0, %1" : : "r" ((int)g10), "r" ((int)x2));
+
+      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)g11), "r" ((int)ADD32(x3,x1)));
+      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)g12), "r" ((int)ADD32(x4,x0)));
+      asm volatile("EXTR.W %0,$ac1, %1" : "=r" (res): "i" (15));
+      y[i] = x[i] + res;
+      x4=x3;
+      x3=x2;
+      x2=x1;
+      x1=x0;
+   }
+}
+
+#endif /* __CELT_MIPSR1_H__ */
diff --git a/libopus/celt/mips/fixed_generic_mipsr1.h b/libopus/celt/mips/fixed_generic_mipsr1.h
new file mode 100644
index 0000000..4a05efb
--- /dev/null
+++ b/libopus/celt/mips/fixed_generic_mipsr1.h
@@ -0,0 +1,126 @@
+/* Copyright (C) 2007-2009 Xiph.Org Foundation
+   Copyright (C) 2003-2008 Jean-Marc Valin
+   Copyright (C) 2007-2008 CSIRO */
+/**
+   @file fixed_generic.h
+   @brief Generic fixed-point operations
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CELT_FIXED_GENERIC_MIPSR1_H
+#define CELT_FIXED_GENERIC_MIPSR1_H
+
+#undef MULT16_32_Q15_ADD
+static inline int MULT16_32_Q15_ADD(int a, int b, int c, int d) {
+    int m;
+    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
+    asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
+    return m;
+}
+
+#undef MULT16_32_Q15_SUB
+static inline int MULT16_32_Q15_SUB(int a, int b, int c, int d) {
+    int m;
+    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
+    asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
+    return m;
+}
+
+#undef MULT16_16_Q15_ADD
+static inline int MULT16_16_Q15_ADD(int a, int b, int c, int d) {
+    int m;
+    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
+    asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
+    return m;
+}
+
+#undef MULT16_16_Q15_SUB
+static inline int MULT16_16_Q15_SUB(int a, int b, int c, int d) {
+    int m;
+    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
+    asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
+    return m;
+}
+
+
+#undef MULT16_32_Q16
+static inline int MULT16_32_Q16(int a, int b)
+{
+    int c;
+    asm volatile("MULT $ac1,%0, %1" : : "r" (a), "r" (b));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (c): "i" (16));
+    return c;
+}
+
+#undef MULT16_32_P16
+static inline int MULT16_32_P16(int a, int b)
+{
+    int c;
+    asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
+    asm volatile("EXTR_R.W %0,$ac1, %1" : "=r" (c): "i" (16));
+    return c;
+}
+
+#undef MULT16_32_Q15
+static inline int MULT16_32_Q15(int a, int b)
+{
+    int c;
+    asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (c): "i" (15));
+    return c;
+}
+
+#undef MULT32_32_Q31
+static inline int MULT32_32_Q31(int a, int b)
+{
+    int r;
+    asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (r): "i" (31));
+    return r;
+}
+
+#undef PSHR32
+static inline int PSHR32(int a, int shift)
+{
+    int r;
+    asm volatile ("SHRAV_R.W %0, %1, %2" :"=r" (r): "r" (a), "r" (shift));
+    return r;
+}
+
+#undef MULT16_16_P15
+static inline int MULT16_16_P15(int a, int b)
+{
+    int r;
+    asm volatile ("mul %0, %1, %2" :"=r" (r): "r" (a), "r" (b));
+    asm volatile ("SHRA_R.W %0, %1, %2" : "+r" (r):  "0" (r), "i"(15));
+    return r;
+}
+
+#endif /* CELT_FIXED_GENERIC_MIPSR1_H */
diff --git a/libopus/celt/mips/kiss_fft_mipsr1.h b/libopus/celt/mips/kiss_fft_mipsr1.h
new file mode 100644
index 0000000..400ca4d
--- /dev/null
+++ b/libopus/celt/mips/kiss_fft_mipsr1.h
@@ -0,0 +1,167 @@
+/*Copyright (c) 2013, Xiph.Org Foundation and contributors.
+
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifndef KISS_FFT_MIPSR1_H
+#define KISS_FFT_MIPSR1_H
+
+#if !defined(KISS_FFT_GUTS_H)
+#error "This file should only be included from _kiss_fft_guts.h"
+#endif
+
+#ifdef FIXED_POINT
+
+#define S_MUL_ADD(a, b, c, d) (S_MUL(a,b)+S_MUL(c,d))
+#define S_MUL_SUB(a, b, c, d) (S_MUL(a,b)-S_MUL(c,d))
+
+#undef S_MUL_ADD
+static inline int S_MUL_ADD(int a, int b, int c, int d) {
+    int m;
+    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
+    asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
+    return m;
+}
+
+#undef S_MUL_SUB
+static inline int S_MUL_SUB(int a, int b, int c, int d) {
+    int m;
+    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
+    asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
+    return m;
+}
+
+#undef C_MUL
+#   define C_MUL(m,a,b) (m=C_MUL_fun(a,b))
+static inline kiss_fft_cpx C_MUL_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
+    kiss_fft_cpx m;
+
+    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
+    asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
+    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
+    asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
+
+    return m;
+}
+#undef C_MULC
+#   define C_MULC(m,a,b) (m=C_MULC_fun(a,b))
+static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
+    kiss_fft_cpx m;
+
+    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
+    asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
+    asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
+    asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
+    asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
+
+    return m;
+}
+
+#endif /* FIXED_POINT */
+
+#define OVERRIDE_kf_bfly5
+static void kf_bfly5(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+   int i, u;
+   kiss_fft_cpx scratch[13];
+
+   const kiss_twiddle_cpx *tw;
+   kiss_twiddle_cpx ya,yb;
+   kiss_fft_cpx * Fout_beg = Fout;
+
+#ifdef FIXED_POINT
+   ya.r = 10126;
+   ya.i = -31164;
+   yb.r = -26510;
+   yb.i = -19261;
+#else
+   ya = st->twiddles[fstride*m];
+   yb = st->twiddles[fstride*2*m];
+#endif
+
+   tw=st->twiddles;
+
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      Fout0=Fout;
+      Fout1=Fout0+m;
+      Fout2=Fout0+2*m;
+      Fout3=Fout0+3*m;
+      Fout4=Fout0+4*m;
+
+      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
+      for ( u=0; u<m; ++u ) {
+         scratch[0] = *Fout0;
+
+
+         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
+         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
+         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
+         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
+
+         C_ADD( scratch[7],scratch[1],scratch[4]);
+         C_SUB( scratch[10],scratch[1],scratch[4]);
+         C_ADD( scratch[8],scratch[2],scratch[3]);
+         C_SUB( scratch[9],scratch[2],scratch[3]);
+
+         Fout0->r += scratch[7].r + scratch[8].r;
+         Fout0->i += scratch[7].i + scratch[8].i;
+         scratch[5].r = scratch[0].r + S_MUL_ADD(scratch[7].r,ya.r,scratch[8].r,yb.r);
+         scratch[5].i = scratch[0].i + S_MUL_ADD(scratch[7].i,ya.r,scratch[8].i,yb.r);
+
+         scratch[6].r =  S_MUL_ADD(scratch[10].i,ya.i,scratch[9].i,yb.i);
+         scratch[6].i =  -S_MUL_ADD(scratch[10].r,ya.i,scratch[9].r,yb.i);
+
+         C_SUB(*Fout1,scratch[5],scratch[6]);
+         C_ADD(*Fout4,scratch[5],scratch[6]);
+
+         scratch[11].r = scratch[0].r + S_MUL_ADD(scratch[7].r,yb.r,scratch[8].r,ya.r);
+         scratch[11].i = scratch[0].i + S_MUL_ADD(scratch[7].i,yb.r,scratch[8].i,ya.r);
+
+         scratch[12].r =  S_MUL_SUB(scratch[9].i,ya.i,scratch[10].i,yb.i);
+         scratch[12].i =  S_MUL_SUB(scratch[10].r,yb.i,scratch[9].r,ya.i);
+
+         C_ADD(*Fout2,scratch[11],scratch[12]);
+         C_SUB(*Fout3,scratch[11],scratch[12]);
+
+         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+      }
+   }
+}
+
+
+#endif /* KISS_FFT_MIPSR1_H */
diff --git a/libopus/celt/mips/mdct_mipsr1.h b/libopus/celt/mips/mdct_mipsr1.h
new file mode 100644
index 0000000..2934dab
--- /dev/null
+++ b/libopus/celt/mips/mdct_mipsr1.h
@@ -0,0 +1,288 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2008 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* This is a simple MDCT implementation that uses a N/4 complex FFT
+   to do most of the work. It should be relatively straightforward to
+   plug in pretty much and FFT here.
+
+   This replaces the Vorbis FFT (and uses the exact same API), which
+   was a bit too messy and that was ending up duplicating code
+   (might as well use the same FFT everywhere).
+
+   The algorithm is similar to (and inspired from) Fabrice Bellard's
+   MDCT implementation in FFMPEG, but has differences in signs, ordering
+   and scaling in many places.
+*/
+#ifndef __MDCT_MIPSR1_H__
+#define __MDCT_MIPSR1_H__
+
+#ifndef SKIP_CONFIG_H
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#endif
+
+#include "mdct.h"
+#include "kiss_fft.h"
+#include "_kiss_fft_guts.h"
+#include <math.h>
+#include "os_support.h"
+#include "mathops.h"
+#include "stack_alloc.h"
+
+/* Forward MDCT trashes the input array */
+#define OVERRIDE_clt_mdct_forward
+void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
+      const opus_val16 *window, int overlap, int shift, int stride, int arch)
+{
+   int i;
+   int N, N2, N4;
+   VARDECL(kiss_fft_scalar, f);
+   VARDECL(kiss_fft_cpx, f2);
+   const kiss_fft_state *st = l->kfft[shift];
+   const kiss_twiddle_scalar *trig;
+   opus_val16 scale;
+#ifdef FIXED_POINT
+   /* Allows us to scale with MULT16_32_Q16(), which is faster than
+      MULT16_32_Q15() on ARM. */
+   int scale_shift = st->scale_shift-1;
+#endif
+
+    (void)arch;
+
+   SAVE_STACK;
+   scale = st->scale;
+
+   N = l->n;
+   trig = l->trig;
+   for (i=0;i<shift;i++)
+   {
+      N >>= 1;
+      trig += N;
+   }
+   N2 = N>>1;
+   N4 = N>>2;
+
+   ALLOC(f, N2, kiss_fft_scalar);
+   ALLOC(f2, N4, kiss_fft_cpx);
+
+   /* Consider the input to be composed of four blocks: [a, b, c, d] */
+   /* Window, shuffle, fold */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
+      const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
+      for(i=0;i<((overlap+3)>>2);i++)
+      {
+         /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
+          *yp++ = S_MUL_ADD(*wp2, xp1[N2],*wp1,*xp2);
+          *yp++ = S_MUL_SUB(*wp1, *xp1,*wp2, xp2[-N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+      wp1 = window;
+      wp2 = window+overlap-1;
+      for(;i<N4-((overlap+3)>>2);i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+         *yp++ = *xp2;
+         *yp++ = *xp1;
+         xp1+=2;
+         xp2-=2;
+      }
+      for(;i<N4;i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+          *yp++ =  S_MUL_SUB(*wp2, *xp2, *wp1, xp1[-N2]);
+          *yp++ = S_MUL_ADD(*wp2, *xp1, *wp1, xp2[N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+   }
+   /* Pre-rotation */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const kiss_twiddle_scalar *t = &trig[0];
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_cpx yc;
+         kiss_twiddle_scalar t0, t1;
+         kiss_fft_scalar re, im, yr, yi;
+         t0 = t[i];
+         t1 = t[N4+i];
+         re = *yp++;
+         im = *yp++;
+
+         yr = S_MUL_SUB(re,t0,im,t1);
+         yi = S_MUL_ADD(im,t0,re,t1);
+
+         yc.r = yr;
+         yc.i = yi;
+         yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
+         yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
+         f2[st->bitrev[i]] = yc;
+      }
+   }
+
+   /* N/4 complex FFT, does not downscale anymore */
+   opus_fft_impl(st, f2);
+
+   /* Post-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_cpx * OPUS_RESTRICT fp = f2;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
+      const kiss_twiddle_scalar *t = &trig[0];
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar yr, yi;
+         yr = S_MUL_SUB(fp->i,t[N4+i] , fp->r,t[i]);
+         yi = S_MUL_ADD(fp->r,t[N4+i] ,fp->i,t[i]);
+         *yp1 = yr;
+         *yp2 = yi;
+         fp++;
+         yp1 += 2*stride;
+         yp2 -= 2*stride;
+      }
+   }
+   RESTORE_STACK;
+}
+
+#define OVERRIDE_clt_mdct_backward
+void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
+      const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch)
+{
+   int i;
+   int N, N2, N4;
+   const kiss_twiddle_scalar *trig;
+
+    (void)arch;
+
+   N = l->n;
+   trig = l->trig;
+   for (i=0;i<shift;i++)
+   {
+      N >>= 1;
+      trig += N;
+   }
+   N2 = N>>1;
+   N4 = N>>2;
+
+   /* Pre-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1);
+      const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
+      const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev;
+      for(i=0;i<N4;i++)
+      {
+         int rev;
+         kiss_fft_scalar yr, yi;
+         rev = *bitrev++;
+         yr = S_MUL_ADD(*xp2, t[i] , *xp1, t[N4+i]);
+         yi = S_MUL_SUB(*xp1, t[i] , *xp2, t[N4+i]);
+         /* We swap real and imag because we use an FFT instead of an IFFT. */
+         yp[2*rev+1] = yr;
+         yp[2*rev] = yi;
+         /* Storing the pre-rotation directly in the bitrev order. */
+         xp1+=2*stride;
+         xp2-=2*stride;
+      }
+   }
+
+   opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));
+
+   /* Post-rotate and de-shuffle from both ends of the buffer at once to make
+      it in-place. */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
+      const kiss_twiddle_scalar *t = &trig[0];
+      /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
+         middle pair will be computed twice. */
+      for(i=0;i<(N4+1)>>1;i++)
+      {
+         kiss_fft_scalar re, im, yr, yi;
+         kiss_twiddle_scalar t0, t1;
+         /* We swap real and imag because we're using an FFT instead of an IFFT. */
+         re = yp0[1];
+         im = yp0[0];
+         t0 = t[i];
+         t1 = t[N4+i];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = S_MUL_ADD(re,t0 , im,t1);
+         yi = S_MUL_SUB(re,t1 , im,t0);
+         /* We swap real and imag because we're using an FFT instead of an IFFT. */
+         re = yp1[1];
+         im = yp1[0];
+         yp0[0] = yr;
+         yp1[1] = yi;
+
+         t0 = t[(N4-i-1)];
+         t1 = t[(N2-i-1)];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = S_MUL_ADD(re,t0,im,t1);
+         yi = S_MUL_SUB(re,t1,im,t0);
+         yp1[0] = yr;
+         yp0[1] = yi;
+         yp0 += 2;
+         yp1 -= 2;
+      }
+   }
+
+   /* Mirror on both sides for TDAC */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      const opus_val16 * OPUS_RESTRICT wp1 = window;
+      const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
+
+      for(i = 0; i < overlap/2; i++)
+      {
+         kiss_fft_scalar x1, x2;
+         x1 = *xp1;
+         x2 = *yp1;
+         *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
+         *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
+         wp1++;
+         wp2--;
+      }
+   }
+}
+#endif /* __MDCT_MIPSR1_H__ */
diff --git a/libopus/celt/mips/pitch_mipsr1.h b/libopus/celt/mips/pitch_mipsr1.h
new file mode 100644
index 0000000..a9500af
--- /dev/null
+++ b/libopus/celt/mips/pitch_mipsr1.h
@@ -0,0 +1,161 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file pitch.h
+   @brief Pitch analysis
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef PITCH_MIPSR1_H
+#define PITCH_MIPSR1_H
+
+#define OVERRIDE_DUAL_INNER_PROD
+static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+      int N, opus_val32 *xy1, opus_val32 *xy2, int arch)
+{
+   int j;
+   opus_val32 xy01=0;
+   opus_val32 xy02=0;
+
+   (void)arch;
+
+   asm volatile("MULT $ac1, $0, $0");
+   asm volatile("MULT $ac2, $0, $0");
+   /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
+   for (j=0;j<N;j++)
+   {
+      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j]));
+      asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j]));
+      ++j;
+      asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j]));
+      asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j]));
+   }
+   asm volatile ("mflo %0, $ac1": "=r"(xy01));
+   asm volatile ("mflo %0, $ac2": "=r"(xy02));
+   *xy1 = xy01;
+   *xy2 = xy02;
+}
+
+static inline void xcorr_kernel_mips(const opus_val16 * x,
+      const opus_val16 * y, opus_val32 sum[4], int len)
+{
+   int j;
+   opus_val16 y_0, y_1, y_2, y_3;
+
+    opus_int64 sum_0, sum_1, sum_2, sum_3;
+    sum_0 =  (opus_int64)sum[0];
+    sum_1 =  (opus_int64)sum[1];
+    sum_2 =  (opus_int64)sum[2];
+    sum_3 =  (opus_int64)sum[3];
+
+    y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */
+    y_0=*y++;
+    y_1=*y++;
+    y_2=*y++;
+    for (j=0;j<len-3;j+=4)
+    {
+        opus_val16 tmp;
+        tmp = *x++;
+        y_3=*y++;
+
+        sum_0 = __builtin_mips_madd( sum_0, tmp, y_0);
+        sum_1 = __builtin_mips_madd( sum_1, tmp, y_1);
+        sum_2 = __builtin_mips_madd( sum_2, tmp, y_2);
+        sum_3 = __builtin_mips_madd( sum_3, tmp, y_3);
+
+        tmp=*x++;
+        y_0=*y++;
+
+        sum_0 = __builtin_mips_madd( sum_0, tmp, y_1 );
+        sum_1 = __builtin_mips_madd( sum_1, tmp, y_2 );
+        sum_2 = __builtin_mips_madd( sum_2, tmp, y_3);
+        sum_3 = __builtin_mips_madd( sum_3, tmp, y_0);
+
+       tmp=*x++;
+       y_1=*y++;
+
+       sum_0 = __builtin_mips_madd( sum_0, tmp, y_2 );
+       sum_1 = __builtin_mips_madd( sum_1, tmp, y_3 );
+       sum_2 = __builtin_mips_madd( sum_2, tmp, y_0);
+       sum_3 = __builtin_mips_madd( sum_3, tmp, y_1);
+
+
+      tmp=*x++;
+      y_2=*y++;
+
+       sum_0 = __builtin_mips_madd( sum_0, tmp, y_3 );
+       sum_1 = __builtin_mips_madd( sum_1, tmp, y_0 );
+       sum_2 = __builtin_mips_madd( sum_2, tmp, y_1);
+       sum_3 = __builtin_mips_madd( sum_3, tmp, y_2);
+
+   }
+   if (j++<len)
+   {
+      opus_val16 tmp = *x++;
+      y_3=*y++;
+
+       sum_0 = __builtin_mips_madd( sum_0, tmp, y_0 );
+       sum_1 = __builtin_mips_madd( sum_1, tmp, y_1 );
+       sum_2 = __builtin_mips_madd( sum_2, tmp, y_2);
+       sum_3 = __builtin_mips_madd( sum_3, tmp, y_3);
+   }
+
+   if (j++<len)
+   {
+      opus_val16 tmp=*x++;
+      y_0=*y++;
+
+      sum_0 = __builtin_mips_madd( sum_0, tmp, y_1 );
+      sum_1 = __builtin_mips_madd( sum_1, tmp, y_2 );
+      sum_2 = __builtin_mips_madd( sum_2, tmp, y_3);
+      sum_3 = __builtin_mips_madd( sum_3, tmp, y_0);
+   }
+
+   if (j<len)
+   {
+      opus_val16 tmp=*x++;
+      y_1=*y++;
+
+       sum_0 = __builtin_mips_madd( sum_0, tmp, y_2 );
+       sum_1 = __builtin_mips_madd( sum_1, tmp, y_3 );
+       sum_2 = __builtin_mips_madd( sum_2, tmp, y_0);
+       sum_3 = __builtin_mips_madd( sum_3, tmp, y_1);
+
+   }
+
+   sum[0] = (opus_val32)sum_0;
+   sum[1] = (opus_val32)sum_1;
+   sum[2] = (opus_val32)sum_2;
+   sum[3] = (opus_val32)sum_3;
+}
+
+#define OVERRIDE_XCORR_KERNEL
+#define xcorr_kernel(x, y, sum, len, arch) \
+    ((void)(arch), xcorr_kernel_mips(x, y, sum, len))
+
+#endif /* PITCH_MIPSR1_H */
diff --git a/libopus/celt/mips/vq_mipsr1.h b/libopus/celt/mips/vq_mipsr1.h
new file mode 100644
index 0000000..f26a33e
--- /dev/null
+++ b/libopus/celt/mips/vq_mipsr1.h
@@ -0,0 +1,116 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef __VQ_MIPSR1_H__
+#define __VQ_MIPSR1_H__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mathops.h"
+#include "arch.h"
+
+#define OVERRIDE_vq_exp_rotation1
+static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
+{
+   int i;
+   opus_val16 ms;
+   celt_norm *Xptr;
+   Xptr = X;
+   ms = NEG16(s);
+   for (i=0;i<len-stride;i++)
+   {
+      celt_norm x1, x2;
+      x1 = Xptr[0];
+      x2 = Xptr[stride];
+      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
+      *Xptr++      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
+   }
+   Xptr = &X[len-2*stride-1];
+   for (i=len-2*stride-1;i>=0;i--)
+   {
+      celt_norm x1, x2;
+      x1 = Xptr[0];
+      x2 = Xptr[stride];
+      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
+      *Xptr--      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
+   }
+}
+
+#define OVERRIDE_renormalise_vector
+void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch)
+{
+   int i;
+#ifdef FIXED_POINT
+   int k;
+#endif
+   opus_val32 E = EPSILON;
+   opus_val16 g;
+   opus_val32 t;
+   celt_norm *xptr = X;
+   int X0, X1;
+
+   (void)arch;
+
+   asm volatile("mult $ac1, $0, $0");
+   asm volatile("MTLO %0, $ac1" : :"r" (E));
+   /*if(N %4)
+       printf("error");*/
+   for (i=0;i<N-2;i+=2)
+   {
+      X0 = (int)*xptr++;
+      asm volatile("MADD $ac1, %0, %1" : : "r" (X0), "r" (X0));
+
+      X1 = (int)*xptr++;
+      asm volatile("MADD $ac1, %0, %1" : : "r" (X1), "r" (X1));
+   }
+
+   for (;i<N;i++)
+   {
+      X0 = (int)*xptr++;
+      asm volatile("MADD $ac1, %0, %1" : : "r" (X0), "r" (X0));
+   }
+
+   asm volatile("MFLO %0, $ac1" : "=r" (E));
+#ifdef FIXED_POINT
+   k = celt_ilog2(E)>>1;
+#endif
+   t = VSHR32(E, 2*(k-7));
+   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
+
+   xptr = X;
+   for (i=0;i<N;i++)
+   {
+      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
+      xptr++;
+   }
+   /*return celt_sqrt(E);*/
+}
+
+#endif /* __VQ_MIPSR1_H__ */
diff --git a/libopus/celt/modes.c b/libopus/celt/modes.c
new file mode 100644
index 0000000..390c5e8
--- /dev/null
+++ b/libopus/celt/modes.c
@@ -0,0 +1,442 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "celt.h"
+#include "modes.h"
+#include "rate.h"
+#include "os_support.h"
+#include "stack_alloc.h"
+#include "quant_bands.h"
+#include "cpu_support.h"
+
+static const opus_int16 eband5ms[] = {
+/*0  200 400 600 800  1k 1.2 1.4 1.6  2k 2.4 2.8 3.2  4k 4.8 5.6 6.8  8k 9.6 12k 15.6 */
+  0,  1,  2,  3,  4,  5,  6,  7,  8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
+};
+
+/* Alternate tuning (partially derived from Vorbis) */
+#define BITALLOC_SIZE 11
+/* Bit allocation table in units of 1/32 bit/sample (0.1875 dB SNR) */
+static const unsigned char band_allocation[] = {
+/*0  200 400 600 800  1k 1.2 1.4 1.6  2k 2.4 2.8 3.2  4k 4.8 5.6 6.8  8k 9.6 12k 15.6 */
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+ 90, 80, 75, 69, 63, 56, 49, 40, 34, 29, 20, 18, 10,  0,  0,  0,  0,  0,  0,  0,  0,
+110,100, 90, 84, 78, 71, 65, 58, 51, 45, 39, 32, 26, 20, 12,  0,  0,  0,  0,  0,  0,
+118,110,103, 93, 86, 80, 75, 70, 65, 59, 53, 47, 40, 31, 23, 15,  4,  0,  0,  0,  0,
+126,119,112,104, 95, 89, 83, 78, 72, 66, 60, 54, 47, 39, 32, 25, 17, 12,  1,  0,  0,
+134,127,120,114,103, 97, 91, 85, 78, 72, 66, 60, 54, 47, 41, 35, 29, 23, 16, 10,  1,
+144,137,130,124,113,107,101, 95, 88, 82, 76, 70, 64, 57, 51, 45, 39, 33, 26, 15,  1,
+152,145,138,132,123,117,111,105, 98, 92, 86, 80, 74, 67, 61, 55, 49, 43, 36, 20,  1,
+162,155,148,142,133,127,121,115,108,102, 96, 90, 84, 77, 71, 65, 59, 53, 46, 30,  1,
+172,165,158,152,143,137,131,125,118,112,106,100, 94, 87, 81, 75, 69, 63, 56, 45, 20,
+200,200,200,200,200,200,200,200,198,193,188,183,178,173,168,163,158,153,148,129,104,
+};
+
+#ifndef CUSTOM_MODES_ONLY
+ #ifdef FIXED_POINT
+  #include "static_modes_fixed.h"
+ #else
+  #include "static_modes_float.h"
+ #endif
+#endif /* CUSTOM_MODES_ONLY */
+
+#ifndef M_PI
+#define M_PI 3.141592653
+#endif
+
+#ifdef CUSTOM_MODES
+
+/* Defining 25 critical bands for the full 0-20 kHz audio bandwidth
+   Taken from http://ccrma.stanford.edu/~jos/bbt/Bark_Frequency_Scale.html */
+#define BARK_BANDS 25
+static const opus_int16 bark_freq[BARK_BANDS+1] = {
+      0,   100,   200,   300,   400,
+    510,   630,   770,   920,  1080,
+   1270,  1480,  1720,  2000,  2320,
+   2700,  3150,  3700,  4400,  5300,
+   6400,  7700,  9500, 12000, 15500,
+  20000};
+
+static opus_int16 *compute_ebands(opus_int32 Fs, int frame_size, int res, int *nbEBands)
+{
+   opus_int16 *eBands;
+   int i, j, lin, low, high, nBark, offset=0;
+
+   /* All modes that have 2.5 ms short blocks use the same definition */
+   if (Fs == 400*(opus_int32)frame_size)
+   {
+      *nbEBands = sizeof(eband5ms)/sizeof(eband5ms[0])-1;
+      eBands = opus_alloc(sizeof(opus_int16)*(*nbEBands+1));
+      for (i=0;i<*nbEBands+1;i++)
+         eBands[i] = eband5ms[i];
+      return eBands;
+   }
+   /* Find the number of critical bands supported by our sampling rate */
+   for (nBark=1;nBark<BARK_BANDS;nBark++)
+    if (bark_freq[nBark+1]*2 >= Fs)
+       break;
+
+   /* Find where the linear part ends (i.e. where the spacing is more than min_width */
+   for (lin=0;lin<nBark;lin++)
+      if (bark_freq[lin+1]-bark_freq[lin] >= res)
+         break;
+
+   low = (bark_freq[lin]+res/2)/res;
+   high = nBark-lin;
+   *nbEBands = low+high;
+   eBands = opus_alloc(sizeof(opus_int16)*(*nbEBands+2));
+
+   if (eBands==NULL)
+      return NULL;
+
+   /* Linear spacing (min_width) */
+   for (i=0;i<low;i++)
+      eBands[i] = i;
+   if (low>0)
+      offset = eBands[low-1]*res - bark_freq[lin-1];
+   /* Spacing follows critical bands */
+   for (i=0;i<high;i++)
+   {
+      int target = bark_freq[lin+i];
+      /* Round to an even value */
+      eBands[i+low] = (target+offset/2+res)/(2*res)*2;
+      offset = eBands[i+low]*res - target;
+   }
+   /* Enforce the minimum spacing at the boundary */
+   for (i=0;i<*nbEBands;i++)
+      if (eBands[i] < i)
+         eBands[i] = i;
+   /* Round to an even value */
+   eBands[*nbEBands] = (bark_freq[nBark]+res)/(2*res)*2;
+   if (eBands[*nbEBands] > frame_size)
+      eBands[*nbEBands] = frame_size;
+   for (i=1;i<*nbEBands-1;i++)
+   {
+      if (eBands[i+1]-eBands[i] < eBands[i]-eBands[i-1])
+      {
+         eBands[i] -= (2*eBands[i]-eBands[i-1]-eBands[i+1])/2;
+      }
+   }
+   /* Remove any empty bands. */
+   for (i=j=0;i<*nbEBands;i++)
+      if(eBands[i+1]>eBands[j])
+         eBands[++j]=eBands[i+1];
+   *nbEBands=j;
+
+   for (i=1;i<*nbEBands;i++)
+   {
+      /* Every band must be smaller than the last band. */
+      celt_assert(eBands[i]-eBands[i-1]<=eBands[*nbEBands]-eBands[*nbEBands-1]);
+      /* Each band must be no larger than twice the size of the previous one. */
+      celt_assert(eBands[i+1]-eBands[i]<=2*(eBands[i]-eBands[i-1]));
+   }
+
+   return eBands;
+}
+
+static void compute_allocation_table(CELTMode *mode)
+{
+   int i, j;
+   unsigned char *allocVectors;
+   int maxBands = sizeof(eband5ms)/sizeof(eband5ms[0])-1;
+
+   mode->nbAllocVectors = BITALLOC_SIZE;
+   allocVectors = opus_alloc(sizeof(unsigned char)*(BITALLOC_SIZE*mode->nbEBands));
+   if (allocVectors==NULL)
+      return;
+
+   /* Check for standard mode */
+   if (mode->Fs == 400*(opus_int32)mode->shortMdctSize)
+   {
+      for (i=0;i<BITALLOC_SIZE*mode->nbEBands;i++)
+         allocVectors[i] = band_allocation[i];
+      mode->allocVectors = allocVectors;
+      return;
+   }
+   /* If not the standard mode, interpolate */
+   /* Compute per-codec-band allocation from per-critical-band matrix */
+   for (i=0;i<BITALLOC_SIZE;i++)
+   {
+      for (j=0;j<mode->nbEBands;j++)
+      {
+         int k;
+         for (k=0;k<maxBands;k++)
+         {
+            if (400*(opus_int32)eband5ms[k] > mode->eBands[j]*(opus_int32)mode->Fs/mode->shortMdctSize)
+               break;
+         }
+         if (k>maxBands-1)
+            allocVectors[i*mode->nbEBands+j] = band_allocation[i*maxBands + maxBands-1];
+         else {
+            opus_int32 a0, a1;
+            a1 = mode->eBands[j]*(opus_int32)mode->Fs/mode->shortMdctSize - 400*(opus_int32)eband5ms[k-1];
+            a0 = 400*(opus_int32)eband5ms[k] - mode->eBands[j]*(opus_int32)mode->Fs/mode->shortMdctSize;
+            allocVectors[i*mode->nbEBands+j] = (a0*band_allocation[i*maxBands+k-1]
+                                             + a1*band_allocation[i*maxBands+k])/(a0+a1);
+         }
+      }
+   }
+
+   /*printf ("\n");
+   for (i=0;i<BITALLOC_SIZE;i++)
+   {
+      for (j=0;j<mode->nbEBands;j++)
+         printf ("%d ", allocVectors[i*mode->nbEBands+j]);
+      printf ("\n");
+   }
+   exit(0);*/
+
+   mode->allocVectors = allocVectors;
+}
+
+#endif /* CUSTOM_MODES */
+
+CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
+{
+   int i;
+#ifdef CUSTOM_MODES
+   CELTMode *mode=NULL;
+   int res;
+   opus_val16 *window;
+   opus_int16 *logN;
+   int LM;
+   int arch = opus_select_arch();
+   ALLOC_STACK;
+#if !defined(VAR_ARRAYS) && !defined(USE_ALLOCA)
+   if (global_stack==NULL)
+      goto failure;
+#endif
+#endif
+
+#ifndef CUSTOM_MODES_ONLY
+   for (i=0;i<TOTAL_MODES;i++)
+   {
+      int j;
+      for (j=0;j<4;j++)
+      {
+         if (Fs == static_mode_list[i]->Fs &&
+               (frame_size<<j) == static_mode_list[i]->shortMdctSize*static_mode_list[i]->nbShortMdcts)
+         {
+            if (error)
+               *error = OPUS_OK;
+            return (CELTMode*)static_mode_list[i];
+         }
+      }
+   }
+#endif /* CUSTOM_MODES_ONLY */
+
+#ifndef CUSTOM_MODES
+   if (error)
+      *error = OPUS_BAD_ARG;
+   return NULL;
+#else
+
+   /* The good thing here is that permutation of the arguments will automatically be invalid */
+
+   if (Fs < 8000 || Fs > 96000)
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+   if (frame_size < 40 || frame_size > 1024 || frame_size%2!=0)
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+   /* Frames of less than 1ms are not supported. */
+   if ((opus_int32)frame_size*1000 < Fs)
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+
+   if ((opus_int32)frame_size*75 >= Fs && (frame_size%16)==0)
+   {
+     LM = 3;
+   } else if ((opus_int32)frame_size*150 >= Fs && (frame_size%8)==0)
+   {
+     LM = 2;
+   } else if ((opus_int32)frame_size*300 >= Fs && (frame_size%4)==0)
+   {
+     LM = 1;
+   } else
+   {
+     LM = 0;
+   }
+
+   /* Shorts longer than 3.3ms are not supported. */
+   if ((opus_int32)(frame_size>>LM)*300 > Fs)
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+
+   mode = opus_alloc(sizeof(CELTMode));
+   if (mode==NULL)
+      goto failure;
+   mode->Fs = Fs;
+
+   /* Pre/de-emphasis depends on sampling rate. The "standard" pre-emphasis
+      is defined as A(z) = 1 - 0.85*z^-1 at 48 kHz. Other rates should
+      approximate that. */
+   if(Fs < 12000) /* 8 kHz */
+   {
+      mode->preemph[0] =  QCONST16(0.3500061035f, 15);
+      mode->preemph[1] = -QCONST16(0.1799926758f, 15);
+      mode->preemph[2] =  QCONST16(0.2719968125f, SIG_SHIFT); /* exact 1/preemph[3] */
+      mode->preemph[3] =  QCONST16(3.6765136719f, 13);
+   } else if(Fs < 24000) /* 16 kHz */
+   {
+      mode->preemph[0] =  QCONST16(0.6000061035f, 15);
+      mode->preemph[1] = -QCONST16(0.1799926758f, 15);
+      mode->preemph[2] =  QCONST16(0.4424998650f, SIG_SHIFT); /* exact 1/preemph[3] */
+      mode->preemph[3] =  QCONST16(2.2598876953f, 13);
+   } else if(Fs < 40000) /* 32 kHz */
+   {
+      mode->preemph[0] =  QCONST16(0.7799987793f, 15);
+      mode->preemph[1] = -QCONST16(0.1000061035f, 15);
+      mode->preemph[2] =  QCONST16(0.7499771125f, SIG_SHIFT); /* exact 1/preemph[3] */
+      mode->preemph[3] =  QCONST16(1.3333740234f, 13);
+   } else /* 48 kHz */
+   {
+      mode->preemph[0] =  QCONST16(0.8500061035f, 15);
+      mode->preemph[1] =  QCONST16(0.0f, 15);
+      mode->preemph[2] =  QCONST16(1.f, SIG_SHIFT);
+      mode->preemph[3] =  QCONST16(1.f, 13);
+   }
+
+   mode->maxLM = LM;
+   mode->nbShortMdcts = 1<<LM;
+   mode->shortMdctSize = frame_size/mode->nbShortMdcts;
+   res = (mode->Fs+mode->shortMdctSize)/(2*mode->shortMdctSize);
+
+   mode->eBands = compute_ebands(Fs, mode->shortMdctSize, res, &mode->nbEBands);
+   if (mode->eBands==NULL)
+      goto failure;
+#if !defined(SMALL_FOOTPRINT)
+   /* Make sure we don't allocate a band larger than our PVQ table.
+      208 should be enough, but let's be paranoid. */
+   if ((mode->eBands[mode->nbEBands] - mode->eBands[mode->nbEBands-1])<<LM >
+    208) {
+       goto failure;
+   }
+#endif
+
+   mode->effEBands = mode->nbEBands;
+   while (mode->eBands[mode->effEBands] > mode->shortMdctSize)
+      mode->effEBands--;
+
+   /* Overlap must be divisible by 4 */
+   mode->overlap = ((mode->shortMdctSize>>2)<<2);
+
+   compute_allocation_table(mode);
+   if (mode->allocVectors==NULL)
+      goto failure;
+
+   window = (opus_val16*)opus_alloc(mode->overlap*sizeof(opus_val16));
+   if (window==NULL)
+      goto failure;
+
+#ifndef FIXED_POINT
+   for (i=0;i<mode->overlap;i++)
+      window[i] = Q15ONE*sin(.5*M_PI* sin(.5*M_PI*(i+.5)/mode->overlap) * sin(.5*M_PI*(i+.5)/mode->overlap));
+#else
+   for (i=0;i<mode->overlap;i++)
+      window[i] = MIN32(32767,floor(.5+32768.*sin(.5*M_PI* sin(.5*M_PI*(i+.5)/mode->overlap) * sin(.5*M_PI*(i+.5)/mode->overlap))));
+#endif
+   mode->window = window;
+
+   logN = (opus_int16*)opus_alloc(mode->nbEBands*sizeof(opus_int16));
+   if (logN==NULL)
+      goto failure;
+
+   for (i=0;i<mode->nbEBands;i++)
+      logN[i] = log2_frac(mode->eBands[i+1]-mode->eBands[i], BITRES);
+   mode->logN = logN;
+
+   compute_pulse_cache(mode, mode->maxLM);
+
+   if (clt_mdct_init(&mode->mdct, 2*mode->shortMdctSize*mode->nbShortMdcts,
+           mode->maxLM, arch) == 0)
+      goto failure;
+
+   if (error)
+      *error = OPUS_OK;
+
+   return mode;
+failure:
+   if (error)
+      *error = OPUS_ALLOC_FAIL;
+   if (mode!=NULL)
+      opus_custom_mode_destroy(mode);
+   return NULL;
+#endif /* !CUSTOM_MODES */
+}
+
+#ifdef CUSTOM_MODES
+void opus_custom_mode_destroy(CELTMode *mode)
+{
+   int arch = opus_select_arch();
+
+   if (mode == NULL)
+      return;
+#ifndef CUSTOM_MODES_ONLY
+   {
+     int i;
+     for (i=0;i<TOTAL_MODES;i++)
+     {
+        if (mode == static_mode_list[i])
+        {
+           return;
+        }
+     }
+   }
+#endif /* CUSTOM_MODES_ONLY */
+   opus_free((opus_int16*)mode->eBands);
+   opus_free((unsigned char*)mode->allocVectors);
+
+   opus_free((opus_val16*)mode->window);
+   opus_free((opus_int16*)mode->logN);
+
+   opus_free((opus_int16*)mode->cache.index);
+   opus_free((unsigned char*)mode->cache.bits);
+   opus_free((unsigned char*)mode->cache.caps);
+   clt_mdct_clear(&mode->mdct, arch);
+
+   opus_free((CELTMode *)mode);
+}
+#endif
diff --git a/libopus/celt/modes.h b/libopus/celt/modes.h
new file mode 100644
index 0000000..be813cc
--- /dev/null
+++ b/libopus/celt/modes.h
@@ -0,0 +1,75 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef MODES_H
+#define MODES_H
+
+#include "opus_types.h"
+#include "celt.h"
+#include "arch.h"
+#include "mdct.h"
+#include "entenc.h"
+#include "entdec.h"
+
+#define MAX_PERIOD 1024
+
+typedef struct {
+   int size;
+   const opus_int16 *index;
+   const unsigned char *bits;
+   const unsigned char *caps;
+} PulseCache;
+
+/** Mode definition (opaque)
+ @brief Mode definition
+ */
+struct OpusCustomMode {
+   opus_int32 Fs;
+   int          overlap;
+
+   int          nbEBands;
+   int          effEBands;
+   opus_val16    preemph[4];
+   const opus_int16   *eBands;   /**< Definition for each "pseudo-critical band" */
+
+   int         maxLM;
+   int         nbShortMdcts;
+   int         shortMdctSize;
+
+   int          nbAllocVectors; /**< Number of lines in the matrix below */
+   const unsigned char   *allocVectors;   /**< Number of bits in each band for several rates */
+   const opus_int16 *logN;
+
+   const opus_val16 *window;
+   mdct_lookup mdct;
+   PulseCache cache;
+};
+
+
+#endif
diff --git a/libopus/celt/opus_custom_demo.c b/libopus/celt/opus_custom_demo.c
new file mode 100644
index 0000000..ae41c0d
--- /dev/null
+++ b/libopus/celt/opus_custom_demo.c
@@ -0,0 +1,210 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "opus_custom.h"
+#include "arch.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#define MAX_PACKET 1275
+
+int main(int argc, char *argv[])
+{
+   int err;
+   char *inFile, *outFile;
+   FILE *fin, *fout;
+   OpusCustomMode *mode=NULL;
+   OpusCustomEncoder *enc;
+   OpusCustomDecoder *dec;
+   int len;
+   opus_int32 frame_size, channels, rate;
+   int bytes_per_packet;
+   unsigned char data[MAX_PACKET];
+   int complexity;
+#if !(defined (FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH)
+   int i;
+   double rmsd = 0;
+#endif
+   int count = 0;
+   opus_int32 skip;
+   opus_int16 *in, *out;
+   if (argc != 9 && argc != 8 && argc != 7)
+   {
+      fprintf (stderr, "Usage: test_opus_custom <rate> <channels> <frame size> "
+               " <bytes per packet> [<complexity> [packet loss rate]] "
+               "<input> <output>\n");
+      return 1;
+   }
+
+   rate = (opus_int32)atol(argv[1]);
+   channels = atoi(argv[2]);
+   frame_size = atoi(argv[3]);
+   mode = opus_custom_mode_create(rate, frame_size, NULL);
+   if (mode == NULL)
+   {
+      fprintf(stderr, "failed to create a mode\n");
+      return 1;
+   }
+
+   bytes_per_packet = atoi(argv[4]);
+   if (bytes_per_packet < 0 || bytes_per_packet > MAX_PACKET)
+   {
+      fprintf (stderr, "bytes per packet must be between 0 and %d\n",
+                        MAX_PACKET);
+      return 1;
+   }
+
+   inFile = argv[argc-2];
+   fin = fopen(inFile, "rb");
+   if (!fin)
+   {
+      fprintf (stderr, "Could not open input file %s\n", argv[argc-2]);
+      return 1;
+   }
+   outFile = argv[argc-1];
+   fout = fopen(outFile, "wb+");
+   if (!fout)
+   {
+      fprintf (stderr, "Could not open output file %s\n", argv[argc-1]);
+      fclose(fin);
+      return 1;
+   }
+
+   enc = opus_custom_encoder_create(mode, channels, &err);
+   if (err != 0)
+   {
+      fprintf(stderr, "Failed to create the encoder: %s\n", opus_strerror(err));
+      fclose(fin);
+      fclose(fout);
+      return 1;
+   }
+   dec = opus_custom_decoder_create(mode, channels, &err);
+   if (err != 0)
+   {
+      fprintf(stderr, "Failed to create the decoder: %s\n", opus_strerror(err));
+      fclose(fin);
+      fclose(fout);
+      return 1;
+   }
+   opus_custom_decoder_ctl(dec, OPUS_GET_LOOKAHEAD(&skip));
+
+   if (argc>7)
+   {
+      complexity=atoi(argv[5]);
+      opus_custom_encoder_ctl(enc,OPUS_SET_COMPLEXITY(complexity));
+   }
+
+   in = (opus_int16*)malloc(frame_size*channels*sizeof(opus_int16));
+   out = (opus_int16*)malloc(frame_size*channels*sizeof(opus_int16));
+
+   while (!feof(fin))
+   {
+      int ret;
+      err = fread(in, sizeof(short), frame_size*channels, fin);
+      if (feof(fin))
+         break;
+      len = opus_custom_encode(enc, in, frame_size, data, bytes_per_packet);
+      if (len <= 0)
+         fprintf (stderr, "opus_custom_encode() failed: %s\n", opus_strerror(len));
+
+      /* This is for simulating bit errors */
+#if 0
+      int errors = 0;
+      int eid = 0;
+      /* This simulates random bit error */
+      for (i=0;i<len*8;i++)
+      {
+         if (rand()%atoi(argv[8])==0)
+         {
+            if (i<64)
+            {
+               errors++;
+               eid = i;
+            }
+            data[i/8] ^= 1<<(7-(i%8));
+         }
+      }
+      if (errors == 1)
+         data[eid/8] ^= 1<<(7-(eid%8));
+      else if (errors%2 == 1)
+         data[rand()%8] ^= 1<<rand()%8;
+#endif
+
+#if 1 /* Set to zero to use the encoder's output instead */
+      /* This is to simulate packet loss */
+      if (argc==9 && rand()%1000<atoi(argv[argc-3]))
+      /*if (errors && (errors%2==0))*/
+         ret = opus_custom_decode(dec, NULL, len, out, frame_size);
+      else
+         ret = opus_custom_decode(dec, data, len, out, frame_size);
+      if (ret < 0)
+         fprintf(stderr, "opus_custom_decode() failed: %s\n", opus_strerror(ret));
+#else
+      for (i=0;i<ret*channels;i++)
+         out[i] = in[i];
+#endif
+#if !(defined (FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH)
+      for (i=0;i<ret*channels;i++)
+      {
+         rmsd += (in[i]-out[i])*1.0*(in[i]-out[i]);
+         /*out[i] -= in[i];*/
+      }
+#endif
+      count++;
+      fwrite(out+skip*channels, sizeof(short), (ret-skip)*channels, fout);
+      skip = 0;
+   }
+   PRINT_MIPS(stderr);
+
+   opus_custom_encoder_destroy(enc);
+   opus_custom_decoder_destroy(dec);
+   fclose(fin);
+   fclose(fout);
+   opus_custom_mode_destroy(mode);
+   free(in);
+   free(out);
+#if !(defined (FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH)
+   if (rmsd > 0)
+   {
+      rmsd = sqrt(rmsd/(1.0*frame_size*channels*count));
+      fprintf (stderr, "Error: encoder doesn't match decoder\n");
+      fprintf (stderr, "RMS mismatch is %f\n", rmsd);
+      return 1;
+   } else {
+      fprintf (stderr, "Encoder matches decoder!!\n");
+   }
+#endif
+   return 0;
+}
+
diff --git a/libopus/celt/os_support.h b/libopus/celt/os_support.h
new file mode 100644
index 0000000..a217197
--- /dev/null
+++ b/libopus/celt/os_support.h
@@ -0,0 +1,92 @@
+/* Copyright (C) 2007 Jean-Marc Valin
+
+   File: os_support.h
+   This is the (tiny) OS abstraction layer. Aside from math.h, this is the
+   only place where system headers are allowed.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+   IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+   OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+   DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+   ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef OS_SUPPORT_H
+#define OS_SUPPORT_H
+
+#ifdef CUSTOM_SUPPORT
+#  include "custom_support.h"
+#endif
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/** Opus wrapper for malloc(). To do your own dynamic allocation, all you need to do is replace this function and opus_free */
+#ifndef OVERRIDE_OPUS_ALLOC
+static OPUS_INLINE void *opus_alloc (size_t size)
+{
+   return malloc(size);
+}
+#endif
+
+/** Same as celt_alloc(), except that the area is only needed inside a CELT call (might cause problem with wideband though) */
+#ifndef OVERRIDE_OPUS_ALLOC_SCRATCH
+static OPUS_INLINE void *opus_alloc_scratch (size_t size)
+{
+   /* Scratch space doesn't need to be cleared */
+   return opus_alloc(size);
+}
+#endif
+
+/** Opus wrapper for free(). To do your own dynamic allocation, all you need to do is replace this function and opus_alloc */
+#ifndef OVERRIDE_OPUS_FREE
+static OPUS_INLINE void opus_free (void *ptr)
+{
+   free(ptr);
+}
+#endif
+
+/** Copy n elements from src to dst. The 0* term provides compile-time type checking  */
+#ifndef OVERRIDE_OPUS_COPY
+#define OPUS_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
+#endif
+
+/** Copy n elements from src to dst, allowing overlapping regions. The 0* term
+    provides compile-time type checking */
+#ifndef OVERRIDE_OPUS_MOVE
+#define OPUS_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
+#endif
+
+/** Set n elements of dst to zero */
+#ifndef OVERRIDE_OPUS_CLEAR
+#define OPUS_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst))))
+#endif
+
+/*#ifdef __GNUC__
+#pragma GCC poison printf sprintf
+#pragma GCC poison malloc free realloc calloc
+#endif*/
+
+#endif /* OS_SUPPORT_H */
+
diff --git a/libopus/celt/pitch.c b/libopus/celt/pitch.c
new file mode 100644
index 0000000..872582a
--- /dev/null
+++ b/libopus/celt/pitch.c
@@ -0,0 +1,537 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file pitch.c
+   @brief Pitch analysis
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pitch.h"
+#include "os_support.h"
+#include "modes.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "celt_lpc.h"
+
+static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
+                            int max_pitch, int *best_pitch
+#ifdef FIXED_POINT
+                            , int yshift, opus_val32 maxcorr
+#endif
+                            )
+{
+   int i, j;
+   opus_val32 Syy=1;
+   opus_val16 best_num[2];
+   opus_val32 best_den[2];
+#ifdef FIXED_POINT
+   int xshift;
+
+   xshift = celt_ilog2(maxcorr)-14;
+#endif
+
+   best_num[0] = -1;
+   best_num[1] = -1;
+   best_den[0] = 0;
+   best_den[1] = 0;
+   best_pitch[0] = 0;
+   best_pitch[1] = 1;
+   for (j=0;j<len;j++)
+      Syy = ADD32(Syy, SHR32(MULT16_16(y[j],y[j]), yshift));
+   for (i=0;i<max_pitch;i++)
+   {
+      if (xcorr[i]>0)
+      {
+         opus_val16 num;
+         opus_val32 xcorr16;
+         xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift));
+#ifndef FIXED_POINT
+         /* Considering the range of xcorr16, this should avoid both underflows
+            and overflows (inf) when squaring xcorr16 */
+         xcorr16 *= 1e-12f;
+#endif
+         num = MULT16_16_Q15(xcorr16,xcorr16);
+         if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
+         {
+            if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy))
+            {
+               best_num[1] = best_num[0];
+               best_den[1] = best_den[0];
+               best_pitch[1] = best_pitch[0];
+               best_num[0] = num;
+               best_den[0] = Syy;
+               best_pitch[0] = i;
+            } else {
+               best_num[1] = num;
+               best_den[1] = Syy;
+               best_pitch[1] = i;
+            }
+         }
+      }
+      Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift);
+      Syy = MAX32(1, Syy);
+   }
+}
+
+static void celt_fir5(opus_val16 *x,
+         const opus_val16 *num,
+         int N)
+{
+   int i;
+   opus_val16 num0, num1, num2, num3, num4;
+   opus_val32 mem0, mem1, mem2, mem3, mem4;
+   num0=num[0];
+   num1=num[1];
+   num2=num[2];
+   num3=num[3];
+   num4=num[4];
+   mem0=0;
+   mem1=0;
+   mem2=0;
+   mem3=0;
+   mem4=0;
+   for (i=0;i<N;i++)
+   {
+      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
+      sum = MAC16_16(sum,num0,mem0);
+      sum = MAC16_16(sum,num1,mem1);
+      sum = MAC16_16(sum,num2,mem2);
+      sum = MAC16_16(sum,num3,mem3);
+      sum = MAC16_16(sum,num4,mem4);
+      mem4 = mem3;
+      mem3 = mem2;
+      mem2 = mem1;
+      mem1 = mem0;
+      mem0 = x[i];
+      x[i] = ROUND16(sum, SIG_SHIFT);
+   }
+}
+
+
+void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
+      int len, int C, int arch)
+{
+   int i;
+   opus_val32 ac[5];
+   opus_val16 tmp=Q15ONE;
+   opus_val16 lpc[4];
+   opus_val16 lpc2[5];
+   opus_val16 c1 = QCONST16(.8f,15);
+#ifdef FIXED_POINT
+   int shift;
+   opus_val32 maxabs = celt_maxabs32(x[0], len);
+   if (C==2)
+   {
+      opus_val32 maxabs_1 = celt_maxabs32(x[1], len);
+      maxabs = MAX32(maxabs, maxabs_1);
+   }
+   if (maxabs<1)
+      maxabs=1;
+   shift = celt_ilog2(maxabs)-10;
+   if (shift<0)
+      shift=0;
+   if (C==2)
+      shift++;
+#endif
+   for (i=1;i<len>>1;i++)
+      x_lp[i] = SHR32(HALF32(HALF32(x[0][(2*i-1)]+x[0][(2*i+1)])+x[0][2*i]), shift);
+   x_lp[0] = SHR32(HALF32(HALF32(x[0][1])+x[0][0]), shift);
+   if (C==2)
+   {
+      for (i=1;i<len>>1;i++)
+         x_lp[i] += SHR32(HALF32(HALF32(x[1][(2*i-1)]+x[1][(2*i+1)])+x[1][2*i]), shift);
+      x_lp[0] += SHR32(HALF32(HALF32(x[1][1])+x[1][0]), shift);
+   }
+
+   _celt_autocorr(x_lp, ac, NULL, 0,
+                  4, len>>1, arch);
+
+   /* Noise floor -40 dB */
+#ifdef FIXED_POINT
+   ac[0] += SHR32(ac[0],13);
+#else
+   ac[0] *= 1.0001f;
+#endif
+   /* Lag windowing */
+   for (i=1;i<=4;i++)
+   {
+      /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
+#ifdef FIXED_POINT
+      ac[i] -= MULT16_32_Q15(2*i*i, ac[i]);
+#else
+      ac[i] -= ac[i]*(.008f*i)*(.008f*i);
+#endif
+   }
+
+   _celt_lpc(lpc, ac, 4);
+   for (i=0;i<4;i++)
+   {
+      tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp);
+      lpc[i] = MULT16_16_Q15(lpc[i], tmp);
+   }
+   /* Add a zero */
+   lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT);
+   lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]);
+   lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]);
+   lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]);
+   lpc2[4] = MULT16_16_Q15(c1,lpc[3]);
+   celt_fir5(x_lp, lpc2, len>>1);
+}
+
+/* Pure C implementation. */
+#ifdef FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
+      opus_val32 *xcorr, int len, int max_pitch, int arch)
+{
+
+#if 0 /* This is a simple version of the pitch correlation that should work
+         well on DSPs like Blackfin and TI C5x/C6x */
+   int i, j;
+#ifdef FIXED_POINT
+   opus_val32 maxcorr=1;
+#endif
+#if !defined(OVERRIDE_PITCH_XCORR)
+   (void)arch;
+#endif
+   for (i=0;i<max_pitch;i++)
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<len;j++)
+         sum = MAC16_16(sum, _x[j], _y[i+j]);
+      xcorr[i] = sum;
+#ifdef FIXED_POINT
+      maxcorr = MAX32(maxcorr, sum);
+#endif
+   }
+#ifdef FIXED_POINT
+   return maxcorr;
+#endif
+
+#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
+   int i;
+   /*The EDSP version requires that max_pitch is at least 1, and that _x is
+      32-bit aligned.
+     Since it's hard to put asserts in assembly, put them here.*/
+#ifdef FIXED_POINT
+   opus_val32 maxcorr=1;
+#endif
+   celt_assert(max_pitch>0);
+   celt_sig_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
+   for (i=0;i<max_pitch-3;i+=4)
+   {
+      opus_val32 sum[4]={0,0,0,0};
+      xcorr_kernel(_x, _y+i, sum, len, arch);
+      xcorr[i]=sum[0];
+      xcorr[i+1]=sum[1];
+      xcorr[i+2]=sum[2];
+      xcorr[i+3]=sum[3];
+#ifdef FIXED_POINT
+      sum[0] = MAX32(sum[0], sum[1]);
+      sum[2] = MAX32(sum[2], sum[3]);
+      sum[0] = MAX32(sum[0], sum[2]);
+      maxcorr = MAX32(maxcorr, sum[0]);
+#endif
+   }
+   /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
+   for (;i<max_pitch;i++)
+   {
+      opus_val32 sum;
+      sum = celt_inner_prod(_x, _y+i, len, arch);
+      xcorr[i] = sum;
+#ifdef FIXED_POINT
+      maxcorr = MAX32(maxcorr, sum);
+#endif
+   }
+#ifdef FIXED_POINT
+   return maxcorr;
+#endif
+#endif
+}
+
+void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
+                  int len, int max_pitch, int *pitch, int arch)
+{
+   int i, j;
+   int lag;
+   int best_pitch[2]={0,0};
+   VARDECL(opus_val16, x_lp4);
+   VARDECL(opus_val16, y_lp4);
+   VARDECL(opus_val32, xcorr);
+#ifdef FIXED_POINT
+   opus_val32 maxcorr;
+   opus_val32 xmax, ymax;
+   int shift=0;
+#endif
+   int offset;
+
+   SAVE_STACK;
+
+   celt_assert(len>0);
+   celt_assert(max_pitch>0);
+   lag = len+max_pitch;
+
+   ALLOC(x_lp4, len>>2, opus_val16);
+   ALLOC(y_lp4, lag>>2, opus_val16);
+   ALLOC(xcorr, max_pitch>>1, opus_val32);
+
+   /* Downsample by 2 again */
+   for (j=0;j<len>>2;j++)
+      x_lp4[j] = x_lp[2*j];
+   for (j=0;j<lag>>2;j++)
+      y_lp4[j] = y[2*j];
+
+#ifdef FIXED_POINT
+   xmax = celt_maxabs16(x_lp4, len>>2);
+   ymax = celt_maxabs16(y_lp4, lag>>2);
+   shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax)))-11;
+   if (shift>0)
+   {
+      for (j=0;j<len>>2;j++)
+         x_lp4[j] = SHR16(x_lp4[j], shift);
+      for (j=0;j<lag>>2;j++)
+         y_lp4[j] = SHR16(y_lp4[j], shift);
+      /* Use double the shift for a MAC */
+      shift *= 2;
+   } else {
+      shift = 0;
+   }
+#endif
+
+   /* Coarse search with 4x decimation */
+
+#ifdef FIXED_POINT
+   maxcorr =
+#endif
+   celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2, arch);
+
+   find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch
+#ifdef FIXED_POINT
+                   , 0, maxcorr
+#endif
+                   );
+
+   /* Finer search with 2x decimation */
+#ifdef FIXED_POINT
+   maxcorr=1;
+#endif
+   for (i=0;i<max_pitch>>1;i++)
+   {
+      opus_val32 sum;
+      xcorr[i] = 0;
+      if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2)
+         continue;
+#ifdef FIXED_POINT
+      sum = 0;
+      for (j=0;j<len>>1;j++)
+         sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);
+#else
+      sum = celt_inner_prod(x_lp, y+i, len>>1, arch);
+#endif
+      xcorr[i] = MAX32(-1, sum);
+#ifdef FIXED_POINT
+      maxcorr = MAX32(maxcorr, sum);
+#endif
+   }
+   find_best_pitch(xcorr, y, len>>1, max_pitch>>1, best_pitch
+#ifdef FIXED_POINT
+                   , shift+1, maxcorr
+#endif
+                   );
+
+   /* Refine by pseudo-interpolation */
+   if (best_pitch[0]>0 && best_pitch[0]<(max_pitch>>1)-1)
+   {
+      opus_val32 a, b, c;
+      a = xcorr[best_pitch[0]-1];
+      b = xcorr[best_pitch[0]];
+      c = xcorr[best_pitch[0]+1];
+      if ((c-a) > MULT16_32_Q15(QCONST16(.7f,15),b-a))
+         offset = 1;
+      else if ((a-c) > MULT16_32_Q15(QCONST16(.7f,15),b-c))
+         offset = -1;
+      else
+         offset = 0;
+   } else {
+      offset = 0;
+   }
+   *pitch = 2*best_pitch[0]-offset;
+
+   RESTORE_STACK;
+}
+
+#ifdef FIXED_POINT
+static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
+{
+   opus_val32 x2y2;
+   int sx, sy, shift;
+   opus_val32 g;
+   opus_val16 den;
+   if (xy == 0 || xx == 0 || yy == 0)
+      return 0;
+   sx = celt_ilog2(xx)-14;
+   sy = celt_ilog2(yy)-14;
+   shift = sx + sy;
+   x2y2 = SHR32(MULT16_16(VSHR32(xx, sx), VSHR32(yy, sy)), 14);
+   if (shift & 1) {
+      if (x2y2 < 32768)
+      {
+         x2y2 <<= 1;
+         shift--;
+      } else {
+         x2y2 >>= 1;
+         shift++;
+      }
+   }
+   den = celt_rsqrt_norm(x2y2);
+   g = MULT16_32_Q15(den, xy);
+   g = VSHR32(g, (shift>>1)-1);
+   return EXTRACT16(MIN32(g, Q15ONE));
+}
+#else
+static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
+{
+   return xy/celt_sqrt(1+xx*yy);
+}
+#endif
+
+static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
+opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
+      int N, int *T0_, int prev_period, opus_val16 prev_gain, int arch)
+{
+   int k, i, T, T0;
+   opus_val16 g, g0;
+   opus_val16 pg;
+   opus_val32 xy,xx,yy,xy2;
+   opus_val32 xcorr[3];
+   opus_val32 best_xy, best_yy;
+   int offset;
+   int minperiod0;
+   VARDECL(opus_val32, yy_lookup);
+   SAVE_STACK;
+
+   minperiod0 = minperiod;
+   maxperiod /= 2;
+   minperiod /= 2;
+   *T0_ /= 2;
+   prev_period /= 2;
+   N /= 2;
+   x += maxperiod;
+   if (*T0_>=maxperiod)
+      *T0_=maxperiod-1;
+
+   T = T0 = *T0_;
+   ALLOC(yy_lookup, maxperiod+1, opus_val32);
+   dual_inner_prod(x, x, x-T0, N, &xx, &xy, arch);
+   yy_lookup[0] = xx;
+   yy=xx;
+   for (i=1;i<=maxperiod;i++)
+   {
+      yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]);
+      yy_lookup[i] = MAX32(0, yy);
+   }
+   yy = yy_lookup[T0];
+   best_xy = xy;
+   best_yy = yy;
+   g = g0 = compute_pitch_gain(xy, xx, yy);
+   /* Look for any pitch at T/k */
+   for (k=2;k<=15;k++)
+   {
+      int T1, T1b;
+      opus_val16 g1;
+      opus_val16 cont=0;
+      opus_val16 thresh;
+      T1 = celt_udiv(2*T0+k, 2*k);
+      if (T1 < minperiod)
+         break;
+      /* Look for another strong correlation at T1b */
+      if (k==2)
+      {
+         if (T1+T0>maxperiod)
+            T1b = T0;
+         else
+            T1b = T0+T1;
+      } else
+      {
+         T1b = celt_udiv(2*second_check[k]*T0+k, 2*k);
+      }
+      dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2, arch);
+      xy = HALF32(xy + xy2);
+      yy = HALF32(yy_lookup[T1] + yy_lookup[T1b]);
+      g1 = compute_pitch_gain(xy, xx, yy);
+      if (abs(T1-prev_period)<=1)
+         cont = prev_gain;
+      else if (abs(T1-prev_period)<=2 && 5*k*k < T0)
+         cont = HALF16(prev_gain);
+      else
+         cont = 0;
+      thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
+      /* Bias against very high pitch (very short period) to avoid false-positives
+         due to short-term correlation */
+      if (T1<3*minperiod)
+         thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont);
+      else if (T1<2*minperiod)
+         thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont);
+      if (g1 > thresh)
+      {
+         best_xy = xy;
+         best_yy = yy;
+         T = T1;
+         g = g1;
+      }
+   }
+   best_xy = MAX32(0, best_xy);
+   if (best_yy <= best_xy)
+      pg = Q15ONE;
+   else
+      pg = SHR32(frac_div32(best_xy,best_yy+1),16);
+
+   for (k=0;k<3;k++)
+      xcorr[k] = celt_inner_prod(x, x-(T+k-1), N, arch);
+   if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))
+      offset = 1;
+   else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))
+      offset = -1;
+   else
+      offset = 0;
+   if (pg > g)
+      pg = g;
+   *T0_ = 2*T+offset;
+
+   if (*T0_<minperiod0)
+      *T0_=minperiod0;
+   RESTORE_STACK;
+   return pg;
+}
diff --git a/libopus/celt/pitch.h b/libopus/celt/pitch.h
new file mode 100644
index 0000000..e425f56
--- /dev/null
+++ b/libopus/celt/pitch.h
@@ -0,0 +1,192 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file pitch.h
+   @brief Pitch analysis
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef PITCH_H
+#define PITCH_H
+
+#include "modes.h"
+#include "cpu_support.h"
+
+#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) \
+  || ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT))
+#include "x86/pitch_sse.h"
+#endif
+
+#if defined(MIPSr1_ASM)
+#include "mips/pitch_mipsr1.h"
+#endif
+
+#if (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
+# include "arm/pitch_arm.h"
+#endif
+
+void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
+      int len, int C, int arch);
+
+void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
+                  int len, int max_pitch, int *pitch, int arch);
+
+opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
+      int N, int *T0, int prev_period, opus_val16 prev_gain, int arch);
+
+
+/* OPT: This is the kernel you really want to optimize. It gets used a lot
+   by the prefilter and by the PLC. */
+static OPUS_INLINE void xcorr_kernel_c(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
+{
+   int j;
+   opus_val16 y_0, y_1, y_2, y_3;
+   celt_assert(len>=3);
+   y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */
+   y_0=*y++;
+   y_1=*y++;
+   y_2=*y++;
+   for (j=0;j<len-3;j+=4)
+   {
+      opus_val16 tmp;
+      tmp = *x++;
+      y_3=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_0);
+      sum[1] = MAC16_16(sum[1],tmp,y_1);
+      sum[2] = MAC16_16(sum[2],tmp,y_2);
+      sum[3] = MAC16_16(sum[3],tmp,y_3);
+      tmp=*x++;
+      y_0=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_1);
+      sum[1] = MAC16_16(sum[1],tmp,y_2);
+      sum[2] = MAC16_16(sum[2],tmp,y_3);
+      sum[3] = MAC16_16(sum[3],tmp,y_0);
+      tmp=*x++;
+      y_1=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_2);
+      sum[1] = MAC16_16(sum[1],tmp,y_3);
+      sum[2] = MAC16_16(sum[2],tmp,y_0);
+      sum[3] = MAC16_16(sum[3],tmp,y_1);
+      tmp=*x++;
+      y_2=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_3);
+      sum[1] = MAC16_16(sum[1],tmp,y_0);
+      sum[2] = MAC16_16(sum[2],tmp,y_1);
+      sum[3] = MAC16_16(sum[3],tmp,y_2);
+   }
+   if (j++<len)
+   {
+      opus_val16 tmp = *x++;
+      y_3=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_0);
+      sum[1] = MAC16_16(sum[1],tmp,y_1);
+      sum[2] = MAC16_16(sum[2],tmp,y_2);
+      sum[3] = MAC16_16(sum[3],tmp,y_3);
+   }
+   if (j++<len)
+   {
+      opus_val16 tmp=*x++;
+      y_0=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_1);
+      sum[1] = MAC16_16(sum[1],tmp,y_2);
+      sum[2] = MAC16_16(sum[2],tmp,y_3);
+      sum[3] = MAC16_16(sum[3],tmp,y_0);
+   }
+   if (j<len)
+   {
+      opus_val16 tmp=*x++;
+      y_1=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_2);
+      sum[1] = MAC16_16(sum[1],tmp,y_3);
+      sum[2] = MAC16_16(sum[2],tmp,y_0);
+      sum[3] = MAC16_16(sum[3],tmp,y_1);
+   }
+}
+
+#ifndef OVERRIDE_XCORR_KERNEL
+#define xcorr_kernel(x, y, sum, len, arch) \
+    ((void)(arch),xcorr_kernel_c(x, y, sum, len))
+#endif /* OVERRIDE_XCORR_KERNEL */
+
+
+static OPUS_INLINE void dual_inner_prod_c(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+      int N, opus_val32 *xy1, opus_val32 *xy2)
+{
+   int i;
+   opus_val32 xy01=0;
+   opus_val32 xy02=0;
+   for (i=0;i<N;i++)
+   {
+      xy01 = MAC16_16(xy01, x[i], y01[i]);
+      xy02 = MAC16_16(xy02, x[i], y02[i]);
+   }
+   *xy1 = xy01;
+   *xy2 = xy02;
+}
+
+#ifndef OVERRIDE_DUAL_INNER_PROD
+# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
+    ((void)(arch),dual_inner_prod_c(x, y01, y02, N, xy1, xy2))
+#endif
+
+/*We make sure a C version is always available for cases where the overhead of
+  vectorization and passing around an arch flag aren't worth it.*/
+static OPUS_INLINE opus_val32 celt_inner_prod_c(const opus_val16 *x,
+      const opus_val16 *y, int N)
+{
+   int i;
+   opus_val32 xy=0;
+   for (i=0;i<N;i++)
+      xy = MAC16_16(xy, x[i], y[i]);
+   return xy;
+}
+
+#if !defined(OVERRIDE_CELT_INNER_PROD)
+# define celt_inner_prod(x, y, N, arch) \
+    ((void)(arch),celt_inner_prod_c(x, y, N))
+#endif
+
+#ifdef NON_STATIC_COMB_FILTER_CONST_C
+void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
+     opus_val16 g10, opus_val16 g11, opus_val16 g12);
+#endif
+
+
+#ifdef FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
+      opus_val32 *xcorr, int len, int max_pitch, int arch);
+
+#ifndef OVERRIDE_PITCH_XCORR
+# define celt_pitch_xcorr celt_pitch_xcorr_c
+#endif
+
+#endif
diff --git a/libopus/celt/quant_bands.c b/libopus/celt/quant_bands.c
new file mode 100644
index 0000000..39a221e
--- /dev/null
+++ b/libopus/celt/quant_bands.c
@@ -0,0 +1,563 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "quant_bands.h"
+#include "laplace.h"
+#include <math.h>
+#include "os_support.h"
+#include "arch.h"
+#include "mathops.h"
+#include "stack_alloc.h"
+#include "rate.h"
+
+#ifdef FIXED_POINT
+/* Mean energy in each band quantized in Q4 */
+const signed char eMeans[25] = {
+      103,100, 92, 85, 81,
+       77, 72, 70, 78, 75,
+       73, 71, 78, 74, 69,
+       72, 70, 74, 76, 71,
+       60, 60, 60, 60, 60
+};
+#else
+/* Mean energy in each band quantized in Q4 and converted back to float */
+const opus_val16 eMeans[25] = {
+      6.437500f, 6.250000f, 5.750000f, 5.312500f, 5.062500f,
+      4.812500f, 4.500000f, 4.375000f, 4.875000f, 4.687500f,
+      4.562500f, 4.437500f, 4.875000f, 4.625000f, 4.312500f,
+      4.500000f, 4.375000f, 4.625000f, 4.750000f, 4.437500f,
+      3.750000f, 3.750000f, 3.750000f, 3.750000f, 3.750000f
+};
+#endif
+/* prediction coefficients: 0.9, 0.8, 0.65, 0.5 */
+#ifdef FIXED_POINT
+static const opus_val16 pred_coef[4] = {29440, 26112, 21248, 16384};
+static const opus_val16 beta_coef[4] = {30147, 22282, 12124, 6554};
+static const opus_val16 beta_intra = 4915;
+#else
+static const opus_val16 pred_coef[4] = {29440/32768., 26112/32768., 21248/32768., 16384/32768.};
+static const opus_val16 beta_coef[4] = {30147/32768., 22282/32768., 12124/32768., 6554/32768.};
+static const opus_val16 beta_intra = 4915/32768.;
+#endif
+
+/*Parameters of the Laplace-like probability models used for the coarse energy.
+  There is one pair of parameters for each frame size, prediction type
+   (inter/intra), and band number.
+  The first number of each pair is the probability of 0, and the second is the
+   decay rate, both in Q8 precision.*/
+static const unsigned char e_prob_model[4][2][42] = {
+   /*120 sample frames.*/
+   {
+      /*Inter*/
+      {
+          72, 127,  65, 129,  66, 128,  65, 128,  64, 128,  62, 128,  64, 128,
+          64, 128,  92,  78,  92,  79,  92,  78,  90,  79, 116,  41, 115,  40,
+         114,  40, 132,  26, 132,  26, 145,  17, 161,  12, 176,  10, 177,  11
+      },
+      /*Intra*/
+      {
+          24, 179,  48, 138,  54, 135,  54, 132,  53, 134,  56, 133,  55, 132,
+          55, 132,  61, 114,  70,  96,  74,  88,  75,  88,  87,  74,  89,  66,
+          91,  67, 100,  59, 108,  50, 120,  40, 122,  37,  97,  43,  78,  50
+      }
+   },
+   /*240 sample frames.*/
+   {
+      /*Inter*/
+      {
+          83,  78,  84,  81,  88,  75,  86,  74,  87,  71,  90,  73,  93,  74,
+          93,  74, 109,  40, 114,  36, 117,  34, 117,  34, 143,  17, 145,  18,
+         146,  19, 162,  12, 165,  10, 178,   7, 189,   6, 190,   8, 177,   9
+      },
+      /*Intra*/
+      {
+          23, 178,  54, 115,  63, 102,  66,  98,  69,  99,  74,  89,  71,  91,
+          73,  91,  78,  89,  86,  80,  92,  66,  93,  64, 102,  59, 103,  60,
+         104,  60, 117,  52, 123,  44, 138,  35, 133,  31,  97,  38,  77,  45
+      }
+   },
+   /*480 sample frames.*/
+   {
+      /*Inter*/
+      {
+          61,  90,  93,  60, 105,  42, 107,  41, 110,  45, 116,  38, 113,  38,
+         112,  38, 124,  26, 132,  27, 136,  19, 140,  20, 155,  14, 159,  16,
+         158,  18, 170,  13, 177,  10, 187,   8, 192,   6, 175,   9, 159,  10
+      },
+      /*Intra*/
+      {
+          21, 178,  59, 110,  71,  86,  75,  85,  84,  83,  91,  66,  88,  73,
+          87,  72,  92,  75,  98,  72, 105,  58, 107,  54, 115,  52, 114,  55,
+         112,  56, 129,  51, 132,  40, 150,  33, 140,  29,  98,  35,  77,  42
+      }
+   },
+   /*960 sample frames.*/
+   {
+      /*Inter*/
+      {
+          42, 121,  96,  66, 108,  43, 111,  40, 117,  44, 123,  32, 120,  36,
+         119,  33, 127,  33, 134,  34, 139,  21, 147,  23, 152,  20, 158,  25,
+         154,  26, 166,  21, 173,  16, 184,  13, 184,  10, 150,  13, 139,  15
+      },
+      /*Intra*/
+      {
+          22, 178,  63, 114,  74,  82,  84,  83,  92,  82, 103,  62,  96,  72,
+          96,  67, 101,  73, 107,  72, 113,  55, 118,  52, 125,  52, 118,  52,
+         117,  55, 135,  49, 137,  39, 157,  32, 145,  29,  97,  33,  77,  40
+      }
+   }
+};
+
+static const unsigned char small_energy_icdf[3]={2,1,0};
+
+static opus_val32 loss_distortion(const opus_val16 *eBands, opus_val16 *oldEBands, int start, int end, int len, int C)
+{
+   int c, i;
+   opus_val32 dist = 0;
+   c=0; do {
+      for (i=start;i<end;i++)
+      {
+         opus_val16 d = SUB16(SHR16(eBands[i+c*len], 3), SHR16(oldEBands[i+c*len], 3));
+         dist = MAC16_16(dist, d,d);
+      }
+   } while (++c<C);
+   return MIN32(200,SHR32(dist,2*DB_SHIFT-6));
+}
+
+static int quant_coarse_energy_impl(const CELTMode *m, int start, int end,
+      const opus_val16 *eBands, opus_val16 *oldEBands,
+      opus_int32 budget, opus_int32 tell,
+      const unsigned char *prob_model, opus_val16 *error, ec_enc *enc,
+      int C, int LM, int intra, opus_val16 max_decay, int lfe)
+{
+   int i, c;
+   int badness = 0;
+   opus_val32 prev[2] = {0,0};
+   opus_val16 coef;
+   opus_val16 beta;
+
+   if (tell+3 <= budget)
+      ec_enc_bit_logp(enc, intra, 3);
+   if (intra)
+   {
+      coef = 0;
+      beta = beta_intra;
+   } else {
+      beta = beta_coef[LM];
+      coef = pred_coef[LM];
+   }
+
+   /* Encode at a fixed coarse resolution */
+   for (i=start;i<end;i++)
+   {
+      c=0;
+      do {
+         int bits_left;
+         int qi, qi0;
+         opus_val32 q;
+         opus_val16 x;
+         opus_val32 f, tmp;
+         opus_val16 oldE;
+         opus_val16 decay_bound;
+         x = eBands[i+c*m->nbEBands];
+         oldE = MAX16(-QCONST16(9.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]);
+#ifdef FIXED_POINT
+         f = SHL32(EXTEND32(x),7) - PSHR32(MULT16_16(coef,oldE), 8) - prev[c];
+         /* Rounding to nearest integer here is really important! */
+         qi = (f+QCONST32(.5f,DB_SHIFT+7))>>(DB_SHIFT+7);
+         decay_bound = EXTRACT16(MAX32(-QCONST16(28.f,DB_SHIFT),
+               SUB32((opus_val32)oldEBands[i+c*m->nbEBands],max_decay)));
+#else
+         f = x-coef*oldE-prev[c];
+         /* Rounding to nearest integer here is really important! */
+         qi = (int)floor(.5f+f);
+         decay_bound = MAX16(-QCONST16(28.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]) - max_decay;
+#endif
+         /* Prevent the energy from going down too quickly (e.g. for bands
+            that have just one bin) */
+         if (qi < 0 && x < decay_bound)
+         {
+            qi += (int)SHR16(SUB16(decay_bound,x), DB_SHIFT);
+            if (qi > 0)
+               qi = 0;
+         }
+         qi0 = qi;
+         /* If we don't have enough bits to encode all the energy, just assume
+             something safe. */
+         tell = ec_tell(enc);
+         bits_left = budget-tell-3*C*(end-i);
+         if (i!=start && bits_left < 30)
+         {
+            if (bits_left < 24)
+               qi = IMIN(1, qi);
+            if (bits_left < 16)
+               qi = IMAX(-1, qi);
+         }
+         if (lfe && i>=2)
+            qi = IMIN(qi, 0);
+         if (budget-tell >= 15)
+         {
+            int pi;
+            pi = 2*IMIN(i,20);
+            ec_laplace_encode(enc, &qi,
+                  prob_model[pi]<<7, prob_model[pi+1]<<6);
+         }
+         else if(budget-tell >= 2)
+         {
+            qi = IMAX(-1, IMIN(qi, 1));
+            ec_enc_icdf(enc, 2*qi^-(qi<0), small_energy_icdf, 2);
+         }
+         else if(budget-tell >= 1)
+         {
+            qi = IMIN(0, qi);
+            ec_enc_bit_logp(enc, -qi, 1);
+         }
+         else
+            qi = -1;
+         error[i+c*m->nbEBands] = PSHR32(f,7) - SHL16(qi,DB_SHIFT);
+         badness += abs(qi0-qi);
+         q = (opus_val32)SHL32(EXTEND32(qi),DB_SHIFT);
+
+         tmp = PSHR32(MULT16_16(coef,oldE),8) + prev[c] + SHL32(q,7);
+#ifdef FIXED_POINT
+         tmp = MAX32(-QCONST32(28.f, DB_SHIFT+7), tmp);
+#endif
+         oldEBands[i+c*m->nbEBands] = PSHR32(tmp, 7);
+         prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8));
+      } while (++c < C);
+   }
+   return lfe ? 0 : badness;
+}
+
+void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
+      const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget,
+      opus_val16 *error, ec_enc *enc, int C, int LM, int nbAvailableBytes,
+      int force_intra, opus_val32 *delayedIntra, int two_pass, int loss_rate, int lfe)
+{
+   int intra;
+   opus_val16 max_decay;
+   VARDECL(opus_val16, oldEBands_intra);
+   VARDECL(opus_val16, error_intra);
+   ec_enc enc_start_state;
+   opus_uint32 tell;
+   int badness1=0;
+   opus_int32 intra_bias;
+   opus_val32 new_distortion;
+   SAVE_STACK;
+
+   intra = force_intra || (!two_pass && *delayedIntra>2*C*(end-start) && nbAvailableBytes > (end-start)*C);
+   intra_bias = (opus_int32)((budget**delayedIntra*loss_rate)/(C*512));
+   new_distortion = loss_distortion(eBands, oldEBands, start, effEnd, m->nbEBands, C);
+
+   tell = ec_tell(enc);
+   if (tell+3 > budget)
+      two_pass = intra = 0;
+
+   max_decay = QCONST16(16.f,DB_SHIFT);
+   if (end-start>10)
+   {
+#ifdef FIXED_POINT
+      max_decay = MIN32(max_decay, SHL32(EXTEND32(nbAvailableBytes),DB_SHIFT-3));
+#else
+      max_decay = MIN32(max_decay, .125f*nbAvailableBytes);
+#endif
+   }
+   if (lfe)
+      max_decay = QCONST16(3.f,DB_SHIFT);
+   enc_start_state = *enc;
+
+   ALLOC(oldEBands_intra, C*m->nbEBands, opus_val16);
+   ALLOC(error_intra, C*m->nbEBands, opus_val16);
+   OPUS_COPY(oldEBands_intra, oldEBands, C*m->nbEBands);
+
+   if (two_pass || intra)
+   {
+      badness1 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands_intra, budget,
+            tell, e_prob_model[LM][1], error_intra, enc, C, LM, 1, max_decay, lfe);
+   }
+
+   if (!intra)
+   {
+      unsigned char *intra_buf;
+      ec_enc enc_intra_state;
+      opus_int32 tell_intra;
+      opus_uint32 nstart_bytes;
+      opus_uint32 nintra_bytes;
+      opus_uint32 save_bytes;
+      int badness2;
+      VARDECL(unsigned char, intra_bits);
+
+      tell_intra = ec_tell_frac(enc);
+
+      enc_intra_state = *enc;
+
+      nstart_bytes = ec_range_bytes(&enc_start_state);
+      nintra_bytes = ec_range_bytes(&enc_intra_state);
+      intra_buf = ec_get_buffer(&enc_intra_state) + nstart_bytes;
+      save_bytes = nintra_bytes-nstart_bytes;
+      if (save_bytes == 0)
+         save_bytes = ALLOC_NONE;
+      ALLOC(intra_bits, save_bytes, unsigned char);
+      /* Copy bits from intra bit-stream */
+      OPUS_COPY(intra_bits, intra_buf, nintra_bytes - nstart_bytes);
+
+      *enc = enc_start_state;
+
+      badness2 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands, budget,
+            tell, e_prob_model[LM][intra], error, enc, C, LM, 0, max_decay, lfe);
+
+      if (two_pass && (badness1 < badness2 || (badness1 == badness2 && ((opus_int32)ec_tell_frac(enc))+intra_bias > tell_intra)))
+      {
+         *enc = enc_intra_state;
+         /* Copy intra bits to bit-stream */
+         OPUS_COPY(intra_buf, intra_bits, nintra_bytes - nstart_bytes);
+         OPUS_COPY(oldEBands, oldEBands_intra, C*m->nbEBands);
+         OPUS_COPY(error, error_intra, C*m->nbEBands);
+         intra = 1;
+      }
+   } else {
+      OPUS_COPY(oldEBands, oldEBands_intra, C*m->nbEBands);
+      OPUS_COPY(error, error_intra, C*m->nbEBands);
+   }
+
+   if (intra)
+      *delayedIntra = new_distortion;
+   else
+      *delayedIntra = ADD32(MULT16_32_Q15(MULT16_16_Q15(pred_coef[LM], pred_coef[LM]),*delayedIntra),
+            new_distortion);
+
+   RESTORE_STACK;
+}
+
+void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, ec_enc *enc, int C)
+{
+   int i, c;
+
+   /* Encode finer resolution */
+   for (i=start;i<end;i++)
+   {
+      opus_int16 frac = 1<<fine_quant[i];
+      if (fine_quant[i] <= 0)
+         continue;
+      c=0;
+      do {
+         int q2;
+         opus_val16 offset;
+#ifdef FIXED_POINT
+         /* Has to be without rounding */
+         q2 = (error[i+c*m->nbEBands]+QCONST16(.5f,DB_SHIFT))>>(DB_SHIFT-fine_quant[i]);
+#else
+         q2 = (int)floor((error[i+c*m->nbEBands]+.5f)*frac);
+#endif
+         if (q2 > frac-1)
+            q2 = frac-1;
+         if (q2<0)
+            q2 = 0;
+         ec_enc_bits(enc, q2, fine_quant[i]);
+#ifdef FIXED_POINT
+         offset = SUB16(SHR32(SHL32(EXTEND32(q2),DB_SHIFT)+QCONST16(.5f,DB_SHIFT),fine_quant[i]),QCONST16(.5f,DB_SHIFT));
+#else
+         offset = (q2+.5f)*(1<<(14-fine_quant[i]))*(1.f/16384) - .5f;
+#endif
+         oldEBands[i+c*m->nbEBands] += offset;
+         error[i+c*m->nbEBands] -= offset;
+         /*printf ("%f ", error[i] - offset);*/
+      } while (++c < C);
+   }
+}
+
+void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, int *fine_priority, int bits_left, ec_enc *enc, int C)
+{
+   int i, prio, c;
+
+   /* Use up the remaining bits */
+   for (prio=0;prio<2;prio++)
+   {
+      for (i=start;i<end && bits_left>=C ;i++)
+      {
+         if (fine_quant[i] >= MAX_FINE_BITS || fine_priority[i]!=prio)
+            continue;
+         c=0;
+         do {
+            int q2;
+            opus_val16 offset;
+            q2 = error[i+c*m->nbEBands]<0 ? 0 : 1;
+            ec_enc_bits(enc, q2, 1);
+#ifdef FIXED_POINT
+            offset = SHR16(SHL16(q2,DB_SHIFT)-QCONST16(.5f,DB_SHIFT),fine_quant[i]+1);
+#else
+            offset = (q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384);
+#endif
+            oldEBands[i+c*m->nbEBands] += offset;
+            error[i+c*m->nbEBands] -= offset;
+            bits_left--;
+         } while (++c < C);
+      }
+   }
+}
+
+void unquant_coarse_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int intra, ec_dec *dec, int C, int LM)
+{
+   const unsigned char *prob_model = e_prob_model[LM][intra];
+   int i, c;
+   opus_val32 prev[2] = {0, 0};
+   opus_val16 coef;
+   opus_val16 beta;
+   opus_int32 budget;
+   opus_int32 tell;
+
+   if (intra)
+   {
+      coef = 0;
+      beta = beta_intra;
+   } else {
+      beta = beta_coef[LM];
+      coef = pred_coef[LM];
+   }
+
+   budget = dec->storage*8;
+
+   /* Decode at a fixed coarse resolution */
+   for (i=start;i<end;i++)
+   {
+      c=0;
+      do {
+         int qi;
+         opus_val32 q;
+         opus_val32 tmp;
+         /* It would be better to express this invariant as a
+            test on C at function entry, but that isn't enough
+            to make the static analyzer happy. */
+         celt_sig_assert(c<2);
+         tell = ec_tell(dec);
+         if(budget-tell>=15)
+         {
+            int pi;
+            pi = 2*IMIN(i,20);
+            qi = ec_laplace_decode(dec,
+                  prob_model[pi]<<7, prob_model[pi+1]<<6);
+         }
+         else if(budget-tell>=2)
+         {
+            qi = ec_dec_icdf(dec, small_energy_icdf, 2);
+            qi = (qi>>1)^-(qi&1);
+         }
+         else if(budget-tell>=1)
+         {
+            qi = -ec_dec_bit_logp(dec, 1);
+         }
+         else
+            qi = -1;
+         q = (opus_val32)SHL32(EXTEND32(qi),DB_SHIFT);
+
+         oldEBands[i+c*m->nbEBands] = MAX16(-QCONST16(9.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]);
+         tmp = PSHR32(MULT16_16(coef,oldEBands[i+c*m->nbEBands]),8) + prev[c] + SHL32(q,7);
+#ifdef FIXED_POINT
+         tmp = MAX32(-QCONST32(28.f, DB_SHIFT+7), tmp);
+#endif
+         oldEBands[i+c*m->nbEBands] = PSHR32(tmp, 7);
+         prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8));
+      } while (++c < C);
+   }
+}
+
+void unquant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, ec_dec *dec, int C)
+{
+   int i, c;
+   /* Decode finer resolution */
+   for (i=start;i<end;i++)
+   {
+      if (fine_quant[i] <= 0)
+         continue;
+      c=0;
+      do {
+         int q2;
+         opus_val16 offset;
+         q2 = ec_dec_bits(dec, fine_quant[i]);
+#ifdef FIXED_POINT
+         offset = SUB16(SHR32(SHL32(EXTEND32(q2),DB_SHIFT)+QCONST16(.5f,DB_SHIFT),fine_quant[i]),QCONST16(.5f,DB_SHIFT));
+#else
+         offset = (q2+.5f)*(1<<(14-fine_quant[i]))*(1.f/16384) - .5f;
+#endif
+         oldEBands[i+c*m->nbEBands] += offset;
+      } while (++c < C);
+   }
+}
+
+void unquant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant,  int *fine_priority, int bits_left, ec_dec *dec, int C)
+{
+   int i, prio, c;
+
+   /* Use up the remaining bits */
+   for (prio=0;prio<2;prio++)
+   {
+      for (i=start;i<end && bits_left>=C ;i++)
+      {
+         if (fine_quant[i] >= MAX_FINE_BITS || fine_priority[i]!=prio)
+            continue;
+         c=0;
+         do {
+            int q2;
+            opus_val16 offset;
+            q2 = ec_dec_bits(dec, 1);
+#ifdef FIXED_POINT
+            offset = SHR16(SHL16(q2,DB_SHIFT)-QCONST16(.5f,DB_SHIFT),fine_quant[i]+1);
+#else
+            offset = (q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384);
+#endif
+            oldEBands[i+c*m->nbEBands] += offset;
+            bits_left--;
+         } while (++c < C);
+      }
+   }
+}
+
+void amp2Log2(const CELTMode *m, int effEnd, int end,
+      celt_ener *bandE, opus_val16 *bandLogE, int C)
+{
+   int c, i;
+   c=0;
+   do {
+      for (i=0;i<effEnd;i++)
+      {
+         bandLogE[i+c*m->nbEBands] =
+               celt_log2(bandE[i+c*m->nbEBands])
+               - SHL16((opus_val16)eMeans[i],6);
+#ifdef FIXED_POINT
+         /* Compensate for bandE[] being Q12 but celt_log2() taking a Q14 input. */
+         bandLogE[i+c*m->nbEBands] += QCONST16(2.f, DB_SHIFT);
+#endif
+      }
+      for (i=effEnd;i<end;i++)
+         bandLogE[c*m->nbEBands+i] = -QCONST16(14.f,DB_SHIFT);
+   } while (++c < C);
+}
diff --git a/libopus/celt/quant_bands.h b/libopus/celt/quant_bands.h
new file mode 100644
index 0000000..0490bca
--- /dev/null
+++ b/libopus/celt/quant_bands.h
@@ -0,0 +1,66 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef QUANT_BANDS
+#define QUANT_BANDS
+
+#include "arch.h"
+#include "modes.h"
+#include "entenc.h"
+#include "entdec.h"
+#include "mathops.h"
+
+#ifdef FIXED_POINT
+extern const signed char eMeans[25];
+#else
+extern const opus_val16 eMeans[25];
+#endif
+
+void amp2Log2(const CELTMode *m, int effEnd, int end,
+      celt_ener *bandE, opus_val16 *bandLogE, int C);
+
+void log2Amp(const CELTMode *m, int start, int end,
+      celt_ener *eBands, const opus_val16 *oldEBands, int C);
+
+void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
+      const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget,
+      opus_val16 *error, ec_enc *enc, int C, int LM,
+      int nbAvailableBytes, int force_intra, opus_val32 *delayedIntra,
+      int two_pass, int loss_rate, int lfe);
+
+void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, ec_enc *enc, int C);
+
+void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, int *fine_priority, int bits_left, ec_enc *enc, int C);
+
+void unquant_coarse_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int intra, ec_dec *dec, int C, int LM);
+
+void unquant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, ec_dec *dec, int C);
+
+void unquant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, int *fine_priority, int bits_left, ec_dec *dec, int C);
+
+#endif /* QUANT_BANDS */
diff --git a/libopus/celt/rate.c b/libopus/celt/rate.c
new file mode 100644
index 0000000..465e1ba
--- /dev/null
+++ b/libopus/celt/rate.c
@@ -0,0 +1,644 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+#include "modes.h"
+#include "cwrs.h"
+#include "arch.h"
+#include "os_support.h"
+
+#include "entcode.h"
+#include "rate.h"
+
+static const unsigned char LOG2_FRAC_TABLE[24]={
+   0,
+   8,13,
+  16,19,21,23,
+  24,26,27,28,29,30,31,32,
+  32,33,34,34,35,36,36,37,37
+};
+
+#ifdef CUSTOM_MODES
+
+/*Determines if V(N,K) fits in a 32-bit unsigned integer.
+  N and K are themselves limited to 15 bits.*/
+static int fits_in32(int _n, int _k)
+{
+   static const opus_int16 maxN[15] = {
+      32767, 32767, 32767, 1476, 283, 109,  60,  40,
+       29,  24,  20,  18,  16,  14,  13};
+   static const opus_int16 maxK[15] = {
+      32767, 32767, 32767, 32767, 1172, 238,  95,  53,
+       36,  27,  22,  18,  16,  15,  13};
+   if (_n>=14)
+   {
+      if (_k>=14)
+         return 0;
+      else
+         return _n <= maxN[_k];
+   } else {
+      return _k <= maxK[_n];
+   }
+}
+
+void compute_pulse_cache(CELTMode *m, int LM)
+{
+   int C;
+   int i;
+   int j;
+   int curr=0;
+   int nbEntries=0;
+   int entryN[100], entryK[100], entryI[100];
+   const opus_int16 *eBands = m->eBands;
+   PulseCache *cache = &m->cache;
+   opus_int16 *cindex;
+   unsigned char *bits;
+   unsigned char *cap;
+
+   cindex = (opus_int16 *)opus_alloc(sizeof(cache->index[0])*m->nbEBands*(LM+2));
+   cache->index = cindex;
+
+   /* Scan for all unique band sizes */
+   for (i=0;i<=LM+1;i++)
+   {
+      for (j=0;j<m->nbEBands;j++)
+      {
+         int k;
+         int N = (eBands[j+1]-eBands[j])<<i>>1;
+         cindex[i*m->nbEBands+j] = -1;
+         /* Find other bands that have the same size */
+         for (k=0;k<=i;k++)
+         {
+            int n;
+            for (n=0;n<m->nbEBands && (k!=i || n<j);n++)
+            {
+               if (N == (eBands[n+1]-eBands[n])<<k>>1)
+               {
+                  cindex[i*m->nbEBands+j] = cindex[k*m->nbEBands+n];
+                  break;
+               }
+            }
+         }
+         if (cache->index[i*m->nbEBands+j] == -1 && N!=0)
+         {
+            int K;
+            entryN[nbEntries] = N;
+            K = 0;
+            while (fits_in32(N,get_pulses(K+1)) && K<MAX_PSEUDO)
+               K++;
+            entryK[nbEntries] = K;
+            cindex[i*m->nbEBands+j] = curr;
+            entryI[nbEntries] = curr;
+
+            curr += K+1;
+            nbEntries++;
+         }
+      }
+   }
+   bits = (unsigned char *)opus_alloc(sizeof(unsigned char)*curr);
+   cache->bits = bits;
+   cache->size = curr;
+   /* Compute the cache for all unique sizes */
+   for (i=0;i<nbEntries;i++)
+   {
+      unsigned char *ptr = bits+entryI[i];
+      opus_int16 tmp[CELT_MAX_PULSES+1];
+      get_required_bits(tmp, entryN[i], get_pulses(entryK[i]), BITRES);
+      for (j=1;j<=entryK[i];j++)
+         ptr[j] = tmp[get_pulses(j)]-1;
+      ptr[0] = entryK[i];
+   }
+
+   /* Compute the maximum rate for each band at which we'll reliably use as
+       many bits as we ask for. */
+   cache->caps = cap = (unsigned char *)opus_alloc(sizeof(cache->caps[0])*(LM+1)*2*m->nbEBands);
+   for (i=0;i<=LM;i++)
+   {
+      for (C=1;C<=2;C++)
+      {
+         for (j=0;j<m->nbEBands;j++)
+         {
+            int N0;
+            int max_bits;
+            N0 = m->eBands[j+1]-m->eBands[j];
+            /* N=1 bands only have a sign bit and fine bits. */
+            if (N0<<i == 1)
+               max_bits = C*(1+MAX_FINE_BITS)<<BITRES;
+            else
+            {
+               const unsigned char *pcache;
+               opus_int32           num;
+               opus_int32           den;
+               int                  LM0;
+               int                  N;
+               int                  offset;
+               int                  ndof;
+               int                  qb;
+               int                  k;
+               LM0 = 0;
+               /* Even-sized bands bigger than N=2 can be split one more time.
+                  As of commit 44203907 all bands >1 are even, including custom modes.*/
+               if (N0 > 2)
+               {
+                  N0>>=1;
+                  LM0--;
+               }
+               /* N0=1 bands can't be split down to N<2. */
+               else if (N0 <= 1)
+               {
+                  LM0=IMIN(i,1);
+                  N0<<=LM0;
+               }
+               /* Compute the cost for the lowest-level PVQ of a fully split
+                   band. */
+               pcache = bits + cindex[(LM0+1)*m->nbEBands+j];
+               max_bits = pcache[pcache[0]]+1;
+               /* Add in the cost of coding regular splits. */
+               N = N0;
+               for(k=0;k<i-LM0;k++){
+                  max_bits <<= 1;
+                  /* Offset the number of qtheta bits by log2(N)/2
+                      + QTHETA_OFFSET compared to their "fair share" of
+                      total/N */
+                  offset = ((m->logN[j]+((LM0+k)<<BITRES))>>1)-QTHETA_OFFSET;
+                  /* The number of qtheta bits we'll allocate if the remainder
+                      is to be max_bits.
+                     The average measured cost for theta is 0.89701 times qb,
+                      approximated here as 459/512. */
+                  num=459*(opus_int32)((2*N-1)*offset+max_bits);
+                  den=((opus_int32)(2*N-1)<<9)-459;
+                  qb = IMIN((num+(den>>1))/den, 57);
+                  celt_assert(qb >= 0);
+                  max_bits += qb;
+                  N <<= 1;
+               }
+               /* Add in the cost of a stereo split, if necessary. */
+               if (C==2)
+               {
+                  max_bits <<= 1;
+                  offset = ((m->logN[j]+(i<<BITRES))>>1)-(N==2?QTHETA_OFFSET_TWOPHASE:QTHETA_OFFSET);
+                  ndof = 2*N-1-(N==2);
+                  /* The average measured cost for theta with the step PDF is
+                      0.95164 times qb, approximated here as 487/512. */
+                  num = (N==2?512:487)*(opus_int32)(max_bits+ndof*offset);
+                  den = ((opus_int32)ndof<<9)-(N==2?512:487);
+                  qb = IMIN((num+(den>>1))/den, (N==2?64:61));
+                  celt_assert(qb >= 0);
+                  max_bits += qb;
+               }
+               /* Add the fine bits we'll use. */
+               /* Compensate for the extra DoF in stereo */
+               ndof = C*N + ((C==2 && N>2) ? 1 : 0);
+               /* Offset the number of fine bits by log2(N)/2 + FINE_OFFSET
+                   compared to their "fair share" of total/N */
+               offset = ((m->logN[j] + (i<<BITRES))>>1)-FINE_OFFSET;
+               /* N=2 is the only point that doesn't match the curve */
+               if (N==2)
+                  offset += 1<<BITRES>>2;
+               /* The number of fine bits we'll allocate if the remainder is
+                   to be max_bits. */
+               num = max_bits+ndof*offset;
+               den = (ndof-1)<<BITRES;
+               qb = IMIN((num+(den>>1))/den, MAX_FINE_BITS);
+               celt_assert(qb >= 0);
+               max_bits += C*qb<<BITRES;
+            }
+            max_bits = (4*max_bits/(C*((m->eBands[j+1]-m->eBands[j])<<i)))-64;
+            celt_assert(max_bits >= 0);
+            celt_assert(max_bits < 256);
+            *cap++ = (unsigned char)max_bits;
+         }
+      }
+   }
+}
+
+#endif /* CUSTOM_MODES */
+
+#define ALLOC_STEPS 6
+
+static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, int skip_start,
+      const int *bits1, const int *bits2, const int *thresh, const int *cap, opus_int32 total, opus_int32 *_balance,
+      int skip_rsv, int *intensity, int intensity_rsv, int *dual_stereo, int dual_stereo_rsv, int *bits,
+      int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth)
+{
+   opus_int32 psum;
+   int lo, hi;
+   int i, j;
+   int logM;
+   int stereo;
+   int codedBands=-1;
+   int alloc_floor;
+   opus_int32 left, percoeff;
+   int done;
+   opus_int32 balance;
+   SAVE_STACK;
+
+   alloc_floor = C<<BITRES;
+   stereo = C>1;
+
+   logM = LM<<BITRES;
+   lo = 0;
+   hi = 1<<ALLOC_STEPS;
+   for (i=0;i<ALLOC_STEPS;i++)
+   {
+      int mid = (lo+hi)>>1;
+      psum = 0;
+      done = 0;
+      for (j=end;j-->start;)
+      {
+         int tmp = bits1[j] + (mid*(opus_int32)bits2[j]>>ALLOC_STEPS);
+         if (tmp >= thresh[j] || done)
+         {
+            done = 1;
+            /* Don't allocate more than we can actually use */
+            psum += IMIN(tmp, cap[j]);
+         } else {
+            if (tmp >= alloc_floor)
+               psum += alloc_floor;
+         }
+      }
+      if (psum > total)
+         hi = mid;
+      else
+         lo = mid;
+   }
+   psum = 0;
+   /*printf ("interp bisection gave %d\n", lo);*/
+   done = 0;
+   for (j=end;j-->start;)
+   {
+      int tmp = bits1[j] + ((opus_int32)lo*bits2[j]>>ALLOC_STEPS);
+      if (tmp < thresh[j] && !done)
+      {
+         if (tmp >= alloc_floor)
+            tmp = alloc_floor;
+         else
+            tmp = 0;
+      } else
+         done = 1;
+      /* Don't allocate more than we can actually use */
+      tmp = IMIN(tmp, cap[j]);
+      bits[j] = tmp;
+      psum += tmp;
+   }
+
+   /* Decide which bands to skip, working backwards from the end. */
+   for (codedBands=end;;codedBands--)
+   {
+      int band_width;
+      int band_bits;
+      int rem;
+      j = codedBands-1;
+      /* Never skip the first band, nor a band that has been boosted by
+          dynalloc.
+         In the first case, we'd be coding a bit to signal we're going to waste
+          all the other bits.
+         In the second case, we'd be coding a bit to redistribute all the bits
+          we just signaled should be cocentrated in this band. */
+      if (j<=skip_start)
+      {
+         /* Give the bit we reserved to end skipping back. */
+         total += skip_rsv;
+         break;
+      }
+      /*Figure out how many left-over bits we would be adding to this band.
+        This can include bits we've stolen back from higher, skipped bands.*/
+      left = total-psum;
+      percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]);
+      left -= (m->eBands[codedBands]-m->eBands[start])*percoeff;
+      rem = IMAX(left-(m->eBands[j]-m->eBands[start]),0);
+      band_width = m->eBands[codedBands]-m->eBands[j];
+      band_bits = (int)(bits[j] + percoeff*band_width + rem);
+      /*Only code a skip decision if we're above the threshold for this band.
+        Otherwise it is force-skipped.
+        This ensures that we have enough bits to code the skip flag.*/
+      if (band_bits >= IMAX(thresh[j], alloc_floor+(1<<BITRES)))
+      {
+         if (encode)
+         {
+            /*This if() block is the only part of the allocation function that
+               is not a mandatory part of the bitstream: any bands we choose to
+               skip here must be explicitly signaled.*/
+            int depth_threshold;
+            /*We choose a threshold with some hysteresis to keep bands from
+               fluctuating in and out, but we try not to fold below a certain point. */
+            if (codedBands > 17)
+               depth_threshold = j<prev ? 7 : 9;
+            else
+               depth_threshold = 0;
+#ifdef FUZZING
+            if ((rand()&0x1) == 0)
+#else
+            if (codedBands<=start+2 || (band_bits > (depth_threshold*band_width<<LM<<BITRES)>>4 && j<=signalBandwidth))
+#endif
+            {
+               ec_enc_bit_logp(ec, 1, 1);
+               break;
+            }
+            ec_enc_bit_logp(ec, 0, 1);
+         } else if (ec_dec_bit_logp(ec, 1)) {
+            break;
+         }
+         /*We used a bit to skip this band.*/
+         psum += 1<<BITRES;
+         band_bits -= 1<<BITRES;
+      }
+      /*Reclaim the bits originally allocated to this band.*/
+      psum -= bits[j]+intensity_rsv;
+      if (intensity_rsv > 0)
+         intensity_rsv = LOG2_FRAC_TABLE[j-start];
+      psum += intensity_rsv;
+      if (band_bits >= alloc_floor)
+      {
+         /*If we have enough for a fine energy bit per channel, use it.*/
+         psum += alloc_floor;
+         bits[j] = alloc_floor;
+      } else {
+         /*Otherwise this band gets nothing at all.*/
+         bits[j] = 0;
+      }
+   }
+
+   celt_assert(codedBands > start);
+   /* Code the intensity and dual stereo parameters. */
+   if (intensity_rsv > 0)
+   {
+      if (encode)
+      {
+         *intensity = IMIN(*intensity, codedBands);
+         ec_enc_uint(ec, *intensity-start, codedBands+1-start);
+      }
+      else
+         *intensity = start+ec_dec_uint(ec, codedBands+1-start);
+   }
+   else
+      *intensity = 0;
+   if (*intensity <= start)
+   {
+      total += dual_stereo_rsv;
+      dual_stereo_rsv = 0;
+   }
+   if (dual_stereo_rsv > 0)
+   {
+      if (encode)
+         ec_enc_bit_logp(ec, *dual_stereo, 1);
+      else
+         *dual_stereo = ec_dec_bit_logp(ec, 1);
+   }
+   else
+      *dual_stereo = 0;
+
+   /* Allocate the remaining bits */
+   left = total-psum;
+   percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]);
+   left -= (m->eBands[codedBands]-m->eBands[start])*percoeff;
+   for (j=start;j<codedBands;j++)
+      bits[j] += ((int)percoeff*(m->eBands[j+1]-m->eBands[j]));
+   for (j=start;j<codedBands;j++)
+   {
+      int tmp = (int)IMIN(left, m->eBands[j+1]-m->eBands[j]);
+      bits[j] += tmp;
+      left -= tmp;
+   }
+   /*for (j=0;j<end;j++)printf("%d ", bits[j]);printf("\n");*/
+
+   balance = 0;
+   for (j=start;j<codedBands;j++)
+   {
+      int N0, N, den;
+      int offset;
+      int NClogN;
+      opus_int32 excess, bit;
+
+      celt_assert(bits[j] >= 0);
+      N0 = m->eBands[j+1]-m->eBands[j];
+      N=N0<<LM;
+      bit = (opus_int32)bits[j]+balance;
+
+      if (N>1)
+      {
+         excess = MAX32(bit-cap[j],0);
+         bits[j] = bit-excess;
+
+         /* Compensate for the extra DoF in stereo */
+         den=(C*N+ ((C==2 && N>2 && !*dual_stereo && j<*intensity) ? 1 : 0));
+
+         NClogN = den*(m->logN[j] + logM);
+
+         /* Offset for the number of fine bits by log2(N)/2 + FINE_OFFSET
+            compared to their "fair share" of total/N */
+         offset = (NClogN>>1)-den*FINE_OFFSET;
+
+         /* N=2 is the only point that doesn't match the curve */
+         if (N==2)
+            offset += den<<BITRES>>2;
+
+         /* Changing the offset for allocating the second and third
+             fine energy bit */
+         if (bits[j] + offset < den*2<<BITRES)
+            offset += NClogN>>2;
+         else if (bits[j] + offset < den*3<<BITRES)
+            offset += NClogN>>3;
+
+         /* Divide with rounding */
+         ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1))));
+         ebits[j] = celt_udiv(ebits[j], den)>>BITRES;
+
+         /* Make sure not to bust */
+         if (C*ebits[j] > (bits[j]>>BITRES))
+            ebits[j] = bits[j] >> stereo >> BITRES;
+
+         /* More than that is useless because that's about as far as PVQ can go */
+         ebits[j] = IMIN(ebits[j], MAX_FINE_BITS);
+
+         /* If we rounded down or capped this band, make it a candidate for the
+             final fine energy pass */
+         fine_priority[j] = ebits[j]*(den<<BITRES) >= bits[j]+offset;
+
+         /* Remove the allocated fine bits; the rest are assigned to PVQ */
+         bits[j] -= C*ebits[j]<<BITRES;
+
+      } else {
+         /* For N=1, all bits go to fine energy except for a single sign bit */
+         excess = MAX32(0,bit-(C<<BITRES));
+         bits[j] = bit-excess;
+         ebits[j] = 0;
+         fine_priority[j] = 1;
+      }
+
+      /* Fine energy can't take advantage of the re-balancing in
+          quant_all_bands().
+         Instead, do the re-balancing here.*/
+      if(excess > 0)
+      {
+         int extra_fine;
+         int extra_bits;
+         extra_fine = IMIN(excess>>(stereo+BITRES),MAX_FINE_BITS-ebits[j]);
+         ebits[j] += extra_fine;
+         extra_bits = extra_fine*C<<BITRES;
+         fine_priority[j] = extra_bits >= excess-balance;
+         excess -= extra_bits;
+      }
+      balance = excess;
+
+      celt_assert(bits[j] >= 0);
+      celt_assert(ebits[j] >= 0);
+   }
+   /* Save any remaining bits over the cap for the rebalancing in
+       quant_all_bands(). */
+   *_balance = balance;
+
+   /* The skipped bands use all their bits for fine energy. */
+   for (;j<end;j++)
+   {
+      ebits[j] = bits[j] >> stereo >> BITRES;
+      celt_assert(C*ebits[j]<<BITRES == bits[j]);
+      bits[j] = 0;
+      fine_priority[j] = ebits[j]<1;
+   }
+   RESTORE_STACK;
+   return codedBands;
+}
+
+int clt_compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo,
+      opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth)
+{
+   int lo, hi, len, j;
+   int codedBands;
+   int skip_start;
+   int skip_rsv;
+   int intensity_rsv;
+   int dual_stereo_rsv;
+   VARDECL(int, bits1);
+   VARDECL(int, bits2);
+   VARDECL(int, thresh);
+   VARDECL(int, trim_offset);
+   SAVE_STACK;
+
+   total = IMAX(total, 0);
+   len = m->nbEBands;
+   skip_start = start;
+   /* Reserve a bit to signal the end of manually skipped bands. */
+   skip_rsv = total >= 1<<BITRES ? 1<<BITRES : 0;
+   total -= skip_rsv;
+   /* Reserve bits for the intensity and dual stereo parameters. */
+   intensity_rsv = dual_stereo_rsv = 0;
+   if (C==2)
+   {
+      intensity_rsv = LOG2_FRAC_TABLE[end-start];
+      if (intensity_rsv>total)
+         intensity_rsv = 0;
+      else
+      {
+         total -= intensity_rsv;
+         dual_stereo_rsv = total>=1<<BITRES ? 1<<BITRES : 0;
+         total -= dual_stereo_rsv;
+      }
+   }
+   ALLOC(bits1, len, int);
+   ALLOC(bits2, len, int);
+   ALLOC(thresh, len, int);
+   ALLOC(trim_offset, len, int);
+
+   for (j=start;j<end;j++)
+   {
+      /* Below this threshold, we're sure not to allocate any PVQ bits */
+      thresh[j] = IMAX((C)<<BITRES, (3*(m->eBands[j+1]-m->eBands[j])<<LM<<BITRES)>>4);
+      /* Tilt of the allocation curve */
+      trim_offset[j] = C*(m->eBands[j+1]-m->eBands[j])*(alloc_trim-5-LM)*(end-j-1)
+            *(1<<(LM+BITRES))>>6;
+      /* Giving less resolution to single-coefficient bands because they get
+         more benefit from having one coarse value per coefficient*/
+      if ((m->eBands[j+1]-m->eBands[j])<<LM==1)
+         trim_offset[j] -= C<<BITRES;
+   }
+   lo = 1;
+   hi = m->nbAllocVectors - 1;
+   do
+   {
+      int done = 0;
+      int psum = 0;
+      int mid = (lo+hi) >> 1;
+      for (j=end;j-->start;)
+      {
+         int bitsj;
+         int N = m->eBands[j+1]-m->eBands[j];
+         bitsj = C*N*m->allocVectors[mid*len+j]<<LM>>2;
+         if (bitsj > 0)
+            bitsj = IMAX(0, bitsj + trim_offset[j]);
+         bitsj += offsets[j];
+         if (bitsj >= thresh[j] || done)
+         {
+            done = 1;
+            /* Don't allocate more than we can actually use */
+            psum += IMIN(bitsj, cap[j]);
+         } else {
+            if (bitsj >= C<<BITRES)
+               psum += C<<BITRES;
+         }
+      }
+      if (psum > total)
+         hi = mid - 1;
+      else
+         lo = mid + 1;
+      /*printf ("lo = %d, hi = %d\n", lo, hi);*/
+   }
+   while (lo <= hi);
+   hi = lo--;
+   /*printf ("interp between %d and %d\n", lo, hi);*/
+   for (j=start;j<end;j++)
+   {
+      int bits1j, bits2j;
+      int N = m->eBands[j+1]-m->eBands[j];
+      bits1j = C*N*m->allocVectors[lo*len+j]<<LM>>2;
+      bits2j = hi>=m->nbAllocVectors ?
+            cap[j] : C*N*m->allocVectors[hi*len+j]<<LM>>2;
+      if (bits1j > 0)
+         bits1j = IMAX(0, bits1j + trim_offset[j]);
+      if (bits2j > 0)
+         bits2j = IMAX(0, bits2j + trim_offset[j]);
+      if (lo > 0)
+         bits1j += offsets[j];
+      bits2j += offsets[j];
+      if (offsets[j]>0)
+         skip_start = j;
+      bits2j = IMAX(0,bits2j-bits1j);
+      bits1[j] = bits1j;
+      bits2[j] = bits2j;
+   }
+   codedBands = interp_bits2pulses(m, start, end, skip_start, bits1, bits2, thresh, cap,
+         total, balance, skip_rsv, intensity, intensity_rsv, dual_stereo, dual_stereo_rsv,
+         pulses, ebits, fine_priority, C, LM, ec, encode, prev, signalBandwidth);
+   RESTORE_STACK;
+   return codedBands;
+}
+
diff --git a/libopus/celt/rate.h b/libopus/celt/rate.h
new file mode 100644
index 0000000..fad5e41
--- /dev/null
+++ b/libopus/celt/rate.h
@@ -0,0 +1,101 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef RATE_H
+#define RATE_H
+
+#define MAX_PSEUDO 40
+#define LOG_MAX_PSEUDO 6
+
+#define CELT_MAX_PULSES 128
+
+#define MAX_FINE_BITS 8
+
+#define FINE_OFFSET 21
+#define QTHETA_OFFSET 4
+#define QTHETA_OFFSET_TWOPHASE 16
+
+#include "cwrs.h"
+#include "modes.h"
+
+void compute_pulse_cache(CELTMode *m, int LM);
+
+static OPUS_INLINE int get_pulses(int i)
+{
+   return i<8 ? i : (8 + (i&7)) << ((i>>3)-1);
+}
+
+static OPUS_INLINE int bits2pulses(const CELTMode *m, int band, int LM, int bits)
+{
+   int i;
+   int lo, hi;
+   const unsigned char *cache;
+
+   LM++;
+   cache = m->cache.bits + m->cache.index[LM*m->nbEBands+band];
+
+   lo = 0;
+   hi = cache[0];
+   bits--;
+   for (i=0;i<LOG_MAX_PSEUDO;i++)
+   {
+      int mid = (lo+hi+1)>>1;
+      /* OPT: Make sure this is implemented with a conditional move */
+      if ((int)cache[mid] >= bits)
+         hi = mid;
+      else
+         lo = mid;
+   }
+   if (bits- (lo == 0 ? -1 : (int)cache[lo]) <= (int)cache[hi]-bits)
+      return lo;
+   else
+      return hi;
+}
+
+static OPUS_INLINE int pulses2bits(const CELTMode *m, int band, int LM, int pulses)
+{
+   const unsigned char *cache;
+
+   LM++;
+   cache = m->cache.bits + m->cache.index[LM*m->nbEBands+band];
+   return pulses == 0 ? 0 : cache[pulses]+1;
+}
+
+/** Compute the pulse allocation, i.e. how many pulses will go in each
+  * band.
+ @param m mode
+ @param offsets Requested increase or decrease in the number of bits for
+                each band
+ @param total Number of bands
+ @param pulses Number of pulses per band (returned)
+ @return Total number of bits allocated
+*/
+int clt_compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo,
+      opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth);
+
+#endif
diff --git a/libopus/celt/stack_alloc.h b/libopus/celt/stack_alloc.h
new file mode 100644
index 0000000..2b51c8d
--- /dev/null
+++ b/libopus/celt/stack_alloc.h
@@ -0,0 +1,184 @@
+/* Copyright (C) 2002-2003 Jean-Marc Valin
+   Copyright (C) 2007-2009 Xiph.Org Foundation */
+/**
+   @file stack_alloc.h
+   @brief Temporary memory allocation on stack
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef STACK_ALLOC_H
+#define STACK_ALLOC_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#if (!defined (VAR_ARRAYS) && !defined (USE_ALLOCA) && !defined (NONTHREADSAFE_PSEUDOSTACK))
+#error "Opus requires one of VAR_ARRAYS, USE_ALLOCA, or NONTHREADSAFE_PSEUDOSTACK be defined to select the temporary allocation mode."
+#endif
+
+#ifdef USE_ALLOCA
+# ifdef WIN32
+#  include <malloc.h>
+# else
+#  ifdef HAVE_ALLOCA_H
+#   include <alloca.h>
+#  else
+#   include <stdlib.h>
+#  endif
+# endif
+#endif
+
+/**
+ * @def ALIGN(stack, size)
+ *
+ * Aligns the stack to a 'size' boundary
+ *
+ * @param stack Stack
+ * @param size  New size boundary
+ */
+
+/**
+ * @def PUSH(stack, size, type)
+ *
+ * Allocates 'size' elements of type 'type' on the stack
+ *
+ * @param stack Stack
+ * @param size  Number of elements
+ * @param type  Type of element
+ */
+
+/**
+ * @def VARDECL(var)
+ *
+ * Declare variable on stack
+ *
+ * @param var Variable to declare
+ */
+
+/**
+ * @def ALLOC(var, size, type)
+ *
+ * Allocate 'size' elements of 'type' on stack
+ *
+ * @param var  Name of variable to allocate
+ * @param size Number of elements
+ * @param type Type of element
+ */
+
+#if defined(VAR_ARRAYS)
+
+#define VARDECL(type, var)
+#define ALLOC(var, size, type) type var[size]
+#define SAVE_STACK
+#define RESTORE_STACK
+#define ALLOC_STACK
+/* C99 does not allow VLAs of size zero */
+#define ALLOC_NONE 1
+
+#elif defined(USE_ALLOCA)
+
+#define VARDECL(type, var) type *var
+
+# ifdef WIN32
+#  define ALLOC(var, size, type) var = ((type*)_alloca(sizeof(type)*(size)))
+# else
+#  define ALLOC(var, size, type) var = ((type*)alloca(sizeof(type)*(size)))
+# endif
+
+#define SAVE_STACK
+#define RESTORE_STACK
+#define ALLOC_STACK
+#define ALLOC_NONE 0
+
+#else
+
+#ifdef CELT_C
+char *scratch_ptr=0;
+char *global_stack=0;
+#else
+extern char *global_stack;
+extern char *scratch_ptr;
+#endif /* CELT_C */
+
+#ifdef ENABLE_VALGRIND
+
+#include <valgrind/memcheck.h>
+
+#ifdef CELT_C
+char *global_stack_top=0;
+#else
+extern char *global_stack_top;
+#endif /* CELT_C */
+
+#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
+#define PUSH(stack, size, type) (VALGRIND_MAKE_MEM_NOACCESS(stack, global_stack_top-stack),ALIGN((stack),sizeof(type)/sizeof(char)),VALGRIND_MAKE_MEM_UNDEFINED(stack, ((size)*sizeof(type)/sizeof(char))),(stack)+=(2*(size)*sizeof(type)/sizeof(char)),(type*)((stack)-(2*(size)*sizeof(type)/sizeof(char))))
+#define RESTORE_STACK ((global_stack = _saved_stack),VALGRIND_MAKE_MEM_NOACCESS(global_stack, global_stack_top-global_stack))
+#define ALLOC_STACK char *_saved_stack; ((global_stack = (global_stack==0) ? ((global_stack_top=opus_alloc_scratch(GLOBAL_STACK_SIZE*2)+(GLOBAL_STACK_SIZE*2))-(GLOBAL_STACK_SIZE*2)) : global_stack),VALGRIND_MAKE_MEM_NOACCESS(global_stack, global_stack_top-global_stack)); _saved_stack = global_stack;
+
+#else
+
+#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
+#define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/sizeof(char)),(stack)+=(size)*(sizeof(type)/sizeof(char)),(type*)((stack)-(size)*(sizeof(type)/sizeof(char))))
+#if 0 /* Set this to 1 to instrument pseudostack usage */
+#define RESTORE_STACK (printf("%ld %s:%d\n", global_stack-scratch_ptr, __FILE__, __LINE__),global_stack = _saved_stack)
+#else
+#define RESTORE_STACK (global_stack = _saved_stack)
+#endif
+#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? (scratch_ptr=opus_alloc_scratch(GLOBAL_STACK_SIZE)) : global_stack); _saved_stack = global_stack;
+
+#endif /* ENABLE_VALGRIND */
+
+#include "os_support.h"
+#define VARDECL(type, var) type *var
+#define ALLOC(var, size, type) var = PUSH(global_stack, size, type)
+#define SAVE_STACK char *_saved_stack = global_stack;
+#define ALLOC_NONE 0
+
+#endif /* VAR_ARRAYS */
+
+
+#ifdef ENABLE_VALGRIND
+
+#include <valgrind/memcheck.h>
+#define OPUS_CHECK_ARRAY(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, len*sizeof(*ptr))
+#define OPUS_CHECK_VALUE(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value)
+#define OPUS_CHECK_ARRAY_COND(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, len*sizeof(*ptr))
+#define OPUS_CHECK_VALUE_COND(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value)
+#define OPUS_PRINT_INT(value) do {fprintf(stderr, #value " = %d at %s:%d\n", value, __FILE__, __LINE__);}while(0)
+#define OPUS_FPRINTF fprintf
+
+#else
+
+static OPUS_INLINE int _opus_false(void) {return 0;}
+#define OPUS_CHECK_ARRAY(ptr, len) _opus_false()
+#define OPUS_CHECK_VALUE(value) _opus_false()
+#define OPUS_PRINT_INT(value) do{}while(0)
+#define OPUS_FPRINTF (void)
+
+#endif
+
+
+#endif /* STACK_ALLOC_H */
diff --git a/libopus/celt/static_modes_fixed.h b/libopus/celt/static_modes_fixed.h
new file mode 100644
index 0000000..8717d62
--- /dev/null
+++ b/libopus/celt/static_modes_fixed.h
@@ -0,0 +1,892 @@
+/* The contents of this file was automatically generated by dump_modes.c
+   with arguments: 48000 960
+   It contains static definitions for some pre-defined modes. */
+#include "modes.h"
+#include "rate.h"
+
+#ifdef HAVE_ARM_NE10
+#define OVERRIDE_FFT 1
+#include "static_modes_fixed_arm_ne10.h"
+#endif
+
+#ifndef DEF_WINDOW120
+#define DEF_WINDOW120
+static const opus_val16 window120[120] = {
+2, 20, 55, 108, 178,
+266, 372, 494, 635, 792,
+966, 1157, 1365, 1590, 1831,
+2089, 2362, 2651, 2956, 3276,
+3611, 3961, 4325, 4703, 5094,
+5499, 5916, 6346, 6788, 7241,
+7705, 8179, 8663, 9156, 9657,
+10167, 10684, 11207, 11736, 12271,
+12810, 13353, 13899, 14447, 14997,
+15547, 16098, 16648, 17197, 17744,
+18287, 18827, 19363, 19893, 20418,
+20936, 21447, 21950, 22445, 22931,
+23407, 23874, 24330, 24774, 25208,
+25629, 26039, 26435, 26819, 27190,
+27548, 27893, 28224, 28541, 28845,
+29135, 29411, 29674, 29924, 30160,
+30384, 30594, 30792, 30977, 31151,
+31313, 31463, 31602, 31731, 31849,
+31958, 32057, 32148, 32229, 32303,
+32370, 32429, 32481, 32528, 32568,
+32604, 32634, 32661, 32683, 32701,
+32717, 32729, 32740, 32748, 32754,
+32758, 32762, 32764, 32766, 32767,
+32767, 32767, 32767, 32767, 32767,
+};
+#endif
+
+#ifndef DEF_LOGN400
+#define DEF_LOGN400
+static const opus_int16 logN400[21] = {
+0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 16, 16, 16, 21, 21, 24, 29, 34, 36, };
+#endif
+
+#ifndef DEF_PULSE_CACHE50
+#define DEF_PULSE_CACHE50
+static const opus_int16 cache_index50[105] = {
+-1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 41, 41, 41,
+82, 82, 123, 164, 200, 222, 0, 0, 0, 0, 0, 0, 0, 0, 41,
+41, 41, 41, 123, 123, 123, 164, 164, 240, 266, 283, 295, 41, 41, 41,
+41, 41, 41, 41, 41, 123, 123, 123, 123, 240, 240, 240, 266, 266, 305,
+318, 328, 336, 123, 123, 123, 123, 123, 123, 123, 123, 240, 240, 240, 240,
+305, 305, 305, 318, 318, 343, 351, 358, 364, 240, 240, 240, 240, 240, 240,
+240, 240, 305, 305, 305, 305, 343, 343, 343, 351, 351, 370, 376, 382, 387,
+};
+static const unsigned char cache_bits50[392] = {
+40, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 40, 15, 23, 28,
+31, 34, 36, 38, 39, 41, 42, 43, 44, 45, 46, 47, 47, 49, 50,
+51, 52, 53, 54, 55, 55, 57, 58, 59, 60, 61, 62, 63, 63, 65,
+66, 67, 68, 69, 70, 71, 71, 40, 20, 33, 41, 48, 53, 57, 61,
+64, 66, 69, 71, 73, 75, 76, 78, 80, 82, 85, 87, 89, 91, 92,
+94, 96, 98, 101, 103, 105, 107, 108, 110, 112, 114, 117, 119, 121, 123,
+124, 126, 128, 40, 23, 39, 51, 60, 67, 73, 79, 83, 87, 91, 94,
+97, 100, 102, 105, 107, 111, 115, 118, 121, 124, 126, 129, 131, 135, 139,
+142, 145, 148, 150, 153, 155, 159, 163, 166, 169, 172, 174, 177, 179, 35,
+28, 49, 65, 78, 89, 99, 107, 114, 120, 126, 132, 136, 141, 145, 149,
+153, 159, 165, 171, 176, 180, 185, 189, 192, 199, 205, 211, 216, 220, 225,
+229, 232, 239, 245, 251, 21, 33, 58, 79, 97, 112, 125, 137, 148, 157,
+166, 174, 182, 189, 195, 201, 207, 217, 227, 235, 243, 251, 17, 35, 63,
+86, 106, 123, 139, 152, 165, 177, 187, 197, 206, 214, 222, 230, 237, 250,
+25, 31, 55, 75, 91, 105, 117, 128, 138, 146, 154, 161, 168, 174, 180,
+185, 190, 200, 208, 215, 222, 229, 235, 240, 245, 255, 16, 36, 65, 89,
+110, 128, 144, 159, 173, 185, 196, 207, 217, 226, 234, 242, 250, 11, 41,
+74, 103, 128, 151, 172, 191, 209, 225, 241, 255, 9, 43, 79, 110, 138,
+163, 186, 207, 227, 246, 12, 39, 71, 99, 123, 144, 164, 182, 198, 214,
+228, 241, 253, 9, 44, 81, 113, 142, 168, 192, 214, 235, 255, 7, 49,
+90, 127, 160, 191, 220, 247, 6, 51, 95, 134, 170, 203, 234, 7, 47,
+87, 123, 155, 184, 212, 237, 6, 52, 97, 137, 174, 208, 240, 5, 57,
+106, 151, 192, 231, 5, 59, 111, 158, 202, 243, 5, 55, 103, 147, 187,
+224, 5, 60, 113, 161, 206, 248, 4, 65, 122, 175, 224, 4, 67, 127,
+182, 234, };
+static const unsigned char cache_caps50[168] = {
+224, 224, 224, 224, 224, 224, 224, 224, 160, 160, 160, 160, 185, 185, 185,
+178, 178, 168, 134, 61, 37, 224, 224, 224, 224, 224, 224, 224, 224, 240,
+240, 240, 240, 207, 207, 207, 198, 198, 183, 144, 66, 40, 160, 160, 160,
+160, 160, 160, 160, 160, 185, 185, 185, 185, 193, 193, 193, 183, 183, 172,
+138, 64, 38, 240, 240, 240, 240, 240, 240, 240, 240, 207, 207, 207, 207,
+204, 204, 204, 193, 193, 180, 143, 66, 40, 185, 185, 185, 185, 185, 185,
+185, 185, 193, 193, 193, 193, 193, 193, 193, 183, 183, 172, 138, 65, 39,
+207, 207, 207, 207, 207, 207, 207, 207, 204, 204, 204, 204, 201, 201, 201,
+188, 188, 176, 141, 66, 40, 193, 193, 193, 193, 193, 193, 193, 193, 193,
+193, 193, 193, 194, 194, 194, 184, 184, 173, 139, 65, 39, 204, 204, 204,
+204, 204, 204, 204, 204, 201, 201, 201, 201, 198, 198, 198, 187, 187, 175,
+140, 66, 40, };
+#endif
+
+#ifndef FFT_TWIDDLES48000_960
+#define FFT_TWIDDLES48000_960
+static const kiss_twiddle_cpx fft_twiddles48000_960[480] = {
+{32767, 0}, {32766, -429},
+{32757, -858}, {32743, -1287},
+{32724, -1715}, {32698, -2143},
+{32667, -2570}, {32631, -2998},
+{32588, -3425}, {32541, -3851},
+{32488, -4277}, {32429, -4701},
+{32364, -5125}, {32295, -5548},
+{32219, -5971}, {32138, -6393},
+{32051, -6813}, {31960, -7231},
+{31863, -7650}, {31760, -8067},
+{31652, -8481}, {31539, -8895},
+{31419, -9306}, {31294, -9716},
+{31165, -10126}, {31030, -10532},
+{30889, -10937}, {30743, -11340},
+{30592, -11741}, {30436, -12141},
+{30274, -12540}, {30107, -12935},
+{29936, -13328}, {29758, -13718},
+{29577, -14107}, {29390, -14493},
+{29197, -14875}, {29000, -15257},
+{28797, -15635}, {28590, -16010},
+{28379, -16384}, {28162, -16753},
+{27940, -17119}, {27714, -17484},
+{27482, -17845}, {27246, -18205},
+{27006, -18560}, {26760, -18911},
+{26510, -19260}, {26257, -19606},
+{25997, -19947}, {25734, -20286},
+{25466, -20621}, {25194, -20952},
+{24918, -21281}, {24637, -21605},
+{24353, -21926}, {24063, -22242},
+{23770, -22555}, {23473, -22865},
+{23171, -23171}, {22866, -23472},
+{22557, -23769}, {22244, -24063},
+{21927, -24352}, {21606, -24636},
+{21282, -24917}, {20954, -25194},
+{20622, -25465}, {20288, -25733},
+{19949, -25997}, {19607, -26255},
+{19261, -26509}, {18914, -26760},
+{18561, -27004}, {18205, -27246},
+{17846, -27481}, {17485, -27713},
+{17122, -27940}, {16755, -28162},
+{16385, -28378}, {16012, -28590},
+{15636, -28797}, {15258, -28999},
+{14878, -29197}, {14494, -29389},
+{14108, -29576}, {13720, -29757},
+{13329, -29934}, {12937, -30107},
+{12540, -30274}, {12142, -30435},
+{11744, -30592}, {11342, -30743},
+{10939, -30889}, {10534, -31030},
+{10127, -31164}, {9718, -31294},
+{9307, -31418}, {8895, -31537},
+{8482, -31652}, {8067, -31759},
+{7650, -31862}, {7233, -31960},
+{6815, -32051}, {6393, -32138},
+{5973, -32219}, {5549, -32294},
+{5127, -32364}, {4703, -32429},
+{4278, -32487}, {3852, -32541},
+{3426, -32588}, {2999, -32630},
+{2572, -32667}, {2144, -32698},
+{1716, -32724}, {1287, -32742},
+{860, -32757}, {430, -32766},
+{0, -32767}, {-429, -32766},
+{-858, -32757}, {-1287, -32743},
+{-1715, -32724}, {-2143, -32698},
+{-2570, -32667}, {-2998, -32631},
+{-3425, -32588}, {-3851, -32541},
+{-4277, -32488}, {-4701, -32429},
+{-5125, -32364}, {-5548, -32295},
+{-5971, -32219}, {-6393, -32138},
+{-6813, -32051}, {-7231, -31960},
+{-7650, -31863}, {-8067, -31760},
+{-8481, -31652}, {-8895, -31539},
+{-9306, -31419}, {-9716, -31294},
+{-10126, -31165}, {-10532, -31030},
+{-10937, -30889}, {-11340, -30743},
+{-11741, -30592}, {-12141, -30436},
+{-12540, -30274}, {-12935, -30107},
+{-13328, -29936}, {-13718, -29758},
+{-14107, -29577}, {-14493, -29390},
+{-14875, -29197}, {-15257, -29000},
+{-15635, -28797}, {-16010, -28590},
+{-16384, -28379}, {-16753, -28162},
+{-17119, -27940}, {-17484, -27714},
+{-17845, -27482}, {-18205, -27246},
+{-18560, -27006}, {-18911, -26760},
+{-19260, -26510}, {-19606, -26257},
+{-19947, -25997}, {-20286, -25734},
+{-20621, -25466}, {-20952, -25194},
+{-21281, -24918}, {-21605, -24637},
+{-21926, -24353}, {-22242, -24063},
+{-22555, -23770}, {-22865, -23473},
+{-23171, -23171}, {-23472, -22866},
+{-23769, -22557}, {-24063, -22244},
+{-24352, -21927}, {-24636, -21606},
+{-24917, -21282}, {-25194, -20954},
+{-25465, -20622}, {-25733, -20288},
+{-25997, -19949}, {-26255, -19607},
+{-26509, -19261}, {-26760, -18914},
+{-27004, -18561}, {-27246, -18205},
+{-27481, -17846}, {-27713, -17485},
+{-27940, -17122}, {-28162, -16755},
+{-28378, -16385}, {-28590, -16012},
+{-28797, -15636}, {-28999, -15258},
+{-29197, -14878}, {-29389, -14494},
+{-29576, -14108}, {-29757, -13720},
+{-29934, -13329}, {-30107, -12937},
+{-30274, -12540}, {-30435, -12142},
+{-30592, -11744}, {-30743, -11342},
+{-30889, -10939}, {-31030, -10534},
+{-31164, -10127}, {-31294, -9718},
+{-31418, -9307}, {-31537, -8895},
+{-31652, -8482}, {-31759, -8067},
+{-31862, -7650}, {-31960, -7233},
+{-32051, -6815}, {-32138, -6393},
+{-32219, -5973}, {-32294, -5549},
+{-32364, -5127}, {-32429, -4703},
+{-32487, -4278}, {-32541, -3852},
+{-32588, -3426}, {-32630, -2999},
+{-32667, -2572}, {-32698, -2144},
+{-32724, -1716}, {-32742, -1287},
+{-32757, -860}, {-32766, -430},
+{-32767, 0}, {-32766, 429},
+{-32757, 858}, {-32743, 1287},
+{-32724, 1715}, {-32698, 2143},
+{-32667, 2570}, {-32631, 2998},
+{-32588, 3425}, {-32541, 3851},
+{-32488, 4277}, {-32429, 4701},
+{-32364, 5125}, {-32295, 5548},
+{-32219, 5971}, {-32138, 6393},
+{-32051, 6813}, {-31960, 7231},
+{-31863, 7650}, {-31760, 8067},
+{-31652, 8481}, {-31539, 8895},
+{-31419, 9306}, {-31294, 9716},
+{-31165, 10126}, {-31030, 10532},
+{-30889, 10937}, {-30743, 11340},
+{-30592, 11741}, {-30436, 12141},
+{-30274, 12540}, {-30107, 12935},
+{-29936, 13328}, {-29758, 13718},
+{-29577, 14107}, {-29390, 14493},
+{-29197, 14875}, {-29000, 15257},
+{-28797, 15635}, {-28590, 16010},
+{-28379, 16384}, {-28162, 16753},
+{-27940, 17119}, {-27714, 17484},
+{-27482, 17845}, {-27246, 18205},
+{-27006, 18560}, {-26760, 18911},
+{-26510, 19260}, {-26257, 19606},
+{-25997, 19947}, {-25734, 20286},
+{-25466, 20621}, {-25194, 20952},
+{-24918, 21281}, {-24637, 21605},
+{-24353, 21926}, {-24063, 22242},
+{-23770, 22555}, {-23473, 22865},
+{-23171, 23171}, {-22866, 23472},
+{-22557, 23769}, {-22244, 24063},
+{-21927, 24352}, {-21606, 24636},
+{-21282, 24917}, {-20954, 25194},
+{-20622, 25465}, {-20288, 25733},
+{-19949, 25997}, {-19607, 26255},
+{-19261, 26509}, {-18914, 26760},
+{-18561, 27004}, {-18205, 27246},
+{-17846, 27481}, {-17485, 27713},
+{-17122, 27940}, {-16755, 28162},
+{-16385, 28378}, {-16012, 28590},
+{-15636, 28797}, {-15258, 28999},
+{-14878, 29197}, {-14494, 29389},
+{-14108, 29576}, {-13720, 29757},
+{-13329, 29934}, {-12937, 30107},
+{-12540, 30274}, {-12142, 30435},
+{-11744, 30592}, {-11342, 30743},
+{-10939, 30889}, {-10534, 31030},
+{-10127, 31164}, {-9718, 31294},
+{-9307, 31418}, {-8895, 31537},
+{-8482, 31652}, {-8067, 31759},
+{-7650, 31862}, {-7233, 31960},
+{-6815, 32051}, {-6393, 32138},
+{-5973, 32219}, {-5549, 32294},
+{-5127, 32364}, {-4703, 32429},
+{-4278, 32487}, {-3852, 32541},
+{-3426, 32588}, {-2999, 32630},
+{-2572, 32667}, {-2144, 32698},
+{-1716, 32724}, {-1287, 32742},
+{-860, 32757}, {-430, 32766},
+{0, 32767}, {429, 32766},
+{858, 32757}, {1287, 32743},
+{1715, 32724}, {2143, 32698},
+{2570, 32667}, {2998, 32631},
+{3425, 32588}, {3851, 32541},
+{4277, 32488}, {4701, 32429},
+{5125, 32364}, {5548, 32295},
+{5971, 32219}, {6393, 32138},
+{6813, 32051}, {7231, 31960},
+{7650, 31863}, {8067, 31760},
+{8481, 31652}, {8895, 31539},
+{9306, 31419}, {9716, 31294},
+{10126, 31165}, {10532, 31030},
+{10937, 30889}, {11340, 30743},
+{11741, 30592}, {12141, 30436},
+{12540, 30274}, {12935, 30107},
+{13328, 29936}, {13718, 29758},
+{14107, 29577}, {14493, 29390},
+{14875, 29197}, {15257, 29000},
+{15635, 28797}, {16010, 28590},
+{16384, 28379}, {16753, 28162},
+{17119, 27940}, {17484, 27714},
+{17845, 27482}, {18205, 27246},
+{18560, 27006}, {18911, 26760},
+{19260, 26510}, {19606, 26257},
+{19947, 25997}, {20286, 25734},
+{20621, 25466}, {20952, 25194},
+{21281, 24918}, {21605, 24637},
+{21926, 24353}, {22242, 24063},
+{22555, 23770}, {22865, 23473},
+{23171, 23171}, {23472, 22866},
+{23769, 22557}, {24063, 22244},
+{24352, 21927}, {24636, 21606},
+{24917, 21282}, {25194, 20954},
+{25465, 20622}, {25733, 20288},
+{25997, 19949}, {26255, 19607},
+{26509, 19261}, {26760, 18914},
+{27004, 18561}, {27246, 18205},
+{27481, 17846}, {27713, 17485},
+{27940, 17122}, {28162, 16755},
+{28378, 16385}, {28590, 16012},
+{28797, 15636}, {28999, 15258},
+{29197, 14878}, {29389, 14494},
+{29576, 14108}, {29757, 13720},
+{29934, 13329}, {30107, 12937},
+{30274, 12540}, {30435, 12142},
+{30592, 11744}, {30743, 11342},
+{30889, 10939}, {31030, 10534},
+{31164, 10127}, {31294, 9718},
+{31418, 9307}, {31537, 8895},
+{31652, 8482}, {31759, 8067},
+{31862, 7650}, {31960, 7233},
+{32051, 6815}, {32138, 6393},
+{32219, 5973}, {32294, 5549},
+{32364, 5127}, {32429, 4703},
+{32487, 4278}, {32541, 3852},
+{32588, 3426}, {32630, 2999},
+{32667, 2572}, {32698, 2144},
+{32724, 1716}, {32742, 1287},
+{32757, 860}, {32766, 430},
+};
+#ifndef FFT_BITREV480
+#define FFT_BITREV480
+static const opus_int16 fft_bitrev480[480] = {
+0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448,
+8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456,
+16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464,
+24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472,
+4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452,
+12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460,
+20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468,
+28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476,
+1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449,
+9, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457,
+17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465,
+25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473,
+5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453,
+13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461,
+21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469,
+29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477,
+2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450,
+10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458,
+18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466,
+26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474,
+6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454,
+14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462,
+22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470,
+30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478,
+3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451,
+11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459,
+19, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467,
+27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475,
+7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455,
+15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463,
+23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471,
+31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479,
+};
+#endif
+
+#ifndef FFT_BITREV240
+#define FFT_BITREV240
+static const opus_int16 fft_bitrev240[240] = {
+0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224,
+4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228,
+8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232,
+12, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236,
+1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225,
+5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229,
+9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233,
+13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237,
+2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226,
+6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230,
+10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234,
+14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238,
+3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227,
+7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231,
+11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235,
+15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239,
+};
+#endif
+
+#ifndef FFT_BITREV120
+#define FFT_BITREV120
+static const opus_int16 fft_bitrev120[120] = {
+0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112,
+4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116,
+1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113,
+5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117,
+2, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114,
+6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118,
+3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115,
+7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119,
+};
+#endif
+
+#ifndef FFT_BITREV60
+#define FFT_BITREV60
+static const opus_int16 fft_bitrev60[60] = {
+0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56,
+1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57,
+2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58,
+3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59,
+};
+#endif
+
+#ifndef FFT_STATE48000_960_0
+#define FFT_STATE48000_960_0
+static const kiss_fft_state fft_state48000_960_0 = {
+480,    /* nfft */
+17476,    /* scale */
+8,      /* scale_shift */
+-1,     /* shift */
+{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, },    /* factors */
+fft_bitrev480,  /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+#ifdef OVERRIDE_FFT
+(arch_fft_state *)&cfg_arch_480,
+#else
+NULL,
+#endif
+};
+#endif
+
+#ifndef FFT_STATE48000_960_1
+#define FFT_STATE48000_960_1
+static const kiss_fft_state fft_state48000_960_1 = {
+240,    /* nfft */
+17476,    /* scale */
+7,      /* scale_shift */
+1,      /* shift */
+{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, },    /* factors */
+fft_bitrev240,  /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+#ifdef OVERRIDE_FFT
+(arch_fft_state *)&cfg_arch_240,
+#else
+NULL,
+#endif
+};
+#endif
+
+#ifndef FFT_STATE48000_960_2
+#define FFT_STATE48000_960_2
+static const kiss_fft_state fft_state48000_960_2 = {
+120,    /* nfft */
+17476,    /* scale */
+6,      /* scale_shift */
+2,      /* shift */
+{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, },    /* factors */
+fft_bitrev120,  /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+#ifdef OVERRIDE_FFT
+(arch_fft_state *)&cfg_arch_120,
+#else
+NULL,
+#endif
+};
+#endif
+
+#ifndef FFT_STATE48000_960_3
+#define FFT_STATE48000_960_3
+static const kiss_fft_state fft_state48000_960_3 = {
+60,     /* nfft */
+17476,    /* scale */
+5,      /* scale_shift */
+3,      /* shift */
+{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },    /* factors */
+fft_bitrev60,   /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+#ifdef OVERRIDE_FFT
+(arch_fft_state *)&cfg_arch_60,
+#else
+NULL,
+#endif
+};
+#endif
+
+#endif
+
+#ifndef MDCT_TWIDDLES960
+#define MDCT_TWIDDLES960
+static const opus_val16 mdct_twiddles960[1800] = {
+32767, 32767, 32767, 32766, 32765,
+32763, 32761, 32759, 32756, 32753,
+32750, 32746, 32742, 32738, 32733,
+32728, 32722, 32717, 32710, 32704,
+32697, 32690, 32682, 32674, 32666,
+32657, 32648, 32639, 32629, 32619,
+32609, 32598, 32587, 32576, 32564,
+32552, 32539, 32526, 32513, 32500,
+32486, 32472, 32457, 32442, 32427,
+32411, 32395, 32379, 32362, 32345,
+32328, 32310, 32292, 32274, 32255,
+32236, 32217, 32197, 32177, 32157,
+32136, 32115, 32093, 32071, 32049,
+32027, 32004, 31981, 31957, 31933,
+31909, 31884, 31859, 31834, 31809,
+31783, 31756, 31730, 31703, 31676,
+31648, 31620, 31592, 31563, 31534,
+31505, 31475, 31445, 31415, 31384,
+31353, 31322, 31290, 31258, 31226,
+31193, 31160, 31127, 31093, 31059,
+31025, 30990, 30955, 30920, 30884,
+30848, 30812, 30775, 30738, 30701,
+30663, 30625, 30587, 30548, 30509,
+30470, 30430, 30390, 30350, 30309,
+30269, 30227, 30186, 30144, 30102,
+30059, 30016, 29973, 29930, 29886,
+29842, 29797, 29752, 29707, 29662,
+29616, 29570, 29524, 29477, 29430,
+29383, 29335, 29287, 29239, 29190,
+29142, 29092, 29043, 28993, 28943,
+28892, 28842, 28791, 28739, 28688,
+28636, 28583, 28531, 28478, 28425,
+28371, 28317, 28263, 28209, 28154,
+28099, 28044, 27988, 27932, 27876,
+27820, 27763, 27706, 27648, 27591,
+27533, 27474, 27416, 27357, 27298,
+27238, 27178, 27118, 27058, 26997,
+26936, 26875, 26814, 26752, 26690,
+26628, 26565, 26502, 26439, 26375,
+26312, 26247, 26183, 26119, 26054,
+25988, 25923, 25857, 25791, 25725,
+25658, 25592, 25524, 25457, 25389,
+25322, 25253, 25185, 25116, 25047,
+24978, 24908, 24838, 24768, 24698,
+24627, 24557, 24485, 24414, 24342,
+24270, 24198, 24126, 24053, 23980,
+23907, 23834, 23760, 23686, 23612,
+23537, 23462, 23387, 23312, 23237,
+23161, 23085, 23009, 22932, 22856,
+22779, 22701, 22624, 22546, 22468,
+22390, 22312, 22233, 22154, 22075,
+21996, 21916, 21836, 21756, 21676,
+21595, 21515, 21434, 21352, 21271,
+21189, 21107, 21025, 20943, 20860,
+20777, 20694, 20611, 20528, 20444,
+20360, 20276, 20192, 20107, 20022,
+19937, 19852, 19767, 19681, 19595,
+19509, 19423, 19336, 19250, 19163,
+19076, 18988, 18901, 18813, 18725,
+18637, 18549, 18460, 18372, 18283,
+18194, 18104, 18015, 17925, 17835,
+17745, 17655, 17565, 17474, 17383,
+17292, 17201, 17110, 17018, 16927,
+16835, 16743, 16650, 16558, 16465,
+16372, 16279, 16186, 16093, 15999,
+15906, 15812, 15718, 15624, 15529,
+15435, 15340, 15245, 15150, 15055,
+14960, 14864, 14769, 14673, 14577,
+14481, 14385, 14288, 14192, 14095,
+13998, 13901, 13804, 13706, 13609,
+13511, 13414, 13316, 13218, 13119,
+13021, 12923, 12824, 12725, 12626,
+12527, 12428, 12329, 12230, 12130,
+12030, 11930, 11831, 11730, 11630,
+11530, 11430, 11329, 11228, 11128,
+11027, 10926, 10824, 10723, 10622,
+10520, 10419, 10317, 10215, 10113,
+10011, 9909, 9807, 9704, 9602,
+9499, 9397, 9294, 9191, 9088,
+8985, 8882, 8778, 8675, 8572,
+8468, 8364, 8261, 8157, 8053,
+7949, 7845, 7741, 7637, 7532,
+7428, 7323, 7219, 7114, 7009,
+6905, 6800, 6695, 6590, 6485,
+6380, 6274, 6169, 6064, 5958,
+5853, 5747, 5642, 5536, 5430,
+5325, 5219, 5113, 5007, 4901,
+4795, 4689, 4583, 4476, 4370,
+4264, 4157, 4051, 3945, 3838,
+3732, 3625, 3518, 3412, 3305,
+3198, 3092, 2985, 2878, 2771,
+2664, 2558, 2451, 2344, 2237,
+2130, 2023, 1916, 1809, 1702,
+1594, 1487, 1380, 1273, 1166,
+1059, 952, 844, 737, 630,
+523, 416, 308, 201, 94,
+-13, -121, -228, -335, -442,
+-550, -657, -764, -871, -978,
+-1086, -1193, -1300, -1407, -1514,
+-1621, -1728, -1835, -1942, -2049,
+-2157, -2263, -2370, -2477, -2584,
+-2691, -2798, -2905, -3012, -3118,
+-3225, -3332, -3439, -3545, -3652,
+-3758, -3865, -3971, -4078, -4184,
+-4290, -4397, -4503, -4609, -4715,
+-4821, -4927, -5033, -5139, -5245,
+-5351, -5457, -5562, -5668, -5774,
+-5879, -5985, -6090, -6195, -6301,
+-6406, -6511, -6616, -6721, -6826,
+-6931, -7036, -7140, -7245, -7349,
+-7454, -7558, -7663, -7767, -7871,
+-7975, -8079, -8183, -8287, -8390,
+-8494, -8597, -8701, -8804, -8907,
+-9011, -9114, -9217, -9319, -9422,
+-9525, -9627, -9730, -9832, -9934,
+-10037, -10139, -10241, -10342, -10444,
+-10546, -10647, -10748, -10850, -10951,
+-11052, -11153, -11253, -11354, -11455,
+-11555, -11655, -11756, -11856, -11955,
+-12055, -12155, -12254, -12354, -12453,
+-12552, -12651, -12750, -12849, -12947,
+-13046, -13144, -13242, -13340, -13438,
+-13536, -13633, -13731, -13828, -13925,
+-14022, -14119, -14216, -14312, -14409,
+-14505, -14601, -14697, -14793, -14888,
+-14984, -15079, -15174, -15269, -15364,
+-15459, -15553, -15647, -15741, -15835,
+-15929, -16023, -16116, -16210, -16303,
+-16396, -16488, -16581, -16673, -16766,
+-16858, -16949, -17041, -17133, -17224,
+-17315, -17406, -17497, -17587, -17678,
+-17768, -17858, -17948, -18037, -18127,
+-18216, -18305, -18394, -18483, -18571,
+-18659, -18747, -18835, -18923, -19010,
+-19098, -19185, -19271, -19358, -19444,
+-19531, -19617, -19702, -19788, -19873,
+-19959, -20043, -20128, -20213, -20297,
+-20381, -20465, -20549, -20632, -20715,
+-20798, -20881, -20963, -21046, -21128,
+-21210, -21291, -21373, -21454, -21535,
+-21616, -21696, -21776, -21856, -21936,
+-22016, -22095, -22174, -22253, -22331,
+-22410, -22488, -22566, -22643, -22721,
+-22798, -22875, -22951, -23028, -23104,
+-23180, -23256, -23331, -23406, -23481,
+-23556, -23630, -23704, -23778, -23852,
+-23925, -23998, -24071, -24144, -24216,
+-24288, -24360, -24432, -24503, -24574,
+-24645, -24716, -24786, -24856, -24926,
+-24995, -25064, -25133, -25202, -25270,
+-25339, -25406, -25474, -25541, -25608,
+-25675, -25742, -25808, -25874, -25939,
+-26005, -26070, -26135, -26199, -26264,
+-26327, -26391, -26455, -26518, -26581,
+-26643, -26705, -26767, -26829, -26891,
+-26952, -27013, -27073, -27133, -27193,
+-27253, -27312, -27372, -27430, -27489,
+-27547, -27605, -27663, -27720, -27777,
+-27834, -27890, -27946, -28002, -28058,
+-28113, -28168, -28223, -28277, -28331,
+-28385, -28438, -28491, -28544, -28596,
+-28649, -28701, -28752, -28803, -28854,
+-28905, -28955, -29006, -29055, -29105,
+-29154, -29203, -29251, -29299, -29347,
+-29395, -29442, -29489, -29535, -29582,
+-29628, -29673, -29719, -29764, -29808,
+-29853, -29897, -29941, -29984, -30027,
+-30070, -30112, -30154, -30196, -30238,
+-30279, -30320, -30360, -30400, -30440,
+-30480, -30519, -30558, -30596, -30635,
+-30672, -30710, -30747, -30784, -30821,
+-30857, -30893, -30929, -30964, -30999,
+-31033, -31068, -31102, -31135, -31168,
+-31201, -31234, -31266, -31298, -31330,
+-31361, -31392, -31422, -31453, -31483,
+-31512, -31541, -31570, -31599, -31627,
+-31655, -31682, -31710, -31737, -31763,
+-31789, -31815, -31841, -31866, -31891,
+-31915, -31939, -31963, -31986, -32010,
+-32032, -32055, -32077, -32099, -32120,
+-32141, -32162, -32182, -32202, -32222,
+-32241, -32260, -32279, -32297, -32315,
+-32333, -32350, -32367, -32383, -32399,
+-32415, -32431, -32446, -32461, -32475,
+-32489, -32503, -32517, -32530, -32542,
+-32555, -32567, -32579, -32590, -32601,
+-32612, -32622, -32632, -32641, -32651,
+-32659, -32668, -32676, -32684, -32692,
+-32699, -32706, -32712, -32718, -32724,
+-32729, -32734, -32739, -32743, -32747,
+-32751, -32754, -32757, -32760, -32762,
+-32764, -32765, -32767, -32767, -32767,
+32767, 32767, 32765, 32761, 32756,
+32750, 32742, 32732, 32722, 32710,
+32696, 32681, 32665, 32647, 32628,
+32608, 32586, 32562, 32538, 32512,
+32484, 32455, 32425, 32393, 32360,
+32326, 32290, 32253, 32214, 32174,
+32133, 32090, 32046, 32001, 31954,
+31906, 31856, 31805, 31753, 31700,
+31645, 31588, 31530, 31471, 31411,
+31349, 31286, 31222, 31156, 31089,
+31020, 30951, 30880, 30807, 30733,
+30658, 30582, 30504, 30425, 30345,
+30263, 30181, 30096, 30011, 29924,
+29836, 29747, 29656, 29564, 29471,
+29377, 29281, 29184, 29086, 28987,
+28886, 28784, 28681, 28577, 28471,
+28365, 28257, 28147, 28037, 27925,
+27812, 27698, 27583, 27467, 27349,
+27231, 27111, 26990, 26868, 26744,
+26620, 26494, 26367, 26239, 26110,
+25980, 25849, 25717, 25583, 25449,
+25313, 25176, 25038, 24900, 24760,
+24619, 24477, 24333, 24189, 24044,
+23898, 23751, 23602, 23453, 23303,
+23152, 22999, 22846, 22692, 22537,
+22380, 22223, 22065, 21906, 21746,
+21585, 21423, 21261, 21097, 20933,
+20767, 20601, 20434, 20265, 20096,
+19927, 19756, 19584, 19412, 19239,
+19065, 18890, 18714, 18538, 18361,
+18183, 18004, 17824, 17644, 17463,
+17281, 17098, 16915, 16731, 16546,
+16361, 16175, 15988, 15800, 15612,
+15423, 15234, 15043, 14852, 14661,
+14469, 14276, 14083, 13889, 13694,
+13499, 13303, 13107, 12910, 12713,
+12515, 12317, 12118, 11918, 11718,
+11517, 11316, 11115, 10913, 10710,
+10508, 10304, 10100, 9896, 9691,
+9486, 9281, 9075, 8869, 8662,
+8455, 8248, 8040, 7832, 7623,
+7415, 7206, 6996, 6787, 6577,
+6366, 6156, 5945, 5734, 5523,
+5311, 5100, 4888, 4675, 4463,
+4251, 4038, 3825, 3612, 3399,
+3185, 2972, 2758, 2544, 2330,
+2116, 1902, 1688, 1474, 1260,
+1045, 831, 617, 402, 188,
+-27, -241, -456, -670, -885,
+-1099, -1313, -1528, -1742, -1956,
+-2170, -2384, -2598, -2811, -3025,
+-3239, -3452, -3665, -3878, -4091,
+-4304, -4516, -4728, -4941, -5153,
+-5364, -5576, -5787, -5998, -6209,
+-6419, -6629, -6839, -7049, -7258,
+-7467, -7676, -7884, -8092, -8300,
+-8507, -8714, -8920, -9127, -9332,
+-9538, -9743, -9947, -10151, -10355,
+-10558, -10761, -10963, -11165, -11367,
+-11568, -11768, -11968, -12167, -12366,
+-12565, -12762, -12960, -13156, -13352,
+-13548, -13743, -13937, -14131, -14324,
+-14517, -14709, -14900, -15091, -15281,
+-15470, -15659, -15847, -16035, -16221,
+-16407, -16593, -16777, -16961, -17144,
+-17326, -17508, -17689, -17869, -18049,
+-18227, -18405, -18582, -18758, -18934,
+-19108, -19282, -19455, -19627, -19799,
+-19969, -20139, -20308, -20475, -20642,
+-20809, -20974, -21138, -21301, -21464,
+-21626, -21786, -21946, -22105, -22263,
+-22420, -22575, -22730, -22884, -23037,
+-23189, -23340, -23490, -23640, -23788,
+-23935, -24080, -24225, -24369, -24512,
+-24654, -24795, -24934, -25073, -25211,
+-25347, -25482, -25617, -25750, -25882,
+-26013, -26143, -26272, -26399, -26526,
+-26651, -26775, -26898, -27020, -27141,
+-27260, -27379, -27496, -27612, -27727,
+-27841, -27953, -28065, -28175, -28284,
+-28391, -28498, -28603, -28707, -28810,
+-28911, -29012, -29111, -29209, -29305,
+-29401, -29495, -29587, -29679, -29769,
+-29858, -29946, -30032, -30118, -30201,
+-30284, -30365, -30445, -30524, -30601,
+-30677, -30752, -30825, -30897, -30968,
+-31038, -31106, -31172, -31238, -31302,
+-31365, -31426, -31486, -31545, -31602,
+-31658, -31713, -31766, -31818, -31869,
+-31918, -31966, -32012, -32058, -32101,
+-32144, -32185, -32224, -32262, -32299,
+-32335, -32369, -32401, -32433, -32463,
+-32491, -32518, -32544, -32568, -32591,
+-32613, -32633, -32652, -32669, -32685,
+-32700, -32713, -32724, -32735, -32744,
+-32751, -32757, -32762, -32766, -32767,
+32767, 32764, 32755, 32741, 32720,
+32694, 32663, 32626, 32583, 32535,
+32481, 32421, 32356, 32286, 32209,
+32128, 32041, 31948, 31850, 31747,
+31638, 31523, 31403, 31278, 31148,
+31012, 30871, 30724, 30572, 30415,
+30253, 30086, 29913, 29736, 29553,
+29365, 29172, 28974, 28771, 28564,
+28351, 28134, 27911, 27684, 27452,
+27216, 26975, 26729, 26478, 26223,
+25964, 25700, 25432, 25159, 24882,
+24601, 24315, 24026, 23732, 23434,
+23133, 22827, 22517, 22204, 21886,
+21565, 21240, 20912, 20580, 20244,
+19905, 19563, 19217, 18868, 18516,
+18160, 17802, 17440, 17075, 16708,
+16338, 15964, 15588, 15210, 14829,
+14445, 14059, 13670, 13279, 12886,
+12490, 12093, 11693, 11291, 10888,
+10482, 10075, 9666, 9255, 8843,
+8429, 8014, 7597, 7180, 6760,
+6340, 5919, 5496, 5073, 4649,
+4224, 3798, 3372, 2945, 2517,
+2090, 1661, 1233, 804, 375,
+-54, -483, -911, -1340, -1768,
+-2197, -2624, -3052, -3479, -3905,
+-4330, -4755, -5179, -5602, -6024,
+-6445, -6865, -7284, -7702, -8118,
+-8533, -8946, -9358, -9768, -10177,
+-10584, -10989, -11392, -11793, -12192,
+-12589, -12984, -13377, -13767, -14155,
+-14541, -14924, -15305, -15683, -16058,
+-16430, -16800, -17167, -17531, -17892,
+-18249, -18604, -18956, -19304, -19649,
+-19990, -20329, -20663, -20994, -21322,
+-21646, -21966, -22282, -22595, -22904,
+-23208, -23509, -23806, -24099, -24387,
+-24672, -24952, -25228, -25499, -25766,
+-26029, -26288, -26541, -26791, -27035,
+-27275, -27511, -27741, -27967, -28188,
+-28405, -28616, -28823, -29024, -29221,
+-29412, -29599, -29780, -29957, -30128,
+-30294, -30455, -30611, -30761, -30906,
+-31046, -31181, -31310, -31434, -31552,
+-31665, -31773, -31875, -31972, -32063,
+-32149, -32229, -32304, -32373, -32437,
+-32495, -32547, -32594, -32635, -32671,
+-32701, -32726, -32745, -32758, -32766,
+32767, 32754, 32717, 32658, 32577,
+32473, 32348, 32200, 32029, 31837,
+31624, 31388, 31131, 30853, 30553,
+30232, 29891, 29530, 29148, 28746,
+28324, 27883, 27423, 26944, 26447,
+25931, 25398, 24847, 24279, 23695,
+23095, 22478, 21846, 21199, 20538,
+19863, 19174, 18472, 17757, 17030,
+16291, 15541, 14781, 14010, 13230,
+12441, 11643, 10837, 10024, 9204,
+8377, 7545, 6708, 5866, 5020,
+4171, 3319, 2464, 1608, 751,
+-107, -965, -1822, -2678, -3532,
+-4383, -5232, -6077, -6918, -7754,
+-8585, -9409, -10228, -11039, -11843,
+-12639, -13426, -14204, -14972, -15730,
+-16477, -17213, -17937, -18648, -19347,
+-20033, -20705, -21363, -22006, -22634,
+-23246, -23843, -24423, -24986, -25533,
+-26062, -26573, -27066, -27540, -27995,
+-28431, -28848, -29245, -29622, -29979,
+-30315, -30630, -30924, -31197, -31449,
+-31679, -31887, -32074, -32239, -32381,
+-32501, -32600, -32675, -32729, -32759,
+};
+#endif
+
+static const CELTMode mode48000_960_120 = {
+48000,  /* Fs */
+120,    /* overlap */
+21,     /* nbEBands */
+21,     /* effEBands */
+{27853, 0, 4096, 8192, },       /* preemph */
+eband5ms,       /* eBands */
+3,      /* maxLM */
+8,      /* nbShortMdcts */
+120,    /* shortMdctSize */
+11,     /* nbAllocVectors */
+band_allocation,        /* allocVectors */
+logN400,        /* logN */
+window120,      /* window */
+{1920, 3, {&fft_state48000_960_0, &fft_state48000_960_1, &fft_state48000_960_2, &fft_state48000_960_3, }, mdct_twiddles960},    /* mdct */
+{392, cache_index50, cache_bits50, cache_caps50},       /* cache */
+};
+
+/* List of all the available modes */
+#define TOTAL_MODES 1
+static const CELTMode * const static_mode_list[TOTAL_MODES] = {
+&mode48000_960_120,
+};
diff --git a/libopus/celt/static_modes_fixed_arm_ne10.h b/libopus/celt/static_modes_fixed_arm_ne10.h
new file mode 100644
index 0000000..7623092
--- /dev/null
+++ b/libopus/celt/static_modes_fixed_arm_ne10.h
@@ -0,0 +1,388 @@
+/* The contents of this file was automatically generated by
+ * dump_mode_arm_ne10.c with arguments: 48000 960
+ * It contains static definitions for some pre-defined modes. */
+#include <NE10_types.h>
+
+#ifndef NE10_FFT_PARAMS48000_960
+#define NE10_FFT_PARAMS48000_960
+static const ne10_int32_t ne10_factors_480[64] = {
+4, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, };
+static const ne10_int32_t ne10_factors_240[64] = {
+3, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, };
+static const ne10_int32_t ne10_factors_120[64] = {
+3, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, };
+static const ne10_int32_t ne10_factors_60[64] = {
+2, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, };
+static const ne10_fft_cpx_int32_t ne10_twiddles_480[480] = {
+{0,0}, {2147483647,0}, {2147483647,0},
+{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394},
+{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496},
+{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096},
+{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152},
+{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313},
+{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424},
+{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496},
+{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268},
+{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785},
+{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172},
+{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682},
+{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313},
+{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450},
+{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067},
+{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277},
+{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424},
+{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771},
+{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994},
+{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593},
+{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968},
+{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851},
+{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394},
+{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959},
+{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516},
+{-94,-2147483647}, {-224473265,-2135719496}, {-446487060,-2100555955},
+{-663609049,-2042378281}, {-873460398,-1961823883}, {-1073741932,-1859775330},
+{-1262259116,-1737350839}, {-1436947137,-1595891268}, {-1595891628,-1436946738},
+{-1737350854,-1262259096}, {-1859775343,-1073741910}, {-1961823997,-873460141},
+{-2042378447,-663608538}, {-2100556013,-446486785}, {-2135719499,-224473240},
+{2147483647,0}, {2121044558,-335940465}, {2042378310,-663608960},
+{1913421927,-974937199}, {1737350743,-1262259248}, {1518500216,-1518500282},
+{1262259172,-1737350799}, {974937230,-1913421912}, {663608871,-2042378339},
+{335940246,-2121044593}, {-94,-2147483647}, {-335940431,-2121044564},
+{-663609049,-2042378281}, {-974937397,-1913421827}, {-1262259116,-1737350839},
+{-1518500258,-1518500240}, {-1737350854,-1262259096}, {-1913422071,-974936918},
+{-2042378447,-663608538}, {-2121044568,-335940406}, {-2147483647,188},
+{-2121044509,335940777}, {-2042378331,663608895}, {-1913421900,974937252},
+{-1737350633,1262259400}, {-1518499993,1518500506}, {-1262258813,1737351059},
+{-974936606,1913422229}, {-663609179,2042378239}, {-335940566,2121044542},
+{2147483647,0}, {2147299667,-28109693}, {2146747758,-56214570},
+{2145828015,-84309815}, {2144540595,-112390613}, {2142885719,-140452154},
+{2140863671,-168489630}, {2138474797,-196498235}, {2135719506,-224473172},
+{2132598271,-252409646}, {2129111626,-280302871}, {2125260168,-308148068},
+{2121044558,-335940465}, {2116465518,-363675300}, {2111523833,-391347822},
+{2106220349,-418953288}, {2100555974,-446486968}, {2094531681,-473944146},
+{2088148500,-501320115}, {2081407525,-528610186}, {2074309912,-555809682},
+{2066856885,-582913912}, {2059049696,-609918325}, {2050889698,-636818231},
+{2042378310,-663608960}, {2033516972,-690285983}, {2024307180,-716844791},
+{2014750533,-743280770}, {2004848691,-769589332}, {1994603329,-795766029},
+{1984016179,-821806435}, {1973089077,-847706028}, {1961823921,-873460313},
+{1950222618,-899064934}, {1938287127,-924515564}, {1926019520,-949807783},
+{1913421927,-974937199}, {1900496481,-999899565}, {1887245364,-1024690661},
+{1873670877,-1049306180}, {1859775377,-1073741851}, {1845561215,-1097993541},
+{1831030826,-1122057097}, {1816186632,-1145928502}, {1801031311,-1169603450},
+{1785567394,-1193077993}, {1769797456,-1216348214}, {1753724345,-1239409914},
+{1737350743,-1262259248}, {1720679456,-1284892300}, {1703713340,-1307305194},
+{1686455222,-1329494189}, {1668908218,-1351455280}, {1651075255,-1373184807},
+{1632959307,-1394679144}, {1614563642,-1415934412}, {1595891331,-1436947067},
+{1576945572,-1457713510}, {1557729613,-1478230181}, {1538246655,-1498493658},
+{1518500216,-1518500282}, {1498493590,-1538246721}, {1478230113,-1557729677},
+{1457713441,-1576945636}, {1436946998,-1595891394}, {1415934341,-1614563704},
+{1394679073,-1632959368}, {1373184735,-1651075315}, {1351455207,-1668908277},
+{1329494115,-1686455280}, {1307305120,-1703713397}, {1284892225,-1720679512},
+{1262259172,-1737350799}, {1239409837,-1753724400}, {1216348136,-1769797510},
+{1193077915,-1785567446}, {1169603371,-1801031362}, {1145928423,-1816186682},
+{1122057017,-1831030875}, {1097993571,-1845561197}, {1073741769,-1859775424},
+{1049305987,-1873670985}, {1024690635,-1887245378}, {999899482,-1900496524},
+{974937230,-1913421912}, {949807699,-1926019561}, {924515422,-1938287195},
+{899064965,-1950222603}, {873460227,-1961823959}, {847705824,-1973089164},
+{821806407,-1984016190}, {795765941,-1994603364}, {769589125,-2004848771},
+{743280682,-2014750566}, {716844642,-2024307233}, {690286016,-2033516961},
+{663608871,-2042378339}, {636818019,-2050889764}, {609918296,-2059049705},
+{582913822,-2066856911}, {555809715,-2074309903}, {528610126,-2081407540},
+{501319962,-2088148536}, {473944148,-2094531680}, {446486876,-2100555994},
+{418953102,-2106220386}, {391347792,-2111523838}, {363675176,-2116465540},
+{335940246,-2121044593}, {308148006,-2125260177}, {280302715,-2129111646},
+{252409648,-2132598271}, {224473078,-2135719516}, {196498046,-2138474814},
+{168489600,-2140863674}, {140452029,-2142885728}, {112390647,-2144540593},
+{84309753,-2145828017}, {56214412,-2146747762}, {28109695,-2147299667},
+{2147483647,0}, {2146747758,-56214570}, {2144540595,-112390613},
+{2140863671,-168489630}, {2135719506,-224473172}, {2129111626,-280302871},
+{2121044558,-335940465}, {2111523833,-391347822}, {2100555974,-446486968},
+{2088148500,-501320115}, {2074309912,-555809682}, {2059049696,-609918325},
+{2042378310,-663608960}, {2024307180,-716844791}, {2004848691,-769589332},
+{1984016179,-821806435}, {1961823921,-873460313}, {1938287127,-924515564},
+{1913421927,-974937199}, {1887245364,-1024690661}, {1859775377,-1073741851},
+{1831030826,-1122057097}, {1801031311,-1169603450}, {1769797456,-1216348214},
+{1737350743,-1262259248}, {1703713340,-1307305194}, {1668908218,-1351455280},
+{1632959307,-1394679144}, {1595891331,-1436947067}, {1557729613,-1478230181},
+{1518500216,-1518500282}, {1478230113,-1557729677}, {1436946998,-1595891394},
+{1394679073,-1632959368}, {1351455207,-1668908277}, {1307305120,-1703713397},
+{1262259172,-1737350799}, {1216348136,-1769797510}, {1169603371,-1801031362},
+{1122057017,-1831030875}, {1073741769,-1859775424}, {1024690635,-1887245378},
+{974937230,-1913421912}, {924515422,-1938287195}, {873460227,-1961823959},
+{821806407,-1984016190}, {769589125,-2004848771}, {716844642,-2024307233},
+{663608871,-2042378339}, {609918296,-2059049705}, {555809715,-2074309903},
+{501319962,-2088148536}, {446486876,-2100555994}, {391347792,-2111523838},
+{335940246,-2121044593}, {280302715,-2129111646}, {224473078,-2135719516},
+{168489600,-2140863674}, {112390647,-2144540593}, {56214412,-2146747762},
+{-94,-2147483647}, {-56214600,-2146747757}, {-112390835,-2144540584},
+{-168489787,-2140863659}, {-224473265,-2135719496}, {-280302901,-2129111622},
+{-335940431,-2121044564}, {-391347977,-2111523804}, {-446487060,-2100555955},
+{-501320144,-2088148493}, {-555809896,-2074309855}, {-609918476,-2059049651},
+{-663609049,-2042378281}, {-716844819,-2024307170}, {-769589300,-2004848703},
+{-821806581,-1984016118}, {-873460398,-1961823883}, {-924515591,-1938287114},
+{-974937397,-1913421827}, {-1024690575,-1887245411}, {-1073741932,-1859775330},
+{-1122057395,-1831030643}, {-1169603421,-1801031330}, {-1216348291,-1769797403},
+{-1262259116,-1737350839}, {-1307305268,-1703713283}, {-1351455453,-1668908078},
+{-1394679021,-1632959413}, {-1436947137,-1595891268}, {-1478230435,-1557729372},
+{-1518500258,-1518500240}, {-1557729742,-1478230045}, {-1595891628,-1436946738},
+{-1632959429,-1394679001}, {-1668908417,-1351455035}, {-1703713298,-1307305248},
+{-1737350854,-1262259096}, {-1769797708,-1216347848}, {-1801031344,-1169603400},
+{-1831030924,-1122056937}, {-1859775343,-1073741910}, {-1887245423,-1024690552},
+{-1913422071,-974936918}, {-1938287125,-924515568}, {-1961823997,-873460141},
+{-1984016324,-821806084}, {-2004848713,-769589276}, {-2024307264,-716844553},
+{-2042378447,-663608538}, {-2059049731,-609918206}, {-2074309994,-555809377},
+{-2088148499,-501320119}, {-2100556013,-446486785}, {-2111523902,-391347448},
+{-2121044568,-335940406}, {-2129111659,-280302621}, {-2135719499,-224473240},
+{-2140863681,-168489506}, {-2144540612,-112390298}, {-2146747758,-56214574},
+{2147483647,0}, {2145828015,-84309815}, {2140863671,-168489630},
+{2132598271,-252409646}, {2121044558,-335940465}, {2106220349,-418953288},
+{2088148500,-501320115}, {2066856885,-582913912}, {2042378310,-663608960},
+{2014750533,-743280770}, {1984016179,-821806435}, {1950222618,-899064934},
+{1913421927,-974937199}, {1873670877,-1049306180}, {1831030826,-1122057097},
+{1785567394,-1193077993}, {1737350743,-1262259248}, {1686455222,-1329494189},
+{1632959307,-1394679144}, {1576945572,-1457713510}, {1518500216,-1518500282},
+{1457713441,-1576945636}, {1394679073,-1632959368}, {1329494115,-1686455280},
+{1262259172,-1737350799}, {1193077915,-1785567446}, {1122057017,-1831030875},
+{1049305987,-1873670985}, {974937230,-1913421912}, {899064965,-1950222603},
+{821806407,-1984016190}, {743280682,-2014750566}, {663608871,-2042378339},
+{582913822,-2066856911}, {501319962,-2088148536}, {418953102,-2106220386},
+{335940246,-2121044593}, {252409648,-2132598271}, {168489600,-2140863674},
+{84309753,-2145828017}, {-94,-2147483647}, {-84309940,-2145828010},
+{-168489787,-2140863659}, {-252409834,-2132598249}, {-335940431,-2121044564},
+{-418953286,-2106220349}, {-501320144,-2088148493}, {-582914003,-2066856860},
+{-663609049,-2042378281}, {-743280858,-2014750501}, {-821806581,-1984016118},
+{-899065136,-1950222525}, {-974937397,-1913421827}, {-1049306374,-1873670768},
+{-1122057395,-1831030643}, {-1193078284,-1785567199}, {-1262259116,-1737350839},
+{-1329494061,-1686455323}, {-1394679021,-1632959413}, {-1457713485,-1576945595},
+{-1518500258,-1518500240}, {-1576945613,-1457713466}, {-1632959429,-1394679001},
+{-1686455338,-1329494041}, {-1737350854,-1262259096}, {-1785567498,-1193077837},
+{-1831030924,-1122056937}, {-1873671031,-1049305905}, {-1913422071,-974936918},
+{-1950222750,-899064648}, {-1984016324,-821806084}, {-2014750687,-743280354},
+{-2042378447,-663608538}, {-2066856867,-582913978}, {-2088148499,-501320119},
+{-2106220354,-418953261}, {-2121044568,-335940406}, {-2132598282,-252409555},
+{-2140863681,-168489506}, {-2145828021,-84309659}, {-2147483647,188},
+{-2145828006,84310034}, {-2140863651,168489881}, {-2132598237,252409928},
+{-2121044509,335940777}, {-2106220281,418953629}, {-2088148411,501320484},
+{-2066856765,582914339}, {-2042378331,663608895}, {-2014750557,743280706},
+{-1984016181,821806431}, {-1950222593,899064989}, {-1913421900,974937252},
+{-1873670848,1049306232}, {-1831030728,1122057257}, {-1785567289,1193078149},
+{-1737350633,1262259400}, {-1686455106,1329494336}, {-1632959185,1394679287},
+{-1576945358,1457713742}, {-1518499993,1518500506}, {-1457713209,1576945850},
+{-1394678735,1632959656}, {-1329493766,1686455555}, {-1262258813,1737351059},
+{-1193077546,1785567692}, {-1122056638,1831031107}, {-1049305599,1873671202},
+{-974936606,1913422229}, {-899064330,1950222896}, {-821805761,1984016458},
+{-743280025,2014750808}, {-663609179,2042378239}, {-582914134,2066856823},
+{-501320277,2088148461}, {-418953420,2106220322}, {-335940566,2121044542},
+{-252409716,2132598263}, {-168489668,2140863668}, {-84309821,2145828015},
+};
+static const ne10_fft_cpx_int32_t ne10_twiddles_240[240] = {
+{0,0}, {2147483647,0}, {2147483647,0},
+{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394},
+{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496},
+{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096},
+{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152},
+{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968},
+{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851},
+{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394},
+{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959},
+{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516},
+{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313},
+{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424},
+{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496},
+{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268},
+{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785},
+{2147483647,0}, {2042378310,-663608960}, {1737350743,-1262259248},
+{1262259172,-1737350799}, {663608871,-2042378339}, {-94,-2147483647},
+{-663609049,-2042378281}, {-1262259116,-1737350839}, {-1737350854,-1262259096},
+{-2042378447,-663608538}, {-2147483647,188}, {-2042378331,663608895},
+{-1737350633,1262259400}, {-1262258813,1737351059}, {-663609179,2042378239},
+{2147483647,0}, {2146747758,-56214570}, {2144540595,-112390613},
+{2140863671,-168489630}, {2135719506,-224473172}, {2129111626,-280302871},
+{2121044558,-335940465}, {2111523833,-391347822}, {2100555974,-446486968},
+{2088148500,-501320115}, {2074309912,-555809682}, {2059049696,-609918325},
+{2042378310,-663608960}, {2024307180,-716844791}, {2004848691,-769589332},
+{1984016179,-821806435}, {1961823921,-873460313}, {1938287127,-924515564},
+{1913421927,-974937199}, {1887245364,-1024690661}, {1859775377,-1073741851},
+{1831030826,-1122057097}, {1801031311,-1169603450}, {1769797456,-1216348214},
+{1737350743,-1262259248}, {1703713340,-1307305194}, {1668908218,-1351455280},
+{1632959307,-1394679144}, {1595891331,-1436947067}, {1557729613,-1478230181},
+{1518500216,-1518500282}, {1478230113,-1557729677}, {1436946998,-1595891394},
+{1394679073,-1632959368}, {1351455207,-1668908277}, {1307305120,-1703713397},
+{1262259172,-1737350799}, {1216348136,-1769797510}, {1169603371,-1801031362},
+{1122057017,-1831030875}, {1073741769,-1859775424}, {1024690635,-1887245378},
+{974937230,-1913421912}, {924515422,-1938287195}, {873460227,-1961823959},
+{821806407,-1984016190}, {769589125,-2004848771}, {716844642,-2024307233},
+{663608871,-2042378339}, {609918296,-2059049705}, {555809715,-2074309903},
+{501319962,-2088148536}, {446486876,-2100555994}, {391347792,-2111523838},
+{335940246,-2121044593}, {280302715,-2129111646}, {224473078,-2135719516},
+{168489600,-2140863674}, {112390647,-2144540593}, {56214412,-2146747762},
+{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172},
+{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682},
+{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313},
+{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450},
+{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067},
+{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277},
+{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424},
+{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771},
+{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994},
+{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593},
+{-94,-2147483647}, {-112390835,-2144540584}, {-224473265,-2135719496},
+{-335940431,-2121044564}, {-446487060,-2100555955}, {-555809896,-2074309855},
+{-663609049,-2042378281}, {-769589300,-2004848703}, {-873460398,-1961823883},
+{-974937397,-1913421827}, {-1073741932,-1859775330}, {-1169603421,-1801031330},
+{-1262259116,-1737350839}, {-1351455453,-1668908078}, {-1436947137,-1595891268},
+{-1518500258,-1518500240}, {-1595891628,-1436946738}, {-1668908417,-1351455035},
+{-1737350854,-1262259096}, {-1801031344,-1169603400}, {-1859775343,-1073741910},
+{-1913422071,-974936918}, {-1961823997,-873460141}, {-2004848713,-769589276},
+{-2042378447,-663608538}, {-2074309994,-555809377}, {-2100556013,-446486785},
+{-2121044568,-335940406}, {-2135719499,-224473240}, {-2144540612,-112390298},
+{2147483647,0}, {2140863671,-168489630}, {2121044558,-335940465},
+{2088148500,-501320115}, {2042378310,-663608960}, {1984016179,-821806435},
+{1913421927,-974937199}, {1831030826,-1122057097}, {1737350743,-1262259248},
+{1632959307,-1394679144}, {1518500216,-1518500282}, {1394679073,-1632959368},
+{1262259172,-1737350799}, {1122057017,-1831030875}, {974937230,-1913421912},
+{821806407,-1984016190}, {663608871,-2042378339}, {501319962,-2088148536},
+{335940246,-2121044593}, {168489600,-2140863674}, {-94,-2147483647},
+{-168489787,-2140863659}, {-335940431,-2121044564}, {-501320144,-2088148493},
+{-663609049,-2042378281}, {-821806581,-1984016118}, {-974937397,-1913421827},
+{-1122057395,-1831030643}, {-1262259116,-1737350839}, {-1394679021,-1632959413},
+{-1518500258,-1518500240}, {-1632959429,-1394679001}, {-1737350854,-1262259096},
+{-1831030924,-1122056937}, {-1913422071,-974936918}, {-1984016324,-821806084},
+{-2042378447,-663608538}, {-2088148499,-501320119}, {-2121044568,-335940406},
+{-2140863681,-168489506}, {-2147483647,188}, {-2140863651,168489881},
+{-2121044509,335940777}, {-2088148411,501320484}, {-2042378331,663608895},
+{-1984016181,821806431}, {-1913421900,974937252}, {-1831030728,1122057257},
+{-1737350633,1262259400}, {-1632959185,1394679287}, {-1518499993,1518500506},
+{-1394678735,1632959656}, {-1262258813,1737351059}, {-1122056638,1831031107},
+{-974936606,1913422229}, {-821805761,1984016458}, {-663609179,2042378239},
+{-501320277,2088148461}, {-335940566,2121044542}, {-168489668,2140863668},
+};
+static const ne10_fft_cpx_int32_t ne10_twiddles_120[120] = {
+{0,0}, {2147483647,0}, {2147483647,0},
+{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394},
+{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496},
+{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096},
+{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152},
+{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313},
+{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424},
+{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496},
+{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268},
+{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785},
+{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172},
+{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682},
+{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313},
+{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450},
+{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067},
+{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277},
+{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424},
+{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771},
+{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994},
+{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593},
+{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968},
+{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851},
+{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394},
+{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959},
+{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516},
+{-94,-2147483647}, {-224473265,-2135719496}, {-446487060,-2100555955},
+{-663609049,-2042378281}, {-873460398,-1961823883}, {-1073741932,-1859775330},
+{-1262259116,-1737350839}, {-1436947137,-1595891268}, {-1595891628,-1436946738},
+{-1737350854,-1262259096}, {-1859775343,-1073741910}, {-1961823997,-873460141},
+{-2042378447,-663608538}, {-2100556013,-446486785}, {-2135719499,-224473240},
+{2147483647,0}, {2121044558,-335940465}, {2042378310,-663608960},
+{1913421927,-974937199}, {1737350743,-1262259248}, {1518500216,-1518500282},
+{1262259172,-1737350799}, {974937230,-1913421912}, {663608871,-2042378339},
+{335940246,-2121044593}, {-94,-2147483647}, {-335940431,-2121044564},
+{-663609049,-2042378281}, {-974937397,-1913421827}, {-1262259116,-1737350839},
+{-1518500258,-1518500240}, {-1737350854,-1262259096}, {-1913422071,-974936918},
+{-2042378447,-663608538}, {-2121044568,-335940406}, {-2147483647,188},
+{-2121044509,335940777}, {-2042378331,663608895}, {-1913421900,974937252},
+{-1737350633,1262259400}, {-1518499993,1518500506}, {-1262258813,1737351059},
+{-974936606,1913422229}, {-663609179,2042378239}, {-335940566,2121044542},
+};
+static const ne10_fft_cpx_int32_t ne10_twiddles_60[60] = {
+{0,0}, {2147483647,0}, {2147483647,0},
+{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394},
+{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496},
+{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096},
+{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152},
+{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968},
+{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851},
+{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394},
+{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959},
+{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516},
+{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313},
+{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424},
+{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496},
+{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268},
+{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785},
+{2147483647,0}, {2042378310,-663608960}, {1737350743,-1262259248},
+{1262259172,-1737350799}, {663608871,-2042378339}, {-94,-2147483647},
+{-663609049,-2042378281}, {-1262259116,-1737350839}, {-1737350854,-1262259096},
+{-2042378447,-663608538}, {-2147483647,188}, {-2042378331,663608895},
+{-1737350633,1262259400}, {-1262258813,1737351059}, {-663609179,2042378239},
+};
+static const ne10_fft_state_int32_t ne10_fft_state_int32_t_480 = {
+120,
+(ne10_int32_t *)ne10_factors_480,
+(ne10_fft_cpx_int32_t *)ne10_twiddles_480,
+NULL,
+(ne10_fft_cpx_int32_t *)&ne10_twiddles_480[120],
+};
+static const arch_fft_state cfg_arch_480 = {
+1,
+(void *)&ne10_fft_state_int32_t_480,
+};
+
+static const ne10_fft_state_int32_t ne10_fft_state_int32_t_240 = {
+60,
+(ne10_int32_t *)ne10_factors_240,
+(ne10_fft_cpx_int32_t *)ne10_twiddles_240,
+NULL,
+(ne10_fft_cpx_int32_t *)&ne10_twiddles_240[60],
+};
+static const arch_fft_state cfg_arch_240 = {
+1,
+(void *)&ne10_fft_state_int32_t_240,
+};
+
+static const ne10_fft_state_int32_t ne10_fft_state_int32_t_120 = {
+30,
+(ne10_int32_t *)ne10_factors_120,
+(ne10_fft_cpx_int32_t *)ne10_twiddles_120,
+NULL,
+(ne10_fft_cpx_int32_t *)&ne10_twiddles_120[30],
+};
+static const arch_fft_state cfg_arch_120 = {
+1,
+(void *)&ne10_fft_state_int32_t_120,
+};
+
+static const ne10_fft_state_int32_t ne10_fft_state_int32_t_60 = {
+15,
+(ne10_int32_t *)ne10_factors_60,
+(ne10_fft_cpx_int32_t *)ne10_twiddles_60,
+NULL,
+(ne10_fft_cpx_int32_t *)&ne10_twiddles_60[15],
+};
+static const arch_fft_state cfg_arch_60 = {
+1,
+(void *)&ne10_fft_state_int32_t_60,
+};
+
+#endif  /* end NE10_FFT_PARAMS48000_960 */
diff --git a/libopus/celt/static_modes_float.h b/libopus/celt/static_modes_float.h
new file mode 100644
index 0000000..e102a38
--- /dev/null
+++ b/libopus/celt/static_modes_float.h
@@ -0,0 +1,888 @@
+/* The contents of this file was automatically generated by dump_modes.c
+   with arguments: 48000 960
+   It contains static definitions for some pre-defined modes. */
+#include "modes.h"
+#include "rate.h"
+
+#ifdef HAVE_ARM_NE10
+#define OVERRIDE_FFT 1
+#include "static_modes_float_arm_ne10.h"
+#endif
+
+#ifndef DEF_WINDOW120
+#define DEF_WINDOW120
+static const opus_val16 window120[120] = {
+6.7286966e-05f, 0.00060551348f, 0.0016815970f, 0.0032947962f, 0.0054439943f,
+0.0081276923f, 0.011344001f, 0.015090633f, 0.019364886f, 0.024163635f,
+0.029483315f, 0.035319905f, 0.041668911f, 0.048525347f, 0.055883718f,
+0.063737999f, 0.072081616f, 0.080907428f, 0.090207705f, 0.099974111f,
+0.11019769f, 0.12086883f, 0.13197729f, 0.14351214f, 0.15546177f,
+0.16781389f, 0.18055550f, 0.19367290f, 0.20715171f, 0.22097682f,
+0.23513243f, 0.24960208f, 0.26436860f, 0.27941419f, 0.29472040f,
+0.31026818f, 0.32603788f, 0.34200931f, 0.35816177f, 0.37447407f,
+0.39092462f, 0.40749142f, 0.42415215f, 0.44088423f, 0.45766484f,
+0.47447104f, 0.49127978f, 0.50806798f, 0.52481261f, 0.54149077f,
+0.55807973f, 0.57455701f, 0.59090049f, 0.60708841f, 0.62309951f,
+0.63891306f, 0.65450896f, 0.66986776f, 0.68497077f, 0.69980010f,
+0.71433873f, 0.72857055f, 0.74248043f, 0.75605424f, 0.76927895f,
+0.78214257f, 0.79463430f, 0.80674445f, 0.81846456f, 0.82978733f,
+0.84070669f, 0.85121779f, 0.86131698f, 0.87100183f, 0.88027111f,
+0.88912479f, 0.89756398f, 0.90559094f, 0.91320904f, 0.92042270f,
+0.92723738f, 0.93365955f, 0.93969656f, 0.94535671f, 0.95064907f,
+0.95558353f, 0.96017067f, 0.96442171f, 0.96834849f, 0.97196334f,
+0.97527906f, 0.97830883f, 0.98106616f, 0.98356480f, 0.98581869f,
+0.98784191f, 0.98964856f, 0.99125274f, 0.99266849f, 0.99390969f,
+0.99499004f, 0.99592297f, 0.99672162f, 0.99739874f, 0.99796667f,
+0.99843728f, 0.99882195f, 0.99913147f, 0.99937606f, 0.99956527f,
+0.99970802f, 0.99981248f, 0.99988613f, 0.99993565f, 0.99996697f,
+0.99998518f, 0.99999457f, 0.99999859f, 0.99999982f, 1.0000000f,
+};
+#endif
+
+#ifndef DEF_LOGN400
+#define DEF_LOGN400
+static const opus_int16 logN400[21] = {
+0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 16, 16, 16, 21, 21, 24, 29, 34, 36, };
+#endif
+
+#ifndef DEF_PULSE_CACHE50
+#define DEF_PULSE_CACHE50
+static const opus_int16 cache_index50[105] = {
+-1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 41, 41, 41,
+82, 82, 123, 164, 200, 222, 0, 0, 0, 0, 0, 0, 0, 0, 41,
+41, 41, 41, 123, 123, 123, 164, 164, 240, 266, 283, 295, 41, 41, 41,
+41, 41, 41, 41, 41, 123, 123, 123, 123, 240, 240, 240, 266, 266, 305,
+318, 328, 336, 123, 123, 123, 123, 123, 123, 123, 123, 240, 240, 240, 240,
+305, 305, 305, 318, 318, 343, 351, 358, 364, 240, 240, 240, 240, 240, 240,
+240, 240, 305, 305, 305, 305, 343, 343, 343, 351, 351, 370, 376, 382, 387,
+};
+static const unsigned char cache_bits50[392] = {
+40, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 40, 15, 23, 28,
+31, 34, 36, 38, 39, 41, 42, 43, 44, 45, 46, 47, 47, 49, 50,
+51, 52, 53, 54, 55, 55, 57, 58, 59, 60, 61, 62, 63, 63, 65,
+66, 67, 68, 69, 70, 71, 71, 40, 20, 33, 41, 48, 53, 57, 61,
+64, 66, 69, 71, 73, 75, 76, 78, 80, 82, 85, 87, 89, 91, 92,
+94, 96, 98, 101, 103, 105, 107, 108, 110, 112, 114, 117, 119, 121, 123,
+124, 126, 128, 40, 23, 39, 51, 60, 67, 73, 79, 83, 87, 91, 94,
+97, 100, 102, 105, 107, 111, 115, 118, 121, 124, 126, 129, 131, 135, 139,
+142, 145, 148, 150, 153, 155, 159, 163, 166, 169, 172, 174, 177, 179, 35,
+28, 49, 65, 78, 89, 99, 107, 114, 120, 126, 132, 136, 141, 145, 149,
+153, 159, 165, 171, 176, 180, 185, 189, 192, 199, 205, 211, 216, 220, 225,
+229, 232, 239, 245, 251, 21, 33, 58, 79, 97, 112, 125, 137, 148, 157,
+166, 174, 182, 189, 195, 201, 207, 217, 227, 235, 243, 251, 17, 35, 63,
+86, 106, 123, 139, 152, 165, 177, 187, 197, 206, 214, 222, 230, 237, 250,
+25, 31, 55, 75, 91, 105, 117, 128, 138, 146, 154, 161, 168, 174, 180,
+185, 190, 200, 208, 215, 222, 229, 235, 240, 245, 255, 16, 36, 65, 89,
+110, 128, 144, 159, 173, 185, 196, 207, 217, 226, 234, 242, 250, 11, 41,
+74, 103, 128, 151, 172, 191, 209, 225, 241, 255, 9, 43, 79, 110, 138,
+163, 186, 207, 227, 246, 12, 39, 71, 99, 123, 144, 164, 182, 198, 214,
+228, 241, 253, 9, 44, 81, 113, 142, 168, 192, 214, 235, 255, 7, 49,
+90, 127, 160, 191, 220, 247, 6, 51, 95, 134, 170, 203, 234, 7, 47,
+87, 123, 155, 184, 212, 237, 6, 52, 97, 137, 174, 208, 240, 5, 57,
+106, 151, 192, 231, 5, 59, 111, 158, 202, 243, 5, 55, 103, 147, 187,
+224, 5, 60, 113, 161, 206, 248, 4, 65, 122, 175, 224, 4, 67, 127,
+182, 234, };
+static const unsigned char cache_caps50[168] = {
+224, 224, 224, 224, 224, 224, 224, 224, 160, 160, 160, 160, 185, 185, 185,
+178, 178, 168, 134, 61, 37, 224, 224, 224, 224, 224, 224, 224, 224, 240,
+240, 240, 240, 207, 207, 207, 198, 198, 183, 144, 66, 40, 160, 160, 160,
+160, 160, 160, 160, 160, 185, 185, 185, 185, 193, 193, 193, 183, 183, 172,
+138, 64, 38, 240, 240, 240, 240, 240, 240, 240, 240, 207, 207, 207, 207,
+204, 204, 204, 193, 193, 180, 143, 66, 40, 185, 185, 185, 185, 185, 185,
+185, 185, 193, 193, 193, 193, 193, 193, 193, 183, 183, 172, 138, 65, 39,
+207, 207, 207, 207, 207, 207, 207, 207, 204, 204, 204, 204, 201, 201, 201,
+188, 188, 176, 141, 66, 40, 193, 193, 193, 193, 193, 193, 193, 193, 193,
+193, 193, 193, 194, 194, 194, 184, 184, 173, 139, 65, 39, 204, 204, 204,
+204, 204, 204, 204, 204, 201, 201, 201, 201, 198, 198, 198, 187, 187, 175,
+140, 66, 40, };
+#endif
+
+#ifndef FFT_TWIDDLES48000_960
+#define FFT_TWIDDLES48000_960
+static const kiss_twiddle_cpx fft_twiddles48000_960[480] = {
+{1.0000000f, -0.0000000f}, {0.99991433f, -0.013089596f},
+{0.99965732f, -0.026176948f}, {0.99922904f, -0.039259816f},
+{0.99862953f, -0.052335956f}, {0.99785892f, -0.065403129f},
+{0.99691733f, -0.078459096f}, {0.99580493f, -0.091501619f},
+{0.99452190f, -0.10452846f}, {0.99306846f, -0.11753740f},
+{0.99144486f, -0.13052619f}, {0.98965139f, -0.14349262f},
+{0.98768834f, -0.15643447f}, {0.98555606f, -0.16934950f},
+{0.98325491f, -0.18223553f}, {0.98078528f, -0.19509032f},
+{0.97814760f, -0.20791169f}, {0.97534232f, -0.22069744f},
+{0.97236992f, -0.23344536f}, {0.96923091f, -0.24615329f},
+{0.96592583f, -0.25881905f}, {0.96245524f, -0.27144045f},
+{0.95881973f, -0.28401534f}, {0.95501994f, -0.29654157f},
+{0.95105652f, -0.30901699f}, {0.94693013f, -0.32143947f},
+{0.94264149f, -0.33380686f}, {0.93819134f, -0.34611706f},
+{0.93358043f, -0.35836795f}, {0.92880955f, -0.37055744f},
+{0.92387953f, -0.38268343f}, {0.91879121f, -0.39474386f},
+{0.91354546f, -0.40673664f}, {0.90814317f, -0.41865974f},
+{0.90258528f, -0.43051110f}, {0.89687274f, -0.44228869f},
+{0.89100652f, -0.45399050f}, {0.88498764f, -0.46561452f},
+{0.87881711f, -0.47715876f}, {0.87249601f, -0.48862124f},
+{0.86602540f, -0.50000000f}, {0.85940641f, -0.51129309f},
+{0.85264016f, -0.52249856f}, {0.84572782f, -0.53361452f},
+{0.83867057f, -0.54463904f}, {0.83146961f, -0.55557023f},
+{0.82412619f, -0.56640624f}, {0.81664156f, -0.57714519f},
+{0.80901699f, -0.58778525f}, {0.80125381f, -0.59832460f},
+{0.79335334f, -0.60876143f}, {0.78531693f, -0.61909395f},
+{0.77714596f, -0.62932039f}, {0.76884183f, -0.63943900f},
+{0.76040597f, -0.64944805f}, {0.75183981f, -0.65934582f},
+{0.74314483f, -0.66913061f}, {0.73432251f, -0.67880075f},
+{0.72537437f, -0.68835458f}, {0.71630194f, -0.69779046f},
+{0.70710678f, -0.70710678f}, {0.69779046f, -0.71630194f},
+{0.68835458f, -0.72537437f}, {0.67880075f, -0.73432251f},
+{0.66913061f, -0.74314483f}, {0.65934582f, -0.75183981f},
+{0.64944805f, -0.76040597f}, {0.63943900f, -0.76884183f},
+{0.62932039f, -0.77714596f}, {0.61909395f, -0.78531693f},
+{0.60876143f, -0.79335334f}, {0.59832460f, -0.80125381f},
+{0.58778525f, -0.80901699f}, {0.57714519f, -0.81664156f},
+{0.56640624f, -0.82412619f}, {0.55557023f, -0.83146961f},
+{0.54463904f, -0.83867057f}, {0.53361452f, -0.84572782f},
+{0.52249856f, -0.85264016f}, {0.51129309f, -0.85940641f},
+{0.50000000f, -0.86602540f}, {0.48862124f, -0.87249601f},
+{0.47715876f, -0.87881711f}, {0.46561452f, -0.88498764f},
+{0.45399050f, -0.89100652f}, {0.44228869f, -0.89687274f},
+{0.43051110f, -0.90258528f}, {0.41865974f, -0.90814317f},
+{0.40673664f, -0.91354546f}, {0.39474386f, -0.91879121f},
+{0.38268343f, -0.92387953f}, {0.37055744f, -0.92880955f},
+{0.35836795f, -0.93358043f}, {0.34611706f, -0.93819134f},
+{0.33380686f, -0.94264149f}, {0.32143947f, -0.94693013f},
+{0.30901699f, -0.95105652f}, {0.29654157f, -0.95501994f},
+{0.28401534f, -0.95881973f}, {0.27144045f, -0.96245524f},
+{0.25881905f, -0.96592583f}, {0.24615329f, -0.96923091f},
+{0.23344536f, -0.97236992f}, {0.22069744f, -0.97534232f},
+{0.20791169f, -0.97814760f}, {0.19509032f, -0.98078528f},
+{0.18223553f, -0.98325491f}, {0.16934950f, -0.98555606f},
+{0.15643447f, -0.98768834f}, {0.14349262f, -0.98965139f},
+{0.13052619f, -0.99144486f}, {0.11753740f, -0.99306846f},
+{0.10452846f, -0.99452190f}, {0.091501619f, -0.99580493f},
+{0.078459096f, -0.99691733f}, {0.065403129f, -0.99785892f},
+{0.052335956f, -0.99862953f}, {0.039259816f, -0.99922904f},
+{0.026176948f, -0.99965732f}, {0.013089596f, -0.99991433f},
+{6.1230318e-17f, -1.0000000f}, {-0.013089596f, -0.99991433f},
+{-0.026176948f, -0.99965732f}, {-0.039259816f, -0.99922904f},
+{-0.052335956f, -0.99862953f}, {-0.065403129f, -0.99785892f},
+{-0.078459096f, -0.99691733f}, {-0.091501619f, -0.99580493f},
+{-0.10452846f, -0.99452190f}, {-0.11753740f, -0.99306846f},
+{-0.13052619f, -0.99144486f}, {-0.14349262f, -0.98965139f},
+{-0.15643447f, -0.98768834f}, {-0.16934950f, -0.98555606f},
+{-0.18223553f, -0.98325491f}, {-0.19509032f, -0.98078528f},
+{-0.20791169f, -0.97814760f}, {-0.22069744f, -0.97534232f},
+{-0.23344536f, -0.97236992f}, {-0.24615329f, -0.96923091f},
+{-0.25881905f, -0.96592583f}, {-0.27144045f, -0.96245524f},
+{-0.28401534f, -0.95881973f}, {-0.29654157f, -0.95501994f},
+{-0.30901699f, -0.95105652f}, {-0.32143947f, -0.94693013f},
+{-0.33380686f, -0.94264149f}, {-0.34611706f, -0.93819134f},
+{-0.35836795f, -0.93358043f}, {-0.37055744f, -0.92880955f},
+{-0.38268343f, -0.92387953f}, {-0.39474386f, -0.91879121f},
+{-0.40673664f, -0.91354546f}, {-0.41865974f, -0.90814317f},
+{-0.43051110f, -0.90258528f}, {-0.44228869f, -0.89687274f},
+{-0.45399050f, -0.89100652f}, {-0.46561452f, -0.88498764f},
+{-0.47715876f, -0.87881711f}, {-0.48862124f, -0.87249601f},
+{-0.50000000f, -0.86602540f}, {-0.51129309f, -0.85940641f},
+{-0.52249856f, -0.85264016f}, {-0.53361452f, -0.84572782f},
+{-0.54463904f, -0.83867057f}, {-0.55557023f, -0.83146961f},
+{-0.56640624f, -0.82412619f}, {-0.57714519f, -0.81664156f},
+{-0.58778525f, -0.80901699f}, {-0.59832460f, -0.80125381f},
+{-0.60876143f, -0.79335334f}, {-0.61909395f, -0.78531693f},
+{-0.62932039f, -0.77714596f}, {-0.63943900f, -0.76884183f},
+{-0.64944805f, -0.76040597f}, {-0.65934582f, -0.75183981f},
+{-0.66913061f, -0.74314483f}, {-0.67880075f, -0.73432251f},
+{-0.68835458f, -0.72537437f}, {-0.69779046f, -0.71630194f},
+{-0.70710678f, -0.70710678f}, {-0.71630194f, -0.69779046f},
+{-0.72537437f, -0.68835458f}, {-0.73432251f, -0.67880075f},
+{-0.74314483f, -0.66913061f}, {-0.75183981f, -0.65934582f},
+{-0.76040597f, -0.64944805f}, {-0.76884183f, -0.63943900f},
+{-0.77714596f, -0.62932039f}, {-0.78531693f, -0.61909395f},
+{-0.79335334f, -0.60876143f}, {-0.80125381f, -0.59832460f},
+{-0.80901699f, -0.58778525f}, {-0.81664156f, -0.57714519f},
+{-0.82412619f, -0.56640624f}, {-0.83146961f, -0.55557023f},
+{-0.83867057f, -0.54463904f}, {-0.84572782f, -0.53361452f},
+{-0.85264016f, -0.52249856f}, {-0.85940641f, -0.51129309f},
+{-0.86602540f, -0.50000000f}, {-0.87249601f, -0.48862124f},
+{-0.87881711f, -0.47715876f}, {-0.88498764f, -0.46561452f},
+{-0.89100652f, -0.45399050f}, {-0.89687274f, -0.44228869f},
+{-0.90258528f, -0.43051110f}, {-0.90814317f, -0.41865974f},
+{-0.91354546f, -0.40673664f}, {-0.91879121f, -0.39474386f},
+{-0.92387953f, -0.38268343f}, {-0.92880955f, -0.37055744f},
+{-0.93358043f, -0.35836795f}, {-0.93819134f, -0.34611706f},
+{-0.94264149f, -0.33380686f}, {-0.94693013f, -0.32143947f},
+{-0.95105652f, -0.30901699f}, {-0.95501994f, -0.29654157f},
+{-0.95881973f, -0.28401534f}, {-0.96245524f, -0.27144045f},
+{-0.96592583f, -0.25881905f}, {-0.96923091f, -0.24615329f},
+{-0.97236992f, -0.23344536f}, {-0.97534232f, -0.22069744f},
+{-0.97814760f, -0.20791169f}, {-0.98078528f, -0.19509032f},
+{-0.98325491f, -0.18223553f}, {-0.98555606f, -0.16934950f},
+{-0.98768834f, -0.15643447f}, {-0.98965139f, -0.14349262f},
+{-0.99144486f, -0.13052619f}, {-0.99306846f, -0.11753740f},
+{-0.99452190f, -0.10452846f}, {-0.99580493f, -0.091501619f},
+{-0.99691733f, -0.078459096f}, {-0.99785892f, -0.065403129f},
+{-0.99862953f, -0.052335956f}, {-0.99922904f, -0.039259816f},
+{-0.99965732f, -0.026176948f}, {-0.99991433f, -0.013089596f},
+{-1.0000000f, -1.2246064e-16f}, {-0.99991433f, 0.013089596f},
+{-0.99965732f, 0.026176948f}, {-0.99922904f, 0.039259816f},
+{-0.99862953f, 0.052335956f}, {-0.99785892f, 0.065403129f},
+{-0.99691733f, 0.078459096f}, {-0.99580493f, 0.091501619f},
+{-0.99452190f, 0.10452846f}, {-0.99306846f, 0.11753740f},
+{-0.99144486f, 0.13052619f}, {-0.98965139f, 0.14349262f},
+{-0.98768834f, 0.15643447f}, {-0.98555606f, 0.16934950f},
+{-0.98325491f, 0.18223553f}, {-0.98078528f, 0.19509032f},
+{-0.97814760f, 0.20791169f}, {-0.97534232f, 0.22069744f},
+{-0.97236992f, 0.23344536f}, {-0.96923091f, 0.24615329f},
+{-0.96592583f, 0.25881905f}, {-0.96245524f, 0.27144045f},
+{-0.95881973f, 0.28401534f}, {-0.95501994f, 0.29654157f},
+{-0.95105652f, 0.30901699f}, {-0.94693013f, 0.32143947f},
+{-0.94264149f, 0.33380686f}, {-0.93819134f, 0.34611706f},
+{-0.93358043f, 0.35836795f}, {-0.92880955f, 0.37055744f},
+{-0.92387953f, 0.38268343f}, {-0.91879121f, 0.39474386f},
+{-0.91354546f, 0.40673664f}, {-0.90814317f, 0.41865974f},
+{-0.90258528f, 0.43051110f}, {-0.89687274f, 0.44228869f},
+{-0.89100652f, 0.45399050f}, {-0.88498764f, 0.46561452f},
+{-0.87881711f, 0.47715876f}, {-0.87249601f, 0.48862124f},
+{-0.86602540f, 0.50000000f}, {-0.85940641f, 0.51129309f},
+{-0.85264016f, 0.52249856f}, {-0.84572782f, 0.53361452f},
+{-0.83867057f, 0.54463904f}, {-0.83146961f, 0.55557023f},
+{-0.82412619f, 0.56640624f}, {-0.81664156f, 0.57714519f},
+{-0.80901699f, 0.58778525f}, {-0.80125381f, 0.59832460f},
+{-0.79335334f, 0.60876143f}, {-0.78531693f, 0.61909395f},
+{-0.77714596f, 0.62932039f}, {-0.76884183f, 0.63943900f},
+{-0.76040597f, 0.64944805f}, {-0.75183981f, 0.65934582f},
+{-0.74314483f, 0.66913061f}, {-0.73432251f, 0.67880075f},
+{-0.72537437f, 0.68835458f}, {-0.71630194f, 0.69779046f},
+{-0.70710678f, 0.70710678f}, {-0.69779046f, 0.71630194f},
+{-0.68835458f, 0.72537437f}, {-0.67880075f, 0.73432251f},
+{-0.66913061f, 0.74314483f}, {-0.65934582f, 0.75183981f},
+{-0.64944805f, 0.76040597f}, {-0.63943900f, 0.76884183f},
+{-0.62932039f, 0.77714596f}, {-0.61909395f, 0.78531693f},
+{-0.60876143f, 0.79335334f}, {-0.59832460f, 0.80125381f},
+{-0.58778525f, 0.80901699f}, {-0.57714519f, 0.81664156f},
+{-0.56640624f, 0.82412619f}, {-0.55557023f, 0.83146961f},
+{-0.54463904f, 0.83867057f}, {-0.53361452f, 0.84572782f},
+{-0.52249856f, 0.85264016f}, {-0.51129309f, 0.85940641f},
+{-0.50000000f, 0.86602540f}, {-0.48862124f, 0.87249601f},
+{-0.47715876f, 0.87881711f}, {-0.46561452f, 0.88498764f},
+{-0.45399050f, 0.89100652f}, {-0.44228869f, 0.89687274f},
+{-0.43051110f, 0.90258528f}, {-0.41865974f, 0.90814317f},
+{-0.40673664f, 0.91354546f}, {-0.39474386f, 0.91879121f},
+{-0.38268343f, 0.92387953f}, {-0.37055744f, 0.92880955f},
+{-0.35836795f, 0.93358043f}, {-0.34611706f, 0.93819134f},
+{-0.33380686f, 0.94264149f}, {-0.32143947f, 0.94693013f},
+{-0.30901699f, 0.95105652f}, {-0.29654157f, 0.95501994f},
+{-0.28401534f, 0.95881973f}, {-0.27144045f, 0.96245524f},
+{-0.25881905f, 0.96592583f}, {-0.24615329f, 0.96923091f},
+{-0.23344536f, 0.97236992f}, {-0.22069744f, 0.97534232f},
+{-0.20791169f, 0.97814760f}, {-0.19509032f, 0.98078528f},
+{-0.18223553f, 0.98325491f}, {-0.16934950f, 0.98555606f},
+{-0.15643447f, 0.98768834f}, {-0.14349262f, 0.98965139f},
+{-0.13052619f, 0.99144486f}, {-0.11753740f, 0.99306846f},
+{-0.10452846f, 0.99452190f}, {-0.091501619f, 0.99580493f},
+{-0.078459096f, 0.99691733f}, {-0.065403129f, 0.99785892f},
+{-0.052335956f, 0.99862953f}, {-0.039259816f, 0.99922904f},
+{-0.026176948f, 0.99965732f}, {-0.013089596f, 0.99991433f},
+{-1.8369095e-16f, 1.0000000f}, {0.013089596f, 0.99991433f},
+{0.026176948f, 0.99965732f}, {0.039259816f, 0.99922904f},
+{0.052335956f, 0.99862953f}, {0.065403129f, 0.99785892f},
+{0.078459096f, 0.99691733f}, {0.091501619f, 0.99580493f},
+{0.10452846f, 0.99452190f}, {0.11753740f, 0.99306846f},
+{0.13052619f, 0.99144486f}, {0.14349262f, 0.98965139f},
+{0.15643447f, 0.98768834f}, {0.16934950f, 0.98555606f},
+{0.18223553f, 0.98325491f}, {0.19509032f, 0.98078528f},
+{0.20791169f, 0.97814760f}, {0.22069744f, 0.97534232f},
+{0.23344536f, 0.97236992f}, {0.24615329f, 0.96923091f},
+{0.25881905f, 0.96592583f}, {0.27144045f, 0.96245524f},
+{0.28401534f, 0.95881973f}, {0.29654157f, 0.95501994f},
+{0.30901699f, 0.95105652f}, {0.32143947f, 0.94693013f},
+{0.33380686f, 0.94264149f}, {0.34611706f, 0.93819134f},
+{0.35836795f, 0.93358043f}, {0.37055744f, 0.92880955f},
+{0.38268343f, 0.92387953f}, {0.39474386f, 0.91879121f},
+{0.40673664f, 0.91354546f}, {0.41865974f, 0.90814317f},
+{0.43051110f, 0.90258528f}, {0.44228869f, 0.89687274f},
+{0.45399050f, 0.89100652f}, {0.46561452f, 0.88498764f},
+{0.47715876f, 0.87881711f}, {0.48862124f, 0.87249601f},
+{0.50000000f, 0.86602540f}, {0.51129309f, 0.85940641f},
+{0.52249856f, 0.85264016f}, {0.53361452f, 0.84572782f},
+{0.54463904f, 0.83867057f}, {0.55557023f, 0.83146961f},
+{0.56640624f, 0.82412619f}, {0.57714519f, 0.81664156f},
+{0.58778525f, 0.80901699f}, {0.59832460f, 0.80125381f},
+{0.60876143f, 0.79335334f}, {0.61909395f, 0.78531693f},
+{0.62932039f, 0.77714596f}, {0.63943900f, 0.76884183f},
+{0.64944805f, 0.76040597f}, {0.65934582f, 0.75183981f},
+{0.66913061f, 0.74314483f}, {0.67880075f, 0.73432251f},
+{0.68835458f, 0.72537437f}, {0.69779046f, 0.71630194f},
+{0.70710678f, 0.70710678f}, {0.71630194f, 0.69779046f},
+{0.72537437f, 0.68835458f}, {0.73432251f, 0.67880075f},
+{0.74314483f, 0.66913061f}, {0.75183981f, 0.65934582f},
+{0.76040597f, 0.64944805f}, {0.76884183f, 0.63943900f},
+{0.77714596f, 0.62932039f}, {0.78531693f, 0.61909395f},
+{0.79335334f, 0.60876143f}, {0.80125381f, 0.59832460f},
+{0.80901699f, 0.58778525f}, {0.81664156f, 0.57714519f},
+{0.82412619f, 0.56640624f}, {0.83146961f, 0.55557023f},
+{0.83867057f, 0.54463904f}, {0.84572782f, 0.53361452f},
+{0.85264016f, 0.52249856f}, {0.85940641f, 0.51129309f},
+{0.86602540f, 0.50000000f}, {0.87249601f, 0.48862124f},
+{0.87881711f, 0.47715876f}, {0.88498764f, 0.46561452f},
+{0.89100652f, 0.45399050f}, {0.89687274f, 0.44228869f},
+{0.90258528f, 0.43051110f}, {0.90814317f, 0.41865974f},
+{0.91354546f, 0.40673664f}, {0.91879121f, 0.39474386f},
+{0.92387953f, 0.38268343f}, {0.92880955f, 0.37055744f},
+{0.93358043f, 0.35836795f}, {0.93819134f, 0.34611706f},
+{0.94264149f, 0.33380686f}, {0.94693013f, 0.32143947f},
+{0.95105652f, 0.30901699f}, {0.95501994f, 0.29654157f},
+{0.95881973f, 0.28401534f}, {0.96245524f, 0.27144045f},
+{0.96592583f, 0.25881905f}, {0.96923091f, 0.24615329f},
+{0.97236992f, 0.23344536f}, {0.97534232f, 0.22069744f},
+{0.97814760f, 0.20791169f}, {0.98078528f, 0.19509032f},
+{0.98325491f, 0.18223553f}, {0.98555606f, 0.16934950f},
+{0.98768834f, 0.15643447f}, {0.98965139f, 0.14349262f},
+{0.99144486f, 0.13052619f}, {0.99306846f, 0.11753740f},
+{0.99452190f, 0.10452846f}, {0.99580493f, 0.091501619f},
+{0.99691733f, 0.078459096f}, {0.99785892f, 0.065403129f},
+{0.99862953f, 0.052335956f}, {0.99922904f, 0.039259816f},
+{0.99965732f, 0.026176948f}, {0.99991433f, 0.013089596f},
+};
+#ifndef FFT_BITREV480
+#define FFT_BITREV480
+static const opus_int16 fft_bitrev480[480] = {
+0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448,
+8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456,
+16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464,
+24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472,
+4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452,
+12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460,
+20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468,
+28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476,
+1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449,
+9, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457,
+17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465,
+25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473,
+5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453,
+13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461,
+21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469,
+29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477,
+2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450,
+10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458,
+18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466,
+26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474,
+6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454,
+14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462,
+22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470,
+30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478,
+3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451,
+11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459,
+19, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467,
+27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475,
+7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455,
+15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463,
+23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471,
+31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479,
+};
+#endif
+
+#ifndef FFT_BITREV240
+#define FFT_BITREV240
+static const opus_int16 fft_bitrev240[240] = {
+0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224,
+4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228,
+8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232,
+12, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236,
+1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225,
+5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229,
+9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233,
+13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237,
+2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226,
+6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230,
+10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234,
+14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238,
+3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227,
+7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231,
+11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235,
+15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239,
+};
+#endif
+
+#ifndef FFT_BITREV120
+#define FFT_BITREV120
+static const opus_int16 fft_bitrev120[120] = {
+0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112,
+4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116,
+1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113,
+5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117,
+2, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114,
+6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118,
+3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115,
+7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119,
+};
+#endif
+
+#ifndef FFT_BITREV60
+#define FFT_BITREV60
+static const opus_int16 fft_bitrev60[60] = {
+0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56,
+1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57,
+2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58,
+3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59,
+};
+#endif
+
+#ifndef FFT_STATE48000_960_0
+#define FFT_STATE48000_960_0
+static const kiss_fft_state fft_state48000_960_0 = {
+480,    /* nfft */
+0.002083333f,   /* scale */
+-1,     /* shift */
+{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, },   /* factors */
+fft_bitrev480,  /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+#ifdef OVERRIDE_FFT
+(arch_fft_state *)&cfg_arch_480,
+#else
+NULL,
+#endif
+};
+#endif
+
+#ifndef FFT_STATE48000_960_1
+#define FFT_STATE48000_960_1
+static const kiss_fft_state fft_state48000_960_1 = {
+240,    /* nfft */
+0.004166667f,   /* scale */
+1,      /* shift */
+{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, },    /* factors */
+fft_bitrev240,  /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+#ifdef OVERRIDE_FFT
+(arch_fft_state *)&cfg_arch_240,
+#else
+NULL,
+#endif
+};
+#endif
+
+#ifndef FFT_STATE48000_960_2
+#define FFT_STATE48000_960_2
+static const kiss_fft_state fft_state48000_960_2 = {
+120,    /* nfft */
+0.008333333f,   /* scale */
+2,      /* shift */
+{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, },    /* factors */
+fft_bitrev120,  /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+#ifdef OVERRIDE_FFT
+(arch_fft_state *)&cfg_arch_120,
+#else
+NULL,
+#endif
+};
+#endif
+
+#ifndef FFT_STATE48000_960_3
+#define FFT_STATE48000_960_3
+static const kiss_fft_state fft_state48000_960_3 = {
+60,     /* nfft */
+0.016666667f,   /* scale */
+3,      /* shift */
+{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },    /* factors */
+fft_bitrev60,   /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+#ifdef OVERRIDE_FFT
+(arch_fft_state *)&cfg_arch_60,
+#else
+NULL,
+#endif
+};
+#endif
+
+#endif
+
+#ifndef MDCT_TWIDDLES960
+#define MDCT_TWIDDLES960
+static const opus_val16 mdct_twiddles960[1800] = {
+0.99999994f, 0.99999321f, 0.99997580f, 0.99994773f, 0.99990886f,
+0.99985933f, 0.99979913f, 0.99972820f, 0.99964654f, 0.99955416f,
+0.99945110f, 0.99933738f, 0.99921292f, 0.99907774f, 0.99893188f,
+0.99877530f, 0.99860805f, 0.99843007f, 0.99824142f, 0.99804211f,
+0.99783206f, 0.99761140f, 0.99737996f, 0.99713790f, 0.99688518f,
+0.99662173f, 0.99634761f, 0.99606287f, 0.99576741f, 0.99546129f,
+0.99514455f, 0.99481714f, 0.99447906f, 0.99413031f, 0.99377096f,
+0.99340093f, 0.99302030f, 0.99262899f, 0.99222708f, 0.99181455f,
+0.99139136f, 0.99095762f, 0.99051321f, 0.99005818f, 0.98959261f,
+0.98911643f, 0.98862964f, 0.98813224f, 0.98762429f, 0.98710573f,
+0.98657662f, 0.98603696f, 0.98548669f, 0.98492593f, 0.98435456f,
+0.98377270f, 0.98318028f, 0.98257732f, 0.98196387f, 0.98133987f,
+0.98070538f, 0.98006040f, 0.97940493f, 0.97873890f, 0.97806245f,
+0.97737551f, 0.97667813f, 0.97597027f, 0.97525197f, 0.97452319f,
+0.97378403f, 0.97303438f, 0.97227436f, 0.97150391f, 0.97072303f,
+0.96993178f, 0.96913016f, 0.96831810f, 0.96749574f, 0.96666300f,
+0.96581990f, 0.96496642f, 0.96410263f, 0.96322852f, 0.96234411f,
+0.96144938f, 0.96054435f, 0.95962906f, 0.95870346f, 0.95776761f,
+0.95682150f, 0.95586514f, 0.95489854f, 0.95392174f, 0.95293468f,
+0.95193744f, 0.95093000f, 0.94991243f, 0.94888461f, 0.94784665f,
+0.94679856f, 0.94574034f, 0.94467193f, 0.94359344f, 0.94250488f,
+0.94140619f, 0.94029742f, 0.93917859f, 0.93804967f, 0.93691075f,
+0.93576175f, 0.93460274f, 0.93343377f, 0.93225473f, 0.93106574f,
+0.92986679f, 0.92865789f, 0.92743903f, 0.92621022f, 0.92497152f,
+0.92372292f, 0.92246443f, 0.92119598f, 0.91991776f, 0.91862965f,
+0.91733170f, 0.91602397f, 0.91470635f, 0.91337901f, 0.91204184f,
+0.91069490f, 0.90933824f, 0.90797186f, 0.90659571f, 0.90520984f,
+0.90381432f, 0.90240908f, 0.90099424f, 0.89956969f, 0.89813554f,
+0.89669174f, 0.89523834f, 0.89377540f, 0.89230281f, 0.89082074f,
+0.88932908f, 0.88782793f, 0.88631725f, 0.88479710f, 0.88326746f,
+0.88172835f, 0.88017982f, 0.87862182f, 0.87705445f, 0.87547767f,
+0.87389153f, 0.87229604f, 0.87069118f, 0.86907703f, 0.86745358f,
+0.86582077f, 0.86417878f, 0.86252749f, 0.86086690f, 0.85919720f,
+0.85751826f, 0.85583007f, 0.85413277f, 0.85242635f, 0.85071075f,
+0.84898609f, 0.84725231f, 0.84550947f, 0.84375757f, 0.84199661f,
+0.84022665f, 0.83844769f, 0.83665979f, 0.83486289f, 0.83305705f,
+0.83124226f, 0.82941860f, 0.82758605f, 0.82574469f, 0.82389444f,
+0.82203537f, 0.82016748f, 0.81829083f, 0.81640542f, 0.81451124f,
+0.81260836f, 0.81069672f, 0.80877650f, 0.80684757f, 0.80490994f,
+0.80296379f, 0.80100900f, 0.79904562f, 0.79707366f, 0.79509324f,
+0.79310423f, 0.79110676f, 0.78910083f, 0.78708643f, 0.78506362f,
+0.78303236f, 0.78099275f, 0.77894479f, 0.77688843f, 0.77482378f,
+0.77275085f, 0.77066964f, 0.76858020f, 0.76648247f, 0.76437658f,
+0.76226246f, 0.76014024f, 0.75800985f, 0.75587130f, 0.75372469f,
+0.75157005f, 0.74940729f, 0.74723655f, 0.74505776f, 0.74287105f,
+0.74067634f, 0.73847371f, 0.73626316f, 0.73404479f, 0.73181850f,
+0.72958434f, 0.72734243f, 0.72509271f, 0.72283524f, 0.72057003f,
+0.71829706f, 0.71601641f, 0.71372813f, 0.71143216f, 0.70912862f,
+0.70681745f, 0.70449871f, 0.70217246f, 0.69983864f, 0.69749737f,
+0.69514859f, 0.69279242f, 0.69042879f, 0.68805778f, 0.68567938f,
+0.68329364f, 0.68090063f, 0.67850029f, 0.67609268f, 0.67367786f,
+0.67125577f, 0.66882652f, 0.66639012f, 0.66394657f, 0.66149592f,
+0.65903819f, 0.65657341f, 0.65410155f, 0.65162271f, 0.64913690f,
+0.64664418f, 0.64414448f, 0.64163786f, 0.63912445f, 0.63660413f,
+0.63407701f, 0.63154310f, 0.62900239f, 0.62645501f, 0.62390089f,
+0.62134010f, 0.61877263f, 0.61619854f, 0.61361790f, 0.61103064f,
+0.60843682f, 0.60583651f, 0.60322970f, 0.60061646f, 0.59799677f,
+0.59537065f, 0.59273821f, 0.59009939f, 0.58745426f, 0.58480281f,
+0.58214509f, 0.57948118f, 0.57681108f, 0.57413477f, 0.57145232f,
+0.56876373f, 0.56606907f, 0.56336832f, 0.56066155f, 0.55794877f,
+0.55523002f, 0.55250537f, 0.54977477f, 0.54703826f, 0.54429591f,
+0.54154772f, 0.53879374f, 0.53603399f, 0.53326851f, 0.53049731f,
+0.52772039f, 0.52493787f, 0.52214974f, 0.51935595f, 0.51655668f,
+0.51375180f, 0.51094145f, 0.50812566f, 0.50530440f, 0.50247771f,
+0.49964568f, 0.49680826f, 0.49396557f, 0.49111754f, 0.48826426f,
+0.48540577f, 0.48254207f, 0.47967321f, 0.47679919f, 0.47392011f,
+0.47103590f, 0.46814668f, 0.46525243f, 0.46235323f, 0.45944905f,
+0.45653993f, 0.45362595f, 0.45070711f, 0.44778344f, 0.44485497f,
+0.44192174f, 0.43898380f, 0.43604112f, 0.43309379f, 0.43014181f,
+0.42718524f, 0.42422408f, 0.42125839f, 0.41828820f, 0.41531351f,
+0.41233435f, 0.40935081f, 0.40636289f, 0.40337059f, 0.40037400f,
+0.39737311f, 0.39436796f, 0.39135858f, 0.38834500f, 0.38532731f,
+0.38230544f, 0.37927949f, 0.37624949f, 0.37321547f, 0.37017745f,
+0.36713544f, 0.36408952f, 0.36103970f, 0.35798600f, 0.35492846f,
+0.35186714f, 0.34880206f, 0.34573323f, 0.34266070f, 0.33958447f,
+0.33650464f, 0.33342120f, 0.33033419f, 0.32724363f, 0.32414958f,
+0.32105204f, 0.31795108f, 0.31484672f, 0.31173897f, 0.30862790f,
+0.30551350f, 0.30239585f, 0.29927495f, 0.29615086f, 0.29302359f,
+0.28989318f, 0.28675964f, 0.28362307f, 0.28048345f, 0.27734083f,
+0.27419522f, 0.27104670f, 0.26789525f, 0.26474094f, 0.26158381f,
+0.25842386f, 0.25526115f, 0.25209570f, 0.24892756f, 0.24575676f,
+0.24258332f, 0.23940729f, 0.23622867f, 0.23304754f, 0.22986393f,
+0.22667783f, 0.22348931f, 0.22029841f, 0.21710514f, 0.21390954f,
+0.21071166f, 0.20751151f, 0.20430915f, 0.20110460f, 0.19789790f,
+0.19468907f, 0.19147816f, 0.18826519f, 0.18505022f, 0.18183327f,
+0.17861435f, 0.17539354f, 0.17217083f, 0.16894630f, 0.16571994f,
+0.16249183f, 0.15926196f, 0.15603039f, 0.15279715f, 0.14956227f,
+0.14632578f, 0.14308774f, 0.13984816f, 0.13660708f, 0.13336454f,
+0.13012058f, 0.12687522f, 0.12362850f, 0.12038045f, 0.11713112f,
+0.11388054f, 0.11062872f, 0.10737573f, 0.10412160f, 0.10086634f,
+0.097609997f, 0.094352618f, 0.091094226f, 0.087834857f, 0.084574550f,
+0.081313334f, 0.078051247f, 0.074788325f, 0.071524605f, 0.068260118f,
+0.064994894f, 0.061728980f, 0.058462404f, 0.055195201f, 0.051927410f,
+0.048659060f, 0.045390189f, 0.042120833f, 0.038851023f, 0.035580799f,
+0.032310195f, 0.029039243f, 0.025767982f, 0.022496443f, 0.019224664f,
+0.015952680f, 0.012680525f, 0.0094082337f, 0.0061358409f, 0.0028633832f,
+-0.00040910527f, -0.0036815894f, -0.0069540343f, -0.010226404f, -0.013498665f,
+-0.016770782f, -0.020042717f, -0.023314439f, -0.026585912f, -0.029857099f,
+-0.033127967f, -0.036398482f, -0.039668605f, -0.042938303f, -0.046207540f,
+-0.049476285f, -0.052744497f, -0.056012146f, -0.059279196f, -0.062545612f,
+-0.065811358f, -0.069076397f, -0.072340697f, -0.075604223f, -0.078866936f,
+-0.082128808f, -0.085389800f, -0.088649876f, -0.091909006f, -0.095167145f,
+-0.098424271f, -0.10168034f, -0.10493532f, -0.10818918f, -0.11144188f,
+-0.11469338f, -0.11794366f, -0.12119267f, -0.12444039f, -0.12768677f,
+-0.13093179f, -0.13417540f, -0.13741758f, -0.14065829f, -0.14389749f,
+-0.14713514f, -0.15037122f, -0.15360570f, -0.15683852f, -0.16006967f,
+-0.16329910f, -0.16652679f, -0.16975269f, -0.17297678f, -0.17619900f,
+-0.17941935f, -0.18263777f, -0.18585424f, -0.18906870f, -0.19228116f,
+-0.19549155f, -0.19869985f, -0.20190603f, -0.20511003f, -0.20831184f,
+-0.21151142f, -0.21470875f, -0.21790376f, -0.22109644f, -0.22428675f,
+-0.22747467f, -0.23066014f, -0.23384315f, -0.23702365f, -0.24020162f,
+-0.24337701f, -0.24654980f, -0.24971995f, -0.25288740f, -0.25605217f,
+-0.25921419f, -0.26237345f, -0.26552987f, -0.26868346f, -0.27183419f,
+-0.27498198f, -0.27812684f, -0.28126872f, -0.28440759f, -0.28754342f,
+-0.29067615f, -0.29380578f, -0.29693225f, -0.30005556f, -0.30317566f,
+-0.30629250f, -0.30940607f, -0.31251630f, -0.31562322f, -0.31872672f,
+-0.32182685f, -0.32492352f, -0.32801670f, -0.33110636f, -0.33419248f,
+-0.33727503f, -0.34035397f, -0.34342924f, -0.34650084f, -0.34956875f,
+-0.35263291f, -0.35569328f, -0.35874987f, -0.36180258f, -0.36485144f,
+-0.36789638f, -0.37093741f, -0.37397444f, -0.37700745f, -0.38003644f,
+-0.38306138f, -0.38608220f, -0.38909888f, -0.39211139f, -0.39511973f,
+-0.39812380f, -0.40112361f, -0.40411916f, -0.40711036f, -0.41009718f,
+-0.41307965f, -0.41605768f, -0.41903123f, -0.42200032f, -0.42496487f,
+-0.42792490f, -0.43088034f, -0.43383113f, -0.43677729f, -0.43971881f,
+-0.44265559f, -0.44558764f, -0.44851488f, -0.45143735f, -0.45435500f,
+-0.45726776f, -0.46017563f, -0.46307856f, -0.46597654f, -0.46886954f,
+-0.47175750f, -0.47464043f, -0.47751826f, -0.48039100f, -0.48325855f,
+-0.48612097f, -0.48897815f, -0.49183011f, -0.49467680f, -0.49751821f,
+-0.50035429f, -0.50318497f, -0.50601029f, -0.50883019f, -0.51164466f,
+-0.51445359f, -0.51725709f, -0.52005500f, -0.52284735f, -0.52563411f,
+-0.52841520f, -0.53119069f, -0.53396046f, -0.53672451f, -0.53948283f,
+-0.54223537f, -0.54498214f, -0.54772300f, -0.55045801f, -0.55318713f,
+-0.55591035f, -0.55862761f, -0.56133890f, -0.56404412f, -0.56674337f,
+-0.56943649f, -0.57212353f, -0.57480448f, -0.57747924f, -0.58014780f,
+-0.58281022f, -0.58546633f, -0.58811617f, -0.59075975f, -0.59339696f,
+-0.59602785f, -0.59865236f, -0.60127044f, -0.60388207f, -0.60648727f,
+-0.60908598f, -0.61167812f, -0.61426371f, -0.61684275f, -0.61941516f,
+-0.62198097f, -0.62454009f, -0.62709254f, -0.62963831f, -0.63217729f,
+-0.63470948f, -0.63723493f, -0.63975352f, -0.64226526f, -0.64477009f,
+-0.64726806f, -0.64975911f, -0.65224314f, -0.65472025f, -0.65719032f,
+-0.65965337f, -0.66210932f, -0.66455823f, -0.66700000f, -0.66943461f,
+-0.67186207f, -0.67428231f, -0.67669535f, -0.67910111f, -0.68149966f,
+-0.68389088f, -0.68627477f, -0.68865126f, -0.69102043f, -0.69338220f,
+-0.69573659f, -0.69808346f, -0.70042288f, -0.70275480f, -0.70507920f,
+-0.70739603f, -0.70970529f, -0.71200693f, -0.71430099f, -0.71658736f,
+-0.71886611f, -0.72113711f, -0.72340041f, -0.72565591f, -0.72790372f,
+-0.73014367f, -0.73237586f, -0.73460019f, -0.73681659f, -0.73902518f,
+-0.74122584f, -0.74341851f, -0.74560326f, -0.74778003f, -0.74994880f,
+-0.75210953f, -0.75426215f, -0.75640678f, -0.75854325f, -0.76067162f,
+-0.76279181f, -0.76490390f, -0.76700771f, -0.76910341f, -0.77119076f,
+-0.77326995f, -0.77534080f, -0.77740335f, -0.77945763f, -0.78150350f,
+-0.78354102f, -0.78557014f, -0.78759086f, -0.78960317f, -0.79160696f,
+-0.79360235f, -0.79558921f, -0.79756755f, -0.79953730f, -0.80149853f,
+-0.80345118f, -0.80539525f, -0.80733067f, -0.80925739f, -0.81117553f,
+-0.81308490f, -0.81498563f, -0.81687760f, -0.81876087f, -0.82063532f,
+-0.82250100f, -0.82435787f, -0.82620591f, -0.82804507f, -0.82987541f,
+-0.83169687f, -0.83350939f, -0.83531296f, -0.83710766f, -0.83889335f,
+-0.84067005f, -0.84243774f, -0.84419644f, -0.84594607f, -0.84768665f,
+-0.84941816f, -0.85114056f, -0.85285389f, -0.85455805f, -0.85625303f,
+-0.85793889f, -0.85961550f, -0.86128294f, -0.86294121f, -0.86459017f,
+-0.86622989f, -0.86786032f, -0.86948150f, -0.87109333f, -0.87269586f,
+-0.87428904f, -0.87587279f, -0.87744725f, -0.87901229f, -0.88056785f,
+-0.88211405f, -0.88365078f, -0.88517809f, -0.88669586f, -0.88820416f,
+-0.88970292f, -0.89119220f, -0.89267188f, -0.89414203f, -0.89560264f,
+-0.89705360f, -0.89849502f, -0.89992678f, -0.90134889f, -0.90276134f,
+-0.90416414f, -0.90555727f, -0.90694070f, -0.90831441f, -0.90967834f,
+-0.91103262f, -0.91237706f, -0.91371179f, -0.91503674f, -0.91635185f,
+-0.91765714f, -0.91895264f, -0.92023826f, -0.92151409f, -0.92277998f,
+-0.92403603f, -0.92528218f, -0.92651838f, -0.92774469f, -0.92896110f,
+-0.93016750f, -0.93136400f, -0.93255049f, -0.93372697f, -0.93489349f,
+-0.93604994f, -0.93719643f, -0.93833286f, -0.93945926f, -0.94057560f,
+-0.94168180f, -0.94277799f, -0.94386405f, -0.94494003f, -0.94600588f,
+-0.94706154f, -0.94810712f, -0.94914252f, -0.95016778f, -0.95118284f,
+-0.95218778f, -0.95318246f, -0.95416695f, -0.95514119f, -0.95610523f,
+-0.95705903f, -0.95800257f, -0.95893586f, -0.95985889f, -0.96077162f,
+-0.96167403f, -0.96256620f, -0.96344805f, -0.96431959f, -0.96518075f,
+-0.96603161f, -0.96687216f, -0.96770233f, -0.96852213f, -0.96933156f,
+-0.97013056f, -0.97091925f, -0.97169751f, -0.97246534f, -0.97322279f,
+-0.97396982f, -0.97470641f, -0.97543252f, -0.97614825f, -0.97685349f,
+-0.97754824f, -0.97823256f, -0.97890645f, -0.97956979f, -0.98022264f,
+-0.98086500f, -0.98149687f, -0.98211825f, -0.98272908f, -0.98332942f,
+-0.98391914f, -0.98449844f, -0.98506713f, -0.98562527f, -0.98617285f,
+-0.98670989f, -0.98723638f, -0.98775226f, -0.98825759f, -0.98875231f,
+-0.98923647f, -0.98971003f, -0.99017298f, -0.99062532f, -0.99106705f,
+-0.99149817f, -0.99191868f, -0.99232858f, -0.99272782f, -0.99311644f,
+-0.99349445f, -0.99386179f, -0.99421853f, -0.99456459f, -0.99489999f,
+-0.99522477f, -0.99553883f, -0.99584228f, -0.99613506f, -0.99641716f,
+-0.99668860f, -0.99694937f, -0.99719942f, -0.99743885f, -0.99766755f,
+-0.99788558f, -0.99809295f, -0.99828959f, -0.99847561f, -0.99865085f,
+-0.99881548f, -0.99896932f, -0.99911255f, -0.99924499f, -0.99936682f,
+-0.99947786f, -0.99957830f, -0.99966794f, -0.99974692f, -0.99981517f,
+-0.99987274f, -0.99991959f, -0.99995571f, -0.99998116f, -0.99999589f,
+0.99999964f, 0.99997288f, 0.99990326f, 0.99979085f, 0.99963558f,
+0.99943751f, 0.99919659f, 0.99891287f, 0.99858636f, 0.99821711f,
+0.99780506f, 0.99735034f, 0.99685282f, 0.99631262f, 0.99572974f,
+0.99510419f, 0.99443603f, 0.99372530f, 0.99297196f, 0.99217612f,
+0.99133772f, 0.99045694f, 0.98953366f, 0.98856801f, 0.98756003f,
+0.98650974f, 0.98541719f, 0.98428243f, 0.98310548f, 0.98188645f,
+0.98062533f, 0.97932225f, 0.97797716f, 0.97659022f, 0.97516143f,
+0.97369087f, 0.97217858f, 0.97062469f, 0.96902919f, 0.96739221f,
+0.96571374f, 0.96399397f, 0.96223283f, 0.96043050f, 0.95858705f,
+0.95670253f, 0.95477700f, 0.95281059f, 0.95080340f, 0.94875544f,
+0.94666684f, 0.94453770f, 0.94236809f, 0.94015813f, 0.93790787f,
+0.93561745f, 0.93328691f, 0.93091643f, 0.92850608f, 0.92605597f,
+0.92356616f, 0.92103678f, 0.91846794f, 0.91585976f, 0.91321236f,
+0.91052586f, 0.90780038f, 0.90503591f, 0.90223277f, 0.89939094f,
+0.89651060f, 0.89359182f, 0.89063478f, 0.88763964f, 0.88460642f,
+0.88153529f, 0.87842643f, 0.87527996f, 0.87209594f, 0.86887461f,
+0.86561602f, 0.86232042f, 0.85898781f, 0.85561842f, 0.85221243f,
+0.84876984f, 0.84529096f, 0.84177583f, 0.83822471f, 0.83463764f,
+0.83101481f, 0.82735640f, 0.82366252f, 0.81993335f, 0.81616908f,
+0.81236988f, 0.80853581f, 0.80466717f, 0.80076402f, 0.79682660f,
+0.79285502f, 0.78884947f, 0.78481019f, 0.78073722f, 0.77663082f,
+0.77249116f, 0.76831841f, 0.76411277f, 0.75987434f, 0.75560343f,
+0.75130010f, 0.74696463f, 0.74259710f, 0.73819780f, 0.73376691f,
+0.72930455f, 0.72481096f, 0.72028631f, 0.71573079f, 0.71114463f,
+0.70652801f, 0.70188117f, 0.69720417f, 0.69249737f, 0.68776089f,
+0.68299496f, 0.67819971f, 0.67337549f, 0.66852236f, 0.66364062f,
+0.65873051f, 0.65379208f, 0.64882571f, 0.64383155f, 0.63880974f,
+0.63376063f, 0.62868434f, 0.62358117f, 0.61845124f, 0.61329484f,
+0.60811216f, 0.60290343f, 0.59766883f, 0.59240872f, 0.58712316f,
+0.58181250f, 0.57647687f, 0.57111657f, 0.56573176f, 0.56032276f,
+0.55488980f, 0.54943299f, 0.54395270f, 0.53844911f, 0.53292239f,
+0.52737290f, 0.52180082f, 0.51620632f, 0.51058978f, 0.50495136f,
+0.49929130f, 0.49360985f, 0.48790723f, 0.48218375f, 0.47643960f,
+0.47067502f, 0.46489030f, 0.45908567f, 0.45326138f, 0.44741765f,
+0.44155475f, 0.43567297f, 0.42977250f, 0.42385364f, 0.41791660f,
+0.41196167f, 0.40598908f, 0.39999911f, 0.39399201f, 0.38796803f,
+0.38192743f, 0.37587047f, 0.36979741f, 0.36370850f, 0.35760403f,
+0.35148421f, 0.34534934f, 0.33919969f, 0.33303553f, 0.32685706f,
+0.32066461f, 0.31445843f, 0.30823877f, 0.30200592f, 0.29576012f,
+0.28950164f, 0.28323078f, 0.27694780f, 0.27065292f, 0.26434645f,
+0.25802869f, 0.25169984f, 0.24536023f, 0.23901010f, 0.23264973f,
+0.22627939f, 0.21989937f, 0.21350993f, 0.20711134f, 0.20070387f,
+0.19428782f, 0.18786344f, 0.18143101f, 0.17499080f, 0.16854310f,
+0.16208819f, 0.15562633f, 0.14915779f, 0.14268288f, 0.13620184f,
+0.12971498f, 0.12322257f, 0.11672486f, 0.11022217f, 0.10371475f,
+0.097202882f, 0.090686858f, 0.084166944f, 0.077643424f, 0.071116582f,
+0.064586692f, 0.058054037f, 0.051518895f, 0.044981543f, 0.038442269f,
+0.031901345f, 0.025359053f, 0.018815678f, 0.012271495f, 0.0057267868f,
+-0.00081816671f, -0.0073630852f, -0.013907688f, -0.020451695f, -0.026994826f,
+-0.033536803f, -0.040077340f, -0.046616159f, -0.053152986f, -0.059687532f,
+-0.066219524f, -0.072748676f, -0.079274714f, -0.085797355f, -0.092316322f,
+-0.098831341f, -0.10534211f, -0.11184838f, -0.11834986f, -0.12484626f,
+-0.13133731f, -0.13782275f, -0.14430228f, -0.15077563f, -0.15724251f,
+-0.16370267f, -0.17015581f, -0.17660165f, -0.18303993f, -0.18947038f,
+-0.19589271f, -0.20230664f, -0.20871192f, -0.21510825f, -0.22149536f,
+-0.22787298f, -0.23424086f, -0.24059868f, -0.24694622f, -0.25328314f,
+-0.25960925f, -0.26592422f, -0.27222782f, -0.27851975f, -0.28479972f,
+-0.29106751f, -0.29732284f, -0.30356544f, -0.30979502f, -0.31601134f,
+-0.32221413f, -0.32840309f, -0.33457801f, -0.34073856f, -0.34688455f,
+-0.35301566f, -0.35913166f, -0.36523229f, -0.37131724f, -0.37738630f,
+-0.38343921f, -0.38947567f, -0.39549544f, -0.40149832f, -0.40748394f,
+-0.41345215f, -0.41940263f, -0.42533514f, -0.43124944f, -0.43714526f,
+-0.44302234f, -0.44888046f, -0.45471936f, -0.46053877f, -0.46633846f,
+-0.47211814f, -0.47787762f, -0.48361665f, -0.48933494f, -0.49503228f,
+-0.50070840f, -0.50636309f, -0.51199609f, -0.51760709f, -0.52319598f,
+-0.52876246f, -0.53430629f, -0.53982723f, -0.54532504f, -0.55079949f,
+-0.55625033f, -0.56167740f, -0.56708032f, -0.57245898f, -0.57781315f,
+-0.58314258f, -0.58844697f, -0.59372622f, -0.59897995f, -0.60420811f,
+-0.60941035f, -0.61458647f, -0.61973625f, -0.62485951f, -0.62995601f,
+-0.63502556f, -0.64006782f, -0.64508271f, -0.65007001f, -0.65502942f,
+-0.65996075f, -0.66486382f, -0.66973841f, -0.67458433f, -0.67940134f,
+-0.68418926f, -0.68894786f, -0.69367695f, -0.69837630f, -0.70304573f,
+-0.70768511f, -0.71229410f, -0.71687263f, -0.72142041f, -0.72593731f,
+-0.73042315f, -0.73487765f, -0.73930067f, -0.74369204f, -0.74805158f,
+-0.75237900f, -0.75667429f, -0.76093709f, -0.76516730f, -0.76936477f,
+-0.77352923f, -0.77766061f, -0.78175867f, -0.78582323f, -0.78985411f,
+-0.79385114f, -0.79781419f, -0.80174309f, -0.80563760f, -0.80949765f,
+-0.81332302f, -0.81711352f, -0.82086903f, -0.82458937f, -0.82827437f,
+-0.83192390f, -0.83553779f, -0.83911592f, -0.84265804f, -0.84616417f,
+-0.84963393f, -0.85306740f, -0.85646427f, -0.85982448f, -0.86314780f,
+-0.86643422f, -0.86968350f, -0.87289548f, -0.87607014f, -0.87920725f,
+-0.88230664f, -0.88536829f, -0.88839203f, -0.89137769f, -0.89432514f,
+-0.89723432f, -0.90010506f, -0.90293723f, -0.90573072f, -0.90848541f,
+-0.91120118f, -0.91387796f, -0.91651553f, -0.91911387f, -0.92167282f,
+-0.92419231f, -0.92667222f, -0.92911243f, -0.93151283f, -0.93387336f,
+-0.93619382f, -0.93847424f, -0.94071442f, -0.94291431f, -0.94507378f,
+-0.94719279f, -0.94927126f, -0.95130903f, -0.95330608f, -0.95526224f,
+-0.95717752f, -0.95905179f, -0.96088499f, -0.96267700f, -0.96442777f,
+-0.96613729f, -0.96780539f, -0.96943200f, -0.97101706f, -0.97256058f,
+-0.97406244f, -0.97552258f, -0.97694093f, -0.97831738f, -0.97965199f,
+-0.98094457f, -0.98219514f, -0.98340368f, -0.98457009f, -0.98569429f,
+-0.98677629f, -0.98781598f, -0.98881340f, -0.98976845f, -0.99068111f,
+-0.99155134f, -0.99237907f, -0.99316430f, -0.99390697f, -0.99460709f,
+-0.99526459f, -0.99587947f, -0.99645168f, -0.99698120f, -0.99746799f,
+-0.99791211f, -0.99831343f, -0.99867201f, -0.99898779f, -0.99926084f,
+-0.99949104f, -0.99967843f, -0.99982297f, -0.99992472f, -0.99998361f,
+0.99999869f, 0.99989158f, 0.99961317f, 0.99916345f, 0.99854255f,
+0.99775058f, 0.99678761f, 0.99565387f, 0.99434954f, 0.99287480f,
+0.99122995f, 0.98941529f, 0.98743105f, 0.98527765f, 0.98295540f,
+0.98046476f, 0.97780609f, 0.97497988f, 0.97198665f, 0.96882683f,
+0.96550101f, 0.96200979f, 0.95835376f, 0.95453346f, 0.95054960f,
+0.94640291f, 0.94209403f, 0.93762374f, 0.93299282f, 0.92820197f,
+0.92325211f, 0.91814411f, 0.91287869f, 0.90745693f, 0.90187967f,
+0.89614785f, 0.89026248f, 0.88422459f, 0.87803519f, 0.87169534f,
+0.86520612f, 0.85856867f, 0.85178405f, 0.84485358f, 0.83777827f,
+0.83055943f, 0.82319832f, 0.81569612f, 0.80805415f, 0.80027372f,
+0.79235619f, 0.78430289f, 0.77611518f, 0.76779449f, 0.75934225f,
+0.75075996f, 0.74204898f, 0.73321080f, 0.72424710f, 0.71515924f,
+0.70594883f, 0.69661748f, 0.68716675f, 0.67759830f, 0.66791373f,
+0.65811473f, 0.64820296f, 0.63818014f, 0.62804794f, 0.61780810f,
+0.60746247f, 0.59701276f, 0.58646071f, 0.57580817f, 0.56505698f,
+0.55420899f, 0.54326600f, 0.53222996f, 0.52110273f, 0.50988621f,
+0.49858227f, 0.48719296f, 0.47572014f, 0.46416581f, 0.45253196f,
+0.44082057f, 0.42903364f, 0.41717321f, 0.40524128f, 0.39323992f,
+0.38117120f, 0.36903715f, 0.35683987f, 0.34458145f, 0.33226398f,
+0.31988961f, 0.30746040f, 0.29497850f, 0.28244606f, 0.26986524f,
+0.25723818f, 0.24456702f, 0.23185398f, 0.21910121f, 0.20631088f,
+0.19348522f, 0.18062639f, 0.16773662f, 0.15481812f, 0.14187308f,
+0.12890373f, 0.11591230f, 0.10290100f, 0.089872077f, 0.076827750f,
+0.063770257f, 0.050701842f, 0.037624735f, 0.024541186f, 0.011453429f,
+-0.0016362892f, -0.014725727f, -0.027812643f, -0.040894791f, -0.053969935f,
+-0.067035832f, -0.080090240f, -0.093130924f, -0.10615565f, -0.11916219f,
+-0.13214831f, -0.14511178f, -0.15805040f, -0.17096193f, -0.18384418f,
+-0.19669491f, -0.20951195f, -0.22229309f, -0.23503613f, -0.24773891f,
+-0.26039925f, -0.27301496f, -0.28558388f, -0.29810387f, -0.31057280f,
+-0.32298848f, -0.33534884f, -0.34765175f, -0.35989508f, -0.37207675f,
+-0.38419467f, -0.39624676f, -0.40823093f, -0.42014518f, -0.43198743f,
+-0.44375566f, -0.45544785f, -0.46706200f, -0.47859612f, -0.49004826f,
+-0.50141639f, -0.51269865f, -0.52389306f, -0.53499764f, -0.54601061f,
+-0.55693001f, -0.56775403f, -0.57848072f, -0.58910829f, -0.59963489f,
+-0.61005878f, -0.62037814f, -0.63059121f, -0.64069623f, -0.65069145f,
+-0.66057515f, -0.67034572f, -0.68000144f, -0.68954057f, -0.69896162f,
+-0.70826286f, -0.71744281f, -0.72649974f, -0.73543227f, -0.74423873f,
+-0.75291771f, -0.76146764f, -0.76988715f, -0.77817470f, -0.78632891f,
+-0.79434842f, -0.80223179f, -0.80997771f, -0.81758487f, -0.82505190f,
+-0.83237761f, -0.83956063f, -0.84659988f, -0.85349399f, -0.86024189f,
+-0.86684239f, -0.87329435f, -0.87959671f, -0.88574833f, -0.89174819f,
+-0.89759529f, -0.90328854f, -0.90882701f, -0.91420978f, -0.91943592f,
+-0.92450452f, -0.92941469f, -0.93416560f, -0.93875647f, -0.94318646f,
+-0.94745487f, -0.95156091f, -0.95550388f, -0.95928317f, -0.96289814f,
+-0.96634805f, -0.96963239f, -0.97275060f, -0.97570217f, -0.97848648f,
+-0.98110318f, -0.98355180f, -0.98583186f, -0.98794299f, -0.98988485f,
+-0.99165714f, -0.99325943f, -0.99469161f, -0.99595332f, -0.99704438f,
+-0.99796462f, -0.99871385f, -0.99929196f, -0.99969882f, -0.99993443f,
+0.99999464f, 0.99956632f, 0.99845290f, 0.99665523f, 0.99417448f,
+0.99101239f, 0.98717111f, 0.98265326f, 0.97746199f, 0.97160077f,
+0.96507365f, 0.95788515f, 0.95004016f, 0.94154406f, 0.93240267f,
+0.92262226f, 0.91220951f, 0.90117162f, 0.88951606f, 0.87725091f,
+0.86438453f, 0.85092574f, 0.83688372f, 0.82226819f, 0.80708915f,
+0.79135692f, 0.77508235f, 0.75827658f, 0.74095112f, 0.72311783f,
+0.70478898f, 0.68597710f, 0.66669506f, 0.64695615f, 0.62677377f,
+0.60616189f, 0.58513457f, 0.56370622f, 0.54189157f, 0.51970547f,
+0.49716324f, 0.47428027f, 0.45107225f, 0.42755505f, 0.40374488f,
+0.37965798f, 0.35531086f, 0.33072025f, 0.30590299f, 0.28087607f,
+0.25565663f, 0.23026201f, 0.20470956f, 0.17901683f, 0.15320139f,
+0.12728097f, 0.10127331f, 0.075196236f, 0.049067631f, 0.022905400f,
+-0.0032725304f, -0.029448219f, -0.055603724f, -0.081721120f, -0.10778251f,
+-0.13377003f, -0.15966587f, -0.18545228f, -0.21111161f, -0.23662624f,
+-0.26197869f, -0.28715160f, -0.31212771f, -0.33688989f, -0.36142120f,
+-0.38570482f, -0.40972409f, -0.43346253f, -0.45690393f, -0.48003218f,
+-0.50283146f, -0.52528608f, -0.54738069f, -0.56910020f, -0.59042966f,
+-0.61135447f, -0.63186026f, -0.65193301f, -0.67155898f, -0.69072473f,
+-0.70941705f, -0.72762316f, -0.74533063f, -0.76252723f, -0.77920127f,
+-0.79534131f, -0.81093621f, -0.82597536f, -0.84044844f, -0.85434550f,
+-0.86765707f, -0.88037395f, -0.89248747f, -0.90398932f, -0.91487163f,
+-0.92512697f, -0.93474823f, -0.94372886f, -0.95206273f, -0.95974404f,
+-0.96676767f, -0.97312868f, -0.97882277f, -0.98384601f, -0.98819500f,
+-0.99186671f, -0.99485862f, -0.99716878f, -0.99879545f, -0.99973762f,
+};
+#endif
+
+static const CELTMode mode48000_960_120 = {
+48000,  /* Fs */
+120,    /* overlap */
+21,     /* nbEBands */
+21,     /* effEBands */
+{0.85000610f, 0.0000000f, 1.0000000f, 1.0000000f, },    /* preemph */
+eband5ms,       /* eBands */
+3,      /* maxLM */
+8,      /* nbShortMdcts */
+120,    /* shortMdctSize */
+11,     /* nbAllocVectors */
+band_allocation,        /* allocVectors */
+logN400,        /* logN */
+window120,      /* window */
+{1920, 3, {&fft_state48000_960_0, &fft_state48000_960_1, &fft_state48000_960_2, &fft_state48000_960_3, }, mdct_twiddles960},    /* mdct */
+{392, cache_index50, cache_bits50, cache_caps50},       /* cache */
+};
+
+/* List of all the available modes */
+#define TOTAL_MODES 1
+static const CELTMode * const static_mode_list[TOTAL_MODES] = {
+&mode48000_960_120,
+};
diff --git a/libopus/celt/static_modes_float_arm_ne10.h b/libopus/celt/static_modes_float_arm_ne10.h
new file mode 100644
index 0000000..66e1abb
--- /dev/null
+++ b/libopus/celt/static_modes_float_arm_ne10.h
@@ -0,0 +1,404 @@
+/* The contents of this file was automatically generated by
+ * dump_mode_arm_ne10.c with arguments: 48000 960
+ * It contains static definitions for some pre-defined modes. */
+#include <NE10_types.h>
+
+#ifndef NE10_FFT_PARAMS48000_960
+#define NE10_FFT_PARAMS48000_960
+static const ne10_int32_t ne10_factors_480[64] = {
+4, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, };
+static const ne10_int32_t ne10_factors_240[64] = {
+3, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, };
+static const ne10_int32_t ne10_factors_120[64] = {
+3, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, };
+static const ne10_int32_t ne10_factors_60[64] = {
+2, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, };
+static const ne10_fft_cpx_float32_t ne10_twiddles_480[480] = {
+{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
+{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
+{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
+{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
+{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
+{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
+{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
+{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
+{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
+{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
+{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
+{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
+{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
+{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
+{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
+{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
+{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
+{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
+{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
+{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
+{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
+{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
+{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
+{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
+{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
+{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f},
+{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f},
+{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f},
+{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f},
+{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f},
+{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f},
+{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f},
+{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f},
+{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f},
+{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f},
+{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f},
+{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f},
+{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f},
+{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f},
+{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f},
+{1.0000000f,-0.0000000f}, {0.99991435f,-0.013089596f}, {0.99965733f,-0.026176950f},
+{0.99922901f,-0.039259817f}, {0.99862951f,-0.052335959f}, {0.99785894f,-0.065403134f},
+{0.99691731f,-0.078459099f}, {0.99580491f,-0.091501623f}, {0.99452192f,-0.10452846f},
+{0.99306846f,-0.11753740f}, {0.99144489f,-0.13052620f}, {0.98965138f,-0.14349262f},
+{0.98768836f,-0.15643448f}, {0.98555607f,-0.16934951f}, {0.98325491f,-0.18223552f},
+{0.98078525f,-0.19509032f}, {0.97814763f,-0.20791170f}, {0.97534233f,-0.22069745f},
+{0.97236991f,-0.23344538f}, {0.96923089f,-0.24615330f}, {0.96592581f,-0.25881904f},
+{0.96245521f,-0.27144045f}, {0.95881975f,-0.28401536f}, {0.95501995f,-0.29654160f},
+{0.95105648f,-0.30901700f}, {0.94693011f,-0.32143945f}, {0.94264150f,-0.33380687f},
+{0.93819129f,-0.34611708f}, {0.93358040f,-0.35836795f}, {0.92880952f,-0.37055743f},
+{0.92387956f,-0.38268346f}, {0.91879117f,-0.39474389f}, {0.91354543f,-0.40673664f},
+{0.90814316f,-0.41865975f}, {0.90258527f,-0.43051112f}, {0.89687270f,-0.44228873f},
+{0.89100653f,-0.45399052f}, {0.88498765f,-0.46561453f}, {0.87881708f,-0.47715878f},
+{0.87249601f,-0.48862126f}, {0.86602545f,-0.50000000f}, {0.85940641f,-0.51129311f},
+{0.85264015f,-0.52249855f}, {0.84572786f,-0.53361452f}, {0.83867055f,-0.54463905f},
+{0.83146960f,-0.55557024f}, {0.82412618f,-0.56640625f}, {0.81664151f,-0.57714522f},
+{0.80901700f,-0.58778524f}, {0.80125380f,-0.59832460f}, {0.79335332f,-0.60876143f},
+{0.78531694f,-0.61909395f}, {0.77714598f,-0.62932038f}, {0.76884180f,-0.63943899f},
+{0.76040596f,-0.64944810f}, {0.75183982f,-0.65934587f}, {0.74314475f,-0.66913062f},
+{0.73432249f,-0.67880076f}, {0.72537434f,-0.68835455f}, {0.71630192f,-0.69779050f},
+{0.70710677f,-0.70710683f}, {0.69779044f,-0.71630198f}, {0.68835455f,-0.72537440f},
+{0.67880070f,-0.73432255f}, {0.66913056f,-0.74314487f}, {0.65934581f,-0.75183982f},
+{0.64944804f,-0.76040596f}, {0.63943899f,-0.76884186f}, {0.62932038f,-0.77714598f},
+{0.61909395f,-0.78531694f}, {0.60876137f,-0.79335338f}, {0.59832460f,-0.80125386f},
+{0.58778524f,-0.80901700f}, {0.57714516f,-0.81664151f}, {0.56640625f,-0.82412618f},
+{0.55557019f,-0.83146960f}, {0.54463899f,-0.83867055f}, {0.53361452f,-0.84572786f},
+{0.52249849f,-0.85264015f}, {0.51129311f,-0.85940641f}, {0.49999997f,-0.86602545f},
+{0.48862118f,-0.87249601f}, {0.47715876f,-0.87881708f}, {0.46561447f,-0.88498765f},
+{0.45399052f,-0.89100653f}, {0.44228867f,-0.89687276f}, {0.43051103f,-0.90258533f},
+{0.41865975f,-0.90814316f}, {0.40673661f,-0.91354549f}, {0.39474380f,-0.91879129f},
+{0.38268343f,-0.92387956f}, {0.37055740f,-0.92880958f}, {0.35836786f,-0.93358046f},
+{0.34611705f,-0.93819135f}, {0.33380681f,-0.94264150f}, {0.32143947f,-0.94693011f},
+{0.30901697f,-0.95105654f}, {0.29654151f,-0.95501995f}, {0.28401533f,-0.95881975f},
+{0.27144039f,-0.96245527f}, {0.25881907f,-0.96592581f}, {0.24615327f,-0.96923089f},
+{0.23344530f,-0.97236991f}, {0.22069745f,-0.97534233f}, {0.20791166f,-0.97814763f},
+{0.19509023f,-0.98078531f}, {0.18223552f,-0.98325491f}, {0.16934945f,-0.98555607f},
+{0.15643437f,-0.98768836f}, {0.14349259f,-0.98965138f}, {0.13052613f,-0.99144489f},
+{0.11753740f,-0.99306846f}, {0.10452842f,-0.99452192f}, {0.091501534f,-0.99580491f},
+{0.078459084f,-0.99691731f}, {0.065403074f,-0.99785894f}, {0.052335974f,-0.99862951f},
+{0.039259788f,-0.99922901f}, {0.026176875f,-0.99965733f}, {0.013089597f,-0.99991435f},
+{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f},
+{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f},
+{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f},
+{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f},
+{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f},
+{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f},
+{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f},
+{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f},
+{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f},
+{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f},
+{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f},
+{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f},
+{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f},
+{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f},
+{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f},
+{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f},
+{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f},
+{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f},
+{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f},
+{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f},
+{-4.3711388e-08f,-1.0000000f}, {-0.026176963f,-0.99965733f}, {-0.052336060f,-0.99862951f},
+{-0.078459173f,-0.99691731f}, {-0.10452851f,-0.99452192f}, {-0.13052621f,-0.99144489f},
+{-0.15643445f,-0.98768836f}, {-0.18223560f,-0.98325491f}, {-0.20791174f,-0.97814757f},
+{-0.23344538f,-0.97236991f}, {-0.25881916f,-0.96592581f}, {-0.28401542f,-0.95881969f},
+{-0.30901703f,-0.95105648f}, {-0.33380687f,-0.94264150f}, {-0.35836795f,-0.93358040f},
+{-0.38268352f,-0.92387950f}, {-0.40673670f,-0.91354543f}, {-0.43051112f,-0.90258527f},
+{-0.45399061f,-0.89100647f}, {-0.47715873f,-0.87881708f}, {-0.50000006f,-0.86602533f},
+{-0.52249867f,-0.85264009f}, {-0.54463905f,-0.83867055f}, {-0.56640631f,-0.82412612f},
+{-0.58778518f,-0.80901700f}, {-0.60876143f,-0.79335332f}, {-0.62932050f,-0.77714586f},
+{-0.64944804f,-0.76040596f}, {-0.66913068f,-0.74314475f}, {-0.68835467f,-0.72537428f},
+{-0.70710677f,-0.70710677f}, {-0.72537446f,-0.68835449f}, {-0.74314493f,-0.66913044f},
+{-0.76040596f,-0.64944804f}, {-0.77714604f,-0.62932026f}, {-0.79335332f,-0.60876143f},
+{-0.80901700f,-0.58778518f}, {-0.82412624f,-0.56640613f}, {-0.83867055f,-0.54463899f},
+{-0.85264021f,-0.52249849f}, {-0.86602539f,-0.50000006f}, {-0.87881714f,-0.47715873f},
+{-0.89100659f,-0.45399037f}, {-0.90258527f,-0.43051112f}, {-0.91354549f,-0.40673658f},
+{-0.92387956f,-0.38268328f}, {-0.93358040f,-0.35836792f}, {-0.94264150f,-0.33380675f},
+{-0.95105654f,-0.30901679f}, {-0.95881975f,-0.28401530f}, {-0.96592587f,-0.25881892f},
+{-0.97236991f,-0.23344538f}, {-0.97814763f,-0.20791161f}, {-0.98325491f,-0.18223536f},
+{-0.98768836f,-0.15643445f}, {-0.99144489f,-0.13052608f}, {-0.99452192f,-0.10452849f},
+{-0.99691737f,-0.078459039f}, {-0.99862957f,-0.052335810f}, {-0.99965733f,-0.026176952f},
+{1.0000000f,-0.0000000f}, {0.99922901f,-0.039259817f}, {0.99691731f,-0.078459099f},
+{0.99306846f,-0.11753740f}, {0.98768836f,-0.15643448f}, {0.98078525f,-0.19509032f},
+{0.97236991f,-0.23344538f}, {0.96245521f,-0.27144045f}, {0.95105648f,-0.30901700f},
+{0.93819129f,-0.34611708f}, {0.92387956f,-0.38268346f}, {0.90814316f,-0.41865975f},
+{0.89100653f,-0.45399052f}, {0.87249601f,-0.48862126f}, {0.85264015f,-0.52249855f},
+{0.83146960f,-0.55557024f}, {0.80901700f,-0.58778524f}, {0.78531694f,-0.61909395f},
+{0.76040596f,-0.64944810f}, {0.73432249f,-0.67880076f}, {0.70710677f,-0.70710683f},
+{0.67880070f,-0.73432255f}, {0.64944804f,-0.76040596f}, {0.61909395f,-0.78531694f},
+{0.58778524f,-0.80901700f}, {0.55557019f,-0.83146960f}, {0.52249849f,-0.85264015f},
+{0.48862118f,-0.87249601f}, {0.45399052f,-0.89100653f}, {0.41865975f,-0.90814316f},
+{0.38268343f,-0.92387956f}, {0.34611705f,-0.93819135f}, {0.30901697f,-0.95105654f},
+{0.27144039f,-0.96245527f}, {0.23344530f,-0.97236991f}, {0.19509023f,-0.98078531f},
+{0.15643437f,-0.98768836f}, {0.11753740f,-0.99306846f}, {0.078459084f,-0.99691731f},
+{0.039259788f,-0.99922901f}, {-4.3711388e-08f,-1.0000000f}, {-0.039259877f,-0.99922901f},
+{-0.078459173f,-0.99691731f}, {-0.11753749f,-0.99306846f}, {-0.15643445f,-0.98768836f},
+{-0.19509032f,-0.98078525f}, {-0.23344538f,-0.97236991f}, {-0.27144048f,-0.96245521f},
+{-0.30901703f,-0.95105648f}, {-0.34611711f,-0.93819129f}, {-0.38268352f,-0.92387950f},
+{-0.41865984f,-0.90814310f}, {-0.45399061f,-0.89100647f}, {-0.48862135f,-0.87249595f},
+{-0.52249867f,-0.85264009f}, {-0.55557036f,-0.83146954f}, {-0.58778518f,-0.80901700f},
+{-0.61909389f,-0.78531694f}, {-0.64944804f,-0.76040596f}, {-0.67880076f,-0.73432249f},
+{-0.70710677f,-0.70710677f}, {-0.73432249f,-0.67880070f}, {-0.76040596f,-0.64944804f},
+{-0.78531694f,-0.61909389f}, {-0.80901700f,-0.58778518f}, {-0.83146966f,-0.55557019f},
+{-0.85264021f,-0.52249849f}, {-0.87249607f,-0.48862115f}, {-0.89100659f,-0.45399037f},
+{-0.90814322f,-0.41865960f}, {-0.92387956f,-0.38268328f}, {-0.93819135f,-0.34611690f},
+{-0.95105654f,-0.30901679f}, {-0.96245521f,-0.27144048f}, {-0.97236991f,-0.23344538f},
+{-0.98078531f,-0.19509031f}, {-0.98768836f,-0.15643445f}, {-0.99306846f,-0.11753736f},
+{-0.99691737f,-0.078459039f}, {-0.99922901f,-0.039259743f}, {-1.0000000f,8.7422777e-08f},
+{-0.99922901f,0.039259918f}, {-0.99691731f,0.078459218f}, {-0.99306846f,0.11753753f},
+{-0.98768830f,0.15643461f}, {-0.98078525f,0.19509049f}, {-0.97236985f,0.23344554f},
+{-0.96245515f,0.27144065f}, {-0.95105654f,0.30901697f}, {-0.93819135f,0.34611705f},
+{-0.92387956f,0.38268346f}, {-0.90814316f,0.41865975f}, {-0.89100653f,0.45399055f},
+{-0.87249601f,0.48862129f}, {-0.85264015f,0.52249861f}, {-0.83146960f,0.55557030f},
+{-0.80901694f,0.58778536f}, {-0.78531688f,0.61909401f}, {-0.76040590f,0.64944816f},
+{-0.73432243f,0.67880082f}, {-0.70710665f,0.70710689f}, {-0.67880058f,0.73432261f},
+{-0.64944792f,0.76040608f}, {-0.61909378f,0.78531706f}, {-0.58778507f,0.80901712f},
+{-0.55557001f,0.83146977f}, {-0.52249837f,0.85264033f}, {-0.48862100f,0.87249613f},
+{-0.45399022f,0.89100665f}, {-0.41865945f,0.90814328f}, {-0.38268313f,0.92387968f},
+{-0.34611672f,0.93819147f}, {-0.30901709f,0.95105648f}, {-0.27144054f,0.96245521f},
+{-0.23344545f,0.97236991f}, {-0.19509038f,0.98078525f}, {-0.15643452f,0.98768830f},
+{-0.11753743f,0.99306846f}, {-0.078459114f,0.99691731f}, {-0.039259821f,0.99922901f},
+};
+static const ne10_fft_cpx_float32_t ne10_twiddles_240[240] = {
+{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
+{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
+{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
+{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
+{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
+{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
+{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
+{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
+{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
+{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
+{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
+{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
+{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
+{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
+{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
+{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f},
+{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f},
+{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f},
+{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f},
+{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f},
+{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f},
+{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f},
+{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f},
+{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f},
+{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f},
+{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f},
+{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f},
+{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f},
+{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f},
+{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f},
+{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f},
+{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f},
+{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f},
+{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f},
+{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f},
+{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f},
+{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f},
+{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f},
+{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f},
+{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f},
+{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
+{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
+{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
+{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
+{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
+{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
+{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
+{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
+{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
+{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
+{-4.3711388e-08f,-1.0000000f}, {-0.052336060f,-0.99862951f}, {-0.10452851f,-0.99452192f},
+{-0.15643445f,-0.98768836f}, {-0.20791174f,-0.97814757f}, {-0.25881916f,-0.96592581f},
+{-0.30901703f,-0.95105648f}, {-0.35836795f,-0.93358040f}, {-0.40673670f,-0.91354543f},
+{-0.45399061f,-0.89100647f}, {-0.50000006f,-0.86602533f}, {-0.54463905f,-0.83867055f},
+{-0.58778518f,-0.80901700f}, {-0.62932050f,-0.77714586f}, {-0.66913068f,-0.74314475f},
+{-0.70710677f,-0.70710677f}, {-0.74314493f,-0.66913044f}, {-0.77714604f,-0.62932026f},
+{-0.80901700f,-0.58778518f}, {-0.83867055f,-0.54463899f}, {-0.86602539f,-0.50000006f},
+{-0.89100659f,-0.45399037f}, {-0.91354549f,-0.40673658f}, {-0.93358040f,-0.35836792f},
+{-0.95105654f,-0.30901679f}, {-0.96592587f,-0.25881892f}, {-0.97814763f,-0.20791161f},
+{-0.98768836f,-0.15643445f}, {-0.99452192f,-0.10452849f}, {-0.99862957f,-0.052335810f},
+{1.0000000f,-0.0000000f}, {0.99691731f,-0.078459099f}, {0.98768836f,-0.15643448f},
+{0.97236991f,-0.23344538f}, {0.95105648f,-0.30901700f}, {0.92387956f,-0.38268346f},
+{0.89100653f,-0.45399052f}, {0.85264015f,-0.52249855f}, {0.80901700f,-0.58778524f},
+{0.76040596f,-0.64944810f}, {0.70710677f,-0.70710683f}, {0.64944804f,-0.76040596f},
+{0.58778524f,-0.80901700f}, {0.52249849f,-0.85264015f}, {0.45399052f,-0.89100653f},
+{0.38268343f,-0.92387956f}, {0.30901697f,-0.95105654f}, {0.23344530f,-0.97236991f},
+{0.15643437f,-0.98768836f}, {0.078459084f,-0.99691731f}, {-4.3711388e-08f,-1.0000000f},
+{-0.078459173f,-0.99691731f}, {-0.15643445f,-0.98768836f}, {-0.23344538f,-0.97236991f},
+{-0.30901703f,-0.95105648f}, {-0.38268352f,-0.92387950f}, {-0.45399061f,-0.89100647f},
+{-0.52249867f,-0.85264009f}, {-0.58778518f,-0.80901700f}, {-0.64944804f,-0.76040596f},
+{-0.70710677f,-0.70710677f}, {-0.76040596f,-0.64944804f}, {-0.80901700f,-0.58778518f},
+{-0.85264021f,-0.52249849f}, {-0.89100659f,-0.45399037f}, {-0.92387956f,-0.38268328f},
+{-0.95105654f,-0.30901679f}, {-0.97236991f,-0.23344538f}, {-0.98768836f,-0.15643445f},
+{-0.99691737f,-0.078459039f}, {-1.0000000f,8.7422777e-08f}, {-0.99691731f,0.078459218f},
+{-0.98768830f,0.15643461f}, {-0.97236985f,0.23344554f}, {-0.95105654f,0.30901697f},
+{-0.92387956f,0.38268346f}, {-0.89100653f,0.45399055f}, {-0.85264015f,0.52249861f},
+{-0.80901694f,0.58778536f}, {-0.76040590f,0.64944816f}, {-0.70710665f,0.70710689f},
+{-0.64944792f,0.76040608f}, {-0.58778507f,0.80901712f}, {-0.52249837f,0.85264033f},
+{-0.45399022f,0.89100665f}, {-0.38268313f,0.92387968f}, {-0.30901709f,0.95105648f},
+{-0.23344545f,0.97236991f}, {-0.15643452f,0.98768830f}, {-0.078459114f,0.99691731f},
+};
+static const ne10_fft_cpx_float32_t ne10_twiddles_120[120] = {
+{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
+{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
+{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
+{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
+{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
+{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
+{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
+{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
+{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
+{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
+{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
+{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
+{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
+{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
+{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
+{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
+{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
+{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
+{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
+{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
+{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
+{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
+{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
+{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
+{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
+{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f},
+{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f},
+{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f},
+{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f},
+{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f},
+{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f},
+{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f},
+{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f},
+{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f},
+{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f},
+{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f},
+{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f},
+{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f},
+{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f},
+{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f},
+};
+static const ne10_fft_cpx_float32_t ne10_twiddles_60[60] = {
+{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
+{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
+{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
+{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
+{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
+{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
+{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
+{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
+{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
+{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
+{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
+{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
+{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
+{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
+{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
+{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f},
+{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f},
+{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f},
+{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f},
+{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f},
+};
+static const ne10_fft_state_float32_t ne10_fft_state_float32_t_480 = {
+120,
+(ne10_int32_t *)ne10_factors_480,
+(ne10_fft_cpx_float32_t *)ne10_twiddles_480,
+NULL,
+(ne10_fft_cpx_float32_t *)&ne10_twiddles_480[120],
+/* is_forward_scaled = true */
+(ne10_int32_t) 1,
+/* is_backward_scaled = false */
+(ne10_int32_t) 0,
+};
+static const arch_fft_state cfg_arch_480 = {
+1,
+(void *)&ne10_fft_state_float32_t_480,
+};
+
+static const ne10_fft_state_float32_t ne10_fft_state_float32_t_240 = {
+60,
+(ne10_int32_t *)ne10_factors_240,
+(ne10_fft_cpx_float32_t *)ne10_twiddles_240,
+NULL,
+(ne10_fft_cpx_float32_t *)&ne10_twiddles_240[60],
+/* is_forward_scaled = true */
+(ne10_int32_t) 1,
+/* is_backward_scaled = false */
+(ne10_int32_t) 0,
+};
+static const arch_fft_state cfg_arch_240 = {
+1,
+(void *)&ne10_fft_state_float32_t_240,
+};
+
+static const ne10_fft_state_float32_t ne10_fft_state_float32_t_120 = {
+30,
+(ne10_int32_t *)ne10_factors_120,
+(ne10_fft_cpx_float32_t *)ne10_twiddles_120,
+NULL,
+(ne10_fft_cpx_float32_t *)&ne10_twiddles_120[30],
+/* is_forward_scaled = true */
+(ne10_int32_t) 1,
+/* is_backward_scaled = false */
+(ne10_int32_t) 0,
+};
+static const arch_fft_state cfg_arch_120 = {
+1,
+(void *)&ne10_fft_state_float32_t_120,
+};
+
+static const ne10_fft_state_float32_t ne10_fft_state_float32_t_60 = {
+15,
+(ne10_int32_t *)ne10_factors_60,
+(ne10_fft_cpx_float32_t *)ne10_twiddles_60,
+NULL,
+(ne10_fft_cpx_float32_t *)&ne10_twiddles_60[15],
+/* is_forward_scaled = true */
+(ne10_int32_t) 1,
+/* is_backward_scaled = false */
+(ne10_int32_t) 0,
+};
+static const arch_fft_state cfg_arch_60 = {
+1,
+(void *)&ne10_fft_state_float32_t_60,
+};
+
+#endif  /* end NE10_FFT_PARAMS48000_960 */
diff --git a/libopus/celt/tests/test_unit_cwrs32.c b/libopus/celt/tests/test_unit_cwrs32.c
new file mode 100644
index 0000000..36dd8af
--- /dev/null
+++ b/libopus/celt/tests/test_unit_cwrs32.c
@@ -0,0 +1,161 @@
+/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation,
+                           Gregory Maxwell
+   Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+
+#ifndef CUSTOM_MODES
+#define CUSTOM_MODES
+#else
+#define TEST_CUSTOM_MODES
+#endif
+
+#define CELT_C
+#include "stack_alloc.h"
+#include "entenc.c"
+#include "entdec.c"
+#include "entcode.c"
+#include "cwrs.c"
+#include "mathops.c"
+#include "rate.h"
+
+#define NMAX (240)
+#define KMAX (128)
+
+#ifdef TEST_CUSTOM_MODES
+
+#define NDIMS (44)
+static const int pn[NDIMS]={
+   2,   3,   4,   5,   6,   7,   8,   9,  10,
+  11,  12,  13,  14,  15,  16,  18,  20,  22,
+  24,  26,  28,  30,  32,  36,  40,  44,  48,
+  52,  56,  60,  64,  72,  80,  88,  96, 104,
+ 112, 120, 128, 144, 160, 176, 192, 208
+};
+static const int pkmax[NDIMS]={
+ 128, 128, 128, 128,  88,  52,  36,  26,  22,
+  18,  16,  15,  13,  12,  12,  11,  10,   9,
+   9,   8,   8,   7,   7,   7,   7,   6,   6,
+   6,   6,   6,   5,   5,   5,   5,   5,   5,
+   4,   4,   4,   4,   4,   4,   4,   4
+};
+
+#else /* TEST_CUSTOM_MODES */
+
+#define NDIMS (22)
+static const int pn[NDIMS]={
+   2,   3,   4,   6,   8,   9,  11,  12,  16,
+  18,  22,  24,  32,  36,  44,  48,  64,  72,
+  88,  96, 144, 176
+};
+static const int pkmax[NDIMS]={
+ 128, 128, 128,  88,  36,  26,  18,  16,  12,
+  11,   9,   9,   7,   7,   6,   6,   5,   5,
+   5,   5,   4,   4
+};
+
+#endif
+
+int main(void){
+  int t;
+  int n;
+  ALLOC_STACK;
+  for(t=0;t<NDIMS;t++){
+    int pseudo;
+    n=pn[t];
+    for(pseudo=1;pseudo<41;pseudo++)
+    {
+      int k;
+#if defined(SMALL_FOOTPRINT)
+      opus_uint32 uu[KMAX+2U];
+#endif
+      opus_uint32 inc;
+      opus_uint32 nc;
+      opus_uint32 i;
+      k=get_pulses(pseudo);
+      if (k>pkmax[t])break;
+      printf("Testing CWRS with N=%i, K=%i...\n",n,k);
+#if defined(SMALL_FOOTPRINT)
+      nc=ncwrs_urow(n,k,uu);
+#else
+      nc=CELT_PVQ_V(n,k);
+#endif
+      inc=nc/20000;
+      if(inc<1)inc=1;
+      for(i=0;i<nc;i+=inc){
+#if defined(SMALL_FOOTPRINT)
+        opus_uint32 u[KMAX+2U];
+#endif
+        int         y[NMAX];
+        int         sy;
+        opus_uint32 v;
+        opus_uint32 ii;
+        int         j;
+#if defined(SMALL_FOOTPRINT)
+        memcpy(u,uu,(k+2U)*sizeof(*u));
+        cwrsi(n,k,i,y,u);
+#else
+        cwrsi(n,k,i,y);
+#endif
+        sy=0;
+        for(j=0;j<n;j++)sy+=abs(y[j]);
+        if(sy!=k){
+          fprintf(stderr,"N=%d Pulse count mismatch in cwrsi (%d!=%d).\n",
+           n,sy,k);
+          return 99;
+        }
+        /*printf("%6u of %u:",i,nc);
+        for(j=0;j<n;j++)printf(" %+3i",y[j]);
+        printf(" ->");*/
+#if defined(SMALL_FOOTPRINT)
+        ii=icwrs(n,k,&v,y,u);
+#else
+        ii=icwrs(n,y);
+        v=CELT_PVQ_V(n,k);
+#endif
+        if(ii!=i){
+          fprintf(stderr,"Combination-index mismatch (%lu!=%lu).\n",
+           (long)ii,(long)i);
+          return 1;
+        }
+        if(v!=nc){
+          fprintf(stderr,"Combination count mismatch (%lu!=%lu).\n",
+           (long)v,(long)nc);
+          return 2;
+        }
+        /*printf(" %6u\n",i);*/
+      }
+      /*printf("\n");*/
+    }
+  }
+  return 0;
+}
diff --git a/libopus/celt/tests/test_unit_dft.c b/libopus/celt/tests/test_unit_dft.c
new file mode 100644
index 0000000..70f8f49
--- /dev/null
+++ b/libopus/celt/tests/test_unit_dft.c
@@ -0,0 +1,179 @@
+/* Copyright (c) 2008 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+
+#include "stack_alloc.h"
+#include "kiss_fft.h"
+#include "mathops.h"
+#include "modes.h"
+
+#ifndef M_PI
+#define M_PI 3.141592653
+#endif
+
+int ret = 0;
+
+void check(kiss_fft_cpx  * in,kiss_fft_cpx  * out,int nfft,int isinverse)
+{
+    int bin,k;
+    double errpow=0,sigpow=0, snr;
+
+    for (bin=0;bin<nfft;++bin) {
+        double ansr = 0;
+        double ansi = 0;
+        double difr;
+        double difi;
+
+        for (k=0;k<nfft;++k) {
+            double phase = -2*M_PI*bin*k/nfft;
+            double re = cos(phase);
+            double im = sin(phase);
+            if (isinverse)
+                im = -im;
+
+            if (!isinverse)
+            {
+               re /= nfft;
+               im /= nfft;
+            }
+
+            ansr += in[k].r * re - in[k].i * im;
+            ansi += in[k].r * im + in[k].i * re;
+        }
+        /*printf ("%d %d ", (int)ansr, (int)ansi);*/
+        difr = ansr - out[bin].r;
+        difi = ansi - out[bin].i;
+        errpow += difr*difr + difi*difi;
+        sigpow += ansr*ansr+ansi*ansi;
+    }
+    snr = 10*log10(sigpow/errpow);
+    printf("nfft=%d inverse=%d,snr = %f\n",nfft,isinverse,snr );
+    if (snr<60) {
+       printf( "** poor snr: %f ** \n", snr);
+       ret = 1;
+    }
+}
+
+void test1d(int nfft,int isinverse,int arch)
+{
+    size_t buflen = sizeof(kiss_fft_cpx)*nfft;
+    kiss_fft_cpx *in;
+    kiss_fft_cpx *out;
+    int k;
+#ifdef CUSTOM_MODES
+    kiss_fft_state *cfg = opus_fft_alloc(nfft,0,0,arch);
+#else
+    int id;
+    const kiss_fft_state *cfg;
+    CELTMode *mode = opus_custom_mode_create(48000, 960, NULL);
+    if (nfft == 480) id = 0;
+    else if (nfft == 240) id = 1;
+    else if (nfft == 120) id = 2;
+    else if (nfft == 60) id = 3;
+    else return;
+    cfg = mode->mdct.kfft[id];
+#endif
+
+    in = (kiss_fft_cpx*)malloc(buflen);
+    out = (kiss_fft_cpx*)malloc(buflen);
+
+    for (k=0;k<nfft;++k) {
+        in[k].r = (rand() % 32767) - 16384;
+        in[k].i = (rand() % 32767) - 16384;
+    }
+
+    for (k=0;k<nfft;++k) {
+       in[k].r *= 32768;
+       in[k].i *= 32768;
+    }
+
+    if (isinverse)
+    {
+       for (k=0;k<nfft;++k) {
+          in[k].r /= nfft;
+          in[k].i /= nfft;
+       }
+    }
+
+    /*for (k=0;k<nfft;++k) printf("%d %d ", in[k].r, in[k].i);printf("\n");*/
+
+    if (isinverse)
+       opus_ifft(cfg,in,out, arch);
+    else
+       opus_fft(cfg,in,out, arch);
+
+    /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/
+
+    check(in,out,nfft,isinverse);
+
+    free(in);
+    free(out);
+#ifdef CUSTOM_MODES
+    opus_fft_free(cfg, arch);
+#endif
+}
+
+int main(int argc,char ** argv)
+{
+    ALLOC_STACK;
+    int arch = opus_select_arch();
+
+    if (argc>1) {
+        int k;
+        for (k=1;k<argc;++k) {
+            test1d(atoi(argv[k]),0,arch);
+            test1d(atoi(argv[k]),1,arch);
+        }
+    }else{
+        test1d(32,0,arch);
+        test1d(32,1,arch);
+        test1d(128,0,arch);
+        test1d(128,1,arch);
+        test1d(256,0,arch);
+        test1d(256,1,arch);
+#ifndef RADIX_TWO_ONLY
+        test1d(36,0,arch);
+        test1d(36,1,arch);
+        test1d(50,0,arch);
+        test1d(50,1,arch);
+        test1d(60,0,arch);
+        test1d(60,1,arch);
+        test1d(120,0,arch);
+        test1d(120,1,arch);
+        test1d(240,0,arch);
+        test1d(240,1,arch);
+        test1d(480,0,arch);
+        test1d(480,1,arch);
+#endif
+    }
+    return ret;
+}
diff --git a/libopus/celt/tests/test_unit_entropy.c b/libopus/celt/tests/test_unit_entropy.c
new file mode 100644
index 0000000..7f67452
--- /dev/null
+++ b/libopus/celt/tests/test_unit_entropy.c
@@ -0,0 +1,383 @@
+/* Copyright (c) 2007-2011 Xiph.Org Foundation, Mozilla Corporation,
+                           Gregory Maxwell
+   Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <time.h>
+#define CELT_C
+#include "entcode.h"
+#include "entenc.h"
+#include "entdec.h"
+#include <string.h>
+
+#include "entenc.c"
+#include "entdec.c"
+#include "entcode.c"
+
+#ifndef M_LOG2E
+# define M_LOG2E    1.4426950408889634074
+#endif
+#define DATA_SIZE 10000000
+#define DATA_SIZE2 10000
+
+int main(int _argc,char **_argv){
+  ec_enc         enc;
+  ec_dec         dec;
+  long           nbits;
+  long           nbits2;
+  double         entropy;
+  int            ft;
+  int            ftb;
+  int            sz;
+  int            i;
+  int            ret;
+  unsigned int   sym;
+  unsigned int   seed;
+  unsigned char *ptr;
+  const char    *env_seed;
+  ret=0;
+  entropy=0;
+  if (_argc > 2) {
+    fprintf(stderr, "Usage: %s [<seed>]\n", _argv[0]);
+    return 1;
+  }
+  env_seed = getenv("SEED");
+  if (_argc > 1)
+    seed = atoi(_argv[1]);
+  else if (env_seed)
+    seed = atoi(env_seed);
+  else
+    seed = time(NULL);
+  /*Testing encoding of raw bit values.*/
+  ptr = (unsigned char *)malloc(DATA_SIZE);
+  ec_enc_init(&enc,ptr, DATA_SIZE);
+  for(ft=2;ft<1024;ft++){
+    for(i=0;i<ft;i++){
+      entropy+=log(ft)*M_LOG2E;
+      ec_enc_uint(&enc,i,ft);
+    }
+  }
+  /*Testing encoding of raw bit values.*/
+  for(ftb=1;ftb<16;ftb++){
+    for(i=0;i<(1<<ftb);i++){
+      entropy+=ftb;
+      nbits=ec_tell(&enc);
+      ec_enc_bits(&enc,i,ftb);
+      nbits2=ec_tell(&enc);
+      if(nbits2-nbits!=ftb){
+        fprintf(stderr,"Used %li bits to encode %i bits directly.\n",
+         nbits2-nbits,ftb);
+        ret=-1;
+      }
+    }
+  }
+  nbits=ec_tell_frac(&enc);
+  ec_enc_done(&enc);
+  fprintf(stderr,
+   "Encoded %0.2lf bits of entropy to %0.2lf bits (%0.3lf%% wasted).\n",
+   entropy,ldexp(nbits,-3),100*(nbits-ldexp(entropy,3))/nbits);
+  fprintf(stderr,"Packed to %li bytes.\n",(long)ec_range_bytes(&enc));
+  ec_dec_init(&dec,ptr,DATA_SIZE);
+  for(ft=2;ft<1024;ft++){
+    for(i=0;i<ft;i++){
+      sym=ec_dec_uint(&dec,ft);
+      if(sym!=(unsigned)i){
+        fprintf(stderr,"Decoded %i instead of %i with ft of %i.\n",sym,i,ft);
+        ret=-1;
+      }
+    }
+  }
+  for(ftb=1;ftb<16;ftb++){
+    for(i=0;i<(1<<ftb);i++){
+      sym=ec_dec_bits(&dec,ftb);
+      if(sym!=(unsigned)i){
+        fprintf(stderr,"Decoded %i instead of %i with ftb of %i.\n",sym,i,ftb);
+        ret=-1;
+      }
+    }
+  }
+  nbits2=ec_tell_frac(&dec);
+  if(nbits!=nbits2){
+    fprintf(stderr,
+     "Reported number of bits used was %0.2lf, should be %0.2lf.\n",
+     ldexp(nbits2,-3),ldexp(nbits,-3));
+    ret=-1;
+  }
+  /*Testing an encoder bust prefers range coder data over raw bits.
+    This isn't a general guarantee, will only work for data that is buffered in
+     the encoder state and not yet stored in the user buffer, and should never
+     get used in practice.
+    It's mostly here for code coverage completeness.*/
+  /*Start with a 16-bit buffer.*/
+  ec_enc_init(&enc,ptr,2);
+  /*Write 7 raw bits.*/
+  ec_enc_bits(&enc,0x55,7);
+  /*Write 12.3 bits of range coder data.*/
+  ec_enc_uint(&enc,1,2);
+  ec_enc_uint(&enc,1,3);
+  ec_enc_uint(&enc,1,4);
+  ec_enc_uint(&enc,1,5);
+  ec_enc_uint(&enc,2,6);
+  ec_enc_uint(&enc,6,7);
+  ec_enc_done(&enc);
+  ec_dec_init(&dec,ptr,2);
+  if(!enc.error
+   /*The raw bits should have been overwritten by the range coder data.*/
+   ||ec_dec_bits(&dec,7)!=0x05
+   /*And all the range coder data should have been encoded correctly.*/
+   ||ec_dec_uint(&dec,2)!=1
+   ||ec_dec_uint(&dec,3)!=1
+   ||ec_dec_uint(&dec,4)!=1
+   ||ec_dec_uint(&dec,5)!=1
+   ||ec_dec_uint(&dec,6)!=2
+   ||ec_dec_uint(&dec,7)!=6){
+    fprintf(stderr,"Encoder bust overwrote range coder data with raw bits.\n");
+    ret=-1;
+  }
+  srand(seed);
+  fprintf(stderr,"Testing random streams... Random seed: %u (%.4X)\n", seed, rand() % 65536);
+  for(i=0;i<409600;i++){
+    unsigned *data;
+    unsigned *tell;
+    unsigned tell_bits;
+    int       j;
+    int zeros;
+    ft=rand()/((RAND_MAX>>(rand()%11U))+1U)+10;
+    sz=rand()/((RAND_MAX>>(rand()%9U))+1U);
+    data=(unsigned *)malloc(sz*sizeof(*data));
+    tell=(unsigned *)malloc((sz+1)*sizeof(*tell));
+    ec_enc_init(&enc,ptr,DATA_SIZE2);
+    zeros = rand()%13==0;
+    tell[0]=ec_tell_frac(&enc);
+    for(j=0;j<sz;j++){
+      if (zeros)
+        data[j]=0;
+      else
+        data[j]=rand()%ft;
+      ec_enc_uint(&enc,data[j],ft);
+      tell[j+1]=ec_tell_frac(&enc);
+    }
+    if (rand()%2==0)
+      while(ec_tell(&enc)%8 != 0)
+        ec_enc_uint(&enc, rand()%2, 2);
+    tell_bits = ec_tell(&enc);
+    ec_enc_done(&enc);
+    if(tell_bits!=(unsigned)ec_tell(&enc)){
+      fprintf(stderr,"ec_tell() changed after ec_enc_done(): %i instead of %i (Random seed: %u)\n",
+       ec_tell(&enc),tell_bits,seed);
+      ret=-1;
+    }
+    if ((tell_bits+7)/8 < ec_range_bytes(&enc))
+    {
+      fprintf (stderr, "ec_tell() lied, there's %i bytes instead of %d (Random seed: %u)\n",
+               ec_range_bytes(&enc), (tell_bits+7)/8,seed);
+      ret=-1;
+    }
+    ec_dec_init(&dec,ptr,DATA_SIZE2);
+    if(ec_tell_frac(&dec)!=tell[0]){
+      fprintf(stderr,
+       "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n",
+       0,ec_tell_frac(&dec),tell[0],seed);
+    }
+    for(j=0;j<sz;j++){
+      sym=ec_dec_uint(&dec,ft);
+      if(sym!=data[j]){
+        fprintf(stderr,
+         "Decoded %i instead of %i with ft of %i at position %i of %i (Random seed: %u).\n",
+         sym,data[j],ft,j,sz,seed);
+        ret=-1;
+      }
+      if(ec_tell_frac(&dec)!=tell[j+1]){
+        fprintf(stderr,
+         "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n",
+         j+1,ec_tell_frac(&dec),tell[j+1],seed);
+      }
+    }
+    free(tell);
+    free(data);
+  }
+  /*Test compatibility between multiple different encode/decode routines.*/
+  for(i=0;i<409600;i++){
+    unsigned *logp1;
+    unsigned *data;
+    unsigned *tell;
+    unsigned *enc_method;
+    int       j;
+    sz=rand()/((RAND_MAX>>(rand()%9U))+1U);
+    logp1=(unsigned *)malloc(sz*sizeof(*logp1));
+    data=(unsigned *)malloc(sz*sizeof(*data));
+    tell=(unsigned *)malloc((sz+1)*sizeof(*tell));
+    enc_method=(unsigned *)malloc(sz*sizeof(*enc_method));
+    ec_enc_init(&enc,ptr,DATA_SIZE2);
+    tell[0]=ec_tell_frac(&enc);
+    for(j=0;j<sz;j++){
+      data[j]=rand()/((RAND_MAX>>1)+1);
+      logp1[j]=(rand()%15)+1;
+      enc_method[j]=rand()/((RAND_MAX>>2)+1);
+      switch(enc_method[j]){
+        case 0:{
+          ec_encode(&enc,data[j]?(1<<logp1[j])-1:0,
+           (1<<logp1[j])-(data[j]?0:1),1<<logp1[j]);
+        }break;
+        case 1:{
+          ec_encode_bin(&enc,data[j]?(1<<logp1[j])-1:0,
+           (1<<logp1[j])-(data[j]?0:1),logp1[j]);
+        }break;
+        case 2:{
+          ec_enc_bit_logp(&enc,data[j],logp1[j]);
+        }break;
+        case 3:{
+          unsigned char icdf[2];
+          icdf[0]=1;
+          icdf[1]=0;
+          ec_enc_icdf(&enc,data[j],icdf,logp1[j]);
+        }break;
+      }
+      tell[j+1]=ec_tell_frac(&enc);
+    }
+    ec_enc_done(&enc);
+    if((ec_tell(&enc)+7U)/8U<ec_range_bytes(&enc)){
+      fprintf(stderr,"tell() lied, there's %i bytes instead of %d (Random seed: %u)\n",
+       ec_range_bytes(&enc),(ec_tell(&enc)+7)/8,seed);
+      ret=-1;
+    }
+    ec_dec_init(&dec,ptr,DATA_SIZE2);
+    if(ec_tell_frac(&dec)!=tell[0]){
+      fprintf(stderr,
+       "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n",
+       0,ec_tell_frac(&dec),tell[0],seed);
+    }
+    for(j=0;j<sz;j++){
+      int fs;
+      int dec_method;
+      dec_method=rand()/((RAND_MAX>>2)+1);
+      switch(dec_method){
+        case 0:{
+          fs=ec_decode(&dec,1<<logp1[j]);
+          sym=fs>=(1<<logp1[j])-1;
+          ec_dec_update(&dec,sym?(1<<logp1[j])-1:0,
+           (1<<logp1[j])-(sym?0:1),1<<logp1[j]);
+        }break;
+        case 1:{
+          fs=ec_decode_bin(&dec,logp1[j]);
+          sym=fs>=(1<<logp1[j])-1;
+          ec_dec_update(&dec,sym?(1<<logp1[j])-1:0,
+           (1<<logp1[j])-(sym?0:1),1<<logp1[j]);
+        }break;
+        case 2:{
+          sym=ec_dec_bit_logp(&dec,logp1[j]);
+        }break;
+        case 3:{
+          unsigned char icdf[2];
+          icdf[0]=1;
+          icdf[1]=0;
+          sym=ec_dec_icdf(&dec,icdf,logp1[j]);
+        }break;
+      }
+      if(sym!=data[j]){
+        fprintf(stderr,
+         "Decoded %i instead of %i with logp1 of %i at position %i of %i (Random seed: %u).\n",
+         sym,data[j],logp1[j],j,sz,seed);
+        fprintf(stderr,"Encoding method: %i, decoding method: %i\n",
+         enc_method[j],dec_method);
+        ret=-1;
+      }
+      if(ec_tell_frac(&dec)!=tell[j+1]){
+        fprintf(stderr,
+         "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n",
+         j+1,ec_tell_frac(&dec),tell[j+1],seed);
+      }
+    }
+    free(enc_method);
+    free(tell);
+    free(data);
+    free(logp1);
+  }
+  ec_enc_init(&enc,ptr,DATA_SIZE2);
+  ec_enc_bit_logp(&enc,0,1);
+  ec_enc_bit_logp(&enc,0,1);
+  ec_enc_bit_logp(&enc,0,1);
+  ec_enc_bit_logp(&enc,0,1);
+  ec_enc_bit_logp(&enc,0,2);
+  ec_enc_patch_initial_bits(&enc,3,2);
+  if(enc.error){
+    fprintf(stderr,"patch_initial_bits failed");
+    ret=-1;
+  }
+  ec_enc_patch_initial_bits(&enc,0,5);
+  if(!enc.error){
+    fprintf(stderr,"patch_initial_bits didn't fail when it should have");
+    ret=-1;
+  }
+  ec_enc_done(&enc);
+  if(ec_range_bytes(&enc)!=1||ptr[0]!=192){
+    fprintf(stderr,"Got %d when expecting 192 for patch_initial_bits",ptr[0]);
+    ret=-1;
+  }
+  ec_enc_init(&enc,ptr,DATA_SIZE2);
+  ec_enc_bit_logp(&enc,0,1);
+  ec_enc_bit_logp(&enc,0,1);
+  ec_enc_bit_logp(&enc,1,6);
+  ec_enc_bit_logp(&enc,0,2);
+  ec_enc_patch_initial_bits(&enc,0,2);
+  if(enc.error){
+    fprintf(stderr,"patch_initial_bits failed");
+    ret=-1;
+  }
+  ec_enc_done(&enc);
+  if(ec_range_bytes(&enc)!=2||ptr[0]!=63){
+    fprintf(stderr,"Got %d when expecting 63 for patch_initial_bits",ptr[0]);
+    ret=-1;
+  }
+  ec_enc_init(&enc,ptr,2);
+  ec_enc_bit_logp(&enc,0,2);
+  for(i=0;i<48;i++){
+    ec_enc_bits(&enc,0,1);
+  }
+  ec_enc_done(&enc);
+  if(!enc.error){
+    fprintf(stderr,"Raw bits overfill didn't fail when it should have");
+    ret=-1;
+  }
+  ec_enc_init(&enc,ptr,2);
+  for(i=0;i<17;i++){
+    ec_enc_bits(&enc,0,1);
+  }
+  ec_enc_done(&enc);
+  if(!enc.error){
+    fprintf(stderr,"17 raw bits encoded in two bytes");
+    ret=-1;
+  }
+  free(ptr);
+  return ret;
+}
diff --git a/libopus/celt/tests/test_unit_laplace.c b/libopus/celt/tests/test_unit_laplace.c
new file mode 100644
index 0000000..727bf01
--- /dev/null
+++ b/libopus/celt/tests/test_unit_laplace.c
@@ -0,0 +1,93 @@
+/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation
+   Written by Jean-Marc Valin and Timothy B. Terriberry */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#define CELT_C
+#include "laplace.h"
+#include "stack_alloc.h"
+
+#include "entenc.c"
+#include "entdec.c"
+#include "entcode.c"
+#include "laplace.c"
+
+#define DATA_SIZE 40000
+
+int ec_laplace_get_start_freq(int decay)
+{
+   opus_uint32 ft = 32768 - LAPLACE_MINP*(2*LAPLACE_NMIN+1);
+   int fs = (ft*(16384-decay))/(16384+decay);
+   return fs+LAPLACE_MINP;
+}
+
+int main(void)
+{
+   int i;
+   int ret = 0;
+   ec_enc enc;
+   ec_dec dec;
+   unsigned char *ptr;
+   int val[10000], decay[10000];
+   ALLOC_STACK;
+   ptr = (unsigned char *)malloc(DATA_SIZE);
+   ec_enc_init(&enc,ptr,DATA_SIZE);
+
+   val[0] = 3; decay[0] = 6000;
+   val[1] = 0; decay[1] = 5800;
+   val[2] = -1; decay[2] = 5600;
+   for (i=3;i<10000;i++)
+   {
+      val[i] = rand()%15-7;
+      decay[i] = rand()%11000+5000;
+   }
+   for (i=0;i<10000;i++)
+      ec_laplace_encode(&enc, &val[i],
+            ec_laplace_get_start_freq(decay[i]), decay[i]);
+
+   ec_enc_done(&enc);
+
+   ec_dec_init(&dec,ec_get_buffer(&enc),ec_range_bytes(&enc));
+
+   for (i=0;i<10000;i++)
+   {
+      int d = ec_laplace_decode(&dec,
+            ec_laplace_get_start_freq(decay[i]), decay[i]);
+      if (d != val[i])
+      {
+         fprintf (stderr, "Got %d instead of %d\n", d, val[i]);
+         ret = 1;
+      }
+   }
+
+   free(ptr);
+   return ret;
+}
diff --git a/libopus/celt/tests/test_unit_mathops.c b/libopus/celt/tests/test_unit_mathops.c
new file mode 100644
index 0000000..874e9ad
--- /dev/null
+++ b/libopus/celt/tests/test_unit_mathops.c
@@ -0,0 +1,266 @@
+/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation,
+                           Gregory Maxwell
+   Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef CUSTOM_MODES
+#define CUSTOM_MODES
+#endif
+
+#include <stdio.h>
+#include <math.h>
+#include "mathops.h"
+#include "bands.h"
+
+#ifdef FIXED_POINT
+#define WORD "%d"
+#else
+#define WORD "%f"
+#endif
+
+int ret = 0;
+
+void testdiv(void)
+{
+   opus_int32 i;
+   for (i=1;i<=327670;i++)
+   {
+      double prod;
+      opus_val32 val;
+      val = celt_rcp(i);
+#ifdef FIXED_POINT
+      prod = (1./32768./65526.)*val*i;
+#else
+      prod = val*i;
+#endif
+      if (fabs(prod-1) > .00025)
+      {
+         fprintf (stderr, "div failed: 1/%d="WORD" (product = %f)\n", i, val, prod);
+         ret = 1;
+      }
+   }
+}
+
+void testsqrt(void)
+{
+   opus_int32 i;
+   for (i=1;i<=1000000000;i++)
+   {
+      double ratio;
+      opus_val16 val;
+      val = celt_sqrt(i);
+      ratio = val/sqrt(i);
+      if (fabs(ratio - 1) > .0005 && fabs(val-sqrt(i)) > 2)
+      {
+         fprintf (stderr, "sqrt failed: sqrt(%d)="WORD" (ratio = %f)\n", i, val, ratio);
+         ret = 1;
+      }
+      i+= i>>10;
+   }
+}
+
+void testbitexactcos(void)
+{
+   int i;
+   opus_int32 min_d,max_d,last,chk;
+   chk=max_d=0;
+   last=min_d=32767;
+   for(i=64;i<=16320;i++)
+   {
+      opus_int32 d;
+      opus_int32 q=bitexact_cos(i);
+      chk ^= q*i;
+      d = last - q;
+      if (d>max_d)max_d=d;
+      if (d<min_d)min_d=d;
+      last = q;
+   }
+   if ((chk!=89408644)||(max_d!=5)||(min_d!=0)||(bitexact_cos(64)!=32767)||
+       (bitexact_cos(16320)!=200)||(bitexact_cos(8192)!=23171))
+   {
+      fprintf (stderr, "bitexact_cos failed\n");
+      ret = 1;
+   }
+}
+
+void testbitexactlog2tan(void)
+{
+   int i,fail;
+   opus_int32 min_d,max_d,last,chk;
+   fail=chk=max_d=0;
+   last=min_d=15059;
+   for(i=64;i<8193;i++)
+   {
+      opus_int32 d;
+      opus_int32 mid=bitexact_cos(i);
+      opus_int32 side=bitexact_cos(16384-i);
+      opus_int32 q=bitexact_log2tan(mid,side);
+      chk ^= q*i;
+      d = last - q;
+      if (q!=-1*bitexact_log2tan(side,mid))
+        fail = 1;
+      if (d>max_d)max_d=d;
+      if (d<min_d)min_d=d;
+      last = q;
+   }
+   if ((chk!=15821257)||(max_d!=61)||(min_d!=-2)||fail||
+       (bitexact_log2tan(32767,200)!=15059)||(bitexact_log2tan(30274,12540)!=2611)||
+       (bitexact_log2tan(23171,23171)!=0))
+   {
+      fprintf (stderr, "bitexact_log2tan failed\n");
+      ret = 1;
+   }
+}
+
+#ifndef FIXED_POINT
+void testlog2(void)
+{
+   float x;
+   for (x=0.001;x<1677700.0;x+=(x/8.0))
+   {
+      float error = fabs((1.442695040888963387*log(x))-celt_log2(x));
+      if (error>0.0009)
+      {
+         fprintf (stderr, "celt_log2 failed: fabs((1.442695040888963387*log(x))-celt_log2(x))>0.001 (x = %f, error = %f)\n", x,error);
+         ret = 1;
+      }
+   }
+}
+
+void testexp2(void)
+{
+   float x;
+   for (x=-11.0;x<24.0;x+=0.0007)
+   {
+      float error = fabs(x-(1.442695040888963387*log(celt_exp2(x))));
+      if (error>0.0002)
+      {
+         fprintf (stderr, "celt_exp2 failed: fabs(x-(1.442695040888963387*log(celt_exp2(x))))>0.0005 (x = %f, error = %f)\n", x,error);
+         ret = 1;
+      }
+   }
+}
+
+void testexp2log2(void)
+{
+   float x;
+   for (x=-11.0;x<24.0;x+=0.0007)
+   {
+      float error = fabs(x-(celt_log2(celt_exp2(x))));
+      if (error>0.001)
+      {
+         fprintf (stderr, "celt_log2/celt_exp2 failed: fabs(x-(celt_log2(celt_exp2(x))))>0.001 (x = %f, error = %f)\n", x,error);
+         ret = 1;
+      }
+   }
+}
+#else
+void testlog2(void)
+{
+   opus_val32 x;
+   for (x=8;x<1073741824;x+=(x>>3))
+   {
+      float error = fabs((1.442695040888963387*log(x/16384.0))-celt_log2(x)/1024.0);
+      if (error>0.003)
+      {
+         fprintf (stderr, "celt_log2 failed: x = %ld, error = %f\n", (long)x,error);
+         ret = 1;
+      }
+   }
+}
+
+void testexp2(void)
+{
+   opus_val16 x;
+   for (x=-32768;x<15360;x++)
+   {
+      float error1 = fabs(x/1024.0-(1.442695040888963387*log(celt_exp2(x)/65536.0)));
+      float error2 = fabs(exp(0.6931471805599453094*x/1024.0)-celt_exp2(x)/65536.0);
+      if (error1>0.0002&&error2>0.00004)
+      {
+         fprintf (stderr, "celt_exp2 failed: x = "WORD", error1 = %f, error2 = %f\n", x,error1,error2);
+         ret = 1;
+      }
+   }
+}
+
+void testexp2log2(void)
+{
+   opus_val32 x;
+   for (x=8;x<65536;x+=(x>>3))
+   {
+      float error = fabs(x-0.25*celt_exp2(celt_log2(x)))/16384;
+      if (error>0.004)
+      {
+         fprintf (stderr, "celt_log2/celt_exp2 failed: fabs(x-(celt_exp2(celt_log2(x))))>0.001 (x = %ld, error = %f)\n", (long)x,error);
+         ret = 1;
+      }
+   }
+}
+
+void testilog2(void)
+{
+   opus_val32 x;
+   for (x=1;x<=268435455;x+=127)
+   {
+      opus_val32 lg;
+      opus_val32 y;
+
+      lg = celt_ilog2(x);
+      if (lg<0 || lg>=31)
+      {
+         printf("celt_ilog2 failed: 0<=celt_ilog2(x)<31 (x = %d, celt_ilog2(x) = %d)\n",x,lg);
+         ret = 1;
+      }
+      y = 1<<lg;
+
+      if (x<y || (x>>1)>=y)
+      {
+         printf("celt_ilog2 failed: 2**celt_ilog2(x)<=x<2**(celt_ilog2(x)+1) (x = %d, 2**celt_ilog2(x) = %d)\n",x,y);
+         ret = 1;
+      }
+   }
+}
+#endif
+
+int main(void)
+{
+   testbitexactcos();
+   testbitexactlog2tan();
+   testdiv();
+   testsqrt();
+   testlog2();
+   testexp2();
+   testexp2log2();
+#ifdef FIXED_POINT
+   testilog2();
+#endif
+   return ret;
+}
diff --git a/libopus/celt/tests/test_unit_mdct.c b/libopus/celt/tests/test_unit_mdct.c
new file mode 100644
index 0000000..4a563cc
--- /dev/null
+++ b/libopus/celt/tests/test_unit_mdct.c
@@ -0,0 +1,227 @@
+/* Copyright (c) 2008-2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+
+#include "mdct.h"
+#include "stack_alloc.h"
+#include "kiss_fft.h"
+#include "mdct.h"
+#include "modes.h"
+
+#ifndef M_PI
+#define M_PI 3.141592653
+#endif
+
+int ret = 0;
+void check(kiss_fft_scalar  * in,kiss_fft_scalar  * out,int nfft,int isinverse)
+{
+    int bin,k;
+    double errpow=0,sigpow=0;
+    double snr;
+    for (bin=0;bin<nfft/2;++bin) {
+        double ansr = 0;
+        double difr;
+
+        for (k=0;k<nfft;++k) {
+           double phase = 2*M_PI*(k+.5+.25*nfft)*(bin+.5)/nfft;
+           double re = cos(phase);
+
+           re /= nfft/4;
+
+           ansr += in[k] * re;
+        }
+        /*printf ("%f %f\n", ansr, out[bin]);*/
+        difr = ansr - out[bin];
+        errpow += difr*difr;
+        sigpow += ansr*ansr;
+    }
+    snr = 10*log10(sigpow/errpow);
+    printf("nfft=%d inverse=%d,snr = %f\n",nfft,isinverse,snr );
+    if (snr<60) {
+       printf( "** poor snr: %f **\n", snr);
+       ret = 1;
+    }
+}
+
+void check_inv(kiss_fft_scalar  * in,kiss_fft_scalar  * out,int nfft,int isinverse)
+{
+   int bin,k;
+   double errpow=0,sigpow=0;
+   double snr;
+   for (bin=0;bin<nfft;++bin) {
+      double ansr = 0;
+      double difr;
+
+      for (k=0;k<nfft/2;++k) {
+         double phase = 2*M_PI*(bin+.5+.25*nfft)*(k+.5)/nfft;
+         double re = cos(phase);
+
+         /*re *= 2;*/
+
+         ansr += in[k] * re;
+      }
+      /*printf ("%f %f\n", ansr, out[bin]);*/
+      difr = ansr - out[bin];
+      errpow += difr*difr;
+      sigpow += ansr*ansr;
+   }
+   snr = 10*log10(sigpow/errpow);
+   printf("nfft=%d inverse=%d,snr = %f\n",nfft,isinverse,snr );
+   if (snr<60) {
+      printf( "** poor snr: %f **\n", snr);
+      ret = 1;
+   }
+}
+
+
+void test1d(int nfft,int isinverse,int arch)
+{
+    size_t buflen = sizeof(kiss_fft_scalar)*nfft;
+    kiss_fft_scalar *in;
+    kiss_fft_scalar *in_copy;
+    kiss_fft_scalar *out;
+    opus_val16 *window;
+    int k;
+
+#ifdef CUSTOM_MODES
+    int shift = 0;
+    const mdct_lookup *cfg;
+    mdct_lookup _cfg;
+    clt_mdct_init(&_cfg, nfft, 0, arch);
+    cfg = &_cfg;
+#else
+    int shift;
+    const mdct_lookup *cfg;
+    CELTMode *mode = opus_custom_mode_create(48000, 960, NULL);
+    if (nfft == 1920) shift = 0;
+    else if (nfft == 960) shift = 1;
+    else if (nfft == 480) shift = 2;
+    else if (nfft == 240) shift = 3;
+    else return;
+    cfg = &mode->mdct;
+#endif
+
+    in = (kiss_fft_scalar*)malloc(buflen);
+    in_copy = (kiss_fft_scalar*)malloc(buflen);
+    out = (kiss_fft_scalar*)malloc(buflen);
+    window = (opus_val16*)malloc(sizeof(opus_val16)*nfft/2);
+
+    for (k=0;k<nfft;++k) {
+        in[k] = (rand() % 32768) - 16384;
+    }
+
+    for (k=0;k<nfft/2;++k) {
+       window[k] = Q15ONE;
+    }
+    for (k=0;k<nfft;++k) {
+       in[k] *= 32768;
+    }
+
+    if (isinverse)
+    {
+       for (k=0;k<nfft;++k) {
+          in[k] /= nfft;
+       }
+    }
+
+    for (k=0;k<nfft;++k)
+       in_copy[k] = in[k];
+    /*for (k=0;k<nfft;++k) printf("%d %d ", in[k].r, in[k].i);printf("\n");*/
+
+    if (isinverse)
+    {
+       for (k=0;k<nfft;++k)
+          out[k] = 0;
+       clt_mdct_backward(cfg,in,out, window, nfft/2, shift, 1, arch);
+       /* apply TDAC because clt_mdct_backward() no longer does that */
+       for (k=0;k<nfft/4;++k)
+          out[nfft-k-1] = out[nfft/2+k];
+       check_inv(in,out,nfft,isinverse);
+    } else {
+       clt_mdct_forward(cfg,in,out,window, nfft/2, shift, 1, arch);
+       check(in_copy,out,nfft,isinverse);
+    }
+    /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/
+
+
+    free(in);
+    free(in_copy);
+    free(out);
+    free(window);
+#ifdef CUSTOM_MODES
+    clt_mdct_clear(&_cfg, arch);
+#endif
+}
+
+int main(int argc,char ** argv)
+{
+    ALLOC_STACK;
+    int arch = opus_select_arch();
+
+    if (argc>1) {
+        int k;
+        for (k=1;k<argc;++k) {
+            test1d(atoi(argv[k]),0,arch);
+            test1d(atoi(argv[k]),1,arch);
+        }
+    }else{
+        test1d(32,0,arch);
+        test1d(32,1,arch);
+        test1d(256,0,arch);
+        test1d(256,1,arch);
+        test1d(512,0,arch);
+        test1d(512,1,arch);
+        test1d(1024,0,arch);
+        test1d(1024,1,arch);
+        test1d(2048,0,arch);
+        test1d(2048,1,arch);
+#ifndef RADIX_TWO_ONLY
+        test1d(36,0,arch);
+        test1d(36,1,arch);
+        test1d(40,0,arch);
+        test1d(40,1,arch);
+        test1d(60,0,arch);
+        test1d(60,1,arch);
+        test1d(120,0,arch);
+        test1d(120,1,arch);
+        test1d(240,0,arch);
+        test1d(240,1,arch);
+        test1d(480,0,arch);
+        test1d(480,1,arch);
+        test1d(960,0,arch);
+        test1d(960,1,arch);
+        test1d(1920,0,arch);
+        test1d(1920,1,arch);
+#endif
+    }
+    return ret;
+}
diff --git a/libopus/celt/tests/test_unit_rotation.c b/libopus/celt/tests/test_unit_rotation.c
new file mode 100644
index 0000000..8a31b3f
--- /dev/null
+++ b/libopus/celt/tests/test_unit_rotation.c
@@ -0,0 +1,86 @@
+/* Copyright (c) 2008-2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef CUSTOM_MODES
+#define CUSTOM_MODES
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "vq.h"
+#include "bands.h"
+#include "stack_alloc.h"
+#include <math.h>
+
+
+#define MAX_SIZE 100
+
+int ret=0;
+void test_rotation(int N, int K)
+{
+   int i;
+   double err = 0, ener = 0, snr, snr0;
+   opus_val16 x0[MAX_SIZE];
+   opus_val16 x1[MAX_SIZE];
+   for (i=0;i<N;i++)
+      x1[i] = x0[i] = rand()%32767-16384;
+   exp_rotation(x1, N, 1, 1, K, SPREAD_NORMAL);
+   for (i=0;i<N;i++)
+   {
+      err += (x0[i]-(double)x1[i])*(x0[i]-(double)x1[i]);
+      ener += x0[i]*(double)x0[i];
+   }
+   snr0 = 20*log10(ener/err);
+   err = ener = 0;
+   exp_rotation(x1, N, -1, 1, K, SPREAD_NORMAL);
+   for (i=0;i<N;i++)
+   {
+      err += (x0[i]-(double)x1[i])*(x0[i]-(double)x1[i]);
+      ener += x0[i]*(double)x0[i];
+   }
+   snr = 20*log10(ener/err);
+   printf ("SNR for size %d (%d pulses) is %f (was %f without inverse)\n", N, K, snr, snr0);
+   if (snr < 60 || snr0 > 20)
+   {
+      fprintf(stderr, "FAIL!\n");
+      ret = 1;
+   }
+}
+
+int main(void)
+{
+   ALLOC_STACK;
+   test_rotation(15, 3);
+   test_rotation(23, 5);
+   test_rotation(50, 3);
+   test_rotation(80, 1);
+   return ret;
+}
diff --git a/libopus/celt/tests/test_unit_types.c b/libopus/celt/tests/test_unit_types.c
new file mode 100644
index 0000000..67a0fb8
--- /dev/null
+++ b/libopus/celt/tests/test_unit_types.c
@@ -0,0 +1,50 @@
+/* Copyright (c) 2008-2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "opus_types.h"
+#include <stdio.h>
+
+int main(void)
+{
+   opus_int16 i = 1;
+   i <<= 14;
+   if (i>>14 != 1)
+   {
+      fprintf(stderr, "opus_int16 isn't 16 bits\n");
+      return 1;
+   }
+   if (sizeof(opus_int16)*2 != sizeof(opus_int32))
+   {
+      fprintf(stderr, "16*2 != 32\n");
+      return 1;
+   }
+   return 0;
+}
diff --git a/libopus/celt/vq.c b/libopus/celt/vq.c
new file mode 100644
index 0000000..8011e22
--- /dev/null
+++ b/libopus/celt/vq.c
@@ -0,0 +1,442 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mathops.h"
+#include "cwrs.h"
+#include "vq.h"
+#include "arch.h"
+#include "os_support.h"
+#include "bands.h"
+#include "rate.h"
+#include "pitch.h"
+
+#if defined(MIPSr1_ASM)
+#include "mips/vq_mipsr1.h"
+#endif
+
+#ifndef OVERRIDE_vq_exp_rotation1
+static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
+{
+   int i;
+   opus_val16 ms;
+   celt_norm *Xptr;
+   Xptr = X;
+   ms = NEG16(s);
+   for (i=0;i<len-stride;i++)
+   {
+      celt_norm x1, x2;
+      x1 = Xptr[0];
+      x2 = Xptr[stride];
+      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
+      *Xptr++      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
+   }
+   Xptr = &X[len-2*stride-1];
+   for (i=len-2*stride-1;i>=0;i--)
+   {
+      celt_norm x1, x2;
+      x1 = Xptr[0];
+      x2 = Xptr[stride];
+      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
+      *Xptr--      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
+   }
+}
+#endif /* OVERRIDE_vq_exp_rotation1 */
+
+void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
+{
+   static const int SPREAD_FACTOR[3]={15,10,5};
+   int i;
+   opus_val16 c, s;
+   opus_val16 gain, theta;
+   int stride2=0;
+   int factor;
+
+   if (2*K>=len || spread==SPREAD_NONE)
+      return;
+   factor = SPREAD_FACTOR[spread-1];
+
+   gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
+   theta = HALF16(MULT16_16_Q15(gain,gain));
+
+   c = celt_cos_norm(EXTEND32(theta));
+   s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /*  sin(theta) */
+
+   if (len>=8*stride)
+   {
+      stride2 = 1;
+      /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
+         It's basically incrementing long as (stride2+0.5)^2 < len/stride. */
+      while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
+         stride2++;
+   }
+   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
+      extract_collapse_mask().*/
+   len = celt_udiv(len, stride);
+   for (i=0;i<stride;i++)
+   {
+      if (dir < 0)
+      {
+         if (stride2)
+            exp_rotation1(X+i*len, len, stride2, s, c);
+         exp_rotation1(X+i*len, len, 1, c, s);
+      } else {
+         exp_rotation1(X+i*len, len, 1, c, -s);
+         if (stride2)
+            exp_rotation1(X+i*len, len, stride2, s, -c);
+      }
+   }
+}
+
+/** Takes the pitch vector and the decoded residual vector, computes the gain
+    that will give ||p+g*y||=1 and mixes the residual with the pitch. */
+static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
+      int N, opus_val32 Ryy, opus_val16 gain)
+{
+   int i;
+#ifdef FIXED_POINT
+   int k;
+#endif
+   opus_val32 t;
+   opus_val16 g;
+
+#ifdef FIXED_POINT
+   k = celt_ilog2(Ryy)>>1;
+#endif
+   t = VSHR32(Ryy, 2*(k-7));
+   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
+
+   i=0;
+   do
+      X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1));
+   while (++i < N);
+}
+
+static unsigned extract_collapse_mask(int *iy, int N, int B)
+{
+   unsigned collapse_mask;
+   int N0;
+   int i;
+   if (B<=1)
+      return 1;
+   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
+      exp_rotation().*/
+   N0 = celt_udiv(N, B);
+   collapse_mask = 0;
+   i=0; do {
+      int j;
+      unsigned tmp=0;
+      j=0; do {
+         tmp |= iy[i*N0+j];
+      } while (++j<N0);
+      collapse_mask |= (tmp!=0)<<i;
+   } while (++i<B);
+   return collapse_mask;
+}
+
+opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch)
+{
+   VARDECL(celt_norm, y);
+   VARDECL(int, signx);
+   int i, j;
+   int pulsesLeft;
+   opus_val32 sum;
+   opus_val32 xy;
+   opus_val16 yy;
+   SAVE_STACK;
+
+   (void)arch;
+   ALLOC(y, N, celt_norm);
+   ALLOC(signx, N, int);
+
+   /* Get rid of the sign */
+   sum = 0;
+   j=0; do {
+      signx[j] = X[j]<0;
+      /* OPT: Make sure the compiler doesn't use a branch on ABS16(). */
+      X[j] = ABS16(X[j]);
+      iy[j] = 0;
+      y[j] = 0;
+   } while (++j<N);
+
+   xy = yy = 0;
+
+   pulsesLeft = K;
+
+   /* Do a pre-search by projecting on the pyramid */
+   if (K > (N>>1))
+   {
+      opus_val16 rcp;
+      j=0; do {
+         sum += X[j];
+      }  while (++j<N);
+
+      /* If X is too small, just replace it with a pulse at 0 */
+#ifdef FIXED_POINT
+      if (sum <= K)
+#else
+      /* Prevents infinities and NaNs from causing too many pulses
+         to be allocated. 64 is an approximation of infinity here. */
+      if (!(sum > EPSILON && sum < 64))
+#endif
+      {
+         X[0] = QCONST16(1.f,14);
+         j=1; do
+            X[j]=0;
+         while (++j<N);
+         sum = QCONST16(1.f,14);
+      }
+#ifdef FIXED_POINT
+      rcp = EXTRACT16(MULT16_32_Q16(K, celt_rcp(sum)));
+#else
+      /* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
+      rcp = EXTRACT16(MULT16_32_Q16(K+0.8f, celt_rcp(sum)));
+#endif
+      j=0; do {
+#ifdef FIXED_POINT
+         /* It's really important to round *towards zero* here */
+         iy[j] = MULT16_16_Q15(X[j],rcp);
+#else
+         iy[j] = (int)floor(rcp*X[j]);
+#endif
+         y[j] = (celt_norm)iy[j];
+         yy = MAC16_16(yy, y[j],y[j]);
+         xy = MAC16_16(xy, X[j],y[j]);
+         y[j] *= 2;
+         pulsesLeft -= iy[j];
+      }  while (++j<N);
+   }
+   celt_sig_assert(pulsesLeft>=0);
+
+   /* This should never happen, but just in case it does (e.g. on silence)
+      we fill the first bin with pulses. */
+#ifdef FIXED_POINT_DEBUG
+   celt_sig_assert(pulsesLeft<=N+3);
+#endif
+   if (pulsesLeft > N+3)
+   {
+      opus_val16 tmp = (opus_val16)pulsesLeft;
+      yy = MAC16_16(yy, tmp, tmp);
+      yy = MAC16_16(yy, tmp, y[0]);
+      iy[0] += pulsesLeft;
+      pulsesLeft=0;
+   }
+
+   for (i=0;i<pulsesLeft;i++)
+   {
+      opus_val16 Rxy, Ryy;
+      int best_id;
+      opus_val32 best_num;
+      opus_val16 best_den;
+#ifdef FIXED_POINT
+      int rshift;
+#endif
+#ifdef FIXED_POINT
+      rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
+#endif
+      best_id = 0;
+      /* The squared magnitude term gets added anyway, so we might as well
+         add it outside the loop */
+      yy = ADD16(yy, 1);
+
+      /* Calculations for position 0 are out of the loop, in part to reduce
+         mispredicted branches (since the if condition is usually false)
+         in the loop. */
+      /* Temporary sums of the new pulse(s) */
+      Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[0])),rshift));
+      /* We're multiplying y[j] by two so we don't have to do it here */
+      Ryy = ADD16(yy, y[0]);
+
+      /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
+         Rxy is positive because the sign is pre-computed) */
+      Rxy = MULT16_16_Q15(Rxy,Rxy);
+      best_den = Ryy;
+      best_num = Rxy;
+      j=1;
+      do {
+         /* Temporary sums of the new pulse(s) */
+         Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
+         /* We're multiplying y[j] by two so we don't have to do it here */
+         Ryy = ADD16(yy, y[j]);
+
+         /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
+            Rxy is positive because the sign is pre-computed) */
+         Rxy = MULT16_16_Q15(Rxy,Rxy);
+         /* The idea is to check for num/den >= best_num/best_den, but that way
+            we can do it without any division */
+         /* OPT: It's not clear whether a cmov is faster than a branch here
+            since the condition is more often false than true and using
+            a cmov introduces data dependencies across iterations. The optimal
+            choice may be architecture-dependent. */
+         if (opus_unlikely(MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)))
+         {
+            best_den = Ryy;
+            best_num = Rxy;
+            best_id = j;
+         }
+      } while (++j<N);
+
+      /* Updating the sums of the new pulse(s) */
+      xy = ADD32(xy, EXTEND32(X[best_id]));
+      /* We're multiplying y[j] by two so we don't have to do it here */
+      yy = ADD16(yy, y[best_id]);
+
+      /* Only now that we've made the final choice, update y/iy */
+      /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
+      y[best_id] += 2;
+      iy[best_id]++;
+   }
+
+   /* Put the original sign back */
+   j=0;
+   do {
+      /*iy[j] = signx[j] ? -iy[j] : iy[j];*/
+      /* OPT: The is more likely to be compiled without a branch than the code above
+         but has the same performance otherwise. */
+      iy[j] = (iy[j]^-signx[j]) + signx[j];
+   } while (++j<N);
+   RESTORE_STACK;
+   return yy;
+}
+
+unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
+      opus_val16 gain, int resynth, int arch)
+{
+   VARDECL(int, iy);
+   opus_val16 yy;
+   unsigned collapse_mask;
+   SAVE_STACK;
+
+   celt_assert2(K>0, "alg_quant() needs at least one pulse");
+   celt_assert2(N>1, "alg_quant() needs at least two dimensions");
+
+   /* Covers vectorization by up to 4. */
+   ALLOC(iy, N+3, int);
+
+   exp_rotation(X, N, 1, B, K, spread);
+
+   yy = op_pvq_search(X, iy, K, N, arch);
+
+   encode_pulses(iy, N, K, enc);
+
+   if (resynth)
+   {
+      normalise_residual(iy, X, N, yy, gain);
+      exp_rotation(X, N, -1, B, K, spread);
+   }
+
+   collapse_mask = extract_collapse_mask(iy, N, B);
+   RESTORE_STACK;
+   return collapse_mask;
+}
+
+/** Decode pulse vector and combine the result with the pitch vector to produce
+    the final normalised signal in the current band. */
+unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
+      ec_dec *dec, opus_val16 gain)
+{
+   opus_val32 Ryy;
+   unsigned collapse_mask;
+   VARDECL(int, iy);
+   SAVE_STACK;
+
+   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
+   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
+   ALLOC(iy, N, int);
+   Ryy = decode_pulses(iy, N, K, dec);
+   normalise_residual(iy, X, N, Ryy, gain);
+   exp_rotation(X, N, -1, B, K, spread);
+   collapse_mask = extract_collapse_mask(iy, N, B);
+   RESTORE_STACK;
+   return collapse_mask;
+}
+
+#ifndef OVERRIDE_renormalise_vector
+void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch)
+{
+   int i;
+#ifdef FIXED_POINT
+   int k;
+#endif
+   opus_val32 E;
+   opus_val16 g;
+   opus_val32 t;
+   celt_norm *xptr;
+   E = EPSILON + celt_inner_prod(X, X, N, arch);
+#ifdef FIXED_POINT
+   k = celt_ilog2(E)>>1;
+#endif
+   t = VSHR32(E, 2*(k-7));
+   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
+
+   xptr = X;
+   for (i=0;i<N;i++)
+   {
+      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
+      xptr++;
+   }
+   /*return celt_sqrt(E);*/
+}
+#endif /* OVERRIDE_renormalise_vector */
+
+int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch)
+{
+   int i;
+   int itheta;
+   opus_val16 mid, side;
+   opus_val32 Emid, Eside;
+
+   Emid = Eside = EPSILON;
+   if (stereo)
+   {
+      for (i=0;i<N;i++)
+      {
+         celt_norm m, s;
+         m = ADD16(SHR16(X[i],1),SHR16(Y[i],1));
+         s = SUB16(SHR16(X[i],1),SHR16(Y[i],1));
+         Emid = MAC16_16(Emid, m, m);
+         Eside = MAC16_16(Eside, s, s);
+      }
+   } else {
+      Emid += celt_inner_prod(X, X, N, arch);
+      Eside += celt_inner_prod(Y, Y, N, arch);
+   }
+   mid = celt_sqrt(Emid);
+   side = celt_sqrt(Eside);
+#ifdef FIXED_POINT
+   /* 0.63662 = 2/pi */
+   itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));
+#else
+   itheta = (int)floor(.5f+16384*0.63662f*fast_atan2f(side,mid));
+#endif
+
+   return itheta;
+}
diff --git a/libopus/celt/vq.h b/libopus/celt/vq.h
new file mode 100644
index 0000000..45ec559
--- /dev/null
+++ b/libopus/celt/vq.h
@@ -0,0 +1,79 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file vq.h
+   @brief Vector quantisation of the residual
+ */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef VQ_H
+#define VQ_H
+
+#include "entenc.h"
+#include "entdec.h"
+#include "modes.h"
+
+#if (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT))
+#include "x86/vq_sse.h"
+#endif
+
+void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread);
+
+opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch);
+
+#if !defined(OVERRIDE_OP_PVQ_SEARCH)
+#define op_pvq_search(x, iy, K, N, arch) \
+    (op_pvq_search_c(x, iy, K, N, arch))
+#endif
+
+/** Algebraic pulse-vector quantiser. The signal x is replaced by the sum of
+  * the pitch and a combination of pulses such that its norm is still equal
+  * to 1. This is the function that will typically require the most CPU.
+ * @param X Residual signal to quantise/encode (returns quantised version)
+ * @param N Number of samples to encode
+ * @param K Number of pulses to use
+ * @param enc Entropy encoder state
+ * @ret A mask indicating which blocks in the band received pulses
+*/
+unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
+      opus_val16 gain, int resynth, int arch);
+
+/** Algebraic pulse decoder
+ * @param X Decoded normalised spectrum (returned)
+ * @param N Number of samples to decode
+ * @param K Number of pulses to use
+ * @param dec Entropy decoder state
+ * @ret A mask indicating which blocks in the band received pulses
+ */
+unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
+      ec_dec *dec, opus_val16 gain);
+
+void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch);
+
+int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch);
+
+#endif /* VQ_H */
diff --git a/libopus/celt/x86/celt_lpc_sse.h b/libopus/celt/x86/celt_lpc_sse.h
new file mode 100644
index 0000000..7d1ecf7
--- /dev/null
+++ b/libopus/celt/x86/celt_lpc_sse.h
@@ -0,0 +1,66 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CELT_LPC_SSE_H
+#define CELT_LPC_SSE_H
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
+#define OVERRIDE_CELT_FIR
+
+void celt_fir_sse4_1(
+         const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16 *y,
+         int N,
+         int ord,
+         int arch);
+
+#if defined(OPUS_X86_PRESUME_SSE4_1)
+#define celt_fir(x, num, y, N, ord, arch) \
+    ((void)arch, celt_fir_sse4_1(x, num, y, N, ord, arch))
+
+#else
+
+extern void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
+         const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16 *y,
+         int N,
+         int ord,
+         int arch);
+
+#  define celt_fir(x, num, y, N, ord, arch) \
+    ((*CELT_FIR_IMPL[(arch) & OPUS_ARCHMASK])(x, num, y, N, ord, arch))
+
+#endif
+#endif
+
+#endif
diff --git a/libopus/celt/x86/celt_lpc_sse4_1.c b/libopus/celt/x86/celt_lpc_sse4_1.c
new file mode 100644
index 0000000..5478568
--- /dev/null
+++ b/libopus/celt/x86/celt_lpc_sse4_1.c
@@ -0,0 +1,89 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include "celt_lpc.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "pitch.h"
+#include "x86cpu.h"
+
+#if defined(FIXED_POINT)
+
+void celt_fir_sse4_1(const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16 *y,
+         int N,
+         int ord,
+         int arch)
+{
+    int i,j;
+    VARDECL(opus_val16, rnum);
+
+    __m128i vecNoA;
+    opus_int32 noA ;
+    SAVE_STACK;
+
+   ALLOC(rnum, ord, opus_val16);
+   for(i=0;i<ord;i++)
+      rnum[i] = num[ord-i-1];
+   noA = EXTEND32(1) << SIG_SHIFT >> 1;
+   vecNoA = _mm_set_epi32(noA, noA, noA, noA);
+
+   for (i=0;i<N-3;i+=4)
+   {
+      opus_val32 sums[4] = {0};
+      __m128i vecSum, vecX;
+
+      xcorr_kernel(rnum, x+i-ord, sums, ord, arch);
+
+      vecSum = _mm_loadu_si128((__m128i *)sums);
+      vecSum = _mm_add_epi32(vecSum, vecNoA);
+      vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT);
+      vecX = OP_CVTEPI16_EPI32_M64(x + i);
+      vecSum = _mm_add_epi32(vecSum, vecX);
+      vecSum = _mm_packs_epi32(vecSum, vecSum);
+      _mm_storel_epi64((__m128i *)(y + i), vecSum);
+   }
+   for (;i<N;i++)
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<ord;j++)
+         sum = MAC16_16(sum, rnum[j], x[i+j-ord]);
+      y[i] = SATURATE16(ADD32(EXTEND32(x[i]), PSHR32(sum, SIG_SHIFT)));
+   }
+
+   RESTORE_STACK;
+}
+
+#endif
diff --git a/libopus/celt/x86/pitch_sse.c b/libopus/celt/x86/pitch_sse.c
new file mode 100644
index 0000000..20e7312
--- /dev/null
+++ b/libopus/celt/x86/pitch_sse.c
@@ -0,0 +1,185 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "macros.h"
+#include "celt_lpc.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "pitch.h"
+
+#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
+
+#include <xmmintrin.h>
+#include "arch.h"
+
+void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)
+{
+   int j;
+   __m128 xsum1, xsum2;
+   xsum1 = _mm_loadu_ps(sum);
+   xsum2 = _mm_setzero_ps();
+
+   for (j = 0; j < len-3; j += 4)
+   {
+      __m128 x0 = _mm_loadu_ps(x+j);
+      __m128 yj = _mm_loadu_ps(y+j);
+      __m128 y3 = _mm_loadu_ps(y+j+3);
+
+      xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x00),yj));
+      xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x55),
+                                          _mm_shuffle_ps(yj,y3,0x49)));
+      xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xaa),
+                                          _mm_shuffle_ps(yj,y3,0x9e)));
+      xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xff),y3));
+   }
+   if (j < len)
+   {
+      xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
+      if (++j < len)
+      {
+         xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
+         if (++j < len)
+         {
+            xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
+         }
+      }
+   }
+   _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2));
+}
+
+
+void dual_inner_prod_sse(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+      int N, opus_val32 *xy1, opus_val32 *xy2)
+{
+   int i;
+   __m128 xsum1, xsum2;
+   xsum1 = _mm_setzero_ps();
+   xsum2 = _mm_setzero_ps();
+   for (i=0;i<N-3;i+=4)
+   {
+      __m128 xi = _mm_loadu_ps(x+i);
+      __m128 y1i = _mm_loadu_ps(y01+i);
+      __m128 y2i = _mm_loadu_ps(y02+i);
+      xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(xi, y1i));
+      xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(xi, y2i));
+   }
+   /* Horizontal sum */
+   xsum1 = _mm_add_ps(xsum1, _mm_movehl_ps(xsum1, xsum1));
+   xsum1 = _mm_add_ss(xsum1, _mm_shuffle_ps(xsum1, xsum1, 0x55));
+   _mm_store_ss(xy1, xsum1);
+   xsum2 = _mm_add_ps(xsum2, _mm_movehl_ps(xsum2, xsum2));
+   xsum2 = _mm_add_ss(xsum2, _mm_shuffle_ps(xsum2, xsum2, 0x55));
+   _mm_store_ss(xy2, xsum2);
+   for (;i<N;i++)
+   {
+      *xy1 = MAC16_16(*xy1, x[i], y01[i]);
+      *xy2 = MAC16_16(*xy2, x[i], y02[i]);
+   }
+}
+
+opus_val32 celt_inner_prod_sse(const opus_val16 *x, const opus_val16 *y,
+      int N)
+{
+   int i;
+   float xy;
+   __m128 sum;
+   sum = _mm_setzero_ps();
+   /* FIXME: We should probably go 8-way and use 2 sums. */
+   for (i=0;i<N-3;i+=4)
+   {
+      __m128 xi = _mm_loadu_ps(x+i);
+      __m128 yi = _mm_loadu_ps(y+i);
+      sum = _mm_add_ps(sum,_mm_mul_ps(xi, yi));
+   }
+   /* Horizontal sum */
+   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
+   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
+   _mm_store_ss(&xy, sum);
+   for (;i<N;i++)
+   {
+      xy = MAC16_16(xy, x[i], y[i]);
+   }
+   return xy;
+}
+
+void comb_filter_const_sse(opus_val32 *y, opus_val32 *x, int T, int N,
+      opus_val16 g10, opus_val16 g11, opus_val16 g12)
+{
+   int i;
+   __m128 x0v;
+   __m128 g10v, g11v, g12v;
+   g10v = _mm_load1_ps(&g10);
+   g11v = _mm_load1_ps(&g11);
+   g12v = _mm_load1_ps(&g12);
+   x0v = _mm_loadu_ps(&x[-T-2]);
+   for (i=0;i<N-3;i+=4)
+   {
+      __m128 yi, yi2, x1v, x2v, x3v, x4v;
+      const opus_val32 *xp = &x[i-T-2];
+      yi = _mm_loadu_ps(x+i);
+      x4v = _mm_loadu_ps(xp+4);
+#if 0
+      /* Slower version with all loads */
+      x1v = _mm_loadu_ps(xp+1);
+      x2v = _mm_loadu_ps(xp+2);
+      x3v = _mm_loadu_ps(xp+3);
+#else
+      x2v = _mm_shuffle_ps(x0v, x4v, 0x4e);
+      x1v = _mm_shuffle_ps(x0v, x2v, 0x99);
+      x3v = _mm_shuffle_ps(x2v, x4v, 0x99);
+#endif
+
+      yi = _mm_add_ps(yi, _mm_mul_ps(g10v,x2v));
+#if 0 /* Set to 1 to make it bit-exact with the non-SSE version */
+      yi = _mm_add_ps(yi, _mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)));
+      yi = _mm_add_ps(yi, _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
+#else
+      /* Use partial sums */
+      yi2 = _mm_add_ps(_mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)),
+                       _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
+      yi = _mm_add_ps(yi, yi2);
+#endif
+      x0v=x4v;
+      _mm_storeu_ps(y+i, yi);
+   }
+#ifdef CUSTOM_MODES
+   for (;i<N;i++)
+   {
+      y[i] = x[i]
+               + MULT16_32_Q15(g10,x[i-T])
+               + MULT16_32_Q15(g11,ADD32(x[i-T+1],x[i-T-1]))
+               + MULT16_32_Q15(g12,ADD32(x[i-T+2],x[i-T-2]));
+   }
+#endif
+}
+
+
+#endif
diff --git a/libopus/celt/x86/pitch_sse.h b/libopus/celt/x86/pitch_sse.h
new file mode 100644
index 0000000..e5f87ab
--- /dev/null
+++ b/libopus/celt/x86/pitch_sse.h
@@ -0,0 +1,192 @@
+/* Copyright (c) 2013 Jean-Marc Valin and John Ridges
+   Copyright (c) 2014, Cisco Systems, INC MingXiang WeiZhou MinPeng YanWang*/
+/**
+   @file pitch_sse.h
+   @brief Pitch analysis
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef PITCH_SSE_H
+#define PITCH_SSE_H
+
+#if defined(HAVE_CONFIG_H)
+#include "config.h"
+#endif
+
+#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
+void xcorr_kernel_sse4_1(
+                    const opus_int16 *x,
+                    const opus_int16 *y,
+                    opus_val32       sum[4],
+                    int              len);
+#endif
+
+#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
+void xcorr_kernel_sse(
+                    const opus_val16 *x,
+                    const opus_val16 *y,
+                    opus_val32       sum[4],
+                    int              len);
+#endif
+
+#if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT)
+#define OVERRIDE_XCORR_KERNEL
+#define xcorr_kernel(x, y, sum, len, arch) \
+    ((void)arch, xcorr_kernel_sse4_1(x, y, sum, len))
+
+#elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT)
+#define OVERRIDE_XCORR_KERNEL
+#define xcorr_kernel(x, y, sum, len, arch) \
+    ((void)arch, xcorr_kernel_sse(x, y, sum, len))
+
+#elif (defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)) || (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT))
+
+extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
+                    const opus_val16 *x,
+                    const opus_val16 *y,
+                    opus_val32       sum[4],
+                    int              len);
+
+#define OVERRIDE_XCORR_KERNEL
+#define xcorr_kernel(x, y, sum, len, arch) \
+    ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))
+
+#endif
+
+#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
+opus_val32 celt_inner_prod_sse4_1(
+    const opus_int16 *x,
+    const opus_int16 *y,
+    int               N);
+#endif
+
+#if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT)
+opus_val32 celt_inner_prod_sse2(
+    const opus_int16 *x,
+    const opus_int16 *y,
+    int               N);
+#endif
+
+#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT)
+opus_val32 celt_inner_prod_sse(
+    const opus_val16 *x,
+    const opus_val16 *y,
+    int               N);
+#endif
+
+
+#if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT)
+#define OVERRIDE_CELT_INNER_PROD
+#define celt_inner_prod(x, y, N, arch) \
+    ((void)arch, celt_inner_prod_sse4_1(x, y, N))
+
+#elif defined(OPUS_X86_PRESUME_SSE2) && defined(FIXED_POINT) && !defined(OPUS_X86_MAY_HAVE_SSE4_1)
+#define OVERRIDE_CELT_INNER_PROD
+#define celt_inner_prod(x, y, N, arch) \
+    ((void)arch, celt_inner_prod_sse2(x, y, N))
+
+#elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT)
+#define OVERRIDE_CELT_INNER_PROD
+#define celt_inner_prod(x, y, N, arch) \
+    ((void)arch, celt_inner_prod_sse(x, y, N))
+
+
+#elif ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) || \
+    (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT))
+
+extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
+                    const opus_val16 *x,
+                    const opus_val16 *y,
+                    int               N);
+
+#define OVERRIDE_CELT_INNER_PROD
+#define celt_inner_prod(x, y, N, arch) \
+    ((*CELT_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y, N))
+
+#endif
+
+#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
+
+#define OVERRIDE_DUAL_INNER_PROD
+#define OVERRIDE_COMB_FILTER_CONST
+
+#undef dual_inner_prod
+#undef comb_filter_const
+
+void dual_inner_prod_sse(const opus_val16 *x,
+    const opus_val16 *y01,
+    const opus_val16 *y02,
+    int               N,
+    opus_val32       *xy1,
+    opus_val32       *xy2);
+
+void comb_filter_const_sse(opus_val32 *y,
+    opus_val32 *x,
+    int         T,
+    int         N,
+    opus_val16  g10,
+    opus_val16  g11,
+    opus_val16  g12);
+
+
+#if defined(OPUS_X86_PRESUME_SSE)
+# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
+    ((void)(arch),dual_inner_prod_sse(x, y01, y02, N, xy1, xy2))
+
+# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
+    ((void)(arch),comb_filter_const_sse(y, x, T, N, g10, g11, g12))
+#else
+
+extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
+              const opus_val16 *x,
+              const opus_val16 *y01,
+              const opus_val16 *y02,
+              int               N,
+              opus_val32       *xy1,
+              opus_val32       *xy2);
+
+#define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
+    ((*DUAL_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2))
+
+extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
+              opus_val32 *y,
+              opus_val32 *x,
+              int         T,
+              int         N,
+              opus_val16  g10,
+              opus_val16  g11,
+              opus_val16  g12);
+
+#define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
+    ((*COMB_FILTER_CONST_IMPL[(arch) & OPUS_ARCHMASK])(y, x, T, N, g10, g11, g12))
+
+#define NON_STATIC_COMB_FILTER_CONST_C
+
+#endif
+#endif
+
+#endif
diff --git a/libopus/celt/x86/pitch_sse2.c b/libopus/celt/x86/pitch_sse2.c
new file mode 100644
index 0000000..a0e7d1b
--- /dev/null
+++ b/libopus/celt/x86/pitch_sse2.c
@@ -0,0 +1,95 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+
+#include "macros.h"
+#include "celt_lpc.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "pitch.h"
+
+#if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT)
+opus_val32 celt_inner_prod_sse2(const opus_val16 *x, const opus_val16 *y,
+      int N)
+{
+    opus_int  i, dataSize16;
+    opus_int32 sum;
+
+    __m128i inVec1_76543210, inVec1_FEDCBA98, acc1;
+    __m128i inVec2_76543210, inVec2_FEDCBA98, acc2;
+
+    sum = 0;
+    dataSize16 = N & ~15;
+
+    acc1 = _mm_setzero_si128();
+    acc2 = _mm_setzero_si128();
+
+    for (i=0;i<dataSize16;i+=16)
+    {
+        inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
+        inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
+
+        inVec1_FEDCBA98 = _mm_loadu_si128((__m128i *)(&x[i + 8]));
+        inVec2_FEDCBA98 = _mm_loadu_si128((__m128i *)(&y[i + 8]));
+
+        inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
+        inVec1_FEDCBA98 = _mm_madd_epi16(inVec1_FEDCBA98, inVec2_FEDCBA98);
+
+        acc1 = _mm_add_epi32(acc1, inVec1_76543210);
+        acc2 = _mm_add_epi32(acc2, inVec1_FEDCBA98);
+    }
+
+    acc1 = _mm_add_epi32( acc1, acc2 );
+
+    if (N - i >= 8)
+    {
+        inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
+        inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
+
+        inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
+
+        acc1 = _mm_add_epi32(acc1, inVec1_76543210);
+        i += 8;
+    }
+
+    acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64( acc1, acc1));
+    acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16( acc1, 0x0E));
+    sum += _mm_cvtsi128_si32(acc1);
+
+    for (;i<N;i++) {
+        sum = silk_SMLABB(sum, x[i], y[i]);
+    }
+
+    return sum;
+}
+#endif
diff --git a/libopus/celt/x86/pitch_sse4_1.c b/libopus/celt/x86/pitch_sse4_1.c
new file mode 100644
index 0000000..a092c68
--- /dev/null
+++ b/libopus/celt/x86/pitch_sse4_1.c
@@ -0,0 +1,195 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+
+#include "macros.h"
+#include "celt_lpc.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "pitch.h"
+
+#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
+#include <smmintrin.h>
+#include "x86cpu.h"
+
+opus_val32 celt_inner_prod_sse4_1(const opus_val16 *x, const opus_val16 *y,
+      int N)
+{
+    opus_int  i, dataSize16;
+    opus_int32 sum;
+    __m128i inVec1_76543210, inVec1_FEDCBA98, acc1;
+    __m128i inVec2_76543210, inVec2_FEDCBA98, acc2;
+    __m128i inVec1_3210, inVec2_3210;
+
+    sum = 0;
+    dataSize16 = N & ~15;
+
+    acc1 = _mm_setzero_si128();
+    acc2 = _mm_setzero_si128();
+
+    for (i=0;i<dataSize16;i+=16) {
+        inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
+        inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
+
+        inVec1_FEDCBA98 = _mm_loadu_si128((__m128i *)(&x[i + 8]));
+        inVec2_FEDCBA98 = _mm_loadu_si128((__m128i *)(&y[i + 8]));
+
+        inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
+        inVec1_FEDCBA98 = _mm_madd_epi16(inVec1_FEDCBA98, inVec2_FEDCBA98);
+
+        acc1 = _mm_add_epi32(acc1, inVec1_76543210);
+        acc2 = _mm_add_epi32(acc2, inVec1_FEDCBA98);
+    }
+
+    acc1 = _mm_add_epi32(acc1, acc2);
+
+    if (N - i >= 8)
+    {
+        inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
+        inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
+
+        inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
+
+        acc1 = _mm_add_epi32(acc1, inVec1_76543210);
+        i += 8;
+    }
+
+    if (N - i >= 4)
+    {
+        inVec1_3210 = OP_CVTEPI16_EPI32_M64(&x[i + 0]);
+        inVec2_3210 = OP_CVTEPI16_EPI32_M64(&y[i + 0]);
+
+        inVec1_3210 = _mm_mullo_epi32(inVec1_3210, inVec2_3210);
+
+        acc1 = _mm_add_epi32(acc1, inVec1_3210);
+        i += 4;
+    }
+
+    acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64(acc1, acc1));
+    acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16(acc1, 0x0E));
+
+    sum += _mm_cvtsi128_si32(acc1);
+
+    for (;i<N;i++)
+    {
+        sum = silk_SMLABB(sum, x[i], y[i]);
+    }
+
+    return sum;
+}
+
+void xcorr_kernel_sse4_1(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[ 4 ], int len)
+{
+    int j;
+
+    __m128i vecX, vecX0, vecX1, vecX2, vecX3;
+    __m128i vecY0, vecY1, vecY2, vecY3;
+    __m128i sum0, sum1, sum2, sum3, vecSum;
+    __m128i initSum;
+
+    celt_assert(len >= 3);
+
+    sum0 = _mm_setzero_si128();
+    sum1 = _mm_setzero_si128();
+    sum2 = _mm_setzero_si128();
+    sum3 = _mm_setzero_si128();
+
+    for (j=0;j<(len-7);j+=8)
+    {
+        vecX = _mm_loadu_si128((__m128i *)(&x[j + 0]));
+        vecY0 = _mm_loadu_si128((__m128i *)(&y[j + 0]));
+        vecY1 = _mm_loadu_si128((__m128i *)(&y[j + 1]));
+        vecY2 = _mm_loadu_si128((__m128i *)(&y[j + 2]));
+        vecY3 = _mm_loadu_si128((__m128i *)(&y[j + 3]));
+
+        sum0 = _mm_add_epi32(sum0, _mm_madd_epi16(vecX, vecY0));
+        sum1 = _mm_add_epi32(sum1, _mm_madd_epi16(vecX, vecY1));
+        sum2 = _mm_add_epi32(sum2, _mm_madd_epi16(vecX, vecY2));
+        sum3 = _mm_add_epi32(sum3, _mm_madd_epi16(vecX, vecY3));
+    }
+
+    sum0 = _mm_add_epi32(sum0, _mm_unpackhi_epi64( sum0, sum0));
+    sum0 = _mm_add_epi32(sum0, _mm_shufflelo_epi16( sum0, 0x0E));
+
+    sum1 = _mm_add_epi32(sum1, _mm_unpackhi_epi64( sum1, sum1));
+    sum1 = _mm_add_epi32(sum1, _mm_shufflelo_epi16( sum1, 0x0E));
+
+    sum2 = _mm_add_epi32(sum2, _mm_unpackhi_epi64( sum2, sum2));
+    sum2 = _mm_add_epi32(sum2, _mm_shufflelo_epi16( sum2, 0x0E));
+
+    sum3 = _mm_add_epi32(sum3, _mm_unpackhi_epi64( sum3, sum3));
+    sum3 = _mm_add_epi32(sum3, _mm_shufflelo_epi16( sum3, 0x0E));
+
+    vecSum = _mm_unpacklo_epi64(_mm_unpacklo_epi32(sum0, sum1),
+          _mm_unpacklo_epi32(sum2, sum3));
+
+    for (;j<(len-3);j+=4)
+    {
+        vecX = OP_CVTEPI16_EPI32_M64(&x[j + 0]);
+        vecX0 = _mm_shuffle_epi32(vecX, 0x00);
+        vecX1 = _mm_shuffle_epi32(vecX, 0x55);
+        vecX2 = _mm_shuffle_epi32(vecX, 0xaa);
+        vecX3 = _mm_shuffle_epi32(vecX, 0xff);
+
+        vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]);
+        vecY1 = OP_CVTEPI16_EPI32_M64(&y[j + 1]);
+        vecY2 = OP_CVTEPI16_EPI32_M64(&y[j + 2]);
+        vecY3 = OP_CVTEPI16_EPI32_M64(&y[j + 3]);
+
+        sum0 = _mm_mullo_epi32(vecX0, vecY0);
+        sum1 = _mm_mullo_epi32(vecX1, vecY1);
+        sum2 = _mm_mullo_epi32(vecX2, vecY2);
+        sum3 = _mm_mullo_epi32(vecX3, vecY3);
+
+        sum0 = _mm_add_epi32(sum0, sum1);
+        sum2 = _mm_add_epi32(sum2, sum3);
+        vecSum = _mm_add_epi32(vecSum, sum0);
+        vecSum = _mm_add_epi32(vecSum, sum2);
+    }
+
+    for (;j<len;j++)
+    {
+        vecX = OP_CVTEPI16_EPI32_M64(&x[j + 0]);
+        vecX0 = _mm_shuffle_epi32(vecX, 0x00);
+
+        vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]);
+
+        sum0 = _mm_mullo_epi32(vecX0, vecY0);
+        vecSum = _mm_add_epi32(vecSum, sum0);
+    }
+
+    initSum = _mm_loadu_si128((__m128i *)(&sum[0]));
+    initSum = _mm_add_epi32(initSum, vecSum);
+    _mm_storeu_si128((__m128i *)sum, initSum);
+}
+#endif
diff --git a/libopus/celt/x86/vq_sse.h b/libopus/celt/x86/vq_sse.h
new file mode 100644
index 0000000..b4efe8f
--- /dev/null
+++ b/libopus/celt/x86/vq_sse.h
@@ -0,0 +1,50 @@
+/* Copyright (c) 2016  Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef VQ_SSE_H
+#define VQ_SSE_H
+
+#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT)
+#define OVERRIDE_OP_PVQ_SEARCH
+
+opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch);
+
+#if defined(OPUS_X86_PRESUME_SSE2)
+#define op_pvq_search(x, iy, K, N, arch) \
+    (op_pvq_search_sse2(x, iy, K, N, arch))
+
+#else
+
+extern opus_val16 (*const OP_PVQ_SEARCH_IMPL[OPUS_ARCHMASK + 1])(
+      celt_norm *_X, int *iy, int K, int N, int arch);
+
+#  define op_pvq_search(X, iy, K, N, arch) \
+    ((*OP_PVQ_SEARCH_IMPL[(arch) & OPUS_ARCHMASK])(X, iy, K, N, arch))
+
+#endif
+#endif
+
+#endif
diff --git a/libopus/celt/x86/vq_sse2.c b/libopus/celt/x86/vq_sse2.c
new file mode 100644
index 0000000..7750428
--- /dev/null
+++ b/libopus/celt/x86/vq_sse2.c
@@ -0,0 +1,217 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2007-2016 Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include "celt_lpc.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "vq.h"
+#include "x86cpu.h"
+
+
+#ifndef FIXED_POINT
+
+opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch)
+{
+   int i, j;
+   int pulsesLeft;
+   float xy, yy;
+   VARDECL(celt_norm, y);
+   VARDECL(celt_norm, X);
+   VARDECL(float, signy);
+   __m128 signmask;
+   __m128 sums;
+   __m128i fours;
+   SAVE_STACK;
+
+   (void)arch;
+   /* All bits set to zero, except for the sign bit. */
+   signmask = _mm_set_ps1(-0.f);
+   fours = _mm_set_epi32(4, 4, 4, 4);
+   ALLOC(y, N+3, celt_norm);
+   ALLOC(X, N+3, celt_norm);
+   ALLOC(signy, N+3, float);
+
+   OPUS_COPY(X, _X, N);
+   X[N] = X[N+1] = X[N+2] = 0;
+   sums = _mm_setzero_ps();
+   for (j=0;j<N;j+=4)
+   {
+      __m128 x4, s4;
+      x4 = _mm_loadu_ps(&X[j]);
+      s4 = _mm_cmplt_ps(x4, _mm_setzero_ps());
+      /* Get rid of the sign */
+      x4 = _mm_andnot_ps(signmask, x4);
+      sums = _mm_add_ps(sums, x4);
+      /* Clear y and iy in case we don't do the projection. */
+      _mm_storeu_ps(&y[j], _mm_setzero_ps());
+      _mm_storeu_si128((__m128i*)&iy[j], _mm_setzero_si128());
+      _mm_storeu_ps(&X[j], x4);
+      _mm_storeu_ps(&signy[j], s4);
+   }
+   sums = _mm_add_ps(sums, _mm_shuffle_ps(sums, sums, _MM_SHUFFLE(1, 0, 3, 2)));
+   sums = _mm_add_ps(sums, _mm_shuffle_ps(sums, sums, _MM_SHUFFLE(2, 3, 0, 1)));
+
+   xy = yy = 0;
+
+   pulsesLeft = K;
+
+   /* Do a pre-search by projecting on the pyramid */
+   if (K > (N>>1))
+   {
+      __m128i pulses_sum;
+      __m128 yy4, xy4;
+      __m128 rcp4;
+      opus_val32 sum = _mm_cvtss_f32(sums);
+      /* If X is too small, just replace it with a pulse at 0 */
+      /* Prevents infinities and NaNs from causing too many pulses
+         to be allocated. 64 is an approximation of infinity here. */
+      if (!(sum > EPSILON && sum < 64))
+      {
+         X[0] = QCONST16(1.f,14);
+         j=1; do
+            X[j]=0;
+         while (++j<N);
+         sums = _mm_set_ps1(1.f);
+      }
+      /* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
+      rcp4 = _mm_mul_ps(_mm_set_ps1((float)(K+.8)), _mm_rcp_ps(sums));
+      xy4 = yy4 = _mm_setzero_ps();
+      pulses_sum = _mm_setzero_si128();
+      for (j=0;j<N;j+=4)
+      {
+         __m128 rx4, x4, y4;
+         __m128i iy4;
+         x4 = _mm_loadu_ps(&X[j]);
+         rx4 = _mm_mul_ps(x4, rcp4);
+         iy4 = _mm_cvttps_epi32(rx4);
+         pulses_sum = _mm_add_epi32(pulses_sum, iy4);
+         _mm_storeu_si128((__m128i*)&iy[j], iy4);
+         y4 = _mm_cvtepi32_ps(iy4);
+         xy4 = _mm_add_ps(xy4, _mm_mul_ps(x4, y4));
+         yy4 = _mm_add_ps(yy4, _mm_mul_ps(y4, y4));
+         /* double the y[] vector so we don't have to do it in the search loop. */
+         _mm_storeu_ps(&y[j], _mm_add_ps(y4, y4));
+      }
+      pulses_sum = _mm_add_epi32(pulses_sum, _mm_shuffle_epi32(pulses_sum, _MM_SHUFFLE(1, 0, 3, 2)));
+      pulses_sum = _mm_add_epi32(pulses_sum, _mm_shuffle_epi32(pulses_sum, _MM_SHUFFLE(2, 3, 0, 1)));
+      pulsesLeft -= _mm_cvtsi128_si32(pulses_sum);
+      xy4 = _mm_add_ps(xy4, _mm_shuffle_ps(xy4, xy4, _MM_SHUFFLE(1, 0, 3, 2)));
+      xy4 = _mm_add_ps(xy4, _mm_shuffle_ps(xy4, xy4, _MM_SHUFFLE(2, 3, 0, 1)));
+      xy = _mm_cvtss_f32(xy4);
+      yy4 = _mm_add_ps(yy4, _mm_shuffle_ps(yy4, yy4, _MM_SHUFFLE(1, 0, 3, 2)));
+      yy4 = _mm_add_ps(yy4, _mm_shuffle_ps(yy4, yy4, _MM_SHUFFLE(2, 3, 0, 1)));
+      yy = _mm_cvtss_f32(yy4);
+   }
+   X[N] = X[N+1] = X[N+2] = -100;
+   y[N] = y[N+1] = y[N+2] = 100;
+   celt_sig_assert(pulsesLeft>=0);
+
+   /* This should never happen, but just in case it does (e.g. on silence)
+      we fill the first bin with pulses. */
+   if (pulsesLeft > N+3)
+   {
+      opus_val16 tmp = (opus_val16)pulsesLeft;
+      yy = MAC16_16(yy, tmp, tmp);
+      yy = MAC16_16(yy, tmp, y[0]);
+      iy[0] += pulsesLeft;
+      pulsesLeft=0;
+   }
+
+   for (i=0;i<pulsesLeft;i++)
+   {
+      int best_id;
+      __m128 xy4, yy4;
+      __m128 max, max2;
+      __m128i count;
+      __m128i pos;
+      /* The squared magnitude term gets added anyway, so we might as well
+         add it outside the loop */
+      yy = ADD16(yy, 1);
+      xy4 = _mm_load1_ps(&xy);
+      yy4 = _mm_load1_ps(&yy);
+      max = _mm_setzero_ps();
+      pos = _mm_setzero_si128();
+      count = _mm_set_epi32(3, 2, 1, 0);
+      for (j=0;j<N;j+=4)
+      {
+         __m128 x4, y4, r4;
+         x4 = _mm_loadu_ps(&X[j]);
+         y4 = _mm_loadu_ps(&y[j]);
+         x4 = _mm_add_ps(x4, xy4);
+         y4 = _mm_add_ps(y4, yy4);
+         y4 = _mm_rsqrt_ps(y4);
+         r4 = _mm_mul_ps(x4, y4);
+         /* Update the index of the max. */
+         pos = _mm_max_epi16(pos, _mm_and_si128(count, _mm_castps_si128(_mm_cmpgt_ps(r4, max))));
+         /* Update the max. */
+         max = _mm_max_ps(max, r4);
+         /* Update the indices (+4) */
+         count = _mm_add_epi32(count, fours);
+      }
+      /* Horizontal max */
+      max2 = _mm_max_ps(max, _mm_shuffle_ps(max, max, _MM_SHUFFLE(1, 0, 3, 2)));
+      max2 = _mm_max_ps(max2, _mm_shuffle_ps(max2, max2, _MM_SHUFFLE(2, 3, 0, 1)));
+      /* Now that max2 contains the max at all positions, look at which value(s) of the
+         partial max is equal to the global max. */
+      pos = _mm_and_si128(pos, _mm_castps_si128(_mm_cmpeq_ps(max, max2)));
+      pos = _mm_max_epi16(pos, _mm_unpackhi_epi64(pos, pos));
+      pos = _mm_max_epi16(pos, _mm_shufflelo_epi16(pos, _MM_SHUFFLE(1, 0, 3, 2)));
+      best_id = _mm_cvtsi128_si32(pos);
+
+      /* Updating the sums of the new pulse(s) */
+      xy = ADD32(xy, EXTEND32(X[best_id]));
+      /* We're multiplying y[j] by two so we don't have to do it here */
+      yy = ADD16(yy, y[best_id]);
+
+      /* Only now that we've made the final choice, update y/iy */
+      /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
+      y[best_id] += 2;
+      iy[best_id]++;
+   }
+
+   /* Put the original sign back */
+   for (j=0;j<N;j+=4)
+   {
+      __m128i y4;
+      __m128i s4;
+      y4 = _mm_loadu_si128((__m128i*)&iy[j]);
+      s4 = _mm_castps_si128(_mm_loadu_ps(&signy[j]));
+      y4 = _mm_xor_si128(_mm_add_epi32(y4, s4), s4);
+      _mm_storeu_si128((__m128i*)&iy[j], y4);
+   }
+   RESTORE_STACK;
+   return yy;
+}
+
+#endif
diff --git a/libopus/celt/x86/x86_celt_map.c b/libopus/celt/x86/x86_celt_map.c
new file mode 100644
index 0000000..d39d88e
--- /dev/null
+++ b/libopus/celt/x86/x86_celt_map.c
@@ -0,0 +1,167 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if defined(HAVE_CONFIG_H)
+#include "config.h"
+#endif
+
+#include "x86/x86cpu.h"
+#include "celt_lpc.h"
+#include "pitch.h"
+#include "pitch_sse.h"
+#include "vq.h"
+
+#if defined(OPUS_HAVE_RTCD)
+
+# if defined(FIXED_POINT)
+
+#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)
+
+void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
+         const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16       *y,
+         int              N,
+         int              ord,
+         int              arch
+) = {
+  celt_fir_c,                /* non-sse */
+  celt_fir_c,
+  celt_fir_c,
+  MAY_HAVE_SSE4_1(celt_fir), /* sse4.1  */
+  MAY_HAVE_SSE4_1(celt_fir)  /* avx  */
+};
+
+void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
+         const opus_val16 *x,
+         const opus_val16 *y,
+         opus_val32       sum[4],
+         int              len
+) = {
+  xcorr_kernel_c,                /* non-sse */
+  xcorr_kernel_c,
+  xcorr_kernel_c,
+  MAY_HAVE_SSE4_1(xcorr_kernel), /* sse4.1  */
+  MAY_HAVE_SSE4_1(xcorr_kernel)  /* avx  */
+};
+
+#endif
+
+#if (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) ||  \
+ (!defined(OPUS_X86_MAY_HAVE_SSE_4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2))
+
+opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
+         const opus_val16 *x,
+         const opus_val16 *y,
+         int              N
+) = {
+  celt_inner_prod_c,                /* non-sse */
+  celt_inner_prod_c,
+  MAY_HAVE_SSE2(celt_inner_prod),
+  MAY_HAVE_SSE4_1(celt_inner_prod), /* sse4.1  */
+  MAY_HAVE_SSE4_1(celt_inner_prod)  /* avx  */
+};
+
+#endif
+
+# else
+
+#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)
+
+void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
+         const opus_val16 *x,
+         const opus_val16 *y,
+         opus_val32       sum[4],
+         int              len
+) = {
+  xcorr_kernel_c,                /* non-sse */
+  MAY_HAVE_SSE(xcorr_kernel),
+  MAY_HAVE_SSE(xcorr_kernel),
+  MAY_HAVE_SSE(xcorr_kernel),
+  MAY_HAVE_SSE(xcorr_kernel)
+};
+
+opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
+         const opus_val16 *x,
+         const opus_val16 *y,
+         int              N
+) = {
+  celt_inner_prod_c,                /* non-sse */
+  MAY_HAVE_SSE(celt_inner_prod),
+  MAY_HAVE_SSE(celt_inner_prod),
+  MAY_HAVE_SSE(celt_inner_prod),
+  MAY_HAVE_SSE(celt_inner_prod)
+};
+
+void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
+                    const opus_val16 *x,
+                    const opus_val16 *y01,
+                    const opus_val16 *y02,
+                    int               N,
+                    opus_val32       *xy1,
+                    opus_val32       *xy2
+) = {
+  dual_inner_prod_c,                /* non-sse */
+  MAY_HAVE_SSE(dual_inner_prod),
+  MAY_HAVE_SSE(dual_inner_prod),
+  MAY_HAVE_SSE(dual_inner_prod),
+  MAY_HAVE_SSE(dual_inner_prod)
+};
+
+void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
+              opus_val32 *y,
+              opus_val32 *x,
+              int         T,
+              int         N,
+              opus_val16  g10,
+              opus_val16  g11,
+              opus_val16  g12
+) = {
+  comb_filter_const_c,                /* non-sse */
+  MAY_HAVE_SSE(comb_filter_const),
+  MAY_HAVE_SSE(comb_filter_const),
+  MAY_HAVE_SSE(comb_filter_const),
+  MAY_HAVE_SSE(comb_filter_const)
+};
+
+
+#endif
+
+#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)
+opus_val16 (*const OP_PVQ_SEARCH_IMPL[OPUS_ARCHMASK + 1])(
+      celt_norm *_X, int *iy, int K, int N, int arch
+) = {
+  op_pvq_search_c,                /* non-sse */
+  op_pvq_search_c,
+  MAY_HAVE_SSE2(op_pvq_search),
+  MAY_HAVE_SSE2(op_pvq_search),
+  MAY_HAVE_SSE2(op_pvq_search)
+};
+#endif
+
+#endif
+#endif
diff --git a/libopus/celt/x86/x86cpu.c b/libopus/celt/x86/x86cpu.c
new file mode 100644
index 0000000..080eb25
--- /dev/null
+++ b/libopus/celt/x86/x86cpu.c
@@ -0,0 +1,157 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "cpu_support.h"
+#include "macros.h"
+#include "main.h"
+#include "pitch.h"
+#include "x86cpu.h"
+
+#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
+  (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
+  (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
+  (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
+
+
+#if defined(_MSC_VER)
+
+#include <intrin.h>
+static _inline void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
+{
+    __cpuid((int*)CPUInfo, InfoType);
+}
+
+#else
+
+#if defined(CPU_INFO_BY_C)
+#include <cpuid.h>
+#endif
+
+static void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
+{
+#if defined(CPU_INFO_BY_ASM)
+#if defined(__i386__) && defined(__PIC__)
+/* %ebx is PIC register in 32-bit, so mustn't clobber it. */
+    __asm__ __volatile__ (
+        "xchg %%ebx, %1\n"
+        "cpuid\n"
+        "xchg %%ebx, %1\n":
+        "=a" (CPUInfo[0]),
+        "=r" (CPUInfo[1]),
+        "=c" (CPUInfo[2]),
+        "=d" (CPUInfo[3]) :
+        "0" (InfoType)
+    );
+#else
+    __asm__ __volatile__ (
+        "cpuid":
+        "=a" (CPUInfo[0]),
+        "=b" (CPUInfo[1]),
+        "=c" (CPUInfo[2]),
+        "=d" (CPUInfo[3]) :
+        "0" (InfoType)
+    );
+#endif
+#elif defined(CPU_INFO_BY_C)
+    __get_cpuid(InfoType, &(CPUInfo[0]), &(CPUInfo[1]), &(CPUInfo[2]), &(CPUInfo[3]));
+#endif
+}
+
+#endif
+
+typedef struct CPU_Feature{
+    /*  SIMD: 128-bit */
+    int HW_SSE;
+    int HW_SSE2;
+    int HW_SSE41;
+    /*  SIMD: 256-bit */
+    int HW_AVX;
+} CPU_Feature;
+
+static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
+{
+    unsigned int info[4] = {0};
+    unsigned int nIds = 0;
+
+    cpuid(info, 0);
+    nIds = info[0];
+
+    if (nIds >= 1){
+        cpuid(info, 1);
+        cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0;
+        cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0;
+        cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0;
+        cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0;
+    }
+    else {
+        cpu_feature->HW_SSE = 0;
+        cpu_feature->HW_SSE2 = 0;
+        cpu_feature->HW_SSE41 = 0;
+        cpu_feature->HW_AVX = 0;
+    }
+}
+
+int opus_select_arch(void)
+{
+    CPU_Feature cpu_feature;
+    int arch;
+
+    opus_cpu_feature_check(&cpu_feature);
+
+    arch = 0;
+    if (!cpu_feature.HW_SSE)
+    {
+       return arch;
+    }
+    arch++;
+
+    if (!cpu_feature.HW_SSE2)
+    {
+       return arch;
+    }
+    arch++;
+
+    if (!cpu_feature.HW_SSE41)
+    {
+        return arch;
+    }
+    arch++;
+
+    if (!cpu_feature.HW_AVX)
+    {
+        return arch;
+    }
+    arch++;
+
+    return arch;
+}
+
+#endif
diff --git a/libopus/celt/x86/x86cpu.h b/libopus/celt/x86/x86cpu.h
new file mode 100644
index 0000000..1e2bf17
--- /dev/null
+++ b/libopus/celt/x86/x86cpu.h
@@ -0,0 +1,95 @@
+/* Copyright (c) 2014, Cisco Systems, INC
+   Written by XiangMingZhu WeiZhou MinPeng YanWang
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(X86CPU_H)
+# define X86CPU_H
+
+# if defined(OPUS_X86_MAY_HAVE_SSE)
+#  define MAY_HAVE_SSE(name) name ## _sse
+# else
+#  define MAY_HAVE_SSE(name) name ## _c
+# endif
+
+# if defined(OPUS_X86_MAY_HAVE_SSE2)
+#  define MAY_HAVE_SSE2(name) name ## _sse2
+# else
+#  define MAY_HAVE_SSE2(name) name ## _c
+# endif
+
+# if defined(OPUS_X86_MAY_HAVE_SSE4_1)
+#  define MAY_HAVE_SSE4_1(name) name ## _sse4_1
+# else
+#  define MAY_HAVE_SSE4_1(name) name ## _c
+# endif
+
+# if defined(OPUS_X86_MAY_HAVE_AVX)
+#  define MAY_HAVE_AVX(name) name ## _avx
+# else
+#  define MAY_HAVE_AVX(name) name ## _c
+# endif
+
+# if defined(OPUS_HAVE_RTCD)
+int opus_select_arch(void);
+# endif
+
+/*gcc appears to emit MOVDQA's to load the argument of an _mm_cvtepi8_epi32()
+  or _mm_cvtepi16_epi32() when optimizations are disabled, even though the
+  actual PMOVSXWD instruction takes an m32 or m64. Unlike a normal memory
+  reference, these require 16-byte alignment and load a full 16 bytes (instead
+  of 4 or 8), possibly reading out of bounds.
+
+  We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or
+  _mm_loadl_epi64(), which should have the same semantics as an m32 or m64
+  reference in the PMOVSXWD instruction itself, but gcc is not smart enough to
+  optimize this out when optimizations ARE enabled.
+
+  Clang, in contrast, requires us to do this always for _mm_cvtepi8_epi32
+  (which is fair, since technically the compiler is always allowed to do the
+  dereference before invoking the function implementing the intrinsic).
+  However, it is smart enough to eliminate the extra MOVD instruction.
+  For _mm_cvtepi16_epi32, it does the right thing, though does *not* optimize out
+  the extra MOVQ if it's specified explicitly */
+
+# if defined(__clang__) || !defined(__OPTIMIZE__)
+#  define OP_CVTEPI8_EPI32_M32(x) \
+ (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x))))
+# else
+#  define OP_CVTEPI8_EPI32_M32(x) \
+ (_mm_cvtepi8_epi32(*(__m128i *)(x)))
+#endif
+
+/* similar reasoning about the instruction sequence as in the 32-bit macro above,
+ */
+# if defined(__clang__) || !defined(__OPTIMIZE__)
+#  define OP_CVTEPI16_EPI32_M64(x) \
+ (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x))))
+# else
+#  define OP_CVTEPI16_EPI32_M64(x) \
+ (_mm_cvtepi16_epi32(*(__m128i *)(x)))
+# endif
+
+#endif