mirror of https://github.com/mstorsjo/fdk-aac.git
Merge "UBSAN overflow in FFT"
This commit is contained in:
commit
f59fd73d44
|
@ -1,572 +0,0 @@
|
||||||
/* -----------------------------------------------------------------------------
|
|
||||||
Software License for The Fraunhofer FDK AAC Codec Library for Android
|
|
||||||
|
|
||||||
© Copyright 1995 - 2018 Fraunhofer-Gesellschaft zur Förderung der angewandten
|
|
||||||
Forschung e.V. All rights reserved.
|
|
||||||
|
|
||||||
1. INTRODUCTION
|
|
||||||
The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software
|
|
||||||
that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding
|
|
||||||
scheme for digital audio. This FDK AAC Codec software is intended to be used on
|
|
||||||
a wide variety of Android devices.
|
|
||||||
|
|
||||||
AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient
|
|
||||||
general perceptual audio codecs. AAC-ELD is considered the best-performing
|
|
||||||
full-bandwidth communications codec by independent studies and is widely
|
|
||||||
deployed. AAC has been standardized by ISO and IEC as part of the MPEG
|
|
||||||
specifications.
|
|
||||||
|
|
||||||
Patent licenses for necessary patent claims for the FDK AAC Codec (including
|
|
||||||
those of Fraunhofer) may be obtained through Via Licensing
|
|
||||||
(www.vialicensing.com) or through the respective patent owners individually for
|
|
||||||
the purpose of encoding or decoding bit streams in products that are compliant
|
|
||||||
with the ISO/IEC MPEG audio standards. Please note that most manufacturers of
|
|
||||||
Android devices already license these patent claims through Via Licensing or
|
|
||||||
directly from the patent owners, and therefore FDK AAC Codec software may
|
|
||||||
already be covered under those patent licenses when it is used for those
|
|
||||||
licensed purposes only.
|
|
||||||
|
|
||||||
Commercially-licensed AAC software libraries, including floating-point versions
|
|
||||||
with enhanced sound quality, are also available from Fraunhofer. Users are
|
|
||||||
encouraged to check the Fraunhofer website for additional applications
|
|
||||||
information and documentation.
|
|
||||||
|
|
||||||
2. COPYRIGHT LICENSE
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without modification,
|
|
||||||
are permitted without payment of copyright license fees provided that you
|
|
||||||
satisfy the following conditions:
|
|
||||||
|
|
||||||
You must retain the complete text of this software license in redistributions of
|
|
||||||
the FDK AAC Codec or your modifications thereto in source code form.
|
|
||||||
|
|
||||||
You must retain the complete text of this software license in the documentation
|
|
||||||
and/or other materials provided with redistributions of the FDK AAC Codec or
|
|
||||||
your modifications thereto in binary form. You must make available free of
|
|
||||||
charge copies of the complete source code of the FDK AAC Codec and your
|
|
||||||
modifications thereto to recipients of copies in binary form.
|
|
||||||
|
|
||||||
The name of Fraunhofer may not be used to endorse or promote products derived
|
|
||||||
from this library without prior written permission.
|
|
||||||
|
|
||||||
You may not charge copyright license fees for anyone to use, copy or distribute
|
|
||||||
the FDK AAC Codec software or your modifications thereto.
|
|
||||||
|
|
||||||
Your modified versions of the FDK AAC Codec must carry prominent notices stating
|
|
||||||
that you changed the software and the date of any change. For modified versions
|
|
||||||
of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android"
|
|
||||||
must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK
|
|
||||||
AAC Codec Library for Android."
|
|
||||||
|
|
||||||
3. NO PATENT LICENSE
|
|
||||||
|
|
||||||
NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without
|
|
||||||
limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE.
|
|
||||||
Fraunhofer provides no warranty of patent non-infringement with respect to this
|
|
||||||
software.
|
|
||||||
|
|
||||||
You may use this FDK AAC Codec software or modifications thereto only for
|
|
||||||
purposes that are authorized by appropriate patent licenses.
|
|
||||||
|
|
||||||
4. DISCLAIMER
|
|
||||||
|
|
||||||
This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright
|
|
||||||
holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES,
|
|
||||||
including but not limited to the implied warranties of merchantability and
|
|
||||||
fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
|
||||||
CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary,
|
|
||||||
or consequential damages, including but not limited to procurement of substitute
|
|
||||||
goods or services; loss of use, data, or profits, or business interruption,
|
|
||||||
however caused and on any theory of liability, whether in contract, strict
|
|
||||||
liability, or tort (including negligence), arising in any way out of the use of
|
|
||||||
this software, even if advised of the possibility of such damage.
|
|
||||||
|
|
||||||
5. CONTACT INFORMATION
|
|
||||||
|
|
||||||
Fraunhofer Institute for Integrated Circuits IIS
|
|
||||||
Attention: Audio and Multimedia Departments - FDK AAC LL
|
|
||||||
Am Wolfsmantel 33
|
|
||||||
91058 Erlangen, Germany
|
|
||||||
|
|
||||||
www.iis.fraunhofer.de/amm
|
|
||||||
amm-info@iis.fraunhofer.de
|
|
||||||
----------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
/******************* Library for basic calculation routines ********************
|
|
||||||
|
|
||||||
Author(s):
|
|
||||||
|
|
||||||
Description:
|
|
||||||
|
|
||||||
*******************************************************************************/
|
|
||||||
|
|
||||||
#ifdef FUNCTION_dct_IV_func1
|
|
||||||
|
|
||||||
/*
|
|
||||||
Note: This assembler routine is here, because the ARM926 compiler does
|
|
||||||
not encode the inline assembler with optimal speed.
|
|
||||||
With this version, we save 2 cycles per loop iteration.
|
|
||||||
*/
|
|
||||||
|
|
||||||
__asm void dct_IV_func1(int i, const FIXP_SPK *twiddle,
|
|
||||||
FIXP_DBL *RESTRICT pDat_0, FIXP_DBL *RESTRICT pDat_1) {
|
|
||||||
/* Register map:
|
|
||||||
r0 i
|
|
||||||
r1 twiddle
|
|
||||||
r2 pDat_0
|
|
||||||
r3 pDat_1
|
|
||||||
r4 accu1
|
|
||||||
r5 accu2
|
|
||||||
r6 accu3
|
|
||||||
r7 accu4
|
|
||||||
r8 val_tw
|
|
||||||
r9 accuX
|
|
||||||
*/
|
|
||||||
PUSH{r4 - r9}
|
|
||||||
|
|
||||||
/* 44 cycles for 2 iterations = 22 cycles/iteration */
|
|
||||||
dct_IV_loop1_start
|
|
||||||
/* First iteration */
|
|
||||||
LDR r8,
|
|
||||||
[r1],
|
|
||||||
# 4 // val_tw = *twiddle++;
|
|
||||||
LDR r5,
|
|
||||||
[ r2, #0 ] // accu2 = pDat_0[0]
|
|
||||||
LDR r4,
|
|
||||||
[ r3, #0 ] // accu1 = pDat_1[0]
|
|
||||||
|
|
||||||
SMULWT r9,
|
|
||||||
r5,
|
|
||||||
r8 // accuX = accu2*val_tw.l
|
|
||||||
SMULWB r5,
|
|
||||||
r5,
|
|
||||||
r8 // accu2 = accu2*val_tw.h
|
|
||||||
RSB r9,
|
|
||||||
r9,
|
|
||||||
# 0 // accuX =-accu2*val_tw.l
|
|
||||||
SMLAWT r5, r4, r8,
|
|
||||||
r5 // accu2 = accu2*val_tw.h + accu1*val_tw.l
|
|
||||||
SMLAWB r4,
|
|
||||||
r4, r8,
|
|
||||||
r9 // accu1 = accu1*val_tw.h - accu2*val_tw.l
|
|
||||||
|
|
||||||
LDR r8,
|
|
||||||
[r1],
|
|
||||||
# 4 // val_tw = *twiddle++;
|
|
||||||
LDR r7,
|
|
||||||
[ r3, # - 4 ] // accu4 = pDat_1[-1]
|
|
||||||
LDR r6,
|
|
||||||
[ r2, #4 ] // accu3 = pDat_0[1]
|
|
||||||
|
|
||||||
SMULWB r9,
|
|
||||||
r7,
|
|
||||||
r8 // accuX = accu4*val_tw.h
|
|
||||||
SMULWT r7,
|
|
||||||
r7,
|
|
||||||
r8 // accu4 = accu4*val_tw.l
|
|
||||||
RSB r9,
|
|
||||||
r9,
|
|
||||||
# 0 // accuX =-accu4*val_tw.h
|
|
||||||
SMLAWB r7, r6, r8,
|
|
||||||
r7 // accu4 = accu4*val_tw.l+accu3*val_tw.h
|
|
||||||
SMLAWT r6,
|
|
||||||
r6, r8,
|
|
||||||
r9 // accu3 = accu3*val_tw.l-accu4*val_tw.h
|
|
||||||
|
|
||||||
STR r5,
|
|
||||||
[r2],
|
|
||||||
# 4 // *pDat_0++ = accu2
|
|
||||||
STR r4, [r2],
|
|
||||||
# 4 // *pDat_0++ = accu1
|
|
||||||
STR r6, [r3],
|
|
||||||
#- 4 // *pDat_1-- = accu3
|
|
||||||
STR r7, [r3],
|
|
||||||
#- 4 // *pDat_1-- = accu4
|
|
||||||
|
|
||||||
/* Second iteration */
|
|
||||||
LDR r8, [r1],
|
|
||||||
# 4 // val_tw = *twiddle++;
|
|
||||||
LDR r5,
|
|
||||||
[ r2, #0 ] // accu2 = pDat_0[0]
|
|
||||||
LDR r4,
|
|
||||||
[ r3, #0 ] // accu1 = pDat_1[0]
|
|
||||||
|
|
||||||
SMULWT r9,
|
|
||||||
r5,
|
|
||||||
r8 // accuX = accu2*val_tw.l
|
|
||||||
SMULWB r5,
|
|
||||||
r5,
|
|
||||||
r8 // accu2 = accu2*val_tw.h
|
|
||||||
RSB r9,
|
|
||||||
r9,
|
|
||||||
# 0 // accuX =-accu2*val_tw.l
|
|
||||||
SMLAWT r5, r4, r8,
|
|
||||||
r5 // accu2 = accu2*val_tw.h + accu1*val_tw.l
|
|
||||||
SMLAWB r4,
|
|
||||||
r4, r8,
|
|
||||||
r9 // accu1 = accu1*val_tw.h - accu2*val_tw.l
|
|
||||||
|
|
||||||
LDR r8,
|
|
||||||
[r1],
|
|
||||||
# 4 // val_tw = *twiddle++;
|
|
||||||
LDR r7,
|
|
||||||
[ r3, # - 4 ] // accu4 = pDat_1[-1]
|
|
||||||
LDR r6,
|
|
||||||
[ r2, #4 ] // accu3 = pDat_0[1]
|
|
||||||
|
|
||||||
SMULWB r9,
|
|
||||||
r7,
|
|
||||||
r8 // accuX = accu4*val_tw.h
|
|
||||||
SMULWT r7,
|
|
||||||
r7,
|
|
||||||
r8 // accu4 = accu4*val_tw.l
|
|
||||||
RSB r9,
|
|
||||||
r9,
|
|
||||||
# 0 // accuX =-accu4*val_tw.h
|
|
||||||
SMLAWB r7, r6, r8,
|
|
||||||
r7 // accu4 = accu4*val_tw.l+accu3*val_tw.h
|
|
||||||
SMLAWT r6,
|
|
||||||
r6, r8,
|
|
||||||
r9 // accu3 = accu3*val_tw.l-accu4*val_tw.h
|
|
||||||
|
|
||||||
STR r5,
|
|
||||||
[r2],
|
|
||||||
# 4 // *pDat_0++ = accu2
|
|
||||||
STR r4, [r2],
|
|
||||||
# 4 // *pDat_0++ = accu1
|
|
||||||
STR r6, [r3],
|
|
||||||
#- 4 // *pDat_1-- = accu3
|
|
||||||
STR r7, [r3],
|
|
||||||
#- 4 // *pDat_1-- = accu4
|
|
||||||
|
|
||||||
SUBS r0, r0,
|
|
||||||
# 1 BNE dct_IV_loop1_start
|
|
||||||
|
|
||||||
POP { r4 - r9 }
|
|
||||||
|
|
||||||
BX lr
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* FUNCTION_dct_IV_func1 */
|
|
||||||
|
|
||||||
#ifdef FUNCTION_dct_IV_func2
|
|
||||||
|
|
||||||
/* __attribute__((noinline)) */
|
|
||||||
static inline void dct_IV_func2(int i, const FIXP_SPK *twiddle,
|
|
||||||
FIXP_DBL *pDat_0, FIXP_DBL *pDat_1, int inc) {
|
|
||||||
FIXP_DBL accu1, accu2, accu3, accu4, accuX;
|
|
||||||
LONG val_tw;
|
|
||||||
|
|
||||||
accu1 = pDat_1[-2];
|
|
||||||
accu2 = pDat_1[-1];
|
|
||||||
|
|
||||||
*--pDat_1 = -(pDat_0[1] >> 1);
|
|
||||||
*pDat_0++ = (pDat_0[0] >> 1);
|
|
||||||
|
|
||||||
twiddle += inc;
|
|
||||||
|
|
||||||
__asm {
|
|
||||||
LDR val_tw, [twiddle], inc, LSL #2 // val_tw = *twiddle; twiddle += inc
|
|
||||||
B dct_IV_loop2_2nd_part
|
|
||||||
|
|
||||||
/* 42 cycles for 2 iterations = 21 cycles/iteration */
|
|
||||||
dct_IV_loop2:
|
|
||||||
SMULWT accuX, accu2, val_tw
|
|
||||||
SMULWB accu2, accu2, val_tw
|
|
||||||
RSB accuX, accuX, #0
|
|
||||||
SMLAWB accuX, accu1, val_tw, accuX
|
|
||||||
SMLAWT accu2, accu1, val_tw, accu2
|
|
||||||
STR accuX, [pDat_0], #4
|
|
||||||
STR accu2, [pDat_1, #-4] !
|
|
||||||
|
|
||||||
LDR accu4, [pDat_0, #4]
|
|
||||||
LDR accu3, [pDat_0]
|
|
||||||
SMULWB accuX, accu4, val_tw
|
|
||||||
SMULWT accu4, accu4, val_tw
|
|
||||||
RSB accuX, accuX, #0
|
|
||||||
SMLAWT accuX, accu3, val_tw, accuX
|
|
||||||
SMLAWB accu4, accu3, val_tw, accu4
|
|
||||||
|
|
||||||
LDR accu1, [pDat_1, #-8]
|
|
||||||
LDR accu2, [pDat_1, #-4]
|
|
||||||
|
|
||||||
LDR val_tw, [twiddle], inc, LSL #2 // val_tw = *twiddle; twiddle += inc
|
|
||||||
|
|
||||||
STR accuX, [pDat_1, #-4] !
|
|
||||||
STR accu4, [pDat_0], #4
|
|
||||||
|
|
||||||
dct_IV_loop2_2nd_part:
|
|
||||||
SMULWT accuX, accu2, val_tw
|
|
||||||
SMULWB accu2, accu2, val_tw
|
|
||||||
RSB accuX, accuX, #0
|
|
||||||
SMLAWB accuX, accu1, val_tw, accuX
|
|
||||||
SMLAWT accu2, accu1, val_tw, accu2
|
|
||||||
STR accuX, [pDat_0], #4
|
|
||||||
STR accu2, [pDat_1, #-4] !
|
|
||||||
|
|
||||||
LDR accu4, [pDat_0, #4]
|
|
||||||
LDR accu3, [pDat_0]
|
|
||||||
SMULWB accuX, accu4, val_tw
|
|
||||||
SMULWT accu4, accu4, val_tw
|
|
||||||
RSB accuX, accuX, #0
|
|
||||||
SMLAWT accuX, accu3, val_tw, accuX
|
|
||||||
SMLAWB accu4, accu3, val_tw, accu4
|
|
||||||
|
|
||||||
LDR accu1, [pDat_1, #-8]
|
|
||||||
LDR accu2, [pDat_1, #-4]
|
|
||||||
|
|
||||||
STR accuX, [pDat_1, #-4] !
|
|
||||||
STR accu4, [pDat_0], #4
|
|
||||||
|
|
||||||
LDR val_tw, [twiddle], inc, LSL #2 // val_tw = *twiddle; twiddle += inc
|
|
||||||
|
|
||||||
SUBS i, i, #1
|
|
||||||
BNE dct_IV_loop2
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Last Sin and Cos value pair are the same */
|
|
||||||
accu1 = fMultDiv2(accu1, WTC(0x5a82799a));
|
|
||||||
accu2 = fMultDiv2(accu2, WTC(0x5a82799a));
|
|
||||||
|
|
||||||
*--pDat_1 = accu1 + accu2;
|
|
||||||
*pDat_0++ = accu1 - accu2;
|
|
||||||
}
|
|
||||||
#endif /* FUNCTION_dct_IV_func2 */
|
|
||||||
|
|
||||||
#ifdef FUNCTION_dst_IV_func1
|
|
||||||
|
|
||||||
__asm void dst_IV_func1(int i, const FIXP_SPK *twiddle, FIXP_DBL *pDat_0,
|
|
||||||
FIXP_DBL *pDat_1) {
|
|
||||||
/* Register map:
|
|
||||||
r0 i
|
|
||||||
r1 twiddle
|
|
||||||
r2 pDat_0
|
|
||||||
r3 pDat_1
|
|
||||||
r4 accu1
|
|
||||||
r5 accu2
|
|
||||||
r6 accu3
|
|
||||||
r7 accu4
|
|
||||||
r8 val_tw
|
|
||||||
r9 accuX
|
|
||||||
*/
|
|
||||||
PUSH{r4 - r9}
|
|
||||||
|
|
||||||
dst_IV_loop1 LDR r8,
|
|
||||||
[r1],
|
|
||||||
# 4 // val_tw = *twiddle++
|
|
||||||
LDR r5,
|
|
||||||
[r2] // accu2 = pDat_0[0]
|
|
||||||
LDR r6,
|
|
||||||
[ r2, #4 ] // accu3 = pDat_0[1]
|
|
||||||
RSB r5,
|
|
||||||
r5,
|
|
||||||
# 0 // accu2 = -accu2
|
|
||||||
SMULWT r9, r5,
|
|
||||||
r8 // accuX = (-accu2)*val_tw.l
|
|
||||||
LDR r4,
|
|
||||||
[ r3, # - 4 ] // accu1 = pDat_1[-1]
|
|
||||||
RSB r9,
|
|
||||||
r9,
|
|
||||||
# 0 // accuX = -(-accu2)*val_tw.l
|
|
||||||
SMLAWB r9, r4, r8,
|
|
||||||
r9 // accuX = accu1*val_tw.h-(-accu2)*val_tw.l
|
|
||||||
SMULWT r4,
|
|
||||||
r4,
|
|
||||||
r8 // accu1 = accu1*val_tw.l
|
|
||||||
LDR r7,
|
|
||||||
[ r3, # - 8 ] // accu4 = pDat_1[-2]
|
|
||||||
SMLAWB r5,
|
|
||||||
r5, r8,
|
|
||||||
r4 // accu2 = (-accu2)*val_tw.t+accu1*val_tw.l
|
|
||||||
LDR r8,
|
|
||||||
[r1],
|
|
||||||
# 4 // val_tw = *twiddle++
|
|
||||||
STR r5, [r2],
|
|
||||||
# 4 // *pDat_0++ = accu2
|
|
||||||
STR r9, [r2],
|
|
||||||
# 4 // *pDat_0++ = accu1 (accuX)
|
|
||||||
RSB r7, r7,
|
|
||||||
# 0 // accu4 = -accu4
|
|
||||||
SMULWB r5, r7,
|
|
||||||
r8 // accu2 = (-accu4)*val_tw.h
|
|
||||||
SMULWB r4,
|
|
||||||
r6,
|
|
||||||
r8 // accu1 = (-accu4)*val_tw.l
|
|
||||||
RSB r5,
|
|
||||||
r5,
|
|
||||||
# 0 // accu2 = -(-accu4)*val_tw.h
|
|
||||||
SMLAWT r6, r6, r8,
|
|
||||||
r5 // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
|
|
||||||
SMLAWT r7,
|
|
||||||
r7, r8,
|
|
||||||
r4 // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
|
|
||||||
STR r6,
|
|
||||||
[ r3, # - 4 ] ! // *--pDat_1 = accu3
|
|
||||||
STR r7,
|
|
||||||
[ r3, # - 4 ] ! // *--pDat_1 = accu4
|
|
||||||
|
|
||||||
LDR r8,
|
|
||||||
[r1],
|
|
||||||
# 4 // val_tw = *twiddle++
|
|
||||||
LDR r5,
|
|
||||||
[r2] // accu2 = pDat_0[0]
|
|
||||||
LDR r6,
|
|
||||||
[ r2, #4 ] // accu3 = pDat_0[1]
|
|
||||||
RSB r5,
|
|
||||||
r5,
|
|
||||||
# 0 // accu2 = -accu2
|
|
||||||
SMULWT r9, r5,
|
|
||||||
r8 // accuX = (-accu2)*val_tw.l
|
|
||||||
LDR r4,
|
|
||||||
[ r3, # - 4 ] // accu1 = pDat_1[-1]
|
|
||||||
RSB r9,
|
|
||||||
r9,
|
|
||||||
# 0 // accuX = -(-accu2)*val_tw.l
|
|
||||||
SMLAWB r9, r4, r8,
|
|
||||||
r9 // accuX = accu1*val_tw.h-(-accu2)*val_tw.l
|
|
||||||
SMULWT r4,
|
|
||||||
r4,
|
|
||||||
r8 // accu1 = accu1*val_tw.l
|
|
||||||
LDR r7,
|
|
||||||
[ r3, # - 8 ] // accu4 = pDat_1[-2]
|
|
||||||
SMLAWB r5,
|
|
||||||
r5, r8,
|
|
||||||
r4 // accu2 = (-accu2)*val_tw.t+accu1*val_tw.l
|
|
||||||
LDR r8,
|
|
||||||
[r1],
|
|
||||||
# 4 // val_tw = *twiddle++
|
|
||||||
STR r5, [r2],
|
|
||||||
# 4 // *pDat_0++ = accu2
|
|
||||||
STR r9, [r2],
|
|
||||||
# 4 // *pDat_0++ = accu1 (accuX)
|
|
||||||
RSB r7, r7,
|
|
||||||
# 0 // accu4 = -accu4
|
|
||||||
SMULWB r5, r7,
|
|
||||||
r8 // accu2 = (-accu4)*val_tw.h
|
|
||||||
SMULWB r4,
|
|
||||||
r6,
|
|
||||||
r8 // accu1 = (-accu4)*val_tw.l
|
|
||||||
RSB r5,
|
|
||||||
r5,
|
|
||||||
# 0 // accu2 = -(-accu4)*val_tw.h
|
|
||||||
SMLAWT r6, r6, r8,
|
|
||||||
r5 // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
|
|
||||||
SMLAWT r7,
|
|
||||||
r7, r8,
|
|
||||||
r4 // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
|
|
||||||
STR r6,
|
|
||||||
[ r3, # - 4 ] ! // *--pDat_1 = accu3
|
|
||||||
STR r7,
|
|
||||||
[ r3, # - 4 ] ! // *--pDat_1 = accu4
|
|
||||||
|
|
||||||
SUBS r0,
|
|
||||||
r0,
|
|
||||||
# 4 // i-= 4
|
|
||||||
BNE dst_IV_loop1
|
|
||||||
|
|
||||||
POP{r4 - r9} BX lr
|
|
||||||
}
|
|
||||||
#endif /* FUNCTION_dst_IV_func1 */
|
|
||||||
|
|
||||||
#ifdef FUNCTION_dst_IV_func2
|
|
||||||
|
|
||||||
/* __attribute__((noinline)) */
|
|
||||||
static inline void dst_IV_func2(int i, const FIXP_SPK *twiddle,
|
|
||||||
FIXP_DBL *RESTRICT pDat_0,
|
|
||||||
FIXP_DBL *RESTRICT pDat_1, int inc) {
|
|
||||||
FIXP_DBL accu1, accu2, accu3, accu4;
|
|
||||||
LONG val_tw;
|
|
||||||
|
|
||||||
accu4 = pDat_0[0];
|
|
||||||
accu3 = pDat_0[1];
|
|
||||||
accu4 >>= 1;
|
|
||||||
accu3 >>= 1;
|
|
||||||
accu4 = -accu4;
|
|
||||||
|
|
||||||
accu1 = pDat_1[-1];
|
|
||||||
accu2 = pDat_1[0];
|
|
||||||
|
|
||||||
*pDat_0++ = accu3;
|
|
||||||
*pDat_1-- = accu4;
|
|
||||||
|
|
||||||
__asm {
|
|
||||||
B dst_IV_loop2_2nd_part
|
|
||||||
|
|
||||||
/* 50 cycles for 2 iterations = 25 cycles/iteration */
|
|
||||||
|
|
||||||
dst_IV_loop2:
|
|
||||||
|
|
||||||
LDR val_tw, [twiddle], inc, LSL #2 // val_tw = *twiddle; twiddle += inc
|
|
||||||
|
|
||||||
RSB accu2, accu2, #0 // accu2 = -accu2
|
|
||||||
RSB accu1, accu1, #0 // accu1 = -accu1
|
|
||||||
SMULWT accu3, accu2, val_tw // accu3 = (-accu2)*val_tw.l
|
|
||||||
SMULWT accu4, accu1, val_tw // accu4 = (-accu1)*val_tw.l
|
|
||||||
RSB accu3, accu3, #0 // accu3 = -accu2*val_tw.l
|
|
||||||
SMLAWB accu1, accu1, val_tw, accu3 // accu1 = -accu1*val_tw.h-(-accu2)*val_tw.l
|
|
||||||
SMLAWB accu2, accu2, val_tw, accu4 // accu2 = (-accu1)*val_tw.l+(-accu2)*val_tw.h
|
|
||||||
STR accu1, [pDat_1], #-4 // *pDat_1-- = accu1
|
|
||||||
STR accu2, [pDat_0], #4 // *pDat_0++ = accu2
|
|
||||||
|
|
||||||
LDR accu4, [pDat_0] // accu4 = pDat_0[0]
|
|
||||||
LDR accu3, [pDat_0, #4] // accu3 = pDat_0[1]
|
|
||||||
|
|
||||||
RSB accu4, accu4, #0 // accu4 = -accu4
|
|
||||||
RSB accu3, accu3, #0 // accu3 = -accu3
|
|
||||||
|
|
||||||
SMULWB accu1, accu3, val_tw // accu1 = (-accu3)*val_tw.h
|
|
||||||
SMULWT accu2, accu3, val_tw // accu2 = (-accu3)*val_tw.l
|
|
||||||
RSB accu1, accu1, #0 // accu1 = -(-accu3)*val_tw.h
|
|
||||||
SMLAWT accu3, accu4, val_tw, accu1 // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
|
|
||||||
SMLAWB accu4, accu4, val_tw, accu2 // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
|
|
||||||
|
|
||||||
LDR accu1, [pDat_1, #-4] // accu1 = pDat_1[-1]
|
|
||||||
LDR accu2, [pDat_1] // accu2 = pDat_1[0]
|
|
||||||
|
|
||||||
STR accu3, [pDat_0], #4 // *pDat_0++ = accu3
|
|
||||||
STR accu4, [pDat_1], #-4 // *pDat_1-- = accu4
|
|
||||||
|
|
||||||
dst_IV_loop2_2nd_part:
|
|
||||||
|
|
||||||
LDR val_tw, [twiddle], inc, LSL #2 // val_tw = *twiddle; twiddle += inc
|
|
||||||
|
|
||||||
RSB accu2, accu2, #0 // accu2 = -accu2
|
|
||||||
RSB accu1, accu1, #0 // accu1 = -accu1
|
|
||||||
SMULWT accu3, accu2, val_tw // accu3 = (-accu2)*val_tw.l
|
|
||||||
SMULWT accu4, accu1, val_tw // accu4 = (-accu1)*val_tw.l
|
|
||||||
RSB accu3, accu3, #0 // accu3 = -accu2*val_tw.l
|
|
||||||
SMLAWB accu1, accu1, val_tw, accu3 // accu1 = -accu1*val_tw.h-(-accu2)*val_tw.l
|
|
||||||
SMLAWB accu2, accu2, val_tw, accu4 // accu2 = (-accu1)*val_tw.l+(-accu2)*val_tw.h
|
|
||||||
STR accu1, [pDat_1], #-4 // *pDat_1-- = accu1
|
|
||||||
STR accu2, [pDat_0], #4 // *pDat_0++ = accu2
|
|
||||||
|
|
||||||
LDR accu4, [pDat_0] // accu4 = pDat_0[0]
|
|
||||||
LDR accu3, [pDat_0, #4] // accu3 = pDat_0[1]
|
|
||||||
|
|
||||||
RSB accu4, accu4, #0 // accu4 = -accu4
|
|
||||||
RSB accu3, accu3, #0 // accu3 = -accu3
|
|
||||||
|
|
||||||
SMULWB accu1, accu3, val_tw // accu1 = (-accu3)*val_tw.h
|
|
||||||
SMULWT accu2, accu3, val_tw // accu2 = (-accu3)*val_tw.l
|
|
||||||
RSB accu1, accu1, #0 // accu1 = -(-accu3)*val_tw.h
|
|
||||||
SMLAWT accu3, accu4, val_tw, accu1 // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
|
|
||||||
SMLAWB accu4, accu4, val_tw, accu2 // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h
|
|
||||||
|
|
||||||
LDR accu1, [pDat_1, #-4] // accu1 = pDat_1[-1]
|
|
||||||
LDR accu2, [pDat_1] // accu2 = pDat_1[0]
|
|
||||||
|
|
||||||
STR accu3, [pDat_0], #4 // *pDat_0++ = accu3
|
|
||||||
STR accu4, [pDat_1], #-4 // *pDat_1-- = accu4
|
|
||||||
|
|
||||||
SUBS i, i, #1
|
|
||||||
BNE dst_IV_loop2
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Last Sin and Cos value pair are the same */
|
|
||||||
accu1 = fMultDiv2(-accu1, WTC(0x5a82799a));
|
|
||||||
accu2 = fMultDiv2(-accu2, WTC(0x5a82799a));
|
|
||||||
|
|
||||||
*pDat_0 = accu1 + accu2;
|
|
||||||
*pDat_1 = accu1 - accu2;
|
|
||||||
}
|
|
||||||
#endif /* FUNCTION_dst_IV_func2 */
|
|
|
@ -124,10 +124,6 @@ amm-info@iis.fraunhofer.de
|
||||||
#include "FDK_tools_rom.h"
|
#include "FDK_tools_rom.h"
|
||||||
#include "fft.h"
|
#include "fft.h"
|
||||||
|
|
||||||
#if defined(__arm__)
|
|
||||||
#include "arm/dct_arm.cpp"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void dct_getTables(const FIXP_WTP **ptwiddle, const FIXP_STP **sin_twiddle,
|
void dct_getTables(const FIXP_WTP **ptwiddle, const FIXP_STP **sin_twiddle,
|
||||||
int *sin_step, int length) {
|
int *sin_step, int length) {
|
||||||
const FIXP_WTP *twiddle;
|
const FIXP_WTP *twiddle;
|
||||||
|
@ -387,12 +383,6 @@ void dct_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
|
||||||
|
|
||||||
dct_getTables(&twiddle, &sin_twiddle, &sin_step, L);
|
dct_getTables(&twiddle, &sin_twiddle, &sin_step, L);
|
||||||
|
|
||||||
#ifdef FUNCTION_dct_IV_func1
|
|
||||||
if (M >= 4 && (M & 3) == 0) {
|
|
||||||
/* ARM926: 44 cycles for 2 iterations = 22 cycles/iteration */
|
|
||||||
dct_IV_func1(M >> 2, twiddle, &pDat[0], &pDat[L - 1]);
|
|
||||||
} else
|
|
||||||
#endif /* FUNCTION_dct_IV_func1 */
|
|
||||||
{
|
{
|
||||||
FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
|
FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
|
||||||
FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
|
FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
|
||||||
|
@ -410,10 +400,10 @@ void dct_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
|
||||||
cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
|
cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
|
||||||
cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i + 1]);
|
cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i + 1]);
|
||||||
|
|
||||||
pDat_0[0] = accu2;
|
pDat_0[0] = accu2 >> 1;
|
||||||
pDat_0[1] = accu1;
|
pDat_0[1] = accu1 >> 1;
|
||||||
pDat_1[0] = accu4;
|
pDat_1[0] = accu4 >> 1;
|
||||||
pDat_1[1] = -accu3;
|
pDat_1[1] = -(accu3 >> 1);
|
||||||
}
|
}
|
||||||
if (M & 1) {
|
if (M & 1) {
|
||||||
FIXP_DBL accu1, accu2;
|
FIXP_DBL accu1, accu2;
|
||||||
|
@ -423,19 +413,13 @@ void dct_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
|
||||||
|
|
||||||
cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
|
cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
|
||||||
|
|
||||||
pDat_0[0] = accu2;
|
pDat_0[0] = accu2 >> 1;
|
||||||
pDat_0[1] = accu1;
|
pDat_0[1] = accu1 >> 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fft(M, pDat, pDat_e);
|
fft(M, pDat, pDat_e);
|
||||||
|
|
||||||
#ifdef FUNCTION_dct_IV_func2
|
|
||||||
if (M >= 4 && (M & 3) == 0) {
|
|
||||||
/* ARM926: 42 cycles for 2 iterations = 21 cycles/iteration */
|
|
||||||
dct_IV_func2(M >> 2, sin_twiddle, &pDat[0], &pDat[L], sin_step);
|
|
||||||
} else
|
|
||||||
#endif /* FUNCTION_dct_IV_func2 */
|
|
||||||
{
|
{
|
||||||
FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
|
FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
|
||||||
FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
|
FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
|
||||||
|
@ -446,20 +430,19 @@ void dct_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
|
||||||
accu1 = pDat_1[0];
|
accu1 = pDat_1[0];
|
||||||
accu2 = pDat_1[1];
|
accu2 = pDat_1[1];
|
||||||
|
|
||||||
pDat_1[1] = -(pDat_0[1] >> 1);
|
pDat_1[1] = -pDat_0[1];
|
||||||
pDat_0[0] = (pDat_0[0] >> 1);
|
|
||||||
|
|
||||||
/* 28 cycles for ARM926 */
|
/* 28 cycles for ARM926 */
|
||||||
for (idx = sin_step, i = 1; i<(M + 1)>> 1; i++, idx += sin_step) {
|
for (idx = sin_step, i = 1; i<(M + 1)>> 1; i++, idx += sin_step) {
|
||||||
FIXP_STP twd = sin_twiddle[idx];
|
FIXP_STP twd = sin_twiddle[idx];
|
||||||
cplxMultDiv2(&accu3, &accu4, accu1, accu2, twd);
|
cplxMult(&accu3, &accu4, accu1, accu2, twd);
|
||||||
pDat_0[1] = accu3;
|
pDat_0[1] = accu3;
|
||||||
pDat_1[0] = accu4;
|
pDat_1[0] = accu4;
|
||||||
|
|
||||||
pDat_0 += 2;
|
pDat_0 += 2;
|
||||||
pDat_1 -= 2;
|
pDat_1 -= 2;
|
||||||
|
|
||||||
cplxMultDiv2(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);
|
cplxMult(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);
|
||||||
|
|
||||||
accu1 = pDat_1[0];
|
accu1 = pDat_1[0];
|
||||||
accu2 = pDat_1[1];
|
accu2 = pDat_1[1];
|
||||||
|
@ -470,8 +453,8 @@ void dct_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
|
||||||
|
|
||||||
if ((M & 1) == 0) {
|
if ((M & 1) == 0) {
|
||||||
/* Last Sin and Cos value pair are the same */
|
/* Last Sin and Cos value pair are the same */
|
||||||
accu1 = fMultDiv2(accu1, WTC(0x5a82799a));
|
accu1 = fMult(accu1, WTC(0x5a82799a));
|
||||||
accu2 = fMultDiv2(accu2, WTC(0x5a82799a));
|
accu2 = fMult(accu2, WTC(0x5a82799a));
|
||||||
|
|
||||||
pDat_1[0] = accu1 + accu2;
|
pDat_1[0] = accu1 + accu2;
|
||||||
pDat_0[1] = accu1 - accu2;
|
pDat_0[1] = accu1 - accu2;
|
||||||
|
@ -497,11 +480,6 @@ void dst_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
|
||||||
|
|
||||||
dct_getTables(&twiddle, &sin_twiddle, &sin_step, L);
|
dct_getTables(&twiddle, &sin_twiddle, &sin_step, L);
|
||||||
|
|
||||||
#ifdef FUNCTION_dst_IV_func1
|
|
||||||
if ((M >= 4) && ((M & 3) == 0)) {
|
|
||||||
dst_IV_func1(M, twiddle, &pDat[0], &pDat[L]);
|
|
||||||
} else
|
|
||||||
#endif
|
|
||||||
{
|
{
|
||||||
FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
|
FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
|
||||||
FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
|
FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
|
||||||
|
@ -519,10 +497,10 @@ void dst_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
|
||||||
cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
|
cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
|
||||||
cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i + 1]);
|
cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i + 1]);
|
||||||
|
|
||||||
pDat_0[0] = accu2;
|
pDat_0[0] = accu2 >> 1;
|
||||||
pDat_0[1] = accu1;
|
pDat_0[1] = accu1 >> 1;
|
||||||
pDat_1[0] = accu4;
|
pDat_1[0] = accu4 >> 1;
|
||||||
pDat_1[1] = -accu3;
|
pDat_1[1] = -(accu3 >> 1);
|
||||||
}
|
}
|
||||||
if (M & 1) {
|
if (M & 1) {
|
||||||
FIXP_DBL accu1, accu2;
|
FIXP_DBL accu1, accu2;
|
||||||
|
@ -532,19 +510,13 @@ void dst_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
|
||||||
|
|
||||||
cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
|
cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
|
||||||
|
|
||||||
pDat_0[0] = accu2;
|
pDat_0[0] = accu2 >> 1;
|
||||||
pDat_0[1] = accu1;
|
pDat_0[1] = accu1 >> 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fft(M, pDat, pDat_e);
|
fft(M, pDat, pDat_e);
|
||||||
|
|
||||||
#ifdef FUNCTION_dst_IV_func2
|
|
||||||
if ((M >= 4) && ((M & 3) == 0)) {
|
|
||||||
dst_IV_func2(M >> 2, sin_twiddle + sin_step, &pDat[0], &pDat[L - 1],
|
|
||||||
sin_step);
|
|
||||||
} else
|
|
||||||
#endif /* FUNCTION_dst_IV_func2 */
|
|
||||||
{
|
{
|
||||||
FIXP_DBL *RESTRICT pDat_0;
|
FIXP_DBL *RESTRICT pDat_0;
|
||||||
FIXP_DBL *RESTRICT pDat_1;
|
FIXP_DBL *RESTRICT pDat_1;
|
||||||
|
@ -558,20 +530,20 @@ void dst_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
|
||||||
accu1 = pDat_1[0];
|
accu1 = pDat_1[0];
|
||||||
accu2 = pDat_1[1];
|
accu2 = pDat_1[1];
|
||||||
|
|
||||||
pDat_1[1] = -(pDat_0[0] >> 1);
|
pDat_1[1] = -pDat_0[0];
|
||||||
pDat_0[0] = (pDat_0[1] >> 1);
|
pDat_0[0] = pDat_0[1];
|
||||||
|
|
||||||
for (idx = sin_step, i = 1; i<(M + 1)>> 1; i++, idx += sin_step) {
|
for (idx = sin_step, i = 1; i<(M + 1)>> 1; i++, idx += sin_step) {
|
||||||
FIXP_STP twd = sin_twiddle[idx];
|
FIXP_STP twd = sin_twiddle[idx];
|
||||||
|
|
||||||
cplxMultDiv2(&accu3, &accu4, accu1, accu2, twd);
|
cplxMult(&accu3, &accu4, accu1, accu2, twd);
|
||||||
pDat_1[0] = -accu3;
|
pDat_1[0] = -accu3;
|
||||||
pDat_0[1] = -accu4;
|
pDat_0[1] = -accu4;
|
||||||
|
|
||||||
pDat_0 += 2;
|
pDat_0 += 2;
|
||||||
pDat_1 -= 2;
|
pDat_1 -= 2;
|
||||||
|
|
||||||
cplxMultDiv2(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);
|
cplxMult(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);
|
||||||
|
|
||||||
accu1 = pDat_1[0];
|
accu1 = pDat_1[0];
|
||||||
accu2 = pDat_1[1];
|
accu2 = pDat_1[1];
|
||||||
|
@ -582,8 +554,8 @@ void dst_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
|
||||||
|
|
||||||
if ((M & 1) == 0) {
|
if ((M & 1) == 0) {
|
||||||
/* Last Sin and Cos value pair are the same */
|
/* Last Sin and Cos value pair are the same */
|
||||||
accu1 = fMultDiv2(accu1, WTC(0x5a82799a));
|
accu1 = fMult(accu1, WTC(0x5a82799a));
|
||||||
accu2 = fMultDiv2(accu2, WTC(0x5a82799a));
|
accu2 = fMult(accu2, WTC(0x5a82799a));
|
||||||
|
|
||||||
pDat_0[1] = -accu1 - accu2;
|
pDat_0[1] = -accu1 - accu2;
|
||||||
pDat_1[0] = accu2 - accu1;
|
pDat_1[0] = accu2 - accu1;
|
||||||
|
|
Loading…
Reference in New Issue