mirror of https://gitlab.com/ecodis/exhale.git
2309 lines
106 KiB
C++
2309 lines
106 KiB
C++
/* exhaleEnc.cpp - source file for class providing Extended HE-AAC encoding capability
|
|
* written by C. R. Helmrich, last modified in 2023 - see License.htm for legal notices
|
|
* C API corrected and API compilation extended by J. Regan in 2022, see merge request 8
|
|
*
|
|
* The copyright in this software is being made available under the exhale Copyright License
|
|
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
|
|
* party rights, including patent rights. No such rights are granted under this License.
|
|
*
|
|
* Copyright (c) 2018-2024 Christian R. Helmrich, project ecodis. All rights reserved.
|
|
*/
|
|
|
|
#include "exhaleLibPch.h"
|
|
#include "exhaleEnc.h"
|
|
|
|
// static helper functions
|
|
// Evaluates the zeroth-order modified Bessel function of the first kind, I0(x), using
// its power series: the k-th term equals the previous term times ((x/2) / k)^2.
// Summation stops once the newest term no longer exceeds the running sum scaled by
// 1.2e-38 (approximately FLT_MIN), i.e. once further terms are negligible.
static double modifiedBesselFunctionOfFirstKind (const double x)
{
  const double halfX = x * 0.5;
  double term   = 1.0; // series term for k = 0
  double series = 1.0; // running sum of all terms so far

  for (int k = 1; ; k++)
  {
    const double ratio = halfX / double (k);

    term   *= (ratio * ratio);
    series += term;

    if (!(term > series * 1.2e-38)) break; // FLT_MIN
  }

  return series;
}
|
|
|
|
static int32_t* initWindowHalfCoeffs (const USAC_WSHP windowShape, const unsigned frameLength)
|
|
{
|
|
int32_t* windowBuf = nullptr;
|
|
unsigned u;
|
|
|
|
if ((windowBuf = (int32_t*) malloc (frameLength * sizeof (int32_t))) == nullptr)
|
|
{
|
|
return nullptr; // allocation error
|
|
}
|
|
|
|
if (windowShape == WINDOW_SINE)
|
|
{
|
|
const double dNorm = 3.141592653589793 / (2.0 * frameLength);
|
|
// MLT sine window half
|
|
for (u = 0; u < frameLength; u++)
|
|
{
|
|
windowBuf[u] = int32_t (sin (dNorm * (u + 0.5)) * WIN_SCALE + 0.5);
|
|
}
|
|
}
|
|
else // if windowShape == WINDOW_KBD
|
|
{
|
|
const double alpha = 3.141592653589793 * (frameLength > 256 ? 4.0 : 6.0);
|
|
const double dBeta = 1.0 / modifiedBesselFunctionOfFirstKind (alpha /*sqrt (1.0)*/);
|
|
const double dNorm = 4.0 / (2.0 * frameLength);
|
|
const double iScal = double (1u << 30);
|
|
const double dScal = 1.0 / iScal;
|
|
double d, sum = 0.0;
|
|
// create Kaiser-Bessel window half
|
|
for (u = 0; u < frameLength; u++)
|
|
{
|
|
const double du1 = dNorm * u - 1.0;
|
|
|
|
d = dBeta * modifiedBesselFunctionOfFirstKind (alpha * sqrt (1.0 - du1 * du1));
|
|
sum += d;
|
|
windowBuf[u] = int32_t (d * iScal + 0.5);
|
|
}
|
|
d = 1.0 / sum; // normalized to sum
|
|
sum = 0.0;
|
|
// KBD window half
|
|
for (u = 0; u < frameLength; u++)
|
|
{
|
|
sum += dScal * windowBuf[u];
|
|
windowBuf[u] = int32_t (sqrt (d * sum /*cumulative sum*/) * WIN_SCALE + 0.5);
|
|
}
|
|
}
|
|
return windowBuf;
|
|
}
|
|
|
|
static uint32_t quantizeSfbWithMinSnr (const unsigned* const coeffMagn, const uint16_t* const sfbOffset, const unsigned b,
|
|
const uint8_t groupLength, uint8_t* const quantMagn, char* const arithTuples, const bool nonZeroSnr = false)
|
|
{
|
|
const uint16_t sfbStart = sfbOffset[b];
|
|
const uint16_t sfbWidth = sfbOffset[b + 1] - sfbStart;
|
|
const unsigned* sfbMagn = &coeffMagn[sfbStart];
|
|
uint32_t maxIndex = 0, maxLevel = sfbMagn[0];
|
|
|
|
for (uint16_t s = sfbWidth - 1; s > 0; s--)
|
|
{
|
|
if (maxLevel < sfbMagn[s]) // find largest-level magn. in SFB
|
|
{
|
|
maxLevel = sfbMagn[s];
|
|
maxIndex = s;
|
|
}
|
|
}
|
|
if (quantMagn != nullptr) // update quantized sample magnitudes
|
|
{
|
|
memset (&quantMagn[sfbStart], 0, sfbWidth * sizeof (uint8_t));
|
|
|
|
if (nonZeroSnr) quantMagn[sfbStart + maxIndex] = 1; // magn. 1
|
|
}
|
|
|
|
if (arithTuples != nullptr) // update entropy coding two-tuples
|
|
{
|
|
const uint16_t swbStart = ((sfbStart - sfbOffset[0]) * oneTwentyEightOver[groupLength]) >> 7;
|
|
|
|
memset (&arithTuples[swbStart >> 1], 1, ((sfbWidth * oneTwentyEightOver[groupLength]) >> 8) * sizeof (char));
|
|
|
|
if (nonZeroSnr && (groupLength == 1)) // max. two-tuple is 1+1
|
|
{
|
|
arithTuples[(swbStart + maxIndex) >> 1] = 2;
|
|
}
|
|
}
|
|
|
|
return maxLevel;
|
|
}
|
|
|
|
// inline helper functions
|
|
static inline void applyStereoPreProcessingCplx (int32_t* mdctSample1, int32_t* mdctSample2,
|
|
int32_t* mdstSample1, int32_t* mdstSample2,
|
|
const int64_t factIn, const int64_t factDe, const int64_t sign)
|
|
{
|
|
const int32_t valI1 = *mdstSample1;
|
|
const int32_t valI2 = *mdstSample2;
|
|
const int32_t valR1 = *mdctSample1;
|
|
const int32_t valR2 = *mdctSample2;
|
|
const int64_t absR1 = abs (valR1);
|
|
const int64_t absR2 = abs (valR2);
|
|
int64_t dmxI1, dmxR1 = valR1 * factDe + sign * valR2 * factIn; // cross
|
|
int64_t dmxI2, dmxR2 = valR2 * factDe + sign * valR1 * factIn; // -talk
|
|
double n, d;
|
|
|
|
if (abs (dmxR1) < absR1 + absR2) // avoid destructive summations
|
|
{
|
|
if (absR1 * factDe < absR2 * factIn)
|
|
{
|
|
dmxR1 = valR2 * factIn - sign * valR1 * factDe;
|
|
dmxI1 = valI2 * factIn - sign * valI1 * factDe;
|
|
}
|
|
else
|
|
{
|
|
dmxR1 = valR1 * factDe - sign * valR2 * factIn;
|
|
dmxI1 = valI1 * factDe - sign * valI2 * factIn;
|
|
}
|
|
}
|
|
else dmxI1 = valI1 * factDe + sign * valI2 * factIn;
|
|
|
|
if (abs (dmxR2) < absR1 + absR2) // avoid destructive summations
|
|
{
|
|
if (absR1 * factIn < absR2 * factDe)
|
|
{
|
|
dmxR2 = valR2 * factDe - sign * valR1 * factIn;
|
|
dmxI2 = valI2 * factDe - sign * valI1 * factIn;
|
|
}
|
|
else
|
|
{
|
|
dmxR2 = valR1 * factIn - sign * valR2 * factDe;
|
|
dmxI2 = valI1 * factIn - sign * valI2 * factDe;
|
|
}
|
|
}
|
|
else dmxI2 = valI2 * factDe + sign * valI1 * factIn;
|
|
|
|
n = (double) valR1 * (double) valR1 + (double) valI1 * (double) valI1;
|
|
d = (double) dmxR1 * (double) dmxR1 + (double) dmxI1 * (double) dmxI1;
|
|
d = sqrt (n / __max (1.0, d));
|
|
*mdctSample1 = int32_t (dmxR1 * d + (dmxR1 < 0 ? -0.5 : 0.5));
|
|
*mdstSample1 = int32_t (dmxI1 * d + (dmxI1 < 0 ? -0.5 : 0.5));
|
|
|
|
n = (double) valR2 * (double) valR2 + (double) valI2 * (double) valI2;
|
|
d = (double) dmxR2 * (double) dmxR2 + (double) dmxI2 * (double) dmxI2;
|
|
d = sqrt (n / __max (1.0, d));
|
|
*mdctSample2 = int32_t (dmxR2 * d + (dmxR2 < 0 ? -0.5 : 0.5));
|
|
*mdstSample2 = int32_t (dmxI2 * d + (dmxI2 < 0 ? -0.5 : 0.5));
|
|
}
|
|
|
|
static inline void applyStereoPreProcessingReal (int32_t* mdctSample1, int32_t* mdctSample2,
|
|
int32_t* prevSample1, int32_t* prevSample2,
|
|
const int64_t factIn, const int64_t factDe, const int64_t sign)
|
|
{
|
|
const int64_t valI1 = (*(mdctSample1 + 1) - (int64_t) *prevSample1) >> 1; // estimate, see also
|
|
const int64_t valI2 = (*(mdctSample2 + 1) - (int64_t) *prevSample2) >> 1; // getMeanAbsValues()
|
|
const int32_t valR1 = (*prevSample1 = *mdctSample1);
|
|
const int32_t valR2 = (*prevSample2 = *mdctSample2);
|
|
const int64_t absR1 = abs (valR1);
|
|
const int64_t absR2 = abs (valR2);
|
|
int64_t dmxI1, dmxR1 = valR1 * factDe + sign * valR2 * factIn; // cross
|
|
int64_t dmxI2, dmxR2 = valR2 * factDe + sign * valR1 * factIn; // -talk
|
|
double n, d;
|
|
|
|
if (abs (dmxR1) < absR1 + absR2) // avoid destructive summations
|
|
{
|
|
if (absR1 * factDe < absR2 * factIn)
|
|
{
|
|
dmxR1 = valR2 * factIn - sign * valR1 * factDe;
|
|
dmxI1 = valI2 * factIn - sign * valI1 * factDe;
|
|
}
|
|
else
|
|
{
|
|
dmxR1 = valR1 * factDe - sign * valR2 * factIn;
|
|
dmxI1 = valI1 * factDe - sign * valI2 * factIn;
|
|
}
|
|
}
|
|
else dmxI1 = valI1 * factDe + sign * valI2 * factIn;
|
|
|
|
if (abs (dmxR2) < absR1 + absR2) // avoid destructive summations
|
|
{
|
|
if (absR1 * factIn < absR2 * factDe)
|
|
{
|
|
dmxR2 = valR2 * factDe - sign * valR1 * factIn;
|
|
dmxI2 = valI2 * factDe - sign * valI1 * factIn;
|
|
}
|
|
else
|
|
{
|
|
dmxR2 = valR1 * factIn - sign * valR2 * factDe;
|
|
dmxI2 = valI1 * factIn - sign * valI2 * factDe;
|
|
}
|
|
}
|
|
else dmxI2 = valI2 * factDe + sign * valI1 * factIn;
|
|
|
|
n = (double) valR1 * (double) valR1 + (double) valI1 * (double) valI1;
|
|
d = (double) dmxR1 * (double) dmxR1 + (double) dmxI1 * (double) dmxI1;
|
|
*mdctSample1 = int32_t (dmxR1 * sqrt (n / __max (1.0, d)) + (dmxR1 < 0 ? -0.5 : 0.5));
|
|
|
|
n = (double) valR2 * (double) valR2 + (double) valI2 * (double) valI2;
|
|
d = (double) dmxR2 * (double) dmxR2 + (double) dmxI2 * (double) dmxI2;
|
|
*mdctSample2 = int32_t (dmxR2 * sqrt (n / __max (1.0, d)) + (dmxR2 < 0 ? -0.5 : 0.5));
|
|
}
|
|
|
|
// Synchronizes the TNS data of two channels for filter/window n: the first maxTnsOrder
// ParCor coefficients of channel 1 are replaced by the rounded mean of both channels'
// coefficients, the optimal quantized TNS coefficients are re-derived for channel 1,
// and then the complete TnsData of channel 1 is copied over that of channel 2.
// commonFlag, if non-null, is left unchanged (it is ANDed with true).
static inline void applyTnsCoeff2ChannelSynch (LinearPredictor& predictor, TnsData& tnsData1, TnsData& tnsData2,
                                               const uint16_t maxTnsOrder, const unsigned n, bool* const commonFlag)
{
  int16_t* const meanParCor = tnsData1.coeffParCor[n];
  int16_t* const othParCor  = tnsData2.coeffParCor[n];
  uint16_t k = 0;

  while (k < maxTnsOrder) // synchronize coeffs: rounded average of both channels
  {
    meanParCor[k] = (meanParCor[k] + othParCor[k] + 1) >> 1;
    k++;
  }

  // optimize synchronized coeffs
  tnsData1.coeffResLow[n] = false;
  tnsData1.filterOrder[n] = predictor.calcOptTnsCoeffs (meanParCor, tnsData1.coeff[n], &tnsData1.coeffResLow[n],
                                                        maxTnsOrder, UCHAR_MAX /*max pred gain*/, 0, LP_DEPTH);
  tnsData1.numFilters[n]  = (tnsData1.filterOrder[n] > 0 ? 1 : 0);

  memcpy (&tnsData2, &tnsData1, sizeof (TnsData)); // synchronize

  if (commonFlag != nullptr)
  {
    *commonFlag &= (true);
  }
}
|
|
|
|
// Applies crosstalk to the ParCor coefficients of two channels for filter/window n and
// re-derives the optimal quantized TNS coefficients of both channels afterwards.
//   fact       : crosstalk weight in units of 1/128, limited to at most 64 here
//   commonFlag : if non-null, cleared unless both channels end up with identical
//                coefficient resolution, filter order, and quantized coefficients
static inline void applyTnsCoeffPreProcessing (LinearPredictor& predictor, TnsData& tnsData1, TnsData& tnsData2,
                                               const uint16_t maxTnsOrder, const unsigned n, bool* const commonFlag, const int16_t fact)
{
  const int32_t crossWeight = __min (64, fact); // crosstalk constant
  const int32_t ownWeight   = 128 - crossWeight; // (1 - crosstalk) * 128
  int16_t* const parCorA = tnsData1.coeffParCor[n];
  int16_t* const parCorB = tnsData2.coeffParCor[n];
  uint16_t k = 0;

  while (k < maxTnsOrder) // apply crosstalking
  {
    const int16_t origA = parCorA[k]; // channel 1 value before it is overwritten

    parCorA[k] = int16_t ((origA * ownWeight + parCorB[k] * crossWeight + 64) >> 7);
    parCorB[k] = int16_t ((origA * crossWeight + parCorB[k] * ownWeight + 64) >> 7);
    k++;
  }

  // optimize coeffs of channel 1
  tnsData1.coeffResLow[n] = false;
  tnsData1.filterOrder[n] = predictor.calcOptTnsCoeffs (parCorA, tnsData1.coeff[n], &tnsData1.coeffResLow[n],
                                                        maxTnsOrder, UCHAR_MAX /*max pred gain*/, 0, LP_DEPTH);
  tnsData1.numFilters[n]  = (tnsData1.filterOrder[n] > 0 ? 1 : 0);

  // optimize coeffs of channel 2
  tnsData2.coeffResLow[n] = false;
  tnsData2.filterOrder[n] = predictor.calcOptTnsCoeffs (parCorB, tnsData2.coeff[n], &tnsData2.coeffResLow[n],
                                                        maxTnsOrder, UCHAR_MAX /*max pred gain*/, 0, LP_DEPTH);
  tnsData2.numFilters[n]  = (tnsData2.filterOrder[n] > 0 ? 1 : 0);

  if (commonFlag != nullptr)
  {
    *commonFlag &= (tnsData1.coeffResLow[n] == tnsData2.coeffResLow[n] && tnsData1.filterOrder[n] == tnsData2.filterOrder[n]);

    if (*commonFlag) // compare the quantized coefficients only if everything else matches
    {
      *commonFlag &= (memcmp (tnsData1.coeff[n], tnsData2.coeff[n], sizeof (tnsData1.coeff[n])) == 0);
    }
  }
}
|
|
|
|
static inline uint8_t brModeAndFsToMaxSfbLong (const unsigned bitRateMode, const unsigned samplingRate)
|
|
{
|
|
// max. for fs of 44 kHz: band 47 (19.3 kHz), 48 kHz: 45 (19.5 kHz), 64 kHz: 39 (22.0 kHz)
|
|
return __max (39, (0x20A000 + (samplingRate >> 1)) / samplingRate) - 9 + bitRateMode - (samplingRate < 46009 ? bitRateMode >> 3 : 0);
|
|
}
|
|
|
|
// Maps a bit-rate mode and a sampling rate (in Hz) to the maximum number of scale factor
// bands coded with short windows: a base of 11 bands above 51200 Hz and 13 otherwise,
// minus 2, plus one extra band for every four bit-rate mode steps.
static inline uint8_t brModeAndFsToMaxSfbShort(const unsigned bitRateMode, const unsigned samplingRate)
{
  // max. for fs of 44 kHz: band 13 (19.3 kHz), 48 kHz: 13 (21.0 kHz), 64 kHz: 11 (23.0 kHz)
  const unsigned topBand = (samplingRate > 51200 ? 11 : 13);

  return topBand - 2 + (bitRateMode >> 2);
}
|
|
|
|
#if !EE_MORE_MSE
|
|
static inline void findActualBandwidthShort (uint8_t* const maxSfbShort, const uint16_t* sfbOffsets,
|
|
const int32_t* mdctSignals, const int32_t* mdstSignals, const unsigned nSamplesInShort)
|
|
{
|
|
const uint16_t b = sfbOffsets[1]; // beginning of search region
|
|
uint8_t maxSfb = __max (1, *maxSfbShort);
|
|
uint16_t sfbOffs = sfbOffsets[maxSfb - 1];
|
|
|
|
for (uint16_t e = sfbOffsets[maxSfb] - 1; e >= b; e--) // search
|
|
{
|
|
int32_t maxAbs = abs (mdctSignals[e]);
|
|
|
|
if (mdstSignals != nullptr)
|
|
{
|
|
maxAbs = __max (maxAbs, abs (mdstSignals[e]));
|
|
for (uint16_t w = 7; w > 0; w--)
|
|
{
|
|
maxAbs = __max (maxAbs, abs (mdctSignals[e + w * nSamplesInShort]));
|
|
maxAbs = __max (maxAbs, abs (mdstSignals[e + w * nSamplesInShort]));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (uint16_t w = 7; w > 0; w--)
|
|
{
|
|
maxAbs = __max (maxAbs, abs (mdctSignals[e + w * nSamplesInShort]));
|
|
}
|
|
}
|
|
if (maxAbs > maxSfb * (SA_EPS >> 1)) break;
|
|
|
|
if (e == sfbOffs) sfbOffs = sfbOffsets[(--maxSfb) - 1];
|
|
}
|
|
|
|
if (*maxSfbShort > maxSfb) *maxSfbShort = maxSfb;
|
|
}
|
|
#endif
|
|
|
|
static inline uint8_t stereoCorrGrouping (const SfbGroupData& grpData, const unsigned nSamplesInFrame, uint8_t* stereoCorrData)
|
|
{
|
|
const uint16_t numWinGroup = grpData.numWindowGroups;
|
|
const uint16_t numBandsWin = nSamplesInFrame >> (SA_BW_SHIFT + 3);
|
|
uint32_t m = 0, w;
|
|
|
|
for (uint16_t gr = 0; gr < numWinGroup; gr++)
|
|
{
|
|
const uint16_t grpLength = grpData.windowGroupLength[gr];
|
|
const uint16_t grpLenFac = oneTwentyEightOver[grpLength]; // for grpStereoCorr/grpLength
|
|
const uint16_t grpLenOff = ((grpLenFac & (grpLenFac - 1)) > 0 ? 0 : 64); // for rounding
|
|
|
|
for (uint16_t b = 0; b < numBandsWin; b++)
|
|
{
|
|
uint32_t grpStereoCorr = 0;
|
|
|
|
for (w = 0; w < grpLength; w++) grpStereoCorr += stereoCorrData[b + w * numBandsWin];
|
|
|
|
if (b == 0) m += grpStereoCorr;
|
|
grpStereoCorr = (grpStereoCorr * grpLenFac + grpLenOff) >> 7;
|
|
|
|
for (w = 0; w < grpLength; w++) stereoCorrData[b + w * numBandsWin] = grpStereoCorr;
|
|
}
|
|
stereoCorrData += grpLength * numBandsWin;
|
|
}
|
|
|
|
return uint8_t ((m + 4) >> 3); // mean low-band correlation value
|
|
}
|
|
|
|
// ISO/IEC 23003-3, Table 75
|
|
static inline unsigned toFrameLength (const USAC_CCFL coreCoderFrameLength)
|
|
{
|
|
return (unsigned) coreCoderFrameLength;
|
|
}
|
|
|
|
// ISO/IEC 23003-3, Table 73
|
|
static const uint8_t numberOfChannels[USAC_MAX_NUM_ELCONFIGS] = {0, 1, 2, 3, 4, 5, 6, 8, 2, 3, 4, 7, 8};
|
|
|
|
static inline unsigned toNumChannels (const USAC_CCI chConfigurationIndex)
|
|
{
|
|
return numberOfChannels[__max (0, (signed char) chConfigurationIndex)];
|
|
}
|
|
|
|
// Scale factor band (SFB) offset tables. Each table lists the first spectral line of
// every band followed by the total number of lines (1024 for the long-window tables
// sfbOffsetL*, 128 for the short-window tables sfbOffsetS*).

// ISO/IEC 14496-3, Table 4.140 - long windows
static const uint16_t sfbOffsetL0[42] = { // 88.2 and 96 kHz
  0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 64, 72, 80, 88, 96, 108,
  120, 132, 144, 156, 172, 188, 212, 240, 276, 320, 384, 448, 512, 576, 640, 704, 768, 832, 896, 960, 1024
};
// ISO/IEC 14496-3, Table 4.141 - short windows
static const uint16_t sfbOffsetS0[13] = {
  0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 64, 92, 128
};

// ISO/IEC 14496-3, Table 4.138 - long windows
static const uint16_t sfbOffsetL1[48] = { // 64 kHz
  0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 64, 72, 80, 88, 100, 112, 124, 140, 156,
  172, 192, 216, 240, 268, 304, 344, 384, 424, 464, 504, 544, 584, 624, 664, 704, 744, 784, 824, 864, 904, 944, 984, 1024
};
// ISO/IEC 14496-3, Table 4.139 - short windows
static const uint16_t sfbOffsetS1[13] = {
  0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 64, 92, 128
};

// ISO/IEC 14496-3, Table 4.131 - long windows
static const uint16_t sfbOffsetL2[52] = { // 32, 44.1, and 48 kHz
  0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 48, 56, 64, 72, 80, 88, 96, 108, 120, 132, 144, 160, 176, 196, 216, 240,
  264, 292, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960/*!*/, 992/*!*/, 1024
};
// ISO/IEC 14496-3, Table 4.130 - short windows
static const uint16_t sfbOffsetS2[15] = {
  0, 4, 8, 12, 16, 20, 28, 36, 44, 56, 68, 80, 96, 112, 128
};

// ISO/IEC 14496-3, Table 4.136 - long windows
static const uint16_t sfbOffsetL3[48] = { // 22.05 and 24 kHz
  0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 52, 60, 68, 76, 84, 92, 100, 108, 116, 124, 136, 148,
  160, 172, 188, 204, 220, 240, 260, 284, 308, 336, 364, 396, 432, 468, 508, 552, 600, 652, 704, 768, 832, 896, 960, 1024
};
// ISO/IEC 14496-3, Table 4.137 - short windows
static const uint16_t sfbOffsetS3[16] = {
  0, 4, 8, 12, 16, 20, 24, 28, 36, 44, 52, 64, 76, 92, 108, 128
};

// ISO/IEC 14496-3, Table 4.134 - long windows
static const uint16_t sfbOffsetL4[44] = { // 11.025, 12, and 16 kHz
  0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 100, 112, 124, 136, 148, 160, 172, 184, 196, 212,
  228, 244, 260, 280, 300, 320, 344, 368, 396, 424, 456, 492, 532, 572, 616, 664, 716, 772, 832, 896, 960, 1024
};
// ISO/IEC 14496-3, Table 4.135 - short windows
static const uint16_t sfbOffsetS4[16] = {
  0, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 60, 72, 88, 108, 128
};

// ISO/IEC 14496-3, Table 4.132 - long windows
static const uint16_t sfbOffsetL5[41] = { // 8 kHz
  0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 120, 132, 144, 156, 172, 188, 204, 220, 236, 252, 268,
  288, 308, 328, 348, 372, 396, 420, 448, 476, 508, 544, 580, 620, 664, 712, 764, 820, 880, 944, 1024
};
// ISO/IEC 14496-3, Table 4.133 - short windows
static const uint16_t sfbOffsetS5[16] = {
  0, 4, 8, 12, 16, 20, 24, 28, 36, 44, 52, 60, 72, 88, 108, 128
};
|
|
|
|
// long-window SFB offset tables
|
|
static const uint16_t* swbOffsetsL[USAC_NUM_FREQ_TABLES] = {
|
|
sfbOffsetL0, sfbOffsetL1, sfbOffsetL2, sfbOffsetL3, sfbOffsetL4, sfbOffsetL5
|
|
};
|
|
static const uint8_t numSwbOffsetL[USAC_NUM_FREQ_TABLES] = {42, 48, 52, 48, 44, 41};
|
|
|
|
// short-window SFB offset tables
|
|
static const uint16_t* swbOffsetsS[USAC_NUM_FREQ_TABLES] = {
|
|
sfbOffsetS0, sfbOffsetS1, sfbOffsetS2, sfbOffsetS3, sfbOffsetS4, sfbOffsetS5
|
|
};
|
|
static const uint8_t numSwbOffsetS[USAC_NUM_FREQ_TABLES] = {13, 13, 15, 16, 16, 16};
|
|
|
|
// ISO/IEC 23003-3, Table 79
|
|
static const uint8_t freqIdxToSwbTableIdxAAC[USAC_NUM_SAMPLE_RATES + 2] = {
|
|
/*96000*/ 0, 0, 1, 2, 2, 2,/*24000*/ 3, 3, 4, 4, 4, 5, 5, // AAC
|
|
255, 255, 1, 2, 2, 2, 2, 2,/*25600*/ 3, 3, 3, 4, 4, 4, 4 // USAC
|
|
};
|
|
#if !RESTRICT_TO_AAC
|
|
static const uint8_t freqIdxToSwbTableIdx768[USAC_NUM_SAMPLE_RATES + 2] = {
|
|
/*96000*/ 0, 0, 0, 1, 1, 2,/*24000*/ 2, 2, 3, 4, 4, 4, 4, // AAC
|
|
255, 255, 0, 1, 2, 2, 2, 2,/*25600*/ 2, 3, 3, 3, 3, 4, 4 // USAC
|
|
};
|
|
#endif
|
|
|
|
// ISO/IEC 23003-3, Table 131
|
|
static const uint8_t tnsScaleFactorBandLimit[2 /*long/short*/][USAC_NUM_FREQ_TABLES] = { // TNS_MAX_BANDS
|
|
{31, 34, 51 /*to be corrected to 42 (44.1) and 40 (48 kHz)!*/, 47, 43, 40}, {9, 10, 14, 15, 15, 15}
|
|
};
|
|
|
|
static const uint8_t sbrRateOffset[10] = {7, 6, 6, 8, 7, 8, 9, 9, 9, 9}; // used for scaleSBR
|
|
|
|
// scale_factor_grouping map
|
|
// group lengths based on transient location: 1133, 1115, 2114, 3113, 4112, 5111, 3311, 1331
|
|
static const uint8_t scaleFactorGrouping[8] = {0x1B, 0x0F, 0x47, 0x63, 0x71, 0x78, 0x6C, 0x36};
|
|
|
|
static const uint8_t windowGroupingTable[8][NUM_WINDOW_GROUPS] = { // for window_group_length
|
|
{1, 1, 3, 3}, {1, 1, 1, 5}, {2, 1, 1, 4}, {3, 1, 1, 3}, {4, 1, 1, 2}, {5, 1, 1, 1}, {3, 3, 1, 1}, {1, 3, 3, 1}
|
|
};
|
|
|
|
// window_sequence equalizer
|
|
static const USAC_WSEQ windowSequenceSynch[5][5] = { // 1st: chan index 0, 2nd: chan index 1
|
|
{ONLY_LONG, LONG_START, EIGHT_SHORT, LONG_STOP, STOP_START }, // left: ONLY_LONG
|
|
#if RESTRICT_TO_AAC
|
|
{LONG_START, LONG_START, EIGHT_SHORT, EIGHT_SHORT, STOP_START }, // Left: LONG_START
|
|
#else
|
|
{LONG_START, LONG_START, EIGHT_SHORT, STOP_START, STOP_START }, // Left: LONG_START
|
|
#endif
|
|
{EIGHT_SHORT, EIGHT_SHORT, EIGHT_SHORT, EIGHT_SHORT, EIGHT_SHORT}, // Left: EIGHT_SHORT
|
|
#if RESTRICT_TO_AAC
|
|
{LONG_STOP, EIGHT_SHORT, EIGHT_SHORT, LONG_STOP, STOP_START }, // Left: LONG_STOP
|
|
#else
|
|
{LONG_STOP, STOP_START, EIGHT_SHORT, LONG_STOP, STOP_START }, // Left: LONG_STOP
|
|
#endif
|
|
{STOP_START, STOP_START, EIGHT_SHORT, STOP_START, STOP_START } // Left: STOP_START
|
|
};
|
|
|
|
// private helper functions
|
|
// Applies the TNS analysis filter n of tnsData to the MDCT spectrum of one window group
// of a channel and refreshes the group's per-band RMS data afterwards.
//   grpData      : grouping data providing the SFB offsets and receiving the RMS values
//   grpIndex     : index of the window group to be filtered
//   maxSfb       : number of coded scale factor bands, limits the filtered region
//   tnsData      : TNS data; filterLength[n] is set here, and filterOrder[n] as well as
//                  numFilters[n] are cleared if the resulting filter region is empty
//   channelIndex : channel whose MDCT (and MDST) spectra are processed
//   realOnlyCalc : if true, the MDST spectrum is excluded from the RMS calculation
// Returns 0 on success, 1 on invalid arguments, or the ORed error values of the helpers.
unsigned ExhaleEncoder::applyTnsToWinGroup (SfbGroupData& grpData, const uint8_t grpIndex, const uint8_t maxSfb, TnsData& tnsData,
                                            const unsigned channelIndex, const unsigned n, const bool realOnlyCalc)
{
  const uint16_t  filtOrder = tnsData.filterOrder[n];
  const uint16_t* grpSO     = &grpData.sfbOffsets[m_numSwbShort * grpIndex];
  const bool      shortWins = (grpData.numWindowGroups > 1);
  unsigned errorValue = 0; // no error

  if ((grpIndex >= NUM_WINDOW_GROUPS) || (channelIndex >= USAC_MAX_NUM_CHANNELS) ||
      (maxSfb > (shortWins ? MAX_NUM_SWB_SHORT : MAX_NUM_SWB_LONG)))
  {
    return 1; // invalid arguments error
  }

  if (filtOrder == 0)
  {
    return errorValue; // no TNS filter active, nothing to do
  }

  // determine TNS filter length in SFBs
  const int numSwbWin = (shortWins ? m_numSwbShort : m_numSwbLong);
  uint8_t tnsMaxBands = tnsScaleFactorBandLimit[shortWins ? 1 : 0][m_swbTableIdx];
  int     tnsStartSfb = 3 + 32000 / toSamplingRate (m_frequencyIdx); // 8-short TNS start

  if (!shortWins)
  {
    const unsigned samplingRate = toSamplingRate (m_frequencyIdx); // refine TNS_MAX_BANDS
    const unsigned tnsStartOffs = (m_specAnaCurr[channelIndex] & 31) << SA_BW_SHIFT;

    if ((samplingRate >= 46009) && (samplingRate < 55426))
    {
      tnsMaxBands = 40; // for 48 kHz
    }
    else if ((samplingRate >= 37566) && (samplingRate < 46009))
    {
      tnsMaxBands = 42; // & 44.1 kHz
    }

    while (grpSO[tnsStartSfb] < tnsStartOffs)
    {
      tnsStartSfb++; // start band for TNS filter
    }
  }

  tnsMaxBands = __min (tnsMaxBands, maxSfb);
  if (tnsMaxBands <= tnsStartSfb)
  {
    tnsStartSfb = numSwbWin; // filter region is empty
  }

  tnsData.filterLength[n] = __max (0, numSwbWin - tnsStartSfb);
  if (tnsData.filterLength[n] == 0)
  {
    tnsData.filterOrder[n] = tnsData.numFilters[n] = 0; // disable length-0 TNS filters

    return errorValue;
  }

  // apply TNS analysis filtering, moving from the end of the region down to its start
  int32_t* const mdct  = m_mdctSignals[channelIndex];
  const short startOff = grpSO[tnsStartSfb];
  uint16_t   linesLeft = grpSO[tnsMaxBands] - startOff;
  short lpCoeffs[MAX_PREDICTION_ORDER] = {0, 0, 0, 0};
  int32_t* cur = &mdct[grpSO[tnsMaxBands]]; // end of signal region to be filtered

  errorValue |= m_linPredictor.quantTnsToLpCoeffs (tnsData.coeff[n], filtOrder, tnsData.coeffResLow[n], tnsData.coeffParCor[n], lpCoeffs);

  // back up the leading MDCT samples
  memcpy (m_tempIntBuf, &mdct[startOff - MAX_PREDICTION_ORDER], MAX_PREDICTION_ORDER * sizeof (int32_t));
  // TNS compliance: set them to zero
  memset (&mdct[startOff - MAX_PREDICTION_ORDER], 0, MAX_PREDICTION_ORDER * sizeof (int32_t));

  cur--; // last sample of the region

  if (filtOrder >= 4) // max. order 4
  {
    for (; linesLeft > 0; linesLeft--, cur--)
    {
      const int64_t predSample = cur[-1] * (int64_t) lpCoeffs[0] + cur[-2] * (int64_t) lpCoeffs[1] +
                                 cur[-3] * (int64_t) lpCoeffs[2] + cur[-4] * (int64_t) lpCoeffs[3];

      *cur += int32_t ((predSample + (1 << (LP_DEPTH - 2))) >> (LP_DEPTH - 1));
    }
  }
  else if (filtOrder == 3) // order 3
  {
    for (; linesLeft > 0; linesLeft--, cur--)
    {
      const int64_t predSample = cur[-1] * (int64_t) lpCoeffs[0] + cur[-2] * (int64_t) lpCoeffs[1] +
                                 cur[-3] * (int64_t) lpCoeffs[2];

      *cur += int32_t ((predSample + (1 << (LP_DEPTH - 2))) >> (LP_DEPTH - 1));
    }
  }
  else // save 1-2 MACs, order 2 or 1
  {
    for (; linesLeft > 0; linesLeft--, cur--)
    {
      const int64_t predSample = cur[-1] * (int64_t) lpCoeffs[0] + cur[-2] * (int64_t) lpCoeffs[1];

      *cur += int32_t ((predSample + (1 << (LP_DEPTH - 2))) >> (LP_DEPTH - 1));
    }
  }

  // restore the leading MDCT samples
  memcpy (&mdct[startOff - MAX_PREDICTION_ORDER], m_tempIntBuf, MAX_PREDICTION_ORDER * sizeof (int32_t));

  // compute RMS data after filtering: first the bands below the TNS start band ...
  errorValue |= m_specAnalyzer.getMeanAbsValues (mdct, realOnlyCalc ? nullptr : m_mdstSignals[channelIndex],
                                                 grpSO[grpData.sfbsPerGroup], (shortWins ? USAC_MAX_NUM_CHANNELS : channelIndex),
                                                 grpSO /*below TNS*/, __min ((int) grpData.sfbsPerGroup, tnsStartSfb),
                                                 &grpData.sfbRmsValues[m_numSwbShort * grpIndex]);
  // ... then the bands from the TNS start band upward, using the MDCT spectrum only
  errorValue |= m_specAnalyzer.getMeanAbsValues (mdct, nullptr /*no TNS on MDST*/, grpSO[grpData.sfbsPerGroup], channelIndex,
                                                 &grpSO[tnsStartSfb], __max (0, grpData.sfbsPerGroup - tnsStartSfb),
                                                 &grpData.sfbRmsValues[m_numSwbShort * grpIndex + tnsStartSfb]);

  return errorValue;
}
|
|
|
|
// Regroups the spectra of an eight-short-window frame according to grpData: within every
// window group the coefficients of each scale factor band are interleaved window by
// window, and the group's SFB offsets in grpOffsets are rewritten to absolute positions
// in the regrouped spectrum. The groups are processed from last to first, so the
// per-window offsets at the start of grpOffsets stay readable until group 0 is rewritten.
//   grpOffsets : in: per-window SFB offsets in the first m_numSwbShort + 1 entries;
//                out: m_numSwbShort offsets per group plus one terminating end offset
//   mdctSignal : MDCT spectrum, regrouped in place (via m_tempIntBuf)
//   mdstSignal : optional MDST spectrum, regrouped in place (via m_timeSignals[1])
// Returns 0 on success or 1 if grpOffsets or mdctSignal is null.
unsigned ExhaleEncoder::eightShortGrouping (SfbGroupData& grpData, uint16_t* const grpOffsets,
                                            int32_t* const mdctSignal, int32_t* const mdstSignal)
{
  const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
  const unsigned nSamplesInShort = nSamplesInFrame >> 3;
  int32_t* const mdstTempBuf     = m_timeSignals[1]; // NOTE: requires at least stereo input
  unsigned grpStartLine = nSamplesInFrame;

  if ((mdctSignal == nullptr) || (grpOffsets == nullptr))
  {
    return 1; // invalid arguments error
  }

  for (short gr = grpData.numWindowGroups - 1; gr >= 0; gr--) // grouping, 14496-3 Fig. 4.24
  {
    const unsigned  grpLength = grpData.windowGroupLength[gr];
    uint16_t* const grpOffset = &grpOffsets[m_numSwbShort * gr];

    grpStartLine -= nSamplesInShort * grpLength; // first spectral line of this group

    int32_t* const grpMdct = &mdctSignal[grpStartLine];
    int32_t* const grpMdst = (mdstSignal != nullptr ? &mdstSignal[grpStartLine] : nullptr);

    for (uint16_t b = 0; b < m_numSwbShort; b++)
    {
      // per-window band geometry; must be read before grpOffset[b] is overwritten below
      const unsigned swbOffset = grpOffsets[b];
      const unsigned numCoeffs = __min (grpOffsets[b + 1], nSamplesInShort) - swbOffset;
      // adjust scale factor band offsets
      const uint16_t dstOffset = uint16_t (grpStartLine + swbOffset * grpLength);

      grpOffset[b] = dstOffset;

      // interleave spectral coefficients
      for (uint16_t w = 0; w < grpLength; w++)
      {
        memcpy (&m_tempIntBuf[dstOffset + w * numCoeffs], &grpMdct[swbOffset + w * nSamplesInShort], numCoeffs * sizeof (int32_t));

        if (grpMdst != nullptr)
        {
          memcpy (&mdstTempBuf[dstOffset + w * numCoeffs], &grpMdst[swbOffset + w * nSamplesInShort], numCoeffs * sizeof (int32_t));
        }
      }
    }

    grpOffset[m_numSwbShort] = uint16_t (grpStartLine + nSamplesInShort * grpLength); // end of group
  } // for gr

  // copy the regrouped spectra back into the signal buffers
  memcpy (mdctSignal, m_tempIntBuf, nSamplesInFrame * sizeof (int32_t));

  if (mdstSignal != nullptr)
  {
    memcpy (mdstSignal, mdstTempBuf, nSamplesInFrame * sizeof (int32_t));
  }

  return 0; // no error
}
|
|
|
|
unsigned ExhaleEncoder::getOptParCorCoeffs (const SfbGroupData& grpData, const uint8_t maxSfb, TnsData& tnsData,
|
|
const unsigned channelIndex, const uint8_t firstGroupIndexToTest /*= 0*/)
|
|
{
|
|
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
|
|
const unsigned tnsStartSfb = 3 + 32000 / toSamplingRate (m_frequencyIdx); // 8-short start
|
|
uint32_t temp, predGainMax = 0;
|
|
|
|
if ((maxSfb <= tnsStartSfb) || (channelIndex >= USAC_MAX_NUM_CHANNELS))
|
|
{
|
|
return 0; // invalid arguments error
|
|
}
|
|
|
|
if (grpData.numWindowGroups == 1) // LONG window: use ParCor coeffs from spectral analyzer
|
|
{
|
|
tnsData.coeffResLow[0] = false;
|
|
tnsData.filterDownward[0] = false; // enforce direction = 0 for now, detection difficult
|
|
#if EE_MORE_MSE
|
|
tnsData.filterOrder[0] = uint8_t (m_bitRateMode >= EE_MORE_MSE ? 0 : m_specAnalyzer.getLinPredCoeffs (tnsData.coeffParCor[0], channelIndex));
|
|
#else
|
|
tnsData.filterOrder[0] = (uint8_t) m_specAnalyzer.getLinPredCoeffs (tnsData.coeffParCor[0], channelIndex);
|
|
#endif
|
|
tnsData.firstTnsWindow = 0;
|
|
|
|
if (tnsData.filterOrder[0] > 0) // try to reduce TNS start band as long as SNR increases
|
|
{
|
|
const uint16_t filtOrder = tnsData.filterOrder[0];
|
|
uint16_t s = 0,b = __min ((m_specAnaCurr[channelIndex] & 31) + 2, (nSamplesInFrame - filtOrder) >> SA_BW_SHIFT);
|
|
short filterC[MAX_PREDICTION_ORDER] = {0, 0, 0, 0};
|
|
int32_t* predSig = &m_mdctSignals[channelIndex][b << SA_BW_SHIFT]; // TNS start offset
|
|
|
|
m_linPredictor.parCorToLpCoeffs (tnsData.coeffParCor[0], filtOrder, filterC);
|
|
|
|
for (b--, predSig--; b > 0; b--) // start a bit higher; b is in spectr. analysis units
|
|
{
|
|
uint64_t sumAbsOrg = 0, sumAbsTns = 0;
|
|
|
|
if (filtOrder >= 4) // max. order 4
|
|
{
|
|
for (s = SA_BW; s > 0; s--) // get 4th-order TNS residual
|
|
{
|
|
const int64_t predSample = *(predSig - 1) * (int64_t) filterC[0] + *(predSig - 2) * (int64_t) filterC[1] +
|
|
*(predSig - 3) * (int64_t) filterC[2] + *(predSig - 4) * (int64_t) filterC[3];
|
|
const int64_t mdctSample = *(predSig--);
|
|
const int64_t resiSample = mdctSample + ((predSample + (1 << 8)) >> 9);
|
|
|
|
sumAbsOrg += abs (mdctSample); sumAbsTns += abs (resiSample);
|
|
}
|
|
}
|
|
else if (filtOrder == 3) // order 3
|
|
{
|
|
for (s = SA_BW; s > 0; s--) // get 3rd-order TNS residual
|
|
{
|
|
const int64_t predSample = *(predSig - 1) * (int64_t) filterC[0] + *(predSig - 2) * (int64_t) filterC[1] +
|
|
*(predSig - 3) * (int64_t) filterC[2];
|
|
const int64_t mdctSample = *(predSig--);
|
|
const int64_t resiSample = mdctSample + ((predSample + (1 << 8)) >> 9);
|
|
|
|
sumAbsOrg += abs (mdctSample); sumAbsTns += abs (resiSample);
|
|
}
|
|
}
|
|
else // save 1-2 MACs, order 2 or 1
|
|
{
|
|
for (s = SA_BW; s > 0; s--) // get 2nd-order TNS residual
|
|
{
|
|
const int64_t predSample = *(predSig - 1) * (int64_t) filterC[0] + *(predSig - 2) * (int64_t) filterC[1];
|
|
const int64_t mdctSample = *(predSig--);
|
|
const int64_t resiSample = mdctSample + ((predSample + (1 << 8)) >> 9);
|
|
|
|
sumAbsOrg += abs (mdctSample); sumAbsTns += abs (resiSample);
|
|
}
|
|
}
|
|
if (sumAbsOrg * 17 <= sumAbsTns * 16) break; // band SNR reduced by more than 0.5 dB
|
|
}
|
|
m_specAnaCurr[channelIndex] = (m_specAnaCurr[channelIndex] & (UINT_MAX - 31)) | (b + 1);
|
|
} // if order > 0
|
|
|
|
return (m_specAnaCurr[channelIndex] >> 24) & UCHAR_MAX; // spectral analyzer's pred gain
|
|
}
|
|
|
|
// SHORT window: for each length-1 group, get TNS filter, then determine best filter order
|
|
tnsData.firstTnsWindow = UCHAR_MAX;
|
|
for (uint8_t n = 0, gr = 0; gr < grpData.numWindowGroups; gr++)
|
|
{
|
|
if (grpData.windowGroupLength[gr] == 1)
|
|
{
|
|
tnsData.coeffResLow[n] = false;
|
|
tnsData.filterDownward[n] = false; // force direction = 0 for now, detection difficult
|
|
tnsData.filterOrder[n] = 0;
|
|
if (tnsData.firstTnsWindow == UCHAR_MAX) tnsData.firstTnsWindow = gr;
|
|
|
|
if (gr < firstGroupIndexToTest)
|
|
{
|
|
memset (tnsData.coeffParCor[n], 0, MAX_PREDICTION_ORDER * sizeof (int16_t));
|
|
}
|
|
else // first length-one group tested
|
|
{
|
|
const int32_t* signal = m_mdctSignals[channelIndex];
|
|
const uint16_t* grpSO = &grpData.sfbOffsets[m_numSwbShort * gr];
|
|
uint32_t predGainCurr, predGainPrev, bestOrder = MAX_PREDICTION_ORDER;
|
|
|
|
temp = m_linPredictor.calcParCorCoeffs (&signal[grpSO[tnsStartSfb]], grpSO[maxSfb] - grpSO[tnsStartSfb], bestOrder, tnsData.coeffParCor[n]);
|
|
if (predGainMax < temp) predGainMax = temp; // maximum pred gain of filtered groups
|
|
|
|
predGainCurr = (temp >> 24) & UCHAR_MAX;
|
|
predGainPrev = (temp >> 16) & UCHAR_MAX;
|
|
while ((predGainPrev >= predGainCurr) && --bestOrder > 1) // lowest-order gain max.
|
|
{
|
|
predGainCurr = predGainPrev;
|
|
predGainPrev = (temp >> (8 * bestOrder - 16)) & UCHAR_MAX;
|
|
}
|
|
#if EE_MORE_MSE
|
|
tnsData.filterOrder[n] = uint8_t (m_bitRateMode >= EE_MORE_MSE ? 0 : ((bestOrder == 1) && (tnsData.coeffParCor[n][0] == 0) ? 0 : bestOrder));
|
|
#else
|
|
tnsData.filterOrder[n] = uint8_t ((bestOrder == 1) && (tnsData.coeffParCor[n][0] == 0) ? 0 : bestOrder);
|
|
#endif
|
|
}
|
|
n++;
|
|
}
|
|
} // for gr
|
|
|
|
return (predGainMax >> 24) & UCHAR_MAX; // max pred gain of all orders and length-1 groups
|
|
}
|
|
|
|
uint32_t ExhaleEncoder::getThr (const unsigned channelIndex, const unsigned sfbIndex)
|
|
{
|
|
const uint16_t* const sfbLoudMem = m_sfbLoudMem[channelIndex][sfbIndex];
|
|
uint32_t sumSfbLoud = 0;
|
|
|
|
for (int16_t s = 31; s >= 0; s--) sumSfbLoud += sfbLoudMem[s];
|
|
sumSfbLoud = (sumSfbLoud + 32) >> 6; // -6 dB
|
|
|
|
return sumSfbLoud * (sumSfbLoud >> (toSamplingRate (m_frequencyIdx) >> 13)); // scaled SMR
|
|
}
|
|
|
|
// Perceptual bit allocation for the current frame. Obtains per-SFB step-sizes from m_bitAllocator
// (held in m_tempIntBuf), then for every element: optionally applies stereo pre-processing and
// joint-stereo coding, scales the step-sizes by rate-dependent factors and converts them into
// scale factors stored in the element's grouping data. May also extend sfbsPerGroup / maxSfb.
// Returns the OR-combined error value of the helper calls; 0 means no error.
unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via scale factors
|
|
{
|
|
const unsigned nChannels = toNumChannels (m_channelConf);
|
|
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
|
|
const unsigned samplingRate = toSamplingRate (m_frequencyIdx);
|
|
// an LFE channel index is only derived for configurations >= CCI_6_CH, else USAC_MAX_NUM_CHANNELS is used
const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
|
|
const bool useMaxBandwidth = (samplingRate < 37566 || m_shiftValSBR > 0);
|
|
const uint8_t maxSfbLong = (useMaxBandwidth ? m_numSwbLong : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
|
|
const uint16_t scaleSBR = (m_shiftValSBR > 0 || m_nonMpegExt ? sbrRateOffset[m_bitRateMode] : 0); // -25% rate
|
|
// scaleSr and scaleBr: scaling constants computed from sampling rate, bit-rate mode, channel count, and SBR setting
const uint64_t scaleSr = (samplingRate < 27713 ? (samplingRate < 23004 ? 32 : 34) - __min (3 << m_shiftValSBR, m_bitRateMode)
|
|
: (samplingRate < 37566 && m_bitRateMode != 3u ? 36 : 37)) - (nChannels >> 1);
|
|
const uint64_t scaleBr = (m_bitRateMode == 0 || m_frameCount <= 1 ? __min (32, 17u + (((samplingRate + (1 << 11)) >> 12) << 1) - (nChannels >> 1))
|
|
: scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1)) + scaleSBR;
|
|
// step-sizes live in m_tempIntBuf and are addressed per channel in strides of m_numSwbShort * NUM_WINDOW_GROUPS
uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf;
|
|
uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS];
|
|
unsigned ci = 0, s; // running index
|
|
unsigned errorValue = 0; // no error
|
|
|
|
// psychoacoustic processing of SFB RMS values yielding masking thresholds in m_tempIntBuf
|
|
errorValue |= m_bitAllocator.initSfbStepSizes (m_scaleFacData, m_numSwbShort, m_specAnaCurr, m_tempAnaCurr,
|
|
nChannels, samplingRate, sfbStepSizes, lfeChannelIndex);
|
|
|
|
// get means of spectral and temporal flatness for every channel
|
|
m_bitAllocator.getChAverageSpecFlat (meanSpecFlat, nChannels);
|
|
|
|
for (unsigned el = 0; el < m_numElements; el++) // element loop
|
|
{
|
|
CoreCoderData& coreConfig = *m_elementData[el];
|
|
const unsigned nrChannels = (coreConfig.elementType & 1) + 1; // for UsacCoreCoderData()
|
|
|
|
if (coreConfig.elementType >= ID_USAC_LFE) // LFE/EXT elements
|
|
{
|
|
SfbGroupData& grpData = coreConfig.groupingData[0];
|
|
uint32_t* stepSizes = &sfbStepSizes[ci * m_numSwbShort * NUM_WINDOW_GROUPS];
|
|
const uint16_t* off = grpData.sfbOffsets;
|
|
const uint32_t* rms = grpData.sfbRmsValues;
|
|
uint8_t* scaleFactors = grpData.scaleFactors;
|
|
|
|
for (uint16_t b = 0; b < grpData.sfbsPerGroup; b++)
|
|
{
|
|
const unsigned lfConst = (samplingRate < 27713 ? 1 : 2);
|
|
const unsigned lfAtten = 4 + b * lfConst; // LF SNR boost, cf my M.Sc. thesis, p. 54
|
|
const uint8_t sfbWidth = off[b + 1] - off[b];
|
|
const uint64_t scale = scaleBr * __min (32, lfAtten); // rate control part 1 (SFB)
|
|
|
|
// scale step-sizes according to VBR mode, then derive scale factors from step-sizes
|
|
stepSizes[b] = uint32_t (__max (BA_EPS, ((1u << 9) + stepSizes[b] * scale) >> 10));
|
|
|
|
scaleFactors[b] = m_bitAllocator.getScaleFac (stepSizes[b], &m_mdctSignals[ci][off[b]], sfbWidth, rms[b]);
|
|
}
|
|
ci++;
|
|
}
|
|
else // SCE or CPE: bandwidth-to-max_sfb mapping, short-window grouping for each channel
|
|
{
|
|
const bool eightShorts0 = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT);
|
|
const TnsData& tnsData0 = coreConfig.tnsData[0];
|
|
const TnsData& tnsData1 = coreConfig.tnsData[1];
|
|
// initial value: SWB count minus the larger of the two first TNS filter lengths; may be lowered further below
uint8_t realOnlyStartSfb = (eightShorts0 ? m_numSwbShort : m_numSwbLong) - __max (tnsData0.filterLength[0], tnsData1.filterLength[0]);
|
|
|
|
// switch stereoMode from 0 to 1 when a common window is used and one of the two correlation values is high enough
if (coreConfig.commonWindow && (coreConfig.stereoMode == 0) && (m_perCorrHCurr[el] > SCHAR_MAX || m_perCorrLCurr[el] > (UCHAR_MAX * 5) / 8))
|
|
{
|
|
coreConfig.stereoMode = 1;
|
|
}
|
|
if (m_perCorrHCurr[el] > 128) // execute stereo pre-processing to increase correlation
|
|
{
|
|
const int16_t chanCorrSign = (coreConfig.stereoConfig & 2 ? -1 : 1);
|
|
const uint16_t nSamplesMax = (useMaxBandwidth ? nSamplesInFrame : swbOffsetsL[m_swbTableIdx][__min (m_numSwbLong, maxSfbLong + 1)]);
|
|
const bool reducedStrength = (coreConfig.tnsActive && (m_bitRateMode > 0)) || (m_bitRateMode >= 5);
|
|
const uint8_t steppFadeLen = (eightShorts0 ? 4 : (reducedStrength ? 32 : 64));
|
|
const uint8_t steppFadeOff = ((m_bitRateMode + 77000 / samplingRate) & 6) << (eightShorts0 ? 2 : 5);
|
|
const int64_t steppWeightI = __min (64, m_perCorrHCurr[el] - 128) >> (eightShorts0 || reducedStrength ? 1 : 0); // crosstalk * 128
|
|
const int64_t steppWeightD = 128 - steppWeightI; // decrement, (1 - crosstalk) * 128
|
|
|
|
// n counts the length-1 groups seen so far and indexes numFilters[] of both channels' TNS data
for (uint16_t n = 0, gr = 0; gr < coreConfig.groupingData[0].numWindowGroups; gr++)
|
|
{
|
|
const uint8_t grpLength = coreConfig.groupingData[0].windowGroupLength[gr];
|
|
const uint16_t* grpOff = &coreConfig.groupingData[0].sfbOffsets[m_numSwbShort * gr];
|
|
const uint16_t grpStart = grpOff[0] + steppFadeOff * grpLength;
|
|
int32_t* sigR0 = &m_mdctSignals[ci][grpStart];
|
|
int32_t* sigR1 = &m_mdctSignals[ci + 1][grpStart];
|
|
int64_t xTalkI = 0, xTalkD = 0; // weights for crosstalk
|
|
|
|
if ((grpLength == 1) && (tnsData0.numFilters[n] > 0 || tnsData1.numFilters[n] > 0))
|
|
{
|
|
const uint16_t maxLen = (eightShorts0 ? grpOff[m_numSwbShort] - 1 : __min (nSamplesInFrame - 1u, nSamplesMax)) - grpStart;
|
|
int32_t prevR0 = 0; // NOTE: functions also on grouped
|
|
int32_t prevR1 = 0; // MDCT spectra, but not properly!
|
|
|
|
for (uint16_t w = 0; w < grpLength; w++) // sub-window
|
|
{
|
|
prevR0 = *(sigR0++); prevR1 = *(sigR1++); // processing starts at offset of 1!
|
|
xTalkI = steppWeightI;
|
|
xTalkD = steppWeightD * (2 * steppFadeLen - 1);
|
|
|
|
for (s = steppFadeLen - 1; s > 0; s--, sigR0++, sigR1++) // start with fade-in
|
|
{
|
|
applyStereoPreProcessingReal (sigR0, sigR1, &prevR0, &prevR1, xTalkI, xTalkD, chanCorrSign);
|
|
xTalkI += steppWeightI;
|
|
xTalkD -= steppWeightD;
|
|
}
|
|
}
|
|
for (s = maxLen - steppFadeLen * grpLength; s > 0; s--, sigR0++, sigR1++) // end
|
|
{
|
|
applyStereoPreProcessingReal (sigR0, sigR1, &prevR0, &prevR1, xTalkI, xTalkD, chanCorrSign);
|
|
}
|
|
if (eightShorts0 || (nSamplesMax >= nSamplesInFrame)) *sigR0 = *sigR1 = 0;
|
|
|
|
realOnlyStartSfb = __min (realOnlyStartSfb, __min ((eightShorts0 ? 5 : 24), steppFadeOff / (eightShorts0 ? 4 : 7)));
|
|
}
|
|
else // TNS inactive, both MDCTs and MDSTs are available
|
|
{
|
|
const uint16_t maxLen = (eightShorts0 ? grpOff[m_numSwbShort] : nSamplesMax) - grpStart;
|
|
int32_t* sigI0 = &m_mdstSignals[ci][grpStart]; // imag
|
|
int32_t* sigI1 = &m_mdstSignals[ci + 1][grpStart];
|
|
|
|
for (uint16_t w = 0; w < grpLength; w++) // sub-window
|
|
{
|
|
sigR0++; sigR1++; sigI0++; sigI1++; // processing starts at an offset of 1!
|
|
xTalkI = steppWeightI;
|
|
xTalkD = steppWeightD * (2 * steppFadeLen - 1);
|
|
|
|
for (s = steppFadeLen - 1; s > 0; s--, sigR0++, sigR1++, sigI0++, sigI1++)
|
|
{
|
|
applyStereoPreProcessingCplx (sigR0, sigR1, sigI0, sigI1, xTalkI, xTalkD, chanCorrSign);
|
|
xTalkI += steppWeightI;
|
|
xTalkD -= steppWeightD;
|
|
}
|
|
}
|
|
for (s = maxLen - steppFadeLen * grpLength; s > 0; s--, sigR0++, sigR1++, sigI0++, sigI1++)
|
|
{
|
|
applyStereoPreProcessingCplx (sigR0, sigR1, sigI0, sigI1, xTalkI, xTalkD, chanCorrSign);
|
|
}
|
|
}
|
|
if (grpLength == 1) n++;
|
|
}
|
|
} // if m_perCorrHCurr[el] > 128
|
|
|
|
if ((errorValue == 0) && (coreConfig.stereoMode > 0)) // perform M/S, synch statistics
|
|
{
|
|
const uint8_t numSwbFrame = (eightShorts0 ? m_numSwbShort : __min (m_numSwbLong, maxSfbLong + 1));
|
|
const uint32_t peakIndexSte = __max ((m_specAnaCurr[ci] >> 5) & 2047, (m_specAnaCurr[ci + 1] >> 5) & 2047) << 5;
|
|
|
|
errorValue = m_stereoCoder.applyPredJointStereo (m_mdctSignals[ci], m_mdctSignals[ci + 1],
|
|
m_mdstSignals[ci], m_mdstSignals[ci + 1],
|
|
coreConfig.groupingData[0], coreConfig.groupingData[1],
|
|
coreConfig.tnsData[0], coreConfig.tnsData[1],
|
|
numSwbFrame, coreConfig.stereoDataCurr,
|
|
m_bitRateMode, coreConfig.stereoMode > 1,
|
|
(coreConfig.stereoConfig & 2) > 0, realOnlyStartSfb,
|
|
&sfbStepSizes[m_numSwbShort * NUM_WINDOW_GROUPS * ci],
|
|
&sfbStepSizes[m_numSwbShort * NUM_WINDOW_GROUPS * (ci + 1)]);
|
|
if (errorValue >= 2) // signal M/S with complex prediction
|
|
{
|
|
coreConfig.stereoConfig |= (errorValue & 7) - 2; // dir.
|
|
coreConfig.stereoMode += 2; errorValue = 0;
|
|
}
|
|
// both channels receive the same peak index (the larger one) in bits 5-15 of their analysis word
m_specAnaCurr[ci ] = (m_specAnaCurr[ci ] & (UINT_MAX - 65504)) | peakIndexSte;
|
|
m_specAnaCurr[ci + 1] = (m_specAnaCurr[ci + 1] & (UINT_MAX - 65504)) | peakIndexSte;
|
|
meanSpecFlat[ci] = meanSpecFlat[ci + 1] = ((uint16_t) meanSpecFlat[ci] + (uint16_t) meanSpecFlat[ci + 1]) >> 1;
|
|
}
|
|
// no joint-stereo coding performed: clear the stereo data of this element
else memset (coreConfig.stereoDataCurr, 0, (eightShorts0 || !coreConfig.commonWindow
|
|
? MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS : MAX_NUM_SWB_LONG) * sizeof (uint8_t));
|
|
errorValue |= m_bitAllocator.imprSfbStepSizes (m_scaleFacData, m_numSwbShort, m_mdctSignals, nSamplesInFrame, nrChannels,
|
|
((32 + 5 * m_shiftValSBR) * samplingRate) >> 5, sfbStepSizes, ci, meanSpecFlat,
|
|
coreConfig.commonWindow, coreConfig.stereoDataCurr, coreConfig.stereoConfig);
|
|
|
|
for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
|
|
{
|
|
SfbGroupData& grpData = coreConfig.groupingData[ch];
|
|
const bool eightShorts = (coreConfig.icsInfoCurr[ch].windowSequence == EIGHT_SHORT);
|
|
const uint8_t maxSfbCh = grpData.sfbsPerGroup;
|
|
#if !RESTRICT_TO_AAC
|
|
const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong);
|
|
#endif
|
|
#if !EE_MORE_MSE
|
|
const uint16_t rateFac = m_bitAllocator.getRateCtrlFac (m_rateFactor, samplingRate, meanSpecFlat[ci], coreConfig.icsInfoPrev[ch].windowSequence == EIGHT_SHORT);
|
|
#endif
|
|
uint32_t* stepSizes = &sfbStepSizes[ci * m_numSwbShort * NUM_WINDOW_GROUPS];
|
|
|
|
// reset all scale factors of this channel before new ones are derived group by group
memset (grpData.scaleFactors, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint8_t));
|
|
|
|
for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++)
|
|
{
|
|
const uint16_t* grpOff = &grpData.sfbOffsets[m_numSwbShort * gr];
|
|
const uint32_t* grpRms = &grpData.sfbRmsValues[m_numSwbShort * gr];
|
|
uint8_t* grpScaleFacs = &grpData.scaleFactors[m_numSwbShort * gr];
|
|
uint32_t* grpStepSizes = &stepSizes[m_numSwbShort * gr];
|
|
|
|
#if EE_MORE_MSE
|
|
s = 0;
|
|
for (unsigned b = grpOff[0]; b < grpOff[maxSfbCh]; b++)
|
|
{
|
|
s += unsigned (0.5 + sqrt ((double) abs (m_mdctSignals[ci][b])));
|
|
}
|
|
if (el == 0 && nrChannels == 2)
|
|
{
|
|
for (unsigned b = grpOff[0]; b < grpOff[maxSfbCh]; b++)
|
|
{
|
|
s += unsigned (0.5 + sqrt ((double) abs (m_mdctSignals[1 - ci][b])));
|
|
}
|
|
s = (s + 1u) >> 1;
|
|
}
|
|
if (grpOff[maxSfbCh] > grpOff[0])
|
|
{
|
|
s = unsigned ((s * (eightShorts ? (24u + (grpData.windowGroupLength[gr] >> 2)) / grpData.windowGroupLength[gr] : 3u) + 4096u) >> 13);
|
|
# ifndef NO_PREROLL_DATA
|
|
if (((m_frameCount - 1u) % (m_indepPeriod << 1)) == 1 && m_numElements == 1 && !eightShorts) s = (4u + 9u * s) >> 3;
|
|
# endif
|
|
}
|
|
s = __max (1u + ((UINT32_MAX / (eightShorts ? 3u : 8u)) >> ((2 + m_bitRateMode / 9) * m_bitRateMode)), s * s);
|
|
#endif
|
|
for (unsigned b = 0; b < maxSfbCh; b++)
|
|
{
|
|
#if EE_MORE_MSE
|
|
const uint8_t sfbWidth = grpOff[b + 1] - grpOff[b];
|
|
const bool stereoCoded = (nrChannels == 2 && coreConfig.stereoMode > 0 && (coreConfig.stereoDataCurr[b] > 0 || !(coreConfig.stereoMode & 1)));
|
|
const uint32_t rmsbMax = (stereoCoded ? __max (grpRms[b], coreConfig.groupingData[1 - ch].sfbRmsValues[m_numSwbShort * gr + b]) : grpRms[b]);
|
|
const uint64_t sThresh = __max (1u + (UINT32_MAX >> 30), (rmsbMax * uint64_t (__max (16, b * b * grpData.numWindowGroups)) + 32u) >> 6);
|
|
const uint64_t predFac = (eightShorts || coreConfig.stereoMode < 3 || coreConfig.stereoDataCurr[b & 62] == 0 ? (eightShorts && !b ? 48u : 64u) :
|
|
uint64_t (0.5 + 64 - pow (__min (1.0, fabs (coreConfig.stereoDataCurr[b & 62] * 0.1 - 1.6)), 1.5) * 19.0)); // MS
|
|
grpStepSizes[b] = uint32_t (__min (sThresh, (s * predFac + 32u) >> 6));
|
|
if (stereoCoded && rmsbMax)
|
|
{
|
|
const uint32_t rmsCh = coreConfig.groupingData[1 - ch].sfbRmsValues[m_numSwbShort * gr + b];
|
|
|
|
grpStepSizes[b] = uint32_t (0.5 + grpStepSizes[b] * (1.0 - sqrt ((double) __min (grpRms[b], rmsCh) / rmsbMax) * 0.29289322));
|
|
}
|
|
#else
|
|
const unsigned lfConst = (samplingRate < 27713 && !eightShorts ? 1 : 2); // lfAtten: LF SNR boost, as in my M.Sc. thesis
|
|
const unsigned lfAtten = (b <= 5 ? (eightShorts ? 1 : 4) + b * lfConst : 5 * lfConst - 1 + b + ((b + 5) >> 4));
|
|
const uint8_t sfbWidth = grpOff[b + 1] - grpOff[b];
|
|
const uint64_t scale = scaleBr * rateFac * __min (32, lfAtten * grpData.numWindowGroups); // rate control part 1 (SFB)
|
|
|
|
// scale step-sizes according to VBR mode & derive scale factors from step-sizes
|
|
grpStepSizes[b] = uint32_t (__max (BA_EPS, ((1u << 24) + grpStepSizes[b] * scale) >> 25));
|
|
#endif
|
|
#if !RESTRICT_TO_AAC
|
|
if (!m_noiseFilling[el] || (m_bitRateMode > 0) || (m_shiftValSBR == 0) || (samplingRate < 23004) ||
|
|
(b + 3 - (meanSpecFlat[ci] >> 6) < m_numSwbLong)) // HF
|
|
#endif
|
|
grpScaleFacs[b] = m_bitAllocator.getScaleFac (grpStepSizes[b], &m_mdctSignals[ci][grpOff[b]], sfbWidth, grpRms[b]);
|
|
}
|
|
} // for gr
|
|
|
|
#if !RESTRICT_TO_AAC
|
|
if ((maxSfbCh > 0) && m_noiseFilling[el] && (m_shiftValSBR > 0 || m_bitRateMode <= 3 || !eightShorts))
|
|
{
|
|
const uint32_t maxSfbCurr = (eightShorts ? (useMaxBandwidth ? __min (15, 17 - (samplingRate >> 13) + (samplingRate >> 15))
|
|
: brModeAndFsToMaxSfbShort (m_bitRateMode, samplingRate)) : maxSfbLong);
|
|
const bool keepMaxSfbCurr = ((samplingRate < 37566) || (samplingRate >= 46009 && samplingRate < 55426 && eightShorts));
|
|
const uint8_t numSwbFrame = __min ((numSwbCh * ((maxSfbCh == maxSfbCurr) || (m_bitRateMode <= 2) || (m_shiftValSBR > 0) ? 4u : 3u)) >> 2,
|
|
maxSfbCurr + (m_bitRateMode < 2 || m_bitRateMode > 3 || keepMaxSfbCurr ? 0u : 1u));
|
|
|
|
if ((m_bitRateMode == 0) && (m_numElements == 1) && (samplingRate < 27713) && eightShorts)
|
|
{
|
|
for (s = 0; s < 26; s++) m_sfbLoudMem[ch][s][m_frameCount & 31] = uint16_t (sqrt (double (getThr (ch, s) << (samplingRate >> 13))));
|
|
}
|
|
if ((maxSfbCh < numSwbFrame) || (m_bitRateMode <= 2)) // increase coding bandwidth
|
|
{
|
|
for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++)
|
|
{
|
|
const uint32_t* grpRms = &grpData.sfbRmsValues[m_numSwbShort * gr];
|
|
|
|
if ((m_bitRateMode == 0) && (m_numElements == 1) && (samplingRate < 27713))
|
|
{
|
|
const uint32_t* refRms = &coreConfig.groupingData[1 - ch].sfbRmsValues[m_numSwbShort * gr];
|
|
uint8_t* grpStereoData = &coreConfig.stereoDataCurr[m_numSwbShort * gr];
|
|
const unsigned sfbStart = __max (samplingRate < 18783 ? 17 : 24, m_specGapFiller.getFirstGapFillSfb ());
|
|
|
|
for (s = sfbStart; s < maxSfbCh; s++)
|
|
{
|
|
const double rmsValue = double (grpStereoData[s] > 0 ? (grpRms[s] + (uint64_t) refRms[s] + 1) >> 1 : grpRms[s]);
|
|
const unsigned sfbIdx = s - sfbStart;
|
|
|
|
// store the current value in the 32-slot history, then zero the scale factor if the RMS lies below getThr ()
m_sfbLoudMem[ch][sfbIdx][m_frameCount & 31] = __max (BA_EPS, uint16_t (sqrt (rmsValue)));
|
|
if (grpRms[s] < getThr (ch, sfbIdx)) grpData.scaleFactors[s + m_numSwbShort * gr] = 0;
|
|
}
|
|
}
|
|
else if ((m_bitRateMode <= 4) && (meanSpecFlat[ci] <= (SCHAR_MAX >> 1))) // lo
|
|
{
|
|
for (s = __max (samplingRate < 27713 ? (samplingRate < 18783 ? 17 : 24) : 22, m_specGapFiller.getFirstGapFillSfb ()); s < maxSfbCh; s++)
|
|
{
|
|
if (grpRms[s] < ((3 * TA_EPS) >> 1)) grpData.scaleFactors[s + m_numSwbShort * gr] = 0;
|
|
}
|
|
}
|
|
|
|
// NOTE(review): the byte count is only non-negative if maxSfbCh <= numSwbFrame - confirm this always holds here
memset (&grpData.scaleFactors[maxSfbCh + m_numSwbShort * gr], 0, (numSwbFrame - maxSfbCh) * sizeof (uint8_t));
|
|
}
|
|
// the SFB count and max_sfb of this channel become the larger of the previous and the extended value
grpData.sfbsPerGroup = coreConfig.icsInfoCurr[ch].maxSfb = __max (maxSfbCh, numSwbFrame);
|
|
}
|
|
if (ch > 0 && coreConfig.commonWindow) // resynchronize the two max_sfb for stereo
|
|
{
|
|
uint8_t& maxSfb0 = coreConfig.icsInfoCurr[0].maxSfb;
|
|
uint8_t& maxSfb1 = coreConfig.icsInfoCurr[1].maxSfb;
|
|
|
|
if (coreConfig.stereoMode > 0)
|
|
{
|
|
maxSfb0 = maxSfb1 = coreConfig.groupingData[0].sfbsPerGroup = grpData.sfbsPerGroup = __max (maxSfb0, maxSfb1);
|
|
}
|
|
coreConfig.commonMaxSfb = (maxSfb0 == maxSfb1);
|
|
}
|
|
}
|
|
else if (m_noiseFilling[el] && (m_bitRateMode == 0) && (m_numElements == 1) && (samplingRate < 27713))
|
|
{
|
|
for (s = 0; s < 26; s++) m_sfbLoudMem[ch][s][m_frameCount & 31] = BA_EPS;
|
|
}
|
|
#endif
|
|
ci++;
|
|
} // for ch
|
|
|
|
for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
|
|
{
|
|
SfbGroupData& grpData = coreConfig.groupingData[ch];
|
|
TnsData& tnsData = coreConfig.tnsData[ch];
|
|
|
|
// only done when at least one of the first three numFilters entries is nonzero
if (tnsData.numFilters[0] + tnsData.numFilters[1] + tnsData.numFilters[2] > 0)
|
|
{
|
|
s = tnsData.firstTnsWindow = 0; // store length-1 group map for bit-stream writing
|
|
// s is the start window of group gr; bit s of firstTnsWindow is set for every group of length 1
for (uint16_t gr = 0; gr < grpData.numWindowGroups; s += grpData.windowGroupLength[gr++])
|
|
{
|
|
if (grpData.windowGroupLength[gr] == 1) tnsData.firstTnsWindow |= (1u << s);
|
|
}
|
|
}
|
|
} // for ch
|
|
}
|
|
} // for el
|
|
|
|
return errorValue;
|
|
}
|
|
|
|
// Quantizes the MDCT spectra of all channels SFB by SFB via m_sfbQuantizer, applies a second
// rate-control stage per window group based on an estimated bit count, derives noise-filling
// parameters (unless RESTRICT_TO_AAC) and, when m_shiftValSBR > 0, SBR data stored in m_coreSignals.
// The entropy coder's tuple memory is backed up to m_tempIntBuf before and restored after the
// per-channel processing. Returns the result of m_outStream.createAudioFrame () (commented in the
// code as the AU size), or 0 when an internal error occurred.
unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and entropy coding
|
|
{
|
|
const unsigned nChannels = toNumChannels (m_channelConf);
|
|
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
|
|
const unsigned samplingRate = toSamplingRate (m_frequencyIdx);
|
|
const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead
|
|
#if !EE_MORE_MSE
|
|
const bool useMaxBandwidth = (samplingRate < 37566 || m_shiftValSBR > 0);
|
|
#endif
|
|
const unsigned* const coeffMagn = m_sfbQuantizer.getCoeffMagnPtr ();
|
|
uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS];
|
|
// temporal flatness defaults to 208 for each channel unless overwritten by getChAverageTempFlat () below
uint8_t meanTempFlat[USAC_MAX_NUM_CHANNELS] = {208, 208, 208, 208, 208, 208, 208, 208};
|
|
unsigned ci = 0, s; // running index
|
|
// a missing coefficient magnitude buffer is treated as an error from the start
unsigned errorValue = (coeffMagn == nullptr ? 1 : 0);
|
|
|
|
// get means of spectral and temporal flatness for every channel
|
|
m_bitAllocator.getChAverageSpecFlat (meanSpecFlat, nChannels);
|
|
if ((m_bitRateMode < (2u >> m_shiftValSBR)) && (samplingRate >= 23004) && (samplingRate < 37566))
|
|
{
|
|
m_bitAllocator.getChAverageTempFlat (meanTempFlat, nChannels);
|
|
}
|
|
|
|
for (unsigned el = 0; el < m_numElements; el++) // element loop
|
|
{
|
|
CoreCoderData& coreConfig = *m_elementData[el];
|
|
const unsigned nrChannels = (coreConfig.elementType & 1) + 1; // for UsacCoreCoderData()
|
|
|
|
if ((coreConfig.elementType < ID_USAC_LFE) && (coreConfig.stereoMode > 0)) // synch SFMs
|
|
{
|
|
meanSpecFlat[ci] = meanSpecFlat[ci + 1] = ((uint16_t) meanSpecFlat[ci] + (uint16_t) meanSpecFlat[ci + 1]) >> 1;
|
|
meanTempFlat[ci] = meanTempFlat[ci + 1] = ((uint16_t) meanTempFlat[ci] + (uint16_t) meanTempFlat[ci + 1]) >> 1;
|
|
}
|
|
|
|
for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
|
|
{
|
|
EntropyCoder& entrCoder = m_entropyCoder[ci];
|
|
SfbGroupData& grpData = coreConfig.groupingData[ch];
|
|
const bool shortWinCurr = (coreConfig.icsInfoCurr[ch].windowSequence == EIGHT_SHORT);
|
|
const bool shortWinPrev = (coreConfig.icsInfoPrev[ch].windowSequence == EIGHT_SHORT);
|
|
char* const arithTuples = entrCoder.arithGetTuplePtr ();
|
|
uint8_t sfIdxPred = UCHAR_MAX;
|
|
|
|
if ((errorValue > 0) || (arithTuples == nullptr))
|
|
{
|
|
return 0; // an internal error
|
|
}
|
|
|
|
// back up entropy coder memory for use by bit-stream writer
|
|
memcpy (m_tempIntBuf, arithTuples, (nSamplesInFrame >> 1) * sizeof (char));
|
|
errorValue |= (entrCoder.getIsShortWindow () != shortWinPrev ? 1 : 0); // sanity check
|
|
|
|
memset (m_mdctQuantMag[ci], 0, nSamplesInFrame * sizeof (uint8_t)); // initialization
|
|
|
|
for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++)
|
|
{
|
|
const uint8_t grpLength = grpData.windowGroupLength[gr];
|
|
const uint16_t* grpOff = &grpData.sfbOffsets[m_numSwbShort * gr];
|
|
uint32_t* const grpRms = &grpData.sfbRmsValues[m_numSwbShort * gr]; // coding stats
|
|
uint8_t* grpScaleFacs = &grpData.scaleFactors[m_numSwbShort * gr];
|
|
// running sum of the low 16 bits of the grpRms[] entries, used as bit count estimate for this group
uint32_t estimBitCount = 0;
|
|
unsigned lastSfb = 0, lastSOff = 0;
|
|
|
|
errorValue |= entrCoder.initWindowCoding (m_indepFlag && (gr == 0), shortWinCurr);
|
|
s = 0;
|
|
|
|
for (uint16_t b = 0; b < grpData.sfbsPerGroup; b++)
|
|
{
|
|
// partial SFB ungrouping for entropy coding setup below
|
|
const uint16_t swbSize = ((grpOff[b + 1] - grpOff[b]) * oneTwentyEightOver[grpLength]) >> 7; // sfbWidth / grpLength
|
|
uint8_t* const swbMagn = &m_mdctQuantMag[ci][grpOff[b + 1] - swbSize];
|
|
|
|
grpScaleFacs[b] = m_sfbQuantizer.quantizeSpecSfb (entrCoder, m_mdctSignals[ci], grpLength, grpOff, grpRms,
|
|
b, grpScaleFacs[b], sfIdxPred, m_mdctQuantMag[ci]);
|
|
if ((b > 0) && (grpScaleFacs[b] < UCHAR_MAX) && (sfIdxPred == UCHAR_MAX))
|
|
{
|
|
// back-propagate first nonzero-SFB scale factor index
|
|
memset (grpScaleFacs, grpScaleFacs[b], b * sizeof (uint8_t));
|
|
}
|
|
sfIdxPred = grpScaleFacs[b];
|
|
|
|
// correct previous scale factor if the delta exceeds 60
|
|
if ((b > 0) && (grpScaleFacs[b] > grpScaleFacs[b - 1] + INDEX_OFFSET))
|
|
{
|
|
const uint16_t sfbM1Start = grpOff[b - 1];
|
|
const uint16_t sfbM1Width = grpOff[b] - sfbM1Start;
|
|
const uint16_t swbM1Size = (sfbM1Width * oneTwentyEightOver[grpLength]) >> 7; // sfbM1Width / grpLength
|
|
|
|
grpScaleFacs[b - 1] = grpScaleFacs[b] - (b > 1 ? INDEX_OFFSET : 0); // zero-out
|
|
memset (&m_mdctQuantMag[ci][sfbM1Start], 0, sfbM1Width * sizeof (uint8_t));
|
|
|
|
// correct SFB statistics with some bit count estimate
|
|
grpRms[b - 1] = 1 + (sfbM1Width >> 3) + entrCoder.indexGetBitCount (b > 1 ? (int) grpScaleFacs[b - 1] - grpScaleFacs[b - 2] : 0);
|
|
// correct entropy coding 2-tuples for the next window
|
|
memset (&arithTuples[lastSOff], 1, (swbM1Size >> 1) * sizeof (char));
|
|
}
|
|
// correct next scale factor if the reduction exceeds 60
|
|
if ((b + 1u < grpData.sfbsPerGroup) && (sfIdxPred < UCHAR_MAX) && (grpLength == 1) &&
|
|
(grpScaleFacs[b] > grpScaleFacs[b + 1] + INDEX_OFFSET))
|
|
{
|
|
grpScaleFacs[b + 1] = grpScaleFacs[b] - INDEX_OFFSET; // avoid preset-9 zero-out
|
|
}
|
|
|
|
// statistics of the previous SFB are final now: track the last SFB whose grpRms[] has nonzero upper 16 bits
if (b > 0)
|
|
{
|
|
if ((grpRms[b - 1] >> 16) > 0) lastSfb = b - 1;
|
|
estimBitCount += grpRms[b - 1] & USHRT_MAX;
|
|
}
|
|
// set up entropy coding 2-tuples for next SFB or window
|
|
lastSOff = s;
|
|
for (uint16_t c = 0; c < swbSize; c += 2)
|
|
{
|
|
arithTuples[s++] = __min (0xF, swbMagn[c] + swbMagn[c + 1] + 1); // 23003-3, 7.4
|
|
}
|
|
} // for b
|
|
|
|
if (grpData.sfbsPerGroup > 0) // rate control part 2 to reach constrained VBR (CVBR)
|
|
{
|
|
#if EE_MORE_MSE
|
|
const unsigned targetBitCount25 = INT32_MAX;
|
|
#else
|
|
const uint8_t maxSfbLong = (useMaxBandwidth ? 54 - (samplingRate >> 13) : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
|
|
const uint8_t maxSfbShort = (useMaxBandwidth ? 19 - (samplingRate >> 13) : brModeAndFsToMaxSfbShort(m_bitRateMode, samplingRate));
|
|
const uint16_t peakIndex = (shortWinCurr ? 0 : (m_specAnaCurr[ci] >> 5) & 2047);
|
|
const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort - 2 + (meanSpecFlat[ci] >> 6) : maxSfbLong - 6 + (meanSpecFlat[ci] >> 5)) +
|
|
(shortWinCurr ? -3 + (((1 << 5) + meanTempFlat[ci]) >> 6) : -7 + (((1 << 4) + meanTempFlat[ci]) >> 5));
|
|
const unsigned targetBitCount25 = ((60000 + 20000 * ((m_bitRateMode + m_shiftValSBR) >> (m_frameCount <= 1 ? 2 : 0))) * nSamplesInFrame) /
|
|
(samplingRate * ((grpData.numWindowGroups + 1) >> 1));
|
|
#endif
|
|
// the SFB loop above only accounted for SFBs 0 .. sfbsPerGroup - 2, so add the final SFB here
unsigned b = grpData.sfbsPerGroup - 1;
|
|
|
|
if ((grpRms[b] >> 16) > 0) lastSfb = b;
|
|
estimBitCount += grpRms[b] & USHRT_MAX;
|
|
|
|
#if EC_TRELLIS_OPT_CODING
|
|
if (grpLength == 1) // finalize bit count estimate, RDOC
|
|
{
|
|
estimBitCount = m_sfbQuantizer.quantizeSpecRDOC (entrCoder, grpScaleFacs, estimBitCount + 2u,
|
|
grpOff, grpRms, grpData.sfbsPerGroup, m_mdctQuantMag[ci]);
|
|
for (b = 1; b < grpData.sfbsPerGroup; b++)
|
|
{
|
|
// correct previous scale factor if delta exceeds 60
|
|
if (grpScaleFacs[b] > grpScaleFacs[b - 1] + INDEX_OFFSET)
|
|
{
|
|
const uint16_t sfbM1Start = grpOff[b - 1];
|
|
const uint16_t sfbM1Width = grpOff[b] - sfbM1Start;
|
|
|
|
grpScaleFacs[b - 1] = grpScaleFacs[b] - (b > 1 ? INDEX_OFFSET : 0); // 0-out
|
|
memset (&m_mdctQuantMag[ci][sfbM1Start], 0, sfbM1Width * sizeof (uint8_t));
|
|
|
|
// correct statistics with some bit count estimate
|
|
grpRms[b - 1] = 1 + (sfbM1Width >> 3) + entrCoder.indexGetBitCount (b > 1 ? (int) grpScaleFacs[b - 1] - grpScaleFacs[b - 2] : 0);
|
|
// correct entropy coding 2-tuples for next window
|
|
memset (&arithTuples[(sfbM1Start - grpOff[0]) >> 1], 1, (sfbM1Width >> 1) * sizeof (char));
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
#if EE_MORE_MSE
|
|
b = lastSfb;
|
|
#else
|
|
// coarse-quantize near-Nyquist SFB with SBR @ 48-64 kHz
|
|
b = 40 + (samplingRate >> 12);
|
|
if ((m_shiftValSBR == 0) || (samplingRate < 23004) || shortWinCurr || (b > lastSfb)) b = lastSfb;
|
|
|
|
while ((b >= sfmBasedSfbStart + (m_bitRateMode >> 1) + (m_bitRateMode / 5)) && (grpOff[b] > peakIndex) && ((grpRms[b] >> 16) <= 1) &&
|
|
((estimBitCount * 5 > targetBitCount25 * 2) || (grpLength > 1 /*no accurate bit count estim. available for grouped spectrum*/)))
|
|
{
|
|
b--; // search first coarsely quantized high-freq. SFB
|
|
}
|
|
#endif
|
|
// lastSOff is reused here as an SFB index: SFBs above it, up to lastSfb, are re-quantized next
lastSOff = b;
|
|
|
|
for (b++; b <= lastSfb; b++)
|
|
{
|
|
if ((grpRms[b] >> 16) > 0) // re-quantize nonzero band
|
|
{
|
|
#if RESTRICT_TO_AAC
|
|
uint32_t maxVal = 1;
|
|
#else
|
|
uint32_t maxVal = (shortWinCurr || !m_noiseFilling[el] ? 1 : (m_specAnaCurr[ci] >> 23) & 1); // 1 or 0
|
|
#endif
|
|
estimBitCount -= grpRms[b] & USHRT_MAX;
|
|
grpRms[b] = (maxVal << 16) + maxVal; // bit estimate
|
|
maxVal = quantizeSfbWithMinSnr (coeffMagn, grpOff, b, grpLength, m_mdctQuantMag[ci], arithTuples, maxVal > 0);
|
|
|
|
grpScaleFacs[b] = __min (SCHAR_MAX, m_sfbQuantizer.getScaleFacOffset ((double) maxVal));
|
|
|
|
// correct SFB statistics with estimate of bit count
|
|
grpRms[b] += 3 + entrCoder.indexGetBitCount ((int) grpScaleFacs[b] - grpScaleFacs[b - 1]);
|
|
estimBitCount += grpRms[b] & USHRT_MAX;
|
|
}
|
|
else // re-repeat scale factor for zero quantized band
|
|
{
|
|
grpScaleFacs[b] = grpScaleFacs[b - 1];
|
|
}
|
|
}
|
|
|
|
if (estimBitCount > targetBitCount25) // too many bits!!
|
|
{
|
|
// walk downward from the stored start SFB and stop as soon as the estimate meets the target
for (b = lastSOff; b > 0; b--)
|
|
{
|
|
if ((grpRms[b] >> 16) > 0) // emergency re-quantizer
|
|
{
|
|
#if RESTRICT_TO_AAC
|
|
uint32_t maxVal = 1;
|
|
#else
|
|
uint32_t maxVal = (shortWinCurr || !m_noiseFilling[el] ? 1 : (m_specAnaCurr[ci] >> 23) & 1); // 1 or 0
|
|
#endif
|
|
estimBitCount -= grpRms[b] & USHRT_MAX;
|
|
grpRms[b] = (maxVal << 16) + maxVal; // bit estim.
|
|
maxVal = quantizeSfbWithMinSnr (coeffMagn, grpOff, b, grpLength, m_mdctQuantMag[ci], arithTuples, maxVal > 0);
|
|
|
|
grpScaleFacs[b] = __min (SCHAR_MAX, m_sfbQuantizer.getScaleFacOffset ((double) maxVal));
|
|
|
|
// correct SFB statistics with estimated bit count
|
|
grpRms[b] += 3 + entrCoder.indexGetBitCount ((int) grpScaleFacs[b] - grpScaleFacs[b - 1]);
|
|
estimBitCount += grpRms[b] & USHRT_MAX;
|
|
}
|
|
if (estimBitCount <= targetBitCount25) break;
|
|
}
|
|
|
|
for (b++; b <= lastSfb; b++) // re-repeat scale factor
|
|
{
|
|
if ((grpRms[b] >> 16) == 0) // a zero quantized band
|
|
{
|
|
grpScaleFacs[b] = grpScaleFacs[b - 1];
|
|
}
|
|
}
|
|
} // if estimBitCount > targetBitCount25
|
|
|
|
for (b = lastSfb + 1; b < grpData.sfbsPerGroup; b++)
|
|
{
|
|
if ((grpRms[b] >> 16) == 0) // HF zero quantized bands
|
|
{
|
|
grpScaleFacs[b] = grpScaleFacs[b - 1];
|
|
}
|
|
}
|
|
|
|
if ((grpScaleFacs[0] == UCHAR_MAX) &&
|
|
#if !RESTRICT_TO_AAC
|
|
!m_noiseFilling[el] &&
|
|
#endif
|
|
(lastSfb == 0)) // ensure all scale factors are set
|
|
{
|
|
memset (grpScaleFacs, (gr == 1 ? grpData.scaleFactors[grpData.sfbsPerGroup - 1] : 0), grpData.sfbsPerGroup * sizeof (uint8_t));
|
|
}
|
|
}
|
|
} // for gr
|
|
|
|
// restore entropy coder memory for use by bit-stream writer
|
|
memcpy (arithTuples, m_tempIntBuf, (nSamplesInFrame >> 1) * sizeof (char));
|
|
entrCoder.setIsShortWindow (shortWinPrev);
|
|
#if !RESTRICT_TO_AAC
|
|
s = 22050 + 7350 * m_bitRateMode; // compute channel-wise noise_level and noise_offset
|
|
sfIdxPred = ((m_bitRateMode == 0) && (m_priLength) && (m_shiftValSBR) && ((m_tempAnaCurr[ci] >> 24) || (m_tempAnaNext[ci] >> 24)) && (meanSpecFlat[ci] +
|
|
__min ((m_tempAnaCurr[ci] >> 16) & UCHAR_MAX, (m_tempAnaNext[ci] >> 16) & UCHAR_MAX) >= 192) ? UCHAR_MAX : meanSpecFlat[ci]);
|
|
coreConfig.specFillData[ch] = (!m_noiseFilling[el] ? 0 : m_specGapFiller.getSpecGapFillParams (m_sfbQuantizer, m_mdctQuantMag[ci], m_numSwbShort,
|
|
grpData, nSamplesInFrame, samplingRate, s,
|
|
shortWinCurr ? 0 : sfIdxPred));
|
|
// a returned value of 1 is treated as an error
if (coreConfig.specFillData[ch] == 1) errorValue |= 1;
|
|
#endif
|
|
s = ci + nrChannels - 1 - 2 * ch; // other channel in stereo
|
|
if ((coreConfig.elementType < ID_USAC_LFE) && (m_shiftValSBR > 0)) // collect SBR data
|
|
{
|
|
const uint8_t msfVal = (shortWinPrev ? 31 : __max (2, __max (m_meanSpecPrev[ci], meanSpecFlat[ci]) >> 3));
|
|
const uint8_t msfSte = (coreConfig.stereoMode == 0 ? 0 : (coreConfig.icsInfoPrev[s + ch - ci].windowSequence ==
|
|
EIGHT_SHORT ? 31 : __max (2, __max (m_meanSpecPrev[s ], meanSpecFlat[s ]) >> 3)));
|
|
int32_t tmpValSynch = 0;
|
|
|
|
// the first 10 values of m_coreSignals[ci] are cleared and then used to hold the SBR data of this channel
memset (m_coreSignals[ci], 0, 10 * sizeof (int32_t));
|
|
#if ENABLE_INTERTES
|
|
m_coreSignals[ci][0] = 0x40000000; // bs_interTes = 1 for all frames
|
|
#endif
|
|
m_coreSignals[ci][0] |= 4 - int32_t (sqrt (0.75 * msfVal)); // filter mode, 0 = none
|
|
|
|
if (ch > 0 && coreConfig.stereoMode > 0) // synch. sbr_grid(), sbr_invf() for stereo
|
|
{
|
|
tmpValSynch = (m_coreSignals[s][0] >> 21) & 3; // nEnv, bits 23-22
|
|
m_coreSignals[ci][0] |= m_coreSignals[s][0] & 0x10000F; // bits 21
|
|
m_coreSignals[s][0] |= m_coreSignals[ci][0] & 0x10000F; // and 4-1
|
|
}
|
|
m_coreSignals[ci][0] |= getSbrEnvelopeAndNoise (&m_coreSignals[ci][nSamplesTempAna - 64 + nSamplesInFrame], msfVal,
|
|
__max (m_meanTempPrev[ci], meanTempFlat[ci]) >> 3, m_bitRateMode == 0,
|
|
m_indepFlag, msfSte, tmpValSynch, nSamplesInFrame, &m_coreSignals[ci][1]);
|
|
if (ch + 1 == nrChannels) // update the flatness histories
|
|
{
|
|
m_meanSpecPrev[ci] = meanSpecFlat[ci]; m_meanSpecPrev[s] = meanSpecFlat[s];
|
|
m_meanTempPrev[ci] = meanTempFlat[ci]; m_meanTempPrev[s] = meanTempFlat[s];
|
|
}
|
|
}
|
|
ci++;
|
|
}
|
|
} // for el
|
|
#if !RESTRICT_TO_AAC
|
|
m_rateFactor = samplingRate; // rate ctrl
|
|
#endif
|
|
// m_tempIntBuf is handed to the bit-stream writer as a byte buffer; an earlier error yields a return value of 0
return (errorValue > 0 ? 0 : m_outStream.createAudioFrame (m_elementData, m_entropyCoder, m_mdctSignals, m_mdctQuantMag, m_indepFlag,
|
|
m_numElements, m_numSwbShort, (uint8_t* const) m_tempIntBuf,
|
|
#if !RESTRICT_TO_AAC
|
|
m_timeWarping, m_noiseFilling, m_frameCount - 1u, m_indepPeriod, &m_rateFactor,
|
|
#endif
|
|
m_shiftValSBR, m_coreSignals, m_outAuData, nSamplesInFrame)); // returns AU size
|
|
}
|
|
|
|
unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS and SFB data
|
|
{
|
|
const unsigned nChannels = toNumChannels (m_channelConf);
|
|
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
|
|
const unsigned nSamplesInShort = nSamplesInFrame >> 3;
|
|
const unsigned samplingRate = toSamplingRate (m_frequencyIdx);
|
|
const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
|
|
const bool useMaxBandwidth = (samplingRate < 37566 || m_shiftValSBR > 0);
|
|
unsigned ci = 0, s; // running index
|
|
unsigned errorValue = 0; // no error
|
|
|
|
// get spectral channel statistics for last frame, used for input bandwidth (BW) detection
|
|
m_specAnalyzer.getSpectralBandwidth (m_bandwidPrev, nChannels);
|
|
|
|
// spectral analysis for current MCLT signal (windowed time-samples for the current frame)
|
|
errorValue |= m_specAnalyzer.spectralAnalysis (m_mdctSignals, m_mdstSignals, nChannels, nSamplesInFrame, samplingRate, lfeChannelIndex);
|
|
|
|
// get spectral channel statistics for this frame, used for perceptual model & BW detector
|
|
m_specAnalyzer.getSpecAnalysisStats (m_specAnaCurr, nChannels);
|
|
m_specAnalyzer.getSpectralBandwidth (m_bandwidCurr, nChannels);
|
|
|
|
for (unsigned el = 0; el < m_numElements; el++) // element loop
|
|
{
|
|
CoreCoderData& coreConfig = *m_elementData[el];
|
|
const unsigned nrChannels = (coreConfig.elementType & 1) + 1; // for UsacCoreCoderData()
|
|
|
|
coreConfig.commonMaxSfb = false;
|
|
coreConfig.commonTnsData = false;
|
|
coreConfig.tnsActive = false;
|
|
coreConfig.tnsOnLeftRight = true; // enforce tns_on_lr = 1 for now, detection difficult
|
|
memset (coreConfig.tnsData, 0, nrChannels * sizeof (TnsData));
|
|
|
|
if (coreConfig.elementType >= ID_USAC_LFE) // LFE/EXT elements
|
|
{
|
|
SfbGroupData& grpData = coreConfig.groupingData[0];
|
|
uint16_t* grpSO = grpData.sfbOffsets;
|
|
IcsInfo& icsCurr = coreConfig.icsInfoCurr[0];
|
|
|
|
memcpy (grpSO, swbOffsetsL[m_swbTableIdx], numSwbOffsetL[m_swbTableIdx] * sizeof (uint16_t));
|
|
|
|
icsCurr.maxSfb = MAX_NUM_SWB_LFE;
|
|
while (grpSO[icsCurr.maxSfb] > LFE_MAX) icsCurr.maxSfb--; // limit coefficients in LFE
|
|
grpData.sfbsPerGroup = icsCurr.maxSfb;
|
|
ci++;
|
|
}
|
|
else // SCE or CPE: bandwidth-to-max_sfb mapping, short-window grouping for each channel
|
|
{
|
|
coreConfig.stereoConfig = coreConfig.stereoMode = 0;
|
|
|
|
if (coreConfig.commonWindow && (m_bitRateMode <= 5)) // stereo pre-processing analysis
|
|
{
|
|
const bool eightShorts = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT);
|
|
const uint8_t meanSpecFlat = (((m_specAnaCurr[ci] >> 16) & UCHAR_MAX) + ((m_specAnaCurr[ci + 1] >> 16) & UCHAR_MAX) + 1) >> 1;
|
|
const uint16_t* const swbo = swbOffsetsL[m_swbTableIdx];
|
|
const uint16_t nSamplesMax = (useMaxBandwidth ? nSamplesInFrame : swbo[brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)]);
|
|
const int16_t steAnaStats = m_specAnalyzer.stereoSigAnalysis (m_mdctSignals[ci], m_mdctSignals[ci + 1], m_mdstSignals[ci], m_mdstSignals[ci + 1],
|
|
nSamplesMax, nSamplesInFrame, eightShorts, coreConfig.stereoDataCurr);
|
|
if (steAnaStats == SHRT_MIN) errorValue = 1;
|
|
|
|
if ((s = abs (steAnaStats)) * m_perCorrHCurr[el] == 0) // transition to/from silence
|
|
{
|
|
m_perCorrHCurr[el] = uint8_t ((32 + s * __min (64, eightTimesSqrt256Minus[meanSpecFlat])) >> 6);
|
|
}
|
|
else // gentle overlap length dependent temporal smoothing
|
|
{
|
|
const int16_t allowedDiff = (coreConfig.icsInfoCurr[0].windowSequence < EIGHT_SHORT ? 16 : 32);
|
|
const int16_t prevPerCorr = __max (128, __min (192, m_perCorrHCurr[el]));
|
|
const int16_t currPerCorr = (32 + s * __min (64, eightTimesSqrt256Minus[meanSpecFlat])) >> 6;
|
|
|
|
m_perCorrHCurr[el] = (uint8_t) __max (prevPerCorr - allowedDiff, __min (prevPerCorr + allowedDiff, currPerCorr));
|
|
}
|
|
m_perCorrLCurr[el] = coreConfig.stereoDataCurr[0];
|
|
|
|
if ((int) s == steAnaStats * -1) coreConfig.stereoConfig = 2; // 2: S>M, pred_dir=1
|
|
if (s > (UCHAR_MAX * (6u + m_shiftValSBR)) / 8) coreConfig.stereoMode = 2; // 2: all
|
|
if (s >= UCHAR_MAX - 2u + (m_bitRateMode / 5) + (meanSpecFlat >> 6)) coreConfig.stereoConfig |= 8; // tuning for mono-in-stereo audio
|
|
}
|
|
else if (nrChannels > 1) m_perCorrHCurr[el] = m_perCorrLCurr[el] = 128; // "mid" value
|
|
|
|
for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
|
|
{
|
|
SfbGroupData& grpData = coreConfig.groupingData[ch];
|
|
uint16_t* grpSO = grpData.sfbOffsets;
|
|
IcsInfo& icsCurr = coreConfig.icsInfoCurr[ch];
|
|
TnsData& tnsData = coreConfig.tnsData[ch];
|
|
|
|
memset (grpSO, 0, (1 + MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint16_t));
|
|
|
|
if (icsCurr.windowSequence != EIGHT_SHORT)
|
|
{
|
|
memcpy (grpSO, swbOffsetsL[m_swbTableIdx], numSwbOffsetL[m_swbTableIdx] * sizeof (uint16_t));
|
|
|
|
icsCurr.maxSfb = 0;
|
|
while (grpSO[icsCurr.maxSfb] < nSamplesInFrame) icsCurr.maxSfb++; // num_swb_long
|
|
grpSO[icsCurr.maxSfb] = (uint16_t) nSamplesInFrame;
|
|
grpData.sfbsPerGroup = m_numSwbLong = icsCurr.maxSfb; // changed to max_sfb later
|
|
|
|
if (samplingRate > 32000) // set max_sfb based on VBR mode and bandwidth detection
|
|
{
|
|
if (icsCurr.maxSfb > 49) // may still be 51 for 32 kHz
|
|
{
|
|
grpData.sfbsPerGroup = m_numSwbLong = icsCurr.maxSfb = 49; // fix 44.1, 48 kHz
|
|
}
|
|
icsCurr.maxSfb = __min (icsCurr.maxSfb, brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
|
|
}
|
|
#if !EE_MORE_MSE
|
|
while (grpSO[icsCurr.maxSfb] > __max (m_bandwidCurr[ci], m_bandwidPrev[ci]) + (icsCurr.maxSfb >> 1)) icsCurr.maxSfb--; // detect BW
|
|
#endif
|
|
}
|
|
else // icsCurr.windowSequence == EIGHT_SHORT
|
|
{
|
|
memcpy (grpSO, swbOffsetsS[m_swbTableIdx], numSwbOffsetS[m_swbTableIdx] * sizeof (uint16_t));
|
|
|
|
icsCurr.maxSfb = 0;
|
|
while (grpSO[icsCurr.maxSfb] < nSamplesInShort) icsCurr.maxSfb++; // num_swb_short
|
|
grpSO[icsCurr.maxSfb] = (uint16_t) nSamplesInShort;
|
|
grpData.sfbsPerGroup = m_numSwbShort = icsCurr.maxSfb; // changed to max_sfb later
|
|
|
|
if (samplingRate > 32000) // set max_sfb based on VBR mode and zero-ness detection
|
|
{
|
|
icsCurr.maxSfb = __min (icsCurr.maxSfb, brModeAndFsToMaxSfbShort (m_bitRateMode, samplingRate));
|
|
}
|
|
|
|
if (ch > 0 && coreConfig.commonWindow) // resynchronize the scale_factor_grouping
|
|
{
|
|
if (icsCurr.windowGrouping != coreConfig.icsInfoCurr[0].windowGrouping)
|
|
{
|
|
icsCurr.windowGrouping = coreConfig.icsInfoCurr[0].windowGrouping;
|
|
}
|
|
}
|
|
else // first element channel or not common_window, optimize scale_factor_grouping
|
|
{
|
|
if ((s = m_specAnalyzer.optimizeGrouping (ci, grpSO[icsCurr.maxSfb] << 3, icsCurr.windowGrouping)) < 8)
|
|
{
|
|
icsCurr.windowGrouping = (uint8_t) s;
|
|
}
|
|
}
|
|
memcpy (grpData.windowGroupLength, windowGroupingTable[icsCurr.windowGrouping], NUM_WINDOW_GROUPS * sizeof (uint8_t));
|
|
#if !EE_MORE_MSE
|
|
findActualBandwidthShort (&icsCurr.maxSfb, grpSO, m_mdctSignals[ci], nChannels < 2 ? nullptr : m_mdstSignals[ci], nSamplesInShort);
|
|
#endif
|
|
errorValue |= eightShortGrouping (grpData, grpSO, m_mdctSignals[ci], nChannels < 2 ? nullptr : m_mdstSignals[ci]);
|
|
} // if EIGHT_SHORT
|
|
|
|
// compute and quantize optimal TNS coefficients, then find optimal TNS filter order
|
|
s = getOptParCorCoeffs (grpData, icsCurr.maxSfb, tnsData, ci, (ch > 0 && coreConfig.commonWindow ? coreConfig.tnsData[0].firstTnsWindow : 0));
|
|
|
|
for (uint16_t n = 0, gr = 0; gr < grpData.numWindowGroups; gr++)
|
|
{
|
|
if (grpData.windowGroupLength[gr] == 1)
|
|
{
|
|
const uint8_t tonality = (m_specAnaCurr[ci] >> 16) & UCHAR_MAX;
|
|
#if EE_MORE_MSE
|
|
bool noTnsFilt = (m_bitRateMode >= EE_MORE_MSE || icsCurr.maxSfb <= 40);
|
|
|
|
if (!noTnsFilt && samplingRate >= 27713 && samplingRate < 55426 && icsCurr.maxSfb > 40)
|
|
{
|
|
errorValue |= m_specAnalyzer.getMeanAbsValues (m_mdctSignals[ci], m_mdstSignals[ci], nSamplesInFrame, ci, &grpSO[29], 12, grpData.sfbRmsValues);
|
|
if (errorValue == 0)
|
|
{
|
|
for (int b = 0; b < 12; b++)
|
|
{
|
|
errorValue += unsigned (0.5 + sqrt ((double) grpData.sfbRmsValues[b]));
|
|
}
|
|
noTnsFilt |= (errorValue < ((unsigned) m_bitRateMode << 7)); // avoid clicks
|
|
errorValue = 0;
|
|
}
|
|
}
|
|
if (noTnsFilt) tnsData.filterOrder[n] = 0; else
|
|
#endif
|
|
tnsData.filterOrder[n] = m_linPredictor.calcOptTnsCoeffs (tnsData.coeffParCor[n], tnsData.coeff[n], &tnsData.coeffResLow[n],
|
|
tnsData.filterOrder[n], s, tonality >> (m_tempFlatPrev[ci] >> 5));
|
|
tnsData.numFilters[n] = (tnsData.filterOrder[n] > 0 ? 1 : 0);
|
|
if ((ch == 0) && (icsCurr.windowSequence == EIGHT_SHORT) && (tnsData.numFilters[n] == 0) && (tnsData.firstTnsWindow == gr))
|
|
{
|
|
tnsData.firstTnsWindow++; // simplify TNS stereo synching in eight-short frame
|
|
}
|
|
n++;
|
|
}
|
|
}
|
|
m_tempFlatPrev[ci++] = (uint8_t) s;
|
|
} // for ch
|
|
|
|
if (coreConfig.commonWindow) // synchronization of all StereoCoreToolInfo() components
|
|
{
|
|
uint8_t& maxSfb0 = coreConfig.icsInfoCurr[0].maxSfb;
|
|
uint8_t& maxSfb1 = coreConfig.icsInfoCurr[1].maxSfb;
|
|
const uint8_t maxSfbSte = __max (maxSfb0, maxSfb1); // max_sfb_ste, as in Table 24
|
|
|
|
if ((maxSfb0 > 0) && (maxSfb1 > 0) && (maxSfbSte - __min (maxSfb0, maxSfb1) <= 1 || coreConfig.stereoMode > 0))
|
|
{
|
|
uint32_t& sac0 = m_specAnaCurr[ci-2];
|
|
uint32_t& sac1 = m_specAnaCurr[ci-1];
|
|
TnsData& tnsData0 = coreConfig.tnsData[0];
|
|
TnsData& tnsData1 = coreConfig.tnsData[1];
|
|
const int specFlat[2] = {int (sac0 >> 16) & UCHAR_MAX, int (sac1 >> 16) & UCHAR_MAX};
|
|
const int tnsStart[2] = {int (sac0 & 31), int (sac1 & 31)}; // long TNS start band
|
|
|
|
if ((abs (specFlat[0] - specFlat[1]) <= (UCHAR_MAX >> 3)) &&
|
|
(abs (tnsStart[0] - tnsStart[1]) <= (UCHAR_MAX >> 4))) // TNS synchronization
|
|
{
|
|
coreConfig.commonTnsData = true;
|
|
for (uint16_t n = 0; n < 3; n++)
|
|
{
|
|
if ((s = __max (tnsData0.filterOrder[n], tnsData1.filterOrder[n])) == 0) continue;
|
|
|
|
if ((coreConfig.stereoMode > 0) || m_linPredictor.similarParCorCoeffs (tnsData0.coeffParCor[n], tnsData1.coeffParCor[n], s, LP_DEPTH))
|
|
{
|
|
applyTnsCoeff2ChannelSynch (m_linPredictor, tnsData0, tnsData1, s, n, &coreConfig.commonTnsData);
|
|
}
|
|
else if ((m_bitRateMode <= 5) && (m_perCorrHCurr[el] > 128))
|
|
{
|
|
applyTnsCoeffPreProcessing (m_linPredictor, tnsData0, tnsData1, s, n, &coreConfig.commonTnsData, m_perCorrHCurr[el] - 128);
|
|
}
|
|
else coreConfig.commonTnsData = false;
|
|
}
|
|
|
|
if (coreConfig.commonTnsData || (abs (tnsStart[0] - tnsStart[1]) <= (UCHAR_MAX >> 5)))
|
|
{
|
|
const uint32_t avgTnsStart = (tnsStart[0] + tnsStart[1]) >> 1; // synch start
|
|
|
|
sac0 = (sac0 & (UINT_MAX - 31)) | avgTnsStart; // used by applyTnsToWinGroup()
|
|
sac1 = (sac1 & (UINT_MAX - 31)) | avgTnsStart;
|
|
}
|
|
}
|
|
maxSfb0 = maxSfb1 = maxSfbSte;
|
|
|
|
if ((m_bitRateMode <= 5) && (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT))
|
|
{
|
|
m_perCorrLCurr[el] = stereoCorrGrouping (coreConfig.groupingData[0], nSamplesInFrame, coreConfig.stereoDataCurr);
|
|
}
|
|
}
|
|
else coreConfig.stereoMode = 0; // since a max_sfb is 0
|
|
|
|
coreConfig.commonMaxSfb = (maxSfb0 == maxSfb1); // synch
|
|
} // if coreConfig.commonWindow
|
|
}
|
|
|
|
ci -= nrChannels; // zero frequency coefficients above num_swb for all channels, windows
|
|
|
|
for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
|
|
{
|
|
SfbGroupData& grpData = coreConfig.groupingData[ch];
|
|
const uint16_t* grpSO = grpData.sfbOffsets;
|
|
const IcsInfo& icsCurr = coreConfig.icsInfoCurr[ch];
|
|
const bool eightShorts = (icsCurr.windowSequence == EIGHT_SHORT);
|
|
|
|
if (eightShorts) // map grouping table idx to scale_factor_grouping idx for bit-stream
|
|
{
|
|
coreConfig.icsInfoCurr[ch].windowGrouping = scaleFactorGrouping[icsCurr.windowGrouping];
|
|
}
|
|
s = 0;
|
|
for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++)
|
|
{
|
|
const unsigned grMax = grpSO[grpData.sfbsPerGroup + m_numSwbShort * gr];
|
|
|
|
s += (eightShorts ? nSamplesInShort : nSamplesInFrame) * grpData.windowGroupLength[gr];
|
|
memset (&m_mdctSignals[ci][grMax], 0, (s - grMax) * sizeof (int32_t));
|
|
memset (&m_mdstSignals[ci][grMax], 0, (s - grMax) * sizeof (int32_t));
|
|
}
|
|
memset (grpData.sfbRmsValues, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint32_t));
|
|
|
|
if (icsCurr.maxSfb > 0)
|
|
{
|
|
// use MCLTs for LONG but only MDCTs for SHORT windows when the MDSTs aren't grouped
|
|
const uint8_t* nFilters = coreConfig.tnsData[ch].numFilters;
|
|
const bool realOnlyCalc = (eightShorts && nChannels < 2);
|
|
|
|
for (uint8_t n = 0, gr = 0; gr < grpData.numWindowGroups; gr++)
|
|
{
|
|
if (grpData.windowGroupLength[gr] == 1)
|
|
{
|
|
errorValue |= applyTnsToWinGroup (grpData, gr, grpData.sfbsPerGroup, coreConfig.tnsData[ch], ci, n, realOnlyCalc);
|
|
coreConfig.tnsActive |= (nFilters[n++] > 0); // set tns_data_present, tns_active
|
|
}
|
|
if ((grpData.windowGroupLength[gr] > 1) || (nFilters[n - 1] == 0))
|
|
{
|
|
s = m_numSwbShort * gr;
|
|
errorValue |= m_specAnalyzer.getMeanAbsValues (m_mdctSignals[ci], realOnlyCalc ? nullptr : m_mdstSignals[ci],
|
|
grpSO[grpData.sfbsPerGroup + s], (eightShorts ? USAC_MAX_NUM_CHANNELS : ci),
|
|
&grpSO[s], grpData.sfbsPerGroup, &grpData.sfbRmsValues[s]);
|
|
}
|
|
}
|
|
}
|
|
|
|
grpData.sfbsPerGroup = icsCurr.maxSfb; // change num_swb to max_sfb for coding process
|
|
ci++;
|
|
}
|
|
} // for el
|
|
|
|
return errorValue;
|
|
}
|
|
|
|
unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects of ics_info()
|
|
{
|
|
const unsigned nChannels = toNumChannels (m_channelConf);
|
|
const unsigned nSamplesInFrame = toFrameLength (m_frameLength) << m_shiftValSBR;
|
|
const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead
|
|
const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
|
|
unsigned ci = 0; // running ch index
|
|
unsigned errorValue = 0; // no error
|
|
|
|
// get temporal channel statistics for this frame, used for spectral grouping/quantization
|
|
m_tempAnalyzer.getTempAnalysisStats (m_tempAnaCurr, nChannels);
|
|
m_tempAnalyzer.getTransientAndPitch (m_tranLocCurr, nChannels);
|
|
|
|
// temporal analysis for look-ahead signal (central nSamplesInFrame samples of next frame)
|
|
errorValue |= m_tempAnalyzer.temporalAnalysis (m_timeSignals, nChannels, nSamplesInFrame, nSamplesTempAna,
|
|
m_shiftValSBR, m_coreSignals, lfeChannelIndex);
|
|
// get temporal channel statistics for next frame, used for window length/overlap decision
|
|
m_tempAnalyzer.getTempAnalysisStats (m_tempAnaNext, nChannels);
|
|
m_tempAnalyzer.getTransientAndPitch (m_tranLocNext, nChannels);
|
|
|
|
#ifdef NO_PREROLL_DATA
|
|
m_indepFlag = (((m_frameCount++) % m_indepPeriod) == 0); // configure usacIndependencyFlag
|
|
#else
|
|
m_indepFlag = (((m_frameCount++) % m_indepPeriod) <= 1); // configure usacIndependencyFlag
|
|
#endif
|
|
|
|
for (unsigned el = 0; el < m_numElements; el++) // element loop
|
|
{
|
|
CoreCoderData& coreConfig = *m_elementData[el];
|
|
const unsigned nrChannels = (coreConfig.elementType & 1) + 1; // for UsacCoreCoderData()
|
|
|
|
coreConfig.commonWindow = false;
|
|
coreConfig.icsInfoPrev[0] = coreConfig.icsInfoCurr[0];
|
|
coreConfig.icsInfoPrev[1] = coreConfig.icsInfoCurr[1];
|
|
|
|
if (coreConfig.elementType >= ID_USAC_LFE) // LFE/EXT elements
|
|
{
|
|
IcsInfo& icsCurr = coreConfig.icsInfoCurr[0];
|
|
|
|
icsCurr.windowGrouping = 0;
|
|
icsCurr.windowSequence = ONLY_LONG;
|
|
#if RESTRICT_TO_AAC
|
|
icsCurr.windowShape = WINDOW_SINE;
|
|
#else
|
|
icsCurr.windowShape = WINDOW_KBD;
|
|
#endif
|
|
ci++;
|
|
}
|
|
else // SCE or CPE: short-window, low-overlap, and sine-shape detection for each channel
|
|
{
|
|
unsigned tsCurr[2]; // save temporal stationarity values
|
|
unsigned tsNext[2]; // for common_window decision in CPE
|
|
|
|
for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
|
|
{
|
|
const IcsInfo& icsPrev = coreConfig.icsInfoPrev[ch];
|
|
IcsInfo& icsCurr = coreConfig.icsInfoCurr[ch];
|
|
const USAC_WSEQ wsPrev = icsPrev.windowSequence;
|
|
USAC_WSEQ& wsCurr = icsCurr.windowSequence;
|
|
// get temporal signal statistics, then determine overlap config. for the next frame
|
|
#if !EE_MORE_MSE
|
|
const unsigned plCurr = abs (m_tranLocCurr[ci]) & ((1024 << m_shiftValSBR) - 1);
|
|
#endif
|
|
const unsigned sfCurr = (m_tempAnaCurr[ci] >> 24) & UCHAR_MAX;
|
|
const unsigned tfCurr = (m_tempAnaCurr[ci] >> 16) & UCHAR_MAX;
|
|
#if !EE_MORE_MSE
|
|
const unsigned plNext = abs (m_tranLocNext[ci]) & ((1024 << m_shiftValSBR) - 1);
|
|
#endif
|
|
const unsigned sfNext = (m_tempAnaNext[ci] >> 24) & UCHAR_MAX;
|
|
const unsigned tfNext = (m_tempAnaNext[ci] >> 16) & UCHAR_MAX;
|
|
#if !EE_MORE_MSE
|
|
const unsigned tThresh = UCHAR_MAX * (__max (plCurr, plNext) < 614 /*0.6 * 1024*/ ? 16 : 15 - (m_bitRateMode >> 3));
|
|
#endif
|
|
|
|
tsCurr[ch] = (m_tempAnaCurr[ci] /*R*/) & UCHAR_MAX;
|
|
tsNext[ch] = (m_tempAnaNext[ci] >> 8) & UCHAR_MAX;
|
|
// save maximum spectral flatness of current and neighboring frames for quantization
|
|
m_tempAnaCurr [ci] = (m_tempAnaCurr[ci] & 0xFFFFFF) | (__max (sfCurr, __max (m_specFlatPrev[ci], sfNext)) << 24);
|
|
m_specFlatPrev[ci] = (uint8_t) sfCurr;
|
|
#if EE_MORE_MSE
|
|
const bool lowOlapNext = (m_tranLocNext[ci] >= 0);
|
|
#else
|
|
const bool lowOlapNext = (m_tranLocNext[ci] >= 0) || (sfNext <= UCHAR_MAX / 4 && tfNext > (UCHAR_MAX * 13) / 16) ||
|
|
(tsCurr[ch] > (UCHAR_MAX * 5) / 8) || (tsNext[ch] > (UCHAR_MAX * 5) / 8);
|
|
#endif
|
|
const bool sineWinCurr = (sfCurr >= 170) && (sfNext >= 170) && (sfCurr < 221) && (sfNext < 221) && (tsCurr[ch] < 20) &&
|
|
(tfCurr >= 153) && (tfNext >= 153) && (tfCurr < 184) && (tfNext < 184) && (tsNext[ch] < 20);
|
|
// set window_sequence
|
|
if ((wsPrev == ONLY_LONG) || (wsPrev == LONG_STOP)) // 1st window half - max overlap
|
|
{
|
|
wsCurr = (lowOlapNext ? LONG_START : ONLY_LONG);
|
|
}
|
|
else // LONG_START_SEQUENCE, STOP_START_SEQUENCE, EIGHT_SHORT_SEQUENCE - min overlap
|
|
{
|
|
#if EE_MORE_MSE
|
|
wsCurr = (m_tranLocCurr[ci] >= 0) ? EIGHT_SHORT :
|
|
#else
|
|
wsCurr = (m_tranLocCurr[ci] >= 0) || (tsCurr[ch] > (UCHAR_MAX * 5) / 8) || (tfCurr > tThresh / 16) ? EIGHT_SHORT :
|
|
#endif
|
|
#if RESTRICT_TO_AAC
|
|
(lowOlapNext ? EIGHT_SHORT : LONG_STOP);
|
|
#else
|
|
(lowOlapNext ? STOP_START : LONG_STOP);
|
|
#endif
|
|
}
|
|
|
|
// set window_shape
|
|
if ((wsCurr == ONLY_LONG) || (wsCurr == LONG_STOP)) // 2nd window half - max overlap
|
|
{
|
|
icsCurr.windowShape = (sineWinCurr ? WINDOW_SINE : WINDOW_KBD);
|
|
}
|
|
else // LONG_START_SEQUENCE, STOP_START_SEQUENCE, EIGHT_SHORT_SEQUENCE - min overlap
|
|
{
|
|
icsCurr.windowShape = (m_tranLocCurr[ci] >= 0) ? WINDOW_KBD :
|
|
(sineWinCurr ? WINDOW_SINE : WINDOW_KBD);
|
|
}
|
|
|
|
// set scale_factor_grouping
|
|
icsCurr.windowGrouping = (wsCurr == EIGHT_SHORT ? __max (0, m_tranLocCurr[ci]) / (2 * nSamplesInFrame) : 0);
|
|
ci++;
|
|
} // for ch
|
|
|
|
if (nrChannels > 1) // common_window element detection for use in StereoCoreToolInfo()
|
|
{
|
|
IcsInfo& icsInfo0 = coreConfig.icsInfoCurr[0];
|
|
IcsInfo& icsInfo1 = coreConfig.icsInfoCurr[1];
|
|
USAC_WSEQ& winSeq0 = icsInfo0.windowSequence;
|
|
USAC_WSEQ& winSeq1 = icsInfo1.windowSequence;
|
|
|
|
if (winSeq0 != winSeq1) // try to synch window_sequences
|
|
{
|
|
const USAC_WSEQ initialWs0 = winSeq0;
|
|
const USAC_WSEQ initialWs1 = winSeq1;
|
|
|
|
winSeq0 = winSeq1 = windowSequenceSynch[initialWs0][initialWs1]; // equalization
|
|
if ((winSeq0 != initialWs0) && (winSeq0 == EIGHT_SHORT))
|
|
{
|
|
#if !RESTRICT_TO_AAC
|
|
if ((tsCurr[0] * 7 < tsCurr[1] * 2) && (tsNext[0] * 7 < tsNext[1] * 2) &&
|
|
(abs (m_specFlatPrev[ci - 1] - (int) m_specFlatPrev[ci - 2]) > UCHAR_MAX / 4))
|
|
{
|
|
winSeq0 = STOP_START; // don't synchronize to EIGHT_SHORT but keep low overlap
|
|
}
|
|
else
|
|
#endif
|
|
icsInfo0.windowGrouping = icsInfo1.windowGrouping;
|
|
}
|
|
if ((winSeq1 != initialWs1) && (winSeq1 == EIGHT_SHORT))
|
|
{
|
|
#if !RESTRICT_TO_AAC
|
|
if ((tsCurr[1] * 7 < tsCurr[0] * 2) && (tsNext[1] * 7 < tsNext[0] * 2) &&
|
|
(abs (m_specFlatPrev[ci - 1] - (int) m_specFlatPrev[ci - 2]) > UCHAR_MAX / 4))
|
|
{
|
|
winSeq1 = STOP_START; // don't synchronize to EIGHT_SHORT but keep low overlap
|
|
}
|
|
else
|
|
#endif
|
|
icsInfo1.windowGrouping = icsInfo0.windowGrouping;
|
|
}
|
|
}
|
|
else if (winSeq0 == EIGHT_SHORT) // resynchronize scale_factor_grouping if necessary
|
|
{
|
|
const int16_t tranLocSynch = __min (m_tranLocCurr[ci - 2], m_tranLocCurr[ci - 1]);
|
|
|
|
icsInfo0.windowGrouping = icsInfo1.windowGrouping = __max (0, tranLocSynch) / (2 * nSamplesInFrame);
|
|
}
|
|
|
|
if ((icsInfo0.windowShape != WINDOW_SINE) || (icsInfo1.windowShape != WINDOW_SINE))
|
|
{
|
|
icsInfo0.windowShape = WINDOW_KBD; // always synchronize window_shapes in order to
|
|
icsInfo1.windowShape = WINDOW_KBD; // encourage synch in next frame; KBD dominates
|
|
}
|
|
coreConfig.commonWindow = (winSeq0 == winSeq1); // synch
|
|
|
|
memset (coreConfig.stereoDataPrev, 0, (MAX_NUM_SWB_LONG + 1) * sizeof (uint8_t));
|
|
|
|
if (((winSeq0 == EIGHT_SHORT) == (coreConfig.icsInfoPrev[0].windowSequence == EIGHT_SHORT)) && !m_indepFlag &&
|
|
((winSeq1 == EIGHT_SHORT) == (coreConfig.icsInfoPrev[1].windowSequence == EIGHT_SHORT)) && (coreConfig.stereoMode > 0))
|
|
{
|
|
const unsigned lastGrpOffset = (coreConfig.icsInfoPrev[0].windowSequence == EIGHT_SHORT ? m_numSwbShort * (NUM_WINDOW_GROUPS - 1) : 0);
|
|
const unsigned maxSfbStePrev = __max (coreConfig.icsInfoPrev[0].maxSfb, coreConfig.icsInfoPrev[1].maxSfb) + 1u; // for safety
|
|
|
|
memcpy (coreConfig.stereoDataPrev, &coreConfig.stereoDataCurr[lastGrpOffset], __min (60 - lastGrpOffset, maxSfbStePrev) * sizeof (uint8_t));
|
|
}
|
|
coreConfig.stereoDataCurr[0] = (m_bitRateMode <= 1 ? m_tempAnalyzer.stereoPreAnalysis (&m_timeSignals[ci - 2], &m_specFlatPrev[ci - 2], nSamplesInFrame) : 0);
|
|
} // if nrChannels > 1
|
|
}
|
|
|
|
ci -= nrChannels; // modulated complex lapped transform (MCLT) for all channels, windows
|
|
|
|
for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
|
|
{
|
|
const IcsInfo& icsPrev = coreConfig.icsInfoPrev[ch];
|
|
const IcsInfo& icsCurr = coreConfig.icsInfoCurr[ch];
|
|
const int32_t* timeSig = (m_shiftValSBR > 0 ? m_coreSignals[ci] : m_timeSignals[ci]);
|
|
const USAC_WSEQ wsCurr = icsCurr.windowSequence;
|
|
const bool eightShorts = (wsCurr == EIGHT_SHORT);
|
|
SfbGroupData& grpData = coreConfig.groupingData[ch];
|
|
|
|
grpData.numWindowGroups = (eightShorts ? NUM_WINDOW_GROUPS : 1); // fill groupingData
|
|
memcpy (grpData.windowGroupLength, windowGroupingTable[icsCurr.windowGrouping], NUM_WINDOW_GROUPS * sizeof (uint8_t));
|
|
|
|
errorValue |= m_transform.applyMCLT (timeSig, eightShorts, icsPrev.windowShape != WINDOW_SINE, icsCurr.windowShape != WINDOW_SINE,
|
|
wsCurr > LONG_START /*lOL*/, (wsCurr % 3) != ONLY_LONG /*lOR*/, m_mdctSignals[ci], m_mdstSignals[ci]);
|
|
m_scaleFacData[ci++] = &grpData;
|
|
}
|
|
} // for el
|
|
|
|
return errorValue;
|
|
}
|
|
|
|
// constructor
|
|
// Stores the coding parameters and resets all members; no memory is allocated in here,
// every buffer pointer is set to nullptr.
//   inputPcmData   - caller-owned, channel-interleaved PCM input buffer (kept as m_pcm24Data)
//   outputAuData   - caller-owned output buffer for the coded access unit (kept as m_outAuData)
//   sampleRate     - input sampling rate; the SBR shift is applied before the index lookup
//   numChannels    - 0 selects 2-channel dual-mono, 7 and above are mapped to CCI_UNDEF
//   frameLength    - values of 1536 and above enable the SBR shift (m_shiftValSBR = 1)
//   indepPeriod    - random-access period, 0 means USHRT_MAX; limited to USHRT_MAX
//   varBitRateMode - limited to 9
//   useNoiseFilling, useEcodisExt - only present when RESTRICT_TO_AAC is disabled
ExhaleEncoder::ExhaleEncoder (int32_t* const inputPcmData,           unsigned char* const outputAuData,
                              const unsigned sampleRate  /*= 44100*/, const unsigned numChannels /*= 2*/,
                              const unsigned frameLength /*= 1024*/,  const unsigned indepPeriod /*= 45*/,
                              const unsigned varBitRateMode /*= 3*/
#if !RESTRICT_TO_AAC
                            , const bool useNoiseFilling /*= true*/,  const bool useEcodisExt /*= false*/
#endif
                              )
{
  // adopt basic coding parameters
  m_bitRateMode = __min (9, varBitRateMode);
  m_channelConf = (numChannels >= 7 ? CCI_UNDEF : (USAC_CCI) numChannels); // see 23003-3, Tables 73 & 161
  if (m_channelConf == CCI_CONF)
  {
    m_channelConf = CCI_2_CHM; // passing numChannels = 0 means 2-ch dual-mono
  }
  // NOTE(review): assumes m_channelConf % USAC_MAX_NUM_ELCONFIGS is a valid table index
  // also for CCI_UNDEF - confirm against the enum definition
  m_numElements = elementCountConfig[m_channelConf % USAC_MAX_NUM_ELCONFIGS]; // used in UsacDecoderConfig
  m_shiftValSBR = (frameLength >= 1536 ? 1 : 0);
  m_rateFactor  = 0;
  m_frameCount  = m_rateFactor;
  m_priLength   = 0;
  // both values below describe the core coder, hence the SBR shift
  m_frameLength  = USAC_CCFL (frameLength >> m_shiftValSBR); // ccfl signaled using coreSbrFrameLengthIndex
  m_frequencyIdx = toSamplingFrequencyIndex (sampleRate >> m_shiftValSBR); // as usacSamplingFrequencyIndex
  m_indepFlag    = true; // usacIndependencyFlag in UsacFrame(), will be set per frame, true in first frame
  m_indepPeriod  = (indepPeriod == 0 ? USHRT_MAX : __min (USHRT_MAX, indepPeriod)); // random-access period
#if RESTRICT_TO_AAC
  m_nonMpegExt = false;
#else
  m_nonMpegExt = useEcodisExt;
#endif
  m_numSwbLong  = MAX_NUM_SWB_LONG;
  m_numSwbShort = MAX_NUM_SWB_SHORT;
  m_outAuData   = outputAuData;
  m_pcm24Data   = inputPcmData;
  m_tempIntBuf  = nullptr;

  // initialize all helper structs
  for (unsigned el = 0; el < USAC_MAX_NUM_ELEMENTS; el++)
  {
    const ELEM_TYPE et = elementTypeConfig[m_channelConf % USAC_MAX_NUM_ELCONFIGS][el]; // usacElementType

    m_elementData[el]  = nullptr;
    m_perCorrHCurr[el] = 0;
    m_perCorrLCurr[el] = 0;
#if !RESTRICT_TO_AAC
    // both tools are restricted to element types below ID_USAC_LFE
    m_noiseFilling[el] = (useNoiseFilling && (et < ID_USAC_LFE));
    m_timeWarping[el]  = (false /* N/A */ && (et < ID_USAC_LFE));
#endif
  }

  // initialize all signal buffers
  for (unsigned ch = 0; ch < USAC_MAX_NUM_CHANNELS; ch++)
  {
    // buffer pointers
    m_coreSignals[ch]  = nullptr;
    m_mdctQuantMag[ch] = nullptr;
    m_mdctSignals[ch]  = nullptr;
    m_mdstSignals[ch]  = nullptr;
    m_scaleFacData[ch] = nullptr;
    m_timeSignals[ch]  = nullptr;
    // per-channel statistics and histories
    m_bandwidCurr[ch]  = 0;
    m_bandwidPrev[ch]  = 0;
    m_meanSpecPrev[ch] = 0;
    m_meanTempPrev[ch] = 0;
    m_specAnaCurr[ch]  = 0;
    m_specFlatPrev[ch] = 0;
    m_tempAnaCurr[ch]  = 0;
    m_tempAnaNext[ch]  = 0;
    m_tempFlatPrev[ch] = 0;
    // negative start value; the callers test these locations with ">= 0"
    m_tranLocCurr[ch]  = -1;
    m_tranLocNext[ch]  = -1;
  }

  // initialize all window buffers
  for (unsigned ws = WINDOW_SINE; ws <= WINDOW_KBD; ws++)
  {
    m_timeWindowL[ws] = nullptr;
    m_timeWindowS[ws] = nullptr;
  }
}
|
|
|
|
// destructor
|
|
// Releases, via MFREE, every buffer this object holds: the element structs, the
// per-channel signal buffers and the window coefficient buffers.
ExhaleEncoder::~ExhaleEncoder ()
{
  unsigned idx;

  // free allocated helper structs
  for (idx = 0; idx < USAC_MAX_NUM_ELEMENTS; idx++)
  {
    MFREE (m_elementData[idx]);
  }

  // free allocated signal buffers
  for (idx = 0; idx < USAC_MAX_NUM_CHANNELS; idx++)
  {
    if (m_shiftValSBR > 0) // the core-signal buffer is only released when the SBR shift is active
    {
      MFREE (m_coreSignals[idx]);
    }
    MFREE (m_mdctQuantMag[idx]);
    MFREE (m_mdctSignals[idx]);
    MFREE (m_mdstSignals[idx]);
    MFREE (m_timeSignals[idx]);
  }

  // free allocated window buffers
  for (idx = WINDOW_SINE; idx <= WINDOW_KBD; idx++)
  {
    MFREE (m_timeWindowL[idx]);
    MFREE (m_timeWindowS[idx]);
  }
  // execute sub-class destructors
}
|
|
|
|
// public functions
|
|
unsigned ExhaleEncoder::encodeLookahead ()
|
|
{
|
|
const unsigned nChannels = toNumChannels (m_channelConf);
|
|
const unsigned nSamplesInFrame = toFrameLength (m_frameLength) << m_shiftValSBR;
|
|
const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead
|
|
const int32_t* chSig = m_pcm24Data;
|
|
unsigned ch, s;
|
|
|
|
// copy nSamplesInFrame external channel-interleaved samples into internal channel buffers
|
|
for (s = 0; s < nSamplesInFrame; s++) // sample loop
|
|
{
|
|
for (ch = 0; ch < nChannels; ch++) m_timeSignals[ch][nSamplesTempAna + s] = *(chSig++);
|
|
}
|
|
|
|
// generate first nSamplesTempAna - m_priLength samples (previous frame data) by LP filter
|
|
for (ch = 0; ch < nChannels; ch++)
|
|
{
|
|
short filterC[MAX_PREDICTION_ORDER] = {0, 0, 0, 0};
|
|
short parCorC[MAX_PREDICTION_ORDER] = {0, 0, 0, 0};
|
|
int32_t* predSig = &m_timeSignals[ch][nSamplesTempAna - m_priLength];
|
|
|
|
m_linPredictor.calcParCorCoeffs (predSig, uint16_t (nSamplesInFrame >> 1), MAX_PREDICTION_ORDER, parCorC);
|
|
m_linPredictor.parCorToLpCoeffs (parCorC, MAX_PREDICTION_ORDER, filterC);
|
|
|
|
for (s = nSamplesTempAna - m_priLength; s > 0; s--) // generate predicted priming signal
|
|
{
|
|
const int64_t predSample = *(predSig + 0) * (int64_t) filterC[0] + *(predSig + 1) * (int64_t) filterC[1] +
|
|
*(predSig + 2) * (int64_t) filterC[2] + *(predSig + 3) * (int64_t) filterC[3];
|
|
*(--predSig) = int32_t ((predSample > 0 ? -predSample + (1 << 9) - 1 : -predSample) >> 9);
|
|
}
|
|
if (m_shiftValSBR > 0) memset (m_coreSignals[ch], 0, ((nSamplesInFrame * 41) >> (4 + m_shiftValSBR)) * sizeof (int32_t));
|
|
}
|
|
|
|
// set initial temporal channel statistic to something meaningful before first coded frame
|
|
m_tempAnalyzer.temporalAnalysis (m_timeSignals, nChannels, nSamplesInFrame, nSamplesTempAna - nSamplesInFrame,
|
|
m_shiftValSBR, m_coreSignals); // default lfeChannelIndex
|
|
if (temporalProcessing ()) // time domain: window length, overlap, grouping, and transform
|
|
{
|
|
return 2; // internal error in temporal processing
|
|
}
|
|
if (spectralProcessing ()) // MCLT domain: (common_)max_sfb, grouping 2, TNS, and SFB data
|
|
{
|
|
return 2; // internal error in spectral processing
|
|
}
|
|
if (psychBitAllocation ()) // SFB domain: psychoacoustic model and scale factor estimation
|
|
{
|
|
return 1; // internal error in bit-allocation code
|
|
}
|
|
|
|
return quantizationCoding (); // max(3, coded bytes)
|
|
}
|
|
|
|
unsigned ExhaleEncoder::encodeFrame ()
|
|
{
|
|
const unsigned nChannels = toNumChannels (m_channelConf);
|
|
const unsigned nSamplesInFrame = toFrameLength (m_frameLength) << m_shiftValSBR;
|
|
const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead
|
|
const int32_t* chSig = m_pcm24Data;
|
|
unsigned ch, s;
|
|
|
|
// move internal channel buffers nSamplesInFrame to the past to make room for next samples
|
|
for (ch = 0; ch < nChannels; ch++)
|
|
{
|
|
memcpy (&m_timeSignals[ch][0], &m_timeSignals[ch][nSamplesInFrame], nSamplesInFrame * sizeof (int32_t));
|
|
memcpy (&m_timeSignals[ch][nSamplesInFrame], &m_timeSignals[ch][2 * nSamplesInFrame], (nSamplesTempAna - nSamplesInFrame) * sizeof (int32_t));
|
|
|
|
if (m_shiftValSBR > 0)
|
|
{
|
|
const unsigned nSmpInFrame = toFrameLength (m_frameLength); // core coder frame length
|
|
|
|
memcpy (&m_coreSignals[ch][0], &m_coreSignals[ch][nSmpInFrame], nSmpInFrame * sizeof (int32_t));
|
|
memcpy (&m_coreSignals[ch][nSmpInFrame], &m_coreSignals[ch][2 * nSmpInFrame], (nSamplesInFrame >> 2) * sizeof (int32_t));
|
|
}
|
|
}
|
|
|
|
// copy nSamplesInFrame external channel-interleaved samples into internal channel buffers
|
|
for (s = 0; s < nSamplesInFrame; s++) // sample loop
|
|
{
|
|
for (ch = 0; ch < nChannels; ch++) m_timeSignals[ch][nSamplesTempAna + s] = *(chSig++);
|
|
}
|
|
|
|
if (temporalProcessing ()) // time domain: window length, overlap, grouping, and transform
|
|
{
|
|
return 2; // internal error in temporal processing
|
|
}
|
|
if (spectralProcessing ()) // MCLT domain: (common_)max_sfb, grouping 2, TNS, and SFB data
|
|
{
|
|
return 2; // internal error in spectral processing
|
|
}
|
|
if (psychBitAllocation ()) // SFB domain: psychoacoustic model and scale factor estimation
|
|
{
|
|
return 1; // internal error in bit-allocation code
|
|
}
|
|
|
|
return quantizationCoding (); // max(3, coded bytes)
|
|
}
|
|
|
|
unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uint32_t* const audioConfigBytes /*= nullptr*/)
|
|
{
|
|
const unsigned nChannels = toNumChannels (m_channelConf);
|
|
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
|
|
const unsigned specSigBufSize = nSamplesInFrame * sizeof (int32_t);
|
|
const unsigned timeSigBufSize = (((nSamplesInFrame << m_shiftValSBR) * 41) >> 4) * sizeof (int32_t); // core-codec delay*4
|
|
const unsigned char chConf = m_channelConf;
|
|
unsigned ch, errorValue = 0; // no error
|
|
|
|
// check user's input parameters
|
|
#if RESTRICT_TO_AAC
|
|
if ((m_channelConf <= CCI_CONF) || (m_channelConf > CCI_8_CH))
|
|
#else
|
|
if ((m_channelConf <= CCI_CONF) || (m_channelConf > CCI_8_CHS))
|
|
#endif
|
|
{
|
|
errorValue |= 128;
|
|
}
|
|
#if RESTRICT_TO_AAC
|
|
if (m_frameLength != CCFL_1024)
|
|
#else
|
|
if ((m_frameLength != CCFL_768) && (m_frameLength != CCFL_1024))
|
|
#endif
|
|
{
|
|
errorValue |= 64;
|
|
}
|
|
if ((m_frequencyIdx < 0) || (m_bitRateMode > (toSamplingRate (m_frequencyIdx) >> (m_shiftValSBR > 0 ? 11 : 12)) + 2))
|
|
{
|
|
errorValue |= 32;
|
|
}
|
|
if ((m_outAuData == nullptr) || (m_pcm24Data == nullptr))
|
|
{
|
|
errorValue |= 16;
|
|
}
|
|
if (errorValue > 0) return errorValue;
|
|
|
|
// get window band table index
|
|
ch = (unsigned) m_frequencyIdx; // for temporary storage
|
|
#if RESTRICT_TO_AAC
|
|
m_swbTableIdx = freqIdxToSwbTableIdxAAC[ch];
|
|
#else
|
|
m_swbTableIdx = (m_frameLength == CCFL_768 ? freqIdxToSwbTableIdx768[ch] : freqIdxToSwbTableIdxAAC[ch]);
|
|
#endif
|
|
|
|
if (m_elementData[0] != nullptr) // initEncoder was called before, don't reallocate memory
|
|
{
|
|
if (audioConfigBuffer != nullptr) // recreate the UsacConfig()
|
|
{
|
|
errorValue = m_outStream.createAudioConfig (m_frequencyIdx, m_frameLength != CCFL_1024, chConf, m_numElements,
|
|
elementTypeConfig[chConf], audioConfigBytes ? *audioConfigBytes : 0,
|
|
#if !RESTRICT_TO_AAC
|
|
m_timeWarping, m_noiseFilling,
|
|
#endif
|
|
m_shiftValSBR, audioConfigBuffer);
|
|
if (audioConfigBytes) *audioConfigBytes = errorValue; // size of UsacConfig() in bytes
|
|
errorValue = (errorValue == 0 ? 1 : 0);
|
|
}
|
|
|
|
return errorValue;
|
|
}
|
|
|
|
// allocate all helper structs
|
|
for (unsigned el = 0; el < m_numElements; el++) // element loop
|
|
{
|
|
if ((m_elementData[el] = (CoreCoderData*) malloc (sizeof (CoreCoderData))) == nullptr)
|
|
{
|
|
errorValue |= 8;
|
|
}
|
|
else
|
|
{
|
|
memset (m_elementData[el], 0, sizeof (CoreCoderData));
|
|
m_elementData[el]->elementType = elementTypeConfig[chConf][el]; // usacElementType[el]
|
|
}
|
|
}
|
|
memset (m_sfbLoudMem, 1, 2 * 26 * 32 * sizeof (uint16_t));
|
|
|
|
// allocate all signal buffers
|
|
if (m_shiftValSBR > 0)
|
|
{
|
|
if (m_shiftValSBR > 1) return (errorValue | 4); // no 8:3, 4:1
|
|
|
|
for (ch = 0; ch < nChannels; ch++)
|
|
{
|
|
if ((m_coreSignals[ch] = (int32_t*) malloc (timeSigBufSize >> m_shiftValSBR)) == nullptr)
|
|
{
|
|
errorValue |= 4;
|
|
}
|
|
}
|
|
}
|
|
for (ch = 0; ch < nChannels; ch++)
|
|
{
|
|
if ((m_entropyCoder[ch].initCodingMemory (nSamplesInFrame) > 0) ||
|
|
(m_mdctQuantMag[ch]= (uint8_t*) malloc (nSamplesInFrame * sizeof (uint8_t))) == nullptr ||
|
|
(m_mdctSignals[ch] = (int32_t*) malloc (specSigBufSize)) == nullptr ||
|
|
(m_mdstSignals[ch] = (int32_t*) malloc (specSigBufSize)) == nullptr ||
|
|
(m_timeSignals[ch] = (int32_t*) malloc (timeSigBufSize)) == nullptr)
|
|
{
|
|
errorValue |= 4;
|
|
}
|
|
}
|
|
// allocate all window buffers
|
|
for (unsigned ws = WINDOW_SINE; ws <= WINDOW_KBD; ws++)
|
|
{
|
|
if ((m_timeWindowL[ws] = initWindowHalfCoeffs ((USAC_WSHP) ws, nSamplesInFrame)) == nullptr ||
|
|
(m_timeWindowS[ws] = initWindowHalfCoeffs ((USAC_WSHP) ws, nSamplesInFrame >> 3)) == nullptr)
|
|
{
|
|
errorValue |= 2;
|
|
}
|
|
}
|
|
if (errorValue > 0) return errorValue;
|
|
|
|
// initialize coder class memory
|
|
m_tempIntBuf = m_timeSignals[0];
|
|
if (m_bitAllocator.initAllocMemory (&m_linPredictor, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode >> ((nChannels - 1) >> 2)) > 0 ||
|
|
#if EC_TRELLIS_OPT_CODING
|
|
m_sfbQuantizer.initQuantMemory (nSamplesInFrame, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode, toSamplingRate (m_frequencyIdx)) > 0 ||
|
|
#else
|
|
m_sfbQuantizer.initQuantMemory (nSamplesInFrame) > 0 ||
|
|
#endif
|
|
m_specAnalyzer.initSigAnaMemory (&m_linPredictor, m_bitRateMode <= 5 ? nChannels : 0, nSamplesInFrame) > 0 ||
|
|
m_transform.initConstants (m_tempIntBuf, m_timeWindowL, m_timeWindowS, nSamplesInFrame) > 0)
|
|
{
|
|
errorValue |= 1;
|
|
}
|
|
|
|
if ((errorValue == 0) && (audioConfigBuffer != nullptr)) // save UsacConfig() for writeout
|
|
{
|
|
const uint32_t loudnessInfo = (audioConfigBytes ? *audioConfigBytes : 0);
|
|
|
|
if (*audioConfigBuffer & 1) m_frameCount--; // to skip 1 frame
|
|
m_priLength = (*audioConfigBuffer >> 1);
|
|
errorValue = m_outStream.createAudioConfig (m_frequencyIdx, m_frameLength != CCFL_1024, chConf, m_numElements,
|
|
elementTypeConfig[chConf], loudnessInfo,
|
|
#if !RESTRICT_TO_AAC
|
|
m_timeWarping, m_noiseFilling,
|
|
#endif
|
|
m_shiftValSBR, audioConfigBuffer);
|
|
if (audioConfigBytes) *audioConfigBytes = errorValue; // length of UsacConfig() in bytes
|
|
errorValue = (errorValue == 0 ? 1 : 0);
|
|
|
|
// NOTE: Below, value 256 is actually a warning, not an error. If the library is used in
|
|
// live scenarios and a nonzero loudness level is provided before any frames were coded,
|
|
// it reminds developers to apply short-term R128 normalization of the incoming samples.
|
|
if ((m_frameCount == 0) && (loudnessInfo & 16383)) errorValue |= 256;
|
|
}
|
|
if (m_priLength)
|
|
{
|
|
const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> (4 - m_shiftValSBR);
|
|
const int32_t* chSig = &m_pcm24Data[nChannels * ((nSamplesInFrame << m_shiftValSBR) - m_priLength)];
|
|
|
|
for (unsigned s = nSamplesTempAna - m_priLength; s < nSamplesTempAna; s++)
|
|
{
|
|
for (ch = 0; ch < nChannels; ch++) m_timeSignals[ch][s] = *(chSig++);
|
|
}
|
|
}
|
|
|
|
return errorValue;
|
|
}
|
|
|
|
extern "C"
|
|
{
|
|
// C constructor
|
|
EXHALE_DECL ExhaleEncAPI* exhaleCreate (int32_t* const inputPcmData, unsigned char* const outputAuData,
|
|
const unsigned sampleRate, const unsigned numChannels,
|
|
const unsigned frameLength, const unsigned indepPeriod,
|
|
const unsigned varBitRateMode, const bool useNoiseFilling,
|
|
const bool useEcodisExt)
|
|
{
|
|
return reinterpret_cast<ExhaleEncAPI*> (new ExhaleEncoder (inputPcmData, outputAuData, sampleRate, numChannels, frameLength, indepPeriod, varBitRateMode
|
|
#if !RESTRICT_TO_AAC
|
|
, useNoiseFilling, useEcodisExt
|
|
#endif
|
|
));
|
|
}
|
|
|
|
// C destructor
|
|
EXHALE_DECL unsigned exhaleDelete (ExhaleEncAPI* exhaleEnc)
|
|
{
|
|
if (exhaleEnc != NULL) { delete reinterpret_cast<ExhaleEncoder*> (exhaleEnc); return 0; }
|
|
|
|
return USHRT_MAX; // error
|
|
}
|
|
|
|
// C initializer
|
|
EXHALE_DECL unsigned exhaleInitEncoder (ExhaleEncAPI* exhaleEnc, unsigned char* const audioConfigBuffer,
|
|
uint32_t* const audioConfigBytes)
|
|
{
|
|
if (exhaleEnc != NULL) return reinterpret_cast<ExhaleEncoder*> (exhaleEnc)->initEncoder (audioConfigBuffer, audioConfigBytes);
|
|
|
|
return USHRT_MAX; // error
|
|
}
|
|
|
|
// C lookahead encoder
|
|
EXHALE_DECL unsigned exhaleEncodeLookahead (ExhaleEncAPI* exhaleEnc)
|
|
{
|
|
if (exhaleEnc != NULL) return reinterpret_cast<ExhaleEncoder*> (exhaleEnc)->encodeLookahead ();
|
|
|
|
return USHRT_MAX; // error
|
|
}
|
|
|
|
// C frame encoder
|
|
EXHALE_DECL unsigned exhaleEncodeFrame (ExhaleEncAPI* exhaleEnc)
|
|
{
|
|
if (exhaleEnc != NULL) return reinterpret_cast<ExhaleEncoder*> (exhaleEnc)->encodeFrame ();
|
|
|
|
return USHRT_MAX; // error
|
|
}
|
|
|
|
} // extern "C"
|