/* exhaleEnc.cpp - source file for class providing Extended HE-AAC encoding capability
* written by C. R. Helmrich, last modified in 2023 - see License.htm for legal notices
* C API corrected and API compilation extended by J. Regan in 2022, see merge request 8
*
* The copyright in this software is being made available under the exhale Copyright License
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
* party rights, including patent rights. No such rights are granted under this License.
*
* Copyright (c) 2018-2024 Christian R. Helmrich, project ecodis. All rights reserved.
*/
#include "exhaleLibPch.h"
#include "exhaleEnc.h"
// static helper functions
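// zeroth-order modified Bessel function of the first kind, I0(x), evaluated via its power
// series I0(x) = sum_k ((x/2)^k / k!)^2: each term equals the previous one times (x/(2k))^2,
// and the summation stops once a term falls below sum * 1.2e-38 (roughly FLT_MIN), i.e. once
// it no longer contributes. Used below to build the Kaiser-Bessel window for the KBD half.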
static double modifiedBesselFunctionOfFirstKind (const double x)
{
const double xOver2 = x * 0.5;
double d = 1.0, sum = 1.0;
int i = 0;
do
{
const double x2di = xOver2 / double (++i);
d *= (x2di * x2di);
sum += d;
}
while (d > sum * 1.2e-38); // FLT_MIN
return sum;
}
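// allocates and returns the first (rising) half of the MDCT window as fixed-point values
// scaled by WIN_SCALE: either the MLT sine window or, for WINDOW_KBD, the Kaiser-Bessel
// derived window obtained as the square root of the normalized cumulative Kaiser window
// (alpha = 4*pi for frameLength > 256, 6*pi otherwise). The malloc'd buffer is owned by the
// caller. Illustrative usage sketch (not part of this file):
//   int32_t* winHalf = initWindowHalfCoeffs (WINDOW_KBD, 1024); // long KBD window half
//   ... // use winHalf[0..1023], then free (winHalf)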
static int32_t* initWindowHalfCoeffs (const USAC_WSHP windowShape, const unsigned frameLength)
{
int32_t* windowBuf = nullptr;
unsigned u;
if ((windowBuf = (int32_t*) malloc (frameLength * sizeof (int32_t))) == nullptr)
{
return nullptr; // allocation error
}
if (windowShape == WINDOW_SINE)
{
const double dNorm = 3.141592653589793 / (2.0 * frameLength);
// MLT sine window half
for (u = 0; u < frameLength; u++)
{
windowBuf[u] = int32_t (sin (dNorm * (u + 0.5)) * WIN_SCALE + 0.5);
}
}
else // if windowShape == WINDOW_KBD
{
const double alpha = 3.141592653589793 * (frameLength > 256 ? 4.0 : 6.0);
const double dBeta = 1.0 / modifiedBesselFunctionOfFirstKind (alpha /*sqrt (1.0)*/);
const double dNorm = 4.0 / (2.0 * frameLength);
const double iScal = double (1u << 30);
const double dScal = 1.0 / iScal;
double d, sum = 0.0;
// create Kaiser-Bessel window half
for (u = 0; u < frameLength; u++)
{
const double du1 = dNorm * u - 1.0;
d = dBeta * modifiedBesselFunctionOfFirstKind (alpha * sqrt (1.0 - du1 * du1));
sum += d;
windowBuf[u] = int32_t (d * iScal + 0.5);
}
d = 1.0 / sum; // normalized to sum
sum = 0.0;
// KBD window half
for (u = 0; u < frameLength; u++)
{
sum += dScal * windowBuf[u];
windowBuf[u] = int32_t (sqrt (d * sum /*cumulative sum*/) * WIN_SCALE + 0.5);
}
}
return windowBuf;
}
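// quantizes scale factor band b of a window group at minimum SNR: all quantized magnitudes
// in the SFB are cleared and, if nonZeroSnr is true, only the largest-magnitude line is set
// to 1. The arithmetic coding two-tuple memory is updated accordingly, and the largest input
// magnitude is returned so that the caller can derive a matching scale factor for the band.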
static uint32_t quantizeSfbWithMinSnr (const unsigned* const coeffMagn, const uint16_t* const sfbOffset, const unsigned b,
const uint8_t groupLength, uint8_t* const quantMagn, char* const arithTuples, const bool nonZeroSnr = false)
{
const uint16_t sfbStart = sfbOffset[b];
const uint16_t sfbWidth = sfbOffset[b + 1] - sfbStart;
const unsigned* sfbMagn = &coeffMagn[sfbStart];
uint32_t maxIndex = 0, maxLevel = sfbMagn[0];
for (uint16_t s = sfbWidth - 1; s > 0; s--)
{
if (maxLevel < sfbMagn[s]) // find largest-level magn. in SFB
{
maxLevel = sfbMagn[s];
maxIndex = s;
}
}
if (quantMagn != nullptr) // update quantized sample magnitudes
{
memset (&quantMagn[sfbStart], 0, sfbWidth * sizeof (uint8_t));
if (nonZeroSnr) quantMagn[sfbStart + maxIndex] = 1; // magn. 1
}
if (arithTuples != nullptr) // update entropy coding two-tuples
{
const uint16_t swbStart = ((sfbStart - sfbOffset[0]) * oneTwentyEightOver[groupLength]) >> 7;
memset (&arithTuples[swbStart >> 1], 1, ((sfbWidth * oneTwentyEightOver[groupLength]) >> 8) * sizeof (char));
if (nonZeroSnr && (groupLength == 1)) // max. two-tuple is 1+1
{
arithTuples[(swbStart + maxIndex) >> 1] = 2;
}
}
return maxLevel;
}
// inline helper functions
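// stereo pre-processing of one MDCT/MDST coefficient pair: each channel receives a weighted
// crosstalk mix of both channels (factDe weighting the channel's own sample, factIn the
// other channel, sign selecting in- or out-of-phase mixing). When the weighted sum would
// nearly cancel, a difference form is used instead ("avoid destructive summations"), and the
// result is rescaled so each channel keeps its original complex (MDCT^2 + MDST^2) magnitude.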
static inline void applyStereoPreProcessingCplx (int32_t* mdctSample1, int32_t* mdctSample2,
int32_t* mdstSample1, int32_t* mdstSample2,
const int64_t factIn, const int64_t factDe, const int64_t sign)
{
const int32_t valI1 = *mdstSample1;
const int32_t valI2 = *mdstSample2;
const int32_t valR1 = *mdctSample1;
const int32_t valR2 = *mdctSample2;
const int64_t absR1 = abs (valR1);
const int64_t absR2 = abs (valR2);
int64_t dmxI1, dmxR1 = valR1 * factDe + sign * valR2 * factIn; // cross
int64_t dmxI2, dmxR2 = valR2 * factDe + sign * valR1 * factIn; // -talk
double n, d;
if (abs (dmxR1) < absR1 + absR2) // avoid destructive summations
{
if (absR1 * factDe < absR2 * factIn)
{
dmxR1 = valR2 * factIn - sign * valR1 * factDe;
dmxI1 = valI2 * factIn - sign * valI1 * factDe;
}
else
{
dmxR1 = valR1 * factDe - sign * valR2 * factIn;
dmxI1 = valI1 * factDe - sign * valI2 * factIn;
}
}
else dmxI1 = valI1 * factDe + sign * valI2 * factIn;
if (abs (dmxR2) < absR1 + absR2) // avoid destructive summations
{
if (absR1 * factIn < absR2 * factDe)
{
dmxR2 = valR2 * factDe - sign * valR1 * factIn;
dmxI2 = valI2 * factDe - sign * valI1 * factIn;
}
else
{
dmxR2 = valR1 * factIn - sign * valR2 * factDe;
dmxI2 = valI1 * factIn - sign * valI2 * factDe;
}
}
else dmxI2 = valI2 * factDe + sign * valI1 * factIn;
n = (double) valR1 * (double) valR1 + (double) valI1 * (double) valI1;
d = (double) dmxR1 * (double) dmxR1 + (double) dmxI1 * (double) dmxI1;
d = sqrt (n / __max (1.0, d));
*mdctSample1 = int32_t (dmxR1 * d + (dmxR1 < 0 ? -0.5 : 0.5));
*mdstSample1 = int32_t (dmxI1 * d + (dmxI1 < 0 ? -0.5 : 0.5));
n = (double) valR2 * (double) valR2 + (double) valI2 * (double) valI2;
d = (double) dmxR2 * (double) dmxR2 + (double) dmxI2 * (double) dmxI2;
d = sqrt (n / __max (1.0, d));
*mdctSample2 = int32_t (dmxR2 * d + (dmxR2 < 0 ? -0.5 : 0.5));
*mdstSample2 = int32_t (dmxI2 * d + (dmxI2 < 0 ? -0.5 : 0.5));
}
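// real-valued variant of the above for spectra without usable MDST data (e.g. when TNS is
// active): the imaginary part is estimated as half the difference between the next and the
// previous MDCT coefficient (see also getMeanAbsValues), tracked via prevSample1/prevSample2.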
static inline void applyStereoPreProcessingReal (int32_t* mdctSample1, int32_t* mdctSample2,
int32_t* prevSample1, int32_t* prevSample2,
const int64_t factIn, const int64_t factDe, const int64_t sign)
{
const int64_t valI1 = (*(mdctSample1 + 1) - (int64_t) *prevSample1) >> 1; // estimate, see also
const int64_t valI2 = (*(mdctSample2 + 1) - (int64_t) *prevSample2) >> 1; // getMeanAbsValues()
const int32_t valR1 = (*prevSample1 = *mdctSample1);
const int32_t valR2 = (*prevSample2 = *mdctSample2);
const int64_t absR1 = abs (valR1);
const int64_t absR2 = abs (valR2);
int64_t dmxI1, dmxR1 = valR1 * factDe + sign * valR2 * factIn; // cross
int64_t dmxI2, dmxR2 = valR2 * factDe + sign * valR1 * factIn; // -talk
double n, d;
if (abs (dmxR1) < absR1 + absR2) // avoid destructive summations
{
if (absR1 * factDe < absR2 * factIn)
{
dmxR1 = valR2 * factIn - sign * valR1 * factDe;
dmxI1 = valI2 * factIn - sign * valI1 * factDe;
}
else
{
dmxR1 = valR1 * factDe - sign * valR2 * factIn;
dmxI1 = valI1 * factDe - sign * valI2 * factIn;
}
}
else dmxI1 = valI1 * factDe + sign * valI2 * factIn;
if (abs (dmxR2) < absR1 + absR2) // avoid destructive summations
{
if (absR1 * factIn < absR2 * factDe)
{
dmxR2 = valR2 * factDe - sign * valR1 * factIn;
dmxI2 = valI2 * factDe - sign * valI1 * factIn;
}
else
{
dmxR2 = valR1 * factIn - sign * valR2 * factDe;
dmxI2 = valI1 * factIn - sign * valI2 * factDe;
}
}
else dmxI2 = valI2 * factDe + sign * valI1 * factIn;
n = (double) valR1 * (double) valR1 + (double) valI1 * (double) valI1;
d = (double) dmxR1 * (double) dmxR1 + (double) dmxI1 * (double) dmxI1;
*mdctSample1 = int32_t (dmxR1 * sqrt (n / __max (1.0, d)) + (dmxR1 < 0 ? -0.5 : 0.5));
n = (double) valR2 * (double) valR2 + (double) valI2 * (double) valI2;
d = (double) dmxR2 * (double) dmxR2 + (double) dmxI2 * (double) dmxI2;
*mdctSample2 = int32_t (dmxR2 * sqrt (n / __max (1.0, d)) + (dmxR2 < 0 ? -0.5 : 0.5));
}
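// synchronizes TNS across a channel pair: the ParCor (reflection) coefficients of both
// channels are averaged, one quantized TNS filter is re-derived from the averaged set, and
// the resulting TnsData is copied to the second channel so a common filter can be signaled.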
static inline void applyTnsCoeff2ChannelSynch (LinearPredictor& predictor, TnsData& tnsData1, TnsData& tnsData2,
const uint16_t maxTnsOrder, const unsigned n, bool* const commonFlag)
{
int16_t* const parCor1 = tnsData1.coeffParCor[n];
int16_t* const parCor2 = tnsData2.coeffParCor[n];
for (uint16_t s = 0; s < maxTnsOrder; s++) // synchronize coeffs
{
parCor1[s] = (parCor1[s] + parCor2[s] + 1) >> 1;
}
tnsData1.coeffResLow[n] = false; // optimize synchronized coeffs
tnsData1.filterOrder[n] = predictor.calcOptTnsCoeffs (parCor1, tnsData1.coeff[n], &tnsData1.coeffResLow[n],
maxTnsOrder, UCHAR_MAX /*max pred gain*/, 0, LP_DEPTH);
tnsData1.numFilters[n] = (tnsData1.filterOrder[n] > 0 ? 1 : 0);
memcpy (&tnsData2, &tnsData1, sizeof (TnsData)); // synchronize
if (commonFlag != nullptr) *commonFlag &= (true);
}
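// partial counterpart of the full synchronization above: applies a crosstalk of fact/128
// (capped at 50%) between the two channels' ParCor coefficients, then re-derives each
// channel's quantized TNS filter separately; commonFlag is cleared if the results differ.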
static inline void applyTnsCoeffPreProcessing (LinearPredictor& predictor, TnsData& tnsData1, TnsData& tnsData2,
const uint16_t maxTnsOrder, const unsigned n, bool* const commonFlag, const int16_t fact)
{
const int32_t weightI = __min (64, fact); // crosstalk constant
const int32_t weightD = 128 - weightI; // (1 - crosstalk) * 128
int16_t* const parCor1 = tnsData1.coeffParCor[n];
int16_t* const parCor2 = tnsData2.coeffParCor[n];
for (uint16_t s = 0; s < maxTnsOrder; s++) // apply crosstalking
{
const int16_t coeff1 = parCor1[s];
parCor1[s] = int16_t ((coeff1 * weightD + parCor2[s] * weightI + 64) >> 7);
parCor2[s] = int16_t ((coeff1 * weightI + parCor2[s] * weightD + 64) >> 7);
}
tnsData1.coeffResLow[n] = false; // optimize coeffs of channel 1
tnsData1.filterOrder[n] = predictor.calcOptTnsCoeffs (parCor1, tnsData1.coeff[n], &tnsData1.coeffResLow[n],
maxTnsOrder, UCHAR_MAX /*max pred gain*/, 0, LP_DEPTH);
tnsData1.numFilters[n] = (tnsData1.filterOrder[n] > 0 ? 1 : 0);
tnsData2.coeffResLow[n] = false; // optimize coeffs of channel 2
tnsData2.filterOrder[n] = predictor.calcOptTnsCoeffs (parCor2, tnsData2.coeff[n], &tnsData2.coeffResLow[n],
maxTnsOrder, UCHAR_MAX /*max pred gain*/, 0, LP_DEPTH);
tnsData2.numFilters[n] = (tnsData2.filterOrder[n] > 0 ? 1 : 0);
if (commonFlag != nullptr) *commonFlag &= (tnsData1.coeffResLow[n] == tnsData2.coeffResLow[n] && tnsData1.filterOrder[n] == tnsData2.filterOrder[n]);
if (commonFlag != nullptr && *commonFlag) *commonFlag &= (memcmp (tnsData1.coeff[n], tnsData2.coeff[n], sizeof (tnsData1.coeff[n])) == 0);
}
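// worked example for the long-window limit below: at 48 kHz, (0x20A000 + 24000) / 48000 = 45
// (integer division), so the result is 45 - 9 + bitRateMode = 36 + bitRateMode, with no final
// subtraction since 48000 >= 46009; e.g. 45 bands at bitRateMode 9, as the in-line comment notes.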
static inline uint8_t brModeAndFsToMaxSfbLong (const unsigned bitRateMode, const unsigned samplingRate)
{
// max. for fs of 44 kHz: band 47 (19.3 kHz), 48 kHz: 45 (19.5 kHz), 64 kHz: 39 (22.0 kHz)
return __max (39, (0x20A000 + (samplingRate >> 1)) / samplingRate) - 9 + bitRateMode - (samplingRate < 46009 ? bitRateMode >> 3 : 0);
}
static inline uint8_t brModeAndFsToMaxSfbShort(const unsigned bitRateMode, const unsigned samplingRate)
{
// max. for fs of 44 kHz: band 13 (19.3 kHz), 48 kHz: 13 (21.0 kHz), 64 kHz: 11 (23.0 kHz)
return (samplingRate > 51200 ? 11 : 13) - 2 + (bitRateMode >> 2);
}
#if !EE_MORE_MSE
static inline void findActualBandwidthShort (uint8_t* const maxSfbShort, const uint16_t* sfbOffsets,
const int32_t* mdctSignals, const int32_t* mdstSignals, const unsigned nSamplesInShort)
{
const uint16_t b = sfbOffsets[1]; // beginning of search region
uint8_t maxSfb = __max (1, *maxSfbShort);
uint16_t sfbOffs = sfbOffsets[maxSfb - 1];
for (uint16_t e = sfbOffsets[maxSfb] - 1; e >= b; e--) // search
{
int32_t maxAbs = abs (mdctSignals[e]);
if (mdstSignals != nullptr)
{
maxAbs = __max (maxAbs, abs (mdstSignals[e]));
for (uint16_t w = 7; w > 0; w--)
{
maxAbs = __max (maxAbs, abs (mdctSignals[e + w * nSamplesInShort]));
maxAbs = __max (maxAbs, abs (mdstSignals[e + w * nSamplesInShort]));
}
}
else
{
for (uint16_t w = 7; w > 0; w--)
{
maxAbs = __max (maxAbs, abs (mdctSignals[e + w * nSamplesInShort]));
}
}
if (maxAbs > maxSfb * (SA_EPS >> 1)) break;
if (e == sfbOffs) sfbOffs = sfbOffsets[(--maxSfb) - 1];
}
if (*maxSfbShort > maxSfb) *maxSfbShort = maxSfb;
}
#endif
static inline uint8_t stereoCorrGrouping (const SfbGroupData& grpData, const unsigned nSamplesInFrame, uint8_t* stereoCorrData)
{
const uint16_t numWinGroup = grpData.numWindowGroups;
const uint16_t numBandsWin = nSamplesInFrame >> (SA_BW_SHIFT + 3);
uint32_t m = 0, w;
for (uint16_t gr = 0; gr < numWinGroup; gr++)
{
const uint16_t grpLength = grpData.windowGroupLength[gr];
const uint16_t grpLenFac = oneTwentyEightOver[grpLength]; // for grpStereoCorr/grpLength
const uint16_t grpLenOff = ((grpLenFac & (grpLenFac - 1)) > 0 ? 0 : 64); // for rounding
for (uint16_t b = 0; b < numBandsWin; b++)
{
uint32_t grpStereoCorr = 0;
for (w = 0; w < grpLength; w++) grpStereoCorr += stereoCorrData[b + w * numBandsWin];
if (b == 0) m += grpStereoCorr;
grpStereoCorr = (grpStereoCorr * grpLenFac + grpLenOff) >> 7;
for (w = 0; w < grpLength; w++) stereoCorrData[b + w * numBandsWin] = grpStereoCorr;
}
stereoCorrData += grpLength * numBandsWin;
}
return uint8_t ((m + 4) >> 3); // mean low-band correlation value
}
// ISO/IEC 23003-3, Table 75
static inline unsigned toFrameLength (const USAC_CCFL coreCoderFrameLength)
{
return (unsigned) coreCoderFrameLength;
}
// ISO/IEC 23003-3, Table 73
static const uint8_t numberOfChannels[USAC_MAX_NUM_ELCONFIGS] = {0, 1, 2, 3, 4, 5, 6, 8, 2, 3, 4, 7, 8};
static inline unsigned toNumChannels (const USAC_CCI chConfigurationIndex)
{
return numberOfChannels[__max (0, (signed char) chConfigurationIndex)];
}
// ISO/IEC 14496-3, Table 4.140
static const uint16_t sfbOffsetL0[42] = { // 88.2 and 96 kHz
0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 64, 72, 80, 88, 96, 108,
120, 132, 144, 156, 172, 188, 212, 240, 276, 320, 384, 448, 512, 576, 640, 704, 768, 832, 896, 960, 1024
};
// ISO/IEC 14496-3, Table 4.141
static const uint16_t sfbOffsetS0[13] = {
0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 64, 92, 128
};
// ISO/IEC 14496-3, Table 4.138
static const uint16_t sfbOffsetL1[48] = { // 64 kHz
0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 64, 72, 80, 88, 100, 112, 124, 140, 156,
172, 192, 216, 240, 268, 304, 344, 384, 424, 464, 504, 544, 584, 624, 664, 704, 744, 784, 824, 864, 904, 944, 984, 1024
};
// ISO/IEC 14496-3, Table 4.139
static const uint16_t sfbOffsetS1[13] = {
0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 64, 92, 128
};
// ISO/IEC 14496-3, Table 4.131
static const uint16_t sfbOffsetL2[52] = { // 32, 44.1, and 48 kHz
0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 48, 56, 64, 72, 80, 88, 96, 108, 120, 132, 144, 160, 176, 196, 216, 240,
264, 292, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960/*!*/, 992/*!*/, 1024
};
// ISO/IEC 14496-3, Table 4.130
static const uint16_t sfbOffsetS2[15] = {
0, 4, 8, 12, 16, 20, 28, 36, 44, 56, 68, 80, 96, 112, 128
};
// ISO/IEC 14496-3, Table 4.136
static const uint16_t sfbOffsetL3[48] = { // 22.05 and 24 kHz
0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 52, 60, 68, 76, 84, 92, 100, 108, 116, 124, 136, 148,
160, 172, 188, 204, 220, 240, 260, 284, 308, 336, 364, 396, 432, 468, 508, 552, 600, 652, 704, 768, 832, 896, 960, 1024
};
// ISO/IEC 14496-3, Table 4.137
static const uint16_t sfbOffsetS3[16] = {
0, 4, 8, 12, 16, 20, 24, 28, 36, 44, 52, 64, 76, 92, 108, 128
};
// ISO/IEC 14496-3, Table 4.134
static const uint16_t sfbOffsetL4[44] = { // 11.025, 12, and 16 kHz
0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 100, 112, 124, 136, 148, 160, 172, 184, 196, 212,
228, 244, 260, 280, 300, 320, 344, 368, 396, 424, 456, 492, 532, 572, 616, 664, 716, 772, 832, 896, 960, 1024
};
// ISO/IEC 14496-3, Table 4.135
static const uint16_t sfbOffsetS4[16] = {
0, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 60, 72, 88, 108, 128
};
// ISO/IEC 14496-3, Table 4.132
static const uint16_t sfbOffsetL5[41] = { // 8 kHz
0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 120, 132, 144, 156, 172, 188, 204, 220, 236, 252, 268,
288, 308, 328, 348, 372, 396, 420, 448, 476, 508, 544, 580, 620, 664, 712, 764, 820, 880, 944, 1024
};
// ISO/IEC 14496-3, Table 4.133
static const uint16_t sfbOffsetS5[16] = {
0, 4, 8, 12, 16, 20, 24, 28, 36, 44, 52, 60, 72, 88, 108, 128
};
// long-window SFB offset tables
static const uint16_t* swbOffsetsL[USAC_NUM_FREQ_TABLES] = {
sfbOffsetL0, sfbOffsetL1, sfbOffsetL2, sfbOffsetL3, sfbOffsetL4, sfbOffsetL5
};
static const uint8_t numSwbOffsetL[USAC_NUM_FREQ_TABLES] = {42, 48, 52, 48, 44, 41};
// short-window SFB offset tables
static const uint16_t* swbOffsetsS[USAC_NUM_FREQ_TABLES] = {
sfbOffsetS0, sfbOffsetS1, sfbOffsetS2, sfbOffsetS3, sfbOffsetS4, sfbOffsetS5
};
static const uint8_t numSwbOffsetS[USAC_NUM_FREQ_TABLES] = {13, 13, 15, 16, 16, 16};
// ISO/IEC 23003-3, Table 79
static const uint8_t freqIdxToSwbTableIdxAAC[USAC_NUM_SAMPLE_RATES + 2] = {
/*96000*/ 0, 0, 1, 2, 2, 2,/*24000*/ 3, 3, 4, 4, 4, 5, 5, // AAC
255, 255, 1, 2, 2, 2, 2, 2,/*25600*/ 3, 3, 3, 4, 4, 4, 4 // USAC
};
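// example: frequency indices 3 (48000 Hz) and 4 (44100 Hz) both map to table index 2 above,
// i.e. sfbOffsetL2/sfbOffsetS2, consistent with the "32, 44.1, and 48 kHz" table comment.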
#if !RESTRICT_TO_AAC
static const uint8_t freqIdxToSwbTableIdx768[USAC_NUM_SAMPLE_RATES + 2] = {
/*96000*/ 0, 0, 0, 1, 1, 2,/*24000*/ 2, 2, 3, 4, 4, 4, 4, // AAC
255, 255, 0, 1, 2, 2, 2, 2,/*25600*/ 2, 3, 3, 3, 3, 4, 4 // USAC
};
#endif
// ISO/IEC 23003-3, Table 131
static const uint8_t tnsScaleFactorBandLimit[2 /*long/short*/][USAC_NUM_FREQ_TABLES] = { // TNS_MAX_BANDS
{31, 34, 51 /*to be corrected to 42 (44.1) and 40 (48 kHz)!*/, 47, 43, 40}, {9, 10, 14, 15, 15, 15}
};
static const uint8_t sbrRateOffset[10] = {7, 6, 6, 8, 7, 8, 9, 9, 9, 9}; // used for scaleSBR
// scale_factor_grouping map
// group lengths based on transient location: 1133, 1115, 2114, 3113, 4112, 5111, 3311, 1331
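// the seven bits of each entry, MSB first, correspond to short windows 1..7; a set bit means
// that window stays in the group of the preceding window. E.g. grouping 1133 has new groups
// starting at windows 1, 2, and 5, giving bits 0,0,1,1,0,1,1 = 0x1B; grouping 1115 likewise
// yields 0,0,0,1,1,1,1 = 0x0F.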
static const uint8_t scaleFactorGrouping[8] = {0x1B, 0x0F, 0x47, 0x63, 0x71, 0x78, 0x6C, 0x36};
static const uint8_t windowGroupingTable[8][NUM_WINDOW_GROUPS] = { // for window_group_length
{1, 1, 3, 3}, {1, 1, 1, 5}, {2, 1, 1, 4}, {3, 1, 1, 3}, {4, 1, 1, 2}, {5, 1, 1, 1}, {3, 3, 1, 1}, {1, 3, 3, 1}
};
// window_sequence equalizer
static const USAC_WSEQ windowSequenceSynch[5][5] = { // 1st: chan index 0, 2nd: chan index 1
{ONLY_LONG, LONG_START, EIGHT_SHORT, LONG_STOP, STOP_START }, // left: ONLY_LONG
#if RESTRICT_TO_AAC
{LONG_START, LONG_START, EIGHT_SHORT, EIGHT_SHORT, STOP_START }, // Left: LONG_START
#else
{LONG_START, LONG_START, EIGHT_SHORT, STOP_START, STOP_START }, // Left: LONG_START
#endif
{EIGHT_SHORT, EIGHT_SHORT, EIGHT_SHORT, EIGHT_SHORT, EIGHT_SHORT}, // Left: EIGHT_SHORT
#if RESTRICT_TO_AAC
{LONG_STOP, EIGHT_SHORT, EIGHT_SHORT, LONG_STOP, STOP_START }, // Left: LONG_STOP
#else
{LONG_STOP, STOP_START, EIGHT_SHORT, LONG_STOP, STOP_START }, // Left: LONG_STOP
#endif
{STOP_START, STOP_START, EIGHT_SHORT, STOP_START, STOP_START } // Left: STOP_START
};
// private helper functions
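// applies TNS analysis filtering to one window group: determines the TNS start SFB and the
// TNS_MAX_BANDS limit for the current sampling rate, converts the quantized ParCor data to
// LP coefficients, filters the group's MDCT coefficients in-place with that filter (order up
// to MAX_PREDICTION_ORDER, with the samples just below the start offset temporarily zeroed
// for standards compliance), and finally recomputes the SFB RMS statistics on the filtered
// spectrum. Returns a nonzero error value on invalid arguments or internal failures.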
unsigned ExhaleEncoder::applyTnsToWinGroup (SfbGroupData& grpData, const uint8_t grpIndex, const uint8_t maxSfb, TnsData& tnsData,
const unsigned channelIndex, const unsigned n, const bool realOnlyCalc)
{
const uint16_t filtOrder = tnsData.filterOrder[n];
const uint16_t* grpSO = &grpData.sfbOffsets[m_numSwbShort * grpIndex];
const bool eightShorts = (grpData.numWindowGroups > 1);
unsigned errorValue = 0; // no error
if ((grpIndex >= NUM_WINDOW_GROUPS) || (maxSfb > (eightShorts ? MAX_NUM_SWB_SHORT : MAX_NUM_SWB_LONG)) || (channelIndex >= USAC_MAX_NUM_CHANNELS))
{
return 1; // invalid arguments error
}
if (filtOrder > 0) // determine TNS filter length in SFBs and apply TNS analysis filtering
{
const int numSwbWin = (eightShorts ? m_numSwbShort : m_numSwbLong);
uint8_t tnsMaxBands = tnsScaleFactorBandLimit[eightShorts ? 1 : 0][m_swbTableIdx];
int tnsStartSfb = 3 + 32000 / toSamplingRate (m_frequencyIdx); // 8-short TNS start
if (!eightShorts)
{
const unsigned samplingRate = toSamplingRate (m_frequencyIdx); // refine TNS_MAX_BANDS
const unsigned tnsStartOffs = (m_specAnaCurr[channelIndex] & 31) << SA_BW_SHIFT;
if ((samplingRate >= 46009) && (samplingRate < 55426)) tnsMaxBands = 40; // for 48 kHz
else
if ((samplingRate >= 37566) && (samplingRate < 46009)) tnsMaxBands = 42; // & 44.1 kHz
while (grpSO[tnsStartSfb] < tnsStartOffs) tnsStartSfb++; // start band for TNS filter
}
if ((tnsMaxBands = __min (tnsMaxBands, maxSfb)) <= tnsStartSfb) tnsStartSfb = numSwbWin;
if ((tnsData.filterLength[n] = __max (0, numSwbWin - tnsStartSfb)) > 0)
{
int32_t* const signal = m_mdctSignals[channelIndex];
const short offs = grpSO[tnsStartSfb];
uint16_t s = grpSO[tnsMaxBands] - offs;
short filterC[MAX_PREDICTION_ORDER] = {0, 0, 0, 0};
int32_t* predSig = &signal[grpSO[tnsMaxBands]]; // end of signal region to be filtered
errorValue |= m_linPredictor.quantTnsToLpCoeffs (tnsData.coeff[n], filtOrder, tnsData.coeffResLow[n], tnsData.coeffParCor[n], filterC);
// back up the leading MDCT samples
memcpy (m_tempIntBuf, &signal[offs - MAX_PREDICTION_ORDER], MAX_PREDICTION_ORDER * sizeof (int32_t));
// TNS compliance: set them to zero
memset (&signal[offs - MAX_PREDICTION_ORDER], 0, MAX_PREDICTION_ORDER * sizeof (int32_t));
if (filtOrder >= 4) // max. order 4
{
for (predSig--; s > 0; s--)
{
const int64_t predSample = *(predSig - 1) * (int64_t) filterC[0] + *(predSig - 2) * (int64_t) filterC[1] +
*(predSig - 3) * (int64_t) filterC[2] + *(predSig - 4) * (int64_t) filterC[3];
*(predSig--) += int32_t ((predSample + (1 << (LP_DEPTH - 2))) >> (LP_DEPTH - 1));
}
}
else if (filtOrder == 3) // order 3
{
for (predSig--; s > 0; s--)
{
const int64_t predSample = *(predSig - 1) * (int64_t) filterC[0] + *(predSig - 2) * (int64_t) filterC[1] +
*(predSig - 3) * (int64_t) filterC[2];
*(predSig--) += int32_t ((predSample + (1 << (LP_DEPTH - 2))) >> (LP_DEPTH - 1));
}
}
else // save 1-2 MACs, order 2 or 1
{
for (predSig--; s > 0; s--)
{
const int64_t predSample = *(predSig - 1) * (int64_t) filterC[0] + *(predSig - 2) * (int64_t) filterC[1];
*(predSig--) += int32_t ((predSample + (1 << (LP_DEPTH - 2))) >> (LP_DEPTH - 1));
}
}
// restore the leading MDCT samples
memcpy (&signal[offs - MAX_PREDICTION_ORDER], m_tempIntBuf, MAX_PREDICTION_ORDER * sizeof (int32_t));
// compute RMS data after filtering
errorValue |= m_specAnalyzer.getMeanAbsValues (signal, realOnlyCalc ? nullptr : m_mdstSignals[channelIndex],
grpSO[grpData.sfbsPerGroup], (eightShorts ? USAC_MAX_NUM_CHANNELS : channelIndex),
grpSO /*below TNS*/, __min ((int) grpData.sfbsPerGroup, tnsStartSfb),
&grpData.sfbRmsValues[m_numSwbShort * grpIndex]);
errorValue |= m_specAnalyzer.getMeanAbsValues (signal, nullptr /*no TNS on MDST*/, grpSO[grpData.sfbsPerGroup], channelIndex,
&grpSO[tnsStartSfb], __max (0, grpData.sfbsPerGroup - tnsStartSfb),
&grpData.sfbRmsValues[m_numSwbShort * grpIndex + tnsStartSfb]);
}
else tnsData.filterOrder[n] = tnsData.numFilters[n] = 0; // disable length-0 TNS filters
} // if order > 0
return errorValue;
}
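// regroups an EIGHT_SHORT spectrum according to ISO/IEC 14496-3, Figure 4.24: within each
// window group the coefficients of the group's windows are interleaved SFB by SFB, and the
// group's sfbOffsets are rescaled by the window-group length. Operates on the MDCT spectrum
// and, when present, the MDST spectrum, using m_tempIntBuf and m_timeSignals[1] as scratch.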
unsigned ExhaleEncoder::eightShortGrouping (SfbGroupData& grpData, uint16_t* const grpOffsets,
int32_t* const mdctSignal, int32_t* const mdstSignal)
{
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
const unsigned nSamplesInShort = nSamplesInFrame >> 3;
int32_t* const tempIntBuf/*2*/ = m_timeSignals[1]; // NOTE: requires at least stereo input
unsigned grpStartLine = nSamplesInFrame;
if ((grpOffsets == nullptr) || (mdctSignal == nullptr))
{
return 1; // invalid arguments error
}
for (short gr = grpData.numWindowGroups - 1; gr >= 0; gr--) // grouping, 14496-3 Fig. 4.24
{
const unsigned grpLength = grpData.windowGroupLength[gr];
uint16_t* const grpOffset = &grpOffsets[m_numSwbShort * gr];
int32_t* const grpMdctSig = &mdctSignal[grpStartLine -= nSamplesInShort * grpLength];
int32_t* const grpMdstSig = (mdstSignal != nullptr ? &mdstSignal[grpStartLine] : nullptr);
for (uint16_t b = 0; b < m_numSwbShort; b++)
{
const unsigned swbOffset = grpOffsets[b];
const unsigned numCoeffs = __min (grpOffsets[b + 1], nSamplesInShort) - swbOffset;
// adjust scale factor band offsets
grpOffset[b] = uint16_t (grpStartLine + swbOffset * grpLength);
// interleave spectral coefficients
for (uint16_t w = 0; w < grpLength; w++)
{
memcpy (&m_tempIntBuf[grpOffset[b] + w * numCoeffs], &grpMdctSig[swbOffset + w * nSamplesInShort], numCoeffs * sizeof (int32_t));
if (grpMdstSig != nullptr)
{
memcpy (&tempIntBuf[grpOffset[b] + w * numCoeffs], &grpMdstSig[swbOffset + w * nSamplesInShort], numCoeffs * sizeof (int32_t));
}
}
}
grpOffset[m_numSwbShort] = uint16_t (grpStartLine + nSamplesInShort * grpLength);
} // for gr
memcpy (mdctSignal, m_tempIntBuf, nSamplesInFrame * sizeof (int32_t));
if (mdstSignal != nullptr)
{
memcpy (mdstSignal, tempIntBuf, nSamplesInFrame * sizeof (int32_t));
}
return 0; // no error
}
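// obtains ParCor (reflection) coefficients and a TNS filter order per channel. For long
// windows the coefficients computed by the spectral analyzer are reused, and the TNS start
// band is lowered as long as filtering does not degrade a tested SA_BW-wide block by more
// than roughly 0.5 dB (the 17/16 test below). For EIGHT_SHORT frames, ParCor coefficients
// are computed per length-1 window group and the lowest filter order whose prediction gain
// is not exceeded at higher orders is selected. Returns the (maximum) prediction gain.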
unsigned ExhaleEncoder::getOptParCorCoeffs (const SfbGroupData& grpData, const uint8_t maxSfb, TnsData& tnsData,
const unsigned channelIndex, const uint8_t firstGroupIndexToTest /*= 0*/)
{
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
const unsigned tnsStartSfb = 3 + 32000 / toSamplingRate (m_frequencyIdx); // 8-short start
uint32_t temp, predGainMax = 0;
if ((maxSfb <= tnsStartSfb) || (channelIndex >= USAC_MAX_NUM_CHANNELS))
{
return 0; // invalid arguments error
}
if (grpData.numWindowGroups == 1) // LONG window: use ParCor coeffs from spectral analyzer
{
tnsData.coeffResLow[0] = false;
tnsData.filterDownward[0] = false; // enforce direction = 0 for now, detection difficult
#if EE_MORE_MSE
tnsData.filterOrder[0] = uint8_t (m_bitRateMode >= EE_MORE_MSE ? 0 : m_specAnalyzer.getLinPredCoeffs (tnsData.coeffParCor[0], channelIndex));
#else
tnsData.filterOrder[0] = (uint8_t) m_specAnalyzer.getLinPredCoeffs (tnsData.coeffParCor[0], channelIndex);
#endif
tnsData.firstTnsWindow = 0;
if (tnsData.filterOrder[0] > 0) // try to reduce TNS start band as long as SNR increases
{
const uint16_t filtOrder = tnsData.filterOrder[0];
uint16_t s = 0, b = __min ((m_specAnaCurr[channelIndex] & 31) + 2, (nSamplesInFrame - filtOrder) >> SA_BW_SHIFT);
short filterC[MAX_PREDICTION_ORDER] = {0, 0, 0, 0};
int32_t* predSig = &m_mdctSignals[channelIndex][b << SA_BW_SHIFT]; // TNS start offset
m_linPredictor.parCorToLpCoeffs (tnsData.coeffParCor[0], filtOrder, filterC);
for (b--, predSig--; b > 0; b--) // start a bit higher; b is in spectr. analysis units
{
uint64_t sumAbsOrg = 0, sumAbsTns = 0;
if (filtOrder >= 4) // max. order 4
{
for (s = SA_BW; s > 0; s--) // get 4th-order TNS residual
{
const int64_t predSample = *(predSig - 1) * (int64_t) filterC[0] + *(predSig - 2) * (int64_t) filterC[1] +
*(predSig - 3) * (int64_t) filterC[2] + *(predSig - 4) * (int64_t) filterC[3];
const int64_t mdctSample = *(predSig--);
const int64_t resiSample = mdctSample + ((predSample + (1 << 8)) >> 9);
sumAbsOrg += abs (mdctSample); sumAbsTns += abs (resiSample);
}
}
else if (filtOrder == 3) // order 3
{
for (s = SA_BW; s > 0; s--) // get 3rd-order TNS residual
{
const int64_t predSample = *(predSig - 1) * (int64_t) filterC[0] + *(predSig - 2) * (int64_t) filterC[1] +
*(predSig - 3) * (int64_t) filterC[2];
const int64_t mdctSample = *(predSig--);
const int64_t resiSample = mdctSample + ((predSample + (1 << 8)) >> 9);
sumAbsOrg += abs (mdctSample); sumAbsTns += abs (resiSample);
}
}
else // save 1-2 MACs, order 2 or 1
{
for (s = SA_BW; s > 0; s--) // get 2nd-order TNS residual
{
const int64_t predSample = *(predSig - 1) * (int64_t) filterC[0] + *(predSig - 2) * (int64_t) filterC[1];
const int64_t mdctSample = *(predSig--);
const int64_t resiSample = mdctSample + ((predSample + (1 << 8)) >> 9);
sumAbsOrg += abs (mdctSample); sumAbsTns += abs (resiSample);
}
}
if (sumAbsOrg * 17 <= sumAbsTns * 16) break; // band SNR reduced by more than 0.5 dB
}
m_specAnaCurr[channelIndex] = (m_specAnaCurr[channelIndex] & (UINT_MAX - 31)) | (b + 1);
} // if order > 0
return (m_specAnaCurr[channelIndex] >> 24) & UCHAR_MAX; // spectral analyzer's pred gain
}
// SHORT window: for each length-1 group, get TNS filter, then determine best filter order
tnsData.firstTnsWindow = UCHAR_MAX;
for (uint8_t n = 0, gr = 0; gr < grpData.numWindowGroups; gr++)
{
if (grpData.windowGroupLength[gr] == 1)
{
tnsData.coeffResLow[n] = false;
tnsData.filterDownward[n] = false; // force direction = 0 for now, detection difficult
tnsData.filterOrder[n] = 0;
if (tnsData.firstTnsWindow == UCHAR_MAX) tnsData.firstTnsWindow = gr;
if (gr < firstGroupIndexToTest)
{
memset (tnsData.coeffParCor[n], 0, MAX_PREDICTION_ORDER * sizeof (int16_t));
}
else // first length-one group tested
{
const int32_t* signal = m_mdctSignals[channelIndex];
const uint16_t* grpSO = &grpData.sfbOffsets[m_numSwbShort * gr];
uint32_t predGainCurr, predGainPrev, bestOrder = MAX_PREDICTION_ORDER;
temp = m_linPredictor.calcParCorCoeffs (&signal[grpSO[tnsStartSfb]], grpSO[maxSfb] - grpSO[tnsStartSfb], bestOrder, tnsData.coeffParCor[n]);
if (predGainMax < temp) predGainMax = temp; // maximum pred gain of filtered groups
predGainCurr = (temp >> 24) & UCHAR_MAX;
predGainPrev = (temp >> 16) & UCHAR_MAX;
while ((predGainPrev >= predGainCurr) && --bestOrder > 1) // lowest-order gain max.
{
predGainCurr = predGainPrev;
predGainPrev = (temp >> (8 * bestOrder - 16)) & UCHAR_MAX;
}
#if EE_MORE_MSE
tnsData.filterOrder[n] = uint8_t (m_bitRateMode >= EE_MORE_MSE ? 0 : ((bestOrder == 1) && (tnsData.coeffParCor[n][0] == 0) ? 0 : bestOrder));
#else
tnsData.filterOrder[n] = uint8_t ((bestOrder == 1) && (tnsData.coeffParCor[n][0] == 0) ? 0 : bestOrder);
#endif
}
n++;
}
} // for gr
return (predGainMax >> 24) & UCHAR_MAX; // max pred gain of all orders and length-1 groups
}
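// returns a scaled threshold for one SFB from the 32-frame loudness history in m_sfbLoudMem
// (sum of the history, reduced by 6 dB, then squared with a sampling-rate dependent shift);
// used below to zero the scale factors of quiet high-frequency bands at the lowest bit-rate mode.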
uint32_t ExhaleEncoder::getThr (const unsigned channelIndex, const unsigned sfbIndex)
{
const uint16_t* const sfbLoudMem = m_sfbLoudMem[channelIndex][sfbIndex];
uint32_t sumSfbLoud = 0;
for (int16_t s = 31; s >= 0; s--) sumSfbLoud += sfbLoudMem[s];
sumSfbLoud = (sumSfbLoud + 32) >> 6; // -6 dB
return sumSfbLoud * (sumSfbLoud >> (toSamplingRate (m_frequencyIdx) >> 13)); // scaled SMR
}
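// perceptual bit allocation: converts the psychoacoustic SFB step-sizes into scale factors.
// For LFE elements the step-sizes are scaled and mapped directly; for SCE/CPE elements the
// function optionally runs stereo pre-processing (crosstalk mixing when m_perCorrHCurr[el] >
// 128, to further increase inter-channel correlation), applies M/S or complex-prediction
// joint stereo via applyPredJointStereo(), refines the step-sizes (imprSfbStepSizes), derives
// the scale factors, and handles the noise-filling bandwidth extension and the max_sfb
// resynchronization for stereo pairs.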
unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via scale factors
{
const unsigned nChannels = toNumChannels (m_channelConf);
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
const unsigned samplingRate = toSamplingRate (m_frequencyIdx);
const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
const bool useMaxBandwidth = (samplingRate < 37566 || m_shiftValSBR > 0);
const uint8_t maxSfbLong = (useMaxBandwidth ? m_numSwbLong : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
const uint16_t scaleSBR = (m_shiftValSBR > 0 || m_nonMpegExt ? sbrRateOffset[m_bitRateMode] : 0); // -25% rate
const uint64_t scaleSr = (samplingRate < 27713 ? (samplingRate < 23004 ? 32 : 34) - __min (3 << m_shiftValSBR, m_bitRateMode)
: (samplingRate < 37566 && m_bitRateMode != 3u ? 36 : 37)) - (nChannels >> 1);
const uint64_t scaleBr = (m_bitRateMode == 0 || m_frameCount <= 1 ? __min (32, 17u + (((samplingRate + (1 << 11)) >> 12) << 1) - (nChannels >> 1))
: scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1)) + scaleSBR;
uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf;
uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS];
unsigned ci = 0, s; // running index
unsigned errorValue = 0; // no error
// psychoacoustic processing of SFB RMS values yielding masking thresholds in m_tempIntBuf
errorValue |= m_bitAllocator.initSfbStepSizes (m_scaleFacData, m_numSwbShort, m_specAnaCurr, m_tempAnaCurr,
nChannels, samplingRate, sfbStepSizes, lfeChannelIndex);
// get means of spectral and temporal flatness for every channel
m_bitAllocator.getChAverageSpecFlat (meanSpecFlat, nChannels);
for (unsigned el = 0; el < m_numElements; el++) // element loop
{
CoreCoderData& coreConfig = *m_elementData[el];
const unsigned nrChannels = (coreConfig.elementType & 1) + 1; // for UsacCoreCoderData()
if (coreConfig.elementType >= ID_USAC_LFE) // LFE/EXT elements
{
SfbGroupData& grpData = coreConfig.groupingData[0];
uint32_t* stepSizes = &sfbStepSizes[ci * m_numSwbShort * NUM_WINDOW_GROUPS];
const uint16_t* off = grpData.sfbOffsets;
const uint32_t* rms = grpData.sfbRmsValues;
uint8_t* scaleFactors = grpData.scaleFactors;
for (uint16_t b = 0; b < grpData.sfbsPerGroup; b++)
{
const unsigned lfConst = (samplingRate < 27713 ? 1 : 2);
const unsigned lfAtten = 4 + b * lfConst; // LF SNR boost, cf my M.Sc. thesis, p. 54
const uint8_t sfbWidth = off[b + 1] - off[b];
const uint64_t scale = scaleBr * __min (32, lfAtten); // rate control part 1 (SFB)
// scale step-sizes according to VBR mode, then derive scale factors from step-sizes
stepSizes[b] = uint32_t (__max (BA_EPS, ((1u << 9) + stepSizes[b] * scale) >> 10));
scaleFactors[b] = m_bitAllocator.getScaleFac (stepSizes[b], &m_mdctSignals[ci][off[b]], sfbWidth, rms[b]);
}
ci++;
}
else // SCE or CPE: bandwidth-to-max_sfb mapping, short-window grouping for each channel
{
const bool eightShorts0 = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT);
const TnsData& tnsData0 = coreConfig.tnsData[0];
const TnsData& tnsData1 = coreConfig.tnsData[1];
uint8_t realOnlyStartSfb = (eightShorts0 ? m_numSwbShort : m_numSwbLong) - __max (tnsData0.filterLength[0], tnsData1.filterLength[0]);
if (coreConfig.commonWindow && (coreConfig.stereoMode == 0) && (m_perCorrHCurr[el] > SCHAR_MAX || m_perCorrLCurr[el] > (UCHAR_MAX * 5) / 8))
{
coreConfig.stereoMode = 1;
}
if (m_perCorrHCurr[el] > 128) // execute stereo pre-processing to increase correlation
{
const int16_t chanCorrSign = (coreConfig.stereoConfig & 2 ? -1 : 1);
const uint16_t nSamplesMax = (useMaxBandwidth ? nSamplesInFrame : swbOffsetsL[m_swbTableIdx][__min (m_numSwbLong, maxSfbLong + 1)]);
const bool reducedStrength = (coreConfig.tnsActive && (m_bitRateMode > 0)) || (m_bitRateMode >= 5);
const uint8_t steppFadeLen = (eightShorts0 ? 4 : (reducedStrength ? 32 : 64));
const uint8_t steppFadeOff = ((m_bitRateMode + 77000 / samplingRate) & 6) << (eightShorts0 ? 2 : 5);
const int64_t steppWeightI = __min (64, m_perCorrHCurr[el] - 128) >> (eightShorts0 || reducedStrength ? 1 : 0); // crosstalk * 128
const int64_t steppWeightD = 128 - steppWeightI; // decrement, (1 - crosstalk) * 128
for (uint16_t n = 0, gr = 0; gr < coreConfig.groupingData[0].numWindowGroups; gr++)
{
const uint8_t grpLength = coreConfig.groupingData[0].windowGroupLength[gr];
const uint16_t* grpOff = &coreConfig.groupingData[0].sfbOffsets[m_numSwbShort * gr];
const uint16_t grpStart = grpOff[0] + steppFadeOff * grpLength;
int32_t* sigR0 = &m_mdctSignals[ci][grpStart];
int32_t* sigR1 = &m_mdctSignals[ci + 1][grpStart];
int64_t xTalkI = 0, xTalkD = 0; // weights for crosstalk
if ((grpLength == 1) && (tnsData0.numFilters[n] > 0 || tnsData1.numFilters[n] > 0))
{
const uint16_t maxLen = (eightShorts0 ? grpOff[m_numSwbShort] - 1 : __min (nSamplesInFrame - 1u, nSamplesMax)) - grpStart;
int32_t prevR0 = 0; // NOTE: functions also on grouped
int32_t prevR1 = 0; // MDCT spectra, but not properly!
for (uint16_t w = 0; w < grpLength; w++) // sub-window
{
prevR0 = *(sigR0++); prevR1 = *(sigR1++); // processing starts at offset of 1!
xTalkI = steppWeightI;
xTalkD = steppWeightD * (2 * steppFadeLen - 1);
for (s = steppFadeLen - 1; s > 0; s--, sigR0++, sigR1++) // start with fade-in
{
applyStereoPreProcessingReal (sigR0, sigR1, &prevR0, &prevR1, xTalkI, xTalkD, chanCorrSign);
xTalkI += steppWeightI;
xTalkD -= steppWeightD;
}
}
for (s = maxLen - steppFadeLen * grpLength; s > 0; s--, sigR0++, sigR1++) // end
{
applyStereoPreProcessingReal (sigR0, sigR1, &prevR0, &prevR1, xTalkI, xTalkD, chanCorrSign);
}
if (eightShorts0 || (nSamplesMax >= nSamplesInFrame)) *sigR0 = *sigR1 = 0;
realOnlyStartSfb = __min (realOnlyStartSfb, __min ((eightShorts0 ? 5 : 24), steppFadeOff / (eightShorts0 ? 4 : 7)));
}
else // TNS inactive, both MDCTs and MDSTs are available
{
const uint16_t maxLen = (eightShorts0 ? grpOff[m_numSwbShort] : nSamplesMax) - grpStart;
int32_t* sigI0 = &m_mdstSignals[ci][grpStart]; // imag
int32_t* sigI1 = &m_mdstSignals[ci + 1][grpStart];
for (uint16_t w = 0; w < grpLength; w++) // sub-window
{
sigR0++; sigR1++; sigI0++; sigI1++; // processing starts at an offset of 1!
xTalkI = steppWeightI;
xTalkD = steppWeightD * (2 * steppFadeLen - 1);
for (s = steppFadeLen - 1; s > 0; s--, sigR0++, sigR1++, sigI0++, sigI1++)
{
applyStereoPreProcessingCplx (sigR0, sigR1, sigI0, sigI1, xTalkI, xTalkD, chanCorrSign);
xTalkI += steppWeightI;
xTalkD -= steppWeightD;
}
}
for (s = maxLen - steppFadeLen * grpLength; s > 0; s--, sigR0++, sigR1++, sigI0++, sigI1++)
{
applyStereoPreProcessingCplx (sigR0, sigR1, sigI0, sigI1, xTalkI, xTalkD, chanCorrSign);
}
}
if (grpLength == 1) n++;
}
} // if m_perCorrHCurr[el] > 128
if ((errorValue == 0) && (coreConfig.stereoMode > 0)) // perform M/S, synch statistics
{
const uint8_t numSwbFrame = (eightShorts0 ? m_numSwbShort : __min (m_numSwbLong, maxSfbLong + 1));
const uint32_t peakIndexSte = __max ((m_specAnaCurr[ci] >> 5) & 2047, (m_specAnaCurr[ci + 1] >> 5) & 2047) << 5;
errorValue = m_stereoCoder.applyPredJointStereo (m_mdctSignals[ci], m_mdctSignals[ci + 1],
m_mdstSignals[ci], m_mdstSignals[ci + 1],
coreConfig.groupingData[0], coreConfig.groupingData[1],
coreConfig.tnsData[0], coreConfig.tnsData[1],
numSwbFrame, coreConfig.stereoDataCurr,
m_bitRateMode, coreConfig.stereoMode > 1,
(coreConfig.stereoConfig & 2) > 0, realOnlyStartSfb,
&sfbStepSizes[m_numSwbShort * NUM_WINDOW_GROUPS * ci],
&sfbStepSizes[m_numSwbShort * NUM_WINDOW_GROUPS * (ci + 1)]);
if (errorValue >= 2) // signal M/S with complex prediction
{
coreConfig.stereoConfig |= (errorValue & 7) - 2; // dir.
coreConfig.stereoMode += 2; errorValue = 0;
}
m_specAnaCurr[ci ] = (m_specAnaCurr[ci ] & (UINT_MAX - 65504)) | peakIndexSte;
m_specAnaCurr[ci + 1] = (m_specAnaCurr[ci + 1] & (UINT_MAX - 65504)) | peakIndexSte;
meanSpecFlat[ci] = meanSpecFlat[ci + 1] = ((uint16_t) meanSpecFlat[ci] + (uint16_t) meanSpecFlat[ci + 1]) >> 1;
}
else memset (coreConfig.stereoDataCurr, 0, (eightShorts0 || !coreConfig.commonWindow
? MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS : MAX_NUM_SWB_LONG) * sizeof (uint8_t));
errorValue |= m_bitAllocator.imprSfbStepSizes (m_scaleFacData, m_numSwbShort, m_mdctSignals, nSamplesInFrame, nrChannels,
((32 + 5 * m_shiftValSBR) * samplingRate) >> 5, sfbStepSizes, ci, meanSpecFlat,
coreConfig.commonWindow, coreConfig.stereoDataCurr, coreConfig.stereoConfig);
for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
{
SfbGroupData& grpData = coreConfig.groupingData[ch];
const bool eightShorts = (coreConfig.icsInfoCurr[ch].windowSequence == EIGHT_SHORT);
const uint8_t maxSfbCh = grpData.sfbsPerGroup;
#if !RESTRICT_TO_AAC
const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong);
#endif
#if !EE_MORE_MSE
const uint16_t rateFac = m_bitAllocator.getRateCtrlFac (m_rateFactor, samplingRate, meanSpecFlat[ci], coreConfig.icsInfoPrev[ch].windowSequence == EIGHT_SHORT);
#endif
uint32_t* stepSizes = &sfbStepSizes[ci * m_numSwbShort * NUM_WINDOW_GROUPS];
memset (grpData.scaleFactors, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint8_t));
for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++)
{
const uint16_t* grpOff = &grpData.sfbOffsets[m_numSwbShort * gr];
const uint32_t* grpRms = &grpData.sfbRmsValues[m_numSwbShort * gr];
uint8_t* grpScaleFacs = &grpData.scaleFactors[m_numSwbShort * gr];
uint32_t* grpStepSizes = &stepSizes[m_numSwbShort * gr];
#if EE_MORE_MSE
s = 0;
for (unsigned b = grpOff[0]; b < grpOff[maxSfbCh]; b++)
{
s += unsigned (0.5 + sqrt ((double) abs (m_mdctSignals[ci][b])));
}
if (el == 0 && nrChannels == 2)
{
for (unsigned b = grpOff[0]; b < grpOff[maxSfbCh]; b++)
{
s += unsigned (0.5 + sqrt ((double) abs (m_mdctSignals[1 - ci][b])));
}
s = (s + 1u) >> 1;
}
if (grpOff[maxSfbCh] > grpOff[0])
{
s = unsigned ((s * (eightShorts ? (24u + (grpData.windowGroupLength[gr] >> 2)) / grpData.windowGroupLength[gr] : 3u) + 4096u) >> 13);
# ifndef NO_PREROLL_DATA
if (((m_frameCount - 1u) % (m_indepPeriod << 1)) == 1 && m_numElements == 1 && !eightShorts) s = (4u + 9u * s) >> 3;
# endif
}
s = __max (1u + ((UINT32_MAX / (eightShorts ? 3u : 8u)) >> ((2 + m_bitRateMode / 9) * m_bitRateMode)), s * s);
#endif
for (unsigned b = 0; b < maxSfbCh; b++)
{
#if EE_MORE_MSE
const uint8_t sfbWidth = grpOff[b + 1] - grpOff[b];
const bool stereoCoded = (nrChannels == 2 && coreConfig.stereoMode > 0 && (coreConfig.stereoDataCurr[b] > 0 || !(coreConfig.stereoMode & 1)));
const uint32_t rmsbMax = (stereoCoded ? __max (grpRms[b], coreConfig.groupingData[1 - ch].sfbRmsValues[m_numSwbShort * gr + b]) : grpRms[b]);
const uint64_t sThresh = __max (1u + (UINT32_MAX >> 30), (rmsbMax * uint64_t (__max (16, b * b * grpData.numWindowGroups)) + 32u) >> 6);
const uint64_t predFac = (eightShorts || coreConfig.stereoMode < 3 || coreConfig.stereoDataCurr[b & 62] == 0 ? (eightShorts && !b ? 48u : 64u) :
uint64_t (0.5 + 64 - pow (__min (1.0, fabs (coreConfig.stereoDataCurr[b & 62] * 0.1 - 1.6)), 1.5) * 19.0)); // MS
grpStepSizes[b] = uint32_t (__min (sThresh, (s * predFac + 32u) >> 6));
if (stereoCoded && rmsbMax)
{
const uint32_t rmsCh = coreConfig.groupingData[1 - ch].sfbRmsValues[m_numSwbShort * gr + b];
grpStepSizes[b] = uint32_t (0.5 + grpStepSizes[b] * (1.0 - sqrt ((double) __min (grpRms[b], rmsCh) / rmsbMax) * 0.29289322));
}
#else
const unsigned lfConst = (samplingRate < 27713 && !eightShorts ? 1 : 2); // lfAtten: LF SNR boost, as in my M.Sc. thesis
const unsigned lfAtten = (b <= 5 ? (eightShorts ? 1 : 4) + b * lfConst : 5 * lfConst - 1 + b + ((b + 5) >> 4));
const uint8_t sfbWidth = grpOff[b + 1] - grpOff[b];
const uint64_t scale = scaleBr * rateFac * __min (32, lfAtten * grpData.numWindowGroups); // rate control part 1 (SFB)
// scale step-sizes according to VBR mode & derive scale factors from step-sizes
grpStepSizes[b] = uint32_t (__max (BA_EPS, ((1u << 24) + grpStepSizes[b] * scale) >> 25));
#endif
#if !RESTRICT_TO_AAC
if (!m_noiseFilling[el] || (m_bitRateMode > 0) || (m_shiftValSBR == 0) || (samplingRate < 23004) ||
(b + 3 - (meanSpecFlat[ci] >> 6) < m_numSwbLong)) // HF
#endif
grpScaleFacs[b] = m_bitAllocator.getScaleFac (grpStepSizes[b], &m_mdctSignals[ci][grpOff[b]], sfbWidth, grpRms[b]);
}
} // for gr
#if !RESTRICT_TO_AAC
if ((maxSfbCh > 0) && m_noiseFilling[el] && (m_shiftValSBR > 0 || m_bitRateMode <= 3 || !eightShorts))
{
const uint32_t maxSfbCurr = (eightShorts ? (useMaxBandwidth ? __min (15, 17 - (samplingRate >> 13) + (samplingRate >> 15))
: brModeAndFsToMaxSfbShort (m_bitRateMode, samplingRate)) : maxSfbLong);
const bool keepMaxSfbCurr = ((samplingRate < 37566) || (samplingRate >= 46009 && samplingRate < 55426 && eightShorts));
const uint8_t numSwbFrame = __min ((numSwbCh * ((maxSfbCh == maxSfbCurr) || (m_bitRateMode <= 2) || (m_shiftValSBR > 0) ? 4u : 3u)) >> 2,
maxSfbCurr + (m_bitRateMode < 2 || m_bitRateMode > 3 || keepMaxSfbCurr ? 0u : 1u));
if ((m_bitRateMode == 0) && (m_numElements == 1) && (samplingRate < 27713) && eightShorts)
{
for (s = 0; s < 26; s++) m_sfbLoudMem[ch][s][m_frameCount & 31] = uint16_t (sqrt (double (getThr (ch, s) << (samplingRate >> 13))));
}
if ((maxSfbCh < numSwbFrame) || (m_bitRateMode <= 2)) // increase coding bandwidth
{
for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++)
{
const uint32_t* grpRms = &grpData.sfbRmsValues[m_numSwbShort * gr];
if ((m_bitRateMode == 0) && (m_numElements == 1) && (samplingRate < 27713))
{
const uint32_t* refRms = &coreConfig.groupingData[1 - ch].sfbRmsValues[m_numSwbShort * gr];
uint8_t* grpStereoData = &coreConfig.stereoDataCurr[m_numSwbShort * gr];
const unsigned sfbStart = __max (samplingRate < 18783 ? 17 : 24, m_specGapFiller.getFirstGapFillSfb ());
for (s = sfbStart; s < maxSfbCh; s++)
{
const double rmsValue = double (grpStereoData[s] > 0 ? (grpRms[s] + (uint64_t) refRms[s] + 1) >> 1 : grpRms[s]);
const unsigned sfbIdx = s - sfbStart;
m_sfbLoudMem[ch][sfbIdx][m_frameCount & 31] = __max (BA_EPS, uint16_t (sqrt (rmsValue)));
if (grpRms[s] < getThr (ch, sfbIdx)) grpData.scaleFactors[s + m_numSwbShort * gr] = 0;
}
}
else if ((m_bitRateMode <= 4) && (meanSpecFlat[ci] <= (SCHAR_MAX >> 1))) // lo
{
for (s = __max (samplingRate < 27713 ? (samplingRate < 18783 ? 17 : 24) : 22, m_specGapFiller.getFirstGapFillSfb ()); s < maxSfbCh; s++)
{
if (grpRms[s] < ((3 * TA_EPS) >> 1)) grpData.scaleFactors[s + m_numSwbShort * gr] = 0;
}
}
memset (&grpData.scaleFactors[maxSfbCh + m_numSwbShort * gr], 0, (numSwbFrame - maxSfbCh) * sizeof (uint8_t));
}
grpData.sfbsPerGroup = coreConfig.icsInfoCurr[ch].maxSfb = __max (maxSfbCh, numSwbFrame);
}
if (ch > 0 && coreConfig.commonWindow) // resynchronize the two max_sfb for stereo
{
uint8_t& maxSfb0 = coreConfig.icsInfoCurr[0].maxSfb;
uint8_t& maxSfb1 = coreConfig.icsInfoCurr[1].maxSfb;
if (coreConfig.stereoMode > 0)
{
maxSfb0 = maxSfb1 = coreConfig.groupingData[0].sfbsPerGroup = grpData.sfbsPerGroup = __max (maxSfb0, maxSfb1);
}
coreConfig.commonMaxSfb = (maxSfb0 == maxSfb1);
}
}
else if (m_noiseFilling[el] && (m_bitRateMode == 0) && (m_numElements == 1) && (samplingRate < 27713))
{
for (s = 0; s < 26; s++) m_sfbLoudMem[ch][s][m_frameCount & 31] = BA_EPS;
}
#endif
ci++;
} // for ch
for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
{
SfbGroupData& grpData = coreConfig.groupingData[ch];
TnsData& tnsData = coreConfig.tnsData[ch];
if (tnsData.numFilters[0] + tnsData.numFilters[1] + tnsData.numFilters[2] > 0)
{
s = tnsData.firstTnsWindow = 0; // store length-1 group map for bit-stream writing
for (uint16_t gr = 0; gr < grpData.numWindowGroups; s += grpData.windowGroupLength[gr++])
{
if (grpData.windowGroupLength[gr] == 1) tnsData.firstTnsWindow |= (1u << s);
}
}
} // for ch
}
} // for el
return errorValue;
}
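// quantization and entropy coding stage: quantizes each channel's grouped MDCT spectrum SFB
// by SFB (quantizeSpecSfb), keeping per-SFB bit-count estimates in sfbRmsValues and limiting
// scale factor deltas to +-60 (INDEX_OFFSET). With EC_TRELLIS_OPT_CODING, ungrouped spectra
// are refined by rate-distortion optimized coding (quantizeSpecRDOC). The CVBR rate control
// then re-quantizes nonzero high-frequency SFBs above a flatness-based start band at minimum
// SNR and, if the estimate still exceeds targetBitCount25, continues downward from there.
// Finally, noise-filling parameters and (with SBR) envelope data are derived and the frame
// is written via createAudioFrame(), whose return value (the AU size, 0 on error) is returned.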
unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and entropy coding
{
const unsigned nChannels = toNumChannels (m_channelConf);
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
const unsigned samplingRate = toSamplingRate (m_frequencyIdx);
const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead
#if !EE_MORE_MSE
const bool useMaxBandwidth = (samplingRate < 37566 || m_shiftValSBR > 0);
#endif
const unsigned* const coeffMagn = m_sfbQuantizer.getCoeffMagnPtr ();
uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS];
uint8_t meanTempFlat[USAC_MAX_NUM_CHANNELS] = {208, 208, 208, 208, 208, 208, 208, 208};
unsigned ci = 0, s; // running index
unsigned errorValue = (coeffMagn == nullptr ? 1 : 0);
// get means of spectral and temporal flatness for every channel
m_bitAllocator.getChAverageSpecFlat (meanSpecFlat, nChannels);
if ((m_bitRateMode < (2u >> m_shiftValSBR)) && (samplingRate >= 23004) && (samplingRate < 37566))
{
m_bitAllocator.getChAverageTempFlat (meanTempFlat, nChannels);
}
for (unsigned el = 0; el < m_numElements; el++) // element loop
{
CoreCoderData& coreConfig = *m_elementData[el];
const unsigned nrChannels = (coreConfig.elementType & 1) + 1; // for UsacCoreCoderData()
if ((coreConfig.elementType < ID_USAC_LFE) && (coreConfig.stereoMode > 0)) // synch SFMs
{
meanSpecFlat[ci] = meanSpecFlat[ci + 1] = ((uint16_t) meanSpecFlat[ci] + (uint16_t) meanSpecFlat[ci + 1]) >> 1;
meanTempFlat[ci] = meanTempFlat[ci + 1] = ((uint16_t) meanTempFlat[ci] + (uint16_t) meanTempFlat[ci + 1]) >> 1;
}
for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
{
EntropyCoder& entrCoder = m_entropyCoder[ci];
SfbGroupData& grpData = coreConfig.groupingData[ch];
const bool shortWinCurr = (coreConfig.icsInfoCurr[ch].windowSequence == EIGHT_SHORT);
const bool shortWinPrev = (coreConfig.icsInfoPrev[ch].windowSequence == EIGHT_SHORT);
char* const arithTuples = entrCoder.arithGetTuplePtr ();
uint8_t sfIdxPred = UCHAR_MAX;
if ((errorValue > 0) || (arithTuples == nullptr))
{
return 0; // an internal error
}
// back up entropy coder memory for use by bit-stream writer
memcpy (m_tempIntBuf, arithTuples, (nSamplesInFrame >> 1) * sizeof (char));
errorValue |= (entrCoder.getIsShortWindow () != shortWinPrev ? 1 : 0); // sanity check
memset (m_mdctQuantMag[ci], 0, nSamplesInFrame * sizeof (uint8_t)); // initialization
for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++)
{
const uint8_t grpLength = grpData.windowGroupLength[gr];
const uint16_t* grpOff = &grpData.sfbOffsets[m_numSwbShort * gr];
uint32_t* const grpRms = &grpData.sfbRmsValues[m_numSwbShort * gr]; // coding stats
uint8_t* grpScaleFacs = &grpData.scaleFactors[m_numSwbShort * gr];
uint32_t estimBitCount = 0;
unsigned lastSfb = 0, lastSOff = 0;
errorValue |= entrCoder.initWindowCoding (m_indepFlag && (gr == 0), shortWinCurr);
s = 0;
for (uint16_t b = 0; b < grpData.sfbsPerGroup; b++)
{
// partial SFB ungrouping for entropy coding setup below
const uint16_t swbSize = ((grpOff[b + 1] - grpOff[b]) * oneTwentyEightOver[grpLength]) >> 7; // sfbWidth / grpLength
uint8_t* const swbMagn = &m_mdctQuantMag[ci][grpOff[b + 1] - swbSize];
grpScaleFacs[b] = m_sfbQuantizer.quantizeSpecSfb (entrCoder, m_mdctSignals[ci], grpLength, grpOff, grpRms,
b, grpScaleFacs[b], sfIdxPred, m_mdctQuantMag[ci]);
if ((b > 0) && (grpScaleFacs[b] < UCHAR_MAX) && (sfIdxPred == UCHAR_MAX))
{
// back-propagate first nonzero-SFB scale factor index
memset (grpScaleFacs, grpScaleFacs[b], b * sizeof (uint8_t));
}
sfIdxPred = grpScaleFacs[b];
// correct previous scale factor if the delta exceeds 60
if ((b > 0) && (grpScaleFacs[b] > grpScaleFacs[b - 1] + INDEX_OFFSET))
{
const uint16_t sfbM1Start = grpOff[b - 1];
const uint16_t sfbM1Width = grpOff[b] - sfbM1Start;
const uint16_t swbM1Size = (sfbM1Width * oneTwentyEightOver[grpLength]) >> 7; // sfbM1Width / grpLength
grpScaleFacs[b - 1] = grpScaleFacs[b] - (b > 1 ? INDEX_OFFSET : 0); // zero-out
memset (&m_mdctQuantMag[ci][sfbM1Start], 0, sfbM1Width * sizeof (uint8_t));
// correct SFB statistics with some bit count estimate
grpRms[b - 1] = 1 + (sfbM1Width >> 3) + entrCoder.indexGetBitCount (b > 1 ? (int) grpScaleFacs[b - 1] - grpScaleFacs[b - 2] : 0);
// correct entropy coding 2-tuples for the next window
memset (&arithTuples[lastSOff], 1, (swbM1Size >> 1) * sizeof (char));
}
// correct next scale factor if the reduction exceeds 60
if ((b + 1u < grpData.sfbsPerGroup) && (sfIdxPred < UCHAR_MAX) && (grpLength == 1) &&
(grpScaleFacs[b] > grpScaleFacs[b + 1] + INDEX_OFFSET))
{
grpScaleFacs[b + 1] = grpScaleFacs[b] - INDEX_OFFSET; // avoid preset-9 zero-out
}
if (b > 0)
{
if ((grpRms[b - 1] >> 16) > 0) lastSfb = b - 1;
estimBitCount += grpRms[b - 1] & USHRT_MAX;
}
// set up entropy coding 2-tuples for next SFB or window
lastSOff = s;
for (uint16_t c = 0; c < swbSize; c += 2)
{
arithTuples[s++] = __min (0xF, swbMagn[c] + swbMagn[c + 1] + 1); // 23003-3, 7.4
}
} // for b
if (grpData.sfbsPerGroup > 0) // rate control part 2 to reach constrained VBR (CVBR)
{
#if EE_MORE_MSE
const unsigned targetBitCount25 = INT32_MAX;
#else
const uint8_t maxSfbLong = (useMaxBandwidth ? 54 - (samplingRate >> 13) : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
const uint8_t maxSfbShort = (useMaxBandwidth ? 19 - (samplingRate >> 13) : brModeAndFsToMaxSfbShort(m_bitRateMode, samplingRate));
const uint16_t peakIndex = (shortWinCurr ? 0 : (m_specAnaCurr[ci] >> 5) & 2047);
const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort - 2 + (meanSpecFlat[ci] >> 6) : maxSfbLong - 6 + (meanSpecFlat[ci] >> 5)) +
(shortWinCurr ? -3 + (((1 << 5) + meanTempFlat[ci]) >> 6) : -7 + (((1 << 4) + meanTempFlat[ci]) >> 5));
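// bit-count threshold for the current window group, e.g. for a 1024-sample long-window
// frame at 48 kHz with m_bitRateMode 3, no SBR, and m_frameCount > 1:
// (60000 + 20000 * 3) * 1024 / 48000 = 2560 bits (one window group, so the group term is 1)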
const unsigned targetBitCount25 = ((60000 + 20000 * ((m_bitRateMode + m_shiftValSBR) >> (m_frameCount <= 1 ? 2 : 0))) * nSamplesInFrame) /
(samplingRate * ((grpData.numWindowGroups + 1) >> 1));
#endif
unsigned b = grpData.sfbsPerGroup - 1;
if ((grpRms[b] >> 16) > 0) lastSfb = b;
estimBitCount += grpRms[b] & USHRT_MAX;
#if EC_TRELLIS_OPT_CODING
if (grpLength == 1) // finalize bit count estimate, RDOC
{
estimBitCount = m_sfbQuantizer.quantizeSpecRDOC (entrCoder, grpScaleFacs, estimBitCount + 2u,
grpOff, grpRms, grpData.sfbsPerGroup, m_mdctQuantMag[ci]);
for (b = 1; b < grpData.sfbsPerGroup; b++)
{
// correct previous scale factor if delta exceeds 60
if (grpScaleFacs[b] > grpScaleFacs[b - 1] + INDEX_OFFSET)
{
const uint16_t sfbM1Start = grpOff[b - 1];
const uint16_t sfbM1Width = grpOff[b] - sfbM1Start;
grpScaleFacs[b - 1] = grpScaleFacs[b] - (b > 1 ? INDEX_OFFSET : 0); // 0-out
memset (&m_mdctQuantMag[ci][sfbM1Start], 0, sfbM1Width * sizeof (uint8_t));
// correct statistics with some bit count estimate
grpRms[b - 1] = 1 + (sfbM1Width >> 3) + entrCoder.indexGetBitCount (b > 1 ? (int) grpScaleFacs[b - 1] - grpScaleFacs[b - 2] : 0);
// correct entropy coding 2-tuples for next window
memset (&arithTuples[(sfbM1Start - grpOff[0]) >> 1], 1, (sfbM1Width >> 1) * sizeof (char));
}
}
}
#endif
#if EE_MORE_MSE
b = lastSfb;
#else
// coarse-quantize near-Nyquist SFB with SBR @ 48-64 kHz
b = 40 + (samplingRate >> 12);
if ((m_shiftValSBR == 0) || (samplingRate < 23004) || shortWinCurr || (b > lastSfb)) b = lastSfb;
while ((b >= sfmBasedSfbStart + (m_bitRateMode >> 1) + (m_bitRateMode / 5)) && (grpOff[b] > peakIndex) && ((grpRms[b] >> 16) <= 1) &&
((estimBitCount * 5 > targetBitCount25 * 2) || (grpLength > 1 /*no accurate bit count estim. available for grouped spectrum*/)))
{
b--; // search first coarsely quantized high-freq. SFB
}
#endif
lastSOff = b;
for (b++; b <= lastSfb; b++)
{
if ((grpRms[b] >> 16) > 0) // re-quantize nonzero band
{
#if RESTRICT_TO_AAC
uint32_t maxVal = 1;
#else
uint32_t maxVal = (shortWinCurr || !m_noiseFilling[el] ? 1 : (m_specAnaCurr[ci] >> 23) & 1); // 1 or 0
#endif
estimBitCount -= grpRms[b] & USHRT_MAX;
grpRms[b] = (maxVal << 16) + maxVal; // bit estimate
maxVal = quantizeSfbWithMinSnr (coeffMagn, grpOff, b, grpLength, m_mdctQuantMag[ci], arithTuples, maxVal > 0);
grpScaleFacs[b] = __min (SCHAR_MAX, m_sfbQuantizer.getScaleFacOffset ((double) maxVal));
// correct SFB statistics with estimate of bit count
grpRms[b] += 3 + entrCoder.indexGetBitCount ((int) grpScaleFacs[b] - grpScaleFacs[b - 1]);
estimBitCount += grpRms[b] & USHRT_MAX;
}
else // re-repeat scale factor for zero quantized band
{
grpScaleFacs[b] = grpScaleFacs[b - 1];
}
}
if (estimBitCount > targetBitCount25) // too many bits!!
{
for (b = lastSOff; b > 0; b--)
{
if ((grpRms[b] >> 16) > 0) // emergency re-quantizer
{
#if RESTRICT_TO_AAC
uint32_t maxVal = 1;
#else
uint32_t maxVal = (shortWinCurr || !m_noiseFilling[el] ? 1 : (m_specAnaCurr[ci] >> 23) & 1); // 1 or 0
#endif
estimBitCount -= grpRms[b] & USHRT_MAX;
grpRms[b] = (maxVal << 16) + maxVal; // bit estim.
maxVal = quantizeSfbWithMinSnr (coeffMagn, grpOff, b, grpLength, m_mdctQuantMag[ci], arithTuples, maxVal > 0);
grpScaleFacs[b] = __min (SCHAR_MAX, m_sfbQuantizer.getScaleFacOffset ((double) maxVal));
// correct SFB statistics with estimated bit count
grpRms[b] += 3 + entrCoder.indexGetBitCount ((int) grpScaleFacs[b] - grpScaleFacs[b - 1]);
estimBitCount += grpRms[b] & USHRT_MAX;
}
if (estimBitCount <= targetBitCount25) break;
}
for (b++; b <= lastSfb; b++) // re-repeat scale factor
{
if ((grpRms[b] >> 16) == 0) // a zero quantized band
{
grpScaleFacs[b] = grpScaleFacs[b - 1];
}
}
} // if estimBitCount > targetBitCount25
for (b = lastSfb + 1; b < grpData.sfbsPerGroup; b++)
{
if ((grpRms[b] >> 16) == 0) // HF zero quantized bands
{
grpScaleFacs[b] = grpScaleFacs[b - 1];
}
}
if ((grpScaleFacs[0] == UCHAR_MAX) &&
#if !RESTRICT_TO_AAC
!m_noiseFilling[el] &&
#endif
(lastSfb == 0)) // ensure all scale factors are set
{
memset (grpScaleFacs, (gr == 1 ? grpData.scaleFactors[grpData.sfbsPerGroup - 1] : 0), grpData.sfbsPerGroup * sizeof (uint8_t));
}
}
} // for gr
// restore entropy coder memory for use by bit-stream writer
memcpy (arithTuples, m_tempIntBuf, (nSamplesInFrame >> 1) * sizeof (char));
entrCoder.setIsShortWindow (shortWinPrev);
#if !RESTRICT_TO_AAC
s = 22050 + 7350 * m_bitRateMode; // compute channel-wise noise_level and noise_offset
sfIdxPred = ((m_bitRateMode == 0) && (m_priLength) && (m_shiftValSBR) && ((m_tempAnaCurr[ci] >> 24) || (m_tempAnaNext[ci] >> 24)) && (meanSpecFlat[ci] +
__min ((m_tempAnaCurr[ci] >> 16) & UCHAR_MAX, (m_tempAnaNext[ci] >> 16) & UCHAR_MAX) >= 192) ? UCHAR_MAX : meanSpecFlat[ci]);
coreConfig.specFillData[ch] = (!m_noiseFilling[el] ? 0 : m_specGapFiller.getSpecGapFillParams (m_sfbQuantizer, m_mdctQuantMag[ci], m_numSwbShort,
grpData, nSamplesInFrame, samplingRate, s,
shortWinCurr ? 0 : sfIdxPred));
if (coreConfig.specFillData[ch] == 1) errorValue |= 1;
#endif
s = ci + nrChannels - 1 - 2 * ch; // other channel in stereo
if ((coreConfig.elementType < ID_USAC_LFE) && (m_shiftValSBR > 0)) // collect SBR data
{
const uint8_t msfVal = (shortWinPrev ? 31 : __max (2, __max (m_meanSpecPrev[ci], meanSpecFlat[ci]) >> 3));
const uint8_t msfSte = (coreConfig.stereoMode == 0 ? 0 : (coreConfig.icsInfoPrev[s + ch - ci].windowSequence ==
EIGHT_SHORT ? 31 : __max (2, __max (m_meanSpecPrev[s ], meanSpecFlat[s ]) >> 3)));
int32_t tmpValSynch = 0;
memset (m_coreSignals[ci], 0, 10 * sizeof (int32_t));
#if ENABLE_INTERTES
m_coreSignals[ci][0] = 0x40000000; // bs_interTes = 1 for all frames
#endif
m_coreSignals[ci][0] |= 4 - int32_t (sqrt (0.75 * msfVal)); // filter mode, 0 = none
if (ch > 0 && coreConfig.stereoMode > 0) // synch. sbr_grid(), sbr_invf() for stereo
{
tmpValSynch = (m_coreSignals[s][0] >> 21) & 3; // nEnv, bits 23-22
m_coreSignals[ci][0] |= m_coreSignals[s][0] & 0x10000F; // bits 21
m_coreSignals[s][0] |= m_coreSignals[ci][0] & 0x10000F; // and 4-1
}
m_coreSignals[ci][0] |= getSbrEnvelopeAndNoise (&m_coreSignals[ci][nSamplesTempAna - 64 + nSamplesInFrame], msfVal,
__max (m_meanTempPrev[ci], meanTempFlat[ci]) >> 3, m_bitRateMode == 0,
m_indepFlag, msfSte, tmpValSynch, nSamplesInFrame, &m_coreSignals[ci][1]);
if (ch + 1 == nrChannels) // update the flatness histories
{
m_meanSpecPrev[ci] = meanSpecFlat[ci]; m_meanSpecPrev[s] = meanSpecFlat[s];
m_meanTempPrev[ci] = meanTempFlat[ci]; m_meanTempPrev[s] = meanTempFlat[s];
}
}
ci++;
}
} // for el
#if !RESTRICT_TO_AAC
m_rateFactor = samplingRate; // rate ctrl
#endif
return (errorValue > 0 ? 0 : m_outStream.createAudioFrame (m_elementData, m_entropyCoder, m_mdctSignals, m_mdctQuantMag, m_indepFlag,
m_numElements, m_numSwbShort, (uint8_t* const) m_tempIntBuf,
#if !RESTRICT_TO_AAC
m_timeWarping, m_noiseFilling, m_frameCount - 1u, m_indepPeriod, &m_rateFactor,
#endif
m_shiftValSBR, m_coreSignals, m_outAuData, nSamplesInFrame)); // returns AU size
}
unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS and SFB data
{
const unsigned nChannels = toNumChannels (m_channelConf);
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
const unsigned nSamplesInShort = nSamplesInFrame >> 3;
const unsigned samplingRate = toSamplingRate (m_frequencyIdx);
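// with 6 or more channels the last channel index acts as the LFE channel; otherwise USAC_MAX_NUM_CHANNELS signals that no LFE channel is present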
const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
const bool useMaxBandwidth = (samplingRate < 37566 || m_shiftValSBR > 0);
unsigned ci = 0, s; // running index
unsigned errorValue = 0; // no error
// get spectral channel statistics for last frame, used for input bandwidth (BW) detection
m_specAnalyzer.getSpectralBandwidth (m_bandwidPrev, nChannels);
// spectral analysis for current MCLT signal (windowed time-samples for the current frame)
errorValue |= m_specAnalyzer.spectralAnalysis (m_mdctSignals, m_mdstSignals, nChannels, nSamplesInFrame, samplingRate, lfeChannelIndex);
// get spectral channel statistics for this frame, used for perceptual model & BW detector
m_specAnalyzer.getSpecAnalysisStats (m_specAnaCurr, nChannels);
m_specAnalyzer.getSpectralBandwidth (m_bandwidCurr, nChannels);
for (unsigned el = 0; el < m_numElements; el++) // element loop
{
CoreCoderData& coreConfig = *m_elementData[el];
const unsigned nrChannels = (coreConfig.elementType & 1) + 1; // for UsacCoreCoderData()
coreConfig.commonMaxSfb = false;
coreConfig.commonTnsData = false;
coreConfig.tnsActive = false;
coreConfig.tnsOnLeftRight = true; // enforce tns_on_lr = 1 for now, detection difficult
memset (coreConfig.tnsData, 0, nrChannels * sizeof (TnsData));
if (coreConfig.elementType >= ID_USAC_LFE) // LFE/EXT elements
{
SfbGroupData& grpData = coreConfig.groupingData[0];
uint16_t* grpSO = grpData.sfbOffsets;
IcsInfo& icsCurr = coreConfig.icsInfoCurr[0];
memcpy (grpSO, swbOffsetsL[m_swbTableIdx], numSwbOffsetL[m_swbTableIdx] * sizeof (uint16_t));
icsCurr.maxSfb = MAX_NUM_SWB_LFE;
while (grpSO[icsCurr.maxSfb] > LFE_MAX) icsCurr.maxSfb--; // limit coefficients in LFE
grpData.sfbsPerGroup = icsCurr.maxSfb;
ci++;
}
else // SCE or CPE: bandwidth-to-max_sfb mapping, short-window grouping for each channel
{
coreConfig.stereoConfig = coreConfig.stereoMode = 0;
if (coreConfig.commonWindow && (m_bitRateMode <= 5)) // stereo pre-processing analysis
{
const bool eightShorts = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT);
const uint8_t meanSpecFlat = (((m_specAnaCurr[ci] >> 16) & UCHAR_MAX) + ((m_specAnaCurr[ci + 1] >> 16) & UCHAR_MAX) + 1) >> 1;
const uint16_t* const swbo = swbOffsetsL[m_swbTableIdx];
const uint16_t nSamplesMax = (useMaxBandwidth ? nSamplesInFrame : swbo[brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)]);
const int16_t steAnaStats = m_specAnalyzer.stereoSigAnalysis (m_mdctSignals[ci], m_mdctSignals[ci + 1], m_mdstSignals[ci], m_mdstSignals[ci + 1],
nSamplesMax, nSamplesInFrame, eightShorts, coreConfig.stereoDataCurr);
if (steAnaStats == SHRT_MIN) errorValue = 1;
if ((s = abs (steAnaStats)) * m_perCorrHCurr[el] == 0) // transition to/from silence
{
m_perCorrHCurr[el] = uint8_t ((32 + s * __min (64, eightTimesSqrt256Minus[meanSpecFlat])) >> 6);
}
else // gentle overlap length dependent temporal smoothing
{
const int16_t allowedDiff = (coreConfig.icsInfoCurr[0].windowSequence < EIGHT_SHORT ? 16 : 32);
const int16_t prevPerCorr = __max (128, __min (192, m_perCorrHCurr[el]));
const int16_t currPerCorr = (32 + s * __min (64, eightTimesSqrt256Minus[meanSpecFlat])) >> 6;
m_perCorrHCurr[el] = (uint8_t) __max (prevPerCorr - allowedDiff, __min (prevPerCorr + allowedDiff, currPerCorr));
}
m_perCorrLCurr[el] = coreConfig.stereoDataCurr[0];
if ((int) s == steAnaStats * -1) coreConfig.stereoConfig = 2; // 2: S>M, pred_dir=1
if (s > (UCHAR_MAX * (6u + m_shiftValSBR)) / 8) coreConfig.stereoMode = 2; // 2: all
if (s >= UCHAR_MAX - 2u + (m_bitRateMode / 5) + (meanSpecFlat >> 6)) coreConfig.stereoConfig |= 8; // tuning for mono-in-stereo audio
}
else if (nrChannels > 1) m_perCorrHCurr[el] = m_perCorrLCurr[el] = 128; // "mid" value
for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
{
SfbGroupData& grpData = coreConfig.groupingData[ch];
uint16_t* grpSO = grpData.sfbOffsets;
IcsInfo& icsCurr = coreConfig.icsInfoCurr[ch];
TnsData& tnsData = coreConfig.tnsData[ch];
memset (grpSO, 0, (1 + MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint16_t));
if (icsCurr.windowSequence != EIGHT_SHORT)
{
memcpy (grpSO, swbOffsetsL[m_swbTableIdx], numSwbOffsetL[m_swbTableIdx] * sizeof (uint16_t));
icsCurr.maxSfb = 0;
while (grpSO[icsCurr.maxSfb] < nSamplesInFrame) icsCurr.maxSfb++; // num_swb_long
grpSO[icsCurr.maxSfb] = (uint16_t) nSamplesInFrame;
grpData.sfbsPerGroup = m_numSwbLong = icsCurr.maxSfb; // changed to max_sfb later
if (samplingRate > 32000) // set max_sfb based on VBR mode and bandwidth detection
{
if (icsCurr.maxSfb > 49) // may still be 51 for 32 kHz
{
grpData.sfbsPerGroup = m_numSwbLong = icsCurr.maxSfb = 49; // fix 44.1, 48 kHz
}
icsCurr.maxSfb = __min (icsCurr.maxSfb, brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
}
#if !EE_MORE_MSE
while (grpSO[icsCurr.maxSfb] > __max (m_bandwidCurr[ci], m_bandwidPrev[ci]) + (icsCurr.maxSfb >> 1)) icsCurr.maxSfb--; // detect BW
#endif
}
else // icsCurr.windowSequence == EIGHT_SHORT
{
memcpy (grpSO, swbOffsetsS[m_swbTableIdx], numSwbOffsetS[m_swbTableIdx] * sizeof (uint16_t));
icsCurr.maxSfb = 0;
while (grpSO[icsCurr.maxSfb] < nSamplesInShort) icsCurr.maxSfb++; // num_swb_short
grpSO[icsCurr.maxSfb] = (uint16_t) nSamplesInShort;
grpData.sfbsPerGroup = m_numSwbShort = icsCurr.maxSfb; // changed to max_sfb later
if (samplingRate > 32000) // set max_sfb based on VBR mode and zero-ness detection
{
icsCurr.maxSfb = __min (icsCurr.maxSfb, brModeAndFsToMaxSfbShort (m_bitRateMode, samplingRate));
}
if (ch > 0 && coreConfig.commonWindow) // resynchronize the scale_factor_grouping
{
if (icsCurr.windowGrouping != coreConfig.icsInfoCurr[0].windowGrouping)
{
icsCurr.windowGrouping = coreConfig.icsInfoCurr[0].windowGrouping;
}
}
else // first element channel or not common_window, optimize scale_factor_grouping
{
if ((s = m_specAnalyzer.optimizeGrouping (ci, grpSO[icsCurr.maxSfb] << 3, icsCurr.windowGrouping)) < 8)
{
icsCurr.windowGrouping = (uint8_t) s;
}
}
memcpy (grpData.windowGroupLength, windowGroupingTable[icsCurr.windowGrouping], NUM_WINDOW_GROUPS * sizeof (uint8_t));
#if !EE_MORE_MSE
findActualBandwidthShort (&icsCurr.maxSfb, grpSO, m_mdctSignals[ci], nChannels < 2 ? nullptr : m_mdstSignals[ci], nSamplesInShort);
#endif
errorValue |= eightShortGrouping (grpData, grpSO, m_mdctSignals[ci], nChannels < 2 ? nullptr : m_mdstSignals[ci]);
} // if EIGHT_SHORT
// compute and quantize optimal TNS coefficients, then find optimal TNS filter order
s = getOptParCorCoeffs (grpData, icsCurr.maxSfb, tnsData, ci, (ch > 0 && coreConfig.commonWindow ? coreConfig.tnsData[0].firstTnsWindow : 0));
for (uint16_t n = 0, gr = 0; gr < grpData.numWindowGroups; gr++)
{
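// TNS analysis is run only for window groups of length 1, i.e. the single long window or ungrouped short windows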
if (grpData.windowGroupLength[gr] == 1)
{
const uint8_t tonality = (m_specAnaCurr[ci] >> 16) & UCHAR_MAX;
#if EE_MORE_MSE
bool noTnsFilt = (m_bitRateMode >= EE_MORE_MSE || icsCurr.maxSfb <= 40);
if (!noTnsFilt && samplingRate >= 27713 && samplingRate < 55426 && icsCurr.maxSfb > 40)
{
errorValue |= m_specAnalyzer.getMeanAbsValues (m_mdctSignals[ci], m_mdstSignals[ci], nSamplesInFrame, ci, &grpSO[29], 12, grpData.sfbRmsValues);
if (errorValue == 0)
{
for (int b = 0; b < 12; b++)
{
errorValue += unsigned (0.5 + sqrt ((double) grpData.sfbRmsValues[b]));
}
noTnsFilt |= (errorValue < ((unsigned) m_bitRateMode << 7)); // avoid clicks
errorValue = 0;
}
}
if (noTnsFilt) tnsData.filterOrder[n] = 0; else
#endif
tnsData.filterOrder[n] = m_linPredictor.calcOptTnsCoeffs (tnsData.coeffParCor[n], tnsData.coeff[n], &tnsData.coeffResLow[n],
tnsData.filterOrder[n], s, tonality >> (m_tempFlatPrev[ci] >> 5));
tnsData.numFilters[n] = (tnsData.filterOrder[n] > 0 ? 1 : 0);
if ((ch == 0) && (icsCurr.windowSequence == EIGHT_SHORT) && (tnsData.numFilters[n] == 0) && (tnsData.firstTnsWindow == gr))
{
tnsData.firstTnsWindow++; // simplify TNS stereo synching in eight-short frame
}
n++;
}
}
m_tempFlatPrev[ci++] = (uint8_t) s;
} // for ch
if (coreConfig.commonWindow) // synchronization of all StereoCoreToolInfo() components
{
uint8_t& maxSfb0 = coreConfig.icsInfoCurr[0].maxSfb;
uint8_t& maxSfb1 = coreConfig.icsInfoCurr[1].maxSfb;
const uint8_t maxSfbSte = __max (maxSfb0, maxSfb1); // max_sfb_ste, as in Table 24
if ((maxSfb0 > 0) && (maxSfb1 > 0) && (maxSfbSte - __min (maxSfb0, maxSfb1) <= 1 || coreConfig.stereoMode > 0))
{
uint32_t& sac0 = m_specAnaCurr[ci-2];
uint32_t& sac1 = m_specAnaCurr[ci-1];
TnsData& tnsData0 = coreConfig.tnsData[0];
TnsData& tnsData1 = coreConfig.tnsData[1];
const int specFlat[2] = {int (sac0 >> 16) & UCHAR_MAX, int (sac1 >> 16) & UCHAR_MAX};
const int tnsStart[2] = {int (sac0 & 31), int (sac1 & 31)}; // long TNS start band
if ((abs (specFlat[0] - specFlat[1]) <= (UCHAR_MAX >> 3)) &&
(abs (tnsStart[0] - tnsStart[1]) <= (UCHAR_MAX >> 4))) // TNS synchronization
{
coreConfig.commonTnsData = true;
for (uint16_t n = 0; n < 3; n++)
{
if ((s = __max (tnsData0.filterOrder[n], tnsData1.filterOrder[n])) == 0) continue;
if ((coreConfig.stereoMode > 0) || m_linPredictor.similarParCorCoeffs (tnsData0.coeffParCor[n], tnsData1.coeffParCor[n], s, LP_DEPTH))
{
applyTnsCoeff2ChannelSynch (m_linPredictor, tnsData0, tnsData1, s, n, &coreConfig.commonTnsData);
}
else if ((m_bitRateMode <= 5) && (m_perCorrHCurr[el] > 128))
{
applyTnsCoeffPreProcessing (m_linPredictor, tnsData0, tnsData1, s, n, &coreConfig.commonTnsData, m_perCorrHCurr[el] - 128);
}
else coreConfig.commonTnsData = false;
}
if (coreConfig.commonTnsData || (abs (tnsStart[0] - tnsStart[1]) <= (UCHAR_MAX >> 5)))
{
const uint32_t avgTnsStart = (tnsStart[0] + tnsStart[1]) >> 1; // synch start
sac0 = (sac0 & (UINT_MAX - 31)) | avgTnsStart; // used by applyTnsToWinGroup()
sac1 = (sac1 & (UINT_MAX - 31)) | avgTnsStart;
}
}
maxSfb0 = maxSfb1 = maxSfbSte;
if ((m_bitRateMode <= 5) && (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT))
{
m_perCorrLCurr[el] = stereoCorrGrouping (coreConfig.groupingData[0], nSamplesInFrame, coreConfig.stereoDataCurr);
}
}
else coreConfig.stereoMode = 0; // since a max_sfb is 0
coreConfig.commonMaxSfb = (maxSfb0 == maxSfb1); // synch
} // if coreConfig.commonWindow
}
ci -= nrChannels; // zero frequency coefficients above num_swb for all channels, windows
for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
{
SfbGroupData& grpData = coreConfig.groupingData[ch];
const uint16_t* grpSO = grpData.sfbOffsets;
const IcsInfo& icsCurr = coreConfig.icsInfoCurr[ch];
const bool eightShorts = (icsCurr.windowSequence == EIGHT_SHORT);
if (eightShorts) // map grouping table idx to scale_factor_grouping idx for bit-stream
{
coreConfig.icsInfoCurr[ch].windowGrouping = scaleFactorGrouping[icsCurr.windowGrouping];
}
s = 0;
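// zero all MDCT and MDST coefficients between each group's highest used SFB offset and the end of that group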
for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++)
{
const unsigned grMax = grpSO[grpData.sfbsPerGroup + m_numSwbShort * gr];
s += (eightShorts ? nSamplesInShort : nSamplesInFrame) * grpData.windowGroupLength[gr];
memset (&m_mdctSignals[ci][grMax], 0, (s - grMax) * sizeof (int32_t));
memset (&m_mdstSignals[ci][grMax], 0, (s - grMax) * sizeof (int32_t));
}
memset (grpData.sfbRmsValues, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint32_t));
if (icsCurr.maxSfb > 0)
{
// use MCLTs for LONG but only MDCTs for SHORT windows when the MDSTs aren't grouped
const uint8_t* nFilters = coreConfig.tnsData[ch].numFilters;
const bool realOnlyCalc = (eightShorts && nChannels < 2);
for (uint8_t n = 0, gr = 0; gr < grpData.numWindowGroups; gr++)
{
if (grpData.windowGroupLength[gr] == 1)
{
errorValue |= applyTnsToWinGroup (grpData, gr, grpData.sfbsPerGroup, coreConfig.tnsData[ch], ci, n, realOnlyCalc);
coreConfig.tnsActive |= (nFilters[n++] > 0); // set tns_data_present, tns_active
}
if ((grpData.windowGroupLength[gr] > 1) || (nFilters[n - 1] == 0))
{
s = m_numSwbShort * gr;
errorValue |= m_specAnalyzer.getMeanAbsValues (m_mdctSignals[ci], realOnlyCalc ? nullptr : m_mdstSignals[ci],
grpSO[grpData.sfbsPerGroup + s], (eightShorts ? USAC_MAX_NUM_CHANNELS : ci),
&grpSO[s], grpData.sfbsPerGroup, &grpData.sfbRmsValues[s]);
}
}
}
grpData.sfbsPerGroup = icsCurr.maxSfb; // change num_swb to max_sfb for coding process
ci++;
}
} // for el
return errorValue;
}
unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects of ics_info()
{
const unsigned nChannels = toNumChannels (m_channelConf);
const unsigned nSamplesInFrame = toFrameLength (m_frameLength) << m_shiftValSBR;
const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead
const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
unsigned ci = 0; // running ch index
unsigned errorValue = 0; // no error
// get temporal channel statistics for this frame, used for spectral grouping/quantization
m_tempAnalyzer.getTempAnalysisStats (m_tempAnaCurr, nChannels);
m_tempAnalyzer.getTransientAndPitch (m_tranLocCurr, nChannels);
// temporal analysis for look-ahead signal (central nSamplesInFrame samples of next frame)
errorValue |= m_tempAnalyzer.temporalAnalysis (m_timeSignals, nChannels, nSamplesInFrame, nSamplesTempAna,
m_shiftValSBR, m_coreSignals, lfeChannelIndex);
// get temporal channel statistics for next frame, used for window length/overlap decision
m_tempAnalyzer.getTempAnalysisStats (m_tempAnaNext, nChannels);
m_tempAnalyzer.getTransientAndPitch (m_tranLocNext, nChannels);
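// without pre-roll data only the first frame of each independency period carries usacIndependencyFlag = 1; with pre-roll the second frame is flagged as well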
#ifdef NO_PREROLL_DATA
m_indepFlag = (((m_frameCount++) % m_indepPeriod) == 0); // configure usacIndependencyFlag
#else
m_indepFlag = (((m_frameCount++) % m_indepPeriod) <= 1); // configure usacIndependencyFlag
#endif
for (unsigned el = 0; el < m_numElements; el++) // element loop
{
CoreCoderData& coreConfig = *m_elementData[el];
const unsigned nrChannels = (coreConfig.elementType & 1) + 1; // for UsacCoreCoderData()
coreConfig.commonWindow = false;
coreConfig.icsInfoPrev[0] = coreConfig.icsInfoCurr[0];
coreConfig.icsInfoPrev[1] = coreConfig.icsInfoCurr[1];
if (coreConfig.elementType >= ID_USAC_LFE) // LFE/EXT elements
{
IcsInfo& icsCurr = coreConfig.icsInfoCurr[0];
icsCurr.windowGrouping = 0;
icsCurr.windowSequence = ONLY_LONG;
#if RESTRICT_TO_AAC
icsCurr.windowShape = WINDOW_SINE;
#else
icsCurr.windowShape = WINDOW_KBD;
#endif
ci++;
}
else // SCE or CPE: short-window, low-overlap, and sine-shape detection for each channel
{
unsigned tsCurr[2]; // save temporal stationarity values
unsigned tsNext[2]; // for common_window decision in CPE
for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
{
const IcsInfo& icsPrev = coreConfig.icsInfoPrev[ch];
IcsInfo& icsCurr = coreConfig.icsInfoCurr[ch];
const USAC_WSEQ wsPrev = icsPrev.windowSequence;
USAC_WSEQ& wsCurr = icsCurr.windowSequence;
// get temporal signal statistics, then determine overlap config. for the next frame
#if !EE_MORE_MSE
const unsigned plCurr = abs (m_tranLocCurr[ci]) & ((1024 << m_shiftValSBR) - 1);
#endif
const unsigned sfCurr = (m_tempAnaCurr[ci] >> 24) & UCHAR_MAX;
const unsigned tfCurr = (m_tempAnaCurr[ci] >> 16) & UCHAR_MAX;
#if !EE_MORE_MSE
const unsigned plNext = abs (m_tranLocNext[ci]) & ((1024 << m_shiftValSBR) - 1);
#endif
const unsigned sfNext = (m_tempAnaNext[ci] >> 24) & UCHAR_MAX;
const unsigned tfNext = (m_tempAnaNext[ci] >> 16) & UCHAR_MAX;
#if !EE_MORE_MSE
const unsigned tThresh = UCHAR_MAX * (__max (plCurr, plNext) < 614 /*0.6 * 1024*/ ? 16 : 15 - (m_bitRateMode >> 3));
#endif
tsCurr[ch] = (m_tempAnaCurr[ci] /*R*/) & UCHAR_MAX;
tsNext[ch] = (m_tempAnaNext[ci] >> 8) & UCHAR_MAX;
// save maximum spectral flatness of current and neighboring frames for quantization
m_tempAnaCurr [ci] = (m_tempAnaCurr[ci] & 0xFFFFFF) | (__max (sfCurr, __max (m_specFlatPrev[ci], sfNext)) << 24);
m_specFlatPrev[ci] = (uint8_t) sfCurr;
#if EE_MORE_MSE
const bool lowOlapNext = (m_tranLocNext[ci] >= 0);
#else
const bool lowOlapNext = (m_tranLocNext[ci] >= 0) || (sfNext <= UCHAR_MAX / 4 && tfNext > (UCHAR_MAX * 13) / 16) ||
(tsCurr[ch] > (UCHAR_MAX * 5) / 8) || (tsNext[ch] > (UCHAR_MAX * 5) / 8);
#endif
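// select the sine window shape only when spectral and temporal flatness of the current and next frame lie in a medium range and both ts values stay below 20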
const bool sineWinCurr = (sfCurr >= 170) && (sfNext >= 170) && (sfCurr < 221) && (sfNext < 221) && (tsCurr[ch] < 20) &&
(tfCurr >= 153) && (tfNext >= 153) && (tfCurr < 184) && (tfNext < 184) && (tsNext[ch] < 20);
// set window_sequence
if ((wsPrev == ONLY_LONG) || (wsPrev == LONG_STOP)) // 1st window half - max overlap
{
wsCurr = (lowOlapNext ? LONG_START : ONLY_LONG);
}
else // LONG_START_SEQUENCE, STOP_START_SEQUENCE, EIGHT_SHORT_SEQUENCE - min overlap
{
#if EE_MORE_MSE
wsCurr = (m_tranLocCurr[ci] >= 0) ? EIGHT_SHORT :
#else
wsCurr = (m_tranLocCurr[ci] >= 0) || (tsCurr[ch] > (UCHAR_MAX * 5) / 8) || (tfCurr > tThresh / 16) ? EIGHT_SHORT :
#endif
#if RESTRICT_TO_AAC
(lowOlapNext ? EIGHT_SHORT : LONG_STOP);
#else
(lowOlapNext ? STOP_START : LONG_STOP);
#endif
}
// set window_shape
if ((wsCurr == ONLY_LONG) || (wsCurr == LONG_STOP)) // 2nd window half - max overlap
{
icsCurr.windowShape = (sineWinCurr ? WINDOW_SINE : WINDOW_KBD);
}
else // LONG_START_SEQUENCE, STOP_START_SEQUENCE, EIGHT_SHORT_SEQUENCE - min overlap
{
icsCurr.windowShape = (m_tranLocCurr[ci] >= 0) ? WINDOW_KBD :
(sineWinCurr ? WINDOW_SINE : WINDOW_KBD);
}
// set scale_factor_grouping
icsCurr.windowGrouping = (wsCurr == EIGHT_SHORT ? __max (0, m_tranLocCurr[ci]) / (2 * nSamplesInFrame) : 0);
ci++;
} // for ch
if (nrChannels > 1) // common_window element detection for use in StereoCoreToolInfo()
{
IcsInfo& icsInfo0 = coreConfig.icsInfoCurr[0];
IcsInfo& icsInfo1 = coreConfig.icsInfoCurr[1];
USAC_WSEQ& winSeq0 = icsInfo0.windowSequence;
USAC_WSEQ& winSeq1 = icsInfo1.windowSequence;
if (winSeq0 != winSeq1) // try to synch window_sequences
{
const USAC_WSEQ initialWs0 = winSeq0;
const USAC_WSEQ initialWs1 = winSeq1;
winSeq0 = winSeq1 = windowSequenceSynch[initialWs0][initialWs1]; // equalization
if ((winSeq0 != initialWs0) && (winSeq0 == EIGHT_SHORT))
{
#if !RESTRICT_TO_AAC
if ((tsCurr[0] * 7 < tsCurr[1] * 2) && (tsNext[0] * 7 < tsNext[1] * 2) &&
(abs (m_specFlatPrev[ci - 1] - (int) m_specFlatPrev[ci - 2]) > UCHAR_MAX / 4))
{
winSeq0 = STOP_START; // don't synchronize to EIGHT_SHORT but keep low overlap
}
else
#endif
icsInfo0.windowGrouping = icsInfo1.windowGrouping;
}
if ((winSeq1 != initialWs1) && (winSeq1 == EIGHT_SHORT))
{
#if !RESTRICT_TO_AAC
if ((tsCurr[1] * 7 < tsCurr[0] * 2) && (tsNext[1] * 7 < tsNext[0] * 2) &&
(abs (m_specFlatPrev[ci - 1] - (int) m_specFlatPrev[ci - 2]) > UCHAR_MAX / 4))
{
winSeq1 = STOP_START; // don't synchronize to EIGHT_SHORT but keep low overlap
}
else
#endif
icsInfo1.windowGrouping = icsInfo0.windowGrouping;
}
}
else if (winSeq0 == EIGHT_SHORT) // resynchronize scale_factor_grouping if necessary
{
const int16_t tranLocSynch = __min (m_tranLocCurr[ci - 2], m_tranLocCurr[ci - 1]);
icsInfo0.windowGrouping = icsInfo1.windowGrouping = __max (0, tranLocSynch) / (2 * nSamplesInFrame);
}
if ((icsInfo0.windowShape != WINDOW_SINE) || (icsInfo1.windowShape != WINDOW_SINE))
{
icsInfo0.windowShape = WINDOW_KBD; // always synchronize window_shapes in order to
icsInfo1.windowShape = WINDOW_KBD; // encourage synch in next frame; KBD dominates
}
coreConfig.commonWindow = (winSeq0 == winSeq1); // synch
memset (coreConfig.stereoDataPrev, 0, (MAX_NUM_SWB_LONG + 1) * sizeof (uint8_t));
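// carry over the previous frame's per-SFB stereo data when neither channel switched between long and short windows, the frame is not independent, and stereo coding was active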
if (((winSeq0 == EIGHT_SHORT) == (coreConfig.icsInfoPrev[0].windowSequence == EIGHT_SHORT)) && !m_indepFlag &&
((winSeq1 == EIGHT_SHORT) == (coreConfig.icsInfoPrev[1].windowSequence == EIGHT_SHORT)) && (coreConfig.stereoMode > 0))
{
const unsigned lastGrpOffset = (coreConfig.icsInfoPrev[0].windowSequence == EIGHT_SHORT ? m_numSwbShort * (NUM_WINDOW_GROUPS - 1) : 0);
const unsigned maxSfbStePrev = __max (coreConfig.icsInfoPrev[0].maxSfb, coreConfig.icsInfoPrev[1].maxSfb) + 1u; // for safety
memcpy (coreConfig.stereoDataPrev, &coreConfig.stereoDataCurr[lastGrpOffset], __min (60 - lastGrpOffset, maxSfbStePrev) * sizeof (uint8_t));
}
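// at bit-rate modes 0 and 1, store an additional time-domain stereo pre-analysis value in stereoDataCurr[0]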
coreConfig.stereoDataCurr[0] = (m_bitRateMode <= 1 ? m_tempAnalyzer.stereoPreAnalysis (&m_timeSignals[ci - 2], &m_specFlatPrev[ci - 2], nSamplesInFrame) : 0);
} // if nrChannels > 1
}
ci -= nrChannels; // modulated complex lapped transform (MCLT) for all channels, windows
for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
{
const IcsInfo& icsPrev = coreConfig.icsInfoPrev[ch];
const IcsInfo& icsCurr = coreConfig.icsInfoCurr[ch];
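// with SBR active, the MCLT input is the core-rate signal in m_coreSignals instead of the full-rate m_timeSignals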
const int32_t* timeSig = (m_shiftValSBR > 0 ? m_coreSignals[ci] : m_timeSignals[ci]);
const USAC_WSEQ wsCurr = icsCurr.windowSequence;
const bool eightShorts = (wsCurr == EIGHT_SHORT);
SfbGroupData& grpData = coreConfig.groupingData[ch];
grpData.numWindowGroups = (eightShorts ? NUM_WINDOW_GROUPS : 1); // fill groupingData
memcpy (grpData.windowGroupLength, windowGroupingTable[icsCurr.windowGrouping], NUM_WINDOW_GROUPS * sizeof (uint8_t));
errorValue |= m_transform.applyMCLT (timeSig, eightShorts, icsPrev.windowShape != WINDOW_SINE, icsCurr.windowShape != WINDOW_SINE,
wsCurr > LONG_START /*lOL*/, (wsCurr % 3) != ONLY_LONG /*lOR*/, m_mdctSignals[ci], m_mdstSignals[ci]);
m_scaleFacData[ci++] = &grpData;
}
} // for el
return errorValue;
}
// constructor
ExhaleEncoder::ExhaleEncoder (int32_t* const inputPcmData, unsigned char* const outputAuData,
const unsigned sampleRate /*= 44100*/, const unsigned numChannels /*= 2*/,
const unsigned frameLength /*= 1024*/, const unsigned indepPeriod /*= 45*/,
const unsigned varBitRateMode /*= 3*/
#if !RESTRICT_TO_AAC
, const bool useNoiseFilling /*= true*/, const bool useEcodisExt /*= false*/
#endif
)
{
// adopt basic coding parameters
m_bitRateMode = __min (9, varBitRateMode);
m_channelConf = (numChannels >= 7 ? CCI_UNDEF : (USAC_CCI) numChannels); // see 23003-3, Tables 73 & 161
if (m_channelConf == CCI_CONF) m_channelConf = CCI_2_CHM; // passing numChannels = 0 means 2-ch dual-mono
m_numElements = elementCountConfig[m_channelConf % USAC_MAX_NUM_ELCONFIGS]; // used in UsacDecoderConfig
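// an input frame length of 1536 or 2048 samples enables 2:1 SBR operation; the core coder then runs at half the frame length and sampling rate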
m_shiftValSBR = (frameLength >= 1536 ? 1 : 0);
m_frameCount = m_rateFactor = 0;
m_priLength = 0;
m_frameLength = USAC_CCFL (frameLength >> m_shiftValSBR); // ccfl signaled using coreSbrFrameLengthIndex
m_frequencyIdx = toSamplingFrequencyIndex (sampleRate >> m_shiftValSBR); // as usacSamplingFrequencyIndex
m_indepFlag = true; // usacIndependencyFlag in UsacFrame(), will be set per frame, true in first frame
m_indepPeriod = (indepPeriod == 0 ? USHRT_MAX : __min (USHRT_MAX, indepPeriod)); // random-access period
#if RESTRICT_TO_AAC
m_nonMpegExt = false;
#else
m_nonMpegExt = useEcodisExt;
#endif
m_numSwbLong = MAX_NUM_SWB_LONG;
m_numSwbShort = MAX_NUM_SWB_SHORT;
m_outAuData = outputAuData;
m_pcm24Data = inputPcmData;
m_tempIntBuf = nullptr;
// initialize all helper structs
for (unsigned el = 0; el < USAC_MAX_NUM_ELEMENTS; el++)
{
const ELEM_TYPE et = elementTypeConfig[m_channelConf % USAC_MAX_NUM_ELCONFIGS][el]; // usacElementType
m_elementData[el] = nullptr;
m_perCorrHCurr[el] = 0;
m_perCorrLCurr[el] = 0;
#if !RESTRICT_TO_AAC
m_noiseFilling[el] = (useNoiseFilling && (et < ID_USAC_LFE));
m_timeWarping[el] = (false /* N/A */ && (et < ID_USAC_LFE));
#endif
}
// initialize all signal buffers
for (unsigned ch = 0; ch < USAC_MAX_NUM_CHANNELS; ch++)
{
m_bandwidCurr[ch] = 0;
m_bandwidPrev[ch] = 0;
m_coreSignals[ch] = nullptr;
m_mdctQuantMag[ch] = nullptr;
m_mdctSignals[ch] = nullptr;
m_mdstSignals[ch] = nullptr;
m_meanSpecPrev[ch] = 0;
m_meanTempPrev[ch] = 0;
m_scaleFacData[ch] = nullptr;
m_specAnaCurr[ch] = 0;
m_specFlatPrev[ch] = 0;
m_tempAnaCurr[ch] = 0;
m_tempAnaNext[ch] = 0;
m_tempFlatPrev[ch] = 0;
m_timeSignals[ch] = nullptr;
m_tranLocCurr[ch] = -1;
m_tranLocNext[ch] = -1;
}
// initialize all window buffers
for (unsigned ws = WINDOW_SINE; ws <= WINDOW_KBD; ws++)
{
m_timeWindowL[ws] = nullptr;
m_timeWindowS[ws] = nullptr;
}
}
// destructor
ExhaleEncoder::~ExhaleEncoder ()
{
// free allocated helper structs
for (unsigned el = 0; el < USAC_MAX_NUM_ELEMENTS; el++)
{
MFREE (m_elementData[el]);
}
// free allocated signal buffers
for (unsigned ch = 0; ch < USAC_MAX_NUM_CHANNELS; ch++)
{
if (m_shiftValSBR > 0) MFREE (m_coreSignals[ch]);
MFREE (m_mdctQuantMag[ch]);
MFREE (m_mdctSignals[ch]);
MFREE (m_mdstSignals[ch]);
MFREE (m_timeSignals[ch]);
}
// free allocated window buffers
for (unsigned ws = WINDOW_SINE; ws <= WINDOW_KBD; ws++)
{
MFREE (m_timeWindowL[ws]);
MFREE (m_timeWindowS[ws]);
}
// execute sub-class destructors
}
// public functions
unsigned ExhaleEncoder::encodeLookahead ()
{
const unsigned nChannels = toNumChannels (m_channelConf);
const unsigned nSamplesInFrame = toFrameLength (m_frameLength) << m_shiftValSBR;
const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead
const int32_t* chSig = m_pcm24Data;
unsigned ch, s;
// copy nSamplesInFrame external channel-interleaved samples into internal channel buffers
for (s = 0; s < nSamplesInFrame; s++) // sample loop
{
for (ch = 0; ch < nChannels; ch++) m_timeSignals[ch][nSamplesTempAna + s] = *(chSig++);
}
// generate first nSamplesTempAna - m_priLength samples (previous frame data) by LP filter
for (ch = 0; ch < nChannels; ch++)
{
short filterC[MAX_PREDICTION_ORDER] = {0, 0, 0, 0};
short parCorC[MAX_PREDICTION_ORDER] = {0, 0, 0, 0};
int32_t* predSig = &m_timeSignals[ch][nSamplesTempAna - m_priLength];
m_linPredictor.calcParCorCoeffs (predSig, uint16_t (nSamplesInFrame >> 1), MAX_PREDICTION_ORDER, parCorC);
m_linPredictor.parCorToLpCoeffs (parCorC, MAX_PREDICTION_ORDER, filterC);
for (s = nSamplesTempAna - m_priLength; s > 0; s--) // generate predicted priming signal
{
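// backward extrapolation: each preceding sample is the sign-inverted LP prediction from the four samples following it, scaled down by 9 bits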
const int64_t predSample = *(predSig + 0) * (int64_t) filterC[0] + *(predSig + 1) * (int64_t) filterC[1] +
*(predSig + 2) * (int64_t) filterC[2] + *(predSig + 3) * (int64_t) filterC[3];
*(--predSig) = int32_t ((predSample > 0 ? -predSample + (1 << 9) - 1 : -predSample) >> 9);
}
if (m_shiftValSBR > 0) memset (m_coreSignals[ch], 0, ((nSamplesInFrame * 41) >> (4 + m_shiftValSBR)) * sizeof (int32_t));
}
// set initial temporal channel statistic to something meaningful before first coded frame
m_tempAnalyzer.temporalAnalysis (m_timeSignals, nChannels, nSamplesInFrame, nSamplesTempAna - nSamplesInFrame,
m_shiftValSBR, m_coreSignals); // default lfeChannelIndex
if (temporalProcessing ()) // time domain: window length, overlap, grouping, and transform
{
return 2; // internal error in temporal processing
}
if (spectralProcessing ()) // MCLT domain: (common_)max_sfb, grouping 2, TNS, and SFB data
{
return 2; // internal error in spectral processing
}
if (psychBitAllocation ()) // SFB domain: psychoacoustic model and scale factor estimation
{
return 1; // internal error in bit-allocation code
}
return quantizationCoding (); // max(3, coded bytes)
}
unsigned ExhaleEncoder::encodeFrame ()
{
const unsigned nChannels = toNumChannels (m_channelConf);
const unsigned nSamplesInFrame = toFrameLength (m_frameLength) << m_shiftValSBR;
const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead
const int32_t* chSig = m_pcm24Data;
unsigned ch, s;
// move internal channel buffers nSamplesInFrame to the past to make room for next samples
for (ch = 0; ch < nChannels; ch++)
{
memcpy (&m_timeSignals[ch][0], &m_timeSignals[ch][nSamplesInFrame], nSamplesInFrame * sizeof (int32_t));
memcpy (&m_timeSignals[ch][nSamplesInFrame], &m_timeSignals[ch][2 * nSamplesInFrame], (nSamplesTempAna - nSamplesInFrame) * sizeof (int32_t));
if (m_shiftValSBR > 0)
{
const unsigned nSmpInFrame = toFrameLength (m_frameLength); // core coder frame length
memcpy (&m_coreSignals[ch][0], &m_coreSignals[ch][nSmpInFrame], nSmpInFrame * sizeof (int32_t));
memcpy (&m_coreSignals[ch][nSmpInFrame], &m_coreSignals[ch][2 * nSmpInFrame], (nSamplesInFrame >> 2) * sizeof (int32_t));
}
}
// copy nSamplesInFrame external channel-interleaved samples into internal channel buffers
for (s = 0; s < nSamplesInFrame; s++) // sample loop
{
for (ch = 0; ch < nChannels; ch++) m_timeSignals[ch][nSamplesTempAna + s] = *(chSig++);
}
if (temporalProcessing ()) // time domain: window length, overlap, grouping, and transform
{
return 2; // internal error in temporal processing
}
if (spectralProcessing ()) // MCLT domain: (common_)max_sfb, grouping 2, TNS, and SFB data
{
return 2; // internal error in spectral processing
}
if (psychBitAllocation ()) // SFB domain: psychoacoustic model and scale factor estimation
{
return 1; // internal error in bit-allocation code
}
return quantizationCoding (); // max(3, coded bytes)
}
unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uint32_t* const audioConfigBytes /*= nullptr*/)
{
const unsigned nChannels = toNumChannels (m_channelConf);
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
const unsigned specSigBufSize = nSamplesInFrame * sizeof (int32_t);
const unsigned timeSigBufSize = (((nSamplesInFrame << m_shiftValSBR) * 41) >> 4) * sizeof (int32_t); // core-codec delay*4
const unsigned char chConf = m_channelConf;
unsigned ch, errorValue = 0; // no error
// check user's input parameters
#if RESTRICT_TO_AAC
if ((m_channelConf <= CCI_CONF) || (m_channelConf > CCI_8_CH))
#else
if ((m_channelConf <= CCI_CONF) || (m_channelConf > CCI_8_CHS))
#endif
{
errorValue |= 128;
}
#if RESTRICT_TO_AAC
if (m_frameLength != CCFL_1024)
#else
if ((m_frameLength != CCFL_768) && (m_frameLength != CCFL_1024))
#endif
{
errorValue |= 64;
}
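// the upper limit on m_bitRateMode scales with the core sampling rate: (rate >> 12) + 2, or (rate >> 11) + 2 when SBR is active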
if ((m_frequencyIdx < 0) || (m_bitRateMode > (toSamplingRate (m_frequencyIdx) >> (m_shiftValSBR > 0 ? 11 : 12)) + 2))
{
errorValue |= 32;
}
if ((m_outAuData == nullptr) || (m_pcm24Data == nullptr))
{
errorValue |= 16;
}
if (errorValue > 0) return errorValue;
// get window band table index
ch = (unsigned) m_frequencyIdx; // for temporary storage
#if RESTRICT_TO_AAC
m_swbTableIdx = freqIdxToSwbTableIdxAAC[ch];
#else
m_swbTableIdx = (m_frameLength == CCFL_768 ? freqIdxToSwbTableIdx768[ch] : freqIdxToSwbTableIdxAAC[ch]);
#endif
if (m_elementData[0] != nullptr) // initEncoder was called before, don't reallocate memory
{
if (audioConfigBuffer != nullptr) // recreate the UsacConfig()
{
errorValue = m_outStream.createAudioConfig (m_frequencyIdx, m_frameLength != CCFL_1024, chConf, m_numElements,
elementTypeConfig[chConf], audioConfigBytes ? *audioConfigBytes : 0,
#if !RESTRICT_TO_AAC
m_timeWarping, m_noiseFilling,
#endif
m_shiftValSBR, audioConfigBuffer);
if (audioConfigBytes) *audioConfigBytes = errorValue; // size of UsacConfig() in bytes
errorValue = (errorValue == 0 ? 1 : 0);
}
return errorValue;
}
// allocate all helper structs
for (unsigned el = 0; el < m_numElements; el++) // element loop
{
if ((m_elementData[el] = (CoreCoderData*) malloc (sizeof (CoreCoderData))) == nullptr)
{
errorValue |= 8;
}
else
{
memset (m_elementData[el], 0, sizeof (CoreCoderData));
m_elementData[el]->elementType = elementTypeConfig[chConf][el]; // usacElementType[el]
}
}
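// reset m_sfbLoudMem (2 x 26 x 32 uint16_t entries) to small nonzero start values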
memset (m_sfbLoudMem, 1, 2 * 26 * 32 * sizeof (uint16_t));
// allocate all signal buffers
if (m_shiftValSBR > 0)
{
if (m_shiftValSBR > 1) return (errorValue | 4); // no 8:3, 4:1
for (ch = 0; ch < nChannels; ch++)
{
if ((m_coreSignals[ch] = (int32_t*) malloc (timeSigBufSize >> m_shiftValSBR)) == nullptr)
{
errorValue |= 4;
}
}
}
for (ch = 0; ch < nChannels; ch++)
{
if ((m_entropyCoder[ch].initCodingMemory (nSamplesInFrame) > 0) ||
(m_mdctQuantMag[ch]= (uint8_t*) malloc (nSamplesInFrame * sizeof (uint8_t))) == nullptr ||
(m_mdctSignals[ch] = (int32_t*) malloc (specSigBufSize)) == nullptr ||
(m_mdstSignals[ch] = (int32_t*) malloc (specSigBufSize)) == nullptr ||
(m_timeSignals[ch] = (int32_t*) malloc (timeSigBufSize)) == nullptr)
{
errorValue |= 4;
}
}
// allocate all window buffers
for (unsigned ws = WINDOW_SINE; ws <= WINDOW_KBD; ws++)
{
if ((m_timeWindowL[ws] = initWindowHalfCoeffs ((USAC_WSHP) ws, nSamplesInFrame)) == nullptr ||
(m_timeWindowS[ws] = initWindowHalfCoeffs ((USAC_WSHP) ws, nSamplesInFrame >> 3)) == nullptr)
{
errorValue |= 2;
}
}
if (errorValue > 0) return errorValue;
// initialize coder class memory
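// channel 0's time signal buffer doubles as the temporary integer scratch buffer m_tempIntBuf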
m_tempIntBuf = m_timeSignals[0];
if (m_bitAllocator.initAllocMemory (&m_linPredictor, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode >> ((nChannels - 1) >> 2)) > 0 ||
#if EC_TRELLIS_OPT_CODING
m_sfbQuantizer.initQuantMemory (nSamplesInFrame, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode, toSamplingRate (m_frequencyIdx)) > 0 ||
#else
m_sfbQuantizer.initQuantMemory (nSamplesInFrame) > 0 ||
#endif
m_specAnalyzer.initSigAnaMemory (&m_linPredictor, m_bitRateMode <= 5 ? nChannels : 0, nSamplesInFrame) > 0 ||
m_transform.initConstants (m_tempIntBuf, m_timeWindowL, m_timeWindowS, nSamplesInFrame) > 0)
{
errorValue |= 1;
}
if ((errorValue == 0) && (audioConfigBuffer != nullptr)) // save UsacConfig() for writeout
{
const uint32_t loudnessInfo = (audioConfigBytes ? *audioConfigBytes : 0);
if (*audioConfigBuffer & 1) m_frameCount--; // to skip 1 frame
m_priLength = (*audioConfigBuffer >> 1);
errorValue = m_outStream.createAudioConfig (m_frequencyIdx, m_frameLength != CCFL_1024, chConf, m_numElements,
elementTypeConfig[chConf], loudnessInfo,
#if !RESTRICT_TO_AAC
m_timeWarping, m_noiseFilling,
#endif
m_shiftValSBR, audioConfigBuffer);
if (audioConfigBytes) *audioConfigBytes = errorValue; // length of UsacConfig() in bytes
errorValue = (errorValue == 0 ? 1 : 0);
// NOTE: Below, value 256 is a warning rather than an error. If the library is used in
// live scenarios and a nonzero loudness level is provided before any frames have been
// coded, it reminds developers to apply short-term R128 normalization to the incoming samples.
if ((m_frameCount == 0) && (loudnessInfo & 16383)) errorValue |= 256;
}
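// signal priming: copy the last m_priLength input samples of the first frame to the end of each channel's look-ahead region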
if (m_priLength)
{
const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> (4 - m_shiftValSBR);
const int32_t* chSig = &m_pcm24Data[nChannels * ((nSamplesInFrame << m_shiftValSBR) - m_priLength)];
for (unsigned s = nSamplesTempAna - m_priLength; s < nSamplesTempAna; s++)
{
for (ch = 0; ch < nChannels; ch++) m_timeSignals[ch][s] = *(chSig++);
}
}
return errorValue;
}
extern "C"
{
// C constructor
EXHALE_DECL ExhaleEncAPI* exhaleCreate (int32_t* const inputPcmData, unsigned char* const outputAuData,
const unsigned sampleRate, const unsigned numChannels,
const unsigned frameLength, const unsigned indepPeriod,
const unsigned varBitRateMode, const bool useNoiseFilling,
const bool useEcodisExt)
{
return reinterpret_cast<ExhaleEncAPI*> (new ExhaleEncoder (inputPcmData, outputAuData, sampleRate, numChannels, frameLength, indepPeriod, varBitRateMode
#if !RESTRICT_TO_AAC
, useNoiseFilling, useEcodisExt
#endif
));
}
// C destructor
EXHALE_DECL unsigned exhaleDelete (ExhaleEncAPI* exhaleEnc)
{
if (exhaleEnc != NULL) { delete reinterpret_cast<ExhaleEncoder*> (exhaleEnc); return 0; }
return USHRT_MAX; // error
}
// C initializer
EXHALE_DECL unsigned exhaleInitEncoder (ExhaleEncAPI* exhaleEnc, unsigned char* const audioConfigBuffer,
uint32_t* const audioConfigBytes)
{
if (exhaleEnc != NULL) return reinterpret_cast<ExhaleEncoder*> (exhaleEnc)->initEncoder (audioConfigBuffer, audioConfigBytes);
return USHRT_MAX; // error
}
// C lookahead encoder
EXHALE_DECL unsigned exhaleEncodeLookahead (ExhaleEncAPI* exhaleEnc)
{
if (exhaleEnc != NULL) return reinterpret_cast<ExhaleEncoder*> (exhaleEnc)->encodeLookahead ();
return USHRT_MAX; // error
}
// C frame encoder
EXHALE_DECL unsigned exhaleEncodeFrame (ExhaleEncAPI* exhaleEnc)
{
if (exhaleEnc != NULL) return reinterpret_cast<ExhaleEncoder*> (exhaleEnc)->encodeFrame ();
return USHRT_MAX; // error
}
} // extern "C"