/* quantization.cpp - source file for class with nonuniform quantization functionality
* written by C. R. Helmrich, last modified in 2023 - see License.htm for legal notices
*
* The copyright in this software is being made available under the exhale Copyright License
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
* party rights, including patent rights. No such rights are granted under this License.
*
* Copyright (c) 2018-2024 Christian R. Helmrich, project ecodis. All rights reserved.
*/
#include "exhaleLibPch.h"
#include "quantization.h"
#if SFB_QUANT_SSE
# include <xmmintrin.h>
#endif
#define EC_TRAIN (0 && EC_TRELLIS_OPT_CODING) // for RDOC testing
// static helper functions
static inline short getBitCount (EntropyCoder& entrCoder, const int sfIndex, const int sfIndexPred,
const uint8_t groupLength, const uint8_t* coeffQuant,
const uint16_t coeffOffset, const uint16_t numCoeffs)
{
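// Returns the bit demand of one SFB: scale factor signaling (8 bits when there is no
// predecessor to code a delta against, otherwise the bits of the arithmetically coded
// sf delta) plus, for long transforms with groupLength == 1, the spectral arithmetic-
// coding bits. Sign bits are counted separately by the callers.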
unsigned bitCount = (sfIndex != UCHAR_MAX && sfIndexPred == UCHAR_MAX ? 8 : entrCoder.indexGetBitCount (sfIndex - sfIndexPred));
if (groupLength == 1) // include arithmetic coding in bit count
{
#if EC_TRELLIS_OPT_CODING
bitCount += entrCoder.arithCodeSigTest (coeffQuant, coeffOffset, numCoeffs);
#else
bitCount += entrCoder.arithCodeSigMagn (coeffQuant, coeffOffset, numCoeffs);
#endif
}
return (short) __min (SHRT_MAX, bitCount); // exclude sign bits
}
#if EC_TRELLIS_OPT_CODING && !EC_TRAIN
static inline double getLagrangeValue (const uint16_t rateIndex) // RD optimization constant
{
return (95.0 + rateIndex * rateIndex) * 0.0009765625; // / 1024
}
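// Illustration: bitRateMode 0 gives lambda = 95/1024 ~= 0.093, mode 4 gives 111/1024 ~=
// 0.108, and mode 9 gives 176/1024 ~= 0.172, i.e., higher-rate presets weight each spent
// bit more strongly in the D + lambda * R trellis cost.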
#endif
// private helper functions
double SfbQuantizer::getQuantDist (const unsigned* const coeffMagn, const uint8_t scaleFactor,
const uint8_t* const coeffQuant, const uint16_t numCoeffs)
{
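// Weighted SFB distortion, D = sum_i (|x_i| - 2^(sf/4) * q_i^(4/3))^2: the squared error
// is summed in the normalized domain (magnitudes scaled by 2^(-sf/4)) and mapped back by
// multiplying with m_lut2ExpX4[sf] squared. The SSE path handles four coefficients per
// step; AAC SFB widths are multiples of four, so no remainder loop is needed.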
#if SFB_QUANT_SSE
const __m128 stepSizeDiv = _mm_set_ps1 ((float) m_lutSfNorm[scaleFactor]); // or _mm_set1_ps ()
__m128 sumsSquares = _mm_setzero_ps ();
float dist[4];
for (int i = numCoeffs - 4; i >= 0; i -= 4)
{
__m128 orig = _mm_set_ps ((float) coeffMagn[i + 0], (float) coeffMagn[i + 1],
(float) coeffMagn[i + 2], (float) coeffMagn[i + 3]);
__m128 reco = _mm_set_ps ((float) m_lutXExp43[coeffQuant[i + 0]], (float) m_lutXExp43[coeffQuant[i + 1]],
(float) m_lutXExp43[coeffQuant[i + 2]], (float) m_lutXExp43[coeffQuant[i + 3]]);
__m128 diff = _mm_sub_ps (reco, _mm_mul_ps (orig, stepSizeDiv));
sumsSquares = _mm_add_ps (sumsSquares, _mm_mul_ps (diff, diff));
}
_mm_storeu_ps (dist, sumsSquares);
// consider quantization step-size in calculation of distortion
return ((double) dist[0] + dist[1] + dist[2] + dist[3]) * m_lut2ExpX4[scaleFactor] * m_lut2ExpX4[scaleFactor];
#else
const double stepSizeDiv = m_lutSfNorm[scaleFactor];
double dDist = 0.0;
for (int i = numCoeffs - 1; i >= 0; i--)
{
const double d = m_lutXExp43[coeffQuant[i]] - coeffMagn[i] * stepSizeDiv;
dDist += d * d;
}
// consider quantization step-size in calculation of distortion
return dDist * m_lut2ExpX4[scaleFactor] * m_lut2ExpX4[scaleFactor];
#endif
}
uint8_t SfbQuantizer::quantizeMagnSfb (const unsigned* const coeffMagn, const uint8_t scaleFactor,
/*mod*/uint8_t* const coeffQuant, const uint16_t numCoeffs,
#if EC_TRELLIS_OPT_CODING
EntropyCoder* const arithmCoder, const uint16_t coeffOffset,
#endif
short* const sigMaxQ /*= nullptr*/, short* const sigNumQ /*= nullptr*/)
{
const double stepSizeDiv = m_lutSfNorm[scaleFactor];
double dNum = 0.0, dDen = 0.0;
short sf, maxQ = 0, numQ = 0;
for (int i = numCoeffs - 1; i >= 0; i--)
{
const double normalizedMagn = (double) coeffMagn[i] * stepSizeDiv;
short q;
if (normalizedMagn < 28.5) // fast approximate pow (d, 0.75)
{
// based on code from: N. N. Schraudolph, "A Fast, Compact Approximation of the Expo-
// nential Function," Neural Comput., vol. 11, pp. 853-862, 1999 and M. Ankerl, 2007,
// https://martin.ankerl.com/2007/10/04/optimized-pow-approximation-for-java-and-c-c/
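// Sketch of the bit trick: on little-endian targets u.i[1] is the high word of the
// IEEE-754 double, holding sign, exponent, and top mantissa bits, so u.i[1] is roughly
// 2^20 * log2 (d) + 1072632447 near d = 1. Scaling the offset from that bias by 0.75
// therefore approximates d^0.75 once the words are read back as a double; the constants
// added to u.d below are presumably empirical biases centering the rounding error
// before the cast truncates toward zero.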
union { double d; int32_t i[2]; } u = { normalizedMagn };
u.i[1] = int32_t (0.75 * (u.i[1] - 1072632447) + 1072632447.0);
u.i[0] = 0;
q = short (u.d + (u.d < 1.0 ? 0.3822484 : 0.2734375));
}
else
{
q = short (SFB_QUANT_OFFSET + pow (__min (1048544.0, normalizedMagn), 0.75)); // min avoids rare preset-9 overflow
}
if (q > 0)
{
if (q >= SCHAR_MAX)
{
if (maxQ < q)
{
maxQ = q; // find maximum quantized magnitude in vector
}
q = SCHAR_MAX;
}
else
{
const double diffRoundD = m_lutXExp43[q ] - normalizedMagn;
const double diffRoundU = m_lutXExp43[q + 1] - normalizedMagn;
if (diffRoundU * diffRoundU < diffRoundD * diffRoundD)
{
q++; // round-up gives lower distortion than round-down
}
}
if (maxQ < q)
{
maxQ = q;
}
numQ++;
dNum += m_lutXExp43[q] * normalizedMagn;
dDen += m_lutXExp43[q] * m_lutXExp43[q];
}
#if SFB_QUANT_PERCEPT_OPT
else // q == 0, assume perceptual transparency for code below
{
dNum += normalizedMagn * normalizedMagn;
dDen += normalizedMagn * normalizedMagn;
}
#endif
coeffQuant[i] = (uint8_t) q;
}
if (sigMaxQ) *sigMaxQ = maxQ; // max. quantized value magnitude
if (sigNumQ) *sigNumQ = numQ; // nonzero coeff. count (L0 norm)
sf = scaleFactor;
// compute least-squares optimal modifier added to scale factor
if (dNum > SF_THRESH_POS * dDen) sf++;
else
if (dNum < SF_THRESH_NEG * dDen) sf--;
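// Derivation: with n_i = |x_i| * 2^(-sf/4) and r_i = q_i^(4/3), the gain a minimizing
// sum_i (n_i - a * r_i)^2 is a* = dNum/dDen. One scale factor step changes the gain by
// 2^(1/4) ~= 1.189, so sf is only adjusted when a* leaves the window between
// SF_THRESH_NEG and SF_THRESH_POS (presumably bracketing unity).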
#if EC_TRELLIS_OPT_CODING
if (arithmCoder && (sf > 0) && (maxQ <= SCHAR_MAX)) // use RDOC
{
EntropyCoder& entrCoder = *arithmCoder;
#if EC_TRAIN
const uint32_t codStart = entrCoder.arithGetCodState ();
const uint32_t ctxStart = entrCoder.arithGetCtxState ();
uint32_t bitCount = entrCoder.arithCodeSigTest (&coeffQuant[-((int) coeffOffset)], coeffOffset, numCoeffs) + (uint32_t) numQ;
entrCoder.arithSetCodState (codStart); // back to last state
entrCoder.arithSetCtxState (ctxStart);
#else
uint32_t bitCount = (uint32_t) numQ;
#endif
if ((bitCount = quantizeMagnRDOC (entrCoder, (uint8_t) sf, bitCount, coeffOffset, coeffMagn, numCoeffs, coeffQuant)) > 0)
{
numQ = bitCount & SHRT_MAX;
if ((numQ > 0) && (sf < m_maxSfIndex)) // nonzero-quantized
{
const double magnNormDiv = m_lutSfNorm[sf];
dNum = dDen = 0.0;
for (int i = numCoeffs - 1; i >= 0; i--)
{
const double normalizedMagn = (double) coeffMagn[i] * magnNormDiv;
const uint8_t q = coeffQuant[i];
if (q > 0)
{
dNum += m_lutXExp43[q] * normalizedMagn;
dDen += m_lutXExp43[q] * m_lutXExp43[q];
}
# if SFB_QUANT_PERCEPT_OPT
else // assume perceptual transparency for code below
{
dNum += normalizedMagn * normalizedMagn;
dDen += normalizedMagn * normalizedMagn;
}
# endif
}
// re-compute least-squares optimal scale factor modifier
if (dNum > SF_THRESH_POS * dDen) sf++;
# if !SFB_QUANT_PERCEPT_OPT
else
if (dNum < SF_THRESH_NEG * dDen) sf--; // reduces SFB RMS
# endif
} // if nonzero
if (sigMaxQ) *sigMaxQ = (numQ > 0 ? maxQ : 0); // a new max
if (sigNumQ) *sigNumQ = numQ; // a new nonzero coeff. count
}
}
#endif // EC_TRELLIS_OPT_CODING
#if SFB_QUANT_PERCEPT_OPT
if ((numQ > 0) && (sf > 0 && sf <= scaleFactor)) // recover RMS
{
# if SFB_QUANT_SSE
const __m128 magnNormDiv = _mm_set_ps1 ((float) m_lutSfNorm[sf]); // or _mm_set1_ps ()
__m128 sumsSquares = _mm_setzero_ps ();
float fl[4]; // dDen has normalized energy after quantization
for (int i = numCoeffs - 4; i >= 0; i -= 4)
{
__m128 orig = _mm_set_ps ((float) coeffMagn[i + 0], (float) coeffMagn[i + 1],
(float) coeffMagn[i + 2], (float) coeffMagn[i + 3]);
__m128 norm = _mm_mul_ps (orig, magnNormDiv);
sumsSquares = _mm_add_ps (sumsSquares, _mm_mul_ps (norm, norm));
}
_mm_storeu_ps (fl, sumsSquares);
if ((double) fl[0] + fl[1] + fl[2] + fl[3] > SF_THRESH_POS * SF_THRESH_POS * dDen) sf++;
# else
const double magnNormDiv = m_lutSfNorm[sf];
dNum = 0.0; // dDen has normalized energy after quantization
for (int i = numCoeffs - 1; i >= 0; i--)
{
const double normalizedMagn = (double) coeffMagn[i] * magnNormDiv;
dNum += normalizedMagn * normalizedMagn;
}
if (dNum > SF_THRESH_POS * SF_THRESH_POS * dDen) sf++;
# endif
}
#endif
return (uint8_t) __max (0, sf); // optimized scale factor index
}
#if EC_TRELLIS_OPT_CODING
uint32_t SfbQuantizer::quantizeMagnRDOC (EntropyCoder& entropyCoder, const uint8_t optimalSf, const unsigned targetBitCount,
const uint16_t coeffOffset, const unsigned* const coeffMagn, // initial MDCT magnitudes
const uint16_t numCoeffs, uint8_t* const quantCoeffs) // returns updated SFB statistics
{
// numTuples: num of trellis stages. Based on: A. Aggarwal, S. L. Regunathan, and K. Rose,
// "Trellis-Based Optimization of MPEG-4 Advanced Audio Coding," in Proc. IEEE Workshop on
// Speech Coding, pp. 142-144, Sep. 2000. Modified for arithmetic instead of Huffman coder
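// Trellis layout used here: each stage is a 2-coefficient tuple, and the four states
// encode whether the rounded magnitudes of coefficients A and B are each reduced by one
// ([0, 0], [0, -1], [-1, 0], [-1, -1]). A decrement is only evaluated where the rounded
// magnitude equals one, i.e., the search decides which borderline coefficients to zero
// out. Edge costs combine the context-dependent arithmetic-coding rate of a tuple with
// its distortion, and a Viterbi pass picks the rounding pattern minimizing D + lambda*R.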
const uint32_t codStart = entropyCoder.arithGetCodState ();
const uint32_t ctxStart = entropyCoder.arithGetCtxState (); // before call to getBitCount
const double stepSizeDiv = m_lutSfNorm[optimalSf];
const uint16_t numStates = 4; // 4 reduction types: [0, 0], [0, -1], [-1, 0], and [-1, -1]
const uint16_t numTuples = numCoeffs >> 1;
uint8_t* const quantRate = &m_coeffTemp[((unsigned) m_maxSize8M1 + 1) << 3];
uint32_t prevCodState[4] = {0, 0, 0, 0};
uint32_t prevCtxState[4] = {0, 0, 0, 0};
double prevVtrbCost[4] = {0, 0, 0, 0};
uint32_t tempCodState[4] = {0, 0, 0, 0};
uint32_t tempCtxState[4] = {0, 0, 0, 0};
double tempVtrbCost[4] = {0, 0, 0, 0};
double quantDist[32][4]; // TODO: dynamic memory allocation
uint8_t* const optimalIs = (uint8_t* const) (quantDist[32-1]); // reuse last row as byte storage for per-tuple decisions
uint8_t tempQuant[4], numQ; // for tuple/SFB sign bit counting
unsigned tuple, is;
int ds;
#if EC_TRAIN
unsigned tempBitCount;
double refSfbDist = 0.0, tempSfbDist = 0.0;
#else
const double lambda = getLagrangeValue (m_rateIndex);
#endif
if ((coeffMagn == nullptr) || (quantCoeffs == nullptr) || (optimalSf > m_maxSfIndex) || (numTuples == 0) || (numTuples > 32) ||
(targetBitCount == 0) || (targetBitCount > SHRT_MAX))
{
return 0; // invalid input error
}
// save third-last tuple value, required due to an insufficiency of arithGet/SetCtxState()
if (coeffOffset > 5) tempQuant[3] = entropyCoder.arithGetTuplePtr ()[(coeffOffset >> 1) - 3];
for (tuple = 0; tuple < numTuples; tuple++) // tuple-wise non-weighted distortion and rate
{
const uint16_t tupleStart = tuple << 1;
const uint16_t tupleOffset = coeffOffset + tupleStart;
const double normalMagnA = (double) coeffMagn[tupleStart ] * stepSizeDiv;
const double normalMagnB = (double) coeffMagn[tupleStart + 1] * stepSizeDiv;
uint8_t coeffQuantA = quantCoeffs[tupleStart];
uint8_t coeffQuantB = quantCoeffs[tupleStart + 1];
for (is = 0; is < numStates; is++) // populate tuple trellis
{
uint8_t* const mag = (is != 0 ? tempQuant : quantCoeffs) - (int) tupleOffset; // see arithCodeTupTest()
uint8_t* currRate = &quantRate[(is + tuple * numStates) * numStates];
double diffA, diffB;
if (is != 0) // test reduction of quantized MDCT magnitudes
{
const uint8_t redA = is >> 1;
const uint8_t redB = is & 1;
if ((redA > 0 && coeffQuantA != 1) || (redB > 0 && coeffQuantB != 1)) // avoid path
{
tempCodState[is] = tempCodState[0];
tempCtxState[is] = tempCtxState[0];
memset (currRate, UCHAR_MAX, numStates);
continue;
}
tempQuant[0] = (coeffQuantA -= redA);
tempQuant[1] = (coeffQuantB -= redB);
}
diffA = m_lutXExp43[coeffQuantA] - normalMagnA;
diffB = m_lutXExp43[coeffQuantB] - normalMagnB;
quantDist[tuple][is] = diffA * diffA + diffB * diffB;
numQ = (coeffQuantA > 0 ? 1 : 0) + (coeffQuantB > 0 ? 1 : 0);
if (tuple == 0) // first tuple, with tupleStart == sfbStart
{
entropyCoder.arithSetCodState (codStart); // start of SFB
entropyCoder.arithSetCtxState (ctxStart, 0);
memset (currRate, entropyCoder.arithCodeTupTest (mag, tupleOffset) + numQ, numStates); // +- m_acBits
}
else // tuple > 0, rate depends on decisions for last tuple
{
for (ds = numStates - 1; ds >= 0; ds--)
{
if (quantRate[(ds + (tuple-1) * numStates) * numStates] >= UCHAR_MAX)// avoid path
{
currRate[ds] = UCHAR_MAX;
continue;
}
entropyCoder.arithSetCodState (prevCodState[ds]);
entropyCoder.arithSetCtxState (prevCtxState[ds], tupleOffset);
currRate[ds] = uint8_t (entropyCoder.arithCodeTupTest (mag, tupleOffset) + numQ); // incl. m_acBits
}
}
// statistically best place to save states is after ds == 0
tempCodState[is] = entropyCoder.arithGetCodState ();
tempCtxState[is] = entropyCoder.arithGetCtxState ();
} // for is
#if EC_TRAIN
refSfbDist += quantDist[tuple][0];
#endif
memcpy (prevCodState, tempCodState, numStates * sizeof (uint32_t));
memcpy (prevCtxState, tempCtxState, numStates * sizeof (uint32_t));
} // for tuple
entropyCoder.arithSetCodState (codStart); // back to last state
entropyCoder.arithSetCtxState (ctxStart, coeffOffset);
// restore third-last tuple value, see insufficiency note above
if (coeffOffset > 5) entropyCoder.arithGetTuplePtr ()[(coeffOffset >> 1) - 3] = tempQuant[3];
#if EC_TRAIN
tempBitCount = targetBitCount + 1; // Viterbi search for minimum distortion at target rate
for (double lambda = 0.015625; (lambda <= 0.375) && (tempBitCount > targetBitCount); lambda += 0.0078125)
#endif
{
double* const prevCost = prevVtrbCost;
#if !EC_TRAIN
uint8_t* const prevPath = (uint8_t*) quantDist;// backtracker
#endif
double costMinIs = (double) UINT_MAX;
unsigned pathMinIs = 0;
#if EC_TRAIN
uint8_t prevPath[32*4]; // sized for the numTuples <= 32 checked above
tempSfbDist = 0.0;
#endif
for (is = 0; is < numStates; is++) // initialize minimum path
{
const uint8_t currRate = quantRate[is * numStates];
prevCost[is] = (currRate >= UCHAR_MAX ? (double) UINT_MAX : lambda * currRate + quantDist[0][is]);
prevPath[is] = 0;
}
for (tuple = 1; tuple < numTuples; tuple++) // find min. path
{
double* const currCost = tempVtrbCost;
uint8_t* const currPath = &prevPath[tuple * numStates];
for (is = 0; is < numStates; is++) // tuple's minimum path
{
uint8_t* currRate = &quantRate[(is + tuple * numStates) * numStates];
double costMinDs = (double) UINT_MAX;
uint8_t pathMinDs = 0;
for (ds = numStates - 1; ds >= 0; ds--) // transitions
{
const double costCurr = (currRate[ds] >= UCHAR_MAX ? (double) UINT_MAX : prevCost[ds] + lambda * currRate[ds]);
if (costMinDs > costCurr)
{
costMinDs = costCurr;
pathMinDs = (uint8_t) ds;
}
}
if (costMinDs < UINT_MAX) costMinDs += quantDist[tuple][is];
currCost[is] = costMinDs;
currPath[is] = pathMinDs;
} // for is
memcpy (prevCost, currCost, numStates * sizeof (double)); // TODO: avoid memcpy, use pointer swapping instead for speed
} // for tuple
#if EC_TRAIN
tempBitCount = 0;
#endif
for (is = 0; is < numStates; is++) // search for minimum path
{
if (costMinIs > prevCost[is])
{
costMinIs = prevCost[is];
pathMinIs = is;
}
}
for (tuple--; tuple > 0; tuple--) // min-cost rate and types
{
const uint8_t* currPath = &prevPath[tuple * numStates];
const uint8_t pathMinDs = currPath[pathMinIs];
optimalIs[tuple] = (uint8_t) pathMinIs;
#if EC_TRAIN
tempBitCount += quantRate[pathMinDs + (pathMinIs + tuple * numStates) * numStates];
tempSfbDist += quantDist[tuple][pathMinIs];
#endif
pathMinIs = pathMinDs;
}
optimalIs[0] = (uint8_t) pathMinIs;
#if EC_TRAIN
tempBitCount += quantRate[pathMinIs * numStates];
tempSfbDist += quantDist[0][pathMinIs];
#endif
} // Viterbi search
#if EC_TRAIN
if ((tempSfbDist <= refSfbDist) || (tempBitCount <= targetBitCount))
#endif
{
#if !EC_TRAIN
numQ = 0;
#endif
for (tuple = 0; tuple < numTuples; tuple++) // re-quantize SFB with R/D optimal rounding
{
const uint16_t tupleStart = tuple << 1;
const uint8_t tupIs = optimalIs[tuple];
uint8_t& coeffQuantA = quantCoeffs[tupleStart];
uint8_t& coeffQuantB = quantCoeffs[tupleStart + 1];
if (tupIs != 0) // optimal reduction of quantized MDCT magnitudes
{
coeffQuantA -= (tupIs >> 1);
coeffQuantB -= (tupIs & 1);
}
#if !EC_TRAIN
numQ += (coeffQuantA > 0 ? 1 : 0) + (coeffQuantB > 0 ? 1 : 0);
#endif
} // for tuple
#if EC_TRAIN
return tempBitCount;
#else
return (1u << 15) | numQ; // final stats: OK flag | sign bits
#endif
}
return targetBitCount;
}
#endif // EC_TRELLIS_OPT_CODING
// constructor
SfbQuantizer::SfbQuantizer ()
{
// initialize all helper buffers
m_coeffMagn = nullptr;
#if EC_TRELLIS_OPT_CODING
m_coeffTemp = nullptr;
#endif
m_lut2ExpX4 = nullptr;
m_lutSfNorm = nullptr;
m_lutXExp43 = nullptr;
m_maxSfIndex = 0;
#if EC_TRELLIS_OPT_CODING
m_numCStates = 0;
for (unsigned b = 0; b < 52; b++)
{
m_quantDist[b] = nullptr;
m_quantInSf[b] = nullptr;
m_quantRate[b] = nullptr;
}
#endif
}
// destructor
SfbQuantizer::~SfbQuantizer ()
{
// free allocated helper buffers
MFREE (m_coeffMagn);
#if EC_TRELLIS_OPT_CODING
MFREE (m_coeffTemp);
#endif
MFREE (m_lut2ExpX4);
MFREE (m_lutSfNorm);
MFREE (m_lutXExp43);
#if EC_TRELLIS_OPT_CODING
for (unsigned b = 0; b < 52; b++)
{
MFREE (m_quantDist[b]);
MFREE (m_quantInSf[b]);
MFREE (m_quantRate[b]);
}
#endif
}
// public functions
unsigned SfbQuantizer::initQuantMemory (const unsigned maxTransfLength,
#if EC_TRELLIS_OPT_CODING
const uint8_t numSwb, const uint8_t bitRateMode, const unsigned samplingRate,
#endif
const uint8_t maxScaleFacIndex /*= SCHAR_MAX*/)
{
const unsigned numScaleFactors = (unsigned) maxScaleFacIndex + 1;
#if EC_TRELLIS_OPT_CODING
const uint8_t complexityOffset = (samplingRate < 28800 ? 8 - (samplingRate >> 13) : 5) + ((bitRateMode == 0) && (samplingRate >= 8192) ? 1 : 0);
const uint8_t numTrellisStates = complexityOffset - __min (2, (bitRateMode + 2) >> 2); // number of states per SFB
const uint8_t numSquaredStates = numTrellisStates * numTrellisStates;
const uint16_t quantRateLength = (samplingRate < 28800 || samplingRate >= 57600 ? 512 : 256); // quantizeMagnRDOC()
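// e.g., samplingRate 44100 and bitRateMode 3: complexityOffset = 5, hence
// numTrellisStates = 5 - __min (2, 5 >> 2) = 4 and quantRateLength = 256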
#endif
unsigned x;
if ((maxTransfLength < 128) || (maxTransfLength > 2048) || (maxTransfLength & 7) || (maxScaleFacIndex == 0) || (maxScaleFacIndex > SCHAR_MAX))
{
return 1; // invalid arguments error
}
m_maxSfIndex = maxScaleFacIndex;
if ((m_coeffMagn = (unsigned*) malloc (maxTransfLength * sizeof (unsigned))) == nullptr ||
#if EC_TRELLIS_OPT_CODING
(m_coeffTemp = (uint8_t* ) malloc (maxTransfLength + quantRateLength )) == nullptr ||
#endif
(m_lut2ExpX4 = (double* ) malloc (numScaleFactors * sizeof (double ))) == nullptr ||
(m_lutSfNorm = (double* ) malloc (numScaleFactors * sizeof (double ))) == nullptr ||
(m_lutXExp43 = (double* ) malloc ((SCHAR_MAX + 1) * sizeof (double ))) == nullptr)
{
return 2; // memory allocation error
}
#if EC_TRELLIS_OPT_CODING
m_maxSize8M1 = (maxTransfLength >> 3) - 1;
m_numCStates = numTrellisStates;
m_rateIndex = bitRateMode;
for (x = 0; x < __min (52u, numSwb); x++)
{
if ((m_quantDist[x] = (double* ) malloc (numTrellisStates * sizeof (double ))) == nullptr ||
(m_quantInSf[x] = (uint8_t* ) malloc (numTrellisStates * sizeof (uint8_t ))) == nullptr ||
(m_quantRate[x] = (uint16_t*) malloc (numSquaredStates * sizeof (uint16_t))) == nullptr)
{
return 2;
}
}
#else
memset (m_coeffTemp, 0, sizeof (m_coeffTemp));
#endif
// calculate scale factor gain 2^(x/4)
for (x = 0; x < numScaleFactors; x++)
{
m_lut2ExpX4[x] = pow (2.0, (double) x / 4.0);
m_lutSfNorm[x] = 1.0 / m_lut2ExpX4[x];
}
// calculate dequantized coeff x^(4/3)
for (x = 0; x < (SCHAR_MAX + 1); x++)
{
m_lutXExp43[x] = pow ((double) x, 4.0 / 3.0);
}
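// sample LUT entries: m_lut2ExpX4[4] = 2 and m_lutSfNorm[4] = 0.5 (four scale factor
// steps double the step size), m_lutXExp43[8] = 8^(4/3) = 16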
return 0; // no error
}
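// Usage sketch (illustrative only, with hypothetical variable names; see the encoder
// core for the authoritative call sequence):
//
//   SfbQuantizer sfbQuant;
//   if (sfbQuant.initQuantMemory (1024, numSwb, bitRateMode, samplingRate) > 0)
//     return error; // allocation or argument failure
//   const uint8_t sf = sfbQuant.quantizeSpecSfb (entrCoder, mdctCoeffs, grpLength,
//                                                grpOffsets, grpStats, sfb,
//                                                sfIndexInit, sfIndexPrev, quantOut);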
uint8_t SfbQuantizer::quantizeSpecSfb (EntropyCoder& entropyCoder, const int32_t* const inputCoeffs, const uint8_t grpLength,
const uint16_t* const grpOffsets, uint32_t* const grpStats, // quant./coding statistics
const unsigned sfb, const uint8_t sfIndex, const uint8_t sfIndexPred /*= UCHAR_MAX*/,
uint8_t* const quantCoeffs /*= nullptr*/) // returns the RD optimized scale factor index
{
#if EC_TRELLIS_OPT_CODING
EntropyCoder* const entrCoder = (grpLength == 1 ? &entropyCoder : nullptr);
#endif
uint8_t sfBest = sfIndex;
if ((inputCoeffs == nullptr) || (grpOffsets == nullptr) || (sfb >= 52) || (sfIndex > m_maxSfIndex))
{
return UCHAR_MAX; // invalid input error
}
#if EC_TRELLIS_OPT_CODING
if (grpLength == 1) // references for RDOC
{
m_quantDist[sfb][1] = -1.0;
m_quantInSf[sfb][1] = sfIndex;
m_quantRate[sfb][1] = 0; // for sgn bits
m_quantRate[sfb][0] = entropyCoder.arithGetCtxState () & USHRT_MAX; // ref start context
}
#endif
if ((sfIndex == 0) || (sfIndexPred <= m_maxSfIndex && sfIndex + INDEX_OFFSET < sfIndexPred))
{
const uint16_t grpStart = grpOffsets[0];
const uint16_t sfbStart = grpOffsets[sfb];
const uint16_t sfbWidth = grpOffsets[sfb + 1] - sfbStart;
uint32_t* const coeffMagn = &m_coeffMagn[sfbStart];
for (int i = sfbWidth - 1; i >= 0; i--) // back up magnitudes
{
coeffMagn[i] = abs (inputCoeffs[sfbStart + i]);
}
if (quantCoeffs)
{
memset (&quantCoeffs[sfbStart], 0, sfbWidth * sizeof (uint8_t)); // SFB output zeroing
if (grpStats) // approximate bit count
{
grpStats[sfb] = getBitCount (entropyCoder, 0, 0, grpLength, &quantCoeffs[grpStart], sfbStart - grpStart, sfbWidth);
}
}
return sfIndexPred - (sfIndex == 0 ? 0 : INDEX_OFFSET); // save delta bits if applicable
}
else // nonzero sf, optimized quantization
{
const uint16_t grpStart = grpOffsets[0];
const uint16_t sfbStart = grpOffsets[sfb];
const uint16_t sfbWidth = grpOffsets[sfb + 1] - sfbStart;
const uint16_t cpyWidth = sfbWidth * sizeof (uint8_t);
uint32_t* const coeffMagn = &m_coeffMagn[sfbStart];
uint32_t codStart = 0, ctxStart = 0;
uint32_t codFinal = 0, ctxFinal = 0;
double distBest = 0, distCurr = 0;
short maxQBest = 0, maxQCurr = 0;
short numQBest = 0, numQCurr = 0;
#if EC_TRELLIS_OPT_CODING
bool rdOptimQuant = (grpLength != 1);
#else
bool rdOptimQuant = true;
#endif
uint8_t* ptrBest = &m_coeffTemp[0];
uint8_t* ptrCurr = &m_coeffTemp[100];
uint8_t sfCurr = sfIndex;
for (int i = sfbWidth - 1; i >= 0; i--) // back up magnitudes
{
coeffMagn[i] = abs (inputCoeffs[sfbStart + i]);
}
// --- determine default quantization result using range-limited scale factor as a reference
sfBest = quantizeMagnSfb (coeffMagn, sfCurr, ptrBest, sfbWidth,
#if EC_TRELLIS_OPT_CODING
entrCoder, sfbStart - grpStart,
#endif
&maxQBest, &numQBest);
if (maxQBest > SCHAR_MAX) // limit SNR via scale factor index
{
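// the largest magnitude exceeds the 7-bit code range, so raise sf just far enough that
// it maps to at most SCHAR_MAX: the required step-size ratio is (maxQBest / 127)^(4/3),
// and 0.001566492688 = 1 / 127^(4/3) turns the pow () result into that ratio before
// getScaleFacOffset () converts it into scale factor steps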
for (uint8_t c = 0; (c < 2) && (maxQBest > SCHAR_MAX); c++) // very rarely done twice
{
sfCurr += getScaleFacOffset (pow ((double) maxQBest, 4.0 / 3.0) * 0.001566492688) + c; // / m_lutXExp43[SCHAR_MAX]
sfBest = quantizeMagnSfb (coeffMagn, sfCurr, ptrBest, sfbWidth,
#if EC_TRELLIS_OPT_CODING
entrCoder, sfbStart - grpStart,
#endif
&maxQBest, &numQBest);
}
rdOptimQuant = false;
}
else if ((sfBest < sfCurr) && (sfBest != sfIndexPred)) // re-optimize above quantization
{
sfBest = quantizeMagnSfb (coeffMagn, --sfCurr, ptrBest, sfbWidth,
#if EC_TRELLIS_OPT_CODING
entrCoder, sfbStart - grpStart,
#endif
&maxQBest, &numQBest);
rdOptimQuant &= (maxQBest <= SCHAR_MAX);
}
#if EC_TRELLIS_OPT_CODING
if (grpLength == 1) // ref masking level
{
m_quantInSf[sfb][1] = __min (sfCurr, m_maxSfIndex);
}
#endif
if (maxQBest == 0) // SFB was quantized to zero - zero output
{
if (quantCoeffs)
{
memset (&quantCoeffs[sfbStart], 0, cpyWidth);
if (grpStats) // estimated bit count
{
grpStats[sfb] = getBitCount (entropyCoder, 0, 0, grpLength, &quantCoeffs[grpStart], sfbStart - grpStart, sfbWidth);
}
}
return sfIndexPred; // repeat scale factor, save delta bits
}
// --- check whether optimized quantization and coding result in a lower rate-distortion cost
distBest = getQuantDist (coeffMagn, sfBest, ptrBest, sfbWidth);
#if EC_TRELLIS_OPT_CODING
if (grpLength == 1) // ref band-wise NMR
{
const double refSfbNmrDiv = m_lutSfNorm[m_quantInSf[sfb][1]];
m_quantDist[sfb][1] = distBest * refSfbNmrDiv * refSfbNmrDiv;
m_quantRate[sfb][1] = numQBest; // sgn
}
#endif
if (quantCoeffs)
{
memcpy (&quantCoeffs[sfbStart], ptrBest, cpyWidth);
codStart = entropyCoder.arithGetCodState (); // start state
ctxStart = entropyCoder.arithGetCtxState ();
numQBest += getBitCount (entropyCoder, sfBest, sfIndexPred, grpLength, &quantCoeffs[grpStart], sfbStart - grpStart, sfbWidth);
codFinal = entropyCoder.arithGetCodState (); // final state
ctxFinal = entropyCoder.arithGetCtxState ();
}
rdOptimQuant &= (distBest > 0.0);
if ((sfBest < sfCurr) && (sfBest != sfIndexPred) && rdOptimQuant) // R/D re-optimization
{
#if EC_TRELLIS_OPT_CODING
const double refSfbNmrDiv = m_lutSfNorm[sfCurr];
const double lambda = getLagrangeValue (m_rateIndex);
#endif
sfCurr = quantizeMagnSfb (coeffMagn, sfCurr - 1, ptrCurr, sfbWidth,
#if EC_TRELLIS_OPT_CODING
entrCoder, sfbStart - grpStart,
#endif
&maxQCurr, &numQCurr);
distCurr = getQuantDist (coeffMagn, sfCurr, ptrCurr, sfbWidth);
if (quantCoeffs)
{
memcpy (&quantCoeffs[sfbStart], ptrCurr, cpyWidth);
entropyCoder.arithSetCodState (codStart); // reset state
entropyCoder.arithSetCtxState (ctxStart);
numQCurr += getBitCount (entropyCoder, sfCurr, sfIndexPred, grpLength, &quantCoeffs[grpStart], sfbStart - grpStart, sfbWidth);
}
// rate-distortion decision, using empirical Lagrange value
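// both trials are compared at a common working point: each distortion is normalized by
// refSfbNmrDiv squared (taken at the pre-decrement scale factor) and each bit count is
// weighted by the same lambda as in the RDOC trellis; the non-trellis build falls back
// to the count-based heuristic below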
#if EC_TRELLIS_OPT_CODING
if (distCurr * refSfbNmrDiv * refSfbNmrDiv + lambda * numQCurr < distBest * refSfbNmrDiv * refSfbNmrDiv + lambda * numQBest)
#else
if ((maxQCurr <= maxQBest) && (numQCurr <= numQBest + (distCurr >= distBest ? -1 : short (0.5 + distBest / __max (1.0, distCurr)))))
#endif
{
maxQBest = maxQCurr;
numQBest = numQCurr;
sfBest = sfCurr;
}
else if (quantCoeffs) // discard result, recover best trial
{
memcpy (&quantCoeffs[sfbStart], ptrBest, cpyWidth);
entropyCoder.arithSetCodState (codFinal); // reset state
entropyCoder.arithSetCtxState (ctxFinal);
}
}
if (grpStats)
{
grpStats[sfb] = ((uint32_t) maxQBest << 16) | numQBest; // max magnitude and bit count
}
} // if sfIndex == 0
return __min (sfBest, m_maxSfIndex);
}
#if EC_TRELLIS_OPT_CODING
unsigned SfbQuantizer::quantizeSpecRDOC (EntropyCoder& entropyCoder, uint8_t* const optimalSf, const unsigned targetBitCount,
const uint16_t* const grpOffsets, uint32_t* const grpStats, // quant./coding statistics
const unsigned numSfb, uint8_t* const quantCoeffs) // returns RD optimization bit count
{
// numSfb: number of trellis stages. Based on: A. Aggarwal, S. L. Regunathan, and K. Rose,
// "Trellis-Based Optimization of MPEG-4 Advanced Audio Coding," see also quantizeMagnRDOC
const uint32_t codStart = USHRT_MAX << 16;
const uint32_t ctxStart = m_quantRate[0][0]; // start context before call to quantizeSfb()
const uint32_t codFinal = entropyCoder.arithGetCodState ();
const uint32_t ctxFinal = entropyCoder.arithGetCtxState (); // after call to quantizeSfb()
const uint16_t grpStart = grpOffsets[0];
uint8_t* const inScaleFac = &m_coeffTemp[((unsigned) m_maxSize8M1 - 6) << 3];
uint32_t prevCodState[8] = {0, 0, 0, 0, 0, 0, 0, 0};
uint32_t prevCtxState[8] = {0, 0, 0, 0, 0, 0, 0, 0};
uint8_t prevScaleFac[8] = {0, 0, 0, 0, 0, 0, 0, 0};
double prevVtrbCost[8] = {0, 0, 0, 0, 0, 0, 0, 0};
uint32_t tempCodState[8] = {0, 0, 0, 0, 0, 0, 0, 0};
uint32_t tempCtxState[8] = {0, 0, 0, 0, 0, 0, 0, 0};
uint8_t tempScaleFac[8] = {0, 0, 0, 0, 0, 0, 0, 0};
double tempVtrbCost[8] = {0, 0, 0, 0, 0, 0, 0, 0};
unsigned tempBitCount, sfb, is;
int ds;
#if EC_TRAIN
double refGrpDist = 0.0, tempGrpDist = 0.0;
#else
const double lambda = getLagrangeValue (m_rateIndex);
#endif
if ((optimalSf == nullptr) || (quantCoeffs == nullptr) || (grpOffsets == nullptr) || (numSfb == 0) || (numSfb > 52) ||
(targetBitCount == 0) || (targetBitCount > SHRT_MAX))
{
return 0; // invalid input error
}
for (sfb = 0; sfb < numSfb; sfb++) // SFB-wise scale factor, weighted distortion, and rate
{
const uint8_t refSf = m_quantInSf[sfb][1];
const uint16_t refNumQ = m_quantRate[sfb][1];
const double refQuantDist = m_quantDist[sfb][1];
const double refQuantNorm = m_lutSfNorm[refSf] * m_lutSfNorm[refSf];
const uint16_t sfbStart = grpOffsets[sfb];
const uint16_t sfbWidth = grpOffsets[sfb + 1] - sfbStart;
const uint32_t* coeffMagn = &m_coeffMagn[sfbStart];
uint8_t* const tempQuant = &m_coeffTemp[sfbStart - grpStart];
bool maxSnrReached = false;
if (refQuantDist < 0.0) memset (tempQuant, 0, sfbWidth * sizeof (uint8_t));
#if EC_TRAIN
else refGrpDist += refQuantDist;
#endif
if (grpStats)
{
grpStats[sfb] = (grpStats[sfb] & (USHRT_MAX << 16)) | refNumQ; // keep magn, sign bits
}
for (is = 0; is < m_numCStates; is++) // populate SFB trellis
{
const uint8_t* mag = (is != 1 ? m_coeffTemp /*= tempQuant[grpStart - sfbStart]*/ : &quantCoeffs[grpStart]);
double& currDist = m_quantDist[sfb][is];
uint16_t* currRate = &m_quantRate[sfb][is * m_numCStates];
uint8_t sfBest = optimalSf[sfb]; // optimal scalefactor
short maxQCurr = 0, numQCurr = 0; // for sign bits counting
if (refQuantDist < 0.0) // -1.0 means SFB is zero-quantized
{
currDist = -1.0;
m_quantInSf[sfb][is] = refSf;
}
else if (is != 1) // quantization & distortion not computed
{
const uint8_t sfCurr = __max (0, __min (m_maxSfIndex, refSf + 1 - (int) is));
currDist = -1.0;
if ((sfCurr == 0) || maxSnrReached)
{
maxSnrReached = true;
}
else // sfCurr > 0 && sfCurr <= m_maxSfIndex, re-quantize
{
sfBest = quantizeMagnSfb (coeffMagn, sfCurr, tempQuant, sfbWidth,
&entropyCoder, sfbStart - grpStart,
&maxQCurr, &numQCurr);
if (maxQCurr > SCHAR_MAX)
{
maxSnrReached = true; numQCurr = 0;
}
else
{
currDist = getQuantDist (coeffMagn, sfBest, tempQuant, sfbWidth) * refQuantNorm;
}
}
if (currDist < 0.0) memset (tempQuant, 0, sfbWidth * sizeof (uint8_t));
m_quantInSf[sfb][is] = sfCurr; // store initial scale fac
}
else // is == 1, quant. & dist. computed with quantizeSfb()
{
numQCurr = refNumQ;
}
if (sfb == 0) // first SFB, having sfbStart - grpStart == 0
{
entropyCoder.arithSetCodState (codStart); // group start
entropyCoder.arithSetCtxState (ctxStart);
tempBitCount = (maxSnrReached ? USHRT_MAX : numQCurr + getBitCount (entropyCoder, sfBest, UCHAR_MAX, 1, mag, 0, sfbWidth));
for (ds = m_numCStates - 1; ds >= 0; ds--)
{
currRate[ds] = (uint16_t) tempBitCount;
}
tempCodState[is] = entropyCoder.arithGetCodState ();
tempCtxState[is] = entropyCoder.arithGetCtxState ();
}
else // sfb > 0, rate depends on decisions in preceding SFB
{
for (ds = m_numCStates - 1; ds >= 0; ds--)
{
const uint16_t prevRate = m_quantRate[sfb - 1][ds * m_numCStates];
entropyCoder.arithSetCodState (prevCodState[ds]);
entropyCoder.arithSetCtxState (prevCtxState[ds], sfbStart - grpStart);
tempBitCount = (maxSnrReached || (prevRate >= USHRT_MAX) ? USHRT_MAX : numQCurr + getBitCount (entropyCoder,
(numQCurr == 0 ? prevScaleFac[ds] : sfBest), prevScaleFac[ds], 1, mag, sfbStart - grpStart, sfbWidth));
currRate[ds] = (uint16_t) tempBitCount;
if (ds == 1) // statistically best place to save states
{
tempCodState[is] = entropyCoder.arithGetCodState ();
tempCtxState[is] = entropyCoder.arithGetCtxState ();
}
}
}
tempScaleFac[is] = sfBest; // optimized factor for next SFB
} // for is
memcpy (prevCodState, tempCodState, m_numCStates * sizeof (uint32_t));
memcpy (prevCtxState, tempCtxState, m_numCStates * sizeof (uint32_t));
memcpy (prevScaleFac, tempScaleFac, m_numCStates * sizeof (uint8_t ));
} // for sfb
entropyCoder.arithSetCodState (codFinal); // back to last state
entropyCoder.arithSetCtxState (ctxFinal, grpOffsets[numSfb] - grpStart);
#if EC_TRAIN
tempBitCount = targetBitCount + 1; // Viterbi search for minimum distortion at target rate
for (double lambda = 0.015625; (lambda <= 0.375) && (tempBitCount > targetBitCount); lambda += 0.0078125)
#endif
{
double* const prevCost = prevVtrbCost;
uint8_t* const prevPath = m_coeffTemp; // trellis backtracker
double costMinIs = (double) UINT_MAX;
unsigned pathMinIs = 1;
#if EC_TRAIN
tempGrpDist = 0.0;
#endif
for (is = 0; is < m_numCStates; is++) // initial minimum path
{
const uint16_t currRate = m_quantRate[0][is * m_numCStates];
prevCost[is] = (currRate >= USHRT_MAX ? (double) UINT_MAX : lambda * currRate + __max (0.0, m_quantDist[0][is]));
prevPath[is] = 0;
}
for (sfb = 1; sfb < numSfb; sfb++) // search for minimum path
{
double* const currCost = tempVtrbCost;
uint8_t* const currPath = &prevPath[sfb * m_numCStates];
for (is = 0; is < m_numCStates; is++) // SFB's minimum path
{
uint16_t* currRate = &m_quantRate[sfb][is * m_numCStates];
double costMinDs = (double) UINT_MAX;
uint8_t pathMinDs = 1;
for (ds = m_numCStates - 1; ds >= 0; ds--) // transitions
{
const double costCurr = (currRate[ds] >= USHRT_MAX ? (double) UINT_MAX : prevCost[ds] + lambda * currRate[ds]);
if (costMinDs > costCurr)
{
costMinDs = costCurr;
pathMinDs = (uint8_t) ds;
}
}
if (costMinDs < UINT_MAX) costMinDs += __max (0.0, m_quantDist[sfb][is]);
currCost[is] = costMinDs;
currPath[is] = pathMinDs;
} // for is
memcpy (prevCost, currCost, m_numCStates * sizeof (double)); // TODO: avoid memcpy, use pointer swapping instead for speed
} // for sfb
for (sfb--, is = 0; is < m_numCStates; is++) // group minimum
{
if (costMinIs > prevCost[is])
{
costMinIs = prevCost[is];
pathMinIs = is;
}
}
for (tempBitCount = 0; sfb > 0; sfb--) // min-cost group rate
{
const uint8_t* currPath = &prevPath[sfb * m_numCStates];
const uint8_t pathMinDs = currPath[pathMinIs];
inScaleFac[sfb] = (m_quantDist[sfb][pathMinIs] < 0.0 ? UCHAR_MAX : m_quantInSf[sfb][pathMinIs]);
tempBitCount += m_quantRate[sfb][pathMinDs + pathMinIs * m_numCStates];
#if EC_TRAIN
tempGrpDist += __max (0.0, m_quantDist[sfb][pathMinIs]);
#endif
pathMinIs = pathMinDs;
}
inScaleFac[0] = (m_quantDist[0][pathMinIs] < 0.0 ? UCHAR_MAX : m_quantInSf[0][pathMinIs]);
tempBitCount += m_quantRate[0][pathMinIs * m_numCStates];
#if EC_TRAIN
tempGrpDist += __max (0.0, m_quantDist[0][pathMinIs]);
#endif
} // Viterbi search
#if EC_TRAIN
if ((tempGrpDist <= refGrpDist) || (tempBitCount <= targetBitCount))
#endif
{
uint8_t sfIndexPred = UCHAR_MAX;
if (grpStats)
{
entropyCoder.arithSetCodState (codStart);// set group start
entropyCoder.arithSetCtxState (ctxStart);
tempBitCount = 0;
}
for (sfb = 0; sfb < numSfb; sfb++) // re-quantize spectrum with R/D optimized parameters
{
const uint16_t sfbStart = grpOffsets[sfb];
const uint16_t sfbWidth = grpOffsets[sfb + 1] - sfbStart;
if ((inScaleFac[sfb] == UCHAR_MAX) || (sfIndexPred <= m_maxSfIndex && inScaleFac[sfb] + INDEX_OFFSET < sfIndexPred))
{
memset (&quantCoeffs[sfbStart], 0, sfbWidth * sizeof (uint8_t)); // zero SFB output
optimalSf[sfb] = sfIndexPred - (inScaleFac[sfb] == UCHAR_MAX ? 0 : INDEX_OFFSET);
}
else if (inScaleFac[sfb] != m_quantInSf[sfb][1]) // speedup
{
short maxQBest = 0, numQBest = 0;
optimalSf[sfb] = quantizeMagnSfb (&m_coeffMagn[sfbStart], inScaleFac[sfb], &quantCoeffs[sfbStart], sfbWidth,
&entropyCoder, sfbStart - grpStart,
&maxQBest, &numQBest);
if (maxQBest == 0) optimalSf[sfb] = sfIndexPred; // empty
if (grpStats)
{
grpStats[sfb] = ((uint32_t) maxQBest << 16) | numQBest; // max magn. and sign bits
}
}
if (grpStats) // complete statistics with per-SFB bit count
{
grpStats[sfb] += getBitCount (entropyCoder, optimalSf[sfb], sfIndexPred, 1, &quantCoeffs[grpStart], sfbStart - grpStart, sfbWidth);
tempBitCount += grpStats[sfb] & USHRT_MAX;
}
if ((sfb > 0) && (optimalSf[sfb] < UCHAR_MAX) && (sfIndexPred == UCHAR_MAX))
{
memset (optimalSf, optimalSf[sfb], sfb * sizeof (uint8_t)); // back-propagate factor
}
sfIndexPred = optimalSf[sfb];
} // for sfb
return tempBitCount + (grpStats ? 2 : 0); // last coding bits
}
return targetBitCount;
}
#endif // EC_TRELLIS_OPT_CODING