low-rate fixes

2025-06-05 21:59:32 +02:00 · 2020-03-10 00:00:29 +01:00
parent 07a448a1f6
commit 0cbfa8c995
5 changed files with 47 additions and 25 deletions
--- a/src/lib/exhaleEnc.cpp
+++ b/src/lib/exhaleEnc.cpp
@@ -688,12 +688,19 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s

 unsigned ExhaleEncoder::quantizationCoding ()  // apply MDCT quantization and entropy coding
 {
+  const unsigned nChannels        = toNumChannels (m_channelConf);
  const unsigned nSamplesInFrame  = toFrameLength (m_frameLength);
  const unsigned samplingRate     = toSamplingRate (m_frequencyIdx);
  const unsigned* const coeffMagn = m_sfbQuantizer.getCoeffMagnPtr ();
+  uint8_t  meanSpecFlat[USAC_MAX_NUM_CHANNELS];
+//uint8_t  meanTempFlat[USAC_MAX_NUM_CHANNELS];
  unsigned ci = 0, s; // running index
  unsigned errorValue = (coeffMagn == nullptr ? 1 : 0);

+  // get mean of spectral flatness for every channel (temporal flatness retrieval is currently disabled)
+  m_bitAllocator.getChAverageSpecFlat (meanSpecFlat, nChannels);
+//m_bitAllocator.getChAverageTempFlat (meanTempFlat, nChannels);
+
  for (unsigned el = 0; el < m_numElements; el++)  // element loop
  {
    CoreCoderData& coreConfig = *m_elementData[el];
@@ -907,7 +914,8 @@ unsigned ExhaleEncoder::quantizationCoding ()  // apply MDCT quantization and en
 #if !RESTRICT_TO_AAC
      // obtain channel-wise noise_level and noise_offset for USAC
      coreConfig.specFillData[ch] = (!m_noiseFilling[el] ? 0 : m_specGapFiller.getSpecGapFillParams (m_sfbQuantizer, m_mdctQuantMag[ci],
-                                                                                                     m_numSwbShort, grpData, nSamplesInFrame));
+                                                                                                     m_numSwbShort, grpData, nSamplesInFrame,
+                                                                                                     shortWinCurr ? 0 : meanSpecFlat[ci]));
      // NOTE: gap-filling SFB bit count might be inaccurate now since scale factors changed
      if (coreConfig.specFillData[ch] == 1) errorValue |= 1;
 #endif
@@ -1600,7 +1608,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin
  // initialize coder class memory
  m_tempIntBuf = m_timeSignals[0];
 #if EC_TRELLIS_OPT_CODING
-  if (m_sfbQuantizer.initQuantMemory (nSamplesInFrame, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode) > 0 ||
+  if (m_sfbQuantizer.initQuantMemory (nSamplesInFrame, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode, toSamplingRate (m_frequencyIdx)) > 0 ||
 #else
  if (m_sfbQuantizer.initQuantMemory (nSamplesInFrame) > 0 ||
 #endif
--- a/src/lib/quantization.cpp
+++ b/src/lib/quantization.cpp
@@ -216,15 +216,15 @@ uint32_t SfbQuantizer::quantizeMagnRDOC (EntropyCoder& entropyCoder, const uint8
  const double stepSizeDiv = m_lutSfNorm[optimalSf];
  const uint16_t numStates = 4; // 4 reduction types: [0, 0], [0, -1], [-1, 0], and [-1, -1]
  const uint16_t numTuples = numCoeffs >> 1;
-  uint8_t* const quantRate = &m_coeffTemp[768];
+  uint8_t* const quantRate = &m_coeffTemp[((unsigned) m_maxSize8M1 + 1) << 3];
  uint32_t prevCodState[4] = {0, 0, 0, 0};
  uint32_t prevCtxState[4] = {0, 0, 0, 0};
  double   prevVtrbCost[4] = {0, 0, 0, 0};
  uint32_t tempCodState[4] = {0, 0, 0, 0};
  uint32_t tempCtxState[4] = {0, 0, 0, 0};
  double   tempVtrbCost[4] = {0, 0, 0, 0};
-  double   quantDist[16][4];   // TODO: dynamic memory allocation
-  uint8_t* const optimalIs = (uint8_t* const) (quantDist[16-1]);
+  double   quantDist[32][4];   // TODO: dynamic memory allocation
+  uint8_t* const optimalIs = (uint8_t* const) (quantDist[32-1]);
  uint8_t  tempQuant[4], numQ; // for tuple/SFB sign bit counting
  unsigned tempBitCount, tuple, is;
  int ds;
@@ -234,7 +234,7 @@ uint32_t SfbQuantizer::quantizeMagnRDOC (EntropyCoder& entropyCoder, const uint8
  const double lambda = getLagrangeValue (m_rateIndex);
 #endif

-  if ((coeffMagn == nullptr) || (quantCoeffs == nullptr) || (optimalSf > m_maxSfIndex) || (numTuples == 0) || (numTuples > 16) ||
+  if ((coeffMagn == nullptr) || (quantCoeffs == nullptr) || (optimalSf > m_maxSfIndex) || (numTuples == 0) || (numTuples > 32) ||
      (targetBitCount == 0)  || (targetBitCount > SHRT_MAX))
  {
    return 0; // invalid input error
@@ -457,6 +457,9 @@ SfbQuantizer::SfbQuantizer ()
 {
  // initialize all helper buffers
  m_coeffMagn = nullptr;
+#if EC_TRELLIS_OPT_CODING
+  m_coeffTemp = nullptr;
+#endif
  m_lut2ExpX4 = nullptr;
  m_lutSfNorm = nullptr;
  m_lutXExp43 = nullptr;
@@ -479,6 +482,9 @@ SfbQuantizer::~SfbQuantizer ()
 {
  // free allocated helper buffers
  MFREE (m_coeffMagn);
+#if EC_TRELLIS_OPT_CODING
+  MFREE (m_coeffTemp);
+#endif
  MFREE (m_lut2ExpX4);
  MFREE (m_lutSfNorm);
  MFREE (m_lutXExp43);
@@ -496,18 +502,19 @@ SfbQuantizer::~SfbQuantizer ()
 // public functions
 unsigned SfbQuantizer::initQuantMemory (const unsigned maxTransfLength,
 #if EC_TRELLIS_OPT_CODING
-                                        const uint8_t numSwb, const uint8_t bitRateMode,
+                                        const uint8_t numSwb, const uint8_t bitRateMode, const unsigned samplingRate,
 #endif
                                        const uint8_t maxScaleFacIndex /*= SCHAR_MAX*/)
 {
  const unsigned numScaleFactors = (unsigned) maxScaleFacIndex + 1;
 #if EC_TRELLIS_OPT_CODING
-  const uint8_t numTrellisStates = 5 - __min (2, (bitRateMode + 2) >> 2);  // states per SFB
+  const uint8_t numTrellisStates = (samplingRate < 44100 ? 8 - samplingRate / 32000 : 5) - __min (2, (bitRateMode + 2) >> 2); // states per SFB
  const uint8_t numSquaredStates = numTrellisStates * numTrellisStates;
+  const uint16_t quantRateLength = (samplingRate < 28800 ? 512 : 256); // quantizeMagnRDOC()
 #endif
  unsigned x;

-  if ((maxTransfLength < 128) || (maxTransfLength > 8192) || (maxTransfLength & 7) || (maxScaleFacIndex == 0) || (maxScaleFacIndex > SCHAR_MAX))
+  if ((maxTransfLength < 128) || (maxTransfLength > 2048) || (maxTransfLength & 7) || (maxScaleFacIndex == 0) || (maxScaleFacIndex > SCHAR_MAX))
  {
    return 1; // invalid arguments error
  }
@@ -515,6 +522,9 @@ unsigned SfbQuantizer::initQuantMemory (const unsigned maxTransfLength,
  m_maxSfIndex = maxScaleFacIndex;

  if ((m_coeffMagn = (unsigned*) malloc (maxTransfLength * sizeof (unsigned))) == nullptr ||
+#if EC_TRELLIS_OPT_CODING
+      (m_coeffTemp = (uint8_t* ) malloc (maxTransfLength + quantRateLength  )) == nullptr ||
+#endif
      (m_lut2ExpX4 = (double*  ) malloc (numScaleFactors * sizeof (double  ))) == nullptr ||
      (m_lutSfNorm = (double*  ) malloc (numScaleFactors * sizeof (double  ))) == nullptr ||
      (m_lutXExp43 = (double*  ) malloc ((SCHAR_MAX + 1) * sizeof (double  ))) == nullptr)
@@ -522,6 +532,7 @@ unsigned SfbQuantizer::initQuantMemory (const unsigned maxTransfLength,
    return 2; // memory allocation error
  }
 #if EC_TRELLIS_OPT_CODING
+  m_maxSize8M1 = (maxTransfLength >> 3) - 1;
  m_numCStates = numTrellisStates;
  m_rateIndex  = bitRateMode;

@@ -534,9 +545,9 @@ unsigned SfbQuantizer::initQuantMemory (const unsigned maxTransfLength,
      return 2;
    }
  }
-#endif
+#else
  memset (m_coeffTemp, 0, sizeof (m_coeffTemp));
-
+#endif
  // calculate scale factor gain 2^(x/4)
  for (x = 0; x < numScaleFactors; x++)
  {
@@ -762,7 +773,7 @@ unsigned SfbQuantizer::quantizeSpecRDOC (EntropyCoder& entropyCoder, uint8_t* co
  const uint32_t codFinal = entropyCoder.arithGetCodState ();
  const uint32_t ctxFinal = entropyCoder.arithGetCtxState (); // after call to quantizeSfb()
  const uint16_t grpStart = grpOffsets[0];
-  uint8_t* const inScaleFac = &m_coeffTemp[716];
+  uint8_t* const inScaleFac = &m_coeffTemp[((unsigned) m_maxSize8M1 - 6) << 3];
  uint32_t  prevCodState[8] = {0, 0, 0, 0, 0, 0, 0, 0};
  uint32_t  prevCtxState[8] = {0, 0, 0, 0, 0, 0, 0, 0};
  uint8_t   prevScaleFac[8] = {0, 0, 0, 0, 0, 0, 0, 0};
@@ -797,7 +808,6 @@ unsigned SfbQuantizer::quantizeSpecRDOC (EntropyCoder& entropyCoder, uint8_t* co
    uint8_t* const  tempQuant = &m_coeffTemp[sfbStart - grpStart];
    bool maxSnrReached = false;

-    if (refQuantDist < 0.0) memset (tempQuant, 0, sfbWidth * sizeof (uint8_t));
 #if EC_TRAIN
-    else refGrpDist += refQuantDist;
+    if (refQuantDist >= 0.0) refGrpDist += refQuantDist; // own condition: the 'if' this was the 'else' of is removed above
 #endif
@@ -808,22 +818,22 @@ unsigned SfbQuantizer::quantizeSpecRDOC (EntropyCoder& entropyCoder, uint8_t* co

    for (is = 0; is < m_numCStates; is++) // populate SFB trellis
    {
-      uint8_t* const mag = (is != 1 ? m_coeffTemp /*= tempQuant[grpStart - sfbStart]*/ : &quantCoeffs[grpStart]);
-      double*   currDist = &m_quantDist[sfb][is];
+      const uint8_t* mag = (is != 1 ? m_coeffTemp /*= tempQuant[grpStart - sfbStart]*/ : &quantCoeffs[grpStart]);
+      double&   currDist = m_quantDist[sfb][is];
      uint16_t* currRate = &m_quantRate[sfb][is * m_numCStates];
      uint8_t     sfBest = optimalSf[sfb]; // optimal scalefactor
      short maxQCurr = 0, numQCurr = 0; // for sign bits counting

      if (refQuantDist < 0.0) // -1.0 means SFB is zero-quantized
      {
-        *currDist = -1.0;
+        currDist = -1.0;
        m_quantInSf[sfb][is] = refSf;
      }
      else if (is != 1) // quantization & distortion not computed
      {
        const uint8_t sfCurr = __max (0, __min (m_maxSfIndex, refSf + 1 - (int) is));

-        *currDist = -1.0;
+        currDist = -1.0;
        if ((sfCurr == 0) || maxSnrReached)
        {
          maxSnrReached = true;
@@ -840,10 +850,10 @@ unsigned SfbQuantizer::quantizeSpecRDOC (EntropyCoder& entropyCoder, uint8_t* co
          }
          else
          {
-            *currDist = getQuantDist (coeffMagn, sfBest, tempQuant, sfbWidth) * refQuantNorm;
+            currDist = getQuantDist (coeffMagn, sfBest, tempQuant, sfbWidth) * refQuantNorm;
          }
        }
-        if (*currDist < 0.0) memset (tempQuant, 0, sfbWidth * sizeof (uint8_t));
+     // if (currDist < 0.0) memset (tempQuant, 0, sfbWidth * sizeof (uint8_t));
        m_quantInSf[sfb][is] = sfCurr; // store initial scale fac
      }
      else // is == 1, quant. & dist. computed with quantizeSfb()
@@ -851,6 +861,8 @@ unsigned SfbQuantizer::quantizeSpecRDOC (EntropyCoder& entropyCoder, uint8_t* co
        numQCurr = refNumQ;
      }

+      if ((currDist < 0.0) || (numQCurr == 0)) mag = nullptr; // to accelerate getBitCount()
+
      if (sfb == 0) // first SFB, having sfbStart - grpStart == 0
      {
        entropyCoder.arithSetCodState (codStart);  // group start
--- a/src/lib/quantization.h
+++ b/src/lib/quantization.h
@@ -35,7 +35,7 @@ private:
  // member variables
  unsigned* m_coeffMagn; // temp memory
 #if EC_TRELLIS_OPT_CODING
-  uint8_t   m_coeffTemp[1024]; // TODO?
+  uint8_t*  m_coeffTemp; // temp result
 #else
  uint8_t   m_coeffTemp[200]; // 40 * 5 - NOTE: increase this when maximum grpLength > 5
 #endif
@@ -44,8 +44,9 @@ private:
  double*   m_lutXExp43; // for X^(4/3)
  uint8_t   m_maxSfIndex; // 1,..., 127
 #if EC_TRELLIS_OPT_CODING
+  uint8_t   m_maxSize8M1; // (size/8)-1
  uint8_t   m_numCStates; // states/SFB
-  uint16_t  m_rateIndex; // lambda mode
+  uint8_t   m_rateIndex; // lambda mode
  // trellis memory, max. 8 KB @ num_swb=51
  double*   m_quantDist[52]; // quantizing distortion
  uint8_t*  m_quantInSf[52]; // initial scale factors
@@ -78,7 +79,7 @@ public:
  uint8_t getScaleFacOffset (const double absValue) const { return uint8_t (SF_QUANT_OFFSET + FOUR_LOG102 * log10 (__max (1.0, absValue))); }
  unsigned  initQuantMemory (const unsigned maxTransfLength,
 #if EC_TRELLIS_OPT_CODING
-                             const uint8_t numSwb, const uint8_t bitRateMode,
+                             const uint8_t numSwb, const uint8_t bitRateMode, const unsigned samplingRate,
 #endif
                             const uint8_t maxScaleFacIndex = SCHAR_MAX);
  uint8_t   quantizeSpecSfb (EntropyCoder& entropyCoder, const int32_t* const inputCoeffs, const uint8_t grpLength,
--- a/src/lib/specGapFilling.cpp
+++ b/src/lib/specGapFilling.cpp
@@ -24,7 +24,7 @@ SpecGapFiller::SpecGapFiller ()
 // public functions
 uint8_t SpecGapFiller::getSpecGapFillParams (const SfbQuantizer& sfbQuantizer, const uint8_t* const quantMagn,
                                             const uint8_t numSwbShort, SfbGroupData& grpData /*modified*/,
-                                             const unsigned nSamplesInFrame /*= 1024*/)
+                                             const unsigned nSamplesInFrame /*= 1024*/, const uint8_t specFlat /*= 0*/)
 {
  const unsigned* const coeffMagn = sfbQuantizer.getCoeffMagnPtr ();
  const double* const  sfNormFacs = sfbQuantizer.getSfNormTabPtr ();
@@ -166,6 +166,7 @@ uint8_t SpecGapFiller::getSpecGapFillParams (const SfbQuantizer& sfbQuantizer, c
  s = 0;
 #endif
  u = __min (7, uint16_t (14.47118288 + 9.965784285 * log10 (magnSum / (double) u)));
+  u = __max (1, u - int (specFlat >> 5)); // SFM-adaptive reduction

  magnSum = pow (2.0, (14 - u) / 3.0); // noiseVal^-1, 23003-3, 7.2

--- a/src/lib/specGapFilling.h
+++ b/src/lib/specGapFilling.h
@@ -1,5 +1,5 @@
 /* specGapFilling.h - header file for class with spectral gap filling coding methods
- * written by C. R. Helmrich, last modified in 2019 - see License.htm for legal notices
+ * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
 *
 * The copyright in this software is being made available under a Modified BSD-Style License
 * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
@@ -38,7 +38,7 @@ public:
  uint16_t  getFirstGapFillSfb () const { return m_1stGapFillSfb; }
  uint8_t   getSpecGapFillParams (const SfbQuantizer& sfbQuantizer, const uint8_t* const quantMagn,
                                  const uint8_t numSwbShort, SfbGroupData& grpData /*modified*/,
-                                  const unsigned nSamplesInFrame = 1024);
+                                  const unsigned nSamplesInFrame = 1024, const uint8_t specFlat = 0);
 }; // SpecGapFiller

 #endif // _SPEC_GAP_FILLING_H_