transient tuning pt. 2

2025-03-12 01:00:11 +01:00 · 2020-04-11 01:00:03 +02:00 · 2020-04-11 01:00:03 +02:00 · 9f82a8a5bf
commit 9f82a8a5bf
parent 83f3dc2f88
3 changed files with 54 additions and 40 deletions
--- a/src/lib/bitAllocation.cpp
+++ b/src/lib/bitAllocation.cpp
@@ -12,6 +12,11 @@
 #include "bitAllocation.h"

 // static helper functions
+static inline uint32_t intSqrt (const uint32_t val)
+{
+  return uint32_t (0.5 + sqrt ((double) val));
+}
+
 static inline uint32_t jndModel (const uint32_t val, const uint32_t mean,
                                 const unsigned expTimes512, const unsigned mulTimes512)
 {
@@ -102,7 +107,7 @@ uint8_t BitAllocator::getScaleFac (const uint32_t sfbStepSize, const int32_t* co
  u = 0;
  for (sf = 0; sf < sfbWidth; sf++)
  {
-    u += uint32_t (0.5 + sqrt (abs ((double) sfbSignal[sf])));
+    u += intSqrt (abs (sfbSignal[sf]));
  }
  u = uint32_t ((u * 16384ui64 + (sfbWidth >> 1)) / sfbWidth);
  u = uint32_t (0.5 + sqrt ((double) u) * 128.0);
@@ -197,10 +202,10 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
        for (/*b*/; b > 0; b--)
        {
          gStepSizes[b] = __max (gRms[b], BA_EPS);
-          sumStepSizes += unsigned (0.5 + sqrt ((double) gStepSizes[b]));
+          sumStepSizes += intSqrt (gStepSizes[b]);
        }
        gStepSizes[0]   = __max (gRms[0], BA_EPS);
-        sumStepSizes   += unsigned (0.5 + sqrt ((double) gStepSizes[0]));
+        sumStepSizes   += intSqrt (gStepSizes[0]);
      } // for gr

      if (ch != lfeChannelIndex)
@@ -218,28 +223,25 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA

            if (curGrpStep > maxGrpStep) maxGrpStep = curGrpStep;
          }
-          for (gr = 0; gr + 1 < grpData.numWindowGroups; gr++)
+          for (gr = 0; gr < grpData.numWindowGroups; gr++)
          {
-            const uint32_t newGrpStep = __max (stepSizeM1, stepSizes[b + numSwbShort * (gr + 1)]);
+            const uint32_t newGrpStep = __max (stepSizeM1, (gr + 1 == grpData.numWindowGroups ? BA_EPS : stepSizes[b + numSwbShort * (gr + 1)]));

            stepSizeM1 = stepSizes[b + numSwbShort * gr];

            if ((stepSizeM1 == maxGrpStep) && (maxGrpStep > newGrpStep))
            {
-              sumStepSizes -= unsigned (0.5 + sqrt ((double) maxGrpStep));
-              stepSizes[b + numSwbShort * gr] = newGrpStep;
-              sumStepSizes += unsigned (0.5 + sqrt ((double) newGrpStep));
+              const uint32_t sqrtOldStep = intSqrt (maxGrpStep);
+              const uint32_t sqrtNewStep = intSqrt (newGrpStep);
+              uint32_t& gStepSize = stepSizes[b + numSwbShort * gr];
+
+              sumStepSizes += (gStepSize = (sqrtOldStep + sqrtNewStep) >> 1) - sqrtOldStep;
+              gStepSize *= gStepSize; // for square-mean-root
            }
          }
-          if ((stepSizes[b + numSwbShort * gr] == maxGrpStep) && (maxGrpStep > stepSizeM1))
-          {
-            sumStepSizes -= unsigned (0.5 + sqrt ((double) maxGrpStep));
-            stepSizes[b + numSwbShort * gr] = stepSizeM1;
-            sumStepSizes += unsigned (0.5 + sqrt ((double) stepSizeM1));
-          }
        } // for b

-        m_avgStepSize[ch] = __min (USHRT_MAX, uint32_t ((sumStepSizes + (nBandsInCh >> 1)) / nBandsInCh));
+        m_avgStepSize[ch] = __min (USHRT_MAX, (sumStepSizes + (nBandsInCh >> 1)) / nBandsInCh);
        sumMeans += m_avgStepSize[ch];
        m_avgStepSize[ch] *= m_avgStepSize[ch];

@@ -288,29 +290,30 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
        stepSizes[b] = __max (rms[b], maskingSlope + BA_EPS);
      }
    }
+    stepSizes[b] = 0;
    for (b -= 1; b > __min (MF, maxSfbInCh); b--) // complete simultaneous masking by reversing the pattern
    {
-      sumStepSizes += unsigned (0.5 + sqrt ((double) stepSizes[b]));
+      sumStepSizes += intSqrt (stepSizes[b]);
      maskingSlope     = ((uint64_t) stepSizes[b] * (8u + b - MF) + (msOffset << 3u)) >> (msShift + 3u);
      stepSizes[b - 1] = __max (stepSizes[b - 1], maskingSlope);
    }
    for (/*b*/; b > __min (LF, maxSfbInCh); b--)  // typical reversed mid-freq. simultaneous masking slopes
    {
-      sumStepSizes += unsigned (0.5 + sqrt ((double) stepSizes[b]));
+      sumStepSizes += intSqrt (stepSizes[b]);
      maskingSlope     = (stepSizes[b] + msOffset) >> msShift;
      stepSizes[b - 1] = __max (stepSizes[b - 1], maskingSlope);
    }
    for (/*b = min (9, maxSfbInCh)*/; b > 0; b--) // steeper reversed low-freq. simultaneous masking slopes
    {
-      sumStepSizes += unsigned (0.5 + sqrt ((double) stepSizes[b]));
+      sumStepSizes += intSqrt (stepSizes[b]);
      maskingSlope     = (stepSizes[b] + (msOffset << (10u - b))) >> (msShift + 10u - b);
      stepSizes[b - 1] = __max (stepSizes[b - 1], maskingSlope);
    }
-    sumStepSizes   += unsigned (0.5 + sqrt ((double) stepSizes[0]));
+    sumStepSizes   += intSqrt (stepSizes[0]);

 // --- LONG window: apply perceptual JND model and local band-peak smoothing, undo equal-loudness weighting
    nMeans++;
-    m_avgStepSize[ch] = __min (USHRT_MAX, uint32_t ((sumStepSizes + (nBandsInCh >> 1)) / nBandsInCh));
+    m_avgStepSize[ch] = __min (USHRT_MAX, (sumStepSizes + (nBandsInCh >> 1)) / nBandsInCh);
    sumMeans += m_avgStepSize[ch];
    m_avgStepSize[ch] *= m_avgStepSize[ch];

--- a/src/lib/exhaleEnc.cpp
+++ b/src/lib/exhaleEnc.cpp
@@ -573,13 +573,13 @@ unsigned ExhaleEncoder::getOptParCorCoeffs (const int32_t* const mdctSignal, con
    if (tnsData.filterOrder[0] > 0) // try to reduce TNS start band as long as SNR increases
    {
      const uint16_t filtOrder = tnsData.filterOrder[0];
-      uint16_t b = __min (m_specAnaCurr[channelIndex] & 31, (nSamplesInFrame - filtOrder) >> SA_BW_SHIFT);
+      uint16_t b = __min ((m_specAnaCurr[channelIndex] & 31) + 2, (nSamplesInFrame - filtOrder) >> SA_BW_SHIFT);
      short filterC[MAX_PREDICTION_ORDER] = {0, 0, 0, 0};
      int32_t* predSig = &m_mdctSignals[channelIndex][b << SA_BW_SHIFT]; // TNS start offset

      m_linPredictor.parCorToLpCoeffs (tnsData.coeffParCor, filtOrder, filterC);

-      for (b = (b > 0 ? b - 1 : 0), predSig--; b > 0; b--) // b is in spectr. analysis units
+      for (b--, predSig--; b > 0; b--) // start a bit higher; b is in spectr. analysis units
      {
        uint64_t sumAbsOrg = 0, sumAbsTns = 0;

@@ -664,8 +664,8 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
  const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
  const uint32_t maxSfbLong      = (samplingRate < 37566 ? 51 /*32 kHz*/ : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
  const uint32_t reductionFactor = (samplingRate < 37566 ? 2 : 3);  // undercoding reduction
-  const uint64_t scaleSr         = (samplingRate < 27713 ? 37 - m_bitRateMode : 37) - ((m_bitRateMode & 7) > 2/*TODO*/ ? nChannels >> 1 : 0);
-  const uint64_t scaleBr         = (m_bitRateMode == 0 ? 32 : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - (m_bitRateMode >> 1));
+  const uint64_t scaleSr         = (samplingRate < 27713 ? 37 - m_bitRateMode : 37) - (m_bitRateMode > 3 ? nChannels >> 1 : 0);
+  const uint64_t scaleBr         = (m_bitRateMode == 0 ? 32 : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1));
  uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf;
  uint8_t  meanSpecFlat[USAC_MAX_NUM_CHANNELS];
 //uint8_t  meanTempFlat[USAC_MAX_NUM_CHANNELS];
@@ -717,7 +717,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
        const bool     eightShorts = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT);
        const uint16_t nSamplesMax = (samplingRate < 37566 ? nSamplesInFrame : swbOffsetsL[m_swbTableIdx][maxSfbLong]);
        const uint8_t steppFadeLen = (eightShorts ? 4 : (coreConfig.tnsActive ? 32 : 64));
-        const uint8_t steppFadeOff = ((m_bitRateMode + 1) & 6) << (eightShorts ? 2 : 5);
+        const uint8_t steppFadeOff = ((m_bitRateMode + 77000 / samplingRate) & 6) << (eightShorts ? 2 : 5);
        const int64_t steppWeightI = __min (64, m_perCorrCurr[el] - 128) >> (eightShorts || coreConfig.tnsActive ? 1 : 0);
        const int64_t steppWeightD = 128 - steppWeightI; // decrement, (1 - crosstalk) * 128
        const TnsData&    tnsData0 = coreConfig.tnsData[0];
@@ -785,7 +785,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s

      if ((errorValue == 0) && (coreConfig.stereoMode == 2))  // frame M/S, synch statistics
      {
-        const uint8_t   numSwbFrame = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT ? m_numSwbShort : __min (m_numSwbLong, maxSfbLong));
+        const uint8_t   numSwbFrame = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT ? m_numSwbShort : __min (m_numSwbLong, maxSfbLong + 1));
        const uint32_t peakIndexSte = __max ((m_specAnaCurr[ci] >> 5) & 2047, (m_specAnaCurr[ci + 1] >> 5) & 2047) << 5;

        errorValue = m_stereoCoder.applyFullFrameMatrix (m_mdctSignals[ci], m_mdctSignals[ci + 1],
@@ -810,6 +810,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
      {
        SfbGroupData&  grpData = coreConfig.groupingData[ch];
        const bool eightShorts = (coreConfig.icsInfoCurr[ch].windowSequence == EIGHT_SHORT);
+        const uint8_t maxSfbCh = grpData.sfbsPerGroup;
        const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong);
        const uint8_t  mSfmFac = eightTimesSqrt256Minus[meanSpecFlat[ci]];
        uint32_t*    stepSizes = &sfbStepSizes[ci * m_numSwbShort * NUM_WINDOW_GROUPS];
@@ -827,7 +828,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s

          // undercoding reduction for case where large number of coefs is quantized to zero
          s = (eightShorts ? (nSamplesInFrame * grpData.windowGroupLength[gr]) >> 1 : nSamplesInFrame << 2);
-          for (b = 0; b < grpData.sfbsPerGroup; b++)
+          for (b = 0; b < maxSfbCh; b++)
          {
 #if SA_IMPROVED_REAL_ABS
            const uint32_t rmsComp = (coreConfig.stereoMode > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]);
@@ -860,7 +861,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
          }
          s = (eightShorts ? s / ((nSamplesInFrame * grpData.windowGroupLength[gr]) >> 8) : s / (nSamplesInFrame >> 5));

-          for (b = 0; b < grpData.sfbsPerGroup; b++)
+          for (b = 0; b < maxSfbCh; b++)
          {
            const unsigned lfConst = (samplingRate < 27713 && !eightShorts ? 1 : 2); // LF SNR boost, cf my M.Sc. thesis
            const unsigned lfAtten = (b <= 5 ? (eightShorts ? 1 : 4) + b * lfConst : 5 * lfConst - 1 + b + ((b + 5) >> 4));
@@ -876,13 +877,16 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
        } // for gr

 #if !RESTRICT_TO_AAC
-        if (grpData.sfbsPerGroup > 0 && m_noiseFilling[el] && !eightShorts) // HF noise-fill
+        if ((maxSfbCh > 0) && m_noiseFilling[el] && (m_bitRateMode <= 3 || !eightShorts))
        {
-          const uint8_t numSwbFrame = __min (numSwbCh, maxSfbLong);  // rate based bandwidth
+          const uint8_t numSwbFrame = __min (numSwbCh, (eightShorts ? maxSfbCh : maxSfbLong) + (m_bitRateMode > 3 || samplingRate < 37566 ? 0 : 1));

-          if (grpData.sfbsPerGroup < numSwbFrame)
+          if (maxSfbCh < numSwbFrame) // increase coding bandwidth
          {
-            memset (&grpData.scaleFactors[grpData.sfbsPerGroup], 0, (numSwbFrame - grpData.sfbsPerGroup) * sizeof (uint8_t));
+            for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++)
+            {
+              memset (&grpData.scaleFactors[maxSfbCh + m_numSwbShort * gr], 0, (numSwbFrame - maxSfbCh) * sizeof (uint8_t));
+            }
            grpData.sfbsPerGroup = coreConfig.icsInfoCurr[ch].maxSfb = numSwbFrame;
          }
          if (ch > 0) coreConfig.commonMaxSfb = (coreConfig.icsInfoCurr[0].maxSfb == coreConfig.icsInfoCurr[1].maxSfb);
@@ -1213,6 +1217,7 @@ unsigned ExhaleEncoder::spectralProcessing ()  // complete ics_info(), calc TNS
      if (coreConfig.commonWindow && (m_bitRateMode <= 4)) // stereo pre-processing analysis
      {
        const bool     eightShorts = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT);
+        const uint8_t meanSpecFlat = (((m_specAnaCurr[ci] >> 16) & UCHAR_MAX) + ((m_specAnaCurr[ci + 1] >> 16) & UCHAR_MAX) + 1) >> 1;
        const uint16_t* const swbo = swbOffsetsL[m_swbTableIdx];
        const uint16_t nSamplesMax = (samplingRate < 37566 ? nSamplesInFrame : swbo[brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)]);
        const int16_t  steAnaStats = m_specAnalyzer.stereoSigAnalysis (m_mdctSignals[ci], m_mdctSignals[ci + 1],
@@ -1222,14 +1227,15 @@ unsigned ExhaleEncoder::spectralProcessing ()  // complete ics_info(), calc TNS

        if ((s = abs (steAnaStats)) * m_perCorrCurr[el] == 0) // transitions to/from silence
        {
-          m_perCorrCurr[el] = (uint8_t) s;
+          m_perCorrCurr[el] = uint8_t((32 + s * __min (64, eightTimesSqrt256Minus[meanSpecFlat])) >> 6);
        }
        else // gentle overlap length dependent temporal smoothing
        {
          const int16_t allowedDiff = (coreConfig.icsInfoCurr[0].windowSequence < EIGHT_SHORT ? 16 : 32);
          const int16_t prevPerCorr = __max (128, __min (192, m_perCorrCurr[el]));
+          const int16_t currPerCorr = (32 + s * __min (64, eightTimesSqrt256Minus[meanSpecFlat])) >> 6;

-          m_perCorrCurr[el] = (uint8_t) __max (prevPerCorr - allowedDiff, __min (prevPerCorr + allowedDiff, (int16_t) s));
+          m_perCorrCurr[el] = (uint8_t) __max (prevPerCorr - allowedDiff, __min (prevPerCorr + allowedDiff, currPerCorr));
        }

        if (s == steAnaStats * -1) coreConfig.stereoConfig = 2; // 2: side > mid, pred_dir=1
@@ -1489,7 +1495,8 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o
        tsCurr[ch] = (m_tempAnaCurr[ci] /*R*/) & UCHAR_MAX;
        tsNext[ch] = (m_tempAnaNext[ci] >>  8) & UCHAR_MAX;

-        const bool lowOlapNext = (m_tranLocNext[ci] >= 0) || (sfNext < 68 && tfNext >= 204) || (tsCurr[ch] >= 153) || (tsNext[ch] >= 153);
+        const bool lowOlapNext = (m_tranLocNext[ci] >= 0) || (sfNext <= UCHAR_MAX / 4 && tfNext > (UCHAR_MAX * 13) / 16) ||
+                                 (tsCurr[ch] > (UCHAR_MAX * 5) / 8) || (tsNext[ch] > (UCHAR_MAX * 5) / 8);
        const bool sineWinCurr = (sfCurr >= 170) && (sfNext >= 170) && (sfCurr < 221) && (sfNext < 221) && (tsCurr[ch] < 20) &&
                                 (tfCurr >= 153) && (tfNext >= 153) && (tfCurr < 184) && (tfNext < 184) && (tsNext[ch] < 20);
        // set window_sequence
@@ -1499,11 +1506,11 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o
        }
        else // LONG_START_SEQUENCE, STOP_START_SEQUENCE, EIGHT_SHORT_SEQUENCE - min overlap
        {
-          wsCurr = (m_tranLocCurr[ci] >= 0) ? EIGHT_SHORT :
+          wsCurr = (m_tranLocCurr[ci] >= 0) || (tsCurr[ch] > (UCHAR_MAX * 5) / 8) || (tfCurr > (UCHAR_MAX * 15) / 16) ? EIGHT_SHORT :
 #if RESTRICT_TO_AAC
-                   (lowOlapNext && (m_tranLocNext[ci] >= 0 || wsPrev != EIGHT_SHORT) ? EIGHT_SHORT : LONG_STOP);
+                   (lowOlapNext ? EIGHT_SHORT : LONG_STOP);
 #else
-                   (lowOlapNext && (m_tranLocNext[ci] >= 0 || wsPrev != STOP_START) ? STOP_START : LONG_STOP);
+                   (lowOlapNext ? STOP_START : LONG_STOP);
 #endif
        }

--- a/src/lib/stereoProcessing.cpp
+++ b/src/lib/stereoProcessing.cpp
@@ -30,7 +30,7 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in
  const bool alterPredDir = (applyPredSte && (useAltPredDir > 0)); // predict mid from side?
  const SfbGroupData& grp = groupingData1;
  const bool  eightShorts = (grp.numWindowGroups > 1);
-  const uint8_t maxSfbSte = (eightShorts ? __max (grp.sfbsPerGroup, groupingData2.sfbsPerGroup) : numSwbFrame);
+  const uint8_t maxSfbSte = (eightShorts ? __min (numSwbFrame, __max (grp.sfbsPerGroup, groupingData2.sfbsPerGroup) + 1) : numSwbFrame);
  uint32_t  numSfbPredSte = 0; // counter

  if ((mdctSpectrum1 == nullptr) || (mdctSpectrum2 == nullptr) || (numSwbFrame < maxSfbSte) || (grp.numWindowGroups != groupingData2.numWindowGroups) ||
@@ -282,7 +282,11 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in

          sfbTempVar = (applyPredSte ? __max (rmsSfbM[b], rmsSfbS[b]) : __max (grpRms1[idx], grpRms2[idx]));

-          if (sfbFacLR <= 1.0) // total simultaneous masking - no positive SNR in either SFB
+          if ((grpStepSizes1[idx] == 0) || (grpStepSizes2[idx] == 0)) // HF noise filled SFB
+          {
+            grpStepSizes1[idx] = grpStepSizes2[idx] = 0;
+          }
+          else if (sfbFacLR <= 1.0)  // simultaneous masking - no positive SNR in either SFB
          {
            const double max = __max (sfbRmsL, sfbRmsR);