From 9f82a8a5bf4acc34cf82c43bf53b859621003954 Mon Sep 17 00:00:00 2001
From: "Christian R. Helmrich" <c.helmrich@ecodis.de>
Date: Sat, 11 Apr 2020 01:00:03 +0200
Subject: [PATCH] transient tuning pt. 2

---
 src/lib/bitAllocation.cpp    | 43 +++++++++++++++++++-----------------
 src/lib/exhaleEnc.cpp        | 43 +++++++++++++++++++++---------------
 src/lib/stereoProcessing.cpp |  8 +++++--
 3 files changed, 54 insertions(+), 40 deletions(-)

diff --git a/src/lib/bitAllocation.cpp b/src/lib/bitAllocation.cpp
index d21bd14..caa880b 100644
--- a/src/lib/bitAllocation.cpp
+++ b/src/lib/bitAllocation.cpp
@@ -12,6 +12,11 @@
 #include "bitAllocation.h"
 
 // static helper functions
+static inline uint32_t intSqrt (const uint32_t val) // square root of val, rounded to the nearest integer
+{
+  return uint32_t (0.5 + sqrt ((double) val)); // the 0.5 offset turns the truncating conversion into rounding
+}
+
 static inline uint32_t jndModel (const uint32_t val, const uint32_t mean,
                                  const unsigned expTimes512, const unsigned mulTimes512)
 {
@@ -102,7 +107,7 @@ uint8_t BitAllocator::getScaleFac (const uint32_t sfbStepSize, const int32_t* co
   u = 0;
   for (sf = 0; sf < sfbWidth; sf++)
   {
-    u += uint32_t (0.5 + sqrt (abs ((double) sfbSignal[sf])));
+    u += intSqrt (abs (sfbSignal[sf]));
   }
   u = uint32_t ((u * 16384ui64 + (sfbWidth >> 1)) / sfbWidth);
   u = uint32_t (0.5 + sqrt ((double) u) * 128.0);
@@ -197,10 +202,10 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
         for (/*b*/; b > 0; b--)
         {
           gStepSizes[b] = __max (gRms[b], BA_EPS);
-          sumStepSizes += unsigned (0.5 + sqrt ((double) gStepSizes[b]));
+          sumStepSizes += intSqrt (gStepSizes[b]);
         }
         gStepSizes[0]   = __max (gRms[0], BA_EPS);
-        sumStepSizes   += unsigned (0.5 + sqrt ((double) gStepSizes[0]));
+        sumStepSizes   += intSqrt (gStepSizes[0]);
       } // for gr
 
       if (ch != lfeChannelIndex)
@@ -218,28 +223,25 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
 
             if (curGrpStep > maxGrpStep) maxGrpStep = curGrpStep;
           }
-          for (gr = 0; gr + 1 < grpData.numWindowGroups; gr++)
+          for (gr = 0; gr < grpData.numWindowGroups; gr++)
           {
-            const uint32_t newGrpStep = __max (stepSizeM1, stepSizes[b + numSwbShort * (gr + 1)]);
+            const uint32_t newGrpStep = __max (stepSizeM1, (gr + 1 == grpData.numWindowGroups ? BA_EPS : stepSizes[b + numSwbShort * (gr + 1)]));
 
             stepSizeM1 = stepSizes[b + numSwbShort * gr];
 
             if ((stepSizeM1 == maxGrpStep) && (maxGrpStep > newGrpStep))
             {
-              sumStepSizes -= unsigned (0.5 + sqrt ((double) maxGrpStep));
-              stepSizes[b + numSwbShort * gr] = newGrpStep;
-              sumStepSizes += unsigned (0.5 + sqrt ((double) newGrpStep));
+              const uint32_t sqrtOldStep = intSqrt (maxGrpStep);
+              const uint32_t sqrtNewStep = intSqrt (newGrpStep);
+              uint32_t& gStepSize = stepSizes[b + numSwbShort * gr];
+
+              sumStepSizes += (gStepSize = (sqrtOldStep + sqrtNewStep) >> 1) - sqrtOldStep;
+              gStepSize *= gStepSize; // for square-mean-root
             }
           }
-          if ((stepSizes[b + numSwbShort * gr] == maxGrpStep) && (maxGrpStep > stepSizeM1))
-          {
-            sumStepSizes -= unsigned (0.5 + sqrt ((double) maxGrpStep));
-            stepSizes[b + numSwbShort * gr] = stepSizeM1;
-            sumStepSizes += unsigned (0.5 + sqrt ((double) stepSizeM1));
-          }
         } // for b
 
-        m_avgStepSize[ch] = __min (USHRT_MAX, uint32_t ((sumStepSizes + (nBandsInCh >> 1)) / nBandsInCh));
+        m_avgStepSize[ch] = __min (USHRT_MAX, (sumStepSizes + (nBandsInCh >> 1)) / nBandsInCh);
         sumMeans += m_avgStepSize[ch];
         m_avgStepSize[ch] *= m_avgStepSize[ch];
 
@@ -288,29 +290,30 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
         stepSizes[b] = __max (rms[b], maskingSlope + BA_EPS);
       }
     }
+    stepSizes[b] = 0;
     for (b -= 1; b > __min (MF, maxSfbInCh); b--) // complete simultaneous masking by reversing the pattern
     {
-      sumStepSizes += unsigned (0.5 + sqrt ((double) stepSizes[b]));
+      sumStepSizes += intSqrt (stepSizes[b]);
       maskingSlope     = ((uint64_t) stepSizes[b] * (8u + b - MF) + (msOffset << 3u)) >> (msShift + 3u);
       stepSizes[b - 1] = __max (stepSizes[b - 1], maskingSlope);
     }
     for (/*b*/; b > __min (LF, maxSfbInCh); b--)  // typical reversed mid-freq. simultaneous masking slopes
     {
-      sumStepSizes += unsigned (0.5 + sqrt ((double) stepSizes[b]));
+      sumStepSizes += intSqrt (stepSizes[b]);
       maskingSlope     = (stepSizes[b] + msOffset) >> msShift;
       stepSizes[b - 1] = __max (stepSizes[b - 1], maskingSlope);
     }
     for (/*b = min (9, maxSfbInCh)*/; b > 0; b--) // steeper reversed low-freq. simultaneous masking slopes
     {
-      sumStepSizes += unsigned (0.5 + sqrt ((double) stepSizes[b]));
+      sumStepSizes += intSqrt (stepSizes[b]);
       maskingSlope     = (stepSizes[b] + (msOffset << (10u - b))) >> (msShift + 10u - b);
       stepSizes[b - 1] = __max (stepSizes[b - 1], maskingSlope);
     }
-    sumStepSizes   += unsigned (0.5 + sqrt ((double) stepSizes[0]));
+    sumStepSizes   += intSqrt (stepSizes[0]);
 
 // --- LONG window: apply perceptual JND model and local band-peak smoothing, undo equal-loudness weighting
     nMeans++;
-    m_avgStepSize[ch] = __min (USHRT_MAX, uint32_t ((sumStepSizes + (nBandsInCh >> 1)) / nBandsInCh));
+    m_avgStepSize[ch] = __min (USHRT_MAX, (sumStepSizes + (nBandsInCh >> 1)) / nBandsInCh);
     sumMeans += m_avgStepSize[ch];
     m_avgStepSize[ch] *= m_avgStepSize[ch];
 
diff --git a/src/lib/exhaleEnc.cpp b/src/lib/exhaleEnc.cpp
index 0a1eaff..0439958 100644
--- a/src/lib/exhaleEnc.cpp
+++ b/src/lib/exhaleEnc.cpp
@@ -573,13 +573,13 @@ unsigned ExhaleEncoder::getOptParCorCoeffs (const int32_t* const mdctSignal, con
     if (tnsData.filterOrder[0] > 0) // try to reduce TNS start band as long as SNR increases
     {
       const uint16_t filtOrder = tnsData.filterOrder[0];
-      uint16_t b = __min (m_specAnaCurr[channelIndex] & 31, (nSamplesInFrame - filtOrder) >> SA_BW_SHIFT);
+      uint16_t b = __min ((m_specAnaCurr[channelIndex] & 31) + 2, (nSamplesInFrame - filtOrder) >> SA_BW_SHIFT);
       short filterC[MAX_PREDICTION_ORDER] = {0, 0, 0, 0};
       int32_t* predSig = &m_mdctSignals[channelIndex][b << SA_BW_SHIFT]; // TNS start offset
 
       m_linPredictor.parCorToLpCoeffs (tnsData.coeffParCor, filtOrder, filterC);
 
-      for (b = (b > 0 ? b - 1 : 0), predSig--; b > 0; b--) // b is in spectr. analysis units
+      for (b--, predSig--; b > 0; b--) // start a bit higher; b is in spectr. analysis units
       {
         uint64_t sumAbsOrg = 0, sumAbsTns = 0;
 
@@ -664,8 +664,8 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
   const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
   const uint32_t maxSfbLong      = (samplingRate < 37566 ? 51 /*32 kHz*/ : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
   const uint32_t reductionFactor = (samplingRate < 37566 ? 2 : 3);  // undercoding reduction
-  const uint64_t scaleSr         = (samplingRate < 27713 ? 37 - m_bitRateMode : 37) - ((m_bitRateMode & 7) > 2/*TODO*/ ? nChannels >> 1 : 0);
-  const uint64_t scaleBr         = (m_bitRateMode == 0 ? 32 : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - (m_bitRateMode >> 1));
+  const uint64_t scaleSr         = (samplingRate < 27713 ? 37 - m_bitRateMode : 37) - (m_bitRateMode > 3 ? nChannels >> 1 : 0);
+  const uint64_t scaleBr         = (m_bitRateMode == 0 ? 32 : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1));
   uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf;
   uint8_t  meanSpecFlat[USAC_MAX_NUM_CHANNELS];
 //uint8_t  meanTempFlat[USAC_MAX_NUM_CHANNELS];
@@ -717,7 +717,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
         const bool     eightShorts = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT);
         const uint16_t nSamplesMax = (samplingRate < 37566 ? nSamplesInFrame : swbOffsetsL[m_swbTableIdx][maxSfbLong]);
         const uint8_t steppFadeLen = (eightShorts ? 4 : (coreConfig.tnsActive ? 32 : 64));
-        const uint8_t steppFadeOff = ((m_bitRateMode + 1) & 6) << (eightShorts ? 2 : 5);
+        const uint8_t steppFadeOff = ((m_bitRateMode + 77000 / samplingRate) & 6) << (eightShorts ? 2 : 5);
         const int64_t steppWeightI = __min (64, m_perCorrCurr[el] - 128) >> (eightShorts || coreConfig.tnsActive ? 1 : 0);
         const int64_t steppWeightD = 128 - steppWeightI; // decrement, (1 - crosstalk) * 128
         const TnsData&    tnsData0 = coreConfig.tnsData[0];
@@ -785,7 +785,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
 
       if ((errorValue == 0) && (coreConfig.stereoMode == 2))  // frame M/S, synch statistics
       {
-        const uint8_t   numSwbFrame = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT ? m_numSwbShort : __min (m_numSwbLong, maxSfbLong));
+        const uint8_t   numSwbFrame = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT ? m_numSwbShort : __min (m_numSwbLong, maxSfbLong + 1));
         const uint32_t peakIndexSte = __max ((m_specAnaCurr[ci] >> 5) & 2047, (m_specAnaCurr[ci + 1] >> 5) & 2047) << 5;
 
         errorValue = m_stereoCoder.applyFullFrameMatrix (m_mdctSignals[ci], m_mdctSignals[ci + 1],
@@ -810,6 +810,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
       {
         SfbGroupData&  grpData = coreConfig.groupingData[ch];
         const bool eightShorts = (coreConfig.icsInfoCurr[ch].windowSequence == EIGHT_SHORT);
+        const uint8_t maxSfbCh = grpData.sfbsPerGroup;
         const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong);
         const uint8_t  mSfmFac = eightTimesSqrt256Minus[meanSpecFlat[ci]];
         uint32_t*    stepSizes = &sfbStepSizes[ci * m_numSwbShort * NUM_WINDOW_GROUPS];
@@ -827,7 +828,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
 
           // undercoding reduction for case where large number of coefs is quantized to zero
           s = (eightShorts ? (nSamplesInFrame * grpData.windowGroupLength[gr]) >> 1 : nSamplesInFrame << 2);
-          for (b = 0; b < grpData.sfbsPerGroup; b++)
+          for (b = 0; b < maxSfbCh; b++)
           {
 #if SA_IMPROVED_REAL_ABS
             const uint32_t rmsComp = (coreConfig.stereoMode > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]);
@@ -860,7 +861,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
           }
           s = (eightShorts ? s / ((nSamplesInFrame * grpData.windowGroupLength[gr]) >> 8) : s / (nSamplesInFrame >> 5));
 
-          for (b = 0; b < grpData.sfbsPerGroup; b++)
+          for (b = 0; b < maxSfbCh; b++)
           {
             const unsigned lfConst = (samplingRate < 27713 && !eightShorts ? 1 : 2); // LF SNR boost, cf my M.Sc. thesis
             const unsigned lfAtten = (b <= 5 ? (eightShorts ? 1 : 4) + b * lfConst : 5 * lfConst - 1 + b + ((b + 5) >> 4));
@@ -876,13 +877,16 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
         } // for gr
 
 #if !RESTRICT_TO_AAC
-        if (grpData.sfbsPerGroup > 0 && m_noiseFilling[el] && !eightShorts) // HF noise-fill
+        if ((maxSfbCh > 0) && m_noiseFilling[el] && (m_bitRateMode <= 3 || !eightShorts))
         {
-          const uint8_t numSwbFrame = __min (numSwbCh, maxSfbLong);  // rate based bandwidth
+          const uint8_t numSwbFrame = __min (numSwbCh, (eightShorts ? maxSfbCh : maxSfbLong) + (m_bitRateMode > 3 || samplingRate < 37566 ? 0 : 1));
 
-          if (grpData.sfbsPerGroup < numSwbFrame)
+          if (maxSfbCh < numSwbFrame) // increase coding bandwidth
           {
-            memset (&grpData.scaleFactors[grpData.sfbsPerGroup], 0, (numSwbFrame - grpData.sfbsPerGroup) * sizeof (uint8_t));
+            for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++)
+            {
+              memset (&grpData.scaleFactors[maxSfbCh + m_numSwbShort * gr], 0, (numSwbFrame - maxSfbCh) * sizeof (uint8_t));
+            }
             grpData.sfbsPerGroup = coreConfig.icsInfoCurr[ch].maxSfb = numSwbFrame;
           }
           if (ch > 0) coreConfig.commonMaxSfb = (coreConfig.icsInfoCurr[0].maxSfb == coreConfig.icsInfoCurr[1].maxSfb);
@@ -1213,6 +1217,7 @@ unsigned ExhaleEncoder::spectralProcessing ()  // complete ics_info(), calc TNS
       if (coreConfig.commonWindow && (m_bitRateMode <= 4)) // stereo pre-processing analysis
       {
         const bool     eightShorts = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT);
+        const uint8_t meanSpecFlat = (((m_specAnaCurr[ci] >> 16) & UCHAR_MAX) + ((m_specAnaCurr[ci + 1] >> 16) & UCHAR_MAX) + 1) >> 1;
         const uint16_t* const swbo = swbOffsetsL[m_swbTableIdx];
         const uint16_t nSamplesMax = (samplingRate < 37566 ? nSamplesInFrame : swbo[brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)]);
         const int16_t  steAnaStats = m_specAnalyzer.stereoSigAnalysis (m_mdctSignals[ci], m_mdctSignals[ci + 1],
@@ -1222,14 +1227,15 @@ unsigned ExhaleEncoder::spectralProcessing ()  // complete ics_info(), calc TNS
 
         if ((s = abs (steAnaStats)) * m_perCorrCurr[el] == 0) // transitions to/from silence
         {
-          m_perCorrCurr[el] = (uint8_t) s;
+          m_perCorrCurr[el] = uint8_t((32 + s * __min (64, eightTimesSqrt256Minus[meanSpecFlat])) >> 6);
         }
         else // gentle overlap length dependent temporal smoothing
         {
           const int16_t allowedDiff = (coreConfig.icsInfoCurr[0].windowSequence < EIGHT_SHORT ? 16 : 32);
           const int16_t prevPerCorr = __max (128, __min (192, m_perCorrCurr[el]));
+          const int16_t currPerCorr = (32 + s * __min (64, eightTimesSqrt256Minus[meanSpecFlat])) >> 6;
 
-          m_perCorrCurr[el] = (uint8_t) __max (prevPerCorr - allowedDiff, __min (prevPerCorr + allowedDiff, (int16_t) s));
+          m_perCorrCurr[el] = (uint8_t) __max (prevPerCorr - allowedDiff, __min (prevPerCorr + allowedDiff, currPerCorr));
         }
 
         if (s == steAnaStats * -1) coreConfig.stereoConfig = 2; // 2: side > mid, pred_dir=1
@@ -1489,7 +1495,8 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o
         tsCurr[ch] = (m_tempAnaCurr[ci] /*R*/) & UCHAR_MAX;
         tsNext[ch] = (m_tempAnaNext[ci] >>  8) & UCHAR_MAX;
 
-        const bool lowOlapNext = (m_tranLocNext[ci] >= 0) || (sfNext < 68 && tfNext >= 204) || (tsCurr[ch] >= 153) || (tsNext[ch] >= 153);
+        const bool lowOlapNext = (m_tranLocNext[ci] >= 0) || (sfNext <= UCHAR_MAX / 4 && tfNext > (UCHAR_MAX * 13) / 16) ||
+                                 (tsCurr[ch] > (UCHAR_MAX * 5) / 8) || (tsNext[ch] > (UCHAR_MAX * 5) / 8);
         const bool sineWinCurr = (sfCurr >= 170) && (sfNext >= 170) && (sfCurr < 221) && (sfNext < 221) && (tsCurr[ch] < 20) &&
                                  (tfCurr >= 153) && (tfNext >= 153) && (tfCurr < 184) && (tfNext < 184) && (tsNext[ch] < 20);
         // set window_sequence
@@ -1499,11 +1506,11 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o
         }
         else // LONG_START_SEQUENCE, STOP_START_SEQUENCE, EIGHT_SHORT_SEQUENCE - min overlap
         {
-          wsCurr = (m_tranLocCurr[ci] >= 0) ? EIGHT_SHORT :
+          wsCurr = (m_tranLocCurr[ci] >= 0) || (tsCurr[ch] > (UCHAR_MAX * 5) / 8) || (tfCurr > (UCHAR_MAX * 15) / 16) ? EIGHT_SHORT :
 #if RESTRICT_TO_AAC
-                   (lowOlapNext && (m_tranLocNext[ci] >= 0 || wsPrev != EIGHT_SHORT) ? EIGHT_SHORT : LONG_STOP);
+                   (lowOlapNext ? EIGHT_SHORT : LONG_STOP);
 #else
-                   (lowOlapNext && (m_tranLocNext[ci] >= 0 || wsPrev != STOP_START) ? STOP_START : LONG_STOP);
+                   (lowOlapNext ? STOP_START : LONG_STOP);
 #endif
         }
 
diff --git a/src/lib/stereoProcessing.cpp b/src/lib/stereoProcessing.cpp
index 99673dc..82343dd 100644
--- a/src/lib/stereoProcessing.cpp
+++ b/src/lib/stereoProcessing.cpp
@@ -30,7 +30,7 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in
   const bool alterPredDir = (applyPredSte && (useAltPredDir > 0)); // predict mid from side?
   const SfbGroupData& grp = groupingData1;
   const bool  eightShorts = (grp.numWindowGroups > 1);
-  const uint8_t maxSfbSte = (eightShorts ? __max (grp.sfbsPerGroup, groupingData2.sfbsPerGroup) : numSwbFrame);
+  const uint8_t maxSfbSte = (eightShorts ? __min (numSwbFrame, __max (grp.sfbsPerGroup, groupingData2.sfbsPerGroup) + 1) : numSwbFrame);
   uint32_t  numSfbPredSte = 0; // counter
 
   if ((mdctSpectrum1 == nullptr) || (mdctSpectrum2 == nullptr) || (numSwbFrame < maxSfbSte) || (grp.numWindowGroups != groupingData2.numWindowGroups) ||
@@ -282,7 +282,11 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in
 
           sfbTempVar = (applyPredSte ? __max (rmsSfbM[b], rmsSfbS[b]) : __max (grpRms1[idx], grpRms2[idx]));
 
-          if (sfbFacLR <= 1.0) // total simultaneous masking - no positive SNR in either SFB
+          if ((grpStepSizes1[idx] == 0) || (grpStepSizes2[idx] == 0)) // HF noise filled SFB
+          {
+            grpStepSizes1[idx] = grpStepSizes2[idx] = 0;
+          }
+          else if (sfbFacLR <= 1.0)  // simultaneous masking - no positive SNR in either SFB
           {
             const double max = __max (sfbRmsL, sfbRmsR);