tune MSE test code

2025-02-15 02:50:47 +01:00 · 2023-09-30 19:00:00 +02:00 · 2023-09-30 19:00:00 +02:00 · 0b683be9c7
commit 0b683be9c7
parent 1b231994ec
3 changed files with 59 additions and 18 deletions
--- a/src/app/exhaleApp.cpp
+++ b/src/app/exhaleApp.cpp
@@ -57,10 +57,16 @@
 #define _GETCWD  getcwd
 #define _STRLEN  strlen

+#if 0  // change this to "#if 1" to avoid garbage text in some terminals
+#define EXHALE_TEXT_INIT  ""
+#define EXHALE_TEXT_BLUE  ""
+#define EXHALE_TEXT_PINK  ""
+#else
 #define EXHALE_TEXT_INIT  "\x1b[0m"
 #define EXHALE_TEXT_BLUE  "\x1b[36m"
 #define EXHALE_TEXT_PINK  "\x1b[35m"
 #endif
+#endif

 // constants, experimental macros
 #if LE_ACCURATE_CALC
@@ -866,7 +872,7 @@ int main (const int argc, char* argv[])
 #ifdef NO_PREROLL_DATA
    outAuData = (uint8_t*) malloc ((6144 >> 3) * numChannels); // max frame AU size
 #else
-    outAuData = (uint8_t*) malloc ((9216 >> 3) * numChannels); // max frame AU size
+    outAuData = (uint8_t*) malloc ((9984 >> 3) * numChannels); // max frame AU size
 #endif
    if ((inPcmData == nullptr) || (outAuData == nullptr))
    {
--- a/src/lib/bitStreamWriter.cpp
+++ b/src/lib/bitStreamWriter.cpp
@@ -1,5 +1,5 @@
 /* bitStreamWriter.cpp - source file for class with basic bit-stream writing capability
- * written by C. R. Helmrich, last modified in 2021 - see License.htm for legal notices
+ * written by C. R. Helmrich, last modified in 2023 - see License.htm for legal notices
 *
 * The copyright in this software is being made available under the exhale Copyright License
 * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
@@ -1135,7 +1135,7 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData,
    }
    else *rate = 0; // insufficient data
  }
-  memcpy (accessUnit, &m_auBitStream.stream.front (), __min (ci * (ipf ? 1152 : 768), bitCount >> 3));
+  memcpy (accessUnit, &m_auBitStream.stream.front (), __min (ci * (ipf ? 1248 : 768), bitCount >> 3));
 #endif
  return (bitCount >> 3);  // byte count
 }
--- a/src/lib/exhaleEnc.cpp
+++ b/src/lib/exhaleEnc.cpp
@@ -288,6 +288,7 @@ static inline uint8_t brModeAndFsToMaxSfbShort(const unsigned bitRateMode, const
  return (samplingRate > 51200 ? 11 : 13) - 2 + (bitRateMode >> 2);
 }

+#if !EE_MORE_MSE
 static inline void findActualBandwidthShort (uint8_t* const maxSfbShort, const uint16_t* sfbOffsets,
                                             const int32_t* mdctSignals, const int32_t* mdstSignals, const unsigned nSamplesInShort)
 {
@@ -322,6 +323,7 @@ static inline void findActualBandwidthShort (uint8_t* const maxSfbShort, const u

  if (*maxSfbShort > maxSfb) *maxSfbShort = maxSfb;
 }
+#endif

 static inline uint8_t stereoCorrGrouping (const SfbGroupData& grpData, const unsigned nSamplesInFrame, uint8_t* stereoCorrData)
 {
@@ -932,7 +934,9 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
 #if !RESTRICT_TO_AAC
        const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong);
 #endif
+#if !EE_MORE_MSE
        const uint16_t rateFac = m_bitAllocator.getRateCtrlFac (m_rateFactor, samplingRate, meanSpecFlat[ci], coreConfig.icsInfoPrev[ch].windowSequence == EIGHT_SHORT);
+#endif
        uint32_t*    stepSizes = &sfbStepSizes[ci * m_numSwbShort * NUM_WINDOW_GROUPS];

        memset (grpData.scaleFactors, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint8_t));
@@ -956,21 +960,30 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
            {
              s += unsigned (0.5 + sqrt ((double) abs (m_mdctSignals[1 - ci][b])));
            }
-            s = (s + 1) >> 1;
+            s = (s + 1u) >> 1;
          }
          if (grpOff[maxSfbCh] > grpOff[0])
          {
-            s = unsigned ((s * (eightShorts ? (24u + (grpData.windowGroupLength[gr] >> 2)) / grpData.windowGroupLength[gr] : 4u) + 4096u) >> 13);
+            s = unsigned ((s * (eightShorts ? (24u + (grpData.windowGroupLength[gr] >> 2)) / grpData.windowGroupLength[gr] : 3u) + 4096u) >> 13);
          }
-          s = unsigned (__max (1u + (INT32_MAX >> ((eightShorts ? 1 : 2) + (2 + m_bitRateMode / 9) * m_bitRateMode)), s * s));
+          s = __max (1u + ((UINT32_MAX / (eightShorts ? 3u : 8u)) >> ((2 + m_bitRateMode / 9) * m_bitRateMode)), s * s);
 #endif
          for (unsigned b = 0; b < maxSfbCh; b++)
          {
 #if EE_MORE_MSE
            const uint8_t sfbWidth = grpOff[b + 1] - grpOff[b];
-            const uint64_t sThresh = __max (1u + (INT32_MAX >> 29), (grpRms[b] * uint64_t (__max (16, b * b)) + 32u) >> 6);
+            const bool stereoCoded = (nrChannels == 2 && coreConfig.stereoMode > 0 && (coreConfig.stereoDataCurr[b] > 0 || !(coreConfig.stereoMode & 1)));
+            const uint32_t rmsbMax = (stereoCoded ? __max (grpRms[b], coreConfig.groupingData[1 - ch].sfbRmsValues[m_numSwbShort * gr + b]) : grpRms[b]);
+            const uint64_t sThresh = __max (1u + (UINT32_MAX >> 30), (rmsbMax * uint64_t (__max (16, b * b * grpData.numWindowGroups)) + 32u) >> 6);
+            const uint64_t predFac = (eightShorts || coreConfig.stereoMode < 3 || coreConfig.stereoDataCurr[b & 62] == 0 ? (eightShorts && !b ? 48u : 64u) :
+                                      uint64_t (0.5 + 64 - pow (__min (1.0, fabs (coreConfig.stereoDataCurr[b & 62] * 0.1 - 1.6)), 1.5) * 19.0)); // MS
+            grpStepSizes[b] = uint32_t (__min (sThresh, (s * predFac + 32u) >> 6));
+            if (stereoCoded && rmsbMax)
+            {
+              const uint32_t rmsCh = coreConfig.groupingData[1 - ch].sfbRmsValues[m_numSwbShort * gr + b];

-            grpStepSizes[b] = uint32_t (!eightShorts && s > sThresh ? sThresh : (eightShorts ? s >> __max (0, 2 - int (b)) : s));
+              grpStepSizes[b] = uint32_t (0.5 + grpStepSizes[b] * (1.0 - sqrt ((double) __min (grpRms[b], rmsCh) / rmsbMax) * 0.29289322));
+            }
 #else
            const unsigned lfConst = (samplingRate < 27713 && !eightShorts ? 1 : 2); // lfAtten: LF SNR boost, as in my M.Sc. thesis
            const unsigned lfAtten = (b <= 5 ? (eightShorts ? 1 : 4) + b * lfConst : 5 * lfConst - 1 + b + ((b + 5) >> 4));
@@ -1080,7 +1093,9 @@ unsigned ExhaleEncoder::quantizationCoding ()  // apply MDCT quantization and en
  const unsigned nSamplesInFrame  = toFrameLength (m_frameLength);
  const unsigned samplingRate     = toSamplingRate (m_frequencyIdx);
  const unsigned nSamplesTempAna  = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead
+#if !EE_MORE_MSE
  const bool     useMaxBandwidth  = (samplingRate < 37566 || m_shiftValSBR > 0);
+#endif
  const unsigned* const coeffMagn = m_sfbQuantizer.getCoeffMagnPtr ();
  uint8_t  meanSpecFlat[USAC_MAX_NUM_CHANNELS];
  uint8_t  meanTempFlat[USAC_MAX_NUM_CHANNELS] = {208, 208, 208, 208, 208, 208, 208, 208};
@@ -1183,14 +1198,14 @@ unsigned ExhaleEncoder::quantizationCoding ()  // apply MDCT quantization and en

        if (grpData.sfbsPerGroup > 0) // rate control part 2 to reach constrained VBR (CVBR)
        {
+#if EE_MORE_MSE
+          const unsigned targetBitCount25 = INT32_MAX;
+#else
          const uint8_t maxSfbLong  = (useMaxBandwidth ? 54 - (samplingRate >> 13) : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
          const uint8_t maxSfbShort = (useMaxBandwidth ? 19 - (samplingRate >> 13) : brModeAndFsToMaxSfbShort(m_bitRateMode, samplingRate));
          const uint16_t peakIndex  = (shortWinCurr ? 0 : (m_specAnaCurr[ci] >> 5) & 2047);
          const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort - 2 + (meanSpecFlat[ci] >> 6) : maxSfbLong  - 6 + (meanSpecFlat[ci] >> 5)) +
                                            (shortWinCurr ? -3 + (((1 << 5) + meanTempFlat[ci]) >> 6) : -7 + (((1 << 4) + meanTempFlat[ci]) >> 5));
-#if EE_MORE_MSE
-          const unsigned targetBitCount25 = INT32_MAX;
-#else
          const unsigned targetBitCount25 = ((60000 + 20000 * ((m_bitRateMode + m_shiftValSBR) >> (m_frameCount <= 1 ? 2 : 0))) * nSamplesInFrame) /
                                            (samplingRate * ((grpData.numWindowGroups + 1) >> 1));
 #endif
@@ -1475,7 +1490,9 @@ unsigned ExhaleEncoder::spectralProcessing ()  // complete ics_info(), calc TNS
            }
            icsCurr.maxSfb = __min (icsCurr.maxSfb, brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
          }
+#if !EE_MORE_MSE
          while (grpSO[icsCurr.maxSfb] > __max (m_bandwidCurr[ci], m_bandwidPrev[ci]) + (icsCurr.maxSfb >> 1)) icsCurr.maxSfb--; // detect BW
+#endif
        }
        else // icsCurr.windowSequence == EIGHT_SHORT
        {
@@ -1506,9 +1523,9 @@ unsigned ExhaleEncoder::spectralProcessing ()  // complete ics_info(), calc TNS
            }
          }
          memcpy (grpData.windowGroupLength, windowGroupingTable[icsCurr.windowGrouping], NUM_WINDOW_GROUPS * sizeof (uint8_t));
-
+#if !EE_MORE_MSE
          findActualBandwidthShort (&icsCurr.maxSfb, grpSO, m_mdctSignals[ci], nChannels < 2 ? nullptr : m_mdstSignals[ci], nSamplesInShort);
-
+#endif
          errorValue |= eightShortGrouping (grpData, grpSO, m_mdctSignals[ci], nChannels < 2 ? nullptr : m_mdstSignals[ci]);
        } // if EIGHT_SHORT

@@ -1520,14 +1537,26 @@ unsigned ExhaleEncoder::spectralProcessing ()  // complete ics_info(), calc TNS
          if (grpData.windowGroupLength[gr] == 1)
          {
            const uint8_t tonality = (m_specAnaCurr[ci] >> 16) & UCHAR_MAX;
-
 #if EE_MORE_MSE
-            tnsData.filterOrder[n] = (m_bitRateMode >= EE_MORE_MSE ? 0 : m_linPredictor.calcOptTnsCoeffs (tnsData.coeffParCor[n], tnsData.coeff[n], &tnsData.coeffResLow[n],
-                                                                                                          tnsData.filterOrder[n], s, tonality >> (m_tempFlatPrev[ci] >> 5)));
-#else
+            bool noTnsFilt = (m_bitRateMode >= EE_MORE_MSE || icsCurr.maxSfb <= 40);
+
+            if (!noTnsFilt && samplingRate >= 27713 && samplingRate < 55426 && icsCurr.maxSfb > 40)
+            {
+              errorValue |= m_specAnalyzer.getMeanAbsValues (m_mdctSignals[ci], m_mdstSignals[ci], nSamplesInFrame, ci, &grpSO[29], 12, grpData.sfbRmsValues);
+              if (errorValue == 0)
+              {
+                for (int b = 0; b < 12; b++)
+                {
+                  errorValue += unsigned (0.5 + sqrt ((double) grpData.sfbRmsValues[b]));
+                }
+                noTnsFilt |= (errorValue < ((unsigned) m_bitRateMode << 7)); // avoid clicks
+                errorValue = 0;
+              }
+            }
+            if (noTnsFilt) tnsData.filterOrder[n] = 0; else
+#endif
            tnsData.filterOrder[n] = m_linPredictor.calcOptTnsCoeffs (tnsData.coeffParCor[n], tnsData.coeff[n], &tnsData.coeffResLow[n],
                                                                      tnsData.filterOrder[n], s, tonality >> (m_tempFlatPrev[ci] >> 5));
-#endif
            tnsData.numFilters[n] = (tnsData.filterOrder[n] > 0 ? 1 : 0);
            if ((ch == 0) && (icsCurr.windowSequence == EIGHT_SHORT) && (tnsData.numFilters[n] == 0) && (tnsData.firstTnsWindow == gr))
            {
@@ -1709,13 +1738,19 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o
        const USAC_WSEQ wsPrev = icsPrev.windowSequence;
             USAC_WSEQ& wsCurr = icsCurr.windowSequence;
        // get temporal signal statistics, then determine overlap config. for the next frame
+#if !EE_MORE_MSE
        const unsigned  plCurr = abs (m_tranLocCurr[ci]) & ((1024 << m_shiftValSBR) - 1);
+#endif
        const unsigned  sfCurr = (m_tempAnaCurr[ci] >> 24) & UCHAR_MAX;
        const unsigned  tfCurr = (m_tempAnaCurr[ci] >> 16) & UCHAR_MAX;
+#if !EE_MORE_MSE
        const unsigned  plNext = abs (m_tranLocNext[ci]) & ((1024 << m_shiftValSBR) - 1);
+#endif
        const unsigned  sfNext = (m_tempAnaNext[ci] >> 24) & UCHAR_MAX;
        const unsigned  tfNext = (m_tempAnaNext[ci] >> 16) & UCHAR_MAX;
+#if !EE_MORE_MSE
        const unsigned tThresh = UCHAR_MAX * (__max (plCurr, plNext) < 614 /*0.6 * 1024*/ ? 16 : 15 - (m_bitRateMode >> 3));
+#endif

        tsCurr[ch] = (m_tempAnaCurr[ci] /*R*/) & UCHAR_MAX;
        tsNext[ch] = (m_tempAnaNext[ci] >>  8) & UCHAR_MAX;