extrema tuning

2025-01-23 20:20:52 +01:00 · 2020-05-01 15:00:35 +02:00 · 2020-05-01 15:00:35 +02:00 · a3be5338f2
commit a3be5338f2
parent 70534ca7b4
4 changed files with 80 additions and 9 deletions
--- a/src/lib/exhaleEnc.cpp
+++ b/src/lib/exhaleEnc.cpp
@@ -845,7 +845,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
        const bool eightShorts = (coreConfig.icsInfoCurr[ch].windowSequence == EIGHT_SHORT);
        const uint8_t maxSfbCh = grpData.sfbsPerGroup;
        const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong);
-        const uint8_t  mSfmFac = eightTimesSqrt256Minus[meanSpecFlat[ci]];
+        const uint16_t mSfmFac = UCHAR_MAX - ((9u * meanSpecFlat[ci]) >> 4);
        uint32_t*    stepSizes = &sfbStepSizes[ci * m_numSwbShort * NUM_WINDOW_GROUPS];

        memset (grpData.scaleFactors, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint8_t));
@@ -909,7 +909,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
            const unsigned lfAtten = (b <= 5 ? (eightShorts ? 1 : 4) + b * lfConst : 5 * lfConst - 1 + b + ((b + 5) >> 4));
            const uint8_t sfbWidth = grpOff[b + 1] - grpOff[b];
            const uint64_t rateFac = mSfmFac * s * __min (32, lfAtten * grpData.numWindowGroups); // rate control part 1
-            const uint64_t sScaled = ((1u << 23) + __max (grpRmsMin, grpStepSizes[b]) * scaleBr * rateFac) >> 24;
+            const uint64_t sScaled = ((1u << 24) + __max (grpRmsMin, grpStepSizes[b]) * scaleBr * rateFac) >> 25;

            // scale step-sizes according to VBR mode & derive scale factors from step-sizes
            grpStepSizes[b] = uint32_t (__max (BA_EPS, __min (UINT_MAX, sScaled)));
@@ -1972,7 +1972,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin
 #else
  if (m_sfbQuantizer.initQuantMemory (nSamplesInFrame) > 0 ||
 #endif
-      m_specAnalyzer.initLinPredictor (&m_linPredictor) > 0 ||
+      m_specAnalyzer.initSigAnaMemory (&m_linPredictor, m_bitRateMode <= 4 ? nChannels : 0, nSamplesInFrame) > 0 ||
      m_transform.initConstants (m_tempIntBuf, m_timeWindowL, m_timeWindowS, nSamplesInFrame) > 0)
  {
    errorValue |= 1;
--- a/src/lib/quantization.cpp
+++ b/src/lib/quantization.cpp
@@ -527,7 +527,7 @@ unsigned SfbQuantizer::initQuantMemory (const unsigned maxTransfLength,
 {
  const unsigned numScaleFactors = (unsigned) maxScaleFacIndex + 1;
 #if EC_TRELLIS_OPT_CODING
-  const uint8_t numTrellisStates = (samplingRate < 44100 ? 8 - samplingRate / 32000 : 5) - __min (2, (bitRateMode + 2) >> 2); // states per SFB
+  const uint8_t numTrellisStates = (samplingRate < 44100 ? 8 - samplingRate / 16000 : 5) - __min (2, (bitRateMode + 2) >> 2); // states per SFB
  const uint8_t numSquaredStates = numTrellisStates * numTrellisStates;
  const uint16_t quantRateLength = (samplingRate < 28800 ? 512 : 256); // quantizeMagnRDOC()
 #endif
--- a/src/lib/specAnalysis.cpp
+++ b/src/lib/specAnalysis.cpp
@@ -30,6 +30,10 @@ SpecAnalyzer::SpecAnalyzer ()
  for (unsigned ch = 0; ch < USAC_MAX_NUM_CHANNELS; ch++)
  {
    m_bandwidthOff[ch] = 0;
+#if SA_IMPROVED_SFM_ESTIM
+    m_magnCorrPrev[ch] = 0;
+    m_magnSpectra [ch] = nullptr;
+#endif
    m_numAnaBands [ch] = 0;
    m_specAnaStats[ch] = 0;
    memset (m_parCorCoeffs[ch], 0, MAX_PREDICTION_ORDER * sizeof (short));
@@ -171,14 +175,23 @@ void SpecAnalyzer::getSpectralBandwidth (uint16_t bandwidthOffset[USAC_MAX_NUM_C
  memcpy (bandwidthOffset, m_bandwidthOff, nChannels * sizeof (uint16_t));
 }

-unsigned SpecAnalyzer::initLinPredictor (LinearPredictor* const linPredictor)
+unsigned SpecAnalyzer::initSigAnaMemory (LinearPredictor* const linPredictor, const unsigned nChannels, const unsigned maxTransfLength) // stores predictor, (re)allocates zeroed magnitude buffers for the first nChannels channels; returns 0 (ok), 1 (bad argument) or 2 (out of memory)
 {
  if (linPredictor == nullptr)
  {
    return 1; // invalid arguments error
  }
  m_tnsPredictor = linPredictor;
-
+#if SA_IMPROVED_SFM_ESTIM
+  for (unsigned ch = 0; ch < USAC_MAX_NUM_CHANNELS; ch++) // bounded by the member array size, not by the caller-supplied nChannels
+  {
+    free (m_magnSpectra[ch]); // drop a buffer left by an earlier call so repeated initialization cannot leak (free of nullptr is a no-op)
+    m_magnSpectra[ch] = (ch < nChannels ? (uint32_t*) calloc (maxTransfLength, sizeof (uint32_t)) : nullptr); // zero-filled; unused channels stay nullptr
+    if ((ch < nChannels) && (m_magnSpectra[ch] == nullptr))
+    {
+      return 2; // mem. allocation error
+    }
+  }
+#endif
  return 0; // no error
 }

@@ -236,6 +249,7 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
                                         const unsigned nChannels, const unsigned nSamplesInFrame, const unsigned samplingRate,
                                         const unsigned lfeChannelIndex /*= USAC_MAX_NUM_CHANNELS*/) // to skip an LFE channel
 {
+  const uint64_t anaBwOffset = SA_BW >> 1;
  const unsigned lpcStopBand16k = (samplingRate <= 32000 ? nSamplesInFrame : (32000 * nSamplesInFrame) / samplingRate) >> SA_BW_SHIFT;
  const unsigned thresholdSlope = (48000 + SA_EPS * samplingRate) / 96000;
  const unsigned thresholdStart = samplingRate >> 15;
@@ -250,6 +264,11 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
  {
    const int32_t* const chMdct = mdctSignals[ch];
    const int32_t* const chMdst = mdstSignals[ch];
+#if SA_IMPROVED_SFM_ESTIM
+    uint32_t* const   chPrvMagn = m_magnSpectra[ch];
+    const bool improvedSfmEstim = (chPrvMagn != nullptr);
+    uint16_t currMC = 0, numMC = 0; // channel average
+#endif
 // --- get L1 norm and max value in each band
    uint16_t idxMaxSpec = 0;
    uint64_t sumAvgBand = 0;
@@ -273,10 +292,17 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
      const uint16_t         offs = b << SA_BW_SHIFT; // start offset of current analysis band
      const int32_t* const  bMdct = &chMdct[offs];
      const int32_t* const  bMdst = &chMdst[offs];
+#if SA_IMPROVED_SFM_ESTIM
+      uint32_t* const     prvMagn = (improvedSfmEstim ? &chPrvMagn[offs] : nullptr);
+#endif
      uint16_t maxAbsIdx = 0;
      uint32_t maxAbsVal = 0, tmp = UINT_MAX;
      uint64_t sumAbsVal = 0;
-
+#if SA_IMPROVED_SFM_ESTIM
+      uint64_t sumAbsPrv = 0;
+      uint64_t sumPrdCP  = 0, sumPrdCC = 0, sumPrdPP = 0;
+      double ncp, dcc, dpp;
+#endif
      for (int s = SA_BW - 1; s >= 0; s--)
      {
        // sum absolute values of complex spectrum, derive L1 norm, peak value, and peak index
@@ -287,6 +313,19 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
        const uint64_t absReal   = abs (bMdct[s]);   // Richard Lyons, 1997; en.wikipedia.org/
        const uint64_t absImag   = abs (bMdst[s]);   // wiki/Alpha_max_plus_beta_min_algorithm
        const uint32_t absSample = uint32_t (absReal > absImag ? absReal + ((absImag * 3) >> 3) : absImag + ((absReal * 3) >> 3));
+#endif
+#if SA_IMPROVED_SFM_ESTIM
+        if (improvedSfmEstim)   // correlation between current and previous magnitude spectrum
+        {
+          const uint64_t prvSample = prvMagn[s];
+
+          sumPrdCP += ((uint64_t) absSample * prvSample + anaBwOffset) >> SA_BW_SHIFT;
+          sumPrdCC += ((uint64_t) absSample * absSample + anaBwOffset) >> SA_BW_SHIFT;
+          sumPrdPP += ((uint64_t) prvSample * prvSample + anaBwOffset) >> SA_BW_SHIFT;
+
+          sumAbsPrv += prvSample;
+          prvMagn[s] = absSample;
+        }
 #endif
        sumAbsVal += absSample;
        if (offs + s > 0) // exclude DC from max & min
@@ -310,9 +349,22 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
        m_bandwidthOff[ch] = __min (m_bandwidthOff[ch], nSamplesInFrame);
      }
      // save mean magnitude
-      tmp/*mean*/ = uint32_t ((sumAbsVal + (1 << (SA_BW_SHIFT - 1))) >> SA_BW_SHIFT);
+      tmp/*mean*/ = uint32_t ((sumAbsVal + anaBwOffset) >> SA_BW_SHIFT);
      m_meanAbsValue[ch][b] = tmp;
      // spectral statistics
+#if SA_IMPROVED_SFM_ESTIM
+      if (improvedSfmEstim && (b > 0) && ((unsigned) b < lpcStopBand16k))
+      {
+        dcc = double (tmp);
+        dpp = double ((sumAbsPrv + anaBwOffset) >> SA_BW_SHIFT);
+        ncp = (sumPrdCP + dcc * dpp) * SA_BW - sumAbsVal * dpp - sumAbsPrv * dcc;
+        dcc = (sumPrdCC + dcc * dcc) * SA_BW - sumAbsVal * dcc - sumAbsVal * dcc;
+        dpp = (sumPrdPP + dpp * dpp) * SA_BW - sumAbsPrv * dpp - sumAbsPrv * dpp;
+        sumPrdCP = uint64_t ((ncp <= 0.0) || (dcc * dpp <= 0.0) ? 0 : 0.5 + (256.0 * ncp * ncp) / (dcc * dpp));
+
+        currMC += (uint16_t) __min (UCHAR_MAX, sumPrdCP); numMC++; // temporal correlation sum
+      }
+#endif
      if (b > 0)
      {
        sumAvgBand += tmp;
@@ -334,6 +386,16 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
    m_tnsPredGains[ch] = m_tnsPredictor->calcParCorCoeffs (&chMdct[b], __min (m_bandwidthOff[ch], lpcStopBand16k << SA_BW_SHIFT) - b,
                                                           MAX_PREDICTION_ORDER, m_parCorCoeffs[ch]);
    m_specAnaStats[ch] = packAvgSpecAnalysisStats (sumAvgBand, sumMaxBand, m_tnsPredGains[ch] >> 24, idxMaxSpec, (unsigned) b >> SA_BW_SHIFT);
+#if SA_IMPROVED_SFM_ESTIM
+    if (improvedSfmEstim)
+    {
+      if (numMC > 1) currMC = (currMC + (numMC >> 1)) / numMC;// smoothed temporal correlation
+      valMaxSpec = (currMC + m_magnCorrPrev[ch] + 1) >> 1;
+      m_magnCorrPrev[ch] = (uint8_t) currMC; // update
+
+      if (valMaxSpec > ((m_specAnaStats[ch] >> 16) & UCHAR_MAX)) m_specAnaStats[ch] = (m_specAnaStats[ch] & 0xFF00FFFF) | (valMaxSpec << 16);
+    }
+#endif
  } // for ch

  return 0; // no error
--- a/src/lib/specAnalysis.h
+++ b/src/lib/specAnalysis.h
@@ -20,6 +20,7 @@
 #define SA_EPS               1024
 #define SA_EXACT_COMPLEX_ABS    0
 #define SA_IMPROVED_REAL_ABS    1
+#define SA_IMPROVED_SFM_ESTIM   1
 #define SA_OPT_WINDOW_GROUPING  1

 // spectral signal analysis class
@@ -29,6 +30,10 @@ private:

  // member variables
  uint16_t m_bandwidthOff[USAC_MAX_NUM_CHANNELS];
+#if SA_IMPROVED_SFM_ESTIM
+  uint8_t  m_magnCorrPrev[USAC_MAX_NUM_CHANNELS];
+  uint32_t* m_magnSpectra[USAC_MAX_NUM_CHANNELS];
+#endif
  uint32_t m_meanAbsValue[USAC_MAX_NUM_CHANNELS][1024 >> SA_BW_SHIFT];
  uint16_t m_numAnaBands [USAC_MAX_NUM_CHANNELS];
  short    m_parCorCoeffs[USAC_MAX_NUM_CHANNELS][MAX_PREDICTION_ORDER];
@@ -41,7 +46,11 @@ public:
  // constructor
  SpecAnalyzer ();
  // destructor
+#if SA_IMPROVED_SFM_ESTIM
+  ~SpecAnalyzer () { for (unsigned ch = 0; ch < USAC_MAX_NUM_CHANNELS; ch++) MFREE (m_magnSpectra[ch]); } // passes every per-channel m_magnSpectra pointer to MFREE (presumably frees non-null buffers - confirm against the macro definition)
+#else
  ~SpecAnalyzer () { }
+#endif
  // public functions
  unsigned getLinPredCoeffs (short parCorCoeffs[MAX_PREDICTION_ORDER], const unsigned channelIndex); // returns best filter order
  unsigned getMeanAbsValues (const int32_t* const mdctSignal, const int32_t* const mdstSignal, const unsigned nSamplesInFrame,
@@ -49,7 +58,7 @@ public:
                             uint32_t* const meanBandValues);
  void getSpecAnalysisStats (uint32_t avgSpecAnaStats[USAC_MAX_NUM_CHANNELS], const unsigned nChannels);
  void getSpectralBandwidth (uint16_t bandwidthOffset[USAC_MAX_NUM_CHANNELS], const unsigned nChannels);
-  unsigned initLinPredictor (LinearPredictor* const linPredictor);
+  unsigned initSigAnaMemory (LinearPredictor* const linPredictor, const unsigned nChannels, const unsigned maxTransfLength);
 #if SA_OPT_WINDOW_GROUPING
  unsigned optimizeGrouping (const unsigned channelIndex, const unsigned preferredBandwidth, const unsigned preferredGrouping);
 #endif