retune TNS for SBR

2025-03-11 00:30:17 +01:00 · 2021-05-13 01:00:00 +02:00 · 2021-05-13 01:00:00 +02:00 · a327f120c1
commit a327f120c1
parent 08ac873cef
4 changed files with 37 additions and 56 deletions
--- a/src/lib/bitAllocation.cpp
+++ b/src/lib/bitAllocation.cpp
@@ -1,11 +1,11 @@
 /* bitAllocation.cpp - source file for class needed for psychoacoustic bit-allocation
- * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
+ * written by C. R. Helmrich, last modified in 2021 - see License.htm for legal notices
 *
 * The copyright in this software is being made available under the exhale Copyright License
 * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
 * party rights, including patent rights. No such rights are granted under this License.
 *
- * Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved.
+ * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
 */

 #include "exhaleLibPch.h"
@@ -205,13 +205,13 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA

    m_avgStepSize[ch] = 0;

-    b = ((specAnaStats[ch] >> 16) & UCHAR_MAX); // start with squared spec. flatness from spectral analysis
-    b = __max (b * b, (tempAnaStats[ch] >> 24) * (tempAnaStats[ch] >> 24)); // ..and from temporal analysis
-    m_avgSpecFlat[ch] = uint8_t ((b + (1 << 7)) >> 8); // normalized maximum
+    b = ((specAnaStats[ch] >> 16) & UCHAR_MAX);
+    b = __max (b * b, (tempAnaStats[ch] >> 24) * (tempAnaStats[ch] >> 24));
+    m_avgSpecFlat[ch] = uint8_t ((b + (1 << 7)) >> 8); // max. of squared SFM from spec. and temp. analysis

-    b = ((tempAnaStats[ch] >> 16) & UCHAR_MAX); // now derive squared temp. flatness from temporal analysis
-    b = __max (b * b, (specAnaStats[ch] >> 24) * (specAnaStats[ch] >> 24)); // ..and from spectral analysis
-    m_avgTempFlat[ch] = uint8_t ((b + (1 << 7)) >> 8); // normalized maximum
+    b = ((tempAnaStats[ch] >> 16) & UCHAR_MAX);
+    b = __max (b * b, (specAnaStats[ch] >> 24) * (specAnaStats[ch] >> 24));
+    m_avgTempFlat[ch] = uint8_t ((b + (1 << 7)) >> 8); // max. of squared TFM from spec. and temp. analysis

    if ((nBandsInCh == 0) || (grpData.numWindowGroups > NUM_WINDOW_GROUPS))
    {
@@ -373,12 +373,6 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA

  sumMeans = (sumMeans + (nMeans >> 1)) / nMeans;
  sumMeans *= sumMeans;  // since we've averaged square-roots
-#if BA_INTER_CHAN_SIM_MASK
-  if (nMeans > 3)
-  {
-    // TODO: cross-channel simultaneous masking for 4.0 - 7.1
-  }
-#endif

  for (unsigned ch = 0; ch < nChannels; ch++)
  {
@@ -435,9 +429,10 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
    const SfbGroupData& grpData = *groupData[ch];
    const uint32_t maxSfbInCh = __min (MAX_NUM_SWB_LONG, grpData.sfbsPerGroup);
    const bool    eightShorts = (grpData.numWindowGroups != 1);
-    const bool  lowRateTuning = (samplingRate >= 25495) && (sfm[ch] <= (SCHAR_MAX >> 1));
-    const uint32_t*   rms = grpData.sfbRmsValues;
-    uint32_t*   stepSizes = &sfbStepSizes[ch * numSwbShort * NUM_WINDOW_GROUPS];
+    const bool  lowRateTuning = (m_rateIndex == 0) && (samplingRate >= 25495 && sfm[ch] <= (SCHAR_MAX >> 1));
+    const bool undercodingRed = (m_rateIndex >  0) || (samplingRate >= 25495 && sfm[ch] * 8 > UCHAR_MAX * 7) || lowRateTuning;
+    const uint32_t* rms = grpData.sfbRmsValues;
+    uint32_t* stepSizes = &sfbStepSizes[ch * numSwbShort * NUM_WINDOW_GROUPS];

    if ((grpData.numWindowGroups * maxSfbInCh == 0) || (grpData.numWindowGroups > NUM_WINDOW_GROUPS))
    {
@@ -454,8 +449,7 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
      uint64_t  s = (eightShorts ? (nSamplesInFrame * grpData.windowGroupLength[gr]) >> 1 : nSamplesInFrame << 2);

      memset (m_tempSfbValue, UCHAR_MAX, maxSfbInCh * sizeof (uint8_t));
-
-      if ((m_rateIndex == 0) && lowRateTuning && (maxSfbInCh > 0) && !eightShorts)
+      if (lowRateTuning && (maxSfbInCh > 0) && !eightShorts)
      {
        uint32_t numRedBands = nSamplesInFrame; // final result lies between 1/4 and 1/2

@@ -491,10 +485,8 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
          }
        }
        if (grpRms[b] < grpRmsMin) grpRmsMin = grpRms[b];
-#if 1
-        if ((m_rateIndex > 0) || lowRateTuning)
-#endif
-        if (rmsComp >= rmsRef9 && (rmsComp < (grpStepSizes[b] >> 1)))  // zero-quantized
+
+        if (undercodingRed && (rmsComp >= rmsRef9) && (rmsComp < (grpStepSizes[b] >> 1))) // zero-quantized
        {
          s -= (sfbWidth * redFactor * __min (1u << 11, rmsComp) + (1u << 10)) >> 11;
        }
@@ -505,10 +497,8 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
        const uint32_t rmsComp = (grpSte != nullptr && grpSte[b] > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]);
        const uint32_t rmsRef9 = (commonWindow ? refRms[b] >> 9 : rmsComp);
        const uint8_t sfbWidth = grpOff[maxSfbL16k] - grpOff[b];
-#if 1
-        if ((m_rateIndex > 0) || lowRateTuning)
-#endif
-        if (rmsComp >= rmsRef9) // check only first SFB above max_sfb for simplification
+
+        if (undercodingRed && (rmsComp >= rmsRef9)) // check only first SFB above max_sfb as simplification
        {
          s -= (sfbWidth * redFactor * __min (1u << 11, rmsComp) + (1u << 10)) >> 11;
        }
@@ -522,7 +512,7 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
        grpStepSizes[b] = uint32_t ((__max (grpRmsMin, grpStepSizes[b]) * s * (m_tempSfbValue[b] + 1u) + (1u << 14)) >> 15);
        if (grpStepSizes[b] <= (grpRms[b] >> 11)) grpStepSizes[b] = __max (BA_EPS, grpRms[b] >> 11);

-        if ((m_rateIndex == 0) && lowRateTuning) // clip near-zero SNRs to a minimum SNR
+        if (lowRateTuning) // clip near-0 SNRs to minimum SNR
        {
          const uint32_t lim = uint32_t ((grpRms[b] * (8192u - (uint64_t) sfm[ch] * sfm[ch]) + (1u << 12)) >> 13);

--- a/src/lib/bitAllocation.h
+++ b/src/lib/bitAllocation.h
@@ -1,11 +1,11 @@
 /* bitAllocation.h - header file for class needed for psychoacoustic bit-allocation
- * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
+ * written by C. R. Helmrich, last modified in 2021 - see License.htm for legal notices
 *
 * The copyright in this software is being made available under the exhale Copyright License
 * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
 * party rights, including patent rights. No such rights are granted under this License.
 *
- * Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved.
+ * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
 */

 #ifndef _BIT_ALLOCATION_H_
@@ -16,7 +16,6 @@

 // constants, experimental macros
 #define BA_EPS                  1
-#define BA_INTER_CHAN_SIM_MASK  0  // 5.1 cross-channel simultaneous masking

 // class for audio bit-allocation
 class BitAllocator
--- a/src/lib/specAnalysis.cpp
+++ b/src/lib/specAnalysis.cpp
@@ -1,11 +1,11 @@
 /* specAnalysis.cpp - source file for class providing spectral analysis of MCLT signals
- * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
+ * written by C. R. Helmrich, last modified in 2021 - see License.htm for legal notices
 *
 * The copyright in this software is being made available under the exhale Copyright License
 * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
 * party rights, including patent rights. No such rights are granted under this License.
 *
- * Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved.
+ * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
 */

 #include "exhaleLibPch.h"
@@ -44,10 +44,8 @@ SpecAnalyzer::SpecAnalyzer ()
  for (unsigned ch = 0; ch < USAC_MAX_NUM_CHANNELS; ch++)
  {
    m_bandwidthOff[ch] = 0;
-#if SA_IMPROVED_SFM_ESTIM
    m_magnCorrPrev[ch] = 0;
    m_magnSpectra [ch] = nullptr;
-#endif
    m_numAnaBands [ch] = 0;
    m_specAnaStats[ch] = 0;
    memset (m_parCorCoeffs[ch], 0, MAX_PREDICTION_ORDER * sizeof (short));
@@ -169,7 +167,7 @@ unsigned SpecAnalyzer::initSigAnaMemory (LinearPredictor* const linPredictor, co
    return 1; // invalid arguments error
  }
  m_tnsPredictor = linPredictor;
-#if SA_IMPROVED_SFM_ESTIM
+
  for (unsigned ch = 0; ch < nChannels; ch++)
  {
    if ((m_magnSpectra[ch] = (uint32_t*) malloc (maxTransfLength * sizeof (uint32_t))) == nullptr)
@@ -178,7 +176,6 @@ unsigned SpecAnalyzer::initSigAnaMemory (LinearPredictor* const linPredictor, co
    }
    memset (m_magnSpectra[ch], 0, maxTransfLength * sizeof (uint32_t));
  }
-#endif
  return 0; // no error
 }

@@ -242,7 +239,7 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
  const unsigned thresholdStart = samplingRate >> 15;

  if ((mdctSignals == nullptr) || (mdstSignals == nullptr) || (nChannels > USAC_MAX_NUM_CHANNELS) || (lfeChannelIndex > USAC_MAX_NUM_CHANNELS) ||
-      (nSamplesInFrame > 2048) || (nSamplesInFrame < 2) || (samplingRate < 7350) || (samplingRate > 96000))
+      (nSamplesInFrame > 2048) || (nSamplesInFrame <= 127) || (samplingRate < 7350) || (samplingRate > 96000))
  {
    return 1; // invalid arguments error
  }
@@ -251,11 +248,10 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
  {
    const int32_t* const chMdct = mdctSignals[ch];
    const int32_t* const chMdst = mdstSignals[ch];
-#if SA_IMPROVED_SFM_ESTIM
    uint32_t* const   chPrvMagn = m_magnSpectra[ch];
    const bool improvedSfmEstim = (chPrvMagn != nullptr);
    uint16_t currMC = 0, numMC = 0; // channel average
-#endif
+
 // --- get L1 norm and max value in each band
    uint16_t idxMaxSpec = 0;
    uint64_t sumAvgBand = 0;
@@ -279,22 +275,19 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
      const uint16_t         offs = b << SA_BW_SHIFT; // start offset of current analysis band
      const int32_t* const  bMdct = &chMdct[offs];
      const int32_t* const  bMdst = &chMdst[offs];
-#if SA_IMPROVED_SFM_ESTIM
      uint32_t* const     prvMagn = (improvedSfmEstim ? &chPrvMagn[offs] : nullptr);
-#endif
      uint16_t maxAbsIdx = 0;
      uint32_t maxAbsVal = 0, tmp = UINT_MAX;
      uint64_t sumAbsVal = 0;
-#if SA_IMPROVED_SFM_ESTIM
      uint64_t sumAbsPrv = 0;
      uint64_t sumPrdCP  = 0, sumPrdCC = 0, sumPrdPP = 0;
      double ncp, dcc, dpp;
-#endif
+
      for (int s = SA_BW - 1; s >= 0; s--)
      {
        // sum absolute values of complex spectrum, derive L1 norm, peak value, and peak index
        const uint64_t absSample = complexAbs (bMdct[s], bMdst[s]);
-#if SA_IMPROVED_SFM_ESTIM
+
        if (improvedSfmEstim)   // correlation between current and previous magnitude spectrum
        {
          const uint64_t prvSample = prvMagn[s];
@@ -306,7 +299,6 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
          sumAbsPrv += prvSample;
          prvMagn[s] = (uint32_t) absSample;
        }
-#endif
        sumAbsVal += absSample;
        if (offs + s > 0) // exclude DC from max & min
        {
@@ -332,7 +324,6 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
      tmp/*mean*/ = uint32_t ((sumAbsVal + anaBwOffset) >> SA_BW_SHIFT);
      m_meanAbsValue[ch][b] = tmp;
      // spectral statistics
-#if SA_IMPROVED_SFM_ESTIM
      if (improvedSfmEstim && (b > 0) && ((unsigned) b < lpcStopBand16k))
      {
        dcc = double (tmp);
@@ -344,7 +335,6 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M

        currMC += (uint16_t) __min (UCHAR_MAX, sumPrdCP); numMC++; // temporal correlation sum
      }
-#endif
      if (b > 0)
      {
        sumAvgBand += tmp;
@@ -359,14 +349,19 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M

 // --- spectral analysis statistics for frame
    b = 1;
+#if SA_IMPROVED_FILT_CALC
+    if (samplingRate < 27713) sumAvgBand -= m_meanAbsValue[ch][b++];
+#endif
    while (((unsigned) b + 1 < lpcStopBand16k) && ((uint64_t) m_meanAbsValue[ch][b] * (m_numAnaBands[ch] - 1) > sumAvgBand)) b++;
    b = __min (m_bandwidthOff[ch], b << SA_BW_SHIFT);
-
+#if SA_IMPROVED_FILT_CALC
+    if (samplingRate < 27713) sumAvgBand += m_meanAbsValue[ch][1];
+#endif
    // obtain prediction gain across spectrum
    m_tnsPredGains[ch] = m_tnsPredictor->calcParCorCoeffs (&chMdct[b], __min (m_bandwidthOff[ch], lpcStopBand16k << SA_BW_SHIFT) - b,
                                                           MAX_PREDICTION_ORDER, m_parCorCoeffs[ch]);
    m_specAnaStats[ch] = packAvgSpecAnalysisStats (sumAvgBand, sumMaxBand, m_tnsPredGains[ch] >> 24, idxMaxSpec, (unsigned) b >> SA_BW_SHIFT);
-#if SA_IMPROVED_SFM_ESTIM
+
    if (improvedSfmEstim)
    {
      if (numMC > 1) currMC = (currMC + (numMC >> 1)) / numMC;// smoothed temporal correlation
@@ -375,7 +370,6 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M

      if (valMaxSpec > ((m_specAnaStats[ch] >> 16) & UCHAR_MAX)) m_specAnaStats[ch] = (m_specAnaStats[ch] & 0xFF00FFFF) | (valMaxSpec << 16);
    }
-#endif
  } // for ch

  return 0; // no error
--- a/src/lib/specAnalysis.h
+++ b/src/lib/specAnalysis.h
@@ -1,11 +1,11 @@
 /* specAnalysis.h - header file for class providing spectral analysis of MCLT signals
- * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
+ * written by C. R. Helmrich, last modified in 2021 - see License.htm for legal notices
 *
 * The copyright in this software is being made available under the exhale Copyright License
 * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
 * party rights, including patent rights. No such rights are granted under this License.
 *
- * Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved.
+ * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
 */

 #ifndef _SPEC_ANALYSIS_H_
@@ -19,7 +19,7 @@
 #define SA_BW  (1 << SA_BW_SHIFT)
 #define SA_EPS               1024
 #define SA_EXACT_COMPLEX_ABS    0
-#define SA_IMPROVED_SFM_ESTIM   1
+#define SA_IMPROVED_FILT_CALC   1
 #define SA_OPT_WINDOW_GROUPING  1

 // spectral signal analysis class
@@ -29,10 +29,8 @@ private:

  // member variables
  uint16_t m_bandwidthOff[USAC_MAX_NUM_CHANNELS];
-#if SA_IMPROVED_SFM_ESTIM
  uint8_t  m_magnCorrPrev[USAC_MAX_NUM_CHANNELS];
  uint32_t* m_magnSpectra[USAC_MAX_NUM_CHANNELS];
-#endif
  uint32_t m_meanAbsValue[USAC_MAX_NUM_CHANNELS][1024 >> SA_BW_SHIFT];
  uint16_t m_numAnaBands [USAC_MAX_NUM_CHANNELS];
  short    m_parCorCoeffs[USAC_MAX_NUM_CHANNELS][MAX_PREDICTION_ORDER];