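fixes and cleanup: enable resampler for bit-rate modes 0 and 1, pass sfm to imprSfbStepSizes, set sfbsPerGroup for LFE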

2025-06-05 21:59:32 +02:00 · 2020-07-27 01:00:00 +02:00
parent f8ad0b34d7
commit c72996090e
4 changed files with 18 additions and 19 deletions
--- a/src/app/exhaleApp.cpp
+++ b/src/app/exhaleApp.cpp
@ -92,7 +92,7 @@ static bool eaInitDownsampler (int32_t** resampleBuffer, const uint16_t bitRateM
 {
  const uint16_t inLength = (frameSize * 3u) >> 1;
  const uint16_t chLength = inLength + (frameSize >> 3);
-  const bool useResampler = (frameSize >= 512 && bitRateMode == 1 && sampleRate == 48000);
+  const bool useResampler = (frameSize >= 512 && bitRateMode <= 1 && sampleRate == 48000);

  if (useResampler)
  {
@ -550,7 +550,7 @@ int main (const int argc, char* argv[])

    if (wavReader.getSampleRate () > 32100 + (unsigned) variableCoreBitRateMode * 12000 + (variableCoreBitRateMode >> 2) * 3900
 #if ENABLE_RESAMPLING
-        && (variableCoreBitRateMode != 1 || wavReader.getSampleRate () != 48000)
+        && (variableCoreBitRateMode > 1 || wavReader.getSampleRate () != 48000)
 #endif
        )
    {
@ -704,7 +704,7 @@ int main (const int argc, char* argv[])
      if (*argv[1] != '#') // user-def. mode
      {
        fprintf_s (stdout, " Encoding %d-kHz %d-channel %d-bit WAVE to low-complexity xHE-AAC at %d kbit/s\n\n",
-                   sampleRate / 1000, numChannels, inSampDepth, __min (4, numChannels) * (24 + variableCoreBitRateMode * 8));
+                   sampleRate / 1000, numChannels, inSampDepth, __min (5, numChannels) * (24 + variableCoreBitRateMode * 8));
      }
      if (!readStdin && (mod3Percent > 0))
      {
--- a/src/lib/bitAllocation.cpp
+++ b/src/lib/bitAllocation.cpp
@ -412,7 +412,7 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
 unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USAC_MAX_NUM_CHANNELS], const uint8_t numSwbShort,
                                         const int32_t* const mdctSpec[USAC_MAX_NUM_CHANNELS], const unsigned nSamplesInFrame,
                                         const unsigned nChannels, const unsigned samplingRate, uint32_t* const sfbStepSizes,
-                                         const unsigned firstChannelIndex, const bool commonWindow /*= false*/,
+                                         const unsigned firstChannelIndex, const uint8_t* const sfm, const bool commonWindow,
                                         const uint8_t* const sfbStereoData /*= nullptr*/, const uint8_t stereoConfig /*= 0*/)
 {
  const uint8_t maxSfbL16k = 16 + __min (35, (9 << 17) / __max (1, samplingRate)); // SFB index at 15.8 kHz
@ -420,7 +420,7 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
  const uint32_t redWeight = __min (4, 9 - __min (9, m_rateIndex));
  short* const  tempCoeffs = (short* const) m_tempSfbValue;

-  if ((groupData == nullptr) || (mdctSpec == nullptr) || (sfbStepSizes == nullptr) || (nSamplesInFrame > 2048) ||
+  if ((groupData == nullptr) || (mdctSpec == nullptr) || (sfbStepSizes == nullptr) || (sfm == nullptr) || (nSamplesInFrame > 2048) ||
      (numSwbShort < MIN_NUM_SWB_SHORT) || (numSwbShort > MAX_NUM_SWB_SHORT) || (nChannels > USAC_MAX_NUM_CHANNELS) ||
      (samplingRate < 7350) || (samplingRate > 96000) || (firstChannelIndex > USAC_MAX_NUM_CHANNELS))
  {
@ -469,8 +469,8 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
          }
        }
        if (grpRms[b] < grpRmsMin) grpRmsMin = grpRms[b];
-#ifndef NO_DTX_MODE
-        if (m_rateIndex > 0)
+#if 1
+        if ((m_rateIndex > 0) || (samplingRate >= 27713 && sfm[ch] <= (SCHAR_MAX >> 1)))
 #endif
        if (rmsComp >= rmsRef9 && (rmsComp < (grpStepSizes[b] >> 1)))  // zero-quantized
        {
@ -483,8 +483,8 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
        const uint32_t rmsComp = (grpSte != nullptr && grpSte[b] > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]);
        const uint32_t rmsRef9 = (commonWindow ? refRms[b] >> 9 : rmsComp);
        const uint8_t sfbWidth = grpOff[maxSfbL16k] - grpOff[b];
-#ifndef NO_DTX_MODE
-        if (m_rateIndex > 0)
+#if 1
+        if ((m_rateIndex > 0) || (samplingRate >= 27713 && sfm[ch] <= (SCHAR_MAX >> 1)))
 #endif
        if (rmsComp >= rmsRef9) // check only first SFB above max_sfb for simplification
        {
@ -498,6 +498,7 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
      for (b = 0; b < maxSfbInCh; b++) // improve step-sizes by limiting and attenuation
      {
        grpStepSizes[b] = uint32_t ((__max (grpRmsMin, grpStepSizes[b]) * s * (m_tempSfbValue[b] + 1u) + (1u << 14)) >> 15);
+        if (grpStepSizes[b] <= (grpRms[b] >> 11)) grpStepSizes[b] = __max (BA_EPS, grpRms[b] >> 11);
      }
    } // for gr
  } // for ch
--- a/src/lib/bitAllocation.h
+++ b/src/lib/bitAllocation.h
@ -51,7 +51,7 @@ public:
  unsigned imprSfbStepSizes (const SfbGroupData* const groupData[USAC_MAX_NUM_CHANNELS], const uint8_t numSwbShort,
                             const int32_t* const mdctSpec[USAC_MAX_NUM_CHANNELS], const unsigned nSamplesInFrame,
                             const unsigned nChannels, const unsigned samplingRate, uint32_t* const sfbStepSizes,
-                             const unsigned firstChannelIndex, const bool commonWindow = false,
+                             const unsigned firstChannelIndex, const uint8_t* const sfm, const bool commonWindow,
                             const uint8_t* const sfbStereoData = nullptr, const uint8_t stereoConfig = 0);
 }; // BitAllocator

--- a/src/lib/exhaleEnc.cpp
+++ b/src/lib/exhaleEnc.cpp
@ -941,14 +941,13 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
      else memset (coreConfig.stereoDataCurr, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint8_t));

      errorValue |= m_bitAllocator.imprSfbStepSizes (m_scaleFacData, m_numSwbShort, m_mdctSignals, nSamplesInFrame,
-                                                     nrChannels, samplingRate, sfbStepSizes, ci, coreConfig.commonWindow,
-                                                     coreConfig.stereoDataCurr, coreConfig.stereoConfig);
+                                                     nrChannels, samplingRate, sfbStepSizes, ci, meanSpecFlat,
+                                                     coreConfig.commonWindow, coreConfig.stereoDataCurr, coreConfig.stereoConfig);

      for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
      {
        SfbGroupData&  grpData = coreConfig.groupingData[ch];
        const bool eightShorts = (coreConfig.icsInfoCurr[ch].windowSequence == EIGHT_SHORT);
-        const bool saveBitRate = (meanSpecFlat[ci] > (UCHAR_MAX * 3) / 4 && samplingRate >= 32000 + (unsigned) m_bitRateMode * 12000);
        const uint8_t maxSfbCh = grpData.sfbsPerGroup;
 #if !RESTRICT_TO_AAC
        const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong);
@ -968,7 +967,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
          for (unsigned b = 0; b < maxSfbCh; b++)
          {
            const unsigned lfConst = (samplingRate < 27713 && !eightShorts ? 1 : 2); // lfAtten: LF SNR boost, as in my M.Sc. thesis
-            const unsigned lfAtten = (saveBitRate || b <= 5 ? (eightShorts ? 1 : 4) + b * lfConst : 5 * lfConst - 1 + b + ((b + 5) >> 4));
+            const unsigned lfAtten = (b <= 5 ? (eightShorts ? 1 : 4) + b * lfConst : 5 * lfConst - 1 + b + ((b + 5) >> 4));
            const uint8_t sfbWidth = grpOff[b + 1] - grpOff[b];
            const uint64_t   scale = scaleBr * mSfmFac * __min (32, lfAtten * grpData.numWindowGroups); // rate control part 1 (SFB)

@ -987,8 +986,6 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
          const uint8_t numSwbFrame = __min ((numSwbCh * ((maxSfbCh == maxSfbCurr) || (m_bitRateMode <= 2) ? 4u : 3u)) >> 2,
                                      (eightShorts ? maxSfbCh : maxSfbLong) + (m_bitRateMode < 2 || m_bitRateMode > 3 || keepMaxSfbCurr ? 0 : 1));
 #ifndef NO_DTX_MODE
-          const bool prvEightShorts = (coreConfig.icsInfoPrev[ch].windowSequence == EIGHT_SHORT);
-
          if ((m_bitRateMode < 1) && (m_numElements == 1) && (samplingRate < 27713) && eightShorts)
          {
            for (s = 0; s < 26; s++) m_sfbLoudMem[ch][s][m_frameCount & 31] = uint16_t (sqrt (double (getThr (ch, s) << (samplingRate >> 13))));
@ -1007,7 +1004,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
              {
                const uint32_t*  refRms = &coreConfig.groupingData[1 - ch].sfbRmsValues[m_numSwbShort * gr];
                uint8_t*  grpStereoData = &coreConfig.stereoDataCurr[m_numSwbShort * gr];
-                const unsigned sfbStart = (prvEightShorts ? (samplingRate > 16000 ? 24 : 17) : m_specGapFiller.getFirstGapFillSfb ());
+                const unsigned sfbStart = __max (samplingRate < 18783 ? 17 : 24, m_specGapFiller.getFirstGapFillSfb ());

                for (s = sfbStart; s < maxSfbCh; s++)
                {
@ -1021,7 +1018,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
              else
              if ((m_bitRateMode <= 4) && (meanSpecFlat[ci] <= (SCHAR_MAX >> 1))) // low-RMS
              {
-                for (s = (prvEightShorts ? (samplingRate < 27713 ? 24 : 22) : m_specGapFiller.getFirstGapFillSfb ()); s < maxSfbCh; s++)
+                for (s = __max (samplingRate < 27713 ? (samplingRate < 18783 ? 17 : 24) : 22, m_specGapFiller.getFirstGapFillSfb ()); s < maxSfbCh; s++)
                {
                  if (grpRms[s] < ((3 * TA_EPS) >> 1)) grpData.scaleFactors[s + m_numSwbShort * gr] = 0;
                }
@ -1360,6 +1357,7 @@ unsigned ExhaleEncoder::spectralProcessing ()  // complete ics_info(), calc TNS

      icsCurr.maxSfb = MAX_NUM_SWB_LFE;
      while (grpSO[icsCurr.maxSfb] > LFE_MAX) icsCurr.maxSfb--; // limit coefficients in LFE
+      grpData.sfbsPerGroup = icsCurr.maxSfb;
      ci++;
    }
    else // SCE or CPE: bandwidth-to-max_sfb mapping, short-window grouping for each channel
@ -2078,7 +2076,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin

  // initialize coder class memory
  m_tempIntBuf = m_timeSignals[0];
-  if (m_bitAllocator.initAllocMemory (&m_linPredictor, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode) > 0 ||
+  if (m_bitAllocator.initAllocMemory (&m_linPredictor, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode >> ((nChannels - 1) >> 1)) > 0 ||
 #if EC_TRELLIS_OPT_CODING
      m_sfbQuantizer.initQuantMemory (nSamplesInFrame, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode, toSamplingRate (m_frequencyIdx)) > 0 ||
 #else