low-energy tuning

2025-03-11 16:50:09 +01:00 · 2020-04-23 00:00:56 +02:00 · 2020-04-23 00:00:56 +02:00 · 8bc32a02cd
commit 8bc32a02cd
parent 660cffb257
4 changed files with 32 additions and 11 deletions
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@ exhale is being made available under an open-source license which is
 similar to the 3-clause BSD license but modified to address specific
 aspects dictated by the nature and the output of this application.

-The license text and release notes for the current version 1.0.2 can
+The license text and release notes for the current version 1.0.3 can
 be found in the `include` subdirectory of the exhale distribution.


--- a/include/Release.htm
+++ b/include/Release.htm
@@ -25,9 +25,13 @@
 <td valign="top">

 <h1><br><span class="pink">exhale</span> - <span class="pink">e</span>codis e<span class="pink">x</span>tended <span class="pink">h</span>igh-efficiency <span class="pink">a</span>nd <span class="pink">l</span>ow-complexity <span class="pink">e</span>ncoder<br><span class="gray"><sup><br>Software Release Notes, Version History, Known Issues, Upcoming Feature Roadmap</sup></span><br><br></h1>
-<h3>&nbsp; &nbsp;The version of this distribution of the &laquo;exhale&raquo; software release is <b>1.0.2</b> (official pub&shy;lic minor release) from March 2020. Please check <a href="http://www.ecodis.de">www.ecodis.de</a> regularly for new versions of this software. A summary of each version up to this release, a list of known issues with this release, and a roadmap of additional functionality are provided below.</h3>
+<h3>&nbsp; &nbsp;The version of this distribution of the &laquo;exhale&raquo; software release is <b>1.0.3</b> (official pub&shy;lic minor release) from April 2020. Please check <a href="http://www.ecodis.de">www.ecodis.de</a> regularly for new versions of this software. A summary of each version up to this release, a list of known issues with this release, and a roadmap of additional functionality are provided below.</h3>
 <h3><br><b>Chronological Version History</b></h3>
-<h3>&nbsp; &nbsp;Version <b>1.0.2 <span class="gray">&nbsp;Mar. 2020, this release</span></b></h3>
+<h3>&nbsp; &nbsp;Version <b>1.0.3 <span class="gray">&nbsp;Apr. 2020, this release</span></b></h3>
+<ul>
+ <li><h3>extended basic joint-stereo coding functionality for mid/high rates, minor bugfixes</h3></li>
+</ul>
+<h3>&nbsp; &nbsp;Version <b>1.0.2 <span class="gray">&nbsp;Mar. 2020</span></b></h3>
 <ul>
 <li><h3>added basic low/mid-rate joint-stereo coding functionality, bugfixes, and speedups</h3></li>
 <li><h3>exhaleApp: support for input sampling rates of up to 48000 Hz with CVBR mode 2</h3></li>
@@ -63,13 +67,13 @@
 <h3>&nbsp; &nbsp;If you are in need of an additional library or application feature <b>not</b> mentioned below, please contact ecodis or a contributor with a request, and we will see what we can do.</h3>
 <ul>
 <li><h3>support for coding with a core coder frame length of 768 samples, no version plan</h3></li>
- <li><h3>exhaleLib: finalized integration of higher-rate joint-channel coding, version 1.0.3</h3></li>
- <li><h3>exhaleLib: quality tuning and bug fixing for low-rate stereo coding, version 1.0.3</h3></li>
- <li><h3>exhaleLib: finalization of support for 3.0 &#x2013; 5.1 multichannel coding, version 1.0.4</h3></li>
+ <li><h3>exhaleLib: completed integration of predictive joint-channel coding, version 1.0.4</h3></li>
+ <li><h3>exhaleLib: quality tuning and bug fixing for low-rate stereo coding, version 1.0.4</h3></li>
+ <li><h3>exhaleLib: finalization of support for 3.0 &#x2013; 5.1 multichannel coding, version 1.0.5</h3></li>
 <li><h3>exhaleLib: speed-ups and further quality tuning for critical signals, version 1.0.5.</h3></li>
 </ul>
 <h3><br></h3>
-<h4><span class="gray">Written by C. R. Helmrich for exhale 1.0.2, Mar. 2020. Available at www.ecodis.de/exhale/release.htm.</span><br><br></h4>
+<h4><span class="gray">Written by C. R. Helmrich for exhale 1.0.3, Apr. 2020. Available at www.ecodis.de/exhale/release.htm.</span><br><br></h4>

 </td>
 <td valign="top" colspan="2">
--- a/src/lib/exhaleEnc.cpp
+++ b/src/lib/exhaleEnc.cpp
@@ -693,7 +693,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
  const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
  const uint32_t maxSfbLong      = (samplingRate < 37566 ? 51 /*32 kHz*/ : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
  const uint32_t reductionFactor = (samplingRate < 37566 ? 2 : 3);  // undercoding reduction
-  const uint64_t scaleSr         = (samplingRate < 27713 ? 37 - m_bitRateMode : 37) - (m_bitRateMode > 3 ? nChannels >> 1 : 0);
+  const uint64_t scaleSr         = (samplingRate < 27713 ? 37 - m_bitRateMode : 37) - (m_bitRateMode > 2 ? nChannels >> 1 : 0);
  const uint64_t scaleBr         = (m_bitRateMode == 0 ? 32 : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1));
  uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf;
  uint8_t  meanSpecFlat[USAC_MAX_NUM_CHANNELS];
@@ -857,7 +857,9 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
          const uint32_t* refRms = &coreConfig.groupingData[1 - ch].sfbRmsValues[m_numSwbShort * gr];
          uint8_t*  grpScaleFacs = &grpData.scaleFactors[m_numSwbShort * gr];
          uint32_t* grpStepSizes = &stepSizes[m_numSwbShort * gr];
+#if SA_IMPROVED_REAL_ABS
          uint8_t* grpStereoData = &coreConfig.stereoDataCurr[m_numSwbShort * gr];
+#endif
          uint32_t  b, grpRmsMin = INT_MAX; // min. RMS value, used for overcoding reduction

          // undercoding reduction for case where large number of coefs is quantized to zero
@@ -873,6 +875,9 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
                                     getComplexRmsValue (refRms[b], gr, b, numSwbCh, coreConfig.tnsData[1 - ch]) >> 9);
 #endif
            if (grpRms[b] < grpRmsMin) grpRmsMin = grpRms[b];
+#ifndef NO_DTX_MODE
+            if ((m_bitRateMode > 3) || (meanSpecFlat[ci] > (SCHAR_MAX >> 1)) || (rmsComp >= TA_EPS))
+#endif
            if (rmsComp >= rmsRef9 && (rmsComp < (grpStepSizes[b] >> 1)))  // zero-quantized
            {
              s -= ((grpOff[b + 1] - grpOff[b]) * reductionFactor * __min (2 * SA_EPS, rmsComp) + SA_EPS) >> 11; // / (2 * SA_EPS)
@@ -887,6 +892,9 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
            const uint32_t rmsComp = getComplexRmsValue (grpRms[b], gr, b, numSwbCh, coreConfig.tnsData[ch]);
            const uint32_t rmsRef9 = (!coreConfig.commonWindow ? rmsComp :
                                     getComplexRmsValue (refRms[b], gr, b, numSwbCh, coreConfig.tnsData[1 - ch]) >> 9);
+#endif
+#ifndef NO_DTX_MODE
+            if ((m_bitRateMode > 3) || (meanSpecFlat[ci] > (SCHAR_MAX >> 1)) || (rmsComp >= TA_EPS))
 #endif
            if (rmsComp >= rmsRef9) // check only first SFB above max_sfb for simplification
            {
@@ -901,7 +909,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
            const unsigned lfAtten = (b <= 5 ? (eightShorts ? 1 : 4) + b * lfConst : 5 * lfConst - 1 + b + ((b + 5) >> 4));
            const uint8_t sfbWidth = grpOff[b + 1] - grpOff[b];
            const uint64_t rateFac = mSfmFac * s * __min (32, lfAtten * grpData.numWindowGroups); // rate control part 1
-            const uint64_t sScaled = ((1u << 23) + __max (grpRmsMin, grpStepSizes[b]) * (scaleBr - (grpStereoData[b] > 0 ? 1 : 0)) * rateFac) >> 24;
+            const uint64_t sScaled = ((1u << 23) + __max (grpRmsMin, grpStepSizes[b]) * scaleBr * rateFac) >> 24;

            // scale step-sizes according to VBR mode & derive scale factors from step-sizes
            grpStepSizes[b] = uint32_t (__max (BA_EPS, __min (UINT_MAX, sScaled)));
@@ -919,6 +927,15 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
          {
            for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++)
            {
+#ifndef NO_DTX_MODE
+              if (meanSpecFlat[ci] <= (SCHAR_MAX >> 1)) // noise-like low-energy "DTX" frame
+              {
+                for (s = (coreConfig.icsInfoPrev[ch].windowSequence == EIGHT_SHORT ? 24 : m_specGapFiller.getFirstGapFillSfb ()); s < maxSfbCh; s++)
+                {
+                  if (grpData.sfbRmsValues[s + m_numSwbShort * gr] < ((3 * TA_EPS) >> 1)) grpData.scaleFactors[s + m_numSwbShort * gr] = 0;
+                }
+              }
+#endif
              memset (&grpData.scaleFactors[maxSfbCh + m_numSwbShort * gr], 0, (numSwbFrame - maxSfbCh) * sizeof (uint8_t));
            }
            grpData.sfbsPerGroup = coreConfig.icsInfoCurr[ch].maxSfb = numSwbFrame;
@@ -1061,7 +1078,7 @@ unsigned ExhaleEncoder::quantizationCoding ()  // apply MDCT quantization and en
          const uint8_t maxSfbLong  = (samplingRate < 37566 ? 51 /*32 kHz*/ : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
          const uint8_t maxSfbShort = (samplingRate < 37566 ? 14 /*32 kHz*/ : brModeAndFsToMaxSfbShort(m_bitRateMode, samplingRate));
          const uint16_t peakIndex  = (shortWinCurr ? 0 : (m_specAnaCurr[ci] >> 5) & 2047);
-          const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort : maxSfbLong) - 5 + (m_bitRateMode >> 1) + (meanSpecFlat[ci] >> 5);
+          const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort - 4 : maxSfbLong - 6) + (m_bitRateMode >> 1) + (meanSpecFlat[ci] >> 5);
          const unsigned targetBitCount25 = ((60000 + 20000 * m_bitRateMode) * nSamplesInFrame) / (samplingRate * ((grpData.numWindowGroups + 1) >> 1));
          unsigned b = grpData.sfbsPerGroup - 1;

--- a/src/lib/stereoProcessing.cpp
+++ b/src/lib/stereoProcessing.cpp
@@ -453,7 +453,7 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in

        if (alphaRe == 0)
        {
-          if (realOnlyCalc) // update previous magnitude value
+          if (realOnlyCalc) // update previous res. MDCT value
          {
            sfbMdctR += sfbWidth - 1;
            prevResi = (grpSData[sfbEv] > 0 ? *sfbMdctR : int32_t (((int64_t) sfbMdctD[sfbWidth - 1] +