rate ctrl, 32 kHz fix

2025-03-11 16:50:09 +01:00 · 2020-08-18 01:00:00 +02:00 · 2020-08-18 01:00:00 +02:00 · b033287115
commit b033287115
parent 051900fe7c
9 changed files with 48 additions and 36 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,7 +16,7 @@ if("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}")
 endif()


-project(exhale VERSION 1.0.6 LANGUAGES CXX)
+project(exhale VERSION 1.0.7 LANGUAGES CXX)

 if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
    set(CMAKE_BUILD_TYPE Release
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@ exhale is being made available under an open-source license which is
 similar to the 3-clause BSD license but modified to address specific
 aspects dictated by the nature and the output of this application.

-The license text and release notes for the current version 1.0.6 can
+The license text and release notes for the current version 1.0.7 can
 be found in the `include` subdirectory of the exhale distribution.


--- a/include/Release.htm
+++ b/include/Release.htm
@@ -25,9 +25,14 @@
 <td valign="top">

 <h1><br><span class="pink">exhale</span> - <span class="pink">e</span>codis e<span class="pink">x</span>tended <span class="pink">h</span>igh-efficiency <span class="pink">a</span>nd <span class="pink">l</span>ow-complexity <span class="pink">e</span>ncoder<br><span class="gray"><sup><br>Software Release Notes, Version History, Known Issues, Upcoming Feature Roadmap</sup></span><br><br></h1>
-<h3>&nbsp; &nbsp;The version of this distribution of the &laquo;exhale&raquo; software release is <b>1.0.6</b> (official pub&shy;lic minor release) from July 30, 2020. Please check <a href="http://www.ecodis.de/audio.htm#mpeg">www.ecodis.de</a> regularly for new versions of this software. A summary of each version up to this release, a list of known issues with this release, and a roadmap of additional functionality are provided below.</h3>
+<h3>&nbsp; &nbsp;The version of this distribution of the &laquo;exhale&raquo; software release is <b>1.0.7</b> (official pub&shy;lic minor release) from August 2020. Please check <a href="http://www.ecodis.de/audio.htm#mpeg">www.ecodis.de</a> regularly for new versions of this software. A summary of each version up to this release, a list of known issues with this release, and a roadmap of additional functionality are provided below.</h3>
 <h3><br><b>Chronological Version History</b></h3>
-<h3>&nbsp; &nbsp;Version <b>1.0.6 <span class="gray">&nbsp;July 2020, this release</span></b></h3>
+<h3>&nbsp; &nbsp;Version <b>1.0.7 <span class="gray">&nbsp;Aug. 2020, this release</span></b></h3>
+<ul>
+ <li><h3>minor bugfixes in bit-rate control and higher-rate coding at 32 kHz sampling rate</h3></li>
+ <li><h3>exhaleApp: write complete MP4 &laquo;stss&raquo; data for improved compatibility (issue 13)</h3></li>
+</ul>
+<h3>&nbsp; &nbsp;Version <b>1.0.6 <span class="gray">&nbsp;July 2020</span></b></h3>
 <ul>
 <li><h3>bugfixes, improved quality on some transient signals, better decoder compatibility</h3></li>
 <li><h3>exhaleApp: support for Extensible WAVE format, write MP4 &laquo;prol&raquo; data (issue 10)</h3></li>
@@ -85,19 +90,19 @@
 <h3><br><b>Known Issues with This Release</b></h3>
 <h3>&nbsp; &nbsp;If you notice an issue with this release <b>not</b> mentioned below, please contact ecodis or a contributor with the details (configuration, input file) needed to reproduce the issue.</h3>
 <ul>
- <li><h3>exhaleLib: Coding of stereo or multichannel input occasionally leads to slightly in&shy;creased bit-rates because the predictive joint-channel coding provided by ISO/IEC <a href="https://www.iso.org/standard/76385.html">23003-3</a> has not been fully implemented. See the functionality roadmap below.</h3></li>
+ <li><h3>exhaleApp: At the end of each encoding run, the MPEG-4 file header with seeking information is written to the beginning of the output file to maximize compatibility in audio streaming applications. Since such a file operation is not supported with pipe-based output, encoding to stdout instead of files is currently not supported.</h3></li>
 <li><h3>exhaleApp: Only basic WAVE input file reading functionality has been implemen&shy;ted. Specifically, 8-bit WAVE input is assumed to contain an even number of audio samples, and ITU-R <a href="https://www.itu.int/rec/R-REC-BS.2088/en">BS.2088</a> (RF64, Broadcast WAVE) files are not supported.</h3></li>
 </ul>
 <h3><br><b>Roadmap of Upcoming Features</b></h3>
 <h3>&nbsp; &nbsp;If you are in need of an additional library or application feature <b>not</b> mentioned below, please contact ecodis or a contributor with a request, and we will see what we can do.</h3>
 <ul>
 <li><h3>support for coding with a core coder frame length of 768 samples, no version plan</h3></li>
- <li><h3>exhaleLib: completed integration of predictive joint-channel coding, version 1.0.7</h3></li>
+ <li><h3>exhaleLib: completed integration of predictive joint-channel coding, version 1.1.x</h3></li>
 <li><h3>exhaleLib: finalization of support for 3.0&#x2013;5.1 multichannel coding, no version plan</h3></li>
 <li><h3>exhaleLib: speed-ups and further quality tuning for difficult signals, as necessary.</h3></li>
 </ul>
 <h3><br></h3>
-<h4><span class="gray">Written by C. R. Helmrich for exhale 1.0.6, July 2020. Available at www.ecodis.de/exhale/release.htm.</span><br><br></h4>
+<h4><span class="gray">Written by C. R. Helmrich for exhale 1.0.7, August 2020. Available at www.ecodis.de/exhale/release.htm.</span><br><br></h4>

 </td>
 <td valign="top" colspan="2">
--- a/include/version.h
+++ b/include/version.h
@@ -15,5 +15,5 @@
 # define EXHALELIB_VERSION_MINOR "0"
 #endif
 #ifndef EXHALELIB_VERSION_BUGFIX
-# define EXHALELIB_VERSION_BUGFIX ".6" // "RC" or ".0", ".1", ...
+# define EXHALELIB_VERSION_BUGFIX ".7" // "RC" or ".0", ".1", ...
 #endif
--- a/src/app/exhaleApp.rc
+++ b/src/app/exhaleApp.rc
@@ -13,7 +13,7 @@

 0 ICON "exhaleApp.ico"
 VS_VERSION_INFO VERSIONINFO
-FILEVERSION 1,0,6
+FILEVERSION 1,0,7
 BEGIN
  BLOCK "StringFileInfo"
  BEGIN
--- a/src/lib/bitAllocation.cpp
+++ b/src/lib/bitAllocation.cpp
@@ -432,9 +432,7 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
    const SfbGroupData& grpData = *groupData[ch];
    const uint32_t maxSfbInCh = __min (MAX_NUM_SWB_LONG, grpData.sfbsPerGroup);
    const bool    eightShorts = (grpData.numWindowGroups != 1);
-#if 1
    const bool  lowRateTuning = (samplingRate >= 27713) && (sfm[ch] <= (SCHAR_MAX >> 1));
-#endif
    const uint32_t*   rms = grpData.sfbRmsValues;
    uint32_t*   stepSizes = &sfbStepSizes[ch * numSwbShort * NUM_WINDOW_GROUPS];

@@ -453,7 +451,7 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
      uint64_t  s = (eightShorts ? (nSamplesInFrame * grpData.windowGroupLength[gr]) >> 1 : nSamplesInFrame << 2);

      memset (m_tempSfbValue, UCHAR_MAX, maxSfbInCh * sizeof (uint8_t));
-#if 1
+
      if ((m_rateIndex == 0) && lowRateTuning && (maxSfbInCh > 0) && !eightShorts)
      {
        uint32_t numRedBands = nSamplesInFrame; // final result lies between 1/4 and 1/2
@@ -471,7 +469,7 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
          grpStepSizes[b] = __max (grpStepSizes[b], grpRms[b] >= (UINT_MAX >> 1) ? UINT_MAX : (grpRms[b] + 1) << 1);
        }
      }
-#endif
+
      // undercoding reduction for case where large number of coefs is quantized to zero
      for (b = 0; b < maxSfbInCh; b++)
      {
@@ -520,14 +518,15 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
      {
        grpStepSizes[b] = uint32_t ((__max (grpRmsMin, grpStepSizes[b]) * s * (m_tempSfbValue[b] + 1u) + (1u << 14)) >> 15);
        if (grpStepSizes[b] <= (grpRms[b] >> 11)) grpStepSizes[b] = __max (BA_EPS, grpRms[b] >> 11);
-#if 1
-        if ((m_rateIndex == 0) && lowRateTuning)
+
+        if ((m_rateIndex == 0) && lowRateTuning) // clip near-zero SNRs to a minimum SNR
        {
-          if ((grpStepSizes[b] > grpRms[b]) && ((grpStepSizes[b] >> 1) <= grpRms[b])) grpStepSizes[b] = grpRms[b];
+          const uint32_t rms = uint32_t ((grpRms[b] * (8192u - (uint64_t) sfm[ch] * sfm[ch]) + (1u << 12)) >> 13);
+
+          if ((grpStepSizes[b] > grpRms[b]) && ((grpStepSizes[b] >> 1) <= rms)) grpStepSizes[b] = grpRms[b];
        }
-#endif
      }
-    } // for gr
+    }
  } // for ch

  return 0; // no error
--- a/src/lib/exhaleEnc.cpp
+++ b/src/lib/exhaleEnc.cpp
@@ -838,7 +838,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
      const TnsData&  tnsData1 = coreConfig.tnsData[1];
      uint8_t realOnlyStartSfb = (eightShorts0 ? m_numSwbShort : m_numSwbLong) - __max (tnsData0.filterLength[0], tnsData1.filterLength[0]);

-      if (coreConfig.commonWindow && (coreConfig.stereoMode == 0) && (m_perCorrHCurr[el] > SCHAR_MAX || m_perCorrLCurr[el] > (UCHAR_MAX * 3) / 4))
+      if (coreConfig.commonWindow && (coreConfig.stereoMode == 0) && (m_perCorrHCurr[el] > SCHAR_MAX || m_perCorrLCurr[el] > (UCHAR_MAX * 5) / 8))
      {
        coreConfig.stereoMode = 1;
      }
@@ -929,8 +929,11 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
                                                         (coreConfig.stereoConfig & 2) > 0, realOnlyStartSfb,
                                                         &sfbStepSizes[m_numSwbShort * NUM_WINDOW_GROUPS *  ci],
                                                         &sfbStepSizes[m_numSwbShort * NUM_WINDOW_GROUPS * (ci + 1)]);
-        if (errorValue == 2) // signal M/S with complex prediction
+        if (errorValue >= 2) // signal M/S with complex prediction
        {
+#if SP_MDST_PRED
+          coreConfig.stereoConfig |= errorValue - 2; // cplx coefs
+#endif
          coreConfig.stereoMode += 2; errorValue = 0;
        }
        m_specAnaCurr[ci    ] = (m_specAnaCurr[ci    ] & (UINT_MAX - 65504)) | peakIndexSte;
@@ -952,7 +955,8 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
 #if !RESTRICT_TO_AAC
        const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong);
 #endif
-        const uint16_t mSfmFac = UCHAR_MAX - (((16u + (m_bitRateMode >> 1)) * meanSpecFlat[ci]) >> 5);
+        const uint16_t mSfmSqr = (m_bitRateMode < 2 && samplingRate >= 27713 ? ((uint16_t) meanSpecFlat[ci] * meanSpecFlat[ci]) >> m_bitRateMode : 0);
+        const uint16_t mSfmFac = 256u - (((32u + m_bitRateMode) * ((uint32_t) meanSpecFlat[ci] << 4) - mSfmSqr + (1u << 9)) >> 10);
        uint32_t*    stepSizes = &sfbStepSizes[ci * m_numSwbShort * NUM_WINDOW_GROUPS];

        memset (grpData.scaleFactors, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint8_t));
@@ -1713,7 +1717,8 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o
          if ((winSeq0 != initialWs0) && (winSeq0 == EIGHT_SHORT))
          {
 #if !RESTRICT_TO_AAC
-            if ((tsCurr[0] * 7 < tsCurr[1] * 2) && (tsNext[0] * 7 < tsNext[1] * 2))
+            if ((tsCurr[0] * 7 < tsCurr[1] * 2) && (tsNext[0] * 7 < tsNext[1] * 2) &&
+                (abs (m_specFlatPrev[ci - 1] - (int) m_specFlatPrev[ci - 2]) > UCHAR_MAX / 4))
            {
              winSeq0 = STOP_START; // don't synchronize to EIGHT_SHORT but keep low overlap
            }
@@ -1724,7 +1729,8 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o
          if ((winSeq1 != initialWs1) && (winSeq1 == EIGHT_SHORT))
          {
 #if !RESTRICT_TO_AAC
-            if ((tsCurr[1] * 7 < tsCurr[0] * 2) && (tsNext[1] * 7 < tsNext[0] * 2))
+            if ((tsCurr[1] * 7 < tsCurr[0] * 2) && (tsNext[1] * 7 < tsNext[0] * 2) &&
+                (abs (m_specFlatPrev[ci - 1] - (int) m_specFlatPrev[ci - 2]) > UCHAR_MAX / 4))
            {
              winSeq1 = STOP_START; // don't synchronize to EIGHT_SHORT but keep low overlap
            }
--- a/src/lib/stereoProcessing.cpp
+++ b/src/lib/stereoProcessing.cpp
@@ -77,9 +77,9 @@ static inline void   setStepSizesMS (const uint32_t* const rmsSfbL, const uint32
 StereoProcessor::StereoProcessor ()
 {
 #if SP_OPT_ALPHA_QUANT
-  memset (m_randomIntMemRe, 0, (MAX_NUM_SWB_LONG / 2) * sizeof (int32_t));
+  memset (m_randomIntMemRe, 0, (1+MAX_NUM_SWB_LONG/2) * sizeof (int32_t));
 # if SP_MDST_PRED
-  memset (m_randomIntMemIm, 0, (MAX_NUM_SWB_LONG / 2) * sizeof (int32_t));
+  memset (m_randomIntMemIm, 0, (1+MAX_NUM_SWB_LONG/2) * sizeof (int32_t));
 # endif
 #endif
  memset (m_stereoCorrValue, 0, (1024 >> SA_BW_SHIFT) * sizeof (uint8_t));
@@ -132,7 +132,7 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
 #if SP_OPT_ALPHA_QUANT
  if ((bitRateMode >= 4) && eightShorts) // reset quantizer dither memory in short transform
  {
-    for (uint16_t sfb = 0; sfb < MAX_NUM_SWB_LONG / 2; sfb++)
+    for (uint16_t sfb = 0; sfb <= MAX_NUM_SWB_LONG / 2; sfb++)
    {
      m_randomIntMemRe[sfb] = (1 << 30);
 # if SP_MDST_PRED
@@ -351,7 +351,7 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
          }
          sfbTempVar *= sfbTempVar;  // account for residual RMS reduction due to prediction
 #if SP_MDST_PRED
-          if (bitRateMode > 1) sfbTempVar += alphaLimit * alphaLimit;  // including alpha_im
+          if (bitRateMode > 0) sfbTempVar += alphaLimit * alphaLimit;  // including alpha_im
 #endif
          for (b = sfbIsOdd; b >= 0; b--)
          {
@@ -501,15 +501,17 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
  if (numSfbPredSte == 0) // discard prediction coefficients and stay with legacy M/S stereo
  {
    if (applyPredSte)
-    for (uint16_t gr = 0; gr < grp.numWindowGroups; gr++)
    {
-      uint8_t* const grpSData = &sfbStereoData[numSwbFrame * gr];
-
-      for (uint16_t sfb = 0; sfb < maxSfbSte; sfb++)
+      for (uint16_t gr = 0; gr < grp.numWindowGroups; gr++)
      {
-        if (grpSData[sfb] > 0) grpSData[sfb] = 16;
+        uint8_t* const grpSData = &sfbStereoData[numSwbFrame * gr];
+
+        for (uint16_t sfb = 0; sfb < maxSfbSte; sfb++)
+        {
+          if (grpSData[sfb] > 0) grpSData[sfb] = 16;
+        }
+        if (numSwbFrame > maxSfbSte) memset (&grpSData[maxSfbSte], (useFullFrameMS ? 16 : 0), (numSwbFrame - maxSfbSte) * sizeof (uint8_t));
      }
-      if (numSwbFrame > maxSfbSte) memset (&grpSData[maxSfbSte], (useFullFrameMS ? 16 : 0), (numSwbFrame - maxSfbSte) * sizeof (uint8_t));
    }
  }
  else // at least one "significant" prediction band, apply prediction and update RMS values
@@ -634,5 +636,5 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
    numSfbPredSte = 2;
  }

-  return (numSfbPredSte); // no error
+  return numSfbPredSte; // no error
 }
--- a/src/lib/stereoProcessing.h
+++ b/src/lib/stereoProcessing.h
@@ -39,9 +39,9 @@ private:
 #endif
 #if SP_OPT_ALPHA_QUANT
  std::minstd_rand m_randomInt32;
-  int32_t m_randomIntMemRe[MAX_NUM_SWB_LONG / 2];
+  int32_t m_randomIntMemRe[1+MAX_NUM_SWB_LONG/2];
 # if SP_MDST_PRED
-  int32_t m_randomIntMemIm[MAX_NUM_SWB_LONG / 2];
+  int32_t m_randomIntMemIm[1+MAX_NUM_SWB_LONG/2];
 # endif
 #endif
  uint8_t m_stereoCorrValue[1024 >> SA_BW_SHIFT]; // one value for every 32 spectral coefficients