fixes, finish tuning

2020-07-25 14:00:14 +02:00 · 2020-07-25 14:00:14 +02:00 · f8ad0b34d7
parent 86ba7b8af6
commit f8ad0b34d7
5 changed files with 53 additions and 24 deletions
--- a/include/Release.htm
+++ b/include/Release.htm
@@ -25,9 +25,16 @@
 <td valign="top">

 <h1><br><span class="pink">exhale</span> - <span class="pink">e</span>codis e<span class="pink">x</span>tended <span class="pink">h</span>igh-efficiency <span class="pink">a</span>nd <span class="pink">l</span>ow-complexity <span class="pink">e</span>ncoder<br><span class="gray"><sup><br>Software Release Notes, Version History, Known Issues, Upcoming Feature Roadmap</sup></span><br><br></h1>
-<h3>&nbsp; &nbsp;The version of this distribution of the &laquo;exhale&raquo; software release is <b>1.0.5</b> (official pub&shy;lic minor release) from June 30, 2020. Please check <a href="http://www.ecodis.de">www.ecodis.de</a> regularly for new versions of this software. A summary of each version up to this release, a list of known issues with this release, and a roadmap of additional functionality are provided below.</h3>
+<h3>&nbsp; &nbsp;The version of this distribution of the &laquo;exhale&raquo; software release is <b>1.0.6</b> (official pub&shy;lic minor release) from July 30, 2020. Please check <a href="http://www.ecodis.de/audio.htm#mpeg">www.ecodis.de</a> regularly for new versions of this software. A summary of each version up to this release, a list of known issues with this release, and a roadmap of additional functionality are provided below.</h3>
 <h3><br><b>Chronological Version History</b></h3>
-<h3>&nbsp; &nbsp;Version <b>1.0.5 <span class="gray">&nbsp;June 2020, this release</span></b></h3>
+<h3>&nbsp; &nbsp;Version <b>1.0.6 <span class="gray">&nbsp;July 2020, this release</span></b></h3>
+<ul>
+ <li><h3>bugfixes, improved quality on some transient signals, better decoder compatibility</h3></li>
+ <li><h3>exhaleApp: support for Extensible WAVE format, write MP4 &laquo;prol&raquo; data (issue 10)</h3></li>
+ <li><h3>exhaleApp: automatic downsampling of 48-kHz input to 32 kHz for CVBR mode 1</h3></li>
+ <li><h3>exhaleLib: fine-tuning of psychoacoustic model for difficult transient input signals</h3></li>
+</ul>
+<h3>&nbsp; &nbsp;Version <b>1.0.5 <span class="gray">&nbsp;June 2020</span></b></h3>
 <ul>
 <li><h3>slightly reduced bit-rates with lower modes, better compatibility when using stdin</h3></li>
 <li><h3>exhaleApp: support for Unicode text on Windows&trade;, 44100 Hz with CVBR mode 1</h3></li>
@@ -79,18 +86,18 @@
 <h3>&nbsp; &nbsp;If you notice an issue with this release <b>not</b> mentioned below, please contact ecodis or a contributor with the details (configuration, input file) needed to reproduce the issue.</h3>
 <ul>
 <li><h3>exhaleLib: Coding of stereo or multichannel input occasionally leads to slightly in&shy;creased bit-rates because the predictive joint-channel coding provided by ISO/IEC <a href="https://www.iso.org/standard/76385.html">23003-3</a> has not been fully implemented. See the functionality roadmap below.</h3></li>
- <li><h3>exhaleApp: Only basic WAVE input file reading functionality has been implemen&shy;ted. Specifically, 8-bit WAVE input is assumed to contain an even number of audio samples, and ITU-R <a href="https://www.itu.int/rec/R-REC-BS.2088/en">BS.2088</a> (RF64) or Extensible WAVE files are not supported.</h3></li>
+ <li><h3>exhaleApp: Only basic WAVE input file reading functionality has been implemen&shy;ted. Specifically, 8-bit WAVE input is assumed to contain an even number of audio samples, and ITU-R <a href="https://www.itu.int/rec/R-REC-BS.2088/en">BS.2088</a> (RF64, Broadcast WAVE) files are not supported.</h3></li>
 </ul>
 <h3><br><b>Roadmap of Upcoming Features</b></h3>
 <h3>&nbsp; &nbsp;If you are in need of an additional library or application feature <b>not</b> mentioned below, please contact ecodis or a contributor with a request, and we will see what we can do.</h3>
 <ul>
 <li><h3>support for coding with a core coder frame length of 768 samples, no version plan</h3></li>
- <li><h3>exhaleLib: completed integration of predictive joint-channel coding, version 1.0.6</h3></li>
+ <li><h3>exhaleLib: completed integration of predictive joint-channel coding, version 1.0.7</h3></li>
 <li><h3>exhaleLib: finalization of support for 3.0&#x2013;5.1 multichannel coding, no version plan</h3></li>
 <li><h3>exhaleLib: speed-ups and further quality tuning for difficult signals, as necessary.</h3></li>
 </ul>
 <h3><br></h3>
-<h4><span class="gray">Written by C. R. Helmrich for exhale 1.0.5, June 2020. Available at www.ecodis.de/exhale/release.htm.</span><br><br></h4>
+<h4><span class="gray">Written by C. R. Helmrich for exhale 1.0.6, July 2020. Available at www.ecodis.de/exhale/release.htm.</span><br><br></h4>

 </td>
 <td valign="top" colspan="2">
--- a/src/app/basicMP4Writer.cpp
+++ b/src/app/basicMP4Writer.cpp
@@ -11,6 +11,7 @@

 #include "exhaleAppPch.h"
 #include "basicMP4Writer.h"
+#include "version.h"

 #if 0 // DEBUG
 static const uint8_t muLawHeader[44] = {
@@ -317,7 +318,24 @@ int BasicMP4Writer::finishFile (const unsigned avgBitrate, const unsigned maxBit
  m_dynamicHeader.push_back (0x61); m_dynamicHeader.push_back (0x74); // mdat
  for (uint32_t pNdx = 0; pNdx < headerPaddingLength; pNdx++)
  {
-    m_dynamicHeader.push_back (0x00); // add padding bytes. TODO: ver string?
+    if (pNdx == 0)  // add padding byte with library version
+    {
+      const char ver[] = EXHALELIB_VERSION_MAJOR "." EXHALELIB_VERSION_MINOR EXHALELIB_VERSION_BUGFIX;
+      const int verInt = (ver[0] - 0x30) * 100 + (ver[2] - 0x30) * 10 + (ver[4] - 0x30);
+
+      m_dynamicHeader.push_back (__max (0, __min (UCHAR_MAX, verInt)));
+    }
+    else if (pNdx == 1) // add 8-bit cyclic redundancy check
+    {
+      uint8_t crc8 = m_dynamicHeader.back(); // Baicheva '98
+
+      for (uint16_t i = 8; i > 0; i--) if (crc8 & 0x80) crc8 = (crc8 << 1) ^ 0x2F; else crc8 <<= 1;
+      m_dynamicHeader.push_back (crc8); // add padding CRC-8
+    }
+    else
+    {
+      m_dynamicHeader.push_back (0x00); // add padding bytes
+    }
  }

  _SEEK (m_fileHandle, 0, 0 /*SEEK_SET*/);  // back to start
@@ -346,7 +364,9 @@ int BasicMP4Writer::initHeader (const uint32_t audioLength) // reserve bytes for

  return _WRITE (m_fileHandle, m_staticHeader, 44);
 #else
-  const bool flushFrameUsed = ((audioLength + m_pregapLength) % m_frameLength) > 0;
+  /* NOTE: the following condition is, as far as I can tell, correct, but some decoders with DRC processing
+  may decode too few samples with it. Hence, I disabled it. See also corresponding NOTE in exhaleApp.cpp */
+  const bool flushFrameUsed = true; // ((audioLength + m_pregapLength) % m_frameLength) > 0;
  const unsigned frameCount = ((audioLength + m_frameLength - 1) / m_frameLength) + (flushFrameUsed ? 2 : 1);
  const unsigned chunkCount = ((frameCount + m_rndAccPeriod - 1) / m_rndAccPeriod);
  const unsigned finalChunk = (frameCount <= m_rndAccPeriod ? 0 : frameCount % m_rndAccPeriod);
--- a/src/lib/bitAllocation.cpp
+++ b/src/lib/bitAllocation.cpp
@@ -51,14 +51,15 @@ static void jndPowerLawAndPeakSmoothing (uint32_t* const  stepSizes, const unsig
  stepSizes[0] = __min (stepSizeM1, stepSizes[0]); // `- becomes --
  for (/*b*/; b < nStepSizes; b++)
  {
+    const uint64_t oneMinusB = 128 - b;
    const uint32_t stepSizeB = jndModel (stepSizes[b], avgStepSize, expTimes512, mulTimes512);

    if ((stepSizeM3 <= stepSizeM2) && (stepSizeM3 <= stepSizeM1) && (stepSizeB <= stepSizeM2) && (stepSizeB <= stepSizeM1))
    {
      const uint32_t maxM3M0 = __max (stepSizeM3, stepSizeB); // smoothen local spectral peak of _´`- shape

-      stepSizes[b - 2] = __min (maxM3M0, stepSizes[b - 2]); // _-`-
-      stepSizes[b - 1] = __min (maxM3M0, stepSizes[b - 1]); // _---
+      stepSizes[b - 2] = uint32_t ((b * (uint64_t) stepSizes[b - 2] + oneMinusB * __min (maxM3M0, stepSizes[b - 2]) + 64) >> 7); // _-`-
+      stepSizes[b - 1] = uint32_t ((b * (uint64_t) stepSizes[b - 1] + oneMinusB * __min (maxM3M0, stepSizes[b - 1]) + 64) >> 7); // _---
    }
    stepSizeM3 = stepSizeM2;
    stepSizeM2 = stepSizeM1;
@@ -175,9 +176,9 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
  // equal-loudness weighting based on data from: K. Kurakata, T. Mizunami, and K. Matsushita, "Percentiles
  // of Normal Hearing-Threshold Distribution Under Free-Field Listening Conditions in Numerical Form," Ac.
  // Sci. Tech, vol. 26, no. 5, pp. 447-449, Jan. 2005, https://www.researchgate.net/publication/239433096.
-  const unsigned HF/*idx*/= ((123456 - samplingRate) >> 11) + (samplingRate <= 34150 ? 2 : 0); // start SFB
+  const unsigned HF/*idx*/= ((123456 - samplingRate) >> 11) + (samplingRate < 37566 ? 2 : 0);  // start SFB
  const unsigned LF/*idx*/= 9;
-  const unsigned MF/*idx*/= (samplingRate < 28800 ? HF : __min (HF, 30u));
+  const unsigned MF/*idx*/= (samplingRate < 27713 ? HF : __min (HF, 30u));
  const unsigned msShift  = (samplingRate + 36736) >> 15; // TODO: 768 smp
  const unsigned msOffset = 1 << (msShift - 1);
  uint32_t nMeans = 0, sumMeans = 0;
@@ -291,7 +292,7 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
      maskingSlope = (stepSizes[b - 1] + msOffset) >> msShift;
      stepSizes[b] = __max (rms[b], maskingSlope + BA_EPS);
    }
-    if ((samplingRate >= 28800) && (samplingRate <= 64000))
+    if ((samplingRate >= 27713) && (samplingRate < 75132))
    {
      for (/*b*/; b < __min (HF, maxSfbInCh); b++) // compensate high-frequency slopes for linear SFB width
      {
@@ -346,7 +347,7 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA

    jndPowerLawAndPeakSmoothing (stepSizes, maxSfbInCh, m_avgStepSize[ch], m_avgSpecFlat[ch], tnsDisabled ? m_avgTempFlat[ch] : 0);

-    if ((samplingRate >= 28800) && (samplingRate <= 64000))
+    if ((samplingRate >= 27713) && (samplingRate < 75132))
    {
      elw = 36; // 36/32 = 9/8
      for (b = HF; b < maxSfbInCh; b++)  // undo above additional high-frequency equal-loudness attenuation
@@ -457,11 +458,11 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
        const uint32_t rmsRef9 = (commonWindow ? refRms[b] >> 9 : rmsComp);
        const uint8_t sfbWidth = grpOff[b + 1] - grpOff[b];

-        if (redWeight > 0 && !eightShorts && sfbWidth > 12) // further reduce step-sizes of transient bands
+        if (redWeight > 0 && !eightShorts && sfbWidth > (samplingRate >= 18783 ? 8 : 12)) // transient SFBs
        {
          const uint32_t gains = m_tnsPredictor->calcParCorCoeffs (&mdctSpec[ch][grpOff[b]], sfbWidth, MAX_PREDICTION_ORDER, tempCoeffs) >> 24;

-          m_tempSfbValue[b] = UCHAR_MAX - uint8_t ((512u + gains * gains * redWeight) >> (sfbWidth > 16 ? 10 : 11));
+          m_tempSfbValue[b] = UCHAR_MAX - uint8_t ((512u + gains * gains * redWeight) >> (10 + (sfbWidth > 16 ? 0 : (20 - sfbWidth) >> 2)));
          if ((b >= 2) && (m_tempSfbValue[b - 1] < m_tempSfbValue[b]) && (m_tempSfbValue[b - 1] < m_tempSfbValue[b - 2]))
          {
            m_tempSfbValue[b - 1] = __min (m_tempSfbValue[b], m_tempSfbValue[b - 2]); // remove local peaks
@@ -477,7 +478,7 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
        }
      }

-      if ((samplingRate > 27712) && (b < maxSfbL16k) && !eightShorts) // zeroed HF coefs
+      if ((samplingRate >= 27713) && (b < maxSfbL16k) && !eightShorts) // zeroed HF data
      {
        const uint32_t rmsComp = (grpSte != nullptr && grpSte[b] > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]);
        const uint32_t rmsRef9 = (commonWindow ? refRms[b] >> 9 : rmsComp);
--- a/src/lib/exhaleEnc.cpp
+++ b/src/lib/exhaleEnc.cpp
@@ -717,7 +717,7 @@ unsigned ExhaleEncoder::getOptParCorCoeffs (const SfbGroupData& grpData, const u
            sumAbsOrg += abs (mdctSample);  sumAbsTns += abs (resiSample);
          }
        }
-        if (sumAbsOrg * 9 <= sumAbsTns * 8) break; // band SNR was reduced by more than 1 dB
+        if (sumAbsOrg * 17 <= sumAbsTns * 16) break; // band SNR reduced by more than 0.5 dB
      }
      m_specAnaCurr[channelIndex] = (m_specAnaCurr[channelIndex] & (UINT_MAX - 31)) | (b + 1);
    } // if order > 0
@@ -786,7 +786,8 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
  const unsigned samplingRate    = toSamplingRate (m_frequencyIdx);
  const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
  const uint32_t maxSfbLong      = (samplingRate < 37566 ? MAX_NUM_SWB_LONG : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
-  const uint64_t scaleSr         = (samplingRate < 27713 ? (samplingRate < 24000 ? 32 : 34) - __min (3, m_bitRateMode) : 37) - (nChannels >> 1);
+  const uint64_t scaleSr         = (samplingRate < 27713 ? (samplingRate < 23004 ? 32 : 34) - __min (3, m_bitRateMode)
+                                                         : (samplingRate < 37566 && m_bitRateMode != 3u ? 36 : 37)) - (nChannels >> 1);
  const uint64_t scaleBr         = (m_bitRateMode == 0 ? __min (32, 3 + (samplingRate >> 10) + (samplingRate >> 13) - (nChannels >> 1))
                                   : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1));
  uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf;
@@ -947,12 +948,12 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
      {
        SfbGroupData&  grpData = coreConfig.groupingData[ch];
        const bool eightShorts = (coreConfig.icsInfoCurr[ch].windowSequence == EIGHT_SHORT);
-        const bool saveBitRate = (meanSpecFlat[ci] > SCHAR_MAX && samplingRate >= 32000 + (unsigned) m_bitRateMode * 12000);
+        const bool saveBitRate = (meanSpecFlat[ci] > (UCHAR_MAX * 3) / 4 && samplingRate >= 32000 + (unsigned) m_bitRateMode * 12000);
        const uint8_t maxSfbCh = grpData.sfbsPerGroup;
 #if !RESTRICT_TO_AAC
        const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong);
 #endif
-        const uint16_t mSfmFac = UCHAR_MAX - ((9u * meanSpecFlat[ci]) >> 4);
+        const uint16_t mSfmFac = UCHAR_MAX - (((16u + (m_bitRateMode >> 1)) * meanSpecFlat[ci]) >> 5);
        uint32_t*    stepSizes = &sfbStepSizes[ci * m_numSwbShort * NUM_WINDOW_GROUPS];

        memset (grpData.scaleFactors, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint8_t));
@@ -988,7 +989,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
 #ifndef NO_DTX_MODE
          const bool prvEightShorts = (coreConfig.icsInfoPrev[ch].windowSequence == EIGHT_SHORT);

-          if ((m_bitRateMode < 1) && (m_numElements == 1) && (samplingRate <= 24000) && eightShorts)
+          if ((m_bitRateMode < 1) && (m_numElements == 1) && (samplingRate < 27713) && eightShorts)
          {
            for (s = 0; s < 26; s++) m_sfbLoudMem[ch][s][m_frameCount & 31] = uint16_t (sqrt (double (getThr (ch, s) << (samplingRate >> 13))));
          }
@@ -1002,7 +1003,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
 #ifndef NO_DTX_MODE
              const uint32_t*  grpRms = &grpData.sfbRmsValues[m_numSwbShort * gr];

-              if ((m_bitRateMode < 1) && (m_numElements == 1) && (samplingRate <= 24000))
+              if ((m_bitRateMode < 1) && (m_numElements == 1) && (samplingRate < 27713))
              {
                const uint32_t*  refRms = &coreConfig.groupingData[1 - ch].sfbRmsValues[m_numSwbShort * gr];
                uint8_t*  grpStereoData = &coreConfig.stereoDataCurr[m_numSwbShort * gr];
@@ -1043,7 +1044,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
          }
        }
 #ifndef NO_DTX_MODE
-        else if (m_noiseFilling[el] && (m_bitRateMode < 1) && (m_numElements == 1) && (samplingRate <= 24000))
+        else if (m_noiseFilling[el] && (m_bitRateMode < 1) && (m_numElements == 1) && (samplingRate < 27713))
        {
          for (s = 0; s < 26; s++) m_sfbLoudMem[ch][s][m_frameCount & 31] = BA_EPS;
        }
--- a/src/lib/stereoProcessing.cpp
+++ b/src/lib/stereoProcessing.cpp
@@ -69,7 +69,7 @@ static inline void   setStepSizesMS (const uint32_t* const rmsSfbL, const uint32
    const double rat = __min (1.0, grpStepSizes1[idx] / (sfbRmsL * 2.0)) * __min (1.0, grpStepSizes2[idx] / (sfbRmsR * 2.0)) * sfbFacLR;

    grpStepSizes1[idx] = grpStepSizes2[idx] = uint32_t (__max (SP_EPS, (min > rat * sfbMaxMS ? sqrt (rat * sfbMaxMS * min) :
-                                                                        __min (1.0, rat) * sfbMaxMS)) + 0.5);
+                                                                        __max (1.0/2048.0, __min (1.0, rat)) * sfbMaxMS)) + 0.5);
  }
 }