more mode 1 tuning

2025-02-03 00:47:37 +01:00 · 2020-05-17 15:00:00 +02:00 · 2020-05-17 15:00:00 +02:00 · 3c5fb0b0dd
commit 3c5fb0b0dd
parent ea74e998f3
4 changed files with 68 additions and 18 deletions
--- a/src/app/exhaleApp.cpp
+++ b/src/app/exhaleApp.cpp
@ -304,24 +304,20 @@ int main (const int argc, char* argv[])
      goto mainFinish;  // bad output string
    }

-    if ((variableCoreBitRateMode < 2) && (wavReader.getSampleRate () > 24000))
+    if ((variableCoreBitRateMode < 2) && (wavReader.getSampleRate () > 32000))
    {
-      fprintf_s (stderr, " ERROR during encoding! Input sample rate must be <=24 kHz for preset mode %d!\n\n", variableCoreBitRateMode);
+      fprintf_s (stderr, " ERROR during encoding! Input sample rate must be <=32 kHz for preset mode %d!\n\n", variableCoreBitRateMode);
      i = 4096; // return value

-      goto mainFinish; // resample to 24 kHz
+      goto mainFinish; // resample to 32 kHz
    }
-    if ((variableCoreBitRateMode < 3) && (wavReader.getSampleRate () > 48000))
+    if ((variableCoreBitRateMode < 4) && (wavReader.getSampleRate () > 48000))
    {
      fprintf_s (stderr, " ERROR during encoding! Input sample rate must be <=48 kHz for preset mode %d!\n\n", variableCoreBitRateMode);
      i = 4096; // return value

      goto mainFinish; // resample to 44 kHz
    }
-    if ((variableCoreBitRateMode == 2) && (wavReader.getSampleRate () > 32000))
-    {
-      fprintf_s (stderr, " WARNING: The input sampling rate should be 32 kHz or less for preset mode %d!\n\n", variableCoreBitRateMode);
-    }

    if (outPathEnd == 0) // name has no path
    {
--- a/src/lib/exhaleEnc.cpp
+++ b/src/lib/exhaleEnc.cpp
@ -726,7 +726,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
    }
    else // SCE or CPE: bandwidth-to-max_sfb mapping, short-window grouping for each channel
    {
-      const uint32_t redFactor = (m_bitRateMode < 3 ? 2 : 3) - (coreConfig.stereoConfig >> 3);
+      const uint32_t redFactor = __max ((samplingRate < 27713 ? 2 : 1), __min (3, m_bitRateMode)) - (coreConfig.stereoConfig >> 3);
      const bool  eightShorts0 = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT);
      const TnsData&  tnsData0 = coreConfig.tnsData[0];
      const TnsData&  tnsData1 = coreConfig.tnsData[1];
@ -818,7 +818,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
                                                         coreConfig.groupingData[0], coreConfig.groupingData[1],
                                                         coreConfig.tnsData[0], coreConfig.tnsData[1],
                                                         numSwbFrame, coreConfig.stereoDataCurr,
-                                                         m_bitRateMode <= 4, coreConfig.stereoMode > 1,
+                                                         m_bitRateMode, coreConfig.stereoMode > 1,
                                                         (coreConfig.stereoConfig & 2) > 0, realOnlyStartSfb,
                                                         &sfbStepSizes[m_numSwbShort * NUM_WINDOW_GROUPS *  ci],
                                                         &sfbStepSizes[m_numSwbShort * NUM_WINDOW_GROUPS * (ci + 1)]);
--- a/src/lib/stereoProcessing.cpp
+++ b/src/lib/stereoProcessing.cpp
@ -76,6 +76,12 @@ static inline void   setStepSizesMS (const uint32_t* const rmsSfbL, const uint32
 // constructor
 StereoProcessor::StereoProcessor ()
 {
+#if SP_OPT_ALPHA_QUANT
+  memset (m_randomIntMemRe, 0, (MAX_NUM_SWB_LONG / 2) * sizeof (int32_t)); // last random value per SFB pair, used when dithering the real-part alpha quantization
+# if SP_MDST_PRED
+  memset (m_randomIntMemIm, 0, (MAX_NUM_SWB_LONG / 2) * sizeof (int32_t)); // same memory for the imaginary-part (MDST) alpha quantization
+# endif
+#endif // NOTE(review): the short-transform reset in applyPredJointStereo writes (1 << 30), not 0, to these memories - confirm 0 is the intended start value
  memset (m_stereoCorrValue, 0, (1024 >> SA_BW_SHIFT) * sizeof (uint8_t));
 }

@ -85,7 +91,7 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
                                                SfbGroupData&  groupingData1, SfbGroupData&  groupingData2,
                                                const TnsData&   filterData1, const TnsData&   filterData2,
                                                const uint8_t    numSwbFrame, uint8_t* const sfbStereoData,
-                                                const bool    usePerCorrData, const bool    useFullFrameMS,
+                                                const uint8_t    bitRateMode, const bool    useFullFrameMS,
                                                const bool    reversePredDir, const uint8_t realOnlyOffset,
                                                uint32_t* const sfbStepSize1, uint32_t* const sfbStepSize2)
 {
@ -94,7 +100,10 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
  const SfbGroupData& grp = groupingData1;
  const bool  eightShorts = (grp.numWindowGroups > 1);
  const uint8_t maxSfbSte = (eightShorts ? __min (numSwbFrame, __max (grp.sfbsPerGroup, groupingData2.sfbsPerGroup) + 1) : numSwbFrame);
-  const bool  perCorrData = (usePerCorrData && !eightShorts); // use perceptual correlation?
+  const bool  perCorrData = ((bitRateMode <= 4) && !eightShorts); // perceptual correlation?
+#if SP_OPT_ALPHA_QUANT
+  const bool  quantDither = ((bitRateMode >= 4) && !eightShorts); // quantization dithering?
+#endif
  uint32_t rmsSfbL[2] = {0, 0}, rmsSfbR[2] = {0, 0};
  uint32_t  numSfbPredSte = 0; // counter
 #if SP_SFB_WISE_STEREO
@ -120,6 +129,18 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
 #endif

  if (applyPredSte && perCorrData) memcpy (m_stereoCorrValue, sfbStereoData, (grp.sfbOffsets[numSwbFrame] >> SA_BW_SHIFT) * sizeof (uint8_t));
+#if SP_OPT_ALPHA_QUANT
+  if ((bitRateMode >= 4) && eightShorts) // reset quantizer dither memory in short transform
+  {
+    for (uint16_t sfb = 0; sfb < MAX_NUM_SWB_LONG / 2; sfb++)
+    {
+      m_randomIntMemRe[sfb] = (1 << 30);
+# if SP_MDST_PRED
+      m_randomIntMemIm[sfb] = (1 << 30);
+# endif
+    }
+  }
+#endif

  for (uint16_t gr = 0; gr < grp.numWindowGroups; gr++)
  {
@ -226,7 +247,7 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
      grpRms1[sfb] = uint32_t ((sumAbsValM + (sfbWidth >> 1)) / sfbWidth);
      grpRms2[sfb] = uint32_t ((sumAbsValS + (sfbWidth >> 1)) / sfbWidth);

-      if (applyPredSte) sfbStereoData[sfb + grOffset] = 16; // initialize to alpha_q to zero
+      if (applyPredSte) sfbStereoData[sfb + grOffset] = 16; // initialize alpha_q_.. to zero

      if ((sfbIsOdd) || (sfb + 1 == maxSfbSte)) // finish pair
      {
@ -271,7 +292,18 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
          sfbTempVar = CLIP_PM ((double) sumPrdReAReB / (double) sumPrdReAReA, alphaLimit);
 #if SP_OPT_ALPHA_QUANT
          b = __max (512, 524 - int32_t (abs (10.0 * sfbTempVar))); // rounding optimization
-          b = int32_t (10.0 * sfbTempVar + b * (sfbTempVar < 0 ? -0.0009765625 : 0.0009765625));
+# if 1
+          if (quantDither)
+          {
+            const int32_t r = (int32_t) m_randomInt32 ();
+            const double dr = 10.0 * sfbTempVar + (r - m_randomIntMemRe[sfbEv >> 1]) * SP_DIV;
+
+            b = int32_t (dr + b * (dr < 0.0 ? -0.0009765625 : 0.0009765625));
+            m_randomIntMemRe[sfbEv >> 1] = r;
+          }
+          else
+# endif
+          b = int32_t (10.0 * sfbTempVar + b * (sfbTempVar < 0.0 ? -0.0009765625 : 0.0009765625));
 #else
          b = int32_t (10.0 * sfbTempVar + (sfbTempVar < 0 ? -0.5 : 0.5));// nearest integer
 #endif
@ -280,13 +312,24 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
          alphaLimit = CLIP_PM ((double) sumPrdImAReB / (double) sumPrdImAImA, alphaLimit);
 # if SP_OPT_ALPHA_QUANT
          b = __max (512, 524 - int32_t (abs (10.0 * alphaLimit))); // rounding optimization
-          b = int32_t (10.0 * alphaLimit + b * (alphaLimit < 0 ? -0.0009765625 : 0.0009765625));
+#  if 1
+          if (quantDither)
+          {
+            const int32_t r = (int32_t) m_randomInt32 ();
+            const double dr = 10.0 * alphaLimit + (r - m_randomIntMemIm[sfbEv >> 1]) * SP_DIV;
+
+            b = int32_t (dr + b * (dr < 0.0 ? -0.0009765625 : 0.0009765625));
+            m_randomIntMemIm[sfbEv >> 1] = r;
+          }
+          else
+#  endif
+          b = int32_t (10.0 * alphaLimit + b * (alphaLimit < 0.0 ? -0.0009765625 : 0.0009765625));
 # else
          b = int32_t (10.0 * alphaLimit + (alphaLimit < 0 ? -0.5 : 0.5));// nearest integer
 # endif
          if (sfbEv + 1 < numSwbFrame)
          sfbStereoData[sfbEv + 1 + grOffset] = uint8_t (b + 16); // save initial alpha_q_im
-#endif
+#endif // SP_MDST_PRED

          if (perCorrData && ((offEv & (SA_BW - 1)) == 0) && ((width & (SA_BW - 1)) == 0))
          {
@ -304,7 +347,7 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
          }
          sfbTempVar *= sfbTempVar;  // account for residual RMS reduction due to prediction
 #if SP_MDST_PRED
-          sfbTempVar += alphaLimit * alphaLimit; // including complex prediction by alpha_im
+          if (bitRateMode > 1) sfbTempVar += alphaLimit * alphaLimit;  // including alpha_im
 #endif
          for (b = sfbIsOdd; b >= 0; b--)
          {
--- a/src/lib/stereoProcessing.h
+++ b/src/lib/stereoProcessing.h
@ -13,6 +13,7 @@

 #include "exhaleLibPch.h"
 #include "specAnalysis.h" // for SA_BW... constants
+#include <random>

 // constants, experimental macros
 #define SP_0_DOT_1_16BIT     6554
@ -20,6 +21,9 @@
 #define SP_MDST_PRED            1
 #define SP_OPT_ALPHA_QUANT      1 // quantize alpha_q minimizing RMS distortion in louder channel
 #define SP_SFB_WISE_STEREO      1
+#if SP_OPT_ALPHA_QUANT
+# define SP_DIV (1.0 / 4294967296.0)
+#endif

 // joint-channel processing class
 class StereoProcessor
@ -32,6 +36,13 @@ private:
  int32_t m_originBandMdct2[320];
  int32_t m_originBandMdst1[320];
  int32_t m_originBandMdst2[320];
+#endif
+#if SP_OPT_ALPHA_QUANT
+  std::minstd_rand m_randomInt32;
+  int32_t m_randomIntMemRe[MAX_NUM_SWB_LONG / 2];
+# if SP_MDST_PRED
+  int32_t m_randomIntMemIm[MAX_NUM_SWB_LONG / 2];
+# endif
 #endif
  uint8_t m_stereoCorrValue[1024 >> SA_BW_SHIFT]; // one value for every 32 spectral coefficients

@ -47,7 +58,7 @@ public:
                                 SfbGroupData&  groupingData1, SfbGroupData&  groupingData2,
                                 const TnsData&   filterData1, const TnsData&   filterData2,
                                 const uint8_t    numSwbFrame, uint8_t* const sfbStereoData,
-                                 const bool    usePerCorrData, const bool    useFullFrameMS,
+                                 const uint8_t    bitRateMode, const bool    useFullFrameMS,
                                 const bool    reversePredDir, const uint8_t realOnlyOffset,
                                 uint32_t* const sfbStepSize1, uint32_t* const sfbStepSize2);
 }; // StereoProcessor