Prepare stereo prediction.

2025-03-12 01:00:11 +01:00 · 2020-03-29 01:00:24 +01:00 · 2020-03-29 01:00:24 +01:00 · 036d9b7d20
commit 036d9b7d20
parent 5ceb1a0959
7 changed files with 84 additions and 35 deletions
--- a/src/lib/bitAllocation.h
+++ b/src/lib/bitAllocation.h
@ -1,5 +1,5 @@
 /* bitAllocation.h - header file for class needed for psychoacoustic bit-allocation
- * written by C. R. Helmrich, last modified in 2019 - see License.htm for legal notices
+ * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
 *
 * The copyright in this software is being made available under a Modified BSD-Style License
 * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
@ -15,7 +15,7 @@

 // constants, experimental macros
 #define BA_EPS                  1
-#define BA_INTER_CHAN_SIM_MASK  0  // cross-channel simultaneous masking for surround
+#define BA_INTER_CHAN_SIM_MASK  0  // 5.1 cross-channel simultaneous masking

 // class for audio bit-allocation
 class BitAllocator
--- a/src/lib/bitStreamWriter.cpp
+++ b/src/lib/bitStreamWriter.cpp
@ -91,7 +91,7 @@ unsigned BitStreamWriter::writeChannelWiseTnsData (const TnsData& tnsData, const
            bitCount += 2 + order * coefBits;
          }
        }
-      } // if (n_filt[w])
+      } // if n_filt[w] > 0
    }
  } // for w

@ -264,14 +264,14 @@ unsigned BitStreamWriter::writeFDChannelStream (const CoreCoderData& elData, Ent
        }
      }
    } // for w
-  } // if (maxSfb == 0)
+  } // if maxSfb == 0

  m_auBitStream.write (0, 1); // fac_data_present, no fac_data

  return bitCount;
 }

-unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData,
+unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData, EntropyCoder& entrCoder,
 #if !RESTRICT_TO_AAC
                                                   const bool timeWarping,
 #endif
@ -281,6 +281,7 @@ unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData,
  const IcsInfo& icsInfo1 = elData.icsInfoCurr[1];
  const TnsData& tnsData0 = elData.tnsData[0];
  const TnsData& tnsData1 = elData.tnsData[1];
+  const SfbGroupData& grp = elData.groupingData[0];
  unsigned bitCount = 2, g, b;

  m_auBitStream.write (elData.tnsActive ? 1 : 0, 1); // tns_active
@ -301,7 +302,7 @@ unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData,
    bitCount += 3;
    if (elData.stereoMode == 1) // write SFB-wise ms_used[][] flag
    {
-      for (g = 0; g < elData.groupingData[0].numWindowGroups; g++)
+      for (g = 0; g < grp.numWindowGroups; g++)
      {
        const uint8_t* const gMsUsed = &elData.stereoData[m_numSwbShort * g];

@ -315,10 +316,12 @@ unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData,
 #if !RESTRICT_TO_AAC
    else if (elData.stereoMode >= 3)  // SFB-wise cplx_pred_data()
    {
+      const bool complexCoef = (elData.stereoConfig & 1);
+
      m_auBitStream.write (elData.stereoMode - 3, 1); // _pred_all
      if (elData.stereoMode == 3)
      {
-        for (g = 0; g < elData.groupingData[0].numWindowGroups; g++)
+        for (g = 0; g < grp.numWindowGroups; g++)
        {
          const uint8_t* const gCplxPredUsed = &elData.stereoData[m_numSwbShort * g];

@ -329,9 +332,50 @@ unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData,
        }
        bitCount += ((maxSfbSte + 1) / SFB_PER_PRED_BAND) * g;
      }
-      // pred_dir and complex_coef. TODO: rest of cplx_pred_data()
-      m_auBitStream.write (elData.stereoConfig & 3, 2);
+      m_auBitStream.write (elData.stereoConfig & 3, 2);// pred_dir
      bitCount += 3;
+      if (!indepFlag) // use_prev_frame (&4), delta_code_time (&8)
+      {
+        if (complexCoef)
+        {
+          m_auBitStream.write (elData.stereoConfig & 4 ? 1 : 0, 1);
+          bitCount++;
+        }
+        m_auBitStream.write (elData.stereoConfig & 8 ? 1 : 0, 1);
+        bitCount++;
+      }
+      // TODO: complete the following code for delta_code_time > 0
+      for (g = 0; g < grp.numWindowGroups; g++)
+      {
+        const uint8_t* const gCplxPredUsed = &elData.stereoData[m_numSwbShort * g];
+        uint8_t aqReIdxPred = 16, aqImIdxPred = 16; // alpha_q = 0
+
+        for (b = 0; b < maxSfbSte; b += SFB_PER_PRED_BAND)
+        {
+          if (gCplxPredUsed[b] > 0) // write dpcm_alpha_q_re/_q_im
+          {
+            uint8_t aqIdx = gCplxPredUsed[b] & 31; // -15,..0,..15
+            int aqIdxDpcm = (int) aqIdx - aqReIdxPred;
+            unsigned bits = entrCoder.indexGetBitCount (aqIdxDpcm);
+
+            aqReIdxPred = aqIdx;
+            m_auBitStream.write (entrCoder.indexGetHuffCode (aqIdxDpcm), bits);
+            bitCount += bits;
+
+            if (complexCoef)
+            {
+              aqIdx = gCplxPredUsed[b + 1] & 31; // <32 kHz short!
+              aqIdxDpcm = (int) aqIdx - aqImIdxPred;
+              bits = entrCoder.indexGetBitCount (aqIdxDpcm);
+
+              aqImIdxPred = aqIdx;
+              m_auBitStream.write (entrCoder.indexGetHuffCode (aqIdxDpcm), bits);
+              bitCount += bits;
+            }
+          }
+          else aqReIdxPred = aqImIdxPred = 16;
+        }
+      } // for g
    }
 #endif
  } // common_window
@ -517,7 +561,7 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData,
        m_auBitStream.write (CORE_MODE_FD, 1); // L
        m_auBitStream.write (CORE_MODE_FD, 1); // R
        bitCount += 2;
-        bitCount += writeStereoCoreToolInfo (*elData,
+        bitCount += writeStereoCoreToolInfo (*elData, entropyCoder[ci], // L
 #if !RESTRICT_TO_AAC
                                             tw_mdct[el],
 #endif
--- a/src/lib/bitStreamWriter.h
+++ b/src/lib/bitStreamWriter.h
@ -41,7 +41,7 @@ private:
                                    const bool timeWarping, const bool noiseFilling,
 #endif
                                    const bool indepFlag = false);
-  unsigned writeStereoCoreToolInfo (const CoreCoderData& elData,
+  unsigned writeStereoCoreToolInfo (const CoreCoderData& elData, EntropyCoder& entrCoder,
 #if !RESTRICT_TO_AAC
                                    const bool timeWarping,
 #endif
--- a/src/lib/exhaleEnc.cpp
+++ b/src/lib/exhaleEnc.cpp
@ -235,7 +235,14 @@ static inline uint8_t brModeAndFsToMaxSfbShort(const unsigned bitRateMode, const
  return (samplingRate > 51200 ? 11 : 13) - 2 + (bitRateMode >> 2);
 }

-#if !SA_IMPROVED_REAL_ABS
+#if SA_IMPROVED_REAL_ABS
+static inline uint32_t squareMeanRoot (const uint32_t value1, const uint32_t value2) // returns ((sqrt(value1) + sqrt(value2)) / 2)^2, rounded to the nearest integer
+{
+  const double meanRoot = (sqrt ((double) value1) + sqrt ((double) value2)) * 0.5; // arithmetic mean of the two square roots, evaluated in double precision
+
+  return uint32_t (meanRoot * meanRoot + 0.5); // square the mean again; adding 0.5 before the truncating cast rounds to nearest
+}
+#else
 static inline uint32_t getComplexRmsValue (const uint32_t rmsValue, const unsigned sfbGroup, const unsigned sfbIndex,
                                           const uint8_t numSwb, const TnsData& tnsData)
 {
@ -657,8 +664,8 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
  const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
  const uint32_t maxSfbLong      = (samplingRate < 37566 ? 51 /*32 kHz*/ : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
  const uint32_t reductionFactor = (samplingRate < 37566 ? 2 : 3);  // undercoding reduction
-  const uint64_t scaleSr         = (samplingRate < 27713 ? 37 - m_bitRateMode : 37);
-  const uint64_t scaleBr         = (m_bitRateMode == 0 ? 32 : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - ((m_bitRateMode - 1) >> 1));
+  const uint64_t scaleSr         = (samplingRate < 27713 ? 37 - m_bitRateMode : 37) - ((m_bitRateMode & 7) > 2/*TODO*/ ? nChannels >> 1 : 0);
+  const uint64_t scaleBr         = (m_bitRateMode == 0 ? 32 : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - (m_bitRateMode >> 1));
  uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf;
  uint8_t  meanSpecFlat[USAC_MAX_NUM_CHANNELS];
 //uint8_t  meanTempFlat[USAC_MAX_NUM_CHANNELS];
@ -817,14 +824,14 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
          for (b = 0; b < grpData.sfbsPerGroup; b++)
          {
 #if SA_IMPROVED_REAL_ABS
-            const uint32_t rmsComp = grpRms[b];
+            const uint32_t rmsComp = (coreConfig.stereoMode > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]);
            const uint32_t rmsRef9 = (coreConfig.commonWindow ? refRms[b] >> 9 : rmsComp);
 #else
            const uint32_t rmsComp = getComplexRmsValue (grpRms[b], gr, b, numSwbCh, coreConfig.tnsData[ch]);
            const uint32_t rmsRef9 = (!coreConfig.commonWindow ? rmsComp :
                                     getComplexRmsValue (refRms[b], gr, b, numSwbCh, coreConfig.tnsData[1 - ch]) >> 9);
 #endif
-            if (rmsComp < grpRmsMin) grpRmsMin = rmsComp;
+            if (grpRms[b] < grpRmsMin) grpRmsMin = grpRms[b];
            if (rmsComp >= rmsRef9 && (rmsComp < (grpStepSizes[b] >> 1)))  // zero-quantized
            {
              s -= ((grpOff[b + 1] - grpOff[b]) * reductionFactor * __min (2 * SA_EPS, rmsComp) + SA_EPS) >> 11; // / (2 * SA_EPS)
@ -833,7 +840,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
          if ((samplingRate >= 27713) && (b < maxSfbLong) && !eightShorts)  // uncoded coefs
          {
 #if SA_IMPROVED_REAL_ABS
-            const uint32_t rmsComp = grpRms[b];
+            const uint32_t rmsComp = (coreConfig.stereoMode > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]);
            const uint32_t rmsRef9 = (coreConfig.commonWindow ? refRms[b] >> 9 : rmsComp);
 #else
            const uint32_t rmsComp = getComplexRmsValue (grpRms[b], gr, b, numSwbCh, coreConfig.tnsData[ch]);
--- a/src/lib/quantization.cpp
+++ b/src/lib/quantization.cpp
@ -774,7 +774,7 @@ uint8_t SfbQuantizer::quantizeSpecSfb (EntropyCoder& entropyCoder, const int32_t
    {
      grpStats[sfb] = ((uint32_t) maxQBest << 16) | numQBest; // max magnitude and bit count
    }
-  } // if (sfIndex == 0)
+  } // if sfIndex == 0

  return __min (sfBest, m_maxSfIndex);
 }
--- a/src/lib/stereoProcessing.cpp
+++ b/src/lib/stereoProcessing.cpp
@ -18,15 +18,7 @@
 // constructor
 StereoProcessor::StereoProcessor ()
 {
-  for (unsigned ch = 0; ch < USAC_MAX_NUM_CHANNELS; ch++)
-  {
-    m_avgAbsHpPrev[ch] = 0;
-    m_maxAbsHpPrev[ch] = 0;
-    m_maxIdxHpPrev[ch] = 1;
-    m_pitchLagPrev[ch] = 0;
-    m_tempAnaStats[ch] = 0;
-    m_transientLoc[ch] = -1;
-  }
+  return;
 }

 // public functions
@ -37,7 +29,7 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in
                                                const uint8_t    numSwbFrame, uint8_t* const sfbStereoData,
                                                uint32_t* const sfbStepSize1, uint32_t* const sfbStepSize2)
 {
-//const bool applyPredSte = (sfbStereoData != nullptr); // use real-valued predictive stereo
+  const bool applyPredSte = (sfbStereoData != nullptr); // use real-valued predictive stereo
  const uint8_t maxSfbSte = __max (groupingData1.sfbsPerGroup, groupingData2.sfbsPerGroup);

  if ((mdctSpectrum1 == nullptr) || (mdctSpectrum2 == nullptr) || (groupingData1.numWindowGroups != groupingData2.numWindowGroups) ||
@ -104,6 +96,17 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in
          sfbNext1++; prevReM = dmixReM;
          sfbNext2++; prevReS = dmixReS;
        }
+        if (sfb + 1 == numSwbFrame) // handle remaining sample
+        {
+          const int32_t dmixReM = int32_t (((int64_t) *sfbMdct1 + (int64_t) *sfbMdct2 + 1) >> 1);
+          const int32_t dmixReS = int32_t (((int64_t) *sfbMdct1 - (int64_t) *sfbMdct2 + 1) >> 1);
+
+          sumAbsValM += abs (dmixReM);
+          sumAbsValS += abs (dmixReS);
+
+          *sfbMdct1 = dmixReM;
+          *sfbMdct2 = dmixReS;
+        }
      }
      else // complex data, both MDCTs and MDSTs are available
      {
@ -152,8 +155,9 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in
      {
        double min = __min (grpRms1[sfb], grpRms2[sfb]);
        grpStepSizes1[sfb] = grpStepSizes2[sfb] = uint32_t (__max (SP_EPS, (min > sfbRatLR * sfbRmsMaxMS ? sqrt (sfbRatLR * sfbRmsMaxMS *
-                                                                            min) : __min (1.0/*TODO*/, sfbRatLR) * sfbRmsMaxMS)) + 0.5);
+                                                                            min) : __min (1.0/*0 dB*/, sfbRatLR) * sfbRmsMaxMS)) + 0.5);
      }
+      if (applyPredSte) sfbStereoData[sfb + numSwbFrame * gr] = 16; // zero prediction coefs
    } // for sfb
  }

--- a/src/lib/stereoProcessing.h
+++ b/src/lib/stereoProcessing.h
@ -22,12 +22,6 @@ class StereoProcessor
 private:

  // member variables
-  unsigned m_avgAbsHpPrev[USAC_MAX_NUM_CHANNELS];
-  unsigned m_maxAbsHpPrev[USAC_MAX_NUM_CHANNELS];
-  unsigned m_maxIdxHpPrev[USAC_MAX_NUM_CHANNELS];
-  unsigned m_pitchLagPrev[USAC_MAX_NUM_CHANNELS];
-  uint32_t m_tempAnaStats[USAC_MAX_NUM_CHANNELS];
-  int16_t  m_transientLoc[USAC_MAX_NUM_CHANNELS];

 public: