prepare M/S stereo

2025-03-12 01:00:11 +01:00 · 2020-03-25 01:00:02 +01:00 · 2020-03-25 01:00:02 +01:00 · e4bc905be2
commit e4bc905be2
parent 7185ac995f
5 changed files with 377 additions and 88 deletions
--- a/src/lib/exhaleEnc.cpp
+++ b/src/lib/exhaleEnc.cpp
@ -117,6 +117,112 @@ static uint32_t quantizeSfbWithMinSnr (const unsigned* const coeffMagn, const ui
 }

 // inline helper functions
+static inline void applyStereoPreProcessingCplx (int32_t* mdctSample1, int32_t* mdctSample2, // in/out: real parts, get overwritten
+                                                 int32_t* mdstSample1, int32_t* mdstSample2, // in: imaginary parts, only read
+                                                 const int64_t factIn, const int64_t factDe) // weights: other / own channel
+{ // mixes one complex spectral sample of each of two channels into the other and stores the rescaled real parts
+  const int32_t  valI1 = *mdstSample1;
+  const int32_t  valI2 = *mdstSample2;
+  const int32_t  valR1 = *mdctSample1;
+  const int32_t  valR2 = *mdctSample2;
+  const int64_t  absR1 = abs (valR1);
+  const int64_t  absR2 = abs (valR2);
+  int64_t dmxI1, dmxR1 = valR1 * factDe + valR2 * factIn; // crosstalk for channel 1: own * factDe + other * factIn
+  int64_t dmxI2, dmxR2 = valR1 * factIn + valR2 * factDe; // crosstalk for channel 2: other * factIn + own * factDe
+  double n, d; // squared complex magnitudes: n of the original sample, d of its mix
+  // NOTE: dmxI1 and dmxI2 are left uninitialized above but get assigned on every path below
+  if (abs (dmxR1) < absR1 + absR2) // weighted sum is below the unweighted magnitude sum: avoid destructive summations
+  {
+    if (absR1 * factDe < absR2 * factIn) // use a difference instead, keeping the sign of the larger weighted term
+    {
+      dmxR1 = valR2 * factIn - valR1 * factDe;
+      dmxI1 = valI2 * factIn - valI1 * factDe;
+    }
+    else
+    {
+      dmxR1 = valR1 * factDe - valR2 * factIn;
+      dmxI1 = valI1 * factDe - valI2 * factIn;
+    }
+  }
+  else dmxI1 = valI1 * factDe + valI2 * factIn; // real sum is kept, so mix the imaginary parts with the same weights
+  // same procedure for channel 2, with the roles of factIn and factDe exchanged
+  if (abs (dmxR2) < absR1 + absR2) // avoid destructive summations
+  {
+    if (absR1 * factIn < absR2 * factDe)
+    {
+      dmxR2 = valR2 * factDe - valR1 * factIn;
+      dmxI2 = valI2 * factDe - valI1 * factIn;
+    }
+    else
+    {
+      dmxR2 = valR1 * factIn - valR2 * factDe;
+      dmxI2 = valI1 * factIn - valI2 * factDe;
+    }
+  }
+  else dmxI2 = valI1 * factIn + valI2 * factDe;
+  // scale each mix by sqrt (n / d) so that, for d >= 1, its complex magnitude equals that of the original sample
+  n = (double) valR1 * (double) valR1 + (double) valI1 * (double) valI1;
+  d = (double) dmxR1 * (double) dmxR1 + (double) dmxI1 * (double) dmxI1;
+  *mdctSample1 = int32_t (dmxR1 * sqrt (n / __max (1.0, d)) + (dmxR1 < 0 ? -0.5 : 0.5)); // round half away from zero
+  // only the real parts are written back, the MDST inputs stay untouched
+  n = (double) valR2 * (double) valR2 + (double) valI2 * (double) valI2;
+  d = (double) dmxR2 * (double) dmxR2 + (double) dmxI2 * (double) dmxI2;
+  *mdctSample2 = int32_t (dmxR2 * sqrt (n / __max (1.0, d)) + (dmxR2 < 0 ? -0.5 : 0.5)); // round half away from zero
+}
+
+static inline void applyStereoPreProcessingReal (int32_t* mdctSample1, int32_t* mdctSample2, // in/out: real parts, [1] is read too
+                                                 int32_t* prevSample1, int32_t* prevSample2, // in/out: preceding real samples
+                                                 const int64_t factIn, const int64_t factDe) // weights: other / own channel
+{ // like the complex-valued variant, but with imaginary parts estimated from neighboring real samples
+  const int64_t  valI1 = (*(mdctSample1 + 1) - (int64_t) *prevSample1) >> 1; // estimate: halved difference of next and
+  const int64_t  valI2 = (*(mdctSample2 + 1) - (int64_t) *prevSample2) >> 1; // previous, see also getMeanAbsValues()
+  const int32_t  valR1 = (*prevSample1 = *mdctSample1); // store the unmodified current sample as the new previous one
+  const int32_t  valR2 = (*prevSample2 = *mdctSample2);
+  const int64_t  absR1 = abs (valR1);
+  const int64_t  absR2 = abs (valR2);
+  int64_t dmxI1, dmxR1 = valR1 * factDe + valR2 * factIn; // crosstalk for channel 1: own * factDe + other * factIn
+  int64_t dmxI2, dmxR2 = valR1 * factIn + valR2 * factDe; // crosstalk for channel 2: other * factIn + own * factDe
+  double n, d; // squared complex magnitudes: n of the original sample, d of its mix
+  // NOTE: dmxI1 and dmxI2 are left uninitialized above but get assigned on every path below
+  if (abs (dmxR1) < absR1 + absR2) // weighted sum is below the unweighted magnitude sum: avoid destructive summations
+  {
+    if (absR1 * factDe < absR2 * factIn) // use a difference instead, keeping the sign of the larger weighted term
+    {
+      dmxR1 = valR2 * factIn - valR1 * factDe;
+      dmxI1 = valI2 * factIn - valI1 * factDe;
+    }
+    else
+    {
+      dmxR1 = valR1 * factDe - valR2 * factIn;
+      dmxI1 = valI1 * factDe - valI2 * factIn;
+    }
+  }
+  else dmxI1 = valI1 * factDe + valI2 * factIn; // real sum is kept, so mix the imaginary parts with the same weights
+  // same procedure for channel 2, with the roles of factIn and factDe exchanged
+  if (abs (dmxR2) < absR1 + absR2) // avoid destructive summations
+  {
+    if (absR1 * factIn < absR2 * factDe)
+    {
+      dmxR2 = valR2 * factDe - valR1 * factIn;
+      dmxI2 = valI2 * factDe - valI1 * factIn;
+    }
+    else
+    {
+      dmxR2 = valR1 * factIn - valR2 * factDe;
+      dmxI2 = valI1 * factIn - valI2 * factDe;
+    }
+  }
+  else dmxI2 = valI1 * factIn + valI2 * factDe;
+  // scale each mix by sqrt (n / d) so that, for d >= 1, its complex magnitude equals that of the original sample
+  n = (double) valR1 * (double) valR1 + (double) valI1 * (double) valI1;
+  d = (double) dmxR1 * (double) dmxR1 + (double) dmxI1 * (double) dmxI1;
+  *mdctSample1 = int32_t (dmxR1 * sqrt (n / __max (1.0, d)) + (dmxR1 < 0 ? -0.5 : 0.5)); // round half away from zero
+  // the current samples are overwritten only here, after their originals were saved in *prevSample1/2
+  n = (double) valR2 * (double) valR2 + (double) valI2 * (double) valI2;
+  d = (double) dmxR2 * (double) dmxR2 + (double) dmxI2 * (double) dmxI2;
+  *mdctSample2 = int32_t (dmxR2 * sqrt (n / __max (1.0, d)) + (dmxR2 < 0 ? -0.5 : 0.5)); // round half away from zero
+}
+
 static inline uint8_t brModeAndFsToMaxSfbLong (const unsigned bitRateMode, const unsigned samplingRate)
 {
  // max. for fs of 44 kHz: band 47 (19.3 kHz), 48 kHz: 45 (19.5 kHz), 64 kHz: 39 (22.0 kHz)
@ -134,8 +240,8 @@ static inline uint32_t getComplexRmsValue (const uint32_t rmsValue, const unsign
                                           const uint8_t numSwb, const TnsData& tnsData)
 {
  // compensate for missing MDST coefficients in RMS calculation of SFBs where TNS is active
-  return ((tnsData.numFilters > 0) && (sfbGroup == tnsData.filteredWindow) && (rmsValue <= UINT_MAX / 3) &&
-          (tnsData.filterLength[0] + sfbIndex >= numSwb) ? (rmsValue * 3u) >> 1 : rmsValue);
+  return ((tnsData.numFilters > 0) && (sfbGroup == tnsData.filteredWindow) && (rmsValue <= UINT_MAX / 5) &&
+          (tnsData.filterLength[0] + sfbIndex >= numSwb) ? (rmsValue * 5u) >> 2 : rmsValue);
 }
 #endif

@ -296,7 +402,6 @@ unsigned ExhaleEncoder::applyTnsToWinGroup (TnsData& tnsData, SfbGroupData& grpD
 {
  const uint16_t filtOrder = tnsData.filterOrder[0];
  const uint16_t*    grpSO = &grpData.sfbOffsets[m_numSwbShort * tnsData.filteredWindow];
-  const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
  unsigned errorValue = 0; // no error

  if ((maxSfb > (eightShorts ? 15 : 51)) || (channelIndex >= USAC_MAX_NUM_CHANNELS))
@ -306,7 +411,7 @@ unsigned ExhaleEncoder::applyTnsToWinGroup (TnsData& tnsData, SfbGroupData& grpD

  if (filtOrder > 0) // determine TNS filter length in SFBs and apply TNS analysis filtering
  {
-    uint8_t numSwbFrame = (eightShorts ? numSwbOffsetS[m_swbTableIdx] : numSwbOffsetL[m_swbTableIdx]) - 1;
+    const int numSwbWin = (eightShorts ? m_numSwbShort : m_numSwbLong);
    uint8_t tnsMaxBands = tnsScaleFactorBandLimit[eightShorts ? 1 : 0][m_swbTableIdx];
    uint8_t tnsStartSfb = 3 + 32000 / toSamplingRate (m_frequencyIdx);  // 8-short TNS start

@ -315,22 +420,15 @@ unsigned ExhaleEncoder::applyTnsToWinGroup (TnsData& tnsData, SfbGroupData& grpD
      const unsigned samplingRate = toSamplingRate (m_frequencyIdx); // refine TNS_MAX_BANDS
      const unsigned tnsStartOffs = (m_specAnaCurr[channelIndex] & 31) << SA_BW_SHIFT;

-      if ((samplingRate >= 46009) && (samplingRate < 55426)) // ~48kHz
-      {
-        numSwbFrame = 49;
-        tnsMaxBands = 40;
-      }
+      if ((samplingRate >= 46009) && (samplingRate < 55426)) tnsMaxBands = 40; // for 48 kHz
      else
-      if ((samplingRate >= 37566) && (samplingRate < 46009)) // ~44kHz
-      {
-        numSwbFrame = 49;
-        tnsMaxBands = 42;
-      }
+      if ((samplingRate >= 37566) && (samplingRate < 46009)) tnsMaxBands = 42; // & 44.1 kHz
+
      while (grpSO[tnsStartSfb] < tnsStartOffs) tnsStartSfb++;  // start band for TNS filter
    }
    tnsMaxBands = __min (tnsMaxBands, maxSfb);

-    if ((tnsData.filterLength[0] = __max (0, numSwbFrame - (int) tnsStartSfb)) > 0)
+    if ((tnsData.filterLength[0] = __max (0, numSwbWin - tnsStartSfb)) > 0)
    {
      int32_t* const mdctSignal = m_mdctSignals[channelIndex];
      const short offs = grpSO[tnsStartSfb];
@ -604,18 +702,97 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
    }
    else // SCE or CPE: bandwidth-to-max_sfb mapping, short-window grouping for each channel
    {
+   // if ((coreConfig.stereoMode == 0) && (m_perCorrCurr[el] > SCHAR_MAX)) coreConfig.stereoMode = 1;
+
+      if (coreConfig.commonWindow && (m_perCorrCurr[el] > 128)) // run stereo pre-processing
+      {
+        const bool     eightShorts = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT);
+        const uint16_t nSamplesMax = (samplingRate < 37566 ? nSamplesInFrame : swbOffsetsL[m_swbTableIdx][maxSfbLong]);
+        const uint8_t steppFadeLen = (eightShorts ? 4 : (coreConfig.tnsActive ? 32 : 64));
+        const uint8_t steppFadeOff = ((m_bitRateMode + 1) & 6) << (eightShorts ? 2 : 5);
+        const int64_t steppWeightI = __min (64, m_perCorrCurr[el] - 128) >> (eightShorts || coreConfig.tnsActive ? 1 : 0);
+        const int64_t steppWeightD = 128 - steppWeightI; // decrement, (1 - crosstalk) * 128
+
+        for (uint16_t gr = 0; gr < coreConfig.groupingData[0].numWindowGroups; gr++)
+        {
+          const uint8_t grpLength = coreConfig.groupingData[0].windowGroupLength[gr];
+          const uint16_t*  grpOff = &coreConfig.groupingData[0].sfbOffsets[m_numSwbShort * gr];
+          const uint16_t grpStart = grpOff[0] + steppFadeOff * grpLength;
+          int32_t* sigR0 = &m_mdctSignals[ci][grpStart];
+          int32_t* sigR1 = &m_mdctSignals[ci + 1][grpStart];
+          int64_t xTalkI = 0, xTalkD = 0; // weights for crosstalk
+
+          if (coreConfig.tnsActive && (gr == coreConfig.tnsData[0].filteredWindow || gr == coreConfig.tnsData[1].filteredWindow))
+          {
+            const uint16_t maxLen = (eightShorts ? grpOff[m_numSwbShort] - 1 : __min (nSamplesInFrame - 1u, nSamplesMax)) - grpStart;
+            int32_t prevR0 = 0; // NOTE: functions also on grouped
+            int32_t prevR1 = 0; // MDCT spectra, but not properly!
+
+            for (uint16_t w = 0; w < grpLength; w++) // sub-window
+            {
+              prevR0 = *(sigR0++); prevR1 = *(sigR1++); // processing starts at offset of 1!
+              xTalkI = steppWeightI;
+              xTalkD = steppWeightD * (2 * steppFadeLen - 1);
+
+              for (s = steppFadeLen - 1; s > 0; s--, sigR0++, sigR1++) // start with fade-in
+              {
+                applyStereoPreProcessingReal (sigR0, sigR1, &prevR0, &prevR1, xTalkI, xTalkD);
+                xTalkI += steppWeightI;
+                xTalkD -= steppWeightD;
+              }
+            }
+            for (s = maxLen - steppFadeLen * grpLength; s > 0; s--, sigR0++, sigR1++) // end
+            {
+              applyStereoPreProcessingReal (sigR0, sigR1, &prevR0, &prevR1, xTalkI, xTalkD);
+            }
+          }
+          else // TNS inactive, both MDCTs and MDSTs are available
+          {
+            const uint16_t maxLen = (eightShorts ? grpOff[m_numSwbShort] : nSamplesMax) - grpStart;
+            int32_t* sigI0 = &m_mdstSignals[ci][grpStart]; // imag
+            int32_t* sigI1 = &m_mdstSignals[ci + 1][grpStart];
+
+            for (uint16_t w = 0; w < grpLength; w++) // sub-window
+            {
+              sigR0++;  sigR1++;  sigI0++;  sigI1++; // processing starts at an offset of 1!
+              xTalkI = steppWeightI;
+              xTalkD = steppWeightD * (2 * steppFadeLen - 1);
+
+              for (s = steppFadeLen - 1; s > 0; s--, sigR0++, sigR1++, sigI0++, sigI1++)
+              {
+                applyStereoPreProcessingCplx (sigR0, sigR1, sigI0, sigI1, xTalkI, xTalkD);
+                xTalkI += steppWeightI;
+                xTalkD -= steppWeightD;
+              }
+            }
+            for (s = maxLen - steppFadeLen * grpLength; s > 0; s--, sigR0++, sigR1++, sigI0++, sigI1++)
+            {
+              applyStereoPreProcessingCplx (sigR0, sigR1, sigI0, sigI1, xTalkI, xTalkD);
+            }
+          } // if coreConfig.tnsActive
+        }
+      } // if coreConfig.commonWindow
+
+      if (coreConfig.stereoMode > 0)  // synch spectral statistics
+      {
+        const uint32_t peakIndexSte = __max ((m_specAnaCurr[ci] >> 5) & 2047, (m_specAnaCurr[ci + 1] >> 5) & 2047) << 5;
+
+        // TODO: M/S matrixing, update of grpData{0,1}.sfbRmsValues and &sfbStepSizes[(ci + {0,1}) * m_numSwbShort * NUM_WINDOW_GROUPS]
+
+        m_specAnaCurr[ci    ] = (m_specAnaCurr[ci    ] & (UINT_MAX - 65504)) | peakIndexSte;
+        m_specAnaCurr[ci + 1] = (m_specAnaCurr[ci + 1] & (UINT_MAX - 65504)) | peakIndexSte;
+        meanSpecFlat[ci] = meanSpecFlat[ci + 1] = ((uint16_t) meanSpecFlat[ci] + (uint16_t) meanSpecFlat[ci + 1]) >> 1;
+     // meanTempFlat[ci] = meanTempFlat[ci + 1] = ((uint16_t) meanTempFlat[ci] + (uint16_t) meanTempFlat[ci + 1]) >> 1;
+      }
+
      for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
      {
        SfbGroupData&  grpData = coreConfig.groupingData[ch];
        const bool eightShorts = (coreConfig.icsInfoCurr[ch].windowSequence == EIGHT_SHORT);
+        const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong);
        const uint8_t  mSfmFac = eightTimesSqrt256Minus[meanSpecFlat[ci]];
        uint32_t*    stepSizes = &sfbStepSizes[ci * m_numSwbShort * NUM_WINDOW_GROUPS];
-        uint8_t    numSwbFrame = (eightShorts ? numSwbOffsetS[m_swbTableIdx] : numSwbOffsetL[m_swbTableIdx]) - 1;

-        if (!eightShorts && (samplingRate >= 37566) && (samplingRate < 55426)) // fix numSwb
-        {
-          numSwbFrame = 49;
-        }
        memset (grpData.scaleFactors, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint8_t));

        for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++)
@ -635,9 +812,9 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
            const uint32_t rmsComp = grpRms[b];
            const uint32_t rmsRef9 = (coreConfig.commonWindow ? refRms[b] >> 9 : rmsComp);
 #else
-            const uint32_t rmsComp = getComplexRmsValue (grpRms[b], gr, b, numSwbFrame, coreConfig.tnsData[ch]);
+            const uint32_t rmsComp = getComplexRmsValue (grpRms[b], gr, b, numSwbCh, coreConfig.tnsData[ch]);
            const uint32_t rmsRef9 = (!coreConfig.commonWindow ? rmsComp :
-                                     getComplexRmsValue (refRms[b], gr, b, numSwbFrame, coreConfig.tnsData[1 - ch]) >> 9);
+                                     getComplexRmsValue (refRms[b], gr, b, numSwbCh, coreConfig.tnsData[1 - ch]) >> 9);
 #endif
            if (rmsComp < grpRmsMin) grpRmsMin = rmsComp;
            if (rmsComp >= rmsRef9 && (rmsComp < (grpStepSizes[b] >> 1)))  // zero-quantized
@ -651,9 +828,9 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
            const uint32_t rmsComp = grpRms[b];
            const uint32_t rmsRef9 = (coreConfig.commonWindow ? refRms[b] >> 9 : rmsComp);
 #else
-            const uint32_t rmsComp = getComplexRmsValue (grpRms[b], gr, b, numSwbFrame, coreConfig.tnsData[ch]);
+            const uint32_t rmsComp = getComplexRmsValue (grpRms[b], gr, b, numSwbCh, coreConfig.tnsData[ch]);
            const uint32_t rmsRef9 = (!coreConfig.commonWindow ? rmsComp :
-                                     getComplexRmsValue (refRms[b], gr, b, numSwbFrame, coreConfig.tnsData[1 - ch]) >> 9);
+                                     getComplexRmsValue (refRms[b], gr, b, numSwbCh, coreConfig.tnsData[1 - ch]) >> 9);
 #endif
            if (rmsComp >= rmsRef9) // check only first SFB above max_sfb for simplification
            {
@ -680,7 +857,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
 #if !RESTRICT_TO_AAC
        if (grpData.sfbsPerGroup > 0 && m_noiseFilling[el] && !eightShorts) // HF noise-fill
        {
-          numSwbFrame = __min (numSwbFrame, maxSfbLong); // bit-rate dependent max bandwidth
+          const uint8_t numSwbFrame = __min (numSwbCh, maxSfbLong);  // rate based bandwidth

          if (grpData.sfbsPerGroup < numSwbFrame)
          {
@ -738,6 +915,12 @@ unsigned ExhaleEncoder::quantizationCoding ()  // apply MDCT quantization and en
    CoreCoderData& coreConfig = *m_elementData[el];
    const unsigned nrChannels = (coreConfig.elementType & 1) + 1; // for UsacCoreCoderData()

+    if ((coreConfig.elementType < ID_USAC_LFE) && (coreConfig.stereoMode > 0)) // synch SFMs
+    {
+      meanSpecFlat[ci] = meanSpecFlat[ci + 1] = ((uint16_t) meanSpecFlat[ci] + (uint16_t) meanSpecFlat[ci + 1]) >> 1;
+   // meanTempFlat[ci] = meanTempFlat[ci + 1] = ((uint16_t) meanTempFlat[ci] + (uint16_t) meanTempFlat[ci + 1]) >> 1;
+    }
+
    for (unsigned ch = 0; ch < nrChannels; ch++)   // channel loop
    {
      EntropyCoder& entrCoder = m_entropyCoder[ci];
@ -819,12 +1002,7 @@ unsigned ExhaleEncoder::quantizationCoding ()  // apply MDCT quantization and en
          const uint8_t maxSfbLong  = (samplingRate < 37566 ? 51 /*32 kHz*/ : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
          const uint8_t maxSfbShort = (samplingRate < 37566 ? 14 /*32 kHz*/ : brModeAndFsToMaxSfbShort(m_bitRateMode, samplingRate));
          const uint16_t peakIndex  = (shortWinCurr ? 0 : (m_specAnaCurr[ci] >> 5) & 2047);
-#if RESTRICT_TO_AAC
-          const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort : maxSfbLong) - 6 + (m_bitRateMode >> 1) + ((m_specAnaCurr[ci] >> 21) & 7);
-#else
-          const unsigned highFreqMinStart = (m_noiseFilling[el] ? 6 : 6 - (m_bitRateMode >> 1));
-          const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort : maxSfbLong) - highFreqMinStart + ((m_specAnaCurr[ci] >> 21) & 7);
-#endif
+          const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort : maxSfbLong) - 5 + (m_bitRateMode >> 1) + (meanSpecFlat[ci] >> 5);
          const unsigned targetBitCountX2 = ((48000 + 16000 * m_bitRateMode) * nSamplesInFrame) / (samplingRate * grpData.numWindowGroups);
          unsigned b = grpData.sfbsPerGroup - 1;

@ -919,7 +1097,7 @@ unsigned ExhaleEncoder::quantizationCoding ()  // apply MDCT quantization and en
                grpScaleFacs[b] = grpScaleFacs[b - 1];
              }
            }
-          } // if (estimBitCount > targetBitCountX2)
+          } // if estimBitCount > targetBitCountX2

          for (b = lastSfb + 1; b < grpData.sfbsPerGroup; b++)
          {
@ -974,7 +1152,7 @@ unsigned ExhaleEncoder::spectralProcessing ()  // complete ics_info(), calc TNS
  unsigned errorValue = 0; // no error

  // get spectral channel statistics for last frame, used for input bandwidth (BW) detection
-  m_specAnalyzer.getSpecAnalysisStats (m_specAnaPrev, nChannels);
+//m_specAnalyzer.getSpecAnalysisStats (m_specAnaPrev, nChannels);
  m_specAnalyzer.getSpectralBandwidth (m_bandwidPrev, nChannels);

  // spectral analysis for current MCLT signal (windowed time-samples for the current frame)
@ -1011,6 +1189,33 @@ unsigned ExhaleEncoder::spectralProcessing ()  // complete ics_info(), calc TNS
    {
      coreConfig.stereoConfig = coreConfig.stereoMode = 0;

+      if (coreConfig.commonWindow && (m_bitRateMode <= 4)) // stereo pre-processing analysis
+      {
+        const bool     eightShorts = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT);
+        const uint16_t* const swbo = swbOffsetsL[m_swbTableIdx];
+        const uint16_t nSamplesMax = (samplingRate < 37566 ? nSamplesInFrame : swbo[brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)]);
+        const int16_t  steAnaStats = m_specAnalyzer.stereoSigAnalysis (m_mdctSignals[ci], m_mdctSignals[ci + 1],
+                                                                       m_mdstSignals[ci], m_mdstSignals[ci + 1], nSamplesMax,
+                                                                       nSamplesInFrame, eightShorts, (uint8_t* const) coreConfig.stereoData);
+        if (steAnaStats == SHRT_MIN) errorValue = 1;
+
+        if ((s = abs (steAnaStats)) * m_perCorrCurr[el] == 0) // transitions to/from silence
+        {
+          m_perCorrCurr[el] = (uint8_t) s;
+        }
+        else // gentle overlap length dependent temporal smoothing
+        {
+          const int16_t allowedDiff = (coreConfig.icsInfoCurr[0].windowSequence < EIGHT_SHORT ? 16 : 32);
+          const int16_t prevPerCorr = __max (128, __min (192, m_perCorrCurr[el]));
+
+          m_perCorrCurr[el] = (uint8_t) __max (prevPerCorr - allowedDiff, __min (prevPerCorr + allowedDiff, (int16_t) s));
+        }
+
+        if (s == steAnaStats * -1) coreConfig.stereoConfig = 2; // 2: side > mid, pred_dir=1
+     // if (s > (UCHAR_MAX * 3) / 4) coreConfig.stereoMode = 2; // 2: all, ms_mask_present=2
+      }
+      else if (coreConfig.commonWindow) m_perCorrCurr[el] = 128; // update with midway value
+
      for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
      {
        SfbGroupData& grpData = coreConfig.groupingData[ch];
@ -1027,13 +1232,15 @@ unsigned ExhaleEncoder::spectralProcessing ()  // complete ics_info(), calc TNS
          icsCurr.maxSfb = 0;
          while (grpSO[icsCurr.maxSfb] < nSamplesInFrame) icsCurr.maxSfb++;  // num_swb_long
          grpSO[icsCurr.maxSfb] = (uint16_t) nSamplesInFrame;
-          grpData.sfbsPerGroup = icsCurr.maxSfb; // initialization, changed to max_sfb later
+          grpData.sfbsPerGroup = m_numSwbLong = icsCurr.maxSfb;  // changed to max_sfb later

          if (samplingRate > 32000) // set max_sfb based on VBR mode and bandwidth detection
          {
+            if (icsCurr.maxSfb > 49) // may still be 51 for 32 kHz
+            {
+              grpData.sfbsPerGroup = m_numSwbLong = icsCurr.maxSfb = 49; // fix 44.1, 48 kHz
+            }
            icsCurr.maxSfb = __min (icsCurr.maxSfb, brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
-
-            if (grpData.sfbsPerGroup > 49) grpData.sfbsPerGroup = 49; // for 44.1 and 48 kHz
          }
          while (grpSO[icsCurr.maxSfb] > __max (m_bandwidCurr[ci], m_bandwidPrev[ci])) icsCurr.maxSfb--; // BW detector
        }
@ -1090,7 +1297,7 @@ unsigned ExhaleEncoder::spectralProcessing ()  // complete ics_info(), calc TNS
        uint8_t& maxSfb1 = coreConfig.icsInfoCurr[1].maxSfb;
        const uint8_t maxSfbSte = __max (maxSfb0, maxSfb1);   // max_sfb_ste, as in Table 24

-        if ((maxSfb0 > 0) && (maxSfb1 > 0) && (maxSfbSte - __min (maxSfb0, maxSfb1) <= 1))
+        if ((maxSfb0 > 0) && (maxSfb1 > 0) && (maxSfbSte - __min (maxSfb0, maxSfb1) <= 1 || coreConfig.stereoMode == 2))
        {
          uint32_t& sa0 = m_specAnaCurr[ci-2];
          uint32_t& sa1 = m_specAnaCurr[ci-1];
@ -1394,6 +1601,7 @@ ExhaleEncoder::ExhaleEncoder (int32_t* const inputPcmData,           unsigned ch
 #if !RESTRICT_TO_AAC
  m_nonMpegExt   = useEcodisExt;
 #endif
+  m_numSwbLong   = 51;  // maximum
  m_numSwbShort  = MAX_NUM_SWB_SHORT;
  m_outAuData    = outputAuData;
  m_pcm24Data    = inputPcmData;
@ -1405,6 +1613,7 @@ ExhaleEncoder::ExhaleEncoder (int32_t* const inputPcmData,           unsigned ch
    const ELEM_TYPE et = elementTypeConfig[m_channelConf % USAC_MAX_NUM_ELCONFIGS][el];  // usacElementType

    m_elementData[el]  = nullptr;
+    m_perCorrCurr[el]  = 0;
 #if !RESTRICT_TO_AAC
    m_noiseFilling[el] = (useNoiseFilling && (et < ID_USAC_LFE));
    m_timeWarping[el]  = (false /* N/A */ && (et < ID_USAC_LFE));
@ -1420,7 +1629,7 @@ ExhaleEncoder::ExhaleEncoder (int32_t* const inputPcmData,           unsigned ch
    m_mdstSignals[ch]  = nullptr;
    m_scaleFacData[ch] = nullptr;
    m_specAnaCurr[ch]  = 0;
-    m_specAnaPrev[ch]  = 0;
+  //m_specAnaPrev[ch]  = 0;
    m_tempAnaCurr[ch]  = 0;
    m_tempAnaNext[ch]  = 0;
    m_timeSignals[ch]  = nullptr;
--- a/src/lib/exhaleEnc.h
+++ b/src/lib/exhaleEnc.h
@ -83,15 +83,17 @@ private:
  bool            m_nonMpegExt;
 #endif
  uint8_t         m_numElements;
+  uint8_t         m_numSwbLong;
  uint8_t         m_numSwbShort;
  unsigned char*  m_outAuData;
  BitStreamWriter m_outStream; // for access unit creation
  int32_t*        m_pcm24Data;
+  uint8_t         m_perCorrCurr[USAC_MAX_NUM_ELEMENTS];
  SfbGroupData*   m_scaleFacData[USAC_MAX_NUM_CHANNELS];
  SfbQuantizer    m_sfbQuantizer; // powerlaw quantization
  SpecAnalyzer    m_specAnalyzer; // for spectral analysis
  uint32_t        m_specAnaCurr[USAC_MAX_NUM_CHANNELS];
-  uint32_t        m_specAnaPrev[USAC_MAX_NUM_CHANNELS];
+//uint32_t        m_specAnaPrev[USAC_MAX_NUM_CHANNELS];
 #if !RESTRICT_TO_AAC
  SpecGapFiller   m_specGapFiller;// for noise/gap filling
 #endif
--- a/src/lib/exhaleLibPch.cpp
+++ b/src/lib/exhaleLibPch.cpp
@ -62,7 +62,7 @@ char toSamplingFrequencyIndex (const unsigned samplingRate)
      return i;
    }
 #if !RESTRICT_TO_AAC
-    if (samplingRate == allowedSamplingRates[i + AAC_NUM_SAMPLE_RATES])
+    if (samplingRate == allowedSamplingRates[i + AAC_NUM_SAMPLE_RATES] && (samplingRate % 19200) == 0) // Baseline USAC
    {
      return i + AAC_NUM_SAMPLE_RATES + 2;  // skip reserved entry
    }
--- a/src/lib/specAnalysis.cpp
+++ b/src/lib/specAnalysis.cpp
@ -240,7 +240,7 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
  const unsigned thresholdSlope = (48000 + SA_EPS * samplingRate) / 96000;
  const unsigned thresholdStart = samplingRate >> 15;

-  if ((mdctSignals == nullptr) || (nChannels > USAC_MAX_NUM_CHANNELS) || (lfeChannelIndex > USAC_MAX_NUM_CHANNELS) ||
+  if ((mdctSignals == nullptr) || (mdstSignals == nullptr) || (nChannels > USAC_MAX_NUM_CHANNELS) || (lfeChannelIndex > USAC_MAX_NUM_CHANNELS) ||
      (nSamplesInFrame > 2048) || (nSamplesInFrame < 2) || (samplingRate < 7350) || (samplingRate > 96000))
  {
    return 1; // invalid arguments error
@ -249,7 +249,7 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
  for (unsigned ch = 0; ch < nChannels; ch++)
  {
    const int32_t* const chMdct = mdctSignals[ch];
-    const int32_t* const chMdst = (mdstSignals == nullptr ? nullptr : mdstSignals[ch]);
+    const int32_t* const chMdst = mdstSignals[ch];
 // --- get L1 norm and max value in each band
    uint16_t idxMaxSpec = 0;
    uint64_t sumAvgBand = 0;
@ -272,61 +272,37 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
    {
      const uint16_t         offs = b << SA_BW_SHIFT; // start offset of current analysis band
      const int32_t* const  bMdct = &chMdct[offs];
-      const int32_t* const  bMdst = (chMdst == nullptr ? nullptr : &chMdst[offs]);
-      uint16_t  maxAbsIdx = 0;
-      uint32_t  maxAbsVal = 0, tmp = UINT_MAX;
-      uint64_t  sumAbsVal = 0;
+      const int32_t* const  bMdst = &chMdst[offs];
+      uint16_t maxAbsIdx = 0;
+      uint32_t maxAbsVal = 0, tmp = UINT_MAX;
+      uint64_t sumAbsVal = 0;

-      if (bMdst != nullptr) // complex-valued spectrum
+      for (int s = SA_BW - 1; s >= 0; s--)
      {
-        for (int s = SA_BW - 1; s >= 0; s--)
-        {
-          // sum absolute values of complex signal, derive L1 norm, peak value, and peak index
+        // sum absolute values of complex spectrum, derive L1 norm, peak value, and peak index
 #if SA_EXACT_COMPLEX_ABS
-          const double  complexSqr = (double) bMdct[s] * (double) bMdct[s] + (double) bMdst[s] * (double) bMdst[s];
-          const uint32_t absSample = uint32_t (sqrt (complexSqr) + 0.5);
+        const double  complexSqr = (double) bMdct[s] * (double) bMdct[s] + (double) bMdst[s] * (double) bMdst[s];
+        const uint32_t absSample = uint32_t (sqrt (complexSqr) + 0.5);
 #else
-          const uint32_t absReal   = abs (bMdct[s]); // Richard Lyons, 1997; en.wikipedia.org/
-          const uint32_t absImag   = abs (bMdst[s]); // wiki/Alpha_max_plus_beta_min_algorithm
-          const uint32_t absSample = (absReal > absImag ? absReal + ((absImag * 3) >> 3) : absImag + ((absReal * 3) >> 3));
+        const uint32_t absReal   = abs (bMdct[s]);   // Richard Lyons, 1997; en.wikipedia.org/
+        const uint32_t absImag   = abs (bMdst[s]);   // wiki/Alpha_max_plus_beta_min_algorithm
+        const uint32_t absSample = (absReal > absImag ? absReal + ((absImag * 3) >> 3) : absImag + ((absReal * 3) >> 3));
 #endif
-          sumAbsVal += absSample;
-          if (offs + s > 0) // exclude DC from max/min
-          {
-            if (maxAbsVal < absSample) // maximum data
-            {
-              maxAbsVal = absSample;
-              maxAbsIdx = (uint16_t) s;
-            }
-            if (tmp/*min*/> absSample) // minimum data
-            {
-              tmp/*min*/= absSample;
-            }
-          } // b > 0
-        }
-      }
-      else  // real-valued spectrum, no imaginary part
-      {
-        for (int s = SA_BW - 1; s >= 0; s--)
+        sumAbsVal += absSample;
+        if (offs + s > 0) // exclude DC from max & min
        {
-          // obtain absolute values of real signal, derive L1 norm, peak value, and peak index
-          const uint32_t absSample = abs (bMdct[s]);
-
-          sumAbsVal += absSample;
-          if (offs + s > 0) // exclude DC from max/min
+          if (maxAbsVal < absSample) // update maximum
          {
-            if (maxAbsVal < absSample) // maximum data
-            {
-              maxAbsVal = absSample;
-              maxAbsIdx = (uint16_t) s;
-            }
-            if (tmp/*min*/> absSample) // minimum data
-            {
-              tmp/*min*/= absSample;
-            }
+            maxAbsVal = absSample;
+            maxAbsIdx = (uint16_t) s;
+          }
+          if (tmp/*min*/> absSample) // update minimum
+          {
+            tmp/*min*/= absSample;
          }
        }
-      }
+      } // for s
+
      // bandwidth detection
      if ((m_bandwidthOff[ch] == 0) && (maxAbsVal > __max (thresholdSlope * (thresholdStart + b), SA_EPS)))
      {
@ -362,3 +338,101 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M

  return 0; // no error
 }
+
+int16_t SpecAnalyzer::stereoSigAnalysis (const int32_t* const mdctSignal1, const int32_t* const mdctSignal2,
+                                         const int32_t* const mdstSignal1, const int32_t* const mdstSignal2,
+                                         const unsigned nSamplesMax, const unsigned nSamplesInFrame, const bool shortTransforms,
+                                         uint8_t* const stereoCorrValue /*= nullptr*/) // optional per-band correlation output
+{ // returns SHRT_MIN on invalid arguments, else the frame-average correlation value, negated if side dominates mid
+  const uint64_t anaBwOffset = SA_BW >> 1; // rounding offset for the divisions by SA_BW done via right-shifts
+  const uint16_t numAnaBands = (shortTransforms ? nSamplesInFrame : nSamplesMax) >> SA_BW_SHIFT;
+  const uint16_t numAnaModul = (shortTransforms ? numAnaBands >> 3 : numAnaBands + 1);
+  int16_t b;
+  // stereoCorrValue is deliberately not validated here: it defaults to nullptr, in which case nothing is written
+  if ((mdctSignal1 == nullptr) || (mdctSignal2 == nullptr) || (mdstSignal1 == nullptr) || (mdstSignal2 == nullptr) ||
+      (nSamplesInFrame > 2048) || (nSamplesMax > 2048) || (numAnaBands == 0) || (numAnaModul == 0))
+  {
+    b = SHRT_MIN; // invalid arguments error
+  }
+  else
+  {
+    uint16_t currPC = 0, numPC = 0; // frame-average correlation
+    uint64_t sumReM = 0, sumReS = 0;// mid-side RMS distribution
+
+    for (b = numAnaBands - 1; b >= 0; b--)
+    {
+      const uint16_t anaBandModul = b % numAnaModul;  // to exclude first and last window band
+      const uint16_t         offs = b << SA_BW_SHIFT; // start offset of current analysis band
+      const int32_t* const lbMdct = &mdctSignal1[offs];
+      const int32_t* const lbMdst = &mdstSignal1[offs];
+      const int32_t* const rbMdct = &mdctSignal2[offs];
+      const int32_t* const rbMdst = &mdstSignal2[offs];
+      uint64_t sumMagnL = 0, sumMagnR = 0; // temporary RMS sums
+      uint64_t sumPrdLR = 0, sumPrdLL = 0, sumPrdRR = 0;
+      uint64_t sumRealL = 0, sumRealR = 0;
+      uint64_t sumRealM = 0, sumRealS = 0, sumPrdMS; // mid-side
+      double nlr, dll, drr;
+
+      for (int s = SA_BW - 1; s >= 0; s--)
+      {
+        const uint32_t absRealL  = abs (lbMdct[s]);
+        const uint32_t absRealR  = abs (rbMdct[s]);
+#if SA_EXACT_COMPLEX_ABS
+        const double complexSqrL = (double) lbMdct[s] * (double) lbMdct[s] + (double) lbMdst[s] * (double) lbMdst[s];
+        const uint32_t absMagnL  = uint32_t (sqrt (complexSqrL) + 0.5);
+        const double complexSqrR = (double) rbMdct[s] * (double) rbMdct[s] + (double) rbMdst[s] * (double) rbMdst[s];
+        const uint32_t absMagnR  = uint32_t (sqrt (complexSqrR) + 0.5);
+#else
+        const uint32_t absImagL  = abs (lbMdst[s]);  // Richard Lyons, 1997; en.wikipedia.org/
+        const uint32_t absImagR  = abs (rbMdst[s]);  // wiki/Alpha_max_plus_beta_min_algorithm
+        const uint32_t absMagnL  = (absRealL > absImagL ? absRealL + ((absImagL * 3) >> 3) : absImagL + ((absRealL * 3) >> 3));
+        const uint32_t absMagnR  = (absRealR > absImagR ? absRealR + ((absImagR * 3) >> 3) : absImagR + ((absRealR * 3) >> 3));
+#endif
+        sumRealL += absRealL;
+        sumRealR += absRealR;
+        sumRealM += abs (lbMdct[s] + rbMdct[s]); // i.e., 2*mid,
+        sumRealS += abs (lbMdct[s] - rbMdct[s]); // i.e., 2*side
+
+        sumMagnL += absMagnL;
+        sumMagnR += absMagnR;
+        sumPrdLR += ((uint64_t) absMagnL * (uint64_t) absMagnR + anaBwOffset) >> SA_BW_SHIFT;
+        sumPrdLL += ((uint64_t) absMagnL * (uint64_t) absMagnL + anaBwOffset) >> SA_BW_SHIFT;
+        sumPrdRR += ((uint64_t) absMagnR * (uint64_t) absMagnR + anaBwOffset) >> SA_BW_SHIFT;
+      } // for s
+      // first measure: ratio of the left-right to the mid-side product of averaged real magnitudes, capped at 256
+      sumRealL = (sumRealL + anaBwOffset) >> SA_BW_SHIFT; // avg
+      sumRealR = (sumRealR + anaBwOffset) >> SA_BW_SHIFT;
+      sumRealM = (sumRealM + anaBwOffset) >> SA_BW_SHIFT;
+      sumRealS = (sumRealS + anaBwOffset) >> SA_BW_SHIFT;
+      nlr = double (sumRealL * sumRealR) * 0.46875; // tuned for uncorrelated full-scale noise
+      sumPrdMS = uint64_t (nlr > double (sumRealM * sumRealS) ? 256.0 : 0.5 + (512.0 * nlr) / __max (1.0, double (sumRealM * sumRealS)));
+      // second measure: 256 * squared normalized covariance of the two magnitude sequences, 0 if not positive
+      dll = double ((sumMagnL + anaBwOffset) >> SA_BW_SHIFT);
+      drr = double ((sumMagnR + anaBwOffset) >> SA_BW_SHIFT);
+      nlr = (sumPrdLR + dll * drr) * SA_BW - sumMagnL * drr - sumMagnR * dll;
+      dll = (sumPrdLL + dll * dll) * SA_BW - sumMagnL * dll - sumMagnL * dll;
+      drr = (sumPrdRR + drr * drr) * SA_BW - sumMagnR * drr - sumMagnR * drr;
+      sumPrdLR = uint64_t ((nlr <= 0.0) || (dll * drr <= 0.0) ? 0 : 0.5 + (256.0 * nlr * nlr) / (dll * drr));
+      // the larger of both measures, limited to 8 bit, is the band's value; kept locally so no buffer is needed
+      const uint8_t bandCorr = (uint8_t) __min (UCHAR_MAX, __max (sumPrdMS, sumPrdLR)); // in band
+      if (stereoCorrValue != nullptr) stereoCorrValue[b] = bandCorr; // store only if the caller supplied a buffer
+      if ((anaBandModul > 0) && (anaBandModul + 1 < numAnaModul)) // in frame (averaged below)
+      {
+        currPC += bandCorr; numPC++;
+        sumReM += sumRealM;
+        sumReS += sumRealS;
+      }
+    } // for b
+    // fill the output for bands above the analyzed range; skipped entirely when no output buffer was given
+    for (b = numAnaBands; (stereoCorrValue != nullptr) && (b < int16_t (nSamplesInFrame >> SA_BW_SHIFT)); b++)
+    {
+      stereoCorrValue[b] = UCHAR_MAX; // to allow joint-stereo coding at very high frequencies
+    }
+
+    if (numPC > 1) currPC = (currPC + (numPC >> 1)) / numPC; // frame's perceptual correlation
+
+    b = (int16_t) currPC * (sumReS * 2 > sumReM * 3 ? -1 : 1);  // negation implies side > mid
+  }
+
+  return b;
+}
--- a/src/lib/specAnalysis.h
+++ b/src/lib/specAnalysis.h
@ -57,6 +57,10 @@ public:
                             const int32_t* const mdstSignals[USAC_MAX_NUM_CHANNELS],
                             const unsigned nChannels, const unsigned nSamplesInFrame, const unsigned samplingRate,
                             const unsigned lfeChannelIndex = USAC_MAX_NUM_CHANNELS); // to skip an LFE channel
+  int16_t stereoSigAnalysis (const int32_t* const mdctSignal1, const int32_t* const mdctSignal2,
+                             const int32_t* const mdstSignal1, const int32_t* const mdstSignal2,
+                             const unsigned nSamplesMax, const unsigned nSamplesInFrame, const bool shortTransforms,
+                             uint8_t* const stereoCorrValue = nullptr); // per-band perceptual correlation data
 }; // SpecAnalyzer

 #endif // _SPEC_ANALYSIS_H_