diff --git a/src/lib/bitAllocation.cpp b/src/lib/bitAllocation.cpp
index caa880b..66a7cce 100644
--- a/src/lib/bitAllocation.cpp
+++ b/src/lib/bitAllocation.cpp
@@ -27,6 +27,13 @@ static inline uint32_t jndModel (const uint32_t val, const uint32_t mean,
   return uint32_t (__min ((double) UINT_MAX, res + 0.5));
 }
 
+static inline uint32_t squareMeanRoot (const uint32_t value1, const uint32_t value2)
+{
+  // squared arithmetic mean of the two square roots, rounded to the nearest integer
+  const double rootSum = sqrt ((double) value1) + sqrt ((double) value2);
+  return uint32_t (rootSum * rootSum * 0.25 + 0.5); // (sum / 2)^2 == sum^2 / 4, scaling by 2^-2 is exact
+}
+
 static void jndPowerLawAndPeakSmoothing (uint32_t* const  stepSizes, const unsigned nStepSizes,
                                          const uint32_t avgStepSize, const uint8_t sfm, const uint8_t tfm)
 {
@@ -68,6 +75,9 @@ BitAllocator::BitAllocator ()
     m_avgSpecFlat[ch] = 0;
     m_avgTempFlat[ch] = 0;
   }
+  m_rateIndex    = 0;
+  m_tempSfbValue = nullptr;
+  m_tnsPredictor = nullptr;
 }
 
 // public functions
@@ -139,6 +149,23 @@ uint8_t BitAllocator::getScaleFac (const uint32_t sfbStepSize, const int32_t* co
   return __min (SCHAR_MAX, sf);
 }
 
+// Stores the bit-rate mode and the predictor and allocates the scratch buffer of max (numSwb bytes,
+// MAX_PREDICTION_ORDER shorts). Returns 0 on success, 1 if linPredictor is null, 2 if malloc failed.
+unsigned BitAllocator::initAllocMemory (LinearPredictor* const linPredictor, const uint8_t numSwb, const uint8_t bitRateMode)
+{
+  if (linPredictor == nullptr) return 1; // invalid arguments error
+
+  m_rateIndex    = bitRateMode;
+  m_tnsPredictor = linPredictor;
+  MFREE (m_tempSfbValue); // drop the buffer of an earlier initialization so that it is not leaked
+
+  if ((m_tempSfbValue = (uint8_t*) malloc (__max (MAX_PREDICTION_ORDER * sizeof (short), numSwb) * sizeof (uint8_t))) == nullptr)
+  {
+    return 2; // memory allocation error
+  }
+  return 0; // no error
+}
+
 unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USAC_MAX_NUM_CHANNELS], const uint8_t numSwbShort,
                                          const uint32_t specAnaStats[USAC_MAX_NUM_CHANNELS],
                                          const uint32_t tempAnaStats[USAC_MAX_NUM_CHANNELS],
@@ -164,8 +191,8 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
 
   for (unsigned ch = 0; ch < nChannels; ch++)
   {
-    const SfbGroupData&   grpData = *groupData[ch];
-    const uint32_t maxSfbInCh = grpData.sfbsPerGroup;
+    const SfbGroupData& grpData = *groupData[ch];
+    const uint32_t maxSfbInCh = __min (MAX_NUM_SWB_LONG, grpData.sfbsPerGroup);
     const uint32_t nBandsInCh = grpData.numWindowGroups * maxSfbInCh;
     const uint32_t*   rms = grpData.sfbRmsValues;
     uint32_t*   stepSizes = &sfbStepSizes[ch * numSwbShort * NUM_WINDOW_GROUPS];
@@ -322,7 +349,7 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
     if ((samplingRate >= 28800) && (samplingRate <= 64000))
     {
       elw = 36; // 36/32 = 9/8
-      for (b = HF; b < grpData.sfbsPerGroup; b++)  // undo additional high-freq. equal-loudness attenuation
+      for (b = HF; b < maxSfbInCh; b++)  // undo above additional high-frequency equal-loudness attenuation
       {
         for (unsigned d = b - HF; d > 0; d--)
         {
@@ -351,8 +378,8 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
 
   for (unsigned ch = 0; ch < nChannels; ch++)
   {
-    const SfbGroupData&   grpData = *groupData[ch];
-    const uint32_t maxSfbInCh = grpData.sfbsPerGroup;
+    const SfbGroupData& grpData = *groupData[ch];
+    const uint32_t maxSfbInCh = __min (MAX_NUM_SWB_LONG, grpData.sfbsPerGroup);
     const uint32_t nBandsInCh = grpData.numWindowGroups * maxSfbInCh;
     const uint32_t chStepSize = m_avgStepSize[ch];
     uint32_t*   stepSizes = &sfbStepSizes[ch * numSwbShort * NUM_WINDOW_GROUPS];
@@ -380,3 +407,99 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
 
   return 0; // no error
 }
+
+// Refines, in place, the SFB step-sizes of the nChannels channels starting at firstChannelIndex:
+// bands for which calcParCorCoeffs reports a high value get finer steps, each step is limited from
+// below by the group's minimum band RMS, and all steps shrink with the share of zero-quantized bands.
+// Needs a prior successful initAllocMemory () call; returns 0 on success, 1 on invalid arguments.
+unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USAC_MAX_NUM_CHANNELS], const uint8_t numSwbShort,
+                                         const int32_t* const mdctSpec[USAC_MAX_NUM_CHANNELS], const unsigned nSamplesInFrame,
+                                         const unsigned nChannels, const unsigned samplingRate, uint32_t* const sfbStepSizes,
+                                         const unsigned firstChannelIndex, const bool commonWindow /*= false*/,
+                                         const uint8_t* const sfbStereoData /*= nullptr*/, const uint8_t stereoConfig /*= 0*/)
+{
+  const uint8_t maxSfbL16k = 16 + __min (35, (9 << 17) / __max (1, samplingRate)); // SFB index at 15.8 kHz
+  const uint32_t redFactor = __max ((samplingRate < 27713 ? 2 : 1), __min (3, m_rateIndex)) - (stereoConfig >> 3);
+  const uint32_t redWeight = __min (4, 9 - __min (9, m_rateIndex));
+  short* const  tempCoeffs = (short* const) m_tempSfbValue; // coefficient scratch, aliases the per-SFB bytes
+
+  if ((groupData == nullptr) || (mdctSpec == nullptr) || (sfbStepSizes == nullptr) || (nSamplesInFrame > 2048) ||
+      (numSwbShort < MIN_NUM_SWB_SHORT) || (numSwbShort > MAX_NUM_SWB_SHORT) || (nChannels > USAC_MAX_NUM_CHANNELS) ||
+      (samplingRate < 7350) || (samplingRate > 96000) || (firstChannelIndex > USAC_MAX_NUM_CHANNELS - nChannels) ||
+      (m_tempSfbValue == nullptr) || (m_tnsPredictor == nullptr)) // last two: initAllocMemory () not run or failed
+    return 1; // invalid arguments error
+
+  for (unsigned ch = firstChannelIndex; ch < firstChannelIndex + nChannels; ch++)
+  {
+    const SfbGroupData& grpData = *groupData[ch];
+    const uint32_t maxSfbInCh = __min (MAX_NUM_SWB_LONG, grpData.sfbsPerGroup);
+    const bool    eightShorts = (grpData.numWindowGroups != 1);
+    const unsigned      refCh = 2 * firstChannelIndex + nChannels - 1 - ch; // mirrored channel within this element
+    const uint32_t*   rms = grpData.sfbRmsValues;
+    uint32_t*   stepSizes = &sfbStepSizes[ch * numSwbShort * NUM_WINDOW_GROUPS];
+
+    if ((grpData.numWindowGroups * maxSfbInCh == 0) || (grpData.numWindowGroups > NUM_WINDOW_GROUPS)) continue;
+
+    for (unsigned gr = 0; gr < grpData.numWindowGroups; gr++)
+    {
+      const uint16_t* grpOff = &grpData.sfbOffsets[numSwbShort * gr];
+      const uint8_t*  grpSte = (sfbStereoData == nullptr ? nullptr : &sfbStereoData[numSwbShort * gr]);
+      const uint32_t* grpRms = &rms[numSwbShort * gr];
+      const uint32_t* refRms = &groupData[refCh]->sfbRmsValues[numSwbShort * gr];
+      uint32_t* grpStepSizes = &stepSizes[numSwbShort * gr];
+      uint32_t  b, grpRmsMin = INT_MAX; // min. RMS value, used for overcoding reduction
+      uint64_t  s = (eightShorts ? (nSamplesInFrame * grpData.windowGroupLength[gr]) >> 1 : nSamplesInFrame << 2);
+
+      memset (m_tempSfbValue, UCHAR_MAX, maxSfbInCh * sizeof (uint8_t));
+
+      // undercoding reduction for case where large number of coefs is quantized to zero
+      for (b = 0; b < maxSfbInCh; b++)
+      {
+        const uint32_t rmsComp = (grpSte != nullptr && grpSte[b] > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]);
+        const uint32_t rmsRef9 = (commonWindow ? refRms[b] >> 9 : rmsComp);
+        const uint8_t sfbWidth = grpOff[b + 1] - grpOff[b];
+
+        if (redWeight > 0 && !eightShorts && sfbWidth > 12) // further reduce step-sizes of transient bands
+        {
+          const uint32_t gains = m_tnsPredictor->calcParCorCoeffs (&mdctSpec[ch][grpOff[b]], sfbWidth, MAX_PREDICTION_ORDER, tempCoeffs) >> 24;
+
+          m_tempSfbValue[b] = UCHAR_MAX - uint8_t ((512u + gains * gains * redWeight) >> (sfbWidth > 16 ? 10 : 11));
+          if ((b >= 2) && (m_tempSfbValue[b - 1] < m_tempSfbValue[b]) && (m_tempSfbValue[b - 1] < m_tempSfbValue[b - 2]))
+          {
+            m_tempSfbValue[b - 1] = __min (m_tempSfbValue[b], m_tempSfbValue[b - 2]); // remove local peaks
+          }
+        }
+        if (grpRms[b] < grpRmsMin) grpRmsMin = grpRms[b];
+#ifndef NO_DTX_MODE
+        if (m_rateIndex > 0)
+#endif
+        if (rmsComp >= rmsRef9 && (rmsComp < (grpStepSizes[b] >> 1)))  // zero-quantized
+          s -= (sfbWidth * redFactor * __min (1u << 11, rmsComp) + (1u << 10)) >> 11;
+      }
+
+      if ((samplingRate > 27712) && (b < maxSfbL16k) && !eightShorts) // zeroed HF coefs
+      {
+        const uint32_t rmsComp = (grpSte != nullptr && grpSte[b] > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]);
+        const uint32_t rmsRef9 = (commonWindow ? refRms[b] >> 9 : rmsComp);
+        const uint32_t hfWidth = (grpOff[maxSfbL16k] > grpOff[b] ? grpOff[maxSfbL16k] - grpOff[b] : 0); // spans several SFBs, can exceed 255
+#ifndef NO_DTX_MODE
+        if (m_rateIndex > 0)
+#endif
+        if (rmsComp >= rmsRef9) // check only first SFB above max_sfb for simplification
+          s -= (hfWidth * redFactor * __min (1u << 11, rmsComp) + (1u << 10)) >> 11;
+      }
+      s = (eightShorts ? s / ((nSamplesInFrame * grpData.windowGroupLength[gr]) >> 8) : s / (nSamplesInFrame >> 5));
+
+      // reset the leading per-SFB bytes, which share their memory with the coefficient scratch used above
+      if (redWeight > 0 && !eightShorts) memset (tempCoeffs /*= m_tempSfbValue*/, UCHAR_MAX, MAX_PREDICTION_ORDER * sizeof (short));
+
+      for (b = 0; b < maxSfbInCh; b++) // improve step-sizes by limiting and attenuation
+      {
+        const uint64_t scaled = __max (grpRmsMin, grpStepSizes[b]) * s * (m_tempSfbValue[b] + 1u);
+        grpStepSizes[b] = uint32_t (scaled > (1u << 14) ? (scaled - (1u << 14)) >> 15 : 0); // guard against unsigned wrap-around
+      }
+    } // for gr
+  } // for ch
+
+  return 0; // no error
+}
diff --git a/src/lib/bitAllocation.h b/src/lib/bitAllocation.h
index 9988301..7d2f81a 100644
--- a/src/lib/bitAllocation.h
+++ b/src/lib/bitAllocation.h
@@ -12,6 +12,7 @@
 #define _BIT_ALLOCATION_H_
 
 #include "exhaleLibPch.h"
+#include "linearPrediction.h"
 
 // constants, experimental macros
 #define BA_EPS                  1
@@ -26,23 +27,32 @@ private:
   uint32_t m_avgStepSize[USAC_MAX_NUM_CHANNELS];
   uint8_t  m_avgSpecFlat[USAC_MAX_NUM_CHANNELS];
   uint8_t  m_avgTempFlat[USAC_MAX_NUM_CHANNELS];
+  uint8_t  m_rateIndex; // preset
+  uint8_t* m_tempSfbValue;
+  LinearPredictor* m_tnsPredictor;
 
 public:
 
   // constructor
   BitAllocator ();
   // destructor
-  ~BitAllocator () { }
+  ~BitAllocator () { MFREE (m_tempSfbValue); }
   // public functions
   void getChAverageSpecFlat (uint8_t meanSpecFlatInCh[USAC_MAX_NUM_CHANNELS], const unsigned nChannels);
   void getChAverageTempFlat (uint8_t meanTempFlatInCh[USAC_MAX_NUM_CHANNELS], const unsigned nChannels);
   uint8_t       getScaleFac (const uint32_t sfbStepSize, const int32_t* const sfbSignal, const uint8_t sfbWidth,
                              const uint32_t sfbRmsValue);
+  unsigned initAllocMemory  (LinearPredictor* const linPredictor, const uint8_t numSwb, const uint8_t bitRateMode);
   unsigned initSfbStepSizes (const SfbGroupData* const groupData[USAC_MAX_NUM_CHANNELS], const uint8_t numSwbShort,
                              const uint32_t specAnaStats[USAC_MAX_NUM_CHANNELS],
                              const uint32_t tempAnaStats[USAC_MAX_NUM_CHANNELS],
                              const unsigned nChannels, const unsigned samplingRate, uint32_t* const sfbStepSizes,
                              const unsigned lfeChannelIndex = USAC_MAX_NUM_CHANNELS, const bool tnsDisabled = false);
+  unsigned imprSfbStepSizes (const SfbGroupData* const groupData[USAC_MAX_NUM_CHANNELS], const uint8_t numSwbShort,
+                             const int32_t* const mdctSpec[USAC_MAX_NUM_CHANNELS], const unsigned nSamplesInFrame,
+                             const unsigned nChannels, const unsigned samplingRate, uint32_t* const sfbStepSizes,
+                             const unsigned firstChannelIndex, const bool commonWindow = false,
+                             const uint8_t* const sfbStereoData = nullptr, const uint8_t stereoConfig = 0);
 }; // BitAllocator
 
 #endif // _BIT_ALLOCATION_H_
diff --git a/src/lib/exhaleEnc.cpp b/src/lib/exhaleEnc.cpp
index dbf7dc8..08432fc 100644
--- a/src/lib/exhaleEnc.cpp
+++ b/src/lib/exhaleEnc.cpp
@@ -328,13 +328,6 @@ static inline void findActualBandwidthShort (uint8_t* const maxSfbShort, const u
   if (*maxSfbShort > maxSfb) *maxSfbShort = maxSfb;
 }
 
-static inline uint32_t squareMeanRoot (const uint32_t value1, const uint32_t value2)
-{
-  const double meanRoot = (sqrt ((double) value1) + sqrt ((double) value2)) * 0.5;
-
-  return uint32_t (meanRoot * meanRoot + 0.5);
-}
-
 static inline uint8_t stereoCorrGrouping (const SfbGroupData& grpData, const unsigned nSamplesInFrame, uint8_t* stereoCorrData)
 {
   const uint16_t numWinGroup = grpData.numWindowGroups;
@@ -780,7 +773,7 @@ uint32_t ExhaleEncoder::getThr (const unsigned channelIndex, const unsigned sfbI
   uint32_t sumSfbLoud = 0;
 
   for (int16_t s = 31; s >= 0; s--) sumSfbLoud += sfbLoudMem[s];
-  sumSfbLoud = (sumSfbLoud + 16) >> 5;
+  sumSfbLoud = (sumSfbLoud + 32) >> 6; // -6 dB
 
   return sumSfbLoud * (sumSfbLoud >> (toSamplingRate (m_frequencyIdx) >> 13)); // scaled SMR
 }
@@ -792,9 +785,9 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
   const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
   const unsigned samplingRate    = toSamplingRate (m_frequencyIdx);
   const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
-  const uint32_t maxSfbLong      = (samplingRate < 37566 ? 51 /*32 kHz*/ : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
+  const uint32_t maxSfbLong      = (samplingRate < 37566 ? MAX_NUM_SWB_LONG : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
   const uint64_t scaleSr         = (samplingRate < 27713 ? (samplingRate < 24000 ? 32 : 34) - __min (3, m_bitRateMode) : 37) - (nChannels >> 1);
-  const uint64_t scaleBr         = (m_bitRateMode == 0 ? __min (38, 3 + (samplingRate >> 10) + (samplingRate >> 13)) - (nChannels >> 1)
+  const uint64_t scaleBr         = (m_bitRateMode == 0 ? __min (32, 3 + (samplingRate >> 10) + (samplingRate >> 13) - (nChannels >> 1))
                                    : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1));
   uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf;
   uint8_t  meanSpecFlat[USAC_MAX_NUM_CHANNELS];
@@ -828,7 +821,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
         const unsigned lfConst = (samplingRate < 27713 ? 1 : 2);
         const unsigned lfAtten = 4 + b * lfConst; // LF SNR boost, cf my M.Sc. thesis, p. 54
         const uint8_t sfbWidth = off[b + 1] - off[b];
-        const uint64_t   scale = scaleBr * __min (32, lfAtten); // rate control part 1
+        const uint64_t   scale = scaleBr * __min (32, lfAtten); // rate control part 1 (SFB)
 
         // scale step-sizes according to VBR mode, then derive scale factors from step-sizes
         stepSizes[b] = uint32_t (__max (BA_EPS, ((1u << 9) + stepSizes[b] * scale) >> 10));
@@ -839,7 +832,6 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
     }
     else // SCE or CPE: bandwidth-to-max_sfb mapping, short-window grouping for each channel
     {
-      const uint32_t redFactor = __max ((samplingRate < 27713 ? 2 : 1), __min (3, m_bitRateMode)) - (coreConfig.stereoConfig >> 3);
       const bool  eightShorts0 = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT);
       const TnsData&  tnsData0 = coreConfig.tnsData[0];
       const TnsData&  tnsData1 = coreConfig.tnsData[1];
@@ -947,6 +939,10 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
       }
       else memset (coreConfig.stereoDataCurr, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint8_t));
 
+      errorValue |= m_bitAllocator.imprSfbStepSizes (m_scaleFacData, m_numSwbShort, m_mdctSignals, nSamplesInFrame,
+                                                     nrChannels, samplingRate, sfbStepSizes, ci, coreConfig.commonWindow,
+                                                     coreConfig.stereoDataCurr, coreConfig.stereoConfig);
+
       for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
       {
         SfbGroupData&  grpData = coreConfig.groupingData[ch];
@@ -965,54 +961,18 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
         {
           const uint16_t* grpOff = &grpData.sfbOffsets[m_numSwbShort * gr];
           const uint32_t* grpRms = &grpData.sfbRmsValues[m_numSwbShort * gr];
-          const uint32_t* refRms = &coreConfig.groupingData[1 - ch].sfbRmsValues[m_numSwbShort * gr];
           uint8_t*  grpScaleFacs = &grpData.scaleFactors[m_numSwbShort * gr];
           uint32_t* grpStepSizes = &stepSizes[m_numSwbShort * gr];
-          uint8_t* grpStereoData = &coreConfig.stereoDataCurr[m_numSwbShort * gr];
-          uint32_t  b, grpRmsMin = INT_MAX; // min. RMS value, used for overcoding reduction
 
-          // undercoding reduction for case where large number of coefs is quantized to zero
-          s = (eightShorts ? (nSamplesInFrame * grpData.windowGroupLength[gr]) >> 1 : nSamplesInFrame << 2);
-          for (b = 0; b < maxSfbCh; b++)
+          for (unsigned b = 0; b < maxSfbCh; b++)
           {
-            const uint32_t rmsComp = (grpStereoData[b] > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]);
-            const uint32_t rmsRef9 = (coreConfig.commonWindow ? refRms[b] >> 9 : rmsComp);
-
-            if (grpRms[b] < grpRmsMin) grpRmsMin = grpRms[b];
-#ifndef NO_DTX_MODE
-            if ((m_bitRateMode > 0) || (m_numElements > 1) || (samplingRate > 24000))
-            if ((m_bitRateMode > 3) || (meanSpecFlat[ci] > (SCHAR_MAX >> 1)) || (rmsComp >= TA_EPS))
-#endif
-            if (rmsComp >= rmsRef9 && (rmsComp < (grpStepSizes[b] >> 1)))  // zero-quantized
-            {
-              s -= ((grpOff[b + 1] - grpOff[b]) * redFactor * __min (2 * SA_EPS, rmsComp) + SA_EPS) >> 11; // / (2 * SA_EPS)
-            }
-          }
-          if ((samplingRate >= 27713) && (b < maxSfbLong) && !eightShorts)  // uncoded coefs
-          {
-            const uint32_t rmsComp = (grpStereoData[b] > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]);
-            const uint32_t rmsRef9 = (coreConfig.commonWindow ? refRms[b] >> 9 : rmsComp);
-#ifndef NO_DTX_MODE
-            if ((m_bitRateMode > 0) || (m_numElements > 1) || (samplingRate > 24000))
-            if ((m_bitRateMode > 3) || (meanSpecFlat[ci] > (SCHAR_MAX >> 1)) || (rmsComp >= TA_EPS))
-#endif
-            if (rmsComp >= rmsRef9) // check only first SFB above max_sfb for simplification
-            {
-              s -= ((grpOff[maxSfbLong] - grpOff[b]) * redFactor * __min (2 * SA_EPS, rmsComp) + SA_EPS) >> 11; // / (2 * SA_EPS)
-            }
-          }
-          s = (eightShorts ? s / ((nSamplesInFrame * grpData.windowGroupLength[gr]) >> 8) : s / (nSamplesInFrame >> 5));
-
-          for (b = 0; b < maxSfbCh; b++)
-          {
-            const unsigned lfConst = (samplingRate < 27713 && !eightShorts ? 1 : 2); // LF SNR boost, cf my M.Sc. thesis
+            const unsigned lfConst = (samplingRate < 27713 && !eightShorts ? 1 : 2); // lfAtten: LF SNR boost, as in my M.Sc. thesis
             const unsigned lfAtten = (saveBitRate || b <= 5 ? (eightShorts ? 1 : 4) + b * lfConst : 5 * lfConst - 1 + b + ((b + 5) >> 4));
             const uint8_t sfbWidth = grpOff[b + 1] - grpOff[b];
-            const uint64_t rateFac = mSfmFac * s * __min (32, lfAtten * grpData.numWindowGroups); // rate control part 1
-            const uint64_t sScaled = ((1u << 24) + __max (grpRmsMin, grpStepSizes[b]) * scaleBr * rateFac) >> 25;
+            const uint64_t   scale = scaleBr * mSfmFac * __min (32, lfAtten * grpData.numWindowGroups); // rate control part 1 (SFB)
 
             // scale step-sizes according to VBR mode & derive scale factors from step-sizes
-            grpStepSizes[b] = uint32_t (__max (BA_EPS, __min (UINT_MAX, sScaled)));
+            grpStepSizes[b] = uint32_t (__max (BA_EPS, ((1u << 17) + grpStepSizes[b] * scale) >> 18));
 
             grpScaleFacs[b] = m_bitAllocator.getScaleFac (grpStepSizes[b], &m_mdctSignals[ci][grpOff[b]], sfbWidth, grpRms[b]);
           }
@@ -1050,7 +1010,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
 
                 for (s = sfbStart; s < maxSfbCh; s++)
                 {
-                  const double rmsValue = double (grpStereoData[s] > 0 ? squareMeanRoot (refRms[s], grpRms[s]) : grpRms[s]);
+                  const double rmsValue = double (grpStereoData[s] > 0 ? (grpRms[s] + (uint64_t) refRms[s] + 1) >> 1 : grpRms[s]);
                   const unsigned sfbIdx = s - sfbStart;
 
                   m_sfbLoudMem[ch][sfbIdx][m_frameCount & 31] = __max (BA_EPS, uint16_t (sqrt (rmsValue)));
@@ -1506,8 +1466,10 @@ unsigned ExhaleEncoder::spectralProcessing ()  // complete ics_info(), calc TNS
         {
           if (grpData.windowGroupLength[gr] == 1)
           {
+            const uint8_t tonality = (m_specAnaCurr[ci] >> 16) & UCHAR_MAX;
+
             tnsData.filterOrder[n] = m_linPredictor.calcOptTnsCoeffs (tnsData.coeffParCor[n], tnsData.coeff[n], &tnsData.coeffResLow[n],
-                                                                      tnsData.filterOrder[n], s, (m_specAnaCurr[ci] >> 16) & UCHAR_MAX);
+                                                                      tnsData.filterOrder[n], s, tonality >> (m_tempFlatPrev[ci] >> 5));
             tnsData.numFilters[n] = (tnsData.filterOrder[n] > 0 ? 1 : 0);
             if ((ch == 0) && (icsCurr.windowSequence == EIGHT_SHORT) && (tnsData.numFilters[n] == 0) && (tnsData.firstTnsWindow == gr))
             {
@@ -1516,7 +1478,7 @@ unsigned ExhaleEncoder::spectralProcessing ()  // complete ics_info(), calc TNS
             n++;
           }
         }
-        ci++;
+        m_tempFlatPrev[ci++] = (uint8_t) s;
       } // for ch
 
       if (coreConfig.commonWindow) // synchronization of all StereoCoreToolInfo() components
@@ -1814,8 +1776,7 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o
 
       errorValue |= m_transform.applyMCLT (m_timeSignals[ci], eightShorts, icsPrev.windowShape != WINDOW_SINE, icsCurr.windowShape != WINDOW_SINE,
                                            wsCurr > LONG_START /*lOL*/, (wsCurr % 3) != ONLY_LONG /*lOR*/, m_mdctSignals[ci], m_mdstSignals[ci]);
-      m_scaleFacData[ci] = &grpData;
-      ci++;
+      m_scaleFacData[ci++] = &grpData;
     }
   } // for el
 
@@ -1880,6 +1841,7 @@ ExhaleEncoder::ExhaleEncoder (int32_t* const inputPcmData,           unsigned ch
     m_specFlatPrev[ch] = 0;
     m_tempAnaCurr[ch]  = 0;
     m_tempAnaNext[ch]  = 0;
+    m_tempFlatPrev[ch] = 0;
     m_timeSignals[ch]  = nullptr;
     m_tranLocCurr[ch]  = -1;
     m_tranLocNext[ch]  = -1;
@@ -2115,10 +2077,11 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin
 
   // initialize coder class memory
   m_tempIntBuf = m_timeSignals[0];
+  if (m_bitAllocator.initAllocMemory (&m_linPredictor, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode) > 0 ||
 #if EC_TRELLIS_OPT_CODING
-  if (m_sfbQuantizer.initQuantMemory (nSamplesInFrame, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode, toSamplingRate (m_frequencyIdx)) > 0 ||
+      m_sfbQuantizer.initQuantMemory (nSamplesInFrame, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode, toSamplingRate (m_frequencyIdx)) > 0 ||
 #else
-  if (m_sfbQuantizer.initQuantMemory (nSamplesInFrame) > 0 ||
+      m_sfbQuantizer.initQuantMemory (nSamplesInFrame) > 0 ||
 #endif
       m_specAnalyzer.initSigAnaMemory (&m_linPredictor, m_bitRateMode <= 4 ? nChannels : 0, nSamplesInFrame) > 0 ||
       m_transform.initConstants (m_tempIntBuf, m_timeWindowL, m_timeWindowS, nSamplesInFrame) > 0)
diff --git a/src/lib/exhaleEnc.h b/src/lib/exhaleEnc.h
index bb6bd24..f6b798a 100644
--- a/src/lib/exhaleEnc.h
+++ b/src/lib/exhaleEnc.h
@@ -106,6 +106,7 @@ private:
   TempAnalyzer    m_tempAnalyzer; // for temporal analysis
   uint32_t        m_tempAnaCurr[USAC_MAX_NUM_CHANNELS];
   uint32_t        m_tempAnaNext[USAC_MAX_NUM_CHANNELS];
+  uint8_t         m_tempFlatPrev[USAC_MAX_NUM_CHANNELS];
   int32_t*        m_tempIntBuf;  // temporary int32 buffer
   int32_t*        m_timeSignals[USAC_MAX_NUM_CHANNELS];
 #if !RESTRICT_TO_AAC