diff --git a/src/lib/bitAllocation.h b/src/lib/bitAllocation.h
index 6e197ff..9988301 100644
--- a/src/lib/bitAllocation.h
+++ b/src/lib/bitAllocation.h
@@ -1,5 +1,5 @@
 /* bitAllocation.h - header file for class needed for psychoacoustic bit-allocation
- * written by C. R. Helmrich, last modified in 2019 - see License.htm for legal notices
+ * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
  *
  * The copyright in this software is being made available under a Modified BSD-Style License
  * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
@@ -15,7 +15,7 @@
 
 // constants, experimental macros
 #define BA_EPS                  1
-#define BA_INTER_CHAN_SIM_MASK  0  // cross-channel simultaneous masking for surround
+#define BA_INTER_CHAN_SIM_MASK  0  // 5.1 cross-channel simultaneous masking
 
 // class for audio bit-allocation
 class BitAllocator
diff --git a/src/lib/bitStreamWriter.cpp b/src/lib/bitStreamWriter.cpp
index c5f5893..3a699ef 100644
--- a/src/lib/bitStreamWriter.cpp
+++ b/src/lib/bitStreamWriter.cpp
@@ -91,7 +91,7 @@ unsigned BitStreamWriter::writeChannelWiseTnsData (const TnsData& tnsData, const
             bitCount += 2 + order * coefBits;
           }
         }
-      } // if (n_filt[w])
+      } // if n_filt[w] > 0
     }
   } // for w
 
@@ -264,14 +264,14 @@ unsigned BitStreamWriter::writeFDChannelStream (const CoreCoderData& elData, Ent
         }
       }
     } // for w
-  } // if (maxSfb == 0)
+  } // if maxSfb == 0
 
   m_auBitStream.write (0, 1); // fac_data_present, no fac_data
 
   return bitCount;
 }
 
-unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData,
+unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData, EntropyCoder& entrCoder,
 #if !RESTRICT_TO_AAC
                                                    const bool timeWarping,
 #endif
@@ -281,6 +281,7 @@ unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData,
   const IcsInfo& icsInfo1 = elData.icsInfoCurr[1];
   const TnsData& tnsData0 = elData.tnsData[0];
   const TnsData& tnsData1 = elData.tnsData[1];
+  const SfbGroupData& grp = elData.groupingData[0];
   unsigned bitCount = 2, g, b;
 
   m_auBitStream.write (elData.tnsActive ? 1 : 0, 1); // tns_active
@@ -301,7 +302,7 @@ unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData,
     bitCount += 3;
     if (elData.stereoMode == 1) // write SFB-wise ms_used[][] flag
     {
-      for (g = 0; g < elData.groupingData[0].numWindowGroups; g++)
+      for (g = 0; g < grp.numWindowGroups; g++)
       {
         const uint8_t* const gMsUsed = &elData.stereoData[m_numSwbShort * g];
 
@@ -315,10 +316,12 @@ unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData,
 #if !RESTRICT_TO_AAC
     else if (elData.stereoMode >= 3)  // SFB-wise cplx_pred_data()
     {
+      const bool complexCoef = (elData.stereoConfig & 1);
+
       m_auBitStream.write (elData.stereoMode - 3, 1); // _pred_all
       if (elData.stereoMode == 3)
       {
-        for (g = 0; g < elData.groupingData[0].numWindowGroups; g++)
+        for (g = 0; g < grp.numWindowGroups; g++)
         {
           const uint8_t* const gCplxPredUsed = &elData.stereoData[m_numSwbShort * g];
 
@@ -329,9 +332,50 @@ unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData,
         }
         bitCount += ((maxSfbSte + 1) / SFB_PER_PRED_BAND) * g;
       }
-      // pred_dir and complex_coef. TODO: rest of cplx_pred_data()
-      m_auBitStream.write (elData.stereoConfig & 3, 2);
+      m_auBitStream.write (elData.stereoConfig & 3, 2);// pred_dir
       bitCount += 3;
+      if (!indepFlag) // use_prev_frame (&4), delta_code_time (&8)
+      {
+        if (complexCoef)
+        {
+          m_auBitStream.write (elData.stereoConfig & 4 ? 1 : 0, 1);
+          bitCount++;
+        }
+        m_auBitStream.write (elData.stereoConfig & 8 ? 1 : 0, 1);
+        bitCount++;
+      }
+      // TODO: complete the following code for delta_code_time > 0
+      for (g = 0; g < grp.numWindowGroups; g++)
+      {
+        const uint8_t* const gCplxPredUsed = &elData.stereoData[m_numSwbShort * g];
+        uint8_t aqReIdxPred = 16, aqImIdxPred = 16; // alpha_q = 0
+
+        for (b = 0; b < maxSfbSte; b += SFB_PER_PRED_BAND)
+        {
+          if (gCplxPredUsed[b] > 0) // write dpcm_alpha_q_re/_q_im
+          {
+            uint8_t aqIdx = gCplxPredUsed[b] & 31; // -15,..0,..15
+            int aqIdxDpcm = (int) aqIdx - aqReIdxPred;
+            unsigned bits = entrCoder.indexGetBitCount (aqIdxDpcm);
+
+            aqReIdxPred = aqIdx;
+            m_auBitStream.write (entrCoder.indexGetHuffCode (aqIdxDpcm), bits);
+            bitCount += bits;
+
+            if (complexCoef)
+            {
+              aqIdx = gCplxPredUsed[b + 1] & 31; // <32 kHz short!
+              aqIdxDpcm = (int) aqIdx - aqImIdxPred;
+              bits = entrCoder.indexGetBitCount (aqIdxDpcm);
+
+              aqImIdxPred = aqIdx;
+              m_auBitStream.write (entrCoder.indexGetHuffCode (aqIdxDpcm), bits);
+              bitCount += bits;
+            }
+          }
+          else aqReIdxPred = aqImIdxPred = 16;
+        }
+      } // for g
     }
 #endif
   } // common_window
@@ -517,7 +561,7 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData,
         m_auBitStream.write (CORE_MODE_FD, 1); // L
         m_auBitStream.write (CORE_MODE_FD, 1); // R
         bitCount += 2;
-        bitCount += writeStereoCoreToolInfo (*elData,
+        bitCount += writeStereoCoreToolInfo (*elData, entropyCoder[ci], // L
 #if !RESTRICT_TO_AAC
                                              tw_mdct[el],
 #endif
diff --git a/src/lib/bitStreamWriter.h b/src/lib/bitStreamWriter.h
index 2a5aaa7..2fc28de 100644
--- a/src/lib/bitStreamWriter.h
+++ b/src/lib/bitStreamWriter.h
@@ -41,7 +41,7 @@ private:
                                     const bool timeWarping, const bool noiseFilling,
 #endif
                                     const bool indepFlag = false);
-  unsigned writeStereoCoreToolInfo (const CoreCoderData& elData,
+  unsigned writeStereoCoreToolInfo (const CoreCoderData& elData, EntropyCoder& entrCoder,
 #if !RESTRICT_TO_AAC
                                     const bool timeWarping,
 #endif
diff --git a/src/lib/exhaleEnc.cpp b/src/lib/exhaleEnc.cpp
index 27bcc7a..639cbb3 100644
--- a/src/lib/exhaleEnc.cpp
+++ b/src/lib/exhaleEnc.cpp
@@ -235,7 +235,14 @@ static inline uint8_t brModeAndFsToMaxSfbShort(const unsigned bitRateMode, const
   return (samplingRate > 51200 ? 11 : 13) - 2 + (bitRateMode >> 2);
 }
 
-#if !SA_IMPROVED_REAL_ABS
+#if SA_IMPROVED_REAL_ABS
+static inline uint32_t squareMeanRoot (const uint32_t value1, const uint32_t value2)
+{ // returns the square of the mean of the square roots of value1 and value2, rounded to nearest
+  const double meanRoot = (sqrt ((double) value1) + sqrt ((double) value2)) * 0.5;
+
+  return uint32_t (meanRoot * meanRoot + 0.5); // adding 0.5 rounds before the truncating cast
+}
+#else
 static inline uint32_t getComplexRmsValue (const uint32_t rmsValue, const unsigned sfbGroup, const unsigned sfbIndex,
                                            const uint8_t numSwb, const TnsData& tnsData)
 {
@@ -657,8 +664,8 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
   const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
   const uint32_t maxSfbLong      = (samplingRate < 37566 ? 51 /*32 kHz*/ : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
   const uint32_t reductionFactor = (samplingRate < 37566 ? 2 : 3);  // undercoding reduction
-  const uint64_t scaleSr         = (samplingRate < 27713 ? 37 - m_bitRateMode : 37);
-  const uint64_t scaleBr         = (m_bitRateMode == 0 ? 32 : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - ((m_bitRateMode - 1) >> 1));
+  const uint64_t scaleSr         = (samplingRate < 27713 ? 37 - m_bitRateMode : 37) - ((m_bitRateMode & 7) > 2/*TODO*/ ? nChannels >> 1 : 0);
+  const uint64_t scaleBr         = (m_bitRateMode == 0 ? 32 : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - (m_bitRateMode >> 1));
   uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf;
   uint8_t  meanSpecFlat[USAC_MAX_NUM_CHANNELS];
 //uint8_t  meanTempFlat[USAC_MAX_NUM_CHANNELS];
@@ -817,14 +824,14 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
           for (b = 0; b < grpData.sfbsPerGroup; b++)
           {
 #if SA_IMPROVED_REAL_ABS
-            const uint32_t rmsComp = grpRms[b];
+            const uint32_t rmsComp = (coreConfig.stereoMode > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]);
             const uint32_t rmsRef9 = (coreConfig.commonWindow ? refRms[b] >> 9 : rmsComp);
 #else
             const uint32_t rmsComp = getComplexRmsValue (grpRms[b], gr, b, numSwbCh, coreConfig.tnsData[ch]);
             const uint32_t rmsRef9 = (!coreConfig.commonWindow ? rmsComp :
                                      getComplexRmsValue (refRms[b], gr, b, numSwbCh, coreConfig.tnsData[1 - ch]) >> 9);
 #endif
-            if (rmsComp < grpRmsMin) grpRmsMin = rmsComp;
+            if (grpRms[b] < grpRmsMin) grpRmsMin = grpRms[b];
             if (rmsComp >= rmsRef9 && (rmsComp < (grpStepSizes[b] >> 1)))  // zero-quantized
             {
               s -= ((grpOff[b + 1] - grpOff[b]) * reductionFactor * __min (2 * SA_EPS, rmsComp) + SA_EPS) >> 11; // / (2 * SA_EPS)
@@ -833,7 +840,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
           if ((samplingRate >= 27713) && (b < maxSfbLong) && !eightShorts)  // uncoded coefs
           {
 #if SA_IMPROVED_REAL_ABS
-            const uint32_t rmsComp = grpRms[b];
+            const uint32_t rmsComp = (coreConfig.stereoMode > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]);
             const uint32_t rmsRef9 = (coreConfig.commonWindow ? refRms[b] >> 9 : rmsComp);
 #else
             const uint32_t rmsComp = getComplexRmsValue (grpRms[b], gr, b, numSwbCh, coreConfig.tnsData[ch]);
diff --git a/src/lib/quantization.cpp b/src/lib/quantization.cpp
index 33645a3..ddd731f 100644
--- a/src/lib/quantization.cpp
+++ b/src/lib/quantization.cpp
@@ -774,7 +774,7 @@ uint8_t SfbQuantizer::quantizeSpecSfb (EntropyCoder& entropyCoder, const int32_t
     {
       grpStats[sfb] = ((uint32_t) maxQBest << 16) | numQBest; // max magnitude and bit count
     }
-  } // if (sfIndex == 0)
+  } // if sfIndex == 0
 
   return __min (sfBest, m_maxSfIndex);
 }
diff --git a/src/lib/stereoProcessing.cpp b/src/lib/stereoProcessing.cpp
index 2855988..1621404 100644
--- a/src/lib/stereoProcessing.cpp
+++ b/src/lib/stereoProcessing.cpp
@@ -18,15 +18,7 @@
 // constructor
 StereoProcessor::StereoProcessor ()
 {
-  for (unsigned ch = 0; ch < USAC_MAX_NUM_CHANNELS; ch++)
-  {
-    m_avgAbsHpPrev[ch] = 0;
-    m_maxAbsHpPrev[ch] = 0;
-    m_maxIdxHpPrev[ch] = 1;
-    m_pitchLagPrev[ch] = 0;
-    m_tempAnaStats[ch] = 0;
-    m_transientLoc[ch] = -1;
-  }
+  return; // constructor body performs no initialization
 }
 
 // public functions
@@ -37,7 +29,7 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in
                                                 const uint8_t    numSwbFrame, uint8_t* const sfbStereoData,
                                                 uint32_t* const sfbStepSize1, uint32_t* const sfbStepSize2)
 {
-//const bool applyPredSte = (sfbStereoData != nullptr); // use real-valued predictive stereo
+  const bool applyPredSte = (sfbStereoData != nullptr); // use real-valued predictive stereo
   const uint8_t maxSfbSte = __max (groupingData1.sfbsPerGroup, groupingData2.sfbsPerGroup);
 
   if ((mdctSpectrum1 == nullptr) || (mdctSpectrum2 == nullptr) || (groupingData1.numWindowGroups != groupingData2.numWindowGroups) ||
@@ -104,6 +96,17 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in
           sfbNext1++; prevReM = dmixReM;
           sfbNext2++; prevReS = dmixReS;
         }
+        if (sfb + 1 == numSwbFrame) // handle remaining sample
+        {
+          const int32_t dmixReM = int32_t (((int64_t) *sfbMdct1 + (int64_t) *sfbMdct2 + 1) >> 1);
+          const int32_t dmixReS = int32_t (((int64_t) *sfbMdct1 - (int64_t) *sfbMdct2 + 1) >> 1);
+
+          sumAbsValM += abs (dmixReM);
+          sumAbsValS += abs (dmixReS);
+
+          *sfbMdct1 = dmixReM;
+          *sfbMdct2 = dmixReS;
+        }
       }
       else // complex data, both MDCTs and MDSTs are available
       {
@@ -152,8 +155,9 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in
       {
         double min = __min (grpRms1[sfb], grpRms2[sfb]);
         grpStepSizes1[sfb] = grpStepSizes2[sfb] = uint32_t (__max (SP_EPS, (min > sfbRatLR * sfbRmsMaxMS ? sqrt (sfbRatLR * sfbRmsMaxMS *
-                                                                            min) : __min (1.0/*TODO*/, sfbRatLR) * sfbRmsMaxMS)) + 0.5);
+                                                                            min) : __min (1.0/*0 dB*/, sfbRatLR) * sfbRmsMaxMS)) + 0.5);
       }
+      if (applyPredSte) sfbStereoData[sfb + numSwbFrame * gr] = 16; // zero prediction coefs
     } // for sfb
   }
 
diff --git a/src/lib/stereoProcessing.h b/src/lib/stereoProcessing.h
index e46dcb1..f61afa5 100644
--- a/src/lib/stereoProcessing.h
+++ b/src/lib/stereoProcessing.h
@@ -22,12 +22,6 @@ class StereoProcessor
 private:
 
   // member variables
-  unsigned m_avgAbsHpPrev[USAC_MAX_NUM_CHANNELS];
-  unsigned m_maxAbsHpPrev[USAC_MAX_NUM_CHANNELS];
-  unsigned m_maxIdxHpPrev[USAC_MAX_NUM_CHANNELS];
-  unsigned m_pitchLagPrev[USAC_MAX_NUM_CHANNELS];
-  uint32_t m_tempAnaStats[USAC_MAX_NUM_CHANNELS];
-  int16_t  m_transientLoc[USAC_MAX_NUM_CHANNELS];
 
 public: