diff --git a/src/lib/entropyCoding.cpp b/src/lib/entropyCoding.cpp
index 6c5f462..bfba2e6 100644
--- a/src/lib/entropyCoding.cpp
+++ b/src/lib/entropyCoding.cpp
@@ -253,24 +253,29 @@ static const uint16_t arithCumFreqR[3][4] = { // arith_cf_r
   {10827,  6884, 2929, 0}
 };
 
+static const uint8_t arithFastPkIndex[32] = { // pki shortcut table, indexed by 5 bits: bit 0 of each of the five low nibbles of ctx (see arithGetPkIndex)
+  1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 0, 58, 3, 0, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62
+}; // NOTE(review): presumably the precomputed results of the arithHashM/arithLookupM search for these 32 contexts - verify
+
 // static helper functions
 static inline unsigned arithGetPkIndex (const unsigned ctx) // cumul. frequency table index pki = arith_get_pk(c)
 {
-  int iMax = ARITH_SIZE - 1;
-  int iMin = -1;
-  int i    = iMin;
-  uint32_t j, k;
+  if ((ctx & 0xEEEEE) == 0) return arithFastPkIndex[((ctx >> 12) & 16) | ((ctx >> 9) & 8) | ((ctx >> 6) & 4) | ((ctx >> 3) & 2) | (ctx & 1)]; // fast path: bits 1-3 of each of the five low nibbles are clear; NOTE(review): bits 20+ of ctx are not tested - confirm ctx never sets them
 
-  while (iMax > iMin + 1)
+  int32_t iMax = ARITH_SIZE - 1; // upper bound of the search interval (exclusive)
+  int32_t iMin = -1; // lower bound of the search interval (exclusive)
+
+  do // NOTE(review): unlike the former while loop the first probe is unconditional; assumes ARITH_SIZE > 1 - confirm
   {
-    i = iMin + ((iMax - iMin) >> 1);
-    j = arithHashM[i];
-    k = j >> 8;
+    const int32_t  i = iMin + ((iMax - iMin) >> 1); // midpoint between iMin and iMax
+    const uint32_t j = arithHashM[i]; // probed table entry
+    const uint32_t k = j >> 8; // upper 24 bits of the entry: key compared against ctx
 
     if (ctx < k)      iMax = i;
     else if (ctx > k) iMin = i;
     else return  j & UCHAR_MAX;
   }
+  while (iMax > iMin + 1); // stop once no index lies strictly between iMin and iMax
 
   return arithLookupM[iMax]; // pki
 }
@@ -415,10 +420,9 @@ void EntropyCoder::arithSetContext (const unsigned newCtxState, const uint16_t s
 {
   m_csCurr = newCtxState;
   m_acBits = (m_csCurr >> 17) & 31;
-  for (uint16_t s = 1; s < 4; s++)
-  {
-    if (sigEnd >= s) m_qcCurr[sigEnd - s] = (m_csCurr >> (18 + 4 * s)) & 0xF;
-  }
+  if (sigEnd > 0) m_qcCurr[sigEnd - 1] = (m_csCurr >> 22) & 0xF;
+  if (sigEnd > 1) m_qcCurr[sigEnd - 2] = (m_csCurr >> 26) & 0xF;
+  if (sigEnd > 2) m_qcCurr[sigEnd - 3] = (m_csCurr >> 30);
 }
 #endif
 
@@ -466,7 +470,7 @@ unsigned EntropyCoder::arithCodeSigMagn (const uint8_t* const magn, const uint16
     while ((i >= 0) && ((a[i] | b[i]) == 0)) i -= 2;
     i = (sigOffset + i + 2) >> 1;
 
-    if (i + 28 < (int) sigEnd) sigEnd = (uint16_t) i;
+    if (i + 26 < (int) sigEnd) sigEnd = (uint16_t) i;
   }
 
   for (uint16_t s = sigOffset >> 1; s < sigEnd; s++)
@@ -523,11 +527,9 @@ unsigned EntropyCoder::arithCodeSigMagn (const uint8_t* const magn, const uint16
   else
   {
     m_csCurr = 0;
-
-    for (uint16_t s = 1; s < 4; s++)
-    {
-      if (sigEnd >= s) m_csCurr |= __min (255u >> (2 * s), m_qcCurr[sigEnd - s]) << (18 + 4 * s);
-    }
+    if (sigEnd > 0) m_csCurr |= (unsigned) m_qcCurr[sigEnd - 1] << 22; // last tuple's value, placed from bit 22 up
+    if (sigEnd > 1) m_csCurr |= (unsigned) m_qcCurr[sigEnd - 2] << 26; // second-to-last, from bit 26 up; unsigned like the m_acBits shift below
+    if (sigEnd > 2) m_csCurr |= __min (3u, (unsigned) m_qcCurr[sigEnd - 3]) << 30; // 2 bits left at 30-31; 3u because 2 or 3 shifted by 30 does not fit a signed int
   }
   m_csCurr |= ((unsigned) m_acBits << 17) | c;
 
diff --git a/src/lib/exhaleEnc.cpp b/src/lib/exhaleEnc.cpp
index 2a18dd7..639342f 100644
--- a/src/lib/exhaleEnc.cpp
+++ b/src/lib/exhaleEnc.cpp
@@ -982,7 +982,8 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
 #if !RESTRICT_TO_AAC
         if ((maxSfbCh > 0) && m_noiseFilling[el] && (m_bitRateMode <= 3 || !eightShorts))
         {
-          const uint8_t numSwbFrame = __min (numSwbCh, (eightShorts ? maxSfbCh : maxSfbLong) + (m_bitRateMode > 3 || samplingRate < 37566 ? 0 : 1));
+          const uint8_t numSwbFrame = __min (numSwbCh, (eightShorts ? maxSfbCh : maxSfbLong) +
+                                      (m_bitRateMode < 2 || m_bitRateMode > 3 || samplingRate < 37566 ? 0 : 1));
 #ifndef NO_DTX_MODE
           const bool prvEightShorts = (coreConfig.icsInfoPrev[ch].windowSequence == EIGHT_SHORT);
 
@@ -1287,9 +1288,9 @@ unsigned ExhaleEncoder::quantizationCoding ()  // apply MDCT quantization and en
       memcpy (arithTuples, m_tempIntBuf, (nSamplesInFrame >> 1) * sizeof (char));
       entrCoder.setIsShortWindow (shortWinPrev);
 #if !RESTRICT_TO_AAC
-      // obtain channel-wise noise_level and noise_offset for USAC
-      coreConfig.specFillData[ch] = (!m_noiseFilling[el] ? 0 : m_specGapFiller.getSpecGapFillParams (m_sfbQuantizer, m_mdctQuantMag[ci],
-                                                                                                     m_numSwbShort, grpData, nSamplesInFrame,
+      s = 22050 + 7350 * m_bitRateMode; // compute channel-wise noise_level and noise_offset
+      coreConfig.specFillData[ch] = (!m_noiseFilling[el] ? 0 : m_specGapFiller.getSpecGapFillParams (m_sfbQuantizer, m_mdctQuantMag[ci], m_numSwbShort,
+                                                                                                     grpData, nSamplesInFrame, samplingRate >= s,
                                                                                                      shortWinCurr ? 0 : meanSpecFlat[ci]));
       // NOTE: gap-filling SFB bit count might be inaccurate now since scale factors changed
       if (coreConfig.specFillData[ch] == 1) errorValue |= 1;
diff --git a/src/lib/quantization.cpp b/src/lib/quantization.cpp
index 7926984..c603b51 100644
--- a/src/lib/quantization.cpp
+++ b/src/lib/quantization.cpp
@@ -286,10 +286,8 @@ uint32_t SfbQuantizer::quantizeMagnRDOC (EntropyCoder& entropyCoder, const uint8
         {
           tempCodState[is] = tempCodState[0];
           tempCtxState[is] = tempCtxState[0];
-          for (ds = numStates - 1; ds >= 0; ds--)
-          {
-            currRate[ds] = UCHAR_MAX;
-          }
+          memset (currRate, UCHAR_MAX, numStates);
+
           continue;
         }
         tempQuant[0] = (coeffQuantA -= redA);
@@ -309,12 +307,8 @@ uint32_t SfbQuantizer::quantizeMagnRDOC (EntropyCoder& entropyCoder, const uint8
 
         tempBitCount += (entropyCoder.arithGetCtxState () >> 17) & 31;  // +new-old m_acBits
         tempBitCount -= __min ((ctxStart >> 17) & 31, tempBitCount);
-        tempBitCount += numQ; // add sign bits to finish estimate
 
-        for (ds = numStates - 1; ds >= 0; ds--)
-        {
-          currRate[ds] = (uint8_t) __min (UCHAR_MAX, tempBitCount);
-        }
+        memset (currRate, __min (UCHAR_MAX, tempBitCount + numQ), numStates); // bits incl. numQ sign bits, saturated: memset keeps only the low 8 bits of its value
       }
       else // tuple > 0, rate depends on decisions for last tuple
       {
@@ -333,9 +327,8 @@ uint32_t SfbQuantizer::quantizeMagnRDOC (EntropyCoder& entropyCoder, const uint8
 
           tempBitCount += (entropyCoder.arithGetCtxState () >> 17) & 31;// +new-old m_acBits
           tempBitCount -= __min ((prevCtxState[ds] >> 17) & 31, tempBitCount);
-          tempBitCount += numQ; // + sign bits to finish estimate
 
-          currRate[ds] = (uint8_t) __min (UCHAR_MAX, tempBitCount);
+          currRate[ds] = (uint8_t) __min (UCHAR_MAX, tempBitCount + numQ); // bits incl. numQ sign bits, saturated instead of wrapping modulo 256
         }
       }
       // statistically best place to save states is after ds == 0
@@ -785,8 +778,7 @@ unsigned SfbQuantizer::quantizeSpecRDOC (EntropyCoder& entropyCoder, uint8_t* co
                                          const unsigned numSfb, uint8_t* const quantCoeffs)  // returns RD optimization bit count
 {
   // numSfb: number of trellis stages. Based on: A. Aggarwal, S. L. Regunathan, and K. Rose,
-  // "Trellis-Based Optimization of MPEG-4 Advanced Audio Coding," in Proc. IEEE Workshop on
-  // Speech Coding, pp. 142-144, Sep. 2000. Modified for arithmetic instead of Huffman coder
+  // "Trellis-Based Optimization of MPEG-4 Advanced Audio Coding," see also quantizeMagnRDOC
   const uint32_t codStart = USHRT_MAX << 16;
   const uint32_t ctxStart = m_quantRate[0][0]; // start context before call to quantizeSfb()
   const uint32_t codFinal = entropyCoder.arithGetCodState ();
diff --git a/src/lib/specGapFilling.cpp b/src/lib/specGapFilling.cpp
index 077a6c0..384afbd 100644
--- a/src/lib/specGapFilling.cpp
+++ b/src/lib/specGapFilling.cpp
@@ -24,7 +24,7 @@ SpecGapFiller::SpecGapFiller ()
 // public functions
 uint8_t SpecGapFiller::getSpecGapFillParams (const SfbQuantizer& sfbQuantizer, const uint8_t* const quantMagn,
                                              const uint8_t numSwbShort, SfbGroupData& grpData /*modified*/,
-                                             const unsigned nSamplesInFrame /*= 1024*/, const uint8_t specFlat /*= 0*/)
+                                             const unsigned nSamplesInFrame, const bool saveRate, const uint8_t specFlat)
 {
   const unsigned* const coeffMagn = sfbQuantizer.getCoeffMagnPtr ();
   const double* const  sfNormFacs = sfbQuantizer.getSfNormTabPtr ();
@@ -180,7 +180,9 @@ uint8_t SpecGapFiller::getSpecGapFillParams (const SfbQuantizer& sfbQuantizer, c
     const uint16_t*   grpOff = &grpData.sfbOffsets[numSwbShort * gr];
     const uint32_t*   grpRms = &grpData.sfbRmsValues[numSwbShort * gr]; // quant/coder stats
     uint8_t* const grpScFacs = &grpData.scaleFactors[numSwbShort * gr];
-
+#if SGF_SF_PEAK_SMOOTHING
+    uint16_t  lastNonZeroSfb = 0;
+#endif
     for (uint16_t b = m_1stGapFillSfb; b < sfbsPerGrp; b++)  // get noise-fill scale factors
     {
       if ((grpRms[b] >> 16) == 0)  // the SFB is all-zero quantized
@@ -204,20 +206,52 @@ uint8_t SpecGapFiller::getSpecGapFillParams (const SfbQuantizer& sfbQuantizer, c
         }
 #if SGF_SF_PEAK_SMOOTHING
         // save delta-code bits by smoothing scale factor peaks in zero quantized SFB ranges
-        if ((b >  m_1stGapFillSfb) && ((grpRms[b - 1] >> 16) == 0) && ((grpRms[b - 2] >> 16) == 0) &&
-            (grpScFacs[b - 1] > grpScFacs[b]) && (grpScFacs[b - 1] > grpScFacs[b - 2]))
+        if ((b > m_1stGapFillSfb) && ((grpRms[b - 1] >> 16) == 0) && ((grpRms[b - 2] >> 16) == 0))
         {
-          grpScFacs[b - 1] = (grpScFacs[b - 1] + __max (grpScFacs[b], grpScFacs[b - 2])) >> 1;
+          const uint16_t next = grpScFacs[b];
+          const uint16_t prev = grpScFacs[b - 2];
+          uint8_t&       curr = grpScFacs[b - 1];
+
+          if ((next | prev) && (curr > next) && (curr > prev)) curr = (curr + __max (next, prev)) >> 1;
+          else if (saveRate && (curr < next) && (curr < prev)) curr = (curr + __min (next, prev) + 1) >> 1;
         }
 #endif
       }
-
+#if SGF_SF_PEAK_SMOOTHING
+      else if (saveRate) lastNonZeroSfb = b;
+#endif
       if ((b > m_1stGapFillSfb) && (((grpRms[b - 1] >> 16) > 0) ^ ((grpRms[b - 2] >> 16) > 0)))
       {
         diff += (int) grpScFacs[b - 1] - (int) grpScFacs[b - 2]; // sum up transition deltas
         s++;
       }
     } // for b
+#if SGF_SF_PEAK_SMOOTHING
+    if ((lastNonZeroSfb > 0) && (lastNonZeroSfb + 4 < sfbsPerGrp)) // HF factor line-fitting
+    {
+      const int32_t start = lastNonZeroSfb + 1; // first SFB after the last one recorded as not all-zero quantized
+      const int32_t size  = sfbsPerGrp - start - 1; // number of fitted SFBs: start + 1 .. sfbsPerGrp - 1
+      const int32_t xSum  = (size * (size + 1)) >> 1; // sum of (x - start) = 1 + 2 + ... + size
+      int32_t ySum = 0, a = 0, b = 0;
+      uint16_t x;
+
+      for (x = start + 1; x < sfbsPerGrp; x++) ySum += grpScFacs[x]; // size * (mean factor)
+
+      for (x = start + 1; x < sfbsPerGrp; x++)
+      {
+        const int32_t xZ = size * (x - start) - xSum; // zero-mean
+        a += xZ * xZ; // size^2 * sum of squared x deviations
+        b += xZ * (size * grpScFacs[x] - ySum); // size^2 * sum of x-y co-deviations
+      }
+      if (a > 0) // complete line and adjust gap-fill scale factors
+      {
+        b = (int32_t) CLIP_PM (((int64_t) b * 256 + (a >> 1)) / a, (int64_t) SHRT_MAX); // slope in 1/256 units; 64-bit since b * 256 exceeds int32 once |b| > 2^23
+        a = ((ySum << 8) - b * xSum + (size >> 1)) / size; // line value at x = start, in 1/256 units
+
+        for (x = start + 1; x < sfbsPerGrp; x++) grpScFacs[x] = CLIP_UCHAR ((a + b * (x - start) - SCHAR_MIN) >> 8); // - SCHAR_MIN adds 128 to round before >> 8
+      }
+    }
+#endif
   } // for gr
 
   if (s > 0)
diff --git a/src/lib/specGapFilling.h b/src/lib/specGapFilling.h
index 9b9ec54..1359ae9 100644
--- a/src/lib/specGapFilling.h
+++ b/src/lib/specGapFilling.h
@@ -38,7 +38,7 @@ public:
   uint16_t  getFirstGapFillSfb () const { return m_1stGapFillSfb; }
   uint8_t   getSpecGapFillParams (const SfbQuantizer& sfbQuantizer, const uint8_t* const quantMagn,
                                   const uint8_t numSwbShort, SfbGroupData& grpData /*modified*/,
-                                  const unsigned nSamplesInFrame = 1024, const uint8_t specFlat = 0);
+                                  const unsigned nSamplesInFrame, const bool saveRate, const uint8_t specFlat);
 }; // SpecGapFiller
 
 #endif // _SPEC_GAP_FILLING_H_