diff --git a/src/app/loudnessEstim.cpp b/src/app/loudnessEstim.cpp
index 852a304..3e586f0 100644
--- a/src/app/loudnessEstim.cpp
+++ b/src/app/loudnessEstim.cpp
@@ -13,8 +13,8 @@
 
 #if LE_ACCURATE_CALC
 static const int64_t kFilterCoeffs[4][8] = { // first 4: numerator, last 4: denominator, values fit into 32 bit
-  {-1007060950, 1418359536, -889278046, 209544004,  -986120192, 1360482752, -836214568, 193416912}, // <=32 kHz TODO
-  {-1007060950, 1418359536, -889278046, 209544004,  -986120192, 1360482752, -836214568, 193416912}, // 44.1 kHz
+  { -974848000, 1329463296, -808124416, 185073664,  -946145526, 1253229580, -741406522, 165888320}, // <=32 kHz
+  {-1007157248, 1418657792, -889585664, 209649664,  -986120192, 1360482752, -836214568, 193416912}, // 44.1 kHz
   {-1007547085, 1419341519, -889783607, 209553717,  -988032194, 1365543311, -840618073, 194671779}, // 48.0 kHz
   {-1007547085, 1419341519, -889783607, 209553717,  -988032194, 1365543311, -840618073, 194671779}  // >=64 kHz TODO
 };
@@ -26,7 +26,7 @@ LoudnessEstimator::LoudnessEstimator (int32_t* const inputPcmData,           con
 {
 #if LE_ACCURATE_CALC
   m_filterCoeffs  = kFilterCoeffs[sampleRate <= 44100 ? (sampleRate <= 32000 ? 0 : 1) : (sampleRate <= 48000 ? 2 : 3)];
-  m_filterFactor  = (sampleRate < 48000 ? (48000 - sampleRate) >> 11 : 0);
+  m_filterFactor  = (sampleRate < 48000 ? (48200 - sampleRate) >> 12 : 0);
 #else
   m_filterFactor  = 224 + (__min (SHRT_MAX, (int) sampleRate - 47616) >> 10);
 #endif
diff --git a/src/lib/bitAllocation.cpp b/src/lib/bitAllocation.cpp
index ce69b5a..ae46148 100644
--- a/src/lib/bitAllocation.cpp
+++ b/src/lib/bitAllocation.cpp
@@ -34,7 +34,7 @@ static inline uint32_t squareMeanRoot (const uint32_t value1, const uint32_t val
   return uint32_t (meanRoot * meanRoot + 0.5);
 }
 
-static void jndPowerLawAndPeakSmoothing (uint32_t* const  stepSizes, const unsigned nStepSizes,
+static void jndPowerLawAndPeakSmoothing (uint32_t* const  stepSizes, const unsigned nStepSizes, const bool lowRateMode,
                                          const uint32_t avgStepSize, const uint8_t sfm, const uint8_t tfm)
 {
   const unsigned  expTimes512 = 512u - sfm; // 1.0 - sfm / 2.0
@@ -51,15 +51,16 @@ static void jndPowerLawAndPeakSmoothing (uint32_t* const  stepSizes, const unsig
   stepSizes[0] = __min (stepSizeM1, stepSizes[0]); // `- becomes --
   for (/*b*/; b < nStepSizes; b++)
   {
-    const uint64_t oneMinusB = 128 - b;
+    const uint64_t modifiedB = (lowRateMode ? 16 + b : b);
+    const uint64_t oneMinusB = 128 - modifiedB;
     const uint32_t stepSizeB = jndModel (stepSizes[b], avgStepSize, expTimes512, mulTimes512);
 
     if ((stepSizeM3 <= stepSizeM2) && (stepSizeM3 <= stepSizeM1) && (stepSizeB <= stepSizeM2) && (stepSizeB <= stepSizeM1))
     {
       const uint32_t maxM3M0 = __max (stepSizeM3, stepSizeB); // smoothen local spectral peak of _´`- shape
 
-      stepSizes[b - 2] = uint32_t ((b * (uint64_t) stepSizes[b - 2] + oneMinusB * __min (maxM3M0, stepSizes[b - 2]) + 64) >> 7); // _-`-
-      stepSizes[b - 1] = uint32_t ((b * (uint64_t) stepSizes[b - 1] + oneMinusB * __min (maxM3M0, stepSizes[b - 1]) + 64) >> 7); // _---
+      stepSizes[b - 2] = uint32_t ((modifiedB * stepSizes[b - 2] + oneMinusB * __min (maxM3M0, stepSizes[b - 2]) + 64) >> 7); // _-`-
+      stepSizes[b - 1] = uint32_t ((modifiedB * stepSizes[b - 1] + oneMinusB * __min (maxM3M0, stepSizes[b - 1]) + 64) >> 7); // _---
     }
     stepSizeM3 = stepSizeM2;
     stepSizeM2 = stepSizeM1;
@@ -275,7 +276,8 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
 
         for (gr = 0; gr < grpData.numWindowGroups; gr++) // separate spectral peak smoothing for each group
         {
-          jndPowerLawAndPeakSmoothing (&stepSizes[numSwbShort * gr], maxSfbInCh, m_avgStepSize[ch], m_avgSpecFlat[ch], 0);
+          jndPowerLawAndPeakSmoothing (&stepSizes[numSwbShort * gr], maxSfbInCh, false,
+                                       m_avgStepSize[ch], m_avgSpecFlat[ch], 0);
         }
       }
       continue;
@@ -345,7 +347,8 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
     sumMeans += m_avgStepSize[ch];
     m_avgStepSize[ch] *= m_avgStepSize[ch];
 
-    jndPowerLawAndPeakSmoothing (stepSizes, maxSfbInCh, m_avgStepSize[ch], m_avgSpecFlat[ch], tnsDisabled ? m_avgTempFlat[ch] : 0);
+    jndPowerLawAndPeakSmoothing (stepSizes, maxSfbInCh, (m_rateIndex == 0) && (samplingRate >= 27713),
+                                 m_avgStepSize[ch], m_avgSpecFlat[ch], tnsDisabled ? m_avgTempFlat[ch] : 0);
 
     if ((samplingRate >= 27713) && (samplingRate < 75132))
     {
diff --git a/src/lib/exhaleEnc.cpp b/src/lib/exhaleEnc.cpp
index bf70afc..82b1b8c 100644
--- a/src/lib/exhaleEnc.cpp
+++ b/src/lib/exhaleEnc.cpp
@@ -2096,14 +2096,23 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin
 
   if ((errorValue == 0) && (audioConfigBuffer != nullptr)) // save UsacConfig() for writeout
   {
+    const uint32_t loudnessInfo = (audioConfigBytes ? *audioConfigBytes : 0);
+
     errorValue = m_outStream.createAudioConfig (m_frequencyIdx, m_frameLength != CCFL_1024, chConf, m_numElements,
-                                                elementTypeConfig[chConf], audioConfigBytes ? *audioConfigBytes : 0,
+                                                elementTypeConfig[chConf], loudnessInfo,
 #if !RESTRICT_TO_AAC
                                                 m_timeWarping, m_noiseFilling,
 #endif
                                                 audioConfigBuffer);
     if (audioConfigBytes) *audioConfigBytes = errorValue; // length of UsacConfig() in bytes
     errorValue = (errorValue == 0 ? 1 : 0);
+
+    // NOTE: In the following, an error value of 256 is actually a warning, not an error. If
+    // the exhale library is used for realtime encoding and a nonzero program loudness level
+    // is provided before any frames have been encoded, this warning reminds the implementer
+    // to apply short-term loudness normalization of the incoming live audio before encoding
+    // each frame, preferably to a program level of -23 LUFS, as recommended in EBU R128.
+    if ((m_frameCount == 0) && ((loudnessInfo & 16383) > 0)) errorValue |= 256;
   }
 
   return errorValue;
diff --git a/src/lib/quantization.cpp b/src/lib/quantization.cpp
index e827556..a4c6ded 100644
--- a/src/lib/quantization.cpp
+++ b/src/lib/quantization.cpp
@@ -53,7 +53,7 @@ double SfbQuantizer::getQuantDist (const unsigned* const coeffMagn, const uint8_
   {
     const double d = m_lutXExp43[coeffQuant[i]] - coeffMagn[i] * stepSizeDiv;
 
-    dDist += d * d; // TODO: do this in fixed-point and with SIMD
+    dDist += d * d;
   }
 
   // consider quantization step-size in calculation of distortion
@@ -520,7 +520,8 @@ unsigned SfbQuantizer::initQuantMemory (const unsigned maxTransfLength,
 {
   const unsigned numScaleFactors = (unsigned) maxScaleFacIndex + 1;
 #if EC_TRELLIS_OPT_CODING
-  const uint8_t numTrellisStates = (samplingRate < 28800 ? 8 - (samplingRate >> 13) : 5) - __min (2, (bitRateMode + 2) >> 2); // states per SFB
+  const uint8_t complexityOffset = (samplingRate < 28800 ? 8 - (samplingRate >> 13) : 5) + (bitRateMode == 0 ? 1 : 0);
+  const uint8_t numTrellisStates = complexityOffset - __min (2, (bitRateMode + 2) >> 2);  // number of states per SFB
   const uint8_t numSquaredStates = numTrellisStates * numTrellisStates;
   const uint16_t quantRateLength = (samplingRate < 28800 || samplingRate >= 57600 ? 512 : 256); // quantizeMagnRDOC()
 #endif
@@ -606,7 +607,7 @@ uint8_t SfbQuantizer::quantizeSpecSfb (EntropyCoder& entropyCoder, const int32_t
     const uint16_t   sfbWidth = grpOffsets[sfb + 1] - sfbStart;
     uint32_t* const coeffMagn = &m_coeffMagn[sfbStart];
 
-    for (int i = sfbWidth - 1; i >= 0; i--) // back up magnitudes. TODO: use SIMD for speed?
+    for (int i = sfbWidth - 1; i >= 0; i--) // back up magnitudes
     {
       coeffMagn[i] = abs (inputCoeffs[sfbStart + i]);
     }
@@ -642,7 +643,7 @@ uint8_t SfbQuantizer::quantizeSpecSfb (EntropyCoder& entropyCoder, const int32_t
     uint8_t* ptrCurr  = &m_coeffTemp[100];
     uint8_t  sfCurr   = sfIndex;
 
-    for (int i = sfbWidth - 1; i >= 0; i--) // back up magnitudes. TODO: use SIMD for speed?
+    for (int i = sfbWidth - 1; i >= 0; i--) // back up magnitudes
     {
       coeffMagn[i] = abs (inputCoeffs[sfbStart + i]);
     }