fix loudness estim.

2025-03-11 16:50:09 +01:00 · 2020-08-28 22:00:00 +02:00 · 2020-08-28 22:00:00 +02:00 · 54a7bcd783
commit 54a7bcd783
parent d2bede2449
4 changed files with 64 additions and 3 deletions
--- a/include/Release.htm
+++ b/include/Release.htm
@ -32,6 +32,7 @@
 <li><h3>minor bugfixes in bit-rate control and higher-rate coding at 32 kHz sampling rate</h3></li>
 <li><h3>exhaleApp: add support for CVBR mode 0 at codec sampling rates below 44.1 kHz</h3></li>
 <li><h3>exhaleApp: write complete MP4 &laquo;stss&raquo; data for improved compatibility (issue 13)</h3></li>
+ <li><h3>exhaleApp: higher accuracy of loudness estimation, better BS.1770-4 compliance</h3></li>
 </ul>
 <h3>&nbsp; &nbsp;Version <b>1.0.6 <span class="gray">&nbsp;July 2020</span></b></h3>
 <ul>
--- a/src/app/exhaleApp.cpp
+++ b/src/app/exhaleApp.cpp
@ -45,7 +45,11 @@
 #endif

 // constants, experimental macros
+#if LE_ACCURATE_CALC
+#define EA_LOUD_INIT  16384u  // bsSamplePeakLevel = 0 & methodValue = 0
+#else
 #define EA_LOUD_INIT  16399u  // bsSamplePeakLevel = 0 & methodValue = 0
+#endif
 #define EA_LOUD_NORM -42.25f  // -100 + 57.75 of ISO 23003-4, Table A.48
 #define EA_PEAK_NORM -96.33f  // 20 * log10(2^-16), 16-bit normalization
 #define EA_PEAK_MIN   0.262f  // 20 * log10() + EA_PEAK_NORM = -108 dbFS
@ -912,7 +916,7 @@ int main (const int argc, char* argv[])
        const uint32_t qPeak = uint32_t (32.0f * (20.0f - 20.0f * log10 (__max (EA_PEAK_MIN, float (loudStats & USHRT_MAX))) - EA_PEAK_NORM) + 0.5f);

        // recreate ASC + UC + loudness data
-        bw = EA_LOUD_INIT | (qPeak << 18) | (qLoud << 6); // measurementSystem is 3
+        bw = EA_LOUD_INIT | (qPeak << 18) | (qLoud << 6) | 11; // measurementSystem
        memset (outAuData, 0, 108 * sizeof (uint8_t)); // max allowed ASC + UC size
        i = exhaleEnc.initEncoder (outAuData, &bw); // with finished loudnessInfo()
      }
@ -925,7 +929,7 @@ int main (const int argc, char* argv[])
      fprintf_s (stdout, " Done, actual average %.1f kbit/s\n\n", (float) br * 0.001f);
      if (numChannels < 7)
      {
-        fprintf_s (stdout, " Input statistics: Mobile loudness %.2f LUFS,\tsample peak level %.2f dBFS\n\n",
+        fprintf_s (stdout, " Input statistics:  File loudness %.2f LUFS,\tsample peak level %.2f dBFS\n\n",
                   __max (3u, loudStats >> 16) / 512.f - 100.0f, 20.0f * log10 (__max (EA_PEAK_MIN, float (loudStats & USHRT_MAX))) + EA_PEAK_NORM);
      }

--- a/src/app/loudnessEstim.cpp
+++ b/src/app/loudnessEstim.cpp
@ -11,11 +11,25 @@
 #include "exhaleAppPch.h"
 #include "loudnessEstim.h"

+#if LE_ACCURATE_CALC
+static const int64_t kFilterCoeffs[4][8] = { // first 4: numerator, last 4: denominator, values fit into 32 bit
+  {-1007060950, 1418359536, -889278046, 209544004,  -986120192, 1360482752, -836214568, 193416912}, // <=32 kHz TODO
+  {-1007060950, 1418359536, -889278046, 209544004,  -986120192, 1360482752, -836214568, 193416912}, // 44.1 kHz
+  {-1007547085, 1419341519, -889783607, 209553717,  -988032194, 1365543311, -840618073, 194671779}, // 48.0 kHz
+  {-1007547085, 1419341519, -889783607, 209553717,  -988032194, 1365543311, -840618073, 194671779}  // >=64 kHz TODO
+};
+#endif
+
 // constructor
 LoudnessEstimator::LoudnessEstimator (int32_t* const inputPcmData,           const unsigned bitDepth /*= 24*/,
                                      const unsigned sampleRate /*= 44100*/, const unsigned numChannels /*= 2*/)
 {
+#if LE_ACCURATE_CALC
+  m_filterCoeffs  = kFilterCoeffs[sampleRate <= 44100 ? (sampleRate <= 32000 ? 0 : 1) : (sampleRate <= 48000 ? 2 : 3)];
+  m_filterFactor  = (sampleRate < 48000 ? (48000 - sampleRate) >> 11 : 0);
+#else
  m_filterFactor  = 224 + (__min (SHRT_MAX, (int) sampleRate - 47616) >> 10);
+#endif
  m_gbHopSize64   = (__min (163519, sampleRate) + 320) / 640; // 100 msec
  m_gbNormFactor  = (m_gbHopSize64 == 0 ? 0 : 1.0f / (4.0f * m_gbHopSize64));
  m_inputChannels = __min (8, numChannels);
@ -23,7 +37,15 @@ LoudnessEstimator::LoudnessEstimator (int32_t* const inputPcmData,           con
  m_inputPcmData  = inputPcmData;

  reset ();
+#if LE_ACCURATE_CALC
+  for (unsigned ch = 0; ch < 8; ch++)
+  {
+    memset (m_filterMemI[ch], 0, 4 * sizeof (int32_t));
+    memset (m_filterMemO[ch], 0, 4 * sizeof (int32_t));
+  }
+#else
  for (unsigned ch = 0; ch < 8; ch++) m_filterMemoryI[ch] = m_filterMemoryO[ch] = 0;
+#endif
 }

 // public functions
@ -32,6 +54,10 @@ uint32_t LoudnessEstimator::addNewPcmData (const unsigned samplesPerChannel)
  const unsigned frameSize64  = samplesPerChannel >> 6; // in units of 64
  const unsigned numSamples64 = 1 << 6; // sub-frame size (64, of course)
  const int32_t* chSig        = m_inputPcmData;
+#if LE_ACCURATE_CALC
+  const int64_t* filtI        = m_filterCoeffs;
+  const int64_t* filtO        = &m_filterCoeffs[4];
+#endif
  uint64_t* newQuarterPower   = m_powerValue[3];
  unsigned  ch, f, s;

@ -47,6 +73,20 @@ uint32_t LoudnessEstimator::addNewPcmData (const unsigned samplesPerChannel)
    {
      for (ch = 0; ch < m_inputChannels; ch++)
      {
+#if LE_ACCURATE_CALC
+        // accurate K-filter according to ITU-R BS.1770-4, Annex 1 (2015)
+        int32_t* const i = m_filterMemI[ch];
+        int32_t* const o = m_filterMemO[ch];
+        const int64_t pi = filtI[0] * i[0] + filtI[1] * i[1] + filtI[2] * i[2] + filtI[3] * i[3] -
+                           filtO[0] * o[0] - filtO[1] * o[1] - filtO[2] * o[2] - filtO[3] * o[3];
+        const int64_t to = (pi < 0 ? (1 << 28) - 1 : 0); // trunc. offset
+        const int32_t xi = (*(chSig++)) << 2;
+        const int32_t yi = xi + int32_t ((pi + (xi == 0 ? to : (1 << 27))) >> 28);
+        const uint32_t a = abs (xi >> 2);
+
+        i[3] = i[2];   i[2] = i[1];   i[1] = i[0];   i[0] = xi; // update
+        o[3] = o[2];   o[2] = o[1];   o[1] = o[0];   o[0] = yi; // memory
+#else
        // simplified K-filter, including 500-Hz high-pass pre-processing
        const int32_t xi = *(chSig++);
        const int32_t yi = xi - m_filterMemoryI[ch] + ((128 + m_filterFactor * m_filterMemoryO[ch]) >> 8);
@ -54,6 +94,7 @@ uint32_t LoudnessEstimator::addNewPcmData (const unsigned samplesPerChannel)

        m_filterMemoryI[ch] = xi;
        m_filterMemoryO[ch] = yi;
+#endif
        newQuarterPower[ch] += (int64_t) yi * (int64_t) yi;

        if (m_inputPeakValue < a) m_inputPeakValue = a; // get peak level
@ -121,6 +162,9 @@ uint32_t LoudnessEstimator::getStatistics (const bool includeWarmUp /*= false*/)
  if (zg < LE_THRESH_ABS) return peakValue16Bits;

  zg = LE_LUFS_OFFSET + 10.0f * log10 (zg / (normFac * numBlocks * (float) m_inputMaxValue * (float) m_inputMaxValue));
+#if LE_ACCURATE_CALC
+  zg -= m_filterFactor * 0.046875f; // for sample rates other than 48 kHz
+#endif
  i  = __max (0, int32_t ((zg + 100.0f) * 512.0f + 0.5f)); // map to uint

  return (__min (USHRT_MAX, i) << 16) | peakValue16Bits; // L = i/512-100
--- a/src/app/loudnessEstim.h
+++ b/src/app/loudnessEstim.h
@ -14,9 +14,15 @@
 #include "exhaleAppPch.h"

 // constants, experimental macros
-#define LE_THRESH_ABS   (15.0f / 268435456.0f) // absolute threshold for -70 LUFS
+#define LE_ACCURATE_CALC       1 // correct filter order, no 500-Hz pre-high-pass
 #define LE_THRESH_REL       0.1f // second stage, relative threshold 10dB below L
+#if LE_ACCURATE_CALC
+#define LE_THRESH_ABS (5.0f / 8388608.0f) // absolute gate threshold for -70 LUFS
+#define LE_LUFS_OFFSET -9.03125f // to get -3.01 LUFS for mono 997-Hz 0-dBFS sine
+#else
+#define LE_THRESH_ABS   (15.0f / 268435456.0f) // absolute threshold for -70 LUFS
 #define LE_LUFS_OFFSET  2.53125f // to get -3.01 LUFS for mono 997-Hz 0-dBFS sine
+#endif

 // ITU-R loudness estimator class
 class LoudnessEstimator
@ -24,8 +30,14 @@ class LoudnessEstimator
 private:

  // member variables
+#if LE_ACCURATE_CALC
+  const int64_t* m_filterCoeffs; // coefficients of IIR K-weighting filter kernel
+  int32_t  m_filterMemI[8][4]; // channel-wise preceding K-weighting filter input
+  int32_t  m_filterMemO[8][4]; // channel-wise previous K-weighting filter output
+#else
  int32_t  m_filterMemoryI[8]; // channel-wise preceding K-weighting filter input
  int32_t  m_filterMemoryO[8]; // channel-wise previous K-weighting filter output
+#endif
  uint64_t m_powerValue[4][8]; // channel-wise power in each gating block quarter
  float    m_gbNormFactor; // 64-sample normalization factor, 1/(4*m_gbHopSize64)
  uint8_t  m_filterFactor; // sampling rate dependent K-weighting filter constant