diff --git a/src/app/loudnessEstim.cpp b/src/app/loudnessEstim.cpp index 852a304..3e586f0 100644 --- a/src/app/loudnessEstim.cpp +++ b/src/app/loudnessEstim.cpp @@ -13,8 +13,8 @@ #if LE_ACCURATE_CALC static const int64_t kFilterCoeffs[4][8] = { // first 4: numerator, last 4: denominator, values fit into 32 bit - {-1007060950, 1418359536, -889278046, 209544004, -986120192, 1360482752, -836214568, 193416912}, // <=32 kHz TODO - {-1007060950, 1418359536, -889278046, 209544004, -986120192, 1360482752, -836214568, 193416912}, // 44.1 kHz + { -974848000, 1329463296, -808124416, 185073664, -946145526, 1253229580, -741406522, 165888320}, // <=32 kHz + {-1007157248, 1418657792, -889585664, 209649664, -986120192, 1360482752, -836214568, 193416912}, // 44.1 kHz {-1007547085, 1419341519, -889783607, 209553717, -988032194, 1365543311, -840618073, 194671779}, // 48.0 kHz {-1007547085, 1419341519, -889783607, 209553717, -988032194, 1365543311, -840618073, 194671779} // >=64 kHz TODO }; @@ -26,7 +26,7 @@ LoudnessEstimator::LoudnessEstimator (int32_t* const inputPcmData, con { #if LE_ACCURATE_CALC m_filterCoeffs = kFilterCoeffs[sampleRate <= 44100 ? (sampleRate <= 32000 ? 0 : 1) : (sampleRate <= 48000 ? 2 : 3)]; - m_filterFactor = (sampleRate < 48000 ? (48000 - sampleRate) >> 11 : 0); + m_filterFactor = (sampleRate < 48000 ? 
(48200 - sampleRate) >> 12 : 0); #else m_filterFactor = 224 + (__min (SHRT_MAX, (int) sampleRate - 47616) >> 10); #endif diff --git a/src/lib/bitAllocation.cpp b/src/lib/bitAllocation.cpp index ce69b5a..ae46148 100644 --- a/src/lib/bitAllocation.cpp +++ b/src/lib/bitAllocation.cpp @@ -34,7 +34,7 @@ static inline uint32_t squareMeanRoot (const uint32_t value1, const uint32_t val return uint32_t (meanRoot * meanRoot + 0.5); } -static void jndPowerLawAndPeakSmoothing (uint32_t* const stepSizes, const unsigned nStepSizes, +static void jndPowerLawAndPeakSmoothing (uint32_t* const stepSizes, const unsigned nStepSizes, const bool lowRateMode, const uint32_t avgStepSize, const uint8_t sfm, const uint8_t tfm) { const unsigned expTimes512 = 512u - sfm; // 1.0 - sfm / 2.0 @@ -51,15 +51,16 @@ static void jndPowerLawAndPeakSmoothing (uint32_t* const stepSizes, const unsig stepSizes[0] = __min (stepSizeM1, stepSizes[0]); // `- becomes -- for (/*b*/; b < nStepSizes; b++) { - const uint64_t oneMinusB = 128 - b; + const uint64_t modifiedB = (lowRateMode ? 
16 + b : b); + const uint64_t oneMinusB = 128 - modifiedB; const uint32_t stepSizeB = jndModel (stepSizes[b], avgStepSize, expTimes512, mulTimes512); if ((stepSizeM3 <= stepSizeM2) && (stepSizeM3 <= stepSizeM1) && (stepSizeB <= stepSizeM2) && (stepSizeB <= stepSizeM1)) { const uint32_t maxM3M0 = __max (stepSizeM3, stepSizeB); // smoothen local spectral peak of _´`- shape - stepSizes[b - 2] = uint32_t ((b * (uint64_t) stepSizes[b - 2] + oneMinusB * __min (maxM3M0, stepSizes[b - 2]) + 64) >> 7); // _-`- - stepSizes[b - 1] = uint32_t ((b * (uint64_t) stepSizes[b - 1] + oneMinusB * __min (maxM3M0, stepSizes[b - 1]) + 64) >> 7); // _--- + stepSizes[b - 2] = uint32_t ((modifiedB * stepSizes[b - 2] + oneMinusB * __min (maxM3M0, stepSizes[b - 2]) + 64) >> 7); // _-`- + stepSizes[b - 1] = uint32_t ((modifiedB * stepSizes[b - 1] + oneMinusB * __min (maxM3M0, stepSizes[b - 1]) + 64) >> 7); // _--- } stepSizeM3 = stepSizeM2; stepSizeM2 = stepSizeM1; @@ -275,7 +276,8 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA for (gr = 0; gr < grpData.numWindowGroups; gr++) // separate spectral peak smoothing for each group { - jndPowerLawAndPeakSmoothing (&stepSizes[numSwbShort * gr], maxSfbInCh, m_avgStepSize[ch], m_avgSpecFlat[ch], 0); + jndPowerLawAndPeakSmoothing (&stepSizes[numSwbShort * gr], maxSfbInCh, false, + m_avgStepSize[ch], m_avgSpecFlat[ch], 0); } } continue; @@ -345,7 +347,8 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA sumMeans += m_avgStepSize[ch]; m_avgStepSize[ch] *= m_avgStepSize[ch]; - jndPowerLawAndPeakSmoothing (stepSizes, maxSfbInCh, m_avgStepSize[ch], m_avgSpecFlat[ch], tnsDisabled ? m_avgTempFlat[ch] : 0); + jndPowerLawAndPeakSmoothing (stepSizes, maxSfbInCh, (m_rateIndex == 0) && (samplingRate >= 27713), + m_avgStepSize[ch], m_avgSpecFlat[ch], tnsDisabled ? 
m_avgTempFlat[ch] : 0); if ((samplingRate >= 27713) && (samplingRate < 75132)) { diff --git a/src/lib/exhaleEnc.cpp b/src/lib/exhaleEnc.cpp index bf70afc..82b1b8c 100644 --- a/src/lib/exhaleEnc.cpp +++ b/src/lib/exhaleEnc.cpp @@ -2096,14 +2096,23 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin if ((errorValue == 0) && (audioConfigBuffer != nullptr)) // save UsacConfig() for writeout { + const uint32_t loudnessInfo = (audioConfigBytes ? *audioConfigBytes : 0); + errorValue = m_outStream.createAudioConfig (m_frequencyIdx, m_frameLength != CCFL_1024, chConf, m_numElements, - elementTypeConfig[chConf], audioConfigBytes ? *audioConfigBytes : 0, + elementTypeConfig[chConf], loudnessInfo, #if !RESTRICT_TO_AAC m_timeWarping, m_noiseFilling, #endif audioConfigBuffer); if (audioConfigBytes) *audioConfigBytes = errorValue; // length of UsacConfig() in bytes errorValue = (errorValue == 0 ? 1 : 0); + + // NOTE: In the following, an error value of 256 is actually a warning, not an error. If + // the exhale library is used for realtime encoding and a nonzero program loudness level + // is provided before any frames have been encoded, this warning reminds the implementer + // to apply short-term loudness normalization of the incoming live audio before encoding + // each frame, preferably to a program level of -23 LUFS, as recommended in EBU R128.
+ if ((m_frameCount == 0) && ((loudnessInfo & 16383) > 0)) errorValue |= 256; } return errorValue; diff --git a/src/lib/quantization.cpp b/src/lib/quantization.cpp index e827556..a4c6ded 100644 --- a/src/lib/quantization.cpp +++ b/src/lib/quantization.cpp @@ -53,7 +53,7 @@ double SfbQuantizer::getQuantDist (const unsigned* const coeffMagn, const uint8_ { const double d = m_lutXExp43[coeffQuant[i]] - coeffMagn[i] * stepSizeDiv; - dDist += d * d; // TODO: do this in fixed-point and with SIMD + dDist += d * d; } // consider quantization step-size in calculation of distortion @@ -520,7 +520,8 @@ unsigned SfbQuantizer::initQuantMemory (const unsigned maxTransfLength, { const unsigned numScaleFactors = (unsigned) maxScaleFacIndex + 1; #if EC_TRELLIS_OPT_CODING - const uint8_t numTrellisStates = (samplingRate < 28800 ? 8 - (samplingRate >> 13) : 5) - __min (2, (bitRateMode + 2) >> 2); // states per SFB + const uint8_t complexityOffset = (samplingRate < 28800 ? 8 - (samplingRate >> 13) : 5) + (bitRateMode == 0 ? 1 : 0); + const uint8_t numTrellisStates = complexityOffset - __min (2, (bitRateMode + 2) >> 2); // number of states per SFB const uint8_t numSquaredStates = numTrellisStates * numTrellisStates; const uint16_t quantRateLength = (samplingRate < 28800 || samplingRate >= 57600 ? 512 : 256); // quantizeMagnRDOC() #endif @@ -606,7 +607,7 @@ uint8_t SfbQuantizer::quantizeSpecSfb (EntropyCoder& entropyCoder, const int32_t const uint16_t sfbWidth = grpOffsets[sfb + 1] - sfbStart; uint32_t* const coeffMagn = &m_coeffMagn[sfbStart]; - for (int i = sfbWidth - 1; i >= 0; i--) // back up magnitudes. TODO: use SIMD for speed? + for (int i = sfbWidth - 1; i >= 0; i--) // back up magnitudes { coeffMagn[i] = abs (inputCoeffs[sfbStart + i]); } @@ -642,7 +643,7 @@ uint8_t SfbQuantizer::quantizeSpecSfb (EntropyCoder& entropyCoder, const int32_t uint8_t* ptrCurr = &m_coeffTemp[100]; uint8_t sfCurr = sfIndex; - for (int i = sfbWidth - 1; i >= 0; i--) // back up magnitudes. 
TODO: use SIMD for speed? + for (int i = sfbWidth - 1; i >= 0; i--) // back up magnitudes { coeffMagn[i] = abs (inputCoeffs[sfbStart + i]); }