From 1b231994ec7897700e67459278807893aeb424de Mon Sep 17 00:00:00 2001 From: "Christian R. Helmrich" Date: Wed, 30 Aug 2023 20:00:00 +0200 Subject: [PATCH] add MSE test code --- src/lib/exhaleEnc.cpp | 57 ++++++++++++++++++++++++++++++++++++++++++- src/lib/exhaleEnc.h | 3 ++- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/src/lib/exhaleEnc.cpp b/src/lib/exhaleEnc.cpp index 6e3d661..34c946b 100644 --- a/src/lib/exhaleEnc.cpp +++ b/src/lib/exhaleEnc.cpp @@ -640,7 +640,11 @@ unsigned ExhaleEncoder::getOptParCorCoeffs (const SfbGroupData& grpData, const u { tnsData.coeffResLow[0] = false; tnsData.filterDownward[0] = false; // enforce direction = 0 for now, detection difficult +#if EE_MORE_MSE + tnsData.filterOrder[0] = uint8_t (m_bitRateMode >= EE_MORE_MSE ? 0 : m_specAnalyzer.getLinPredCoeffs (tnsData.coeffParCor[0], channelIndex)); +#else tnsData.filterOrder[0] = (uint8_t) m_specAnalyzer.getLinPredCoeffs (tnsData.coeffParCor[0], channelIndex); +#endif tnsData.firstTnsWindow = 0; if (tnsData.filterOrder[0] > 0) // try to reduce TNS start band as long as SNR increases @@ -730,7 +734,11 @@ unsigned ExhaleEncoder::getOptParCorCoeffs (const SfbGroupData& grpData, const u predGainCurr = predGainPrev; predGainPrev = (temp >> (8 * bestOrder - 16)) & UCHAR_MAX; } +#if EE_MORE_MSE + tnsData.filterOrder[n] = uint8_t (m_bitRateMode >= EE_MORE_MSE ? 0 : ((bestOrder == 1) && (tnsData.coeffParCor[n][0] == 0) ? 0 : bestOrder)); +#else tnsData.filterOrder[n] = uint8_t ((bestOrder == 1) && (tnsData.coeffParCor[n][0] == 0) ? 0 : bestOrder); +#endif } n++; } @@ -936,8 +944,34 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s uint8_t* grpScaleFacs = &grpData.scaleFactors[m_numSwbShort * gr]; uint32_t* grpStepSizes = &stepSizes[m_numSwbShort * gr]; +#if EE_MORE_MSE + s = 0; + for (unsigned b = grpOff[0]; b < grpOff[maxSfbCh]; b++) + { + s += unsigned (0.5 + sqrt ((double) abs (m_mdctSignals[ci][b]))); + } + if (el == 0 && nrChannels == 2) + { + for (unsigned b = grpOff[0]; b < grpOff[maxSfbCh]; b++) + { + s += unsigned (0.5 + sqrt ((double) abs (m_mdctSignals[1 - ci][b]))); + } + s = (s + 1) >> 1; + } + if (grpOff[maxSfbCh] > grpOff[0]) + { + s = unsigned ((s * (eightShorts ? (24u + (grpData.windowGroupLength[gr] >> 2)) / grpData.windowGroupLength[gr] : 4u) + 4096u) >> 13); + } + s = unsigned (__max (1u + (INT32_MAX >> ((eightShorts ? 1 : 2) + (2 + m_bitRateMode / 9) * m_bitRateMode)), s * s)); +#endif for (unsigned b = 0; b < maxSfbCh; b++) { +#if EE_MORE_MSE + const uint8_t sfbWidth = grpOff[b + 1] - grpOff[b]; + const uint64_t sThresh = __max (1u + (INT32_MAX >> 29), (grpRms[b] * uint64_t (__max (16, b * b)) + 32u) >> 6); + + grpStepSizes[b] = uint32_t (!eightShorts && s > sThresh ? sThresh : (eightShorts ? s >> __max (0, 2 - int (b)) : s)); +#else const unsigned lfConst = (samplingRate < 27713 && !eightShorts ? 1 : 2); // lfAtten: LF SNR boost, as in my M.Sc. thesis const unsigned lfAtten = (b <= 5 ? (eightShorts ? 1 : 4) + b * lfConst : 5 * lfConst - 1 + b + ((b + 5) >> 4)); const uint8_t sfbWidth = grpOff[b + 1] - grpOff[b]; @@ -945,6 +979,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s // scale step-sizes according to VBR mode & derive scale factors from step-sizes grpStepSizes[b] = uint32_t (__max (BA_EPS, ((1u << 24) + grpStepSizes[b] * scale) >> 25)); +#endif #if !RESTRICT_TO_AAC if (!m_noiseFilling[el] || (m_bitRateMode > 0) || (m_shiftValSBR == 0) || (samplingRate < 23004) || (b + 3 - (meanSpecFlat[ci] >> 6) < m_numSwbLong)) // HF @@ -1153,8 +1188,12 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en const uint16_t peakIndex = (shortWinCurr ? 0 : (m_specAnaCurr[ci] >> 5) & 2047); const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort - 2 + (meanSpecFlat[ci] >> 6) : maxSfbLong - 6 + (meanSpecFlat[ci] >> 5)) + (shortWinCurr ? -3 + (((1 << 5) + meanTempFlat[ci]) >> 6) : -7 + (((1 << 4) + meanTempFlat[ci]) >> 5)); +#if EE_MORE_MSE + const unsigned targetBitCount25 = INT32_MAX; +#else const unsigned targetBitCount25 = ((60000 + 20000 * ((m_bitRateMode + m_shiftValSBR) >> (m_frameCount <= 1 ? 2 : 0))) * nSamplesInFrame) / (samplingRate * ((grpData.numWindowGroups + 1) >> 1)); +#endif unsigned b = grpData.sfbsPerGroup - 1; if ((grpRms[b] >> 16) > 0) lastSfb = b; @@ -1184,6 +1223,9 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en } } #endif +#if EE_MORE_MSE + b = lastSfb; +#else // coarse-quantize near-Nyquist SFB with SBR @ 48-64 kHz b = 40 + (samplingRate >> 12); if ((m_shiftValSBR == 0) || (samplingRate < 23004) || shortWinCurr || (b > lastSfb)) b = lastSfb; @@ -1193,6 +1235,7 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en { b--; // search first coarsely quantized high-freq. SFB } +#endif lastSOff = b; for (b++; b <= lastSfb; b++) @@ -1478,8 +1521,13 @@ unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS { const uint8_t tonality = (m_specAnaCurr[ci] >> 16) & UCHAR_MAX; +#if EE_MORE_MSE + tnsData.filterOrder[n] = (m_bitRateMode >= EE_MORE_MSE ? 0 : m_linPredictor.calcOptTnsCoeffs (tnsData.coeffParCor[n], tnsData.coeff[n], &tnsData.coeffResLow[n], + tnsData.filterOrder[n], s, tonality >> (m_tempFlatPrev[ci] >> 5))); +#else tnsData.filterOrder[n] = m_linPredictor.calcOptTnsCoeffs (tnsData.coeffParCor[n], tnsData.coeff[n], &tnsData.coeffResLow[n], tnsData.filterOrder[n], s, tonality >> (m_tempFlatPrev[ci] >> 5)); +#endif tnsData.numFilters[n] = (tnsData.filterOrder[n] > 0 ? 1 : 0); if ((ch == 0) && (icsCurr.windowSequence == EIGHT_SHORT) && (tnsData.numFilters[n] == 0) && (tnsData.firstTnsWindow == gr)) { @@ -1674,9 +1722,12 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o // save maximum spectral flatness of current and neighboring frames for quantization m_tempAnaCurr [ci] = (m_tempAnaCurr[ci] & 0xFFFFFF) | (__max (sfCurr, __max (m_specFlatPrev[ci], sfNext)) << 24); m_specFlatPrev[ci] = (uint8_t) sfCurr; - +#if EE_MORE_MSE + const bool lowOlapNext = (m_tranLocNext[ci] >= 0); +#else const bool lowOlapNext = (m_tranLocNext[ci] >= 0) || (sfNext <= UCHAR_MAX / 4 && tfNext > (UCHAR_MAX * 13) / 16) || (tsCurr[ch] > (UCHAR_MAX * 5) / 8) || (tsNext[ch] > (UCHAR_MAX * 5) / 8); +#endif const bool sineWinCurr = (sfCurr >= 170) && (sfNext >= 170) && (sfCurr < 221) && (sfNext < 221) && (tsCurr[ch] < 20) && (tfCurr >= 153) && (tfNext >= 153) && (tfCurr < 184) && (tfNext < 184) && (tsNext[ch] < 20); // set window_sequence @@ -1686,7 +1737,11 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o } else // LONG_START_SEQUENCE, STOP_START_SEQUENCE, EIGHT_SHORT_SEQUENCE - min overlap { +#if EE_MORE_MSE + wsCurr = (m_tranLocCurr[ci] >= 0) ? EIGHT_SHORT : +#else wsCurr = (m_tranLocCurr[ci] >= 0) || (tsCurr[ch] > (UCHAR_MAX * 5) / 8) || (tfCurr > tThresh / 16) ? EIGHT_SHORT : +#endif #if RESTRICT_TO_AAC (lowOlapNext ? EIGHT_SHORT : LONG_STOP); #else diff --git a/src/lib/exhaleEnc.h b/src/lib/exhaleEnc.h index 5bc74ef..c1a6aa5 100644 --- a/src/lib/exhaleEnc.h +++ b/src/lib/exhaleEnc.h @@ -1,5 +1,5 @@ /* exhaleEnc.h - header file for class providing Extended HE-AAC encoding capability - * written by C. R. Helmrich, last modified in 2021 - see License.htm for legal notices + * written by C. R. Helmrich, last modified in 2023 - see License.htm for legal notices * * The copyright in this software is being made available under the exhale Copyright License * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third- @@ -26,6 +26,7 @@ // constant and experimental macro #define WIN_SCALE double (1 << 23) +#define EE_MORE_MSE 0 // 1-9: MSE optimized encoding with TNS disabled starting at bit-rate mode 1-9 // channelConfigurationIndex setup typedef enum USAC_CCI : signed char