From 9f82a8a5bf4acc34cf82c43bf53b859621003954 Mon Sep 17 00:00:00 2001 From: "Christian R. Helmrich" Date: Sat, 11 Apr 2020 01:00:03 +0200 Subject: [PATCH] transient tuning pt. 2 --- src/lib/bitAllocation.cpp | 43 +++++++++++++++++++----------------- src/lib/exhaleEnc.cpp | 43 +++++++++++++++++++++--------------- src/lib/stereoProcessing.cpp | 8 +++++-- 3 files changed, 54 insertions(+), 40 deletions(-) diff --git a/src/lib/bitAllocation.cpp b/src/lib/bitAllocation.cpp index d21bd14..caa880b 100644 --- a/src/lib/bitAllocation.cpp +++ b/src/lib/bitAllocation.cpp @@ -12,6 +12,11 @@ #include "bitAllocation.h" // static helper functions +static inline uint32_t intSqrt (const uint32_t val) +{ + return uint32_t (0.5 + sqrt ((double) val)); +} + static inline uint32_t jndModel (const uint32_t val, const uint32_t mean, const unsigned expTimes512, const unsigned mulTimes512) { @@ -102,7 +107,7 @@ uint8_t BitAllocator::getScaleFac (const uint32_t sfbStepSize, const int32_t* co u = 0; for (sf = 0; sf < sfbWidth; sf++) { - u += uint32_t (0.5 + sqrt (abs ((double) sfbSignal[sf]))); + u += intSqrt (abs (sfbSignal[sf])); } u = uint32_t ((u * 16384ui64 + (sfbWidth >> 1)) / sfbWidth); u = uint32_t (0.5 + sqrt ((double) u) * 128.0); @@ -197,10 +202,10 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA for (/*b*/; b > 0; b--) { gStepSizes[b] = __max (gRms[b], BA_EPS); - sumStepSizes += unsigned (0.5 + sqrt ((double) gStepSizes[b])); + sumStepSizes += intSqrt (gStepSizes[b]); } gStepSizes[0] = __max (gRms[0], BA_EPS); - sumStepSizes += unsigned (0.5 + sqrt ((double) gStepSizes[0])); + sumStepSizes += intSqrt (gStepSizes[0]); } // for gr if (ch != lfeChannelIndex) @@ -218,28 +223,25 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA if (curGrpStep > maxGrpStep) maxGrpStep = curGrpStep; } - for (gr = 0; gr + 1 < grpData.numWindowGroups; gr++) + for (gr = 0; gr < grpData.numWindowGroups; gr++) { - const uint32_t newGrpStep = __max (stepSizeM1, stepSizes[b + numSwbShort * (gr + 1)]); + const uint32_t newGrpStep = __max (stepSizeM1, (gr + 1 == grpData.numWindowGroups ? BA_EPS : stepSizes[b + numSwbShort * (gr + 1)])); stepSizeM1 = stepSizes[b + numSwbShort * gr]; if ((stepSizeM1 == maxGrpStep) && (maxGrpStep > newGrpStep)) { - sumStepSizes -= unsigned (0.5 + sqrt ((double) maxGrpStep)); - stepSizes[b + numSwbShort * gr] = newGrpStep; - sumStepSizes += unsigned (0.5 + sqrt ((double) newGrpStep)); + const uint32_t sqrtOldStep = intSqrt (maxGrpStep); + const uint32_t sqrtNewStep = intSqrt (newGrpStep); + uint32_t& gStepSize = stepSizes[b + numSwbShort * gr]; + + sumStepSizes += (gStepSize = (sqrtOldStep + sqrtNewStep) >> 1) - sqrtOldStep; + gStepSize *= gStepSize; // for square-mean-root } } - if ((stepSizes[b + numSwbShort * gr] == maxGrpStep) && (maxGrpStep > stepSizeM1)) - { - sumStepSizes -= unsigned (0.5 + sqrt ((double) maxGrpStep)); - stepSizes[b + numSwbShort * gr] = stepSizeM1; - sumStepSizes += unsigned (0.5 + sqrt ((double) stepSizeM1)); - } } // for b - m_avgStepSize[ch] = __min (USHRT_MAX, uint32_t ((sumStepSizes + (nBandsInCh >> 1)) / nBandsInCh)); + m_avgStepSize[ch] = __min (USHRT_MAX, (sumStepSizes + (nBandsInCh >> 1)) / nBandsInCh); sumMeans += m_avgStepSize[ch]; m_avgStepSize[ch] *= m_avgStepSize[ch]; @@ -288,29 +290,30 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA stepSizes[b] = __max (rms[b], maskingSlope + BA_EPS); } } + stepSizes[b] = 0; for (b -= 1; b > __min (MF, maxSfbInCh); b--) // complete simultaneous masking by reversing the pattern { - sumStepSizes += unsigned (0.5 + sqrt ((double) stepSizes[b])); + sumStepSizes += intSqrt (stepSizes[b]); maskingSlope = ((uint64_t) stepSizes[b] * (8u + b - MF) + (msOffset << 3u)) >> (msShift + 3u); stepSizes[b - 1] = __max (stepSizes[b - 1], maskingSlope); } for (/*b*/; b > __min (LF, maxSfbInCh); b--) // typical reversed mid-freq. simultaneous masking slopes { - sumStepSizes += unsigned (0.5 + sqrt ((double) stepSizes[b])); + sumStepSizes += intSqrt (stepSizes[b]); maskingSlope = (stepSizes[b] + msOffset) >> msShift; stepSizes[b - 1] = __max (stepSizes[b - 1], maskingSlope); } for (/*b = min (9, maxSfbInCh)*/; b > 0; b--) // steeper reversed low-freq. simultaneous masking slopes { - sumStepSizes += unsigned (0.5 + sqrt ((double) stepSizes[b])); + sumStepSizes += intSqrt (stepSizes[b]); maskingSlope = (stepSizes[b] + (msOffset << (10u - b))) >> (msShift + 10u - b); stepSizes[b - 1] = __max (stepSizes[b - 1], maskingSlope); } - sumStepSizes += unsigned (0.5 + sqrt ((double) stepSizes[0])); + sumStepSizes += intSqrt (stepSizes[0]); // --- LONG window: apply perceptual JND model and local band-peak smoothing, undo equal-loudness weighting nMeans++; - m_avgStepSize[ch] = __min (USHRT_MAX, uint32_t ((sumStepSizes + (nBandsInCh >> 1)) / nBandsInCh)); + m_avgStepSize[ch] = __min (USHRT_MAX, (sumStepSizes + (nBandsInCh >> 1)) / nBandsInCh); sumMeans += m_avgStepSize[ch]; m_avgStepSize[ch] *= m_avgStepSize[ch]; diff --git a/src/lib/exhaleEnc.cpp b/src/lib/exhaleEnc.cpp index 0a1eaff..0439958 100644 --- a/src/lib/exhaleEnc.cpp +++ b/src/lib/exhaleEnc.cpp @@ -573,13 +573,13 @@ unsigned ExhaleEncoder::getOptParCorCoeffs (const int32_t* const mdctSignal, con if (tnsData.filterOrder[0] > 0) // try to reduce TNS start band as long as SNR increases { const uint16_t filtOrder = tnsData.filterOrder[0]; - uint16_t b = __min (m_specAnaCurr[channelIndex] & 31, (nSamplesInFrame - filtOrder) >> SA_BW_SHIFT); + uint16_t b = __min ((m_specAnaCurr[channelIndex] & 31) + 2, (nSamplesInFrame - filtOrder) >> SA_BW_SHIFT); short filterC[MAX_PREDICTION_ORDER] = {0, 0, 0, 0}; int32_t* predSig = &m_mdctSignals[channelIndex][b << SA_BW_SHIFT]; // TNS start offset m_linPredictor.parCorToLpCoeffs (tnsData.coeffParCor, filtOrder, filterC); - for (b = (b > 0 ? b - 1 : 0), predSig--; b > 0; b--) // b is in spectr. analysis units + for (b--, predSig--; b > 0; b--) // start a bit higher; b is in spectr. analysis units { uint64_t sumAbsOrg = 0, sumAbsTns = 0; @@ -664,8 +664,8 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS); const uint32_t maxSfbLong = (samplingRate < 37566 ? 51 /*32 kHz*/ : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)); const uint32_t reductionFactor = (samplingRate < 37566 ? 2 : 3); // undercoding reduction - const uint64_t scaleSr = (samplingRate < 27713 ? 37 - m_bitRateMode : 37) - ((m_bitRateMode & 7) > 2/*TODO*/ ? nChannels >> 1 : 0); - const uint64_t scaleBr = (m_bitRateMode == 0 ? 32 : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - (m_bitRateMode >> 1)); + const uint64_t scaleSr = (samplingRate < 27713 ? 37 - m_bitRateMode : 37) - (m_bitRateMode > 3 ? nChannels >> 1 : 0); + const uint64_t scaleBr = (m_bitRateMode == 0 ? 32 : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1)); uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf; uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS]; //uint8_t meanTempFlat[USAC_MAX_NUM_CHANNELS]; @@ -717,7 +717,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s const bool eightShorts = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT); const uint16_t nSamplesMax = (samplingRate < 37566 ? nSamplesInFrame : swbOffsetsL[m_swbTableIdx][maxSfbLong]); const uint8_t steppFadeLen = (eightShorts ? 4 : (coreConfig.tnsActive ? 32 : 64)); - const uint8_t steppFadeOff = ((m_bitRateMode + 1) & 6) << (eightShorts ? 2 : 5); + const uint8_t steppFadeOff = ((m_bitRateMode + 77000 / samplingRate) & 6) << (eightShorts ? 2 : 5); const int64_t steppWeightI = __min (64, m_perCorrCurr[el] - 128) >> (eightShorts || coreConfig.tnsActive ? 1 : 0); const int64_t steppWeightD = 128 - steppWeightI; // decrement, (1 - crosstalk) * 128 const TnsData& tnsData0 = coreConfig.tnsData[0]; @@ -785,7 +785,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s if ((errorValue == 0) && (coreConfig.stereoMode == 2)) // frame M/S, synch statistics { - const uint8_t numSwbFrame = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT ? m_numSwbShort : __min (m_numSwbLong, maxSfbLong)); + const uint8_t numSwbFrame = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT ? m_numSwbShort : __min (m_numSwbLong, maxSfbLong + 1)); const uint32_t peakIndexSte = __max ((m_specAnaCurr[ci] >> 5) & 2047, (m_specAnaCurr[ci + 1] >> 5) & 2047) << 5; errorValue = m_stereoCoder.applyFullFrameMatrix (m_mdctSignals[ci], m_mdctSignals[ci + 1], @@ -810,6 +810,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s { SfbGroupData& grpData = coreConfig.groupingData[ch]; const bool eightShorts = (coreConfig.icsInfoCurr[ch].windowSequence == EIGHT_SHORT); + const uint8_t maxSfbCh = grpData.sfbsPerGroup; const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong); const uint8_t mSfmFac = eightTimesSqrt256Minus[meanSpecFlat[ci]]; uint32_t* stepSizes = &sfbStepSizes[ci * m_numSwbShort * NUM_WINDOW_GROUPS]; @@ -827,7 +828,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s // undercoding reduction for case where large number of coefs is quantized to zero s = (eightShorts ? (nSamplesInFrame * grpData.windowGroupLength[gr]) >> 1 : nSamplesInFrame << 2); - for (b = 0; b < grpData.sfbsPerGroup; b++) + for (b = 0; b < maxSfbCh; b++) { #if SA_IMPROVED_REAL_ABS const uint32_t rmsComp = (coreConfig.stereoMode > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]); @@ -860,7 +861,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s } s = (eightShorts ? s / ((nSamplesInFrame * grpData.windowGroupLength[gr]) >> 8) : s / (nSamplesInFrame >> 5)); - for (b = 0; b < grpData.sfbsPerGroup; b++) + for (b = 0; b < maxSfbCh; b++) { const unsigned lfConst = (samplingRate < 27713 && !eightShorts ? 1 : 2); // LF SNR boost, cf my M.Sc. thesis const unsigned lfAtten = (b <= 5 ? (eightShorts ? 1 : 4) + b * lfConst : 5 * lfConst - 1 + b + ((b + 5) >> 4)); @@ -876,13 +877,16 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s } // for gr #if !RESTRICT_TO_AAC - if (grpData.sfbsPerGroup > 0 && m_noiseFilling[el] && !eightShorts) // HF noise-fill + if ((maxSfbCh > 0) && m_noiseFilling[el] && (m_bitRateMode <= 3 || !eightShorts)) { - const uint8_t numSwbFrame = __min (numSwbCh, maxSfbLong); // rate based bandwidth + const uint8_t numSwbFrame = __min (numSwbCh, (eightShorts ? maxSfbCh : maxSfbLong) + (m_bitRateMode > 3 || samplingRate < 37566 ? 0 : 1)); - if (grpData.sfbsPerGroup < numSwbFrame) + if (maxSfbCh < numSwbFrame) // increase coding bandwidth { - memset (&grpData.scaleFactors[grpData.sfbsPerGroup], 0, (numSwbFrame - grpData.sfbsPerGroup) * sizeof (uint8_t)); + for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++) + { + memset (&grpData.scaleFactors[maxSfbCh + m_numSwbShort * gr], 0, (numSwbFrame - maxSfbCh) * sizeof (uint8_t)); + } grpData.sfbsPerGroup = coreConfig.icsInfoCurr[ch].maxSfb = numSwbFrame; } if (ch > 0) coreConfig.commonMaxSfb = (coreConfig.icsInfoCurr[0].maxSfb == coreConfig.icsInfoCurr[1].maxSfb); @@ -1213,6 +1217,7 @@ unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS if (coreConfig.commonWindow && (m_bitRateMode <= 4)) // stereo pre-processing analysis { const bool eightShorts = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT); + const uint8_t meanSpecFlat = (((m_specAnaCurr[ci] >> 16) & UCHAR_MAX) + ((m_specAnaCurr[ci + 1] >> 16) & UCHAR_MAX) + 1) >> 1; const uint16_t* const swbo = swbOffsetsL[m_swbTableIdx]; const uint16_t nSamplesMax = (samplingRate < 37566 ? nSamplesInFrame : swbo[brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)]); const int16_t steAnaStats = m_specAnalyzer.stereoSigAnalysis (m_mdctSignals[ci], m_mdctSignals[ci + 1], @@ -1222,14 +1227,15 @@ unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS if ((s = abs (steAnaStats)) * m_perCorrCurr[el] == 0) // transitions to/from silence { - m_perCorrCurr[el] = (uint8_t) s; + m_perCorrCurr[el] = uint8_t((32 + s * __min (64, eightTimesSqrt256Minus[meanSpecFlat])) >> 6); } else // gentle overlap length dependent temporal smoothing { const int16_t allowedDiff = (coreConfig.icsInfoCurr[0].windowSequence < EIGHT_SHORT ? 16 : 32); const int16_t prevPerCorr = __max (128, __min (192, m_perCorrCurr[el])); + const int16_t currPerCorr = (32 + s * __min (64, eightTimesSqrt256Minus[meanSpecFlat])) >> 6; - m_perCorrCurr[el] = (uint8_t) __max (prevPerCorr - allowedDiff, __min (prevPerCorr + allowedDiff, (int16_t) s)); + m_perCorrCurr[el] = (uint8_t) __max (prevPerCorr - allowedDiff, __min (prevPerCorr + allowedDiff, currPerCorr)); } if (s == steAnaStats * -1) coreConfig.stereoConfig = 2; // 2: side > mid, pred_dir=1 @@ -1489,7 +1495,8 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o tsCurr[ch] = (m_tempAnaCurr[ci] /*R*/) & UCHAR_MAX; tsNext[ch] = (m_tempAnaNext[ci] >> 8) & UCHAR_MAX; - const bool lowOlapNext = (m_tranLocNext[ci] >= 0) || (sfNext < 68 && tfNext >= 204) || (tsCurr[ch] >= 153) || (tsNext[ch] >= 153); + const bool lowOlapNext = (m_tranLocNext[ci] >= 0) || (sfNext <= UCHAR_MAX / 4 && tfNext > (UCHAR_MAX * 13) / 16) || + (tsCurr[ch] > (UCHAR_MAX * 5) / 8) || (tsNext[ch] > (UCHAR_MAX * 5) / 8); const bool sineWinCurr = (sfCurr >= 170) && (sfNext >= 170) && (sfCurr < 221) && (sfNext < 221) && (tsCurr[ch] < 20) && (tfCurr >= 153) && (tfNext >= 153) && (tfCurr < 184) && (tfNext < 184) && (tsNext[ch] < 20); // set window_sequence @@ -1499,11 +1506,11 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o } else // LONG_START_SEQUENCE, STOP_START_SEQUENCE, EIGHT_SHORT_SEQUENCE - min overlap { - wsCurr = (m_tranLocCurr[ci] >= 0) ? EIGHT_SHORT : + wsCurr = (m_tranLocCurr[ci] >= 0) || (tsCurr[ch] > (UCHAR_MAX * 5) / 8) || (tfCurr > (UCHAR_MAX * 15) / 16) ? EIGHT_SHORT : #if RESTRICT_TO_AAC - (lowOlapNext && (m_tranLocNext[ci] >= 0 || wsPrev != EIGHT_SHORT) ? EIGHT_SHORT : LONG_STOP); + (lowOlapNext ? EIGHT_SHORT : LONG_STOP); #else - (lowOlapNext && (m_tranLocNext[ci] >= 0 || wsPrev != STOP_START) ? STOP_START : LONG_STOP); + (lowOlapNext ? STOP_START : LONG_STOP); #endif } diff --git a/src/lib/stereoProcessing.cpp b/src/lib/stereoProcessing.cpp index 99673dc..82343dd 100644 --- a/src/lib/stereoProcessing.cpp +++ b/src/lib/stereoProcessing.cpp @@ -30,7 +30,7 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in const bool alterPredDir = (applyPredSte && (useAltPredDir > 0)); // predict mid from side? const SfbGroupData& grp = groupingData1; const bool eightShorts = (grp.numWindowGroups > 1); - const uint8_t maxSfbSte = (eightShorts ? __max (grp.sfbsPerGroup, groupingData2.sfbsPerGroup) : numSwbFrame); + const uint8_t maxSfbSte = (eightShorts ? __min (numSwbFrame, __max (grp.sfbsPerGroup, groupingData2.sfbsPerGroup) + 1) : numSwbFrame); uint32_t numSfbPredSte = 0; // counter if ((mdctSpectrum1 == nullptr) || (mdctSpectrum2 == nullptr) || (numSwbFrame < maxSfbSte) || (grp.numWindowGroups != groupingData2.numWindowGroups) || @@ -282,7 +282,11 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in sfbTempVar = (applyPredSte ? __max (rmsSfbM[b], rmsSfbS[b]) : __max (grpRms1[idx], grpRms2[idx])); - if (sfbFacLR <= 1.0) // total simultaneous masking - no positive SNR in either SFB + if ((grpStepSizes1[idx] == 0) || (grpStepSizes2[idx] == 0)) // HF noise filled SFB + { + grpStepSizes1[idx] = grpStepSizes2[idx] = 0; + } + else if (sfbFacLR <= 1.0) // simultaneous masking - no positive SNR in either SFB { const double max = __max (sfbRmsL, sfbRmsR);