From a327f120c1d0c9267f9f26a374997b5380ee995f Mon Sep 17 00:00:00 2001 From: "Christian R. Helmrich" Date: Thu, 13 May 2021 01:00:00 +0200 Subject: [PATCH] retune TNS for SBR --- src/lib/bitAllocation.cpp | 46 +++++++++++++++------------------------ src/lib/bitAllocation.h | 5 ++--- src/lib/specAnalysis.cpp | 34 ++++++++++++----------------- src/lib/specAnalysis.h | 8 +++---- 4 files changed, 37 insertions(+), 56 deletions(-) diff --git a/src/lib/bitAllocation.cpp b/src/lib/bitAllocation.cpp index 13bca10..3043666 100644 --- a/src/lib/bitAllocation.cpp +++ b/src/lib/bitAllocation.cpp @@ -1,11 +1,11 @@ /* bitAllocation.cpp - source file for class needed for psychoacoustic bit-allocation - * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices + * written by C. R. Helmrich, last modified in 2021 - see License.htm for legal notices * * The copyright in this software is being made available under the exhale Copyright License * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third- * party rights, including patent rights. No such rights are granted under this License. * - * Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved. + * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved. */ #include "exhaleLibPch.h" @@ -205,13 +205,13 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA m_avgStepSize[ch] = 0; - b = ((specAnaStats[ch] >> 16) & UCHAR_MAX); // start with squared spec. flatness from spectral analysis - b = __max (b * b, (tempAnaStats[ch] >> 24) * (tempAnaStats[ch] >> 24)); // ..and from temporal analysis - m_avgSpecFlat[ch] = uint8_t ((b + (1 << 7)) >> 8); // normalized maximum + b = ((specAnaStats[ch] >> 16) & UCHAR_MAX); + b = __max (b * b, (tempAnaStats[ch] >> 24) * (tempAnaStats[ch] >> 24)); + m_avgSpecFlat[ch] = uint8_t ((b + (1 << 7)) >> 8); // max. of squared SFM from spec. and temp. analysis - b = ((tempAnaStats[ch] >> 16) & UCHAR_MAX); // now derive squared temp. flatness from temporal analysis - b = __max (b * b, (specAnaStats[ch] >> 24) * (specAnaStats[ch] >> 24)); // ..and from spectral analysis - m_avgTempFlat[ch] = uint8_t ((b + (1 << 7)) >> 8); // normalized maximum + b = ((tempAnaStats[ch] >> 16) & UCHAR_MAX); + b = __max (b * b, (specAnaStats[ch] >> 24) * (specAnaStats[ch] >> 24)); + m_avgTempFlat[ch] = uint8_t ((b + (1 << 7)) >> 8); // max. of squared TFM from spec. and temp. analysis if ((nBandsInCh == 0) || (grpData.numWindowGroups > NUM_WINDOW_GROUPS)) { @@ -373,12 +373,6 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA sumMeans = (sumMeans + (nMeans >> 1)) / nMeans; sumMeans *= sumMeans; // since we've averaged square-roots -#if BA_INTER_CHAN_SIM_MASK - if (nMeans > 3) - { - // TODO: cross-channel simultaneous masking for 4.0 - 7.1 - } -#endif for (unsigned ch = 0; ch < nChannels; ch++) { @@ -435,9 +429,10 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA const SfbGroupData& grpData = *groupData[ch]; const uint32_t maxSfbInCh = __min (MAX_NUM_SWB_LONG, grpData.sfbsPerGroup); const bool eightShorts = (grpData.numWindowGroups != 1); - const bool lowRateTuning = (samplingRate >= 25495) && (sfm[ch] <= (SCHAR_MAX >> 1)); - const uint32_t* rms = grpData.sfbRmsValues; - uint32_t* stepSizes = &sfbStepSizes[ch * numSwbShort * NUM_WINDOW_GROUPS]; + const bool lowRateTuning = (m_rateIndex == 0) && (samplingRate >= 25495 && sfm[ch] <= (SCHAR_MAX >> 1)); + const bool undercodingRed = (m_rateIndex > 0) || (samplingRate >= 25495 && sfm[ch] * 8 > UCHAR_MAX * 7) || lowRateTuning; + const uint32_t* rms = grpData.sfbRmsValues; + uint32_t* stepSizes = &sfbStepSizes[ch * numSwbShort * NUM_WINDOW_GROUPS]; if ((grpData.numWindowGroups * maxSfbInCh == 0) || (grpData.numWindowGroups > NUM_WINDOW_GROUPS)) { @@ -454,8 +449,7 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA uint64_t s = (eightShorts ? (nSamplesInFrame * grpData.windowGroupLength[gr]) >> 1 : nSamplesInFrame << 2); memset (m_tempSfbValue, UCHAR_MAX, maxSfbInCh * sizeof (uint8_t)); - - if ((m_rateIndex == 0) && lowRateTuning && (maxSfbInCh > 0) && !eightShorts) + if (lowRateTuning && (maxSfbInCh > 0) && !eightShorts) { uint32_t numRedBands = nSamplesInFrame; // final result lies between 1/4 and 1/2 @@ -491,10 +485,8 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA } } if (grpRms[b] < grpRmsMin) grpRmsMin = grpRms[b]; -#if 1 - if ((m_rateIndex > 0) || lowRateTuning) -#endif - if (rmsComp >= rmsRef9 && (rmsComp < (grpStepSizes[b] >> 1))) // zero-quantized + + if (undercodingRed && (rmsComp >= rmsRef9) && (rmsComp < (grpStepSizes[b] >> 1))) // zero-quantized { s -= (sfbWidth * redFactor * __min (1u << 11, rmsComp) + (1u << 10)) >> 11; } @@ -505,10 +497,8 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA const uint32_t rmsComp = (grpSte != nullptr && grpSte[b] > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]); const uint32_t rmsRef9 = (commonWindow ? refRms[b] >> 9 : rmsComp); const uint8_t sfbWidth = grpOff[maxSfbL16k] - grpOff[b]; -#if 1 - if ((m_rateIndex > 0) || lowRateTuning) -#endif - if (rmsComp >= rmsRef9) // check only first SFB above max_sfb for simplification + + if (undercodingRed && (rmsComp >= rmsRef9)) // check only first SFB above max_sfb as simplification { s -= (sfbWidth * redFactor * __min (1u << 11, rmsComp) + (1u << 10)) >> 11; } @@ -522,7 +512,7 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA grpStepSizes[b] = uint32_t ((__max (grpRmsMin, grpStepSizes[b]) * s * (m_tempSfbValue[b] + 1u) + (1u << 14)) >> 15); if (grpStepSizes[b] <= (grpRms[b] >> 11)) grpStepSizes[b] = __max (BA_EPS, grpRms[b] >> 11); - if ((m_rateIndex == 0) && lowRateTuning) // clip near-zero SNRs to a minimum SNR + if (lowRateTuning) // clip near-0 SNRs to minimum SNR { const uint32_t lim = uint32_t ((grpRms[b] * (8192u - (uint64_t) sfm[ch] * sfm[ch]) + (1u << 12)) >> 13); diff --git a/src/lib/bitAllocation.h b/src/lib/bitAllocation.h index e70e145..07ec48c 100644 --- a/src/lib/bitAllocation.h +++ b/src/lib/bitAllocation.h @@ -1,11 +1,11 @@ /* bitAllocation.h - header file for class needed for psychoacoustic bit-allocation - * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices + * written by C. R. Helmrich, last modified in 2021 - see License.htm for legal notices * * The copyright in this software is being made available under the exhale Copyright License * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third- * party rights, including patent rights. No such rights are granted under this License. * - * Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved. + * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved. */ #ifndef _BIT_ALLOCATION_H_ @@ -16,7 +16,6 @@ // constants, experimental macros #define BA_EPS 1 -#define BA_INTER_CHAN_SIM_MASK 0 // 5.1 cross-channel simultaneous masking // class for audio bit-allocation class BitAllocator diff --git a/src/lib/specAnalysis.cpp b/src/lib/specAnalysis.cpp index b9f7d12..6069d71 100644 --- a/src/lib/specAnalysis.cpp +++ b/src/lib/specAnalysis.cpp @@ -1,11 +1,11 @@ /* specAnalysis.cpp - source file for class providing spectral analysis of MCLT signals - * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices + * written by C. R. Helmrich, last modified in 2021 - see License.htm for legal notices * * The copyright in this software is being made available under the exhale Copyright License * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third- * party rights, including patent rights. No such rights are granted under this License. * - * Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved. + * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved. */ #include "exhaleLibPch.h" @@ -44,10 +44,8 @@ SpecAnalyzer::SpecAnalyzer () for (unsigned ch = 0; ch < USAC_MAX_NUM_CHANNELS; ch++) { m_bandwidthOff[ch] = 0; -#if SA_IMPROVED_SFM_ESTIM m_magnCorrPrev[ch] = 0; m_magnSpectra [ch] = nullptr; -#endif m_numAnaBands [ch] = 0; m_specAnaStats[ch] = 0; memset (m_parCorCoeffs[ch], 0, MAX_PREDICTION_ORDER * sizeof (short)); @@ -169,7 +167,7 @@ unsigned SpecAnalyzer::initSigAnaMemory (LinearPredictor* const linPredictor, co return 1; // invalid arguments error } m_tnsPredictor = linPredictor; -#if SA_IMPROVED_SFM_ESTIM + for (unsigned ch = 0; ch < nChannels; ch++) { if ((m_magnSpectra[ch] = (uint32_t*) malloc (maxTransfLength * sizeof (uint32_t))) == nullptr) @@ -178,7 +176,6 @@ unsigned SpecAnalyzer::initSigAnaMemory (LinearPredictor* const linPredictor, co } memset (m_magnSpectra[ch], 0, maxTransfLength * sizeof (uint32_t)); } -#endif return 0; // no error } @@ -242,7 +239,7 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M const unsigned thresholdStart = samplingRate >> 15; if ((mdctSignals == nullptr) || (mdstSignals == nullptr) || (nChannels > USAC_MAX_NUM_CHANNELS) || (lfeChannelIndex > USAC_MAX_NUM_CHANNELS) || - (nSamplesInFrame > 2048) || (nSamplesInFrame < 2) || (samplingRate < 7350) || (samplingRate > 96000)) + (nSamplesInFrame > 2048) || (nSamplesInFrame <= 127) || (samplingRate < 7350) || (samplingRate > 96000)) { return 1; // invalid arguments error } @@ -251,11 +248,10 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M { const int32_t* const chMdct = mdctSignals[ch]; const int32_t* const chMdst = mdstSignals[ch]; -#if SA_IMPROVED_SFM_ESTIM uint32_t* const chPrvMagn = m_magnSpectra[ch]; const bool improvedSfmEstim = (chPrvMagn != nullptr); uint16_t currMC = 0, numMC = 0; // channel average -#endif + // --- get L1 norm and max value in each band uint16_t idxMaxSpec = 0; uint64_t sumAvgBand = 0; @@ -279,22 +275,19 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M const uint16_t offs = b << SA_BW_SHIFT; // start offset of current analysis band const int32_t* const bMdct = &chMdct[offs]; const int32_t* const bMdst = &chMdst[offs]; -#if SA_IMPROVED_SFM_ESTIM uint32_t* const prvMagn = (improvedSfmEstim ? &chPrvMagn[offs] : nullptr); -#endif uint16_t maxAbsIdx = 0; uint32_t maxAbsVal = 0, tmp = UINT_MAX; uint64_t sumAbsVal = 0; -#if SA_IMPROVED_SFM_ESTIM uint64_t sumAbsPrv = 0; uint64_t sumPrdCP = 0, sumPrdCC = 0, sumPrdPP = 0; double ncp, dcc, dpp; -#endif + for (int s = SA_BW - 1; s >= 0; s--) { // sum absolute values of complex spectrum, derive L1 norm, peak value, and peak index const uint64_t absSample = complexAbs (bMdct[s], bMdst[s]); -#if SA_IMPROVED_SFM_ESTIM + if (improvedSfmEstim) // correlation between current and previous magnitude spectrum { const uint64_t prvSample = prvMagn[s]; @@ -306,7 +299,6 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M sumAbsPrv += prvSample; prvMagn[s] = (uint32_t) absSample; } -#endif sumAbsVal += absSample; if (offs + s > 0) // exclude DC from max & min { @@ -332,7 +324,6 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M tmp/*mean*/ = uint32_t ((sumAbsVal + anaBwOffset) >> SA_BW_SHIFT); m_meanAbsValue[ch][b] = tmp; // spectral statistics -#if SA_IMPROVED_SFM_ESTIM if (improvedSfmEstim && (b > 0) && ((unsigned) b < lpcStopBand16k)) { dcc = double (tmp); @@ -344,7 +335,6 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M currMC += (uint16_t) __min (UCHAR_MAX, sumPrdCP); numMC++; // temporal correlation sum } -#endif if (b > 0) { sumAvgBand += tmp; @@ -359,14 +349,19 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M // --- spectral analysis statistics for frame b = 1; +#if SA_IMPROVED_FILT_CALC + if (samplingRate < 27713) sumAvgBand -= m_meanAbsValue[ch][b++]; +#endif while (((unsigned) b + 1 < lpcStopBand16k) && ((uint64_t) m_meanAbsValue[ch][b] * (m_numAnaBands[ch] - 1) > sumAvgBand)) b++; b = __min (m_bandwidthOff[ch], b << SA_BW_SHIFT); - +#if SA_IMPROVED_FILT_CALC + if (samplingRate < 27713) sumAvgBand += m_meanAbsValue[ch][1]; +#endif // obtain prediction gain across spectrum m_tnsPredGains[ch] = m_tnsPredictor->calcParCorCoeffs (&chMdct[b], __min (m_bandwidthOff[ch], lpcStopBand16k << SA_BW_SHIFT) - b, MAX_PREDICTION_ORDER, m_parCorCoeffs[ch]); m_specAnaStats[ch] = packAvgSpecAnalysisStats (sumAvgBand, sumMaxBand, m_tnsPredGains[ch] >> 24, idxMaxSpec, (unsigned) b >> SA_BW_SHIFT); -#if SA_IMPROVED_SFM_ESTIM + if (improvedSfmEstim) { if (numMC > 1) currMC = (currMC + (numMC >> 1)) / numMC;// smoothed temporal correlation @@ -375,7 +370,6 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M if (valMaxSpec > ((m_specAnaStats[ch] >> 16) & UCHAR_MAX)) m_specAnaStats[ch] = (m_specAnaStats[ch] & 0xFF00FFFF) | (valMaxSpec << 16); } -#endif } // for ch return 0; // no error diff --git a/src/lib/specAnalysis.h b/src/lib/specAnalysis.h index a377e7c..ea1bb95 100644 --- a/src/lib/specAnalysis.h +++ b/src/lib/specAnalysis.h @@ -1,11 +1,11 @@ /* specAnalysis.h - header file for class providing spectral analysis of MCLT signals - * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices + * written by C. R. Helmrich, last modified in 2021 - see License.htm for legal notices * * The copyright in this software is being made available under the exhale Copyright License * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third- * party rights, including patent rights. No such rights are granted under this License. * - * Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved. + * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved. */ #ifndef _SPEC_ANALYSIS_H_ @@ -19,7 +19,7 @@ #define SA_BW (1 << SA_BW_SHIFT) #define SA_EPS 1024 #define SA_EXACT_COMPLEX_ABS 0 -#define SA_IMPROVED_SFM_ESTIM 1 +#define SA_IMPROVED_FILT_CALC 1 #define SA_OPT_WINDOW_GROUPING 1 // spectral signal analysis class @@ -29,10 +29,8 @@ private: // member variables uint16_t m_bandwidthOff[USAC_MAX_NUM_CHANNELS]; -#if SA_IMPROVED_SFM_ESTIM uint8_t m_magnCorrPrev[USAC_MAX_NUM_CHANNELS]; uint32_t* m_magnSpectra[USAC_MAX_NUM_CHANNELS]; -#endif uint32_t m_meanAbsValue[USAC_MAX_NUM_CHANNELS][1024 >> SA_BW_SHIFT]; uint16_t m_numAnaBands [USAC_MAX_NUM_CHANNELS]; short m_parCorCoeffs[USAC_MAX_NUM_CHANNELS][MAX_PREDICTION_ORDER];