Retune TNS (temporal noise shaping) for SBR (spectral band replication)

This commit is contained in:
Christian R. Helmrich 2021-05-13 01:00:00 +02:00
parent 08ac873cef
commit a327f120c1
4 changed files with 37 additions and 56 deletions

View File

@ -1,11 +1,11 @@
/* bitAllocation.cpp - source file for class needed for psychoacoustic bit-allocation
* written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
* written by C. R. Helmrich, last modified in 2021 - see License.htm for legal notices
*
* The copyright in this software is being made available under the exhale Copyright License
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
* party rights, including patent rights. No such rights are granted under this License.
*
* Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved.
* Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
*/
#include "exhaleLibPch.h"
@ -205,13 +205,13 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
m_avgStepSize[ch] = 0;
b = ((specAnaStats[ch] >> 16) & UCHAR_MAX); // start with squared spec. flatness from spectral analysis
b = __max (b * b, (tempAnaStats[ch] >> 24) * (tempAnaStats[ch] >> 24)); // ..and from temporal analysis
m_avgSpecFlat[ch] = uint8_t ((b + (1 << 7)) >> 8); // normalized maximum
b = ((specAnaStats[ch] >> 16) & UCHAR_MAX);
b = __max (b * b, (tempAnaStats[ch] >> 24) * (tempAnaStats[ch] >> 24));
m_avgSpecFlat[ch] = uint8_t ((b + (1 << 7)) >> 8); // max. of squared SFM from spec. and temp. analysis
b = ((tempAnaStats[ch] >> 16) & UCHAR_MAX); // now derive squared temp. flatness from temporal analysis
b = __max (b * b, (specAnaStats[ch] >> 24) * (specAnaStats[ch] >> 24)); // ..and from spectral analysis
m_avgTempFlat[ch] = uint8_t ((b + (1 << 7)) >> 8); // normalized maximum
b = ((tempAnaStats[ch] >> 16) & UCHAR_MAX);
b = __max (b * b, (specAnaStats[ch] >> 24) * (specAnaStats[ch] >> 24));
m_avgTempFlat[ch] = uint8_t ((b + (1 << 7)) >> 8); // max. of squared TFM from spec. and temp. analysis
if ((nBandsInCh == 0) || (grpData.numWindowGroups > NUM_WINDOW_GROUPS))
{
@ -373,12 +373,6 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
sumMeans = (sumMeans + (nMeans >> 1)) / nMeans;
sumMeans *= sumMeans; // since we've averaged square-roots
#if BA_INTER_CHAN_SIM_MASK
if (nMeans > 3)
{
// TODO: cross-channel simultaneous masking for 4.0 - 7.1
}
#endif
for (unsigned ch = 0; ch < nChannels; ch++)
{
@ -435,9 +429,10 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
const SfbGroupData& grpData = *groupData[ch];
const uint32_t maxSfbInCh = __min (MAX_NUM_SWB_LONG, grpData.sfbsPerGroup);
const bool eightShorts = (grpData.numWindowGroups != 1);
const bool lowRateTuning = (samplingRate >= 25495) && (sfm[ch] <= (SCHAR_MAX >> 1));
const uint32_t* rms = grpData.sfbRmsValues;
uint32_t* stepSizes = &sfbStepSizes[ch * numSwbShort * NUM_WINDOW_GROUPS];
const bool lowRateTuning = (m_rateIndex == 0) && (samplingRate >= 25495 && sfm[ch] <= (SCHAR_MAX >> 1));
const bool undercodingRed = (m_rateIndex > 0) || (samplingRate >= 25495 && sfm[ch] * 8 > UCHAR_MAX * 7) || lowRateTuning;
const uint32_t* rms = grpData.sfbRmsValues;
uint32_t* stepSizes = &sfbStepSizes[ch * numSwbShort * NUM_WINDOW_GROUPS];
if ((grpData.numWindowGroups * maxSfbInCh == 0) || (grpData.numWindowGroups > NUM_WINDOW_GROUPS))
{
@ -454,8 +449,7 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
uint64_t s = (eightShorts ? (nSamplesInFrame * grpData.windowGroupLength[gr]) >> 1 : nSamplesInFrame << 2);
memset (m_tempSfbValue, UCHAR_MAX, maxSfbInCh * sizeof (uint8_t));
if ((m_rateIndex == 0) && lowRateTuning && (maxSfbInCh > 0) && !eightShorts)
if (lowRateTuning && (maxSfbInCh > 0) && !eightShorts)
{
uint32_t numRedBands = nSamplesInFrame; // final result lies between 1/4 and 1/2
@ -491,10 +485,8 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
}
}
if (grpRms[b] < grpRmsMin) grpRmsMin = grpRms[b];
#if 1
if ((m_rateIndex > 0) || lowRateTuning)
#endif
if (rmsComp >= rmsRef9 && (rmsComp < (grpStepSizes[b] >> 1))) // zero-quantized
if (undercodingRed && (rmsComp >= rmsRef9) && (rmsComp < (grpStepSizes[b] >> 1))) // zero-quantized
{
s -= (sfbWidth * redFactor * __min (1u << 11, rmsComp) + (1u << 10)) >> 11;
}
@ -505,10 +497,8 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
const uint32_t rmsComp = (grpSte != nullptr && grpSte[b] > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]);
const uint32_t rmsRef9 = (commonWindow ? refRms[b] >> 9 : rmsComp);
const uint8_t sfbWidth = grpOff[maxSfbL16k] - grpOff[b];
#if 1
if ((m_rateIndex > 0) || lowRateTuning)
#endif
if (rmsComp >= rmsRef9) // check only first SFB above max_sfb for simplification
if (undercodingRed && (rmsComp >= rmsRef9)) // check only first SFB above max_sfb as simplification
{
s -= (sfbWidth * redFactor * __min (1u << 11, rmsComp) + (1u << 10)) >> 11;
}
@ -522,7 +512,7 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
grpStepSizes[b] = uint32_t ((__max (grpRmsMin, grpStepSizes[b]) * s * (m_tempSfbValue[b] + 1u) + (1u << 14)) >> 15);
if (grpStepSizes[b] <= (grpRms[b] >> 11)) grpStepSizes[b] = __max (BA_EPS, grpRms[b] >> 11);
if ((m_rateIndex == 0) && lowRateTuning) // clip near-zero SNRs to a minimum SNR
if (lowRateTuning) // clip near-0 SNRs to minimum SNR
{
const uint32_t lim = uint32_t ((grpRms[b] * (8192u - (uint64_t) sfm[ch] * sfm[ch]) + (1u << 12)) >> 13);

View File

@ -1,11 +1,11 @@
/* bitAllocation.h - header file for class needed for psychoacoustic bit-allocation
* written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
* written by C. R. Helmrich, last modified in 2021 - see License.htm for legal notices
*
* The copyright in this software is being made available under the exhale Copyright License
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
* party rights, including patent rights. No such rights are granted under this License.
*
* Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved.
* Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
*/
#ifndef _BIT_ALLOCATION_H_
@ -16,7 +16,6 @@
// constants, experimental macros
#define BA_EPS 1
#define BA_INTER_CHAN_SIM_MASK 0 // 5.1 cross-channel simultaneous masking
// class for audio bit-allocation
class BitAllocator

View File

@ -1,11 +1,11 @@
/* specAnalysis.cpp - source file for class providing spectral analysis of MCLT signals
* written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
* written by C. R. Helmrich, last modified in 2021 - see License.htm for legal notices
*
* The copyright in this software is being made available under the exhale Copyright License
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
* party rights, including patent rights. No such rights are granted under this License.
*
* Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved.
* Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
*/
#include "exhaleLibPch.h"
@ -44,10 +44,8 @@ SpecAnalyzer::SpecAnalyzer ()
for (unsigned ch = 0; ch < USAC_MAX_NUM_CHANNELS; ch++)
{
m_bandwidthOff[ch] = 0;
#if SA_IMPROVED_SFM_ESTIM
m_magnCorrPrev[ch] = 0;
m_magnSpectra [ch] = nullptr;
#endif
m_numAnaBands [ch] = 0;
m_specAnaStats[ch] = 0;
memset (m_parCorCoeffs[ch], 0, MAX_PREDICTION_ORDER * sizeof (short));
@ -169,7 +167,7 @@ unsigned SpecAnalyzer::initSigAnaMemory (LinearPredictor* const linPredictor, co
return 1; // invalid arguments error
}
m_tnsPredictor = linPredictor;
#if SA_IMPROVED_SFM_ESTIM
for (unsigned ch = 0; ch < nChannels; ch++)
{
if ((m_magnSpectra[ch] = (uint32_t*) malloc (maxTransfLength * sizeof (uint32_t))) == nullptr)
@ -178,7 +176,6 @@ unsigned SpecAnalyzer::initSigAnaMemory (LinearPredictor* const linPredictor, co
}
memset (m_magnSpectra[ch], 0, maxTransfLength * sizeof (uint32_t));
}
#endif
return 0; // no error
}
@ -242,7 +239,7 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
const unsigned thresholdStart = samplingRate >> 15;
if ((mdctSignals == nullptr) || (mdstSignals == nullptr) || (nChannels > USAC_MAX_NUM_CHANNELS) || (lfeChannelIndex > USAC_MAX_NUM_CHANNELS) ||
(nSamplesInFrame > 2048) || (nSamplesInFrame < 2) || (samplingRate < 7350) || (samplingRate > 96000))
(nSamplesInFrame > 2048) || (nSamplesInFrame <= 127) || (samplingRate < 7350) || (samplingRate > 96000))
{
return 1; // invalid arguments error
}
@ -251,11 +248,10 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
{
const int32_t* const chMdct = mdctSignals[ch];
const int32_t* const chMdst = mdstSignals[ch];
#if SA_IMPROVED_SFM_ESTIM
uint32_t* const chPrvMagn = m_magnSpectra[ch];
const bool improvedSfmEstim = (chPrvMagn != nullptr);
uint16_t currMC = 0, numMC = 0; // channel average
#endif
// --- get L1 norm and max value in each band
uint16_t idxMaxSpec = 0;
uint64_t sumAvgBand = 0;
@ -279,22 +275,19 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
const uint16_t offs = b << SA_BW_SHIFT; // start offset of current analysis band
const int32_t* const bMdct = &chMdct[offs];
const int32_t* const bMdst = &chMdst[offs];
#if SA_IMPROVED_SFM_ESTIM
uint32_t* const prvMagn = (improvedSfmEstim ? &chPrvMagn[offs] : nullptr);
#endif
uint16_t maxAbsIdx = 0;
uint32_t maxAbsVal = 0, tmp = UINT_MAX;
uint64_t sumAbsVal = 0;
#if SA_IMPROVED_SFM_ESTIM
uint64_t sumAbsPrv = 0;
uint64_t sumPrdCP = 0, sumPrdCC = 0, sumPrdPP = 0;
double ncp, dcc, dpp;
#endif
for (int s = SA_BW - 1; s >= 0; s--)
{
// sum absolute values of complex spectrum, derive L1 norm, peak value, and peak index
const uint64_t absSample = complexAbs (bMdct[s], bMdst[s]);
#if SA_IMPROVED_SFM_ESTIM
if (improvedSfmEstim) // correlation between current and previous magnitude spectrum
{
const uint64_t prvSample = prvMagn[s];
@ -306,7 +299,6 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
sumAbsPrv += prvSample;
prvMagn[s] = (uint32_t) absSample;
}
#endif
sumAbsVal += absSample;
if (offs + s > 0) // exclude DC from max & min
{
@ -332,7 +324,6 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
tmp/*mean*/ = uint32_t ((sumAbsVal + anaBwOffset) >> SA_BW_SHIFT);
m_meanAbsValue[ch][b] = tmp;
// spectral statistics
#if SA_IMPROVED_SFM_ESTIM
if (improvedSfmEstim && (b > 0) && ((unsigned) b < lpcStopBand16k))
{
dcc = double (tmp);
@ -344,7 +335,6 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
currMC += (uint16_t) __min (UCHAR_MAX, sumPrdCP); numMC++; // temporal correlation sum
}
#endif
if (b > 0)
{
sumAvgBand += tmp;
@ -359,14 +349,19 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
// --- spectral analysis statistics for frame
b = 1;
#if SA_IMPROVED_FILT_CALC
if (samplingRate < 27713) sumAvgBand -= m_meanAbsValue[ch][b++];
#endif
while (((unsigned) b + 1 < lpcStopBand16k) && ((uint64_t) m_meanAbsValue[ch][b] * (m_numAnaBands[ch] - 1) > sumAvgBand)) b++;
b = __min (m_bandwidthOff[ch], b << SA_BW_SHIFT);
#if SA_IMPROVED_FILT_CALC
if (samplingRate < 27713) sumAvgBand += m_meanAbsValue[ch][1];
#endif
// obtain prediction gain across spectrum
m_tnsPredGains[ch] = m_tnsPredictor->calcParCorCoeffs (&chMdct[b], __min (m_bandwidthOff[ch], lpcStopBand16k << SA_BW_SHIFT) - b,
MAX_PREDICTION_ORDER, m_parCorCoeffs[ch]);
m_specAnaStats[ch] = packAvgSpecAnalysisStats (sumAvgBand, sumMaxBand, m_tnsPredGains[ch] >> 24, idxMaxSpec, (unsigned) b >> SA_BW_SHIFT);
#if SA_IMPROVED_SFM_ESTIM
if (improvedSfmEstim)
{
if (numMC > 1) currMC = (currMC + (numMC >> 1)) / numMC;// smoothed temporal correlation
@ -375,7 +370,6 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
if (valMaxSpec > ((m_specAnaStats[ch] >> 16) & UCHAR_MAX)) m_specAnaStats[ch] = (m_specAnaStats[ch] & 0xFF00FFFF) | (valMaxSpec << 16);
}
#endif
} // for ch
return 0; // no error

View File

@ -1,11 +1,11 @@
/* specAnalysis.h - header file for class providing spectral analysis of MCLT signals
* written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
* written by C. R. Helmrich, last modified in 2021 - see License.htm for legal notices
*
* The copyright in this software is being made available under the exhale Copyright License
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
* party rights, including patent rights. No such rights are granted under this License.
*
* Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved.
* Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
*/
#ifndef _SPEC_ANALYSIS_H_
@ -19,7 +19,7 @@
#define SA_BW (1 << SA_BW_SHIFT)
#define SA_EPS 1024
#define SA_EXACT_COMPLEX_ABS 0
#define SA_IMPROVED_SFM_ESTIM 1
#define SA_IMPROVED_FILT_CALC 1
#define SA_OPT_WINDOW_GROUPING 1
// spectral signal analysis class
@ -29,10 +29,8 @@ private:
// member variables
uint16_t m_bandwidthOff[USAC_MAX_NUM_CHANNELS];
#if SA_IMPROVED_SFM_ESTIM
uint8_t m_magnCorrPrev[USAC_MAX_NUM_CHANNELS];
uint32_t* m_magnSpectra[USAC_MAX_NUM_CHANNELS];
#endif
uint32_t m_meanAbsValue[USAC_MAX_NUM_CHANNELS][1024 >> SA_BW_SHIFT];
uint16_t m_numAnaBands [USAC_MAX_NUM_CHANNELS];
short m_parCorCoeffs[USAC_MAX_NUM_CHANNELS][MAX_PREDICTION_ORDER];