extrema tuning

This commit is contained in:
Christian R. Helmrich 2020-05-01 15:00:35 +02:00
parent 70534ca7b4
commit a3be5338f2
4 changed files with 80 additions and 9 deletions

View File

@ -845,7 +845,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
const bool eightShorts = (coreConfig.icsInfoCurr[ch].windowSequence == EIGHT_SHORT);
const uint8_t maxSfbCh = grpData.sfbsPerGroup;
const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong);
const uint8_t mSfmFac = eightTimesSqrt256Minus[meanSpecFlat[ci]];
const uint16_t mSfmFac = UCHAR_MAX - ((9u * meanSpecFlat[ci]) >> 4);
uint32_t* stepSizes = &sfbStepSizes[ci * m_numSwbShort * NUM_WINDOW_GROUPS];
memset (grpData.scaleFactors, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint8_t));
@ -909,7 +909,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
const unsigned lfAtten = (b <= 5 ? (eightShorts ? 1 : 4) + b * lfConst : 5 * lfConst - 1 + b + ((b + 5) >> 4));
const uint8_t sfbWidth = grpOff[b + 1] - grpOff[b];
const uint64_t rateFac = mSfmFac * s * __min (32, lfAtten * grpData.numWindowGroups); // rate control part 1
const uint64_t sScaled = ((1u << 23) + __max (grpRmsMin, grpStepSizes[b]) * scaleBr * rateFac) >> 24;
const uint64_t sScaled = ((1u << 24) + __max (grpRmsMin, grpStepSizes[b]) * scaleBr * rateFac) >> 25;
// scale step-sizes according to VBR mode & derive scale factors from step-sizes
grpStepSizes[b] = uint32_t (__max (BA_EPS, __min (UINT_MAX, sScaled)));
@ -1972,7 +1972,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin
#else
if (m_sfbQuantizer.initQuantMemory (nSamplesInFrame) > 0 ||
#endif
m_specAnalyzer.initLinPredictor (&m_linPredictor) > 0 ||
m_specAnalyzer.initSigAnaMemory (&m_linPredictor, m_bitRateMode <= 4 ? nChannels : 0, nSamplesInFrame) > 0 ||
m_transform.initConstants (m_tempIntBuf, m_timeWindowL, m_timeWindowS, nSamplesInFrame) > 0)
{
errorValue |= 1;

View File

@ -527,7 +527,7 @@ unsigned SfbQuantizer::initQuantMemory (const unsigned maxTransfLength,
{
const unsigned numScaleFactors = (unsigned) maxScaleFacIndex + 1;
#if EC_TRELLIS_OPT_CODING
const uint8_t numTrellisStates = (samplingRate < 44100 ? 8 - samplingRate / 32000 : 5) - __min (2, (bitRateMode + 2) >> 2); // states per SFB
const uint8_t numTrellisStates = (samplingRate < 44100 ? 8 - samplingRate / 16000 : 5) - __min (2, (bitRateMode + 2) >> 2); // states per SFB
const uint8_t numSquaredStates = numTrellisStates * numTrellisStates;
const uint16_t quantRateLength = (samplingRate < 28800 ? 512 : 256); // quantizeMagnRDOC()
#endif

View File

@ -30,6 +30,10 @@ SpecAnalyzer::SpecAnalyzer ()
for (unsigned ch = 0; ch < USAC_MAX_NUM_CHANNELS; ch++)
{
m_bandwidthOff[ch] = 0;
#if SA_IMPROVED_SFM_ESTIM
m_magnCorrPrev[ch] = 0;
m_magnSpectra [ch] = nullptr;
#endif
m_numAnaBands [ch] = 0;
m_specAnaStats[ch] = 0;
memset (m_parCorCoeffs[ch], 0, MAX_PREDICTION_ORDER * sizeof (short));
@ -171,14 +175,23 @@ void SpecAnalyzer::getSpectralBandwidth (uint16_t bandwidthOffset[USAC_MAX_NUM_C
memcpy (bandwidthOffset, m_bandwidthOff, nChannels * sizeof (uint16_t));
}
unsigned SpecAnalyzer::initLinPredictor (LinearPredictor* const linPredictor)
unsigned SpecAnalyzer::initSigAnaMemory (LinearPredictor* const linPredictor, const unsigned nChannels, const unsigned maxTransfLength)
{
if (linPredictor == nullptr)
{
return 1; // invalid arguments error
}
m_tnsPredictor = linPredictor;
#if SA_IMPROVED_SFM_ESTIM
for (unsigned ch = 0; ch < nChannels; ch++)
{
if ((m_magnSpectra[ch] = (uint32_t*) malloc (maxTransfLength * sizeof (uint32_t))) == nullptr)
{
return 2; // mem. allocation error
}
memset (m_magnSpectra[ch], 0, maxTransfLength * sizeof (uint32_t));
}
#endif
return 0; // no error
}
@ -236,6 +249,7 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
const unsigned nChannels, const unsigned nSamplesInFrame, const unsigned samplingRate,
const unsigned lfeChannelIndex /*= USAC_MAX_NUM_CHANNELS*/) // to skip an LFE channel
{
const uint64_t anaBwOffset = SA_BW >> 1;
const unsigned lpcStopBand16k = (samplingRate <= 32000 ? nSamplesInFrame : (32000 * nSamplesInFrame) / samplingRate) >> SA_BW_SHIFT;
const unsigned thresholdSlope = (48000 + SA_EPS * samplingRate) / 96000;
const unsigned thresholdStart = samplingRate >> 15;
@ -250,6 +264,11 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
{
const int32_t* const chMdct = mdctSignals[ch];
const int32_t* const chMdst = mdstSignals[ch];
#if SA_IMPROVED_SFM_ESTIM
uint32_t* const chPrvMagn = m_magnSpectra[ch];
const bool improvedSfmEstim = (chPrvMagn != nullptr);
uint16_t currMC = 0, numMC = 0; // channel average
#endif
// --- get L1 norm and max value in each band
uint16_t idxMaxSpec = 0;
uint64_t sumAvgBand = 0;
@ -273,10 +292,17 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
const uint16_t offs = b << SA_BW_SHIFT; // start offset of current analysis band
const int32_t* const bMdct = &chMdct[offs];
const int32_t* const bMdst = &chMdst[offs];
#if SA_IMPROVED_SFM_ESTIM
uint32_t* const prvMagn = (improvedSfmEstim ? &chPrvMagn[offs] : nullptr);
#endif
uint16_t maxAbsIdx = 0;
uint32_t maxAbsVal = 0, tmp = UINT_MAX;
uint64_t sumAbsVal = 0;
#if SA_IMPROVED_SFM_ESTIM
uint64_t sumAbsPrv = 0;
uint64_t sumPrdCP = 0, sumPrdCC = 0, sumPrdPP = 0;
double ncp, dcc, dpp;
#endif
for (int s = SA_BW - 1; s >= 0; s--)
{
// sum absolute values of complex spectrum, derive L1 norm, peak value, and peak index
@ -287,6 +313,19 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
const uint64_t absReal = abs (bMdct[s]); // Richard Lyons, 1997; en.wikipedia.org/
const uint64_t absImag = abs (bMdst[s]); // wiki/Alpha_max_plus_beta_min_algorithm
const uint32_t absSample = uint32_t (absReal > absImag ? absReal + ((absImag * 3) >> 3) : absImag + ((absReal * 3) >> 3));
#endif
#if SA_IMPROVED_SFM_ESTIM
if (improvedSfmEstim) // correlation between current and previous magnitude spectrum
{
const uint64_t prvSample = prvMagn[s];
sumPrdCP += ((uint64_t) absSample * prvSample + anaBwOffset) >> SA_BW_SHIFT;
sumPrdCC += ((uint64_t) absSample * absSample + anaBwOffset) >> SA_BW_SHIFT;
sumPrdPP += ((uint64_t) prvSample * prvSample + anaBwOffset) >> SA_BW_SHIFT;
sumAbsPrv += prvSample;
prvMagn[s] = absSample;
}
#endif
sumAbsVal += absSample;
if (offs + s > 0) // exclude DC from max & min
@ -310,9 +349,22 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
m_bandwidthOff[ch] = __min (m_bandwidthOff[ch], nSamplesInFrame);
}
// save mean magnitude
tmp/*mean*/ = uint32_t ((sumAbsVal + (1 << (SA_BW_SHIFT - 1))) >> SA_BW_SHIFT);
tmp/*mean*/ = uint32_t ((sumAbsVal + anaBwOffset) >> SA_BW_SHIFT);
m_meanAbsValue[ch][b] = tmp;
// spectral statistics
#if SA_IMPROVED_SFM_ESTIM
if (improvedSfmEstim && (b > 0) && ((unsigned) b < lpcStopBand16k))
{
dcc = double (tmp);
dpp = double ((sumAbsPrv + anaBwOffset) >> SA_BW_SHIFT);
ncp = (sumPrdCP + dcc * dpp) * SA_BW - sumAbsVal * dpp - sumAbsPrv * dcc;
dcc = (sumPrdCC + dcc * dcc) * SA_BW - sumAbsVal * dcc - sumAbsVal * dcc;
dpp = (sumPrdPP + dpp * dpp) * SA_BW - sumAbsPrv * dpp - sumAbsPrv * dpp;
sumPrdCP = uint64_t ((ncp <= 0.0) || (dcc * dpp <= 0.0) ? 0 : 0.5 + (256.0 * ncp * ncp) / (dcc * dpp));
currMC += (uint16_t) __min (UCHAR_MAX, sumPrdCP); numMC++; // temporal correlation sum
}
#endif
if (b > 0)
{
sumAvgBand += tmp;
@ -334,6 +386,16 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
m_tnsPredGains[ch] = m_tnsPredictor->calcParCorCoeffs (&chMdct[b], __min (m_bandwidthOff[ch], lpcStopBand16k << SA_BW_SHIFT) - b,
MAX_PREDICTION_ORDER, m_parCorCoeffs[ch]);
m_specAnaStats[ch] = packAvgSpecAnalysisStats (sumAvgBand, sumMaxBand, m_tnsPredGains[ch] >> 24, idxMaxSpec, (unsigned) b >> SA_BW_SHIFT);
#if SA_IMPROVED_SFM_ESTIM
if (improvedSfmEstim)
{
if (numMC > 1) currMC = (currMC + (numMC >> 1)) / numMC;// smoothed temporal correlation
valMaxSpec = (currMC + m_magnCorrPrev[ch] + 1) >> 1;
m_magnCorrPrev[ch] = (uint8_t) currMC; // update
if (valMaxSpec > ((m_specAnaStats[ch] >> 16) & UCHAR_MAX)) m_specAnaStats[ch] = (m_specAnaStats[ch] & 0xFF00FFFF) | (valMaxSpec << 16);
}
#endif
} // for ch
return 0; // no error

View File

@ -20,6 +20,7 @@
#define SA_EPS 1024
#define SA_EXACT_COMPLEX_ABS 0
#define SA_IMPROVED_REAL_ABS 1
#define SA_IMPROVED_SFM_ESTIM 1
#define SA_OPT_WINDOW_GROUPING 1
// spectral signal analysis class
@ -29,6 +30,10 @@ private:
// member variables
uint16_t m_bandwidthOff[USAC_MAX_NUM_CHANNELS];
#if SA_IMPROVED_SFM_ESTIM
uint8_t m_magnCorrPrev[USAC_MAX_NUM_CHANNELS];
uint32_t* m_magnSpectra[USAC_MAX_NUM_CHANNELS];
#endif
uint32_t m_meanAbsValue[USAC_MAX_NUM_CHANNELS][1024 >> SA_BW_SHIFT];
uint16_t m_numAnaBands [USAC_MAX_NUM_CHANNELS];
short m_parCorCoeffs[USAC_MAX_NUM_CHANNELS][MAX_PREDICTION_ORDER];
@ -41,7 +46,11 @@ public:
// constructor
SpecAnalyzer ();
// destructor
#if SA_IMPROVED_SFM_ESTIM
~SpecAnalyzer () { for (unsigned ch = 0; ch < USAC_MAX_NUM_CHANNELS; ch++) MFREE (m_magnSpectra[ch]); }
#else
~SpecAnalyzer () { }
#endif
// public functions
unsigned getLinPredCoeffs (short parCorCoeffs[MAX_PREDICTION_ORDER], const unsigned channelIndex); // returns best filter order
unsigned getMeanAbsValues (const int32_t* const mdctSignal, const int32_t* const mdstSignal, const unsigned nSamplesInFrame,
@ -49,7 +58,7 @@ public:
uint32_t* const meanBandValues);
void getSpecAnalysisStats (uint32_t avgSpecAnaStats[USAC_MAX_NUM_CHANNELS], const unsigned nChannels);
void getSpectralBandwidth (uint16_t bandwidthOffset[USAC_MAX_NUM_CHANNELS], const unsigned nChannels);
unsigned initLinPredictor (LinearPredictor* const linPredictor);
unsigned initSigAnaMemory (LinearPredictor* const linPredictor, const unsigned nChannels, const unsigned maxTransfLength);
#if SA_OPT_WINDOW_GROUPING
unsigned optimizeGrouping (const unsigned channelIndex, const unsigned preferredBandwidth, const unsigned preferredGrouping);
#endif