mirror of
https://gitlab.com/ecodis/exhale.git
synced 2025-03-12 01:00:11 +01:00
transient tuning pt. 2
This commit is contained in:
parent
83f3dc2f88
commit
9f82a8a5bf
@ -12,6 +12,11 @@
|
||||
#include "bitAllocation.h"
|
||||
|
||||
// static helper functions
|
||||
// Returns the square root of val, rounded to the nearest integer.
// val is converted to double, passed to sqrt(), and the result is biased by
// 0.5 before being converted back to uint32_t.
static inline uint32_t intSqrt (const uint32_t val)
|
||||
{
|
||||
return uint32_t (0.5 + sqrt ((double) val)); // +0.5 so the truncating conversion rounds to nearest
|
||||
}
|
||||
|
||||
static inline uint32_t jndModel (const uint32_t val, const uint32_t mean,
|
||||
const unsigned expTimes512, const unsigned mulTimes512)
|
||||
{
|
||||
@ -102,7 +107,7 @@ uint8_t BitAllocator::getScaleFac (const uint32_t sfbStepSize, const int32_t* co
|
||||
u = 0;
|
||||
for (sf = 0; sf < sfbWidth; sf++)
|
||||
{
|
||||
u += uint32_t (0.5 + sqrt (abs ((double) sfbSignal[sf])));
|
||||
u += intSqrt (abs (sfbSignal[sf]));
|
||||
}
|
||||
u = uint32_t ((u * 16384ui64 + (sfbWidth >> 1)) / sfbWidth);
|
||||
u = uint32_t (0.5 + sqrt ((double) u) * 128.0);
|
||||
@ -197,10 +202,10 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
|
||||
for (/*b*/; b > 0; b--)
|
||||
{
|
||||
gStepSizes[b] = __max (gRms[b], BA_EPS);
|
||||
sumStepSizes += unsigned (0.5 + sqrt ((double) gStepSizes[b]));
|
||||
sumStepSizes += intSqrt (gStepSizes[b]);
|
||||
}
|
||||
gStepSizes[0] = __max (gRms[0], BA_EPS);
|
||||
sumStepSizes += unsigned (0.5 + sqrt ((double) gStepSizes[0]));
|
||||
sumStepSizes += intSqrt (gStepSizes[0]);
|
||||
} // for gr
|
||||
|
||||
if (ch != lfeChannelIndex)
|
||||
@ -218,28 +223,25 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
|
||||
|
||||
if (curGrpStep > maxGrpStep) maxGrpStep = curGrpStep;
|
||||
}
|
||||
for (gr = 0; gr + 1 < grpData.numWindowGroups; gr++)
|
||||
for (gr = 0; gr < grpData.numWindowGroups; gr++)
|
||||
{
|
||||
const uint32_t newGrpStep = __max (stepSizeM1, stepSizes[b + numSwbShort * (gr + 1)]);
|
||||
const uint32_t newGrpStep = __max (stepSizeM1, (gr + 1 == grpData.numWindowGroups ? BA_EPS : stepSizes[b + numSwbShort * (gr + 1)]));
|
||||
|
||||
stepSizeM1 = stepSizes[b + numSwbShort * gr];
|
||||
|
||||
if ((stepSizeM1 == maxGrpStep) && (maxGrpStep > newGrpStep))
|
||||
{
|
||||
sumStepSizes -= unsigned (0.5 + sqrt ((double) maxGrpStep));
|
||||
stepSizes[b + numSwbShort * gr] = newGrpStep;
|
||||
sumStepSizes += unsigned (0.5 + sqrt ((double) newGrpStep));
|
||||
const uint32_t sqrtOldStep = intSqrt (maxGrpStep);
|
||||
const uint32_t sqrtNewStep = intSqrt (newGrpStep);
|
||||
uint32_t& gStepSize = stepSizes[b + numSwbShort * gr];
|
||||
|
||||
sumStepSizes += (gStepSize = (sqrtOldStep + sqrtNewStep) >> 1) - sqrtOldStep;
|
||||
gStepSize *= gStepSize; // for square-mean-root
|
||||
}
|
||||
}
|
||||
if ((stepSizes[b + numSwbShort * gr] == maxGrpStep) && (maxGrpStep > stepSizeM1))
|
||||
{
|
||||
sumStepSizes -= unsigned (0.5 + sqrt ((double) maxGrpStep));
|
||||
stepSizes[b + numSwbShort * gr] = stepSizeM1;
|
||||
sumStepSizes += unsigned (0.5 + sqrt ((double) stepSizeM1));
|
||||
}
|
||||
} // for b
|
||||
|
||||
m_avgStepSize[ch] = __min (USHRT_MAX, uint32_t ((sumStepSizes + (nBandsInCh >> 1)) / nBandsInCh));
|
||||
m_avgStepSize[ch] = __min (USHRT_MAX, (sumStepSizes + (nBandsInCh >> 1)) / nBandsInCh);
|
||||
sumMeans += m_avgStepSize[ch];
|
||||
m_avgStepSize[ch] *= m_avgStepSize[ch];
|
||||
|
||||
@ -288,29 +290,30 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
|
||||
stepSizes[b] = __max (rms[b], maskingSlope + BA_EPS);
|
||||
}
|
||||
}
|
||||
stepSizes[b] = 0;
|
||||
for (b -= 1; b > __min (MF, maxSfbInCh); b--) // complete simultaneous masking by reversing the pattern
|
||||
{
|
||||
sumStepSizes += unsigned (0.5 + sqrt ((double) stepSizes[b]));
|
||||
sumStepSizes += intSqrt (stepSizes[b]);
|
||||
maskingSlope = ((uint64_t) stepSizes[b] * (8u + b - MF) + (msOffset << 3u)) >> (msShift + 3u);
|
||||
stepSizes[b - 1] = __max (stepSizes[b - 1], maskingSlope);
|
||||
}
|
||||
for (/*b*/; b > __min (LF, maxSfbInCh); b--) // typical reversed mid-freq. simultaneous masking slopes
|
||||
{
|
||||
sumStepSizes += unsigned (0.5 + sqrt ((double) stepSizes[b]));
|
||||
sumStepSizes += intSqrt (stepSizes[b]);
|
||||
maskingSlope = (stepSizes[b] + msOffset) >> msShift;
|
||||
stepSizes[b - 1] = __max (stepSizes[b - 1], maskingSlope);
|
||||
}
|
||||
for (/*b = min (9, maxSfbInCh)*/; b > 0; b--) // steeper reversed low-freq. simultaneous masking slopes
|
||||
{
|
||||
sumStepSizes += unsigned (0.5 + sqrt ((double) stepSizes[b]));
|
||||
sumStepSizes += intSqrt (stepSizes[b]);
|
||||
maskingSlope = (stepSizes[b] + (msOffset << (10u - b))) >> (msShift + 10u - b);
|
||||
stepSizes[b - 1] = __max (stepSizes[b - 1], maskingSlope);
|
||||
}
|
||||
sumStepSizes += unsigned (0.5 + sqrt ((double) stepSizes[0]));
|
||||
sumStepSizes += intSqrt (stepSizes[0]);
|
||||
|
||||
// --- LONG window: apply perceptual JND model and local band-peak smoothing, undo equal-loudness weighting
|
||||
nMeans++;
|
||||
m_avgStepSize[ch] = __min (USHRT_MAX, uint32_t ((sumStepSizes + (nBandsInCh >> 1)) / nBandsInCh));
|
||||
m_avgStepSize[ch] = __min (USHRT_MAX, (sumStepSizes + (nBandsInCh >> 1)) / nBandsInCh);
|
||||
sumMeans += m_avgStepSize[ch];
|
||||
m_avgStepSize[ch] *= m_avgStepSize[ch];
|
||||
|
||||
|
@ -573,13 +573,13 @@ unsigned ExhaleEncoder::getOptParCorCoeffs (const int32_t* const mdctSignal, con
|
||||
if (tnsData.filterOrder[0] > 0) // try to reduce TNS start band as long as SNR increases
|
||||
{
|
||||
const uint16_t filtOrder = tnsData.filterOrder[0];
|
||||
uint16_t b = __min (m_specAnaCurr[channelIndex] & 31, (nSamplesInFrame - filtOrder) >> SA_BW_SHIFT);
|
||||
uint16_t b = __min ((m_specAnaCurr[channelIndex] & 31) + 2, (nSamplesInFrame - filtOrder) >> SA_BW_SHIFT);
|
||||
short filterC[MAX_PREDICTION_ORDER] = {0, 0, 0, 0};
|
||||
int32_t* predSig = &m_mdctSignals[channelIndex][b << SA_BW_SHIFT]; // TNS start offset
|
||||
|
||||
m_linPredictor.parCorToLpCoeffs (tnsData.coeffParCor, filtOrder, filterC);
|
||||
|
||||
for (b = (b > 0 ? b - 1 : 0), predSig--; b > 0; b--) // b is in spectr. analysis units
|
||||
for (b--, predSig--; b > 0; b--) // start a bit higher; b is in spectr. analysis units
|
||||
{
|
||||
uint64_t sumAbsOrg = 0, sumAbsTns = 0;
|
||||
|
||||
@ -664,8 +664,8 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
|
||||
const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
|
||||
const uint32_t maxSfbLong = (samplingRate < 37566 ? 51 /*32 kHz*/ : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
|
||||
const uint32_t reductionFactor = (samplingRate < 37566 ? 2 : 3); // undercoding reduction
|
||||
const uint64_t scaleSr = (samplingRate < 27713 ? 37 - m_bitRateMode : 37) - ((m_bitRateMode & 7) > 2/*TODO*/ ? nChannels >> 1 : 0);
|
||||
const uint64_t scaleBr = (m_bitRateMode == 0 ? 32 : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - (m_bitRateMode >> 1));
|
||||
const uint64_t scaleSr = (samplingRate < 27713 ? 37 - m_bitRateMode : 37) - (m_bitRateMode > 3 ? nChannels >> 1 : 0);
|
||||
const uint64_t scaleBr = (m_bitRateMode == 0 ? 32 : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1));
|
||||
uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf;
|
||||
uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS];
|
||||
//uint8_t meanTempFlat[USAC_MAX_NUM_CHANNELS];
|
||||
@ -717,7 +717,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
|
||||
const bool eightShorts = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT);
|
||||
const uint16_t nSamplesMax = (samplingRate < 37566 ? nSamplesInFrame : swbOffsetsL[m_swbTableIdx][maxSfbLong]);
|
||||
const uint8_t steppFadeLen = (eightShorts ? 4 : (coreConfig.tnsActive ? 32 : 64));
|
||||
const uint8_t steppFadeOff = ((m_bitRateMode + 1) & 6) << (eightShorts ? 2 : 5);
|
||||
const uint8_t steppFadeOff = ((m_bitRateMode + 77000 / samplingRate) & 6) << (eightShorts ? 2 : 5);
|
||||
const int64_t steppWeightI = __min (64, m_perCorrCurr[el] - 128) >> (eightShorts || coreConfig.tnsActive ? 1 : 0);
|
||||
const int64_t steppWeightD = 128 - steppWeightI; // decrement, (1 - crosstalk) * 128
|
||||
const TnsData& tnsData0 = coreConfig.tnsData[0];
|
||||
@ -785,7 +785,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
|
||||
|
||||
if ((errorValue == 0) && (coreConfig.stereoMode == 2)) // frame M/S, synch statistics
|
||||
{
|
||||
const uint8_t numSwbFrame = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT ? m_numSwbShort : __min (m_numSwbLong, maxSfbLong));
|
||||
const uint8_t numSwbFrame = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT ? m_numSwbShort : __min (m_numSwbLong, maxSfbLong + 1));
|
||||
const uint32_t peakIndexSte = __max ((m_specAnaCurr[ci] >> 5) & 2047, (m_specAnaCurr[ci + 1] >> 5) & 2047) << 5;
|
||||
|
||||
errorValue = m_stereoCoder.applyFullFrameMatrix (m_mdctSignals[ci], m_mdctSignals[ci + 1],
|
||||
@ -810,6 +810,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
|
||||
{
|
||||
SfbGroupData& grpData = coreConfig.groupingData[ch];
|
||||
const bool eightShorts = (coreConfig.icsInfoCurr[ch].windowSequence == EIGHT_SHORT);
|
||||
const uint8_t maxSfbCh = grpData.sfbsPerGroup;
|
||||
const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong);
|
||||
const uint8_t mSfmFac = eightTimesSqrt256Minus[meanSpecFlat[ci]];
|
||||
uint32_t* stepSizes = &sfbStepSizes[ci * m_numSwbShort * NUM_WINDOW_GROUPS];
|
||||
@ -827,7 +828,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
|
||||
|
||||
// undercoding reduction for case where large number of coefs is quantized to zero
|
||||
s = (eightShorts ? (nSamplesInFrame * grpData.windowGroupLength[gr]) >> 1 : nSamplesInFrame << 2);
|
||||
for (b = 0; b < grpData.sfbsPerGroup; b++)
|
||||
for (b = 0; b < maxSfbCh; b++)
|
||||
{
|
||||
#if SA_IMPROVED_REAL_ABS
|
||||
const uint32_t rmsComp = (coreConfig.stereoMode > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]);
|
||||
@ -860,7 +861,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
|
||||
}
|
||||
s = (eightShorts ? s / ((nSamplesInFrame * grpData.windowGroupLength[gr]) >> 8) : s / (nSamplesInFrame >> 5));
|
||||
|
||||
for (b = 0; b < grpData.sfbsPerGroup; b++)
|
||||
for (b = 0; b < maxSfbCh; b++)
|
||||
{
|
||||
const unsigned lfConst = (samplingRate < 27713 && !eightShorts ? 1 : 2); // LF SNR boost, cf my M.Sc. thesis
|
||||
const unsigned lfAtten = (b <= 5 ? (eightShorts ? 1 : 4) + b * lfConst : 5 * lfConst - 1 + b + ((b + 5) >> 4));
|
||||
@ -876,13 +877,16 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
|
||||
} // for gr
|
||||
|
||||
#if !RESTRICT_TO_AAC
|
||||
if (grpData.sfbsPerGroup > 0 && m_noiseFilling[el] && !eightShorts) // HF noise-fill
|
||||
if ((maxSfbCh > 0) && m_noiseFilling[el] && (m_bitRateMode <= 3 || !eightShorts))
|
||||
{
|
||||
const uint8_t numSwbFrame = __min (numSwbCh, maxSfbLong); // rate based bandwidth
|
||||
const uint8_t numSwbFrame = __min (numSwbCh, (eightShorts ? maxSfbCh : maxSfbLong) + (m_bitRateMode > 3 || samplingRate < 37566 ? 0 : 1));
|
||||
|
||||
if (grpData.sfbsPerGroup < numSwbFrame)
|
||||
if (maxSfbCh < numSwbFrame) // increase coding bandwidth
|
||||
{
|
||||
memset (&grpData.scaleFactors[grpData.sfbsPerGroup], 0, (numSwbFrame - grpData.sfbsPerGroup) * sizeof (uint8_t));
|
||||
for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++)
|
||||
{
|
||||
memset (&grpData.scaleFactors[maxSfbCh + m_numSwbShort * gr], 0, (numSwbFrame - maxSfbCh) * sizeof (uint8_t));
|
||||
}
|
||||
grpData.sfbsPerGroup = coreConfig.icsInfoCurr[ch].maxSfb = numSwbFrame;
|
||||
}
|
||||
if (ch > 0) coreConfig.commonMaxSfb = (coreConfig.icsInfoCurr[0].maxSfb == coreConfig.icsInfoCurr[1].maxSfb);
|
||||
@ -1213,6 +1217,7 @@ unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS
|
||||
if (coreConfig.commonWindow && (m_bitRateMode <= 4)) // stereo pre-processing analysis
|
||||
{
|
||||
const bool eightShorts = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT);
|
||||
const uint8_t meanSpecFlat = (((m_specAnaCurr[ci] >> 16) & UCHAR_MAX) + ((m_specAnaCurr[ci + 1] >> 16) & UCHAR_MAX) + 1) >> 1;
|
||||
const uint16_t* const swbo = swbOffsetsL[m_swbTableIdx];
|
||||
const uint16_t nSamplesMax = (samplingRate < 37566 ? nSamplesInFrame : swbo[brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)]);
|
||||
const int16_t steAnaStats = m_specAnalyzer.stereoSigAnalysis (m_mdctSignals[ci], m_mdctSignals[ci + 1],
|
||||
@ -1222,14 +1227,15 @@ unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS
|
||||
|
||||
if ((s = abs (steAnaStats)) * m_perCorrCurr[el] == 0) // transitions to/from silence
|
||||
{
|
||||
m_perCorrCurr[el] = (uint8_t) s;
|
||||
m_perCorrCurr[el] = uint8_t((32 + s * __min (64, eightTimesSqrt256Minus[meanSpecFlat])) >> 6);
|
||||
}
|
||||
else // gentle overlap length dependent temporal smoothing
|
||||
{
|
||||
const int16_t allowedDiff = (coreConfig.icsInfoCurr[0].windowSequence < EIGHT_SHORT ? 16 : 32);
|
||||
const int16_t prevPerCorr = __max (128, __min (192, m_perCorrCurr[el]));
|
||||
const int16_t currPerCorr = (32 + s * __min (64, eightTimesSqrt256Minus[meanSpecFlat])) >> 6;
|
||||
|
||||
m_perCorrCurr[el] = (uint8_t) __max (prevPerCorr - allowedDiff, __min (prevPerCorr + allowedDiff, (int16_t) s));
|
||||
m_perCorrCurr[el] = (uint8_t) __max (prevPerCorr - allowedDiff, __min (prevPerCorr + allowedDiff, currPerCorr));
|
||||
}
|
||||
|
||||
if (s == steAnaStats * -1) coreConfig.stereoConfig = 2; // 2: side > mid, pred_dir=1
|
||||
@ -1489,7 +1495,8 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o
|
||||
tsCurr[ch] = (m_tempAnaCurr[ci] /*R*/) & UCHAR_MAX;
|
||||
tsNext[ch] = (m_tempAnaNext[ci] >> 8) & UCHAR_MAX;
|
||||
|
||||
const bool lowOlapNext = (m_tranLocNext[ci] >= 0) || (sfNext < 68 && tfNext >= 204) || (tsCurr[ch] >= 153) || (tsNext[ch] >= 153);
|
||||
const bool lowOlapNext = (m_tranLocNext[ci] >= 0) || (sfNext <= UCHAR_MAX / 4 && tfNext > (UCHAR_MAX * 13) / 16) ||
|
||||
(tsCurr[ch] > (UCHAR_MAX * 5) / 8) || (tsNext[ch] > (UCHAR_MAX * 5) / 8);
|
||||
const bool sineWinCurr = (sfCurr >= 170) && (sfNext >= 170) && (sfCurr < 221) && (sfNext < 221) && (tsCurr[ch] < 20) &&
|
||||
(tfCurr >= 153) && (tfNext >= 153) && (tfCurr < 184) && (tfNext < 184) && (tsNext[ch] < 20);
|
||||
// set window_sequence
|
||||
@ -1499,11 +1506,11 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o
|
||||
}
|
||||
else // LONG_START_SEQUENCE, STOP_START_SEQUENCE, EIGHT_SHORT_SEQUENCE - min overlap
|
||||
{
|
||||
wsCurr = (m_tranLocCurr[ci] >= 0) ? EIGHT_SHORT :
|
||||
wsCurr = (m_tranLocCurr[ci] >= 0) || (tsCurr[ch] > (UCHAR_MAX * 5) / 8) || (tfCurr > (UCHAR_MAX * 15) / 16) ? EIGHT_SHORT :
|
||||
#if RESTRICT_TO_AAC
|
||||
(lowOlapNext && (m_tranLocNext[ci] >= 0 || wsPrev != EIGHT_SHORT) ? EIGHT_SHORT : LONG_STOP);
|
||||
(lowOlapNext ? EIGHT_SHORT : LONG_STOP);
|
||||
#else
|
||||
(lowOlapNext && (m_tranLocNext[ci] >= 0 || wsPrev != STOP_START) ? STOP_START : LONG_STOP);
|
||||
(lowOlapNext ? STOP_START : LONG_STOP);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -30,7 +30,7 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in
|
||||
const bool alterPredDir = (applyPredSte && (useAltPredDir > 0)); // predict mid from side?
|
||||
const SfbGroupData& grp = groupingData1;
|
||||
const bool eightShorts = (grp.numWindowGroups > 1);
|
||||
const uint8_t maxSfbSte = (eightShorts ? __max (grp.sfbsPerGroup, groupingData2.sfbsPerGroup) : numSwbFrame);
|
||||
const uint8_t maxSfbSte = (eightShorts ? __min (numSwbFrame, __max (grp.sfbsPerGroup, groupingData2.sfbsPerGroup) + 1) : numSwbFrame);
|
||||
uint32_t numSfbPredSte = 0; // counter
|
||||
|
||||
if ((mdctSpectrum1 == nullptr) || (mdctSpectrum2 == nullptr) || (numSwbFrame < maxSfbSte) || (grp.numWindowGroups != groupingData2.numWindowGroups) ||
|
||||
@ -282,7 +282,11 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in
|
||||
|
||||
sfbTempVar = (applyPredSte ? __max (rmsSfbM[b], rmsSfbS[b]) : __max (grpRms1[idx], grpRms2[idx]));
|
||||
|
||||
if (sfbFacLR <= 1.0) // total simultaneous masking - no positive SNR in either SFB
|
||||
if ((grpStepSizes1[idx] == 0) || (grpStepSizes2[idx] == 0)) // HF noise filled SFB
|
||||
{
|
||||
grpStepSizes1[idx] = grpStepSizes2[idx] = 0;
|
||||
}
|
||||
else if (sfbFacLR <= 1.0) // simultaneous masking - no positive SNR in either SFB
|
||||
{
|
||||
const double max = __max (sfbRmsL, sfbRmsR);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user