prepare M/S stereo

This commit is contained in:
Christian R. Helmrich 2020-03-25 01:00:02 +01:00
parent 7185ac995f
commit e4bc905be2
5 changed files with 377 additions and 88 deletions

View File

@ -117,6 +117,112 @@ static uint32_t quantizeSfbWithMinSnr (const unsigned* const coeffMagn, const ui
}
// inline helper functions
// Shared core of the stereo pre-processing helpers below: computes ONE output MDCT sample from
// a pair of complex input samples A = (realA, imagA) and B = (realB, imagB).
//
// The samples are mixed as A * weightA + B * weightB. When the magnitude of the mixed real part
// is smaller than |realA| + |realB| (note: the threshold is deliberately left unweighted, as in
// the original code), the summation is considered destructive and the weaker weighted component
// is subtracted from the stronger one instead, for both the real and the imaginary part.
// The mixed real part is finally rescaled so that the complex magnitude of the mix equals the
// magnitude of the reference sample (realRef, imagRef), and rounded half away from zero.
//
// All integer arithmetic is done in 64 bit and magnitudes are taken with explicit ternaries, so
// the result neither depends on which abs() overloads are visible (an int-only abs() would
// truncate the 64-bit mix) nor invokes undefined behaviour for an input of INT32_MIN.
//
// Returns the rescaled real part of the mix; 0 when the mix itself is zero.
static inline int32_t stereoPreProcMixSample (const int64_t realA, const int64_t imagA, const int64_t weightA,
                                              const int64_t realB, const int64_t imagB, const int64_t weightB,
                                              const int64_t realRef, const int64_t imagRef)
{
  const int64_t magnA = (realA < 0 ? -realA : realA);
  const int64_t magnB = (realB < 0 ? -realB : realB);
  int64_t mixReal = realA * weightA + realB * weightB; // crosstalk
  int64_t mixImag;

  if ((mixReal < 0 ? -mixReal : mixReal) < magnA + magnB) // avoid destructive summations
  {
    if (magnA * weightA < magnB * weightB) // weighted B dominates: B minus A
    {
      mixReal = realB * weightB - realA * weightA;
      mixImag = imagB * weightB - imagA * weightA;
    }
    else // weighted A dominates (or both are equal): A minus B
    {
      mixReal = realA * weightA - realB * weightB;
      mixImag = imagA * weightA - imagB * weightB;
    }
  }
  else mixImag = imagA * weightA + imagB * weightB;

  // squared magnitudes of the reference sample (numerator) and of the mix (denominator)
  const double n = (double) realRef * (double) realRef + (double) imagRef * (double) imagRef;
  const double d = (double) mixReal * (double) mixReal + (double) mixImag * (double) mixImag;

  // the denominator is clamped to at least 1 so that an all-zero mix yields 0 instead of NaN
  return int32_t ((double) mixReal * sqrt (n / (d < 1.0 ? 1.0 : d)) + (mixReal < 0 ? -0.5 : 0.5));
}

// Stereo pre-processing of one pair of complex (MDCT + MDST) spectral samples.
//
// mdctSample1, mdctSample2: real parts of channel 1 and 2; read and then overwritten in place.
// mdstSample1, mdstSample2: imaginary parts of channel 1 and 2; only read, never modified.
// factIn, factDe          : mixing weights. Output 1 is in1 * factDe + in2 * factIn, output 2
//                           is in1 * factIn + in2 * factDe (before the anti-cancellation and
//                           magnitude-preservation steps described at stereoPreProcMixSample).
// Both inputs are read completely before either output is written.
static inline void applyStereoPreProcessingCplx (int32_t* mdctSample1, int32_t* mdctSample2,
                                                 int32_t* mdstSample1, int32_t* mdstSample2,
                                                 const int64_t factIn, const int64_t factDe)
{
  const int64_t valR1 = *mdctSample1, valI1 = *mdstSample1;
  const int64_t valR2 = *mdctSample2, valI2 = *mdstSample2;

  *mdctSample1 = stereoPreProcMixSample (valR1, valI1, factDe, valR2, valI2, factIn, valR1, valI1);
  *mdctSample2 = stereoPreProcMixSample (valR1, valI1, factIn, valR2, valI2, factDe, valR2, valI2);
}

// Stereo pre-processing of one pair of real-valued (MDCT only) spectral samples, for use when no
// MDST spectrum is available. The imaginary parts are estimated from the neighbouring MDCT
// samples as (next - previous) / 2, where "next" is the sample following mdctSampleN in memory
// and "previous" is the caller-maintained *prevSampleN.
//
// mdctSample1, mdctSample2: current real samples of channel 1 and 2; overwritten in place. The
//                           element at index +1 of each pointer is read, so it must be valid.
// prevSample1, prevSample2: in: unprocessed sample preceding the current one; out: the current
//                           sample's unprocessed value, ready for the next call.
// factIn, factDe          : mixing weights, see applyStereoPreProcessingCplx().
static inline void applyStereoPreProcessingReal (int32_t* mdctSample1, int32_t* mdctSample2,
                                                 int32_t* prevSample1, int32_t* prevSample2,
                                                 const int64_t factIn, const int64_t factDe)
{
  // the estimates must be taken before the previous-sample memories are updated below
  const int64_t valI1 = (*(mdctSample1 + 1) - (int64_t) *prevSample1) >> 1; // estimate, see also
  const int64_t valI2 = (*(mdctSample2 + 1) - (int64_t) *prevSample2) >> 1; // getMeanAbsValues()
  const int64_t valR1 = (*prevSample1 = *mdctSample1);
  const int64_t valR2 = (*prevSample2 = *mdctSample2);

  *mdctSample1 = stereoPreProcMixSample (valR1, valI1, factDe, valR2, valI2, factIn, valR1, valI1);
  *mdctSample2 = stereoPreProcMixSample (valR1, valI1, factIn, valR2, valI2, factDe, valR2, valI2);
}
static inline uint8_t brModeAndFsToMaxSfbLong (const unsigned bitRateMode, const unsigned samplingRate)
{
// max. for fs of 44 kHz: band 47 (19.3 kHz), 48 kHz: 45 (19.5 kHz), 64 kHz: 39 (22.0 kHz)
@ -134,8 +240,8 @@ static inline uint32_t getComplexRmsValue (const uint32_t rmsValue, const unsign
const uint8_t numSwb, const TnsData& tnsData)
{
// compensate for missing MDST coefficients in RMS calculation of SFBs where TNS is active
return ((tnsData.numFilters > 0) && (sfbGroup == tnsData.filteredWindow) && (rmsValue <= UINT_MAX / 3) &&
(tnsData.filterLength[0] + sfbIndex >= numSwb) ? (rmsValue * 3u) >> 1 : rmsValue);
return ((tnsData.numFilters > 0) && (sfbGroup == tnsData.filteredWindow) && (rmsValue <= UINT_MAX / 5) &&
(tnsData.filterLength[0] + sfbIndex >= numSwb) ? (rmsValue * 5u) >> 2 : rmsValue);
}
#endif
@ -296,7 +402,6 @@ unsigned ExhaleEncoder::applyTnsToWinGroup (TnsData& tnsData, SfbGroupData& grpD
{
const uint16_t filtOrder = tnsData.filterOrder[0];
const uint16_t* grpSO = &grpData.sfbOffsets[m_numSwbShort * tnsData.filteredWindow];
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
unsigned errorValue = 0; // no error
if ((maxSfb > (eightShorts ? 15 : 51)) || (channelIndex >= USAC_MAX_NUM_CHANNELS))
@ -306,7 +411,7 @@ unsigned ExhaleEncoder::applyTnsToWinGroup (TnsData& tnsData, SfbGroupData& grpD
if (filtOrder > 0) // determine TNS filter length in SFBs and apply TNS analysis filtering
{
uint8_t numSwbFrame = (eightShorts ? numSwbOffsetS[m_swbTableIdx] : numSwbOffsetL[m_swbTableIdx]) - 1;
const int numSwbWin = (eightShorts ? m_numSwbShort : m_numSwbLong);
uint8_t tnsMaxBands = tnsScaleFactorBandLimit[eightShorts ? 1 : 0][m_swbTableIdx];
uint8_t tnsStartSfb = 3 + 32000 / toSamplingRate (m_frequencyIdx); // 8-short TNS start
@ -315,22 +420,15 @@ unsigned ExhaleEncoder::applyTnsToWinGroup (TnsData& tnsData, SfbGroupData& grpD
const unsigned samplingRate = toSamplingRate (m_frequencyIdx); // refine TNS_MAX_BANDS
const unsigned tnsStartOffs = (m_specAnaCurr[channelIndex] & 31) << SA_BW_SHIFT;
if ((samplingRate >= 46009) && (samplingRate < 55426)) // ~48kHz
{
numSwbFrame = 49;
tnsMaxBands = 40;
}
if ((samplingRate >= 46009) && (samplingRate < 55426)) tnsMaxBands = 40; // for 48 kHz
else
if ((samplingRate >= 37566) && (samplingRate < 46009)) // ~44kHz
{
numSwbFrame = 49;
tnsMaxBands = 42;
}
if ((samplingRate >= 37566) && (samplingRate < 46009)) tnsMaxBands = 42; // & 44.1 kHz
while (grpSO[tnsStartSfb] < tnsStartOffs) tnsStartSfb++; // start band for TNS filter
}
tnsMaxBands = __min (tnsMaxBands, maxSfb);
if ((tnsData.filterLength[0] = __max (0, numSwbFrame - (int) tnsStartSfb)) > 0)
if ((tnsData.filterLength[0] = __max (0, numSwbWin - tnsStartSfb)) > 0)
{
int32_t* const mdctSignal = m_mdctSignals[channelIndex];
const short offs = grpSO[tnsStartSfb];
@ -604,18 +702,97 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
}
else // SCE or CPE: bandwidth-to-max_sfb mapping, short-window grouping for each channel
{
// if ((coreConfig.stereoMode == 0) && (m_perCorrCurr[el] > SCHAR_MAX)) coreConfig.stereoMode = 1;
if (coreConfig.commonWindow && (m_perCorrCurr[el] > 128)) // run stereo pre-processing
{
const bool eightShorts = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT);
const uint16_t nSamplesMax = (samplingRate < 37566 ? nSamplesInFrame : swbOffsetsL[m_swbTableIdx][maxSfbLong]);
const uint8_t steppFadeLen = (eightShorts ? 4 : (coreConfig.tnsActive ? 32 : 64));
const uint8_t steppFadeOff = ((m_bitRateMode + 1) & 6) << (eightShorts ? 2 : 5);
const int64_t steppWeightI = __min (64, m_perCorrCurr[el] - 128) >> (eightShorts || coreConfig.tnsActive ? 1 : 0);
const int64_t steppWeightD = 128 - steppWeightI; // decrement, (1 - crosstalk) * 128
for (uint16_t gr = 0; gr < coreConfig.groupingData[0].numWindowGroups; gr++)
{
const uint8_t grpLength = coreConfig.groupingData[0].windowGroupLength[gr];
const uint16_t* grpOff = &coreConfig.groupingData[0].sfbOffsets[m_numSwbShort * gr];
const uint16_t grpStart = grpOff[0] + steppFadeOff * grpLength;
int32_t* sigR0 = &m_mdctSignals[ci][grpStart];
int32_t* sigR1 = &m_mdctSignals[ci + 1][grpStart];
int64_t xTalkI = 0, xTalkD = 0; // weights for crosstalk
if (coreConfig.tnsActive && (gr == coreConfig.tnsData[0].filteredWindow || gr == coreConfig.tnsData[1].filteredWindow))
{
const uint16_t maxLen = (eightShorts ? grpOff[m_numSwbShort] - 1 : __min (nSamplesInFrame - 1u, nSamplesMax)) - grpStart;
int32_t prevR0 = 0; // NOTE: functions also on grouped
int32_t prevR1 = 0; // MDCT spectra, but not properly!
for (uint16_t w = 0; w < grpLength; w++) // sub-window
{
prevR0 = *(sigR0++); prevR1 = *(sigR1++); // processing starts at offset of 1!
xTalkI = steppWeightI;
xTalkD = steppWeightD * (2 * steppFadeLen - 1);
for (s = steppFadeLen - 1; s > 0; s--, sigR0++, sigR1++) // start with fade-in
{
applyStereoPreProcessingReal (sigR0, sigR1, &prevR0, &prevR1, xTalkI, xTalkD);
xTalkI += steppWeightI;
xTalkD -= steppWeightD;
}
}
for (s = maxLen - steppFadeLen * grpLength; s > 0; s--, sigR0++, sigR1++) // end
{
applyStereoPreProcessingReal (sigR0, sigR1, &prevR0, &prevR1, xTalkI, xTalkD);
}
}
else // TNS inactive, both MDCTs and MDSTs are available
{
const uint16_t maxLen = (eightShorts ? grpOff[m_numSwbShort] : nSamplesMax) - grpStart;
int32_t* sigI0 = &m_mdstSignals[ci][grpStart]; // imag
int32_t* sigI1 = &m_mdstSignals[ci + 1][grpStart];
for (uint16_t w = 0; w < grpLength; w++) // sub-window
{
sigR0++; sigR1++; sigI0++; sigI1++; // processing starts at an offset of 1!
xTalkI = steppWeightI;
xTalkD = steppWeightD * (2 * steppFadeLen - 1);
for (s = steppFadeLen - 1; s > 0; s--, sigR0++, sigR1++, sigI0++, sigI1++)
{
applyStereoPreProcessingCplx (sigR0, sigR1, sigI0, sigI1, xTalkI, xTalkD);
xTalkI += steppWeightI;
xTalkD -= steppWeightD;
}
}
for (s = maxLen - steppFadeLen * grpLength; s > 0; s--, sigR0++, sigR1++, sigI0++, sigI1++)
{
applyStereoPreProcessingCplx (sigR0, sigR1, sigI0, sigI1, xTalkI, xTalkD);
}
} // if coreConfig.tnsActive
}
} // if coreConfig.commonWindow
if (coreConfig.stereoMode > 0) // synch spectral statistics
{
const uint32_t peakIndexSte = __max ((m_specAnaCurr[ci] >> 5) & 2047, (m_specAnaCurr[ci + 1] >> 5) & 2047) << 5;
// TODO: M/S matrixing, update of grpData{0,1}.sfbRmsValues and &sfbStepSizes[(ci + {0,1}) * m_numSwbShort * NUM_WINDOW_GROUPS]
m_specAnaCurr[ci ] = (m_specAnaCurr[ci ] & (UINT_MAX - 65504)) | peakIndexSte;
m_specAnaCurr[ci + 1] = (m_specAnaCurr[ci + 1] & (UINT_MAX - 65504)) | peakIndexSte;
meanSpecFlat[ci] = meanSpecFlat[ci + 1] = ((uint16_t) meanSpecFlat[ci] + (uint16_t) meanSpecFlat[ci + 1]) >> 1;
// meanTempFlat[ci] = meanTempFlat[ci + 1] = ((uint16_t) meanTempFlat[ci] + (uint16_t) meanTempFlat[ci + 1]) >> 1;
}
for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
{
SfbGroupData& grpData = coreConfig.groupingData[ch];
const bool eightShorts = (coreConfig.icsInfoCurr[ch].windowSequence == EIGHT_SHORT);
const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong);
const uint8_t mSfmFac = eightTimesSqrt256Minus[meanSpecFlat[ci]];
uint32_t* stepSizes = &sfbStepSizes[ci * m_numSwbShort * NUM_WINDOW_GROUPS];
uint8_t numSwbFrame = (eightShorts ? numSwbOffsetS[m_swbTableIdx] : numSwbOffsetL[m_swbTableIdx]) - 1;
if (!eightShorts && (samplingRate >= 37566) && (samplingRate < 55426)) // fix numSwb
{
numSwbFrame = 49;
}
memset (grpData.scaleFactors, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint8_t));
for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++)
@ -635,9 +812,9 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
const uint32_t rmsComp = grpRms[b];
const uint32_t rmsRef9 = (coreConfig.commonWindow ? refRms[b] >> 9 : rmsComp);
#else
const uint32_t rmsComp = getComplexRmsValue (grpRms[b], gr, b, numSwbFrame, coreConfig.tnsData[ch]);
const uint32_t rmsComp = getComplexRmsValue (grpRms[b], gr, b, numSwbCh, coreConfig.tnsData[ch]);
const uint32_t rmsRef9 = (!coreConfig.commonWindow ? rmsComp :
getComplexRmsValue (refRms[b], gr, b, numSwbFrame, coreConfig.tnsData[1 - ch]) >> 9);
getComplexRmsValue (refRms[b], gr, b, numSwbCh, coreConfig.tnsData[1 - ch]) >> 9);
#endif
if (rmsComp < grpRmsMin) grpRmsMin = rmsComp;
if (rmsComp >= rmsRef9 && (rmsComp < (grpStepSizes[b] >> 1))) // zero-quantized
@ -651,9 +828,9 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
const uint32_t rmsComp = grpRms[b];
const uint32_t rmsRef9 = (coreConfig.commonWindow ? refRms[b] >> 9 : rmsComp);
#else
const uint32_t rmsComp = getComplexRmsValue (grpRms[b], gr, b, numSwbFrame, coreConfig.tnsData[ch]);
const uint32_t rmsComp = getComplexRmsValue (grpRms[b], gr, b, numSwbCh, coreConfig.tnsData[ch]);
const uint32_t rmsRef9 = (!coreConfig.commonWindow ? rmsComp :
getComplexRmsValue (refRms[b], gr, b, numSwbFrame, coreConfig.tnsData[1 - ch]) >> 9);
getComplexRmsValue (refRms[b], gr, b, numSwbCh, coreConfig.tnsData[1 - ch]) >> 9);
#endif
if (rmsComp >= rmsRef9) // check only first SFB above max_sfb for simplification
{
@ -680,7 +857,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
#if !RESTRICT_TO_AAC
if (grpData.sfbsPerGroup > 0 && m_noiseFilling[el] && !eightShorts) // HF noise-fill
{
numSwbFrame = __min (numSwbFrame, maxSfbLong); // bit-rate dependent max bandwidth
const uint8_t numSwbFrame = __min (numSwbCh, maxSfbLong); // rate based bandwidth
if (grpData.sfbsPerGroup < numSwbFrame)
{
@ -738,6 +915,12 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
CoreCoderData& coreConfig = *m_elementData[el];
const unsigned nrChannels = (coreConfig.elementType & 1) + 1; // for UsacCoreCoderData()
if ((coreConfig.elementType < ID_USAC_LFE) && (coreConfig.stereoMode > 0)) // synch SFMs
{
meanSpecFlat[ci] = meanSpecFlat[ci + 1] = ((uint16_t) meanSpecFlat[ci] + (uint16_t) meanSpecFlat[ci + 1]) >> 1;
// meanTempFlat[ci] = meanTempFlat[ci + 1] = ((uint16_t) meanTempFlat[ci] + (uint16_t) meanTempFlat[ci + 1]) >> 1;
}
for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
{
EntropyCoder& entrCoder = m_entropyCoder[ci];
@ -819,12 +1002,7 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
const uint8_t maxSfbLong = (samplingRate < 37566 ? 51 /*32 kHz*/ : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
const uint8_t maxSfbShort = (samplingRate < 37566 ? 14 /*32 kHz*/ : brModeAndFsToMaxSfbShort(m_bitRateMode, samplingRate));
const uint16_t peakIndex = (shortWinCurr ? 0 : (m_specAnaCurr[ci] >> 5) & 2047);
#if RESTRICT_TO_AAC
const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort : maxSfbLong) - 6 + (m_bitRateMode >> 1) + ((m_specAnaCurr[ci] >> 21) & 7);
#else
const unsigned highFreqMinStart = (m_noiseFilling[el] ? 6 : 6 - (m_bitRateMode >> 1));
const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort : maxSfbLong) - highFreqMinStart + ((m_specAnaCurr[ci] >> 21) & 7);
#endif
const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort : maxSfbLong) - 5 + (m_bitRateMode >> 1) + (meanSpecFlat[ci] >> 5);
const unsigned targetBitCountX2 = ((48000 + 16000 * m_bitRateMode) * nSamplesInFrame) / (samplingRate * grpData.numWindowGroups);
unsigned b = grpData.sfbsPerGroup - 1;
@ -919,7 +1097,7 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
grpScaleFacs[b] = grpScaleFacs[b - 1];
}
}
} // if (estimBitCount > targetBitCountX2)
} // if estimBitCount > targetBitCountX2
for (b = lastSfb + 1; b < grpData.sfbsPerGroup; b++)
{
@ -974,7 +1152,7 @@ unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS
unsigned errorValue = 0; // no error
// get spectral channel statistics for last frame, used for input bandwidth (BW) detection
m_specAnalyzer.getSpecAnalysisStats (m_specAnaPrev, nChannels);
//m_specAnalyzer.getSpecAnalysisStats (m_specAnaPrev, nChannels);
m_specAnalyzer.getSpectralBandwidth (m_bandwidPrev, nChannels);
// spectral analysis for current MCLT signal (windowed time-samples for the current frame)
@ -1011,6 +1189,33 @@ unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS
{
coreConfig.stereoConfig = coreConfig.stereoMode = 0;
if (coreConfig.commonWindow && (m_bitRateMode <= 4)) // stereo pre-processing analysis
{
const bool eightShorts = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT);
const uint16_t* const swbo = swbOffsetsL[m_swbTableIdx];
const uint16_t nSamplesMax = (samplingRate < 37566 ? nSamplesInFrame : swbo[brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)]);
const int16_t steAnaStats = m_specAnalyzer.stereoSigAnalysis (m_mdctSignals[ci], m_mdctSignals[ci + 1],
m_mdstSignals[ci], m_mdstSignals[ci + 1], nSamplesMax,
nSamplesInFrame, eightShorts, (uint8_t* const) coreConfig.stereoData);
if (steAnaStats == SHRT_MIN) errorValue = 1;
if ((s = abs (steAnaStats)) * m_perCorrCurr[el] == 0) // transitions to/from silence
{
m_perCorrCurr[el] = (uint8_t) s;
}
else // gentle overlap length dependent temporal smoothing
{
const int16_t allowedDiff = (coreConfig.icsInfoCurr[0].windowSequence < EIGHT_SHORT ? 16 : 32);
const int16_t prevPerCorr = __max (128, __min (192, m_perCorrCurr[el]));
m_perCorrCurr[el] = (uint8_t) __max (prevPerCorr - allowedDiff, __min (prevPerCorr + allowedDiff, (int16_t) s));
}
if (s == steAnaStats * -1) coreConfig.stereoConfig = 2; // 2: side > mid, pred_dir=1
// if (s > (UCHAR_MAX * 3) / 4) coreConfig.stereoMode = 2; // 2: all, ms_mask_present=2
}
else if (coreConfig.commonWindow) m_perCorrCurr[el] = 128; // update with midway value
for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
{
SfbGroupData& grpData = coreConfig.groupingData[ch];
@ -1027,13 +1232,15 @@ unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS
icsCurr.maxSfb = 0;
while (grpSO[icsCurr.maxSfb] < nSamplesInFrame) icsCurr.maxSfb++; // num_swb_long
grpSO[icsCurr.maxSfb] = (uint16_t) nSamplesInFrame;
grpData.sfbsPerGroup = icsCurr.maxSfb; // initialization, changed to max_sfb later
grpData.sfbsPerGroup = m_numSwbLong = icsCurr.maxSfb; // changed to max_sfb later
if (samplingRate > 32000) // set max_sfb based on VBR mode and bandwidth detection
{
if (icsCurr.maxSfb > 49) // may still be 51 for 32 kHz
{
grpData.sfbsPerGroup = m_numSwbLong = icsCurr.maxSfb = 49; // fix 44.1, 48 kHz
}
icsCurr.maxSfb = __min (icsCurr.maxSfb, brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
if (grpData.sfbsPerGroup > 49) grpData.sfbsPerGroup = 49; // for 44.1 and 48 kHz
}
while (grpSO[icsCurr.maxSfb] > __max (m_bandwidCurr[ci], m_bandwidPrev[ci])) icsCurr.maxSfb--; // BW detector
}
@ -1090,7 +1297,7 @@ unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS
uint8_t& maxSfb1 = coreConfig.icsInfoCurr[1].maxSfb;
const uint8_t maxSfbSte = __max (maxSfb0, maxSfb1); // max_sfb_ste, as in Table 24
if ((maxSfb0 > 0) && (maxSfb1 > 0) && (maxSfbSte - __min (maxSfb0, maxSfb1) <= 1))
if ((maxSfb0 > 0) && (maxSfb1 > 0) && (maxSfbSte - __min (maxSfb0, maxSfb1) <= 1 || coreConfig.stereoMode == 2))
{
uint32_t& sa0 = m_specAnaCurr[ci-2];
uint32_t& sa1 = m_specAnaCurr[ci-1];
@ -1394,6 +1601,7 @@ ExhaleEncoder::ExhaleEncoder (int32_t* const inputPcmData, unsigned ch
#if !RESTRICT_TO_AAC
m_nonMpegExt = useEcodisExt;
#endif
m_numSwbLong = 51; // maximum
m_numSwbShort = MAX_NUM_SWB_SHORT;
m_outAuData = outputAuData;
m_pcm24Data = inputPcmData;
@ -1405,6 +1613,7 @@ ExhaleEncoder::ExhaleEncoder (int32_t* const inputPcmData, unsigned ch
const ELEM_TYPE et = elementTypeConfig[m_channelConf % USAC_MAX_NUM_ELCONFIGS][el]; // usacElementType
m_elementData[el] = nullptr;
m_perCorrCurr[el] = 0;
#if !RESTRICT_TO_AAC
m_noiseFilling[el] = (useNoiseFilling && (et < ID_USAC_LFE));
m_timeWarping[el] = (false /* N/A */ && (et < ID_USAC_LFE));
@ -1420,7 +1629,7 @@ ExhaleEncoder::ExhaleEncoder (int32_t* const inputPcmData, unsigned ch
m_mdstSignals[ch] = nullptr;
m_scaleFacData[ch] = nullptr;
m_specAnaCurr[ch] = 0;
m_specAnaPrev[ch] = 0;
//m_specAnaPrev[ch] = 0;
m_tempAnaCurr[ch] = 0;
m_tempAnaNext[ch] = 0;
m_timeSignals[ch] = nullptr;

View File

@ -83,15 +83,17 @@ private:
bool m_nonMpegExt;
#endif
uint8_t m_numElements;
uint8_t m_numSwbLong;
uint8_t m_numSwbShort;
unsigned char* m_outAuData;
BitStreamWriter m_outStream; // for access unit creation
int32_t* m_pcm24Data;
uint8_t m_perCorrCurr[USAC_MAX_NUM_ELEMENTS];
SfbGroupData* m_scaleFacData[USAC_MAX_NUM_CHANNELS];
SfbQuantizer m_sfbQuantizer; // powerlaw quantization
SpecAnalyzer m_specAnalyzer; // for spectral analysis
uint32_t m_specAnaCurr[USAC_MAX_NUM_CHANNELS];
uint32_t m_specAnaPrev[USAC_MAX_NUM_CHANNELS];
//uint32_t m_specAnaPrev[USAC_MAX_NUM_CHANNELS];
#if !RESTRICT_TO_AAC
SpecGapFiller m_specGapFiller;// for noise/gap filling
#endif

View File

@ -62,7 +62,7 @@ char toSamplingFrequencyIndex (const unsigned samplingRate)
return i;
}
#if !RESTRICT_TO_AAC
if (samplingRate == allowedSamplingRates[i + AAC_NUM_SAMPLE_RATES])
if (samplingRate == allowedSamplingRates[i + AAC_NUM_SAMPLE_RATES] && (samplingRate % 19200) == 0) // Baseline USAC
{
return i + AAC_NUM_SAMPLE_RATES + 2; // skip reserved entry
}

View File

@ -240,7 +240,7 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
const unsigned thresholdSlope = (48000 + SA_EPS * samplingRate) / 96000;
const unsigned thresholdStart = samplingRate >> 15;
if ((mdctSignals == nullptr) || (nChannels > USAC_MAX_NUM_CHANNELS) || (lfeChannelIndex > USAC_MAX_NUM_CHANNELS) ||
if ((mdctSignals == nullptr) || (mdstSignals == nullptr) || (nChannels > USAC_MAX_NUM_CHANNELS) || (lfeChannelIndex > USAC_MAX_NUM_CHANNELS) ||
(nSamplesInFrame > 2048) || (nSamplesInFrame < 2) || (samplingRate < 7350) || (samplingRate > 96000))
{
return 1; // invalid arguments error
@ -249,7 +249,7 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
for (unsigned ch = 0; ch < nChannels; ch++)
{
const int32_t* const chMdct = mdctSignals[ch];
const int32_t* const chMdst = (mdstSignals == nullptr ? nullptr : mdstSignals[ch]);
const int32_t* const chMdst = mdstSignals[ch];
// --- get L1 norm and max value in each band
uint16_t idxMaxSpec = 0;
uint64_t sumAvgBand = 0;
@ -272,61 +272,37 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
{
const uint16_t offs = b << SA_BW_SHIFT; // start offset of current analysis band
const int32_t* const bMdct = &chMdct[offs];
const int32_t* const bMdst = (chMdst == nullptr ? nullptr : &chMdst[offs]);
uint16_t maxAbsIdx = 0;
uint32_t maxAbsVal = 0, tmp = UINT_MAX;
uint64_t sumAbsVal = 0;
const int32_t* const bMdst = &chMdst[offs];
uint16_t maxAbsIdx = 0;
uint32_t maxAbsVal = 0, tmp = UINT_MAX;
uint64_t sumAbsVal = 0;
if (bMdst != nullptr) // complex-valued spectrum
for (int s = SA_BW - 1; s >= 0; s--)
{
for (int s = SA_BW - 1; s >= 0; s--)
{
// sum absolute values of complex signal, derive L1 norm, peak value, and peak index
// sum absolute values of complex spectrum, derive L1 norm, peak value, and peak index
#if SA_EXACT_COMPLEX_ABS
const double complexSqr = (double) bMdct[s] * (double) bMdct[s] + (double) bMdst[s] * (double) bMdst[s];
const uint32_t absSample = uint32_t (sqrt (complexSqr) + 0.5);
const double complexSqr = (double) bMdct[s] * (double) bMdct[s] + (double) bMdst[s] * (double) bMdst[s];
const uint32_t absSample = uint32_t (sqrt (complexSqr) + 0.5);
#else
const uint32_t absReal = abs (bMdct[s]); // Richard Lyons, 1997; en.wikipedia.org/
const uint32_t absImag = abs (bMdst[s]); // wiki/Alpha_max_plus_beta_min_algorithm
const uint32_t absSample = (absReal > absImag ? absReal + ((absImag * 3) >> 3) : absImag + ((absReal * 3) >> 3));
const uint32_t absReal = abs (bMdct[s]); // Richard Lyons, 1997; en.wikipedia.org/
const uint32_t absImag = abs (bMdst[s]); // wiki/Alpha_max_plus_beta_min_algorithm
const uint32_t absSample = (absReal > absImag ? absReal + ((absImag * 3) >> 3) : absImag + ((absReal * 3) >> 3));
#endif
sumAbsVal += absSample;
if (offs + s > 0) // exclude DC from max/min
{
if (maxAbsVal < absSample) // maximum data
{
maxAbsVal = absSample;
maxAbsIdx = (uint16_t) s;
}
if (tmp/*min*/> absSample) // minimum data
{
tmp/*min*/= absSample;
}
} // b > 0
}
}
else // real-valued spectrum, no imaginary part
{
for (int s = SA_BW - 1; s >= 0; s--)
sumAbsVal += absSample;
if (offs + s > 0) // exclude DC from max & min
{
// obtain absolute values of real signal, derive L1 norm, peak value, and peak index
const uint32_t absSample = abs (bMdct[s]);
sumAbsVal += absSample;
if (offs + s > 0) // exclude DC from max/min
if (maxAbsVal < absSample) // update maximum
{
if (maxAbsVal < absSample) // maximum data
{
maxAbsVal = absSample;
maxAbsIdx = (uint16_t) s;
}
if (tmp/*min*/> absSample) // minimum data
{
tmp/*min*/= absSample;
}
maxAbsVal = absSample;
maxAbsIdx = (uint16_t) s;
}
if (tmp/*min*/> absSample) // update minimum
{
tmp/*min*/= absSample;
}
}
}
} // for s
// bandwidth detection
if ((m_bandwidthOff[ch] == 0) && (maxAbsVal > __max (thresholdSlope * (thresholdStart + b), SA_EPS)))
{
@ -362,3 +338,101 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
return 0; // no error
}
// Stereo signal analysis: derives a per-band and a frame-average "perceptual correlation" value
// for a channel pair from its MDCT (real) and MDST (imaginary) spectra.
//
// mdctSignal1/2, mdstSignal1/2: spectra of the two channels, must not be nullptr.
// nSamplesMax    : number of spectral samples to analyze for long transforms (at most 2048).
// nSamplesInFrame: frame length in samples (at most 2048); used as analysis range for short
//                  transforms and as upper limit when filling stereoCorrValue.
// shortTransforms: true if the spectra hold short-transform data; the first and last analysis
//                  band of each of the 8 sub-ranges are then excluded from the frame average.
//                  For long transforms only the lowest analysis band is excluded.
// stereoCorrValue: optional output, one value (0..255) per analysis band of SA_BW samples over
//                  the whole frame; bands at or above the analyzed range are set to UCHAR_MAX.
//                  May be nullptr (the declared default), in which case nothing is stored.
//
// Returns SHRT_MIN on invalid arguments. Otherwise returns the frame's average correlation,
// negated when the accumulated side magnitude exceeds 1.5 times the accumulated mid magnitude.
int16_t SpecAnalyzer::stereoSigAnalysis (const int32_t* const mdctSignal1, const int32_t* const mdctSignal2,
                                         const int32_t* const mdstSignal1, const int32_t* const mdstSignal2,
                                         const unsigned nSamplesMax, const unsigned nSamplesInFrame, const bool shortTransforms,
                                         uint8_t* const stereoCorrValue /*= nullptr*/) // per-band perceptual correlation data
{
  const uint64_t anaBwOffset = SA_BW >> 1; // rounding offset for divisions by SA_BW
  const uint16_t numAnaBands = (shortTransforms ? nSamplesInFrame : nSamplesMax) >> SA_BW_SHIFT;
  const uint16_t numAnaModul = (shortTransforms ? numAnaBands >> 3 : numAnaBands + 1);
  int16_t b;

  if ((mdctSignal1 == nullptr) || (mdctSignal2 == nullptr) || (mdstSignal1 == nullptr) || (mdstSignal2 == nullptr) ||
      (nSamplesInFrame > 2048) || (nSamplesMax > 2048) || (numAnaBands == 0) || (numAnaModul == 0))
  {
    b = SHRT_MIN; // invalid arguments error
  }
  else
  {
    uint16_t currPC = 0, numPC = 0; // frame-average correlation
    uint64_t sumReM = 0, sumReS = 0;// mid-side RMS distribution

    for (b = numAnaBands - 1; b >= 0; b--)
    {
      const uint16_t anaBandModul = b % numAnaModul; // to exclude first and last window band
      const uint16_t offs = b << SA_BW_SHIFT; // start offset of current analysis band
      const int32_t* const lbMdct = &mdctSignal1[offs];
      const int32_t* const lbMdst = &mdstSignal1[offs];
      const int32_t* const rbMdct = &mdctSignal2[offs];
      const int32_t* const rbMdst = &mdstSignal2[offs];
      uint64_t sumMagnL = 0, sumMagnR = 0; // temporary RMS sums
      uint64_t sumPrdLR = 0, sumPrdLL = 0, sumPrdRR = 0;
      uint64_t sumRealL = 0, sumRealR = 0;
      uint64_t sumRealM = 0, sumRealS = 0, sumPrdMS; // mid-side
      double nlr, dll, drr;
      uint8_t bandPC; // correlation value of the current band

      for (int s = SA_BW - 1; s >= 0; s--)
      {
        const uint32_t absRealL = abs (lbMdct[s]);
        const uint32_t absRealR = abs (rbMdct[s]);
        // sum and difference are formed in 64 bit, the 32-bit operations could overflow
        const int64_t  realSumLR = (int64_t) lbMdct[s] + (int64_t) rbMdct[s];
        const int64_t  realDifLR = (int64_t) lbMdct[s] - (int64_t) rbMdct[s];
#if SA_EXACT_COMPLEX_ABS
        const double complexSqrL = (double) lbMdct[s] * (double) lbMdct[s] + (double) lbMdst[s] * (double) lbMdst[s];
        const uint32_t absMagnL  = uint32_t (sqrt (complexSqrL) + 0.5);
        const double complexSqrR = (double) rbMdct[s] * (double) rbMdct[s] + (double) rbMdst[s] * (double) rbMdst[s];
        const uint32_t absMagnR  = uint32_t (sqrt (complexSqrR) + 0.5);
#else
        const uint32_t absImagL = abs (lbMdst[s]); // Richard Lyons, 1997; en.wikipedia.org/
        const uint32_t absImagR = abs (rbMdst[s]); // wiki/Alpha_max_plus_beta_min_algorithm
        const uint32_t absMagnL = (absRealL > absImagL ? absRealL + ((absImagL * 3) >> 3) : absImagL + ((absRealL * 3) >> 3));
        const uint32_t absMagnR = (absRealR > absImagR ? absRealR + ((absImagR * 3) >> 3) : absImagR + ((absRealR * 3) >> 3));
#endif
        sumRealL += absRealL;
        sumRealR += absRealR;
        sumRealM += uint64_t (realSumLR < 0 ? -realSumLR : realSumLR); // i.e., 2*mid,
        sumRealS += uint64_t (realDifLR < 0 ? -realDifLR : realDifLR); // i.e., 2*side
        sumMagnL += absMagnL;
        sumMagnR += absMagnR;
        sumPrdLR += ((uint64_t) absMagnL * (uint64_t) absMagnR + anaBwOffset) >> SA_BW_SHIFT;
        sumPrdLL += ((uint64_t) absMagnL * (uint64_t) absMagnL + anaBwOffset) >> SA_BW_SHIFT;
        sumPrdRR += ((uint64_t) absMagnR * (uint64_t) absMagnR + anaBwOffset) >> SA_BW_SHIFT;
      } // for s

      sumRealL = (sumRealL + anaBwOffset) >> SA_BW_SHIFT; // avg
      sumRealR = (sumRealR + anaBwOffset) >> SA_BW_SHIFT;
      sumRealM = (sumRealM + anaBwOffset) >> SA_BW_SHIFT;
      sumRealS = (sumRealS + anaBwOffset) >> SA_BW_SHIFT;

      // first measure: ratio of the left-right product to the mid-side product of the averages
      nlr = double (sumRealL * sumRealR) * 0.46875; // tuned for uncorrelated full-scale noise
      sumPrdMS = uint64_t (nlr > double (sumRealM * sumRealS) ? 256.0 : 0.5 + (512.0 * nlr) / __max (1.0, double (sumRealM * sumRealS)));

      // second measure: squared normalized cross-correlation of the mean-removed magnitudes
      dll = double ((sumMagnL + anaBwOffset) >> SA_BW_SHIFT);
      drr = double ((sumMagnR + anaBwOffset) >> SA_BW_SHIFT);
      nlr = (sumPrdLR + dll * drr) * SA_BW - sumMagnL * drr - sumMagnR * dll;
      dll = (sumPrdLL + dll * dll) * SA_BW - sumMagnL * dll - sumMagnL * dll;
      drr = (sumPrdRR + drr * drr) * SA_BW - sumMagnR * drr - sumMagnR * drr;
      sumPrdLR = uint64_t ((nlr <= 0.0) || (dll * drr <= 0.0) ? 0 : 0.5 + (256.0 * nlr * nlr) / (dll * drr));

      bandPC = (uint8_t) __min (UCHAR_MAX, __max (sumPrdMS, sumPrdLR)); // in band
      if (stereoCorrValue != nullptr) stereoCorrValue[b] = bandPC; // output is optional

      if ((anaBandModul > 0) && (anaBandModul + 1 < numAnaModul)) // in frame (averaged below)
      {
        currPC += bandPC;  numPC++;
        sumReM += sumRealM;
        sumReS += sumRealS;
      }
    } // for b

    if (stereoCorrValue != nullptr)
    {
      for (b = numAnaBands; b < int16_t (nSamplesInFrame >> SA_BW_SHIFT); b++)
      {
        stereoCorrValue[b] = UCHAR_MAX; // to allow joint-stereo coding at very high frequencies
      }
    }
    if (numPC > 1) currPC = (currPC + (numPC >> 1)) / numPC; // frame's perceptual correlation

    b = (int16_t) currPC * (sumReS * 2 > sumReM * 3 ? -1 : 1); // negation implies side > mid
  }

  return b;
}

View File

@ -57,6 +57,10 @@ public:
const int32_t* const mdstSignals[USAC_MAX_NUM_CHANNELS],
const unsigned nChannels, const unsigned nSamplesInFrame, const unsigned samplingRate,
const unsigned lfeChannelIndex = USAC_MAX_NUM_CHANNELS); // to skip an LFE channel
int16_t stereoSigAnalysis (const int32_t* const mdctSignal1, const int32_t* const mdctSignal2,
const int32_t* const mdstSignal1, const int32_t* const mdstSignal2,
const unsigned nSamplesMax, const unsigned nSamplesInFrame, const bool shortTransforms,
uint8_t* const stereoCorrValue = nullptr); // per-band perceptual correlation data
}; // SpecAnalyzer
#endif // _SPEC_ANALYSIS_H_