real pred. stereo

This commit is contained in:
Christian R. Helmrich 2020-03-31 02:00:01 +02:00
parent 036d9b7d20
commit 37cab9d936
7 changed files with 291 additions and 64 deletions

View File

@ -30,7 +30,7 @@ exhale is being made available under an open-source license which is
similar to the 3-clause BSD license but modified to address specific
aspects dictated by the nature and the output of this application.
The license text and release notes for the current version 1.0.1 can
The license text and release notes for the current version 1.0.2 can
be found in the `include` subdirectory of the exhale distribution.

View File

@ -15,5 +15,5 @@
# define EXHALELIB_VERSION_MINOR "0"
#endif
#ifndef EXHALELIB_VERSION_BUGFIX
# define EXHALELIB_VERSION_BUGFIX ".1" // "RC" or ".0", ".1", ...
# define EXHALELIB_VERSION_BUGFIX ".2" // "RC" or ".0", ".1", ...
#endif

View File

@ -13,7 +13,7 @@
0 ICON "exhaleApp.ico"
VS_VERSION_INFO VERSIONINFO
FILEVERSION 1,0,1
FILEVERSION 1,0,2
BEGIN
BLOCK "StringFileInfo"
BEGIN

View File

@ -782,18 +782,23 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
}
} // if coreConfig.commonWindow
if (coreConfig.stereoMode > 0) // use M/S, synch statistics
if ((errorValue == 0) && (coreConfig.stereoMode == 2)) // frame M/S, synch statistics
{
const bool eightShorts = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT);
const uint8_t numSwbFrame = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT ? m_numSwbShort : __min (m_numSwbLong, maxSfbLong));
const uint32_t peakIndexSte = __max ((m_specAnaCurr[ci] >> 5) & 2047, (m_specAnaCurr[ci + 1] >> 5) & 2047) << 5;
errorValue |= m_stereoCoder.applyFullFrameMatrix (m_mdctSignals[ci], m_mdctSignals[ci + 1],
m_mdstSignals[ci], m_mdstSignals[ci + 1],
coreConfig.groupingData[0], coreConfig.groupingData[1],
coreConfig.tnsData[0], coreConfig.tnsData[1],
(eightShorts ? m_numSwbShort : m_numSwbLong), coreConfig.stereoData,
&sfbStepSizes[ ci * m_numSwbShort * NUM_WINDOW_GROUPS],
&sfbStepSizes[(ci + 1) * m_numSwbShort * NUM_WINDOW_GROUPS]);
coreConfig.stereoData[0] = (coreConfig.stereoData[0] & 0xFE) | (coreConfig.stereoConfig >> 1); // TODO: pass pred_dir, complex_coef as args
errorValue = m_stereoCoder.applyFullFrameMatrix (m_mdctSignals[ci], m_mdctSignals[ci + 1],
m_mdstSignals[ci], m_mdstSignals[ci + 1],
coreConfig.groupingData[0], coreConfig.groupingData[1],
coreConfig.tnsData[0], coreConfig.tnsData[1],
numSwbFrame, coreConfig.stereoData,
&sfbStepSizes[m_numSwbShort * NUM_WINDOW_GROUPS * ci],
&sfbStepSizes[m_numSwbShort * NUM_WINDOW_GROUPS * (ci + 1)]);
if (errorValue == 2) // use frame M/S with cplx_pred_all=1
{
coreConfig.stereoMode += 2; errorValue = 0;
}
m_specAnaCurr[ci ] = (m_specAnaCurr[ci ] & (UINT_MAX - 65504)) | peakIndexSte;
m_specAnaCurr[ci + 1] = (m_specAnaCurr[ci + 1] & (UINT_MAX - 65504)) | peakIndexSte;
meanSpecFlat[ci] = meanSpecFlat[ci + 1] = ((uint16_t) meanSpecFlat[ci] + (uint16_t) meanSpecFlat[ci + 1]) >> 1;
@ -1226,7 +1231,7 @@ unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS
m_perCorrCurr[el] = (uint8_t) __max (prevPerCorr - allowedDiff, __min (prevPerCorr + allowedDiff, (int16_t) s));
}
if (s == steAnaStats * -1) coreConfig.stereoConfig = 2; // 2: side > mid, pred_dir=1
// if (s == steAnaStats * -1) coreConfig.stereoConfig = 2; // 2: side > mid, pred_dir=1
if (s > (UCHAR_MAX * 3) / 4) coreConfig.stereoMode = 2; // 2: all, ms_mask_present=2
}
else if (nrChannels > 1) m_perCorrCurr[el] = 128; // update history with halfway value

View File

@ -98,8 +98,8 @@ unsigned SpecAnalyzer::getMeanAbsValues (const int32_t* const mdctSignal, const
sumAbsVal += uint64_t (sqrt (complexSqr) + 0.5);
#else
const uint32_t absReal = abs (bMdct[s]); // Richard Lyons, 1997; en.wikipedia.org/
const uint32_t absImag = abs (bMdst[s]); // wiki/Alpha_max_plus_beta_min_algorithm
const uint64_t absReal = abs (bMdct[s]); // Richard Lyons, 1997; en.wikipedia.org/
const uint64_t absImag = abs (bMdst[s]); // wiki/Alpha_max_plus_beta_min_algorithm
sumAbsVal += (absReal > absImag ? absReal + ((absImag * 3) >> 3) : absImag + ((absReal * 3) >> 3));
#endif
@ -123,15 +123,15 @@ unsigned SpecAnalyzer::getMeanAbsValues (const int32_t* const mdctSignal, const
{
// based on S. Merdjani, L. Daudet, "Estimation of Frequency from MDCT-Encoded Files,"
// DAFx-03, 2003, http://www.eecs.qmul.ac.uk/legacy/dafx03/proceedings/pdfs/dafx01.pdf
const uint32_t absReal = abs (bMdct[s]); // Richard Lyons, 1997; see also code above
const uint32_t absEstIm = abs (bMdct[s + 1] - bMdct[s - 1]) >> 1; // s - 1 may be -1!
const uint64_t absReal = abs (bMdct[s]); // Richard Lyons, 1997; see also code above
const uint64_t absEstIm = abs (bMdct[s + 1] - bMdct[s - 1]) >> 1; // s - 1 may be -1!
sumAbsVal += (absReal > absEstIm ? absReal + ((absEstIm * 3) >> 3) : absEstIm + ((absReal * 3) >> 3));
}
if ((b == 0) && (bandWidth > 0)) // correct estimate at DC
{
const uint32_t absReal = abs (bMdct[0]); // Richard Lyons, 1997; see also code above
const uint32_t absEstIm = abs (bMdct[1] - bMdct[-1]) >> 1; // if s - 1 was -1 earlier
const uint64_t absReal = abs (bMdct[0]); // Richard Lyons, 1997; see also code above
const uint64_t absEstIm = abs (bMdct[1] - bMdct[-1]) >> 1; // if s - 1 was -1 earlier
sumAbsVal -= (absReal > absEstIm ? absReal + ((absEstIm * 3) >> 3) : absEstIm + ((absReal * 3) >> 3));
sumAbsVal += absReal;
@ -284,9 +284,9 @@ unsigned SpecAnalyzer::spectralAnalysis (const int32_t* const mdctSignals[USAC_M
const double complexSqr = (double) bMdct[s] * (double) bMdct[s] + (double) bMdst[s] * (double) bMdst[s];
const uint32_t absSample = uint32_t (sqrt (complexSqr) + 0.5);
#else
const uint32_t absReal = abs (bMdct[s]); // Richard Lyons, 1997; en.wikipedia.org/
const uint32_t absImag = abs (bMdst[s]); // wiki/Alpha_max_plus_beta_min_algorithm
const uint32_t absSample = (absReal > absImag ? absReal + ((absImag * 3) >> 3) : absImag + ((absReal * 3) >> 3));
const uint64_t absReal = abs (bMdct[s]); // Richard Lyons, 1997; en.wikipedia.org/
const uint64_t absImag = abs (bMdst[s]); // wiki/Alpha_max_plus_beta_min_algorithm
const uint32_t absSample = uint32_t (absReal > absImag ? absReal + ((absImag * 3) >> 3) : absImag + ((absReal * 3) >> 3));
#endif
sumAbsVal += absSample;
if (offs + s > 0) // exclude DC from max & min
@ -383,10 +383,10 @@ int16_t SpecAnalyzer::stereoSigAnalysis (const int32_t* const mdctSignal1, const
const double complexSqrR = (double) rbMdct[s] * (double) rbMdct[s] + (double) rbMdst[s] * (double) rbMdst[s];
const uint32_t absMagnR = uint32_t (sqrt (complexSqrR) + 0.5);
#else
const uint32_t absImagL = abs (lbMdst[s]); // Richard Lyons, 1997; en.wikipedia.org/
const uint32_t absImagR = abs (rbMdst[s]); // wiki/Alpha_max_plus_beta_min_algorithm
const uint32_t absMagnL = (absRealL > absImagL ? absRealL + ((absImagL * 3) >> 3) : absImagL + ((absRealL * 3) >> 3));
const uint32_t absMagnR = (absRealR > absImagR ? absRealR + ((absImagR * 3) >> 3) : absImagR + ((absRealR * 3) >> 3));
const uint64_t absImagL = abs (lbMdst[s]); // Richard Lyons, 1997; en.wikipedia.org/
const uint64_t absImagR = abs (rbMdst[s]); // wiki/Alpha_max_plus_beta_min_algorithm
const uint64_t absMagnL = (absRealL > absImagL ? absRealL + ((absImagL * 3) >> 3) : absImagL + ((absRealL * 3) >> 3));
const uint64_t absMagnR = (absRealR > absImagR ? absRealR + ((absImagR * 3) >> 3) : absImagR + ((absRealR * 3) >> 3));
#endif
sumRealL += absRealL;
sumRealR += absRealR;
@ -395,9 +395,9 @@ int16_t SpecAnalyzer::stereoSigAnalysis (const int32_t* const mdctSignal1, const
sumMagnL += absMagnL;
sumMagnR += absMagnR;
sumPrdLR += ((uint64_t) absMagnL * (uint64_t) absMagnR + anaBwOffset) >> SA_BW_SHIFT;
sumPrdLL += ((uint64_t) absMagnL * (uint64_t) absMagnL + anaBwOffset) >> SA_BW_SHIFT;
sumPrdRR += ((uint64_t) absMagnR * (uint64_t) absMagnR + anaBwOffset) >> SA_BW_SHIFT;
sumPrdLR += (absMagnL * absMagnR + anaBwOffset) >> SA_BW_SHIFT;
sumPrdLL += (absMagnL * absMagnL + anaBwOffset) >> SA_BW_SHIFT;
sumPrdRR += (absMagnR * absMagnR + anaBwOffset) >> SA_BW_SHIFT;
} // for s
sumRealL = (sumRealL + anaBwOffset) >> SA_BW_SHIFT; // avg

View File

@ -18,7 +18,7 @@
// constructor
StereoProcessor::StereoProcessor ()
{
return;
memset (m_stereoCorrValue, 0, (1024 >> SA_BW_SHIFT) * sizeof (uint8_t));
}
// public functions
@ -30,24 +30,31 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in
uint32_t* const sfbStepSize1, uint32_t* const sfbStepSize2)
{
const bool applyPredSte = (sfbStereoData != nullptr); // use real-valued predictive stereo
const uint8_t maxSfbSte = __max (groupingData1.sfbsPerGroup, groupingData2.sfbsPerGroup);
const bool alterPredDir = (applyPredSte && (sfbStereoData[0] & 1)); // true: mid from side
const SfbGroupData& grp = groupingData1;
const bool eightShorts = (grp.numWindowGroups > 1);
const uint8_t maxSfbSte = (eightShorts ? __max (grp.sfbsPerGroup, groupingData2.sfbsPerGroup) : numSwbFrame);
uint16_t numSfbPredSte = 0; // counter
if ((mdctSpectrum1 == nullptr) || (mdctSpectrum2 == nullptr) || (groupingData1.numWindowGroups != groupingData2.numWindowGroups) ||
if ((mdctSpectrum1 == nullptr) || (mdctSpectrum2 == nullptr) || (numSwbFrame < maxSfbSte) || (grp.numWindowGroups != groupingData2.numWindowGroups) ||
(sfbStepSize1 == nullptr) || (sfbStepSize2 == nullptr) || (numSwbFrame < MIN_NUM_SWB_SHORT) || (numSwbFrame > MAX_NUM_SWB_LONG))
{
return 1; // invalid arguments error
return 1; // invalid arguments error
}
for (uint16_t gr = 0; gr < groupingData1.numWindowGroups; gr++)
if (applyPredSte && !eightShorts) memcpy (m_stereoCorrValue, sfbStereoData, (grp.sfbOffsets[numSwbFrame] >> SA_BW_SHIFT) * sizeof (uint8_t));
for (uint16_t gr = 0; gr < grp.numWindowGroups; gr++)
{
const bool realOnlyCalc = (filterData1.numFilters > 0 && gr == filterData1.filteredWindow) || (mdstSpectrum1 == nullptr) ||
const bool realOnlyCalc = (filterData1.numFilters > 0 && gr == filterData1.filteredWindow) || (mdstSpectrum1 == nullptr) ||
(filterData2.numFilters > 0 && gr == filterData2.filteredWindow) || (mdstSpectrum2 == nullptr);
const uint16_t* grpOff = &groupingData1.sfbOffsets[numSwbFrame * gr];
const uint16_t* grpOff = &grp.sfbOffsets[numSwbFrame * gr];
uint32_t* const grpRms1 = &groupingData1.sfbRmsValues[numSwbFrame * gr];
uint32_t* const grpRms2 = &groupingData2.sfbRmsValues[numSwbFrame * gr];
uint32_t* grpStepSizes1 = &sfbStepSize1[numSwbFrame * gr];
uint32_t* grpStepSizes2 = &sfbStepSize2[numSwbFrame * gr];
int32_t prevReM = 0, prevReS = 0;
int32_t b = 0, prevReM = 0, prevReS = 0;
uint32_t rmsSfbL[2] = {0, 0}, rmsSfbR[2] = {0, 0};
if (realOnlyCalc) // preparation for first magnitude value
{
@ -59,16 +66,13 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in
for (uint16_t sfb = 0; sfb < maxSfbSte; sfb++)
{
const uint32_t sfbRmsL = __max (SP_EPS, grpRms1[sfb]);
const uint32_t sfbRmsR = __max (SP_EPS, grpRms2[sfb]);
const double sfbFacLR = (sfbRmsL < (grpStepSizes1[sfb] >> 1) ? 1.0 : 2.0) * (sfbRmsR < (grpStepSizes2[sfb] >> 1) ? 1.0 : 2.0);
const double sfbRatLR = __min (1.0, grpStepSizes1[sfb] / (sfbRmsL * 2.0)) * __min (1.0, grpStepSizes2[sfb] / (sfbRmsR * 2.0)) * sfbFacLR;
const int32_t sfbIsOdd = sfb & 1;
const uint16_t sfbStart = grpOff[sfb];
const uint16_t sfbWidth = grpOff[sfb + 1] - sfbStart;
int32_t* sfbMdct1 = &mdctSpectrum1[sfbStart];
int32_t* sfbMdct2 = &mdctSpectrum2[sfbStart];
double sfbRmsMaxMS;
uint64_t sumAbsValM = 0, sumAbsValS = 0;
double sfbTempVar;
if (realOnlyCalc) // real data, only MDCTs are available
{
@ -80,13 +84,13 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in
const int32_t dmixReM = int32_t (((int64_t) *sfbMdct1 + (int64_t) *sfbMdct2 + 1) >> 1);
const int32_t dmixReS = int32_t (((int64_t) *sfbMdct1 - (int64_t) *sfbMdct2 + 1) >> 1);
// TODO: improve the following lines since the calculation is partially redundant!
const int32_t dmixImM = int32_t (((*sfbNext1 + (int64_t) *sfbNext2 + 1) >> 1) - (int64_t) prevReM) >> 1; // estimate, see also
const int32_t dmixImS = int32_t (((*sfbNext1 - (int64_t) *sfbNext2 + 1) >> 1) - (int64_t) prevReS) >> 1; // getMeanAbsValues()
const int32_t dmixImM = int32_t ((((*sfbNext1 + (int64_t) *sfbNext2 + 1) >> 1) - (int64_t) prevReM) >> 1); // estimate, see also
const int32_t dmixImS = int32_t ((((*sfbNext1 - (int64_t) *sfbNext2 + 1) >> 1) - (int64_t) prevReS) >> 1); // getMeanAbsValues()
const uint32_t absReM = abs (dmixReM);
const uint32_t absReS = abs (dmixReS); // Richard Lyons, 1997; en.wikipedia.org/
const uint32_t absImM = abs (dmixImM); // wiki/Alpha_max_plus_beta_min_algorithm
const uint32_t absImS = abs (dmixImS);
const uint64_t absReM = abs (dmixReM);
const uint64_t absReS = abs (dmixReS); // Richard Lyons, 1997; en.wikipedia.org/
const uint64_t absImM = abs (dmixImM); // wiki/Alpha_max_plus_beta_min_algorithm
const uint64_t absImS = abs (dmixImS);
sumAbsValM += (absReM > absImM ? absReM + ((absImM * 3) >> 3) : absImM + ((absReM * 3) >> 3));
sumAbsValS += (absReS > absImS ? absReS + ((absImS * 3) >> 3) : absImS + ((absReS * 3) >> 3));
@ -126,10 +130,10 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in
sumAbsValM += uint64_t (sqrt (cplxSqrM) + 0.5);
sumAbsValS += uint64_t (sqrt (cplxSqrS) + 0.5);
#else
const uint32_t absReM = abs (dmixReM);
const uint32_t absReS = abs (dmixReS); // Richard Lyons, 1997; en.wikipedia.org/
const uint32_t absImM = abs (dmixImM); // wiki/Alpha_max_plus_beta_min_algorithm
const uint32_t absImS = abs (dmixImS);
const uint64_t absReM = abs (dmixReM);
const uint64_t absReS = abs (dmixReS); // Richard Lyons, 1997; en.wikipedia.org/
const uint64_t absImM = abs (dmixImM); // wiki/Alpha_max_plus_beta_min_algorithm
const uint64_t absImS = abs (dmixImS);
sumAbsValM += (absReM > absImM ? absReM + ((absImM * 3) >> 3) : absImM + ((absReM * 3) >> 3));
sumAbsValS += (absReS > absImS ? absReS + ((absImS * 3) >> 3) : absImS + ((absReS * 3) >> 3));
@ -141,25 +145,240 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in
}
} // realOnlyCalc
rmsSfbL[sfbIsOdd] = grpRms1[sfb];
rmsSfbR[sfbIsOdd] = grpRms2[sfb];
// average spectral sample magnitude across current band
grpRms1[sfb] = uint32_t ((sumAbsValM + (sfbWidth >> 1)) / sfbWidth);
grpRms2[sfb] = uint32_t ((sumAbsValS + (sfbWidth >> 1)) / sfbWidth);
sfbRmsMaxMS = __max (grpRms1[sfb], grpRms2[sfb]);
if (sfbFacLR <= 1.0) // total simultaneous masking, no positive SNR in any channel SFB
sfbStereoData[sfb + numSwbFrame * gr] = 16; // alpha = 0
if ((sfbIsOdd) || (sfb + 1 == maxSfbSte)) // finish pair
{
double max = __max (sfbRmsL, sfbRmsR);
grpStepSizes1[sfb] = grpStepSizes2[sfb] = uint32_t (__max (grpStepSizes1[sfb], grpStepSizes2[sfb]) * (sfbRmsMaxMS / max) + 0.5);
}
else // partial or no masking, redistribute positive SNR into at least one channel SFB
const uint16_t sfbEv = sfb & 0xFFFE; // even SFB index
uint32_t rmsSfbM[2] = {0, 0}, rmsSfbS[2] = {0, 0};
if (applyPredSte) // calc real-prediction coefficients
{
const uint16_t offEv = grpOff[sfbEv];
const uint16_t width = grpOff[sfb + 1] - offEv;
const int32_t* mdctA = (alterPredDir ? &mdctSpectrum2[offEv] : &mdctSpectrum1[offEv]);
const int32_t* mdctB = (alterPredDir ? &mdctSpectrum1[offEv] : &mdctSpectrum2[offEv]);
int64_t sumPrdRefRes = 0, sumPrdRefRef = SP_EPS; // stabilizes the division below
double d, alphaLimit = 1.5; // max alpha_q magnitude
bool nonZeroPredCoef = false;
for (uint16_t s = width; s > 0; s--, mdctA++, mdctB++)
{
sumPrdRefRes += ((int64_t) *mdctA * (int64_t) *mdctB + SA_BW) >> (SA_BW_SHIFT + 1);
sumPrdRefRef += ((int64_t) *mdctA * (int64_t) *mdctA + SA_BW) >> (SA_BW_SHIFT + 1);
}
if (realOnlyCalc) // real data, only MDCTs available
{
// TODO
}
else // complex data, both MDCTs and MDSTs available
{
const int32_t* mdstA = (alterPredDir ? &mdstSpectrum2[offEv] : &mdstSpectrum1[offEv]);
const int32_t* mdstB = (alterPredDir ? &mdstSpectrum1[offEv] : &mdstSpectrum2[offEv]);
for (uint16_t s = width; s > 0; s--, mdstA++, mdstB++)
{
sumPrdRefRes += ((int64_t) *mdstA * (int64_t) *mdstB + SA_BW) >> (SA_BW_SHIFT + 1);
sumPrdRefRef += ((int64_t) *mdstA * (int64_t) *mdstA + SA_BW) >> (SA_BW_SHIFT + 1);
}
}
sfbTempVar = (double) sumPrdRefRes / (double) sumPrdRefRef; // compute alpha_q_re
for (b = sfbIsOdd; b >= 0; b--) // limit alpha_q_re to avoid residual RMS increase
{
const int idx = sfbEv + b;
d = (alterPredDir ? (double) grpRms1[idx] / __max (SP_EPS, grpRms2[idx]) : (double) grpRms2[idx] / __max (SP_EPS, grpRms1[idx]));
if (alphaLimit > d) alphaLimit = d;
}
sfbTempVar = CLIP_PM (sfbTempVar, alphaLimit);
#if SP_OPT_ALPHA_QUANT
b = __max (512, 524 - int32_t (abs (10.0 * sfbTempVar))); // rounding optimization
b = int32_t (10.0 * sfbTempVar + b * (sfbTempVar < 0 ? -0.0009765625 : 0.0009765625));
#else
b = int32_t (10.0 * sfbTempVar + (sfbTempVar < 0 ? -0.5 : 0.5));// nearest integer
#endif
sfbStereoData[sfbEv + numSwbFrame * gr] = uint8_t (b + 16);
if (!eightShorts && ((offEv & (SA_BW - 1)) == 0) && ((width & (SA_BW - 1)) == 0))
{
const uint8_t* const perCorr = &m_stereoCorrValue[offEv >> SA_BW_SHIFT];
// perceptual correlation data available from previous call to stereoSigAnalysis
b = (width == SA_BW ? perCorr[0] : ((int32_t) perCorr[0] + (int32_t) perCorr[1] + 1) >> 1);
}
else b = UCHAR_MAX; // previous correlation data unavailable, assume maximum value
if ((b > SCHAR_MAX) && (sfbEv < __max (grp.sfbsPerGroup, groupingData2.sfbsPerGroup)) &&
(sfbStereoData[sfbEv + numSwbFrame * gr] != 16))
{
nonZeroPredCoef = true;
}
sfbTempVar *= sfbTempVar; // account for residual RMS reduction due to prediction
for (b = sfbIsOdd; b >= 0; b--)
{
const int idx = sfbEv + b;
if (alterPredDir)
{
d = (double) grpRms1[idx] * grpRms1[idx] - sfbTempVar * (double) grpRms2[idx] * grpRms2[idx];
// consider discarding prediction if gain (residual RMS loss) is below -0.9 dB
if ((double) grpRms1[idx] * grpRms1[idx] * 0.8125 < d) nonZeroPredCoef = false;
rmsSfbM[b] = uint32_t (sqrt (__max (0.0, d)) + 0.5);
rmsSfbS[b] = grpRms2[idx];
}
else // mid>side
{
d = (double) grpRms2[idx] * grpRms2[idx] - sfbTempVar * (double) grpRms1[idx] * grpRms1[idx];
// consider discarding prediction if gain (residual RMS loss) is below -0.9 dB
if ((double) grpRms2[idx] * grpRms2[idx] * 0.8125 < d) nonZeroPredCoef = false;
rmsSfbS[b] = uint32_t (sqrt (__max (0.0, d)) + 0.5);
rmsSfbM[b] = grpRms1[idx];
}
}
if (nonZeroPredCoef) numSfbPredSte++; // count the "significant" prediction bands
} // if applyPredSte
for (b = sfbIsOdd; b >= 0; b--)
{
const int idx = sfbEv + b;
const uint32_t sfbRmsL = __max (SP_EPS, rmsSfbL[b]);
const uint32_t sfbRmsR = __max (SP_EPS, rmsSfbR[b]);
const double sfbFacLR = (sfbRmsL < (grpStepSizes1[idx] >> 1) ? 1.0 : 2.0) * (sfbRmsR < (grpStepSizes2[idx] >> 1) ? 1.0 : 2.0);
sfbTempVar = (applyPredSte ? __max (rmsSfbM[b], rmsSfbS[b]) : __max (grpRms1[idx], grpRms2[idx]));
if (sfbFacLR <= 1.0) // total simultaneous masking - no positive SNR in either SFB
{
const double max = __max (sfbRmsL, sfbRmsR);
grpStepSizes1[idx] = grpStepSizes2[idx] = uint32_t (__max (grpStepSizes1[idx], grpStepSizes2[idx]) * (sfbTempVar / max) + 0.5);
}
else // partial/no masking - redistribute positive SNR into at least 1 channel SFB
{
const double min = (applyPredSte ? __min (rmsSfbM[b], rmsSfbS[b]) : __min (grpRms1[idx], grpRms2[idx]));
const double rat = __min (1.0, grpStepSizes1[idx] / (sfbRmsL * 2.0)) * __min (1.0, grpStepSizes2[idx] / (sfbRmsR * 2.0)) * sfbFacLR;
grpStepSizes1[sfb] = grpStepSizes2[sfb] = uint32_t (__max (SP_EPS, (min > rat * sfbTempVar ? sqrt (rat * sfbTempVar * min) :
__min (1.0, rat) * sfbTempVar)) + 0.5);
}
}
} // if pair completed
}
} // for gr
if (numSfbPredSte <= 1) // discard prediction coefficients and stay with legacy M/S stereo
{
memset (sfbStereoData, 16, numSwbFrame * grp.numWindowGroups * sizeof (uint8_t));
numSfbPredSte = 0;
}
else // at least two "significant" prediction bands - apply prediction and update RMS data
{
for (uint16_t gr = 0; gr < grp.numWindowGroups; gr++)
{
const bool realOnlyCalc = (filterData1.numFilters > 0 && gr == filterData1.filteredWindow) || (mdstSpectrum1 == nullptr) ||
(filterData2.numFilters > 0 && gr == filterData2.filteredWindow) || (mdstSpectrum2 == nullptr);
const uint16_t* grpOff = &grp.sfbOffsets[numSwbFrame * gr];
uint32_t* const grpRms1 = &groupingData1.sfbRmsValues[numSwbFrame * gr];
uint32_t* const grpRms2 = &groupingData2.sfbRmsValues[numSwbFrame * gr];
int32_t b = 0, prevResi = 0;
if (realOnlyCalc) // preparation of res. magnitude value
{
double min = __min (grpRms1[sfb], grpRms2[sfb]);
grpStepSizes1[sfb] = grpStepSizes2[sfb] = uint32_t (__max (SP_EPS, (min > sfbRatLR * sfbRmsMaxMS ? sqrt (sfbRatLR * sfbRmsMaxMS *
min) : __min (1.0/*0 dB*/, sfbRatLR) * sfbRmsMaxMS)) + 0.5);
const int64_t alphaRe = ((int) sfbStereoData[numSwbFrame * gr] - 16) * 6554; // *0.1
const uint16_t sPlus1 = grpOff[0] + 1;
prevResi = (alterPredDir ? mdctSpectrum1[sPlus1] - int32_t ((mdctSpectrum2[sPlus1] * alphaRe - SHRT_MIN) >> 16)
: mdctSpectrum2[sPlus1] - int32_t ((mdctSpectrum1[sPlus1] * alphaRe - SHRT_MIN) >> 16));
}
if (applyPredSte) sfbStereoData[sfb + numSwbFrame * gr] = 16; // zero prediction coefs
} // for sfb
for (uint16_t sfb = 0; sfb < maxSfbSte; sfb++)
{
const uint16_t sfbEv = sfb & 0xFFFE; // even SFB index
const uint16_t sfbStart = grpOff[sfb];
const uint16_t sfbWidth = grpOff[sfb + 1] - sfbStart;
const int64_t alphaRe = ((int) sfbStereoData[sfbEv + numSwbFrame * gr] - 16) * 6554;
int32_t* sfbMdctD = (alterPredDir ? &mdctSpectrum2[sfbStart] : &mdctSpectrum1[sfbStart]);
int32_t* sfbMdctR = (alterPredDir ? &mdctSpectrum1[sfbStart] : &mdctSpectrum2[sfbStart]);
uint64_t sumAbsValR = 0;
if (alphaRe == 0) continue; // nothing to do, no pred.
if (realOnlyCalc) // real data, only MDCT is available
{
int32_t* sfbNextD = &sfbMdctD[1];
int32_t* sfbNextR = &sfbMdctR[1];
for (uint16_t s = sfbWidth - (sfb + 1 == numSwbFrame ? 1 : 0); s > 0; s--)
{
const int32_t resiRe = *sfbMdctR - int32_t ((*sfbMdctD * alphaRe - SHRT_MIN) >> 16);
// TODO: improve the following line since the calculation is partially redundant
// Also, in the final s index of this band, the wrong alphaRe may be used!
const int32_t resiIm = int32_t (((*sfbNextR - ((*sfbNextD * alphaRe - SHRT_MIN) >> 16)) - (int64_t) prevResi) >> 1);
const uint64_t absReR = abs (resiRe); // Richard Lyons, 1997; en.wikipedia.org/
const uint64_t absImR = abs (resiIm); // wiki/Alpha_max_plus_beta_min_algorithm
sumAbsValR += (absReR > absImR ? absReR + ((absImR * 3) >> 3) : absImR + ((absReR * 3) >> 3));
sfbMdctD++;
*(sfbMdctR++) = resiRe;
sfbNextD++;
sfbNextR++; prevResi = resiRe;
}
if (sfb + 1 == numSwbFrame) // process final sample
{
const int32_t resiRe = *sfbMdctR - int32_t ((*sfbMdctD * alphaRe - SHRT_MIN) >> 16);
sumAbsValR += abs (resiRe);
*sfbMdctR = resiRe;
}
}
else // complex data, both MDCT and MDST is available
{
int32_t* sfbMdstD = (alterPredDir ? &mdstSpectrum2[sfbStart] : &mdstSpectrum1[sfbStart]);
int32_t* sfbMdstR = (alterPredDir ? &mdstSpectrum1[sfbStart] : &mdstSpectrum2[sfbStart]);
for (uint16_t s = sfbWidth; s > 0; s--)
{
const int32_t resiRe = *sfbMdctR - int32_t ((*sfbMdctD * alphaRe - SHRT_MIN) >> 16);
const int32_t resiIm = *sfbMdstR - int32_t ((*sfbMdstD * alphaRe - SHRT_MIN) >> 16);
#if SA_EXACT_COMPLEX_ABS
const double cplxSqrR = (double) resiRe * (double) resiRe + (double) resiIm * (double) resiIm;
sumAbsValR += uint64_t (sqrt (cplxSqrR) + 0.5);
#else
const uint64_t absReR = abs (resiRe); // Richard Lyons, 1997; en.wikipedia.org/
const uint64_t absImR = abs (resiIm); // wiki/Alpha_max_plus_beta_min_algorithm
sumAbsValR += (absReR > absImR ? absReR + ((absImR * 3) >> 3) : absImR + ((absReR * 3) >> 3));
#endif
sfbMdctD++;
*(sfbMdctR++) = resiRe;
sfbMdstD++;
*(sfbMdstR++) = resiIm;
}
} // realOnlyCalc
// average spectral res. magnitude across current band
sumAbsValR = (sumAbsValR + (sfbWidth >> 1)) / sfbWidth;
if (alterPredDir) grpRms1[sfb] = (uint32_t) sumAbsValR; else grpRms2[sfb] = (uint32_t) sumAbsValR;
}
} // for gr
numSfbPredSte = 2;
}
return 0; // no error
return (numSfbPredSte); // no error
}

View File

@ -12,9 +12,11 @@
#define _STEREO_PROCESSING_H_
#include "exhaleLibPch.h"
#include "specAnalysis.h" // for SA_BW... constants
// constants, experimental macros
#define SP_EPS 1
#define SP_OPT_ALPHA_QUANT 1 // quantize alpha_q minimizing RMS distortion in louder channel
// joint-channel processing class
class StereoProcessor
@ -22,6 +24,7 @@ class StereoProcessor
private:
// member variables
uint8_t m_stereoCorrValue[1024 >> SA_BW_SHIFT]; // one value for every 32 spectral coefficients
public: