correct modes f, 5

This commit is contained in:
Christian R. Helmrich 2022-10-17 18:00:36 +00:00
parent 71f2a4ab92
commit 444a006269
4 changed files with 30 additions and 86 deletions

View File

@ -1,5 +1,5 @@
/* exhaleApp.rc - resource file for exhale application binaries compiled under Windows
* written by C. R. Helmrich, last modified in 2021 - see License.htm for legal notices
* written by C. R. Helmrich, last modified in 2022 - see License.htm for legal notices
*
* The copyright in this software is being made available under the exhale Copyright License
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
@ -13,7 +13,7 @@
0 ICON "exhaleApp.ico"
VS_VERSION_INFO VERSIONINFO
FILEVERSION 1,1,9,1
FILEVERSION 1,1,9,2
BEGIN
BLOCK "StringFileInfo"
BEGIN
@ -22,7 +22,7 @@ BEGIN
VALUE "CompanyName", "ecodis"
VALUE "FileDescription", "exhale - ecodis extended high-efficiency and low-complexity encoder"
VALUE "InternalName", "exhaleApp.exe"
VALUE "LegalCopyright", "© 2018-2021 C. R. Helmrich, ecodis"
VALUE "LegalCopyright", "© 2018-2021 C. R. Helmrich, ecodis"
VALUE "OriginalFilename", "exhale.exe"
VALUE "ProductName", "exhaleApp"
VALUE "ProductVersion", EXHALELIB_VERSION_MAJOR "." EXHALELIB_VERSION_MINOR EXHALELIB_VERSION_BUGFIX

View File

@ -455,7 +455,7 @@ static const uint8_t tnsScaleFactorBandLimit[2 /*long/short*/][USAC_NUM_FREQ_TAB
{31, 34, 51 /*to be corrected to 42 (44.1) and 40 (48 kHz)!*/, 47, 43, 40}, {9, 10, 14, 15, 15, 15}
};
static const uint8_t sbrRateOffset[10] = {7, 6, 6, 8, 7, 8, 8, 8, 8, 8}; // used for scaleSBR
static const uint8_t sbrRateOffset[10] = {7, 6, 6, 8, 7, 8, 9, 9, 9, 9}; // used for scaleSBR
// scale_factor_grouping map
// group lengths based on transient location: 1133, 1115, 2114, 3113, 4112, 5111, 3311, 1331
@ -817,9 +817,10 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
{
const int16_t chanCorrSign = (coreConfig.stereoConfig & 2 ? -1 : 1);
const uint16_t nSamplesMax = (useMaxBandwidth ? nSamplesInFrame : swbOffsetsL[m_swbTableIdx][__min (m_numSwbLong, maxSfbLong + 1)]);
const uint8_t steppFadeLen = (eightShorts0 ? 4 : (coreConfig.tnsActive && (m_bitRateMode > 0) ? 32 : 64));
const bool reducedStrength = (coreConfig.tnsActive && (m_bitRateMode > 0)) || (m_bitRateMode >= 5);
const uint8_t steppFadeLen = (eightShorts0 ? 4 : (reducedStrength ? 32 : 64));
const uint8_t steppFadeOff = ((m_bitRateMode + 77000 / samplingRate) & 6) << (eightShorts0 ? 2 : 5);
const int64_t steppWeightI = __min (64, m_perCorrHCurr[el] - 128) >> (eightShorts0 || (coreConfig.tnsActive && (m_bitRateMode > 0)) ? 1 : 0);
const int64_t steppWeightI = __min (64, m_perCorrHCurr[el] - 128) >> (eightShorts0 || reducedStrength ? 1 : 0); // crosstalk * 128
const int64_t steppWeightD = 128 - steppWeightI; // decrement, (1 - crosstalk) * 128
for (uint16_t n = 0, gr = 0; gr < coreConfig.groupingData[0].numWindowGroups; gr++)
@ -1189,7 +1190,7 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
b = 40 + (samplingRate >> 12);
if ((m_shiftValSBR == 0) || (samplingRate < 23004) || shortWinCurr || (b > lastSfb)) b = lastSfb;
while ((b >= sfmBasedSfbStart + (m_bitRateMode >> 1)) && (grpOff[b] > peakIndex) && ((grpRms[b] >> 16) <= 1) /*coarse quantization*/ &&
while ((b >= sfmBasedSfbStart + (m_bitRateMode >> 1) + (m_bitRateMode / 5)) && (grpOff[b] > peakIndex) && ((grpRms[b] >> 16) <= 1) &&
((estimBitCount * 5 > targetBitCount25 * 2) || (grpLength > 1 /*no accurate bit count estim. available for grouped spectrum*/)))
{
b--; // search first coarsely quantized high-freq. SFB
@ -1377,7 +1378,7 @@ unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS
{
coreConfig.stereoConfig = coreConfig.stereoMode = 0;
if (coreConfig.commonWindow && (m_bitRateMode <= 4)) // stereo pre-processing analysis
if (coreConfig.commonWindow && (m_bitRateMode <= 5)) // stereo pre-processing analysis
{
const bool eightShorts = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT);
const uint8_t meanSpecFlat = (((m_specAnaCurr[ci] >> 16) & UCHAR_MAX) + ((m_specAnaCurr[ci + 1] >> 16) & UCHAR_MAX) + 1) >> 1;
@ -1519,7 +1520,7 @@ unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS
{
applyTnsCoeff2ChannelSynch (m_linPredictor, tnsData0, tnsData1, s, n, &coreConfig.commonTnsData);
}
else if ((m_bitRateMode <= 4) && (m_perCorrHCurr[el] > 128))
else if ((m_bitRateMode <= 5) && (m_perCorrHCurr[el] > 128))
{
applyTnsCoeffPreProcessing (m_linPredictor, tnsData0, tnsData1, s, n, &coreConfig.commonTnsData, m_perCorrHCurr[el] - 128);
}
@ -1536,7 +1537,7 @@ unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS
}
maxSfb0 = maxSfb1 = maxSfbSte;
if ((m_bitRateMode <= 4) && (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT))
if ((m_bitRateMode <= 5) && (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT))
{
m_perCorrLCurr[el] = stereoCorrGrouping (coreConfig.groupingData[0], nSamplesInFrame, coreConfig.stereoDataCurr);
}
@ -1816,7 +1817,8 @@ ExhaleEncoder::ExhaleEncoder (int32_t* const inputPcmData, unsigned ch
if (m_channelConf == CCI_CONF) m_channelConf = CCI_2_CHM; // passing numChannels = 0 means 2-ch dual-mono
m_numElements = elementCountConfig[m_channelConf % USAC_MAX_NUM_ELCONFIGS]; // used in UsacDecoderConfig
m_shiftValSBR = (frameLength >= 1536 ? 1 : 0);
m_frameCount = m_rateFactor = m_priLength = 0;
m_frameCount = m_rateFactor = 0;
m_priLength = 0;
m_frameLength = USAC_CCFL (frameLength >> m_shiftValSBR); // ccfl signaled using coreSbrFrameLengthIndex
m_frequencyIdx = toSamplingFrequencyIndex (sampleRate >> m_shiftValSBR); // as usacSamplingFrequencyIndex
m_indepFlag = true; // usacIndependencyFlag in UsacFrame(), will be set per frame, true in first frame
@ -2117,7 +2119,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin
#else
m_sfbQuantizer.initQuantMemory (nSamplesInFrame) > 0 ||
#endif
m_specAnalyzer.initSigAnaMemory (&m_linPredictor, m_bitRateMode <= 4 ? nChannels : 0, nSamplesInFrame) > 0 ||
m_specAnalyzer.initSigAnaMemory (&m_linPredictor, m_bitRateMode <= 5 ? nChannels : 0, nSamplesInFrame) > 0 ||
m_transform.initConstants (m_tempIntBuf, m_timeWindowL, m_timeWindowS, nSamplesInFrame) > 0)
{
errorValue |= 1;

View File

@ -1,5 +1,5 @@
/* stereoProcessing.cpp - source file for class providing M/S stereo coding functionality
* written by C. R. Helmrich, last modified in 2021 - see License.htm for legal notices
* written by C. R. Helmrich, last modified in 2022 - see License.htm for legal notices
*
* The copyright in this software is being made available under the exhale Copyright License
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
@ -77,12 +77,8 @@ static inline void setStepSizesMS (const uint32_t* const rmsSfbL, const uint32
// constructor: zero-initializes the quantizer dither memories and the stereo correlation buffer
StereoProcessor::StereoProcessor ()
{
#if SP_OPT_ALPHA_QUANT
// clear the dither memory used when quantizing the real prediction coefficient (alpha_q_re)
memset (m_randomIntMemRe, 0, (1+MAX_NUM_SWB_LONG/2) * sizeof (int32_t));
# if SP_MDST_PRED
// clear the dither memory used when quantizing the imaginary prediction coefficient (alpha_q_im)
memset (m_randomIntMemIm, 0, (1+MAX_NUM_SWB_LONG/2) * sizeof (int32_t));
# endif
#endif
// clear the per-band correlation values (1024 >> SA_BW_SHIFT entries of one byte each)
memset (m_stereoCorrValue, 0, (1024 >> SA_BW_SHIFT) * sizeof (uint8_t));
}
@ -100,36 +96,24 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
const SfbGroupData& grp = groupingData1;
const bool eightShorts = (grp.numWindowGroups > 1);
const uint8_t maxSfbSte = (eightShorts ? __min (numSwbFrame, __max (grp.sfbsPerGroup, groupingData2.sfbsPerGroup) + 1) : numSwbFrame);
const bool perCorrData = ((bitRateMode <= 4) && !eightShorts); // perceptual correlation?
#if SP_OPT_ALPHA_QUANT
const bool perCorrData = ((bitRateMode <= 5) && !eightShorts); // perceptual correlation?
const bool quantDither = ((bitRateMode >= 4) && !eightShorts); // quantization dithering?
#endif
bool alterPredDir = (applyPredSte && reversePredDir); // predict mid from side band?
uint32_t rmsSfbL[2] = {0, 0}, rmsSfbR[2] = {0, 0};
uint32_t numSfbPredSte = 0; // counter
#if SP_SFB_WISE_STEREO
uint16_t numSfbNoMsSte = 0, idxSfbNoMsSte = 0, nNoMS = 0;
uint32_t rms1NoMsSte[2] = {0, 0}, rms2NoMsSte[2] = {0, 0};
uint32_t rmsMNoMsSte[2] = {0, 0}, rmsSNoMsSte[2] = {0, 0};
uint8_t dataNoMsSte[2] = {0, 0};
bool nonZeroPredNoMsSte = false;
#endif
if ((mdctSpectrum1 == nullptr) || (mdctSpectrum2 == nullptr) || (numSwbFrame < maxSfbSte) || (grp.numWindowGroups != groupingData2.numWindowGroups) ||
(sfbStepSize1 == nullptr) || (sfbStepSize2 == nullptr) || (numSwbFrame < MIN_NUM_SWB_SHORT) || (numSwbFrame > MAX_NUM_SWB_LONG))
{
return 1; // invalid arguments error
}
#if !SP_SFB_WISE_STEREO
if (!useFullFrameMS)
{
if (applyPredSte) memset (sfbStereoData, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint8_t));
return 0; // zeroed ms_used, no pred.
}
#endif
#if SP_MDST_PRED
if (applyPredSte && (bitRateMode > 4) && !eightShorts && !reversePredDir) // pred_dir test
if (applyPredSte && (bitRateMode > 5) && !eightShorts && !reversePredDir) // pred_dir test
{
uint64_t sumRealM = 0, sumRealS = 0;
@ -140,21 +124,17 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
}
alterPredDir = (sumRealS * 2 > sumRealM * 3);
}
#endif
if (applyPredSte && perCorrData) memcpy (m_stereoCorrValue, sfbStereoData, (grp.sfbOffsets[numSwbFrame] >> SA_BW_SHIFT) * sizeof (uint8_t));
#if SP_OPT_ALPHA_QUANT
if ((bitRateMode >= 4) && eightShorts) // reset quantizer dither memory in short transform
{
for (uint16_t sfb = 0; sfb <= MAX_NUM_SWB_LONG / 2; sfb++)
{
m_randomIntMemRe[sfb] = (1 << 30);
# if SP_MDST_PRED
m_randomIntMemIm[sfb] = (1 << 30);
# endif
}
}
#endif
for (uint16_t n = 0, gr = 0; gr < grp.numWindowGroups; gr++)
{
@ -188,7 +168,6 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
uint64_t sumAbsValM = 0, sumAbsValS = 0;
double sfbTempVar;
#if SP_SFB_WISE_STEREO
if ((sfbIsOdd == 0) && !useFullFrameMS) // save L/R data
{
const uint16_t cpyWidth = (grpOff[__min (maxSfbSte, sfb + 2)] - sfbStart) * sizeof (int32_t);
@ -198,7 +177,7 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
memcpy (m_originBandMdst1, sfbMdst1, cpyWidth);
memcpy (m_originBandMdst2, sfbMdst2, cpyWidth);
}
#endif
if (realOnlyCalc && (sfb >= realOnlyOffset)) // real-valued data, only MDCTs available
{
const int32_t* sfbNext1 = &sfbMdct1[1];
@ -277,10 +256,7 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
const int32_t* mdctB = (alterPredDir ? &mdctSpectrum1[offEv] : &mdctSpectrum2[offEv]);
const int32_t* mdstA = (alterPredDir ? &mdstSpectrum2[offEv] : &mdstSpectrum1[offEv]);
const int32_t* mdstB = (alterPredDir ? &mdstSpectrum1[offEv] : &mdstSpectrum2[offEv]);
int64_t sumPrdReAReB = 0, sumPrdReAReA = SP_EPS; // stabilizes the division below
#if SP_MDST_PRED
int64_t sumPrdImAReB = 0;
#endif
int64_t sumPrdReAReB = 0, sumPrdImAReB = 0, sumPrdReAReA = SP_EPS; // to stabilize
double d, alphaLimit = 1.5; // max alpha_q magnitude
for (uint16_t s = width; s > 0; s--, mdctA++, mdctB++, mdstA++, mdstB++)
@ -290,15 +266,11 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
sumPrdReAReB += ((int64_t) *mdctA * (int64_t) *mdctB + SA_BW) >> (SA_BW_SHIFT + 1);
sumPrdReAReA += prdReAReA;
#if SP_MDST_PRED
sumPrdImAReB += ((int64_t) *mdstA * (int64_t) *mdctB + SA_BW) >> (SA_BW_SHIFT + 1);
#endif
// add complex conjugate part, increases stability
sumPrdReAReB += ((int64_t) *mdstA * (int64_t) *mdstB + SA_BW) >> (SA_BW_SHIFT + 1);
sumPrdReAReA += prdImAImA;
#if SP_MDST_PRED
sumPrdImAReB -= ((int64_t) *mdctA * (int64_t) *mdstB + SA_BW) >> (SA_BW_SHIFT + 1);
#endif
}
for (b = sfbIsOdd; b >= 0; b--) // limit alpha_q to prevent residual RMS increases
{
@ -308,9 +280,8 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
if (alphaLimit > d) alphaLimit = d;
}
sfbTempVar = CLIP_PM ((double) sumPrdReAReB / (double) sumPrdReAReA, alphaLimit);
#if SP_OPT_ALPHA_QUANT
b = __max (512, 524 - int32_t (abs (10.0 * sfbTempVar))); // rounding optimization
# if 1
if (quantDither)
{
const int32_t r = (int32_t) m_randomInt32 ();
@ -319,18 +290,12 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
b = int32_t (dr + b * (dr < 0.0 ? -0.0009765625 : 0.0009765625));
m_randomIntMemRe[sfbEv >> 1] = r;
}
else
# endif
b = int32_t (10.0 * sfbTempVar + b * (sfbTempVar < 0.0 ? -0.0009765625 : 0.0009765625));
#else
b = int32_t (10.0 * sfbTempVar + (sfbTempVar < 0 ? -0.5 : 0.5));// nearest integer
#endif
else b = int32_t (10.0 * sfbTempVar + b * (sfbTempVar < 0.0 ? -0.0009765625 : 0.0009765625));
sfbStereoData[sfbEv + grOffset] = uint8_t (b + 16); // save SFB's final alpha_q_re
#if SP_MDST_PRED
alphaLimit = CLIP_PM ((double) sumPrdImAReB / (double) sumPrdReAReA, alphaLimit);
# if SP_OPT_ALPHA_QUANT
b = __max (512, 524 - int32_t (abs (10.0 * alphaLimit))); // rounding optimization
# if 1
if (quantDither)
{
const int32_t r = (int32_t) m_randomInt32 ();
@ -339,15 +304,10 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
b = int32_t (dr + b * (dr < 0.0 ? -0.0009765625 : 0.0009765625));
m_randomIntMemIm[sfbEv >> 1] = r;
}
else
# endif
b = int32_t (10.0 * alphaLimit + b * (alphaLimit < 0.0 ? -0.0009765625 : 0.0009765625));
# else
b = int32_t (10.0 * alphaLimit + (alphaLimit < 0 ? -0.5 : 0.5));// nearest integer
# endif
else b = int32_t (10.0 * alphaLimit + b * (alphaLimit < 0.0 ? -0.0009765625 : 0.0009765625));
if (sfbEv + 1 < numSwbFrame)
sfbStereoData[sfbEv + 1 + grOffset] = uint8_t (b + 16); // save initial alpha_q_im
#endif // SP_MDST_PRED
if (perCorrData && ((offEv & (SA_BW - 1)) == 0) && ((width & (SA_BW - 1)) == 0))
{
@ -364,7 +324,7 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
nonZeroPredCoef = true;
}
sfbTempVar *= sfbTempVar; // account for residual RMS reduction due to prediction
#if SP_MDST_PRED && !(BA_MORE_CBR)
#if !BA_MORE_CBR
if (bitRateMode > 0) sfbTempVar += alphaLimit * alphaLimit; // including alpha_im
#endif
for (b = sfbIsOdd; b >= 0; b--)
@ -390,7 +350,6 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
}
} // if applyPredSte
#if SP_SFB_WISE_STEREO
if (!useFullFrameMS) // test M/S compaction gain, revert to L/R if it's insufficient
{
const uint64_t bandSum1 = (sfbIsOdd > 0 ? (uint64_t) grpRms1[sfbEv] + (uint64_t) grpRms1[sfbEv + 1] : grpRms1[sfbEv]);
@ -440,7 +399,7 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
continue; // M/S is not used
}
} // if !useFullFrameMS
#endif
for (b = sfbIsOdd; b >= 0; b--) setStepSizesMS (rmsSfbL, rmsSfbR, rmsSfbM, rmsSfbS, grpRms1, grpRms2,
grpStepSizes1, grpStepSizes2, sfbEv, (uint16_t) b, applyPredSte);
if (nonZeroPredCoef) numSfbPredSte++; // if perceptually significant prediction band
@ -449,7 +408,6 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
if (grpLength == 1) n++;
} // for gr
#if SP_SFB_WISE_STEREO
if (numSfbNoMsSte == 1) // upgrade single L/R to M/S band to reduce M/S signaling overhead
{
const uint16_t grNoMS = idxSfbNoMsSte / numSwbFrame;
@ -510,7 +468,6 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
}
if (nonZeroPredNoMsSte) numSfbPredSte++; // was perceptually significant prediction band
} // if numSfbNoMsSte == 1
#endif
if (numSfbPredSte == 0) // discard prediction coefficients and stay with legacy M/S stereo
{
@ -647,11 +604,7 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
if (grpLength == 1) n++;
} // for gr
#if SP_MDST_PRED
numSfbPredSte = (applyPredSte && (alterPredDir != reversePredDir) ? 4 /*pred_dir=1*/ : 2);
#else
numSfbPredSte = 2;
#endif
}
return numSfbPredSte; // no error

View File

@ -1,5 +1,5 @@
/* stereoProcessing.h - header file for class providing M/S stereo coding functionality
* written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
* written by C. R. Helmrich, last modified in 2022 - see License.htm for legal notices
*
* The copyright in this software is being made available under the exhale Copyright License
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
@ -18,12 +18,7 @@
// constants, experimental macros
#define SP_0_DOT_1_16BIT 6554 // approximately 0.1 * 2^16 (0.1 * 65536 = 6553.6, rounded up)
#define SP_EPS 1 // small start value for denominator sums, keeps divisions stable
#define SP_MDST_PRED 1
#define SP_OPT_ALPHA_QUANT 1 // quantize alpha_q minimizing RMS distortion in louder channel
#define SP_SFB_WISE_STEREO 1
#if SP_OPT_ALPHA_QUANT
# define SP_DIV (1.0 / 4294967296.0) // reciprocal of 2^32
#endif
#define SP_DIV (1.0 / 4294967296.0) // reciprocal of 2^32
// joint-channel processing class
class StereoProcessor
@ -31,19 +26,13 @@ class StereoProcessor
private:
// member variables
#if SP_SFB_WISE_STEREO
int32_t m_originBandMdct1[320]; // i.e. 64 * 5 - NOTE: increase this when maximum grpLength > 5
int32_t m_originBandMdct2[320];
int32_t m_originBandMdst1[320];
int32_t m_originBandMdst2[320];
#endif
#if SP_OPT_ALPHA_QUANT
std::minstd_rand m_randomInt32;
int32_t m_randomIntMemRe[1+MAX_NUM_SWB_LONG/2];
# if SP_MDST_PRED
int32_t m_randomIntMemIm[1+MAX_NUM_SWB_LONG/2];
# endif
#endif
uint8_t m_stereoCorrValue[1024 >> SA_BW_SHIFT]; // one value for every 32 spectral coefficients
public: