more mode 1 tuning

This commit is contained in:
Christian R. Helmrich 2020-05-17 15:00:00 +02:00
parent ea74e998f3
commit 3c5fb0b0dd
4 changed files with 68 additions and 18 deletions

View File

@ -304,24 +304,20 @@ int main (const int argc, char* argv[])
goto mainFinish; // bad output string
}
if ((variableCoreBitRateMode < 2) && (wavReader.getSampleRate () > 24000))
if ((variableCoreBitRateMode < 2) && (wavReader.getSampleRate () > 32000))
{
fprintf_s (stderr, " ERROR during encoding! Input sample rate must be <=24 kHz for preset mode %d!\n\n", variableCoreBitRateMode);
fprintf_s (stderr, " ERROR during encoding! Input sample rate must be <=32 kHz for preset mode %d!\n\n", variableCoreBitRateMode);
i = 4096; // return value
goto mainFinish; // resample to 24 kHz
goto mainFinish; // resample to 32 kHz
}
if ((variableCoreBitRateMode < 3) && (wavReader.getSampleRate () > 48000))
if ((variableCoreBitRateMode < 4) && (wavReader.getSampleRate () > 48000))
{
fprintf_s (stderr, " ERROR during encoding! Input sample rate must be <=48 kHz for preset mode %d!\n\n", variableCoreBitRateMode);
i = 4096; // return value
goto mainFinish; // resample to 44 kHz
}
if ((variableCoreBitRateMode == 2) && (wavReader.getSampleRate () > 32000))
{
fprintf_s (stderr, " WARNING: The input sampling rate should be 32 kHz or less for preset mode %d!\n\n", variableCoreBitRateMode);
}
if (outPathEnd == 0) // name has no path
{

View File

@ -726,7 +726,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
}
else // SCE or CPE: bandwidth-to-max_sfb mapping, short-window grouping for each channel
{
const uint32_t redFactor = (m_bitRateMode < 3 ? 2 : 3) - (coreConfig.stereoConfig >> 3);
const uint32_t redFactor = __max ((samplingRate < 27713 ? 2 : 1), __min (3, m_bitRateMode)) - (coreConfig.stereoConfig >> 3);
const bool eightShorts0 = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT);
const TnsData& tnsData0 = coreConfig.tnsData[0];
const TnsData& tnsData1 = coreConfig.tnsData[1];
@ -818,7 +818,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
coreConfig.groupingData[0], coreConfig.groupingData[1],
coreConfig.tnsData[0], coreConfig.tnsData[1],
numSwbFrame, coreConfig.stereoDataCurr,
m_bitRateMode <= 4, coreConfig.stereoMode > 1,
m_bitRateMode, coreConfig.stereoMode > 1,
(coreConfig.stereoConfig & 2) > 0, realOnlyStartSfb,
&sfbStepSizes[m_numSwbShort * NUM_WINDOW_GROUPS * ci],
&sfbStepSizes[m_numSwbShort * NUM_WINDOW_GROUPS * (ci + 1)]);

View File

@ -76,6 +76,12 @@ static inline void setStepSizesMS (const uint32_t* const rmsSfbL, const uint32
// constructor: zero-initializes the stereo correlation buffer and, when alpha
// quantization optimization is compiled in, the per-band random-value memories
StereoProcessor::StereoProcessor ()
{
  memset (m_stereoCorrValue, 0, sizeof (m_stereoCorrValue));
#if SP_OPT_ALPHA_QUANT
  memset (m_randomIntMemRe, 0, sizeof (m_randomIntMemRe)); // previous random values, real part
# if SP_MDST_PRED
  memset (m_randomIntMemIm, 0, sizeof (m_randomIntMemIm)); // previous random values, imag part
# endif
#endif
}
@ -85,7 +91,7 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
SfbGroupData& groupingData1, SfbGroupData& groupingData2,
const TnsData& filterData1, const TnsData& filterData2,
const uint8_t numSwbFrame, uint8_t* const sfbStereoData,
const bool usePerCorrData, const bool useFullFrameMS,
const uint8_t bitRateMode, const bool useFullFrameMS,
const bool reversePredDir, const uint8_t realOnlyOffset,
uint32_t* const sfbStepSize1, uint32_t* const sfbStepSize2)
{
@ -94,7 +100,10 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
const SfbGroupData& grp = groupingData1;
const bool eightShorts = (grp.numWindowGroups > 1);
const uint8_t maxSfbSte = (eightShorts ? __min (numSwbFrame, __max (grp.sfbsPerGroup, groupingData2.sfbsPerGroup) + 1) : numSwbFrame);
const bool perCorrData = (usePerCorrData && !eightShorts); // use perceptual correlation?
const bool perCorrData = ((bitRateMode <= 4) && !eightShorts); // perceptual correlation?
#if SP_OPT_ALPHA_QUANT
const bool quantDither = ((bitRateMode >= 4) && !eightShorts); // quantization dithering?
#endif
uint32_t rmsSfbL[2] = {0, 0}, rmsSfbR[2] = {0, 0};
uint32_t numSfbPredSte = 0; // counter
#if SP_SFB_WISE_STEREO
@ -120,6 +129,18 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
#endif
if (applyPredSte && perCorrData) memcpy (m_stereoCorrValue, sfbStereoData, (grp.sfbOffsets[numSwbFrame] >> SA_BW_SHIFT) * sizeof (uint8_t));
#if SP_OPT_ALPHA_QUANT
if ((bitRateMode >= 4) && eightShorts) // reset quantizer dither memory in short transform
{
for (uint16_t sfb = 0; sfb < MAX_NUM_SWB_LONG / 2; sfb++)
{
m_randomIntMemRe[sfb] = (1 << 30);
# if SP_MDST_PRED
m_randomIntMemIm[sfb] = (1 << 30);
# endif
}
}
#endif
for (uint16_t gr = 0; gr < grp.numWindowGroups; gr++)
{
@ -226,7 +247,7 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
grpRms1[sfb] = uint32_t ((sumAbsValM + (sfbWidth >> 1)) / sfbWidth);
grpRms2[sfb] = uint32_t ((sumAbsValS + (sfbWidth >> 1)) / sfbWidth);
if (applyPredSte) sfbStereoData[sfb + grOffset] = 16; // initialize to alpha_q to zero
if (applyPredSte) sfbStereoData[sfb + grOffset] = 16; // initialize alpha_q_.. to zero
if ((sfbIsOdd) || (sfb + 1 == maxSfbSte)) // finish pair
{
@ -271,7 +292,18 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
sfbTempVar = CLIP_PM ((double) sumPrdReAReB / (double) sumPrdReAReA, alphaLimit);
#if SP_OPT_ALPHA_QUANT
b = __max (512, 524 - int32_t (abs (10.0 * sfbTempVar))); // rounding optimization
b = int32_t (10.0 * sfbTempVar + b * (sfbTempVar < 0 ? -0.0009765625 : 0.0009765625));
# if 1
if (quantDither)
{
const int32_t r = (int32_t) m_randomInt32 ();
const double dr = 10.0 * sfbTempVar + (r - m_randomIntMemRe[sfbEv >> 1]) * SP_DIV;
b = int32_t (dr + b * (dr < 0.0 ? -0.0009765625 : 0.0009765625));
m_randomIntMemRe[sfbEv >> 1] = r;
}
else
# endif
b = int32_t (10.0 * sfbTempVar + b * (sfbTempVar < 0.0 ? -0.0009765625 : 0.0009765625));
#else
b = int32_t (10.0 * sfbTempVar + (sfbTempVar < 0 ? -0.5 : 0.5));// nearest integer
#endif
@ -280,13 +312,24 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
alphaLimit = CLIP_PM ((double) sumPrdImAReB / (double) sumPrdImAImA, alphaLimit);
# if SP_OPT_ALPHA_QUANT
b = __max (512, 524 - int32_t (abs (10.0 * alphaLimit))); // rounding optimization
b = int32_t (10.0 * alphaLimit + b * (alphaLimit < 0 ? -0.0009765625 : 0.0009765625));
# if 1
if (quantDither)
{
const int32_t r = (int32_t) m_randomInt32 ();
const double dr = 10.0 * alphaLimit + (r - m_randomIntMemIm[sfbEv >> 1]) * SP_DIV;
b = int32_t (dr + b * (dr < 0.0 ? -0.0009765625 : 0.0009765625));
m_randomIntMemIm[sfbEv >> 1] = r;
}
else
# endif
b = int32_t (10.0 * alphaLimit + b * (alphaLimit < 0.0 ? -0.0009765625 : 0.0009765625));
# else
b = int32_t (10.0 * alphaLimit + (alphaLimit < 0 ? -0.5 : 0.5));// nearest integer
# endif
if (sfbEv + 1 < numSwbFrame)
sfbStereoData[sfbEv + 1 + grOffset] = uint8_t (b + 16); // save initial alpha_q_im
#endif
#endif // SP_MDST_PRED
if (perCorrData && ((offEv & (SA_BW - 1)) == 0) && ((width & (SA_BW - 1)) == 0))
{
@ -304,7 +347,7 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
}
sfbTempVar *= sfbTempVar; // account for residual RMS reduction due to prediction
#if SP_MDST_PRED
sfbTempVar += alphaLimit * alphaLimit; // including complex prediction by alpha_im
if (bitRateMode > 1) sfbTempVar += alphaLimit * alphaLimit; // including alpha_im
#endif
for (b = sfbIsOdd; b >= 0; b--)
{

View File

@ -13,6 +13,7 @@
#include "exhaleLibPch.h"
#include "specAnalysis.h" // for SA_BW... constants
#include <random>
// constants, experimental macros
#define SP_0_DOT_1_16BIT 6554
@ -20,6 +21,9 @@
#define SP_MDST_PRED 1
#define SP_OPT_ALPHA_QUANT 1 // quantize alpha_q minimizing RMS distortion in louder channel
#define SP_SFB_WISE_STEREO 1
#if SP_OPT_ALPHA_QUANT
// SP_DIV equals 2^-32; applyPredJointStereo multiplies the difference between the
// current and the previously stored int32 random value by it to obtain the dither
// term added to the scaled prediction coefficient before alpha_q quantization.
# define SP_DIV (1.0 / 4294967296.0)
#endif
// joint-channel processing class
class StereoProcessor
@ -32,6 +36,13 @@ private:
int32_t m_originBandMdct2[320];
int32_t m_originBandMdst1[320];
int32_t m_originBandMdst2[320];
#endif
#if SP_OPT_ALPHA_QUANT
std::minstd_rand m_randomInt32;
int32_t m_randomIntMemRe[MAX_NUM_SWB_LONG / 2];
# if SP_MDST_PRED
int32_t m_randomIntMemIm[MAX_NUM_SWB_LONG / 2];
# endif
#endif
uint8_t m_stereoCorrValue[1024 >> SA_BW_SHIFT]; // one value for every 32 spectral coefficients
@ -47,7 +58,7 @@ public:
SfbGroupData& groupingData1, SfbGroupData& groupingData2,
const TnsData& filterData1, const TnsData& filterData2,
const uint8_t numSwbFrame, uint8_t* const sfbStereoData,
const bool usePerCorrData, const bool useFullFrameMS,
const uint8_t bitRateMode, const bool useFullFrameMS,
const bool reversePredDir, const uint8_t realOnlyOffset,
uint32_t* const sfbStepSize1, uint32_t* const sfbStepSize2);
}; // StereoProcessor