prepare stereo pred.

This commit is contained in:
Christian R. Helmrich 2020-03-29 01:00:24 +01:00
parent 5ceb1a0959
commit 036d9b7d20
7 changed files with 84 additions and 35 deletions

View File

@ -1,5 +1,5 @@
/* bitAllocation.h - header file for class needed for psychoacoustic bit-allocation
* written by C. R. Helmrich, last modified in 2019 - see License.htm for legal notices
* written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
*
* The copyright in this software is being made available under a Modified BSD-Style License
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
@ -15,7 +15,7 @@
// constants, experimental macros
#define BA_EPS 1
#define BA_INTER_CHAN_SIM_MASK 0 // cross-channel simultaneous masking for surround
#define BA_INTER_CHAN_SIM_MASK 0 // 5.1 cross-channel simultaneous masking
// class for audio bit-allocation
class BitAllocator

View File

@ -91,7 +91,7 @@ unsigned BitStreamWriter::writeChannelWiseTnsData (const TnsData& tnsData, const
bitCount += 2 + order * coefBits;
}
}
} // if (n_filt[w])
} // if n_filt[w] > 0
}
} // for w
@ -264,14 +264,14 @@ unsigned BitStreamWriter::writeFDChannelStream (const CoreCoderData& elData, Ent
}
}
} // for w
} // if (maxSfb == 0)
} // if maxSfb == 0
m_auBitStream.write (0, 1); // fac_data_present, no fac_data
return bitCount;
}
unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData,
unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData, EntropyCoder& entrCoder,
#if !RESTRICT_TO_AAC
const bool timeWarping,
#endif
@ -281,6 +281,7 @@ unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData,
const IcsInfo& icsInfo1 = elData.icsInfoCurr[1];
const TnsData& tnsData0 = elData.tnsData[0];
const TnsData& tnsData1 = elData.tnsData[1];
const SfbGroupData& grp = elData.groupingData[0];
unsigned bitCount = 2, g, b;
m_auBitStream.write (elData.tnsActive ? 1 : 0, 1); // tns_active
@ -301,7 +302,7 @@ unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData,
bitCount += 3;
if (elData.stereoMode == 1) // write SFB-wise ms_used[][] flag
{
for (g = 0; g < elData.groupingData[0].numWindowGroups; g++)
for (g = 0; g < grp.numWindowGroups; g++)
{
const uint8_t* const gMsUsed = &elData.stereoData[m_numSwbShort * g];
@ -315,10 +316,12 @@ unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData,
#if !RESTRICT_TO_AAC
else if (elData.stereoMode >= 3) // SFB-wise cplx_pred_data()
{
const bool complexCoef = (elData.stereoConfig & 1);
m_auBitStream.write (elData.stereoMode - 3, 1); // _pred_all
if (elData.stereoMode == 3)
{
for (g = 0; g < elData.groupingData[0].numWindowGroups; g++)
for (g = 0; g < grp.numWindowGroups; g++)
{
const uint8_t* const gCplxPredUsed = &elData.stereoData[m_numSwbShort * g];
@ -329,9 +332,50 @@ unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData,
}
bitCount += ((maxSfbSte + 1) / SFB_PER_PRED_BAND) * g;
}
// pred_dir and complex_coef. TODO: rest of cplx_pred_data()
m_auBitStream.write (elData.stereoConfig & 3, 2);
m_auBitStream.write (elData.stereoConfig & 3, 2);// pred_dir
bitCount += 3;
if (!indepFlag) // use_prev_frame (&4), delta_code_time (&8)
{
if (complexCoef)
{
m_auBitStream.write (elData.stereoConfig & 4 ? 1 : 0, 1);
bitCount++;
}
m_auBitStream.write (elData.stereoConfig & 8 ? 1 : 0, 1);
bitCount++;
}
// TODO: complete the following code for delta_code_time > 0
for (g = 0; g < grp.numWindowGroups; g++)
{
const uint8_t* const gCplxPredUsed = &elData.stereoData[m_numSwbShort * g];
uint8_t aqReIdxPred = 16, aqImIdxPred = 16; // alpha_q = 0
for (b = 0; b < maxSfbSte; b += SFB_PER_PRED_BAND)
{
if (gCplxPredUsed[b] > 0) // write dpcm_alpha_q_re/_q_im
{
uint8_t aqIdx = gCplxPredUsed[b] & 31; // -15,..0,..15
int aqIdxDpcm = (int) aqIdx - aqReIdxPred;
unsigned bits = entrCoder.indexGetBitCount (aqIdxDpcm);
aqReIdxPred = aqIdx;
m_auBitStream.write (entrCoder.indexGetHuffCode (aqIdxDpcm), bits);
bitCount += bits;
if (complexCoef)
{
aqIdx = gCplxPredUsed[b + 1] & 31; // <32 kHz short!
aqIdxDpcm = (int) aqIdx - aqImIdxPred;
bits = entrCoder.indexGetBitCount (aqIdxDpcm);
aqImIdxPred = aqIdx;
m_auBitStream.write (entrCoder.indexGetHuffCode (aqIdxDpcm), bits);
bitCount += bits;
}
}
else aqReIdxPred = aqImIdxPred = 16;
}
} // for g
}
#endif
} // common_window
@ -517,7 +561,7 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData,
m_auBitStream.write (CORE_MODE_FD, 1); // L
m_auBitStream.write (CORE_MODE_FD, 1); // R
bitCount += 2;
bitCount += writeStereoCoreToolInfo (*elData,
bitCount += writeStereoCoreToolInfo (*elData, entropyCoder[ci], // L
#if !RESTRICT_TO_AAC
tw_mdct[el],
#endif

View File

@ -41,7 +41,7 @@ private:
const bool timeWarping, const bool noiseFilling,
#endif
const bool indepFlag = false);
unsigned writeStereoCoreToolInfo (const CoreCoderData& elData,
unsigned writeStereoCoreToolInfo (const CoreCoderData& elData, EntropyCoder& entrCoder,
#if !RESTRICT_TO_AAC
const bool timeWarping,
#endif

View File

@ -235,7 +235,14 @@ static inline uint8_t brModeAndFsToMaxSfbShort(const unsigned bitRateMode, const
return (samplingRate > 51200 ? 11 : 13) - 2 + (bitRateMode >> 2);
}
#if !SA_IMPROVED_REAL_ABS
#if SA_IMPROVED_REAL_ABS
// Returns the "square-mean-root" of two unsigned values: the two square roots
// are averaged and that average is squared again, i.e. ((sqrt(a) + sqrt(b)) / 2)^2,
// rounded to the nearest integer by adding 0.5 before the truncating conversion.
static inline uint32_t squareMeanRoot (const uint32_t value1, const uint32_t value2)
{
  const double rootSum  = sqrt ((double) value1) + sqrt ((double) value2);
  const double halfRoot = rootSum * 0.5; // arithmetic mean of both roots

  return uint32_t (halfRoot * halfRoot + 0.5);
}
#else
static inline uint32_t getComplexRmsValue (const uint32_t rmsValue, const unsigned sfbGroup, const unsigned sfbIndex,
const uint8_t numSwb, const TnsData& tnsData)
{
@ -657,8 +664,8 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
const uint32_t maxSfbLong = (samplingRate < 37566 ? 51 /*32 kHz*/ : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
const uint32_t reductionFactor = (samplingRate < 37566 ? 2 : 3); // undercoding reduction
const uint64_t scaleSr = (samplingRate < 27713 ? 37 - m_bitRateMode : 37);
const uint64_t scaleBr = (m_bitRateMode == 0 ? 32 : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - ((m_bitRateMode - 1) >> 1));
const uint64_t scaleSr = (samplingRate < 27713 ? 37 - m_bitRateMode : 37) - ((m_bitRateMode & 7) > 2/*TODO*/ ? nChannels >> 1 : 0);
const uint64_t scaleBr = (m_bitRateMode == 0 ? 32 : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - (m_bitRateMode >> 1));
uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf;
uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS];
//uint8_t meanTempFlat[USAC_MAX_NUM_CHANNELS];
@ -817,14 +824,14 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
for (b = 0; b < grpData.sfbsPerGroup; b++)
{
#if SA_IMPROVED_REAL_ABS
const uint32_t rmsComp = grpRms[b];
const uint32_t rmsComp = (coreConfig.stereoMode > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]);
const uint32_t rmsRef9 = (coreConfig.commonWindow ? refRms[b] >> 9 : rmsComp);
#else
const uint32_t rmsComp = getComplexRmsValue (grpRms[b], gr, b, numSwbCh, coreConfig.tnsData[ch]);
const uint32_t rmsRef9 = (!coreConfig.commonWindow ? rmsComp :
getComplexRmsValue (refRms[b], gr, b, numSwbCh, coreConfig.tnsData[1 - ch]) >> 9);
#endif
if (rmsComp < grpRmsMin) grpRmsMin = rmsComp;
if (grpRms[b] < grpRmsMin) grpRmsMin = grpRms[b];
if (rmsComp >= rmsRef9 && (rmsComp < (grpStepSizes[b] >> 1))) // zero-quantized
{
s -= ((grpOff[b + 1] - grpOff[b]) * reductionFactor * __min (2 * SA_EPS, rmsComp) + SA_EPS) >> 11; // / (2 * SA_EPS)
@ -833,7 +840,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
if ((samplingRate >= 27713) && (b < maxSfbLong) && !eightShorts) // uncoded coefs
{
#if SA_IMPROVED_REAL_ABS
const uint32_t rmsComp = grpRms[b];
const uint32_t rmsComp = (coreConfig.stereoMode > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]);
const uint32_t rmsRef9 = (coreConfig.commonWindow ? refRms[b] >> 9 : rmsComp);
#else
const uint32_t rmsComp = getComplexRmsValue (grpRms[b], gr, b, numSwbCh, coreConfig.tnsData[ch]);

View File

@ -774,7 +774,7 @@ uint8_t SfbQuantizer::quantizeSpecSfb (EntropyCoder& entropyCoder, const int32_t
{
grpStats[sfb] = ((uint32_t) maxQBest << 16) | numQBest; // max magnitude and bit count
}
} // if (sfIndex == 0)
} // if sfIndex == 0
return __min (sfBest, m_maxSfIndex);
}

View File

@ -18,15 +18,7 @@
// constructor
StereoProcessor::StereoProcessor ()
{
for (unsigned ch = 0; ch < USAC_MAX_NUM_CHANNELS; ch++)
{
m_avgAbsHpPrev[ch] = 0;
m_maxAbsHpPrev[ch] = 0;
m_maxIdxHpPrev[ch] = 1;
m_pitchLagPrev[ch] = 0;
m_tempAnaStats[ch] = 0;
m_transientLoc[ch] = -1;
}
return;
}
// public functions
@ -37,7 +29,7 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in
const uint8_t numSwbFrame, uint8_t* const sfbStereoData,
uint32_t* const sfbStepSize1, uint32_t* const sfbStepSize2)
{
//const bool applyPredSte = (sfbStereoData != nullptr); // use real-valued predictive stereo
const bool applyPredSte = (sfbStereoData != nullptr); // use real-valued predictive stereo
const uint8_t maxSfbSte = __max (groupingData1.sfbsPerGroup, groupingData2.sfbsPerGroup);
if ((mdctSpectrum1 == nullptr) || (mdctSpectrum2 == nullptr) || (groupingData1.numWindowGroups != groupingData2.numWindowGroups) ||
@ -104,6 +96,17 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in
sfbNext1++; prevReM = dmixReM;
sfbNext2++; prevReS = dmixReS;
}
if (sfb + 1 == numSwbFrame) // handle remaining sample
{
const int32_t dmixReM = int32_t (((int64_t) *sfbMdct1 + (int64_t) *sfbMdct2 + 1) >> 1);
const int32_t dmixReS = int32_t (((int64_t) *sfbMdct1 - (int64_t) *sfbMdct2 + 1) >> 1);
sumAbsValM += abs (dmixReM);
sumAbsValS += abs (dmixReS);
*sfbMdct1 = dmixReM;
*sfbMdct2 = dmixReS;
}
}
else // complex data, both MDCTs and MDSTs are available
{
@ -152,8 +155,9 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in
{
double min = __min (grpRms1[sfb], grpRms2[sfb]);
grpStepSizes1[sfb] = grpStepSizes2[sfb] = uint32_t (__max (SP_EPS, (min > sfbRatLR * sfbRmsMaxMS ? sqrt (sfbRatLR * sfbRmsMaxMS *
min) : __min (1.0/*TODO*/, sfbRatLR) * sfbRmsMaxMS)) + 0.5);
min) : __min (1.0/*0 dB*/, sfbRatLR) * sfbRmsMaxMS)) + 0.5);
}
if (applyPredSte) sfbStereoData[sfb + numSwbFrame * gr] = 16; // zero prediction coefs
} // for sfb
}

View File

@ -22,12 +22,6 @@ class StereoProcessor
private:
// member variables
unsigned m_avgAbsHpPrev[USAC_MAX_NUM_CHANNELS];
unsigned m_maxAbsHpPrev[USAC_MAX_NUM_CHANNELS];
unsigned m_maxIdxHpPrev[USAC_MAX_NUM_CHANNELS];
unsigned m_pitchLagPrev[USAC_MAX_NUM_CHANNELS];
uint32_t m_tempAnaStats[USAC_MAX_NUM_CHANNELS];
int16_t m_transientLoc[USAC_MAX_NUM_CHANNELS];
public: