diff --git a/src/lib/bitAllocation.h b/src/lib/bitAllocation.h index 6e197ff..9988301 100644 --- a/src/lib/bitAllocation.h +++ b/src/lib/bitAllocation.h @@ -1,5 +1,5 @@ /* bitAllocation.h - header file for class needed for psychoacoustic bit-allocation - * written by C. R. Helmrich, last modified in 2019 - see License.htm for legal notices + * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices * * The copyright in this software is being made available under a Modified BSD-Style License * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third- @@ -15,7 +15,7 @@ // constants, experimental macros #define BA_EPS 1 -#define BA_INTER_CHAN_SIM_MASK 0 // cross-channel simultaneous masking for surround +#define BA_INTER_CHAN_SIM_MASK 0 // 5.1 cross-channel simultaneous masking // class for audio bit-allocation class BitAllocator diff --git a/src/lib/bitStreamWriter.cpp b/src/lib/bitStreamWriter.cpp index c5f5893..3a699ef 100644 --- a/src/lib/bitStreamWriter.cpp +++ b/src/lib/bitStreamWriter.cpp @@ -91,7 +91,7 @@ unsigned BitStreamWriter::writeChannelWiseTnsData (const TnsData& tnsData, const bitCount += 2 + order * coefBits; } } - } // if (n_filt[w]) + } // if n_filt[w] > 0 } } // for w @@ -264,14 +264,14 @@ unsigned BitStreamWriter::writeFDChannelStream (const CoreCoderData& elData, Ent } } } // for w - } // if (maxSfb == 0) + } // if maxSfb == 0 m_auBitStream.write (0, 1); // fac_data_present, no fac_data return bitCount; } -unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData, +unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData, EntropyCoder& entrCoder, #if !RESTRICT_TO_AAC const bool timeWarping, #endif @@ -281,6 +281,7 @@ unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData, const IcsInfo& icsInfo1 = elData.icsInfoCurr[1]; const TnsData& tnsData0 = elData.tnsData[0]; const TnsData& tnsData1 = elData.tnsData[1]; + const SfbGroupData& grp = elData.groupingData[0]; unsigned bitCount = 2, g, b; m_auBitStream.write (elData.tnsActive ? 1 : 0, 1); // tns_active @@ -301,7 +302,7 @@ unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData, bitCount += 3; if (elData.stereoMode == 1) // write SFB-wise ms_used[][] flag { - for (g = 0; g < elData.groupingData[0].numWindowGroups; g++) + for (g = 0; g < grp.numWindowGroups; g++) { const uint8_t* const gMsUsed = &elData.stereoData[m_numSwbShort * g]; @@ -315,10 +316,12 @@ unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData, #if !RESTRICT_TO_AAC else if (elData.stereoMode >= 3) // SFB-wise cplx_pred_data() { + const bool complexCoef = (elData.stereoConfig & 1); + m_auBitStream.write (elData.stereoMode - 3, 1); // _pred_all if (elData.stereoMode == 3) { - for (g = 0; g < elData.groupingData[0].numWindowGroups; g++) + for (g = 0; g < grp.numWindowGroups; g++) { const uint8_t* const gCplxPredUsed = &elData.stereoData[m_numSwbShort * g]; @@ -329,9 +332,50 @@ unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData, } bitCount += ((maxSfbSte + 1) / SFB_PER_PRED_BAND) * g; } - // pred_dir and complex_coef. TODO: rest of cplx_pred_data() - m_auBitStream.write (elData.stereoConfig & 3, 2); + m_auBitStream.write (elData.stereoConfig & 3, 2);// pred_dir bitCount += 3; + if (!indepFlag) // use_prev_frame (&4), delta_code_time (&8) + { + if (complexCoef) + { + m_auBitStream.write (elData.stereoConfig & 4 ? 1 : 0, 1); + bitCount++; + } + m_auBitStream.write (elData.stereoConfig & 8 ? 1 : 0, 1); + bitCount++; + } + // TODO: complete the following code for delta_code_time > 0 + for (g = 0; g < grp.numWindowGroups; g++) + { + const uint8_t* const gCplxPredUsed = &elData.stereoData[m_numSwbShort * g]; + uint8_t aqReIdxPred = 16, aqImIdxPred = 16; // alpha_q = 0 + + for (b = 0; b < maxSfbSte; b += SFB_PER_PRED_BAND) + { + if (gCplxPredUsed[b] > 0) // write dpcm_alpha_q_re/_q_im + { + uint8_t aqIdx = gCplxPredUsed[b] & 31; // -15,..0,..15 + int aqIdxDpcm = (int) aqIdx - aqReIdxPred; + unsigned bits = entrCoder.indexGetBitCount (aqIdxDpcm); + + aqReIdxPred = aqIdx; + m_auBitStream.write (entrCoder.indexGetHuffCode (aqIdxDpcm), bits); + bitCount += bits; + + if (complexCoef) + { + aqIdx = gCplxPredUsed[b + 1] & 31; // <32 kHz short! + aqIdxDpcm = (int) aqIdx - aqImIdxPred; + bits = entrCoder.indexGetBitCount (aqIdxDpcm); + + aqImIdxPred = aqIdx; + m_auBitStream.write (entrCoder.indexGetHuffCode (aqIdxDpcm), bits); + bitCount += bits; + } + } + else aqReIdxPred = aqImIdxPred = 16; + } + } // for g } #endif } // common_window @@ -517,7 +561,7 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData, m_auBitStream.write (CORE_MODE_FD, 1); // L m_auBitStream.write (CORE_MODE_FD, 1); // R bitCount += 2; - bitCount += writeStereoCoreToolInfo (*elData, + bitCount += writeStereoCoreToolInfo (*elData, entropyCoder[ci], // L #if !RESTRICT_TO_AAC tw_mdct[el], #endif diff --git a/src/lib/bitStreamWriter.h b/src/lib/bitStreamWriter.h index 2a5aaa7..2fc28de 100644 --- a/src/lib/bitStreamWriter.h +++ b/src/lib/bitStreamWriter.h @@ -41,7 +41,7 @@ private: const bool timeWarping, const bool noiseFilling, #endif const bool indepFlag = false); - unsigned writeStereoCoreToolInfo (const CoreCoderData& elData, + unsigned writeStereoCoreToolInfo (const CoreCoderData& elData, EntropyCoder& entrCoder, #if !RESTRICT_TO_AAC const bool timeWarping, #endif diff --git a/src/lib/exhaleEnc.cpp b/src/lib/exhaleEnc.cpp index 27bcc7a..639cbb3 100644 --- a/src/lib/exhaleEnc.cpp +++ b/src/lib/exhaleEnc.cpp @@ -235,7 +235,14 @@ static inline uint8_t brModeAndFsToMaxSfbShort(const unsigned bitRateMode, const return (samplingRate > 51200 ? 11 : 13) - 2 + (bitRateMode >> 2); } -#if !SA_IMPROVED_REAL_ABS +#if SA_IMPROVED_REAL_ABS +static inline uint32_t squareMeanRoot (const uint32_t value1, const uint32_t value2) +{ + const double meanRoot = (sqrt ((double) value1) + sqrt ((double) value2)) * 0.5; + + return uint32_t (meanRoot * meanRoot + 0.5); +} +#else static inline uint32_t getComplexRmsValue (const uint32_t rmsValue, const unsigned sfbGroup, const unsigned sfbIndex, const uint8_t numSwb, const TnsData& tnsData) { @@ -657,8 +664,8 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS); const uint32_t maxSfbLong = (samplingRate < 37566 ? 51 /*32 kHz*/ : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)); const uint32_t reductionFactor = (samplingRate < 37566 ? 2 : 3); // undercoding reduction - const uint64_t scaleSr = (samplingRate < 27713 ? 37 - m_bitRateMode : 37); - const uint64_t scaleBr = (m_bitRateMode == 0 ? 32 : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - ((m_bitRateMode - 1) >> 1)); + const uint64_t scaleSr = (samplingRate < 27713 ? 37 - m_bitRateMode : 37) - ((m_bitRateMode & 7) > 2/*TODO*/ ? nChannels >> 1 : 0); + const uint64_t scaleBr = (m_bitRateMode == 0 ? 32 : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - (m_bitRateMode >> 1)); uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf; uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS]; //uint8_t meanTempFlat[USAC_MAX_NUM_CHANNELS]; @@ -817,14 +824,14 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s for (b = 0; b < grpData.sfbsPerGroup; b++) { #if SA_IMPROVED_REAL_ABS - const uint32_t rmsComp = grpRms[b]; + const uint32_t rmsComp = (coreConfig.stereoMode > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]); const uint32_t rmsRef9 = (coreConfig.commonWindow ? refRms[b] >> 9 : rmsComp); #else const uint32_t rmsComp = getComplexRmsValue (grpRms[b], gr, b, numSwbCh, coreConfig.tnsData[ch]); const uint32_t rmsRef9 = (!coreConfig.commonWindow ? rmsComp : getComplexRmsValue (refRms[b], gr, b, numSwbCh, coreConfig.tnsData[1 - ch]) >> 9); #endif - if (rmsComp < grpRmsMin) grpRmsMin = rmsComp; + if (grpRms[b] < grpRmsMin) grpRmsMin = grpRms[b]; if (rmsComp >= rmsRef9 && (rmsComp < (grpStepSizes[b] >> 1))) // zero-quantized { s -= ((grpOff[b + 1] - grpOff[b]) * reductionFactor * __min (2 * SA_EPS, rmsComp) + SA_EPS) >> 11; // / (2 * SA_EPS) @@ -833,7 +840,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s if ((samplingRate >= 27713) && (b < maxSfbLong) && !eightShorts) // uncoded coefs { #if SA_IMPROVED_REAL_ABS - const uint32_t rmsComp = grpRms[b]; + const uint32_t rmsComp = (coreConfig.stereoMode > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]); const uint32_t rmsRef9 = (coreConfig.commonWindow ? refRms[b] >> 9 : rmsComp); #else const uint32_t rmsComp = getComplexRmsValue (grpRms[b], gr, b, numSwbCh, coreConfig.tnsData[ch]); diff --git a/src/lib/quantization.cpp b/src/lib/quantization.cpp index 33645a3..ddd731f 100644 --- a/src/lib/quantization.cpp +++ b/src/lib/quantization.cpp @@ -774,7 +774,7 @@ uint8_t SfbQuantizer::quantizeSpecSfb (EntropyCoder& entropyCoder, const int32_t { grpStats[sfb] = ((uint32_t) maxQBest << 16) | numQBest; // max magnitude and bit count } - } // if (sfIndex == 0) + } // if sfIndex == 0 return __min (sfBest, m_maxSfIndex); } diff --git a/src/lib/stereoProcessing.cpp b/src/lib/stereoProcessing.cpp index 2855988..1621404 100644 --- a/src/lib/stereoProcessing.cpp +++ b/src/lib/stereoProcessing.cpp @@ -18,15 +18,7 @@ // constructor StereoProcessor::StereoProcessor () { - for (unsigned ch = 0; ch < USAC_MAX_NUM_CHANNELS; ch++) - { - m_avgAbsHpPrev[ch] = 0; - m_maxAbsHpPrev[ch] = 0; - m_maxIdxHpPrev[ch] = 1; - m_pitchLagPrev[ch] = 0; - m_tempAnaStats[ch] = 0; - m_transientLoc[ch] = -1; - } + return; } // public functions @@ -37,7 +29,7 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in const uint8_t numSwbFrame, uint8_t* const sfbStereoData, uint32_t* const sfbStepSize1, uint32_t* const sfbStepSize2) { -//const bool applyPredSte = (sfbStereoData != nullptr); // use real-valued predictive stereo + const bool applyPredSte = (sfbStereoData != nullptr); // use real-valued predictive stereo const uint8_t maxSfbSte = __max (groupingData1.sfbsPerGroup, groupingData2.sfbsPerGroup); if ((mdctSpectrum1 == nullptr) || (mdctSpectrum2 == nullptr) || (groupingData1.numWindowGroups != groupingData2.numWindowGroups) || @@ -104,6 +96,17 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in sfbNext1++; prevReM = dmixReM; sfbNext2++; prevReS = dmixReS; } + if (sfb + 1 == numSwbFrame) // handle remaining sample + { + const int32_t dmixReM = int32_t (((int64_t) *sfbMdct1 + (int64_t) *sfbMdct2 + 1) >> 1); + const int32_t dmixReS = int32_t (((int64_t) *sfbMdct1 - (int64_t) *sfbMdct2 + 1) >> 1); + + sumAbsValM += abs (dmixReM); + sumAbsValS += abs (dmixReS); + + *sfbMdct1 = dmixReM; + *sfbMdct2 = dmixReS; + } } else // complex data, both MDCTs and MDSTs are available { @@ -152,8 +155,9 @@ unsigned StereoProcessor::applyFullFrameMatrix (int32_t* const mdctSpectrum1, in { double min = __min (grpRms1[sfb], grpRms2[sfb]); grpStepSizes1[sfb] = grpStepSizes2[sfb] = uint32_t (__max (SP_EPS, (min > sfbRatLR * sfbRmsMaxMS ? sqrt (sfbRatLR * sfbRmsMaxMS * - min) : __min (1.0/*TODO*/, sfbRatLR) * sfbRmsMaxMS)) + 0.5); + min) : __min (1.0/*0 dB*/, sfbRatLR) * sfbRmsMaxMS)) + 0.5); } + if (applyPredSte) sfbStereoData[sfb + numSwbFrame * gr] = 16; // zero prediction coefs } // for sfb } diff --git a/src/lib/stereoProcessing.h b/src/lib/stereoProcessing.h index e46dcb1..f61afa5 100644 --- a/src/lib/stereoProcessing.h +++ b/src/lib/stereoProcessing.h @@ -22,12 +22,6 @@ class StereoProcessor private: // member variables - unsigned m_avgAbsHpPrev[USAC_MAX_NUM_CHANNELS]; - unsigned m_maxAbsHpPrev[USAC_MAX_NUM_CHANNELS]; - unsigned m_maxIdxHpPrev[USAC_MAX_NUM_CHANNELS]; - unsigned m_pitchLagPrev[USAC_MAX_NUM_CHANNELS]; - uint32_t m_tempAnaStats[USAC_MAX_NUM_CHANNELS]; - int16_t m_transientLoc[USAC_MAX_NUM_CHANNELS]; public: