diff --git a/src/app/exhaleApp.rc b/src/app/exhaleApp.rc index b2b649c..cb91851 100644 --- a/src/app/exhaleApp.rc +++ b/src/app/exhaleApp.rc @@ -13,7 +13,7 @@ 0 ICON "exhaleApp.ico" VS_VERSION_INFO VERSIONINFO -FILEVERSION 1,1,6 +FILEVERSION 1,1,6,1 BEGIN BLOCK "StringFileInfo" BEGIN diff --git a/src/lib/bitAllocation.cpp b/src/lib/bitAllocation.cpp index 3043666..c7848e6 100644 --- a/src/lib/bitAllocation.cpp +++ b/src/lib/bitAllocation.cpp @@ -101,6 +101,15 @@ void BitAllocator::getChAverageTempFlat (uint8_t meanTempFlatInCh[USAC_MAX_NUM_C memcpy (meanTempFlatInCh, m_avgTempFlat, nChannels * sizeof (uint8_t)); } +uint16_t BitAllocator::getRateCtrlFac (const int32_t rateRatio, const unsigned samplingRate, const uint32_t specFlatness) +{ + const uint32_t brRatio = __max (1 << 15, __min (USHRT_MAX, rateRatio * (36 - 9 * m_rateIndex))); + const uint16_t mSfmSqr = (m_rateIndex < 2 && samplingRate >= 27713 ? (specFlatness * specFlatness) >> m_rateIndex : 0); + const uint16_t mSfmFac = 256 - (((32 + m_rateIndex) * (specFlatness << 4) - mSfmSqr + (1 << 9)) >> 10); + + return uint16_t ((brRatio * mSfmFac + (1 << 7)) >> 8); +} + uint8_t BitAllocator::getScaleFac (const uint32_t sfbStepSize, const int32_t* const sfbSignal, const uint8_t sfbWidth, const uint32_t sfbRmsValue) { diff --git a/src/lib/bitAllocation.h b/src/lib/bitAllocation.h index 07ec48c..8f9b309 100644 --- a/src/lib/bitAllocation.h +++ b/src/lib/bitAllocation.h @@ -39,6 +39,7 @@ public: // public functions void getChAverageSpecFlat (uint8_t meanSpecFlatInCh[USAC_MAX_NUM_CHANNELS], const unsigned nChannels); void getChAverageTempFlat (uint8_t meanTempFlatInCh[USAC_MAX_NUM_CHANNELS], const unsigned nChannels); + uint16_t getRateCtrlFac (const int32_t rateRatio, const unsigned samplingRate, const uint32_t specFlatness); uint8_t getScaleFac (const uint32_t sfbStepSize, const int32_t* const sfbSignal, const uint8_t sfbWidth, const uint32_t sfbRmsValue); unsigned initAllocMemory (LinearPredictor* const linPredictor, const uint8_t numSwb, const uint8_t bitRateMode); diff --git a/src/lib/bitStreamWriter.cpp b/src/lib/bitStreamWriter.cpp index 8ffa51f..4d1f115 100644 --- a/src/lib/bitStreamWriter.cpp +++ b/src/lib/bitStreamWriter.cpp @@ -936,7 +936,7 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData, const uint8_t numSwbShort, uint8_t* const tempBuffer, #if !RESTRICT_TO_AAC const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling, - const uint32_t frameCount, const uint32_t indepPeriod, + const uint32_t frameCount, const uint32_t indepPeriod, uint32_t* rate, #endif const uint8_t sbrRatioShiftValue, int32_t** const sbrInfoAndData, unsigned char* const accessUnit, const unsigned nSamplesInFrame) @@ -971,7 +971,6 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData, } #endif m_auBitStream.reset (); - m_frameLength = nSamplesInFrame; m_numSwbShort = numSwbShort; m_uCharBuffer = tempBuffer; m_auBitStream.write (usacIndependencyFlag ? 1 : 0, 1); @@ -1112,6 +1111,18 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData, #if RESTRICT_TO_AAC || defined (NO_PREROLL_DATA) memcpy (accessUnit, &m_auBitStream.stream.front (), __min (768 * ci, bitCount >> 3)); #else + m_auByteCount += bitCount >> 3; + if (rate != nullptr) // sampling rate + { + const double framesPerSec = (double) *rate / nSamplesInFrame; + const unsigned targetRate = (4 - (sbrRatioShiftValue & 1)) * ci; // frame average for preset 1 + + if (framesPerSec > 0.0 && targetRate > 0 && frameCount < UINT_MAX) // running overcoding ratio + { + *rate = uint32_t (0.5 + (m_auByteCount * framesPerSec) / (__max (20.0 * framesPerSec, (double) frameCount) * targetRate)); + } + else *rate = 0; // insufficient data + } memcpy (accessUnit, &m_auBitStream.stream.front (), __min (ci * (ipf ? 1152 : 768), bitCount >> 3)); #endif return (bitCount >> 3); // byte count diff --git a/src/lib/bitStreamWriter.h b/src/lib/bitStreamWriter.h index 116a90e..a03e634 100644 --- a/src/lib/bitStreamWriter.h +++ b/src/lib/bitStreamWriter.h @@ -25,7 +25,7 @@ private: // member variables OutputStream m_auBitStream; // access unit bit-stream to write - uint32_t m_frameLength; + uint64_t m_auByteCount; uint8_t m_numSwbShort; // max. SFB count in short windows uint8_t* m_uCharBuffer; // temporary buffer for ungrouping #ifndef NO_PREROLL_DATA @@ -37,7 +37,7 @@ private: #endif // helper functions void writeByteAlignment (); // write 0s for byte alignment - unsigned writeChannelWiseIcsInfo (const IcsInfo& icsInfo); // ics_info() + unsigned writeChannelWiseIcsInfo (const IcsInfo& icsInfo); unsigned writeChannelWiseSbrData (const int32_t* const sbrDataCh0, const int32_t* const sbrDataCh1, const bool indepFlag = false); unsigned writeChannelWiseTnsData (const TnsData& tnsData, const bool eightShorts); @@ -56,7 +56,7 @@ private: public: // constructor - BitStreamWriter () { m_auBitStream.reset (); m_frameLength = 0; m_numSwbShort = 0; m_uCharBuffer = nullptr; + BitStreamWriter () { m_auBitStream.reset (); m_auByteCount = m_numSwbShort = 0; m_uCharBuffer = nullptr; #ifndef NO_PREROLL_DATA memset (m_usacConfig, 0, 20); m_usacConfigLen = 0; memset (m_usacIpfState, 0, 4); #endif @@ -77,7 +77,7 @@ public: const uint8_t numSwbShort, uint8_t* const tempBuffer, #if !RESTRICT_TO_AAC const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling, - const uint32_t frameCount, const uint32_t indepPeriod, + const uint32_t frameCount, const uint32_t indepPeriod, uint32_t* rate, #endif const uint8_t sbrRatioShiftValue, int32_t** const sbrInfoAndData, unsigned char* const accessUnit, const unsigned nSamplesInFrame); diff --git a/src/lib/exhaleEnc.cpp b/src/lib/exhaleEnc.cpp index f890fe9..0211b6b 100644 --- a/src/lib/exhaleEnc.cpp +++ b/src/lib/exhaleEnc.cpp @@ -445,13 +445,13 @@ static const uint8_t numSwbOffsetS[USAC_NUM_FREQ_TABLES] = {13, 13, 15, 16, 16, // ISO/IEC 23003-3, Table 79 static const uint8_t freqIdxToSwbTableIdxAAC[USAC_NUM_SAMPLE_RATES + 2] = { - /*96000*/ 0, 0, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, // AAC - 255, 255, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4 // USAC + /*96000*/ 0, 0, 1, 2, 2, 2,/*24000*/ 3, 3, 4, 4, 4, 5, 5, // AAC + 255, 255, 1, 2, 2, 2, 2, 2,/*25600*/ 3, 3, 3, 4, 4, 4, 4 // USAC }; #if !RESTRICT_TO_AAC static const uint8_t freqIdxToSwbTableIdx768[USAC_NUM_SAMPLE_RATES + 2] = { - /*96000*/ 0, 0, 0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 4, // AAC - 255, 255, 0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4 // USAC + /*96000*/ 0, 0, 0, 1, 1, 2,/*24000*/ 2, 2, 3, 4, 4, 4, 4, // AAC + 255, 255, 0, 1, 2, 2, 2, 2,/*25600*/ 2, 3, 3, 3, 3, 4, 4 // USAC }; #endif @@ -745,7 +745,6 @@ unsigned ExhaleEncoder::getOptParCorCoeffs (const SfbGroupData& grpData, const u return (predGainMax >> 24) & UCHAR_MAX; // max pred gain of all orders and length-1 groups } -#ifndef NO_DTX_MODE uint32_t ExhaleEncoder::getThr (const unsigned channelIndex, const unsigned sfbIndex) { const uint16_t* const sfbLoudMem = m_sfbLoudMem[channelIndex][sfbIndex]; @@ -756,7 +755,6 @@ uint32_t ExhaleEncoder::getThr (const unsigned channelIndex, const unsigned sfbI return sumSfbLoud * (sumSfbLoud >> (toSamplingRate (m_frequencyIdx) >> 13)); // scaled SMR } -#endif unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via scale factors { @@ -933,8 +931,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s #if !RESTRICT_TO_AAC const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong); #endif - const uint16_t mSfmSqr = (m_bitRateMode < 2 && samplingRate >= 27713 ? ((uint16_t) meanSpecFlat[ci] * meanSpecFlat[ci]) >> m_bitRateMode : 0); - const uint16_t mSfmFac = 256u - (((32u + m_bitRateMode) * ((uint32_t) meanSpecFlat[ci] << 4) - mSfmSqr + (1u << 9)) >> 10); + const uint16_t rateFac = m_bitAllocator.getRateCtrlFac (m_priLength ? m_rateFactor : 0, samplingRate, meanSpecFlat[ci]); // RC factor uint32_t* stepSizes = &sfbStepSizes[ci * m_numSwbShort * NUM_WINDOW_GROUPS]; memset (grpData.scaleFactors, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint8_t)); @@ -951,10 +948,10 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s const unsigned lfConst = (samplingRate < 27713 && !eightShorts ? 1 : 2); // lfAtten: LF SNR boost, as in my M.Sc. thesis const unsigned lfAtten = (b <= 5 ? (eightShorts ? 1 : 4) + b * lfConst : 5 * lfConst - 1 + b + ((b + 5) >> 4)); const uint8_t sfbWidth = grpOff[b + 1] - grpOff[b]; - const uint64_t scale = scaleBr * mSfmFac * __min (32, lfAtten * grpData.numWindowGroups); // rate control part 1 (SFB) + const uint64_t scale = scaleBr * rateFac * __min (32, lfAtten * grpData.numWindowGroups); // rate control part 1 (SFB) // scale step-sizes according to VBR mode & derive scale factors from step-sizes - grpStepSizes[b] = uint32_t (__max (BA_EPS, ((1u << 17) + grpStepSizes[b] * scale) >> 18)); + grpStepSizes[b] = uint32_t (__max (BA_EPS, ((1u << 24) + grpStepSizes[b] * scale) >> 25)); #if !RESTRICT_TO_AAC if (!m_noiseFilling[el] || (m_bitRateMode > 0) || (m_shiftValSBR == 0) || (samplingRate < 23004) || (b + 3 - (meanSpecFlat[ci] >> 6) < m_numSwbLong)) // HF @@ -971,19 +968,15 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s const bool keepMaxSfbCurr = ((samplingRate < 37566) || (samplingRate >= 46009 && samplingRate < 55426 && eightShorts)); const uint8_t numSwbFrame = __min ((numSwbCh * ((maxSfbCh == maxSfbCurr) || (m_bitRateMode <= 2) || (m_shiftValSBR > 0) ? 4u : 3u)) >> 2, (eightShorts ? maxSfbCh : maxSfbLong) + (m_bitRateMode < 2 || m_bitRateMode > 3 || keepMaxSfbCurr ? 0u : 1u)); -#ifndef NO_DTX_MODE + if ((m_bitRateMode == 0) && (m_numElements == 1) && (samplingRate < 27713) && eightShorts) { for (s = 0; s < 26; s++) m_sfbLoudMem[ch][s][m_frameCount & 31] = uint16_t (sqrt (double (getThr (ch, s) << (samplingRate >> 13)))); } if ((maxSfbCh < numSwbFrame) || (m_bitRateMode <= 2)) // increase coding bandwidth -#else - if (maxSfbCh < numSwbFrame) // increase coding bandwidth -#endif { for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++) { -#ifndef NO_DTX_MODE const uint32_t* grpRms = &grpData.sfbRmsValues[m_numSwbShort * gr]; if ((m_bitRateMode == 0) && (m_numElements == 1) && (samplingRate < 27713)) @@ -1009,7 +1002,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s if (grpRms[s] < ((3 * TA_EPS) >> 1)) grpData.scaleFactors[s + m_numSwbShort * gr] = 0; } } -#endif + memset (&grpData.scaleFactors[maxSfbCh + m_numSwbShort * gr], 0, (numSwbFrame - maxSfbCh) * sizeof (uint8_t)); } grpData.sfbsPerGroup = coreConfig.icsInfoCurr[ch].maxSfb = numSwbFrame; @@ -1026,12 +1019,10 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s coreConfig.commonMaxSfb = (maxSfb0 == maxSfb1); } } -#ifndef NO_DTX_MODE else if (m_noiseFilling[el] && (m_bitRateMode == 0) && (m_numElements == 1) && (samplingRate < 27713)) { for (s = 0; s < 26; s++) m_sfbLoudMem[ch][s][m_frameCount & 31] = BA_EPS; } -#endif #endif // !RESTRICT_TO_AAC ci++; } // for ch @@ -1294,10 +1285,11 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en entrCoder.setIsShortWindow (shortWinPrev); #if !RESTRICT_TO_AAC s = 22050 + 7350 * m_bitRateMode; // compute channel-wise noise_level and noise_offset + sfIdxPred = ((m_bitRateMode == 0) && (m_priLength) && (m_shiftValSBR) && ((m_tempAnaCurr[ci] >> 24) || (m_tempAnaNext[ci] >> 24)) && (meanSpecFlat[ci] + + __min ((m_tempAnaCurr[ci] >> 16) & UCHAR_MAX, (m_tempAnaNext[ci] >> 16) & UCHAR_MAX) >= 192) ? UCHAR_MAX : meanSpecFlat[ci]); coreConfig.specFillData[ch] = (!m_noiseFilling[el] ? 0 : m_specGapFiller.getSpecGapFillParams (m_sfbQuantizer, m_mdctQuantMag[ci], m_numSwbShort, grpData, nSamplesInFrame, samplingRate, s, - shortWinCurr ? 0 : meanSpecFlat[ci])); - // NOTE: gap-filling SFB bit count might be inaccurate now since scale factors changed + shortWinCurr ? 0 : sfIdxPred)); if (coreConfig.specFillData[ch] == 1) errorValue |= 1; #endif s = ci + nrChannels - 1 - 2 * ch; // other channel in stereo @@ -1332,11 +1324,13 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en ci++; } } // for el - +#if !RESTRICT_TO_AAC + if (m_priLength) m_rateFactor = samplingRate; +#endif return (errorValue > 0 ? 0 : m_outStream.createAudioFrame (m_elementData, m_entropyCoder, m_mdctSignals, m_mdctQuantMag, m_indepFlag, m_numElements, m_numSwbShort, (uint8_t* const) m_tempIntBuf, #if !RESTRICT_TO_AAC - m_timeWarping, m_noiseFilling, m_frameCount - 1u, m_indepPeriod, + m_timeWarping, m_noiseFilling, m_frameCount - 1u, m_indepPeriod, &m_rateFactor, #endif m_shiftValSBR, m_coreSignals, m_outAuData, nSamplesInFrame)); // returns AU size } @@ -1829,7 +1823,7 @@ ExhaleEncoder::ExhaleEncoder (int32_t* const inputPcmData, unsigned ch if (m_channelConf == CCI_CONF) m_channelConf = CCI_2_CHM; // passing numChannels = 0 means 2-ch dual-mono m_numElements = elementCountConfig[m_channelConf % USAC_MAX_NUM_ELCONFIGS]; // used in UsacDecoderConfig m_shiftValSBR = (frameLength >= 1536 ? 1 : 0); - m_frameCount = m_priLength = 0; + m_frameCount = m_rateFactor = m_priLength = 0; m_frameLength = USAC_CCFL (frameLength >> m_shiftValSBR); // ccfl signaled using coreSbrFrameLengthIndex m_frequencyIdx = toSamplingFrequencyIndex (sampleRate >> m_shiftValSBR); // as usacSamplingFrequencyIndex m_indepFlag = true; // usacIndependencyFlag in UsacFrame(), will be set per frame, true in first frame @@ -2085,9 +2079,8 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin m_elementData[el]->elementType = elementTypeConfig[chConf][el]; // usacElementType[el] } } -#ifndef NO_DTX_MODE memset (m_sfbLoudMem, 1, 2 * 26 * 32 * sizeof (uint16_t)); -#endif + // allocate all signal buffers if (m_shiftValSBR > 0) { diff --git a/src/lib/exhaleEnc.h b/src/lib/exhaleEnc.h index be14062..70ae230 100644 --- a/src/lib/exhaleEnc.h +++ b/src/lib/exhaleEnc.h @@ -50,7 +50,9 @@ typedef enum USAC_CCI : signed char typedef enum USAC_CCFL : short { CCFL_UNDEF = -1, +#if !RESTRICT_TO_AAC CCFL_768 = 768, // LD +#endif CCFL_1024 = 1024 // LC } USAC_CCFL; @@ -92,10 +94,9 @@ private: uint8_t m_perCorrHCurr[USAC_MAX_NUM_ELEMENTS]; uint8_t m_perCorrLCurr[USAC_MAX_NUM_ELEMENTS]; uint8_t m_priLength; + uint32_t m_rateFactor; // RC SfbGroupData* m_scaleFacData[USAC_MAX_NUM_CHANNELS]; -#ifndef NO_DTX_MODE uint16_t m_sfbLoudMem[2][26][32]; // loudness mem -#endif SfbQuantizer m_sfbQuantizer; // powerlaw quantization uint8_t m_shiftValSBR; // SBR ratio for shifting SpecAnalyzer m_specAnalyzer; // for spectral analysis @@ -128,9 +129,7 @@ private: int32_t* const mdctSignal, int32_t* const mdstSignal); unsigned getOptParCorCoeffs (const SfbGroupData& grpData, const uint8_t maxSfb, TnsData& tnsData, const unsigned channelIndex, const uint8_t firstGroupIndexToTest = 0); -#ifndef NO_DTX_MODE uint32_t getThr (const unsigned channelIndex, const unsigned sfbIndex); -#endif unsigned psychBitAllocation (); unsigned quantizationCoding (); unsigned spectralProcessing (); @@ -146,7 +145,7 @@ public: #if !RESTRICT_TO_AAC , const bool useNoiseFilling = true, const bool useEcodisExt = false #endif - ); + ); // destructor virtual ~ExhaleEncoder (); // public functions