diff --git a/src/app/exhaleApp.cpp b/src/app/exhaleApp.cpp index 87f9b23..db9837c 100644 --- a/src/app/exhaleApp.cpp +++ b/src/app/exhaleApp.cpp @@ -92,7 +92,7 @@ static bool eaInitDownsampler (int32_t** resampleBuffer, const uint16_t bitRateM { const uint16_t inLength = (frameSize * 3u) >> 1; const uint16_t chLength = inLength + (frameSize >> 3); - const bool useResampler = (frameSize >= 512 && bitRateMode == 1 && sampleRate == 48000); + const bool useResampler = (frameSize >= 512 && bitRateMode <= 1 && sampleRate == 48000); if (useResampler) { @@ -550,7 +550,7 @@ int main (const int argc, char* argv[]) if (wavReader.getSampleRate () > 32100 + (unsigned) variableCoreBitRateMode * 12000 + (variableCoreBitRateMode >> 2) * 3900 #if ENABLE_RESAMPLING - && (variableCoreBitRateMode != 1 || wavReader.getSampleRate () != 48000) + && (variableCoreBitRateMode > 1 || wavReader.getSampleRate () != 48000) #endif ) { @@ -704,7 +704,7 @@ int main (const int argc, char* argv[]) if (*argv[1] != '#') // user-def. mode { fprintf_s (stdout, " Encoding %d-kHz %d-channel %d-bit WAVE to low-complexity xHE-AAC at %d kbit/s\n\n", - sampleRate / 1000, numChannels, inSampDepth, __min (4, numChannels) * (24 + variableCoreBitRateMode * 8)); + sampleRate / 1000, numChannels, inSampDepth, __min (5, numChannels) * (24 + variableCoreBitRateMode * 8)); } if (!readStdin && (mod3Percent > 0)) { diff --git a/src/lib/bitAllocation.cpp b/src/lib/bitAllocation.cpp index e58a86b..ea9fc63 100644 --- a/src/lib/bitAllocation.cpp +++ b/src/lib/bitAllocation.cpp @@ -412,7 +412,7 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USAC_MAX_NUM_CHANNELS], const uint8_t numSwbShort, const int32_t* const mdctSpec[USAC_MAX_NUM_CHANNELS], const unsigned nSamplesInFrame, const unsigned nChannels, const unsigned samplingRate, uint32_t* const sfbStepSizes, - const unsigned firstChannelIndex, const bool commonWindow /*= false*/, + const unsigned firstChannelIndex, const uint8_t* const sfm, const bool commonWindow, const uint8_t* const sfbStereoData /*= nullptr*/, const uint8_t stereoConfig /*= 0*/) { const uint8_t maxSfbL16k = 16 + __min (35, (9 << 17) / __max (1, samplingRate)); // SFB index at 15.8 kHz @@ -420,7 +420,7 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA const uint32_t redWeight = __min (4, 9 - __min (9, m_rateIndex)); short* const tempCoeffs = (short* const) m_tempSfbValue; - if ((groupData == nullptr) || (mdctSpec == nullptr) || (sfbStepSizes == nullptr) || (nSamplesInFrame > 2048) || + if ((groupData == nullptr) || (mdctSpec == nullptr) || (sfbStepSizes == nullptr) || (sfm == nullptr) || (nSamplesInFrame > 2048) || (numSwbShort < MIN_NUM_SWB_SHORT) || (numSwbShort > MAX_NUM_SWB_SHORT) || (nChannels > USAC_MAX_NUM_CHANNELS) || (samplingRate < 7350) || (samplingRate > 96000) || (firstChannelIndex > USAC_MAX_NUM_CHANNELS)) { @@ -469,8 +469,8 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA } } if (grpRms[b] < grpRmsMin) grpRmsMin = grpRms[b]; -#ifndef NO_DTX_MODE - if (m_rateIndex > 0) +#if 1 + if ((m_rateIndex > 0) || (samplingRate >= 27713 && sfm[ch] <= (SCHAR_MAX >> 1))) #endif if (rmsComp >= rmsRef9 && (rmsComp < (grpStepSizes[b] >> 1))) // zero-quantized { @@ -483,8 +483,8 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA const uint32_t rmsComp = (grpSte != nullptr && grpSte[b] > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]); const uint32_t rmsRef9 = (commonWindow ? refRms[b] >> 9 : rmsComp); const uint8_t sfbWidth = grpOff[maxSfbL16k] - grpOff[b]; -#ifndef NO_DTX_MODE - if (m_rateIndex > 0) +#if 1 + if ((m_rateIndex > 0) || (samplingRate >= 27713 && sfm[ch] <= (SCHAR_MAX >> 1))) #endif if (rmsComp >= rmsRef9) // check only first SFB above max_sfb for simplification { @@ -498,6 +498,7 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA for (b = 0; b < maxSfbInCh; b++) // improve step-sizes by limiting and attenuation { grpStepSizes[b] = uint32_t ((__max (grpRmsMin, grpStepSizes[b]) * s * (m_tempSfbValue[b] + 1u) + (1u << 14)) >> 15); + if (grpStepSizes[b] <= (grpRms[b] >> 11)) grpStepSizes[b] = __max (BA_EPS, grpRms[b] >> 11); } } // for gr } // for ch diff --git a/src/lib/bitAllocation.h b/src/lib/bitAllocation.h index 7d2f81a..c8b653d 100644 --- a/src/lib/bitAllocation.h +++ b/src/lib/bitAllocation.h @@ -51,7 +51,7 @@ public: unsigned imprSfbStepSizes (const SfbGroupData* const groupData[USAC_MAX_NUM_CHANNELS], const uint8_t numSwbShort, const int32_t* const mdctSpec[USAC_MAX_NUM_CHANNELS], const unsigned nSamplesInFrame, const unsigned nChannels, const unsigned samplingRate, uint32_t* const sfbStepSizes, - const unsigned firstChannelIndex, const bool commonWindow = false, + const unsigned firstChannelIndex, const uint8_t* const sfm, const bool commonWindow, const uint8_t* const sfbStereoData = nullptr, const uint8_t stereoConfig = 0); }; // BitAllocator diff --git a/src/lib/exhaleEnc.cpp b/src/lib/exhaleEnc.cpp index 985a563..b2488ee 100644 --- a/src/lib/exhaleEnc.cpp +++ b/src/lib/exhaleEnc.cpp @@ -941,14 +941,13 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s else memset (coreConfig.stereoDataCurr, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint8_t)); errorValue |= m_bitAllocator.imprSfbStepSizes (m_scaleFacData, m_numSwbShort, m_mdctSignals, nSamplesInFrame, - nrChannels, samplingRate, sfbStepSizes, ci, coreConfig.commonWindow, - coreConfig.stereoDataCurr, coreConfig.stereoConfig); + nrChannels, samplingRate, sfbStepSizes, ci, meanSpecFlat, + coreConfig.commonWindow, coreConfig.stereoDataCurr, coreConfig.stereoConfig); for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop { SfbGroupData& grpData = coreConfig.groupingData[ch]; const bool eightShorts = (coreConfig.icsInfoCurr[ch].windowSequence == EIGHT_SHORT); - const bool saveBitRate = (meanSpecFlat[ci] > (UCHAR_MAX * 3) / 4 && samplingRate >= 32000 + (unsigned) m_bitRateMode * 12000); const uint8_t maxSfbCh = grpData.sfbsPerGroup; #if !RESTRICT_TO_AAC const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong); @@ -968,7 +967,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s for (unsigned b = 0; b < maxSfbCh; b++) { const unsigned lfConst = (samplingRate < 27713 && !eightShorts ? 1 : 2); // lfAtten: LF SNR boost, as in my M.Sc. thesis - const unsigned lfAtten = (saveBitRate || b <= 5 ? (eightShorts ? 1 : 4) + b * lfConst : 5 * lfConst - 1 + b + ((b + 5) >> 4)); + const unsigned lfAtten = (b <= 5 ? (eightShorts ? 1 : 4) + b * lfConst : 5 * lfConst - 1 + b + ((b + 5) >> 4)); const uint8_t sfbWidth = grpOff[b + 1] - grpOff[b]; const uint64_t scale = scaleBr * mSfmFac * __min (32, lfAtten * grpData.numWindowGroups); // rate control part 1 (SFB) @@ -987,8 +986,6 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s const uint8_t numSwbFrame = __min ((numSwbCh * ((maxSfbCh == maxSfbCurr) || (m_bitRateMode <= 2) ? 4u : 3u)) >> 2, (eightShorts ? maxSfbCh : maxSfbLong) + (m_bitRateMode < 2 || m_bitRateMode > 3 || keepMaxSfbCurr ? 0 : 1)); #ifndef NO_DTX_MODE - const bool prvEightShorts = (coreConfig.icsInfoPrev[ch].windowSequence == EIGHT_SHORT); - if ((m_bitRateMode < 1) && (m_numElements == 1) && (samplingRate < 27713) && eightShorts) { for (s = 0; s < 26; s++) m_sfbLoudMem[ch][s][m_frameCount & 31] = uint16_t (sqrt (double (getThr (ch, s) << (samplingRate >> 13)))); @@ -1007,7 +1004,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s { const uint32_t* refRms = &coreConfig.groupingData[1 - ch].sfbRmsValues[m_numSwbShort * gr]; uint8_t* grpStereoData = &coreConfig.stereoDataCurr[m_numSwbShort * gr]; - const unsigned sfbStart = (prvEightShorts ? (samplingRate > 16000 ? 24 : 17) : m_specGapFiller.getFirstGapFillSfb ()); + const unsigned sfbStart = __max (samplingRate < 18783 ? 17 : 24, m_specGapFiller.getFirstGapFillSfb ()); for (s = sfbStart; s < maxSfbCh; s++) { @@ -1021,7 +1018,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s else if ((m_bitRateMode <= 4) && (meanSpecFlat[ci] <= (SCHAR_MAX >> 1))) // low-RMS { - for (s = (prvEightShorts ? (samplingRate < 27713 ? 24 : 22) : m_specGapFiller.getFirstGapFillSfb ()); s < maxSfbCh; s++) + for (s = __max (samplingRate < 27713 ? (samplingRate < 18783 ? 17 : 24) : 22, m_specGapFiller.getFirstGapFillSfb ()); s < maxSfbCh; s++) { if (grpRms[s] < ((3 * TA_EPS) >> 1)) grpData.scaleFactors[s + m_numSwbShort * gr] = 0; } @@ -1360,6 +1357,7 @@ unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS icsCurr.maxSfb = MAX_NUM_SWB_LFE; while (grpSO[icsCurr.maxSfb] > LFE_MAX) icsCurr.maxSfb--; // limit coefficients in LFE + grpData.sfbsPerGroup = icsCurr.maxSfb; ci++; } else // SCE or CPE: bandwidth-to-max_sfb mapping, short-window grouping for each channel @@ -2078,7 +2076,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin // initialize coder class memory m_tempIntBuf = m_timeSignals[0]; - if (m_bitAllocator.initAllocMemory (&m_linPredictor, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode) > 0 || + if (m_bitAllocator.initAllocMemory (&m_linPredictor, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode >> ((nChannels - 1) >> 1)) > 0 || #if EC_TRELLIS_OPT_CODING m_sfbQuantizer.initQuantMemory (nSamplesInFrame, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode, toSamplingRate (m_frequencyIdx)) > 0 || #else