diff --git a/src/app/exhaleApp.cpp b/src/app/exhaleApp.cpp index fe3e828..0ed3295 100644 --- a/src/app/exhaleApp.cpp +++ b/src/app/exhaleApp.cpp @@ -846,7 +846,11 @@ int main (const int argc, char* argv[]) if (!readStdin && (mod3Percent > 0) && !(mp4Writer.getFrameCount () % mod3Percent)) { +#if ENABLE_SIMPLE_SBR + if ((i++) < (coreSbrFrameLengthIndex >= 3 ? 17 : 34)) // with short files +#else if ((i++) < 34) // for short files +#endif { fprintf_s (stdout, "-"); fflush (stdout); } diff --git a/src/lib/bitStreamWriter.cpp b/src/lib/bitStreamWriter.cpp index fd8ac98..f133453 100644 --- a/src/lib/bitStreamWriter.cpp +++ b/src/lib/bitStreamWriter.cpp @@ -116,15 +116,23 @@ unsigned BitStreamWriter::writeChannelWiseSbrData (const int32_t* const sbrDataC const bool indepFlag /*= false*/) { const unsigned nb = (sbrDataCh0 != nullptr ? 2 * ((sbrDataCh0[0] >> 23) & 1) + 2 : 0); // noise bits/ch = 2 or 4 +#if ENABLE_INTERTES + const bool issTes = (nb > 0 ? ((sbrDataCh0[0] >> 30) & 1) : false); + const int8_t res = (nb > 0 ? (sbrDataCh0[0] >> 29) & 1 : 0); // bs_amp_res +#else const int16_t res = (nb > 0 ? sbrDataCh0[0] >> 29 : 0); // short bs_amp_res +#endif const bool stereo = (sbrDataCh1 != nullptr); const bool couple = (stereo ? ((sbrDataCh1[0] >> 23) & 1) : false); - unsigned bitCount = (stereo ? (couple ? 2 : 7 + nb) : 0) + 6 + nb, i, tmpCh0, tmpCh1; + unsigned bitCount = (stereo ? (couple ? 2 : 7 + nb) : 0) + 6 + nb; + unsigned i, envCh0, envCh1, resCh0, resCh1; // bs_num_env[], bs_freq_res[] if (nb == 0) return 0; - tmpCh0 = (sbrDataCh0[0] >> 21) & 3; - tmpCh1 = ((stereo && !couple ? sbrDataCh1[0] : sbrDataCh0[0]) >> 21) & 3; + envCh0 = 1 << ((sbrDataCh0[0] >> 21) & 3); + resCh0 = (sbrDataCh0[0] >> 20) & 1; + envCh1 = 1 << (((stereo && !couple ? sbrDataCh1[0] : sbrDataCh0[0]) >> 21) & 3); + resCh1 = ((stereo && !couple ? sbrDataCh1[0] : sbrDataCh0[0]) >> 20) & 1; if (stereo) m_auBitStream.write (couple ? 1 : 0, 1); // _coupling @@ -132,20 +140,20 @@ unsigned BitStreamWriter::writeChannelWiseSbrData (const int32_t* const sbrDataC m_auBitStream.write ((sbrDataCh0[0] >> 20) & 7, 5); // class data if (stereo && !couple) m_auBitStream.write ((sbrDataCh1[0] >> 20) & 7, 5); - // sbr_dtdf() - i = (1u << tmpCh0) - (indepFlag ? 1 : 0); // actual bs_num_env[0] + // sbr_dtdf(), assumes bs_pvc == 0, i.e. no PVC like rest of code + i = envCh0 - (indepFlag ? 1 : 0); if (i > 0) m_auBitStream.write ((sbrDataCh0[0] >> 12) & 255, i); // _df_env bitCount += i; - i = (tmpCh0 > 0 ? 2 : 1) - (indepFlag ? 1 : 0);// bs_num_noise[0] + i = __min (2, envCh0) - (indepFlag ? 1 : 0); if (i > 0) m_auBitStream.write ((sbrDataCh0[0] >> 4) & 255, i); // df_noise bitCount += i; if (stereo) { - i = (1u << tmpCh1) - (indepFlag ? 1 : 0); + i = envCh1 - (indepFlag ? 1 : 0); if (i > 0) m_auBitStream.write ((sbrDataCh1[0] >> 12) & 255, i); bitCount += i; - i = (tmpCh1 > 0 ? 2 : 1) - (indepFlag ? 1 : 0); + i = __min (2, envCh1) - (indepFlag ? 1 : 0); if (i > 0) m_auBitStream.write ((sbrDataCh1[0] >> 4) & 255, i); bitCount += i; } @@ -155,37 +163,64 @@ unsigned BitStreamWriter::writeChannelWiseSbrData (const int32_t* const sbrDataC m_auBitStream.write (sbrDataCh0[0] & i, nb); // bs_invf_mode[0][] if (stereo && !couple) m_auBitStream.write (sbrDataCh1[0] & i, nb); - // sbr_envelope() for mono/left channel, assumes bs_pvc_mode == 0 - for (i = 1; i <= (1u << tmpCh0); i++) // dt loop + // sbr_envelope() for mono/left channel, assumes bs_df_env[] == 0 + for (i = 1; i <= envCh0; i++) // dt loop { - const uint8_t bits = (res > 0 && tmpCh0 > 0 ? 6 : 7); + const uint8_t bits = (res > 0 && envCh0 > 1 ? 6 : 7); // start + const uint8_t bitd = (2 + 3 * resCh0) * 2; // differential, <25 TODO: VLC words - m_auBitStream.write (15/*sbrDataCh0[i] & 127*/, bits); // bs_data_env + m_auBitStream.write (sbrDataCh0[i] & 127, bits); // bs_data_env bitCount += bits; - m_auBitStream.write (sbrDataCh0[i] >> 7, 5<<1); // TODO: VLC words - bitCount += 5<<1; + m_auBitStream.write (sbrDataCh0[i] >> 7, bitd); + bitCount += bitd; +#if ENABLE_INTERTES + if (issTes) + { + m_auBitStream.write ((sbrDataCh0[9] >> (i - 1)) & 1, 1); // bs_temp_shape[ch][env=i] + bitCount++; + if ((sbrDataCh0[9] >> (i - 1)) & 1) + { + m_auBitStream.write (GAMMA, 2); // bs_inter_temp_shape_mode + bitCount += 2; + } + } +#endif } if (stereo && !couple) { - for (i = 1; i <= (1u << tmpCh1); i++) // sbr_envelope() dt loop + for (i = 1; i <= envCh1; i++) // decoup. sbr_envelope() dt loop { - const uint8_t bits = (res > 0 && tmpCh1 > 0 ? 6 : 7); + const uint8_t bits = (res > 0 && envCh1 > 1 ? 6 : 7); + const uint8_t bitd = (2 + 3 * resCh1) * 2; // TODO: VLC words m_auBitStream.write (sbrDataCh1[i] & 127, bits); bitCount += bits; - m_auBitStream.write (sbrDataCh1[i] >> 7, 5<<1); // TODO: VLC words - bitCount += 5<<1; + m_auBitStream.write (sbrDataCh1[i] >> 7, bitd); + bitCount += bitd; +#if ENABLE_INTERTES + if (issTes) + { + m_auBitStream.write ((sbrDataCh1[9] >> (i - 1)) & 1, 1); // bs_temp_shape[ch][env] + bitCount++; + if ((sbrDataCh1[9] >> (i - 1)) & 1) + { + m_auBitStream.write (GAMMA, 2); + bitCount += 2; + } + } +#endif } } - for (i = (tmpCh0 > 0 ? 2 : 1); i > 0; i--) // sbr_noise() dt loop + // sbr_noise() for mono/left channel, assumes bs_df_noise[i] == 0 + for (i = __min (2, envCh0); i > 0; i--) // dt loop { - m_auBitStream.write (31/*(sbrDataCh0[9] >> (12 * i)) & 31*/, 5); // _data_noise + m_auBitStream.write ((sbrDataCh0[9] >> (13 * i)) & 31, 5); // _data_noise bitCount += 5; if (nb == 4) { - m_auBitStream.write ((sbrDataCh0[9] >> (12 * i - 6)) & 31, 1); // TODO: VLC word + m_auBitStream.write ((sbrDataCh0[9] >> (13 * i - 5)) & 31, 1); // TODO: VLC word bitCount++; } } @@ -194,24 +229,37 @@ unsigned BitStreamWriter::writeChannelWiseSbrData (const int32_t* const sbrDataC { if (couple) { - for (i = 1; i <= (1u << tmpCh1); i++) // sbr_envelope dt loop + for (i = 1; i <= envCh1; i++) // coup. sbr_envelope() dt loop { - const uint8_t bits = (res > 0 && tmpCh1 > 0 ? 5 : 6); + const uint8_t bits = (res > 0 && envCh1 > 1 ? 5 : 6); + const uint8_t bitd = (2 + 3 * resCh1) * 2; // TODO: VLC words m_auBitStream.write (sbrDataCh1[i] & 63, bits); bitCount += bits; - m_auBitStream.write (sbrDataCh1[i] >> 7, 5<<1); // TODO: VLC words - bitCount += 5<<1; + m_auBitStream.write (sbrDataCh1[i] >> 7, bitd); + bitCount += bitd; +#if ENABLE_INTERTES + if (issTes) + { + m_auBitStream.write ((sbrDataCh1[9] >> (i - 1)) & 1, 1); // bs_temp_shape[ch][i] + bitCount++; + if ((sbrDataCh1[9] >> (i - 1)) & 1) + { + m_auBitStream.write (GAMMA, 2); + bitCount += 2; + } + } +#endif } } - for (i = (tmpCh1 > 0 ? 2 : 1); i > 0; i--) // sbr_noise dt loop + for (i = __min (2, envCh1); i > 0; i--) // sbr_noise() dt loop { - m_auBitStream.write ((sbrDataCh1[9] >> (12 * i)) & 31, 5); + m_auBitStream.write ((sbrDataCh1[9] >> (13 * i)) & 31, 5); bitCount += 5; if (nb == 4) { - m_auBitStream.write ((sbrDataCh1[9] >> (12 * i - 6)) & 31, 1); // TODO: VLC word + m_auBitStream.write ((sbrDataCh1[9] >> (13 * i - 5)) & 31, 1); // TODO: VLC word bitCount++; } } @@ -677,8 +725,11 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex, if (sbrRatioShiftValue > 0) // sbrRatioIndex > 0: SbrConfig { const uint32_t sf = (samplingFrequencyIndex == 6 || samplingFrequencyIndex < 5 ? 10 : (samplingFrequencyIndex < 8 ? 9 : 8)); // bs_stop_freq - +#if ENABLE_INTERTES + m_auBitStream.write (2, 3); // bs_interTes = 1, harmonicSBR, bs_pvc = 0 +#else m_auBitStream.write (0, 3); // fix harmonicSBR, bs_interTes, bs_pvc = 0 +#endif bitCount += 13; // incl. SbrDfltHeader following hereafter m_auBitStream.write (15, 4); // 11025 @ 44.1, 11625 @ 48, 15000 @ 64 kHz m_auBitStream.write (sf, 4); // 16193 @ 44.1, 18375 @ 48, 22500 @ 64 kHz @@ -778,7 +829,7 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData, { if (usacIndependencyFlag) { - m_auBitStream.write ((sbrInfoAndData[ci][0] >> 24), 6); // SbrInfo() + m_auBitStream.write ((sbrInfoAndData[ci][0] >> 24) & 63, 6); // SbrInfo(), bs_pvc = 0 m_auBitStream.write (1, 1);// fix sbrUseDfltHeader = 1 bitCount += 7; } @@ -820,7 +871,7 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData, { if (usacIndependencyFlag) { - m_auBitStream.write ((sbrInfoAndData[ci][0] >> 24), 6); // SbrInfo() + m_auBitStream.write ((sbrInfoAndData[ci][0] >> 24) & 63, 6); // SbrInfo(), bs_pvc = 0 m_auBitStream.write (1, 1);// fix sbrUseDfltHeader = 1 bitCount += 7; } diff --git a/src/lib/exhaleEnc.cpp b/src/lib/exhaleEnc.cpp index 85622bf..33be9e8 100644 --- a/src/lib/exhaleEnc.cpp +++ b/src/lib/exhaleEnc.cpp @@ -371,25 +371,6 @@ static inline unsigned toNumChannels (const USAC_CCI chConfigurationIndex) return numberOfChannels[__max (0, (signed char) chConfigurationIndex)]; } -// ISO/IEC 23003-3, Table 68 -static const uint8_t elementCountConfig[USAC_MAX_NUM_ELCONFIGS] = {0, 1, 1, 2, 3, 3, 4, 5, 2, 2, 2, 5, 5}; - -static const ELEM_TYPE elementTypeConfig[USAC_MAX_NUM_ELCONFIGS][USAC_MAX_NUM_ELEMENTS] = { - {ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_UNDEF - {ID_USAC_SCE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_1_CH - {ID_USAC_CPE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_2_CH - {ID_USAC_SCE, ID_USAC_CPE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_3_CH - {ID_USAC_SCE, ID_USAC_CPE, ID_USAC_SCE, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_4_CH - {ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_5_CH - {ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_LFE, ID_EL_UNDEF}, // CCI_6_CH - {ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_LFE}, // CCI_8_CH - {ID_USAC_SCE, ID_USAC_SCE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_2_CHM - {ID_USAC_CPE, ID_USAC_SCE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_3_CHR - {ID_USAC_CPE, ID_USAC_CPE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_4_CHR - {ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_SCE, ID_USAC_LFE}, // CCI_7_CH - {ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_LFE} // CCI_8_CHM -}; - // ISO/IEC 14496-3, Table 4.140 static const uint16_t sfbOffsetL0[42] = { // 88.2 and 96 kHz 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 64, 72, 80, 88, 96, 108, @@ -785,8 +766,8 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s const unsigned nSamplesInFrame = toFrameLength (m_frameLength); const unsigned samplingRate = toSamplingRate (m_frequencyIdx); const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS); - const uint32_t maxSfbLong = (samplingRate < 37566 || m_shiftValSBR > 0 ? m_numSwbLong // was MAX_NUM_SWB_LONG - : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)); + const bool useMaxBandwidth = (samplingRate < 37566 || m_shiftValSBR > 0); + const uint32_t maxSfbLong = (useMaxBandwidth ? m_numSwbLong : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)); const uint32_t scaleSBR = (m_shiftValSBR > 0 ? 8 : 0); // reduces core rate by 25 % const uint64_t scaleSr = (samplingRate < 27713 ? (samplingRate < 23004 ? 32 : 34) - __min (3 << m_shiftValSBR, m_bitRateMode) : (samplingRate < 37566 && m_bitRateMode != 3u ? 36 : 37)) - (nChannels >> 1); @@ -847,7 +828,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s if (m_perCorrHCurr[el] > 128) // execute stereo pre-processing to increase correlation { const int16_t chanCorrSign = (coreConfig.stereoConfig & 2 ? -1 : 1); - const uint16_t nSamplesMax = (samplingRate < 37566 ? nSamplesInFrame : swbOffsetsL[m_swbTableIdx][__min (m_numSwbLong, maxSfbLong + 1)]); + const uint16_t nSamplesMax = (useMaxBandwidth ? nSamplesInFrame : swbOffsetsL[m_swbTableIdx][__min (m_numSwbLong, maxSfbLong + 1)]); const uint8_t steppFadeLen = (eightShorts0 ? 4 : (coreConfig.tnsActive ? 32 : 64)); const uint8_t steppFadeOff = ((m_bitRateMode + 77000 / samplingRate) & 6) << (eightShorts0 ? 2 : 5); const int64_t steppWeightI = __min (64, m_perCorrHCurr[el] - 128) >> (eightShorts0 || coreConfig.tnsActive ? 1 : 0); @@ -987,9 +968,10 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s #if !RESTRICT_TO_AAC if ((maxSfbCh > 0) && m_noiseFilling[el] && (m_bitRateMode <= 3 || !eightShorts)) { - const uint32_t maxSfbCurr = (eightShorts ? (samplingRate < 37566 ? 14 : brModeAndFsToMaxSfbShort (m_bitRateMode, samplingRate)) : maxSfbLong); + const uint32_t maxSfbCurr = (eightShorts ? (useMaxBandwidth ? 17 - (samplingRate >> 13) // was 14, good for 22.05 - 32 kHz + : brModeAndFsToMaxSfbShort (m_bitRateMode, samplingRate)) : maxSfbLong); const bool keepMaxSfbCurr = ((samplingRate < 37566) || (samplingRate >= 46009 && samplingRate < 55426 && eightShorts)); - const uint8_t numSwbFrame = __min ((numSwbCh * ((maxSfbCh == maxSfbCurr) || (m_bitRateMode <= 2) ? 4u : 3u)) >> 2, + const uint8_t numSwbFrame = __min ((numSwbCh * ((maxSfbCh == maxSfbCurr) || (m_bitRateMode <= 2) || (m_shiftValSBR > 0) ? 4u : 3u)) >> 2, (eightShorts ? maxSfbCh : maxSfbLong) + (m_bitRateMode < 2 || m_bitRateMode > 3 || keepMaxSfbCurr ? 0 : 1)); #ifndef NO_DTX_MODE if ((m_bitRateMode < 1) && (m_numElements == 1) && (samplingRate < 27713) && eightShorts) @@ -1081,6 +1063,8 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en const unsigned nChannels = toNumChannels (m_channelConf); const unsigned nSamplesInFrame = toFrameLength (m_frameLength); const unsigned samplingRate = toSamplingRate (m_frequencyIdx); + const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead + const bool useMaxBandwidth = (samplingRate < 37566 || m_shiftValSBR > 0); const unsigned* const coeffMagn = m_sfbQuantizer.getCoeffMagnPtr (); uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS]; uint8_t meanTempFlat[USAC_MAX_NUM_CHANNELS] = {208, 208, 208, 208, 208, 208, 208, 208}; @@ -1180,8 +1164,8 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en if (grpData.sfbsPerGroup > 0) // rate control part 2 to reach constrained VBR (CVBR) { - const uint8_t maxSfbLong = (samplingRate < 37566 ? 63 - (samplingRate >> 11) : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)); - const uint8_t maxSfbShort = (samplingRate < 37566 ? 21 - (samplingRate >> 12) : brModeAndFsToMaxSfbShort(m_bitRateMode, samplingRate)); + const uint8_t maxSfbLong = (useMaxBandwidth ? 54 - (samplingRate >> 13) : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)); + const uint8_t maxSfbShort = (useMaxBandwidth ? 19 - (samplingRate >> 13) : brModeAndFsToMaxSfbShort(m_bitRateMode, samplingRate)); const uint16_t peakIndex = (shortWinCurr ? 0 : (m_specAnaCurr[ci] >> 5) & 2047); const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort - 2 + (meanSpecFlat[ci] >> 6) : maxSfbLong - 6 + (meanSpecFlat[ci] >> 5)) + (shortWinCurr ? -3 + (((1 << 5) + meanTempFlat[ci]) >> 6) : -7 + (((1 << 4) + meanTempFlat[ci]) >> 5)); @@ -1316,8 +1300,28 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en #endif if ((coreConfig.elementType < ID_USAC_LFE) && (m_shiftValSBR > 0)) // collect SBR data { + int32_t* const sbrLevel = &m_coreSignals[ci][nSamplesTempAna - 64 + nSamplesInFrame]; + memset (m_coreSignals[ci], 0, 10 * sizeof (int32_t)); // TODO - m_coreSignals[ci][0] = 1 << 20; // fix bs_freq_res = high +#if ENABLE_INTERTES + m_coreSignals[ci][0] = (shortWinPrev ? 0x40000000 : 0x40100000); // freq_res, interTes +#else + m_coreSignals[ci][0] = (shortWinPrev ? 0 : 1) << 20; // bs_freq_res = low resp. high +#endif + const int32_t msfVal = (shortWinPrev ? 31 : __max (2, __max (m_meanFlatPrev[ci], meanSpecFlat[ci]) >> 3)); + + m_meanFlatPrev[ci] = meanSpecFlat[ci]; + m_coreSignals[ci][9] = (msfVal << 13) | (msfVal << 26); // noise level(s), 31 = none + m_coreSignals[ci][0] |= 4 - int32_t (sqrt (0.75 * msfVal)); // filter mode, 0 = none + + const uint64_t enAdd = (uint64_t) sbrLevel[11] * (uint64_t) sbrLevel[11]; // envelope + const uint64_t enSub = (uint64_t) sbrLevel[21] * (uint64_t) sbrLevel[21]; // 1.9 frms + const uint64_t enSum = (uint64_t) sbrLevel[20] * (uint64_t) sbrLevel[20]; // of delay + const uint64_t enAdj = (enSum + enAdd - enSub + (nSamplesInFrame >> 1)) / nSamplesInFrame; + + m_coreSignals[ci][1] = (enAdj > 8192 ? int32_t (1.375 - 0.03125 * msfVal + 6.64385619 * log10 ((double) enAdj)) - 26 : 0); + memcpy (&sbrLevel[20], &sbrLevel[10] /*last*/, 10 * sizeof (int32_t)); + memcpy (&sbrLevel[10], sbrLevel /*& current*/, 10 * sizeof (int32_t)); // delay line } ci++; } @@ -1338,6 +1342,7 @@ unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS const unsigned nSamplesInShort = nSamplesInFrame >> 3; const unsigned samplingRate = toSamplingRate (m_frequencyIdx); const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS); + const bool useMaxBandwidth = (samplingRate < 37566 || m_shiftValSBR > 0); unsigned ci = 0, s; // running index unsigned errorValue = 0; // no error @@ -1384,7 +1389,7 @@ unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS const bool eightShorts = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT); const uint8_t meanSpecFlat = (((m_specAnaCurr[ci] >> 16) & UCHAR_MAX) + ((m_specAnaCurr[ci + 1] >> 16) & UCHAR_MAX) + 1) >> 1; const uint16_t* const swbo = swbOffsetsL[m_swbTableIdx]; - const uint16_t nSamplesMax = (samplingRate < 37566 ? nSamplesInFrame : swbo[brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)]); + const uint16_t nSamplesMax = (useMaxBandwidth ? nSamplesInFrame : swbo[brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)]); const int16_t steAnaStats = m_specAnalyzer.stereoSigAnalysis (m_mdctSignals[ci], m_mdctSignals[ci + 1], m_mdstSignals[ci], m_mdstSignals[ci + 1], nSamplesMax, nSamplesInFrame, eightShorts, coreConfig.stereoDataCurr); @@ -1862,6 +1867,7 @@ ExhaleEncoder::ExhaleEncoder (int32_t* const inputPcmData, unsigned ch m_mdctQuantMag[ch] = nullptr; m_mdctSignals[ch] = nullptr; m_mdstSignals[ch] = nullptr; + m_meanFlatPrev[ch] = 0; m_scaleFacData[ch] = nullptr; m_specAnaCurr[ch] = 0; m_specFlatPrev[ch] = 0; @@ -1940,7 +1946,7 @@ unsigned ExhaleEncoder::encodeLookahead () *(predSig + 2) * (int64_t) filterC[2] + *(predSig + 3) * (int64_t) filterC[3]; *(--predSig) = int32_t ((predSample > 0 ? -predSample + (1 << 9) - 1 : -predSample) >> 9); } - if (m_shiftValSBR > 0) memset (m_coreSignals[ch], 0, (nSamplesInFrame >> 2) * sizeof (int32_t)); + if (m_shiftValSBR > 0) memset (m_coreSignals[ch], 0, ((nSamplesInFrame * 41) >> (4 + m_shiftValSBR)) * sizeof (int32_t)); } // set initial temporal channel statistic to something meaningful before first coded frame diff --git a/src/lib/exhaleEnc.h b/src/lib/exhaleEnc.h index a298729..0cb4fd2 100644 --- a/src/lib/exhaleEnc.h +++ b/src/lib/exhaleEnc.h @@ -79,6 +79,7 @@ private: uint8_t* m_mdctQuantMag[USAC_MAX_NUM_CHANNELS]; int32_t* m_mdctSignals[USAC_MAX_NUM_CHANNELS]; int32_t* m_mdstSignals[USAC_MAX_NUM_CHANNELS]; + uint8_t m_meanFlatPrev[USAC_MAX_NUM_CHANNELS]; #if !RESTRICT_TO_AAC bool m_noiseFilling[USAC_MAX_NUM_ELEMENTS]; bool m_nonMpegExt; diff --git a/src/lib/exhaleLibPch.h b/src/lib/exhaleLibPch.h index d7782f5..75e5a03 100644 --- a/src/lib/exhaleLibPch.h +++ b/src/lib/exhaleLibPch.h @@ -32,6 +32,12 @@ #define USAC_NUM_FREQ_TABLES 6 #define USAC_NUM_SAMPLE_RATES (2 * AAC_NUM_SAMPLE_RATES) +#define ENABLE_INTERTES 0 // inter-sample TES in SBR + +#if ENABLE_INTERTES +# define GAMMA 1 // 2? +#endif + #define RESTRICT_TO_AAC 0 // allow only AAC tool-set #if RESTRICT_TO_AAC @@ -168,6 +174,25 @@ const uint8_t eightTimesSqrt256Minus[256] = { 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 25, 24, 23, 21, 20, 18, 16, 14, 11, 8 }; +// ISO/IEC 23003-3:2012, Table 68 +static const uint8_t elementCountConfig[USAC_MAX_NUM_ELCONFIGS] = {0, 1, 1, 2, 3, 3, 4, 5, 2, 2, 2, 5, 5}; + +static const ELEM_TYPE elementTypeConfig[USAC_MAX_NUM_ELCONFIGS][USAC_MAX_NUM_ELEMENTS] = { + {ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_UNDEF + {ID_USAC_SCE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_1_CH + {ID_USAC_CPE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_2_CH + {ID_USAC_SCE, ID_USAC_CPE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_3_CH + {ID_USAC_SCE, ID_USAC_CPE, ID_USAC_SCE, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_4_CH + {ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_5_CH + {ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_LFE, ID_EL_UNDEF}, // CCI_6_CH + {ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_LFE}, // CCI_8_CH + {ID_USAC_SCE, ID_USAC_SCE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_2_CHM + {ID_USAC_CPE, ID_USAC_SCE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_3_CHR + {ID_USAC_CPE, ID_USAC_CPE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_4_CHR + {ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_SCE, ID_USAC_LFE}, // CCI_7_CH + {ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_LFE} // CCI_8_CHM +}; + // fast calculation of x / den: (x * oneTwentyEightOver[den]) >> 7, accurate for 0 <= x <= 162 const uint8_t oneTwentyEightOver[14] = {0, 128, 64, 43, 32, 26, 22, 19, 16, 15, 13, 12, 11, 10}; diff --git a/src/lib/tempAnalysis.cpp b/src/lib/tempAnalysis.cpp index cac85f8..2aeb5ae 100644 --- a/src/lib/tempAnalysis.cpp +++ b/src/lib/tempAnalysis.cpp @@ -11,7 +11,7 @@ #include "exhaleLibPch.h" #include "tempAnalysis.h" -static const int16_t lffc2x[65] = { // low-frequency filter coefficients +static const int16_t lpfc12[65] = { // 50% low-pass filter coefficients // 269-pt. sinc windowed by 0.409 * cos(0*pi.*t) - 0.5 * cos(2*pi.*t) + 0.091 * cos(4*pi.*t) 17887, -27755, 16590, -11782, 9095, -7371, 6166, -5273, 4582, -4029, 3576, -3196, 2873, -2594, 2350, -2135, 1944, -1773, 1618, -1478, 1351, -1235, 1129, -1032, 942, -860, 784, @@ -19,6 +19,17 @@ static const int16_t lffc2x[65] = { // low-frequency filter coefficients -124, 108, -95, 82, -71, 61, -52, 44, -37, 31, -26, 21, -17, 14, -11, 8, -6, 5, -3, 2, -1, 1 }; +static const int16_t lpfc34[128] = { // 25% low-pass filter coefficients + // see also A. H. Nuttall, "Some Windows with Very Good Sidelobe Behavior," IEEE, Feb. 1981. + 3 /*<<16*/, 26221, -8914, 19626, 0, -11731, 13789, -8331, 0, 6431, -8148, 5212, 0, -4360, + 5688, -3728, 0, 3240, -4291, 2849, 0, -2529, 3378, -2260, 0, 2032, -2729, 1834, 0, -1662, + 2240, -1510, 0, 1375, -1856, 1253, 0, -1144, 1546, -1045, 0, 955, -1292, 873, 0, -798, + 1079, -729, 0, 666, -900, 608, 0, -555, 748, -505, 0, 459, -620, 418, 0, -379, 510, -343, + 0, 310, -417, 280, 0, -252, 338, -227, 0, 203, -272, 182, 0, -162, 216, -144, 0, 128, -170, + 113, 0, -100, 132, -88, 0, 77, -101, 67, 0, -58, 76, -50, 0, 43, -56, 37, 0, -31, 41, -26, + 0, 22, -28, 18, 0, -15, 19, -12, 0, 10, -12, 8, 0, -6, 7, -4, 0, 3, -4, 2, 0, -1, 2, -1 +}; + // static helper functions static unsigned updateAbsStats (const int32_t* const chSig, const int nSamples, unsigned* const maxAbsVal, int16_t* const maxAbsIdx) { @@ -86,6 +97,7 @@ TempAnalyzer::TempAnalyzer () { m_avgAbsHpPrev[ch] = 0; m_maxAbsHpPrev[ch] = 0; + m_maxHfLevPrev[ch] = 0; m_maxIdxHpPrev[ch] = 1; m_pitchLagPrev[ch] = 0; m_tempAnaStats[ch] = 0; @@ -122,7 +134,7 @@ unsigned TempAnalyzer::temporalAnalysis (const int32_t* const timeSignals[USAC_M const int resamplerOffset = (int) lookaheadOffset - 128; if ((timeSignals == nullptr) || (nChannels > USAC_MAX_NUM_CHANNELS) || (lfeChannelIndex > USAC_MAX_NUM_CHANNELS) || (sbrShift > 1) || - (nSamplesInFrame > 2048) || (nSamplesInFrame < 2) || (lookaheadOffset > 4096) || (lookaheadOffset <= 256u * sbrShift)) + (nSamplesInFrame > 2048) || (nSamplesInFrame <= 128 * sbrShift) || (lookaheadOffset > 4096) || (lookaheadOffset <= 256u * sbrShift)) { return 1; } @@ -135,6 +147,7 @@ unsigned TempAnalyzer::temporalAnalysis (const int32_t* const timeSignals[USAC_M // --- get L1 norm and pitch lag of both sides unsigned sumAbsValL = 0, sumAbsValR = 0; unsigned maxAbsValL = 0, maxAbsValR = 0; + int32_t maxHfrLevL = 8, maxHfrLevR = 8; int16_t maxAbsIdxL = 0, maxAbsIdxR = 0; int splitPtL = 0; int splitPtC = halfFrameOffset; @@ -147,21 +160,52 @@ unsigned TempAnalyzer::temporalAnalysis (const int32_t* const timeSignals[USAC_M if (applyResampler && lrCoreTimeSignals[ch] != nullptr) // downsampler { - /*LF*/int32_t* lrSig = &lrCoreTimeSignals[ch][resamplerOffset >> sbrShift]; // low-rate, - const int32_t* hrSig = &timeSignals[ch][resamplerOffset]; // high-rate input time signal + /*LF*/int32_t* lrSig = &lrCoreTimeSignals[ch][resamplerOffset >> sbrShift]; + const int32_t* hrSig = &timeSignals[ch][resamplerOffset]; + /*MF*/uint64_t ue[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; // unit energies for (int i = nSamplesInFrame >> sbrShift; i > 0; i--, lrSig++, hrSig += 2) { int64_t r = ((int64_t) hrSig[0] << 17) + (hrSig[-1] + (int64_t) hrSig[1]) * -2*SHRT_MIN; int16_t s; - for (u = 65, s = 129; u > 0; s -= 2) r += (hrSig[-s] + (int64_t) hrSig[s]) * lffc2x[--u]; + for (u = 65, s = 129; u > 0; s -= 2) r += (hrSig[-s] + (int64_t) hrSig[s]) * lpfc12[--u]; - *lrSig = int32_t ((r + (1 << 17)) >> 18); // low-pass and low-rate -// TODO: bandpass + *lrSig = int32_t ((r + (1 << 17)) >> 18); // low-pass at half rate if (*lrSig < -8388608) *lrSig = -8388608; else if (*lrSig > 8388607) *lrSig = 8388607; + + if ((i & 1) != 0) // compute quarter-rate mid-frequency SBR signal + { + r = ((3 * (int64_t) hrSig[0]) << 16) - (hrSig[-1] + (int64_t) hrSig[1]) * SHRT_MIN - r; + r += (hrSig[-2] + (int64_t) hrSig[2]) * SHRT_MIN; + + for (s = 127; s > 0; s--/*u = s*/) r += (hrSig[-s] + (int64_t) hrSig[s]) * lpfc34[s]; + + r = (r + (1 << 17)) >> 18; // SBR env. band-pass at quarter rate + ue[i >> 7] += uint64_t (r * r); + } + } + + if (ch != lfeChannelIndex) // calculate overall and unit-wise levels + { + const unsigned numUnits = nSamplesInFrame >> (sbrShift + 7); + int32_t* const hfrLevel = &lrCoreTimeSignals[ch][(resamplerOffset + nSamplesInFrame) >> sbrShift]; + + for (u = numUnits; u > 0; /*u*/) + { + ue[8] += ue[--u]; + hfrLevel[numUnits - u] = int32_t (0.5 + sqrt ((double) ue[u])); + } + hfrLevel[0] = int32_t (0.5 + sqrt ((double) ue[8])); + + // stabilize transient detection below + for (u = numUnits >> 1; u > 0; u--) + { + if (maxHfrLevL < hfrLevel[u]) /* update max. */ maxHfrLevL = hfrLevel[u]; + if (maxHfrLevR < hfrLevel[u + (numUnits >> 1)]) maxHfrLevR = hfrLevel[u + (numUnits >> 1)]; + } } } @@ -225,9 +269,9 @@ unsigned TempAnalyzer::temporalAnalysis (const int32_t* const timeSignals[USAC_M m_transientLoc[ch] = -1; // re-init stats history for this channel m_avgAbsHpPrev[ch] = 0; - m_maxAbsHpPrev[ch] = 0; // maxAbsValR - m_maxIdxHpPrev[ch] = 1; // maxAbsIdxR - m_pitchLagPrev[ch] = 0; // pLagBestR + m_maxAbsHpPrev[ch] = 0; + m_maxIdxHpPrev[ch] = 1; + m_pitchLagPrev[ch] = 0; } else // nonzero signal in the current frame { @@ -299,14 +343,23 @@ unsigned TempAnalyzer::temporalAnalysis (const int32_t* const timeSignals[USAC_M // --- temporal analysis statistics for frame m_tempAnaStats[ch] = packAvgTempAnalysisStats (sumAbsHpL, sumAbsHpR, m_avgAbsHpPrev[ch], sumAbsPpL + sumAbsPpR, maxAbsValL + maxAbsValR); + u = maxAbsValR; + if ((m_maxHfLevPrev[ch] < (maxHfrLevL >> 3)) || (maxHfrLevL < (maxHfrLevR >> 3))) // transient + { + maxAbsValL = maxHfrLevL; + maxAbsValR = maxHfrLevR; + m_maxAbsHpPrev[ch] = m_maxHfLevPrev[ch]; + } m_transientLoc[ch] = packTransLocWithPitchLag (maxAbsValL, maxAbsValR, m_maxAbsHpPrev[ch], maxAbsIdxL, maxAbsIdxR, __max (1, pLagBestR)); // update stats history for this channel m_avgAbsHpPrev[ch] = sumAbsHpR; - m_maxAbsHpPrev[ch] = maxAbsValR; + m_maxAbsHpPrev[ch] = u; m_maxIdxHpPrev[ch] = (unsigned) maxAbsIdxR; m_pitchLagPrev[ch] = (unsigned) pLagBestR; } // if sumAbsValL == 0 && sumAbsValR == 0 + + if (applyResampler) m_maxHfLevPrev[ch] = maxHfrLevR; } // for ch return 0; // no error diff --git a/src/lib/tempAnalysis.h b/src/lib/tempAnalysis.h index ec0c8b4..3abc8c8 100644 --- a/src/lib/tempAnalysis.h +++ b/src/lib/tempAnalysis.h @@ -24,6 +24,7 @@ private: // member variables unsigned m_avgAbsHpPrev[USAC_MAX_NUM_CHANNELS]; unsigned m_maxAbsHpPrev[USAC_MAX_NUM_CHANNELS]; + int32_t m_maxHfLevPrev[USAC_MAX_NUM_CHANNELS]; unsigned m_maxIdxHpPrev[USAC_MAX_NUM_CHANNELS]; unsigned m_pitchLagPrev[USAC_MAX_NUM_CHANNELS]; uint32_t m_tempAnaStats[USAC_MAX_NUM_CHANNELS];