From 1259070c1926f453e1da4d289ef149c0de482c32 Mon Sep 17 00:00:00 2001 From: "Christian R. Helmrich" Date: Tue, 10 Nov 2020 20:00:02 +0100 Subject: [PATCH] add SBR bit syntax --- CMakeLists.txt | 2 +- README.md | 2 +- include/version.h | 4 +- src/app/exhaleApp.cpp | 78 +++++++++++++--- src/app/exhaleApp.rc | 2 +- src/lib/bitStreamWriter.cpp | 176 ++++++++++++++++++++++++++++++++++-- src/lib/bitStreamWriter.h | 5 +- src/lib/exhaleEnc.cpp | 92 ++++++++++++++----- src/lib/exhaleEnc.h | 2 + src/lib/tempAnalysis.cpp | 39 +++++++- src/lib/tempAnalysis.h | 3 +- 11 files changed, 349 insertions(+), 56 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9039ba3..761489c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,7 +16,7 @@ if("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}") endif() -project(exhale VERSION 1.0.8 LANGUAGES CXX) +project(exhale VERSION 1.1.0 LANGUAGES CXX) if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) set(CMAKE_BUILD_TYPE Release diff --git a/README.md b/README.md index b7aebbe..2b7758e 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ exhale is being made available under an open-source license which is similar to the 3-clause BSD license but modified to address specific aspects dictated by the nature and the output of this application. -The license text and release notes for the current version 1.0.8 can +The license text and release notes for the current version 1.1RC can be found in the `include` subdirectory of the exhale distribution. diff --git a/include/version.h b/include/version.h index b002be7..62d9a9b 100644 --- a/include/version.h +++ b/include/version.h @@ -12,8 +12,8 @@ # define EXHALELIB_VERSION_MAJOR "1" #endif #ifndef EXHALELIB_VERSION_MINOR -# define EXHALELIB_VERSION_MINOR "0" +# define EXHALELIB_VERSION_MINOR "1" #endif #ifndef EXHALELIB_VERSION_BUGFIX -# define EXHALELIB_VERSION_BUGFIX ".8" // "RC" or ".0", ".1", ... +# define EXHALELIB_VERSION_BUGFIX "RC" // "RC" or ".0", ".1", ... #endif diff --git a/src/app/exhaleApp.cpp b/src/app/exhaleApp.cpp index 05de8b0..fe3e828 100644 --- a/src/app/exhaleApp.cpp +++ b/src/app/exhaleApp.cpp @@ -54,6 +54,7 @@ #define EA_PEAK_NORM -96.33f // 20 * log10(2^-16), 16-bit normalization #define EA_PEAK_MIN 0.262f // 20 * log10() + EA_PEAK_NORM = -108 dbFS #define ENABLE_RESAMPLING 1 // 1: automatic input up- and downsampling +#define ENABLE_SIMPLE_SBR 1 // 1: basic 2:1 low-rate SBR functionality #define IGNORE_WAV_LENGTH 0 // 1: ignore input size indicators (nasty) #define XHE_AAC_LOW_DELAY 0 // 1: allow encoding with 768 frame length @@ -376,14 +377,16 @@ int main (const int argc, char* argv[]) #if defined (_WIN32) || defined (WIN32) || defined (_WIN64) || defined (WIN64) fprintf_s (stdout, " preset\t= # (0-9) low-complexity standard compliant xHE-AAC at 16ú#+48 kbit/s\n"); # if XHE_AAC_LOW_DELAY -// fprintf_s (stdout, " \t (a-i) low-complexity compatible xHE-AAC with BE at 16ú#+48 kbit/s\n"); - fprintf_s (stdout, " \t (A-I) 41ms low-delay compatible xHE-AAC with BE at 16ú#+48 kbit/s\n"); + fprintf_s (stdout, " \t (A-J) 41ms low-delay compatible xHE-AAC with BE at 16ú#+48 kbit/s\n"); +# elif ENABLE_SIMPLE_SBR + fprintf_s (stdout, " \t (a-g) low-complexity compliant xHE-AAC with SBR at 12ú#+36 kbit/s\n"); # endif #else fprintf_s (stdout, " preset\t= # (0-9) low-complexity standard compliant xHE-AAC at 16*#+48 kbit/s\n"); # if XHE_AAC_LOW_DELAY -// fprintf_s (stdout, " \t (a-i) low-complexity compatible xHE-AAC with BE at 16*#+48 kbit/s\n"); - fprintf_s (stdout, " \t (A-I) 41ms low-delay compatible xHE-AAC with BE at 16*#+48 kbit/s\n"); + fprintf_s (stdout, " \t (A-J) 41ms low-delay compatible xHE-AAC with BE at 16*#+48 kbit/s\n"); +# elif ENABLE_SIMPLE_SBR + fprintf_s (stdout, " \t (a-g) low-complexity compliant xHE-AAC with SBR at 12*#+36 kbit/s\n"); # endif #endif fprintf_s (stdout, "\n inputWaveFile.wav lossless WAVE audio input, read from stdin if not specified\n\n"); @@ -412,15 +415,21 @@ int main (const int argc, char* argv[]) // check preset mode, derive coder config #if XHE_AAC_LOW_DELAY - if ((*argv[1] >= '0' && *argv[1] <= '9') || (*argv[1] >= 'a' && *argv[1] <= 'i') || (*argv[1] >= 'A' && *argv[1] <= 'I')) + if ((*argv[1] >= '0' && *argv[1] <= '9') || (*argv[1] >= 'A' && *argv[1] <= 'J')) +#elif ENABLE_SIMPLE_SBR + if ((*argv[1] >= '0' && *argv[1] <= '9') || (*argv[1] >= 'a' && *argv[1] <= 'g')) #else - if ((*argv[1] >= '0' && *argv[1] <= '9') || (*argv[1] >= 'a' && *argv[1] <= 'i')) + if (*argv[1] >= '0' && *argv[1] <= '9') #endif { i = (uint16_t) argv[1][0]; compatibleExtensionFlag = (i & 0x40) >> 6; +#if ENABLE_SIMPLE_SBR + coreSbrFrameLengthIndex = (i > 0x60 ? 5 : (i & 0x20) >> 5); +#else coreSbrFrameLengthIndex = (i & 0x20) >> 5; - variableCoreBitRateMode = (i & 0x0F); +#endif + variableCoreBitRateMode = (i & 0x0F) - (i >> 6); } else if (*argv[1] == '#') // default mode { @@ -430,9 +439,15 @@ int main (const int argc, char* argv[]) { #if XHE_AAC_LOW_DELAY # ifdef EXHALE_APP_WCHAR - fwprintf_s (stderr, L" ERROR reading preset mode: character %s is not supported! Use 0-9 or A-I.\n\n", argv[1]); -#else - fprintf_s (stderr, " ERROR reading preset mode: character %s is not supported! Use 0-9 or A-I.\n\n", argv[1]); + fwprintf_s (stderr, L" ERROR reading preset mode: character %s is not supported! Use 0-9 or A-J.\n\n", argv[1]); +# else + fprintf_s (stderr, " ERROR reading preset mode: character %s is not supported! Use 0-9 or A-J.\n\n", argv[1]); +# endif +#elif ENABLE_SIMPLE_SBR +# ifdef EXHALE_APP_WCHAR + fwprintf_s (stderr, L" ERROR reading preset mode: character %s is not supported! Use 0-9 or a-g.\n\n", argv[1]); +# else + fprintf_s (stderr, " ERROR reading preset mode: character %s is not supported! Use 0-9 or a-g.\n\n", argv[1]); # endif #else # ifdef EXHALE_APP_WCHAR @@ -522,10 +537,19 @@ int main (const int argc, char* argv[]) if ((wavReader.open (inFileHandle, startLength, readStdin ? LLONG_MAX : _filelengthi64 (inFileHandle)) != 0) || #else // Linux, MacOS, Unix if ((wavReader.open (inFileHandle, startLength, readStdin ? LLONG_MAX : lseek (inFileHandle, 0, 2 /*SEEK_END*/)) != 0) || +#endif +#if ENABLE_SIMPLE_SBR + (wavReader.getSampleRate () >= 1000 && wavReader.getSampleRate () < 24000 && coreSbrFrameLengthIndex >= 3) || #endif (wavReader.getNumChannels () >= 7)) { fprintf_s (stderr, " ERROR while trying to open WAVE file: invalid or unsupported audio format!\n\n"); +#if ENABLE_SIMPLE_SBR + if (wavReader.getSampleRate () >= 1000 && wavReader.getSampleRate () < 24000 && coreSbrFrameLengthIndex >= 3) + { + fprintf_s (stderr, " The sampling rate is %d kHz but xHE-AAC with SBR requires at least 24 kHz.\n\n", wavReader.getSampleRate () / 1000); + } +#endif i = 8192; // return value goto mainFinish; // audio format invalid @@ -555,6 +579,9 @@ int main (const int argc, char* argv[]) } if (wavReader.getSampleRate () > 32100 + (unsigned) variableCoreBitRateMode * 12000 + (variableCoreBitRateMode >> 2) * 3900 +#if ENABLE_SIMPLE_SBR + && (coreSbrFrameLengthIndex < 3) +#endif #if ENABLE_RESAMPLING && (variableCoreBitRateMode > 1 || wavReader.getSampleRate () != 48000) #endif @@ -566,7 +593,11 @@ int main (const int argc, char* argv[]) goto mainFinish; // ask for resampling } +#if ENABLE_SIMPLE_SBR + if (wavReader.getSampleRate () > 32000 && coreSbrFrameLengthIndex < 3 && variableCoreBitRateMode <= 1) +#else if (wavReader.getSampleRate () > 32000 && variableCoreBitRateMode <= 1) +#endif { #if ENABLE_RESAMPLING if (wavReader.getSampleRate () == 48000) @@ -613,8 +644,8 @@ int main (const int argc, char* argv[]) // enforce executable specific constraints i = __min (USHRT_MAX, wavReader.getSampleRate ()); - if ((wavReader.getNumChannels () > 3) && (i == 57600 || i == 51200 || i == 40000 || i == 38400 || i == 34150 || - i == 28800 || i == 25600 || i == 20000 || i == 19200 || i == 17075 || i == 14400 || i == 12800 || i == 9600)) + if ((wavReader.getNumChannels () > 3 || coreSbrFrameLengthIndex >= 3) && (i == 57600 || i == 51200 || i == 40000 || i == 38400 || + i == 34150 || i == 28800 || i == 25600 || i == 20000 || i == 19200 || i == 17075 || i == 14400 || i == 12800 || i == 9600)) { fprintf_s (stderr, " ERROR: exhale does not support %d-channel coding with %d Hz sampling rate.\n\n", wavReader.getNumChannels (), i); @@ -626,7 +657,12 @@ int main (const int argc, char* argv[]) const unsigned inSampDepth = wavReader.getBitDepth (); #if ENABLE_RESAMPLING const bool enableUpsampler = eaInitUpsampler2x (&inPcmRsmp, variableCoreBitRateMode, i, frameLength, numChannels); +# if ENABLE_SIMPLE_SBR + const bool enableResampler = (coreSbrFrameLengthIndex >= 3 ? false : // no 3:2 downsampling needed when using SBR + eaInitDownsampler (&inPcmRsmp, variableCoreBitRateMode, i, frameLength, numChannels)); +# else const bool enableResampler = eaInitDownsampler (&inPcmRsmp, variableCoreBitRateMode, i, frameLength, numChannels); +# endif const uint16_t firstLength = uint16_t (enableUpsampler ? (frameLength >> 1) + 32 : (enableResampler ? startLength : frameLength)); const unsigned inFrameSize = (enableResampler ? startLength : frameLength) * sizeof (int32_t); // max buffer size const unsigned resampRatio = (enableResampler ? 3 : 1); // for resampling ratio @@ -671,7 +707,7 @@ int main (const int argc, char* argv[]) const unsigned sampleRate = wavReader.getSampleRate (); #endif const unsigned indepPeriod = (sampleRate < 48000 ? (sampleRate - 320) / frameLength : 45 /*for 50-Hz video, use 50 for 60-Hz video*/); - const unsigned mod3Percent = unsigned ((expectLength * (3 + coreSbrFrameLengthIndex)) >> 17); + const unsigned mod3Percent = unsigned ((expectLength * (3 + (coreSbrFrameLengthIndex & 3))) >> 17); uint32_t byteCount = 0, bw = (numChannels < 7 ? loudStats : 0); uint32_t br, bwMax = 0; // br will be used to hold bytes read and/or bit-rate uint32_t headerRes = 0; @@ -696,7 +732,11 @@ int main (const int argc, char* argv[]) memset (outAuData, 0, 108 * sizeof (uint8_t)); // max. allowed ASC + UC size i = exhaleEnc.initEncoder (outAuData, &bw); // bw stores actual ASC + UC size +#if ENABLE_SIMPLE_SBR + if ((i |= mp4Writer.open (outFileHandle, sampleRate, numChannels, inSampDepth, frameLength, startLength + (coreSbrFrameLengthIndex >= 3 ? 962 : 0), +#else if ((i |= mp4Writer.open (outFileHandle, sampleRate, numChannels, inSampDepth, frameLength, startLength, +#endif indepPeriod, outAuData, bw, (time (nullptr) + 2082844800) & UINT_MAX, (char) variableCoreBitRateMode)) != 0) { fprintf_s (stderr, " ERROR while trying to initialize xHE-AAC encoder: error value %d was returned!\n\n", i); @@ -710,7 +750,12 @@ int main (const int argc, char* argv[]) if (*argv[1] != '#') // user-def. mode { fprintf_s (stdout, " Encoding %d-kHz %d-channel %d-bit WAVE to low-complexity xHE-AAC at %d kbit/s\n\n", +#if ENABLE_SIMPLE_SBR + sampleRate / 1000, numChannels, inSampDepth, __min (5, numChannels) * (((24 + variableCoreBitRateMode * 8) * + (coreSbrFrameLengthIndex >= 3 ? 3 : 4)) >> 2)); +#else sampleRate / 1000, numChannels, inSampDepth, __min (5, numChannels) * (24 + variableCoreBitRateMode * 8)); +#endif } if (!readStdin && (mod3Percent > 0)) { @@ -926,6 +971,13 @@ int main (const int argc, char* argv[]) bw = mp4Writer.finishFile (br, bw, uint32_t (__min (UINT_MAX - startLength, actualLength)), (time (nullptr) + 2082844800) & UINT_MAX, (i == 0) && (numChannels < 7) ? outAuData : nullptr); // print out collected file statistics +#if ENABLE_SIMPLE_SBR + if (coreSbrFrameLengthIndex >= 3) + { + fprintf_s (stdout, " Done, actual average incl. SBR data %.2f kbit/s\n\n", (float) br * 0.001f); + } + else +#endif fprintf_s (stdout, " Done, actual average %.1f kbit/s\n\n", (float) br * 0.001f); if (numChannels < 7) { diff --git a/src/app/exhaleApp.rc b/src/app/exhaleApp.rc index 5cb9a9a..78cabd8 100644 --- a/src/app/exhaleApp.rc +++ b/src/app/exhaleApp.rc @@ -13,7 +13,7 @@ 0 ICON "exhaleApp.ico" VS_VERSION_INFO VERSIONINFO -FILEVERSION 1,0,8 +FILEVERSION 1,1,0 BEGIN BLOCK "StringFileInfo" BEGIN diff --git a/src/lib/bitStreamWriter.cpp b/src/lib/bitStreamWriter.cpp index 3350411..fd8ac98 100644 --- a/src/lib/bitStreamWriter.cpp +++ b/src/lib/bitStreamWriter.cpp @@ -112,6 +112,117 @@ unsigned BitStreamWriter::writeChannelWiseIcsInfo (const IcsInfo& icsInfo) // return 9; } +unsigned BitStreamWriter::writeChannelWiseSbrData (const int32_t* const sbrDataCh0, const int32_t* const sbrDataCh1, + const bool indepFlag /*= false*/) +{ + const unsigned nb = (sbrDataCh0 != nullptr ? 2 * ((sbrDataCh0[0] >> 23) & 1) + 2 : 0); // noise bits/ch = 2 or 4 + const int16_t res = (nb > 0 ? sbrDataCh0[0] >> 29 : 0); // short bs_amp_res + const bool stereo = (sbrDataCh1 != nullptr); + const bool couple = (stereo ? ((sbrDataCh1[0] >> 23) & 1) : false); + unsigned bitCount = (stereo ? (couple ? 2 : 7 + nb) : 0) + 6 + nb, i, tmpCh0, tmpCh1; + + if (nb == 0) return 0; + + tmpCh0 = (sbrDataCh0[0] >> 21) & 3; + tmpCh1 = ((stereo && !couple ? sbrDataCh1[0] : sbrDataCh0[0]) >> 21) & 3; + + if (stereo) m_auBitStream.write (couple ? 1 : 0, 1); // _coupling + + // sbr_grid(), assumes bs_frame_class[ch] == 0, i.e. class FIXFIX + m_auBitStream.write ((sbrDataCh0[0] >> 20) & 7, 5); // class data + if (stereo && !couple) m_auBitStream.write ((sbrDataCh1[0] >> 20) & 7, 5); + + // sbr_dtdf() + i = (1u << tmpCh0) - (indepFlag ? 1 : 0); // actual bs_num_env[0] + if (i > 0) m_auBitStream.write ((sbrDataCh0[0] >> 12) & 255, i); // _df_env + bitCount += i; + i = (tmpCh0 > 0 ? 2 : 1) - (indepFlag ? 1 : 0);// bs_num_noise[0] + if (i > 0) m_auBitStream.write ((sbrDataCh0[0] >> 4) & 255, i); // df_noise + bitCount += i; + + if (stereo) + { + i = (1u << tmpCh1) - (indepFlag ? 1 : 0); + if (i > 0) m_auBitStream.write ((sbrDataCh1[0] >> 12) & 255, i); + bitCount += i; + i = (tmpCh1 > 0 ? 2 : 1) - (indepFlag ? 1 : 0); + if (i > 0) m_auBitStream.write ((sbrDataCh1[0] >> 4) & 255, i); + bitCount += i; + } + + // sbr_invf(), assumes dflt_noise_bands < 3, i.e. 1-2 noise bands + i = 6 * nb - 9; // bitmask = 3 or 15 + m_auBitStream.write (sbrDataCh0[0] & i, nb); // bs_invf_mode[0][] + if (stereo && !couple) m_auBitStream.write (sbrDataCh1[0] & i, nb); + + // sbr_envelope() for mono/left channel, assumes bs_pvc_mode == 0 + for (i = 1; i <= (1u << tmpCh0); i++) // dt loop + { + const uint8_t bits = (res > 0 && tmpCh0 > 0 ? 6 : 7); + + m_auBitStream.write (15/*sbrDataCh0[i] & 127*/, bits); // bs_data_env + bitCount += bits; + m_auBitStream.write (sbrDataCh0[i] >> 7, 5<<1); // TODO: VLC words + bitCount += 5<<1; + } + + if (stereo && !couple) + { + for (i = 1; i <= (1u << tmpCh1); i++) // sbr_envelope() dt loop + { + const uint8_t bits = (res > 0 && tmpCh1 > 0 ? 6 : 7); + + m_auBitStream.write (sbrDataCh1[i] & 127, bits); + bitCount += bits; + m_auBitStream.write (sbrDataCh1[i] >> 7, 5<<1); // TODO: VLC words + bitCount += 5<<1; + } + } + + for (i = (tmpCh0 > 0 ? 2 : 1); i > 0; i--) // sbr_noise() dt loop + { + m_auBitStream.write (31/*(sbrDataCh0[9] >> (12 * i)) & 31*/, 5); // _data_noise + bitCount += 5; + if (nb == 4) + { + m_auBitStream.write ((sbrDataCh0[9] >> (12 * i - 6)) & 31, 1); // TODO: VLC word + bitCount++; + } + } + + if (stereo) + { + if (couple) + { + for (i = 1; i <= (1u << tmpCh1); i++) // sbr_envelope dt loop + { + const uint8_t bits = (res > 0 && tmpCh1 > 0 ? 5 : 6); + + m_auBitStream.write (sbrDataCh1[i] & 63, bits); + bitCount += bits; + m_auBitStream.write (sbrDataCh1[i] >> 7, 5<<1); // TODO: VLC words + bitCount += 5<<1; + } + } + + for (i = (tmpCh1 > 0 ? 2 : 1); i > 0; i--) // sbr_noise dt loop + { + m_auBitStream.write ((sbrDataCh1[9] >> (12 * i)) & 31, 5); + bitCount += 5; + if (nb == 4) + { + m_auBitStream.write ((sbrDataCh1[9] >> (12 * i - 6)) & 31, 1); // TODO: VLC word + bitCount++; + } + } + } + + m_auBitStream.write (0, 1); // fixed bs_add_harmonic_flag[0] = 0 + if (stereo) m_auBitStream.write (0, 1); + + return bitCount; +} + unsigned BitStreamWriter::writeChannelWiseTnsData (const TnsData& tnsData, const bool eightShorts) { const unsigned numWindows = (eightShorts ? 8 : 1); @@ -514,8 +625,10 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex, #if !RESTRICT_TO_AAC const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling, #endif - unsigned char* const audioConfig) + const uint8_t sbrRatioShiftValue, unsigned char* const audioConfig) { + const uint8_t fli = (sbrRatioShiftValue == 0 ? 1 /*no SBR*/ : __min (2, sbrRatioShiftValue) + 2); + const int8_t usfi = __max (0, samplingFrequencyIndex - 3 * sbrRatioShiftValue); // TODO: non-standard sampling rates unsigned bitCount = 37; if ((elementType == nullptr) || (audioConfig == nullptr) || (chConfigurationIndex >= USAC_MAX_NUM_ELCONFIGS) || @@ -532,20 +645,20 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex, m_auBitStream.write (0x7CA, 11); // audio object type (AOT) 32 (esc) + 10 = 42 if (samplingFrequencyIndex < AAC_NUM_SAMPLE_RATES) { - m_auBitStream.write (samplingFrequencyIndex, 4); + m_auBitStream.write (usfi, 4); } else { m_auBitStream.write (0xF, 4); // esc - m_auBitStream.write (toSamplingRate (samplingFrequencyIndex), 24); + m_auBitStream.write (toSamplingRate (usfi), 24); bitCount += 24; } // for multichannel audio, refer to channel mapping of AotSpecificConfig below m_auBitStream.write (chConfigurationIndex > 2 ? 0 : chConfigurationIndex, 4); // --- AotSpecificConfig(): UsacConfig() - m_auBitStream.write (samplingFrequencyIndex, 5); // usacSamplingFrequencyIndex - m_auBitStream.write (shortFrameLength ? 0 : 1, 3); // coreSbrFrameLengthIndex + m_auBitStream.write (usfi, 5); // usacSamplingFrequencyIndex (after SBR dec.!) + m_auBitStream.write (shortFrameLength ? 0 : fli, 3);// coreSbrFrameLengthIndex m_auBitStream.write (chConfigurationIndex, 5); // channelConfigurationIndex m_auBitStream.write (numElements - 1, 4); // numElements in UsacDecoderConfig @@ -561,6 +674,22 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex, m_auBitStream.write ((tw_mdct[el] ? 2 : 0) | (noiseFilling[el] ? 1 : 0), 2); #endif bitCount += 2; + if (sbrRatioShiftValue > 0) // sbrRatioIndex > 0: SbrConfig + { + const uint32_t sf = (samplingFrequencyIndex == 6 || samplingFrequencyIndex < 5 ? 10 : (samplingFrequencyIndex < 8 ? 9 : 8)); // bs_stop_freq + + m_auBitStream.write (0, 3); // fix harmonicSBR, bs_interTes, bs_pvc = 0 + bitCount += 13; // incl. SbrDfltHeader following hereafter + m_auBitStream.write (15, 4); // 11025 @ 44.1, 11625 @ 48, 15000 @ 64 kHz + m_auBitStream.write (sf, 4); // 16193 @ 44.1, 18375 @ 48, 22500 @ 64 kHz + m_auBitStream.write ( 0, 2); // fix dflt_header_extra* = 0 + + if (elementType[el] == ID_USAC_CPE) + { + m_auBitStream.write (0, 2); // fix stereoConfigIndex = 0 + bitCount += 2; + } + } } } // for el @@ -591,7 +720,7 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex, bitCount += (8 - m_auBitStream.heldBitCount) & 7; writeByteAlignment (); // flush bytes - memcpy (audioConfig, &m_auBitStream.stream.front (), __min (16, bitCount >> 3)); + memcpy (audioConfig, &m_auBitStream.stream.front (), __min (15u + fli, bitCount >> 3)); return (bitCount >> 3); // byte count } @@ -603,11 +732,12 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData, #if !RESTRICT_TO_AAC const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling, #endif + const uint8_t sbrRatioShiftValue, int32_t** const sbrInfoAndData, unsigned char* const accessUnit, const unsigned nSamplesInFrame /*= 1024*/) { unsigned bitCount = 1, ci = 0; - if ((elementData == nullptr) || (entropyCoder == nullptr) || (tempBuffer == nullptr) || + if ((elementData == nullptr) || (entropyCoder == nullptr) || (tempBuffer == nullptr) || (sbrInfoAndData == nullptr) || (mdctSignals == nullptr) || (mdctQuantMag == nullptr) || (accessUnit == nullptr) || (nSamplesInFrame > 2048) || #if !RESTRICT_TO_AAC (noiseFilling == nullptr) || (tw_mdct == nullptr) || @@ -644,6 +774,22 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData, tw_mdct[el], noiseFilling[el], #endif usacIndependencyFlag); + if (sbrRatioShiftValue > 0) // UsacSbrData() + { + if (usacIndependencyFlag) + { + m_auBitStream.write ((sbrInfoAndData[ci][0] >> 24), 6); // SbrInfo() + m_auBitStream.write (1, 1);// fix sbrUseDfltHeader = 1 + bitCount += 7; + } + else + { + m_auBitStream.write (0, 1); // fix sbrInfoPresent = 0 + bitCount++; + } + bitCount += writeChannelWiseSbrData (sbrInfoAndData[ci], nullptr, // L (mono) only, no R + usacIndependencyFlag); + } ci++; break; } @@ -670,6 +816,22 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData, tw_mdct[el], noiseFilling[el], #endif usacIndependencyFlag); + if (sbrRatioShiftValue > 0) // UsacSbrData() + { + if (usacIndependencyFlag) + { + m_auBitStream.write ((sbrInfoAndData[ci][0] >> 24), 6); // SbrInfo() + m_auBitStream.write (1, 1);// fix sbrUseDfltHeader = 1 + bitCount += 7; + } + else + { + m_auBitStream.write (0, 1); // fix sbrInfoPresent = 0 + bitCount++; + } + bitCount += writeChannelWiseSbrData (sbrInfoAndData[ci - 1], sbrInfoAndData[ci], // L, R + usacIndependencyFlag); + } ci++; break; } diff --git a/src/lib/bitStreamWriter.h b/src/lib/bitStreamWriter.h index ca0db49..1df42ee 100644 --- a/src/lib/bitStreamWriter.h +++ b/src/lib/bitStreamWriter.h @@ -34,6 +34,8 @@ private: // helper functions void writeByteAlignment (); // write 0s for byte alignment unsigned writeChannelWiseIcsInfo (const IcsInfo& icsInfo); // ics_info() + unsigned writeChannelWiseSbrData (const int32_t* const sbrDataCh0, const int32_t* const sbrDataCh1, + const bool indepFlag = false); unsigned writeChannelWiseTnsData (const TnsData& tnsData, const bool eightShorts); unsigned writeFDChannelStream (const CoreCoderData& elData, EntropyCoder& entrCoder, const unsigned ch, const int32_t* const mdctSignal, const uint8_t* const mdctQuantMag, @@ -60,7 +62,7 @@ public: #if !RESTRICT_TO_AAC const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling, #endif - unsigned char* const audioConfig); + const uint8_t sbrRatioShiftValue, unsigned char* const audioConfig); unsigned createAudioFrame (CoreCoderData** const elementData, EntropyCoder* const entropyCoder, int32_t** const mdctSignals, uint8_t** const mdctQuantMag, const bool usacIndependencyFlag, const uint8_t numElements, @@ -68,6 +70,7 @@ public: #if !RESTRICT_TO_AAC const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling, #endif + const uint8_t sbrRatioShiftValue, int32_t** const sbrInfoAndData, unsigned char* const accessUnit, const unsigned nSamplesInFrame = 1024); }; // BitStreamWriter diff --git a/src/lib/exhaleEnc.cpp b/src/lib/exhaleEnc.cpp index a594830..85622bf 100644 --- a/src/lib/exhaleEnc.cpp +++ b/src/lib/exhaleEnc.cpp @@ -785,11 +785,13 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s const unsigned nSamplesInFrame = toFrameLength (m_frameLength); const unsigned samplingRate = toSamplingRate (m_frequencyIdx); const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS); - const uint32_t maxSfbLong = (samplingRate < 37566 ? MAX_NUM_SWB_LONG : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)); - const uint64_t scaleSr = (samplingRate < 27713 ? (samplingRate < 23004 ? 32 : 34) - __min (3, m_bitRateMode) + const uint32_t maxSfbLong = (samplingRate < 37566 || m_shiftValSBR > 0 ? m_numSwbLong // was MAX_NUM_SWB_LONG + : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)); + const uint32_t scaleSBR = (m_shiftValSBR > 0 ? 8 : 0); // reduces core rate by 25 % + const uint64_t scaleSr = (samplingRate < 27713 ? (samplingRate < 23004 ? 32 : 34) - __min (3 << m_shiftValSBR, m_bitRateMode) : (samplingRate < 37566 && m_bitRateMode != 3u ? 36 : 37)) - (nChannels >> 1); const uint64_t scaleBr = (m_bitRateMode == 0 ? __min (32, 3 + (samplingRate >> 10) + (samplingRate >> 13) - (nChannels >> 1)) - : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1)); + : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1)) + scaleSBR; uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf; uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS]; //uint8_t meanTempFlat[USAC_MAX_NUM_CHANNELS]; @@ -1081,13 +1083,13 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en const unsigned samplingRate = toSamplingRate (m_frequencyIdx); const unsigned* const coeffMagn = m_sfbQuantizer.getCoeffMagnPtr (); uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS]; -//uint8_t meanTempFlat[USAC_MAX_NUM_CHANNELS]; + uint8_t meanTempFlat[USAC_MAX_NUM_CHANNELS] = {208, 208, 208, 208, 208, 208, 208, 208}; unsigned ci = 0, s; // running index unsigned errorValue = (coeffMagn == nullptr ? 1 : 0); // get means of spectral and temporal flatness for every channel m_bitAllocator.getChAverageSpecFlat (meanSpecFlat, nChannels); -//m_bitAllocator.getChAverageTempFlat (meanTempFlat, nChannels); + if (m_bitRateMode == 0 && samplingRate >= 23004) m_bitAllocator.getChAverageTempFlat (meanTempFlat, nChannels); for (unsigned el = 0; el < m_numElements; el++) // element loop { @@ -1097,7 +1099,7 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en if ((coreConfig.elementType < ID_USAC_LFE) && (coreConfig.stereoMode > 0)) // synch SFMs { meanSpecFlat[ci] = meanSpecFlat[ci + 1] = ((uint16_t) meanSpecFlat[ci] + (uint16_t) meanSpecFlat[ci + 1]) >> 1; - // meanTempFlat[ci] = meanTempFlat[ci + 1] = ((uint16_t) meanTempFlat[ci] + (uint16_t) meanTempFlat[ci + 1]) >> 1; + meanTempFlat[ci] = meanTempFlat[ci + 1] = ((uint16_t) meanTempFlat[ci] + (uint16_t) meanTempFlat[ci + 1]) >> 1; } for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop @@ -1181,7 +1183,8 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en const uint8_t maxSfbLong = (samplingRate < 37566 ? 63 - (samplingRate >> 11) : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)); const uint8_t maxSfbShort = (samplingRate < 37566 ? 21 - (samplingRate >> 12) : brModeAndFsToMaxSfbShort(m_bitRateMode, samplingRate)); const uint16_t peakIndex = (shortWinCurr ? 0 : (m_specAnaCurr[ci] >> 5) & 2047); - const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort - 2 + (meanSpecFlat[ci] >> 6) : maxSfbLong - 6 + (meanSpecFlat[ci] >> 5)); + const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort - 2 + (meanSpecFlat[ci] >> 6) : maxSfbLong - 6 + (meanSpecFlat[ci] >> 5)) + + (shortWinCurr ? -3 + (((1 << 5) + meanTempFlat[ci]) >> 6) : -7 + (((1 << 4) + meanTempFlat[ci]) >> 5)); const unsigned targetBitCount25 = ((60000 + 20000 * m_bitRateMode) * nSamplesInFrame) / (samplingRate * ((grpData.numWindowGroups + 1) >> 1)); unsigned b = grpData.sfbsPerGroup - 1; @@ -1212,7 +1215,10 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en } } #endif - b = lastSfb; + // coarse-quantize near-Nyquist SFB with SBR @ 48-64 kHz + b = 40 + (samplingRate >> 12); + if ((m_shiftValSBR == 0) || (samplingRate < 23004) || shortWinCurr || (b > lastSfb)) b = lastSfb; + while ((b >= sfmBasedSfbStart + (m_bitRateMode >> 1)) && (grpOff[b] > peakIndex) && ((grpRms[b] >> 16) <= 1) /*coarse quantization*/ && ((estimBitCount * 5 > targetBitCount25 * 2) || (grpLength > 1 /*no accurate bit count estim. available for grouped spectrum*/))) { @@ -1308,6 +1314,11 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en // NOTE: gap-filling SFB bit count might be inaccurate now since scale factors changed if (coreConfig.specFillData[ch] == 1) errorValue |= 1; #endif + if ((coreConfig.elementType < ID_USAC_LFE) && (m_shiftValSBR > 0)) // collect SBR data + { + memset (m_coreSignals[ci], 0, 10 * sizeof (int32_t)); // TODO + m_coreSignals[ci][0] = 1 << 20; // fix bs_freq_res = high + } ci++; } } // for el @@ -1317,7 +1328,7 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en #if !RESTRICT_TO_AAC m_timeWarping, m_noiseFilling, #endif - m_outAuData, nSamplesInFrame)); // returns AU size + m_shiftValSBR, m_coreSignals, m_outAuData, nSamplesInFrame)); // returns AU size } unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS and SFB data @@ -1597,7 +1608,7 @@ unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects of ics_info() { const unsigned nChannels = toNumChannels (m_channelConf); - const unsigned nSamplesInFrame = toFrameLength (m_frameLength); + const unsigned nSamplesInFrame = toFrameLength (m_frameLength) << m_shiftValSBR; const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS); unsigned ci = 0; // running ch index @@ -1608,8 +1619,8 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o m_tempAnalyzer.getTransientAndPitch (m_tranLocCurr, nChannels); // temporal analysis for look-ahead signal (central nSamplesInFrame samples of next frame) - errorValue |= m_tempAnalyzer.temporalAnalysis (m_timeSignals, nChannels, nSamplesInFrame, nSamplesTempAna, lfeChannelIndex); - + errorValue |= m_tempAnalyzer.temporalAnalysis (m_timeSignals, nChannels, nSamplesInFrame, nSamplesTempAna, + m_shiftValSBR, m_coreSignals, lfeChannelIndex); // get temporal channel statistics for next frame, used for window length/overlap decision m_tempAnalyzer.getTempAnalysisStats (m_tempAnaNext, nChannels); m_tempAnalyzer.getTransientAndPitch (m_tranLocNext, nChannels); @@ -1654,10 +1665,10 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o const USAC_WSEQ wsPrev = icsPrev.windowSequence; USAC_WSEQ& wsCurr = icsCurr.windowSequence; // get temporal signal statistics, then determine overlap config. for the next frame - const unsigned plCurr = abs (m_tranLocCurr[ci]) & 1023; + const unsigned plCurr = abs (m_tranLocCurr[ci]) & ((1024 << m_shiftValSBR) - 1); const unsigned sfCurr = (m_tempAnaCurr[ci] >> 24) & UCHAR_MAX; const unsigned tfCurr = (m_tempAnaCurr[ci] >> 16) & UCHAR_MAX; - const unsigned plNext = abs (m_tranLocNext[ci]) & 1023; + const unsigned plNext = abs (m_tranLocNext[ci]) & ((1024 << m_shiftValSBR) - 1); const unsigned sfNext = (m_tempAnaNext[ci] >> 24) & UCHAR_MAX; const unsigned tfNext = (m_tempAnaNext[ci] >> 16) & UCHAR_MAX; const unsigned tThresh = UCHAR_MAX * (__max (plCurr, plNext) < 614 /*0.6 * 1024*/ ? 16 : 15 - (m_bitRateMode >> 2)); @@ -1775,6 +1786,7 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o { const IcsInfo& icsPrev = coreConfig.icsInfoPrev[ch]; const IcsInfo& icsCurr = coreConfig.icsInfoCurr[ch]; + const int32_t* timeSig = (m_shiftValSBR > 0 ? m_coreSignals[ci] : m_timeSignals[ci]); const USAC_WSEQ wsCurr = icsCurr.windowSequence; const bool eightShorts = (wsCurr == EIGHT_SHORT); SfbGroupData& grpData = coreConfig.groupingData[ch]; @@ -1782,7 +1794,7 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o grpData.numWindowGroups = (eightShorts ? NUM_WINDOW_GROUPS : 1); // fill groupingData memcpy (grpData.windowGroupLength, windowGroupingTable[icsCurr.windowGrouping], NUM_WINDOW_GROUPS * sizeof (uint8_t)); - errorValue |= m_transform.applyMCLT (m_timeSignals[ci], eightShorts, icsPrev.windowShape != WINDOW_SINE, icsCurr.windowShape != WINDOW_SINE, + errorValue |= m_transform.applyMCLT (timeSig, eightShorts, icsPrev.windowShape != WINDOW_SINE, icsCurr.windowShape != WINDOW_SINE, wsCurr > LONG_START /*lOL*/, (wsCurr % 3) != ONLY_LONG /*lOR*/, m_mdctSignals[ci], m_mdstSignals[ci]); m_scaleFacData[ci++] = &grpData; } @@ -1809,9 +1821,14 @@ ExhaleEncoder::ExhaleEncoder (int32_t* const inputPcmData, unsigned ch m_channelConf = CCI_2_CHM; // passing numChannels = 0 to ExhaleEncoder is interpreted as 2-ch dual-mono } m_numElements = elementCountConfig[m_channelConf % USAC_MAX_NUM_ELCONFIGS]; // used in UsacDecoderConfig +#if 1 + m_shiftValSBR = (frameLength >= 1536 ? 1 : 0); +#else + m_shiftValSBR = 0; +#endif m_frameCount = 0; - m_frameLength = (USAC_CCFL) frameLength; // coreCoderFrameLength, signaled using coreSbrFrameLengthIndex - m_frequencyIdx = toSamplingFrequencyIndex (sampleRate); // I/O sample rate as usacSamplingFrequencyIndex + m_frameLength = USAC_CCFL (frameLength >> m_shiftValSBR); // ccfl signaled using coreSbrFrameLengthIndex + m_frequencyIdx = toSamplingFrequencyIndex (sampleRate >> m_shiftValSBR); // as usacSamplingFrequencyIndex m_indepFlag = true; // usacIndependencyFlag in UsacFrame(), will be set per frame, true in first frame m_indepPeriod = (indepPeriod == 0 ? UINT_MAX : indepPeriod); // RAP, signaled using usacIndependencyFlag #if !RESTRICT_TO_AAC @@ -1841,6 +1858,7 @@ ExhaleEncoder::ExhaleEncoder (int32_t* const inputPcmData, unsigned ch { m_bandwidCurr[ch] = 0; m_bandwidPrev[ch] = 0; + m_coreSignals[ch] = nullptr; m_mdctQuantMag[ch] = nullptr; m_mdctSignals[ch] = nullptr; m_mdstSignals[ch] = nullptr; @@ -1873,6 +1891,7 @@ ExhaleEncoder::~ExhaleEncoder () // free allocated signal buffers for (unsigned ch = 0; ch < USAC_MAX_NUM_CHANNELS; ch++) { + if (m_shiftValSBR > 0) MFREE (m_coreSignals[ch]); MFREE (m_mdctQuantMag[ch]); MFREE (m_mdctSignals[ch]); MFREE (m_mdstSignals[ch]); @@ -1891,7 +1910,7 @@ ExhaleEncoder::~ExhaleEncoder () unsigned ExhaleEncoder::encodeLookahead () { const unsigned nChannels = toNumChannels (m_channelConf); - const unsigned nSamplesInFrame = toFrameLength (m_frameLength); + const unsigned nSamplesInFrame = toFrameLength (m_frameLength) << m_shiftValSBR; const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead const int32_t* chSig = m_pcm24Data; unsigned ch, s; @@ -1921,11 +1940,12 @@ unsigned ExhaleEncoder::encodeLookahead () *(predSig + 2) * (int64_t) filterC[2] + *(predSig + 3) * (int64_t) filterC[3]; *(--predSig) = int32_t ((predSample > 0 ? -predSample + (1 << 9) - 1 : -predSample) >> 9); } + if (m_shiftValSBR > 0) memset (m_coreSignals[ch], 0, (nSamplesInFrame >> 2) * sizeof (int32_t)); } // set initial temporal channel statistic to something meaningful before first coded frame - m_tempAnalyzer.temporalAnalysis (m_timeSignals, nChannels, nSamplesInFrame, nSamplesTempAna - nSamplesInFrame); - + m_tempAnalyzer.temporalAnalysis (m_timeSignals, nChannels, nSamplesInFrame, nSamplesTempAna - nSamplesInFrame, + m_shiftValSBR, m_coreSignals); // default lfeChannelIndex if (temporalProcessing ()) // time domain: window length, overlap, grouping, and transform { return 2; // internal error in temporal processing @@ -1945,7 +1965,7 @@ unsigned ExhaleEncoder::encodeLookahead () unsigned ExhaleEncoder::encodeFrame () { const unsigned nChannels = toNumChannels (m_channelConf); - const unsigned nSamplesInFrame = toFrameLength (m_frameLength); + const unsigned nSamplesInFrame = toFrameLength (m_frameLength) << m_shiftValSBR; const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead const int32_t* chSig = m_pcm24Data; unsigned ch, s; @@ -1955,6 +1975,14 @@ unsigned ExhaleEncoder::encodeFrame () { memcpy (&m_timeSignals[ch][0], &m_timeSignals[ch][nSamplesInFrame], nSamplesInFrame * sizeof (int32_t)); memcpy (&m_timeSignals[ch][nSamplesInFrame], &m_timeSignals[ch][2 * nSamplesInFrame], (nSamplesTempAna - nSamplesInFrame) * sizeof (int32_t)); + + if (m_shiftValSBR > 0) + { + const unsigned nSmpInFrame = toFrameLength (m_frameLength); // core coder frame length + + memcpy (&m_coreSignals[ch][0], &m_coreSignals[ch][nSmpInFrame], nSmpInFrame * sizeof (int32_t)); + memcpy (&m_coreSignals[ch][nSmpInFrame], &m_coreSignals[ch][2 * nSmpInFrame], (nSamplesInFrame >> 2) * sizeof (int32_t)); + } } // copy nSamplesInFrame external channel-interleaved samples into internal channel buffers @@ -1987,7 +2015,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin const unsigned nChannels = toNumChannels (m_channelConf); const unsigned nSamplesInFrame = toFrameLength (m_frameLength); const unsigned specSigBufSize = nSamplesInFrame * sizeof (int32_t); - const unsigned timeSigBufSize = ((nSamplesInFrame * 41) >> 4) * sizeof (int32_t); // core-codec delay*4 + const unsigned timeSigBufSize = (((nSamplesInFrame << m_shiftValSBR) * 41) >> 4) * sizeof (int32_t); // core-codec delay*4 const unsigned char chConf = m_channelConf; unsigned errorValue = 0; // no error @@ -2008,7 +2036,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin { errorValue |= 64; } - if ((m_frequencyIdx < 0) || (m_bitRateMode > (toSamplingRate (m_frequencyIdx) >> 12) + 2)) + if ((m_frequencyIdx < 0) || (m_bitRateMode > (toSamplingRate (m_frequencyIdx) >> (m_shiftValSBR > 0 ? 11 : 12)) + 2)) { errorValue |= 32; } @@ -2036,7 +2064,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin #if !RESTRICT_TO_AAC m_timeWarping, m_noiseFilling, #endif - audioConfigBuffer); + m_shiftValSBR, audioConfigBuffer); if (audioConfigBytes) *audioConfigBytes = errorValue; // size of UsacConfig() in bytes errorValue = (errorValue == 0 ? 1 : 0); } @@ -2061,6 +2089,20 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin memset (m_sfbLoudMem, 1, 2 * 26 * 32 * sizeof (uint16_t)); #endif // allocate all signal buffers + if (m_shiftValSBR > 0) + { + if (m_shiftValSBR > 1) + { + return (errorValue | 4); // >2:1 not supported at the moment + } + else for (unsigned ch = 0; ch < nChannels; ch++) + { + if ((m_coreSignals[ch] = (int32_t*) malloc (timeSigBufSize >> m_shiftValSBR)) == nullptr) + { + errorValue |= 4; + } + } + } for (unsigned ch = 0; ch < nChannels; ch++) { if ((m_entropyCoder[ch].initCodingMemory (nSamplesInFrame) > 0) || @@ -2106,7 +2148,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin #if !RESTRICT_TO_AAC m_timeWarping, m_noiseFilling, #endif - audioConfigBuffer); + m_shiftValSBR, audioConfigBuffer); if (audioConfigBytes) *audioConfigBytes = errorValue; // length of UsacConfig() in bytes errorValue = (errorValue == 0 ? 1 : 0); diff --git a/src/lib/exhaleEnc.h b/src/lib/exhaleEnc.h index ac12994..a298729 100644 --- a/src/lib/exhaleEnc.h +++ b/src/lib/exhaleEnc.h @@ -67,6 +67,7 @@ private: BitAllocator m_bitAllocator; // for scale factor init uint8_t m_bitRateMode; USAC_CCI m_channelConf; + int32_t* m_coreSignals[USAC_MAX_NUM_CHANNELS]; CoreCoderData* m_elementData[USAC_MAX_NUM_ELEMENTS]; EntropyCoder m_entropyCoder[USAC_MAX_NUM_CHANNELS]; uint32_t m_frameCount; @@ -95,6 +96,7 @@ private: uint16_t m_sfbLoudMem[2][26][32]; // loudness mem #endif SfbQuantizer m_sfbQuantizer; // powerlaw quantization + uint8_t m_shiftValSBR; // SBR ratio for shifting SpecAnalyzer m_specAnalyzer; // for spectral analysis uint32_t m_specAnaCurr[USAC_MAX_NUM_CHANNELS]; uint8_t m_specFlatPrev[USAC_MAX_NUM_CHANNELS]; diff --git a/src/lib/tempAnalysis.cpp b/src/lib/tempAnalysis.cpp index 8e73c60..cac85f8 100644 --- a/src/lib/tempAnalysis.cpp +++ b/src/lib/tempAnalysis.cpp @@ -11,6 +11,14 @@ #include "exhaleLibPch.h" #include "tempAnalysis.h" +static const int16_t lffc2x[65] = { // low-frequency filter coefficients + // 269-pt. sinc windowed by 0.409 * cos(0*pi.*t) - 0.5 * cos(2*pi.*t) + 0.091 * cos(4*pi.*t) + 17887, -27755, 16590, -11782, 9095, -7371, 6166, -5273, 4582, -4029, 3576, -3196, 2873, + -2594, 2350, -2135, 1944, -1773, 1618, -1478, 1351, -1235, 1129, -1032, 942, -860, 784, + -714, 650, -591, 536, -485, 439, -396, 357, -321, 287, -257, 229, -204, 181, -160, 141, + -124, 108, -95, 82, -71, 61, -52, 44, -37, 31, -26, 21, -17, 14, -11, 8, -6, 5, -3, 2, -1, 1 +}; + // static helper functions static unsigned updateAbsStats (const int32_t* const chSig, const int nSamples, unsigned* const maxAbsVal, int16_t* const maxAbsIdx) { @@ -105,13 +113,16 @@ void TempAnalyzer::getTransientAndPitch (int16_t transIdxAndPitch[USAC_MAX_NUM_C } unsigned TempAnalyzer::temporalAnalysis (const int32_t* const timeSignals[USAC_MAX_NUM_CHANNELS], const unsigned nChannels, - const int nSamplesInFrame, const unsigned lookaheadOffset, - const unsigned lfeChannelIndex /*= USAC_MAX_NUM_CHANNELS*/) // to skip an LFE channel + const int nSamplesInFrame, const unsigned lookaheadOffset, const uint8_t sbrShift, + int32_t* const lrCoreTimeSignals[USAC_MAX_NUM_CHANNELS] /*= nullptr*/, // if using SBR + const unsigned lfeChannelIndex /*= USAC_MAX_NUM_CHANNELS*/) // to skip an LFE channel { + const bool applyResampler = (sbrShift > 0 && lrCoreTimeSignals != nullptr); const int halfFrameOffset = nSamplesInFrame >> 1; + const int resamplerOffset = (int) lookaheadOffset - 128; - if ((timeSignals == nullptr) || (nChannels > USAC_MAX_NUM_CHANNELS) || (lfeChannelIndex > USAC_MAX_NUM_CHANNELS) || - (nSamplesInFrame > 2048) || (nSamplesInFrame < 2) || (lookaheadOffset > 2048) || (lookaheadOffset == 0)) + if ((timeSignals == nullptr) || (nChannels > USAC_MAX_NUM_CHANNELS) || (lfeChannelIndex > USAC_MAX_NUM_CHANNELS) || (sbrShift > 1) || + (nSamplesInFrame > 2048) || (nSamplesInFrame < 2) || (lookaheadOffset > 4096) || (lookaheadOffset <= 256u * sbrShift)) { return 1; } @@ -134,6 +145,26 @@ unsigned TempAnalyzer::temporalAnalysis (const int32_t* const timeSignals[USAC_M unsigned uR1 = abs (chSig[splitPtR - 1] - chSigM1[splitPtR - 1]); unsigned u; // temporary value - register? + if (applyResampler && lrCoreTimeSignals[ch] != nullptr) // downsampler + { + /*LF*/int32_t* lrSig = &lrCoreTimeSignals[ch][resamplerOffset >> sbrShift]; // low-rate, + const int32_t* hrSig = &timeSignals[ch][resamplerOffset]; // high-rate input time signal + + for (int i = nSamplesInFrame >> sbrShift; i > 0; i--, lrSig++, hrSig += 2) + { + int64_t r = ((int64_t) hrSig[0] << 17) + (hrSig[-1] + (int64_t) hrSig[1]) * -2*SHRT_MIN; + int16_t s; + + for (u = 65, s = 129; u > 0; s -= 2) r += (hrSig[-s] + (int64_t) hrSig[s]) * lffc2x[--u]; + + *lrSig = int32_t ((r + (1 << 17)) >> 18); // low-pass and low-rate +// TODO: bandpass + if (*lrSig < -8388608) *lrSig = -8388608; + else + if (*lrSig > 8388607) *lrSig = 8388607; + } + } + if (ch == lfeChannelIndex) // no analysis { m_tempAnaStats[ch] = 0; // flat/stationary frame diff --git a/src/lib/tempAnalysis.h b/src/lib/tempAnalysis.h index d6db8a9..ec0c8b4 100644 --- a/src/lib/tempAnalysis.h +++ b/src/lib/tempAnalysis.h @@ -39,7 +39,8 @@ public: void getTempAnalysisStats (uint32_t avgTempAnaStats[USAC_MAX_NUM_CHANNELS], const unsigned nChannels); void getTransientAndPitch (int16_t transIdxAndPitch[USAC_MAX_NUM_CHANNELS], const unsigned nChannels); unsigned temporalAnalysis (const int32_t* const timeSignals[USAC_MAX_NUM_CHANNELS], const unsigned nChannels, - const int nSamplesInFrame, const unsigned lookaheadOffset, + const int nSamplesInFrame, const unsigned lookaheadOffset, const uint8_t sbrShift, + int32_t* const lrCoreTimeSignals[USAC_MAX_NUM_CHANNELS] = nullptr, // if using SBR const unsigned lfeChannelIndex = USAC_MAX_NUM_CHANNELS); // to skip an LFE channel }; // TempAnalyzer