add SBR bit syntax

This commit is contained in:
Christian R. Helmrich 2020-11-10 20:00:02 +01:00
parent 8b56192418
commit 1259070c19
11 changed files with 349 additions and 56 deletions

View File

@ -16,7 +16,7 @@ if("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}")
endif()
project(exhale VERSION 1.0.8 LANGUAGES CXX)
project(exhale VERSION 1.1.0 LANGUAGES CXX)
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
set(CMAKE_BUILD_TYPE Release

View File

@ -34,7 +34,7 @@ exhale is being made available under an open-source license which is
similar to the 3-clause BSD license but modified to address specific
aspects dictated by the nature and the output of this application.
The license text and release notes for the current version 1.0.8 can
The license text and release notes for the current version 1.1RC can
be found in the `include` subdirectory of the exhale distribution.

View File

@ -12,8 +12,8 @@
# define EXHALELIB_VERSION_MAJOR "1"
#endif
#ifndef EXHALELIB_VERSION_MINOR
# define EXHALELIB_VERSION_MINOR "0"
# define EXHALELIB_VERSION_MINOR "1"
#endif
#ifndef EXHALELIB_VERSION_BUGFIX
# define EXHALELIB_VERSION_BUGFIX ".8" // "RC" or ".0", ".1", ...
# define EXHALELIB_VERSION_BUGFIX "RC" // "RC" or ".0", ".1", ...
#endif

View File

@ -54,6 +54,7 @@
#define EA_PEAK_NORM -96.33f // 20 * log10(2^-16), 16-bit normalization
#define EA_PEAK_MIN 0.262f // 20 * log10() + EA_PEAK_NORM = -108 dbFS
#define ENABLE_RESAMPLING 1 // 1: automatic input up- and downsampling
#define ENABLE_SIMPLE_SBR 1 // 1: basic 2:1 low-rate SBR functionality
#define IGNORE_WAV_LENGTH 0 // 1: ignore input size indicators (nasty)
#define XHE_AAC_LOW_DELAY 0 // 1: allow encoding with 768 frame length
@ -376,14 +377,16 @@ int main (const int argc, char* argv[])
#if defined (_WIN32) || defined (WIN32) || defined (_WIN64) || defined (WIN64)
fprintf_s (stdout, " preset\t= # (0-9) low-complexity standard compliant xHE-AAC at 16ú#+48 kbit/s\n");
# if XHE_AAC_LOW_DELAY
// fprintf_s (stdout, " \t (a-i) low-complexity compatible xHE-AAC with BE at 16ú#+48 kbit/s\n");
fprintf_s (stdout, " \t (A-I) 41ms low-delay compatible xHE-AAC with BE at 16ú#+48 kbit/s\n");
fprintf_s (stdout, " \t (A-J) 41ms low-delay compatible xHE-AAC with BE at 16ú#+48 kbit/s\n");
# elif ENABLE_SIMPLE_SBR
fprintf_s (stdout, " \t (a-g) low-complexity compliant xHE-AAC with SBR at 12ú#+36 kbit/s\n");
# endif
#else
fprintf_s (stdout, " preset\t= # (0-9) low-complexity standard compliant xHE-AAC at 16*#+48 kbit/s\n");
# if XHE_AAC_LOW_DELAY
// fprintf_s (stdout, " \t (a-i) low-complexity compatible xHE-AAC with BE at 16*#+48 kbit/s\n");
fprintf_s (stdout, " \t (A-I) 41ms low-delay compatible xHE-AAC with BE at 16*#+48 kbit/s\n");
fprintf_s (stdout, " \t (A-J) 41ms low-delay compatible xHE-AAC with BE at 16*#+48 kbit/s\n");
# elif ENABLE_SIMPLE_SBR
fprintf_s (stdout, " \t (a-g) low-complexity compliant xHE-AAC with SBR at 12*#+36 kbit/s\n");
# endif
#endif
fprintf_s (stdout, "\n inputWaveFile.wav lossless WAVE audio input, read from stdin if not specified\n\n");
@ -412,15 +415,21 @@ int main (const int argc, char* argv[])
// check preset mode, derive coder config
#if XHE_AAC_LOW_DELAY
if ((*argv[1] >= '0' && *argv[1] <= '9') || (*argv[1] >= 'a' && *argv[1] <= 'i') || (*argv[1] >= 'A' && *argv[1] <= 'I'))
if ((*argv[1] >= '0' && *argv[1] <= '9') || (*argv[1] >= 'A' && *argv[1] <= 'J'))
#elif ENABLE_SIMPLE_SBR
if ((*argv[1] >= '0' && *argv[1] <= '9') || (*argv[1] >= 'a' && *argv[1] <= 'g'))
#else
if ((*argv[1] >= '0' && *argv[1] <= '9') || (*argv[1] >= 'a' && *argv[1] <= 'i'))
if (*argv[1] >= '0' && *argv[1] <= '9')
#endif
{
i = (uint16_t) argv[1][0];
compatibleExtensionFlag = (i & 0x40) >> 6;
#if ENABLE_SIMPLE_SBR
coreSbrFrameLengthIndex = (i > 0x60 ? 5 : (i & 0x20) >> 5);
#else
coreSbrFrameLengthIndex = (i & 0x20) >> 5;
variableCoreBitRateMode = (i & 0x0F);
#endif
variableCoreBitRateMode = (i & 0x0F) - (i >> 6);
}
else if (*argv[1] == '#') // default mode
{
@ -430,9 +439,15 @@ int main (const int argc, char* argv[])
{
#if XHE_AAC_LOW_DELAY
# ifdef EXHALE_APP_WCHAR
fwprintf_s (stderr, L" ERROR reading preset mode: character %s is not supported! Use 0-9 or A-I.\n\n", argv[1]);
#else
fprintf_s (stderr, " ERROR reading preset mode: character %s is not supported! Use 0-9 or A-I.\n\n", argv[1]);
fwprintf_s (stderr, L" ERROR reading preset mode: character %s is not supported! Use 0-9 or A-J.\n\n", argv[1]);
# else
fprintf_s (stderr, " ERROR reading preset mode: character %s is not supported! Use 0-9 or A-J.\n\n", argv[1]);
# endif
#elif ENABLE_SIMPLE_SBR
# ifdef EXHALE_APP_WCHAR
fwprintf_s (stderr, L" ERROR reading preset mode: character %s is not supported! Use 0-9 or a-g.\n\n", argv[1]);
# else
fprintf_s (stderr, " ERROR reading preset mode: character %s is not supported! Use 0-9 or a-g.\n\n", argv[1]);
# endif
#else
# ifdef EXHALE_APP_WCHAR
@ -522,10 +537,19 @@ int main (const int argc, char* argv[])
if ((wavReader.open (inFileHandle, startLength, readStdin ? LLONG_MAX : _filelengthi64 (inFileHandle)) != 0) ||
#else // Linux, MacOS, Unix
if ((wavReader.open (inFileHandle, startLength, readStdin ? LLONG_MAX : lseek (inFileHandle, 0, 2 /*SEEK_END*/)) != 0) ||
#endif
#if ENABLE_SIMPLE_SBR
(wavReader.getSampleRate () >= 1000 && wavReader.getSampleRate () < 24000 && coreSbrFrameLengthIndex >= 3) ||
#endif
(wavReader.getNumChannels () >= 7))
{
fprintf_s (stderr, " ERROR while trying to open WAVE file: invalid or unsupported audio format!\n\n");
#if ENABLE_SIMPLE_SBR
if (wavReader.getSampleRate () >= 1000 && wavReader.getSampleRate () < 24000 && coreSbrFrameLengthIndex >= 3)
{
fprintf_s (stderr, " The sampling rate is %d kHz but xHE-AAC with SBR requires at least 24 kHz.\n\n", wavReader.getSampleRate () / 1000);
}
#endif
i = 8192; // return value
goto mainFinish; // audio format invalid
@ -555,6 +579,9 @@ int main (const int argc, char* argv[])
}
if (wavReader.getSampleRate () > 32100 + (unsigned) variableCoreBitRateMode * 12000 + (variableCoreBitRateMode >> 2) * 3900
#if ENABLE_SIMPLE_SBR
&& (coreSbrFrameLengthIndex < 3)
#endif
#if ENABLE_RESAMPLING
&& (variableCoreBitRateMode > 1 || wavReader.getSampleRate () != 48000)
#endif
@ -566,7 +593,11 @@ int main (const int argc, char* argv[])
goto mainFinish; // ask for resampling
}
#if ENABLE_SIMPLE_SBR
if (wavReader.getSampleRate () > 32000 && coreSbrFrameLengthIndex < 3 && variableCoreBitRateMode <= 1)
#else
if (wavReader.getSampleRate () > 32000 && variableCoreBitRateMode <= 1)
#endif
{
#if ENABLE_RESAMPLING
if (wavReader.getSampleRate () == 48000)
@ -613,8 +644,8 @@ int main (const int argc, char* argv[])
// enforce executable specific constraints
i = __min (USHRT_MAX, wavReader.getSampleRate ());
if ((wavReader.getNumChannels () > 3) && (i == 57600 || i == 51200 || i == 40000 || i == 38400 || i == 34150 ||
i == 28800 || i == 25600 || i == 20000 || i == 19200 || i == 17075 || i == 14400 || i == 12800 || i == 9600))
if ((wavReader.getNumChannels () > 3 || coreSbrFrameLengthIndex >= 3) && (i == 57600 || i == 51200 || i == 40000 || i == 38400 ||
i == 34150 || i == 28800 || i == 25600 || i == 20000 || i == 19200 || i == 17075 || i == 14400 || i == 12800 || i == 9600))
{
fprintf_s (stderr, " ERROR: exhale does not support %d-channel coding with %d Hz sampling rate.\n\n", wavReader.getNumChannels (), i);
@ -626,7 +657,12 @@ int main (const int argc, char* argv[])
const unsigned inSampDepth = wavReader.getBitDepth ();
#if ENABLE_RESAMPLING
const bool enableUpsampler = eaInitUpsampler2x (&inPcmRsmp, variableCoreBitRateMode, i, frameLength, numChannels);
# if ENABLE_SIMPLE_SBR
const bool enableResampler = (coreSbrFrameLengthIndex >= 3 ? false : // no 3:2 downsampling needed when using SBR
eaInitDownsampler (&inPcmRsmp, variableCoreBitRateMode, i, frameLength, numChannels));
# else
const bool enableResampler = eaInitDownsampler (&inPcmRsmp, variableCoreBitRateMode, i, frameLength, numChannels);
# endif
const uint16_t firstLength = uint16_t (enableUpsampler ? (frameLength >> 1) + 32 : (enableResampler ? startLength : frameLength));
const unsigned inFrameSize = (enableResampler ? startLength : frameLength) * sizeof (int32_t); // max buffer size
const unsigned resampRatio = (enableResampler ? 3 : 1); // for resampling ratio
@ -671,7 +707,7 @@ int main (const int argc, char* argv[])
const unsigned sampleRate = wavReader.getSampleRate ();
#endif
const unsigned indepPeriod = (sampleRate < 48000 ? (sampleRate - 320) / frameLength : 45 /*for 50-Hz video, use 50 for 60-Hz video*/);
const unsigned mod3Percent = unsigned ((expectLength * (3 + coreSbrFrameLengthIndex)) >> 17);
const unsigned mod3Percent = unsigned ((expectLength * (3 + (coreSbrFrameLengthIndex & 3))) >> 17);
uint32_t byteCount = 0, bw = (numChannels < 7 ? loudStats : 0);
uint32_t br, bwMax = 0; // br will be used to hold bytes read and/or bit-rate
uint32_t headerRes = 0;
@ -696,7 +732,11 @@ int main (const int argc, char* argv[])
memset (outAuData, 0, 108 * sizeof (uint8_t)); // max. allowed ASC + UC size
i = exhaleEnc.initEncoder (outAuData, &bw); // bw stores actual ASC + UC size
#if ENABLE_SIMPLE_SBR
if ((i |= mp4Writer.open (outFileHandle, sampleRate, numChannels, inSampDepth, frameLength, startLength + (coreSbrFrameLengthIndex >= 3 ? 962 : 0),
#else
if ((i |= mp4Writer.open (outFileHandle, sampleRate, numChannels, inSampDepth, frameLength, startLength,
#endif
indepPeriod, outAuData, bw, (time (nullptr) + 2082844800) & UINT_MAX, (char) variableCoreBitRateMode)) != 0)
{
fprintf_s (stderr, " ERROR while trying to initialize xHE-AAC encoder: error value %d was returned!\n\n", i);
@ -710,7 +750,12 @@ int main (const int argc, char* argv[])
if (*argv[1] != '#') // user-def. mode
{
fprintf_s (stdout, " Encoding %d-kHz %d-channel %d-bit WAVE to low-complexity xHE-AAC at %d kbit/s\n\n",
#if ENABLE_SIMPLE_SBR
sampleRate / 1000, numChannels, inSampDepth, __min (5, numChannels) * (((24 + variableCoreBitRateMode * 8) *
(coreSbrFrameLengthIndex >= 3 ? 3 : 4)) >> 2));
#else
sampleRate / 1000, numChannels, inSampDepth, __min (5, numChannels) * (24 + variableCoreBitRateMode * 8));
#endif
}
if (!readStdin && (mod3Percent > 0))
{
@ -926,6 +971,13 @@ int main (const int argc, char* argv[])
bw = mp4Writer.finishFile (br, bw, uint32_t (__min (UINT_MAX - startLength, actualLength)), (time (nullptr) + 2082844800) & UINT_MAX,
(i == 0) && (numChannels < 7) ? outAuData : nullptr);
// print out collected file statistics
#if ENABLE_SIMPLE_SBR
if (coreSbrFrameLengthIndex >= 3)
{
fprintf_s (stdout, " Done, actual average incl. SBR data %.2f kbit/s\n\n", (float) br * 0.001f);
}
else
#endif
fprintf_s (stdout, " Done, actual average %.1f kbit/s\n\n", (float) br * 0.001f);
if (numChannels < 7)
{

View File

@ -13,7 +13,7 @@
0 ICON "exhaleApp.ico"
VS_VERSION_INFO VERSIONINFO
FILEVERSION 1,0,8
FILEVERSION 1,1,0
BEGIN
BLOCK "StringFileInfo"
BEGIN

View File

@ -112,6 +112,117 @@ unsigned BitStreamWriter::writeChannelWiseIcsInfo (const IcsInfo& icsInfo) //
return 9;
}
// Writes the SBR data of one channel (sbrDataCh1 == nullptr) or of a channel pair into
// m_auBitStream and returns the number of bits written by this function.
//
// sbrDataCh0  packed SBR info/data of the mono or left channel; when nullptr, nothing is
//             written and 0 is returned
// sbrDataCh1  packed SBR info/data of the right channel, or nullptr for mono
// indepFlag   when true, one bit fewer is written for each group of delta-direction flags
//
// Fields of word [0] as they are read below:
//   bits 29 and up  amplitude resolution (res)
//   bit  23         ch0: selects 4 (set) or 2 (clear) inverse-filtering bits per channel;
//                   ch1: coupling flag
//   bits 21-22      exponent of the envelope count, i.e. the count is 1 << value
//   bits 20-22      written as the low bits of the 5-bit grid/class field
//   bits 12-19      envelope delta-direction flags
//   bits  4-11      noise delta-direction flags
//   low nb bits     inverse-filtering modes
// Words [1] to [1 << exponent] carry one envelope each; word [9] carries the noise data.
//
// NOTE(review): several values handed to write() can be wider than the bit count passed
// along (e.g. "& 31" written with 1 bit, "& 255" written with i bits). Whether write()
// truncates its input is not visible here - confirm before feeding real SBR data.
unsigned BitStreamWriter::writeChannelWiseSbrData (const int32_t* const sbrDataCh0, const int32_t* const sbrDataCh1,
const bool indepFlag /*= false*/)
{
const unsigned nb = (sbrDataCh0 != nullptr ? 2 * ((sbrDataCh0[0] >> 23) & 1) + 2 : 0); // noise bits/ch = 2 or 4
const int16_t res = (nb > 0 ? sbrDataCh0[0] >> 29 : 0); // short bs_amp_res
const bool stereo = (sbrDataCh1 != nullptr);
const bool couple = (stereo ? ((sbrDataCh1[0] >> 23) & 1) : false);
// bitCount starts with all fixed-size fields: the coupling flag (stereo only), 5 grid bits
// and nb inverse-filtering bits per separately coded channel, and one add-harmonic flag per
// channel. The variable-size fields are added as they are written.
unsigned bitCount = (stereo ? (couple ? 2 : 7 + nb) : 0) + 6 + nb, i, tmpCh0, tmpCh1;
// nb is 0 only when sbrDataCh0 is nullptr: leave before anything is written
if (nb == 0) return 0;
// envelope-count exponents; when coupled (or mono), tmpCh1 is taken from channel 0
tmpCh0 = (sbrDataCh0[0] >> 21) & 3;
tmpCh1 = ((stereo && !couple ? sbrDataCh1[0] : sbrDataCh0[0]) >> 21) & 3;
if (stereo) m_auBitStream.write (couple ? 1 : 0, 1); // _coupling
// sbr_grid(), assumes bs_frame_class[ch] == 0, i.e. class FIXFIX
m_auBitStream.write ((sbrDataCh0[0] >> 20) & 7, 5); // class data
// the second channel gets its own grid only when it is not coupled to the first one
if (stereo && !couple) m_auBitStream.write ((sbrDataCh1[0] >> 20) & 7, 5);
// sbr_dtdf()
i = (1u << tmpCh0) - (indepFlag ? 1 : 0); // actual bs_num_env[0]
if (i > 0) m_auBitStream.write ((sbrDataCh0[0] >> 12) & 255, i); // _df_env
bitCount += i;
i = (tmpCh0 > 0 ? 2 : 1) - (indepFlag ? 1 : 0);// bs_num_noise[0]
if (i > 0) m_auBitStream.write ((sbrDataCh0[0] >> 4) & 255, i); // df_noise
bitCount += i;
if (stereo)
{
// same two flag groups for the second channel, sized by tmpCh1
i = (1u << tmpCh1) - (indepFlag ? 1 : 0);
if (i > 0) m_auBitStream.write ((sbrDataCh1[0] >> 12) & 255, i);
bitCount += i;
i = (tmpCh1 > 0 ? 2 : 1) - (indepFlag ? 1 : 0);
if (i > 0) m_auBitStream.write ((sbrDataCh1[0] >> 4) & 255, i);
bitCount += i;
}
// sbr_invf(), assumes dflt_noise_bands < 3, i.e. 1-2 noise bands
i = 6 * nb - 9; // bitmask = 3 or 15
m_auBitStream.write (sbrDataCh0[0] & i, nb); // bs_invf_mode[0][]
if (stereo && !couple) m_auBitStream.write (sbrDataCh1[0] & i, nb);
// sbr_envelope() for mono/left channel, assumes bs_pvc_mode == 0
for (i = 1; i <= (1u << tmpCh0); i++) // dt loop
{
// 6 instead of 7 bits when res is positive and more than one envelope is signaled
const uint8_t bits = (res > 0 && tmpCh0 > 0 ? 6 : 7);
// placeholder: the constant 15 is written; the intended expression is kept in the comment
m_auBitStream.write (15/*sbrDataCh0[i] & 127*/, bits); // bs_data_env
bitCount += bits;
m_auBitStream.write (sbrDataCh0[i] >> 7, 5<<1); // TODO: VLC words
bitCount += 5<<1;
}
if (stereo && !couple)
{
// uncoupled right channel: same layout as above, but the stored start value is written
for (i = 1; i <= (1u << tmpCh1); i++) // sbr_envelope() dt loop
{
const uint8_t bits = (res > 0 && tmpCh1 > 0 ? 6 : 7);
m_auBitStream.write (sbrDataCh1[i] & 127, bits);
bitCount += bits;
m_auBitStream.write (sbrDataCh1[i] >> 7, 5<<1); // TODO: VLC words
bitCount += 5<<1;
}
}
// noise data of the mono/left channel: i counts down from 2 (or 1) to 1
for (i = (tmpCh0 > 0 ? 2 : 1); i > 0; i--) // sbr_noise() dt loop
{
// placeholder: the constant 31 is written; the intended expression is kept in the comment
m_auBitStream.write (31/*(sbrDataCh0[9] >> (12 * i)) & 31*/, 5); // _data_noise
bitCount += 5;
// one extra bit per noise entry when 4 inverse-filtering bits (nb == 4) are in use
if (nb == 4)
{
m_auBitStream.write ((sbrDataCh0[9] >> (12 * i - 6)) & 31, 1); // TODO: VLC word
bitCount++;
}
}
if (stereo)
{
if (couple)
{
// coupled right channel: its envelopes follow the left channel's noise data and use a
// start value that is one bit shorter (5 or 6 bits, mask 63)
for (i = 1; i <= (1u << tmpCh1); i++) // sbr_envelope dt loop
{
const uint8_t bits = (res > 0 && tmpCh1 > 0 ? 5 : 6);
m_auBitStream.write (sbrDataCh1[i] & 63, bits);
bitCount += bits;
m_auBitStream.write (sbrDataCh1[i] >> 7, 5<<1); // TODO: VLC words
bitCount += 5<<1;
}
}
// noise data of the right channel, read from word [9] of sbrDataCh1
for (i = (tmpCh1 > 0 ? 2 : 1); i > 0; i--) // sbr_noise dt loop
{
m_auBitStream.write ((sbrDataCh1[9] >> (12 * i)) & 31, 5);
bitCount += 5;
if (nb == 4)
{
m_auBitStream.write ((sbrDataCh1[9] >> (12 * i - 6)) & 31, 1); // TODO: VLC word
bitCount++;
}
}
}
// both flag bits below are already included in the initial value of bitCount
m_auBitStream.write (0, 1); // fixed bs_add_harmonic_flag[0] = 0
if (stereo) m_auBitStream.write (0, 1);
return bitCount;
}
unsigned BitStreamWriter::writeChannelWiseTnsData (const TnsData& tnsData, const bool eightShorts)
{
const unsigned numWindows = (eightShorts ? 8 : 1);
@ -514,8 +625,10 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex,
#if !RESTRICT_TO_AAC
const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling,
#endif
unsigned char* const audioConfig)
const uint8_t sbrRatioShiftValue, unsigned char* const audioConfig)
{
const uint8_t fli = (sbrRatioShiftValue == 0 ? 1 /*no SBR*/ : __min (2, sbrRatioShiftValue) + 2);
const int8_t usfi = __max (0, samplingFrequencyIndex - 3 * sbrRatioShiftValue); // TODO: non-standard sampling rates
unsigned bitCount = 37;
if ((elementType == nullptr) || (audioConfig == nullptr) || (chConfigurationIndex >= USAC_MAX_NUM_ELCONFIGS) ||
@ -532,20 +645,20 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex,
m_auBitStream.write (0x7CA, 11); // audio object type (AOT) 32 (esc) + 10 = 42
if (samplingFrequencyIndex < AAC_NUM_SAMPLE_RATES)
{
m_auBitStream.write (samplingFrequencyIndex, 4);
m_auBitStream.write (usfi, 4);
}
else
{
m_auBitStream.write (0xF, 4); // esc
m_auBitStream.write (toSamplingRate (samplingFrequencyIndex), 24);
m_auBitStream.write (toSamplingRate (usfi), 24);
bitCount += 24;
}
// for multichannel audio, refer to channel mapping of AotSpecificConfig below
m_auBitStream.write (chConfigurationIndex > 2 ? 0 : chConfigurationIndex, 4);
// --- AotSpecificConfig(): UsacConfig()
m_auBitStream.write (samplingFrequencyIndex, 5); // usacSamplingFrequencyIndex
m_auBitStream.write (shortFrameLength ? 0 : 1, 3); // coreSbrFrameLengthIndex
m_auBitStream.write (usfi, 5); // usacSamplingFrequencyIndex (after SBR dec.!)
m_auBitStream.write (shortFrameLength ? 0 : fli, 3);// coreSbrFrameLengthIndex
m_auBitStream.write (chConfigurationIndex, 5); // channelConfigurationIndex
m_auBitStream.write (numElements - 1, 4); // numElements in UsacDecoderConfig
@ -561,6 +674,22 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex,
m_auBitStream.write ((tw_mdct[el] ? 2 : 0) | (noiseFilling[el] ? 1 : 0), 2);
#endif
bitCount += 2;
if (sbrRatioShiftValue > 0) // sbrRatioIndex > 0: SbrConfig
{
const uint32_t sf = (samplingFrequencyIndex == 6 || samplingFrequencyIndex < 5 ? 10 : (samplingFrequencyIndex < 8 ? 9 : 8)); // bs_stop_freq
m_auBitStream.write (0, 3); // fix harmonicSBR, bs_interTes, bs_pvc = 0
bitCount += 13; // incl. SbrDfltHeader following hereafter
m_auBitStream.write (15, 4); // 11025 @ 44.1, 11625 @ 48, 15000 @ 64 kHz
m_auBitStream.write (sf, 4); // 16193 @ 44.1, 18375 @ 48, 22500 @ 64 kHz
m_auBitStream.write ( 0, 2); // fix dflt_header_extra* = 0
if (elementType[el] == ID_USAC_CPE)
{
m_auBitStream.write (0, 2); // fix stereoConfigIndex = 0
bitCount += 2;
}
}
}
} // for el
@ -591,7 +720,7 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex,
bitCount += (8 - m_auBitStream.heldBitCount) & 7;
writeByteAlignment (); // flush bytes
memcpy (audioConfig, &m_auBitStream.stream.front (), __min (16, bitCount >> 3));
memcpy (audioConfig, &m_auBitStream.stream.front (), __min (15u + fli, bitCount >> 3));
return (bitCount >> 3); // byte count
}
@ -603,11 +732,12 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData,
#if !RESTRICT_TO_AAC
const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling,
#endif
const uint8_t sbrRatioShiftValue, int32_t** const sbrInfoAndData,
unsigned char* const accessUnit, const unsigned nSamplesInFrame /*= 1024*/)
{
unsigned bitCount = 1, ci = 0;
if ((elementData == nullptr) || (entropyCoder == nullptr) || (tempBuffer == nullptr) ||
if ((elementData == nullptr) || (entropyCoder == nullptr) || (tempBuffer == nullptr) || (sbrInfoAndData == nullptr) ||
(mdctSignals == nullptr) || (mdctQuantMag == nullptr) || (accessUnit == nullptr) || (nSamplesInFrame > 2048) ||
#if !RESTRICT_TO_AAC
(noiseFilling == nullptr) || (tw_mdct == nullptr) ||
@ -644,6 +774,22 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData,
tw_mdct[el], noiseFilling[el],
#endif
usacIndependencyFlag);
if (sbrRatioShiftValue > 0) // UsacSbrData()
{
if (usacIndependencyFlag)
{
m_auBitStream.write ((sbrInfoAndData[ci][0] >> 24), 6); // SbrInfo()
m_auBitStream.write (1, 1);// fix sbrUseDfltHeader = 1
bitCount += 7;
}
else
{
m_auBitStream.write (0, 1); // fix sbrInfoPresent = 0
bitCount++;
}
bitCount += writeChannelWiseSbrData (sbrInfoAndData[ci], nullptr, // L (mono) only, no R
usacIndependencyFlag);
}
ci++;
break;
}
@ -670,6 +816,22 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData,
tw_mdct[el], noiseFilling[el],
#endif
usacIndependencyFlag);
if (sbrRatioShiftValue > 0) // UsacSbrData()
{
if (usacIndependencyFlag)
{
m_auBitStream.write ((sbrInfoAndData[ci][0] >> 24), 6); // SbrInfo()
m_auBitStream.write (1, 1);// fix sbrUseDfltHeader = 1
bitCount += 7;
}
else
{
m_auBitStream.write (0, 1); // fix sbrInfoPresent = 0
bitCount++;
}
bitCount += writeChannelWiseSbrData (sbrInfoAndData[ci - 1], sbrInfoAndData[ci], // L, R
usacIndependencyFlag);
}
ci++;
break;
}

View File

@ -34,6 +34,8 @@ private:
// helper functions
void writeByteAlignment (); // write 0s for byte alignment
unsigned writeChannelWiseIcsInfo (const IcsInfo& icsInfo); // ics_info()
unsigned writeChannelWiseSbrData (const int32_t* const sbrDataCh0, const int32_t* const sbrDataCh1,
const bool indepFlag = false);
unsigned writeChannelWiseTnsData (const TnsData& tnsData, const bool eightShorts);
unsigned writeFDChannelStream (const CoreCoderData& elData, EntropyCoder& entrCoder, const unsigned ch,
const int32_t* const mdctSignal, const uint8_t* const mdctQuantMag,
@ -60,7 +62,7 @@ public:
#if !RESTRICT_TO_AAC
const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling,
#endif
unsigned char* const audioConfig);
const uint8_t sbrRatioShiftValue, unsigned char* const audioConfig);
unsigned createAudioFrame (CoreCoderData** const elementData, EntropyCoder* const entropyCoder,
int32_t** const mdctSignals, uint8_t** const mdctQuantMag,
const bool usacIndependencyFlag, const uint8_t numElements,
@ -68,6 +70,7 @@ public:
#if !RESTRICT_TO_AAC
const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling,
#endif
const uint8_t sbrRatioShiftValue, int32_t** const sbrInfoAndData,
unsigned char* const accessUnit, const unsigned nSamplesInFrame = 1024);
}; // BitStreamWriter

View File

@ -785,11 +785,13 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
const unsigned samplingRate = toSamplingRate (m_frequencyIdx);
const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
const uint32_t maxSfbLong = (samplingRate < 37566 ? MAX_NUM_SWB_LONG : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
const uint64_t scaleSr = (samplingRate < 27713 ? (samplingRate < 23004 ? 32 : 34) - __min (3, m_bitRateMode)
const uint32_t maxSfbLong = (samplingRate < 37566 || m_shiftValSBR > 0 ? m_numSwbLong // was MAX_NUM_SWB_LONG
: brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
const uint32_t scaleSBR = (m_shiftValSBR > 0 ? 8 : 0); // reduces core rate by 25 %
const uint64_t scaleSr = (samplingRate < 27713 ? (samplingRate < 23004 ? 32 : 34) - __min (3 << m_shiftValSBR, m_bitRateMode)
: (samplingRate < 37566 && m_bitRateMode != 3u ? 36 : 37)) - (nChannels >> 1);
const uint64_t scaleBr = (m_bitRateMode == 0 ? __min (32, 3 + (samplingRate >> 10) + (samplingRate >> 13) - (nChannels >> 1))
: scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1));
: scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1)) + scaleSBR;
uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf;
uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS];
//uint8_t meanTempFlat[USAC_MAX_NUM_CHANNELS];
@ -1081,13 +1083,13 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
const unsigned samplingRate = toSamplingRate (m_frequencyIdx);
const unsigned* const coeffMagn = m_sfbQuantizer.getCoeffMagnPtr ();
uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS];
//uint8_t meanTempFlat[USAC_MAX_NUM_CHANNELS];
uint8_t meanTempFlat[USAC_MAX_NUM_CHANNELS] = {208, 208, 208, 208, 208, 208, 208, 208};
unsigned ci = 0, s; // running index
unsigned errorValue = (coeffMagn == nullptr ? 1 : 0);
// get means of spectral and temporal flatness for every channel
m_bitAllocator.getChAverageSpecFlat (meanSpecFlat, nChannels);
//m_bitAllocator.getChAverageTempFlat (meanTempFlat, nChannels);
if (m_bitRateMode == 0 && samplingRate >= 23004) m_bitAllocator.getChAverageTempFlat (meanTempFlat, nChannels);
for (unsigned el = 0; el < m_numElements; el++) // element loop
{
@ -1097,7 +1099,7 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
if ((coreConfig.elementType < ID_USAC_LFE) && (coreConfig.stereoMode > 0)) // synch SFMs
{
meanSpecFlat[ci] = meanSpecFlat[ci + 1] = ((uint16_t) meanSpecFlat[ci] + (uint16_t) meanSpecFlat[ci + 1]) >> 1;
// meanTempFlat[ci] = meanTempFlat[ci + 1] = ((uint16_t) meanTempFlat[ci] + (uint16_t) meanTempFlat[ci + 1]) >> 1;
meanTempFlat[ci] = meanTempFlat[ci + 1] = ((uint16_t) meanTempFlat[ci] + (uint16_t) meanTempFlat[ci + 1]) >> 1;
}
for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
@ -1181,7 +1183,8 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
const uint8_t maxSfbLong = (samplingRate < 37566 ? 63 - (samplingRate >> 11) : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
const uint8_t maxSfbShort = (samplingRate < 37566 ? 21 - (samplingRate >> 12) : brModeAndFsToMaxSfbShort(m_bitRateMode, samplingRate));
const uint16_t peakIndex = (shortWinCurr ? 0 : (m_specAnaCurr[ci] >> 5) & 2047);
const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort - 2 + (meanSpecFlat[ci] >> 6) : maxSfbLong - 6 + (meanSpecFlat[ci] >> 5));
const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort - 2 + (meanSpecFlat[ci] >> 6) : maxSfbLong - 6 + (meanSpecFlat[ci] >> 5)) +
(shortWinCurr ? -3 + (((1 << 5) + meanTempFlat[ci]) >> 6) : -7 + (((1 << 4) + meanTempFlat[ci]) >> 5));
const unsigned targetBitCount25 = ((60000 + 20000 * m_bitRateMode) * nSamplesInFrame) / (samplingRate * ((grpData.numWindowGroups + 1) >> 1));
unsigned b = grpData.sfbsPerGroup - 1;
@ -1212,7 +1215,10 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
}
}
#endif
b = lastSfb;
// coarse-quantize near-Nyquist SFB with SBR @ 48-64 kHz
b = 40 + (samplingRate >> 12);
if ((m_shiftValSBR == 0) || (samplingRate < 23004) || shortWinCurr || (b > lastSfb)) b = lastSfb;
while ((b >= sfmBasedSfbStart + (m_bitRateMode >> 1)) && (grpOff[b] > peakIndex) && ((grpRms[b] >> 16) <= 1) /*coarse quantization*/ &&
((estimBitCount * 5 > targetBitCount25 * 2) || (grpLength > 1 /*no accurate bit count estim. available for grouped spectrum*/)))
{
@ -1308,6 +1314,11 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
// NOTE: gap-filling SFB bit count might be inaccurate now since scale factors changed
if (coreConfig.specFillData[ch] == 1) errorValue |= 1;
#endif
if ((coreConfig.elementType < ID_USAC_LFE) && (m_shiftValSBR > 0)) // collect SBR data
{
memset (m_coreSignals[ci], 0, 10 * sizeof (int32_t)); // TODO
m_coreSignals[ci][0] = 1 << 20; // fix bs_freq_res = high
}
ci++;
}
} // for el
@ -1317,7 +1328,7 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
#if !RESTRICT_TO_AAC
m_timeWarping, m_noiseFilling,
#endif
m_outAuData, nSamplesInFrame)); // returns AU size
m_shiftValSBR, m_coreSignals, m_outAuData, nSamplesInFrame)); // returns AU size
}
unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS and SFB data
@ -1597,7 +1608,7 @@ unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS
unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects of ics_info()
{
const unsigned nChannels = toNumChannels (m_channelConf);
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
const unsigned nSamplesInFrame = toFrameLength (m_frameLength) << m_shiftValSBR;
const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead
const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
unsigned ci = 0; // running ch index
@ -1608,8 +1619,8 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o
m_tempAnalyzer.getTransientAndPitch (m_tranLocCurr, nChannels);
// temporal analysis for look-ahead signal (central nSamplesInFrame samples of next frame)
errorValue |= m_tempAnalyzer.temporalAnalysis (m_timeSignals, nChannels, nSamplesInFrame, nSamplesTempAna, lfeChannelIndex);
errorValue |= m_tempAnalyzer.temporalAnalysis (m_timeSignals, nChannels, nSamplesInFrame, nSamplesTempAna,
m_shiftValSBR, m_coreSignals, lfeChannelIndex);
// get temporal channel statistics for next frame, used for window length/overlap decision
m_tempAnalyzer.getTempAnalysisStats (m_tempAnaNext, nChannels);
m_tempAnalyzer.getTransientAndPitch (m_tranLocNext, nChannels);
@ -1654,10 +1665,10 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o
const USAC_WSEQ wsPrev = icsPrev.windowSequence;
USAC_WSEQ& wsCurr = icsCurr.windowSequence;
// get temporal signal statistics, then determine overlap config. for the next frame
const unsigned plCurr = abs (m_tranLocCurr[ci]) & 1023;
const unsigned plCurr = abs (m_tranLocCurr[ci]) & ((1024 << m_shiftValSBR) - 1);
const unsigned sfCurr = (m_tempAnaCurr[ci] >> 24) & UCHAR_MAX;
const unsigned tfCurr = (m_tempAnaCurr[ci] >> 16) & UCHAR_MAX;
const unsigned plNext = abs (m_tranLocNext[ci]) & 1023;
const unsigned plNext = abs (m_tranLocNext[ci]) & ((1024 << m_shiftValSBR) - 1);
const unsigned sfNext = (m_tempAnaNext[ci] >> 24) & UCHAR_MAX;
const unsigned tfNext = (m_tempAnaNext[ci] >> 16) & UCHAR_MAX;
const unsigned tThresh = UCHAR_MAX * (__max (plCurr, plNext) < 614 /*0.6 * 1024*/ ? 16 : 15 - (m_bitRateMode >> 2));
@ -1775,6 +1786,7 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o
{
const IcsInfo& icsPrev = coreConfig.icsInfoPrev[ch];
const IcsInfo& icsCurr = coreConfig.icsInfoCurr[ch];
const int32_t* timeSig = (m_shiftValSBR > 0 ? m_coreSignals[ci] : m_timeSignals[ci]);
const USAC_WSEQ wsCurr = icsCurr.windowSequence;
const bool eightShorts = (wsCurr == EIGHT_SHORT);
SfbGroupData& grpData = coreConfig.groupingData[ch];
@ -1782,7 +1794,7 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o
grpData.numWindowGroups = (eightShorts ? NUM_WINDOW_GROUPS : 1); // fill groupingData
memcpy (grpData.windowGroupLength, windowGroupingTable[icsCurr.windowGrouping], NUM_WINDOW_GROUPS * sizeof (uint8_t));
errorValue |= m_transform.applyMCLT (m_timeSignals[ci], eightShorts, icsPrev.windowShape != WINDOW_SINE, icsCurr.windowShape != WINDOW_SINE,
errorValue |= m_transform.applyMCLT (timeSig, eightShorts, icsPrev.windowShape != WINDOW_SINE, icsCurr.windowShape != WINDOW_SINE,
wsCurr > LONG_START /*lOL*/, (wsCurr % 3) != ONLY_LONG /*lOR*/, m_mdctSignals[ci], m_mdstSignals[ci]);
m_scaleFacData[ci++] = &grpData;
}
@ -1809,9 +1821,14 @@ ExhaleEncoder::ExhaleEncoder (int32_t* const inputPcmData, unsigned ch
m_channelConf = CCI_2_CHM; // passing numChannels = 0 to ExhaleEncoder is interpreted as 2-ch dual-mono
}
m_numElements = elementCountConfig[m_channelConf % USAC_MAX_NUM_ELCONFIGS]; // used in UsacDecoderConfig
#if 1
m_shiftValSBR = (frameLength >= 1536 ? 1 : 0);
#else
m_shiftValSBR = 0;
#endif
m_frameCount = 0;
m_frameLength = (USAC_CCFL) frameLength; // coreCoderFrameLength, signaled using coreSbrFrameLengthIndex
m_frequencyIdx = toSamplingFrequencyIndex (sampleRate); // I/O sample rate as usacSamplingFrequencyIndex
m_frameLength = USAC_CCFL (frameLength >> m_shiftValSBR); // ccfl signaled using coreSbrFrameLengthIndex
m_frequencyIdx = toSamplingFrequencyIndex (sampleRate >> m_shiftValSBR); // as usacSamplingFrequencyIndex
m_indepFlag = true; // usacIndependencyFlag in UsacFrame(), will be set per frame, true in first frame
m_indepPeriod = (indepPeriod == 0 ? UINT_MAX : indepPeriod); // RAP, signaled using usacIndependencyFlag
#if !RESTRICT_TO_AAC
@ -1841,6 +1858,7 @@ ExhaleEncoder::ExhaleEncoder (int32_t* const inputPcmData, unsigned ch
{
m_bandwidCurr[ch] = 0;
m_bandwidPrev[ch] = 0;
m_coreSignals[ch] = nullptr;
m_mdctQuantMag[ch] = nullptr;
m_mdctSignals[ch] = nullptr;
m_mdstSignals[ch] = nullptr;
@ -1873,6 +1891,7 @@ ExhaleEncoder::~ExhaleEncoder ()
// free allocated signal buffers
for (unsigned ch = 0; ch < USAC_MAX_NUM_CHANNELS; ch++)
{
if (m_shiftValSBR > 0) MFREE (m_coreSignals[ch]);
MFREE (m_mdctQuantMag[ch]);
MFREE (m_mdctSignals[ch]);
MFREE (m_mdstSignals[ch]);
@ -1891,7 +1910,7 @@ ExhaleEncoder::~ExhaleEncoder ()
unsigned ExhaleEncoder::encodeLookahead ()
{
const unsigned nChannels = toNumChannels (m_channelConf);
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
const unsigned nSamplesInFrame = toFrameLength (m_frameLength) << m_shiftValSBR;
const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead
const int32_t* chSig = m_pcm24Data;
unsigned ch, s;
@ -1921,11 +1940,12 @@ unsigned ExhaleEncoder::encodeLookahead ()
*(predSig + 2) * (int64_t) filterC[2] + *(predSig + 3) * (int64_t) filterC[3];
*(--predSig) = int32_t ((predSample > 0 ? -predSample + (1 << 9) - 1 : -predSample) >> 9);
}
if (m_shiftValSBR > 0) memset (m_coreSignals[ch], 0, (nSamplesInFrame >> 2) * sizeof (int32_t));
}
// set initial temporal channel statistic to something meaningful before first coded frame
m_tempAnalyzer.temporalAnalysis (m_timeSignals, nChannels, nSamplesInFrame, nSamplesTempAna - nSamplesInFrame);
m_tempAnalyzer.temporalAnalysis (m_timeSignals, nChannels, nSamplesInFrame, nSamplesTempAna - nSamplesInFrame,
m_shiftValSBR, m_coreSignals); // default lfeChannelIndex
if (temporalProcessing ()) // time domain: window length, overlap, grouping, and transform
{
return 2; // internal error in temporal processing
@ -1945,7 +1965,7 @@ unsigned ExhaleEncoder::encodeLookahead ()
unsigned ExhaleEncoder::encodeFrame ()
{
const unsigned nChannels = toNumChannels (m_channelConf);
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
const unsigned nSamplesInFrame = toFrameLength (m_frameLength) << m_shiftValSBR;
const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead
const int32_t* chSig = m_pcm24Data;
unsigned ch, s;
@ -1955,6 +1975,14 @@ unsigned ExhaleEncoder::encodeFrame ()
{
memcpy (&m_timeSignals[ch][0], &m_timeSignals[ch][nSamplesInFrame], nSamplesInFrame * sizeof (int32_t));
memcpy (&m_timeSignals[ch][nSamplesInFrame], &m_timeSignals[ch][2 * nSamplesInFrame], (nSamplesTempAna - nSamplesInFrame) * sizeof (int32_t));
// With SBR active, also advance this channel's m_coreSignals buffer by one frame.
if (m_shiftValSBR > 0)
{
const unsigned nSmpInFrame = toFrameLength (m_frameLength); // core coder frame length
// move samples [nSmpInFrame, 2 * nSmpInFrame) to the start of the buffer
memcpy (&m_coreSignals[ch][0], &m_coreSignals[ch][nSmpInFrame], nSmpInFrame * sizeof (int32_t));
// then move the nSamplesInFrame >> 2 samples starting at 2 * nSmpInFrame down to nSmpInFrame
memcpy (&m_coreSignals[ch][nSmpInFrame], &m_coreSignals[ch][2 * nSmpInFrame], (nSamplesInFrame >> 2) * sizeof (int32_t));
}
}
// copy nSamplesInFrame external channel-interleaved samples into internal channel buffers
@ -1987,7 +2015,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin
const unsigned nChannels = toNumChannels (m_channelConf);
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
const unsigned specSigBufSize = nSamplesInFrame * sizeof (int32_t);
const unsigned timeSigBufSize = ((nSamplesInFrame * 41) >> 4) * sizeof (int32_t); // core-codec delay*4
const unsigned timeSigBufSize = (((nSamplesInFrame << m_shiftValSBR) * 41) >> 4) * sizeof (int32_t); // core-codec delay*4
const unsigned char chConf = m_channelConf;
unsigned errorValue = 0; // no error
@ -2008,7 +2036,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin
{
errorValue |= 64;
}
if ((m_frequencyIdx < 0) || (m_bitRateMode > (toSamplingRate (m_frequencyIdx) >> 12) + 2))
if ((m_frequencyIdx < 0) || (m_bitRateMode > (toSamplingRate (m_frequencyIdx) >> (m_shiftValSBR > 0 ? 11 : 12)) + 2))
{
errorValue |= 32;
}
@ -2036,7 +2064,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin
#if !RESTRICT_TO_AAC
m_timeWarping, m_noiseFilling,
#endif
audioConfigBuffer);
m_shiftValSBR, audioConfigBuffer);
if (audioConfigBytes) *audioConfigBytes = errorValue; // size of UsacConfig() in bytes
errorValue = (errorValue == 0 ? 1 : 0);
}
@ -2061,6 +2089,20 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin
memset (m_sfbLoudMem, 1, 2 * 26 * 32 * sizeof (uint16_t));
#endif
// allocate all signal buffers
// Per-channel m_coreSignals buffers are only allocated when an SBR shift is configured.
if (m_shiftValSBR > 0)
{
if (m_shiftValSBR > 1)
{
// returns immediately with error bit 4 set, without running the allocation loops below
return (errorValue | 4); // >2:1 not supported at the moment
}
else for (unsigned ch = 0; ch < nChannels; ch++)
{
// timeSigBufSize is a byte count (it already includes sizeof (int32_t)); it is shifted
// down by m_shiftValSBR to obtain the size of the m_coreSignals buffer
if ((m_coreSignals[ch] = (int32_t*) malloc (timeSigBufSize >> m_shiftValSBR)) == nullptr)
{
// a failed allocation sets error bit 4; the loop still continues with the next channel
errorValue |= 4;
}
}
}
for (unsigned ch = 0; ch < nChannels; ch++)
{
if ((m_entropyCoder[ch].initCodingMemory (nSamplesInFrame) > 0) ||
@ -2106,7 +2148,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin
#if !RESTRICT_TO_AAC
m_timeWarping, m_noiseFilling,
#endif
audioConfigBuffer);
m_shiftValSBR, audioConfigBuffer);
if (audioConfigBytes) *audioConfigBytes = errorValue; // length of UsacConfig() in bytes
errorValue = (errorValue == 0 ? 1 : 0);

View File

@ -67,6 +67,7 @@ private:
BitAllocator m_bitAllocator; // for scale factor init
uint8_t m_bitRateMode;
USAC_CCI m_channelConf;
int32_t* m_coreSignals[USAC_MAX_NUM_CHANNELS];
CoreCoderData* m_elementData[USAC_MAX_NUM_ELEMENTS];
EntropyCoder m_entropyCoder[USAC_MAX_NUM_CHANNELS];
uint32_t m_frameCount;
@ -95,6 +96,7 @@ private:
uint16_t m_sfbLoudMem[2][26][32]; // loudness mem
#endif
SfbQuantizer m_sfbQuantizer; // powerlaw quantization
uint8_t m_shiftValSBR; // SBR ratio for shifting
SpecAnalyzer m_specAnalyzer; // for spectral analysis
uint32_t m_specAnaCurr[USAC_MAX_NUM_CHANNELS];
uint8_t m_specFlatPrev[USAC_MAX_NUM_CHANNELS];

View File

@ -11,6 +11,14 @@
#include "exhaleLibPch.h"
#include "tempAnalysis.h"
// One half of the symmetric low-pass used by the downsampler in temporalAnalysis (), stored
// as 16-bit integers. Entry u weights the input sample pair at the odd distance 2 * u + 1
// from the current position (entry 0: +/-1, ..., entry 64: +/-129). The downsampler adds a
// further 65536 to the +/-1 pair, so the full weight of that pair is 65536 + 17887, which
// would not fit into int16_t. The weighted sum is scaled by 2^-18 in the downsampler.
static const int16_t lffc2x[65] = { // low-frequency filter coefficients
// 269-pt. sinc windowed by 0.409 * cos(0*pi.*t) - 0.5 * cos(2*pi.*t) + 0.091 * cos(4*pi.*t)
17887, -27755, 16590, -11782, 9095, -7371, 6166, -5273, 4582, -4029, 3576, -3196, 2873,
-2594, 2350, -2135, 1944, -1773, 1618, -1478, 1351, -1235, 1129, -1032, 942, -860, 784,
-714, 650, -591, 536, -485, 439, -396, 357, -321, 287, -257, 229, -204, 181, -160, 141,
-124, 108, -95, 82, -71, 61, -52, 44, -37, 31, -26, 21, -17, 14, -11, 8, -6, 5, -3, 2, -1, 1
};
// static helper functions
static unsigned updateAbsStats (const int32_t* const chSig, const int nSamples, unsigned* const maxAbsVal, int16_t* const maxAbsIdx)
{
@ -105,13 +113,16 @@ void TempAnalyzer::getTransientAndPitch (int16_t transIdxAndPitch[USAC_MAX_NUM_C
}
unsigned TempAnalyzer::temporalAnalysis (const int32_t* const timeSignals[USAC_MAX_NUM_CHANNELS], const unsigned nChannels,
const int nSamplesInFrame, const unsigned lookaheadOffset,
const unsigned lfeChannelIndex /*= USAC_MAX_NUM_CHANNELS*/) // to skip an LFE channel
const int nSamplesInFrame, const unsigned lookaheadOffset, const uint8_t sbrShift,
int32_t* const lrCoreTimeSignals[USAC_MAX_NUM_CHANNELS] /*= nullptr*/, // if using SBR
const unsigned lfeChannelIndex /*= USAC_MAX_NUM_CHANNELS*/) // to skip an LFE channel
{
const bool applyResampler = (sbrShift > 0 && lrCoreTimeSignals != nullptr);
const int halfFrameOffset = nSamplesInFrame >> 1;
const int resamplerOffset = (int) lookaheadOffset - 128;
if ((timeSignals == nullptr) || (nChannels > USAC_MAX_NUM_CHANNELS) || (lfeChannelIndex > USAC_MAX_NUM_CHANNELS) ||
(nSamplesInFrame > 2048) || (nSamplesInFrame < 2) || (lookaheadOffset > 2048) || (lookaheadOffset == 0))
if ((timeSignals == nullptr) || (nChannels > USAC_MAX_NUM_CHANNELS) || (lfeChannelIndex > USAC_MAX_NUM_CHANNELS) || (sbrShift > 1) ||
(nSamplesInFrame > 2048) || (nSamplesInFrame < 2) || (lookaheadOffset > 4096) || (lookaheadOffset <= 256u * sbrShift))
{
return 1;
}
@ -134,6 +145,26 @@ unsigned TempAnalyzer::temporalAnalysis (const int32_t* const timeSignals[USAC_M
unsigned uR1 = abs (chSig[splitPtR - 1] - chSigM1[splitPtR - 1]);
unsigned u; // temporary value - register?
// Downsampler: writes nSamplesInFrame >> sbrShift low-pass filtered samples of channel ch
// into lrCoreTimeSignals[ch], starting at index resamplerOffset >> sbrShift. It is only
// reached with sbrShift == 1, since applyResampler needs sbrShift > 0 and sbrShift > 1 is
// rejected by the argument check of this function.
if (applyResampler && lrCoreTimeSignals[ch] != nullptr) // downsampler
{
/*LF*/int32_t* lrSig = &lrCoreTimeSignals[ch][resamplerOffset >> sbrShift]; // low-rate,
const int32_t* hrSig = &timeSignals[ch][resamplerOffset]; // high-rate input time signal
// one output sample per two input samples: lrSig advances by 1 while hrSig advances by 2
for (int i = nSamplesInFrame >> sbrShift; i > 0; i--, lrSig++, hrSig += 2)
{
// 64-bit accumulator: the current sample is weighted by 2^17 and the +/-1 neighbors by
// -2*SHRT_MIN = 65536, which is the part of their weight not held in lffc2x[0]
int64_t r = ((int64_t) hrSig[0] << 17) + (hrSig[-1] + (int64_t) hrSig[1]) * -2*SHRT_MIN;
int16_t s;
// symmetric taps at the odd distances s = 129, 127, ..., 1, weighted by lffc2x[64], ...,
// lffc2x[0]; this reads input samples up to 129 positions before and after hrSig
for (u = 65, s = 129; u > 0; s -= 2) r += (hrSig[-s] + (int64_t) hrSig[s]) * lffc2x[--u];
// add half of 2^18 for rounding, then scale the sum down by 2^18
*lrSig = int32_t ((r + (1 << 17)) >> 18); // low-pass and low-rate
// TODO: bandpass
// clip the result to [-2^23, 2^23 - 1], the range of a signed 24-bit value
if (*lrSig < -8388608) *lrSig = -8388608;
else
if (*lrSig > 8388607) *lrSig = 8388607;
}
}
if (ch == lfeChannelIndex) // no analysis
{
m_tempAnaStats[ch] = 0; // flat/stationary frame

View File

@ -39,7 +39,8 @@ public:
void getTempAnalysisStats (uint32_t avgTempAnaStats[USAC_MAX_NUM_CHANNELS], const unsigned nChannels);
void getTransientAndPitch (int16_t transIdxAndPitch[USAC_MAX_NUM_CHANNELS], const unsigned nChannels);
unsigned temporalAnalysis (const int32_t* const timeSignals[USAC_MAX_NUM_CHANNELS], const unsigned nChannels,
const int nSamplesInFrame, const unsigned lookaheadOffset,
const int nSamplesInFrame, const unsigned lookaheadOffset, const uint8_t sbrShift,
int32_t* const lrCoreTimeSignals[USAC_MAX_NUM_CHANNELS] = nullptr, // if using SBR
const unsigned lfeChannelIndex = USAC_MAX_NUM_CHANNELS); // to skip an LFE channel
}; // TempAnalyzer