mirror of
https://gitlab.com/ecodis/exhale.git
synced 2025-03-11 16:50:09 +01:00
add SBR bit syntax
This commit is contained in:
parent
8b56192418
commit
1259070c19
@ -16,7 +16,7 @@ if("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}")
|
||||
endif()
|
||||
|
||||
|
||||
project(exhale VERSION 1.0.8 LANGUAGES CXX)
|
||||
project(exhale VERSION 1.1.0 LANGUAGES CXX)
|
||||
|
||||
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
||||
set(CMAKE_BUILD_TYPE Release
|
||||
|
@ -34,7 +34,7 @@ exhale is being made available under an open-source license which is
|
||||
similar to the 3-clause BSD license but modified to address specific
|
||||
aspects dictated by the nature and the output of this application.
|
||||
|
||||
The license text and release notes for the current version 1.0.8 can
|
||||
The license text and release notes for the current version 1.1RC can
|
||||
be found in the `include` subdirectory of the exhale distribution.
|
||||
|
||||
|
||||
|
@ -12,8 +12,8 @@
|
||||
# define EXHALELIB_VERSION_MAJOR "1"
|
||||
#endif
|
||||
#ifndef EXHALELIB_VERSION_MINOR
|
||||
# define EXHALELIB_VERSION_MINOR "0"
|
||||
# define EXHALELIB_VERSION_MINOR "1"
|
||||
#endif
|
||||
#ifndef EXHALELIB_VERSION_BUGFIX
|
||||
# define EXHALELIB_VERSION_BUGFIX ".8" // "RC" or ".0", ".1", ...
|
||||
# define EXHALELIB_VERSION_BUGFIX "RC" // "RC" or ".0", ".1", ...
|
||||
#endif
|
||||
|
@ -54,6 +54,7 @@
|
||||
#define EA_PEAK_NORM -96.33f // 20 * log10(2^-16), 16-bit normalization
|
||||
#define EA_PEAK_MIN 0.262f // 20 * log10() + EA_PEAK_NORM = -108 dbFS
|
||||
#define ENABLE_RESAMPLING 1 // 1: automatic input up- and downsampling
|
||||
#define ENABLE_SIMPLE_SBR 1 // 1: basic 2:1 low-rate SBR functionality
|
||||
#define IGNORE_WAV_LENGTH 0 // 1: ignore input size indicators (nasty)
|
||||
#define XHE_AAC_LOW_DELAY 0 // 1: allow encoding with 768 frame length
|
||||
|
||||
@ -376,14 +377,16 @@ int main (const int argc, char* argv[])
|
||||
#if defined (_WIN32) || defined (WIN32) || defined (_WIN64) || defined (WIN64)
|
||||
fprintf_s (stdout, " preset\t= # (0-9) low-complexity standard compliant xHE-AAC at 16ú#+48 kbit/s\n");
|
||||
# if XHE_AAC_LOW_DELAY
|
||||
// fprintf_s (stdout, " \t (a-i) low-complexity compatible xHE-AAC with BE at 16ú#+48 kbit/s\n");
|
||||
fprintf_s (stdout, " \t (A-I) 41ms low-delay compatible xHE-AAC with BE at 16ú#+48 kbit/s\n");
|
||||
fprintf_s (stdout, " \t (A-J) 41ms low-delay compatible xHE-AAC with BE at 16ú#+48 kbit/s\n");
|
||||
# elif ENABLE_SIMPLE_SBR
|
||||
fprintf_s (stdout, " \t (a-g) low-complexity compliant xHE-AAC with SBR at 12ú#+36 kbit/s\n");
|
||||
# endif
|
||||
#else
|
||||
fprintf_s (stdout, " preset\t= # (0-9) low-complexity standard compliant xHE-AAC at 16*#+48 kbit/s\n");
|
||||
# if XHE_AAC_LOW_DELAY
|
||||
// fprintf_s (stdout, " \t (a-i) low-complexity compatible xHE-AAC with BE at 16*#+48 kbit/s\n");
|
||||
fprintf_s (stdout, " \t (A-I) 41ms low-delay compatible xHE-AAC with BE at 16*#+48 kbit/s\n");
|
||||
fprintf_s (stdout, " \t (A-J) 41ms low-delay compatible xHE-AAC with BE at 16*#+48 kbit/s\n");
|
||||
# elif ENABLE_SIMPLE_SBR
|
||||
fprintf_s (stdout, " \t (a-g) low-complexity compliant xHE-AAC with SBR at 12*#+36 kbit/s\n");
|
||||
# endif
|
||||
#endif
|
||||
fprintf_s (stdout, "\n inputWaveFile.wav lossless WAVE audio input, read from stdin if not specified\n\n");
|
||||
@ -412,15 +415,21 @@ int main (const int argc, char* argv[])
|
||||
|
||||
// check preset mode, derive coder config
|
||||
#if XHE_AAC_LOW_DELAY
|
||||
if ((*argv[1] >= '0' && *argv[1] <= '9') || (*argv[1] >= 'a' && *argv[1] <= 'i') || (*argv[1] >= 'A' && *argv[1] <= 'I'))
|
||||
if ((*argv[1] >= '0' && *argv[1] <= '9') || (*argv[1] >= 'A' && *argv[1] <= 'J'))
|
||||
#elif ENABLE_SIMPLE_SBR
|
||||
if ((*argv[1] >= '0' && *argv[1] <= '9') || (*argv[1] >= 'a' && *argv[1] <= 'g'))
|
||||
#else
|
||||
if ((*argv[1] >= '0' && *argv[1] <= '9') || (*argv[1] >= 'a' && *argv[1] <= 'i'))
|
||||
if (*argv[1] >= '0' && *argv[1] <= '9')
|
||||
#endif
|
||||
{
|
||||
i = (uint16_t) argv[1][0];
|
||||
compatibleExtensionFlag = (i & 0x40) >> 6;
|
||||
#if ENABLE_SIMPLE_SBR
|
||||
coreSbrFrameLengthIndex = (i > 0x60 ? 5 : (i & 0x20) >> 5);
|
||||
#else
|
||||
coreSbrFrameLengthIndex = (i & 0x20) >> 5;
|
||||
variableCoreBitRateMode = (i & 0x0F);
|
||||
#endif
|
||||
variableCoreBitRateMode = (i & 0x0F) - (i >> 6);
|
||||
}
|
||||
else if (*argv[1] == '#') // default mode
|
||||
{
|
||||
@ -430,9 +439,15 @@ int main (const int argc, char* argv[])
|
||||
{
|
||||
#if XHE_AAC_LOW_DELAY
|
||||
# ifdef EXHALE_APP_WCHAR
|
||||
fwprintf_s (stderr, L" ERROR reading preset mode: character %s is not supported! Use 0-9 or A-I.\n\n", argv[1]);
|
||||
#else
|
||||
fprintf_s (stderr, " ERROR reading preset mode: character %s is not supported! Use 0-9 or A-I.\n\n", argv[1]);
|
||||
fwprintf_s (stderr, L" ERROR reading preset mode: character %s is not supported! Use 0-9 or A-J.\n\n", argv[1]);
|
||||
# else
|
||||
fprintf_s (stderr, " ERROR reading preset mode: character %s is not supported! Use 0-9 or A-J.\n\n", argv[1]);
|
||||
# endif
|
||||
#elif ENABLE_SIMPLE_SBR
|
||||
# ifdef EXHALE_APP_WCHAR
|
||||
fwprintf_s (stderr, L" ERROR reading preset mode: character %s is not supported! Use 0-9 or a-g.\n\n", argv[1]);
|
||||
# else
|
||||
fprintf_s (stderr, " ERROR reading preset mode: character %s is not supported! Use 0-9 or a-g.\n\n", argv[1]);
|
||||
# endif
|
||||
#else
|
||||
# ifdef EXHALE_APP_WCHAR
|
||||
@ -522,10 +537,19 @@ int main (const int argc, char* argv[])
|
||||
if ((wavReader.open (inFileHandle, startLength, readStdin ? LLONG_MAX : _filelengthi64 (inFileHandle)) != 0) ||
|
||||
#else // Linux, MacOS, Unix
|
||||
if ((wavReader.open (inFileHandle, startLength, readStdin ? LLONG_MAX : lseek (inFileHandle, 0, 2 /*SEEK_END*/)) != 0) ||
|
||||
#endif
|
||||
#if ENABLE_SIMPLE_SBR
|
||||
(wavReader.getSampleRate () >= 1000 && wavReader.getSampleRate () < 24000 && coreSbrFrameLengthIndex >= 3) ||
|
||||
#endif
|
||||
(wavReader.getNumChannels () >= 7))
|
||||
{
|
||||
fprintf_s (stderr, " ERROR while trying to open WAVE file: invalid or unsupported audio format!\n\n");
|
||||
#if ENABLE_SIMPLE_SBR
|
||||
if (wavReader.getSampleRate () >= 1000 && wavReader.getSampleRate () < 24000 && coreSbrFrameLengthIndex >= 3)
|
||||
{
|
||||
fprintf_s (stderr, " The sampling rate is %d kHz but xHE-AAC with SBR requires at least 24 kHz.\n\n", wavReader.getSampleRate () / 1000);
|
||||
}
|
||||
#endif
|
||||
i = 8192; // return value
|
||||
|
||||
goto mainFinish; // audio format invalid
|
||||
@ -555,6 +579,9 @@ int main (const int argc, char* argv[])
|
||||
}
|
||||
|
||||
if (wavReader.getSampleRate () > 32100 + (unsigned) variableCoreBitRateMode * 12000 + (variableCoreBitRateMode >> 2) * 3900
|
||||
#if ENABLE_SIMPLE_SBR
|
||||
&& (coreSbrFrameLengthIndex < 3)
|
||||
#endif
|
||||
#if ENABLE_RESAMPLING
|
||||
&& (variableCoreBitRateMode > 1 || wavReader.getSampleRate () != 48000)
|
||||
#endif
|
||||
@ -566,7 +593,11 @@ int main (const int argc, char* argv[])
|
||||
|
||||
goto mainFinish; // ask for resampling
|
||||
}
|
||||
#if ENABLE_SIMPLE_SBR
|
||||
if (wavReader.getSampleRate () > 32000 && coreSbrFrameLengthIndex < 3 && variableCoreBitRateMode <= 1)
|
||||
#else
|
||||
if (wavReader.getSampleRate () > 32000 && variableCoreBitRateMode <= 1)
|
||||
#endif
|
||||
{
|
||||
#if ENABLE_RESAMPLING
|
||||
if (wavReader.getSampleRate () == 48000)
|
||||
@ -613,8 +644,8 @@ int main (const int argc, char* argv[])
|
||||
|
||||
// enforce executable specific constraints
|
||||
i = __min (USHRT_MAX, wavReader.getSampleRate ());
|
||||
if ((wavReader.getNumChannels () > 3) && (i == 57600 || i == 51200 || i == 40000 || i == 38400 || i == 34150 ||
|
||||
i == 28800 || i == 25600 || i == 20000 || i == 19200 || i == 17075 || i == 14400 || i == 12800 || i == 9600))
|
||||
if ((wavReader.getNumChannels () > 3 || coreSbrFrameLengthIndex >= 3) && (i == 57600 || i == 51200 || i == 40000 || i == 38400 ||
|
||||
i == 34150 || i == 28800 || i == 25600 || i == 20000 || i == 19200 || i == 17075 || i == 14400 || i == 12800 || i == 9600))
|
||||
{
|
||||
fprintf_s (stderr, " ERROR: exhale does not support %d-channel coding with %d Hz sampling rate.\n\n", wavReader.getNumChannels (), i);
|
||||
|
||||
@ -626,7 +657,12 @@ int main (const int argc, char* argv[])
|
||||
const unsigned inSampDepth = wavReader.getBitDepth ();
|
||||
#if ENABLE_RESAMPLING
|
||||
const bool enableUpsampler = eaInitUpsampler2x (&inPcmRsmp, variableCoreBitRateMode, i, frameLength, numChannels);
|
||||
# if ENABLE_SIMPLE_SBR
|
||||
const bool enableResampler = (coreSbrFrameLengthIndex >= 3 ? false : // no 3:2 downsampling needed when using SBR
|
||||
eaInitDownsampler (&inPcmRsmp, variableCoreBitRateMode, i, frameLength, numChannels));
|
||||
# else
|
||||
const bool enableResampler = eaInitDownsampler (&inPcmRsmp, variableCoreBitRateMode, i, frameLength, numChannels);
|
||||
# endif
|
||||
const uint16_t firstLength = uint16_t (enableUpsampler ? (frameLength >> 1) + 32 : (enableResampler ? startLength : frameLength));
|
||||
const unsigned inFrameSize = (enableResampler ? startLength : frameLength) * sizeof (int32_t); // max buffer size
|
||||
const unsigned resampRatio = (enableResampler ? 3 : 1); // for resampling ratio
|
||||
@ -671,7 +707,7 @@ int main (const int argc, char* argv[])
|
||||
const unsigned sampleRate = wavReader.getSampleRate ();
|
||||
#endif
|
||||
const unsigned indepPeriod = (sampleRate < 48000 ? (sampleRate - 320) / frameLength : 45 /*for 50-Hz video, use 50 for 60-Hz video*/);
|
||||
const unsigned mod3Percent = unsigned ((expectLength * (3 + coreSbrFrameLengthIndex)) >> 17);
|
||||
const unsigned mod3Percent = unsigned ((expectLength * (3 + (coreSbrFrameLengthIndex & 3))) >> 17);
|
||||
uint32_t byteCount = 0, bw = (numChannels < 7 ? loudStats : 0);
|
||||
uint32_t br, bwMax = 0; // br will be used to hold bytes read and/or bit-rate
|
||||
uint32_t headerRes = 0;
|
||||
@ -696,7 +732,11 @@ int main (const int argc, char* argv[])
|
||||
memset (outAuData, 0, 108 * sizeof (uint8_t)); // max. allowed ASC + UC size
|
||||
i = exhaleEnc.initEncoder (outAuData, &bw); // bw stores actual ASC + UC size
|
||||
|
||||
#if ENABLE_SIMPLE_SBR
|
||||
if ((i |= mp4Writer.open (outFileHandle, sampleRate, numChannels, inSampDepth, frameLength, startLength + (coreSbrFrameLengthIndex >= 3 ? 962 : 0),
|
||||
#else
|
||||
if ((i |= mp4Writer.open (outFileHandle, sampleRate, numChannels, inSampDepth, frameLength, startLength,
|
||||
#endif
|
||||
indepPeriod, outAuData, bw, (time (nullptr) + 2082844800) & UINT_MAX, (char) variableCoreBitRateMode)) != 0)
|
||||
{
|
||||
fprintf_s (stderr, " ERROR while trying to initialize xHE-AAC encoder: error value %d was returned!\n\n", i);
|
||||
@ -710,7 +750,12 @@ int main (const int argc, char* argv[])
|
||||
if (*argv[1] != '#') // user-def. mode
|
||||
{
|
||||
fprintf_s (stdout, " Encoding %d-kHz %d-channel %d-bit WAVE to low-complexity xHE-AAC at %d kbit/s\n\n",
|
||||
#if ENABLE_SIMPLE_SBR
|
||||
sampleRate / 1000, numChannels, inSampDepth, __min (5, numChannels) * (((24 + variableCoreBitRateMode * 8) *
|
||||
(coreSbrFrameLengthIndex >= 3 ? 3 : 4)) >> 2));
|
||||
#else
|
||||
sampleRate / 1000, numChannels, inSampDepth, __min (5, numChannels) * (24 + variableCoreBitRateMode * 8));
|
||||
#endif
|
||||
}
|
||||
if (!readStdin && (mod3Percent > 0))
|
||||
{
|
||||
@ -926,6 +971,13 @@ int main (const int argc, char* argv[])
|
||||
bw = mp4Writer.finishFile (br, bw, uint32_t (__min (UINT_MAX - startLength, actualLength)), (time (nullptr) + 2082844800) & UINT_MAX,
|
||||
(i == 0) && (numChannels < 7) ? outAuData : nullptr);
|
||||
// print out collected file statistics
|
||||
#if ENABLE_SIMPLE_SBR
|
||||
if (coreSbrFrameLengthIndex >= 3)
|
||||
{
|
||||
fprintf_s (stdout, " Done, actual average incl. SBR data %.2f kbit/s\n\n", (float) br * 0.001f);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
fprintf_s (stdout, " Done, actual average %.1f kbit/s\n\n", (float) br * 0.001f);
|
||||
if (numChannels < 7)
|
||||
{
|
||||
|
@ -13,7 +13,7 @@
|
||||
|
||||
0 ICON "exhaleApp.ico"
|
||||
VS_VERSION_INFO VERSIONINFO
|
||||
FILEVERSION 1,0,8
|
||||
FILEVERSION 1,1,0
|
||||
BEGIN
|
||||
BLOCK "StringFileInfo"
|
||||
BEGIN
|
||||
|
@ -112,6 +112,117 @@ unsigned BitStreamWriter::writeChannelWiseIcsInfo (const IcsInfo& icsInfo) //
|
||||
return 9;
|
||||
}
|
||||
|
||||
// Writes the channel-wise SBR payload of one channel element to m_auBitStream, in this order:
// coupling flag (stereo only), sbr_grid(), sbr_dtdf(), sbr_invf(), sbr_envelope() and
// sbr_noise() per channel, and finally one zero bs_add_harmonic_flag per channel.
//   sbrDataCh0  SBR words of the mono/left channel; if nullptr, nothing is written and 0 is returned
//   sbrDataCh1  SBR words of the right channel, or nullptr for a mono element
//   indepFlag   when true, one df_env and one df_noise bit fewer is written per channel
// Returns the number of bits written.
// Fields read from word [0]: bits 29 and up = amplitude resolution (ch0 only), bit 23 = noise
// bit-count selector (ch0) or coupling flag (ch1), bits 22-20 = grid class data (bits 22-21 =
// log2 of the envelope count), bits 19-12 = df_env flags, bits 11-4 = df_noise flags, low nb
// bits = invf modes. Words [1...] hold per-envelope data, word [9] holds the noise data.
unsigned BitStreamWriter::writeChannelWiseSbrData (const int32_t* const sbrDataCh0, const int32_t* const sbrDataCh1,
|
||||
const bool indepFlag /*= false*/)
|
||||
{
|
||||
const unsigned nb = (sbrDataCh0 != nullptr ? 2 * ((sbrDataCh0[0] >> 23) & 1) + 2 : 0); // noise bits/ch = 2 or 4
|
||||
const int16_t res = (nb > 0 ? sbrDataCh0[0] >> 29 : 0); // short bs_amp_res
|
||||
const bool stereo = (sbrDataCh1 != nullptr);
|
||||
const bool couple = (stereo ? ((sbrDataCh1[0] >> 23) & 1) : false);
|
||||
// bitCount starts with the fixed-size part: coupling flag, 5 grid bits and nb invf bits per
// separately coded channel, and the trailing add-harmonic flag(s); variable parts are added below
unsigned bitCount = (stereo ? (couple ? 2 : 7 + nb) : 0) + 6 + nb, i, tmpCh0, tmpCh1;
|
||||
|
||||
// nb is 0 only when sbrDataCh0 is nullptr: nothing to write
if (nb == 0) return 0;
|
||||
|
||||
tmpCh0 = (sbrDataCh0[0] >> 21) & 3;
|
||||
// for a coupled pair (or mono) the channel-1 value is taken from the channel-0 word
tmpCh1 = ((stereo && !couple ? sbrDataCh1[0] : sbrDataCh0[0]) >> 21) & 3;
|
||||
|
||||
if (stereo) m_auBitStream.write (couple ? 1 : 0, 1); // _coupling
|
||||
|
||||
// sbr_grid(), assumes bs_frame_class[ch] == 0, i.e. class FIXFIX
|
||||
m_auBitStream.write ((sbrDataCh0[0] >> 20) & 7, 5); // class data
|
||||
if (stereo && !couple) m_auBitStream.write ((sbrDataCh1[0] >> 20) & 7, 5);
|
||||
|
||||
// sbr_dtdf()
|
||||
i = (1u << tmpCh0) - (indepFlag ? 1 : 0); // actual bs_num_env[0]
|
||||
if (i > 0) m_auBitStream.write ((sbrDataCh0[0] >> 12) & 255, i); // _df_env
|
||||
bitCount += i;
|
||||
i = (tmpCh0 > 0 ? 2 : 1) - (indepFlag ? 1 : 0);// bs_num_noise[0]
|
||||
if (i > 0) m_auBitStream.write ((sbrDataCh0[0] >> 4) & 255, i); // df_noise
|
||||
bitCount += i;
|
||||
|
||||
// same two sbr_dtdf() fields for the second channel; written for coupled pairs as well
if (stereo)
|
||||
{
|
||||
i = (1u << tmpCh1) - (indepFlag ? 1 : 0);
|
||||
if (i > 0) m_auBitStream.write ((sbrDataCh1[0] >> 12) & 255, i);
|
||||
bitCount += i;
|
||||
i = (tmpCh1 > 0 ? 2 : 1) - (indepFlag ? 1 : 0);
|
||||
if (i > 0) m_auBitStream.write ((sbrDataCh1[0] >> 4) & 255, i);
|
||||
bitCount += i;
|
||||
}
|
||||
|
||||
// sbr_invf(), assumes dflt_noise_bands < 3, i.e. 1-2 noise bands
|
||||
i = 6 * nb - 9; // bitmask = 3 or 15
|
||||
m_auBitStream.write (sbrDataCh0[0] & i, nb); // bs_invf_mode[0][]
|
||||
if (stereo && !couple) m_auBitStream.write (sbrDataCh1[0] & i, nb);
|
||||
|
||||
// sbr_envelope() for mono/left channel, assumes bs_pvc_mode == 0
|
||||
for (i = 1; i <= (1u << tmpCh0); i++) // dt loop
|
||||
{
|
||||
const uint8_t bits = (res > 0 && tmpCh0 > 0 ? 6 : 7);
|
||||
|
||||
// NOTE(review): a constant 15 is written here; the data-driven value is commented out
m_auBitStream.write (15/*sbrDataCh0[i] & 127*/, bits); // bs_data_env
|
||||
bitCount += bits;
|
||||
// NOTE(review): fixed 10-bit field stands in for the VLC words (see TODO)
m_auBitStream.write (sbrDataCh0[i] >> 7, 5<<1); // TODO: VLC words
|
||||
bitCount += 5<<1;
|
||||
}
|
||||
|
||||
// uncoupled stereo: right-channel envelopes follow directly, using the same 6/7-bit start value
if (stereo && !couple)
|
||||
{
|
||||
for (i = 1; i <= (1u << tmpCh1); i++) // sbr_envelope() dt loop
|
||||
{
|
||||
const uint8_t bits = (res > 0 && tmpCh1 > 0 ? 6 : 7);
|
||||
|
||||
m_auBitStream.write (sbrDataCh1[i] & 127, bits);
|
||||
bitCount += bits;
|
||||
m_auBitStream.write (sbrDataCh1[i] >> 7, 5<<1); // TODO: VLC words
|
||||
bitCount += 5<<1;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = (tmpCh0 > 0 ? 2 : 1); i > 0; i--) // sbr_noise() dt loop
|
||||
{
|
||||
// NOTE(review): a constant 31 is written here; the data-driven value is commented out
m_auBitStream.write (31/*(sbrDataCh0[9] >> (12 * i)) & 31*/, 5); // _data_noise
|
||||
bitCount += 5;
|
||||
if (nb == 4)
|
||||
{
|
||||
// NOTE(review): value is masked to 5 bits but written with a length of 1 bit - confirm how write() treats excess bits
m_auBitStream.write ((sbrDataCh0[9] >> (12 * i - 6)) & 31, 1); // TODO: VLC word
|
||||
bitCount++;
|
||||
}
|
||||
}
|
||||
|
||||
if (stereo)
|
||||
{
|
||||
// coupled stereo: channel-1 envelopes are written after the channel-0 noise data, with a
// 5/6-bit start value instead of 6/7
if (couple)
|
||||
{
|
||||
for (i = 1; i <= (1u << tmpCh1); i++) // sbr_envelope dt loop
|
||||
{
|
||||
const uint8_t bits = (res > 0 && tmpCh1 > 0 ? 5 : 6);
|
||||
|
||||
m_auBitStream.write (sbrDataCh1[i] & 63, bits);
|
||||
bitCount += bits;
|
||||
m_auBitStream.write (sbrDataCh1[i] >> 7, 5<<1); // TODO: VLC words
|
||||
bitCount += 5<<1;
|
||||
}
|
||||
}
|
||||
|
||||
// channel-1 noise data, written for both coupled and uncoupled pairs
for (i = (tmpCh1 > 0 ? 2 : 1); i > 0; i--) // sbr_noise dt loop
|
||||
{
|
||||
m_auBitStream.write ((sbrDataCh1[9] >> (12 * i)) & 31, 5);
|
||||
bitCount += 5;
|
||||
if (nb == 4)
|
||||
{
|
||||
// NOTE(review): same 5-bit mask with a 1-bit length as for channel 0 - confirm
m_auBitStream.write ((sbrDataCh1[9] >> (12 * i - 6)) & 31, 1); // TODO: VLC word
|
||||
bitCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// these flag bits are already included in the initial bitCount value
m_auBitStream.write (0, 1); // fixed bs_add_harmonic_flag[0] = 0
|
||||
if (stereo) m_auBitStream.write (0, 1);
|
||||
|
||||
return bitCount;
|
||||
}
|
||||
|
||||
unsigned BitStreamWriter::writeChannelWiseTnsData (const TnsData& tnsData, const bool eightShorts)
|
||||
{
|
||||
const unsigned numWindows = (eightShorts ? 8 : 1);
|
||||
@ -514,8 +625,10 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex,
|
||||
#if !RESTRICT_TO_AAC
|
||||
const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling,
|
||||
#endif
|
||||
unsigned char* const audioConfig)
|
||||
const uint8_t sbrRatioShiftValue, unsigned char* const audioConfig)
|
||||
{
|
||||
const uint8_t fli = (sbrRatioShiftValue == 0 ? 1 /*no SBR*/ : __min (2, sbrRatioShiftValue) + 2);
|
||||
const int8_t usfi = __max (0, samplingFrequencyIndex - 3 * sbrRatioShiftValue); // TODO: non-standard sampling rates
|
||||
unsigned bitCount = 37;
|
||||
|
||||
if ((elementType == nullptr) || (audioConfig == nullptr) || (chConfigurationIndex >= USAC_MAX_NUM_ELCONFIGS) ||
|
||||
@ -532,20 +645,20 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex,
|
||||
m_auBitStream.write (0x7CA, 11); // audio object type (AOT) 32 (esc) + 10 = 42
|
||||
if (samplingFrequencyIndex < AAC_NUM_SAMPLE_RATES)
|
||||
{
|
||||
m_auBitStream.write (samplingFrequencyIndex, 4);
|
||||
m_auBitStream.write (usfi, 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_auBitStream.write (0xF, 4); // esc
|
||||
m_auBitStream.write (toSamplingRate (samplingFrequencyIndex), 24);
|
||||
m_auBitStream.write (toSamplingRate (usfi), 24);
|
||||
bitCount += 24;
|
||||
}
|
||||
// for multichannel audio, refer to channel mapping of AotSpecificConfig below
|
||||
m_auBitStream.write (chConfigurationIndex > 2 ? 0 : chConfigurationIndex, 4);
|
||||
|
||||
// --- AotSpecificConfig(): UsacConfig()
|
||||
m_auBitStream.write (samplingFrequencyIndex, 5); // usacSamplingFrequencyIndex
|
||||
m_auBitStream.write (shortFrameLength ? 0 : 1, 3); // coreSbrFrameLengthIndex
|
||||
m_auBitStream.write (usfi, 5); // usacSamplingFrequencyIndex (after SBR dec.!)
|
||||
m_auBitStream.write (shortFrameLength ? 0 : fli, 3);// coreSbrFrameLengthIndex
|
||||
m_auBitStream.write (chConfigurationIndex, 5); // channelConfigurationIndex
|
||||
m_auBitStream.write (numElements - 1, 4); // numElements in UsacDecoderConfig
|
||||
|
||||
@ -561,6 +674,22 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex,
|
||||
m_auBitStream.write ((tw_mdct[el] ? 2 : 0) | (noiseFilling[el] ? 1 : 0), 2);
|
||||
#endif
|
||||
bitCount += 2;
|
||||
if (sbrRatioShiftValue > 0) // sbrRatioIndex > 0: SbrConfig
|
||||
{
|
||||
const uint32_t sf = (samplingFrequencyIndex == 6 || samplingFrequencyIndex < 5 ? 10 : (samplingFrequencyIndex < 8 ? 9 : 8)); // bs_stop_freq
|
||||
|
||||
m_auBitStream.write (0, 3); // fix harmonicSBR, bs_interTes, bs_pvc = 0
|
||||
bitCount += 13; // incl. SbrDfltHeader following hereafter
|
||||
m_auBitStream.write (15, 4); // 11025 @ 44.1, 11625 @ 48, 15000 @ 64 kHz
|
||||
m_auBitStream.write (sf, 4); // 16193 @ 44.1, 18375 @ 48, 22500 @ 64 kHz
|
||||
m_auBitStream.write ( 0, 2); // fix dflt_header_extra* = 0
|
||||
|
||||
if (elementType[el] == ID_USAC_CPE)
|
||||
{
|
||||
m_auBitStream.write (0, 2); // fix stereoConfigIndex = 0
|
||||
bitCount += 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // for el
|
||||
|
||||
@ -591,7 +720,7 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex,
|
||||
bitCount += (8 - m_auBitStream.heldBitCount) & 7;
|
||||
writeByteAlignment (); // flush bytes
|
||||
|
||||
memcpy (audioConfig, &m_auBitStream.stream.front (), __min (16, bitCount >> 3));
|
||||
memcpy (audioConfig, &m_auBitStream.stream.front (), __min (15u + fli, bitCount >> 3));
|
||||
|
||||
return (bitCount >> 3); // byte count
|
||||
}
|
||||
@ -603,11 +732,12 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData,
|
||||
#if !RESTRICT_TO_AAC
|
||||
const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling,
|
||||
#endif
|
||||
const uint8_t sbrRatioShiftValue, int32_t** const sbrInfoAndData,
|
||||
unsigned char* const accessUnit, const unsigned nSamplesInFrame /*= 1024*/)
|
||||
{
|
||||
unsigned bitCount = 1, ci = 0;
|
||||
|
||||
if ((elementData == nullptr) || (entropyCoder == nullptr) || (tempBuffer == nullptr) ||
|
||||
if ((elementData == nullptr) || (entropyCoder == nullptr) || (tempBuffer == nullptr) || (sbrInfoAndData == nullptr) ||
|
||||
(mdctSignals == nullptr) || (mdctQuantMag == nullptr) || (accessUnit == nullptr) || (nSamplesInFrame > 2048) ||
|
||||
#if !RESTRICT_TO_AAC
|
||||
(noiseFilling == nullptr) || (tw_mdct == nullptr) ||
|
||||
@ -644,6 +774,22 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData,
|
||||
tw_mdct[el], noiseFilling[el],
|
||||
#endif
|
||||
usacIndependencyFlag);
|
||||
if (sbrRatioShiftValue > 0) // UsacSbrData()
|
||||
{
|
||||
if (usacIndependencyFlag)
|
||||
{
|
||||
m_auBitStream.write ((sbrInfoAndData[ci][0] >> 24), 6); // SbrInfo()
|
||||
m_auBitStream.write (1, 1);// fix sbrUseDfltHeader = 1
|
||||
bitCount += 7;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_auBitStream.write (0, 1); // fix sbrInfoPresent = 0
|
||||
bitCount++;
|
||||
}
|
||||
bitCount += writeChannelWiseSbrData (sbrInfoAndData[ci], nullptr, // L (mono) only, no R
|
||||
usacIndependencyFlag);
|
||||
}
|
||||
ci++;
|
||||
break;
|
||||
}
|
||||
@ -670,6 +816,22 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData,
|
||||
tw_mdct[el], noiseFilling[el],
|
||||
#endif
|
||||
usacIndependencyFlag);
|
||||
if (sbrRatioShiftValue > 0) // UsacSbrData()
|
||||
{
|
||||
if (usacIndependencyFlag)
|
||||
{
|
||||
m_auBitStream.write ((sbrInfoAndData[ci][0] >> 24), 6); // SbrInfo()
|
||||
m_auBitStream.write (1, 1);// fix sbrUseDfltHeader = 1
|
||||
bitCount += 7;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_auBitStream.write (0, 1); // fix sbrInfoPresent = 0
|
||||
bitCount++;
|
||||
}
|
||||
bitCount += writeChannelWiseSbrData (sbrInfoAndData[ci - 1], sbrInfoAndData[ci], // L, R
|
||||
usacIndependencyFlag);
|
||||
}
|
||||
ci++;
|
||||
break;
|
||||
}
|
||||
|
@ -34,6 +34,8 @@ private:
|
||||
// helper functions
|
||||
void writeByteAlignment (); // write 0s for byte alignment
|
||||
unsigned writeChannelWiseIcsInfo (const IcsInfo& icsInfo); // ics_info()
|
||||
unsigned writeChannelWiseSbrData (const int32_t* const sbrDataCh0, const int32_t* const sbrDataCh1,
|
||||
const bool indepFlag = false);
|
||||
unsigned writeChannelWiseTnsData (const TnsData& tnsData, const bool eightShorts);
|
||||
unsigned writeFDChannelStream (const CoreCoderData& elData, EntropyCoder& entrCoder, const unsigned ch,
|
||||
const int32_t* const mdctSignal, const uint8_t* const mdctQuantMag,
|
||||
@ -60,7 +62,7 @@ public:
|
||||
#if !RESTRICT_TO_AAC
|
||||
const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling,
|
||||
#endif
|
||||
unsigned char* const audioConfig);
|
||||
const uint8_t sbrRatioShiftValue, unsigned char* const audioConfig);
|
||||
unsigned createAudioFrame (CoreCoderData** const elementData, EntropyCoder* const entropyCoder,
|
||||
int32_t** const mdctSignals, uint8_t** const mdctQuantMag,
|
||||
const bool usacIndependencyFlag, const uint8_t numElements,
|
||||
@ -68,6 +70,7 @@ public:
|
||||
#if !RESTRICT_TO_AAC
|
||||
const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling,
|
||||
#endif
|
||||
const uint8_t sbrRatioShiftValue, int32_t** const sbrInfoAndData,
|
||||
unsigned char* const accessUnit, const unsigned nSamplesInFrame = 1024);
|
||||
}; // BitStreamWriter
|
||||
|
||||
|
@ -785,11 +785,13 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
|
||||
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
|
||||
const unsigned samplingRate = toSamplingRate (m_frequencyIdx);
|
||||
const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
|
||||
const uint32_t maxSfbLong = (samplingRate < 37566 ? MAX_NUM_SWB_LONG : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
|
||||
const uint64_t scaleSr = (samplingRate < 27713 ? (samplingRate < 23004 ? 32 : 34) - __min (3, m_bitRateMode)
|
||||
const uint32_t maxSfbLong = (samplingRate < 37566 || m_shiftValSBR > 0 ? m_numSwbLong // was MAX_NUM_SWB_LONG
|
||||
: brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
|
||||
const uint32_t scaleSBR = (m_shiftValSBR > 0 ? 8 : 0); // reduces core rate by 25 %
|
||||
const uint64_t scaleSr = (samplingRate < 27713 ? (samplingRate < 23004 ? 32 : 34) - __min (3 << m_shiftValSBR, m_bitRateMode)
|
||||
: (samplingRate < 37566 && m_bitRateMode != 3u ? 36 : 37)) - (nChannels >> 1);
|
||||
const uint64_t scaleBr = (m_bitRateMode == 0 ? __min (32, 3 + (samplingRate >> 10) + (samplingRate >> 13) - (nChannels >> 1))
|
||||
: scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1));
|
||||
: scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1)) + scaleSBR;
|
||||
uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf;
|
||||
uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS];
|
||||
//uint8_t meanTempFlat[USAC_MAX_NUM_CHANNELS];
|
||||
@ -1081,13 +1083,13 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
|
||||
const unsigned samplingRate = toSamplingRate (m_frequencyIdx);
|
||||
const unsigned* const coeffMagn = m_sfbQuantizer.getCoeffMagnPtr ();
|
||||
uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS];
|
||||
//uint8_t meanTempFlat[USAC_MAX_NUM_CHANNELS];
|
||||
uint8_t meanTempFlat[USAC_MAX_NUM_CHANNELS] = {208, 208, 208, 208, 208, 208, 208, 208};
|
||||
unsigned ci = 0, s; // running index
|
||||
unsigned errorValue = (coeffMagn == nullptr ? 1 : 0);
|
||||
|
||||
// get means of spectral and temporal flatness for every channel
|
||||
m_bitAllocator.getChAverageSpecFlat (meanSpecFlat, nChannels);
|
||||
//m_bitAllocator.getChAverageTempFlat (meanTempFlat, nChannels);
|
||||
if (m_bitRateMode == 0 && samplingRate >= 23004) m_bitAllocator.getChAverageTempFlat (meanTempFlat, nChannels);
|
||||
|
||||
for (unsigned el = 0; el < m_numElements; el++) // element loop
|
||||
{
|
||||
@ -1097,7 +1099,7 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
|
||||
if ((coreConfig.elementType < ID_USAC_LFE) && (coreConfig.stereoMode > 0)) // synch SFMs
|
||||
{
|
||||
meanSpecFlat[ci] = meanSpecFlat[ci + 1] = ((uint16_t) meanSpecFlat[ci] + (uint16_t) meanSpecFlat[ci + 1]) >> 1;
|
||||
// meanTempFlat[ci] = meanTempFlat[ci + 1] = ((uint16_t) meanTempFlat[ci] + (uint16_t) meanTempFlat[ci + 1]) >> 1;
|
||||
meanTempFlat[ci] = meanTempFlat[ci + 1] = ((uint16_t) meanTempFlat[ci] + (uint16_t) meanTempFlat[ci + 1]) >> 1;
|
||||
}
|
||||
|
||||
for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop
|
||||
@ -1181,7 +1183,8 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
|
||||
const uint8_t maxSfbLong = (samplingRate < 37566 ? 63 - (samplingRate >> 11) : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
|
||||
const uint8_t maxSfbShort = (samplingRate < 37566 ? 21 - (samplingRate >> 12) : brModeAndFsToMaxSfbShort(m_bitRateMode, samplingRate));
|
||||
const uint16_t peakIndex = (shortWinCurr ? 0 : (m_specAnaCurr[ci] >> 5) & 2047);
|
||||
const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort - 2 + (meanSpecFlat[ci] >> 6) : maxSfbLong - 6 + (meanSpecFlat[ci] >> 5));
|
||||
const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort - 2 + (meanSpecFlat[ci] >> 6) : maxSfbLong - 6 + (meanSpecFlat[ci] >> 5)) +
|
||||
(shortWinCurr ? -3 + (((1 << 5) + meanTempFlat[ci]) >> 6) : -7 + (((1 << 4) + meanTempFlat[ci]) >> 5));
|
||||
const unsigned targetBitCount25 = ((60000 + 20000 * m_bitRateMode) * nSamplesInFrame) / (samplingRate * ((grpData.numWindowGroups + 1) >> 1));
|
||||
unsigned b = grpData.sfbsPerGroup - 1;
|
||||
|
||||
@ -1212,7 +1215,10 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
|
||||
}
|
||||
}
|
||||
#endif
|
||||
b = lastSfb;
|
||||
// coarse-quantize near-Nyquist SFB with SBR @ 48-64 kHz
|
||||
b = 40 + (samplingRate >> 12);
|
||||
if ((m_shiftValSBR == 0) || (samplingRate < 23004) || shortWinCurr || (b > lastSfb)) b = lastSfb;
|
||||
|
||||
while ((b >= sfmBasedSfbStart + (m_bitRateMode >> 1)) && (grpOff[b] > peakIndex) && ((grpRms[b] >> 16) <= 1) /*coarse quantization*/ &&
|
||||
((estimBitCount * 5 > targetBitCount25 * 2) || (grpLength > 1 /*no accurate bit count estim. available for grouped spectrum*/)))
|
||||
{
|
||||
@ -1308,6 +1314,11 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
|
||||
// NOTE: gap-filling SFB bit count might be inaccurate now since scale factors changed
|
||||
if (coreConfig.specFillData[ch] == 1) errorValue |= 1;
|
||||
#endif
|
||||
if ((coreConfig.elementType < ID_USAC_LFE) && (m_shiftValSBR > 0)) // collect SBR data
|
||||
{
|
||||
memset (m_coreSignals[ci], 0, 10 * sizeof (int32_t)); // TODO
|
||||
m_coreSignals[ci][0] = 1 << 20; // fix bs_freq_res = high
|
||||
}
|
||||
ci++;
|
||||
}
|
||||
} // for el
|
||||
@ -1317,7 +1328,7 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
|
||||
#if !RESTRICT_TO_AAC
|
||||
m_timeWarping, m_noiseFilling,
|
||||
#endif
|
||||
m_outAuData, nSamplesInFrame)); // returns AU size
|
||||
m_shiftValSBR, m_coreSignals, m_outAuData, nSamplesInFrame)); // returns AU size
|
||||
}
|
||||
|
||||
unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS and SFB data
|
||||
@ -1597,7 +1608,7 @@ unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS
|
||||
unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects of ics_info()
|
||||
{
|
||||
const unsigned nChannels = toNumChannels (m_channelConf);
|
||||
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
|
||||
const unsigned nSamplesInFrame = toFrameLength (m_frameLength) << m_shiftValSBR;
|
||||
const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead
|
||||
const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
|
||||
unsigned ci = 0; // running ch index
|
||||
@ -1608,8 +1619,8 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o
|
||||
m_tempAnalyzer.getTransientAndPitch (m_tranLocCurr, nChannels);
|
||||
|
||||
// temporal analysis for look-ahead signal (central nSamplesInFrame samples of next frame)
|
||||
errorValue |= m_tempAnalyzer.temporalAnalysis (m_timeSignals, nChannels, nSamplesInFrame, nSamplesTempAna, lfeChannelIndex);
|
||||
|
||||
errorValue |= m_tempAnalyzer.temporalAnalysis (m_timeSignals, nChannels, nSamplesInFrame, nSamplesTempAna,
|
||||
m_shiftValSBR, m_coreSignals, lfeChannelIndex);
|
||||
// get temporal channel statistics for next frame, used for window length/overlap decision
|
||||
m_tempAnalyzer.getTempAnalysisStats (m_tempAnaNext, nChannels);
|
||||
m_tempAnalyzer.getTransientAndPitch (m_tranLocNext, nChannels);
|
||||
@ -1654,10 +1665,10 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o
|
||||
const USAC_WSEQ wsPrev = icsPrev.windowSequence;
|
||||
USAC_WSEQ& wsCurr = icsCurr.windowSequence;
|
||||
// get temporal signal statistics, then determine overlap config. for the next frame
|
||||
const unsigned plCurr = abs (m_tranLocCurr[ci]) & 1023;
|
||||
const unsigned plCurr = abs (m_tranLocCurr[ci]) & ((1024 << m_shiftValSBR) - 1);
|
||||
const unsigned sfCurr = (m_tempAnaCurr[ci] >> 24) & UCHAR_MAX;
|
||||
const unsigned tfCurr = (m_tempAnaCurr[ci] >> 16) & UCHAR_MAX;
|
||||
const unsigned plNext = abs (m_tranLocNext[ci]) & 1023;
|
||||
const unsigned plNext = abs (m_tranLocNext[ci]) & ((1024 << m_shiftValSBR) - 1);
|
||||
const unsigned sfNext = (m_tempAnaNext[ci] >> 24) & UCHAR_MAX;
|
||||
const unsigned tfNext = (m_tempAnaNext[ci] >> 16) & UCHAR_MAX;
|
||||
const unsigned tThresh = UCHAR_MAX * (__max (plCurr, plNext) < 614 /*0.6 * 1024*/ ? 16 : 15 - (m_bitRateMode >> 2));
|
||||
@ -1775,6 +1786,7 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o
|
||||
{
|
||||
const IcsInfo& icsPrev = coreConfig.icsInfoPrev[ch];
|
||||
const IcsInfo& icsCurr = coreConfig.icsInfoCurr[ch];
|
||||
const int32_t* timeSig = (m_shiftValSBR > 0 ? m_coreSignals[ci] : m_timeSignals[ci]);
|
||||
const USAC_WSEQ wsCurr = icsCurr.windowSequence;
|
||||
const bool eightShorts = (wsCurr == EIGHT_SHORT);
|
||||
SfbGroupData& grpData = coreConfig.groupingData[ch];
|
||||
@ -1782,7 +1794,7 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o
|
||||
grpData.numWindowGroups = (eightShorts ? NUM_WINDOW_GROUPS : 1); // fill groupingData
|
||||
memcpy (grpData.windowGroupLength, windowGroupingTable[icsCurr.windowGrouping], NUM_WINDOW_GROUPS * sizeof (uint8_t));
|
||||
|
||||
errorValue |= m_transform.applyMCLT (m_timeSignals[ci], eightShorts, icsPrev.windowShape != WINDOW_SINE, icsCurr.windowShape != WINDOW_SINE,
|
||||
errorValue |= m_transform.applyMCLT (timeSig, eightShorts, icsPrev.windowShape != WINDOW_SINE, icsCurr.windowShape != WINDOW_SINE,
|
||||
wsCurr > LONG_START /*lOL*/, (wsCurr % 3) != ONLY_LONG /*lOR*/, m_mdctSignals[ci], m_mdstSignals[ci]);
|
||||
m_scaleFacData[ci++] = &grpData;
|
||||
}
|
||||
@ -1809,9 +1821,14 @@ ExhaleEncoder::ExhaleEncoder (int32_t* const inputPcmData, unsigned ch
|
||||
m_channelConf = CCI_2_CHM; // passing numChannels = 0 to ExhaleEncoder is interpreted as 2-ch dual-mono
|
||||
}
|
||||
m_numElements = elementCountConfig[m_channelConf % USAC_MAX_NUM_ELCONFIGS]; // used in UsacDecoderConfig
|
||||
#if 1
|
||||
m_shiftValSBR = (frameLength >= 1536 ? 1 : 0);
|
||||
#else
|
||||
m_shiftValSBR = 0;
|
||||
#endif
|
||||
m_frameCount = 0;
|
||||
m_frameLength = (USAC_CCFL) frameLength; // coreCoderFrameLength, signaled using coreSbrFrameLengthIndex
|
||||
m_frequencyIdx = toSamplingFrequencyIndex (sampleRate); // I/O sample rate as usacSamplingFrequencyIndex
|
||||
m_frameLength = USAC_CCFL (frameLength >> m_shiftValSBR); // ccfl signaled using coreSbrFrameLengthIndex
|
||||
m_frequencyIdx = toSamplingFrequencyIndex (sampleRate >> m_shiftValSBR); // as usacSamplingFrequencyIndex
|
||||
m_indepFlag = true; // usacIndependencyFlag in UsacFrame(), will be set per frame, true in first frame
|
||||
m_indepPeriod = (indepPeriod == 0 ? UINT_MAX : indepPeriod); // RAP, signaled using usacIndependencyFlag
|
||||
#if !RESTRICT_TO_AAC
|
||||
@ -1841,6 +1858,7 @@ ExhaleEncoder::ExhaleEncoder (int32_t* const inputPcmData, unsigned ch
|
||||
{
|
||||
m_bandwidCurr[ch] = 0;
|
||||
m_bandwidPrev[ch] = 0;
|
||||
m_coreSignals[ch] = nullptr;
|
||||
m_mdctQuantMag[ch] = nullptr;
|
||||
m_mdctSignals[ch] = nullptr;
|
||||
m_mdstSignals[ch] = nullptr;
|
||||
@ -1873,6 +1891,7 @@ ExhaleEncoder::~ExhaleEncoder ()
|
||||
// free allocated signal buffers
|
||||
for (unsigned ch = 0; ch < USAC_MAX_NUM_CHANNELS; ch++)
|
||||
{
|
||||
if (m_shiftValSBR > 0) MFREE (m_coreSignals[ch]);
|
||||
MFREE (m_mdctQuantMag[ch]);
|
||||
MFREE (m_mdctSignals[ch]);
|
||||
MFREE (m_mdstSignals[ch]);
|
||||
@ -1891,7 +1910,7 @@ ExhaleEncoder::~ExhaleEncoder ()
|
||||
unsigned ExhaleEncoder::encodeLookahead ()
|
||||
{
|
||||
const unsigned nChannels = toNumChannels (m_channelConf);
|
||||
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
|
||||
const unsigned nSamplesInFrame = toFrameLength (m_frameLength) << m_shiftValSBR;
|
||||
const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead
|
||||
const int32_t* chSig = m_pcm24Data;
|
||||
unsigned ch, s;
|
||||
@ -1921,11 +1940,12 @@ unsigned ExhaleEncoder::encodeLookahead ()
|
||||
*(predSig + 2) * (int64_t) filterC[2] + *(predSig + 3) * (int64_t) filterC[3];
|
||||
*(--predSig) = int32_t ((predSample > 0 ? -predSample + (1 << 9) - 1 : -predSample) >> 9);
|
||||
}
|
||||
if (m_shiftValSBR > 0) memset (m_coreSignals[ch], 0, (nSamplesInFrame >> 2) * sizeof (int32_t));
|
||||
}
|
||||
|
||||
// set initial temporal channel statistic to something meaningful before first coded frame
|
||||
m_tempAnalyzer.temporalAnalysis (m_timeSignals, nChannels, nSamplesInFrame, nSamplesTempAna - nSamplesInFrame);
|
||||
|
||||
m_tempAnalyzer.temporalAnalysis (m_timeSignals, nChannels, nSamplesInFrame, nSamplesTempAna - nSamplesInFrame,
|
||||
m_shiftValSBR, m_coreSignals); // default lfeChannelIndex
|
||||
if (temporalProcessing ()) // time domain: window length, overlap, grouping, and transform
|
||||
{
|
||||
return 2; // internal error in temporal processing
|
||||
@ -1945,7 +1965,7 @@ unsigned ExhaleEncoder::encodeLookahead ()
|
||||
unsigned ExhaleEncoder::encodeFrame ()
|
||||
{
|
||||
const unsigned nChannels = toNumChannels (m_channelConf);
|
||||
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
|
||||
const unsigned nSamplesInFrame = toFrameLength (m_frameLength) << m_shiftValSBR;
|
||||
const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead
|
||||
const int32_t* chSig = m_pcm24Data;
|
||||
unsigned ch, s;
|
||||
@ -1955,6 +1975,14 @@ unsigned ExhaleEncoder::encodeFrame ()
|
||||
{
|
||||
memcpy (&m_timeSignals[ch][0], &m_timeSignals[ch][nSamplesInFrame], nSamplesInFrame * sizeof (int32_t));
|
||||
memcpy (&m_timeSignals[ch][nSamplesInFrame], &m_timeSignals[ch][2 * nSamplesInFrame], (nSamplesTempAna - nSamplesInFrame) * sizeof (int32_t));
|
||||
|
||||
if (m_shiftValSBR > 0)
|
||||
{
|
||||
const unsigned nSmpInFrame = toFrameLength (m_frameLength); // core coder frame length
|
||||
|
||||
memcpy (&m_coreSignals[ch][0], &m_coreSignals[ch][nSmpInFrame], nSmpInFrame * sizeof (int32_t));
|
||||
memcpy (&m_coreSignals[ch][nSmpInFrame], &m_coreSignals[ch][2 * nSmpInFrame], (nSamplesInFrame >> 2) * sizeof (int32_t));
|
||||
}
|
||||
}
|
||||
|
||||
// copy nSamplesInFrame external channel-interleaved samples into internal channel buffers
|
||||
@ -1987,7 +2015,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin
|
||||
const unsigned nChannels = toNumChannels (m_channelConf);
|
||||
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
|
||||
const unsigned specSigBufSize = nSamplesInFrame * sizeof (int32_t);
|
||||
const unsigned timeSigBufSize = ((nSamplesInFrame * 41) >> 4) * sizeof (int32_t); // core-codec delay*4
|
||||
const unsigned timeSigBufSize = (((nSamplesInFrame << m_shiftValSBR) * 41) >> 4) * sizeof (int32_t); // core-codec delay*4
|
||||
const unsigned char chConf = m_channelConf;
|
||||
unsigned errorValue = 0; // no error
|
||||
|
||||
@ -2008,7 +2036,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin
|
||||
{
|
||||
errorValue |= 64;
|
||||
}
|
||||
if ((m_frequencyIdx < 0) || (m_bitRateMode > (toSamplingRate (m_frequencyIdx) >> 12) + 2))
|
||||
if ((m_frequencyIdx < 0) || (m_bitRateMode > (toSamplingRate (m_frequencyIdx) >> (m_shiftValSBR > 0 ? 11 : 12)) + 2))
|
||||
{
|
||||
errorValue |= 32;
|
||||
}
|
||||
@ -2036,7 +2064,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin
|
||||
#if !RESTRICT_TO_AAC
|
||||
m_timeWarping, m_noiseFilling,
|
||||
#endif
|
||||
audioConfigBuffer);
|
||||
m_shiftValSBR, audioConfigBuffer);
|
||||
if (audioConfigBytes) *audioConfigBytes = errorValue; // size of UsacConfig() in bytes
|
||||
errorValue = (errorValue == 0 ? 1 : 0);
|
||||
}
|
||||
@ -2061,6 +2089,20 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin
|
||||
memset (m_sfbLoudMem, 1, 2 * 26 * 32 * sizeof (uint16_t));
|
||||
#endif
|
||||
// allocate all signal buffers
|
||||
if (m_shiftValSBR > 0)
|
||||
{
|
||||
if (m_shiftValSBR > 1)
|
||||
{
|
||||
return (errorValue | 4); // >2:1 not supported at the moment
|
||||
}
|
||||
else for (unsigned ch = 0; ch < nChannels; ch++)
|
||||
{
|
||||
if ((m_coreSignals[ch] = (int32_t*) malloc (timeSigBufSize >> m_shiftValSBR)) == nullptr)
|
||||
{
|
||||
errorValue |= 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (unsigned ch = 0; ch < nChannels; ch++)
|
||||
{
|
||||
if ((m_entropyCoder[ch].initCodingMemory (nSamplesInFrame) > 0) ||
|
||||
@ -2106,7 +2148,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin
|
||||
#if !RESTRICT_TO_AAC
|
||||
m_timeWarping, m_noiseFilling,
|
||||
#endif
|
||||
audioConfigBuffer);
|
||||
m_shiftValSBR, audioConfigBuffer);
|
||||
if (audioConfigBytes) *audioConfigBytes = errorValue; // length of UsacConfig() in bytes
|
||||
errorValue = (errorValue == 0 ? 1 : 0);
|
||||
|
||||
|
@ -67,6 +67,7 @@ private:
|
||||
BitAllocator m_bitAllocator; // for scale factor init
|
||||
uint8_t m_bitRateMode;
|
||||
USAC_CCI m_channelConf;
|
||||
int32_t* m_coreSignals[USAC_MAX_NUM_CHANNELS];
|
||||
CoreCoderData* m_elementData[USAC_MAX_NUM_ELEMENTS];
|
||||
EntropyCoder m_entropyCoder[USAC_MAX_NUM_CHANNELS];
|
||||
uint32_t m_frameCount;
|
||||
@ -95,6 +96,7 @@ private:
|
||||
uint16_t m_sfbLoudMem[2][26][32]; // loudness mem
|
||||
#endif
|
||||
SfbQuantizer m_sfbQuantizer; // powerlaw quantization
|
||||
uint8_t m_shiftValSBR; // SBR ratio for shifting
|
||||
SpecAnalyzer m_specAnalyzer; // for spectral analysis
|
||||
uint32_t m_specAnaCurr[USAC_MAX_NUM_CHANNELS];
|
||||
uint8_t m_specFlatPrev[USAC_MAX_NUM_CHANNELS];
|
||||
|
@ -11,6 +11,14 @@
|
||||
#include "exhaleLibPch.h"
|
||||
#include "tempAnalysis.h"
|
||||
|
||||
// Fixed-point coefficient table of the 2:1 downsampling low-pass filter which is applied in
// TempAnalyzer::temporalAnalysis() when its resampler is enabled (sbrShift > 0 and a low-rate
// output buffer is given). Only one half of the symmetric filter is stored: entry u weights
// the sum of the two high-rate input samples at offsets -(2u + 1) and +(2u + 1) around the
// current sample, i.e., the odd offsets 1, 3, ..., 129. The center tap (input << 17) and an
// additional weight of -2*SHRT_MIN on the +/-1 sample pair are added directly in that filter
// loop, where the accumulated 64-bit sum is rounded and right-shifted by 18 bit afterwards.
static const int16_t lffc2x[65] = { // low-frequency filter coefficients
|
||||
// 269-pt. sinc windowed by 0.409 * cos(0*pi.*t) - 0.5 * cos(2*pi.*t) + 0.091 * cos(4*pi.*t)
|
||||
17887, -27755, 16590, -11782, 9095, -7371, 6166, -5273, 4582, -4029, 3576, -3196, 2873,
|
||||
-2594, 2350, -2135, 1944, -1773, 1618, -1478, 1351, -1235, 1129, -1032, 942, -860, 784,
|
||||
-714, 650, -591, 536, -485, 439, -396, 357, -321, 287, -257, 229, -204, 181, -160, 141,
|
||||
-124, 108, -95, 82, -71, 61, -52, 44, -37, 31, -26, 21, -17, 14, -11, 8, -6, 5, -3, 2, -1, 1
|
||||
};
|
||||
|
||||
// static helper functions
|
||||
static unsigned updateAbsStats (const int32_t* const chSig, const int nSamples, unsigned* const maxAbsVal, int16_t* const maxAbsIdx)
|
||||
{
|
||||
@ -105,13 +113,16 @@ void TempAnalyzer::getTransientAndPitch (int16_t transIdxAndPitch[USAC_MAX_NUM_C
|
||||
}
|
||||
|
||||
unsigned TempAnalyzer::temporalAnalysis (const int32_t* const timeSignals[USAC_MAX_NUM_CHANNELS], const unsigned nChannels,
|
||||
const int nSamplesInFrame, const unsigned lookaheadOffset,
|
||||
const unsigned lfeChannelIndex /*= USAC_MAX_NUM_CHANNELS*/) // to skip an LFE channel
|
||||
const int nSamplesInFrame, const unsigned lookaheadOffset, const uint8_t sbrShift,
|
||||
int32_t* const lrCoreTimeSignals[USAC_MAX_NUM_CHANNELS] /*= nullptr*/, // if using SBR
|
||||
const unsigned lfeChannelIndex /*= USAC_MAX_NUM_CHANNELS*/) // to skip an LFE channel
|
||||
{
|
||||
const bool applyResampler = (sbrShift > 0 && lrCoreTimeSignals != nullptr);
|
||||
const int halfFrameOffset = nSamplesInFrame >> 1;
|
||||
const int resamplerOffset = (int) lookaheadOffset - 128;
|
||||
|
||||
if ((timeSignals == nullptr) || (nChannels > USAC_MAX_NUM_CHANNELS) || (lfeChannelIndex > USAC_MAX_NUM_CHANNELS) ||
|
||||
(nSamplesInFrame > 2048) || (nSamplesInFrame < 2) || (lookaheadOffset > 2048) || (lookaheadOffset == 0))
|
||||
if ((timeSignals == nullptr) || (nChannels > USAC_MAX_NUM_CHANNELS) || (lfeChannelIndex > USAC_MAX_NUM_CHANNELS) || (sbrShift > 1) ||
|
||||
(nSamplesInFrame > 2048) || (nSamplesInFrame < 2) || (lookaheadOffset > 4096) || (lookaheadOffset <= 256u * sbrShift))
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
@ -134,6 +145,26 @@ unsigned TempAnalyzer::temporalAnalysis (const int32_t* const timeSignals[USAC_M
|
||||
unsigned uR1 = abs (chSig[splitPtR - 1] - chSigM1[splitPtR - 1]);
|
||||
unsigned u; // temporary value - register?
|
||||
|
||||
if (applyResampler && lrCoreTimeSignals[ch] != nullptr) // downsampler
|
||||
{
|
||||
/*LF*/int32_t* lrSig = &lrCoreTimeSignals[ch][resamplerOffset >> sbrShift]; // low-rate,
|
||||
const int32_t* hrSig = &timeSignals[ch][resamplerOffset]; // high-rate input time signal
|
||||
|
||||
for (int i = nSamplesInFrame >> sbrShift; i > 0; i--, lrSig++, hrSig += 2)
|
||||
{
|
||||
int64_t r = ((int64_t) hrSig[0] << 17) + (hrSig[-1] + (int64_t) hrSig[1]) * -2*SHRT_MIN;
|
||||
int16_t s;
|
||||
|
||||
for (u = 65, s = 129; u > 0; s -= 2) r += (hrSig[-s] + (int64_t) hrSig[s]) * lffc2x[--u];
|
||||
|
||||
*lrSig = int32_t ((r + (1 << 17)) >> 18); // low-pass and low-rate
|
||||
// TODO: bandpass
|
||||
if (*lrSig < -8388608) *lrSig = -8388608;
|
||||
else
|
||||
if (*lrSig > 8388607) *lrSig = 8388607;
|
||||
}
|
||||
}
|
||||
|
||||
if (ch == lfeChannelIndex) // no analysis
|
||||
{
|
||||
m_tempAnaStats[ch] = 0; // flat/stationary frame
|
||||
|
@ -39,7 +39,8 @@ public:
|
||||
void getTempAnalysisStats (uint32_t avgTempAnaStats[USAC_MAX_NUM_CHANNELS], const unsigned nChannels);
|
||||
void getTransientAndPitch (int16_t transIdxAndPitch[USAC_MAX_NUM_CHANNELS], const unsigned nChannels);
|
||||
unsigned temporalAnalysis (const int32_t* const timeSignals[USAC_MAX_NUM_CHANNELS], const unsigned nChannels,
|
||||
const int nSamplesInFrame, const unsigned lookaheadOffset,
|
||||
const int nSamplesInFrame, const unsigned lookaheadOffset, const uint8_t sbrShift,
|
||||
int32_t* const lrCoreTimeSignals[USAC_MAX_NUM_CHANNELS] = nullptr, // if using SBR
|
||||
const unsigned lfeChannelIndex = USAC_MAX_NUM_CHANNELS); // to skip an LFE channel
|
||||
}; // TempAnalyzer
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user