non-SBR pre-roll

This commit is contained in:
Christian R. Helmrich
2021-01-17 19:00:10 +01:00
parent 36964d2051
commit 920a3e8d56
7 changed files with 87 additions and 32 deletions

View File

@@ -6,7 +6,7 @@
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
* party rights, including patent rights. No such rights are granted under this License.
*
* Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved.
* Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
*/
#include "exhaleAppPch.h"
@@ -364,7 +364,11 @@ int BasicMP4Writer::initHeader (const uint32_t audioLength) // reserve bytes for
/* NOTE: the following condition is, as far as I can tell, correct, but some decoders with DRC processing
may decode too few samples with it. Hence, I disabled it. See also corresponding NOTE in exhaleApp.cpp */
const bool flushFrameUsed = true; // ((audioLength + m_pregapLength) % m_frameLength) > 0;
#ifdef NO_PREROLL_DATA
const unsigned frameCount = ((audioLength + m_frameLength - 1) / m_frameLength) + (flushFrameUsed ? 2 : 1);
#else
const unsigned frameCount = ((audioLength + m_frameLength - 1) / m_frameLength) + (flushFrameUsed ? 1 : 0);
#endif
const unsigned chunkCount = ((frameCount + m_rndAccPeriod - 1) / m_rndAccPeriod);
const unsigned finalChunk = (frameCount <= m_rndAccPeriod ? 0 : frameCount % m_rndAccPeriod);
#ifndef NO_FIX_FOR_ISSUE_1

View File

@@ -5,7 +5,7 @@
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
* party rights, including patent rights. No such rights are granted under this License.
*
* Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved.
* Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
*/
#include "exhaleAppPch.h"
@@ -683,7 +683,11 @@ int main (const int argc, char* argv[])
#endif
// allocate dynamic frame memory buffers
inPcmData = (int32_t*) malloc (inFrameSize * numChannels); // max frame in size
#ifdef NO_PREROLL_DATA
outAuData = (uint8_t*) malloc ((6144 >> 3) * numChannels); // max frame AU size
#else
outAuData = (uint8_t*) malloc ((9216 >> 3) * numChannels); // max frame AU size
#endif
if ((inPcmData == nullptr) || (outAuData == nullptr))
{
fprintf_s (stderr, " ERROR while trying to allocate dynamic memory! Not enough free RAM available!\n\n");
@@ -736,12 +740,14 @@ int main (const int argc, char* argv[])
memset (outAuData, 0, 108 * sizeof (uint8_t)); // max. allowed ASC + UC size
i = exhaleEnc.initEncoder (outAuData, &bw); // bw stores actual ASC + UC size
if ((i |= mp4Writer.open (outFileHandle, sampleRate, numChannels, inSampDepth, frameLength, startLength
#if ENABLE_SIMPLE_SBR
if ((i |= mp4Writer.open (outFileHandle, sampleRate, numChannels, inSampDepth, frameLength, startLength + (coreSbrFrameLengthIndex >= 3 ? 962 : 0),
#else
if ((i |= mp4Writer.open (outFileHandle, sampleRate, numChannels, inSampDepth, frameLength, startLength,
+ (coreSbrFrameLengthIndex >= 3 ? 962 : 0)
#endif
indepPeriod, outAuData, bw, (time (nullptr) + 2082844800) & UINT_MAX, (char) variableCoreBitRateMode)) != 0)
#ifndef NO_PREROLL_DATA
- frameLength
#endif
, indepPeriod, outAuData, bw, (time (nullptr) + 2082844800) & UINT_MAX, (char) variableCoreBitRateMode)) != 0)
{
fprintf_s (stderr, " ERROR while trying to initialize xHE-AAC encoder: error value %d was returned!\n\n", i);
i <<= 2; // return value
@@ -804,6 +810,7 @@ int main (const int argc, char* argv[])
#endif
goto mainFinish; // coder-time error
}
#ifdef NO_PREROLL_DATA
if (bwMax < bw) bwMax = bw;
// write first AU, add frame to header
if ((mp4Writer.addFrameAU (outAuData, bw) != (int) bw) || loudnessEst.addNewPcmData (frameLength))
@@ -814,7 +821,15 @@ int main (const int argc, char* argv[])
goto mainFinish; // writeout error
}
byteCount += bw;
#else
if (loudnessEst.addNewPcmData (frameLength))
{
# if USE_EXHALELIB_DLL
exhaleDelete (&exhaleEnc);
# endif
goto mainFinish; // estimation error
}
#endif
#if ENABLE_RESAMPLING
while (wavReader.read (inPcmData, (frameLength * resampRatio) >> resampShift) > 0) // read a new audio frame
#else

View File

@@ -22,7 +22,7 @@ static const int64_t kFilterCoeffs[4][8] = { // first 4: numerator (16->-32 bit)
// constructor
LoudnessEstimator::LoudnessEstimator (int32_t* const inputPcmData, const unsigned bitDepth /*= 24*/,
const unsigned sampleRate /*= 44100*/, const unsigned numChannels /*= 2*/)
const unsigned sampleRate /*= 44k*/, const unsigned numChannels /*= 2*/)
{
#if LE_ACCURATE_CALC
m_filterCoeffs = kFilterCoeffs[sampleRate <= 44100 ? (sampleRate <= 32000 ? 0 : 1) : (sampleRate <= 48000 ? 2 : 3)];
@@ -161,7 +161,7 @@ uint32_t LoudnessEstimator::getStatistics (const bool includeWarmUp /*= false*/)
}
if (zg < LE_THRESH_ABS) return peakValue16Bits;
zg = LE_LUFS_OFFSET + 10.0f * log10 (zg / (normFac * numBlocks * (float) m_inputMaxValue * (float) m_inputMaxValue));
zg = LE_LUFS_OFFSET + 10.0f * (float) log10 (zg / (normFac * numBlocks * (float) m_inputMaxValue * (float) m_inputMaxValue));
#if LE_ACCURATE_CALC
zg -= m_filterFactor * 0.046875f; // for sample rates other than 48 kHz
#endif

View File

@@ -5,7 +5,7 @@
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
* party rights, including patent rights. No such rights are granted under this License.
*
* Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved.
* Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
*/
#include "exhaleLibPch.h"
@@ -739,8 +739,16 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex,
m_auBitStream.write (usfi, 5); // usacSamplingFrequencyIndex (after SBR dec.!)
m_auBitStream.write (shortFrameLength ? 0 : fli, 3);// coreSbrFrameLengthIndex
m_auBitStream.write (chConfigurationIndex, 5); // channelConfigurationIndex
#ifdef NO_PREROLL_DATA
m_auBitStream.write (numElements - 1, 4); // numElements in UsacDecoderConfig
#else
m_auBitStream.write (numElements, 4); // 4bit numElements in UsacDecoderConfig
m_auBitStream.write (ID_USAC_EXT, 2); // usacElementType[0] = 3, for IPF stuff
m_auBitStream.write (3, 4); // UsacExtElementConfig(), ID_EXT_ELE_AUDIOPREROLL
m_auBitStream.write (0, 6); // usacExtElementConfigLength = 0, rest of config.
bitCount += 12;
#endif
for (unsigned el = 0; el < numElements; el++) // el element loop
{
m_auBitStream.write ((unsigned) elementType[el], 2); // usacElementType[el]
@@ -802,7 +810,7 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex,
bitCount += (8 - m_auBitStream.heldBitCount) & 7;
writeByteAlignment (); // flush bytes
memcpy (audioConfig, &m_auBitStream.stream.front (), __min (15u + fli, bitCount >> 3));
memcpy (audioConfig, &m_auBitStream.stream.front (), __min (17u + fli, bitCount >> 3));
return (bitCount >> 3); // byte count
}
@@ -812,7 +820,7 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData,
const bool usacIndependencyFlag, const uint8_t numElements,
const uint8_t numSwbShort, uint8_t* const tempBuffer,
#if !RESTRICT_TO_AAC
const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling,
const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling, const bool ipf,
#endif
const uint8_t sbrRatioShiftValue, int32_t** const sbrInfoAndData,
unsigned char* const accessUnit, const unsigned nSamplesInFrame /*= 1024*/)
@@ -822,19 +830,45 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData,
if ((elementData == nullptr) || (entropyCoder == nullptr) || (tempBuffer == nullptr) || (sbrInfoAndData == nullptr) ||
(mdctSignals == nullptr) || (mdctQuantMag == nullptr) || (accessUnit == nullptr) || (nSamplesInFrame > 2048) ||
#if !RESTRICT_TO_AAC
(noiseFilling == nullptr) || (tw_mdct == nullptr) ||
(noiseFilling == nullptr) || (tw_mdct == nullptr) || (ipf && !usacIndependencyFlag) ||
#endif
(numElements == 0) || (numElements > USAC_MAX_NUM_ELEMENTS) || (numSwbShort < MIN_NUM_SWB_SHORT) || (numSwbShort > MAX_NUM_SWB_SHORT))
{
return 0; // invalid arguments error
}
#ifndef NO_PREROLL_DATA
if (ipf) // save last AU for ext. data
{
bitCount = __min (65532, (uint32_t) m_auBitStream.stream.size ());
memcpy (tempBuffer, &m_auBitStream.stream.front (), bitCount);
}
#endif
m_auBitStream.reset ();
m_frameLength = nSamplesInFrame;
m_numSwbShort = numSwbShort;
m_uCharBuffer = tempBuffer;
m_auBitStream.write (usacIndependencyFlag ? 1 : 0, 1);
#ifndef NO_PREROLL_DATA
m_auBitStream.write (ipf ? 1 : 0, 1); // UsacExtElement, usacExtElementPresent
if (ipf)
{
const unsigned payloadLength = bitCount + 3; // ext. payload size, in bytes!
m_auBitStream.write (0, 1); // usacExtElementUseDefaultLength = 0 (variable)
m_auBitStream.write (CLIP_UCHAR (payloadLength), 8);
if (payloadLength > 254) m_auBitStream.write (payloadLength - 253, 16); // valueAdd
m_auBitStream.write (0, 6); // start AudioPreRoll - configLen = reserved = 0
m_auBitStream.write (1, 2); // numPreRollFrames, only one supported for now!
m_auBitStream.write (bitCount, 16); // auLen
while (ci < bitCount) m_auBitStream.write (tempBuffer[ci++], 8); // write AU
ci = 0;
bitCount = (payloadLength > 254 ? 26 : 10) + (payloadLength << 3); // for ext. bits
}
bitCount++; // for ElementPresent flag
#endif
for (unsigned el = 0; el < numElements; el++) // el element loop
{
const CoreCoderData* const elData = elementData[el];
@@ -935,7 +969,10 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData,
bitCount += (8 - m_auBitStream.heldBitCount) & 7;
writeByteAlignment (); // flush bytes
#if RESTRICT_TO_AAC
memcpy (accessUnit, &m_auBitStream.stream.front (), __min (768 * ci, bitCount >> 3));
#else
memcpy (accessUnit, &m_auBitStream.stream.front (), __min (ci * (ipf ? 1152 : 768), bitCount >> 3));
#endif
return (bitCount >> 3); // byte count
}

View File

@@ -5,7 +5,7 @@
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
* party rights, including patent rights. No such rights are granted under this License.
*
* Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved.
* Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
*/
#ifndef _BIT_STREAM_WRITER_H_
@@ -68,7 +68,7 @@ public:
const bool usacIndependencyFlag, const uint8_t numElements,
const uint8_t numSwbShort, uint8_t* const tempBuffer,
#if !RESTRICT_TO_AAC
const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling,
const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling, const bool ipf,
#endif
const uint8_t sbrRatioShiftValue, int32_t** const sbrInfoAndData,
unsigned char* const accessUnit, const unsigned nSamplesInFrame = 1024);

View File

@@ -5,7 +5,7 @@
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
* party rights, including patent rights. No such rights are granted under this License.
*
* Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved.
* Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
*/
#include "exhaleLibPch.h"
@@ -773,7 +773,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
const uint16_t scaleSBR = (m_shiftValSBR > 0 || m_nonMpegExt ? sbrRateOffset[m_bitRateMode] : 0); // -25% rate
const uint64_t scaleSr = (samplingRate < 27713 ? (samplingRate < 23004 ? 32 : 34) - __min (3 << m_shiftValSBR, m_bitRateMode)
: (samplingRate < 37566 && m_bitRateMode != 3u ? 36 : 37)) - (nChannels >> 1);
const uint64_t scaleBr = (m_bitRateMode == 0 ? __min (32, 17u + (((samplingRate + (1 << 11)) >> 12) << 1) - (nChannels >> 1))
const uint64_t scaleBr = (m_bitRateMode == 0 || m_frameCount <= 1 ? __min (32, 17u + (((samplingRate + (1 << 11)) >> 12) << 1) - (nChannels >> 1))
: scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1)) + scaleSBR;
uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf;
uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS];
@@ -1174,7 +1174,7 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
const uint16_t peakIndex = (shortWinCurr ? 0 : (m_specAnaCurr[ci] >> 5) & 2047);
const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort - 2 + (meanSpecFlat[ci] >> 6) : maxSfbLong - 6 + (meanSpecFlat[ci] >> 5)) +
(shortWinCurr ? -3 + (((1 << 5) + meanTempFlat[ci]) >> 6) : -7 + (((1 << 4) + meanTempFlat[ci]) >> 5));
const unsigned targetBitCount25 = ((60000 + 20000 * (m_bitRateMode + m_shiftValSBR)) * nSamplesInFrame) /
const unsigned targetBitCount25 = ((60000 + 20000 * ((m_bitRateMode + m_shiftValSBR) >> (m_frameCount <= 1 ? 2 : 0))) * nSamplesInFrame) /
(samplingRate * ((grpData.numWindowGroups + 1) >> 1));
unsigned b = grpData.sfbsPerGroup - 1;
@@ -1184,7 +1184,7 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
#if EC_TRELLIS_OPT_CODING
if (grpLength == 1) // finalize bit count estimate, RDOC
{
estimBitCount = m_sfbQuantizer.quantizeSpecRDOC (entrCoder, grpScaleFacs, __min (estimBitCount + 2, targetBitCount25),
estimBitCount = m_sfbQuantizer.quantizeSpecRDOC (entrCoder, grpScaleFacs, estimBitCount + 2u,
grpOff, grpRms, grpData.sfbsPerGroup, m_mdctQuantMag[ci]);
for (b = 1; b < grpData.sfbsPerGroup; b++)
{
@@ -1327,11 +1327,10 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
m_coreSignals[ci][0] |= getSbrEnvelopeAndNoise (&m_coreSignals[ci][nSamplesTempAna - 64 + nSamplesInFrame], msfVal,
__max (m_meanTempPrev[ci], meanTempFlat[ci]) >> 3, m_bitRateMode == 0,
m_indepFlag, msfSte, tmpValSynch, nSamplesInFrame, &m_coreSignals[ci][1]);
if (ch + 1 == nrChannels) // update the flatness histories
if (ch + 1 == nrChannels) // update flatness histories - TODO: coupling
{
m_meanSpecPrev[ci] = meanSpecFlat[ci]; m_meanSpecPrev[s] = meanSpecFlat[s];
m_meanTempPrev[ci] = meanTempFlat[ci]; m_meanTempPrev[s] = meanTempFlat[s];
// TODO: coupling (m_coreSignals[ci][0] |= 1 << 23;)
}
}
ci++;
@@ -1341,7 +1340,7 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
return (errorValue > 0 ? 0 : m_outStream.createAudioFrame (m_elementData, m_entropyCoder, m_mdctSignals, m_mdctQuantMag, m_indepFlag,
m_numElements, m_numSwbShort, (uint8_t* const) m_tempIntBuf,
#if !RESTRICT_TO_AAC
m_timeWarping, m_noiseFilling,
m_timeWarping, m_noiseFilling, (m_frameCount == 2),
#endif
m_shiftValSBR, m_coreSignals, m_outAuData, nSamplesInFrame)); // returns AU size
}

View File

@@ -205,7 +205,7 @@ int32_t getSbrEnvelopeAndNoise (int32_t* const sbrLevels, const uint8_t specFlat
const int32_t p[3] = {prev & SCHAR_MAX, (prev >> 8) & SCHAR_MAX, (prev >> 16) & SCHAR_MAX};
if ((t > 0 || !ind) && (getSbrDeltaBitCount (c[0] - p[0], true) + getSbrDeltaBitCount (c[1] - p[1], true) +
getSbrDeltaBitCount (c[2] - p[2], true) < 12)) // approximate!
getSbrDeltaBitCount (c[2] - p[2], true) < 13)) // approximate!
{
tmpBest |= 1 << (12 + t); // delta-time coding flag for envelope