diff --git a/src/app/basicMP4Writer.cpp b/src/app/basicMP4Writer.cpp index 2a605bc..7acb573 100644 --- a/src/app/basicMP4Writer.cpp +++ b/src/app/basicMP4Writer.cpp @@ -6,7 +6,7 @@ * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third- * party rights, including patent rights. No such rights are granted under this License. * - * Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved. + * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved. */ #include "exhaleAppPch.h" @@ -364,7 +364,11 @@ int BasicMP4Writer::initHeader (const uint32_t audioLength) // reserve bytes for /* NOTE: the following condition is, as far as I can tell, correct, but some decoders with DRC processing may decode too few samples with it. Hence, I disabled it. See also corresponding NOTE in exhaleApp.cpp */ const bool flushFrameUsed = true; // ((audioLength + m_pregapLength) % m_frameLength) > 0; +#ifdef NO_PREROLL_DATA const unsigned frameCount = ((audioLength + m_frameLength - 1) / m_frameLength) + (flushFrameUsed ? 2 : 1); +#else + const unsigned frameCount = ((audioLength + m_frameLength - 1) / m_frameLength) + (flushFrameUsed ? 1 : 0); +#endif const unsigned chunkCount = ((frameCount + m_rndAccPeriod - 1) / m_rndAccPeriod); const unsigned finalChunk = (frameCount <= m_rndAccPeriod ? 0 : frameCount % m_rndAccPeriod); #ifndef NO_FIX_FOR_ISSUE_1 diff --git a/src/app/exhaleApp.cpp b/src/app/exhaleApp.cpp index 3c2bf25..e1169ee 100644 --- a/src/app/exhaleApp.cpp +++ b/src/app/exhaleApp.cpp @@ -5,7 +5,7 @@ * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third- * party rights, including patent rights. No such rights are granted under this License. * - * Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved. + * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved. */ #include "exhaleAppPch.h" @@ -683,7 +683,11 @@ int main (const int argc, char* argv[]) #endif // allocate dynamic frame memory buffers inPcmData = (int32_t*) malloc (inFrameSize * numChannels); // max frame in size +#ifdef NO_PREROLL_DATA outAuData = (uint8_t*) malloc ((6144 >> 3) * numChannels); // max frame AU size +#else + outAuData = (uint8_t*) malloc ((9216 >> 3) * numChannels); // max frame AU size +#endif if ((inPcmData == nullptr) || (outAuData == nullptr)) { fprintf_s (stderr, " ERROR while trying to allocate dynamic memory! Not enough free RAM available!\n\n"); @@ -736,12 +740,14 @@ int main (const int argc, char* argv[]) memset (outAuData, 0, 108 * sizeof (uint8_t)); // max. allowed ASC + UC size i = exhaleEnc.initEncoder (outAuData, &bw); // bw stores actual ASC + UC size + if ((i |= mp4Writer.open (outFileHandle, sampleRate, numChannels, inSampDepth, frameLength, startLength #if ENABLE_SIMPLE_SBR - if ((i |= mp4Writer.open (outFileHandle, sampleRate, numChannels, inSampDepth, frameLength, startLength + (coreSbrFrameLengthIndex >= 3 ? 962 : 0), -#else - if ((i |= mp4Writer.open (outFileHandle, sampleRate, numChannels, inSampDepth, frameLength, startLength, + + (coreSbrFrameLengthIndex >= 3 ? 962 : 0) #endif - indepPeriod, outAuData, bw, (time (nullptr) + 2082844800) & UINT_MAX, (char) variableCoreBitRateMode)) != 0) +#ifndef NO_PREROLL_DATA + - frameLength +#endif + , indepPeriod, outAuData, bw, (time (nullptr) + 2082844800) & UINT_MAX, (char) variableCoreBitRateMode)) != 0) { fprintf_s (stderr, " ERROR while trying to initialize xHE-AAC encoder: error value %d was returned!\n\n", i); i <<= 2; // return value @@ -804,17 +810,26 @@ int main (const int argc, char* argv[]) #endif goto mainFinish; // coder-time error } +#ifdef NO_PREROLL_DATA if (bwMax < bw) bwMax = bw; // write first AU, add frame to header if ((mp4Writer.addFrameAU (outAuData, bw) != (int) bw) || loudnessEst.addNewPcmData (frameLength)) { -#if USE_EXHALELIB_DLL +# if USE_EXHALELIB_DLL exhaleDelete (&exhaleEnc); -#endif +# endif goto mainFinish; // writeout error } byteCount += bw; - +#else + if (loudnessEst.addNewPcmData (frameLength)) + { +# if USE_EXHALELIB_DLL + exhaleDelete (&exhaleEnc); +# endif + goto mainFinish; // estimation error + } +#endif #if ENABLE_RESAMPLING while (wavReader.read (inPcmData, (frameLength * resampRatio) >> resampShift) > 0) // read a new audio frame #else diff --git a/src/app/loudnessEstim.cpp b/src/app/loudnessEstim.cpp index 63d8645..197ce72 100644 --- a/src/app/loudnessEstim.cpp +++ b/src/app/loudnessEstim.cpp @@ -13,16 +13,16 @@ #if LE_ACCURATE_CALC static const int64_t kFilterCoeffs[4][8] = { // first 4: numerator (16->-32 bit), last 4: denominator (32 bit) - { -974848000, 1329463296, -808124416, 185073664, -946145519, 1253229559, -741406502, 165888314}, // <=32 k - {-1001717760, 1403125760, -874840064, 204996608, -980574000, 1345222905, -822320842, 189236866}, // 44.1 k - {-1007550464, 1419378688, -889847808, 209584128, -988032194, 1365543311, -840618073, 194671779}, // 48.0 k - {-1024000000, 1465647104, -933036032, 222953472, -1009281050, 1424244875, -894338686, 210939497} // >=64 k + { -974848000, 1329463296, -808124416, 185073664, -946145519, 1253229559, -741406502, 165888314}, // <=32k + {-1001717760, 1403125760, -874840064, 204996608, -980574000, 1345222905, -822320842, 189236866}, // 44.1k + {-1007550464, 1419378688, -889847808, 209584128, -988032194, 1365543311, -840618073, 194671779}, // 48.0k + {-1024000000, 1465647104, -933036032, 222953472, -1009281050, 1424244875, -894338686, 210939497} // >=64k }; #endif // constructor -LoudnessEstimator::LoudnessEstimator (int32_t* const inputPcmData, const unsigned bitDepth /*= 24*/, - const unsigned sampleRate /*= 44100*/, const unsigned numChannels /*= 2*/) +LoudnessEstimator::LoudnessEstimator (int32_t* const inputPcmData, const unsigned bitDepth /*= 24*/, + const unsigned sampleRate /*= 44k*/, const unsigned numChannels /*= 2*/) { #if LE_ACCURATE_CALC m_filterCoeffs = kFilterCoeffs[sampleRate <= 44100 ? (sampleRate <= 32000 ? 0 : 1) : (sampleRate <= 48000 ? 2 : 3)]; @@ -161,7 +161,7 @@ uint32_t LoudnessEstimator::getStatistics (const bool includeWarmUp /*= false*/) } if (zg < LE_THRESH_ABS) return peakValue16Bits; - zg = LE_LUFS_OFFSET + 10.0f * log10 (zg / (normFac * numBlocks * (float) m_inputMaxValue * (float) m_inputMaxValue)); + zg = LE_LUFS_OFFSET + 10.0f * (float) log10 (zg / (normFac * numBlocks * (float) m_inputMaxValue * (float) m_inputMaxValue)); #if LE_ACCURATE_CALC zg -= m_filterFactor * 0.046875f; // for sample rates other than 48 kHz #endif diff --git a/src/lib/bitStreamWriter.cpp b/src/lib/bitStreamWriter.cpp index f51049a..2444f28 100644 --- a/src/lib/bitStreamWriter.cpp +++ b/src/lib/bitStreamWriter.cpp @@ -5,7 +5,7 @@ * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third- * party rights, including patent rights. No such rights are granted under this License. * - * Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved. + * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved. */ #include "exhaleLibPch.h" @@ -739,8 +739,16 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex, m_auBitStream.write (usfi, 5); // usacSamplingFrequencyIndex (after SBR dec.!) m_auBitStream.write (shortFrameLength ? 0 : fli, 3);// coreSbrFrameLengthIndex m_auBitStream.write (chConfigurationIndex, 5); // channelConfigurationIndex +#ifdef NO_PREROLL_DATA m_auBitStream.write (numElements - 1, 4); // numElements in UsacDecoderConfig +#else + m_auBitStream.write (numElements, 4); // 4bit numElements in UsacDecoderConfig + m_auBitStream.write (ID_USAC_EXT, 2); // usacElementType[0] = 3, for IPF stuff + m_auBitStream.write (3, 4); // UsacExtElementConfig(), ID_EXT_ELE_AUDIOPREROLL + m_auBitStream.write (0, 6); // usacExtElementConfigLength = 0, rest of config. + bitCount += 12; +#endif for (unsigned el = 0; el < numElements; el++) // el element loop { m_auBitStream.write ((unsigned) elementType[el], 2); // usacElementType[el] @@ -802,7 +810,7 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex, bitCount += (8 - m_auBitStream.heldBitCount) & 7; writeByteAlignment (); // flush bytes - memcpy (audioConfig, &m_auBitStream.stream.front (), __min (15u + fli, bitCount >> 3)); + memcpy (audioConfig, &m_auBitStream.stream.front (), __min (17u + fli, bitCount >> 3)); return (bitCount >> 3); // byte count } @@ -812,7 +820,7 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData, const bool usacIndependencyFlag, const uint8_t numElements, const uint8_t numSwbShort, uint8_t* const tempBuffer, #if !RESTRICT_TO_AAC - const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling, + const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling, const bool ipf, #endif const uint8_t sbrRatioShiftValue, int32_t** const sbrInfoAndData, unsigned char* const accessUnit, const unsigned nSamplesInFrame /*= 1024*/) @@ -822,19 +830,45 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData, if ((elementData == nullptr) || (entropyCoder == nullptr) || (tempBuffer == nullptr) || (sbrInfoAndData == nullptr) || (mdctSignals == nullptr) || (mdctQuantMag == nullptr) || (accessUnit == nullptr) || (nSamplesInFrame > 2048) || #if !RESTRICT_TO_AAC - (noiseFilling == nullptr) || (tw_mdct == nullptr) || + (noiseFilling == nullptr) || (tw_mdct == nullptr) || (ipf && !usacIndependencyFlag) || #endif (numElements == 0) || (numElements > USAC_MAX_NUM_ELEMENTS) || (numSwbShort < MIN_NUM_SWB_SHORT) || (numSwbShort > MAX_NUM_SWB_SHORT)) { return 0; // invalid arguments error } - +#ifndef NO_PREROLL_DATA + if (ipf) // save last AU for ext. data + { + bitCount = __min (65532, (uint32_t) m_auBitStream.stream.size ()); + memcpy (tempBuffer, &m_auBitStream.stream.front (), bitCount); + } +#endif m_auBitStream.reset (); m_frameLength = nSamplesInFrame; m_numSwbShort = numSwbShort; m_uCharBuffer = tempBuffer; m_auBitStream.write (usacIndependencyFlag ? 1 : 0, 1); +#ifndef NO_PREROLL_DATA + m_auBitStream.write (ipf ? 1 : 0, 1); // UsacExtElement, usacExtElementPresent + if (ipf) + { + const unsigned payloadLength = bitCount + 3; // ext. payload size, in bytes! + + m_auBitStream.write (0, 1); // usacExtElementUseDefaultLength = 0 (variable) + m_auBitStream.write (CLIP_UCHAR (payloadLength), 8); + if (payloadLength > 254) m_auBitStream.write (payloadLength - 253, 16); // valueAdd + + m_auBitStream.write (0, 6); // start AudioPreRoll - configLen = reserved = 0 + m_auBitStream.write (1, 2); // numPreRollFrames, only one supported for now! + m_auBitStream.write (bitCount, 16); // auLen + + while (ci < bitCount) m_auBitStream.write (tempBuffer[ci++], 8); // write AU + ci = 0; + bitCount = (payloadLength > 254 ? 26 : 10) + (payloadLength << 3); // for ext. bits + } + bitCount++; // for ElementPresent flag +#endif for (unsigned el = 0; el < numElements; el++) // el element loop { const CoreCoderData* const elData = elementData[el]; @@ -935,7 +969,10 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData, bitCount += (8 - m_auBitStream.heldBitCount) & 7; writeByteAlignment (); // flush bytes +#if RESTRICT_TO_AAC memcpy (accessUnit, &m_auBitStream.stream.front (), __min (768 * ci, bitCount >> 3)); - +#else + memcpy (accessUnit, &m_auBitStream.stream.front (), __min (ci * (ipf ? 1152 : 768), bitCount >> 3)); +#endif return (bitCount >> 3); // byte count } diff --git a/src/lib/bitStreamWriter.h b/src/lib/bitStreamWriter.h index 1df42ee..4605cd4 100644 --- a/src/lib/bitStreamWriter.h +++ b/src/lib/bitStreamWriter.h @@ -5,7 +5,7 @@ * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third- * party rights, including patent rights. No such rights are granted under this License. * - * Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved. + * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved. */ #ifndef _BIT_STREAM_WRITER_H_ @@ -68,7 +68,7 @@ public: const bool usacIndependencyFlag, const uint8_t numElements, const uint8_t numSwbShort, uint8_t* const tempBuffer, #if !RESTRICT_TO_AAC - const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling, + const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling, const bool ipf, #endif const uint8_t sbrRatioShiftValue, int32_t** const sbrInfoAndData, unsigned char* const accessUnit, const unsigned nSamplesInFrame = 1024); diff --git a/src/lib/exhaleEnc.cpp b/src/lib/exhaleEnc.cpp index 0251645..50bbbfd 100644 --- a/src/lib/exhaleEnc.cpp +++ b/src/lib/exhaleEnc.cpp @@ -5,7 +5,7 @@ * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third- * party rights, including patent rights. No such rights are granted under this License. * - * Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved. + * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved. */ #include "exhaleLibPch.h" @@ -773,7 +773,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s const uint16_t scaleSBR = (m_shiftValSBR > 0 || m_nonMpegExt ? sbrRateOffset[m_bitRateMode] : 0); // -25% rate const uint64_t scaleSr = (samplingRate < 27713 ? (samplingRate < 23004 ? 32 : 34) - __min (3 << m_shiftValSBR, m_bitRateMode) : (samplingRate < 37566 && m_bitRateMode != 3u ? 36 : 37)) - (nChannels >> 1); - const uint64_t scaleBr = (m_bitRateMode == 0 ? __min (32, 17u + (((samplingRate + (1 << 11)) >> 12) << 1) - (nChannels >> 1)) + const uint64_t scaleBr = (m_bitRateMode == 0 || m_frameCount <= 1 ? __min (32, 17u + (((samplingRate + (1 << 11)) >> 12) << 1) - (nChannels >> 1)) : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1)) + scaleSBR; uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf; uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS]; @@ -1174,7 +1174,7 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en const uint16_t peakIndex = (shortWinCurr ? 0 : (m_specAnaCurr[ci] >> 5) & 2047); const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort - 2 + (meanSpecFlat[ci] >> 6) : maxSfbLong - 6 + (meanSpecFlat[ci] >> 5)) + (shortWinCurr ? -3 + (((1 << 5) + meanTempFlat[ci]) >> 6) : -7 + (((1 << 4) + meanTempFlat[ci]) >> 5)); - const unsigned targetBitCount25 = ((60000 + 20000 * (m_bitRateMode + m_shiftValSBR)) * nSamplesInFrame) / + const unsigned targetBitCount25 = ((60000 + 20000 * ((m_bitRateMode + m_shiftValSBR) >> (m_frameCount <= 1 ? 2 : 0))) * nSamplesInFrame) / (samplingRate * ((grpData.numWindowGroups + 1) >> 1)); unsigned b = grpData.sfbsPerGroup - 1; @@ -1184,7 +1184,7 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en #if EC_TRELLIS_OPT_CODING if (grpLength == 1) // finalize bit count estimate, RDOC { - estimBitCount = m_sfbQuantizer.quantizeSpecRDOC (entrCoder, grpScaleFacs, __min (estimBitCount + 2, targetBitCount25), + estimBitCount = m_sfbQuantizer.quantizeSpecRDOC (entrCoder, grpScaleFacs, estimBitCount + 2u, grpOff, grpRms, grpData.sfbsPerGroup, m_mdctQuantMag[ci]); for (b = 1; b < grpData.sfbsPerGroup; b++) { @@ -1327,11 +1327,10 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en m_coreSignals[ci][0] |= getSbrEnvelopeAndNoise (&m_coreSignals[ci][nSamplesTempAna - 64 + nSamplesInFrame], msfVal, __max (m_meanTempPrev[ci], meanTempFlat[ci]) >> 3, m_bitRateMode == 0, m_indepFlag, msfSte, tmpValSynch, nSamplesInFrame, &m_coreSignals[ci][1]); - if (ch + 1 == nrChannels) // update the flatness histories + if (ch + 1 == nrChannels) // update flatness histories - TODO: coupling { m_meanSpecPrev[ci] = meanSpecFlat[ci]; m_meanSpecPrev[s] = meanSpecFlat[s]; m_meanTempPrev[ci] = meanTempFlat[ci]; m_meanTempPrev[s] = meanTempFlat[s]; - // TODO: coupling (m_coreSignals[ci][0] |= 1 << 23;) } } ci++; @@ -1341,7 +1340,7 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en return (errorValue > 0 ? 0 : m_outStream.createAudioFrame (m_elementData, m_entropyCoder, m_mdctSignals, m_mdctQuantMag, m_indepFlag, m_numElements, m_numSwbShort, (uint8_t* const) m_tempIntBuf, #if !RESTRICT_TO_AAC - m_timeWarping, m_noiseFilling, + m_timeWarping, m_noiseFilling, (m_frameCount == 2), #endif m_shiftValSBR, m_coreSignals, m_outAuData, nSamplesInFrame)); // returns AU size } diff --git a/src/lib/exhaleLibPch.cpp b/src/lib/exhaleLibPch.cpp index 65d71dc..d7c838d 100644 --- a/src/lib/exhaleLibPch.cpp +++ b/src/lib/exhaleLibPch.cpp @@ -205,7 +205,7 @@ int32_t getSbrEnvelopeAndNoise (int32_t* const sbrLevels, const uint8_t specFlat const int32_t p[3] = {prev & SCHAR_MAX, (prev >> 8) & SCHAR_MAX, (prev >> 16) & SCHAR_MAX}; if ((t > 0 || !ind) && (getSbrDeltaBitCount (c[0] - p[0], true) + getSbrDeltaBitCount (c[1] - p[1], true) + - getSbrDeltaBitCount (c[2] - p[2], true) < 12)) // approximate! + getSbrDeltaBitCount (c[2] - p[2], true) < 13)) // approximate! { tmpBest |= 1 << (12 + t); // delta-time coding flag for envelope