diff --git a/src/app/basicMP4Writer.cpp b/src/app/basicMP4Writer.cpp index 2aad5e5..1ac88d8 100644 --- a/src/app/basicMP4Writer.cpp +++ b/src/app/basicMP4Writer.cpp @@ -1,5 +1,5 @@ /* basicMP4Writer.cpp - source file for class with basic MPEG-4 file writing capability - * written by C. R. Helmrich, last modified in 2019 - see License.htm for legal notices + * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices * * The copyright in this software is being made available under a Modified BSD-Style License * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third- @@ -71,7 +71,7 @@ static uint16_t toUShortValue (const uint8_t hiByte, const uint8_t loByte) } // public functions -int BasicMP4Writer::addFrameAU (const uint8_t* byteBuf, const uint32_t byteOffset, const uint32_t byteCount) +int BasicMP4Writer::addFrameAU (const uint8_t* byteBuf, const uint32_t byteCount) { if ((m_fileHandle == -1) || (m_m4aMdatSize > 0xFFFFFFF0u - byteCount)) { @@ -94,7 +94,7 @@ int BasicMP4Writer::addFrameAU (const uint8_t* byteBuf, const uint32_t byteOffse } int BasicMP4Writer::finishFile (const unsigned avgBitrate, const unsigned maxBitrate, const uint32_t audioLength, - const uint32_t modifTime /*= 0*/) + const uint32_t modifTime /*= 0*/, const uint8_t* ascBuf /*= nullptr*/) { const unsigned numFramesFirstPeriod = __min (m_frameCount, m_rndAccPeriod); const unsigned numFramesFinalPeriod = (m_frameCount <= m_rndAccPeriod ? 0 : m_frameCount % m_rndAccPeriod); @@ -102,7 +102,14 @@ int BasicMP4Writer::finishFile (const unsigned avgBitrate, const unsigned maxBit const uint32_t stszAtomSize = STSX_BSIZE + 4 /*bytes for sampleSize*/ + m_frameCount * 4; const uint32_t stscAtomSize = STSX_BSIZE + (numFramesFinalPeriod == 0 ? 
12 : 24); const uint32_t stcoAtomSize = STSX_BSIZE + (uint32_t) m_rndAccOffsets.size () * 4; +#ifndef NO_FIX_FOR_ISSUE_1 + const uint32_t stssAtomSize = STSX_BSIZE; + const uint32_t stblIncrSize = m_ascSizeM5 + stszAtomSize + stscAtomSize + stcoAtomSize + stssAtomSize; + const uint32_t headerBytes = STAT_HEADER_SIZE + m_dynamicHeader.size () + stscAtomSize + stcoAtomSize + stssAtomSize; +#else const uint32_t stblIncrSize = m_ascSizeM5 + stszAtomSize + stscAtomSize + stcoAtomSize; + const uint32_t headerBytes = STAT_HEADER_SIZE + m_dynamicHeader.size () + stscAtomSize + stcoAtomSize; +#endif const uint32_t moovAtomSize = toBigEndian (toUShortValue (MOOV_BSIZE) + stblIncrSize); const uint32_t trakAtomSize = toBigEndian (toUShortValue (TRAK_BSIZE) + stblIncrSize); const uint32_t mdiaAtomSize = toBigEndian (toUShortValue (MDIA_BSIZE) + stblIncrSize); @@ -110,7 +117,6 @@ int BasicMP4Writer::finishFile (const unsigned avgBitrate, const unsigned maxBit const uint32_t stblAtomSize = toBigEndian (toUShortValue (STBL_BSIZE) + stblIncrSize); const uint32_t numSamplesBE = toBigEndian (audioLength); const uint32_t timeStampBE = toBigEndian (modifTime); - const uint32_t headerBytes = STAT_HEADER_SIZE + (uint32_t) m_dynamicHeader.size () + stscAtomSize + stcoAtomSize; uint32_t* const header4Byte = (uint32_t* const) m_staticHeader; int bytesWritten = 0; @@ -119,6 +125,13 @@ int BasicMP4Writer::finishFile (const unsigned avgBitrate, const unsigned maxBit return 1; // invalid file handle or file getting too big } + if (ascBuf != nullptr) // update ASC + UC data if required + { + memcpy (&m_staticHeader[571], ascBuf, 5 * sizeof (uint8_t)); + + for (unsigned i = 0; i < m_ascSizeM5; i++) m_dynamicHeader.at (i) = ascBuf[5 + i]; + } + // finish setup of fixed-length part of MPEG-4 file header if (modifTime > 0) { @@ -211,6 +224,19 @@ int BasicMP4Writer::finishFile (const unsigned avgBitrate, const unsigned maxBit m_dynamicHeader.push_back ((rndAccOffset >> 8) & UCHAR_MAX); 
m_dynamicHeader.push_back ( rndAccOffset & UCHAR_MAX); } +#ifndef NO_FIX_FOR_ISSUE_1 + m_dynamicHeader.push_back ((stssAtomSize >> 24) & UCHAR_MAX); + m_dynamicHeader.push_back ((stssAtomSize >> 16) & UCHAR_MAX); + m_dynamicHeader.push_back ((stssAtomSize >> 8) & UCHAR_MAX); + m_dynamicHeader.push_back ( stssAtomSize & UCHAR_MAX); + m_dynamicHeader.push_back (0x73); m_dynamicHeader.push_back (0x74); + m_dynamicHeader.push_back (0x73); m_dynamicHeader.push_back (0x73); // stss + m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x00); + m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x00); + m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x00); + m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x00); +#endif + m_dynamicHeader.push_back ((m_m4aMdatSize >> 24) & UCHAR_MAX); m_dynamicHeader.push_back ((m_m4aMdatSize >> 16) & UCHAR_MAX); m_dynamicHeader.push_back ((m_m4aMdatSize >> 8) & UCHAR_MAX); @@ -248,7 +274,11 @@ int BasicMP4Writer::initHeader (const uint32_t audioLength) // reserve bytes for const unsigned frameCount = ((audioLength + m_frameLength - 1) / m_frameLength) + (flushFrameUsed ? 2 : 1); const unsigned chunkCount = ((frameCount + m_rndAccPeriod - 1) / m_rndAccPeriod); const unsigned finalChunk = (frameCount <= m_rndAccPeriod ? 0 : frameCount % m_rndAccPeriod); +#ifndef NO_FIX_FOR_ISSUE_1 + const int estimHeaderSize = STAT_HEADER_SIZE + m_ascSizeM5 + 6+4 + frameCount * 4 /*stsz*/ + STSX_BSIZE * 4 + +#else const int estimHeaderSize = STAT_HEADER_SIZE + m_ascSizeM5 + 6+4 + frameCount * 4 /*stsz*/ + STSX_BSIZE * 3 + +#endif (finalChunk == 0 ? 12 : 24) /*stsc*/ + chunkCount * 4 /*stco*/ + 8 /*mdat*/; int bytesWritten = 0; diff --git a/src/app/basicMP4Writer.h b/src/app/basicMP4Writer.h index 4d73e7f..293ab87 100644 --- a/src/app/basicMP4Writer.h +++ b/src/app/basicMP4Writer.h @@ -1,5 +1,5 @@ /* basicMP4Writer.h - header file for class with basic MPEG-4 file writing capability - * written by C. R. 
Helmrich, last modified in 2019 - see License.htm for legal notices + * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices * * The copyright in this software is being made available under a Modified BSD-Style License * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third- @@ -50,9 +50,9 @@ public: // destructor ~BasicMP4Writer() { m_dynamicHeader.clear (); m_rndAccOffsets.clear (); } // public functions - int addFrameAU (const uint8_t* byteBuf, const uint32_t byteOffset, const uint32_t byteCount); + int addFrameAU (const uint8_t* byteBuf, const uint32_t byteCount); int finishFile (const unsigned avgBitrate, const unsigned maxBitrate, const uint32_t audioLength, - const uint32_t modifTime = 0); + const uint32_t modifTime = 0, const uint8_t* ascBuf = nullptr); unsigned getFrameCount () const { return m_frameCount; } int initHeader (const uint32_t audioLength); unsigned open (const int mp4FileHandle, const unsigned sampleRate, const unsigned numChannels, diff --git a/src/app/exhaleApp.cpp b/src/app/exhaleApp.cpp index 5c57f19..03c506b 100644 --- a/src/app/exhaleApp.cpp +++ b/src/app/exhaleApp.cpp @@ -1,5 +1,5 @@ /* exhaleApp.cpp - source file with main() routine for exhale application executable - * written by C. R. Helmrich, last modified in 2019 - see License.htm for legal notices + * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices * * The copyright in this software is being made available under a Modified BSD-Style License * and comes with ABSOLUTELY NO WARRANTY. 
This software may be subject to other third- @@ -11,6 +11,7 @@ #include "exhaleAppPch.h" #include "basicMP4Writer.h" #include "basicWavReader.h" +#include "loudnessEstim.h" // #define USE_EXHALELIB_DLL (defined (_WIN32) || defined (WIN32) || defined (_WIN64) || defined (WIN64)) #if USE_EXHALELIB_DLL #include "exhaleDecl.h" @@ -26,6 +27,12 @@ #if defined (_WIN32) || defined (WIN32) || defined (_WIN64) || defined (WIN64) #include <windows.h> +// constants, experimental macros +#define EA_LOUD_INIT 16399u // bsSamplePeakLevel = 0 & methodValue = 0 +#define EA_LOUD_NORM -42.25f // -100 + 57.75 of ISO 23003-4, Table A.48 +#define EA_PEAK_NORM -96.33f // 20 * log10(2^-16), 16-bit normalization +#define EA_PEAK_MIN 0.262f // 20 * log10() + EA_PEAK_NORM = -108 dBFS + #define EXHALE_TEXT_BLUE (FOREGROUND_INTENSITY | FOREGROUND_BLUE | FOREGROUND_GREEN) #define EXHALE_TEXT_PINK (FOREGROUND_INTENSITY | FOREGROUND_BLUE | FOREGROUND_RED) #else // Linux, MacOS, Unix @@ -46,6 +53,7 @@ int main (const int argc, char* argv[]) int32_t* inPcmData = nullptr; // 24-bit WAVE audio input buffer uint8_t* outAuData = nullptr; // access unit (AU) output buffer int inFileHandle = -1, outFileHandle = -1; + uint32_t loudStats = EA_LOUD_INIT; // valid empty loudness data uint16_t i, exePathEnd = 0; uint16_t compatibleExtensionFlag = 0; // 0: disabled, 1: enabled uint16_t coreSbrFrameLengthIndex = 1; // 0: 768, 1: 1024 samples @@ -359,8 +367,11 @@ int main (const int argc, char* argv[]) const unsigned sampleRate = wavReader.getSampleRate (); const unsigned indepPeriod = (sampleRate < 48000 ? sampleRate / frameLength : 45 /*for 50-Hz video, use 50 for 60-Hz video*/); const unsigned mod3Percent = unsigned ((expectLength * (3 + coreSbrFrameLengthIndex)) >> 17); - uint32_t byteCount = 0, bw = 0, bwMax = 0, br; // for bytes read and bit-rate + uint32_t byteCount = 0, bw = (numChannels < 7 ? 
loudStats : 0); + uint32_t br, bwMax = 0; // br will be used to hold bytes read and/or bit-rate uint32_t headerRes = 0; + // initialize LoudnessEstimator object + LoudnessEstimator loudnessEst (inPcmData, 24 /*bit*/, sampleRate, numChannels); // open & prepare ExhaleEncoder object #if USE_EXHALELIB_DLL ExhaleEncAPI& exhaleEnc = *exhaleCreate (inPcmData, outAuData, sampleRate, numChannels, frameLength, indepPeriod, variableCoreBitRateMode + @@ -376,7 +387,7 @@ int main (const int argc, char* argv[]) // init encoder, generate UsacConfig() memset (outAuData, 0, 108 * sizeof (uint8_t)); // max. allowed ASC + UC size - i = exhaleEnc.initEncoder (outAuData, &bw); // bw holds actual ASC + UC size + i = exhaleEnc.initEncoder (outAuData, &bw); // bw stores actual ASC + UC size if ((i |= mp4Writer.open (outFileHandle, sampleRate, numChannels, inSampDepth, frameLength, startLength, indepPeriod, outAuData, bw, time (nullptr) & UINT_MAX, (char) variableCoreBitRateMode)) != 0) @@ -433,7 +444,7 @@ int main (const int argc, char* argv[]) } if (bwMax < bw) bwMax = bw; // write first AU, add frame to header - if (mp4Writer.addFrameAU (outAuData, byteCount, bw) != bw) + if ((mp4Writer.addFrameAU (outAuData, bw) != bw) || loudnessEst.addNewPcmData (frameLength)) { #if USE_EXHALELIB_DLL exhaleDelete (&exhaleEnc); @@ -456,7 +467,7 @@ int main (const int argc, char* argv[]) } if (bwMax < bw) bwMax = bw; // write new AU, add frame to header - if (mp4Writer.addFrameAU (outAuData, byteCount, bw) != bw) + if ((mp4Writer.addFrameAU (outAuData, bw) != bw) || loudnessEst.addNewPcmData (frameLength)) { #if USE_EXHALELIB_DLL exhaleDelete (&exhaleEnc); @@ -486,7 +497,7 @@ int main (const int argc, char* argv[]) } if (bwMax < bw) bwMax = bw; // write final AU, add frame to header - if (mp4Writer.addFrameAU (outAuData, byteCount, bw) != bw) + if ((mp4Writer.addFrameAU (outAuData, bw) != bw) || loudnessEst.addNewPcmData (frameLength)) { #if USE_EXHALELIB_DLL exhaleDelete (&exhaleEnc); @@ -513,7 
+524,7 @@ int main (const int argc, char* argv[]) } if (bwMax < bw) bwMax = bw; // the flush AU, add frame to header - if (mp4Writer.addFrameAU (outAuData, byteCount, bw) != bw) + if (mp4Writer.addFrameAU (outAuData, bw) != bw) // zero, no loudness update { #if USE_EXHALELIB_DLL exhaleDelete (&exhaleEnc); @@ -555,14 +566,34 @@ int main (const int argc, char* argv[]) bw = _WRITE(outFileHandle, inPcmData, br); } } + i = 0; // no errors + // loudness and sample peak of program + loudStats = loudnessEst.getStatistics (); + if (numChannels < 7) + { + // quantize for loudnessInfo() reset + const uint32_t qLoud = uint32_t (4.0f * __max (0.0f, (loudStats >> 16) / 512.f + EA_LOUD_NORM) + 0.5f); + const uint32_t qPeak = uint32_t (32.0f * (20.0f - 20.0f * log10 (__max (EA_PEAK_MIN, float (loudStats & USHRT_MAX))) - EA_PEAK_NORM) + 0.5f); + + // recreate ASC + UC + loudness data + bw = EA_LOUD_INIT | (qPeak << 18) | (qLoud << 6); // measurementSystem is 3 + memset (outAuData, 0, 108 * sizeof (uint8_t)); // max allowed ASC + UC size + i = exhaleEnc.initEncoder (outAuData, &bw); // with finished loudnessInfo() + } // mean & max. bit-rate of encoded AUs br = uint32_t (((actualLength >> 1) + 8 * (byteCount + 4 * (int64_t) mp4Writer.getFrameCount ()) * sampleRate) / actualLength); bw = uint32_t (((frameLength >> 1) + 8 * (bwMax + 4u /* maximum AU size + stsz as a bit-rate */) * sampleRate) / frameLength); - bw = mp4Writer.finishFile (br, bw, uint32_t (__min (UINT_MAX - startLength, actualLength)), time (nullptr) & UINT_MAX); - + bw = mp4Writer.finishFile (br, bw, uint32_t (__min (UINT_MAX - startLength, actualLength)), time (nullptr) & UINT_MAX, + (i == 0) && (numChannels < 7) ? 
outAuData : nullptr); + // print out collected file statistics fprintf_s (stdout, " Done, actual average %.1f kbit/s\n\n", (float) br * 0.001f); - i = 0; // no errors + if (numChannels < 7) + { + fprintf_s (stdout, " Input statistics: Mobile loudness %.2f LUFS,\tsample peak level %.2f dBFS\n\n", + (loudStats >> 16) / 512.f - 100.0f, 20.0f * log10 (__max (EA_PEAK_MIN, float (loudStats & USHRT_MAX))) + EA_PEAK_NORM); + } + if (!readStdin && (actualLength != expectLength || bw != headerRes)) { fprintf_s (stderr, " WARNING: %lld sample frames read but %lld sample frames expected!\n", (long long) actualLength, (long long) expectLength); diff --git a/src/app/exhaleApp_vs2012.vcxproj b/src/app/exhaleApp_vs2012.vcxproj index 40b0b08..eaacedc 100644 --- a/src/app/exhaleApp_vs2012.vcxproj +++ b/src/app/exhaleApp_vs2012.vcxproj @@ -157,6 +157,7 @@ + @@ -168,6 +169,7 @@ Create Create + diff --git a/src/app/exhaleApp_vs2012.vcxproj.filters b/src/app/exhaleApp_vs2012.vcxproj.filters index bab0bec..bb3601c 100644 --- a/src/app/exhaleApp_vs2012.vcxproj.filters +++ b/src/app/exhaleApp_vs2012.vcxproj.filters @@ -30,6 +30,9 @@ Header Files + + Header Files + @@ -44,6 +47,9 @@ Source Files + + Source Files + diff --git a/src/app/loudnessEstim.cpp b/src/app/loudnessEstim.cpp new file mode 100644 index 0000000..655fe3a --- /dev/null +++ b/src/app/loudnessEstim.cpp @@ -0,0 +1,126 @@ +/* loudnessEstim.cpp - source file for class with ITU-R BS.1770-4 loudness level estimation + * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices + * + * The copyright in this software is being made available under a Modified BSD-Style License + * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third- + * party rights, including patent rights. No such rights are granted under this License. + * + * Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved. 
+ */ + +#include "exhaleAppPch.h" +#include "loudnessEstim.h" + +// constructor +LoudnessEstimator::LoudnessEstimator (int32_t* const inputPcmData, const unsigned bitDepth /*= 24*/, + const unsigned sampleRate /*= 44100*/, const unsigned numChannels /*= 2*/) +{ + m_filterFactor = 224 + (__min (SHRT_MAX, (int) sampleRate - 47616) >> 10); + m_gbHopSize64 = (__min (163519, sampleRate) + 320) / 640; // 100 msec + m_gbNormFactor = (m_gbHopSize64 == 0 ? 0 : 1.0f / (4.0f * m_gbHopSize64)); + m_inputChannels = __min (8, numChannels); + m_inputMaxValue = 1 << (__min (24, bitDepth) - 1); + m_inputPcmData = inputPcmData; + + reset (); + for (unsigned ch = 0; ch < 8; ch++) m_filterMemoryI[ch] = m_filterMemoryO[ch] = 0; +} + +// public functions +uint32_t LoudnessEstimator::addNewPcmData (const unsigned samplesPerChannel) +{ + const unsigned frameSize64 = samplesPerChannel >> 6; // in units of 64 + const unsigned numSamples64 = 1 << 6; // sub-frame size (64, of course) + const int32_t* chSig = m_inputPcmData; + uint64_t* newQuarterPower = m_powerValue[3]; + unsigned ch, f, s; + + if ((chSig == nullptr) || (frameSize64 == 0)) + { + return 1; // invalid sample pointer or frame size + } + + // de-interleave and K-filter incoming audio samples in sub-frame units + for (f = 0; f < frameSize64; f++) // sub-frame loop + { + for (s = 0; s < numSamples64; s++) // sample loop + { + for (ch = 0; ch < m_inputChannels; ch++) + { + // simplified K-filter, including 500-Hz high-pass pre-processing + const int32_t xi = *(chSig++); + const int32_t yi = xi - m_filterMemoryI[ch] + ((128 + m_filterFactor * m_filterMemoryO[ch]) >> 8); + const uint32_t a = abs (xi); + + m_filterMemoryI[ch] = xi; + m_filterMemoryO[ch] = yi; + newQuarterPower[ch] += (int64_t) yi * (int64_t) yi; + + if (m_inputPeakValue < a) m_inputPeakValue = a; // get peak level + } + } // s + + if (++m_gbHopLength64 >= m_gbHopSize64) // completed 100-msec quarter + { + const float thrA = LE_THRESH_ABS * (float) m_inputMaxValue * 
(float) m_inputMaxValue; + uint64_t zij, zj = 0; + + for (ch = 0; ch < m_inputChannels; ch++) // sum 64-sample averages + { + zij = (m_powerValue[0][ch] + m_powerValue[1][ch] + m_powerValue[2][ch] + newQuarterPower[ch] + (1u << 5)) >> 6; + zj += (ch > 2 ? (16u + 45 * zij) >> 5 : zij); // weighting by G_i + } + + if (zj * m_gbNormFactor > thrA) // use sqrt (block RMS) if lj > -70 + { + if (m_gbRmsValues.size () < INT_MAX) m_gbRmsValues.push_back (uint32_t (sqrt (zj * m_gbNormFactor) + 0.5f)); + } + + for (ch = 0; ch < m_inputChannels; ch++) // set up new gating block + { + m_powerValue[0][ch] = m_powerValue[1][ch]; + m_powerValue[1][ch] = m_powerValue[2][ch]; + m_powerValue[2][ch] = newQuarterPower[ch]; + newQuarterPower[ch] = 0; + } + m_gbHopLength64 = 0; + } + } + + return 0; // no error +} + +uint32_t LoudnessEstimator::getStatistics (const bool includeWarmUp /*= false*/) +{ + const uint32_t numWarmUpBlocks = (includeWarmUp ? 0 : 3); + const uint32_t numGatingBlocks = __max (numWarmUpBlocks, m_gbRmsValues.size ()) - numWarmUpBlocks; + const uint16_t maxValueDivisor = __max (1u, m_inputMaxValue >> 16); + const uint16_t peakValue16Bits = __min (USHRT_MAX, (m_inputPeakValue + (maxValueDivisor >> 1)) / maxValueDivisor); + uint32_t i, numBlocks = 0; + float thrR, zg; + + if (numGatingBlocks == 0) return peakValue16Bits; // no loudness stats + + const float normFac = 1.0f / numGatingBlocks; // prevents loop overflow + + // calculate arithmetic average of blocks satisfying absolute threshold + for (zg = 0.0f, i = numWarmUpBlocks; i < m_gbRmsValues.size (); i++) + { + zg += normFac * (float) m_gbRmsValues.at (i) * (float) m_gbRmsValues.at (i); + } + if (zg < LE_THRESH_ABS) return peakValue16Bits; // quiet loudness stats + + thrR = LE_THRESH_REL * zg; // find blocks satisfying relative threshold + for (zg = 0.0f, i = numWarmUpBlocks; i < m_gbRmsValues.size (); i++) + { + const float p = (float) m_gbRmsValues.at (i) * (float) m_gbRmsValues.at (i); + + if (p > thrR) { zg += 
normFac * p; numBlocks++; } + } + if (zg < LE_THRESH_ABS) return peakValue16Bits; // quiet loudness stats + + zg = LE_LUFS_OFFSET + 10.0f * log10 (zg / (normFac * numBlocks * (float) m_inputMaxValue * (float) m_inputMaxValue)); + i = __max (0, int32_t ((zg + 100.0f) * 512.0f + 0.5f)); // map to uint + + return (__min (USHRT_MAX, i) << 16) | peakValue16Bits; // L = i/512-100 +} diff --git a/src/app/loudnessEstim.h b/src/app/loudnessEstim.h new file mode 100644 index 0000000..68e78f5 --- /dev/null +++ b/src/app/loudnessEstim.h @@ -0,0 +1,54 @@ +/* loudnessEstim.h - header file for class with ITU-R BS.1770-4 loudness level estimation + * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices + * + * The copyright in this software is being made available under a Modified BSD-Style License + * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third- + * party rights, including patent rights. No such rights are granted under this License. + * + * Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved. 
+ */ + +#ifndef _LOUDNESS_ESTIM_H_ +#define _LOUDNESS_ESTIM_H_ + +#include "exhaleAppPch.h" + +// constants, experimental macros +#define LE_THRESH_ABS (15.0f / 134217728.0f) // absolute threshold for -70 LUFS +#define LE_THRESH_REL 0.1f // 2nd stage, relative threshold 10 dB below L +#define LE_LUFS_OFFSET -0.484375f // to return -3.01 LUFS for 997-Hz 0-dBFS sine + +// ITU-R loudness estimator class +class LoudnessEstimator +{ +private: + + // member variables + int32_t m_filterMemoryI[8]; // channel-wise preceding K-weighting filter input + int32_t m_filterMemoryO[8]; // channel-wise previous K-weighting filter output + uint64_t m_powerValue[4][8]; // channel-wise power in each gating block quarter + float m_gbNormFactor; // 64-sample normalization factor, 1/(4*m_gbHopSize64) + uint8_t m_filterFactor; // sampling rate dependent K-weighting filter constant + uint8_t m_gbHopLength64; // number of 64-sample units in gating block quarter + uint8_t m_gbHopSize64; // hop-size between gating blocks, 25% of block length + uint8_t m_inputChannels; + uint32_t m_inputMaxValue; + uint32_t m_inputPeakValue; + int32_t* m_inputPcmData; + std::vector<uint32_t> m_gbRmsValues; // sqrt of power average per gating block + +public: + + // constructor + LoudnessEstimator (int32_t* const inputPcmData, const unsigned bitDepth = 24, + const unsigned sampleRate = 44100, const unsigned numChannels = 2); + // destructor + ~LoudnessEstimator () { reset (); } + // public functions + uint32_t addNewPcmData (const unsigned samplesPerChannel); + uint32_t getStatistics (const bool includeWarmUp = false); + void reset () { m_gbHopLength64 = m_inputPeakValue = 0; m_gbRmsValues.clear (); memset (m_powerValue, 0, sizeof (m_powerValue)); } + +}; // LoudnessEstimator + +#endif // _LOUDNESS_ESTIM_H_ diff --git a/src/lib/bitStreamWriter.cpp b/src/lib/bitStreamWriter.cpp index bcfc191..4ef25c7 100644 --- a/src/lib/bitStreamWriter.cpp +++ b/src/lib/bitStreamWriter.cpp @@ -1,5 +1,5 @@ /* bitStreamWriter.cpp - source 
file for class with basic bit-stream writing capability - * written by C. R. Helmrich, last modified in 2019 - see License.htm for legal notices + * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices * * The copyright in this software is being made available under a Modified BSD-Style License * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third- @@ -375,7 +375,7 @@ unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData, // public functions unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex, const bool shortFrameLength, const uint8_t chConfigurationIndex, const uint8_t numElements, - const ELEM_TYPE* const elementType, const bool configExtensionPresent, + const ELEM_TYPE* const elementType, const uint32_t loudnessInfo, #if !RESTRICT_TO_AAC const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling, #endif @@ -429,25 +429,28 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex, } } // for el - m_auBitStream.write (configExtensionPresent ? 1 : 0, 1); // usacConfigExten... - if (configExtensionPresent) // 23003-4: loudnessInfo + m_auBitStream.write (loudnessInfo > 0 ? 1 : 0, 1); // ..ConfigExtensionPresent + if (loudnessInfo > 0) // ISO 23003-4: loudnessInfo() { + const unsigned methodDefinition = (loudnessInfo >> 14) & 0xF; + const unsigned methodValueBits = (methodDefinition == 7 ? 5 : (methodDefinition == 8 ? 2 : 8)); + m_auBitStream.write (0, 2); // numConfigExtensions m_auBitStream.write (ID_EXT_LOUDNESS_INFO, 4); - m_auBitStream.write (8, 4); // usacConfigExtLength + m_auBitStream.write (methodValueBits < 3 ? 
7 : 8, 4); // usacConfigExtLength m_auBitStream.write (1, 12);// loudnessInfoCount=1 - m_auBitStream.write (1, 14); // peakLevelPresent=1 - m_auBitStream.write (0, 12); // bsSamplePeakLevel + m_auBitStream.write (1, 14);// samplePeakLevel..=1 + m_auBitStream.write ((loudnessInfo >> 18) & 0xFFF, 12); // bsSamplePeakLevel m_auBitStream.write (1, 5); // measurementCount=1 + m_auBitStream.write (methodDefinition, 4); + m_auBitStream.write ((loudnessInfo >> 6) & ((1 << methodValueBits) - 1), methodValueBits); + m_auBitStream.write ((loudnessInfo >> 2) & 0xF, 4); // measurementSystem + m_auBitStream.write ((loudnessInfo & 0x3), 2); // reliability, 3 = accurate - m_auBitStream.write (1, 4); // methodDefinition=1 - m_auBitStream.write (0, 8); // methodValue storage - m_auBitStream.write (0, 4); // measurementSystem=0 - m_auBitStream.write (3, 2); // reliability=3, good - - m_auBitStream.write (0, 1); // ...SetExtPresent=0 - bitCount += 72; + m_auBitStream.write (0, 1); // loudnessInfoSetExtPresent=0, payload padding + bitCount += (methodValueBits < 3 ? 66 : 74); + if (methodValueBits >= 3) m_auBitStream.write (0, 10 - methodValueBits); } bitCount += (8 - m_auBitStream.heldBitCount) & 7; diff --git a/src/lib/bitStreamWriter.h b/src/lib/bitStreamWriter.h index 9ede39d..2a5aaa7 100644 --- a/src/lib/bitStreamWriter.h +++ b/src/lib/bitStreamWriter.h @@ -1,5 +1,5 @@ /* bitStreamWriter.h - header file for class with basic bit-stream writing capability - * written by C. R. Helmrich, last modified in 2019 - see License.htm for legal notices + * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices * * The copyright in this software is being made available under a Modified BSD-Style License * and comes with ABSOLUTELY NO WARRANTY. 
This software may be subject to other third- @@ -56,7 +56,7 @@ public: // public functions unsigned createAudioConfig (const char samplingFrequencyIndex, const bool shortFrameLength, const uint8_t chConfigurationIndex, const uint8_t numElements, - const ELEM_TYPE* const elementType, const bool configExtensionPresent, + const ELEM_TYPE* const elementType, const uint32_t loudnessInfo, #if !RESTRICT_TO_AAC const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling, #endif diff --git a/src/lib/exhaleEnc.cpp b/src/lib/exhaleEnc.cpp index 407022a..342a8ef 100644 --- a/src/lib/exhaleEnc.cpp +++ b/src/lib/exhaleEnc.cpp @@ -1158,7 +1158,11 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o m_tempAnalyzer.getTempAnalysisStats (m_tempAnaNext, nChannels); m_tempAnalyzer.getTransientLocation (m_tranLocNext, nChannels); +#ifndef NO_FIX_FOR_ISSUE_1 + m_indepFlag = (((m_frameCount++) % m_indepPeriod) <= 1); // configure usacIndependencyFlag +#else m_indepFlag = (((m_frameCount++) % m_indepPeriod) == 0); // configure usacIndependencyFlag +#endif for (unsigned el = 0; el < m_numElements; el++) // element loop { @@ -1534,6 +1538,32 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin } if (errorValue > 0) return errorValue; + // get window band table index + errorValue = (unsigned) m_frequencyIdx; // for temporary storage +#if RESTRICT_TO_AAC + m_swbTableIdx = freqIdxToSwbTableIdxAAC[errorValue]; +#else + m_swbTableIdx = (m_frameLength == CCFL_768 ? freqIdxToSwbTableIdx768[errorValue] : freqIdxToSwbTableIdxAAC[errorValue]); +#endif + errorValue = 0; + + if (m_elementData[0] != nullptr) // initEncoder was called before, don't reallocate memory + { + if (audioConfigBuffer != nullptr) // recreate the UsacConfig() + { + errorValue = m_outStream.createAudioConfig (m_frequencyIdx, m_frameLength != CCFL_1024, chConf, m_numElements, + elementTypeConfig[chConf], audioConfigBytes ? 
*audioConfigBytes : 0, +#if !RESTRICT_TO_AAC + m_timeWarping, m_noiseFilling, +#endif + audioConfigBuffer); + if (audioConfigBytes) *audioConfigBytes = errorValue; // size of UsacConfig() in bytes + errorValue = (errorValue == 0 ? 1 : 0); + } + + return errorValue; + } + // allocate all helper structs for (unsigned el = 0; el < m_numElements; el++) // element loop { @@ -1571,14 +1601,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin if (errorValue > 0) return errorValue; // initialize coder class memory - errorValue = (unsigned) m_frequencyIdx; // for temporary storage -#if RESTRICT_TO_AAC - m_swbTableIdx = freqIdxToSwbTableIdxAAC[errorValue]; -#else - m_swbTableIdx = (m_frameLength == CCFL_768 ? freqIdxToSwbTableIdx768[errorValue] : freqIdxToSwbTableIdxAAC[errorValue]); -#endif - m_tempIntBuf = m_timeSignals[0]; - errorValue = 0; + m_tempIntBuf = m_timeSignals[0]; #if EC_TRELLIS_OPT_CODING if (m_sfbQuantizer.initQuantMemory (nSamplesInFrame, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode) > 0 || #else @@ -1593,7 +1616,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin if ((errorValue == 0) && (audioConfigBuffer != nullptr)) // save UsacConfig() for writeout { errorValue = m_outStream.createAudioConfig (m_frequencyIdx, m_frameLength != CCFL_1024, chConf, m_numElements, - elementTypeConfig[chConf], false /*usacConfigExtensionPresent=0*/, + elementTypeConfig[chConf], audioConfigBytes ? *audioConfigBytes : 0, #if !RESTRICT_TO_AAC m_timeWarping, m_noiseFilling, #endif