loudness, issue 1

This commit is contained in:
Christian R. Helmrich
2020-02-15 17:00:54 +01:00
parent 128e94457a
commit d6bb32b072
10 changed files with 317 additions and 42 deletions

View File

@ -1,5 +1,5 @@
/* basicMP4Writer.cpp - source file for class with basic MPEG-4 file writing capability /* basicMP4Writer.cpp - source file for class with basic MPEG-4 file writing capability
* written by C. R. Helmrich, last modified in 2019 - see License.htm for legal notices * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
* *
* The copyright in this software is being made available under a Modified BSD-Style License * The copyright in this software is being made available under a Modified BSD-Style License
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third- * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
@ -71,7 +71,7 @@ static uint16_t toUShortValue (const uint8_t hiByte, const uint8_t loByte)
} }
// public functions // public functions
int BasicMP4Writer::addFrameAU (const uint8_t* byteBuf, const uint32_t byteOffset, const uint32_t byteCount) int BasicMP4Writer::addFrameAU (const uint8_t* byteBuf, const uint32_t byteCount)
{ {
if ((m_fileHandle == -1) || (m_m4aMdatSize > 0xFFFFFFF0u - byteCount)) if ((m_fileHandle == -1) || (m_m4aMdatSize > 0xFFFFFFF0u - byteCount))
{ {
@ -94,7 +94,7 @@ int BasicMP4Writer::addFrameAU (const uint8_t* byteBuf, const uint32_t byteOffse
} }
int BasicMP4Writer::finishFile (const unsigned avgBitrate, const unsigned maxBitrate, const uint32_t audioLength, int BasicMP4Writer::finishFile (const unsigned avgBitrate, const unsigned maxBitrate, const uint32_t audioLength,
const uint32_t modifTime /*= 0*/) const uint32_t modifTime /*= 0*/, const uint8_t* ascBuf /*= nullptr*/)
{ {
const unsigned numFramesFirstPeriod = __min (m_frameCount, m_rndAccPeriod); const unsigned numFramesFirstPeriod = __min (m_frameCount, m_rndAccPeriod);
const unsigned numFramesFinalPeriod = (m_frameCount <= m_rndAccPeriod ? 0 : m_frameCount % m_rndAccPeriod); const unsigned numFramesFinalPeriod = (m_frameCount <= m_rndAccPeriod ? 0 : m_frameCount % m_rndAccPeriod);
@ -102,7 +102,14 @@ int BasicMP4Writer::finishFile (const unsigned avgBitrate, const unsigned maxBit
const uint32_t stszAtomSize = STSX_BSIZE + 4 /*bytes for sampleSize*/ + m_frameCount * 4; const uint32_t stszAtomSize = STSX_BSIZE + 4 /*bytes for sampleSize*/ + m_frameCount * 4;
const uint32_t stscAtomSize = STSX_BSIZE + (numFramesFinalPeriod == 0 ? 12 : 24); const uint32_t stscAtomSize = STSX_BSIZE + (numFramesFinalPeriod == 0 ? 12 : 24);
const uint32_t stcoAtomSize = STSX_BSIZE + (uint32_t) m_rndAccOffsets.size () * 4; const uint32_t stcoAtomSize = STSX_BSIZE + (uint32_t) m_rndAccOffsets.size () * 4;
#ifndef NO_FIX_FOR_ISSUE_1
const uint32_t stssAtomSize = STSX_BSIZE;
const uint32_t stblIncrSize = m_ascSizeM5 + stszAtomSize + stscAtomSize + stcoAtomSize + stssAtomSize;
const uint32_t headerBytes = STAT_HEADER_SIZE + m_dynamicHeader.size () + stscAtomSize + stcoAtomSize + stssAtomSize;
#else
const uint32_t stblIncrSize = m_ascSizeM5 + stszAtomSize + stscAtomSize + stcoAtomSize; const uint32_t stblIncrSize = m_ascSizeM5 + stszAtomSize + stscAtomSize + stcoAtomSize;
const uint32_t headerBytes = STAT_HEADER_SIZE + m_dynamicHeader.size () + stscAtomSize + stcoAtomSize;
#endif
const uint32_t moovAtomSize = toBigEndian (toUShortValue (MOOV_BSIZE) + stblIncrSize); const uint32_t moovAtomSize = toBigEndian (toUShortValue (MOOV_BSIZE) + stblIncrSize);
const uint32_t trakAtomSize = toBigEndian (toUShortValue (TRAK_BSIZE) + stblIncrSize); const uint32_t trakAtomSize = toBigEndian (toUShortValue (TRAK_BSIZE) + stblIncrSize);
const uint32_t mdiaAtomSize = toBigEndian (toUShortValue (MDIA_BSIZE) + stblIncrSize); const uint32_t mdiaAtomSize = toBigEndian (toUShortValue (MDIA_BSIZE) + stblIncrSize);
@ -110,7 +117,6 @@ int BasicMP4Writer::finishFile (const unsigned avgBitrate, const unsigned maxBit
const uint32_t stblAtomSize = toBigEndian (toUShortValue (STBL_BSIZE) + stblIncrSize); const uint32_t stblAtomSize = toBigEndian (toUShortValue (STBL_BSIZE) + stblIncrSize);
const uint32_t numSamplesBE = toBigEndian (audioLength); const uint32_t numSamplesBE = toBigEndian (audioLength);
const uint32_t timeStampBE = toBigEndian (modifTime); const uint32_t timeStampBE = toBigEndian (modifTime);
const uint32_t headerBytes = STAT_HEADER_SIZE + (uint32_t) m_dynamicHeader.size () + stscAtomSize + stcoAtomSize;
uint32_t* const header4Byte = (uint32_t* const) m_staticHeader; uint32_t* const header4Byte = (uint32_t* const) m_staticHeader;
int bytesWritten = 0; int bytesWritten = 0;
@ -119,6 +125,13 @@ int BasicMP4Writer::finishFile (const unsigned avgBitrate, const unsigned maxBit
return 1; // invalid file handle or file getting too big return 1; // invalid file handle or file getting too big
} }
if (ascBuf != nullptr) // update ASC + UC data if required
{
memcpy (&m_staticHeader[571], ascBuf, 5 * sizeof (uint8_t));
for (unsigned i = 0; i < m_ascSizeM5; i++) m_dynamicHeader.at (i) = ascBuf[5 + i];
}
// finish setup of fixed-length part of MPEG-4 file header // finish setup of fixed-length part of MPEG-4 file header
if (modifTime > 0) if (modifTime > 0)
{ {
@ -211,6 +224,19 @@ int BasicMP4Writer::finishFile (const unsigned avgBitrate, const unsigned maxBit
m_dynamicHeader.push_back ((rndAccOffset >> 8) & UCHAR_MAX); m_dynamicHeader.push_back ((rndAccOffset >> 8) & UCHAR_MAX);
m_dynamicHeader.push_back ( rndAccOffset & UCHAR_MAX); m_dynamicHeader.push_back ( rndAccOffset & UCHAR_MAX);
} }
#ifndef NO_FIX_FOR_ISSUE_1
m_dynamicHeader.push_back ((stssAtomSize >> 24) & UCHAR_MAX);
m_dynamicHeader.push_back ((stssAtomSize >> 16) & UCHAR_MAX);
m_dynamicHeader.push_back ((stssAtomSize >> 8) & UCHAR_MAX);
m_dynamicHeader.push_back ( stssAtomSize & UCHAR_MAX);
m_dynamicHeader.push_back (0x73); m_dynamicHeader.push_back (0x74);
m_dynamicHeader.push_back (0x73); m_dynamicHeader.push_back (0x73); // stss
m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x00);
m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x00);
m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x00);
m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x00);
#endif
m_dynamicHeader.push_back ((m_m4aMdatSize >> 24) & UCHAR_MAX); m_dynamicHeader.push_back ((m_m4aMdatSize >> 24) & UCHAR_MAX);
m_dynamicHeader.push_back ((m_m4aMdatSize >> 16) & UCHAR_MAX); m_dynamicHeader.push_back ((m_m4aMdatSize >> 16) & UCHAR_MAX);
m_dynamicHeader.push_back ((m_m4aMdatSize >> 8) & UCHAR_MAX); m_dynamicHeader.push_back ((m_m4aMdatSize >> 8) & UCHAR_MAX);
@ -248,7 +274,11 @@ int BasicMP4Writer::initHeader (const uint32_t audioLength) // reserve bytes for
const unsigned frameCount = ((audioLength + m_frameLength - 1) / m_frameLength) + (flushFrameUsed ? 2 : 1); const unsigned frameCount = ((audioLength + m_frameLength - 1) / m_frameLength) + (flushFrameUsed ? 2 : 1);
const unsigned chunkCount = ((frameCount + m_rndAccPeriod - 1) / m_rndAccPeriod); const unsigned chunkCount = ((frameCount + m_rndAccPeriod - 1) / m_rndAccPeriod);
const unsigned finalChunk = (frameCount <= m_rndAccPeriod ? 0 : frameCount % m_rndAccPeriod); const unsigned finalChunk = (frameCount <= m_rndAccPeriod ? 0 : frameCount % m_rndAccPeriod);
#ifndef NO_FIX_FOR_ISSUE_1
const int estimHeaderSize = STAT_HEADER_SIZE + m_ascSizeM5 + 6+4 + frameCount * 4 /*stsz*/ + STSX_BSIZE * 4 +
#else
const int estimHeaderSize = STAT_HEADER_SIZE + m_ascSizeM5 + 6+4 + frameCount * 4 /*stsz*/ + STSX_BSIZE * 3 + const int estimHeaderSize = STAT_HEADER_SIZE + m_ascSizeM5 + 6+4 + frameCount * 4 /*stsz*/ + STSX_BSIZE * 3 +
#endif
(finalChunk == 0 ? 12 : 24) /*stsc*/ + chunkCount * 4 /*stco*/ + 8 /*mdat*/; (finalChunk == 0 ? 12 : 24) /*stsc*/ + chunkCount * 4 /*stco*/ + 8 /*mdat*/;
int bytesWritten = 0; int bytesWritten = 0;

View File

@ -1,5 +1,5 @@
/* basicMP4Writer.h - header file for class with basic MPEG-4 file writing capability /* basicMP4Writer.h - header file for class with basic MPEG-4 file writing capability
* written by C. R. Helmrich, last modified in 2019 - see License.htm for legal notices * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
* *
* The copyright in this software is being made available under a Modified BSD-Style License * The copyright in this software is being made available under a Modified BSD-Style License
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third- * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
@ -50,9 +50,9 @@ public:
// destructor // destructor
~BasicMP4Writer() { m_dynamicHeader.clear (); m_rndAccOffsets.clear (); } ~BasicMP4Writer() { m_dynamicHeader.clear (); m_rndAccOffsets.clear (); }
// public functions // public functions
int addFrameAU (const uint8_t* byteBuf, const uint32_t byteOffset, const uint32_t byteCount); int addFrameAU (const uint8_t* byteBuf, const uint32_t byteCount);
int finishFile (const unsigned avgBitrate, const unsigned maxBitrate, const uint32_t audioLength, int finishFile (const unsigned avgBitrate, const unsigned maxBitrate, const uint32_t audioLength,
const uint32_t modifTime = 0); const uint32_t modifTime = 0, const uint8_t* ascBuf = nullptr);
unsigned getFrameCount () const { return m_frameCount; } unsigned getFrameCount () const { return m_frameCount; }
int initHeader (const uint32_t audioLength); int initHeader (const uint32_t audioLength);
unsigned open (const int mp4FileHandle, const unsigned sampleRate, const unsigned numChannels, unsigned open (const int mp4FileHandle, const unsigned sampleRate, const unsigned numChannels,

View File

@ -1,5 +1,5 @@
/* exhaleApp.cpp - source file with main() routine for exhale application executable /* exhaleApp.cpp - source file with main() routine for exhale application executable
* written by C. R. Helmrich, last modified in 2019 - see License.htm for legal notices * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
* *
* The copyright in this software is being made available under a Modified BSD-Style License * The copyright in this software is being made available under a Modified BSD-Style License
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third- * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
@ -11,6 +11,7 @@
#include "exhaleAppPch.h" #include "exhaleAppPch.h"
#include "basicMP4Writer.h" #include "basicMP4Writer.h"
#include "basicWavReader.h" #include "basicWavReader.h"
#include "loudnessEstim.h"
// #define USE_EXHALELIB_DLL (defined (_WIN32) || defined (WIN32) || defined (_WIN64) || defined (WIN64)) // #define USE_EXHALELIB_DLL (defined (_WIN32) || defined (WIN32) || defined (_WIN64) || defined (WIN64))
#if USE_EXHALELIB_DLL #if USE_EXHALELIB_DLL
#include "exhaleDecl.h" #include "exhaleDecl.h"
@ -26,6 +27,12 @@
#if defined (_WIN32) || defined (WIN32) || defined (_WIN64) || defined (WIN64) #if defined (_WIN32) || defined (WIN32) || defined (_WIN64) || defined (WIN64)
#include <windows.h> #include <windows.h>
// constants, experimental macros
#define EA_LOUD_INIT 16399u // bsSamplePeakLevel = 0 & methodValue = 0
#define EA_LOUD_NORM -42.25f // -100 + 57.75 of ISO 23003-4, Table A.48
#define EA_PEAK_NORM -96.33f // 20 * log10(2^-16), 16-bit normalization
#define EA_PEAK_MIN 0.262f // 20 * log10() + EA_PEAK_NORM = -108 dbFS
#define EXHALE_TEXT_BLUE (FOREGROUND_INTENSITY | FOREGROUND_BLUE | FOREGROUND_GREEN) #define EXHALE_TEXT_BLUE (FOREGROUND_INTENSITY | FOREGROUND_BLUE | FOREGROUND_GREEN)
#define EXHALE_TEXT_PINK (FOREGROUND_INTENSITY | FOREGROUND_BLUE | FOREGROUND_RED) #define EXHALE_TEXT_PINK (FOREGROUND_INTENSITY | FOREGROUND_BLUE | FOREGROUND_RED)
#else // Linux, MacOS, Unix #else // Linux, MacOS, Unix
@ -46,6 +53,7 @@ int main (const int argc, char* argv[])
int32_t* inPcmData = nullptr; // 24-bit WAVE audio input buffer int32_t* inPcmData = nullptr; // 24-bit WAVE audio input buffer
uint8_t* outAuData = nullptr; // access unit (AU) output buffer uint8_t* outAuData = nullptr; // access unit (AU) output buffer
int inFileHandle = -1, outFileHandle = -1; int inFileHandle = -1, outFileHandle = -1;
uint32_t loudStats = EA_LOUD_INIT; // valid empty loudness data
uint16_t i, exePathEnd = 0; uint16_t i, exePathEnd = 0;
uint16_t compatibleExtensionFlag = 0; // 0: disabled, 1: enabled uint16_t compatibleExtensionFlag = 0; // 0: disabled, 1: enabled
uint16_t coreSbrFrameLengthIndex = 1; // 0: 768, 1: 1024 samples uint16_t coreSbrFrameLengthIndex = 1; // 0: 768, 1: 1024 samples
@ -359,8 +367,11 @@ int main (const int argc, char* argv[])
const unsigned sampleRate = wavReader.getSampleRate (); const unsigned sampleRate = wavReader.getSampleRate ();
const unsigned indepPeriod = (sampleRate < 48000 ? sampleRate / frameLength : 45 /*for 50-Hz video, use 50 for 60-Hz video*/); const unsigned indepPeriod = (sampleRate < 48000 ? sampleRate / frameLength : 45 /*for 50-Hz video, use 50 for 60-Hz video*/);
const unsigned mod3Percent = unsigned ((expectLength * (3 + coreSbrFrameLengthIndex)) >> 17); const unsigned mod3Percent = unsigned ((expectLength * (3 + coreSbrFrameLengthIndex)) >> 17);
uint32_t byteCount = 0, bw = 0, bwMax = 0, br; // for bytes read and bit-rate uint32_t byteCount = 0, bw = (numChannels < 7 ? loudStats : 0);
uint32_t br, bwMax = 0; // br will be used to hold bytes read and/or bit-rate
uint32_t headerRes = 0; uint32_t headerRes = 0;
// initialize LoudnessEstimator object
LoudnessEstimator loudnessEst (inPcmData, 24 /*bit*/, sampleRate, numChannels);
// open & prepare ExhaleEncoder object // open & prepare ExhaleEncoder object
#if USE_EXHALELIB_DLL #if USE_EXHALELIB_DLL
ExhaleEncAPI& exhaleEnc = *exhaleCreate (inPcmData, outAuData, sampleRate, numChannels, frameLength, indepPeriod, variableCoreBitRateMode + ExhaleEncAPI& exhaleEnc = *exhaleCreate (inPcmData, outAuData, sampleRate, numChannels, frameLength, indepPeriod, variableCoreBitRateMode +
@ -376,7 +387,7 @@ int main (const int argc, char* argv[])
// init encoder, generate UsacConfig() // init encoder, generate UsacConfig()
memset (outAuData, 0, 108 * sizeof (uint8_t)); // max. allowed ASC + UC size memset (outAuData, 0, 108 * sizeof (uint8_t)); // max. allowed ASC + UC size
i = exhaleEnc.initEncoder (outAuData, &bw); // bw holds actual ASC + UC size i = exhaleEnc.initEncoder (outAuData, &bw); // bw stores actual ASC + UC size
if ((i |= mp4Writer.open (outFileHandle, sampleRate, numChannels, inSampDepth, frameLength, startLength, if ((i |= mp4Writer.open (outFileHandle, sampleRate, numChannels, inSampDepth, frameLength, startLength,
indepPeriod, outAuData, bw, time (nullptr) & UINT_MAX, (char) variableCoreBitRateMode)) != 0) indepPeriod, outAuData, bw, time (nullptr) & UINT_MAX, (char) variableCoreBitRateMode)) != 0)
@ -433,7 +444,7 @@ int main (const int argc, char* argv[])
} }
if (bwMax < bw) bwMax = bw; if (bwMax < bw) bwMax = bw;
// write first AU, add frame to header // write first AU, add frame to header
if (mp4Writer.addFrameAU (outAuData, byteCount, bw) != bw) if ((mp4Writer.addFrameAU (outAuData, bw) != bw) || loudnessEst.addNewPcmData (frameLength))
{ {
#if USE_EXHALELIB_DLL #if USE_EXHALELIB_DLL
exhaleDelete (&exhaleEnc); exhaleDelete (&exhaleEnc);
@ -456,7 +467,7 @@ int main (const int argc, char* argv[])
} }
if (bwMax < bw) bwMax = bw; if (bwMax < bw) bwMax = bw;
// write new AU, add frame to header // write new AU, add frame to header
if (mp4Writer.addFrameAU (outAuData, byteCount, bw) != bw) if ((mp4Writer.addFrameAU (outAuData, bw) != bw) || loudnessEst.addNewPcmData (frameLength))
{ {
#if USE_EXHALELIB_DLL #if USE_EXHALELIB_DLL
exhaleDelete (&exhaleEnc); exhaleDelete (&exhaleEnc);
@ -486,7 +497,7 @@ int main (const int argc, char* argv[])
} }
if (bwMax < bw) bwMax = bw; if (bwMax < bw) bwMax = bw;
// write final AU, add frame to header // write final AU, add frame to header
if (mp4Writer.addFrameAU (outAuData, byteCount, bw) != bw) if ((mp4Writer.addFrameAU (outAuData, bw) != bw) || loudnessEst.addNewPcmData (frameLength))
{ {
#if USE_EXHALELIB_DLL #if USE_EXHALELIB_DLL
exhaleDelete (&exhaleEnc); exhaleDelete (&exhaleEnc);
@ -513,7 +524,7 @@ int main (const int argc, char* argv[])
} }
if (bwMax < bw) bwMax = bw; if (bwMax < bw) bwMax = bw;
// the flush AU, add frame to header // the flush AU, add frame to header
if (mp4Writer.addFrameAU (outAuData, byteCount, bw) != bw) if (mp4Writer.addFrameAU (outAuData, bw) != bw) // zero, no loudness update
{ {
#if USE_EXHALELIB_DLL #if USE_EXHALELIB_DLL
exhaleDelete (&exhaleEnc); exhaleDelete (&exhaleEnc);
@ -555,14 +566,34 @@ int main (const int argc, char* argv[])
bw = _WRITE(outFileHandle, inPcmData, br); bw = _WRITE(outFileHandle, inPcmData, br);
} }
} }
i = 0; // no errors
// loudness and sample peak of program
loudStats = loudnessEst.getStatistics ();
if (numChannels < 7)
{
// quantize for loudnessInfo() reset
const uint32_t qLoud = uint32_t (4.0f * __max (0.0f, (loudStats >> 16) / 512.f + EA_LOUD_NORM) + 0.5f);
const uint32_t qPeak = uint32_t (32.0f * (20.0f - 20.0f * log10 (__max (EA_PEAK_MIN, float (loudStats & USHRT_MAX))) - EA_PEAK_NORM) + 0.5f);
// recreate ASC + UC + loudness data
bw = EA_LOUD_INIT | (qPeak << 18) | (qLoud << 6); // measurementSystem is 3
memset (outAuData, 0, 108 * sizeof (uint8_t)); // max allowed ASC + UC size
i = exhaleEnc.initEncoder (outAuData, &bw); // with finished loudnessInfo()
}
// mean & max. bit-rate of encoded AUs // mean & max. bit-rate of encoded AUs
br = uint32_t (((actualLength >> 1) + 8 * (byteCount + 4 * (int64_t) mp4Writer.getFrameCount ()) * sampleRate) / actualLength); br = uint32_t (((actualLength >> 1) + 8 * (byteCount + 4 * (int64_t) mp4Writer.getFrameCount ()) * sampleRate) / actualLength);
bw = uint32_t (((frameLength >> 1) + 8 * (bwMax + 4u /* maximum AU size + stsz as a bit-rate */) * sampleRate) / frameLength); bw = uint32_t (((frameLength >> 1) + 8 * (bwMax + 4u /* maximum AU size + stsz as a bit-rate */) * sampleRate) / frameLength);
bw = mp4Writer.finishFile (br, bw, uint32_t (__min (UINT_MAX - startLength, actualLength)), time (nullptr) & UINT_MAX); bw = mp4Writer.finishFile (br, bw, uint32_t (__min (UINT_MAX - startLength, actualLength)), time (nullptr) & UINT_MAX,
(i == 0) && (numChannels < 7) ? outAuData : nullptr);
// print out collected file statistics
fprintf_s (stdout, " Done, actual average %.1f kbit/s\n\n", (float) br * 0.001f); fprintf_s (stdout, " Done, actual average %.1f kbit/s\n\n", (float) br * 0.001f);
i = 0; // no errors if (numChannels < 7)
{
fprintf_s (stdout, " Input statistics: Mobile loudness %.2f LUFS,\tsample peak level %.2f dBFS\n\n",
(loudStats >> 16) / 512.f - 100.0f, 20.0f * log10 (__max (EA_PEAK_MIN, float (loudStats & USHRT_MAX))) + EA_PEAK_NORM);
}
if (!readStdin && (actualLength != expectLength || bw != headerRes)) if (!readStdin && (actualLength != expectLength || bw != headerRes))
{ {
fprintf_s (stderr, " WARNING: %lld sample frames read but %lld sample frames expected!\n", (long long) actualLength, (long long) expectLength); fprintf_s (stderr, " WARNING: %lld sample frames read but %lld sample frames expected!\n", (long long) actualLength, (long long) expectLength);

View File

@ -157,6 +157,7 @@
<ClInclude Include="basicMP4Writer.h" /> <ClInclude Include="basicMP4Writer.h" />
<ClInclude Include="basicWavReader.h" /> <ClInclude Include="basicWavReader.h" />
<ClInclude Include="exhaleAppPch.h" /> <ClInclude Include="exhaleAppPch.h" />
<ClInclude Include="loudnessEstim.h" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClCompile Include="basicMP4Writer.cpp" /> <ClCompile Include="basicMP4Writer.cpp" />
@ -168,6 +169,7 @@
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader> <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader> <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
</ClCompile> </ClCompile>
<ClCompile Include="loudnessEstim.cpp" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ResourceCompile Include="exhaleApp.rc" /> <ResourceCompile Include="exhaleApp.rc" />

View File

@ -30,6 +30,9 @@
<ClInclude Include="exhaleAppPch.h"> <ClInclude Include="exhaleAppPch.h">
<Filter>Header Files</Filter> <Filter>Header Files</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="loudnessEstim.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClCompile Include="basicMP4Writer.cpp"> <ClCompile Include="basicMP4Writer.cpp">
@ -44,6 +47,9 @@
<ClCompile Include="exhaleAppPch.cpp"> <ClCompile Include="exhaleAppPch.cpp">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="loudnessEstim.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ResourceCompile Include="exhaleApp.rc"> <ResourceCompile Include="exhaleApp.rc">

126
src/app/loudnessEstim.cpp Normal file
View File

@ -0,0 +1,126 @@
/* loudnessEstim.cpp - source file for class with ITU-R BS.1770-4 loudness level estimation
* written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
*
* The copyright in this software is being made available under a Modified BSD-Style License
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
* party rights, including patent rights. No such rights are granted under this License.
*
* Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved.
*/
#include "exhaleAppPch.h"
#include "loudnessEstim.h"
// constructor
// Configures the estimator for the given input format and clears all measurement state.
//   inputPcmData - buffer holding the interleaved PCM samples read by addNewPcmData (); the
//                  pointer is only stored, a nullptr makes addNewPcmData () return an error
//   bitDepth     - word length of the input samples, limited to the range 1..24 here
//   sampleRate   - sampling rate, selects the filter constant and the 100-msec hop size
//   numChannels  - number of interleaved channels, limited to 8 here
LoudnessEstimator::LoudnessEstimator (int32_t* const inputPcmData, const unsigned bitDepth /*= 24*/,
                                      const unsigned sampleRate /*= 44100*/, const unsigned numChannels /*= 2*/)
{
  // a bitDepth of 0 would wrap around in "bitDepth - 1" and lead to an undefined shift count
  const unsigned usedBitDepth = __max (1u, __min (24u, bitDepth));

  m_filterFactor  = 224 + (__min (SHRT_MAX, (int) sampleRate - 47616) >> 10);
  m_gbHopSize64   = (__min (163519, sampleRate) + 320) / 640; // 100 msec, in units of 64 samples
  m_gbNormFactor  = (m_gbHopSize64 == 0 ? 0 : 1.0f / (4.0f * m_gbHopSize64));
  m_inputChannels = __min (8, numChannels);
  m_inputMaxValue = 1u << (usedBitDepth - 1); // full-scale magnitude of the input samples
  m_inputPcmData  = inputPcmData;

  reset (); // clears hop counter, peak value, stored gating blocks, and power sums

  for (unsigned ch = 0; ch < 8; ch++) m_filterMemoryI[ch] = m_filterMemoryO[ch] = 0;
}
// public functions
// Analyzes one frame of interleaved PCM samples taken from the buffer passed to the constructor.
// Each channel is run through the simplified K-weighting filter, the largest absolute input
// sample is tracked, and the filtered power is summed up per gating block quarter. Every time
// a quarter (m_gbHopSize64 units of 64 samples) is complete, the power of the last four quarters
// is combined into one gating block, which is stored if it exceeds the absolute threshold.
//   samplesPerChannel - number of samples per channel in the buffer; only complete units of 64
//                       samples are processed, a remainder at the end is ignored
// Returns 0 on success, or 1 if the sample pointer is nullptr or samplesPerChannel is below 64.
uint32_t LoudnessEstimator::addNewPcmData (const unsigned samplesPerChannel)
{
  const unsigned frameSize64  = samplesPerChannel >> 6; // in units of 64
  const unsigned numSamples64 = 1 << 6; // sub-frame size (64, of course)
  const int32_t* chSig = m_inputPcmData;
  uint64_t* newQuarterPower = m_powerValue[3];
  unsigned ch, f, s;

  if ((chSig == nullptr) || (frameSize64 == 0))
  {
    return 1; // invalid sample pointer or frame size
  }

  // de-interleave and K-filter incoming audio samples in sub-frame units
  for (f = 0; f < frameSize64; f++) // sub-frame loop
  {
    for (s = 0; s < numSamples64; s++) // sample loop
    {
      for (ch = 0; ch < m_inputChannels; ch++)
      {
        // simplified K-filter, including 500-Hz high-pass pre-processing
        const int32_t  xi = *(chSig++);
        // the feedback product is formed with 64 bits since the factor (up to 255) times the
        // previous output (up to about 2^24 with 24-bit input) does not always fit into an int
        const int64_t  fb = (128 + (int64_t) m_filterFactor * m_filterMemoryO[ch]) >> 8;
        const int32_t  yi = xi - m_filterMemoryI[ch] + (int32_t) fb;
        // magnitude via unsigned arithmetic, which is also defined for the most negative value
        const uint32_t a  = (xi < 0 ? 0u - (uint32_t) xi : (uint32_t) xi);

        m_filterMemoryI[ch] = xi;
        m_filterMemoryO[ch] = yi;
        newQuarterPower[ch] += (int64_t) yi * (int64_t) yi;

        if (m_inputPeakValue < a) m_inputPeakValue = a; // get peak level
      }
    } // s

    if (++m_gbHopLength64 >= m_gbHopSize64) // completed 100-msec quarter
    {
      const float thrA = LE_THRESH_ABS * (float) m_inputMaxValue * (float) m_inputMaxValue;
      uint64_t zij, zj = 0;

      for (ch = 0; ch < m_inputChannels; ch++) // sum 64-sample averages
      {
        zij = (m_powerValue[0][ch] + m_powerValue[1][ch] + m_powerValue[2][ch] + newQuarterPower[ch] + (1u << 5)) >> 6;
        zj += (ch > 2 ? (16u + 45 * zij) >> 5 : zij); // weighting by G_i
      }
      if (zj * m_gbNormFactor > thrA) // use sqrt (block RMS) if lj > -70
      {
        if (m_gbRmsValues.size () < INT_MAX) m_gbRmsValues.push_back (uint32_t (sqrt (zj * m_gbNormFactor) + 0.5f));
      }

      for (ch = 0; ch < m_inputChannels; ch++) // set up new gating block
      {
        m_powerValue[0][ch] = m_powerValue[1][ch];
        m_powerValue[1][ch] = m_powerValue[2][ch];
        m_powerValue[2][ch] = newQuarterPower[ch];
        newQuarterPower[ch] = 0;
      }
      m_gbHopLength64 = 0;
    }
  }

  return 0; // no error
}
// Evaluates the gating blocks collected so far and returns the packed measurement result.
//   includeWarmUp - if false, the first three stored gating blocks are left out of the average
// Return value: the lower 16 bits hold the sample peak scaled to a 16-bit range, the upper 16
// bits hold the gated loudness L mapped to an unsigned value i, with L = i/512 - 100. When no
// gating block is available or the averaged power is below LE_THRESH_ABS, only the peak value
// is returned and the upper 16 bits are zero.
uint32_t LoudnessEstimator::getStatistics (const bool includeWarmUp /*= false*/)
{
  const uint32_t firstBlock  = (includeWarmUp ? 0 : 3);
  const uint32_t usedBlocks  = __max (firstBlock, m_gbRmsValues.size ()) - firstBlock;
  const uint16_t peakDivisor = __max (1u, m_inputMaxValue >> 16);
  const uint16_t peak16Bits  = __min (USHRT_MAX, (m_inputPeakValue + (peakDivisor >> 1)) / peakDivisor);

  if (usedBlocks == 0)
  {
    return peak16Bits; // no loudness stats
  }

  const float normFac = 1.0f / usedBlocks; // scaling inside the loops prevents overflow
  uint32_t idx, gatedCount = 0;
  float power = 0.0f;

  // first stage: arithmetic average over all blocks which passed the absolute threshold
  for (idx = firstBlock; idx < m_gbRmsValues.size (); idx++)
  {
    const float rms = (float) m_gbRmsValues.at (idx);

    power += normFac * rms * rms;
  }
  if (power < LE_THRESH_ABS)
  {
    return peak16Bits; // quiet loudness stats
  }

  // second stage: average only over the blocks lying above the relative threshold
  const float relThresh = LE_THRESH_REL * power;

  power = 0.0f;
  for (idx = firstBlock; idx < m_gbRmsValues.size (); idx++)
  {
    const float rms = (float) m_gbRmsValues.at (idx);
    const float sqr = rms * rms;

    if (sqr > relThresh)
    {
      power += normFac * sqr;
      gatedCount++;
    }
  }
  if (power < LE_THRESH_ABS)
  {
    return peak16Bits; // quiet loudness stats
  }

  // convert the gated mean power, normalized to full scale, into a loudness level
  power = LE_LUFS_OFFSET + 10.0f * log10 (power / (normFac * gatedCount * (float) m_inputMaxValue * (float) m_inputMaxValue));

  const uint32_t mapped = __max (0, int32_t ((power + 100.0f) * 512.0f + 0.5f)); // map to uint

  return (__min (USHRT_MAX, mapped) << 16) | peak16Bits; // L = mapped/512-100
}

54
src/app/loudnessEstim.h Normal file
View File

@ -0,0 +1,54 @@
/* loudnessEstim.h - header file for class with ITU-R BS.1770-4 loudness level estimation
* written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
*
* The copyright in this software is being made available under a Modified BSD-Style License
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
* party rights, including patent rights. No such rights are granted under this License.
*
* Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved.
*/
#ifndef _LOUDNESS_ESTIM_H_
#define _LOUDNESS_ESTIM_H_
#include "exhaleAppPch.h"
// constants, experimental macros
// LE_THRESH_ABS: 15 / 2^27, a power ratio; addNewPcmData () multiplies it by the squared
// full-scale input value before comparing it with the power of a gating block
#define LE_THRESH_ABS (15.0f / 134217728.0f) // absolute threshold for -70 LUFS
// LE_THRESH_REL: power factor applied to the first-stage mean block power in getStatistics ()
#define LE_THRESH_REL 0.1f // 2nd stage, relative threshold 10 dB below L
// LE_LUFS_OFFSET: constant added to 10*log10 of the normalized mean power in getStatistics ()
#define LE_LUFS_OFFSET -0.484375f // to return -3.01 LUFS for 997-Hz 0-dBFS sine
// Loudness level and sample peak estimator following ITU-R BS.1770-4. The PCM buffer handed
// to the constructor is only referenced: it is read by addNewPcmData () and never freed here.
class LoudnessEstimator
{
private:

  // K-weighting filter state, one entry per channel
  int32_t  m_filterMemoryI[8]; // input sample of the preceding filter step
  int32_t  m_filterMemoryO[8]; // output sample of the preceding filter step
  // gating block state
  uint64_t m_powerValue[4][8]; // power per gating block quarter and channel
  float    m_gbNormFactor;     // normalization for 64-sample units: 1/(4*m_gbHopSize64)
  uint8_t  m_filterFactor;     // K-weighting filter constant, depends on the sampling rate
  uint8_t  m_gbHopLength64;    // 64-sample units collected in the current block quarter
  uint8_t  m_gbHopSize64;      // hop-size between gating blocks (25% of block length)
  // input description
  uint8_t  m_inputChannels;
  uint32_t m_inputMaxValue;
  uint32_t m_inputPeakValue;
  int32_t* m_inputPcmData;
  // results: square root of the averaged power of each stored gating block
  std::vector <uint32_t> m_gbRmsValues;

public:

  // constructor
  LoudnessEstimator (int32_t* const inputPcmData, const unsigned bitDepth = 24,
                     const unsigned sampleRate = 44100, const unsigned numChannels = 2);
  // destructor
  ~LoudnessEstimator () { reset (); }
  // public functions
  uint32_t addNewPcmData (const unsigned samplesPerChannel);
  uint32_t getStatistics (const bool includeWarmUp = false);
  // discards all collected statistics: quarter powers, stored gating blocks, peak, hop counter
  void reset ()
  {
    memset (m_powerValue, 0, sizeof (m_powerValue));
    m_gbRmsValues.clear ();
    m_inputPeakValue = 0;
    m_gbHopLength64  = 0;
  }
}; // LoudnessEstimator
#endif // _LOUDNESS_ESTIM_H_

View File

@ -1,5 +1,5 @@
/* bitStreamWriter.cpp - source file for class with basic bit-stream writing capability /* bitStreamWriter.cpp - source file for class with basic bit-stream writing capability
* written by C. R. Helmrich, last modified in 2019 - see License.htm for legal notices * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
* *
* The copyright in this software is being made available under a Modified BSD-Style License * The copyright in this software is being made available under a Modified BSD-Style License
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third- * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
@ -375,7 +375,7 @@ unsigned BitStreamWriter::writeStereoCoreToolInfo (const CoreCoderData& elData,
// public functions // public functions
unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex, const bool shortFrameLength, unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex, const bool shortFrameLength,
const uint8_t chConfigurationIndex, const uint8_t numElements, const uint8_t chConfigurationIndex, const uint8_t numElements,
const ELEM_TYPE* const elementType, const bool configExtensionPresent, const ELEM_TYPE* const elementType, const uint32_t loudnessInfo,
#if !RESTRICT_TO_AAC #if !RESTRICT_TO_AAC
const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling, const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling,
#endif #endif
@ -429,25 +429,28 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex,
} }
} // for el } // for el
m_auBitStream.write (configExtensionPresent ? 1 : 0, 1); // usacConfigExten... m_auBitStream.write (loudnessInfo > 0 ? 1 : 0, 1); // ..ConfigExtensionPresent
if (configExtensionPresent) // 23003-4: loudnessInfo if (loudnessInfo > 0) // ISO 23003-4: loudnessInfo()
{ {
const unsigned methodDefinition = (loudnessInfo >> 14) & 0xF;
const unsigned methodValueBits = (methodDefinition == 7 ? 5 : (methodDefinition == 8 ? 2 : 8));
m_auBitStream.write (0, 2); // numConfigExtensions m_auBitStream.write (0, 2); // numConfigExtensions
m_auBitStream.write (ID_EXT_LOUDNESS_INFO, 4); m_auBitStream.write (ID_EXT_LOUDNESS_INFO, 4);
m_auBitStream.write (8, 4); // usacConfigExtLength m_auBitStream.write (methodValueBits < 3 ? 7 : 8, 4); // usacConfigExtLength
m_auBitStream.write (1, 12);// loudnessInfoCount=1 m_auBitStream.write (1, 12);// loudnessInfoCount=1
m_auBitStream.write (1, 14); // peakLevelPresent=1 m_auBitStream.write (1, 14);// samplePeakLevel..=1
m_auBitStream.write (0, 12); // bsSamplePeakLevel m_auBitStream.write ((loudnessInfo >> 18) & 0xFFF, 12); // bsSamplePeakLevel
m_auBitStream.write (1, 5); // measurementCount=1 m_auBitStream.write (1, 5); // measurementCount=1
m_auBitStream.write (methodDefinition, 4);
m_auBitStream.write ((loudnessInfo >> 6) & ((1 << methodValueBits) - 1), methodValueBits);
m_auBitStream.write ((loudnessInfo >> 2) & 0xF, 4); // measurementSystem
m_auBitStream.write ((loudnessInfo & 0x3), 2); // reliability, 3 = accurate
m_auBitStream.write (1, 4); // methodDefinition=1 m_auBitStream.write (0, 1); // loudnessInfoSetExtPresent=0, payload padding
m_auBitStream.write (0, 8); // methodValue storage bitCount += (methodValueBits < 3 ? 66 : 74);
m_auBitStream.write (0, 4); // measurementSystem=0 if (methodValueBits >= 3) m_auBitStream.write (0, 10 - methodValueBits);
m_auBitStream.write (3, 2); // reliability=3, good
m_auBitStream.write (0, 1); // ...SetExtPresent=0
bitCount += 72;
} }
bitCount += (8 - m_auBitStream.heldBitCount) & 7; bitCount += (8 - m_auBitStream.heldBitCount) & 7;

View File

@ -1,5 +1,5 @@
/* bitStreamWriter.h - header file for class with basic bit-stream writing capability /* bitStreamWriter.h - header file for class with basic bit-stream writing capability
* written by C. R. Helmrich, last modified in 2019 - see License.htm for legal notices * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
* *
* The copyright in this software is being made available under a Modified BSD-Style License * The copyright in this software is being made available under a Modified BSD-Style License
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third- * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
@ -56,7 +56,7 @@ public:
// public functions // public functions
unsigned createAudioConfig (const char samplingFrequencyIndex, const bool shortFrameLength, unsigned createAudioConfig (const char samplingFrequencyIndex, const bool shortFrameLength,
const uint8_t chConfigurationIndex, const uint8_t numElements, const uint8_t chConfigurationIndex, const uint8_t numElements,
const ELEM_TYPE* const elementType, const bool configExtensionPresent, const ELEM_TYPE* const elementType, const uint32_t loudnessInfo,
#if !RESTRICT_TO_AAC #if !RESTRICT_TO_AAC
const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling, const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling,
#endif #endif

View File

@ -1158,7 +1158,11 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o
m_tempAnalyzer.getTempAnalysisStats (m_tempAnaNext, nChannels); m_tempAnalyzer.getTempAnalysisStats (m_tempAnaNext, nChannels);
m_tempAnalyzer.getTransientLocation (m_tranLocNext, nChannels); m_tempAnalyzer.getTransientLocation (m_tranLocNext, nChannels);
#ifndef NO_FIX_FOR_ISSUE_1
m_indepFlag = (((m_frameCount++) % m_indepPeriod) <= 1); // configure usacIndependencyFlag
#else
m_indepFlag = (((m_frameCount++) % m_indepPeriod) == 0); // configure usacIndependencyFlag m_indepFlag = (((m_frameCount++) % m_indepPeriod) == 0); // configure usacIndependencyFlag
#endif
for (unsigned el = 0; el < m_numElements; el++) // element loop for (unsigned el = 0; el < m_numElements; el++) // element loop
{ {
@ -1534,6 +1538,32 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin
} }
if (errorValue > 0) return errorValue; if (errorValue > 0) return errorValue;
// get window band table index
errorValue = (unsigned) m_frequencyIdx; // for temporary storage
#if RESTRICT_TO_AAC
m_swbTableIdx = freqIdxToSwbTableIdxAAC[errorValue];
#else
m_swbTableIdx = (m_frameLength == CCFL_768 ? freqIdxToSwbTableIdx768[errorValue] : freqIdxToSwbTableIdxAAC[errorValue]);
#endif
errorValue = 0;
if (m_elementData[0] != nullptr) // initEncoder was called before, don't reallocate memory
{
if (audioConfigBuffer != nullptr) // recreate the UsacConfig()
{
errorValue = m_outStream.createAudioConfig (m_frequencyIdx, m_frameLength != CCFL_1024, chConf, m_numElements,
elementTypeConfig[chConf], audioConfigBytes ? *audioConfigBytes : 0,
#if !RESTRICT_TO_AAC
m_timeWarping, m_noiseFilling,
#endif
audioConfigBuffer);
if (audioConfigBytes) *audioConfigBytes = errorValue; // size of UsacConfig() in bytes
errorValue = (errorValue == 0 ? 1 : 0);
}
return errorValue;
}
// allocate all helper structs // allocate all helper structs
for (unsigned el = 0; el < m_numElements; el++) // element loop for (unsigned el = 0; el < m_numElements; el++) // element loop
{ {
@ -1571,14 +1601,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin
if (errorValue > 0) return errorValue; if (errorValue > 0) return errorValue;
// initialize coder class memory // initialize coder class memory
errorValue = (unsigned) m_frequencyIdx; // for temporary storage m_tempIntBuf = m_timeSignals[0];
#if RESTRICT_TO_AAC
m_swbTableIdx = freqIdxToSwbTableIdxAAC[errorValue];
#else
m_swbTableIdx = (m_frameLength == CCFL_768 ? freqIdxToSwbTableIdx768[errorValue] : freqIdxToSwbTableIdxAAC[errorValue]);
#endif
m_tempIntBuf = m_timeSignals[0];
errorValue = 0;
#if EC_TRELLIS_OPT_CODING #if EC_TRELLIS_OPT_CODING
if (m_sfbQuantizer.initQuantMemory (nSamplesInFrame, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode) > 0 || if (m_sfbQuantizer.initQuantMemory (nSamplesInFrame, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode) > 0 ||
#else #else
@ -1593,7 +1616,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin
if ((errorValue == 0) && (audioConfigBuffer != nullptr)) // save UsacConfig() for writeout if ((errorValue == 0) && (audioConfigBuffer != nullptr)) // save UsacConfig() for writeout
{ {
errorValue = m_outStream.createAudioConfig (m_frequencyIdx, m_frameLength != CCFL_1024, chConf, m_numElements, errorValue = m_outStream.createAudioConfig (m_frequencyIdx, m_frameLength != CCFL_1024, chConf, m_numElements,
elementTypeConfig[chConf], false /*usacConfigExtensionPresent=0*/, elementTypeConfig[chConf], audioConfigBytes ? *audioConfigBytes : 0,
#if !RESTRICT_TO_AAC #if !RESTRICT_TO_AAC
m_timeWarping, m_noiseFilling, m_timeWarping, m_noiseFilling,
#endif #endif