exe, time, upsampler

This commit is contained in:
Christian R. Helmrich
2020-06-21 00:00:00 +02:00
parent cd4ebeb1f2
commit fd32557d3e
6 changed files with 136 additions and 12 deletions

View File

@@ -34,7 +34,7 @@ exhale is being made available under an open-source license which is
similar to the 3-clause BSD license but modified to address specific similar to the 3-clause BSD license but modified to address specific
aspects dictated by the nature and the output of this application. aspects dictated by the nature and the output of this application.
The license text and release notes for the current version 1.0.4 can The license text and release notes for the current version 1.0.5 can
be found in the `include` subdirectory of the exhale distribution. be found in the `include` subdirectory of the exhale distribution.

View File

@@ -85,7 +85,7 @@
<ul> <ul>
<li><h3>support for coding with a core coder frame length of 768 samples, no version plan</h3></li> <li><h3>support for coding with a core coder frame length of 768 samples, no version plan</h3></li>
<li><h3>exhaleLib: completed integration of predictive joint-channel coding, version 1.0.6</h3></li> <li><h3>exhaleLib: completed integration of predictive joint-channel coding, version 1.0.6</h3></li>
<li><h3>exhaleLib: finalization of support for 3.0 &#x2013; 5.1 multichannel coding, no version plan</h3></li> <li><h3>exhaleLib: finalization of support for 3.0&#x2013;5.1 multichannel coding, no version plan</h3></li>
<li><h3>exhaleLib: speed-ups and further quality tuning for difficult signals, as necessary.</h3></li> <li><h3>exhaleLib: speed-ups and further quality tuning for difficult signals, as necessary.</h3></li>
</ul> </ul>
<h3><br></h3> <h3><br></h3>

View File

@@ -49,9 +49,79 @@
#define EA_LOUD_NORM -42.25f // -100 + 57.75 of ISO 23003-4, Table A.48 #define EA_LOUD_NORM -42.25f // -100 + 57.75 of ISO 23003-4, Table A.48
#define EA_PEAK_NORM -96.33f // 20 * log10(2^-16), 16-bit normalization #define EA_PEAK_NORM -96.33f // 20 * log10(2^-16), 16-bit normalization
#define EA_PEAK_MIN 0.262f // 20 * log10() + EA_PEAK_NORM = -108 dbFS #define EA_PEAK_MIN 0.262f // 20 * log10() + EA_PEAK_NORM = -108 dbFS
#define ENABLE_RESAMPLING 1 // 1: automatic input up- and downsampling
#define IGNORE_WAV_LENGTH 0 // 1: ignore input size indicators (nasty) #define IGNORE_WAV_LENGTH 0 // 1: ignore input size indicators (nasty)
#define XHE_AAC_LOW_DELAY 0 // 1: allow encoding with 768 frame length #define XHE_AAC_LOW_DELAY 0 // 1: allow encoding with 768 frame length
#if ENABLE_RESAMPLING
// Filter taps for the 2x upsampler, consumed by eaApplyUpsampler2x(). The taps are
// applied symmetrically around the interpolated position and the weighted sum is
// scaled down by 16 bits afterwards. Entry 0 is never read by the filter loop; the
// innermost tap pair uses the separate constant 41681 instead. Together with that
// constant, the taps on each side add up to 32768, i.e. one half after the scaling.
static const int16_t usfc2x[32] =
{
      0, -13785,   8142,  -5681,   4281,  -3367,   2716,  -2225,
   1840,  -1530,   1275,  -1062,    883,   -732,    604,   -495,
    402,   -325,    260,   -205,    160,   -124,     94,    -70,
     51,    -36,     25,    -16,     11,     -6,      3,     -1
};
// Decides whether 2x upsampling of the input is to be used and, if so, allocates the
// per-channel resampling buffer and stores its address in *upsampleBuffer.
//
// Upsampling is chosen only if frameSize exceeds 64 samples and bitRateMode * 4000 is
// greater than sampleRate. Each channel owns (frameSize / 2) + 64 buffer entries; only
// the last 64 entries of every channel segment are cleared here, the leading
// frameSize / 2 entries are left uninitialized.
//
// Returns true when the upsampler is enabled and its buffer was allocated. Returns
// false when upsampling is not applicable (then *upsampleBuffer is left untouched) or
// when the allocation failed (then *upsampleBuffer is nullptr). On success the caller
// owns the buffer and has to release it with free().
static bool eaInitUpsampler2x (int32_t** upsampleBuffer, const uint16_t bitRateMode, const uint16_t sampleRate,
                               const uint16_t frameSize, const uint16_t numChannels)
{
  const uint16_t halfFrame  = frameSize >> 1;
  const uint16_t perChannel = halfFrame + (32 << 1);

  if (frameSize <= (32 << 1) || bitRateMode * 4000 <= sampleRate)
  {
    return false; // frame too short for the filter, or rate already high enough
  }

  *upsampleBuffer = (int32_t*) malloc (perChannel * numChannels * sizeof (int32_t));
  if (*upsampleBuffer == nullptr)
  {
    return false; // out of memory
  }

  int32_t* const bufStart = *upsampleBuffer;

  for (uint16_t ch = 0; ch < numChannels; ch++)
  {
    // clear the trailing part of this channel's segment
    int32_t* const chTail = &bufStart[perChannel * ch + halfFrame];

    memset (chTail, 0, (perChannel - halfFrame) * sizeof (int32_t));
  }

  return true;
}
// Upsamples one frame of interleaved PCM audio by a factor of two, in place.
//
// pcmBuffer       interleaved samples; on entry it holds frameSize / 2 samples per
//                 channel (plus 32 look-ahead samples per channel when firstFrame is
//                 set), on return it holds frameSize samples per channel
// upsampleBuffer  work buffer with (frameSize / 2) + 64 entries per channel, as set
//                 up by eaInitUpsampler2x(); it carries the filter history from one
//                 call to the next
// frameSize       number of output samples per channel
// numChannels     number of interleaved channels
// firstFrame      true only for the very first call, which consumes 32 additional
//                 input samples per channel as filter look-ahead
//
// In the output, every even position holds an input sample unchanged and every odd
// position holds a value interpolated from the 32 input samples on either side.
static void eaApplyUpsampler2x (int32_t* const pcmBuffer, int32_t* const upsampleBuffer,
                                const uint16_t frameSize, const uint16_t numChannels, const bool firstFrame = false)
{
  const uint16_t inLength = (frameSize >> 1) + (firstFrame ? 32 : 0);
  const uint16_t chLength = (frameSize >> 1) + (32 << 1);
  uint16_t ch;

  for (ch = 0; ch < numChannels; ch++) // step 1: add deinterleaved input samples to resampling buffer
  {
    int32_t* chPcmBuf = &pcmBuffer[ch];
    int32_t* chUpsBuf = &upsampleBuffer[chLength * ch];

# if 0
    if (firstFrame) // construct leading sample values via extrapolation
    {
      for (int8_t i = 0; i < 32; i++) chUpsBuf[i] = (*chPcmBuf * i + (32 >> 1)) >> 5;
    }
    else
# endif
    // Move the newest (chLength - inLength) entries to the front as filter history.
    // Source and destination lie in the same buffer and overlap when frameSize / 2
    // is smaller than 64, so memmove is required here; memcpy would be undefined.
    memmove (chUpsBuf, &chUpsBuf[inLength], (chLength - inLength) * sizeof (int32_t)); // update memory
    chUpsBuf += chLength - inLength;

    for (uint16_t i = inLength; i > 0; i--, chPcmBuf += numChannels, chUpsBuf++)
    {
      *chUpsBuf = *chPcmBuf; // deinterleave, store in resampling buffer
    }
  }

  for (ch = 0; ch < numChannels; ch++) // step 2: upsample, reinterleave, and save to PCM input buffer
  {
    /*in*/int32_t* chPcmBuf = &pcmBuffer[ch];
    const int32_t* chUpsBuf = &upsampleBuffer[chLength * ch + 32]; // skip the 32 history entries

    for (uint16_t i = (frameSize >> 1); i > 0; i--, chPcmBuf += numChannels, chUpsBuf++)
    {
      // innermost tap pair, then the remaining 31 symmetric tap pairs; 64-bit accumulation
      int64_t r = (chUpsBuf[0] + (int64_t) chUpsBuf[1]) * 41681;

      for (int16_t c = 32 - 1; c > 0; c--) r += (chUpsBuf[-c] + (int64_t) chUpsBuf[c + 1]) * usfc2x[c];

      *chPcmBuf = *chUpsBuf; chPcmBuf += numChannels; // 1-to-1 mapping
      *chPcmBuf = int32_t ((r - SHRT_MIN) >> 16); // interpolated sample, +32768 rounds before the 16-bit scaling
    }
  }
}
#endif // ENABLE_RESAMPLING
// main routine // main routine
#ifdef EXHALE_APP_WCHAR #ifdef EXHALE_APP_WCHAR
# ifdef __MINGW32__ # ifdef __MINGW32__
@@ -67,6 +137,9 @@ int main (const int argc, char* argv[])
const bool readStdin = (argc == 3); const bool readStdin = (argc == 3);
BasicWavReader wavReader; BasicWavReader wavReader;
int32_t* inPcmData = nullptr; // 24-bit WAVE audio input buffer int32_t* inPcmData = nullptr; // 24-bit WAVE audio input buffer
#if ENABLE_RESAMPLING
int32_t* inPcmRsmp = nullptr; // temporary buffer for resampler
#endif
uint8_t* outAuData = nullptr; // access unit (AU) output buffer uint8_t* outAuData = nullptr; // access unit (AU) output buffer
int inFileHandle = -1, outFileHandle = -1; int inFileHandle = -1, outFileHandle = -1;
uint32_t loudStats = EA_LOUD_INIT; // valid empty loudness data uint32_t loudStats = EA_LOUD_INIT; // valid empty loudness data
@@ -381,6 +454,10 @@ int main (const int argc, char* argv[])
goto mainFinish; // ask for resampling goto mainFinish; // ask for resampling
} }
if (wavReader.getSampleRate () > 32000 && variableCoreBitRateMode == 1)
{
fprintf_s (stderr, " WARNING: The input sampling rate should be 32 kHz or less for preset mode %d!\n\n", variableCoreBitRateMode);
}
if (outPathEnd == 0) // name has no path if (outPathEnd == 0) // name has no path
{ {
@@ -430,8 +507,13 @@ int main (const int argc, char* argv[])
const unsigned numChannels = wavReader.getNumChannels (); const unsigned numChannels = wavReader.getNumChannels ();
const unsigned inFrameSize = frameLength * sizeof (int32_t); const unsigned inFrameSize = frameLength * sizeof (int32_t);
const unsigned inSampDepth = wavReader.getBitDepth (); const unsigned inSampDepth = wavReader.getBitDepth ();
const int64_t expectLength = wavReader.getDataBytesLeft () / int64_t (numChannels * inSampDepth >> 3); #if ENABLE_RESAMPLING
const bool enableUpsampler = eaInitUpsampler2x (&inPcmRsmp, variableCoreBitRateMode, i, frameLength, numChannels);
const uint16_t firstLength = uint16_t (enableUpsampler ? (frameLength >> 1) + 32 : frameLength); // upsampler look-ahead
const int64_t expectLength = wavReader.getDataBytesLeft () / int64_t ((numChannels * inSampDepth) >> (enableUpsampler ? 4 : 3));
#else
const int64_t expectLength = wavReader.getDataBytesLeft () / int64_t ((numChannels * inSampDepth) >> 3);
#endif
// allocate dynamic frame memory buffers // allocate dynamic frame memory buffers
inPcmData = (int32_t*) malloc (inFrameSize * numChannels); // max frame in size inPcmData = (int32_t*) malloc (inFrameSize * numChannels); // max frame in size
outAuData = (uint8_t*) malloc ((6144 >> 3) * numChannels); // max frame AU size outAuData = (uint8_t*) malloc ((6144 >> 3) * numChannels); // max frame AU size
@@ -443,7 +525,11 @@ int main (const int argc, char* argv[])
goto mainFinish; // memory alloc error goto mainFinish; // memory alloc error
} }
#if ENABLE_RESAMPLING
if (wavReader.read (inPcmData, firstLength) != firstLength) // full first frame
#else
if (wavReader.read (inPcmData, frameLength) != frameLength) // full first frame if (wavReader.read (inPcmData, frameLength) != frameLength) // full first frame
#endif
{ {
fprintf_s (stderr, " ERROR while trying to encode input audio data! The audio stream is too short!\n\n"); fprintf_s (stderr, " ERROR while trying to encode input audio data! The audio stream is too short!\n\n");
i = 1024; // return value i = 1024; // return value
@@ -452,7 +538,11 @@ int main (const int argc, char* argv[])
} }
else // start coding loop, show progress else // start coding loop, show progress
{ {
#if ENABLE_RESAMPLING
const unsigned sampleRate = wavReader.getSampleRate () << (enableUpsampler ? 1 : 0);
#else
const unsigned sampleRate = wavReader.getSampleRate (); const unsigned sampleRate = wavReader.getSampleRate ();
#endif
const unsigned indepPeriod = (sampleRate < 48000 ? sampleRate / frameLength : 45 /*for 50-Hz video, use 50 for 60-Hz video*/); const unsigned indepPeriod = (sampleRate < 48000 ? sampleRate / frameLength : 45 /*for 50-Hz video, use 50 for 60-Hz video*/);
const unsigned mod3Percent = unsigned ((expectLength * (3 + coreSbrFrameLengthIndex)) >> 17); const unsigned mod3Percent = unsigned ((expectLength * (3 + coreSbrFrameLengthIndex)) >> 17);
uint32_t byteCount = 0, bw = (numChannels < 7 ? loudStats : 0); uint32_t byteCount = 0, bw = (numChannels < 7 ? loudStats : 0);
@@ -462,10 +552,13 @@ int main (const int argc, char* argv[])
LoudnessEstimator loudnessEst (inPcmData, 24 /*bit*/, sampleRate, numChannels); LoudnessEstimator loudnessEst (inPcmData, 24 /*bit*/, sampleRate, numChannels);
// open & prepare ExhaleEncoder object // open & prepare ExhaleEncoder object
#if USE_EXHALELIB_DLL #if USE_EXHALELIB_DLL
ExhaleEncAPI& exhaleEnc = *exhaleCreate (inPcmData, outAuData, sampleRate, numChannels, frameLength, indepPeriod, variableCoreBitRateMode + ExhaleEncAPI& exhaleEnc = *exhaleCreate (inPcmData, outAuData, sampleRate, numChannels, frameLength, indepPeriod, variableCoreBitRateMode
#else #else
ExhaleEncoder exhaleEnc (inPcmData, outAuData, sampleRate, numChannels, frameLength, indepPeriod, variableCoreBitRateMode ExhaleEncoder exhaleEnc (inPcmData, outAuData, sampleRate, numChannels, frameLength, indepPeriod, variableCoreBitRateMode
#endif #endif
#if ENABLE_RESAMPLING
+ (enableUpsampler && (variableCoreBitRateMode < 9) ? 1 : 0)
#endif
#if !RESTRICT_TO_AAC #if !RESTRICT_TO_AAC
, true /*noise filling*/, compatibleExtensionFlag > 0 , true /*noise filling*/, compatibleExtensionFlag > 0
#endif #endif
@@ -477,7 +570,7 @@ int main (const int argc, char* argv[])
i = exhaleEnc.initEncoder (outAuData, &bw); // bw stores actual ASC + UC size i = exhaleEnc.initEncoder (outAuData, &bw); // bw stores actual ASC + UC size
if ((i |= mp4Writer.open (outFileHandle, sampleRate, numChannels, inSampDepth, frameLength, startLength, if ((i |= mp4Writer.open (outFileHandle, sampleRate, numChannels, inSampDepth, frameLength, startLength,
indepPeriod, outAuData, bw, time (nullptr) & UINT_MAX, (char) variableCoreBitRateMode)) != 0) indepPeriod, outAuData, bw, (time (nullptr) + 2082844800) & UINT_MAX, (char) variableCoreBitRateMode)) != 0)
{ {
fprintf_s (stderr, " ERROR while trying to initialize xHE-AAC encoder: error value %d was returned!\n\n", i); fprintf_s (stderr, " ERROR while trying to initialize xHE-AAC encoder: error value %d was returned!\n\n", i);
i <<= 2; // return value i <<= 2; // return value
@@ -519,6 +612,10 @@ int main (const int argc, char* argv[])
#endif #endif
i = 1; // for progress bar i = 1; // for progress bar
#if ENABLE_RESAMPLING
// upsample initial frame if necessary
if (enableUpsampler) eaApplyUpsampler2x (inPcmData, inPcmRsmp, frameLength, numChannels, true);
#endif
// initial frame, encode look-ahead AU // initial frame, encode look-ahead AU
if ((bw = exhaleEnc.encodeLookahead ()) < 3) if ((bw = exhaleEnc.encodeLookahead ()) < 3)
{ {
@@ -540,8 +637,16 @@ int main (const int argc, char* argv[])
} }
byteCount += bw; byteCount += bw;
#if ENABLE_RESAMPLING
while (wavReader.read (inPcmData, frameLength >> (enableUpsampler ? 1 : 0)) > 0) // read a new audio frame
#else
while (wavReader.read (inPcmData, frameLength) > 0) // read a new audio frame while (wavReader.read (inPcmData, frameLength) > 0) // read a new audio frame
#endif
{ {
#if ENABLE_RESAMPLING
// upsample audio frame if necessary
if (enableUpsampler) eaApplyUpsampler2x (inPcmData, inPcmRsmp, frameLength, numChannels);
#endif
// frame coding loop, encode next AU // frame coding loop, encode next AU
if ((bw = exhaleEnc.encodeFrame ()) < 3) if ((bw = exhaleEnc.encodeFrame ()) < 3)
{ {
@@ -572,6 +677,10 @@ int main (const int argc, char* argv[])
} }
} // frame loop } // frame loop
#if ENABLE_RESAMPLING
// upsample the last frame if necessary
if (enableUpsampler) eaApplyUpsampler2x (inPcmData, inPcmRsmp, frameLength, numChannels);
#endif
// end of coding loop, encode final AU // end of coding loop, encode final AU
if ((bw = exhaleEnc.encodeFrame ()) < 3) if ((bw = exhaleEnc.encodeFrame ()) < 3)
{ {
@@ -593,12 +702,18 @@ int main (const int argc, char* argv[])
} }
byteCount += bw; byteCount += bw;
const int64_t actualLength = wavReader.getDataBytesRead () / int64_t (numChannels * inSampDepth >> 3); #if ENABLE_RESAMPLING
const int64_t actualLength = wavReader.getDataBytesRead () / int64_t ((numChannels * inSampDepth) >> (enableUpsampler ? 4 : 3));
#else
const int64_t actualLength = wavReader.getDataBytesRead () / int64_t ((numChannels * inSampDepth) >> 3);
#endif
if (((actualLength + startLength) % frameLength) > 0) // flush trailing audio if (((actualLength + startLength) % frameLength) > 0) // flush trailing audio
{ {
memset (inPcmData, 0, inFrameSize * numChannels); memset (inPcmData, 0, inFrameSize * numChannels);
#if ENABLE_RESAMPLING
// upsample flush frame if necessary
if (enableUpsampler) eaApplyUpsampler2x (inPcmData, inPcmRsmp, frameLength, numChannels);
#endif
// flush remaining audio into new AU // flush remaining audio into new AU
if ((bw = exhaleEnc.encodeFrame ()) < 3) if ((bw = exhaleEnc.encodeFrame ()) < 3)
{ {
@@ -671,7 +786,7 @@ int main (const int argc, char* argv[])
// mean & max. bit-rate of encoded AUs // mean & max. bit-rate of encoded AUs
br = uint32_t (((actualLength >> 1) + 8 * (byteCount + 4 * (int64_t) mp4Writer.getFrameCount ()) * sampleRate) / actualLength); br = uint32_t (((actualLength >> 1) + 8 * (byteCount + 4 * (int64_t) mp4Writer.getFrameCount ()) * sampleRate) / actualLength);
bw = uint32_t (((frameLength >> 1) + 8 * (bwMax + 4u /* maximum AU size + stsz as a bit-rate */) * sampleRate) / frameLength); bw = uint32_t (((frameLength >> 1) + 8 * (bwMax + 4u /* maximum AU size + stsz as a bit-rate */) * sampleRate) / frameLength);
bw = mp4Writer.finishFile (br, bw, uint32_t (__min (UINT_MAX - startLength, actualLength)), time (nullptr) & UINT_MAX, bw = mp4Writer.finishFile (br, bw, uint32_t (__min (UINT_MAX - startLength, actualLength)), (time (nullptr) + 2082844800) & UINT_MAX,
(i == 0) && (numChannels < 7) ? outAuData : nullptr); (i == 0) && (numChannels < 7) ? outAuData : nullptr);
// print out collected file statistics // print out collected file statistics
fprintf_s (stdout, " Done, actual average %.1f kbit/s\n\n", (float) br * 0.001f); fprintf_s (stdout, " Done, actual average %.1f kbit/s\n\n", (float) br * 0.001f);
@@ -701,6 +816,13 @@ mainFinish:
free ((void*) inPcmData); free ((void*) inPcmData);
inPcmData = nullptr; inPcmData = nullptr;
} }
#if ENABLE_RESAMPLING
if (inPcmRsmp != nullptr)
{
free ((void*) inPcmRsmp);
inPcmRsmp = nullptr;
}
#endif
if (outAuData != nullptr) if (outAuData != nullptr)
{ {
free ((void*) outAuData); free ((void*) outAuData);

View File

@@ -73,11 +73,13 @@
<LinkIncremental>false</LinkIncremental> <LinkIncremental>false</LinkIncremental>
<IntDir>$(SolutionDir)build\$(PlatformToolset)\$(Platform)\$(Configuration)\</IntDir> <IntDir>$(SolutionDir)build\$(PlatformToolset)\$(Platform)\$(Configuration)\</IntDir>
<OutDir>$(SolutionDir)bin\$(PlatformToolset)\$(Platform)\$(Configuration)\</OutDir> <OutDir>$(SolutionDir)bin\$(PlatformToolset)\$(Platform)\$(Configuration)\</OutDir>
<TargetName>exhale</TargetName>
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental> <LinkIncremental>false</LinkIncremental>
<IntDir>$(SolutionDir)build\$(PlatformToolset)\$(Platform)\$(Configuration)\</IntDir> <IntDir>$(SolutionDir)build\$(PlatformToolset)\$(Platform)\$(Configuration)\</IntDir>
<OutDir>$(SolutionDir)bin\$(PlatformToolset)\$(Platform)\$(Configuration)\</OutDir> <OutDir>$(SolutionDir)bin\$(PlatformToolset)\$(Platform)\$(Configuration)\</OutDir>
<TargetName>exhale</TargetName>
</PropertyGroup> </PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile> <ClCompile>

View File

@@ -916,7 +916,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
{ {
SfbGroupData& grpData = coreConfig.groupingData[ch]; SfbGroupData& grpData = coreConfig.groupingData[ch];
const bool eightShorts = (coreConfig.icsInfoCurr[ch].windowSequence == EIGHT_SHORT); const bool eightShorts = (coreConfig.icsInfoCurr[ch].windowSequence == EIGHT_SHORT);
const bool saveBitRate = (meanSpecFlat[ci] > (SCHAR_MAX >> 1) && samplingRate >= 32000 + (unsigned) m_bitRateMode * 12000); const bool saveBitRate = (meanSpecFlat[ci] > SCHAR_MAX && samplingRate >= 32000 + (unsigned) m_bitRateMode * 12000);
const uint8_t maxSfbCh = grpData.sfbsPerGroup; const uint8_t maxSfbCh = grpData.sfbsPerGroup;
const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong); const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong);
const uint16_t mSfmFac = UCHAR_MAX - ((9u * meanSpecFlat[ci]) >> 4); const uint16_t mSfmFac = UCHAR_MAX - ((9u * meanSpecFlat[ci]) >> 4);

View File

@@ -520,7 +520,7 @@ unsigned SfbQuantizer::initQuantMemory (const unsigned maxTransfLength,
{ {
const unsigned numScaleFactors = (unsigned) maxScaleFacIndex + 1; const unsigned numScaleFactors = (unsigned) maxScaleFacIndex + 1;
#if EC_TRELLIS_OPT_CODING #if EC_TRELLIS_OPT_CODING
const uint8_t numTrellisStates = (samplingRate < 28800 ? 8 - samplingRate / 16000 : 5) - __min (2, (bitRateMode + 2) >> 2); // states per SFB const uint8_t numTrellisStates = (samplingRate < 28800 ? 8 - (samplingRate >> 13) : 5) - __min (2, (bitRateMode + 2) >> 2); // states per SFB
const uint8_t numSquaredStates = numTrellisStates * numTrellisStates; const uint8_t numSquaredStates = numTrellisStates * numTrellisStates;
const uint16_t quantRateLength = (samplingRate < 28800 || samplingRate >= 57600 ? 512 : 256); // quantizeMagnRDOC() const uint16_t quantRateLength = (samplingRate < 28800 || samplingRate >= 57600 ? 512 : 256); // quantizeMagnRDOC()
#endif #endif