exe, time, upsampler

This commit is contained in:
Christian R. Helmrich 2020-06-21 00:00:00 +02:00
parent cd4ebeb1f2
commit fd32557d3e
6 changed files with 136 additions and 12 deletions

View File

@ -34,7 +34,7 @@ exhale is being made available under an open-source license which is
similar to the 3-clause BSD license but modified to address specific
aspects dictated by the nature and the output of this application.
The license text and release notes for the current version 1.0.4 can
The license text and release notes for the current version 1.0.5 can
be found in the `include` subdirectory of the exhale distribution.

View File

@ -85,7 +85,7 @@
<ul>
<li><h3>support for coding with a core coder frame length of 768 samples, no version plan</h3></li>
<li><h3>exhaleLib: completed integration of predictive joint-channel coding, version 1.0.6</h3></li>
<li><h3>exhaleLib: finalization of support for 3.0 &#x2013; 5.1 multichannel coding, no version plan</h3></li>
<li><h3>exhaleLib: finalization of support for 3.0&#x2013;5.1 multichannel coding, no version plan</h3></li>
<li><h3>exhaleLib: speed-ups and further quality tuning for difficult signals, as necessary.</h3></li>
</ul>
<h3><br></h3>

View File

@ -49,9 +49,79 @@
// loudness/peak normalization constants and compile-time feature switches of the application
#define EA_LOUD_NORM -42.25f // -100 + 57.75 of ISO 23003-4, Table A.48
#define EA_PEAK_NORM -96.33f // 20 * log10(2^-16), 16-bit normalization
#define EA_PEAK_MIN 0.262f // 20 * log10() + EA_PEAK_NORM = -108 dBFS
#define ENABLE_RESAMPLING 1 // 1: automatic input up- and downsampling
#define IGNORE_WAV_LENGTH 0 // 1: ignore input size indicators (nasty)
#define XHE_AAC_LOW_DELAY 0 // 1: allow encoding with 768 frame length
#if ENABLE_RESAMPLING
// Weights of the symmetric FIR interpolator applied by eaApplyUpsampler2x () when it computes the
// output sample located between the input samples chUpsBuf[0] and chUpsBuf[1]. Entry c, with
// 1 <= c <= 31, is multiplied with the sum of the input pair chUpsBuf[-c] and chUpsBuf[c + 1].
// Entry 0 is never read: the innermost pair is weighted with the literal 41681 in that function.
// The values are scaled by 2^16 (the accumulator is shifted right by 16 bits after filtering);
// 41681 plus all entries below equals 32768, i.e. a weight of 0.5 per sample of a pair in total.
static const int16_t usfc2x[32] = { // 2x upsampling filter coefficients
0, -13785, 8142, -5681, 4281, -3367, 2716, -2225, 1840, -1530, 1275, -1062, 883, -732,
604, -495, 402, -325, 260, -205, 160, -124, 94, -70, 51, -36, 25, -16, 11, -6, 3, -1
};
// Decides whether the 2x input upsampler shall be used and, if so, allocates its work buffer.
//
// upsampleBuffer  receives the malloc'ed buffer (or nullptr if the allocation failed); it is left
//                 untouched when the upsampler is not needed. The caller has to free () it.
// bitRateMode     upsampling is requested only if bitRateMode * 4000 exceeds sampleRate
// sampleRate      input sampling rate
//                 NOTE(review): a 16-bit parameter cannot hold rates above 65535 - confirm callers
// frameSize       frame length after upsampling; must be greater than 64 for upsampling
// numChannels     number of channels, each getting (frameSize >> 1) + 64 buffer entries
//
// Returns true if upsampling is to be applied and the buffer is available, false otherwise.
static bool eaInitUpsampler2x (int32_t** upsampleBuffer, const uint16_t bitRateMode, const uint16_t sampleRate,
                               const uint16_t frameSize, const uint16_t numChannels)
{
  const uint16_t halfFrame  = frameSize >> 1;
  const uint16_t perChannel = halfFrame + (32 << 1);

  // nothing to do for tiny frames or when the input rate is already high enough for this mode
  if (frameSize <= (32 << 1) || bitRateMode * 4000 <= sampleRate)
  {
    return false;
  }

  int32_t* const buffer = (int32_t*) malloc (perChannel * numChannels * sizeof (int32_t));

  *upsampleBuffer = buffer;
  if (buffer == nullptr)
  {
    return false; // out of memory, caller continues without the upsampler
  }

  // clear only the trailing entries of every channel, the leading ones remain unset here
  int32_t* chTail = buffer + halfFrame;

  for (uint16_t ch = 0; ch < numChannels; ch++, chTail += perChannel)
  {
    memset (chTail, 0, (perChannel - halfFrame) * sizeof (int32_t));
  }
  return true;
}
// Upsamples one frame of interleaved PCM audio by a factor of two, in place.
//
// pcmBuffer       interleaved samples. On entry it holds (frameSize >> 1) input samples per channel,
//                 or (frameSize >> 1) + 32 of them when firstFrame is true. On return it holds
//                 2 * (frameSize >> 1) samples per channel: each input sample followed by one
//                 interpolated sample.
// upsampleBuffer  work buffer with (frameSize >> 1) + 64 entries per channel, stored channel after
//                 channel, whose contents are carried over from call to call as filter memory
// frameSize       frame length after upsampling
// numChannels     number of interleaved channels in pcmBuffer
// firstFrame      true for the first call, which consumes 32 additional look-ahead input samples
static void eaApplyUpsampler2x (int32_t* const pcmBuffer, int32_t* const upsampleBuffer,
                                const uint16_t frameSize, const uint16_t numChannels, const bool firstFrame = false)
{
  const uint16_t inLength = (frameSize >> 1) + (firstFrame ? 32 : 0);
  const uint16_t chLength = (frameSize >> 1) + (32 << 1);
  uint16_t ch;

  for (ch = 0; ch < numChannels; ch++) // step 1: add deinterleaved input samples to resampling buffer
  {
    int32_t* chPcmBuf = &pcmBuffer[ch];
    int32_t* chUpsBuf = &upsampleBuffer[chLength * ch];
# if 0
    if (firstFrame) // construct leading sample values via extrapolation
    {
      for (int8_t i = 0; i < 32; i++) chUpsBuf[i] = (*chPcmBuf * i + (32 >> 1)) >> 5;
    }
    else
# endif
    // move the newest samples of the previous call to the front of the channel buffer. Source and
    // destination lie in the same buffer and overlap once (frameSize >> 1) is smaller than 64, so
    // memmove () is required here; memcpy () is undefined for overlapping regions.
    memmove (chUpsBuf, &chUpsBuf[inLength], (chLength - inLength) * sizeof (int32_t)); // update memory

    chUpsBuf += chLength - inLength;
    for (uint16_t i = inLength; i > 0; i--, chPcmBuf += numChannels, chUpsBuf++)
    {
      *chUpsBuf = *chPcmBuf; // deinterleave, store in resampling buffer
    }
  }

  for (ch = 0; ch < numChannels; ch++) // step 2: upsample, reinterleave, and save to PCM input buffer
  {
    /*in*/int32_t* chPcmBuf = &pcmBuffer[ch];
    // start 32 entries into the channel buffer so that chUpsBuf[-31] ... chUpsBuf[32] are available
    const int32_t* chUpsBuf = &upsampleBuffer[chLength * ch + 32];

    for (uint16_t i = (frameSize >> 1); i > 0; i--, chPcmBuf += numChannels, chUpsBuf++)
    {
      // 64-bit accumulation of the symmetric filter; the innermost sample pair uses weight 41681
      int64_t r = (chUpsBuf[0] + (int64_t) chUpsBuf[1]) * 41681;

      for (int16_t c = 32 - 1; c > 0; c--) r += (chUpsBuf[-c] + (int64_t) chUpsBuf[c + 1]) * usfc2x[c];

      *chPcmBuf = *chUpsBuf; chPcmBuf += numChannels; // 1-to-1 mapping
      *chPcmBuf = int32_t ((r - SHRT_MIN) >> 16); // interpolated sample, rounded by adding 2^15
    }
  }
}
#endif // ENABLE_RESAMPLING
// main routine
#ifdef EXHALE_APP_WCHAR
# ifdef __MINGW32__
@ -67,6 +137,9 @@ int main (const int argc, char* argv[])
const bool readStdin = (argc == 3);
BasicWavReader wavReader;
int32_t* inPcmData = nullptr; // 24-bit WAVE audio input buffer
#if ENABLE_RESAMPLING
int32_t* inPcmRsmp = nullptr; // temporary buffer for resampler
#endif
uint8_t* outAuData = nullptr; // access unit (AU) output buffer
int inFileHandle = -1, outFileHandle = -1;
uint32_t loudStats = EA_LOUD_INIT; // valid empty loudness data
@ -381,6 +454,10 @@ int main (const int argc, char* argv[])
goto mainFinish; // ask for resampling
}
if (wavReader.getSampleRate () > 32000 && variableCoreBitRateMode == 1)
{
fprintf_s (stderr, " WARNING: The input sampling rate should be 32 kHz or less for preset mode %d!\n\n", variableCoreBitRateMode);
}
if (outPathEnd == 0) // name has no path
{
@ -430,8 +507,13 @@ int main (const int argc, char* argv[])
const unsigned numChannels = wavReader.getNumChannels ();
const unsigned inFrameSize = frameLength * sizeof (int32_t);
const unsigned inSampDepth = wavReader.getBitDepth ();
const int64_t expectLength = wavReader.getDataBytesLeft () / int64_t (numChannels * inSampDepth >> 3);
#if ENABLE_RESAMPLING
const bool enableUpsampler = eaInitUpsampler2x (&inPcmRsmp, variableCoreBitRateMode, i, frameLength, numChannels);
const uint16_t firstLength = uint16_t (enableUpsampler ? (frameLength >> 1) + 32 : frameLength); // upsampler look-ahead
const int64_t expectLength = wavReader.getDataBytesLeft () / int64_t ((numChannels * inSampDepth) >> (enableUpsampler ? 4 : 3));
#else
const int64_t expectLength = wavReader.getDataBytesLeft () / int64_t ((numChannels * inSampDepth) >> 3);
#endif
// allocate dynamic frame memory buffers
inPcmData = (int32_t*) malloc (inFrameSize * numChannels); // max frame in size
outAuData = (uint8_t*) malloc ((6144 >> 3) * numChannels); // max frame AU size
@ -443,7 +525,11 @@ int main (const int argc, char* argv[])
goto mainFinish; // memory alloc error
}
#if ENABLE_RESAMPLING
if (wavReader.read (inPcmData, firstLength) != firstLength) // full first frame
#else
if (wavReader.read (inPcmData, frameLength) != frameLength) // full first frame
#endif
{
fprintf_s (stderr, " ERROR while trying to encode input audio data! The audio stream is too short!\n\n");
i = 1024; // return value
@ -452,7 +538,11 @@ int main (const int argc, char* argv[])
}
else // start coding loop, show progress
{
#if ENABLE_RESAMPLING
const unsigned sampleRate = wavReader.getSampleRate () << (enableUpsampler ? 1 : 0);
#else
const unsigned sampleRate = wavReader.getSampleRate ();
#endif
const unsigned indepPeriod = (sampleRate < 48000 ? sampleRate / frameLength : 45 /*for 50-Hz video, use 50 for 60-Hz video*/);
const unsigned mod3Percent = unsigned ((expectLength * (3 + coreSbrFrameLengthIndex)) >> 17);
uint32_t byteCount = 0, bw = (numChannels < 7 ? loudStats : 0);
@ -462,10 +552,13 @@ int main (const int argc, char* argv[])
LoudnessEstimator loudnessEst (inPcmData, 24 /*bit*/, sampleRate, numChannels);
// open & prepare ExhaleEncoder object
#if USE_EXHALELIB_DLL
ExhaleEncAPI& exhaleEnc = *exhaleCreate (inPcmData, outAuData, sampleRate, numChannels, frameLength, indepPeriod, variableCoreBitRateMode +
ExhaleEncAPI& exhaleEnc = *exhaleCreate (inPcmData, outAuData, sampleRate, numChannels, frameLength, indepPeriod, variableCoreBitRateMode
#else
ExhaleEncoder exhaleEnc (inPcmData, outAuData, sampleRate, numChannels, frameLength, indepPeriod, variableCoreBitRateMode
#endif
#if ENABLE_RESAMPLING
+ (enableUpsampler && (variableCoreBitRateMode < 9) ? 1 : 0)
#endif
#if !RESTRICT_TO_AAC
, true /*noise filling*/, compatibleExtensionFlag > 0
#endif
@ -477,7 +570,7 @@ int main (const int argc, char* argv[])
i = exhaleEnc.initEncoder (outAuData, &bw); // bw stores actual ASC + UC size
if ((i |= mp4Writer.open (outFileHandle, sampleRate, numChannels, inSampDepth, frameLength, startLength,
indepPeriod, outAuData, bw, time (nullptr) & UINT_MAX, (char) variableCoreBitRateMode)) != 0)
indepPeriod, outAuData, bw, (time (nullptr) + 2082844800) & UINT_MAX, (char) variableCoreBitRateMode)) != 0)
{
fprintf_s (stderr, " ERROR while trying to initialize xHE-AAC encoder: error value %d was returned!\n\n", i);
i <<= 2; // return value
@ -519,6 +612,10 @@ int main (const int argc, char* argv[])
#endif
i = 1; // for progress bar
#if ENABLE_RESAMPLING
// upsample initial frame if necessary
if (enableUpsampler) eaApplyUpsampler2x (inPcmData, inPcmRsmp, frameLength, numChannels, true);
#endif
// initial frame, encode look-ahead AU
if ((bw = exhaleEnc.encodeLookahead ()) < 3)
{
@ -540,8 +637,16 @@ int main (const int argc, char* argv[])
}
byteCount += bw;
#if ENABLE_RESAMPLING
while (wavReader.read (inPcmData, frameLength >> (enableUpsampler ? 1 : 0)) > 0) // read a new audio frame
#else
while (wavReader.read (inPcmData, frameLength) > 0) // read a new audio frame
#endif
{
#if ENABLE_RESAMPLING
// upsample audio frame if necessary
if (enableUpsampler) eaApplyUpsampler2x (inPcmData, inPcmRsmp, frameLength, numChannels);
#endif
// frame coding loop, encode next AU
if ((bw = exhaleEnc.encodeFrame ()) < 3)
{
@ -572,6 +677,10 @@ int main (const int argc, char* argv[])
}
} // frame loop
#if ENABLE_RESAMPLING
// upsample the last frame if necessary
if (enableUpsampler) eaApplyUpsampler2x (inPcmData, inPcmRsmp, frameLength, numChannels);
#endif
// end of coding loop, encode final AU
if ((bw = exhaleEnc.encodeFrame ()) < 3)
{
@ -593,12 +702,18 @@ int main (const int argc, char* argv[])
}
byteCount += bw;
const int64_t actualLength = wavReader.getDataBytesRead () / int64_t (numChannels * inSampDepth >> 3);
#if ENABLE_RESAMPLING
const int64_t actualLength = wavReader.getDataBytesRead () / int64_t ((numChannels * inSampDepth) >> (enableUpsampler ? 4 : 3));
#else
const int64_t actualLength = wavReader.getDataBytesRead () / int64_t ((numChannels * inSampDepth) >> 3);
#endif
if (((actualLength + startLength) % frameLength) > 0) // flush trailing audio
{
memset (inPcmData, 0, inFrameSize * numChannels);
#if ENABLE_RESAMPLING
// upsample flush frame if necessary
if (enableUpsampler) eaApplyUpsampler2x (inPcmData, inPcmRsmp, frameLength, numChannels);
#endif
// flush remaining audio into new AU
if ((bw = exhaleEnc.encodeFrame ()) < 3)
{
@ -671,7 +786,7 @@ int main (const int argc, char* argv[])
// mean & max. bit-rate of encoded AUs
br = uint32_t (((actualLength >> 1) + 8 * (byteCount + 4 * (int64_t) mp4Writer.getFrameCount ()) * sampleRate) / actualLength);
bw = uint32_t (((frameLength >> 1) + 8 * (bwMax + 4u /* maximum AU size + stsz as a bit-rate */) * sampleRate) / frameLength);
bw = mp4Writer.finishFile (br, bw, uint32_t (__min (UINT_MAX - startLength, actualLength)), time (nullptr) & UINT_MAX,
bw = mp4Writer.finishFile (br, bw, uint32_t (__min (UINT_MAX - startLength, actualLength)), (time (nullptr) + 2082844800) & UINT_MAX,
(i == 0) && (numChannels < 7) ? outAuData : nullptr);
// print out collected file statistics
fprintf_s (stdout, " Done, actual average %.1f kbit/s\n\n", (float) br * 0.001f);
@ -701,6 +816,13 @@ mainFinish:
free ((void*) inPcmData);
inPcmData = nullptr;
}
#if ENABLE_RESAMPLING
if (inPcmRsmp != nullptr)
{
free ((void*) inPcmRsmp);
inPcmRsmp = nullptr;
}
#endif
if (outAuData != nullptr)
{
free ((void*) outAuData);

View File

@ -73,11 +73,13 @@
<LinkIncremental>false</LinkIncremental>
<IntDir>$(SolutionDir)build\$(PlatformToolset)\$(Platform)\$(Configuration)\</IntDir>
<OutDir>$(SolutionDir)bin\$(PlatformToolset)\$(Platform)\$(Configuration)\</OutDir>
<TargetName>exhale</TargetName>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
<IntDir>$(SolutionDir)build\$(PlatformToolset)\$(Platform)\$(Configuration)\</IntDir>
<OutDir>$(SolutionDir)bin\$(PlatformToolset)\$(Platform)\$(Configuration)\</OutDir>
<TargetName>exhale</TargetName>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>

View File

@ -916,7 +916,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
{
SfbGroupData& grpData = coreConfig.groupingData[ch];
const bool eightShorts = (coreConfig.icsInfoCurr[ch].windowSequence == EIGHT_SHORT);
const bool saveBitRate = (meanSpecFlat[ci] > (SCHAR_MAX >> 1) && samplingRate >= 32000 + (unsigned) m_bitRateMode * 12000);
const bool saveBitRate = (meanSpecFlat[ci] > SCHAR_MAX && samplingRate >= 32000 + (unsigned) m_bitRateMode * 12000);
const uint8_t maxSfbCh = grpData.sfbsPerGroup;
const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong);
const uint16_t mSfmFac = UCHAR_MAX - ((9u * meanSpecFlat[ci]) >> 4);

View File

@ -520,7 +520,7 @@ unsigned SfbQuantizer::initQuantMemory (const unsigned maxTransfLength,
{
const unsigned numScaleFactors = (unsigned) maxScaleFacIndex + 1;
#if EC_TRELLIS_OPT_CODING
const uint8_t numTrellisStates = (samplingRate < 28800 ? 8 - samplingRate / 16000 : 5) - __min (2, (bitRateMode + 2) >> 2); // states per SFB
const uint8_t numTrellisStates = (samplingRate < 28800 ? 8 - (samplingRate >> 13) : 5) - __min (2, (bitRateMode + 2) >> 2); // states per SFB
const uint8_t numSquaredStates = numTrellisStates * numTrellisStates;
const uint16_t quantRateLength = (samplingRate < 28800 || samplingRate >= 57600 ? 512 : 256); // quantizeMagnRDOC()
#endif