low-energy tuning

This commit is contained in:
Christian R. Helmrich 2020-04-23 00:00:56 +02:00
parent 660cffb257
commit 8bc32a02cd
4 changed files with 32 additions and 11 deletions

View File

@ -34,7 +34,7 @@ exhale is being made available under an open-source license which is
similar to the 3-clause BSD license but modified to address specific
aspects dictated by the nature and the output of this application.
The license text and release notes for the current version 1.0.2 can
The license text and release notes for the current version 1.0.3 can
be found in the `include` subdirectory of the exhale distribution.

View File

@ -25,9 +25,13 @@
<td valign="top">
<h1><br><span class="pink">exhale</span> - <span class="pink">e</span>codis e<span class="pink">x</span>tended <span class="pink">h</span>igh-efficiency <span class="pink">a</span>nd <span class="pink">l</span>ow-complexity <span class="pink">e</span>ncoder<br><span class="gray"><sup><br>Software Release Notes, Version History, Known Issues, Upcoming Feature Roadmap</sup></span><br><br></h1>
<h3>&nbsp; &nbsp;The version of this distribution of the &laquo;exhale&raquo; software release is <b>1.0.2</b> (official pub&shy;lic minor release) from March 2020. Please check <a href="http://www.ecodis.de">www.ecodis.de</a> regularly for new versions of this software. A summary of each version up to this release, a list of known issues with this release, and a roadmap of additional functionality are provided below.</h3>
<h3>&nbsp; &nbsp;The version of this distribution of the &laquo;exhale&raquo; software release is <b>1.0.3</b> (official pub&shy;lic minor release) from April 2020. Please check <a href="http://www.ecodis.de">www.ecodis.de</a> regularly for new versions of this software. A summary of each version up to this release, a list of known issues with this release, and a roadmap of additional functionality are provided below.</h3>
<h3><br><b>Chronological Version History</b></h3>
<h3>&nbsp; &nbsp;Version <b>1.0.2 <span class="gray">&nbsp;Mar. 2020, this release</span></b></h3>
<h3>&nbsp; &nbsp;Version <b>1.0.3 <span class="gray">&nbsp;Apr. 2020, this release</span></b></h3>
<ul>
<li><h3>extended basic joint-stereo coding functionality for mid/high rates, minor bugfixes</h3></li>
</ul>
<h3>&nbsp; &nbsp;Version <b>1.0.2 <span class="gray">&nbsp;Mar. 2020</span></b></h3>
<ul>
<li><h3>added basic low/mid-rate joint-stereo coding functionality, bugfixes, and speedups</h3></li>
<li><h3>exhaleApp: support for input sampling rates of up to 48000 Hz with CVBR mode 2</h3></li>
@ -63,13 +67,13 @@
<h3>&nbsp; &nbsp;If you are in need of an additional library or application feature <b>not</b> mentioned below, please contact ecodis or a contributor with a request, and we will see what we can do.</h3>
<ul>
<li><h3>support for coding with a core coder frame length of 768 samples, no version plan</h3></li>
<li><h3>exhaleLib: finalized integration of higher-rate joint-channel coding, version 1.0.3</h3></li>
<li><h3>exhaleLib: quality tuning and bug fixing for low-rate stereo coding, version 1.0.3</h3></li>
<li><h3>exhaleLib: finalization of support for 3.0 &#x2013; 5.1 multichannel coding, version 1.0.4</h3></li>
<li><h3>exhaleLib: completed integration of predictive joint-channel coding, version 1.0.4</h3></li>
<li><h3>exhaleLib: quality tuning and bug fixing for low-rate stereo coding, version 1.0.4</h3></li>
<li><h3>exhaleLib: finalization of support for 3.0 &#x2013; 5.1 multichannel coding, version 1.0.5</h3></li>
<li><h3>exhaleLib: speed-ups and further quality tuning for critical signals, version 1.0.5</h3></li>
</ul>
<h3><br></h3>
<h4><span class="gray">Written by C. R. Helmrich for exhale 1.0.2, Mar. 2020. Available at www.ecodis.de/exhale/release.htm.</span><br><br></h4>
<h4><span class="gray">Written by C. R. Helmrich for exhale 1.0.3, Apr. 2020. Available at www.ecodis.de/exhale/release.htm.</span><br><br></h4>
</td>
<td valign="top" colspan="2">

View File

@ -693,7 +693,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
const uint32_t maxSfbLong = (samplingRate < 37566 ? 51 /*32 kHz*/ : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
const uint32_t reductionFactor = (samplingRate < 37566 ? 2 : 3); // undercoding reduction
const uint64_t scaleSr = (samplingRate < 27713 ? 37 - m_bitRateMode : 37) - (m_bitRateMode > 3 ? nChannels >> 1 : 0);
const uint64_t scaleSr = (samplingRate < 27713 ? 37 - m_bitRateMode : 37) - (m_bitRateMode > 2 ? nChannels >> 1 : 0);
const uint64_t scaleBr = (m_bitRateMode == 0 ? 32 : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1));
uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf;
uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS];
@ -857,7 +857,9 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
const uint32_t* refRms = &coreConfig.groupingData[1 - ch].sfbRmsValues[m_numSwbShort * gr];
uint8_t* grpScaleFacs = &grpData.scaleFactors[m_numSwbShort * gr];
uint32_t* grpStepSizes = &stepSizes[m_numSwbShort * gr];
#if SA_IMPROVED_REAL_ABS
uint8_t* grpStereoData = &coreConfig.stereoDataCurr[m_numSwbShort * gr];
#endif
uint32_t b, grpRmsMin = INT_MAX; // min. RMS value, used for overcoding reduction
// undercoding reduction for case where large number of coefs is quantized to zero
@ -873,6 +875,9 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
getComplexRmsValue (refRms[b], gr, b, numSwbCh, coreConfig.tnsData[1 - ch]) >> 9);
#endif
if (grpRms[b] < grpRmsMin) grpRmsMin = grpRms[b];
#ifndef NO_DTX_MODE
if ((m_bitRateMode > 3) || (meanSpecFlat[ci] > (SCHAR_MAX >> 1)) || (rmsComp >= TA_EPS))
#endif
if (rmsComp >= rmsRef9 && (rmsComp < (grpStepSizes[b] >> 1))) // zero-quantized
{
s -= ((grpOff[b + 1] - grpOff[b]) * reductionFactor * __min (2 * SA_EPS, rmsComp) + SA_EPS) >> 11; // / (2 * SA_EPS)
@ -887,6 +892,9 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
const uint32_t rmsComp = getComplexRmsValue (grpRms[b], gr, b, numSwbCh, coreConfig.tnsData[ch]);
const uint32_t rmsRef9 = (!coreConfig.commonWindow ? rmsComp :
getComplexRmsValue (refRms[b], gr, b, numSwbCh, coreConfig.tnsData[1 - ch]) >> 9);
#endif
#ifndef NO_DTX_MODE
if ((m_bitRateMode > 3) || (meanSpecFlat[ci] > (SCHAR_MAX >> 1)) || (rmsComp >= TA_EPS))
#endif
if (rmsComp >= rmsRef9) // check only first SFB above max_sfb for simplification
{
@ -901,7 +909,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
const unsigned lfAtten = (b <= 5 ? (eightShorts ? 1 : 4) + b * lfConst : 5 * lfConst - 1 + b + ((b + 5) >> 4));
const uint8_t sfbWidth = grpOff[b + 1] - grpOff[b];
const uint64_t rateFac = mSfmFac * s * __min (32, lfAtten * grpData.numWindowGroups); // rate control part 1
const uint64_t sScaled = ((1u << 23) + __max (grpRmsMin, grpStepSizes[b]) * (scaleBr - (grpStereoData[b] > 0 ? 1 : 0)) * rateFac) >> 24;
const uint64_t sScaled = ((1u << 23) + __max (grpRmsMin, grpStepSizes[b]) * scaleBr * rateFac) >> 24;
// scale step-sizes according to VBR mode & derive scale factors from step-sizes
grpStepSizes[b] = uint32_t (__max (BA_EPS, __min (UINT_MAX, sScaled)));
@ -919,6 +927,15 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
{
for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++)
{
#ifndef NO_DTX_MODE
if (meanSpecFlat[ci] <= (SCHAR_MAX >> 1)) // noise-like low-energy "DTX" frame
{
for (s = (coreConfig.icsInfoPrev[ch].windowSequence == EIGHT_SHORT ? 24 : m_specGapFiller.getFirstGapFillSfb ()); s < maxSfbCh; s++)
{
if (grpData.sfbRmsValues[s + m_numSwbShort * gr] < ((3 * TA_EPS) >> 1)) grpData.scaleFactors[s + m_numSwbShort * gr] = 0;
}
}
#endif
memset (&grpData.scaleFactors[maxSfbCh + m_numSwbShort * gr], 0, (numSwbFrame - maxSfbCh) * sizeof (uint8_t));
}
grpData.sfbsPerGroup = coreConfig.icsInfoCurr[ch].maxSfb = numSwbFrame;
@ -1061,7 +1078,7 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
const uint8_t maxSfbLong = (samplingRate < 37566 ? 51 /*32 kHz*/ : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
const uint8_t maxSfbShort = (samplingRate < 37566 ? 14 /*32 kHz*/ : brModeAndFsToMaxSfbShort(m_bitRateMode, samplingRate));
const uint16_t peakIndex = (shortWinCurr ? 0 : (m_specAnaCurr[ci] >> 5) & 2047);
const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort : maxSfbLong) - 5 + (m_bitRateMode >> 1) + (meanSpecFlat[ci] >> 5);
const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort - 4 : maxSfbLong - 6) + (m_bitRateMode >> 1) + (meanSpecFlat[ci] >> 5);
const unsigned targetBitCount25 = ((60000 + 20000 * m_bitRateMode) * nSamplesInFrame) / (samplingRate * ((grpData.numWindowGroups + 1) >> 1));
unsigned b = grpData.sfbsPerGroup - 1;

View File

@ -453,7 +453,7 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
if (alphaRe == 0)
{
if (realOnlyCalc) // update previous magnitude value
if (realOnlyCalc) // update previous res. MDCT value
{
sfbMdctR += sfbWidth - 1;
prevResi = (grpSData[sfbEv] > 0 ? *sfbMdctR : int32_t (((int64_t) sfbMdctD[sfbWidth - 1] +