rate ctrl, 32 kHz fix

This commit is contained in:
Christian R. Helmrich 2020-08-18 01:00:00 +02:00
parent 051900fe7c
commit b033287115
9 changed files with 48 additions and 36 deletions

View File

@ -16,7 +16,7 @@ if("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}")
endif()
project(exhale VERSION 1.0.6 LANGUAGES CXX)
project(exhale VERSION 1.0.7 LANGUAGES CXX)
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
set(CMAKE_BUILD_TYPE Release

View File

@ -34,7 +34,7 @@ exhale is being made available under an open-source license which is
similar to the 3-clause BSD license but modified to address specific
aspects dictated by the nature and the output of this application.
The license text and release notes for the current version 1.0.6 can
The license text and release notes for the current version 1.0.7 can
be found in the `include` subdirectory of the exhale distribution.

View File

@ -25,9 +25,14 @@
<td valign="top">
<h1><br><span class="pink">exhale</span> - <span class="pink">e</span>codis e<span class="pink">x</span>tended <span class="pink">h</span>igh-efficiency <span class="pink">a</span>nd <span class="pink">l</span>ow-complexity <span class="pink">e</span>ncoder<br><span class="gray"><sup><br>Software Release Notes, Version History, Known Issues, Upcoming Feature Roadmap</sup></span><br><br></h1>
<h3>&nbsp; &nbsp;The version of this distribution of the &laquo;exhale&raquo; software release is <b>1.0.6</b> (official pub&shy;lic minor release) from July 30, 2020. Please check <a href="http://www.ecodis.de/audio.htm#mpeg">www.ecodis.de</a> regularly for new versions of this software. A summary of each version up to this release, a list of known issues with this release, and a roadmap of additional functionality are provided below.</h3>
<h3>&nbsp; &nbsp;The version of this distribution of the &laquo;exhale&raquo; software release is <b>1.0.7</b> (official pub&shy;lic minor release) from August 2020. Please check <a href="http://www.ecodis.de/audio.htm#mpeg">www.ecodis.de</a> regularly for new versions of this software. A summary of each version up to this release, a list of known issues with this release, and a roadmap of additional functionality are provided below.</h3>
<h3><br><b>Chronological Version History</b></h3>
<h3>&nbsp; &nbsp;Version <b>1.0.6 <span class="gray">&nbsp;July 2020, this release</span></b></h3>
<h3>&nbsp; &nbsp;Version <b>1.0.7 <span class="gray">&nbsp;Aug. 2020, this release</span></b></h3>
<ul>
<li><h3>minor bugfixes in bit-rate control and higher-rate coding at 32 kHz sampling rate</h3></li>
<li><h3>exhaleApp: write complete MP4 &laquo;stss&raquo; data for improved compatibility (issue 13)</h3></li>
</ul>
<h3>&nbsp; &nbsp;Version <b>1.0.6 <span class="gray">&nbsp;July 2020</span></b></h3>
<ul>
<li><h3>bugfixes, improved quality on some transient signals, better decoder compatibility</h3></li>
<li><h3>exhaleApp: support for Extensible WAVE format, write MP4 &laquo;prol&raquo; data (issue 10)</h3></li>
@ -85,19 +90,19 @@
<h3><br><b>Known Issues with This Release</b></h3>
<h3>&nbsp; &nbsp;If you notice an issue with this release <b>not</b> mentioned below, please contact ecodis or a contributor with the details (configuration, input file) needed to reproduce the issue.</h3>
<ul>
<li><h3>exhaleLib: Coding of stereo or multichannel input occasionally leads to slightly in&shy;creased bit-rates because the predictive joint-channel coding provided by ISO/IEC <a href="https://www.iso.org/standard/76385.html">23003-3</a> has not been fully implemented. See the functionality roadmap below.</h3></li>
<li><h3>exhaleApp: At the end of each encoding run, the MPEG-4 file header with seeking information is written to the beginning of the output file to maximize compatibility in audio streaming applications. Since such a file operation is not supported with pipe-based output, encoding to stdout instead of files is currently not supported.</h3></li>
<li><h3>exhaleApp: Only basic WAVE input file reading functionality has been implemen&shy;ted. Specifically, 8-bit WAVE input is assumed to contain an even number of audio samples, and ITU-R <a href="https://www.itu.int/rec/R-REC-BS.2088/en">BS.2088</a> (RF64, Broadcast WAVE) files are not supported.</h3></li>
</ul>
<h3><br><b>Roadmap of Upcoming Features</b></h3>
<h3>&nbsp; &nbsp;If you are in need of an additional library or application feature <b>not</b> mentioned below, please contact ecodis or a contributor with a request, and we will see what we can do.</h3>
<ul>
<li><h3>support for coding with a core coder frame length of 768 samples, no version plan</h3></li>
<li><h3>exhaleLib: completed integration of predictive joint-channel coding, version 1.0.7</h3></li>
<li><h3>exhaleLib: completed integration of predictive joint-channel coding, version 1.1.x</h3></li>
<li><h3>exhaleLib: finalization of support for 3.0&#x2013;5.1 multichannel coding, no version plan</h3></li>
<li><h3>exhaleLib: speed-ups and further quality tuning for difficult signals, as necessary</h3></li>
</ul>
<h3><br></h3>
<h4><span class="gray">Written by C. R. Helmrich for exhale 1.0.6, July 2020. Available at www.ecodis.de/exhale/release.htm.</span><br><br></h4>
<h4><span class="gray">Written by C. R. Helmrich for exhale 1.0.7, August 2020. Available at www.ecodis.de/exhale/release.htm.</span><br><br></h4>
</td>
<td valign="top" colspan="2">

View File

@ -15,5 +15,5 @@
# define EXHALELIB_VERSION_MINOR "0"
#endif
#ifndef EXHALELIB_VERSION_BUGFIX
# define EXHALELIB_VERSION_BUGFIX ".6" // "RC" or ".0", ".1", ...
# define EXHALELIB_VERSION_BUGFIX ".7" // "RC" or ".0", ".1", ...
#endif

View File

@ -13,7 +13,7 @@
0 ICON "exhaleApp.ico"
VS_VERSION_INFO VERSIONINFO
FILEVERSION 1,0,6
FILEVERSION 1,0,7
BEGIN
BLOCK "StringFileInfo"
BEGIN

View File

@ -432,9 +432,7 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
const SfbGroupData& grpData = *groupData[ch];
const uint32_t maxSfbInCh = __min (MAX_NUM_SWB_LONG, grpData.sfbsPerGroup);
const bool eightShorts = (grpData.numWindowGroups != 1);
#if 1
const bool lowRateTuning = (samplingRate >= 27713) && (sfm[ch] <= (SCHAR_MAX >> 1));
#endif
const uint32_t* rms = grpData.sfbRmsValues;
uint32_t* stepSizes = &sfbStepSizes[ch * numSwbShort * NUM_WINDOW_GROUPS];
@ -453,7 +451,7 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
uint64_t s = (eightShorts ? (nSamplesInFrame * grpData.windowGroupLength[gr]) >> 1 : nSamplesInFrame << 2);
memset (m_tempSfbValue, UCHAR_MAX, maxSfbInCh * sizeof (uint8_t));
#if 1
if ((m_rateIndex == 0) && lowRateTuning && (maxSfbInCh > 0) && !eightShorts)
{
uint32_t numRedBands = nSamplesInFrame; // final result lies between 1/4 and 1/2
@ -471,7 +469,7 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
grpStepSizes[b] = __max (grpStepSizes[b], grpRms[b] >= (UINT_MAX >> 1) ? UINT_MAX : (grpRms[b] + 1) << 1);
}
}
#endif
// undercoding reduction for case where large number of coefs is quantized to zero
for (b = 0; b < maxSfbInCh; b++)
{
@ -520,14 +518,15 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
{
grpStepSizes[b] = uint32_t ((__max (grpRmsMin, grpStepSizes[b]) * s * (m_tempSfbValue[b] + 1u) + (1u << 14)) >> 15);
if (grpStepSizes[b] <= (grpRms[b] >> 11)) grpStepSizes[b] = __max (BA_EPS, grpRms[b] >> 11);
#if 1
if ((m_rateIndex == 0) && lowRateTuning)
if ((m_rateIndex == 0) && lowRateTuning) // clip near-zero SNRs to a minimum SNR
{
if ((grpStepSizes[b] > grpRms[b]) && ((grpStepSizes[b] >> 1) <= grpRms[b])) grpStepSizes[b] = grpRms[b];
const uint32_t rms = uint32_t ((grpRms[b] * (8192u - (uint64_t) sfm[ch] * sfm[ch]) + (1u << 12)) >> 13);
if ((grpStepSizes[b] > grpRms[b]) && ((grpStepSizes[b] >> 1) <= rms)) grpStepSizes[b] = grpRms[b];
}
#endif
}
} // for gr
}
} // for ch
return 0; // no error

View File

@ -838,7 +838,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
const TnsData& tnsData1 = coreConfig.tnsData[1];
uint8_t realOnlyStartSfb = (eightShorts0 ? m_numSwbShort : m_numSwbLong) - __max (tnsData0.filterLength[0], tnsData1.filterLength[0]);
if (coreConfig.commonWindow && (coreConfig.stereoMode == 0) && (m_perCorrHCurr[el] > SCHAR_MAX || m_perCorrLCurr[el] > (UCHAR_MAX * 3) / 4))
if (coreConfig.commonWindow && (coreConfig.stereoMode == 0) && (m_perCorrHCurr[el] > SCHAR_MAX || m_perCorrLCurr[el] > (UCHAR_MAX * 5) / 8))
{
coreConfig.stereoMode = 1;
}
@ -929,8 +929,11 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
(coreConfig.stereoConfig & 2) > 0, realOnlyStartSfb,
&sfbStepSizes[m_numSwbShort * NUM_WINDOW_GROUPS * ci],
&sfbStepSizes[m_numSwbShort * NUM_WINDOW_GROUPS * (ci + 1)]);
if (errorValue == 2) // signal M/S with complex prediction
if (errorValue >= 2) // signal M/S with complex prediction
{
#if SP_MDST_PRED
coreConfig.stereoConfig |= errorValue - 2; // cplx coefs
#endif
coreConfig.stereoMode += 2; errorValue = 0;
}
m_specAnaCurr[ci ] = (m_specAnaCurr[ci ] & (UINT_MAX - 65504)) | peakIndexSte;
@ -952,7 +955,8 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
#if !RESTRICT_TO_AAC
const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong);
#endif
const uint16_t mSfmFac = UCHAR_MAX - (((16u + (m_bitRateMode >> 1)) * meanSpecFlat[ci]) >> 5);
const uint16_t mSfmSqr = (m_bitRateMode < 2 && samplingRate >= 27713 ? ((uint16_t) meanSpecFlat[ci] * meanSpecFlat[ci]) >> m_bitRateMode : 0);
const uint16_t mSfmFac = 256u - (((32u + m_bitRateMode) * ((uint32_t) meanSpecFlat[ci] << 4) - mSfmSqr + (1u << 9)) >> 10);
uint32_t* stepSizes = &sfbStepSizes[ci * m_numSwbShort * NUM_WINDOW_GROUPS];
memset (grpData.scaleFactors, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint8_t));
@ -1713,7 +1717,8 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o
if ((winSeq0 != initialWs0) && (winSeq0 == EIGHT_SHORT))
{
#if !RESTRICT_TO_AAC
if ((tsCurr[0] * 7 < tsCurr[1] * 2) && (tsNext[0] * 7 < tsNext[1] * 2))
if ((tsCurr[0] * 7 < tsCurr[1] * 2) && (tsNext[0] * 7 < tsNext[1] * 2) &&
(abs (m_specFlatPrev[ci - 1] - (int) m_specFlatPrev[ci - 2]) > UCHAR_MAX / 4))
{
winSeq0 = STOP_START; // don't synchronize to EIGHT_SHORT but keep low overlap
}
@ -1724,7 +1729,8 @@ unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects o
if ((winSeq1 != initialWs1) && (winSeq1 == EIGHT_SHORT))
{
#if !RESTRICT_TO_AAC
if ((tsCurr[1] * 7 < tsCurr[0] * 2) && (tsNext[1] * 7 < tsNext[0] * 2))
if ((tsCurr[1] * 7 < tsCurr[0] * 2) && (tsNext[1] * 7 < tsNext[0] * 2) &&
(abs (m_specFlatPrev[ci - 1] - (int) m_specFlatPrev[ci - 2]) > UCHAR_MAX / 4))
{
winSeq1 = STOP_START; // don't synchronize to EIGHT_SHORT but keep low overlap
}

View File

@ -77,9 +77,9 @@ static inline void setStepSizesMS (const uint32_t* const rmsSfbL, const uint32
StereoProcessor::StereoProcessor ()
{
#if SP_OPT_ALPHA_QUANT
memset (m_randomIntMemRe, 0, (MAX_NUM_SWB_LONG / 2) * sizeof (int32_t));
memset (m_randomIntMemRe, 0, (1+MAX_NUM_SWB_LONG/2) * sizeof (int32_t));
# if SP_MDST_PRED
memset (m_randomIntMemIm, 0, (MAX_NUM_SWB_LONG / 2) * sizeof (int32_t));
memset (m_randomIntMemIm, 0, (1+MAX_NUM_SWB_LONG/2) * sizeof (int32_t));
# endif
#endif
memset (m_stereoCorrValue, 0, (1024 >> SA_BW_SHIFT) * sizeof (uint8_t));
@ -132,7 +132,7 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
#if SP_OPT_ALPHA_QUANT
if ((bitRateMode >= 4) && eightShorts) // reset quantizer dither memory in short transform
{
for (uint16_t sfb = 0; sfb < MAX_NUM_SWB_LONG / 2; sfb++)
for (uint16_t sfb = 0; sfb <= MAX_NUM_SWB_LONG / 2; sfb++)
{
m_randomIntMemRe[sfb] = (1 << 30);
# if SP_MDST_PRED
@ -351,7 +351,7 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
}
sfbTempVar *= sfbTempVar; // account for residual RMS reduction due to prediction
#if SP_MDST_PRED
if (bitRateMode > 1) sfbTempVar += alphaLimit * alphaLimit; // including alpha_im
if (bitRateMode > 0) sfbTempVar += alphaLimit * alphaLimit; // including alpha_im
#endif
for (b = sfbIsOdd; b >= 0; b--)
{
@ -501,15 +501,17 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
if (numSfbPredSte == 0) // discard prediction coefficients and stay with legacy M/S stereo
{
if (applyPredSte)
for (uint16_t gr = 0; gr < grp.numWindowGroups; gr++)
{
uint8_t* const grpSData = &sfbStereoData[numSwbFrame * gr];
for (uint16_t sfb = 0; sfb < maxSfbSte; sfb++)
for (uint16_t gr = 0; gr < grp.numWindowGroups; gr++)
{
if (grpSData[sfb] > 0) grpSData[sfb] = 16;
uint8_t* const grpSData = &sfbStereoData[numSwbFrame * gr];
for (uint16_t sfb = 0; sfb < maxSfbSte; sfb++)
{
if (grpSData[sfb] > 0) grpSData[sfb] = 16;
}
if (numSwbFrame > maxSfbSte) memset (&grpSData[maxSfbSte], (useFullFrameMS ? 16 : 0), (numSwbFrame - maxSfbSte) * sizeof (uint8_t));
}
if (numSwbFrame > maxSfbSte) memset (&grpSData[maxSfbSte], (useFullFrameMS ? 16 : 0), (numSwbFrame - maxSfbSte) * sizeof (uint8_t));
}
}
else // at least one "significant" prediction band, apply prediction and update RMS values
@ -634,5 +636,5 @@ unsigned StereoProcessor::applyPredJointStereo (int32_t* const mdctSpectrum1, in
numSfbPredSte = 2;
}
return (numSfbPredSte); // no error
return numSfbPredSte; // no error
}

View File

@ -39,9 +39,9 @@ private:
#endif
#if SP_OPT_ALPHA_QUANT
std::minstd_rand m_randomInt32;
int32_t m_randomIntMemRe[MAX_NUM_SWB_LONG / 2];
int32_t m_randomIntMemRe[1+MAX_NUM_SWB_LONG/2];
# if SP_MDST_PRED
int32_t m_randomIntMemIm[MAX_NUM_SWB_LONG / 2];
int32_t m_randomIntMemIm[1+MAX_NUM_SWB_LONG/2];
# endif
#endif
uint8_t m_stereoCorrValue[1024 >> SA_BW_SHIFT]; // one value for every 32 spectral coefficients