fixes, finish tuning

This commit is contained in:
Christian R. Helmrich 2020-07-25 14:00:14 +02:00
parent 86ba7b8af6
commit f8ad0b34d7
5 changed files with 53 additions and 24 deletions

View File

@ -25,9 +25,16 @@
<td valign="top">
<h1><br><span class="pink">exhale</span> - <span class="pink">e</span>codis e<span class="pink">x</span>tended <span class="pink">h</span>igh-efficiency <span class="pink">a</span>nd <span class="pink">l</span>ow-complexity <span class="pink">e</span>ncoder<br><span class="gray"><sup><br>Software Release Notes, Version History, Known Issues, Upcoming Feature Roadmap</sup></span><br><br></h1>
<h3>&nbsp; &nbsp;The version of this distribution of the &laquo;exhale&raquo; software release is <b>1.0.5</b> (official pub&shy;lic minor release) from June 30, 2020. Please check <a href="http://www.ecodis.de">www.ecodis.de</a> regularly for new versions of this software. A summary of each version up to this release, a list of known issues with this release, and a roadmap of additional functionality are provided below.</h3>
<h3>&nbsp; &nbsp;The version of this distribution of the &laquo;exhale&raquo; software release is <b>1.0.6</b> (official pub&shy;lic minor release) from July 30, 2020. Please check <a href="http://www.ecodis.de/audio.htm#mpeg">www.ecodis.de</a> regularly for new versions of this software. A summary of each version up to this release, a list of known issues with this release, and a roadmap of additional functionality are provided below.</h3>
<h3><br><b>Chronological Version History</b></h3>
<h3>&nbsp; &nbsp;Version <b>1.0.5 <span class="gray">&nbsp;June 2020, this release</span></b></h3>
<h3>&nbsp; &nbsp;Version <b>1.0.6 <span class="gray">&nbsp;July 2020, this release</span></b></h3>
<ul>
<li><h3>bugfixes, improved quality on some transient signals, better decoder compatibility</h3></li>
<li><h3>exhaleApp: support for Extensible WAVE format, write MP4 &laquo;prol&raquo; data (issue 10)</h3></li>
<li><h3>exhaleApp: automatic downsampling of 48-kHz input to 32 kHz for CVBR mode 1</h3></li>
<li><h3>exhaleLib: fine-tuning of psychoacoustic model for difficult transient input signals</h3></li>
</ul>
<h3>&nbsp; &nbsp;Version <b>1.0.5 <span class="gray">&nbsp;June 2020</span></b></h3>
<ul>
<li><h3>slightly reduced bit-rates with lower modes, better compatibility when using stdin</h3></li>
<li><h3>exhaleApp: support for Unicode text on Windows&trade;, 44100 Hz with CVBR mode 1</h3></li>
@ -79,18 +86,18 @@
<h3>&nbsp; &nbsp;If you notice an issue with this release <b>not</b> mentioned below, please contact ecodis or a contributor with the details (configuration, input file) needed to reproduce the issue.</h3>
<ul>
<li><h3>exhaleLib: Coding of stereo or multichannel input occasionally leads to slightly in&shy;creased bit-rates because the predictive joint-channel coding provided by ISO/IEC <a href="https://www.iso.org/standard/76385.html">23003-3</a> has not been fully implemented. See the functionality roadmap below.</h3></li>
<li><h3>exhaleApp: Only basic WAVE input file reading functionality has been implemen&shy;ted. Specifically, 8-bit WAVE input is assumed to contain an even number of audio samples, and ITU-R <a href="https://www.itu.int/rec/R-REC-BS.2088/en">BS.2088</a> (RF64) or Extensible WAVE files are not supported.</h3></li>
<li><h3>exhaleApp: Only basic WAVE input file reading functionality has been implement&shy;ed. Specifically, 8-bit WAVE input is assumed to contain an even number of audio samples, and ITU-R <a href="https://www.itu.int/rec/R-REC-BS.2088/en">BS.2088</a> (RF64, Broadcast WAVE) files are not supported.</h3></li>
</ul>
<h3><br><b>Roadmap of Upcoming Features</b></h3>
<h3>&nbsp; &nbsp;If you are in need of an additional library or application feature <b>not</b> mentioned below, please contact ecodis or a contributor with a request, and we will see what we can do.</h3>
<ul>
<li><h3>support for coding with a core coder frame length of 768 samples, no version plan</h3></li>
<li><h3>exhaleLib: completed integration of predictive joint-channel coding, version 1.0.6</h3></li>
<li><h3>exhaleLib: completed integration of predictive joint-channel coding, version 1.0.7</h3></li>
<li><h3>exhaleLib: finalization of support for 3.0&#x2013;5.1 multichannel coding, no version plan</h3></li>
<li><h3>exhaleLib: speed-ups and further quality tuning for difficult signals, as necessary</h3></li>
</ul>
<h3><br></h3>
<h4><span class="gray">Written by C. R. Helmrich for exhale 1.0.5, June 2020. Available at www.ecodis.de/exhale/release.htm.</span><br><br></h4>
<h4><span class="gray">Written by C. R. Helmrich for exhale 1.0.6, July 2020. Available at www.ecodis.de/exhale/release.htm.</span><br><br></h4>
</td>
<td valign="top" colspan="2">

View File

@ -11,6 +11,7 @@
#include "exhaleAppPch.h"
#include "basicMP4Writer.h"
#include "version.h"
#if 0 // DEBUG
static const uint8_t muLawHeader[44] = {
@ -317,7 +318,24 @@ int BasicMP4Writer::finishFile (const unsigned avgBitrate, const unsigned maxBit
m_dynamicHeader.push_back (0x61); m_dynamicHeader.push_back (0x74); // mdat
for (uint32_t pNdx = 0; pNdx < headerPaddingLength; pNdx++)
{
m_dynamicHeader.push_back (0x00); // add padding bytes. TODO: ver string?
if (pNdx == 0) // add padding byte with library version
{
const char ver[] = EXHALELIB_VERSION_MAJOR "." EXHALELIB_VERSION_MINOR EXHALELIB_VERSION_BUGFIX;
const int verInt = (ver[0] - 0x30) * 100 + (ver[2] - 0x30) * 10 + (ver[4] - 0x30);
m_dynamicHeader.push_back (__max (0, __min (UCHAR_MAX, verInt)));
}
else if (pNdx == 1) // add 8-bit cyclic redundancy check
{
uint8_t crc8 = m_dynamicHeader.back(); // Baicheva '98
for (uint16_t i = 8; i > 0; i--) if (crc8 & 0x80) crc8 = (crc8 << 1) ^ 0x2F; else crc8 <<= 1;
m_dynamicHeader.push_back (crc8); // add padding CRC-8
}
else
{
m_dynamicHeader.push_back (0x00); // add padding bytes
}
}
_SEEK (m_fileHandle, 0, 0 /*SEEK_SET*/); // back to start
@ -346,7 +364,9 @@ int BasicMP4Writer::initHeader (const uint32_t audioLength) // reserve bytes for
return _WRITE (m_fileHandle, m_staticHeader, 44);
#else
const bool flushFrameUsed = ((audioLength + m_pregapLength) % m_frameLength) > 0;
/* NOTE: the following condition is, as far as I can tell, correct, but some decoders with DRC processing
may decode too few samples with it. Hence, I disabled it. See also corresponding NOTE in exhaleApp.cpp */
const bool flushFrameUsed = true; // ((audioLength + m_pregapLength) % m_frameLength) > 0;
const unsigned frameCount = ((audioLength + m_frameLength - 1) / m_frameLength) + (flushFrameUsed ? 2 : 1);
const unsigned chunkCount = ((frameCount + m_rndAccPeriod - 1) / m_rndAccPeriod);
const unsigned finalChunk = (frameCount <= m_rndAccPeriod ? 0 : frameCount % m_rndAccPeriod);

View File

@ -51,14 +51,15 @@ static void jndPowerLawAndPeakSmoothing (uint32_t* const stepSizes, const unsig
stepSizes[0] = __min (stepSizeM1, stepSizes[0]); // `- becomes --
for (/*b*/; b < nStepSizes; b++)
{
const uint64_t oneMinusB = 128 - b;
const uint32_t stepSizeB = jndModel (stepSizes[b], avgStepSize, expTimes512, mulTimes512);
if ((stepSizeM3 <= stepSizeM2) && (stepSizeM3 <= stepSizeM1) && (stepSizeB <= stepSizeM2) && (stepSizeB <= stepSizeM1))
{
const uint32_t maxM3M0 = __max (stepSizeM3, stepSizeB); // smoothen local spectral peak of _´`- shape
stepSizes[b - 2] = __min (maxM3M0, stepSizes[b - 2]); // _-`-
stepSizes[b - 1] = __min (maxM3M0, stepSizes[b - 1]); // _---
stepSizes[b - 2] = uint32_t ((b * (uint64_t) stepSizes[b - 2] + oneMinusB * __min (maxM3M0, stepSizes[b - 2]) + 64) >> 7); // _-`-
stepSizes[b - 1] = uint32_t ((b * (uint64_t) stepSizes[b - 1] + oneMinusB * __min (maxM3M0, stepSizes[b - 1]) + 64) >> 7); // _---
}
stepSizeM3 = stepSizeM2;
stepSizeM2 = stepSizeM1;
@ -175,9 +176,9 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
// equal-loudness weighting based on data from: K. Kurakata, T. Mizunami, and K. Matsushita, "Percentiles
// of Normal Hearing-Threshold Distribution Under Free-Field Listening Conditions in Numerical Form," Ac.
// Sci. Tech, vol. 26, no. 5, pp. 447-449, Jan. 2005, https://www.researchgate.net/publication/239433096.
const unsigned HF/*idx*/= ((123456 - samplingRate) >> 11) + (samplingRate <= 34150 ? 2 : 0); // start SFB
const unsigned HF/*idx*/= ((123456 - samplingRate) >> 11) + (samplingRate < 37566 ? 2 : 0); // start SFB
const unsigned LF/*idx*/= 9;
const unsigned MF/*idx*/= (samplingRate < 28800 ? HF : __min (HF, 30u));
const unsigned MF/*idx*/= (samplingRate < 27713 ? HF : __min (HF, 30u));
const unsigned msShift = (samplingRate + 36736) >> 15; // TODO: 768 smp
const unsigned msOffset = 1 << (msShift - 1);
uint32_t nMeans = 0, sumMeans = 0;
@ -291,7 +292,7 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
maskingSlope = (stepSizes[b - 1] + msOffset) >> msShift;
stepSizes[b] = __max (rms[b], maskingSlope + BA_EPS);
}
if ((samplingRate >= 28800) && (samplingRate <= 64000))
if ((samplingRate >= 27713) && (samplingRate < 75132))
{
for (/*b*/; b < __min (HF, maxSfbInCh); b++) // compensate high-frequency slopes for linear SFB width
{
@ -346,7 +347,7 @@ unsigned BitAllocator::initSfbStepSizes (const SfbGroupData* const groupData[USA
jndPowerLawAndPeakSmoothing (stepSizes, maxSfbInCh, m_avgStepSize[ch], m_avgSpecFlat[ch], tnsDisabled ? m_avgTempFlat[ch] : 0);
if ((samplingRate >= 28800) && (samplingRate <= 64000))
if ((samplingRate >= 27713) && (samplingRate < 75132))
{
elw = 36; // 36/32 = 9/8
for (b = HF; b < maxSfbInCh; b++) // undo above additional high-frequency equal-loudness attenuation
@ -457,11 +458,11 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
const uint32_t rmsRef9 = (commonWindow ? refRms[b] >> 9 : rmsComp);
const uint8_t sfbWidth = grpOff[b + 1] - grpOff[b];
if (redWeight > 0 && !eightShorts && sfbWidth > 12) // further reduce step-sizes of transient bands
if (redWeight > 0 && !eightShorts && sfbWidth > (samplingRate >= 18783 ? 8 : 12)) // transient SFBs
{
const uint32_t gains = m_tnsPredictor->calcParCorCoeffs (&mdctSpec[ch][grpOff[b]], sfbWidth, MAX_PREDICTION_ORDER, tempCoeffs) >> 24;
m_tempSfbValue[b] = UCHAR_MAX - uint8_t ((512u + gains * gains * redWeight) >> (sfbWidth > 16 ? 10 : 11));
m_tempSfbValue[b] = UCHAR_MAX - uint8_t ((512u + gains * gains * redWeight) >> (10 + (sfbWidth > 16 ? 0 : (20 - sfbWidth) >> 2)));
if ((b >= 2) && (m_tempSfbValue[b - 1] < m_tempSfbValue[b]) && (m_tempSfbValue[b - 1] < m_tempSfbValue[b - 2]))
{
m_tempSfbValue[b - 1] = __min (m_tempSfbValue[b], m_tempSfbValue[b - 2]); // remove local peaks
@ -477,7 +478,7 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
}
}
if ((samplingRate > 27712) && (b < maxSfbL16k) && !eightShorts) // zeroed HF coefs
if ((samplingRate >= 27713) && (b < maxSfbL16k) && !eightShorts) // zeroed HF data
{
const uint32_t rmsComp = (grpSte != nullptr && grpSte[b] > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]);
const uint32_t rmsRef9 = (commonWindow ? refRms[b] >> 9 : rmsComp);

View File

@ -717,7 +717,7 @@ unsigned ExhaleEncoder::getOptParCorCoeffs (const SfbGroupData& grpData, const u
sumAbsOrg += abs (mdctSample); sumAbsTns += abs (resiSample);
}
}
if (sumAbsOrg * 9 <= sumAbsTns * 8) break; // band SNR was reduced by more than 1 dB
if (sumAbsOrg * 17 <= sumAbsTns * 16) break; // band SNR reduced by more than 0.5 dB
}
m_specAnaCurr[channelIndex] = (m_specAnaCurr[channelIndex] & (UINT_MAX - 31)) | (b + 1);
} // if order > 0
@ -786,7 +786,8 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
const unsigned samplingRate = toSamplingRate (m_frequencyIdx);
const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
const uint32_t maxSfbLong = (samplingRate < 37566 ? MAX_NUM_SWB_LONG : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
const uint64_t scaleSr = (samplingRate < 27713 ? (samplingRate < 24000 ? 32 : 34) - __min (3, m_bitRateMode) : 37) - (nChannels >> 1);
const uint64_t scaleSr = (samplingRate < 27713 ? (samplingRate < 23004 ? 32 : 34) - __min (3, m_bitRateMode)
: (samplingRate < 37566 && m_bitRateMode != 3u ? 36 : 37)) - (nChannels >> 1);
const uint64_t scaleBr = (m_bitRateMode == 0 ? __min (32, 3 + (samplingRate >> 10) + (samplingRate >> 13) - (nChannels >> 1))
: scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1));
uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf;
@ -947,12 +948,12 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
{
SfbGroupData& grpData = coreConfig.groupingData[ch];
const bool eightShorts = (coreConfig.icsInfoCurr[ch].windowSequence == EIGHT_SHORT);
const bool saveBitRate = (meanSpecFlat[ci] > SCHAR_MAX && samplingRate >= 32000 + (unsigned) m_bitRateMode * 12000);
const bool saveBitRate = (meanSpecFlat[ci] > (UCHAR_MAX * 3) / 4 && samplingRate >= 32000 + (unsigned) m_bitRateMode * 12000);
const uint8_t maxSfbCh = grpData.sfbsPerGroup;
#if !RESTRICT_TO_AAC
const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong);
#endif
const uint16_t mSfmFac = UCHAR_MAX - ((9u * meanSpecFlat[ci]) >> 4);
const uint16_t mSfmFac = UCHAR_MAX - (((16u + (m_bitRateMode >> 1)) * meanSpecFlat[ci]) >> 5);
uint32_t* stepSizes = &sfbStepSizes[ci * m_numSwbShort * NUM_WINDOW_GROUPS];
memset (grpData.scaleFactors, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint8_t));
@ -988,7 +989,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
#ifndef NO_DTX_MODE
const bool prvEightShorts = (coreConfig.icsInfoPrev[ch].windowSequence == EIGHT_SHORT);
if ((m_bitRateMode < 1) && (m_numElements == 1) && (samplingRate <= 24000) && eightShorts)
if ((m_bitRateMode < 1) && (m_numElements == 1) && (samplingRate < 27713) && eightShorts)
{
for (s = 0; s < 26; s++) m_sfbLoudMem[ch][s][m_frameCount & 31] = uint16_t (sqrt (double (getThr (ch, s) << (samplingRate >> 13))));
}
@ -1002,7 +1003,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
#ifndef NO_DTX_MODE
const uint32_t* grpRms = &grpData.sfbRmsValues[m_numSwbShort * gr];
if ((m_bitRateMode < 1) && (m_numElements == 1) && (samplingRate <= 24000))
if ((m_bitRateMode < 1) && (m_numElements == 1) && (samplingRate < 27713))
{
const uint32_t* refRms = &coreConfig.groupingData[1 - ch].sfbRmsValues[m_numSwbShort * gr];
uint8_t* grpStereoData = &coreConfig.stereoDataCurr[m_numSwbShort * gr];
@ -1043,7 +1044,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
}
}
#ifndef NO_DTX_MODE
else if (m_noiseFilling[el] && (m_bitRateMode < 1) && (m_numElements == 1) && (samplingRate <= 24000))
else if (m_noiseFilling[el] && (m_bitRateMode < 1) && (m_numElements == 1) && (samplingRate < 27713))
{
for (s = 0; s < 26; s++) m_sfbLoudMem[ch][s][m_frameCount & 31] = BA_EPS;
}

View File

@ -69,7 +69,7 @@ static inline void setStepSizesMS (const uint32_t* const rmsSfbL, const uint32
const double rat = __min (1.0, grpStepSizes1[idx] / (sfbRmsL * 2.0)) * __min (1.0, grpStepSizes2[idx] / (sfbRmsR * 2.0)) * sfbFacLR;
grpStepSizes1[idx] = grpStepSizes2[idx] = uint32_t (__max (SP_EPS, (min > rat * sfbMaxMS ? sqrt (rat * sfbMaxMS * min) :
__min (1.0, rat) * sfbMaxMS)) + 0.5);
__max (1.0/2048.0, __min (1.0, rat)) * sfbMaxMS)) + 0.5);
}
}