mirror of https://gitlab.com/ecodis/exhale.git
clean DTX and RC
This commit is contained in:
parent
b11042a0f0
commit
057bb87e64
|
@ -13,7 +13,7 @@
|
|||
|
||||
0 ICON "exhaleApp.ico"
|
||||
VS_VERSION_INFO VERSIONINFO
|
||||
FILEVERSION 1,1,6
|
||||
FILEVERSION 1,1,6,1
|
||||
BEGIN
|
||||
BLOCK "StringFileInfo"
|
||||
BEGIN
|
||||
|
|
|
@ -101,6 +101,15 @@ void BitAllocator::getChAverageTempFlat (uint8_t meanTempFlatInCh[USAC_MAX_NUM_C
|
|||
memcpy (meanTempFlatInCh, m_avgTempFlat, nChannels * sizeof (uint8_t));
|
||||
}
|
||||
|
||||
// Derives a 16-bit rate-control (RC) factor from a rate ratio, the sampling rate, and a spectral
// flatness value.
//   rateRatio    - ratio value; it is weighted by (36 - 9 * m_rateIndex) and limited before use, so
//                  a value of 0 always yields the lower limit 1 << 15
//   samplingRate - only compared against 27713 to decide whether the squared flatness term is used
//   specFlatness - flatness value entering a linear and (optionally) a squared term
//                  (NOTE(review): the visible caller passes a uint8_t value - confirm the 16-bit
//                  intermediates below are sufficient for any other caller)
// Returns (brRatio * mSfmFac + (1 << 7)) >> 8, converted to uint16_t.
uint16_t BitAllocator::getRateCtrlFac (const int32_t rateRatio, const unsigned samplingRate, const uint32_t specFlatness)
|
||||
{
|
||||
// weight the ratio by the rate index and limit the product to the range [1 << 15, USHRT_MAX]
const uint32_t brRatio = __max (1 << 15, __min (USHRT_MAX, rateRatio * (36 - 9 * m_rateIndex)));
|
||||
// squared flatness, shifted by the rate index; zero unless m_rateIndex < 2 and samplingRate >= 27713
const uint16_t mSfmSqr = (m_rateIndex < 2 && samplingRate >= 27713 ? (specFlatness * specFlatness) >> m_rateIndex : 0);
|
||||
// flatness weight: 256 minus the rounded (+ (1 << 9), >> 10) difference of linear and squared terms
const uint16_t mSfmFac = 256 - (((32 + m_rateIndex) * (specFlatness << 4) - mSfmSqr + (1 << 9)) >> 10);
|
||||
|
||||
// combine ratio and flatness weight, rounding (+ (1 << 7)) before dropping the lowest 8 bits
return uint16_t ((brRatio * mSfmFac + (1 << 7)) >> 8);
|
||||
}
|
||||
|
||||
uint8_t BitAllocator::getScaleFac (const uint32_t sfbStepSize, const int32_t* const sfbSignal, const uint8_t sfbWidth,
|
||||
const uint32_t sfbRmsValue)
|
||||
{
|
||||
|
|
|
@ -39,6 +39,7 @@ public:
|
|||
// public functions
|
||||
void getChAverageSpecFlat (uint8_t meanSpecFlatInCh[USAC_MAX_NUM_CHANNELS], const unsigned nChannels);
|
||||
void getChAverageTempFlat (uint8_t meanTempFlatInCh[USAC_MAX_NUM_CHANNELS], const unsigned nChannels);
|
||||
uint16_t getRateCtrlFac (const int32_t rateRatio, const unsigned samplingRate, const uint32_t specFlatness);
|
||||
uint8_t getScaleFac (const uint32_t sfbStepSize, const int32_t* const sfbSignal, const uint8_t sfbWidth,
|
||||
const uint32_t sfbRmsValue);
|
||||
unsigned initAllocMemory (LinearPredictor* const linPredictor, const uint8_t numSwb, const uint8_t bitRateMode);
|
||||
|
|
|
@ -936,7 +936,7 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData,
|
|||
const uint8_t numSwbShort, uint8_t* const tempBuffer,
|
||||
#if !RESTRICT_TO_AAC
|
||||
const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling,
|
||||
const uint32_t frameCount, const uint32_t indepPeriod,
|
||||
const uint32_t frameCount, const uint32_t indepPeriod, uint32_t* rate,
|
||||
#endif
|
||||
const uint8_t sbrRatioShiftValue, int32_t** const sbrInfoAndData,
|
||||
unsigned char* const accessUnit, const unsigned nSamplesInFrame)
|
||||
|
@ -971,7 +971,6 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData,
|
|||
}
|
||||
#endif
|
||||
m_auBitStream.reset ();
|
||||
m_frameLength = nSamplesInFrame;
|
||||
m_numSwbShort = numSwbShort;
|
||||
m_uCharBuffer = tempBuffer;
|
||||
m_auBitStream.write (usacIndependencyFlag ? 1 : 0, 1);
|
||||
|
@ -1112,6 +1111,18 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData,
|
|||
#if RESTRICT_TO_AAC || defined (NO_PREROLL_DATA)
|
||||
memcpy (accessUnit, &m_auBitStream.stream.front (), __min (768 * ci, bitCount >> 3));
|
||||
#else
|
||||
m_auByteCount += bitCount >> 3;
|
||||
if (rate != nullptr) // sampling rate
|
||||
{
|
||||
const double framesPerSec = (double) *rate / nSamplesInFrame;
|
||||
const unsigned targetRate = (4 - (sbrRatioShiftValue & 1)) * ci; // frame average for preset 1
|
||||
|
||||
if (framesPerSec > 0.0 && targetRate > 0 && frameCount < UINT_MAX) // running overcoding ratio
|
||||
{
|
||||
*rate = uint32_t (0.5 + (m_auByteCount * framesPerSec) / (__max (20.0 * framesPerSec, (double) frameCount) * targetRate));
|
||||
}
|
||||
else *rate = 0; // insufficient data
|
||||
}
|
||||
memcpy (accessUnit, &m_auBitStream.stream.front (), __min (ci * (ipf ? 1152 : 768), bitCount >> 3));
|
||||
#endif
|
||||
return (bitCount >> 3); // byte count
|
||||
|
|
|
@ -25,7 +25,7 @@ private:
|
|||
|
||||
// member variables
|
||||
OutputStream m_auBitStream; // access unit bit-stream to write
|
||||
uint32_t m_frameLength;
|
||||
uint64_t m_auByteCount;
|
||||
uint8_t m_numSwbShort; // max. SFB count in short windows
|
||||
uint8_t* m_uCharBuffer; // temporary buffer for ungrouping
|
||||
#ifndef NO_PREROLL_DATA
|
||||
|
@ -37,7 +37,7 @@ private:
|
|||
#endif
|
||||
// helper functions
|
||||
void writeByteAlignment (); // write 0s for byte alignment
|
||||
unsigned writeChannelWiseIcsInfo (const IcsInfo& icsInfo); // ics_info()
|
||||
unsigned writeChannelWiseIcsInfo (const IcsInfo& icsInfo);
|
||||
unsigned writeChannelWiseSbrData (const int32_t* const sbrDataCh0, const int32_t* const sbrDataCh1,
|
||||
const bool indepFlag = false);
|
||||
unsigned writeChannelWiseTnsData (const TnsData& tnsData, const bool eightShorts);
|
||||
|
@ -56,7 +56,7 @@ private:
|
|||
public:
|
||||
|
||||
// constructor
|
||||
BitStreamWriter () { m_auBitStream.reset (); m_frameLength = 0; m_numSwbShort = 0; m_uCharBuffer = nullptr;
|
||||
BitStreamWriter () { m_auBitStream.reset (); m_auByteCount = m_numSwbShort = 0; m_uCharBuffer = nullptr;
|
||||
#ifndef NO_PREROLL_DATA
|
||||
memset (m_usacConfig, 0, 20); m_usacConfigLen = 0; memset (m_usacIpfState, 0, 4);
|
||||
#endif
|
||||
|
@ -77,7 +77,7 @@ public:
|
|||
const uint8_t numSwbShort, uint8_t* const tempBuffer,
|
||||
#if !RESTRICT_TO_AAC
|
||||
const bool* const tw_mdct /*N/A*/, const bool* const noiseFilling,
|
||||
const uint32_t frameCount, const uint32_t indepPeriod,
|
||||
const uint32_t frameCount, const uint32_t indepPeriod, uint32_t* rate,
|
||||
#endif
|
||||
const uint8_t sbrRatioShiftValue, int32_t** const sbrInfoAndData,
|
||||
unsigned char* const accessUnit, const unsigned nSamplesInFrame);
|
||||
|
|
|
@ -445,13 +445,13 @@ static const uint8_t numSwbOffsetS[USAC_NUM_FREQ_TABLES] = {13, 13, 15, 16, 16,
|
|||
|
||||
// ISO/IEC 23003-3, Table 79
|
||||
static const uint8_t freqIdxToSwbTableIdxAAC[USAC_NUM_SAMPLE_RATES + 2] = {
|
||||
/*96000*/ 0, 0, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, // AAC
|
||||
255, 255, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4 // USAC
|
||||
/*96000*/ 0, 0, 1, 2, 2, 2,/*24000*/ 3, 3, 4, 4, 4, 5, 5, // AAC
|
||||
255, 255, 1, 2, 2, 2, 2, 2,/*25600*/ 3, 3, 3, 4, 4, 4, 4 // USAC
|
||||
};
|
||||
#if !RESTRICT_TO_AAC
|
||||
static const uint8_t freqIdxToSwbTableIdx768[USAC_NUM_SAMPLE_RATES + 2] = {
|
||||
/*96000*/ 0, 0, 0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 4, // AAC
|
||||
255, 255, 0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4 // USAC
|
||||
/*96000*/ 0, 0, 0, 1, 1, 2,/*24000*/ 2, 2, 3, 4, 4, 4, 4, // AAC
|
||||
255, 255, 0, 1, 2, 2, 2, 2,/*25600*/ 2, 3, 3, 3, 3, 4, 4 // USAC
|
||||
};
|
||||
#endif
|
||||
|
||||
|
@ -745,7 +745,6 @@ unsigned ExhaleEncoder::getOptParCorCoeffs (const SfbGroupData& grpData, const u
|
|||
return (predGainMax >> 24) & UCHAR_MAX; // max pred gain of all orders and length-1 groups
|
||||
}
|
||||
|
||||
#ifndef NO_DTX_MODE
|
||||
uint32_t ExhaleEncoder::getThr (const unsigned channelIndex, const unsigned sfbIndex)
|
||||
{
|
||||
const uint16_t* const sfbLoudMem = m_sfbLoudMem[channelIndex][sfbIndex];
|
||||
|
@ -756,7 +755,6 @@ uint32_t ExhaleEncoder::getThr (const unsigned channelIndex, const unsigned sfbI
|
|||
|
||||
return sumSfbLoud * (sumSfbLoud >> (toSamplingRate (m_frequencyIdx) >> 13)); // scaled SMR
|
||||
}
|
||||
#endif
|
||||
|
||||
unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via scale factors
|
||||
{
|
||||
|
@ -933,8 +931,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
|
|||
#if !RESTRICT_TO_AAC
|
||||
const uint8_t numSwbCh = (eightShorts ? m_numSwbShort : m_numSwbLong);
|
||||
#endif
|
||||
const uint16_t mSfmSqr = (m_bitRateMode < 2 && samplingRate >= 27713 ? ((uint16_t) meanSpecFlat[ci] * meanSpecFlat[ci]) >> m_bitRateMode : 0);
|
||||
const uint16_t mSfmFac = 256u - (((32u + m_bitRateMode) * ((uint32_t) meanSpecFlat[ci] << 4) - mSfmSqr + (1u << 9)) >> 10);
|
||||
const uint16_t rateFac = m_bitAllocator.getRateCtrlFac (m_priLength ? m_rateFactor : 0, samplingRate, meanSpecFlat[ci]); // RC factor
|
||||
uint32_t* stepSizes = &sfbStepSizes[ci * m_numSwbShort * NUM_WINDOW_GROUPS];
|
||||
|
||||
memset (grpData.scaleFactors, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint8_t));
|
||||
|
@ -951,10 +948,10 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
|
|||
const unsigned lfConst = (samplingRate < 27713 && !eightShorts ? 1 : 2); // lfAtten: LF SNR boost, as in my M.Sc. thesis
|
||||
const unsigned lfAtten = (b <= 5 ? (eightShorts ? 1 : 4) + b * lfConst : 5 * lfConst - 1 + b + ((b + 5) >> 4));
|
||||
const uint8_t sfbWidth = grpOff[b + 1] - grpOff[b];
|
||||
const uint64_t scale = scaleBr * mSfmFac * __min (32, lfAtten * grpData.numWindowGroups); // rate control part 1 (SFB)
|
||||
const uint64_t scale = scaleBr * rateFac * __min (32, lfAtten * grpData.numWindowGroups); // rate control part 1 (SFB)
|
||||
|
||||
// scale step-sizes according to VBR mode & derive scale factors from step-sizes
|
||||
grpStepSizes[b] = uint32_t (__max (BA_EPS, ((1u << 17) + grpStepSizes[b] * scale) >> 18));
|
||||
grpStepSizes[b] = uint32_t (__max (BA_EPS, ((1u << 24) + grpStepSizes[b] * scale) >> 25));
|
||||
#if !RESTRICT_TO_AAC
|
||||
if (!m_noiseFilling[el] || (m_bitRateMode > 0) || (m_shiftValSBR == 0) || (samplingRate < 23004) ||
|
||||
(b + 3 - (meanSpecFlat[ci] >> 6) < m_numSwbLong)) // HF
|
||||
|
@ -971,19 +968,15 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
|
|||
const bool keepMaxSfbCurr = ((samplingRate < 37566) || (samplingRate >= 46009 && samplingRate < 55426 && eightShorts));
|
||||
const uint8_t numSwbFrame = __min ((numSwbCh * ((maxSfbCh == maxSfbCurr) || (m_bitRateMode <= 2) || (m_shiftValSBR > 0) ? 4u : 3u)) >> 2,
|
||||
(eightShorts ? maxSfbCh : maxSfbLong) + (m_bitRateMode < 2 || m_bitRateMode > 3 || keepMaxSfbCurr ? 0u : 1u));
|
||||
#ifndef NO_DTX_MODE
|
||||
|
||||
if ((m_bitRateMode == 0) && (m_numElements == 1) && (samplingRate < 27713) && eightShorts)
|
||||
{
|
||||
for (s = 0; s < 26; s++) m_sfbLoudMem[ch][s][m_frameCount & 31] = uint16_t (sqrt (double (getThr (ch, s) << (samplingRate >> 13))));
|
||||
}
|
||||
if ((maxSfbCh < numSwbFrame) || (m_bitRateMode <= 2)) // increase coding bandwidth
|
||||
#else
|
||||
if (maxSfbCh < numSwbFrame) // increase coding bandwidth
|
||||
#endif
|
||||
{
|
||||
for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++)
|
||||
{
|
||||
#ifndef NO_DTX_MODE
|
||||
const uint32_t* grpRms = &grpData.sfbRmsValues[m_numSwbShort * gr];
|
||||
|
||||
if ((m_bitRateMode == 0) && (m_numElements == 1) && (samplingRate < 27713))
|
||||
|
@ -1009,7 +1002,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
|
|||
if (grpRms[s] < ((3 * TA_EPS) >> 1)) grpData.scaleFactors[s + m_numSwbShort * gr] = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
memset (&grpData.scaleFactors[maxSfbCh + m_numSwbShort * gr], 0, (numSwbFrame - maxSfbCh) * sizeof (uint8_t));
|
||||
}
|
||||
grpData.sfbsPerGroup = coreConfig.icsInfoCurr[ch].maxSfb = numSwbFrame;
|
||||
|
@ -1026,12 +1019,10 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
|
|||
coreConfig.commonMaxSfb = (maxSfb0 == maxSfb1);
|
||||
}
|
||||
}
|
||||
#ifndef NO_DTX_MODE
|
||||
else if (m_noiseFilling[el] && (m_bitRateMode == 0) && (m_numElements == 1) && (samplingRate < 27713))
|
||||
{
|
||||
for (s = 0; s < 26; s++) m_sfbLoudMem[ch][s][m_frameCount & 31] = BA_EPS;
|
||||
}
|
||||
#endif
|
||||
#endif // !RESTRICT_TO_AAC
|
||||
ci++;
|
||||
} // for ch
|
||||
|
@ -1294,10 +1285,11 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
|
|||
entrCoder.setIsShortWindow (shortWinPrev);
|
||||
#if !RESTRICT_TO_AAC
|
||||
s = 22050 + 7350 * m_bitRateMode; // compute channel-wise noise_level and noise_offset
|
||||
sfIdxPred = ((m_bitRateMode == 0) && (m_priLength) && (m_shiftValSBR) && ((m_tempAnaCurr[ci] >> 24) || (m_tempAnaNext[ci] >> 24)) && (meanSpecFlat[ci] +
|
||||
__min ((m_tempAnaCurr[ci] >> 16) & UCHAR_MAX, (m_tempAnaNext[ci] >> 16) & UCHAR_MAX) >= 192) ? UCHAR_MAX : meanSpecFlat[ci]);
|
||||
coreConfig.specFillData[ch] = (!m_noiseFilling[el] ? 0 : m_specGapFiller.getSpecGapFillParams (m_sfbQuantizer, m_mdctQuantMag[ci], m_numSwbShort,
|
||||
grpData, nSamplesInFrame, samplingRate, s,
|
||||
shortWinCurr ? 0 : meanSpecFlat[ci]));
|
||||
// NOTE: gap-filling SFB bit count might be inaccurate now since scale factors changed
|
||||
shortWinCurr ? 0 : sfIdxPred));
|
||||
if (coreConfig.specFillData[ch] == 1) errorValue |= 1;
|
||||
#endif
|
||||
s = ci + nrChannels - 1 - 2 * ch; // other channel in stereo
|
||||
|
@ -1332,11 +1324,13 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
|
|||
ci++;
|
||||
}
|
||||
} // for el
|
||||
|
||||
#if !RESTRICT_TO_AAC
|
||||
if (m_priLength) m_rateFactor = samplingRate;
|
||||
#endif
|
||||
return (errorValue > 0 ? 0 : m_outStream.createAudioFrame (m_elementData, m_entropyCoder, m_mdctSignals, m_mdctQuantMag, m_indepFlag,
|
||||
m_numElements, m_numSwbShort, (uint8_t* const) m_tempIntBuf,
|
||||
#if !RESTRICT_TO_AAC
|
||||
m_timeWarping, m_noiseFilling, m_frameCount - 1u, m_indepPeriod,
|
||||
m_timeWarping, m_noiseFilling, m_frameCount - 1u, m_indepPeriod, &m_rateFactor,
|
||||
#endif
|
||||
m_shiftValSBR, m_coreSignals, m_outAuData, nSamplesInFrame)); // returns AU size
|
||||
}
|
||||
|
@ -1829,7 +1823,7 @@ ExhaleEncoder::ExhaleEncoder (int32_t* const inputPcmData, unsigned ch
|
|||
if (m_channelConf == CCI_CONF) m_channelConf = CCI_2_CHM; // passing numChannels = 0 means 2-ch dual-mono
|
||||
m_numElements = elementCountConfig[m_channelConf % USAC_MAX_NUM_ELCONFIGS]; // used in UsacDecoderConfig
|
||||
m_shiftValSBR = (frameLength >= 1536 ? 1 : 0);
|
||||
m_frameCount = m_priLength = 0;
|
||||
m_frameCount = m_rateFactor = m_priLength = 0;
|
||||
m_frameLength = USAC_CCFL (frameLength >> m_shiftValSBR); // ccfl signaled using coreSbrFrameLengthIndex
|
||||
m_frequencyIdx = toSamplingFrequencyIndex (sampleRate >> m_shiftValSBR); // as usacSamplingFrequencyIndex
|
||||
m_indepFlag = true; // usacIndependencyFlag in UsacFrame(), will be set per frame, true in first frame
|
||||
|
@ -2085,9 +2079,8 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin
|
|||
m_elementData[el]->elementType = elementTypeConfig[chConf][el]; // usacElementType[el]
|
||||
}
|
||||
}
|
||||
#ifndef NO_DTX_MODE
|
||||
memset (m_sfbLoudMem, 1, 2 * 26 * 32 * sizeof (uint16_t));
|
||||
#endif
|
||||
|
||||
// allocate all signal buffers
|
||||
if (m_shiftValSBR > 0)
|
||||
{
|
||||
|
|
|
@ -50,7 +50,9 @@ typedef enum USAC_CCI : signed char
|
|||
// Frame-length enumeration with underlying type short; the encoder constructor converts the
// (possibly SBR-shifted) frame length to this type. Each enumerator's numeric value equals the
// frame length it names. CCFL_UNDEF (-1) is presumably the "not set" marker - TODO confirm.
// CCFL_768 is only declared when RESTRICT_TO_AAC evaluates to zero.
typedef enum USAC_CCFL : short
|
||||
{
|
||||
CCFL_UNDEF = -1,
|
||||
#if !RESTRICT_TO_AAC
|
||||
CCFL_768 = 768, // LD
|
||||
#endif
|
||||
CCFL_1024 = 1024 // LC
|
||||
} USAC_CCFL;
|
||||
|
||||
|
@ -92,10 +94,9 @@ private:
|
|||
uint8_t m_perCorrHCurr[USAC_MAX_NUM_ELEMENTS];
|
||||
uint8_t m_perCorrLCurr[USAC_MAX_NUM_ELEMENTS];
|
||||
uint8_t m_priLength;
|
||||
uint32_t m_rateFactor; // RC
|
||||
SfbGroupData* m_scaleFacData[USAC_MAX_NUM_CHANNELS];
|
||||
#ifndef NO_DTX_MODE
|
||||
uint16_t m_sfbLoudMem[2][26][32]; // loudness mem
|
||||
#endif
|
||||
SfbQuantizer m_sfbQuantizer; // powerlaw quantization
|
||||
uint8_t m_shiftValSBR; // SBR ratio for shifting
|
||||
SpecAnalyzer m_specAnalyzer; // for spectral analysis
|
||||
|
@ -128,9 +129,7 @@ private:
|
|||
int32_t* const mdctSignal, int32_t* const mdstSignal);
|
||||
unsigned getOptParCorCoeffs (const SfbGroupData& grpData, const uint8_t maxSfb, TnsData& tnsData,
|
||||
const unsigned channelIndex, const uint8_t firstGroupIndexToTest = 0);
|
||||
#ifndef NO_DTX_MODE
|
||||
uint32_t getThr (const unsigned channelIndex, const unsigned sfbIndex);
|
||||
#endif
|
||||
unsigned psychBitAllocation ();
|
||||
unsigned quantizationCoding ();
|
||||
unsigned spectralProcessing ();
|
||||
|
@ -146,7 +145,7 @@ public:
|
|||
#if !RESTRICT_TO_AAC
|
||||
, const bool useNoiseFilling = true, const bool useEcodisExt = false
|
||||
#endif
|
||||
);
|
||||
);
|
||||
// destructor
|
||||
virtual ~ExhaleEncoder ();
|
||||
// public functions
|
||||
|
|
Loading…
Reference in New Issue