Low-rate (LR) tuning, stss fix

This commit is contained in:
Christian R. Helmrich
2020-08-05 01:00:01 +02:00
parent c72996090e
commit 051900fe7c
4 changed files with 68 additions and 12 deletions

View File

@ -105,7 +105,12 @@ int BasicMP4Writer::finishFile (const unsigned avgBitrate, const unsigned maxBit
const uint32_t stscAtomSize = STSX_BSIZE + (numFramesFinalPeriod == 0 ? 12 : 24); const uint32_t stscAtomSize = STSX_BSIZE + (numFramesFinalPeriod == 0 ? 12 : 24);
const uint32_t stcoAtomSize = STSX_BSIZE + (uint32_t) m_rndAccOffsets.size () * 4; const uint32_t stcoAtomSize = STSX_BSIZE + (uint32_t) m_rndAccOffsets.size () * 4;
#ifndef NO_FIX_FOR_ISSUE_1 #ifndef NO_FIX_FOR_ISSUE_1
const uint32_t stssAtomSize = STSX_BSIZE; # ifndef NO_FIX_FOR_ISSUE_13
const uint32_t chunkCount = (m_frameCount + m_rndAccPeriod - 1) / m_rndAccPeriod;
const uint32_t stssAtomSize = STSX_BSIZE + chunkCount * 4;
# else
const uint32_t stssAtomSize = STSX_BSIZE + 4;
# endif
// The following code creates a 'prol' sample group with a repeating pattern of membership, // The following code creates a 'prol' sample group with a repeating pattern of membership,
// indicating that the first sample in each increment of m_rndAccPeriod samples is a member // indicating that the first sample in each increment of m_rndAccPeriod samples is a member
// and therefore an independent frame (IF), and the remainder are not. // and therefore an independent frame (IF), and the remainder are not.
@ -244,8 +249,24 @@ int BasicMP4Writer::finishFile (const unsigned avgBitrate, const unsigned maxBit
m_dynamicHeader.push_back (0x73); m_dynamicHeader.push_back (0x73); // stss m_dynamicHeader.push_back (0x73); m_dynamicHeader.push_back (0x73); // stss
m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x00);
m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x00);
# ifndef NO_FIX_FOR_ISSUE_13
m_dynamicHeader.push_back ((chunkCount >> 24) & UCHAR_MAX);
m_dynamicHeader.push_back ((chunkCount >> 16) & UCHAR_MAX);
m_dynamicHeader.push_back ((chunkCount >> 8) & UCHAR_MAX);
m_dynamicHeader.push_back ( chunkCount & UCHAR_MAX);
for (uint32_t i = 1; i <= m_frameCount; i += m_rndAccPeriod)
{
m_dynamicHeader.push_back ((i >> 24) & UCHAR_MAX);
m_dynamicHeader.push_back ((i >> 16) & UCHAR_MAX);
m_dynamicHeader.push_back ((i >> 8) & UCHAR_MAX);
m_dynamicHeader.push_back ( i & UCHAR_MAX);
}
# else
m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x00);
m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x01); // 1 entry
m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x00);
m_dynamicHeader.push_back (0x00); m_dynamicHeader.push_back (0x01); // 1st AU
# endif
m_dynamicHeader.push_back ((sgpdAtomSize >> 24) & UCHAR_MAX); m_dynamicHeader.push_back ((sgpdAtomSize >> 24) & UCHAR_MAX);
m_dynamicHeader.push_back ((sgpdAtomSize >> 16) & UCHAR_MAX); m_dynamicHeader.push_back ((sgpdAtomSize >> 16) & UCHAR_MAX);
m_dynamicHeader.push_back ((sgpdAtomSize >> 8) & UCHAR_MAX); m_dynamicHeader.push_back ((sgpdAtomSize >> 8) & UCHAR_MAX);
@ -372,7 +393,11 @@ int BasicMP4Writer::initHeader (const uint32_t audioLength) // reserve bytes for
const unsigned finalChunk = (frameCount <= m_rndAccPeriod ? 0 : frameCount % m_rndAccPeriod); const unsigned finalChunk = (frameCount <= m_rndAccPeriod ? 0 : frameCount % m_rndAccPeriod);
#ifndef NO_FIX_FOR_ISSUE_1 #ifndef NO_FIX_FOR_ISSUE_1
const unsigned smpGrpSize = 10 /*sgpd*/ + (m_rndAccPeriod > UINT8_MAX ? 10 : 9) + ((m_rndAccPeriod + 1) >> 1) /*csgp*/; const unsigned smpGrpSize = 10 /*sgpd*/ + (m_rndAccPeriod > UINT8_MAX ? 10 : 9) + ((m_rndAccPeriod + 1) >> 1) /*csgp*/;
const int estimHeaderSize = STAT_HEADER_SIZE + m_ascSizeM5 + 6+4 + frameCount * 4 /*stsz*/ + STSX_BSIZE * 6 + smpGrpSize + # ifndef NO_FIX_FOR_ISSUE_13
const int estimHeaderSize = STAT_HEADER_SIZE + m_ascSizeM5 + 6+4 + frameCount * 4 /*stsz*/ + STSX_BSIZE * 6 + smpGrpSize + chunkCount * 4 /*stss*/ +
# else
const int estimHeaderSize = STAT_HEADER_SIZE + m_ascSizeM5 + 6+4 + frameCount * 4 /*stsz*/ + STSX_BSIZE * 6 + smpGrpSize + 4 /*stss*/ +
# endif
#else #else
const int estimHeaderSize = STAT_HEADER_SIZE + m_ascSizeM5 + 6+4 + frameCount * 4 /*stsz*/ + STSX_BSIZE * 3 + const int estimHeaderSize = STAT_HEADER_SIZE + m_ascSizeM5 + 6+4 + frameCount * 4 /*stsz*/ + STSX_BSIZE * 3 +
#endif #endif

View File

@ -432,6 +432,9 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
const SfbGroupData& grpData = *groupData[ch]; const SfbGroupData& grpData = *groupData[ch];
const uint32_t maxSfbInCh = __min (MAX_NUM_SWB_LONG, grpData.sfbsPerGroup); const uint32_t maxSfbInCh = __min (MAX_NUM_SWB_LONG, grpData.sfbsPerGroup);
const bool eightShorts = (grpData.numWindowGroups != 1); const bool eightShorts = (grpData.numWindowGroups != 1);
#if 1
const bool lowRateTuning = (samplingRate >= 27713) && (sfm[ch] <= (SCHAR_MAX >> 1));
#endif
const uint32_t* rms = grpData.sfbRmsValues; const uint32_t* rms = grpData.sfbRmsValues;
uint32_t* stepSizes = &sfbStepSizes[ch * numSwbShort * NUM_WINDOW_GROUPS]; uint32_t* stepSizes = &sfbStepSizes[ch * numSwbShort * NUM_WINDOW_GROUPS];
@ -450,7 +453,25 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
uint64_t s = (eightShorts ? (nSamplesInFrame * grpData.windowGroupLength[gr]) >> 1 : nSamplesInFrame << 2); uint64_t s = (eightShorts ? (nSamplesInFrame * grpData.windowGroupLength[gr]) >> 1 : nSamplesInFrame << 2);
memset (m_tempSfbValue, UCHAR_MAX, maxSfbInCh * sizeof (uint8_t)); memset (m_tempSfbValue, UCHAR_MAX, maxSfbInCh * sizeof (uint8_t));
#if 1
if ((m_rateIndex == 0) && lowRateTuning && (maxSfbInCh > 0) && !eightShorts)
{
uint32_t numRedBands = nSamplesInFrame; // final result lies between 1/4 and 1/2
if ((nChannels == 2) && commonWindow && (grpSte != nullptr))
{
for (b = 0; b < maxSfbInCh; b++) if (grpSte[b] == 0) numRedBands += grpOff[b + 1] - grpOff[b];
}
b = MAX_NUM_SWB_LONG - ((numRedBands * ((SCHAR_MAX >> 1) + 1 - sfm[ch]) + (1 << 11)) >> 12);
while ((b < maxSfbInCh) && (grpRms[b] > grpRms[b - 1])) b++; // start after peak
for (b += ((nChannels == 2) && commonWindow ? b & 1 : 0); b < maxSfbInCh; b++)
{
grpStepSizes[b] = __max (grpStepSizes[b], grpRms[b] >= (UINT_MAX >> 1) ? UINT_MAX : (grpRms[b] + 1) << 1);
}
}
#endif
// undercoding reduction for case where large number of coefs is quantized to zero // undercoding reduction for case where large number of coefs is quantized to zero
for (b = 0; b < maxSfbInCh; b++) for (b = 0; b < maxSfbInCh; b++)
{ {
@ -470,7 +491,7 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
} }
if (grpRms[b] < grpRmsMin) grpRmsMin = grpRms[b]; if (grpRms[b] < grpRmsMin) grpRmsMin = grpRms[b];
#if 1 #if 1
if ((m_rateIndex > 0) || (samplingRate >= 27713 && sfm[ch] <= (SCHAR_MAX >> 1))) if ((m_rateIndex > 0) || lowRateTuning)
#endif #endif
if (rmsComp >= rmsRef9 && (rmsComp < (grpStepSizes[b] >> 1))) // zero-quantized if (rmsComp >= rmsRef9 && (rmsComp < (grpStepSizes[b] >> 1))) // zero-quantized
{ {
@ -484,7 +505,7 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
const uint32_t rmsRef9 = (commonWindow ? refRms[b] >> 9 : rmsComp); const uint32_t rmsRef9 = (commonWindow ? refRms[b] >> 9 : rmsComp);
const uint8_t sfbWidth = grpOff[maxSfbL16k] - grpOff[b]; const uint8_t sfbWidth = grpOff[maxSfbL16k] - grpOff[b];
#if 1 #if 1
if ((m_rateIndex > 0) || (samplingRate >= 27713 && sfm[ch] <= (SCHAR_MAX >> 1))) if ((m_rateIndex > 0) || lowRateTuning)
#endif #endif
if (rmsComp >= rmsRef9) // check only first SFB above max_sfb for simplification if (rmsComp >= rmsRef9) // check only first SFB above max_sfb for simplification
{ {
@ -499,6 +520,12 @@ unsigned BitAllocator::imprSfbStepSizes (const SfbGroupData* const groupData[USA
{ {
grpStepSizes[b] = uint32_t ((__max (grpRmsMin, grpStepSizes[b]) * s * (m_tempSfbValue[b] + 1u) + (1u << 14)) >> 15); grpStepSizes[b] = uint32_t ((__max (grpRmsMin, grpStepSizes[b]) * s * (m_tempSfbValue[b] + 1u) + (1u << 14)) >> 15);
if (grpStepSizes[b] <= (grpRms[b] >> 11)) grpStepSizes[b] = __max (BA_EPS, grpRms[b] >> 11); if (grpStepSizes[b] <= (grpRms[b] >> 11)) grpStepSizes[b] = __max (BA_EPS, grpRms[b] >> 11);
#if 1
if ((m_rateIndex == 0) && lowRateTuning)
{
if ((grpStepSizes[b] > grpRms[b]) && ((grpStepSizes[b] >> 1) <= grpRms[b])) grpStepSizes[b] = grpRms[b];
}
#endif
} }
} // for gr } // for gr
} // for ch } // for ch

View File

@ -2076,7 +2076,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin
// initialize coder class memory // initialize coder class memory
m_tempIntBuf = m_timeSignals[0]; m_tempIntBuf = m_timeSignals[0];
if (m_bitAllocator.initAllocMemory (&m_linPredictor, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode >> ((nChannels - 1) >> 1)) > 0 || if (m_bitAllocator.initAllocMemory (&m_linPredictor, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode >> ((nChannels - 1) >> 2)) > 0 ||
#if EC_TRELLIS_OPT_CODING #if EC_TRELLIS_OPT_CODING
m_sfbQuantizer.initQuantMemory (nSamplesInFrame, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode, toSamplingRate (m_frequencyIdx)) > 0 || m_sfbQuantizer.initQuantMemory (nSamplesInFrame, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode, toSamplingRate (m_frequencyIdx)) > 0 ||
#else #else

View File

@ -42,7 +42,7 @@ uint8_t SpecGapFiller::getSpecGapFillParams (const SfbQuantizer& sfbQuantizer, c
const uint16_t sfbsPerGrp = grpData.sfbsPerGroup; const uint16_t sfbsPerGrp = grpData.sfbsPerGroup;
const uint16_t windowNfso = noiseFillingStartOffset[grpData.numWindowGroups == 1 ? 0 : 1][nSamplesInFrame >> 10]; const uint16_t windowNfso = noiseFillingStartOffset[grpData.numWindowGroups == 1 ? 0 : 1][nSamplesInFrame >> 10];
const bool saveRate = (samplingRate >= sampRateBitSave); const bool saveRate = (samplingRate >= sampRateBitSave);
uint8_t scaleFacLim = 0; // limit range uint8_t scaleFacLim = 0;
uint16_t u = 0; uint16_t u = 0;
short diff = 0, s = 0; short diff = 0, s = 0;
double magnSum = 0.0; double magnSum = 0.0;
@ -181,7 +181,7 @@ uint8_t SpecGapFiller::getSpecGapFillParams (const SfbQuantizer& sfbQuantizer, c
u = __max (1, u - int (specFlat >> 5)); // SFM-adaptive reduction u = __max (1, u - int (specFlat >> 5)); // SFM-adaptive reduction
magnSum = pow (2.0, (14 - u) / 3.0); // noiseVal^-1, 23003-3, 7.2 magnSum = pow (2.0, (14 - u) / 3.0); // noiseVal^-1, 23003-3, 7.2
magnSum *= 1.25 - specFlat * 0.0009765625; // zero-quant increase magnSum *= 1.25 - specFlat * 0.0009765625;
// --- calculate gap-fill scale factors for zero quantized SFBs, then determine noise_offset // --- calculate gap-fill scale factors for zero quantized SFBs, then determine noise_offset
u <<= 5; // left-shift for bit-stream u <<= 5; // left-shift for bit-stream
@ -236,6 +236,7 @@ uint8_t SpecGapFiller::getSpecGapFillParams (const SfbQuantizer& sfbQuantizer, c
#if SGF_SF_PEAK_SMOOTHING #if SGF_SF_PEAK_SMOOTHING
else if (saveRate) lastNonZeroSfb = b; else if (saveRate) lastNonZeroSfb = b;
#endif #endif
if ((b > m_1stGapFillSfb) && (((grpRms[b - 1] >> 16) > 0) ^ ((grpRms[b - 2] >> 16) > 0))) if ((b > m_1stGapFillSfb) && (((grpRms[b - 1] >> 16) > 0) ^ ((grpRms[b - 2] >> 16) > 0)))
{ {
diff += (int) grpScFacs[b - 1] - (int) grpScFacs[b - 2]; // sum up transition deltas diff += (int) grpScFacs[b - 1] - (int) grpScFacs[b - 2]; // sum up transition deltas
@ -248,7 +249,7 @@ uint8_t SpecGapFiller::getSpecGapFillParams (const SfbQuantizer& sfbQuantizer, c
const int32_t start = lastNonZeroSfb + 1; const int32_t start = lastNonZeroSfb + 1;
const int32_t size = sfbsPerGrp - start - 1; const int32_t size = sfbsPerGrp - start - 1;
const int32_t xSum = (size * (size + 1)) >> 1; const int32_t xSum = (size * (size + 1)) >> 1;
int32_t ySum = 0, a = 0, b = 0; int32_t ySum = 0, a = 0, b = 0, y = 0;
uint16_t x; uint16_t x;
for (x = start + 1; x < sfbsPerGrp; x++) ySum += grpScFacs[x]; // size * (mean factor) for (x = start + 1; x < sfbsPerGrp; x++) ySum += grpScFacs[x]; // size * (mean factor)
@ -256,11 +257,14 @@ uint8_t SpecGapFiller::getSpecGapFillParams (const SfbQuantizer& sfbQuantizer, c
for (x = start + 1; x < sfbsPerGrp; x++) for (x = start + 1; x < sfbsPerGrp; x++)
{ {
const int32_t xZ = size * (x - start) - xSum; // zero-mean const int32_t xZ = size * (x - start) - xSum; // zero-mean
const int32_t yZ = size * grpScFacs[x] - ySum;
a += xZ * xZ; a += xZ * xZ;
b += xZ * (size * grpScFacs[x] - ySum); b += xZ * yZ;
y += yZ * yZ;
} }
if (a > 0) // complete line and adjust gap-fill scale factors
if ((a > 0) && (b * b > ((a * y) >> 3))) // factor smoothing
{ {
unsigned countOld = 0, countNew = 0; unsigned countOld = 0, countNew = 0;
@ -270,7 +274,7 @@ uint8_t SpecGapFiller::getSpecGapFillParams (const SfbQuantizer& sfbQuantizer, c
ySum = grpScFacs[start]; ySum = grpScFacs[start];
for (x = start + 1; x < sfbsPerGrp; x++) for (x = start + 1; x < sfbsPerGrp; x++)
{ {
const int32_t y = CLIP_UCHAR ((a + b * (x - start) - SCHAR_MIN) >> 8); y = CLIP_UCHAR ((a + b * (x - start) - SCHAR_MIN) >> 8);
countOld += huffBitCountEstimate ((int) grpScFacs[x] - grpScFacs[x - 1]); countOld += huffBitCountEstimate ((int) grpScFacs[x] - grpScFacs[x - 1]);
countNew += huffBitCountEstimate (y - ySum); countNew += huffBitCountEstimate (y - ySum);
@ -307,7 +311,7 @@ uint8_t SpecGapFiller::getSpecGapFillParams (const SfbQuantizer& sfbQuantizer, c
if (grpScFacs[b] > scaleFacLim) grpScFacs[b] = scaleFacLim; if (grpScFacs[b] > scaleFacLim) grpScFacs[b] = scaleFacLim;
} }
} // for b }
// repeat first significant scale factor downwards to save bits // repeat first significant scale factor downwards to save bits
memset (grpScFacs, grpScFacs[m_1stNonZeroSfb[gr]], m_1stNonZeroSfb[gr] * sizeof (uint8_t)); memset (grpScFacs, grpScFacs[m_1stNonZeroSfb[gr]], m_1stNonZeroSfb[gr] * sizeof (uint8_t));