mirror of https://gitlab.com/ecodis/exhale.git
1083 lines
39 KiB
C++
1083 lines
39 KiB
C++
/* quantization.cpp - source file for class with nonuniform quantization functionality
|
|
* written by C. R. Helmrich, last modified in 2023 - see License.htm for legal notices
|
|
*
|
|
* The copyright in this software is being made available under the exhale Copyright License
|
|
* and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
|
|
* party rights, including patent rights. No such rights are granted under this License.
|
|
*
|
|
* Copyright (c) 2018-2024 Christian R. Helmrich, project ecodis. All rights reserved.
|
|
*/
|
|
|
|
#include "exhaleLibPch.h"
|
|
#include "quantization.h"
|
|
#if SFB_QUANT_SSE
|
|
# include <xmmintrin.h>
|
|
#endif
|
|
|
|
#define EC_TRAIN (0 && EC_TRELLIS_OPT_CODING) // for RDOC testing
|
|
|
|
// static helper functions
|
|
// Estimates the number of bits needed for one scale factor band: the bits for the
// scale factor index (coded as difference to its predecessor) plus, for groups of
// length one, the bits reported by the entropy coder's arithmetic-coding test run.
// The result is saturated to SHRT_MAX so that it always fits into a short.
static inline short getBitCount (EntropyCoder& entrCoder, const int sfIndex, const int sfIndexPred,
                                 const uint8_t groupLength, const uint8_t* coeffQuant,
                                 const uint16_t coeffOffset, const uint16_t numCoeffs)
{
  unsigned numBits;

  if ((sfIndex != UCHAR_MAX) && (sfIndexPred == UCHAR_MAX))
  {
    numBits = 8; // valid index but no valid predecessor: fixed 8-bit cost
  }
  else
  {
    numBits = entrCoder.indexGetBitCount (sfIndex - sfIndexPred); // cost of the index delta
  }

  if (groupLength == 1) // include arithmetic coding in bit count
  {
#if EC_TRELLIS_OPT_CODING
    numBits += entrCoder.arithCodeSigTest (coeffQuant, coeffOffset, numCoeffs);
#else
    numBits += entrCoder.arithCodeSigMagn (coeffQuant, coeffOffset, numCoeffs);
#endif
  }

  if (numBits > (unsigned) SHRT_MAX)
  {
    return (short) SHRT_MAX; // saturate
  }
  return (short) numBits; // exclude sign bits
}
|
|
|
|
#if EC_TRELLIS_OPT_CODING && !EC_TRAIN
|
|
// Returns the Lagrange multiplier used in the rate-distortion decisions, computed as
// (95 + rateIndex^2) / 1024. The square is evaluated in double precision, because the
// promoted int product rateIndex * rateIndex overflows for rateIndex > 46340.
static inline double getLagrangeValue (const uint16_t rateIndex) // RD optimization constant
{
  const double rate = (double) rateIndex;

  return (95.0 + rate * rate) * 0.0009765625; // / 1024
}
|
|
#endif
|
|
|
|
// private helper functions
|
|
double SfbQuantizer::getQuantDist (const unsigned* const coeffMagn, const uint8_t scaleFactor,
|
|
const uint8_t* const coeffQuant, const uint16_t numCoeffs)
|
|
{
|
|
#if SFB_QUANT_SSE
|
|
const __m128 stepSizeDiv = _mm_set_ps1 ((float) m_lutSfNorm[scaleFactor]); // or _mm_set1_ps ()
|
|
__m128 sumsSquares = _mm_setzero_ps ();
|
|
float dist[4];
|
|
|
|
for (int i = numCoeffs - 4; i >= 0; i -= 4)
|
|
{
|
|
__m128 orig = _mm_set_ps ((float) coeffMagn[i + 0], (float) coeffMagn[i + 1],
|
|
(float) coeffMagn[i + 2], (float) coeffMagn[i + 3]);
|
|
__m128 reco = _mm_set_ps ((float) m_lutXExp43[coeffQuant[i + 0]], (float) m_lutXExp43[coeffQuant[i + 1]],
|
|
(float) m_lutXExp43[coeffQuant[i + 2]], (float) m_lutXExp43[coeffQuant[i + 3]]);
|
|
__m128 diff = _mm_sub_ps (reco, _mm_mul_ps (orig, stepSizeDiv));
|
|
|
|
sumsSquares = _mm_add_ps (sumsSquares, _mm_mul_ps (diff, diff));
|
|
}
|
|
_mm_storeu_ps (dist, sumsSquares);
|
|
|
|
// consider quantization step-size in calculation of distortion
|
|
return ((double) dist[0] + dist[1] + dist[2] + dist[3]) * m_lut2ExpX4[scaleFactor] * m_lut2ExpX4[scaleFactor];
|
|
#else
|
|
const double stepSizeDiv = m_lutSfNorm[scaleFactor];
|
|
double dDist = 0.0;
|
|
|
|
for (int i = numCoeffs - 1; i >= 0; i--)
|
|
{
|
|
const double d = m_lutXExp43[coeffQuant[i]] - coeffMagn[i] * stepSizeDiv;
|
|
|
|
dDist += d * d;
|
|
}
|
|
|
|
// consider quantization step-size in calculation of distortion
|
|
return dDist * m_lut2ExpX4[scaleFactor] * m_lut2ExpX4[scaleFactor];
|
|
#endif
|
|
}
|
|
|
|
uint8_t SfbQuantizer::quantizeMagnSfb (const unsigned* const coeffMagn, const uint8_t scaleFactor,
|
|
/*mod*/uint8_t* const coeffQuant, const uint16_t numCoeffs,
|
|
#if EC_TRELLIS_OPT_CODING
|
|
EntropyCoder* const arithmCoder, const uint16_t coeffOffset,
|
|
#endif
|
|
short* const sigMaxQ /*= nullptr*/, short* const sigNumQ /*= nullptr*/)
|
|
{
|
|
const double stepSizeDiv = m_lutSfNorm[scaleFactor];
|
|
double dNum = 0.0, dDen = 0.0;
|
|
short sf, maxQ = 0, numQ = 0;
|
|
|
|
for (int i = numCoeffs - 1; i >= 0; i--)
|
|
{
|
|
const double normalizedMagn = (double) coeffMagn[i] * stepSizeDiv;
|
|
short q;
|
|
|
|
if (normalizedMagn < 28.5) // fast approximate pow (d, 0.75)
|
|
{
|
|
// based on code from: N. N. Schraudolph, "A Fast, Compact Approximation of the Expo-
|
|
// nential Function," Neural Comput., vol. 11, pp. 853-862, 1998 and M. Ankerl, 2007,
|
|
// https://martin.ankerl.com/2007/10/04/optimized-pow-approximation-for-java-and-c-c/
|
|
union { double d; int32_t i[2]; } u = { normalizedMagn };
|
|
|
|
u.i[1] = int32_t (0.75 * (u.i[1] - 1072632447) + 1072632447.0);
|
|
u.i[0] = 0;
|
|
q = short (u.d + (u.d < 1.0 ? 0.3822484 : 0.2734375));
|
|
}
|
|
else
|
|
{
|
|
q = short (SFB_QUANT_OFFSET + pow (__min (1048544.0, normalizedMagn), 0.75)); // min avoids rare preset-9 overflow
|
|
}
|
|
|
|
if (q > 0)
|
|
{
|
|
if (q >= SCHAR_MAX)
|
|
{
|
|
if (maxQ < q)
|
|
{
|
|
maxQ = q; // find maximum quantized magnitude in vector
|
|
}
|
|
q = SCHAR_MAX;
|
|
}
|
|
else
|
|
{
|
|
const double diffRoundD = m_lutXExp43[q ] - normalizedMagn;
|
|
const double diffRoundU = m_lutXExp43[q + 1] - normalizedMagn;
|
|
|
|
if (diffRoundU * diffRoundU < diffRoundD * diffRoundD)
|
|
{
|
|
q++; // round-up gives lower distortion than round-down
|
|
}
|
|
}
|
|
if (maxQ < q)
|
|
{
|
|
maxQ = q;
|
|
}
|
|
numQ++;
|
|
dNum += m_lutXExp43[q] * normalizedMagn;
|
|
dDen += m_lutXExp43[q] * m_lutXExp43[q];
|
|
}
|
|
#if SFB_QUANT_PERCEPT_OPT
|
|
else // q == 0, assume perceptual transparency for code below
|
|
{
|
|
dNum += normalizedMagn * normalizedMagn;
|
|
dDen += normalizedMagn * normalizedMagn;
|
|
}
|
|
#endif
|
|
coeffQuant[i] = (uint8_t) q;
|
|
}
|
|
|
|
if (sigMaxQ) *sigMaxQ = maxQ; // max. quantized value magnitude
|
|
if (sigNumQ) *sigNumQ = numQ; // nonzero coeff. count (L0 norm)
|
|
|
|
sf = scaleFactor;
|
|
// compute least-squares optimal modifier added to scale factor
|
|
if (dNum > SF_THRESH_POS * dDen) sf++;
|
|
else
|
|
if (dNum < SF_THRESH_NEG * dDen) sf--;
|
|
|
|
#if EC_TRELLIS_OPT_CODING
|
|
if (arithmCoder && (sf > 0) && (maxQ <= SCHAR_MAX)) // use RDOC
|
|
{
|
|
EntropyCoder& entrCoder = *arithmCoder;
|
|
#if EC_TRAIN
|
|
const uint32_t codStart = entrCoder.arithGetCodState ();
|
|
const uint32_t ctxStart = entrCoder.arithGetCtxState ();
|
|
uint32_t bitCount = entrCoder.arithCodeSigTest (&coeffQuant[-((int) coeffOffset)], coeffOffset, numCoeffs) + (uint32_t) numQ;
|
|
|
|
entrCoder.arithSetCodState (codStart); // back to last state
|
|
entrCoder.arithSetCtxState (ctxStart);
|
|
#else
|
|
uint32_t bitCount = (uint32_t) numQ;
|
|
#endif
|
|
if ((bitCount = quantizeMagnRDOC (entrCoder, (uint8_t) sf, bitCount, coeffOffset, coeffMagn, numCoeffs, coeffQuant)) > 0)
|
|
{
|
|
numQ = bitCount & SHRT_MAX;
|
|
|
|
if ((numQ > 0) && (sf < m_maxSfIndex)) // nonzero-quantized
|
|
{
|
|
const double magnNormDiv = m_lutSfNorm[sf];
|
|
|
|
dNum = dDen = 0.0;
|
|
for (int i = numCoeffs - 1; i >= 0; i--)
|
|
{
|
|
const double normalizedMagn = (double) coeffMagn[i] * magnNormDiv;
|
|
const uint8_t q = coeffQuant[i];
|
|
|
|
if (q > 0)
|
|
{
|
|
dNum += m_lutXExp43[q] * normalizedMagn;
|
|
dDen += m_lutXExp43[q] * m_lutXExp43[q];
|
|
}
|
|
# if SFB_QUANT_PERCEPT_OPT
|
|
else // assume perceptual transparency for code below
|
|
{
|
|
dNum += normalizedMagn * normalizedMagn;
|
|
dDen += normalizedMagn * normalizedMagn;
|
|
}
|
|
# endif
|
|
}
|
|
|
|
// re-compute least-squares optimal scale factor modifier
|
|
if (dNum > SF_THRESH_POS * dDen) sf++;
|
|
# if !SFB_QUANT_PERCEPT_OPT
|
|
else
|
|
if (dNum < SF_THRESH_NEG * dDen) sf--; // reduces SFB RMS
|
|
# endif
|
|
} // if nonzero
|
|
|
|
if (sigMaxQ) *sigMaxQ = (numQ > 0 ? maxQ : 0); // a new max
|
|
if (sigNumQ) *sigNumQ = numQ; // a new nonzero coeff. count
|
|
}
|
|
}
|
|
#endif // EC_TRELLIS_OPT_CODING
|
|
|
|
#if SFB_QUANT_PERCEPT_OPT
|
|
if ((numQ > 0) && (sf > 0 && sf <= scaleFactor)) // recover RMS
|
|
{
|
|
# if SFB_QUANT_SSE
|
|
const __m128 magnNormDiv = _mm_set_ps1 ((float) m_lutSfNorm[sf]); // or _mm_set1_ps ()
|
|
__m128 sumsSquares = _mm_setzero_ps ();
|
|
float fl[4]; // dDen has normalized energy after quantization
|
|
|
|
for (int i = numCoeffs - 4; i >= 0; i -= 4)
|
|
{
|
|
__m128 orig = _mm_set_ps ((float) coeffMagn[i + 0], (float) coeffMagn[i + 1],
|
|
(float) coeffMagn[i + 2], (float) coeffMagn[i + 3]);
|
|
__m128 norm = _mm_mul_ps (orig, magnNormDiv);
|
|
|
|
sumsSquares = _mm_add_ps (sumsSquares, _mm_mul_ps (norm, norm));
|
|
}
|
|
_mm_storeu_ps (fl, sumsSquares);
|
|
|
|
if ((double) fl[0] + fl[1] + fl[2] + fl[3] > SF_THRESH_POS * SF_THRESH_POS * dDen) sf++;
|
|
# else
|
|
const double magnNormDiv = m_lutSfNorm[sf];
|
|
|
|
dNum = 0.0; // dDen has normalized energy after quantization
|
|
for (int i = numCoeffs - 1; i >= 0; i--)
|
|
{
|
|
const double normalizedMagn = (double) coeffMagn[i] * magnNormDiv;
|
|
|
|
dNum += normalizedMagn * normalizedMagn;
|
|
}
|
|
|
|
if (dNum > SF_THRESH_POS * SF_THRESH_POS * dDen) sf++;
|
|
# endif
|
|
}
|
|
#endif
|
|
return (uint8_t) __max (0, sf); // optimized scale factor index
|
|
}
|
|
|
|
#if EC_TRELLIS_OPT_CODING
|
|
// Rate-distortion optimized re-quantization of one SFB via a trellis (Viterbi) search
// over coefficient tuples (pairs). For every tuple, four candidate "states" are tested:
// keep both quantized magnitudes, or reduce the second, the first, or both by one. A
// reduction is only considered for magnitudes equal to 1 (i.e. 1 -> 0).
//   entropyCoder   - arithmetic coder used for rate estimation; its coder and context
//                    states are restored to their values at entry before returning
//   optimalSf      - scale factor index of the band, must be <= m_maxSfIndex
//   targetBitCount - must be in 1..SHRT_MAX
//   coeffOffset    - offset of the band within its group, as expected by the coder
//   coeffMagn      - original magnitudes of the band
//   numCoeffs      - number of coefficients; at most 32 tuples (64 coefficients)
//   quantCoeffs    - in/out: quantized magnitudes, updated with the optimized values
// Returns 0 on invalid input. Otherwise (non-training build) returns bit 15 set as OK
// flag, OR-ed with the number of nonzero quantized coefficients after optimization.
uint32_t SfbQuantizer::quantizeMagnRDOC (EntropyCoder& entropyCoder, const uint8_t optimalSf, const unsigned targetBitCount,
                                         const uint16_t coeffOffset, const unsigned* const coeffMagn, // initial MDCT magnitudes
                                         const uint16_t numCoeffs, uint8_t* const quantCoeffs) // returns updated SFB statistics
{
  // numTuples: num of trellis stages. Based on: A. Aggarwal, S. L. Regunathan, and K. Rose,
  // "Trellis-Based Optimization of MPEG-4 Advanced Audio Coding," in Proc. IEEE Workshop on
  // Speech Coding, pp. 142-144, Sep. 2000. Modified for arithmetic instead of Huffman coder
  const uint16_t numStates = 4; // 4 reduction types: [0, 0], [0, -1], [-1, 0], and [-1, -1]
  const uint16_t numTuples = numCoeffs >> 1;

  // validate all inputs BEFORE any table is indexed with them: m_lutSfNorm below must
  // not be read with a scale factor index beyond m_maxSfIndex
  if ((coeffMagn == nullptr) || (quantCoeffs == nullptr) || (optimalSf > m_maxSfIndex) || (numTuples == 0) || (numTuples > 32) ||
      (targetBitCount == 0) || (targetBitCount > SHRT_MAX))
  {
    return 0; // invalid input error
  }

  const uint32_t codStart = entropyCoder.arithGetCodState ();
  const uint32_t ctxStart = entropyCoder.arithGetCtxState (); // before call to getBitCount
  const double stepSizeDiv = m_lutSfNorm[optimalSf];
  uint8_t* const quantRate = &m_coeffTemp[((unsigned) m_maxSize8M1 + 1) << 3]; // rate table behind the coefficient area
  uint32_t prevCodState[4] = {0, 0, 0, 0};
  uint32_t prevCtxState[4] = {0, 0, 0, 0};
  double   prevVtrbCost[4] = {0, 0, 0, 0};
  uint32_t tempCodState[4] = {0, 0, 0, 0};
  uint32_t tempCtxState[4] = {0, 0, 0, 0};
  double   tempVtrbCost[4] = {0, 0, 0, 0};
  double   quantDist[32][4]; // TODO: dynamic memory allocation
  uint8_t* const optimalIs = (uint8_t* const) (quantDist[32-1]); // last row doubles as storage for the chosen states
  uint8_t  tempQuant[4], numQ; // for tuple/SFB sign bit counting
  unsigned tuple, is;
  int ds;
#if EC_TRAIN
  unsigned tempBitCount;
  double refSfbDist = 0.0, tempSfbDist = 0.0;
#else
  const double lambda = getLagrangeValue (m_rateIndex);
#endif

  // save third-last tuple value, required due to an insufficiency of arithGet/SetCtxState()
  if (coeffOffset > 5) tempQuant[3] = entropyCoder.arithGetTuplePtr ()[(coeffOffset >> 1) - 3];

  for (tuple = 0; tuple < numTuples; tuple++) // tuple-wise non-weighted distortion and rate
  {
    const uint16_t tupleStart  = tuple << 1;
    const uint16_t tupleOffset = coeffOffset + tupleStart;
    const double   normalMagnA = (double) coeffMagn[tupleStart    ] * stepSizeDiv;
    const double   normalMagnB = (double) coeffMagn[tupleStart + 1] * stepSizeDiv;
    const uint8_t  origQuantA  = quantCoeffs[tupleStart];
    const uint8_t  origQuantB  = quantCoeffs[tupleStart + 1];

    for (is = 0; is < numStates; is++) // populate tuple trellis
    {
      // NOTE(review): for is == 0 the base pointer is shifted by tupleOffset, not by
      // coeffOffset; confirm against arithCodeTupTest() that this addresses this tuple.
      uint8_t* const mag = (is != 0 ? tempQuant : quantCoeffs) - (int) tupleOffset; // see arithCodeTupTest()
      uint8_t* currRate = &quantRate[(is + tuple * numStates) * numStates];
      // every state starts from the unmodified tuple; carrying the reduced values of an
      // earlier state into the next one would evaluate state 2 as [-1, -1] and make
      // state 3 unreachable, contradicting the re-quantization applied at the end
      uint8_t coeffQuantA = origQuantA;
      uint8_t coeffQuantB = origQuantB;
      double diffA, diffB;

      if (is != 0) // test reduction of quantized MDCT magnitudes
      {
        const uint8_t redA = is >> 1;
        const uint8_t redB = is & 1;

        if ((redA > 0 && coeffQuantA != 1) || (redB > 0 && coeffQuantB != 1)) // avoid path
        {
          tempCodState[is] = tempCodState[0];
          tempCtxState[is] = tempCtxState[0];
          memset (currRate, UCHAR_MAX, numStates); // UCHAR_MAX marks a forbidden transition

          continue;
        }
        tempQuant[0] = (coeffQuantA -= redA);
        tempQuant[1] = (coeffQuantB -= redB);
      }
      diffA = m_lutXExp43[coeffQuantA] - normalMagnA;
      diffB = m_lutXExp43[coeffQuantB] - normalMagnB;
      quantDist[tuple][is] = diffA * diffA + diffB * diffB;

      numQ = (coeffQuantA > 0 ? 1 : 0) + (coeffQuantB > 0 ? 1 : 0);

      if (tuple == 0) // first tuple, with tupleStart == sfbStart
      {
        entropyCoder.arithSetCodState (codStart); // start of SFB
        entropyCoder.arithSetCtxState (ctxStart, 0);

        memset (currRate, entropyCoder.arithCodeTupTest (mag, tupleOffset) + numQ, numStates); // +- m_acBits
      }
      else // tuple > 0, rate depends on decisions for last tuple
      {
        for (ds = numStates - 1; ds >= 0; ds--)
        {
          if (quantRate[(ds + (tuple-1) * numStates) * numStates] >= UCHAR_MAX)// avoid path
          {
            currRate[ds] = UCHAR_MAX;

            continue;
          }

          entropyCoder.arithSetCodState (prevCodState[ds]);
          entropyCoder.arithSetCtxState (prevCtxState[ds], tupleOffset);

          currRate[ds] = uint8_t (entropyCoder.arithCodeTupTest (mag, tupleOffset) + numQ); // incl. m_acBits
        }
      }
      // statistically best place to save states is after ds == 0
      tempCodState[is] = entropyCoder.arithGetCodState ();
      tempCtxState[is] = entropyCoder.arithGetCtxState ();
    } // for is
#if EC_TRAIN
    refSfbDist += quantDist[tuple][0];
#endif
    memcpy (prevCodState, tempCodState, numStates * sizeof (uint32_t));
    memcpy (prevCtxState, tempCtxState, numStates * sizeof (uint32_t));
  } // for tuple

  entropyCoder.arithSetCodState (codStart); // back to last state
  entropyCoder.arithSetCtxState (ctxStart, coeffOffset);
  // restore third-last tuple value, see insufficiency note above
  if (coeffOffset > 5) entropyCoder.arithGetTuplePtr ()[(coeffOffset >> 1) - 3] = tempQuant[3];

#if EC_TRAIN
  tempBitCount = targetBitCount + 1; // Viterbi search for minimum distortion at target rate
  for (double lambda = 0.015625; (lambda <= 0.375) && (tempBitCount > targetBitCount); lambda += 0.0078125)
#endif
  {
    double* const  prevCost = prevVtrbCost;
#if !EC_TRAIN
    // the backtracker reuses the memory of quantDist: the path bytes of stage t land in
    // row t / 8, which has already been consumed when stage t is processed
    uint8_t* const prevPath = (uint8_t*) quantDist;// backtracker
#endif
    double   costMinIs = (double) UINT_MAX;
    unsigned pathMinIs = 0;
#if EC_TRAIN
    uint8_t prevPath[16*4];
    tempSfbDist = 0.0;
#endif

    for (is = 0; is < numStates; is++) // initialize minimum path
    {
      const uint8_t currRate = quantRate[is * numStates];

      prevCost[is] = (currRate >= UCHAR_MAX ? (double) UINT_MAX : lambda * currRate + quantDist[0][is]);
      prevPath[is] = 0;
    }

    for (tuple = 1; tuple < numTuples; tuple++) // find min. path
    {
      double* const  currCost = tempVtrbCost;
      uint8_t* const currPath = &prevPath[tuple * numStates];

      for (is = 0; is < numStates; is++) // tuple's minimum path
      {
        uint8_t* currRate = &quantRate[(is + tuple * numStates) * numStates];
        double  costMinDs = (double) UINT_MAX;
        uint8_t pathMinDs = 0;

        for (ds = numStates - 1; ds >= 0; ds--) // transitions
        {
          const double costCurr = (currRate[ds] >= UCHAR_MAX ? (double) UINT_MAX : prevCost[ds] + lambda * currRate[ds]);

          if (costMinDs > costCurr)
          {
            costMinDs = costCurr;
            pathMinDs = (uint8_t) ds;
          }
        }
        if (costMinDs < UINT_MAX) costMinDs += quantDist[tuple][is];

        currCost[is] = costMinDs;
        currPath[is] = pathMinDs;
      } // for is

      memcpy (prevCost, currCost, numStates * sizeof (double)); // TODO: avoid memcpy, use pointer swapping instead for speed
    } // for tuple
#if EC_TRAIN
    tempBitCount = 0;
#endif
    for (is = 0; is < numStates; is++) // search for minimum path
    {
      if (costMinIs > prevCost[is])
      {
        costMinIs = prevCost[is];
        pathMinIs = is;
      }
    }

    for (tuple--; tuple > 0; tuple--) // min-cost rate and types
    {
      const uint8_t* currPath = &prevPath[tuple * numStates];
      const uint8_t pathMinDs = currPath[pathMinIs];

      optimalIs[tuple] = (uint8_t) pathMinIs;
#if EC_TRAIN
      tempBitCount += quantRate[pathMinDs + (pathMinIs + tuple * numStates) * numStates];
      tempSfbDist  += quantDist[tuple][pathMinIs];
#endif
      pathMinIs = pathMinDs;
    }
    optimalIs[0] = (uint8_t) pathMinIs;
#if EC_TRAIN
    tempBitCount += quantRate[pathMinIs * numStates];
    tempSfbDist  += quantDist[0][pathMinIs];
#endif
  } // Viterbi search

#if EC_TRAIN
  if ((tempSfbDist <= refSfbDist) || (tempBitCount <= targetBitCount))
#endif
  {
#if !EC_TRAIN
    numQ = 0;
#endif
    for (tuple = 0; tuple < numTuples; tuple++) // re-quantize SFB with R/D optimal rounding
    {
      const uint16_t tupleStart = tuple << 1;
      const uint8_t  tupIs = optimalIs[tuple];
      uint8_t& coeffQuantA = quantCoeffs[tupleStart];
      uint8_t& coeffQuantB = quantCoeffs[tupleStart + 1];

      if (tupIs != 0) // optimal red of quantized MDCT magnitudes
      {
        coeffQuantA -= (tupIs >> 1);
        coeffQuantB -= (tupIs & 1);
      }
#if !EC_TRAIN
      numQ += (coeffQuantA > 0 ? 1 : 0) + (coeffQuantB > 0 ? 1 : 0);
#endif
    } // for tuple

#if EC_TRAIN
    return tempBitCount;
#else
    return (1u << 15) | numQ; // final stats: OK flag | sign bits
#endif
  }

  return targetBitCount;
}
|
|
#endif // EC_TRELLIS_OPT_CODING
|
|
|
|
// constructor
|
|
// Constructor: puts the object into a defined "no memory allocated" state. All buffer
// pointers are cleared so that the destructor is safe even if initQuantMemory() is never
// called, and all scalar settings written by initQuantMemory() start out as zero.
SfbQuantizer::SfbQuantizer ()
{
  // initialize all helper buffers
  m_coeffMagn = nullptr;
#if EC_TRELLIS_OPT_CODING
  m_coeffTemp = nullptr;
#endif
  m_lut2ExpX4 = nullptr;
  m_lutSfNorm = nullptr;
  m_lutXExp43 = nullptr;

  m_maxSfIndex = 0;
#if EC_TRELLIS_OPT_CODING
  m_maxSize8M1 = 0; // previously left uninitialized until initQuantMemory()
  m_numCStates = 0;
  m_rateIndex  = 0; // previously left uninitialized until initQuantMemory()

  for (unsigned b = 0; b < 52; b++)
  {
    m_quantDist[b] = nullptr;
    m_quantInSf[b] = nullptr;
    m_quantRate[b] = nullptr;
  }
#endif
}
|
|
|
|
// destructor
|
|
// Destructor: hands every helper buffer owned by this object to MFREE. The buffers are
// independent of each other, so the order of release is irrelevant.
SfbQuantizer::~SfbQuantizer ()
{
#if EC_TRELLIS_OPT_CODING
  // per-band trellis memory
  for (unsigned band = 0; band < 52; ++band)
  {
    MFREE (m_quantRate[band]);
    MFREE (m_quantInSf[band]);
    MFREE (m_quantDist[band]);
  }
  // temporary quantization memory
  MFREE (m_coeffTemp);
#endif
  // look-up tables
  MFREE (m_lutXExp43);
  MFREE (m_lutSfNorm);
  MFREE (m_lut2ExpX4);
  // magnitude backup memory
  MFREE (m_coeffMagn);
}
|
|
|
|
// public functions
|
|
unsigned SfbQuantizer::initQuantMemory (const unsigned maxTransfLength,
|
|
#if EC_TRELLIS_OPT_CODING
|
|
const uint8_t numSwb, const uint8_t bitRateMode, const unsigned samplingRate,
|
|
#endif
|
|
const uint8_t maxScaleFacIndex /*= SCHAR_MAX*/)
|
|
{
|
|
const unsigned numScaleFactors = (unsigned) maxScaleFacIndex + 1;
|
|
#if EC_TRELLIS_OPT_CODING
|
|
const uint8_t complexityOffset = (samplingRate < 28800 ? 8 - (samplingRate >> 13) : 5) + ((bitRateMode == 0) && (samplingRate >= 8192) ? 1 : 0);
|
|
const uint8_t numTrellisStates = complexityOffset - __min (2, (bitRateMode + 2) >> 2); // number of states per SFB
|
|
const uint8_t numSquaredStates = numTrellisStates * numTrellisStates;
|
|
const uint16_t quantRateLength = (samplingRate < 28800 || samplingRate >= 57600 ? 512 : 256); // quantizeMagnRDOC()
|
|
#endif
|
|
unsigned x;
|
|
|
|
if ((maxTransfLength < 128) || (maxTransfLength > 2048) || (maxTransfLength & 7) || (maxScaleFacIndex == 0) || (maxScaleFacIndex > SCHAR_MAX))
|
|
{
|
|
return 1; // invalid arguments error
|
|
}
|
|
|
|
m_maxSfIndex = maxScaleFacIndex;
|
|
|
|
if ((m_coeffMagn = (unsigned*) malloc (maxTransfLength * sizeof (unsigned))) == nullptr ||
|
|
#if EC_TRELLIS_OPT_CODING
|
|
(m_coeffTemp = (uint8_t* ) malloc (maxTransfLength + quantRateLength )) == nullptr ||
|
|
#endif
|
|
(m_lut2ExpX4 = (double* ) malloc (numScaleFactors * sizeof (double ))) == nullptr ||
|
|
(m_lutSfNorm = (double* ) malloc (numScaleFactors * sizeof (double ))) == nullptr ||
|
|
(m_lutXExp43 = (double* ) malloc ((SCHAR_MAX + 1) * sizeof (double ))) == nullptr)
|
|
{
|
|
return 2; // memory allocation error
|
|
}
|
|
#if EC_TRELLIS_OPT_CODING
|
|
m_maxSize8M1 = (maxTransfLength >> 3) - 1;
|
|
m_numCStates = numTrellisStates;
|
|
m_rateIndex = bitRateMode;
|
|
|
|
for (x = 0; x < __min (52u, numSwb); x++)
|
|
{
|
|
if ((m_quantDist[x] = (double* ) malloc (numTrellisStates * sizeof (double ))) == nullptr ||
|
|
(m_quantInSf[x] = (uint8_t* ) malloc (numTrellisStates * sizeof (uint8_t ))) == nullptr ||
|
|
(m_quantRate[x] = (uint16_t*) malloc (numSquaredStates * sizeof (uint16_t))) == nullptr)
|
|
{
|
|
return 2;
|
|
}
|
|
}
|
|
#else
|
|
memset (m_coeffTemp, 0, sizeof (m_coeffTemp));
|
|
#endif
|
|
// calculate scale factor gain 2^(x/4)
|
|
for (x = 0; x < numScaleFactors; x++)
|
|
{
|
|
m_lut2ExpX4[x] = pow (2.0, (double) x / 4.0);
|
|
m_lutSfNorm[x] = 1.0 / m_lut2ExpX4[x];
|
|
}
|
|
// calculate dequantized coeff x^(4/3)
|
|
for (x = 0; x < (SCHAR_MAX + 1); x++)
|
|
{
|
|
m_lutXExp43[x] = pow ((double) x, 4.0 / 3.0);
|
|
}
|
|
|
|
return 0; // no error
|
|
}
|
|
|
|
// Quantizes one scale factor band (SFB) of a spectrum and selects its scale factor.
//   entropyCoder - coder used for bit count estimation; its state is advanced/reset here
//   inputCoeffs  - signed input coefficients of the spectrum
//   grpLength    - length of the window group; only for a length of 1 the arithmetic
//                  coding bits and (in trellis builds) the RDOC references are included
//   grpOffsets   - band offsets; entry 0 is the group start, entries sfb and sfb + 1
//                  delimit the band
//   grpStats     - optional output, per band: (max. magnitude << 16) | bit count
//   sfb          - band index, must be below 52
//   sfIndex      - requested scale factor index, must not exceed m_maxSfIndex
//   sfIndexPred  - scale factor index of the preceding band, UCHAR_MAX if there is none
//   quantCoeffs  - optional output buffer for the quantized magnitudes
// Returns the chosen scale factor index, or UCHAR_MAX on invalid input.
uint8_t SfbQuantizer::quantizeSpecSfb (EntropyCoder& entropyCoder, const int32_t* const inputCoeffs, const uint8_t grpLength,
                                       const uint16_t* const grpOffsets, uint32_t* const grpStats, // quant./coding statistics
                                       const unsigned sfb, const uint8_t sfIndex, const uint8_t sfIndexPred /*= UCHAR_MAX*/,
                                       uint8_t* const quantCoeffs /*= nullptr*/) // returns the RD optimized scale factor index
{
#if EC_TRELLIS_OPT_CODING
  EntropyCoder* const entrCoder = (grpLength == 1 ? &entropyCoder : nullptr);
#endif
  uint8_t sfBest = sfIndex;

  if ((inputCoeffs == nullptr) || (grpOffsets == nullptr) || (sfb >= 52) || (sfIndex > m_maxSfIndex))
  {
    return UCHAR_MAX; // invalid input error
  }

#if EC_TRELLIS_OPT_CODING
  if (grpLength == 1) // references for RDOC
  {
    m_quantDist[sfb][1] = -1.0;
    m_quantInSf[sfb][1] = sfIndex;
    m_quantRate[sfb][1] = 0; // for sgn bits
    m_quantRate[sfb][0] = entropyCoder.arithGetCtxState () & USHRT_MAX; // ref start context
  }
#endif

  // band geometry and magnitude backup, needed by both the zero and the nonzero path
  const uint16_t grpStart = grpOffsets[0];
  const uint16_t sfbStart = grpOffsets[sfb];
  const uint16_t sfbWidth = grpOffsets[sfb + 1] - sfbStart;
  uint32_t* const coeffMagn = &m_coeffMagn[sfbStart];

  for (int k = 0; k < sfbWidth; k++) // back up magnitudes
  {
    coeffMagn[k] = abs (inputCoeffs[sfbStart + k]);
  }

  // clears the band in the output buffer (if any) and stores the approximate bit count
  const auto zeroSfbOutput = [&] ()
  {
    if (quantCoeffs == nullptr) return;

    memset (&quantCoeffs[sfbStart], 0, sfbWidth * sizeof (uint8_t)); // SFB output zeroing
    if (grpStats)
    {
      grpStats[sfb] = getBitCount (entropyCoder, 0, 0, grpLength, &quantCoeffs[grpStart], sfbStart - grpStart, sfbWidth);
    }
  };

  if ((sfIndex == 0) || (sfIndexPred <= m_maxSfIndex && sfIndex + INDEX_OFFSET < sfIndexPred))
  {
    zeroSfbOutput ();

    return sfIndexPred - (sfIndex == 0 ? 0 : INDEX_OFFSET); // save delta bits if applicable
  }

  // ----- nonzero sf, optimized quantization -----
  const uint16_t cpyWidth = sfbWidth * sizeof (uint8_t);
  uint32_t codStart = 0, ctxStart = 0; // coder state before coding the best trial
  uint32_t codFinal = 0, ctxFinal = 0; // coder state after coding the best trial
  double bestDist = 0, trialDist = 0;
  short  bestMaxQ = 0, trialMaxQ = 0;
  short  bestNumQ = 0, trialNumQ = 0;
#if EC_TRELLIS_OPT_CODING
  bool rdOptimQuant = (grpLength != 1);
#else
  bool rdOptimQuant = true;
#endif
  uint8_t* const ptrBest = &m_coeffTemp[0];
  uint8_t* const ptrCurr = &m_coeffTemp[100];
  uint8_t sfCurr = sfIndex;

  // one quantization trial of this band with the given scale factor
  const auto quantizeTrial = [&] (const uint8_t sfTrial, uint8_t* const outQuant, short* const outMaxQ, short* const outNumQ) -> uint8_t
  {
    return quantizeMagnSfb (coeffMagn, sfTrial, outQuant, sfbWidth,
#if EC_TRELLIS_OPT_CODING
                            entrCoder, sfbStart - grpStart,
#endif
                            outMaxQ, outNumQ);
  };

  // --- determine default quantization result using range limited scale factor as a reference
  sfBest = quantizeTrial (sfCurr, ptrBest, &bestMaxQ, &bestNumQ);

  if (bestMaxQ > SCHAR_MAX) // limit SNR via scale factor index
  {
    uint8_t c = 0;

    while ((c < 2) && (bestMaxQ > SCHAR_MAX)) // very rarely done twice
    {
      sfCurr += getScaleFacOffset (pow ((double) bestMaxQ, 4.0 / 3.0) * 0.001566492688) + c; // / m_lutXExp43[SCHAR_MAX]
      sfBest  = quantizeTrial (sfCurr, ptrBest, &bestMaxQ, &bestNumQ);
      c++;
    }
    rdOptimQuant = false;
  }
  else if ((sfBest < sfCurr) && (sfBest != sfIndexPred)) // re-optimize above quantization
  {
    sfBest = quantizeTrial (--sfCurr, ptrBest, &bestMaxQ, &bestNumQ);

    rdOptimQuant = rdOptimQuant && (bestMaxQ <= SCHAR_MAX);
  }

#if EC_TRELLIS_OPT_CODING
  if (grpLength == 1) // ref masking level
  {
    m_quantInSf[sfb][1] = __min (sfCurr, m_maxSfIndex);
  }
#endif
  if (bestMaxQ == 0) // SFB was quantized to zero - zero output
  {
    zeroSfbOutput (); // incl. estimated bit count

    return sfIndexPred; // repeat scale factor, save delta bits
  }

  // --- check whether optimized quantization and coding results in lower rate-distortion cost
  bestDist = getQuantDist (coeffMagn, sfBest, ptrBest, sfbWidth);

#if EC_TRELLIS_OPT_CODING
  if (grpLength == 1) // ref band-wise NMR
  {
    const double refNmrDiv = m_lutSfNorm[m_quantInSf[sfb][1]];

    m_quantDist[sfb][1] = bestDist * refNmrDiv * refNmrDiv;
    m_quantRate[sfb][1] = bestNumQ; // sgn
  }
#endif
  if (quantCoeffs)
  {
    memcpy (&quantCoeffs[sfbStart], ptrBest, cpyWidth);

    codStart = entropyCoder.arithGetCodState (); // start state
    ctxStart = entropyCoder.arithGetCtxState ();
    bestNumQ += getBitCount (entropyCoder, sfBest, sfIndexPred, grpLength, &quantCoeffs[grpStart], sfbStart - grpStart, sfbWidth);
    codFinal = entropyCoder.arithGetCodState (); // final state
    ctxFinal = entropyCoder.arithGetCtxState ();
  }
  rdOptimQuant = rdOptimQuant && (bestDist > 0.0);

  if ((sfBest < sfCurr) && (sfBest != sfIndexPred) && rdOptimQuant) // R/D re-optimization
  {
#if EC_TRELLIS_OPT_CODING
    const double refSfbNmrDiv = m_lutSfNorm[sfCurr]; // taken before sfCurr is replaced below
    const double lambda = getLagrangeValue (m_rateIndex);
#endif
    sfCurr = quantizeTrial ((uint8_t) (sfCurr - 1), ptrCurr, &trialMaxQ, &trialNumQ);

    trialDist = getQuantDist (coeffMagn, sfCurr, ptrCurr, sfbWidth);
    if (quantCoeffs)
    {
      memcpy (&quantCoeffs[sfbStart], ptrCurr, cpyWidth);

      entropyCoder.arithSetCodState (codStart); // reset state
      entropyCoder.arithSetCtxState (ctxStart);
      trialNumQ += getBitCount (entropyCoder, sfCurr, sfIndexPred, grpLength, &quantCoeffs[grpStart], sfbStart - grpStart, sfbWidth);
    }

    // rate-distortion decision, using empirical Lagrange value
#if EC_TRELLIS_OPT_CODING
    const bool acceptTrial = (trialDist * refSfbNmrDiv * refSfbNmrDiv + lambda * trialNumQ < bestDist * refSfbNmrDiv * refSfbNmrDiv + lambda * bestNumQ);
#else
    const bool acceptTrial = ((trialMaxQ <= bestMaxQ) && (trialNumQ <= bestNumQ + (trialDist >= bestDist ? -1 : short (0.5 + bestDist / __max (1.0, trialDist)))));
#endif

    if (acceptTrial)
    {
      bestMaxQ = trialMaxQ;
      bestNumQ = trialNumQ;
      sfBest   = sfCurr;
    }
    else if (quantCoeffs) // discard result, recover best trial
    {
      memcpy (&quantCoeffs[sfbStart], ptrBest, cpyWidth);

      entropyCoder.arithSetCodState (codFinal); // reset state
      entropyCoder.arithSetCtxState (ctxFinal);
    }
  }

  if (grpStats)
  {
    grpStats[sfb] = ((uint32_t) bestMaxQ << 16) | bestNumQ; // max magnitude and bit count
  }

  return __min (sfBest, m_maxSfIndex);
}
|
|
|
|
#if EC_TRELLIS_OPT_CODING
|
|
unsigned SfbQuantizer::quantizeSpecRDOC (EntropyCoder& entropyCoder, uint8_t* const optimalSf, const unsigned targetBitCount,
|
|
const uint16_t* const grpOffsets, uint32_t* const grpStats, // quant./coding statistics
|
|
const unsigned numSfb, uint8_t* const quantCoeffs) // returns RD optimization bit count
|
|
{
|
|
// numSfb: number of trellis stages. Based on: A. Aggarwal, S. L. Regunathan, and K. Rose,
|
|
// "Trellis-Based Optimization of MPEG-4 Advanced Audio Coding," see also quantizeMagnRDOC
|
|
const uint32_t codStart = USHRT_MAX << 16;
|
|
const uint32_t ctxStart = m_quantRate[0][0]; // start context before call to quantizeSfb()
|
|
const uint32_t codFinal = entropyCoder.arithGetCodState ();
|
|
const uint32_t ctxFinal = entropyCoder.arithGetCtxState (); // after call to quantizeSfb()
|
|
const uint16_t grpStart = grpOffsets[0];
|
|
uint8_t* const inScaleFac = &m_coeffTemp[((unsigned) m_maxSize8M1 - 6) << 3];
|
|
uint32_t prevCodState[8] = {0, 0, 0, 0, 0, 0, 0, 0};
|
|
uint32_t prevCtxState[8] = {0, 0, 0, 0, 0, 0, 0, 0};
|
|
uint8_t prevScaleFac[8] = {0, 0, 0, 0, 0, 0, 0, 0};
|
|
double prevVtrbCost[8] = {0, 0, 0, 0, 0, 0, 0, 0};
|
|
uint32_t tempCodState[8] = {0, 0, 0, 0, 0, 0, 0, 0};
|
|
uint32_t tempCtxState[8] = {0, 0, 0, 0, 0, 0, 0, 0};
|
|
uint8_t tempScaleFac[8] = {0, 0, 0, 0, 0, 0, 0, 0};
|
|
double tempVtrbCost[8] = {0, 0, 0, 0, 0, 0, 0, 0};
|
|
unsigned tempBitCount, sfb, is;
|
|
int ds;
|
|
#if EC_TRAIN
|
|
double refGrpDist = 0.0, tempGrpDist = 0.0;
|
|
#else
|
|
const double lambda = getLagrangeValue (m_rateIndex);
|
|
#endif
|
|
|
|
if ((optimalSf == nullptr) || (quantCoeffs == nullptr) || (grpOffsets == nullptr) || (numSfb == 0) || (numSfb > 52) ||
|
|
(targetBitCount == 0) || (targetBitCount > SHRT_MAX))
|
|
{
|
|
return 0; // invalid input error
|
|
}
|
|
|
|
for (sfb = 0; sfb < numSfb; sfb++) // SFB-wise scale factor, weighted distortion, and rate
|
|
{
|
|
const uint8_t refSf = m_quantInSf[sfb][1];
|
|
const uint16_t refNumQ = m_quantRate[sfb][1];
|
|
const double refQuantDist = m_quantDist[sfb][1];
|
|
const double refQuantNorm = m_lutSfNorm[refSf] * m_lutSfNorm[refSf];
|
|
const uint16_t sfbStart = grpOffsets[sfb];
|
|
const uint16_t sfbWidth = grpOffsets[sfb + 1] - sfbStart;
|
|
const uint32_t* coeffMagn = &m_coeffMagn[sfbStart];
|
|
uint8_t* const tempQuant = &m_coeffTemp[sfbStart - grpStart];
|
|
bool maxSnrReached = false;
|
|
|
|
if (refQuantDist < 0.0) memset (tempQuant, 0, sfbWidth * sizeof (uint8_t));
|
|
#if EC_TRAIN
|
|
else refGrpDist += refQuantDist;
|
|
#endif
|
|
if (grpStats)
|
|
{
|
|
grpStats[sfb] = (grpStats[sfb] & (USHRT_MAX << 16)) | refNumQ; // keep magn, sign bits
|
|
}
|
|
|
|
for (is = 0; is < m_numCStates; is++) // populate SFB trellis
|
|
{
|
|
const uint8_t* mag = (is != 1 ? m_coeffTemp /*= tempQuant[grpStart - sfbStart]*/ : &quantCoeffs[grpStart]);
|
|
double& currDist = m_quantDist[sfb][is];
|
|
uint16_t* currRate = &m_quantRate[sfb][is * m_numCStates];
|
|
uint8_t sfBest = optimalSf[sfb]; // optimal scalefactor
|
|
short maxQCurr = 0, numQCurr = 0; // for sign bits counting
|
|
|
|
if (refQuantDist < 0.0) // -1.0 means SFB is zero-quantized
|
|
{
|
|
currDist = -1.0;
|
|
m_quantInSf[sfb][is] = refSf;
|
|
}
|
|
else if (is != 1) // quantization & distortion not computed
|
|
{
|
|
const uint8_t sfCurr = __max (0, __min (m_maxSfIndex, refSf + 1 - (int) is));
|
|
|
|
currDist = -1.0;
|
|
if ((sfCurr == 0) || maxSnrReached)
|
|
{
|
|
maxSnrReached = true;
|
|
}
|
|
else // sfCurr > 0 && sfCurr <= m_maxSfIndex, re-quantize
|
|
{
|
|
sfBest = quantizeMagnSfb (coeffMagn, sfCurr, tempQuant, sfbWidth,
|
|
&entropyCoder, sfbStart - grpStart,
|
|
&maxQCurr, &numQCurr);
|
|
|
|
if (maxQCurr > SCHAR_MAX)
|
|
{
|
|
maxSnrReached = true; numQCurr = 0;
|
|
}
|
|
else
|
|
{
|
|
currDist = getQuantDist (coeffMagn, sfBest, tempQuant, sfbWidth) * refQuantNorm;
|
|
}
|
|
}
|
|
if (currDist < 0.0) memset (tempQuant, 0, sfbWidth * sizeof (uint8_t));
|
|
m_quantInSf[sfb][is] = sfCurr; // store initial scale fac
|
|
}
|
|
else // is == 1, quant. & dist. computed with quantizeSfb()
|
|
{
|
|
numQCurr = refNumQ;
|
|
}
|
|
|
|
if (sfb == 0) // first SFB, having sfbStart - grpStart == 0
|
|
{
|
|
entropyCoder.arithSetCodState (codStart); // group start
|
|
entropyCoder.arithSetCtxState (ctxStart);
|
|
tempBitCount = (maxSnrReached ? USHRT_MAX : numQCurr + getBitCount (entropyCoder, sfBest, UCHAR_MAX, 1, mag, 0, sfbWidth));
|
|
|
|
for (ds = m_numCStates - 1; ds >= 0; ds--)
|
|
{
|
|
currRate[ds] = (uint16_t) tempBitCount;
|
|
}
|
|
tempCodState[is] = entropyCoder.arithGetCodState ();
|
|
tempCtxState[is] = entropyCoder.arithGetCtxState ();
|
|
}
|
|
else // sfb > 0, rate depends on decisions in preceding SFB
|
|
{
|
|
for (ds = m_numCStates - 1; ds >= 0; ds--)
|
|
{
|
|
const uint16_t prevRate = m_quantRate[sfb - 1][ds * m_numCStates];
|
|
|
|
entropyCoder.arithSetCodState (prevCodState[ds]);
|
|
entropyCoder.arithSetCtxState (prevCtxState[ds], sfbStart - grpStart);
|
|
tempBitCount = (maxSnrReached || (prevRate >= USHRT_MAX) ? USHRT_MAX : numQCurr + getBitCount (entropyCoder,
|
|
(numQCurr == 0 ? prevScaleFac[ds] : sfBest), prevScaleFac[ds], 1, mag, sfbStart - grpStart, sfbWidth));
|
|
currRate[ds] = (uint16_t) tempBitCount;
|
|
|
|
if (ds == 1) // statistically best place to save states
|
|
{
|
|
tempCodState[is] = entropyCoder.arithGetCodState ();
|
|
tempCtxState[is] = entropyCoder.arithGetCtxState ();
|
|
}
|
|
}
|
|
}
|
|
tempScaleFac[is] = sfBest; // optimized factor for next SFB
|
|
} // for is
|
|
|
|
memcpy (prevCodState, tempCodState, m_numCStates * sizeof (uint32_t));
|
|
memcpy (prevCtxState, tempCtxState, m_numCStates * sizeof (uint32_t));
|
|
memcpy (prevScaleFac, tempScaleFac, m_numCStates * sizeof (uint8_t ));
|
|
} // for sfb
|
|
|
|
entropyCoder.arithSetCodState (codFinal); // back to last state
|
|
entropyCoder.arithSetCtxState (ctxFinal, grpOffsets[numSfb] - grpStart);
|
|
|
|
#if EC_TRAIN
|
|
tempBitCount = targetBitCount + 1; // Viterbi search for minimum distortion at target rate
|
|
for (double lambda = 0.015625; (lambda <= 0.375) && (tempBitCount > targetBitCount); lambda += 0.0078125)
|
|
#endif
|
|
{
|
|
double* const prevCost = prevVtrbCost;
|
|
uint8_t* const prevPath = m_coeffTemp; // trellis backtracker
|
|
double costMinIs = (double) UINT_MAX;
|
|
unsigned pathMinIs = 1;
|
|
#if EC_TRAIN
|
|
tempGrpDist = 0.0;
|
|
#endif
|
|
|
|
for (is = 0; is < m_numCStates; is++) // initial minimum path
|
|
{
|
|
const uint16_t currRate = m_quantRate[0][is * m_numCStates];
|
|
|
|
prevCost[is] = (currRate >= USHRT_MAX ? (double) UINT_MAX : lambda * currRate + __max (0.0, m_quantDist[0][is]));
|
|
prevPath[is] = 0;
|
|
}
|
|
|
|
for (sfb = 1; sfb < numSfb; sfb++) // search for minimum path
|
|
{
|
|
double* const currCost = tempVtrbCost;
|
|
uint8_t* const currPath = &prevPath[sfb * m_numCStates];
|
|
|
|
for (is = 0; is < m_numCStates; is++) // SFB's minimum path
|
|
{
|
|
uint16_t* currRate = &m_quantRate[sfb][is * m_numCStates];
|
|
double costMinDs = (double) UINT_MAX;
|
|
uint8_t pathMinDs = 1;
|
|
|
|
for (ds = m_numCStates - 1; ds >= 0; ds--) // transitions
|
|
{
|
|
const double costCurr = (currRate[ds] >= USHRT_MAX ? (double) UINT_MAX : prevCost[ds] + lambda * currRate[ds]);
|
|
|
|
if (costMinDs > costCurr)
|
|
{
|
|
costMinDs = costCurr;
|
|
pathMinDs = (uint8_t) ds;
|
|
}
|
|
}
|
|
if (costMinDs < UINT_MAX) costMinDs += __max (0.0, m_quantDist[sfb][is]);
|
|
|
|
currCost[is] = costMinDs;
|
|
currPath[is] = pathMinDs;
|
|
} // for is
|
|
|
|
memcpy (prevCost, currCost, m_numCStates * sizeof (double)); // TODO: avoid memcpy, use pointer swapping instead for speed
|
|
} // for sfb
|
|
|
|
for (sfb--, is = 0; is < m_numCStates; is++) // group minimum
|
|
{
|
|
if (costMinIs > prevCost[is])
|
|
{
|
|
costMinIs = prevCost[is];
|
|
pathMinIs = is;
|
|
}
|
|
}
|
|
|
|
for (tempBitCount = 0; sfb > 0; sfb--) // min-cost group rate
|
|
{
|
|
const uint8_t* currPath = &prevPath[sfb * m_numCStates];
|
|
const uint8_t pathMinDs = currPath[pathMinIs];
|
|
|
|
inScaleFac[sfb] = (m_quantDist[sfb][pathMinIs] < 0.0 ? UCHAR_MAX : m_quantInSf[sfb][pathMinIs]);
|
|
tempBitCount += m_quantRate[sfb][pathMinDs + pathMinIs * m_numCStates];
|
|
#if EC_TRAIN
|
|
tempGrpDist += __max (0.0, m_quantDist[sfb][pathMinIs]);
|
|
#endif
|
|
pathMinIs = pathMinDs;
|
|
}
|
|
inScaleFac[0] = (m_quantDist[0][pathMinIs] < 0.0 ? UCHAR_MAX : m_quantInSf[0][pathMinIs]);
|
|
tempBitCount += m_quantRate[0][pathMinIs * m_numCStates];
|
|
#if EC_TRAIN
|
|
tempGrpDist += __max (0.0, m_quantDist[0][pathMinIs]);
|
|
#endif
|
|
} // Viterbi search
|
|
|
|
#if EC_TRAIN
|
|
if ((tempGrpDist <= refGrpDist) || (tempBitCount <= targetBitCount))
|
|
#endif
|
|
{
|
|
uint8_t sfIndexPred = UCHAR_MAX;
|
|
|
|
if (grpStats)
|
|
{
|
|
entropyCoder.arithSetCodState (codStart);// set group start
|
|
entropyCoder.arithSetCtxState (ctxStart);
|
|
|
|
tempBitCount = 0;
|
|
}
|
|
for (sfb = 0; sfb < numSfb; sfb++) // re-quantize spectrum with R/D optimized parameters
|
|
{
|
|
const uint16_t sfbStart = grpOffsets[sfb];
|
|
const uint16_t sfbWidth = grpOffsets[sfb + 1] - sfbStart;
|
|
|
|
if ((inScaleFac[sfb] == UCHAR_MAX) || (sfIndexPred <= m_maxSfIndex && inScaleFac[sfb] + INDEX_OFFSET < sfIndexPred))
|
|
{
|
|
memset (&quantCoeffs[sfbStart], 0, sfbWidth * sizeof (uint8_t)); // zero SFB output
|
|
|
|
optimalSf[sfb] = sfIndexPred - (inScaleFac[sfb] == UCHAR_MAX ? 0 : INDEX_OFFSET);
|
|
}
|
|
else if (inScaleFac[sfb] != m_quantInSf[sfb][1]) // speedup
|
|
{
|
|
short maxQBest = 0, numQBest = 0;
|
|
|
|
optimalSf[sfb] = quantizeMagnSfb (&m_coeffMagn[sfbStart], inScaleFac[sfb], &quantCoeffs[sfbStart], sfbWidth,
|
|
&entropyCoder, sfbStart - grpStart,
|
|
&maxQBest, &numQBest);
|
|
|
|
if (maxQBest == 0) optimalSf[sfb] = sfIndexPred; // empty
|
|
if (grpStats)
|
|
{
|
|
grpStats[sfb] = ((uint32_t) maxQBest << 16) | numQBest; // max magn. and sign bits
|
|
}
|
|
}
|
|
|
|
if (grpStats) // complete statistics with per-SFB bit count
|
|
{
|
|
grpStats[sfb] += getBitCount (entropyCoder, optimalSf[sfb], sfIndexPred, 1, &quantCoeffs[grpStart], sfbStart - grpStart, sfbWidth);
|
|
tempBitCount += grpStats[sfb] & USHRT_MAX;
|
|
}
|
|
|
|
if ((sfb > 0) && (optimalSf[sfb] < UCHAR_MAX) && (sfIndexPred == UCHAR_MAX))
|
|
{
|
|
memset (optimalSf, optimalSf[sfb], sfb * sizeof (uint8_t)); // back-propagate factor
|
|
}
|
|
sfIndexPred = optimalSf[sfb];
|
|
} // for sfb
|
|
|
|
return tempBitCount + (grpStats ? 2 : 0); // last coding bits
|
|
}
|
|
|
|
return targetBitCount;
|
|
}
|
|
#endif // EC_TRELLIS_OPT_CODING
|