/* exhaleEnc.cpp - source file for class providing Extended HE-AAC encoding capability
 * written by C. R. Helmrich, last modified in 2020 - see License.htm for legal notices
 *
 * The copyright in this software is being made available under a Modified BSD-Style License
 * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
 * party rights, including patent rights. No such rights are granted under this License.
 *
 * Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved.
 */

#include "exhaleLibPch.h"
#include "exhaleEnc.h"

// static helper functions
static double modifiedBesselFunctionOfFirstKind (const double x)
{
  const double xOver2 = x * 0.5;
  double d = 1.0, sum = 1.0;
  int i = 0;

  do
  {
    const double x2di = xOver2 / double (++i);

    d *= (x2di * x2di);
    sum += d;
  }
  while (d > sum * 1.2e-38); // FLT_MIN

  return sum;
}

static int32_t* initWindowHalfCoeffs (const USAC_WSHP windowShape, const unsigned frameLength)
{
  int32_t* windowBuf = nullptr;
  unsigned u;

  if ((windowBuf = (int32_t*) malloc (frameLength * sizeof (int32_t))) == nullptr)
  {
    return nullptr; // allocation error
  }

  if (windowShape == WINDOW_SINE)
  {
    const double dNorm = 3.141592653589793 / (2.0 * frameLength); // MLT sine window half

    for (u = 0; u < frameLength; u++)
    {
      windowBuf[u] = int32_t (sin (dNorm * (u + 0.5)) * WIN_SCALE + 0.5);
    }
  }
  else // if windowShape == WINDOW_KBD
  {
    const double alpha = 3.141592653589793 * (frameLength > 256 ? 4.0 : 6.0);
    const double dBeta = 1.0 / modifiedBesselFunctionOfFirstKind (alpha /*sqrt (1.0)*/);
    const double dNorm = 4.0 / (2.0 * frameLength);
    const double iScal = double (1u << 30);
    const double dScal = 1.0 / iScal;
    double d, sum = 0.0; // create Kaiser-Bessel window half

    for (u = 0; u < frameLength; u++)
    {
      const double du1 = dNorm * u - 1.0;

      d = dBeta * modifiedBesselFunctionOfFirstKind (alpha * sqrt (1.0 - du1 * du1));
      sum += d;
      windowBuf[u] = int32_t (d * iScal + 0.5);
    }
    d = 1.0 / sum; // normalized to sum
    sum = 0.0;     // KBD window half

    for (u = 0; u < frameLength; u++)
    {
      sum += dScal * windowBuf[u];
      windowBuf[u] = int32_t (sqrt (d * sum /*cumulative sum*/) * WIN_SCALE + 0.5);
    }
  }
  return windowBuf;
}
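/* NOTE: modifiedBesselFunctionOfFirstKind() evaluates the zeroth-order modified Bessel
 * function I_0(x) = sum_{k>=0} ((x/2)^k / k!)^2 via its power series, truncated once a term
 * falls below roughly FLT_MIN relative to the running sum. initWindowHalfCoeffs() uses it to
 * build a rising (fade-in) window half of either an MLT sine window or a Kaiser-Bessel-derived
 * (KBD) window as in ISO/IEC 14496-3: a Kaiser window half with alpha = 4*pi (long transforms,
 * frameLength > 256) or 6*pi (short transforms) is accumulated, and each output sample equals
 * sqrt (cumulativeSum / totalSum), scaled to WIN_SCALE fixed-point precision.
 *
 * Illustrative usage sketch (the frame length of 1024 is only an example value; error handling
 * and the actual MDCT windowing are omitted, and the malloc'd buffer must be freed by the caller):
 *
 *   int32_t* halfWin = initWindowHalfCoeffs (WINDOW_KBD, 1024);
 *   if (halfWin != nullptr) { ...window the frame's overlap region... ; free (halfWin); }
 */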
static uint32_t quantizeSfbWithMinSnr (const unsigned* const coeffMagn, const uint16_t* const sfbOffset, const unsigned b,
                                       const uint8_t groupLength, uint8_t* const quantMagn, char* const arithTuples,
                                       const bool nonZeroSnr = false)
{
  const uint16_t sfbStart = sfbOffset[b];
  const uint16_t sfbWidth = sfbOffset[b + 1] - sfbStart;
  const unsigned* sfbMagn = &coeffMagn[sfbStart];
  uint32_t maxIndex = 0, maxLevel = sfbMagn[0];

  for (uint16_t s = sfbWidth - 1; s > 0; s--)
  {
    if (maxLevel < sfbMagn[s]) // find largest-level magn. in SFB
    {
      maxLevel = sfbMagn[s];
      maxIndex = s;
    }
  }
  if (quantMagn != nullptr) // update quantized sample magnitudes
  {
    memset (&quantMagn[sfbStart], 0, sfbWidth * sizeof (uint8_t));

    if (nonZeroSnr) quantMagn[sfbStart + maxIndex] = 1; // magn. 1
  }
  if (arithTuples != nullptr) // update entropy coding two-tuples
  {
    const uint16_t swbStart = ((sfbStart - sfbOffset[0]) * oneTwentyEightOver[groupLength]) >> 7;

    memset (&arithTuples[swbStart >> 1], 1, ((sfbWidth * oneTwentyEightOver[groupLength]) >> 8) * sizeof (char));

    if (nonZeroSnr && (groupLength == 1)) // max. two-tuple is 1+1
    {
      arithTuples[(swbStart + maxIndex) >> 1] = 2;
    }
  }

  return maxLevel;
}

// inline helper functions
static inline void applyStereoPreProcessingCplx (int32_t* mdctSample1, int32_t* mdctSample2,
                                                 int32_t* mdstSample1, int32_t* mdstSample2,
                                                 const int64_t factIn, const int64_t factDe, const int64_t sign)
{
  const int32_t valI1 = *mdstSample1;
  const int32_t valI2 = *mdstSample2;
  const int32_t valR1 = *mdctSample1;
  const int32_t valR2 = *mdctSample2;
  const int64_t absR1 = abs (valR1);
  const int64_t absR2 = abs (valR2);
  int64_t dmxI1, dmxR1 = valR1 * factDe + sign * valR2 * factIn; // cross
  int64_t dmxI2, dmxR2 = valR2 * factDe + sign * valR1 * factIn; // -talk
  double n, d;

  if (abs (dmxR1) < absR1 + absR2) // avoid destructive summations
  {
    if (absR1 * factDe < absR2 * factIn)
    {
      dmxR1 = valR2 * factIn - sign * valR1 * factDe;
      dmxI1 = valI2 * factIn - sign * valI1 * factDe;
    }
    else
    {
      dmxR1 = valR1 * factDe - sign * valR2 * factIn;
      dmxI1 = valI1 * factDe - sign * valI2 * factIn;
    }
  }
  else dmxI1 = valI1 * factDe + sign * valI2 * factIn;

  if (abs (dmxR2) < absR1 + absR2) // avoid destructive summations
  {
    if (absR1 * factIn < absR2 * factDe)
    {
      dmxR2 = valR2 * factDe - sign * valR1 * factIn;
      dmxI2 = valI2 * factDe - sign * valI1 * factIn;
    }
    else
    {
      dmxR2 = valR1 * factIn - sign * valR2 * factDe;
      dmxI2 = valI1 * factIn - sign * valI2 * factDe;
    }
  }
  else dmxI2 = valI2 * factDe + sign * valI1 * factIn;

  n = (double) valR1 * (double) valR1 + (double) valI1 * (double) valI1;
  d = (double) dmxR1 * (double) dmxR1 + (double) dmxI1 * (double) dmxI1;
  *mdctSample1 = int32_t (dmxR1 * sqrt (n / __max (1.0, d)) + (dmxR1 < 0 ? -0.5 : 0.5));
  n = (double) valR2 * (double) valR2 + (double) valI2 * (double) valI2;
  d = (double) dmxR2 * (double) dmxR2 + (double) dmxI2 * (double) dmxI2;
  *mdctSample2 = int32_t (dmxR2 * sqrt (n / __max (1.0, d)) + (dmxR2 < 0 ? -0.5 : 0.5));
}

static inline void applyStereoPreProcessingReal (int32_t* mdctSample1, int32_t* mdctSample2,
                                                 int32_t* prevSample1, int32_t* prevSample2,
                                                 const int64_t factIn, const int64_t factDe, const int64_t sign)
{
  const int64_t valI1 = (*(mdctSample1 + 1) - (int64_t) *prevSample1) >> 1; // estimate, see also
  const int64_t valI2 = (*(mdctSample2 + 1) - (int64_t) *prevSample2) >> 1; // getMeanAbsValues()
  const int32_t valR1 = (*prevSample1 = *mdctSample1);
  const int32_t valR2 = (*prevSample2 = *mdctSample2);
  const int64_t absR1 = abs (valR1);
  const int64_t absR2 = abs (valR2);
  int64_t dmxI1, dmxR1 = valR1 * factDe + sign * valR2 * factIn; // cross
  int64_t dmxI2, dmxR2 = valR2 * factDe + sign * valR1 * factIn; // -talk
  double n, d;

  if (abs (dmxR1) < absR1 + absR2) // avoid destructive summations
  {
    if (absR1 * factDe < absR2 * factIn)
    {
      dmxR1 = valR2 * factIn - sign * valR1 * factDe;
      dmxI1 = valI2 * factIn - sign * valI1 * factDe;
    }
    else
    {
      dmxR1 = valR1 * factDe - sign * valR2 * factIn;
      dmxI1 = valI1 * factDe - sign * valI2 * factIn;
    }
  }
  else dmxI1 = valI1 * factDe + sign * valI2 * factIn;

  if (abs (dmxR2) < absR1 + absR2) // avoid destructive summations
  {
    if (absR1 * factIn < absR2 * factDe)
    {
      dmxR2 = valR2 * factDe - sign * valR1 * factIn;
      dmxI2 = valI2 * factDe - sign * valI1 * factIn;
    }
    else
    {
      dmxR2 = valR1 * factIn - sign * valR2 * factDe;
      dmxI2 = valI1 * factIn - sign * valI2 * factDe;
    }
  }
  else dmxI2 = valI2 * factDe + sign * valI1 * factIn;

  n = (double) valR1 * (double) valR1 + (double) valI1 * (double) valI1;
  d = (double) dmxR1 * (double) dmxR1 + (double) dmxI1 * (double) dmxI1;
  *mdctSample1 = int32_t (dmxR1 * sqrt (n / __max (1.0, d)) + (dmxR1 < 0 ? -0.5 : 0.5));
  n = (double) valR2 * (double) valR2 + (double) valI2 * (double) valI2;
  d = (double) dmxR2 * (double) dmxR2 + (double) dmxI2 * (double) dmxI2;
  *mdctSample2 = int32_t (dmxR2 * sqrt (n / __max (1.0, d)) + (dmxR2 < 0 ? -0.5 : 0.5));
}

static inline uint8_t brModeAndFsToMaxSfbLong (const unsigned bitRateMode, const unsigned samplingRate)
{
  // max. for fs of 44 kHz: band 47 (19.3 kHz), 48 kHz: 45 (19.5 kHz), 64 kHz: 39 (22.0 kHz)
  return __max (39, (0x20A000 + (samplingRate >> 1)) / samplingRate) - 9 + bitRateMode - (samplingRate < 48000 ? bitRateMode >> 3 : 0);
}

static inline uint8_t brModeAndFsToMaxSfbShort(const unsigned bitRateMode, const unsigned samplingRate)
{
  // max. for fs of 44 kHz: band 13 (19.3 kHz), 48 kHz: 13 (21.0 kHz), 64 kHz: 11 (23.0 kHz)
  return (samplingRate > 51200 ? 11 : 13) - 2 + (bitRateMode >> 2);
}

#if SA_IMPROVED_REAL_ABS
static inline uint32_t squareMeanRoot (const uint32_t value1, const uint32_t value2)
{
  const double meanRoot = (sqrt ((double) value1) + sqrt ((double) value2)) * 0.5;

  return uint32_t (meanRoot * meanRoot + 0.5);
}
#else
static inline uint32_t getComplexRmsValue (const uint32_t rmsValue, const unsigned sfbGroup, const unsigned sfbIndex,
                                           const uint8_t numSwb, const TnsData& tnsData)
{
  // compensate for missing MDST coefficients in RMS calculation of SFBs where TNS is active
  return ((tnsData.numFilters > 0) && (sfbGroup == tnsData.filteredWindow) && (rmsValue <= UINT_MAX / 5) &&
          (tnsData.filterLength[0] + sfbIndex >= numSwb) ? (rmsValue * 5u) >> 2 : rmsValue);
}
#endif

// ISO/IEC 23003-3, Table 75
static inline unsigned toFrameLength (const USAC_CCFL coreCoderFrameLength)
{
  return (unsigned) coreCoderFrameLength;
}

// ISO/IEC 23003-3, Table 73
static const uint8_t numberOfChannels[USAC_MAX_NUM_ELCONFIGS] = {0, 1, 2, 3, 4, 5, 6, 8, 2, 3, 4, 7, 8};

static inline unsigned toNumChannels (const USAC_CCI chConfigurationIndex)
{
  return numberOfChannels[__max (0, (char) chConfigurationIndex)];
}

// ISO/IEC 23003-3, Table 68
static const uint8_t elementCountConfig[USAC_MAX_NUM_ELCONFIGS] = {0, 1, 1, 2, 3, 3, 4, 5, 2, 2, 2, 5, 5};

static const ELEM_TYPE elementTypeConfig[USAC_MAX_NUM_ELCONFIGS][USAC_MAX_NUM_ELEMENTS] = {
  {ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_UNDEF
  {ID_USAC_SCE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_1_CH
  {ID_USAC_CPE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_2_CH
  {ID_USAC_SCE, ID_USAC_CPE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_3_CH
  {ID_USAC_SCE, ID_USAC_CPE, ID_USAC_SCE, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_4_CH
  {ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_5_CH
  {ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_LFE, ID_EL_UNDEF}, // CCI_6_CH
  {ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_LFE}, // CCI_8_CH
  {ID_USAC_SCE, ID_USAC_SCE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_2_CHM
  {ID_USAC_CPE, ID_USAC_SCE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_3_CHR
  {ID_USAC_CPE, ID_USAC_CPE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_4_CHR
  {ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_SCE, ID_USAC_LFE}, // CCI_7_CH
  {ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_LFE}  // CCI_8_CHM
};

// ISO/IEC 14496-3, Table 4.140
static const uint16_t sfbOffsetL0[42] = { // 88.2 and 96 kHz
    0,   4,   8,  12,  16,  20,  24,  28,  32,  36,  40,  44,  48,  52,  56,  64,  72,  80,  88,  96,
  108, 120, 132, 144, 156, 172, 188, 212, 240, 276, 320, 384, 448, 512, 576, 640, 704, 768, 832, 896,
  960, 1024
};

// ISO/IEC 14496-3, Table 4.141
static
const uint16_t sfbOffsetS0[13] = { 0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 64, 92, 128 }; // ISO/IEC 14496-3, Table 4.138 static const uint16_t sfbOffsetL1[48] = { // 64 kHz 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 64, 72, 80, 88, 100, 112, 124, 140, 156, 172, 192, 216, 240, 268, 304, 344, 384, 424, 464, 504, 544, 584, 624, 664, 704, 744, 784, 824, 864, 904, 944, 984, 1024 }; // ISO/IEC 14496-3, Table 4.139 static const uint16_t sfbOffsetS1[13] = { 0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 64, 92, 128 }; // ISO/IEC 14496-3, Table 4.131 static const uint16_t sfbOffsetL2[52] = { // 32, 44.1, and 48 kHz 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 48, 56, 64, 72, 80, 88, 96, 108, 120, 132, 144, 160, 176, 196, 216, 240, 264, 292, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960/*!*/, 992/*!*/, 1024 }; // ISO/IEC 14496-3, Table 4.130 static const uint16_t sfbOffsetS2[15] = { 0, 4, 8, 12, 16, 20, 28, 36, 44, 56, 68, 80, 96, 112, 128 }; // ISO/IEC 14496-3, Table 4.136 static const uint16_t sfbOffsetL3[48] = { // 22.05 and 24 kHz 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 52, 60, 68, 76, 84, 92, 100, 108, 116, 124, 136, 148, 160, 172, 188, 204, 220, 240, 260, 284, 308, 336, 364, 396, 432, 468, 508, 552, 600, 652, 704, 768, 832, 896, 960, 1024 }; // ISO/IEC 14496-3, Table 4.137 static const uint16_t sfbOffsetS3[16] = { 0, 4, 8, 12, 16, 20, 24, 28, 36, 44, 52, 64, 76, 92, 108, 128 }; // ISO/IEC 14496-3, Table 4.134 static const uint16_t sfbOffsetL4[44] = { // 11.025, 12, and 16 kHz 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 100, 112, 124, 136, 148, 160, 172, 184, 196, 212, 228, 244, 260, 280, 300, 320, 344, 368, 396, 424, 456, 492, 532, 572, 616, 664, 716, 772, 832, 896, 960, 1024 }; // ISO/IEC 14496-3, Table 4.135 static const uint16_t sfbOffsetS4[16] = { 0, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 60, 72, 88, 108, 128 }; // ISO/IEC 14496-3, Table 4.132 static const uint16_t sfbOffsetL5[41] = { // 8 kHz 0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 120, 132, 144, 156, 172, 188, 204, 220, 236, 252, 268, 288, 308, 328, 348, 372, 396, 420, 448, 476, 508, 544, 580, 620, 664, 712, 764, 820, 880, 944, 1024 }; // ISO/IEC 14496-3, Table 4.133 static const uint16_t sfbOffsetS5[16] = { 0, 4, 8, 12, 16, 20, 24, 28, 36, 44, 52, 60, 72, 88, 108, 128 }; // long-window SFB offset tables static const uint16_t* swbOffsetsL[USAC_NUM_FREQ_TABLES] = { sfbOffsetL0, sfbOffsetL1, sfbOffsetL2, sfbOffsetL3, sfbOffsetL4, sfbOffsetL5 }; static const uint8_t numSwbOffsetL[USAC_NUM_FREQ_TABLES] = {42, 48, 52, 48, 44, 41}; // short-window SFB offset tables static const uint16_t* swbOffsetsS[USAC_NUM_FREQ_TABLES] = { sfbOffsetS0, sfbOffsetS1, sfbOffsetS2, sfbOffsetS3, sfbOffsetS4, sfbOffsetS5 }; static const uint8_t numSwbOffsetS[USAC_NUM_FREQ_TABLES] = {13, 13, 15, 16, 16, 16}; // ISO/IEC 23003-3, Table 79 static const uint8_t freqIdxToSwbTableIdxAAC[USAC_NUM_SAMPLE_RATES + 2] = { /*96000*/ 0, 0, 1, 2, 2, 2,/*24000*/ 3, 3, 4, 4, 4, 5, 5, // AAC 255, 255, 1, 2, 2, 2, 2, 2,/*25600*/ 3, 3, 3, 4, 4, 4, 4 // USAC }; #if !RESTRICT_TO_AAC static const uint8_t freqIdxToSwbTableIdx768[USAC_NUM_SAMPLE_RATES + 2] = { /*96000*/ 0, 0, 0, 1, 1, 2,/*24000*/ 2, 2, 3, 4, 4, 4, 4, // AAC 255, 255, 0, 1, 2, 2, 2, 2,/*25600*/ 2, 3, 3, 3, 3, 4, 4 // USAC }; #endif // ISO/IEC 23003-3, Table 131 static const uint8_t tnsScaleFactorBandLimit[2 /*long/short*/][USAC_NUM_FREQ_TABLES] = { // TNS_MAX_BANDS #if 0 // RESTRICT_TO_AAC {31, 34, 51 /*to be corrected to 42 (44.1) and 40 (48 kHz)!*/, 46, 42, 
39}, {9, 10, 14, 14, 14, 14} #else {31, 34, 51 /*to be corrected to 42 (44.1) and 40 (48 kHz)!*/, 47, 43, 40}, {9, 10, 14, 15, 15, 15} #endif }; // scale_factor_grouping map // group lengths based on transient location: 1133, 1115, 2114, 3113, 4112, 5111, 3311, 1331 static const uint8_t scaleFactorGrouping[8] = {0x1B, 0x0F, 0x47, 0x63, 0x71, 0x78, 0x6C, 0x36}; static const uint8_t windowGroupingTable[8][NUM_WINDOW_GROUPS] = { // for window_group_length {1, 1, 3, 3}, {1, 1, 1, 5}, {2, 1, 1, 4}, {3, 1, 1, 3}, {4, 1, 1, 2}, {5, 1, 1, 1}, {3, 3, 1, 1}, {1, 3, 3, 1} }; // window_sequence equalizer static const USAC_WSEQ windowSequenceSynch[5][5] = { // 1st: chan index 0, 2nd: chan index 1 {ONLY_LONG, LONG_START, EIGHT_SHORT, LONG_STOP, STOP_START }, // left: ONLY_LONG #if RESTRICT_TO_AAC {LONG_START, LONG_START, EIGHT_SHORT, EIGHT_SHORT, STOP_START }, // Left: LONG_START #else {LONG_START, LONG_START, EIGHT_SHORT, STOP_START, STOP_START }, // Left: LONG_START #endif {EIGHT_SHORT, EIGHT_SHORT, EIGHT_SHORT, EIGHT_SHORT, EIGHT_SHORT}, // Left: EIGHT_SHORT #if RESTRICT_TO_AAC {LONG_STOP, EIGHT_SHORT, EIGHT_SHORT, LONG_STOP, STOP_START }, // Left: LONG_STOP #else {LONG_STOP, STOP_START, EIGHT_SHORT, LONG_STOP, STOP_START }, // Left: LONG_STOP #endif {STOP_START, STOP_START, EIGHT_SHORT, STOP_START, STOP_START } // Left: STOP_START }; // private helper functions unsigned ExhaleEncoder::applyTnsToWinGroup (TnsData& tnsData, SfbGroupData& grpData, const bool eightShorts, const uint8_t maxSfb, const unsigned channelIndex) { const uint16_t filtOrder = tnsData.filterOrder[0]; const uint16_t* grpSO = &grpData.sfbOffsets[m_numSwbShort * tnsData.filteredWindow]; unsigned errorValue = 0; // no error if ((maxSfb > (eightShorts ? MAX_NUM_SWB_SHORT : MAX_NUM_SWB_LONG)) || (channelIndex >= USAC_MAX_NUM_CHANNELS)) { return 1; // invalid arguments error } if (filtOrder > 0) // determine TNS filter length in SFBs and apply TNS analysis filtering { const int numSwbWin = (eightShorts ? m_numSwbShort : m_numSwbLong); uint8_t tnsMaxBands = tnsScaleFactorBandLimit[eightShorts ? 1 : 0][m_swbTableIdx]; uint8_t tnsStartSfb = 3 + 32000 / toSamplingRate (m_frequencyIdx); // 8-short TNS start if (!eightShorts) { const unsigned samplingRate = toSamplingRate (m_frequencyIdx); // refine TNS_MAX_BANDS const unsigned tnsStartOffs = (m_specAnaCurr[channelIndex] & 31) << SA_BW_SHIFT; if ((samplingRate >= 46009) && (samplingRate < 55426)) tnsMaxBands = 40; // for 48 kHz else if ((samplingRate >= 37566) && (samplingRate < 46009)) tnsMaxBands = 42; // & 44.1 kHz while (grpSO[tnsStartSfb] < tnsStartOffs) tnsStartSfb++; // start band for TNS filter } tnsMaxBands = __min (tnsMaxBands, maxSfb); if ((tnsData.filterLength[0] = __max (0, numSwbWin - tnsStartSfb)) > 0) { int32_t* const mdctSignal = m_mdctSignals[channelIndex]; const short offs = grpSO[tnsStartSfb]; uint16_t s = grpSO[tnsMaxBands] - offs; short filterC[MAX_PREDICTION_ORDER] = {0, 0, 0, 0}; int32_t* predSig = &mdctSignal[grpSO[tnsMaxBands]]; // end of spectrum to be predicted errorValue |= m_linPredictor.quantTnsToLpCoeffs (tnsData.coeff[0], filtOrder, tnsData.coeffResLow, tnsData.coeffParCor, filterC); // back up the leading MDCT samples memcpy (m_tempIntBuf, &mdctSignal[offs - MAX_PREDICTION_ORDER], MAX_PREDICTION_ORDER * sizeof (int32_t)); // TNS compliance: set them to zero memset (&mdctSignal[offs - MAX_PREDICTION_ORDER], 0, MAX_PREDICTION_ORDER * sizeof (int32_t)); if (filtOrder >= 4) // max. 
order 4 { for (predSig--; s > 0; s--) { const int64_t predSample = *(predSig - 1) * (int64_t) filterC[0] + *(predSig - 2) * (int64_t) filterC[1] + *(predSig - 3) * (int64_t) filterC[2] + *(predSig - 4) * (int64_t) filterC[3]; *(predSig--) += int32_t ((predSample + (1 << (LP_DEPTH - 2))) >> (LP_DEPTH - 1)); } } else if (filtOrder == 3) // order 3 { for (predSig--; s > 0; s--) { const int64_t predSample = *(predSig - 1) * (int64_t) filterC[0] + *(predSig - 2) * (int64_t) filterC[1] + *(predSig - 3) * (int64_t) filterC[2]; *(predSig--) += int32_t ((predSample + (1 << (LP_DEPTH - 2))) >> (LP_DEPTH - 1)); } } else // save 1-2 MACs, order 2 or 1 { for (predSig--; s > 0; s--) { const int64_t predSample = *(predSig - 1) * (int64_t) filterC[0] + *(predSig - 2) * (int64_t) filterC[1]; *(predSig--) += int32_t ((predSample + (1 << (LP_DEPTH - 2))) >> (LP_DEPTH - 1)); } } // restore the leading MDCT samples memcpy (&mdctSignal[offs - MAX_PREDICTION_ORDER], m_tempIntBuf, MAX_PREDICTION_ORDER * sizeof (int32_t)); // recalculate SFB RMS in TNS range errorValue |= m_specAnalyzer.getMeanAbsValues (mdctSignal, nullptr /*MDST wasn't filtered*/, grpSO[grpData.sfbsPerGroup], 0 /*ci*/, &grpSO[tnsStartSfb], __max (0, tnsMaxBands - (int) tnsStartSfb), &grpData.sfbRmsValues[tnsStartSfb + m_numSwbShort * tnsData.filteredWindow]); } else tnsData.filterOrder[0] = tnsData.numFilters = 0; // disable zero-length TNS filters } // if order > 0 return errorValue; } unsigned ExhaleEncoder::eightShortGrouping (SfbGroupData& grpData, uint16_t* const grpOffsets, int32_t* const mdctSignal #if SA_IMPROVED_REAL_ABS , int32_t* const mdstSignal /*= nullptr*/ #endif ) { const unsigned nSamplesInFrame = toFrameLength (m_frameLength); const unsigned nSamplesInShort = nSamplesInFrame >> 3; #if SA_IMPROVED_REAL_ABS int32_t* const tempIntBuf/*2*/ = m_timeSignals[1]; // NOTE: requires at least stereo input #endif unsigned grpStartLine = nSamplesInFrame; if ((grpOffsets == nullptr) || (mdctSignal == nullptr)) { return 1; // invalid arguments error } for (short gr = grpData.numWindowGroups - 1; gr >= 0; gr--) // grouping, 14496-3 Fig. 4.24 { const unsigned grpLength = grpData.windowGroupLength[gr]; uint16_t* const grpOffset = &grpOffsets[m_numSwbShort * gr]; int32_t* const grpMdctSig = &mdctSignal[grpStartLine -= nSamplesInShort * grpLength]; #if SA_IMPROVED_REAL_ABS int32_t* const grpMdstSig = (mdstSignal != nullptr ? 
&mdstSignal[grpStartLine] : nullptr); #endif for (uint16_t b = 0; b < m_numSwbShort; b++) { const unsigned swbOffset = grpOffsets[b]; const unsigned numCoeffs = __min (grpOffsets[b + 1], nSamplesInShort) - swbOffset; // adjust scale factor band offsets grpOffset[b] = uint16_t (grpStartLine + swbOffset * grpLength); // interleave spectral coefficients for (uint16_t w = 0; w < grpLength; w++) { memcpy (&m_tempIntBuf[grpOffset[b] + w * numCoeffs], &grpMdctSig[swbOffset + w * nSamplesInShort], numCoeffs * sizeof (int32_t)); #if SA_IMPROVED_REAL_ABS if (grpMdstSig != nullptr) { memcpy (&tempIntBuf[grpOffset[b] + w * numCoeffs], &grpMdstSig[swbOffset + w * nSamplesInShort], numCoeffs * sizeof (int32_t)); } #endif } } grpOffset[m_numSwbShort] = uint16_t (grpStartLine + nSamplesInShort * grpLength); } // for gr memcpy (mdctSignal, m_tempIntBuf, nSamplesInFrame * sizeof (int32_t)); #if SA_IMPROVED_REAL_ABS if (mdstSignal != nullptr) { memcpy (mdstSignal, tempIntBuf, nSamplesInFrame * sizeof (int32_t)); } #endif return 0; // no error } unsigned ExhaleEncoder::getOptParCorCoeffs (const int32_t* const mdctSignal, const SfbGroupData& grpData, const uint8_t maxSfb, const unsigned channelIndex, TnsData& tnsData, const uint8_t firstGroupIndexToTest /*= 0*/) { const unsigned nSamplesInFrame = toFrameLength (m_frameLength); const unsigned tnsStartSfb = 3 + 32000 / toSamplingRate (m_frequencyIdx); // 8-short start unsigned bestOrder = MAX_PREDICTION_ORDER, predGainCurr, predGainPrev, temp = 0; int16_t parCorBuffer[MAX_PREDICTION_ORDER]; tnsData.filterOrder[0] = tnsData.filteredWindow = tnsData.numFilters = 0; // zero TNS data tnsData.filterDownward[0] = false; // enforce direction = 0 for now, detection difficult if ((mdctSignal == nullptr) || (tnsData.coeffParCor == nullptr) || (maxSfb <= tnsStartSfb) || (channelIndex >= USAC_MAX_NUM_CHANNELS)) { return 0; // invalid arguments error } if (grpData.numWindowGroups == 1) // LONG window: use ParCor coeffs from spectral analyzer { tnsData.filterOrder[0] = (uint8_t) m_specAnalyzer.getLinPredCoeffs (tnsData.coeffParCor, channelIndex); #if EE_OPT_TNS_SPEC_RANGE if (tnsData.filterOrder[0] > 0) // try to reduce TNS start band as long as SNR increases { const uint16_t filtOrder = tnsData.filterOrder[0]; uint16_t b = __min (m_specAnaCurr[channelIndex] & 31, (nSamplesInFrame - filtOrder) >> SA_BW_SHIFT); short filterC[MAX_PREDICTION_ORDER] = {0, 0, 0, 0}; int32_t* predSig = &m_mdctSignals[channelIndex][b << SA_BW_SHIFT]; // TNS start offset m_linPredictor.parCorToLpCoeffs (tnsData.coeffParCor, filtOrder, filterC); for (b = (b > 0 ? b - 1 : 0), predSig--; b > 0; b--) // b is in spectr. analysis units { uint64_t sumAbsOrg = 0, sumAbsTns = 0; if (filtOrder >= 4) // max. 
order 4 { for (uint16_t s = 1 << SA_BW_SHIFT; s > 0; s--) // produce the TNS filter residual { const int64_t predSample = *(predSig - 1) * (int64_t) filterC[0] + *(predSig - 2) * (int64_t) filterC[1] + *(predSig - 3) * (int64_t) filterC[2] + *(predSig - 4) * (int64_t) filterC[3]; const int64_t mdctSample = *(predSig--); const int64_t resiSample = mdctSample + ((predSample + (1 << 8)) >> 9); sumAbsOrg += abs (mdctSample); sumAbsTns += abs (resiSample); } } else if (filtOrder == 3) // order 3 { for (uint16_t s = 1 << SA_BW_SHIFT; s > 0; s--) // produce the TNS filter residual { const int64_t predSample = *(predSig - 1) * (int64_t) filterC[0] + *(predSig - 2) * (int64_t) filterC[1] + *(predSig - 3) * (int64_t) filterC[2]; const int64_t mdctSample = *(predSig--); const int64_t resiSample = mdctSample + ((predSample + (1 << 8)) >> 9); sumAbsOrg += abs (mdctSample); sumAbsTns += abs (resiSample); } } else // save 1-2 MACs, order 2 or 1 { for (uint16_t s = 1 << SA_BW_SHIFT; s > 0; s--) // produce the TNS filter residual { const int64_t predSample = *(predSig - 1) * (int64_t) filterC[0] + *(predSig - 2) * (int64_t) filterC[1]; const int64_t mdctSample = *(predSig--); const int64_t resiSample = mdctSample + ((predSample + (1 << 8)) >> 9); sumAbsOrg += abs (mdctSample); sumAbsTns += abs (resiSample); } } if (sumAbsOrg * 9 <= sumAbsTns * 8) break; // band SNR was reduced by more than 1 dB } m_specAnaCurr[channelIndex] = (m_specAnaCurr[channelIndex] & (UINT_MAX - 31)) | (b + 1); } // if order > 0 #endif // EE_OPT_TNS_SPEC_RANGE return (m_specAnaCurr[channelIndex] >> 24) & UCHAR_MAX; // spectral analyzer's pred gain } // SHORT window: find short group with maximum pred gain, then determine best filter order for (uint8_t gr = firstGroupIndexToTest; gr < grpData.numWindowGroups; gr++) { if (grpData.windowGroupLength[gr] == 1) { const uint16_t* grpSO = &grpData.sfbOffsets[m_numSwbShort * gr]; predGainCurr = m_linPredictor.calcParCorCoeffs (&mdctSignal[grpSO[tnsStartSfb]], grpSO[maxSfb] - grpSO[tnsStartSfb], bestOrder, parCorBuffer); if (temp < predGainCurr) // current pred gain set is "better" than best pred gain set { temp = predGainCurr; tnsData.filteredWindow = gr; // changed later memcpy (tnsData.coeffParCor, parCorBuffer, bestOrder * sizeof (int16_t)); } } } // for gr predGainCurr = (temp >> 24) & UCHAR_MAX; predGainPrev = (temp >> 16) & UCHAR_MAX; while ((bestOrder > 1) && (predGainPrev >= predGainCurr)) // get lowest-order gain maximum { bestOrder--; predGainCurr = predGainPrev; predGainPrev = (temp >> (8 * bestOrder - 16)) & UCHAR_MAX; } tnsData.filterOrder[0] = ((bestOrder == 1) && (tnsData.coeffParCor[0] == 0) ? 0 : bestOrder); return predGainCurr; // maximum pred gain of all filter orders and length-1 window groups } unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via scale factors { const unsigned nChannels = toNumChannels (m_channelConf); const unsigned nSamplesInFrame = toFrameLength (m_frameLength); const unsigned samplingRate = toSamplingRate (m_frequencyIdx); const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS); const uint32_t maxSfbLong = (samplingRate < 37566 ? 51 /*32 kHz*/ : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)); const uint32_t reductionFactor = (samplingRate < 37566 ? 2 : 3); // undercoding reduction const uint64_t scaleSr = (samplingRate < 27713 ? 37 - m_bitRateMode : 37) - ((m_bitRateMode & 7) > 2/*TODO*/ ? nChannels >> 1 : 0); const uint64_t scaleBr = (m_bitRateMode == 0 ? 
32 : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - (m_bitRateMode >> 1)); uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf; uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS]; //uint8_t meanTempFlat[USAC_MAX_NUM_CHANNELS]; unsigned ci = 0, s; // running index unsigned errorValue = 0; // no error // psychoacoustic processing of SFB RMS values yielding masking thresholds in m_tempIntBuf errorValue |= m_bitAllocator.initSfbStepSizes (m_scaleFacData, m_numSwbShort, m_specAnaCurr, m_tempAnaCurr, nChannels, samplingRate, sfbStepSizes, lfeChannelIndex); // get means of spectral and temporal flatness for every channel m_bitAllocator.getChAverageSpecFlat (meanSpecFlat, nChannels); //m_bitAllocator.getChAverageTempFlat (meanTempFlat, nChannels); for (unsigned el = 0; el < m_numElements; el++) // element loop { CoreCoderData& coreConfig = *m_elementData[el]; const unsigned nrChannels = (coreConfig.elementType & 1) + 1; // for UsacCoreCoderData() if (coreConfig.elementType >= ID_USAC_LFE) // LFE/EXT elements { SfbGroupData& grpData = coreConfig.groupingData[0]; uint32_t* stepSizes = &sfbStepSizes[ci * m_numSwbShort * NUM_WINDOW_GROUPS]; const uint16_t* off = grpData.sfbOffsets; const uint32_t* rms = grpData.sfbRmsValues; uint8_t* scaleFactors = grpData.scaleFactors; for (uint16_t b = 0; b < grpData.sfbsPerGroup; b++) { const unsigned lfConst = (samplingRate < 27713 ? 1 : 2); const unsigned lfAtten = 4 + b * lfConst; // LF SNR boost, cf my M.Sc. thesis, p. 54 const uint8_t sfbWidth = off[b + 1] - off[b]; const uint64_t scale = scaleBr * __min (32, lfAtten); // rate control part 1 // scale step-sizes according to VBR mode, then derive scale factors from step-sizes stepSizes[b] = uint32_t (__max (BA_EPS, ((1u << 9) + stepSizes[b] * scale) >> 10)); scaleFactors[b] = m_bitAllocator.getScaleFac (stepSizes[b], &m_mdctSignals[ci][off[b]], sfbWidth, rms[b]); } ci++; } else // SCE or CPE: bandwidth-to-max_sfb mapping, short-window grouping for each channel { // if ((coreConfig.stereoMode == 0) && (m_perCorrCurr[el] > SCHAR_MAX)) coreConfig.stereoMode = 1; if (coreConfig.commonWindow && (m_perCorrCurr[el] > 128)) // run stereo pre-processing { const int16_t chanCorrSign = (coreConfig.stereoConfig & 2 ? -1 : 1); const bool eightShorts = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT); const uint16_t nSamplesMax = (samplingRate < 37566 ? nSamplesInFrame : swbOffsetsL[m_swbTableIdx][maxSfbLong]); const uint8_t steppFadeLen = (eightShorts ? 4 : (coreConfig.tnsActive ? 32 : 64)); const uint8_t steppFadeOff = ((m_bitRateMode + 1) & 6) << (eightShorts ? 2 : 5); const int64_t steppWeightI = __min (64, m_perCorrCurr[el] - 128) >> (eightShorts || coreConfig.tnsActive ? 1 : 0); const int64_t steppWeightD = 128 - steppWeightI; // decrement, (1 - crosstalk) * 128 const TnsData& tnsData0 = coreConfig.tnsData[0]; const TnsData& tnsData1 = coreConfig.tnsData[1]; for (uint16_t gr = 0; gr < coreConfig.groupingData[0].numWindowGroups; gr++) { const uint8_t grpLength = coreConfig.groupingData[0].windowGroupLength[gr]; const uint16_t* grpOff = &coreConfig.groupingData[0].sfbOffsets[m_numSwbShort * gr]; const uint16_t grpStart = grpOff[0] + steppFadeOff * grpLength; int32_t* sigR0 = &m_mdctSignals[ci][grpStart]; int32_t* sigR1 = &m_mdctSignals[ci + 1][grpStart]; int64_t xTalkI = 0, xTalkD = 0; // weights for crosstalk if ((tnsData0.numFilters > 0 && gr == tnsData0.filteredWindow) || (tnsData1.numFilters > 0 && gr == tnsData1.filteredWindow)) { const uint16_t maxLen = (eightShorts ? 
grpOff[m_numSwbShort] - 1 : __min (nSamplesInFrame - 1u, nSamplesMax)) - grpStart; int32_t prevR0 = 0; // NOTE: functions also on grouped int32_t prevR1 = 0; // MDCT spectra, but not properly! for (uint16_t w = 0; w < grpLength; w++) // sub-window { prevR0 = *(sigR0++); prevR1 = *(sigR1++); // processing starts at offset of 1! xTalkI = steppWeightI; xTalkD = steppWeightD * (2 * steppFadeLen - 1); for (s = steppFadeLen - 1; s > 0; s--, sigR0++, sigR1++) // start with fade-in { applyStereoPreProcessingReal (sigR0, sigR1, &prevR0, &prevR1, xTalkI, xTalkD, chanCorrSign); xTalkI += steppWeightI; xTalkD -= steppWeightD; } } for (s = maxLen - steppFadeLen * grpLength; s > 0; s--, sigR0++, sigR1++) // end { applyStereoPreProcessingReal (sigR0, sigR1, &prevR0, &prevR1, xTalkI, xTalkD, chanCorrSign); } } else // TNS inactive, both MDCTs and MDSTs are available { const uint16_t maxLen = (eightShorts ? grpOff[m_numSwbShort] : nSamplesMax) - grpStart; int32_t* sigI0 = &m_mdstSignals[ci][grpStart]; // imag int32_t* sigI1 = &m_mdstSignals[ci + 1][grpStart]; for (uint16_t w = 0; w < grpLength; w++) // sub-window { sigR0++; sigR1++; sigI0++; sigI1++; // processing starts at an offset of 1! xTalkI = steppWeightI; xTalkD = steppWeightD * (2 * steppFadeLen - 1); for (s = steppFadeLen - 1; s > 0; s--, sigR0++, sigR1++, sigI0++, sigI1++) { applyStereoPreProcessingCplx (sigR0, sigR1, sigI0, sigI1, xTalkI, xTalkD, chanCorrSign); xTalkI += steppWeightI; xTalkD -= steppWeightD; } } for (s = maxLen - steppFadeLen * grpLength; s > 0; s--, sigR0++, sigR1++, sigI0++, sigI1++) { applyStereoPreProcessingCplx (sigR0, sigR1, sigI0, sigI1, xTalkI, xTalkD, chanCorrSign); } } // if coreConfig.tnsActive } } // if coreConfig.commonWindow if ((errorValue == 0) && (coreConfig.stereoMode == 2)) // frame M/S, synch statistics { const uint8_t numSwbFrame = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT ? m_numSwbShort : __min (m_numSwbLong, maxSfbLong)); const uint32_t peakIndexSte = __max ((m_specAnaCurr[ci] >> 5) & 2047, (m_specAnaCurr[ci + 1] >> 5) & 2047) << 5; errorValue = m_stereoCoder.applyFullFrameMatrix (m_mdctSignals[ci], m_mdctSignals[ci + 1], m_mdstSignals[ci], m_mdstSignals[ci + 1], coreConfig.groupingData[0], coreConfig.groupingData[1], coreConfig.tnsData[0], coreConfig.tnsData[1], numSwbFrame, coreConfig.stereoDataCurr, coreConfig.stereoConfig >> 1, coreConfig.stereoConfig & 1, &sfbStepSizes[m_numSwbShort * NUM_WINDOW_GROUPS * ci], &sfbStepSizes[m_numSwbShort * NUM_WINDOW_GROUPS * (ci + 1)]); if (errorValue == 2) // use frame M/S with cplx_pred_all=1 { coreConfig.stereoMode += 2; errorValue = 0; } m_specAnaCurr[ci ] = (m_specAnaCurr[ci ] & (UINT_MAX - 65504)) | peakIndexSte; m_specAnaCurr[ci + 1] = (m_specAnaCurr[ci + 1] & (UINT_MAX - 65504)) | peakIndexSte; meanSpecFlat[ci] = meanSpecFlat[ci + 1] = ((uint16_t) meanSpecFlat[ci] + (uint16_t) meanSpecFlat[ci + 1]) >> 1; // meanTempFlat[ci] = meanTempFlat[ci + 1] = ((uint16_t) meanTempFlat[ci] + (uint16_t) meanTempFlat[ci + 1]) >> 1; } for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop { SfbGroupData& grpData = coreConfig.groupingData[ch]; const bool eightShorts = (coreConfig.icsInfoCurr[ch].windowSequence == EIGHT_SHORT); const uint8_t numSwbCh = (eightShorts ? 
m_numSwbShort : m_numSwbLong); const uint8_t mSfmFac = eightTimesSqrt256Minus[meanSpecFlat[ci]]; uint32_t* stepSizes = &sfbStepSizes[ci * m_numSwbShort * NUM_WINDOW_GROUPS]; memset (grpData.scaleFactors, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint8_t)); for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++) { const uint16_t* grpOff = &grpData.sfbOffsets[m_numSwbShort * gr]; const uint32_t* grpRms = &grpData.sfbRmsValues[m_numSwbShort * gr]; const uint32_t* refRms = &coreConfig.groupingData[1 - ch].sfbRmsValues[m_numSwbShort * gr]; uint8_t* grpScaleFacs = &grpData.scaleFactors[m_numSwbShort * gr]; uint32_t* grpStepSizes = &stepSizes[m_numSwbShort * gr]; uint32_t b, grpRmsMin = INT_MAX; // min. RMS value, used for overcoding reduction // undercoding reduction for case where large number of coefs is quantized to zero s = (eightShorts ? (nSamplesInFrame * grpData.windowGroupLength[gr]) >> 1 : nSamplesInFrame << 2); for (b = 0; b < grpData.sfbsPerGroup; b++) { #if SA_IMPROVED_REAL_ABS const uint32_t rmsComp = (coreConfig.stereoMode > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]); const uint32_t rmsRef9 = (coreConfig.commonWindow ? refRms[b] >> 9 : rmsComp); #else const uint32_t rmsComp = getComplexRmsValue (grpRms[b], gr, b, numSwbCh, coreConfig.tnsData[ch]); const uint32_t rmsRef9 = (!coreConfig.commonWindow ? rmsComp : getComplexRmsValue (refRms[b], gr, b, numSwbCh, coreConfig.tnsData[1 - ch]) >> 9); #endif if (grpRms[b] < grpRmsMin) grpRmsMin = grpRms[b]; if (rmsComp >= rmsRef9 && (rmsComp < (grpStepSizes[b] >> 1))) // zero-quantized { s -= ((grpOff[b + 1] - grpOff[b]) * reductionFactor * __min (2 * SA_EPS, rmsComp) + SA_EPS) >> 11; // / (2 * SA_EPS) } } if ((samplingRate >= 27713) && (b < maxSfbLong) && !eightShorts) // uncoded coefs { #if SA_IMPROVED_REAL_ABS const uint32_t rmsComp = (coreConfig.stereoMode > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]); const uint32_t rmsRef9 = (coreConfig.commonWindow ? refRms[b] >> 9 : rmsComp); #else const uint32_t rmsComp = getComplexRmsValue (grpRms[b], gr, b, numSwbCh, coreConfig.tnsData[ch]); const uint32_t rmsRef9 = (!coreConfig.commonWindow ? rmsComp : getComplexRmsValue (refRms[b], gr, b, numSwbCh, coreConfig.tnsData[1 - ch]) >> 9); #endif if (rmsComp >= rmsRef9) // check only first SFB above max_sfb for simplification { s -= ((grpOff[maxSfbLong] - grpOff[b]) * reductionFactor * __min (2 * SA_EPS, rmsComp) + SA_EPS) >> 11; // / (2 * SA_EPS) } } s = (eightShorts ? s / ((nSamplesInFrame * grpData.windowGroupLength[gr]) >> 8) : s / (nSamplesInFrame >> 5)); for (b = 0; b < grpData.sfbsPerGroup; b++) { const unsigned lfConst = (samplingRate < 27713 && !eightShorts ? 1 : 2); // LF SNR boost, cf my M.Sc. thesis const unsigned lfAtten = (b <= 5 ? (eightShorts ? 1 : 4) + b * lfConst : 5 * lfConst - 1 + b + ((b + 5) >> 4)); const uint8_t sfbWidth = grpOff[b + 1] - grpOff[b]; const uint64_t rateFac = mSfmFac * s * __min (32, lfAtten * grpData.numWindowGroups); // rate control part 1 const uint64_t sScaled = ((1u << 23) + __max (grpRmsMin, grpStepSizes[b]) * (scaleBr - (coreConfig.stereoMode > 0 ? 
1 : 0)) * rateFac) >> 24; // scale step-sizes according to VBR mode & derive scale factors from step-sizes grpStepSizes[b] = uint32_t (__max (BA_EPS, __min (UINT_MAX, sScaled))); grpScaleFacs[b] = m_bitAllocator.getScaleFac (grpStepSizes[b], &m_mdctSignals[ci][grpOff[b]], sfbWidth, grpRms[b]); } } // for gr #if !RESTRICT_TO_AAC if (grpData.sfbsPerGroup > 0 && m_noiseFilling[el] && !eightShorts) // HF noise-fill { const uint8_t numSwbFrame = __min (numSwbCh, maxSfbLong); // rate based bandwidth if (grpData.sfbsPerGroup < numSwbFrame) { memset (&grpData.scaleFactors[grpData.sfbsPerGroup], 0, (numSwbFrame - grpData.sfbsPerGroup) * sizeof (uint8_t)); grpData.sfbsPerGroup = coreConfig.icsInfoCurr[ch].maxSfb = numSwbFrame; } if (ch > 0) coreConfig.commonMaxSfb = (coreConfig.icsInfoCurr[0].maxSfb == coreConfig.icsInfoCurr[1].maxSfb); } #endif ci++; } // for ch for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop { SfbGroupData& grpData = coreConfig.groupingData[ch]; TnsData& tnsData = coreConfig.tnsData[ch]; if (tnsData.numFilters > 0) // convert TNS group index to window index for write-out { s = 0; for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++) { if (gr == tnsData.filteredWindow) { tnsData.filteredWindow = (uint8_t) s; break; } s += grpData.windowGroupLength[gr]; } } } // for ch } } // for el return errorValue; } unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and entropy coding { const unsigned nChannels = toNumChannels (m_channelConf); const unsigned nSamplesInFrame = toFrameLength (m_frameLength); const unsigned samplingRate = toSamplingRate (m_frequencyIdx); const unsigned* const coeffMagn = m_sfbQuantizer.getCoeffMagnPtr (); uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS]; //uint8_t meanTempFlat[USAC_MAX_NUM_CHANNELS]; unsigned ci = 0, s; // running index unsigned errorValue = (coeffMagn == nullptr ? 1 : 0); // get means of spectral and temporal flatness for every channel m_bitAllocator.getChAverageSpecFlat (meanSpecFlat, nChannels); //m_bitAllocator.getChAverageTempFlat (meanTempFlat, nChannels); for (unsigned el = 0; el < m_numElements; el++) // element loop { CoreCoderData& coreConfig = *m_elementData[el]; const unsigned nrChannels = (coreConfig.elementType & 1) + 1; // for UsacCoreCoderData() if ((coreConfig.elementType < ID_USAC_LFE) && (coreConfig.stereoMode > 0)) // synch SFMs { meanSpecFlat[ci] = meanSpecFlat[ci + 1] = ((uint16_t) meanSpecFlat[ci] + (uint16_t) meanSpecFlat[ci + 1]) >> 1; // meanTempFlat[ci] = meanTempFlat[ci + 1] = ((uint16_t) meanTempFlat[ci] + (uint16_t) meanTempFlat[ci + 1]) >> 1; } for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop { EntropyCoder& entrCoder = m_entropyCoder[ci]; SfbGroupData& grpData = coreConfig.groupingData[ch]; const bool shortWinCurr = (coreConfig.icsInfoCurr[ch].windowSequence == EIGHT_SHORT); const bool shortWinPrev = (coreConfig.icsInfoPrev[ch].windowSequence == EIGHT_SHORT); char* const arithTuples = entrCoder.arithGetTuplePtr (); uint8_t sfIdxPred = UCHAR_MAX; if ((errorValue > 0) || (arithTuples == nullptr)) { return 0; // an internal error } // back up entropy coder memory for use by bit-stream writer memcpy (m_tempIntBuf, arithTuples, (nSamplesInFrame >> 1) * sizeof (char)); errorValue |= (entrCoder.getIsShortWindow () != shortWinPrev ? 
1 : 0); // sanity check memset (m_mdctQuantMag[ci], 0, nSamplesInFrame * sizeof (uint8_t)); // initialization for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++) { const uint8_t grpLength = grpData.windowGroupLength[gr]; const uint16_t* grpOff = &grpData.sfbOffsets[m_numSwbShort * gr]; uint32_t* const grpRms = &grpData.sfbRmsValues[m_numSwbShort * gr]; // coding stats uint8_t* grpScaleFacs = &grpData.scaleFactors[m_numSwbShort * gr]; uint32_t estimBitCount = 0; unsigned lastSfb = 0, lastSOff = 0; errorValue |= entrCoder.initWindowCoding (m_indepFlag && (gr == 0), shortWinCurr); s = 0; for (uint16_t b = 0; b < grpData.sfbsPerGroup; b++) { // partial SFB ungrouping for entropy coding setup below const uint16_t swbSize = ((grpOff[b + 1] - grpOff[b]) * oneTwentyEightOver[grpLength]) >> 7; // sfbWidth / grpLength uint8_t* const swbMagn = &m_mdctQuantMag[ci][grpOff[b + 1] - swbSize]; grpScaleFacs[b] = m_sfbQuantizer.quantizeSpecSfb (entrCoder, m_mdctSignals[ci], grpLength, grpOff, grpRms, b, grpScaleFacs[b], sfIdxPred, m_mdctQuantMag[ci]); if ((b > 0) && (grpScaleFacs[b] < UCHAR_MAX) && (sfIdxPred == UCHAR_MAX)) { // back-propagate first nonzero-SFB scale factor index memset (grpScaleFacs, grpScaleFacs[b], b * sizeof (uint8_t)); } sfIdxPred = grpScaleFacs[b]; // correct previous scale factor if the delta exceeds 60 if ((b > 0) && (grpScaleFacs[b] > grpScaleFacs[b - 1] + INDEX_OFFSET)) { const uint16_t sfbM1Start = grpOff[b - 1]; const uint16_t sfbM1Width = grpOff[b] - sfbM1Start; const uint16_t swbM1Size = (sfbM1Width * oneTwentyEightOver[grpLength]) >> 7; // sfbM1Width / grpLength grpScaleFacs[b - 1] = grpScaleFacs[b] - (b > 1 ? INDEX_OFFSET : 0); // zero-out memset (&m_mdctQuantMag[ci][sfbM1Start], 0, sfbM1Width * sizeof (uint8_t)); // correct SFB statistics with some bit count estimate grpRms[b - 1] = 1 + (sfbM1Width >> 3) + entrCoder.indexGetBitCount (b > 1 ? (int) grpScaleFacs[b - 1] - grpScaleFacs[b - 2] : 0); // correct entropy coding 2-tuples for the next window memset (&arithTuples[lastSOff], 1, (swbM1Size >> 1) * sizeof (char)); } if (b > 0) { if ((grpRms[b - 1] >> 16) > 0) lastSfb = b - 1; estimBitCount += grpRms[b - 1] & USHRT_MAX; } // set up entropy coding 2-tuples for next SFB or window lastSOff = s; for (uint16_t c = 0; c < swbSize; c += 2) { arithTuples[s++] = __min (0xF, swbMagn[c] + swbMagn[c + 1] + 1); // 23003-3, 7.4 } } // for b if (grpData.sfbsPerGroup > 0) // rate control part 2 to reach constrained VBR (CVBR) { const uint8_t maxSfbLong = (samplingRate < 37566 ? 51 /*32 kHz*/ : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)); const uint8_t maxSfbShort = (samplingRate < 37566 ? 14 /*32 kHz*/ : brModeAndFsToMaxSfbShort(m_bitRateMode, samplingRate)); const uint16_t peakIndex = (shortWinCurr ? 0 : (m_specAnaCurr[ci] >> 5) & 2047); const unsigned sfmBasedSfbStart = (shortWinCurr ? 
maxSfbShort : maxSfbLong) - 5 + (m_bitRateMode >> 1) + (meanSpecFlat[ci] >> 5); const unsigned targetBitCount25 = ((60000 + 20000 * m_bitRateMode) * nSamplesInFrame) / (samplingRate * ((grpData.numWindowGroups + 1) >> 1)); unsigned b = grpData.sfbsPerGroup - 1; if ((grpRms[b] >> 16) > 0) lastSfb = b; estimBitCount += grpRms[b] & USHRT_MAX; #if EC_TRELLIS_OPT_CODING if (grpLength == 1) // finalize bit count estimate, RDOC { estimBitCount = m_sfbQuantizer.quantizeSpecRDOC (entrCoder, grpScaleFacs, __min (estimBitCount + 2, targetBitCount25), grpOff, grpRms, grpData.sfbsPerGroup, m_mdctQuantMag[ci]); for (b = 1; b < grpData.sfbsPerGroup; b++) { // correct previous scale factor if delta exceeds 60 if (grpScaleFacs[b] > grpScaleFacs[b - 1] + INDEX_OFFSET) { const uint16_t sfbM1Start = grpOff[b - 1]; const uint16_t sfbM1Width = grpOff[b] - sfbM1Start; grpScaleFacs[b - 1] = grpScaleFacs[b] - (b > 1 ? INDEX_OFFSET : 0); // 0-out memset (&m_mdctQuantMag[ci][sfbM1Start], 0, sfbM1Width * sizeof (uint8_t)); // correct statistics with some bit count estimate grpRms[b - 1] = 1 + (sfbM1Width >> 3) + entrCoder.indexGetBitCount (b > 1 ? (int) grpScaleFacs[b - 1] - grpScaleFacs[b - 2] : 0); // correct entropy coding 2-tuples for next window memset (&arithTuples[(sfbM1Start - grpOff[0]) >> 1], 1, (sfbM1Width >> 1) * sizeof (char)); } } } #endif b = lastSfb; while ((b >= sfmBasedSfbStart) && (grpOff[b] > peakIndex) && ((grpRms[b] >> 16) <= 1) /*coarse quantization*/ && ((estimBitCount * 5 > targetBitCount25 * 2) || (grpLength > 1 /*no accurate bit count est. available*/))) { b--; // search first coarsely quantized high-freq. SFB } lastSOff = b; for (b++; b <= lastSfb; b++) { if ((grpRms[b] >> 16) > 0) // re-quantize nonzero band { #if RESTRICT_TO_AAC uint32_t maxVal = 1; #else uint32_t maxVal = (shortWinCurr || !m_noiseFilling[el] ? 1 : (m_specAnaCurr[ci] >> 23) & 1); // 1 or 0 #endif estimBitCount -= grpRms[b] & USHRT_MAX; grpRms[b] = (maxVal << 16) + maxVal; // bit estimate maxVal = quantizeSfbWithMinSnr (coeffMagn, grpOff, b, grpLength, m_mdctQuantMag[ci], arithTuples, maxVal > 0); grpScaleFacs[b] = __min (SCHAR_MAX, m_sfbQuantizer.getScaleFacOffset ((double) maxVal)); // correct SFB statistics with estimate of bit count grpRms[b] += 3 + entrCoder.indexGetBitCount ((int) grpScaleFacs[b] - grpScaleFacs[b - 1]); estimBitCount += grpRms[b] & USHRT_MAX; } else // re-repeat scale factor for zero quantized band { grpScaleFacs[b] = grpScaleFacs[b - 1]; } } if (estimBitCount > targetBitCount25) // too many bits!! { for (b = lastSOff; b > 0; b--) { if ((grpRms[b] >> 16) > 0) // emergency re-quantizer { #if RESTRICT_TO_AAC uint32_t maxVal = 1; #else uint32_t maxVal = (shortWinCurr || !m_noiseFilling[el] ? 1 : (m_specAnaCurr[ci] >> 23) & 1); // 1 or 0 #endif estimBitCount -= grpRms[b] & USHRT_MAX; grpRms[b] = (maxVal << 16) + maxVal; // bit estim. 
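// NOTE: quantizeSfbWithMinSnr() zeroes all quantized magnitudes of SFB b (keeping, at most, a
// single magnitude of 1 at the band's peak line when maxVal > 0), resets the associated arith-
// metic coding two-tuples, and returns the SFB's peak level, which is mapped onto a scale
// factor just below. This trades the band's SNR for the bits needed to reach the CVBR target.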
maxVal = quantizeSfbWithMinSnr (coeffMagn, grpOff, b, grpLength, m_mdctQuantMag[ci], arithTuples, maxVal > 0); grpScaleFacs[b] = __min (SCHAR_MAX, m_sfbQuantizer.getScaleFacOffset ((double) maxVal)); // correct SFB statistics with estimated bit count grpRms[b] += 3 + entrCoder.indexGetBitCount ((int) grpScaleFacs[b] - grpScaleFacs[b - 1]); estimBitCount += grpRms[b] & USHRT_MAX; } if (estimBitCount <= targetBitCount25) break; } for (b++; b <= lastSfb; b++) // re-repeat scale factor { if ((grpRms[b] >> 16) == 0) // a zero quantized band { grpScaleFacs[b] = grpScaleFacs[b - 1]; } } } // if estimBitCount > targetBitCount25 for (b = lastSfb + 1; b < grpData.sfbsPerGroup; b++) { if ((grpRms[b] >> 16) == 0) // HF zero quantized bands { grpScaleFacs[b] = grpScaleFacs[b - 1]; } } if ((grpScaleFacs[0] == UCHAR_MAX) && #if !RESTRICT_TO_AAC !m_noiseFilling[el] && #endif (lastSfb == 0)) // ensure all scale factors are set { memset (grpScaleFacs, (gr == 1 ? grpData.scaleFactors[grpData.sfbsPerGroup - 1] : 0), grpData.sfbsPerGroup * sizeof (uint8_t)); } } } // for gr // restore entropy coder memory for use by bit-stream writer memcpy (arithTuples, m_tempIntBuf, (nSamplesInFrame >> 1) * sizeof (char)); entrCoder.setIsShortWindow (shortWinPrev); #if !RESTRICT_TO_AAC // obtain channel-wise noise_level and noise_offset for USAC coreConfig.specFillData[ch] = (!m_noiseFilling[el] ? 0 : m_specGapFiller.getSpecGapFillParams (m_sfbQuantizer, m_mdctQuantMag[ci], m_numSwbShort, grpData, nSamplesInFrame, shortWinCurr ? 0 : meanSpecFlat[ci])); // NOTE: gap-filling SFB bit count might be inaccurate now since scale factors changed if (coreConfig.specFillData[ch] == 1) errorValue |= 1; #endif ci++; } } // for el return (errorValue > 0 ? 0 : m_outStream.createAudioFrame (m_elementData, m_entropyCoder, m_mdctSignals, m_mdctQuantMag, m_indepFlag, m_numElements, m_numSwbShort, (uint8_t* const) m_tempIntBuf, #if !RESTRICT_TO_AAC m_timeWarping, m_noiseFilling, #endif m_outAuData, nSamplesInFrame)); // returns AU size } unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS and SFB data { const unsigned nChannels = toNumChannels (m_channelConf); const unsigned nSamplesInFrame = toFrameLength (m_frameLength); const unsigned nSamplesInShort = nSamplesInFrame >> 3; const unsigned samplingRate = toSamplingRate (m_frequencyIdx); const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? 
__max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS); unsigned ci = 0, s; // running index unsigned errorValue = 0; // no error // get spectral channel statistics for last frame, used for input bandwidth (BW) detection //m_specAnalyzer.getSpecAnalysisStats (m_specAnaPrev, nChannels); m_specAnalyzer.getSpectralBandwidth (m_bandwidPrev, nChannels); // spectral analysis for current MCLT signal (windowed time-samples for the current frame) errorValue |= m_specAnalyzer.spectralAnalysis (m_mdctSignals, m_mdstSignals, nChannels, nSamplesInFrame, samplingRate, lfeChannelIndex); // get spectral channel statistics for this frame, used for perceptual model & BW detector m_specAnalyzer.getSpecAnalysisStats (m_specAnaCurr, nChannels); m_specAnalyzer.getSpectralBandwidth (m_bandwidCurr, nChannels); for (unsigned el = 0; el < m_numElements; el++) // element loop { CoreCoderData& coreConfig = *m_elementData[el]; const unsigned nrChannels = (coreConfig.elementType & 1) + 1; // for UsacCoreCoderData() coreConfig.commonMaxSfb = false; coreConfig.commonTnsData = false; coreConfig.tnsActive = false; coreConfig.tnsOnLeftRight = true; // enforce tns_on_lr = 1 for now, detection difficult memset (coreConfig.tnsData, 0, nrChannels * sizeof (TnsData)); if (coreConfig.elementType >= ID_USAC_LFE) // LFE/EXT elements { SfbGroupData& grpData = coreConfig.groupingData[0]; uint16_t* grpSO = grpData.sfbOffsets; IcsInfo& icsCurr = coreConfig.icsInfoCurr[0]; memcpy (grpSO, swbOffsetsL[m_swbTableIdx], numSwbOffsetL[m_swbTableIdx] * sizeof (uint16_t)); icsCurr.maxSfb = MAX_NUM_SWB_LFE; while (grpSO[icsCurr.maxSfb] > LFE_MAX) icsCurr.maxSfb--; // limit coefficients in LFE ci++; } else // SCE or CPE: bandwidth-to-max_sfb mapping, short-window grouping for each channel { coreConfig.stereoConfig = coreConfig.stereoMode = 0; if (coreConfig.commonWindow && (m_bitRateMode <= 4)) // stereo pre-processing analysis { const bool eightShorts = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT); const uint16_t* const swbo = swbOffsetsL[m_swbTableIdx]; const uint16_t nSamplesMax = (samplingRate < 37566 ? nSamplesInFrame : swbo[brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)]); const int16_t steAnaStats = m_specAnalyzer.stereoSigAnalysis (m_mdctSignals[ci], m_mdctSignals[ci + 1], m_mdstSignals[ci], m_mdstSignals[ci + 1], nSamplesMax, nSamplesInFrame, eightShorts, coreConfig.stereoDataCurr); if (steAnaStats == SHRT_MIN) errorValue = 1; if ((s = abs (steAnaStats)) * m_perCorrCurr[el] == 0) // transitions to/from silence { m_perCorrCurr[el] = (uint8_t) s; } else // gentle overlap length dependent temporal smoothing { const int16_t allowedDiff = (coreConfig.icsInfoCurr[0].windowSequence < EIGHT_SHORT ? 
16 : 32); const int16_t prevPerCorr = __max (128, __min (192, m_perCorrCurr[el])); m_perCorrCurr[el] = (uint8_t) __max (prevPerCorr - allowedDiff, __min (prevPerCorr + allowedDiff, (int16_t) s)); } if (s == steAnaStats * -1) coreConfig.stereoConfig = 2; // 2: side > mid, pred_dir=1 if (s > (UCHAR_MAX * 3) / 4) coreConfig.stereoMode = 2; // 2: all, ms_mask_present=2 } else if (nrChannels > 1) m_perCorrCurr[el] = 128; // update history with halfway value for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop { SfbGroupData& grpData = coreConfig.groupingData[ch]; uint16_t* grpSO = grpData.sfbOffsets; IcsInfo& icsCurr = coreConfig.icsInfoCurr[ch]; TnsData& tnsData = coreConfig.tnsData[ch]; memset (grpSO, 0, (1 + MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint16_t)); if (icsCurr.windowSequence != EIGHT_SHORT) { memcpy (grpSO, swbOffsetsL[m_swbTableIdx], numSwbOffsetL[m_swbTableIdx] * sizeof (uint16_t)); icsCurr.maxSfb = 0; while (grpSO[icsCurr.maxSfb] < nSamplesInFrame) icsCurr.maxSfb++; // num_swb_long grpSO[icsCurr.maxSfb] = (uint16_t) nSamplesInFrame; grpData.sfbsPerGroup = m_numSwbLong = icsCurr.maxSfb; // changed to max_sfb later if (samplingRate > 32000) // set max_sfb based on VBR mode and bandwidth detection { if (icsCurr.maxSfb > 49) // may still be 51 for 32 kHz { grpData.sfbsPerGroup = m_numSwbLong = icsCurr.maxSfb = 49; // fix 44.1, 48 kHz } icsCurr.maxSfb = __min (icsCurr.maxSfb, brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)); } while (grpSO[icsCurr.maxSfb] > __max (m_bandwidCurr[ci], m_bandwidPrev[ci])) icsCurr.maxSfb--; // BW detector } else // icsCurr.windowSequence == EIGHT_SHORT { memcpy (grpSO, swbOffsetsS[m_swbTableIdx], numSwbOffsetS[m_swbTableIdx] * sizeof (uint16_t)); icsCurr.maxSfb = 0; while (grpSO[icsCurr.maxSfb] < nSamplesInShort) icsCurr.maxSfb++; // num_swb_short grpSO[icsCurr.maxSfb] = (uint16_t) nSamplesInShort; grpData.sfbsPerGroup = m_numSwbShort = icsCurr.maxSfb; // changed to max_sfb later if (samplingRate > 32000) // set max_sfb based on VBR mode and zero-ness detection { icsCurr.maxSfb = __min (icsCurr.maxSfb, brModeAndFsToMaxSfbShort (m_bitRateMode, samplingRate)); } #if SA_OPT_WINDOW_GROUPING if (ch > 0 && coreConfig.commonWindow) // resynchronize the scale_factor_grouping { if (icsCurr.windowGrouping != coreConfig.icsInfoCurr[0].windowGrouping) { icsCurr.windowGrouping = coreConfig.icsInfoCurr[0].windowGrouping; } } else // first element channel or not common_window, optimize scale_factor_grouping { if ((s = m_specAnalyzer.optimizeGrouping (ci, grpSO[icsCurr.maxSfb] << 3, icsCurr.windowGrouping)) < 8) { icsCurr.windowGrouping = (uint8_t) s; } } memcpy (grpData.windowGroupLength, windowGroupingTable[icsCurr.windowGrouping], NUM_WINDOW_GROUPS * sizeof (uint8_t)); #endif while (grpSO[icsCurr.maxSfb] > __max (m_bandwidCurr[ci], m_bandwidPrev[ci])) icsCurr.maxSfb--; // not a bug!! #if SA_IMPROVED_REAL_ABS errorValue |= eightShortGrouping (grpData, grpSO, m_mdctSignals[ci], nChannels < 2 ? nullptr : m_mdstSignals[ci]); #else errorValue |= eightShortGrouping (grpData, grpSO, m_mdctSignals[ci]); #endif } // if EIGHT_SHORT // compute and quantize optimal TNS coefficients, then find optimal TNS filter order s /*linear pred gain*/ = getOptParCorCoeffs (m_mdctSignals[ci], grpData, icsCurr.maxSfb, ci, tnsData, ch > 0 && coreConfig.commonWindow ? 
coreConfig.tnsData[0].filteredWindow : 0); tnsData.filterOrder[0] = m_linPredictor.calcOptTnsCoeffs (tnsData.coeffParCor, tnsData.coeff[0], &tnsData.coeffResLow, tnsData.filterOrder[0], s, (m_specAnaCurr[ci] >> 16) & UCHAR_MAX); tnsData.numFilters = (tnsData.filterOrder[0] > 0 ? 1 : 0); ci++; } // for ch if (coreConfig.commonWindow) // synchronization of all StereoCoreToolInfo() components { uint8_t& maxSfb0 = coreConfig.icsInfoCurr[0].maxSfb; uint8_t& maxSfb1 = coreConfig.icsInfoCurr[1].maxSfb; const uint8_t maxSfbSte = __max (maxSfb0, maxSfb1); // max_sfb_ste, as in Table 24 if ((maxSfb0 > 0) && (maxSfb1 > 0) && (maxSfbSte - __min (maxSfb0, maxSfb1) <= 1 || coreConfig.stereoMode == 2)) { uint32_t& sa0 = m_specAnaCurr[ci-2]; uint32_t& sa1 = m_specAnaCurr[ci-1]; const int specFlat[2] = {int (sa0 >> 16) & UCHAR_MAX, int (sa1 >> 16) & UCHAR_MAX}; const int tnsStart[2] = {int (sa0 & 31), int (sa1 & 31)}; // long TNS start offset if ((coreConfig.tnsData[0].filteredWindow == coreConfig.tnsData[1].filteredWindow) && (abs (specFlat[0] - specFlat[1]) <= (UCHAR_MAX >> 3)) && (abs (tnsStart[0] - tnsStart[1]) <= (UCHAR_MAX >> 5))) // TNS synchronization { const uint16_t maxTnsOrder = __max (coreConfig.tnsData[0].filterOrder[0], coreConfig.tnsData[1].filterOrder[0]); TnsData& tnsData0 = coreConfig.tnsData[0]; TnsData& tnsData1 = coreConfig.tnsData[1]; if (m_linPredictor.similarParCorCoeffs (tnsData0.coeffParCor, tnsData1.coeffParCor, maxTnsOrder, LP_DEPTH)) { coreConfig.commonTnsData = true; // synch tns_data for (s = 0; s < maxTnsOrder; s++) { tnsData0.coeffParCor[s] = (tnsData0.coeffParCor[s] + tnsData1.coeffParCor[s] + 1) >> 1; } tnsData0.coeffResLow = false; // reoptimize coeffs tnsData0.filterOrder[0] = m_linPredictor.calcOptTnsCoeffs (tnsData0.coeffParCor, tnsData0.coeff[0], &tnsData0.coeffResLow, maxTnsOrder, UCHAR_MAX /*maximum pred gain*/, 0, LP_DEPTH); tnsData0.numFilters = (tnsData0.filterOrder[0] > 0 ? 1 : 0); memcpy (&tnsData1, &tnsData0, sizeof (TnsData)); } else if ((maxTnsOrder > 0) && (tnsData0.coeffResLow == tnsData1.coeffResLow) && (tnsData0.filterOrder[0] == tnsData1.filterOrder[0])) { const int32_t* coeff0 = (int32_t*) tnsData0.coeff[0]; // fast comparison code, const int32_t* coeff1 = (int32_t*) tnsData1.coeff[0]; // might not be portable coreConfig.commonTnsData = (*coeff0 == *coeff1); // first four coeffs the same } if (coreConfig.commonTnsData) // synch TNS start SFB { const uint32_t avgTnsStart = (tnsStart[0] + tnsStart[1]) >> 1; // mean offset sa0 = (sa0 & (UINT_MAX - 31)) | avgTnsStart; // is used by applyTnsToWinGroup sa1 = (sa1 & (UINT_MAX - 31)) | avgTnsStart; } } maxSfb0 = maxSfb1 = maxSfbSte; } else coreConfig.stereoMode = 0; // since a max_sfb is 0 coreConfig.commonMaxSfb = (maxSfb0 == maxSfb1); // synch } // if coreConfig.commonWindow } ci -= nrChannels; // zero frequency coefficients above num_swb for all channels, windows for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop { SfbGroupData& grpData = coreConfig.groupingData[ch]; const uint16_t* grpSO = grpData.sfbOffsets; const IcsInfo& icsCurr = coreConfig.icsInfoCurr[ch]; const bool eightShorts = (icsCurr.windowSequence == EIGHT_SHORT); unsigned grpEndLine = 0; if (eightShorts) // map grouping table idx to scale_factor_grouping idx for bit-stream { coreConfig.icsInfoCurr[ch].windowGrouping = scaleFactorGrouping[icsCurr.windowGrouping]; } for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++) { const unsigned grpSOStart = grpSO[grpData.sfbsPerGroup + m_numSwbShort * gr]; grpEndLine += (eightShorts ? 
nSamplesInShort : nSamplesInFrame) * grpData.windowGroupLength[gr]; memset (&m_mdctSignals[ci][grpSOStart], 0, (grpEndLine - grpSOStart) * sizeof (int32_t)); memset (&m_mdstSignals[ci][grpSOStart], 0, (grpEndLine - grpSOStart) * sizeof (int32_t)); } memset (grpData.sfbRmsValues, 0, (MAX_NUM_SWB_SHORT * NUM_WINDOW_GROUPS) * sizeof (uint32_t)); if (icsCurr.maxSfb > 0) { // use MCLTs for LONG but only MDCTs for SHORT windows when the MDSTs aren't grouped #if SA_IMPROVED_REAL_ABS for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++) { s = m_numSwbShort * gr; errorValue |= m_specAnalyzer.getMeanAbsValues (m_mdctSignals[ci], eightShorts && nChannels < 2 ? nullptr : m_mdstSignals[ci], grpSO[grpData.sfbsPerGroup + s], (eightShorts ? USAC_MAX_NUM_CHANNELS : ci), &grpSO[s], grpData.sfbsPerGroup, &grpData.sfbRmsValues[s]); } #else errorValue |= m_specAnalyzer.getMeanAbsValues (m_mdctSignals[ci], eightShorts ? nullptr : m_mdstSignals[ci], nSamplesInFrame, ci, grpSO, grpData.sfbsPerGroup * grpData.numWindowGroups, grpData.sfbRmsValues); #endif errorValue |= applyTnsToWinGroup (coreConfig.tnsData[ch], grpData, eightShorts, grpData.sfbsPerGroup, ci); coreConfig.tnsActive |= (coreConfig.tnsData[ch].numFilters > 0); // tns_data_present } grpData.sfbsPerGroup = icsCurr.maxSfb; // change num_swb to max_sfb for coding process ci++; } } // for el return errorValue; } unsigned ExhaleEncoder::temporalProcessing () // determine time-domain aspects of ics_info() { const unsigned nChannels = toNumChannels (m_channelConf); const unsigned nSamplesInFrame = toFrameLength (m_frameLength); const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS); unsigned ci = 0; // running ch index unsigned errorValue = 0; // no error // get temporal channel statistics for this frame, used for spectral grouping/quantization m_tempAnalyzer.getTempAnalysisStats (m_tempAnaCurr, nChannels); m_tempAnalyzer.getTransientLocation (m_tranLocCurr, nChannels); // temporal analysis for look-ahead signal (central nSamplesInFrame samples of next frame) errorValue |= m_tempAnalyzer.temporalAnalysis (m_timeSignals, nChannels, nSamplesInFrame, nSamplesTempAna, lfeChannelIndex); // get temporal channel statistics for next frame, used for window length/overlap decision m_tempAnalyzer.getTempAnalysisStats (m_tempAnaNext, nChannels); m_tempAnalyzer.getTransientLocation (m_tranLocNext, nChannels); #ifndef NO_FIX_FOR_ISSUE_1 m_indepFlag = (((m_frameCount++) % m_indepPeriod) <= 1); // configure usacIndependencyFlag #else m_indepFlag = (((m_frameCount++) % m_indepPeriod) == 0); // configure usacIndependencyFlag #endif for (unsigned el = 0; el < m_numElements; el++) // element loop { CoreCoderData& coreConfig = *m_elementData[el]; const unsigned nrChannels = (coreConfig.elementType & 1) + 1; // for UsacCoreCoderData() coreConfig.commonWindow = false; coreConfig.icsInfoPrev[0] = coreConfig.icsInfoCurr[0]; coreConfig.icsInfoPrev[1] = coreConfig.icsInfoCurr[1]; if (coreConfig.elementType >= ID_USAC_LFE) // LFE/EXT elements { IcsInfo& icsCurr = coreConfig.icsInfoCurr[0]; icsCurr.windowGrouping = 0; icsCurr.windowSequence = ONLY_LONG; #if RESTRICT_TO_AAC icsCurr.windowShape = WINDOW_SINE; #else icsCurr.windowShape = WINDOW_KBD; #endif ci++; } else // SCE or CPE: short-window, low-overlap, and sine-shape detection for each channel { unsigned tsCurr[2]; // save temporal stationarity values unsigned tsNext[2]; // for 
common_window decision in CPE for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop { const IcsInfo& icsPrev = coreConfig.icsInfoPrev[ch]; IcsInfo& icsCurr = coreConfig.icsInfoCurr[ch]; const USAC_WSEQ wsPrev = icsPrev.windowSequence; USAC_WSEQ& wsCurr = icsCurr.windowSequence; // get temporal signal statistics, then determine overlap config. for the next frame const unsigned sfCurr = (m_tempAnaCurr[ci] >> 24) & UCHAR_MAX; const unsigned tfCurr = (m_tempAnaCurr[ci] >> 16) & UCHAR_MAX; const unsigned sfNext = (m_tempAnaNext[ci] >> 24) & UCHAR_MAX; const unsigned tfNext = (m_tempAnaNext[ci] >> 16) & UCHAR_MAX; tsCurr[ch] = (m_tempAnaCurr[ci] /*R*/) & UCHAR_MAX; tsNext[ch] = (m_tempAnaNext[ci] >> 8) & UCHAR_MAX; const bool lowOlapNext = (m_tranLocNext[ci] >= 0) || (sfNext < 68 && tfNext >= 204) || (tsCurr[ch] >= 153) || (tsNext[ch] >= 153); const bool sineWinCurr = (sfCurr >= 170) && (sfNext >= 170) && (sfCurr < 221) && (sfNext < 221) && (tsCurr[ch] < 20) && (tfCurr >= 153) && (tfNext >= 153) && (tfCurr < 184) && (tfNext < 184) && (tsNext[ch] < 20); // set window_sequence if ((wsPrev == ONLY_LONG) || (wsPrev == LONG_STOP)) // 1st window half - max overlap { wsCurr = (lowOlapNext ? LONG_START : ONLY_LONG); } else // LONG_START_SEQUENCE, STOP_START_SEQUENCE, EIGHT_SHORT_SEQUENCE - min overlap { wsCurr = (m_tranLocCurr[ci] >= 0) ? EIGHT_SHORT : #if RESTRICT_TO_AAC (lowOlapNext && (m_tranLocNext[ci] >= 0 || wsPrev != EIGHT_SHORT) ? EIGHT_SHORT : LONG_STOP); #else (lowOlapNext && (m_tranLocNext[ci] >= 0 || wsPrev != STOP_START) ? STOP_START : LONG_STOP); #endif } // set window_shape if ((wsCurr == ONLY_LONG) || (wsCurr == LONG_STOP)) // 2nd window half - max overlap { icsCurr.windowShape = (sineWinCurr ? WINDOW_SINE : WINDOW_KBD); } else // LONG_START_SEQUENCE, STOP_START_SEQUENCE, EIGHT_SHORT_SEQUENCE - min overlap { icsCurr.windowShape = (m_tranLocCurr[ci] >= 0) ? WINDOW_KBD : (sineWinCurr ? WINDOW_SINE : WINDOW_KBD); } // set scale_factor_grouping icsCurr.windowGrouping = (wsCurr == EIGHT_SHORT ? 
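                               // map the transient location (in samples) to one of the 8 short windows,
                               // i.e. grouping index = (tranLoc * 8) / frameLength; e.g., with the de-
                               // fault 1024-sample frames a transient at sample 512 yields index 4. The
                               // index later selects the windowGroupingTable[] entry and is mapped via
                               // scaleFactorGrouping[] for the bit-stream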
(m_tranLocCurr[ci] * 8) / (int16_t) nSamplesInFrame : 0); ci++; } // for ch if (nrChannels > 1) // common_window element detection for use in StereoCoreToolInfo() { IcsInfo& icsInfo0 = coreConfig.icsInfoCurr[0]; IcsInfo& icsInfo1 = coreConfig.icsInfoCurr[1]; USAC_WSEQ& winSeq0 = icsInfo0.windowSequence; USAC_WSEQ& winSeq1 = icsInfo1.windowSequence; if (winSeq0 != winSeq1) // try to synch window_sequences { const USAC_WSEQ initialWs0 = winSeq0; const USAC_WSEQ initialWs1 = winSeq1; winSeq0 = winSeq1 = windowSequenceSynch[initialWs0][initialWs1]; // equalization if ((winSeq0 != initialWs0) && (winSeq0 == EIGHT_SHORT)) { #if !RESTRICT_TO_AAC if ((tsCurr[0] * 7 < tsCurr[1] * 2) && (tsNext[0] * 7 < tsNext[1] * 2)) { winSeq0 = STOP_START; // don't synchronize to EIGHT_SHORT but keep low overlap } else #endif icsInfo0.windowGrouping = icsInfo1.windowGrouping; } if ((winSeq1 != initialWs1) && (winSeq1 == EIGHT_SHORT)) { #if !RESTRICT_TO_AAC if ((tsCurr[1] * 7 < tsCurr[0] * 2) && (tsNext[1] * 7 < tsNext[0] * 2)) { winSeq1 = STOP_START; // don't synchronize to EIGHT_SHORT but keep low overlap } else #endif icsInfo1.windowGrouping = icsInfo0.windowGrouping; } } else if (winSeq0 == EIGHT_SHORT) // resynchronize scale_factor_grouping if necessary { const int16_t tranLocSynch = __min (m_tranLocCurr[ci - 2], m_tranLocCurr[ci - 1]); icsInfo0.windowGrouping = icsInfo1.windowGrouping = (tranLocSynch * 8) / (int16_t) nSamplesInFrame; } if ((icsInfo0.windowShape != WINDOW_SINE) || (icsInfo1.windowShape != WINDOW_SINE)) { icsInfo0.windowShape = WINDOW_KBD; // always synchronize window_shapes in order to icsInfo1.windowShape = WINDOW_KBD; // encourage synch in next frame; KBD dominates } coreConfig.commonWindow = (winSeq0 == winSeq1); // synch memset (coreConfig.stereoDataPrev, 16, (MAX_NUM_SWB_LONG + 1) * sizeof (uint8_t)); if (((winSeq0 == EIGHT_SHORT) == (coreConfig.icsInfoPrev[0].windowSequence == EIGHT_SHORT)) && !m_indepFlag && ((winSeq1 == EIGHT_SHORT) == (coreConfig.icsInfoPrev[1].windowSequence == EIGHT_SHORT)) && (coreConfig.stereoMode > 0)) { const unsigned lastGrpOffset = (coreConfig.icsInfoPrev[0].windowSequence == EIGHT_SHORT ? m_numSwbShort * (NUM_WINDOW_GROUPS - 1) : 0); memcpy (coreConfig.stereoDataPrev, &coreConfig.stereoDataCurr[lastGrpOffset], (coreConfig.icsInfoPrev[0].maxSfb + 1) * sizeof (uint8_t)); } } // if nrChannels > 1 } ci -= nrChannels; // modulated complex lapped transform (MCLT) for all channels, windows for (unsigned ch = 0; ch < nrChannels; ch++) // channel loop { const IcsInfo& icsPrev = coreConfig.icsInfoPrev[ch]; const IcsInfo& icsCurr = coreConfig.icsInfoCurr[ch]; const USAC_WSEQ wsCurr = icsCurr.windowSequence; const bool eightShorts = (wsCurr == EIGHT_SHORT); SfbGroupData& grpData = coreConfig.groupingData[ch]; grpData.numWindowGroups = (eightShorts ? 
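                                   // EIGHT_SHORT frames are coded with NUM_WINDOW_GROUPS window groups
                                   // whose lengths are taken from windowGroupingTable[windowGrouping];
                                   // all other window sequences form a single group spanning the frame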
NUM_WINDOW_GROUPS : 1); // fill groupingData memcpy (grpData.windowGroupLength, windowGroupingTable[icsCurr.windowGrouping], NUM_WINDOW_GROUPS * sizeof (uint8_t)); errorValue |= m_transform.applyMCLT (m_timeSignals[ci], eightShorts, icsPrev.windowShape != WINDOW_SINE, icsCurr.windowShape != WINDOW_SINE, wsCurr > LONG_START /*lOL*/, (wsCurr % 3) != ONLY_LONG /*lOR*/, m_mdctSignals[ci], m_mdstSignals[ci]); m_scaleFacData[ci] = &grpData; ci++; } } // for el return errorValue; } // constructor ExhaleEncoder::ExhaleEncoder (int32_t* const inputPcmData, unsigned char* const outputAuData, const unsigned sampleRate /*= 44100*/, const unsigned numChannels /*= 2*/, const unsigned frameLength /*= 1024*/, const unsigned indepPeriod /*= 45*/, const unsigned varBitRateMode /*= 3*/ #if !RESTRICT_TO_AAC , const bool useNoiseFilling /*= true*/, const bool useEcodisExt /*= false*/ #endif ) { // adopt basic coding parameters m_bitRateMode = __min (9, varBitRateMode); m_channelConf = (numChannels >= 7 ? CCI_UNDEF : (USAC_CCI) numChannels); // see 23003-3, Tables 73 & 161 if (m_channelConf == CCI_CONF) { m_channelConf = CCI_2_CHM; // passing numChannels = 0 to ExhaleEncoder is interpreted as 2-ch dual-mono } m_numElements = elementCountConfig[m_channelConf % USAC_MAX_NUM_ELCONFIGS]; // used in UsacDecoderConfig m_frameCount = 0; m_frameLength = (USAC_CCFL) frameLength; // coreCoderFrameLength, signaled using coreSbrFrameLengthIndex m_frequencyIdx = toSamplingFrequencyIndex (sampleRate); // I/O sample rate as usacSamplingFrequencyIndex m_indepFlag = true; // usacIndependencyFlag in UsacFrame(), will be set per frame, true in first frame m_indepPeriod = (indepPeriod == 0 ? UINT_MAX : indepPeriod); // RAP, signaled using usacIndependencyFlag #if !RESTRICT_TO_AAC m_nonMpegExt = useEcodisExt; #endif m_numSwbLong = MAX_NUM_SWB_LONG; m_numSwbShort = MAX_NUM_SWB_SHORT; m_outAuData = outputAuData; m_pcm24Data = inputPcmData; m_tempIntBuf = nullptr; // initialize all helper structs for (unsigned el = 0; el < USAC_MAX_NUM_ELEMENTS; el++) { const ELEM_TYPE et = elementTypeConfig[m_channelConf % USAC_MAX_NUM_ELCONFIGS][el]; // usacElementType m_elementData[el] = nullptr; m_perCorrCurr[el] = 0; #if !RESTRICT_TO_AAC m_noiseFilling[el] = (useNoiseFilling && (et < ID_USAC_LFE)); m_timeWarping[el] = (false /* N/A */ && (et < ID_USAC_LFE)); #endif } // initialize all signal buffers for (unsigned ch = 0; ch < USAC_MAX_NUM_CHANNELS; ch++) { m_bandwidCurr[ch] = 0; m_bandwidPrev[ch] = 0; m_mdctQuantMag[ch] = nullptr; m_mdctSignals[ch] = nullptr; m_mdstSignals[ch] = nullptr; m_scaleFacData[ch] = nullptr; m_specAnaCurr[ch] = 0; //m_specAnaPrev[ch] = 0; m_tempAnaCurr[ch] = 0; m_tempAnaNext[ch] = 0; m_timeSignals[ch] = nullptr; m_tranLocCurr[ch] = -1; m_tranLocNext[ch] = -1; } // initialize all window buffers for (unsigned ws = WINDOW_SINE; ws <= WINDOW_KBD; ws++) { m_timeWindowL[ws] = nullptr; m_timeWindowS[ws] = nullptr; } } // destructor ExhaleEncoder::~ExhaleEncoder () { // free allocated helper structs for (unsigned el = 0; el < USAC_MAX_NUM_ELEMENTS; el++) { MFREE (m_elementData[el]); } // free allocated signal buffers for (unsigned ch = 0; ch < USAC_MAX_NUM_CHANNELS; ch++) { MFREE (m_mdctQuantMag[ch]); MFREE (m_mdctSignals[ch]); MFREE (m_mdstSignals[ch]); MFREE (m_timeSignals[ch]); } // free allocated window buffers for (unsigned ws = WINDOW_SINE; ws <= WINDOW_KBD; ws++) { MFREE (m_timeWindowL[ws]); MFREE (m_timeWindowS[ws]); } // execute sub-class destructors } // public functions unsigned ExhaleEncoder::encodeLookahead () { 
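  // Worked example of the look-ahead buffering, assuming the default 1024-sample frame length:
  // nSamplesTempAna = (1024 * 25) >> 4 = 1600 samples of pre-delay, and initEncoder() sizes each
  // channel's time buffer to (1024 * 41) >> 4 = 2624 samples, i.e. the pre-delay plus one frame
  // of newly delivered PCM. The first 1600 samples have no input yet, so they are extrapolated
  // backwards below with a 4-tap LP filter, giving the temporal analysis and the first transform
  // plausible "previous frame" data instead of silence.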
const unsigned nChannels = toNumChannels (m_channelConf); const unsigned nSamplesInFrame = toFrameLength (m_frameLength); const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead const int32_t* chSig = m_pcm24Data; unsigned ch, s; // copy nSamplesInFrame external channel-interleaved samples into internal channel buffers for (s = 0; s < nSamplesInFrame; s++) // sample loop { for (ch = 0; ch < nChannels; ch++) // channel loop { m_timeSignals[ch][nSamplesTempAna + s] = *(chSig++); } } // generate first nSamplesTempAna deinterleaved samples (previous frame data) by LP filter for (ch = 0; ch < nChannels; ch++) { short filterC[MAX_PREDICTION_ORDER] = {0, 0, 0, 0}; short parCorC[MAX_PREDICTION_ORDER] = {0, 0, 0, 0}; int32_t* predSig = &m_timeSignals[ch][nSamplesTempAna]; // end of signal to be predicted m_linPredictor.calcParCorCoeffs (predSig, uint16_t (nSamplesInFrame >> 1), MAX_PREDICTION_ORDER, parCorC); m_linPredictor.parCorToLpCoeffs (parCorC, MAX_PREDICTION_ORDER, filterC); for (s = nSamplesTempAna; s > 0; s--) // generate prediction signal without limit cycles { const int64_t predSample = *(predSig + 0) * (int64_t) filterC[0] + *(predSig + 1) * (int64_t) filterC[1] + *(predSig + 2) * (int64_t) filterC[2] + *(predSig + 3) * (int64_t) filterC[3]; *(--predSig) = int32_t ((predSample > 0 ? -predSample + (1 << 9) - 1 : -predSample) >> 9); } } // set initial temporal channel statistic to something meaningful before first coded frame m_tempAnalyzer.temporalAnalysis (m_timeSignals, nChannels, nSamplesInFrame, nSamplesTempAna - nSamplesInFrame); if (temporalProcessing ()) // time domain: window length, overlap, grouping, and transform { return 2; // internal error in temporal processing } if (spectralProcessing ()) // MCLT domain: (common_)max_sfb, grouping 2, TNS, and SFB data { return 2; // internal error in spectral processing } if (psychBitAllocation ()) // SFB domain: psychoacoustic model and scale factor estimation { return 1; // internal error in bit-allocation code } return quantizationCoding (); // max(3, coded bytes) } unsigned ExhaleEncoder::encodeFrame () { const unsigned nChannels = toNumChannels (m_channelConf); const unsigned nSamplesInFrame = toFrameLength (m_frameLength); const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead const int32_t* chSig = m_pcm24Data; unsigned ch, s; // move internal channel buffers nSamplesInFrame to the past to make room for next samples for (ch = 0; ch < nChannels; ch++) { memcpy (&m_timeSignals[ch][0], &m_timeSignals[ch][nSamplesInFrame], nSamplesInFrame * sizeof (int32_t)); memcpy (&m_timeSignals[ch][nSamplesInFrame], &m_timeSignals[ch][2 * nSamplesInFrame], (nSamplesTempAna - nSamplesInFrame) * sizeof (int32_t)); } // copy nSamplesInFrame external channel-interleaved samples into internal channel buffers for (s = 0; s < nSamplesInFrame; s++) // sample loop { for (ch = 0; ch < nChannels; ch++) // channel loop { m_timeSignals[ch][nSamplesTempAna + s] = *(chSig++); } } if (temporalProcessing ()) // time domain: window length, overlap, grouping, and transform { return 2; // internal error in temporal processing } if (spectralProcessing ()) // MCLT domain: (common_)max_sfb, grouping 2, TNS, and SFB data { return 2; // internal error in spectral processing } if (psychBitAllocation ()) // SFB domain: psychoacoustic model and scale factor estimation { return 1; // internal error in bit-allocation code } return quantizationCoding (); // max(3, coded bytes) } unsigned 
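// initEncoder: verifies the parameters passed to the constructor, allocates all element, channel,
// and window buffers, and, if audioConfigBuffer is given, writes the UsacConfig() into it (its
// size in bytes is reported via audioConfigBytes). Returns 0 on success, otherwise a bit-mask of
// error flags: 128 channel configuration, 64 frame length, 32 sampling rate, 16 null I/O pointers,
// 8/4/2 allocation failures, 1 initialization or UsacConfig() creation failure.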
ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uint32_t* const audioConfigBytes /*= nullptr*/) { const unsigned nChannels = toNumChannels (m_channelConf); const unsigned nSamplesInFrame = toFrameLength (m_frameLength); const unsigned specSigBufSize = nSamplesInFrame * sizeof (int32_t); const unsigned timeSigBufSize = ((nSamplesInFrame * 41) >> 4) * sizeof (int32_t); // core-codec delay*4 const unsigned char chConf = m_channelConf; unsigned errorValue = 0; // no error // check user's input parameters #if RESTRICT_TO_AAC if ((m_channelConf <= CCI_CONF) || (m_channelConf > CCI_8_CH)) #else if ((m_channelConf <= CCI_CONF) || (m_channelConf > CCI_8_CHS)) #endif { errorValue |= 128; } #if RESTRICT_TO_AAC if (m_frameLength != CCFL_1024) #else if ((m_frameLength != CCFL_768) && (m_frameLength != CCFL_1024)) #endif { errorValue |= 64; } if (m_frequencyIdx < 0) { errorValue |= 32; } if ((m_outAuData == nullptr) || (m_pcm24Data == nullptr)) { errorValue |= 16; } if (errorValue > 0) return errorValue; // get window band table index errorValue = (unsigned) m_frequencyIdx; // for temporary storage #if RESTRICT_TO_AAC m_swbTableIdx = freqIdxToSwbTableIdxAAC[errorValue]; #else m_swbTableIdx = (m_frameLength == CCFL_768 ? freqIdxToSwbTableIdx768[errorValue] : freqIdxToSwbTableIdxAAC[errorValue]); #endif errorValue = 0; if (m_elementData[0] != nullptr) // initEncoder was called before, don't reallocate memory { if (audioConfigBuffer != nullptr) // recreate the UsacConfig() { errorValue = m_outStream.createAudioConfig (m_frequencyIdx, m_frameLength != CCFL_1024, chConf, m_numElements, elementTypeConfig[chConf], audioConfigBytes ? *audioConfigBytes : 0, #if !RESTRICT_TO_AAC m_timeWarping, m_noiseFilling, #endif audioConfigBuffer); if (audioConfigBytes) *audioConfigBytes = errorValue; // size of UsacConfig() in bytes errorValue = (errorValue == 0 ? 
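                                // createAudioConfig() returns the UsacConfig() size in bytes, so a
                                // return value of 0 means the config could not be written; map that
                                // case to error flag 1 and any nonzero size to 0 (success)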
1 : 0); } return errorValue; } // allocate all helper structs for (unsigned el = 0; el < m_numElements; el++) // element loop { if ((m_elementData[el] = (CoreCoderData*) malloc (sizeof (CoreCoderData))) == nullptr) { errorValue |= 8; } else { memset (m_elementData[el], 0, sizeof (CoreCoderData)); m_elementData[el]->elementType = elementTypeConfig[chConf][el]; // usacElementType[el] } } // allocate all signal buffers for (unsigned ch = 0; ch < nChannels; ch++) { if ((m_entropyCoder[ch].initCodingMemory (nSamplesInFrame) > 0) || (m_mdctQuantMag[ch]= (uint8_t*) malloc (nSamplesInFrame * sizeof (uint8_t))) == nullptr || (m_mdctSignals[ch] = (int32_t*) malloc (specSigBufSize)) == nullptr || (m_mdstSignals[ch] = (int32_t*) malloc (specSigBufSize)) == nullptr || (m_timeSignals[ch] = (int32_t*) malloc (timeSigBufSize)) == nullptr) { errorValue |= 4; } } // allocate all window buffers for (unsigned ws = WINDOW_SINE; ws <= WINDOW_KBD; ws++) { if ((m_timeWindowL[ws] = initWindowHalfCoeffs ((USAC_WSHP) ws, nSamplesInFrame)) == nullptr || (m_timeWindowS[ws] = initWindowHalfCoeffs ((USAC_WSHP) ws, nSamplesInFrame >> 3)) == nullptr) { errorValue |= 2; } } if (errorValue > 0) return errorValue; // initialize coder class memory m_tempIntBuf = m_timeSignals[0]; #if EC_TRELLIS_OPT_CODING if (m_sfbQuantizer.initQuantMemory (nSamplesInFrame, numSwbOffsetL[m_swbTableIdx] - 1, m_bitRateMode, toSamplingRate (m_frequencyIdx)) > 0 || #else if (m_sfbQuantizer.initQuantMemory (nSamplesInFrame) > 0 || #endif m_specAnalyzer.initLinPredictor (&m_linPredictor) > 0 || m_transform.initConstants (m_tempIntBuf, m_timeWindowL, m_timeWindowS, nSamplesInFrame) > 0) { errorValue |= 1; } if ((errorValue == 0) && (audioConfigBuffer != nullptr)) // save UsacConfig() for writeout { errorValue = m_outStream.createAudioConfig (m_frequencyIdx, m_frameLength != CCFL_1024, chConf, m_numElements, elementTypeConfig[chConf], audioConfigBytes ? *audioConfigBytes : 0, #if !RESTRICT_TO_AAC m_timeWarping, m_noiseFilling, #endif audioConfigBuffer); if (audioConfigBytes) *audioConfigBytes = errorValue; // length of UsacConfig() in bytes errorValue = (errorValue == 0 ? 1 : 0); } return errorValue; }
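/* Usage sketch (illustrative only): the driver names pcmBuf, auBuf, ascBuf, readPcmFrame, and
   writeAccessUnit are hypothetical caller-side placeholders, not part of this library, and the
   constructor arguments shown are simply its default values. Error handling is reduced to a
   minimum; at the end of the stream, additional (e.g. silent) frames may be needed to flush the
   encoder's look-ahead delay.

     int32_t       pcmBuf[2 * 1024];    // one frame of interleaved stereo PCM samples
     unsigned char auBuf[96 * 1024];    // receives one coded access unit per frame
     unsigned char ascBuf[128];         // receives the UsacConfig()
     uint32_t      ascBytes = sizeof (ascBuf); // in: assumed capacity, out: UsacConfig() size

     ExhaleEncoder enc (pcmBuf, auBuf, 44100, 2, 1024, 45, 3);

     if (enc.initEncoder (ascBuf, &ascBytes) > 0) return -1; // parameter or allocation error

     readPcmFrame (pcmBuf);                     // caller fills pcmBuf with the first frame
     unsigned auBytes = enc.encodeLookahead (); // codes the look-ahead (pre-delay) frame
     if (auBytes < 3) return -1;                // 1 or 2 indicates an internal error
     writeAccessUnit (auBuf, auBytes);

     while (readPcmFrame (pcmBuf))              // refill pcmBuf, then code the next frame
     {
       auBytes = enc.encodeFrame ();
       if (auBytes < 3) return -1;
       writeAccessUnit (auBuf, auBytes);
     }
*/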