non-SBR pre-roll

2025-06-05 21:59:32 +02:00 · 2021-01-17 19:00:10 +01:00
parent 36964d2051
commit 920a3e8d56
7 changed files with 87 additions and 32 deletions
--- a/src/app/basicMP4Writer.cpp
+++ b/src/app/basicMP4Writer.cpp
@@ -6,7 +6,7 @@
 * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
 * party rights, including patent rights. No such rights are granted under this License.
 *
- * Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved.
+ * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
 */

 #include "exhaleAppPch.h"
@@ -364,7 +364,11 @@ int BasicMP4Writer::initHeader (const uint32_t audioLength) // reserve bytes for
  /* NOTE: the following condition is, as far as I can tell, correct, but some decoders with DRC processing
  may decode too few samples with it. Hence, I disabled it. See also corresponding NOTE in exhaleApp.cpp */
  const bool flushFrameUsed = true; // ((audioLength + m_pregapLength) % m_frameLength) > 0;
+#ifdef NO_PREROLL_DATA
  const unsigned frameCount = ((audioLength + m_frameLength - 1) / m_frameLength) + (flushFrameUsed ? 2 : 1);
+#else
+  const unsigned frameCount = ((audioLength + m_frameLength - 1) / m_frameLength) + (flushFrameUsed ? 1 : 0);
+#endif
  const unsigned chunkCount = ((frameCount + m_rndAccPeriod - 1) / m_rndAccPeriod);
  const unsigned finalChunk = (frameCount <= m_rndAccPeriod ? 0 : frameCount % m_rndAccPeriod);
 #ifndef NO_FIX_FOR_ISSUE_1
--- a/src/app/exhaleApp.cpp
+++ b/src/app/exhaleApp.cpp
@@ -5,7 +5,7 @@
 * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
 * party rights, including patent rights. No such rights are granted under this License.
 *
- * Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved.
+ * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
 */

 #include "exhaleAppPch.h"
@@ -683,7 +683,11 @@ int main (const int argc, char* argv[])
 #endif
    // allocate dynamic frame memory buffers
    inPcmData = (int32_t*) malloc (inFrameSize * numChannels); // max frame in size
+#ifdef NO_PREROLL_DATA
    outAuData = (uint8_t*) malloc ((6144 >> 3) * numChannels); // max frame AU size
+#else
+    outAuData = (uint8_t*) malloc ((9216 >> 3) * numChannels); // max frame AU size
+#endif
    if ((inPcmData == nullptr) || (outAuData == nullptr))
    {
      fprintf_s (stderr, " ERROR while trying to allocate dynamic memory! Not enough free RAM available!\n\n");
@@ -736,12 +740,14 @@ int main (const int argc, char* argv[])
      memset (outAuData, 0, 108 * sizeof (uint8_t));  // max. allowed ASC + UC size
      i = exhaleEnc.initEncoder (outAuData, &bw); // bw stores actual ASC + UC size

+      if ((i |= mp4Writer.open (outFileHandle, sampleRate, numChannels, inSampDepth, frameLength, startLength
 #if ENABLE_SIMPLE_SBR
-      if ((i |= mp4Writer.open (outFileHandle, sampleRate, numChannels, inSampDepth, frameLength, startLength + (coreSbrFrameLengthIndex >= 3 ? 962 : 0),
-#else
-      if ((i |= mp4Writer.open (outFileHandle, sampleRate, numChannels, inSampDepth, frameLength, startLength,
+                                + (coreSbrFrameLengthIndex >= 3 ? 962 : 0)
 #endif
-                                indepPeriod, outAuData, bw, (time (nullptr) + 2082844800) & UINT_MAX, (char) variableCoreBitRateMode)) != 0)
+#ifndef NO_PREROLL_DATA
+                                - frameLength
+#endif
+                              , indepPeriod, outAuData, bw, (time (nullptr) + 2082844800) & UINT_MAX, (char) variableCoreBitRateMode)) != 0)
      {
        fprintf_s (stderr, " ERROR while trying to initialize xHE-AAC encoder: error value %d was returned!\n\n", i);
        i <<= 2; // return value
@@ -804,6 +810,7 @@ int main (const int argc, char* argv[])
 #endif
        goto mainFinish; // coder-time error
      }
+#ifdef NO_PREROLL_DATA
      if (bwMax < bw) bwMax = bw;
      // write first AU, add frame to header
      if ((mp4Writer.addFrameAU (outAuData, bw) != (int) bw) || loudnessEst.addNewPcmData (frameLength))
@@ -814,7 +821,15 @@ int main (const int argc, char* argv[])
        goto mainFinish;   // writeout error
      }
      byteCount += bw;
-
+#else
+      if (loudnessEst.addNewPcmData (frameLength))
+      {
+# if USE_EXHALELIB_DLL
+        exhaleDelete (&exhaleEnc);
+# endif
+        goto mainFinish; // estimation error
+      }
+#endif
 #if ENABLE_RESAMPLING
      while (wavReader.read (inPcmData, (frameLength * resampRatio) >> resampShift) > 0) // read a new audio frame
 #else
--- a/src/app/loudnessEstim.cpp
+++ b/src/app/loudnessEstim.cpp
@@ -22,7 +22,7 @@ static const int64_t kFilterCoeffs[4][8] = { // first 4: numerator (16->-32 bit)

 // constructor
 LoudnessEstimator::LoudnessEstimator (int32_t* const inputPcmData,         const unsigned bitDepth /*= 24*/,
-                                      const unsigned sampleRate /*= 44100*/, const unsigned numChannels /*= 2*/)
+                                      const unsigned sampleRate /*= 44k*/, const unsigned numChannels /*= 2*/)
 {
 #if LE_ACCURATE_CALC
  m_filterCoeffs  = kFilterCoeffs[sampleRate <= 44100 ? (sampleRate <= 32000 ? 0 : 1) : (sampleRate <= 48000 ? 2 : 3)];
@@ -161,7 +161,7 @@ uint32_t LoudnessEstimator::getStatistics (const bool includeWarmUp /*= false*/)
  }
  if (zg < LE_THRESH_ABS) return peakValue16Bits;

-  zg = LE_LUFS_OFFSET + 10.0f * log10 (zg / (normFac * numBlocks * (float) m_inputMaxValue * (float) m_inputMaxValue));
+  zg = LE_LUFS_OFFSET + 10.0f * (float) log10 (zg / (normFac * numBlocks * (float) m_inputMaxValue * (float) m_inputMaxValue));
 #if LE_ACCURATE_CALC
  zg -= m_filterFactor * 0.046875f; // for sample rates other than 48 kHz
 #endif
--- a/src/lib/bitStreamWriter.cpp
+++ b/src/lib/bitStreamWriter.cpp
@@ -5,7 +5,7 @@
 * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
 * party rights, including patent rights. No such rights are granted under this License.
 *
- * Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved.
+ * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
 */

 #include "exhaleLibPch.h"
@@ -739,8 +739,16 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex,
  m_auBitStream.write (usfi, 5); // usacSamplingFrequencyIndex (after SBR dec.!)
  m_auBitStream.write (shortFrameLength ? 0 : fli, 3);// coreSbrFrameLengthIndex
  m_auBitStream.write (chConfigurationIndex, 5);    // channelConfigurationIndex
+#ifdef NO_PREROLL_DATA
  m_auBitStream.write (numElements - 1, 4);  // numElements in UsacDecoderConfig
+#else
+  m_auBitStream.write (numElements, 4); // 4bit numElements in UsacDecoderConfig

+  m_auBitStream.write (ID_USAC_EXT, 2); // usacElementType[0] = 3, for IPF stuff
+  m_auBitStream.write (3, 4); // UsacExtElementConfig(), ID_EXT_ELE_AUDIOPREROLL
+  m_auBitStream.write (0, 6); // usacExtElementConfigLength = 0, DefaultLengthPresent = PayloadFrag = 0
+  bitCount += 12;
+#endif
  for (unsigned el = 0; el < numElements; el++) // el element loop
  {
    m_auBitStream.write ((unsigned) elementType[el], 2);  // usacElementType[el]
@@ -802,7 +810,7 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex,
  bitCount += (8 - m_auBitStream.heldBitCount) & 7;
  writeByteAlignment ();  // flush bytes

-  memcpy (audioConfig, &m_auBitStream.stream.front (), __min (15u + fli, bitCount >> 3));
+  memcpy (audioConfig, &m_auBitStream.stream.front (), __min (17u + fli, bitCount >> 3));

  return (bitCount >> 3);  // byte count
 }
@@ -812,7 +820,7 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData,
                                            const bool usacIndependencyFlag,    const uint8_t numElements,
                                            const uint8_t numSwbShort,          uint8_t* const tempBuffer,
 #if !RESTRICT_TO_AAC
-                                            const bool* const tw_mdct /*N/A*/,  const bool* const noiseFilling,
+                                            const bool* const tw_mdct /*N/A*/,  const bool* const noiseFilling, const bool ipf,
 #endif
                                            const uint8_t sbrRatioShiftValue,   int32_t** const sbrInfoAndData,
                                            unsigned char* const accessUnit,    const unsigned nSamplesInFrame /*= 1024*/)
@@ -822,19 +830,45 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData,
  if ((elementData == nullptr) || (entropyCoder == nullptr) || (tempBuffer == nullptr) || (sbrInfoAndData == nullptr) ||
      (mdctSignals == nullptr) || (mdctQuantMag == nullptr) || (accessUnit == nullptr) || (nSamplesInFrame > 2048) ||
 #if !RESTRICT_TO_AAC
-      (noiseFilling == nullptr) || (tw_mdct == nullptr) ||
+      (noiseFilling == nullptr) || (tw_mdct == nullptr) || (ipf && !usacIndependencyFlag) ||
 #endif
      (numElements == 0) || (numElements > USAC_MAX_NUM_ELEMENTS) || (numSwbShort < MIN_NUM_SWB_SHORT) || (numSwbShort > MAX_NUM_SWB_SHORT))
  {
    return 0; // invalid arguments error
  }
-
+#ifndef NO_PREROLL_DATA
+  if (ipf) // save last AU for ext. data
+  {
+    bitCount = __min (65532, (uint32_t) m_auBitStream.stream.size ());
+    memcpy (tempBuffer, &m_auBitStream.stream.front (), bitCount);
+  }
+#endif
  m_auBitStream.reset ();
  m_frameLength = nSamplesInFrame;
  m_numSwbShort = numSwbShort;
  m_uCharBuffer = tempBuffer;
  m_auBitStream.write (usacIndependencyFlag ? 1 : 0, 1);

+#ifndef NO_PREROLL_DATA
+  m_auBitStream.write (ipf ? 1 : 0, 1); // UsacExtElement, usacExtElementPresent
+  if (ipf)
+  {
+    const unsigned payloadLength = bitCount + 3; // ext. payload size, in bytes!
+
+    m_auBitStream.write (0, 1); // usacExtElementUseDefaultLength = 0 (variable)
+    m_auBitStream.write (CLIP_UCHAR (payloadLength), 8);
+    if (payloadLength > 254) m_auBitStream.write (payloadLength - 253, 16); // valueAdd
+
+    m_auBitStream.write (0, 6); // start AudioPreRoll - configLen = applyCrossfade = reserved = 0
+    m_auBitStream.write (1, 2); // numPreRollFrames, only one supported for now!
+    m_auBitStream.write (bitCount, 16); // auLen
+
+    while (ci < bitCount) m_auBitStream.write (tempBuffer[ci++], 8); // write AU
+    ci = 0;
+    bitCount = (payloadLength > 254 ? 26 : 10) + (payloadLength << 3); // for ext. bits
+  }
+  bitCount++; // for ElementPresent flag
+#endif
  for (unsigned el = 0; el < numElements; el++) // el element loop
  {
    const CoreCoderData* const elData = elementData[el];
@@ -935,7 +969,10 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData,
  bitCount += (8 - m_auBitStream.heldBitCount) & 7;
  writeByteAlignment ();  // flush bytes

+#if RESTRICT_TO_AAC
  memcpy (accessUnit, &m_auBitStream.stream.front (), __min (768 * ci, bitCount >> 3));
-
+#else
+  memcpy (accessUnit, &m_auBitStream.stream.front (), __min (ci * (ipf ? 1152 : 768), bitCount >> 3));
+#endif
  return (bitCount >> 3);  // byte count
 }
--- a/src/lib/bitStreamWriter.h
+++ b/src/lib/bitStreamWriter.h
@@ -5,7 +5,7 @@
 * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
 * party rights, including patent rights. No such rights are granted under this License.
 *
- * Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved.
+ * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
 */

 #ifndef _BIT_STREAM_WRITER_H_
@@ -68,7 +68,7 @@ public:
                              const bool usacIndependencyFlag,    const uint8_t numElements,
                              const uint8_t numSwbShort,          uint8_t* const tempBuffer,
 #if !RESTRICT_TO_AAC
-                              const bool* const tw_mdct /*N/A*/,  const bool* const noiseFilling,
+                              const bool* const tw_mdct /*N/A*/,  const bool* const noiseFilling, const bool ipf,
 #endif
                              const uint8_t sbrRatioShiftValue,   int32_t** const sbrInfoAndData,
                              unsigned char* const accessUnit,    const unsigned nSamplesInFrame = 1024);
--- a/src/lib/exhaleEnc.cpp
+++ b/src/lib/exhaleEnc.cpp
@@ -5,7 +5,7 @@
 * and comes with ABSOLUTELY NO WARRANTY. This software may be subject to other third-
 * party rights, including patent rights. No such rights are granted under this License.
 *
- * Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved.
+ * Copyright (c) 2018-2021 Christian R. Helmrich, project ecodis. All rights reserved.
 */

 #include "exhaleLibPch.h"
@@ -773,7 +773,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
  const uint16_t scaleSBR        = (m_shiftValSBR > 0 || m_nonMpegExt ? sbrRateOffset[m_bitRateMode] : 0); // -25% rate
  const uint64_t scaleSr         = (samplingRate < 27713 ? (samplingRate < 23004 ? 32 : 34) - __min (3 << m_shiftValSBR, m_bitRateMode)
                                                         : (samplingRate < 37566 && m_bitRateMode != 3u ? 36 : 37)) - (nChannels >> 1);
-  const uint64_t scaleBr         = (m_bitRateMode == 0 ? __min (32, 17u + (((samplingRate + (1 << 11)) >> 12) << 1) - (nChannels >> 1))
+  const uint64_t scaleBr         = (m_bitRateMode == 0 || m_frameCount <= 1 ? __min (32, 17u + (((samplingRate + (1 << 11)) >> 12) << 1) - (nChannels >> 1))
                                   : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1)) + scaleSBR;
  uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf;
  uint8_t  meanSpecFlat[USAC_MAX_NUM_CHANNELS];
@@ -1174,7 +1174,7 @@ unsigned ExhaleEncoder::quantizationCoding ()  // apply MDCT quantization and en
          const uint16_t peakIndex  = (shortWinCurr ? 0 : (m_specAnaCurr[ci] >> 5) & 2047);
          const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort - 2 + (meanSpecFlat[ci] >> 6) : maxSfbLong  - 6 + (meanSpecFlat[ci] >> 5)) +
                                            (shortWinCurr ? -3 + (((1 << 5) + meanTempFlat[ci]) >> 6) : -7 + (((1 << 4) + meanTempFlat[ci]) >> 5));
-          const unsigned targetBitCount25 = ((60000 + 20000 * (m_bitRateMode + m_shiftValSBR)) * nSamplesInFrame) /
+          const unsigned targetBitCount25 = ((60000 + 20000 * ((m_bitRateMode + m_shiftValSBR) >> (m_frameCount <= 1 ? 2 : 0))) * nSamplesInFrame) /
                                            (samplingRate * ((grpData.numWindowGroups + 1) >> 1));
          unsigned b = grpData.sfbsPerGroup - 1;

@@ -1184,7 +1184,7 @@ unsigned ExhaleEncoder::quantizationCoding ()  // apply MDCT quantization and en
 #if EC_TRELLIS_OPT_CODING
          if (grpLength == 1) // finalize bit count estimate, RDOC
          {
-            estimBitCount = m_sfbQuantizer.quantizeSpecRDOC (entrCoder, grpScaleFacs, __min (estimBitCount + 2, targetBitCount25),
+            estimBitCount = m_sfbQuantizer.quantizeSpecRDOC (entrCoder, grpScaleFacs, estimBitCount + 2u,
                                                             grpOff, grpRms, grpData.sfbsPerGroup, m_mdctQuantMag[ci]);
            for (b = 1; b < grpData.sfbsPerGroup; b++)
            {
@@ -1327,11 +1327,10 @@ unsigned ExhaleEncoder::quantizationCoding ()  // apply MDCT quantization and en
        m_coreSignals[ci][0] |= getSbrEnvelopeAndNoise (&m_coreSignals[ci][nSamplesTempAna - 64 + nSamplesInFrame], msfVal,
                                                        __max (m_meanTempPrev[ci], meanTempFlat[ci]) >> 3, m_bitRateMode == 0,
                                                        m_indepFlag, msfSte, tmpValSynch, nSamplesInFrame, &m_coreSignals[ci][1]);
-        if (ch + 1 == nrChannels) // update the flatness histories
+        if (ch + 1 == nrChannels) // update flatness histories - TODO: coupling
        {
          m_meanSpecPrev[ci] = meanSpecFlat[ci];  m_meanSpecPrev[s] = meanSpecFlat[s];
          m_meanTempPrev[ci] = meanTempFlat[ci];  m_meanTempPrev[s] = meanTempFlat[s];
-          // TODO: coupling (m_coreSignals[ci][0] |= 1 << 23;)
        }
      }
      ci++;
@@ -1341,7 +1340,7 @@ unsigned ExhaleEncoder::quantizationCoding ()  // apply MDCT quantization and en
  return (errorValue > 0 ? 0 : m_outStream.createAudioFrame (m_elementData, m_entropyCoder, m_mdctSignals, m_mdctQuantMag, m_indepFlag,
                                                             m_numElements, m_numSwbShort, (uint8_t* const) m_tempIntBuf,
 #if !RESTRICT_TO_AAC
-                                                             m_timeWarping, m_noiseFilling,
+                                                             m_timeWarping, m_noiseFilling, (m_frameCount == 2),
 #endif
                                                             m_shiftValSBR, m_coreSignals, m_outAuData, nSamplesInFrame)); // returns AU size
 }
--- a/src/lib/exhaleLibPch.cpp
+++ b/src/lib/exhaleLibPch.cpp
@@ -205,7 +205,7 @@ int32_t getSbrEnvelopeAndNoise (int32_t* const sbrLevels, const uint8_t specFlat
    const int32_t p[3] = {prev & SCHAR_MAX, (prev >> 8) & SCHAR_MAX, (prev >> 16) & SCHAR_MAX};

    if ((t > 0 || !ind) && (getSbrDeltaBitCount (c[0] - p[0], true) + getSbrDeltaBitCount (c[1] - p[1], true) +
-                            getSbrDeltaBitCount (c[2] - p[2], true) < 12)) // approximate!
+                            getSbrDeltaBitCount (c[2] - p[2], true) < 13)) // approximate!
    {
      tmpBest |= 1 << (12 + t); // delta-time coding flag for envelope