diff --git a/CMakeLists.txt b/CMakeLists.txt index f49a762..d9b5ceb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ # Copyright (c) 2018-2020 Christian R. Helmrich, project ecodis. All rights reserved. # -cmake_minimum_required(VERSION 3.5) # Default version of cmake on ubuntu 16.04 +cmake_minimum_required(VERSION 3.5) # Default version of cmake on Ubuntu 16.04 if("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}") message(FATAL_ERROR "Building in the source tree is not supported.\n" @@ -16,7 +16,7 @@ if("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}") endif() -project(exhale VERSION 1.0.3 LANGUAGES CXX) +project(exhale VERSION 1.0.4 LANGUAGES CXX) if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) set(CMAKE_BUILD_TYPE Release diff --git a/README.md b/README.md index 25966f9..76f3258 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ exhale is being made available under an open-source license which is similar to the 3-clause BSD license but modified to address specific aspects dictated by the nature and the output of this application. -The license text and release notes for the current version 1.0.3 can +The license text and release notes for the current version 1.0.4 can be found in the `include` subdirectory of the exhale distribution. @@ -46,6 +46,9 @@ executable application under Linux and Microsoft Windows. The binary application files will show up in a newly created `bin` subdirectory of the exhale distribution directory and/or a subdirectory thereof. +Note that, for advanced use cases, cmake files are provided as well. +See https://gitlab.com/ecodis/exhale/-/merge_requests/2 for details. + ### Linux and MacOS (GNU Compiler Collection, gcc): In a terminal, change to the exhale distribution directory and enter diff --git a/src/app/exhaleApp.cpp b/src/app/exhaleApp.cpp index a257c6f..38561a5 100644 --- a/src/app/exhaleApp.cpp +++ b/src/app/exhaleApp.cpp @@ -304,19 +304,13 @@ int main (const int argc, char* argv[]) goto mainFinish; // bad output string } - if ((variableCoreBitRateMode < 2) && (wavReader.getSampleRate () > 32000)) + if (wavReader.getSampleRate () > 24000 + (unsigned) variableCoreBitRateMode * 12000) { - fprintf_s (stderr, " ERROR during encoding! Input sample rate must be <=32 kHz for preset mode %d!\n\n", variableCoreBitRateMode); + i = 24 + variableCoreBitRateMode * 12; + fprintf_s (stderr, " ERROR during encoding! Input sample rate must be <=%d kHz for preset mode %d!\n\n", i, variableCoreBitRateMode); i = 4096; // return value - goto mainFinish; // resample to 32 kHz - } - if ((variableCoreBitRateMode < 4) && (wavReader.getSampleRate () > 48000)) - { - fprintf_s (stderr, " ERROR during encoding! Input sample rate must be <=48 kHz for preset mode %d!\n\n", variableCoreBitRateMode); - i = 4096; // return value - - goto mainFinish; // resample to 44 kHz + goto mainFinish; // ask for resampling } if (outPathEnd == 0) // name has no path diff --git a/src/lib/exhaleEnc.cpp b/src/lib/exhaleEnc.cpp index 9405c4f..52e8894 100644 --- a/src/lib/exhaleEnc.cpp +++ b/src/lib/exhaleEnc.cpp @@ -738,6 +738,19 @@ unsigned ExhaleEncoder::getOptParCorCoeffs (const SfbGroupData& grpData, const u return (predGainMax >> 24) & UCHAR_MAX; // max pred gain of all orders and length-1 groups } +#ifndef NO_DTX_MODE +uint32_t ExhaleEncoder::getThr (const unsigned channelIndex, const unsigned sfbIndex) +{ + const uint16_t* const sfbLoudMem = m_sfbLoudMem[channelIndex][sfbIndex]; + uint32_t sumSfbLoud = 0; + + for (int16_t s = 31; s >= 0; s--) sumSfbLoud += sfbLoudMem[s]; + sumSfbLoud = (sumSfbLoud + 16) >> 5; + + return sumSfbLoud * (sumSfbLoud >> (toSamplingRate (m_frequencyIdx) >> 13)); // scaled SMR +} +#endif + unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via scale factors { const unsigned nChannels = toNumChannels (m_channelConf); @@ -746,7 +759,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS); const uint32_t maxSfbLong = (samplingRate < 37566 ? 51 /*32 kHz*/ : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)); const uint64_t scaleSr = (samplingRate < 27713 ? (samplingRate < 24000 ? 32 : 34) - m_bitRateMode : 37) - (nChannels >> 1); - const uint64_t scaleBr = (m_bitRateMode == 0 ? 32 : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1)); + const uint64_t scaleBr = (m_bitRateMode == 0 ? 4 + (samplingRate >> 10) : scaleSr - eightTimesSqrt256Minus[256 - m_bitRateMode] - __min (3, (m_bitRateMode - 1) >> 1)); uint32_t* sfbStepSizes = (uint32_t*) m_tempIntBuf; uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS]; //uint8_t meanTempFlat[USAC_MAX_NUM_CHANNELS]; @@ -928,6 +941,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s if (grpRms[b] < grpRmsMin) grpRmsMin = grpRms[b]; #ifndef NO_DTX_MODE + if ((m_bitRateMode > 0) || (m_numElements > 1) || (samplingRate > 24000)) if ((m_bitRateMode > 3) || (meanSpecFlat[ci] > (SCHAR_MAX >> 1)) || (rmsComp >= TA_EPS)) #endif if (rmsComp >= rmsRef9 && (rmsComp < (grpStepSizes[b] >> 1))) // zero-quantized @@ -940,6 +954,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s const uint32_t rmsComp = (grpStereoData[b] > 0 ? squareMeanRoot (refRms[b], grpRms[b]) : grpRms[b]); const uint32_t rmsRef9 = (coreConfig.commonWindow ? refRms[b] >> 9 : rmsComp); #ifndef NO_DTX_MODE + if ((m_bitRateMode > 0) || (m_numElements > 1) || (samplingRate > 24000)) if ((m_bitRateMode > 3) || (meanSpecFlat[ci] > (SCHAR_MAX >> 1)) || (rmsComp >= TA_EPS)) #endif if (rmsComp >= rmsRef9) // check only first SFB above max_sfb for simplification @@ -971,6 +986,10 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s #ifndef NO_DTX_MODE const bool prvEightShorts = (coreConfig.icsInfoPrev[ch].windowSequence == EIGHT_SHORT); + if ((m_bitRateMode < 1) && (m_numElements == 1) && (samplingRate <= 24000) && eightShorts) + { + for (s = 0; s < 26; s++) m_sfbLoudMem[ch][s][m_frameCount & 31] = uint16_t (sqrt (double (getThr (ch, s) << (samplingRate >> 13)))); + } if ((maxSfbCh < numSwbFrame) || (m_bitRateMode <= 2)) // increase coding bandwidth #else if (maxSfbCh < numSwbFrame) // increase coding bandwidth @@ -979,11 +998,29 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s for (uint16_t gr = 0; gr < grpData.numWindowGroups; gr++) { #ifndef NO_DTX_MODE + const uint32_t* grpRms = &grpData.sfbRmsValues[m_numSwbShort * gr]; + + if ((m_bitRateMode < 1) && (m_numElements == 1) && (samplingRate <= 24000)) + { + const uint32_t* refRms = &coreConfig.groupingData[1 - ch].sfbRmsValues[m_numSwbShort * gr]; + uint8_t* grpStereoData = &coreConfig.stereoDataCurr[m_numSwbShort * gr]; + const unsigned sfbStart = (prvEightShorts ? (samplingRate > 16000 ? 24 : 17) : m_specGapFiller.getFirstGapFillSfb ()); + + for (s = sfbStart; s < maxSfbCh; s++) + { + const double rmsValue = double (grpStereoData[s] > 0 ? squareMeanRoot (refRms[s], grpRms[s]) : grpRms[s]); + const unsigned sfbIdx = s - sfbStart; + + m_sfbLoudMem[ch][sfbIdx][m_frameCount & 31] = __max (BA_EPS, uint16_t (sqrt (rmsValue))); + if (grpRms[s] < getThr (ch, sfbIdx)) grpData.scaleFactors[s + m_numSwbShort * gr] = 0; + } + } + else if ((m_bitRateMode <= 4) && (meanSpecFlat[ci] <= (SCHAR_MAX >> 1))) // low-RMS { for (s = (prvEightShorts ? (samplingRate < 27713 ? 24 : 22) : m_specGapFiller.getFirstGapFillSfb ()); s < maxSfbCh; s++) { - if (grpData.sfbRmsValues[s + m_numSwbShort * gr] < ((3 * TA_EPS) >> 1)) grpData.scaleFactors[s + m_numSwbShort * gr] = 0; + if (grpRms[s] < ((3 * TA_EPS) >> 1)) grpData.scaleFactors[s + m_numSwbShort * gr] = 0; } } #endif @@ -993,6 +1030,12 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s } if (ch > 0) coreConfig.commonMaxSfb = (coreConfig.icsInfoCurr[0].maxSfb == coreConfig.icsInfoCurr[1].maxSfb); } +#ifndef NO_DTX_MODE + else if (m_noiseFilling[el] && (m_bitRateMode < 1) && (m_numElements == 1) && (samplingRate <= 24000)) + { + for (s = 0; s < 26; s++) m_sfbLoudMem[ch][s][m_frameCount & 31] = BA_EPS; + } +#endif #endif // !RESTRICT_TO_AAC ci++; } // for ch @@ -1943,7 +1986,7 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin { errorValue |= 64; } - if (m_frequencyIdx < 0) + if ((m_frequencyIdx < 0) || (m_bitRateMode > (toSamplingRate (m_frequencyIdx) >> 12) + 1)) { errorValue |= 32; } @@ -1992,6 +2035,9 @@ unsigned ExhaleEncoder::initEncoder (unsigned char* const audioConfigBuffer, uin m_elementData[el]->elementType = elementTypeConfig[chConf][el]; // usacElementType[el] } } +#ifndef NO_DTX_MODE + memset (m_sfbLoudMem, 1, 2 * 26 * 32 * sizeof (uint16_t)); +#endif // allocate all signal buffers for (unsigned ch = 0; ch < nChannels; ch++) { diff --git a/src/lib/exhaleEnc.h b/src/lib/exhaleEnc.h index 89a860c..a82b87f 100644 --- a/src/lib/exhaleEnc.h +++ b/src/lib/exhaleEnc.h @@ -91,6 +91,9 @@ private: uint8_t m_perCorrHCurr[USAC_MAX_NUM_ELEMENTS]; uint8_t m_perCorrLCurr[USAC_MAX_NUM_ELEMENTS]; SfbGroupData* m_scaleFacData[USAC_MAX_NUM_CHANNELS]; +#ifndef NO_DTX_MODE + uint16_t m_sfbLoudMem[2][26][32]; // loudness mem +#endif SfbQuantizer m_sfbQuantizer; // powerlaw quantization SpecAnalyzer m_specAnalyzer; // for spectral analysis uint32_t m_specAnaCurr[USAC_MAX_NUM_CHANNELS]; @@ -121,6 +124,9 @@ private: int32_t* const mdctSignal, int32_t* const mdstSignal); unsigned getOptParCorCoeffs (const SfbGroupData& grpData, const uint8_t maxSfb, TnsData& tnsData, const unsigned channelIndex, const uint8_t firstGroupIndexToTest = 0); +#ifndef NO_DTX_MODE + uint32_t getThr (const unsigned channelIndex, const unsigned sfbIndex); +#endif unsigned psychBitAllocation (); unsigned quantizationCoding (); unsigned spectralProcessing ();