improve resampler

This commit is contained in:
Christian R. Helmrich 2020-07-13 01:00:06 +02:00
parent 92e148506c
commit be52276f05
3 changed files with 160 additions and 28 deletions

View File

@ -55,7 +55,7 @@ bool BasicWavReader::readFormatChunk ()
m_bytesRemaining -= m_bytesRead;
if ((b[0] == 0xFE) && (b[1] == 0xFF) && (m_chunkLength == CHUNK_FORMAT_MAX) && (b[16] == CHUNK_FORMAT_MAX - CHUNK_FORMAT_SIZE - 2) &&
(b[17] == 0) && (b[18] == b[14]) && (b[19] == 0) && ((b[25] | b[26] | b[27] | b[28] | b[29] | b[31] | b[33] | b[34] | b[36]) == 0))
(b[17] == 0) && (b[18] == b[14]) && ((b[19] | b[25] | b[26] | b[27] | b[28] | b[29] | b[31] | b[33] | b[34] | b[36]) == 0))
{
m_waveDataType = WAV_TYPE (b[24]-1); // extensible WAV
b[1] = 0;

View File

@ -55,8 +55,17 @@
#if ENABLE_RESAMPLING
static const int16_t usfc2x[32] = { // 2x upsampling filter coefficients
8913, -13785, 8142, -5681, 4281, -3367, 2716, -2225, 1840, -1530, 1275, -1062, 883, -732,
604, -495, 402, -325, 260, -205, 160, -124, 94, -70, 51, -36, 25, -16, 11, -6, 3, -1
(83359-65536), -27563, 16273, -11344, 8541, -6708, 5403, -4419, 3647, -3025, 2514, -2088, 1730,
-1428, 1173, -957, 775, -622, 494, -388, 300, -230, 172, -127, 91, -63, 43, -27, 16, -9, 4, -1
};
// Fixed-point FIR coefficient table read by eaApplyDownsampler(). Layout as consumed there:
//   even index 2*c   : weight of the symmetric tap pair at distance c for the "lowpass" output sample,
//   odd index 2*c+1  : weight of the symmetric tap pair for the "interp." output sample.
// The first three entries are stored with an offset so that they fit into int16_t; the consumer adds
// the offset back at run time (2*32768 for entries 0 and 1, 32768 for entry 2, which is why entry 2 is
// written as 36099-32768). The accumulated sums are rounded and scaled with a right shift by 17 bits.
static const int16_t rsfc3x[128] = {// 3x resampling filter coefficients
21846, 6711, (36099-32768), 0, -18000, -14370, 0, 10208, 8901, 0, -7062, -6389, 0, 5347, 4934, 0, -4258,
-3977, 0, 3499, 3294, 0, -2937, -2780, 0, 2501, 2376, 0, -2151, -2050, 0, 1864, 1779, 0, -1623, -1551,
0, 1417, 1355, 0, -1240, -1187, 0, 1086, 1040, 0, -952, -910, 0, 833, 797, 0, -728, -696, 0, 635, 607,
0, -553, -528, 0, 480, 457, 0, -415, -395, 0, 358, 340, 0, -307, -291, 0, 262, 248, 0, -223, -211, 0,
188, 177, 0, -158, -149, 0, 132, 124, 0, -109, -102, 0, 90, 84, 0, -73, -68, 0, 59, 55, 0, -47, -43,
0, 37, 34, 0, -29, -26, 0, 22, 20, 0, -16, -15, 0, 12, 10, 0, -8, -7, 0, 5, 5, 0, -3, -3, 0, 2
};
static bool eaInitUpsampler2x (int32_t** upsampleBuffer, const uint16_t bitRateMode, const uint16_t sampleRate,
@ -78,11 +87,31 @@ static bool eaInitUpsampler2x (int32_t** upsampleBuffer, const uint16_t bitRateM
return useUpsampler;
}
// Decides whether the 3:2 downsampler applies to the given encoder setup and, if so, allocates its
// working buffer with malloc(). The buffer holds, per channel, 3/2 * frameSize new input samples plus
// frameSize / 8 additional samples, which matches the layout used by eaApplyDownsampler(); the
// additional samples at the end of each channel segment are cleared.
// Returns true only when downsampling is to be used AND the allocation succeeded. *resampleBuffer is
// written only when downsampling applies (it is set to nullptr if the allocation fails).
static bool eaInitDownsampler (int32_t** resampleBuffer, const uint16_t bitRateMode, const uint16_t sampleRate,
                               const uint16_t frameSize, const uint16_t numChannels)
{
  // the downsampler is used solely for 48-kHz input in preset mode 1 with frames of 512 or more samples
  if (frameSize < 512 || bitRateMode != 1 || sampleRate != 48000)
  {
    return false;
  }

  const uint16_t newSamples = (frameSize * 3u) >> 1;         // input samples added per frame and channel
  const uint16_t perChannel = newSamples + (frameSize >> 3); // full length of one channel's segment
  int32_t* const storage    = (int32_t*) malloc (perChannel * numChannels * sizeof (int32_t));

  *resampleBuffer = storage;
  if (storage == nullptr)
  {
    return false; // allocation failed
  }

  // clear the trailing part of every channel segment (the order of the channels is irrelevant here)
  for (uint16_t ch = numChannels; ch-- > 0; )
  {
    memset (&storage[perChannel * ch + newSamples], 0, (perChannel - newSamples) * sizeof (int32_t));
  }

  return true;
}
static void eaApplyUpsampler2x (int32_t* const pcmBuffer, int32_t* const upsampleBuffer,
const uint16_t frameSize, const uint16_t numChannels, const bool firstFrame = false)
{
const uint16_t inLength = (frameSize >> 1) + (firstFrame ? 32 : 0);
const uint16_t chLength = (frameSize >> 1) + (32 << 1);
const int16_t lookahead = 32;
const uint16_t inLength = (frameSize >> 1) + (firstFrame ? lookahead : 0);
const uint16_t chLength = (frameSize >> 1) + (lookahead << 1);
uint16_t ch;
for (ch = 0; ch < numChannels; ch++) // step 1: add deinterleaved input samples to resampling buffer
@ -108,15 +137,87 @@ static void eaApplyUpsampler2x (int32_t* const pcmBuffer, int32_t* const upsampl
for (ch = 0; ch < numChannels; ch++) // step 2: upsample, reinterleave, and save to PCM input buffer
{
/*in*/int32_t* chPcmBuf = &pcmBuffer[ch];
const int32_t* chUpsBuf = &upsampleBuffer[chLength * ch + 32];
const int32_t* chUpsBuf = &upsampleBuffer[chLength * ch + lookahead];
for (uint16_t i = (frameSize >> 1); i > 0; i--, chPcmBuf += numChannels, chUpsBuf++)
for (uint16_t i = frameSize >> 1; i > 0; i--, chUpsBuf++)
{
int64_t r = (chUpsBuf[0] + (int64_t) chUpsBuf[1]) * (usfc2x[0] - SHRT_MIN);
int64_t r = (chUpsBuf[0] + (int64_t) chUpsBuf[1]) * (usfc2x[0] - 2 * SHRT_MIN);
for (int16_t c = 32 - 1; c > 0; c--) r += (chUpsBuf[-c] + (int64_t) chUpsBuf[c + 1]) * usfc2x[c];
*chPcmBuf = *chUpsBuf; chPcmBuf += numChannels; // 1-to-1 mapping
*chPcmBuf = int32_t ((r - SHRT_MIN) >> 16); // interpolated sample
for (int16_t c = lookahead - 1; c > 0; c--)
{
r += (chUpsBuf[-c] + (int64_t) chUpsBuf[c + 1]) * usfc2x[c];
}
*chPcmBuf = *chUpsBuf; // no filtering necessary, just copy sample
if (*chPcmBuf < MIN_VALUE_AUDIO24) *chPcmBuf = MIN_VALUE_AUDIO24;
else
if (*chPcmBuf > MAX_VALUE_AUDIO24) *chPcmBuf = MAX_VALUE_AUDIO24;
chPcmBuf += numChannels;
*chPcmBuf = int32_t ((r - 2 * SHRT_MIN) >> 17); // interp. sample
if (*chPcmBuf < MIN_VALUE_AUDIO24) *chPcmBuf = MIN_VALUE_AUDIO24;
else
if (*chPcmBuf > MAX_VALUE_AUDIO24) *chPcmBuf = MAX_VALUE_AUDIO24;
chPcmBuf += numChannels;
}
}
}
// Downsamples one frame of interleaved PCM audio in place by a ratio of 3:2 (every three input samples
// per channel yield two output samples), using the per-channel history kept in resampleBuffer.
//   pcmBuffer      - in: interleaved input samples, out: frameSize interleaved output samples/channel
//   resampleBuffer - per-channel working memory, laid out as set up by eaInitDownsampler()
//   frameSize      - number of output samples per channel
//   numChannels    - number of interleaved channels
//   firstFrame     - true for the very first call, where the input holds (frameSize >> 4) extra samples
//                    per channel and the leading history is synthesized instead of taken from memory
// NOTE(review): the first-frame path and the rsfc3x indexing presume 512 <= frameSize <= 1024, which
// eaInitDownsampler() only enforces at the lower end - confirm against the callers.
static void eaApplyDownsampler (int32_t* const pcmBuffer, int32_t* const resampleBuffer,
                                const uint16_t frameSize, const uint16_t numChannels, const bool firstFrame = false)
{
  const int16_t  lookahead  = frameSize >> 4; // filter reach on either side of the current sample
  const uint16_t inLength   = ((frameSize * 3u) >> 1) + (firstFrame ? lookahead : 0);
  const uint16_t chLength   = ((frameSize * 3u) >> 1) + (lookahead << 1);
  const int      histLength = chLength - inLength; // samples kept in front of the newly added input

  // rounds a filter accumulator, scales it down by 17 bits, and limits the result to the 24-bit range
  const auto roundAndClip = [] (const int64_t accumulator) -> int32_t
  {
    int32_t sample = int32_t ((accumulator - 2 * SHRT_MIN) >> 17);

    if (sample < MIN_VALUE_AUDIO24)
    {
      sample = MIN_VALUE_AUDIO24;
    }
    else if (sample > MAX_VALUE_AUDIO24)
    {
      sample = MAX_VALUE_AUDIO24;
    }
    return sample;
  };

  // step 1: add deinterleaved input samples to resampling buffer
  for (uint16_t ch = 0; ch < numChannels; ch++)
  {
    const int32_t* const src = &pcmBuffer[ch];
    int32_t* const  segment  = &resampleBuffer[chLength * ch];
# if 1
    if (firstFrame) // no history yet: zeros followed by a 32-sample linear ramp up to the first sample
    {
      memset (segment, 0, (lookahead - 32) * sizeof (int32_t));

      int32_t* const ramp = segment + (lookahead - 32);

      for (int k = 0; k < 32; k++)
      {
        ramp[k] = (*src * k + (32 >> 1)) >> 5;
      }
    }
    else
# endif
    {
      memcpy (segment, &segment[inLength], histLength * sizeof (int32_t)); // move buffer end to front
    }

    int32_t* const dst = segment + histLength;

    for (uint16_t n = 0; n < inLength; n++)
    {
      dst[n] = src[n * numChannels]; // deinterleave, store in resampling buffer
    }
  }

  // step 2: resample, reinterleave, and save to PCM input buffer
  for (uint16_t ch = 0; ch < numChannels; ch++)
  {
    int32_t* const       out = &pcmBuffer[ch];
    const int32_t* const in  = &resampleBuffer[chLength * ch + lookahead];

    for (uint16_t pair = 0; pair < (frameSize >> 1); pair++)
    {
      const int32_t* const x = &in[3 * pair]; // three input samples are consumed per output pair

      // the center taps carry offsets since the stored coefficients are reduced to fit into int16_t
      int64_t lowpass = (int64_t) x[0] * (rsfc3x[0] - 2 * SHRT_MIN) - (x[-1] + (int64_t) x[1]) * SHRT_MIN +
                        (int64_t) x[-lookahead] + (int64_t) x[lookahead];
      int64_t interp  = (x[1] + (int64_t) x[2]) * (rsfc3x[1] - 2 * SHRT_MIN);

      for (int16_t c = 1; c < lookahead; c++) // remaining symmetric tap pairs of both filter phases
      {
        lowpass += (x[-c] + (int64_t) x[c]) * rsfc3x[c << 1];
        interp  += (x[1 - c] + (int64_t) x[c + 2]) * rsfc3x[(c << 1) + 1];
      }

      out[(2 * pair) * numChannels]     = roundAndClip (lowpass); // lowpass sample
      out[(2 * pair + 1) * numChannels] = roundAndClip (interp);  // interp. sample
    }
  }
}
@ -338,6 +439,7 @@ int main (const int argc, char* argv[])
}
const unsigned frameLength = (3 + coreSbrFrameLengthIndex) << 8;
const unsigned startLength = (frameLength * 25) >> 4; // encoder PCM look-ahead
if (readStdin) // configure stdin
{
@ -411,9 +513,9 @@ int main (const int argc, char* argv[])
}
#if defined (_WIN32) || defined (WIN32) || defined (_WIN64) || defined (WIN64)
if ((wavReader.open (inFileHandle, frameLength, readStdin ? LLONG_MAX : _filelengthi64 (inFileHandle)) != 0) ||
if ((wavReader.open (inFileHandle, startLength, readStdin ? LLONG_MAX : _filelengthi64 (inFileHandle)) != 0) ||
#else // Linux, MacOS, Unix
if ((wavReader.open (inFileHandle, frameLength, readStdin ? LLONG_MAX : lseek (inFileHandle, 0, 2 /*SEEK_END*/)) != 0) ||
if ((wavReader.open (inFileHandle, startLength, readStdin ? LLONG_MAX : lseek (inFileHandle, 0, 2 /*SEEK_END*/)) != 0) ||
#endif
(wavReader.getNumChannels () >= 7))
{
@ -446,7 +548,11 @@ int main (const int argc, char* argv[])
goto mainFinish; // bad output string
}
if (wavReader.getSampleRate () > 32100 + (unsigned) variableCoreBitRateMode * 12000 + (variableCoreBitRateMode >> 2) * 3900)
if (wavReader.getSampleRate () > 32100 + (unsigned) variableCoreBitRateMode * 12000 + (variableCoreBitRateMode >> 2) * 3900
#if ENABLE_RESAMPLING
&& (variableCoreBitRateMode != 1 || wavReader.getSampleRate () != 48000)
#endif
)
{
i = (variableCoreBitRateMode > 4 ? 96 : __min (64, 32 + variableCoreBitRateMode * 12));
fprintf_s (stderr, " ERROR during encoding! Input sample rate must be <=%d kHz for preset mode %d!\n\n", i, variableCoreBitRateMode);
@ -456,6 +562,13 @@ int main (const int argc, char* argv[])
}
if (wavReader.getSampleRate () > 32000 && variableCoreBitRateMode == 1)
{
#if ENABLE_RESAMPLING
if (wavReader.getSampleRate () == 48000)
{
fprintf_s (stdout, " NOTE: Downsampling the input audio from 48 kHz to 32 kHz with preset mode %d\n\n", variableCoreBitRateMode);
}
else
#endif
fprintf_s (stderr, " WARNING: The input sampling rate should be 32 kHz or less for preset mode %d!\n\n", variableCoreBitRateMode);
}
@ -503,15 +616,23 @@ int main (const int argc, char* argv[])
}
else
{
const unsigned startLength = (frameLength * 25) >> 4; // encoder PCM look-ahead
const unsigned numChannels = wavReader.getNumChannels ();
const unsigned inFrameSize = frameLength * sizeof (int32_t);
const unsigned inSampDepth = wavReader.getBitDepth ();
#if ENABLE_RESAMPLING
const bool enableUpsampler = eaInitUpsampler2x (&inPcmRsmp, variableCoreBitRateMode, i, frameLength, numChannels);
const uint16_t firstLength = uint16_t (enableUpsampler ? (frameLength >> 1) + 32 : frameLength); // upsampler look-ahead
const int64_t expectLength = (wavReader.getDataBytesLeft () << (enableUpsampler ? 1 : 0)) / int64_t ((numChannels * inSampDepth) >> 3);
const bool enableResampler = eaInitDownsampler (&inPcmRsmp, variableCoreBitRateMode, i, frameLength, numChannels);
const uint16_t firstLength = uint16_t (enableUpsampler ? (frameLength >> 1) + 32 : (enableResampler ? startLength : frameLength));
const unsigned inFrameSize = (enableResampler ? startLength : frameLength) * sizeof (int32_t); // max buffer size
const unsigned resampRatio = (enableResampler ? 3 : 1); // for resampling ratio
const unsigned resampShift = (enableResampler || enableUpsampler ? 1 : 0);
const int64_t expectLength = (wavReader.getDataBytesLeft () << resampShift) / int64_t ((numChannels * inSampDepth * resampRatio) >> 3);
if (enableUpsampler) // notify by printf
{
fprintf_s (stdout, " NOTE: Upsampling the input audio from %d kHz to %d kHz with preset mode %d\n\n", i / 1000, i / 500, variableCoreBitRateMode);
}
#else
const unsigned inFrameSize = frameLength * sizeof (int32_t); // max buffer size
const int64_t expectLength = wavReader.getDataBytesLeft () / int64_t ((numChannels * inSampDepth) >> 3);
#endif
// allocate dynamic frame memory buffers
@ -539,11 +660,11 @@ int main (const int argc, char* argv[])
else // start coding loop, show progress
{
#if ENABLE_RESAMPLING
const unsigned sampleRate = wavReader.getSampleRate () << (enableUpsampler ? 1 : 0);
const unsigned sampleRate = (wavReader.getSampleRate () << resampShift) / resampRatio;
#else
const unsigned sampleRate = wavReader.getSampleRate ();
#endif
const unsigned indepPeriod = (sampleRate < 48000 ? sampleRate / frameLength : 45 /*for 50-Hz video, use 50 for 60-Hz video*/);
const unsigned indepPeriod = (sampleRate < 48000 ? (sampleRate - 320) / frameLength : 45 /*for 50-Hz video, use 50 for 60-Hz video*/);
const unsigned mod3Percent = unsigned ((expectLength * (3 + coreSbrFrameLengthIndex)) >> 17);
uint32_t byteCount = 0, bw = (numChannels < 7 ? loudStats : 0);
uint32_t br, bwMax = 0; // br will be used to hold bytes read and/or bit-rate
@ -613,8 +734,10 @@ int main (const int argc, char* argv[])
i = 1; // for progress bar
#if ENABLE_RESAMPLING
// upsample initial frame if necessary
// resample initial frame if necessary
if (enableUpsampler) eaApplyUpsampler2x (inPcmData, inPcmRsmp, frameLength, numChannels, true);
else
if (enableResampler) eaApplyDownsampler (inPcmData, inPcmRsmp, frameLength, numChannels, true);
#endif
// initial frame, encode look-ahead AU
if ((bw = exhaleEnc.encodeLookahead ()) < 3)
@ -638,14 +761,16 @@ int main (const int argc, char* argv[])
byteCount += bw;
#if ENABLE_RESAMPLING
while (wavReader.read (inPcmData, frameLength >> (enableUpsampler ? 1 : 0)) > 0) // read a new audio frame
while (wavReader.read (inPcmData, (frameLength * resampRatio) >> resampShift) > 0) // read a new audio frame
#else
while (wavReader.read (inPcmData, frameLength) > 0) // read a new audio frame
#endif
{
#if ENABLE_RESAMPLING
// upsample audio frame if necessary
// resample audio frame if necessary
if (enableUpsampler) eaApplyUpsampler2x (inPcmData, inPcmRsmp, frameLength, numChannels);
else
if (enableResampler) eaApplyDownsampler (inPcmData, inPcmRsmp, frameLength, numChannels);
#endif
// frame coding loop, encode next AU
if ((bw = exhaleEnc.encodeFrame ()) < 3)
@ -678,8 +803,10 @@ int main (const int argc, char* argv[])
} // frame loop
#if ENABLE_RESAMPLING
// upsample the last frame if necessary
// resample the last frame if necessary
if (enableUpsampler) eaApplyUpsampler2x (inPcmData, inPcmRsmp, frameLength, numChannels);
else
if (enableResampler) eaApplyDownsampler (inPcmData, inPcmRsmp, frameLength, numChannels);
#endif
// end of coding loop, encode final AU
if ((bw = exhaleEnc.encodeFrame ()) < 3)
@ -703,16 +830,20 @@ int main (const int argc, char* argv[])
byteCount += bw;
#if ENABLE_RESAMPLING
const int64_t actualLength = (wavReader.getDataBytesRead () << (enableUpsampler ? 1 : 0)) / int64_t ((numChannels * inSampDepth) >> 3);
const int64_t actualLength = (wavReader.getDataBytesRead () << resampShift) / int64_t ((numChannels * inSampDepth * resampRatio) >> 3);
#else
const int64_t actualLength = wavReader.getDataBytesRead () / int64_t ((numChannels * inSampDepth) >> 3);
#endif
/* NOTE: the following "if" is, as far as I can tell, correct, but some decoders
with DRC processing may decode too few samples with it. Hence, I disabled it.
if (((actualLength + startLength) % frameLength) > 0) // flush trailing audio
{
*/ {
memset (inPcmData, 0, inFrameSize * numChannels);
#if ENABLE_RESAMPLING
// upsample flush frame if necessary
// resample flush frame if necessary
if (enableUpsampler) eaApplyUpsampler2x (inPcmData, inPcmRsmp, frameLength, numChannels);
else
if (enableResampler) eaApplyDownsampler (inPcmData, inPcmRsmp, frameLength, numChannels);
#endif
// flush remaining audio into new AU
if ((bw = exhaleEnc.encodeFrame ()) < 3)

View File

@ -1022,8 +1022,9 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
if ((maxSfbCh > 0) && m_noiseFilling[el] && (m_bitRateMode <= 3 || !eightShorts))
{
const uint32_t maxSfbCurr = (eightShorts ? (samplingRate < 37566 ? 14 : brModeAndFsToMaxSfbShort (m_bitRateMode, samplingRate)) : maxSfbLong);
const bool keepMaxSfbCurr = ((samplingRate < 37566) || (samplingRate >= 46009 && samplingRate < 55426 && eightShorts));
const uint8_t numSwbFrame = __min ((numSwbCh * ((maxSfbCh == maxSfbCurr) || (m_bitRateMode <= 2) ? 4u : 3u)) >> 2,
(eightShorts ? maxSfbCh : maxSfbLong) + (m_bitRateMode < 2 || m_bitRateMode > 3 || samplingRate < 37566 ? 0 : 1));
(eightShorts ? maxSfbCh : maxSfbLong) + (m_bitRateMode < 2 || m_bitRateMode > 3 || keepMaxSfbCurr ? 0 : 1));
#ifndef NO_DTX_MODE
const bool prvEightShorts = (coreConfig.icsInfoPrev[ch].windowSequence == EIGHT_SHORT);