smart padding for better gapless playback

The smart padding code, which uses LPC extrapolation, is taken from vorbis/opus.
Padding is done at both the beginning and the end, but enc_delay and padding
remain the same (we discard the extra padding frames introduced on our side
after encoding).
This commit is contained in:
nu774 2013-10-29 17:34:28 +09:00
parent d11b044131
commit 4d48b091d4
11 changed files with 505 additions and 55 deletions

View File

@ -98,6 +98,8 @@ copy ..\fdk-aac\libSYS\include\machine_type.h include\fdk-aac\ </Command>
<ClCompile Include="..\src\aacenc.c" />
<ClCompile Include="..\src\caf_reader.c" />
<ClCompile Include="..\src\compat_win32.c" />
<ClCompile Include="..\src\extrapolater.c" />
<ClCompile Include="..\src\lpc.c" />
<ClCompile Include="..\src\lpcm.c" />
<ClCompile Include="..\src\m4af.c" />
<ClCompile Include="..\src\main.c" />
@ -114,6 +116,7 @@ copy ..\fdk-aac\libSYS\include\machine_type.h include\fdk-aac\ </Command>
<ClInclude Include="..\src\caf_reader.h" />
<ClInclude Include="..\src\catypes.h" />
<ClInclude Include="..\src\compat.h" />
<ClInclude Include="..\src\lpc.h" />
<ClInclude Include="..\src\lpcm.h" />
<ClInclude Include="..\src\m4af.h" />
<ClInclude Include="..\src\m4af_endian.h" />

View File

@ -24,6 +24,12 @@
<ClCompile Include="..\src\compat_win32.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\extrapolater.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\lpc.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\lpcm.c">
<Filter>Source Files</Filter>
</ClCompile>
@ -62,6 +68,9 @@
<ClInclude Include="..\src\compat.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\lpc.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\lpcm.h">
<Filter>Header Files</Filter>
</ClInclude>

View File

@ -6,6 +6,8 @@ bin_PROGRAMS = fdkaac
fdkaac_SOURCES = \
src/aacenc.c \
src/caf_reader.c \
src/extrapolater.c \
src/lpc.c \
src/lpcm.c \
src/m4af.c \
src/main.c \

View File

@ -231,5 +231,5 @@ int aac_encode_frame(HANDLE_AACENCODER encoder,
return -1;
}
*olen = oargs.numOutBytes;
return oargs.numInSamples;
return oargs.numInSamples / format->channels_per_frame;
}

204
src/extrapolater.c Normal file
View File

@ -0,0 +1,204 @@
#if HAVE_CONFIG_H
# include "config.h"
#endif
#if HAVE_STDINT_H
# include <stdint.h>
#endif
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "pcm_reader.h"
#include "lpc.h"
typedef int16_t sample_t;
/*
 * One block of 16-bit samples read from the source reader.
 * Samples of different channels are interleaved frame by frame.
 */
typedef struct buffer_t {
sample_t *data; /* sample storage, grown on demand by realloc_buffer() */
unsigned count; /* count in frames */
unsigned capacity; /* size in bytes */
} buffer_t;
/*
 * PCM reader that wraps another reader and surrounds its output with
 * extrapolated (or silent) padding: one block before the first input block
 * and one block after the last.
 */
typedef struct extrapolater_t {
pcm_reader_vtbl_t *vtbl; /* set to &my_vtable by extrapolater_open() */
pcm_reader_t *src; /* wrapped source reader; torn down in teardown() */
pcm_sample_description_t format; /* not referenced by the code visible here; the format is always queried from src */
buffer_t buffer[2]; /* two read buffers used alternately by fetch() */
unsigned nbuffer; /* index (0 or 1) of the buffer the next fetch() fills */
int (*process)(struct extrapolater_t *, void *, unsigned); /* handler for the current state (process0 .. process3) */
} extrapolater_t;
#define LPC_ORDER 32
/* Return the source reader wrapped by the extrapolater `reader`. */
static inline pcm_reader_t *get_source(pcm_reader_t *reader)
{
    extrapolater_t *self = (extrapolater_t *)reader;

    return self->src;
}
/* vtable entry: the sample format is whatever the wrapped source reports. */
static const
pcm_sample_description_t *get_format(pcm_reader_t *reader)
{
    pcm_reader_t *upstream = get_source(reader);

    return pcm_get_format(upstream);
}
/* vtable entry: forward the length query to the wrapped source unchanged. */
static int64_t get_length(pcm_reader_t *reader)
{
    pcm_reader_t *upstream = get_source(reader);

    return pcm_get_length(upstream);
}
/* vtable entry: forward the position query to the wrapped source unchanged. */
static int64_t get_position(pcm_reader_t *reader)
{
    pcm_reader_t *upstream = get_source(reader);

    return pcm_get_position(upstream);
}
/*
 * Make sure bp->data can hold at least `size` bytes.
 * The buffer is only ever grown, never shrunk.
 * Returns 0 on success, or -1 when realloc() fails; in that case the
 * buffer keeps its previous storage and capacity.
 */
static int realloc_buffer(buffer_t *bp, size_t size)
{
    void *grown;

    if (size <= bp->capacity)
        return 0;
    grown = realloc(bp->data, size);
    if (grown == 0)
        return -1;
    bp->data = grown;
    bp->capacity = size;
    return 0;
}
/*
 * Reverse the order of the frames in an interleaved sample buffer, in place.
 * The channel order inside each frame is kept.
 *
 * data      - nframes * nchannels samples
 * nframes   - number of frames in data
 * nchannels - samples per frame
 */
static void reverse_buffer(sample_t *data, unsigned nframes, unsigned nchannels)
{
    unsigned head, tail, ch;

    /*
     * Nothing to swap for 0 or 1 frames. The check also keeps
     * (nframes - 1) from wrapping around when nframes is 0, which would
     * otherwise send the loop far out of bounds.
     */
    if (nframes < 2)
        return;

    head = 0;
    tail = nchannels * (nframes - 1);
    for (; head < tail; head += nchannels, tail -= nchannels) {
        for (ch = 0; ch < nchannels; ++ch) {
            sample_t tmp = data[head + ch];
            data[head + ch] = data[tail + ch];
            data[tail + ch] = tmp;
        }
    }
}
/*
 * Read up to nframes frames from the source into the current buffer
 * (self->buffer[self->nbuffer]).
 *
 * When at least one frame was read, self->nbuffer is flipped so that the
 * next call fills the other buffer and the block just read stays available
 * as buffer[nbuffer ^ 1].
 *
 * Returns the number of frames now held in the filled buffer. The result is
 * 0 on end of input, on a read error, and when the buffer could not be
 * allocated.
 */
static int fetch(extrapolater_t *self, unsigned nframes)
{
    const pcm_sample_description_t *sfmt = pcm_get_format(self->src);
    buffer_t *bp = &self->buffer[self->nbuffer];
    int rc = 0;

    /*
     * Invalidate the buffer first: if the allocation below fails we must
     * not report the frame count left over from an earlier read.
     */
    bp->count = 0;
    /* do the size arithmetic in size_t so it cannot wrap in unsigned */
    if (realloc_buffer(bp, (size_t)nframes * sfmt->bytes_per_frame) == 0) {
        rc = pcm_read_frames(self->src, bp->data, nframes);
        if (rc > 0)
            bp->count = rc;
    }
    if (rc > 0)
        self->nbuffer ^= 1;
    return bp->count;
}
/*
 * Predict nframes frames that continue the signal held in bp and store
 * them, interleaved, in dst.
 *
 * Each channel is handled on its own: LPC coefficients are estimated from
 * the whole block, then the predictor is primed with the channel's last
 * LPC_ORDER samples. bp must therefore hold at least LPC_ORDER frames.
 * Always returns nframes.
 */
static int extrapolate(extrapolater_t *self, const buffer_t *bp,
                       void *dst, unsigned nframes)
{
    const pcm_sample_description_t *sfmt = pcm_get_format(self->src);
    unsigned nchannels = sfmt->channels_per_frame;
    sample_t *out = (sample_t*)dst;
    float coeffs[LPC_ORDER];
    unsigned ch;

    for (ch = 0; ch < nchannels; ++ch) {
        /* first sample of the final LPC_ORDER frames of this channel */
        sample_t *tail = &bp->data[ch + nchannels * (bp->count - LPC_ORDER)];

        vorbis_lpc_from_data(bp->data + ch, coeffs, bp->count, LPC_ORDER,
                             nchannels);
        vorbis_lpc_predict(coeffs, tail, LPC_ORDER, out + ch, nframes,
                           nchannels);
    }
    return nframes;
}
/*
 * read_frames() dispatches to self->process, which walks through these
 * states:
 *   process0 - first call: produce nframes of padding ahead of the input
 *   process1 - hand out the input, delayed by one block
 *   process2 - produce nframes of padding after the input
 *   process3 - end of stream, always returns 0
 */
static int process1(extrapolater_t *self, void *buffer, unsigned nframes);
static int process2(extrapolater_t *self, void *buffer, unsigned nframes);
static int process3(extrapolater_t *self, void *buffer, unsigned nframes);
/*
 * Initial state. Fetches the first input block and fills `buffer` with
 * nframes of leading padding: a backward extrapolation of that block, or
 * silence when fewer than 2 * LPC_ORDER frames were read.
 * The fetched block itself is not returned here; it stays buffered for
 * process1. Always returns nframes.
 */
static int process0(extrapolater_t *self, void *buffer, unsigned nframes)
{
const pcm_sample_description_t *sfmt = pcm_get_format(self->src);
unsigned nchannels = sfmt->channels_per_frame;
/* taken before fetch() flips nbuffer, so bp is the block fetch() fills */
buffer_t *bp = &self->buffer[self->nbuffer];
if (fetch(self, nframes) < 2 * LPC_ORDER)
memset(buffer, 0, nframes * sfmt->bytes_per_frame);
else {
/*
 * Extrapolate backwards in time: reverse the block, predict past its
 * (reversed) end, reverse the prediction, then restore the block.
 */
reverse_buffer(bp->data, bp->count, nchannels);
extrapolate(self, bp, buffer, nframes);
reverse_buffer(buffer, nframes, nchannels);
reverse_buffer(bp->data, bp->count, nchannels);
}
/* empty input: skip the pass-through state and go straight to the tail */
self->process = bp->count ? process1 : process2;
return nframes;
}
/*
 * Pass-through state. Copies the block fetched by the previous call to
 * `buffer`, then fetches the next block into the other buffer. When that
 * fetch yields no frames, the next call is routed to process2.
 * Returns the number of frames copied.
 */
static int process1(extrapolater_t *self, void *buffer, unsigned nframes)
{
const pcm_sample_description_t *sfmt = pcm_get_format(self->src);
/* buffer[nbuffer ^ 1] is the block the last successful fetch() filled */
buffer_t *bp = &self->buffer[self->nbuffer ^ 1];
/* the caller's buffer is assumed to hold nframes frames */
assert(bp->count <= nframes);
memcpy(buffer, bp->data, bp->count * sfmt->bytes_per_frame);
/* read ahead; bp still points at the block that was just copied out */
if (!fetch(self, nframes))
self->process = process2;
return bp->count;
}
/*
 * Tail state. Fills `buffer` with nframes of trailing padding: a forward
 * extrapolation of the buffered input when at least 2 * LPC_ORDER frames
 * are available, silence otherwise. Switches to process3 and always
 * returns nframes.
 */
static int process2(extrapolater_t *self, void *buffer, unsigned nframes)
{
const pcm_sample_description_t *sfmt = pcm_get_format(self->src);
buffer_t *bp = &self->buffer[self->nbuffer];
buffer_t *bbp = &self->buffer[self->nbuffer ^ 1];
if (bp->count < 2 * LPC_ORDER) {
/*
 * Current buffer is too short on its own: append its frames to the
 * other buffer (if that one holds data and can be grown) and use the
 * combined block instead.
 */
size_t total = bp->count + bbp->count;
if (bbp->count &&
realloc_buffer(bbp, total * sfmt->bytes_per_frame) == 0)
{
memcpy(bbp->data + bbp->count * sfmt->channels_per_frame,
bp->data, bp->count * sfmt->bytes_per_frame);
bbp->count = total;
bp->count = 0;
bp = bbp;
self->nbuffer ^= 1;
}
}
self->process = process3;
if (bp->count >= 2 * LPC_ORDER)
extrapolate(self, bp, buffer, nframes);
else
/* still not enough history for the predictor: pad with silence */
memset(buffer, 0, nframes * sfmt->bytes_per_frame);
return nframes;
}
/* Final state: everything has been delivered, so report 0 frames. */
static int process3(extrapolater_t *self, void *buffer, unsigned nframes)
{
    (void)self;
    (void)buffer;
    (void)nframes;
    return 0;
}
/* vtable entry: delegate the read to the handler of the current state. */
static int read_frames(pcm_reader_t *reader, void *buffer, unsigned nframes)
{
    extrapolater_t *self = (extrapolater_t *)reader;
    int (*handler)(struct extrapolater_t *, void *, unsigned) = self->process;

    return handler(self, buffer, nframes);
}
/*
 * vtable entry: tear down the wrapped source, release both sample buffers
 * and the extrapolater itself, then clear the caller's pointer.
 */
static void teardown(pcm_reader_t **reader)
{
    extrapolater_t *self = (extrapolater_t *)*reader;
    unsigned idx;

    pcm_teardown(&self->src);
    for (idx = 0; idx < 2; ++idx)
        free(self->buffer[idx].data);
    free(self);
    *reader = 0;
}
/*
 * Reader operations of the extrapolater. Format, length and position are
 * forwarded to the wrapped source; read_frames and teardown are implemented
 * here. extrapolater_open() installs this table in every instance.
 */
static pcm_reader_vtbl_t my_vtable = {
get_format, get_length, get_position, read_frames, teardown
};
/*
 * Wrap `reader` in a new extrapolater.
 * Returns the new reader, or 0 when memory allocation fails. In that case
 * `reader` is left untouched. On success the wrapped reader is torn down
 * together with the extrapolater.
 */
pcm_reader_t *extrapolater_open(pcm_reader_t *reader)
{
    extrapolater_t *self = calloc(1, sizeof(extrapolater_t));

    if (!self)
        return 0;
    self->vtbl = &my_vtable;
    self->src = reader;
    self->process = process0;   /* start in the leading-padding state */
    return (pcm_reader_t *)self;
}

169
src/lpc.c Normal file
View File

@ -0,0 +1,169 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function: LPC low level routines
last mod: $Id: lpc.c 16227 2009-07-08 06:58:46Z xiphmont $
********************************************************************/
/* Some of these routines (autocorrelator, LPC coefficient estimator)
are derived from code written by Jutta Degener and Carsten Bormann;
thus we include their copyright below. The entirety of this file
is freely redistributable on the condition that both of these
copyright notices are preserved without modification. */
/* Preserved Copyright: *********************************************/
/* Copyright 1992, 1993, 1994 by Jutta Degener and Carsten Bormann,
Technische Universita"t Berlin
Any use of this software is permitted provided that this notice is not
removed and that neither the authors nor the Technische Universita"t
Berlin are deemed to have made any representations as to the
suitability of this software for any purpose nor are held responsible
for any defects of this software. THERE IS ABSOLUTELY NO WARRANTY FOR
THIS SOFTWARE.
As a matter of courtesy, the authors request to be informed about uses
this software has found, about bugs in this software, and about any
improvements that may be of general interest.
Berlin, 28.11.1994
Jutta Degener
Carsten Bormann
*********************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#if HAVE_STDINT_H
# include <stdint.h>
#endif
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "lpc.h"
#include "lpcm.h"
/* Autocorrelation LPC coeff generation algorithm invented by
   N. Levinson in 1947, modified by J. Durbin in 1959. */

/* Input : n elements of time domain data, read as data[i*stride]
   Output: m lpc coefficients written to lpci[0...m-1];
           the return value is the residual (excitation) energy.
   If m is negative, nothing is written and 0 is returned.
   If working memory cannot be allocated, lpci[] is zero-filled and
   0 is returned. */

float vorbis_lpc_from_data(short *data,float *lpci,int n,int m,int stride){
  double *aut;
  double *lpc;
  double error;
  double epsilon;
  int i,j;

  if(m<0)return 0.f;

  /* malloc(0) may return NULL, so never ask for zero bytes */
  aut=malloc(sizeof(*aut)*(m+1));
  lpc=malloc(sizeof(*lpc)*(m>0?m:1));
  if(!aut || !lpc){
    free(aut);
    free(lpc);
    for(j=0;j<m;j++)lpci[j]=0.f;
    return 0.f;
  }

  /* autocorrelation, p+1 lag coefficients */
  j=m+1;
  while(j--){
    double d=0; /* double needed for accumulator depth */
    /* 1073741824 = 32768*32768 scales products of 16-bit samples */
    for(i=j;i<n;i++)d+=(double)data[i*stride]*data[(i-j)*stride]/1073741824.0;
    aut[j]=d;
  }

  /* Generate lpc coefficients from autocorr values */

  /* set our noise floor to about -100dB */
  error=aut[0] * (1. + 1e-10);
  epsilon=1e-9*aut[0]+1e-10;

  for(i=0;i<m;i++){
    double r= -aut[i+1];

    if(error<epsilon){
      /* residual is below the noise floor: remaining coefficients are 0 */
      memset(lpc+i,0,(m-i)*sizeof(*lpc));
      goto done;
    }

    /* Sum up this iteration's reflection coefficient; note that in
       Vorbis we don't save it.  If anyone wants to recycle this code
       and needs reflection coefficients, save the results of 'r' from
       each iteration. */

    for(j=0;j<i;j++)r-=lpc[j]*aut[i-j];
    r/=error;

    /* Update LPC coefficients and total error */

    lpc[i]=r;
    for(j=0;j<i/2;j++){
      double tmp=lpc[j];

      lpc[j]+=r*lpc[i-1-j];
      lpc[i-1-j]+=r*tmp;
    }
    if(i&1)lpc[j]+=lpc[j]*r;

    error*=1.-r*r;
  }

 done:

  /* slightly damp the filter */
  {
    double g = .99;
    double damp = g;
    for(j=0;j<m;j++){
      lpc[j]*=damp;
      damp*=g;
    }
  }

  for(j=0;j<m;j++)lpci[j]=(float)lpc[j];

  /* we need the error value to know how big an impulse to hit the
     filter with later */

  free(aut);
  free(lpc);
  return (float)error;
}
void vorbis_lpc_predict(float *coeff,short *prime,int m,
                     short *data,long n,int stride){

  /* in:  coeff[0...m-1] LPC coefficients
          prime[0...m-1] initial values, read as prime[i*stride];
                         a null pointer primes the filter with zeros
     out: data[0...n-1] predicted samples, written as data[i*stride]

     If working memory cannot be allocated, the n output samples are
     set to 0 (silence). */

  long i,j,o,p;
  float y;
  float *work;

  if(n<=0)return; /* nothing to produce */

  work=malloc(sizeof(*work)*(m+n));
  if(!work){
    for(i=0;i<n;i++)data[i*stride]=0;
    return;
  }

  /* work[] holds the filter history scaled to [-1,1) */
  if(!prime)
    for(i=0;i<m;i++)
      work[i]=0.f;
  else
    for(i=0;i<m;i++)
      work[i]=prime[i*stride]/32768.0f;

  for(i=0;i<n;i++){
    y=0;
    o=i;
    p=m;
    for(j=0;j<m;j++)
      y-=work[o++]*coeff[--p];

    /* o == i+m here: append the prediction to the history */
    work[o]=y;
    data[i*stride]=lrint(pcm_clip(y*32768.0,-32768.0,32767.0));
  }
  free(work);
}

27
src/lpc.h Normal file
View File

@ -0,0 +1,27 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function: LPC low level routines
last mod: $Id: lpc.h 16037 2009-05-26 21:10:58Z xiphmont $
********************************************************************/
#ifndef _V_LPC_H_
#define _V_LPC_H_
/* simple linear scale LPC code */
/* Estimate m LPC coefficients (written to lpc[]) from n 16-bit samples
   read as data[i*stride]; returns the residual error energy. */
extern float vorbis_lpc_from_data(short *data,float *lpc,int n,int m,int stride);
/* Run the order-m predictor coeff[], primed with the m samples
   prime[i*stride], and write n predicted samples to data[i*stride]. */
extern void vorbis_lpc_predict(float *coeff,short *prime,int m,
short *data,long n,int stride);
#endif

View File

@ -13,35 +13,6 @@
#include "lpcm.h"
#include "m4af_endian.h"
/*
 * lrint() substitute, compiled only when _MSC_VER is defined and below 1800
 * (presumably because those MSVC runtimes do not provide lrint() -- confirm).
 */
#if defined(_MSC_VER) && _MSC_VER < 1800
# ifdef _M_IX86
/* 32-bit x86 build: load x onto the x87 stack and store it as an integer */
inline int lrint(double x)
{
int n;
_asm {
fld x
fistp n
}
return n;
}
# else
# include <emmintrin.h>
/* other targets: convert through the SSE2 scalar double-to-int32 intrinsic */
inline int lrint(double x)
{
return _mm_cvtsd_si32(_mm_load_sd(&x));
}
# endif
#endif
/*
 * Clamp n to the range [min_value, max_value].
 * The lower bound is tested first, so it wins if the bounds are swapped.
 */
static
inline double pcm_clip(double n, double min_value, double max_value)
{
    double clipped = n;

    if (n < min_value)
        clipped = min_value;
    else if (n > max_value)
        clipped = max_value;
    return clipped;
}
static
inline float pcm_i2f(int32_t n)
{

View File

@ -31,6 +31,36 @@ typedef struct pcm_sample_description_t {
#define PCM_BYTES_PER_CHANNEL(desc) \
((desc)->bytes_per_frame / (desc)->channels_per_frame)
/*
 * lrint() substitute, compiled only when _MSC_VER is defined and below 1800
 * (presumably because those MSVC runtimes do not provide lrint() -- confirm).
 * Declared static inline, so every file including this header gets its own copy.
 */
#if defined(_MSC_VER) && _MSC_VER < 1800
# ifdef _M_IX86
/* 32-bit x86 build: load x onto the x87 stack and store it as an integer */
static inline int lrint(double x)
{
int n;
_asm {
fld x
fistp n
}
return n;
}
# else
# include <emmintrin.h>
/* other targets: convert through the SSE2 scalar double-to-int32 intrinsic */
static inline int lrint(double x)
{
return _mm_cvtsd_si32(_mm_load_sd(&x));
}
# endif
#endif
/*
 * Limit n to the closed interval [min_value, max_value].
 * The lower bound is checked before the upper one.
 */
static
inline double pcm_clip(double n, double min_value, double max_value)
{
    return n < min_value ? min_value
         : n > max_value ? max_value
         : n;
}
int pcm_convert_to_native_sint16(const pcm_sample_description_t *format,
const void *input, uint32_t nframes,
int16_t *result);

View File

@ -489,51 +489,82 @@ int write_sample(FILE *ofp, m4af_ctx_t *m4af,
}
static
int encode(pcm_reader_t *reader, HANDLE_AACENCODER encoder,
uint32_t frame_length, FILE *ofp, m4af_ctx_t *m4af,
int show_progress)
int encode(aacenc_param_ex_t *params, pcm_reader_t *reader,
HANDLE_AACENCODER encoder, uint32_t frame_length,
m4af_ctx_t *m4af)
{
int16_t *ibuf = 0;
uint8_t *obuf = 0;
uint32_t olen;
uint32_t osize = 0;
struct buffer_t {
uint8_t *data;
uint32_t len, size;
};
int16_t *ibuf = 0, *ip;
struct buffer_t obuf[2] = {{ 0 }}, *obp;
unsigned flip = 0;
int nread = 1;
int consumed;
int rc = -1;
int frames_written = 0;
int remaining, consumed;
int frames_written = 0, encoded = 0;
aacenc_progress_t progress = { 0 };
const pcm_sample_description_t *fmt = pcm_get_format(reader);
ibuf = malloc(frame_length * fmt->bytes_per_frame);
aacenc_progress_init(&progress, pcm_get_length(reader), fmt->sample_rate);
do {
for (;;) {
/*
* Since we delay the write, we cannot just exit loop when interrupted.
* Instead, we regard it as EOF.
*/
if (g_interrupted)
nread = 0;
else if (nread) {
if (nread > 0) {
if ((nread = pcm_read_frames(reader, ibuf, frame_length)) < 0) {
fprintf(stderr, "ERROR: read failed\n");
goto END;
}
if (show_progress)
if (!params->silent)
aacenc_progress_update(&progress, pcm_get_position(reader),
fmt->sample_rate * 2);
}
if ((consumed = aac_encode_frame(encoder, fmt, ibuf, nread,
&obuf, &olen, &osize)) < 0)
goto END;
if (olen > 0) {
if (write_sample(ofp, m4af, obuf, olen, frame_length) < 0)
ip = ibuf;
remaining = nread;
do {
obp = &obuf[flip];
consumed = aac_encode_frame(encoder, fmt, ip, remaining,
&obp->data, &obp->len, &obp->size);
if (consumed < 0) goto END;
if (consumed == 0 && obp->len == 0) goto DONE;
if (obp->len == 0) break;
remaining -= consumed;
ip += consumed * fmt->channels_per_frame;
flip ^= 1;
/*
* As we pad 1 frame at beginning and ending by our extrapolator,
* we want to drop them.
* We delay output by 1 frame by double buffering, and discard
* second frame and final frame from the encoder.
* Since sbr_header is included in the first frame (in case of
* SBR), we cannot discard first frame. So we pick second instead.
*/
++encoded;
if (encoded == 1 || encoded == 3)
continue;
obp = &obuf[flip];
if (write_sample(params->output_fp, m4af, obp->data, obp->len,
frame_length) < 0)
goto END;
++frames_written;
}
} while (nread > 0 || olen > 0);
if (show_progress)
} while (remaining > 0);
}
DONE:
if (!params->silent)
aacenc_progress_finish(&progress, pcm_get_position(reader));
rc = frames_written;
END:
if (ibuf) free(ibuf);
if (obuf) free(obuf);
if (obuf[0].data) free(obuf[0].data);
if (obuf[1].data) free(obuf[1].data);
return rc;
}
@ -709,10 +740,13 @@ pcm_reader_t *open_input(aacenc_param_ex_t *params)
}
break;
default:
fprintf(stderr, "ERROR: unsupported input file\n");
goto END;
}
}
return pcm_open_sint16_converter(reader);
if ((reader = pcm_open_sint16_converter(reader)) != 0)
reader = extrapolater_open(reader);
return reader;
END:
return 0;
}
@ -794,8 +828,7 @@ int main(int argc, char **argv)
m4af_set_priming_mode(m4af, params.gapless_mode + 1);
m4af_begin_write(m4af);
}
frame_count = encode(reader, encoder, aacinfo.frameLength,
params.output_fp, m4af, !params.silent);
frame_count = encode(&params, reader, encoder, aacinfo.frameLength, m4af);
if (frame_count < 0)
goto END;
if (m4af) {

View File

@ -109,4 +109,6 @@ int apple_chan_chunk(pcm_io_context_t *io, uint32_t chunk_size,
pcm_reader_t *pcm_open_sint16_converter(pcm_reader_t *reader);
pcm_reader_t *extrapolater_open(pcm_reader_t *reader);
#endif