1
0
mirror of https://github.com/nu774/fdkaac.git synced 2025-06-05 23:29:14 +02:00

16 Commits

Author SHA1 Message Date
5d6ea58e92 bump version 2013-11-08 12:48:14 +09:00
71e4764062 fix to use libFDKAAC signaling mode 1
It turned out that mode 1 is explicit, backward compatible signaling.
We don't have to implement it on our side.
2013-11-08 12:23:52 +09:00
a26e3cf3e8 fix README 2013-11-05 15:45:08 +09:00
c6e18ebec1 rename README.md -> README 2013-11-04 22:53:28 +09:00
c1db26327a Update README -> README.md, generate groff manpage from README.md 2013-11-04 15:33:01 +09:00
1c1d1931cb update ChangeLog 2013-11-04 12:19:36 +09:00
2ecae04c89 update git2changelog to accept non-ascii output 2013-11-04 12:18:27 +09:00
30c77dfb6f add manpage 2013-11-04 12:15:09 +09:00
ed646ccf79 fix gcc warnings 2013-11-03 22:40:26 +09:00
8009f87dca Merge pull request #7 from rbrito/misc-fixes
gitignore: Add list of files to ignore.
2013-11-03 05:26:02 -08:00
d09acb7283 gitignore: Add list of files to ignore.
All of these files are generated by autotools or by the compilation and they
do not belong in a version control.

Signed-off-by: Rogério Brito <rbrito@ime.usp.br>
2013-11-03 10:45:51 -02:00
8ac221b853 update ChangeLog 2013-11-03 18:13:59 +09:00
2c116b15d5 bump version 2013-11-03 18:12:43 +09:00
e1adc17835 add --sbr-ratio to support AACENC_SBR_RATIO appeared on libFDK 3.4.12 2013-11-03 18:09:56 +09:00
3c0f152d39 support 7.1 channel mode added on FDK 3.4.12 2013-11-03 18:08:46 +09:00
5732f1f6c5 update ChangeLog 2013-10-30 19:05:13 +09:00
10 changed files with 1018 additions and 209 deletions

22
.gitignore vendored Normal file
View File

@ -0,0 +1,22 @@
*.o
*~
Makefile
Makefile.in
aclocal.m4
autom4te.cache
compile
config.guess
config.h
config.h.in
config.log
config.status
config.sub
configure
depcomp
install-sh
missing
missings/.deps/
src/.deps/
src/.dirstamp
stamp-h1
fdkaac

View File

@ -1,7 +1,83 @@
2013-10-25 nu774 <honeycomb77@gmail.com>
2013-11-04 nu774 <honeycomb77@gmail.com>
* update ChangeLog [HEAD]
* update git2changelog to accept non-ascii output
* add manpage
2013-11-03 nu774 <honeycomb77@gmail.com>
* fix gcc warnings [origin/master]
* Merge pull request #7 from rbrito/misc-fixes
2013-11-03 Rogério Brito <rbrito@ime.usp.br>
* gitignore: Add list of files to ignore.
2013-11-03 nu774 <honeycomb77@gmail.com>
* update ChangeLog
* bump version [v0.5.0]
* add --sbr-ratio to support AACENC_SBR_RATIO appeared on libFDK 3.4.12
* support 7.1 channel mode added on FDK 3.4.12
2013-10-30 nu774 <honeycomb77@gmail.com>
* update ChangeLog
* bump version [v0.4.2]
* use tell() to obtain data chunk offset
* rename aacenc_result_t -> aacenc_frame_t, simplify write_sample()
* prepend 1 sample zero padding in case of SBR and enc_delay is odd
* cleanup interface of aac_encode_frame()
* add some copyright notice
2013-10-29 nu774 <honeycomb77@gmail.com>
* smart padding for better gapless playback
* fix unused variable warning
* fix warning: cast size_t as sprintf() arg to int
* fix vcxproj
* fix pcm_seek() to inline
2013-10-27 nu774 <honeycomb77@gmail.com>
* bump version [v0.4.1]
* add --include-sbr-delay
* fix help message: show -I as shorthand for --ignorelength
* remove --sbr-signaling
2013-10-26 nu774 <honeycomb77@gmail.com>
* re-fix #ifdef cond for lrint()
* tag mapping: add recorded date and tempo, remove performer->artist
2013-10-25 nu774 <honeycomb77@gmail.com>
* fix MSVC12 build issue
* fix build issue on platform where fileno is a naive macro
* update ChangeLog
* bump version [v0.4.0]
* update README

View File

@ -18,6 +18,8 @@ fdkaac_SOURCES = \
src/progress.c \
src/wav_reader.c
dist_man_MANS = man/fdkaac.1
fdkaac_LDADD = \
@LIBICONV@ -lfdk-aac -lm

412
README
View File

@ -1,122 +1,332 @@
==========================================================================
fdkaac - command line frontend encoder for libfdk-aac
==========================================================================
% FDKAAC(1)
% nu774 <honeycomb77@gmail.com>
% November, 2013
Prerequisites
-------------
You need libfdk-aac.
On Posix environment, you will also need GNU gettext (for iconv.m4) and
GNU autoconf/automake.
NAME
====
How to build on Posix environment
---------------------------------
First, you need to build libfdk-aac and install on your system.
Once you have done it, the following will do the task.
(MinGW build can be done the same way, and doesn't require gettext/iconv)
fdkaac - command line frontend for libfdk-aac encoder  
$ autoreconf -i
$ ./configure && make && make install
SYNOPSIS
========
How to build on MSVC
--------------------
First you have to extract libfdk-aac source here, so that directory tree will
look like the following:
+- fdk-aac ---+-documentation
| +-libAACdec
| +-libAACenc
| :
+- m4
+- missings
+- MSVC
+- src
**fdkaac** [OPTIONS] [FILE]
MSVC solution for Visual Studio 2010 is under MSVC directory.
DESCRIPTION
===========
Available input format
----------------------
WAV, RF64, CAF, RAW, upto 32bit int / 64bit float format is supported.
Metadata in CAF info chunk can be read and copied to the resulting m4a.
This is especially useful and works well when you pipe from ffmpeg via CAF.
For example, you can copy tag from original "foo.flac" to "foo.m4a"
through the following pipeline:
**fdkaac** reads linear PCM audio in either WAV, raw PCM, or CAF format,
and encodes it into either M4A / AAC file.
$ ffmpeg -i foo.flac -f caf - | fdkaac -m3 - -o foo.m4a
If the input file is "-", data is read from stdin. Likewise, if the
output file is "-", data is written to stdout if one of streamable AAC
transport formats is selected by **-f**.
Since FDK AAC encoder is implemented based on fixed point integer,
encoder itself handles 16bit input only.
Therefore, when feeding non-integer input, be careful so that input doesn't
exceed 0dBFS to avoid hard clips.
You might also want to apply dither/noise shape beforehand when your input
has higher resolution.
When CAF input and M4A output is used, tags in CAF file are copied into
the resulting M4A.  
Note that fdkaac doesn't automatically resample for you
when input samplerate is not supported by AAC spec.
OPTIONS
=======
Tagging Options
---------------
Generic tagging options like --tag, --tag-from-file, --long-tag allows you
to set arbitrary tags.
Available tags and their fcc (four char code) for --tag and --tag-from-file
can be found at http://code.google.com/p/mp4v2/wiki/iTunesMetadata
-h, --help
: Show command help
For tags such as Artist where first char of fcc is copyright sign,
you can skip first char and just say like --tag="ART:Foo Bar" or
--tag-from-file=lyr:/path/to/your/lyrics.txt
-o \<FILE\>
: Output filename.
Currently, --tag-from-file just stores file contents into m4a without any
character encoding / line terminater conversion.
Therefore, only use UTF-8 (without BOM) when setting text tags by this option.
-p, --profile \<n\>
: Target profile (MPEG4 audio object type, AOT)
On the other hand, --tag / --long-tag (and other command line arguments) are
converted from locale character encoding to UTF-8 on Posix environment.
On Windows, command line arguments are always treated as Unicode.
2
: MPEG-4 AAC LC (default)
Tagging using JSON
------------------
With --tag-from-json, fdkaac can read JSON file and set tags from it.
By default, tags are assumed to be in the root object(dictionary) like this:
5
: MPEG-4 HE-AAC (SBR)
{
"title": "No Expectations",
"artist": "The Rolling Stones",
"album": "Beggars Banquet",
"track": 2
}
29
: MPEG-4 HE-AAC v2 (SBR+PS)
In this case, you can simply specify the filename like:
--tag-from-json=/path/to/json
23
: MPEG-4 AAC LD
If the object containing tags is placed somewhere else, you can optionally
specify the path of the object with dotted notation.
39
: MPEG-4 AAC ELD
{
"format" : {
"filename" : "Middle Curse.flac",
"nb_streams" : 1,
"format_name" : "flac",
"format_long_name" : "raw FLAC",
"start_time" : "N/A",
"duration" : "216.146667",
"size" : "11851007.000000",
"bit_rate" : "438628.000000",
"tags" : {
"ALBUM" : "Scary World Theory",
"ARTIST" : "Lali Puna",
"DATE" : "2001",
"DISCID" : "9208CC0A",
"TITLE" : "Middle Curse",
"TRACKTOTAL" : "10",
"track" : "2"
}
}
}
129
: MPEG-2 AAC LC
In this example, tags are placed under the object "format.tags".
("format" is a child of the root, and "tags" is a child of the "format").
In this case, you can say:
--tag-from-json=/path/to/json?format.tags
132
: MPEG-2 HE-AAC (SBR)
For your information, ffprobe of ffmpeg project (or avprobe of libav) can
output media information/metadata in json format like this.
156
: MPEG-2 HE-AAC v2 (SBR+PS)
-b, --bitrate \<n\>
: Target bitrate (for CBR)
-m, --bitrate-mode \<n\>
: Bitrate configuration mode. Available VBR quality value depends on
other parameters such as profile, sample rate, or number of
channels.
0
: CBR (default)
1-5
: VBR (higher value -\> higher bitrate)
-w, --bandwidth \<n\>
: Frequency bandwidth (lowpass cut-off frequency) in Hz. Available on
AAC LC only.
-a, --afterburner \<n\>
: Configure afterburner mode. When enabled, quality is increased at
the expense of additional computational workload.
0
: Off
1
: On (default)
-L, --lowdelay-sbr \<n\>
: Configure SBR activity on AAC ELD.
-1
: Use ELD SBR auto configuration
0
: Disable SBR on ELD (default)
1
: Enable SBR on ELD
-s, --sbr-ratio \<n\>
: Controls activation of downsampled SBR.
0
: Use lib default (default)
1
: Use downsampled SBR (default for ELD+SBR)
2
: Use dual-rate SBR (default for HE-AAC)
Dual-rate SBR is what is normally used for HE-AAC, where AAC is
encoded at half the sample rate of SBR, hence "dual rate". On the
other hand, downsampled SBR uses same sample rate for both of AAC
and SBR (single rate), therefore downsampled SBR typically consumes
more bitrate.
Downsampled SBR is a newly introduced feature in FDK encoder library
version 3.4.12. When libfdk-aac in the system doesn't support this,
dual-rate SBR will be used. When available, dual-rate SBR is the
default for HE-AAC and downsampled SBR is the default for ELD+SBR.
Note that downsampled HE-AAC is not so common as dual-rate one. When
downsampled HE-AAC is selected, **fdkaac** is forced to choose
explicit hierarchical SBR signaling, which (at least) iTunes doesn't
accept.
-f, --transport-format \<n\>
: Transport format. Tagging and gapless playback is only available on
M4A. Streaming to stdout is only available on others.
0
: M4A (default)
1
: ADIF
2
: ADTS
6
: LATM MCP=1
7
: LATM MCP=0
10
: LOAS/LATM (LATM within LOAS)
-C, --adts-crc-check
: Add CRC protection on ADTS header.
-P, --header-period \<n\>
: StreamMuxConfig/PCE repetition period in the transport layer.
-G, --gapless-mode \<n\>
: Method to declare amount of encoder delay (and padding) in M4A
container. These values are mandatory for proper gapless playback on
player side.
0
: iTunSMPB (default)
1
: ISO standard (edts and sgpd)
2
: Both
--include-sbr-delay
: When specified, count SBR decoder delay in encoder delay.
This is not iTunes compatible and will lead to gapless playback
issue on LC only decoder, but this is the default behavior of FDK
library.
Whether counting SBR decoder delay in encoder delay or not result in
incompatibility in gapless playback. You should pick which one will
work for your favorite player.
However, it's better not to choose SBR at all if you want gapless
playback. LC doesn't have such issues.
-I, --ignorelength
: Ignore length field of data chunk in input WAV file.
-S, --silent
: Don't print progress messages.
--moov-before-mdat
: Place moov box before mdat box in M4A container. This option might
be important for some hardware players, that are known to refuse
moov box placed after mdat box.
-R, --raw
: Regard input as raw PCM.
--raw-channels \<n\>
: Specify number of channels of raw input (default: 2)
--raw-rate \<n\>
: Specify sample rate of raw input (default: 44100)
--raw-format \<spec\>
: Specify sample format of raw input (default: "S16L"). **Spec** is as
the following (case insensitive):
1st char -- type of sample
: **S** (igned) | **U** (nsigned) | **F** (loat)
2nd part (in digits)
: bits per channel
Last char -- endianness (can be omitted)
: **L** (ittle, default) | **B** (ig)
--title \<string\>
: Set title tag.
--artist \<string\>
: Set artist tag.
--album \<string\>
: Set album tag.
--genre \<string\>
: Set genre tag.
--date \<string\>
: Set date tag.
--composer \<string\>
: Set composer tag.
--grouping \<string\>
: Set grouping tag.
--comment \<string\>
: Set comment tag.
--album-artist \<string\>
: Set album artist tag.
--track \<number[/total]\>
: Set track tag, with or without number of total tracks.
--disk \<number[/total]\>
: Set disk tag, with or without number of total discs.
--tempo \<n\>
: Set tempo (BPM) tag.
--tag \<fcc\>:\<value\>
: Set iTunes predefined tag with explicit fourcc key and value. See
[https://code.google.com/p/mp4v2/wiki/iTunesMetadata](https://code.google.com/p/mp4v2/wiki/iTunesMetadata)
for known predefined keys. You can omit first char of **fcc** when
it is the copyright sign.
--tag-from-file \<fcc\>:\<filename\>
: Same as --tag, but set content of file as tag value.
--long-tag \<name\>:\<value\>
: Set arbitrary tag as iTunes custom metadata. Stored in
com.apple.iTunes field.
--tag-from-json \<filename[?dot\_notation]\>
: Read tags from JSON. By default, tags are assumed to be direct
children of the root object in JSON. Optionally you can specify
arbitrary dot notation to locate the object containing tags.
 
EXAMPLES
========
Encode WAV file into a M4A file. MPEG4 AAC LC, VBR quality 3:
fdkaac -m3 foo.wav
Encode WAV file into a M4A file. MPEG4 HE-AAC, bitrate 64kbps:
fdkaac -p5 -b64 foo.wav
Piping from **ffmpeg** (you need version supporting CAF output):
ffmpeg -i foo.flac -f caf - | fdkaac -b128 - -o foo.m4a
Import tags via json:
ffprobe -v 0 -of json -show_format foo.flac >foo.json
flac -dc foo.flac | \
fdkaac - -ox.m4a -m2 --tag-from-json=foo.json?format.tags
NOTES
=====
Up to 32bit integer or 64bit floating point format is supported as input.
However, FDK library is implemented based on fixed point math and only
supports 16bit integer PCM. Therefore, be wary of clipping. You might
want to dither/noise shape beforehand when your input has higher
resolution.
Following channel layouts are supported by the encoder.
1ch
: C
2ch
: L R
3ch
: C L R
4ch
: C L R Cs
5ch
: C L R Ls Rs
5.1ch
: C L R Ls Rs LFE
7.1ch (front)
: C Lc Rc L R Ls Rs LFE
7.1ch (rear)
: C L R Ls Rs Rls Rrs LFE
Note that not all tags can be read/written this way.

View File

@ -39,6 +39,6 @@ with Popen(GITLOG_CMD, shell=False, stdout=PIPE).stdout as pipe:
commits = parse_gitlog(pipe)
commits_by_date_author = groupby(commits, key=lambda x: (x.date, x.author))
for (date, author), commits in commits_by_date_author:
output('{0} {1}\n\n'.format(date, author))
output(u'{0} {1}\n\n'.format(date, author).encode('utf-8'))
for c in commits:
output(' * {0}{1}\n\n'.format(c.subject, c.ref))
output(u' * {0}{1}\n\n'.format(c.subject, c.ref).encode('utf-8'))

497
man/fdkaac.1 Normal file
View File

@ -0,0 +1,497 @@
.TH FDKAAC 1 "November, 2013"
.SH NAME
.PP
fdkaac - command line frontend for libfdk-aac encoder \
.SH SYNOPSIS
.PP
\f[B]fdkaac\f[] [OPTIONS] [FILE]
.SH DESCRIPTION
.PP
\f[B]fdkaac\f[] reads linear PCM audio in either WAV, raw PCM, or CAF
format, and encodes it into either M4A / AAC file.
.PP
If the input file is "-", data is read from stdin.
Likewise, if the output file is "-", data is written to stdout if one of
streamable AAC transport formats is selected by \f[B]-f\f[].
.PP
When CAF input and M4A output is used, tags in CAF file are copied into
the resulting M4A.
\
.SH OPTIONS
.TP
.B -h, --help
Show command help
.RS
.RE
.TP
.B -o <FILE>
Output filename.
.RS
.RE
.TP
.B -p, --profile <n>
Target profile (MPEG4 audio object type, AOT)
.RS
.TP
.B 2
MPEG-4 AAC LC (default)
.RS
.RE
.TP
.B 5
MPEG-4 HE-AAC (SBR)
.RS
.RE
.TP
.B 29
MPEG-4 HE-AAC v2 (SBR+PS)
.RS
.RE
.TP
.B 23
MPEG-4 AAC LD
.RS
.RE
.TP
.B 39
MPEG-4 AAC ELD
.RS
.RE
.TP
.B 129
MPEG-2 AAC LC
.RS
.RE
.TP
.B 132
MPEG-2 HE-AAC (SBR)
.RS
.RE
.TP
.B 156
MPEG-2 HE-AAC v2 (SBR+PS)
.RS
.RE
.RE
.TP
.B -b, --bitrate <n>
Target bitrate (for CBR)
.RS
.RE
.TP
.B -m, --bitrate-mode <n>
Bitrate configuration mode.
Available VBR quality value depends on other parameters such as profile,
sample rate, or number of channels.
.RS
.TP
.B 0
CBR (default)
.RS
.RE
.TP
.B 1-5
VBR (higher value -> higher bitrate)
.RS
.RE
.RE
.TP
.B -w, --bandwidth <n>
Frequency bandwidth (lowpass cut-off frequency) in Hz.
Available on AAC LC only.
.RS
.RE
.TP
.B -a, --afterburner <n>
Configure afterburner mode.
When enabled, quality is increased at the expense of additional
computational workload.
.RS
.TP
.B 0
Off
.RS
.RE
.TP
.B 1
On (default)
.RS
.RE
.RE
.TP
.B -L, --lowdelay-sbr <n>
Configure SBR activity on AAC ELD.
.RS
.TP
.B -1
Use ELD SBR auto configuration
.RS
.RE
.TP
.B 0
Disable SBR on ELD (default)
.RS
.RE
.TP
.B 1
Enable SBR on ELD
.RS
.RE
.RE
.TP
.B -s, --sbr-ratio <n>
Controls activation of downsampled SBR.
.RS
.TP
.B 0
Use lib default (default)
.RS
.RE
.TP
.B 1
Use downsampled SBR (default for ELD+SBR)
.RS
.RE
.TP
.B 2
Use dual-rate SBR (default for HE-AAC)
.RS
.RE
.PP
Dual-rate SBR is what is normally used for HE-AAC, where AAC is encoded
at half the sample rate of SBR, hence "dual rate".
On the other hand, downsampled SBR uses same sample rate for both of AAC
and SBR (single rate), therefore downsampled SBR typically consumes more
bitrate.
.PP
Downsampled SBR is a newly introduced feature in FDK encoder library
version 3.4.12.
When libfdk-aac in the system doesn\[aq]t support this, dual-rate SBR
will be used.
When available, dual-rate SBR is the default for HE-AAC and downsampled
SBR is the default for ELD+SBR.
.PP
Note that downsampled HE-AAC is not so common as dual-rate one.
When downsampled HE-AAC is selected, \f[B]fdkaac\f[] is forced to choose
explicit hierarchical SBR signaling, which (at least) iTunes doesn\[aq]t
accept.
.RE
.TP
.B -f, --transport-format <n>
Transport format.
Tagging and gapless playback is only available on M4A.
Streaming to stdout is only available on others.
.RS
.TP
.B 0
M4A (default)
.RS
.RE
.TP
.B 1
ADIF
.RS
.RE
.TP
.B 2
ADTS
.RS
.RE
.TP
.B 6
LATM MCP=1
.RS
.RE
.TP
.B 7
LATM MCP=0
.RS
.RE
.TP
.B 10
LOAS/LATM (LATM within LOAS)
.RS
.RE
.RE
.TP
.B -C, --adts-crc-check
Add CRC protection on ADTS header.
.RS
.RE
.TP
.B -P, --header-period <n>
StreamMuxConfig/PCE repetition period in transport layer.
.RS
.RE
.TP
.B -G, --gapless-mode <n>
Method to declare amount of encoder delay (and padding) in M4A
container.
These values are mandatory for proper gapless playback on player side.
.RS
.TP
.B 0
iTunSMPB (default)
.RS
.RE
.TP
.B 1
ISO standard (edts and sgpd)
.RS
.RE
.TP
.B 2
Both
.RS
.RE
.RE
.TP
.B --include-sbr-delay
When specified, count SBR decoder delay in encoder delay.
.RS
.PP
This is not iTunes compatible and will lead to gapless playback issue on
LC only decoder, but this is the default behavior of FDK library.
.PP
Whether counting SBR decoder delay in encoder delay or not result in
incompatibility in gapless playback.
You should pick which one will work for your favorite player.
.PP
However, it\[aq]s better not to choose SBR at all if you want gapless
playback.
LC doesn\[aq]t have such issues.
.RE
.TP
.B -I, --ignorelength
Ignore length field of data chunk in input WAV file.
.RS
.RE
.TP
.B -S, --silent
Don\[aq]t print progress messages.
.RS
.RE
.TP
.B --moov-before-mdat
Place moov box before mdat box in M4A container.
This option might be important for some hardware players, that are known
to refuse moov box placed after mdat box.
.RS
.RE
.TP
.B -R, --raw
Regard input as raw PCM.
.RS
.RE
.TP
.B --raw-channels <n>
Specify number of channels of raw input (default: 2)
.RS
.RE
.TP
.B --raw-rate <n>
Specify sample rate of raw input (default: 44100)
.RS
.RE
.TP
.B --raw-format <spec>
Specify sample format of raw input (default: "S16L").
\f[B]Spec\f[] is as the following (case insensitive):
.RS
.TP
.B 1st char -- type of sample
\f[B]S\f[] (igned) | \f[B]U\f[] (nsigned) | \f[B]F\f[] (loat)
.RS
.RE
.TP
.B 2nd part (in digits)
bits per channel
.RS
.RE
.TP
.B Last char -- endianness (can be omitted)
\f[B]L\f[] (ittle, default) | \f[B]B\f[] (ig)
.RS
.RE
.RE
.TP
.B --title <string>
Set title tag.
.RS
.RE
.TP
.B --artist <string>
Set artist tag.
.RS
.RE
.TP
.B --album <string>
Set album tag.
.RS
.RE
.TP
.B --genre <string>
Set genre tag.
.RS
.RE
.TP
.B --date <string>
Set date tag.
.RS
.RE
.TP
.B --composer <string>
Set composer tag.
.RS
.RE
.TP
.B --grouping <string>
Set grouping tag.
.RS
.RE
.TP
.B --comment <string>
Set comment tag.
.RS
.RE
.TP
.B --album-artist <string>
Set album artist tag.
.RS
.RE
.TP
.B --track <number[/total]>
Set track tag, with or without number of total tracks.
.RS
.RE
.TP
.B --disk <number[/total]>
Set disk tag, with or without number of total discs.
.RS
.RE
.TP
.B --tempo <n>
Set tempo (BPM) tag.
.RS
.RE
.TP
.B --tag <fcc>:<value>
Set iTunes predefined tag with explicit fourcc key and value.
See
https://code.google.com/p/mp4v2/wiki/iTunesMetadata (https://code.google.com/p/mp4v2/wiki/iTunesMetadata)
for known predefined keys.
You can omit first char of \f[B]fcc\f[] when it is the copyright sign.
.RS
.RE
.TP
.B --tag-from-file <fcc>:<filename>
Same as --tag, but set content of file as tag value.
.RS
.RE
.TP
.B --long-tag <name>:<value>
Set arbitrary tag as iTunes custom metadata.
Stored in com.apple.iTunes field.
.RS
.RE
.TP
.B --tag-from-json <filename[?dot_notation]>
Read tags from JSON.
By default, tags are assumed to be direct children of the root object in
JSON.
Optionally you can specify arbitrary dot notation to locate the object
containing tags.
.RS
.RE
.PP
\
.SH EXAMPLES
.PP
Encode WAV file into a M4A file.
MPEG4 AAC LC, VBR quality 3:
.IP
.nf
\f[C]
fdkaac\ -m3\ foo.wav
\f[]
.fi
.PP
Encode WAV file into a M4A file.
MPEG4 HE-AAC, bitrate 64kbps:
.IP
.nf
\f[C]
fdkaac\ -p5\ -b64\ foo.wav
\f[]
.fi
.PP
Piping from \f[B]ffmpeg\f[] (you need version supporting CAF output):
.IP
.nf
\f[C]
ffmpeg\ -i\ foo.flac\ -f\ caf\ -\ |\ fdkaac\ -b128\ -\ -o\ foo.m4a
\f[]
.fi
.PP
Import tags via json:
.IP
.nf
\f[C]
ffprobe\ -v\ 0\ -of\ json\ -show_format\ foo.flac\ >foo.json
flac\ -dc\ foo.flac\ |\ \\
fdkaac\ -\ -ox.m4a\ -m2\ --tag-from-json=foo.json?format.tags
\f[]
.fi
.SH NOTES
.PP
Up to 32bit integer or 64bit floating point format is supported as input.
However, FDK library is implemented based on fixed point math and only
supports 16bit integer PCM.
Therefore, be careful of clipping.
You might want to dither/noise shape beforehand when your input has
higher resolution.
.PP
Following channel layouts are supported by the encoder.
.TP
.B 1ch
C
.RS
.RE
.TP
.B 2ch
L R
.RS
.RE
.TP
.B 3ch
C L R
.RS
.RE
.TP
.B 4ch
C L R Cs
.RS
.RE
.TP
.B 5ch
C L R Ls Rs
.RS
.RE
.TP
.B 5.1ch
C L R Ls Rs LFE
.RS
.RE
.TP
.B 7.1ch (front)
C Lc Rc L R Ls Rs LFE
.RS
.RE
.TP
.B 7.1ch (rear)
C L R Ls Rs Rls Rrs LFE
.RS
.RE
.SH AUTHORS
nu774 <honeycomb77@gmail.com>.

View File

@ -13,6 +13,17 @@
#include <string.h>
#include "aacenc.h"
/*
 * Report whether the SBR ratio setting (AACENC_SBR_RATIO) can be used.
 *
 * Returns 0 when the libfdk-aac headers seen at compile time are older than
 * 3.4. Otherwise returns non-zero only if the version reported through
 * aacenc_get_lib_info() is greater than 0x03040000.
 */
int aacenc_is_sbr_ratio_available()
{
#if AACENCODER_LIB_VL0 < 3 || (AACENCODER_LIB_VL0==3 && AACENCODER_LIB_VL1<4)
    return 0;
#else
    LIB_INFO lib_info;
    /*
     * aacenc_get_lib_info() only fills its argument when the encoder module
     * entry is found. Start from an all-zero struct so that a failed lookup
     * yields "not available" instead of reading an indeterminate version.
     */
    memset(&lib_info, 0, sizeof lib_info);
    aacenc_get_lib_info(&lib_info);
    return lib_info.version > 0x03040000;
#endif
}
int aacenc_is_sbr_active(const aacenc_param_t *params)
{
switch (params->profile) {
@ -26,66 +37,31 @@ int aacenc_is_sbr_active(const aacenc_param_t *params)
return 0;
}
static const unsigned aacenc_sampling_freq_tab[] = {
96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
16000, 12000, 11025, 8000, 7350, 0, 0, 0
};
static
unsigned sampling_freq_index(unsigned rate)
int aacenc_is_dual_rate_sbr(const aacenc_param_t *params)
{
unsigned i;
for (i = 0; aacenc_sampling_freq_tab[i]; ++i)
if (aacenc_sampling_freq_tab[i] == rate)
return i;
return 0xf;
if (params->profile == AOT_PS || params->profile == AOT_MP2_PS)
return 1;
else if (params->profile == AOT_SBR || params->profile == AOT_MP2_SBR)
return params->sbr_ratio == 0 || params->sbr_ratio == 2;
else if (params->profile == AOT_ER_AAC_ELD && params->lowdelay_sbr)
return params->sbr_ratio == 2;
return 0;
}
/*
* Append backward compatible SBR/PS signaling to implicit signaling ASC,
* if SBR/PS is present.
*/
int aacenc_mp4asc(const aacenc_param_t *params,
const uint8_t *asc, uint32_t ascsize,
uint8_t *outasc, uint32_t *outsize)
void aacenc_get_lib_info(LIB_INFO *info)
{
unsigned asc_sfreq = aacenc_sampling_freq_tab[(asc[0]&0x7)<<1 |asc[1]>>7];
switch (params->profile) {
case AOT_SBR:
case AOT_PS:
if (*outsize < ascsize + 3)
return -1;
memcpy(outasc, asc, ascsize);
/* syncExtensionType:11 (value:0x2b7) */
outasc[ascsize+0] = 0x2b << 1;
outasc[ascsize+1] = 0x7 << 5;
/* extensionAudioObjectType:5 (value:5)*/
outasc[ascsize+1] |= 5;
/* sbrPresentFlag:1 (value:1) */
outasc[ascsize+2] = 0x80;
/* extensionSamplingFrequencyIndex:4 */
outasc[ascsize+2] |= sampling_freq_index(asc_sfreq << 1) << 3;
if (params->profile == AOT_SBR) {
*outsize = ascsize + 3;
break;
LIB_INFO *lib_info = 0;
lib_info = calloc(FDK_MODULE_LAST, sizeof(LIB_INFO));
if (aacEncGetLibInfo(lib_info) == AACENC_OK) {
int i;
for (i = 0; i < FDK_MODULE_LAST; ++i) {
if (lib_info[i].module_id == FDK_AACENC) {
memcpy(info, &lib_info[i], sizeof(LIB_INFO));
break;
}
}
if (*outsize < ascsize + 5)
return -1;
/* syncExtensionType:11 (value:0x548) */
outasc[ascsize+2] |= 0x5;
outasc[ascsize+3] = 0x48;
/* psPresentFlag:1 (value:1) */
outasc[ascsize+4] = 0x80;
*outsize = ascsize + 5;
break;
default:
if (*outsize < ascsize)
return -1;
memcpy(outasc, asc, ascsize);
*outsize = ascsize;
}
return 0;
free(lib_info);
}
static
@ -93,10 +69,10 @@ int aacenc_channel_mode(const pcm_sample_description_t *format)
{
uint32_t chanmask = format->channel_mask;
if (format->channels_per_frame > 6)
if (format->channels_per_frame > 8)
return 0;
if (!chanmask) {
static uint32_t defaults[] = { 0x4, 0x3, 0x7, 0, 0x37, 0x3f };
static uint32_t defaults[] = { 0x4, 0x3, 0x7, 0, 0x37, 0x3f, 0, 0x63f };
chanmask = defaults[format->channels_per_frame - 1];
}
switch (chanmask) {
@ -108,6 +84,10 @@ int aacenc_channel_mode(const pcm_sample_description_t *format)
case 0x107: return MODE_1_2_1;
case 0x607: return MODE_1_2_2;
case 0x60f: return MODE_1_2_2_1;
#if AACENCODER_LIB_VL0 > 3 || (AACENCODER_LIB_VL0==3 && AACENCODER_LIB_VL1>=4)
case 0xff: return MODE_1_2_2_2_1;
case 0x63f: return MODE_7_1_REAR_SURROUND;
#endif
}
return 0;
}
@ -118,8 +98,11 @@ int aacenc_init(HANDLE_AACENCODER *encoder, const aacenc_param_t *params,
{
int channel_mode;
int aot;
LIB_INFO lib_info;
*encoder = 0;
aacenc_get_lib_info(&lib_info);
if ((channel_mode = aacenc_channel_mode(format)) == 0) {
fprintf(stderr, "ERROR: unsupported channel layout\n");
goto FAIL;
@ -145,13 +128,21 @@ int aacenc_init(HANDLE_AACENCODER *encoder, const aacenc_param_t *params,
fprintf(stderr, "ERROR: unsupported sample rate\n");
goto FAIL;
}
aacEncoder_SetParam(*encoder, AACENC_CHANNELMODE, channel_mode);
if (aacEncoder_SetParam(*encoder, AACENC_CHANNELMODE,
channel_mode) != AACENC_OK) {
fprintf(stderr, "ERROR: unsupported channel mode\n");
goto FAIL;
}
aacEncoder_SetParam(*encoder, AACENC_BANDWIDTH, params->bandwidth);
aacEncoder_SetParam(*encoder, AACENC_CHANNELORDER, 1);
aacEncoder_SetParam(*encoder, AACENC_AFTERBURNER, !!params->afterburner);
if (aot == AOT_ER_AAC_ELD && params->lowdelay_sbr)
aacEncoder_SetParam(*encoder, AACENC_SBR_MODE, 1);
aacEncoder_SetParam(*encoder, AACENC_SBR_MODE, params->lowdelay_sbr);
#if AACENCODER_LIB_VL0 > 3 || (AACENCODER_LIB_VL0==3 && AACENCODER_LIB_VL1>=4)
if (lib_info.version > 0x03040000)
aacEncoder_SetParam(*encoder, AACENC_SBR_RATIO, params->sbr_ratio);
#endif
if (aacEncoder_SetParam(*encoder, AACENC_TRANSMUX,
params->transport_format) != AACENC_OK) {

View File

@ -15,6 +15,7 @@
unsigned bandwidth; \
unsigned afterburner; \
unsigned lowdelay_sbr; \
unsigned sbr_ratio; \
unsigned sbr_signaling; \
unsigned transport_format; \
unsigned adts_crc_check; \
@ -29,11 +30,13 @@ typedef struct aacenc_frame_t {
uint32_t size, capacity;
} aacenc_frame_t;
int aacenc_is_sbr_ratio_available();
int aacenc_is_sbr_active(const aacenc_param_t *params);
int aacenc_mp4asc(const aacenc_param_t *params,
const uint8_t *asc, uint32_t ascsize,
uint8_t *outasc, uint32_t *outsize);
int aacenc_is_dual_rate_sbr(const aacenc_param_t *params);
void aacenc_get_lib_info(LIB_INFO *info);
int aacenc_init(HANDLE_AACENCODER *encoder, const aacenc_param_t *params,
const pcm_sample_description_t *format,

View File

@ -54,7 +54,8 @@ static void handle_signals(void)
{
int i, sigs[] = { SIGINT, SIGHUP, SIGTERM };
for (i = 0; i < sizeof(sigs)/sizeof(sigs[0]); ++i) {
struct sigaction sa = { 0 };
struct sigaction sa;
memset(&sa, 0, sizeof sa);
sa.sa_handler = signal_handler;
sa.sa_flags |= SA_RESTART;
sigaction(sigs[i], &sa, 0);
@ -132,7 +133,14 @@ PROGNAME " %s\n"
" -a, --afterburner <n> Afterburner\n"
" 0: Off\n"
" 1: On(default)\n"
" -L, --lowdelay-sbr Enable ELD-SBR (AAC ELD only)\n"
" -L, --lowdelay-sbr <-1|0|1> Configure SBR activity on AAC ELD\n"
" -1: Use ELD SBR auto configurator\n"
" 0: Disable SBR on ELD (default)\n"
" 1: Enable SBR on ELD\n"
" -s, --sbr-ratio <0|1|2> Controls activation of downsampled SBR\n"
" 0: Use lib default (default)\n"
" 1: downsampled SBR (default for ELD+SBR)\n"
" 2: dual-rate SBR (default for HE-AAC)\n"
" -f, --transport-format <n> Transport format\n"
" 0: RAW (default, muxed into M4A)\n"
" 1: ADIF\n"
@ -228,7 +236,7 @@ static
int parse_options(int argc, char **argv, aacenc_param_ex_t *params)
{
int ch;
unsigned n;
int n;
#define OPT_INCLUDE_SBR_DELAY M4AF_FOURCC('s','d','l','y')
#define OPT_MOOV_BEFORE_MDAT M4AF_FOURCC('m','o','o','v')
@ -247,7 +255,8 @@ int parse_options(int argc, char **argv, aacenc_param_ex_t *params)
{ "bitrate-mode", required_argument, 0, 'm' },
{ "bandwidth", required_argument, 0, 'w' },
{ "afterburner", required_argument, 0, 'a' },
{ "lowdelay-sbr", no_argument, 0, 'L' },
{ "lowdelay-sbr", required_argument, 0, 'L' },
{ "sbr-ratio", required_argument, 0, 's' },
{ "transport-format", required_argument, 0, 'f' },
{ "adts-crc-check", no_argument, 0, 'C' },
{ "header-period", required_argument, 0, 'P' },
@ -325,7 +334,18 @@ int parse_options(int argc, char **argv, aacenc_param_ex_t *params)
params->afterburner = n;
break;
case 'L':
params->lowdelay_sbr = 1;
if (sscanf(optarg, "%d", &n) != 1 || n < -1 || n > 1) {
fprintf(stderr, "invalid arg for lowdelay-sbr\n");
return -1;
}
params->lowdelay_sbr = n;
break;
case 's':
if (sscanf(optarg, "%u", &n) != 1 || n > 2) {
fprintf(stderr, "invalid arg for sbr-ratio\n");
return -1;
}
params->sbr_ratio = n;
break;
case 'f':
if (sscanf(optarg, "%u", &n) != 1) {
@ -567,19 +587,11 @@ void put_tool_tag(m4af_ctx_t *m4af, const aacenc_param_ex_t *params,
{
char tool_info[256];
char *p = tool_info;
LIB_INFO *lib_info = 0;
LIB_INFO lib_info;
p += sprintf(p, PROGNAME " %s, ", fdkaac_version);
lib_info = calloc(FDK_MODULE_LAST, sizeof(LIB_INFO));
if (aacEncGetLibInfo(lib_info) == AACENC_OK) {
int i;
for (i = 0; i < FDK_MODULE_LAST; ++i)
if (lib_info[i].module_id == FDK_AACENC)
break;
p += sprintf(p, "libfdk-aac %s, ", lib_info[i].versionStr);
}
free(lib_info);
aacenc_get_lib_info(&lib_info);
p += sprintf(p, "libfdk-aac %s, ", lib_info.versionStr);
if (params->bitrate_mode)
sprintf(p, "VBR mode %d", params->bitrate_mode);
else
@ -758,9 +770,9 @@ int main(int argc, char **argv)
AACENC_InfoStruct aacinfo = { 0 };
m4af_ctx_t *m4af = 0;
const pcm_sample_description_t *sample_format;
int downsampled_timescale = 0;
int frame_count = 0;
int sbr_mode = 0;
unsigned scale_shift = 0;
setlocale(LC_CTYPE, "");
setbuf(stderr, 0);
@ -773,16 +785,18 @@ int main(int argc, char **argv)
sample_format = pcm_get_format(reader);
/*
* We use explicit/hierarchical signaling for LOAS.
* Other than that, we request implicit signaling to FDK library, then
* append explicit/backward-compatible signaling to ASC in case of MP4FF.
*
* Explicit/backward-compatible signaling of SBR is the most recommended
* way in MPEG4 part3 spec, and seems the only way supported by iTunes.
* Since FDK library does not support it, we have to do it on our side.
*/
params.sbr_signaling = (params.transport_format == TT_MP4_LOAS) ? 2 : 0;
sbr_mode = aacenc_is_sbr_active((aacenc_param_t*)&params);
if (sbr_mode && !aacenc_is_sbr_ratio_available()) {
fprintf(stderr, "WARNING: Only dual-rate SBR is available "
"for this version\n");
params.sbr_ratio = 2;
}
scale_shift = aacenc_is_dual_rate_sbr((aacenc_param_t*)&params);
params.sbr_signaling =
(params.transport_format == TT_MP4_LOAS) ? 2 :
(params.transport_format == TT_MP4_RAW) ? 1 : 0;
if (sbr_mode && !scale_shift)
params.sbr_signaling = 2;
if (aacenc_init(&encoder, (aacenc_param_t*)&params, sample_format,
&aacinfo) < 0)
@ -800,28 +814,23 @@ int main(int argc, char **argv)
goto END;
}
handle_signals();
sbr_mode = aacenc_is_sbr_active((aacenc_param_t*)&params);
if (!params.transport_format) {
uint32_t scale;
uint8_t mp4asc[32];
uint32_t ascsize = sizeof(mp4asc);
unsigned framelen = aacinfo.frameLength;
if (sbr_mode)
downsampled_timescale = 1;
scale = sample_format->sample_rate >> downsampled_timescale;
scale = sample_format->sample_rate >> scale_shift;
if ((m4af = m4af_create(M4AF_CODEC_MP4A, scale, &m4af_io,
params.output_fp)) < 0)
goto END;
aacenc_mp4asc((aacenc_param_t*)&params, aacinfo.confBuf,
aacinfo.confSize, mp4asc, &ascsize);
m4af_set_decoder_specific_info(m4af, 0, mp4asc, ascsize);
m4af_set_decoder_specific_info(m4af, 0,
aacinfo.confBuf, aacinfo.confSize);
m4af_set_fixed_frame_duration(m4af, 0,
framelen >> downsampled_timescale);
framelen >> scale_shift);
m4af_set_vbr_mode(m4af, 0, params.bitrate_mode);
m4af_set_priming_mode(m4af, params.gapless_mode + 1);
m4af_begin_write(m4af);
}
if (sbr_mode && (aacinfo.encoderDelay & 1)) {
if (scale_shift && (aacinfo.encoderDelay & 1)) {
/*
* Since odd delay cannot be exactly expressed in downsampled scale,
* we push one zero frame to the encoder here, to make delay even
@ -841,12 +850,11 @@ int main(int argc, char **argv)
if (sbr_mode && params.profile != AOT_ER_AAC_ELD &&
!params.include_sbr_delay)
delay -= 481 << 1;
if (sbr_mode && (delay & 1))
delay -= 481 << scale_shift;
if (scale_shift && (delay & 1))
++delay;
padding = frame_count * aacinfo.frameLength - frames_read - delay;
m4af_set_priming(m4af, 0, delay >> downsampled_timescale,
padding >> downsampled_timescale);
m4af_set_priming(m4af, 0, delay >> scale_shift, padding >> scale_shift);
if (finalize_m4a(m4af, &params, encoder) < 0)
goto END;
}

View File

@ -1,4 +1,4 @@
#ifndef VERSION_H
#define VERSION_H
const char *fdkaac_version = "0.4.2";
const char *fdkaac_version = "0.5.1";
#endif