/****************************************************************************** * * Copyright (C) 2020 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************************** * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore */ #include #include "aacenc_lib.h" #include "src/aacenc.h" using namespace std; // IN_AUDIO_DATA, IN_ANCILLRY_DATA and IN_METADATA_SETUP constexpr size_t kMaxBuffers = 3; constexpr size_t kMaxOutputBufferSize = 8192; constexpr uint32_t kMinBitRate = 8000; constexpr uint32_t kMaxBitRate = 960000; constexpr int32_t kSampleRates[] = {8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000, 64000, 88200, 96000}; constexpr size_t kSampleRatesSize = size(kSampleRates); constexpr CHANNEL_MODE kChannelModes[] = {MODE_1, MODE_2, MODE_1_2, MODE_1_2_1, MODE_1_2_2, MODE_1_2_2_1, MODE_1_2_2_2_1, MODE_6_1, MODE_7_1_BACK, MODE_7_1_TOP_FRONT, MODE_7_1_REAR_SURROUND, MODE_7_1_FRONT_CENTER, MODE_212}; constexpr size_t kChannelModesSize = size(kChannelModes); constexpr TRANSPORT_TYPE kIdentifiers[] = { TT_MP4_RAW, TT_MP4_ADIF, TT_MP4_ADTS, TT_MP4_LATM_MCP1, TT_MP4_LATM_MCP0, TT_MP4_LOAS, TT_DRM}; constexpr size_t kIdentifiersSize = size(kIdentifiers); constexpr AUDIO_OBJECT_TYPE kAudioObjectTypes[] = {AOT_NONE, AOT_NULL_OBJECT, AOT_AAC_MAIN, AOT_AAC_LC, AOT_AAC_SSR, AOT_AAC_LTP, AOT_SBR, AOT_AAC_SCAL, AOT_TWIN_VQ, AOT_CELP, AOT_HVXC, AOT_RSVD_10, AOT_RSVD_11, AOT_TTSI, AOT_MAIN_SYNTH, AOT_WAV_TAB_SYNTH, AOT_GEN_MIDI, AOT_ALG_SYNTH_AUD_FX, AOT_ER_AAC_LC, AOT_RSVD_18, AOT_ER_AAC_LTP, AOT_ER_AAC_SCAL, AOT_ER_TWIN_VQ, AOT_ER_BSAC, AOT_ER_AAC_LD, AOT_ER_CELP, AOT_ER_HVXC, AOT_ER_HILN, AOT_ER_PARA, AOT_RSVD_28, AOT_PS, AOT_MPEGS, AOT_ESCAPE, AOT_MP3ONMP4_L1, AOT_MP3ONMP4_L2, AOT_MP3ONMP4_L3, AOT_RSVD_35, AOT_RSVD_36, AOT_AAC_SLS, AOT_SLS, AOT_ER_AAC_ELD, AOT_USAC, AOT_SAOC, AOT_LD_MPEGS, AOT_MP2_AAC_LC, AOT_MP2_SBR, AOT_DRM_AAC, AOT_DRM_SBR, AOT_DRM_MPEG_PS, AOT_DRM_SURROUND, AOT_DRM_USAC}; constexpr size_t kAudioObjectTypesSize = size(kAudioObjectTypes); constexpr int32_t kSbrRatios[] = {-1, 0, 1, 2}; constexpr size_t kSbrRatiosSize = size(kSbrRatios); constexpr int32_t kBitRateModes[] = { AACENC_BR_MODE_INVALID, AACENC_BR_MODE_CBR, AACENC_BR_MODE_VBR_1, AACENC_BR_MODE_VBR_2, AACENC_BR_MODE_VBR_3, AACENC_BR_MODE_VBR_4, AACENC_BR_MODE_VBR_5, AACENC_BR_MODE_FF, AACENC_BR_MODE_SFR}; constexpr size_t kBitRateModesSize = size(kBitRateModes); constexpr int32_t kGranuleLengths[] = {120, 128, 240, 256, 480, 512, 1024}; constexpr size_t kGranuleLengthsSize = size(kGranuleLengths); constexpr int32_t kChannelOrder[] = {CH_ORDER_MPEG, CH_ORDER_WAV}; constexpr size_t kChannelOrderSize = size(kChannelOrder); constexpr int32_t kSignalingModes[] = {-1, 0, 1, 2, 3}; constexpr size_t kSignalingModesSize = size(kSignalingModes); constexpr int32_t kAudioMuxVer[] = {-1, 0, 1, 2}; constexpr size_t kAudioMuxVerSize = size(kAudioMuxVer); constexpr int32_t kSbrModes[] = {-1, 0, 1, 2}; constexpr size_t kSbrModesSize = size(kSbrModes); constexpr AACENC_METADATA_DRC_PROFILE kMetaDataDrcProfiles[] = { AACENC_METADATA_DRC_NONE, AACENC_METADATA_DRC_FILMSTANDARD, AACENC_METADATA_DRC_FILMLIGHT, AACENC_METADATA_DRC_MUSICSTANDARD, AACENC_METADATA_DRC_MUSICLIGHT, AACENC_METADATA_DRC_SPEECH, AACENC_METADATA_DRC_NOT_PRESENT}; constexpr size_t kMetaDataDrcProfilesSize = size(kMetaDataDrcProfiles); enum { IDX_SBR_MODE = 0, IDX_AAC_AOT, IDX_SAMPLE_RATE, IDX_BIT_RATE_1, IDX_BIT_RATE_2, IDX_BIT_RATE_3, IDX_CHANNEL, IDX_IDENTIFIER, IDX_SBR_RATIO, IDX_METADATA_DRC_PROFILE, IDX_METADATA_COMP_PROFILE, IDX_METADATA_DRC_TARGET_REF_LEVEL, IDX_METADATA_COMP_TARGET_REF_LEVEL, IDX_METADATA_PROG_LEVEL_PRESENT, IDX_METADATA_PROG_LEVEL, IDX_METADATA_PCE_MIXDOWN_IDX_PRESENT, IDX_METADATA_ETSI_DMXLVL_PRESENT, IDX_METADATA_CENTER_MIX_LEVEL, IDX_METADATA_SURROUND_MIX_LEVEL, IDX_METADATA_DOLBY_SURROUND_MODE, IDX_METADATA_DRC_PRESENTATION_MODE, IDX_METADATA_EXT_ANC_DATA_ENABLE, IDX_METADATA_EXT_DOWNMIX_LEVEL_ENABLE, IDX_METADATA_EXT_DOWNMIX_LEVEL_A, IDX_METADATA_EXT_DOWNMIX_LEVEL_B, IDX_METADATA_DMX_GAIN_ENABLE, IDX_METADATA_DMX_GAIN_5, IDX_METADATA_DMX_GAIN_2, IDX_METADATA_LFE_DMX_ENABLE, IDX_METADATA_LFE_DMX_LEVEL, IDX_IN_BUFFER_INDEX_1, IDX_IN_BUFFER_INDEX_2, IDX_IN_BUFFER_INDEX_3, IDX_BIT_RATE_MODE, IDX_GRANULE_LENGTH, IDX_CHANNELORDER, IDX_AFTERBURNER, IDX_BANDWIDTH, IDX_PEAK_BITRATE, IDX_HEADER_PERIOD, IDX_SIGNALING_MODE, IDX_TPSUBFRAMES, IDX_AUDIOMUXVER, IDX_PROTECTION, IDX_ANCILLARY_BITRATE, IDX_METADATA_MODE, IDX_LAST }; template auto generateNumberInRangeFromData(type1 data, type2 min, type3 max) -> decltype(max) { return (data % (1 + max - min)) + min; } class Codec { public: ~Codec() { deInitEncoder(); } bool initEncoder(uint8_t **dataPtr, size_t *sizePtr); void encodeFrames(const uint8_t *data, size_t size); void deInitEncoder(); private: template void setAACParam(type1 data, const AACENC_PARAM aacParam, type2 min, type2 max, const type3 *array = nullptr); void setupMetaData(uint8_t *data); HANDLE_AACENCODER mEncoder = nullptr; AACENC_MetaData mMetaData = {}; uint32_t mInBufferIdx_1 = 0; uint32_t mInBufferIdx_2 = 0; uint32_t mInBufferIdx_3 = 0; }; void Codec::setupMetaData(uint8_t *data) { uint32_t drcProfileIndex = generateNumberInRangeFromData(data[IDX_METADATA_DRC_PROFILE], 0, kMetaDataDrcProfilesSize - 1); AACENC_METADATA_DRC_PROFILE drcProfile = kMetaDataDrcProfiles[drcProfileIndex]; mMetaData.drc_profile = drcProfile; uint32_t compProfileIndex = generateNumberInRangeFromData(data[IDX_METADATA_COMP_PROFILE], 0, kMetaDataDrcProfilesSize - 1); AACENC_METADATA_DRC_PROFILE compProfile = kMetaDataDrcProfiles[compProfileIndex]; mMetaData.comp_profile = compProfile; INT drcTargetRefLevel = generateNumberInRangeFromData(data[IDX_METADATA_DRC_TARGET_REF_LEVEL], 0, UINT8_MAX); mMetaData.drc_TargetRefLevel = drcTargetRefLevel; INT compTargetRefLevel = generateNumberInRangeFromData(data[IDX_METADATA_COMP_TARGET_REF_LEVEL], 0, UINT8_MAX); mMetaData.comp_TargetRefLevel = compTargetRefLevel; INT isProgRefLevelPresent = generateNumberInRangeFromData(data[IDX_METADATA_PROG_LEVEL_PRESENT], 0, 1); mMetaData.prog_ref_level_present = isProgRefLevelPresent; INT progRefLevel = generateNumberInRangeFromData(data[IDX_METADATA_PROG_LEVEL], 0, UINT8_MAX); mMetaData.prog_ref_level = progRefLevel; UCHAR isPCEMixdownIdxPresent = generateNumberInRangeFromData(data[IDX_METADATA_PCE_MIXDOWN_IDX_PRESENT], 0, 1); mMetaData.PCE_mixdown_idx_present = isPCEMixdownIdxPresent; UCHAR isETSIDmxLvlPresent = generateNumberInRangeFromData(data[IDX_METADATA_ETSI_DMXLVL_PRESENT], 0, 1); mMetaData.ETSI_DmxLvl_present = isETSIDmxLvlPresent; SCHAR centerMixLevel = generateNumberInRangeFromData(data[IDX_METADATA_CENTER_MIX_LEVEL], 0, 7); mMetaData.centerMixLevel = centerMixLevel; SCHAR surroundMixLevel = generateNumberInRangeFromData(data[IDX_METADATA_SURROUND_MIX_LEVEL], 0, 7); mMetaData.surroundMixLevel = surroundMixLevel; UCHAR dolbySurroundMode = generateNumberInRangeFromData(data[IDX_METADATA_DOLBY_SURROUND_MODE], 0, 2); mMetaData.dolbySurroundMode = dolbySurroundMode; UCHAR drcPresentationMode = generateNumberInRangeFromData(data[IDX_METADATA_DRC_PRESENTATION_MODE], 0, 2); mMetaData.drcPresentationMode = drcPresentationMode; UCHAR extAncDataEnable = generateNumberInRangeFromData(data[IDX_METADATA_EXT_ANC_DATA_ENABLE], 0, 1); mMetaData.ExtMetaData.extAncDataEnable = extAncDataEnable; UCHAR extDownmixLevelEnable = generateNumberInRangeFromData(data[IDX_METADATA_EXT_DOWNMIX_LEVEL_ENABLE], 0, 1); mMetaData.ExtMetaData.extDownmixLevelEnable = extDownmixLevelEnable; UCHAR extDownmixLevel_A = generateNumberInRangeFromData(data[IDX_METADATA_EXT_DOWNMIX_LEVEL_A], 0, 7); mMetaData.ExtMetaData.extDownmixLevel_A = extDownmixLevel_A; UCHAR extDownmixLevel_B = generateNumberInRangeFromData(data[IDX_METADATA_EXT_DOWNMIX_LEVEL_B], 0, 7); mMetaData.ExtMetaData.extDownmixLevel_B = extDownmixLevel_B; UCHAR dmxGainEnable = generateNumberInRangeFromData(data[IDX_METADATA_DMX_GAIN_ENABLE], 0, 1); mMetaData.ExtMetaData.dmxGainEnable = dmxGainEnable; INT dmxGain5 = generateNumberInRangeFromData(data[IDX_METADATA_DMX_GAIN_5], 0, UINT8_MAX); mMetaData.ExtMetaData.dmxGain5 = dmxGain5; INT dmxGain2 = generateNumberInRangeFromData(data[IDX_METADATA_DMX_GAIN_2], 0, UINT8_MAX); mMetaData.ExtMetaData.dmxGain2 = dmxGain2; UCHAR lfeDmxEnable = generateNumberInRangeFromData(data[IDX_METADATA_LFE_DMX_ENABLE], 0, 1); mMetaData.ExtMetaData.lfeDmxEnable = lfeDmxEnable; UCHAR lfeDmxLevel = generateNumberInRangeFromData(data[IDX_METADATA_LFE_DMX_LEVEL], 0, 15); mMetaData.ExtMetaData.lfeDmxLevel = lfeDmxLevel; } template void Codec::setAACParam(type1 data, const AACENC_PARAM aacParam, type2 min, type2 max, const type3 *array) { auto value = 0; if (array) { uint32_t index = generateNumberInRangeFromData(data, min, max); value = array[index]; } else { value = generateNumberInRangeFromData(data, min, max); } aacEncoder_SetParam(mEncoder, aacParam, value); (void)aacEncoder_GetParam(mEncoder, aacParam); } bool Codec::initEncoder(uint8_t **dataPtr, size_t *sizePtr) { uint8_t *data = *dataPtr; if (AACENC_OK != aacEncOpen(&mEncoder, 0, 0)) { return false; } setAACParam(data[IDX_SBR_MODE], AACENC_SBR_MODE, 0, kSbrModesSize - 1, kSbrModes); setAACParam(data[IDX_SBR_RATIO], AACENC_SBR_RATIO, 0, kSbrRatiosSize - 1, kSbrRatios); setAACParam(data[IDX_AAC_AOT], AACENC_AOT, 0, kAudioObjectTypesSize - 1, kAudioObjectTypes); setAACParam(data[IDX_SAMPLE_RATE], AACENC_SAMPLERATE, 0, kSampleRatesSize - 1, kSampleRates); uint32_t tempValue = (data[IDX_BIT_RATE_1] << 16) | (data[IDX_BIT_RATE_2] << 8) | data[IDX_BIT_RATE_3]; setAACParam(tempValue, AACENC_BITRATE, kMinBitRate, kMaxBitRate); setAACParam(data[IDX_CHANNEL], AACENC_CHANNELMODE, 0, kChannelModesSize - 1, kChannelModes); setAACParam(data[IDX_IDENTIFIER], AACENC_TRANSMUX, 0, kIdentifiersSize - 1, kIdentifiers); setAACParam(data[IDX_BIT_RATE_MODE], AACENC_BITRATEMODE, 0, kBitRateModesSize - 1, kBitRateModes); setAACParam(data[IDX_GRANULE_LENGTH], AACENC_GRANULE_LENGTH, 0, kGranuleLengthsSize - 1, kGranuleLengths); setAACParam(data[IDX_CHANNELORDER], AACENC_CHANNELORDER, 0, kChannelOrderSize - 1, kChannelOrder); setAACParam(data[IDX_AFTERBURNER], AACENC_AFTERBURNER, 0, 1); setAACParam(data[IDX_BANDWIDTH], AACENC_BANDWIDTH, 0, 1); setAACParam(data[IDX_PEAK_BITRATE], AACENC_PEAK_BITRATE, kMinBitRate, kMinBitRate); setAACParam(data[IDX_HEADER_PERIOD], AACENC_HEADER_PERIOD, 0, UINT8_MAX); setAACParam(data[IDX_SIGNALING_MODE], AACENC_SIGNALING_MODE, 0, kSignalingModesSize - 1, kSignalingModes); setAACParam(data[IDX_TPSUBFRAMES], AACENC_TPSUBFRAMES, 0, UINT8_MAX); setAACParam(data[IDX_AUDIOMUXVER], AACENC_AUDIOMUXVER, 0, kAudioMuxVerSize - 1, kAudioMuxVer); setAACParam(data[IDX_PROTECTION], AACENC_PROTECTION, 0, 1); setAACParam(data[IDX_ANCILLARY_BITRATE], AACENC_ANCILLARY_BITRATE, 0, kMaxBitRate); setAACParam(data[IDX_METADATA_MODE], AACENC_METADATA_MODE, 0, 3); AACENC_InfoStruct encInfo; aacEncInfo(mEncoder, &encInfo); mInBufferIdx_1 = generateNumberInRangeFromData(data[IDX_IN_BUFFER_INDEX_1], 0, kMaxBuffers - 1); mInBufferIdx_2 = generateNumberInRangeFromData(data[IDX_IN_BUFFER_INDEX_2], 0, kMaxBuffers - 1); mInBufferIdx_3 = generateNumberInRangeFromData(data[IDX_IN_BUFFER_INDEX_3], 0, kMaxBuffers - 1); setupMetaData(data); // Not re-using the data which was used for configuration for encoding *dataPtr += IDX_LAST; *sizePtr -= IDX_LAST; return true; } static void deleteBuffers(uint8_t **buffers, size_t size) { for (size_t n = 0; n < size; ++n) { delete[] buffers[n]; } delete[] buffers; } void Codec::encodeFrames(const uint8_t *data, size_t size) { uint8_t *audioData = (uint8_t *)data; uint8_t *ancData = (uint8_t *)data; size_t audioSize = size; size_t ancSize = size; while ((audioSize > 0) && (ancSize > 0)) { AACENC_InArgs inargs; memset(&inargs, 0, sizeof(inargs)); inargs.numInSamples = audioSize / sizeof(int16_t); inargs.numAncBytes = ancSize; void *buffers[] = {(void *)audioData, (void *)ancData, &mMetaData}; INT bufferIds[] = {IN_AUDIO_DATA, IN_ANCILLRY_DATA, IN_METADATA_SETUP}; INT bufferSizes[] = {static_cast(audioSize), static_cast(ancSize), static_cast(sizeof(mMetaData))}; INT bufferElSizes[] = {sizeof(int16_t), sizeof(UCHAR), sizeof(AACENC_MetaData)}; void *inBuffer[kMaxBuffers] = {}; INT inBufferIds[kMaxBuffers] = {}; INT inBufferSize[kMaxBuffers] = {}; INT inBufferElSize[kMaxBuffers] = {}; for (int32_t buffer = 0; buffer < kMaxBuffers; ++buffer) { uint32_t Idxs[] = {mInBufferIdx_1, mInBufferIdx_2, mInBufferIdx_3}; inBuffer[buffer] = buffers[Idxs[buffer]]; inBufferIds[buffer] = bufferIds[Idxs[buffer]]; inBufferSize[buffer] = bufferSizes[Idxs[buffer]]; inBufferElSize[buffer] = bufferElSizes[Idxs[buffer]]; } AACENC_BufDesc inBufDesc; inBufDesc.numBufs = kMaxBuffers; inBufDesc.bufs = (void **)&inBuffer; inBufDesc.bufferIdentifiers = inBufferIds; inBufDesc.bufSizes = inBufferSize; inBufDesc.bufElSizes = inBufferElSize; uint8_t **outPtrRef = new uint8_t *[kMaxBuffers]; for (int32_t buffer = 0; buffer < kMaxBuffers; ++buffer) { outPtrRef[buffer] = new uint8_t[kMaxOutputBufferSize]; } void *outBuffer[kMaxBuffers]; INT outBufferIds[kMaxBuffers]; INT outBufferSize[kMaxBuffers]; INT outBufferElSize[kMaxBuffers]; for (int32_t buffer = 0; buffer < kMaxBuffers; ++buffer) { outBuffer[buffer] = outPtrRef[buffer]; outBufferIds[buffer] = OUT_BITSTREAM_DATA; outBufferSize[buffer] = (INT)kMaxOutputBufferSize; outBufferElSize[buffer] = sizeof(UCHAR); } AACENC_BufDesc outBufDesc; outBufDesc.numBufs = kMaxBuffers; outBufDesc.bufs = (void **)&outBuffer; outBufDesc.bufferIdentifiers = outBufferIds; outBufDesc.bufSizes = outBufferSize; outBufDesc.bufElSizes = outBufferElSize; AACENC_OutArgs outargs = {}; aacEncEncode(mEncoder, &inBufDesc, &outBufDesc, &inargs, &outargs); if (outargs.numOutBytes == 0) { if (audioSize > 0) { ++audioData; --audioSize; } if (ancSize > 0) { ++ancData; --ancSize; } } else { size_t audioConsumed = outargs.numInSamples * sizeof(int16_t); audioData += audioConsumed; audioSize -= audioConsumed; size_t ancConsumed = outargs.numAncBytes; ancData += ancConsumed; ancSize -= ancConsumed; } deleteBuffers(outPtrRef, kMaxBuffers); // break out of loop if only metadata was sent in all the input buffers // as sending it multiple times in a loop is redundant. if ((mInBufferIdx_1 == kMaxBuffers - 1) && (mInBufferIdx_2 == kMaxBuffers - 1) && (mInBufferIdx_3 == kMaxBuffers - 1)) { break; } } } void Codec::deInitEncoder() { aacEncClose(&mEncoder); } extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { if (size < IDX_LAST) { return 0; } Codec encoder; if (encoder.initEncoder(const_cast(&data), &size)) { encoder.encodeFrames(data, size); } return 0; }