Audio book creation

This commit is contained in:
Jakub Melka 2020-10-21 18:32:04 +02:00
parent e9ae0595e3
commit fab94505d1
5 changed files with 154 additions and 5 deletions

View File

@ -66,6 +66,9 @@ public:
const Items& getItems() const { return m_items; } // read-only reference to the stored items, no copy is made
/// Returns true if the text flow contains no items at all
bool isEmpty() const { return m_items.empty(); }
private:
Items m_items;
};

View File

@ -225,6 +225,16 @@ void PDFToolAbstractApplication::initializeCommandLineParser(QCommandLineParser*
parser->addOption(QCommandLineOption("voice-age", "Choose voice age for text-to-speech engine.", "age"));
parser->addOption(QCommandLineOption("voice-lang-code", "Choose voice language code for text-to-speech engine.", "code"));
}
if (optionFlags.testFlag(TextSpeech))
{
parser->addOption(QCommandLineOption("mark-page-numbers", "Mark page numbers in audio stream."));
parser->addOption(QCommandLineOption("say-page-numbers", "Say page numbers."));
parser->addOption(QCommandLineOption("say-struct-titles", "Say titles extracted from structure tree (only for tagged pdf)."));
parser->addOption(QCommandLineOption("say-struct-alt-desc", "Say alternative descriptions extracted from structure tree (only for tagged pdf)."));
parser->addOption(QCommandLineOption("say-struct-exp-form", "Say expanded form extracted from structure tree (only for tagged pdf)."));
parser->addOption(QCommandLineOption("say-struct-act-text", "Say actual text extracted from structure tree (only for tagged pdf)."));
}
}
PDFToolOptions PDFToolAbstractApplication::getOptions(QCommandLineParser* parser) const
@ -376,6 +386,16 @@ PDFToolOptions PDFToolAbstractApplication::getOptions(QCommandLineParser* parser
options.textVoiceLangCode = parser->isSet("voice-lang-code") ? parser->value("voice-lang-code") : QString();
}
if (optionFlags.testFlag(TextSpeech))
{
options.textSpeechMarkPageNumbers = parser->isSet("mark-page-numbers");
options.textSpeechSayPageNumbers = parser->isSet("say-page-numbers");
options.textSpeechSayStructTitles = parser->isSet("say-struct-titles");
options.textSpeechSayStructAlternativeDescription = parser->isSet("say-struct-alt-desc");
options.textSpeechSayStructExpandedForm = parser->isSet("say-struct-exp-form");
options.textSpeechSayStructActualText = parser->isSet("say-struct-act-text");
}
return options;
}

View File

@ -103,11 +103,9 @@ struct PDFToolOptions
bool textSpeechMarkPageNumbers = false;
bool textSpeechSayPageNumbers = false;
bool textSpeechSayStructTitles = false;
bool textSpeechSayStructLanguage = false;
bool textSpeechSayStructAlternativeDescription = false;
bool textSpeechSayStructExpandedForm = false;
bool textSpeechSayStructActualText = false;
bool textSpeechSayStructPhoneme = false;
/// Returns page range. If page range is invalid, then \p errorMessage is set to a non-empty error description.
/// \param pageCount Page count
@ -132,6 +130,7 @@ public:
ErrorInvalidArguments,
ErrorFailedWriteToFile,
ErrorPermissions,
ErrorNoText,
ErrorCOM,
ErrorSAPI
};

View File

@ -19,8 +19,9 @@
#ifdef Q_OS_WIN
#include <QFileInfo>
#include <sapi.h>
//#include <sphelper.h>
#pragma comment(lib, "ole32")
@ -309,21 +310,138 @@ QString PDFToolAudioBook::getStandardString(StandardString standardString) const
return QString();
}
int PDFToolAudioBook::getDocumentTextFlow(const PDFToolOptions& options, pdf::PDFDocumentTextFlow& flow)
{
pdf::PDFDocument document;
QByteArray sourceData;
if (!readDocument(options, document, &sourceData))
{
return ErrorDocumentReading;
}
QString parseError;
std::vector<pdf::PDFInteger> pages = options.getPageRange(document.getCatalog()->getPageCount(), parseError, true);
if (!parseError.isEmpty())
{
PDFConsole::writeError(parseError, options.outputCodec);
return ErrorInvalidArguments;
}
pdf::PDFDocumentTextFlowFactory factory;
flow = factory.create(&document, pages, options.textAnalysisAlgorithm);
return ExitSuccess;
}
int PDFToolAudioBook::createAudioBook(const PDFToolOptions& options, pdf::PDFDocumentTextFlow& flow)
{
QString audioString;
QTextStream textStream(&audioString);
for (const pdf::PDFDocumentTextFlow::Item& item : flow.getItems())
{
if (item.flags.testFlag(pdf::PDFDocumentTextFlow::PageStart) && options.textSpeechMarkPageNumbers)
{
textStream << QString("<bookmark mark=\"%1\"/>").arg(item.text) << endl;
}
if (!item.text.isEmpty())
{
bool showText = (item.flags.testFlag(pdf::PDFDocumentTextFlow::Text)) ||
(item.flags.testFlag(pdf::PDFDocumentTextFlow::PageStart) && options.textSpeechSayPageNumbers) ||
(item.flags.testFlag(pdf::PDFDocumentTextFlow::PageEnd) && options.textSpeechSayPageNumbers) ||
(item.flags.testFlag(pdf::PDFDocumentTextFlow::StructureTitle) && options.textSpeechSayStructTitles) ||
(item.flags.testFlag(pdf::PDFDocumentTextFlow::StructureAlternativeDescription) && options.textSpeechSayStructAlternativeDescription) ||
(item.flags.testFlag(pdf::PDFDocumentTextFlow::StructureExpandedForm) && options.textSpeechSayStructExpandedForm) ||
(item.flags.testFlag(pdf::PDFDocumentTextFlow::StructureActualText) && options.textSpeechSayStructActualText);
if (showText)
{
textStream << item.text << endl;
}
}
}
PDFVoiceInfoList voices;
fillVoices(options, voices, true);
// Do we have any voice?
if (voices.empty())
{
PDFConsole::writeError(PDFToolTranslationContext::tr("Invalid voice."), options.outputCodec);
return ErrorSAPI;
}
if (!voices.front().getVoice())
{
PDFConsole::writeError(PDFToolTranslationContext::tr("Invalid voice."), options.outputCodec);
return ErrorSAPI;
}
QFileInfo info(options.document);
QString outputFile = QString("%1/%2.mp3").arg(info.path(), info.completeBaseName());
BSTR outputFileName = (BSTR)outputFile.utf16();
ISpeechFileStream* stream = nullptr;
if (!SUCCEEDED(::CoCreateInstance(CLSID_SpFileStream, NULL, CLSCTX_ALL, __uuidof(ISpeechFileStream), (LPVOID*)&stream)))
{
PDFConsole::writeError(PDFToolTranslationContext::tr("Cannot create output stream '%1'.").arg(outputFile), options.outputCodec);
return ErrorSAPI;
}
if (!SUCCEEDED(stream->Open(outputFileName, SSFMCreateForWrite)))
{
PDFConsole::writeError(PDFToolTranslationContext::tr("Cannot create output stream '%1'.").arg(outputFile), options.outputCodec);
stream->Release();
return ErrorSAPI;
}
ISpVoice* voice = voices.front().getVoice();
voice->AddRef();
voices.clear();
LPCWSTR stringToSpeak = (LPCWSTR)audioString.utf16();
voice->SetOutput(stream, FALSE);
voice->Speak(stringToSpeak, SPF_PURGEBEFORESPEAK | SPF_PARSE_SAPI, NULL);
voice->Release();
stream->Release();
return ExitSuccess;
}
/// Runs the audio book tool: extracts the text flow of the document,
/// initializes COM and converts the text flow to an audio file.
/// \param options Tool options
/// \returns ExitSuccess on success; the error code of getDocumentTextFlow
///          or createAudioBook if one of them fails, ErrorNoText if no text
///          was extracted, ErrorCOM if COM cannot be initialized
int PDFToolAudioBook::execute(const PDFToolOptions& options)
{
    pdf::PDFDocumentTextFlow textFlow;
    int result = getDocumentTextFlow(options, textFlow);
    if (result != ExitSuccess)
    {
        return result;
    }

    if (textFlow.isEmpty())
    {
        PDFConsole::writeError(PDFToolTranslationContext::tr("No text extracted to be converted to audio book."), options.outputCodec);
        return ErrorNoText;
    }

    if (!SUCCEEDED(::CoInitializeEx(nullptr, COINIT_MULTITHREADED | COINIT_SPEED_OVER_MEMORY)))
    {
        return ErrorCOM;
    }

    result = createAudioBook(options, textFlow);

    // Every successful CoInitializeEx must be paired with CoUninitialize.
    ::CoUninitialize();

    // Propagate the outcome of the conversion instead of always reporting success.
    return result;
}
/// Returns the option groups used by the audio book tool. TextSpeech must be
/// part of the set, otherwise the text-to-speech options (page number marking,
/// structure tree texts, ...) are neither registered nor parsed.
PDFToolAbstractApplication::Options PDFToolAudioBook::getOptionsFlags() const
{
    return ConsoleFormat | OpenDocument | PageSelector | VoiceSelector | TextAnalysis | TextSpeech;
}
} // namespace pdftool

View File

@ -24,6 +24,11 @@
struct ISpVoice;
// Forward declaration - this header only refers to the text flow by reference.
namespace pdf
{
class PDFDocumentTextFlow;
}
namespace pdftool
{
@ -82,6 +87,10 @@ public:
virtual QString getStandardString(StandardString standardString) const override;
virtual int execute(const PDFToolOptions& options) override;
virtual Options getOptionsFlags() const override;
private:
/// Reads the document given by \p options and stores the text flow of the
/// selected pages into \p flow. Returns ExitSuccess, ErrorDocumentReading
/// or ErrorInvalidArguments (invalid page range).
int getDocumentTextFlow(const PDFToolOptions& options, pdf::PDFDocumentTextFlow& flow);
/// Assembles the text to be spoken from \p flow according to the text speech
/// options and lets the selected voice speak it into a file stream placed
/// next to the source document. Returns ExitSuccess or ErrorSAPI.
int createAudioBook(const PDFToolOptions& options, pdf::PDFDocumentTextFlow& flow);
};
} // namespace pdftool