mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-01-14 01:28:28 +01:00
Audio book creation
This commit is contained in:
parent
e9ae0595e3
commit
fab94505d1
@ -66,6 +66,9 @@ public:
|
||||
|
||||
/// Returns items, of which the text flow consists
const Items& getItems() const
{
    return m_items;
}
|
||||
|
||||
/// Returns true, if text flow is empty
|
||||
bool isEmpty() const { return m_items.empty(); }
|
||||
|
||||
private:
|
||||
Items m_items;
|
||||
};
|
||||
|
@ -225,6 +225,16 @@ void PDFToolAbstractApplication::initializeCommandLineParser(QCommandLineParser*
|
||||
parser->addOption(QCommandLineOption("voice-age", "Choose voice age for text-to-speech engine.", "age"));
|
||||
parser->addOption(QCommandLineOption("voice-lang-code", "Choose voice language code for text-to-speech engine.", "code"));
|
||||
}
|
||||
|
||||
if (optionFlags.testFlag(TextSpeech))
|
||||
{
|
||||
parser->addOption(QCommandLineOption("mark-page-numbers", "Mark page numbers in audio stream."));
|
||||
parser->addOption(QCommandLineOption("say-page-numbers", "Say page numbers."));
|
||||
parser->addOption(QCommandLineOption("say-struct-titles", "Say titles extracted from structure tree (only for tagged pdf)."));
|
||||
parser->addOption(QCommandLineOption("say-struct-alt-desc", "Say alternative descriptions extracted from structure tree (only for tagged pdf)."));
|
||||
parser->addOption(QCommandLineOption("say-struct-exp-form", "Say expanded form extracted from structure tree (only for tagged pdf)."));
|
||||
parser->addOption(QCommandLineOption("say-struct-act-text", "Say actual text extracted from structure tree (only for tagged pdf)."));
|
||||
}
|
||||
}
|
||||
|
||||
PDFToolOptions PDFToolAbstractApplication::getOptions(QCommandLineParser* parser) const
|
||||
@ -376,6 +386,16 @@ PDFToolOptions PDFToolAbstractApplication::getOptions(QCommandLineParser* parser
|
||||
options.textVoiceLangCode = parser->isSet("voice-lang-code") ? parser->value("voice-lang-code") : QString();
|
||||
}
|
||||
|
||||
if (optionFlags.testFlag(TextSpeech))
|
||||
{
|
||||
options.textSpeechMarkPageNumbers = parser->isSet("mark-page-numbers");
|
||||
options.textSpeechSayPageNumbers = parser->isSet("say-page-numbers");
|
||||
options.textSpeechSayStructTitles = parser->isSet("say-struct-titles");
|
||||
options.textSpeechSayStructAlternativeDescription = parser->isSet("say-struct-alt-desc");
|
||||
options.textSpeechSayStructExpandedForm = parser->isSet("say-struct-exp-form");
|
||||
options.textSpeechSayStructActualText = parser->isSet("say-struct-act-text");
|
||||
}
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
|
@ -103,11 +103,9 @@ struct PDFToolOptions
|
||||
bool textSpeechMarkPageNumbers = false;
|
||||
bool textSpeechSayPageNumbers = false;
|
||||
bool textSpeechSayStructTitles = false;
|
||||
bool textSpeechSayStructLanguage = false;
|
||||
bool textSpeechSayStructAlternativeDescription = false;
|
||||
bool textSpeechSayStructExpandedForm = false;
|
||||
bool textSpeechSayStructActualText = false;
|
||||
bool textSpeechSayStructPhoneme = false;
|
||||
|
||||
/// Returns page range. If page range is invalid, then \p errorMessage is not empty.
|
||||
/// \param pageCount Page count
|
||||
@ -132,6 +130,7 @@ public:
|
||||
ErrorInvalidArguments,
|
||||
ErrorFailedWriteToFile,
|
||||
ErrorPermissions,
|
||||
ErrorNoText,
|
||||
ErrorCOM,
|
||||
ErrorSAPI
|
||||
};
|
||||
|
@ -19,8 +19,9 @@
|
||||
|
||||
#ifdef Q_OS_WIN
|
||||
|
||||
#include <QFileInfo>
|
||||
|
||||
#include <sapi.h>
|
||||
//#include <sphelper.h>
|
||||
|
||||
#pragma comment(lib, "ole32")
|
||||
|
||||
@ -309,21 +310,138 @@ QString PDFToolAudioBook::getStandardString(StandardString standardString) const
|
||||
return QString();
|
||||
}
|
||||
|
||||
|
||||
int PDFToolAudioBook::getDocumentTextFlow(const PDFToolOptions& options, pdf::PDFDocumentTextFlow& flow)
|
||||
{
|
||||
pdf::PDFDocument document;
|
||||
QByteArray sourceData;
|
||||
if (!readDocument(options, document, &sourceData))
|
||||
{
|
||||
return ErrorDocumentReading;
|
||||
}
|
||||
|
||||
QString parseError;
|
||||
std::vector<pdf::PDFInteger> pages = options.getPageRange(document.getCatalog()->getPageCount(), parseError, true);
|
||||
|
||||
if (!parseError.isEmpty())
|
||||
{
|
||||
PDFConsole::writeError(parseError, options.outputCodec);
|
||||
return ErrorInvalidArguments;
|
||||
}
|
||||
|
||||
pdf::PDFDocumentTextFlowFactory factory;
|
||||
flow = factory.create(&document, pages, options.textAnalysisAlgorithm);
|
||||
|
||||
return ExitSuccess;
|
||||
}
|
||||
|
||||
int PDFToolAudioBook::createAudioBook(const PDFToolOptions& options, pdf::PDFDocumentTextFlow& flow)
|
||||
{
|
||||
QString audioString;
|
||||
QTextStream textStream(&audioString);
|
||||
|
||||
for (const pdf::PDFDocumentTextFlow::Item& item : flow.getItems())
|
||||
{
|
||||
if (item.flags.testFlag(pdf::PDFDocumentTextFlow::PageStart) && options.textSpeechMarkPageNumbers)
|
||||
{
|
||||
textStream << QString("<bookmark mark=\"%1\"/>").arg(item.text) << endl;
|
||||
}
|
||||
|
||||
if (!item.text.isEmpty())
|
||||
{
|
||||
bool showText = (item.flags.testFlag(pdf::PDFDocumentTextFlow::Text)) ||
|
||||
(item.flags.testFlag(pdf::PDFDocumentTextFlow::PageStart) && options.textSpeechSayPageNumbers) ||
|
||||
(item.flags.testFlag(pdf::PDFDocumentTextFlow::PageEnd) && options.textSpeechSayPageNumbers) ||
|
||||
(item.flags.testFlag(pdf::PDFDocumentTextFlow::StructureTitle) && options.textSpeechSayStructTitles) ||
|
||||
(item.flags.testFlag(pdf::PDFDocumentTextFlow::StructureAlternativeDescription) && options.textSpeechSayStructAlternativeDescription) ||
|
||||
(item.flags.testFlag(pdf::PDFDocumentTextFlow::StructureExpandedForm) && options.textSpeechSayStructExpandedForm) ||
|
||||
(item.flags.testFlag(pdf::PDFDocumentTextFlow::StructureActualText) && options.textSpeechSayStructActualText);
|
||||
|
||||
if (showText)
|
||||
{
|
||||
textStream << item.text << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PDFVoiceInfoList voices;
|
||||
fillVoices(options, voices, true);
|
||||
|
||||
// Do we have any voice?
|
||||
if (voices.empty())
|
||||
{
|
||||
PDFConsole::writeError(PDFToolTranslationContext::tr("Invalid voice."), options.outputCodec);
|
||||
return ErrorSAPI;
|
||||
}
|
||||
|
||||
if (!voices.front().getVoice())
|
||||
{
|
||||
PDFConsole::writeError(PDFToolTranslationContext::tr("Invalid voice."), options.outputCodec);
|
||||
return ErrorSAPI;
|
||||
}
|
||||
|
||||
QFileInfo info(options.document);
|
||||
QString outputFile = QString("%1/%2.mp3").arg(info.path(), info.completeBaseName());
|
||||
BSTR outputFileName = (BSTR)outputFile.utf16();
|
||||
|
||||
ISpeechFileStream* stream = nullptr;
|
||||
if (!SUCCEEDED(::CoCreateInstance(CLSID_SpFileStream, NULL, CLSCTX_ALL, __uuidof(ISpeechFileStream), (LPVOID*)&stream)))
|
||||
{
|
||||
PDFConsole::writeError(PDFToolTranslationContext::tr("Cannot create output stream '%1'.").arg(outputFile), options.outputCodec);
|
||||
return ErrorSAPI;
|
||||
}
|
||||
if (!SUCCEEDED(stream->Open(outputFileName, SSFMCreateForWrite)))
|
||||
{
|
||||
PDFConsole::writeError(PDFToolTranslationContext::tr("Cannot create output stream '%1'.").arg(outputFile), options.outputCodec);
|
||||
stream->Release();
|
||||
return ErrorSAPI;
|
||||
}
|
||||
|
||||
ISpVoice* voice = voices.front().getVoice();
|
||||
voice->AddRef();
|
||||
voices.clear();
|
||||
|
||||
LPCWSTR stringToSpeak = (LPCWSTR)audioString.utf16();
|
||||
|
||||
voice->SetOutput(stream, FALSE);
|
||||
voice->Speak(stringToSpeak, SPF_PURGEBEFORESPEAK | SPF_PARSE_SAPI, NULL);
|
||||
|
||||
voice->Release();
|
||||
stream->Release();
|
||||
|
||||
return ExitSuccess;
|
||||
}
|
||||
|
||||
int PDFToolAudioBook::execute(const PDFToolOptions& options)
|
||||
{
|
||||
pdf::PDFDocumentTextFlow textFlow;
|
||||
int result = getDocumentTextFlow(options, textFlow);
|
||||
if (result != ExitSuccess)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
|
||||
if (textFlow.isEmpty())
|
||||
{
|
||||
PDFConsole::writeError(PDFToolTranslationContext::tr("No text extracted to be converted to audio book."), options.outputCodec);
|
||||
return ErrorNoText;
|
||||
}
|
||||
|
||||
if (!SUCCEEDED(::CoInitializeEx(nullptr, COINIT_MULTITHREADED | COINIT_SPEED_OVER_MEMORY)))
|
||||
{
|
||||
return ErrorCOM;
|
||||
}
|
||||
|
||||
result = createAudioBook(options, textFlow);
|
||||
|
||||
::CoUninitialize();
|
||||
|
||||
return ExitSuccess;
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Returns option groups used by this tool. TextSpeech must be included,
/// otherwise the text-to-speech options (page number marking/saying, structure
/// tree texts) are neither registered in the command line parser nor read.
PDFToolAbstractApplication::Options PDFToolAudioBook::getOptionsFlags() const
{
    return ConsoleFormat | OpenDocument | PageSelector | VoiceSelector | TextAnalysis | TextSpeech;
}
|
||||
|
||||
} // namespace pdftool
|
||||
|
@ -24,6 +24,11 @@
|
||||
|
||||
struct ISpVoice;
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
class PDFDocumentTextFlow;
|
||||
}
|
||||
|
||||
namespace pdftool
|
||||
{
|
||||
|
||||
@ -82,6 +87,10 @@ public:
|
||||
virtual QString getStandardString(StandardString standardString) const override;
|
||||
virtual int execute(const PDFToolOptions& options) override;
|
||||
virtual Options getOptionsFlags() const override;
|
||||
|
||||
private:
|
||||
/// Reads the document and creates text flow of the selected pages.
/// \param options Tool options
/// \param[out] flow Created text flow
/// \returns ExitSuccess, ErrorDocumentReading or ErrorInvalidArguments
int getDocumentTextFlow(const PDFToolOptions& options, pdf::PDFDocumentTextFlow& flow);
|
||||
/// Converts the text flow to speech, which is stored into a file placed
/// next to the source document.
/// \param options Tool options
/// \param flow Text flow to be spoken
/// \returns ExitSuccess on success, ErrorSAPI if no valid voice is available
///          or the output stream cannot be created
int createAudioBook(const PDFToolOptions& options, pdf::PDFDocumentTextFlow& flow);
|
||||
};
|
||||
|
||||
} // namespace pdftool
|
||||
|
Loading…
Reference in New Issue
Block a user