mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-01-13 08:42:52 +01:00
485 lines
16 KiB
C++
485 lines
16 KiB
C++
// Copyright (C) 2020-2021 Jakub Melka
|
|
//
|
|
// This file is part of PDF4QT.
|
|
//
|
|
// PDF4QT is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Lesser General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// with the written consent of the copyright owner, any later version.
|
|
//
|
|
// PDF4QT is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Lesser General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
#include "pdftoolaudiobook.h"
|
|
|
|
#ifdef Q_OS_WIN
|
|
|
|
#include <QFileInfo>
|
|
|
|
#include <windows.h>
|
|
#include <sapi.h>
|
|
|
|
#if defined(PDF4QT_USE_PRAGMA_LIB)
|
|
#pragma comment(lib, "ole32")
|
|
#endif
|
|
|
|
namespace pdftool
|
|
{
|
|
|
|
// File-scope instance of the "audio-book" tool application.
// NOTE(review): presumably the base class constructor registers the instance
// with the tool framework - confirm in PDFToolAbstractApplication.
static PDFToolAudioBook s_audioBookApplication;
|
|
// File-scope instance of the "audio-book-voices" tool application.
// NOTE(review): presumably the base class constructor registers the instance
// with the tool framework - confirm in PDFToolAbstractApplication.
static PDFToolAudioBookVoices s_audioBookVoicesApplication;
|
|
|
|
/// Creates voice information from a set of attributes and an optional SAPI token.
/// \param properties Map of attribute name -> attribute value describing the voice
/// \param voiceToken SAPI voice token, can be nullptr. If a token is passed,
///        a COM reference is added to it (the destructor releases it again).
PDFVoiceInfo::PDFVoiceInfo(std::map<QString, QString> properties, ISpObjectToken* voiceToken) :
    m_properties(qMove(properties)),
    m_voiceToken(voiceToken)
{
    if (!voiceToken)
    {
        // No token attached, there is nothing to reference
        return;
    }

    // Keep the COM object alive as long as this instance holds the pointer
    voiceToken->AddRef();
}
|
|
|
|
/// Move constructor. Takes over the attributes and the token reference
/// of \p other. The source is left without attributes and without a token,
/// so its destructor does not release the reference which is now owned
/// by this instance.
/// \param other Instance to move from
PDFVoiceInfo::PDFVoiceInfo(PDFVoiceInfo&& other) :
    m_properties(qMove(other.m_properties)),
    m_voiceToken(other.m_voiceToken)
{
    // Members are initialized explicitly instead of swapping with them,
    // so the constructor never reads a member before it has a value and
    // does not depend on default member initializers in the class declaration.
    other.m_properties.clear();
    other.m_voiceToken = nullptr;
}
|
|
|
|
/// Move assignment, implemented as an exchange of state: this instance
/// receives the attributes and the token of \p other, and \p other receives
/// the previous state of this instance (which it releases in its destructor).
/// \param other Instance to exchange the state with
/// \returns Reference to this instance
PDFVoiceInfo& PDFVoiceInfo::operator=(PDFVoiceInfo&& other)
{
    // Exchange voice attributes
    m_properties.swap(other.m_properties);

    // Exchange token pointers, reference counts are not touched
    ISpObjectToken* const previousToken = m_voiceToken;
    m_voiceToken = other.m_voiceToken;
    other.m_voiceToken = previousToken;

    return *this;
}
|
|
|
|
/// Destructor. Releases the reference to the voice token, if a token is held.
PDFVoiceInfo::~PDFVoiceInfo()
{
    if (!m_voiceToken)
    {
        // Nothing to release
        return;
    }

    m_voiceToken->Release();
}
|
|
|
|
/// Returns the locale of the voice, derived from the "Language" attribute,
/// which holds a hexadecimal Windows locale identifier (LCID).
/// \returns Locale built from the ISO 639 language code and ISO 3166 country
///          code reported by the system for the LCID. A default constructed
///          locale is returned if the attribute cannot be parsed, or if
///          the system does not know the language of the LCID.
QLocale PDFVoiceInfo::getLocale() const
{
    // The attribute can contain a list of identifiers separated by semicolon
    // (for example "409;9"). The first one is used; parsing the whole list
    // as a single number would always fail.
    const QString primaryLanguage = getLanguage().section(QLatin1Char(';'), 0, 0).trimmed();

    bool ok = false;
    const LCID locale = primaryLanguage.toUInt(&ok, 16);

    if (!ok)
    {
        return QLocale();
    }

    // Queries single locale information string; returns empty string on failure
    auto queryLocaleInfo = [locale](LCTYPE infoType) -> QString
    {
        // First call retrieves required buffer size (including terminating zero)
        const int count = GetLocaleInfoW(locale, infoType, nullptr, 0);
        if (count <= 0)
        {
            return QString();
        }

        std::vector<wchar_t> buffer(count, wchar_t());
        if (GetLocaleInfoW(locale, infoType, buffer.data(), int(buffer.size())) <= 0)
        {
            return QString();
        }

        return QString::fromWCharArray(buffer.data());
    };

    const QString languageCode = queryLocaleInfo(LOCALE_SISO639LANGNAME);
    const QString countryCode = queryLocaleInfo(LOCALE_SISO3166CTRYNAME);

    if (languageCode.isEmpty())
    {
        // Unknown language, do not construct locale from a malformed name
        return QLocale();
    }

    if (countryCode.isEmpty())
    {
        return QLocale(languageCode);
    }

    return QLocale(QString("%1_%2").arg(languageCode, countryCode));
}
|
|
|
|
/// Returns the value of the voice attribute with the given name.
/// \param key Attribute name
/// \returns Attribute value, or empty string, if the attribute is not present
QString PDFVoiceInfo::getStringValue(QString key) const
{
    const auto position = m_properties.find(key);
    const bool isPresent = position != m_properties.cend();
    return isPresent ? position->second : QString();
}
|
|
|
|
int PDFToolAudioBookBase::fillVoices(const PDFToolOptions& options, PDFVoiceInfoList& list, bool fillVoiceTokenPointers)
|
|
{
|
|
int result = ExitSuccess;
|
|
|
|
QStringList voiceSelector;
|
|
if (!options.textVoiceName.isEmpty())
|
|
{
|
|
voiceSelector << QString("Name=%1").arg(options.textVoiceName);
|
|
}
|
|
if (!options.textVoiceGender.isEmpty())
|
|
{
|
|
voiceSelector << QString("Gender=%1").arg(options.textVoiceGender);
|
|
}
|
|
if (!options.textVoiceAge.isEmpty())
|
|
{
|
|
voiceSelector << QString("Age=%1").arg(options.textVoiceAge);
|
|
}
|
|
if (!options.textVoiceLangCode.isEmpty())
|
|
{
|
|
voiceSelector << QString("Language=%1").arg(options.textVoiceLangCode);
|
|
}
|
|
QString voiceSelectorString = voiceSelector.join(";");
|
|
LPCWSTR requiredAttributes = !voiceSelectorString.isEmpty() ? (LPCWSTR)voiceSelectorString.utf16() : nullptr;
|
|
|
|
ISpObjectTokenCategory* category = nullptr;
|
|
if (!SUCCEEDED(::CoCreateInstance(CLSID_SpObjectTokenCategory, NULL, CLSCTX_ALL, __uuidof(ISpObjectTokenCategory), (LPVOID*)&category)))
|
|
{
|
|
PDFConsole::writeError(PDFToolTranslationContext::tr("SAPI Error: Cannot enumerate SAPI voices."), options.outputCodec);
|
|
return ErrorSAPI;
|
|
}
|
|
|
|
if (!SUCCEEDED(category->SetId(SPCAT_VOICES, FALSE)))
|
|
{
|
|
PDFConsole::writeError(PDFToolTranslationContext::tr("SAPI Error: Cannot enumerate SAPI voices."), options.outputCodec);
|
|
category->Release();
|
|
return ErrorSAPI;
|
|
}
|
|
|
|
IEnumSpObjectTokens* enumTokensObject = nullptr;
|
|
if (SUCCEEDED(category->EnumTokens(requiredAttributes, NULL, &enumTokensObject)))
|
|
{
|
|
ISpObjectToken* token = nullptr;
|
|
while (SUCCEEDED(enumTokensObject->Next(1, &token, NULL)))
|
|
{
|
|
if (token)
|
|
{
|
|
/* Attributes can be for example:
|
|
* Version,
|
|
* Language,
|
|
* Gender,
|
|
* Age,
|
|
* Name
|
|
* Vendor */
|
|
|
|
std::map<QString, QString> properties;
|
|
|
|
ISpDataKey* attributes = nullptr;
|
|
if (SUCCEEDED(token->OpenKey(L"Attributes", &attributes)))
|
|
{
|
|
for (ULONG i = 0; ; ++i)
|
|
{
|
|
LPWSTR valueName = NULL;
|
|
if (SUCCEEDED(attributes->EnumValues(i, &valueName)))
|
|
{
|
|
LPWSTR data = NULL;
|
|
if (SUCCEEDED(attributes->GetStringValue(valueName, &data)))
|
|
{
|
|
QString propertyName = QString::fromWCharArray(valueName);
|
|
QString propertyValue = QString::fromWCharArray(data);
|
|
if (!propertyValue.isEmpty())
|
|
{
|
|
properties[propertyName] = propertyValue;
|
|
}
|
|
::CoTaskMemFree(data);
|
|
}
|
|
|
|
::CoTaskMemFree(valueName);
|
|
}
|
|
else
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
attributes->Release();
|
|
}
|
|
|
|
if (fillVoiceTokenPointers)
|
|
{
|
|
list.emplace_back(qMove(properties), token);
|
|
}
|
|
else
|
|
{
|
|
list.emplace_back(qMove(properties), nullptr);
|
|
}
|
|
|
|
token->Release();
|
|
}
|
|
else
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
PDFConsole::writeError(PDFToolTranslationContext::tr("SAPI Error: Cannot enumerate SAPI voices."), options.outputCodec);
|
|
result = ErrorSAPI;
|
|
}
|
|
|
|
if (enumTokensObject)
|
|
{
|
|
enumTokensObject->Release();
|
|
}
|
|
|
|
if (category)
|
|
{
|
|
category->Release();
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
int PDFToolAudioBookBase::showVoiceList(const PDFToolOptions& options)
|
|
{
|
|
PDFVoiceInfoList voices;
|
|
int result = fillVoices(options, voices, false);
|
|
|
|
PDFOutputFormatter formatter(options.outputStyle);
|
|
formatter.beginDocument("voices", PDFToolTranslationContext::tr("Available voices for given settings:"));
|
|
formatter.endl();
|
|
|
|
formatter.beginTable("voices", PDFToolTranslationContext::tr("Voice list"));
|
|
|
|
formatter.beginTableHeaderRow("header");
|
|
formatter.writeTableHeaderColumn("name", PDFToolTranslationContext::tr("Name"), Qt::AlignLeft);
|
|
formatter.writeTableHeaderColumn("gender", PDFToolTranslationContext::tr("Gender"), Qt::AlignLeft);
|
|
formatter.writeTableHeaderColumn("age", PDFToolTranslationContext::tr("Age"), Qt::AlignLeft);
|
|
formatter.writeTableHeaderColumn("language-code", PDFToolTranslationContext::tr("Lang. Code"), Qt::AlignLeft);
|
|
formatter.writeTableHeaderColumn("locale", PDFToolTranslationContext::tr("Locale"), Qt::AlignLeft);
|
|
formatter.writeTableHeaderColumn("language", PDFToolTranslationContext::tr("Language"), Qt::AlignLeft);
|
|
formatter.writeTableHeaderColumn("country", PDFToolTranslationContext::tr("Country"), Qt::AlignLeft);
|
|
formatter.writeTableHeaderColumn("vendor", PDFToolTranslationContext::tr("Vendor"), Qt::AlignLeft);
|
|
formatter.writeTableHeaderColumn("version", PDFToolTranslationContext::tr("Version"), Qt::AlignLeft);
|
|
formatter.endTableHeaderRow();
|
|
|
|
for (const PDFVoiceInfo& voice : voices)
|
|
{
|
|
QLocale locale = voice.getLocale();
|
|
formatter.beginTableRow("voice");
|
|
formatter.writeTableColumn("name", voice.getName(), Qt::AlignLeft);
|
|
formatter.writeTableColumn("gender", voice.getGender(), Qt::AlignLeft);
|
|
formatter.writeTableColumn("age", voice.getAge(), Qt::AlignLeft);
|
|
formatter.writeTableColumn("language", voice.getLanguage(), Qt::AlignLeft);
|
|
formatter.writeTableColumn("locale", locale.name(), Qt::AlignLeft);
|
|
formatter.writeTableColumn("language", locale.nativeLanguageName(), Qt::AlignLeft);
|
|
formatter.writeTableColumn("country", locale.nativeTerritoryName(), Qt::AlignLeft);
|
|
formatter.writeTableColumn("vendor", voice.getVendor(), Qt::AlignLeft);
|
|
formatter.writeTableColumn("version", voice.getVersion(), Qt::AlignLeft);
|
|
formatter.endTableRow();
|
|
}
|
|
|
|
formatter.endTable();
|
|
|
|
formatter.endDocument();
|
|
PDFConsole::writeText(formatter.getString(), options.outputCodec);
|
|
|
|
return result;
|
|
}
|
|
|
|
/// Returns the standard string (command, name or description) of the tool
/// listing the available voices.
/// \param standardString Kind of the requested string
/// \returns Requested string; for an unknown kind the function asserts
///          and returns an empty string
QString PDFToolAudioBookVoices::getStandardString(PDFToolAbstractApplication::StandardString standardString) const
{
    if (standardString == Command)
    {
        return "audio-book-voices";
    }

    if (standardString == Name)
    {
        return PDFToolTranslationContext::tr("Audio book voices");
    }

    if (standardString == Description)
    {
        return PDFToolTranslationContext::tr("List of available voices for audio book conversion.");
    }

    // Unknown kind of the standard string
    Q_ASSERT(false);
    return QString();
}
|
|
|
|
int PDFToolAudioBookVoices::execute(const PDFToolOptions& options)
|
|
{
|
|
if (!SUCCEEDED(::CoInitialize(nullptr)))
|
|
{
|
|
return ErrorCOM;
|
|
}
|
|
|
|
int returnCode = showVoiceList(options);
|
|
|
|
::CoUninitialize();
|
|
|
|
return returnCode;
|
|
}
|
|
|
|
/// Returns the option groups used by this tool: console output format
/// and voice selector.
PDFToolAbstractApplication::Options PDFToolAudioBookVoices::getOptionsFlags() const
{
    const Options usedOptions = ConsoleFormat | VoiceSelector;
    return usedOptions;
}
|
|
|
|
/// Returns the standard string (command, name or description) of the
/// audio book conversion tool.
/// \param standardString Kind of the requested string
/// \returns Requested string; for an unknown kind the function asserts
///          and returns an empty string
QString PDFToolAudioBook::getStandardString(StandardString standardString) const
{
    if (standardString == Command)
    {
        return "audio-book";
    }

    if (standardString == Name)
    {
        return PDFToolTranslationContext::tr("Audio book convertor");
    }

    if (standardString == Description)
    {
        return PDFToolTranslationContext::tr("Convert your document to a simple audio book.");
    }

    // Unknown kind of the standard string
    Q_ASSERT(false);
    return QString();
}
|
|
|
|
|
|
int PDFToolAudioBook::getDocumentTextFlow(const PDFToolOptions& options, pdf::PDFDocumentTextFlow& flow)
|
|
{
|
|
pdf::PDFDocument document;
|
|
QByteArray sourceData;
|
|
if (!readDocument(options, document, &sourceData, false))
|
|
{
|
|
return ErrorDocumentReading;
|
|
}
|
|
|
|
QString parseError;
|
|
std::vector<pdf::PDFInteger> pages = options.getPageRange(document.getCatalog()->getPageCount(), parseError, true);
|
|
|
|
if (!parseError.isEmpty())
|
|
{
|
|
PDFConsole::writeError(parseError, options.outputCodec);
|
|
return ErrorInvalidArguments;
|
|
}
|
|
|
|
pdf::PDFDocumentTextFlowFactory factory;
|
|
flow = factory.create(&document, pages, options.textAnalysisAlgorithm);
|
|
|
|
return ExitSuccess;
|
|
}
|
|
|
|
int PDFToolAudioBook::createAudioBook(const PDFToolOptions& options, pdf::PDFDocumentTextFlow& flow)
|
|
{
|
|
QString audioString;
|
|
QTextStream textStream(&audioString);
|
|
|
|
for (const pdf::PDFDocumentTextFlow::Item& item : flow.getItems())
|
|
{
|
|
if (item.flags.testFlag(pdf::PDFDocumentTextFlow::PageStart) && options.textSpeechMarkPageNumbers)
|
|
{
|
|
textStream << QString("<bookmark mark=\"%1\"/>").arg(item.text) << Qt::endl;
|
|
}
|
|
|
|
if (!item.text.isEmpty())
|
|
{
|
|
bool showText = (item.flags.testFlag(pdf::PDFDocumentTextFlow::Text)) ||
|
|
(item.flags.testFlag(pdf::PDFDocumentTextFlow::PageStart) && options.textSpeechSayPageNumbers) ||
|
|
(item.flags.testFlag(pdf::PDFDocumentTextFlow::PageEnd) && options.textSpeechSayPageNumbers) ||
|
|
(item.flags.testFlag(pdf::PDFDocumentTextFlow::StructureTitle) && options.textSpeechSayStructTitles) ||
|
|
(item.flags.testFlag(pdf::PDFDocumentTextFlow::StructureAlternativeDescription) && options.textSpeechSayStructAlternativeDescription) ||
|
|
(item.flags.testFlag(pdf::PDFDocumentTextFlow::StructureExpandedForm) && options.textSpeechSayStructExpandedForm) ||
|
|
(item.flags.testFlag(pdf::PDFDocumentTextFlow::StructureActualText) && options.textSpeechSayStructActualText);
|
|
|
|
if (showText)
|
|
{
|
|
textStream << item.text << Qt::endl;
|
|
}
|
|
}
|
|
}
|
|
|
|
PDFVoiceInfoList voices;
|
|
fillVoices(options, voices, true);
|
|
|
|
// Do we have any voice?
|
|
if (voices.empty())
|
|
{
|
|
PDFConsole::writeError(PDFToolTranslationContext::tr("No suitable voice found."), options.outputCodec);
|
|
return ErrorSAPI;
|
|
}
|
|
|
|
if (!voices.front().getVoiceToken())
|
|
{
|
|
PDFConsole::writeError(PDFToolTranslationContext::tr("Invalid voice."), options.outputCodec);
|
|
return ErrorSAPI;
|
|
}
|
|
|
|
QFileInfo info(options.document);
|
|
QString outputFile = QString("%1/%2.%3").arg(info.path(), info.completeBaseName(), options.textSpeechAudioFormat);
|
|
BSTR outputFileName = (BSTR)outputFile.utf16();
|
|
|
|
ISpeechFileStream* stream = nullptr;
|
|
if (!SUCCEEDED(::CoCreateInstance(CLSID_SpFileStream, NULL, CLSCTX_ALL, __uuidof(ISpeechFileStream), (LPVOID*)&stream)))
|
|
{
|
|
PDFConsole::writeError(PDFToolTranslationContext::tr("Cannot create output stream '%1'.").arg(outputFile), options.outputCodec);
|
|
return ErrorSAPI;
|
|
}
|
|
|
|
ISpVoice* voice = nullptr;
|
|
if (!SUCCEEDED(::CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, __uuidof(ISpVoice), (LPVOID*)&voice)))
|
|
{
|
|
PDFConsole::writeError(PDFToolTranslationContext::tr("Cannot create voice."), options.outputCodec);
|
|
stream->Release();
|
|
return ErrorSAPI;
|
|
}
|
|
|
|
if (!SUCCEEDED(stream->Open(outputFileName, SSFMCreateForWrite)))
|
|
{
|
|
PDFConsole::writeError(PDFToolTranslationContext::tr("Cannot create output stream '%1'.").arg(outputFile), options.outputCodec);
|
|
voice->Release();
|
|
stream->Release();
|
|
return ErrorSAPI;
|
|
}
|
|
|
|
ISpObjectToken* voiceToken = voices.front().getVoiceToken();
|
|
if (!SUCCEEDED(voice->SetVoice(voiceToken)))
|
|
{
|
|
PDFConsole::writeError(PDFToolTranslationContext::tr("Failed to set requested voice. Default voice will be used."), options.outputCodec);
|
|
}
|
|
voices.clear();
|
|
|
|
LPCWSTR stringToSpeak = (LPCWSTR)audioString.utf16();
|
|
|
|
voice->SetOutput(stream, FALSE);
|
|
voice->Speak(stringToSpeak, SPF_PURGEBEFORESPEAK | SPF_PARSE_SAPI, NULL);
|
|
|
|
voice->Release();
|
|
stream->Release();
|
|
|
|
return ExitSuccess;
|
|
}
|
|
|
|
int PDFToolAudioBook::execute(const PDFToolOptions& options)
|
|
{
|
|
pdf::PDFDocumentTextFlow textFlow;
|
|
int result = getDocumentTextFlow(options, textFlow);
|
|
if (result != ExitSuccess)
|
|
{
|
|
return result;
|
|
}
|
|
|
|
if (textFlow.isEmpty())
|
|
{
|
|
PDFConsole::writeError(PDFToolTranslationContext::tr("No text extracted to be converted to audio book."), options.outputCodec);
|
|
return ErrorNoText;
|
|
}
|
|
|
|
auto comResult = ::CoInitialize(nullptr);
|
|
if (!SUCCEEDED(comResult))
|
|
{
|
|
return ErrorCOM;
|
|
}
|
|
|
|
result = createAudioBook(options, textFlow);
|
|
|
|
::CoUninitialize();
|
|
|
|
return result;
|
|
}
|
|
|
|
/// Returns the option groups used by this tool: console output format,
/// document opening, page selector, voice selector, text analysis
/// and text speech.
PDFToolAbstractApplication::Options PDFToolAudioBook::getOptionsFlags() const
{
    const Options usedOptions = ConsoleFormat |
                                OpenDocument |
                                PageSelector |
                                VoiceSelector |
                                TextAnalysis |
                                TextSpeech;
    return usedOptions;
}
|
|
|
|
} // namespace pdftool
|
|
|
|
#endif
|