DocPage Organizer: Joining bookmarks

This commit is contained in:
Jakub Melka 2021-08-07 16:23:37 +02:00
parent 0ba06062e5
commit 8731e02823
12 changed files with 237 additions and 40 deletions

View File

@ -32,6 +32,11 @@ AssembleOutputSettingsDialog::AssembleOutputSettingsDialog(QString directory, QW
ui->setupUi(this);
ui->directoryEdit->setText(directory);
ui->outlineModeComboBox->addItem(tr("No Outline"), int(pdf::PDFDocumentManipulator::OutlineMode::NoOutline));
ui->outlineModeComboBox->addItem(tr("Join Outlines"), int(pdf::PDFDocumentManipulator::OutlineMode::Join));
ui->outlineModeComboBox->addItem(tr("Document Parts"), int(pdf::PDFDocumentManipulator::OutlineMode::DocumentParts));
ui->outlineModeComboBox->setCurrentIndex(ui->outlineModeComboBox->findData(int(pdf::PDFDocumentManipulator::OutlineMode::DocumentParts)));
pdf::PDFWidgetUtils::scaleWidget(this, QSize(450, 150));
}
@ -55,6 +60,11 @@ bool AssembleOutputSettingsDialog::isOverwriteFiles() const
return ui->overwriteFilesCheckBox->isChecked();
}
/// Returns the outline mode selected in the outline mode combo box.
/// The integer item data of the current combo box entry is converted
/// back to the OutlineMode enumeration.
pdf::PDFDocumentManipulator::OutlineMode AssembleOutputSettingsDialog::getOutlineMode() const
{
    const int selectedMode = ui->outlineModeComboBox->currentData().toInt();
    return static_cast<pdf::PDFDocumentManipulator::OutlineMode>(selectedMode);
}
void AssembleOutputSettingsDialog::on_selectDirectoryButton_clicked()
{
QString directory = QFileDialog::getExistingDirectory(this, tr("Select output directory"), ui->directoryEdit->text());

View File

@ -18,6 +18,8 @@
#ifndef PDFDOCPAGEORGANIZER_ASSEMBLEOUTPUTSETTINGSDIALOG_H
#define PDFDOCPAGEORGANIZER_ASSEMBLEOUTPUTSETTINGSDIALOG_H
#include "pdfdocumentmanipulator.h"
#include <QDialog>
namespace Ui
@ -39,6 +41,7 @@ public:
QString getDirectory() const;
QString getFileName() const;
bool isOverwriteFiles() const;
pdf::PDFDocumentManipulator::OutlineMode getOutlineMode() const;
private slots:
void on_selectDirectoryButton_clicked();

View File

@ -20,13 +20,6 @@
<string>Assemble Documents</string>
</property>
<layout class="QGridLayout" name="assembleDocumentsGroupBoxLayout">
<item row="1" column="0">
<widget class="QLabel" name="fileTemplateLabel">
<property name="text">
<string>File template</string>
</property>
</widget>
</item>
<item row="0" column="3">
<widget class="QToolButton" name="selectDirectoryButton">
<property name="text">
@ -34,7 +27,17 @@
</property>
</widget>
</item>
<item row="3" column="0" colspan="3">
<item row="0" column="1" colspan="2">
<widget class="QLineEdit" name="directoryEdit"/>
</item>
<item row="1" column="0">
<widget class="QLabel" name="fileTemplateLabel">
<property name="text">
<string>File template</string>
</property>
</widget>
</item>
<item row="4" column="0" colspan="3">
<widget class="QLabel" name="infoLabel">
<property name="text">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;In a template file name, you can use symbols '#' for output document number (means output document index, not input document) or '@' for page number of input document (if document contains more pages, it is a page number of a original document), or '%' for index of input document. Use more '#' or '@' or '%' for setting minimal number of digits (if number has less digits, the they are padded with zero).&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
@ -44,6 +47,13 @@
</property>
</widget>
</item>
<item row="3" column="1">
<widget class="QCheckBox" name="overwriteFilesCheckBox">
<property name="text">
<string>Overwrite existing files</string>
</property>
</widget>
</item>
<item row="1" column="1" colspan="2">
<widget class="QLineEdit" name="fileTemplateEdit">
<property name="text">
@ -58,16 +68,16 @@
</property>
</widget>
</item>
<item row="0" column="1" colspan="2">
<widget class="QLineEdit" name="directoryEdit"/>
</item>
<item row="2" column="1">
<widget class="QCheckBox" name="overwriteFilesCheckBox">
<item row="2" column="0">
<widget class="QLabel" name="outlineModeLabel">
<property name="text">
<string>Overwrite existing files</string>
<string>Outline Mode</string>
</property>
</widget>
</item>
<item row="2" column="1" colspan="2">
<widget class="QComboBox" name="outlineModeComboBox"/>
</item>
</layout>
</widget>
</item>

View File

@ -621,6 +621,8 @@ void MainWindow::performOperation(Operation operation)
QString directory = dialog.getDirectory();
QString fileNameTemplate = dialog.getFileName();
const bool isOverwriteEnabled = dialog.isOverwriteFiles();
pdf::PDFDocumentManipulator::OutlineMode outlineMode = dialog.getOutlineMode();
manipulator.setOutlineMode(outlineMode);
if (!directory.endsWith('/'))
{

View File

@ -190,7 +190,7 @@ PDFCatalog PDFCatalog::parse(const PDFObject& catalog, const PDFDocument* docume
if (catalogDictionary->hasKey("Outlines"))
{
catalogObject.m_outlineRoot = PDFOutlineItem::parse(document, catalogDictionary->get("Outlines"));
catalogObject.m_outlineRoot = PDFOutlineItem::parse(&document->getStorage(), catalogDictionary->get("Outlines"));
}
if (catalogDictionary->hasKey("OpenAction"))

View File

@ -940,8 +940,10 @@ void PDFDocumentBuilder::setObject(PDFObjectReference reference, PDFObject objec
m_storage.setObject(reference, qMove(object));
}
void PDFDocumentBuilder::createDocumentParts(const std::vector<size_t>& parts)
std::vector<PDFObjectReference> PDFDocumentBuilder::createDocumentParts(const std::vector<size_t>& parts)
{
std::vector<PDFObjectReference> documentParts;
PDFObjectReference root = createDocumentPartRoot();
std::vector<PDFObjectReference> pages = getPages();
@ -950,6 +952,8 @@ void PDFDocumentBuilder::createDocumentParts(const std::vector<size_t>& parts)
objectFactory.beginDictionaryItem("DParts");
objectFactory.beginArray();
documentParts.reserve(parts.size());
size_t start = 0;
for (std::size_t count : parts)
{
@ -967,6 +971,8 @@ void PDFDocumentBuilder::createDocumentParts(const std::vector<size_t>& parts)
setPageDocumentPart(*it, item);
}
documentParts.push_back(item);
objectFactory.beginArray();
objectFactory << item;
objectFactory.endArray();
@ -979,6 +985,7 @@ void PDFDocumentBuilder::createDocumentParts(const std::vector<size_t>& parts)
objectFactory.endDictionary();
mergeTo(root, objectFactory.takeObject());
return documentParts;
}
void PDFDocumentBuilder::mergeNames(PDFObjectReference a, PDFObjectReference b)
@ -1280,11 +1287,27 @@ PDFObjectReference PDFDocumentBuilder::createOutlineItem(const PDFOutlineItem* r
objectBuilder.endDictionaryItem();
// Destination
const PDFActionGoTo* action = dynamic_cast<const PDFActionGoTo*>(root->getAction());
if (action)
const PDFActionGoTo* actionGoTo = dynamic_cast<const PDFActionGoTo*>(root->getAction());
if (actionGoTo)
{
objectBuilder.beginDictionaryItem("Dest");
objectBuilder << action->getDestination();
objectBuilder << actionGoTo->getDestination();
objectBuilder.endDictionaryItem();
}
const PDFActionGoToDp* actionGoToDp = dynamic_cast<const PDFActionGoToDp*>(root->getAction());
if (actionGoToDp)
{
objectBuilder.beginDictionaryItem("A");
objectBuilder.beginDictionary();
objectBuilder.beginDictionaryItem("S");
objectBuilder << WrapName("GoToDp");
objectBuilder.endDictionaryItem();
objectBuilder.beginDictionaryItem("Dp");
objectBuilder << actionGoToDp->getDocumentPart();
objectBuilder.endDictionaryItem();
objectBuilder.endDictionary();
objectBuilder.endDictionaryItem();
}

View File

@ -409,7 +409,8 @@ public:
/// by function \p flattenPageTree. \sa flattenPageTree. Each document
/// part has a certain page count; the sum of \p parts must equal the page count.
/// \param parts Parts (page count of each document part)
void createDocumentParts(const std::vector<size_t>& parts);
/// \returns List of references to created document parts
std::vector<PDFObjectReference> createDocumentParts(const std::vector<size_t>& parts);
/// Merges two independent 'Names' entry in catalog dictionary. It is used,
/// for example, when documents are being merged.

View File

@ -84,22 +84,130 @@ PDFOperationResult PDFDocumentManipulator::assemble(const AssembledPages& pages)
if (!m_flags.testFlag(SingleDocument))
{
PDFInteger lastDocumentIndex = pages.front().documentIndex;
std::vector<size_t> documentPartPageCounts = { 0 };
struct DocumentPartInfo
{
size_t pageCount = 0;
PDFInteger documentIndex = 0;
bool isWholeDocument = false;
QString caption;
};
std::vector<DocumentPartInfo> documentParts = { DocumentPartInfo() };
PDFClosedIntervalSet pageNumbers;
PDFInteger imageCount = 0;
PDFInteger blankPageCount = 0;
auto addDocumentPartCaption = [&](PDFInteger documentIndex)
{
DocumentPartInfo& info = documentParts.back();
QString documentTitle;
if (documentIndex != -1 && m_documents.count(documentIndex))
{
const PDFDocument* document = m_documents.at(documentIndex);
documentTitle = document->getInfo()->title;
if (documentTitle.isEmpty())
{
documentTitle = tr("Document %1").arg(documentIndex);
}
if (pageNumbers.getTotalLength() < PDFInteger(document->getCatalog()->getPageCount()))
{
documentTitle = tr("%1, p. %2").arg(documentTitle, pageNumbers.toText(true));
}
else
{
info.isWholeDocument = true;
}
}
else if (imageCount > 0 && blankPageCount == 0)
{
documentTitle = tr("%1 Images").arg(imageCount);
}
else
{
documentTitle = tr("%1 Pages").arg(imageCount + blankPageCount);
}
info.caption = documentTitle;
info.documentIndex = documentIndex;
pageNumbers = PDFClosedIntervalSet();
imageCount = 0;
blankPageCount = 0;
};
for (const AssembledPage& page : pages)
{
if (page.documentIndex == lastDocumentIndex)
{
++documentPartPageCounts.back();
++documentParts.back().pageCount;
}
else
{
documentPartPageCounts.push_back(1);
addDocumentPartCaption(lastDocumentIndex);
documentParts.push_back(DocumentPartInfo());
++documentParts.back().pageCount;
lastDocumentIndex = page.documentIndex;
}
}
documentBuilder.createDocumentParts(documentPartPageCounts);
if (page.isDocumentPage())
{
pageNumbers.addValue(page.pageIndex + 1);
}
if (page.isImagePage())
{
++imageCount;
}
if (page.isBlankPage())
{
++blankPageCount;
}
}
addDocumentPartCaption(lastDocumentIndex);
std::vector<size_t> documentPartPageCounts;
std::transform(documentParts.cbegin(), documentParts.cend(), std::back_inserter(documentPartPageCounts), [](const auto& part) { return part.pageCount; });
std::vector<PDFObjectReference> parts = documentBuilder.createDocumentParts(documentPartPageCounts);
if (m_outlineMode != OutlineMode::NoOutline)
{
QSharedPointer<PDFOutlineItem> rootItem(new PDFOutlineItem());
int partIndex = 0;
for (const PDFObjectReference& documentPartReference : parts)
{
const DocumentPartInfo& info = documentParts[partIndex++];
QSharedPointer<PDFOutlineItem> documentPartItem(new PDFOutlineItem);
PDFObjectReference actionGoToPart = documentBuilder.createActionGoToDocumentPart(documentPartReference);
auto action = PDFAction::parse(documentBuilder.getStorage(), documentBuilder.getObjectByReference(actionGoToPart));
documentPartItem->setAction(std::move(action));
documentPartItem->setTitle(info.caption);
documentPartItem->setFontBold(true);
if (m_outlineMode == OutlineMode::Join && info.isWholeDocument)
{
const PDFInteger documentIndex = info.documentIndex;
QSharedPointer<PDFOutlineItem> outline = PDFOutlineItem::parse(documentBuilder.getStorage(), PDFObject::createReference(m_outlines.at(documentIndex)));
if (outline)
{
for (size_t i = 0; i < outline->getChildCount(); ++i)
{
documentPartItem->addChild(outline->getChildPtr(i));
}
}
}
rootItem->addChild(std::move(documentPartItem));
}
documentBuilder.setOutline(rootItem.data());
}
}
pdf::PDFDocument mergedDocument = documentBuilder.build();
@ -286,6 +394,7 @@ PDFDocumentManipulator::ProcessedPages PDFDocumentManipulator::collectObjectsAnd
pdf::PDFObjectReference acroFormReference;
pdf::PDFObjectReference namesReference;
pdf::PDFObjectReference ocPropertiesReference;
pdf::PDFObjectReference outlineReference;
pdf::PDFObject formObject = document->getCatalog()->getFormObject();
if (formObject.isReference())
@ -310,6 +419,12 @@ PDFDocumentManipulator::ProcessedPages PDFDocumentManipulator::collectObjectsAnd
{
ocPropertiesReference = ocPropertiesObject.getReference();
}
pdf::PDFObject outlineObject = catalogDictionary->get("Outlines");
if (outlineObject.isReference())
{
outlineReference = outlineObject.getReference();
}
}
if (!namesReference.isValid())
@ -322,11 +437,18 @@ PDFDocumentManipulator::ProcessedPages PDFDocumentManipulator::collectObjectsAnd
ocPropertiesReference = temporaryBuilder.addObject(pdf::PDFObject());
}
objectsToMerge.insert(objectsToMerge.end(), { acroFormReference, namesReference, ocPropertiesReference });
if (!outlineReference.isValid())
{
outlineReference = temporaryBuilder.addObject(pdf::PDFObject());
}
objectsToMerge.insert(objectsToMerge.end(), { acroFormReference, namesReference, ocPropertiesReference, outlineReference });
// Now, we are ready to merge objects into target document builder
std::vector<pdf::PDFObjectReference> references = pdf::PDFDocumentBuilder::createReferencesFromObjects(documentBuilder.copyFrom(pdf::PDFDocumentBuilder::createObjectsFromReferences(objectsToMerge), *temporaryBuilder.getStorage(), true));
outlineReference = references.back();
references.pop_back();
ocPropertiesReference = references.back();
references.pop_back();
namesReference = references.back();
@ -337,6 +459,7 @@ PDFDocumentManipulator::ProcessedPages PDFDocumentManipulator::collectObjectsAnd
documentBuilder.appendTo(m_mergedObjects[MOT_OCProperties], documentBuilder.getObjectByReference(ocPropertiesReference));
documentBuilder.appendTo(m_mergedObjects[MOT_Form], documentBuilder.getObjectByReference(acroFormReference));
documentBuilder.mergeNames(m_mergedObjects[MOT_Names], namesReference);
m_outlines[documentIndex] = outlineReference;
Q_ASSERT(references.size() == std::distance(it, itEnd));
@ -462,4 +585,14 @@ void PDFDocumentManipulator::finalizeDocument(PDFDocument* document)
m_assembledDocument = finalBuilder.build();
}
/// Returns the outline mode currently stored in this manipulator.
PDFDocumentManipulator::OutlineMode PDFDocumentManipulator::getOutlineMode() const
{
return m_outlineMode;
}
/// Stores the outline mode; the stored value is consulted when pages
/// are assembled into a document which is not a single document.
/// \param outlineMode New outline mode
void PDFDocumentManipulator::setOutlineMode(OutlineMode outlineMode)
{
m_outlineMode = outlineMode;
}
} // namespace pdf

View File

@ -37,6 +37,16 @@ class Pdf4QtLIBSHARED_EXPORT PDFDocumentManipulator
public:
explicit PDFDocumentManipulator() = default;
/// Selects outline creation mode, when multiple documents
/// are merged into one. For single document manipulation,
/// this has no meaning.
enum class OutlineMode
{
NoOutline, ///< No outline is created for the assembled document
Join, ///< One item per document part; if the part covers a whole source document, top-level items of that document's outline are added as its children
DocumentParts ///< One item per document part, with an action pointing to that document part
};
struct AssembledPage
{
PDFInteger documentIndex = -1; ///< Source document index. If page is not from a document, value is -1.
@ -85,6 +95,9 @@ public:
static constexpr AssembledPage createImagePage(int imageIndex, QSizeF pageSize, PageRotation pageRotation) { return AssembledPage{ -1, imageIndex, -1, pageSize, pageRotation}; }
static constexpr AssembledPage createBlankPage(QSizeF pageSize, PageRotation pageRotation) { return AssembledPage{ -1, -1, -1, pageSize, pageRotation}; }
OutlineMode getOutlineMode() const;
void setOutlineMode(OutlineMode outlineMode);
private:
struct ProcessedPage
@ -133,6 +146,8 @@ private:
AssembleFlags m_flags = None;
std::array<PDFObjectReference, MOT_Last> m_mergedObjects = { };
PDFDocument m_assembledDocument;
OutlineMode m_outlineMode = OutlineMode::DocumentParts;
std::map<PDFInteger, PDFObjectReference> m_outlines;
};
} // namespace pdf

View File

@ -35,9 +35,9 @@ size_t PDFOutlineItem::getTotalCount() const
return count;
}
QSharedPointer<PDFOutlineItem> PDFOutlineItem::parse(const PDFDocument* document, const PDFObject& root)
QSharedPointer<PDFOutlineItem> PDFOutlineItem::parse(const PDFObjectStorage* storage, const PDFObject& root)
{
const PDFObject& rootDereferenced = document->getObject(root);
const PDFObject& rootDereferenced = storage->getObject(root);
if (rootDereferenced.isDictionary())
{
const PDFDictionary* dictionary = rootDereferenced.getDictionary();
@ -47,7 +47,7 @@ QSharedPointer<PDFOutlineItem> PDFOutlineItem::parse(const PDFDocument* document
{
QSharedPointer<PDFOutlineItem> result(new PDFOutlineItem());
std::set<PDFObjectReference> visitedOutlineItems;
parseImpl(document, result.get(), first.getReference(), visitedOutlineItems);
parseImpl(storage, result.get(), first.getReference(), visitedOutlineItems);
return result;
}
}
@ -55,7 +55,7 @@ QSharedPointer<PDFOutlineItem> PDFOutlineItem::parse(const PDFDocument* document
return QSharedPointer<PDFOutlineItem>();
}
void PDFOutlineItem::parseImpl(const PDFDocument* document,
void PDFOutlineItem::parseImpl(const PDFObjectStorage* storage,
PDFOutlineItem* parent,
PDFObjectReference currentItem,
std::set<PDFObjectReference>& visitedOutlineItems)
@ -71,24 +71,24 @@ void PDFOutlineItem::parseImpl(const PDFDocument* document,
};
checkCyclicDependence(currentItem);
PDFObject dereferencedItem = document->getObjectByReference(currentItem);
PDFObject dereferencedItem = storage->getObjectByReference(currentItem);
while (dereferencedItem.isDictionary())
{
const PDFDictionary* dictionary = dereferencedItem.getDictionary();
QSharedPointer<PDFOutlineItem> currentOutlineItem(new PDFOutlineItem());
const PDFObject& titleObject = document->getObject(dictionary->get("Title"));
const PDFObject& titleObject = storage->getObject(dictionary->get("Title"));
if (titleObject.isString())
{
currentOutlineItem->setTitle(PDFEncoding::convertTextString(titleObject.getString()));
}
currentOutlineItem->setAction(PDFAction::parse(&document->getStorage(), dictionary->get("A")));
currentOutlineItem->setAction(PDFAction::parse(storage, dictionary->get("A")));
if (!currentOutlineItem->getAction() && dictionary->hasKey("Dest"))
{
currentOutlineItem->setAction(PDFActionPtr(new PDFActionGoTo(PDFDestination::parse(&document->getStorage(), dictionary->get("Dest")), PDFDestination())));
currentOutlineItem->setAction(PDFActionPtr(new PDFActionGoTo(PDFDestination::parse(storage, dictionary->get("Dest")), PDFDestination())));
}
PDFDocumentDataLoaderDecorator loader(document);
PDFDocumentDataLoaderDecorator loader(storage);
std::vector<PDFReal> colors = loader.readNumberArrayFromDictionary(dictionary, "C", { 0.0, 0.0, 0.0 });
colors.resize(3, 0.0);
currentOutlineItem->setTextColor(QColor::fromRgbF(colors[0], colors[1], colors[2]));
@ -105,7 +105,7 @@ void PDFOutlineItem::parseImpl(const PDFDocument* document,
const PDFObject& firstItem = dictionary->get("First");
if (firstItem.isReference())
{
parseImpl(document, currentOutlineItem.get(), firstItem.getReference(), visitedOutlineItems);
parseImpl(storage, currentOutlineItem.get(), firstItem.getReference(), visitedOutlineItems);
}
// Add new child to the parent
@ -116,7 +116,7 @@ void PDFOutlineItem::parseImpl(const PDFDocument* document,
if (nextItem.isReference())
{
checkCyclicDependence(nextItem.getReference());
dereferencedItem = document->getObject(nextItem);
dereferencedItem = storage->getObject(nextItem);
}
else
{

View File

@ -45,7 +45,7 @@ public:
void addChild(QSharedPointer<PDFOutlineItem> child) { m_children.emplace_back(qMove(child)); }
QSharedPointer<PDFOutlineItem> getChildPtr(size_t index) const { return m_children[index]; }
static QSharedPointer<PDFOutlineItem> parse(const PDFDocument* document, const PDFObject& root);
static QSharedPointer<PDFOutlineItem> parse(const PDFObjectStorage* storage, const PDFObject& root);
const PDFAction* getAction() const;
PDFAction* getAction();
@ -66,7 +66,7 @@ public:
void apply(const std::function<void(PDFOutlineItem*)>& functor);
private:
static void parseImpl(const PDFDocument* document,
static void parseImpl(const PDFObjectStorage* storage,
PDFOutlineItem* parent,
PDFObjectReference currentItem,
std::set<PDFObjectReference>& visitedOutlineItems);

View File

@ -135,7 +135,7 @@ PDFDocument PDFRedact::perform(Options options)
{
if (catalogDictionary->hasKey("Outlines"))
{
QSharedPointer<PDFOutlineItem> outlineRoot = PDFOutlineItem::parse(m_document, catalogDictionary->get("Outlines"));
QSharedPointer<PDFOutlineItem> outlineRoot = PDFOutlineItem::parse(&m_document->getStorage(), catalogDictionary->get("Outlines"));
if (outlineRoot)
{