// Copyright (C) 2021-2022 Jakub Melka
//
// This file is part of PDF4QT.
//
// PDF4QT is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// with the written consent of the copyright owner, any later version.
//
// PDF4QT is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with PDF4QT. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
#include "pdfdocumentmanipulator.h"
|
|
|
|
#include "pdfdocumentbuilder.h"
|
|
|
|
#include "pdfoptimizer.h"
|
2022-01-30 18:23:25 +01:00
|
|
|
#include "pdfdbgheap.h"
|
2021-09-27 11:14:20 +02:00
|
|
|
|
|
|
|
namespace pdf
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Assembles a new document from the given pages. The pages can be pages
/// of source documents, image pages, or blank pages. The assembled document
/// is stored into m_assembledDocument by finalizeDocument().
/// \param pages Pages, from which the resulting document is assembled
/// \returns True on success; otherwise an error message (the page list
///          is empty, or a PDFException was thrown during the assembly).
PDFOperationResult PDFDocumentManipulator::assemble(const AssembledPages& pages)
{
    if (pages.empty())
    {
        return tr("Empty page list.");
    }

    // Reset state left by a previous call
    m_flags = None;
    m_mergedObjects = { };
    m_assembledDocument = PDFDocument();

    try
    {
        classify(pages);

        pdf::PDFDocumentBuilder documentBuilder;
        if (m_flags.testFlag(SingleDocument))
        {
            // Pages of exactly one source document are used, so we work
            // directly on that document instead of an empty one.
            PDFInteger documentIndex = -1;

            for (const AssembledPage& assembledPage : pages)
            {
                if (assembledPage.isDocumentPage())
                {
                    documentIndex = assembledPage.documentIndex;
                }
            }

            if (documentIndex == -1 || !m_documents.count(documentIndex))
            {
                throw PDFException(tr("Invalid document."));
            }

            documentBuilder.setDocument(m_documents.at(documentIndex));
        }
        else
        {
            documentBuilder.createDocument();
        }

        initializeMergedObjects(documentBuilder);

        ProcessedPages processedPages = processPages(documentBuilder, pages);
        std::vector<PDFObjectReference> adjustedPages;
        adjustedPages.reserve(processedPages.size());
        std::transform(processedPages.cbegin(), processedPages.cend(), std::back_inserter(adjustedPages), [](const auto& page) { return page.targetPageReference; });
        documentBuilder.setPages(adjustedPages);

        // Attach the merged optional content properties, names and form
        // to the catalog. The merged objects are filled while the pages are
        // being copied; without this call nothing in the catalog refers to them.
        // The function does nothing, when a single document is manipulated.
        finalizeMergedObjects(documentBuilder);

        // Correct page tree (invalid parents are present)
        documentBuilder.flattenPageTree();
        if (!m_flags.testFlag(SingleDocument) || m_flags.testFlag(RemovedPages))
        {
            documentBuilder.removeOutline();
            documentBuilder.removeThreads();
            documentBuilder.removeDocumentActions();
            documentBuilder.removeStructureTree();
        }

        // Jakub Melka: we also create document parts for each document part (if we aren't
        // manipulating a single document).
        if (!m_flags.testFlag(SingleDocument))
        {
            addOutlineAndDocumentParts(documentBuilder, pages, adjustedPages);
        }

        pdf::PDFDocument mergedDocument = documentBuilder.build();

        // Optimize document - remove unused objects and shrink object storage
        finalizeDocument(&mergedDocument);
    }
    catch (const PDFException& exception)
    {
        return exception.getMessage();
    }

    return true;
}
|
|
|
|
|
2021-10-13 17:51:16 +02:00
|
|
|
/// Creates a list of assembled pages, which contains all pages of the given
/// document, in the order of the document's catalog. Each page keeps
/// its original rotation.
/// \param documentIndex Index of the document, stored into each assembled page
/// \param document Document, must not be null
/// \returns One assembled page for each page of the document
PDFDocumentManipulator::AssembledPages PDFDocumentManipulator::createAllDocumentPages(int documentIndex, const PDFDocument* document)
{
    AssembledPages assembledPages;

    const auto* catalog = document->getCatalog();
    const size_t pageCount = catalog->getPageCount();

    for (size_t i = 0; i < pageCount; ++i)
    {
        const pdf::PDFPage* page = catalog->getPage(i);

        pdf::PDFDocumentManipulator::AssembledPage assembledPage;
        assembledPage.documentIndex = documentIndex;
        assembledPage.imageIndex = -1;
        assembledPage.pageIndex = i;
        assembledPage.pageRotation = page->getPageRotation();

        // Page size of an assembled page is consumed in millimetres (processPages()
        // multiplies it by PDF_MM_TO_POINT), so the media box must be converted
        // to millimetres as well.
        assembledPage.pageSize = page->getMediaBoxMM().size();

        assembledPages.emplace_back(assembledPage);
    }

    return assembledPages;
}
|
|
|
|
|
2021-09-27 11:14:20 +02:00
|
|
|
/// Creates a target page in the document builder for each assembled page
/// and applies the requested rotation to it.
/// \param documentBuilder Builder of the resulting document
/// \param pages Assembled pages
/// \returns Processed pages, each with a valid target page reference
/// \throws PDFException, if a page or an image is missing, or if a page
///         cannot be created
PDFDocumentManipulator::ProcessedPages PDFDocumentManipulator::processPages(PDFDocumentBuilder& documentBuilder, const AssembledPages& pages)
{
    ProcessedPages result;

    if (!m_flags.testFlag(SingleDocument))
    {
        // Pages come from several documents (or from none), so page objects
        // must be copied into the target builder.
        result = collectObjectsAndCopyPages(documentBuilder, pages);
    }
    else
    {
        // A single document is manipulated; its pages are already present
        // in the builder, so the objects are reused and nothing is copied,
        // unless the same page is used more than once.
        documentBuilder.flattenPageTree();
        const std::vector<PDFObjectReference> sourcePages = documentBuilder.getPages();
        const PDFInteger sourcePageCount = PDFInteger(sourcePages.size());
        std::set<PDFObjectReference> alreadyUsed;

        result.reserve(sourcePages.size());
        for (const AssembledPage& assembledPage : pages)
        {
            ProcessedPage current;
            current.assembledPage = assembledPage;

            if (assembledPage.isDocumentPage())
            {
                const PDFInteger index = assembledPage.pageIndex;
                const bool isIndexValid = index >= 0 && index < sourcePageCount;
                if (!isIndexValid)
                {
                    throw PDFException(tr("Missing page (%1) in a document.").arg(index));
                }

                const PDFObjectReference original = sourcePages[index];
                if (alreadyUsed.insert(original).second)
                {
                    // First use of the page, the page object itself is used
                    current.targetPageReference = original;
                }
                else
                {
                    // Page is used repeatedly, each further use gets its own copy
                    const std::vector<pdf::PDFObjectReference> clones = pdf::PDFDocumentBuilder::createReferencesFromObjects(documentBuilder.copyFrom(pdf::PDFDocumentBuilder::createObjectsFromReferences({ original }), *documentBuilder.getStorage(), true));
                    Q_ASSERT(clones.size() == 1);
                    current.targetPageReference = clones.front();
                    alreadyUsed.insert(clones.front());
                }
            }

            result.push_back(current);
        }
    }

    // Image pages and blank pages do not have a page object yet, we create it here.
    // Then every page is checked and rotated.
    for (ProcessedPage& processedPage : result)
    {
        const AssembledPage& source = processedPage.assembledPage;
        const bool isImagePage = source.isImagePage();

        if (isImagePage || source.isBlankPage())
        {
            QImage image;

            if (isImagePage)
            {
                const PDFInteger imageIndex = source.imageIndex;

                if (!m_images.count(imageIndex))
                {
                    throw PDFException(tr("Missing image."));
                }

                image = m_images.at(imageIndex);

                if (image.isNull())
                {
                    throw PDFException(tr("Missing image."));
                }
            }

            // Page size is converted by the PDF_MM_TO_POINT factor
            const QRectF pageRect(QPointF(0, 0), source.pageSize * PDF_MM_TO_POINT);
            processedPage.targetPageReference = documentBuilder.appendPage(pageRect);

            // Content stream is created also for a blank page, it is just empty
            PDFPageContentStreamBuilder contentStreamBuilder(&documentBuilder);
            QPainter* painter = contentStreamBuilder.begin(processedPage.targetPageReference);

            if (isImagePage)
            {
                // Image covers the whole page
                painter->drawImage(pageRect, image);
            }

            contentStreamBuilder.end(painter);
        }

        if (!processedPage.targetPageReference.isValid())
        {
            throw PDFException(tr("Error occured during page creation."));
        }

        documentBuilder.setPageRotation(processedPage.targetPageReference, source.pageRotation);
    }

    return result;
}
|
|
|
|
|
|
|
|
/// Copies all used pages of the source documents into the target document builder,
/// together with the form, names, optional content properties and outline
/// of each used document. The form, names and optional content properties are
/// merged into m_mergedObjects; the copied outline is remembered in m_outlines.
/// \param documentBuilder Builder of the resulting document
/// \param pages Assembled pages
/// \returns Processed pages; the target page reference is filled for document
///          pages only (other pages are left with a default reference)
/// \throws PDFException, if a document or a page is missing
PDFDocumentManipulator::ProcessedPages PDFDocumentManipulator::collectObjectsAndCopyPages(PDFDocumentBuilder& documentBuilder, const AssembledPages& pages)
{
    ProcessedPages processedPages;
    processedPages.reserve(pages.size());

    // Key is a pair (document index, page index), value is the reference
    // of the copied page in the target builder (filled below). The map is
    // ordered, so pages of one document form a contiguous range.
    std::map<std::pair<int, int>, PDFObjectReference> documentPages;

    for (const AssembledPage& assembledPage : pages)
    {
        ProcessedPage processedPage;
        processedPage.assembledPage = assembledPage;
        processedPages.push_back(processedPage);

        if (assembledPage.isDocumentPage())
        {
            documentPages[std::make_pair(assembledPage.documentIndex, assembledPage.pageIndex)] = PDFObjectReference();
        }
    }

    for (auto it = documentPages.begin(); it != documentPages.end();)
    {
        const int documentIndex = it->first.first;

        // Jakub Melka: we will find end of a single document page range
        auto itEnd = it;
        while (itEnd != documentPages.end() && itEnd->first.first == documentIndex)
        {
            ++itEnd;
        }

        if (documentIndex != -1)
        {
            // Check we have the document
            if (!m_documents.count(documentIndex))
            {
                throw PDFException(tr("Invalid document."));
            }
            const PDFDocument* document = m_documents.at(documentIndex);

            // Copy the pages into the target document builder
            std::vector<PDFInteger> pageIndices;
            pageIndices.reserve(std::distance(it, itEnd));
            for (auto currentIt = it; currentIt != itEnd; ++currentIt)
            {
                pageIndices.push_back(currentIt->first.second);
            }

            pdf::PDFDocumentBuilder temporaryBuilder(document);
            temporaryBuilder.flattenPageTree();

            std::vector<pdf::PDFObjectReference> currentPages = temporaryBuilder.getPages();
            const PDFInteger currentPageCount = PDFInteger(currentPages.size());

            // Pages, and four catalog objects appended after them
            std::vector<pdf::PDFObjectReference> objectsToMerge;
            objectsToMerge.reserve(pageIndices.size() + 4);

            for (const PDFInteger pageIndex : pageIndices)
            {
                if (pageIndex < 0 || pageIndex >= currentPageCount)
                {
                    throw PDFException(tr("Missing page (%1) in a document.").arg(pageIndex));
                }

                objectsToMerge.push_back(currentPages[pageIndex]);
            }

            pdf::PDFObjectReference acroFormReference;
            pdf::PDFObjectReference namesReference;
            pdf::PDFObjectReference ocPropertiesReference;
            pdf::PDFObjectReference outlineReference;

            // Form object can be a direct object; in that case, we add it
            // into the temporary builder to obtain a reference to it.
            pdf::PDFObject formObject = document->getCatalog()->getFormObject();
            if (formObject.isReference())
            {
                acroFormReference = formObject.getReference();
            }
            else
            {
                acroFormReference = temporaryBuilder.addObject(formObject);
            }

            if (const pdf::PDFDictionary* catalogDictionary = temporaryBuilder.getDictionaryFromObject(temporaryBuilder.getObjectByReference(temporaryBuilder.getCatalogReference())))
            {
                pdf::PDFObject namesObject = catalogDictionary->get("Names");
                if (namesObject.isReference())
                {
                    namesReference = namesObject.getReference();
                }

                pdf::PDFObject ocPropertiesObject = catalogDictionary->get("OCProperties");
                if (ocPropertiesObject.isReference())
                {
                    ocPropertiesReference = ocPropertiesObject.getReference();
                }

                pdf::PDFObject outlineObject = catalogDictionary->get("Outlines");
                if (outlineObject.isReference())
                {
                    outlineReference = outlineObject.getReference();
                }
            }

            // Missing objects are replaced by placeholder objects, so exactly
            // four references always follow the pages in the merged array.
            if (!namesReference.isValid())
            {
                namesReference = temporaryBuilder.addObject(pdf::PDFObject());
            }

            if (!ocPropertiesReference.isValid())
            {
                ocPropertiesReference = temporaryBuilder.addObject(pdf::PDFObject());
            }

            if (!outlineReference.isValid())
            {
                outlineReference = temporaryBuilder.addObject(pdf::PDFObject());
            }

            objectsToMerge.insert(objectsToMerge.end(), { acroFormReference, namesReference, ocPropertiesReference, outlineReference });

            // Now, we are ready to merge objects into target document builder
            std::vector<pdf::PDFObjectReference> references = pdf::PDFDocumentBuilder::createReferencesFromObjects(documentBuilder.copyFrom(pdf::PDFDocumentBuilder::createObjectsFromReferences(objectsToMerge), *temporaryBuilder.getStorage(), true));

            // Take the four catalog objects back, in the reverse order of insertion
            outlineReference = references.back();
            references.pop_back();
            ocPropertiesReference = references.back();
            references.pop_back();
            namesReference = references.back();
            references.pop_back();
            acroFormReference = references.back();
            references.pop_back();

            documentBuilder.appendTo(m_mergedObjects[MOT_OCProperties], documentBuilder.getObjectByReference(ocPropertiesReference));
            documentBuilder.appendTo(m_mergedObjects[MOT_Form], documentBuilder.getObjectByReference(acroFormReference));
            documentBuilder.mergeNames(m_mergedObjects[MOT_Names], namesReference);
            m_outlines[documentIndex] = outlineReference;

            // Only page references remain, one for each page of the range
            Q_ASSERT(references.size() == size_t(std::distance(it, itEnd)));

            auto referenceIt = references.begin();
            for (auto currentIt = it; currentIt != itEnd; ++currentIt, ++referenceIt)
            {
                currentIt->second = *referenceIt;
            }
        }

        // Advance the index
        it = itEnd;
    }

    std::set<PDFObjectReference> usedReferences;
    for (ProcessedPage& processedPage : processedPages)
    {
        if (processedPage.assembledPage.isDocumentPage())
        {
            auto key = std::make_pair(processedPage.assembledPage.documentIndex, processedPage.assembledPage.pageIndex);
            Q_ASSERT(documentPages.count(key));

            PDFObjectReference pageReference = documentPages.at(key);
            if (!usedReferences.count(pageReference))
            {
                processedPage.targetPageReference = pageReference;
                usedReferences.insert(pageReference);
            }
            else
            {
                // Page is being cloned. So we must clone it...
                std::vector<pdf::PDFObjectReference> references = pdf::PDFDocumentBuilder::createReferencesFromObjects(documentBuilder.copyFrom(pdf::PDFDocumentBuilder::createObjectsFromReferences({ pageReference }), *documentBuilder.getStorage(), true));
                Q_ASSERT(references.size() == 1);
                processedPage.targetPageReference = references.front();
                usedReferences.insert(references.front());
            }
        }
    }

    return processedPages;
}
|
|
|
|
|
|
|
|
void PDFDocumentManipulator::classify(const AssembledPages& pages)
|
|
|
|
{
|
|
|
|
m_flags = None;
|
|
|
|
|
|
|
|
std::set<PDFInteger> documentIndices;
|
|
|
|
std::set<PDFInteger> pageIndices;
|
|
|
|
for (const AssembledPage& assembledPage : pages)
|
|
|
|
{
|
|
|
|
documentIndices.insert(assembledPage.documentIndex);
|
|
|
|
pageIndices.insert(assembledPage.pageIndex);
|
|
|
|
}
|
|
|
|
|
|
|
|
documentIndices.erase(-1);
|
|
|
|
pageIndices.erase(-1);
|
|
|
|
|
|
|
|
m_flags.setFlag(SingleDocument, documentIndices.size() == 1);
|
|
|
|
|
|
|
|
if (m_flags.testFlag(SingleDocument) && m_documents.count(*documentIndices.begin()))
|
|
|
|
{
|
|
|
|
const PDFDocument* document = m_documents.at(*documentIndices.begin());
|
|
|
|
const bool pagesRemoved = pageIndices.size() < document->getCatalog()->getPageCount();
|
|
|
|
m_flags.setFlag(RemovedPages, pagesRemoved);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Adds one default-constructed object into the document builder for each
/// merged object type (optional content properties, form, names), and stores
/// the references into m_mergedObjects.
/// \param documentBuilder Builder of the resulting document
void PDFDocumentManipulator::initializeMergedObjects(PDFDocumentBuilder& documentBuilder)
{
    // Objects are added in this exact order
    for (const auto type : { MOT_OCProperties, MOT_Form, MOT_Names })
    {
        m_mergedObjects[type] = documentBuilder.addObject(PDFObject());
    }
}
|
|
|
|
|
|
|
|
/// Sets merged objects (optional content properties, names, form) into
/// the catalog of the document builder. A merged object is set only if it is
/// not null. Nothing is done, if a single document is being manipulated.
/// \param documentBuilder Builder of the resulting document
void PDFDocumentManipulator::finalizeMergedObjects(PDFDocumentBuilder& documentBuilder)
{
    if (m_flags.testFlag(SingleDocument))
    {
        return;
    }

    auto isFilled = [this, &documentBuilder](auto type)
    {
        return !documentBuilder.getObjectByReference(m_mergedObjects[type]).isNull();
    };

    if (isFilled(MOT_OCProperties))
    {
        documentBuilder.setCatalogOptionalContentProperties(m_mergedObjects[MOT_OCProperties]);
    }

    if (isFilled(MOT_Names))
    {
        documentBuilder.setCatalogNames(m_mergedObjects[MOT_Names]);
    }

    if (isFilled(MOT_Form))
    {
        documentBuilder.setCatalogAcroForm(m_mergedObjects[MOT_Form]);
    }
}
|
|
|
|
|
|
|
|
/// Optimizes the document, removes duplicit references in arrays of the catalog's
/// optional content properties and form, and stores the resulting document
/// into m_assembledDocument.
/// \param document Document to be finalized
void PDFDocumentManipulator::finalizeDocument(PDFDocument* document)
{
    const pdf::PDFOptimizer::OptimizationFlags flags(PDFOptimizer::RemoveUnusedObjects |
                                                     PDFOptimizer::ShrinkObjectStorage |
                                                     PDFOptimizer::DereferenceSimpleObjects |
                                                     PDFOptimizer::MergeIdenticalObjects);

    PDFOptimizer optimizer(flags, nullptr);
    optimizer.setDocument(document);
    optimizer.optimize();
    PDFDocument optimizedDocument = optimizer.takeOptimizedDocument();

    // Merged objects can contain the same reference several times
    // in their arrays, so the arrays are cleaned up.
    pdf::PDFDocumentBuilder finalBuilder(&optimizedDocument);
    const pdf::PDFDictionary* catalogDictionary = finalBuilder.getDictionaryFromObject(finalBuilder.getObjectByReference(finalBuilder.getCatalogReference()));

    if (catalogDictionary)
    {
        pdf::PDFDocumentDataLoaderDecorator loader(finalBuilder.getStorage());

        auto removeDuplicitReferences = [&finalBuilder, &loader, catalogDictionary](const char* key)
        {
            const pdf::PDFObjectReference reference = loader.readReferenceFromDictionary(catalogDictionary, key);
            if (!reference.isValid())
            {
                return;
            }

            finalBuilder.setObject(reference, pdf::PDFObjectManipulator::removeDuplicitReferencesInArrays(finalBuilder.getObjectByReference(reference)));
        };

        removeDuplicitReferences("OCProperties");
        removeDuplicitReferences("AcroForm");
    }

    m_assembledDocument = finalBuilder.build();
}
|
|
|
|
|
|
|
|
/// Splits assembled pages into document parts (runs of consecutive pages with
/// the same document index), creates the document parts in the document builder,
/// and, unless the outline mode is NoOutline, creates an outline with one bold
/// item for each document part.
/// \param documentBuilder Builder of the resulting document
/// \param pages Assembled pages, must not be empty
/// \param adjustedPages References to pages in the resulting document
void PDFDocumentManipulator::addOutlineAndDocumentParts(PDFDocumentBuilder& documentBuilder,
                                                        const AssembledPages& pages,
                                                        const std::vector<PDFObjectReference>& adjustedPages)
{
    // Description of a single document part
    struct DocumentPartInfo
    {
        size_t pageCount = 0;
        PDFInteger documentIndex = 0;
        bool isWholeDocument = false;
        QString caption;
        PDFObjectReference firstPageReference;
    };

    std::vector<DocumentPartInfo> parts;
    parts.emplace_back();

    // State of the part being currently collected
    PDFInteger currentDocumentIndex = pages.front().documentIndex;
    PDFClosedIntervalSet partPageNumbers;
    PDFInteger partImageCount = 0;
    PDFInteger partBlankPageCount = 0;

    // Count of pages in all already closed parts
    PDFInteger closedPageCount = 0;

    // Fills caption, document index and first page of the last part,
    // and resets the state for the next part.
    auto closeCurrentPart = [&](PDFInteger documentIndex)
    {
        DocumentPartInfo& part = parts.back();
        QString caption;

        const bool hasDocument = documentIndex != -1 && m_documents.count(documentIndex);
        if (hasDocument)
        {
            const PDFDocument* document = m_documents.at(documentIndex);

            caption = document->getInfo()->title;
            if (caption.isEmpty())
            {
                caption = tr("Document %1").arg(documentIndex);
            }

            const PDFInteger documentPageCount = PDFInteger(document->getCatalog()->getPageCount());
            if (partPageNumbers.getTotalLength() >= documentPageCount)
            {
                part.isWholeDocument = true;
            }
            else
            {
                // Only some pages are used, so we list them in the caption
                caption = tr("%1, p. %2").arg(caption, partPageNumbers.toText(true));
            }
        }
        else if (partImageCount > 0 && partBlankPageCount == 0)
        {
            caption = tr("%1 Images").arg(partImageCount);
        }
        else
        {
            caption = tr("%1 Pages").arg(partImageCount + partBlankPageCount);
        }

        part.caption = caption;
        part.documentIndex = documentIndex;

        if (closedPageCount < PDFInteger(adjustedPages.size()))
        {
            part.firstPageReference = adjustedPages[closedPageCount];
        }
        else
        {
            part.firstPageReference = PDFObjectReference();
        }

        partPageNumbers = PDFClosedIntervalSet();
        partImageCount = 0;
        partBlankPageCount = 0;
        closedPageCount += part.pageCount;
    };

    for (const AssembledPage& page : pages)
    {
        if (page.documentIndex != currentDocumentIndex)
        {
            // Document index has changed, a new part starts with this page
            closeCurrentPart(currentDocumentIndex);
            parts.push_back(DocumentPartInfo());
            currentDocumentIndex = page.documentIndex;
        }

        ++parts.back().pageCount;

        if (page.isDocumentPage())
        {
            // Page numbers in the caption are one-based
            partPageNumbers.addValue(page.pageIndex + 1);
        }

        if (page.isImagePage())
        {
            ++partImageCount;
        }

        if (page.isBlankPage())
        {
            ++partBlankPageCount;
        }
    }
    closeCurrentPart(currentDocumentIndex);

    std::vector<size_t> partPageCounts;
    std::transform(parts.cbegin(), parts.cend(), std::back_inserter(partPageCounts), [](const auto& part) { return part.pageCount; });
    documentBuilder.createDocumentParts(partPageCounts);

    if (m_outlineMode == OutlineMode::NoOutline)
    {
        return;
    }

    QSharedPointer<PDFOutlineItem> outlineRoot(new PDFOutlineItem());

    for (const DocumentPartInfo& part : parts)
    {
        QSharedPointer<PDFOutlineItem> partItem(new PDFOutlineItem);
        partItem->setAction(PDFActionPtr(new PDFActionGoTo(PDFDestination::createFit(part.firstPageReference), PDFDestination())));
        partItem->setTitle(part.caption);
        partItem->setFontBold(true);

        // Outline of the source document is joined only if the part
        // covers the whole document.
        const bool joinSourceOutline = m_outlineMode == OutlineMode::Join && part.isWholeDocument;
        if (joinSourceOutline)
        {
            const PDFInteger documentIndex = part.documentIndex;
            QSharedPointer<PDFOutlineItem> sourceOutline = PDFOutlineItem::parse(documentBuilder.getStorage(), PDFObject::createReference(m_outlines.at(documentIndex)));

            if (sourceOutline)
            {
                const size_t childCount = sourceOutline->getChildCount();
                for (size_t childIndex = 0; childIndex < childCount; ++childIndex)
                {
                    partItem->addChild(sourceOutline->getChildPtr(childIndex));
                }
            }
        }

        outlineRoot->addChild(std::move(partItem));
    }

    documentBuilder.setOutline(outlineRoot.data());
}
|
|
|
|
|
|
|
|
/// Returns the current outline mode (see addOutlineAndDocumentParts(),
/// where the outline mode is used).
PDFDocumentManipulator::OutlineMode PDFDocumentManipulator::getOutlineMode() const
{
    return this->m_outlineMode;
}
|
|
|
|
|
|
|
|
/// Sets the outline mode, which is used by addOutlineAndDocumentParts().
/// \param outlineMode New outline mode
void PDFDocumentManipulator::setOutlineMode(OutlineMode outlineMode)
{
    this->m_outlineMode = outlineMode;
}
|
|
|
|
|
|
|
|
} // namespace pdf
|