mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-06-05 21:59:17 +02:00
Parsing X Reference table
This commit is contained in:
148
PdfForQtLib/sources/pdfxreftable.cpp
Normal file
148
PdfForQtLib/sources/pdfxreftable.cpp
Normal file
@ -0,0 +1,148 @@
|
||||
// Copyright (C) 2018 Jakub Melka
|
||||
//
|
||||
// This file is part of PdfForQt.
|
||||
//
|
||||
// PdfForQt is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// PdfForQt is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with PDFForQt. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
#include "pdfxreftable.h"
|
||||
#include "pdfconstants.h"
|
||||
#include "pdfparser.h"
|
||||
|
||||
#include <stack>
|
||||
|
||||
namespace pdf
|
||||
{
|
||||
|
||||
void PDFXRefTable::readXRefTable(PDFParsingContext* context, const QByteArray& byteArray, PDFInteger startTableOffset)
|
||||
{
|
||||
PDFParser parser(byteArray, context, PDFParser::None);
|
||||
|
||||
m_entries.clear();
|
||||
|
||||
std::set<PDFInteger> processedOffsets;
|
||||
std::stack<PDFInteger> workSet;
|
||||
workSet.push(startTableOffset);
|
||||
|
||||
while (!workSet.empty())
|
||||
{
|
||||
PDFInteger currentOffset = workSet.top();
|
||||
workSet.pop();
|
||||
|
||||
// Check, if we have cyclical references between tables
|
||||
if (processedOffsets.count(currentOffset))
|
||||
{
|
||||
throw PDFParserException(tr("Cyclic reference found in reference table."));
|
||||
}
|
||||
else
|
||||
{
|
||||
processedOffsets.insert(currentOffset);
|
||||
}
|
||||
|
||||
// Now, we are ready to scan the table. Seek to the start of the reference table.
|
||||
parser.seek(currentOffset);
|
||||
|
||||
if (parser.fetchCommand(PDF_XREF_HEADER))
|
||||
{
|
||||
while (!parser.fetchCommand(PDF_XREF_TRAILER))
|
||||
{
|
||||
// Now, first number is start offset, second number is count of table items
|
||||
PDFObject firstObject = parser.getObject();
|
||||
PDFObject countObject = parser.getObject();
|
||||
|
||||
if (!firstObject.isInt() || !countObject.isInt())
|
||||
{
|
||||
throw PDFParserException(tr("Invalid format of reference table."));
|
||||
}
|
||||
|
||||
PDFInteger firstObjectNumber = firstObject.getInteger();
|
||||
PDFInteger count = countObject.getInteger();
|
||||
|
||||
const PDFInteger lastObjectIndex = firstObjectNumber + count - 1;
|
||||
const PDFInteger desiredSize = lastObjectIndex + 1;
|
||||
|
||||
if (static_cast<PDFInteger>(m_entries.size()) < desiredSize)
|
||||
{
|
||||
m_entries.resize(desiredSize);
|
||||
}
|
||||
|
||||
// Now, read the records
|
||||
for (PDFInteger i = 0; i < count; ++i)
|
||||
{
|
||||
const PDFInteger objectNumber = firstObjectNumber + i;
|
||||
|
||||
PDFObject offset = parser.getObject();
|
||||
PDFObject generation = parser.getObject();
|
||||
|
||||
bool occupied = parser.fetchCommand(PDF_XREF_OCCUPIED);
|
||||
if (!occupied && !parser.fetchCommand(PDF_XREF_FREE))
|
||||
{
|
||||
throw PDFParserException(tr("Bad format of reference table entry."));
|
||||
}
|
||||
|
||||
if (!offset.isInt() || !generation.isInt())
|
||||
{
|
||||
throw PDFParserException(tr("Bad format of reference table entry."));
|
||||
}
|
||||
|
||||
Entry entry;
|
||||
if (occupied)
|
||||
{
|
||||
entry.reference = PDFObjectReference(objectNumber, generation.getInteger());
|
||||
entry.offset = offset.getInteger();
|
||||
entry.type = EntryType::Occupied;
|
||||
}
|
||||
|
||||
m_entries[objectNumber] = std::move(entry);
|
||||
}
|
||||
}
|
||||
|
||||
PDFObject trailerDictionary = parser.getObject();
|
||||
if (!trailerDictionary.isDictionary())
|
||||
{
|
||||
throw PDFParserException(tr("Trailer dictionary is invalid."));
|
||||
}
|
||||
|
||||
// Now, we have scanned the table. If we didn't have a trailer dictionary yet, then
|
||||
// try to load it. We must also check, that trailer dictionary is OK.
|
||||
if (m_trailerDictionary.isNull())
|
||||
{
|
||||
m_trailerDictionary = trailerDictionary;
|
||||
}
|
||||
|
||||
const PDFDictionary* dictionary = trailerDictionary.getDictionary();
|
||||
if (dictionary->hasKey(PDF_XREF_TRAILER_PREVIOUS))
|
||||
{
|
||||
PDFObject previousOffset = dictionary->get(PDF_XREF_TRAILER_PREVIOUS);
|
||||
|
||||
if (!previousOffset.isInt())
|
||||
{
|
||||
throw PDFParserException(tr("Offset of previous reference table is invalid."));
|
||||
}
|
||||
|
||||
workSet.push(previousOffset.getInteger());
|
||||
}
|
||||
|
||||
if (dictionary->hasKey(PDF_XREF_TRAILER_XREFSTM))
|
||||
{
|
||||
throw PDFParserException(tr("Hybrid reference tables not supported."));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
throw PDFParserException(tr("Invalid format of reference table."));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace pdf
|
Reference in New Issue
Block a user