mirror of
https://github.com/JakubMelka/PDF4QT.git
synced 2025-01-27 07:46:08 +01:00
Issue #21: Export table as CSV
This commit is contained in:
parent
e0b67cefde
commit
900745580d
@ -469,6 +469,70 @@ PDFTextSelection PDFTextLayout::createTextSelection(PDFInteger pageIndex, const
|
||||
return selection;
|
||||
}
|
||||
|
||||
PDFTextSelection PDFTextLayout::selectBlock(const size_t blockIndex, PDFInteger pageIndex, QColor color) const
|
||||
{
|
||||
PDFTextSelection selection;
|
||||
if (blockIndex >= m_blocks.size())
|
||||
{
|
||||
return selection;
|
||||
}
|
||||
|
||||
const PDFTextBlock& textBlock = m_blocks[blockIndex];
|
||||
if (textBlock.getLines().empty())
|
||||
{
|
||||
return selection;
|
||||
}
|
||||
|
||||
PDFCharacterPointer ptrA;
|
||||
PDFCharacterPointer ptrB;
|
||||
|
||||
ptrA.pageIndex = pageIndex;
|
||||
ptrA.blockIndex = blockIndex;
|
||||
ptrA.lineIndex = 0;
|
||||
ptrA.characterIndex = 0;
|
||||
|
||||
ptrB.pageIndex = pageIndex;
|
||||
ptrB.blockIndex = blockIndex;
|
||||
ptrB.lineIndex = textBlock.getLines().size() - 1;
|
||||
ptrB.characterIndex = textBlock.getLines().back().getCharacters().size() - 1;
|
||||
|
||||
selection.addItems({ PDFTextSelectionItem(ptrA, ptrB) }, color);
|
||||
selection.build();
|
||||
return selection;
|
||||
}
|
||||
|
||||
PDFTextSelection PDFTextLayout::selectLineInBlock(const size_t blockIndex, const size_t lineIndex, PDFInteger pageIndex, QColor color) const
|
||||
{
|
||||
PDFTextSelection selection;
|
||||
if (blockIndex >= m_blocks.size())
|
||||
{
|
||||
return selection;
|
||||
}
|
||||
|
||||
const PDFTextBlock& textBlock = m_blocks[blockIndex];
|
||||
if (lineIndex >= textBlock.getLines().size())
|
||||
{
|
||||
return selection;
|
||||
}
|
||||
|
||||
PDFCharacterPointer ptrA;
|
||||
PDFCharacterPointer ptrB;
|
||||
|
||||
ptrA.pageIndex = pageIndex;
|
||||
ptrA.blockIndex = blockIndex;
|
||||
ptrA.lineIndex = lineIndex;
|
||||
ptrA.characterIndex = 0;
|
||||
|
||||
ptrB.pageIndex = pageIndex;
|
||||
ptrB.blockIndex = blockIndex;
|
||||
ptrB.lineIndex = lineIndex;
|
||||
ptrB.characterIndex = textBlock.getLines()[lineIndex].getCharacters().size() - 1;
|
||||
|
||||
selection.addItems({ PDFTextSelectionItem(ptrA, ptrB) }, color);
|
||||
selection.build();
|
||||
return selection;
|
||||
}
|
||||
|
||||
QString PDFTextLayout::getTextFromSelection(PDFTextSelection::iterator itBegin, PDFTextSelection::iterator itEnd, PDFInteger pageIndex) const
|
||||
{
|
||||
QStringList text;
|
||||
|
@ -381,6 +381,19 @@ public:
|
||||
/// \param pageIndex Index of the page
|
||||
QString getTextFromSelection(const PDFTextSelection& selection, PDFInteger pageIndex) const;
|
||||
|
||||
/// Creates text selection for whole block
|
||||
/// \param blockIndex Text block index
|
||||
/// \param pageIndex pageIndex
|
||||
/// \param color Selection color
|
||||
PDFTextSelection selectBlock(const size_t blockIndex, PDFInteger pageIndex, QColor color) const;
|
||||
|
||||
/// Creates text selection for signle line of text block
|
||||
/// \param blockIndex Text block index
|
||||
/// \param lineIndex Line index
|
||||
/// \param pageIndex pageIndex
|
||||
/// \param color Selection color
|
||||
PDFTextSelection selectLineInBlock(const size_t blockIndex, const size_t lineIndex, PDFInteger pageIndex, QColor color) const;
|
||||
|
||||
friend QDataStream& operator<<(QDataStream& stream, const PDFTextLayout& layout);
|
||||
friend QDataStream& operator>>(QDataStream& stream, PDFTextLayout& layout);
|
||||
|
||||
|
@ -1372,7 +1372,8 @@ void PDFExtractImageTool::onImagePicked(const QImage& image)
|
||||
PDFSelectTableTool::PDFSelectTableTool(PDFDrawWidgetProxy* proxy, QAction* action, QObject* parent) :
|
||||
BaseClass(proxy, action, parent),
|
||||
m_pickTool(nullptr),
|
||||
m_pageIndex(-1)
|
||||
m_pageIndex(-1),
|
||||
m_isTransposed(false)
|
||||
{
|
||||
m_pickTool = new PDFPickTool(proxy, PDFPickTool::Mode::Rectangles, this);
|
||||
connect(m_pickTool, &PDFPickTool::rectanglePicked, this, &PDFSelectTableTool::onRectanglePicked);
|
||||
@ -1511,6 +1512,141 @@ void PDFSelectTableTool::mouseMoveEvent(QWidget* widget, QMouseEvent* event)
|
||||
}
|
||||
}
|
||||
|
||||
void PDFSelectTableTool::shortcutOverrideEvent(QWidget* widget, QKeyEvent* event)
|
||||
{
|
||||
Q_UNUSED(widget);
|
||||
|
||||
if (event == QKeySequence::Copy)
|
||||
{
|
||||
event->accept();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void PDFSelectTableTool::keyPressEvent(QWidget* widget, QKeyEvent* event)
|
||||
{
|
||||
Q_UNUSED(widget);
|
||||
|
||||
if (event == QKeySequence::Copy ||
|
||||
event->key() == Qt::Key_Return ||
|
||||
event->key() == Qt::Key_Enter)
|
||||
{
|
||||
// Create table cells
|
||||
struct TableCell
|
||||
{
|
||||
size_t row = 0;
|
||||
size_t column = 0;
|
||||
QRectF rectangle;
|
||||
QString text;
|
||||
};
|
||||
|
||||
std::vector<TableCell> tableCells;
|
||||
std::vector<PDFReal> horizontalBreaks = m_horizontalBreaks;
|
||||
std::vector<PDFReal> verticalBreaks = m_verticalBreaks;
|
||||
|
||||
horizontalBreaks.insert(horizontalBreaks.begin(), m_pickedRectangle.left());
|
||||
horizontalBreaks.push_back(m_pickedRectangle.right());
|
||||
|
||||
verticalBreaks.insert(verticalBreaks.begin(), m_pickedRectangle.top());
|
||||
verticalBreaks.push_back(m_pickedRectangle.bottom());
|
||||
|
||||
tableCells.reserve((horizontalBreaks.size() - 1) * (verticalBreaks.size() - 1));
|
||||
for (size_t rowIndex = 1; rowIndex < verticalBreaks.size(); ++rowIndex)
|
||||
{
|
||||
const PDFReal top = verticalBreaks[rowIndex - 1];
|
||||
const PDFReal bottom = verticalBreaks[rowIndex];
|
||||
const PDFReal height = bottom - top;
|
||||
|
||||
for (size_t columnIndex = 1; columnIndex < horizontalBreaks.size(); ++columnIndex)
|
||||
{
|
||||
const PDFReal left = horizontalBreaks[columnIndex - 1];
|
||||
const PDFReal right = horizontalBreaks[columnIndex];
|
||||
const PDFReal width = right - left;
|
||||
|
||||
TableCell cell;
|
||||
cell.row = rowIndex;
|
||||
cell.column = columnIndex;
|
||||
cell.rectangle = QRectF(left, top, width, height);
|
||||
tableCells.push_back(cell);
|
||||
}
|
||||
}
|
||||
|
||||
const PDFTextBlocks& textBlocks = m_textLayout.getTextBlocks();
|
||||
for (size_t i = 0; i < textBlocks.size(); ++i)
|
||||
{
|
||||
// Detect, if whole block can be in some text cell
|
||||
const PDFTextBlock& textBlock = textBlocks[i];
|
||||
QRectF textRect = textBlock.getBoundingBox().boundingRect();
|
||||
auto it = std::find_if(tableCells.begin(), tableCells.end(), [textRect](const auto& cell) { return cell.rectangle.contains(textRect); });
|
||||
if (it != tableCells.end())
|
||||
{
|
||||
// Jakub Melka: whole block is contained in the cell
|
||||
PDFTextSelection blockSelection = m_textLayout.selectBlock(i, m_pageIndex, QColor());
|
||||
QString text = m_textLayout.getTextFromSelection(blockSelection, m_pageIndex);
|
||||
TableCell& cell = *it;
|
||||
cell.text = QString("%1 %2").arg(cell.text, text).trimmed();
|
||||
continue;
|
||||
}
|
||||
|
||||
const PDFTextLines& textLines = textBlock.getLines();
|
||||
for (size_t j = 0; j < textLines.size(); ++j)
|
||||
{
|
||||
const PDFTextLine& textLine = textLines[j];
|
||||
QRectF boundingRect = textLine.getBoundingBox().boundingRect();
|
||||
|
||||
auto it = std::find_if(tableCells.begin(), tableCells.end(), [boundingRect](const auto& cell) { return cell.rectangle.contains(boundingRect); });
|
||||
if (it != tableCells.end())
|
||||
{
|
||||
// Jakub Melka: whole block is contained in the cell
|
||||
PDFTextSelection blockSelection = m_textLayout.selectLineInBlock(i, j, m_pageIndex, QColor());
|
||||
QString text = m_textLayout.getTextFromSelection(blockSelection, m_pageIndex);
|
||||
TableCell& cell = *it;
|
||||
cell.text = QString("%1 %2").arg(cell.text, text).trimmed();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (m_isTransposed)
|
||||
{
|
||||
auto comparator = [](const TableCell& left, const TableCell right)
|
||||
{
|
||||
return std::make_pair(left.column, left.row) < std::make_pair(right.column, right.row);
|
||||
};
|
||||
std::sort(tableCells.begin(), tableCells.end(), comparator);
|
||||
}
|
||||
|
||||
// Make CSV string
|
||||
QString string;
|
||||
{
|
||||
QTextStream stream(&string, QIODevice::WriteOnly | QIODevice::Text);
|
||||
|
||||
bool isFirst = true;
|
||||
for (const TableCell& tableCell : tableCells)
|
||||
{
|
||||
if ((!m_isTransposed && tableCell.column == 1) ||
|
||||
(m_isTransposed && tableCell.row == 1))
|
||||
{
|
||||
if (isFirst)
|
||||
{
|
||||
isFirst = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
stream << Qt::endl;
|
||||
}
|
||||
}
|
||||
|
||||
stream << tableCell.text << ";";
|
||||
}
|
||||
}
|
||||
QApplication::clipboard()->setText(string);
|
||||
|
||||
setActive(false);
|
||||
event->accept();
|
||||
}
|
||||
}
|
||||
|
||||
void PDFSelectTableTool::setActiveImpl(bool active)
|
||||
{
|
||||
BaseClass::setActiveImpl(active);
|
||||
@ -1526,6 +1662,7 @@ void PDFSelectTableTool::setActiveImpl(bool active)
|
||||
setPickedRectangle(QRectF());
|
||||
setTextLayout(PDFTextLayout());
|
||||
|
||||
m_isTransposed = false;
|
||||
m_horizontalBreaks.clear();
|
||||
m_verticalBreaks.clear();
|
||||
|
||||
@ -1543,6 +1680,26 @@ void PDFSelectTableTool::onRectanglePicked(PDFInteger pageIndex, QRectF pageRect
|
||||
setPageIndex(pageIndex);
|
||||
setPickedRectangle(pageRectangle);
|
||||
setTextLayout(getProxy()->getTextLayoutCompiler()->createTextLayout(pageIndex));
|
||||
|
||||
const PDFPage* page = getDocument()->getCatalog()->getPage(pageIndex);
|
||||
const PageRotation rotation = getPageRotationCombined(page->getPageRotation(), getProxy()->getPageRotation());
|
||||
switch (rotation)
|
||||
{
|
||||
case pdf::PageRotation::None:
|
||||
case pdf::PageRotation::Rotate180:
|
||||
m_isTransposed = false;
|
||||
break;
|
||||
|
||||
case pdf::PageRotation::Rotate90:
|
||||
case pdf::PageRotation::Rotate270:
|
||||
m_isTransposed = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
Q_ASSERT(false);
|
||||
break;
|
||||
}
|
||||
|
||||
autodetectTableGeometry();
|
||||
|
||||
emit messageDisplayRequest(tr("Table region was selected. Use left/right mouse buttons to add/remove rows/columns, then use Enter key to copy the table."), 5000);
|
||||
|
@ -396,6 +396,8 @@ public:
|
||||
const QMatrix& pagePointToDevicePointMatrix,
|
||||
QList<PDFRenderError>& errors) const override;
|
||||
|
||||
virtual void shortcutOverrideEvent(QWidget* widget, QKeyEvent* event) override;
|
||||
virtual void keyPressEvent(QWidget* widget, QKeyEvent* event) override;
|
||||
virtual void mousePressEvent(QWidget* widget, QMouseEvent* event) override;
|
||||
virtual void mouseMoveEvent(QWidget* widget, QMouseEvent* event) override;
|
||||
|
||||
@ -416,6 +418,7 @@ private:
|
||||
PDFInteger m_pageIndex;
|
||||
QRectF m_pickedRectangle;
|
||||
PDFTextLayout m_textLayout;
|
||||
bool m_isTransposed;
|
||||
std::vector<PDFReal> m_horizontalBreaks;
|
||||
std::vector<PDFReal> m_verticalBreaks;
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user