make new scraping feature more robust

This commit is contained in:
Martin Rotter 2024-03-08 08:36:32 +01:00
parent 47714ac590
commit 35b378d8b1
5 changed files with 34 additions and 21 deletions

View File

@@ -3,7 +3,7 @@ const targetNode = document;
const waitTimeMs = 5000;
const idleIdString = "iiddllee";
var scrollings = 0; // Change to higher number to force some scrollings.
var scrollings = 1; // Change to higher number to force some scrollings.
var lastResourceTime = new Date();
// Setup DOM observer and observe for changes in elements only.

View File

@@ -202,37 +202,50 @@ QUrl WebEngineViewer::url() const {
return QWebEngineView::url();
}
QByteArray WebEngineViewer::getJsEnabledHtml(const QString& url) {
QByteArray WebEngineViewer::getJsEnabledHtml(const QString& url, bool worker_thread) {
WebEnginePage* page = new WebEnginePage();
WebEngineViewer* viewer = nullptr;
QMetaObject::invokeMethod(
qApp,
[&] {
// NOTE: Must be created on main thread.
viewer = new WebEngineViewer();
},
Qt::ConnectionType::BlockingQueuedConnection);
if (worker_thread) {
QMetaObject::invokeMethod(
qApp,
[&] {
// NOTE: Must be created on main thread.
viewer = new WebEngineViewer();
},
Qt::ConnectionType::BlockingQueuedConnection);
viewer->moveToThread(qApp->thread());
page->moveToThread(qApp->thread());
viewer->moveToThread(qApp->thread());
page->moveToThread(qApp->thread());
}
else {
viewer = new WebEngineViewer();
}
viewer->setPage(page);
viewer->setAttribute(Qt::WidgetAttribute::WA_DontShowOnScreen, true);
viewer->setAttribute(Qt::WidgetAttribute::WA_DeleteOnClose, true);
QMetaObject::invokeMethod(viewer, "show", Qt::ConnectionType::BlockingQueuedConnection);
QString html;
QMetaObject::invokeMethod(page,
"pageHtml",
Qt::ConnectionType::BlockingQueuedConnection,
Q_RETURN_ARG(QString, html),
Q_ARG(QString, url));
if (worker_thread) {
QMetaObject::invokeMethod(viewer, "show", Qt::ConnectionType::BlockingQueuedConnection);
QMetaObject::invokeMethod(page,
"pageHtml",
Qt::ConnectionType::BlockingQueuedConnection,
Q_RETURN_ARG(QString, html),
Q_ARG(QString, url));
}
else {
viewer->show();
html = page->pageHtml(url);
}
page->deleteLater();
viewer->close();
IOFactory::writeFile("a.html", html.toUtf8());
return html.toUtf8();
}

View File

@@ -37,7 +37,7 @@ class WebEngineViewer : public QWebEngineView, public WebViewer {
virtual QString html() const;
virtual QUrl url() const;
static QByteArray getJsEnabledHtml(const QString& url);
static QByteArray getJsEnabledHtml(const QString& url, bool worker_thread);
signals:
void pageTitleChanged(const QString& new_title);

View File

@@ -297,7 +297,7 @@ StandardFeed* StandardFeed::guessFeed(StandardFeed::SourceType source_type,
}
else if (source_type == StandardFeed::SourceType::EmbeddedBrowser) {
#if defined(NO_LITE)
feed_contents = WebEngineViewer::getJsEnabledHtml(source);
feed_contents = WebEngineViewer::getJsEnabledHtml(source, false);
#else
throw ApplicationException(tr("this source type cannot be used on 'lite' %1 build").arg(QSL(APP_NAME)));
#endif

View File

@@ -242,7 +242,7 @@ QList<Message> StandardServiceRoot::obtainNewMessages(Feed* feed,
}
else if (f->sourceType() == StandardFeed::SourceType::EmbeddedBrowser) {
#if defined(NO_LITE)
feed_contents = WebEngineViewer::getJsEnabledHtml(f->source());
feed_contents = WebEngineViewer::getJsEnabledHtml(f->source(), true);
#else
throw ApplicationException(tr("this source type cannot be used on 'lite' %1 build").arg(QSL(APP_NAME)));
#endif