make new scraping feature more robust
This commit is contained in:
parent
47714ac590
commit
35b378d8b1
@@ -3,7 +3,7 @@ const targetNode = document;
|
|||||||
const waitTimeMs = 5000;
|
const waitTimeMs = 5000;
|
||||||
const idleIdString = "iiddllee";
|
const idleIdString = "iiddllee";
|
||||||
|
|
||||||
var scrollings = 0; // Change to higher number to force some scrollings.
|
var scrollings = 1; // Change to higher number to force some scrollings.
|
||||||
var lastResourceTime = new Date();
|
var lastResourceTime = new Date();
|
||||||
|
|
||||||
// Setup DOM observer and observe for changes in elements only.
|
// Setup DOM observer and observe for changes in elements only.
|
||||||
|
@@ -202,37 +202,50 @@ QUrl WebEngineViewer::url() const {
|
|||||||
return QWebEngineView::url();
|
return QWebEngineView::url();
|
||||||
}
|
}
|
||||||
|
|
||||||
QByteArray WebEngineViewer::getJsEnabledHtml(const QString& url) {
|
QByteArray WebEngineViewer::getJsEnabledHtml(const QString& url, bool worker_thread) {
|
||||||
WebEnginePage* page = new WebEnginePage();
|
WebEnginePage* page = new WebEnginePage();
|
||||||
WebEngineViewer* viewer = nullptr;
|
WebEngineViewer* viewer = nullptr;
|
||||||
|
|
||||||
QMetaObject::invokeMethod(
|
if (worker_thread) {
|
||||||
qApp,
|
QMetaObject::invokeMethod(
|
||||||
[&] {
|
qApp,
|
||||||
// NOTE: Must be created on main thread.
|
[&] {
|
||||||
viewer = new WebEngineViewer();
|
// NOTE: Must be created on main thread.
|
||||||
},
|
viewer = new WebEngineViewer();
|
||||||
Qt::ConnectionType::BlockingQueuedConnection);
|
},
|
||||||
|
Qt::ConnectionType::BlockingQueuedConnection);
|
||||||
|
|
||||||
viewer->moveToThread(qApp->thread());
|
viewer->moveToThread(qApp->thread());
|
||||||
page->moveToThread(qApp->thread());
|
page->moveToThread(qApp->thread());
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
viewer = new WebEngineViewer();
|
||||||
|
}
|
||||||
|
|
||||||
viewer->setPage(page);
|
viewer->setPage(page);
|
||||||
viewer->setAttribute(Qt::WidgetAttribute::WA_DontShowOnScreen, true);
|
viewer->setAttribute(Qt::WidgetAttribute::WA_DontShowOnScreen, true);
|
||||||
viewer->setAttribute(Qt::WidgetAttribute::WA_DeleteOnClose, true);
|
viewer->setAttribute(Qt::WidgetAttribute::WA_DeleteOnClose, true);
|
||||||
|
|
||||||
QMetaObject::invokeMethod(viewer, "show", Qt::ConnectionType::BlockingQueuedConnection);
|
|
||||||
|
|
||||||
QString html;
|
QString html;
|
||||||
QMetaObject::invokeMethod(page,
|
|
||||||
"pageHtml",
|
if (worker_thread) {
|
||||||
Qt::ConnectionType::BlockingQueuedConnection,
|
QMetaObject::invokeMethod(viewer, "show", Qt::ConnectionType::BlockingQueuedConnection);
|
||||||
Q_RETURN_ARG(QString, html),
|
QMetaObject::invokeMethod(page,
|
||||||
Q_ARG(QString, url));
|
"pageHtml",
|
||||||
|
Qt::ConnectionType::BlockingQueuedConnection,
|
||||||
|
Q_RETURN_ARG(QString, html),
|
||||||
|
Q_ARG(QString, url));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
viewer->show();
|
||||||
|
html = page->pageHtml(url);
|
||||||
|
}
|
||||||
|
|
||||||
page->deleteLater();
|
page->deleteLater();
|
||||||
viewer->close();
|
viewer->close();
|
||||||
|
|
||||||
|
IOFactory::writeFile("a.html", html.toUtf8());
|
||||||
|
|
||||||
return html.toUtf8();
|
return html.toUtf8();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -37,7 +37,7 @@ class WebEngineViewer : public QWebEngineView, public WebViewer {
|
|||||||
virtual QString html() const;
|
virtual QString html() const;
|
||||||
virtual QUrl url() const;
|
virtual QUrl url() const;
|
||||||
|
|
||||||
static QByteArray getJsEnabledHtml(const QString& url);
|
static QByteArray getJsEnabledHtml(const QString& url, bool worker_thread);
|
||||||
|
|
||||||
signals:
|
signals:
|
||||||
void pageTitleChanged(const QString& new_title);
|
void pageTitleChanged(const QString& new_title);
|
||||||
|
@@ -297,7 +297,7 @@ StandardFeed* StandardFeed::guessFeed(StandardFeed::SourceType source_type,
|
|||||||
}
|
}
|
||||||
else if (source_type == StandardFeed::SourceType::EmbeddedBrowser) {
|
else if (source_type == StandardFeed::SourceType::EmbeddedBrowser) {
|
||||||
#if defined(NO_LITE)
|
#if defined(NO_LITE)
|
||||||
feed_contents = WebEngineViewer::getJsEnabledHtml(source);
|
feed_contents = WebEngineViewer::getJsEnabledHtml(source, false);
|
||||||
#else
|
#else
|
||||||
throw ApplicationException(tr("this source type cannot be used on 'lite' %1 build").arg(QSL(APP_NAME)));
|
throw ApplicationException(tr("this source type cannot be used on 'lite' %1 build").arg(QSL(APP_NAME)));
|
||||||
#endif
|
#endif
|
||||||
|
@@ -242,7 +242,7 @@ QList<Message> StandardServiceRoot::obtainNewMessages(Feed* feed,
|
|||||||
}
|
}
|
||||||
else if (f->sourceType() == StandardFeed::SourceType::EmbeddedBrowser) {
|
else if (f->sourceType() == StandardFeed::SourceType::EmbeddedBrowser) {
|
||||||
#if defined(NO_LITE)
|
#if defined(NO_LITE)
|
||||||
feed_contents = WebEngineViewer::getJsEnabledHtml(f->source());
|
feed_contents = WebEngineViewer::getJsEnabledHtml(f->source(), true);
|
||||||
#else
|
#else
|
||||||
throw ApplicationException(tr("this source type cannot be used on 'lite' %1 build").arg(QSL(APP_NAME)));
|
throw ApplicationException(tr("this source type cannot be used on 'lite' %1 build").arg(QSL(APP_NAME)));
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user