Very experimental proof of concept (PoC) for scraping JS-enabled websites
This commit is contained in:
parent
0cbd356c4a
commit
accb478bee
@ -108,9 +108,9 @@ void CookieJar::saveCookies() {
|
||||
sett->endGroup();
|
||||
|
||||
for (const QNetworkCookie& cookie : cookies) {
|
||||
if (cookie.isSessionCookie()) {
|
||||
/*if (cookie.isSessionCookie()) {
|
||||
continue;
|
||||
}
|
||||
}*/
|
||||
sett->setPassword(GROUP(Cookies),
|
||||
QSL("%1-%2").arg(QString::number(i++), QString::fromUtf8(cookie.name())),
|
||||
cookie.toRawForm(QNetworkCookie::RawForm::Full));
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
#include <QString>
|
||||
#include <QStringList>
|
||||
#include <QTimer>
|
||||
#include <QUrl>
|
||||
#include <QUrlQuery>
|
||||
#include <QWebEngineScript>
|
||||
@ -33,8 +34,12 @@ WebEngineViewer* WebEnginePage::view() const {
|
||||
QString WebEnginePage::pageHtml(const QString& url) {
|
||||
QEventLoop loop;
|
||||
QString html;
|
||||
QTimer tmr;
|
||||
|
||||
connect(this, &WebEnginePage::loadFinished, &loop, &QEventLoop::quit);
|
||||
tmr.setInterval(15000);
|
||||
|
||||
connect(&tmr, &QTimer::timeout, &loop, &QEventLoop::quit);
|
||||
connect(this, &WebEnginePage::loadFinished, &tmr, QOverload<>::of(&QTimer::start));
|
||||
|
||||
load(url);
|
||||
loop.exec();
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "services/standard/standardserviceroot.h"
|
||||
|
||||
#if defined(NO_LITE)
|
||||
#include "gui/webviewers/webengine/webengineviewer.h"
|
||||
#include "network-web/webengine/webenginepage.h"
|
||||
#endif
|
||||
|
||||
@ -297,8 +298,16 @@ StandardFeed* StandardFeed::guessFeed(StandardFeed::SourceType source_type,
|
||||
else if (source_type == StandardFeed::SourceType::EmbeddedBrowser) {
|
||||
#if defined(NO_LITE)
|
||||
WebEnginePage page;
|
||||
WebEngineViewer viewer;
|
||||
|
||||
// NOTE: Viewer must be present or JavaScript just does not run.
|
||||
viewer.setPage(&page);
|
||||
viewer.setAttribute(Qt::WA_DontShowOnScreen);
|
||||
viewer.show();
|
||||
|
||||
feed_contents = page.pageHtml(source).toUtf8();
|
||||
|
||||
// IOFactory::writeFile("a.html", feed_contents);
|
||||
#else
|
||||
throw ApplicationException(tr("this source type cannot be used on 'lite' %1 build").arg(QSL(APP_NAME)));
|
||||
#endif
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include "services/standard/standardserviceentrypoint.h"
|
||||
|
||||
#if defined(NO_LITE)
|
||||
#include "gui/webviewers/webengine/webengineviewer.h"
|
||||
#include "network-web/webengine/webenginepage.h"
|
||||
#endif
|
||||
|
||||
@ -242,9 +243,24 @@ QList<Message> StandardServiceRoot::obtainNewMessages(Feed* feed,
|
||||
else if (f->sourceType() == StandardFeed::SourceType::EmbeddedBrowser) {
|
||||
#if defined(NO_LITE)
|
||||
WebEnginePage* page = new WebEnginePage();
|
||||
WebEngineViewer* viewer = nullptr;
|
||||
|
||||
QMetaObject::invokeMethod(
|
||||
qApp,
|
||||
[&] {
|
||||
// NOTE: Must be created on the main thread.
|
||||
viewer = new WebEngineViewer();
|
||||
},
|
||||
Qt::ConnectionType::BlockingQueuedConnection);
|
||||
|
||||
viewer->moveToThread(qApp->thread());
|
||||
page->moveToThread(qApp->thread());
|
||||
|
||||
viewer->setPage(page);
|
||||
viewer->setAttribute(Qt::WA_DontShowOnScreen);
|
||||
|
||||
QMetaObject::invokeMethod(viewer, "show", Qt::ConnectionType::BlockingQueuedConnection);
|
||||
|
||||
QString html;
|
||||
QMetaObject::invokeMethod(page,
|
||||
"pageHtml",
|
||||
@ -255,6 +271,7 @@ QList<Message> StandardServiceRoot::obtainNewMessages(Feed* feed,
|
||||
feed_contents = html.toUtf8();
|
||||
|
||||
page->deleteLater();
|
||||
viewer->deleteLater();
|
||||
#else
|
||||
throw ApplicationException(tr("this source type cannot be used on 'lite' %1 build").arg(QSL(APP_NAME)));
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user