Very experimental proof of concept (PoC) for scraping JS-enabled websites
This commit is contained in:
parent
0cbd356c4a
commit
accb478bee
@ -108,9 +108,9 @@ void CookieJar::saveCookies() {
|
|||||||
sett->endGroup();
|
sett->endGroup();
|
||||||
|
|
||||||
for (const QNetworkCookie& cookie : cookies) {
|
for (const QNetworkCookie& cookie : cookies) {
|
||||||
if (cookie.isSessionCookie()) {
|
/*if (cookie.isSessionCookie()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}*/
|
||||||
sett->setPassword(GROUP(Cookies),
|
sett->setPassword(GROUP(Cookies),
|
||||||
QSL("%1-%2").arg(QString::number(i++), QString::fromUtf8(cookie.name())),
|
QSL("%1-%2").arg(QString::number(i++), QString::fromUtf8(cookie.name())),
|
||||||
cookie.toRawForm(QNetworkCookie::RawForm::Full));
|
cookie.toRawForm(QNetworkCookie::RawForm::Full));
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
|
|
||||||
#include <QString>
|
#include <QString>
|
||||||
#include <QStringList>
|
#include <QStringList>
|
||||||
|
#include <QTimer>
|
||||||
#include <QUrl>
|
#include <QUrl>
|
||||||
#include <QUrlQuery>
|
#include <QUrlQuery>
|
||||||
#include <QWebEngineScript>
|
#include <QWebEngineScript>
|
||||||
@ -33,8 +34,12 @@ WebEngineViewer* WebEnginePage::view() const {
|
|||||||
QString WebEnginePage::pageHtml(const QString& url) {
|
QString WebEnginePage::pageHtml(const QString& url) {
|
||||||
QEventLoop loop;
|
QEventLoop loop;
|
||||||
QString html;
|
QString html;
|
||||||
|
QTimer tmr;
|
||||||
|
|
||||||
connect(this, &WebEnginePage::loadFinished, &loop, &QEventLoop::quit);
|
tmr.setInterval(15000);
|
||||||
|
|
||||||
|
connect(&tmr, &QTimer::timeout, &loop, &QEventLoop::quit);
|
||||||
|
connect(this, &WebEnginePage::loadFinished, &tmr, QOverload<>::of(&QTimer::start));
|
||||||
|
|
||||||
load(url);
|
load(url);
|
||||||
loop.exec();
|
loop.exec();
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
#include "services/standard/standardserviceroot.h"
|
#include "services/standard/standardserviceroot.h"
|
||||||
|
|
||||||
#if defined(NO_LITE)
|
#if defined(NO_LITE)
|
||||||
|
#include "gui/webviewers/webengine/webengineviewer.h"
|
||||||
#include "network-web/webengine/webenginepage.h"
|
#include "network-web/webengine/webenginepage.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -297,8 +298,16 @@ StandardFeed* StandardFeed::guessFeed(StandardFeed::SourceType source_type,
|
|||||||
else if (source_type == StandardFeed::SourceType::EmbeddedBrowser) {
|
else if (source_type == StandardFeed::SourceType::EmbeddedBrowser) {
|
||||||
#if defined(NO_LITE)
|
#if defined(NO_LITE)
|
||||||
WebEnginePage page;
|
WebEnginePage page;
|
||||||
|
WebEngineViewer viewer;
|
||||||
|
|
||||||
|
// NOTE: Viewer must be present or JavaScript just does not run.
|
||||||
|
viewer.setPage(&page);
|
||||||
|
viewer.setAttribute(Qt::WA_DontShowOnScreen);
|
||||||
|
viewer.show();
|
||||||
|
|
||||||
feed_contents = page.pageHtml(source).toUtf8();
|
feed_contents = page.pageHtml(source).toUtf8();
|
||||||
|
|
||||||
|
// IOFactory::writeFile("a.html", feed_contents);
|
||||||
#else
|
#else
|
||||||
throw ApplicationException(tr("this source type cannot be used on 'lite' %1 build").arg(QSL(APP_NAME)));
|
throw ApplicationException(tr("this source type cannot be used on 'lite' %1 build").arg(QSL(APP_NAME)));
|
||||||
#endif
|
#endif
|
||||||
|
@ -31,6 +31,7 @@
|
|||||||
#include "services/standard/standardserviceentrypoint.h"
|
#include "services/standard/standardserviceentrypoint.h"
|
||||||
|
|
||||||
#if defined(NO_LITE)
|
#if defined(NO_LITE)
|
||||||
|
#include "gui/webviewers/webengine/webengineviewer.h"
|
||||||
#include "network-web/webengine/webenginepage.h"
|
#include "network-web/webengine/webenginepage.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -242,9 +243,24 @@ QList<Message> StandardServiceRoot::obtainNewMessages(Feed* feed,
|
|||||||
else if (f->sourceType() == StandardFeed::SourceType::EmbeddedBrowser) {
|
else if (f->sourceType() == StandardFeed::SourceType::EmbeddedBrowser) {
|
||||||
#if defined(NO_LITE)
|
#if defined(NO_LITE)
|
||||||
WebEnginePage* page = new WebEnginePage();
|
WebEnginePage* page = new WebEnginePage();
|
||||||
|
WebEngineViewer* viewer = nullptr;
|
||||||
|
|
||||||
|
QMetaObject::invokeMethod(
|
||||||
|
qApp,
|
||||||
|
[&] {
|
||||||
|
// NOTE: Must be created on the main thread.
|
||||||
|
viewer = new WebEngineViewer();
|
||||||
|
},
|
||||||
|
Qt::ConnectionType::BlockingQueuedConnection);
|
||||||
|
|
||||||
|
viewer->moveToThread(qApp->thread());
|
||||||
page->moveToThread(qApp->thread());
|
page->moveToThread(qApp->thread());
|
||||||
|
|
||||||
|
viewer->setPage(page);
|
||||||
|
viewer->setAttribute(Qt::WA_DontShowOnScreen);
|
||||||
|
|
||||||
|
QMetaObject::invokeMethod(viewer, "show", Qt::ConnectionType::BlockingQueuedConnection);
|
||||||
|
|
||||||
QString html;
|
QString html;
|
||||||
QMetaObject::invokeMethod(page,
|
QMetaObject::invokeMethod(page,
|
||||||
"pageHtml",
|
"pageHtml",
|
||||||
@ -255,6 +271,7 @@ QList<Message> StandardServiceRoot::obtainNewMessages(Feed* feed,
|
|||||||
feed_contents = html.toUtf8();
|
feed_contents = html.toUtf8();
|
||||||
|
|
||||||
page->deleteLater();
|
page->deleteLater();
|
||||||
|
viewer->deleteLater();
|
||||||
#else
|
#else
|
||||||
throw ApplicationException(tr("this source type cannot be used on 'lite' %1 build").arg(QSL(APP_NAME)));
|
throw ApplicationException(tr("this source type cannot be used on 'lite' %1 build").arg(QSL(APP_NAME)));
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user