starting to work on fetching full article content
This commit is contained in:
parent
d8525c3a93
commit
c7c069d889
@ -26,6 +26,7 @@
|
||||
|
||||
<file>scripts/adblock/adblock-server.js</file>
|
||||
<file>scripts/readability/readabilize-article.js</file>
|
||||
<file>scripts/article-extractor/extract-article.mjs</file>
|
||||
|
||||
<file>scripts/filters/blacklist.js</file>
|
||||
<file>scripts/filters/whitelist.js</file>
|
||||
|
11
resources/scripts/article-extractor/extract-article.mjs
Normal file
11
resources/scripts/article-extractor/extract-article.mjs
Normal file
@ -0,0 +1,11 @@
|
||||
import { extract } from '@extractus/article-extractor'
|
||||
|
||||
// URL of the article to extract, taken from the first command-line argument.
const input = process.argv[2];

try {
  // Print the extraction result as a single JSON document on stdout,
  // which is where the calling application reads it from.
  const article = await extract(input);

  console.log(JSON.stringify(article));
}
catch (err) {
  console.error(err);

  // Report the failure through the exit status too. Without this the process
  // ends with status 0 and the caller cannot tell a failed run from a
  // successful one.
  process.exitCode = 1;
}
|
@ -253,6 +253,8 @@ set(SOURCES
|
||||
network-web/adblock/adblockrequestinfo.h
|
||||
network-web/apiserver.cpp
|
||||
network-web/apiserver.h
|
||||
network-web/articleparse.cpp
|
||||
network-web/articleparse.h
|
||||
network-web/basenetworkaccessmanager.cpp
|
||||
network-web/basenetworkaccessmanager.h
|
||||
network-web/cookiejar.cpp
|
||||
|
@ -12,10 +12,12 @@
|
||||
#include "miscellaneous/application.h"
|
||||
#include "miscellaneous/iconfactory.h"
|
||||
#include "miscellaneous/settings.h"
|
||||
#include "network-web/articleparse.h"
|
||||
#include "network-web/networkfactory.h"
|
||||
#include "network-web/readability.h"
|
||||
#include "network-web/webfactory.h"
|
||||
|
||||
#include <QJsonObject>
|
||||
#include <QKeyEvent>
|
||||
#include <QProgressBar>
|
||||
#include <QScrollBar>
|
||||
@ -37,7 +39,10 @@ WebBrowser::WebBrowser(WebViewer* viewer, QWidget* parent)
|
||||
#endif
|
||||
m_actionReadabilePage(new QAction(qApp->icons()->fromTheme(QSL("text-html")),
|
||||
tr("View website in reader mode"),
|
||||
this)) {
|
||||
this)),
|
||||
m_actionGetFullArticle(new QAction(qApp->icons()->fromTheme(QSL("download"), QSL("browser-download")),
|
||||
tr("Load full source article"),
|
||||
this)) {
|
||||
if (m_webView == nullptr) {
|
||||
m_webView = qApp->createWebView();
|
||||
dynamic_cast<QWidget*>(m_webView)->setParent(this);
|
||||
@ -87,6 +92,7 @@ void WebBrowser::createConnections() {
|
||||
|
||||
connect(m_actionOpenInSystemBrowser, &QAction::triggered, this, &WebBrowser::openCurrentSiteInSystemBrowser);
|
||||
connect(m_actionReadabilePage, &QAction::triggered, this, &WebBrowser::readabilePage);
|
||||
connect(m_actionGetFullArticle, &QAction::triggered, this, &WebBrowser::getFullArticle);
|
||||
|
||||
#if defined(ENABLE_MEDIAPLAYER)
|
||||
connect(m_actionPlayPageInMediaPlayer, &QAction::triggered, this, &WebBrowser::playCurrentSiteInMediaPlayer);
|
||||
@ -99,6 +105,9 @@ void WebBrowser::createConnections() {
|
||||
|
||||
connect(qApp->web()->readability(), &Readability::htmlReadabled, this, &WebBrowser::setReadabledHtml);
|
||||
connect(qApp->web()->readability(), &Readability::errorOnHtmlReadabiliting, this, &WebBrowser::readabilityFailed);
|
||||
|
||||
connect(qApp->web()->articleParse(), &ArticleParse::articleParsed, this, &WebBrowser::setFullArticleHtml);
|
||||
connect(qApp->web()->articleParse(), &ArticleParse::errorOnArticlePArsing, this, &WebBrowser::fullArticleFailed);
|
||||
}
|
||||
|
||||
void WebBrowser::updateUrl(const QUrl& url) {
|
||||
@ -182,6 +191,11 @@ void WebBrowser::readabilePage() {
|
||||
qApp->web()->readability()->makeHtmlReadable(this, m_webView->html(), m_webView->url().toString());
|
||||
}
|
||||
|
||||
void WebBrowser::getFullArticle() {
|
||||
m_actionGetFullArticle->setEnabled(false);
|
||||
qApp->web()->articleParse()->parseArticle(this, m_webView->url().toString());
|
||||
}
|
||||
|
||||
bool WebBrowser::eventFilter(QObject* watched, QEvent* event) {
|
||||
Q_UNUSED(watched)
|
||||
|
||||
@ -292,6 +306,21 @@ void WebBrowser::readabilityFailed(QObject* sndr, const QString& error) {
|
||||
}
|
||||
}
|
||||
|
||||
// Shows the extracted article in the web view.
//
// sndr is the requester the answer belongs to; answers for other requesters
// and empty answers are ignored. json_answer is parsed as JSON and the string
// stored under its "content" key is displayed.
void WebBrowser::setFullArticleHtml(QObject* sndr, const QString& json_answer) {
  if (sndr != this || json_answer.isEmpty()) {
    return;
  }

  const QJsonDocument parsed_answer = QJsonDocument::fromJson(json_answer.toUtf8());
  const QString article_html = parsed_answer["content"].toString();

  m_webView->setReadabledHtml(article_html, m_webView->url());
}
|
||||
|
||||
// Shows the article extraction error text in the web view.
//
// Errors which belong to other requesters and empty errors are ignored.
void WebBrowser::fullArticleFailed(QObject* sndr, const QString& error) {
  if (sndr != this || error.isEmpty()) {
    return;
  }

  m_webView->setReadabledHtml(error, m_webView->url());
}
|
||||
|
||||
void WebBrowser::initializeLayout() {
|
||||
m_toolBar->setFloatable(false);
|
||||
m_toolBar->setMovable(false);
|
||||
@ -324,9 +353,11 @@ void WebBrowser::initializeLayout() {
|
||||
|
||||
m_actionOpenInSystemBrowser->setEnabled(false);
|
||||
m_actionReadabilePage->setEnabled(false);
|
||||
m_actionGetFullArticle->setEnabled(false);
|
||||
|
||||
// Add needed actions into toolbar.
|
||||
m_toolBar->addAction(m_actionOpenInSystemBrowser);
|
||||
m_toolBar->addAction(m_actionGetFullArticle);
|
||||
m_toolBar->addAction(m_actionReadabilePage);
|
||||
|
||||
#if defined(ENABLE_MEDIAPLAYER)
|
||||
@ -358,6 +389,7 @@ void WebBrowser::onLoadingStarted() {
|
||||
m_loadingProgress->show();
|
||||
m_actionOpenInSystemBrowser->setEnabled(false);
|
||||
m_actionReadabilePage->setEnabled(false);
|
||||
m_actionGetFullArticle->setEnabled(false);
|
||||
|
||||
#if defined(ENABLE_MEDIAPLAYER)
|
||||
m_actionPlayPageInMediaPlayer->setEnabled(false);
|
||||
@ -375,6 +407,7 @@ void WebBrowser::onLoadingFinished(bool success) {
|
||||
|
||||
if (url.isValid() && !url.host().isEmpty()) {
|
||||
m_actionOpenInSystemBrowser->setEnabled(true);
|
||||
m_actionGetFullArticle->setEnabled(true);
|
||||
m_actionReadabilePage->setEnabled(true);
|
||||
|
||||
#if defined(ENABLE_MEDIAPLAYER)
|
||||
@ -384,6 +417,7 @@ void WebBrowser::onLoadingFinished(bool success) {
|
||||
else {
|
||||
m_actionOpenInSystemBrowser->setEnabled(false);
|
||||
m_actionReadabilePage->setEnabled(false);
|
||||
m_actionGetFullArticle->setEnabled(false);
|
||||
|
||||
#if defined(ENABLE_MEDIAPLAYER)
|
||||
m_actionPlayPageInMediaPlayer->setEnabled(false);
|
||||
|
@ -74,9 +74,14 @@ class RSSGUARD_DLLSPEC WebBrowser : public TabContent {
|
||||
void newWindowRequested(WebViewer* viewer);
|
||||
|
||||
void readabilePage();
|
||||
void getFullArticle();
|
||||
|
||||
void setReadabledHtml(QObject* sndr, const QString& better_html);
|
||||
void readabilityFailed(QObject* sndr, const QString& error);
|
||||
|
||||
void setFullArticleHtml(QObject* sndr, const QString& json_answer);
|
||||
void fullArticleFailed(QObject* sndr, const QString& error);
|
||||
|
||||
signals:
|
||||
void windowCloseRequested();
|
||||
void iconChanged(int index, const QIcon& icon);
|
||||
@ -106,6 +111,7 @@ class RSSGUARD_DLLSPEC WebBrowser : public TabContent {
|
||||
#endif
|
||||
|
||||
QAction* m_actionReadabilePage;
|
||||
QAction* m_actionGetFullArticle;
|
||||
|
||||
QList<Message> m_messages;
|
||||
QPointer<RootItem> m_root;
|
||||
|
140
src/librssguard/network-web/articleparse.cpp
Normal file
140
src/librssguard/network-web/articleparse.cpp
Normal file
@ -0,0 +1,140 @@
|
||||
// For license of this file, see <project-root-folder>/LICENSE.md.
|
||||
|
||||
#include "network-web/articleparse.h"
|
||||
|
||||
#include "3rd-party/boolinq/boolinq.h"
|
||||
#include "exceptions/applicationexception.h"
|
||||
#include "miscellaneous/application.h"
|
||||
|
||||
#include <QDir>
|
||||
|
||||
#define EXTRACTOR_PACKAGE "@extractus/article-extractor"
|
||||
#define EXTRACTOR_VERSION "8.0.7"
|
||||
|
||||
// Creates the parser with no modules installed and no installation running
// and subscribes to Node.js package installation results.
ArticleParse::ArticleParse(QObject* parent) : QObject{parent}, m_modulesInstalling(false), m_modulesInstalled(false) {
  auto* node_js = qApp->nodejs();

  connect(node_js, &NodeJs::packageInstalledUpdated, this, &ArticleParse::onPackageReady);
  connect(node_js, &NodeJs::packageError, this, &ArticleParse::onPackageError);
}
|
||||
|
||||
void ArticleParse::onPackageReady(const QList<NodeJs::PackageMetadata>& pkgs, bool already_up_to_date) {
|
||||
Q_UNUSED(already_up_to_date)
|
||||
|
||||
bool concerns_extractor = boolinq::from(pkgs).any([](const NodeJs::PackageMetadata& pkg) {
|
||||
return pkg.m_name == QSL(EXTRACTOR_PACKAGE);
|
||||
});
|
||||
|
||||
if (!concerns_extractor) {
|
||||
return;
|
||||
}
|
||||
|
||||
m_modulesInstalled = true;
|
||||
m_modulesInstalling = false;
|
||||
|
||||
qApp->showGuiMessage(Notification::Event::NodePackageUpdated,
|
||||
{tr("Packages for article-extractor are installed"),
|
||||
tr("Press the button once more!"),
|
||||
QSystemTrayIcon::MessageIcon::Information},
|
||||
{true, true, false});
|
||||
|
||||
// Emit this just to allow readability again for user.
|
||||
emit articleParsed(nullptr, tr("Packages for article-extractor are installed. You can now use this feature!"));
|
||||
}
|
||||
|
||||
void ArticleParse::onPackageError(const QList<NodeJs::PackageMetadata>& pkgs, const QString& error) {
|
||||
bool concerns_extractor = boolinq::from(pkgs).any([](const NodeJs::PackageMetadata& pkg) {
|
||||
return pkg.m_name == QSL(EXTRACTOR_PACKAGE);
|
||||
});
|
||||
|
||||
if (!concerns_extractor) {
|
||||
return;
|
||||
}
|
||||
|
||||
m_modulesInstalled = m_modulesInstalling = false;
|
||||
|
||||
qApp->showGuiMessage(Notification::Event::NodePackageUpdated,
|
||||
{tr("Packages for article-extractor are NOT installed"),
|
||||
tr("There is error: %1").arg(error),
|
||||
QSystemTrayIcon::MessageIcon::Critical},
|
||||
{true, true, false});
|
||||
|
||||
// Emit this just to allow readability again for user.
|
||||
emit articleParsed(nullptr, tr("Packages for article-extractor are NOT installed. There is error: %1").arg(error));
|
||||
}
|
||||
|
||||
// Extracts the article found at the given URL by running the bundled
// "extract-article.mjs" script via Node.js.
//
// sndr identifies the requester and is passed back unchanged in the emitted
// signals. The result is delivered asynchronously through articleParsed() or
// errorOnArticlePArsing().
//
// Until the extractor package is known to be installed, each call copies the
// script into the Node.js package folder and checks the package status:
//  - If the package is not up-to-date, its installation is started (unless
//    one is already running) and the call returns without running the script.
//  - If the status check fails, the problem is reported through
//    errorOnArticlePArsing() and the script is not run.
void ArticleParse::parseArticle(QObject* sndr, const QString& url) {
  if (!m_modulesInstalled) {
    // NOTE: Here we use MJS file directly placed in its NODE package folder
    // because NODE_PATH is not supported for MJS files.
    m_scriptFilename = qApp->nodejs()->processedPackageFolder() + QDir::separator() + QSL("extract-article.mjs");

    if (!IOFactory::copyFile(QSL(":/scripts/article-extractor/extract-article.mjs"), m_scriptFilename)) {
      qCriticalNN << LOGSEC_CORE << "Failed to copy article-extractor script to TEMP.";
    }

    try {
      const NodeJs::PackageStatus st_extractor =
        qApp->nodejs()->packageStatus({QSL(EXTRACTOR_PACKAGE), QSL(EXTRACTOR_VERSION)});

      if (st_extractor != NodeJs::PackageStatus::UpToDate) {
        if (!m_modulesInstalling) {
          // We make sure to update modules.
          m_modulesInstalling = true;

          qApp->showGuiMessage(Notification::Event::NodePackageUpdated,
                               {tr("Node.js libraries not installed"),
                                tr("%1 will now install some needed libraries, this will take only a few seconds. "
                                   "You will be notified when installation is complete.")
                                  .arg(QSL(APP_NAME)),
                                QSystemTrayIcon::MessageIcon::Warning},
                               {true, true, false});
          qApp->nodejs()->installUpdatePackages({{QSL(EXTRACTOR_PACKAGE), QSL(EXTRACTOR_VERSION)}});
        }

        // The script cannot run until the package is installed.
        return;
      }

      m_modulesInstalled = true;
    }
    catch (const ApplicationException& ex) {
      const QString problem = tr("Node.js is not configured properly. Go to \"Settings\" -> \"Node.js\" and check "
                                 "if your Node.js is properly configured.");

      qApp->showGuiMessage(Notification::Event::NodePackageUpdated,
                           {tr("Node.js libraries not installed"), problem, QSystemTrayIcon::MessageIcon::Critical},
                           {true, true, false});

      qCriticalNN << LOGSEC_CORE << "Failed to check for Node.js package status:" << QUOTE_W_SPACE_DOT(ex.message());

      // Report the problem via the error signal: the payload of articleParsed()
      // is parsed as JSON by its receiver, so plain text sent there is lost.
      emit errorOnArticlePArsing(sndr, problem);

      // Node.js is not usable, so there is no point in launching the script.
      return;
    }
  }

  QProcess* proc = new QProcess(this);

  // The process is released in onParsingFinished() once it has ended.
  connect(proc,
          QOverload<int, QProcess::ExitStatus>::of(&QProcess::finished),
          this,
          [this, sndr](int exit_code, QProcess::ExitStatus exit_status) {
            onParsingFinished(sndr, exit_code, exit_status);
          });

  qApp->nodejs()->runScript(proc, m_scriptFilename, {url});
}
|
||||
|
||||
// Handles the end of the extraction process.
//
// The finished process is obtained via sender(). When it exited normally
// with EXIT_SUCCESS, its standard output is emitted via articleParsed();
// otherwise its standard error is emitted via errorOnArticlePArsing().
// The process object is scheduled for deletion in both cases.
void ArticleParse::onParsingFinished(QObject* sndr, int exit_code, QProcess::ExitStatus exit_status) {
  QProcess* proc = qobject_cast<QProcess*>(sender());

  if (proc == nullptr) {
    // sender() yields no QProcess when this is not reached from a process
    // signal; there is no output to read and nothing to delete then.
    qCriticalNN << LOGSEC_CORE << "Article parsing finished, but the finished process is not available.";
    return;
  }

  const bool succeeded = exit_status == QProcess::ExitStatus::NormalExit && exit_code == EXIT_SUCCESS;

  if (succeeded) {
    emit articleParsed(sndr, QString::fromUtf8(proc->readAllStandardOutput()));
  }
  else {
    emit errorOnArticlePArsing(sndr, QString::fromUtf8(proc->readAllStandardError()));
  }

  proc->deleteLater();
}
|
34
src/librssguard/network-web/articleparse.h
Normal file
34
src/librssguard/network-web/articleparse.h
Normal file
@ -0,0 +1,34 @@
|
||||
// For license of this file, see <project-root-folder>/LICENSE.md.
|
||||
|
||||
#ifndef ARTICLEPARSE_H
|
||||
#define ARTICLEPARSE_H
|
||||
|
||||
#include "miscellaneous/nodejs.h"
|
||||
|
||||
#include <QObject>
|
||||
#include <QProcess>
|
||||
|
||||
class ArticleParse : public QObject {
|
||||
Q_OBJECT
|
||||
|
||||
public:
|
||||
explicit ArticleParse(QObject* parent = nullptr);
|
||||
|
||||
void parseArticle(QObject* sndr, const QString& url);
|
||||
|
||||
private slots:
|
||||
void onParsingFinished(QObject* sndr, int exit_code, QProcess::ExitStatus exit_status);
|
||||
void onPackageReady(const QList<NodeJs::PackageMetadata>& pkgs, bool already_up_to_date);
|
||||
void onPackageError(const QList<NodeJs::PackageMetadata>& pkgs, const QString& error);
|
||||
|
||||
signals:
|
||||
void articleParsed(QObject* sndr, const QString& better_html);
|
||||
void errorOnArticlePArsing(QObject* sndr, const QString& error);
|
||||
|
||||
private:
|
||||
bool m_modulesInstalling;
|
||||
bool m_modulesInstalled;
|
||||
QString m_scriptFilename;
|
||||
};
|
||||
|
||||
#endif // ARTICLEPARSE_H
|
@ -8,6 +8,7 @@
|
||||
#include "miscellaneous/settings.h"
|
||||
#include "network-web/adblock/adblockmanager.h"
|
||||
#include "network-web/apiserver.h"
|
||||
#include "network-web/articleparse.h"
|
||||
#include "network-web/cookiejar.h"
|
||||
#include "network-web/readability.h"
|
||||
|
||||
@ -54,6 +55,7 @@ WebFactory::WebFactory(QObject* parent) : QObject(parent), m_apiServer(nullptr),
|
||||
|
||||
m_cookieJar = new CookieJar(this);
|
||||
m_readability = new Readability(this);
|
||||
m_articleParse = new ArticleParse(this);
|
||||
|
||||
#if defined(NO_LITE)
|
||||
#if QT_VERSION >= 0x050D00 // Qt >= 5.13.0
|
||||
@ -568,6 +570,10 @@ Readability* WebFactory::readability() const {
|
||||
return m_readability;
|
||||
}
|
||||
|
||||
// Returns the article parser held by this factory.
ArticleParse* WebFactory::articleParse() const {
  return m_articleParse;
}
|
||||
|
||||
void WebFactory::startApiServer() {
|
||||
m_apiServer = new ApiServer(this);
|
||||
m_apiServer->setListenAddressPort(QSL("http://localhost:54123"), true);
|
||||
|
@ -20,6 +20,7 @@ class AdBlockManager;
|
||||
class CookieJar;
|
||||
class ApiServer;
|
||||
class Readability;
|
||||
class ArticleParse;
|
||||
|
||||
class RSSGUARD_DLLSPEC WebFactory : public QObject {
|
||||
Q_OBJECT
|
||||
@ -50,6 +51,7 @@ class RSSGUARD_DLLSPEC WebFactory : public QObject {
|
||||
|
||||
CookieJar* cookieJar() const;
|
||||
Readability* readability() const;
|
||||
ArticleParse* articleParse() const;
|
||||
|
||||
void startApiServer();
|
||||
void stopApiServer();
|
||||
@ -95,6 +97,7 @@ class RSSGUARD_DLLSPEC WebFactory : public QObject {
|
||||
ApiServer* m_apiServer;
|
||||
CookieJar* m_cookieJar;
|
||||
Readability* m_readability;
|
||||
ArticleParse* m_articleParse;
|
||||
QString m_customUserAgent;
|
||||
};
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user