refactoring of parsers, initial work in sitemap parser
This commit is contained in:
parent
7eac82b841
commit
efea17f3aa
@ -117,6 +117,7 @@ option(REVISION_FROM_GIT "Get revision using `git rev-parse`" ON)
|
||||
option(NO_UPDATE_CHECK "Disable automatic checking for new application updates" OFF)
|
||||
option(IS_FLATPAK_BUILD "Set to 'ON' when building RSS Guard with Flatpak." OFF)
|
||||
option(FORCE_BUNDLE_ICONS "Forcibly bundle icon themes into RSS Guard." OFF)
|
||||
option(ENABLE_COMPRESSED_SITEMAP "Enable support for gzip-compressed sitemap feeds. Requires zlib." OFF)
|
||||
|
||||
# Import Qt libraries.
|
||||
set(QT6_MIN_VERSION 6.3.0)
|
||||
|
196
src/librssguard/3rd-party/qcompressor/qcompressor.cpp
vendored
Normal file
196
src/librssguard/3rd-party/qcompressor/qcompressor.cpp
vendored
Normal file
@ -0,0 +1,196 @@
|
||||
#include "qcompressor.h"
|
||||
|
||||
/**
|
||||
* @brief Compresses the given buffer using the standard GZIP algorithm
|
||||
* @param input The buffer to be compressed
|
||||
* @param output The result of the compression
|
||||
* @param level The compression level to be used (@c 0 = no compression, @c 9 = max, @c -1 = default)
|
||||
* @return @c true if the compression was successful, @c false otherwise
|
||||
*/
|
||||
bool QCompressor::gzipCompress(QByteArray input, QByteArray &output, int level)
|
||||
{
|
||||
// Prepare output
|
||||
output.clear();
|
||||
|
||||
// Is there something to do?
|
||||
if(input.length())
|
||||
{
|
||||
// Declare vars
|
||||
int flush = 0;
|
||||
|
||||
// Prepare deflater status
|
||||
z_stream strm;
|
||||
strm.zalloc = Z_NULL;
|
||||
strm.zfree = Z_NULL;
|
||||
strm.opaque = Z_NULL;
|
||||
strm.avail_in = 0;
|
||||
strm.next_in = Z_NULL;
|
||||
|
||||
// Initialize deflater
|
||||
int ret = deflateInit2(&strm, qMax(-1, qMin(9, level)), Z_DEFLATED, GZIP_WINDOWS_BIT, 8, Z_DEFAULT_STRATEGY);
|
||||
|
||||
if (ret != Z_OK)
|
||||
return(false);
|
||||
|
||||
// Prepare output
|
||||
output.clear();
|
||||
|
||||
// Extract pointer to input data
|
||||
char *input_data = input.data();
|
||||
int input_data_left = input.length();
|
||||
|
||||
// Compress data until available
|
||||
do {
|
||||
// Determine current chunk size
|
||||
int chunk_size = qMin(GZIP_CHUNK_SIZE, input_data_left);
|
||||
|
||||
// Set deflater references
|
||||
strm.next_in = (unsigned char*)input_data;
|
||||
strm.avail_in = chunk_size;
|
||||
|
||||
// Update interval variables
|
||||
input_data += chunk_size;
|
||||
input_data_left -= chunk_size;
|
||||
|
||||
// Determine if it is the last chunk
|
||||
flush = (input_data_left <= 0 ? Z_FINISH : Z_NO_FLUSH);
|
||||
|
||||
// Deflate chunk and cumulate output
|
||||
do {
|
||||
|
||||
// Declare vars
|
||||
char out[GZIP_CHUNK_SIZE];
|
||||
|
||||
// Set deflater references
|
||||
strm.next_out = (unsigned char*)out;
|
||||
strm.avail_out = GZIP_CHUNK_SIZE;
|
||||
|
||||
// Try to deflate chunk
|
||||
ret = deflate(&strm, flush);
|
||||
|
||||
// Check errors
|
||||
if(ret == Z_STREAM_ERROR)
|
||||
{
|
||||
// Clean-up
|
||||
deflateEnd(&strm);
|
||||
|
||||
// Return
|
||||
return(false);
|
||||
}
|
||||
|
||||
// Determine compressed size
|
||||
int have = (GZIP_CHUNK_SIZE - strm.avail_out);
|
||||
|
||||
// Cumulate result
|
||||
if(have > 0)
|
||||
output.append((char*)out, have);
|
||||
|
||||
} while (strm.avail_out == 0);
|
||||
|
||||
} while (flush != Z_FINISH);
|
||||
|
||||
// Clean-up
|
||||
(void)deflateEnd(&strm);
|
||||
|
||||
// Return
|
||||
return(ret == Z_STREAM_END);
|
||||
}
|
||||
else
|
||||
return(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Decompresses the given buffer using the standard GZIP algorithm
|
||||
* @param input The buffer to be decompressed
|
||||
* @param output The result of the decompression
|
||||
* @return @c true if the decompression was successfull, @c false otherwise
|
||||
*/
|
||||
bool QCompressor::gzipDecompress(QByteArray input, QByteArray &output)
|
||||
{
|
||||
// Prepare output
|
||||
output.clear();
|
||||
|
||||
// Is there something to do?
|
||||
if(input.length() > 0)
|
||||
{
|
||||
// Prepare inflater status
|
||||
z_stream strm;
|
||||
strm.zalloc = Z_NULL;
|
||||
strm.zfree = Z_NULL;
|
||||
strm.opaque = Z_NULL;
|
||||
strm.avail_in = 0;
|
||||
strm.next_in = Z_NULL;
|
||||
|
||||
// Initialize inflater
|
||||
int ret = inflateInit2(&strm, GZIP_WINDOWS_BIT);
|
||||
|
||||
if (ret != Z_OK)
|
||||
return(false);
|
||||
|
||||
// Extract pointer to input data
|
||||
char *input_data = input.data();
|
||||
int input_data_left = input.length();
|
||||
|
||||
// Decompress data until available
|
||||
do {
|
||||
// Determine current chunk size
|
||||
int chunk_size = qMin(GZIP_CHUNK_SIZE, input_data_left);
|
||||
|
||||
// Check for termination
|
||||
if(chunk_size <= 0)
|
||||
break;
|
||||
|
||||
// Set inflater references
|
||||
strm.next_in = (unsigned char*)input_data;
|
||||
strm.avail_in = chunk_size;
|
||||
|
||||
// Update interval variables
|
||||
input_data += chunk_size;
|
||||
input_data_left -= chunk_size;
|
||||
|
||||
// Inflate chunk and cumulate output
|
||||
do {
|
||||
|
||||
// Declare vars
|
||||
char out[GZIP_CHUNK_SIZE];
|
||||
|
||||
// Set inflater references
|
||||
strm.next_out = (unsigned char*)out;
|
||||
strm.avail_out = GZIP_CHUNK_SIZE;
|
||||
|
||||
// Try to inflate chunk
|
||||
ret = inflate(&strm, Z_NO_FLUSH);
|
||||
|
||||
switch (ret) {
|
||||
case Z_NEED_DICT:
|
||||
ret = Z_DATA_ERROR;
|
||||
case Z_DATA_ERROR:
|
||||
case Z_MEM_ERROR:
|
||||
case Z_STREAM_ERROR:
|
||||
// Clean-up
|
||||
inflateEnd(&strm);
|
||||
|
||||
// Return
|
||||
return(false);
|
||||
}
|
||||
|
||||
// Determine decompressed size
|
||||
int have = (GZIP_CHUNK_SIZE - strm.avail_out);
|
||||
|
||||
// Cumulate result
|
||||
if(have > 0)
|
||||
output.append((char*)out, have);
|
||||
|
||||
} while (strm.avail_out == 0);
|
||||
|
||||
} while (ret != Z_STREAM_END);
|
||||
|
||||
// Clean-up
|
||||
inflateEnd(&strm);
|
||||
|
||||
// Return
|
||||
return (ret == Z_STREAM_END);
|
||||
}
|
||||
else
|
||||
return(true);
|
||||
}
|
17
src/librssguard/3rd-party/qcompressor/qcompressor.h
vendored
Normal file
17
src/librssguard/3rd-party/qcompressor/qcompressor.h
vendored
Normal file
@ -0,0 +1,17 @@
|
||||
#ifndef QCOMPRESSOR_H
#define QCOMPRESSOR_H

#include <zlib.h>
#include <QByteArray>

// Both macros are parenthesized so that they expand safely inside larger expressions.

// Value passed as the "windowBits" argument of deflateInit2()/inflateInit2().
#define GZIP_WINDOWS_BIT (15 + 16)

// Size (in bytes) of the chunks in which data is fed to and read from zlib.
#define GZIP_CHUNK_SIZE (32 * 1024)

// Static helpers for GZIP compression/decompression of in-memory buffers.
class QCompressor
{
public:
    // Compresses "input" into "output"; returns true on success.
    static bool gzipCompress(QByteArray input, QByteArray &output, int level = -1);

    // Decompresses "input" into "output"; returns true on success.
    static bool gzipDecompress(QByteArray input, QByteArray &output);
};

#endif // QCOMPRESSOR_H
|
@ -49,6 +49,8 @@ set(SOURCES
|
||||
dynamic-shortcuts/shortcutcatcher.h
|
||||
exceptions/applicationexception.cpp
|
||||
exceptions/applicationexception.h
|
||||
exceptions/feedrecognizedbutfailedexception.cpp
|
||||
exceptions/feedrecognizedbutfailedexception.h
|
||||
exceptions/feedfetchexception.cpp
|
||||
exceptions/feedfetchexception.h
|
||||
exceptions/filteringexception.cpp
|
||||
@ -383,6 +385,8 @@ set(SOURCES
|
||||
services/standard/parsers/rdfparser.h
|
||||
services/standard/parsers/rssparser.cpp
|
||||
services/standard/parsers/rssparser.h
|
||||
services/standard/parsers/sitemapparser.cpp
|
||||
services/standard/parsers/sitemapparser.h
|
||||
services/standard/standardcategory.cpp
|
||||
services/standard/standardcategory.h
|
||||
services/standard/standardfeed.cpp
|
||||
@ -536,9 +540,26 @@ else()
|
||||
3rd-party/sqlite/sqlite3.h
|
||||
)
|
||||
|
||||
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DSQLITE_THREADSAFE=1 -DSQLITE_ENABLE_COLUMN_METADATA=1")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DSQLITE_THREADSAFE=1 -DSQLITE_ENABLE_COLUMN_METADATA=1")
|
||||
endif()
|
||||
|
||||
# Add ZLIB.
#
# NOTE: No machine-specific ZLIB_ROOT is hard-coded here. If zlib is installed in
# a non-standard location, pass the hint on the command line instead, e.g.
# "-DZLIB_ROOT=<path-to-zlib>".
if(ENABLE_COMPRESSED_SITEMAP)
  find_package(ZLIB REQUIRED)

  # Add qcompressor.
  list(APPEND SOURCES
    3rd-party/qcompressor/qcompressor.cpp
    3rd-party/qcompressor/qcompressor.h
  )
endif(ENABLE_COMPRESSED_SITEMAP)

if(ZLIB_FOUND)
  message(STATUS "Using system zlib ${ZLIB_VERSION_STRING}.")
endif(ZLIB_FOUND)
|
||||
|
||||
# Add SimpleCrypt.
|
||||
list(APPEND SOURCES
|
||||
3rd-party/sc/simplecrypt.cpp
|
||||
@ -676,6 +697,23 @@ if(SQLite3_FOUND)
|
||||
)
|
||||
endif()
|
||||
|
||||
# Wire zlib into the target only when compressed sitemap support was requested.
# ZLIB_FOUND alone is not a sufficient guard: it may also be set by another
# find_package(ZLIB) call, while the qcompressor sources are appended to SOURCES
# only if ENABLE_COMPRESSED_SITEMAP is ON. Defining ENABLE_COMPRESSED_SITEMAP
# without those sources would leave QCompressor unresolved at link time.
if(ENABLE_COMPRESSED_SITEMAP AND ZLIB_FOUND)
  target_include_directories(rssguard AFTER
    PRIVATE
    ${ZLIB_INCLUDE_DIRS}
  )

  target_compile_definitions(rssguard
    PRIVATE

    ENABLE_COMPRESSED_SITEMAP
  )

  target_link_libraries(rssguard PRIVATE
    ${ZLIB_LIBRARIES}
  )
endif()
|
||||
|
||||
# Qt.
|
||||
target_link_libraries(rssguard PUBLIC
|
||||
Qt${QT_VERSION_MAJOR}::Core
|
||||
|
@ -24,4 +24,12 @@ struct UpdatedArticles {
|
||||
QList<Message> m_all;
|
||||
};
|
||||
|
||||
struct IconLocation {
|
||||
QString m_url;
|
||||
|
||||
// The "bool" if true means that the URL is direct and download directly, if false then
|
||||
// only use its domain and download via 3rd-party service.
|
||||
bool m_isDirect;
|
||||
};
|
||||
|
||||
#endif // TYPEDEFS_H
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
#include "exceptions/feedfetchexception.h"
|
||||
|
||||
FeedFetchException::FeedFetchException(Feed::Status feed_status, QString message)
|
||||
FeedFetchException::FeedFetchException(Feed::Status feed_status, const QString& message)
|
||||
: ApplicationException(message), m_feedStatus(feed_status) {}
|
||||
|
||||
Feed::Status FeedFetchException::feedStatus() const {
|
||||
|
@ -8,7 +8,7 @@
|
||||
|
||||
class FeedFetchException : public ApplicationException {
|
||||
public:
|
||||
explicit FeedFetchException(Feed::Status feed_status, QString message = {});
|
||||
explicit FeedFetchException(Feed::Status feed_status, const QString& message = {});
|
||||
|
||||
Feed::Status feedStatus() const;
|
||||
|
||||
|
@ -0,0 +1,5 @@
|
||||
// For license of this file, see <project-root-folder>/LICENSE.md.
|
||||
|
||||
#include "exceptions/feedrecognizedbutfailedexception.h"
|
||||
|
||||
// Forwards the message to the ApplicationException base class.
FeedRecognizedButFailedException::FeedRecognizedButFailedException(const QString& message)
  : ApplicationException(message) {
}
|
@ -0,0 +1,13 @@
|
||||
// For license of this file, see <project-root-folder>/LICENSE.md.
|
||||
|
||||
// Include guard named after this header/class, so it cannot clash with a guard
// of a differently named header.
#ifndef FEEDRECOGNIZEDBUTFAILEDEXCEPTION_H
#define FEEDRECOGNIZEDBUTFAILEDEXCEPTION_H

#include "exceptions/applicationexception.h"

// Application exception with an optional message. Judging by its name, it signals
// that content was recognized as a particular feed format but processing it failed.
class FeedRecognizedButFailedException : public ApplicationException {
  public:
    explicit FeedRecognizedButFailedException(const QString& message = {});
};

#endif // FEEDRECOGNIZEDBUTFAILEDEXCEPTION_H
|
@ -4,12 +4,12 @@
|
||||
|
||||
#include "definitions/definitions.h"
|
||||
|
||||
ScriptException::ScriptException(Reason reason, QString message) : ApplicationException(message), m_reason(reason) {
|
||||
ScriptException::ScriptException(Reason reason, const QString& message)
|
||||
: ApplicationException(message), m_reason(reason) {
|
||||
if (message.isEmpty()) {
|
||||
setMessage(messageForReason(reason));
|
||||
}
|
||||
else if (reason == ScriptException::Reason::InterpreterError ||
|
||||
reason == ScriptException::Reason::OtherError) {
|
||||
else if (reason == ScriptException::Reason::InterpreterError || reason == ScriptException::Reason::OtherError) {
|
||||
setMessage(messageForReason(reason) + QSL(": '%1'").arg(message));
|
||||
}
|
||||
}
|
||||
|
@ -8,7 +8,7 @@
|
||||
#include <QCoreApplication>
|
||||
|
||||
class ScriptException : public ApplicationException {
|
||||
Q_DECLARE_TR_FUNCTIONS(ScriptException)
|
||||
Q_DECLARE_TR_FUNCTIONS(ScriptException)
|
||||
|
||||
public:
|
||||
enum class Reason {
|
||||
@ -19,7 +19,7 @@ class ScriptException : public ApplicationException {
|
||||
OtherError
|
||||
};
|
||||
|
||||
explicit ScriptException(Reason reason = Reason::OtherError, QString message = QString());
|
||||
explicit ScriptException(Reason reason = Reason::OtherError, const QString& message = QString());
|
||||
|
||||
Reason reason() const;
|
||||
|
||||
|
@ -29,9 +29,9 @@ class ArticleListNotification : public BaseToastNotification {
|
||||
|
||||
private slots:
|
||||
void openArticleInArticleList();
|
||||
void openArticleInWebBrowser();
|
||||
void onMessageSelected(const QModelIndex& current, const QModelIndex& previous);
|
||||
void showFeed(int index);
|
||||
void openArticleInWebBrowser();
|
||||
void markAllRead();
|
||||
|
||||
private:
|
||||
|
@ -160,7 +160,7 @@ QString NetworkFactory::sanitizeUrl(const QString& url) {
|
||||
return QString(url).replace(QRegularExpression(QSL("[^\\w\\-.~:\\/?#\\[\\]@!$&'()*+,;=% \\|]")), {});
|
||||
}
|
||||
|
||||
QNetworkReply::NetworkError NetworkFactory::downloadIcon(const QList<QPair<QString, bool>>& urls,
|
||||
QNetworkReply::NetworkError NetworkFactory::downloadIcon(const QList<IconLocation>& urls,
|
||||
int timeout,
|
||||
QPixmap& output,
|
||||
const QList<QPair<QByteArray, QByteArray>>& additional_headers,
|
||||
@ -168,15 +168,15 @@ QNetworkReply::NetworkError NetworkFactory::downloadIcon(const QList<QPair<QStri
|
||||
QNetworkReply::NetworkError network_result = QNetworkReply::NetworkError::UnknownNetworkError;
|
||||
|
||||
for (const auto& url : urls) {
|
||||
if (url.first.isEmpty()) {
|
||||
if (url.m_url.isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
QByteArray icon_data;
|
||||
|
||||
if (url.second) {
|
||||
if (url.m_isDirect) {
|
||||
// Download directly.
|
||||
network_result = performNetworkOperation(url.first,
|
||||
network_result = performNetworkOperation(url.m_url,
|
||||
timeout,
|
||||
{},
|
||||
icon_data,
|
||||
@ -206,7 +206,7 @@ QNetworkReply::NetworkError NetworkFactory::downloadIcon(const QList<QPair<QStri
|
||||
}
|
||||
else {
|
||||
// Duck Duck Go.
|
||||
QUrl url_full = QUrl(url.first);
|
||||
QUrl url_full = QUrl(url.m_url);
|
||||
QString host = url_full.host();
|
||||
|
||||
if (host.startsWith(QSL("www."))) {
|
||||
|
@ -5,6 +5,7 @@
|
||||
|
||||
#include "network-web/httpresponse.h"
|
||||
|
||||
#include "definitions/typedefs.h"
|
||||
#include "services/abstract/feed.h"
|
||||
|
||||
#include <QCoreApplication>
|
||||
@ -38,7 +39,11 @@ class NetworkFactory {
|
||||
explicit NetworkFactory() = default;
|
||||
|
||||
public:
|
||||
enum class NetworkAuthentication { NoAuthentication = 0, Basic = 1, Token = 2 };
|
||||
enum class NetworkAuthentication {
|
||||
NoAuthentication = 0,
|
||||
Basic = 1,
|
||||
Token = 2
|
||||
};
|
||||
|
||||
static QStringList extractFeedLinksFromHtmlPage(const QUrl& url, const QString& html);
|
||||
static QPair<QByteArray, QByteArray> generateBasicAuthHeader(NetworkAuthentication protection,
|
||||
@ -51,7 +56,7 @@ class NetworkFactory {
|
||||
|
||||
// Performs SYNCHRONOUS favicon download for the site,
|
||||
// given URL belongs to.
|
||||
static QNetworkReply::NetworkError downloadIcon(const QList<QPair<QString, bool>>& urls,
|
||||
static QNetworkReply::NetworkError downloadIcon(const QList<IconLocation>& urls,
|
||||
int timeout,
|
||||
QPixmap& output,
|
||||
const QList<QPair<QByteArray, QByteArray>>& additional_headers,
|
||||
|
@ -703,7 +703,7 @@ RootItem* GreaderNetwork::decodeTagsSubscriptions(const QString& categories,
|
||||
|
||||
if (obtain_icons) {
|
||||
QString icon_url = subscription[QSL("iconUrl")].toString();
|
||||
QList<QPair<QString, bool>> icon_urls;
|
||||
QList<IconLocation> icon_urls;
|
||||
|
||||
if (!icon_url.isEmpty()) {
|
||||
if (icon_url.startsWith(QSL("//"))) {
|
||||
|
@ -52,6 +52,10 @@ StandardFeedDetails::StandardFeedDetails(QWidget* parent) : QWidget(parent) {
|
||||
QVariant::fromValue(int(StandardFeed::Type::Rss2X)));
|
||||
m_ui.m_cmbType->addItem(StandardFeed::typeToString(StandardFeed::Type::Json),
|
||||
QVariant::fromValue(int(StandardFeed::Type::Json)));
|
||||
m_ui.m_cmbType->addItem(StandardFeed::typeToString(StandardFeed::Type::Sitemap),
|
||||
QVariant::fromValue(int(StandardFeed::Type::Sitemap)));
|
||||
m_ui.m_cmbType->addItem(StandardFeed::typeToString(StandardFeed::Type::SitemapIndex),
|
||||
QVariant::fromValue(int(StandardFeed::Type::SitemapIndex)));
|
||||
|
||||
// Load available encodings.
|
||||
const QList<QByteArray> encodings = QTextCodec::availableCodecs();
|
||||
|
@ -2,7 +2,13 @@
|
||||
|
||||
#include "services/standard/parsers/atomparser.h"
|
||||
|
||||
#include "definitions/definitions.h"
|
||||
#include "exceptions/applicationexception.h"
|
||||
#include "miscellaneous/textfactory.h"
|
||||
#include "services/standard/definitions.h"
|
||||
#include "services/standard/standardfeed.h"
|
||||
|
||||
#include <QTextCodec>
|
||||
|
||||
AtomParser::AtomParser(const QString& data) : FeedParser(data) {
|
||||
QString version = m_xml.documentElement().attribute(QSL("version"));
|
||||
@ -15,6 +21,70 @@ AtomParser::AtomParser(const QString& data) : FeedParser(data) {
|
||||
}
|
||||
}
|
||||
|
||||
// Nothing to release explicitly.
AtomParser::~AtomParser() = default;
|
||||
|
||||
// Tries to interpret "content" as an ATOM feed.
//
// Returns a newly allocated feed (encoding, type, title and description filled in)
// together with candidate icon locations. Throws ApplicationException when the XML
// is not well-formed or when the root element is not in the ATOM namespace.
QPair<StandardFeed*, QList<IconLocation>> AtomParser::guessFeed(const QByteArray& content,
                                                                const QString& content_type) const {
  // Use the encoding from an "encoding" attribute if there is one, otherwise the default.
  const QString declared_encoding =
    QRegularExpression(QSL("encoding=\"([A-Z0-9\\-]+)\""), QRegularExpression::PatternOption::CaseInsensitiveOption)
      .match(content)
      .captured(1);
  const QString feed_encoding = declared_encoding.isEmpty() ? QSL(DEFAULT_FEED_ENCODING) : declared_encoding;

  // Decode raw bytes, falling back to UTF-8 when no codec of that name exists.
  QTextCodec* codec = QTextCodec::codecForName(feed_encoding.toLocal8Bit());
  const QString decoded_xml = (codec != nullptr) ? codec->toUnicode(content) : QString::fromUtf8(content);

  // Parse the XML with namespace processing enabled.
  QDomDocument document;
  QString parse_error;
  int error_line;
  int error_column;

  if (!document.setContent(decoded_xml, true, &parse_error, &error_line, &error_column)) {
    throw ApplicationException(QObject::tr("XML is not well-formed, %1").arg(parse_error));
  }

  QDomElement root = document.documentElement();

  if (root.namespaceURI() != atomNamespace()) {
    throw ApplicationException(QObject::tr("not an ATOM feed"));
  }

  // Feed recognized, fill in its basic metadata.
  auto* guessed_feed = new StandardFeed();

  guessed_feed->setEncoding(feed_encoding);
  guessed_feed->setType(StandardFeed::Type::Atom10);
  guessed_feed->setTitle(root.namedItem(QSL("title")).toElement().text());
  guessed_feed->setDescription(root.namedItem(QSL("subtitle")).toElement().text());

  // Collect icon candidates: home page (non-direct) goes first, explicit icon (direct) after it.
  QList<IconLocation> icon_locations;
  const QString icon_link = root.namedItem(QSL("icon")).toElement().text();

  if (!icon_link.isEmpty()) {
    icon_locations.append({icon_link, true});
  }

  const QString home_page = root.namedItem(QSL("link")).toElement().attribute(QSL("href"));

  if (!home_page.isEmpty()) {
    icon_locations.prepend({home_page, false});
  }

  return {guessed_feed, icon_locations};
}
|
||||
|
||||
QString AtomParser::feedAuthor() const {
|
||||
auto authors = m_xml.documentElement().elementsByTagNameNS(m_atomNamespace, QSL("author"));
|
||||
|
||||
|
@ -13,8 +13,10 @@
|
||||
class AtomParser : public FeedParser {
|
||||
public:
|
||||
explicit AtomParser(const QString& data);
|
||||
virtual ~AtomParser();
|
||||
|
||||
QString atomNamespace() const;
|
||||
virtual QPair<StandardFeed*, QList<IconLocation>> guessFeed(const QByteArray& content,
|
||||
const QString& content_type) const;
|
||||
|
||||
protected:
|
||||
virtual QString xmlMessageTitle(const QDomElement& msg_element) const;
|
||||
@ -29,6 +31,8 @@ class AtomParser : public FeedParser {
|
||||
virtual QString feedAuthor() const;
|
||||
|
||||
private:
|
||||
QString atomNamespace() const;
|
||||
|
||||
QString m_atomNamespace;
|
||||
};
|
||||
|
||||
|
@ -15,6 +15,9 @@
|
||||
|
||||
FeedParser::FeedParser(QString data, bool is_xml)
|
||||
: m_isXml(is_xml), m_data(std::move(data)), m_mrssNamespace(QSL("http://search.yahoo.com/mrss/")) {
|
||||
if (m_data.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (m_isXml) {
|
||||
// XML.
|
||||
@ -36,6 +39,8 @@ FeedParser::FeedParser(QString data, bool is_xml)
|
||||
}
|
||||
}
|
||||
|
||||
// Nothing to release explicitly.
FeedParser::~FeedParser() = default;
|
||||
|
||||
QString FeedParser::xmlMessageRawContents(const QDomElement& msg_element) const {
|
||||
QString raw_contents;
|
||||
QTextStream str(&raw_contents);
|
||||
|
@ -10,12 +10,18 @@
|
||||
#include <QString>
|
||||
|
||||
#include "core/message.h"
|
||||
#include "definitions/typedefs.h"
|
||||
|
||||
class StandardFeed;
|
||||
|
||||
// Base class for all XML-based feed parsers.
|
||||
class FeedParser {
|
||||
public:
|
||||
explicit FeedParser(QString data, bool is_xml = true);
|
||||
virtual ~FeedParser();
|
||||
|
||||
virtual QPair<StandardFeed*, QList<IconLocation>> guessFeed(const QByteArray& content,
|
||||
const QString& content_type) const = 0;
|
||||
virtual QList<Message> messages();
|
||||
|
||||
protected:
|
||||
|
@ -2,7 +2,13 @@
|
||||
|
||||
#include "services/standard/parsers/jsonparser.h"
|
||||
|
||||
#include "definitions/definitions.h"
|
||||
#include "definitions/typedefs.h"
|
||||
#include "exceptions/applicationexception.h"
|
||||
#include "exceptions/feedrecognizedbutfailedexception.h"
|
||||
#include "miscellaneous/textfactory.h"
|
||||
#include "services/standard/definitions.h"
|
||||
#include "services/standard/standardfeed.h"
|
||||
|
||||
#include <QJsonArray>
|
||||
#include <QJsonDocument>
|
||||
@ -10,6 +16,51 @@
|
||||
|
||||
// Passes "false" as the "is_xml" argument of the base parser.
JsonParser::JsonParser(const QString& data)
  : FeedParser(data, false) {
}
|
||||
|
||||
// Nothing to release explicitly.
JsonParser::~JsonParser() = default;
|
||||
|
||||
// Tries to interpret "content" as a JSON feed.
//
// Content is considered a JSON candidate when the content type mentions "json" or the
// (simplified) data starts with '{'. Throws ApplicationException for non-candidates and
// FeedRecognizedButFailedException when a candidate cannot be parsed as JSON. Otherwise
// returns a newly allocated feed together with candidate icon locations.
QPair<StandardFeed*, QList<IconLocation>> JsonParser::guessFeed(const QByteArray& content,
                                                                const QString& content_type) const {
  const bool looks_like_json = content_type.contains(QSL("json"), Qt::CaseSensitivity::CaseInsensitive) ||
                               content.simplified().startsWith('{');

  if (!looks_like_json) {
    throw ApplicationException(QObject::tr("not a JSON feed"));
  }

  QJsonParseError parse_status;
  QJsonDocument document = QJsonDocument::fromJson(content, &parse_status);

  if (document.isNull() && !parse_status.errorString().isEmpty()) {
    throw FeedRecognizedButFailedException(QObject::tr("JSON error '%1'").arg(parse_status.errorString()));
  }

  const QJsonObject root = document.object();

  // Feed recognized, fill in its basic metadata.
  auto* guessed_feed = new StandardFeed();

  guessed_feed->setEncoding(QSL(DEFAULT_FEED_ENCODING));
  guessed_feed->setType(StandardFeed::Type::Json);
  guessed_feed->setTitle(root[QSL("title")].toString());
  guessed_feed->setDescription(root[QSL("description")].toString());

  QList<IconLocation> icon_locations;
  const QString home_page = root[QSL("home_page_url")].toString();

  if (!home_page.isEmpty()) {
    icon_locations.prepend({home_page, false});
  }

  // Prefer "favicon", use "icon" only when "favicon" is missing.
  QString icon = root[QSL("favicon")].toString();

  if (icon.isEmpty()) {
    icon = root[QSL("icon")].toString();
  }

  if (!icon.isEmpty()) {
    // Low priority, download directly.
    icon_locations.append({icon, true});
  }

  return {guessed_feed, icon_locations};
}
|
||||
|
||||
QString JsonParser::feedAuthor() const {
|
||||
QString global_author = m_json.object()[QSL("author")].toObject()[QSL("name")].toString();
|
||||
|
||||
|
@ -10,6 +10,10 @@
|
||||
class JsonParser : public FeedParser {
|
||||
public:
|
||||
explicit JsonParser(const QString& data);
|
||||
virtual ~JsonParser();
|
||||
|
||||
virtual QPair<StandardFeed*, QList<IconLocation>> guessFeed(const QByteArray& content,
|
||||
const QString& content_type) const;
|
||||
|
||||
protected:
|
||||
virtual QString feedAuthor() const;
|
||||
|
@ -2,15 +2,81 @@
|
||||
|
||||
#include "services/standard/parsers/rdfparser.h"
|
||||
|
||||
#include "exceptions/applicationexception.h"
|
||||
#include "miscellaneous/textfactory.h"
|
||||
#include "services/standard/definitions.h"
|
||||
#include "services/standard/standardfeed.h"
|
||||
|
||||
#include <QDomDocument>
|
||||
#include <QTextCodec>
|
||||
|
||||
// Initializes the base parser and the XML namespace URIs used by this parser.
RdfParser::RdfParser(const QString& data)
  : FeedParser(data),
    m_rdfNamespace(QSL("http://www.w3.org/1999/02/22-rdf-syntax-ns#")),
    m_rssNamespace(QSL("http://purl.org/rss/1.0/")),
    m_rssCoNamespace(QSL("http://purl.org/rss/1.0/modules/content/")),
    m_dcElNamespace(QSL("http://purl.org/dc/elements/1.1/")) {
}
|
||||
|
||||
// Nothing to release explicitly.
RdfParser::~RdfParser() = default;
|
||||
|
||||
// Tries to interpret "content" as an RDF feed.
//
// Returns a newly allocated feed (encoding, type, title and description filled in)
// together with candidate icon locations. Throws ApplicationException when the XML
// is not well-formed or when the root element is not in the RDF namespace.
QPair<StandardFeed*, QList<IconLocation>> RdfParser::guessFeed(const QByteArray& content,
                                                               const QString& content_type) const {
  // Use the encoding from an "encoding" attribute if there is one, otherwise the default.
  const QString declared_encoding =
    QRegularExpression(QSL("encoding=\"([A-Z0-9\\-]+)\""), QRegularExpression::PatternOption::CaseInsensitiveOption)
      .match(content)
      .captured(1);
  const QString feed_encoding = declared_encoding.isEmpty() ? QSL(DEFAULT_FEED_ENCODING) : declared_encoding;

  // Decode raw bytes, falling back to UTF-8 when no codec of that name exists.
  QTextCodec* codec = QTextCodec::codecForName(feed_encoding.toLocal8Bit());
  const QString decoded_xml = (codec != nullptr) ? codec->toUnicode(content) : QString::fromUtf8(content);

  // Parse the XML with namespace processing enabled.
  QDomDocument document;
  QString parse_error;
  int error_line;
  int error_column;

  if (!document.setContent(decoded_xml, true, &parse_error, &error_line, &error_column)) {
    throw ApplicationException(QObject::tr("XML is not well-formed, %1").arg(parse_error));
  }

  QDomElement root = document.documentElement();

  if (root.namespaceURI() != rdfNamespace()) {
    throw ApplicationException(QObject::tr("not an RDF feed"));
  }

  // Feed recognized, fill in its basic metadata from the first "channel" element.
  auto* guessed_feed = new StandardFeed();

  guessed_feed->setEncoding(feed_encoding);
  guessed_feed->setType(StandardFeed::Type::Rdf);

  QDomElement channel = root.elementsByTagNameNS(rssNamespace(), QSL("channel")).at(0).toElement();

  guessed_feed->setTitle(channel.elementsByTagNameNS(rssNamespace(), QSL("title")).at(0).toElement().text());
  guessed_feed->setDescription(channel.elementsByTagNameNS(rssNamespace(), QSL("description"))
                                 .at(0)
                                 .toElement()
                                 .text());

  // The channel link is the only (non-direct) icon candidate.
  QList<IconLocation> icon_locations;
  const QString home_page = channel.elementsByTagNameNS(rssNamespace(), QSL("link")).at(0).toElement().text();

  if (!home_page.isEmpty()) {
    icon_locations.prepend({home_page, false});
  }

  return {guessed_feed, icon_locations};
}
|
||||
|
||||
// Returns all "item" elements of the parsed document which live in the RSS namespace.
QDomNodeList RdfParser::xmlMessageElements() {
  const QString item_tag = QSL("item");

  return m_xml.elementsByTagNameNS(m_rssNamespace, item_tag);
}
|
||||
|
@ -12,9 +12,10 @@
|
||||
class RdfParser : public FeedParser {
|
||||
public:
|
||||
explicit RdfParser(const QString& data);
|
||||
virtual ~RdfParser();
|
||||
|
||||
QString rdfNamespace() const;
|
||||
QString rssNamespace() const;
|
||||
virtual QPair<StandardFeed*, QList<IconLocation>> guessFeed(const QByteArray& content,
|
||||
const QString& content_type) const;
|
||||
|
||||
protected:
|
||||
virtual QString xmlMessageTitle(const QDomElement& msg_element) const;
|
||||
@ -27,6 +28,9 @@ class RdfParser : public FeedParser {
|
||||
virtual QDomNodeList xmlMessageElements();
|
||||
|
||||
private:
|
||||
QString rdfNamespace() const;
|
||||
QString rssNamespace() const;
|
||||
|
||||
QString m_rdfNamespace;
|
||||
QString m_rssNamespace;
|
||||
QString m_rssCoNamespace;
|
||||
|
@ -2,13 +2,97 @@
|
||||
|
||||
#include "services/standard/parsers/rssparser.h"
|
||||
|
||||
#include "exceptions/applicationexception.h"
|
||||
#include "miscellaneous/textfactory.h"
|
||||
#include "services/standard/definitions.h"
|
||||
#include "services/standard/standardfeed.h"
|
||||
|
||||
#include <QDomDocument>
|
||||
#include <QTextCodec>
|
||||
#include <QTextStream>
|
||||
|
||||
// Hands the data over to the base parser.
RssParser::RssParser(const QString& data)
  : FeedParser(data) {
}
|
||||
|
||||
// Nothing to release explicitly.
RssParser::~RssParser() = default;
|
||||
|
||||
// Tries to interpret "content" as an RSS feed.
//
// Returns a newly allocated feed (encoding, type, title and description filled in)
// together with candidate icon locations. Throws ApplicationException when the XML
// is not well-formed or when the root element is not "rss".
QPair<StandardFeed*, QList<IconLocation>> RssParser::guessFeed(const QByteArray& content,
                                                               const QString& content_type) const {
  // Use the encoding from an "encoding" attribute if there is one, otherwise the default.
  const QString declared_encoding =
    QRegularExpression(QSL("encoding=\"([A-Z0-9\\-]+)\""), QRegularExpression::PatternOption::CaseInsensitiveOption)
      .match(content)
      .captured(1);
  const QString feed_encoding = declared_encoding.isEmpty() ? QSL(DEFAULT_FEED_ENCODING) : declared_encoding;

  // Decode raw bytes, falling back to UTF-8 when no codec of that name exists.
  QTextCodec* codec = QTextCodec::codecForName(feed_encoding.toLocal8Bit());
  const QString decoded_xml = (codec != nullptr) ? codec->toUnicode(content) : QString::fromUtf8(content);

  // Parse the XML with namespace processing enabled.
  QDomDocument document;
  QString parse_error;
  int error_line;
  int error_column;

  if (!document.setContent(decoded_xml, true, &parse_error, &error_line, &error_column)) {
    throw ApplicationException(QObject::tr("XML is not well-formed, %1").arg(parse_error));
  }

  QDomElement root = document.documentElement();

  if (root.tagName() != QL1S("rss")) {
    throw ApplicationException(QObject::tr("not a RSS feed"));
  }

  auto* guessed_feed = new StandardFeed();

  guessed_feed->setEncoding(feed_encoding);

  // Versions 0.91 - 0.93 map to the "0.X" type, everything else (including a missing
  // "version" attribute, which defaults to "2.0") maps to the "2.X" type.
  const QString rss_version = root.attribute(QSL("version"), QSL("2.0"));
  const bool is_legacy_rss =
    rss_version == QL1S("0.91") || rss_version == QL1S("0.92") || rss_version == QL1S("0.93");

  guessed_feed->setType(is_legacy_rss ? StandardFeed::Type::Rss0X : StandardFeed::Type::Rss2X);

  QDomElement channel = root.namedItem(QSL("channel")).toElement();

  guessed_feed->setTitle(channel.namedItem(QSL("title")).toElement().text());
  guessed_feed->setDescription(channel.namedItem(QSL("description")).toElement().text());

  // Collect icon candidates: channel image (direct) is appended, home page (non-direct) goes first.
  QList<IconLocation> icon_locations;
  const QString image_url = channel.namedItem(QSL("image")).namedItem(QSL("url")).toElement().text();

  if (!image_url.isEmpty()) {
    icon_locations.append({image_url, true});
  }

  // The first "link" element with non-empty text is taken as the home page.
  const QDomNodeList links = channel.elementsByTagName(QSL("link"));

  for (int idx = 0; idx < links.size(); idx++) {
    const QString home_page = links.at(idx).toElement().text();

    if (home_page.isEmpty()) {
      continue;
    }

    icon_locations.prepend({home_page, false});
    break;
  }

  return {guessed_feed, icon_locations};
}
|
||||
|
||||
QDomNodeList RssParser::xmlMessageElements() {
|
||||
QDomNode channel_elem = m_xml.namedItem(QSL("rss")).namedItem(QSL("channel"));
|
||||
|
||||
|
@ -12,6 +12,10 @@
|
||||
class RssParser : public FeedParser {
|
||||
public:
|
||||
explicit RssParser(const QString& data);
|
||||
virtual ~RssParser();
|
||||
|
||||
virtual QPair<StandardFeed*, QList<IconLocation>> guessFeed(const QByteArray& content,
|
||||
const QString& content_type) const;
|
||||
|
||||
protected:
|
||||
virtual QDomNodeList xmlMessageElements();
|
||||
|
154
src/librssguard/services/standard/parsers/sitemapparser.cpp
Normal file
154
src/librssguard/services/standard/parsers/sitemapparser.cpp
Normal file
@ -0,0 +1,154 @@
|
||||
// For license of this file, see <project-root-folder>/LICENSE.md.
|
||||
|
||||
#include "services/standard/parsers/sitemapparser.h"
|
||||
|
||||
#if defined(ENABLE_COMPRESSED_SITEMAP)
|
||||
#include "3rd-party/qcompressor/qcompressor.h"
|
||||
#endif
|
||||
|
||||
#include "definitions/definitions.h"
|
||||
#include "exceptions/applicationexception.h"
|
||||
#include "exceptions/feedrecognizedbutfailedexception.h"
|
||||
#include "services/standard/definitions.h"
|
||||
#include "services/standard/standardfeed.h"
|
||||
|
||||
#include <QDomDocument>
|
||||
#include <QTextCodec>
|
||||
#include <QTextStream>
|
||||
|
||||
// Forwards the raw feed data to the FeedParser base class; no further
// initialization happens in this constructor.
SitemapParser::SitemapParser(const QString& data)
  : FeedParser(data) {
}
|
||||
|
||||
// The destructor body is empty, so the compiler-generated one is sufficient.
SitemapParser::~SitemapParser() = default;
|
||||
|
||||
/**
 * Tries to recognize @p content as a Sitemap ("urlset" root element) or
 * a Sitemap index ("sitemapindex" root element).
 *
 * Content starting with the gzip magic bytes is decompressed first when the
 * build defines ENABLE_COMPRESSED_SITEMAP.
 *
 * @param content raw feed data, possibly gzip-compressed.
 * @param content_type not consulted by this parser.
 * @return pair of a feed allocated with `new` (encoding, type and title are set)
 *         and a list of icon locations, which this parser always leaves empty.
 * @throws FeedRecognizedButFailedException when the data is gzipped but gzip support
 *         is not compiled in, or when decompression fails.
 * @throws ApplicationException when the data is not well-formed XML or its root
 *         element is neither "urlset" nor "sitemapindex".
 */
QPair<StandardFeed*, QList<IconLocation>> SitemapParser::guessFeed(const QByteArray& content,
                                                                   const QString& content_type) const {
  QByteArray uncompressed_content;

  if (isGzip(content)) {
#if defined(ENABLE_COMPRESSED_SITEMAP)
    // Do not continue with a half-decoded or empty buffer; a failed decompression
    // would otherwise surface later as a misleading "XML is not well-formed" error.
    if (!QCompressor::gzipDecompress(content, uncompressed_content)) {
      throw FeedRecognizedButFailedException(QObject::tr("gzipped sitemap cannot be decompressed"));
    }
#else
    throw FeedRecognizedButFailedException(QObject::tr("support for gzipped sitemaps is not enabled"));
#endif
  }
  else {
    uncompressed_content = content;
  }

  // Start with the default encoding and override it if the XML declaration
  // carries an explicit "encoding" attribute.
  QString xml_schema_encoding = QSL(DEFAULT_FEED_ENCODING);
  QString xml_contents_encoded;
  QString enc =
    QRegularExpression(QSL("encoding=\"([A-Z0-9\\-]+)\""), QRegularExpression::PatternOption::CaseInsensitiveOption)
      .match(uncompressed_content)
      .captured(1);

  if (!enc.isEmpty()) {
    xml_schema_encoding = enc;
  }

  QTextCodec* custom_codec = QTextCodec::codecForName(xml_schema_encoding.toLocal8Bit());

  if (custom_codec != nullptr) {
    xml_contents_encoded = custom_codec->toUnicode(uncompressed_content);
  }
  else {
    // Unknown codec name, fall back to UTF-8 decoding.
    xml_contents_encoded = QString::fromUtf8(uncompressed_content);
  }

  // Feed XML was obtained, guess it now.
  QDomDocument xml_document;
  QString error_msg;
  int error_line = 0;
  int error_column = 0;

  if (!xml_document.setContent(xml_contents_encoded, true, &error_msg, &error_line, &error_column)) {
    throw ApplicationException(QObject::tr("XML is not well-formed, %1").arg(error_msg));
  }

  // Read the root tag name once and reuse it for all decisions below.
  const QString root_tag = xml_document.documentElement().tagName();
  const bool is_sitemap = root_tag == QSL("urlset");
  const bool is_sitemap_index = root_tag == QSL("sitemapindex");

  if (!is_sitemap && !is_sitemap_index) {
    throw ApplicationException(QObject::tr("not a Sitemap"));
  }

  const StandardFeed::Type feed_type = is_sitemap ? StandardFeed::Type::Sitemap : StandardFeed::Type::SitemapIndex;

  // The feed is allocated only after every throwing check passed, so nothing leaks on failure.
  auto* feed = new StandardFeed();
  QList<IconLocation> icon_possible_locations;

  feed->setEncoding(xml_schema_encoding);
  feed->setType(feed_type);

  // The title is set to the human-readable name of the feed type.
  feed->setTitle(StandardFeed::typeToString(feed_type));

  return {feed, icon_possible_locations};
}
|
||||
|
||||
// Returns the namespace URI of the core Sitemap 0.9 schema.
QString SitemapParser::sitemapNamespace() const {
  QString ns = QSL("http://www.sitemaps.org/schemas/sitemap/0.9");

  return ns;
}
|
||||
|
||||
// Returns the namespace URI of the "sitemap-news" 0.9 schema.
QString SitemapParser::sitemapNewsNamespace() const {
  QString ns = QSL("http://www.google.com/schemas/sitemap-news/0.9");

  return ns;
}
|
||||
|
||||
// Returns the namespace URI of the "sitemap-image" 1.1 schema.
QString SitemapParser::sitemapImageNamespace() const {
  QString ns = QSL("http://www.google.com/schemas/sitemap-image/1.1");

  return ns;
}
|
||||
|
||||
// Returns the namespace URI of the "sitemap-video" 1.1 schema.
QString SitemapParser::sitemapVideoNamespace() const {
  QString ns = QSL("http://www.google.com/schemas/sitemap-video/1.1");

  return ns;
}
|
||||
|
||||
// Stub: yields an empty node list, i.e. no message elements are extracted.
QDomNodeList SitemapParser::xmlMessageElements() {
  return QDomNodeList();
}
|
||||
|
||||
// TODO: implement the message-extraction methods below; for now each of them returns an empty value.
|
||||
|
||||
// Stub: the element is ignored and an empty title is returned.
QString SitemapParser::xmlMessageTitle(const QDomElement& msg_element) const {
  return QString();
}
|
||||
|
||||
// Stub: the element is ignored and an empty URL is returned.
QString SitemapParser::xmlMessageUrl(const QDomElement& msg_element) const {
  return QString();
}
|
||||
|
||||
// Stub: the element is ignored and an empty description is returned.
QString SitemapParser::xmlMessageDescription(const QDomElement& msg_element) const {
  return QString();
}
|
||||
|
||||
// Stub: the element is ignored and an empty author is returned.
QString SitemapParser::xmlMessageAuthor(const QDomElement& msg_element) const {
  return QString();
}
|
||||
|
||||
// Stub: the element is ignored and a default-constructed QDateTime is returned.
QDateTime SitemapParser::xmlMessageDateCreated(const QDomElement& msg_element) const {
  return QDateTime();
}
|
||||
|
||||
// Stub: the element is ignored and an empty ID is returned.
QString SitemapParser::xmlMessageId(const QDomElement& msg_element) const {
  return QString();
}
|
||||
|
||||
// Stub: the element is ignored and an empty enclosure list is returned.
QList<Enclosure> SitemapParser::xmlMessageEnclosures(const QDomElement& msg_element) const {
  return QList<Enclosure>();
}
|
||||
|
||||
// Stub: the element is ignored and an empty category list is returned.
QList<MessageCategory> SitemapParser::xmlMessageCategories(const QDomElement& msg_element) const {
  return QList<MessageCategory>();
}
|
||||
|
||||
// Stub: the element is ignored and empty raw contents are returned.
QString SitemapParser::xmlMessageRawContents(const QDomElement& msg_element) const {
  return QString();
}
|
||||
|
||||
bool SitemapParser::isGzip(const QByteArray& content) {
|
||||
return ((content[0] & 0xFF) == 0x1f) && ((content[1] & 0xFF) == 0x8b);
|
||||
}
|
37
src/librssguard/services/standard/parsers/sitemapparser.h
Normal file
37
src/librssguard/services/standard/parsers/sitemapparser.h
Normal file
@ -0,0 +1,37 @@
|
||||
// For license of this file, see <project-root-folder>/LICENSE.md.
|
||||
|
||||
#ifndef SITEMAPPARSER_H
|
||||
#define SITEMAPPARSER_H
|
||||
|
||||
#include "services/standard/parsers/feedparser.h"
|
||||
|
||||
class SitemapParser : public FeedParser {
|
||||
public:
|
||||
explicit SitemapParser(const QString& data);
|
||||
virtual ~SitemapParser();
|
||||
|
||||
virtual QPair<StandardFeed*, QList<IconLocation>> guessFeed(const QByteArray& content,
|
||||
const QString& content_type) const;
|
||||
|
||||
static bool isGzip(const QByteArray& content);
|
||||
|
||||
protected:
|
||||
virtual QDomNodeList xmlMessageElements();
|
||||
virtual QString xmlMessageTitle(const QDomElement& msg_element) const;
|
||||
virtual QString xmlMessageUrl(const QDomElement& msg_element) const;
|
||||
virtual QString xmlMessageDescription(const QDomElement& msg_element) const;
|
||||
virtual QString xmlMessageAuthor(const QDomElement& msg_element) const;
|
||||
virtual QDateTime xmlMessageDateCreated(const QDomElement& msg_element) const;
|
||||
virtual QString xmlMessageId(const QDomElement& msg_element) const;
|
||||
virtual QList<Enclosure> xmlMessageEnclosures(const QDomElement& msg_element) const;
|
||||
virtual QList<MessageCategory> xmlMessageCategories(const QDomElement& msg_element) const;
|
||||
virtual QString xmlMessageRawContents(const QDomElement& msg_element) const;
|
||||
|
||||
private:
|
||||
QString sitemapNamespace() const;
|
||||
QString sitemapNewsNamespace() const;
|
||||
QString sitemapImageNamespace() const;
|
||||
QString sitemapVideoNamespace() const;
|
||||
};
|
||||
|
||||
#endif // SITEMAPPARSER_H
|
@ -5,16 +5,20 @@
|
||||
#include "database/databasequeries.h"
|
||||
#include "definitions/definitions.h"
|
||||
#include "exceptions/applicationexception.h"
|
||||
#include "exceptions/feedrecognizedbutfailedexception.h"
|
||||
#include "exceptions/networkexception.h"
|
||||
#include "exceptions/scriptexception.h"
|
||||
#include "miscellaneous/settings.h"
|
||||
#include "miscellaneous/textfactory.h"
|
||||
#include "services/standard/definitions.h"
|
||||
#include "services/standard/gui/formstandardfeeddetails.h"
|
||||
#include "services/standard/parsers/atomparser.h"
|
||||
#include "services/standard/parsers/rdfparser.h"
|
||||
#include "services/standard/standardserviceroot.h"
|
||||
|
||||
#include "services/standard/parsers/atomparser.h"
|
||||
#include "services/standard/parsers/jsonparser.h"
|
||||
#include "services/standard/parsers/rdfparser.h"
|
||||
#include "services/standard/parsers/rssparser.h"
|
||||
#include "services/standard/parsers/sitemapparser.h"
|
||||
|
||||
#include <QCommandLineParser>
|
||||
#include <QDomDocument>
|
||||
#include <QDomElement>
|
||||
@ -24,6 +28,7 @@
|
||||
#include <QPointer>
|
||||
#include <QProcess>
|
||||
#include <QProcessEnvironment>
|
||||
#include <QScopedPointer>
|
||||
#include <QTextCodec>
|
||||
#include <QVariant>
|
||||
#include <QXmlStreamReader>
|
||||
@ -151,6 +156,12 @@ QString StandardFeed::typeToString(StandardFeed::Type type) {
|
||||
case Type::Json:
|
||||
return QSL("JSON 1.0/1.1");
|
||||
|
||||
case Type::Sitemap:
|
||||
return QSL("Sitemap");
|
||||
|
||||
case Type::SitemapIndex:
|
||||
return QSL("Sitemap Index");
|
||||
|
||||
case Type::Rss2X:
|
||||
default:
|
||||
return QSL("RSS 2.0/2.0.1");
|
||||
@ -270,172 +281,36 @@ StandardFeed* StandardFeed::guessFeed(StandardFeed::SourceType source_type,
|
||||
}
|
||||
|
||||
StandardFeed* feed = nullptr;
|
||||
QList<IconLocation> icon_possible_locations;
|
||||
QList<QSharedPointer<FeedParser>> parsers;
|
||||
|
||||
// Now we need to obtain list of URLs of icons.
|
||||
// Priority of links:
|
||||
// 1. Links of "homepage" obtained from feed files which will be processed via DuckDuckGo.
|
||||
// 2. Direct links of "favicon", "icon", "logo" obtained from feed files which will be downloaded directly.
|
||||
// 3. Link of the feed file itself which will be processed via DuckDuckGo.
|
||||
// The "bool" if true means that the URL is direct and download directly, if false then
|
||||
// only use its domain and download via DuckDuckGo.
|
||||
QList<QPair<QString, bool>> icon_possible_locations;
|
||||
parsers.append(QSharedPointer<FeedParser>(new AtomParser({})));
|
||||
parsers.append(QSharedPointer<FeedParser>(new RssParser({})));
|
||||
parsers.append(QSharedPointer<FeedParser>(new RdfParser({})));
|
||||
parsers.append(QSharedPointer<FeedParser>(new JsonParser({})));
|
||||
parsers.append(QSharedPointer<FeedParser>(new SitemapParser({})));
|
||||
|
||||
if (content_type.contains(QSL("json"), Qt::CaseSensitivity::CaseInsensitive) || feed_contents.startsWith('{')) {
|
||||
feed = new StandardFeed();
|
||||
for (const QSharedPointer<FeedParser>& parser : parsers) {
|
||||
try {
|
||||
QPair<StandardFeed*, QList<IconLocation>> res = parser->guessFeed(feed_contents, content_type);
|
||||
|
||||
// We have JSON feed.
|
||||
feed->setEncoding(QSL(DEFAULT_FEED_ENCODING));
|
||||
feed->setType(Type::Json);
|
||||
|
||||
QJsonParseError json_err;
|
||||
QJsonDocument json = QJsonDocument::fromJson(feed_contents, &json_err);
|
||||
|
||||
if (json.isNull() && !json_err.errorString().isEmpty()) {
|
||||
throw ApplicationException(tr("JSON error '%1'").arg(json_err.errorString()));
|
||||
feed = res.first;
|
||||
icon_possible_locations = res.second;
|
||||
break;
|
||||
}
|
||||
|
||||
feed->setTitle(json.object()[QSL("title")].toString());
|
||||
feed->setDescription(json.object()[QSL("description")].toString());
|
||||
|
||||
auto home_page = json.object()[QSL("home_page_url")].toString();
|
||||
|
||||
if (!home_page.isEmpty()) {
|
||||
icon_possible_locations.prepend({home_page, false});
|
||||
catch (const FeedRecognizedButFailedException& format_ex) {
|
||||
// Parser reports that it is right parser for this feed
|
||||
// but its support is not enabled or available or it is broken.
|
||||
// In this case abort.
|
||||
throw format_ex;
|
||||
}
|
||||
|
||||
auto icon = json.object()[QSL("favicon")].toString();
|
||||
|
||||
if (icon.isEmpty()) {
|
||||
icon = json.object()[QSL("icon")].toString();
|
||||
}
|
||||
|
||||
if (!icon.isEmpty()) {
|
||||
// Low priority, download directly.
|
||||
icon_possible_locations.append({icon, true});
|
||||
catch (const ApplicationException& ex) {
|
||||
qWarningNN << LOGSEC_CORE << "Feed guessing error:" << QUOTE_W_SPACE_DOT(ex.message());
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Feed XML was obtained, now we need to try to guess
|
||||
// its encoding before we can read further data.
|
||||
QString xml_schema_encoding;
|
||||
QString xml_contents_encoded;
|
||||
QString enc =
|
||||
QRegularExpression(QSL("encoding=\"([A-Z0-9\\-]+)\""), QRegularExpression::PatternOption::CaseInsensitiveOption)
|
||||
.match(feed_contents)
|
||||
.captured(1);
|
||||
|
||||
if (!enc.isEmpty()) {
|
||||
// Some "encoding" attribute was found get the encoding
|
||||
// out of it.
|
||||
xml_schema_encoding = enc;
|
||||
}
|
||||
|
||||
QTextCodec* custom_codec = QTextCodec::codecForName(xml_schema_encoding.toLocal8Bit());
|
||||
QString encod;
|
||||
|
||||
if (custom_codec != nullptr) {
|
||||
// Feed encoding was probably guessed.
|
||||
xml_contents_encoded = custom_codec->toUnicode(feed_contents);
|
||||
encod = xml_schema_encoding;
|
||||
}
|
||||
else {
|
||||
// Feed encoding probably not guessed, set it as
|
||||
// default.
|
||||
xml_contents_encoded = feed_contents;
|
||||
encod = QSL(DEFAULT_FEED_ENCODING);
|
||||
}
|
||||
|
||||
// Feed XML was obtained, guess it now.
|
||||
QDomDocument xml_document;
|
||||
QString error_msg;
|
||||
int error_line, error_column;
|
||||
|
||||
if (!xml_document.setContent(xml_contents_encoded, true, &error_msg, &error_line, &error_column)) {
|
||||
throw ApplicationException(tr("XML is not well-formed, %1").arg(error_msg));
|
||||
}
|
||||
|
||||
feed = new StandardFeed();
|
||||
feed->setEncoding(encod);
|
||||
|
||||
QDomElement root_element = xml_document.documentElement();
|
||||
RdfParser rdf(QSL("<a/>"));
|
||||
AtomParser atom(QSL("<a/>"));
|
||||
|
||||
if (root_element.namespaceURI() == rdf.rdfNamespace()) {
|
||||
// We found RDF feed.
|
||||
QDomElement channel_element =
|
||||
root_element.elementsByTagNameNS(rdf.rssNamespace(), QSL("channel")).at(0).toElement();
|
||||
|
||||
feed->setType(Type::Rdf);
|
||||
feed->setTitle(channel_element.elementsByTagNameNS(rdf.rssNamespace(), QSL("title")).at(0).toElement().text());
|
||||
feed->setDescription(channel_element.elementsByTagNameNS(rdf.rssNamespace(), QSL("description"))
|
||||
.at(0)
|
||||
.toElement()
|
||||
.text());
|
||||
|
||||
QString home_page = channel_element.elementsByTagNameNS(rdf.rssNamespace(), QSL("link")).at(0).toElement().text();
|
||||
|
||||
if (!home_page.isEmpty()) {
|
||||
icon_possible_locations.prepend({home_page, false});
|
||||
}
|
||||
}
|
||||
else if (root_element.tagName() == QL1S("rss")) {
|
||||
// We found RSS 0.91/0.92/0.93/2.0/2.0.1 feed.
|
||||
QString rss_type = root_element.attribute(QSL("version"), QSL("2.0"));
|
||||
|
||||
if (rss_type == QL1S("0.91") || rss_type == QL1S("0.92") || rss_type == QL1S("0.93")) {
|
||||
feed->setType(Type::Rss0X);
|
||||
}
|
||||
else {
|
||||
feed->setType(Type::Rss2X);
|
||||
}
|
||||
|
||||
QDomElement channel_element = root_element.namedItem(QSL("channel")).toElement();
|
||||
|
||||
feed->setTitle(channel_element.namedItem(QSL("title")).toElement().text());
|
||||
feed->setDescription(channel_element.namedItem(QSL("description")).toElement().text());
|
||||
|
||||
QString icon_url_link = channel_element.namedItem(QSL("image")).namedItem(QSL("url")).toElement().text();
|
||||
|
||||
if (!icon_url_link.isEmpty()) {
|
||||
icon_possible_locations.append({icon_url_link, true});
|
||||
}
|
||||
|
||||
auto channel_links = channel_element.elementsByTagName(QSL("link"));
|
||||
|
||||
for (int i = 0; i < channel_links.size(); i++) {
|
||||
QString home_page = channel_links.at(i).toElement().text();
|
||||
|
||||
if (!home_page.isEmpty()) {
|
||||
icon_possible_locations.prepend({home_page, false});
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (root_element.namespaceURI() == atom.atomNamespace()) {
|
||||
// We found ATOM feed.
|
||||
feed->setType(Type::Atom10);
|
||||
feed->setTitle(root_element.namedItem(QSL("title")).toElement().text());
|
||||
feed->setDescription(root_element.namedItem(QSL("subtitle")).toElement().text());
|
||||
|
||||
QString icon_link = root_element.namedItem(QSL("icon")).toElement().text();
|
||||
|
||||
if (!icon_link.isEmpty()) {
|
||||
icon_possible_locations.append({icon_link, true});
|
||||
}
|
||||
|
||||
QString home_page = root_element.namedItem(QSL("link")).toElement().attribute(QSL("href"));
|
||||
|
||||
if (!home_page.isEmpty()) {
|
||||
icon_possible_locations.prepend({home_page, false});
|
||||
}
|
||||
}
|
||||
else {
|
||||
// File was downloaded and it really was XML file
|
||||
// but feed format was NOT recognized.
|
||||
feed->deleteLater();
|
||||
throw ApplicationException(tr("XML feed file format unrecognized"));
|
||||
}
|
||||
if (feed == nullptr) {
|
||||
throw ApplicationException(tr("feed format not recognized"));
|
||||
}
|
||||
|
||||
if (source_type == SourceType::Url && icon_possible_locations.isEmpty()) {
|
||||
@ -474,7 +349,9 @@ bool StandardFeed::performDragDropChange(RootItem* target_item) {
|
||||
|
||||
qApp->showGuiMessage(Notification::Event::GeneralEvent,
|
||||
{tr("Cannot move feed"),
|
||||
tr("Cannot move feed, detailed information was logged via debug log."),
|
||||
tr("Cannot move feed, detailed "
|
||||
"information was logged via "
|
||||
"debug log."),
|
||||
QSystemTrayIcon::MessageIcon::Critical});
|
||||
return false;
|
||||
}
|
||||
@ -550,7 +427,10 @@ QString StandardFeed::runScriptProcess(const QStringList& cmd_args,
|
||||
|
||||
if (!raw_error.simplified().isEmpty()) {
|
||||
qWarningNN << LOGSEC_CORE
|
||||
<< "Received error output from custom script even if it reported that it exited normally:"
|
||||
<< "Received error output from "
|
||||
"custom script even if it "
|
||||
"reported that it exited "
|
||||
"normally:"
|
||||
<< QUOTE_W_SPACE_DOT(raw_error);
|
||||
}
|
||||
|
||||
|
@ -24,14 +24,20 @@ class StandardFeed : public Feed {
|
||||
friend class StandardCategory;
|
||||
|
||||
public:
|
||||
enum class SourceType { Url = 0, Script = 1, LocalFile = 2 };
|
||||
enum class SourceType {
|
||||
Url = 0,
|
||||
Script = 1,
|
||||
LocalFile = 2
|
||||
};
|
||||
|
||||
enum class Type {
|
||||
Rss0X = 0,
|
||||
Rss2X = 1,
|
||||
Rdf = 2, // Sometimes denoted as RSS 1.0.
|
||||
Atom10 = 3,
|
||||
Json = 4
|
||||
Json = 4,
|
||||
SitemapIndex = 5,
|
||||
Sitemap = 6
|
||||
};
|
||||
|
||||
explicit StandardFeed(RootItem* parent_item = nullptr);
|
||||
@ -79,8 +85,8 @@ class StandardFeed : public Feed {
|
||||
const QString& post_process_script,
|
||||
NetworkFactory::NetworkAuthentication protection,
|
||||
bool fetch_icons = true,
|
||||
const QString& username = QString(),
|
||||
const QString& password = QString(),
|
||||
const QString& username = {},
|
||||
const QString& password = {},
|
||||
const QNetworkProxy& custom_proxy = QNetworkProxy::ProxyType::DefaultProxy);
|
||||
|
||||
// Converts particular feed type to string.
|
||||
|
Loading…
x
Reference in New Issue
Block a user