
/**
 * SPDX-FileCopyrightText: 2020 Tobias Fella <fella@posteo.de>
 * SPDX-FileCopyrightText: 2021 Bart De Vries <bart@mogwai.be>
 *
 * SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
 */
2020-02-28 23:25:08 +01:00
#include <KLocalizedString>
2021-03-12 00:19:04 +01:00
#include <QCryptographicHash>
2020-06-06 00:05:32 +02:00
#include <QDateTime>
2021-03-12 00:19:04 +01:00
#include <QDebug>
2021-05-01 21:35:37 +02:00
#include <QDir>
#include <QDomElement>
2020-04-26 23:40:09 +02:00
#include <QFile>
#include <QFileInfo>
2021-05-01 21:35:37 +02:00
#include <QMultiMap>
2020-02-28 23:25:08 +01:00
#include <QNetworkAccessManager>
#include <QNetworkReply>
#include <QStandardPaths>
2020-05-02 12:26:00 +02:00
#include <QTextDocumentFragment>
2020-02-28 23:25:08 +01:00
2020-02-29 15:34:12 +01:00
#include <Syndication/Syndication>
2020-02-28 23:25:08 +01:00
2020-03-16 22:37:04 +01:00
#include "database.h"
#include "enclosure.h"
2020-04-22 02:17:57 +02:00
#include "fetcher.h"
2021-06-05 20:12:42 +02:00
#include "fetcherlogging.h"
#include "settingsmanager.h"
2020-03-16 22:37:04 +01:00
2020-04-22 02:17:57 +02:00
m_updateProgress = -1;
m_updateTotal = -1;
m_updating = false;
manager = new QNetworkAccessManager(this);
#if !defined Q_OS_ANDROID && !defined Q_OS_WIN
m_nmInterface = new OrgFreedesktopNetworkManagerInterface(QStringLiteral("org.freedesktop.NetworkManager"),
2020-02-28 23:25:08 +01:00
2020-11-01 13:18:11 +01:00
void Fetcher::fetch(const QString &url)
2020-02-28 23:25:08 +01:00
QStringList urls(url);
2020-02-28 23:25:08 +01:00
void Fetcher::fetch(const QStringList &urls)
2021-05-01 21:35:37 +02:00
if (m_updating)
return; // update is already running, do nothing
m_updating = true;
m_updateProgress = 0;
m_updateTotal = urls.count();
connect(this, &Fetcher::updateProgressChanged, this, &Fetcher::updateMonitor);
Q_EMIT updatingChanged(m_updating);
Q_EMIT updateProgressChanged(m_updateProgress);
Q_EMIT updateTotalChanged(m_updateTotal);
2021-05-01 21:35:37 +02:00
for (int i = 0; i < urls.count(); i++) {
// Collects the URL of every row of the Feeds table into a string list.
// The excerpt stops at the non-empty check, so what is done with the list
// afterwards is not visible here.
// NOTE(review): no statement executing the prepared SELECT is visible in this
// excerpt before query.next() is called - confirm against the full file.
void Fetcher::fetchAll()
QStringList urls;
QSqlQuery query;
query.prepare(QStringLiteral("SELECT url FROM Feeds;"));
2020-06-06 00:05:32 +02:00
while (query.next()) {
2021-05-01 21:35:37 +02:00
// Column 0 is the only selected column, i.e. the feed URL.
urls += query.value(0).toString();
2021-05-01 18:59:08 +00:00
// Only continue when at least one feed URL was found.
if (urls.count() > 0) {
2021-05-01 18:59:08 +00:00
// Requests the feed at `url` over the network and, once the reply has
// finished, either reports the network error or parses the payload with
// Syndication and hands the result to processFeed().
// NOTE(review): several lines of this function appear to be missing from this
// excerpt (closing braces, and possibly statements between the ones shown).
void Fetcher::retrieveFeed(const QString &url)
// Feed updates are refused on a metered connection unless the user setting
// allows them; this is reported through the error() signal.
if (isMeteredConnection() && !SettingsManager::self()->allowMeteredFeedUpdates()) {
Q_EMIT error(Error::Type::MeteredNotAllowed, url, QString(), 0, i18n("Podcast updates not allowed due to user setting"));
Q_EMIT updateProgressChanged(m_updateProgress);
2021-06-05 20:12:42 +02:00
qCDebug(kastsFetcher) << "Starting to fetch" << url;
Q_EMIT startedFetchingFeed(url);
QNetworkRequest request((QUrl(url)));
QNetworkReply *reply = get(request);
// The lambda captures url by value and reply by pointer; it runs when the
// reply signals finished().
connect(reply, &QNetworkReply::finished, this, [this, url, reply]() {
2021-05-01 21:35:37 +02:00
if (reply->error()) {
qWarning() << "Error fetching feed";
qWarning() << reply->errorString();
Q_EMIT error(Error::Type::FeedUpdate, url, QString(), reply->error(), reply->errorString());
} else {
QByteArray data = reply->readAll();
// NOTE(review): `document` is allocated with new and no matching delete is
// visible in this excerpt - check for a leak.
Syndication::DocumentSource *document = new Syndication::DocumentSource(data, url);
// "Atom" is passed as the second argument to parse() (presumably a format
// hint - see the Syndication ParserCollection documentation).
Syndication::FeedPtr feed = Syndication::parserCollection()->parse(*document, QStringLiteral("Atom"));
processFeed(feed, url);
Q_EMIT updateProgressChanged(m_updateProgress);
// NOTE(review): the reply is deleted directly inside the slot connected to
// its own finished() signal; the Qt documentation recommends
// reply->deleteLater() in this situation.
delete reply;
void Fetcher::updateMonitor(int progress)
2021-06-05 20:12:42 +02:00
qCDebug(kastsFetcher) << "Update monitor" << progress << "/" << m_updateTotal;
// this method will watch for the end of the update process
if (progress > -1 && m_updateTotal > -1 && progress == m_updateTotal) {
m_updating = false;
m_updateProgress = -1;
m_updateTotal = -1;
disconnect(this, &Fetcher::updateProgressChanged, this, &Fetcher::updateMonitor);
Q_EMIT updatingChanged(m_updating);
2021-05-01 21:35:37 +02:00
// Q_EMIT updateProgressChanged(m_updateProgress);
// Q_EMIT updateTotalChanged(m_updateTotal);
2020-11-01 13:18:11 +01:00
// Stores the details of a parsed feed and processes all of its items.
//
// Visible steps: look up the feed's "new" flag, prepare an UPDATE of the
// Feeds row (name, image, link, description, lastUpdated), record the feed
// authors (with an iTunes fallback), resolve the feed image (with an iTunes
// fallback), emit feedDetailsUpdated, run processEntry() on every item,
// apply the post-processing for new feeds, and finally emit feedUpdated /
// feedUpdateFinished.
// NOTE(review): no statement executing any of the prepared queries is visible
// in this excerpt, and several closing braces / early returns appear to be
// missing - confirm against the full file.
void Fetcher::processFeed(Syndication::FeedPtr feed, const QString &url)
2020-05-18 16:47:12 +02:00
// A null feed pointer means there is nothing to process.
if (feed.isNull())
// First check if this is a newly added feed
bool isNewFeed = false;
QSqlQuery query;
query.prepare(QStringLiteral("SELECT new FROM Feeds WHERE url=:url;"));
query.bindValue(QStringLiteral(":url"), url);
if (query.next()) {
isNewFeed = query.value(QStringLiteral("new")).toBool();
} else {
2021-06-05 20:12:42 +02:00
qCDebug(kastsFetcher) << "Feed not found in database" << url;
2021-05-01 21:35:37 +02:00
if (isNewFeed)
2021-06-05 20:12:42 +02:00
qCDebug(kastsFetcher) << "New feed" << feed->title() << ":" << isNewFeed;
// Retrieve "other" fields; this will include the "itunes" tags
QMultiMap<QString, QDomElement> otherItems = feed->additionalProperties();
query.prepare(QStringLiteral("UPDATE Feeds SET name=:name, image=:image, link=:link, description=:description, lastUpdated=:lastUpdated WHERE url=:url;"));
2020-05-18 16:47:12 +02:00
query.bindValue(QStringLiteral(":name"), feed->title());
query.bindValue(QStringLiteral(":url"), url);
query.bindValue(QStringLiteral(":link"), feed->link());
query.bindValue(QStringLiteral(":description"), feed->description());
2020-06-06 00:05:32 +02:00
// lastUpdated is stored as seconds since the epoch of the current time.
QDateTime current = QDateTime::currentDateTime();
query.bindValue(QStringLiteral(":lastUpdated"), current.toSecsSinceEpoch());
// Process authors
// NOTE(review): authorname/authoremail are declared here and again a few
// lines below; one of the two declarations looks redundant.
QString authorname, authoremail;
if (feed->authors().count() > 0) {
// Feed-level authors are recorded with an empty entry id; only the name is
// passed on, uri and e-mail are left empty.
for (auto &author : feed->authors()) {
processAuthor(url, QLatin1String(""), author->name(), QLatin1String(""), QLatin1String(""));
} else {
// Try to find itunes fields if plain author doesn't exist
QString authorname, authoremail;
// First try the "itunes:owner" tag, if that doesn't succeed, then try the "itunes:author" tag
// The lookup key is the iTunes namespace URI immediately followed by the
// tag name ("owner").
if (otherItems.value(QStringLiteral("http://www.itunes.com/dtds/podcast-1.0.dtdowner")).hasChildNodes()) {
QDomNodeList nodelist = otherItems.value(QStringLiteral("http://www.itunes.com/dtds/podcast-1.0.dtdowner")).childNodes();
2021-05-01 18:59:08 +00:00
for (int i = 0; i < nodelist.length(); i++) {
if (nodelist.item(i).nodeName() == QStringLiteral("itunes:name")) {
authorname = nodelist.item(i).toElement().text();
} else if (nodelist.item(i).nodeName() == QStringLiteral("itunes:email")) {
authoremail = nodelist.item(i).toElement().text();
} else {
authorname = otherItems.value(QStringLiteral("http://www.itunes.com/dtds/podcast-1.0.dtdauthor")).text();
2021-06-05 20:12:42 +02:00
qCDebug(kastsFetcher) << "authorname" << authorname;
2021-05-01 18:59:08 +00:00
if (!authorname.isEmpty()) {
processAuthor(url, QLatin1String(""), authorname, QLatin1String(""), authoremail);
QString image = feed->image()->url();
// If there is no regular image tag, then try the itunes tags
if (image.isEmpty()) {
if (otherItems.value(QStringLiteral("http://www.itunes.com/dtds/podcast-1.0.dtdimage")).hasAttribute(QStringLiteral("href"))) {
image = otherItems.value(QStringLiteral("http://www.itunes.com/dtds/podcast-1.0.dtdimage")).attribute(QStringLiteral("href"));
// An image reference starting with "/" is treated as relative: it is
// prefixed with the feed URL stripped of its path.
if (image.startsWith(QStringLiteral("/")))
image = QUrl(url).adjusted(QUrl::RemovePath).toString() + image;
2020-05-26 16:32:07 +02:00
query.bindValue(QStringLiteral(":image"), image);
2020-05-18 16:47:12 +02:00
2021-06-05 20:12:42 +02:00
qCDebug(kastsFetcher) << "Updated feed details:" << feed->title();
2020-05-18 16:47:12 +02:00
2020-06-06 00:05:32 +02:00
Q_EMIT feedDetailsUpdated(url, feed->title(), image, feed->link(), feed->description(), current);
2020-05-26 16:32:07 +02:00
// Becomes true as soon as processEntry() reports at least one new entry.
bool updatedEntries = false;
2020-05-18 16:47:12 +02:00
for (const auto &entry : feed->items()) {
QCoreApplication::processEvents(); // keep the main thread semi-responsive
bool isNewEntry = processEntry(entry, url, isNewFeed);
updatedEntries = updatedEntries || isNewEntry;
// Now mark the appropriate number of recent entries "new" and "read" only for new feeds
if (isNewFeed) {
query.prepare(QStringLiteral("SELECT * FROM Entries WHERE feed=:feed ORDER BY updated DESC LIMIT :recentNew;"));
query.bindValue(QStringLiteral(":feed"), url);
query.bindValue(QStringLiteral(":recentNew"), 0); // hardcode to marking no episode as new on a new feed
QSqlQuery updateQuery;
while (query.next()) {
2021-06-05 20:12:42 +02:00
qCDebug(kastsFetcher) << "Marked as new:" << query.value(QStringLiteral("id")).toString();
updateQuery.prepare(QStringLiteral("UPDATE Entries SET read=:read, new=:new WHERE id=:id AND feed=:feed;"));
updateQuery.bindValue(QStringLiteral(":read"), false);
updateQuery.bindValue(QStringLiteral(":new"), true);
updateQuery.bindValue(QStringLiteral(":feed"), url);
updateQuery.bindValue(QStringLiteral(":id"), query.value(QStringLiteral("id")).toString());
// Finally, reset the new flag to false now that the new feed has been fully processed
// If we would reset the flag sooner, then too many episodes will get flagged as new if
// the initial import gets interrupted somehow.
query.prepare(QStringLiteral("UPDATE Feeds SET new=:new WHERE url=:url;"));
query.bindValue(QStringLiteral(":url"), url);
query.bindValue(QStringLiteral(":new"), false);
2020-05-18 16:47:12 +02:00
2020-05-26 16:32:07 +02:00
2021-05-01 21:35:37 +02:00
// feedUpdated is only announced when something changed: at least one new
// entry was added or the feed itself is new.
if (updatedEntries || isNewFeed)
Q_EMIT feedUpdated(url);
Q_EMIT feedUpdateFinished(url);
2020-05-18 16:47:12 +02:00
2021-05-01 21:00:12 +02:00
// Adds one feed item to the Entries table unless an entry with the same id
// already exists, then records its authors and enclosures.
// Returns false when the entry already exists and true when it is new;
// entryAdded(url, id) is emitted for new entries.
// NOTE(review): no statement executing the prepared queries is visible in
// this excerpt, and some structural lines (braces, `else`) appear to be
// missing - confirm against the full file.
bool Fetcher::processEntry(Syndication::ItemPtr entry, const QString &url, bool isNewFeed)
2020-05-18 16:47:12 +02:00
2021-06-05 20:12:42 +02:00
qCDebug(kastsFetcher) << "Processing" << entry->title();
// Retrieve "other" fields; this will include the "itunes" tags
QMultiMap<QString, QDomElement> otherItems = entry->additionalProperties();
2020-05-18 16:47:12 +02:00
QSqlQuery query;
query.prepare(QStringLiteral("SELECT COUNT (id) FROM Entries WHERE id=:id;"));
query.bindValue(QStringLiteral(":id"), entry->id());
// A non-zero count means the entry is already stored.
if (query.value(0).toInt() != 0)
2021-05-01 21:35:37 +02:00
return false; // entry already exists
2020-05-18 16:47:12 +02:00
query.prepare(QStringLiteral("INSERT INTO Entries VALUES (:feed, :id, :title, :content, :created, :updated, :link, :read, :new, :hasEnclosure, :image);"));
2020-05-18 16:47:12 +02:00
query.bindValue(QStringLiteral(":feed"), url);
query.bindValue(QStringLiteral(":id"), entry->id());
// The title is converted from HTML to plain text before it is stored.
query.bindValue(QStringLiteral(":title"), QTextDocumentFragment::fromHtml(entry->title()).toPlainText());
// NOTE(review): both dates are narrowed to int before binding - confirm that
// this cannot truncate the values returned by Syndication.
query.bindValue(QStringLiteral(":created"), static_cast<int>(entry->datePublished()));
query.bindValue(QStringLiteral(":updated"), static_cast<int>(entry->dateUpdated()));
query.bindValue(QStringLiteral(":link"), entry->link());
2021-02-21 19:54:10 +01:00
// hasEnclosure is stored as 0 or 1.
query.bindValue(QStringLiteral(":hasEnclosure"), entry->enclosures().length() == 0 ? 0 : 1);
2021-05-01 18:59:08 +00:00
query.bindValue(QStringLiteral(":read"), isNewFeed); // if new feed, then mark all as read
query.bindValue(QStringLiteral(":new"), !isNewFeed); // if new feed, then mark none as new
2020-05-18 16:47:12 +02:00
// NOTE(review): as shown, the description binding below follows the content
// binding unconditionally; an `else` may be missing from this excerpt.
if (!entry->content().isEmpty())
query.bindValue(QStringLiteral(":content"), entry->content());
query.bindValue(QStringLiteral(":content"), entry->description());
// Look for image in itunes tags
QString image;
if (otherItems.value(QStringLiteral("http://www.itunes.com/dtds/podcast-1.0.dtdimage")).hasAttribute(QStringLiteral("href"))) {
image = otherItems.value(QStringLiteral("http://www.itunes.com/dtds/podcast-1.0.dtdimage")).attribute(QStringLiteral("href"));
// An image reference starting with "/" is treated as relative: it is
// prefixed with the feed URL stripped of its path.
if (image.startsWith(QStringLiteral("/")))
image = QUrl(url).adjusted(QUrl::RemovePath).toString() + image;
query.bindValue(QStringLiteral(":image"), image);
2021-06-05 20:12:42 +02:00
qCDebug(kastsFetcher) << "Entry image found" << image;
2020-05-18 16:47:12 +02:00
// Entry authors are recorded with name, uri and e-mail.
if (entry->authors().count() > 0) {
for (const auto &author : entry->authors()) {
processAuthor(url, entry->id(), author->name(), author->uri(), author->email());
} else {
// As fallback, check if there is itunes "author" information
QString authorName = otherItems.value(QStringLiteral("http://www.itunes.com/dtds/podcast-1.0.dtdauthor")).text();
2021-05-01 21:35:37 +02:00
if (!authorName.isEmpty())
processAuthor(url, entry->id(), authorName, QLatin1String(""), QLatin1String(""));
2020-05-18 16:47:12 +02:00
2020-05-18 17:02:46 +02:00
for (const auto &enclosure : entry->enclosures()) {
processEnclosure(enclosure, entry, url);
Q_EMIT entryAdded(url, entry->id());
return true; // this is a new entry
2020-05-18 16:47:12 +02:00
// Records an author for a feed (empty entryId) or for a single entry in the
// Authors table: the row is looked up by feed, id and name, and then either
// an UPDATE or an INSERT statement is prepared with the same five bindings.
// NOTE(review): no statement executing the queries and no `else` between the
// two prepare() calls is visible in this excerpt - confirm against the full
// file.
void Fetcher::processAuthor(const QString &url, const QString &entryId, const QString &authorName, const QString &authorUri, const QString &authorEmail)
2020-05-18 16:47:12 +02:00
QSqlQuery query;
query.prepare(QStringLiteral("SELECT COUNT (id) FROM Authors WHERE feed=:feed AND id=:id AND name=:name;"));
query.bindValue(QStringLiteral(":feed"), url);
query.bindValue(QStringLiteral(":id"), entryId);
query.bindValue(QStringLiteral(":name"), authorName);
// NOTE(review): the lookup above matches on feed, id and name, while the
// UPDATE below only filters on feed and id, so it would touch every author
// row of that entry - confirm whether "AND name=:name" is intended.
if (query.value(0).toInt() != 0)
query.prepare(QStringLiteral("UPDATE Authors SET feed=:feed, id=:id, name=:name, uri=:uri, email=:email WHERE feed=:feed AND id=:id;"));
query.prepare(QStringLiteral("INSERT INTO Authors VALUES(:feed, :id, :name, :uri, :email);"));
2020-05-26 16:32:07 +02:00
query.bindValue(QStringLiteral(":feed"), url);
query.bindValue(QStringLiteral(":id"), entryId);
query.bindValue(QStringLiteral(":name"), authorName);
query.bindValue(QStringLiteral(":uri"), authorUri);
query.bindValue(QStringLiteral(":email"), authorEmail);
2020-05-18 16:47:12 +02:00
2020-11-01 13:18:11 +01:00
// Records the enclosure of an entry in the Enclosures table: the row is
// looked up by feed and entry id, and then either an UPDATE or an INSERT
// statement is prepared and bound with the enclosure's metadata.
// NOTE(review): no statement executing the queries and no `else` between the
// two prepare() calls is visible in this excerpt - confirm against the full
// file.
void Fetcher::processEnclosure(Syndication::EnclosurePtr enclosure, Syndication::ItemPtr entry, const QString &feedUrl)
2020-05-18 17:02:46 +02:00
QSqlQuery query;
query.prepare(QStringLiteral("SELECT COUNT (id) FROM Enclosures WHERE feed=:feed AND id=:id;"));
query.bindValue(QStringLiteral(":feed"), feedUrl);
query.bindValue(QStringLiteral(":id"), entry->id());
// NOTE(review): the UPDATE statement below has no WHERE clause, so when it is
// executed it applies to every row of Enclosures; it most likely needs
// "WHERE feed=:feed AND id=:id" like the lookup above.
if (query.value(0).toInt() != 0)
query.prepare(QStringLiteral("UPDATE Enclosures SET feed=:feed, id=:id, duration=:duration, size=:size, title=:title, type=:type, url=:url;"));
query.prepare(QStringLiteral("INSERT INTO Enclosures VALUES (:feed, :id, :duration, :size, :title, :type, :url, :playposition, :downloaded);"));
2020-05-26 16:32:07 +02:00
query.bindValue(QStringLiteral(":feed"), feedUrl);
2020-05-18 17:02:46 +02:00
query.bindValue(QStringLiteral(":id"), entry->id());
query.bindValue(QStringLiteral(":duration"), enclosure->duration());
query.bindValue(QStringLiteral(":size"), enclosure->length());
query.bindValue(QStringLiteral(":title"), enclosure->title());
query.bindValue(QStringLiteral(":type"), enclosure->type());
query.bindValue(QStringLiteral(":url"), enclosure->url());
// The play position is bound as 0 and the download status as the database
// representation of Enclosure::Downloadable; only the INSERT statement has
// placeholders for these two values.
query.bindValue(QStringLiteral(":playposition"), 0);
query.bindValue(QStringLiteral(":downloaded"), Enclosure::statusToDb(Enclosure::Downloadable));
2020-05-18 17:02:46 +02:00
2021-05-01 21:35:37 +02:00
// Resolve the remote image @p url to something that can be displayed.
//
// Returns
//  - "no-image" when @p url is empty, or when the network state can be
//    checked and it does not permit a download (not connected, or metered
//    without the user allowing metered image downloads);
//  - "file://" + cache path when a non-empty cached copy exists;
//  - "fetching" after a download into the cache has been started.
QString Fetcher::image(const QString &url) const
{
    if (url.isEmpty()) {
        return QLatin1String("no-image");
    }

    // Serve the cached copy when there is one with actual content.
    const QString cachedFile = imagePath(url);
    const bool haveCachedCopy = QFileInfo::exists(cachedFile) && QFileInfo(cachedFile).size() != 0;
    if (haveCachedCopy) {
        return QStringLiteral("file://") + cachedFile;
    }

    // Not cached yet: when the network state can be queried, only download
    // on a connected network that is unmetered or on which the user allows
    // metered image downloads; when it cannot be queried, just try.
    bool mayDownload = true;
    if (canCheckNetworkStatus()) {
        mayDownload = networkConnected() && (!isMeteredConnection() || SettingsManager::self()->allowMeteredImageDownloads());
    }
    if (!mayDownload) {
        return QLatin1String("no-image");
    }

    download(url, cachedFile);
    return QLatin1String("fetching");
}
2020-05-18 21:20:23 +02:00
// Downloads `url` into the file at `filePath` and returns the QNetworkReply
// of the GET request. When the target file already exists with a non-zero
// size, a Range header is set so the download continues at that offset and
// the file is opened for appending.
// NOTE(review): several lines appear to be missing from this excerpt (e.g.
// closing braces and the statements that write `data` to the file) - confirm
// against the full file.
QNetworkReply *Fetcher::download(const QString &url, const QString &filePath) const
2020-05-18 21:20:23 +02:00
QNetworkRequest request((QUrl(url)));
QFile *file = new QFile(filePath);
// NOTE(review): QFile::size() returns a qint64; storing it in an int can
// truncate the resume offset for very large files.
int resumedAt = 0;
if (file->exists() && file->size() > 0) {
// try to resume download
resumedAt = file->size();
qCDebug(kastsFetcher) << "Resuming download at" << resumedAt << "bytes";
// Range header of the form "bytes=<offset>-", i.e. from the offset onwards.
QByteArray rangeHeaderValue = QByteArray("bytes=") + QByteArray::number(resumedAt) + QByteArray("-");
request.setRawHeader(QByteArray("Range"), rangeHeaderValue);
file->open(QIODevice::WriteOnly | QIODevice::Append);
} else {
qCDebug(kastsFetcher) << "Starting new download";
// A separate HEAD request is used to log the size reported by the server.
QNetworkReply *headerReply = head(request);
connect(headerReply, &QNetworkReply::finished, this, [=]() {
if (headerReply->isOpen()) {
// NOTE(review): the Content-Length value is converted with toInt(), which
// cannot represent sizes above the int range.
int fileSize = headerReply->header(QNetworkRequest::ContentLengthHeader).toInt();
qCDebug(kastsFetcher) << "Reported download size" << fileSize;
2020-07-02 19:14:07 +02:00
QNetworkReply *reply = get(request);
// Data is read from the reply whenever new data is signalled ...
connect(reply, &QNetworkReply::readyRead, this, [=]() {
if (reply->isOpen() && file) {
QByteArray data = reply->readAll();
2021-02-21 19:54:10 +01:00
// ... and once more when the reply has finished.
connect(reply, &QNetworkReply::finished, this, [=]() {
if (reply->isOpen() && file) {
2021-02-21 19:54:10 +01:00
QByteArray data = reply->readAll();
2021-02-21 19:54:10 +01:00
Q_EMIT downloadFinished(url);
// clean up; close file if still open in case something has gone wrong
if (file) {
if (file->isOpen()) {
delete file;
2021-02-21 19:54:10 +01:00
2021-02-21 19:54:10 +01:00
return reply;
2020-04-26 23:40:09 +02:00
2020-11-01 13:18:11 +01:00
// Logs the cache path that belongs to the image `url`.
// NOTE(review): the statement that actually removes the file is not visible
// in this excerpt - confirm against the full file.
void Fetcher::removeImage(const QString &url)
2020-04-26 23:40:09 +02:00
2021-06-05 20:12:42 +02:00
qCDebug(kastsFetcher) << "Removing image" << imagePath(url);
2020-04-26 23:40:09 +02:00
// Returns the cache file path for the image `url`: the "images/" directory
// below the application's cache location, followed by the hex-encoded MD5
// hash of the UTF-8 encoded URL.
QString Fetcher::imagePath(const QString &url) const
2020-04-26 23:40:09 +02:00
QString path = QStandardPaths::writableLocation(QStandardPaths::CacheLocation) + QStringLiteral("/images/");
// Create path in cache if it doesn't exist yet
// NOTE(review): the statement creating the directory is not visible in this
// excerpt - confirm against the full file.
return path + QString::fromStdString(QCryptographicHash::hash(url.toUtf8(), QCryptographicHash::Md5).toHex().toStdString());
2020-04-26 23:40:09 +02:00
2020-07-02 19:14:07 +02:00
// Returns the storage file path for the enclosure `url`: the "enclosures/"
// directory below the application's data location, followed by the
// hex-encoded MD5 hash of the UTF-8 encoded URL.
QString Fetcher::enclosurePath(const QString &url) const
QString path = QStandardPaths::writableLocation(QStandardPaths::DataLocation) + QStringLiteral("/enclosures/");
// Create path in cache if it doesn't exist yet
// NOTE(review): the statement creating the directory is not visible in this
// excerpt; note also that the base directory is the data location, not the
// cache location the comment above mentions.
return path + QString::fromStdString(QCryptographicHash::hash(url.toUtf8(), QCryptographicHash::Md5).toHex().toStdString());
// Issue a GET for @p request through the shared network access manager and
// return the pending reply.
// NOTE(review): setHeader() is not called anywhere in the visible excerpt;
// confirm whether requests are meant to be stamped with the User-Agent here.
QNetworkReply *Fetcher::get(QNetworkRequest &request) const
{
    QNetworkReply *pendingReply = manager->get(request);
    return pendingReply;
}
// Issue a HEAD for @p request through the shared network access manager and
// return the pending reply.
QNetworkReply *Fetcher::head(QNetworkRequest &request) const
{
    QNetworkReply *pendingReply = manager->head(request);
    return pendingReply;
}
// Stamp @p request with the application's User-Agent header.
void Fetcher::setHeader(QNetworkRequest &request) const
{
    const QByteArray headerName("User-Agent");
    const QByteArray headerValue("Kasts/0.1; Syndication");
    request.setRawHeader(headerName, headerValue);
}
bool Fetcher::canCheckNetworkStatus() const
#if !defined Q_OS_ANDROID && !defined Q_OS_WIN
qCDebug(kastsFetcher) << "Can NetworkManager be reached?" << m_nmInterface->isValid();
return (m_nmInterface && m_nmInterface->isValid());
return false;
bool Fetcher::networkConnected() const
#if !defined Q_OS_ANDROID && !defined Q_OS_WIN
qCDebug(kastsFetcher) << "Network connected?" << (m_nmInterface->state() >= 70) << m_nmInterface->state();
return (m_nmInterface && m_nmInterface->state() >= 70);
return true;
bool Fetcher::isMeteredConnection() const
#if !defined Q_OS_ANDROID && !defined Q_OS_WIN
if (canCheckNetworkStatus()) {
// Get network connection status through DBus (NetworkManager)
// state == 1: explicitly configured as metered
// state == 3: connection guessed as metered
uint state = m_nmInterface->metered();
qCDebug(kastsFetcher) << "Network Status:" << state;
qCDebug(kastsFetcher) << "Connection is metered?" << (state == 1 || state == 3);
return (state == 1 || state == 3);
} else {
return false;
// TODO: get network connection type for Android and windows
return false;