kasts/src/fetcher.cpp
2021-04-21 14:53:03 +02:00

389 lines
16 KiB
C++

/**
* SPDX-FileCopyrightText: 2020 Tobias Fella <fella@posteo.de>
* SPDX-FileCopyrightText: 2021 Bart De Vries <bart@mogwai.be>
*
* SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
*/
#include <QCryptographicHash>
#include <QDateTime>
#include <QDebug>
#include <QFile>
#include <QFileInfo>
#include <QDir>
#include <QNetworkAccessManager>
#include <QNetworkReply>
#include <QStandardPaths>
#include <QTextDocumentFragment>
#include <QDomElement>
#include <QMultiMap>
#include <Syndication/Syndication>
#include "database.h"
#include "fetcher.h"
#include "settingsmanager.h"
Fetcher::Fetcher()
{
m_updateProgress = -1;
m_updateTotal = -1;
m_updating = false;
manager = new QNetworkAccessManager(this);
manager->setRedirectPolicy(QNetworkRequest::NoLessSafeRedirectPolicy);
manager->setStrictTransportSecurityEnabled(true);
manager->enableStrictTransportSecurityStore(true);
}
void Fetcher::fetch(const QString &url)
{
qDebug() << "Starting to fetch" << url;
Q_EMIT startedFetchingFeed(url);
QNetworkRequest request((QUrl(url)));
request.setTransferTimeout();
QNetworkReply *reply = get(request);
connect(reply, &QNetworkReply::finished, this, [this, url, reply]() {
if(reply->error()) {
qWarning() << "Error fetching feed";
qWarning() << reply->errorString();
Q_EMIT error(url, reply->error(), reply->errorString());
} else {
QByteArray data = reply->readAll();
Syndication::DocumentSource *document = new Syndication::DocumentSource(data, url);
Syndication::FeedPtr feed = Syndication::parserCollection()->parse(*document, QStringLiteral("Atom"));
processFeed(feed, url);
}
m_updateProgress++;
Q_EMIT updateProgressChanged(m_updateProgress);
delete reply;
});
}
void Fetcher::fetch(const QStringList &urls)
{
if (m_updating) return; // update is already running, do nothing
m_updating = true;
m_updateProgress = 0;
m_updateTotal = urls.count();
connect(this, &Fetcher::updateProgressChanged, this, &Fetcher::updateMonitor);
Q_EMIT updatingChanged(m_updating);
Q_EMIT updateProgressChanged(m_updateProgress);
Q_EMIT updateTotalChanged(m_updateTotal);
for (int i=0; i<urls.count(); i++) {
fetch(urls[i]);
}
}
void Fetcher::fetchAll()
{
QStringList urls;
QSqlQuery query;
query.prepare(QStringLiteral("SELECT url FROM Feeds;"));
Database::instance().execute(query);
while (query.next()) {
urls += query.value(0).toString();;
}
if (urls.count() > 0)
fetch(urls);
else
return; // no feeds in database
}
void Fetcher::updateMonitor(int progress)
{
//qDebug() << "Update monitor" << progress << "/" << m_updateTotal;
// this method will watch for the end of the update process
if (progress > -1 && m_updateTotal > -1 && progress == m_updateTotal) {
m_updating = false;
m_updateProgress = -1;
m_updateTotal = -1;
disconnect(this, &Fetcher::updateProgressChanged, this, &Fetcher::updateMonitor);
Q_EMIT updatingChanged(m_updating);
//Q_EMIT updateProgressChanged(m_updateProgress);
//Q_EMIT updateTotalChanged(m_updateTotal);
}
}
void Fetcher::processFeed(Syndication::FeedPtr feed, const QString &url)
{
if (feed.isNull())
return;
// First check if this is a newly added feed
bool isNewFeed = false;
QSqlQuery query;
query.prepare(QStringLiteral("SELECT new FROM Feeds WHERE url=:url;"));
query.bindValue(QStringLiteral(":url"), url);
Database::instance().execute(query);
if (query.next()) {
isNewFeed = query.value(QStringLiteral("new")).toBool();
} else {
qDebug() << "Feed not found in database" << url;
return;
}
if (isNewFeed) qDebug() << "New feed" << feed->title() << ":" << isNewFeed;
// Retrieve "other" fields; this will include the "itunes" tags
QMultiMap<QString, QDomElement> otherItems = feed->additionalProperties();
query.prepare(QStringLiteral("UPDATE Feeds SET name=:name, image=:image, link=:link, description=:description, lastUpdated=:lastUpdated, new=:new WHERE url=:url;"));
query.bindValue(QStringLiteral(":name"), feed->title());
query.bindValue(QStringLiteral(":url"), url);
query.bindValue(QStringLiteral(":link"), feed->link());
query.bindValue(QStringLiteral(":description"), feed->description());
query.bindValue(QStringLiteral(":new"), false); // set "new" to false now that new feed is being processed
QDateTime current = QDateTime::currentDateTime();
query.bindValue(QStringLiteral(":lastUpdated"), current.toSecsSinceEpoch());
// Process authors
QString authorname, authoremail;
if (feed->authors().count() > 0) {
for (auto &author : feed->authors()) {
processAuthor(url, QLatin1String(""), author->name(), QLatin1String(""), QLatin1String(""));
}
} else {
// Try to find itunes fields if plain author doesn't exist
QString authorname, authoremail;
// First try the "itunes:owner" tag, if that doesn't succeed, then try the "itunes:author" tag
if (otherItems.value(QStringLiteral("http://www.itunes.com/dtds/podcast-1.0.dtdowner")).hasChildNodes()) {
QDomNodeList nodelist = otherItems.value(QStringLiteral("http://www.itunes.com/dtds/podcast-1.0.dtdowner")).childNodes();
for (int i=0; i < nodelist.length(); i++) {
if (nodelist.item(i).nodeName() == QStringLiteral("itunes:name")) {
authorname = nodelist.item(i).toElement().text();
} else if (nodelist.item(i).nodeName() == QStringLiteral("itunes:email")) {
authoremail = nodelist.item(i).toElement().text();
}
}
} else {
authorname = otherItems.value(QStringLiteral("http://www.itunes.com/dtds/podcast-1.0.dtdauthor")).text();
//qDebug() << "authorname" << authorname;
}
if (!authorname.isEmpty()) processAuthor(url, QLatin1String(""), authorname, QLatin1String(""), authoremail);
}
QString image = feed->image()->url();
// If there is no regular image tag, then try the itunes tags
if (image.isEmpty()) {
if (otherItems.value(QStringLiteral("http://www.itunes.com/dtds/podcast-1.0.dtdimage")).hasAttribute(QStringLiteral("href"))) {
image = otherItems.value(QStringLiteral("http://www.itunes.com/dtds/podcast-1.0.dtdimage")).attribute(QStringLiteral("href"));
}
}
if (image.startsWith(QStringLiteral("/")))
image = QUrl(url).adjusted(QUrl::RemovePath).toString() + image;
query.bindValue(QStringLiteral(":image"), image);
Database::instance().execute(query);
qDebug() << "Updated feed:" << feed->title();
Q_EMIT feedDetailsUpdated(url, feed->title(), image, feed->link(), feed->description(), current);
bool updatedEntries = false;
for (const auto &entry : feed->items()) {
QCoreApplication::processEvents(); // keep the main thread semi-responsive
bool isNewEntry = processEntry(entry, url, isNewFeed);
updatedEntries = updatedEntries || isNewEntry;
}
// Now mark the appropriate number of recent entries "new" and "read" only for new feeds
if (isNewFeed) {
query.prepare(QStringLiteral("SELECT * FROM Entries WHERE feed=:feed ORDER BY updated DESC LIMIT :recentNew;"));
query.bindValue(QStringLiteral(":feed"), url);
query.bindValue(QStringLiteral(":recentNew"), SettingsManager::self()->numberNewEpisodes());
Database::instance().execute(query);
QSqlQuery updateQuery;
while (query.next()) {
qDebug() << "Marked as new:" << query.value(QStringLiteral("id")).toString();
updateQuery.prepare(QStringLiteral("UPDATE Entries SET read=:read, new=:new WHERE id=:id AND feed=:feed;"));
updateQuery.bindValue(QStringLiteral(":read"), false);
updateQuery.bindValue(QStringLiteral(":new"), true);
updateQuery.bindValue(QStringLiteral(":feed"), url);
updateQuery.bindValue(QStringLiteral(":id"), query.value(QStringLiteral("id")).toString());
Database::instance().execute(updateQuery);
}
}
if (updatedEntries || isNewFeed) Q_EMIT feedUpdated(url);
Q_EMIT feedUpdateFinished(url);
}
bool Fetcher::processEntry(Syndication::ItemPtr entry, const QString &url, const bool &isNewFeed)
{
//qDebug() << "Processing" << entry->title();
// Retrieve "other" fields; this will include the "itunes" tags
QMultiMap<QString, QDomElement> otherItems = entry->additionalProperties();
QSqlQuery query;
query.prepare(QStringLiteral("SELECT COUNT (id) FROM Entries WHERE id=:id;"));
query.bindValue(QStringLiteral(":id"), entry->id());
Database::instance().execute(query);
query.next();
if (query.value(0).toInt() != 0)
return false; // entry already exists
query.prepare(QStringLiteral("INSERT INTO Entries VALUES (:feed, :id, :title, :content, :created, :updated, :link, :read, :new, :hasEnclosure, :image);"));
query.bindValue(QStringLiteral(":feed"), url);
query.bindValue(QStringLiteral(":id"), entry->id());
query.bindValue(QStringLiteral(":title"), QTextDocumentFragment::fromHtml(entry->title()).toPlainText());
query.bindValue(QStringLiteral(":created"), static_cast<int>(entry->datePublished()));
query.bindValue(QStringLiteral(":updated"), static_cast<int>(entry->dateUpdated()));
query.bindValue(QStringLiteral(":link"), entry->link());
query.bindValue(QStringLiteral(":hasEnclosure"), entry->enclosures().length() == 0 ? 0 : 1);
query.bindValue(QStringLiteral(":read"), isNewFeed); // if new feed, then mark all as read
query.bindValue(QStringLiteral(":new"), !isNewFeed); // if new feed, then mark none as new
if (!entry->content().isEmpty())
query.bindValue(QStringLiteral(":content"), entry->content());
else
query.bindValue(QStringLiteral(":content"), entry->description());
// Look for image in itunes tags
QString image;
if (otherItems.value(QStringLiteral("http://www.itunes.com/dtds/podcast-1.0.dtdimage")).hasAttribute(QStringLiteral("href"))) {
image = otherItems.value(QStringLiteral("http://www.itunes.com/dtds/podcast-1.0.dtdimage")).attribute(QStringLiteral("href"));
}
if (image.startsWith(QStringLiteral("/")))
image = QUrl(url).adjusted(QUrl::RemovePath).toString() + image;
query.bindValue(QStringLiteral(":image"), image);
//qDebug() << "Entry image found" << image;
Database::instance().execute(query);
if (entry->authors().count() > 0) {
for (const auto &author : entry->authors()) {
processAuthor(url, entry->id(), author->name(), author->uri(), author->email());
}
} else {
// As fallback, check if there is itunes "author" information
QString authorName = otherItems.value(QStringLiteral("http://www.itunes.com/dtds/podcast-1.0.dtdauthor")).text();
if (!authorName.isEmpty()) processAuthor(url, entry->id(), authorName, QLatin1String(""), QLatin1String(""));
}
for (const auto &enclosure : entry->enclosures()) {
processEnclosure(enclosure, entry, url);
}
Q_EMIT entryAdded(url, entry->id());
return true; // this is a new entry
}
void Fetcher::processAuthor(const QString &url, const QString &entryId, const QString &authorName, const QString &authorUri, const QString &authorEmail)
{
QSqlQuery query;
query.prepare(QStringLiteral("SELECT COUNT (id) FROM Authors WHERE feed=:feed AND id=:id AND name=:name;"));
query.bindValue(QStringLiteral(":feed"), url);
query.bindValue(QStringLiteral(":id"), entryId);
query.bindValue(QStringLiteral(":name"), authorName);
Database::instance().execute(query);
query.next();
if (query.value(0).toInt() != 0)
query.prepare(QStringLiteral("UPDATE Authors SET feed=:feed, id=:id, name=:name, uri=:uri, email=:email WHERE feed=:feed AND id=:id;"));
else
query.prepare(QStringLiteral("INSERT INTO Authors VALUES(:feed, :id, :name, :uri, :email);"));
query.bindValue(QStringLiteral(":feed"), url);
query.bindValue(QStringLiteral(":id"), entryId);
query.bindValue(QStringLiteral(":name"), authorName);
query.bindValue(QStringLiteral(":uri"), authorUri);
query.bindValue(QStringLiteral(":email"), authorEmail);
Database::instance().execute(query);
}
void Fetcher::processEnclosure(Syndication::EnclosurePtr enclosure, Syndication::ItemPtr entry, const QString &feedUrl)
{
QSqlQuery query;
query.prepare(QStringLiteral("SELECT COUNT (id) FROM Enclosures WHERE feed=:feed AND id=:id;"));
query.bindValue(QStringLiteral(":feed"), feedUrl);
query.bindValue(QStringLiteral(":id"), entry->id());
Database::instance().execute(query);
query.next();
if (query.value(0).toInt() != 0)
query.prepare(QStringLiteral("UPDATE Enclosures SET feed=:feed, id=:id, duration=:duration, size=:size, title=:title, type=:type, url=:url;"));
else
query.prepare(QStringLiteral("INSERT INTO Enclosures VALUES (:feed, :id, :duration, :size, :title, :type, :url, :playposition, :downloaded);"));
query.bindValue(QStringLiteral(":feed"), feedUrl);
query.bindValue(QStringLiteral(":id"), entry->id());
query.bindValue(QStringLiteral(":duration"), enclosure->duration());
query.bindValue(QStringLiteral(":size"), enclosure->length());
query.bindValue(QStringLiteral(":title"), enclosure->title());
query.bindValue(QStringLiteral(":type"), enclosure->type());
query.bindValue(QStringLiteral(":url"), enclosure->url());
query.bindValue(QStringLiteral(":playposition"), 0);
query.bindValue(QStringLiteral(":downloaded"), false);
Database::instance().execute(query);
}
QString Fetcher::image(const QString& url) const
{
QString path = imagePath(url);
if (QFileInfo::exists(path)) {
if (QFileInfo(path).size() != 0 )
return path;
}
download(url, path);
return QLatin1String("");
}
QNetworkReply *Fetcher::download(const QString &url, const QString &filePath) const
{
QNetworkRequest request((QUrl(url)));
QNetworkReply *reply = get(request);
connect(reply, &QNetworkReply::finished, this, [=]() {
if (reply->isOpen()) {
QByteArray data = reply->readAll();
QFile file(filePath);
file.open(QIODevice::WriteOnly);
file.write(data);
file.close();
Q_EMIT downloadFinished(url);
}
reply->deleteLater();
});
return reply;
}
void Fetcher::removeImage(const QString &url)
{
qDebug() << imagePath(url);
QFile(imagePath(url)).remove();
}
QString Fetcher::imagePath(const QString &url) const
{
QString path = QStandardPaths::writableLocation(QStandardPaths::CacheLocation) + QStringLiteral("/images/");
// Create path in cache if it doesn't exist yet
QFileInfo().absoluteDir().mkpath(path);
return path + QString::fromStdString(QCryptographicHash::hash(url.toUtf8(), QCryptographicHash::Md5).toHex().toStdString());
}
QString Fetcher::enclosurePath(const QString &url) const
{
QString path = QStandardPaths::writableLocation(QStandardPaths::DataLocation) + QStringLiteral("/enclosures/");
// Create path in cache if it doesn't exist yet
QFileInfo().absoluteDir().mkpath(path);
return path + QString::fromStdString(QCryptographicHash::hash(url.toUtf8(), QCryptographicHash::Md5).toHex().toStdString());
}
QNetworkReply *Fetcher::get(QNetworkRequest &request) const
{
request.setRawHeader("User-Agent", "Alligator/0.1; Syndication");
return manager->get(request);
}