2020-02-28 23:25:08 +01:00
/**
2020-08-14 20:56:04 +02:00
* SPDX-FileCopyrightText: 2020 Tobias Fella <fella@posteo.de>
2021-04-08 13:16:36 +02:00
* SPDX-FileCopyrightText: 2021 Bart De Vries <bart@mogwai.be>
2020-02-28 23:25:08 +01:00
*
2020-08-14 20:56:04 +02:00
* SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
2020-02-28 23:25:08 +01:00
*/
2021-03-12 00:19:04 +01:00
# include <QCryptographicHash>
2020-06-06 00:05:32 +02:00
# include <QDateTime>
2021-03-12 00:19:04 +01:00
# include <QDebug>
2020-04-26 23:40:09 +02:00
# include <QFile>
# include <QFileInfo>
2021-04-07 12:47:46 +02:00
# include <QDir>
2020-02-28 23:25:08 +01:00
# include <QNetworkAccessManager>
# include <QNetworkReply>
2020-04-25 22:16:19 +02:00
# include <QStandardPaths>
2020-05-02 12:26:00 +02:00
# include <QTextDocumentFragment>
2021-04-03 22:52:46 +02:00
# include <QDomElement>
# include <QMultiMap>
2020-02-28 23:25:08 +01:00
2020-02-29 15:34:12 +01:00
# include <Syndication/Syndication>
2020-02-28 23:25:08 +01:00
2020-03-16 22:37:04 +01:00
# include "database.h"
2020-04-22 02:17:57 +02:00
# include "fetcher.h"
2020-03-16 22:37:04 +01:00
2020-04-22 02:17:57 +02:00
/**
 * Creates the fetcher together with the network access manager it uses
 * for all requests.
 *
 * The manager is parented to this object, uses the
 * NoLessSafeRedirectPolicy redirect policy, and has strict transport
 * security and its store enabled.
 */
Fetcher::Fetcher()
{
    auto *networkManager = new QNetworkAccessManager(this);
    networkManager->setRedirectPolicy(QNetworkRequest::NoLessSafeRedirectPolicy);
    networkManager->setStrictTransportSecurityEnabled(true);
    networkManager->enableStrictTransportSecurityStore(true);
    manager = networkManager;
}
2020-11-01 13:18:11 +01:00
void Fetcher : : fetch ( const QString & url )
2020-02-28 23:25:08 +01:00
{
2020-05-26 16:32:07 +02:00
qDebug ( ) < < " Starting to fetch " < < url ;
2020-03-31 23:52:03 +02:00
2020-05-31 18:17:25 +02:00
Q_EMIT startedFetchingFeed ( url ) ;
2020-05-26 16:32:07 +02:00
QNetworkRequest request ( ( QUrl ( url ) ) ) ;
2020-07-02 19:14:07 +02:00
QNetworkReply * reply = get ( request ) ;
2020-02-28 23:25:08 +01:00
connect ( reply , & QNetworkReply : : finished , this , [ this , url , reply ] ( ) {
2020-07-02 21:57:09 +02:00
if ( reply - > error ( ) ) {
qWarning ( ) < < " Error fetching feed " ;
qWarning ( ) < < reply - > errorString ( ) ;
Q_EMIT error ( url , reply - > error ( ) , reply - > errorString ( ) ) ;
} else {
QByteArray data = reply - > readAll ( ) ;
Syndication : : DocumentSource * document = new Syndication : : DocumentSource ( data , url ) ;
Syndication : : FeedPtr feed = Syndication : : parserCollection ( ) - > parse ( * document , QStringLiteral ( " Atom " ) ) ;
processFeed ( feed , url ) ;
}
2020-02-29 00:51:26 +01:00
delete reply ;
2020-02-28 23:25:08 +01:00
} ) ;
}
2020-04-25 22:16:19 +02:00
2020-05-31 18:17:25 +02:00
void Fetcher : : fetchAll ( )
{
QSqlQuery query ;
query . prepare ( QStringLiteral ( " SELECT url FROM Feeds; " ) ) ;
Database : : instance ( ) . execute ( query ) ;
2020-06-06 00:05:32 +02:00
while ( query . next ( ) ) {
2020-05-31 18:17:25 +02:00
fetch ( query . value ( 0 ) . toString ( ) ) ;
}
}
2020-11-01 13:18:11 +01:00
void Fetcher : : processFeed ( Syndication : : FeedPtr feed , const QString & url )
2020-05-18 16:47:12 +02:00
{
if ( feed . isNull ( ) )
return ;
2021-04-05 12:43:35 +02:00
// Retrieve "other" fields; this will include the "itunes" tags
QMultiMap < QString , QDomElement > otherItems = feed - > additionalProperties ( ) ;
2020-05-18 16:47:12 +02:00
QSqlQuery query ;
2020-06-06 00:05:32 +02:00
query . prepare ( QStringLiteral ( " UPDATE Feeds SET name=:name, image=:image, link=:link, description=:description, lastUpdated=:lastUpdated WHERE url=:url; " ) ) ;
2020-05-18 16:47:12 +02:00
query . bindValue ( QStringLiteral ( " :name " ) , feed - > title ( ) ) ;
query . bindValue ( QStringLiteral ( " :url " ) , url ) ;
2020-05-30 17:33:08 +02:00
query . bindValue ( QStringLiteral ( " :link " ) , feed - > link ( ) ) ;
query . bindValue ( QStringLiteral ( " :description " ) , feed - > description ( ) ) ;
2020-06-06 00:05:32 +02:00
QDateTime current = QDateTime : : currentDateTime ( ) ;
query . bindValue ( QStringLiteral ( " :lastUpdated " ) , current . toSecsSinceEpoch ( ) ) ;
2021-04-05 12:43:35 +02:00
// Process authors
QString authorname , authoremail ;
if ( feed - > authors ( ) . count ( ) > 0 ) {
for ( auto & author : feed - > authors ( ) ) {
processAuthor ( url , QLatin1String ( " " ) , author - > name ( ) , QLatin1String ( " " ) , QLatin1String ( " " ) ) ;
}
} else {
// Try to find itunes fields if plain author doesn't exist
QString authorname , authoremail ;
// First try the "itunes:owner" tag, if that doesn't succeed, then try the "itunes:author" tag
if ( otherItems . value ( QStringLiteral ( " http://www.itunes.com/dtds/podcast-1.0.dtdowner " ) ) . hasChildNodes ( ) ) {
QDomNodeList nodelist = otherItems . value ( QStringLiteral ( " http://www.itunes.com/dtds/podcast-1.0.dtdowner " ) ) . childNodes ( ) ;
for ( int i = 0 ; i < nodelist . length ( ) ; i + + ) {
if ( nodelist . item ( i ) . nodeName ( ) = = QStringLiteral ( " itunes:name " ) ) {
authorname = nodelist . item ( i ) . toElement ( ) . text ( ) ;
} else if ( nodelist . item ( i ) . nodeName ( ) = = QStringLiteral ( " itunes:email " ) ) {
authoremail = nodelist . item ( i ) . toElement ( ) . text ( ) ;
}
}
} else {
authorname = otherItems . value ( QStringLiteral ( " http://www.itunes.com/dtds/podcast-1.0.dtdauthor " ) ) . text ( ) ;
qDebug ( ) < < " authorname " < < authorname ;
}
2021-04-08 00:03:08 +02:00
if ( ! authorname . isEmpty ( ) ) processAuthor ( url , QLatin1String ( " " ) , authorname , QLatin1String ( " " ) , authoremail ) ;
2020-05-30 17:33:08 +02:00
}
2021-04-08 00:03:08 +02:00
2021-04-05 12:43:35 +02:00
QString image = feed - > image ( ) - > url ( ) ;
// If there is no regular image tag, then try the itunes tags
if ( image . isEmpty ( ) ) {
if ( otherItems . value ( QStringLiteral ( " http://www.itunes.com/dtds/podcast-1.0.dtdimage " ) ) . hasAttribute ( QStringLiteral ( " href " ) ) ) {
image = otherItems . value ( QStringLiteral ( " http://www.itunes.com/dtds/podcast-1.0.dtdimage " ) ) . attribute ( QStringLiteral ( " href " ) ) ;
}
}
if ( image . startsWith ( QStringLiteral ( " / " ) ) )
image = QUrl ( url ) . adjusted ( QUrl : : RemovePath ) . toString ( ) + image ;
2020-05-26 16:32:07 +02:00
query . bindValue ( QStringLiteral ( " :image " ) , image ) ;
2020-05-18 16:47:12 +02:00
Database : : instance ( ) . execute ( query ) ;
2020-05-30 17:33:08 +02:00
2020-05-18 16:47:12 +02:00
qDebug ( ) < < " Updated feed title: " < < feed - > title ( ) ;
2020-06-06 00:05:32 +02:00
Q_EMIT feedDetailsUpdated ( url , feed - > title ( ) , image , feed - > link ( ) , feed - > description ( ) , current ) ;
2020-05-26 16:32:07 +02:00
2020-05-18 16:47:12 +02:00
for ( const auto & entry : feed - > items ( ) ) {
processEntry ( entry , url ) ;
}
2020-05-26 16:32:07 +02:00
Q_EMIT feedUpdated ( url ) ;
2020-05-18 16:47:12 +02:00
}
2020-11-01 13:18:11 +01:00
void Fetcher : : processEntry ( Syndication : : ItemPtr entry , const QString & url )
2020-05-18 16:47:12 +02:00
{
qDebug ( ) < < " Processing " < < entry - > title ( ) ;
2021-04-05 12:43:35 +02:00
// Retrieve "other" fields; this will include the "itunes" tags
QMultiMap < QString , QDomElement > otherItems = entry - > additionalProperties ( ) ;
2020-05-18 16:47:12 +02:00
QSqlQuery query ;
query . prepare ( QStringLiteral ( " SELECT COUNT (id) FROM Entries WHERE id=:id; " ) ) ;
query . bindValue ( QStringLiteral ( " :id " ) , entry - > id ( ) ) ;
Database : : instance ( ) . execute ( query ) ;
query . next ( ) ;
if ( query . value ( 0 ) . toInt ( ) ! = 0 )
return ;
2021-04-07 10:39:12 +02:00
query . prepare ( QStringLiteral ( " INSERT INTO Entries VALUES (:feed, :id, :title, :content, :created, :updated, :link, 0, 1, :hasEnclosure, :image); " ) ) ;
2020-05-18 16:47:12 +02:00
query . bindValue ( QStringLiteral ( " :feed " ) , url ) ;
query . bindValue ( QStringLiteral ( " :id " ) , entry - > id ( ) ) ;
query . bindValue ( QStringLiteral ( " :title " ) , QTextDocumentFragment : : fromHtml ( entry - > title ( ) ) . toPlainText ( ) ) ;
query . bindValue ( QStringLiteral ( " :created " ) , static_cast < int > ( entry - > datePublished ( ) ) ) ;
query . bindValue ( QStringLiteral ( " :updated " ) , static_cast < int > ( entry - > dateUpdated ( ) ) ) ;
query . bindValue ( QStringLiteral ( " :link " ) , entry - > link ( ) ) ;
2021-02-21 19:54:10 +01:00
query . bindValue ( QStringLiteral ( " :hasEnclosure " ) , entry - > enclosures ( ) . length ( ) = = 0 ? 0 : 1 ) ;
2020-05-18 16:47:12 +02:00
if ( ! entry - > content ( ) . isEmpty ( ) )
query . bindValue ( QStringLiteral ( " :content " ) , entry - > content ( ) ) ;
else
query . bindValue ( QStringLiteral ( " :content " ) , entry - > description ( ) ) ;
2021-04-05 12:43:35 +02:00
// Look for image in itunes tags
QString image ;
if ( otherItems . value ( QStringLiteral ( " http://www.itunes.com/dtds/podcast-1.0.dtdimage " ) ) . hasAttribute ( QStringLiteral ( " href " ) ) ) {
image = otherItems . value ( QStringLiteral ( " http://www.itunes.com/dtds/podcast-1.0.dtdimage " ) ) . attribute ( QStringLiteral ( " href " ) ) ;
}
if ( image . startsWith ( QStringLiteral ( " / " ) ) )
image = QUrl ( url ) . adjusted ( QUrl : : RemovePath ) . toString ( ) + image ;
query . bindValue ( QStringLiteral ( " :image " ) , image ) ;
//qDebug() << "Entry image found" << image;
2020-05-18 16:47:12 +02:00
Database : : instance ( ) . execute ( query ) ;
2021-04-05 12:43:35 +02:00
if ( entry - > authors ( ) . count ( ) > 0 ) {
for ( const auto & author : entry - > authors ( ) ) {
processAuthor ( url , entry - > id ( ) , author - > name ( ) , author - > uri ( ) , author - > email ( ) ) ;
}
} else {
// As fallback, check if there is itunes "author" information
QString authorName = otherItems . value ( QStringLiteral ( " http://www.itunes.com/dtds/podcast-1.0.dtdauthor " ) ) . text ( ) ;
if ( ! authorName . isEmpty ( ) ) processAuthor ( url , entry - > id ( ) , authorName , QLatin1String ( " " ) , QLatin1String ( " " ) ) ;
2020-05-18 16:47:12 +02:00
}
2020-05-18 17:02:46 +02:00
for ( const auto & enclosure : entry - > enclosures ( ) ) {
processEnclosure ( enclosure , entry , url ) ;
}
2021-04-05 12:43:35 +02:00
2021-04-03 11:44:08 +02:00
Q_EMIT entryAdded ( url , entry - > id ( ) ) ;
2020-05-18 16:47:12 +02:00
}
2021-04-05 12:43:35 +02:00
void Fetcher : : processAuthor ( const QString & url , const QString & entryId , const QString & authorName , const QString & authorUri , const QString & authorEmail )
2020-05-18 16:47:12 +02:00
{
QSqlQuery query ;
2021-04-08 11:12:16 +02:00
query . prepare ( QStringLiteral ( " SELECT COUNT (id) FROM Authors WHERE feed=:feed AND id=:id; " ) ) ;
query . bindValue ( QStringLiteral ( " :feed " ) , url ) ;
query . bindValue ( QStringLiteral ( " :id " ) , entryId ) ;
Database : : instance ( ) . execute ( query ) ;
query . next ( ) ;
if ( query . value ( 0 ) . toInt ( ) ! = 0 )
query . prepare ( QStringLiteral ( " UPDATE Authors SET feed=:feed, id=:id, name=:name, uri=:uri, email=:email WHERE feed=:feed AND id=:id; " ) ) ;
else
query . prepare ( QStringLiteral ( " INSERT INTO Authors VALUES(:feed, :id, :name, :uri, :email); " ) ) ;
2020-05-26 16:32:07 +02:00
query . bindValue ( QStringLiteral ( " :feed " ) , url ) ;
2020-05-30 17:33:08 +02:00
query . bindValue ( QStringLiteral ( " :id " ) , entryId ) ;
2021-04-05 12:43:35 +02:00
query . bindValue ( QStringLiteral ( " :name " ) , authorName ) ;
query . bindValue ( QStringLiteral ( " :uri " ) , authorUri ) ;
query . bindValue ( QStringLiteral ( " :email " ) , authorEmail ) ;
2020-05-18 16:47:12 +02:00
Database : : instance ( ) . execute ( query ) ;
}
2020-11-01 13:18:11 +01:00
void Fetcher : : processEnclosure ( Syndication : : EnclosurePtr enclosure , Syndication : : ItemPtr entry , const QString & feedUrl )
2020-05-18 17:02:46 +02:00
{
QSqlQuery query ;
2021-04-08 20:28:15 +02:00
query . prepare ( QStringLiteral ( " SELECT COUNT (id) FROM Enclosures WHERE feed=:feed AND id=:id; " ) ) ;
query . bindValue ( QStringLiteral ( " :feed " ) , feedUrl ) ;
query . bindValue ( QStringLiteral ( " :id " ) , entry - > id ( ) ) ;
Database : : instance ( ) . execute ( query ) ;
query . next ( ) ;
if ( query . value ( 0 ) . toInt ( ) ! = 0 )
query . prepare ( QStringLiteral ( " UPDATE Enclosures SET feed=:feed, id=:id, duration=:duration, size=:size, title=:title, type=:type, url=:url, playposition=:playposition; " ) ) ;
else
query . prepare ( QStringLiteral ( " INSERT INTO Enclosures VALUES (:feed, :id, :duration, :size, :title, :type, :url, :playposition); " ) ) ;
2020-05-26 16:32:07 +02:00
query . bindValue ( QStringLiteral ( " :feed " ) , feedUrl ) ;
2020-05-18 17:02:46 +02:00
query . bindValue ( QStringLiteral ( " :id " ) , entry - > id ( ) ) ;
query . bindValue ( QStringLiteral ( " :duration " ) , enclosure - > duration ( ) ) ;
query . bindValue ( QStringLiteral ( " :size " ) , enclosure - > length ( ) ) ;
query . bindValue ( QStringLiteral ( " :title " ) , enclosure - > title ( ) ) ;
query . bindValue ( QStringLiteral ( " :type " ) , enclosure - > type ( ) ) ;
query . bindValue ( QStringLiteral ( " :url " ) , enclosure - > url ( ) ) ;
2021-04-08 20:28:15 +02:00
query . bindValue ( QStringLiteral ( " :playposition " ) , 0 ) ;
2020-05-18 17:02:46 +02:00
Database : : instance ( ) . execute ( query ) ;
}
2021-04-07 12:47:46 +02:00
/**
 * Returns the local path of the cached copy of the image at @p url.
 *
 * When no cached file exists, or the cached file is empty, a download into
 * the cache path is started and an empty string is returned instead.
 *
 * @param url address of the image
 * @return path of the cached file, or an empty string while it is not available
 */
QString Fetcher::image(const QString &url) const
{
    const QString cachedFile = imagePath(url);

    const bool usable = QFileInfo::exists(cachedFile) && QFileInfo(cachedFile).size() != 0;
    if (usable) {
        return cachedFile;
    }

    download(url, cachedFile);
    return QLatin1String("");
}
2021-04-07 12:47:46 +02:00
/**
 * Starts an asynchronous download of @p url into the file @p filePath.
 *
 * When the request has finished and the reply is open, its payload is
 * written to @p filePath and downloadFinished() is emitted. If the file
 * cannot be opened for writing, a warning is logged and downloadFinished()
 * is not emitted, because nothing was stored. The reply deletes itself
 * after the finished handler has run.
 *
 * @param url      address to download
 * @param filePath local file the payload is written to
 * @return the reply object of the running request
 */
QNetworkReply *Fetcher::download(const QString &url, const QString &filePath) const
{
    QNetworkRequest request((QUrl(url)));
    QNetworkReply *reply = get(request);
    connect(reply, &QNetworkReply::finished, this, [this, url, filePath, reply]() {
        if (reply->isOpen()) {
            const QByteArray data = reply->readAll();
            QFile file(filePath);
            if (file.open(QIODevice::WriteOnly)) {
                file.write(data);
                file.close();

                Q_EMIT downloadFinished(url);
            } else {
                qWarning() << "Could not open" << filePath << "for writing:" << file.errorString();
            }
        }
        reply->deleteLater();
    });
    return reply;
}
2020-04-26 23:40:09 +02:00
2020-11-01 13:18:11 +01:00
void Fetcher : : removeImage ( const QString & url )
2020-04-26 23:40:09 +02:00
{
2021-04-07 12:47:46 +02:00
qDebug ( ) < < imagePath ( url ) ;
QFile ( imagePath ( url ) ) . remove ( ) ;
2020-04-26 23:40:09 +02:00
}
2021-04-07 12:47:46 +02:00
/**
 * Returns the path under which the image at @p url is cached.
 *
 * The file lives in the "images" directory below the application's cache
 * location and is named after the hex-encoded MD5 hash of the url. The
 * directory is created when it does not exist yet.
 *
 * @param url address of the image
 * @return local path for the cached copy
 */
QString Fetcher::imagePath(const QString &url) const
{
    const QString cacheDir = QStandardPaths::writableLocation(QStandardPaths::CacheLocation) + QStringLiteral("/images/");
    // Make sure the directory exists before handing out a path inside it
    QDir().mkpath(cacheDir);

    const QByteArray digest = QCryptographicHash::hash(url.toUtf8(), QCryptographicHash::Md5).toHex();
    return cacheDir + QString::fromLatin1(digest);
}
2020-07-02 19:14:07 +02:00
2021-04-07 12:47:46 +02:00
/**
 * Returns the path under which the enclosure at @p url is stored.
 *
 * The file lives in the "enclosures" directory below the application's data
 * location and is named after the hex-encoded MD5 hash of the url. The
 * directory is created when it does not exist yet.
 *
 * @param url address of the enclosure
 * @return local path for the downloaded enclosure
 */
QString Fetcher::enclosurePath(const QString &url) const
{
    const QString dataDir = QStandardPaths::writableLocation(QStandardPaths::DataLocation) + QStringLiteral("/enclosures/");
    // Make sure the directory exists before handing out a path inside it
    QDir().mkpath(dataDir);

    const QByteArray digest = QCryptographicHash::hash(url.toUtf8(), QCryptographicHash::Md5).toHex();
    return dataDir + QString::fromLatin1(digest);
}
/**
 * Sends a GET request through the shared network access manager.
 *
 * The User-Agent header of @p request is set before the request is sent,
 * so the caller's request object is modified.
 *
 * @param request request to send
 * @return the reply object of the running request
 */
QNetworkReply *Fetcher::get(QNetworkRequest &request) const
{
    // Identify ourselves on every outgoing request
    request.setRawHeader("User-Agent", "Alligator/0.1; Syndication");
    QNetworkReply *pending = manager->get(request);
    return pending;
}