GeniusLyricsProvider: Fix parsing of different HTML pages

This commit is contained in:
Jonas Kvinge 2021-12-19 20:59:38 +01:00
parent bbd81e7d9c
commit ce3af4961b
2 changed files with 31 additions and 8 deletions

View File

@ -652,7 +652,8 @@ QString DecodeHtmlEntities(const QString &text) {
.replace("&lt;", "<")
.replace("&#60;", "<")
.replace("&gt;", ">")
.replace("&#62;", ">");
.replace("&#62;", ">")
.replace("&#x27;", "'");
return copy;

View File

@ -231,7 +231,7 @@ void GeniusLyricsProvider::AccessTokenRequestFinished(QNetworkReply *reply) {
}
else {
// See if there is Json data containing "status" and "userMessage" then use that instead.
QByteArray data(reply->readAll());
QByteArray data = reply->readAll();
QJsonParseError json_error;
QJsonDocument json_doc = QJsonDocument::fromJson(data, &json_error);
if (json_error.error == QJsonParseError::NoError && !json_doc.isEmpty() && json_doc.isObject()) {
@ -255,7 +255,7 @@ void GeniusLyricsProvider::AccessTokenRequestFinished(QNetworkReply *reply) {
}
}
QByteArray data(reply->readAll());
QByteArray data = reply->readAll();
QJsonParseError json_error;
QJsonDocument json_doc = QJsonDocument::fromJson(data, &json_error);
@ -333,8 +333,6 @@ bool GeniusLyricsProvider::StartSearch(const QString &artist, const QString &alb
replies_ << reply;
QObject::connect(reply, &QNetworkReply::finished, this, [this, reply, id]() { HandleSearchReply(reply, id); });
//qLog(Debug) << "GeniusLyrics: Sending request for" << url;
return true;
}
@ -504,9 +502,33 @@ void GeniusLyricsProvider::HandleLyricReply(QNetworkReply *reply, const int sear
if (begin_idx > 0) {
begin_idx += tag_begin.length();
qint64 end_idx = content.indexOf(tag_end, begin_idx);
lyrics = content.mid(begin_idx, end_idx - begin_idx);
lyrics = lyrics.remove(QRegularExpression("<[^>]*>"));
lyrics = lyrics.trimmed();
if (end_idx > 0) {
QString text = content.mid(begin_idx, end_idx - begin_idx);
text = text.replace(QRegularExpression("<br[^>]+>"), "\n");
text = text.remove(QRegularExpression("<[^>]*>"));
text = text.trimmed();
if (text.length() < 6000) {
lyrics = text;
}
}
}
else {
QRegularExpressionMatch rematch = QRegularExpression("<div data-lyrics-container=[^>]+>").match(content);
if (rematch.hasMatch()) {
begin_idx = content.indexOf(rematch.captured());
if (begin_idx > 0) {
qint64 end_idx = content.indexOf("</div>", begin_idx + rematch.captured().length());
if (end_idx > 0) {
QString text = content.mid(begin_idx, end_idx - begin_idx);
text = text.replace(QRegularExpression("<br[^>]+>"), "\n");
text = text.remove(QRegularExpression("<[^>]*>"));
text = text.trimmed();
if (text.length() < 6000 && !text.contains("there are no lyrics to", Qt::CaseInsensitive)) {
lyrics = text;
}
}
}
}
}
if (!lyrics.isEmpty()) {