Some initial lyrics support. Scrapes all the sites that are supported by Amarok's Ultimate Lyrics script. No GUI or configuration yet.

This commit is contained in:
David Sansome 2010-09-26 14:21:23 +00:00
parent 12a144eeb3
commit 4f1a4fe39a
19 changed files with 1073 additions and 11 deletions

View File

@ -11,7 +11,7 @@ find_package(Qt4 REQUIRED QtCore QtGui QtOpenGL QtSql QtNetwork QtXml)
if(UNIX AND NOT APPLE)
find_package(Qt4 REQUIRED QtDbus)
endif(UNIX AND NOT APPLE)
find_package(Qt4 COMPONENTS Phonon)
find_package(Qt4 COMPONENTS Phonon QtWebKit)
# Find Qt's lconvert binary. Try qt's binary dir first, fall back to looking in PATH
find_program(QT_LCONVERT_EXECUTABLE NAMES lconvert lconvert-qt4 PATHS ${QT_BINARY_DIR} NO_DEFAULT_PATH)
@ -212,6 +212,7 @@ endif (WIN32)
add_subdirectory(3rdparty/universalchardet)
add_subdirectory(tests)
add_subdirectory(dist)
add_subdirectory(tools/ultimate_lyrics_parser)
option(WITH_DEBIAN OFF)
if(WITH_DEBIAN)

View File

@ -263,5 +263,6 @@
<file>icons/48x48/ipodtouchicon.png</file>
<file>icons/32x32/wiimotedev.png</file>
<file>schema-17.sql</file>
<file>lyrics/ultimate_providers.xml</file>
</qresource>
</RCC>

View File

@ -0,0 +1,212 @@
<?xml version="1.0" encoding="UTF-8"?>
<lyricproviders>
<!--
Each <provider> describes one lyrics site:
name, title, charset, url: attributes read for every provider. The url may
contain {artist}, {album}, {title} (lower-cased), {Artist}, {Album},
{Title} (unchanged), {Title2} (first letter upper-case, rest lower-case)
and {a} (lower-cased first character of the artist).
urlFormat: every character listed in "replace" is replaced by "with" in a
field value before it is put into the url.
extract: items are applied in order to the downloaded page. An item is
either a whole opening tag ("tag") or a "begin"/"end" marker pair.
exclude: same item format, the matched span is removed from the result.
invalidIndicator: if "value" occurs in the page, the page is rejected.
-->
<provider name="azlyrics.com" title="{artist} LYRICS - {title}" charset="utf-8" url="http://www.azlyrics.com/lyrics/{artist}/{title}.html">
<urlFormat replace=" ._@,;&amp;\/'&quot;-" with=""/>
<extract>
<item begin="&lt;!-- END OF RINGTONE 1 --&gt;" end="&lt;!-- RINGTONE 2 --&gt;"/>
</extract>
<exclude>
<item tag="&lt;B&gt;"/>
<item begin="&lt;i&gt;[" end="]&lt;/i&gt;"/>
<item begin="[" end="]"/>
</exclude>
</provider>
<provider name="directlyrics.com" title="{artist} - {title} lyrics" charset="iso-8859-1" url="http://www.directlyrics.com/{artist}-{title}-lyrics.html">
<urlFormat replace=" _@,;&amp;\/'&quot;" with="-"/>
<urlFormat replace="." with=""/>
<extract>
<item tag="&lt;div id=&quot;lyricsContent&quot;&gt;"/>
<item tag="&lt;p&gt;"/>
</extract>
<exclude>
<item begin="&lt;b&gt;" end="&lt;/b&gt;"/>
</exclude>
</provider>
<provider name="elyrics.net" title="{title} Lyrics - {artist}" charset="iso-8859-1" url="http://www.elyrics.net/read/{a}/{artist}-lyrics/{title}-lyrics.html">
<urlFormat replace=" _@;&amp;\/&quot;" with="-"/>
<urlFormat replace="'" with="_"/>
<extract>
<item tag="&lt;div class='ly' style='font-size:12px;'&gt;"/>
</extract>
<exclude>
<item tag="&lt;strong&gt;"/>
<item tag="&lt;em&gt;"/>
</exclude>
<invalidIndicator value="Page not Found"/>
</provider>
<provider name="loudson.gs" title="" charset="utf-8" url="http://www.loudson.gs/{a}/{artist}/{album}/{title}">
<urlFormat replace=" _@,;&amp;\/&quot;" with="-"/>
<urlFormat replace="." with=""/>
<extract>
<item tag="&lt;div class=&quot;middle_col_TracksLyrics &quot;&gt;"/>
</extract>
</provider>
<provider name="lyrics.com" title="{artist} - {title} Lyrics" charset="utf-8" url="http://www.lyrics.com/lyrics/{artist}/{title}.html">
<urlFormat replace=" _@,;&amp;\/&quot;" with="-"/>
<urlFormat replace="'." with=""/>
<extract>
<item tag="&lt;div id=&quot;lyrics&quot; class=&quot;SCREENONLY&quot;&gt;"/>
</extract>
<invalidIndicator value="we do not have the lyric for this song"/>
</provider>
<provider name="lyrics.wikia.com" title="{artist}:{title} Lyrics - " charset="utf-8" url="http://lyrics.wikia.com/{Artist}:{Title}">
<urlFormat replace=" _@;\&quot;" with="_"/>
<urlFormat replace="?" with="%3F"/>
<extract>
<item begin="&lt;div class='lyricbox'&gt;" end="&lt;!--"/>
</extract>
<exclude>
<item tag="&lt;div class='rtMatcher'&gt;"/>
<item tag="&lt;span style=&quot;padding:1em&quot;&gt;"/>
</exclude>
</provider>
<provider name="lyricsbay.com" title="{title} lyrics {artist}" charset="iso-8859-1" url="http://www.lyricsbay.com/{title}_lyrics-{artist}.html">
<urlFormat replace=" _@,;&amp;\/'&quot;" with="_"/>
<urlFormat replace="." with=""/>
<extract>
<item tag="&lt;div id=EchoTopic&gt;"/>
</extract>
<exclude>
<item tag="&lt;textarea name=&quot;songscpy&quot; id=&quot;songscpyid&quot; onclick=&quot;callselect('songscpyid','selectswf')&quot; rows=&quot;3&quot; cols=&quot;45&quot; READONLY&gt;"/>
</exclude>
</provider>
<provider name="lyricsdownload.com" title="{artist} - {title} LYRICS" charset="utf-8" url="http://www.lyricsdownload.com/{artist}-{title}-lyrics.html">
<urlFormat replace=" _@,;&amp;\/&quot;" with="-"/>
<urlFormat replace="." with=""/>
<extract>
<item tag="&lt;div id=&quot;div_customCSS&quot;&gt;"/>
</extract>
<invalidIndicator value="We haven't lyrics of this song"/>
</provider>
<provider name="lyricsmania.com" title="{artist} - {title} Lyrics" charset="iso-8859-1" url="http://www.lyricsmania.com/{title}_lyrics_{artist}.html">
<urlFormat replace=" _@;&amp;\/&quot;'." with="_"/>
<extract>
<item begin="&lt;span style=&quot;font-size:14px;&quot;&gt;" end="&lt;span style=&quot;font-size:14px;&quot;&gt;"/>
<item begin="&lt;/center&gt;" end="&lt;a"/>
</extract>
<invalidIndicator value="The lyrics you requested is not in our archive yet,"/>
</provider>
<provider name="lyricsmode.com" title="{artist} - {title} lyrics" charset="iso-8859-1" url="http://www.lyricsmode.com/lyrics/{a}/{artist}/{title}.html">
<urlFormat replace=" ._@,;&amp;\/&quot;" with="_"/>
<extract>
<item tag="&lt;div id='songlyrics_h' class='dn'&gt;"/>
</extract>
<invalidIndicator value="Sorry, we have no"/>
</provider>
<provider name="lyricsplugin.com" title="{artist} - {title} Lyrics" charset="utf-8" url="http://www.lyricsplugin.com/winamp03/plugin/?title={title}&amp;artist={artist}">
<urlFormat replace="_@;&amp;\/&quot;" with="-"/>
<urlFormat replace="'" with=""/>
<urlFormat replace=" " with="%20"/>
<extract>
<item tag="&lt;div id=&quot;lyrics&quot;&gt;"/>
</extract>
</provider>
<provider name="lyricsreg.com" title="{title} lyrics {artist}" charset="iso-8859-1" url="http://www.lyricsreg.com/lyrics/{artist}/{title}/">
<urlFormat replace=" _@,;&amp;\/&quot;" with="+"/>
<urlFormat replace="'." with=""/>
<extract>
<item begin="Ringtone to your Cell" end="Ringtone to your Cell"/>
<item begin="&lt;div style=&quot;text-align:center;&quot;&gt;" end="&lt;a"/>
</extract>
<invalidIndicator value="Page not Found"/>
</provider>
<provider name="lyricstime.com" title="{artist} - {title} Lyrics" charset="iso-8859-1" url="http://www.lyricstime.com/{artist}-{title}-lyrics.html">
<urlFormat replace=" _@,;&amp;\/&quot;'" with="-"/>
<urlFormat replace="." with=""/>
<extract>
<item tag="&lt;div id=&quot;songlyrics&quot; &gt;"/>
<item tag="&lt;p&gt;"/>
</extract>
</provider>
<provider name="lyriki.com" title="" charset="utf-8" url="http://www.lyriki.com/{artist}:{title}">
<urlFormat replace=" _@,;&amp;\/&quot;" with="_"/>
<urlFormat replace="." with=""/>
<extract>
<item begin="&lt;/table&gt;" end="&lt;div class=&quot;printfooter&quot;&gt;"/>
<item tag="&lt;p&gt;"/>
</extract>
</provider>
<provider name="metrolyrics.com" title="{artist} - {title} LYRICS" charset="utf-8" url="http://www.metrolyrics.com/{title}-lyrics-{artist}.html">
<urlFormat replace=" _@,;&amp;\/&quot;" with="-"/>
<urlFormat replace="'." with=""/>
<extract>
<item tag="&lt;span id=&quot;lyrics&quot;&gt;"/>
</extract>
<extract>
<item tag="&lt;div id=&quot;lyrics&quot;&gt;"/>
</extract>
<exclude>
<item tag="&lt;h5&gt;"/>
</exclude>
<invalidIndicator value="These lyrics are missing"/>
</provider>
<provider name="mp3lyrics.org" title="{artist} &amp;quot;{title}&amp;quot; Lyrics" charset="utf-8" url="http://www.mp3lyrics.org/{a}/{artist}/{title}/">
<urlFormat replace=" _@,;&amp;\/&quot;" with="-"/>
<urlFormat replace="'." with=""/>
<extract>
<item tag="&lt;span id=gn_lyricsB&gt;"/>
</extract>
<extract>
<item tag="&lt;div class=&quot;KonaBody&quot; id=&quot;EchoTopic&quot;&gt;"/>
</extract>
<exclude>
<item tag="&lt;font size=2&gt;"/>
<item begin="&lt;b&gt;&lt;i&gt;" end="&lt;/u&gt;&lt;/b&gt;:"/>
<item begin="&lt;b&gt;Lyrics" end="&lt;/b&gt;"/>
</exclude>
<invalidIndicator value="Something went wrong"/>
</provider>
<provider name="seeklyrics.com" title="{artist} - {title} Lyrics" charset="iso-8859-1" url="http://www.seeklyrics.com/lyrics/{Artist}/{Title}.html">
<urlFormat replace=" _@,;&amp;\/'&quot;" with="-"/>
<urlFormat replace="." with=""/>
<extract>
<item tag="&lt;div id=&quot;songlyrics&quot;&gt;"/>
</extract>
</provider>
<provider name="songlyrics.com" title="{title} LYRICS - {artist}" charset="utf-8" url="http://www.songlyrics.com/{artist}/{title}-lyrics/">
<urlFormat replace=" ._@,;&amp;\/&quot;" with="-"/>
<urlFormat replace="'" with="_"/>
<extract>
<item tag="&lt;p id=&quot;songLyricsDiv&quot; ondragstart=&quot;return false;&quot; onselectstart=&quot;return false;&quot; oncontextmenu=&quot;return false;&quot; class=&quot;songLyricsV14&quot; style=&quot;font-size: 14px;z-index: 9999;position: absolute;left: -6000px;&quot;&gt;"/>
</extract>
<exclude>
<item begin="&#10;[" end="] "/>
</exclude>
<invalidIndicator value="Sorry, we have no"/>
<invalidIndicator value="This is an upcoming album and we do not have the"/>
</provider>
<provider name="tekstowo.pl (Polish translations)" title=" {artist} - {title} - " charset="iso-8859-2" url="http://www.tekstowo.pl/piosenka,{artist},{title}.html">
<urlFormat replace=" _@,;&amp;\/'&quot;." with="_"/>
<extract>
<item tag="&lt;div id=&quot;tran&quot; style=&quot;display:none&quot;&gt;"/>
</extract>
<extract>
<item tag="&lt;div id=&quot;tex&quot; style=&quot;display:block&quot;&gt;"/>
</extract>
<exclude>
<item begin="&lt;div style=&quot;float:left&quot;" end="&gt;"/>
</exclude>
</provider>
<provider name="teksty.org" title="{artist} - {title} - tekst" charset="UTF-8" url="http://www.teksty.org/{Artist},{Title2}">
<urlFormat replace=" _@,;&amp;\/&quot;." with="_"/>
<urlFormat replace="'" with=""/>
<extract>
<item tag="&lt;span class=&quot;text&quot; id='text25534-dta'&gt;"/>
</extract>
<exclude>
<item begin="&lt;div style=&quot;float:left&quot;" end="&gt;"/>
</exclude>
</provider>
<provider name="vagalume.uol.com.br" title="{title} de {artist} no VAGALUME" charset="iso-8859-1" url="http://vagalume.uol.com.br/{artist}/{title}.html">
<urlFormat replace=" _@,;&amp;\/'&quot;." with="-"/>
<extract>
<item tag="&lt;div class=&quot;tab_original&quot;&gt;"/>
</extract>
</provider>
<provider name="vagalume.uol.com.br (Portuguese translations)" title="{title} de {artist} no VAGALUME" charset="iso-8859-1" url="http://vagalume.uol.com.br/{artist}/{title}-traducao.html">
<urlFormat replace=" _@,;&amp;\/'&quot;." with="-"/>
<extract>
<item tag="&lt;div class=&quot;tab_tra_pt&quot;&gt;"/>
</extract>
</provider>
</lyricproviders>

View File

@ -83,6 +83,11 @@ set(SOURCES
library/librarywatcher.cpp
library/sqlrow.cpp
lyrics/htmlscraper.cpp
lyrics/lyricfetcher.cpp
lyrics/lyricprovider.cpp
lyrics/ultimatelyricsreader.cpp
playlist/playlist.cpp
playlist/playlistbackend.cpp
playlist/playlistcontainer.cpp
@ -209,6 +214,11 @@ set(HEADERS
library/libraryview.h
library/librarywatcher.h
lyrics/htmlscraper.h
lyrics/lyricfetcher.h
lyrics/lyricprovider.h
lyrics/ultimatelyricsreader.h
playlist/playlist.h
playlist/playlistbackend.h
playlist/playlistcontainer.h

View File

@ -31,16 +31,7 @@ void NetworkAccessManager::Get(const QUrl &url, QObject *receiver,
void NetworkAccessManager::RunGet(const QUrl &url, QObject *receiver,
const char *slot, quint64 id, bool force_cache) {
QNetworkRequest req(url);
req.setRawHeader("User-Agent", QString("%1 %2").arg(
QCoreApplication::applicationName(),
QCoreApplication::applicationVersion()).toUtf8());
if (force_cache) {
req.setAttribute(QNetworkRequest::CacheLoadControlAttribute,
QNetworkRequest::PreferCache);
}
QNetworkRequest req = CreateRequest(url, force_cache);
QNetworkReply* reply = network_->get(req);
connect(reply, SIGNAL(finished()), SLOT(RequestFinished()));
@ -52,6 +43,19 @@ void NetworkAccessManager::RunGet(const QUrl &url, QObject *receiver,
pending_replies_.insert(reply, r);
}
// Builds the request used for a fetch of |url|: it carries a
// "<application name> <application version>" User-Agent header and, when
// |force_cache| is set, asks Qt to prefer cached data over the network.
QNetworkRequest NetworkAccessManager::CreateRequest(const QUrl& url, bool force_cache) {
  const QString user_agent = QString("%1 %2").arg(
      QCoreApplication::applicationName(),
      QCoreApplication::applicationVersion());

  QNetworkRequest request(url);
  request.setRawHeader("User-Agent", user_agent.toUtf8());

  if (force_cache) {
    request.setAttribute(QNetworkRequest::CacheLoadControlAttribute,
                         QNetworkRequest::PreferCache);
  }

  return request;
}
void NetworkAccessManager::RequestFinished() {
QNetworkReply* reply = static_cast<QNetworkReply*>(sender());
Receiver r = pending_replies_.take(reply);
@ -60,3 +64,22 @@ void NetworkAccessManager::RequestFinished() {
Q_ARG(quint64, r.id),
Q_ARG(QNetworkReply*, reply));
}
// Fetches |url| and does not return until the reply has finished.
// The work is done by the RunGetBlocking slot, invoked through a blocking
// queued connection. The returned pointer is whatever that slot stored; it
// is still NULL if the slot never ran.
QNetworkReply* NetworkAccessManager::GetBlocking(const QUrl& url, bool force_cache) {
  QNetworkReply* finished_reply = NULL;
  QNetworkReply** out = &finished_reply;

  QMetaObject::invokeMethod(this, "RunGetBlocking",
                            Qt::BlockingQueuedConnection,
                            Q_ARG(QUrl, url),
                            Q_ARG(bool, force_cache),
                            Q_ARG(QNetworkReply**, out));

  return finished_reply;
}
void NetworkAccessManager::RunGetBlocking(const QUrl& url, bool force_cache,
QNetworkReply** reply) {
QNetworkRequest req = CreateRequest(url, force_cache);
*reply = network_->get(req);
QEventLoop loop;
connect(*reply, SIGNAL(finished()), &loop, SLOT(quit()));
loop.exec();
}

View File

@ -7,6 +7,7 @@
class QNetworkAccessManager;
class QNetworkDiskCache;
class QNetworkReply;
class QNetworkRequest;
class QUrl;
// It's like QNetworkAccessManager, but threadsafe, and sets our User-Agent
@ -23,13 +24,17 @@ class NetworkAccessManager : public QObject {
// Thread-safe. slot should take (quint64, QNetworkReply*)
void Get(const QUrl& url, QObject* receiver, const char* slot,
quint64 id, bool force_cache = false);
QNetworkReply* GetBlocking(const QUrl& url, bool force_cache = false);
private slots:
void RunGet(const QUrl& url, QObject* receiver, const char* slot,
quint64 id, bool force_cache);
void RunGetBlocking(const QUrl& url, bool force_cache, QNetworkReply** reply);
void RequestFinished();
private:
QNetworkRequest CreateRequest(const QUrl& url, bool force_cache);
QNetworkAccessManager* network_;
QNetworkDiskCache* cache_;

196
src/lyrics/htmlscraper.cpp Normal file
View File

@ -0,0 +1,196 @@
/* This file is part of Clementine.
Clementine is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Clementine is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Clementine. If not, see <http://www.gnu.org/licenses/>.
*/
#include "htmlscraper.h"
#include "core/networkaccessmanager.h"
#include <QNetworkReply>
#include <QTextCodec>
#include <boost/scoped_ptr.hpp>
const int HtmlScraper::kRedirectLimit = 5;
// Creates a scraper with no URL, charset or rules configured yet; |network|
// and |parent| are handed straight to the LyricProvider base class.
HtmlScraper::HtmlScraper(NetworkAccessManager* network, QObject* parent)
: LyricProvider(network, parent)
{
}
// Looks up the lyrics for |metadata| on this provider's site.
//
//  1. Builds the page URL from url_ by substituting the song's fields.
//  2. Downloads it (blocking), following at most kRedirectLimit fetches.
//  3. Decodes the body with the provider's charset.
//  4. Rejects the page if it contains any invalid indicator.
//  5. Runs every extract rule against the whole page; the last rule that
//     yields non-empty text wins.
//  6. Strips everything matched by the exclude rules.
//
// Returns a Result whose |valid| flag is set only when non-empty content was
// extracted.  Any failure (unknown charset, network error, too many
// redirects, nothing extracted) yields a default, invalid Result.
// Result::title is never filled in here.
LyricProvider::Result HtmlScraper::Search(const Song& metadata) const {
  LyricProvider::Result ret;

  // Get the text codec
  const QTextCodec* codec = QTextCodec::codecForName(charset_.toAscii().constData());
  if (!codec) {
    qWarning() << "Invalid codec" << charset_;
    return ret;
  }

  // Fill in fields in the URL.  Lower-case tags get the lower-cased value,
  // capitalised tags get the value as it is stored in the song.
  QString url_text(url_);
  DoUrlReplace(&url_text, "{artist}", metadata.artist().toLower());
  DoUrlReplace(&url_text, "{album}",  metadata.album().toLower());
  DoUrlReplace(&url_text, "{title}",  metadata.title().toLower());
  DoUrlReplace(&url_text, "{Artist}", metadata.artist());
  DoUrlReplace(&url_text, "{Album}",  metadata.album());
  DoUrlReplace(&url_text, "{Title}",  metadata.title());
  DoUrlReplace(&url_text, "{Title2}", TitleCase(metadata.title()));
  DoUrlReplace(&url_text, "{a}",      FirstChar(metadata.artist()));

  QUrl url(url_text);

  // Fetch the URL, follow redirects
  boost::scoped_ptr<QNetworkReply> reply;
  for (int i=0 ; ; ++i) {
    if (i >= kRedirectLimit)
      return ret;

    qDebug() << "Fetching" << url;
    reply.reset(network_->GetBlocking(url));

    // GetBlocking() returns NULL when its slot could not be invoked, so the
    // pointer has to be checked before it is dereferenced.
    if (!reply || reply->error() != QNetworkReply::NoError)
      return ret;

    const QVariant redirect_target =
        reply->attribute(QNetworkRequest::RedirectionTargetAttribute);
    if (!redirect_target.isValid())
      break;

    // The target may be relative.  Resolving it against the URL that was just
    // fetched keeps the target's own query string and handles paths that do
    // not start with '/'; an absolute target is returned unchanged.
    url = url.resolved(redirect_target.toUrl());
  }

  const QString original_content = codec->toUnicode(reply->readAll());

  // Check for invalid indicators
  foreach (const QString& indicator, invalid_indicators_) {
    if (original_content.contains(indicator))
      return ret;
  }

  // Apply extract rules.  Each rule starts from the full page again, and a
  // later successful rule overrides an earlier one.
  foreach (const Rule& rule, extract_rules_) {
    QString content = original_content;
    ApplyExtractRule(rule, &content);

    if (!content.isEmpty())
      ret.content = content;
  }

  // Apply exclude rules
  foreach (const Rule& rule, exclude_rules_) {
    ApplyExcludeRule(rule, &ret.content);
  }

  if (!ret.content.isEmpty())
    ret.valid = true;
  return ret;
}
// Runs every item of |rule| over |*content| in order, each item narrowing the
// text left by the previous one.  An item whose second string is null names a
// whole tag; otherwise the pair is a begin/end marker.
void HtmlScraper::ApplyExtractRule(const Rule& rule, QString* content) const {
  foreach (const RuleItem& step, rule) {
    const QString& marker = step.first;
    const QString& terminator = step.second;

    *content = terminator.isNull()
        ? ExtractXmlTag(*content, marker)
        : Extract(*content, marker, terminator);
  }
}
// Returns the text between the opening tag |tag| (matched literally,
// attributes included) and the next closing tag of the same element name.
// Returns a null string when |tag| does not look like an opening tag.
QString HtmlScraper::ExtractXmlTag(const QString& source, const QString& tag) {
  QRegExp tag_name_re("<(\\w+).*>");
  const bool looks_like_tag = tag_name_re.indexIn(tag) != -1;
  if (!looks_like_tag)
    return QString();

  const QString closing_tag = "</" + tag_name_re.cap(1) + ">";
  return Extract(source, tag, closing_tag);
}
// Returns the text of |source| that lies strictly between the first
// occurrence of |begin| and the first occurrence of |end| after it.
// Returns a null string when either marker is missing.
QString HtmlScraper::Extract(const QString& source, const QString& begin, const QString& end) {
  int begin_idx = source.indexOf(begin);
  if (begin_idx == -1)
    return QString();
  // Skip past the begin marker itself
  begin_idx += begin.length();

  int end_idx = source.indexOf(end, begin_idx);
  if (end_idx == -1)
    return QString();

  // The span is exactly [begin_idx, end_idx).  Subtracting one more would
  // drop the last character, and when the markers are adjacent it would pass
  // -1 to mid(), which means "everything up to the end of the string".
  return source.mid(begin_idx, end_idx - begin_idx);
}
// Runs every item of |rule| over |*content| in order, removing what each item
// matches.  An item whose second string is null names a whole tag; otherwise
// the pair is a begin/end marker.
void HtmlScraper::ApplyExcludeRule(const Rule& rule, QString* content) const {
  foreach (const RuleItem& step, rule) {
    const QString& marker = step.first;
    const QString& terminator = step.second;

    *content = terminator.isNull()
        ? ExcludeXmlTag(*content, marker)
        : Exclude(*content, marker, terminator);
  }
}
// Removes the first span that starts with the opening tag |tag| (matched
// literally) and ends with the next closing tag of the same element name.
// |source| is returned untouched when |tag| does not look like an opening tag.
QString HtmlScraper::ExcludeXmlTag(const QString& source, const QString& tag) {
  QRegExp tag_name_re("<(\\w+).*>");
  const bool looks_like_tag = tag_name_re.indexIn(tag) != -1;
  if (!looks_like_tag)
    return source;

  const QString closing_tag = "</" + tag_name_re.cap(1) + ">";
  return Exclude(source, tag, closing_tag);
}
// Removes the first span of |source| that runs from |begin| up to and
// including the next |end|.  Only one span is removed per call.  |source| is
// returned untouched when either marker is missing.
QString HtmlScraper::Exclude(const QString& source, const QString& begin, const QString& end) {
  const int cut_from = source.indexOf(begin);
  if (cut_from == -1)
    return source;

  const int end_marker_at = source.indexOf(end, cut_from + begin.length());
  if (end_marker_at == -1)
    return source;

  // Keep what precedes the begin marker and what follows the end marker
  const int cut_to = end_marker_at + end.length();
  const QString head = source.left(cut_from);
  const QString tail = source.right(source.length() - cut_to);
  return head + tail;
}
// Returns the lower-cased first character of |text| as a string, or a null
// string when |text| is empty.
QString HtmlScraper::FirstChar(const QString& text) {
  if (!text.isEmpty()) {
    const QChar initial = text.at(0).toLower();
    return initial;
  }
  return QString();
}
// Returns |text| with its first character upper-cased and every following
// character lower-cased.  An empty input gives a null string.
QString HtmlScraper::TitleCase(const QString& text) {
  if (text.isEmpty())
    return QString();

  QString cased(text.at(0).toUpper());
  if (text.length() > 1)
    cased += text.mid(1).toLower();
  return cased;
}
// Substitutes |value| for the placeholder |tag| in |*url|.
//
// Before substitution, |value| is passed through every configured URL format:
// each character listed in a format's first string is replaced by the
// format's second string.  Nothing happens when |*url| does not contain |tag|.
void HtmlScraper::DoUrlReplace(QString* url, const QString& tag,
                               const QString& value) const {
  if (!url->contains(tag))
    return;

  // Apply URL character replacement
  QString value_copy(value);
  foreach (const UrlFormat& format, url_formats_) {
    // The characters to replace become a regular expression character class.
    // NOTE(review): QRegExp::escape() does not appear to escape '-', so a
    // hyphen that is not first or last in the set may be read as a range -
    // confirm before adding such a set.
    QRegExp re("[" + QRegExp::escape(format.first) + "]");
    value_copy.replace(re, format.second);
  }

  // Placeholders differ only by case ({artist} vs {Artist}), and the guard
  // above is case-sensitive, so the substitution has to be case-sensitive as
  // well.  Otherwise the lower-cased value would also overwrite the
  // capitalised placeholder that is meant to keep the original case.
  url->replace(tag, value_copy, Qt::CaseSensitive);
}

77
src/lyrics/htmlscraper.h Normal file
View File

@ -0,0 +1,77 @@
/* This file is part of Clementine.
Clementine is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Clementine is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Clementine. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HTMLSCRAPER_H
#define HTMLSCRAPER_H
#include <QObject>
#include <QPair>
#include <QStringList>
#include "lyricprovider.h"
// A LyricProvider that downloads one web page per song and cuts the lyrics
// out of its HTML with configurable extract and exclude rules.
class HtmlScraper : public LyricProvider {
Q_OBJECT
public:
HtmlScraper(NetworkAccessManager* network, QObject* parent = 0);
// Upper bound on the number of fetches Search() makes while following
// redirects.
static const int kRedirectLimit;
// One step of a rule.  |first| is either a whole opening tag, in which case
// |second| is a null QString, or a begin marker whose end marker is |second|.
typedef QPair<QString, QString> RuleItem;
// The items of a rule are applied in order, each to the previous item's
// output.
typedef QList<RuleItem> Rule;
// |first| is a set of characters; every one of them found in a URL field
// value is replaced by |second|.
typedef QPair<QString, QString> UrlFormat;
// Configuration setters.
void set_name(const QString& name) { name_ = name; }
void set_title(const QString& title) { title_ = title; }
// |url| is a template containing placeholders such as {artist} or {title}.
void set_url(const QString& url) { url_ = url; }
// Name of the text codec used to decode the downloaded page.
void set_charset(const QString& charset) { charset_ = charset; }
void add_url_format(const QString& replace, const QString& with) {
url_formats_ << UrlFormat(replace, with); }
void add_extract_rule(const Rule& rule) { extract_rules_ << rule; }
void add_exclude_rule(const Rule& rule) { exclude_rules_ << rule; }
// A page containing |indicator| is treated as "no lyrics found".
void add_invalid_indicator(const QString& indicator) { invalid_indicators_ << indicator; }
// LyricProvider interface.
QString name() const { return name_; }
// Blocks while the page is downloaded.
Result Search(const Song& metadata) const;
private:
void ApplyExtractRule(const Rule& rule, QString* content) const;
void ApplyExcludeRule(const Rule& rule, QString* content) const;
// Extract* return the text between two markers, Exclude* return the source
// with the text between (and including) two markers removed.
static QString ExtractXmlTag(const QString& source, const QString& tag);
static QString Extract(const QString& source, const QString& begin, const QString& end);
static QString ExcludeXmlTag(const QString& source, const QString& tag);
static QString Exclude(const QString& source, const QString& begin, const QString& end);
// Helpers for the {a} and {Title2} URL placeholders.
static QString FirstChar(const QString& text);
static QString TitleCase(const QString& text);
// Replaces the placeholder |tag| in |url| with |value| after running |value|
// through the URL formats.
void DoUrlReplace(QString* url, const QString& tag, const QString& value) const;
private:
QString name_;
// Stored by set_title(); not read by Search().
QString title_;
QString url_;
QString charset_;
QList<UrlFormat> url_formats_;
QList<Rule> extract_rules_;
QList<Rule> exclude_rules_;
QStringList invalid_indicators_;
};
#endif // HTMLSCRAPER_H

View File

@ -0,0 +1,76 @@
/* This file is part of Clementine.
Clementine is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Clementine is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Clementine. If not, see <http://www.gnu.org/licenses/>.
*/
#include "lyricfetcher.h"
#include "ultimatelyricsreader.h"
#include <QFutureWatcher>
#include <QtConcurrentRun>
#include <QtDebug>
typedef QList<LyricProvider*> ProviderList;
// Creates the fetcher and starts loading the bundled provider definitions.
//
// The XML resource is parsed on a QtConcurrent worker thread; when that
// finishes, UltimateLyricsParsed() adds the resulting providers to
// providers_.  Until then the provider list is empty.
LyricFetcher::LyricFetcher(NetworkAccessManager* network, QObject* parent)
  : QObject(parent),
    network_(network),
    next_id_(1),
    ultimate_reader_(new UltimateLyricsReader(network))
{
  // Parse the ultimate lyrics xml file in the background
  QFuture<ProviderList> future = QtConcurrent::run(
      ultimate_reader_.get(), &UltimateLyricsReader::Parse,
      QString(":lyrics/ultimate_providers.xml"));

  QFutureWatcher<ProviderList>* watcher = new QFutureWatcher<ProviderList>(this);

  // Connect before handing the future to the watcher: setFuture() may emit
  // signals for the future's current state, so connecting afterwards risks
  // missing finished() if the parse has already completed.
  connect(watcher, SIGNAL(finished()), SLOT(UltimateLyricsParsed()));
  watcher->setFuture(future);
}
// Intentionally empty.
// NOTE(review): presumably defined out of line so that the
// boost::scoped_ptr<UltimateLyricsReader> member is destroyed where
// UltimateLyricsReader is a complete type - confirm.
LyricFetcher::~LyricFetcher() {
}
void LyricFetcher::UltimateLyricsParsed() {
QFutureWatcher<ProviderList>* watcher =
static_cast<QFutureWatcher<ProviderList>*>(sender());
providers_.append(watcher->future().results()[0]);
watcher->deleteLater();
ultimate_reader_.reset();
}
// Starts a lyrics search for |metadata| on a QtConcurrent worker thread and
// returns the id that the resulting SearchResult signals will carry.
int LyricFetcher::SearchAsync(const Song& metadata) {
  const int request_id = next_id_;
  ++next_id_;

  QtConcurrent::run(this, &LyricFetcher::DoSearch, metadata, request_id);
  return request_id;
}
// Asks every provider in turn for the lyrics of |metadata|.
//
// Emits SearchResult(id, true, ...) for each provider that returns a valid
// result.  SearchResult(id, false, ...) is emitted only when no provider
// found anything, so a request never reports both success and failure.
void LyricFetcher::DoSearch(const Song& metadata, int id) {
  bool found = false;

  foreach (LyricProvider* provider, providers_) {
    qDebug() << "Searching" << metadata.title() << "with" << provider->name();

    LyricProvider::Result result = provider->Search(metadata);
    if (result.valid) {
      qDebug() << "Content" << result.content;
      emit SearchResult(id, true, result.title, result.content);
      // Every provider is still queried, as before; only the outcome is
      // remembered so the failure signal below can be suppressed.
      found = true;
    }
  }

  if (!found)
    emit SearchResult(id, false, QString(), QString());
}

57
src/lyrics/lyricfetcher.h Normal file
View File

@ -0,0 +1,57 @@
/* This file is part of Clementine.
Clementine is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Clementine is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Clementine. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef LYRICFETCHER_H
#define LYRICFETCHER_H
#include <QObject>
#include <boost/scoped_ptr.hpp>
#include "core/song.h"
class LyricProvider;
class NetworkAccessManager;
class UltimateLyricsReader;
// Runs lyrics searches against a list of LyricProviders on background
// threads and reports the outcome through the SearchResult signal.
class LyricFetcher : public QObject {
Q_OBJECT
public:
LyricFetcher(NetworkAccessManager* network, QObject* parent = 0);
~LyricFetcher();
// Starts a search in the background and returns the id that identifies it in
// SearchResult.
int SearchAsync(const Song& metadata);
signals:
// |id| is the value returned by the matching SearchAsync() call.
void SearchResult(int id, bool success, const QString& title, const QString& content);
private slots:
// Called when the bundled provider definitions have been parsed.
void UltimateLyricsParsed();
private:
// Body of a search; run through QtConcurrent by SearchAsync().
void DoSearch(const Song& metadata, int id);
private:
NetworkAccessManager* network_;
// Id handed out by the next SearchAsync() call; starts at 1.
int next_id_;
QList<LyricProvider*> providers_;
// Only needed while the provider definitions are being parsed; reset once
// parsing has finished.
boost::scoped_ptr<UltimateLyricsReader> ultimate_reader_;
};
#endif // LYRICFETCHER_H

View File

@ -0,0 +1,23 @@
/* This file is part of Clementine.
Clementine is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Clementine is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Clementine. If not, see <http://www.gnu.org/licenses/>.
*/
#include "lyricprovider.h"
// Stores |network| for use by subclasses; |parent| is the QObject parent.
LyricProvider::LyricProvider(NetworkAccessManager* network, QObject* parent)
: QObject(parent),
network_(network)
{
}

View File

@ -0,0 +1,47 @@
/* This file is part of Clementine.
Clementine is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Clementine is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Clementine. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef LYRICPROVIDER_H
#define LYRICPROVIDER_H
#include <QObject>
#include "core/song.h"
class NetworkAccessManager;
// Abstract interface for a source of song lyrics.
class LyricProvider : public QObject {
Q_OBJECT
public:
LyricProvider(NetworkAccessManager* network, QObject* parent = 0);
// Outcome of a search.  A default-constructed Result is invalid.
struct Result {
Result() : valid(false) {}
// True only when lyrics were found.
bool valid;
QString title;
QString content;
};
// Name identifying this provider.
virtual QString name() const = 0;
// Searches for the lyrics of |metadata|.
virtual Result Search(const Song& metadata) const = 0;
protected:
// Network access shared with subclasses.
NetworkAccessManager* network_;
};
#endif // LYRICPROVIDER_H

View File

@ -0,0 +1,119 @@
/* This file is part of Clementine.
Clementine is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Clementine is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Clementine. If not, see <http://www.gnu.org/licenses/>.
*/
#include "htmlscraper.h"
#include "ultimatelyricsreader.h"
#include <QFile>
#include <QXmlStreamReader>
// Stores |network|, which is handed to every HtmlScraper this reader creates.
UltimateLyricsReader::UltimateLyricsReader(NetworkAccessManager* network, QObject* parent)
: QObject(parent),
network_(network)
{
}
// Opens |filename| read-only and parses the provider definitions in it.
// Logs a warning and returns an empty list when the file cannot be opened.
QList<LyricProvider*> UltimateLyricsReader::Parse(const QString& filename) const {
  QFile xml_file(filename);

  if (xml_file.open(QIODevice::ReadOnly))
    return ParseDevice(&xml_file);

  qWarning() << "Error opening" << filename;
  return QList<LyricProvider*>();
}
// Reads provider definitions as XML from |device| and returns one
// LyricProvider per <provider> element.
//
// If the XML is malformed, the providers parsed before the error are still
// returned and a warning describing the error is logged.
QList<LyricProvider*> UltimateLyricsReader::ParseDevice(QIODevice* device) const {
  QList<LyricProvider*> ret;

  QXmlStreamReader reader(device);
  while (!reader.atEnd()) {
    reader.readNext();

    // name() is also set on end elements, so only start a provider on its
    // opening tag.
    if (reader.isStartElement() && reader.name() == "provider") {
      LyricProvider* provider = ParseProvider(&reader);
      if (provider)
        ret << provider;
    }
  }

  // atEnd() also becomes true after a parse error; report it instead of
  // silently returning a truncated list.
  if (reader.hasError())
    qWarning() << "Error parsing lyric providers:" << reader.errorString();

  return ret;
}
// Builds an HtmlScraper from the <provider> element the reader is currently
// positioned on.  The element's attributes configure name, title, charset and
// URL; its children add rules, invalid indicators and URL formats.  Unknown
// children are skipped.  Reading stops at the first end element seen at this
// level.
LyricProvider* UltimateLyricsReader::ParseProvider(QXmlStreamReader* reader) const {
  const QXmlStreamAttributes provider_attrs = reader->attributes();

  HtmlScraper* scraper = new HtmlScraper(network_);
  scraper->set_name(provider_attrs.value("name").toString());
  scraper->set_title(provider_attrs.value("title").toString());
  scraper->set_charset(provider_attrs.value("charset").toString());
  scraper->set_url(provider_attrs.value("url").toString());

  while (!reader->atEnd()) {
    const QXmlStreamReader::TokenType token = reader->readNext();

    if (token == QXmlStreamReader::EndElement)
      break;
    if (token != QXmlStreamReader::StartElement)
      continue;

    // Copy the name now: the handlers below advance the reader
    const QString element = reader->name().toString();

    if (element == "extract") {
      scraper->add_extract_rule(ParseRule(reader));
    } else if (element == "exclude") {
      scraper->add_exclude_rule(ParseRule(reader));
    } else if (element == "invalidIndicator") {
      scraper->add_invalid_indicator(ParseInvalidIndicator(reader));
    } else if (element == "urlFormat") {
      const QXmlStreamAttributes format_attrs = reader->attributes();
      scraper->add_url_format(format_attrs.value("replace").toString(),
                              format_attrs.value("with").toString());
      reader->skipCurrentElement();
    } else {
      reader->skipCurrentElement();
    }
  }

  return scraper;
}
// Collects the <item> children of the rule element the reader is positioned
// on.  An item with a "tag" attribute becomes (tag, null string); otherwise
// an item with a "begin" attribute becomes (begin, end).  Every child element
// is skipped after it has been looked at.
HtmlScraper::Rule UltimateLyricsReader::ParseRule(QXmlStreamReader* reader) const {
  HtmlScraper::Rule items;

  while (!reader->atEnd()) {
    const QXmlStreamReader::TokenType token = reader->readNext();

    if (token == QXmlStreamReader::EndElement)
      break;
    if (token != QXmlStreamReader::StartElement)
      continue;

    if (reader->name() == "item") {
      const QXmlStreamAttributes attr = reader->attributes();
      if (attr.hasAttribute("tag")) {
        items << HtmlScraper::RuleItem(attr.value("tag").toString(), QString());
      } else if (attr.hasAttribute("begin")) {
        items << HtmlScraper::RuleItem(attr.value("begin").toString(),
                                       attr.value("end").toString());
      }
    }
    reader->skipCurrentElement();
  }

  return items;
}
// Returns the "value" attribute of the element the reader is positioned on,
// then moves the reader past that element.
QString UltimateLyricsReader::ParseInvalidIndicator(QXmlStreamReader* reader) const {
  const QXmlStreamAttributes attrs = reader->attributes();
  const QString indicator = attrs.value("value").toString();

  reader->skipCurrentElement();
  return indicator;
}

View File

@ -0,0 +1,45 @@
/* This file is part of Clementine.
Clementine is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Clementine is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Clementine. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ULTIMATELYRICSREADER_H
#define ULTIMATELYRICSREADER_H
#include <QObject>
#include "htmlscraper.h"
class QIODevice;
class QXmlStreamReader;
// Reads lyric provider descriptions with a QXmlStreamReader and turns them
// into LyricProvider objects.
class UltimateLyricsReader : public QObject {
Q_OBJECT
public:
// NOTE(review): the constructor body is not visible here; presumably it
// stores |network| in network_ - confirm in the .cpp.
UltimateLyricsReader(NetworkAccessManager* network, QObject* parent = 0);
// Entry points that return the parsed providers, reading either from a file
// name or from a device.
// NOTE(review): the implementations are outside this view, so ownership of
// the returned pointers and the behaviour on unreadable input should be
// checked in the .cpp.
QList<LyricProvider*> Parse(const QString& filename) const;
QList<LyricProvider*> ParseDevice(QIODevice* device) const;
private:
// Parses one provider element.  The visible part of the implementation
// handles "invalidIndicator" and "urlFormat" children and skips elements it
// does not recognise.
LyricProvider* ParseProvider(QXmlStreamReader* reader) const;
// Collects the <item> children of the current element: "tag" items and
// "begin"/"end" items each become one HtmlScraper::RuleItem.
HtmlScraper::Rule ParseRule(QXmlStreamReader* reader) const;
// Returns the "value" attribute of the current element and skips past it.
QString ParseInvalidIndicator(QXmlStreamReader* reader) const;
private:
// Network access manager supplied to the constructor.
NetworkAccessManager* network_;
};
#endif // ULTIMATELYRICSREADER_H

View File

@ -122,6 +122,7 @@ int main(int argc, char *argv[]) {
qRegisterMetaTypeStreamOperators<Equalizer::Params>("Equalizer::Params");
qRegisterMetaType<const char*>("const char*");
qRegisterMetaType<QNetworkReply*>("QNetworkReply*");
qRegisterMetaType<QNetworkReply**>("QNetworkReply**");
#ifdef HAVE_GSTREAMER
qRegisterMetaType<GstBuffer*>("GstBuffer*");

View File

@ -34,6 +34,7 @@
#include "library/libraryconfig.h"
#include "library/librarydirectorymodel.h"
#include "library/library.h"
#include "lyrics/lyricfetcher.h"
#include "playlist/playlistbackend.h"
#include "playlist/playlist.h"
#include "playlist/playlistmanager.h"
@ -126,6 +127,7 @@ MainWindow::MainWindow(NetworkAccessManager* network, Engine::Type engine, QWidg
player_(NULL),
library_(NULL),
global_shortcuts_(new GlobalShortcuts(this)),
lyric_fetcher_(new LyricFetcher(network, this)),
devices_(NULL),
settings_dialog_(NULL),
cover_manager_(NULL),
@ -325,6 +327,7 @@ MainWindow::MainWindow(NetworkAccessManager* network, Engine::Type engine, QWidg
connect(player_, SIGNAL(ForceShowOSD(Song)), SLOT(ForceShowOSD(Song)));
connect(playlists_, SIGNAL(CurrentSongChanged(Song)), osd_, SLOT(SongChanged(Song)));
connect(playlists_, SIGNAL(CurrentSongChanged(Song)), player_, SLOT(CurrentMetadataChanged(Song)));
connect(playlists_, SIGNAL(CurrentSongChanged(Song)), this, SLOT(FetchLyrics(Song)));
connect(playlists_, SIGNAL(PlaylistChanged()), player_, SLOT(PlaylistChanged()));
connect(playlists_, SIGNAL(EditingFinished(QModelIndex)), SLOT(PlaylistEditFinished(QModelIndex)));
connect(playlists_, SIGNAL(Error(QString)), SLOT(ShowErrorDialog(QString)));
@ -1549,3 +1552,7 @@ void MainWindow::ShowVisualisations() {
visualisation_->show();
#endif // ENABLE_VISUALISATIONS
}
// Slot connected to playlists_'s CurrentSongChanged(Song) signal: forwards
// the song to the lyric fetcher, which performs the search via SearchAsync().
void MainWindow::FetchLyrics(const Song& song) {
lyric_fetcher_->SearchAsync(song);
}

View File

@ -42,6 +42,7 @@ class ErrorDialog;
class GlobalShortcuts;
class GroupByDialog;
class Library;
class LyricFetcher;
class MultiLoadingIndicator;
class NetworkAccessManager;
class OrganiseDialog;
@ -183,6 +184,9 @@ class MainWindow : public QMainWindow, public PlatformInterface {
void OpenSettingsDialog();
void OpenSettingsDialogAtPage(SettingsDialog::Page page);
// TODO: Move to the UI class
void FetchLyrics(const Song& song);
private:
void SaveGeometry();
void AddFilesToPlaylist(bool clear_first, const QList<QUrl>& urls);
@ -209,6 +213,7 @@ class MainWindow : public QMainWindow, public PlatformInterface {
Player* player_;
Library* library_;
GlobalShortcuts* global_shortcuts_;
LyricFetcher* lyric_fetcher_;
DeviceManager* devices_;

View File

@ -0,0 +1,7 @@
# Build rules for the ultimate_lyrics_parser helper tool (single source file:
# main.cpp).
cmake_minimum_required(VERSION 2.6)
# Prepend the warning flags to whatever C / C++ flags are already set for this
# directory.
set(CMAKE_C_FLAGS "-Wall ${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "-Woverloaded-virtual -Wall -Wno-sign-compare ${CMAKE_CXX_FLAGS}")
# EXCLUDE_FROM_ALL keeps the tool out of the default "all" target; it is only
# built when the ultimate_lyrics_parser target is requested explicitly.
add_executable(ultimate_lyrics_parser EXCLUDE_FROM_ALL main.cpp)
# QT_LIBRARIES is not defined in this file.
# NOTE(review): presumably it is populated by the parent project's Qt4 setup;
# confirm that it includes QtWebKit, which main.cpp uses.
target_link_libraries(ultimate_lyrics_parser ${QT_LIBRARIES})

View File

@ -0,0 +1,150 @@
#include <QApplication>
#include <QFile>
#include <QWebFrame>
#include <QWebPage>
#include <QXmlStreamWriter>
#include <QtDebug>
int ShowUsage() {
qWarning() << "Usage:" << qApp->arguments()[0].toUtf8().constData() << "sites.js\n";
qWarning() << "This tool parses a sites.js file from the Ultimate Lyrics Amarok script";
qWarning() << "and outputs an XML file that can be used by Clementine.";
return 2;
}
// Writes a single <item> element describing one scraper rule entry.
//
//  * A string value becomes <item tag="..."/>.
//  * A list value is treated as a [begin, end] pair and becomes
//    <item begin="..." end="..."/>.
//  * Any other value type is ignored.
//
// A list with fewer than two entries cannot describe a range, so it is
// skipped with a warning instead of being indexed out of bounds.
void WriteRuleItem(QXmlStreamWriter& writer, const QVariant& value) {
  if (value.type() == QVariant::String) {
    writer.writeStartElement("item");
    writer.writeAttribute("tag", value.toString());
    writer.writeEndElement();
    return;
  }

  if (value.type() != QVariant::List)
    return;

  const QVariantList range = value.toList();
  if (range.count() < 2) {
    qWarning() << "Skipping malformed rule item: expected [begin, end] but got"
               << range.count() << "element(s)";
    return;
  }

  writer.writeStartElement("item");
  writer.writeAttribute("begin", range[0].toString());
  writer.writeAttribute("end", range[1].toString());
  writer.writeEndElement();
}
// Writes up to four rule elements called |name|.  The rules are looked up in
// |map| under the keys |name|, |name|2, |name|3 and |name|4; keys that are
// absent are skipped.  Every rule that is present produces one element named
// |name| (without the numeric suffix) containing its items.
void WriteRules(QXmlStreamWriter& writer, const QString& name, const QVariantMap& map) {
  for (int suffix = 1; suffix <= 4; ++suffix) {
    // The first rule uses the bare name, later ones carry their number.
    QString key = name;
    if (suffix != 1)
      key += QString::number(suffix);

    const QVariantMap::const_iterator found = map.constFind(key);
    if (found == map.constEnd())
      continue;

    const QVariant rule = found.value();

    writer.writeStartElement(name);
    switch (rule.type()) {
      case QVariant::String:
        // A bare string is a rule with a single item.
        WriteRuleItem(writer, rule);
        break;

      case QVariant::List:
        foreach (const QVariant& item, rule.toList()) {
          WriteRuleItem(writer, item);
        }
        break;

      default:
        // Anything else yields an empty rule element.
        break;
    }
    writer.writeEndElement();
  }
}
// Writes one <|name| value="..."/> element for every string stored under
// |name| in |map|.  The entry may be a single string or a list of values;
// a missing key or any other value type writes nothing.
void WriteList(QXmlStreamWriter& writer, const QString& name, const QVariantMap& map) {
  if (!map.contains(name))
    return;

  const QVariant entry = map[name];

  // Normalise both accepted shapes into a list so there is one write loop.
  QVariantList values;
  if (entry.type() == QVariant::String)
    values.append(entry);
  else if (entry.type() == QVariant::List)
    values = entry.toList();

  foreach (const QVariant& item, values) {
    writer.writeStartElement(name);
    writer.writeAttribute("value", item.toString());
    writer.writeEndElement();
  }
}
// Writes one <urlFormat replace="..." with="..."/> element for every entry of
// |list| that is a map containing both a "punct" key (written as "replace")
// and a "rep" key (written as "with").  Other entries are ignored.
void WriteUrlFormat(QXmlStreamWriter& writer, const QVariantList& list) {
  foreach (const QVariant& entry, list) {
    const bool is_map = entry.type() == QVariant::Map;
    if (!is_map)
      continue;

    const QVariantMap fields = entry.toMap();
    const bool is_complete = fields.contains("rep") && fields.contains("punct");
    if (!is_complete)
      continue;

    const QString replace = fields.value("punct").toString();
    const QString with = fields.value("rep").toString();

    writer.writeStartElement("urlFormat");
    writer.writeAttribute("replace", replace);
    writer.writeAttribute("with", with);
    writer.writeEndElement();
  }
}
// Writes a complete <provider> element for the site called |name|, described
// by the map in |data|.  Descriptors without a "url" key, or with a
// "getReply" key, are left out of the output entirely.
void WriteProvider(QXmlStreamWriter& writer, const QString& name, const QVariant& data) {
  const QVariantMap site = data.toMap();

  const bool has_url = site.contains("url");
  const bool has_get_reply = site.contains("getReply");
  if (has_get_reply || !has_url)
    return;

  writer.writeStartElement("provider");

  // Attributes of the <provider> element itself.  Keys that are missing
  // from the descriptor are written as empty strings.
  writer.writeAttribute("name", name);
  writer.writeAttribute("title", site.value("title").toString());
  writer.writeAttribute("charset", site.value("charset").toString());
  writer.writeAttribute("url", site.value("url").toString());

  // Child elements, in the order they appear in the output file.
  WriteUrlFormat(writer, site.value("urlFormat").toList());
  WriteRules(writer, "extract", site);
  WriteRules(writer, "exclude", site);
  WriteList(writer, "invalidIndicator", site);

  writer.writeEndElement();
}
int main(int argc, char** argv) {
QApplication a(argc, argv);
// Parse commandline arguments
if (a.arguments().count() != 2)
return ShowUsage();
QString sites_filename = a.arguments()[1];
if (!QFile::exists(sites_filename)) {
qWarning() << "Error:" << sites_filename << "does not exist";
return 1;
}
// Load the javascript file
QFile sites_file(sites_filename);
if (!sites_file.open(QIODevice::ReadOnly)) {
qWarning() << "Error: could not open" << sites_filename;
return 1;
}
QString javascript = QString::fromUtf8(sites_file.readAll());
javascript = javascript.section("\n}", 0, 0, QString::SectionIncludeTrailingSep);
javascript.append(";\n(siteDescriptors)");
// Parse the javascript
QWebPage page;
QVariant data = page.mainFrame()->evaluateJavaScript(javascript);
// Open the document
QFile stdout(NULL);
stdout.open(1, QIODevice::WriteOnly);
QXmlStreamWriter writer(&stdout);
writer.setAutoFormatting(true);
writer.setAutoFormattingIndent(2);
// Begin writing
writer.writeStartDocument();
writer.writeStartElement("lyricproviders");
foreach (const QString& provider_name, data.toMap().keys()) {
WriteProvider(writer, provider_name, data.toMap()[provider_name]);
}
writer.writeEndElement();
writer.writeEndDocument();
return 0;
}