mirror of
https://github.com/clementine-player/Clementine
synced 2024-12-14 18:35:16 +01:00
Add voting for codecs, e.g. if 3 out of 4 tags in a file are detected as windows-1251 then they will all be decoded with windows-1251.
This commit is contained in:
parent
c0103cc1e0
commit
b9df2a09d4
@ -154,14 +154,63 @@ QTextCodec* UniversalEncodingHandler::Guess(const char* data) {
|
||||
}
|
||||
if (repeats > 3) {
|
||||
qWarning() << "Heuristic guessed windows-1251";
|
||||
return QTextCodec::codecForName("windows-1251");
|
||||
current_codec_ = QTextCodec::codecForName("windows-1251");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return current_codec_;
|
||||
}
|
||||
|
||||
// Chooses one codec for a whole tag by majority vote.
//
// The title, artist, album, comment and genre fields each cast one vote for
// the codec guessed from their contents; empty fields do not vote.  The codec
// with the most votes is returned.  Ties are resolved in favour of whichever
// tied entry is visited first while iterating the hash.
//
// Returns NULL when no field voted (every field was empty), or when NULL
// itself collected the most votes (the per-string Guess() returns NULL for
// plain ASCII input).
QTextCodec* UniversalEncodingHandler::Guess(const TagLib::Tag& tag) {
  QHash<QTextCodec*, int> usages;
  Guess(tag.title(), &usages);
  Guess(tag.artist(), &usages);
  Guess(tag.album(), &usages);
  Guess(tag.comment(), &usages);
  Guess(tag.genre(), &usages);

  // With no votes at all, begin() == end() and reading key() from that
  // iterator would be undefined behaviour, so bail out before looking.
  if (usages.isEmpty()) {
    return NULL;
  }

  QHash<QTextCodec*, int>::const_iterator max = usages.constBegin();
  for (QHash<QTextCodec*, int>::const_iterator it = usages.constBegin();
       it != usages.constEnd(); ++it) {
    if (it.value() > max.value()) {
      max = it;
    }
  }
  return max.key();
}
|
||||
|
||||
// Casts one vote in |usages| for the codec guessed from |input|.
// The guessed codec may be NULL; it is tallied under the NULL key like any
// other result.
void UniversalEncodingHandler::Guess(const TagLib::String& input,
                                     QHash<QTextCodec*, int>* usages) {
  // Empty strings don't vote.
  if (input.isEmpty()) {
    return;
  }

  // operator[] creates the entry with a default-constructed (zero) count the
  // first time a codec is seen, so a single increment covers both the
  // "first vote" and the "another vote" cases.
  QTextCodec* const guessed = Guess(input);
  ++(*usages)[guessed];
}
|
||||
|
||||
// Guesses the codec a single tag string was really written in.
//
// Returns NULL for pure-ASCII input, the UTF-8 codec for input that is not
// Latin-1, and otherwise whatever Guess(const char*) reports for the string's
// bytes (NULL if the UTF-8 form is no longer than the string itself).
QTextCodec* UniversalEncodingHandler::Guess(const TagLib::String& input) {
  if (input.isAscii()) {
    return NULL;
  }

  if (!input.isLatin1()) {
    return QTextCodec::codecForName("UTF-8");
  }

  qWarning() << "Extended ASCII... possibly should be CP866 or windows-1251 instead";

  // UTF-8 rendering of the string as TagLib decoded it.
  const std::string as_utf8 = input.toCString(true);
  if (as_utf8.size() <= input.size()) {
    return NULL;
  }

  // Round-trip through QString to get a byte string for the detector.
  // NOTE(review): presumably this recovers the original single-byte data;
  // the exact bytes depend on how QString::toStdString() converts - confirm.
  const std::string detector_input =
      QString::fromUtf8(as_utf8.c_str()).toStdString();
  return Guess(detector_input.c_str());
}
|
||||
|
||||
QString UniversalEncodingHandler::FixEncoding(const TagLib::String& input) {
|
||||
if (input.isLatin1() && !input.isAscii()) {
|
||||
qWarning() << "Extended ASCII... possibly should be CP866 or windows-1251 instead";
|
||||
@ -169,12 +218,11 @@ QString UniversalEncodingHandler::FixEncoding(const TagLib::String& input) {
|
||||
std::string fixed;
|
||||
if (broken.size() > input.size()) {
|
||||
fixed = QString::fromUtf8(broken.c_str()).toStdString();
|
||||
// This is single byte encoding, therefore can't be CJK.
|
||||
UniversalEncodingHandler detector(NS_FILTER_NON_CJK);
|
||||
QTextCodec* codec = detector.Guess(fixed.c_str());
|
||||
QTextCodec* codec = Guess(fixed.c_str());
|
||||
if (!codec) {
|
||||
qDebug() << "Could not guess encoding. Using extended ASCII.";
|
||||
} else {
|
||||
qDebug() << "Guessed:" << codec->name();
|
||||
QString foo = codec->toUnicode(fixed.c_str());
|
||||
return foo.trimmed();
|
||||
}
|
||||
@ -235,6 +283,15 @@ void Song::Init(const QString& title, const QString& artist, const QString& albu
|
||||
d->length_ = length;
|
||||
}
|
||||
|
||||
// Converts a TagLib string to a trimmed QString.
// With a NULL |codec| the string goes through TStringToQString() unchanged;
// otherwise its bytes are decoded again using |codec|.
QString Song::Decode(const TagLib::String tag, const QTextCodec* codec) const {
  if (!codec) {
    return TStringToQString(tag).trimmed();
  }

  // Go via the UTF-8 form and a QString round trip to obtain the byte string
  // that |codec| is then asked to decode.
  const QString from_utf8 = QString::fromUtf8(tag.toCString(true));
  const std::string fixed = from_utf8.toStdString();
  return codec->toUnicode(fixed.c_str()).trimmed();
}
|
||||
|
||||
void Song::InitFromFile(const QString& filename, int directory_id) {
|
||||
d->filename_ = filename;
|
||||
d->directory_id_ = directory_id;
|
||||
@ -250,13 +307,18 @@ void Song::InitFromFile(const QString& filename, int directory_id) {
|
||||
d->mtime_ = info.lastModified().toTime_t();
|
||||
d->ctime_ = info.created().toTime_t();
|
||||
|
||||
// This is single byte encoding, therefore can't be CJK.
|
||||
UniversalEncodingHandler detector(NS_FILTER_NON_CJK);
|
||||
|
||||
TagLib::Tag* tag = fileref->tag();
|
||||
QTextCodec* codec = NULL;
|
||||
if (tag) {
|
||||
d->title_ = UniversalEncodingHandler::FixEncoding(tag->title());
|
||||
d->artist_ = UniversalEncodingHandler::FixEncoding(tag->artist());
|
||||
d->album_ = UniversalEncodingHandler::FixEncoding(tag->album());
|
||||
d->comment_ = UniversalEncodingHandler::FixEncoding(tag->comment());
|
||||
d->genre_ = UniversalEncodingHandler::FixEncoding(tag->genre());
|
||||
codec = detector.Guess(*tag);
|
||||
d->title_ = Decode(tag->title(), codec);
|
||||
d->artist_ = Decode(tag->artist(), codec);
|
||||
d->album_ = Decode(tag->album(), codec);
|
||||
d->comment_ = Decode(tag->comment(), codec);
|
||||
d->genre_ = Decode(tag->genre(), codec);
|
||||
d->year_ = tag->year();
|
||||
d->track_ = tag->track();
|
||||
|
||||
@ -274,10 +336,10 @@ void Song::InitFromFile(const QString& filename, int directory_id) {
|
||||
d->bpm_ = TStringToQString(file->ID3v2Tag()->frameListMap()["TBPM"].front()->toString()).trimmed().toFloat();
|
||||
|
||||
if (!file->ID3v2Tag()->frameListMap()["TCOM"].isEmpty())
|
||||
d->composer_ = UniversalEncodingHandler::FixEncoding(file->ID3v2Tag()->frameListMap()["TCOM"].front()->toString());
|
||||
d->composer_ = Decode(file->ID3v2Tag()->frameListMap()["TCOM"].front()->toString(), codec);
|
||||
|
||||
if (!file->ID3v2Tag()->frameListMap()["TPE2"].isEmpty()) // non-standard: Apple, Microsoft
|
||||
d->albumartist_ = UniversalEncodingHandler::FixEncoding(file->ID3v2Tag()->frameListMap()["TPE2"].front()->toString());
|
||||
d->albumartist_ = Decode(file->ID3v2Tag()->frameListMap()["TPE2"].front()->toString(), codec);
|
||||
|
||||
if (!file->ID3v2Tag()->frameListMap()["TCMP"].isEmpty())
|
||||
compilation = TStringToQString(file->ID3v2Tag()->frameListMap()["TCMP"].front()->toString()).trimmed();
|
||||
@ -286,7 +348,7 @@ void Song::InitFromFile(const QString& filename, int directory_id) {
|
||||
else if (TagLib::Ogg::Vorbis::File* file = dynamic_cast<TagLib::Ogg::Vorbis::File*>(fileref->file())) {
|
||||
if (file->tag()) {
|
||||
if ( !file->tag()->fieldListMap()["COMPOSER"].isEmpty() )
|
||||
d->composer_ = UniversalEncodingHandler::FixEncoding(file->tag()->fieldListMap()["COMPOSER"].front());
|
||||
d->composer_ = Decode(file->tag()->fieldListMap()["COMPOSER"].front(), codec);
|
||||
|
||||
if ( !file->tag()->fieldListMap()["BPM"].isEmpty() )
|
||||
d->bpm_ = TStringToQString(file->tag()->fieldListMap()["BPM"].front()).trimmed().toFloat();
|
||||
@ -301,7 +363,7 @@ void Song::InitFromFile(const QString& filename, int directory_id) {
|
||||
else if (TagLib::FLAC::File* file = dynamic_cast<TagLib::FLAC::File*>(fileref->file())) {
|
||||
if ( file->xiphComment() ) {
|
||||
if (!file->xiphComment()->fieldListMap()["COMPOSER"].isEmpty())
|
||||
d->composer_ = UniversalEncodingHandler::FixEncoding( file->xiphComment()->fieldListMap()["COMPOSER"].front() );
|
||||
d->composer_ = Decode(file->xiphComment()->fieldListMap()["COMPOSER"].front(), codec);
|
||||
|
||||
if (!file->xiphComment()->fieldListMap()["BPM"].isEmpty() )
|
||||
d->bpm_ = TStringToQString( file->xiphComment()->fieldListMap()["BPM"].front() ).trimmed().toFloat();
|
||||
|
@ -17,6 +17,7 @@
|
||||
#ifndef SONG_H
|
||||
#define SONG_H
|
||||
|
||||
#include <QHash>
|
||||
#include <QImage>
|
||||
#include <QList>
|
||||
#include <QSharedData>
|
||||
@ -63,12 +64,16 @@ class UniversalEncodingHandler : public TagLib::ID3v1::StringHandler,
|
||||
virtual TagLib::String parse(const TagLib::ByteVector& data) const;
|
||||
|
||||
QTextCodec* Guess(const char* data);
|
||||
QTextCodec* Guess(const TagLib::Tag& tag);
|
||||
QTextCodec* Guess(const TagLib::String& input);
|
||||
|
||||
static QString FixEncoding(const TagLib::String& input);
|
||||
QString FixEncoding(const TagLib::String& input);
|
||||
private:
|
||||
// nsUniversalDetector
|
||||
virtual void Report(const char* charset);
|
||||
|
||||
void Guess(const TagLib::String& input, QHash<QTextCodec*, int>* usages);
|
||||
|
||||
QTextCodec* current_codec_;
|
||||
};
|
||||
|
||||
@ -111,6 +116,8 @@ class Song {
|
||||
void InitFromLastFM(const lastfm::Track& track);
|
||||
void MergeFromSimpleMetaBundle(const Engine::SimpleMetaBundle& bundle);
|
||||
|
||||
QString Decode(const TagLib::String tag, const QTextCodec* codec) const;
|
||||
|
||||
// Save
|
||||
void BindToQuery(QSqlQuery* query) const;
|
||||
void ToLastFM(lastfm::Track* track) const;
|
||||
|
@ -27,6 +27,8 @@
|
||||
#include <QTemporaryFile>
|
||||
#include <QTextCodec>
|
||||
|
||||
#include <taglib/id3v2tag.h>
|
||||
|
||||
namespace {
|
||||
|
||||
class SongTest : public ::testing::Test {
|
||||
@ -90,7 +92,7 @@ TEST_F(SongTest, FixesCP866) {
|
||||
const char cp866[] = { 0x8a, 0xa8, 0xad, 0xae, '\0' }; // Кино
|
||||
TagLib::ByteVector bytes(cp866);
|
||||
TagLib::String str(bytes);
|
||||
QString fixed = UniversalEncodingHandler::FixEncoding(str);
|
||||
QString fixed = UniversalEncodingHandler(NS_FILTER_NON_CJK).FixEncoding(str);
|
||||
EXPECT_EQ(4, fixed.length());
|
||||
EXPECT_STREQ("Кино", fixed.toUtf8().constData());
|
||||
}
|
||||
@ -99,7 +101,7 @@ TEST_F(SongTest, FixesWindows1251) {
|
||||
const char w1251[] = { 0xca, 0xe8, 0xed, 0xee, '\0' }; // Кино
|
||||
TagLib::ByteVector bytes(w1251);
|
||||
TagLib::String str(bytes);
|
||||
QString fixed = UniversalEncodingHandler::FixEncoding(str);
|
||||
QString fixed = UniversalEncodingHandler(NS_FILTER_NON_CJK).FixEncoding(str);
|
||||
EXPECT_EQ(4, fixed.length());
|
||||
EXPECT_STREQ("Кино", fixed.toUtf8().constData());
|
||||
}
|
||||
@ -107,7 +109,7 @@ TEST_F(SongTest, FixesWindows1251) {
|
||||
TEST_F(SongTest, DoesNotFixAscii) {
|
||||
TagLib::ByteVector bytes("foobar");
|
||||
TagLib::String str(bytes);
|
||||
QString fixed = UniversalEncodingHandler::FixEncoding(str);
|
||||
QString fixed = UniversalEncodingHandler(NS_FILTER_NON_CJK).FixEncoding(str);
|
||||
EXPECT_EQ(6, fixed.length());
|
||||
EXPECT_STREQ("foobar", fixed.toUtf8().constData());
|
||||
}
|
||||
@ -115,7 +117,7 @@ TEST_F(SongTest, DoesNotFixAscii) {
|
||||
TEST_F(SongTest, DoesNotFixUtf8) {
|
||||
TagLib::ByteVector bytes("Кино");
|
||||
TagLib::String str(bytes, TagLib::String::UTF8);
|
||||
QString fixed = UniversalEncodingHandler::FixEncoding(str);
|
||||
QString fixed = UniversalEncodingHandler(NS_FILTER_NON_CJK).FixEncoding(str);
|
||||
EXPECT_EQ(4, fixed.length());
|
||||
EXPECT_STREQ("Кино", fixed.toUtf8().constData());
|
||||
}
|
||||
@ -125,16 +127,29 @@ TEST_F(SongTest, DoesNotFixExtendedAscii) {
|
||||
QTextCodec* codec = QTextCodec::codecForName("latin1");
|
||||
QString unicode = codec->toUnicode(latin1);
|
||||
TagLib::String str(latin1);
|
||||
QString fixed = UniversalEncodingHandler::FixEncoding(str);
|
||||
QString fixed = UniversalEncodingHandler(NS_FILTER_NON_CJK).FixEncoding(str);
|
||||
EXPECT_EQ(fixed, unicode);
|
||||
}
|
||||
|
||||
TEST_F(SongTest, FixesUtf8MungedIntoLatin1) {
|
||||
char latin1[] = { 'E', 's', 't', 'h', 'e', 'r', 0xe2, 0x80, 0x99, 's', 0x00 };
|
||||
TagLib::String str(latin1, TagLib::String::Latin1);
|
||||
QString fixed = UniversalEncodingHandler::FixEncoding(str);
|
||||
QString fixed = UniversalEncodingHandler(NS_FILTER_NON_CJK).FixEncoding(str);
|
||||
EXPECT_EQ(8, fixed.length());
|
||||
EXPECT_EQ(QString::fromUtf8("Esther’s"), fixed);
|
||||
}
|
||||
|
||||
// Two fields that are detected as windows-1251 must outvote one field that is
// detected as something else.
TEST_F(SongTest, TakesMajorityVote) {
  // Кино
  const char majority_text[] = { 0xca, 0xe8, 0xed, 0xee, '\0' };
  // П.Э. - actually windows-1251 but gets detected as windows-1252.
  const char minority_text[] = { 0xcf, '.', 0xc7, '.', '\0' };

  TagLib::ID3v2::Tag tag;
  tag.setTitle(majority_text);
  tag.setArtist(majority_text);
  tag.setAlbum(minority_text);

  UniversalEncodingHandler handler(NS_FILTER_NON_CJK);
  QTextCodec* const expected = QTextCodec::codecForName("windows-1251");
  EXPECT_EQ(expected, handler.Guess(tag));
}
|
||||
|
||||
} // namespace
|
||||
|
Loading…
Reference in New Issue
Block a user