Report pure-ASCII input from the charset detector, install
UniversalEncodingHandler as TagLib's ID3v1 string handler, and drop
leftover debug output.

diff --git a/3rdparty/universalchardet/nsUniversalDetector.cpp b/3rdparty/universalchardet/nsUniversalDetector.cpp
index 569db6453..f3e92a897 100644
--- a/3rdparty/universalchardet/nsUniversalDetector.cpp
+++ b/3rdparty/universalchardet/nsUniversalDetector.cpp
@@ -114,7 +114,6 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, uint32_t aLen)
   {
     mStart = false;
     if (aLen > 3)
-      qDebug() << aBuf[0];
       switch (aBuf[0])
         {
         case '\xEF':
@@ -241,7 +240,6 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, uint32_t aLen)
     break;
 
   default: //pure ascii
-    mDetectedCharset = "ASCII";
     ;//do nothing here
   }
   return NS_OK;
@@ -292,6 +290,10 @@ void nsUniversalDetector::DataEnd()
     break;
   case eEscAscii:
     break;
+  case ePureAscii:
+    // Input stayed in the pure-ASCII state: report that explicitly.
+    Report("ASCII");
+    break;
   default:
     ;
   }
diff --git a/src/core/song.cpp b/src/core/song.cpp
index 0276a81bd..80b938bde 100644
--- a/src/core/song.cpp
+++ b/src/core/song.cpp
@@ -107,11 +107,11 @@ TagLib::String UniversalEncodingHandler::parse(const TagLib::ByteVector& data) c
     // Detected codec -> QString (UTF-16) -> UTF8 -> UTF16-BE (TagLib::String)
     // That's probably expensive.
     QString unicode = current_codec_->toUnicode(data.data(), data.size());
-    qDebug() << "Decoded to:" << unicode;
     return TagLib::String(unicode.toUtf8().constData(), TagLib::String::UTF8);
   }
 }
 
+/*
 TagLib::ByteVector UniversalEncodingHandler::render(const TagLib::String& s) const {
   // TODO: what should we do here?
   // 1. Coerce to ASCII
@@ -120,12 +120,20 @@ TagLib::ByteVector UniversalEncodingHandler::render(const TagLib::String& s) con
   // 4. Nothing and rewrite the tag as ID3v2 & UTF8
   return TagLib::ByteVector();
 }
+*/
 
 void UniversalEncodingHandler::Report(const char* charset) {
-  qDebug() << "Detected as" << charset;
+  // Pure ASCII is recorded as "no codec".
+  if (qstrcmp(charset, "ASCII") == 0) {
+    current_codec_ = 0;
+    return;
+  }
+
   QTextCodec* codec = QTextCodec::codecForName(charset);
   if (!codec) {
     qWarning() << "Could not identify encoding in ID3v1 tag. Assuming ASCII.";
+  } else {
+    qWarning() << "Detected non-ASCII encoding in ID3v1 tag:" << charset;
   }
   current_codec_ = codec;
 }
diff --git a/src/core/song.h b/src/core/song.h
index 3d9c0e00d..0da7c02d9 100644
--- a/src/core/song.h
+++ b/src/core/song.h
@@ -60,7 +60,7 @@ class UniversalEncodingHandler : public TagLib::ID3v1::StringHandler,
 
   // TagLib::ID3v1::StringHandler
   virtual TagLib::String parse(const TagLib::ByteVector& data) const;
-  virtual TagLib::ByteVector render(const TagLib::String& s) const;
+  //virtual TagLib::ByteVector render(const TagLib::String& s) const;
 
  private:
   // nsUniversalDetector
diff --git a/src/main.cpp b/src/main.cpp
index 28d80f74d..e4c685dcb 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -103,6 +103,10 @@ int main(int argc, char *argv[]) {
   lastfm::ws::ApiKey = LastFMService::kApiKey;
   lastfm::ws::SharedSecret = LastFMService::kSecret;
 
+  // Detect technically invalid usage of non-ASCII in ID3v1 tags.
+  UniversalEncodingHandler handler;
+  TagLib::ID3v1::Tag::setStringHandler(&handler);
+
   QtSingleApplication a(argc, argv);
   a.setQuitOnLastWindowClosed(false);
 
diff --git a/tests/song_test.cpp b/tests/song_test.cpp
index d4d387cdc..b91da0db5 100644
--- a/tests/song_test.cpp
+++ b/tests/song_test.cpp
@@ -65,4 +65,24 @@ TEST_F(SongTest, InitsFromFile) {
   EXPECT_EQ("Baz", song.album());
 }
 
+TEST_F(SongTest, DetectsWindows1251) {
+  const char cp1251[] = "\xc2\xfb\xe4\xfb\xf5\xe0\xe9";  // Выдыхай
+  UniversalEncodingHandler handler;
+  TagLib::ByteVector bytes(cp1251);
+  TagLib::String str = handler.parse(bytes);
+  EXPECT_FALSE(str.isAscii());
+  EXPECT_FALSE(str.isLatin1());
+  EXPECT_STREQ("Выдыхай", str.to8Bit(true).c_str());
+}
+
+TEST_F(SongTest, LeavesASCIIAlone) {
+  const char* ascii = "foobar";
+  UniversalEncodingHandler handler;
+  TagLib::ByteVector bytes(ascii);
+  TagLib::String str = handler.parse(bytes);
+  EXPECT_TRUE(str.isAscii());
+  EXPECT_TRUE(str.isLatin1());
+  EXPECT_STREQ("foobar", str.to8Bit(false).c_str());
+}
+
 } // namespace