Fix some Latin1 encoding guessing.

This commit is contained in:
John Maguire 2010-07-15 12:46:08 +00:00
parent 90fc1481e6
commit 579c7cc592
2 changed files with 11 additions and 8 deletions

View File

@ -229,14 +229,9 @@ QTextCodec* UniversalEncodingHandler::Guess(const TagLib::String& input) {
if (input.isLatin1()) { if (input.isLatin1()) {
qWarning() << "Extended ASCII... possibly should be CP866 or windows-1251 instead"; qWarning() << "Extended ASCII... possibly should be CP866 or windows-1251 instead";
std::string broken = input.toCString(true); std::string broken = input.toCString(true);
std::string fixed; std::string fixed = QString::fromUtf8(broken.c_str()).toStdString();
if (broken.size() > input.size()) { QTextCodec* codec = Guess(fixed.c_str());
fixed = QString::fromUtf8(broken.c_str()).toStdString(); return codec;
QTextCodec* codec = Guess(fixed.c_str());
return codec;
} else {
return NULL;
}
} }
return QTextCodec::codecForName("UTF-8"); return QTextCodec::codecForName("UTF-8");
} }

View File

@ -178,4 +178,12 @@ TEST_F(SongTest, DecodesUtf8AsUtf8) {
EXPECT_EQ(QString::fromUtf8(""), fixed); EXPECT_EQ(QString::fromUtf8(""), fixed);
} }
// Checks that a Latin-1 tagged string containing the byte 0xE1 ("á") is
// returned by FixEncoding() as the 9-character text "Sudáfrica". The test name
// flags this input as ambiguous between Latin-1 and Windows-1252.
TEST_F(SongTest, DecodesAmbiguousLatin1AndWindows1252) {
  // "Sudáfrica" as Latin-1 bytes. A string literal is used instead of a braced
  // list of integer constants because 0xe1 does not fit in a signed char and
  // would be a narrowing conversion (ill-formed since C++11). The literal is
  // split right after \xe1 so the following 'f' is not parsed as part of the
  // hex escape.
  const char latin1[] = "Sud\xe1" "frica";
  TagLib::String str(latin1, TagLib::String::Latin1);
  QString fixed = UniversalEncodingHandler(NS_FILTER_NON_CJK).FixEncoding(str);
  EXPECT_EQ(9, fixed.length());
  EXPECT_EQ(QString::fromUtf8("Sudáfrica"), fixed);
}
} // namespace } // namespace