parent
72f5307524
commit
5dd0a9c35f
|
@ -114,7 +114,6 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, uint32_t aLen)
|
||||||
{
|
{
|
||||||
mStart = false;
|
mStart = false;
|
||||||
if (aLen > 3)
|
if (aLen > 3)
|
||||||
qDebug() << aBuf[0];
|
|
||||||
switch (aBuf[0])
|
switch (aBuf[0])
|
||||||
{
|
{
|
||||||
case '\xEF':
|
case '\xEF':
|
||||||
|
@ -241,7 +240,6 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, uint32_t aLen)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default: //pure ascii
|
default: //pure ascii
|
||||||
mDetectedCharset = "ASCII";
|
|
||||||
;//do nothing here
|
;//do nothing here
|
||||||
}
|
}
|
||||||
return NS_OK;
|
return NS_OK;
|
||||||
|
@ -292,6 +290,8 @@ void nsUniversalDetector::DataEnd()
|
||||||
break;
|
break;
|
||||||
case eEscAscii:
|
case eEscAscii:
|
||||||
break;
|
break;
|
||||||
|
case ePureAscii:
|
||||||
|
Report("ASCII");
|
||||||
default:
|
default:
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
|
|
|
@ -107,11 +107,11 @@ TagLib::String UniversalEncodingHandler::parse(const TagLib::ByteVector& data) c
|
||||||
// Detected codec -> QString (UTF-16) -> UTF8 -> UTF16-BE (TagLib::String)
|
// Detected codec -> QString (UTF-16) -> UTF8 -> UTF16-BE (TagLib::String)
|
||||||
// That's probably expensive.
|
// That's probably expensive.
|
||||||
QString unicode = current_codec_->toUnicode(data.data(), data.size());
|
QString unicode = current_codec_->toUnicode(data.data(), data.size());
|
||||||
qDebug() << "Decoded to:" << unicode;
|
|
||||||
return TagLib::String(unicode.toUtf8().constData(), TagLib::String::UTF8);
|
return TagLib::String(unicode.toUtf8().constData(), TagLib::String::UTF8);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
TagLib::ByteVector UniversalEncodingHandler::render(const TagLib::String& s) const {
|
TagLib::ByteVector UniversalEncodingHandler::render(const TagLib::String& s) const {
|
||||||
// TODO: what should we do here?
|
// TODO: what should we do here?
|
||||||
// 1. Coerce to ASCII
|
// 1. Coerce to ASCII
|
||||||
|
@ -120,12 +120,19 @@ TagLib::ByteVector UniversalEncodingHandler::render(const TagLib::String& s) con
|
||||||
// 4. Nothing and rewrite the tag as ID3v2 & UTF8
|
// 4. Nothing and rewrite the tag as ID3v2 & UTF8
|
||||||
return TagLib::ByteVector();
|
return TagLib::ByteVector();
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
void UniversalEncodingHandler::Report(const char* charset) {
|
void UniversalEncodingHandler::Report(const char* charset) {
|
||||||
qDebug() << "Detected as" << charset;
|
if (qstrcmp(charset, "ASCII") == 0) {
|
||||||
|
current_codec_ = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
QTextCodec* codec = QTextCodec::codecForName(charset);
|
QTextCodec* codec = QTextCodec::codecForName(charset);
|
||||||
if (!codec) {
|
if (!codec) {
|
||||||
qWarning() << "Could not identify encoding in ID3v1 tag. Assuming ASCII.";
|
qWarning() << "Could not identify encoding in ID3v1 tag. Assuming ASCII.";
|
||||||
|
} else {
|
||||||
|
qWarning() << "Detected non-ASCII encoding in ID3v1 tag:" << charset;
|
||||||
}
|
}
|
||||||
current_codec_ = codec;
|
current_codec_ = codec;
|
||||||
}
|
}
|
||||||
|
|
|
@ -60,7 +60,7 @@ class UniversalEncodingHandler : public TagLib::ID3v1::StringHandler,
|
||||||
|
|
||||||
// TagLib::ID3v1::StringHandler
|
// TagLib::ID3v1::StringHandler
|
||||||
virtual TagLib::String parse(const TagLib::ByteVector& data) const;
|
virtual TagLib::String parse(const TagLib::ByteVector& data) const;
|
||||||
virtual TagLib::ByteVector render(const TagLib::String& s) const;
|
//virtual TagLib::ByteVector render(const TagLib::String& s) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// nsUniversalDetector
|
// nsUniversalDetector
|
||||||
|
|
|
@ -103,6 +103,10 @@ int main(int argc, char *argv[]) {
|
||||||
lastfm::ws::ApiKey = LastFMService::kApiKey;
|
lastfm::ws::ApiKey = LastFMService::kApiKey;
|
||||||
lastfm::ws::SharedSecret = LastFMService::kSecret;
|
lastfm::ws::SharedSecret = LastFMService::kSecret;
|
||||||
|
|
||||||
|
// Detect technically invalid usage of non-ASCII in ID3v1 tags.
|
||||||
|
UniversalEncodingHandler handler;
|
||||||
|
TagLib::ID3v1::Tag::setStringHandler(&handler);
|
||||||
|
|
||||||
QtSingleApplication a(argc, argv);
|
QtSingleApplication a(argc, argv);
|
||||||
a.setQuitOnLastWindowClosed(false);
|
a.setQuitOnLastWindowClosed(false);
|
||||||
|
|
||||||
|
|
|
@ -65,4 +65,24 @@ TEST_F(SongTest, InitsFromFile) {
|
||||||
EXPECT_EQ("Baz", song.album());
|
EXPECT_EQ("Baz", song.album());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Feeds a Windows-1251 encoded Cyrillic word through the handler and checks
// that the parsed string is non-ASCII, non-Latin-1 and equals the expected
// UTF-8 text.
TEST_F(SongTest, DetectsWindows1251) {
  // "Выдыхай" in Windows-1251. Written as a string literal with hex escapes
  // rather than a brace-initialised char array: values above 0x7f in a
  // char[] initialiser list are a narrowing conversion where char is signed.
  // The literal supplies the terminating NUL that ByteVector(const char*)
  // relies on.
  const char cp1251[] = "\xc2\xfb\xe4\xfb\xf5\xe0\xe9";

  UniversalEncodingHandler handler;
  TagLib::ByteVector bytes(cp1251);
  TagLib::String str = handler.parse(bytes);

  EXPECT_FALSE(str.isAscii());
  EXPECT_FALSE(str.isLatin1());
  // to8Bit(true) yields UTF-8; the comparison assumes this source file is
  // itself saved as UTF-8.
  EXPECT_STREQ("Выдыхай", str.to8Bit(true).c_str());
}
|
||||||
|
|
||||||
|
// Checks that plain ASCII input comes back from the handler unchanged and is
// still classified as ASCII / Latin-1.
TEST_F(SongTest, LeavesASCIIAlone) {
  // String literals are const; binding one to a non-const char* is not
  // valid C++11 and later.
  const char* ascii = "foobar";

  UniversalEncodingHandler handler;
  TagLib::ByteVector bytes(ascii);
  TagLib::String str = handler.parse(bytes);

  EXPECT_TRUE(str.isAscii());
  EXPECT_TRUE(str.isLatin1());
  EXPECT_STREQ("foobar", str.to8Bit(false).c_str());
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
Loading…
Reference in New Issue