mirror of
https://github.com/martinrotter/rssguard.git
synced 2025-01-30 17:15:01 +01:00
Enhance HTML unescaping, also force unescape of AUTHOR and TITLE message fields for RSS/ATOM/RDF formats.
This commit is contained in:
parent
2903bf51be
commit
56c44a23b0
@ -69,33 +69,17 @@ QString WebFactory::stripTags(QString text) {
|
||||
return text.remove(QRegularExpression(QSL("<[^>]*>")));
|
||||
}
|
||||
|
||||
QString WebFactory::escapeHtml(const QString& html) {
|
||||
QString WebFactory::unescapeHtml(const QString& html) {
|
||||
if (m_escapes.isEmpty()) {
|
||||
generateEscapes();
|
||||
generateUnescapes();
|
||||
}
|
||||
|
||||
QString output = html;
|
||||
QMapIterator<QString, QString> i(m_escapes);
|
||||
QMapIterator<QString, char16_t> i(m_escapes);
|
||||
|
||||
while (i.hasNext()) {
|
||||
i.next();
|
||||
output = output.replace(i.key(), i.value());
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
QString WebFactory::deEscapeHtml(const QString& text) {
|
||||
if (m_deEscapes.isEmpty()) {
|
||||
generateDeescapes();
|
||||
}
|
||||
|
||||
QString output = text;
|
||||
QMapIterator<QString, QString> i(m_deEscapes);
|
||||
|
||||
while (i.hasNext()) {
|
||||
i.next();
|
||||
output = output.replace(i.key(), i.value());
|
||||
output = output.replace(i.key(), QString(QChar(i.value())));
|
||||
}
|
||||
|
||||
return output;
|
||||
@ -202,23 +186,264 @@ QAction* WebFactory::createEngineSettingsAction(const QString& title, QWebEngine
|
||||
|
||||
#endif
|
||||
|
||||
void WebFactory::generateEscapes() {
|
||||
m_escapes[QSL("<")] = QL1C('<');
|
||||
m_escapes[QSL(">")] = QL1C('>');
|
||||
m_escapes[QSL("&")] = QL1C('&');
|
||||
m_escapes[QSL(""")] = QL1C('\"');
|
||||
m_escapes[QSL(" ")] = QL1C(' ');
|
||||
m_escapes[QSL("±")] = QSL("±");
|
||||
m_escapes[QSL("×")] = QSL("×");
|
||||
m_escapes[QSL("'")] = QL1C('\'');
|
||||
}
|
||||
|
||||
void WebFactory::generateDeescapes() {
|
||||
m_deEscapes[QSL("<")] = QSL("<");
|
||||
m_deEscapes[QSL(">")] = QSL(">");
|
||||
m_deEscapes[QSL("&")] = QSL("&");
|
||||
m_deEscapes[QSL("\"")] = QSL(""");
|
||||
m_deEscapes[QSL("±")] = QSL("±");
|
||||
m_deEscapes[QSL("×")] = QSL("×");
|
||||
m_deEscapes[QSL("\'")] = QSL("'");
|
||||
// Fills m_escapes with the table used by unescapeHtml(): each key is a
// complete HTML entity string ("&name;") and each value is the single UTF-16
// code unit that replaces it.
//
// Entity names are case-sensitive, so e.g. "&AMP;" and "&amp;" are separate
// keys that map to the same character.
//
// NOTE(review): the entity names below were re-derived from their code points,
// because the rendering this function was reviewed from showed the keys
// already decoded to the characters themselves. Confirm them against the
// repository copy - in particular the trailing numeric apostrophe reference.
void WebFactory::generateUnescapes() {
  m_escapes[QSL("&AElig;")] = 0x00c6;
  m_escapes[QSL("&AMP;")] = 38;
  m_escapes[QSL("&Aacute;")] = 0x00c1;
  m_escapes[QSL("&Acirc;")] = 0x00c2;
  m_escapes[QSL("&Agrave;")] = 0x00c0;
  m_escapes[QSL("&Alpha;")] = 0x0391;
  m_escapes[QSL("&Aring;")] = 0x00c5;
  m_escapes[QSL("&Atilde;")] = 0x00c3;
  m_escapes[QSL("&Auml;")] = 0x00c4;
  m_escapes[QSL("&Beta;")] = 0x0392;
  m_escapes[QSL("&Ccedil;")] = 0x00c7;
  m_escapes[QSL("&Chi;")] = 0x03a7;
  m_escapes[QSL("&Dagger;")] = 0x2021;
  m_escapes[QSL("&Delta;")] = 0x0394;
  m_escapes[QSL("&ETH;")] = 0x00d0;
  m_escapes[QSL("&Eacute;")] = 0x00c9;
  m_escapes[QSL("&Ecirc;")] = 0x00ca;
  m_escapes[QSL("&Egrave;")] = 0x00c8;
  m_escapes[QSL("&Epsilon;")] = 0x0395;
  m_escapes[QSL("&Eta;")] = 0x0397;
  m_escapes[QSL("&Euml;")] = 0x00cb;
  m_escapes[QSL("&GT;")] = 62;
  m_escapes[QSL("&Gamma;")] = 0x0393;
  m_escapes[QSL("&Iacute;")] = 0x00cd;
  m_escapes[QSL("&Icirc;")] = 0x00ce;
  m_escapes[QSL("&Igrave;")] = 0x00cc;
  m_escapes[QSL("&Iota;")] = 0x0399;
  m_escapes[QSL("&Iuml;")] = 0x00cf;
  m_escapes[QSL("&Kappa;")] = 0x039a;
  m_escapes[QSL("&LT;")] = 60;
  m_escapes[QSL("&Lambda;")] = 0x039b;
  m_escapes[QSL("&Mu;")] = 0x039c;
  m_escapes[QSL("&Ntilde;")] = 0x00d1;
  m_escapes[QSL("&Nu;")] = 0x039d;
  m_escapes[QSL("&OElig;")] = 0x0152;
  m_escapes[QSL("&Oacute;")] = 0x00d3;
  m_escapes[QSL("&Ocirc;")] = 0x00d4;
  m_escapes[QSL("&Ograve;")] = 0x00d2;
  m_escapes[QSL("&Omega;")] = 0x03a9;
  m_escapes[QSL("&Omicron;")] = 0x039f;
  m_escapes[QSL("&Oslash;")] = 0x00d8;
  m_escapes[QSL("&Otilde;")] = 0x00d5;
  m_escapes[QSL("&Ouml;")] = 0x00d6;
  m_escapes[QSL("&Phi;")] = 0x03a6;
  m_escapes[QSL("&Pi;")] = 0x03a0;
  m_escapes[QSL("&Prime;")] = 0x2033;
  m_escapes[QSL("&Psi;")] = 0x03a8;
  m_escapes[QSL("&QUOT;")] = 34;
  m_escapes[QSL("&Rho;")] = 0x03a1;
  m_escapes[QSL("&Scaron;")] = 0x0160;
  m_escapes[QSL("&Sigma;")] = 0x03a3;
  m_escapes[QSL("&THORN;")] = 0x00de;
  m_escapes[QSL("&Tau;")] = 0x03a4;
  m_escapes[QSL("&Theta;")] = 0x0398;
  m_escapes[QSL("&Uacute;")] = 0x00da;
  m_escapes[QSL("&Ucirc;")] = 0x00db;
  m_escapes[QSL("&Ugrave;")] = 0x00d9;
  m_escapes[QSL("&Upsilon;")] = 0x03a5;
  m_escapes[QSL("&Uuml;")] = 0x00dc;
  m_escapes[QSL("&Xi;")] = 0x039e;
  m_escapes[QSL("&Yacute;")] = 0x00dd;
  m_escapes[QSL("&Yuml;")] = 0x0178;
  m_escapes[QSL("&Zeta;")] = 0x0396;
  m_escapes[QSL("&aacute;")] = 0x00e1;
  m_escapes[QSL("&acirc;")] = 0x00e2;
  m_escapes[QSL("&acute;")] = 0x00b4;
  m_escapes[QSL("&aelig;")] = 0x00e6;
  m_escapes[QSL("&agrave;")] = 0x00e0;
  m_escapes[QSL("&alefsym;")] = 0x2135;
  m_escapes[QSL("&alpha;")] = 0x03b1;
  m_escapes[QSL("&amp;")] = 38;

  // U+2227 LOGICAL AND. The previous value 0x22a5 is the code point of
  // "&perp;" (see below), so "&and;" was decoded to the wrong symbol.
  m_escapes[QSL("&and;")] = 0x2227;
  m_escapes[QSL("&ang;")] = 0x2220;
  m_escapes[QSL("&apos;")] = 0x0027;
  m_escapes[QSL("&aring;")] = 0x00e5;
  m_escapes[QSL("&asymp;")] = 0x2248;
  m_escapes[QSL("&atilde;")] = 0x00e3;
  m_escapes[QSL("&auml;")] = 0x00e4;
  m_escapes[QSL("&bdquo;")] = 0x201e;
  m_escapes[QSL("&beta;")] = 0x03b2;
  m_escapes[QSL("&brvbar;")] = 0x00a6;
  m_escapes[QSL("&bull;")] = 0x2022;
  m_escapes[QSL("&cap;")] = 0x2229;
  m_escapes[QSL("&ccedil;")] = 0x00e7;
  m_escapes[QSL("&cedil;")] = 0x00b8;
  m_escapes[QSL("&cent;")] = 0x00a2;
  m_escapes[QSL("&chi;")] = 0x03c7;
  m_escapes[QSL("&circ;")] = 0x02c6;
  m_escapes[QSL("&clubs;")] = 0x2663;
  m_escapes[QSL("&cong;")] = 0x2245;
  m_escapes[QSL("&copy;")] = 0x00a9;
  m_escapes[QSL("&crarr;")] = 0x21b5;
  m_escapes[QSL("&cup;")] = 0x222a;
  m_escapes[QSL("&curren;")] = 0x00a4;
  m_escapes[QSL("&dArr;")] = 0x21d3;
  m_escapes[QSL("&dagger;")] = 0x2020;
  m_escapes[QSL("&darr;")] = 0x2193;
  m_escapes[QSL("&deg;")] = 0x00b0;
  m_escapes[QSL("&delta;")] = 0x03b4;
  m_escapes[QSL("&diams;")] = 0x2666;
  m_escapes[QSL("&divide;")] = 0x00f7;
  m_escapes[QSL("&eacute;")] = 0x00e9;
  m_escapes[QSL("&ecirc;")] = 0x00ea;
  m_escapes[QSL("&egrave;")] = 0x00e8;
  m_escapes[QSL("&empty;")] = 0x2205;
  m_escapes[QSL("&emsp;")] = 0x2003;
  m_escapes[QSL("&ensp;")] = 0x2002;
  m_escapes[QSL("&epsilon;")] = 0x03b5;
  m_escapes[QSL("&equiv;")] = 0x2261;
  m_escapes[QSL("&eta;")] = 0x03b7;
  m_escapes[QSL("&eth;")] = 0x00f0;
  m_escapes[QSL("&euml;")] = 0x00eb;
  m_escapes[QSL("&euro;")] = 0x20ac;
  m_escapes[QSL("&exist;")] = 0x2203;
  m_escapes[QSL("&fnof;")] = 0x0192;
  m_escapes[QSL("&forall;")] = 0x2200;
  m_escapes[QSL("&frac12;")] = 0x00bd;
  m_escapes[QSL("&frac14;")] = 0x00bc;
  m_escapes[QSL("&frac34;")] = 0x00be;
  m_escapes[QSL("&frasl;")] = 0x2044;
  m_escapes[QSL("&gamma;")] = 0x03b3;
  m_escapes[QSL("&ge;")] = 0x2265;
  m_escapes[QSL("&gt;")] = 62;
  m_escapes[QSL("&hArr;")] = 0x21d4;
  m_escapes[QSL("&harr;")] = 0x2194;
  m_escapes[QSL("&hearts;")] = 0x2665;
  m_escapes[QSL("&hellip;")] = 0x2026;
  m_escapes[QSL("&iacute;")] = 0x00ed;
  m_escapes[QSL("&icirc;")] = 0x00ee;
  m_escapes[QSL("&iexcl;")] = 0x00a1;
  m_escapes[QSL("&igrave;")] = 0x00ec;
  m_escapes[QSL("&image;")] = 0x2111;
  m_escapes[QSL("&infin;")] = 0x221e;
  m_escapes[QSL("&int;")] = 0x222b;
  m_escapes[QSL("&iota;")] = 0x03b9;
  m_escapes[QSL("&iquest;")] = 0x00bf;
  m_escapes[QSL("&isin;")] = 0x2208;
  m_escapes[QSL("&iuml;")] = 0x00ef;
  m_escapes[QSL("&kappa;")] = 0x03ba;
  m_escapes[QSL("&lArr;")] = 0x21d0;
  m_escapes[QSL("&lambda;")] = 0x03bb;

  // HTML 4 code point, kept unchanged (HTML5 maps "&lang;" to U+27E8).
  m_escapes[QSL("&lang;")] = 0x2329;
  m_escapes[QSL("&laquo;")] = 0x00ab;
  m_escapes[QSL("&larr;")] = 0x2190;
  m_escapes[QSL("&lceil;")] = 0x2308;
  m_escapes[QSL("&ldquo;")] = 0x201c;
  m_escapes[QSL("&le;")] = 0x2264;
  m_escapes[QSL("&lfloor;")] = 0x230a;
  m_escapes[QSL("&lowast;")] = 0x2217;
  m_escapes[QSL("&loz;")] = 0x25ca;
  m_escapes[QSL("&lrm;")] = 0x200e;
  m_escapes[QSL("&lsaquo;")] = 0x2039;
  m_escapes[QSL("&lsquo;")] = 0x2018;
  m_escapes[QSL("&lt;")] = 60;
  m_escapes[QSL("&macr;")] = 0x00af;
  m_escapes[QSL("&mdash;")] = 0x2014;
  m_escapes[QSL("&micro;")] = 0x00b5;
  m_escapes[QSL("&middot;")] = 0x00b7;
  m_escapes[QSL("&minus;")] = 0x2212;
  m_escapes[QSL("&mu;")] = 0x03bc;
  m_escapes[QSL("&nabla;")] = 0x2207;
  m_escapes[QSL("&nbsp;")] = 0x00a0;
  m_escapes[QSL("&ndash;")] = 0x2013;
  m_escapes[QSL("&ne;")] = 0x2260;
  m_escapes[QSL("&ni;")] = 0x220b;
  m_escapes[QSL("&not;")] = 0x00ac;
  m_escapes[QSL("&notin;")] = 0x2209;
  m_escapes[QSL("&nsub;")] = 0x2284;
  m_escapes[QSL("&ntilde;")] = 0x00f1;
  m_escapes[QSL("&nu;")] = 0x03bd;
  m_escapes[QSL("&oacute;")] = 0x00f3;
  m_escapes[QSL("&ocirc;")] = 0x00f4;
  m_escapes[QSL("&oelig;")] = 0x0153;
  m_escapes[QSL("&ograve;")] = 0x00f2;
  m_escapes[QSL("&oline;")] = 0x203e;
  m_escapes[QSL("&omega;")] = 0x03c9;
  m_escapes[QSL("&omicron;")] = 0x03bf;
  m_escapes[QSL("&oplus;")] = 0x2295;

  // U+2228 LOGICAL OR. The previous value 0x22a6 is a different symbol.
  m_escapes[QSL("&or;")] = 0x2228;
  m_escapes[QSL("&ordf;")] = 0x00aa;
  m_escapes[QSL("&ordm;")] = 0x00ba;
  m_escapes[QSL("&oslash;")] = 0x00f8;
  m_escapes[QSL("&otilde;")] = 0x00f5;
  m_escapes[QSL("&otimes;")] = 0x2297;
  m_escapes[QSL("&ouml;")] = 0x00f6;
  m_escapes[QSL("&para;")] = 0x00b6;
  m_escapes[QSL("&part;")] = 0x2202;
  m_escapes[QSL("&percnt;")] = 0x0025;
  m_escapes[QSL("&permil;")] = 0x2030;
  m_escapes[QSL("&perp;")] = 0x22a5;
  m_escapes[QSL("&phi;")] = 0x03c6;
  m_escapes[QSL("&pi;")] = 0x03c0;
  m_escapes[QSL("&piv;")] = 0x03d6;
  m_escapes[QSL("&plusmn;")] = 0x00b1;
  m_escapes[QSL("&pound;")] = 0x00a3;
  m_escapes[QSL("&prime;")] = 0x2032;
  m_escapes[QSL("&prod;")] = 0x220f;
  m_escapes[QSL("&prop;")] = 0x221d;
  m_escapes[QSL("&psi;")] = 0x03c8;
  m_escapes[QSL("&quot;")] = 34;
  m_escapes[QSL("&rArr;")] = 0x21d2;
  m_escapes[QSL("&radic;")] = 0x221a;

  // HTML 4 code point, kept unchanged (HTML5 maps "&rang;" to U+27E9).
  m_escapes[QSL("&rang;")] = 0x232a;
  m_escapes[QSL("&raquo;")] = 0x00bb;
  m_escapes[QSL("&rarr;")] = 0x2192;
  m_escapes[QSL("&rceil;")] = 0x2309;
  m_escapes[QSL("&rdquo;")] = 0x201d;
  m_escapes[QSL("&real;")] = 0x211c;
  m_escapes[QSL("&reg;")] = 0x00ae;
  m_escapes[QSL("&rfloor;")] = 0x230b;
  m_escapes[QSL("&rho;")] = 0x03c1;
  m_escapes[QSL("&rlm;")] = 0x200f;
  m_escapes[QSL("&rsaquo;")] = 0x203a;
  m_escapes[QSL("&rsquo;")] = 0x2019;
  m_escapes[QSL("&sbquo;")] = 0x201a;
  m_escapes[QSL("&scaron;")] = 0x0161;
  m_escapes[QSL("&sdot;")] = 0x22c5;
  m_escapes[QSL("&sect;")] = 0x00a7;
  m_escapes[QSL("&shy;")] = 0x00ad;
  m_escapes[QSL("&sigma;")] = 0x03c3;
  m_escapes[QSL("&sigmaf;")] = 0x03c2;
  m_escapes[QSL("&sim;")] = 0x223c;
  m_escapes[QSL("&spades;")] = 0x2660;
  m_escapes[QSL("&sub;")] = 0x2282;
  m_escapes[QSL("&sube;")] = 0x2286;
  m_escapes[QSL("&sum;")] = 0x2211;
  m_escapes[QSL("&sup;")] = 0x2283;
  m_escapes[QSL("&sup1;")] = 0x00b9;
  m_escapes[QSL("&sup2;")] = 0x00b2;
  m_escapes[QSL("&sup3;")] = 0x00b3;
  m_escapes[QSL("&supe;")] = 0x2287;
  m_escapes[QSL("&szlig;")] = 0x00df;
  m_escapes[QSL("&tau;")] = 0x03c4;
  m_escapes[QSL("&there4;")] = 0x2234;
  m_escapes[QSL("&theta;")] = 0x03b8;
  m_escapes[QSL("&thetasym;")] = 0x03d1;
  m_escapes[QSL("&thinsp;")] = 0x2009;
  m_escapes[QSL("&thorn;")] = 0x00fe;
  m_escapes[QSL("&tilde;")] = 0x02dc;
  m_escapes[QSL("&times;")] = 0x00d7;
  m_escapes[QSL("&trade;")] = 0x2122;
  m_escapes[QSL("&uArr;")] = 0x21d1;
  m_escapes[QSL("&uacute;")] = 0x00fa;
  m_escapes[QSL("&uarr;")] = 0x2191;
  m_escapes[QSL("&ucirc;")] = 0x00fb;
  m_escapes[QSL("&ugrave;")] = 0x00f9;
  m_escapes[QSL("&uml;")] = 0x00a8;
  m_escapes[QSL("&upsih;")] = 0x03d2;
  m_escapes[QSL("&upsilon;")] = 0x03c5;
  m_escapes[QSL("&uuml;")] = 0x00fc;
  m_escapes[QSL("&weierp;")] = 0x2118;
  m_escapes[QSL("&xi;")] = 0x03be;
  m_escapes[QSL("&yacute;")] = 0x00fd;
  m_escapes[QSL("&yen;")] = 0x00a5;
  m_escapes[QSL("&yuml;")] = 0x00ff;
  m_escapes[QSL("&zeta;")] = 0x03b6;
  m_escapes[QSL("&zwj;")] = 0x200d;
  m_escapes[QSL("&zwnj;")] = 0x200c;

  // Numeric reference for the apostrophe, in addition to "&apos;" above.
  // NOTE(review): presumably "&#039;" - confirm the exact spelling.
  m_escapes[QSL("&#039;")] = 0x27;
}
|
||||
|
@ -28,8 +28,7 @@ class WebFactory : public QObject {
|
||||
QString stripTags(QString text);
|
||||
|
||||
// HTML entity escaping.
|
||||
QString escapeHtml(const QString& html);
|
||||
QString deEscapeHtml(const QString& text);
|
||||
QString unescapeHtml(const QString& html);
|
||||
|
||||
#if defined (USE_WEBENGINE)
|
||||
QAction* engineSettingsAction();
|
||||
@ -50,11 +49,9 @@ class WebFactory : public QObject {
|
||||
#endif
|
||||
|
||||
private:
|
||||
void generateEscapes();
|
||||
void generateDeescapes();
|
||||
void generateUnescapes();
|
||||
|
||||
QMap<QString, QString> m_escapes;
|
||||
QMap<QString, QString> m_deEscapes;
|
||||
QMap<QString, char16_t> m_escapes;
|
||||
|
||||
#if defined (USE_WEBENGINE)
|
||||
QAction* m_engineSettings;
|
||||
|
@ -60,9 +60,9 @@ Message AtomParser::extractMessage(const QDomElement& msg_element, QDateTime cur
|
||||
}
|
||||
|
||||
// Title is not empty, description does not matter.
|
||||
new_message.m_title = qApp->web()->stripTags(title);
|
||||
new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(title));
|
||||
new_message.m_contents = summary;
|
||||
new_message.m_author = qApp->web()->escapeHtml(messageAuthor(msg_element));
|
||||
new_message.m_author = qApp->web()->unescapeHtml(messageAuthor(msg_element));
|
||||
|
||||
QString updated = textsFromPath(msg_element, m_atomNamespace, QSL("updated"), true).join(QSL(", "));
|
||||
|
||||
|
@ -38,13 +38,13 @@ QList<Message> RdfParser::parseXmlData(const QString& data) {
|
||||
}
|
||||
else {
|
||||
// Title is empty but description is not.
|
||||
new_message.m_title = qApp->web()->escapeHtml(qApp->web()->stripTags(elem_description.simplified()));
|
||||
new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(elem_description.simplified()));
|
||||
new_message.m_contents = elem_description;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Title is really not empty, description does not matter.
|
||||
new_message.m_title = qApp->web()->escapeHtml(qApp->web()->stripTags(elem_title));
|
||||
new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(elem_title));
|
||||
new_message.m_contents = elem_description;
|
||||
}
|
||||
|
||||
|
@ -46,13 +46,13 @@ Message RssParser::extractMessage(const QDomElement& msg_element, QDateTime curr
|
||||
}
|
||||
else {
|
||||
// Title is empty but description is not.
|
||||
new_message.m_title = qApp->web()->stripTags(elem_description.simplified());
|
||||
new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(elem_description.simplified()));
|
||||
new_message.m_contents = elem_description;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Title is really not empty, description does not matter.
|
||||
new_message.m_title = qApp->web()->stripTags(elem_title);
|
||||
new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(elem_title));
|
||||
new_message.m_contents = elem_description;
|
||||
}
|
||||
|
||||
@ -102,6 +102,8 @@ Message RssParser::extractMessage(const QDomElement& msg_element, QDateTime curr
|
||||
new_message.m_author = "";
|
||||
}
|
||||
|
||||
new_message.m_author = qApp->web()->unescapeHtml(new_message.m_author);
|
||||
|
||||
if (new_message.m_url.isNull()) {
|
||||
new_message.m_url = "";
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user