diff --git a/src/librssguard/network-web/webfactory.cpp b/src/librssguard/network-web/webfactory.cpp index 51dc3012e..b28c37e4e 100644 --- a/src/librssguard/network-web/webfactory.cpp +++ b/src/librssguard/network-web/webfactory.cpp @@ -69,33 +69,17 @@ QString WebFactory::stripTags(QString text) { return text.remove(QRegularExpression(QSL("<[^>]*>"))); } -QString WebFactory::escapeHtml(const QString& html) { +QString WebFactory::unescapeHtml(const QString& html) { if (m_escapes.isEmpty()) { - generateEscapes(); + generateUnescapes(); } QString output = html; - QMapIterator i(m_escapes); + QMapIterator i(m_escapes); while (i.hasNext()) { i.next(); - output = output.replace(i.key(), i.value()); - } - - return output; -} - -QString WebFactory::deEscapeHtml(const QString& text) { - if (m_deEscapes.isEmpty()) { - generateDeescapes(); - } - - QString output = text; - QMapIterator i(m_deEscapes); - - while (i.hasNext()) { - i.next(); - output = output.replace(i.key(), i.value()); + output = output.replace(i.key(), QString(QChar(i.value()))); } return output; @@ -202,23 +186,264 @@ QAction* WebFactory::createEngineSettingsAction(const QString& title, QWebEngine #endif -void WebFactory::generateEscapes() { - m_escapes[QSL("<")] = QL1C('<'); - m_escapes[QSL(">")] = QL1C('>'); - m_escapes[QSL("&")] = QL1C('&'); - m_escapes[QSL(""")] = QL1C('\"'); - m_escapes[QSL(" ")] = QL1C(' '); - m_escapes[QSL("±")] = QSL("±"); - m_escapes[QSL("×")] = QSL("×"); - m_escapes[QSL("'")] = QL1C('\''); -} - -void WebFactory::generateDeescapes() { - m_deEscapes[QSL("<")] = QSL("<"); - m_deEscapes[QSL(">")] = QSL(">"); - m_deEscapes[QSL("&")] = QSL("&"); - m_deEscapes[QSL("\"")] = QSL("""); - m_deEscapes[QSL("±")] = QSL("±"); - m_deEscapes[QSL("×")] = QSL("×"); - m_deEscapes[QSL("\'")] = QSL("'"); +void WebFactory::generateUnescapes() { + m_escapes[QSL("Æ")] = 0x00c6; + m_escapes[QSL("&")] = 38; + m_escapes[QSL("Á")] = 0x00c1; + m_escapes[QSL("Â")] = 0x00c2; + m_escapes[QSL("À")] = 0x00c0; + m_escapes[QSL("Α")] = 0x0391; + m_escapes[QSL("Å")] = 0x00c5; + m_escapes[QSL("Ã")] = 0x00c3; + m_escapes[QSL("Ä")] = 0x00c4; + m_escapes[QSL("Β")] = 0x0392; + m_escapes[QSL("Ç")] = 0x00c7; + m_escapes[QSL("Χ")] = 0x03a7; + m_escapes[QSL("‡")] = 0x2021; + m_escapes[QSL("Δ")] = 0x0394; + m_escapes[QSL("Ð")] = 0x00d0; + m_escapes[QSL("É")] = 0x00c9; + m_escapes[QSL("Ê")] = 0x00ca; + m_escapes[QSL("È")] = 0x00c8; + m_escapes[QSL("Ε")] = 0x0395; + m_escapes[QSL("Η")] = 0x0397; + m_escapes[QSL("Ë")] = 0x00cb; + m_escapes[QSL(">")] = 62; + m_escapes[QSL("Γ")] = 0x0393; + m_escapes[QSL("Í")] = 0x00cd; + m_escapes[QSL("Î")] = 0x00ce; + m_escapes[QSL("Ì")] = 0x00cc; + m_escapes[QSL("Ι")] = 0x0399; + m_escapes[QSL("Ï")] = 0x00cf; + m_escapes[QSL("Κ")] = 0x039a; + m_escapes[QSL("<")] = 60; + m_escapes[QSL("Λ")] = 0x039b; + m_escapes[QSL("Μ")] = 0x039c; + m_escapes[QSL("Ñ")] = 0x00d1; + m_escapes[QSL("Ν")] = 0x039d; + m_escapes[QSL("Œ")] = 0x0152; + m_escapes[QSL("Ó")] = 0x00d3; + m_escapes[QSL("Ô")] = 0x00d4; + m_escapes[QSL("Ò")] = 0x00d2; + m_escapes[QSL("Ω")] = 0x03a9; + m_escapes[QSL("Ο")] = 0x039f; + m_escapes[QSL("Ø")] = 0x00d8; + m_escapes[QSL("Õ")] = 0x00d5; + m_escapes[QSL("Ö")] = 0x00d6; + m_escapes[QSL("Φ")] = 0x03a6; + m_escapes[QSL("Π")] = 0x03a0; + m_escapes[QSL("″")] = 0x2033; + m_escapes[QSL("Ψ")] = 0x03a8; + m_escapes[QSL(""")] = 34; + m_escapes[QSL("Ρ")] = 0x03a1; + m_escapes[QSL("Š")] = 0x0160; + m_escapes[QSL("Σ")] = 0x03a3; + m_escapes[QSL("Þ")] = 0x00de; + m_escapes[QSL("Τ")] = 0x03a4; + m_escapes[QSL("Θ")] = 0x0398; + m_escapes[QSL("Ú")] = 0x00da; + m_escapes[QSL("Û")] = 0x00db; + m_escapes[QSL("Ù")] = 0x00d9; + m_escapes[QSL("Υ")] = 0x03a5; + m_escapes[QSL("Ü")] = 0x00dc; + m_escapes[QSL("Ξ")] = 0x039e; + m_escapes[QSL("Ý")] = 0x00dd; + m_escapes[QSL("Ÿ")] = 0x0178; + m_escapes[QSL("Ζ")] = 0x0396; + m_escapes[QSL("á")] = 0x00e1; + m_escapes[QSL("â")] = 0x00e2; + m_escapes[QSL("´")] = 0x00b4; + m_escapes[QSL("æ")] = 0x00e6; + m_escapes[QSL("à")] = 0x00e0; + m_escapes[QSL("ℵ")] = 0x2135; + m_escapes[QSL("α")] = 0x03b1; + m_escapes[QSL("&")] = 38; + m_escapes[QSL("∧")] = 0x22a5; + m_escapes[QSL("∠")] = 0x2220; + m_escapes[QSL("'")] = 0x0027; + m_escapes[QSL("å")] = 0x00e5; + m_escapes[QSL("≈")] = 0x2248; + m_escapes[QSL("ã")] = 0x00e3; + m_escapes[QSL("ä")] = 0x00e4; + m_escapes[QSL("„")] = 0x201e; + m_escapes[QSL("β")] = 0x03b2; + m_escapes[QSL("¦")] = 0x00a6; + m_escapes[QSL("•")] = 0x2022; + m_escapes[QSL("∩")] = 0x2229; + m_escapes[QSL("ç")] = 0x00e7; + m_escapes[QSL("¸")] = 0x00b8; + m_escapes[QSL("¢")] = 0x00a2; + m_escapes[QSL("χ")] = 0x03c7; + m_escapes[QSL("ˆ")] = 0x02c6; + m_escapes[QSL("♣")] = 0x2663; + m_escapes[QSL("≅")] = 0x2245; + m_escapes[QSL("©")] = 0x00a9; + m_escapes[QSL("↵")] = 0x21b5; + m_escapes[QSL("∪")] = 0x222a; + m_escapes[QSL("¤")] = 0x00a4; + m_escapes[QSL("⇓")] = 0x21d3; + m_escapes[QSL("†")] = 0x2020; + m_escapes[QSL("↓")] = 0x2193; + m_escapes[QSL("°")] = 0x00b0; + m_escapes[QSL("δ")] = 0x03b4; + m_escapes[QSL("♦")] = 0x2666; + m_escapes[QSL("÷")] = 0x00f7; + m_escapes[QSL("é")] = 0x00e9; + m_escapes[QSL("ê")] = 0x00ea; + m_escapes[QSL("è")] = 0x00e8; + m_escapes[QSL("∅")] = 0x2205; + m_escapes[QSL(" ")] = 0x2003; + m_escapes[QSL(" ")] = 0x2002; + m_escapes[QSL("ε")] = 0x03b5; + m_escapes[QSL("≡")] = 0x2261; + m_escapes[QSL("η")] = 0x03b7; + m_escapes[QSL("ð")] = 0x00f0; + m_escapes[QSL("ë")] = 0x00eb; + m_escapes[QSL("€")] = 0x20ac; + m_escapes[QSL("∃")] = 0x2203; + m_escapes[QSL("ƒ")] = 0x0192; + m_escapes[QSL("∀")] = 0x2200; + m_escapes[QSL("½")] = 0x00bd; + m_escapes[QSL("¼")] = 0x00bc; + m_escapes[QSL("¾")] = 0x00be; + m_escapes[QSL("⁄")] = 0x2044; + m_escapes[QSL("γ")] = 0x03b3; + m_escapes[QSL("≥")] = 0x2265; + m_escapes[QSL(">")] = 62; + m_escapes[QSL("⇔")] = 0x21d4; + m_escapes[QSL("↔")] = 0x2194; + m_escapes[QSL("♥")] = 0x2665; + m_escapes[QSL("…")] = 0x2026; + m_escapes[QSL("í")] = 0x00ed; + m_escapes[QSL("î")] = 0x00ee; + m_escapes[QSL("¡")] = 0x00a1; + m_escapes[QSL("ì")] = 0x00ec; + m_escapes[QSL("ℑ")] = 0x2111; + m_escapes[QSL("∞")] = 0x221e; + m_escapes[QSL("∫")] = 0x222b; + m_escapes[QSL("ι")] = 0x03b9; + m_escapes[QSL("¿")] = 0x00bf; + m_escapes[QSL("∈")] = 0x2208; + m_escapes[QSL("ï")] = 0x00ef; + m_escapes[QSL("κ")] = 0x03ba; + m_escapes[QSL("⇐")] = 0x21d0; + m_escapes[QSL("λ")] = 0x03bb; + m_escapes[QSL("⟨")] = 0x2329; + m_escapes[QSL("«")] = 0x00ab; + m_escapes[QSL("←")] = 0x2190; + m_escapes[QSL("⌈")] = 0x2308; + m_escapes[QSL("“")] = 0x201c; + m_escapes[QSL("≤")] = 0x2264; + m_escapes[QSL("⌊")] = 0x230a; + m_escapes[QSL("∗")] = 0x2217; + m_escapes[QSL("◊")] = 0x25ca; + m_escapes[QSL("‎")] = 0x200e; + m_escapes[QSL("‹")] = 0x2039; + m_escapes[QSL("‘")] = 0x2018; + m_escapes[QSL("<")] = 60; + m_escapes[QSL("¯")] = 0x00af; + m_escapes[QSL("—")] = 0x2014; + m_escapes[QSL("µ")] = 0x00b5; + m_escapes[QSL("·")] = 0x00b7; + m_escapes[QSL("−")] = 0x2212; + m_escapes[QSL("μ")] = 0x03bc; + m_escapes[QSL("∇")] = 0x2207; + m_escapes[QSL(" ")] = 0x00a0; + m_escapes[QSL("–")] = 0x2013; + m_escapes[QSL("≠")] = 0x2260; + m_escapes[QSL("∋")] = 0x220b; + m_escapes[QSL("¬")] = 0x00ac; + m_escapes[QSL("∉")] = 0x2209; + m_escapes[QSL("⊄")] = 0x2284; + m_escapes[QSL("ñ")] = 0x00f1; + m_escapes[QSL("ν")] = 0x03bd; + m_escapes[QSL("ó")] = 0x00f3; + m_escapes[QSL("ô")] = 0x00f4; + m_escapes[QSL("œ")] = 0x0153; + m_escapes[QSL("ò")] = 0x00f2; + m_escapes[QSL("‾")] = 0x203e; + m_escapes[QSL("ω")] = 0x03c9; + m_escapes[QSL("ο")] = 0x03bf; + m_escapes[QSL("⊕")] = 0x2295; + m_escapes[QSL("∨")] = 0x22a6; + m_escapes[QSL("ª")] = 0x00aa; + m_escapes[QSL("º")] = 0x00ba; + m_escapes[QSL("ø")] = 0x00f8; + m_escapes[QSL("õ")] = 0x00f5; + m_escapes[QSL("⊗")] = 0x2297; + m_escapes[QSL("ö")] = 0x00f6; + m_escapes[QSL("¶")] = 0x00b6; + m_escapes[QSL("∂")] = 0x2202; + m_escapes[QSL("%")] = 0x0025; + m_escapes[QSL("‰")] = 0x2030; + m_escapes[QSL("⊥")] = 0x22a5; + m_escapes[QSL("φ")] = 0x03c6; + m_escapes[QSL("π")] = 0x03c0; + m_escapes[QSL("ϖ")] = 0x03d6; + m_escapes[QSL("±")] = 0x00b1; + m_escapes[QSL("£")] = 0x00a3; + m_escapes[QSL("′")] = 0x2032; + m_escapes[QSL("∏")] = 0x220f; + m_escapes[QSL("∝")] = 0x221d; + m_escapes[QSL("ψ")] = 0x03c8; + m_escapes[QSL(""")] = 34; + m_escapes[QSL("⇒")] = 0x21d2; + m_escapes[QSL("√")] = 0x221a; + m_escapes[QSL("⟩")] = 0x232a; + m_escapes[QSL("»")] = 0x00bb; + m_escapes[QSL("→")] = 0x2192; + m_escapes[QSL("⌉")] = 0x2309; + m_escapes[QSL("”")] = 0x201d; + m_escapes[QSL("ℜ")] = 0x211c; + m_escapes[QSL("®")] = 0x00ae; + m_escapes[QSL("⌋")] = 0x230b; + m_escapes[QSL("ρ")] = 0x03c1; + m_escapes[QSL("‏")] = 0x200f; + m_escapes[QSL("›")] = 0x203a; + m_escapes[QSL("’")] = 0x2019; + m_escapes[QSL("‚")] = 0x201a; + m_escapes[QSL("š")] = 0x0161; + m_escapes[QSL("⋅")] = 0x22c5; + m_escapes[QSL("§")] = 0x00a7; + m_escapes[QSL("­")] = 0x00ad; + m_escapes[QSL("σ")] = 0x03c3; + m_escapes[QSL("ς")] = 0x03c2; + m_escapes[QSL("∼")] = 0x223c; + m_escapes[QSL("♠")] = 0x2660; + m_escapes[QSL("⊂")] = 0x2282; + m_escapes[QSL("⊆")] = 0x2286; + m_escapes[QSL("∑")] = 0x2211; + m_escapes[QSL("⊃")] = 0x2283; + m_escapes[QSL("¹")] = 0x00b9; + m_escapes[QSL("²")] = 0x00b2; + m_escapes[QSL("³")] = 0x00b3; + m_escapes[QSL("⊇")] = 0x2287; + m_escapes[QSL("ß")] = 0x00df; + m_escapes[QSL("τ")] = 0x03c4; + m_escapes[QSL("∴")] = 0x2234; + m_escapes[QSL("θ")] = 0x03b8; + m_escapes[QSL("ϑ")] = 0x03d1; + m_escapes[QSL(" ")] = 0x2009; + m_escapes[QSL("þ")] = 0x00fe; + m_escapes[QSL("˜")] = 0x02dc; + m_escapes[QSL("×")] = 0x00d7; + m_escapes[QSL("™")] = 0x2122; + m_escapes[QSL("⇑")] = 0x21d1; + m_escapes[QSL("ú")] = 0x00fa; + m_escapes[QSL("↑")] = 0x2191; + m_escapes[QSL("û")] = 0x00fb; + m_escapes[QSL("ù")] = 0x00f9; + m_escapes[QSL("¨")] = 0x00a8; + m_escapes[QSL("ϒ")] = 0x03d2; + m_escapes[QSL("υ")] = 0x03c5; + m_escapes[QSL("ü")] = 0x00fc; + m_escapes[QSL("℘")] = 0x2118; + m_escapes[QSL("ξ")] = 0x03be; + m_escapes[QSL("ý")] = 0x00fd; + m_escapes[QSL("¥")] = 0x00a5; + m_escapes[QSL("ÿ")] = 0x00ff; + m_escapes[QSL("ζ")] = 0x03b6; + m_escapes[QSL("‍")] = 0x200d; + m_escapes[QSL("‌")] = 0x200c; + m_escapes[QSL("'")] = 0x27; } diff --git a/src/librssguard/network-web/webfactory.h b/src/librssguard/network-web/webfactory.h index b41452367..9d61fef15 100644 --- a/src/librssguard/network-web/webfactory.h +++ b/src/librssguard/network-web/webfactory.h @@ -28,8 +28,7 @@ class WebFactory : public QObject { QString stripTags(QString text); // HTML entity escaping. - QString escapeHtml(const QString& html); - QString deEscapeHtml(const QString& text); + QString unescapeHtml(const QString& html); #if defined (USE_WEBENGINE) QAction* engineSettingsAction(); @@ -50,11 +49,9 @@ class WebFactory : public QObject { #endif private: - void generateEscapes(); - void generateDeescapes(); + void generateUnescapes(); - QMap m_escapes; - QMap m_deEscapes; + QMap m_escapes; #if defined (USE_WEBENGINE) QAction* m_engineSettings; diff --git a/src/librssguard/services/standard/atomparser.cpp b/src/librssguard/services/standard/atomparser.cpp index dd71970a0..425653847 100755 --- a/src/librssguard/services/standard/atomparser.cpp +++ b/src/librssguard/services/standard/atomparser.cpp @@ -60,9 +60,9 @@ Message AtomParser::extractMessage(const QDomElement& msg_element, QDateTime cur } // Title is not empty, description does not matter. - new_message.m_title = qApp->web()->stripTags(title); + new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(title)); new_message.m_contents = summary; - new_message.m_author = qApp->web()->escapeHtml(messageAuthor(msg_element)); + new_message.m_author = qApp->web()->unescapeHtml(messageAuthor(msg_element)); QString updated = textsFromPath(msg_element, m_atomNamespace, QSL("updated"), true).join(QSL(", ")); diff --git a/src/librssguard/services/standard/rdfparser.cpp b/src/librssguard/services/standard/rdfparser.cpp index 217328940..ed926caab 100644 --- a/src/librssguard/services/standard/rdfparser.cpp +++ b/src/librssguard/services/standard/rdfparser.cpp @@ -38,13 +38,13 @@ QList RdfParser::parseXmlData(const QString& data) { } else { // Title is empty but description is not. - new_message.m_title = qApp->web()->escapeHtml(qApp->web()->stripTags(elem_description.simplified())); + new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(elem_description.simplified())); new_message.m_contents = elem_description; } } else { // Title is really not empty, description does not matter. - new_message.m_title = qApp->web()->escapeHtml(qApp->web()->stripTags(elem_title)); + new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(elem_title)); new_message.m_contents = elem_description; } diff --git a/src/librssguard/services/standard/rssparser.cpp b/src/librssguard/services/standard/rssparser.cpp index 54a06e31f..1d3d17dc5 100644 --- a/src/librssguard/services/standard/rssparser.cpp +++ b/src/librssguard/services/standard/rssparser.cpp @@ -46,13 +46,13 @@ Message RssParser::extractMessage(const QDomElement& msg_element, QDateTime curr } else { // Title is empty but description is not. - new_message.m_title = qApp->web()->stripTags(elem_description.simplified()); + new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(elem_description.simplified())); new_message.m_contents = elem_description; } } else { // Title is really not empty, description does not matter. - new_message.m_title = qApp->web()->stripTags(elem_title); + new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(elem_title)); new_message.m_contents = elem_description; } @@ -102,6 +102,8 @@ Message RssParser::extractMessage(const QDomElement& msg_element, QDateTime curr new_message.m_author = ""; } + new_message.m_author = qApp->web()->unescapeHtml(new_message.m_author); + if (new_message.m_url.isNull()) { new_message.m_url = ""; }