Decode HTML entities in the JSON Feed parser for content_html. Fix #176.

This commit is contained in:
Brent Simmons 2017-11-18 12:41:15 -08:00
parent b3b8c73f1c
commit ca081c41a9
3 changed files with 26 additions and 1 deletions

View File

@ -99,6 +99,7 @@ private extension JSONFeedParser {
if contentHTML == nil && contentText == nil {
return nil
}
let decodedContentHTML = contentHTML?.rsparser_stringByDecodingHTMLEntities()
let url = itemDictionary["url"] as? String
let externalURL = itemDictionary["external_url"] as? String
@ -117,7 +118,7 @@ private extension JSONFeedParser {
}
let attachments = parseAttachments(itemDictionary)
return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: feedURL, url: url, externalURL: externalURL, title: title, contentHTML: contentHTML, contentText: contentText, summary: summary, imageURL: imageURL, bannerImageURL: bannerImageURL, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: tags, attachments: attachments)
return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: feedURL, url: url, externalURL: externalURL, title: title, contentHTML: decodedContentHTML, contentText: contentText, summary: summary, imageURL: imageURL, bannerImageURL: bannerImageURL, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: tags, attachments: attachments)
}
static func parseUniqueID(_ itemDictionary: JSONDictionary) -> String? {

View File

@ -29,4 +29,25 @@ class JSONFeedParserTests: XCTestCase {
}
}
/// Verifies that the JSON Feed parser decodes HTML entities in `content_html`.
///
/// Regression test for https://github.com/brentsimmons/Evergreen/issues/176.
/// In the fixture, the matching article's raw content begins with
/// "\n<p>New episode of America&#8217;s". Once entities are decoded, `&#8217;`
/// becomes U+2019 (RIGHT SINGLE QUOTATION MARK), so the parsed content HTML
/// must begin with "\n<p>New episode of America\u{2019}s".
func testThatEntitiesAreDecoded() {
    // NOTE(review): the title in the feed may contain typographic quotes around
    // the episode name; confirm this literal against the DaringFireball.json fixture.
    let expectedTitle = "The Talk Show: I Do Like Throwing a Baby"

    // The apostrophe is spelled as an escape so that the expectation cannot be
    // silently damaged by an editor or tool that drops non-ASCII characters.
    let expectedPrefix = "\n<p>New episode of America\u{2019}s"

    let d = parserData("DaringFireball", "json", "http://daringfireball.net/")

    do {
        // Report parse problems as test failures instead of crashing the test run.
        guard let parsedFeed = try FeedParser.parse(d) else {
            XCTFail("Expected FeedParser to return a parsed feed for the DaringFireball fixture.")
            return
        }

        guard let article = parsedFeed.items.first(where: { $0.title == expectedTitle }) else {
            XCTFail("Expected to find “\(expectedTitle)” article.")
            return
        }

        guard let contentHTML = article.contentHTML else {
            XCTFail("Expected the article to have content HTML.")
            return
        }

        XCTAssertTrue(
            contentHTML.hasPrefix(expectedPrefix),
            "Expected the content HTML to start with the entity-decoded text."
        )
    } catch {
        XCTFail("FeedParser.parse threw an unexpected error: \(error)")
    }
}
}

View File

@ -8,6 +8,8 @@
@import Foundation;
NS_ASSUME_NONNULL_BEGIN
@interface NSString (RSParser)
/**
 Returns a string derived from the receiver with HTML entities decoded.

 The return value is non-null (the declaration sits inside an
 NS_ASSUME_NONNULL region). The JSON Feed parser applies this to an item's
 content HTML before building the parsed item.

 NOTE(review): the implementation is not visible from this header. The tests
 expect at least decimal numeric references such as "&#8217;" to be decoded;
 confirm which other entity forms (named, hexadecimal) are supported.
 */
- (NSString *)rsparser_stringByDecodingHTMLEntities;
@ -16,3 +18,4 @@
@end
NS_ASSUME_NONNULL_END