NetNewsWire/Frameworks/RSParser/Feeds/JSON/RSSInJSONParser.swift

181 lines
5.8 KiB
Swift
Raw Normal View History

2017-06-25 19:23:30 +02:00
//
// RSSInJSONParser.swift
// RSParser
//
// Created by Brent Simmons on 6/24/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
// See https://github.com/scripting/Scripting-News/blob/master/rss-in-json/README.md
// Also: http://cyber.harvard.edu/rss/rss.html
/// Parser for feeds in the RSS-in-JSON format.
public struct RSSInJSONParser {

    /// Parses `parserData` as an RSS-in-JSON feed.
    ///
    /// The channel is read from `rss.channel`. The items array is looked up, in order, at
    /// `channel["item"]`, top-level `"item"`, `channel["items"]`, and top-level `"items"`.
    ///
    /// - Parameter parserData: Supplies the raw JSON (`data`) and the feed URL (`url`).
    /// - Returns: The parsed feed. Every path through this method either returns a feed or throws.
    /// - Throws: `FeedParserError(.invalidJSON)` when the top-level JSON value is not a dictionary,
    ///   `FeedParserError(.rssChannelNotFound)` when `rss` or `rss.channel` is missing,
    ///   `FeedParserError(.rssItemsNotFound)` when none of the item lookups yields an array,
    ///   and any error thrown by `JSONSerialization.jsonObject(with:)`.
    public static func parse(_ parserData: ParserData) throws -> ParsedFeed? {

        guard let parsedObject = try JSONSerialization.jsonObject(with: parserData.data) as? JSONDictionary else {
            throw FeedParserError(.invalidJSON)
        }
        guard let rssObject = parsedObject["rss"] as? JSONDictionary else {
            throw FeedParserError(.rssChannelNotFound)
        }
        guard let channelObject = rssObject["channel"] as? JSONDictionary else {
            throw FeedParserError(.rssChannelNotFound)
        }

        // I'd bet money that in practice the items array won't always appear correctly inside the channel object.
        // I'd also bet that sometimes it gets called "items" instead of "item".
        // The first lookup that yields an array wins.
        let possibleItems = (channelObject["item"] as? JSONArray)
            ?? (parsedObject["item"] as? JSONArray)
            ?? (channelObject["items"] as? JSONArray)
            ?? (parsedObject["items"] as? JSONArray)
        guard let itemsObject = possibleItems else {
            throw FeedParserError(.rssItemsNotFound)
        }

        let title = channelObject["title"] as? String
        let homePageURL = channelObject["link"] as? String
        let feedURL = parserData.url
        let feedDescription = channelObject["description"] as? String

        let items = parseItems(itemsObject, parserData.url)

        return ParsedFeed(type: .rssInJSON, title: title, homePageURL: homePageURL, feedURL: feedURL, feedDescription: feedDescription, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items)
    }
}
private extension RSSInJSONParser {
/// Converts each entry of `itemsObject` into a `ParsedItem`, in order, leaving out
/// the entries for which `parsedItemWithDictionary` returns nil.
///
/// - Parameters:
///   - itemsObject: The feed's array of item dictionaries.
///   - feedURL: Passed through to `parsedItemWithDictionary` for every item.
/// - Returns: The successfully parsed items; empty when none could be parsed.
static func parseItems(_ itemsObject: JSONArray, _ feedURL: String) -> [ParsedItem] {

    var parsedItems = [ParsedItem]()
    for itemDictionary in itemsObject {
        if let parsedItem = parsedItemWithDictionary(itemDictionary, feedURL) {
            parsedItems.append(parsedItem)
        }
    }
    return parsedItems
}
/// Builds a `ParsedItem` from one entry of the feed's items array.
///
/// Reads `link`, `title`, `description`, `pubDate`, and `guid` directly, and gets authors,
/// tags, and attachments from `parseAuthors`, `parseTags`, and `parseAttachments`.
///
/// - Parameters:
///   - itemDictionary: The JSON object for a single item.
///   - feedURL: Stored as the `feedURL` of the returned item.
/// - Returns: The parsed item, or nil when the item has neither a title nor a description,
///   or when no unique ID could be produced.
static func parsedItemWithDictionary(_ itemDictionary: JSONDictionary, _ feedURL: String) -> ParsedItem? {

    let externalURL = itemDictionary["link"] as? String
    let title = itemDictionary["title"] as? String

    // A description that contains no "<" is reported as plain text rather than HTML.
    var contentHTML = itemDictionary["description"] as? String
    var contentText: String? = nil
    if let description = contentHTML, !description.contains("<") {
        contentText = description
        contentHTML = nil
    }

    if contentHTML == nil && contentText == nil && title == nil {
        return nil
    }

    var datePublished: Date? = nil
    if let datePublishedString = itemDictionary["pubDate"] as? String {
        datePublished = RSDateWithString(datePublishedString)
    }

    let authors = parseAuthors(itemDictionary)
    let tags = parseTags(itemDictionary)
    let attachments = parseAttachments(itemDictionary)

    // An empty guid is treated as missing: used as-is, every item with an
    // empty guid would end up sharing the same unique ID.
    var uniqueID: String? = nil
    if let guid = itemDictionary["guid"] as? String, !guid.isEmpty {
        uniqueID = guid
    }

    if uniqueID == nil {
        // Calculate a uniqueID based on a combination of non-empty elements. Then hash the result.
        // Items should have guids. When they don't, re-runs are very likely
        // because there's no other 100% reliable way to determine identity.
        // This calculated uniqueID is valid only for this particular feed. (Just like ids in JSON Feed.)
        var s = ""
        if let datePublished = datePublished {
            s += "\(datePublished.timeIntervalSince1970)"
        }
        if let title = title {
            s += title
        }
        if let externalURL = externalURL {
            s += externalURL
        }
        if let authorEmailAddress = authors?.first?.emailAddress {
            s += authorEmailAddress
        }
        if let oneAttachmentURL = attachments?.first?.url {
            s += oneAttachmentURL
        }
        if s.isEmpty {
            // Sheesh. Tough case. Nothing else to go on, so hash the content itself.
            // At most one of contentText/contentHTML is non-nil at this point.
            s = contentText ?? contentHTML ?? ""
        }
        uniqueID = (s as NSString).rsparser_md5Hash()
    }

    guard let itemID = uniqueID else {
        return nil
    }
    return ParsedItem(syncServiceID: nil, uniqueID: itemID, feedURL: feedURL, url: nil, externalURL: externalURL, title: title, contentHTML: contentHTML, contentText: contentText, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: nil, authors: authors, tags: tags, attachments: attachments)
}
/// Reads the item's `author` string and wraps it in a single `ParsedAuthor`,
/// using the string as the author's email address.
///
/// - Parameter itemDictionary: The JSON object for a single item.
/// - Returns: A one-element array, or nil when `author` is absent or not a string.
static func parseAuthors(_ itemDictionary: JSONDictionary) -> [ParsedAuthor]? {

    if let emailAddress = itemDictionary["author"] as? String {
        let author = ParsedAuthor(name: nil, url: nil, avatarURL: nil, emailAddress: emailAddress)
        return [author]
    }
    return nil
}
/// Extracts tag names from the item's `category` value.
///
/// `category` may be a single category or an array of categories. Each category is either
/// a plain string, or an object that holds its name under `#value`. Entries of any other
/// shape are skipped.
///
/// - Parameter itemDictionary: The JSON object for a single item.
/// - Returns: The tag names in feed order, or nil when there are none.
static func parseTags(_ itemDictionary: JSONDictionary) -> [String]? {

    // Name of a single category, whichever of the two accepted shapes it has.
    func tagName(_ category: Any) -> String? {
        if let name = category as? String {
            return name
        }
        return (category as? JSONDictionary)?["#value"] as? String
    }

    guard let categoryValue = itemDictionary["category"] else {
        return nil
    }

    var tags = [String]()
    if let categories = categoryValue as? [Any] {
        for category in categories {
            if let name = tagName(category) {
                tags.append(name)
            }
        }
    }
    else if let name = tagName(categoryValue) {
        tags.append(name)
    }

    // Report "no tags" the same way for every input shape.
    return tags.isEmpty ? nil : tags
}
/// Builds the item's attachment from its `enclosure` object.
///
/// - Parameter itemDictionary: The JSON object for a single item.
/// - Returns: A one-element array, or nil when `enclosure` is not an object or has no string `url`.
///   The attachment's `sizeInBytes` is nil when `length` is missing or is not an integer.
static func parseAttachments(_ itemDictionary: JSONDictionary) -> [ParsedAttachment]? {

    guard let enclosureObject = itemDictionary["enclosure"] as? JSONDictionary else {
        return nil
    }
    guard let attachmentURL = enclosureObject["url"] as? String else {
        return nil
    }

    // length may be a JSON number or a string of digits. A string that isn't an
    // integer means the size is unknown (nil), not zero bytes.
    var attachmentSize = enclosureObject["length"] as? Int
    if attachmentSize == nil, let attachmentSizeString = enclosureObject["length"] as? String {
        attachmentSize = Int(attachmentSizeString.trimmingCharacters(in: .whitespacesAndNewlines))
    }

    let type = enclosureObject["type"] as? String
    let oneAttachment = ParsedAttachment(url: attachmentURL, mimeType: type, title: nil, sizeInBytes: attachmentSize, durationInSeconds: nil)
    return [oneAttachment]
}
2017-06-25 19:23:30 +02:00
}