From 8589c21091015457c4cfdfdccfede02d4a5cde05 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sun, 25 Jun 2017 10:23:30 -0700 Subject: [PATCH] Start RSS-in-JSON parser. --- Frameworks/RSParser/Feeds/FeedParser.swift | 54 +++++- .../RSParser/Feeds/FeedParserError.swift | 26 +++ .../RSParser/Feeds/JSON/JSONTypes.swift | 12 ++ .../RSParser/Feeds/JSON/RSSInJSONParser.swift | 158 ++++++++++++++++++ .../RSParser/Feeds/ParsedAttachment.swift | 9 + Frameworks/RSParser/Feeds/ParsedAuthor.swift | 8 + Frameworks/RSParser/Feeds/ParsedFeed.swift | 17 ++ Frameworks/RSParser/Feeds/ParsedItem.swift | 19 +++ Frameworks/RSParser/RSParser.h | 2 + .../RSParser.xcodeproj/project.pbxproj | 28 ++++ .../RSParser/Utilities/NSData+RSParser.h | 21 +++ .../RSParser/Utilities/NSData+RSParser.m | 132 +++++++++++++++ 12 files changed, 482 insertions(+), 4 deletions(-) create mode 100644 Frameworks/RSParser/Feeds/FeedParserError.swift create mode 100644 Frameworks/RSParser/Feeds/JSON/JSONTypes.swift create mode 100644 Frameworks/RSParser/Feeds/JSON/RSSInJSONParser.swift create mode 100644 Frameworks/RSParser/Utilities/NSData+RSParser.h create mode 100644 Frameworks/RSParser/Utilities/NSData+RSParser.m diff --git a/Frameworks/RSParser/Feeds/FeedParser.swift b/Frameworks/RSParser/Feeds/FeedParser.swift index 4dedd59d7..9a98802f2 100644 --- a/Frameworks/RSParser/Feeds/FeedParser.swift +++ b/Frameworks/RSParser/Feeds/FeedParser.swift @@ -8,19 +8,65 @@ import Foundation +// FeedParser knows about the various syndication feed types. +// It might be a good idea to do a plugin-style architecture here instead — +// but feed formats don’t appear all that often, so it’s probably not necessary. + public struct FeedParser { - static func feedType(parserData: ParserData) -> FeedType { + static let minNumberOfBytesRequired = 128 + public static func feedType(parserData: ParserData) -> FeedType { + + // Can call with partial data — while still downloading, for instance. 
// If there’s not enough data, return .unknown. Ask again when there’s more data. // If it’s definitely not a feed, return .notAFeed. - return .unknown //stub + if parserData.data.count < minNumberOfBytesRequired { + return .unknown + } + + if parserData.data.isProbablyJSONFeed() { + return .jsonFeed + } + if parserData.data.isProbablyRSSInJSONFeed() { // same name as the selector implemented in NSData+RSParser.m + return .rssInJSON + } + + if parserData.data.isProbablyHTML() { + return .notAFeed + } + + if parserData.data.isProbablyRSS() { + return .rss + } + if parserData.data.isProbablyAtom() { + return .atom + } + + return .notAFeed } - static func parseFeed(parserData: ParserData) throws -> ParsedFeed? { + public static func parseFeed(parserData: ParserData) -> ParsedFeed? { + let type = feedType(parserData: parserData) - return nil //stub + switch type { + + case .jsonFeed: + return JSONFeedParser.parse(parserData) + + case .rssInJSON: + return (try? RSSInJSONParser.parse(parserData: parserData)) ?? nil // parseFeed does not throw, so a parse error is reported as nil + + case .rss: + return RSSParser.parse(parserData) + + case .atom: + return AtomParser.parser(parserData) + + case .unknown, .notAFeed: + return nil + } } } diff --git a/Frameworks/RSParser/Feeds/FeedParserError.swift b/Frameworks/RSParser/Feeds/FeedParserError.swift new file mode 100644 index 000000000..c1edd8900 --- /dev/null +++ b/Frameworks/RSParser/Feeds/FeedParserError.swift @@ -0,0 +1,26 @@ +// +// FeedParserError.swift +// RSParser +// +// Created by Brent Simmons on 6/24/17. +// Copyright © 2017 Ranchero Software, LLC. All rights reserved. 
+// + +import Foundation + +public struct FeedParserError: Error { + + public enum FeedParserErrorType { + + case rssChannelNotFound + case rssItemsNotFound + + } + + public let errorType: FeedParserErrorType + + public init(_ errorType: FeedParserErrorType) { + + self.errorType = errorType + } +} diff --git a/Frameworks/RSParser/Feeds/JSON/JSONTypes.swift b/Frameworks/RSParser/Feeds/JSON/JSONTypes.swift new file mode 100644 index 000000000..aa65ae52e --- /dev/null +++ b/Frameworks/RSParser/Feeds/JSON/JSONTypes.swift @@ -0,0 +1,12 @@ +// +// JSONTypes.swift +// RSParser +// +// Created by Brent Simmons on 6/24/17. +// Copyright © 2017 Ranchero Software, LLC. All rights reserved. +// + +import Foundation + +typealias JSONDictionary = [String: Any] +typealias JSONArray = [JSONDictionary] diff --git a/Frameworks/RSParser/Feeds/JSON/RSSInJSONParser.swift b/Frameworks/RSParser/Feeds/JSON/RSSInJSONParser.swift new file mode 100644 index 000000000..7e5e4e837 --- /dev/null +++ b/Frameworks/RSParser/Feeds/JSON/RSSInJSONParser.swift @@ -0,0 +1,158 @@ +// +// RSSInJSONParser.swift +// RSParser +// +// Created by Brent Simmons on 6/24/17. +// Copyright © 2017 Ranchero Software, LLC. All rights reserved. +// + +import Foundation + +// See https://github.com/scripting/Scripting-News/blob/master/rss-in-json/README.md +// Also: http://cyber.harvard.edu/rss/rss.html + +public struct RSSInJSONParser { + /// Parses RSS-in-JSON data into a ParsedFeed. Throws FeedParserError when the channel or its items cannot be found; JSONSerialization errors are rethrown. + public static func parse(parserData: ParserData) throws -> ParsedFeed? { + + do { + let parsedObject = try JSONSerialization.jsonObject(with: parserData.data) as? JSONDictionary // jsonObject returns Any, so cast before subscripting + + guard let channelObject = ((parsedObject?["rss"] as? JSONDictionary) ?? parsedObject)?["channel"] as? JSONDictionary else { // look under "rss" first, as in the linked README; a top-level channel is still accepted + throw FeedParserError(.rssChannelNotFound) + } + + // I’d bet money that in practice the items array won’t always appear correctly inside the channel object. + // I’d also bet that sometimes it gets called "items" instead of "item". + var itemsObject = channelObject["item"] as? 
JSONArray + if itemsObject == nil { + itemsObject = parsedObject?["item"] as? JSONArray + } + if itemsObject == nil { + itemsObject = channelObject["items"] as? JSONArray + } + if itemsObject == nil { + itemsObject = parsedObject?["items"] as? JSONArray // assignment, so the top-level "items" fallback takes effect + } + guard let itemsArray = itemsObject else { + throw FeedParserError(.rssItemsNotFound) + } + + let title = channelObject["title"] as? String + let homePageURL = channelObject["link"] as? String + let feedURL = parserData.url + let feedDescription = channelObject["description"] as? String + + let items = parseItems(itemsArray) + + return ParsedFeed(type: .rssInJSON, title: title, homePageURL: homePageURL, feedURL: feedURL, feedDescription: feedDescription, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items) + + } + catch { throw error } + } + +} + +private extension RSSInJSONParser { + /// Converts each item dictionary to a ParsedItem, dropping the dictionaries that yield nil. + static func parseItems(_ itemsObject: JSONArray) -> [ParsedItem] { + + return itemsObject.flatMap{ (oneItemDictionary) -> ParsedItem? in + + return parsedItemWithDictionary(oneItemDictionary) + } + } + /// Builds a ParsedItem from one item dictionary. Returns nil when the item has neither a title nor a description. + static func parsedItemWithDictionary(_ itemDictionary: JSONDictionary) -> ParsedItem? { + + let externalURL = itemDictionary["link"] as? String + let title = itemDictionary["title"] as? String + + var contentHTML = itemDictionary["description"] as? String + var contentText: String? = nil + if let description = contentHTML, !description.contains("<") { // no "<" in the description: store it as plain text + contentText = contentHTML + contentHTML = nil + } + if contentHTML == nil && contentText == nil && title == nil { + return nil + } + + var datePublished: Date? = nil + if let datePublishedString = itemDictionary["pubDate"] as? String { + datePublished = RSDateWithString(datePublishedString as NSString) + } + + let authorEmailAddress = itemDictionary["author"] as? 
String + var authors: [ParsedAuthor]? = nil + if authorEmailAddress != nil { + let parsedAuthor = ParsedAuthor(name: nil, url: nil, avatarURL: nil, emailAddress: authorEmailAddress) + authors = [parsedAuthor] + } + + var tags: [String]? = nil + if let categoryObject = itemDictionary["category"] as? JSONDictionary { + if let oneTag = categoryObject["#value"] as? String { + tags = [oneTag] + } + } + else if let categoryArray = itemDictionary["category"] as? JSONArray { + tags = categoryArray.flatMap{ (oneCategoryDictionary) -> String? in + return oneCategoryDictionary["#value"] as? String + } + } + + var attachments: [ParsedAttachment]? = nil + if let enclosureObject = itemDictionary["enclosure"] as? JSONDictionary { + if let attachmentURL = enclosureObject["url"] as? String { + var attachmentSize = enclosureObject["length"] as? Int + if attachmentSize == nil { + if let attachmentSizeString = enclosureObject["length"] as? String { + attachmentSize = (attachmentSizeString as NSString).integerValue + } + } + let type = enclosureObject["type"] as? String + let oneAttachment = ParsedAttachment(url: attachmentURL, mimeType: type, title: nil, sizeInBytes: attachmentSize, durationInSeconds: nil) + attachments = [oneAttachment] + } + } + + var uniqueID: String? = itemDictionary["guid"] as? String + if uniqueID == nil { + + // Calculate a uniqueID based on a combination of non-empty elements. Then hash the result. + // Items should have guids. When they don't, re-runs are very likely + // because there's no other 100% reliable way to determine identity. + // This calculated uniqueID is valid only for this particular feed. (Just like ids in JSON Feed.) 
+ + var s = "" + if let datePublished = datePublished { + s += "\(datePublished.timeIntervalSince1970)" + } + if let title = title { + s += title + } + if let externalURL = externalURL { + s += externalURL + } + if let authorEmailAddress = authorEmailAddress { + s += authorEmailAddress + } + if let oneAttachmentURL = attachments?.first?.url { + s += oneAttachmentURL + } + if s.isEmpty { + // Sheesh. Tough case. + if let contentHTML = contentHTML { + s = contentHTML + } + if let contentText = contentText { + s = contentText + } + } + uniqueID = (s as NSString).rsxml_md5HashString() + } + + return ParsedItem(uniqueID: uniqueID, url: nil, externalURL: externalURL, title: title, contentHTML: contentHTML, contentText: contentText, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: nil, authors: authors, tags: tags, attachments: attachments) + } +} diff --git a/Frameworks/RSParser/Feeds/ParsedAttachment.swift b/Frameworks/RSParser/Feeds/ParsedAttachment.swift index 60989fda6..e3d369659 100644 --- a/Frameworks/RSParser/Feeds/ParsedAttachment.swift +++ b/Frameworks/RSParser/Feeds/ParsedAttachment.swift @@ -15,4 +15,13 @@ public struct ParsedAttachment { public let title: String? public let sizeInBytes: Int? public let durationInSeconds: Int? + + init(url: String?, mimeType: String?, title: String?, sizeInBytes: Int?, durationInSeconds: Int?) { + + self.url = url + self.mimeType = mimeType + self.title = title + self.sizeInBytes = sizeInBytes + self.durationInSeconds = durationInSeconds + } } diff --git a/Frameworks/RSParser/Feeds/ParsedAuthor.swift b/Frameworks/RSParser/Feeds/ParsedAuthor.swift index 16ff631d4..ad5041889 100644 --- a/Frameworks/RSParser/Feeds/ParsedAuthor.swift +++ b/Frameworks/RSParser/Feeds/ParsedAuthor.swift @@ -14,4 +14,12 @@ public struct ParsedAuthor { public let url: String? public let avatarURL: String? public let emailAddress: String? + + init(name: String?, url: String?, avatarURL: String?, emailAddress: String?) 
{ + + self.name = name + self.url = url + self.avatarURL = avatarURL + self.emailAddress = emailAddress + } } diff --git a/Frameworks/RSParser/Feeds/ParsedFeed.swift b/Frameworks/RSParser/Feeds/ParsedFeed.swift index 4a98bbae0..15a8686bb 100644 --- a/Frameworks/RSParser/Feeds/ParsedFeed.swift +++ b/Frameworks/RSParser/Feeds/ParsedFeed.swift @@ -22,4 +22,21 @@ public struct ParsedFeed { public let expired: Bool public let hubs: [ParsedHub]? public let items: [ParsedItem] + + init(type: FeedType, title: String?, homePageURL: String?, feedURL: String?, feedDescription: String?, nextURL: String?, iconURL: String?, faviconURL: String?, authors: [ParsedAuthor]?, expired: Bool, hubs: [ParsedHub]?, items:[ParsedItem]) { + + self.type = type + self.title = title + self.homePageURL = homePageURL + self.feedURL = feedURL + self.feedDescription = feedDescription + self.nextURL = nextURL + self.iconURL = iconURL + self.faviconURL = faviconURL + self.authors = authors + self.expired = expired + self.hubs = hubs + self.items = items + } } + diff --git a/Frameworks/RSParser/Feeds/ParsedItem.swift b/Frameworks/RSParser/Feeds/ParsedItem.swift index 090d9c563..24600c6c1 100644 --- a/Frameworks/RSParser/Feeds/ParsedItem.swift +++ b/Frameworks/RSParser/Feeds/ParsedItem.swift @@ -24,4 +24,23 @@ public struct ParsedItem { public let authors: [ParsedAuthor]? public let tags: [String]? public let attachments: [ParsedAttachment]? + + init(uniqueID: String?, url: String?, externalURL: String?, title: String?, contentHTML: String?, contentText: String?, summary: String?, imageURL: String?, bannerImageURL: String?, datePublished: Date?, dateModified: Date?, authors: [ParsedAuthor]?, tags: [String]?, attachments: [ParsedAttachment]?) 
{ + + self.uniqueID = uniqueID + self.url = url + self.externalURL = externalURL + self.title = title + self.contentHTML = contentHTML + self.contentText = contentText + self.summary = summary + self.imageURL = imageURL + self.bannerImageURL = bannerImageURL + self.datePublished = datePublished + self.dateModified = dateModified + self.authors = authors + self.tags = tags + self.attachments = attachments + } } + diff --git a/Frameworks/RSParser/RSParser.h b/Frameworks/RSParser/RSParser.h index 69a75e5f0..2a6a43913 100644 --- a/Frameworks/RSParser/RSParser.h +++ b/Frameworks/RSParser/RSParser.h @@ -8,6 +8,8 @@ @import Foundation; +#import +#import //#import //#import diff --git a/Frameworks/RSParser/RSParser.xcodeproj/project.pbxproj b/Frameworks/RSParser/RSParser.xcodeproj/project.pbxproj index a6d9506be..c34d80191 100644 --- a/Frameworks/RSParser/RSParser.xcodeproj/project.pbxproj +++ b/Frameworks/RSParser/RSParser.xcodeproj/project.pbxproj @@ -45,6 +45,11 @@ 84469D2F1EFA3134004A6B28 /* RSRSSParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D251EFA3134004A6B28 /* RSRSSParser.h */; }; 84469D301EFA3134004A6B28 /* RSRSSParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469D261EFA3134004A6B28 /* RSRSSParser.m */; }; 84469D321EFA31CF004A6B28 /* FeedParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84469D311EFA31CF004A6B28 /* FeedParser.swift */; }; + 84469D351EFF1190004A6B28 /* NSData+RSParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D331EFF1190004A6B28 /* NSData+RSParser.h */; }; + 84469D361EFF1190004A6B28 /* NSData+RSParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469D341EFF1190004A6B28 /* NSData+RSParser.m */; }; + 84469D381EFF2645004A6B28 /* RSSInJSONParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84469D371EFF2645004A6B28 /* RSSInJSONParser.swift */; }; + 84469D401EFF29A9004A6B28 /* FeedParserError.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84469D3F1EFF29A9004A6B28 /* FeedParserError.swift */; 
}; + 84469D421EFF2B2D004A6B28 /* JSONTypes.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84469D411EFF2B2D004A6B28 /* JSONTypes.swift */; }; 84D81BDC1EFA28E700652332 /* RSParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84D81BDA1EFA28E700652332 /* RSParser.h */; settings = {ATTRIBUTES = (Public, ); }; }; 84D81BDE1EFA2B7D00652332 /* ParsedFeed.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84D81BDD1EFA2B7D00652332 /* ParsedFeed.swift */; }; 84D81BE01EFA2BAE00652332 /* FeedType.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84D81BDF1EFA2BAE00652332 /* FeedType.swift */; }; @@ -105,6 +110,11 @@ 84469D251EFA3134004A6B28 /* RSRSSParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSRSSParser.h; sourceTree = ""; }; 84469D261EFA3134004A6B28 /* RSRSSParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RSRSSParser.m; sourceTree = ""; }; 84469D311EFA31CF004A6B28 /* FeedParser.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = FeedParser.swift; path = Feeds/FeedParser.swift; sourceTree = ""; }; + 84469D331EFF1190004A6B28 /* NSData+RSParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "NSData+RSParser.h"; sourceTree = ""; }; + 84469D341EFF1190004A6B28 /* NSData+RSParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "NSData+RSParser.m"; sourceTree = ""; }; + 84469D371EFF2645004A6B28 /* RSSInJSONParser.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = RSSInJSONParser.swift; path = Feeds/JSON/RSSInJSONParser.swift; sourceTree = ""; }; + 84469D3F1EFF29A9004A6B28 /* FeedParserError.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = FeedParserError.swift; path = Feeds/FeedParserError.swift; sourceTree = ""; }; + 
84469D411EFF2B2D004A6B28 /* JSONTypes.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = JSONTypes.swift; path = Feeds/JSON/JSONTypes.swift; sourceTree = ""; }; 84D81BD91EFA28E700652332 /* Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 84D81BDA1EFA28E700652332 /* RSParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSParser.h; sourceTree = ""; }; 84D81BDD1EFA2B7D00652332 /* ParsedFeed.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = ParsedFeed.swift; path = Feeds/ParsedFeed.swift; sourceTree = ""; }; @@ -148,6 +158,8 @@ 84D81BE31EFA2D3D00652332 /* ParsedItem.swift */, 84D81BE51EFA2DFB00652332 /* ParsedAttachment.swift */, 84D81BE71EFA2E6700652332 /* ParsedHub.swift */, + 84469D3F1EFF29A9004A6B28 /* FeedParserError.swift */, + 84469D391EFF2649004A6B28 /* JSON */, 84469D1C1EFA3134004A6B28 /* XML */, ); name = Feeds; @@ -197,6 +209,8 @@ 84469D0F1EFA30A2004A6B28 /* Utilities */ = { isa = PBXGroup; children = ( + 84469D331EFF1190004A6B28 /* NSData+RSParser.h */, + 84469D341EFF1190004A6B28 /* NSData+RSParser.m */, 84469D101EFA30A2004A6B28 /* NSString+RSXML.h */, 84469D111EFA30A2004A6B28 /* NSString+RSXML.m */, 84469D121EFA30A2004A6B28 /* RSDateParser.h */, @@ -225,6 +239,15 @@ path = Feeds/XML; sourceTree = ""; }; + 84469D391EFF2649004A6B28 /* JSON */ = { + isa = PBXGroup; + children = ( + 84469D411EFF2B2D004A6B28 /* JSONTypes.swift */, + 84469D371EFF2645004A6B28 /* RSSInJSONParser.swift */, + ); + name = JSON; + sourceTree = ""; + }; 84FF5F7A1EFA285800C15A01 = { isa = PBXGroup; children = ( @@ -273,6 +296,7 @@ 84469D2D1EFA3134004A6B28 /* RSParsedFeed.h in Headers */, 84469D181EFA30A2004A6B28 /* RSDateParser.h in Headers */, 84469D1A1EFA30A2004A6B28 /* RSXMLInternal.h in Headers */, + 84469D351EFF1190004A6B28 /* NSData+RSParser.h in Headers 
*/, 84D81BDC1EFA28E700652332 /* RSParser.h in Headers */, 84469D0B1EFA307E004A6B28 /* RSHTMLMetadataParser.h in Headers */, 84469CFC1EFA3069004A6B28 /* RSSAXParser.h in Headers */, @@ -392,6 +416,7 @@ 84469D0E1EFA307E004A6B28 /* RSSAXHTMLParser.m in Sources */, 84469CF41EFA3000004A6B28 /* RSOPMLFeedSpecifier.m in Sources */, 84469CF01EFA3000004A6B28 /* RSOPMLAttributes.m in Sources */, + 84469D381EFF2645004A6B28 /* RSSInJSONParser.swift in Sources */, 84469D301EFA3134004A6B28 /* RSRSSParser.m in Sources */, 84469D191EFA30A2004A6B28 /* RSDateParser.m in Sources */, 84469CFD1EFA3069004A6B28 /* RSSAXParser.m in Sources */, @@ -400,14 +425,17 @@ 84469CF61EFA3000004A6B28 /* RSOPMLItem.m in Sources */, 84469D2A1EFA3134004A6B28 /* RSFeedParser.m in Sources */, 84D81BE41EFA2D3D00652332 /* ParsedItem.swift in Sources */, + 84469D421EFF2B2D004A6B28 /* JSONTypes.swift in Sources */, 84469D0C1EFA307E004A6B28 /* RSHTMLMetadataParser.m in Sources */, 84469D0A1EFA307E004A6B28 /* RSHTMLMetadata.m in Sources */, 84469D171EFA30A2004A6B28 /* NSString+RSXML.m in Sources */, 84469D2C1EFA3134004A6B28 /* RSParsedArticle.m in Sources */, 84469D2E1EFA3134004A6B28 /* RSParsedFeed.m in Sources */, 84469CF81EFA3000004A6B28 /* RSOPMLParser.m in Sources */, + 84469D401EFF29A9004A6B28 /* FeedParserError.swift in Sources */, 84469D321EFA31CF004A6B28 /* FeedParser.swift in Sources */, 84469D281EFA3134004A6B28 /* RSAtomParser.m in Sources */, + 84469D361EFF1190004A6B28 /* NSData+RSParser.m in Sources */, 84D81BE61EFA2DFB00652332 /* ParsedAttachment.swift in Sources */, 84D81BDE1EFA2B7D00652332 /* ParsedFeed.swift in Sources */, 84D81BE81EFA2E6700652332 /* ParsedHub.swift in Sources */, diff --git a/Frameworks/RSParser/Utilities/NSData+RSParser.h b/Frameworks/RSParser/Utilities/NSData+RSParser.h new file mode 100644 index 000000000..f661dcea8 --- /dev/null +++ b/Frameworks/RSParser/Utilities/NSData+RSParser.h @@ -0,0 +1,21 @@ +// +// NSData+RSParser.h +// RSParser +// +// Created by Brent Simmons on 
6/24/17. +// Copyright © 2017 Ranchero Software, LLC. All rights reserved. +// + +@import Foundation; + + +@interface NSData (RSParser) + +- (BOOL)isProbablyHTML; +- (BOOL)isProbablyXML; +- (BOOL)isProbablyJSON; + +@end + + + diff --git a/Frameworks/RSParser/Utilities/NSData+RSParser.m b/Frameworks/RSParser/Utilities/NSData+RSParser.m new file mode 100644 index 000000000..ef7378478 --- /dev/null +++ b/Frameworks/RSParser/Utilities/NSData+RSParser.m @@ -0,0 +1,132 @@ +// +// NSData+RSParser.m +// RSParser +// +// Created by Brent Simmons on 6/24/17. +// Copyright © 2017 Ranchero Software, LLC. All rights reserved. +// + +#import "NSData+RSParser.h" + +/* TODO: find real-world cases where the isProbably* cases fail when they should succeed, and add them to tests.*/ + +static BOOL bytesAreProbablyHTML(const char *bytes, NSUInteger numberOfBytes); +static BOOL bytesAreProbablyXML(const char *bytes, NSUInteger numberOfBytes); +static BOOL bytesStartWithStringIgnoringWhitespace(const char *string, const char *bytes, NSUInteger numberOfBytes); + +@implementation NSData (RSParser) + +- (BOOL)isProbablyHTML { + + return bytesAreProbablyHTML(self.bytes, self.length); +} + +- (BOOL)isProbablyXML { + + return bytesAreProbablyXML(self.bytes, self.length); +} + +- (BOOL)isProbablyJSON { + + return bytesStartWithStringIgnoringWhitespace("{", self.bytes, self.length); +} + +- (BOOL)isProbablyJSONFeed { + + if (![self isProbablyJSON]) { + return NO; + } + return didFindString("https://jsonfeed.org/version/", self.bytes, self.length); +} + +- (BOOL)isProbablyRSSInJSONFeed { + + if (![self isProbablyJSON]) { + return NO; + } + const char *bytes = self.bytes; + NSUInteger length = self.length; + return didFindString("rss", bytes, length) && didFindString("channel", bytes, length) && didFindString("item", bytes, length); +} + +- (BOOL)isProbablyRSS { + + if (![self isProbablyXML]) { + return NO; + } + + return didFindString("