Start RSS-in-JSON parser.
This commit is contained in:
parent
552ab693a3
commit
8589c21091
|
@ -8,19 +8,65 @@
|
||||||
|
|
||||||
import Foundation
|
import Foundation
|
||||||
|
|
||||||
|
// FeedParser knows about the various syndication feed types.
|
||||||
|
// It might be a good idea to do a plugin-style architecture here instead —
|
||||||
|
// but feed formats don’t appear all that often, so it’s probably not necessary.
|
||||||
|
|
||||||
public struct FeedParser {

    /// Minimum number of bytes that must be available before a feed type is guessed.
    static let minNumberOfBytesRequired = 128

    /// Sniffs `parserData.data` and reports which kind of feed it appears to be.
    ///
    /// Can call with partial data — while still downloading, for instance.
    ///
    /// - Returns: `.unknown` if there’s not enough data yet (ask again when there’s more data);
    ///   `.notAFeed` if the data looks like HTML or matches none of the known feed formats;
    ///   otherwise the detected feed type.
    public static func feedType(parserData: ParserData) -> FeedType {

        if parserData.data.count < minNumberOfBytesRequired {
            return .unknown
        }

        // The JSON formats are tried first, then HTML is ruled out, then the XML formats.
        if parserData.data.isProbablyJSONFeed() {
            return .jsonFeed
        }
        // The NSData category method is named isProbablyRSSInJSONFeed.
        if parserData.data.isProbablyRSSInJSONFeed() {
            return .rssInJSON
        }

        if parserData.data.isProbablyHTML() {
            return .notAFeed
        }

        if parserData.data.isProbablyRSS() {
            return .rss
        }
        if parserData.data.isProbablyAtom() {
            return .atom
        }

        return .notAFeed
    }

    /// Detects the feed type and hands the data to the matching parser.
    ///
    /// - Returns: The parsed feed, or `nil` when the type is `.unknown` or `.notAFeed`,
    ///   or when the RSS-in-JSON parser throws.
    public static func parseFeed(parserData: ParserData) -> ParsedFeed? {

        switch feedType(parserData: parserData) {

        case .jsonFeed:
            // NOTE(review): JSONFeedParser, RSSParser and AtomParser are not defined in this
            // view — confirm their `parse` signatures (argument label, throwing or not).
            return JSONFeedParser.parse(parserData)

        case .rssInJSON:
            // RSSInJSONParser.parse(parserData:) throws, but this function does not:
            // a thrown error is reported to the caller as nil.
            do {
                return try RSSInJSONParser.parse(parserData: parserData)
            }
            catch {
                return nil
            }

        case .rss:
            return RSSParser.parse(parserData)

        case .atom:
            return AtomParser.parse(parserData)

        case .unknown, .notAFeed:
            return nil
        }
    }
}
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
//
|
||||||
|
// FeedParserError.swift
|
||||||
|
// RSParser
|
||||||
|
//
|
||||||
|
// Created by Brent Simmons on 6/24/17.
|
||||||
|
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||||
|
//
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
|
||||||
|
/// Error thrown by the feed parsers. It carries a `FeedParserErrorType` naming the failure.
public struct FeedParserError: Error {

    /// The failures a feed parser can report.
    public enum FeedParserErrorType {
        case rssChannelNotFound, rssItemsNotFound
    }

    /// The failure this error represents.
    public let errorType: FeedParserErrorType

    /// Creates an error for the given failure.
    public init(_ kind: FeedParserErrorType) {
        errorType = kind
    }
}
|
|
@ -0,0 +1,12 @@
|
||||||
|
//
|
||||||
|
// JSONTypes.swift
|
||||||
|
// RSParser
|
||||||
|
//
|
||||||
|
// Created by Brent Simmons on 6/24/17.
|
||||||
|
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||||
|
//
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
|
||||||
|
// A decoded JSON object: string keys mapped to values of any type.
typealias JSONDictionary = [String: Any]
|
||||||
|
// A decoded JSON array whose elements are all JSON objects (JSONDictionary).
typealias JSONArray = [JSONDictionary]
|
|
@ -0,0 +1,158 @@
|
||||||
|
//
|
||||||
|
// RSSInJSONParser.swift
|
||||||
|
// RSParser
|
||||||
|
//
|
||||||
|
// Created by Brent Simmons on 6/24/17.
|
||||||
|
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||||
|
//
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
|
||||||
|
// See https://github.com/scripting/Scripting-News/blob/master/rss-in-json/README.md
|
||||||
|
// Also: http://cyber.harvard.edu/rss/rss.html
|
||||||
|
|
||||||
|
public struct RSSInJSONParser {

    /// Parses RSS-in-JSON data into a `ParsedFeed`.
    ///
    /// - Parameter parserData: Supplies the JSON bytes (`data`) and the feed URL (`url`).
    /// - Returns: The parsed feed.
    /// - Throws: Any error thrown by `JSONSerialization` for malformed JSON;
    ///   `FeedParserError(.rssChannelNotFound)` when the top level is not an object or
    ///   no channel object is found; `FeedParserError(.rssItemsNotFound)` when no items
    ///   array is found.
    public static func parse(parserData: ParserData) throws -> ParsedFeed? {

        // jsonObject(with:) returns Any, so it must be cast before it can be subscripted.
        guard let parsedObject = try JSONSerialization.jsonObject(with: parserData.data) as? JSONDictionary else {
            throw FeedParserError(.rssChannelNotFound)
        }

        // The format nests the channel inside a top-level "rss" object.
        // A channel directly at the top level is accepted too.
        let rssObject = parsedObject["rss"] as? JSONDictionary
        let foundChannel = (rssObject?["channel"] as? JSONDictionary)
            ?? (parsedObject["channel"] as? JSONDictionary)
        guard let channelObject = foundChannel else {
            throw FeedParserError(.rssChannelNotFound)
        }

        // I’d bet money that in practice the items array won’t always appear correctly inside the channel object.
        // I’d also bet that sometimes it gets called "items" instead of "item".
        let foundItems = (channelObject["item"] as? JSONArray)
            ?? (parsedObject["item"] as? JSONArray)
            ?? (channelObject["items"] as? JSONArray)
            ?? (parsedObject["items"] as? JSONArray)
        guard let itemsObject = foundItems else {
            throw FeedParserError(.rssItemsNotFound)
        }

        let title = channelObject["title"] as? String
        let homePageURL = channelObject["link"] as? String
        let feedURL = parserData.url
        let feedDescription = channelObject["description"] as? String

        let items = parseItems(itemsObject)

        return ParsedFeed(type: .rssInJSON, title: title, homePageURL: homePageURL, feedURL: feedURL, feedDescription: feedDescription, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items)
    }
}
|
||||||
|
|
||||||
|
private extension RSSInJSONParser {

    /// Converts each item dictionary into a `ParsedItem`, dropping those that can’t be parsed.
    static func parseItems(_ itemsObject: JSONArray) -> [ParsedItem] {

        // flatMap with an Optional-returning closure drops the nils
        // (this overload is called compactMap from Swift 4.1 on).
        return itemsObject.flatMap { (oneItemDictionary) -> ParsedItem? in
            return parsedItemWithDictionary(oneItemDictionary)
        }
    }

    /// Builds one `ParsedItem` from an RSS-in-JSON item dictionary.
    ///
    /// - Returns: `nil` when the item has no title and no description; otherwise the parsed item.
    static func parsedItemWithDictionary(_ itemDictionary: JSONDictionary) -> ParsedItem? {

        let externalURL = itemDictionary["link"] as? String
        let title = itemDictionary["title"] as? String

        // A description that contains no "<" is treated as plain text rather than HTML.
        var contentHTML = itemDictionary["description"] as? String
        var contentText: String? = nil
        if let description = contentHTML, !description.contains("<") {
            contentText = description
            contentHTML = nil
        }
        if contentHTML == nil && contentText == nil && title == nil {
            return nil
        }

        var datePublished: Date? = nil
        if let datePublishedString = itemDictionary["pubDate"] as? String {
            // NOTE(review): RSDateWithString is declared outside this view — confirm its Swift signature.
            datePublished = RSDateWithString(datePublishedString as NSString)
        }

        // The "author" value is stored as the author's email address.
        let authorEmailAddress = itemDictionary["author"] as? String
        var authors: [ParsedAuthor]? = nil
        if authorEmailAddress != nil {
            let parsedAuthor = ParsedAuthor(name: nil, url: nil, avatarURL: nil, emailAddress: authorEmailAddress)
            authors = [parsedAuthor]
        }

        // "category" may be a single object or an array of objects; the tag text is under "#value".
        var tags: [String]? = nil
        if let categoryObject = itemDictionary["category"] as? JSONDictionary {
            if let oneTag = categoryObject["#value"] as? String {
                tags = [oneTag]
            }
        }
        else if let categoryArray = itemDictionary["category"] as? JSONArray {
            tags = categoryArray.flatMap { (oneCategoryDictionary) -> String? in
                return oneCategoryDictionary["#value"] as? String
            }
        }

        var attachments: [ParsedAttachment]? = nil
        if let enclosureObject = itemDictionary["enclosure"] as? JSONDictionary {
            if let attachmentURL = enclosureObject["url"] as? String {
                // "length" is accepted either as a number or as a numeric string.
                var attachmentSize = enclosureObject["length"] as? Int
                if attachmentSize == nil {
                    if let attachmentSizeString = enclosureObject["length"] as? String {
                        attachmentSize = (attachmentSizeString as NSString).integerValue
                    }
                }
                let type = enclosureObject["type"] as? String
                let oneAttachment = ParsedAttachment(url: attachmentURL, mimeType: type, title: nil, sizeInBytes: attachmentSize, durationInSeconds: nil)
                attachments = [oneAttachment]
            }
        }

        var uniqueID: String? = itemDictionary["guid"] as? String
        if uniqueID == nil {

            // Calculate a uniqueID based on a combination of non-empty elements. Then hash the result.
            // Items should have guids. When they don't, re-runs are very likely
            // because there's no other 100% reliable way to determine identity.
            // This calculated uniqueID is valid only for this particular feed. (Just like ids in JSON Feed.)

            var s = ""
            if let datePublished = datePublished {
                s += "\(datePublished.timeIntervalSince1970)"
            }
            if let title = title {
                s += title
            }
            if let externalURL = externalURL {
                s += externalURL
            }
            if let authorEmailAddress = authorEmailAddress {
                s += authorEmailAddress
            }
            if let oneAttachmentURL = attachments?.first?.url {
                s += oneAttachmentURL
            }
            if s.isEmpty {
                // Sheesh. Tough case. Fall back to the content itself.
                if let contentHTML = contentHTML {
                    s = contentHTML
                }
                if let contentText = contentText {
                    s = contentText
                }
            }
            uniqueID = (s as NSString).rsxml_md5HashString()
        }

        return ParsedItem(uniqueID: uniqueID, url: nil, externalURL: externalURL, title: title, contentHTML: contentHTML, contentText: contentText, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: nil, authors: authors, tags: tags, attachments: attachments)
    }
}
|
|
@ -15,4 +15,13 @@ public struct ParsedAttachment {
|
||||||
public let title: String?
|
public let title: String?
|
||||||
public let sizeInBytes: Int?
|
public let sizeInBytes: Int?
|
||||||
public let durationInSeconds: Int?
|
public let durationInSeconds: Int?
|
||||||
|
|
||||||
|
/// Creates an attachment, storing each argument in the property of the same name.
init(url: String?, mimeType: String?, title: String?, sizeInBytes: Int?, durationInSeconds: Int?) {
    // Descriptive fields.
    self.title = title
    self.mimeType = mimeType
    self.url = url
    // Numeric fields.
    self.durationInSeconds = durationInSeconds
    self.sizeInBytes = sizeInBytes
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,4 +14,12 @@ public struct ParsedAuthor {
|
||||||
public let url: String?
|
public let url: String?
|
||||||
public let avatarURL: String?
|
public let avatarURL: String?
|
||||||
public let emailAddress: String?
|
public let emailAddress: String?
|
||||||
|
|
||||||
|
/// Creates an author, storing each argument in the property of the same name.
init(name: String?, url: String?, avatarURL: String?, emailAddress: String?) {
    self.emailAddress = emailAddress
    self.avatarURL = avatarURL
    self.url = url
    self.name = name
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,4 +22,21 @@ public struct ParsedFeed {
|
||||||
public let expired: Bool
|
public let expired: Bool
|
||||||
public let hubs: [ParsedHub]?
|
public let hubs: [ParsedHub]?
|
||||||
public let items: [ParsedItem]
|
public let items: [ParsedItem]
|
||||||
|
|
||||||
|
/// Creates a parsed feed, storing each argument in the property of the same name.
init(
    type: FeedType,
    title: String?,
    homePageURL: String?,
    feedURL: String?,
    feedDescription: String?,
    nextURL: String?,
    iconURL: String?,
    faviconURL: String?,
    authors: [ParsedAuthor]?,
    expired: Bool,
    hubs: [ParsedHub]?,
    items: [ParsedItem]
) {
    // Feed identity and description.
    self.type = type
    self.title = title
    self.feedDescription = feedDescription
    self.authors = authors

    // URLs.
    self.homePageURL = homePageURL
    self.feedURL = feedURL
    self.nextURL = nextURL
    self.iconURL = iconURL
    self.faviconURL = faviconURL

    // State and contents.
    self.expired = expired
    self.hubs = hubs
    self.items = items
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -24,4 +24,23 @@ public struct ParsedItem {
|
||||||
public let authors: [ParsedAuthor]?
|
public let authors: [ParsedAuthor]?
|
||||||
public let tags: [String]?
|
public let tags: [String]?
|
||||||
public let attachments: [ParsedAttachment]?
|
public let attachments: [ParsedAttachment]?
|
||||||
|
|
||||||
|
/// Creates a parsed item, storing each argument in the property of the same name.
init(
    uniqueID: String?,
    url: String?,
    externalURL: String?,
    title: String?,
    contentHTML: String?,
    contentText: String?,
    summary: String?,
    imageURL: String?,
    bannerImageURL: String?,
    datePublished: Date?,
    dateModified: Date?,
    authors: [ParsedAuthor]?,
    tags: [String]?,
    attachments: [ParsedAttachment]?
) {
    // Identity and links.
    self.uniqueID = uniqueID
    self.url = url
    self.externalURL = externalURL

    // Text content.
    self.title = title
    self.summary = summary
    self.contentHTML = contentHTML
    self.contentText = contentText

    // Images.
    self.imageURL = imageURL
    self.bannerImageURL = bannerImageURL

    // Dates.
    self.datePublished = datePublished
    self.dateModified = dateModified

    // Related collections.
    self.authors = authors
    self.tags = tags
    self.attachments = attachments
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,8 @@
|
||||||
|
|
||||||
@import Foundation;
|
@import Foundation;
|
||||||
|
|
||||||
|
#import <RSParser/NSData+RSParser.h>
|
||||||
|
#import <RSParser/RSParser.h>
|
||||||
|
|
||||||
//#import <RSXML/RSSAXParser.h>
|
//#import <RSXML/RSSAXParser.h>
|
||||||
//#import <RSXML/RSXMLData.h>
|
//#import <RSXML/RSXMLData.h>
|
||||||
|
|
|
@ -45,6 +45,11 @@
|
||||||
84469D2F1EFA3134004A6B28 /* RSRSSParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D251EFA3134004A6B28 /* RSRSSParser.h */; };
|
84469D2F1EFA3134004A6B28 /* RSRSSParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D251EFA3134004A6B28 /* RSRSSParser.h */; };
|
||||||
84469D301EFA3134004A6B28 /* RSRSSParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469D261EFA3134004A6B28 /* RSRSSParser.m */; };
|
84469D301EFA3134004A6B28 /* RSRSSParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469D261EFA3134004A6B28 /* RSRSSParser.m */; };
|
||||||
84469D321EFA31CF004A6B28 /* FeedParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84469D311EFA31CF004A6B28 /* FeedParser.swift */; };
|
84469D321EFA31CF004A6B28 /* FeedParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84469D311EFA31CF004A6B28 /* FeedParser.swift */; };
|
||||||
|
84469D351EFF1190004A6B28 /* NSData+RSParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D331EFF1190004A6B28 /* NSData+RSParser.h */; };
|
||||||
|
84469D361EFF1190004A6B28 /* NSData+RSParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469D341EFF1190004A6B28 /* NSData+RSParser.m */; };
|
||||||
|
84469D381EFF2645004A6B28 /* RSSInJSONParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84469D371EFF2645004A6B28 /* RSSInJSONParser.swift */; };
|
||||||
|
84469D401EFF29A9004A6B28 /* FeedParserError.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84469D3F1EFF29A9004A6B28 /* FeedParserError.swift */; };
|
||||||
|
84469D421EFF2B2D004A6B28 /* JSONTypes.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84469D411EFF2B2D004A6B28 /* JSONTypes.swift */; };
|
||||||
84D81BDC1EFA28E700652332 /* RSParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84D81BDA1EFA28E700652332 /* RSParser.h */; settings = {ATTRIBUTES = (Public, ); }; };
|
84D81BDC1EFA28E700652332 /* RSParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84D81BDA1EFA28E700652332 /* RSParser.h */; settings = {ATTRIBUTES = (Public, ); }; };
|
||||||
84D81BDE1EFA2B7D00652332 /* ParsedFeed.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84D81BDD1EFA2B7D00652332 /* ParsedFeed.swift */; };
|
84D81BDE1EFA2B7D00652332 /* ParsedFeed.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84D81BDD1EFA2B7D00652332 /* ParsedFeed.swift */; };
|
||||||
84D81BE01EFA2BAE00652332 /* FeedType.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84D81BDF1EFA2BAE00652332 /* FeedType.swift */; };
|
84D81BE01EFA2BAE00652332 /* FeedType.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84D81BDF1EFA2BAE00652332 /* FeedType.swift */; };
|
||||||
|
@ -105,6 +110,11 @@
|
||||||
84469D251EFA3134004A6B28 /* RSRSSParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSRSSParser.h; sourceTree = "<group>"; };
|
84469D251EFA3134004A6B28 /* RSRSSParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSRSSParser.h; sourceTree = "<group>"; };
|
||||||
84469D261EFA3134004A6B28 /* RSRSSParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RSRSSParser.m; sourceTree = "<group>"; };
|
84469D261EFA3134004A6B28 /* RSRSSParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RSRSSParser.m; sourceTree = "<group>"; };
|
||||||
84469D311EFA31CF004A6B28 /* FeedParser.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = FeedParser.swift; path = Feeds/FeedParser.swift; sourceTree = "<group>"; };
|
84469D311EFA31CF004A6B28 /* FeedParser.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = FeedParser.swift; path = Feeds/FeedParser.swift; sourceTree = "<group>"; };
|
||||||
|
84469D331EFF1190004A6B28 /* NSData+RSParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "NSData+RSParser.h"; sourceTree = "<group>"; };
|
||||||
|
84469D341EFF1190004A6B28 /* NSData+RSParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "NSData+RSParser.m"; sourceTree = "<group>"; };
|
||||||
|
84469D371EFF2645004A6B28 /* RSSInJSONParser.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = RSSInJSONParser.swift; path = Feeds/JSON/RSSInJSONParser.swift; sourceTree = "<group>"; };
|
||||||
|
84469D3F1EFF29A9004A6B28 /* FeedParserError.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = FeedParserError.swift; path = Feeds/FeedParserError.swift; sourceTree = "<group>"; };
|
||||||
|
84469D411EFF2B2D004A6B28 /* JSONTypes.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = JSONTypes.swift; path = Feeds/JSON/JSONTypes.swift; sourceTree = "<group>"; };
|
||||||
84D81BD91EFA28E700652332 /* Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
|
84D81BD91EFA28E700652332 /* Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
|
||||||
84D81BDA1EFA28E700652332 /* RSParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSParser.h; sourceTree = "<group>"; };
|
84D81BDA1EFA28E700652332 /* RSParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSParser.h; sourceTree = "<group>"; };
|
||||||
84D81BDD1EFA2B7D00652332 /* ParsedFeed.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = ParsedFeed.swift; path = Feeds/ParsedFeed.swift; sourceTree = "<group>"; };
|
84D81BDD1EFA2B7D00652332 /* ParsedFeed.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = ParsedFeed.swift; path = Feeds/ParsedFeed.swift; sourceTree = "<group>"; };
|
||||||
|
@ -148,6 +158,8 @@
|
||||||
84D81BE31EFA2D3D00652332 /* ParsedItem.swift */,
|
84D81BE31EFA2D3D00652332 /* ParsedItem.swift */,
|
||||||
84D81BE51EFA2DFB00652332 /* ParsedAttachment.swift */,
|
84D81BE51EFA2DFB00652332 /* ParsedAttachment.swift */,
|
||||||
84D81BE71EFA2E6700652332 /* ParsedHub.swift */,
|
84D81BE71EFA2E6700652332 /* ParsedHub.swift */,
|
||||||
|
84469D3F1EFF29A9004A6B28 /* FeedParserError.swift */,
|
||||||
|
84469D391EFF2649004A6B28 /* JSON */,
|
||||||
84469D1C1EFA3134004A6B28 /* XML */,
|
84469D1C1EFA3134004A6B28 /* XML */,
|
||||||
);
|
);
|
||||||
name = Feeds;
|
name = Feeds;
|
||||||
|
@ -197,6 +209,8 @@
|
||||||
84469D0F1EFA30A2004A6B28 /* Utilities */ = {
|
84469D0F1EFA30A2004A6B28 /* Utilities */ = {
|
||||||
isa = PBXGroup;
|
isa = PBXGroup;
|
||||||
children = (
|
children = (
|
||||||
|
84469D331EFF1190004A6B28 /* NSData+RSParser.h */,
|
||||||
|
84469D341EFF1190004A6B28 /* NSData+RSParser.m */,
|
||||||
84469D101EFA30A2004A6B28 /* NSString+RSXML.h */,
|
84469D101EFA30A2004A6B28 /* NSString+RSXML.h */,
|
||||||
84469D111EFA30A2004A6B28 /* NSString+RSXML.m */,
|
84469D111EFA30A2004A6B28 /* NSString+RSXML.m */,
|
||||||
84469D121EFA30A2004A6B28 /* RSDateParser.h */,
|
84469D121EFA30A2004A6B28 /* RSDateParser.h */,
|
||||||
|
@ -225,6 +239,15 @@
|
||||||
path = Feeds/XML;
|
path = Feeds/XML;
|
||||||
sourceTree = "<group>";
|
sourceTree = "<group>";
|
||||||
};
|
};
|
||||||
|
84469D391EFF2649004A6B28 /* JSON */ = {
|
||||||
|
isa = PBXGroup;
|
||||||
|
children = (
|
||||||
|
84469D411EFF2B2D004A6B28 /* JSONTypes.swift */,
|
||||||
|
84469D371EFF2645004A6B28 /* RSSInJSONParser.swift */,
|
||||||
|
);
|
||||||
|
name = JSON;
|
||||||
|
sourceTree = "<group>";
|
||||||
|
};
|
||||||
84FF5F7A1EFA285800C15A01 = {
|
84FF5F7A1EFA285800C15A01 = {
|
||||||
isa = PBXGroup;
|
isa = PBXGroup;
|
||||||
children = (
|
children = (
|
||||||
|
@ -273,6 +296,7 @@
|
||||||
84469D2D1EFA3134004A6B28 /* RSParsedFeed.h in Headers */,
|
84469D2D1EFA3134004A6B28 /* RSParsedFeed.h in Headers */,
|
||||||
84469D181EFA30A2004A6B28 /* RSDateParser.h in Headers */,
|
84469D181EFA30A2004A6B28 /* RSDateParser.h in Headers */,
|
||||||
84469D1A1EFA30A2004A6B28 /* RSXMLInternal.h in Headers */,
|
84469D1A1EFA30A2004A6B28 /* RSXMLInternal.h in Headers */,
|
||||||
|
84469D351EFF1190004A6B28 /* NSData+RSParser.h in Headers */,
|
||||||
84D81BDC1EFA28E700652332 /* RSParser.h in Headers */,
|
84D81BDC1EFA28E700652332 /* RSParser.h in Headers */,
|
||||||
84469D0B1EFA307E004A6B28 /* RSHTMLMetadataParser.h in Headers */,
|
84469D0B1EFA307E004A6B28 /* RSHTMLMetadataParser.h in Headers */,
|
||||||
84469CFC1EFA3069004A6B28 /* RSSAXParser.h in Headers */,
|
84469CFC1EFA3069004A6B28 /* RSSAXParser.h in Headers */,
|
||||||
|
@ -392,6 +416,7 @@
|
||||||
84469D0E1EFA307E004A6B28 /* RSSAXHTMLParser.m in Sources */,
|
84469D0E1EFA307E004A6B28 /* RSSAXHTMLParser.m in Sources */,
|
||||||
84469CF41EFA3000004A6B28 /* RSOPMLFeedSpecifier.m in Sources */,
|
84469CF41EFA3000004A6B28 /* RSOPMLFeedSpecifier.m in Sources */,
|
||||||
84469CF01EFA3000004A6B28 /* RSOPMLAttributes.m in Sources */,
|
84469CF01EFA3000004A6B28 /* RSOPMLAttributes.m in Sources */,
|
||||||
|
84469D381EFF2645004A6B28 /* RSSInJSONParser.swift in Sources */,
|
||||||
84469D301EFA3134004A6B28 /* RSRSSParser.m in Sources */,
|
84469D301EFA3134004A6B28 /* RSRSSParser.m in Sources */,
|
||||||
84469D191EFA30A2004A6B28 /* RSDateParser.m in Sources */,
|
84469D191EFA30A2004A6B28 /* RSDateParser.m in Sources */,
|
||||||
84469CFD1EFA3069004A6B28 /* RSSAXParser.m in Sources */,
|
84469CFD1EFA3069004A6B28 /* RSSAXParser.m in Sources */,
|
||||||
|
@ -400,14 +425,17 @@
|
||||||
84469CF61EFA3000004A6B28 /* RSOPMLItem.m in Sources */,
|
84469CF61EFA3000004A6B28 /* RSOPMLItem.m in Sources */,
|
||||||
84469D2A1EFA3134004A6B28 /* RSFeedParser.m in Sources */,
|
84469D2A1EFA3134004A6B28 /* RSFeedParser.m in Sources */,
|
||||||
84D81BE41EFA2D3D00652332 /* ParsedItem.swift in Sources */,
|
84D81BE41EFA2D3D00652332 /* ParsedItem.swift in Sources */,
|
||||||
|
84469D421EFF2B2D004A6B28 /* JSONTypes.swift in Sources */,
|
||||||
84469D0C1EFA307E004A6B28 /* RSHTMLMetadataParser.m in Sources */,
|
84469D0C1EFA307E004A6B28 /* RSHTMLMetadataParser.m in Sources */,
|
||||||
84469D0A1EFA307E004A6B28 /* RSHTMLMetadata.m in Sources */,
|
84469D0A1EFA307E004A6B28 /* RSHTMLMetadata.m in Sources */,
|
||||||
84469D171EFA30A2004A6B28 /* NSString+RSXML.m in Sources */,
|
84469D171EFA30A2004A6B28 /* NSString+RSXML.m in Sources */,
|
||||||
84469D2C1EFA3134004A6B28 /* RSParsedArticle.m in Sources */,
|
84469D2C1EFA3134004A6B28 /* RSParsedArticle.m in Sources */,
|
||||||
84469D2E1EFA3134004A6B28 /* RSParsedFeed.m in Sources */,
|
84469D2E1EFA3134004A6B28 /* RSParsedFeed.m in Sources */,
|
||||||
84469CF81EFA3000004A6B28 /* RSOPMLParser.m in Sources */,
|
84469CF81EFA3000004A6B28 /* RSOPMLParser.m in Sources */,
|
||||||
|
84469D401EFF29A9004A6B28 /* FeedParserError.swift in Sources */,
|
||||||
84469D321EFA31CF004A6B28 /* FeedParser.swift in Sources */,
|
84469D321EFA31CF004A6B28 /* FeedParser.swift in Sources */,
|
||||||
84469D281EFA3134004A6B28 /* RSAtomParser.m in Sources */,
|
84469D281EFA3134004A6B28 /* RSAtomParser.m in Sources */,
|
||||||
|
84469D361EFF1190004A6B28 /* NSData+RSParser.m in Sources */,
|
||||||
84D81BE61EFA2DFB00652332 /* ParsedAttachment.swift in Sources */,
|
84D81BE61EFA2DFB00652332 /* ParsedAttachment.swift in Sources */,
|
||||||
84D81BDE1EFA2B7D00652332 /* ParsedFeed.swift in Sources */,
|
84D81BDE1EFA2B7D00652332 /* ParsedFeed.swift in Sources */,
|
||||||
84D81BE81EFA2E6700652332 /* ParsedHub.swift in Sources */,
|
84D81BE81EFA2E6700652332 /* ParsedHub.swift in Sources */,
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
//
|
||||||
|
// NSData+RSParser.h
|
||||||
|
// RSParser
|
||||||
|
//
|
||||||
|
// Created by Brent Simmons on 6/24/17.
|
||||||
|
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||||
|
//
|
||||||
|
|
||||||
|
@import Foundation;
|
||||||
|
|
||||||
|
|
||||||
|
// Quick heuristics that guess what kind of content the data holds
// by looking for telltale strings in its bytes.
@interface NSData (RSParser)

// General content checks.
- (BOOL)isProbablyHTML;
- (BOOL)isProbablyXML;  // Starts with "<?xml", ignoring leading whitespace.
- (BOOL)isProbablyJSON; // Starts with "{", ignoring leading whitespace.

// Feed-format checks. These are implemented in NSData+RSParser.m and must be
// declared here so that callers (including Swift code) can see them.
- (BOOL)isProbablyJSONFeed;
- (BOOL)isProbablyRSSInJSONFeed;
- (BOOL)isProbablyRSS;
- (BOOL)isProbablyAtom;

@end
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,132 @@
|
||||||
|
//
|
||||||
|
// NSData+RSParser.m
|
||||||
|
// RSParser
|
||||||
|
//
|
||||||
|
// Created by Brent Simmons on 6/24/17.
|
||||||
|
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||||
|
//
|
||||||
|
|
||||||
|
#import "NSData+RSParser.h"
|
||||||
|
|
||||||
|
/* TODO: find real-world cases where the isProbably* cases fail when they should succeed, and add them to tests.*/
|
||||||
|
|
||||||
|
// Prototypes for the static helpers defined after the @implementation.
// didFindString needs one too: it is called from the category methods before its definition.
static BOOL didFindString(const char *string, const char *bytes, NSUInteger numberOfBytes);
static BOOL bytesAreProbablyHTML(const char *bytes, NSUInteger numberOfBytes);
static BOOL bytesAreProbablyXML(const char *bytes, NSUInteger numberOfBytes);
static BOOL bytesStartWithStringIgnoringWhitespace(const char *string, const char *bytes, NSUInteger numberOfBytes);
|
||||||
|
|
||||||
|
@implementation NSData (RSParser)

// YES when the bytes contain one of the HTML markers checked by bytesAreProbablyHTML.
- (BOOL)isProbablyHTML {

    return bytesAreProbablyHTML(self.bytes, self.length);
}

// YES when the data starts with "<?xml", ignoring leading whitespace.
- (BOOL)isProbablyXML {

    return bytesAreProbablyXML(self.bytes, self.length);
}

// YES when the data starts with "{", ignoring leading whitespace.
- (BOOL)isProbablyJSON {

    return bytesStartWithStringIgnoringWhitespace("{", self.bytes, self.length);
}

// YES when the data looks like JSON and contains the JSON Feed version URL prefix.
- (BOOL)isProbablyJSONFeed {

    if (![self isProbablyJSON]) {
        return NO;
    }
    return didFindString("https://jsonfeed.org/version/", self.bytes, self.length);
}

// YES when the data looks like JSON and contains "rss", "channel" and "item".
- (BOOL)isProbablyRSSInJSONFeed {

    if (![self isProbablyJSON]) {
        return NO;
    }
    const char *bytes = self.bytes;
    NSUInteger length = self.length;
    return didFindString("rss", bytes, length) && didFindString("channel", bytes, length) && didFindString("item", bytes, length);
}

// YES when the data looks like XML and contains "<rss".
- (BOOL)isProbablyRSS {

    if (![self isProbablyXML]) {
        return NO;
    }

    return didFindString("<rss", self.bytes, self.length);
}

// YES when the data looks like XML and contains "<feed".
- (BOOL)isProbablyAtom {

    if (![self isProbablyXML]) {
        return NO;
    }

    return didFindString("<feed", self.bytes, self.length);
}

@end
|
||||||
|
|
||||||
|
|
||||||
|
// Returns YES when the C string `string` occurs within the first `numberOfBytes` bytes of `bytes`.
static BOOL didFindString(const char *string, const char *bytes, NSUInteger numberOfBytes) {

    return strnstr(bytes, string, numberOfBytes) != NULL;
}
|
||||||
|
|
||||||
|
// Returns YES when, after skipping leading spaces, tabs, CRs and LFs, the bytes begin with `string`.
// Returns NO when the data is empty or all whitespace, or when fewer bytes remain than `string` is long.
static BOOL bytesStartWithStringIgnoringWhitespace(const char *string, const char *bytes, NSUInteger numberOfBytes) {

    const size_t prefixLength = strlen(string);

    for (NSUInteger i = 0; i < numberOfBytes; i++) {

        const char ch = bytes[i];
        if (ch == ' ' || ch == '\r' || ch == '\n' || ch == '\t') {
            continue;
        }

        // First non-whitespace byte: the prefix has to start exactly here.
        // Compare in place instead of searching the whole buffer.
        if (numberOfBytes - i < prefixLength) {
            return NO;
        }
        return memcmp(bytes + i, string, prefixLength) == 0;
    }

    return NO;
}
|
||||||
|
|
||||||
|
// Returns YES when the bytes contain an HTML marker: one of a few common tags,
// or an HTML doctype (checked only when a "<" is present at all).
// NOTE(review): a feed that embeds one of these tags in its content would also match — confirm that is acceptable.
static BOOL bytesAreProbablyHTML(const char *bytes, NSUInteger numberOfBytes) {

    static const char *const htmlTags[] = {"<html", "<HTML", "<body", "<meta"};
    static const char *const htmlDoctypes[] = {"doctype html", "DOCTYPE html", "DOCTYPE HTML"};

    for (size_t i = 0; i < sizeof(htmlTags) / sizeof(htmlTags[0]); i++) {
        if (didFindString(htmlTags[i], bytes, numberOfBytes)) {
            return YES;
        }
    }

    if (didFindString("<", bytes, numberOfBytes)) {
        for (size_t i = 0; i < sizeof(htmlDoctypes) / sizeof(htmlDoctypes[0]); i++) {
            if (didFindString(htmlDoctypes[i], bytes, numberOfBytes)) {
                return YES;
            }
        }
    }

    return NO;
}
|
||||||
|
|
||||||
|
// Returns YES when the bytes start with an XML declaration ("<?xml"), ignoring leading whitespace.
static BOOL bytesAreProbablyXML(const char *bytes, NSUInteger numberOfBytes) {

    return bytesStartWithStringIgnoringWhitespace("<?xml", bytes, numberOfBytes);
}
|
||||||
|
|
Loading…
Reference in New Issue