//
//  FeedFinder.swift
//  FeedFinder
//
//  Created by Brent Simmons on 8/2/16.
//  Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
import Foundation
|
2017-07-02 02:22:19 +02:00
|
|
|
|
import RSParser
|
2017-05-22 22:27:54 +02:00
|
|
|
|
import RSWeb
|
|
|
|
|
import RSCore
|
|
|
|
|
|
2018-08-26 02:03:10 +02:00
|
|
|
|
/// Receives the result of a feed-finding run.
/// `AnyObject` (rather than the deprecated `class` constraint) so the
/// finder can hold the delegate weakly.
protocol FeedFinderDelegate: AnyObject {

	/// Called exactly once per FeedFinder with every feed found (possibly empty).
	func feedFinder(_: FeedFinder, didFindFeeds: Set<FeedSpecifier>)
}
|
|
|
|
|
|
2018-08-26 02:03:10 +02:00
|
|
|
|
/// Discovers the feeds associated with a URL: the URL itself if it is a feed,
/// feeds advertised in the page's HTML, and common fallback locations.
/// Notifies its delegate exactly once when finding completes (or at deinit).
class FeedFinder {

	private weak var delegate: FeedFinderDelegate?

	// Best-known specifier per feed URL string; entries are merged as better data arrives.
	private var feedSpecifiers = [String: FeedSpecifier]()

	// Guards against notifying the delegate more than once.
	private var didNotifyDelegate = false

	/// Error from downloading the initial URL, if any.
	var initialDownloadError: Error?

	/// HTTP status code of the initial download; -1 until a response arrives.
	var initialDownloadStatusCode = -1

	/// Starts finding on the next main-queue turn.
	/// - Parameters:
	///   - url: The page or feed URL to inspect.
	///   - delegate: Notified exactly once with the found feeds.
	init(url: URL, delegate: FeedFinderDelegate) {
		self.delegate = delegate

		// Defer to the next runloop turn so the caller finishes construction
		// before any work (and any delegate callback) can happen.
		// Idiomatic trailing closure; `() -> Void in` was redundant.
		DispatchQueue.main.async {
			self.findFeeds(url)
		}
	}

	deinit {
		// Make sure the delegate always hears back, even if finding was abandoned.
		notifyDelegateIfNeeded()
	}
}
|
|
|
|
|
|
|
|
|
|
private extension FeedFinder {
|
|
|
|
|
|
|
|
|
|
/// Records a found feed. If a specifier for the same URL already exists,
/// the two are merged so we keep the best data (a non-nil title, the
/// better source); otherwise the new specifier is stored as-is.
func addFeedSpecifier(_ feedSpecifier: FeedSpecifier) {
	let key = feedSpecifier.urlString
	if let existing = feedSpecifiers[key] {
		feedSpecifiers[key] = existing.feedSpecifierByMerging(feedSpecifier)
	} else {
		feedSpecifiers[key] = feedSpecifier
	}
}
|
|
|
|
|
|
|
|
|
|
/// Examines a downloaded HTML page for feeds.
/// Feeds advertised in the <head> section are trusted as feeds outright.
/// If none are found there, candidate links from elsewhere in the page are
/// downloaded individually and added only once confirmed to be feeds.
func findFeedsInHTMLPage(htmlData: Data, urlString: String) {
	var candidatesToDownload = Set<FeedSpecifier>()
	var foundFeedInHead = false

	for candidate in possibleFeedsInHTMLPage(htmlData: htmlData, urlString: urlString) {
		if candidate.source == .HTMLHead {
			addFeedSpecifier(candidate)
			foundFeedInHead = true
		} else if !feedSpecifiersContainsURLString(candidate.urlString) {
			// Not yet known — queue it for verification by download.
			candidatesToDownload.insert(candidate)
		}
	}

	if foundFeedInHead || candidatesToDownload.isEmpty {
		stopFinding()
	} else {
		downloadFeedSpecifiers(candidatesToDownload)
	}
}
|
|
|
|
|
|
|
|
|
|
/// Parses the HTML for advertised feeds. When the page advertises none,
/// falls back to guessing common conventional locations.
func possibleFeedsInHTMLPage(htmlData: Data, urlString: String) -> Set<FeedSpecifier> {
	let parserData = ParserData(url: urlString, data: htmlData)
	var specifiers = HTMLFeedFinder(parserData: parserData).feedSpecifiers

	guard specifiers.isEmpty, let url = URL(string: urlString) else {
		return specifiers
	}

	// Nothing advertised in the HTML. Odds are decent it's a WordPress site,
	// where /feed/ works; /index.xml is also fairly common.
	let guesses: [(component: String, isDirectory: Bool)] = [
		("feed", true),
		("index.xml", false)
	]
	for guess in guesses {
		let guessedURL = url.appendingPathComponent(guess.component, isDirectory: guess.isDirectory)
		specifiers.insert(FeedSpecifier(title: nil, urlString: guessedURL.absoluteString, source: .HTMLLink))
	}

	return specifiers
}
|
|
|
|
|
|
|
|
|
|
/// Whether a feed with the given URL string has already been recorded.
func feedSpecifiersContainsURLString(_ urlString: String) -> Bool {
	// A nil-check expresses this directly; the `if let _ =` binding was noise.
	return feedSpecifiers[urlString] != nil
}
|
|
|
|
|
|
|
|
|
|
/// Whether the downloaded bytes look like an HTML document
/// (delegates to RSCore's NSData heuristic).
func isHTML(_ data: Data) -> Bool {
	let nsData = data as NSData
	return nsData.rs_dataIsProbablyHTML()
}
|
|
|
|
|
|
|
|
|
|
// Entry point for the finding process: kicks off the download of the user-supplied URL.
func findFeeds(_ initialURL: URL) {
	downloadInitialFeed(initialURL)
}
|
|
|
|
|
|
|
|
|
|
// Downloads the initial URL and decides what it is:
// a feed itself, an HTML page to scan for feeds, or a dead end.
// NOTE: the closure captures self strongly, which keeps the finder alive
// until the download completes — presumably intentional, since deinit
// is the last-resort delegate notification. Confirm before changing.
func downloadInitialFeed(_ initialURL: URL) {
	downloadUsingCache(initialURL) { (data, response, error) in
		// Record the status code first, so callers can inspect it even on failure paths.
		self.initialDownloadStatusCode = response?.forcedStatusCode ?? -1

		if let error = error {
			// Surface the download error, then finish with whatever (nothing) we have.
			self.initialDownloadError = error
			self.stopFinding()
			return
		}
		guard let data = data, let response = response else {
			self.stopFinding()
			return
		}

		// Non-2xx or empty body: nothing to examine.
		if !response.statusIsOK || data.isEmpty {
			self.stopFinding()
			return
		}

		// The URL the user entered may itself be a feed — done if so.
		if self.isFeed(data, initialURL.absoluteString) {
			let feedSpecifier = FeedSpecifier(title: nil, urlString: initialURL.absoluteString, source: .UserEntered)
			self.addFeedSpecifier(feedSpecifier)
			self.stopFinding()
			return
		}

		// Not a feed and not HTML either — nothing more we can do.
		if !self.isHTML(data) {
			self.stopFinding()
			return
		}

		// It's an HTML page: scan it for advertised/guessable feeds.
		self.findFeedsInHTMLPage(htmlData: data, urlString: initialURL.absoluteString)
	}
}
|
|
|
|
|
|
|
|
|
|
/// Downloads each candidate individually; a candidate is recorded as a feed
/// only if its downloaded data parses as one. Finishes (calls stopFinding)
/// once every pending download has completed.
/// - Parameter feedSpecifiers: The candidates to verify.
func downloadFeedSpecifiers(_ feedSpecifiers: Set<FeedSpecifier>) {

	// Shrinks as downloads complete; empty means we're done.
	// NOTE(review): assumes downloadUsingCache calls back on a single (main)
	// queue, so this captured var isn't mutated concurrently — confirm.
	var pendingDownloads = feedSpecifiers

	for oneFeedSpecifier in feedSpecifiers {

		guard let url = URL(string: oneFeedSpecifier.urlString) else {
			// Unparseable URL string: drop it from the pending set.
			pendingDownloads.remove(oneFeedSpecifier)
			continue
		}

		downloadUsingCache(url) { (data, response, error) in

			pendingDownloads.remove(oneFeedSpecifier)

			if let data = data, let response = response, response.statusIsOK, error == nil {
				// Only keep the candidate if the bytes actually parse as a feed.
				if self.isFeed(data, oneFeedSpecifier.urlString) {
					self.addFeedSpecifier(oneFeedSpecifier)
				}
			}

			if pendingDownloads.isEmpty {
				self.stopFinding()
			}
		}
	}

	// Bug fix: if every candidate URL failed to parse (or the set was empty),
	// no callback will ever fire and finding would hang until deinit.
	// stopFinding() is idempotent (guarded by didNotifyDelegate), so a
	// synchronously-completed callback having already called it is harmless.
	if pendingDownloads.isEmpty {
		stopFinding()
	}
}
|
|
|
|
|
|
|
|
|
|
// Ends a finding run: reports the accumulated results to the delegate (at most once).
func stopFinding() {
	notifyDelegateIfNeeded()
}
|
|
|
|
|
|
|
|
|
|
/// Hands the accumulated feeds to the delegate, at most once per finder.
func notifyDelegateIfNeeded() {
	guard !didNotifyDelegate else {
		return
	}
	// Flag is set after the call, matching the original ordering.
	delegate?.feedFinder(self, didFindFeeds: Set(feedSpecifiers.values))
	didNotifyDelegate = true
}
|
|
|
|
|
|
|
|
|
|
/// Whether the downloaded data can be parsed as a feed by FeedParser.
func isFeed(_ data: Data, _ urlString: String) -> Bool {
	return FeedParser.canParse(ParserData(url: urlString, data: data))
}
|
|
|
|
|
}
|