NetNewsWire/Shared/FeedFinder/FeedFinder.swift

214 lines
5.4 KiB
Swift
Raw Normal View History

2017-05-22 22:27:54 +02:00
//
// FeedFinder.swift
// FeedFinder
2017-05-22 22:27:54 +02:00
//
// Created by Brent Simmons on 8/2/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
2017-05-22 22:27:54 +02:00
//
import Foundation
import RSParser
2017-05-22 22:27:54 +02:00
import RSWeb
import RSCore
protocol FeedFinderDelegate: class {
2017-05-22 22:27:54 +02:00
func feedFinder(_: FeedFinder, didFindFeeds: Set<FeedSpecifier>)
}
class FeedFinder {
2017-05-22 22:27:54 +02:00
private weak var delegate: FeedFinderDelegate?
private var feedSpecifiers = [String: FeedSpecifier]()
private var didNotifyDelegate = false
2017-05-22 22:27:54 +02:00
var initialDownloadError: Error?
var initialDownloadStatusCode = -1
2017-05-22 22:27:54 +02:00
init(url: URL, delegate: FeedFinderDelegate) {
2017-05-22 22:27:54 +02:00
self.delegate = delegate
DispatchQueue.main.async() { () -> Void in
self.findFeeds(url)
}
}
deinit {
notifyDelegateIfNeeded()
}
}
private extension FeedFinder {
func addFeedSpecifier(_ feedSpecifier: FeedSpecifier) {
// If theres an existing feed specifier, merge the two so that we have the best data. If one has a title and one doesnt, use that non-nil title. Use the better source.
if let existingFeedSpecifier = feedSpecifiers[feedSpecifier.urlString] {
let mergedFeedSpecifier = existingFeedSpecifier.feedSpecifierByMerging(feedSpecifier)
feedSpecifiers[feedSpecifier.urlString] = mergedFeedSpecifier
}
else {
feedSpecifiers[feedSpecifier.urlString] = feedSpecifier
}
}
func findFeedsInHTMLPage(htmlData: Data, urlString: String) {
// Feeds in the <head> section we automatically assume are feeds.
// If there are none from the <head> section,
// then possible feeds in <body> section are downloaded individually
// and added once we determine they are feeds.
let possibleFeedSpecifiers = possibleFeedsInHTMLPage(htmlData: htmlData, urlString: urlString)
var feedSpecifiersToDownload = Set<FeedSpecifier>()
var didFindFeedInHTMLHead = false
for oneFeedSpecifier in possibleFeedSpecifiers {
if oneFeedSpecifier.source == .HTMLHead {
addFeedSpecifier(oneFeedSpecifier)
didFindFeedInHTMLHead = true
}
else {
if !feedSpecifiersContainsURLString(oneFeedSpecifier.urlString) {
feedSpecifiersToDownload.insert(oneFeedSpecifier)
}
}
}
if didFindFeedInHTMLHead || feedSpecifiersToDownload.isEmpty {
stopFinding()
}
else {
downloadFeedSpecifiers(feedSpecifiersToDownload)
}
}
func possibleFeedsInHTMLPage(htmlData: Data, urlString: String) -> Set<FeedSpecifier> {
let parserData = ParserData(url: urlString, data: htmlData)
var feedSpecifiers = HTMLFeedFinder(parserData: parserData).feedSpecifiers
2017-05-22 22:27:54 +02:00
if feedSpecifiers.isEmpty {
// Odds are decent its a WordPress site, and just adding /feed/ will work.
// Its also fairly common for /index.xml to work.
2017-05-22 22:27:54 +02:00
if let url = URL(string: urlString) {
let feedURL = url.appendingPathComponent("feed", isDirectory: true)
let wordpressFeedSpecifier = FeedSpecifier(title: nil, urlString: feedURL.absoluteString, source: .HTMLLink)
feedSpecifiers.insert(wordpressFeedSpecifier)
let indexXMLURL = url.appendingPathComponent("index.xml", isDirectory: false)
let indexXMLFeedSpecifier = FeedSpecifier(title: nil, urlString: indexXMLURL.absoluteString, source: .HTMLLink)
feedSpecifiers.insert(indexXMLFeedSpecifier)
2017-05-22 22:27:54 +02:00
}
}
return feedSpecifiers
}
func feedSpecifiersContainsURLString(_ urlString: String) -> Bool {
if let _ = feedSpecifiers[urlString] {
return true
}
return false
}
func isHTML(_ data: Data) -> Bool {
return (data as NSData).rs_dataIsProbablyHTML()
}
func findFeeds(_ initialURL: URL) {
downloadInitialFeed(initialURL)
}
func downloadInitialFeed(_ initialURL: URL) {
downloadUsingCache(initialURL) { (data, response, error) in
2017-05-22 22:27:54 +02:00
self.initialDownloadStatusCode = response?.forcedStatusCode ?? -1
if let error = error {
self.initialDownloadError = error
self.stopFinding()
return
}
guard let data = data, let response = response else {
self.stopFinding()
return
}
if !response.statusIsOK || data.isEmpty {
self.stopFinding()
return
}
if self.isFeed(data, initialURL.absoluteString) {
let feedSpecifier = FeedSpecifier(title: nil, urlString: initialURL.absoluteString, source: .UserEntered)
self.addFeedSpecifier(feedSpecifier)
self.stopFinding()
return
}
if !self.isHTML(data) {
self.stopFinding()
return
}
self.findFeedsInHTMLPage(htmlData: data, urlString: initialURL.absoluteString)
}
}
func downloadFeedSpecifiers(_ feedSpecifiers: Set<FeedSpecifier>) {
var pendingDownloads = feedSpecifiers
for oneFeedSpecifier in feedSpecifiers {
guard let url = URL(string: oneFeedSpecifier.urlString) else {
pendingDownloads.remove(oneFeedSpecifier)
continue
}
downloadUsingCache(url) { (data, response, error) in
2017-05-22 22:27:54 +02:00
pendingDownloads.remove(oneFeedSpecifier)
if let data = data, let response = response, response.statusIsOK, error == nil {
if self.isFeed(data, oneFeedSpecifier.urlString) {
self.addFeedSpecifier(oneFeedSpecifier)
}
}
if pendingDownloads.isEmpty {
self.stopFinding()
}
}
}
}
func stopFinding() {
notifyDelegateIfNeeded()
}
func notifyDelegateIfNeeded() {
if !didNotifyDelegate {
delegate?.feedFinder(self, didFindFeeds: Set(feedSpecifiers.values))
2017-05-22 22:27:54 +02:00
didNotifyDelegate = true
}
}
func isFeed(_ data: Data, _ urlString: String) -> Bool {
let parserData = ParserData(url: urlString, data: data)
return FeedParser.canParse(parserData)
2017-05-22 22:27:54 +02:00
}
}