NetNewsWire/Frameworks/RSFeedFinder/RSFeedFinder/HTMLFeedFinder.swift

84 lines
2.2 KiB
Swift
Raw Normal View History

//
// HTMLFeedFinder.swift
// RSFeedFinder
//
// Created by Brent Simmons on 8/7/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
import Foundation
import RSParser
2017-05-22 22:27:54 +02:00
/// Substrings whose presence in a link's URL marks that link as a possible feed.
/// `urlStringMightBeFeed(_:)` compares against these case-insensitively.
private let feedURLWordsToMatch: [String] = ["feed", "xml", "rss", "atom"]
/// Collects candidate feed specifiers from an HTML document.
///
/// Two kinds of links are gathered during initialization:
/// - feed links reported by `RSHTMLMetadataParser` (recorded with source `.HTMLHead`), and
/// - links returned by `RSHTMLLinkParser` whose URL passes `linkMightBeFeed(_:)`
///   (recorded with source `.HTMLLink`).
///
/// Specifiers are keyed by URL string, so a URL found more than once is merged
/// into a single entry by `addFeedSpecifier(_:)`.
class HTMLFeedFinder {

    /// All feed specifiers found in the document, one per distinct URL string.
    var feedSpecifiers: Set<FeedSpecifier> {
        return Set(feedSpecifiersDictionary.values)
    }

    /// Feed specifiers keyed by their URL string.
    fileprivate var feedSpecifiersDictionary = [String: FeedSpecifier]()

    /// Parses `parserData` and records every feed candidate it contains.
    ///
    /// - Parameter parserData: The HTML document to scan.
    init(parserData: ParserData) {

        let metadata = RSHTMLMetadataParser.htmlMetadata(with: parserData)

        // Feed links from the document metadata; links without a URL are skipped.
        for feedLink in metadata.feedLinks {
            guard let urlString = feedLink.urlString else {
                continue
            }
            addFeedSpecifier(FeedSpecifier(title: feedLink.title, urlString: urlString, source: .HTMLHead))
        }

        // Links from the link parser whose URL looks like it might point at a feed.
        if let bodyLinks = RSHTMLLinkParser.htmlLinks(with: parserData) {
            for bodyLink in bodyLinks {
                // Bind the URL explicitly instead of force-unwrapping it, so this loop
                // does not depend on linkMightBeFeed(_:) having checked for nil.
                guard linkMightBeFeed(bodyLink), let urlString = bodyLink.urlString else {
                    continue
                }
                addFeedSpecifier(FeedSpecifier(title: bodyLink.text, urlString: urlString, source: .HTMLLink))
            }
        }
    }
}
private extension HTMLFeedFinder {

    /// Records `feedSpecifier`, keyed by its URL string.
    ///
    /// If there's an existing feed specifier for the same URL, the two are merged via
    /// `feedSpecifierByMerging(_:)` so that we keep the best data: if one has a title
    /// and one doesn't, use the non-nil title, and use the better source.
    ///
    /// - Parameter feedSpecifier: The specifier to add or merge.
    func addFeedSpecifier(_ feedSpecifier: FeedSpecifier) {
        let key = feedSpecifier.urlString
        if let existingFeedSpecifier = feedSpecifiersDictionary[key] {
            feedSpecifiersDictionary[key] = existingFeedSpecifier.feedSpecifierByMerging(feedSpecifier)
        } else {
            feedSpecifiersDictionary[key] = feedSpecifier
        }
    }

    /// Returns whether `urlString` contains any of `feedURLWordsToMatch`, ignoring case.
    ///
    /// Occurrences of "buzzfeed" are masked out first so that the "feed" inside that
    /// name does not count as a match. The masking ignores case, consistent with the
    /// keyword comparison, so "BuzzFeed" is masked as well.
    ///
    /// - Parameter urlString: The URL string to inspect.
    /// - Returns: `true` if a feed-like word remains after masking, otherwise `false`.
    func urlStringMightBeFeed(_ urlString: String) -> Bool {
        let massagedURLString = urlString.replacingOccurrences(of: "buzzfeed", with: "_", options: .caseInsensitive)
        return feedURLWordsToMatch.contains(where: { word in
            massagedURLString.range(of: word, options: .caseInsensitive) != nil
        })
    }

    /// Returns whether `link` has a URL string that passes `urlStringMightBeFeed(_:)`.
    ///
    /// - Parameter link: The link to inspect.
    /// - Returns: `false` when the link has no URL string or the URL does not look like a feed.
    func linkMightBeFeed(_ link: RSHTMLLink) -> Bool {
        guard let linkURLString = link.urlString else {
            return false
        }
        return urlStringMightBeFeed(linkURLString)
    }
}