NetNewsWire/Evergreen/FeedFinder/HTMLFeedFinder.swift
2018-06-23 11:35:55 -07:00

82 lines
2.2 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//
// HTMLFeedFinder.swift
// FeedFinder
//
// Created by Brent Simmons on 8/7/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
import Foundation
import RSParser
// Words that suggest a URL points at a feed. urlStringMightBeFeed(_:) reports a
// possible feed when any of these appears anywhere in the URL string, ignoring case.
private let feedURLWordsToMatch = ["feed", "xml", "rss", "atom", "json"]
/// Collects candidate feeds from an HTML document.
///
/// Candidates come from two places: the feed links reported by
/// `RSHTMLMetadataParser` (recorded with source `.HTMLHead`) and the links
/// returned by `RSHTMLLinkParser` whose URL string looks like it might be a
/// feed (recorded with source `.HTMLLink`). Candidates are stored keyed by URL
/// string, so each URL string yields at most one specifier.
class HTMLFeedFinder {

	/// The feed specifiers found while scanning, one per distinct URL string.
	var feedSpecifiers: Set<FeedSpecifier> {
		return Set(feedSpecifiersDictionary.values)
	}

	/// Candidates keyed by URL string; see `addFeedSpecifier(_:)` for how duplicates are merged.
	fileprivate var feedSpecifiersDictionary = [String: FeedSpecifier]()

	/// Scans `parserData` and fills in `feedSpecifiers`.
	///
	/// - Parameter parserData: The HTML document to scan.
	init(parserData: ParserData) {

		// Feed links reported by the metadata parser.
		let metadata = RSHTMLMetadataParser.htmlMetadata(with: parserData)
		for oneFeedLink in metadata.feedLinks {
			if let oneURLString = oneFeedLink.urlString {
				let oneFeedSpecifier = FeedSpecifier(title: oneFeedLink.title, urlString: oneURLString, source: .HTMLHead)
				addFeedSpecifier(oneFeedSpecifier)
			}
		}

		// Links from the link parser whose URL looks feed-like.
		if let bodyLinks = RSHTMLLinkParser.htmlLinks(with: parserData) {
			for oneBodyLink in bodyLinks {
				// Bind the URL string here instead of force-unwrapping it, so this loop
				// doesn't depend on linkMightBeFeed(_:) having checked the optional.
				if linkMightBeFeed(oneBodyLink), let oneURLString = oneBodyLink.urlString {
					let oneFeedSpecifier = FeedSpecifier(title: oneBodyLink.text, urlString: oneURLString, source: .HTMLLink)
					addFeedSpecifier(oneFeedSpecifier)
				}
			}
		}
	}
}
private extension HTMLFeedFinder {

	/// Records `feedSpecifier` in `feedSpecifiersDictionary`, keyed by its URL string.
	///
	/// - Parameter feedSpecifier: The candidate to add or merge.
	func addFeedSpecifier(_ feedSpecifier: FeedSpecifier) {

		// If there's an existing feed specifier, merge the two so that we have the best data.
		// If one has a title and one doesn't, use that non-nil title. Use the better source.
		let urlString = feedSpecifier.urlString
		if let existingFeedSpecifier = feedSpecifiersDictionary[urlString] {
			feedSpecifiersDictionary[urlString] = existingFeedSpecifier.feedSpecifierByMerging(feedSpecifier)
		}
		else {
			feedSpecifiersDictionary[urlString] = feedSpecifier
		}
	}

	/// Returns whether `urlString` contains any of the words in `feedURLWordsToMatch`.
	///
	/// The match is a case-insensitive substring search.
	///
	/// - Parameter urlString: The URL string to inspect.
	/// - Returns: `true` if the URL string contains one of the feed words.
	func urlStringMightBeFeed(_ urlString: String) -> Bool {

		// "buzzfeed" contains "feed", so mask it out before searching. The mask must
		// ignore case just like the search below does; otherwise a URL containing
		// "BuzzFeed" would slip past the mask and still match "feed".
		let massagedURLString = urlString.replacingOccurrences(of: "buzzfeed", with: "_", options: .caseInsensitive)

		return feedURLWordsToMatch.contains { oneMatch in
			massagedURLString.range(of: oneMatch, options: .caseInsensitive) != nil
		}
	}

	/// Returns whether `link` has a URL string that might point at a feed.
	///
	/// - Parameter link: A link found in the HTML document.
	/// - Returns: `false` if the link has no URL string; otherwise the result of
	///   `urlStringMightBeFeed(_:)` for that URL string.
	func linkMightBeFeed(_ link: RSHTMLLink) -> Bool {

		guard let linkURLString = link.urlString else {
			return false
		}
		return urlStringMightBeFeed(linkURLString)
	}
}