239 lines
7.1 KiB
Swift
239 lines
7.1 KiB
Swift
//
|
||
// RSSParserTests.swift
|
||
// Parser
|
||
//
|
||
// Created by Brent Simmons on 6/26/17.
|
||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||
//
|
||
|
||
import XCTest
|
||
import Parser
|
||
|
||
final class RSSParserTests: XCTestCase {
|
||
|
||
func testScriptingNewsPerformance() {
|
||
|
||
// 0.004 sec on my 2012 iMac.
|
||
// 0.002 2022 Mac Studio
|
||
let d = parserData("scriptingNews", "rss", "http://scripting.com/")
|
||
self.measure {
|
||
let _ = try! FeedParser.parse(d)
|
||
}
|
||
}
|
||
|
||
func testKatieFloydPerformance() {
|
||
|
||
// 0.004 sec on my 2012 iMac.
|
||
// 0.001 2022 Mac Studio
|
||
let d = parserData("KatieFloyd", "rss", "http://katiefloyd.com/")
|
||
self.measure {
|
||
let _ = try! FeedParser.parse(d)
|
||
}
|
||
}
|
||
|
||
func testEMarleyPerformance() {
|
||
|
||
// 0.001 sec on my 2012 iMac.
|
||
// 0.0004 2022 Mac Studio
|
||
let d = parserData("EMarley", "rss", "https://medium.com/@emarley")
|
||
self.measure {
|
||
let _ = try! FeedParser.parse(d)
|
||
}
|
||
}
|
||
|
||
func testMantonPerformance() {
|
||
|
||
// 0.002 sec on my 2012 iMac.
|
||
// 0.0006 2022 Mac Studio
|
||
let d = parserData("manton", "rss", "http://manton.org/")
|
||
self.measure {
|
||
let _ = try! FeedParser.parse(d)
|
||
}
|
||
}
|
||
|
||
func testNatashaTheRobot() async {
|
||
|
||
let d = parserData("natasha", "xml", "https://www.natashatherobot.com/")
|
||
let parsedFeed = try! FeedParser.parse(d)!
|
||
XCTAssertEqual(parsedFeed.items.count, 10)
|
||
}
|
||
|
||
func testTheOmniShowAttachments() async {
|
||
|
||
let d = parserData("theomnishow", "rss", "https://theomnishow.omnigroup.com/")
|
||
let parsedFeed = try! FeedParser.parse(d)!
|
||
|
||
XCTAssertTrue(parsedFeed.items.count > 0)
|
||
|
||
for article in parsedFeed.items {
|
||
XCTAssertNotNil(article.attachments)
|
||
XCTAssertEqual(article.attachments!.count, 1)
|
||
let attachment = Array(article.attachments!).first!
|
||
XCTAssertNotNil(attachment.mimeType)
|
||
XCTAssertNotNil(attachment.sizeInBytes)
|
||
XCTAssert(attachment.url.contains("cloudfront"))
|
||
XCTAssertGreaterThanOrEqual(attachment.sizeInBytes!, 22275279)
|
||
XCTAssertEqual(attachment.mimeType, "audio/mpeg")
|
||
}
|
||
}
|
||
|
||
func testTheOmniShowUniqueIDs() async {
|
||
|
||
let d = parserData("theomnishow", "rss", "https://theomnishow.omnigroup.com/")
|
||
let parsedFeed = try! FeedParser.parse(d)!
|
||
|
||
XCTAssertTrue(parsedFeed.items.count > 0)
|
||
|
||
for article in parsedFeed.items {
|
||
XCTAssertNotNil(article.uniqueID)
|
||
XCTAssertTrue(article.uniqueID.hasPrefix("https://theomnishow.omnigroup.com/episode/"))
|
||
}
|
||
}
|
||
|
||
func testMacworldUniqueIDs() async {
|
||
|
||
// Macworld’s feed doesn’t have guids, so they should be calculated unique IDs.
|
||
|
||
let d = parserData("macworld", "rss", "https://www.macworld.com/")
|
||
let parsedFeed = try! FeedParser.parse(d)!
|
||
|
||
for article in parsedFeed.items {
|
||
XCTAssertNotNil(article.uniqueID)
|
||
XCTAssertEqual(article.uniqueID.count, 32) // calculated unique IDs are MD5 hashes
|
||
}
|
||
}
|
||
|
||
func testMacworldAuthors() async {
|
||
|
||
// Macworld uses names instead of email addresses (despite the RSS spec saying they should be email addresses).
|
||
|
||
let d = parserData("macworld", "rss", "https://www.macworld.com/")
|
||
let parsedFeed = try! FeedParser.parse(d)!
|
||
|
||
XCTAssertTrue(parsedFeed.items.count > 0)
|
||
|
||
for article in parsedFeed.items {
|
||
|
||
let author = article.authors!.first!
|
||
XCTAssertNil(author.emailAddress)
|
||
XCTAssertNil(author.url)
|
||
XCTAssertNotNil(author.name)
|
||
}
|
||
}
|
||
|
||
func testMonkeyDomGuids() async {
|
||
|
||
// https://coding.monkeydom.de/posts.rss has a bug in the feed (at this writing):
|
||
// It has guids that are supposed to be permalinks, per the spec —
|
||
// except that they’re not actually permalinks. The RSS parser should
|
||
// detect this situation, and every article in the feed should have a permalink.
|
||
|
||
let d = parserData("monkeydom", "rss", "https://coding.monkeydom.de/")
|
||
let parsedFeed = try! FeedParser.parse(d)!
|
||
|
||
XCTAssertTrue(parsedFeed.items.count > 0)
|
||
|
||
for article in parsedFeed.items {
|
||
XCTAssertNil(article.url)
|
||
XCTAssertNotNil(article.uniqueID)
|
||
}
|
||
}
|
||
|
||
func testEmptyContentEncoded() async {
|
||
// The ATP feed (at the time of this writing) has some empty content:encoded elements. The parser should ignore those.
|
||
// https://github.com/brentsimmons/NetNewsWire/issues/529
|
||
|
||
let d = parserData("atp", "rss", "http://atp.fm/")
|
||
let parsedFeed = try! FeedParser.parse(d)!
|
||
|
||
XCTAssertTrue(parsedFeed.items.count > 0)
|
||
|
||
for article in parsedFeed.items {
|
||
XCTAssertNotNil(article.contentHTML)
|
||
}
|
||
}
|
||
|
||
func testFeedKnownToHaveGuidsThatArentPermalinks() async {
|
||
let d = parserData("livemint", "xml", "https://www.livemint.com/rss/news")
|
||
let parsedFeed = try! FeedParser.parse(d)!
|
||
|
||
XCTAssertTrue(parsedFeed.items.count > 0)
|
||
|
||
for article in parsedFeed.items {
|
||
XCTAssertNil(article.url)
|
||
}
|
||
}
|
||
|
||
func testAuthorsWithTitlesInside() async {
|
||
// This feed uses atom authors, and we don’t want author/title to be used as item/title.
|
||
// https://github.com/brentsimmons/NetNewsWire/issues/943
|
||
let d = parserData("cloudblog", "rss", "https://cloudblog.withgoogle.com/")
|
||
let parsedFeed = try! FeedParser.parse(d)!
|
||
|
||
XCTAssertTrue(parsedFeed.items.count > 0)
|
||
|
||
for article in parsedFeed.items {
|
||
XCTAssertNotEqual(article.title, "Product Manager, Office of the CTO")
|
||
XCTAssertNotEqual(article.title, "Developer Programs Engineer")
|
||
XCTAssertNotEqual(article.title, "Product Director")
|
||
}
|
||
}
|
||
|
||
func testTitlesWithInvalidFeedWithImageStructures() async {
|
||
// This invalid feed has <image> elements inside <item>s.
|
||
// 17 Jan 2021 bug report — we’re not parsing titles in this feed.
|
||
let d = parserData("aktuality", "rss", "https://www.aktuality.sk/")
|
||
let parsedFeed = try! FeedParser.parse(d)!
|
||
|
||
XCTAssertTrue(parsedFeed.items.count > 0)
|
||
|
||
for article in parsedFeed.items {
|
||
XCTAssertNotNil(article.title)
|
||
}
|
||
}
|
||
|
||
func testFeedLanguage() async {
|
||
let d = parserData("manton", "rss", "http://manton.org/")
|
||
let parsedFeed = try! FeedParser.parse(d)!
|
||
XCTAssertEqual(parsedFeed.language, "en-US")
|
||
}
|
||
|
||
func testFeedWithGB2312Encoding() {
|
||
// This feed has an encoding we don’t run into very often.
|
||
// https://github.com/Ranchero-Software/NetNewsWire/issues/1477
|
||
let d = parserData("kc0011", "rss", "http://kc0011.net/")
|
||
let parsedFeed = try! FeedParser.parse(d)!
|
||
XCTAssert(parsedFeed.items.count > 0)
|
||
for article in parsedFeed.items {
|
||
XCTAssertNotNil(article.contentHTML)
|
||
}
|
||
}
|
||
|
||
func testVincodeDates() {
|
||
let d = parserData("vincode", "rss", "https://vincode.io/feed.xml")
|
||
let parsedFeed = try! FeedParser.parse(d)!
|
||
XCTAssert(parsedFeed.items.count > 0)
|
||
|
||
var didFindFirstTestArticle = false
|
||
var didFindSecondTestArticle = false
|
||
for article in parsedFeed.items {
|
||
|
||
if article.title == "Drag Boat Race in Parker, AZ" {
|
||
didFindFirstTestArticle = true
|
||
// Tue, 07 Mar 2023 15:15:29 -0500
|
||
let expectedDatePublished = dateWithValues(2023, 3, 7, 20, 15, 29)
|
||
XCTAssertEqual(article.datePublished, expectedDatePublished)
|
||
}
|
||
else if article.title == "Ventura’s System Settings" {
|
||
didFindSecondTestArticle = true
|
||
// Sun, 30 Oct 2022 11:58:26 -0500
|
||
let expectedDatePublished = dateWithValues(2022, 10, 30, 16, 58, 26)
|
||
XCTAssertEqual(article.datePublished, expectedDatePublished)
|
||
}
|
||
}
|
||
|
||
XCTAssertTrue(didFindFirstTestArticle)
|
||
XCTAssertTrue(didFindSecondTestArticle)
|
||
}
|
||
}
|