2024-04-02 21:43:06 -07:00
|
|
|
|
//
|
|
|
|
|
// RSSParserTests.swift
|
|
|
|
|
// RSParser
|
|
|
|
|
//
|
|
|
|
|
// Created by Brent Simmons on 6/26/17.
|
|
|
|
|
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
import XCTest
|
|
|
|
|
import Parser
|
|
|
|
|
|
|
|
|
|
/// Tests for RSS parsing via `FeedParser`, run against named feed fixtures.
///
/// Each test obtains its input from `parserData(_:_:_:)`, which is defined outside
/// this file (presumably it loads a bundled fixture by name and extension — confirm).
///
/// Functional tests are `async throws` and use `XCTUnwrap`, so a parse error or a
/// missing value is reported as a failure of that one test rather than crashing the
/// whole test process the way `try!` / `!` would.
class RSSParserTests: XCTestCase {

    // MARK: - Performance

    /// Measures synchronous parsing of the "scriptingNews" fixture.
    func testScriptingNewsPerformance() {

        // 0.004 sec on my 2012 iMac.
        // 0.002 2022 Mac Studio
        let d = parserData("scriptingNews", "rss", "http://scripting.com/")
        measure {
            // `measure` takes a non-throwing closure, so report errors explicitly.
            do {
                _ = try FeedParser.parseSync(d)
            } catch {
                XCTFail("Parsing scriptingNews failed: \(error)")
            }
        }
    }

    /// Measures synchronous parsing of the "KatieFloyd" fixture.
    func testKatieFloydPerformance() {

        // 0.004 sec on my 2012 iMac.
        // 0.001 2022 Mac Studio
        let d = parserData("KatieFloyd", "rss", "http://katiefloyd.com/")
        measure {
            // `measure` takes a non-throwing closure, so report errors explicitly.
            do {
                _ = try FeedParser.parseSync(d)
            } catch {
                XCTFail("Parsing KatieFloyd failed: \(error)")
            }
        }
    }

    /// Measures synchronous parsing of the "EMarley" fixture.
    func testEMarleyPerformance() {

        // 0.001 sec on my 2012 iMac.
        // 0.0004 2022 Mac Studio
        let d = parserData("EMarley", "rss", "https://medium.com/@emarley")
        measure {
            // `measure` takes a non-throwing closure, so report errors explicitly.
            do {
                _ = try FeedParser.parseSync(d)
            } catch {
                XCTFail("Parsing EMarley failed: \(error)")
            }
        }
    }

    /// Measures synchronous parsing of the "manton" fixture.
    func testMantonPerformance() {

        // 0.002 sec on my 2012 iMac.
        // 0.0006 2022 Mac Studio
        let d = parserData("manton", "rss", "http://manton.org/")
        measure {
            // `measure` takes a non-throwing closure, so report errors explicitly.
            do {
                _ = try FeedParser.parseSync(d)
            } catch {
                XCTFail("Parsing manton failed: \(error)")
            }
        }
    }

    // MARK: - Parsing

    /// The "natasha" fixture should parse into exactly 10 items.
    func testNatashaTheRobot() async throws {

        let d = parserData("natasha", "xml", "https://www.natashatherobot.com/")
        // `XCTUnwrap` takes a synchronous autoclosure, so await first and unwrap second.
        let parseResult = try await FeedParser.parse(d)
        let parsedFeed = try XCTUnwrap(parseResult)

        XCTAssertEqual(parsedFeed.items.count, 10)
    }

    /// Every item in the "theomnishow" fixture should carry exactly one attachment
    /// with a cloudfront URL, an "audio/mpeg" MIME type, and a size of at least
    /// 22275279 bytes.
    func testTheOmniShowAttachments() async throws {

        let d = parserData("theomnishow", "rss", "https://theomnishow.omnigroup.com/")
        let parseResult = try await FeedParser.parse(d)
        let parsedFeed = try XCTUnwrap(parseResult)

        for article in parsedFeed.items {
            let attachments = try XCTUnwrap(article.attachments)
            XCTAssertEqual(attachments.count, 1)

            let attachment = try XCTUnwrap(Array(attachments).first)
            XCTAssertNotNil(attachment.mimeType)
            XCTAssert(attachment.url.contains("cloudfront"))

            let sizeInBytes = try XCTUnwrap(attachment.sizeInBytes)
            XCTAssertGreaterThanOrEqual(sizeInBytes, 22275279)
            XCTAssertEqual(attachment.mimeType, "audio/mpeg")
        }
    }

    /// Every item in the "theomnishow" fixture should have a unique ID that starts
    /// with the show's episode URL prefix.
    func testTheOmniShowUniqueIDs() async throws {

        let d = parserData("theomnishow", "rss", "https://theomnishow.omnigroup.com/")
        let parseResult = try await FeedParser.parse(d)
        let parsedFeed = try XCTUnwrap(parseResult)

        for article in parsedFeed.items {
            XCTAssertNotNil(article.uniqueID)
            XCTAssertTrue(article.uniqueID.hasPrefix("https://theomnishow.omnigroup.com/episode/"))
        }
    }

    /// Every item in the "macworld" fixture should have a 32-character unique ID.
    func testMacworldUniqueIDs() async throws {

        // Macworld’s feed doesn’t have guids, so they should be calculated unique IDs.

        let d = parserData("macworld", "rss", "https://www.macworld.com/")
        let parseResult = try await FeedParser.parse(d)
        let parsedFeed = try XCTUnwrap(parseResult)

        for article in parsedFeed.items {
            XCTAssertNotNil(article.uniqueID)
            XCTAssertEqual(article.uniqueID.count, 32) // calculated unique IDs are MD5 hashes
        }
    }

    /// The first author of every item in the "macworld" fixture should have a name
    /// but no email address and no URL.
    func testMacworldAuthors() async throws {

        // Macworld uses names instead of email addresses (despite the RSS spec saying they should be email addresses).

        let d = parserData("macworld", "rss", "https://www.macworld.com/")
        let parseResult = try await FeedParser.parse(d)
        let parsedFeed = try XCTUnwrap(parseResult)

        for article in parsedFeed.items {

            let authors = try XCTUnwrap(article.authors)
            let author = try XCTUnwrap(authors.first)
            XCTAssertNil(author.emailAddress)
            XCTAssertNil(author.url)
            XCTAssertNotNil(author.name)
        }
    }

    /// Every item in the "monkeydom" fixture should have a nil `url` and a unique ID.
    func testMonkeyDomGuids() async throws {

        // https://coding.monkeydom.de/posts.rss has a bug in the feed (at this writing):
        // It has guids that are supposed to be permalinks, per the spec —
        // except that they’re not actually permalinks. The RSS parser should
        // detect this situation, and every article in the feed should have a permalink.
        //
        // NOTE(review): the assertions below require `article.url` to be nil, which
        // reads as the opposite of "should have a permalink" — confirm which is intended.

        let d = parserData("monkeydom", "rss", "https://coding.monkeydom.de/")
        let parseResult = try await FeedParser.parse(d)
        let parsedFeed = try XCTUnwrap(parseResult)

        for article in parsedFeed.items {
            XCTAssertNil(article.url)
            XCTAssertNotNil(article.uniqueID)
        }
    }

    /// Every item in the "atp" fixture should have non-nil `contentHTML`.
    func testEmptyContentEncoded() async throws {
        // The ATP feed (at the time of this writing) has some empty content:encoded elements. The parser should ignore those.
        // https://github.com/brentsimmons/NetNewsWire/issues/529

        let d = parserData("atp", "rss", "http://atp.fm/")
        let parseResult = try await FeedParser.parse(d)
        let parsedFeed = try XCTUnwrap(parseResult)

        for article in parsedFeed.items {
            XCTAssertNotNil(article.contentHTML)
        }
    }

    /// Every item in the "livemint" fixture should have a nil `url`.
    func testFeedKnownToHaveGuidsThatArentPermalinks() async throws {
        let d = parserData("livemint", "xml", "https://www.livemint.com/rss/news")
        let parseResult = try await FeedParser.parse(d)
        let parsedFeed = try XCTUnwrap(parseResult)

        for article in parsedFeed.items {
            XCTAssertNil(article.url)
        }
    }

    /// No item title in the "cloudblog" fixture should equal one of the known
    /// author job titles.
    func testAuthorsWithTitlesInside() async throws {
        // This feed uses atom authors, and we don’t want author/title to be used as item/title.
        // https://github.com/brentsimmons/NetNewsWire/issues/943
        let d = parserData("cloudblog", "rss", "https://cloudblog.withgoogle.com/")
        let parseResult = try await FeedParser.parse(d)
        let parsedFeed = try XCTUnwrap(parseResult)

        for article in parsedFeed.items {
            XCTAssertNotEqual(article.title, "Product Manager, Office of the CTO")
            XCTAssertNotEqual(article.title, "Developer Programs Engineer")
            XCTAssertNotEqual(article.title, "Product Director")
        }
    }

    /// Every item in the "aktuality" fixture should have a non-nil title.
    func testTitlesWithInvalidFeedWithImageStructures() async throws {
        // This invalid feed has <image> elements inside <item>s.
        // 17 Jan 2021 bug report — we’re not parsing titles in this feed.
        let d = parserData("aktuality", "rss", "https://www.aktuality.sk/")
        let parseResult = try await FeedParser.parse(d)
        let parsedFeed = try XCTUnwrap(parseResult)

        for article in parsedFeed.items {
            XCTAssertNotNil(article.title)
        }
    }

    /// The "manton" fixture should report "en-US" as the feed language.
    func testFeedLanguage() async throws {
        let d = parserData("manton", "rss", "http://manton.org/")
        let parseResult = try await FeedParser.parse(d)
        let parsedFeed = try XCTUnwrap(parseResult)

        XCTAssertEqual(parsedFeed.language, "en-US")
    }

//    func testFeedWithGB2312Encoding() {
//        // This feed has an encoding we don’t run into very often.
//        // https://github.com/Ranchero-Software/NetNewsWire/issues/1477
//        let d = parserData("kc0011", "rss", "http://kc0011.net/")
//        let parsedFeed = try! FeedParser.parse(d)!
//        XCTAssert(parsedFeed.items.count > 0)
//        for article in parsedFeed.items {
//            XCTAssertNotNil(article.contentHTML)
//        }
//    }
}
|