Handle relative <link> elements in the Atom parser: when xml:base is present, resolve them against it. Add a test case.
This commit is contained in:
parent
ccb1b8294e
commit
d22f3819cd
@ -25,6 +25,7 @@ final class AtomParser {
|
||||
attributesStack.last
|
||||
}
|
||||
|
||||
private var xmlBaseURL: URL?
|
||||
private var parsingXHTML = false
|
||||
private var xhtmlString: String?
|
||||
|
||||
@ -90,6 +91,7 @@ private extension AtomParser {
|
||||
static let type = "type"
|
||||
static let length = "length"
|
||||
static let xmlLang = "xml:lang"
|
||||
static let xmlBase = "xml:base"
|
||||
}
|
||||
|
||||
func currentString(_ saxParser: SAXParser) -> String? {
|
||||
@ -139,13 +141,23 @@ private extension AtomParser {
|
||||
}
|
||||
}
|
||||
|
||||
func addFeedLanguage() {
|
||||
func addFeedAttributes() {
|
||||
|
||||
guard feed.language == nil, let currentAttributes else {
|
||||
guard let currentAttributes else {
|
||||
return
|
||||
}
|
||||
|
||||
feed.language = currentAttributes[XMLString.xmlLang]
|
||||
if feed.language == nil {
|
||||
feed.language = currentAttributes[XMLString.xmlLang]
|
||||
}
|
||||
|
||||
if xmlBaseURL == nil {
|
||||
if let xmlBase = currentAttributes[XMLString.xmlBase] {
|
||||
if let baseURL = URL(string: xmlBase) {
|
||||
xmlBaseURL = baseURL
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func addArticle() {
|
||||
@ -225,7 +237,8 @@ private extension AtomParser {
|
||||
guard let urlString = attributes[XMLString.href], !urlString.isEmpty else {
|
||||
return
|
||||
}
|
||||
|
||||
let resolvedURLString = linkResolvedAgainstXMLBase(urlString)
|
||||
|
||||
var rel = attributes[XMLString.rel]
|
||||
if rel?.isEmpty ?? true {
|
||||
rel = XMLString.alternate
|
||||
@ -233,21 +246,33 @@ private extension AtomParser {
|
||||
|
||||
if rel == XMLString.related {
|
||||
if article.link == nil {
|
||||
article.link = urlString
|
||||
article.link = resolvedURLString
|
||||
}
|
||||
}
|
||||
else if rel == XMLString.alternate {
|
||||
if article.permalink == nil {
|
||||
article.permalink = urlString
|
||||
article.permalink = resolvedURLString
|
||||
}
|
||||
}
|
||||
else if rel == XMLString.enclosure {
|
||||
if let enclosure = enclosure(urlString, attributes) {
|
||||
if let enclosure = enclosure(resolvedURLString, attributes) {
|
||||
article.addEnclosure(enclosure)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolves a `<link>` element's `href` value against the stored `xml:base` URL.
///
/// - Parameter urlString: The raw `href` attribute value.
/// - Returns: The absolute string of `urlString` resolved relative to `xmlBaseURL`.
///   Returns `urlString` unchanged when no base URL has been stored, or when
///   `URL(string:relativeTo:)` cannot build a URL from it.
func linkResolvedAgainstXMLBase(_ urlString: String) -> String {

	// Both failure modes (no base URL, unparseable href) fall back to the raw value.
	guard let base = xmlBaseURL, let resolved = URL(string: urlString, relativeTo: base) else {
		return urlString
	}

	return resolved.absoluteString
}
|
||||
|
||||
func enclosure(_ urlString: String, _ attributes: StringDictionary) -> RSSEnclosure? {
|
||||
|
||||
let enclosure = RSSEnclosure(url: urlString)
|
||||
@ -351,7 +376,7 @@ extension AtomParser: SAXParserDelegate {
|
||||
}
|
||||
|
||||
if SAXEqualTags(localName, XMLName.feed) {
|
||||
addFeedLanguage()
|
||||
addFeedAttributes()
|
||||
}
|
||||
|
||||
saxParser.beginStoringCharacters()
|
||||
|
@ -109,4 +109,22 @@ final class AtomParserTests: XCTestCase {
|
||||
XCTAssertNotEqual(article.title, "Default Title")
|
||||
}
|
||||
}
|
||||
|
||||
/// Verifies that relative `<link>` hrefs in an Atom feed are resolved against
/// the feed's `xml:base` attribute.
///
/// Throws instead of force-unwrapping, so a parse failure or a missing URL is
/// reported as a failure of this test rather than crashing the test process.
func testLinkElementsWithRelativeURLs() throws {

	// This feed has <link> elements that look like this…
	// <link href="/en/publish/2022/07/01/great-moments-in-document-history-reimagining-the-declaration-of-independence-as-pdf"/>
	// …and it also has, in the feed declaration…
	// xml:base="https://blog.adobe.com"
	// …and so the <link> values should be parsed as (for example):
	// https://blog.adobe.com/en/publish/2022/07/01/great-moments-in-document-history-reimagining-the-declaration-of-independence-as-pdf
	// Issue: https://github.com/Ranchero-Software/NetNewsWire/issues/3662

	let d = parserData("adobe", "atom", "https://blog.adobe.com/feed.xml")
	let parsedFeed = try XCTUnwrap(try FeedParser.parse(d))

	// Without this check the loop below would pass vacuously on an empty feed.
	XCTAssertFalse(parsedFeed.items.isEmpty, "Expected the adobe feed to contain at least one item")

	let expectedPrefix = "https://blog.adobe.com/en/publish/20"
	for article in parsedFeed.items {
		let url = try XCTUnwrap(article.url)
		XCTAssertTrue(url.hasPrefix(expectedPrefix), "Unresolved or unexpected link: \(url)")
	}
}
|
||||
}
|
||||
|
1
Modules/Parser/Tests/ParserTests/Resources/adobe.atom
Normal file
1
Modules/Parser/Tests/ParserTests/Resources/adobe.atom
Normal file
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user