Get HTMLMetadataParser working.

This commit is contained in:
Brent Simmons 2024-09-23 21:38:23 -07:00
parent 21848049f6
commit 3bc02a78a3

View File

@@ -6,36 +6,98 @@
//
import Foundation
import FoundationExtras
import SAX
/// Collects the `<link>` and `<meta>` tags found in an HTML document.
///
/// Use `metadata(with:)` as the entry point; every call parses with a fresh
/// parser instance, so no state is shared between calls.
public final class HTMLMetadataParser {

	/// Tags appended by the SAX delegate callbacks during the current parse.
	private var tags = [HTMLTag]()

	/// Parses the HTML carried by `parserData` and returns its metadata.
	///
	/// - Parameter parserData: Supplies the HTML bytes (`data`) and the `url` passed on to `HTMLMetadata`.
	/// - Returns: An `HTMLMetadata` built from `parserData.url` and the tags collected while parsing.
	public static func metadata(with parserData: ParserData) -> HTMLMetadata {
		HTMLMetadataParser().parse(parserData)
	}
}

// MARK: - Parsing

private extension HTMLMetadataParser {

	/// Runs the SAX HTML parser over `parserData.data` with `self` as delegate
	/// and wraps the collected tags in an `HTMLMetadata`.
	func parse(_ parserData: ParserData) -> HTMLMetadata {
		// Start from an empty list so a reused instance never leaks tags from an earlier run.
		tags = [HTMLTag]()

		let htmlParser = SAXHTMLParser(delegate: self, data: parserData.data)
		// NOTE(review): `tags` is read right after this call, which presumes the delegate
		// callbacks have all fired by the time `parse()` returns — confirm against SAXHTMLParser.
		htmlParser.parse()

		return HTMLMetadata(parserData.url, tags)
	}
}
// MARK: - SAXHTMLParserDelegate

extension HTMLMetadataParser: SAXHTMLParserDelegate {

	/// Element names of interest, kept as C strings for comparison with `SAXEqualTags`.
	private struct HTMLName {
		static let link = "link".utf8CString
		static let meta = "meta".utf8CString
	}

	/// Attribute names looked up (case-insensitively) on `<link>` elements.
	private struct HTMLKey {
		static let href = "href"
		static let src = "src"
		static let rel = "rel"
	}

	/// Returns the `href` attribute if present, otherwise the `src` attribute.
	private func link(with attributes: StringDictionary) -> String? {
		attributes.object(forCaseInsensitiveKey: HTMLKey.href)
			?? attributes.object(forCaseInsensitiveKey: HTMLKey.src)
	}

	/// Records a `.link` tag, but only when both `rel` and the link target are non-empty.
	private func handleLinkAttributes(_ attributes: StringDictionary) {
		guard
			let relValue = attributes.object(forCaseInsensitiveKey: HTMLKey.rel), !relValue.isEmpty,
			let target = link(with: attributes), !target.isEmpty
		else {
			return
		}

		tags.append(HTMLTag(tagType: .link, attributes: attributes))
	}

	/// Records a `.meta` tag with the given attributes.
	private func handleMetaAttributes(_ attributes: StringDictionary) {
		tags.append(HTMLTag(tagType: .meta, attributes: attributes))
	}

	/// Handles the start of an element: `<link>` and `<meta>` elements with at least
	/// one attribute are forwarded to their handlers; everything else is ignored.
	public func saxHTMLParser(_ saxHTMLParser: SAXHTMLParser, startElement name: XMLPointer, attributes: UnsafePointer<XMLPointer?>?) {
		// Check `link` first, then `meta` only if it was not a link.
		let isLink = SAXEqualTags(name, HTMLName.link)
		guard isLink || SAXEqualTags(name, HTMLName.meta) else {
			return
		}

		guard let attributesDictionary = saxHTMLParser.attributesDictionary(attributes), !attributesDictionary.isEmpty else {
			return
		}

		if isLink {
			handleLinkAttributes(attributesDictionary)
		} else {
			handleMetaAttributes(attributesDictionary)
		}
	}

	/// End-of-element events are not needed for metadata collection.
	public func saxHTMLParser(_: SAXHTMLParser, endElement: XMLPointer) {
		// Nothing to do
	}

	/// Character data is not needed for metadata collection.
	public func saxHTMLParser(_: SAXHTMLParser, charactersFound: XMLPointer, count: Int) {
		// Nothing to do
	}
}