diff --git a/Modules/Parser/Sources/HTMLParser/HTMLMetadataParser.swift b/Modules/Parser/Sources/HTMLParser/HTMLMetadataParser.swift index 4ffa7d732..ab96da818 100644 --- a/Modules/Parser/Sources/HTMLParser/HTMLMetadataParser.swift +++ b/Modules/Parser/Sources/HTMLParser/HTMLMetadataParser.swift @@ -6,36 +6,98 @@ // import Foundation +import FoundationExtras import SAX public final class HTMLMetadataParser { - private let parserData: ParserData private var tags = [HTMLTag]() - private var htmlMetadata: HTMLMetadata? = nil public static func metadata(with parserData: ParserData) -> HTMLMetadata { - let parser = HTMLMetadataParser(parserData) - parser.parse() - return parser.htmlMetadata - } - - init(_ parserData: ParserData) { - - self.parserData = parserData + HTMLMetadataParser().parse(parserData) } } private extension HTMLMetadataParser { - func parse() { + func parse(_ parserData: ParserData) -> HTMLMetadata { - self.tags = [HTMLTag]() + tags = [HTMLTag]() let htmlParser = SAXHTMLParser(delegate: self, data: parserData.data) htmlParser.parse() - self.htmlMetadata = HTMLMetadata(parserData.url, tags) + return HTMLMetadata(parserData.url, tags) + } +} + +extension HTMLMetadataParser: SAXHTMLParserDelegate { + + private struct HTMLName { + + static let link = "link".utf8CString + static let meta = "meta".utf8CString + } + + private struct HTMLKey { + + static let href = "href" + static let src = "src" + static let rel = "rel" + } + + private func link(with attributes: StringDictionary) -> String? { + + if let link = attributes.object(forCaseInsensitiveKey: HTMLKey.href) { + return link + } + + return attributes.object(forCaseInsensitiveKey: HTMLKey.src) + } + + private func handleLinkAttributes(_ attributes: StringDictionary) { + + guard let rel = attributes.object(forCaseInsensitiveKey: HTMLKey.rel), !rel.isEmpty else { + return + } + guard let link = link(with: attributes), !link.isEmpty else { + return + } + + let tag = HTMLTag(tagType: .link, attributes: attributes) + tags.append(tag) + } + + private func handleMetaAttributes(_ attributes: StringDictionary) { + + let tag = HTMLTag(tagType: .meta, attributes: attributes) + tags.append(tag) + } + + public func saxHTMLParser(_ saxHTMLParser: SAXHTMLParser, startElement name: XMLPointer, attributes: UnsafePointer?) { + + if SAXEqualTags(name, HTMLName.link) { + let d = saxHTMLParser.attributesDictionary(attributes) + if let d, !d.isEmpty { + handleLinkAttributes(d) + } + } + else if SAXEqualTags(name, HTMLName.meta) { + let d = saxHTMLParser.attributesDictionary(attributes) + if let d, !d.isEmpty { + handleMetaAttributes(d) + } + } + } + + public func saxHTMLParser(_: SAXHTMLParser, endElement: XMLPointer) { + + // Nothing to do + } + + public func saxHTMLParser(_: SAXHTMLParser, charactersFound: XMLPointer, count: Int) { + + // Nothing to do } }