Create first draft of HTMLMetadata.
This commit is contained in:
parent
a2fc8b5dec
commit
3e6e843dc8
392
Modules/Parser/Sources/HTMLParser/HTMLMetadata.swift
Normal file
392
Modules/Parser/Sources/HTMLParser/HTMLMetadata.swift
Normal file
@ -0,0 +1,392 @@
|
||||
//
|
||||
// HTMLMetadata.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 9/22/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import SAX
|
||||
|
||||
/// Metadata collected from the tags of an HTML page: favicons, Apple touch icons,
/// feed links, Open Graph properties, and Twitter properties.
public final class HTMLMetadata {

	/// The URL string passed to the initializer. It is handed to every sub-object
	/// that is created from a tag.
	public let baseURLString: String

	/// The tags this metadata was built from, unmodified.
	public let tags: [HTMLTag]

	/// `<link rel="icon">` entries with distinct URL strings, or `nil` when there are none.
	public let favicons: [HTMLMetadataFavicon]?

	/// `<link rel="apple-touch-icon">` / `apple-touch-icon-precomposed` entries, or `nil` when there are none.
	public let appleTouchIcons: [HTMLMetadataAppleTouchIcon]?

	/// `<link rel="alternate">` entries whose `type` looks like a feed type, or `nil` when there are none.
	public let feedLinks: [HTMLMetadataFeedLink]?

	public let openGraphProperties: HTMLOpenGraphProperties?
	public let twitterProperties: HTMLTwitterProperties?

	/// Builds all metadata from the given tags.
	///
	/// - Parameters:
	///   - urlString: URL string of the page; stored as `baseURLString`.
	///   - tags: Tags extracted from the page.
	init(_ urlString: String, _ tags: [HTMLTag]) {

		self.baseURLString = urlString
		self.tags = tags

		self.favicons = Self.resolvedFaviconLinks(urlString, tags)

		self.appleTouchIcons = Self.appleTouchIconTags(tags)?.map { htmlTag in
			HTMLMetadataAppleTouchIcon(urlString, htmlTag)
		}

		self.feedLinks = Self.feedLinkTags(tags)?.map { htmlTag in
			HTMLMetadataFeedLink(urlString, htmlTag)
		}

		self.openGraphProperties = HTMLOpenGraphProperties(urlString, tags)
		self.twitterProperties = HTMLTwitterProperties(urlString, tags)
	}

	/// Returns one favicon per distinct URL string found in `rel="icon"` link tags.
	///
	/// Favicons without a URL string are dropped. When several tags produce the same
	/// URL string, only the first one is kept.
	///
	/// - Returns: The favicons in document order, or `nil` when there are none.
	static func resolvedFaviconLinks(_ baseURLString: String, _ tags: [HTMLTag]) -> [HTMLMetadataFavicon]? {

		guard let iconTags = linkTagsWithMatchingRel("icon", tags) else {
			return nil
		}

		// A set gives constant-time duplicate detection; `insert` reports whether the value was new.
		var seenURLStrings = Set<String>()

		let favicons = iconTags.compactMap { htmlTag -> HTMLMetadataFavicon? in
			let favicon = HTMLMetadataFavicon(baseURLString, htmlTag)
			guard let faviconURLString = favicon.urlString, seenURLStrings.insert(faviconURLString).inserted else {
				return nil
			}
			return favicon
		}

		return favicons.isEmpty ? nil : favicons
	}

	/// Returns the link tags whose `rel` contains `apple-touch-icon` or
	/// `apple-touch-icon-precomposed`, or `nil` when there are none.
	static func appleTouchIconTags(_ tags: [HTMLTag]) -> [HTMLTag]? {

		guard let allLinkTags = linkTags(tags) else {
			return nil
		}

		// Match against the link tags only, not against every tag on the page.
		return tagsMatchingRelValues(["apple-touch-icon", "apple-touch-icon-precomposed"], allLinkTags)
	}

	/// Returns the `rel="alternate"` link tags that have a feed `type` and a non-empty
	/// URL string, or `nil` when there are none.
	static func feedLinkTags(_ tags: [HTMLTag]) -> [HTMLTag]? {

		guard let alternateLinkTags = linkTagsWithMatchingRel("alternate", tags) else {
			return nil
		}

		let matchingTags = alternateLinkTags.filter { tag in

			guard let attributes = tag.attributes, let type = attributes.object(forCaseInsensitiveKey: "type"), typeIsFeedType(type) else {
				return false
			}
			// NOTE(review): this uses `urlString(from:)` while `linkTagsWithMatchingRel` uses
			// `urlStringFromDictionary(_:)`; neither helper is defined in this file — confirm
			// which spelling exists and use it in both places.
			guard let linkURLString = urlString(from: attributes), !linkURLString.isEmpty else {
				return false
			}

			return true
		}

		return matchingTags.isEmpty ? nil : matchingTags
	}

	/// Returns `true` when `type` (compared case-insensitively) ends with `/rss+xml`,
	/// `/atom+xml`, `/json`, or `/feed+json`.
	static func typeIsFeedType(_ type: String) -> Bool {

		let lowerType = type.lowercased()
		// "/feed+json" covers application/feed+json, the MIME type recommended by JSON Feed 1.1.
		let feedTypeSuffixes = ["/rss+xml", "/atom+xml", "/json", "/feed+json"]
		return feedTypeSuffixes.contains { lowerType.hasSuffix($0) }
	}

	/// Returns the tags whose `tagType` is `.link`, or `nil` when there are none.
	static func linkTags(_ tags: [HTMLTag]) -> [HTMLTag]? {

		let matchingTags = tags.filter { $0.tagType == .link }
		return matchingTags.isEmpty ? nil : matchingTags
	}

	/// Returns the link tags that have a non-empty URL string and whose `rel` contains `valueToMatch`.
	///
	/// The match is case-insensitive and is made against whitespace-delimited words of `rel`.
	///
	/// - Returns: The matching tags, or `nil` when there are none.
	static func linkTagsWithMatchingRel(_ valueToMatch: String, _ tags: [HTMLTag]) -> [HTMLTag]? {

		guard let allLinkTags = linkTags(tags) else {
			return nil
		}

		let tagsWithURLString = allLinkTags.filter { tag in
			guard let tagURLString = urlStringFromDictionary(tag.attributes) else {
				return false
			}
			return !tagURLString.isEmpty
		}
		guard !tagsWithURLString.isEmpty else {
			return nil
		}

		return tagsMatchingRelValues([valueToMatch], tagsWithURLString)
	}

	/// Returns the tags whose `rel` value contains at least one of `valuesToMatch`.
	///
	/// The `rel` value is split on whitespace and newlines, and each word is compared
	/// case-insensitively.
	///
	/// - Returns: The matching tags, or `nil` when there are none.
	static func tagsMatchingRelValues(_ valuesToMatch: [String], _ tags: [HTMLTag]) -> [HTMLTag]? {

		let lowerValuesToMatch = Set(valuesToMatch.map { $0.lowercased() })

		let matchingTags = tags.filter { tag in

			guard let rel = relValue(tag.attributes) else {
				return false
			}

			let relWords = rel.components(separatedBy: .whitespacesAndNewlines)
			return relWords.contains { lowerValuesToMatch.contains($0.lowercased()) }
		}

		return matchingTags.isEmpty ? nil : matchingTags
	}
}
|
||||
|
||||
/// One Apple touch icon link tag: its `rel`, its raw `sizes` value, the parsed size,
/// and its URL string.
public final class HTMLMetadataAppleTouchIcon {

	public let rel: String?

	/// The raw `sizes` attribute value, for example `180x180`.
	public let sizes: String?

	/// `sizes` parsed as width × height; `nil` when `sizes` is missing or not of that form.
	public let size: CGSize?

	public let urlString: String? // Absolute

	/// Reads the icon's properties from the attributes of `tag`.
	///
	/// Every property is `nil` when the tag has no attributes.
	///
	/// - Parameters:
	///   - baseURLString: URL string of the page, passed on for URL resolution.
	///   - tag: The link tag to read.
	init(_ baseURLString: String, _ tag: HTMLTag) {

		guard let attributes = tag.attributes else {
			self.rel = nil
			self.sizes = nil
			self.size = nil
			self.urlString = nil
			return
		}

		self.rel = attributes.object(forCaseInsensitiveKey: "rel")
		// Pass the base URL along, as the feed-link and favicon initializers do.
		self.urlString = absoluteURLStringWithDictionary(attributes, baseURLString)

		guard let sizes = attributes.object(forCaseInsensitiveKey: "sizes") else {
			self.sizes = nil
			self.size = nil
			return
		}
		self.sizes = sizes
		self.size = Self.size(fromSizesValue: sizes)
	}

	/// Parses a `WIDTHxHEIGHT` string into a size.
	///
	/// The separator may be `x` or `X`. Returns `nil` unless there are exactly two
	/// components and both are numeric.
	private static func size(fromSizesValue sizes: String) -> CGSize? {

		let sizeComponents = sizes.components(separatedBy: CharacterSet(charactersIn: "xX"))
		guard sizeComponents.count == 2,
			  let width = Double(sizeComponents[0]),
			  let height = Double(sizeComponents[1]) else {
			return nil
		}
		return CGSize(width: width, height: height)
	}
}
|
||||
|
||||
/// One feed link tag: its `title`, its `type`, and its URL string.
public final class HTMLMetadataFeedLink {

	public let title: String?
	public let type: String?
	public let urlString: String? // Absolute

	/// Reads the feed link's properties from the attributes of `tag`.
	///
	/// Every property is `nil` when the tag has no attributes.
	///
	/// - Parameters:
	///   - baseURLString: URL string of the page, passed on for URL resolution.
	///   - tag: The link tag to read.
	init(_ baseURLString: String, _ tag: HTMLTag) {

		guard let attributes = tag.attributes else {
			self.title = nil
			self.type = nil
			self.urlString = nil
			return
		}

		// The unlabeled first parameter is the base URL; it is named to match its use here.
		self.urlString = absoluteURLStringWithDictionary(attributes, baseURLString)
		self.title = attributes.object(forCaseInsensitiveKey: "title")
		self.type = attributes.object(forCaseInsensitiveKey: "type")
	}
}
|
||||
|
||||
/// One favicon link tag: its `type` and its URL string.
public final class HTMLMetadataFavicon {

	public let type: String?
	public let urlString: String?

	/// Reads the favicon's properties from the attributes of `tag`.
	///
	/// Both properties are `nil` when the tag has no attributes.
	///
	/// - Parameters:
	///   - baseURLString: URL string of the page, passed on for URL resolution.
	///   - tag: The link tag to read.
	init(_ baseURLString: String, _ tag: HTMLTag) {

		guard let attributes = tag.attributes else {
			self.type = nil
			self.urlString = nil
			return
		}

		// The unlabeled first parameter is the base URL; it is named to match its use here.
		self.urlString = absoluteURLStringWithDictionary(attributes, baseURLString)
		self.type = attributes.object(forCaseInsensitiveKey: "type")
	}
}
|
||||
|
||||
/// Open Graph properties read from a page's tags. Only the image is modeled so far.
public final class HTMLOpenGraphProperties {

	// TODO: the rest. At this writing (Nov. 26, 2017) I just care about og:image.
	// See http://ogp.me/

	/// The Open Graph image, as produced by `parse(_:)`.
	public let image: HTMLOpenGraphImage?

	/// Parses the Open Graph image out of `tags`.
	///
	/// - Parameters:
	///   - urlString: URL string of the page. Not read by this initializer.
	///   - tags: Tags extracted from the page.
	init(_ urlString: String, _ tags: [HTMLTag]) {

		let parsedImage = HTMLOpenGraphProperties.parse(tags)
		image = parsedImage
	}
}
|
||||
|
||||
private extension HTMLOpenGraphProperties {

	/// Prefix shared by every Open Graph property name.
	private static let ogPrefix = "og:"

	/// Attribute names read from each meta tag.
	struct OGKey {
		static let property = "property"
		static let content = "content"
	}

	/// Open Graph property names that describe the image.
	struct OGValue {
		static let ogImage = "og:image"
		static let ogImageURL = "og:image:url"
		static let ogImageSecureURL = "og:image:secure_url"
		static let ogImageType = "og:image:type"
		static let ogImageAlt = "og:image:alt"
		static let ogImageWidth = "og:image:width"
		static let ogImageHeight = "og:image:height"
	}

	/// Collects the `og:image` properties from the meta tags into a single image.
	///
	/// Tags are read in order. When a property appears more than once, the last
	/// occurrence wins. `og:image` and `og:image:url` both set the image URL.
	///
	/// - Parameter tags: Tags extracted from the page; only `.meta` tags are read.
	/// - Returns: The image, or `nil` when no image property was found.
	static func parse(_ tags: [HTMLTag]) -> HTMLOpenGraphImage? {

		// HTMLOpenGraphImage properties to fill in.
		var url: String?
		var secureURL: String?
		var mimeType: String?
		var width: CGFloat?
		var height: CGFloat?
		var altText: String?

		for tag in tags where tag.tagType == .meta {

			guard let attributes = tag.attributes,
				  let propertyName = attributes[OGKey.property], propertyName.hasPrefix(ogPrefix),
				  let content = attributes[OGKey.content] else {
				continue
			}

			switch propertyName {
			case OGValue.ogImage, OGValue.ogImageURL:
				url = content
			case OGValue.ogImageSecureURL:
				secureURL = content
			case OGValue.ogImageType:
				mimeType = content
			case OGValue.ogImageAlt:
				altText = content
			case OGValue.ogImageWidth:
				// CGFloat has no initializer that takes a String, so parse as Double first.
				width = Double(content).map { CGFloat($0) }
			case OGValue.ogImageHeight:
				height = Double(content).map { CGFloat($0) }
			default:
				// Other og: properties are not modeled yet.
				break
			}
		}

		if url == nil && secureURL == nil && mimeType == nil && width == nil && height == nil && altText == nil {
			return nil
		}

		return HTMLOpenGraphImage(url: url, secureURL: secureURL, mimeType: mimeType, width: width, height: height, altText: altText)
	}
}
|
||||
|
||||
/// The values of a page's `og:image` properties. Every value is optional.
public final class HTMLOpenGraphImage {

	public let url: String?
	public let secureURL: String?
	public let mimeType: String?
	public let width: CGFloat?
	public let height: CGFloat?
	public let altText: String?

	/// Stores the given values unchanged.
	///
	/// `mimeType` is optional, matching the stored property; existing callers that
	/// pass a non-optional `String` still compile.
	init(url: String?, secureURL: String?, mimeType: String?, width: CGFloat?, height: CGFloat?, altText: String?) {

		self.url = url
		self.secureURL = secureURL
		self.mimeType = mimeType
		self.width = width
		self.height = height
		self.altText = altText
	}
}
|
||||
|
||||
/// Twitter properties read from a page's meta tags. Only the image URL is modeled.
public final class HTMLTwitterProperties {

	public let imageURL: String? // twitter:image:src

	/// Attribute names read from each meta tag.
	private struct TwitterKey {
		static let name = "name"
		static let content = "content"
	}

	/// Values of the `name` attribute that are recognized.
	private struct TwitterValue {
		static let imageSrc = "twitter:image:src"
	}

	/// Finds the first meta tag named `twitter:image:src` that has non-empty content.
	///
	/// - Parameters:
	///   - urlString: URL string of the page. Not read by this initializer.
	///   - tags: Tags extracted from the page.
	init(_ urlString: String, _ tags: [HTMLTag]) {

		// `lazy` stops the scan at the first match instead of mapping every tag.
		let firstImageURL: String? = tags.lazy.compactMap { tag -> String? in
			guard tag.tagType == .meta else {
				return nil
			}
			guard let name = tag.attributes?[TwitterKey.name], name == TwitterValue.imageSrc else {
				return nil
			}
			guard let content = tag.attributes?[TwitterKey.content], !content.isEmpty else {
				return nil
			}
			return content
		}.first

		self.imageURL = firstImageURL
	}
}
|
||||
|
@ -7,6 +7,8 @@
|
||||
|
||||
import Foundation
|
||||
|
||||
public typealias HTMLTagAttributes = [String: String]
|
||||
|
||||
public struct HTMLTag: Sendable {
|
||||
|
||||
public enum TagType: Sendable {
|
||||
@ -15,9 +17,9 @@ public struct HTMLTag: Sendable {
|
||||
}
|
||||
|
||||
public let tagType: TagType
|
||||
public let attributes: [String: String]?
|
||||
public let attributes: HTMLTagAttributes?
|
||||
|
||||
public init(tagType: TagType, attributes: [String : String]?) {
|
||||
public init(tagType: TagType, attributes: HTMLTagAttributes?) {
|
||||
self.tagType = tagType
|
||||
self.attributes = attributes
|
||||
}
|
@ -8,7 +8,6 @@
|
||||
|
||||
import XCTest
|
||||
import HTMLParser
|
||||
import SAX
|
||||
import libxml2
|
||||
|
||||
class HTMLLinkTests: XCTestCase {
|
||||
|
Loading…
x
Reference in New Issue
Block a user