//
// HTMLMetadata.swift
//
//
// Created by Brent Simmons on 9/22/24.
//
|
|
|
|
import Foundation
|
|
import SAX
|
|
|
|
/// Metadata gathered from the tags of an HTML document: favicons,
/// Apple touch icons, feed links, and Open Graph / Twitter properties.
public final class HTMLMetadata {

    /// The URL string passed to the initializer. It is forwarded to the
    /// favicon, touch-icon, and feed-link initializers.
    public let baseURLString: String
    /// The tags this metadata was built from, unmodified.
    public let tags: [HTMLTag]
    /// De-duplicated `<link rel="icon">` entries, or nil when there are none.
    public let favicons: [HTMLMetadataFavicon]?
    /// `<link rel="apple-touch-icon">` / `apple-touch-icon-precomposed` entries, or nil when there are none.
    public let appleTouchIcons: [HTMLMetadataAppleTouchIcon]?
    /// `<link rel="alternate">` entries whose `type` is a feed type, or nil when there are none.
    public let feedLinks: [HTMLMetadataFeedLink]?
    public let openGraphProperties: HTMLOpenGraphProperties?
    public let twitterProperties: HTMLTwitterProperties?

    /// Builds all metadata from `tags`.
    ///
    /// - Parameters:
    ///   - urlString: URL string of the document the tags came from.
    ///   - tags: Tags extracted from the document.
    init(_ urlString: String, _ tags: [HTMLTag]) {

        self.baseURLString = urlString
        self.tags = tags

        self.favicons = Self.resolvedFaviconLinks(urlString, tags)

        self.appleTouchIcons = Self.appleTouchIconTags(tags)?.map { htmlTag in
            HTMLMetadataAppleTouchIcon(urlString, htmlTag)
        }

        self.feedLinks = Self.feedLinkTags(tags)?.map { htmlTag in
            HTMLMetadataFeedLink(urlString, htmlTag)
        }

        self.openGraphProperties = HTMLOpenGraphProperties(urlString, tags)
        self.twitterProperties = HTMLTwitterProperties(urlString, tags)
    }

    /// Returns one favicon per distinct URL string, in document order.
    ///
    /// - Returns: nil when no `rel="icon"` link tag yields a URL string.
    static func resolvedFaviconLinks(_ baseURLString: String, _ tags: [HTMLTag]) -> [HTMLMetadataFavicon]? {

        guard let iconLinkTags = linkTagsWithMatchingRel("icon", tags) else {
            return nil
        }

        // A Set keeps the duplicate check O(1) per favicon.
        var seenURLStrings = Set<String>()

        let favicons: [HTMLMetadataFavicon] = iconLinkTags.compactMap { htmlTag in
            let favicon = HTMLMetadataFavicon(baseURLString, htmlTag)
            guard let urlString = favicon.urlString, seenURLStrings.insert(urlString).inserted else {
                return nil
            }
            return favicon
        }

        return favicons.isEmpty ? nil : favicons
    }

    /// Returns the link tags whose `rel` contains `apple-touch-icon` or
    /// `apple-touch-icon-precomposed`, or nil when there are none.
    static func appleTouchIconTags(_ tags: [HTMLTag]) -> [HTMLTag]? {

        guard let allLinkTags = Self.linkTags(tags) else {
            return nil
        }

        // Match against link tags only, so a non-link tag carrying a matching rel is ignored.
        return tagsMatchingRelValues(["apple-touch-icon", "apple-touch-icon-precomposed"], allLinkTags)
    }

    /// Returns the `rel="alternate"` link tags that have a feed `type`
    /// and a non-empty URL string, or nil when there are none.
    static func feedLinkTags(_ tags: [HTMLTag]) -> [HTMLTag]? {

        guard let alternateLinkTags = linkTagsWithMatchingRel("alternate", tags) else {
            return nil
        }

        let feedLinkTags = alternateLinkTags.filter { tag in
            guard let attributes = tag.attributes,
                  let type = attributes.object(forCaseInsensitiveKey: "type"),
                  typeIsFeedType(type) else {
                return false
            }
            // NOTE(review): uses the same URL helper as linkTagsWithMatchingRel; confirm the helper's name.
            guard let urlString = urlStringFromDictionary(attributes), !urlString.isEmpty else {
                return false
            }
            return true
        }

        return feedLinkTags.isEmpty ? nil : feedLinkTags
    }

    /// Returns true when `type` (compared case-insensitively) ends with an
    /// RSS, Atom, or JSON feed MIME subtype.
    static func typeIsFeedType(_ type: String) -> Bool {

        let lowerType = type.lowercased()
        // "/feed+json" covers application/feed+json, the MIME type JSON Feed 1.1 specifies.
        let feedSuffixes = ["/rss+xml", "/atom+xml", "/json", "/feed+json"]
        return feedSuffixes.contains { lowerType.hasSuffix($0) }
    }

    /// Returns the tags whose `tagType` is `.link`, or nil when there are none.
    static func linkTags(_ tags: [HTMLTag]) -> [HTMLTag]? {

        let linkTags = tags.filter { $0.tagType == .link }
        return linkTags.isEmpty ? nil : linkTags
    }

    /// Returns the link tags that have a non-empty URL string and whose `rel`
    /// contains `valueToMatch` as a whitespace-delimited word (case-insensitive).
    ///
    /// - Returns: nil when nothing matches.
    static func linkTagsWithMatchingRel(_ valueToMatch: String, _ tags: [HTMLTag]) -> [HTMLTag]? {

        guard let allLinkTags = Self.linkTags(tags) else {
            return nil
        }

        let tagsWithURLString = allLinkTags.filter { tag in
            guard let urlString = urlStringFromDictionary(tag.attributes), !urlString.isEmpty else {
                return false
            }
            return true
        }
        guard !tagsWithURLString.isEmpty else {
            return nil
        }

        return tagsMatchingRelValues([valueToMatch], tagsWithURLString)
    }

    /// Returns the tags whose `rel` value contains, as a whitespace-delimited
    /// word, any of `valuesToMatch` (case-insensitive).
    ///
    /// - Returns: nil when nothing matches.
    static func tagsMatchingRelValues(_ valuesToMatch: [String], _ tags: [HTMLTag]) -> [HTMLTag]? {

        let lowerValuesToMatch = Set(valuesToMatch.map { $0.lowercased() })

        let matchingTags = tags.filter { tag in
            guard let rel = relValue(tag.attributes) else {
                return false
            }
            return rel
                .components(separatedBy: .whitespacesAndNewlines)
                .contains { lowerValuesToMatch.contains($0.lowercased()) }
        }

        return matchingTags.isEmpty ? nil : matchingTags
    }
}
|
|
|
|
/// One Apple touch icon link tag: its `rel`, its `sizes` attribute
/// (raw and parsed), and its URL string.
public final class HTMLMetadataAppleTouchIcon {

    public let rel: String?
    /// The raw `sizes` attribute value, if present.
    public let sizes: String?
    /// `sizes` parsed as a single `WIDTHxHEIGHT` pair; nil when absent or not in that form.
    public let size: CGSize?
    public let urlString: String? // Absolute

    /// - Parameters:
    ///   - baseURLString: URL string of the containing document, passed to
    ///     `absoluteURLStringWithDictionary` along with the tag's attributes.
    ///   - tag: The link tag to read attributes from.
    init(_ baseURLString: String, _ tag: HTMLTag) {

        guard let attributes = tag.attributes else {
            self.rel = nil
            self.sizes = nil
            self.size = nil
            self.urlString = nil
            return
        }

        self.rel = attributes.object(forCaseInsensitiveKey: "rel")
        // Pass the base URL, as the feed-link and favicon initializers do.
        self.urlString = absoluteURLStringWithDictionary(attributes, baseURLString)

        let sizesValue: String? = attributes.object(forCaseInsensitiveKey: "sizes")
        self.sizes = sizesValue
        self.size = sizesValue.flatMap { Self.size(fromSizesValue: $0) }
    }

    /// Parses a single `WIDTHxHEIGHT` value such as "180x180".
    ///
    /// The separator is matched case-insensitively and surrounding whitespace
    /// is ignored. Returns nil for anything else, including multi-size lists.
    private static func size(fromSizesValue sizesValue: String) -> CGSize? {

        let components = sizesValue
            .trimmingCharacters(in: .whitespacesAndNewlines)
            .lowercased()
            .components(separatedBy: "x")

        guard components.count == 2,
              let width = Double(components[0]),
              let height = Double(components[1]) else {
            return nil
        }
        return CGSize(width: width, height: height)
    }
}
|
|
|
|
/// One feed link tag: its title, MIME type, and URL string.
public final class HTMLMetadataFeedLink {

    public let title: String?
    public let type: String?
    public let urlString: String? // Absolute

    /// - Parameters:
    ///   - baseURLString: URL string of the containing document, passed to
    ///     `absoluteURLStringWithDictionary` along with the tag's attributes.
    ///   - tag: The link tag to read attributes from.
    init(_ baseURLString: String, _ tag: HTMLTag) {

        guard let attributes = tag.attributes else {
            self.title = nil
            self.type = nil
            self.urlString = nil
            return
        }

        // The parameter is unlabeled, so naming it baseURLString brings the
        // name used below into scope without changing any call site.
        self.urlString = absoluteURLStringWithDictionary(attributes, baseURLString)
        self.title = attributes.object(forCaseInsensitiveKey: "title")
        self.type = attributes.object(forCaseInsensitiveKey: "type")
    }
}
|
|
|
|
/// One favicon link tag: its MIME type and URL string.
public final class HTMLMetadataFavicon {

    public let type: String?
    public let urlString: String?

    /// - Parameters:
    ///   - baseURLString: URL string of the containing document, passed to
    ///     `absoluteURLStringWithDictionary` along with the tag's attributes.
    ///   - tag: The link tag to read attributes from.
    init(_ baseURLString: String, _ tag: HTMLTag) {

        guard let attributes = tag.attributes else {
            self.type = nil
            self.urlString = nil
            return
        }

        // The parameter is unlabeled, so naming it baseURLString brings the
        // name used below into scope without changing any call site.
        self.urlString = absoluteURLStringWithDictionary(attributes, baseURLString)
        self.type = attributes.object(forCaseInsensitiveKey: "type")
    }
}
|
|
|
|
/// Open Graph properties found in a document's meta tags.
public final class HTMLOpenGraphProperties {

    // TODO: the rest. At this writing (Nov. 26, 2017) I just care about og:image.
    // See http://ogp.me/

    /// The `og:image` data parsed from the tags, or nil when parsing yields nothing.
    public let image: HTMLOpenGraphImage?

    /// - Parameters:
    ///   - urlString: URL string of the containing document (currently unused here).
    ///   - tags: Tags to search for Open Graph properties.
    init(_ urlString: String, _ tags: [HTMLTag]) {
        image = HTMLOpenGraphProperties.parse(tags)
    }
}
|
|
|
|
private extension HTMLOpenGraphProperties {

    private static let ogPrefix = "og:"

    /// Attribute names read from each meta tag.
    struct OGKey {
        static let property = "property"
        static let content = "content"
    }

    /// `property` values that describe the Open Graph image.
    struct OGValue {
        static let ogImage = "og:image"
        static let ogImageURL = "og:image:url"
        static let ogImageSecureURL = "og:image:secure_url"
        static let ogImageType = "og:image:type"
        static let ogImageAlt = "og:image:alt"
        static let ogImageWidth = "og:image:width"
        static let ogImageHeight = "og:image:height"
    }

    /// Collects the `og:image*` properties from the meta tags into one image.
    ///
    /// When a property appears more than once, the last occurrence wins.
    ///
    /// - Returns: nil when no `og:image*` property with a `content` attribute is found.
    static func parse(_ tags: [HTMLTag]) -> HTMLOpenGraphImage? {

        // HTMLOpenGraphImage properties to fill in.
        var url: String?
        var secureURL: String?
        var mimeType: String?
        var width: CGFloat?
        var height: CGFloat?
        var altText: String?

        for tag in tags where tag.tagType == .meta {

            guard let attributes = tag.attributes,
                  let propertyName = attributes[OGKey.property], propertyName.hasPrefix(ogPrefix),
                  let content = attributes[OGKey.content] else {
                continue
            }

            switch propertyName {
            case OGValue.ogImage, OGValue.ogImageURL:
                url = content
            case OGValue.ogImageSecureURL:
                secureURL = content
            case OGValue.ogImageType:
                mimeType = content
            case OGValue.ogImageAlt:
                altText = content
            case OGValue.ogImageWidth:
                // Parse through Double; non-numeric content yields nil.
                width = Double(content).map { CGFloat($0) }
            case OGValue.ogImageHeight:
                height = Double(content).map { CGFloat($0) }
            default:
                break
            }
        }

        if url == nil && secureURL == nil && mimeType == nil && width == nil && height == nil && altText == nil {
            return nil
        }

        return HTMLOpenGraphImage(url: url, secureURL: secureURL, mimeType: mimeType, width: width, height: height, altText: altText)
    }
}
|
|
|
|
/// The values of the `og:image` family of Open Graph properties.
/// Every field is optional because a document may supply any subset.
public final class HTMLOpenGraphImage {

    public let url: String?
    public let secureURL: String?
    public let mimeType: String?
    public let width: CGFloat?
    public let height: CGFloat?
    public let altText: String?

    /// Memberwise initializer.
    ///
    /// `mimeType` is optional, matching the stored property, so a missing
    /// `og:image:type` can be passed as nil.
    init(url: String?, secureURL: String?, mimeType: String?, width: CGFloat?, height: CGFloat?, altText: String?) {

        self.url = url
        self.secureURL = secureURL
        self.mimeType = mimeType
        self.width = width
        self.height = height
        self.altText = altText
    }
}
|
|
|
|
/// Twitter card properties found in a document's meta tags.
public final class HTMLTwitterProperties {

    /// Content of the first `twitter:image:src` meta tag with non-empty content, if any.
    public let imageURL: String? // twitter:image:src

    /// Attribute names read from each meta tag.
    private struct TwitterKey {
        static let name = "name"
        static let content = "content"
    }

    /// `name` values this class looks for.
    private struct TwitterValue {
        static let imageSrc = "twitter:image:src"
    }

    /// - Parameters:
    ///   - urlString: URL string of the containing document (currently unused here).
    ///   - tags: Tags to search for Twitter properties.
    init(_ urlString: String, _ tags: [HTMLTag]) {

        // Optional because no matching tag may exist.
        var foundImageURL: String?

        for tag in tags where tag.tagType == .meta {
            guard let name = tag.attributes?[TwitterKey.name], name == TwitterValue.imageSrc else {
                continue
            }
            guard let content = tag.attributes?[TwitterKey.content], !content.isEmpty else {
                continue
            }
            foundImageURL = content
            break
        }

        self.imageURL = foundImageURL
    }
}
|
|
|