Create first draft of HTMLMetadata.

This commit is contained in:
Brent Simmons 2024-09-22 21:40:52 -07:00
parent a2fc8b5dec
commit 3e6e843dc8
3 changed files with 396 additions and 3 deletions

View File

@ -0,0 +1,392 @@
//
// HTMLMetadata.swift
//
//
// Created by Brent Simmons on 9/22/24.
//
import Foundation
import SAX
/// Metadata derived from a list of parsed HTML tags: favicons, Apple touch
/// icons, feed links, Open Graph properties, and Twitter properties.
///
/// NOTE(review): `urlStringFromDictionary`, `relValue`, and
/// `object(forCaseInsensitiveKey:)` are not defined in this file; they are
/// assumed to be project helpers that read the URL attribute, the `rel`
/// attribute, and an arbitrary attribute (ignoring key case) — confirm.
public final class HTMLMetadata {

    /// The URL string supplied at initialization; also handed to each per-tag wrapper.
    public let baseURLString: String
    /// The tags supplied at initialization, unmodified.
    public let tags: [HTMLTag]
    /// `nil` when no favicon link was found (never an empty array).
    public let favicons: [HTMLMetadataFavicon]?
    /// `nil` when no Apple touch icon link was found (never an empty array).
    public let appleTouchIcons: [HTMLMetadataAppleTouchIcon]?
    /// `nil` when no feed link was found (never an empty array).
    public let feedLinks: [HTMLMetadataFeedLink]?
    public let openGraphProperties: HTMLOpenGraphProperties?
    public let twitterProperties: HTMLTwitterProperties?

    /// - Parameters:
    ///   - urlString: Stored as `baseURLString` and passed to every wrapper created here.
    ///   - tags: The tags to inspect.
    init(_ urlString: String, _ tags: [HTMLTag]) {
        self.baseURLString = urlString
        self.tags = tags
        self.favicons = Self.resolvedFaviconLinks(urlString, tags)
        self.appleTouchIcons = Self.appleTouchIconTags(tags)?.map { HTMLMetadataAppleTouchIcon(urlString, $0) }
        self.feedLinks = Self.feedLinkTags(tags)?.map { HTMLMetadataFeedLink(urlString, $0) }
        self.openGraphProperties = HTMLOpenGraphProperties(urlString, tags)
        self.twitterProperties = HTMLTwitterProperties(urlString, tags)
    }

    /// Builds favicons from link tags whose `rel` contains the word `icon`.
    ///
    /// Favicons without a `urlString` are dropped. When several favicons share
    /// the same `urlString`, only the first one (in tag order) is kept.
    ///
    /// - Returns: The favicons, or `nil` when there are none.
    static func resolvedFaviconLinks(_ baseURLString: String, _ tags: [HTMLTag]) -> [HTMLMetadataFavicon]? {
        guard let iconLinkTags = linkTagsWithMatchingRel("icon", tags) else {
            return nil
        }

        var seenURLStrings = Set<String>()
        let favicons = iconLinkTags.compactMap { htmlTag -> HTMLMetadataFavicon? in
            let favicon = HTMLMetadataFavicon(baseURLString, htmlTag)
            // `insert` reports whether the value was new, so this both checks and records.
            guard let urlString = favicon.urlString, seenURLStrings.insert(urlString).inserted else {
                return nil
            }
            return favicon
        }

        return favicons.isEmpty ? nil : favicons
    }

    /// Link tags whose `rel` contains `apple-touch-icon` or `apple-touch-icon-precomposed`.
    ///
    /// - Returns: The matching link tags, or `nil` when there are none.
    static func appleTouchIconTags(_ tags: [HTMLTag]) -> [HTMLTag]? {
        guard let allLinkTags = linkTags(tags) else {
            return nil
        }
        // Search the link tags only — not every tag that happens to carry a rel attribute.
        return tagsMatchingRelValues(["apple-touch-icon", "apple-touch-icon-precomposed"], allLinkTags)
    }

    /// Link tags with `rel` containing `alternate`, a feed MIME type, and a non-empty URL string.
    ///
    /// - Returns: The matching link tags, or `nil` when there are none.
    static func feedLinkTags(_ tags: [HTMLTag]) -> [HTMLTag]? {
        guard let alternateLinkTags = linkTagsWithMatchingRel("alternate", tags) else {
            return nil
        }

        let feedLinkTags = alternateLinkTags.filter { tag in
            guard let attributes = tag.attributes,
                  let type = attributes.object(forCaseInsensitiveKey: "type"),
                  typeIsFeedType(type) else {
                return false
            }
            guard let urlString = urlStringFromDictionary(attributes), !urlString.isEmpty else {
                return false
            }
            return true
        }

        return feedLinkTags.isEmpty ? nil : feedLinkTags
    }

    /// Whether `type` (compared case-insensitively) ends with one of the known feed MIME subtypes.
    static func typeIsFeedType(_ type: String) -> Bool {
        let lowerType = type.lowercased()
        // "/feed+json" is the JSON Feed 1.1 MIME type; "/json" covers JSON Feed 1.0.
        let feedSuffixes = ["/rss+xml", "/atom+xml", "/json", "/feed+json"]
        return feedSuffixes.contains { lowerType.hasSuffix($0) }
    }

    /// All tags whose `tagType` is `.link`.
    ///
    /// - Returns: The link tags, or `nil` when there are none.
    static func linkTags(_ tags: [HTMLTag]) -> [HTMLTag]? {
        let linkTags = tags.filter { $0.tagType == .link }
        return linkTags.isEmpty ? nil : linkTags
    }

    /// Link tags that have a non-empty URL string and whose `rel` contains `valueToMatch`.
    ///
    /// Matching is case-insensitive and on whole whitespace-delimited words,
    /// so `"icon"` matches `rel="shortcut icon"`.
    ///
    /// - Returns: The matching link tags, or `nil` when there are none.
    static func linkTagsWithMatchingRel(_ valueToMatch: String, _ tags: [HTMLTag]) -> [HTMLTag]? {
        guard let allLinkTags = linkTags(tags) else {
            return nil
        }

        let tagsWithURLString = allLinkTags.filter { tag in
            guard let urlString = urlStringFromDictionary(tag.attributes) else {
                return false
            }
            return !urlString.isEmpty
        }
        guard !tagsWithURLString.isEmpty else {
            return nil
        }

        return tagsMatchingRelValues([valueToMatch], tagsWithURLString)
    }

    /// Tags whose `rel` value contains at least one of `valuesToMatch` as a
    /// whitespace-delimited word, compared case-insensitively.
    ///
    /// - Returns: The matching tags, or `nil` when there are none.
    static func tagsMatchingRelValues(_ valuesToMatch: [String], _ tags: [HTMLTag]) -> [HTMLTag]? {
        let lowerValuesToMatch = Set(valuesToMatch.map { $0.lowercased() })

        let matchingTags = tags.filter { tag in
            guard let rel = relValue(tag.attributes) else {
                return false
            }
            return rel
                .components(separatedBy: .whitespacesAndNewlines)
                .contains { lowerValuesToMatch.contains($0.lowercased()) }
        }

        return matchingTags.isEmpty ? nil : matchingTags
    }
}
/// Values read from one Apple touch icon link tag.
///
/// NOTE(review): `absoluteURLStringWithDictionary` and
/// `object(forCaseInsensitiveKey:)` are not defined in this file; they are
/// assumed to be project helpers — confirm their signatures.
public final class HTMLMetadataAppleTouchIcon {

    /// The tag's `rel` attribute, if present.
    public let rel: String?
    /// The tag's raw `sizes` attribute, if present.
    public let sizes: String?
    /// `sizes` parsed as `<width>x<height>`; `nil` when absent or not in that form.
    public let size: CGSize?
    public let urlString: String? // Absolute

    /// - Parameters:
    ///   - urlString: The base URL string handed to `absoluteURLStringWithDictionary`.
    ///   - tag: The tag to read. All properties are `nil` when it has no attributes.
    init(_ urlString: String, _ tag: HTMLTag) {
        guard let attributes = tag.attributes else {
            self.rel = nil
            self.sizes = nil
            self.size = nil
            self.urlString = nil
            return
        }

        self.rel = attributes.object(forCaseInsensitiveKey: "rel")
        // Inside init, the bare name `urlString` is the base-URL parameter, not the property.
        self.urlString = absoluteURLStringWithDictionary(attributes, urlString)

        let sizes: String? = attributes.object(forCaseInsensitiveKey: "sizes")
        self.sizes = sizes
        self.size = sizes.flatMap { HTMLMetadataAppleTouchIcon.parsedSize($0) }
    }

    /// Parses a single `<width>x<height>` token (separator may be `x` or `X`).
    ///
    /// Returns `nil` unless there are exactly two components and both parse as numbers.
    private static func parsedSize(_ sizes: String) -> CGSize? {
        let components = sizes.components(separatedBy: CharacterSet(charactersIn: "xX"))
        guard components.count == 2,
              let width = Double(components[0]),
              let height = Double(components[1]) else {
            return nil
        }
        return CGSize(width: width, height: height)
    }
}
/// Values read from one feed link tag.
///
/// NOTE(review): `absoluteURLStringWithDictionary` and
/// `object(forCaseInsensitiveKey:)` are not defined in this file; they are
/// assumed to be project helpers — confirm their signatures.
public final class HTMLMetadataFeedLink {

    /// The tag's `title` attribute, if present.
    public let title: String?
    /// The tag's `type` attribute, if present.
    public let type: String?
    public let urlString: String? // Absolute

    /// - Parameters:
    ///   - urlString: The base URL string handed to `absoluteURLStringWithDictionary`.
    ///   - tag: The tag to read. All properties are `nil` when it has no attributes.
    init(_ urlString: String, _ tag: HTMLTag) {
        guard let attributes = tag.attributes else {
            self.title = nil
            self.type = nil
            self.urlString = nil
            return
        }

        // Inside init, the bare name `urlString` is the base-URL parameter, not the property.
        self.urlString = absoluteURLStringWithDictionary(attributes, urlString)
        self.title = attributes.object(forCaseInsensitiveKey: "title")
        self.type = attributes.object(forCaseInsensitiveKey: "type")
    }
}
/// Values read from one favicon link tag.
///
/// NOTE(review): `absoluteURLStringWithDictionary` and
/// `object(forCaseInsensitiveKey:)` are not defined in this file; they are
/// assumed to be project helpers — confirm their signatures.
public final class HTMLMetadataFavicon {

    /// The tag's `type` attribute, if present.
    public let type: String?
    /// The value returned by `absoluteURLStringWithDictionary` for this tag.
    public let urlString: String?

    /// - Parameters:
    ///   - urlString: The base URL string handed to `absoluteURLStringWithDictionary`.
    ///   - tag: The tag to read. Both properties are `nil` when it has no attributes.
    init(_ urlString: String, _ tag: HTMLTag) {
        guard let attributes = tag.attributes else {
            self.type = nil
            self.urlString = nil
            return
        }

        // Inside init, the bare name `urlString` is the base-URL parameter, not the property.
        self.urlString = absoluteURLStringWithDictionary(attributes, urlString)
        self.type = attributes.object(forCaseInsensitiveKey: "type")
    }
}
/// Open Graph properties collected from a list of tags.
public final class HTMLOpenGraphProperties {

    // TODO: support the remaining properties. As of Nov. 26, 2017 only og:image is needed.
    // Reference: http://ogp.me/

    /// The image assembled by `parse(_:)` from the given tags.
    public let image: HTMLOpenGraphImage?

    /// - Parameters:
    ///   - urlString: Accepted for parity with the other metadata types; not read here.
    ///   - tags: The tags handed to `parse(_:)`.
    init(_ urlString: String, _ tags: [HTMLTag]) {
        image = HTMLOpenGraphProperties.parse(tags)
    }
}
private extension HTMLOpenGraphProperties {

    /// Every property name this parser looks at starts with this prefix.
    private static let ogPrefix = "og:"

    /// Attribute names read from each meta tag.
    struct OGKey {
        static let property = "property"
        static let content = "content"
    }

    /// The `property` values that are recognized.
    struct OGValue {
        static let ogImage = "og:image"
        static let ogImageURL = "og:image:url"
        static let ogImageSecureURL = "og:image:secure_url"
        static let ogImageType = "og:image:type"
        static let ogImageAlt = "og:image:alt"
        static let ogImageWidth = "og:image:width"
        static let ogImageHeight = "og:image:height"
    }

    /// Collects the `og:image*` values from the `.meta` tags into a single image.
    ///
    /// When a property appears more than once, the last occurrence wins.
    /// Width and height content that does not parse as a number is ignored.
    ///
    /// - Returns: The image, or `nil` when none of its fields could be filled in.
    static func parse(_ tags: [HTMLTag]) -> HTMLOpenGraphImage? {

        // HTMLOpenGraphImage properties to fill in.
        var url: String?
        var secureURL: String?
        var mimeType: String?
        var width: CGFloat?
        var height: CGFloat?
        var altText: String?

        for tag in tags where tag.tagType == .meta {
            guard let attributes = tag.attributes,
                  let propertyName = attributes[OGKey.property], propertyName.hasPrefix(ogPrefix),
                  let content = attributes[OGKey.content] else {
                continue
            }

            switch propertyName {
            case OGValue.ogImage, OGValue.ogImageURL:
                url = content
            case OGValue.ogImageSecureURL:
                secureURL = content
            case OGValue.ogImageType:
                mimeType = content
            case OGValue.ogImageAlt:
                altText = content
            case OGValue.ogImageWidth:
                // CGFloat has no String initializer, so parse through Double.
                if let value = Double(content) {
                    width = CGFloat(value)
                }
            case OGValue.ogImageHeight:
                if let value = Double(content) {
                    height = CGFloat(value)
                }
            default:
                break
            }
        }

        if url == nil && secureURL == nil && mimeType == nil && width == nil && height == nil && altText == nil {
            return nil
        }

        return HTMLOpenGraphImage(url: url, secureURL: secureURL, mimeType: mimeType, width: width, height: height, altText: altText)
    }
}
/// The fields of one Open Graph image. Every field is optional.
public final class HTMLOpenGraphImage {

    public let url: String?
    public let secureURL: String?
    public let mimeType: String?
    public let width: CGFloat?
    public let height: CGFloat?
    public let altText: String?

    /// Stores each argument in the property of the same name.
    ///
    /// `mimeType` is optional, matching the stored property, so a caller with
    /// no MIME type can pass `nil`.
    init(url: String?, secureURL: String?, mimeType: String?, width: CGFloat?, height: CGFloat?, altText: String?) {
        self.url = url
        self.secureURL = secureURL
        self.mimeType = mimeType
        self.width = width
        self.height = height
        self.altText = altText
    }
}
/// Twitter properties collected from a list of tags.
public final class HTMLTwitterProperties {

    /// Content of the first matching `twitter:image:src` meta tag; `nil` when there is none.
    public let imageURL: String? // twitter:image:src

    /// Attribute names read from each meta tag.
    private struct TwitterKey {
        static let name = "name"
        static let content = "content"
    }

    /// The `name` values that are recognized.
    private struct TwitterValue {
        static let imageSrc = "twitter:image:src"
    }

    /// - Parameters:
    ///   - urlString: Accepted for parity with the other metadata types; not read here.
    ///   - tags: The tags to search. Only `.meta` tags are considered.
    init(_ urlString: String, _ tags: [HTMLTag]) {
        var foundImageURL: String?

        for tag in tags where tag.tagType == .meta {
            guard let attributes = tag.attributes,
                  attributes[TwitterKey.name] == TwitterValue.imageSrc,
                  let content = attributes[TwitterKey.content], !content.isEmpty else {
                continue
            }
            // The first tag with non-empty content wins.
            foundImageURL = content
            break
        }

        self.imageURL = foundImageURL
    }
}

View File

@ -7,6 +7,8 @@
import Foundation
public typealias HTMLTagAttributes = [String: String]
public struct HTMLTag: Sendable {
public enum TagType: Sendable {
@ -15,9 +17,9 @@ public struct HTMLTag: Sendable {
}
public let tagType: TagType
public let attributes: [String: String]?
public let attributes: HTMLTagAttributes?
public init(tagType: TagType, attributes: [String : String]?) {
public init(tagType: TagType, attributes: HTMLTagAttributes?) {
self.tagType = tagType
self.attributes = attributes
}

View File

@ -8,7 +8,6 @@
import XCTest
import HTMLParser
import SAX
import libxml2
class HTMLLinkTests: XCTestCase {