//Made by Lumaa

import Foundation
import SwiftSoup
import SwiftUI

/// Keys used when an `HTMLString` is encoded in its expanded, keyed form
/// (see `HTMLString.encode(to:)`).
private enum CodingKeys: CodingKey {
    case htmlValue, asMarkdown, asRawText, statusesURLs, links
}

/// An HTML fragment together with several derived representations:
/// a markdown rendition, a plain-text rendition, the links it contains and an
/// `AttributedString` built from the markdown.
///
/// Decoding accepts two shapes:
/// - a single string value, which is treated as raw HTML and parsed with SwiftSoup;
/// - a keyed container as produced by `encode(to:)`, whose stored values are reused
///   without parsing the HTML again.
public struct HTMLString: Codable, Equatable, Hashable, @unchecked Sendable {
    /// The HTML exactly as it was received.
    public var htmlValue: String = ""
    /// Markdown rendition of `htmlValue`, with markdown control characters escaped.
    public var asMarkdown: String = ""
    /// Plain-text rendition of `htmlValue` with all tags removed.
    public var asRawText: String = ""
    /// URLs of anchors whose last path component parses as an `Int`.
    public var statusesURLs = [URL]()
    /// Every anchor found while walking the HTML, in document order.
    public private(set) var links = [Link]()

    /// `asMarkdown` parsed as inline markdown; falls back to the raw `htmlValue`
    /// when the markdown cannot be parsed. Not encoded; rebuilt on every init.
    public var asSafeMarkdownAttributedString: AttributedString = .init()

    /// Escapes `\`, `` ` ``, `*`, `~` and `[`. Only set while parsing raw HTML.
    private var main_regex: NSRegularExpression?
    /// Escapes `_` except between colons. Only set while parsing raw HTML.
    private var underscore_regex: NSRegularExpression?

    /// Decodes either a bare HTML string or the keyed form written by `encode(to:)`.
    ///
    /// This initializer never throws: if neither shape can be decoded, `htmlValue`
    /// is set to the empty string.
    public init(from decoder: Decoder) {
        var alreadyDecoded = false
        do {
            let container = try decoder.singleValueContainer()
            htmlValue = try container.decode(String.self)
        } catch {
            do {
                // From here on the value is considered pre-processed, even if
                // reading one of the keys fails below.
                alreadyDecoded = true
                let container = try decoder.container(keyedBy: CodingKeys.self)
                htmlValue = try container.decode(String.self, forKey: .htmlValue)
                asMarkdown = try container.decode(String.self, forKey: .asMarkdown)
                asRawText = try container.decode(String.self, forKey: .asRawText)
                statusesURLs = try container.decode([URL].self, forKey: .statusesURLs)
                links = try container.decode([Link].self, forKey: .links)
            } catch {
                htmlValue = ""
            }
        }

        if !alreadyDecoded {
            // https://daringfireball.net/projects/markdown/syntax
            // Pre-escape \ ` _ * ~ and [ as these are the only
            // characters the markdown parser uses when it renders
            // to attributed text. Note that ~ for strikethrough is
            // not documented in the syntax docs but is used by
            // AttributedString.
            main_regex = try? NSRegularExpression(pattern: "([\\*\\`\\~\\[\\\\])", options: .caseInsensitive)
            // don't escape underscores that are between colons, they are most likely custom emoji
            underscore_regex = try? NSRegularExpression(pattern: "(?!\\B:[^:]*)(_)(?![^:]*:\\B)", options: .caseInsensitive)

            asMarkdown = ""
            do {
                let document: Document = try SwiftSoup.parse(htmlValue)
                // Builds `asMarkdown`, `links` and `statusesURLs`.
                handleNode(node: document)

                // Build the raw text: turn <br> and <p> into line breaks, then strip every tag.
                document.outputSettings(OutputSettings().prettyPrint(pretty: false))
                try document.select("br").after("\n")
                try document.select("p").after("\n\n")
                let html = try document.html()
                var text = try SwiftSoup.clean(html, "", Whitelist.none(), OutputSettings().prettyPrint(pretty: false)) ?? ""
                // Remove the two last line breaks added after the last paragraph.
                if text.hasSuffix("\n\n") {
                    text.removeLast(2)
                }
                asRawText = text

                if asMarkdown.hasPrefix("\n") {
                    asMarkdown.removeFirst()
                }
            } catch {
                asRawText = htmlValue
            }
        }

        asSafeMarkdownAttributedString = Self.makeAttributedString(markdown: asMarkdown, fallback: htmlValue)
    }

    /// Wraps a plain string without any HTML parsing.
    ///
    /// - Parameters:
    ///   - stringValue: Used verbatim for `htmlValue`, `asMarkdown` and `asRawText`.
    ///   - parseMarkdown: When `true`, `stringValue` is interpreted as inline markdown
    ///     for `asSafeMarkdownAttributedString`; otherwise it is used literally.
    public init(stringValue: String, parseMarkdown: Bool = false) {
        htmlValue = stringValue
        asMarkdown = stringValue
        asRawText = stringValue
        statusesURLs = []

        if parseMarkdown {
            asSafeMarkdownAttributedString = Self.makeAttributedString(markdown: asMarkdown, fallback: htmlValue)
        } else {
            asSafeMarkdownAttributedString = AttributedString(stringLiteral: htmlValue)
        }
    }

    /// Encodes the expanded, keyed form so that decoding can skip HTML parsing.
    public func encode(to encoder: Encoder) throws {
        var container = encoder.container(keyedBy: CodingKeys.self)
        try container.encode(htmlValue, forKey: .htmlValue)
        try container.encode(asMarkdown, forKey: .asMarkdown)
        try container.encode(asRawText, forKey: .asRawText)
        try container.encode(statusesURLs, forKey: .statusesURLs)
        try container.encode(links, forKey: .links)
    }

    /// Parses `markdown` as inline-only, whitespace-preserving markdown and returns
    /// `fallback` as a literal attributed string when parsing fails.
    private static func makeAttributedString(markdown: String, fallback: String) -> AttributedString {
        let options = AttributedString.MarkdownParsingOptions(
            allowsExtendedAttributes: true,
            interpretedSyntax: .inlineOnlyPreservingWhitespace
        )
        do {
            return try AttributedString(markdown: markdown, options: options)
        } catch {
            return AttributedString(stringLiteral: fallback)
        }
    }

    /// Recursively walks `node`, appending its markdown rendition to `asMarkdown`
    /// and recording anchors in `links` / `statusesURLs`.
    private mutating func handleNode(node: SwiftSoup.Node) {
        do {
            if let className = try? node.attr("class") {
                if className == "invisible" {
                    // don't display
                    return
                }

                if className == "ellipsis" {
                    // descend into this one now and
                    // append the ellipsis
                    for child in node.getChildNodes() {
                        handleNode(node: child)
                    }
                    asMarkdown += "…"
                    return
                }
            }

            switch node.nodeName() {
            case "p":
                if !asMarkdown.isEmpty { // ignore first opening <p>
                    asMarkdown += "\n\n"
                }

            case "br":
                if !asMarkdown.isEmpty { // ignore first opening <br>
                    asMarkdown += "\n"
                }

            case "a":
                let href = try node.attr("href")
                // An anchor whose last path component is an integer is recorded as a status URL.
                if !href.isEmpty, let url = URL(string: href), Int(url.lastPathComponent) != nil {
                    statusesURLs.append(url)
                }

                asMarkdown += "["
                let start = asMarkdown.endIndex
                // descend into this node now so we can wrap the
                // inner part of the link in the right markup
                for child in node.getChildNodes() {
                    handleNode(node: child)
                }
                let finish = asMarkdown.endIndex

                var linkRef = href

                // Try creating a URL from the string. If it fails, try URL encoding
                // the string first.
                if let linkUrl = URL(string: href) ?? URL(string: href, encodePath: true) {
                    linkRef = linkUrl.absoluteString
                    let displayString = asMarkdown[start ..< finish]
                    links.append(Link(linkUrl, displayString: String(displayString)))
                }

                asMarkdown += "]("
                asMarkdown += linkRef
                asMarkdown += ")"
                return

            case "#text":
                var txt = node.description

                if let underscore_regex, let main_regex {
                    // This is the markdown escaper.
                    // NSRange is measured in UTF-16 code units, so it must be derived from
                    // the string's indices rather than from `count` (grapheme clusters);
                    // otherwise text containing emoji is only partially escaped.
                    txt = main_regex.stringByReplacingMatches(
                        in: txt,
                        options: [],
                        range: NSRange(txt.startIndex..., in: txt),
                        withTemplate: "\\\\$1"
                    )
                    txt = underscore_regex.stringByReplacingMatches(
                        in: txt,
                        options: [],
                        range: NSRange(txt.startIndex..., in: txt),
                        withTemplate: "\\\\$1"
                    )
                }
                // Strip newlines and line separators - they should be being sent as <br>s
                asMarkdown += txt.replacingOccurrences(of: "\n", with: "").replacingOccurrences(of: "\u{2028}", with: "")

            case "ul":
                // Unordered (bulleted) list
                // SwiftUI's Text won't display these in an AttributedString, but we can at least improve the appearance
                asMarkdown += "\n\n"
                for child in node.getChildNodes() {
                    asMarkdown += "- "
                    handleNode(node: child)
                    asMarkdown += "\n"
                }
                return

            case "ol":
                // Ordered (numbered) list
                // Same thing, won't display in a Text, but this is just an attempt to improve the appearance
                asMarkdown += "\n\n"
                for (offset, child) in node.getChildNodes().enumerated() {
                    asMarkdown += "\(offset + 1). "
                    handleNode(node: child)
                    asMarkdown += "\n"
                }
                return

            default:
                break
            }

            for child in node.getChildNodes() {
                handleNode(node: child)
            }
        } catch {
            // Best effort: if the `href` attribute cannot be read, this node and its
            // children are left out of the markdown.
        }
    }

    /// An anchor found in the HTML, classified by the first character of its display text.
    public struct Link: Codable, Hashable, Identifiable {
        public var id: Int { hashValue }
        /// Destination of the anchor.
        public let url: URL
        /// Markdown text that was generated for the inside of the anchor.
        public let displayString: String
        /// `.mention` for text starting with `@`, `.hashtag` for `#`, otherwise `.url`.
        public let type: LinkType
        /// The display text for mentions, the text without `#` for hashtags, and the
        /// host (without a leading `www.`) for plain URLs.
        public let title: String

        init(_ url: URL, displayString: String) {
            self.url = url
            self.displayString = displayString

            switch displayString.first {
            case "@":
                type = .mention
                title = displayString
            case "#":
                type = .hashtag
                title = String(displayString.dropFirst())
            default:
                type = .url
                var hostNameUrl = url.host ?? url.absoluteString
                if hostNameUrl.hasPrefix("www.") {
                    hostNameUrl = String(hostNameUrl.dropFirst(4))
                }
                title = hostNameUrl
            }
        }

        public enum LinkType: String, Codable {
            case url
            case mention
            case hashtag
        }
    }
}

public extension URL {
    // It's common to use non-ASCII characters in URLs even though they're technically
    // invalid characters. Every modern browser handles this by silently encoding
    // the invalid characters on the user's behalf. However, trying to create a URL
    // object with un-encoded characters will result in nil so we need to encode the
    // invalid characters before creating the URL object. The unencoded version
    // should still be shown in the displayed status.

    /// Creates a URL from `string`, optionally percent-encoding it first.
    ///
    /// - Parameters:
    ///   - string: The URL string, possibly containing characters that are not valid in a URL.
    ///   - encodePath: When `true` and `string` starts with `http://` or `https://`, each
    ///     `/`-delimited component is percent-encoded with `.urlPathAllowed` and anything
    ///     from the first `?` of the last component onwards with `.urlQueryAllowed`.
    ///     Otherwise `string` is used unchanged.
    /// - Returns: `nil` when the resulting string still cannot be parsed as a URL.
    init?(string: String, encodePath: Bool) {
        var encodedUrlString = ""
        if encodePath,
           string.starts(with: "http://") || string.starts(with: "https://"),
           let schemeSlash = string.firstIndex(of: "/")
        {
            let searchStart = string.index(schemeSlash, offsetBy: 1)

            // We don't want to encode the host portion of the URL
            // NOTE(review): `searchStart` is the second slash of "//", so the search below
            // matches that slash itself and the host ends up being percent-encoded as the
            // first component. Left as-is because changing it alters which URLs parse.
            if var startIndex = string[searchStart...].firstIndex(of: "/") {
                encodedUrlString = String(string[...startIndex])
                while let endIndex = string[string.index(after: startIndex)...].firstIndex(of: "/") {
                    let componentStartIndex = string.index(after: startIndex)
                    encodedUrlString += string[componentStartIndex ... endIndex]
                        .addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) ?? ""
                    startIndex = endIndex
                }

                // The last part of the path may have a query string appended to it
                let componentStartIndex = string.index(after: startIndex)
                if let queryStartIndex = string[componentStartIndex...].firstIndex(of: "?") {
                    encodedUrlString += string[componentStartIndex ..< queryStartIndex]
                        .addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) ?? ""
                    encodedUrlString += string[queryStartIndex...]
                        .addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed) ?? ""
                } else {
                    encodedUrlString += string[componentStartIndex...]
                        .addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) ?? ""
                }
            }
        }
        if encodedUrlString.isEmpty {
            encodedUrlString = string
        }
        self.init(string: encodedUrlString)
    }
}