Remove no-longer-needed Unicode support for URL (since it’s part of iOS and macOS now).

This commit is contained in:
Brent Simmons 2024-06-09 10:58:07 -07:00
parent 447aa7a448
commit e74c81518e
14 changed files with 13 additions and 1058 deletions

View File

@ -212,7 +212,7 @@ private extension FaviconDownloader {
func findFaviconURLs(with homePageURL: String) async -> [String]? {
guard let url = URL(unicodeString: homePageURL) else {
guard let url = URL(string: homePageURL) else {
return nil
}
guard let faviconURLs = await FaviconURLFinder.findFaviconURLs(with: homePageURL, downloadMetadata: delegate!.downloadMetadata(_:)) else {

View File

@ -25,7 +25,7 @@ import UniformTypeIdentifiers
/// - urls: An array of favicon URLs as strings.
static func findFaviconURLs(with homePageURL: String, downloadMetadata: ((String) async throws -> RSHTMLMetadata?)) async -> [String]? {
guard let _ = URL(unicodeString: homePageURL) else {
guard let _ = URL(string: homePageURL) else {
return nil
}

View File

@ -43,7 +43,7 @@ struct Browser {
/// - Note: Some browsers (specifically Chromium-derived ones) will ignore the request
/// to open in the background.
static func open(_ urlString: String, inBackground: Bool) {
guard let url = URL(unicodeString: urlString), let preparedURL = url.preparedForOpeningInBrowser() else { return }
guard let url = URL(string: urlString), let preparedURL = url.preparedForOpeningInBrowser() else { return }
let configuration = NSWorkspace.OpenConfiguration()
configuration.requiresUniversalLinks = true

View File

@ -162,11 +162,11 @@ private extension FeedInspectorViewController {
}
func updateHomePageURL() {
homePageURLTextField?.stringValue = feed?.homePageURL?.decodedURLString ?? ""
homePageURLTextField?.stringValue = feed?.homePageURL ?? ""
}
func updateFeedURL() {
urlTextField?.stringValue = feed?.url.decodedURLString ?? ""
urlTextField?.stringValue = feed?.url ?? ""
}
func updateNotifyAboutNewArticles() {

View File

@ -96,7 +96,7 @@ class AddFeedWindowController : NSWindowController {
cancelSheet()
return;
}
guard let url = URL(unicodeString: normalizedURLString) else {
guard let url = URL(string: normalizedURLString) else {
cancelSheet()
return
}

View File

@ -209,16 +209,16 @@ private extension SidebarViewController {
}
if let homePageURL = feed.homePageURL, let _ = URL(string: homePageURL) {
let item = menuItem(NSLocalizedString("Open Home Page", comment: "Command"), #selector(openHomePageFromContextualMenu(_:)), homePageURL.decodedURLString ?? homePageURL)
let item = menuItem(NSLocalizedString("Open Home Page", comment: "Command"), #selector(openHomePageFromContextualMenu(_:)), homePageURL)
menu.addItem(item)
menu.addItem(NSMenuItem.separator())
}
let copyFeedURLItem = menuItem(NSLocalizedString("Copy Feed URL", comment: "Command"), #selector(copyURLFromContextualMenu(_:)), feed.url.decodedURLString ?? feed.url)
let copyFeedURLItem = menuItem(NSLocalizedString("Copy Feed URL", comment: "Command"), #selector(copyURLFromContextualMenu(_:)), feed.url)
menu.addItem(copyFeedURLItem)
if let homePageURL = feed.homePageURL {
let item = menuItem(NSLocalizedString("Copy Home Page URL", comment: "Command"), #selector(copyURLFromContextualMenu(_:)), homePageURL.decodedURLString ?? homePageURL)
let item = menuItem(NSLocalizedString("Copy Home Page URL", comment: "Command"), #selector(copyURLFromContextualMenu(_:)), homePageURL)
menu.addItem(item)
}
menu.addItem(NSMenuItem.separator())

View File

@ -17,7 +17,7 @@ struct HTMLMetadataDownloader {
@MainActor static func downloadMetadata(for url: String) async -> RSHTMLMetadata? {
guard let actualURL = URL(unicodeString: url) else {
guard let actualURL = URL(string: url) else {
return nil
}

View File

@ -1,21 +0,0 @@
//
// Data+Extensions.swift
// PunyCocoa Swift
//
// Created by Nate Weaver on 2020-04-12.
//
import Foundation
import zlib
extension Data {
	/// The CRC-32 checksum of the receiver's bytes, computed with zlib.
	var crc32: UInt32 {
		withUnsafeBytes { rawBytes -> UInt32 in
			let bytes = rawBytes.bindMemory(to: UInt8.self)
			// Calling zlib with a nil buffer yields the initial CRC value to start from.
			let seed = zlib.crc32(0, nil, 0)
			let checksum = zlib.crc32(seed, bytes.baseAddress, numericCast(bytes.count))
			return UInt32(checksum)
		}
	}
}

View File

@ -1,594 +0,0 @@
//
// String+Punycode.swift
// Punycode
//
// Created by Nate Weaver on 2020-03-16.
//
import Foundation
public extension String {
/// The IDNA-encoded representation of a Unicode domain.
///
/// The string is mapped with `mapUTS46()`, canonically precomposed, and then split on
/// "." and "@". Every label that contains a non-ASCII character is Punycode-encoded and
/// prefixed with "xn--"; e.g., "www.bücher.ch" becomes "www.xn--bcher-kva.ch".
///
/// `nil` if the mapping fails, a label is not a valid label, or a label cannot be
/// Punycode-encoded.
var idnaEncoded: String? {
	guard let mapped = try? self.mapUTS46() else { return nil }

	let nonASCII = CharacterSet(charactersIn: UnicodeScalar(0)...UnicodeScalar(127)).inverted
	let separators = CharacterSet(charactersIn: ".@")
	let scanner = Scanner(string: mapped.precomposedStringWithCanonicalMapping)
	var result = ""

	while !scanner.isAtEnd {
		if let label = scanner.scanUpToCharacters(from: separators) {
			guard label.isValidLabel else { return nil }

			if label.rangeOfCharacter(from: nonASCII) != nil {
				// Fail the whole conversion when a label cannot be encoded; emitting a bare
				// "xn--" prefix with no payload would silently produce an invalid host name.
				guard let encoded = label.punycodeEncoded else { return nil }
				result.append("xn--")
				result.append(encoded)
			} else {
				result.append(label)
			}
		}

		// Copy the run of separators through unchanged.
		if let separatorRun = scanner.scanCharacters(from: separators) {
			result.append(separatorRun)
		}
	}

	return result
}
/// The Unicode representation of an IDNA-encoded domain.
///
/// The string is split on "." and "@"; each label that starts with "xn--" (compared
/// case-insensitively) is Punycode-decoded, so "www.xn--bcher-kva.ch" becomes
/// "www.bücher.ch". All other labels and the separators are copied through unchanged.
///
/// `nil` if a prefixed label cannot be decoded or decodes to an invalid label.
var idnaDecoded: String? {
	let separators = CharacterSet(charactersIn: ".@")
	let scanner = Scanner(string: self)
	var decoded = ""

	while !scanner.isAtEnd {
		if let label = scanner.scanUpToCharacters(from: separators) {
			if label.lowercased().hasPrefix("xn--") {
				// Everything after the four-character ACE prefix is the Punycode payload.
				let payloadStart = label.index(label.startIndex, offsetBy: 4)
				guard let unicodeLabel = label[payloadStart...].punycodeDecoded,
					unicodeLabel.isValidLabel else {
					return nil
				}
				decoded.append(unicodeLabel)
			} else {
				decoded.append(label)
			}
		}

		if let separatorRun = scanner.scanCharacters(from: separators) {
			decoded.append(separatorRun)
		}
	}

	return decoded
}
/// The IDNA- and percent-encoded representation of a URL string.
///
/// The string is split with `urlParts`; the host is IDNA-encoded and the user name,
/// password, path-and-query and fragment are percent-encoded. "%" (and "?" for the
/// path-and-query) is left alone so existing escapes are not encoded a second time.
///
/// `nil` if the host cannot be IDNA-encoded.
var encodedURLString: String? {
	let parts = urlParts
	guard let encodedHost = parts.host.idnaEncoded else { return nil }

	var pathAllowed = CharacterSet.urlPathAllowed
	pathAllowed.insert(charactersIn: "%?")
	let encodedPath = parts.pathAndQuery.addingPercentEncoding(withAllowedCharacters: pathAllowed) ?? ""

	// "user:password@" or "user@"; empty when there is no user name.
	var credentials = ""
	if let user = parts.username?.addingPercentEncoding(withAllowedCharacters: .urlUserAllowed) {
		if let secret = parts.password?.addingPercentEncoding(withAllowedCharacters: .urlPasswordAllowed) {
			credentials = "\(user):\(secret)@"
		} else {
			credentials = "\(user)@"
		}
	}

	var encoded = "\(parts.scheme)\(parts.delim)\(credentials)\(encodedHost)\(encodedPath)"

	if let rawFragment = parts.fragment {
		var fragmentAllowed = CharacterSet.urlFragmentAllowed
		fragmentAllowed.insert(charactersIn: "%")
		let encodedFragment = rawFragment.addingPercentEncoding(withAllowedCharacters: fragmentAllowed) ?? ""
		encoded.append("#\(encodedFragment)")
	}

	return encoded
}
/// The Unicode representation of an IDNA- and percent-encoded URL string.
///
/// The string is split with `urlParts`; the host is IDNA-decoded and percent-escapes are
/// removed from the user name, password, path-and-query and fragment.
///
/// `nil` if the host cannot be IDNA-decoded.
var decodedURLString: String? {
	let parts = urlParts
	guard let host = parts.host.idnaDecoded else { return nil }

	// "user:password@" or "user@"; empty when there is no decodable user name.
	let credentials: String
	switch (parts.username?.removingPercentEncoding, parts.password?.removingPercentEncoding) {
	case let (user?, secret?):
		credentials = "\(user):\(secret)@"
	case let (user?, nil):
		credentials = "\(user)@"
	case (nil, _):
		credentials = ""
	}

	let path = parts.pathAndQuery.removingPercentEncoding ?? ""
	var decoded = "\(parts.scheme)\(parts.delim)\(credentials)\(host)\(path)"

	if let fragment = parts.fragment?.removingPercentEncoding {
		decoded.append("#\(fragment)")
	}

	return decoded
}
}
public extension URL {
/// Initializes a URL with a Unicode URL string.
///
/// `URL(string:)` is tried on the string as given first. Only when that fails is the
/// string converted with `encodedURLString` and parsed again, i.e. the fallback is
/// equivalent to
///
/// ```
/// URL(string: unicodeString.encodedURLString!)
/// ```
///
/// Fails when neither the original nor the encoded string yields a URL.
///
/// - Parameter unicodeString: The unicode URL string with which to create a URL.
init?(unicodeString: String) {
	let direct = URL(string: unicodeString)
	let resolved = direct ?? unicodeString.encodedURLString.flatMap { URL(string: $0) }
	guard let url = resolved else { return nil }
	self = url
}
/// The IDNA- and percent-decoded representation of the URL.
///
/// Forwards to the `String` implementation using the URL's absolute string:
///
/// ```
/// self.absoluteString.decodedURLString
/// ```
var decodedURLString: String? {
	absoluteString.decodedURLString
}
/// Initializes a URL from a relative Unicode string and a base URL.
///
/// `URL(string:relativeTo:)` is tried on the string as given first. If that fails, a string
/// with a host is converted with `encodedURLString`; a string without a host is
/// percent-encoded as a path (keeping "%", "?" and "#"). The result is then parsed
/// relative to `url`.
///
/// - Parameters:
///   - unicodeString: The URL string with which to initialize the URL. `unicodeString` is interpreted relative to `url`.
///   - url: The base URL for the URL object.
init?(unicodeString: String, relativeTo url: URL?) {
	if let parsed = URL(string: unicodeString, relativeTo: url) {
		self = parsed
		return
	}

	let encodedString: String?
	if unicodeString.urlParts.host.isEmpty {
		var allowedCharacters = CharacterSet.urlPathAllowed
		allowedCharacters.insert(charactersIn: "%?#")
		encodedString = unicodeString.addingPercentEncoding(withAllowedCharacters: allowedCharacters)
	} else {
		encodedString = unicodeString.encodedURLString
	}

	guard let encoded = encodedString else { return nil }
	self.init(string: encoded, relativeTo: url)
}
}
private extension StringProtocol {
/// Punycode-encodes a string.
///
/// ASCII scalars are copied to the output first, followed by the delimiter (only when at
/// least one ASCII scalar was copied) and then the generalized variable-length integers
/// that describe the remaining scalars. The "xn--" prefix is not added here.
///
/// Returns `nil` on error (an intermediate value would overflow `UInt32`).
/// - Todo: Throw errors on failure instead of returning `nil`.
var punycodeEncoded: String? {
	var result = ""
	let scalars = self.unicodeScalars
	let inputLength = scalars.count

	var n = Punycode.initialN
	var delta: UInt32 = 0
	var outLen: UInt32 = 0
	var bias = Punycode.initialBias

	// Copy the basic (ASCII) code points through in order.
	for scalar in scalars where scalar.isASCII {
		result.unicodeScalars.append(scalar)
		outLen += 1
	}

	let b: UInt32 = outLen // number of basic code points
	var h: UInt32 = outLen // number of code points handled so far

	if b > 0 {
		result.append(Punycode.delimiter)
	}

	// Main encoding loop:
	while h < inputLength {
		// Find the smallest code point that is >= n.
		var m = UInt32.max
		for c in scalars where c.value >= n && c.value < m {
			m = c.value
		}

		if m - n > (UInt32.max - delta) / (h + 1) {
			return nil // overflow
		}

		delta += (m - n) * (h + 1)
		n = m

		for c in scalars {
			if c.value < n {
				// Swift's `+=` traps on overflow rather than wrapping to zero, so the
				// overflow has to be detected before incrementing.
				guard delta < UInt32.max else {
					return nil // overflow
				}
				delta += 1
			}

			if c.value == n {
				// Emit delta as a generalized variable-length integer.
				var q = delta
				var k = Punycode.base

				while true {
					let t = k <= bias ? Punycode.tmin :
						k >= bias + Punycode.tmax ? Punycode.tmax : k - bias

					if q < t {
						break
					}

					let encodedDigit = Punycode.encodeDigit(t + (q - t) % (Punycode.base - t), flag: false)
					result.unicodeScalars.append(UnicodeScalar(encodedDigit)!)
					q = (q - t) / (Punycode.base - t)
					k += Punycode.base
				}

				result.unicodeScalars.append(UnicodeScalar(Punycode.encodeDigit(q, flag: false))!)
				bias = Punycode.adapt(delta: delta, numPoints: h + 1, firstTime: h == b)
				delta = 0
				h += 1
			}
		}

		delta += 1
		n += 1
	}

	return result
}
/// Punycode-decodes a string.
///
/// The receiver is the Punycode payload only (without the "xn--" prefix). Scalars before
/// the last delimiter are copied through as basic code points; the remainder is read as
/// generalized variable-length integers that say which scalar to insert and where.
///
/// Returns `nil` on error: a non-ASCII basic code point, an invalid or truncated digit
/// sequence, arithmetic overflow, or a decoded value that is not a Unicode scalar.
/// - Todo: Throw errors on failure instead of returning `nil`.
var punycodeDecoded: String? {
	var result = ""
	let scalars = self.unicodeScalars
	let endIndex = scalars.endIndex

	var n = Punycode.initialN
	var outLen: UInt32 = 0
	var i: UInt32 = 0
	var bias = Punycode.initialBias

	// RFC 3492 splits at the LAST delimiter: the basic part may itself contain hyphens,
	// so stopping at the first one would make such labels undecodable.
	let b = scalars.lastIndex(where: { Character($0) == Punycode.delimiter }) ?? scalars.startIndex

	// Copy the basic code points (everything before the delimiter).
	for scalar in scalars[..<b] {
		if !scalar.isASCII {
			return nil // bad input
		}
		result.unicodeScalars.append(scalar)
		outLen += 1
	}

	// Skip the delimiter itself, if there was a non-empty basic part.
	var inPos = b > scalars.startIndex ? scalars.index(after: b) : scalars.startIndex

	while inPos < endIndex {
		var k = Punycode.base
		var w: UInt32 = 1
		let oldi = i

		// Decode one generalized variable-length integer into i.
		while true {
			if inPos >= endIndex {
				return nil // bad input
			}

			let digit = Punycode.decodeDigit(scalars[inPos].value)
			inPos = scalars.index(after: inPos)

			if digit >= Punycode.base { return nil } // bad input
			if digit > (UInt32.max - i) / w { return nil } // overflow

			i += digit * w
			let t = k <= bias ? Punycode.tmin :
				k >= bias + Punycode.tmax ? Punycode.tmax : k - bias

			if digit < t {
				break
			}

			if w > UInt32.max / (Punycode.base - t) { return nil } // overflow

			w *= Punycode.base - t
			k += Punycode.base
		}

		bias = Punycode.adapt(delta: i - oldi, numPoints: outLen + 1, firstTime: oldi == 0)

		if i / (outLen + 1) > UInt32.max - n { return nil } // overflow

		n += i / (outLen + 1)
		i %= outLen + 1

		// The input is untrusted: n may land on a surrogate or beyond U+10FFFF, neither of
		// which is a Unicode scalar. Treat that as bad input instead of crashing.
		guard let decodedScalar = UnicodeScalar(n) else { return nil }

		let index = result.unicodeScalars.index(result.unicodeScalars.startIndex, offsetBy: Int(i))
		result.unicodeScalars.insert(decodedScalar, at: index)

		outLen += 1
		i += 1
	}

	return result
}
}
private extension String {
/// Splits the string into scheme, delimiter, user info, host, path-and-query and fragment
/// by scanning it from left to right.
var urlParts: URLParts {
	let schemeTerminators = CharacterSet(charactersIn: ":/")
	let hostTerminators = CharacterSet(charactersIn: "/?")
	let scanner = Scanner(string: self)

	var scheme = ""
	var delim = ""
	var authority = "" // user info (if any) plus host, as scanned
	var path = ""

	if let leading = scanner.scanUpToCharacters(from: schemeTerminators) {
		let separator = scanner.scanCharacters(from: schemeTerminators) ?? ""

		if separator.hasPrefix(":") {
			// "scheme:" or "scheme://": what follows, up to "/" or "?", is the authority.
			scheme = leading
			delim = separator
			authority = scanner.scanUpToCharacters(from: hostTerminators) ?? ""
		} else {
			// No scheme: the text scanned so far is the start of a path.
			path = leading + separator
		}
	} else if let slashes = scanner.scanString("//") {
		// Scheme-less "//host/..." form.
		delim = slashes
		authority = scanner.scanUpToCharacters(from: hostTerminators) ?? ""
	}

	path.append(scanner.scanUpToString("#") ?? "")

	var fragment: String?
	if scanner.scanString("#") != nil {
		fragment = scanner.scanUpToCharacters(from: .newlines) ?? ""
	}

	// Separate "user[:password]@" from the host.
	var username: String?
	var password: String?
	var host = authority
	let authorityPieces = authority.components(separatedBy: "@")
	if authorityPieces.count > 1 {
		let credentials = authorityPieces[0].components(separatedBy: ":")
		username = credentials[0]
		password = credentials.count > 1 ? credentials[1] : nil
		host = authorityPieces[1]
	}

	return URLParts(scheme: scheme, delim: delim, host: host, pathAndQuery: path, username: username, password: password, fragment: fragment)
}
/// Errors thrown by `mapUTS46()`.
enum UTS46MapError: Error {
/// A disallowed codepoint was found in the string.
/// The associated value is the offending scalar.
case disallowedCodepoint(scalar: UnicodeScalar)
}
/// Perform a single-pass mapping using UTS #46.
///
/// Each scalar is looked up in the shared `UTS46` tables: ignored scalars are dropped,
/// scalars with a character-map entry are replaced by that entry, and everything else is
/// copied through unchanged.
///
/// - Returns: The mapped string.
/// - Throws: `UTS46MapError.disallowedCodepoint` for the first disallowed scalar found.
func mapUTS46() throws -> String {
	let tables = UTS46.shared
	var mapped = ""

	for scalar in unicodeScalars {
		guard !tables.disallowedCharacters.contains(scalar) else {
			throw UTS46MapError.disallowedCodepoint(scalar: scalar)
		}

		if tables.ignoredCharacters.contains(scalar) {
			continue
		}

		if let replacement = tables.characterMap[scalar.value] {
			mapped.append(replacement)
		} else {
			mapped.unicodeScalars.append(scalar)
		}
	}

	return mapped
}
/// Whether the string is acceptable as a single domain label.
///
/// The label must already be canonically precomposed, must survive `mapUTS46()`, must
/// not begin with a nonspacing, spacing or enclosing mark, and must have valid joiners.
var isValidLabel: Bool {
	let isPrecomposed = precomposedStringWithCanonicalMapping.unicodeScalars.elementsEqual(unicodeScalars)
	guard isPrecomposed, (try? mapUTS46()) != nil else { return false }

	if let leadingCategory = unicodeScalars.first?.properties.generalCategory {
		let markCategories: [Unicode.GeneralCategory] = [.nonspacingMark, .spacingMark, .enclosingMark]
		if markCategories.contains(leadingCategory) {
			return false
		}
	}

	return hasValidJoiners
}
/// Whether a string's joiners (if any) are valid according to IDNA 2008 ContextJ.
///
/// A zero-width non-joiner (U+200C) is accepted when it directly follows a virama, or
/// when, skipping transparent scalars, it is preceded by a left- or dual-joining scalar
/// and followed by a right- or dual-joining scalar. A zero-width joiner (U+200D) is
/// accepted only directly after a virama. Joining types come from the shared `UTS46` tables.
///
/// See [RFC 5892, Appendix A.1 and A.2](https://tools.ietf.org/html/rfc5892#appendix-A).
var hasValidJoiners: Bool {
	let scalars = self.unicodeScalars
	let joiningTypes = UTS46.shared.joiningTypes

	for index in scalars.indices {
		let scalar = scalars[index]

		if scalar.value == 0x200C { // Zero-width non-joiner
			if index == scalars.startIndex { return false }

			var cursor = scalars.index(before: index)
			if scalars[cursor].properties.canonicalCombiningClass == .virama { continue }

			// Walk backwards over transparent scalars to a left- or dual-joining scalar.
			while true {
				guard let joiningType = joiningTypes[scalars[cursor].value] else { return false }

				if joiningType == .transparent {
					if cursor == scalars.startIndex {
						return false
					}
					cursor = scalars.index(before: cursor)
				} else if joiningType == .dual || joiningType == .left {
					break
				} else {
					return false
				}
			}

			// Walk forwards over transparent scalars to a right- or dual-joining scalar.
			// The end check comes before every read so a trailing ZWNJ cannot index past
			// the end, and the cursor advances from itself so the scan always makes progress.
			cursor = scalars.index(after: index)
			while true {
				if cursor == scalars.endIndex {
					return false
				}

				guard let joiningType = joiningTypes[scalars[cursor].value] else { return false }

				if joiningType == .transparent {
					cursor = scalars.index(after: cursor)
				} else if joiningType == .right || joiningType == .dual {
					break
				} else {
					return false
				}
			}
		} else if scalar.value == 0x200D { // Zero-width joiner
			if index == scalars.startIndex { return false }

			let previous = scalars[scalars.index(before: index)]
			if previous.properties.canonicalCombiningClass != .virama { return false }
		}
	}

	return true
}
}
/// Punycode parameters and digit/bias helpers shared by the encoder and decoder.
private enum Punycode {
	static let base: UInt32 = 36
	static let tmin: UInt32 = 1
	static let tmax: UInt32 = 26
	static let skew: UInt32 = 38
	static let damp: UInt32 = 700
	static let initialBias: UInt32 = 72
	static let initialN: UInt32 = 0x80
	static let delimiter: Character = "-"
	static let maxint = UInt32.max

	/// Maps an ASCII code point to its digit value: "a"–"z" and "A"–"Z" give 0–25,
	/// "0"–"9" give 26–35. Any other code point gives `base`, i.e. "not a digit".
	static func decodeDigit(_ cp: UInt32) -> UInt32 {
		switch cp {
		case 48...57: // "0"..."9"
			return cp - 22
		case 65...90: // "A"..."Z"
			return cp - 65
		case 97...122: // "a"..."z"
			return cp - 97
		default:
			return base
		}
	}

	/// Maps a digit value (0–35) to its ASCII code point: 0–25 give a letter (uppercase
	/// when `flag` is set, lowercase otherwise), 26–35 give "0"–"9".
	static func encodeDigit(_ d: UInt32, flag: Bool) -> UInt32 {
		let letterOffset: UInt32 = d < 26 ? 75 : 0
		let caseOffset: UInt32 = flag ? 32 : 0
		return d + 22 + letterOffset - caseOffset
	}

	/// Computes the next bias from the delta that was just encoded or decoded.
	///
	/// - Parameters:
	///   - delta: The delta that was just processed.
	///   - numPoints: The number of code points handled so far, including the current one.
	///   - firstTime: `true` for the first adaptation, which divides by `damp` instead of halving.
	static func adapt(delta: UInt32, numPoints: UInt32, firstTime: Bool) -> UInt32 {
		var scaled = firstTime ? delta / damp : delta >> 1
		scaled += scaled / numPoints

		let threshold = ((base - tmin) * tmax) / 2
		var k: UInt32 = 0
		while scaled > threshold {
			scaled /= base - tmin
			k += base
		}

		return k + (base - tmin + 1) * scaled / (scaled + skew)
	}
}
/// The pieces of a URL string as split by `String.urlParts`.
private struct URLParts {
// Text before the first ":" delimiter; empty when the string has no scheme.
var scheme: String
// The run of ":" and "/" characters that followed the scheme, or a leading "//"; may be empty.
var delim: String
// Host portion with any "user[:password]@" prefix removed; may be empty.
var host: String
// Everything after the host up to (not including) "#".
var pathAndQuery: String
// Text before "@" in the authority, up to the first ":"; nil when there is no "@".
var username: String?
// Text after the first ":" in the user info; nil when absent.
var password: String?
// Text after "#"; nil when the string has no "#".
var fragment: String?
}

View File

@ -1,199 +0,0 @@
//
// UTS46.swift
// PunyCocoa Swift
//
// Created by Nate Weaver on 2020-03-29.
//
import Foundation
import Compression
/// UTS46 mapping.
///
/// Storage file format. Codepoints are stored UTF-8-encoded.
///
/// All multibyte integers are little-endian.
///
/// Header:
///
/// +--------------+---------+---------+---------+
/// | 6 bytes | 1 byte | 1 byte | 4 bytes |
/// +--------------+---------+---------+---------+
/// | magic number | version | flags | crc32 |
/// +--------------+---------+---------+---------+
///
/// - `magic number`: `"UTS#46"` (`0x55 0x54 0x53 0x23 0x34 0x36`).
/// - `version`: format version (1 byte; currently `0x01`).
/// - `flags`: Bitfield:
///
/// +-----+-----+-----+-----+-----+-----+-----+-----+
/// | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
/// +-----+-----+-----+-----+-----+-----+-----+-----+
/// | currently unused | crc | compression |
/// +-----+-----+-----+-----+-----+-----+-----+-----+
///
/// - `crc`: Contains a CRC32 of the data after the header.
/// - `compression`: compression mode of the data.
/// Currently identical to NSData's compression constants + 1:
///
/// - 0: no compression
/// - 1: LZFSE
/// - 2: LZ4
/// - 3: LZMA
/// - 4: ZLIB
///
/// - `crc32`: CRC32 of the (possibly compressed) data. Implementations can skip
/// parsing this unless data integrity is an issue.
///
/// The data section is a collection of data blocks of the format
///
/// [marker][section data] ...
///
/// Section data formats:
///
/// If marker is `characterMap`:
///
/// [codepoint][mapped-codepoint ...][null] ...
///
/// If marker is `disallowedCharacters` or `ignoredCharacters`:
///
/// [codepoint-range] ...
///
/// If marker is `joiningTypes`:
///
/// [type][[codepoint-range] ...]
///
/// where `type` is one of `C`, `D`, `L`, `R`, or `T`.
///
/// `codepoint-range`: two codepoints, marking the first and last codepoints of a
/// closed range. Single-codepoint ranges have the same start and end codepoint.
///
final class UTS46: Sendable {

	/// Shared instance; its tables are read once through `UTS46Loader`.
	static let shared = UTS46()

	/// Replacement strings keyed by the code point they replace.
	let characterMap: [UInt32: String]
	/// Scalars that are dropped during mapping.
	let ignoredCharacters: CharacterSet
	/// Scalars that make mapping fail.
	let disallowedCharacters: CharacterSet
	/// Joining type per code point, used for the joiner checks.
	let joiningTypes: [UInt32: JoiningType]

	/// Bytes that introduce or terminate sections in the storage file.
	enum Marker {
		static let characterMap: UInt8 = 0xFF
		static let ignoredCharacters: UInt8 = 0xFE
		static let disallowedCharacters: UInt8 = 0xFD
		static let joiningTypes: UInt8 = 0xFC
		static let min: UInt8 = 0xF5 // No valid UTF-8 byte can fall here.
		static let sequenceTerminator: UInt8 = 0x00
	}

	/// Joining types, identified by the letter used for them in the storage file.
	enum JoiningType: Character {
		case causing = "C"
		case dual = "D"
		case left = "L"
		case right = "R"
		case transparent = "T"
	}

	enum UTS46Error: Error {
		case badSize
		case compressionError
		case decompressionError
		case badMarker
		case unknownVersion
	}

	/// Identical values to `NSData.CompressionAlgorithm + 1`.
	enum CompressionAlgorithm: UInt8 {
		case none = 0
		case lzfse = 1
		case lz4 = 2
		case lzma = 3
		case zlib = 4

		/// The matching Compression-framework constant, or `nil` for `.none`.
		var rawAlgorithm: compression_algorithm? {
			switch self {
			case .none:
				return nil
			case .lzfse:
				return COMPRESSION_LZFSE
			case .lz4:
				return COMPRESSION_LZ4
			case .lzma:
				return COMPRESSION_LZMA
			case .zlib:
				return COMPRESSION_ZLIB
			}
		}
	}

	/// The fixed 8-byte file header: 6-byte signature, version byte, flags byte.
	struct Header: RawRepresentable, CustomDebugStringConvertible {
		typealias RawValue = [UInt8]

		private static let compressionMask: UInt8 = 0x07
		private static let signature: [UInt8] = Array("UTS#46".utf8)

		/// The flags byte: compression mode in the low three bits, CRC presence in bit 3.
		private struct Flags: RawRepresentable {
			private static let hasCRCMask: UInt8 = 1 << 3
			private static let compressionMask: UInt8 = 0x7

			var hasCRC: Bool
			var compression: CompressionAlgorithm

			var rawValue: UInt8 {
				let crcBit: UInt8 = hasCRC ? Self.hasCRCMask : 0
				return crcBit | compression.rawValue
			}

			init(rawValue: UInt8) {
				hasCRC = (rawValue & Self.hasCRCMask) != 0
				// Unknown compression values fall back to "no compression".
				compression = CompressionAlgorithm(rawValue: rawValue & Self.compressionMask) ?? CompressionAlgorithm.none
			}

			init(compression: CompressionAlgorithm = .none, hasCRC: Bool = false) {
				self.compression = compression
				self.hasCRC = hasCRC
			}
		}

		let version: UInt8
		private var flags: Flags

		var rawValue: [UInt8] {
			let bytes = Self.signature + [version, flags.rawValue]
			assert(bytes.count == 8)
			return bytes
		}

		var hasCRC: Bool { flags.hasCRC }
		var compression: CompressionAlgorithm { flags.compression }

		/// Offset of the data section: the 8 header bytes plus 4 CRC bytes when present.
		var dataOffset: Int { flags.hasCRC ? 12 : 8 }

		/// Fails unless `rawValue` is exactly 8 bytes long and starts with the signature.
		init?<T: DataProtocol>(rawValue: T) where T.Index == Int {
			let bytes = Array(rawValue)
			guard bytes.count == 8, bytes.starts(with: Self.signature) else { return nil }
			version = bytes[6]
			flags = Flags(rawValue: bytes[7])
		}

		init(compression: CompressionAlgorithm = .none, hasCRC: Bool = false) {
			self.version = 1
			self.flags = Flags(compression: compression, hasCRC: hasCRC)
		}

		var debugDescription: String { "has CRC: \(hasCRC); compression: \(String(describing: compression))" }
	}

	/// Copies the tables out of a freshly created `UTS46Loader`; crashes if the loader throws.
	init() {
		let loader = try! UTS46Loader()
		characterMap = loader.characterMap
		ignoredCharacters = loader.ignoredCharacters
		disallowedCharacters = loader.disallowedCharacters
		joiningTypes = loader.joiningTypes
	}
}

View File

@ -1,231 +0,0 @@
//
// UTS46Loader.swift
// icumap2code
//
// Created by Nate Weaver on 2020-05-08.
//
import Foundation
import Compression
/// Reads the bundled "uts46" resource and exposes the tables parsed from it.
final class UTS46Loader {
	var characterMap = [UInt32: String]()
	var ignoredCharacters = CharacterSet()
	var disallowedCharacters = CharacterSet()
	var joiningTypes: [UInt32: UTS46.JoiningType] = [:]

	/// Locates the "uts46" resource in `bundle` and loads it.
	///
	/// - Throws: `CocoaError(.fileNoSuchFile)` when the resource is missing, or any error
	///   thrown while loading it.
	init() throws {
		guard let resourceURL = bundle.url(forResource: "uts46", withExtension: nil) else {
			throw CocoaError(.fileNoSuchFile)
		}
		try load(from: resourceURL)
	}
}
private extension UTS46Loader {
/// Reads the 8-byte header at the start of `data`.
///
/// - Returns: The parsed header, or `nil` when `UTS46.Header` rejects the bytes.
/// - Throws: `UTS46.UTS46Error.badSize` when `data` holds fewer than 8 bytes.
func parseHeader(from data: Data) throws -> UTS46.Header? {
	let headerBytes = data.prefix(8)
	if headerBytes.count != 8 {
		throw UTS46.UTS46Error.badSize
	}
	return UTS46.Header(rawValue: headerBytes)
}
/// Loads the tables from the storage file at `url`.
///
/// The header is validated, the data section is decompressed if needed, and each marked
/// section is handed to its parser in turn.
///
/// - Throws: File-reading errors, `UTS46.UTS46Error.badSize` for a truncated file,
///   `CocoaError(.fileReadCorruptFile)` for an unrecognized header,
///   `UTS46.UTS46Error.unknownVersion`, `.decompressionError`, or `.badMarker`.
func load(from url: URL) throws {
	let fileData = try Data(contentsOf: url)

	// Report a bad header instead of returning silently with empty tables; every other
	// malformed-file condition in this method throws as well.
	guard let header = try parseHeader(from: fileData) else {
		throw CocoaError(.fileReadCorruptFile)
	}
	guard header.version == 1 else { throw UTS46.UTS46Error.unknownVersion }

	let offset = header.dataOffset
	guard fileData.count > offset else { throw UTS46.UTS46Error.badSize }

	// `subdata(in:)` yields zero-based indices. A plain slice keeps the parent's indices,
	// which breaks the zero-based indexing below when the data is stored uncompressed and
	// handed back as-is.
	let payload = fileData.subdata(in: offset..<fileData.count)
	guard let data = self.decompress(data: payload, algorithm: header.compression) else {
		throw UTS46.UTS46Error.decompressionError
	}

	var index = 0
	while index < data.count {
		let marker = data[index]
		index += 1 // step past the marker; each parser returns where it stopped

		switch marker {
		case UTS46.Marker.characterMap:
			index = parseCharacterMap(from: data, start: index)
		case UTS46.Marker.ignoredCharacters:
			index = parseIgnoredCharacters(from: data, start: index)
		case UTS46.Marker.disallowedCharacters:
			index = parseDisallowedCharacters(from: data, start: index)
		case UTS46.Marker.joiningTypes:
			index = parseJoiningTypes(from: data, start: index)
		default:
			throw UTS46.UTS46Error.badMarker
		}
	}
}
/// The bundle searched for the "uts46" resource: the package's module bundle when built
/// as a Swift package, otherwise the bundle that contains this class.
var bundle: Bundle {
#if SWIFT_PACKAGE
return Bundle.module
#else
return Bundle(for: Self.self)
#endif
}
/// Decompresses `data` with the given algorithm.
///
/// - Parameters:
///   - data: The (possibly compressed) data section of the file.
///   - algorithm: The compression mode from the header.
/// - Returns: `data` unchanged when the algorithm has no raw counterpart (no compression);
///   otherwise the decompressed bytes, or `nil` when nothing could be decoded or the
///   output filled the whole 128 KB buffer (and so may have been truncated).
func decompress(data: Data, algorithm: UTS46.CompressionAlgorithm?) -> Data? {
	guard let rawAlgorithm = algorithm?.rawAlgorithm else { return data }

	let capacity = 131_072 // 128 KB
	let destinationBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: capacity)
	// `Data(bytes:count:)` copies, so the scratch buffer must be released on every path.
	defer { destinationBuffer.deallocate() }

	return data.withUnsafeBytes { rawBuffer -> Data? in
		// An empty input has no base address; there is nothing to decode.
		guard let source = rawBuffer.bindMemory(to: UInt8.self).baseAddress else { return nil }

		let decodedCount = compression_decode_buffer(destinationBuffer, capacity, source, rawBuffer.count, nil, rawAlgorithm)
		if decodedCount == 0 || decodedCount == capacity {
			return nil
		}

		return Data(bytes: destinationBuffer, count: decodedCount)
	}
}
/// Parses the character-map section starting at `start` and replaces `characterMap`.
///
/// Each record is a UTF-8 string ended by `UTS46.Marker.sequenceTerminator`: its first
/// scalar is the code point being mapped and the remaining scalars are the replacement.
/// Parsing stops at the first byte above `UTS46.Marker.min` or at the end of `data`.
/// Empty or undecodable records, and a trailing record with no terminator, are skipped.
///
/// - Returns: The index at which parsing stopped.
func parseCharacterMap(from data: Data, start: Int) -> Int {
	characterMap.removeAll()
	var index = start

	main: while index < data.count {
		var accumulator = Data()

		// Bounds-checked so a truncated final record cannot index past the end of `data`.
		while index < data.count, data[index] != UTS46.Marker.sequenceTerminator {
			if data[index] > UTS46.Marker.min { break main }
			accumulator.append(data[index])
			index += 1
		}

		// Ran off the end without a terminator: the record is incomplete, so drop it.
		guard index < data.count else { break }

		// Always step past the terminator, even for a record that ends up being skipped;
		// otherwise an empty record would be re-read forever.
		index += 1

		guard let record = String(data: accumulator, encoding: .utf8),
			let codepoint = record.unicodeScalars.first else {
			continue
		}

		characterMap[codepoint.value] = String(record.unicodeScalars.dropFirst())
	}

	return index
}
/// Interprets the scalars of `from` as consecutive (first, last) pairs and returns one
/// closed range per pair.
///
/// - Returns: The ranges in order, or `nil` when the number of scalars is odd.
func parseRanges(from: String) -> [ClosedRange<UnicodeScalar>]? {
	let scalars = Array(from.unicodeScalars)
	guard scalars.count % 2 == 0 else { return nil }

	return stride(from: 0, to: scalars.count, by: 2).map { pairStart in
		scalars[pairStart]...scalars[pairStart + 1]
	}
}
/// Reads bytes from `start` up to the next byte that is not below `UTS46.Marker.min` (or
/// the end of `data`), decodes them as UTF-8, and builds a character set from the
/// resulting scalar ranges.
///
/// - Returns: The index at which reading stopped, and the character set, or `nil` for
///   the set when the scalars do not form complete (first, last) pairs.
func parseCharacterSet(from data: Data, start: Int) -> (index: Int, charset: CharacterSet?) {
	var cursor = start
	var utf8Bytes = Data()

	while cursor < data.count, data[cursor] < UTS46.Marker.min {
		utf8Bytes.append(data[cursor])
		cursor += 1
	}

	let text = String(data: utf8Bytes, encoding: .utf8)!
	let charset = parseRanges(from: text).map { ranges in
		ranges.reduce(into: CharacterSet()) { set, range in
			set.insert(charactersIn: range)
		}
	}

	return (index: cursor, charset: charset)
}
/// Parses the ignored-characters section starting at `start`.
///
/// `ignoredCharacters` is replaced only when the section yields a character set.
///
/// - Returns: The index at which parsing stopped.
func parseIgnoredCharacters(from data: Data, start: Int) -> Int {
	let parsed = parseCharacterSet(from: data, start: start)
	if let ignored = parsed.charset {
		ignoredCharacters = ignored
	}
	return parsed.index
}
/// Parses the disallowed-characters section starting at `start`.
///
/// `disallowedCharacters` is replaced only when the section yields a character set.
///
/// - Returns: The index at which parsing stopped.
func parseDisallowedCharacters(from data: Data, start: Int) -> Int {
	let parsed = parseCharacterSet(from: data, start: start)
	if let disallowed = parsed.charset {
		disallowedCharacters = disallowed
	}
	return parsed.index
}
/// Parses the joining-types section starting at `start` and replaces `joiningTypes`.
///
/// The section is UTF-8 text: an ASCII scalar selects the joining type, and the
/// non-ASCII scalars after it are read as (first, last) pairs whose whole code point
/// range is assigned that type. Reading stops at the next byte that is not below
/// `UTS46.Marker.min`, or at the end of `data`.
///
/// - Returns: The index at which parsing stopped.
func parseJoiningTypes(from data: Data, start: Int) -> Int {
	joiningTypes.removeAll()

	// Collect the section's bytes first and parse them afterwards. Bailing out as soon as
	// the next marker is seen would discard the section unparsed whenever it is not the
	// last one in the file.
	var index = start
	var accumulator = Data()
	while index < data.count, data[index] < UTS46.Marker.min {
		accumulator.append(data[index])
		index += 1
	}

	// Undecodable bytes leave the table empty rather than crashing.
	guard let str = String(data: accumulator, encoding: .utf8) else { return index }

	var type: UTS46.JoiningType?
	var first: UnicodeScalar?

	for scalar in str.unicodeScalars {
		if scalar.isASCII {
			type = UTS46.JoiningType(rawValue: Character(scalar))
		} else if let type = type {
			if let rangeStart = first {
				// A reversed pair would trap when forming the range; skip it instead.
				if rangeStart.value <= scalar.value {
					for value in rangeStart.value...scalar.value {
						joiningTypes[value] = type
					}
				}
				first = nil
			} else {
				first = scalar
			}
		}
	}

	return index
}
}

Binary file not shown.

View File

@ -92,7 +92,7 @@ final class AddFeedViewController: UITableViewController {
let urlString = urlTextField.text ?? ""
let normalizedURLString = urlString.normalizedURL
guard !normalizedURLString.isEmpty, let url = URL(unicodeString: normalizedURLString) else {
guard !normalizedURLString.isEmpty, let url = URL(string: normalizedURLString) else {
return
}

View File

@ -46,8 +46,8 @@ class FeedInspectorViewController: UITableViewController {
alwaysShowReaderViewSwitch.setOn(feed.isArticleExtractorAlwaysOn ?? false, animated: false)
homePageLabel.text = feed.homePageURL?.decodedURLString
feedURLLabel.text = feed.url.decodedURLString
homePageLabel.text = feed.homePageURL
feedURLLabel.text = feed.url
NotificationCenter.default.addObserver(self, selector: #selector(feedIconDidBecomeAvailable(_:)), name: .FeedIconDidBecomeAvailable, object: nil)