NetNewsWire/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift
2024-09-15 20:43:45 -07:00

146 lines
3.1 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//
// HTMLEntityDecoder.swift
//
//
// Created by Brent Simmons on 9/14/24.
//
import Foundation
/// Namespace for decoding HTML entities embedded in a string.
public final class HTMLEntityDecoder {

    /// Returns `encodedString` with the entities that could be decoded replaced by their values.
    ///
    /// The string is walked with an `EntityScanner`: text before each `&` is copied to the
    /// output, then the scanner is asked for a decoded entity. When it has none, a literal `&`
    /// is written instead and scanning resumes one position past the saved location.
    ///
    /// - Parameter encodedString: The text that may contain entities.
    /// - Returns: The decoded text, or `encodedString` itself when nothing was decoded.
    static func decodedString(withEncodedString encodedString: String) -> String {
        let scanner = EntityScanner(string: encodedString)
        var output = ""
        var decodedEntityCount = 0

        repeat {
            // Appending an empty string is a no-op, so no emptiness check is needed.
            output += scanner.scanUpTo("&")
            guard !scanner.isAtEnd else {
                break
            }

            // Remember where we were so a failed entity scan can be rewound.
            let resumeLocation = scanner.scanLocation
            if let entityValue = scanner.scanEntityValue() {
                output += entityValue
                decodedEntityCount += 1
            } else {
                output += "&"
                scanner.scanLocation = resumeLocation + 1
            }
        } while !scanner.isAtEnd

        // Nothing decoded: hand back the caller's string rather than the rebuilt copy.
        return decodedEntityCount > 0 ? output : encodedString
    }
}
/// Purpose-built version of NSScanner, which has deprecated the parts we want to use.
///
/// Locations (`scanLocation`, `count`) are measured in `Character`s of `string`.
final class EntityScanner {

    /// The text being scanned.
    let string: String

    /// Number of `Character`s in `string`.
    let count: Int

    /// Index (in `Character`s) of the next character to be scanned.
    var scanLocation = 0

    /// `string` as an array, so that reading the character at `scanLocation` is O(1)
    /// instead of walking the string from its start on every access.
    private let characters: [Character]

    /// Named entities this scanner can decode.
    /// NOTE(review): only the five XML predefined entities are listed here; the full HTML
    /// named-entity table is not part of this file and should be plugged in here.
    private static let namedEntities: [String: String] = [
        "amp": "&",
        "lt": "<",
        "gt": ">",
        "quot": "\"",
        "apos": "'"
    ]

    /// `true` once `scanLocation` has reached or passed the end of `string`.
    var isAtEnd: Bool {
        scanLocation >= count
    }

    /// The character at `scanLocation`, or `nil` when the location is out of bounds.
    var currentCharacter: Character? {
        guard scanLocation >= 0, scanLocation < count else {
            return nil
        }
        return characters[scanLocation]
    }

    init(string: String) {
        let characters = Array(string)
        self.string = string
        self.characters = characters
        self.count = characters.count
    }

    /// Scans up to `characterToFind` and returns the characters up to (and not including) `characterToFind`.
    ///
    /// Like `NSScanner`, the scanner is left positioned *on* `characterToFind` (or at the end
    /// when it is not found), so the caller can inspect or skip the delimiter itself.
    /// - Returns: the scanned portion before `characterToFind`. May be empty string.
    func scanUpTo(_ characterToFind: Character) -> String {
        var scanned = ""
        while let ch = currentCharacter, ch != characterToFind {
            scanned.append(ch)
            scanLocation += 1
        }
        return scanned
    }

    /// Scans an entity such as `&amp;` or `&#8217;` starting at `scanLocation`.
    ///
    /// Swift port of the Objective-C `rs_scanEntityValue:`. The character at the initial
    /// location (normally `&`) is skipped; everything between it and the next `;` is taken
    /// as the raw entity. Scanning gives up at whitespace, at the end of the string, or
    /// after `maxEntityLength` characters.
    ///
    /// On success the scanner is left just past the `;`. On failure `scanLocation` is left
    /// wherever scanning stopped; callers are expected to restore it themselves.
    /// - Returns: the decoded value, or `nil` when no decodable entity was found.
    func scanEntityValue() -> String? {
        let initialScanLocation = scanLocation
        let maxEntityLength = 20 // It's probably smaller, but this is just for sanity.

        while let ch = currentCharacter {
            if ch.isWhitespace {
                break
            }
            if ch == ";" {
                // A ";" at the very first position has no entity body in front of it,
                // and would otherwise produce an invalid range below.
                guard scanLocation > initialScanLocation else {
                    return nil
                }
                let rawEntity = String(characters[(initialScanLocation + 1)..<scanLocation])
                scanLocation += 1
                return Self.decodedEntity(rawEntity)
            }

            scanLocation += 1
            if scanLocation - initialScanLocation > maxEntityLength {
                break
            }
        }

        return nil
    }

    /// Decodes the text between `&` and `;` — a numeric reference (`#NNN`, `#xHHH`) or a
    /// name from `namedEntities`. Returns `nil` for anything it does not recognize.
    private static func decodedEntity(_ rawEntity: String) -> String? {
        guard rawEntity.hasPrefix("#") else {
            return namedEntities[rawEntity]
        }

        var digits = rawEntity.dropFirst()
        var radix = 10
        if let marker = digits.first, marker == "x" || marker == "X" {
            radix = 16
            digits = digits.dropFirst()
        }

        // Validate explicitly: the integer parser would also accept a leading "+".
        let isValidDigit: (Character) -> Bool = { ch in
            radix == 16 ? (ch.isASCII && ch.isHexDigit) : ("0"..."9").contains(ch)
        }
        guard !digits.isEmpty, digits.allSatisfy(isValidDigit),
              let codePoint = UInt32(digits, radix: radix),
              let scalar = Unicode.Scalar(codePoint) else {
            return nil
        }
        return String(Character(scalar))
    }
}