Continue progress on HTMLEntityDecoder.

This commit is contained in:
Brent Simmons 2024-09-15 20:43:45 -07:00
parent f835182bc6
commit 26d0a19c8b

View File

@ -11,15 +11,14 @@ public final class HTMLEntityDecoder {
static func decodedString(withEncodedString encodedString: String) -> String {
let scanner = Scanner(string: encodedString)
scanner.charactersToBeSkipped = nil
let scanner = EntityScanner(string: encodedString)
var result = ""
var didDecodeAtLeastOneEntity = false
while true {
var scannedString: NSString? = nil
if scanner.scanUpTo("&", into: &scannedString) {
let scannedString = scanner.scanUpTo(Character("&"))
if !scannedString.isEmpty {
result.append(scannedString)
}
if scanner.isAtEnd {
@ -28,8 +27,7 @@ public final class HTMLEntityDecoder {
let savedScanLocation = scanner.scanLocation
var decodedEntity: String? = nil
if scanner.scanEntityValue(&decodedEntity) {
if let decodedEntity = scanner.scanEntityValue() {
result.append(decodedEntity)
didDecodeAtLeastOneEntity = true
}
@ -43,7 +41,7 @@ public final class HTMLEntityDecoder {
}
}
if !didDecodeAtLeastOneEntity { // No changes made?
if !didDecodeAtLeastOneEntity { // No entities decoded?
return encodedString
}
return result
@ -51,14 +49,21 @@ public final class HTMLEntityDecoder {
}
/// Purpose-built version of NSScanner, which has deprecated the parts we want to use.
final class RSScanner {
final class EntityScanner {
let string: String
let count: Int
var scanLocation = 0
/// True when `scanLocation` is at or beyond `count`, i.e. there is nothing left to scan.
var isAtEnd: Bool {
    // Compare against `count`, not `count - 1`: the latter reports the end one
    // position early, so the character at the last position could never be scanned.
    scanLocation >= count
}
/// The character at `scanLocation`, or nil when there is no character at that position
/// (the scanner is at the end, or `scanLocation` does not map to a valid index of `string`).
///
/// Does not advance `scanLocation`.
var currentCharacter: Character? {
    // A negative offset from `startIndex` would trap inside `index(_:offsetBy:limitedBy:)`.
    guard !isAtEnd, scanLocation >= 0 else {
        return nil
    }
    // `index(_:offsetBy:limitedBy:)` returns `endIndex` itself when the offset lands exactly
    // on the limit, and subscripting a String at `endIndex` traps — so rule that out explicitly
    // rather than relying on `count` always matching the number of characters in `string`.
    // Note: String indices are not random-access, so this walks from `startIndex` on every call.
    guard let index = string.index(string.startIndex, offsetBy: scanLocation, limitedBy: string.endIndex),
          index < string.endIndex else {
        return nil
    }
    return string[index]
}
init(string: String) {
@ -67,25 +72,74 @@ final class RSScanner {
}
/// Scans forward from `scanLocation` and returns the characters found before `characterToFind`.
///
/// Every examined character advances `scanLocation` — including `characterToFind` itself —
/// so after a match the scan location is just past the matched character. If `characterToFind`
/// never appears, everything up to the end is returned and the scanner ends up at the end.
///
/// - Parameter characterToFind: the character at which scanning stops.
/// - Returns: the scanned portion before `characterToFind` (never including it). May be empty string:
///   when the next character is `characterToFind`, or when the scanner is already at the end.
func scanUpTo(_ characterToFind: Character) -> String {
    var scanned = ""
    // `currentCharacter` is nil once the scanner is at the end, so an exhausted scanner
    // falls straight through and yields "" — no separate `isAtEnd` early return is needed
    // (and `nil` is not a valid result for the non-optional return type).
    while let ch = currentCharacter {
        scanLocation += 1
        if ch == characterToFind {
            break
        }
        scanned.append(ch)
    }
    return scanned
}
// - (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity {
//
// NSString *s = self.string;
// NSUInteger initialScanLocation = self.scanLocation;
// static NSUInteger maxEntityLength = 20; // It's probably smaller, but this is just for sanity.
//
// while (true) {
//
// unichar ch = [s characterAtIndex:self.scanLocation];
// if ([NSCharacterSet.whitespaceAndNewlineCharacterSet characterIsMember:ch]) {
// break;
// }
// if (ch == ';') {
// if (!decodedEntity) {
// return YES;
// }
// NSString *rawEntity = [s substringWithRange:NSMakeRange(initialScanLocation + 1, (self.scanLocation - initialScanLocation) - 1)];
// *decodedEntity = [rawEntity rs_stringByDecodingEntity];
// self.scanLocation = self.scanLocation + 1;
// return *decodedEntity != nil;
// }
//
// self.scanLocation = self.scanLocation + 1;
// if (self.scanLocation - initialScanLocation > maxEntityLength) {
// break;
// }
// if (self.isAtEnd) {
// break;
// }
// }
//
// return NO;
// }
/// Attempts to scan an entity starting at the current `scanLocation`.
///
/// - Returns: the decoded entity text on success (the caller appends it to its result),
///   or nil when no entity could be decoded.
///
/// NOTE(review): this body is unfinished — the `guard` below has no `else` clause and the
/// loop contains no scanning logic yet, so as written it does not compile. Presumably it is
/// meant to port the commented-out Objective-C `rs_scanEntityValue:` that precedes it — confirm.
func scanEntityValue() -> String? {
// Remembered so the scan can be bounded/rewound relative to where it started (unused so far).
let initialScanLocation = scanLocation
let maxEntityLength = 20 // It's probably smaller, but this is just for sanity.
while true {
// TODO: needs `else { break }` (or similar) before this will compile.
guard let ch = currentCharacter
}
return nil
}
// NOTE(review): empty stub — it has no body and returns nothing, and its name clashes with
// the `currentCharacter` computed property declared earlier in this class. Presumably
// superseded by that property and meant to be removed — confirm.
private func currentCharacter() -> Character? {
}
private func
}