diff --git a/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift b/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift index 4dd12242b..dee3fca5b 100644 --- a/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift +++ b/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift @@ -11,15 +11,14 @@ public final class HTMLEntityDecoder { static func decodedString(withEncodedString encodedString: String) -> String { - let scanner = Scanner(string: encodedString) - scanner.charactersToBeSkipped = nil + let scanner = EntityScanner(string: encodedString) var result = "" var didDecodeAtLeastOneEntity = false while true { - var scannedString: NSString? = nil - if scanner.scanUpTo("&", into: &scannedString) { + let scannedString = scanner.scanUpTo(Character("&")) + if !scannedString.isEmpty { result.append(scannedString) } if scanner.isAtEnd { @@ -28,8 +27,7 @@ public final class HTMLEntityDecoder { let savedScanLocation = scanner.scanLocation - var decodedEntity: String? = nil - if scanner.scanEntityValue(&decodedEntity) { + if let decodedEntity = scanner.scanEntityValue() { result.append(decodedEntity) didDecodeAtLeastOneEntity = true } @@ -43,7 +41,7 @@ public final class HTMLEntityDecoder { } } - if !didDecodeAtLeastOneEntity { // No changes made? + if !didDecodeAtLeastOneEntity { // No entities decoded? return encodedString } return result @@ -51,14 +49,21 @@ public final class HTMLEntityDecoder { } /// Purpose-built version of NSScanner, which has deprecated the parts we want to use. -final class RSScanner { +final class EntityScanner { let string: String let count: Int var scanLocation = 0 - var isAtEnd { - scanLocation >= count - 1 + var isAtEnd: Bool { + scanLocation >= count + } + + var currentCharacter: Character? { + guard !isAtEnd, let index = string.index(string.startIndex, offsetBy: scanLocation, limitedBy: string.endIndex) else { + return nil + } + return string[index] } init(string: String) { @@ -67,25 +72,74 @@ final class RSScanner { } /// Scans up to `characterToFind` and returns the characters up to (and not including) `characterToFind`. - /// - Returns: nil when there were no characters accumulated (next character was `characterToFind` or already at end of string) - func scanUpTo(_ characterToFind: Character) -> String? { + /// - Returns: the scanned portion before `characterToFind`. May be empty string. + func scanUpTo(_ characterToFind: Character) -> String { - if isAtEnd { - return nil - } + var scanned = "" while true { + guard let ch = currentCharacter else { + break + } + scanLocation += 1 + + if ch == characterToFind { + break + } + else { + scanned.append(ch) + } + } + + return scanned + } + +// - (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity { +// +// NSString *s = self.string; +// NSUInteger initialScanLocation = self.scanLocation; +// static NSUInteger maxEntityLength = 20; // It’s probably smaller, but this is just for sanity. +// +// while (true) { +// +// unichar ch = [s characterAtIndex:self.scanLocation]; +// if ([NSCharacterSet.whitespaceAndNewlineCharacterSet characterIsMember:ch]) { +// break; +// } +// if (ch == ';') { +// if (!decodedEntity) { +// return YES; +// } +// NSString *rawEntity = [s substringWithRange:NSMakeRange(initialScanLocation + 1, (self.scanLocation - initialScanLocation) - 1)]; +// *decodedEntity = [rawEntity rs_stringByDecodingEntity]; +// self.scanLocation = self.scanLocation + 1; +// return *decodedEntity != nil; +// } +// +// self.scanLocation = self.scanLocation + 1; +// if (self.scanLocation - initialScanLocation > maxEntityLength) { +// break; +// } +// if (self.isAtEnd) { +// break; +// } +// } +// +// return NO; +// } + + func scanEntityValue() -> String? { + + let initialScanLocation = scanLocation + let maxEntityLength = 20 // It’s probably smaller, but this is just for sanity. + + while true { + + guard let ch = currentCharacter } + + return nil } - - private func currentCharacter() -> Character? { - - - - } - - private func - }