diff --git a/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift b/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift
index 4dd12242b..dee3fca5b 100644
--- a/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift
+++ b/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift
@@ -11,15 +11,14 @@ public final class HTMLEntityDecoder {
static func decodedString(withEncodedString encodedString: String) -> String {
- let scanner = Scanner(string: encodedString)
- scanner.charactersToBeSkipped = nil
+ let scanner = EntityScanner(string: encodedString)
var result = ""
var didDecodeAtLeastOneEntity = false
while true {
- var scannedString: NSString? = nil
- if scanner.scanUpTo("&", into: &scannedString) {
+ let scannedString = scanner.scanUpTo(Character("&"))
+ if !scannedString.isEmpty {
result.append(scannedString)
}
if scanner.isAtEnd {
@@ -28,8 +27,7 @@ public final class HTMLEntityDecoder {
let savedScanLocation = scanner.scanLocation
- var decodedEntity: String? = nil
- if scanner.scanEntityValue(&decodedEntity) {
+ if let decodedEntity = scanner.scanEntityValue() {
result.append(decodedEntity)
didDecodeAtLeastOneEntity = true
}
@@ -43,7 +41,7 @@ public final class HTMLEntityDecoder {
}
}
- if !didDecodeAtLeastOneEntity { // No changes made?
+ if !didDecodeAtLeastOneEntity { // No entities decoded?
return encodedString
}
return result
@@ -51,14 +49,21 @@ public final class HTMLEntityDecoder {
}
/// Purpose-built version of NSScanner, which has deprecated the parts we want to use.
-final class RSScanner {
+final class EntityScanner {
let string: String
let count: Int
var scanLocation = 0
- var isAtEnd {
- scanLocation >= count - 1
+ /// Whether `scanLocation` has reached or passed `count`.
+ var isAtEnd: Bool {
+ 	return count <= scanLocation
+ }
+
+ /// The character at offset `scanLocation` in `string`, or `nil` when the scanner is at the end.
+ ///
+ /// Each access resolves the index by offsetting from `startIndex`, so its cost grows with `scanLocation`.
+ var currentCharacter: Character? {
+ 	if isAtEnd {
+ 		return nil
+ 	}
+ 	let characterIndex = string.index(string.startIndex, offsetBy: scanLocation, limitedBy: string.endIndex)
+ 	return characterIndex.map { string[$0] }
}
init(string: String) {
@@ -67,25 +72,74 @@ final class RSScanner {
}
/// Scans up to `characterToFind` and returns the characters up to (and not including) `characterToFind`.
- /// - Returns: nil when there were no characters accumulated (next character was `characterToFind` or already at end of string)
- func scanUpTo(_ characterToFind: Character) -> String? {
+ /// - Returns: the characters scanned before `characterToFind`; may be an empty string. The scan location is left just past `characterToFind` (it is consumed but not returned), or at the end of the string if it never appears. NOTE(review): Foundation's `Scanner` stops at the delimiter instead — confirm callers expect the consumed form.
+ func scanUpTo(_ characterToFind: Character) -> String {
- if isAtEnd {
- return nil
- }
+ var scanned = ""
while true {
+ guard let ch = currentCharacter else {
+ break
+ }
+ scanLocation += 1
+
+ if ch == characterToFind {
+ break
+ }
+ else {
+ scanned.append(ch)
+ }
+ }
+
+ return scanned
+ }
+
+// - (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity {
+//
+// NSString *s = self.string;
+// NSUInteger initialScanLocation = self.scanLocation;
+// static NSUInteger maxEntityLength = 20; // It’s probably smaller, but this is just for sanity.
+//
+// while (true) {
+//
+// unichar ch = [s characterAtIndex:self.scanLocation];
+// if ([NSCharacterSet.whitespaceAndNewlineCharacterSet characterIsMember:ch]) {
+// break;
+// }
+// if (ch == ';') {
+// if (!decodedEntity) {
+// return YES;
+// }
+// NSString *rawEntity = [s substringWithRange:NSMakeRange(initialScanLocation + 1, (self.scanLocation - initialScanLocation) - 1)];
+// *decodedEntity = [rawEntity rs_stringByDecodingEntity];
+// self.scanLocation = self.scanLocation + 1;
+// return *decodedEntity != nil;
+// }
+//
+// self.scanLocation = self.scanLocation + 1;
+// if (self.scanLocation - initialScanLocation > maxEntityLength) {
+// break;
+// }
+// if (self.isAtEnd) {
+// break;
+// }
+// }
+//
+// return NO;
+// }
+
+ func scanEntityValue() -> String? {
+
+ let initialScanLocation = scanLocation
+ let maxEntityLength = 20 // It’s probably smaller, but this is just for sanity. NOTE(review): not referenced yet — the loop below is unfinished (its `guard` has no `else` and nothing advances `scanLocation`), so this cap is not applied.
+
+ while true {
+
+ guard let ch = currentCharacter
}
+
+ return nil
+ }
-
- private func currentCharacter() -> Character? {
-
-
-
- }
-
- private func
-
}