Continue progress on HTMLEntityDecoder.

This commit is contained in:
Brent Simmons 2024-09-15 21:51:48 -07:00
parent 26d0a19c8b
commit 6779ef94dd
2 changed files with 66 additions and 39 deletions

View File

@ -178,7 +178,7 @@ private extension JSONFeedParser {
} }
if isSpecialCaseTitleWithEntitiesFeed(feedURL) { if isSpecialCaseTitleWithEntitiesFeed(feedURL) {
return (title as NSString).rsparser_stringByDecodingHTMLEntities() return HTMLEntityDecoder.decodedString(title)
} }
return title return title

View File

@ -9,7 +9,9 @@ import Foundation
public final class HTMLEntityDecoder { public final class HTMLEntityDecoder {
static func decodedString(withEncodedString encodedString: String) -> String { static let ampersandCharacter = Character("&")
public static func decodedString(_ encodedString: String) -> String {
let scanner = EntityScanner(string: encodedString) let scanner = EntityScanner(string: encodedString)
var result = "" var result = ""
@ -17,7 +19,7 @@ public final class HTMLEntityDecoder {
while true { while true {
let scannedString = scanner.scanUpTo(Character("&")) let scannedString = scanner.scanUpTo(Self.ampersandCharacter)
if !scannedString.isEmpty { if !scannedString.isEmpty {
result.append(scannedString) result.append(scannedString)
} }
@ -60,10 +62,10 @@ final class EntityScanner {
} }
var currentCharacter: Character? { var currentCharacter: Character? {
guard !isAtEnd, let index = string.index(string.startIndex, offsetBy: scanLocation, limitedBy: string.endIndex) else { guard !isAtEnd else {
return nil return nil
} }
return string[index] return string.characterAtIntIndex(scanLocation)
} }
init(string: String) { init(string: String) {
@ -95,39 +97,7 @@ final class EntityScanner {
return scanned return scanned
} }
// - (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity { static let semicolonCharacter = Character(";")
//
// NSString *s = self.string;
// NSUInteger initialScanLocation = self.scanLocation;
// static NSUInteger maxEntityLength = 20; // Its probably smaller, but this is just for sanity.
//
// while (true) {
//
// unichar ch = [s characterAtIndex:self.scanLocation];
// if ([NSCharacterSet.whitespaceAndNewlineCharacterSet characterIsMember:ch]) {
// break;
// }
// if (ch == ';') {
// if (!decodedEntity) {
// return YES;
// }
// NSString *rawEntity = [s substringWithRange:NSMakeRange(initialScanLocation + 1, (self.scanLocation - initialScanLocation) - 1)];
// *decodedEntity = [rawEntity rs_stringByDecodingEntity];
// self.scanLocation = self.scanLocation + 1;
// return *decodedEntity != nil;
// }
//
// self.scanLocation = self.scanLocation + 1;
// if (self.scanLocation - initialScanLocation > maxEntityLength) {
// break;
// }
// if (self.isAtEnd) {
// break;
// }
// }
//
// return NO;
// }
func scanEntityValue() -> String? { func scanEntityValue() -> String? {
@ -136,10 +106,67 @@ final class EntityScanner {
while true { while true {
guard let ch = currentCharacter guard let ch = currentCharacter else {
break
}
if CharacterSet.whitespacesAndNewlines.contains(ch.unicodeScalars.first!) {
break
}
if ch == Self.semicolonCharacter {
let entityRange = initialScanLocation..<scanLocation
guard let entity = string.substring(intRange: entityRange), let decodedEntity = decodedEntity(entity) else {
assertionFailure("Unexpected failure scanning entity in scanEntityValue.")
scanLocation = initialScanLocation + 1
return nil
}
scanLocation = initialScanLocation + 1
return decodedEntity
}
scanLocation += 1
if scanLocation - initialScanLocation > maxEntityLength {
break
}
if isAtEnd {
break
}
} }
return nil return nil
} }
} }
extension String {

	/// Converts a zero-based character offset into a `String.Index`.
	///
	/// - Parameter i: Character offset from `startIndex`.
	/// - Returns: The matching index, or `nil` when `i` is negative or greater
	///   than the number of characters. An offset equal to the character count
	///   yields `endIndex`, which is valid as a range bound but not as a subscript.
	///
	/// Walking a `String` by offset is linear in `i`.
	func indexForInt(_ i: Int) -> Index? {
		// `limitedBy:` only constrains movement toward the limit, so a negative
		// offset would walk before `startIndex` and trap. Reject it up front.
		guard i >= 0 else {
			return nil
		}
		return index(startIndex, offsetBy: i, limitedBy: endIndex)
	}

	/// Returns the character at a zero-based character offset.
	///
	/// - Parameter i: Character offset from `startIndex`.
	/// - Returns: The character, or `nil` when `i` is out of bounds
	///   (negative, or greater than or equal to the character count).
	func characterAtIntIndex(_ i: Int) -> Character? {
		// `indexForInt` may legitimately return `endIndex`; subscripting with it traps.
		guard let index = indexForInt(i), index < endIndex else {
			return nil
		}
		return self[index]
	}

	/// Returns the substring covered by a half-open range of character offsets.
	///
	/// - Parameter intRange: Character offsets; the upper bound may equal the character count.
	/// - Returns: A new `String`, or `nil` when either bound is out of bounds.
	func substring(intRange: Range<Int>) -> String? {
		guard let rangeLower = indexForInt(intRange.lowerBound),
			  let rangeUpper = indexForInt(intRange.upperBound) else {
			return nil
		}
		return String(self[rangeLower..<rangeUpper])
	}
}
/// Placeholder for entity decoding: currently reports every entity as undecodable.
///
/// - Parameter rawEntity: The entity text handed over by the scanner.
/// - Returns: Always `nil` for now.
// TODO: implement the actual entity lookup.
private func decodedEntity(_ rawEntity: String) -> String? {
	nil
}