mirror of
https://github.com/Ranchero-Software/NetNewsWire.git
synced 2025-01-10 17:02:54 +01:00
349 lines
8.0 KiB
Mathematica
349 lines
8.0 KiB
Mathematica
|
//
|
|||
|
// NSString+RSParser.m
|
|||
|
// RSParser
|
|||
|
//
|
|||
|
// Created by Brent Simmons on 9/25/15.
|
|||
|
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
|
|||
|
//
|
|||
|
|
|||
|
#import "NSString+RSParser.h"
|
|||
|
#import <CommonCrypto/CommonDigest.h>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
@interface NSScanner (RSParser)
|
|||
|
|
|||
|
- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity;
|
|||
|
|
|||
|
@end
|
|||
|
|
|||
|
|
|||
|
@implementation NSString (RSParser)
|
|||
|
|
|||
|
- (BOOL)rsparser_contains:(NSString *)s {
|
|||
|
|
|||
|
return [self rangeOfString:s].location != NSNotFound;
|
|||
|
}
|
|||
|
|
|||
|
- (NSString *)rsparser_stringByDecodingHTMLEntities {
|
|||
|
|
|||
|
@autoreleasepool {
|
|||
|
|
|||
|
NSScanner *scanner = [[NSScanner alloc] initWithString:self];
|
|||
|
scanner.charactersToBeSkipped = nil;
|
|||
|
NSMutableString *result = [[NSMutableString alloc] init];
|
|||
|
|
|||
|
while (true) {
|
|||
|
|
|||
|
NSString *scannedString = nil;
|
|||
|
if ([scanner scanUpToString:@"&" intoString:&scannedString]) {
|
|||
|
[result appendString:scannedString];
|
|||
|
}
|
|||
|
if (scanner.isAtEnd) {
|
|||
|
break;
|
|||
|
}
|
|||
|
NSUInteger savedScanLocation = scanner.scanLocation;
|
|||
|
|
|||
|
NSString *decodedEntity = nil;
|
|||
|
if ([scanner rs_scanEntityValue:&decodedEntity]) {
|
|||
|
[result appendString:decodedEntity];
|
|||
|
}
|
|||
|
else {
|
|||
|
[result appendString:@"&"];
|
|||
|
scanner.scanLocation = savedScanLocation + 1;
|
|||
|
}
|
|||
|
|
|||
|
if (scanner.isAtEnd) {
|
|||
|
break;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if ([self isEqualToString:result]) {
|
|||
|
return self;
|
|||
|
}
|
|||
|
return [result copy];
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
|
|||
|
static NSDictionary *RSEntitiesDictionary(void);
|
|||
|
static NSString *RSParserStringWithValue(uint32_t value);
|
|||
|
|
|||
|
- (NSString * _Nullable)rs_stringByDecodingEntity {
|
|||
|
|
|||
|
// self may or may not have outer & and ; characters.
|
|||
|
|
|||
|
NSMutableString *s = [self mutableCopy];
|
|||
|
|
|||
|
if ([s hasPrefix:@"&"]) {
|
|||
|
[s deleteCharactersInRange:NSMakeRange(0, 1)];
|
|||
|
}
|
|||
|
if ([s hasSuffix:@";"]) {
|
|||
|
[s deleteCharactersInRange:NSMakeRange(s.length - 1, 1)];
|
|||
|
}
|
|||
|
|
|||
|
NSDictionary *entitiesDictionary = RSEntitiesDictionary();
|
|||
|
|
|||
|
NSString *decodedEntity = entitiesDictionary[self];
|
|||
|
if (decodedEntity) {
|
|||
|
return decodedEntity;
|
|||
|
}
|
|||
|
|
|||
|
if ([s hasPrefix:@"#x"] || [s hasPrefix:@"#X"]) { // Hex
|
|||
|
NSScanner *scanner = [[NSScanner alloc] initWithString:s];
|
|||
|
scanner.charactersToBeSkipped = [NSCharacterSet characterSetWithCharactersInString:@"#xX"];
|
|||
|
unsigned int hexValue = 0;
|
|||
|
if ([scanner scanHexInt:&hexValue]) {
|
|||
|
return RSParserStringWithValue((uint32_t)hexValue);
|
|||
|
}
|
|||
|
return nil;
|
|||
|
}
|
|||
|
|
|||
|
else if ([s hasPrefix:@"#"]) {
|
|||
|
[s deleteCharactersInRange:NSMakeRange(0, 1)];
|
|||
|
NSInteger value = s.integerValue;
|
|||
|
if (value < 1) {
|
|||
|
return nil;
|
|||
|
}
|
|||
|
return RSParserStringWithValue((uint32_t)value);
|
|||
|
}
|
|||
|
|
|||
|
return nil;
|
|||
|
}
|
|||
|
|
|||
|
- (NSString *)rsparser_stringByEncodingRequiredEntities {
|
|||
|
NSMutableString *result = [NSMutableString string];
|
|||
|
|
|||
|
for (NSUInteger i = 0; i < self.length; ++i) {
|
|||
|
unichar c = [self characterAtIndex:i];
|
|||
|
|
|||
|
switch (c) {
|
|||
|
case '<':
|
|||
|
[result appendString:@"<"];
|
|||
|
break;
|
|||
|
case '>':
|
|||
|
[result appendString:@">"];
|
|||
|
break;
|
|||
|
case '&':
|
|||
|
[result appendString:@"&"];
|
|||
|
break;
|
|||
|
default:
|
|||
|
[result appendFormat:@"%C", c];
|
|||
|
break;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return [result copy];
|
|||
|
}
|
|||
|
|
|||
|
#pragma GCC diagnostic push
|
|||
|
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
|||
|
- (NSData *)_rsparser_md5HashData {
|
|||
|
|
|||
|
NSData *data = [self dataUsingEncoding:NSUTF8StringEncoding];
|
|||
|
unsigned char hash[CC_MD5_DIGEST_LENGTH];
|
|||
|
CC_MD5(data.bytes, (CC_LONG)data.length, hash);
|
|||
|
|
|||
|
return [NSData dataWithBytes:(const void *)hash length:CC_MD5_DIGEST_LENGTH];
|
|||
|
}
|
|||
|
#pragma GCC diagnostic pop
|
|||
|
|
|||
|
- (NSString *)rsparser_md5Hash {
|
|||
|
|
|||
|
NSData *md5Data = [self _rsparser_md5HashData];
|
|||
|
const Byte *bytes = md5Data.bytes;
|
|||
|
return [NSString stringWithFormat:@"%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x", bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], bytes[8], bytes[9], bytes[10], bytes[11], bytes[12], bytes[13], bytes[14], bytes[15]];
|
|||
|
}
|
|||
|
|
|||
|
|
|||
|
@end
|
|||
|
|
|||
|
@implementation NSScanner (RSParser)
|
|||
|
|
|||
|
- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity {
|
|||
|
|
|||
|
NSString *s = self.string;
|
|||
|
NSUInteger initialScanLocation = self.scanLocation;
|
|||
|
static NSUInteger maxEntityLength = 20; // It’s probably smaller, but this is just for sanity.
|
|||
|
|
|||
|
while (true) {
|
|||
|
|
|||
|
unichar ch = [s characterAtIndex:self.scanLocation];
|
|||
|
if ([NSCharacterSet.whitespaceAndNewlineCharacterSet characterIsMember:ch]) {
|
|||
|
break;
|
|||
|
}
|
|||
|
if (ch == ';') {
|
|||
|
if (!decodedEntity) {
|
|||
|
return YES;
|
|||
|
}
|
|||
|
NSString *rawEntity = [s substringWithRange:NSMakeRange(initialScanLocation + 1, (self.scanLocation - initialScanLocation) - 1)];
|
|||
|
*decodedEntity = [rawEntity rs_stringByDecodingEntity];
|
|||
|
self.scanLocation = self.scanLocation + 1;
|
|||
|
return *decodedEntity != nil;
|
|||
|
}
|
|||
|
|
|||
|
self.scanLocation = self.scanLocation + 1;
|
|||
|
if (self.scanLocation - initialScanLocation > maxEntityLength) {
|
|||
|
break;
|
|||
|
}
|
|||
|
if (self.isAtEnd) {
|
|||
|
break;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
return NO;
|
|||
|
}
|
|||
|
|
|||
|
@end
|
|||
|
|
|||
|
static NSString *RSParserStringWithValue(uint32_t value) {
|
|||
|
// From WebCore's HTMLEntityParser
|
|||
|
static const uint32_t windowsLatin1ExtensionArray[32] = {
|
|||
|
0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
|
|||
|
0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
|
|||
|
0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
|
|||
|
0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178 // 98-9F
|
|||
|
};
|
|||
|
|
|||
|
if ((value & ~0x1Fu) == 0x80u) { // value >= 128 && value < 160
|
|||
|
value = windowsLatin1ExtensionArray[value - 0x80];
|
|||
|
}
|
|||
|
|
|||
|
value = CFSwapInt32HostToLittle(value);
|
|||
|
|
|||
|
return [[NSString alloc] initWithBytes:&value length:sizeof(value) encoding:NSUTF32LittleEndianStringEncoding];
|
|||
|
}
|
|||
|
|
|||
|
static NSDictionary *RSEntitiesDictionary(void) {
|
|||
|
|
|||
|
static NSDictionary *entitiesDictionary = nil;
|
|||
|
|
|||
|
static dispatch_once_t onceToken;
|
|||
|
dispatch_once(&onceToken, ^{
|
|||
|
|
|||
|
entitiesDictionary = @{
|
|||
|
// Named entities
|
|||
|
@"AElig": @"Æ",
|
|||
|
@"Aacute": @"Á",
|
|||
|
@"Acirc": @"Â",
|
|||
|
@"Agrave": @"À",
|
|||
|
@"Aring": @"Å",
|
|||
|
@"Atilde": @"Ã",
|
|||
|
@"Auml": @"Ä",
|
|||
|
@"Ccedil": @"Ç",
|
|||
|
@"Dstrok": @"Ð",
|
|||
|
@"ETH": @"Ð",
|
|||
|
@"Eacute": @"É",
|
|||
|
@"Ecirc": @"Ê",
|
|||
|
@"Egrave": @"È",
|
|||
|
@"Euml": @"Ë",
|
|||
|
@"Iacute": @"Í",
|
|||
|
@"Icirc": @"Î",
|
|||
|
@"Igrave": @"Ì",
|
|||
|
@"Iuml": @"Ï",
|
|||
|
@"Ntilde": @"Ñ",
|
|||
|
@"Oacute": @"Ó",
|
|||
|
@"Ocirc": @"Ô",
|
|||
|
@"Ograve": @"Ò",
|
|||
|
@"Oslash": @"Ø",
|
|||
|
@"Otilde": @"Õ",
|
|||
|
@"Ouml": @"Ö",
|
|||
|
@"Pi": @"Π",
|
|||
|
@"THORN": @"Þ",
|
|||
|
@"Uacute": @"Ú",
|
|||
|
@"Ucirc": @"Û",
|
|||
|
@"Ugrave": @"Ù",
|
|||
|
@"Uuml": @"Ü",
|
|||
|
@"Yacute": @"Y",
|
|||
|
@"aacute": @"á",
|
|||
|
@"acirc": @"â",
|
|||
|
@"acute": @"´",
|
|||
|
@"aelig": @"æ",
|
|||
|
@"agrave": @"à",
|
|||
|
@"amp": @"&",
|
|||
|
@"apos": @"'",
|
|||
|
@"aring": @"å",
|
|||
|
@"atilde": @"ã",
|
|||
|
@"auml": @"ä",
|
|||
|
@"brkbar": @"¦",
|
|||
|
@"brvbar": @"¦",
|
|||
|
@"ccedil": @"ç",
|
|||
|
@"cedil": @"¸",
|
|||
|
@"cent": @"¢",
|
|||
|
@"copy": @"©",
|
|||
|
@"curren": @"¤",
|
|||
|
@"deg": @"°",
|
|||
|
@"die": @"¨",
|
|||
|
@"divide": @"÷",
|
|||
|
@"eacute": @"é",
|
|||
|
@"ecirc": @"ê",
|
|||
|
@"egrave": @"è",
|
|||
|
@"eth": @"ð",
|
|||
|
@"euml": @"ë",
|
|||
|
@"euro": @"€",
|
|||
|
@"frac12": @"½",
|
|||
|
@"frac14": @"¼",
|
|||
|
@"frac34": @"¾",
|
|||
|
@"gt": @">",
|
|||
|
@"hearts": @"♥",
|
|||
|
@"hellip": @"…",
|
|||
|
@"iacute": @"í",
|
|||
|
@"icirc": @"î",
|
|||
|
@"iexcl": @"¡",
|
|||
|
@"igrave": @"ì",
|
|||
|
@"iquest": @"¿",
|
|||
|
@"iuml": @"ï",
|
|||
|
@"laquo": @"«",
|
|||
|
@"ldquo": @"“",
|
|||
|
@"lsquo": @"‘",
|
|||
|
@"lt": @"<",
|
|||
|
@"macr": @"¯",
|
|||
|
@"mdash": @"—",
|
|||
|
@"micro": @"µ",
|
|||
|
@"middot": @"·",
|
|||
|
@"ndash": @"–",
|
|||
|
@"not": @"¬",
|
|||
|
@"ntilde": @"ñ",
|
|||
|
@"oacute": @"ó",
|
|||
|
@"ocirc": @"ô",
|
|||
|
@"ograve": @"ò",
|
|||
|
@"ordf": @"ª",
|
|||
|
@"ordm": @"º",
|
|||
|
@"oslash": @"ø",
|
|||
|
@"otilde": @"õ",
|
|||
|
@"ouml": @"ö",
|
|||
|
@"para": @"¶",
|
|||
|
@"pi": @"π",
|
|||
|
@"plusmn": @"±",
|
|||
|
@"pound": @"£",
|
|||
|
@"quot": @"\"",
|
|||
|
@"raquo": @"»",
|
|||
|
@"rdquo": @"”",
|
|||
|
@"reg": @"®",
|
|||
|
@"rsquo": @"’",
|
|||
|
@"sect": @"§",
|
|||
|
@"shy": RSParserStringWithValue(173),
|
|||
|
@"sup1": @"¹",
|
|||
|
@"sup2": @"²",
|
|||
|
@"sup3": @"³",
|
|||
|
@"szlig": @"ß",
|
|||
|
@"thorn": @"þ",
|
|||
|
@"times": @"×",
|
|||
|
@"trade": @"™",
|
|||
|
@"uacute": @"ú",
|
|||
|
@"ucirc": @"û",
|
|||
|
@"ugrave": @"ù",
|
|||
|
@"uml": @"¨",
|
|||
|
@"uuml": @"ü",
|
|||
|
@"yacute": @"y",
|
|||
|
@"yen": @"¥",
|
|||
|
@"yuml": @"ÿ",
|
|||
|
@"infin": @"∞",
|
|||
|
@"nbsp": RSParserStringWithValue(160)
|
|||
|
};
|
|||
|
});
|
|||
|
|
|||
|
return entitiesDictionary;
|
|||
|
}
|