// NetNewsWire/Frameworks/RSParser/Utilities/NSString+RSParser.m
//
// 428 lines
// 9.5 KiB
// Mathematica
// Raw Normal View History

//
// NSString+RSParser.m
// RSParser
//
// Created by Brent Simmons on 9/25/15.
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
//
// 2017-06-26 01:32:07 +02:00
#import <CommonCrypto/CommonDigest.h>
#import "NSString+RSParser.h"
/// File-private NSScanner helper used while decoding HTML entities.
@interface NSScanner (RSParser)
/// Scans a single HTML entity beginning at the current scan location.
///
/// The character at the current scan location is treated as the leading
/// delimiter (the caller in this file positions the scanner on an `&`); the
/// text between it and the terminating `;` is what gets decoded. Scanning
/// gives up when whitespace is hit, when the end of the string is reached, or
/// after a small fixed number of characters without finding `;`.
///
/// @param decodedEntity Out-parameter that receives the decoded text when a
///        non-NULL pointer is supplied and the entity is recognized.
/// @return YES when an entity was scanned, NO otherwise.
- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity;
@end
@implementation NSString (RSParser)

/// Returns the receiver with every recognized HTML entity replaced by the text
/// it stands for.
///
/// Text outside of entities is copied through unchanged. An `&` that does not
/// begin a recognized entity is kept as a literal ampersand and scanning
/// resumes with the character right after it. When nothing had to be changed
/// the receiver itself is returned, so callers do not pay for a copy.
///
/// @return The decoded string (never nil).
- (NSString *)rsparser_stringByDecodingHTMLEntities {
	@autoreleasepool {
		NSScanner *scanner = [[NSScanner alloc] initWithString:self];
		// NSScanner skips whitespace by default; every character matters here.
		scanner.charactersToBeSkipped = nil;
		NSMutableString *result = [[NSMutableString alloc] init];

		while (!scanner.isAtEnd) {
			// Copy everything in front of the next ampersand verbatim.
			NSString *scannedString = nil;
			if ([scanner scanUpToString:@"&" intoString:&scannedString]) {
				[result appendString:scannedString];
			}
			if (scanner.isAtEnd) {
				break;
			}

			// The scanner is now sitting on an ampersand.
			NSUInteger savedScanLocation = scanner.scanLocation;
			NSString *decodedEntity = nil;
			if ([scanner rs_scanEntityValue:&decodedEntity]) {
				[result appendString:decodedEntity];
			}
			else {
				// Not an entity: emit the ampersand itself and step over it.
				[result appendString:@"&"];
				scanner.scanLocation = savedScanLocation + 1;
			}
		}

		if ([self isEqualToString:result]) {
			return self;
		}
		return [result copy];
	}
}

static NSDictionary *RSEntitiesDictionary(void);
static NSString *RSParserStringWithValue(unichar value);

/// Builds a string holding the single Unicode code point `codePoint`.
///
/// Code points above U+FFFF do not fit in one `unichar`, so they are encoded
/// as a UTF-16 surrogate pair instead of being truncated. Returns nil for
/// values that are not valid scalar values: zero, lone surrogates
/// (U+D800...U+DFFF) and anything above U+10FFFF.
static NSString * _Nullable RSParserStringWithCodePoint(uint32_t codePoint) {
	if (codePoint == 0 || codePoint > 0x10FFFF) {
		return nil;
	}
	if (codePoint >= 0xD800 && codePoint <= 0xDFFF) {
		return nil;
	}
	if (codePoint <= 0xFFFF) {
		return RSParserStringWithValue((unichar)codePoint);
	}

	uint32_t offset = codePoint - 0x10000;
	unichar surrogatePair[2] = {
		(unichar)(0xD800 + (offset >> 10)),
		(unichar)(0xDC00 + (offset & 0x3FF))
	};
	return [NSString stringWithCharacters:surrogatePair length:2];
}

/// Decodes the receiver as one HTML entity.
///
/// The receiver may be written with or without the surrounding `&` and `;`.
/// The entity table is consulted first; anything not found there is parsed as
/// a hexadecimal (`#x41` / `#X41`) or decimal (`#65`) numeric reference.
///
/// @return The decoded text, or nil when the receiver is not a recognized
///         entity or names an invalid code point.
- (NSString * _Nullable)rs_stringByDecodingEntity {
	// self may or may not have outer & and ; characters.
	NSString *s = self;
	if ([s hasPrefix:@"&"]) {
		s = [s substringFromIndex:1];
	}
	if ([s hasSuffix:@";"]) {
		s = [s substringToIndex:s.length - 1];
	}

	// Look up the stripped name. The table keys carry no delimiters, so using
	// the unstripped receiver here would never match "&amp;" and friends.
	NSString *decodedEntity = RSEntitiesDictionary()[s];
	if (decodedEntity) {
		return decodedEntity;
	}

	if ([s hasPrefix:@"#x"] || [s hasPrefix:@"#X"]) { // Hex
		NSScanner *scanner = [[NSScanner alloc] initWithString:s];
		scanner.charactersToBeSkipped = [NSCharacterSet characterSetWithCharactersInString:@"#xX"];
		unsigned int hexValue = 0;
		if (![scanner scanHexInt:&hexValue]) {
			return nil;
		}
		return RSParserStringWithCodePoint(hexValue);
	}

	if ([s hasPrefix:@"#"]) { // Decimal
		NSInteger value = [s substringFromIndex:1].integerValue;
		if (value < 1 || value > 0x10FFFF) {
			return nil;
		}
		return RSParserStringWithCodePoint((uint32_t)value);
	}

	return nil;
}

/// Returns the raw 16-byte MD5 digest of the receiver's UTF-8 bytes.
///
/// NOTE(review): CC_MD5 is deprecated on recent SDKs; presumably this digest
/// is used as an identifier rather than for security, so it is kept as is to
/// avoid changing any values derived from it. Confirm against callers.
- (NSData *)_rsparser_md5HashData {
	NSData *utf8Data = [self dataUsingEncoding:NSUTF8StringEncoding];
	unsigned char digest[CC_MD5_DIGEST_LENGTH];
	CC_MD5(utf8Data.bytes, (CC_LONG)utf8Data.length, digest);
	return [NSData dataWithBytes:(const void *)digest length:CC_MD5_DIGEST_LENGTH];
}

/// Returns the MD5 digest of the receiver's UTF-8 bytes as a lowercase
/// hexadecimal string (two digits per digest byte).
- (NSString *)rsparser_md5Hash {
	NSData *md5Data = [self _rsparser_md5HashData];
	const Byte *bytes = md5Data.bytes;
	NSUInteger byteCount = md5Data.length;

	NSMutableString *hexString = [NSMutableString stringWithCapacity:byteCount * 2];
	for (NSUInteger i = 0; i < byteCount; i++) {
		[hexString appendFormat:@"%02x", bytes[i]];
	}
	return [hexString copy];
}

@end
@implementation NSScanner (RSParser)

/// Scans one HTML entity starting at the current scan location.
///
/// The character at the current scan location is taken to be the leading `&`;
/// the text between it and the terminating `;` is decoded with
/// -rs_stringByDecodingEntity. Scanning stops without a match when whitespace
/// is found, when the end of the string is reached, or when no `;` turns up
/// within `maxEntityLength` characters.
///
/// On success the scan location is moved just past the `;`. On failure the
/// scan location is left untouched, matching NSScanner's built-in scan methods.
///
/// @param decodedEntity Optional out-parameter. When non-NULL it receives the
///        decoded text, or nil if a `;` was found but the text in front of it
///        is not a recognized entity.
/// @return YES when a recognized entity was scanned, NO otherwise.
- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity {
	static const NSUInteger maxEntityLength = 20; // It's probably smaller, but this is just for sanity.

	NSString *s = self.string;
	const NSUInteger initialScanLocation = self.scanLocation;
	const NSUInteger length = s.length;
	NSCharacterSet *whitespace = NSCharacterSet.whitespaceAndNewlineCharacterSet;

	// Walk with a local index so the scanner itself only moves on success.
	// The `location < length` guard also covers a scanner that is already at
	// the end, where -characterAtIndex: would raise.
	for (NSUInteger location = initialScanLocation;
		 location < length && location - initialScanLocation <= maxEntityLength;
		 location++) {

		unichar ch = [s characterAtIndex:location];
		if ([whitespace characterIsMember:ch]) {
			break;
		}
		if (ch != ';') {
			continue;
		}
		if (location == initialScanLocation) {
			// A ';' right at the start leaves nothing to decode; bail out
			// before the length computation below can underflow.
			break;
		}

		NSRange rawEntityRange = NSMakeRange(initialScanLocation + 1, (location - initialScanLocation) - 1);
		NSString *decoded = [[s substringWithRange:rawEntityRange] rs_stringByDecodingEntity];
		if (decodedEntity) {
			*decodedEntity = decoded;
		}
		if (!decoded) {
			break;
		}

		self.scanLocation = location + 1;
		return YES;
	}

	return NO;
}

@end
/// Formats the UTF-16 code unit `value` with `%C` and returns the resulting string.
static NSString *RSParserStringWithValue(unichar value) {
	NSString *formatted = [NSString stringWithFormat:@"%C", value];
	return formatted;
}
/// Returns the shared table that maps entity names to their replacement text.
///
/// Keys are entity bodies without the surrounding `&` and `;`: named entities
/// ("amp", "eacute"), decimal references ("#8217") and a few hexadecimal
/// references ("#x2019"). The table is built once, on first use.
///
/// Decimal references 145-153 are interpreted as Windows-1252 code points
/// (curly quotes, bullet, dashes, trademark) rather than as C1 controls.
///
/// Every value is the exact character the entity denotes. Entries that used
/// to hold "?" placeholders, empty strings, plain spaces or ASCII stand-ins
/// ("-", "x", "Y", "1/2", ...) have been corrected so that table hits agree
/// with what a numeric reference to the same code point decodes to.
/// Invisible or easily confused characters (no-break space, soft hyphen,
/// en/em space) are built from their code point instead of being typed as
/// literals.
static NSDictionary *RSEntitiesDictionary(void) {
	static NSDictionary *entitiesDictionary = nil;
	static dispatch_once_t onceToken;

	dispatch_once(&onceToken, ^{
		entitiesDictionary =
		@{@"#034": @"\"",
		  @"#038": @"&",
		  @"#38": @"&",
		  @"#039": @"'",
		  @"#145": @"‘",
		  @"#146": @"’",
		  @"#147": @"“",
		  @"#148": @"”",
		  @"#149": @"•",
		  @"#150": @"–",
		  @"#151": @"—",
		  @"#153": @"™",
		  @"#160": RSParserStringWithValue(160),
		  @"#161": @"¡",
		  @"#162": @"¢",
		  @"#163": @"£",
		  @"#164": @"¤",
		  @"#165": @"¥",
		  @"#166": @"¦",
		  @"#167": @"§",
		  @"#168": @"¨",
		  @"#169": @"©",
		  @"#170": @"ª",
		  @"#171": @"«",
		  @"#172": @"¬",
		  @"#173": RSParserStringWithValue(173), // soft hyphen
		  @"#174": @"®",
		  @"#175": @"¯",
		  @"#176": @"°",
		  @"#177": @"±",
		  @"#178": @"²",
		  @"#179": @"³",
		  @"#180": @"´",
		  @"#181": @"µ",
		  @"#182": @"¶",
		  @"#183": @"·",
		  @"#184": @"¸",
		  @"#185": @"¹",
		  @"#186": @"º",
		  @"#187": @"»",
		  @"#188": @"¼",
		  @"#189": @"½",
		  @"#190": @"¾",
		  @"#191": @"¿",
		  @"#192": @"À",
		  @"#193": @"Á",
		  @"#194": @"Â",
		  @"#195": @"Ã",
		  @"#196": @"Ä",
		  @"#197": @"Å",
		  @"#198": @"Æ",
		  @"#199": @"Ç",
		  @"#200": @"È",
		  @"#201": @"É",
		  @"#202": @"Ê",
		  @"#203": @"Ë",
		  @"#204": @"Ì",
		  @"#205": @"Í",
		  @"#206": @"Î",
		  @"#207": @"Ï",
		  @"#208": @"Ð",
		  @"#209": @"Ñ",
		  @"#210": @"Ò",
		  @"#211": @"Ó",
		  @"#212": @"Ô",
		  @"#213": @"Õ",
		  @"#214": @"Ö",
		  @"#215": @"×",
		  @"#216": @"Ø",
		  @"#217": @"Ù",
		  @"#218": @"Ú",
		  @"#219": @"Û",
		  @"#220": @"Ü",
		  @"#221": @"Ý",
		  @"#222": @"Þ",
		  @"#223": @"ß",
		  @"#224": @"à",
		  @"#225": @"á",
		  @"#226": @"â",
		  @"#227": @"ã",
		  @"#228": @"ä",
		  @"#229": @"å",
		  @"#230": @"æ",
		  @"#231": @"ç",
		  @"#232": @"è",
		  @"#233": @"é",
		  @"#234": @"ê",
		  @"#235": @"ë",
		  @"#236": @"ì",
		  @"#237": @"í",
		  @"#238": @"î",
		  @"#239": @"ï",
		  @"#240": @"ð",
		  @"#241": @"ñ",
		  @"#242": @"ò",
		  @"#243": @"ó",
		  @"#244": @"ô",
		  @"#245": @"õ",
		  @"#246": @"ö",
		  @"#247": @"÷",
		  @"#248": @"ø",
		  @"#249": @"ù",
		  @"#250": @"ú",
		  @"#251": @"û",
		  @"#252": @"ü",
		  @"#253": @"ý",
		  @"#254": @"þ",
		  @"#255": @"ÿ",
		  @"#32": @" ",
		  @"#34": @"\"",
		  @"#39": @"'",
		  @"#8194": RSParserStringWithValue(8194), // en space
		  @"#8195": RSParserStringWithValue(8195), // em space
		  @"#8211": @"–",
		  @"#8212": @"—",
		  @"#8216": @"‘",
		  @"#8217": @"’",
		  @"#8220": @"“",
		  @"#8221": @"”",
		  @"#8230": @"…",
		  @"#8617": RSParserStringWithValue(8617),
		  @"AElig": @"Æ",
		  @"Aacute": @"Á",
		  @"Acirc": @"Â",
		  @"Agrave": @"À",
		  @"Aring": @"Å",
		  @"Atilde": @"Ã",
		  @"Auml": @"Ä",
		  @"Ccedil": @"Ç",
		  @"Dstrok": @"Đ",
		  @"ETH": @"Ð",
		  @"Eacute": @"É",
		  @"Ecirc": @"Ê",
		  @"Egrave": @"È",
		  @"Euml": @"Ë",
		  @"Iacute": @"Í",
		  @"Icirc": @"Î",
		  @"Igrave": @"Ì",
		  @"Iuml": @"Ï",
		  @"Ntilde": @"Ñ",
		  @"Oacute": @"Ó",
		  @"Ocirc": @"Ô",
		  @"Ograve": @"Ò",
		  @"Oslash": @"Ø",
		  @"Otilde": @"Õ",
		  @"Ouml": @"Ö",
		  @"Pi": @"Π",
		  @"THORN": @"Þ",
		  @"Uacute": @"Ú",
		  @"Ucirc": @"Û",
		  @"Ugrave": @"Ù",
		  @"Uuml": @"Ü",
		  @"Yacute": @"Ý",
		  @"aacute": @"á",
		  @"acirc": @"â",
		  @"acute": @"´",
		  @"aelig": @"æ",
		  @"agrave": @"à",
		  @"amp": @"&",
		  @"apos": @"'",
		  @"aring": @"å",
		  @"atilde": @"ã",
		  @"auml": @"ä",
		  @"brkbar": @"¦", // non-standard alias of brvbar
		  @"brvbar": @"¦",
		  @"ccedil": @"ç",
		  @"cedil": @"¸",
		  @"cent": @"¢",
		  @"copy": @"©",
		  @"curren": @"¤",
		  @"deg": @"°",
		  @"die": @"¨",
		  @"divide": @"÷",
		  @"eacute": @"é",
		  @"ecirc": @"ê",
		  @"egrave": @"è",
		  @"eth": @"ð",
		  @"euml": @"ë",
		  @"euro": @"€",
		  @"frac12": @"½",
		  @"frac14": @"¼",
		  @"frac34": @"¾",
		  @"gt": @">",
		  @"hearts": @"♥",
		  @"hellip": @"…",
		  @"iacute": @"í",
		  @"icirc": @"î",
		  @"iexcl": @"¡",
		  @"igrave": @"ì",
		  @"iquest": @"¿",
		  @"iuml": @"ï",
		  @"laquo": @"«",
		  @"ldquo": @"“",
		  @"lsquo": @"‘",
		  @"lt": @"<",
		  @"macr": @"¯",
		  @"mdash": @"—",
		  @"micro": @"µ",
		  @"middot": @"·",
		  @"ndash": @"–",
		  @"not": @"¬",
		  @"ntilde": @"ñ",
		  @"oacute": @"ó",
		  @"ocirc": @"ô",
		  @"ograve": @"ò",
		  @"ordf": @"ª",
		  @"ordm": @"º",
		  @"oslash": @"ø",
		  @"otilde": @"õ",
		  @"ouml": @"ö",
		  @"para": @"¶",
		  @"pi": @"π",
		  @"plusmn": @"±",
		  @"pound": @"£",
		  @"quot": @"\"",
		  @"raquo": @"»",
		  @"rdquo": @"”",
		  @"reg": @"®",
		  @"rsquo": @"’",
		  @"sect": @"§",
		  @"shy": RSParserStringWithValue(173), // soft hyphen
		  @"sup1": @"¹",
		  @"sup2": @"²",
		  @"sup3": @"³",
		  @"szlig": @"ß",
		  @"thorn": @"þ",
		  @"times": @"×",
		  @"trade": @"™",
		  @"uacute": @"ú",
		  @"ucirc": @"û",
		  @"ugrave": @"ù",
		  @"uml": @"¨",
		  @"uuml": @"ü",
		  @"yacute": @"ý",
		  @"yen": @"¥",
		  @"yuml": @"ÿ",
		  @"infin": @"∞",
		  @"nbsp": RSParserStringWithValue(160),
		  @"#x21A9": RSParserStringWithValue(8617),
		  @"#xFE0E": RSParserStringWithValue(65038),
		  @"#x2019": RSParserStringWithValue(8217),
		  @"#x2026": RSParserStringWithValue(8230),
		  @"#x201C": RSParserStringWithValue(8220),
		  @"#x201D": RSParserStringWithValue(8221),
		  @"#x2014": RSParserStringWithValue(8212)};
	});

	return entitiesDictionary;
}