2017-06-21 06:18:46 +02:00
|
|
|
|
//
|
2017-06-25 23:06:01 +02:00
|
|
|
|
// NSString+RSParser.m
|
|
|
|
|
// RSParser
|
2017-06-21 06:18:46 +02:00
|
|
|
|
//
|
|
|
|
|
// Created by Brent Simmons on 9/25/15.
|
|
|
|
|
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
|
|
|
|
|
//
|
|
|
|
|
|
2017-06-26 01:32:07 +02:00
|
|
|
|
#import <CommonCrypto/CommonDigest.h>
|
2017-06-25 23:06:01 +02:00
|
|
|
|
#import "NSString+RSParser.h"
|
2017-06-21 06:18:46 +02:00
|
|
|
|
|
|
|
|
|
|
2017-06-25 23:06:01 +02:00
|
|
|
|
@interface NSScanner (RSParser)

/// Tries to scan one entity starting at the receiver's current scan location,
/// reading forward until a terminating ';' is found.
/// @param decodedEntity On success, receives the decoded text of the entity. May be NULL.
/// @return YES when an entity was found (and, if decodedEntity is non-NULL, decoded); otherwise NO.
- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity;

@end
|
|
|
|
|
|
|
|
|
|
|
2017-06-25 23:06:01 +02:00
|
|
|
|
@implementation NSString (RSParser)
|
2017-06-21 06:18:46 +02:00
|
|
|
|
|
2017-06-25 23:06:01 +02:00
|
|
|
|
- (NSString *)rsparser_stringByDecodingHTMLEntities {

	// Returns the receiver with HTML entities replaced by the text they stand for.
	// Literal runs are copied verbatim; each "&" is offered to -rs_scanEntityValue:,
	// and anything that does not decode is kept as a plain "&".
	// When decoding changes nothing, the receiver itself is returned.

	@autoreleasepool {

		NSScanner *entityScanner = [[NSScanner alloc] initWithString:self];
		entityScanner.charactersToBeSkipped = nil; // Whitespace is content here; never skip it.
		NSMutableString *decoded = [[NSMutableString alloc] init];

		while (!entityScanner.isAtEnd) {

			NSString *literalRun = nil;
			if ([entityScanner scanUpToString:@"&" intoString:&literalRun]) {
				[decoded appendString:literalRun];
			}
			if (entityScanner.isAtEnd) {
				break;
			}

			// Not at the end, so the scanner stopped on an "&".
			NSUInteger ampersandLocation = entityScanner.scanLocation;

			NSString *entityText = nil;
			if (![entityScanner rs_scanEntityValue:&entityText]) {
				// Not a decodable entity: emit the "&" as-is and resume right after it.
				[decoded appendString:@"&"];
				entityScanner.scanLocation = ampersandLocation + 1;
				continue;
			}

			[decoded appendString:entityText];
		}

		return [self isEqualToString:decoded] ? self : [decoded copy];
	}
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static NSDictionary *RSEntitiesDictionary(void);
|
2017-06-25 23:06:01 +02:00
|
|
|
|
static NSString *RSParserStringWithValue(unichar value);
|
2017-06-21 06:18:46 +02:00
|
|
|
|
|
|
|
|
|
- (NSString * _Nullable)rs_stringByDecodingEntity {

	// Decodes a single HTML entity into the text it represents.
	//
	// self may or may not have outer & and ; characters: "&amp;", "amp",
	// "#8217" and "&#x2019;" are all accepted.
	//
	// Returns the decoded string, or nil when the receiver is not a known named
	// entity and not a valid numeric character reference.

	NSMutableString *s = [self mutableCopy];

	if ([s hasPrefix:@"&"]) {
		[s deleteCharactersInRange:NSMakeRange(0, 1)];
	}
	if ([s hasSuffix:@";"]) {
		[s deleteCharactersInRange:NSMakeRange(s.length - 1, 1)];
	}
	if (s.length < 1) {
		return nil;
	}

	// Look up the stripped name (s), not self: the table keys never include
	// the & and ; delimiters.
	NSString *decodedEntity = RSEntitiesDictionary()[s];
	if (decodedEntity) {
		return decodedEntity;
	}

	if (![s hasPrefix:@"#"]) {
		return nil; // Unknown named entity.
	}

	uint32_t codePoint = 0;

	if ([s hasPrefix:@"#x"] || [s hasPrefix:@"#X"]) { // Hex
		NSScanner *scanner = [[NSScanner alloc] initWithString:[s substringFromIndex:2]];
		scanner.charactersToBeSkipped = nil;
		unsigned int hexValue = 0;
		if (![scanner scanHexInt:&hexValue]) {
			return nil;
		}
		codePoint = hexValue;
	}
	else { // Decimal
		NSInteger value = [s substringFromIndex:1].integerValue;
		if (value < 1 || value > 0x10FFFF) {
			return nil;
		}
		codePoint = (uint32_t)value;
	}

	// Reject NUL, values beyond the Unicode range, and lone surrogate halves:
	// none of them is a character that can stand on its own in a string.
	if (codePoint < 1 || codePoint > 0x10FFFF || (codePoint >= 0xD800 && codePoint <= 0xDFFF)) {
		return nil;
	}

	if (codePoint <= 0xFFFF) {
		return RSParserStringWithValue((unichar)codePoint);
	}

	// A code point above U+FFFF does not fit in one unichar. Casting it would
	// silently produce a different character, so encode it as a UTF-16
	// surrogate pair instead.
	uint32_t offset = codePoint - 0x10000;
	unichar surrogates[2] = {
		(unichar)(0xD800 + (offset >> 10)),
		(unichar)(0xDC00 + (offset & 0x3FF))
	};
	return [NSString stringWithCharacters:surrogates length:2];
}
|
|
|
|
|
|
2017-06-25 23:06:01 +02:00
|
|
|
|
- (NSData *)_rsparser_md5HashData {

	// Computes the MD5 digest of the receiver's UTF-8 bytes and returns it as
	// CC_MD5_DIGEST_LENGTH raw bytes.

	NSData *utf8Data = [self dataUsingEncoding:NSUTF8StringEncoding];

	unsigned char digest[CC_MD5_DIGEST_LENGTH];
	CC_MD5(utf8Data.bytes, (CC_LONG)utf8Data.length, digest);

	return [[NSData alloc] initWithBytes:digest length:sizeof(digest)];
}
|
|
|
|
|
|
|
|
|
|
- (NSString *)rsparser_md5Hash {

	// Lowercase hexadecimal rendering (two digits per byte) of the 16-byte MD5
	// digest produced by -_rsparser_md5HashData.

	const Byte *b = [self _rsparser_md5HashData].bytes;

	return [[NSString alloc] initWithFormat:@"%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
			b[0], b[1], b[2], b[3],
			b[4], b[5], b[6], b[7],
			b[8], b[9], b[10], b[11],
			b[12], b[13], b[14], b[15]];
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2017-06-21 06:18:46 +02:00
|
|
|
|
@end
|
|
|
|
|
|
2017-06-25 23:06:01 +02:00
|
|
|
|
@implementation NSScanner (RSParser)
|
2017-06-21 06:18:46 +02:00
|
|
|
|
|
|
|
|
|
- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity {

	// Scans one HTML entity ("&name;", "&#123;", "&#x7B;") whose "&" is at the
	// current scan location.
	//
	// decodedEntity: on success receives the decoded text. If NULL, the method
	// only reports whether a ';'-terminated run follows; it neither decodes it
	// nor advances the scanner past it.
	//
	// Returns YES when an entity was scanned (the scanner is then positioned just
	// after the ';'). Returns NO otherwise, with the scan location restored to
	// where it was on entry, following the usual NSScanner convention.

	NSString *s = self.string;
	NSUInteger initialScanLocation = self.scanLocation;
	static const NSUInteger maxEntityLength = 20; // It’s probably smaller, but this is just for sanity.

	// Nothing to scan at the end of the string. The "&" is also required: the
	// range arithmetic below assumes exactly one delimiter precedes the name,
	// and would underflow if the scanner started on a ';'.
	if (initialScanLocation >= s.length || [s characterAtIndex:initialScanLocation] != '&') {
		return NO;
	}

	NSCharacterSet *whitespace = NSCharacterSet.whitespaceAndNewlineCharacterSet;

	while (true) {

		unichar ch = [s characterAtIndex:self.scanLocation];

		// Entities never contain whitespace, so this "&" was just a literal ampersand.
		if ([whitespace characterIsMember:ch]) {
			break;
		}

		if (ch == ';') {
			if (!decodedEntity) {
				return YES;
			}
			// The text between the "&" and the ";", delimiters excluded.
			NSString *rawEntity = [s substringWithRange:NSMakeRange(initialScanLocation + 1, (self.scanLocation - initialScanLocation) - 1)];
			NSString *decoded = [rawEntity rs_stringByDecodingEntity];
			*decodedEntity = decoded;
			if (!decoded) {
				break; // Looked like an entity but is not one we can decode.
			}
			self.scanLocation = self.scanLocation + 1;
			return YES;
		}

		self.scanLocation = self.scanLocation + 1;
		if (self.scanLocation - initialScanLocation > maxEntityLength) {
			break;
		}
		if (self.isAtEnd) {
			break;
		}
	}

	// Failed: put the scanner back so the caller sees an untouched location.
	self.scanLocation = initialScanLocation;
	return NO;
}
|
|
|
|
|
|
|
|
|
|
@end
|
|
|
|
|
|
2017-06-25 23:06:01 +02:00
|
|
|
|
// Returns a string consisting of the single UTF-16 code unit `value`.
static NSString *RSParserStringWithValue(unichar value) {

	NSString *singleCharacterString = [NSString stringWithFormat:@"%C", value];
	return singleCharacterString;
}
|
|
|
|
|
|
|
|
|
|
// Returns the shared lookup table used to decode HTML entities.
//
// Keys are entity bodies without the surrounding & and ; ("amp", "#8217",
// "#x2019"); values are the text each entity decodes to. The table is built
// once, on first use, and the same instance is returned on every call.
//
// Decimal references 145–153 are mapped the way browsers treat them (as
// Windows-1252 punctuation) rather than as C1 control characters. Characters
// that are invisible in source (no-break space, soft hyphen, en/em space) are
// spelled with RSParserStringWithValue so the intent stays readable.
static NSDictionary *RSEntitiesDictionary(void) {

	static NSDictionary *entitiesDictionary = nil;

	static dispatch_once_t onceToken;
	dispatch_once(&onceToken, ^{

		entitiesDictionary =
		@{@"#034": @"\"",
		  @"#038": @"&",
		  @"#38": @"&",
		  @"#039": @"'",
		  @"#145": @"‘",
		  @"#146": @"’",
		  @"#147": @"“",
		  @"#148": @"”",
		  @"#149": @"•",
		  @"#150": @"–",
		  @"#151": @"—",
		  @"#153": @"™",
		  @"#160": RSParserStringWithValue(160),
		  @"#161": @"¡",
		  @"#162": @"¢",
		  @"#163": @"£",
		  @"#164": @"¤",
		  @"#165": @"¥",
		  @"#166": @"¦",
		  @"#167": @"§",
		  @"#168": @"¨",
		  @"#169": @"©",
		  @"#170": @"ª",
		  @"#171": @"«",
		  @"#172": @"¬",
		  @"#173": RSParserStringWithValue(173), // soft hyphen
		  @"#174": @"®",
		  @"#175": @"¯",
		  @"#176": @"°",
		  @"#177": @"±",
		  @"#178": @"²",
		  @"#179": @"³",
		  @"#180": @"´",
		  @"#181": @"µ",
		  @"#182": @"¶",
		  @"#183": @"·",
		  @"#184": @"¸",
		  @"#185": @"¹",
		  @"#186": @"º",
		  @"#187": @"»",
		  @"#188": @"¼",
		  @"#189": @"½",
		  @"#190": @"¾",
		  @"#191": @"¿",
		  @"#192": @"À",
		  @"#193": @"Á",
		  @"#194": @"Â",
		  @"#195": @"Ã",
		  @"#196": @"Ä",
		  @"#197": @"Å",
		  @"#198": @"Æ",
		  @"#199": @"Ç",
		  @"#200": @"È",
		  @"#201": @"É",
		  @"#202": @"Ê",
		  @"#203": @"Ë",
		  @"#204": @"Ì",
		  @"#205": @"Í",
		  @"#206": @"Î",
		  @"#207": @"Ï",
		  @"#208": @"Ð",
		  @"#209": @"Ñ",
		  @"#210": @"Ò",
		  @"#211": @"Ó",
		  @"#212": @"Ô",
		  @"#213": @"Õ",
		  @"#214": @"Ö",
		  @"#215": @"×",
		  @"#216": @"Ø",
		  @"#217": @"Ù",
		  @"#218": @"Ú",
		  @"#219": @"Û",
		  @"#220": @"Ü",
		  @"#221": @"Ý",
		  @"#222": @"Þ",
		  @"#223": @"ß",
		  @"#224": @"à",
		  @"#225": @"á",
		  @"#226": @"â",
		  @"#227": @"ã",
		  @"#228": @"ä",
		  @"#229": @"å",
		  @"#230": @"æ",
		  @"#231": @"ç",
		  @"#232": @"è",
		  @"#233": @"é",
		  @"#234": @"ê",
		  @"#235": @"ë",
		  @"#236": @"ì",
		  @"#237": @"í",
		  @"#238": @"î",
		  @"#239": @"ï",
		  @"#240": @"ð",
		  @"#241": @"ñ",
		  @"#242": @"ò",
		  @"#243": @"ó",
		  @"#244": @"ô",
		  @"#245": @"õ",
		  @"#246": @"ö",
		  @"#247": @"÷",
		  @"#248": @"ø",
		  @"#249": @"ù",
		  @"#250": @"ú",
		  @"#251": @"û",
		  @"#252": @"ü",
		  @"#253": @"ý",
		  @"#254": @"þ",
		  @"#255": @"ÿ",
		  @"#32": @" ",
		  @"#34": @"\"",
		  @"#39": @"'",
		  @"#8194": RSParserStringWithValue(8194), // en space
		  @"#8195": RSParserStringWithValue(8195), // em space
		  @"#8211": @"–",
		  @"#8212": @"—",
		  @"#8216": @"‘",
		  @"#8217": @"’",
		  @"#8220": @"“",
		  @"#8221": @"”",
		  @"#8230": @"…",
		  @"#8617": RSParserStringWithValue(8617),
		  @"AElig": @"Æ",
		  @"Aacute": @"Á",
		  @"Acirc": @"Â",
		  @"Agrave": @"À",
		  @"Aring": @"Å",
		  @"Atilde": @"Ã",
		  @"Auml": @"Ä",
		  @"Ccedil": @"Ç",
		  @"Dstrok": @"Đ",
		  @"ETH": @"Ð",
		  @"Eacute": @"É",
		  @"Ecirc": @"Ê",
		  @"Egrave": @"È",
		  @"Euml": @"Ë",
		  @"Iacute": @"Í",
		  @"Icirc": @"Î",
		  @"Igrave": @"Ì",
		  @"Iuml": @"Ï",
		  @"Ntilde": @"Ñ",
		  @"Oacute": @"Ó",
		  @"Ocirc": @"Ô",
		  @"Ograve": @"Ò",
		  @"Oslash": @"Ø",
		  @"Otilde": @"Õ",
		  @"Ouml": @"Ö",
		  @"Pi": @"Π",
		  @"THORN": @"Þ",
		  @"Uacute": @"Ú",
		  @"Ucirc": @"Û",
		  @"Ugrave": @"Ù",
		  @"Uuml": @"Ü",
		  @"Yacute": @"Ý",
		  @"aacute": @"á",
		  @"acirc": @"â",
		  @"acute": @"´",
		  @"aelig": @"æ",
		  @"agrave": @"à",
		  @"amp": @"&",
		  @"apos": @"'",
		  @"aring": @"å",
		  @"atilde": @"ã",
		  @"auml": @"ä",
		  @"brkbar": @"¦",
		  @"brvbar": @"¦",
		  @"ccedil": @"ç",
		  @"cedil": @"¸",
		  @"cent": @"¢",
		  @"copy": @"©",
		  @"curren": @"¤",
		  @"deg": @"°",
		  @"die": @"¨",
		  @"divide": @"÷",
		  @"eacute": @"é",
		  @"ecirc": @"ê",
		  @"egrave": @"è",
		  @"eth": @"ð",
		  @"euml": @"ë",
		  @"euro": @"€",
		  @"frac12": @"½",
		  @"frac14": @"¼",
		  @"frac34": @"¾",
		  @"gt": @">",
		  @"hearts": @"♥",
		  @"hellip": @"…",
		  @"iacute": @"í",
		  @"icirc": @"î",
		  @"iexcl": @"¡",
		  @"igrave": @"ì",
		  @"iquest": @"¿",
		  @"iuml": @"ï",
		  @"laquo": @"«",
		  @"ldquo": @"“",
		  @"lsquo": @"‘",
		  @"lt": @"<",
		  @"macr": @"¯",
		  @"mdash": @"—",
		  @"micro": @"µ",
		  @"middot": @"·",
		  @"ndash": @"–",
		  @"not": @"¬",
		  @"ntilde": @"ñ",
		  @"oacute": @"ó",
		  @"ocirc": @"ô",
		  @"ograve": @"ò",
		  @"ordf": @"ª",
		  @"ordm": @"º",
		  @"oslash": @"ø",
		  @"otilde": @"õ",
		  @"ouml": @"ö",
		  @"para": @"¶",
		  @"pi": @"π",
		  @"plusmn": @"±",
		  @"pound": @"£",
		  @"quot": @"\"",
		  @"raquo": @"»",
		  @"rdquo": @"”",
		  @"reg": @"®",
		  @"rsquo": @"’",
		  @"sect": @"§",
		  @"shy": RSParserStringWithValue(173), // soft hyphen
		  @"sup1": @"¹",
		  @"sup2": @"²",
		  @"sup3": @"³",
		  @"szlig": @"ß",
		  @"thorn": @"þ",
		  @"times": @"×",
		  @"trade": @"™",
		  @"uacute": @"ú",
		  @"ucirc": @"û",
		  @"ugrave": @"ù",
		  @"uml": @"¨",
		  @"uuml": @"ü",
		  @"yacute": @"ý",
		  @"yen": @"¥",
		  @"yuml": @"ÿ",
		  @"infin": @"∞",
		  @"nbsp": RSParserStringWithValue(160),
		  @"#x21A9": RSParserStringWithValue(8617),
		  @"#xFE0E": RSParserStringWithValue(65038),
		  @"#x2019": RSParserStringWithValue(8217),
		  @"#x2026": RSParserStringWithValue(8230),
		  @"#x201C": RSParserStringWithValue(8220),
		  @"#x201D": RSParserStringWithValue(8221),
		  @"#x2014": RSParserStringWithValue(8212)};
	});

	return entitiesDictionary;
}
|