Make it build. Add a README.

This commit is contained in:
Brent Simmons 2017-06-25 16:32:07 -07:00
parent 1ad4474b29
commit e9c19e427a
41 changed files with 454 additions and 485 deletions

@ -8,7 +8,7 @@
import Foundation
// FeedParser knows about the various syndication feed types.
// FeedParser handles the various syndication feed types.
// It might be a good idea to do a plugin-style architecture here instead
// but feed formats don’t appear all that often, so it’s probably not necessary.
@ -21,6 +21,8 @@ public struct FeedParser {
// Can call with partial data while still downloading, for instance.
// If there’s not enough data, return .unknown. Ask again when there’s more data.
// If it’s definitely not a feed, return .notAFeed.
//
// This should be fast enough to call on the main thread.
if parserData.data.count < minNumberOfBytesRequired {
return .unknown
@ -50,6 +52,13 @@ public struct FeedParser {
public static func parseFeed(_ parserData: ParserData) throws -> ParsedFeed? {
// All the concrete parsers return a ParsedFeed struct.
// Related: ParsedItem, ParsedAuthor, ParsedHub, ParsedAttachment.
//
// This is probably fast enough to call on the main thread 
// but it’s probably a good idea to use a background queue if
// you might be doing a lot of parsing. (Such as in a feed reader.)
do {
let type = feedType(parserData)
@ -65,7 +74,7 @@ public struct FeedParser {
return RSSParser.parse(parserData)
case .atom:
return AtomParser.parser(parserData)
return AtomParser.parse(parserData)
case .unknown, .notAFeed:
return nil

@ -0,0 +1,28 @@
//
// AtomParser.swift
// RSParser
//
// Created by Brent Simmons on 6/25/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
// AtomParser wraps the Objective-C RSAtomParser.
//
// The Objective-C parser creates RSParsedFeed, RSParsedArticle, etc.
// This wrapper then creates ParsedFeed, ParsedItem, etc. so that it creates
// the same things that JSONFeedParser and RSSInJSONParser create.
//
// In general, you should see FeedParser.swift for all your feed-parsing needs.
public struct AtomParser {

    /// Parses Atom data by handing it to the Objective-C RSAtomParser and
    /// converting the resulting RSParsedFeed into a ParsedFeed.
    ///
    /// - Parameter parserData: The data to parse, together with its URL string.
    /// - Returns: The transformed feed, or nil when RSAtomParser returns no feed.
    public static func parse(_ parserData: ParserData) -> ParsedFeed? {

        guard let atomFeed = RSAtomParser.parseFeed(with: parserData) else {
            return nil
        }
        return RSParsedFeedTransformer.parsedFeed(atomFeed)
    }
}

@ -6,8 +6,13 @@
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
//
#import "FeedParser.h"
@import Foundation;
@interface RSAtomParser : NSObject <FeedParser>
@class ParserData;
@class RSParsedFeed;
@interface RSAtomParser : NSObject
+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData;
@end

@ -9,12 +9,11 @@
#import <libxml/xmlstring.h>
#import "RSAtomParser.h"
#import "RSSAXParser.h"
#import "FeedParser.h"
#import "RSParsedFeed.h"
#import "RSParsedArticle.h"
#import "RSXMLData.h"
#import "NSString+RSXML.h"
#import "NSString+RSParser.h"
#import "RSDateParser.h"
#import <RSParser/RSParser-Swift.h>
@interface RSAtomParser () <RSSAXParserDelegate>
@ -44,57 +43,24 @@
#pragma mark - Class Methods
+ (BOOL)canParseFeed:(RSXMLData *)xmlData {
+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData {
// Checking for '<feed' and '<entry' within first n characters should do it.
@autoreleasepool {
NSData *feedData = xmlData.data;
NSString *s = [[NSString alloc] initWithBytesNoCopy:(void *)feedData.bytes length:feedData.length encoding:NSUTF8StringEncoding freeWhenDone:NO];
if (!s) {
s = [[NSString alloc] initWithData:feedData encoding:NSUTF8StringEncoding];
}
if (!s) {
s = [[NSString alloc] initWithData:feedData encoding:NSUnicodeStringEncoding];
}
if (!s) {
return NO;
}
static const NSInteger numberOfCharactersToSearch = 4096;
NSRange rangeToSearch = NSMakeRange(0, numberOfCharactersToSearch);
if (s.length < numberOfCharactersToSearch) {
rangeToSearch.length = s.length;
}
NSRange feedRange = [s rangeOfString:@"<feed" options:NSLiteralSearch range:rangeToSearch];
NSRange entryRange = [s rangeOfString:@"<entry" options:NSLiteralSearch range:rangeToSearch];
if (feedRange.length < 1 || entryRange.length < 1) {
return NO;
}
if (feedRange.location > entryRange.location) {
return NO; // Wrong order.
}
}
return YES;
RSAtomParser *parser = [[[self class] alloc] initWithParserData:parserData];
return [parser parseFeed];
}
#pragma mark - Init
- (instancetype)initWithXMLData:(RSXMLData *)xmlData {
- (instancetype)initWithParserData:(ParserData *)parserData {
self = [super init];
if (!self) {
return nil;
}
_feedData = xmlData.data;
_urlString = xmlData.urlString;
_feedData = parserData.data;
_urlString = parserData.url;
_parser = [[RSSAXParser alloc] initWithDelegate:self];
_attributesStack = [NSMutableArray new];
_articles = [NSMutableArray new];
@ -105,7 +71,7 @@
#pragma mark - API
- (RSParsedFeed *)parseFeed:(NSError **)error {
- (RSParsedFeed *)parseFeed {
[self parse];
@ -315,7 +281,7 @@ static const NSInteger kSelfLength = 5;
- (NSString *)currentStringWithHTMLEntitiesDecoded {
return [self.parser.currentStringWithTrimmedWhitespace rs_stringByDecodingHTMLEntities];
return [self.parser.currentStringWithTrimmedWhitespace rsparser_stringByDecodingHTMLEntities];
}

@ -1,28 +0,0 @@
//
// RSFeedParser.h
// RSParser
//
// Created by Brent Simmons on 1/4/15.
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
//
#import "FeedParser.h"
// If you have a feed and don’t know or care what it is (RSS or Atom),
// then call RSParseFeed or RSParseFeedSync.
@class RSXMLData;
@class RSParsedFeed;
NS_ASSUME_NONNULL_BEGIN
BOOL RSCanParseFeed(RSXMLData *xmlData);
typedef void (^RSParsedFeedBlock)(RSParsedFeed * _Nullable parsedFeed, NSError * _Nullable error);
// callback is called on main queue.
void RSParseFeed(RSXMLData *xmlData, RSParsedFeedBlock callback);
RSParsedFeed * _Nullable RSParseFeedSync(RSXMLData *xmlData, NSError * _Nullable * _Nullable error);
NS_ASSUME_NONNULL_END

@ -1,216 +0,0 @@
//
// FeedParser.m
// RSParser
//
// Created by Brent Simmons on 1/4/15.
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
//
#import "RSFeedParser.h"
#import "FeedParser.h"
#import "RSXMLData.h"
#import "RSRSSParser.h"
#import "RSAtomParser.h"
// Returns the array of feed parser classes (RSS first, then Atom).
// The array is created once and reused on every later call.
static NSArray *parserClasses(void) {

    static dispatch_once_t once;
    static NSArray *classes = nil;

    dispatch_once(&once, ^{
        classes = @[RSRSSParser.class, RSAtomParser.class];
    });

    return classes;
}
// Sanity check: returns NO when xmlData carries no data at all, YES otherwise.
static BOOL feedMayBeParseable(RSXMLData *xmlData) {

    /*TODO: check size, type, etc.*/
    BOOL hasData = (xmlData.data != nil);
    return hasData;
}
static BOOL optimisticCanParseRSSData(const char *bytes, NSUInteger numberOfBytes);
static BOOL optimisticCanParseAtomData(const char *bytes, NSUInteger numberOfBytes);
static BOOL optimisticCanParseRDF(const char *bytes, NSUInteger numberOfBytes);
static BOOL dataIsProbablyHTML(const char *bytes, NSUInteger numberOfBytes);
static BOOL dataIsSomeWeirdException(const char *bytes, NSUInteger numberOfBytes);
static BOOL dataHasLeftCaret(const char *bytes, NSUInteger numberOfBytes);
static const NSUInteger maxNumberOfBytesToSearch = 4096;
static const NSUInteger minNumberOfBytesToSearch = 20;
// Picks the parser class that should handle xmlData, or nil if none applies.
//
// When there are more than minNumberOfBytesToSearch bytes, the first
// maxNumberOfBytesToSearch bytes (at most) are sniffed for well-known markers:
//   - no '<' at all          -> nil
//   - "<rss" and "<channel"  -> RSRSSParser
//   - "<feed"                -> RSAtomParser
//   - RDF, HTML, known junk  -> nil
// If sniffing is inconclusive (or there is too little data to sniff), each
// parser class is asked in turn via +canParseFeed:.
//
// Always returns a Class (never an instance); parserForXMLData() does the
// alloc/init on the result.
static Class parserClassForXMLData(RSXMLData *xmlData) {

    if (!feedMayBeParseable(xmlData)) {
        return nil;
    }

    // TODO: check for things like images and movies and return nil.

    const char *bytes = xmlData.data.bytes;
    NSUInteger numberOfBytes = xmlData.data.length;

    if (numberOfBytes > minNumberOfBytesToSearch) {

        // Only the head of the document is inspected.
        if (numberOfBytes > maxNumberOfBytesToSearch) {
            numberOfBytes = maxNumberOfBytesToSearch;
        }

        if (!dataHasLeftCaret(bytes, numberOfBytes)) {
            return nil;
        }
        if (optimisticCanParseRSSData(bytes, numberOfBytes)) {
            return [RSRSSParser class];
        }
        if (optimisticCanParseAtomData(bytes, numberOfBytes)) {
            return [RSAtomParser class];
        }
        if (optimisticCanParseRDF(bytes, numberOfBytes)) {
            return nil; //TODO: parse RDF feeds
        }
        if (dataIsProbablyHTML(bytes, numberOfBytes)) {
            return nil;
        }
        if (dataIsSomeWeirdException(bytes, numberOfBytes)) {
            return nil;
        }
    }

    for (Class parserClass in parserClasses()) {
        if ([parserClass canParseFeed:xmlData]) {
            // Return the class itself. Returning an allocated parser here
            // would make parserForXMLData() send +alloc to an instance.
            return parserClass;
        }
    }

    return nil;
}
// Creates a parser for xmlData using the class chosen by
// parserClassForXMLData(); returns nil when no class was chosen.
static id<FeedParser> parserForXMLData(RSXMLData *xmlData) {

    Class chosenClass = parserClassForXMLData(xmlData);
    if (chosenClass) {
        return [[chosenClass alloc] initWithXMLData:xmlData];
    }
    return nil;
}
// YES when parserClassForXMLData() finds a parser class for xmlData.
static BOOL canParseXMLData(RSXMLData *xmlData) {

    Class chosenClass = parserClassForXMLData(xmlData);
    return chosenClass != nil;
}
// Returns YES if `string` occurs in `bytes`, searching no more than
// numberOfBytes characters (strnstr semantics).
static BOOL didFindString(const char *string, const char *bytes, NSUInteger numberOfBytes) {

    return strnstr(bytes, string, numberOfBytes) != NULL;
}
// YES if a '<' appears within the searched bytes.
static BOOL dataHasLeftCaret(const char *bytes, NSUInteger numberOfBytes) {

    const char *leftCaret = "<";
    return didFindString(leftCaret, bytes, numberOfBytes);
}
// Heuristic: YES if any common HTML marker appears in the searched bytes.
// Won't catch every single case, which is fine.
static BOOL dataIsProbablyHTML(const char *bytes, NSUInteger numberOfBytes) {

    // Checked in this order; the first hit wins.
    const char *htmlMarkers[] = {
        "<html",
        "<body",
        "doctype html",
        "DOCTYPE html",
        "DOCTYPE HTML",
        "<meta",
        "<HTML"
    };
    const size_t markerCount = sizeof(htmlMarkers) / sizeof(htmlMarkers[0]);

    for (size_t i = 0; i < markerCount; i++) {
        if (didFindString(htmlMarkers[i], bytes, numberOfBytes)) {
            return YES;
        }
    }

    return NO;
}
// YES for known non-feed XML: currently only a Google-schema <errors> element.
static BOOL dataIsSomeWeirdException(const char *bytes, NSUInteger numberOfBytes) {

    return didFindString("<errors xmlns='http://schemas.google", bytes, numberOfBytes);
}
// YES if the searched bytes contain an "<rdf:RDF" opening tag.
static BOOL optimisticCanParseRDF(const char *bytes, NSUInteger numberOfBytes) {

    const char *rdfTag = "<rdf:RDF";
    return didFindString(rdfTag, bytes, numberOfBytes);
}
// YES if the searched bytes contain both "<rss" and "<channel".
// The "<channel" search only runs when "<rss" was found.
static BOOL optimisticCanParseRSSData(const char *bytes, NSUInteger numberOfBytes) {

    return didFindString("<rss", bytes, numberOfBytes)
        && didFindString("<channel", bytes, numberOfBytes);
}
// YES if the searched bytes contain a "<feed" opening tag.
static BOOL optimisticCanParseAtomData(const char *bytes, NSUInteger numberOfBytes) {

    const char *feedTag = "<feed";
    return didFindString(feedTag, bytes, numberOfBytes);
}
// Hops to the main queue and, if a callback was supplied, invokes it there
// with the parsed feed and error.
static void callCallback(RSParsedFeedBlock callback, RSParsedFeed *parsedFeed, NSError *error) {

    dispatch_async(dispatch_get_main_queue(), ^{
        @autoreleasepool {
            if (!callback) {
                return;
            }
            callback(parsedFeed, error);
        }
    });
}
#pragma mark - API
// Public entry point: YES when a parser class can be found for xmlData.
BOOL RSCanParseFeed(RSXMLData *xmlData) {

    BOOL canParse = canParseXMLData(xmlData);
    return canParse;
}
// Parses xmlData on a utility-QoS global queue, then delivers the result
// (feed and error) to callback on the main queue via callCallback().
void RSParseFeed(RSXMLData *xmlData, RSParsedFeedBlock callback) {

    dispatch_queue_t parsingQueue = dispatch_get_global_queue(QOS_CLASS_UTILITY, 0);

    dispatch_async(parsingQueue, ^{
        NSError *parseError = nil;
        RSParsedFeed *feed = RSParseFeedSync(xmlData, &parseError);
        callCallback(callback, feed, parseError);
    });
}
// Synchronously parses xmlData with whichever parser parserForXMLData()
// returns. When there is no parser the message goes to nil and nil is returned.
RSParsedFeed *RSParseFeedSync(RSXMLData *xmlData, NSError **error) {

    return [parserForXMLData(xmlData) parseFeed:error];
}

@ -1,6 +1,6 @@
//
// RSParsedArticle.h
// RSXML
// RSParser
//
// Created by Brent Simmons on 12/6/14.
// Copyright (c) 2014 Ranchero Software LLC. All rights reserved.

@ -1,13 +1,14 @@
//
// RSParsedArticle.m
// RSXML
// RSParser
//
// Created by Brent Simmons on 12/6/14.
// Copyright (c) 2014 Ranchero Software LLC. All rights reserved.
//
#import "RSParsedArticle.h"
#import "RSXMLInternal.h"
#import "RSParserInternal.h"
#import "NSString+RSParser.h"
@implementation RSParsedArticle
@ -89,7 +90,7 @@
NSAssert(!RSParserStringIsEmpty(self.feedURL), nil);
[s appendString:self.feedURL];
return [s rsxml_md5HashString];
return [s rsparser_md5Hash];
}
- (void)calculateArticleID {

@ -1,6 +1,6 @@
//
// RSParsedFeed.h
// RSXML
// RSParser
//
// Created by Brent Simmons on 7/12/15.
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.

@ -1,6 +1,6 @@
//
// RSParsedFeed.m
// RSXML
// RSParser
//
// Created by Brent Simmons on 7/12/15.
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.

@ -0,0 +1,60 @@
//
// RSParsedFeedTransformer.swift
// RSParser
//
// Created by Brent Simmons on 6/25/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
// RSRSSParser and RSAtomParser were written in Objective-C quite a while ago.
// They create an RSParsedFeed object and related Objective-C objects.
// These functions take an RSParsedFeed and return a Swift-y ParsedFeed,
// which is part of providing a single API for feed parsing.
struct RSParsedFeedTransformer {

    /// Converts an Objective-C RSParsedFeed into a Swift ParsedFeed.
    ///
    /// Only the title, home page URL (the feed's `link`), feed URL and items
    /// are carried over; every other ParsedFeed field is nil or false.
    ///
    /// - Parameters:
    ///   - rsParsedFeed: The feed produced by RSRSSParser or RSAtomParser.
    ///   - type: The feed type to record on the result. Defaults to `.rss`,
    ///     which is what every call without this argument has always produced;
    ///     callers transforming an Atom feed can pass `.atom`.
    /// - Returns: The equivalent ParsedFeed.
    static func parsedFeed(_ rsParsedFeed: RSParsedFeed, type: FeedType = .rss) -> ParsedFeed {

        let items = parsedItems(rsParsedFeed.articles)
        return ParsedFeed(type: type, title: rsParsedFeed.title, homePageURL: rsParsedFeed.link, feedURL: rsParsedFeed.urlString, feedDescription: nil, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items)
    }
}
private extension RSParsedFeedTransformer {

    /// Creates one ParsedItem for each RSParsedArticle in the set.
    static func parsedItems(_ parsedArticles: Set<RSParsedArticle>) -> [ParsedItem] {

        return parsedArticles.map { parsedItem($0) }
    }

    /// Maps a single RSParsedArticle onto a ParsedItem.
    ///
    /// The article's permalink becomes `url`, its link becomes `externalURL`,
    /// and its body becomes `contentHTML`. Fields with no counterpart on
    /// RSParsedArticle are passed as nil.
    static func parsedItem(_ parsedArticle: RSParsedArticle) -> ParsedItem {

        return ParsedItem(
            uniqueID: parsedArticle.articleID,
            url: parsedArticle.permalink,
            externalURL: parsedArticle.link,
            title: parsedArticle.title,
            contentHTML: parsedArticle.body,
            contentText: nil,
            summary: nil,
            imageURL: nil,
            bannerImageURL: nil,
            datePublished: parsedArticle.datePublished,
            dateModified: parsedArticle.dateModified,
            authors: parsedAuthors(parsedArticle.author),
            tags: nil,
            attachments: nil)
    }

    /// Wraps the article's author string in a one-element ParsedAuthor array,
    /// stored as the author's email address. Returns nil when there is no author.
    static func parsedAuthors(_ authorEmailAddress: String?) -> [ParsedAuthor]? {

        if let emailAddress = authorEmailAddress {
            return [ParsedAuthor(name: nil, url: nil, avatarURL: nil, emailAddress: emailAddress)]
        }
        return nil
    }
}

@ -6,8 +6,14 @@
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
//
#import "FeedParser.h"
@import Foundation;
@class ParserData;
@class RSParsedFeed;
@interface RSRSSParser : NSObject
+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData;
@interface RSRSSParser : NSObject <FeedParser>
@end

@ -11,10 +11,10 @@
#import "RSSAXParser.h"
#import "RSParsedFeed.h"
#import "RSParsedArticle.h"
#import "RSXMLData.h"
#import "RSXMLInternal.h"
#import "NSString+RSXML.h"
#import "RSParserInternal.h"
#import "NSString+RSParser.h"
#import "RSDateParser.h"
#import <RSParser/RSParser-Swift.h>
@interface RSRSSParser () <RSSAXParserDelegate>
@ -40,57 +40,23 @@
#pragma mark - Class Methods
+ (BOOL)canParseFeed:(RSXMLData *)xmlData {
+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData {
// Checking for '<rss' and '<channel>' within first n characters should do it.
// TODO: handle RSS 1.0
@autoreleasepool {
NSData *feedData = xmlData.data;
NSString *s = [[NSString alloc] initWithBytesNoCopy:(void *)feedData.bytes length:feedData.length encoding:NSUTF8StringEncoding freeWhenDone:NO];
if (!s) {
s = [[NSString alloc] initWithData:feedData encoding:NSUTF8StringEncoding];
}
if (!s) {
s = [[NSString alloc] initWithData:feedData encoding:NSUnicodeStringEncoding];
}
if (!s) {
return NO;
}
static const NSInteger numberOfCharactersToSearch = 4096;
NSRange rangeToSearch = NSMakeRange(0, numberOfCharactersToSearch);
if (s.length < numberOfCharactersToSearch) {
rangeToSearch.length = s.length;
}
NSRange rssRange = [s rangeOfString:@"<rss" options:NSLiteralSearch range:rangeToSearch];
NSRange channelRange = [s rangeOfString:@"<channel>" options:NSLiteralSearch range:rangeToSearch];
if (rssRange.length < 1 || channelRange.length < 1) {
return NO;
}
if (rssRange.location > channelRange.location) {
return NO; // Wrong order.
}
}
return YES;
RSRSSParser *parser = [[[self class] alloc] initWithParserData:parserData];
return [parser parseFeed];
}
#pragma mark - Init
- (instancetype)initWithXMLData:(RSXMLData *)xmlData {
- (instancetype)initWithParserData:(ParserData *)parserData {
self = [super init];
if (!self) {
return nil;
}
_feedData = xmlData.data;
_urlString = xmlData.urlString;
_feedData = parserData.data;
_urlString = parserData.url;
_parser = [[RSSAXParser alloc] initWithDelegate:self];
_articles = [NSMutableArray new];
@ -100,7 +66,7 @@
#pragma mark - API
- (RSParsedFeed *)parseFeed:(NSError **)error {
- (RSParsedFeed *)parseFeed {
[self parse];
@ -292,7 +258,7 @@ static const NSInteger kTrueLength = 5;
- (NSString *)currentStringWithHTMLEntitiesDecoded {
return [self.parser.currentStringWithTrimmedWhitespace rs_stringByDecodingHTMLEntities];
return [self.parser.currentStringWithTrimmedWhitespace rsparser_stringByDecodingHTMLEntities];
}
- (void)addArticleElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix {

@ -0,0 +1,28 @@
//
// RSSParser.swift
// RSParser
//
// Created by Brent Simmons on 6/25/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
// RSSParser wraps the Objective-C RSRSSParser.
//
// The Objective-C parser creates RSParsedFeed, RSParsedArticle, etc.
// This wrapper then creates ParsedFeed, ParsedItem, etc. so that it creates
// the same things that JSONFeedParser and RSSInJSONParser create.
//
// In general, you should see FeedParser.swift for all your feed-parsing needs.
public struct RSSParser {

    /// Parses RSS data by handing it to the Objective-C RSRSSParser and
    /// converting the resulting RSParsedFeed into a ParsedFeed.
    ///
    /// - Parameter parserData: The data to parse, together with its URL string.
    /// - Returns: The transformed feed, or nil when RSRSSParser returns no feed.
    public static func parse(_ parserData: ParserData) -> ParsedFeed? {

        guard let rssFeed = RSRSSParser.parseFeed(with: parserData) else {
            return nil
        }
        return RSParsedFeedTransformer.parsedFeed(rssFeed)
    }
}

@ -10,12 +10,12 @@
/*Returns all <a href="some_url">some_text</a> as RSHTMLLink object array.*/
@class RSXMLData;
@class ParserData;
@class RSHTMLLink;
@interface RSHTMLLinkParser : NSObject
+ (NSArray <RSHTMLLink *> *)htmlLinksWithData:(RSXMLData *)xmlData;
+ (NSArray <RSHTMLLink *> *)htmlLinksWithParserData:(ParserData *)parserData;
@end

@ -10,14 +10,14 @@
#import "RSHTMLLinkParser.h"
#import "RSSAXHTMLParser.h"
#import "RSSAXParser.h"
#import "RSXMLData.h"
#import "RSXMLInternal.h"
#import "RSParserInternal.h"
#import <RSParser/RSParser-Swift.h>
@interface RSHTMLLinkParser() <RSSAXHTMLParserDelegate>
@property (nonatomic, readonly) NSMutableArray *links;
@property (nonatomic, readonly) RSXMLData *xmlData;
@property (nonatomic, readonly) ParserData *parserData;
@property (nonatomic, readonly) NSMutableArray *dictionaries;
@property (nonatomic, readonly) NSURL *baseURL;
@ -38,19 +38,19 @@
#pragma mark - Class Methods
// Returns the links the parser collected from parserData (parsing runs inside
// -initWithParserData:). The selector must match the declaration in
// RSHTMLLinkParser.h, which is htmlLinksWithParserData:.
+ (NSArray *)htmlLinksWithData:(RSXMLData *)xmlData {
+ (NSArray *)htmlLinksWithParserData:(ParserData *)parserData {
RSHTMLLinkParser *parser = [[self alloc] initWithXMLData:xmlData];
RSHTMLLinkParser *parser = [[self alloc] initWithParserData:parserData];
return parser.links;
}
#pragma mark - Init
- (instancetype)initWithXMLData:(RSXMLData *)xmlData {
- (instancetype)initWithParserData:(ParserData *)parserData {
NSParameterAssert(xmlData.data);
NSParameterAssert(xmlData.urlString);
NSParameterAssert(parserData.data);
NSParameterAssert(parserData.url);
self = [super init];
if (!self) {
@ -58,9 +58,9 @@
}
_links = [NSMutableArray new];
_xmlData = xmlData;
_parserData = parserData;
_dictionaries = [NSMutableArray new];
_baseURL = [NSURL URLWithString:xmlData.urlString];
_baseURL = [NSURL URLWithString:parserData.url];
[self parse];
@ -73,7 +73,7 @@
- (void)parse {
RSSAXHTMLParser *parser = [[RSSAXHTMLParser alloc] initWithDelegate:self];
[parser parseData:self.xmlData.data];
[parser parseData:self.parserData.data];
[parser finishParsing];
}
@ -127,7 +127,7 @@ static const NSInteger kAnchorLength = 2;
[self.links addObject:link];
NSDictionary *d = [SAXParser attributesDictionary:attributes];
if (!RSParser_IsEmpty(d)) {
if (!RSParserObjectIsEmpty(d)) {
[self handleLinkAttributes:d];
}

@ -7,7 +7,7 @@
//
#import "RSHTMLMetadata.h"
#import "RSXMLInternal.h"
#import "RSParserInternal.h"
static NSString *urlStringFromDictionary(NSDictionary *d);
static NSString *absoluteURLStringWithRelativeURLString(NSString *relativeURLString, NSString *baseURLString);

@ -10,17 +10,13 @@
@class RSHTMLMetadata;
@class RSXMLData;
@class ParserData;
NS_ASSUME_NONNULL_BEGIN
@interface RSHTMLMetadataParser : NSObject
+ (RSHTMLMetadata *)HTMLMetadataWithXMLData:(RSXMLData *)xmlData;
- (instancetype)initWithXMLData:(RSXMLData *)xmlData;
@property (nonatomic, readonly) RSHTMLMetadata *metadata;
+ (RSHTMLMetadata *)HTMLMetadataWithParserData:(ParserData *)parserData;
@end

@ -1,6 +1,6 @@
//
// RSHTMLMetadataParser.m
// RSXML
// RSParser
//
// Created by Brent Simmons on 3/6/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
@ -8,16 +8,15 @@
#import <libxml/xmlstring.h>
#import "RSHTMLMetadataParser.h"
#import "RSXMLData.h"
#import "RSHTMLMetadata.h"
#import "RSSAXHTMLParser.h"
#import "RSSAXParser.h"
#import "RSXMLInternal.h"
#import "RSParserInternal.h"
#import <RSParser/RSParser-Swift.h>
@interface RSHTMLMetadataParser () <RSSAXHTMLParserDelegate>
@property (nonatomic, readonly) RSXMLData *xmlData;
@property (nonatomic, readonly) ParserData *parserData;
@property (nonatomic, readwrite) RSHTMLMetadata *metadata;
@property (nonatomic) NSMutableArray *dictionaries;
@property (nonatomic) BOOL didFinishParsing;
@ -30,26 +29,26 @@
#pragma mark - Class Methods
+ (RSHTMLMetadata *)HTMLMetadataWithXMLData:(RSXMLData *)xmlData {
+ (RSHTMLMetadata *)HTMLMetadataWithParserData:(ParserData *)parserData {
RSHTMLMetadataParser *parser = [[self alloc] initWithXMLData:xmlData];
RSHTMLMetadataParser *parser = [[self alloc] initWithParserData:parserData];
return parser.metadata;
}
#pragma mark - Init
- (instancetype)initWithXMLData:(RSXMLData *)xmlData {
- (instancetype)initWithParserData:(ParserData *)parserData {
NSParameterAssert(xmlData.data);
NSParameterAssert(xmlData.urlString);
NSParameterAssert(parserData.data);
NSParameterAssert(parserData.url);
self = [super init];
if (!self) {
return nil;
}
_xmlData = xmlData;
_parserData = parserData;
_dictionaries = [NSMutableArray new];
[self parse];
@ -63,10 +62,10 @@
- (void)parse {
RSSAXHTMLParser *parser = [[RSSAXHTMLParser alloc] initWithDelegate:self];
[parser parseData:self.xmlData.data];
[parser parseData:self.parserData.data];
[parser finishParsing];
self.metadata = [[RSHTMLMetadata alloc] initWithURLString:self.xmlData.urlString dictionaries:[self.dictionaries copy]];
self.metadata = [[RSHTMLMetadata alloc] initWithURLString:self.parserData.url dictionaries:[self.dictionaries copy]];
}
@ -121,7 +120,7 @@ static const NSInteger kLinkLength = 5;
}
NSDictionary *d = [SAXParser attributesDictionary:attributes];
if (!RSParser_IsEmpty(d)) {
if (!RSParserObjectIsEmpty(d)) {
[self handleLinkAttributes:d];
}
}

@ -11,7 +11,7 @@
#import <libxml/tree.h>
#import <libxml/xmlstring.h>
#import <libxml/HTMLparser.h>
#import "RSXMLInternal.h"
#import "RSParserInternal.h"
@interface RSSAXHTMLParser ()

@ -1,6 +1,6 @@
//
// RSOPMLAttributes.h
// RSXML
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.

@ -1,13 +1,13 @@
//
// RSOPMLAttributes.m
// RSXML
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSOPMLAttributes.h"
#import "RSXMLInternal.h"
#import "RSParserInternal.h"
NSString *OPMLTextKey = @"text";

@ -1,6 +1,6 @@
//
// RSOPMLDocument.h
// RSXML
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.

@ -1,6 +1,6 @@
//
// RSOPMLDocument.m
// RSXML
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.

@ -0,0 +1,19 @@
//
// RSOPMLError.h
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
// Error domain used for errors created by RSOPMLWrongFormatError().
extern NSString *RSOPMLErrorDomain;
// Error codes used together with RSOPMLErrorDomain.
typedef NS_ENUM(NSInteger, RSOPMLErrorCode) {
// The data handed to the OPML parser is not OPML.
RSOPMLErrorCodeDataIsWrongFormat = 1024
};
// Creates an NSError in RSOPMLErrorDomain with code
// RSOPMLErrorCodeDataIsWrongFormat. fileName is interpolated into the
// error's localized description.
NSError *RSOPMLWrongFormatError(NSString *fileName);

@ -0,0 +1,22 @@
//
// RSOPMLError.m
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSOPMLError.h"
NSString *RSOPMLErrorDomain = @"com.ranchero.OPML";
// Builds the NSError reported when data handed to the OPML parser is not OPML.
//
// fileName: interpolated into the localized description shown to the user.
// Returns:  an error in RSOPMLErrorDomain with code
//           RSOPMLErrorCodeDataIsWrongFormat, carrying a localized description
//           and a localized failure reason in its userInfo.
NSError *RSOPMLWrongFormatError(NSString *fileName) {

    // User-facing text: "can’t" and "it’s" need their apostrophes.
    NSString *descriptionFormat = NSLocalizedString(@"The file %@ can’t be parsed because it’s not an OPML file.", @"OPML wrong format");
    NSString *localizedDescription = [NSString stringWithFormat:descriptionFormat, fileName];

    NSString *localizedFailureReason = NSLocalizedString(@"The file is not an OPML file.", @"OPML wrong format");

    NSDictionary *userInfo = @{
        NSLocalizedDescriptionKey: localizedDescription,
        NSLocalizedFailureReasonErrorKey: localizedFailureReason
    };

    return [[NSError alloc] initWithDomain:RSOPMLErrorDomain code:RSOPMLErrorCodeDataIsWrongFormat userInfo:userInfo];
}

@ -1,6 +1,6 @@
//
// RSOPMLFeedSpecifier.h
// RSXML
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.

@ -1,13 +1,13 @@
//
// RSOPMLFeedSpecifier.m
// RSXML
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSOPMLFeedSpecifier.h"
#import "RSXMLInternal.h"
#import "RSParserInternal.h"
@implementation RSOPMLFeedSpecifier

@ -1,6 +1,6 @@
//
// RSOPMLItem.h
// RSXML
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.

@ -1,6 +1,6 @@
//
// RSOPMLItem.m
// RSXML
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
@ -9,7 +9,7 @@
#import "RSOPMLItem.h"
#import "RSOPMLAttributes.h"
#import "RSOPMLFeedSpecifier.h"
#import "RSXMLInternal.h"
#import "RSParserInternal.h"
@interface RSOPMLItem ()

@ -1,6 +1,6 @@
//
// RSOPMLParser.h
// RSXML
// RSParser
//
// Created by Brent Simmons on 7/12/15.
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
@ -9,21 +9,12 @@
@import Foundation;
@class RSXMLData;
@class ParserData;
@class RSOPMLDocument;
typedef void (^RSParsedOPMLBlock)(RSOPMLDocument *OPMLDocument, NSError *error);
void RSParseOPML(RSXMLData *xmlData, RSParsedOPMLBlock callback); //async; calls back on main thread.
@interface RSOPMLParser: NSObject
- (instancetype)initWithXMLData:(RSXMLData *)xmlData;
@property (nonatomic, readonly) RSOPMLDocument *OPMLDocument;
@property (nonatomic, readonly) NSError *error;
+ (RSOPMLDocument *)parseOPMLWithParserData:(ParserData *)parserData error:(NSError **)error;
@end

@ -1,6 +1,6 @@
//
// RSOPMLParser.m
// RSXML
// RSParser
//
// Created by Brent Simmons on 7/12/15.
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
@ -8,24 +8,24 @@
#import "RSOPMLParser.h"
#import <libxml/xmlstring.h>
#import "RSXMLData.h"
#import "RSSAXParser.h"
#import "RSOPMLItem.h"
#import "RSOPMLDocument.h"
#import "RSOPMLAttributes.h"
#import "RSXMLError.h"
#import <RSParser/RSParser-Swift.h>
#import "RSOPMLError.h"
void RSParseOPML(RSXMLData *xmlData, RSParsedOPMLBlock callback) {
void RSParseOPML(ParserData *parserData, RSParsedOPMLBlock callback) {
NSCParameterAssert(xmlData);
NSCParameterAssert(parserData);
NSCParameterAssert(callback);
dispatch_async(dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0), ^{
@autoreleasepool {
RSOPMLParser *parser = [[RSOPMLParser alloc] initWithXMLData:xmlData];
RSOPMLParser *parser = [[RSOPMLParser alloc] initWithParserData:parserData];
RSOPMLDocument *document = parser.OPMLDocument;
NSError *error = parser.error;
@ -50,17 +50,30 @@ void RSParseOPML(RSXMLData *xmlData, RSParsedOPMLBlock callback) {
@implementation RSOPMLParser
#pragma mark - Class Methods
// Parses OPML synchronously.
//
// parserData: the data to parse, together with its URL string.
// error:      optional out-parameter; may be NULL. Written only on failure.
// Returns:    the parsed RSOPMLDocument, or nil if the parser recorded an error.
+ (RSOPMLDocument *)parseOPMLWithParserData:(ParserData *)parserData error:(NSError **)error {

    // -initWithParserData: runs the parse, so the results are available
    // as soon as the parser exists.
    RSOPMLParser *parser = [[RSOPMLParser alloc] initWithParserData:parserData];

    NSError *parseError = parser.error;
    if (parseError) {
        // Callers are allowed to pass NULL when they don't want the error.
        if (error) {
            *error = parseError;
        }
        return nil;
    }

    return parser.OPMLDocument;
}
#pragma mark - Init
- (instancetype)initWithXMLData:(RSXMLData *)XMLData {
- (instancetype)initWithParserData:(ParserData *)parserData {
self = [super init];
if (!self) {
return nil;
}
[self parse:XMLData];
[self parse:parserData];
return self;
}
@ -68,22 +81,22 @@ void RSParseOPML(RSXMLData *xmlData, RSParsedOPMLBlock callback) {
#pragma mark - Private
- (void)parse:(RSXMLData *)XMLData {
- (void)parse:(ParserData *)parserData {
@autoreleasepool {
if (![self canParseData:XMLData.data]) {
if (![self canParseData:parserData.data]) {
NSString *filename = nil;
NSURL *url = [NSURL URLWithString:XMLData.urlString];
NSURL *url = [NSURL URLWithString:parserData.url];
if (url && url.isFileURL) {
filename = url.path.lastPathComponent;
}
if ([XMLData.urlString hasPrefix:@"http"]) {
filename = XMLData.urlString;
if ([parserData.url hasPrefix:@"http"]) {
filename = parserData.url;
}
if (!filename) {
filename = XMLData.urlString;
filename = parserData.url;
}
self.error = RSOPMLWrongFormatError(filename);
return;
@ -95,7 +108,7 @@ void RSParseOPML(RSXMLData *xmlData, RSParsedOPMLBlock callback) {
self.OPMLDocument = [RSOPMLDocument new];
[self pushItem:self.OPMLDocument];
[parser parseData:XMLData.data];
[parser parseData:parserData.data];
[parser finishParsing];
}
}

@ -8,14 +8,15 @@
import Foundation
public final class ParserData {
@objc public final class ParserData: NSObject {
let url: String
let data: Data
public let url: String
public let data: Data
public init(url: String, data: Data) {
self.url = url
self.data = data
super.init()
}
}

@ -0,0 +1,67 @@
# RSParser
(Note: I haven’t written tests yet. It’s possible that none of this works.)
(Also note: this framework is intended to supersede my [RSXML](https://github.com/brentsimmons/RSXML) framework. Use this one instead. Well, once it’s working, that is.)
## What’s inside
This framework includes parsers for:
* [RSS](http://cyber.harvard.edu/rss/rss.html), [Atom](https://tools.ietf.org/html/rfc4287), [JSON Feed](https://jsonfeed.org/), and [RSS-in-JSON](https://github.com/scripting/Scripting-News/blob/master/rss-in-json/README.md)
* [OPML](http://dev.opml.org/)
* Internet dates
* HTML metadata and links
* HTML entities
It also includes Objective-C wrappers for libXML2’s XML SAX and HTML SAX parsers. You can write your own parsers on top of these.
This framework builds for macOS. It *could* be made to build for iOS also, but I haven’t gotten around to it yet.
## How to parse feeds
To get the type of a feed, even with partial data, call `FeedParser.feedType(parserData)`, which will return a `FeedType`.
To parse a feed, call `FeedParser.parseFeed(parserData)`, which will return a `ParsedFeed`. Also see related structs: `ParsedAuthor`, `ParsedItem`, `ParsedAttachment`, and `ParsedHub`.
You do *not* need to know the type of feed when calling `FeedParser.parseFeed` — it will figure it out and use the correct concrete parser.
However, if you do want to use a concrete parser directly, see `RSSInJSONParser`, `JSONFeedParser`, `RSSParser`, and `AtomParser`.
(Note: if you want to write a feed reader app, please do! You have my blessing and encouragement. Let me know when it’s shipping so I can check it out.)
## How to parse OPML
Call `+[RSOPMLParser parseOPMLWithParserData:error:]`, which returns an `RSOPMLDocument`. See related objects: `RSOPMLItem`, `RSOPMLAttributes`, `RSOPMLFeedSpecifier`, and `RSOPMLError`.
## How to parse dates
Call `RSDateWithString` or `RSDateWithBytes` (see `RSDateParser`). These handle the common internet date formats. You don’t need to know which format.
## How to parse HTML
To get an array of `<a href=…` links from an HTML document, call `+[RSHTMLLinkParser htmlLinksWithParserData:]`. It returns an array of `RSHTMLLink`.
To parse the metadata in an HTML document, call `+[RSHTMLMetadataParser HTMLMetadataWithParserData:]`. It returns an `RSHTMLMetadata` object.
To write your own HTML parser, see `RSSAXHTMLParser`. The two parsers above can serve as examples.
## How to parse HTML entities
When you have a string with things like `&#8212;` and `&euml;` and you want to turn those into the correct characters, call `-[NSString rsparser_stringByDecodingHTMLEntities]`. (See `NSString+RSParser.h`.)
## How to parse XML
If you need to parse some XML that isn’t RSS, Atom, or OPML, you can use `RSSAXParser`. Don’t subclass it — instead, create an `RSSAXParserDelegate`. See `RSRSSParser`, `RSAtomParser`, and `RSOPMLParser` as examples.
### Why use libXML2’s SAX API?
SAX is kind of a pain because of all the state you have to manage.
An alternative is to use `NSXMLParser`, which is event-driven like SAX. However, `RSSAXParser` was written to avoid allocating Objective-C objects except when absolutely needed. You’ll note use of things like `memcpy` and `strncmp`.
Normally I avoid this kind of thing *strenuously*. I prefer to work at the highest level possible.
But my more-than-a-decade of experience parsing XML has led me to this solution, which — last time I checked, which was, admittedly, a few years ago — was not only fastest but also uses the least memory. (The two things are related, of course: creating objects is bad for performance, so this code attempts to do the minimum possible.)
All that low-level stuff is encapsulated, however. If you just want to parse one of the popular feed formats, see `FeedParser`, which makes it easy and Swift-y.

@ -8,36 +8,42 @@
@import Foundation;
#import <RSParser/NSData+RSParser.h>
// To parse RSS, Atom, JSON Feed, and RSS-in-JSON the easy way, see FeedParser.swift.
// Dates
#import <RSParser/RSDateParser.h>
// OPML
#import <RSParser/RSOPMLParser.h>
#import <RSParser/RSOPMLDocument.h>
#import <RSParser/RSOPMLItem.h>
#import <RSParser/RSOPMLAttributes.h>
#import <RSParser/RSOPMLFeedSpecifier.h>
#import <RSParser/RSOPMLError.h>
// For writing your own XML parser.
#import <RSParser/RSSAXParser.h>
// You should use FeedParser (Swift) instead of these two specific parsers
// and the objects they create.
// But they're available if you want them.
#import <RSParser/RSRSSParser.h>
#import <RSParser/RSAtomParser.h>
#import <RSParser/RSParsedFeed.h>
#import <RSParser/RSParsedArticle.h>
// HTML
#import <RSParser/RSHTMLMetadataParser.h>
#import <RSParser/RSHTMLMetadata.h>
#import <RSParser/RSHTMLLinkParser.h>
#import <RSParser/RSSAXHTMLParser.h> // For writing your own HTML parser.
// Utilities
#import <RSParser/NSData+RSParser.h>
#import <RSParser/NSString+RSParser.h>
//#import <RSXML/RSSAXParser.h>
//#import <RSXML/RSXMLData.h>
//
//#import <RSXML/RSFeedParser.h>
//#import <RSXML/FeedParser.h>
//#import <RSXML/RSAtomParser.h>
//#import <RSXML/RSRSSParser.h>
//#import <RSXML/RSParsedFeed.h>
//#import <RSXML/RSParsedArticle.h>
//
//#import <RSXML/RSOPMLParser.h>
//#import <RSXML/RSOPMLDocument.h>
//#import <RSXML/RSOPMLItem.h>
//#import <RSXML/RSOPMLAttributes.h>
//#import <RSXML/RSOPMLFeedSpecifier.h>
//
//#import <RSXML/RSXMLError.h>
//
//#import <RSXML/NSString+RSXML.h>
//#import <RSXML/RSDateParser.h>
//
//// HTML
//
//#import <RSXML/RSSAXHTMLParser.h>
//
//#import <RSXML/RSHTMLMetadataParser.h>
//#import <RSXML/RSHTMLMetadata.h>
//#import <RSXML/RSHTMLLinkParser.h>

@ -7,26 +7,32 @@
objects = {
/* Begin PBXBuildFile section */
84285AA81F005D53002E8708 /* RSSParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84285AA71F005D53002E8708 /* RSSParser.swift */; };
84285AAA1F006456002E8708 /* RSParsedFeedTransformer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84285AA91F006456002E8708 /* RSParsedFeedTransformer.swift */; };
84285AAC1F006754002E8708 /* AtomParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84285AAB1F006754002E8708 /* AtomParser.swift */; };
84285AAF1F006BC0002E8708 /* libxml2.2.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 84285AAE1F006BC0002E8708 /* libxml2.2.tbd */; };
84285AB21F00702E002E8708 /* RSOPMLError.h in Headers */ = {isa = PBXBuildFile; fileRef = 84285AB01F00702E002E8708 /* RSOPMLError.h */; settings = {ATTRIBUTES = (Public, ); }; };
84285AB31F00702E002E8708 /* RSOPMLError.m in Sources */ = {isa = PBXBuildFile; fileRef = 84285AB11F00702E002E8708 /* RSOPMLError.m */; };
84469CE21EFA2F3E004A6B28 /* ParserData.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84469CE11EFA2F3E004A6B28 /* ParserData.swift */; };
84469CEF1EFA3000004A6B28 /* RSOPMLAttributes.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469CE51EFA3000004A6B28 /* RSOPMLAttributes.h */; };
84469CEF1EFA3000004A6B28 /* RSOPMLAttributes.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469CE51EFA3000004A6B28 /* RSOPMLAttributes.h */; settings = {ATTRIBUTES = (Public, ); }; };
84469CF01EFA3000004A6B28 /* RSOPMLAttributes.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469CE61EFA3000004A6B28 /* RSOPMLAttributes.m */; };
84469CF11EFA3000004A6B28 /* RSOPMLDocument.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469CE71EFA3000004A6B28 /* RSOPMLDocument.h */; };
84469CF11EFA3000004A6B28 /* RSOPMLDocument.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469CE71EFA3000004A6B28 /* RSOPMLDocument.h */; settings = {ATTRIBUTES = (Public, ); }; };
84469CF21EFA3000004A6B28 /* RSOPMLDocument.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469CE81EFA3000004A6B28 /* RSOPMLDocument.m */; };
84469CF31EFA3000004A6B28 /* RSOPMLFeedSpecifier.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469CE91EFA3000004A6B28 /* RSOPMLFeedSpecifier.h */; };
84469CF31EFA3000004A6B28 /* RSOPMLFeedSpecifier.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469CE91EFA3000004A6B28 /* RSOPMLFeedSpecifier.h */; settings = {ATTRIBUTES = (Public, ); }; };
84469CF41EFA3000004A6B28 /* RSOPMLFeedSpecifier.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469CEA1EFA3000004A6B28 /* RSOPMLFeedSpecifier.m */; };
84469CF51EFA3000004A6B28 /* RSOPMLItem.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469CEB1EFA3000004A6B28 /* RSOPMLItem.h */; };
84469CF51EFA3000004A6B28 /* RSOPMLItem.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469CEB1EFA3000004A6B28 /* RSOPMLItem.h */; settings = {ATTRIBUTES = (Public, ); }; };
84469CF61EFA3000004A6B28 /* RSOPMLItem.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469CEC1EFA3000004A6B28 /* RSOPMLItem.m */; };
84469CF71EFA3000004A6B28 /* RSOPMLParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469CED1EFA3000004A6B28 /* RSOPMLParser.h */; };
84469CF71EFA3000004A6B28 /* RSOPMLParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469CED1EFA3000004A6B28 /* RSOPMLParser.h */; settings = {ATTRIBUTES = (Public, ); }; };
84469CF81EFA3000004A6B28 /* RSOPMLParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469CEE1EFA3000004A6B28 /* RSOPMLParser.m */; };
84469CFC1EFA3069004A6B28 /* RSSAXParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469CFA1EFA3069004A6B28 /* RSSAXParser.h */; };
84469CFC1EFA3069004A6B28 /* RSSAXParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469CFA1EFA3069004A6B28 /* RSSAXParser.h */; settings = {ATTRIBUTES = (Public, ); }; };
84469CFD1EFA3069004A6B28 /* RSSAXParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469CFB1EFA3069004A6B28 /* RSSAXParser.m */; };
84469D071EFA307E004A6B28 /* RSHTMLLinkParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469CFF1EFA307E004A6B28 /* RSHTMLLinkParser.h */; };
84469D071EFA307E004A6B28 /* RSHTMLLinkParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469CFF1EFA307E004A6B28 /* RSHTMLLinkParser.h */; settings = {ATTRIBUTES = (Public, ); }; };
84469D081EFA307E004A6B28 /* RSHTMLLinkParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469D001EFA307E004A6B28 /* RSHTMLLinkParser.m */; };
84469D091EFA307E004A6B28 /* RSHTMLMetadata.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D011EFA307E004A6B28 /* RSHTMLMetadata.h */; };
84469D091EFA307E004A6B28 /* RSHTMLMetadata.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D011EFA307E004A6B28 /* RSHTMLMetadata.h */; settings = {ATTRIBUTES = (Public, ); }; };
84469D0A1EFA307E004A6B28 /* RSHTMLMetadata.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469D021EFA307E004A6B28 /* RSHTMLMetadata.m */; };
84469D0B1EFA307E004A6B28 /* RSHTMLMetadataParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D031EFA307E004A6B28 /* RSHTMLMetadataParser.h */; };
84469D0B1EFA307E004A6B28 /* RSHTMLMetadataParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D031EFA307E004A6B28 /* RSHTMLMetadataParser.h */; settings = {ATTRIBUTES = (Public, ); }; };
84469D0C1EFA307E004A6B28 /* RSHTMLMetadataParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469D041EFA307E004A6B28 /* RSHTMLMetadataParser.m */; };
84469D0D1EFA307E004A6B28 /* RSSAXHTMLParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D051EFA307E004A6B28 /* RSSAXHTMLParser.h */; };
84469D0D1EFA307E004A6B28 /* RSSAXHTMLParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D051EFA307E004A6B28 /* RSSAXHTMLParser.h */; settings = {ATTRIBUTES = (Public, ); }; };
84469D0E1EFA307E004A6B28 /* RSSAXHTMLParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469D061EFA307E004A6B28 /* RSSAXHTMLParser.m */; };
84469D161EFA30A2004A6B28 /* NSString+RSParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D101EFA30A2004A6B28 /* NSString+RSParser.h */; settings = {ATTRIBUTES = (Public, ); }; };
84469D171EFA30A2004A6B28 /* NSString+RSParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469D111EFA30A2004A6B28 /* NSString+RSParser.m */; };
@ -34,15 +40,13 @@
84469D191EFA30A2004A6B28 /* RSDateParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469D131EFA30A2004A6B28 /* RSDateParser.m */; };
84469D1A1EFA30A2004A6B28 /* RSParserInternal.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D141EFA30A2004A6B28 /* RSParserInternal.h */; };
84469D1B1EFA30A2004A6B28 /* RSParserInternal.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469D151EFA30A2004A6B28 /* RSParserInternal.m */; };
84469D271EFA3134004A6B28 /* RSAtomParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D1D1EFA3134004A6B28 /* RSAtomParser.h */; };
84469D271EFA3134004A6B28 /* RSAtomParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D1D1EFA3134004A6B28 /* RSAtomParser.h */; settings = {ATTRIBUTES = (Public, ); }; };
84469D281EFA3134004A6B28 /* RSAtomParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469D1E1EFA3134004A6B28 /* RSAtomParser.m */; };
84469D291EFA3134004A6B28 /* RSFeedParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D1F1EFA3134004A6B28 /* RSFeedParser.h */; };
84469D2A1EFA3134004A6B28 /* RSFeedParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469D201EFA3134004A6B28 /* RSFeedParser.m */; };
84469D2B1EFA3134004A6B28 /* RSParsedArticle.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D211EFA3134004A6B28 /* RSParsedArticle.h */; };
84469D2B1EFA3134004A6B28 /* RSParsedArticle.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D211EFA3134004A6B28 /* RSParsedArticle.h */; settings = {ATTRIBUTES = (Public, ); }; };
84469D2C1EFA3134004A6B28 /* RSParsedArticle.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469D221EFA3134004A6B28 /* RSParsedArticle.m */; };
84469D2D1EFA3134004A6B28 /* RSParsedFeed.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D231EFA3134004A6B28 /* RSParsedFeed.h */; };
84469D2D1EFA3134004A6B28 /* RSParsedFeed.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D231EFA3134004A6B28 /* RSParsedFeed.h */; settings = {ATTRIBUTES = (Public, ); }; };
84469D2E1EFA3134004A6B28 /* RSParsedFeed.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469D241EFA3134004A6B28 /* RSParsedFeed.m */; };
84469D2F1EFA3134004A6B28 /* RSRSSParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D251EFA3134004A6B28 /* RSRSSParser.h */; };
84469D2F1EFA3134004A6B28 /* RSRSSParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D251EFA3134004A6B28 /* RSRSSParser.h */; settings = {ATTRIBUTES = (Public, ); }; };
84469D301EFA3134004A6B28 /* RSRSSParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 84469D261EFA3134004A6B28 /* RSRSSParser.m */; };
84469D321EFA31CF004A6B28 /* FeedParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84469D311EFA31CF004A6B28 /* FeedParser.swift */; };
84469D351EFF1190004A6B28 /* NSData+RSParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84469D331EFF1190004A6B28 /* NSData+RSParser.h */; settings = {ATTRIBUTES = (Public, ); }; };
@ -73,6 +77,13 @@
/* End PBXContainerItemProxy section */
/* Begin PBXFileReference section */
84285AA71F005D53002E8708 /* RSSParser.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = RSSParser.swift; sourceTree = "<group>"; };
84285AA91F006456002E8708 /* RSParsedFeedTransformer.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = RSParsedFeedTransformer.swift; sourceTree = "<group>"; };
84285AAB1F006754002E8708 /* AtomParser.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AtomParser.swift; sourceTree = "<group>"; };
84285AAE1F006BC0002E8708 /* libxml2.2.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libxml2.2.tbd; path = usr/lib/libxml2.2.tbd; sourceTree = SDKROOT; };
84285AB01F00702E002E8708 /* RSOPMLError.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSOPMLError.h; sourceTree = "<group>"; };
84285AB11F00702E002E8708 /* RSOPMLError.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RSOPMLError.m; sourceTree = "<group>"; };
84285AB41F007255002E8708 /* README.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = "<group>"; };
84469CE11EFA2F3E004A6B28 /* ParserData.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ParserData.swift; sourceTree = "<group>"; };
84469CE51EFA3000004A6B28 /* RSOPMLAttributes.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSOPMLAttributes.h; sourceTree = "<group>"; };
84469CE61EFA3000004A6B28 /* RSOPMLAttributes.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RSOPMLAttributes.m; sourceTree = "<group>"; };
@ -102,8 +113,6 @@
84469D151EFA30A2004A6B28 /* RSParserInternal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RSParserInternal.m; sourceTree = "<group>"; };
84469D1D1EFA3134004A6B28 /* RSAtomParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSAtomParser.h; sourceTree = "<group>"; };
84469D1E1EFA3134004A6B28 /* RSAtomParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RSAtomParser.m; sourceTree = "<group>"; };
84469D1F1EFA3134004A6B28 /* RSFeedParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSFeedParser.h; sourceTree = "<group>"; };
84469D201EFA3134004A6B28 /* RSFeedParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RSFeedParser.m; sourceTree = "<group>"; };
84469D211EFA3134004A6B28 /* RSParsedArticle.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSParsedArticle.h; sourceTree = "<group>"; };
84469D221EFA3134004A6B28 /* RSParsedArticle.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RSParsedArticle.m; sourceTree = "<group>"; };
84469D231EFA3134004A6B28 /* RSParsedFeed.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSParsedFeed.h; sourceTree = "<group>"; };
@ -136,6 +145,7 @@
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
84285AAF1F006BC0002E8708 /* libxml2.2.tbd in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@ -159,6 +169,14 @@
name = Dates;
sourceTree = "<group>";
};
84285AAD1F006BC0002E8708 /* Frameworks */ = {
isa = PBXGroup;
children = (
84285AAE1F006BC0002E8708 /* libxml2.2.tbd */,
);
name = Frameworks;
sourceTree = "<group>";
};
84469CE31EFA2FB0004A6B28 /* Feeds */ = {
isa = PBXGroup;
children = (
@ -189,6 +207,8 @@
84469CEA1EFA3000004A6B28 /* RSOPMLFeedSpecifier.m */,
84469CEB1EFA3000004A6B28 /* RSOPMLItem.h */,
84469CEC1EFA3000004A6B28 /* RSOPMLItem.m */,
84285AB01F00702E002E8708 /* RSOPMLError.h */,
84285AB11F00702E002E8708 /* RSOPMLError.m */,
);
path = OPML;
sourceTree = "<group>";
@ -233,12 +253,13 @@
84469D1C1EFA3134004A6B28 /* XML */ = {
isa = PBXGroup;
children = (
84285AA71F005D53002E8708 /* RSSParser.swift */,
84285AAB1F006754002E8708 /* AtomParser.swift */,
84285AA91F006456002E8708 /* RSParsedFeedTransformer.swift */,
84469D1D1EFA3134004A6B28 /* RSAtomParser.h */,
84469D1E1EFA3134004A6B28 /* RSAtomParser.m */,
84469D251EFA3134004A6B28 /* RSRSSParser.h */,
84469D261EFA3134004A6B28 /* RSRSSParser.m */,
84469D1F1EFA3134004A6B28 /* RSFeedParser.h */,
84469D201EFA3134004A6B28 /* RSFeedParser.m */,
84469D211EFA3134004A6B28 /* RSParsedArticle.h */,
84469D221EFA3134004A6B28 /* RSParsedArticle.m */,
84469D231EFA3134004A6B28 /* RSParsedFeed.h */,
@ -261,6 +282,7 @@
84FF5F7A1EFA285800C15A01 = {
isa = PBXGroup;
children = (
84285AB41F007255002E8708 /* README.md */,
84D81BDA1EFA28E700652332 /* RSParser.h */,
84469CE11EFA2F3E004A6B28 /* ParserData.swift */,
84469CE31EFA2FB0004A6B28 /* Feeds */,
@ -272,6 +294,7 @@
84D81BD91EFA28E700652332 /* Info.plist */,
84FF5F911EFA285800C15A01 /* RSParserTests */,
84FF5F851EFA285800C15A01 /* Products */,
84285AAD1F006BC0002E8708 /* Frameworks */,
);
sourceTree = "<group>";
};
@ -312,13 +335,13 @@
84469D0B1EFA307E004A6B28 /* RSHTMLMetadataParser.h in Headers */,
84469CFC1EFA3069004A6B28 /* RSSAXParser.h in Headers */,
84469D071EFA307E004A6B28 /* RSHTMLLinkParser.h in Headers */,
84469D291EFA3134004A6B28 /* RSFeedParser.h in Headers */,
84469D0D1EFA307E004A6B28 /* RSSAXHTMLParser.h in Headers */,
84469D2B1EFA3134004A6B28 /* RSParsedArticle.h in Headers */,
84469D2F1EFA3134004A6B28 /* RSRSSParser.h in Headers */,
84469CF31EFA3000004A6B28 /* RSOPMLFeedSpecifier.h in Headers */,
84469CF11EFA3000004A6B28 /* RSOPMLDocument.h in Headers */,
84469D091EFA307E004A6B28 /* RSHTMLMetadata.h in Headers */,
84285AB21F00702E002E8708 /* RSOPMLError.h in Headers */,
84469D161EFA30A2004A6B28 /* NSString+RSParser.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
@ -434,13 +457,15 @@
84469CE21EFA2F3E004A6B28 /* ParserData.swift in Sources */,
84469CF21EFA3000004A6B28 /* RSOPMLDocument.m in Sources */,
84469CF61EFA3000004A6B28 /* RSOPMLItem.m in Sources */,
84469D2A1EFA3134004A6B28 /* RSFeedParser.m in Sources */,
84D81BE41EFA2D3D00652332 /* ParsedItem.swift in Sources */,
84285AAC1F006754002E8708 /* AtomParser.swift in Sources */,
84285AA81F005D53002E8708 /* RSSParser.swift in Sources */,
84469D421EFF2B2D004A6B28 /* JSONTypes.swift in Sources */,
84469D0C1EFA307E004A6B28 /* RSHTMLMetadataParser.m in Sources */,
84469D0A1EFA307E004A6B28 /* RSHTMLMetadata.m in Sources */,
84469D171EFA30A2004A6B28 /* NSString+RSParser.m in Sources */,
84469D2C1EFA3134004A6B28 /* RSParsedArticle.m in Sources */,
84285AAA1F006456002E8708 /* RSParsedFeedTransformer.swift in Sources */,
84469D2E1EFA3134004A6B28 /* RSParsedFeed.m in Sources */,
84469CF81EFA3000004A6B28 /* RSOPMLParser.m in Sources */,
84469D401EFF29A9004A6B28 /* FeedParserError.swift in Sources */,
@ -448,6 +473,7 @@
84469D281EFA3134004A6B28 /* RSAtomParser.m in Sources */,
84469D361EFF1190004A6B28 /* NSData+RSParser.m in Sources */,
84D81BE61EFA2DFB00652332 /* ParsedAttachment.swift in Sources */,
84285AB31F00702E002E8708 /* RSOPMLError.m in Sources */,
84D81BDE1EFA2B7D00652332 /* ParsedFeed.swift in Sources */,
84D81BE81EFA2E6700652332 /* ParsedHub.swift in Sources */,
84469D441F002CEF004A6B28 /* JSONFeedParser.swift in Sources */,
@ -594,6 +620,7 @@
DYLIB_CURRENT_VERSION = 1;
DYLIB_INSTALL_NAME_BASE = "@rpath";
FRAMEWORK_VERSION = A;
HEADER_SEARCH_PATHS = "${SDKROOT}/usr/include/libxml2";
INFOPLIST_FILE = Info.plist;
INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @loader_path/Frameworks";
@ -617,6 +644,7 @@
DYLIB_CURRENT_VERSION = 1;
DYLIB_INSTALL_NAME_BASE = "@rpath";
FRAMEWORK_VERSION = A;
HEADER_SEARCH_PATHS = "${SDKROOT}/usr/include/libxml2";
INFOPLIST_FILE = Info.plist;
INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @loader_path/Frameworks";

@ -9,7 +9,7 @@
#import <libxml/tree.h>
#import <libxml/xmlstring.h>
#import "RSSAXParser.h"
#import "RSXMLInternal.h"
#import "RSParserInternal.h"
@interface RSSAXParser ()

@ -13,6 +13,7 @@
static BOOL bytesAreProbablyHTML(const char *bytes, NSUInteger numberOfBytes);
static BOOL bytesAreProbablyXML(const char *bytes, NSUInteger numberOfBytes);
static BOOL bytesStartWithStringIgnoringWhitespace(const char *string, const char *bytes, NSUInteger numberOfBytes);
static BOOL didFindString(const char *string, const char *bytes, NSUInteger numberOfBytes);
@implementation NSData (RSParser)
@ -55,7 +56,7 @@ static BOOL bytesStartWithStringIgnoringWhitespace(const char *string, const cha
return NO;
}
return didFindString("<rss", bytes, length);
return didFindString("<rss", self.bytes, self.length);
}
- (BOOL)isProbablyAtom {
@ -64,7 +65,7 @@ static BOOL bytesStartWithStringIgnoringWhitespace(const char *string, const cha
return NO;
}
return didFindString("<feed", bytes, length);
return didFindString("<feed", self.bytes, self.length);
}
@end
@ -81,8 +82,8 @@ static BOOL bytesStartWithStringIgnoringWhitespace(const char *string, const cha
NSUInteger i = 0;
for (i = 0; i < numberOfBytes; i++) {
const char *ch = bytes[i];
if (ch == ' ' || ch = '\r' || ch == '\n' || ch == '\t') {
const char ch = bytes[i];
if (ch == ' ' || ch == '\r' || ch == '\n' || ch == '\t') {
continue;
}
@ -110,7 +111,7 @@ static BOOL bytesAreProbablyHTML(const char *bytes, NSUInteger numberOfBytes) {
return YES;
}
if (didFindString("<", bytes, numberOfBytes) {
if (didFindString("<", bytes, numberOfBytes)) {
if (didFindString("doctype html", bytes, numberOfBytes)) {
return YES;
}
@ -127,6 +128,6 @@ static BOOL bytesAreProbablyHTML(const char *bytes, NSUInteger numberOfBytes) {
static BOOL bytesAreProbablyXML(const char *bytes, NSUInteger numberOfBytes) {
return bytesStartWithStringIgnoringWhiteSpace("<?xml", bytes, numberOfBytes);
return bytesStartWithStringIgnoringWhitespace("<?xml", bytes, numberOfBytes);
}

@ -6,6 +6,7 @@
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
//
#import <CommonCrypto/CommonDigest.h>
#import "NSString+RSParser.h"

@ -10,7 +10,7 @@
NS_ASSUME_NONNULL_BEGIN
BOOL RSParser_IsEmpty(id _Nullable obj);
BOOL RSParserObjectIsEmpty(id _Nullable obj);
BOOL RSParserStringIsEmpty(NSString * _Nullable s);

@ -1,5 +1,5 @@
//
// RSXMLInternal.m
// RSParserInternal.m
// RSParser
//
// Created by Brent Simmons on 12/26/16.
@ -7,7 +7,7 @@
//
#import <CommonCrypto/CommonDigest.h>
#import "RSXMLInternal.h"
#import "RSParserInternal.h"
static BOOL RSParserIsNil(id obj) {