2017-06-21 06:18:46 +02:00
|
|
|
//
|
|
|
|
// RSHTMLLinkParser.m
|
2017-06-25 23:06:01 +02:00
|
|
|
// RSParser
|
2017-06-21 06:18:46 +02:00
|
|
|
//
|
|
|
|
// Created by Brent Simmons on 8/7/16.
|
|
|
|
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
|
|
|
|
//
|
|
|
|
|
|
|
|
#import <libxml/xmlstring.h>
|
2017-10-04 22:28:48 +02:00
|
|
|
#import <RSParser/RSHTMLLinkParser.h>
|
|
|
|
#import <RSParser/RSSAXHTMLParser.h>
|
|
|
|
#import <RSParser/RSSAXParser.h>
|
|
|
|
#import <RSParser/RSParserInternal.h>
|
|
|
|
#import <RSParser/ParserData.h>
|
2017-06-21 06:18:46 +02:00
|
|
|
|
|
|
|
|
|
|
|
// Private state for RSHTMLLinkParser. The class acts as the delegate of the
// RSSAXHTMLParser it creates in -parse, hence the protocol conformance here
// rather than in the public header.
@interface RSHTMLLinkParser () <RSSAXHTMLParserDelegate>

/// RSHTMLLink objects accumulated while parsing; one is appended for every
/// anchor start tag reported by the SAX parser.
@property (readonly, nonatomic) NSMutableArray *links;

/// The input handed to -initWithParserData:. Its `data` is fed to the SAX
/// parser and its `url` is the source of `baseURL`.
@property (readonly, nonatomic) ParserData *parserData;

/// Created empty in -initWithParserData:.
/// NOTE(review): not referenced anywhere else in this file — confirm whether it is still needed.
@property (readonly, nonatomic) NSMutableArray *dictionaries;

/// URL built from `parserData.url`; href attribute values are resolved against it.
@property (readonly, nonatomic) NSURL *baseURL;

@end
|
|
|
|
|
|
|
|
|
|
|
|
// Private redeclaration of RSHTMLLink's properties as readwrite so that
// RSHTMLLinkParser (in this file) can fill them in while parsing.
@interface RSHTMLLink ()

/// The anchor's href resolved against the parser's base URL (absolute).
@property (readwrite, nonatomic) NSString *urlString;

/// Text captured between the anchor's start and end tags, whitespace-trimmed.
@property (readwrite, nonatomic) NSString *text;

/// Value of the `title` attribute inside the anchor tag.
@property (readwrite, nonatomic) NSString *title;

@end
|
|
|
|
|
|
|
|
|
|
|
|
@implementation RSHTMLLinkParser

#pragma mark - Class Methods

/// Parses the HTML carried by `parserData` and returns the anchors found in it.
///
/// @param parserData Supplies the HTML bytes (`data`) and the page URL string
///                   (`url`) used to resolve relative hrefs. Both are asserted
///                   non-nil by the initializer.
/// @return An immutable array of RSHTMLLink objects, one per anchor start tag,
///         in the order the SAX parser reported them.
+ (NSArray *)htmlLinksWithParserData:(ParserData *)parserData {

    RSHTMLLinkParser *parser = [[self alloc] initWithParserData:parserData];

    // Return a snapshot instead of leaking the parser's private mutable array
    // through an NSArray-typed return value.
    return [parser.links copy];
}

#pragma mark - Init

/// Sets up the parser's state and runs the parse immediately, so `links` is
/// already populated when the initializer returns.
///
/// @param parserData Input to parse; `data` and `url` must be non-nil
///                   (checked with NSParameterAssert).
- (instancetype)initWithParserData:(ParserData *)parserData {

    NSParameterAssert(parserData.data);
    NSParameterAssert(parserData.url);

    self = [super init];
    if (!self) {
        return nil;
    }

    _links = [NSMutableArray new];
    _parserData = parserData;
    _dictionaries = [NSMutableArray new];
    _baseURL = [NSURL URLWithString:parserData.url];

    [self parse];

    return self;
}

#pragma mark - Parse

/// Feeds the input data to an RSSAXHTMLParser with self as its delegate.
/// The delegate callbacks below build up `links` as the parse proceeds.
- (void)parse {

    RSSAXHTMLParser *parser = [[RSSAXHTMLParser alloc] initWithDelegate:self];
    [parser parseData:self.parserData.data];
    [parser finishParsing];
}

/// The link most recently appended to `links`, or nil when none has been added yet.
- (RSHTMLLink *)currentLink {

    return self.links.lastObject;
}

static NSString * const kHrefKey = @"href";

/// Returns the absolute URL string for the `href` entry of an attributes dictionary.
///
/// @param d Attributes of an anchor tag; the key lookup is case-insensitive.
/// @return The href resolved against `baseURL`, or nil when there is no href
///         (or when NSURL cannot build a URL from it).
- (NSString *)urlStringFromDictionary:(NSDictionary *)d {

    NSString *href = [d rsparser_objectForCaseInsensitiveKey:kHrefKey];
    if (!href) {
        return nil;
    }

    NSURL *absoluteURL = [NSURL URLWithString:href relativeToURL:self.baseURL];
    return absoluteURL.absoluteString;
}

static NSString * const kTitleKey = @"title";

/// Returns the `title` entry of an attributes dictionary (case-insensitive key
/// lookup), or nil when absent.
- (NSString *)titleFromDictionary:(NSDictionary *)d {

    return [d rsparser_objectForCaseInsensitiveKey:kTitleKey];
}

/// Copies the href (resolved to an absolute string) and title from `d` onto
/// the current link, i.e. the one most recently appended to `links`.
- (void)handleLinkAttributes:(NSDictionary *)d {

    RSHTMLLink *link = self.currentLink;
    link.urlString = [self urlStringFromDictionary:d];
    link.title = [self titleFromDictionary:d];
}

static const char * const kAnchor = "a";
// NOTE(review): 2 rather than 1 — presumably counts the terminating NUL so that
// RSSAXEqualTags matches the whole tag name and not just a leading "a"; confirm
// against RSSAXEqualTags before changing.
static const NSInteger kAnchorLength = 2;

#pragma mark - RSSAXHTMLParserDelegate

/// Start-tag callback. For every anchor tag a new RSHTMLLink is appended to
/// `links`, its attributes (if any) are applied, and character storage is
/// switched on so the link text can be read in the end-tag callback.
/// Anchors without attributes are still recorded; their urlString and title stay nil.
- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes {

    if (!RSSAXEqualTags(localName, kAnchor, kAnchorLength)) {
        return;
    }

    RSHTMLLink *link = [RSHTMLLink new];
    [self.links addObject:link];

    NSDictionary *d = [SAXParser attributesDictionary:attributes];
    if (!RSParserObjectIsEmpty(d)) {
        [self handleLinkAttributes:d];
    }

    [SAXParser beginStoringCharacters];
}

/// End-tag callback. On an anchor end tag, stores the parser's accumulated,
/// whitespace-trimmed characters as the text of the current link.
///
/// The sender is typed RSSAXHTMLParser to agree with the start-tag callback
/// and with the parser instance created in -parse; the selector is unchanged.
- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLEndElement:(const xmlChar *)localName {

    if (!RSSAXEqualTags(localName, kAnchor, kAnchorLength)) {
        return;
    }

    self.currentLink.text = SAXParser.currentStringWithTrimmedWhitespace;
}

@end
|
|
|
|
|
|
|
|
// RSHTMLLink carries no logic of its own in this file: the implementation is
// empty and relies on the synthesized accessors for its declared properties.
@implementation RSHTMLLink
@end
|