mirror of
https://github.com/Ranchero-Software/NetNewsWire.git
synced 2025-01-11 09:14:06 +01:00
152 lines
3.6 KiB
Objective-C
Executable File
152 lines
3.6 KiB
Objective-C
Executable File
//
|
||
// RSHTMLMetadataParser.m
|
||
// RSParser
|
||
//
|
||
// Created by Brent Simmons on 3/6/16.
|
||
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
|
||
//
|
||
|
||
#import "RSHTMLMetadataParser.h"
|
||
#import "RSHTMLMetadata.h"
|
||
#import "RSSAXHTMLParser.h"
|
||
#import "RSSAXHTMLParser.h"
|
||
#import "RSSAXParser.h"
|
||
#import "RSParserInternal.h"
|
||
#import "ParserData.h"
|
||
#import "RSHTMLTag.h"
|
||
|
||
#import <libxml/xmlstring.h>
|
||
|
||
|
||
// Private state for RSHTMLMetadataParser. The parser acts as its own SAX
// delegate and accumulates the tags it sees while the document is scanned.
@interface RSHTMLMetadataParser () <RSSAXHTMLParserDelegate>

// Input passed to -initWithParserData:. Its `data` is fed to the SAX parser and
// its `url` is used both for the result object and for the YouTube special case.
@property (nonatomic, strong, readonly) ParserData *parserData;

// Result of the parse; assigned at the end of -parse.
// NOTE(review): presumably redeclares a readonly property from the public header — confirm.
@property (nonatomic, readwrite) RSHTMLMetadata *metadata;

// Tags collected by the delegate callback, appended in the order they are encountered.
@property (nonatomic, strong) NSMutableArray<RSHTMLTag *> *tags;

// Set to YES once a <body> start tag is seen (unless shouldScanPastHeadSection
// is YES); every later start-element callback is then ignored.
@property (nonatomic, assign) BOOL didFinishParsing;

// YES when the URL string contains "youtube" (case-insensitive); scanning then
// continues past the head section.
@property (nonatomic, assign) BOOL shouldScanPastHeadSection;

@end
|
||
|
||
|
||
@implementation RSHTMLMetadataParser
|
||
|
||
|
||
#pragma mark - Class Methods
|
||
|
||
// Convenience entry point: builds a parser for `parserData` and returns the
// metadata it produced. -initWithParserData: runs the parse itself, so the
// `metadata` property is already set when the initializer returns.
+ (RSHTMLMetadata *)HTMLMetadataWithParserData:(ParserData *)parserData {

	RSHTMLMetadataParser *metadataParser = [[self alloc] initWithParserData:parserData];
	RSHTMLMetadata *result = metadataParser.metadata;
	return result;
}
|
||
|
||
|
||
#pragma mark - Init
|
||
|
||
// Stores `parserData`, decides whether scanning should continue past the head
// section, and then runs the parse immediately — `metadata` is populated by the
// time this returns.
//
// `parserData.data` and `parserData.url` are expected to be non-nil; this is
// enforced with NSParameterAssert only, so it may not be checked in builds
// where assertions are disabled.
- (instancetype)initWithParserData:(ParserData *)parserData {

	NSParameterAssert(parserData.data);
	NSParameterAssert(parserData.url);

	self = [super init];
	if (!self) {
		return nil;
	}

	_parserData = parserData;
	_tags = [[NSMutableArray alloc] init];

	// YouTube has a weird bug where, on some pages, it puts the feed link tag after the head section, in the body section.
	// This allows for a special case where we continue to scan after the head section.
	// (Yes, this match could yield false positives, but it’s harmless.)
	//
	// The explicit nil check matters: messaging nil returns a zeroed NSRange, whose
	// location (0) is != NSNotFound, so a missing URL would otherwise switch the
	// special case on.
	_shouldScanPastHeadSection = (parserData.url != nil) && ([parserData.url rangeOfString:@"youtube" options:NSCaseInsensitiveSearch].location != NSNotFound);

	// Parsing happens synchronously as part of initialization.
	[self parse];

	return self;
}
|
||
|
||
|
||
#pragma mark - Parse
|
||
|
||
// Runs the document bytes through an RSSAXHTMLParser with this object as the
// delegate (the delegate callback fills `tags`), then wraps the page URL string
// and the collected tags in the RSHTMLMetadata result.
- (void)parse {

	ParserData *source = self.parserData;

	RSSAXHTMLParser *saxParser = [[RSSAXHTMLParser alloc] initWithDelegate:self];
	[saxParser parseData:source.data];
	[saxParser finishParsing];

	RSHTMLMetadata *result = [[RSHTMLMetadata alloc] initWithURLString:source.url tags:self.tags];
	self.metadata = result;
}
|
||
|
||
|
||
// Attribute names looked up on <link> tags by -linkForDictionary: and
// -handleLinkAttributes:.
// Declared `* const` so the file-scope pointers cannot be reassigned by accident.
static NSString * const kHrefKey = @"href";
static NSString * const kSrcKey = @"src";
static NSString * const kRelKey = @"rel";
|
||
|
||
// Returns the "href" value from the attributes dictionary `d`, or the "src"
// value when no "href" entry is found. Both lookups go through the
// rsparser_objectForCaseInsensitiveKey: helper. The result is nil when neither
// lookup finds a value.
- (NSString *)linkForDictionary:(NSDictionary *)d {

	NSString *href = [d rsparser_objectForCaseInsensitiveKey:kHrefKey];

	// Only fall back to "src" when there is no "href" value at all.
	return href ?: [d rsparser_objectForCaseInsensitiveKey:kSrcKey];
}
|
||
|
||
// Records a <link> tag built from the attributes dictionary `d`.
// The tag is skipped when its "rel" value is empty, or when it has neither a
// usable "href" nor "src" value (see -linkForDictionary:).
- (void)handleLinkAttributes:(NSDictionary *)d {

	NSString *relValue = [d rsparser_objectForCaseInsensitiveKey:kRelKey];

	// Short-circuit keeps the original order: the link lookup only happens when "rel" is present.
	if (RSParserStringIsEmpty(relValue) || RSParserStringIsEmpty([self linkForDictionary:d])) {
		return;
	}

	[self.tags addObject:[RSHTMLTag linkTagWithAttributes:d]];
}
|
||
|
||
// Records a <meta> tag built from the attributes dictionary `d`.
// Unlike link tags, no particular attribute is required here.
- (void)handleMetaAttributes:(NSDictionary *)d {

	[self.tags addObject:[RSHTMLTag metaTagWithAttributes:d]];
}
|
||
|
||
#pragma mark - RSSAXHTMLParserDelegate
|
||
|
||
// Element names matched in the start-element callback, and the lengths passed
// alongside them to RSSAXEqualTags.
//
// Each length is sizeof the array: the characters plus the terminating NUL
// (5 for these four-letter names), derived from the literal instead of being
// maintained by hand.
// NOTE(review): presumably the NUL is counted so that only exact names match —
// confirm against RSSAXEqualTags.
static const char kBody[] = "body";
static const NSInteger kBodyLength = (NSInteger)sizeof(kBody);
static const char kLink[] = "link";
static const NSInteger kLinkLength = (NSInteger)sizeof(kLink);
static const char kMeta[] = "meta";
static const NSInteger kMetaLength = (NSInteger)sizeof(kMeta);
|
||
|
||
// RSSAXHTMLParserDelegate start-element callback.
//
// Stops collecting once a <body> start tag is seen (unless
// shouldScanPastHeadSection is YES). Until then, <link> and <meta> tags with a
// non-empty attributes dictionary are forwarded to -handleLinkAttributes: and
// -handleMetaAttributes: respectively; every other element is ignored.
- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes {

	if (self.didFinishParsing) {
		return;
	}

	BOOL reachedBody = RSSAXEqualTags(localName, kBody, kBodyLength);
	if (reachedBody && !self.shouldScanPastHeadSection) {
		// The head section is over; ignore everything from here on.
		self.didFinishParsing = YES;
		return;
	}

	// Only <link> and <meta> are of interest; bail out before building the
	// attributes dictionary for anything else.
	BOOL isLinkTag = RSSAXEqualTags(localName, kLink, kLinkLength);
	if (!isLinkTag && !RSSAXEqualTags(localName, kMeta, kMetaLength)) {
		return;
	}

	NSDictionary *attributeValues = [SAXParser attributesDictionary:attributes];
	if (RSParserObjectIsEmpty(attributeValues)) {
		return;
	}

	if (isLinkTag) {
		[self handleLinkAttributes:attributeValues];
	}
	else {
		[self handleMetaAttributes:attributeValues];
	}
}
|
||
|
||
@end
|