Support multiple authors in RSS and Atom feeds.

This commit is contained in:
Brent Simmons 2017-12-19 13:03:05 -08:00
parent 83c49bf428
commit ccd87040c3
10 changed files with 107 additions and 12 deletions

View File

@ -9,6 +9,7 @@
@import Foundation;
@class RSParsedEnclosure;
@class RSParsedAuthor;
@interface RSParsedArticle : NSObject
@ -22,13 +23,14 @@
@property (nonatomic, nullable) NSString *body;
@property (nonatomic, nullable) NSString *link;
@property (nonatomic, nullable) NSString *permalink;
@property (nonatomic, nullable) NSString *author;
@property (nonatomic, nullable) NSSet<RSParsedAuthor *> *authors;
@property (nonatomic, nullable) NSSet<RSParsedEnclosure *> *enclosures;
@property (nonatomic, nullable) NSDate *datePublished;
@property (nonatomic, nullable) NSDate *dateModified;
@property (nonatomic, nonnull) NSDate *dateParsed;
- (void)addEnclosure:(RSParsedEnclosure *_Nonnull)enclosure;
- (void)addAuthor:(RSParsedAuthor *_Nonnull)author;
@end

View File

@ -9,7 +9,8 @@
#import <RSParser/RSParsedArticle.h>
#import <RSParser/RSParserInternal.h>
#import <RSParser/NSString+RSParser.h>
#import <RSParser/RSParsedAuthor.h>
#import <RSParser/RSParsedEnclosure.h>
@implementation RSParsedArticle
@ -44,7 +45,19 @@
}
}
#pragma mark - Accessors
#pragma mark - Authors
- (void)addAuthor:(RSParsedAuthor *)author {

	// authors is an immutable NSSet, so every addition swaps in a new set;
	// the very first author creates the set.
	NSSet<RSParsedAuthor *> *existingAuthors = self.authors;
	self.authors = existingAuthors ? [existingAuthors setByAddingObject:author] : [NSSet setWithObject:author];
}
#pragma mark - articleID
- (NSString *)articleID {

View File

@ -0,0 +1,19 @@
//
// RSParsedAuthor.h
// RSParserTests
//
// Created by Brent Simmons on 12/19/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
/// One author of a parsed article. Every field is nullable; a given author
/// may carry only some of them.
@interface RSParsedAuthor : NSObject

// `copy` rather than the default strong: a caller may hand in an
// NSMutableString, and it must not be able to change the value afterwards.
@property (nonatomic, copy, nullable) NSString *name;
@property (nonatomic, copy, nullable) NSString *emailAddress;
@property (nonatomic, copy, nullable) NSString *url;

/// Creates an author from a single string when it is not known which property
/// the string belongs to. The property is guessed from the contents of the
/// string. Common with RSS.
+ (instancetype _Nonnull)authorWithSingleString:(NSString *_Nonnull)s;

@end

View File

@ -0,0 +1,33 @@
//
// RSParsedAuthor.m
// RSParserTests
//
// Created by Brent Simmons on 12/19/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
#import <RSParser/NSString+RSParser.h>
#import "RSParsedAuthor.h"
@implementation RSParsedAuthor

+ (instancetype)authorWithSingleString:(NSString *)s {

	// RSS says the author element holds an email address, but in practice it is
	// often a name and sometimes a URL, so pick the property from the contents.
	RSParsedAuthor *parsedAuthor = [[self alloc] init];

	// Anything containing "@" is taken to be an email address.
	if ([s rsparser_contains:@"@"]) {
		parsedAuthor.emailAddress = s;
		return parsedAuthor;
	}

	// A case-insensitive "http" prefix marks a URL.
	NSString *lowercasedString = s.lowercaseString;
	if ([lowercasedString hasPrefix:@"http"]) {
		parsedAuthor.url = s;
		return parsedAuthor;
	}

	// Everything else is treated as a name.
	parsedAuthor.name = s;
	return parsedAuthor;
}

@end

View File

@ -40,19 +40,23 @@ private extension RSParsedFeedTransformer {
let contentHTML = parsedArticle.body
let datePublished = parsedArticle.datePublished
let dateModified = parsedArticle.dateModified
let authors = parsedAuthors(parsedArticle.author)
let authors = parsedAuthors(parsedArticle.authors)
let attachments = parsedAttachments(parsedArticle.enclosures)
return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: parsedArticle.feedURL, url: url, externalURL: externalURL, title: title, contentHTML: contentHTML, contentText: nil, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: nil, attachments: attachments)
}
// Builds the Set<ParsedAuthor> that is handed to ParsedItem; returns nil when there is nothing to convert.
// NOTE(review): this diff rendering interleaves the removed single-email-address version
// (String? parameter) with the added Set<RSParsedAuthor>? version — confirm which lines are live.
static func parsedAuthors(_ authorEmailAddress: String?) -> Set<ParsedAuthor>? {
static func parsedAuthors(_ authors: Set<RSParsedAuthor>?) -> Set<ParsedAuthor>? {
guard let authorEmailAddress = authorEmailAddress else {
guard let authors = authors, !authors.isEmpty else {
return nil
}
let author = ParsedAuthor(name: nil, url: nil, avatarURL: nil, emailAddress: authorEmailAddress)
return Set([author])
// Carries name, url and emailAddress across from each RSParsedAuthor; avatarURL is always nil here.
let transformedAuthors = authors.flatMap { (author) -> ParsedAuthor? in
return ParsedAuthor(name: author.name, url: author.url, avatarURL: nil, emailAddress: author.emailAddress)
}
return transformedAuthors.isEmpty ? nil : Set(transformedAuthors)
}
static func parsedAttachments(_ enclosures: Set<RSParsedEnclosure>?) -> Set<ParsedAttachment>? {

View File

@ -16,6 +16,7 @@
#import <RSParser/RSDateParser.h>
#import <RSParser/ParserData.h>
#import <RSParser/RSParsedEnclosure.h>
#import <RSParser/RSParsedAuthor.h>
@interface RSRSSParser () <RSSAXParserDelegate>
@ -207,15 +208,22 @@ static const NSInteger kEnclosureLength = 10;
}
}
// Adds an author built from authorString to the current article.
// Strings that RSParserStringIsEmpty reports as empty are ignored.
- (void)addAuthorWithString:(NSString *)authorString {

	if (RSParserStringIsEmpty(authorString)) {
		return;
	}

	// Build the author from the string that was just validated instead of
	// re-reading the parser's current string, so the emptiness check and the
	// value actually used cannot diverge.
	RSParsedAuthor *author = [RSParsedAuthor authorWithSingleString:authorString];
	[self.currentArticle addAuthor:author];
}
// Handles the element named by localName: a kCreator tag supplies an author for the
// current article, a kDate tag sets the article's datePublished from self.currentDate.
// NOTE(review): "DC" presumably means Dublin Core — confirm.
// NOTE(review): this diff rendering shows both the removed `author` assignment and the
// added addAuthorWithString: call — confirm only the latter remains in the real file.
- (void)addDCElement:(const xmlChar *)localName {
if (RSSAXEqualTags(localName, kCreator, kCreatorLength)) {
self.currentArticle.author = self.parser.currentStringWithTrimmedWhitespace;
[self addAuthorWithString:self.parser.currentStringWithTrimmedWhitespace];
}
else if (RSSAXEqualTags(localName, kDate, kDateLength)) {
self.currentArticle.datePublished = self.currentDate;
}
}
@ -304,7 +312,7 @@ static const NSInteger kEnclosureLength = 10;
self.currentArticle.datePublished = self.currentDate;
}
else if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) {
self.currentArticle.author = self.parser.currentStringWithTrimmedWhitespace;
[self addAuthorWithString:self.parser.currentStringWithTrimmedWhitespace];
}
else if (RSSAXEqualTags(localName, kLink, kLinkLength)) {
self.currentArticle.link = [self urlString:self.parser.currentStringWithTrimmedWhitespace];

View File

@ -38,6 +38,7 @@
#import <RSParser/RSParsedFeed.h>
#import <RSParser/RSParsedArticle.h>
#import <RSParser/RSParsedEnclosure.h>
#import <RSParser/RSParsedAuthor.h>
// HTML

View File

@ -66,6 +66,8 @@
84469D441F002CEF004A6B28 /* JSONFeedParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84469D431F002CEF004A6B28 /* JSONFeedParser.swift */; };
844B5B3E1FE9A13C00C7C76A /* 4fsodonline.atom in Resources */ = {isa = PBXBuildFile; fileRef = 844B5B3D1FE9A13B00C7C76A /* 4fsodonline.atom */; };
844B5B401FE9A45200C7C76A /* expertopinionent.atom in Resources */ = {isa = PBXBuildFile; fileRef = 844B5B3F1FE9A45200C7C76A /* expertopinionent.atom */; };
844B5B441FE9AFDB00C7C76A /* RSParsedAuthor.h in Headers */ = {isa = PBXBuildFile; fileRef = 844B5B411FE9AF2700C7C76A /* RSParsedAuthor.h */; settings = {ATTRIBUTES = (Public, ); }; };
844B5B451FE9AFE000C7C76A /* RSParsedAuthor.m in Sources */ = {isa = PBXBuildFile; fileRef = 844B5B421FE9AF2700C7C76A /* RSParsedAuthor.m */; };
845213251FCB3C76003B6E93 /* coco.html in Resources */ = {isa = PBXBuildFile; fileRef = 845213241FCB3C75003B6E93 /* coco.html */; };
845213281FCB4042003B6E93 /* RSHTMLTag.h in Headers */ = {isa = PBXBuildFile; fileRef = 845213261FCB4042003B6E93 /* RSHTMLTag.h */; settings = {ATTRIBUTES = (Public, ); }; };
845213291FCB4042003B6E93 /* RSHTMLTag.m in Sources */ = {isa = PBXBuildFile; fileRef = 845213271FCB4042003B6E93 /* RSHTMLTag.m */; };
@ -176,6 +178,8 @@
84469D431F002CEF004A6B28 /* JSONFeedParser.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = JSONFeedParser.swift; path = Feeds/JSON/JSONFeedParser.swift; sourceTree = "<group>"; };
844B5B3D1FE9A13B00C7C76A /* 4fsodonline.atom */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xml; path = 4fsodonline.atom; sourceTree = "<group>"; };
844B5B3F1FE9A45200C7C76A /* expertopinionent.atom */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xml; path = expertopinionent.atom; sourceTree = "<group>"; };
844B5B411FE9AF2700C7C76A /* RSParsedAuthor.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = RSParsedAuthor.h; sourceTree = "<group>"; };
844B5B421FE9AF2700C7C76A /* RSParsedAuthor.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = RSParsedAuthor.m; sourceTree = "<group>"; };
845213241FCB3C75003B6E93 /* coco.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = coco.html; sourceTree = "<group>"; };
845213261FCB4042003B6E93 /* RSHTMLTag.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = RSHTMLTag.h; sourceTree = "<group>"; };
845213271FCB4042003B6E93 /* RSHTMLTag.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = RSHTMLTag.m; sourceTree = "<group>"; };
@ -355,6 +359,8 @@
84469D241EFA3134004A6B28 /* RSParsedFeed.m */,
8401FF7F1FE862E70080F13F /* RSParsedEnclosure.h */,
8401FF801FE862E70080F13F /* RSParsedEnclosure.m */,
844B5B411FE9AF2700C7C76A /* RSParsedAuthor.h */,
844B5B421FE9AF2700C7C76A /* RSParsedAuthor.m */,
);
name = XML;
path = Feeds/XML;
@ -469,6 +475,7 @@
84469CFC1EFA3069004A6B28 /* RSSAXParser.h in Headers */,
845213281FCB4042003B6E93 /* RSHTMLTag.h in Headers */,
84E7E69F1F85780D0046719D /* ParserData.h in Headers */,
844B5B441FE9AFDB00C7C76A /* RSParsedAuthor.h in Headers */,
8401FF811FE862E70080F13F /* RSParsedEnclosure.h in Headers */,
84469D071EFA307E004A6B28 /* RSHTMLLinkParser.h in Headers */,
84469D0D1EFA307E004A6B28 /* RSSAXHTMLParser.h in Headers */,
@ -624,6 +631,7 @@
84285AAC1F006754002E8708 /* AtomParser.swift in Sources */,
84285AA81F005D53002E8708 /* RSSParser.swift in Sources */,
84469D421EFF2B2D004A6B28 /* JSONTypes.swift in Sources */,
844B5B451FE9AFE000C7C76A /* RSParsedAuthor.m in Sources */,
84469D0C1EFA307E004A6B28 /* RSHTMLMetadataParser.m in Sources */,
84469D0A1EFA307E004A6B28 /* RSHTMLMetadata.m in Sources */,
84469D171EFA30A2004A6B28 /* NSString+RSParser.m in Sources */,

View File

@ -16,6 +16,8 @@ NS_ASSUME_NONNULL_BEGIN
- (NSString *)rsparser_md5Hash;
/// Returns YES when s is found somewhere in the receiver.
- (BOOL)rsparser_contains:(NSString *)s;
@end
NS_ASSUME_NONNULL_END

View File

@ -19,6 +19,11 @@
@implementation NSString (RSParser)
// Returns YES when rangeOfString: locates s somewhere in the receiver.
- (BOOL)rsparser_contains:(NSString *)s {

	NSRange foundRange = [self rangeOfString:s];
	return foundRange.location != NSNotFound;
}
- (NSString *)rsparser_stringByDecodingHTMLEntities {
@autoreleasepool {