Make RSParsedArticle calculate a unique ID only when there’s no guid. Otherwise use the guid. Do not attempt to create a globally-unique ID — that’s out of scope for the parser and should be (and is) handled at the app’s model layer.

This commit is contained in:
Brent Simmons 2017-12-19 10:59:24 -08:00
parent 7ccb531f0c
commit a82cee29b8
5 changed files with 41 additions and 27 deletions

View File

@ -195,9 +195,6 @@ static const NSInteger kLengthLength = 7;
[self.parser parseData:self.feedData];
[self.parser finishParsing];
}
// Optimization: make articles do calculations on this background thread.
[self.articles makeObjectsPerformSelector:@selector(calculateArticleID)];
}

View File

@ -15,7 +15,7 @@
- (nonnull instancetype)initWithFeedURL:(NSString * _Nonnull)feedURL;
@property (nonatomic, readonly, nonnull) NSString *feedURL;
@property (nonatomic, nonnull) NSString *articleID; //Calculated. Don't get until other properties have been set.
@property (nonatomic, nonnull) NSString *articleID; //guid, if present, or calculated from other attributes. Should be unique to the feed, but not necessarily unique across different feeds. (Not suitable for a database ID.)
@property (nonatomic, nullable) NSString *guid;
@property (nonatomic, nullable) NSString *title;
@ -30,7 +30,5 @@
- (void)addEnclosure:(RSParsedEnclosure *_Nonnull)enclosure;
- (void)calculateArticleID; // Optimization. Call after all properties have been set. Call on a background thread.
@end

View File

@ -47,20 +47,26 @@
#pragma mark - Accessors
- (NSString *)articleID {
if (self.guid) {
return self.guid;
}
if (!_articleID) {
_articleID = self.calculatedUniqueID;
_articleID = [self calculatedArticleID];
}
return _articleID;
}
- (NSString *)calculatedUniqueID {
- (NSString *)calculatedArticleID {
/*guid+feedID, or a combination of properties when no guid. Then hash the result.
In general, feeds should have guids. When they don't, re-runs are very likely,
because there's no other 100% reliable way to determine identity.*/
/*Concatenate a combination of properties when no guid. Then hash the result.
In general, feeds should have guids. When they don't, re-runs are very likely,
because there's no other 100% reliable way to determine identity.
This is intended to create an ID unique inside a feed, but not globally unique.
Not suitable for a database ID, in other words.*/
NSMutableString *s = [NSMutableString stringWithString:@""];
@ -69,11 +75,7 @@
datePublishedTimeStampString = [NSString stringWithFormat:@"%.0f", self.datePublished.timeIntervalSince1970];
}
if (!RSParserStringIsEmpty(self.guid)) {
[s appendString:self.guid];
}
else if (!RSParserStringIsEmpty(self.link) && self.datePublished != nil) {
if (!RSParserStringIsEmpty(self.link) && self.datePublished != nil) {
[s appendString:self.link];
[s appendString:datePublishedTimeStampString];
}
@ -99,15 +101,11 @@
[s appendString:self.body];
}
NSAssert(!RSParserStringIsEmpty(self.feedURL), nil);
[s appendString:self.feedURL];
return [s rsparser_md5Hash];
}
- (void)calculateArticleID {
else if (!RSParserStringIsEmpty(self.permalink)) {
[s appendString:self.permalink];
}
(void)self.articleID;
return [s rsparser_md5Hash];
}
@end

View File

@ -172,9 +172,6 @@ static const NSInteger kEnclosureLength = 10;
[self.parser parseData:self.feedData];
[self.parser finishParsing];
}
// Optimization: make articles do calculations on this background thread.
[self.articles makeObjectsPerformSelector:@selector(calculateArticleID)];
}

View File

@ -36,4 +36,28 @@ class AtomParserTests: XCTestCase {
XCTAssertTrue(parsedFeed.homePageURL == "http://leancrew.com/all-this")
}
/// Parses the "DaringFireball" Atom data obtained from `parserData` and checks,
/// for every parsed item, that it has an external URL, a unique ID beginning with
/// `tag:daringfireball.net,2017:/`, a publication date, and no attachments.
/// Items whose title lacks the marker prefix must additionally carry a `url`
/// on https://daringfireball.net/.
func testDaringFireball() {
    let d = parserData("DaringFireball", "atom", "http://daringfireball.net/") // It's actually an Atom feed
    let parsedFeed = try! FeedParser.parse(d)!

    for article in parsedFeed.items {
        XCTAssertNotNil(article.externalURL)

        // NOTE(review): the prefix literal was empty here, which makes this guard
        // constant for every title instead of distinguishing two kinds of items.
        // "★" is presumably the marker that was lost from the literal — confirm
        // against the titles in the fixture before relying on this branch.
        if !article.title!.hasPrefix("★") {
            XCTAssertNotNil(article.url)
            XCTAssert(article.url!.hasPrefix("https://daringfireball.net/"))
        }

        XCTAssertTrue(article.uniqueID.hasPrefix("tag:daringfireball.net,2017:/"))
        // XCTAssertEqual(article.authors!.count, 1) // TODO: parse Atom authors
        XCTAssertNotNil(article.datePublished)
        XCTAssertNil(article.attachments)
    }
}
}