Get twitter:image:src from HTML metadata.

This commit is contained in:
Brent Simmons 2017-11-26 11:49:01 -08:00
parent ff7695c290
commit bfccda8c88
3 changed files with 68 additions and 1 deletions

View File

@ -13,6 +13,7 @@
@class RSHTMLOpenGraphProperties; @class RSHTMLOpenGraphProperties;
@class RSHTMLOpenGraphImage; @class RSHTMLOpenGraphImage;
@class RSHTMLTag; @class RSHTMLTag;
@class RSHTMLTwitterProperties;
@interface RSHTMLMetadata : NSObject @interface RSHTMLMetadata : NSObject
@ -26,6 +27,8 @@
@property (nonatomic, readonly) NSArray <RSHTMLMetadataFeedLink *> *feedLinks; @property (nonatomic, readonly) NSArray <RSHTMLMetadataFeedLink *> *feedLinks;
@property (nonatomic, readonly) RSHTMLOpenGraphProperties *openGraphProperties; @property (nonatomic, readonly) RSHTMLOpenGraphProperties *openGraphProperties;
@property (nonatomic, readonly) RSHTMLTwitterProperties *twitterProperties;
@end @end
@ -67,3 +70,14 @@
@property (nonatomic, readonly) NSString *altText; @property (nonatomic, readonly) NSString *altText;
@end @end
/// Twitter-specific metadata collected from a page's meta tags.
// TODO: the rest. At this writing (Nov. 26, 2017) only twitter:image:src is extracted.
@interface RSHTMLTwitterProperties : NSObject

/// The `content` attribute of the first meta tag whose `name` attribute is
/// twitter:image:src and whose content is non-empty. nil when no tag qualifies.
@property (nonatomic, readonly) NSString *imageURL; // twitter:image:src

/// Builds the properties by scanning `tags` in order.
/// @param urlString URL of the page the tags came from (not consulted by the current implementation).
/// @param tags Parsed tags; only those of type RSHTMLTagTypeMeta are examined.
- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray <RSHTMLTag *> *)tags;

@end

View File

@ -71,6 +71,7 @@ static NSString *kTypeKey = @"type";
_feedLinks = objectsOfClassWithTags([RSHTMLMetadataFeedLink class], feedLinkTags, urlString); _feedLinks = objectsOfClassWithTags([RSHTMLMetadataFeedLink class], feedLinkTags, urlString);
_openGraphProperties = [[RSHTMLOpenGraphProperties alloc] initWithURLString:urlString tags:tags]; _openGraphProperties = [[RSHTMLOpenGraphProperties alloc] initWithURLString:urlString tags:tags];
_twitterProperties = [[RSHTMLTwitterProperties alloc] initWithURLString:urlString tags:tags];
return self; return self;
} }
@ -382,4 +383,38 @@ static NSString *ogContentKey = @"content";
@end @end
@implementation RSHTMLTwitterProperties

// Attribute keys looked up on each meta tag, and the one tag name we care about.
static NSString *twitterNameKey = @"name";
static NSString *twitterContentKey = @"content";
static NSString *twitterImageSrc = @"twitter:image:src";

/// Walks `tags` in order and stores the content of the first meta tag named
/// twitter:image:src that has non-empty content. `imageURL` is left nil when
/// no tag qualifies. `urlString` is accepted but not used here.
- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray <RSHTMLTag *> *)tags {

    self = [super init];
    if (self) {
        for (RSHTMLTag *oneTag in tags) {

            // Short-circuits: attributes are only consulted for meta tags.
            // A missing name attribute yields NO because messaging nil returns NO.
            BOOL isTwitterImageTag = (oneTag.type == RSHTMLTagTypeMeta)
                && [oneTag.attributes[twitterNameKey] isEqualToString:twitterImageSrc];
            if (!isTwitterImageTag) {
                continue;
            }

            // A matching tag with missing or empty content is skipped so that a
            // later, well-formed tag can still supply the value.
            NSString *imageURLString = oneTag.attributes[twitterContentKey];
            if (imageURLString.length > 0) {
                _imageURL = imageURLString;
                break;
            }
        }
    }

    return self;
}

@end

View File

@ -83,6 +83,15 @@ class HTMLMetadataTests: XCTestCase {
} }
} }
func testCocoPerformance() {

    // 0.004 sec on my 2012 iMac
    // The data is loaded once, outside the measured closure, so only parsing is timed.
    let cocoData = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/")
    measure {
        _ = RSHTMLMetadataParser.htmlMetadata(with: cocoData)
    }
}
func testSixColors() { func testSixColors() {
let d = parserData("sixcolors", "html", "http://sixcolors.com/") let d = parserData("sixcolors", "html", "http://sixcolors.com/")
@ -120,4 +129,13 @@ class HTMLMetadataTests: XCTestCase {
let image = openGraphData.images.first! let image = openGraphData.images.first!
XCTAssert(image.url == "https://cdn.theatlantic.com/assets/media/img/mt/2017/11/1033101_first_full_length_trailer_arrives_pixars_coco/facebook.jpg?1511382177") XCTAssert(image.url == "https://cdn.theatlantic.com/assets/media/img/mt/2017/11/1033101_first_full_length_trailer_arrives_pixars_coco/facebook.jpg?1511382177")
} }
func testCocoTwitterImage() {

    // Checks that the twitter:image:src value parsed from the "coco" test data
    // is exposed through twitterProperties.imageURL.
    let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/")
    let metadata = RSHTMLMetadataParser.htmlMetadata(with: d)

    // Optional chaining rather than force-unwrapping: if the parser finds no
    // Twitter properties or no image URL, this records an ordinary test failure
    // (showing actual vs. expected) instead of trapping and aborting the test run.
    XCTAssertEqual(metadata.twitterProperties?.imageURL, "https://cdn.theatlantic.com/assets/media/img/mt/2017/11/1033101_first_full_length_trailer_arrives_pixars_coco/facebook.jpg?1511382177")
}
} }