Make RSParser a local module and rename it to Parser.

This commit is contained in:
Brent Simmons 2024-04-02 21:43:06 -07:00
parent 3859b8ff45
commit 40abf257a6
178 changed files with 37543 additions and 105 deletions

View File

@ -11,7 +11,7 @@ let package = Package(
targets: ["Account"]), targets: ["Account"]),
], ],
dependencies: [ dependencies: [
.package(url: "https://github.com/Ranchero-Software/RSParser.git", .upToNextMajor(from: "2.0.2")), .package(path: "../Parser"),
.package(path: "../Articles"), .package(path: "../Articles"),
.package(path: "../ArticlesDatabase"), .package(path: "../ArticlesDatabase"),
.package(path: "../Web"), .package(path: "../Web"),
@ -25,7 +25,7 @@ let package = Package(
.target( .target(
name: "Account", name: "Account",
dependencies: [ dependencies: [
"RSParser", "Parser",
"Web", "Web",
"Articles", "Articles",
"ArticlesDatabase", "ArticlesDatabase",

View File

@ -12,7 +12,7 @@ import UIKit
import Foundation import Foundation
import Articles import Articles
import RSParser import Parser
import Database import Database
import ArticlesDatabase import ArticlesDatabase
import Web import Web

View File

@ -11,7 +11,7 @@ import CloudKit
import SystemConfiguration import SystemConfiguration
import os.log import os.log
import SyncDatabase import SyncDatabase
import RSParser import Parser
import Articles import Articles
import ArticlesDatabase import ArticlesDatabase
import Web import Web

View File

@ -9,7 +9,7 @@
import Foundation import Foundation
import os.log import os.log
import Web import Web
import RSParser import Parser
import CloudKit import CloudKit
import FoundationExtras import FoundationExtras
import CloudKitExtras import CloudKitExtras

View File

@ -8,7 +8,7 @@
import Foundation import Foundation
import os.log import os.log
import RSParser import Parser
import Web import Web
import CloudKit import CloudKit
import Articles import Articles

View File

@ -8,7 +8,7 @@
import Foundation import Foundation
import os.log import os.log
import RSParser import Parser
import Web import Web
import CloudKit import CloudKit
import SyncDatabase import SyncDatabase

View File

@ -8,7 +8,7 @@
import Foundation import Foundation
import Articles import Articles
import RSParser import Parser
public extension Notification.Name { public extension Notification.Name {
static let FeedSettingDidChange = Notification.Name(rawValue: "FeedSettingDidChangeNotification") static let FeedSettingDidChange = Notification.Name(rawValue: "FeedSettingDidChangeNotification")

View File

@ -7,7 +7,7 @@
// //
import Foundation import Foundation
import RSParser import Parser
import Web import Web
class FeedFinder { class FeedFinder {

View File

@ -7,7 +7,7 @@
// //
import Foundation import Foundation
import RSParser import Parser
private let feedURLWordsToMatch = ["feed", "xml", "rss", "atom", "json"] private let feedURLWordsToMatch = ["feed", "xml", "rss", "atom", "json"]

View File

@ -8,7 +8,7 @@
import Articles import Articles
import Database import Database
import RSParser import Parser
import Web import Web
import SyncDatabase import SyncDatabase
import os.log import os.log

View File

@ -7,7 +7,7 @@
// //
import Foundation import Foundation
import RSParser import Parser
final class FeedbinEntry: Decodable { final class FeedbinEntry: Decodable {

View File

@ -7,7 +7,7 @@
// //
import Foundation import Foundation
import RSParser import Parser
struct FeedbinSubscription: Hashable, Codable { struct FeedbinSubscription: Hashable, Codable {

View File

@ -7,7 +7,7 @@
// //
import Articles import Articles
import RSParser import Parser
import Web import Web
import SyncDatabase import SyncDatabase
import os.log import os.log

View File

@ -8,7 +8,7 @@
import Foundation import Foundation
import Articles import Articles
import RSParser import Parser
struct FeedlyEntryParser { struct FeedlyEntryParser {
let entry: FeedlyEntry let entry: FeedlyEntry

View File

@ -8,7 +8,7 @@
import Foundation import Foundation
import os.log import os.log
import RSParser import Parser
/// Get full entries for the entry identifiers. /// Get full entries for the entry identifiers.
final class FeedlyGetEntriesOperation: FeedlyOperation, FeedlyEntryProviding, FeedlyParsedItemProviding { final class FeedlyGetEntriesOperation: FeedlyOperation, FeedlyEntryProviding, FeedlyParsedItemProviding {

View File

@ -7,7 +7,7 @@
// //
import Foundation import Foundation
import RSParser import Parser
import os.log import os.log
protocol FeedlyEntryProviding { protocol FeedlyEntryProviding {

View File

@ -8,7 +8,7 @@
import Foundation import Foundation
import os.log import os.log
import RSParser import Parser
import SyncDatabase import SyncDatabase
import Secrets import Secrets

View File

@ -7,7 +7,7 @@
// //
import Foundation import Foundation
import RSParser import Parser
import os.log import os.log
protocol FeedlyParsedItemsByFeedProviding { protocol FeedlyParsedItemsByFeedProviding {

View File

@ -8,7 +8,7 @@
import Foundation import Foundation
import os.log import os.log
import RSParser import Parser
import Web import Web
import Secrets import Secrets
import Core import Core

View File

@ -7,7 +7,7 @@
// //
import Foundation import Foundation
import RSParser import Parser
import os.log import os.log
import Database import Database

View File

@ -7,7 +7,7 @@
// //
import Foundation import Foundation
import RSParser import Parser
import Web import Web
struct InitialFeedDownloader { struct InitialFeedDownloader {

View File

@ -8,7 +8,7 @@
import Foundation import Foundation
import os.log import os.log
import RSParser import Parser
import Articles import Articles
import ArticlesDatabase import ArticlesDatabase
import Web import Web

View File

@ -7,7 +7,7 @@
// //
import Foundation import Foundation
import RSParser import Parser
import Web import Web
import Articles import Articles
import ArticlesDatabase import ArticlesDatabase

View File

@ -9,7 +9,7 @@
import Articles import Articles
import Database import Database
import RSParser import Parser
import Web import Web
import SyncDatabase import SyncDatabase
import os.log import os.log

View File

@ -7,7 +7,7 @@
// //
import Foundation import Foundation
import RSParser import Parser
typealias NewsBlurFolder = NewsBlurFeedsResponse.Folder typealias NewsBlurFolder = NewsBlurFeedsResponse.Folder

View File

@ -7,7 +7,7 @@
// //
import Foundation import Foundation
import RSParser import Parser
typealias NewsBlurStory = NewsBlurStoriesResponse.Story typealias NewsBlurStory = NewsBlurStoriesResponse.Story

View File

@ -7,7 +7,7 @@
// //
import Foundation import Foundation
import RSParser import Parser
typealias NewsBlurStoryHash = NewsBlurStoryHashesResponse.StoryHash typealias NewsBlurStoryHash = NewsBlurStoryHashesResponse.StoryHash

View File

@ -8,7 +8,7 @@
import Articles import Articles
import Database import Database
@preconcurrency import RSParser @preconcurrency import Parser
import Web import Web
import SyncDatabase import SyncDatabase
import os.log import os.log

View File

@ -8,7 +8,7 @@
import Foundation import Foundation
import os.log import os.log
import RSParser import Parser
import Core import Core
@MainActor final class OPMLFile { @MainActor final class OPMLFile {

View File

@ -7,7 +7,7 @@
// //
import Foundation import Foundation
import RSParser import Parser
final class OPMLNormalizer { final class OPMLNormalizer {

View File

@ -7,7 +7,7 @@
// //
import Articles import Articles
import RSParser import Parser
import Web import Web
import SyncDatabase import SyncDatabase
import os.log import os.log

View File

@ -7,7 +7,7 @@
// //
import Foundation import Foundation
import RSParser import Parser
struct ReaderAPIEntryWrapper: Codable { struct ReaderAPIEntryWrapper: Codable {
let id: String let id: String

View File

@ -7,7 +7,7 @@
// //
import Foundation import Foundation
import RSParser import Parser
/* /*

View File

@ -8,7 +8,7 @@
import XCTest import XCTest
@testable import Account @testable import Account
import RSParser import Parser
class FeedlyOrganiseParsedItemsByFeedOperationTests: XCTestCase { class FeedlyOrganiseParsedItemsByFeedOperationTests: XCTestCase {

View File

@ -7,7 +7,7 @@
// //
import XCTest import XCTest
import RSParser import Parser
import Secrets import Secrets
@testable import Account @testable import Account
import os.log import os.log

View File

@ -12,7 +12,7 @@ let package = Package(
targets: ["ArticlesDatabase"]), targets: ["ArticlesDatabase"]),
], ],
dependencies: [ dependencies: [
.package(url: "https://github.com/Ranchero-Software/RSParser.git", .upToNextMajor(from: "2.0.2")), .package(path: "../Parser"),
.package(path: "../Articles"), .package(path: "../Articles"),
.package(path: "../Database"), .package(path: "../Database"),
.package(path: "../FMDB"), .package(path: "../FMDB"),
@ -23,7 +23,7 @@ let package = Package(
name: "ArticlesDatabase", name: "ArticlesDatabase",
dependencies: [ dependencies: [
"Database", "Database",
"RSParser", "Parser",
"Articles", "Articles",
"FMDB", "FMDB",
"FoundationExtras" "FoundationExtras"

View File

@ -10,7 +10,7 @@ import Foundation
import Database import Database
import FMDB import FMDB
import Articles import Articles
import RSParser import Parser
public typealias UnreadCountDictionary = [String: Int] // feedID: unreadCount public typealias UnreadCountDictionary = [String: Int] // feedID: unreadCount

View File

@ -8,7 +8,7 @@
import Foundation import Foundation
import Database import Database
import RSParser import Parser
import Articles import Articles
// This file exists for compatibility — it provides nonisolated functions and callback-based APIs. // This file exists for compatibility — it provides nonisolated functions and callback-based APIs.

View File

@ -9,7 +9,7 @@ import Foundation
import FMDB import FMDB
import Database import Database
import Articles import Articles
import RSParser import Parser
final class ArticlesTable { final class ArticlesTable {

View File

@ -9,7 +9,7 @@
import Foundation import Foundation
import Database import Database
import Articles import Articles
import RSParser import Parser
import FMDB import FMDB
extension Article { extension Article {

View File

@ -9,7 +9,7 @@
import Foundation import Foundation
import Articles import Articles
import Database import Database
import RSParser import Parser
import FMDB import FMDB
// MARK: - DatabaseObject // MARK: - DatabaseObject

View File

@ -7,7 +7,7 @@
// //
import Foundation import Foundation
import RSParser import Parser
import Articles import Articles
extension ParsedItem { extension ParsedItem {

View File

@ -9,7 +9,7 @@
import Foundation import Foundation
import Database import Database
import Articles import Articles
import RSParser import Parser
import FMDB import FMDB
final class ArticleSearchInfo: Hashable { final class ArticleSearchInfo: Hashable {

View File

@ -11,7 +11,7 @@ import CoreResources
import Tree import Tree
import Articles import Articles
import Account import Account
import RSParser import Parser
// Run add-feed sheet. // Run add-feed sheet.
// If it returns with URL and optional name, // If it returns with URL and optional name,

View File

@ -7,7 +7,7 @@
// //
import Foundation import Foundation
import RSParser import Parser
import Account import Account
import Articles import Articles

View File

@ -141,8 +141,6 @@
5137C2EA26F63AE6009EFEDB /* ArticleThemeImporter.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5137C2E926F63AE6009EFEDB /* ArticleThemeImporter.swift */; }; 5137C2EA26F63AE6009EFEDB /* ArticleThemeImporter.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5137C2E926F63AE6009EFEDB /* ArticleThemeImporter.swift */; };
51386A8E25673277005F3762 /* AccountCell.swift in Sources */ = {isa = PBXBuildFile; fileRef = 51386A8D25673276005F3762 /* AccountCell.swift */; }; 51386A8E25673277005F3762 /* AccountCell.swift in Sources */ = {isa = PBXBuildFile; fileRef = 51386A8D25673276005F3762 /* AccountCell.swift */; };
51386A8F25673277005F3762 /* AccountCell.swift in Sources */ = {isa = PBXBuildFile; fileRef = 51386A8D25673276005F3762 /* AccountCell.swift */; }; 51386A8F25673277005F3762 /* AccountCell.swift in Sources */ = {isa = PBXBuildFile; fileRef = 51386A8D25673276005F3762 /* AccountCell.swift */; };
5138E95224D3418100AFF0FE /* RSParser in Frameworks */ = {isa = PBXBuildFile; productRef = 5138E95124D3418100AFF0FE /* RSParser */; };
5138E95324D3418100AFF0FE /* RSParser in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 5138E95124D3418100AFF0FE /* RSParser */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; };
513C5CE9232571C2003D4054 /* ShareViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 513C5CE8232571C2003D4054 /* ShareViewController.swift */; }; 513C5CE9232571C2003D4054 /* ShareViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 513C5CE8232571C2003D4054 /* ShareViewController.swift */; };
513C5CEC232571C2003D4054 /* MainInterface.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 513C5CEA232571C2003D4054 /* MainInterface.storyboard */; }; 513C5CEC232571C2003D4054 /* MainInterface.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 513C5CEA232571C2003D4054 /* MainInterface.storyboard */; };
513C5CF0232571C2003D4054 /* NetNewsWire iOS Share Extension.appex in Embed App Extensions */ = {isa = PBXBuildFile; fileRef = 513C5CE6232571C2003D4054 /* NetNewsWire iOS Share Extension.appex */; settings = {ATTRIBUTES = (RemoveHeadersOnCopy, ); }; }; 513C5CF0232571C2003D4054 /* NetNewsWire iOS Share Extension.appex in Embed App Extensions */ = {isa = PBXBuildFile; fileRef = 513C5CE6232571C2003D4054 /* NetNewsWire iOS Share Extension.appex */; settings = {ATTRIBUTES = (RemoveHeadersOnCopy, ); }; };
@ -222,8 +220,6 @@
51A1699F235E10D700EB091F /* AboutViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 51A16995235E10D600EB091F /* AboutViewController.swift */; }; 51A1699F235E10D700EB091F /* AboutViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 51A16995235E10D600EB091F /* AboutViewController.swift */; };
51A169A0235E10D700EB091F /* FeedbinAccountViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 51A16996235E10D700EB091F /* FeedbinAccountViewController.swift */; }; 51A169A0235E10D700EB091F /* FeedbinAccountViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 51A16996235E10D700EB091F /* FeedbinAccountViewController.swift */; };
51A66685238075AE00CB272D /* AddFeedDefaultContainer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 51A66684238075AE00CB272D /* AddFeedDefaultContainer.swift */; }; 51A66685238075AE00CB272D /* AddFeedDefaultContainer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 51A66684238075AE00CB272D /* AddFeedDefaultContainer.swift */; };
51A737C824DB19CC0015FA66 /* RSParser in Frameworks */ = {isa = PBXBuildFile; productRef = 51A737C724DB19CC0015FA66 /* RSParser */; };
51A737C924DB19CC0015FA66 /* RSParser in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 51A737C724DB19CC0015FA66 /* RSParser */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; };
51A9A5E12380C4FE0033AADF /* AppDefaults.swift in Sources */ = {isa = PBXBuildFile; fileRef = 51C45255226507D200C03939 /* AppDefaults.swift */; }; 51A9A5E12380C4FE0033AADF /* AppDefaults.swift in Sources */ = {isa = PBXBuildFile; fileRef = 51C45255226507D200C03939 /* AppDefaults.swift */; };
51A9A5E42380C8880033AADF /* ShareFolderPickerAccountCell.xib in Resources */ = {isa = PBXBuildFile; fileRef = 51A9A5E32380C8870033AADF /* ShareFolderPickerAccountCell.xib */; }; 51A9A5E42380C8880033AADF /* ShareFolderPickerAccountCell.xib in Resources */ = {isa = PBXBuildFile; fileRef = 51A9A5E32380C8870033AADF /* ShareFolderPickerAccountCell.xib */; };
51A9A5E62380C8B20033AADF /* ShareFolderPickerFolderCell.xib in Resources */ = {isa = PBXBuildFile; fileRef = 51A9A5E52380C8B20033AADF /* ShareFolderPickerFolderCell.xib */; }; 51A9A5E62380C8B20033AADF /* ShareFolderPickerFolderCell.xib in Resources */ = {isa = PBXBuildFile; fileRef = 51A9A5E52380C8B20033AADF /* ShareFolderPickerFolderCell.xib */; };
@ -385,8 +381,6 @@
653813252680E1D6007A082C /* ArticlesDatabase in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 653813232680E1D6007A082C /* ArticlesDatabase */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; }; 653813252680E1D6007A082C /* ArticlesDatabase in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 653813232680E1D6007A082C /* ArticlesDatabase */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; };
653813262680E1E4007A082C /* CloudKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 51E4DAEC2425F6940091EB5B /* CloudKit.framework */; }; 653813262680E1E4007A082C /* CloudKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 51E4DAEC2425F6940091EB5B /* CloudKit.framework */; };
653813282680E1EC007A082C /* CrashReporter in Frameworks */ = {isa = PBXBuildFile; productRef = 653813272680E1EC007A082C /* CrashReporter */; }; 653813282680E1EC007A082C /* CrashReporter in Frameworks */ = {isa = PBXBuildFile; productRef = 653813272680E1EC007A082C /* CrashReporter */; };
653813302680E20C007A082C /* RSParser in Frameworks */ = {isa = PBXBuildFile; productRef = 6538132F2680E20C007A082C /* RSParser */; };
653813312680E20C007A082C /* RSParser in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 6538132F2680E20C007A082C /* RSParser */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; };
653813392680E22B007A082C /* Secrets in Frameworks */ = {isa = PBXBuildFile; productRef = 653813382680E22B007A082C /* Secrets */; }; 653813392680E22B007A082C /* Secrets in Frameworks */ = {isa = PBXBuildFile; productRef = 653813382680E22B007A082C /* Secrets */; };
6538133A2680E22B007A082C /* Secrets in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 653813382680E22B007A082C /* Secrets */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; }; 6538133A2680E22B007A082C /* Secrets in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 653813382680E22B007A082C /* Secrets */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; };
6538133B2680E28D007A082C /* Subscribe to Feed MAS.appex in Embed App Extensions */ = {isa = PBXBuildFile; fileRef = 65ED409D235DEF770081F399 /* Subscribe to Feed MAS.appex */; settings = {ATTRIBUTES = (RemoveHeadersOnCopy, ); }; }; 6538133B2680E28D007A082C /* Subscribe to Feed MAS.appex in Embed App Extensions */ = {isa = PBXBuildFile; fileRef = 65ED409D235DEF770081F399 /* Subscribe to Feed MAS.appex */; settings = {ATTRIBUTES = (RemoveHeadersOnCopy, ); }; };
@ -621,6 +615,14 @@
8454C3F3263F2D8700E3F9C7 /* IconImageCache.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8454C3F2263F2D8700E3F9C7 /* IconImageCache.swift */; }; 8454C3F3263F2D8700E3F9C7 /* IconImageCache.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8454C3F2263F2D8700E3F9C7 /* IconImageCache.swift */; };
8454C3F8263F3AD400E3F9C7 /* IconImageCache.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8454C3F2263F2D8700E3F9C7 /* IconImageCache.swift */; }; 8454C3F8263F3AD400E3F9C7 /* IconImageCache.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8454C3F2263F2D8700E3F9C7 /* IconImageCache.swift */; };
8454C3FD263F3AD600E3F9C7 /* IconImageCache.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8454C3F2263F2D8700E3F9C7 /* IconImageCache.swift */; }; 8454C3FD263F3AD600E3F9C7 /* IconImageCache.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8454C3F2263F2D8700E3F9C7 /* IconImageCache.swift */; };
8456116B2BBD145200507B73 /* Parser in Frameworks */ = {isa = PBXBuildFile; productRef = 8456116A2BBD145200507B73 /* Parser */; };
8456116C2BBD145200507B73 /* Parser in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 8456116A2BBD145200507B73 /* Parser */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; };
8456116E2BBD145200507B73 /* ParserObjC in Frameworks */ = {isa = PBXBuildFile; productRef = 8456116D2BBD145200507B73 /* ParserObjC */; };
8456116F2BBD145200507B73 /* ParserObjC in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 8456116D2BBD145200507B73 /* ParserObjC */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; };
845611712BBD145D00507B73 /* Parser in Frameworks */ = {isa = PBXBuildFile; productRef = 845611702BBD145D00507B73 /* Parser */; };
845611722BBD145D00507B73 /* Parser in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 845611702BBD145D00507B73 /* Parser */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; };
845611742BBD145D00507B73 /* ParserObjC in Frameworks */ = {isa = PBXBuildFile; productRef = 845611732BBD145D00507B73 /* ParserObjC */; };
845611752BBD145D00507B73 /* ParserObjC in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 845611732BBD145D00507B73 /* ParserObjC */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; };
845A29091FC74B8E007B49E3 /* SingleFaviconDownloader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 845A29081FC74B8E007B49E3 /* SingleFaviconDownloader.swift */; }; 845A29091FC74B8E007B49E3 /* SingleFaviconDownloader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 845A29081FC74B8E007B49E3 /* SingleFaviconDownloader.swift */; };
845A29221FC9251E007B49E3 /* SidebarCellLayout.swift in Sources */ = {isa = PBXBuildFile; fileRef = 845A29211FC9251E007B49E3 /* SidebarCellLayout.swift */; }; 845A29221FC9251E007B49E3 /* SidebarCellLayout.swift in Sources */ = {isa = PBXBuildFile; fileRef = 845A29211FC9251E007B49E3 /* SidebarCellLayout.swift */; };
845A29241FC9255E007B49E3 /* SidebarCellAppearance.swift in Sources */ = {isa = PBXBuildFile; fileRef = 845A29231FC9255E007B49E3 /* SidebarCellAppearance.swift */; }; 845A29241FC9255E007B49E3 /* SidebarCellAppearance.swift in Sources */ = {isa = PBXBuildFile; fileRef = 845A29231FC9255E007B49E3 /* SidebarCellAppearance.swift */; };
@ -934,11 +936,12 @@
dstSubfolderSpec = 10; dstSubfolderSpec = 10;
files = ( files = (
513F32782593EE6F0003048F /* Secrets in Embed Frameworks */, 513F32782593EE6F0003048F /* Secrets in Embed Frameworks */,
5138E95324D3418100AFF0FE /* RSParser in Embed Frameworks */,
513F327B2593EE6F0003048F /* SyncDatabase in Embed Frameworks */, 513F327B2593EE6F0003048F /* SyncDatabase in Embed Frameworks */,
845611752BBD145D00507B73 /* ParserObjC in Embed Frameworks */,
513F32722593EE6F0003048F /* Articles in Embed Frameworks */, 513F32722593EE6F0003048F /* Articles in Embed Frameworks */,
513F32812593EF180003048F /* Account in Embed Frameworks */, 513F32812593EF180003048F /* Account in Embed Frameworks */,
513F32752593EE6F0003048F /* ArticlesDatabase in Embed Frameworks */, 513F32752593EE6F0003048F /* ArticlesDatabase in Embed Frameworks */,
845611722BBD145D00507B73 /* Parser in Embed Frameworks */,
); );
name = "Embed Frameworks"; name = "Embed Frameworks";
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
@ -971,7 +974,6 @@
dstPath = ""; dstPath = "";
dstSubfolderSpec = 10; dstSubfolderSpec = 10;
files = ( files = (
653813312680E20C007A082C /* RSParser in Embed Frameworks */,
6538133A2680E22B007A082C /* Secrets in Embed Frameworks */, 6538133A2680E22B007A082C /* Secrets in Embed Frameworks */,
653813252680E1D6007A082C /* ArticlesDatabase in Embed Frameworks */, 653813252680E1D6007A082C /* ArticlesDatabase in Embed Frameworks */,
653813222680E1D0007A082C /* Articles in Embed Frameworks */, 653813222680E1D0007A082C /* Articles in Embed Frameworks */,
@ -1014,10 +1016,11 @@
files = ( files = (
513277442590FBB60064F1E7 /* Account in Embed Frameworks */, 513277442590FBB60064F1E7 /* Account in Embed Frameworks */,
5132775F2590FC640064F1E7 /* Articles in Embed Frameworks */, 5132775F2590FC640064F1E7 /* Articles in Embed Frameworks */,
8456116F2BBD145200507B73 /* ParserObjC in Embed Frameworks */,
513277662590FC780064F1E7 /* Secrets in Embed Frameworks */, 513277662590FC780064F1E7 /* Secrets in Embed Frameworks */,
513277652590FC640064F1E7 /* SyncDatabase in Embed Frameworks */, 513277652590FC640064F1E7 /* SyncDatabase in Embed Frameworks */,
513277622590FC640064F1E7 /* ArticlesDatabase in Embed Frameworks */, 513277622590FC640064F1E7 /* ArticlesDatabase in Embed Frameworks */,
51A737C924DB19CC0015FA66 /* RSParser in Embed Frameworks */, 8456116C2BBD145200507B73 /* Parser in Embed Frameworks */,
); );
name = "Embed Frameworks"; name = "Embed Frameworks";
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
@ -1331,6 +1334,7 @@
845B14A51FC2299E0013CF92 /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = "<group>"; }; 845B14A51FC2299E0013CF92 /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = "<group>"; };
845EE7B01FC2366500854A1F /* StarredFeedDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StarredFeedDelegate.swift; sourceTree = "<group>"; }; 845EE7B01FC2366500854A1F /* StarredFeedDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StarredFeedDelegate.swift; sourceTree = "<group>"; };
845EE7C01FC2488C00854A1F /* SmartFeed.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SmartFeed.swift; sourceTree = "<group>"; }; 845EE7C01FC2488C00854A1F /* SmartFeed.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SmartFeed.swift; sourceTree = "<group>"; };
846799F42BBD120A000854CB /* Parser */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = Parser; sourceTree = "<group>"; };
84702AA31FA27AC0006B8943 /* MarkStatusCommand.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MarkStatusCommand.swift; sourceTree = "<group>"; }; 84702AA31FA27AC0006B8943 /* MarkStatusCommand.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MarkStatusCommand.swift; sourceTree = "<group>"; };
847120D62B8AE6AF00BBFC34 /* UTType+Extensions.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "UTType+Extensions.swift"; sourceTree = "<group>"; }; 847120D62B8AE6AF00BBFC34 /* UTType+Extensions.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "UTType+Extensions.swift"; sourceTree = "<group>"; };
8472058020142E8900AD578B /* FeedInspectorViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FeedInspectorViewController.swift; sourceTree = "<group>"; }; 8472058020142E8900AD578B /* FeedInspectorViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FeedInspectorViewController.swift; sourceTree = "<group>"; };
@ -1560,7 +1564,6 @@
84DCA5272BABBB6200792720 /* Core in Frameworks */, 84DCA5272BABBB6200792720 /* Core in Frameworks */,
841CECDA2BAD04B80001EE72 /* Tree in Frameworks */, 841CECDA2BAD04B80001EE72 /* Tree in Frameworks */,
84DCA5182BABB77E00792720 /* FoundationExtras in Frameworks */, 84DCA5182BABB77E00792720 /* FoundationExtras in Frameworks */,
653813302680E20C007A082C /* RSParser in Frameworks */,
6538131E2680E1CA007A082C /* Account in Frameworks */, 6538131E2680E1CA007A082C /* Account in Frameworks */,
653813282680E1EC007A082C /* CrashReporter in Frameworks */, 653813282680E1EC007A082C /* CrashReporter in Frameworks */,
84DCA51A2BABB78700792720 /* AppKitExtras in Frameworks */, 84DCA51A2BABB78700792720 /* AppKitExtras in Frameworks */,
@ -1586,7 +1589,8 @@
84DCA51E2BABB79900792720 /* FoundationExtras in Frameworks */, 84DCA51E2BABB79900792720 /* FoundationExtras in Frameworks */,
84C1A8582BBBA5BD006E3E96 /* Web in Frameworks */, 84C1A8582BBBA5BD006E3E96 /* Web in Frameworks */,
516B695F24D2F33B00B5702F /* Account in Frameworks */, 516B695F24D2F33B00B5702F /* Account in Frameworks */,
5138E95224D3418100AFF0FE /* RSParser in Frameworks */, 845611742BBD145D00507B73 /* ParserObjC in Frameworks */,
845611712BBD145D00507B73 /* Parser in Frameworks */,
513F32712593EE6F0003048F /* Articles in Frameworks */, 513F32712593EE6F0003048F /* Articles in Frameworks */,
513F32772593EE6F0003048F /* Secrets in Frameworks */, 513F32772593EE6F0003048F /* Secrets in Frameworks */,
84DCA5292BABBB6A00792720 /* Core in Frameworks */, 84DCA5292BABBB6A00792720 /* Core in Frameworks */,
@ -1612,7 +1616,6 @@
84DCA5122BABB75600792720 /* FoundationExtras in Frameworks */, 84DCA5122BABB75600792720 /* FoundationExtras in Frameworks */,
513277612590FC640064F1E7 /* ArticlesDatabase in Frameworks */, 513277612590FC640064F1E7 /* ArticlesDatabase in Frameworks */,
51C4CFF624D37DD500AF9874 /* Secrets in Frameworks */, 51C4CFF624D37DD500AF9874 /* Secrets in Frameworks */,
51A737C824DB19CC0015FA66 /* RSParser in Frameworks */,
179C39EA26F76B0500D4E741 /* Zip in Frameworks */, 179C39EA26F76B0500D4E741 /* Zip in Frameworks */,
51E4DAED2425F6940091EB5B /* CloudKit.framework in Frameworks */, 51E4DAED2425F6940091EB5B /* CloudKit.framework in Frameworks */,
84DCA5142BABB76100792720 /* AppKitExtras in Frameworks */, 84DCA5142BABB76100792720 /* AppKitExtras in Frameworks */,
@ -1621,6 +1624,8 @@
514C16CE24D2E63F009A3AFA /* Account in Frameworks */, 514C16CE24D2E63F009A3AFA /* Account in Frameworks */,
519CA8E525841DB700EB079A /* CrashReporter in Frameworks */, 519CA8E525841DB700EB079A /* CrashReporter in Frameworks */,
841CECD82BAD04B20001EE72 /* Tree in Frameworks */, 841CECD82BAD04B20001EE72 /* Tree in Frameworks */,
8456116B2BBD145200507B73 /* Parser in Frameworks */,
8456116E2BBD145200507B73 /* ParserObjC in Frameworks */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
@ -2353,6 +2358,7 @@
841550F52B9E4D6800D4B345 /* FMDB */, 841550F52B9E4D6800D4B345 /* FMDB */,
84DCA50F2BABB65600792720 /* CloudKitExtras */, 84DCA50F2BABB65600792720 /* CloudKitExtras */,
849FEDC32BBB225E0053FB21 /* Web */, 849FEDC32BBB225E0053FB21 /* Web */,
846799F42BBD120A000854CB /* Parser */,
84DCA5232BABBA8100792720 /* Core */, 84DCA5232BABBA8100792720 /* Core */,
841CECD62BAD03C60001EE72 /* Tree */, 841CECD62BAD03C60001EE72 /* Tree */,
84DCA5102BABB6A100792720 /* UIKitExtras */, 84DCA5102BABB6A100792720 /* UIKitExtras */,
@ -2898,7 +2904,6 @@
653813202680E1D0007A082C /* Articles */, 653813202680E1D0007A082C /* Articles */,
653813232680E1D6007A082C /* ArticlesDatabase */, 653813232680E1D6007A082C /* ArticlesDatabase */,
653813272680E1EC007A082C /* CrashReporter */, 653813272680E1EC007A082C /* CrashReporter */,
6538132F2680E20C007A082C /* RSParser */,
653813382680E22B007A082C /* Secrets */, 653813382680E22B007A082C /* Secrets */,
84DCA5172BABB77E00792720 /* FoundationExtras */, 84DCA5172BABB77E00792720 /* FoundationExtras */,
84DCA5192BABB78700792720 /* AppKitExtras */, 84DCA5192BABB78700792720 /* AppKitExtras */,
@ -2948,7 +2953,6 @@
name = "NetNewsWire-iOS"; name = "NetNewsWire-iOS";
packageProductDependencies = ( packageProductDependencies = (
516B695E24D2F33B00B5702F /* Account */, 516B695E24D2F33B00B5702F /* Account */,
5138E95124D3418100AFF0FE /* RSParser */,
513F32702593EE6F0003048F /* Articles */, 513F32702593EE6F0003048F /* Articles */,
513F32732593EE6F0003048F /* ArticlesDatabase */, 513F32732593EE6F0003048F /* ArticlesDatabase */,
513F32762593EE6F0003048F /* Secrets */, 513F32762593EE6F0003048F /* Secrets */,
@ -2962,6 +2966,8 @@
84DCA5282BABBB6A00792720 /* Core */, 84DCA5282BABBB6A00792720 /* Core */,
841CECDB2BAD04BF0001EE72 /* Tree */, 841CECDB2BAD04BF0001EE72 /* Tree */,
84C1A8572BBBA5BD006E3E96 /* Web */, 84C1A8572BBBA5BD006E3E96 /* Web */,
845611702BBD145D00507B73 /* Parser */,
845611732BBD145D00507B73 /* ParserObjC */,
); );
productName = "NetNewsWire-iOS"; productName = "NetNewsWire-iOS";
productReference = 840D617C2029031C009BC708 /* NetNewsWire.app */; productReference = 840D617C2029031C009BC708 /* NetNewsWire.app */;
@ -2992,7 +2998,6 @@
packageProductDependencies = ( packageProductDependencies = (
514C16CD24D2E63F009A3AFA /* Account */, 514C16CD24D2E63F009A3AFA /* Account */,
51C4CFF524D37DD500AF9874 /* Secrets */, 51C4CFF524D37DD500AF9874 /* Secrets */,
51A737C724DB19CC0015FA66 /* RSParser */,
17192AD92567B3D500AAEACA /* RSSparkle */, 17192AD92567B3D500AAEACA /* RSSparkle */,
519CA8E425841DB700EB079A /* CrashReporter */, 519CA8E425841DB700EB079A /* CrashReporter */,
5132775D2590FC640064F1E7 /* Articles */, 5132775D2590FC640064F1E7 /* Articles */,
@ -3007,6 +3012,8 @@
84DCA5242BABBB5A00792720 /* Core */, 84DCA5242BABBB5A00792720 /* Core */,
8438C2DA2BABE0B00040C9EE /* CoreResources */, 8438C2DA2BABE0B00040C9EE /* CoreResources */,
841CECD72BAD04B20001EE72 /* Tree */, 841CECD72BAD04B20001EE72 /* Tree */,
8456116A2BBD145200507B73 /* Parser */,
8456116D2BBD145200507B73 /* ParserObjC */,
); );
productName = NetNewsWire; productName = NetNewsWire;
productReference = 849C64601ED37A5D003D8FC0 /* NetNewsWire.app */; productReference = 849C64601ED37A5D003D8FC0 /* NetNewsWire.app */;
@ -3120,7 +3127,6 @@
); );
mainGroup = 849C64571ED37A5D003D8FC0; mainGroup = 849C64571ED37A5D003D8FC0;
packageReferences = ( packageReferences = (
51B0DF2324D2C7FA000AD99E /* XCRemoteSwiftPackageReference "RSParser" */,
17192AD82567B3D500AAEACA /* XCRemoteSwiftPackageReference "Sparkle-Binary" */, 17192AD82567B3D500AAEACA /* XCRemoteSwiftPackageReference "Sparkle-Binary" */,
519CA8E325841DB700EB079A /* XCRemoteSwiftPackageReference "plcrashreporter" */, 519CA8E325841DB700EB079A /* XCRemoteSwiftPackageReference "plcrashreporter" */,
179D280926F6F93D003B2E0A /* XCRemoteSwiftPackageReference "Zip" */, 179D280926F6F93D003B2E0A /* XCRemoteSwiftPackageReference "Zip" */,
@ -4753,14 +4759,6 @@
minimumVersion = 1.8.1; minimumVersion = 1.8.1;
}; };
}; };
51B0DF2324D2C7FA000AD99E /* XCRemoteSwiftPackageReference "RSParser" */ = {
isa = XCRemoteSwiftPackageReference;
repositoryURL = "https://github.com/Ranchero-Software/RSParser.git";
requirement = {
kind = upToNextMajorVersion;
minimumVersion = 2.0.3;
};
};
/* End XCRemoteSwiftPackageReference section */ /* End XCRemoteSwiftPackageReference section */
/* Begin XCSwiftPackageProductDependency section */ /* Begin XCSwiftPackageProductDependency section */
@ -4799,11 +4797,6 @@
isa = XCSwiftPackageProductDependency; isa = XCSwiftPackageProductDependency;
productName = SyncDatabase; productName = SyncDatabase;
}; };
5138E95124D3418100AFF0FE /* RSParser */ = {
isa = XCSwiftPackageProductDependency;
package = 51B0DF2324D2C7FA000AD99E /* XCRemoteSwiftPackageReference "RSParser" */;
productName = RSParser;
};
513F32702593EE6F0003048F /* Articles */ = { 513F32702593EE6F0003048F /* Articles */ = {
isa = XCSwiftPackageProductDependency; isa = XCSwiftPackageProductDependency;
productName = Articles; productName = Articles;
@ -4833,11 +4826,6 @@
package = 519CA8E325841DB700EB079A /* XCRemoteSwiftPackageReference "plcrashreporter" */; package = 519CA8E325841DB700EB079A /* XCRemoteSwiftPackageReference "plcrashreporter" */;
productName = CrashReporter; productName = CrashReporter;
}; };
51A737C724DB19CC0015FA66 /* RSParser */ = {
isa = XCSwiftPackageProductDependency;
package = 51B0DF2324D2C7FA000AD99E /* XCRemoteSwiftPackageReference "RSParser" */;
productName = RSParser;
};
51BC2F3724D3439A00E90810 /* Account */ = { 51BC2F3724D3439A00E90810 /* Account */ = {
isa = XCSwiftPackageProductDependency; isa = XCSwiftPackageProductDependency;
productName = Account; productName = Account;
@ -4867,11 +4855,6 @@
package = 519CA8E325841DB700EB079A /* XCRemoteSwiftPackageReference "plcrashreporter" */; package = 519CA8E325841DB700EB079A /* XCRemoteSwiftPackageReference "plcrashreporter" */;
productName = CrashReporter; productName = CrashReporter;
}; };
6538132F2680E20C007A082C /* RSParser */ = {
isa = XCSwiftPackageProductDependency;
package = 51B0DF2324D2C7FA000AD99E /* XCRemoteSwiftPackageReference "RSParser" */;
productName = RSParser;
};
653813382680E22B007A082C /* Secrets */ = { 653813382680E22B007A082C /* Secrets */ = {
isa = XCSwiftPackageProductDependency; isa = XCSwiftPackageProductDependency;
productName = Secrets; productName = Secrets;
@ -4900,6 +4883,22 @@
isa = XCSwiftPackageProductDependency; isa = XCSwiftPackageProductDependency;
productName = CoreResources; productName = CoreResources;
}; };
8456116A2BBD145200507B73 /* Parser */ = {
isa = XCSwiftPackageProductDependency;
productName = Parser;
};
8456116D2BBD145200507B73 /* ParserObjC */ = {
isa = XCSwiftPackageProductDependency;
productName = ParserObjC;
};
845611702BBD145D00507B73 /* Parser */ = {
isa = XCSwiftPackageProductDependency;
productName = Parser;
};
845611732BBD145D00507B73 /* ParserObjC */ = {
isa = XCSwiftPackageProductDependency;
productName = ParserObjC;
};
8479ABE22B9E906E00F84C4D /* Database */ = { 8479ABE22B9E906E00F84C4D /* Database */ = {
isa = XCSwiftPackageProductDependency; isa = XCSwiftPackageProductDependency;
productName = Database; productName = Database;

View File

@ -1,5 +1,5 @@
{ {
"originHash" : "cdabcd707e9a2d05d419a03b18324728290923e8dae1ae2d92a2cdd8f1627191", "originHash" : "8b50ed607f7b51065653df0252108c35d369c5a0c0753a1f820a72682b56c528",
"pins" : [ "pins" : [
{ {
"identity" : "plcrashreporter", "identity" : "plcrashreporter",
@ -10,15 +10,6 @@
"version" : "1.11.0" "version" : "1.11.0"
} }
}, },
{
"identity" : "rsparser",
"kind" : "remoteSourceControl",
"location" : "https://github.com/Ranchero-Software/RSParser.git",
"state" : {
"revision" : "d5b50ff78905ebfaf26dd698e0e5d3ed8269dd9b",
"version" : "2.0.3"
}
},
{ {
"identity" : "sparkle-binary", "identity" : "sparkle-binary",
"kind" : "remoteSourceControl", "kind" : "remoteSourceControl",

8
Parser/.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
.DS_Store
/.build
/Packages
xcuserdata/
DerivedData/
.swiftpm/configuration/registries.json
.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata
.netrc

43
Parser/Package.swift Normal file
View File

@ -0,0 +1,43 @@
// swift-tools-version:5.10
// The swift-tools-version declares the minimum version of Swift required to build this package.
import PackageDescription
let package = Package(
    name: "Parser",
    platforms: [
        .macOS(.v14),
        .iOS(.v17),
    ],
    products: [
        // Both modules are vended as dynamic libraries so clients can link either one.
        .library(name: "Parser", type: .dynamic, targets: ["Parser"]),
        .library(name: "ParserObjC", type: .dynamic, targets: ["ParserObjC"]),
    ],
    dependencies: [],
    targets: [
        // Swift sources; built on top of the Objective-C target.
        .target(
            name: "Parser",
            dependencies: ["ParserObjC"],
            path: "Sources/Swift"
        ),
        // Objective-C sources; headers are looked up in the target's "include" directory.
        .target(
            name: "ParserObjC",
            dependencies: [],
            path: "Sources/ObjC",
            cSettings: [.headerSearchPath("include")]
        ),
        // Tests copy the "Resources" directory verbatim and skip the legacy Info.plist.
        .testTarget(
            name: "ParserTests",
            dependencies: ["Parser"],
            exclude: ["Info.plist"],
            resources: [.copy("Resources")]
        ),
    ]
)

View File

@ -0,0 +1,24 @@
//
// FeedParser.h
// RSXML
//
// Created by Brent Simmons on 7/12/15.
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
@class RSParsedFeed;
@class RSXMLData;
/// Interface for a feed parser that is created from an RSXMLData value and yields an RSParsedFeed.
@protocol FeedParser <NSObject>
/// Class-level check: returns whether this parser type can handle `xmlData`.
+ (BOOL)canParseFeed:(RSXMLData * _Nonnull)xmlData;
/// Creates a parser for `xmlData`.
- (nonnull instancetype)initWithXMLData:(RSXMLData * _Nonnull)xmlData;
/// Parses the feed. The result is nullable; `error` is an optional out-parameter.
- (nullable RSParsedFeed *)parseFeed:(NSError * _Nullable * _Nullable)error;
@end

View File

@ -0,0 +1,26 @@
//
// NSData+RSParser.h
// RSParser
//
// Created by Brent Simmons on 6/24/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
/// Cheap content sniffing over the raw bytes. Every method is a heuristic guess, not a validation.
@interface NSData (RSParser)
/// YES when the bytes contain "<html", "<HTML", "<body" or "<meta", or contain "<" together with an HTML doctype string.
- (BOOL)isProbablyHTML;
/// YES when "<?xml" is the first content, ignoring whitespace and up to four leading bytes (BOM allowance).
- (BOOL)isProbablyXML;
/// YES when "{" is the first content, ignoring whitespace and up to four leading bytes (BOM allowance).
- (BOOL)isProbablyJSON;
/// YES when the data is probably JSON and contains the jsonfeed.org version URL (plain or with escaped slashes).
- (BOOL)isProbablyJSONFeed;
/// YES when the data is probably JSON and contains "rss", "channel" and "item".
- (BOOL)isProbablyRSSInJSON;
/// YES when the bytes contain "<rss" or "<rdf:RDF", or contain both "<channel>" and "<pubDate>".
- (BOOL)isProbablyRSS;
/// YES when the bytes contain "<feed".
- (BOOL)isProbablyAtom;
@end

View File

@ -0,0 +1,139 @@
//
// NSData+RSParser.m
// RSParser
//
// Created by Brent Simmons on 6/24/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
#import "NSData+RSParser.h"
/* TODO: find real-world cases where the isProbably* cases fail when they should succeed, and add them to tests.*/
// Forward declarations of the file-private helpers; their definitions follow the category implementation.
static BOOL bytesAreProbablyHTML(const char *bytes, NSUInteger numberOfBytes);
static BOOL bytesAreProbablyXML(const char *bytes, NSUInteger numberOfBytes);
static BOOL bytesStartWithStringIgnoringWhitespace(const char *string, const char *bytes, NSUInteger numberOfBytes);
static BOOL didFindString(const char *string, const char *bytes, NSUInteger numberOfBytes);
// NOTE(review): the three declarations below have no definition in the visible part of this file — confirm they are still needed.
static BOOL bytesStartWithRSS(const char *bytes, NSUInteger numberOfBytes);
static BOOL bytesStartWithRDF(const char *bytes, NSUInteger numberOfBytes);
static BOOL bytesStartWithAtom(const char *bytes, NSUInteger numberOfBytes);
@implementation NSData (RSParser)

/// Heuristic: the bytes contain typical HTML markers.
- (BOOL)isProbablyHTML {
    const char *rawBytes = self.bytes;
    return bytesAreProbablyHTML(rawBytes, self.length);
}

/// Heuristic: the content begins with an XML declaration.
- (BOOL)isProbablyXML {
    const char *rawBytes = self.bytes;
    return bytesAreProbablyXML(rawBytes, self.length);
}

/// Heuristic: the content begins with an opening brace.
- (BOOL)isProbablyJSON {
    const char *rawBytes = self.bytes;
    return bytesStartWithStringIgnoringWhitespace("{", rawBytes, self.length);
}

/// Heuristic: JSON that mentions the jsonfeed.org version URL, with plain or backslash-escaped slashes.
- (BOOL)isProbablyJSONFeed {
    if (![self isProbablyJSON]) {
        return NO;
    }

    const char *rawBytes = self.bytes;
    const NSUInteger byteCount = self.length;
    if (didFindString("://jsonfeed.org/version/", rawBytes, byteCount)) {
        return YES;
    }
    return didFindString(":\\/\\/jsonfeed.org\\/version\\/", rawBytes, byteCount);
}

/// Heuristic: JSON that contains "rss", "channel" and "item".
- (BOOL)isProbablyRSSInJSON {
    if (![self isProbablyJSON]) {
        return NO;
    }

    const char *rawBytes = self.bytes;
    const NSUInteger byteCount = self.length;
    if (!didFindString("rss", rawBytes, byteCount)) {
        return NO;
    }
    if (!didFindString("channel", rawBytes, byteCount)) {
        return NO;
    }
    return didFindString("item", rawBytes, byteCount);
}

/// Heuristic: an <rss> or <rdf:RDF> opening tag, or the fallback markers described below.
- (BOOL)isProbablyRSS {
    const char *rawBytes = self.bytes;
    const NSUInteger byteCount = self.length;

    if (didFindString("<rss", rawBytes, byteCount)) {
        return YES;
    }
    if (didFindString("<rdf:RDF", rawBytes, byteCount)) {
        return YES;
    }

    // Some feeds omit the opening <rss> tag (seen 7 Dec. 2017 at https://www.natashatherobot.com/feed/)
    // but should be parsed anyway, so fall back to other distinctive RSS markers.
    return didFindString("<channel>", rawBytes, byteCount) && didFindString("<pubDate>", rawBytes, byteCount);
}

/// Heuristic: the bytes contain a <feed opening tag.
- (BOOL)isProbablyAtom {
    const char *rawBytes = self.bytes;
    return didFindString("<feed", rawBytes, self.length);
}

@end
/// Returns YES when the C string `string` occurs within the first `numberOfBytes` bytes of `bytes`.
/// The search is done by strnstr, so it also stops at the first NUL byte.
static BOOL didFindString(const char *string, const char *bytes, NSUInteger numberOfBytes) {
    return strnstr(bytes, string, numberOfBytes) != NULL;
}
/// Returns YES when `string` is the first meaningful content of `bytes`.
///
/// Spaces, CRs, LFs and tabs are skipped. Any other non-matching byte is tolerated only at
/// offsets 0-3, which allows for a BOM of up to four bytes (assumed to be at the very start).
/// A match counts only when the first occurrence of `string` in the buffer (per strnstr)
/// begins exactly at the candidate offset.
static BOOL bytesStartWithStringIgnoringWhitespace(const char *string, const char *bytes, NSUInteger numberOfBytes) {
    for (NSUInteger offset = 0; offset < numberOfBytes; offset++) {
        const char current = bytes[offset];

        const BOOL isWhitespace = (current == ' ' || current == '\r' || current == '\n' || current == '\t');
        if (isWhitespace) {
            continue;
        }

        if (current == string[0]) {
            const char *firstOccurrence = strnstr(bytes, string, numberOfBytes);
            return firstOccurrence == bytes + offset;
        }

        // Past the possible BOM: any other byte means the content starts with something else.
        if (offset >= 4) {
            return NO;
        }
    }

    return NO;
}
/// Returns YES when the bytes look like HTML: one of a few well-known tags is present,
/// or a "<" is present together with an HTML doctype string in one of three spellings.
static BOOL bytesAreProbablyHTML(const char *bytes, NSUInteger numberOfBytes) {
    static const char * const tagMarkers[] = {"<html", "<HTML", "<body", "<meta"};
    static const char * const doctypeMarkers[] = {"doctype html", "DOCTYPE html", "DOCTYPE HTML"};

    const size_t tagMarkerCount = sizeof(tagMarkers) / sizeof(tagMarkers[0]);
    for (size_t i = 0; i < tagMarkerCount; i++) {
        if (didFindString(tagMarkers[i], bytes, numberOfBytes)) {
            return YES;
        }
    }

    // The doctype strings only count when there is at least one "<" somewhere.
    if (!didFindString("<", bytes, numberOfBytes)) {
        return NO;
    }

    const size_t doctypeMarkerCount = sizeof(doctypeMarkers) / sizeof(doctypeMarkers[0]);
    for (size_t i = 0; i < doctypeMarkerCount; i++) {
        if (didFindString(doctypeMarkers[i], bytes, numberOfBytes)) {
            return YES;
        }
    }

    return NO;
}
/// Returns YES when the content starts with an XML declaration ("<?xml"),
/// allowing leading whitespace and a short BOM.
static BOOL bytesAreProbablyXML(const char *bytes, NSUInteger numberOfBytes) {
    const BOOL startsWithXMLDeclaration = bytesStartWithStringIgnoringWhitespace("<?xml", bytes, numberOfBytes);
    return startsWithXMLDeclaration;
}

View File

@ -0,0 +1,26 @@
//
// NSString+RSParser.h
// RSParser
//
// Created by Brent Simmons on 9/25/15.
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
NS_ASSUME_NONNULL_BEGIN
/// String helpers used by the parsers: entity decoding/encoding, MD5 hashing, substring test.
@interface NSString (RSParser)
/// Returns a string with HTML entities decoded. An "&" that does not start a decodable entity is kept as-is.
/// Returns the receiver itself when decoding changes nothing.
- (NSString *)rsparser_stringByDecodingHTMLEntities;
/// Returns a copy of \c self with <, >, and & entity-encoded.
@property (readonly, copy) NSString *rsparser_stringByEncodingRequiredEntities;
/// Returns the MD5 digest of the receiver's UTF-8 bytes as 32 lowercase hex digits.
- (NSString *)rsparser_md5Hash;
/// Returns YES when `s` occurs anywhere in the receiver (via rangeOfString:).
- (BOOL)rsparser_contains:(NSString *)s;
@end
NS_ASSUME_NONNULL_END

View File

@ -0,0 +1,348 @@
//
// NSString+RSParser.m
// RSParser
//
// Created by Brent Simmons on 9/25/15.
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
//
#import "NSString+RSParser.h"
#import <CommonCrypto/CommonDigest.h>
/// File-private scanner helper used by rsparser_stringByDecodingHTMLEntities.
@interface NSScanner (RSParser)
/// Scans one entity starting at the current scan location. Returns YES when a terminating ";" was found
/// and the entity could be decoded; the decoded text is written to `decodedEntity` when it is non-NULL.
- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity;
@end
@implementation NSString (RSParser)
/// Returns YES when `s` is found anywhere in the receiver.
- (BOOL)rsparser_contains:(NSString *)s {
    const NSRange matchRange = [self rangeOfString:s];
    return matchRange.location != NSNotFound;
}
/// Decodes HTML entities in the receiver.
///
/// Text between ampersands is copied verbatim. At each "&" the scanner tries to read an entity;
/// when that fails, the "&" itself is copied and scanning resumes at the following character.
/// Returns the receiver (not a copy) when the decoded text equals the original.
- (NSString *)rsparser_stringByDecodingHTMLEntities {
@autoreleasepool {
NSScanner *scanner = [[NSScanner alloc] initWithString:self];
// Whitespace is significant here, so the scanner must not skip anything.
scanner.charactersToBeSkipped = nil;
NSMutableString *result = [[NSMutableString alloc] init];
while (true) {
NSString *scannedString = nil;
// Copy everything up to (not including) the next "&".
if ([scanner scanUpToString:@"&" intoString:&scannedString]) {
[result appendString:scannedString];
}
if (scanner.isAtEnd) {
break;
}
// Remember where the "&" is: a failed entity scan may leave the scan location advanced.
NSUInteger savedScanLocation = scanner.scanLocation;
NSString *decodedEntity = nil;
if ([scanner rs_scanEntityValue:&decodedEntity]) {
[result appendString:decodedEntity];
}
else {
// Not a decodable entity: keep the literal "&" and continue right after it.
[result appendString:@"&"];
scanner.scanLocation = savedScanLocation + 1;
}
if (scanner.isAtEnd) {
break;
}
}
// Hand back the receiver when nothing was decoded.
if ([self isEqualToString:result]) {
return self;
}
return [result copy];
}
}
static NSDictionary *RSEntitiesDictionary(void);
static NSString *RSParserStringWithValue(uint32_t value);
/// Decodes a single HTML entity into the text it stands for.
///
/// The receiver may be given with or without the surrounding "&" and ";" ("&amp;", "amp;", "amp").
/// Named entities are resolved through RSEntitiesDictionary(); "#x…"/"#X…" is read as a hexadecimal
/// code point and "#…" as a decimal one.
///
/// @return The decoded string, or nil when the entity is unknown or the number is not usable.
- (NSString * _Nullable)rs_stringByDecodingEntity {
    // self may or may not have outer & and ; characters, so work on a stripped copy.
    NSMutableString *s = [self mutableCopy];
    if ([s hasPrefix:@"&"]) {
        [s deleteCharactersInRange:NSMakeRange(0, 1)];
    }
    if ([s hasSuffix:@";"]) {
        [s deleteCharactersInRange:NSMakeRange(s.length - 1, 1)];
    }

    // Look up the stripped name rather than self; otherwise "&amp;" would never match the "amp" key.
    NSDictionary *entitiesDictionary = RSEntitiesDictionary();
    NSString *decodedEntity = entitiesDictionary[s];
    if (decodedEntity) {
        return decodedEntity;
    }

    if ([s hasPrefix:@"#x"] || [s hasPrefix:@"#X"]) { // Hex
        NSScanner *scanner = [[NSScanner alloc] initWithString:s];
        // Skipping "#", "x" and "X" lets scanHexInt: start directly at the digits.
        scanner.charactersToBeSkipped = [NSCharacterSet characterSetWithCharactersInString:@"#xX"];
        unsigned int hexValue = 0;
        if ([scanner scanHexInt:&hexValue]) {
            return RSParserStringWithValue((uint32_t)hexValue);
        }
        return nil;
    }
    else if ([s hasPrefix:@"#"]) { // Decimal
        [s deleteCharactersInRange:NSMakeRange(0, 1)];
        NSInteger value = s.integerValue;
        // Reject non-positive values and anything beyond the last Unicode code point, so that
        // the narrowing cast below can never wrap a huge number around to an unrelated character.
        if (value < 1 || value > 0x10FFFF) {
            return nil;
        }
        return RSParserStringWithValue((uint32_t)value);
    }

    return nil;
}
/// Returns a copy of the receiver in which "<", ">" and "&" are replaced by "&lt;", "&gt;" and "&amp;".
/// All other UTF-16 code units are copied through unchanged.
- (NSString *)rsparser_stringByEncodingRequiredEntities {
    NSMutableString *encoded = [NSMutableString string];

    for (NSUInteger index = 0; index < self.length; index++) {
        const unichar unit = [self characterAtIndex:index];

        if (unit == '<') {
            [encoded appendString:@"&lt;"];
        }
        else if (unit == '>') {
            [encoded appendString:@"&gt;"];
        }
        else if (unit == '&') {
            [encoded appendString:@"&amp;"];
        }
        else {
            [encoded appendFormat:@"%C", unit];
        }
    }

    return [encoded copy];
}
// CC_MD5 is deprecated; the warning is silenced just for this one method.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
/// Returns the raw MD5 digest (CC_MD5_DIGEST_LENGTH bytes) of the receiver's UTF-8 encoding.
- (NSData *)_rsparser_md5HashData {
    NSData *utf8Data = [self dataUsingEncoding:NSUTF8StringEncoding];

    unsigned char digest[CC_MD5_DIGEST_LENGTH];
    CC_MD5(utf8Data.bytes, (CC_LONG)utf8Data.length, digest);

    return [NSData dataWithBytes:digest length:sizeof(digest)];
}
#pragma GCC diagnostic pop
/// Returns the MD5 digest of the receiver as a 32-character lowercase hexadecimal string.
- (NSString *)rsparser_md5Hash {
    NSData *digest = [self _rsparser_md5HashData];
    const Byte *digestBytes = digest.bytes;

    // An MD5 digest is 16 bytes; each one becomes two hex digits.
    NSMutableString *hex = [NSMutableString stringWithCapacity:32];
    for (NSUInteger i = 0; i < 16; i++) {
        [hex appendFormat:@"%02x", digestBytes[i]];
    }

    return [hex copy];
}
@end
@implementation NSScanner (RSParser)
/// Tries to scan one entity, walking forward from the current scan location until a ";" is found.
///
/// The raw entity is taken from initialScanLocation + 1 up to the ";", so the scan location is
/// presumably expected to sit on the "&" when this is called — confirm against the caller.
/// Gives up (returns NO) on whitespace, after more than maxEntityLength characters, or at the end
/// of the string. On failure the scan location may have been advanced; it is not restored here.
///
/// @param decodedEntity Optional out-parameter that receives the decoded text. When NULL, the method
///        returns YES as soon as a ";" is found, without decoding or consuming it.
/// @return YES when a ";"-terminated entity was found and (if requested) decoded to a non-nil string.
- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity {
NSString *s = self.string;
NSUInteger initialScanLocation = self.scanLocation;
static NSUInteger maxEntityLength = 20; // It's probably smaller, but this is just for sanity.
while (true) {
unichar ch = [s characterAtIndex:self.scanLocation];
// Entities cannot contain whitespace.
if ([NSCharacterSet.whitespaceAndNewlineCharacterSet characterIsMember:ch]) {
break;
}
if (ch == ';') {
if (!decodedEntity) {
return YES;
}
// Text strictly between the starting character and the ";".
NSString *rawEntity = [s substringWithRange:NSMakeRange(initialScanLocation + 1, (self.scanLocation - initialScanLocation) - 1)];
*decodedEntity = [rawEntity rs_stringByDecodingEntity];
// Consume the ";" even when decoding failed.
self.scanLocation = self.scanLocation + 1;
return *decodedEntity != nil;
}
self.scanLocation = self.scanLocation + 1;
if (self.scanLocation - initialScanLocation > maxEntityLength) {
break;
}
// Checked after advancing so the next characterAtIndex: stays in bounds.
if (self.isAtEnd) {
break;
}
}
return NO;
}
@end
/// Builds a one-character string from a numeric character reference value.
///
/// Values 0x80-0x9F are first remapped through the Windows Latin-1 extension table
/// (taken from WebCore's HTMLEntityParser). The resulting value is then decoded as a
/// single little-endian UTF-32 unit.
static NSString *RSParserStringWithValue(uint32_t value) {
    // From WebCore's HTMLEntityParser
    static const uint32_t windowsLatin1ExtensionArray[32] = {
        0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
        0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
        0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
        0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178  // 98-9F
    };

    uint32_t codePoint = value;
    if (codePoint >= 0x80u && codePoint < 0xA0u) {
        codePoint = windowsLatin1ExtensionArray[codePoint - 0x80u];
    }

    const uint32_t littleEndianUnit = CFSwapInt32HostToLittle(codePoint);
    return [[NSString alloc] initWithBytes:&littleEndianUnit
                                    length:sizeof(littleEndianUnit)
                                  encoding:NSUTF32LittleEndianStringEncoding];
}
/// Returns the shared table that maps named HTML entities (without "&" and ";") to their text.
/// The table is built once, on first use, and reused afterwards.
static NSDictionary *RSEntitiesDictionary(void) {
    static NSDictionary *entitiesDictionary = nil;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        entitiesDictionary = @{
            // Named entities
            @"AElig": @"Æ",
            @"Aacute": @"Á",
            @"Acirc": @"Â",
            @"Agrave": @"À",
            @"Aring": @"Å",
            @"Atilde": @"Ã",
            @"Auml": @"Ä",
            @"Ccedil": @"Ç",
            @"Dstrok": @"Ð",
            @"ETH": @"Ð",
            @"Eacute": @"É",
            @"Ecirc": @"Ê",
            @"Egrave": @"È",
            @"Euml": @"Ë",
            @"Iacute": @"Í",
            @"Icirc": @"Î",
            @"Igrave": @"Ì",
            @"Iuml": @"Ï",
            @"Ntilde": @"Ñ",
            @"Oacute": @"Ó",
            @"Ocirc": @"Ô",
            @"Ograve": @"Ò",
            @"Oslash": @"Ø",
            @"Otilde": @"Õ",
            @"Ouml": @"Ö",
            @"Pi": @"Π",
            @"THORN": @"Þ",
            @"Uacute": @"Ú",
            @"Ucirc": @"Û",
            @"Ugrave": @"Ù",
            @"Uuml": @"Ü",
            @"Yacute": @"\u00DD", // Ý (was a plain "Y")
            @"aacute": @"á",
            @"acirc": @"â",
            @"acute": @"´",
            @"aelig": @"æ",
            @"agrave": @"à",
            @"amp": @"&",
            @"apos": @"'",
            @"aring": @"å",
            @"atilde": @"ã",
            @"auml": @"ä",
            @"brkbar": @"¦",
            @"brvbar": @"¦",
            @"ccedil": @"ç",
            @"cedil": @"¸",
            @"cent": @"¢",
            @"copy": @"©",
            @"curren": @"¤",
            @"deg": @"°",
            @"die": @"¨",
            @"divide": @"÷",
            @"eacute": @"é",
            @"ecirc": @"ê",
            @"egrave": @"è",
            @"eth": @"ð",
            @"euml": @"ë",
            @"euro": @"€",
            @"frac12": @"½",
            @"frac14": @"¼",
            @"frac34": @"¾",
            @"gt": @">",
            @"hearts": @"♥",
            @"hellip": @"…",
            @"iacute": @"í",
            @"icirc": @"î",
            @"iexcl": @"¡",
            @"igrave": @"ì",
            @"iquest": @"¿",
            @"iuml": @"ï",
            @"laquo": @"«",
            @"ldquo": @"“",
            @"lsquo": @"\u2018", // left single quotation mark (was an empty string)
            @"lt": @"<",
            @"macr": @"¯",
            @"mdash": @"—",
            @"micro": @"µ",
            @"middot": @"·",
            @"ndash": @"\u2013", // en dash (was an empty string)
            @"not": @"¬",
            @"ntilde": @"ñ",
            @"oacute": @"ó",
            @"ocirc": @"ô",
            @"ograve": @"ò",
            @"ordf": @"ª",
            @"ordm": @"º",
            @"oslash": @"ø",
            @"otilde": @"õ",
            @"ouml": @"ö",
            @"para": @"¶",
            @"pi": @"π",
            @"plusmn": @"±",
            @"pound": @"£",
            @"quot": @"\"",
            @"raquo": @"»",
            @"rdquo": @"”",
            @"reg": @"®",
            @"rsquo": @"\u2019", // right single quotation mark (was an empty string)
            @"sect": @"§",
            @"shy": RSParserStringWithValue(173),
            @"sup1": @"¹",
            @"sup2": @"²",
            @"sup3": @"³",
            @"szlig": @"ß",
            @"thorn": @"þ",
            @"times": @"×",
            @"trade": @"™",
            @"uacute": @"ú",
            @"ucirc": @"û",
            @"ugrave": @"ù",
            @"uml": @"¨",
            @"uuml": @"ü",
            @"yacute": @"\u00FD", // ý (was a plain "y")
            @"yen": @"¥",
            @"yuml": @"ÿ",
            @"infin": @"∞",
            @"nbsp": RSParserStringWithValue(160)
        };
    });
    return entitiesDictionary;
}

View File

@ -0,0 +1,23 @@
//
// ParserData.h
// RSParser
//
// Created by Brent Simmons on 10/4/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
NS_ASSUME_NONNULL_BEGIN
/// Pairs the bytes to be parsed with a URL string.
@interface ParserData : NSObject
/// URL string supplied at initialization.
@property (nonatomic, readonly) NSString *url;
/// The bytes to parse, as supplied at initialization.
@property (nonatomic, readonly) NSData *data;
/// Stores `url` and `data` as given.
- (instancetype)initWithURL:(NSString *)url data:(NSData *)data;
@end
NS_ASSUME_NONNULL_END

View File

@ -0,0 +1,26 @@
//
// ParserData.m
// RSParser
//
// Created by Brent Simmons on 10/4/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
#import "ParserData.h"
@implementation ParserData

/// Designated initializer: keeps references to the URL string and the data.
- (instancetype)initWithURL:(NSString *)url data:(NSData *)data {
    if ((self = [super init])) {
        _url = url;
        _data = data;
    }
    return self;
}

@end

View File

@ -0,0 +1,18 @@
//
// RSAtomParser.h
// RSParser
//
// Created by Brent Simmons on 1/15/15.
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
//
@import Foundation;
@class ParserData;
@class RSParsedFeed;
/// Parser for Atom feeds.
@interface RSAtomParser : NSObject
/// Parses `parserData` in one synchronous call and returns the resulting feed object.
+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData;
@end

View File

@ -0,0 +1,679 @@
//
// RSAtomParser.m
// RSParser
//
// Created by Brent Simmons on 1/15/15.
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
//
#import "RSAtomParser.h"
#import "RSSAXParser.h"
#import "RSParsedFeed.h"
#import "RSParsedArticle.h"
#import "NSString+RSParser.h"
#import "RSDateParser.h"
#import "ParserData.h"
#import "RSParsedEnclosure.h"
#import "RSParsedAuthor.h"
#import <libxml/xmlstring.h>
// Private state for one parse run. The parser acts as its own SAX delegate.
@interface RSAtomParser () <RSSAXParserDelegate>
// Input, copied from the ParserData passed to the initializer.
@property (nonatomic) NSData *feedData;
@property (nonatomic) NSString *urlString;
// Flags describing where in the document the SAX callbacks currently are.
@property (nonatomic) BOOL endFeedFound;
@property (nonatomic) BOOL parsingXHTML;
@property (nonatomic) BOOL parsingSource;
@property (nonatomic) BOOL parsingArticle;
@property (nonatomic) BOOL parsingAuthor;
// One attributes dictionary is pushed per start element; currentAttributes is the last one.
@property (nonatomic) NSMutableArray *attributesStack;
@property (nonatomic, readonly) NSDictionary *currentAttributes;
// Accumulates re-serialized markup while inside type="xhtml" content.
@property (nonatomic) NSMutableString *xhtmlString;
// Feed-level results.
@property (nonatomic) NSString *link;
@property (nonatomic) NSString *title;
@property (nonatomic) NSMutableArray *articles;
// Timestamp taken when parsing starts; stamped onto every article.
@property (nonatomic) NSDate *dateParsed;
@property (nonatomic) RSSAXParser *parser;
// The most recently added article (last element of articles).
@property (nonatomic, readonly) RSParsedArticle *currentArticle;
@property (nonatomic) RSParsedAuthor *currentAuthor;
// Date parsed from the characters currently stored by the SAX parser.
@property (nonatomic, readonly) NSDate *currentDate;
@property (nonatomic) NSString *language;
@end
@implementation RSAtomParser
#pragma mark - Class Methods
/// Convenience entry point: creates a parser for `parserData`, runs it, and returns the parsed feed.
+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData {
    Class parserClass = [self class];
    RSAtomParser *atomParser = [[parserClass alloc] initWithParserData:parserData];
    RSParsedFeed *parsedFeed = [atomParser parseFeed];
    return parsedFeed;
}
#pragma mark - Init
/// Sets up a parser for one feed: keeps the input, creates the SAX parser with self as delegate,
/// and starts with empty attribute and article collections.
- (instancetype)initWithParserData:(ParserData *)parserData {
    if ((self = [super init])) {
        _feedData = parserData.data;
        _urlString = parserData.url;
        _parser = [[RSSAXParser alloc] initWithDelegate:self];
        _attributesStack = [NSMutableArray new];
        _articles = [NSMutableArray new];
    }
    return self;
}
#pragma mark - API
/// Runs the parse and wraps the collected title, link, language and articles in an RSParsedFeed.
- (RSParsedFeed *)parseFeed {
    [self parse];

    return [[RSParsedFeed alloc] initWithURLString:self.urlString
                                             title:self.title
                                              link:self.link
                                          language:self.language
                                          articles:self.articles];
}
#pragma mark - Constants
// NSString constants for XML attribute names (…Key) and attribute values (…Value / …Type).
// The …Key constants are used to look entries up in the attribute dictionaries built per element.
static NSString *kTypeKey = @"type";
static NSString *kXHTMLType = @"xhtml";
static NSString *kRelKey = @"rel";
static NSString *kAlternateValue = @"alternate";
static NSString *kHrefKey = @"href";
static NSString *kXMLKey = @"xml";
static NSString *kBaseKey = @"base";
static NSString *kLangKey = @"lang";
static NSString *kXMLBaseKey = @"xml:base";
static NSString *kXMLLangKey = @"xml:lang";
static NSString *kTextHTMLValue = @"text/html";
static NSString *kRelatedValue = @"related";
static NSString *kEnclosureValue = @"enclosure";
static NSString *kShortURLValue = @"shorturl";
static NSString *kHTMLValue = @"html";
static NSString *kEnValue = @"en";
static NSString *kTextValue = @"text";
static NSString *kSelfValue = @"self";
static NSString *kLengthKey = @"length";
static NSString *kTitleKey = @"title";
// C-string element/attribute names, each paired with a length constant.
// Every length is the string's character count plus one (e.g. "id" -> 3), i.e. it includes the
// terminating NUL. Name and length are passed together to RSSAXEqualTags.
static const char *kID = "id";
static const NSInteger kIDLength = 3;
static const char *kTitle = "title";
static const NSInteger kTitleLength = 6;
static const char *kContent = "content";
static const NSInteger kContentLength = 8;
static const char *kSummary = "summary";
static const NSInteger kSummaryLength = 8;
static const char *kLink = "link";
static const NSInteger kLinkLength = 5;
static const char *kPublished = "published";
static const NSInteger kPublishedLength = 10;
static const char *kIssued = "issued";
static const NSInteger kIssuedLength = 7;
static const char *kUpdated = "updated";
static const NSInteger kUpdatedLength = 8;
static const char *kModified = "modified";
static const NSInteger kModifiedLength = 9;
static const char *kAuthor = "author";
static const NSInteger kAuthorLength = 7;
static const char *kName = "name";
static const NSInteger kNameLength = 5;
static const char *kEmail = "email";
static const NSInteger kEmailLength = 6;
static const char *kURI = "uri";
static const NSInteger kURILength = 4;
static const char *kEntry = "entry";
static const NSInteger kEntryLength = 6;
static const char *kSource = "source";
static const NSInteger kSourceLength = 7;
static const char *kFeed = "feed";
static const NSInteger kFeedLength = 5;
static const char *kType = "type";
static const NSInteger kTypeLength = 5;
static const char *kRel = "rel";
static const NSInteger kRelLength = 4;
static const char *kAlternate = "alternate";
static const NSInteger kAlternateLength = 10;
static const char *kHref = "href";
static const NSInteger kHrefLength = 5;
static const char *kXML = "xml";
static const NSInteger kXMLLength = 4;
static const char *kBase = "base";
static const NSInteger kBaseLength = 5;
static const char *kLang = "lang";
static const NSInteger kLangLength = 5;
static const char *kTextHTML = "text/html";
static const NSInteger kTextHTMLLength = 10;
static const char *kRelated = "related";
static const NSInteger kRelatedLength = 8;
static const char *kShortURL = "shorturl";
static const NSInteger kShortURLLength = 9;
static const char *kHTML = "html";
static const NSInteger kHTMLLength = 5;
static const char *kEn = "en";
static const NSInteger kEnLength = 3;
static const char *kText = "text";
static const NSInteger kTextLength = 5;
static const char *kSelf = "self";
static const NSInteger kSelfLength = 5;
static const char *kEnclosure = "enclosure";
static const NSInteger kEnclosureLength = 10;
static const char *kLength = "length";
static const NSInteger kLengthLength = 7;
#pragma mark - Parsing
/// Records the parse timestamp, then feeds the whole data blob to the SAX parser and finishes it.
- (void)parse {
    self.dateParsed = [NSDate date];

    @autoreleasepool {
        RSSAXParser *saxParser = self.parser;
        [saxParser parseData:self.feedData];
        [saxParser finishParsing];
    }
}
/// Appends a new, empty article for this feed; it becomes the current article.
- (void)addArticle {
    RSParsedArticle *newArticle = [[RSParsedArticle alloc] initWithFeedURL:self.urlString];
    newArticle.dateParsed = self.dateParsed;

    [self.articles addObject:newArticle];
}
/// The article most recently added, or nil when there is none yet.
- (RSParsedArticle *)currentArticle {
    return [self.articles lastObject];
}
/// The attributes dictionary on top of the attributes stack, or nil when the stack is empty.
- (NSDictionary *)currentAttributes {
    return [self.attributesStack lastObject];
}
/// Parses a date out of the characters the SAX parser is currently holding.
- (NSDate *)currentDate {
    RSSAXParser *saxParser = self.parser;
    return RSDateWithBytes(saxParser.currentCharacters.bytes, saxParser.currentCharacters.length);
}
/// Stores the feed-level link from the current <link> element, unless a link has already been found.
///
/// Only links whose relation is "alternate" are used. Per RFC 4287 a link without a `rel` attribute
/// is to be read as "alternate", which is also how -addLink treats article links.
- (void)addFeedLink {
    if (self.link.length > 0) {
        return;
    }

    NSDictionary *attributes = self.currentAttributes;
    NSString *rel = attributes[kRelKey];
    if (rel.length < 1) {
        rel = kAlternateValue;
    }

    // Compare by value: pointer identity only holds if the attribute value happens to be
    // the very same instance as the constant.
    if ([rel isEqualToString:kAlternateValue]) {
        self.link = attributes[kHrefKey];
    }
}
/// Takes the current string as the feed title, but only while no non-empty title has been set.
- (void)addFeedTitle {
    if (self.title.length >= 1) {
        return;
    }
    self.title = [self currentString];
}
/// Takes the feed language from the current element's xml:lang attribute,
/// but only while no non-empty language has been set.
- (void)addFeedLanguage {
    // `length` is unsigned, so the previous `< 0` test could never be true and the
    // language was never stored. "Empty or nil" is `< 1`, matching -addFeedTitle.
    if (self.language.length < 1) {
        self.language = self.currentAttributes[kXMLLangKey];
    }
}
/// Applies the current <link> element to the current article, depending on its `rel` attribute:
/// "related" sets the article's link, "alternate" (also the default when `rel` is missing or empty)
/// sets the permalink, and "enclosure" adds an enclosure. Links without an href are ignored,
/// and an already-set link or permalink is not overwritten.
- (void)addLink {
    NSDictionary *attributes = self.currentAttributes;

    NSString *urlString = attributes[kHrefKey];
    if (urlString.length < 1) {
        return;
    }

    RSParsedArticle *article = self.currentArticle;

    NSString *rel = attributes[kRelKey];
    if (rel.length < 1) {
        rel = kAlternateValue;
    }

    // Compare by value rather than by pointer, consistent with the isEqualToString: check
    // used for the content type elsewhere in this parser.
    if ([rel isEqualToString:kRelatedValue]) {
        if (!article.link) {
            article.link = urlString;
        }
    }
    else if ([rel isEqualToString:kAlternateValue]) {
        if (!article.permalink) {
            article.permalink = urlString;
        }
    }
    else if ([rel isEqualToString:kEnclosureValue]) {
        RSParsedEnclosure *enclosure = [self enclosureWithURLString:urlString attributes:attributes];
        [article addEnclosure:enclosure];
    }
}
/// Builds an enclosure for `urlString`, filling title, MIME type and length from the link attributes.
- (RSParsedEnclosure *)enclosureWithURLString:(NSString *)urlString attributes:(NSDictionary *)attributes {
    RSParsedEnclosure *parsedEnclosure = [[RSParsedEnclosure alloc] init];

    parsedEnclosure.url = urlString;
    parsedEnclosure.title = attributes[kTitleKey];
    parsedEnclosure.mimeType = attributes[kTypeKey];

    id lengthAttribute = attributes[kLengthKey];
    parsedEnclosure.length = [lengthAttribute integerValue];

    return parsedEnclosure;
}
/// Uses the current string as the current article's body, replacing any existing body.
- (void)addContent {
    RSParsedArticle *article = self.currentArticle;
    article.body = [self currentString];
}
/// Uses the current string as the article body only when the article has no body yet.
- (void)addSummary {
    if (self.currentArticle.body) {
        return;
    }
    self.currentArticle.body = [self currentString];
}
/// The SAX parser's currently stored characters as a string with whitespace trimmed.
- (NSString *)currentString {
    RSSAXParser *saxParser = self.parser;
    return saxParser.currentStringWithTrimmedWhitespace;
}
/// Applies one un-prefixed element to the current article, chosen by tag name.
/// Prefixed (namespaced) elements are ignored. The Atom 0.3 date tags ("issued", "modified")
/// only fill in dates that have not been set already.
- (void)addArticleElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix {
    if (prefix) {
        return;
    }

    if (RSSAXEqualTags(localName, kID, kIDLength)) {
        self.currentArticle.guid = [self currentString];
        return;
    }
    if (RSSAXEqualTags(localName, kTitle, kTitleLength)) {
        self.currentArticle.title = [self currentString];
        return;
    }
    if (RSSAXEqualTags(localName, kContent, kContentLength)) {
        [self addContent];
        return;
    }
    if (RSSAXEqualTags(localName, kSummary, kSummaryLength)) {
        [self addSummary];
        return;
    }
    if (RSSAXEqualTags(localName, kLink, kLinkLength)) {
        [self addLink];
        return;
    }
    if (RSSAXEqualTags(localName, kPublished, kPublishedLength)) {
        self.currentArticle.datePublished = self.currentDate;
        return;
    }
    if (RSSAXEqualTags(localName, kUpdated, kUpdatedLength)) {
        self.currentArticle.dateModified = self.currentDate;
        return;
    }

    // Atom 0.3 dates
    if (RSSAXEqualTags(localName, kIssued, kIssuedLength)) {
        if (!self.currentArticle.datePublished) {
            self.currentArticle.datePublished = self.currentDate;
        }
        return;
    }
    if (RSSAXEqualTags(localName, kModified, kModifiedLength)) {
        if (!self.currentArticle.dateModified) {
            self.currentArticle.dateModified = self.currentDate;
        }
    }
}
/// Appends an opening tag for `localName` to the XHTML buffer, including the current attributes.
/// Double quotes inside attribute values are written as &quot;. Does nothing when `localName` is NULL.
- (void)addXHTMLTag:(const xmlChar *)localName {
    if (!localName) {
        return;
    }

    NSMutableString *markup = self.xhtmlString;
    NSDictionary *attributes = self.currentAttributes;

    [markup appendString:@"<"];
    [markup appendString:[NSString stringWithUTF8String:(const char *)localName]];

    // With no attributes the loop body never runs and the tag is closed right away.
    for (NSString *attributeName in attributes) {
        NSString *rawValue = attributes[attributeName];
        NSString *quotedValue = [rawValue stringByReplacingOccurrencesOfString:@"\"" withString:@"&quot;"];

        [markup appendString:@" "];
        [markup appendString:attributeName];
        [markup appendString:@"=\""];
        [markup appendString:quotedValue];
        [markup appendString:@"\""];
    }

    [markup appendString:@">"];
}
#pragma mark - RSSAXParserDelegate
// SAX callback for an opening tag. Pushes the element's attributes onto attributesStack
// and then updates the parsing state (XHTML / article / author / source) according to
// the tag name. The order of the checks below matters: each early return leaves the
// remaining state untouched.
- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes {
// Everything after the closing feed tag is ignored (endFeedFound is set in the end-element callback).
if (self.endFeedFound) {
return;
}
// Always push a dictionary (possibly empty) so the end-element callback can pop one entry per element.
NSDictionary *xmlAttributes = [self.parser attributesDictionary:attributes numberOfAttributes:numberOfAttributes];
if (!xmlAttributes) {
xmlAttributes = [NSDictionary dictionary];
}
[self.attributesStack addObject:xmlAttributes];
// While inside XHTML content, every nested element is serialized into xhtmlString and not interpreted.
if (self.parsingXHTML) {
[self addXHTMLTag:localName];
return;
}
if (RSSAXEqualTags(localName, kEntry, kEntryLength)) {
self.parsingArticle = YES;
[self addArticle];
return;
}
if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) {
self.parsingAuthor = YES;
self.currentAuthor = [[RSParsedAuthor alloc] init];
return;
}
if (RSSAXEqualTags(localName, kSource, kSourceLength)) {
self.parsingSource = YES;
return;
}
BOOL isContentTag = RSSAXEqualTags(localName, kContent, kContentLength);
BOOL isSummaryTag = RSSAXEqualTags(localName, kSummary, kSummaryLength);
if (self.parsingArticle && (isContentTag || isSummaryTag)) {
// The article language is taken from the xml:lang attribute of the content element only.
if (isContentTag) {
self.currentArticle.language = xmlAttributes[kXMLLangKey];
}
// Content or summary whose type attribute equals kXHTMLType switches to XHTML mode with a fresh buffer.
NSString *contentType = xmlAttributes[kTypeKey];
if ([contentType isEqualToString:kXHTMLType]) {
self.parsingXHTML = YES;
self.xhtmlString = [NSMutableString stringWithString:@""];
return;
}
}
// A link element outside of any entry is handled as a feed-level link.
if (!self.parsingArticle && RSSAXEqualTags(localName, kLink, kLinkLength)) {
[self addFeedLink];
return;
}
if (RSSAXEqualTags(localName, kFeed, kFeedLength)) {
[self addFeedLanguage];
}
// All remaining elements have their character data collected for the end-element callback.
[self.parser beginStoringCharacters];
}
// SAX callback for a closing tag. Finishes whatever the matching start tag began:
// closes XHTML content, completes an author, ends an entry or source, stores an entry
// field, or records the feed title. Pops attributesStack at the end.
- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri {
// The closing feed tag ends all processing; later callbacks return immediately.
// NOTE(review): these two early returns skip the attributesStack pop at the bottom of this method.
if (RSSAXEqualTags(localName, kFeed, kFeedLength)) {
self.endFeedFound = YES;
return;
}
if (self.endFeedFound) {
return;
}
if (self.parsingXHTML) {
BOOL isContentTag = RSSAXEqualTags(localName, kContent, kContentLength);
BOOL isSummaryTag = RSSAXEqualTags(localName, kSummary, kSummaryLength);
if (self.parsingArticle && (isContentTag || isSummaryTag)) {
// Content always becomes the body; a summary is used only while the body is still empty.
if (isContentTag) {
self.currentArticle.body = [self.xhtmlString copy];
}
else if (isSummaryTag) {
if (self.currentArticle.body.length < 1) {
self.currentArticle.body = [self.xhtmlString copy];
}
}
}
if (isContentTag || isSummaryTag) {
self.parsingXHTML = NO;
}
// The closing tag is appended after the body was copied above, so the closing
// content/summary tag itself never ends up in the stored body.
[self.xhtmlString appendString:@"</"];
[self.xhtmlString appendString:[NSString stringWithUTF8String:(const char *)localName]];
[self.xhtmlString appendString:@">"];
}
else if (self.parsingAuthor) {
if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) {
self.parsingAuthor = NO;
// Authors with no name, email address, or URL are dropped.
RSParsedAuthor *author = self.currentAuthor;
if (author.name || author.emailAddress || author.url) {
[self.currentArticle addAuthor:author];
}
self.currentAuthor = nil;
}
else if (RSSAXEqualTags(localName, kName, kNameLength)) {
self.currentAuthor.name = [self currentString];
}
else if (RSSAXEqualTags(localName, kEmail, kEmailLength)) {
self.currentAuthor.emailAddress = [self currentString];
}
else if (RSSAXEqualTags(localName, kURI, kURILength)) {
self.currentAuthor.url = [self currentString];
}
}
else if (RSSAXEqualTags(localName, kEntry, kEntryLength)) {
self.parsingArticle = NO;
}
// Elements inside a source element are not applied to the article.
else if (self.parsingArticle && !self.parsingSource) {
[self addArticleElement:localName prefix:prefix];
}
else if (RSSAXEqualTags(localName, kSource, kSourceLength)) {
self.parsingSource = NO;
}
else if (!self.parsingArticle && !self.parsingSource && RSSAXEqualTags(localName, kTitle, kTitleLength)) {
[self addFeedTitle];
}
// Balances the push done in the start-element callback.
[self.attributesStack removeLastObject];
}
/// Returns a shared constant string for the attribute names this parser looks up,
/// or nil when the name is not one of them.
///
/// Prefixed names are only recognised for the `xml` prefix (base and lang); any other
/// prefixed name yields nil.
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix {
    if (prefix) {
        if (RSSAXEqualTags(prefix, kXML, kXMLLength)) {
            if (RSSAXEqualTags(name, kBase, kBaseLength)) {
                return kXMLBaseKey;
            }
            if (RSSAXEqualTags(name, kLang, kLangLength)) {
                return kXMLLangKey;
            }
        }
        return nil;
    }

    // Un-prefixed attribute names.
    if (RSSAXEqualTags(name, kRel, kRelLength)) {
        return kRelKey;
    }
    if (RSSAXEqualTags(name, kType, kTypeLength)) {
        return kTypeKey;
    }
    if (RSSAXEqualTags(name, kHref, kHrefLength)) {
        return kHrefKey;
    }
    if (RSSAXEqualTags(name, kAlternate, kAlternateLength)) {
        return kAlternateValue;
    }
    if (RSSAXEqualTags(name, kLength, kLengthLength)) {
        return kLengthKey;
    }
    if (RSSAXEqualTags(name, kTitle, kTitleLength)) {
        return kTitleKey;
    }
    return nil;
}
/// Returns YES when the first `length` bytes of both buffers are identical.
/// A length of 0 always compares equal.
static BOOL equalBytes(const void *bytes1, const void *bytes2, NSUInteger length) {
    int comparison = memcmp(bytes1, bytes2, length);
    return (comparison == 0) ? YES : NO;
}
/// Returns a shared constant string for common attribute values, or nil when the bytes
/// match none of them.
///
/// Each candidate is compared over its k…Length constant minus one byte (presumably the
/// constants count the trailing NUL terminator -- TODO confirm at their definitions).
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length {
    // Three parallel tables; index i describes one candidate value.
    const void *candidateBytes[] = {
        kAlternate, kEnclosure, kTextHTML, kRelated, kShortURL, kHTML, kEn, kText, kSelf
    };
    const NSUInteger candidateLengths[] = {
        kAlternateLength - 1, kEnclosureLength - 1, kTextHTMLLength - 1,
        kRelatedLength - 1, kShortURLLength - 1, kHTMLLength - 1,
        kEnLength - 1, kTextLength - 1, kSelfLength - 1
    };
    // The interned strings are globals, so unretained references are sufficient here.
    __unsafe_unretained NSString *internedValues[] = {
        kAlternateValue, kEnclosureValue, kTextHTMLValue, kRelatedValue, kShortURLValue,
        kHTMLValue, kEnValue, kTextValue, kSelfValue
    };

    const NSUInteger numberOfCandidates = sizeof(candidateLengths) / sizeof(candidateLengths[0]);
    for (NSUInteger candidateIndex = 0; candidateIndex < numberOfCandidates; candidateIndex++) {
        NSUInteger candidateLength = candidateLengths[candidateIndex];
        if (length == candidateLength && equalBytes(bytes, candidateBytes[candidateIndex], candidateLength)) {
            return internedValues[candidateIndex];
        }
    }
    return nil;
}
/// SAX callback for character data. Only acts while XHTML content is being collected:
/// the text is appended to `xhtmlString` after re-encoding.
- (void)saxParser:(RSSAXParser *)SAXParser XMLCharactersFound:(const unsigned char *)characters length:(NSUInteger)length {
    if (!self.parsingXHTML) {
        return;
    }

    // Wraps the parser's buffer without copying; nil means the bytes could not be read as UTF-8.
    NSString *text = [[NSString alloc] initWithBytesNoCopy:(void *)characters length:length encoding:NSUTF8StringEncoding freeWhenDone:NO];
    if (text != nil) {
        // libxml decodes all entities; we need to re-encode certain characters
        // (<, >, and &) when inside XHTML text content.
        [self.xhtmlString appendString:text.rsparser_stringByEncodingRequiredEntities];
    }
}
@end

View File

@ -0,0 +1,22 @@
//
// RSDateParser.h
// RSParser
//
// Created by Brent Simmons on 3/25/15.
// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
// Common web dates -- RFC 822 and ISO 8601 -- are handled here: the formats you find in JSON and XML feeds.
// These may return nil. They may also return garbage, given bad input.
NSDate *RSDateWithString(NSString *dateString);
// If you're using a SAX parser, you have the bytes and don't need to convert to a string first.
// It's faster and uses less memory.
// (Assumes bytes are UTF-8 or ASCII. If you're using the libxml SAX parser, this will work.)
NSDate *RSDateWithBytes(const char *bytes, NSUInteger numberOfBytes);

View File

@ -0,0 +1,461 @@
//
// RSDateParser.m
// RSParser
//
// Created by Brent Simmons on 3/25/15.
// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved.
//
#import "RSDateParser.h"
#import <time.h>
// One row of the time zone lookup table: an abbreviation and its offset from GMT.
typedef struct {
const char *abbreviation; // NUL-terminated abbreviation, e.g. "PDT"
const NSInteger offsetHours; // signed whole hours from GMT
const NSInteger offsetMinutes; // additional minutes, stored as a non-negative magnitude; the sign comes from offsetHours
} RSTimeZoneAbbreviationAndOffset;
// Number of rows in timeZoneTable. Must be kept in sync with the initializer below
// (currently 1 + 23 * 4 + 3 = 96 entries).
#define kNumberOfTimeZones 96
// Abbreviation -> offset table, searched linearly and compared case-sensitively
// by offsetInSecondsForTimeZoneAbbreviation().
static const RSTimeZoneAbbreviationAndOffset timeZoneTable[kNumberOfTimeZones] = {
{"GMT", 0, 0}, //Most common at top, for performance
{"PDT", -7, 0}, {"PST", -8, 0}, {"EST", -5, 0}, {"EDT", -4, 0},
{"MDT", -6, 0}, {"MST", -7, 0}, {"CST", -6, 0}, {"CDT", -5, 0},
{"ACT", -8, 0}, {"AFT", 4, 30}, {"AMT", 4, 0}, {"ART", -3, 0},
{"AST", 3, 0}, {"AZT", 4, 0}, {"BIT", -12, 0}, {"BDT", 8, 0},
{"ACST", 9, 30}, {"AEST", 10, 0}, {"AKST", -9, 0}, {"AMST", 5, 0},
{"AWST", 8, 0}, {"AZOST", -1, 0}, {"BIOT", 6, 0}, {"BRT", -3, 0},
{"BST", 6, 0}, {"BTT", 6, 0}, {"CAT", 2, 0}, {"CCT", 6, 30},
{"CET", 1, 0}, {"CEST", 2, 0}, {"CHAST", 12, 45}, {"ChST", 10, 0},
{"CIST", -8, 0}, {"CKT", -10, 0}, {"CLT", -4, 0}, {"CLST", -3, 0},
{"COT", -5, 0}, {"COST", -4, 0}, {"CVT", -1, 0}, {"CXT", 7, 0},
{"EAST", -6, 0}, {"EAT", 3, 0}, {"ECT", -4, 0}, {"EEST", 3, 0},
{"EET", 2, 0}, {"FJT", 12, 0}, {"FKST", -4, 0}, {"GALT", -6, 0},
{"GET", 4, 0}, {"GFT", -3, 0}, {"GILT", 7, 0}, {"GIT", -9, 0},
{"GST", -2, 0}, {"GYT", -4, 0}, {"HAST", -10, 0}, {"HKT", 8, 0},
{"HMT", 5, 0}, {"IRKT", 8, 0}, {"IRST", 3, 30}, {"IST", 2, 0},
{"JST", 9, 0}, {"KRAT", 7, 0}, {"KST", 9, 0}, {"LHST", 10, 30},
{"LINT", 14, 0}, {"MAGT", 11, 0}, {"MIT", -9, 30}, {"MSK", 3, 0},
{"MUT", 4, 0}, {"NDT", -2, 30}, {"NFT", 11, 30}, {"NPT", 5, 45},
{"NT", -3, 30}, {"OMST", 6, 0}, {"PETT", 12, 0}, {"PHOT", 13, 0},
{"PKT", 5, 0}, {"RET", 4, 0}, {"SAMT", 4, 0}, {"SAST", 2, 0},
{"SBT", 11, 0}, {"SCT", 4, 0}, {"SLT", 5, 30}, {"SST", 8, 0},
{"TAHT", -10, 0}, {"THA", 7, 0}, {"UYT", -3, 0}, {"UYST", -2, 0},
{"VET", -4, 30}, {"VLAT", 10, 0}, {"WAT", 1, 0}, {"WET", 0, 0},
{"WEST", 1, 0}, {"YAKT", 9, 0}, {"YEKT", 5, 0}
}; /*See http://en.wikipedia.org/wiki/List_of_time_zone_abbreviations for list*/
#pragma mark - Parser
// 1-based month numbers (January = 1 ... December = 12), as returned by nextMonthValue().
// They are converted to the 0-based tm_mon when the date is built.
enum {
RSJanuary = 1,
RSFebruary,
RSMarch,
RSApril,
RSMay,
RSJune,
RSJuly,
RSAugust,
RSSeptember,
RSOctober,
RSNovember,
RSDecember
};
/*Finds the next month name at or after startingIndex and returns it as a 1-based month
 (January is 1, December is 12).

 Lots of short-circuits here. Not strict. GIGO: only as many leading letters as are needed
 to tell the English month names apart are looked at (one letter for F, S, O, N, D; up to
 three otherwise).

 bytes/numberOfBytes: the buffer to scan (UTF-8 or ASCII; not NUL-terminated).
 startingIndex: index of the first byte to examine.
 finalIndex: set to the index of the last byte examined; left untouched when
 startingIndex >= numberOfBytes.

 Returns NSNotFound when fewer than two letters were found and no short-circuit applied.*/
static NSInteger nextMonthValue(const char *bytes, NSUInteger numberOfBytes, NSUInteger startingIndex, NSUInteger *finalIndex) {
    NSUInteger i;
    NSUInteger numberOfAlphaCharactersFound = 0;
    char monthCharacters[3] = {0, 0, 0};

    for (i = startingIndex; i < numberOfBytes; i++) {
        *finalIndex = i;
        char character = bytes[i];

        // <ctype.h> functions require a value representable as unsigned char; a plain
        // (signed) char holding a UTF-8 high byte would be negative, which is undefined.
        // Comparing against 0 also avoids truncating a non-zero int into a NO BOOL.
        BOOL isAlphaCharacter = isalpha((unsigned char)character) != 0;

        if (!isAlphaCharacter) {
            if (numberOfAlphaCharactersFound < 1)
                continue; // still skipping leading separators
            break; // end of the month word
        }

        numberOfAlphaCharactersFound++;
        if (numberOfAlphaCharactersFound == 1) {
            // These months are identified by their first letter alone.
            if (character == 'F' || character == 'f')
                return RSFebruary;
            if (character == 'S' || character == 's')
                return RSSeptember;
            if (character == 'O' || character == 'o')
                return RSOctober;
            if (character == 'N' || character == 'n')
                return RSNovember;
            if (character == 'D' || character == 'd')
                return RSDecember;
        }

        monthCharacters[numberOfAlphaCharactersFound - 1] = character;
        if (numberOfAlphaCharactersFound >= 3)
            break;
    }

    if (numberOfAlphaCharactersFound < 2)
        return NSNotFound;

    if (monthCharacters[0] == 'J' || monthCharacters[0] == 'j') { //Jan, Jun, Jul
        if (monthCharacters[1] == 'a' || monthCharacters[1] == 'A')
            return RSJanuary;
        if (monthCharacters[1] == 'u' || monthCharacters[1] == 'U') {
            if (monthCharacters[2] == 'n' || monthCharacters[2] == 'N')
                return RSJune;
            return RSJuly;
        }
        return RSJanuary;
    }

    if (monthCharacters[0] == 'M' || monthCharacters[0] == 'm') { //March, May
        if (monthCharacters[2] == 'y' || monthCharacters[2] == 'Y')
            return RSMay;
        return RSMarch;
    }

    if (monthCharacters[0] == 'A' || monthCharacters[0] == 'a') { //April, August
        if (monthCharacters[1] == 'u' || monthCharacters[1] == 'U')
            return RSAugust;
        return RSApril;
    }

    return RSJanuary; //should never get here
}
/*Reads the next run of decimal digits at or after startingIndex and returns its value.

 Non-digit bytes before the first digit are skipped. Reading stops at the first non-digit
 after at least one digit, or as soon as maximumNumberOfDigits digits have been read.

 maximumNumberOfDigits has a maximum limit of 4 (for time zone offsets and years).
 *finalIndex will be the index of the last character looked at: the last digit when the
 digit limit stopped the scan, otherwise the non-digit that ended it (or the last byte of
 the buffer). It is left untouched when startingIndex >= numberOfBytes.

 Returns NSNotFound when no digit was found.*/
static NSInteger nextNumericValue(const char *bytes, NSUInteger numberOfBytes, NSUInteger startingIndex, NSUInteger maximumNumberOfDigits, NSUInteger *finalIndex) {
    if (maximumNumberOfDigits > 4)
        maximumNumberOfDigits = 4;

    NSUInteger i = 0;
    NSUInteger numberOfDigitsFound = 0;
    NSInteger digits[4] = {0, 0, 0, 0};

    for (i = startingIndex; i < numberOfBytes; i++) {
        *finalIndex = i;

        // <ctype.h> functions require a value representable as unsigned char; a plain
        // (signed) char holding a UTF-8 high byte would be negative, which is undefined.
        BOOL isDigit = isdigit((unsigned char)bytes[i]) != 0;

        if (!isDigit) {
            if (numberOfDigitsFound < 1)
                continue; // still skipping leading separators
            break; // end of the number
        }

        digits[numberOfDigitsFound] = bytes[i] - '0';
        numberOfDigitsFound++;
        if (numberOfDigitsFound >= maximumNumberOfDigits)
            break;
    }

    if (numberOfDigitsFound < 1)
        return NSNotFound;
    if (numberOfDigitsFound == 1)
        return digits[0];
    if (numberOfDigitsFound == 2)
        return (digits[0] * 10) + digits[1];
    if (numberOfDigitsFound == 3)
        return (digits[0] * 100) + (digits[1] * 10) + digits[2];
    return (digits[0] * 1000) + (digits[1] * 100) + (digits[2] * 10) + digits[3];
}
/*Returns YES when the NUL-terminated string s contains at least one alphabetic character.*/
static BOOL hasAtLeastOneAlphaCharacter(const char *s) {
    for (const char *cursor = s; *cursor != '\0'; cursor++) {
        // <ctype.h> functions require a value representable as unsigned char;
        // passing a negative plain char is undefined behavior.
        if (isalpha((unsigned char)*cursor))
            return YES;
    }
    return NO;
}
#pragma mark - Time Zones and offsets
/*Looks up a time zone abbreviation (exact, case-sensitive match) in timeZoneTable and
 returns its offset from GMT in seconds. Unknown abbreviations yield 0.

 A linear scan is fine: the table is a short C array with the most common zones first.*/
static NSInteger offsetInSecondsForTimeZoneAbbreviation(const char *abbreviation) {
    for (NSUInteger index = 0; index < kNumberOfTimeZones; index++) {
        const RSTimeZoneAbbreviationAndOffset *zone = &timeZoneTable[index];
        if (strcmp(abbreviation, zone->abbreviation) != 0) {
            continue;
        }

        NSInteger hourSeconds = zone->offsetHours * 60 * 60;
        NSInteger minuteSeconds = zone->offsetMinutes * 60;
        // Minutes are stored unsigned; they extend the offset in the direction of the hours.
        return (zone->offsetHours < 0) ? (hourSeconds - minuteSeconds) : (hourSeconds + minuteSeconds);
    }
    return 0;
}
/*Converts a numeric offset such as "+0700" or "-0530" (colons already removed) into
 seconds from GMT. The first two digits are hours, the next two are minutes; a missing
 part counts as 0. The result is positive only when the first character is '+'.*/
static NSInteger offsetInSecondsForOffsetCharacters(const char *timeZoneCharacters) {
    const NSUInteger length = strlen(timeZoneCharacters);
    NSUInteger lastIndex = 0;

    NSInteger hours = nextNumericValue(timeZoneCharacters, length, 0, 2, &lastIndex);
    NSInteger minutes = nextNumericValue(timeZoneCharacters, length, lastIndex + 1, 2, &lastIndex);
    if (hours == NSNotFound) {
        hours = 0;
    }
    if (minutes == NSNotFound) {
        minutes = 0;
    }

    // Both parts are non-negative, so the sum is 0 exactly when both are 0.
    NSInteger magnitude = (hours * 60 * 60) + (minutes * 60);
    if (magnitude == 0) {
        return 0;
    }
    return (timeZoneCharacters[0] == '+') ? magnitude : (0 - magnitude);
}
static const char *rs_GMT = "GMT";
static const char *rs_UTC = "UTC";

/*Parses the time zone that starts at or after startingIndex and returns its offset from
 GMT in seconds.

 Examples: GMT Z +0000 -0000 +07:00 -0700 PDT EST

 Up to five significant characters (letters, digits, '+', '-') are copied into a
 NUL-terminated char[6]; colons, spaces and any other bytes are dropped. If the result is
 numeric, the seconds from GMT are calculated. If it contains letters, Z, GMT and UTC are
 special-cased to 0; anything else is looked up in the time zone table.

 Returns 0 when no time zone characters were found.*/
static NSInteger parsedTimeZoneOffset(const char *bytes, NSUInteger numberOfBytes, NSUInteger startingIndex) {
    char timeZoneCharacters[6] = {0, 0, 0, 0, 0, 0}; //NUL-terminated: at most 5 characters are stored
    NSUInteger i = 0;
    NSUInteger numberOfCharactersFound = 0;

    for (i = startingIndex; i < numberOfBytes; i++) {
        // <ctype.h> functions require a value representable as unsigned char; a plain
        // (signed) char holding a UTF-8 high byte would be negative, which is undefined.
        unsigned char ch = (unsigned char)bytes[i];
        if (ch == ':' || ch == ' ')
            continue;
        if (isdigit(ch) || isalpha(ch) || ch == '+' || ch == '-') {
            numberOfCharactersFound++;
            timeZoneCharacters[numberOfCharactersFound - 1] = (char)ch;
        }
        if (numberOfCharactersFound >= 5)
            break;
    }

    if (numberOfCharactersFound < 1 || timeZoneCharacters[0] == 'Z' || timeZoneCharacters[0] == 'z')
        return 0;
    if (strcasestr(timeZoneCharacters, rs_GMT) != NULL || strcasestr(timeZoneCharacters, rs_UTC) != NULL)
        return 0;

    if (hasAtLeastOneAlphaCharacter(timeZoneCharacters))
        return offsetInSecondsForTimeZoneAbbreviation(timeZoneCharacters);
    return offsetInSecondsForOffsetCharacters(timeZoneCharacters);
}
#pragma mark - Date Creation
/*Builds an NSDate from broken-down date parts.

 year/month/day/hour/minute/second: the wall-clock time as written in the feed
 (month is 1-based).
 milliseconds: fractional part of the second, in milliseconds.
 timeZoneOffset: offset of that wall-clock time from GMT, in seconds (east is positive).

 The fast path uses timegm(). NSCalendar is only used when timegm() reports failure.*/
static NSDate *dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(NSInteger year, NSInteger month, NSInteger day, NSInteger hour, NSInteger minute, NSInteger second, NSInteger milliseconds, NSInteger timeZoneOffset) {
    struct tm timeInfo;
    timeInfo.tm_sec = (int)second;
    timeInfo.tm_min = (int)minute;
    timeInfo.tm_hour = (int)hour;
    timeInfo.tm_mday = (int)day;
    timeInfo.tm_mon = (int)(month - 1); //It's 1-based coming in
    timeInfo.tm_year = (int)(year - 1900); //see time.h -- it's years since 1900
    timeInfo.tm_wday = -1;
    timeInfo.tm_yday = -1;
    timeInfo.tm_isdst = -1;
    timeInfo.tm_gmtoff = 0;
    timeInfo.tm_zone = NULL;

    //timegm instead of mktime (which uses local time zone)
    time_t utcSeconds = timegm(&timeInfo);

    // The failure value must be tested on timegm's own result, before the time zone
    // offset is applied: afterwards a failure with a non-zero offset would go unnoticed,
    // and a valid date that happens to land on -1 would be mistaken for a failure.
    if (utcSeconds == (time_t)-1) {
        /*NSCalendar is super-amazingly-slow (which is partly why RSDateParser exists), so this is used only when timegm fails, e.g. when the date is far enough in the future (19 January 2038 03:14:08Z on 32-bit systems). 1969-12-31T23:59:59Z also lands here, because its valid result equals the failure value; the calendar produces the same instant. If profiling says that this is a performance issue, then you've got a weird app that needs to work with dates far in the future.*/
        NSDateComponents *dateComponents = [NSDateComponents new];
        dateComponents.timeZone = [NSTimeZone timeZoneForSecondsFromGMT:timeZoneOffset];
        dateComponents.year = year;
        dateComponents.month = month;
        dateComponents.day = day;
        dateComponents.hour = hour;
        dateComponents.minute = minute;
        dateComponents.second = second + (milliseconds / 1000);

        // Feed dates are Gregorian. The user's current calendar may not be (Buddhist,
        // Japanese, ...), which would interpret the year differently.
        NSCalendar *gregorianCalendar = [[NSCalendar alloc] initWithCalendarIdentifier:NSCalendarIdentifierGregorian];
        return [gregorianCalendar dateFromComponents:dateComponents];
    }

    NSTimeInterval rawTime = (NSTimeInterval)(utcSeconds - timeZoneOffset);
    if (milliseconds > 0) {
        rawTime += (NSTimeInterval)milliseconds / 1000.0;
    }
    return [NSDate dateWithTimeIntervalSince1970:rawTime];
}
#pragma mark - Standard Formats
// Parses an RFC 822-style date ("Wed, 17 Nov 2010 08:40:07 -0500" and variants, see the
// patterns below). The fields are read strictly left to right: every nextNumericValue /
// nextMonthValue call stores the index of the last byte it examined in finalIndex, and the
// next field starts scanning at finalIndex + 1. The statement order therefore matters.
static NSDate *RSParsePubDateWithBytes(const char *bytes, NSUInteger numberOfBytes) {
/*@"EEE',' dd MMM yyyy HH':'mm':'ss ZZZ"
@"EEE, dd MMM yyyy HH:mm:ss zzz"
@"dd MMM yyyy HH:mm zzz"
@"dd MMM yyyy HH:mm ZZZ"
@"EEE, dd MMM yyyy"
@"EEE, dd MMM yyyy HH:mm zzz"
etc.*/
NSUInteger finalIndex = 0;
NSInteger day = 1;
NSInteger month = RSJanuary;
NSInteger year = 1970;
NSInteger hour = 0;
NSInteger minute = 0;
NSInteger second = 0;
NSInteger timeZoneOffset = 0;
// The leading day name, if any, contains no digits and is skipped by the numeric scan.
day = nextNumericValue(bytes, numberOfBytes, 0, 2, &finalIndex);
if (day < 1 || day == NSNotFound)
day = 1;
// NOTE(review): month and year are assigned unconditionally and are not checked for
// NSNotFound, so the initial values above never take effect for them.
month = nextMonthValue(bytes, numberOfBytes, finalIndex + 1, &finalIndex);
year = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 4, &finalIndex);
hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex);
if (hour == NSNotFound)
hour = 0;
minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex);
if (minute == NSNotFound)
minute = 0;
// Seconds are optional: they are read only when the byte right after the minutes is ':'.
NSUInteger currentIndex = finalIndex + 1;
BOOL hasSeconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == ':');
if (hasSeconds)
second = nextNumericValue(bytes, numberOfBytes, currentIndex, 2, &finalIndex);
// The time zone is optional too: it is read only when a space follows the time.
currentIndex = finalIndex + 1;
BOOL hasTimeZone = (currentIndex < numberOfBytes) && (bytes[currentIndex] == ' ');
if (hasTimeZone)
timeZoneOffset = parsedTimeZoneOffset(bytes, numberOfBytes, currentIndex);
// This format carries no fractional seconds, so milliseconds is always 0.
return dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(year, month, day, hour, minute, second, 0, timeZoneOffset);
}
/*Parses a W3C / ISO 8601 date:

 @"yyyy'-'MM'-'dd'T'HH':'mm':'ss"
 @"yyyy-MM-dd'T'HH:mm:sszzz"
 @"yyyy-MM-dd'T'HH:mm:ss'.'SSSzzz"
 etc.

 Fields are read left to right; each scan starts one byte after the last byte the previous
 scan examined (finalIndex). The reduced-precision forms are accepted as well: a missing
 month or day defaults to 1, and a missing hour, minute or second defaults to 0. Without
 these defaults NSNotFound would be passed on as a field value.

 The fractional second may have any number of digits. The first three are used as
 milliseconds (".5" is 500 ms, ".25" is 250 ms) and any further digits are skipped, so
 that they are not mistaken for the start of the time zone.*/
static NSDate *RSParseW3CWithBytes(const char *bytes, NSUInteger numberOfBytes) {
    NSUInteger finalIndex = 0;
    NSInteger second = 0;
    NSInteger milliseconds = 0;

    NSInteger year = nextNumericValue(bytes, numberOfBytes, 0, 4, &finalIndex);

    NSInteger month = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex);
    if (month == NSNotFound)
        month = RSJanuary;

    NSInteger day = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex);
    if (day == NSNotFound)
        day = 1;

    NSInteger hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex);
    if (hour == NSNotFound)
        hour = 0;

    NSInteger minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex);
    if (minute == NSNotFound)
        minute = 0;

    // Seconds are optional ("hh:mmTZD"). They are read only when the minutes are followed
    // by ':' (extended format) or directly by a digit (basic format); otherwise the scan
    // would pick up the digits of the time zone offset as seconds.
    NSUInteger currentIndex = finalIndex + 1;
    BOOL hasSeconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == ':' || isdigit((unsigned char)bytes[currentIndex]));
    if (hasSeconds) {
        second = nextNumericValue(bytes, numberOfBytes, currentIndex, 2, &finalIndex);
        if (second == NSNotFound)
            second = 0;
        currentIndex = finalIndex + 1;
    }

    BOOL hasFraction = (currentIndex < numberOfBytes) && (bytes[currentIndex] == '.');
    if (hasFraction) {
        NSUInteger fractionStart = currentIndex + 1;
        NSUInteger fractionEnd = fractionStart;
        while (fractionEnd < numberOfBytes && isdigit((unsigned char)bytes[fractionEnd]))
            fractionEnd++;

        NSUInteger numberOfFractionDigits = fractionEnd - fractionStart;
        if (numberOfFractionDigits > 0) {
            milliseconds = nextNumericValue(bytes, numberOfBytes, fractionStart, 3, &finalIndex);
            // Scale short fractions up to milliseconds.
            if (numberOfFractionDigits == 1)
                milliseconds *= 100;
            else if (numberOfFractionDigits == 2)
                milliseconds *= 10;
        }
        // Continue after the whole fraction, however many digits it has.
        currentIndex = fractionEnd;
    }

    NSInteger timeZoneOffset = parsedTimeZoneOffset(bytes, numberOfBytes, currentIndex);

    return dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(year, month, day, hour, minute, second, milliseconds, timeZoneOffset);
}
/*Heuristic: a date is treated as an RFC 822-style pubDate when it contains a space or a
 comma anywhere in its first numberOfBytes bytes.*/
static BOOL dateIsPubDate(const char *bytes, NSUInteger numberOfBytes) {
    const char *cursor = bytes;
    const char *const end = bytes + numberOfBytes;

    while (cursor < end) {
        char ch = *cursor++;
        if (ch == ',' || ch == ' ') {
            return YES;
        }
    }
    return NO;
}
/*Heuristic check for a W3C / ISO 8601 date.

 Something like 2010-11-17T08:40:07-05:00, but it might be missing the T character in the
 middle. After skipping leading whitespace, looks for four digits in a row followed by
 a '-'. Returns NO when fewer than five bytes remain after the whitespace.*/
static BOOL dateIsW3CDate(const char *bytes, NSUInteger numberOfBytes) {
    for (NSUInteger i = 0; i < numberOfBytes; i++) {
        char ch = bytes[i];
        if (ch == ' ' || ch == '\r' || ch == '\n' || ch == '\t') {
            continue;
        }
        if (numberOfBytes - i < 5) {
            return NO;
        }

        // Only the first non-whitespace position is tested. The bytes are read as
        // unsigned char because <ctype.h> functions are undefined for negative values,
        // which a plain char holding a UTF-8 high byte would be.
        const unsigned char *candidate = (const unsigned char *)&bytes[i];
        return isdigit(candidate[0]) && isdigit(candidate[1]) && isdigit(candidate[2]) && isdigit(candidate[3]) && candidate[4] == '-';
    }
    return NO;
}
/*Returns YES when the input is too short (under 6 bytes) or too long (over 150 bytes)
 to be worth parsing as a date.*/
static BOOL numberOfBytesIsOutsideReasonableRange(NSUInteger numberOfBytes) {
    BOOL isWithinRange = (numberOfBytes >= 6) && (numberOfBytes <= 150);
    return !isWithinRange;
}
#pragma mark - API
/*Parses a date from raw bytes. Inputs shorter than 6 or longer than 150 bytes yield nil.

 The format is chosen by detection: the pubDate parser is used only when the input does
 not look like a W3C date but does look like a pubDate; in every other case (including
 when neither detector matches) the W3C parser is used.*/
NSDate *RSDateWithBytes(const char *bytes, NSUInteger numberOfBytes) {
    if (numberOfBytesIsOutsideReasonableRange(numberOfBytes)) {
        return nil;
    }

    BOOL shouldParseAsPubDate = !dateIsW3CDate(bytes, numberOfBytes) && dateIsPubDate(bytes, numberOfBytes);
    if (shouldParseAsPubDate) {
        return RSParsePubDateWithBytes(bytes, numberOfBytes);
    }

    // W3C is both the detected case and the fallback, in case our detection fails.
    return RSParseW3CWithBytes(bytes, numberOfBytes);
}
/*Parses a date from an NSString by handing its UTF-8 bytes to RSDateWithBytes.
 Returns nil when dateString is nil or has no UTF-8 representation.*/
NSDate *RSDateWithString(NSString *dateString) {
    const char *utf8String = [dateString UTF8String];
    if (utf8String == NULL) {
        // Messaging nil (or a string that cannot be converted) yields NULL, and strlen(NULL) crashes.
        return nil;
    }
    return RSDateWithBytes(utf8String, strlen(utf8String));
}

View File

@ -0,0 +1,35 @@
//
// RSHTMLLinkParser.h
// RSParser
//
// Created by Brent Simmons on 8/7/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
NS_ASSUME_NONNULL_BEGIN
/*Returns all <a href="some_url">some_text</a> as RSHTMLLink object array.*/
@class ParserData;
@class RSHTMLLink;
// Parses an HTML document and collects its anchor tags.
@interface RSHTMLLinkParser : NSObject
// Parses parserData synchronously and returns one RSHTMLLink per <a> start tag, in document order.
+ (NSArray <RSHTMLLink *> *)htmlLinksWithParserData:(ParserData *)parserData;
@end
// One anchor tag found by RSHTMLLinkParser.
@interface RSHTMLLink : NSObject
// Any of these, even urlString, may be nil, because HTML can be bad.
@property (nonatomic, nullable, readonly) NSString *urlString; //absolute
@property (nonatomic, nullable, readonly) NSString *text;
@property (nonatomic, nullable, readonly) NSString *title; //title attribute inside anchor tag
@end
NS_ASSUME_NONNULL_END

View File

@ -0,0 +1,154 @@
//
// RSHTMLLinkParser.m
// RSParser
//
// Created by Brent Simmons on 8/7/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSHTMLLinkParser.h"
#import "RSSAXHTMLParser.h"
#import "RSSAXParser.h"
#import "RSParserInternal.h"
#import "ParserData.h"
#import <libxml/xmlstring.h>
// Private state. The parser acts as its own SAX delegate.
@interface RSHTMLLinkParser() <RSSAXHTMLParserDelegate>
@property (nonatomic, readonly) NSMutableArray *links; // RSHTMLLink objects, appended as anchors are encountered
@property (nonatomic, readonly) ParserData *parserData; // the document being parsed
@property (nonatomic, readonly) NSMutableArray *dictionaries; // created in init; not otherwise used by the methods of this class
@property (nonatomic, readonly) NSURL *baseURL; // built from parserData.url; base for resolving href values
@end
// Redeclares the public read-only properties as read-write so the parser can fill them in.
@interface RSHTMLLink()
@property (nonatomic, readwrite) NSString *urlString; //absolute
@property (nonatomic, readwrite) NSString *text;
@property (nonatomic, readwrite) NSString *title; //title attribute inside anchor tag
@end
@implementation RSHTMLLinkParser

// Attribute names looked up (case-insensitively) on anchor tags.
static NSString *kHrefKey = @"href";
static NSString *kTitleKey = @"title";

// Tag name of an anchor. The length is 2 for a one-letter name, so it presumably counts
// the NUL terminator -- TODO confirm against RSSAXEqualTags.
static const char *kAnchor = "a";
static const NSInteger kAnchorLength = 2;

#pragma mark - Class Methods

/// Parses the document and returns the links that were found.
+ (NSArray *)htmlLinksWithParserData:(ParserData *)parserData {
    return [[[self alloc] initWithParserData:parserData] links];
}

#pragma mark - Init

/// Sets up the collections and the base URL, then parses synchronously, so `links` is
/// complete by the time the initializer returns.
- (instancetype)initWithParserData:(ParserData *)parserData {
    NSParameterAssert(parserData.data);
    NSParameterAssert(parserData.url);

    self = [super init];
    if (self) {
        _links = [NSMutableArray new];
        _parserData = parserData;
        _dictionaries = [NSMutableArray new];
        _baseURL = [NSURL URLWithString:parserData.url];

        [self parse];
    }
    return self;
}

#pragma mark - Parse

/// Runs the SAX HTML parser over the whole document with self as the delegate.
- (void)parse {
    RSSAXHTMLParser *htmlParser = [[RSSAXHTMLParser alloc] initWithDelegate:self];
    [htmlParser parseData:self.parserData.data];
    [htmlParser finishParsing];
}

/// The most recently started link, or nil when none has been seen yet.
- (RSHTMLLink *)currentLink {
    return [self.links lastObject];
}

/// The href attribute resolved against the base URL, or nil when there is no href.
- (NSString *)urlStringFromDictionary:(NSDictionary *)d {
    NSString *href = [d rsparser_objectForCaseInsensitiveKey:kHrefKey];
    if (href == nil) {
        return nil;
    }
    return [NSURL URLWithString:href relativeToURL:self.baseURL].absoluteString;
}

/// The title attribute, if any.
- (NSString *)titleFromDictionary:(NSDictionary *)d {
    return [d rsparser_objectForCaseInsensitiveKey:kTitleKey];
}

/// Copies the URL and title from the attributes onto the current link.
- (void)handleLinkAttributes:(NSDictionary *)d {
    RSHTMLLink *currentLink = self.currentLink;
    currentLink.urlString = [self urlStringFromDictionary:d];
    currentLink.title = [self titleFromDictionary:d];
}

/// Start tag: every anchor creates a new link (even one without attributes) and starts
/// character collection for its text.
- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes {
    if (RSSAXEqualTags(localName, kAnchor, kAnchorLength)) {
        [self.links addObject:[RSHTMLLink new]];

        NSDictionary *attributesDictionary = [SAXParser attributesDictionary:attributes];
        if (!RSParserObjectIsEmpty(attributesDictionary)) {
            [self handleLinkAttributes:attributesDictionary];
        }

        [SAXParser beginStoringCharacters];
    }
}

/// End tag: the collected, whitespace-trimmed characters become the text of the current link.
- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName {
    if (RSSAXEqualTags(localName, kAnchor, kAnchorLength)) {
        self.currentLink.text = SAXParser.currentStringWithTrimmedWhitespace;
    }
}

@end
// Plain value object: all behavior comes from the synthesized properties.
@implementation RSHTMLLink
@end

View File

@ -0,0 +1,97 @@
//
// RSHTMLMetadata.h
// RSParser
//
// Created by Brent Simmons on 3/6/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
@import CoreGraphics;
@class RSHTMLMetadataFeedLink;
@class RSHTMLMetadataAppleTouchIcon;
@class RSHTMLMetadataFavicon;
@class RSHTMLOpenGraphProperties;
@class RSHTMLOpenGraphImage;
@class RSHTMLTag;
@class RSHTMLTwitterProperties;
NS_ASSUME_NONNULL_BEGIN
// Metadata extracted from the tags of an HTML page. All properties are computed once, in the initializer.
@interface RSHTMLMetadata : NSObject
// urlString is the page URL (kept as baseURLString); tags are the parsed tags of the page.
- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray <RSHTMLTag *> *)tags;
@property (nonatomic, readonly) NSString *baseURLString;
@property (nonatomic, readonly) NSArray <RSHTMLTag *> *tags;
@property (nonatomic, readonly) NSArray <NSString *> *faviconLinks DEPRECATED_MSG_ATTRIBUTE("Use the favicons property instead.");
@property (nonatomic, readonly) NSArray <RSHTMLMetadataFavicon *> *favicons; // from link tags whose rel contains "icon"
@property (nonatomic, readonly) NSArray <RSHTMLMetadataAppleTouchIcon *> *appleTouchIcons; // from link tags with rel apple-touch-icon or apple-touch-icon-precomposed
@property (nonatomic, readonly) NSArray <RSHTMLMetadataFeedLink *> *feedLinks; // from link tags with rel alternate and a feed type
@property (nonatomic, readonly) RSHTMLOpenGraphProperties *openGraphProperties;
@property (nonatomic, readonly) RSHTMLTwitterProperties *twitterProperties;
@end
// One apple-touch-icon link tag of a page.
@interface RSHTMLMetadataAppleTouchIcon : NSObject
@property (nonatomic, readonly) NSString *rel;
@property (nonatomic, nullable, readonly) NSString *sizes; // presumably the raw sizes attribute -- TODO confirm in the implementation
@property (nonatomic, readonly) CGSize size; // presumably derived from sizes -- TODO confirm in the implementation
@property (nonatomic, nullable, readonly) NSString *urlString; // Absolute.
@end
// One feed link tag (rel alternate with a feed type) of a page.
@interface RSHTMLMetadataFeedLink : NSObject
@property (nonatomic, nullable, readonly) NSString *title;
@property (nonatomic, nullable, readonly) NSString *type;
@property (nonatomic, nullable, readonly) NSString *urlString; // Absolute.
@end
// One favicon link tag of a page.
@interface RSHTMLMetadataFavicon : NSObject
@property (nonatomic, nullable, readonly) NSString *type;
@property (nonatomic, nullable, readonly) NSString *urlString; // May be nil; callers must check before storing it in a collection.
@end
// Open Graph metadata of a page; currently limited to its images.
@interface RSHTMLOpenGraphProperties : NSObject
// TODO: the rest. At this writing (Nov. 26, 2017) I just care about og:image.
// See http://ogp.me/
- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray <RSHTMLTag *> *)tags;
@property (nonatomic, readonly) NSArray <RSHTMLOpenGraphImage *> *images;
@end
// One Open Graph image with its optional attributes.
// NOTE(review): the mapping to og:image:* properties is presumed from the names -- confirm in the implementation.
@interface RSHTMLOpenGraphImage : NSObject
@property (nonatomic, nullable, readonly) NSString *url;
@property (nonatomic, nullable, readonly) NSString *secureURL;
@property (nonatomic, nullable, readonly) NSString *mimeType;
@property (nonatomic, readonly) CGFloat width;
@property (nonatomic, readonly) CGFloat height;
@property (nonatomic, nullable, readonly) NSString *altText;
@end
// Twitter card metadata of a page; currently limited to the image URL.
@interface RSHTMLTwitterProperties : NSObject
// TODO: the rest. At this writing (Nov. 26, 2017) I just care about twitter:image:src.
- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray <RSHTMLTag *> *)tags;
@property (nonatomic, nullable, readonly) NSString *imageURL; // twitter:image:src
@end
NS_ASSUME_NONNULL_END

View File

@ -0,0 +1,483 @@
//
// RSHTMLMetadata.m
// RSParser
//
// Created by Brent Simmons on 3/6/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSHTMLMetadata.h"
#import "RSParserInternal.h"
#import "RSHTMLTag.h"
// Forward declarations of file-private helper functions used by the classes below.
static NSString *urlStringFromDictionary(NSDictionary *d);
static NSString *absoluteURLStringWithRelativeURLString(NSString *relativeURLString, NSString *baseURLString);
static NSString *absoluteURLStringWithDictionary(NSDictionary *d, NSString *baseURLString);
static NSArray *objectsOfClassWithTags(Class class, NSArray *tags, NSString *baseURLString);
static NSString *relValue(NSDictionary *d);
static BOOL typeIsFeedType(NSString *type);
// Attribute names and attribute values that the metadata extraction looks for.
static NSString *kIconRelValue = @"icon";
static NSString *kHrefKey = @"href";
static NSString *kSrcKey = @"src";
static NSString *kAppleTouchIconValue = @"apple-touch-icon";
static NSString *kAppleTouchIconPrecomposedValue = @"apple-touch-icon-precomposed";
static NSString *kSizesKey = @"sizes";
static NSString *kTitleKey = @"title";
static NSString *kRelKey = @"rel";
// Despite the "Key" suffix, feedLinkTags compares this against the value of the rel attribute.
static NSString *kAlternateKey = @"alternate";
// NOTE(review): presumably the type suffixes accepted by typeIsFeedType() -- confirm at its definition.
static NSString *kRSSSuffix = @"/rss+xml";
static NSString *kAtomSuffix = @"/atom+xml";
static NSString *kJSONSuffix = @"/json";
static NSString *kTypeKey = @"type";
// Private initializers: each of these metadata objects is built from one tag plus the page's base URL string.
@interface RSHTMLMetadataAppleTouchIcon ()
- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString;
@end
@interface RSHTMLMetadataFeedLink ()
- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString;
@end
@interface RSHTMLMetadataFavicon ()
- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString;
@end
@implementation RSHTMLMetadata
#pragma mark - Init
/// Stores the page URL and tags, then derives every metadata collection from them.
/// `_tags` has to be assigned before the derived properties, because the helper
/// methods called below read `self.tags`.
- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray <RSHTMLTag *> *)tags {
    self = [super init];
    if (self) {
        _baseURLString = urlString;
        _tags = tags;

        _favicons = [self resolvedFaviconLinks];
        _appleTouchIcons = objectsOfClassWithTags([RSHTMLMetadataAppleTouchIcon class], [self appleTouchIconTags], urlString);
        _feedLinks = objectsOfClassWithTags([RSHTMLMetadataFeedLink class], [self feedLinkTags], urlString);

        _openGraphProperties = [[RSHTMLOpenGraphProperties alloc] initWithURLString:urlString tags:tags];
        _twitterProperties = [[RSHTMLTwitterProperties alloc] initWithURLString:urlString tags:tags];
    }
    return self;
}
#pragma mark - Private
/// Returns the link tags that have a non-empty URL and whose rel attribute contains
/// `valueToMatch` as one of its whitespace-delimited words (compared case-insensitively).
- (NSArray<RSHTMLTag *> *)linkTagsWithMatchingRel:(NSString *)valueToMatch {
    NSMutableArray<RSHTMLTag *> *matchingTags = [NSMutableArray array];

    for (RSHTMLTag *tag in self.tags) {
        if (tag.type != RSHTMLTagTypeLink) {
            continue;
        }
        if (RSParserStringIsEmpty(urlStringFromDictionary(tag.attributes))) {
            continue;
        }

        NSString *relAttribute = relValue(tag.attributes);
        if (!relAttribute) {
            continue;
        }

        NSArray *relWords = [relAttribute componentsSeparatedByCharactersInSet:NSCharacterSet.whitespaceAndNewlineCharacterSet];
        for (NSString *relWord in relWords) {
            BOOL isMatch = ([relWord compare:valueToMatch options:NSCaseInsensitiveSearch] == NSOrderedSame);
            if (isMatch) {
                // One matching word is enough; a tag is added at most once.
                [matchingTags addObject:tag];
                break;
            }
        }
    }

    return matchingTags;
}
// Returns the <link> tags whose entire `rel` value, lowercased, is either
// "apple-touch-icon" or "apple-touch-icon-precomposed".
- (NSArray<RSHTMLTag *> *)appleTouchIconTags {
    NSMutableArray *iconTags = [NSMutableArray new];

    for (RSHTMLTag *candidate in self.tags) {
        if (candidate.type == RSHTMLTagTypeLink) {
            NSString *rel = relValue(candidate.attributes).lowercaseString;
            BOOL isTouchIcon = [rel isEqualToString:kAppleTouchIconValue];
            BOOL isPrecomposed = [rel isEqualToString:kAppleTouchIconPrecomposedValue];
            if (isTouchIcon || isPrecomposed) {
                [iconTags addObject:candidate];
            }
        }
    }

    return iconTags;
}
// Returns the <link> tags that advertise a feed: `rel` must equal "alternate"
// (case-insensitively), `type` must be a recognized feed type, and the tag
// must carry a non-empty URL.
- (NSArray<RSHTMLTag *> *)feedLinkTags {
    NSMutableArray *feedTags = [NSMutableArray new];

    for (RSHTMLTag *candidate in self.tags) {
        if (candidate.type != RSHTMLTagTypeLink) {
            continue;
        }

        NSDictionary *attributes = candidate.attributes;
        BOOL isAlternate = [relValue(attributes).lowercaseString isEqualToString:kAlternateKey];

        if (isAlternate
            && typeIsFeedType([attributes rsparser_objectForCaseInsensitiveKey:kTypeKey])
            && !RSParserStringIsEmpty(urlStringFromDictionary(attributes))) {
            [feedTags addObject:candidate];
        }
    }

    return feedTags;
}
// Returns the URL string of every favicon in self.favicons, in the same order.
- (NSArray<NSString *> *)faviconLinks {
    NSArray<RSHTMLMetadataFavicon *> *allFavicons = self.favicons;
    NSMutableArray *urlStrings = [NSMutableArray arrayWithCapacity:allFavicons.count];
    for (RSHTMLMetadataFavicon *oneFavicon in allFavicons) {
        [urlStrings addObject:oneFavicon.urlString];
    }
    return urlStrings;
}
// Builds favicon objects from every <link> tag whose `rel` contains "icon".
// Favicons whose URL could not be resolved are dropped, and only the first
// favicon for any given resolved URL is kept.
- (NSArray<RSHTMLMetadataFavicon *> *)resolvedFaviconLinks {
    NSArray<RSHTMLTag *> *iconTags = [self linkTagsWithMatchingRel:kIconRelValue];
    NSMutableArray *favicons = [NSMutableArray array];
    NSMutableSet<NSString *> *seenURLStrings = [NSMutableSet setWithCapacity:iconTags.count];

    for (RSHTMLTag *iconTag in iconTags) {
        RSHTMLMetadataFavicon *favicon = [[RSHTMLMetadataFavicon alloc] initWithTag:iconTag baseURLString:self.baseURLString];
        NSString *resolved = favicon.urlString;
        if (resolved != nil && ![seenURLStrings containsObject:resolved]) {
            [seenURLStrings addObject:resolved];
            [favicons addObject:favicon];
        }
    }

    return favicons;
}
@end
// Looks up the `rel` attribute in an attributes dictionary, ignoring key case.
static NSString *relValue(NSDictionary *d) {
    NSString *rel = [d rsparser_objectForCaseInsensitiveKey:kRelKey];
    return rel;
}
// Returns the tag's URL as written in the markup: the `href` attribute if
// present, otherwise the `src` attribute. Keys are matched case-insensitively.
static NSString *urlStringFromDictionary(NSDictionary *d) {
    NSString *href = [d rsparser_objectForCaseInsensitiveKey:kHrefKey];
    return href ?: [d rsparser_objectForCaseInsensitiveKey:kSrcKey];
}
// Resolves relativeURLString against baseURLString and returns the
// standardized absolute URL as a string. Returns nil when the base URL
// string cannot be turned into an NSURL.
static NSString *absoluteURLStringWithRelativeURLString(NSString *relativeURLString, NSString *baseURLString) {
    NSURL *baseURL = [NSURL URLWithString:baseURLString];
    if (baseURL == nil) {
        return nil;
    }
    NSURL *resolvedURL = [NSURL URLWithString:relativeURLString relativeToURL:baseURL];
    NSURL *standardizedURL = resolvedURL.absoluteURL.standardizedURL;
    return standardizedURL.absoluteString;
}
// Extracts the URL (href or src) from a tag's attributes and resolves it
// against baseURLString. Returns nil when the tag has no URL.
static NSString *absoluteURLStringWithDictionary(NSDictionary *d, NSString *baseURLString) {
    NSString *rawURLString = urlStringFromDictionary(d);
    return RSParserStringIsEmpty(rawURLString)
        ? nil
        : absoluteURLStringWithRelativeURLString(rawURLString, baseURLString);
}
// Creates one instance of `class` per tag by calling
// -initWithTag:baseURLString:, and returns the instances that were created
// successfully, in tag order.
static NSArray *objectsOfClassWithTags(Class class, NSArray *tags, NSString *baseURLString) {
    NSMutableArray *instances = [NSMutableArray new];
    for (RSHTMLTag *oneTag in tags) {
        id instance = [[class alloc] initWithTag:oneTag baseURLString:baseURLString];
        if (instance != nil) {
            [instances addObject:instance];
        }
    }
    return instances;
}
// Returns YES when a <link> `type` attribute names a feed format.
// The comparison is case-insensitive and looks only at the end of the MIME
// type: ".../rss+xml", ".../atom+xml", ".../json", or ".../feed+json".
// A nil type yields NO.
static BOOL typeIsFeedType(NSString *type) {
    type = type.lowercaseString;
    if ([type hasSuffix:kRSSSuffix] || [type hasSuffix:kAtomSuffix] || [type hasSuffix:kJSONSuffix]) {
        return YES;
    }
    // JSON Feed 1.1 registers "application/feed+json", which does not end in
    // "/json" and would otherwise be missed.
    return [type hasSuffix:@"/feed+json"];
}
@implementation RSHTMLMetadataAppleTouchIcon

// Builds an Apple touch icon description from a <link> tag.
// - urlString: the tag's href/src resolved against baseURLString (may be nil).
// - sizes / rel: the raw attribute values, looked up case-insensitively.
// - size: parsed from `sizes` when it has the form "<width>x<height>";
//   otherwise CGSizeZero.
- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString {
    self = [super init];
    if (!self) {
        return nil;
    }

    NSDictionary *d = tag.attributes;
    _urlString = absoluteURLStringWithDictionary(d, baseURLString);
    _sizes = [d rsparser_objectForCaseInsensitiveKey:kSizesKey];
    _rel = [d rsparser_objectForCaseInsensitiveKey:kRelKey];

    _size = CGSizeZero;
    if (_sizes) {
        // HTML allows either "x" or "X" between the two dimensions, so
        // lowercase before splitting; otherwise "180X180" would give zero.
        NSArray *components = [_sizes.lowercaseString componentsSeparatedByString:@"x"];
        if (components.count == 2) {
            CGFloat width = [components[0] floatValue];
            CGFloat height = [components[1] floatValue];
            _size = CGSizeMake(width, height);
        }
    }

    return self;
}

@end
@implementation RSHTMLMetadataFeedLink

// Builds a feed-link description from a <link> tag: the resolved absolute
// URL plus the raw `title` and `type` attribute values.
- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString {
    self = [super init];
    if (self) {
        NSDictionary *attributes = tag.attributes;
        _urlString = absoluteURLStringWithDictionary(attributes, baseURLString);
        _title = [attributes rsparser_objectForCaseInsensitiveKey:kTitleKey];
        _type = [attributes rsparser_objectForCaseInsensitiveKey:kTypeKey];
    }
    return self;
}

@end
@implementation RSHTMLMetadataFavicon

// Builds a favicon description from a <link> tag: the resolved absolute URL
// plus the raw `type` attribute value.
- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString {
    self = [super init];
    if (self) {
        NSDictionary *attributes = tag.attributes;
        _urlString = absoluteURLStringWithDictionary(attributes, baseURLString);
        _type = [attributes rsparser_objectForCaseInsensitiveKey:kTypeKey];
    }
    return self;
}

@end
// Class extension declaring the image properties as readwrite so the Open Graph
// parser in this file can assign them.
// NOTE(review): presumably these are readonly in the public header — confirm.
@interface RSHTMLOpenGraphImage ()
@property (nonatomic, readwrite) NSString *url;
@property (nonatomic, readwrite) NSString *secureURL;
@property (nonatomic, readwrite) NSString *mimeType;
@property (nonatomic, readwrite) CGFloat width;
@property (nonatomic, readwrite) CGFloat height;
@property (nonatomic, readwrite) NSString *altText;
@end
// Plain value holder: no methods beyond the property accessors.
@implementation RSHTMLOpenGraphImage
@end
// Private storage: the Open Graph images collected so far, in the order
// their tags were encountered.
@interface RSHTMLOpenGraphProperties ()
@property (nonatomic) NSMutableArray *ogImages;
@end

@implementation RSHTMLOpenGraphProperties

// Collects Open Graph image metadata from the given tags.
// urlString is accepted but not used by this initializer.
- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray <RSHTMLTag *> *)tags {
    self = [super init];
    if (self) {
        _ogImages = [NSMutableArray new];
        [self parseTags:tags];
    }
    return self;
}

// The image currently being filled in (the most recently added one), or nil.
- (RSHTMLOpenGraphImage *)currentImage {
    NSMutableArray *collected = self.ogImages;
    return collected.lastObject;
}

// Appends a new, empty image and returns it.
- (RSHTMLOpenGraphImage *)pushImage {
    RSHTMLOpenGraphImage *freshImage = [[RSHTMLOpenGraphImage alloc] init];
    [self.ogImages addObject:freshImage];
    return freshImage;
}

// Returns the current image, creating the first one if none exists yet.
- (RSHTMLOpenGraphImage *)ensureImage {
    return [self currentImage] ?: [self pushImage];
}

// Public accessor for the collected images.
- (NSArray *)images {
    return self.ogImages;
}

// <meta> attribute names and the og: property names this parser understands.
static NSString *ogPrefix = @"og:";
static NSString *ogImage = @"og:image";
static NSString *ogImageURL = @"og:image:url";
static NSString *ogImageSecureURL = @"og:image:secure_url";
static NSString *ogImageType = @"og:image:type";
static NSString *ogImageWidth = @"og:image:width";
static NSString *ogImageHeight = @"og:image:height";
static NSString *ogImageAlt = @"og:image:alt";
static NSString *ogPropertyKey = @"property";
static NSString *ogContentKey = @"content";

// Walks the <meta> tags and folds og:image* properties into image objects.
// An "og:image" tag starts a new image unless the current image has no URL
// yet; every other og:image:* property is applied to the current image
// (creating one if necessary). Tags without an og: property or without
// content are ignored.
- (void)parseTags:(NSArray *)tags {
    for (RSHTMLTag *metaTag in tags) {
        if (metaTag.type != RSHTMLTagTypeMeta) {
            continue;
        }

        NSDictionary *attributes = metaTag.attributes;
        NSString *property = attributes[ogPropertyKey];
        NSString *value = attributes[ogContentKey];
        // A nil property fails the prefix test, so it is skipped here too.
        if (![property hasPrefix:ogPrefix] || !value) {
            continue;
        }

        if ([property isEqualToString:ogImage]) {
            // og:image usually appears before the other image properties,
            // so it normally opens a new image.
            RSHTMLOpenGraphImage *target = [self currentImage];
            BOOL needsNewImage = (target == nil) || (target.url != nil);
            if (needsNewImage) {
                target = [self pushImage];
            }
            target.url = value;
            continue;
        }
        if ([property isEqualToString:ogImageURL]) {
            [self ensureImage].url = value;
            continue;
        }
        if ([property isEqualToString:ogImageSecureURL]) {
            [self ensureImage].secureURL = value;
            continue;
        }
        if ([property isEqualToString:ogImageType]) {
            [self ensureImage].mimeType = value;
            continue;
        }
        if ([property isEqualToString:ogImageAlt]) {
            [self ensureImage].altText = value;
            continue;
        }
        if ([property isEqualToString:ogImageWidth]) {
            [self ensureImage].width = [value floatValue];
            continue;
        }
        if ([property isEqualToString:ogImageHeight]) {
            [self ensureImage].height = [value floatValue];
        }
    }
}

@end
@implementation RSHTMLTwitterProperties

// <meta> attribute names and the single Twitter property this class reads.
static NSString *twitterNameKey = @"name";
static NSString *twitterContentKey = @"content";
static NSString *twitterImageSrc = @"twitter:image:src";

// Scans the <meta> tags for the first one named "twitter:image:src" that has
// non-empty content and stores that content as the image URL.
// urlString is accepted but not used by this initializer.
- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray <RSHTMLTag *> *)tags {
    self = [super init];
    if (self) {
        for (RSHTMLTag *metaTag in tags) {
            if (metaTag.type != RSHTMLTagTypeMeta) {
                continue;
            }
            NSDictionary *attributes = metaTag.attributes;
            NSString *metaName = attributes[twitterNameKey];
            // A nil name compares unequal, so it is skipped as well.
            if (![metaName isEqualToString:twitterImageSrc]) {
                continue;
            }
            NSString *value = attributes[twitterContentKey];
            // nil.length is 0, so this covers both missing and empty content.
            if (value.length < 1) {
                continue;
            }
            _imageURL = value;
            break;
        }
    }
    return self;
}

@end

View File

@ -0,0 +1,24 @@
//
// RSHTMLMetadataParser.h
// RSParser
//
// Created by Brent Simmons on 3/6/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
@class RSHTMLMetadata;
@class ParserData;
NS_ASSUME_NONNULL_BEGIN
// Extracts page metadata (link and meta tags) from HTML.
@interface RSHTMLMetadataParser : NSObject
// Parses parserData synchronously and returns the resulting metadata.
// parserData must have both data and a url (asserted in the implementation).
+ (RSHTMLMetadata *)HTMLMetadataWithParserData:(ParserData *)parserData;
@end
NS_ASSUME_NONNULL_END

View File

@ -0,0 +1,151 @@
//
// RSHTMLMetadataParser.m
// RSParser
//
// Created by Brent Simmons on 3/6/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSHTMLMetadataParser.h"
#import "RSHTMLMetadata.h"
#import "RSSAXHTMLParser.h"
#import "RSSAXHTMLParser.h"
#import "RSSAXParser.h"
#import "RSParserInternal.h"
#import "ParserData.h"
#import "RSHTMLTag.h"
#import <libxml/xmlstring.h>
// Private state for a single parse run.
@interface RSHTMLMetadataParser () <RSSAXHTMLParserDelegate>
// The input handed to the initializer.
@property (nonatomic, readonly) ParserData *parserData;
// The result; assigned at the end of -parse.
@property (nonatomic, readwrite) RSHTMLMetadata *metadata;
// RSHTMLTag objects collected from <link> and <meta> start elements.
@property (nonatomic) NSMutableArray *tags;
// Set once <body> is reached (unless scanning past the head); later elements are ignored.
@property (nonatomic) BOOL didFinishParsing;
// When YES, reaching <body> does not stop tag collection.
@property (nonatomic) BOOL shouldScanPastHeadSection;
@end
@implementation RSHTMLMetadataParser
#pragma mark - Class Methods
// Convenience entry point: creates a parser (which parses during init) and
// returns the metadata it produced.
+ (RSHTMLMetadata *)HTMLMetadataWithParserData:(ParserData *)parserData {
    RSHTMLMetadataParser *metadataParser = [[self alloc] initWithParserData:parserData];
    RSHTMLMetadata *result = metadataParser.metadata;
    return result;
}
#pragma mark - Init
// Stores the input, decides whether to keep scanning past the head section,
// and runs the parse immediately. parserData must have data and a url.
- (instancetype)initWithParserData:(ParserData *)parserData {
    NSParameterAssert(parserData.data);
    NSParameterAssert(parserData.url);

    self = [super init];
    if (self) {
        _parserData = parserData;
        _tags = [NSMutableArray new];

        // On some YouTube pages the feed link tag appears after the head
        // section, inside the body. For any URL containing "youtube" we
        // therefore keep scanning after the head section.
        // (This match can yield false positives, but it's harmless.)
        NSRange youTubeRange = [parserData.url rangeOfString:@"youtube" options:NSCaseInsensitiveSearch];
        _shouldScanPastHeadSection = (youTubeRange.location != NSNotFound);

        [self parse];
    }
    return self;
}
#pragma mark - Parse
// Feeds the HTML data through the SAX HTML parser (which calls back into
// this object to collect tags), then builds the metadata from those tags.
- (void)parse {
    ParserData *input = self.parserData;

    RSSAXHTMLParser *htmlParser = [[RSSAXHTMLParser alloc] initWithDelegate:self];
    [htmlParser parseData:input.data];
    [htmlParser finishParsing];

    self.metadata = [[RSHTMLMetadata alloc] initWithURLString:input.url tags:self.tags];
}
// Attribute names inspected when deciding whether a <link> tag is worth keeping.
static NSString *kHrefKey = @"href";
static NSString *kSrcKey = @"src";
static NSString *kRelKey = @"rel";
// Returns the `href` attribute if present, otherwise the `src` attribute.
// Keys are matched case-insensitively.
- (NSString *)linkForDictionary:(NSDictionary *)d {
    NSString *href = [d rsparser_objectForCaseInsensitiveKey:kHrefKey];
    return href ?: [d rsparser_objectForCaseInsensitiveKey:kSrcKey];
}
// Records a <link> tag, but only when it has both a non-empty `rel` and a
// non-empty href/src.
- (void)handleLinkAttributes:(NSDictionary *)d {
    BOOL hasRel = !RSParserStringIsEmpty([d rsparser_objectForCaseInsensitiveKey:kRelKey]);
    if (hasRel && !RSParserStringIsEmpty([self linkForDictionary:d])) {
        [self.tags addObject:[RSHTMLTag linkTagWithAttributes:d]];
    }
}
// Records a <meta> tag unconditionally.
- (void)handleMetaAttributes:(NSDictionary *)d {
    [self.tags addObject:[RSHTMLTag metaTagWithAttributes:d]];
}
#pragma mark - RSSAXHTMLParserDelegate
// Element names of interest and the lengths passed alongside them to RSSAXEqualTags.
// Each length is strlen(name) + 1.
// NOTE(review): presumably the extra 1 covers the terminating NUL — confirm against RSSAXEqualTags.
static const char *kBody = "body";
static const NSInteger kBodyLength = 5;
static const char *kLink = "link";
static const NSInteger kLinkLength = 5;
static const char *kMeta = "meta";
static const NSInteger kMetaLength = 5;
// Delegate callback for each start element.
// - Ignores everything once parsing has been marked finished.
// - <body> marks parsing finished, unless scanning past the head is enabled.
// - <link> and <meta> elements with a non-empty attributes dictionary are
//   handed to the corresponding handler.
- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes {
    if (self.didFinishParsing) {
        return;
    }

    BOOL isBody = RSSAXEqualTags(localName, kBody, kBodyLength);
    if (isBody && !self.shouldScanPastHeadSection) {
        self.didFinishParsing = YES;
        return;
    }

    BOOL isLink = RSSAXEqualTags(localName, kLink, kLinkLength);
    BOOL isMeta = !isLink && RSSAXEqualTags(localName, kMeta, kMetaLength);
    if (!isLink && !isMeta) {
        return;
    }

    NSDictionary *tagAttributes = [SAXParser attributesDictionary:attributes];
    if (RSParserObjectIsEmpty(tagAttributes)) {
        return;
    }

    if (isLink) {
        [self handleLinkAttributes:tagAttributes];
    }
    else {
        [self handleMetaAttributes:tagAttributes];
    }
}
@end

View File

@ -0,0 +1,33 @@
//
// RSHTMLTag.h
// RSParser
//
// Created by Brent Simmons on 11/26/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
NS_ASSUME_NONNULL_BEGIN
// Element-name strings for the two supported tag kinds.
extern NSString *RSHTMLTagNameLink; // @"link"
extern NSString *RSHTMLTagNameMeta; // @"meta"
// The kind of HTML element an RSHTMLTag was created from.
typedef NS_ENUM(NSInteger, RSHTMLTagType) {
RSHTMLTagTypeLink,
RSHTMLTagTypeMeta
};
// A parsed HTML tag: its kind plus its attributes dictionary.
@interface RSHTMLTag : NSObject
// Designated initializer; stores type and attributes as given.
- (instancetype)initWithType:(RSHTMLTagType)type attributes:(NSDictionary *)attributes;
// Convenience constructors for the two tag kinds.
+ (RSHTMLTag *)linkTagWithAttributes:(NSDictionary *)attributes;
+ (RSHTMLTag *)metaTagWithAttributes:(NSDictionary *)attributes;
@property (nonatomic, readonly) RSHTMLTagType type;
@property (nonatomic, readonly) NSDictionary *attributes;
@end
NS_ASSUME_NONNULL_END

View File

@ -0,0 +1,43 @@
//
// RSHTMLTag.m
// RSParser
//
// Created by Brent Simmons on 11/26/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
#import "RSHTMLTag.h"
// Definitions of the element-name strings declared in RSHTMLTag.h.
NSString *RSHTMLTagNameLink = @"link";
NSString *RSHTMLTagNameMeta = @"meta";
@implementation RSHTMLTag

// Stores the tag kind and its attributes dictionary as given.
- (instancetype)initWithType:(RSHTMLTagType)type attributes:(NSDictionary *)attributes {
    self = [super init];
    if (self) {
        _type = type;
        _attributes = attributes;
    }
    return self;
}

// Creates a tag of kind RSHTMLTagTypeLink.
+ (RSHTMLTag *)linkTagWithAttributes:(NSDictionary *)attributes {
    RSHTMLTag *linkTag = [[self alloc] initWithType:RSHTMLTagTypeLink attributes:attributes];
    return linkTag;
}

// Creates a tag of kind RSHTMLTagTypeMeta.
+ (RSHTMLTag *)metaTagWithAttributes:(NSDictionary *)attributes {
    RSHTMLTag *metaTag = [[self alloc] initWithType:RSHTMLTagTypeMeta attributes:attributes];
    return metaTag;
}

// Debug description: class name, address, numeric tag type, and attributes.
- (NSString *)description {
    NSString *className = NSStringFromClass([self class]);
    long typeValue = (long)self.type;
    return [NSString stringWithFormat:@"<%@: %p> type: %ld attributes: %@", className, self, typeValue, self.attributes];
}

@end

View File

@ -0,0 +1,36 @@
//
// RSOPMLAttributes.h
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
// OPML allows for arbitrary attributes.
// These are the common attributes in OPML files used as RSS subscription lists.
// Attribute-name keys; the trailing comment on each line shows the string value.
extern NSString *OPMLTextKey; //text
extern NSString *OPMLTitleKey; //title
extern NSString *OPMLDescriptionKey; //description
extern NSString *OPMLTypeKey; //type
extern NSString *OPMLVersionKey; //version
// NOTE(review): the identifier reads "HMTL" rather than "HTML"; it is public API, so it is left unchanged.
extern NSString *OPMLHMTLURLKey; //htmlUrl
extern NSString *OPMLXMLURLKey; //xmlUrl
// Convenience accessors for the common OPML attributes of an outline's attributes dictionary.
@interface NSDictionary (RSOPMLAttributes)
// A frequent error in OPML files is to mess up the capitalization,
// so these do a case-insensitive lookup.
// Each accessor returns whatever is stored under the corresponding key.
@property (nonatomic, readonly) NSString *opml_text;
@property (nonatomic, readonly) NSString *opml_title;
@property (nonatomic, readonly) NSString *opml_description;
@property (nonatomic, readonly) NSString *opml_type;
@property (nonatomic, readonly) NSString *opml_version;
@property (nonatomic, readonly) NSString *opml_htmlUrl;
@property (nonatomic, readonly) NSString *opml_xmlUrl;
@end

View File

@ -0,0 +1,68 @@
//
// RSOPMLAttributes.m
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSOPMLAttributes.h"
#import "RSParserInternal.h"
// Definitions of the attribute-name keys declared in RSOPMLAttributes.h.
NSString *OPMLTextKey = @"text";
NSString *OPMLTitleKey = @"title";
NSString *OPMLDescriptionKey = @"description";
NSString *OPMLTypeKey = @"type";
NSString *OPMLVersionKey = @"version";
NSString *OPMLHMTLURLKey = @"htmlUrl";
NSString *OPMLXMLURLKey = @"xmlUrl";
@implementation NSDictionary (RSOPMLAttributes)

// Shared lookup used by every accessor below: case-insensitive key match.
static id RSOPMLAttributeValue(NSDictionary *attributes, NSString *key) {
    return [attributes rsparser_objectForCaseInsensitiveKey:key];
}

// Value of the "text" attribute.
- (NSString *)opml_text {
    return RSOPMLAttributeValue(self, OPMLTextKey);
}

// Value of the "title" attribute.
- (NSString *)opml_title {
    return RSOPMLAttributeValue(self, OPMLTitleKey);
}

// Value of the "description" attribute.
- (NSString *)opml_description {
    return RSOPMLAttributeValue(self, OPMLDescriptionKey);
}

// Value of the "type" attribute.
- (NSString *)opml_type {
    return RSOPMLAttributeValue(self, OPMLTypeKey);
}

// Value of the "version" attribute.
- (NSString *)opml_version {
    return RSOPMLAttributeValue(self, OPMLVersionKey);
}

// Value of the "htmlUrl" attribute.
- (NSString *)opml_htmlUrl {
    return RSOPMLAttributeValue(self, OPMLHMTLURLKey);
}

// Value of the "xmlUrl" attribute.
- (NSString *)opml_xmlUrl {
    return RSOPMLAttributeValue(self, OPMLXMLURLKey);
}

@end

View File

@ -0,0 +1,21 @@
//
// RSOPMLDocument.h
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
#import "RSOPMLItem.h"
// The root item of a parsed OPML file.
@interface RSOPMLDocument : RSOPMLItem
// Document title; RSOPMLParser assigns it from the <title> element's text.
@property (nonatomic) NSString *title;
// The URL string of the parsed data; RSOPMLParser copies it from the ParserData.
@property (nonatomic) NSString *url;
@end

View File

@ -0,0 +1,14 @@
//
// RSOPMLDocument.m
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSOPMLDocument.h"
// No custom behavior: the declared properties are auto-synthesized and everything else is inherited from RSOPMLItem.
@implementation RSOPMLDocument
@end

View File

@ -0,0 +1,19 @@
//
// RSOPMLError.h
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
// Error domain for errors produced by the OPML parser.
extern NSString *RSOPMLErrorDomain;
// Error codes used within RSOPMLErrorDomain.
typedef NS_ENUM(NSInteger, RSOPMLErrorCode) {
RSOPMLErrorCodeDataIsWrongFormat = 1024
};
// Builds the "not an OPML file" error; fileName is interpolated into the localized description.
NSError *RSOPMLWrongFormatError(NSString *fileName);

View File

@ -0,0 +1,22 @@
//
// RSOPMLError.m
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSOPMLError.h"
// Error domain for errors produced by the OPML parser.
NSString *RSOPMLErrorDomain = @"com.ranchero.OPML";

// Builds the error reported when data handed to the OPML parser is not OPML.
// fileName is interpolated into the localized description; the error uses
// RSOPMLErrorDomain with code RSOPMLErrorCodeDataIsWrongFormat.
NSError *RSOPMLWrongFormatError(NSString *fileName) {
    // The user-facing text previously read "cant" / "its" (missing apostrophes).
    // NOTE(review): this string is also the localization lookup key, so any
    // existing strings tables must use the corrected key.
    NSString *localizedDescriptionFormatString = NSLocalizedString(@"The file %@ can’t be parsed because it’s not an OPML file.", @"OPML wrong format");
    NSString *localizedDescription = [NSString stringWithFormat:localizedDescriptionFormatString, fileName];
    NSString *localizedFailureString = NSLocalizedString(@"The file is not an OPML file.", @"OPML wrong format");
    NSDictionary *userInfo = @{NSLocalizedDescriptionKey: localizedDescription, NSLocalizedFailureReasonErrorKey: localizedFailureString};
    return [[NSError alloc] initWithDomain:RSOPMLErrorDomain code:RSOPMLErrorCodeDataIsWrongFormat userInfo:userInfo];
}

View File

@ -0,0 +1,24 @@
//
// RSOPMLFeedSpecifier.h
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
NS_ASSUME_NONNULL_BEGIN
// Describes one feed listed in an OPML file.
@interface RSOPMLFeedSpecifier : NSObject
// feedURL must be non-empty (asserted). Empty title, feedDescription, or homePageURL values are stored as nil.
- (instancetype)initWithTitle:(NSString * _Nullable)title feedDescription:(NSString * _Nullable)feedDescription homePageURL:(NSString * _Nullable)homePageURL feedURL:(NSString *)feedURL;
@property (nonatomic, nullable, readonly) NSString *title;
@property (nonatomic, nullable, readonly) NSString *feedDescription;
@property (nonatomic, nullable, readonly) NSString *homePageURL;
@property (nonatomic, readonly) NSString *feedURL;
@end
NS_ASSUME_NONNULL_END

View File

@ -0,0 +1,51 @@
//
// RSOPMLFeedSpecifier.m
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSOPMLFeedSpecifier.h"
#import "RSParserInternal.h"
@implementation RSOPMLFeedSpecifier

// Stores the feed's details. feedURL must be non-empty (asserted); the three
// optional strings are normalized so that an empty value is stored as nil.
- (instancetype)initWithTitle:(NSString *)title feedDescription:(NSString *)feedDescription homePageURL:(NSString *)homePageURL feedURL:(NSString *)feedURL {
    NSParameterAssert(!RSParserStringIsEmpty(feedURL));

    self = [super init];
    if (self) {
        _title = RSParserStringIsEmpty(title) ? nil : title;
        _feedDescription = RSParserStringIsEmpty(feedDescription) ? nil : feedDescription;
        _homePageURL = RSParserStringIsEmpty(homePageURL) ? nil : homePageURL;
        _feedURL = feedURL;
    }
    return self;
}

@end

View File

@ -0,0 +1,30 @@
//
// RSOPMLItem.h
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
@class RSOPMLFeedSpecifier;
NS_ASSUME_NONNULL_BEGIN
// One node of an OPML outline tree.
@interface RSOPMLItem : NSObject
// The outline element's attributes.
@property (nonatomic, nullable) NSDictionary *attributes;
// Child items; the getter returns a copy of the internal child list.
@property (nonatomic, nullable) NSArray <RSOPMLItem *> *children;
// Appends child to this item's children.
- (void)addChild:(RSOPMLItem *)child;
// Feed details built from the attributes; nil when there is no xmlUrl attribute.
@property (nonatomic, nullable, readonly) RSOPMLFeedSpecifier *feedSpecifier;
// The title attribute if present, otherwise the text attribute.
@property (nonatomic, nullable, readonly) NSString *titleFromAttributes;
// YES when the item has at least one child.
@property (nonatomic, readonly) BOOL isFolder;
@end
NS_ASSUME_NONNULL_END

View File

@ -0,0 +1,87 @@
//
// RSOPMLItem.m
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSOPMLItem.h"
#import "RSOPMLAttributes.h"
#import "RSOPMLFeedSpecifier.h"
#import "RSParserInternal.h"
// Private storage: the mutable list that backs the public `children` property.
@interface RSOPMLItem ()
@property (nonatomic) NSMutableArray *mutableChildren;
@end

@implementation RSOPMLItem

@synthesize children = _children;
@synthesize feedSpecifier = _feedSpecifier;

// Returns an immutable snapshot of the current children (nil if there are none yet).
- (NSArray *)children {
    NSMutableArray *backingChildren = self.mutableChildren;
    return [backingChildren copy];
}

// Replaces the children and resets the mutable backing list to match.
- (void)setChildren:(NSArray *)children {
    _children = children;
    self.mutableChildren = [_children mutableCopy];
}

// Appends a child, creating the backing list on first use.
- (void)addChild:(RSOPMLItem *)child {
    if (self.mutableChildren == nil) {
        self.mutableChildren = [NSMutableArray new];
    }
    [self.mutableChildren addObject:child];
}

// Lazily builds (and caches) the feed specifier from the attributes.
// Returns nil when the item has no xmlUrl attribute.
- (RSOPMLFeedSpecifier *)feedSpecifier {
    if (_feedSpecifier == nil) {
        NSDictionary *attributes = self.attributes;
        NSString *feedURL = attributes.opml_xmlUrl;
        if (RSParserObjectIsEmpty(feedURL)) {
            return nil;
        }
        _feedSpecifier = [[RSOPMLFeedSpecifier alloc] initWithTitle:self.titleFromAttributes
                                                    feedDescription:self.attributes.opml_description
                                                        homePageURL:self.attributes.opml_htmlUrl
                                                            feedURL:feedURL];
    }
    return _feedSpecifier;
}

// Prefers the "title" attribute; falls back to "text"; nil when neither exists.
- (NSString *)titleFromAttributes {
    return self.attributes.opml_title ?: self.attributes.opml_text;
}

// An item counts as a folder when it has at least one child.
- (BOOL)isFolder {
    NSUInteger childCount = self.mutableChildren.count;
    return childCount > 0;
}

@end

View File

@ -0,0 +1,26 @@
//
// RSOPMLParser.h
// RSParser
//
// Created by Brent Simmons on 7/12/15.
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
@class ParserData;
@class RSOPMLDocument;
// Completion handler for RSParseOPML: receives the parsed document and the error from the parse.
typedef void (^OPMLParserCallback)(RSOPMLDocument *opmlDocument, NSError *error);
// Parses on background thread; calls back on main thread.
void RSParseOPML(ParserData *parserData, OPMLParserCallback callback);
// Synchronous OPML parser.
@interface RSOPMLParser: NSObject
// Parses parserData on the calling thread. When the data is not OPML and error is non-NULL,
// *error is set and nil is returned.
+ (RSOPMLDocument *)parseOPMLWithParserData:(ParserData *)parserData error:(NSError **)error;
@end

View File

@ -0,0 +1,310 @@
//
// RSOPMLParser.m
// RSParser
//
// Created by Brent Simmons on 7/12/15.
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
//
#import "RSOPMLParser.h"
#import "RSSAXParser.h"
#import "RSOPMLItem.h"
#import "RSOPMLDocument.h"
#import "RSOPMLAttributes.h"
#import "RSOPMLError.h"
#import "RSOPMLParser.h"
#import "ParserData.h"
#import <libxml/xmlstring.h>
// Private state for a single parse run.
@interface RSOPMLParser () <RSSAXParserDelegate>
// The document being built; created only when the data looks like OPML.
@property (nonatomic, readwrite) RSOPMLDocument *OPMLDocument;
// Set when the data is rejected as not being OPML.
@property (nonatomic, readwrite) NSError *error;
// Stack of open outline items; the last object is the item currently being filled.
@property (nonatomic) NSMutableArray *itemStack;
@end
// Parses OPML asynchronously: the parse runs on a default-priority global
// queue, and the callback is then dispatched to the main queue with the
// resulting document and error.
void RSParseOPML(ParserData *parserData, OPMLParserCallback callback) {
    dispatch_queue_t backgroundQueue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);

    dispatch_async(backgroundQueue, ^{
        @autoreleasepool {
            NSError *parseError = nil;
            RSOPMLDocument *document = [RSOPMLParser parseOPMLWithParserData:parserData error:&parseError];

            dispatch_async(dispatch_get_main_queue(), ^{
                callback(document, parseError);
            });
        }
    });
}
@implementation RSOPMLParser
#pragma mark - Class Methods
// Parses the data synchronously and returns the resulting document, with
// its url set from parserData. When the parser recorded an error and the
// caller supplied an error pointer, the error is passed back and nil is
// returned.
+ (RSOPMLDocument *)parseOPMLWithParserData:(ParserData *)parserData error:(NSError **)error {
    RSOPMLParser *opmlParser = [[RSOPMLParser alloc] initWithParserData:parserData];

    RSOPMLDocument *parsedDocument = opmlParser.OPMLDocument;
    parsedDocument.url = parserData.url;

    NSError *parseError = opmlParser.error;
    if (parseError != nil && error != NULL) {
        *error = parseError;
        return nil;
    }
    return parsedDocument;
}
#pragma mark - Init
// Creates the parser and runs the parse immediately; results are available
// through OPMLDocument and error afterwards.
- (instancetype)initWithParserData:(ParserData *)parserData {
    self = [super init];
    if (self) {
        [self parse:parserData];
    }
    return self;
}
#pragma mark - Private
// Runs the parse. When the data does not look like OPML, records a
// wrong-format error and stops. Otherwise creates the document, makes it the
// root of the item stack, and feeds the data through the SAX parser.
- (void)parse:(ParserData *)parserData {
    @autoreleasepool {
        NSData *data = parserData.data;

        if ([self canParseData:data]) {
            RSSAXParser *saxParser = [[RSSAXParser alloc] initWithDelegate:self];

            self.itemStack = [NSMutableArray new];
            self.OPMLDocument = [RSOPMLDocument new];
            [self pushItem:self.OPMLDocument];

            [saxParser parseData:data];
            [saxParser finishParsing];
            return;
        }

        // Pick the name shown in the error: the last path component for file
        // URLs; the full URL string for http(s) URLs or whenever no file
        // name could be derived.
        NSString *urlString = parserData.url;
        NSURL *url = [NSURL URLWithString:urlString];
        NSString *filename = url.isFileURL ? url.path.lastPathComponent : nil;
        if (filename == nil || [urlString hasPrefix:@"http"]) {
            filename = urlString;
        }

        self.error = RSOPMLWrongFormatError(filename);
    }
}
// Quick sniff test: returns YES when "<opml" (case-insensitive) appears
// within the first 4096 characters of the data. The data is decoded as
// UTF-8 first; if that fails, encoding detection (suggesting UTF-8) is tried.
// Returns NO when the data cannot be decoded at all.
- (BOOL)canParseData:(NSData *)d {
    @autoreleasepool {
        // No-copy string: it borrows d's bytes and is only used inside this scope.
        NSString *text = [[NSString alloc] initWithBytesNoCopy:(void *)d.bytes length:d.length encoding:NSUTF8StringEncoding freeWhenDone:NO];
        if (text == nil) {
            NSDictionary *options = @{NSStringEncodingDetectionSuggestedEncodingsKey : @[@(NSUTF8StringEncoding)]};
            (void)[NSString stringEncodingForData:d encodingOptions:options convertedString:&text usedLossyConversion:nil];
        }
        if (text == nil) {
            return NO;
        }

        static const NSInteger numberOfCharactersToSearch = 4096;
        NSUInteger searchLength = numberOfCharactersToSearch;
        if (text.length < numberOfCharactersToSearch) {
            searchLength = text.length;
        }

        NSRange opmlRange = [text rangeOfString:@"<opml" options:NSCaseInsensitiveSearch range:NSMakeRange(0, searchLength)];
        return opmlRange.length >= 1;
    }
}
// Makes item the current item by pushing it onto the item stack.
- (void)pushItem:(RSOPMLItem *)item {
    NSMutableArray *stack = self.itemStack;
    [stack addObject:item];
}
// Pops the current item off the item stack.
// An empty stack indicates a logic error and trips the assertion in debug
// builds; in production the pop is simply skipped so the parser never crashes.
- (void)popItem {
    NSAssert(self.itemStack.count > 0, nil);

    if (self.itemStack.count == 0) {
        return;
    }
    [self.itemStack removeLastObject];
}
// The item on top of the item stack, or nil when the stack is empty.
- (RSOPMLItem *)currentItem {
    NSMutableArray *stack = self.itemStack;
    return stack.lastObject;
}
#pragma mark - RSSAXParserDelegate
// Element name for OPML outline nodes and the length passed with it to
// RSSAXEqualTags (strlen("outline") + 1). The length is an NSInteger, like
// every other tag-length constant in this file; it was previously declared
// as `char`.
static const char *kOutline = "outline";
static const NSInteger kOutlineLength = 8;
// Delegate callback for each start element.
// - <title>: start capturing character data.
// - <outline>: create an item with the element's attributes, attach it to
//   the current item, and make it the new current item.
// All other elements are ignored.
- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes {
    if (RSSAXEqualTags(localName, kTitle, kTitleLength)) {
        [SAXParser beginStoringCharacters];
    }
    else if (RSSAXEqualTags(localName, kOutline, kOutlineLength)) {
        RSOPMLItem *outlineItem = [[RSOPMLItem alloc] init];
        outlineItem.attributes = [SAXParser attributesDictionary:attributes numberOfAttributes:numberOfAttributes];

        [[self currentItem] addChild:outlineItem];
        [self pushItem:outlineItem];
    }
}
// Delegate callback for each end element.
// - </title>: if the current item is the document itself, store the captured
//   text (whitespace-trimmed) as the document title.
// - </outline>: pop the current item.
- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri {
    if (RSSAXEqualTags(localName, kTitle, kTitleLength)) {
        RSOPMLItem *current = [self currentItem];
        if ([current isKindOfClass:[RSOPMLDocument class]]) {
            RSOPMLDocument *document = (RSOPMLDocument *)current;
            document.title = SAXParser.currentStringWithTrimmedWhitespace;
        }
    }
    else if (RSSAXEqualTags(localName, kOutline, kOutlineLength)) {
        [self popItem];
    }
}
// C-string names recognized by the parser, each paired with strlen(name) + 1.
// kTitle is also used to recognize the <title> element in the start/end element callbacks;
// all of them are matched in -saxParser:internedStringForName:prefix:.
static const char *kText = "text";
static const NSInteger kTextLength = 5;
static const char *kTitle = "title";
static const NSInteger kTitleLength = 6;
static const char *kDescription = "description";
static const NSInteger kDescriptionLength = 12;
static const char *kType = "type";
static const NSInteger kTypeLength = 5;
static const char *kVersion = "version";
static const NSInteger kVersionLength = 8;
static const char *kHTMLURL = "htmlUrl";
static const NSInteger kHTMLURLLength = 8;
static const char *kXMLURL = "xmlUrl";
static const NSInteger kXMLURLLength = 7;
// Returns the shared NSString key for a well-known, unprefixed OPML
// attribute name, or nil when the name is prefixed or not one of the known
// names. Candidates are bucketed by name length first so that at most two
// tag comparisons are made.
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix {
    if (prefix) {
        return nil;
    }

    size_t nameLength = strlen((const char *)name);

    // "text" / "type"
    if (nameLength == kTextLength - 1) {
        if (RSSAXEqualTags(name, kText, kTextLength)) {
            return OPMLTextKey;
        }
        if (RSSAXEqualTags(name, kType, kTypeLength)) {
            return OPMLTypeKey;
        }
        return nil;
    }

    // "title"
    if (nameLength == kTitleLength - 1) {
        return RSSAXEqualTags(name, kTitle, kTitleLength) ? OPMLTitleKey : nil;
    }

    // "xmlUrl"
    if (nameLength == kXMLURLLength - 1) {
        return RSSAXEqualTags(name, kXMLURL, kXMLURLLength) ? OPMLXMLURLKey : nil;
    }

    // "version" / "htmlUrl"
    if (nameLength == kVersionLength - 1) {
        if (RSSAXEqualTags(name, kVersion, kVersionLength)) {
            return OPMLVersionKey;
        }
        if (RSSAXEqualTags(name, kHTMLURL, kHTMLURLLength)) {
            return OPMLHMTLURLKey;
        }
        return nil;
    }

    // "description"
    if (nameLength == kDescriptionLength - 1) {
        return RSSAXEqualTags(name, kDescription, kDescriptionLength) ? OPMLDescriptionKey : nil;
    }

    return nil;
}
// Byte patterns and shared NSString instances returned by -saxParser:internedStringForValue:length:
// for the attribute values "RSS" and "rss", plus the shared empty string.
static const char *kRSSUppercase = "RSS";
static const char *kRSSLowercase = "rss";
// Number of bytes compared; unlike the tag-name lengths, this does not add 1 to strlen.
static const NSUInteger kRSSLength = 3;
static NSString *RSSUppercaseValue = @"RSS";
static NSString *RSSLowercaseValue = @"rss";
static NSString *emptyString = @"";
// Returns YES when the first `length` bytes of the two buffers are identical.
static BOOL equalBytes(const void *bytes1, const void *bytes2, NSUInteger length) {
    int comparison = memcmp(bytes1, bytes2, length);
    return comparison == 0;
}
// Returns a shared NSString for very common attribute values so that no new
// string has to be created: the empty string for zero-length values, and
// "RSS" / "rss" for those exact three-byte values. Returns nil otherwise.
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length {
    if (length < 1) {
        return emptyString;
    }
    if (length != kRSSLength) {
        return nil;
    }
    if (equalBytes(bytes, kRSSUppercase, kRSSLength)) {
        return RSSUppercaseValue;
    }
    if (equalBytes(bytes, kRSSLowercase, kRSSLength)) {
        return RSSLowercaseValue;
    }
    return nil;
}
@end

View File

@ -0,0 +1,37 @@
//
// RSParsedArticle.h
// RSParser
//
// Created by Brent Simmons on 12/6/14.
// Copyright (c) 2014 Ranchero Software LLC. All rights reserved.
//
@import Foundation;
@class RSParsedEnclosure;
@class RSParsedAuthor;
// One article parsed from a feed.
@interface RSParsedArticle : NSObject
// feedURL must not be nil (asserted). dateParsed is set to the creation time.
- (nonnull instancetype)initWithFeedURL:(NSString * _Nonnull)feedURL;
@property (nonatomic, readonly, nonnull) NSString *feedURL;
@property (nonatomic, nonnull) NSString *articleID; //guid, if present, or calculated from other attributes. Should be unique to the feed, but not necessarily unique across different feeds. (Not suitable for a database ID.)
@property (nonatomic, nullable) NSString *guid;
@property (nonatomic, nullable) NSString *title;
@property (nonatomic, nullable) NSString *body;
@property (nonatomic, nullable) NSString *link;
@property (nonatomic, nullable) NSString *permalink;
@property (nonatomic, nullable) NSSet<RSParsedAuthor *> *authors;
@property (nonatomic, nullable) NSSet<RSParsedEnclosure *> *enclosures;
@property (nonatomic, nullable) NSDate *datePublished;
@property (nonatomic, nullable) NSDate *dateModified;
@property (nonatomic, nonnull) NSDate *dateParsed;
@property (nonatomic, nullable) NSString *language;
// Add one enclosure / author to the corresponding set, creating the set when it is nil.
- (void)addEnclosure:(RSParsedEnclosure *_Nonnull)enclosure;
- (void)addAuthor:(RSParsedAuthor *_Nonnull)author;
@end

View File

@ -0,0 +1,134 @@
//
// RSParsedArticle.m
// RSParser
//
// Created by Brent Simmons on 12/6/14.
// Copyright (c) 2014 Ranchero Software LLC. All rights reserved.
//
#import "RSParsedArticle.h"
#import "RSParserInternal.h"
#import "NSString+RSParser.h"
#import "RSParsedAuthor.h"
#import "RSParsedEnclosure.h"
@implementation RSParsedArticle

#pragma mark - Init

// Records the owning feed's URL and stamps the article with the current date.
// A nil feedURL trips NSParameterAssert.
- (instancetype)initWithFeedURL:(NSString *)feedURL {

	NSParameterAssert(feedURL != nil);

	if ((self = [super init])) {
		_feedURL = feedURL;
		_dateParsed = [NSDate date];
	}
	return self;
}

#pragma mark - Enclosures

// Adds one enclosure, creating the set on first use.
- (void)addEnclosure:(RSParsedEnclosure *)enclosure {

	NSSet<RSParsedEnclosure *> *existing = self.enclosures;
	self.enclosures = existing ? [existing setByAddingObject:enclosure] : [NSSet setWithObject:enclosure];
}

#pragma mark - Authors

// Adds one author, creating the set on first use.
- (void)addAuthor:(RSParsedAuthor *)author {

	NSSet<RSParsedAuthor *> *existing = self.authors;
	self.authors = existing ? [existing setByAddingObject:author] : [NSSet setWithObject:author];
}

#pragma mark - articleID

// The guid wins whenever there is one. Otherwise an ID is calculated once
// from the other properties and cached in the backing ivar.
- (NSString *)articleID {

	NSString *guid = self.guid;
	if (guid) {
		return guid;
	}

	if (_articleID == nil) {
		_articleID = [self calculatedArticleID];
	}
	return _articleID;
}

- (NSString *)calculatedArticleID {

	/*Concatenate a combination of properties when no guid. Then hash the result.
	 In general, feeds should have guids. When they don't, re-runs are very likely,
	 because there's no other 100% reliable way to determine identity.
	 This is intended to create an ID unique inside a feed, but not globally unique.
	 Not suitable for a database ID, in other words.*/

	NSString *dateStamp = nil;
	if (self.datePublished) {
		dateStamp = [NSString stringWithFormat:@"%.0f", self.datePublished.timeIntervalSince1970];
	}

	// Pick the best available text, in order of preference: permalink, link, title.
	// Ideally we have a permalink and a pubDate. Either one would probably be a good guid,
	// but together they should be rock-solid. (In theory. Feeds are buggy, though.)
	// The body is only a last resort, used when there is no date and nothing else.
	NSString *text = nil;
	if (!RSParserStringIsEmpty(self.permalink)) {
		text = self.permalink;
	}
	else if (!RSParserStringIsEmpty(self.link)) {
		text = self.link;
	}
	else if (!RSParserStringIsEmpty(self.title)) {
		text = self.title;
	}
	else if (!dateStamp && !RSParserStringIsEmpty(self.body)) {
		text = self.body;
	}

	// Text first, then the date stamp (when present); an empty string if neither exists.
	NSMutableString *material = [NSMutableString stringWithString:@""];
	if (text) {
		[material appendString:text];
	}
	if (dateStamp) {
		[material appendString:dateStamp];
	}

	return [material rsparser_md5Hash];
}

@end

View File

@ -0,0 +1,19 @@
//
// RSParsedAuthor.h
// RSParserTests
//
// Created by Brent Simmons on 12/19/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
// An author attached to a parsed article. Any of the three properties may be nil.
@interface RSParsedAuthor : NSObject
@property (nonatomic, nullable) NSString *name;
@property (nonatomic, nullable) NSString *emailAddress;
@property (nonatomic, nullable) NSString *url;
+ (instancetype _Nonnull )authorWithSingleString:(NSString *_Nonnull)s; // Don't know which property it is. Guess based on contents of the string. Common with RSS.
@end

View File

@ -0,0 +1,34 @@
//
// RSParsedAuthor.m
// RSParserTests
//
// Created by Brent Simmons on 12/19/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
#import "NSString+RSParser.h"
#import "RSParsedAuthor.h"
@implementation RSParsedAuthor

// Builds an author from a single untyped string by guessing what it contains:
// anything with an "@" is treated as an email address, anything starting with
// "http" (case-insensitive) as a URL, and everything else as a name.
+ (instancetype)authorWithSingleString:(NSString *)s {

	// The author element in RSS is supposed to be an email address, but often it's a name, and sometimes a URL.

	RSParsedAuthor *result = [[self alloc] init];

	if ([s rsparser_contains:@"@"]) {
		result.emailAddress = s;
		return result;
	}

	if ([[s lowercaseString] hasPrefix:@"http"]) {
		result.url = s;
		return result;
	}

	result.name = s;
	return result;
}

@end

View File

@ -0,0 +1,22 @@
//
// RSParsedEnclosure.h
// RSParser
//
// Created by Brent Simmons on 12/18/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
NS_ASSUME_NONNULL_BEGIN
// Plain value object describing one enclosure of an article.
// RSRSSParser fills url, length and mimeType from the enclosure element's
// "url", "length" and "type" attributes.
@interface RSParsedEnclosure : NSObject
@property (nonatomic) NSString *url;
@property (nonatomic) NSInteger length; // 0 unless set.
@property (nonatomic, nullable) NSString *mimeType;
@property (nonatomic, nullable) NSString *title;
@end
NS_ASSUME_NONNULL_END

View File

@ -0,0 +1,13 @@
//
// RSParsedEnclosure.m
// RSParser
//
// Created by Brent Simmons on 12/18/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
#import "RSParsedEnclosure.h"
// No custom behavior: all properties use their synthesized accessors.
@implementation RSParsedEnclosure
@end

View File

@ -0,0 +1,23 @@
//
// RSParsedFeed.h
// RSParser
//
// Created by Brent Simmons on 7/12/15.
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
@class RSParsedArticle;
// Result of parsing a feed: feed-level metadata plus the parsed articles.
// All properties are read-only and set once by the initializer.
@interface RSParsedFeed : NSObject
// Note that the initializer takes the articles as an array while the property below is declared as a set.
- (nonnull instancetype)initWithURLString:(NSString * _Nonnull)urlString title:(NSString * _Nullable)title link:(NSString * _Nullable)link language:(NSString * _Nullable)language articles:(NSArray <RSParsedArticle *>* _Nonnull)articles;
@property (nonatomic, readonly, nonnull) NSString *urlString;
@property (nonatomic, readonly, nullable) NSString *title;
@property (nonatomic, readonly, nullable) NSString *link;
@property (nonatomic, readonly, nullable) NSString *language;
@property (nonatomic, readonly, nonnull) NSSet <RSParsedArticle *>*articles;
@end

View File

@ -0,0 +1,32 @@
//
// RSParsedFeed.m
// RSParser
//
// Created by Brent Simmons on 7/12/15.
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
//
#import "RSParsedFeed.h"
@implementation RSParsedFeed

// Stores the feed metadata and an immutable set built from `articles`.
//
// The header declares the `articles` parameter as an NSArray and the `articles`
// property as an NSSet. Callers do pass an array (RSRSSParser hands over its
// NSMutableArray), so the collection is converted here. Otherwise the property
// would hold an object of the wrong class that the caller could still mutate.
// An NSSet argument is also accepted and copied, for callers that relied on the
// parameter type previously written in this file.
- (instancetype)initWithURLString:(NSString *)urlString title:(NSString *)title link:(NSString *)link language:(NSString *)language articles:(NSArray<RSParsedArticle *> *)articles {

	self = [super init];
	if (!self) {
		return nil;
	}

	_urlString = urlString;
	_title = title;
	_link = link;
	_language = language;

	id collection = articles;
	if ([collection isKindOfClass:[NSSet class]]) {
		_articles = [collection copy];
	}
	else {
		// A nil argument yields an empty set, so the nonnull property contract holds.
		_articles = [NSSet setWithArray:(articles ?: @[])];
	}

	return self;
}

@end

View File

@ -0,0 +1,24 @@
//
// RSParserInternal.h
// RSParser
//
// Created by Brent Simmons on 12/26/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
NS_ASSUME_NONNULL_BEGIN
// YES for nil, NSNull, and any object whose count or length is less than 1. NO for objects that respond to neither.
BOOL RSParserObjectIsEmpty(id _Nullable obj);
// YES for nil, NSNull, and zero-length strings.
BOOL RSParserStringIsEmpty(NSString * _Nullable s);
@interface NSDictionary (RSParserInternal)
// Tries an exact lookup first, then falls back to a case-insensitive match against the string keys.
- (nullable id)rsparser_objectForCaseInsensitiveKey:(NSString *)key;
@end
NS_ASSUME_NONNULL_END

View File

@ -0,0 +1,61 @@
//
// RSParserInternal.m
// RSParser
//
// Created by Brent Simmons on 12/26/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSParserInternal.h"
#import <CommonCrypto/CommonDigest.h>
// Treats both nil and the NSNull singleton as "no value".
static BOOL RSParserIsNil(id obj) {
	if (obj == nil) {
		return YES;
	}
	return obj == [NSNull null];
}
// Generic emptiness check. nil and NSNull are empty; collections are empty
// when their count is zero; string- or data-like objects when their length is zero.
// Objects that respond to neither selector are reported as not empty.
BOOL RSParserObjectIsEmpty(id obj) {

	BOOL isEmpty = NO; /*Shouldn't stay at the default very often.*/

	if (RSParserIsNil(obj)) {
		isEmpty = YES;
	}
	else if ([obj respondsToSelector:@selector(count)]) {
		isEmpty = ([obj count] < 1);
	}
	else if ([obj respondsToSelector:@selector(length)]) {
		isEmpty = ([obj length] < 1);
	}

	return isEmpty;
}
// YES when the string is nil, is actually NSNull, or has no characters.
BOOL RSParserStringIsEmpty(NSString *s) {
	if (s == nil || (id)s == [NSNull null]) {
		return YES;
	}
	return s.length < 1;
}
@implementation NSDictionary (RSParserInternal)

// Looks up `key` exactly first. If that misses, scans the keys for the first
// NSString key that matches case-insensitively. Returns nil when nothing matches.
- (nullable id)rsparser_objectForCaseInsensitiveKey:(NSString *)key {

	id exactMatch = [self objectForKey:key];
	if (exactMatch != nil) {
		return exactMatch;
	}

	for (NSString *candidateKey in self.allKeys) {
		if (![candidateKey isKindOfClass:[NSString class]]) {
			continue;
		}
		if ([key caseInsensitiveCompare:candidateKey] != NSOrderedSame) {
			continue;
		}
		return [self objectForKey:candidateKey];
	}

	return nil;
}

@end

View File

@ -0,0 +1,19 @@
//
// RSRSSParser.h
// RSParser
//
// Created by Brent Simmons on 1/6/15.
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
//
@import Foundation;
@class ParserData;
@class RSParsedFeed;
// RSS feed parser built on RSSAXParser. The implementation also recognizes an RDF root element.
@interface RSRSSParser : NSObject
// Parses parserData.data as a feed located at parserData.url and returns the resulting feed object.
+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData;
@end

523
Parser/Sources/ObjC/RSRSSParser.m Executable file
View File

@ -0,0 +1,523 @@
//
// RSRSSParser.m
// RSParser
//
// Created by Brent Simmons on 1/6/15.
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
//
#import "RSRSSParser.h"
#import "RSSAXParser.h"
#import "RSParsedFeed.h"
#import "RSParsedArticle.h"
#import "RSParserInternal.h"
#import "NSString+RSParser.h"
#import "RSDateParser.h"
#import "ParserData.h"
#import "RSParsedEnclosure.h"
#import "RSParsedAuthor.h"
#import <libxml/xmlstring.h>
// Private parsing state. One instance handles exactly one feed.
@interface RSRSSParser () <RSSAXParserDelegate>
@property (nonatomic) NSData *feedData; // Raw feed bytes handed to the SAX parser.
@property (nonatomic) NSString *urlString; // URL of the feed; passed to every created article.
@property (nonatomic) NSDictionary *currentAttributes; // Attributes of the element being parsed; only populated for item (RDF), guid and enclosure.
@property (nonatomic) RSSAXParser *parser;
@property (nonatomic) NSMutableArray *articles; // Articles in document order; the last one is the article being built.
@property (nonatomic) BOOL parsingArticle; // YES between an item's start and end tags.
@property (nonatomic) BOOL parsingAuthor; // YES while inside an unprefixed author element of an item.
@property (nonatomic, readonly) RSParsedArticle *currentArticle;
@property (nonatomic) BOOL parsingChannelImage; // YES between an unprefixed image start tag and an image end tag; characters are not stored meanwhile.
@property (nonatomic, readonly) NSDate *currentDate; // The current element's characters parsed as a date.
@property (nonatomic) BOOL endRSSFound; // Once set, all further SAX callbacks are ignored.
@property (nonatomic) NSString *link;
@property (nonatomic) NSString *title;
@property (nonatomic) NSDate *dateParsed; // Set once per parse and copied to every article.
@property (nonatomic) BOOL isRDF; // Set when an element named RDF is seen.
@property (nonatomic) NSString *language;
@end
@implementation RSRSSParser
#pragma mark - Class Methods
// Convenience entry point: creates a one-shot parser for the given data and runs it.
+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData {
	RSRSSParser *rssParser = [[self alloc] initWithParserData:parserData];
	RSParsedFeed *feed = [rssParser parseFeed];
	return feed;
}
#pragma mark - Init
// Captures the feed bytes and URL, and creates the SAX parser (with self as
// its delegate) and the empty article list.
- (instancetype)initWithParserData:(ParserData *)parserData {

	if ((self = [super init])) {
		_feedData = parserData.data;
		_urlString = parserData.url;
		_parser = [[RSSAXParser alloc] initWithDelegate:self];
		_articles = [NSMutableArray new];
	}
	return self;
}
#pragma mark - API
// Runs the parse, then packages the collected metadata and articles into a feed object.
- (RSParsedFeed *)parseFeed {

	[self parse];

	return [[RSParsedFeed alloc] initWithURLString:self.urlString title:self.title link:self.link language:self.language articles:self.articles];
}
#pragma mark - Constants
// Shared NSString constants. The SAX delegate's interned-string callbacks
// return these for attribute names and values.
static NSString *kIsPermaLinkKey = @"isPermaLink";
static NSString *kURLKey = @"url";
static NSString *kLengthKey = @"length";
static NSString *kTypeKey = @"type";
static NSString *kFalseValue = @"false";
static NSString *kTrueValue = @"true";
static NSString *kContentEncodedKey = @"content:encoded";
static NSString *kDCDateKey = @"dc:date";
static NSString *kDCCreatorKey = @"dc:creator";
static NSString *kRDFAboutKey = @"rdf:about";
// C-string tag, prefix and attribute names, for comparison against libxml2's raw bytes.
// Every *Length constant is the string's character count plus one, i.e. it
// includes the terminating NUL. Code that compares raw, unterminated bytes
// therefore subtracts one (see saxParser:internedStringForValue:length:).
static const char *kItem = "item";
static const NSInteger kItemLength = 5;
static const char *kImage = "image";
static const NSInteger kImageLength = 6;
static const char *kLink = "link";
static const NSInteger kLinkLength = 5;
static const char *kTitle = "title";
static const NSInteger kTitleLength = 6;
static const char *kDC = "dc";
static const NSInteger kDCLength = 3;
static const char *kCreator = "creator";
static const NSInteger kCreatorLength = 8;
static const char *kDate = "date";
static const NSInteger kDateLength = 5;
static const char *kContent = "content";
static const NSInteger kContentLength = 8;
static const char *kEncoded = "encoded";
static const NSInteger kEncodedLength = 8;
static const char *kGuid = "guid";
static const NSInteger kGuidLength = 5;
static const char *kPubDate = "pubDate";
static const NSInteger kPubDateLength = 8;
static const char *kAuthor = "author";
static const NSInteger kAuthorLength = 7;
static const char *kDescription = "description";
static const NSInteger kDescriptionLength = 12;
static const char *kRSS = "rss";
static const NSInteger kRSSLength = 4;
static const char *kURL = "url";
static const NSInteger kURLLength = 4;
static const char *kLength = "length";
static const NSInteger kLengthLength = 7;
static const char *kType = "type";
static const NSInteger kTypeLength = 5;
static const char *kIsPermaLink = "isPermaLink";
static const NSInteger kIsPermaLinkLength = 12;
static const char *kRDF = "rdf";
static const NSInteger kRDFlength = 4;
static const char *kAbout = "about";
static const NSInteger kAboutLength = 6;
static const char *kFalse = "false";
static const NSInteger kFalseLength = 6;
static const char *kTrue = "true";
static const NSInteger kTrueLength = 5;
static const char *kUppercaseRDF = "RDF";
static const NSInteger kUppercaseRDFLength = 4;
static const char *kEnclosure = "enclosure";
static const NSInteger kEnclosureLength = 10;
static const char *kLanguage = "language";
static const NSInteger kLanguageLength = 9;
#pragma mark - Parsing
// Records the parse time, then feeds the whole document through the SAX parser
// inside its own autorelease pool.
- (void)parse {

	self.dateParsed = [NSDate date];

	@autoreleasepool {
		RSSAXParser *saxParser = self.parser;
		[saxParser parseData:self.feedData];
		[saxParser finishParsing];
	}
}
// Starts a new article and appends it to the list, so it becomes currentArticle.
// Every article of one parse shares the same dateParsed.
- (void)addArticle {

	RSParsedArticle *newArticle = [[RSParsedArticle alloc] initWithFeedURL:self.urlString];
	newArticle.dateParsed = self.dateParsed;

	[self.articles addObject:newArticle];
}
// The article being built is always the most recently added one (nil before the first item).
- (RSParsedArticle *)currentArticle {
	return [self.articles lastObject];
}
// Handles the end of a feed-level element. Only unprefixed link, title and
// language are used. The first link wins; title and language are overwritten
// each time they occur.
- (void)addFeedElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix {

	if (prefix != NULL) {
		return;
	}

	if (RSSAXEqualTags(localName, kLink, kLinkLength)) {
		if (self.link == nil) {
			self.link = [self currentString];
		}
		return;
	}

	if (RSSAXEqualTags(localName, kTitle, kTitleLength)) {
		self.title = [self currentString];
		return;
	}

	if (RSSAXEqualTags(localName, kLanguage, kLanguageLength)) {
		self.language = [self currentString];
	}
}
// Adds an author built from `authorString` to the current article.
// Empty or nil strings are ignored.
//
// The author is created from the string that was passed in and validated,
// rather than re-reading the parser's current string. The existing callers pass
// [self currentString], so their results are unchanged; the method now simply
// honors its parameter.
- (void)addAuthorWithString:(NSString *)authorString {

	if (RSParserStringIsEmpty(authorString)) {
		return;
	}

	RSParsedAuthor *author = [RSParsedAuthor authorWithSingleString:authorString];
	[self.currentArticle addAuthor:author];
}
// Handles elements with the dc prefix inside an item: creator becomes an
// author, date becomes the publication date. Other dc elements are ignored.
- (void)addDCElement:(const xmlChar *)localName {

	if (RSSAXEqualTags(localName, kCreator, kCreatorLength)) {
		[self addAuthorWithString:[self currentString]];
		return;
	}

	if (RSSAXEqualTags(localName, kDate, kDateLength)) {
		self.currentArticle.datePublished = self.currentDate;
	}
}
// Stores the guid on the current article. The guid is also used as the
// permalink unless the element carries isPermaLink="false" or the value does
// not look like a URL or relative path.
- (void)addGuid {

	NSString *guidValue = [self currentString];
	RSParsedArticle *article = self.currentArticle;
	article.guid = guidValue;

	NSString *isPermaLinkValue = [self.currentAttributes rsparser_objectForCaseInsensitiveKey:@"ispermalink"];
	BOOL explicitlyNotAPermalink = (isPermaLinkValue != nil) && [isPermaLinkValue isEqualToString:@"false"];
	if (explicitlyNotAPermalink) {
		return;
	}

	if (![self stringIsProbablyAURLOrRelativePath:guidValue]) {
		return;
	}

	article.permalink = [self urlString:guidValue];
}
// Builds an enclosure from the current element's attributes and attaches it to
// the current article. Elements without a non-empty url attribute are skipped.
- (void)addEnclosure {

	NSDictionary *attributes = self.currentAttributes;

	NSString *enclosureURL = attributes[kURLKey];
	if (enclosureURL.length < 1) { // Also covers a missing (nil) url.
		return;
	}

	RSParsedEnclosure *parsedEnclosure = [[RSParsedEnclosure alloc] init];
	parsedEnclosure.url = enclosureURL;
	parsedEnclosure.length = [attributes[kLengthKey] integerValue];
	parsedEnclosure.mimeType = attributes[kTypeKey];

	[self.currentArticle addEnclosure:parsedEnclosure];
}
- (BOOL)stringIsProbablyAURLOrRelativePath:(NSString *)s {

	/*The RSS guid is defined as a permalink, except when it appears like this:
	 <guid isPermaLink="false">someidentifier</guid>
	 However, people often seem to think it's *not* a permalink by default, even
	 though it is. So we try to detect the situation where the value is not a URL string,
	 and not even a relative path. This may need to evolve over time as we find
	 feeds broken in different ways.*/

	// Requiring a slash seems to be just about the best possible check.
	// Bad guids are often just integers, for instance.
	BOOL containsSlash = [s rsparser_contains:@"/"];
	if (!containsSlash) {
		return NO;
	}

	// "tag:" identifiers are a common non-URL guid form.
	BOOL isTagIdentifier = [[s lowercaseString] hasPrefix:@"tag:"];
	return !isTagIdentifier;
}
// Returns an absolute URL string for `s` where possible.
// Strings starting with "http" (case-insensitive) are returned untouched.
// Anything else is resolved against the feed's home page link. If there is no
// usable link, or resolution fails, `s` is returned as it came in.
- (NSString *)urlString:(NSString *)s {

	/*Resolve against home page URL (if available) or feed URL.*/

	if ([[s lowercaseString] hasPrefix:@"http"]) {
		return s;
	}

	NSString *homePageLink = self.link;
	if (!homePageLink) {
		// TODO: get feed URL and use that to resolve URL.
		return s;
	}

	NSURL *baseURL = [NSURL URLWithString:homePageLink];
	if (!baseURL) {
		return s;
	}

	NSString *absoluteString = [NSURL URLWithString:s relativeToURL:baseURL].absoluteString;
	return absoluteString ? absoluteString : s;
}
// Text of the element currently being closed, with surrounding whitespace trimmed.
- (NSString *)currentString {
	return [self.parser currentStringWithTrimmedWhitespace];
}
// Handles the end of an element inside an item.
// dc elements and content:encoded are handled first; every other prefixed
// element is ignored. Unprefixed elements update the matching property of the
// current article.
- (void)addArticleElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix {

	if (RSSAXEqualTags(prefix, kDC, kDCLength)) {
		[self addDCElement:localName];
		return;
	}

	if (RSSAXEqualTags(prefix, kContent, kContentLength) && RSSAXEqualTags(localName, kEncoded, kEncodedLength)) {
		// A non-empty content:encoded always wins over description.
		NSString *encodedBody = [self currentString];
		if (!RSParserStringIsEmpty(encodedBody)) {
			self.currentArticle.body = encodedBody;
		}
		return;
	}

	if (prefix != NULL) {
		return;
	}

	if (RSSAXEqualTags(localName, kGuid, kGuidLength)) {
		[self addGuid];
		return;
	}

	if (RSSAXEqualTags(localName, kPubDate, kPubDateLength)) {
		self.currentArticle.datePublished = self.currentDate;
		return;
	}

	if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) {
		[self addAuthorWithString:[self currentString]];
		return;
	}

	if (RSSAXEqualTags(localName, kLink, kLinkLength)) {
		self.currentArticle.link = [self urlString:[self currentString]];
		return;
	}

	if (RSSAXEqualTags(localName, kDescription, kDescriptionLength)) {
		// Only used as the body when nothing has set one yet.
		if (!self.currentArticle.body) {
			self.currentArticle.body = [self currentString];
		}
		return;
	}

	if (RSSAXEqualTags(localName, kTitle, kTitleLength)) {
		// A title nested inside an author element is not the article's title.
		if (!self.parsingAuthor) {
			NSString *articleTitle = [self currentString];
			if (articleTitle != nil) {
				self.currentArticle.title = articleTitle;
			}
		}
		return;
	}

	if (RSSAXEqualTags(localName, kEnclosure, kEnclosureLength)) {
		[self addEnclosure];
	}
}
// Parses the current element's raw characters as a date.
- (NSDate *)currentDate {
	NSData *characters = self.parser.currentCharacters;
	return RSDateWithBytes(characters.bytes, characters.length);
}
#pragma mark - RSSAXParserDelegate
// Start-tag callback. Tracks the parsing state (RDF root, item, channel image,
// author) and starts character collection for every element outside a channel image.
- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes {

	if (self.endRSSFound) {
		return;
	}

	if (RSSAXEqualTags(localName, kUppercaseRDF, kUppercaseRDFLength)) {
		self.isRDF = YES;
		return;
	}

	// Attribute dictionaries are only built for the elements that use them:
	// RDF items (rdf:about), guid (isPermaLink) and enclosure (url, length, type).
	BOOL wantsAttributes = (self.isRDF && RSSAXEqualTags(localName, kItem, kItemLength))
		|| RSSAXEqualTags(localName, kGuid, kGuidLength)
		|| RSSAXEqualTags(localName, kEnclosure, kEnclosureLength);

	NSDictionary *elementAttributes = nil;
	if (wantsAttributes) {
		elementAttributes = [self.parser attributesDictionary:attributes numberOfAttributes:numberOfAttributes];
	}
	if (elementAttributes != self.currentAttributes) {
		self.currentAttributes = elementAttributes;
	}

	if (prefix == NULL) {
		if (RSSAXEqualTags(localName, kItem, kItemLength)) {
			[self addArticle];
			self.parsingArticle = YES;

			NSString *rdfAbout = elementAttributes[kRDFAboutKey];
			if (self.isRDF && rdfAbout) { /*RSS 1.0 guid*/
				RSParsedArticle *article = self.currentArticle;
				article.guid = rdfAbout;
				article.permalink = rdfAbout;
			}
		}
		else if (RSSAXEqualTags(localName, kImage, kImageLength)) {
			self.parsingChannelImage = YES;
		}
		else if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) {
			if (self.parsingArticle) {
				self.parsingAuthor = YES;
			}
		}
	}

	if (!self.parsingChannelImage) {
		[self.parser beginStoringCharacters];
	}
}
// End-tag callback. Closes the state opened by the start-tag callback and
// routes element content to the article or feed handlers.
- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri {

	if (self.endRSSFound) {
		return;
	}

	// The closing root tag (RDF for RDF feeds, rss otherwise) ends all processing.
	BOOL closesRDFRoot = self.isRDF && RSSAXEqualTags(localName, kUppercaseRDF, kUppercaseRDFLength);
	if (closesRDFRoot || RSSAXEqualTags(localName, kRSS, kRSSLength)) {
		self.endRSSFound = YES;
		return;
	}

	if (RSSAXEqualTags(localName, kImage, kImageLength)) {
		self.parsingChannelImage = NO;
		return;
	}

	if (RSSAXEqualTags(localName, kItem, kItemLength)) {
		self.parsingArticle = NO;
		return;
	}

	if (self.parsingArticle) {
		[self addArticleElement:localName prefix:prefix];
		if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) {
			self.parsingAuthor = NO;
		}
		return;
	}

	if (!self.parsingChannelImage) {
		[self addFeedElement:localName prefix:prefix];
	}
}
// Returns the shared constant for the attribute names this parser looks up
// (rdf:about, isPermaLink, url, length, type), or nil for any other name.
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix {

	if (RSSAXEqualTags(prefix, kRDF, kRDFlength)) {
		return RSSAXEqualTags(name, kAbout, kAboutLength) ? kRDFAboutKey : nil;
	}

	// No other prefixed name is interned.
	if (prefix) {
		return nil;
	}

	if (RSSAXEqualTags(name, kIsPermaLink, kIsPermaLinkLength)) {
		return kIsPermaLinkKey;
	}
	else if (RSSAXEqualTags(name, kURL, kURLLength)) {
		return kURLKey;
	}
	else if (RSSAXEqualTags(name, kLength, kLengthLength)) {
		return kLengthKey;
	}
	else if (RSSAXEqualTags(name, kType, kTypeLength)) {
		return kTypeKey;
	}

	return nil;
}
// YES when the first `length` bytes of both buffers are identical.
static BOOL equalBytes(const void *bytes1, const void *bytes2, NSUInteger length) {
	int difference = memcmp(bytes1, bytes2, length);
	return difference == 0;
}
// Returns the shared @"false" / @"true" constants when the raw value bytes
// spell exactly those words, and nil for any other value.
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length {

	// The k…Length constants count the terminating NUL; the incoming bytes do not have one.
	static const NSUInteger lengthOfFalse = kFalseLength - 1;
	static const NSUInteger lengthOfTrue = kTrueLength - 1;

	if (length == lengthOfFalse) {
		return equalBytes(bytes, kFalse, lengthOfFalse) ? kFalseValue : nil;
	}
	if (length == lengthOfTrue) {
		return equalBytes(bytes, kTrue, lengthOfTrue) ? kTrueValue : nil;
	}

	return nil;
}
@end

View File

@ -0,0 +1,55 @@
//
// RSSAXHTMLParser.h
// RSParser
//
// Created by Brent Simmons on 3/6/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
NS_ASSUME_NONNULL_BEGIN
@class RSSAXHTMLParser;
// Callbacks from RSSAXHTMLParser. All methods are optional. The parser checks
// which ones the delegate implements once, when it is initialized.
@protocol RSSAXHTMLParserDelegate <NSObject>
@optional
// attributes is the raw array from libxml2 (may be NULL); use -attributesDictionary: to convert it.
- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const unsigned char *)localName attributes:(const unsigned char *_Nullable*_Nullable)attributes;
// Stored characters for the element are available during this call and discarded right after it.
- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLEndElement:(nullable const unsigned char *)localName;
// Length is guaranteed to be greater than 0.
- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLCharactersFound:(nullable const unsigned char *)characters length:(NSUInteger)length;
- (void)saxParserDidReachEndOfDocument:(RSSAXHTMLParser *)SAXParser; // If canceled, may not get called (but might).
@end
// Streaming HTML parser: a thin wrapper around libxml2's HTML push parser that reports events to a delegate.
@interface RSSAXHTMLParser : NSObject
- (instancetype)initWithDelegate:(id<RSSAXHTMLParserDelegate>)delegate;
// Feed data in one or more chunks. The parser context is created on the first chunk.
- (void)parseData:(NSData *)data;
- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes;
// Signals the end of input and frees the parser context. Must follow at least one parse call (asserted).
- (void)finishParsing;
- (void)cancel;
@property (nullable, nonatomic, strong, readonly) NSData *currentCharacters; // nil if not storing characters. UTF-8 encoded.
@property (nullable, nonatomic, strong, readonly) NSString *currentString; // Convenience to get string version of currentCharacters.
@property (nullable, nonatomic, strong, readonly) NSString *currentStringWithTrimmedWhitespace;
- (void)beginStoringCharacters; // Delegate can call from XMLStartElement. Characters will be available in XMLEndElement as currentCharacters property. Storing characters is stopped after each XMLEndElement.
// Delegate can call from within XMLStartElement. Returns nil when attributes is NULL; attributes without a value map to the empty string.
- (nullable NSDictionary *)attributesDictionary:(const unsigned char *_Nullable*_Nullable)attributes;
@end
NS_ASSUME_NONNULL_END

View File

@ -0,0 +1,321 @@
//
// RSSAXHTMLParser.m
// RSParser
//
// Created by Brent Simmons on 3/6/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSSAXHTMLParser.h"
#import "RSSAXParser.h"
#import "RSParserInternal.h"
#import <libxml/tree.h>
#import <libxml/xmlstring.h>
#import <libxml/HTMLparser.h>
// Private state.
@interface RSSAXHTMLParser ()
// NOTE(review): held strongly here, whereas RSSAXParser holds its delegate weakly. Confirm no delegate stores this parser in a strong property.
@property (nonatomic) id<RSSAXHTMLParserDelegate> delegate;
@property (nonatomic, assign) htmlParserCtxtPtr context; // libxml2 push-parser context; nil until the first chunk and again after finishParsing.
@property (nonatomic, assign) BOOL storingCharacters;
@property (nonatomic) NSMutableData *characters; // Accumulates character data while storingCharacters is YES.
// Cached respondsToSelector: results, computed once in the initializer.
@property (nonatomic) BOOL delegateRespondsToStartElementMethod;
@property (nonatomic) BOOL delegateRespondsToEndElementMethod;
@property (nonatomic) BOOL delegateRespondsToCharactersFoundMethod;
@property (nonatomic) BOOL delegateRespondsToEndOfDocumentMethod;
@end
@implementation RSSAXHTMLParser
// Calls the shared libxml2 setup function declared in RSSAXParser.h before the class is first used.
// NOTE(review): +initialize can run more than once if subclasses exist; presumably RSSAXInitLibXMLParser is safe to call repeatedly. Confirm in RSSAXParser.m.
+ (void)initialize {
RSSAXInitLibXMLParser();
}
#pragma mark - Init
// Stores the delegate and caches which optional callbacks it implements, so the
// SAX callbacks do not have to ask again for every event.
- (instancetype)initWithDelegate:(id<RSSAXHTMLParserDelegate>)delegate {

	self = [super init];
	if (!self) {
		return nil;
	}

	_delegate = delegate;

	_delegateRespondsToStartElementMethod = [_delegate respondsToSelector:@selector(saxParser:XMLStartElement:attributes:)];
	_delegateRespondsToEndElementMethod = [_delegate respondsToSelector:@selector(saxParser:XMLEndElement:)];
	_delegateRespondsToCharactersFoundMethod = [_delegate respondsToSelector:@selector(saxParser:XMLCharactersFound:length:)];
	_delegateRespondsToEndOfDocumentMethod = [_delegate respondsToSelector:@selector(saxParserDidReachEndOfDocument:)];

	return self;
}
#pragma mark - Dealloc
// Frees the libxml2 context if parsing was never finished, and drops the delegate.
- (void)dealloc {

	if (_context) {
		htmlFreeParserCtxt(_context);
		_context = nil;
	}

	_delegate = nil;
}
#pragma mark - API
// Forward declaration; the handler table itself is defined at the bottom of this file.
static xmlSAXHandler saxHandlerStruct;
// Convenience wrapper around -parseBytes:numberOfBytes: for NSData input.
- (void)parseData:(NSData *)data {
	[self parseBytes:[data bytes] numberOfBytes:[data length]];
}
// Pushes one chunk of HTML into the parser. On the first chunk the push-parser
// context is created, using the character encoding detected from that chunk.
- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes {

	htmlParserCtxtPtr parserContext = self.context;

	if (parserContext == nil) {
		xmlCharEncoding detectedEncoding = xmlDetectCharEncoding(bytes, (int)numberOfBytes);
		parserContext = htmlCreatePushParserCtxt(&saxHandlerStruct, (__bridge void *)self, nil, 0, nil, detectedEncoding);
		self.context = parserContext;
		htmlCtxtUseOptions(parserContext, XML_PARSE_RECOVER | XML_PARSE_NONET | HTML_PARSE_COMPACT);
	}

	@autoreleasepool {
		htmlParseChunk(parserContext, (const char *)bytes, (int)numberOfBytes, 0);
	}
}
// Tells libxml2 the input is complete, then frees the context and clears any
// stored characters. Calling this before any data was parsed is a programmer
// error (asserted); without assertions it does nothing.
- (void)finishParsing {

	NSAssert(self.context != nil, nil);

	if (self.context != nil) {
		@autoreleasepool {
			htmlParseChunk(self.context, nil, 0, 1); // Final, empty chunk with the terminate flag set.
			htmlFreeParserCtxt(self.context);
			self.context = nil;
			self.characters = nil;
		}
	}
}
// Asks libxml2 to stop parsing the current document.
- (void)cancel {
	@autoreleasepool {
		htmlParserCtxtPtr parserContext = self.context;
		xmlStopParser(parserContext);
	}
}
// Starts collecting character data into a fresh buffer, discarding anything collected earlier.
- (void)beginStoringCharacters {
	self.characters = [NSMutableData new];
	self.storingCharacters = YES;
}
// Stops collecting character data and releases the buffer.
- (void)endStoringCharacters {
	self.characters = nil;
	self.storingCharacters = NO;
}
// The collected bytes, or nil when characters are not being stored.
- (NSData *)currentCharacters {
	return self.storingCharacters ? self.characters : nil;
}
// The collected bytes decoded as UTF-8, or nil when nothing has been collected.
- (NSString *)currentString {

	NSData *collected = self.currentCharacters;

	return RSParserObjectIsEmpty(collected) ? nil : [[NSString alloc] initWithData:collected encoding:NSUTF8StringEncoding];
}
// currentString with leading and trailing whitespace and newlines removed (nil if there is no current string).
- (NSString *)currentStringWithTrimmedWhitespace {
	NSCharacterSet *whitespace = [NSCharacterSet whitespaceAndNewlineCharacterSet];
	NSString *untrimmed = self.currentString;
	return [untrimmed stringByTrimmingCharactersInSet:whitespace];
}
#pragma mark - Attributes Dictionary
// Converts libxml2's attribute array into a dictionary of NSStrings.
//
// `attributes` is the array handed to the start-element callback: name/value
// pairs laid out one after another and terminated by a NULL name. A value may
// be NULL (an attribute without a value), in which case it is stored as @"".
// Returns nil when `attributes` is NULL.
//
// Pairs are read explicitly by index. The previous key/value state machine
// lost its alignment whenever a name could not be converted to an NSString:
// the value was then treated as the next name, or the loop stopped early.
// Such a pair is now skipped and the remaining attributes are still read.
- (NSDictionary *)attributesDictionary:(const xmlChar **)attributes {

	if (!attributes) {
		return nil;
	}

	NSMutableDictionary *d = [NSMutableDictionary new];

	for (NSInteger ix = 0; attributes[ix] != NULL; ix += 2) {

		const xmlChar *rawName = attributes[ix];
		const xmlChar *rawValue = attributes[ix + 1]; // Slot always exists after a name; may be NULL.

		NSString *name = [NSString stringWithUTF8String:(const char *)rawName];
		if (!name) {
			continue; // Not valid UTF-8; skip this pair but keep going.
		}

		NSString *value = nil;
		if (rawValue) {
			value = [NSString stringWithUTF8String:(const char *)rawValue];
		}
		d[name] = value ? value : @"";
	}

	return [d copy];
}
#pragma mark - Callbacks
// End-of-document event: notifies the delegate (if it listens), then stops storing characters.
- (void)xmlEndDocument {

	@autoreleasepool {
		BOOL shouldNotify = self.delegateRespondsToEndOfDocumentMethod;
		if (shouldNotify) {
			[self.delegate saxParserDidReachEndOfDocument:self];
		}

		[self endStoringCharacters];
	}
}
// Character-data event. Empty runs are dropped. Otherwise the bytes are
// appended to the buffer (when storing) and forwarded to the delegate (when it listens).
- (void)xmlCharactersFound:(const xmlChar *)ch length:(NSUInteger)length {

	if (length == 0) {
		return;
	}

	@autoreleasepool {
		if (self.storingCharacters) {
			[self.characters appendBytes:(const void *)ch length:length];
		}

		if (!self.delegateRespondsToCharactersFoundMethod) {
			return;
		}
		[self.delegate saxParser:self XMLCharactersFound:ch length:length];
	}
}
// Start-tag event: forwarded to the delegate when it implements the callback.
- (void)xmlStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes {

	@autoreleasepool {
		if (!self.delegateRespondsToStartElementMethod) {
			return;
		}
		[self.delegate saxParser:self XMLStartElement:localName attributes:attributes];
	}
}
// End-tag event: forwarded to the delegate (if it listens), after which the
// characters stored for this element are discarded.
- (void)xmlEndElement:(const xmlChar *)localName {

	@autoreleasepool {
		BOOL shouldNotify = self.delegateRespondsToEndElementMethod;
		if (shouldNotify) {
			[self.delegate saxParser:self XMLEndElement:localName];
		}

		[self endStoringCharacters];
	}
}
@end
// libxml2 start-element callback. `context` is the RSSAXHTMLParser passed as user data when the parser context was created.
static void startElementSAX(void *context, const xmlChar *localname, const xmlChar **attributes) {
	RSSAXHTMLParser *parser = (__bridge RSSAXHTMLParser *)context;
	[parser xmlStartElement:localname attributes:attributes];
}
// libxml2 end-element callback; forwards to the owning RSSAXHTMLParser.
static void endElementSAX(void *context, const xmlChar *localname) {
	RSSAXHTMLParser *parser = (__bridge RSSAXHTMLParser *)context;
	[parser xmlEndElement:localname];
}
// libxml2 character-data callback; forwards to the owning RSSAXHTMLParser.
static void charactersFoundSAX(void *context, const xmlChar *ch, int len) {
	RSSAXHTMLParser *parser = (__bridge RSSAXHTMLParser *)context;
	[parser xmlCharactersFound:ch length:(NSUInteger)len];
}
// libxml2 end-of-document callback; forwards to the owning RSSAXHTMLParser.
static void endDocumentSAX(void *context) {
	RSSAXHTMLParser *parser = (__bridge RSSAXHTMLParser *)context;
	[parser xmlEndDocument];
}
// SAX handler table passed to htmlCreatePushParserCtxt. Only four events are
// handled: endDocument, startElement, endElement and characters. Every other
// slot is left empty. Entries are positional and must stay in libxml2's
// xmlSAXHandler field order.
static htmlSAXHandler saxHandlerStruct = {
nil, /* internalSubset */
nil, /* isStandalone */
nil, /* hasInternalSubset */
nil, /* hasExternalSubset */
nil, /* resolveEntity */
nil, /* getEntity */
nil, /* entityDecl */
nil, /* notationDecl */
nil, /* attributeDecl */
nil, /* elementDecl */
nil, /* unparsedEntityDecl */
nil, /* setDocumentLocator */
nil, /* startDocument */
endDocumentSAX, /* endDocument */
startElementSAX, /* startElement*/
endElementSAX, /* endElement */
nil, /* reference */
charactersFoundSAX, /* characters */
nil, /* ignorableWhitespace */
nil, /* processingInstruction */
nil, /* comment */
nil, /* warning */
nil, /* error */
nil, /* fatalError //: unused error() get all the errors */
nil, /* getParameterEntity */
nil, /* cdataBlock */
nil, /* externalSubset */
XML_SAX2_MAGIC, /* initialized */
nil, /* _private */
nil, /* startElementNs */
nil, /* endElementNs */
nil /* serror */
};

View File

@ -0,0 +1,69 @@
//
// RSSAXParser.h
// RSParser
//
// Created by Brent Simmons on 3/25/15.
// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
/*Thread-safe, not re-entrant.
Calls to the delegate will happen on the same thread where the parser runs.
This is a low-level streaming XML parser, a thin wrapper for libxml2's SAX parser. It doesn't do much Foundation-ifying quite on purpose -- because the goal is performance and low memory use.
This class is not meant to be sub-classed. Use the delegate methods.
*/
@class RSSAXParser;
// Callbacks from RSSAXParser. All methods are optional. The parser checks which
// ones the delegate implements once, when it is initialized.
@protocol RSSAXParserDelegate <NSObject>
@optional
// The attributes array is libxml2's raw data; use -attributesDictionary:numberOfAttributes: to convert it.
- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const unsigned char *)localName prefix:(const unsigned char *)prefix uri:(const unsigned char *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const unsigned char **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const unsigned char **)attributes;
- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const unsigned char *)localName prefix:(const unsigned char *)prefix uri:(const unsigned char *)uri;
// Length is guaranteed to be greater than 0.
- (void)saxParser:(RSSAXParser *)SAXParser XMLCharactersFound:(const unsigned char *)characters length:(NSUInteger)length;
- (void)saxParserDidReachEndOfDocument:(RSSAXParser *)SAXParser; /*If canceled, may not get called (but might).*/
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const unsigned char *)name prefix:(const unsigned char *)prefix; /*Okay to return nil. Prefix may be nil.*/
// Same idea for values: bytes is not NUL-terminated, use length. RSRSSParser's implementation returns nil for values it does not intern.
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length;
@end
void RSSAXInitLibXMLParser(void); // Needed by RSSAXHTMLParser, which calls it from +initialize.
/*For use by delegate. Compares a tag or prefix name from libxml2 with a C-string constant.
NOTE(review): callers in RSRSSParser pass tagLength as strlen(tag) + 1, i.e. including the terminating NUL. Confirm against the implementation.*/
BOOL RSSAXEqualTags(const unsigned char *localName, const char *tag, NSInteger tagLength);
// Streaming XML parser that reports events to a delegate. See the comment at the top of this header.
@interface RSSAXParser : NSObject
// The delegate is held weakly; the caller must keep it alive while parsing.
- (instancetype)initWithDelegate:(id<RSSAXParserDelegate>)delegate;
- (void)parseData:(NSData *)data;
- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes;
- (void)finishParsing;
- (void)cancel;
@property (nonatomic, strong, readonly) NSData *currentCharacters; /*nil if not storing characters. UTF-8 encoded.*/
@property (nonatomic, strong, readonly) NSString *currentString; /*Convenience to get string version of currentCharacters.*/
@property (nonatomic, strong, readonly) NSString *currentStringWithTrimmedWhitespace;
- (void)beginStoringCharacters; /*Delegate can call from XMLStartElement. Characters will be available in XMLEndElement as currentCharacters property. Storing characters is stopped after each XMLEndElement.*/
/*Delegate can call from within XMLStartElement. Returns nil if numberOfAttributes < 1.*/
- (NSDictionary *)attributesDictionary:(const unsigned char **)attributes numberOfAttributes:(NSInteger)numberOfAttributes;
@end

353
Parser/Sources/ObjC/RSSAXParser.m Executable file
View File

@ -0,0 +1,353 @@
//
// RSSAXParser.m
// RSParser
//
// Created by Brent Simmons on 3/25/15.
// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved.
//
#import "RSSAXParser.h"
#import "RSParserInternal.h"
#import <libxml/parser.h>
#import <libxml/tree.h>
#import <libxml/xmlstring.h>
/*Private state for RSSAXParser.*/
@interface RSSAXParser ()
/*Receiver of parse events. Held weakly.*/
@property (nonatomic, weak) id<RSSAXParserDelegate> delegate;
/*libxml2 push-parser context. Created on the first parseBytes:numberOfBytes: call; freed in finishParsing and dealloc.*/
@property (nonatomic, assign) xmlParserCtxtPtr context;
/*YES between beginStoringCharacters and the next end-of-element (or end-of-document) callback.*/
@property (nonatomic, assign) BOOL storingCharacters;
/*Accumulates character data while storingCharacters is YES.*/
@property (nonatomic) NSMutableData *characters;
/*The flags below cache respondsToSelector: results for the delegate's methods; they are computed once in initWithDelegate:.*/
@property (nonatomic) BOOL delegateRespondsToInternedStringMethod;
@property (nonatomic) BOOL delegateRespondsToInternedStringForValueMethod;
@property (nonatomic) BOOL delegateRespondsToStartElementMethod;
@property (nonatomic) BOOL delegateRespondsToEndElementMethod;
@property (nonatomic) BOOL delegateRespondsToCharactersFoundMethod;
@property (nonatomic) BOOL delegateRespondsToEndOfDocumentMethod;
@end
/*
 Push-style SAX parser built on libxml2.

 Data is fed in with parseData: / parseBytes:numberOfBytes: and the document is
 closed with finishParsing. libxml2 invokes the C trampolines registered in
 saxHandlerStruct, which call the -xml… callback methods below; those forward
 to the delegate when it implements the corresponding method.
 */
@implementation RSSAXParser

+ (void)initialize {

	// Make sure libxml2 has been initialized before any parser context is created.
	RSSAXInitLibXMLParser();
}

#pragma mark - Init

/*
 Creates a parser that reports events to `delegate` (held weakly).
 Which delegate methods are implemented is checked once here, so the
 per-event callbacks don't have to call respondsToSelector: repeatedly.
 */
- (instancetype)initWithDelegate:(id<RSSAXParserDelegate>)delegate {

	self = [super init];
	if (self == nil)
		return nil;

	_delegate = delegate;

	_delegateRespondsToInternedStringMethod = [delegate respondsToSelector:@selector(saxParser:internedStringForName:prefix:)];
	_delegateRespondsToInternedStringForValueMethod = [delegate respondsToSelector:@selector(saxParser:internedStringForValue:length:)];
	_delegateRespondsToStartElementMethod = [delegate respondsToSelector:@selector(saxParser:XMLStartElement:prefix:uri:numberOfNamespaces:namespaces:numberOfAttributes:numberDefaulted:attributes:)];
	_delegateRespondsToEndElementMethod = [delegate respondsToSelector:@selector(saxParser:XMLEndElement:prefix:uri:)];
	_delegateRespondsToCharactersFoundMethod = [delegate respondsToSelector:@selector(saxParser:XMLCharactersFound:length:)];
	_delegateRespondsToEndOfDocumentMethod = [delegate respondsToSelector:@selector(saxParserDidReachEndOfDocument:)];

	return self;
}

#pragma mark - Dealloc

- (void)dealloc {

	// The context is a C resource that ARC does not manage; free it if
	// finishParsing was never called.
	if (_context != nil) {
		xmlFreeParserCtxt(_context);
		_context = nil;
	}
	_delegate = nil;
}

#pragma mark - API

// Defined at the bottom of the file; declared here so parseBytes: can refer to it.
static xmlSAXHandler saxHandlerStruct;

/* Convenience wrapper around parseBytes:numberOfBytes:. */
- (void)parseData:(NSData *)data {

	[self parseBytes:data.bytes numberOfBytes:data.length];
}

/*
 Feeds `numberOfBytes` bytes starting at `bytes` to the push parser.
 The libxml2 context is created lazily on the first call, with the
 XML_PARSE_RECOVER and XML_PARSE_NOENT options.
 */
- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes {

	if (self.context == nil) {
		self.context = xmlCreatePushParserCtxt(&saxHandlerStruct, (__bridge void *)self, nil, 0, nil);
		if (self.context == nil) {
			// libxml2 could not create a context; there is nothing to feed data to.
			return;
		}
		xmlCtxtUseOptions(self.context, XML_PARSE_RECOVER | XML_PARSE_NOENT);
	}

	// xmlParseChunk takes an int size. Casting a large NSUInteger directly would
	// truncate it (or make it negative), so feed the buffer in int-sized pieces.
	// The do/while keeps the original behavior of calling xmlParseChunk once
	// even for a zero-length buffer.
	NSUInteger offset = 0;
	do {
		NSUInteger remaining = numberOfBytes - offset;
		int chunkLength = (int)MIN(remaining, (NSUInteger)INT_MAX);
		const char *chunk = (bytes != NULL) ? ((const char *)bytes + offset) : NULL;

		@autoreleasepool {
			xmlParseChunk(self.context, chunk, chunkLength, 0);
		}

		offset += (NSUInteger)chunkLength;
	} while (offset < numberOfBytes);
}

/*
 Tells libxml2 that the document is complete (terminate flag set), then frees
 the context and drops any stored characters. Must be preceded by at least one
 parseBytes:numberOfBytes: call; asserts otherwise and returns without doing anything.
 */
- (void)finishParsing {

	NSAssert(self.context != nil, nil);
	if (self.context == nil)
		return;

	@autoreleasepool {
		xmlParseChunk(self.context, nil, 0, 1);
		xmlFreeParserCtxt(self.context);
		self.context = nil;
		self.characters = nil;
	}
}

/* Asks libxml2 to stop parsing. Does not free the context. */
- (void)cancel {

	@autoreleasepool {
		xmlStopParser(self.context);
	}
}

/* Starts accumulating character data into a fresh buffer. */
- (void)beginStoringCharacters {

	self.storingCharacters = YES;
	self.characters = [NSMutableData new];
}

/* Stops accumulating character data and discards the buffer. */
- (void)endStoringCharacters {

	self.storingCharacters = NO;
	self.characters = nil;
}

/* The accumulated character data, or nil when characters are not being stored. */
- (NSData *)currentCharacters {

	if (!self.storingCharacters) {
		return nil;
	}

	return self.characters;
}

/* currentCharacters decoded as UTF-8, or nil when RSParserObjectIsEmpty reports the data as empty. */
- (NSString *)currentString {

	NSData *d = self.currentCharacters;
	if (RSParserObjectIsEmpty(d)) {
		return nil;
	}

	return [[NSString alloc] initWithData:d encoding:NSUTF8StringEncoding];
}

/* currentString with leading/trailing whitespace and newlines removed (nil if currentString is nil). */
- (NSString *)currentStringWithTrimmedWhitespace {

	return [self.currentString stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
}

#pragma mark - Attributes Dictionary

/*
 Builds a name → value dictionary from the attribute array libxml2 passes to
 the startElementNs callback. Returns nil when there are no attributes.

 Each attribute occupies five consecutive pointers in `attributes`:
 [j] local name, [j+1] prefix (may be NULL), [j+2] URI (unused here),
 [j+3] start of value, [j+4] end of value. The value is not NUL-terminated,
 so its length is the distance between the last two pointers.

 The delegate is given a chance to supply interned strings for both the
 value and the name; when it returns nil (or doesn't implement the method)
 new strings are created. Attributes whose name or value can't be turned
 into a string are skipped.
 */
- (NSDictionary *)attributesDictionary:(const xmlChar **)attributes numberOfAttributes:(NSInteger)numberOfAttributes {

	if (numberOfAttributes < 1 || !attributes) {
		return nil;
	}

	NSMutableDictionary *d = [NSMutableDictionary new];

	@autoreleasepool {
		NSInteger i = 0, j = 0;
		for (i = 0, j = 0; i < numberOfAttributes; i++, j+=5) {

			NSUInteger lenValue = (NSUInteger)(attributes[j + 4] - attributes[j + 3]);
			NSString *value = nil;

			if (self.delegateRespondsToInternedStringForValueMethod) {
				value = [self.delegate saxParser:self internedStringForValue:(const void *)attributes[j + 3] length:lenValue];
			}
			if (!value) {
				value = [[NSString alloc] initWithBytes:(const void *)attributes[j + 3] length:lenValue encoding:NSUTF8StringEncoding];
			}

			NSString *attributeName = nil;

			if (self.delegateRespondsToInternedStringMethod) {
				attributeName = [self.delegate saxParser:self internedStringForName:(const xmlChar *)attributes[j] prefix:(const xmlChar *)attributes[j + 1]];
			}

			if (!attributeName) {
				attributeName = [NSString stringWithUTF8String:(const char *)attributes[j]];
				if (attributes[j + 1]) {
					// Qualified name: "prefix:localName".
					NSString *attributePrefix = [NSString stringWithUTF8String:(const char *)attributes[j + 1]];
					attributeName = [NSString stringWithFormat:@"%@:%@", attributePrefix, attributeName];
				}
			}

			if (value && attributeName) {
				d[attributeName] = value;
			}
		}
	}

	return d;
}

#pragma mark - Equal Tags

/*
 Returns NO for a NULL localName; otherwise YES when strncmp finds localName
 and tag equal over at most tagLength characters.
 */
BOOL RSSAXEqualTags(const xmlChar *localName, const char *tag, NSInteger tagLength) {

	if (!localName) {
		return NO;
	}
	return !strncmp((const char *)localName, tag, (size_t)tagLength);
}

#pragma mark - Callbacks

/* End of document: notify the delegate, then stop storing characters. */
- (void)xmlEndDocument {

	@autoreleasepool {
		if (self.delegateRespondsToEndOfDocumentMethod) {
			[self.delegate saxParserDidReachEndOfDocument:self];
		}

		[self endStoringCharacters];
	}
}

/* Character data: append to the buffer when storing, and pass along to the delegate. Empty runs are ignored. */
- (void)xmlCharactersFound:(const xmlChar *)ch length:(NSUInteger)length {

	if (length < 1) {
		return;
	}

	@autoreleasepool {
		if (self.storingCharacters) {
			[self.characters appendBytes:(const void *)ch length:length];
		}

		if (self.delegateRespondsToCharactersFoundMethod) {
			[self.delegate saxParser:self XMLCharactersFound:ch length:length];
		}
	}
}

/* Start of element: forwarded unchanged to the delegate. */
- (void)xmlStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(int)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(int)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes {

	@autoreleasepool {
		if (self.delegateRespondsToStartElementMethod) {
			[self.delegate saxParser:self XMLStartElement:localName prefix:prefix uri:uri numberOfNamespaces:numberOfNamespaces namespaces:namespaces numberOfAttributes:numberOfAttributes numberDefaulted:numberDefaulted attributes:attributes];
		}
	}
}

/* End of element: notify the delegate first (so it can still read currentCharacters), then stop storing characters. */
- (void)xmlEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri {

	@autoreleasepool {
		if (self.delegateRespondsToEndElementMethod) {
			[self.delegate saxParser:self XMLEndElement:localName prefix:prefix uri:uri];
		}

		[self endStoringCharacters];
	}
}

@end
/* libxml2 startElementNs trampoline: `context` is the RSSAXParser passed as user data when the parser context was created. */
static void startElementSAX(void *context, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes) {

	RSSAXParser *parser = (__bridge RSSAXParser *)context;
	[parser xmlStartElement:localname
					 prefix:prefix
						uri:URI
		 numberOfNamespaces:nb_namespaces
				 namespaces:namespaces
		 numberOfAttributes:nb_attributes
			numberDefaulted:nb_defaulted
				 attributes:attributes];
}
/* libxml2 endElementNs trampoline: forwards to the RSSAXParser passed as `context`. */
static void endElementSAX(void *context, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI) {

	RSSAXParser *parser = (__bridge RSSAXParser *)context;
	[parser xmlEndElement:localname prefix:prefix uri:URI];
}
/*
 libxml2 characters trampoline: forwards to the RSSAXParser passed as `context`.
 `len` is a signed int; a non-positive value is dropped here, because converting
 a negative length to NSUInteger would produce an enormous byte count.
 */
static void charactersFoundSAX(void *context, const xmlChar *ch, int len) {

	if (len <= 0) {
		return;
	}

	[(__bridge RSSAXParser *)context xmlCharactersFound:ch length:(NSUInteger)len];
}
/* libxml2 endDocument trampoline: forwards to the RSSAXParser passed as `context`. */
static void endDocumentSAX(void *context) {

	RSSAXParser *parser = (__bridge RSSAXParser *)context;
	[parser xmlEndDocument];
}
/*
 SAX handler table handed to xmlCreatePushParserCtxt.
 Only four events are handled: end of document, character data, and the
 namespace-aware (SAX2) start/end element callbacks. Every field not named
 here is zero-initialized, i.e. that callback is not installed.
 */
static xmlSAXHandler saxHandlerStruct = {
	.endDocument = endDocumentSAX,
	.characters = charactersFoundSAX,
	.initialized = XML_SAX2_MAGIC, /* marks the table as SAX2 so startElementNs/endElementNs are used */
	.startElementNs = startElementSAX,
	.endElementNs = endElementSAX
};
/* Initializes libxml2's parser exactly once, no matter how many times this is called. */
void RSSAXInitLibXMLParser(void) {

	static dispatch_once_t libXMLInitToken;
	dispatch_block_t initializeLibXML = ^{
		xmlInitParser();
	};

	dispatch_once(&libXMLInitToken, initializeLibXML);
}

View File

@ -0,0 +1,56 @@
//
// RSParser.h
// RSParser
//
// Created by Brent Simmons on 6/20/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
#import "../ParserData.h"
#import "../RSDateParser.h"
// OPML
#import "../RSOPMLParser.h"
#import "../RSOPMLDocument.h"
#import "../RSOPMLItem.h"
#import "../RSOPMLAttributes.h"
#import "../RSOPMLFeedSpecifier.h"
#import "../RSOPMLError.h"
// For writing your own XML parser.
#import "../RSSAXParser.h"
// You should use FeedParser (Swift) instead of these two specific parsers
// and the objects they create.
// But they're available if you want them.
#import "../RSRSSParser.h"
#import "../RSAtomParser.h"
#import "../RSParsedFeed.h"
#import "../RSParsedArticle.h"
#import "../RSParsedEnclosure.h"
#import "../RSParsedAuthor.h"
// HTML
#import "../RSHTMLMetadataParser.h"
#import "../RSHTMLMetadata.h"
#import "../RSHTMLLinkParser.h"
#import "../RSSAXHTMLParser.h" // For writing your own HTML parser.
#import "../RSHTMLTag.h"
// Utilities
#import "../NSData+RSParser.h"
#import "../NSString+RSParser.h"

View File

@ -0,0 +1,9 @@
//
// Exports.swift
//
//
// Created by Stuart Breckenridge on 29/7/20.
//
import Foundation
@_exported import ParserObjC

View File

@ -0,0 +1,91 @@
//
// FeedParser.swift
// RSParser
//
// Created by Brent Simmons on 6/20/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
import ParserObjC
// FeedParser handles RSS, Atom, JSON Feed, and RSS-in-JSON.
// You don't need to know the type of feed.
public typealias FeedParserCallback = (_ parsedFeed: ParsedFeed?, _ error: Error?) -> Void
/// Detects the type of a feed and hands the data to the matching parser.
public struct FeedParser {

	/// Queue on which the asynchronous `parse(_:_:)` does its work.
	private static let parseQueue = DispatchQueue(label: "FeedParser parse queue")

	/// Returns `true` when the data is detected as JSON Feed, RSS-in-JSON, RSS, or Atom.
	public static func canParse(_ parserData: ParserData) -> Bool {

		switch feedType(parserData) {
		case .jsonFeed, .rssInJSON, .rss, .atom:
			return true
		case .unknown, .notAFeed:
			return false
		}
	}

	/// Like `canParse(_:)`, but for incomplete data: a type of `.unknown` also
	/// counts as possibly parseable. Only `.notAFeed` yields `false`.
	public static func mightBeAbleToParseBasedOnPartialData(_ parserData: ParserData) -> Bool {

		switch feedType(parserData, isPartialData: true) {
		case .jsonFeed, .rssInJSON, .rss, .atom, .unknown:
			return true
		case .notAFeed:
			return false
		}
	}

	/// Parses the feed synchronously.
	///
	/// This is generally fast enough to call on the main thread, but it's
	/// probably a good idea to use a background queue if you might be doing
	/// a lot of parsing. (Such as in a feed reader.)
	///
	/// - Returns: The parsed feed, or `nil` when the feed type is `.unknown` or `.notAFeed`.
	/// - Throws: Whatever the JSON Feed or RSS-in-JSON parser throws.
	public static func parse(_ parserData: ParserData) throws -> ParsedFeed? {

		switch feedType(parserData) {
		case .jsonFeed:
			return try JSONFeedParser.parse(parserData)
		case .rssInJSON:
			return try RSSInJSONParser.parse(parserData)
		case .rss:
			return RSSParser.parse(parserData)
		case .atom:
			return AtomParser.parse(parserData)
		case .unknown, .notAFeed:
			return nil
		}
	}

	/// Parses the feed on the parse queue and calls `completion` on the main queue
	/// with either the parsed feed (and a `nil` error) or `nil` and the thrown error.
	public static func parse(_ parserData: ParserData, _ completion: @escaping FeedParserCallback) {

		parseQueue.async {
			let outcome: Result<ParsedFeed?, Error> = Result { try parse(parserData) }

			DispatchQueue.main.async {
				switch outcome {
				case .success(let parsedFeed):
					completion(parsedFeed, nil)
				case .failure(let error):
					completion(nil, error)
				}
			}
		}
	}
}

View File

@ -0,0 +1,29 @@
//
// FeedParserError.swift
// RSParser
//
// Created by Brent Simmons on 6/24/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
/// Error describing why feed data could not be parsed.
public struct FeedParserError: Error, Sendable {

	/// The specific kind of parsing failure.
	///
	/// Declared `Sendable` explicitly: public types get no implicit conformance,
	/// and the enclosing `Sendable` struct stores a value of this type.
	public enum FeedParserErrorType: Sendable {
		case rssChannelNotFound
		case rssItemsNotFound
		case jsonFeedVersionNotFound
		case jsonFeedItemsNotFound
		case jsonFeedTitleNotFound
		case invalidJSON
	}

	/// The kind of failure this error represents.
	public let errorType: FeedParserErrorType

	/// Creates an error of the given kind.
	public init(_ errorType: FeedParserErrorType) {
		self.errorType = errorType
	}
}

Some files were not shown because too many files have changed in this diff Show More