Create HTMLParserTests.

This commit is contained in:
Brent Simmons 2024-09-21 22:10:47 -07:00
parent 20b222f455
commit a7c4669bd6
9 changed files with 149 additions and 61 deletions

View File

@ -0,0 +1,54 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Xcode scheme whose TestAction lists a single testable, HTMLParserTests.
  The BuildAction declares no explicit build entries.
  Test, Launch, and Analyze use the Debug configuration; Profile and Archive use Release.
-->
<Scheme
LastUpgradeVersion = "1530"
version = "1.7">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES"
buildArchitectures = "Automatic">
</BuildAction>
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES"
shouldAutocreateTestPlan = "YES">
<Testables>
<TestableReference
skipped = "NO">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "HTMLParserTests"
BuildableName = "HTMLParserTests"
BlueprintName = "HTMLParserTests"
ReferencedContainer = "container:">
</BuildableReference>
</TestableReference>
</Testables>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>

View File

@ -126,6 +126,16 @@
ReferencedContainer = "container:">
</BuildableReference>
</TestableReference>
<TestableReference
skipped = "NO">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "HTMLParserTests"
BuildableName = "HTMLParserTests"
BlueprintName = "HTMLParserTests"
ReferencedContainer = "container:">
</BuildableReference>
</TestableReference>
</Testables>
</TestAction>
<LaunchAction

View File

@ -82,6 +82,10 @@ let package = Package(
name: "OPMLParserTests",
dependencies: ["OPMLParser"],
resources: [.copy("Resources")]),
.testTarget(
name: "HTMLParserTests",
dependencies: ["HTMLParser"],
resources: [.copy("Resources")]),
.testTarget(
name: "DateParserTests",
dependencies: ["DateParser"])

View File

@ -9,7 +9,14 @@ import Foundation
/// Holds the data for one HTML link: an optional URL string, link text, and title.
/// Every property is optional, and the initializer defaults each of them to nil.
public final class HTMLLink {
let urlString: String? // Absolute URL string
let text: String?
let title: String? // Title attribute inside anchor tag
public var urlString: String? // Absolute URL string
public var text: String?
public var title: String? // Title attribute inside anchor tag
// Not marked public, so this initializer has the default (internal) access level.
init(urlString: String? = nil, text: String? = nil, title: String? = nil) {
self.urlString = urlString
self.text = text
self.title = title
}
}

View File

@ -15,7 +15,7 @@ public final class HTMLLinkParser {
private let parserData: ParserData
private let baseURL: URL?
public static func htmlLinks(parserData: ParserData) -> [HTMLLink] {
public static func htmlLinks(with parserData: ParserData) -> [HTMLLink] {
let parser = HTMLLinkParser(parserData)
parser.parse()
@ -38,23 +38,23 @@ private extension HTMLLinkParser {
}
}
private extension HTMLLinkParser: SAXHTMLParserDelegate {
extension HTMLLinkParser: SAXHTMLParserDelegate {
var currentLink: HTMLLink? {
private var currentLink: HTMLLink? {
links.last
}
struct HTMLAttributeName {
let href = "href"
let title = "title"
private struct HTMLAttributeName {
static let href = "href"
static let title = "title"
}
func title(_ attributesDictionary: HTMLAttributesDictionary) -> String? {
private func title(with attributesDictionary: SAXHTMLParser.HTMLAttributesDictionary) -> String? {
attributesDictionary.object(object(forCaseInsensitiveKey: HTMLAttributeName.title))
attributesDictionary.object(forCaseInsensitiveKey: HTMLAttributeName.title)
}
func urlString(_ attributesDictionary: HTMLAttributesDictionary) -> String? {
private func urlString(with attributesDictionary: SAXHTMLParser.HTMLAttributesDictionary) -> String? {
guard let href = attributesDictionary.object(forCaseInsensitiveKey: HTMLAttributeName.href) else {
return nil
@ -68,22 +68,22 @@ private extension HTMLLinkParser: SAXHTMLParserDelegate {
return absoluteURL.absoluteString
}
func handleLinkAttributes(_ attributesDictionary: HTMLAttributesDictionary) {
private func handleLinkAttributes(_ attributesDictionary: SAXHTMLParser.HTMLAttributesDictionary) {
guard let currentLink else {
assertionFailure("currentLink must not be nil")
return
}
link.urlString = urlString(attributesDictionary)
link.title = title(attributesDictionary)
currentLink.urlString = urlString(with: attributesDictionary)
currentLink.title = title(with: attributesDictionary)
}
struct HTMLName {
private struct HTMLName {
static let a = "a".utf8CString
}
func saxHTMLParser(_ saxHTMLParser: SAXHTMLParser, startElement name: XMLPointer, attributes: UnsafePointer<XMLPointer?>?) {
public func saxHTMLParser(_ saxHTMLParser: SAXHTMLParser, startElement name: XMLPointer, attributes: UnsafePointer<XMLPointer?>?) {
guard SAXEqualTags(name, HTMLName.a) else {
return
@ -99,15 +99,20 @@ private extension HTMLLinkParser: SAXHTMLParserDelegate {
saxHTMLParser.beginStoringCharacters()
}
func saxHTMLParser(_ saxHTMLParser: SAXHTMLParser, endElement name: XMLPointer) {
public func saxHTMLParser(_ saxHTMLParser: SAXHTMLParser, endElement name: XMLPointer) {
guard SAXEqualTags(name, HTMLName.a) else {
return
}
guard let currentLink else {
assertionFailure("currentLink must not be nil.")
return
}
currentLink.text = saxHTMLParser.currentStringWithTrimmedWhitespace
}
func saxHTMLParser(_: SAXHTMLParser, charactersFound: XMLPointer, count: Int) {
public func saxHTMLParser(_: SAXHTMLParser, charactersFound: XMLPointer, count: Int) {
// Nothing needed.
}
}

View File

@ -1,42 +0,0 @@
//
// HTMLLinkTests.swift
// RSParser
//
// Created by Brent Simmons on 6/25/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import XCTest
import FeedParser
//class HTMLLinkTests: XCTestCase {
//
// func testSixColorsPerformance() {
//
// // 0.003 sec on my 2012 iMac
// let d = parserData("sixcolors", "html", "http://sixcolors.com/")
// self.measure {
// let _ = RSHTMLLinkParser.htmlLinks(with: d)
// }
// }
//
// func testSixColorsLink() {
//
// let d = parserData("sixcolors", "html", "http://sixcolors.com/")
// let links = RSHTMLLinkParser.htmlLinks(with: d)
//
// let linkToFind = "https://www.theincomparable.com/theincomparable/290/index.php"
// let textToFind = "this weeks episode of The Incomparable"
//
// var found = false
// for oneLink in links {
// if let urlString = oneLink.urlString, let text = oneLink.text, urlString == linkToFind, text == textToFind {
// found = true
// }
// }
//
// XCTAssertTrue(found)
// XCTAssertEqual(links.count, 131)
// }
//
//}

View File

@ -0,0 +1,50 @@
//
// HTMLLinkTests.swift
// RSParser
//
// Created by Brent Simmons on 6/25/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import XCTest
import FeedParser
import HTMLParser
import SAX
/// Tests for `HTMLLinkParser.htmlLinks(with:)`, run against the "sixcolors" HTML resource.
class HTMLLinkTests: XCTestCase {

    /// Measures how long `HTMLLinkParser.htmlLinks(with:)` takes on the sixcolors page.
    func testSixColorsPerformance() {
        // 0.003 sec on my 2012 iMac
        let sixColorsData = parserData("sixcolors", "html", "http://sixcolors.com/")
        measure {
            _ = HTMLLinkParser.htmlLinks(with: sixColorsData)
        }
    }

    /// Checks that the parsed links include one specific link (matching both URL string
    /// and text) and that 131 links are returned in total.
    func testSixColorsLink() {
        let sixColorsData = parserData("sixcolors", "html", "http://sixcolors.com/")
        let links = HTMLLinkParser.htmlLinks(with: sixColorsData)

        let expectedURLString = "https://www.theincomparable.com/theincomparable/290/index.php"
        let expectedText = "this weeks episode of The Incomparable"

        // A link with a nil urlString or nil text never compares equal to the expected values.
        let containsExpectedLink = links.contains { link in
            link.urlString == expectedURLString && link.text == expectedText
        }

        XCTAssertTrue(containsExpectedLink)
        XCTAssertEqual(links.count, 131)
    }
}
/// Loads a resource from `Bundle.module` and wraps its bytes in a `ParserData`.
///
/// - Parameters:
///   - filename: Resource name without extension; it is looked up as `Resources/<filename>`.
///   - fileExtension: The resource's file extension.
///   - url: URL string passed through unchanged to `ParserData(url:data:)`.
/// - Returns: A `ParserData` built from `url` and the file's contents.
///
/// The lookup is force-unwrapped and the read uses `try!`, so a missing or unreadable
/// resource crashes instead of returning an error.
func parserData(_ filename: String, _ fileExtension: String, _ url: String) -> ParserData {
    let resourcePath = Bundle.module.path(forResource: "Resources/\(filename)", ofType: fileExtension)!
    let fileURL = URL(fileURLWithPath: resourcePath)
    let contents = try! Data(contentsOf: fileURL)
    return ParserData(url: url, data: contents)
}