Support status links with non-ASCII characters (Bugfix 1546) (#1550)
* Allow creation of URL objects from strings containing non-ASCII characters. Adds a new initializer for creating URL objects with a flag to specify that non-ASCII characters found in the path or query string should first be URL encoded.
* Add basic test for creating HTMLString objects.
* Encode link paths and queries when parsing statuses. It's common to use non-ASCII characters in URLs even though they're technically invalid characters. Every modern browser handles this by silently encoding the invalid characters on the user's behalf. However, trying to create a URL object with un-encoded characters will result in nil, so we need to encode the invalid characters before creating the URL object. The unencoded version should still be shown in the displayed status. The parsing of the URL string is a little messy because we can't use the URL class for this scenario and need to duplicate some of its work.
* Only encode link URLs as a backup. If a URL can be created from a status href, don't try URL encoding it, as this could result in double encoding. Only encode the string if the creation of a URL fails. This is also more efficient.
This commit is contained in:
parent
edf36d4b30
commit
30f9da06c8
|
@ -11,7 +11,7 @@ public struct HTMLString: Codable, Equatable, Hashable, @unchecked Sendable {
|
|||
public var asMarkdown: String = ""
|
||||
public var asRawText: String = ""
|
||||
public var statusesURLs = [URL]()
|
||||
public private(set) var links = [Link]()
|
||||
private(set) public var links = [Link]()
|
||||
|
||||
public var asSafeMarkdownAttributedString: AttributedString = .init()
|
||||
private var main_regex: NSRegularExpression?
|
||||
|
@ -151,14 +151,25 @@ public struct HTMLString: Codable, Equatable, Hashable, @unchecked Sendable {
|
|||
handleNode(node: nn)
|
||||
}
|
||||
let finish = asMarkdown.endIndex
|
||||
|
||||
var linkRef = href
|
||||
|
||||
// Try creating a URL from the string. If it fails, try URL encoding
|
||||
// the string first.
|
||||
var url = URL(string: href)
|
||||
if url == nil {
|
||||
url = URL(string: href, encodePath: true)
|
||||
}
|
||||
if let linkUrl = url {
|
||||
linkRef = linkUrl.absoluteString
|
||||
let displayString = asMarkdown[start..<finish]
|
||||
links.append(Link(linkUrl, displayString: String(displayString)))
|
||||
}
|
||||
|
||||
asMarkdown += "]("
|
||||
asMarkdown += href
|
||||
asMarkdown += linkRef
|
||||
asMarkdown += ")"
|
||||
|
||||
if let url = URL(string: href) {
|
||||
let displayString = asMarkdown[start ..< finish]
|
||||
links.append(Link(url, displayString: String(displayString)))
|
||||
}
|
||||
return
|
||||
} else if node.nodeName() == "#text" {
|
||||
var txt = node.description
|
||||
|
@ -190,19 +201,19 @@ public struct HTMLString: Codable, Equatable, Hashable, @unchecked Sendable {
|
|||
self.displayString = displayString
|
||||
|
||||
switch displayString.first {
|
||||
case "@":
|
||||
type = .mention
|
||||
title = displayString
|
||||
case "#":
|
||||
type = .hashtag
|
||||
title = String(displayString.dropFirst())
|
||||
default:
|
||||
type = .url
|
||||
var hostNameUrl = url.host ?? url.absoluteString
|
||||
if hostNameUrl.hasPrefix("www.") {
|
||||
hostNameUrl = String(hostNameUrl.dropFirst(4))
|
||||
}
|
||||
title = hostNameUrl
|
||||
case "@":
|
||||
self.type = .mention
|
||||
self.title = displayString
|
||||
case "#":
|
||||
self.type = .hashtag
|
||||
self.title = String(displayString.dropFirst())
|
||||
default:
|
||||
self.type = .url
|
||||
var hostNameUrl = url.host ?? url.absoluteString
|
||||
if hostNameUrl.hasPrefix("www.") {
|
||||
hostNameUrl = String(hostNameUrl.dropFirst(4))
|
||||
}
|
||||
self.title = hostNameUrl
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -213,3 +224,45 @@ public struct HTMLString: Codable, Equatable, Hashable, @unchecked Sendable {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
extension URL {

    /// Creates a URL from `string`, optionally percent-encoding it first.
    ///
    /// It's common to use non-ASCII characters in URLs even though they're technically
    /// invalid characters. Every modern browser handles this by silently encoding
    /// the invalid characters on the user's behalf. However, trying to create a URL
    /// object with un-encoded characters will result in nil so we need to encode the
    /// invalid characters before creating the URL object. The unencoded version
    /// should still be shown in the displayed status.
    ///
    /// Encoding is applied unconditionally when requested, so a string that is
    /// already percent-encoded gets double encoded (`%` becomes `%25`). Callers
    /// should try `URL(string:)` first and use this initializer only as a fallback.
    ///
    /// - Parameters:
    ///   - string: The text to build the URL from.
    ///   - encodePath: When `true` and `string` starts with `http://` or `https://`,
    ///     the text after the scheme is percent-encoded before the URL is created.
    ///     In every other case `string` is passed to `URL(string:)` unchanged.
    public init?(string: String, encodePath: Bool) {
        let candidate = encodePath ? URL.percentEncodedHTTPString(from: string) : string
        self.init(string: candidate)
    }

    /// Percent-encodes everything after the scheme of an `http://` / `https://` string.
    /// Any other string is returned unchanged.
    private static func percentEncodedHTTPString(from string: String) -> String {
        guard let scheme = ["https://", "http://"].first(where: { string.starts(with: $0) }) else {
            return string
        }

        var remainder = string.dropFirst(scheme.count)

        // Split off the fragment first: everything after the first "#" belongs to
        // it, so a "?" or "/" inside the fragment must not be treated as a delimiter.
        var fragment: Substring?
        if let hashIndex = remainder.firstIndex(of: "#") {
            fragment = remainder[remainder.index(after: hashIndex)...]
            remainder = remainder[..<hashIndex]
        }

        // The first "?" starts the query. Splitting here, before looking at any
        // "/", keeps the delimiter intact even when the query itself contains "/".
        var query: Substring?
        if let queryIndex = remainder.firstIndex(of: "?") {
            query = remainder[remainder.index(after: queryIndex)...]
            remainder = remainder[..<queryIndex]
        }

        // Keep the raw text if encoding ever fails rather than silently dropping
        // that part of the URL.
        func escaped(_ part: Substring, allowing allowed: CharacterSet) -> String {
            part.addingPercentEncoding(withAllowedCharacters: allowed) ?? String(part)
        }

        // What is left is the authority followed by the path. Both go through the
        // path character set: ASCII host names pass through untouched and "/" is
        // preserved, while non-ASCII characters are percent-escaped.
        var encoded = scheme + escaped(remainder, allowing: .urlPathAllowed)
        if let query = query {
            encoded += "?" + escaped(query, allowing: .urlQueryAllowed)
        }
        if let fragment = fragment {
            encoded += "#" + escaped(fragment, allowing: .urlFragmentAllowed)
        }
        return encoded
    }
}
|
||||
|
|
|
@ -0,0 +1,69 @@
|
|||
@testable import Models
|
||||
import XCTest
|
||||
|
||||
final class HTMLStringTests: XCTestCase {

    /// Decodes a JSON string literal (including its surrounding quotes) into an `HTMLString`.
    private func decodeHTMLString(fromJSON json: String) throws -> HTMLString {
        try JSONDecoder().decode(HTMLString.self, from: Data(json.utf8))
    }

    /// Covers `URL.init(string:encodePath:)`: rejected input, plain URLs, and
    /// non-ASCII characters in the path and in the query.
    func testURLInit() throws {
        // Input that is not a URL is rejected whether or not encoding is requested.
        XCTAssertNil(URL(string: "go to www.google.com", encodePath: true))
        XCTAssertNil(URL(string: "go to www.google.com", encodePath: false))
        XCTAssertNil(URL(string: "", encodePath: true))

        // Plain ASCII URLs come back unchanged.
        let bareHost = URL(string: "https://www.google.com", encodePath: true)
        XCTAssertEqual("https://www.google.com", bareHost?.absoluteString)

        let hostWithSlash = URL(string: "https://www.google.com/", encodePath: true)
        XCTAssertEqual("https://www.google.com/", hostWithSlash?.absoluteString)

        // Non-ASCII path characters are escaped only when encoding is requested.
        let umlautPath = URL(string: "https://en.wikipedia.org/wiki/Elbbrücken_station", encodePath: true)
        XCTAssertEqual("https://en.wikipedia.org/wiki/Elbbr%C3%BCcken_station", umlautPath?.absoluteString)
        XCTAssertNil(URL(string: "https://en.wikipedia.org/wiki/Elbbrücken_station", encodePath: false))

        // Non-ASCII query characters are escaped as well.
        let kanjiQuery = URL(string: "http://test.com/blah/city?name=京都市", encodePath: true)
        XCTAssertEqual("http://test.com/blah/city?name=%E4%BA%AC%E9%83%BD%E5%B8%82", kanjiQuery?.absoluteString)

        // Double encoding will happen if you ask to encodePath on an already encoded string
        let preEncodedPath = URL(string: "https://en.wikipedia.org/wiki/Elbbr%C3%BCcken_station", encodePath: true)
        XCTAssertEqual("https://en.wikipedia.org/wiki/Elbbr%25C3%25BCcken_station", preEncodedPath?.absoluteString)
    }

    /// Covers decoding of `HTMLString` values: plain text, a simple link, a link
    /// with non-ASCII characters, and a link that is already percent-encoded.
    func testHTMLStringInit() throws {
        // Plain paragraph: no links at all.
        let plain = try decodeHTMLString(fromJSON: "\"<p>This is a test</p>\"")
        XCTAssertEqual("This is a test", plain.asRawText)
        XCTAssertEqual("<p>This is a test</p>", plain.htmlValue)
        XCTAssertEqual("This is a test", plain.asMarkdown)
        XCTAssertEqual(0, plain.statusesURLs.count)
        XCTAssertEqual(0, plain.links.count)

        // ASCII link: carried over verbatim into the markdown and the link list.
        let asciiLink = try decodeHTMLString(
            fromJSON: "\"<p>This is a <a href=\\\"https://test.com\\\">test</a></p>\""
        )
        XCTAssertEqual("This is a test", asciiLink.asRawText)
        XCTAssertEqual("<p>This is a <a href=\"https://test.com\">test</a></p>", asciiLink.htmlValue)
        XCTAssertEqual("This is a [test](https://test.com)", asciiLink.asMarkdown)
        XCTAssertEqual(0, asciiLink.statusesURLs.count)
        XCTAssertEqual(1, asciiLink.links.count)
        XCTAssertEqual("https://test.com", asciiLink.links[0].url.absoluteString)
        XCTAssertEqual("test", asciiLink.links[0].displayString)

        // Non-ASCII link: the raw HTML is untouched, the markdown and link are encoded.
        let unicodeLink = try decodeHTMLString(
            fromJSON: "\"<p>This is a <a href=\\\"https://test.com/goßëña\\\">test</a></p>\""
        )
        XCTAssertEqual("This is a test", unicodeLink.asRawText)
        XCTAssertEqual("<p>This is a <a href=\"https://test.com/goßëña\">test</a></p>", unicodeLink.htmlValue)
        XCTAssertEqual("This is a [test](https://test.com/go%C3%9F%C3%AB%C3%B1a)", unicodeLink.asMarkdown)
        XCTAssertEqual(0, unicodeLink.statusesURLs.count)
        XCTAssertEqual(1, unicodeLink.links.count)
        XCTAssertEqual("https://test.com/go%C3%9F%C3%AB%C3%B1a", unicodeLink.links[0].url.absoluteString)
        XCTAssertEqual("test", unicodeLink.links[0].displayString)

        // Already-encoded link: must not be encoded a second time.
        let preEncodedLink = try decodeHTMLString(
            fromJSON: "\"<p>This is a <a href=\\\"https://test.com/go%C3%9F%C3%AB%C3%B1a\\\">test</a></p>\""
        )
        XCTAssertEqual("This is a test", preEncodedLink.asRawText)
        XCTAssertEqual("<p>This is a <a href=\"https://test.com/go%C3%9F%C3%AB%C3%B1a\">test</a></p>", preEncodedLink.htmlValue)
        XCTAssertEqual("This is a [test](https://test.com/go%C3%9F%C3%AB%C3%B1a)", preEncodedLink.asMarkdown)
        XCTAssertEqual(0, preEncodedLink.statusesURLs.count)
        XCTAssertEqual(1, preEncodedLink.links.count)
        XCTAssertEqual("https://test.com/go%C3%9F%C3%AB%C3%B1a", preEncodedLink.links[0].url.absoluteString)
        XCTAssertEqual("test", preEncodedLink.links[0].displayString)
    }
}
|
|
@ -1,11 +0,0 @@
|
|||
@testable import Models
|
||||
import XCTest
|
||||
|
||||
final class ModelsTests: XCTestCase {
    /// Template functional test: checks the text exposed by a freshly created `Models` value.
    func testExample() throws {
        // Read the value under test first, then compare it with the expected greeting.
        let actualText = Models().text
        XCTAssertEqual(actualText, "Hello, World!")
    }
}
|
Loading…
Reference in New Issue