mirror of
https://github.com/yang991178/fluent-reader.git
synced 2025-04-13 18:02:14 +02:00
infer html charset from meta tag
This commit is contained in:
parent
42e819834b
commit
5c6cbf5be0
@ -216,7 +216,7 @@ class Article extends React.Component<ArticleProps, ArticleState> {
|
|||||||
try {
|
try {
|
||||||
const result = await fetch(this.props.item.link)
|
const result = await fetch(this.props.item.link)
|
||||||
if (!result || !result.ok) throw new Error()
|
if (!result || !result.ok) throw new Error()
|
||||||
const html = await decodeFetchResponse(result)
|
const html = await decodeFetchResponse(result, true)
|
||||||
this.setState({ fullContent: html })
|
this.setState({ fullContent: html })
|
||||||
} catch {
|
} catch {
|
||||||
this.setState({ loaded: true, error: true, errorDescription: "MERCURY_PARSER_FAILURE" })
|
this.setState({ loaded: true, error: true, errorDescription: "MERCURY_PARSER_FAILURE" })
|
||||||
|
@ -29,12 +29,20 @@ const rssParser = new Parser({
|
|||||||
})
|
})
|
||||||
|
|
||||||
// Extracts the charset label from a Content-Type style string. An optional
// opening quote is skipped so that `charset="utf-8"` yields `utf-8` rather
// than an empty capture.
const CHARSET_RE = /charset=["']?([^()<>@,;:\"'/[\]?.=\s]*)/i

/**
 * Builds a TextDecoder for `label`, or returns null when the label is
 * missing, empty, or not an encoding the runtime supports (the TextDecoder
 * constructor throws a RangeError for unknown labels).
 */
function tryCreateDecoder(label: string | null | undefined): TextDecoder | null {
    if (!label) return null
    try {
        return new TextDecoder(label)
    } catch {
        return null
    }
}

/**
 * Decodes the body of a fetch response into a string.
 *
 * The encoding is taken from the `charset` parameter of the Content-Type
 * header when it names a supported encoding. Otherwise the body is decoded
 * as UTF-8 and, if `isHTML` is true, the document is inspected for a
 * `<meta charset>` or `<meta http-equiv="Content-Type">` declaration; when
 * one names a supported encoding the body is decoded again with it.
 *
 * @param response Fetch response whose body has not been consumed yet.
 * @param isHTML Whether the body is an HTML document that may declare its
 *               own encoding in a meta tag.
 * @returns The decoded body text.
 */
export async function decodeFetchResponse(response: Response, isHTML = false) {
    const buffer = await response.arrayBuffer()
    const ctype = response.headers.get("content-type")
    const headerMatch = ctype ? CHARSET_RE.exec(ctype) : null
    const headerDecoder = tryCreateDecoder(headerMatch ? headerMatch[1] : null)

    // Without a usable header charset, fall back to TextDecoder's default (UTF-8).
    let content = (headerDecoder || new TextDecoder()).decode(buffer)

    if (headerDecoder === null && isHTML) {
        // Meta tags are plain ASCII, so they survive the provisional UTF-8
        // decode even when the rest of the document is in another encoding.
        const dom = domParser.parseFromString(content, "text/html")
        let declared: string | null = null
        const meta = dom.querySelector("meta[charset]")
        if (meta) {
            declared = meta.getAttribute("charset")
        } else {
            // Legacy form: <meta http-equiv="Content-Type" content="text/html; charset=...">
            const pragma = dom.querySelector("meta[http-equiv='content-type' i]")
            const pragmaMatch = pragma ? CHARSET_RE.exec(pragma.getAttribute("content") || "") : null
            declared = pragmaMatch ? pragmaMatch[1] : null
        }
        const metaDecoder = tryCreateDecoder(declared)
        if (metaDecoder) {
            content = metaDecoder.decode(buffer)
        }
    }
    return content
}
|
||||||
|
|
||||||
export async function parseRSS(url: string) {
|
export async function parseRSS(url: string) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user