mirror of
https://github.com/yang991178/fluent-reader.git
synced 2025-03-05 20:07:48 +01:00
infer html charset from meta tag
This commit is contained in:
parent
42e819834b
commit
5c6cbf5be0
@ -216,7 +216,7 @@ class Article extends React.Component<ArticleProps, ArticleState> {
|
||||
try {
|
||||
const result = await fetch(this.props.item.link)
|
||||
if (!result || !result.ok) throw new Error()
|
||||
const html = await decodeFetchResponse(result)
|
||||
const html = await decodeFetchResponse(result, true)
|
||||
this.setState({ fullContent: html })
|
||||
} catch {
|
||||
this.setState({ loaded: true, error: true, errorDescription: "MERCURY_PARSER_FAILURE" })
|
||||
|
@ -29,12 +29,20 @@ const rssParser = new Parser({
|
||||
})
|
||||
|
||||
const CHARSET_RE = /charset=([^()<>@,;:\"/[\]?.=\s]*)/i

/**
 * Pulls the charset label out of a Content-Type style string
 * (an HTTP header value or the `content` attribute of a `<meta http-equiv>` tag).
 *
 * @param value The raw string to inspect; may be missing.
 * @returns The declared label, or `undefined` when none (or an empty one) is present.
 */
function extractCharset(value: string | null | undefined): string | undefined {
    if (!value) return undefined
    // CHARSET_RE stops at a quote, so `charset="utf-8"` would otherwise yield
    // an empty label. Strip quotes first so quoted and bare forms read alike.
    const match = CHARSET_RE.exec(value.replace(/["']/g, ""))
    return match && match[1] ? match[1] : undefined
}

/**
 * Builds a decoder for the given encoding label.
 *
 * @param label Encoding label as declared by the server or the document.
 * @returns A decoder, or `undefined` when the label is missing or not a
 *          supported encoding (`TextDecoder` throws `RangeError` for those).
 */
function createDecoder(label: string | null | undefined): TextDecoder | undefined {
    if (!label) return undefined
    try {
        return new TextDecoder(label)
    } catch {
        // A bogus label must not fail the whole fetch; the caller falls back to UTF-8.
        return undefined
    }
}

/**
 * Decodes the body of a fetch response into text.
 *
 * The encoding is chosen in this order:
 *  1. the charset declared in the `Content-Type` response header;
 *  2. when `isHTML` is set, the charset declared by the document itself via
 *     `<meta charset>` or `<meta http-equiv="Content-Type" content="...">`;
 *  3. UTF-8.
 * Labels that are empty or unsupported are ignored instead of throwing.
 *
 * @param response The fetch response whose body should be decoded (it is consumed).
 * @param isHTML   Whether the body is an HTML document that may declare its own charset.
 * @returns The decoded text.
 */
export async function decodeFetchResponse(response: Response, isHTML = false): Promise<string> {
    const buffer = await response.arrayBuffer()

    const headerDecoder = createDecoder(extractCharset(response.headers.get("content-type")))
    if (headerDecoder) return headerDecoder.decode(buffer)

    // No usable charset from the server: decode as UTF-8 first. Charset
    // declarations are ASCII, so they survive this pass and can still be found.
    let content = new TextDecoder().decode(buffer)
    if (isHTML) {
        const dom = domParser.parseFromString(content, "text/html")
        const meta = dom.querySelector("meta[charset]")
        let label: string | null | undefined = meta ? meta.getAttribute("charset") : null
        if (!label) {
            // Legacy form: <meta http-equiv="Content-Type" content="text/html; charset=...">
            const legacy = dom.querySelector('meta[http-equiv="content-type" i]')
            label = legacy ? extractCharset(legacy.getAttribute("content")) : undefined
        }
        const metaDecoder = createDecoder(label)
        // Re-decode only when the document declares something other than what we already used.
        if (metaDecoder && metaDecoder.encoding !== "utf-8") {
            content = metaDecoder.decode(buffer)
        }
    }
    return content
}
|
||||
|
||||
export async function parseRSS(url: string) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user