[feature] Parse content warning to HTML, serialize via client API as plaintext (#3876)

* [feature] Parse content warning as HTML, serialize via API to plaintext

* tidy up some cruft

* whoops

* oops

* i'm da joker baybee

* clemency muy lorde

* rename some of the text functions for clarity

* jiggle the opts

* fiddle de deee

* hopefully the last test fix i ever have to do in my beautiful life
This commit is contained in:
tobi
2025-03-07 15:04:34 +01:00
committed by GitHub
parent 424f62dd70
commit d8113c11e4
48 changed files with 985 additions and 635 deletions

View File

@@ -18,9 +18,7 @@
package text
import (
"html"
"regexp"
"strings"
"github.com/microcosm-cc/bluemonday"
)
@@ -163,29 +161,10 @@ var regular *bluemonday.Policy = func() *bluemonday.Policy {
// Source: https://github.com/microcosm-cc/bluemonday#usage
var strict *bluemonday.Policy = bluemonday.StrictPolicy()
// removeHTML passes the given string through the
// package's strict bluemonday policy and returns
// whatever that policy leaves behind.
func removeHTML(in string) string {
	stripped := strict.Sanitize(in)
	return stripped
}
// SanitizeToHTML sanitizes only risky html elements
// SanitizeHTML sanitizes only risky html elements
// from the given string, allowing safe ones through.
func SanitizeToHTML(in string) string {
	// Delegate to the package's regular policy
	// and hand its output straight back.
	sanitized := regular.Sanitize(in)
	return sanitized
}
// SanitizeToPlaintext runs text through basic sanitization.
// This removes any html elements that were in the string,
// and returns clean plaintext.
func SanitizeToPlaintext(in string) string {
// Unescape first to catch any tricky critters.
content := html.UnescapeString(in)
// Remove all detected HTML.
content = removeHTML(content)
// Unescape again to return plaintext.
content = html.UnescapeString(content)
return strings.TrimSpace(content)
//
// It returns an HTML string.
func SanitizeHTML(html string) string {
	// Run the input through the package's regular
	// policy; the result is returned unchanged.
	cleaned := regular.Sanitize(html)
	return cleaned
}