mirror of
https://github.com/superseriousbusiness/gotosocial
synced 2025-06-05 21:59:39 +02:00
[feature] Parse content warning to HTML, serialize via client API as plaintext (#3876)
* [feature] Parse content warning as HTML, serialize via API to plaintext * tidy up some cruft * whoops * oops * i'm da joker baybee * clemency muy lorde * rename some of the text functions for clarity * jiggle the opts * fiddle de deee * hopefully the last test fix i ever have to do in my beautiful life
This commit is contained in:
@@ -20,6 +20,8 @@ package text
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"codeberg.org/gruf/go-byteutil"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
||||
@@ -27,11 +29,15 @@ import (
|
||||
"github.com/superseriousbusiness/gotosocial/internal/regexes"
|
||||
"github.com/yuin/goldmark"
|
||||
"github.com/yuin/goldmark/extension"
|
||||
"github.com/yuin/goldmark/renderer"
|
||||
"github.com/yuin/goldmark/renderer/html"
|
||||
)
|
||||
|
||||
// FromMarkdown fulfils FormatFunc by parsing
|
||||
// the given markdown input into a FormatResult.
|
||||
//
|
||||
// Inline (aka unsafe) HTML elements are allowed,
|
||||
// as they should be sanitized afterwards anyway.
|
||||
func (f *Formatter) FromMarkdown(
|
||||
ctx context.Context,
|
||||
parseMention gtsmodel.ParseMentionFunc,
|
||||
@@ -39,18 +45,85 @@ func (f *Formatter) FromMarkdown(
|
||||
statusID string,
|
||||
input string,
|
||||
) *FormatResult {
|
||||
result := new(FormatResult)
|
||||
return f.fromMarkdown(
|
||||
ctx,
|
||||
false, // basic = false
|
||||
parseMention,
|
||||
authorID,
|
||||
statusID,
|
||||
input,
|
||||
)
|
||||
}
|
||||
|
||||
// FromMarkdownBasic fulfils FormatFunc by parsing
|
||||
// the given markdown input into a FormatResult.
|
||||
//
|
||||
// Unlike FromMarkdown, it will only parse emojis with
|
||||
// the custom renderer, leaving aside mentions and tags.
|
||||
//
|
||||
// Inline (aka unsafe) HTML elements are not allowed.
|
||||
//
|
||||
// If the result is a single paragraph,
|
||||
// it will not be wrapped in <p> tags.
|
||||
func (f *Formatter) FromMarkdownBasic(
|
||||
ctx context.Context,
|
||||
parseMention gtsmodel.ParseMentionFunc,
|
||||
authorID string,
|
||||
statusID string,
|
||||
input string,
|
||||
) *FormatResult {
|
||||
res := f.fromMarkdown(
|
||||
ctx,
|
||||
true, // basic = true
|
||||
parseMention,
|
||||
authorID,
|
||||
statusID,
|
||||
input,
|
||||
)
|
||||
|
||||
res.HTML = unwrapParagraph(res.HTML)
|
||||
return res
|
||||
}
|
||||
|
||||
// fromMarkdown parses the given markdown input in either
|
||||
// basic (emoji-only, no raw HTML) or full mode, and returns the result.
|
||||
func (f *Formatter) fromMarkdown(
|
||||
ctx context.Context,
|
||||
basic bool,
|
||||
parseMention gtsmodel.ParseMentionFunc,
|
||||
authorID string,
|
||||
statusID string,
|
||||
input string,
|
||||
) *FormatResult {
|
||||
var (
|
||||
result = new(FormatResult)
|
||||
opts []renderer.Option
|
||||
)
|
||||
|
||||
if basic {
|
||||
// Don't allow raw HTML tags,
|
||||
// markdown syntax only.
|
||||
opts = []renderer.Option{
|
||||
html.WithXHTML(),
|
||||
html.WithHardWraps(),
|
||||
}
|
||||
} else {
|
||||
opts = []renderer.Option{
|
||||
html.WithXHTML(),
|
||||
html.WithHardWraps(),
|
||||
|
||||
// Allow raw HTML tags, we
|
||||
// sanitize at the end anyway.
|
||||
html.WithUnsafe(),
|
||||
}
|
||||
}
|
||||
|
||||
// Instantiate goldmark parser for
|
||||
// markdown, using custom renderer
|
||||
// to add hashtag/mention links.
|
||||
md := goldmark.New(
|
||||
goldmark.WithRendererOptions(
|
||||
html.WithXHTML(),
|
||||
html.WithHardWraps(),
|
||||
// Allows raw HTML. We sanitize
|
||||
// at the end so this is OK.
|
||||
html.WithUnsafe(),
|
||||
opts...,
|
||||
),
|
||||
goldmark.WithExtensions(
|
||||
&customRenderer{
|
||||
@@ -59,7 +132,9 @@ func (f *Formatter) FromMarkdown(
|
||||
parseMention,
|
||||
authorID,
|
||||
statusID,
|
||||
false, // emojiOnly = false.
|
||||
// If basic, pass
|
||||
// emojiOnly = true.
|
||||
basic,
|
||||
result,
|
||||
},
|
||||
// Turns URLs into links.
|
||||
@@ -85,8 +160,36 @@ func (f *Formatter) FromMarkdown(
|
||||
|
||||
// Clean and shrink HTML.
|
||||
result.HTML = byteutil.B2S(htmlBytes.Bytes())
|
||||
result.HTML = SanitizeToHTML(result.HTML)
|
||||
result.HTML = SanitizeHTML(result.HTML)
|
||||
result.HTML = MinifyHTML(result.HTML)
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// parasRegexp matches a bare opening
// or closing paragraph tag.
var parasRegexp = regexp.MustCompile(`</?p>`)

// unwrapParagraph strips the outer <p>...</p> pair from the
// given HTML, but only when that pair is the sole paragraph:
// the input must start with "<p>", end with "</p>", and hold
// no further "<p>" or "</p>" tags in between. In every other
// case the input is returned exactly as it was given.
func unwrapParagraph(html string) string {
	const (
		openTag  = "<p>"
		closeTag = "</p>"
	)

	// Nothing to unwrap unless the input is
	// bracketed by paragraph tags on both ends.
	wrapped := strings.HasPrefix(html, openTag) &&
		strings.HasSuffix(html, closeTag)
	if !wrapped {
		return html
	}

	// Peel off the outer tags to
	// inspect what they enclose.
	inner := strings.TrimSuffix(
		strings.TrimPrefix(html, openTag),
		closeTag,
	)

	// Any remaining paragraph tag means the input
	// was more than one paragraph; leave it alone.
	if parasRegexp.MatchString(inner) {
		return html
	}

	return inner
}
|
||||
|
Reference in New Issue
Block a user