mirror of
https://github.com/superseriousbusiness/gotosocial
synced 2025-06-05 21:59:39 +02:00
[chore] Text formatting overhaul (#1406)
* Implement goldmark debug print for hashtags and mentions * Minify HTML in FromPlain * Convert plaintext status parser to goldmark * Move mention/tag/emoji finding logic into formatter * Combine mention and hashtag boundary characters * Normalize unicode when rendering hashtags
This commit is contained in:
312
internal/text/goldmark_extension.go
Normal file
312
internal/text/goldmark_extension.go
Normal file
@@ -0,0 +1,312 @@
|
||||
/*
|
||||
GoToSocial
|
||||
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package text
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/superseriousbusiness/gotosocial/internal/db"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/log"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/regexes"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/util"
|
||||
"github.com/yuin/goldmark"
|
||||
"github.com/yuin/goldmark/ast"
|
||||
"github.com/yuin/goldmark/parser"
|
||||
"github.com/yuin/goldmark/renderer"
|
||||
"github.com/yuin/goldmark/text"
|
||||
mdutil "github.com/yuin/goldmark/util"
|
||||
)
|
||||
|
||||
// A goldmark extension that parses potential mentions, hashtags and emojis separately from
// regular text, so that each stays as one contiguous text fragment in the AST, and then
// renders them separately too, to avoid scanning normal text for mentions, tags and emojis.
|
||||
|
||||
// mention and hashtag fulfil the goldmark ast.Node interface.
|
||||
type mention struct {
|
||||
ast.BaseInline
|
||||
Segment text.Segment
|
||||
}
|
||||
|
||||
type hashtag struct {
|
||||
ast.BaseInline
|
||||
Segment text.Segment
|
||||
}
|
||||
|
||||
type emoji struct {
|
||||
ast.BaseInline
|
||||
Segment text.Segment
|
||||
}
|
||||
|
||||
var kindMention = ast.NewNodeKind("Mention")
|
||||
var kindHashtag = ast.NewNodeKind("Hashtag")
|
||||
var kindEmoji = ast.NewNodeKind("Emoji")
|
||||
|
||||
func (n *mention) Kind() ast.NodeKind {
|
||||
return kindMention
|
||||
}
|
||||
|
||||
func (n *hashtag) Kind() ast.NodeKind {
|
||||
return kindHashtag
|
||||
}
|
||||
|
||||
func (n *emoji) Kind() ast.NodeKind {
|
||||
return kindEmoji
|
||||
}
|
||||
|
||||
// Dump can be used for debugging.
|
||||
func (n *mention) Dump(source []byte, level int) {
|
||||
fmt.Printf("%sMention: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
|
||||
}
|
||||
|
||||
func (n *hashtag) Dump(source []byte, level int) {
|
||||
fmt.Printf("%sHashtag: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
|
||||
}
|
||||
|
||||
func (n *emoji) Dump(source []byte, level int) {
|
||||
fmt.Printf("%sEmoji: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
|
||||
}
|
||||
|
||||
// newMention and newHashtag create a goldmark ast.Node from a goldmark text.Segment.
|
||||
// The contained segment is used in rendering.
|
||||
func newMention(s text.Segment) *mention {
|
||||
return &mention{
|
||||
BaseInline: ast.BaseInline{},
|
||||
Segment: s,
|
||||
}
|
||||
}
|
||||
|
||||
func newHashtag(s text.Segment) *hashtag {
|
||||
return &hashtag{
|
||||
BaseInline: ast.BaseInline{},
|
||||
Segment: s,
|
||||
}
|
||||
}
|
||||
|
||||
func newEmoji(s text.Segment) *emoji {
|
||||
return &emoji{
|
||||
BaseInline: ast.BaseInline{},
|
||||
Segment: s,
|
||||
}
|
||||
}
|
||||
|
||||
// mentionParser is a stateless inline parser for mentions. With its Trigger
// and Parse methods it fulfils the goldmark parser.InlineParser interface.
type mentionParser struct{}
|
||||
|
||||
// hashtagParser is a stateless inline parser for hashtags.
type hashtagParser struct{}
|
||||
|
||||
// emojiParser is a stateless inline parser for emoji shortcodes.
type emojiParser struct{}
|
||||
|
||||
func (p *mentionParser) Trigger() []byte {
|
||||
return []byte{'@'}
|
||||
}
|
||||
|
||||
func (p *hashtagParser) Trigger() []byte {
|
||||
return []byte{'#'}
|
||||
}
|
||||
|
||||
func (p *emojiParser) Trigger() []byte {
|
||||
return []byte{':'}
|
||||
}
|
||||
|
||||
func (p *mentionParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
|
||||
before := block.PrecendingCharacter()
|
||||
line, segment := block.PeekLine()
|
||||
|
||||
if !util.IsMentionOrHashtagBoundary(before) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// unideal for performance but makes use of existing regex
|
||||
loc := regexes.MentionFinder.FindIndex(line)
|
||||
switch {
|
||||
case loc == nil:
|
||||
fallthrough
|
||||
case loc[0] != 0: // fail if not found at start
|
||||
return nil
|
||||
default:
|
||||
block.Advance(loc[1])
|
||||
return newMention(segment.WithStop(segment.Start + loc[1]))
|
||||
}
|
||||
}
|
||||
|
||||
func (p *hashtagParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
|
||||
before := block.PrecendingCharacter()
|
||||
line, segment := block.PeekLine()
|
||||
s := string(line)
|
||||
|
||||
if !util.IsMentionOrHashtagBoundary(before) || len(s) == 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for i, r := range s {
|
||||
switch {
|
||||
case r == '#' && i == 0:
|
||||
// ignore initial #
|
||||
continue
|
||||
case !util.IsPlausiblyInHashtag(r) && !util.IsMentionOrHashtagBoundary(r):
|
||||
// Fake hashtag, don't trust it
|
||||
return nil
|
||||
case util.IsMentionOrHashtagBoundary(r):
|
||||
if i <= 1 {
|
||||
// empty
|
||||
return nil
|
||||
}
|
||||
// End of hashtag
|
||||
block.Advance(i)
|
||||
return newHashtag(segment.WithStop(segment.Start + i))
|
||||
}
|
||||
}
|
||||
// If we don't find invalid characters before the end of the line then it's all hashtag, babey
|
||||
block.Advance(segment.Len())
|
||||
return newHashtag(segment)
|
||||
}
|
||||
|
||||
func (p *emojiParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
|
||||
line, segment := block.PeekLine()
|
||||
|
||||
// unideal for performance but makes use of existing regex
|
||||
loc := regexes.EmojiFinder.FindIndex(line)
|
||||
switch {
|
||||
case loc == nil:
|
||||
fallthrough
|
||||
case loc[0] != 0: // fail if not found at start
|
||||
return nil
|
||||
default:
|
||||
block.Advance(loc[1])
|
||||
return newEmoji(segment.WithStop(segment.Start + loc[1]))
|
||||
}
|
||||
}
|
||||
|
||||
// customRenderer fulfils both the renderer.NodeRenderer and goldmark.Extender interfaces.
|
||||
// It is created in FromMarkdown and FromPlain to be used as a goldmark extension, and the
|
||||
// fields are used to report tags and mentions to the caller for use as metadata.
|
||||
type customRenderer struct {
|
||||
f *formatter
|
||||
ctx context.Context
|
||||
parseMention gtsmodel.ParseMentionFunc
|
||||
accountID string
|
||||
statusID string
|
||||
emojiOnly bool
|
||||
result *FormatResult
|
||||
}
|
||||
|
||||
func (r *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
|
||||
reg.Register(kindMention, r.renderMention)
|
||||
reg.Register(kindHashtag, r.renderHashtag)
|
||||
reg.Register(kindEmoji, r.renderEmoji)
|
||||
}
|
||||
|
||||
func (r *customRenderer) Extend(m goldmark.Markdown) {
|
||||
// 1000 is set as the lowest priority, but it's arbitrary
|
||||
m.Parser().AddOptions(parser.WithInlineParsers(
|
||||
mdutil.Prioritized(&emojiParser{}, 1000),
|
||||
))
|
||||
if !r.emojiOnly {
|
||||
m.Parser().AddOptions(parser.WithInlineParsers(
|
||||
mdutil.Prioritized(&mentionParser{}, 1000),
|
||||
mdutil.Prioritized(&hashtagParser{}, 1000),
|
||||
))
|
||||
}
|
||||
m.Renderer().AddOptions(renderer.WithNodeRenderers(
|
||||
mdutil.Prioritized(r, 1000),
|
||||
))
|
||||
}
|
||||
|
||||
// renderMention and renderHashtag take a mention or a hashtag ast.Node and render it as HTML.
|
||||
func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
|
||||
n, ok := node.(*mention) // this function is only registered for kindMention
|
||||
if !ok {
|
||||
log.Errorf("type assertion failed")
|
||||
}
|
||||
text := string(n.Segment.Value(source))
|
||||
|
||||
html := r.replaceMention(text)
|
||||
|
||||
// we don't have much recourse if this fails
|
||||
if _, err := w.WriteString(html); err != nil {
|
||||
log.Errorf("error writing HTML: %s", err)
|
||||
}
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
|
||||
func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
|
||||
n, ok := node.(*hashtag) // this function is only registered for kindHashtag
|
||||
if !ok {
|
||||
log.Errorf("type assertion failed")
|
||||
}
|
||||
text := string(n.Segment.Value(source))
|
||||
|
||||
html := r.replaceHashtag(text)
|
||||
|
||||
_, err := w.WriteString(html)
|
||||
// we don't have much recourse if this fails
|
||||
if err != nil {
|
||||
log.Errorf("error writing HTML: %s", err)
|
||||
}
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
|
||||
// renderEmoji doesn't turn an emoji into HTML, but adds it to the metadata.
|
||||
func (r *customRenderer) renderEmoji(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
|
||||
n, ok := node.(*emoji) // this function is only registered for kindEmoji
|
||||
if !ok {
|
||||
log.Errorf("type assertion failed")
|
||||
}
|
||||
text := string(n.Segment.Value(source))
|
||||
shortcode := text[1 : len(text)-1]
|
||||
|
||||
emoji, err := r.f.db.GetEmojiByShortcodeDomain(r.ctx, shortcode, "")
|
||||
if err != nil {
|
||||
if err != db.ErrNoEntries {
|
||||
log.Errorf("error getting local emoji with shortcode %s: %s", shortcode, err)
|
||||
}
|
||||
} else if *emoji.VisibleInPicker && !*emoji.Disabled {
|
||||
listed := false
|
||||
for _, e := range r.result.Emojis {
|
||||
if e.Shortcode == emoji.Shortcode {
|
||||
listed = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !listed {
|
||||
r.result.Emojis = append(r.result.Emojis, emoji)
|
||||
}
|
||||
}
|
||||
|
||||
// we don't have much recourse if this fails
|
||||
if _, err := w.WriteString(text); err != nil {
|
||||
log.Errorf("error writing HTML: %s", err)
|
||||
}
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
Reference in New Issue
Block a user