mirror of
https://github.com/superseriousbusiness/gotosocial
synced 2024-12-12 00:26:39 +01:00
49beb17a8f
* Implement goldmark debug print for hashtags and mentions * Minify HTML in FromPlain * Convert plaintext status parser to goldmark * Move mention/tag/emoji finding logic into formatter * Combine mention and hashtag boundary characters * Normalize unicode when rendering hashtags
313 lines
8.6 KiB
Go
313 lines
8.6 KiB
Go
/*
|
|
GoToSocial
|
|
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU Affero General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU Affero General Public License for more details.
|
|
You should have received a copy of the GNU Affero General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
package text
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"strings"
|
|
|
|
"github.com/superseriousbusiness/gotosocial/internal/db"
|
|
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
|
"github.com/superseriousbusiness/gotosocial/internal/log"
|
|
"github.com/superseriousbusiness/gotosocial/internal/regexes"
|
|
"github.com/superseriousbusiness/gotosocial/internal/util"
|
|
"github.com/yuin/goldmark"
|
|
"github.com/yuin/goldmark/ast"
|
|
"github.com/yuin/goldmark/parser"
|
|
"github.com/yuin/goldmark/renderer"
|
|
"github.com/yuin/goldmark/text"
|
|
mdutil "github.com/yuin/goldmark/util"
|
|
)
|
|
|
|
// A goldmark extension that parses potential mentions and hashtags separately from regular
|
|
// text, so that they stay as one contiguous text fragment in the AST, and then renders
|
|
// them separately too, to avoid scanning normal text for mentions and tags.
|
|
|
|
// mention and hashtag fulfil the goldmark ast.Node interface.
|
|
type mention struct {
|
|
ast.BaseInline
|
|
Segment text.Segment
|
|
}
|
|
|
|
type hashtag struct {
|
|
ast.BaseInline
|
|
Segment text.Segment
|
|
}
|
|
|
|
type emoji struct {
|
|
ast.BaseInline
|
|
Segment text.Segment
|
|
}
|
|
|
|
var kindMention = ast.NewNodeKind("Mention")
|
|
var kindHashtag = ast.NewNodeKind("Hashtag")
|
|
var kindEmoji = ast.NewNodeKind("Emoji")
|
|
|
|
func (n *mention) Kind() ast.NodeKind {
|
|
return kindMention
|
|
}
|
|
|
|
func (n *hashtag) Kind() ast.NodeKind {
|
|
return kindHashtag
|
|
}
|
|
|
|
func (n *emoji) Kind() ast.NodeKind {
|
|
return kindEmoji
|
|
}
|
|
|
|
// Dump can be used for debugging.
|
|
func (n *mention) Dump(source []byte, level int) {
|
|
fmt.Printf("%sMention: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
|
|
}
|
|
|
|
func (n *hashtag) Dump(source []byte, level int) {
|
|
fmt.Printf("%sHashtag: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
|
|
}
|
|
|
|
func (n *emoji) Dump(source []byte, level int) {
|
|
fmt.Printf("%sEmoji: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
|
|
}
|
|
|
|
// newMention and newHashtag create a goldmark ast.Node from a goldmark text.Segment.
|
|
// The contained segment is used in rendering.
|
|
func newMention(s text.Segment) *mention {
|
|
return &mention{
|
|
BaseInline: ast.BaseInline{},
|
|
Segment: s,
|
|
}
|
|
}
|
|
|
|
func newHashtag(s text.Segment) *hashtag {
|
|
return &hashtag{
|
|
BaseInline: ast.BaseInline{},
|
|
Segment: s,
|
|
}
|
|
}
|
|
|
|
func newEmoji(s text.Segment) *emoji {
|
|
return &emoji{
|
|
BaseInline: ast.BaseInline{},
|
|
Segment: s,
|
|
}
|
|
}
|
|
|
|
// mentionParser and hashtagParser fulfil the goldmark parser.InlineParser interface.
|
|
type mentionParser struct {
|
|
}
|
|
|
|
type hashtagParser struct {
|
|
}
|
|
|
|
type emojiParser struct {
|
|
}
|
|
|
|
func (p *mentionParser) Trigger() []byte {
|
|
return []byte{'@'}
|
|
}
|
|
|
|
func (p *hashtagParser) Trigger() []byte {
|
|
return []byte{'#'}
|
|
}
|
|
|
|
func (p *emojiParser) Trigger() []byte {
|
|
return []byte{':'}
|
|
}
|
|
|
|
func (p *mentionParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
|
|
before := block.PrecendingCharacter()
|
|
line, segment := block.PeekLine()
|
|
|
|
if !util.IsMentionOrHashtagBoundary(before) {
|
|
return nil
|
|
}
|
|
|
|
// unideal for performance but makes use of existing regex
|
|
loc := regexes.MentionFinder.FindIndex(line)
|
|
switch {
|
|
case loc == nil:
|
|
fallthrough
|
|
case loc[0] != 0: // fail if not found at start
|
|
return nil
|
|
default:
|
|
block.Advance(loc[1])
|
|
return newMention(segment.WithStop(segment.Start + loc[1]))
|
|
}
|
|
}
|
|
|
|
func (p *hashtagParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
|
|
before := block.PrecendingCharacter()
|
|
line, segment := block.PeekLine()
|
|
s := string(line)
|
|
|
|
if !util.IsMentionOrHashtagBoundary(before) || len(s) == 1 {
|
|
return nil
|
|
}
|
|
|
|
for i, r := range s {
|
|
switch {
|
|
case r == '#' && i == 0:
|
|
// ignore initial #
|
|
continue
|
|
case !util.IsPlausiblyInHashtag(r) && !util.IsMentionOrHashtagBoundary(r):
|
|
// Fake hashtag, don't trust it
|
|
return nil
|
|
case util.IsMentionOrHashtagBoundary(r):
|
|
if i <= 1 {
|
|
// empty
|
|
return nil
|
|
}
|
|
// End of hashtag
|
|
block.Advance(i)
|
|
return newHashtag(segment.WithStop(segment.Start + i))
|
|
}
|
|
}
|
|
// If we don't find invalid characters before the end of the line then it's all hashtag, babey
|
|
block.Advance(segment.Len())
|
|
return newHashtag(segment)
|
|
}
|
|
|
|
func (p *emojiParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
|
|
line, segment := block.PeekLine()
|
|
|
|
// unideal for performance but makes use of existing regex
|
|
loc := regexes.EmojiFinder.FindIndex(line)
|
|
switch {
|
|
case loc == nil:
|
|
fallthrough
|
|
case loc[0] != 0: // fail if not found at start
|
|
return nil
|
|
default:
|
|
block.Advance(loc[1])
|
|
return newEmoji(segment.WithStop(segment.Start + loc[1]))
|
|
}
|
|
}
|
|
|
|
// customRenderer fulfils both the renderer.NodeRenderer and goldmark.Extender interfaces.
|
|
// It is created in FromMarkdown and FromPlain to be used as a goldmark extension, and the
|
|
// fields are used to report tags and mentions to the caller for use as metadata.
|
|
type customRenderer struct {
|
|
f *formatter
|
|
ctx context.Context
|
|
parseMention gtsmodel.ParseMentionFunc
|
|
accountID string
|
|
statusID string
|
|
emojiOnly bool
|
|
result *FormatResult
|
|
}
|
|
|
|
func (r *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
|
|
reg.Register(kindMention, r.renderMention)
|
|
reg.Register(kindHashtag, r.renderHashtag)
|
|
reg.Register(kindEmoji, r.renderEmoji)
|
|
}
|
|
|
|
func (r *customRenderer) Extend(m goldmark.Markdown) {
|
|
// 1000 is set as the lowest priority, but it's arbitrary
|
|
m.Parser().AddOptions(parser.WithInlineParsers(
|
|
mdutil.Prioritized(&emojiParser{}, 1000),
|
|
))
|
|
if !r.emojiOnly {
|
|
m.Parser().AddOptions(parser.WithInlineParsers(
|
|
mdutil.Prioritized(&mentionParser{}, 1000),
|
|
mdutil.Prioritized(&hashtagParser{}, 1000),
|
|
))
|
|
}
|
|
m.Renderer().AddOptions(renderer.WithNodeRenderers(
|
|
mdutil.Prioritized(r, 1000),
|
|
))
|
|
}
|
|
|
|
// renderMention and renderHashtag take a mention or a hashtag ast.Node and render it as HTML.
|
|
func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
|
if !entering {
|
|
return ast.WalkSkipChildren, nil
|
|
}
|
|
|
|
n, ok := node.(*mention) // this function is only registered for kindMention
|
|
if !ok {
|
|
log.Errorf("type assertion failed")
|
|
}
|
|
text := string(n.Segment.Value(source))
|
|
|
|
html := r.replaceMention(text)
|
|
|
|
// we don't have much recourse if this fails
|
|
if _, err := w.WriteString(html); err != nil {
|
|
log.Errorf("error writing HTML: %s", err)
|
|
}
|
|
return ast.WalkSkipChildren, nil
|
|
}
|
|
|
|
func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
|
if !entering {
|
|
return ast.WalkSkipChildren, nil
|
|
}
|
|
|
|
n, ok := node.(*hashtag) // this function is only registered for kindHashtag
|
|
if !ok {
|
|
log.Errorf("type assertion failed")
|
|
}
|
|
text := string(n.Segment.Value(source))
|
|
|
|
html := r.replaceHashtag(text)
|
|
|
|
_, err := w.WriteString(html)
|
|
// we don't have much recourse if this fails
|
|
if err != nil {
|
|
log.Errorf("error writing HTML: %s", err)
|
|
}
|
|
return ast.WalkSkipChildren, nil
|
|
}
|
|
|
|
// renderEmoji doesn't turn an emoji into HTML, but adds it to the metadata.
|
|
func (r *customRenderer) renderEmoji(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
|
if !entering {
|
|
return ast.WalkSkipChildren, nil
|
|
}
|
|
|
|
n, ok := node.(*emoji) // this function is only registered for kindEmoji
|
|
if !ok {
|
|
log.Errorf("type assertion failed")
|
|
}
|
|
text := string(n.Segment.Value(source))
|
|
shortcode := text[1 : len(text)-1]
|
|
|
|
emoji, err := r.f.db.GetEmojiByShortcodeDomain(r.ctx, shortcode, "")
|
|
if err != nil {
|
|
if err != db.ErrNoEntries {
|
|
log.Errorf("error getting local emoji with shortcode %s: %s", shortcode, err)
|
|
}
|
|
} else if *emoji.VisibleInPicker && !*emoji.Disabled {
|
|
listed := false
|
|
for _, e := range r.result.Emojis {
|
|
if e.Shortcode == emoji.Shortcode {
|
|
listed = true
|
|
break
|
|
}
|
|
}
|
|
if !listed {
|
|
r.result.Emojis = append(r.result.Emojis, emoji)
|
|
}
|
|
}
|
|
|
|
// we don't have much recourse if this fails
|
|
if _, err := w.WriteString(text); err != nil {
|
|
log.Errorf("error writing HTML: %s", err)
|
|
}
|
|
return ast.WalkSkipChildren, nil
|
|
}
|